Diffstat (limited to 'fs/jbd2')
-rw-r--r--  fs/jbd2/checkpoint.c  | 140
-rw-r--r--  fs/jbd2/commit.c      |  47
-rw-r--r--  fs/jbd2/journal.c     | 361
-rw-r--r--  fs/jbd2/recovery.c    |   5
-rw-r--r--  fs/jbd2/revoke.c      |  12
-rw-r--r--  fs/jbd2/transaction.c |  48
6 files changed, 343 insertions(+), 270 deletions(-)
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index d49d202903fb..c78841ee81cf 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -88,14 +88,13 @@ static inline void __buffer_relink_io(struct journal_head *jh)
  * whole transaction.
  *
  * Requires j_list_lock
- * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
  */
 static int __try_to_free_cp_buf(struct journal_head *jh)
 {
 	int ret = 0;
 	struct buffer_head *bh = jh2bh(jh);
 
-	if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
+	if (jh->b_transaction == NULL && !buffer_locked(bh) &&
 	    !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
 		/*
 		 * Get our reference so that bh cannot be freed before
@@ -104,11 +103,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 		get_bh(bh);
 		JBUFFER_TRACE(jh, "remove from checkpoint list");
 		ret = __jbd2_journal_remove_checkpoint(jh) + 1;
-		jbd_unlock_bh_state(bh);
 		BUFFER_TRACE(bh, "release");
 		__brelse(bh);
-	} else {
-		jbd_unlock_bh_state(bh);
 	}
 	return ret;
 }
@@ -180,21 +176,6 @@ void __jbd2_log_wait_for_space(journal_t *journal)
 }
 
 /*
- * We were unable to perform jbd_trylock_bh_state() inside j_list_lock.
- * The caller must restart a list walk.  Wait for someone else to run
- * jbd_unlock_bh_state().
- */
-static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
-	__releases(journal->j_list_lock)
-{
-	get_bh(bh);
-	spin_unlock(&journal->j_list_lock);
-	jbd_lock_bh_state(bh);
-	jbd_unlock_bh_state(bh);
-	put_bh(bh);
-}
-
-/*
  * Clean up transaction's list of buffers submitted for io.
  * We wait for any pending IO to complete and remove any clean
  * buffers. Note that we take the buffers in the opposite ordering
@@ -222,15 +203,9 @@ restart:
 	while (!released && transaction->t_checkpoint_io_list) {
 		jh = transaction->t_checkpoint_io_list;
 		bh = jh2bh(jh);
-		if (!jbd_trylock_bh_state(bh)) {
-			jbd_sync_bh(journal, bh);
-			spin_lock(&journal->j_list_lock);
-			goto restart;
-		}
 		get_bh(bh);
 		if (buffer_locked(bh)) {
 			spin_unlock(&journal->j_list_lock);
-			jbd_unlock_bh_state(bh);
 			wait_on_buffer(bh);
 			/* the journal_head may have gone by now */
 			BUFFER_TRACE(bh, "brelse");
@@ -246,7 +221,6 @@ restart:
 		 * it has been written out and so we can drop it from the list
 		 */
 		released = __jbd2_journal_remove_checkpoint(jh);
-		jbd_unlock_bh_state(bh);
 		__brelse(bh);
 	}
 
@@ -266,7 +240,6 @@ __flush_batch(journal_t *journal, int *batch_count)
 
 	for (i = 0; i < *batch_count; i++) {
 		struct buffer_head *bh = journal->j_chkpt_bhs[i];
-		clear_buffer_jwrite(bh);
 		BUFFER_TRACE(bh, "brelse");
 		__brelse(bh);
 	}
@@ -281,7 +254,6 @@ __flush_batch(journal_t *journal, int *batch_count)
  * be written out.
  *
  * Called with j_list_lock held and drops it if 1 is returned
- * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
  */
 static int __process_buffer(journal_t *journal, struct journal_head *jh,
 			    int *batch_count, transaction_t *transaction)
@@ -292,7 +264,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 	if (buffer_locked(bh)) {
 		get_bh(bh);
 		spin_unlock(&journal->j_list_lock);
-		jbd_unlock_bh_state(bh);
 		wait_on_buffer(bh);
 		/* the journal_head may have gone by now */
 		BUFFER_TRACE(bh, "brelse");
@@ -304,7 +275,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 
 		transaction->t_chp_stats.cs_forced_to_close++;
 		spin_unlock(&journal->j_list_lock);
-		jbd_unlock_bh_state(bh);
 		if (unlikely(journal->j_flags & JBD2_UNMOUNT))
 			/*
 			 * The journal thread is dead; so starting and
@@ -323,11 +293,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 		if (unlikely(buffer_write_io_error(bh)))
 			ret = -EIO;
 		get_bh(bh);
-		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
 		BUFFER_TRACE(bh, "remove from checkpoint");
 		__jbd2_journal_remove_checkpoint(jh);
 		spin_unlock(&journal->j_list_lock);
-		jbd_unlock_bh_state(bh);
 		__brelse(bh);
 	} else {
 		/*
@@ -340,10 +308,8 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 		BUFFER_TRACE(bh, "queue");
 		get_bh(bh);
 		J_ASSERT_BH(bh, !buffer_jwrite(bh));
-		set_buffer_jwrite(bh);
 		journal->j_chkpt_bhs[*batch_count] = bh;
 		__buffer_relink_io(jh);
-		jbd_unlock_bh_state(bh);
 		transaction->t_chp_stats.cs_written++;
 		(*batch_count)++;
 		if (*batch_count == JBD2_NR_BATCH) {
@@ -407,15 +373,7 @@ restart:
 	int retry = 0, err;
 
 	while (!retry && transaction->t_checkpoint_list) {
-		struct buffer_head *bh;
-
 		jh = transaction->t_checkpoint_list;
-		bh = jh2bh(jh);
-		if (!jbd_trylock_bh_state(bh)) {
-			jbd_sync_bh(journal, bh);
-			retry = 1;
-			break;
-		}
 		retry = __process_buffer(journal, jh, &batch_count,
 					 transaction);
 		if (retry < 0 && !result)
@@ -478,79 +436,28 @@ out:
 
 int jbd2_cleanup_journal_tail(journal_t *journal)
 {
-	transaction_t * transaction;
 	tid_t		first_tid;
-	unsigned long	blocknr, freed;
+	unsigned long	blocknr;
 
 	if (is_journal_aborted(journal))
 		return 1;
 
-	/* OK, work out the oldest transaction remaining in the log, and
-	 * the log block it starts at.
-	 *
-	 * If the log is now empty, we need to work out which is the
-	 * next transaction ID we will write, and where it will
-	 * start. */
-
-	write_lock(&journal->j_state_lock);
-	spin_lock(&journal->j_list_lock);
-	transaction = journal->j_checkpoint_transactions;
-	if (transaction) {
-		first_tid = transaction->t_tid;
-		blocknr = transaction->t_log_start;
-	} else if ((transaction = journal->j_committing_transaction) != NULL) {
-		first_tid = transaction->t_tid;
-		blocknr = transaction->t_log_start;
-	} else if ((transaction = journal->j_running_transaction) != NULL) {
-		first_tid = transaction->t_tid;
-		blocknr = journal->j_head;
-	} else {
-		first_tid = journal->j_transaction_sequence;
-		blocknr = journal->j_head;
-	}
-	spin_unlock(&journal->j_list_lock);
-	J_ASSERT(blocknr != 0);
-
-	/* If the oldest pinned transaction is at the tail of the log
-	   already then there's not much we can do right now. */
-	if (journal->j_tail_sequence == first_tid) {
-		write_unlock(&journal->j_state_lock);
+	if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
 		return 1;
-	}
+	J_ASSERT(blocknr != 0);
-
-	/* OK, update the superblock to recover the freed space.
-	 * Physical blocks come first: have we wrapped beyond the end of
-	 * the log? */
-	freed = blocknr - journal->j_tail;
-	if (blocknr < journal->j_tail)
-		freed = freed + journal->j_last - journal->j_first;
-
-	trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed);
-	jbd_debug(1,
-		  "Cleaning journal tail from %d to %d (offset %lu), "
-		  "freeing %lu\n",
-		  journal->j_tail_sequence, first_tid, blocknr, freed);
-
-	journal->j_free += freed;
-	journal->j_tail_sequence = first_tid;
-	journal->j_tail = blocknr;
-	write_unlock(&journal->j_state_lock);
 
 	/*
-	 * If there is an external journal, we need to make sure that
-	 * any data blocks that were recently written out --- perhaps
-	 * by jbd2_log_do_checkpoint() --- are flushed out before we
-	 * drop the transactions from the external journal.  It's
-	 * unlikely this will be necessary, especially with a
-	 * appropriately sized journal, but we need this to guarantee
-	 * correctness.  Fortunately jbd2_cleanup_journal_tail()
-	 * doesn't get called all that often.
+	 * We need to make sure that any blocks that were recently written out
+	 * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before
+	 * we drop the transactions from the journal. It's unlikely this will
+	 * be necessary, especially with an appropriately sized journal, but we
+	 * need this to guarantee correctness. Fortunately
+	 * jbd2_cleanup_journal_tail() doesn't get called all that often.
 	 */
-	if ((journal->j_fs_dev != journal->j_dev) &&
-	    (journal->j_flags & JBD2_BARRIER))
+	if (journal->j_flags & JBD2_BARRIER)
 		blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
-	if (!(journal->j_flags & JBD2_ABORT))
-		jbd2_journal_update_superblock(journal, 1);
+
+	__jbd2_update_log_tail(journal, first_tid, blocknr);
 	return 0;
 }
 
@@ -582,15 +489,12 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
 	do {
 		jh = next_jh;
 		next_jh = jh->b_cpnext;
-		/* Use trylock because of the ranking */
-		if (jbd_trylock_bh_state(jh2bh(jh))) {
-			ret = __try_to_free_cp_buf(jh);
-			if (ret) {
-				freed++;
-				if (ret == 2) {
-					*released = 1;
-					return freed;
-				}
+		ret = __try_to_free_cp_buf(jh);
+		if (ret) {
+			freed++;
+			if (ret == 2) {
+				*released = 1;
+				return freed;
 			}
 		}
 		/*
@@ -673,9 +577,7 @@ out:
  * The function can free jh and bh.
  *
  * This function is called with j_list_lock held.
- * This function is called with jbd_lock_bh_state(jh2bh(jh))
  */
-
 int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 {
 	struct transaction_chp_stats_s *stats;
@@ -722,7 +624,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 					     transaction->t_tid, stats);
 
 	__jbd2_journal_drop_transaction(journal, transaction);
-	kfree(transaction);
+	jbd2_journal_free_transaction(transaction);
 
 	/* Just in case anybody was waiting for more transactions to be
 	   checkpointed... */
@@ -797,5 +699,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
 	J_ASSERT(journal->j_committing_transaction != transaction);
 	J_ASSERT(journal->j_running_transaction != transaction);
 
+	trace_jbd2_drop_transaction(journal, transaction);
+
 	jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
 }
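
The tail-update arithmetic that jbd2_cleanup_journal_tail() used to do inline
(freed = blocknr - j_tail, plus a correction when the tail wraps past j_last)
now lives in __jbd2_update_log_tail() in journal.c. A standalone sketch of that
circular-log calculation, with illustrative names and plain C in place of the
kernel types:

#include <assert.h>
#include <stdio.h>

/*
 * Blocks freed when the journal tail moves from old_tail to new_tail in a
 * circular log occupying blocks [first, last). Mirrors the
 * "freed = blocknr - j_tail; if wrapped, freed += j_last - j_first" logic
 * from the patch; the standalone form and names are ours.
 */
static unsigned long blocks_freed(unsigned long old_tail, unsigned long new_tail,
				  unsigned long first, unsigned long last)
{
	unsigned long freed = new_tail - old_tail;

	if (new_tail < old_tail)	/* tail wrapped past the end of the log */
		freed += last - first;
	return freed;
}

int main(void)
{
	assert(blocks_freed(2, 6, 1, 10) == 4);	/* no wrap */
	assert(blocks_freed(8, 3, 1, 10) == 4);	/* wraps: 8 -> 9 -> 1 -> 2 -> 3 */
	printf("ok\n");
	return 0;
}
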
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index c067a8cae63b..17f557f01cf0 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -331,6 +331,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	struct buffer_head *cbh = NULL; /* For transactional checksums */
 	__u32 crc32_sum = ~0;
 	struct blk_plug plug;
+	/* Tail of the journal */
+	unsigned long first_block;
+	tid_t first_tid;
+	int update_tail;
 
 	/*
 	 * First job: lock down the current transaction and wait for
@@ -340,7 +344,18 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	/* Do we need to erase the effects of a prior jbd2_journal_flush? */
 	if (journal->j_flags & JBD2_FLUSHED) {
 		jbd_debug(3, "super block updated\n");
-		jbd2_journal_update_superblock(journal, 1);
+		mutex_lock(&journal->j_checkpoint_mutex);
+		/*
+		 * We hold j_checkpoint_mutex so tail cannot change under us.
+		 * We don't need any special data guarantees for writing sb
+		 * since journal is empty and it is ok for write to be
+		 * flushed only with transaction commit.
+		 */
+		jbd2_journal_update_sb_log_tail(journal,
+						journal->j_tail_sequence,
+						journal->j_tail,
+						WRITE_SYNC);
+		mutex_unlock(&journal->j_checkpoint_mutex);
 	} else {
 		jbd_debug(3, "superblock not updated\n");
 	}
@@ -677,10 +692,30 @@ start_journal_io:
 		err = 0;
 	}
 
+	/*
+	 * Get current oldest transaction in the log before we issue flush
+	 * to the filesystem device. After the flush we can be sure that
+	 * blocks of all older transactions are checkpointed to persistent
+	 * storage and we will be safe to update journal start in the
+	 * superblock with the numbers we get here.
+	 */
+	update_tail =
+		jbd2_journal_get_log_tail(journal, &first_tid, &first_block);
+
 	write_lock(&journal->j_state_lock);
+	if (update_tail) {
+		long freed = first_block - journal->j_tail;
+
+		if (first_block < journal->j_tail)
+			freed += journal->j_last - journal->j_first;
+		/* Update tail only if we free significant amount of space */
+		if (freed < journal->j_maxlen / 4)
+			update_tail = 0;
+	}
 	J_ASSERT(commit_transaction->t_state == T_COMMIT);
 	commit_transaction->t_state = T_COMMIT_DFLUSH;
 	write_unlock(&journal->j_state_lock);
+
 	/*
 	 * If the journal is not located on the file system device,
 	 * then we must flush the file system device before we issue
@@ -831,6 +866,14 @@ wait_for_iobuf:
 	if (err)
 		jbd2_journal_abort(journal, err);
 
+	/*
+	 * Now disk caches for filesystem device are flushed so we are safe to
+	 * erase checkpointed transactions from the log by updating journal
+	 * superblock.
+	 */
+	if (update_tail)
+		jbd2_update_log_tail(journal, first_tid, first_block);
+
 	/* End of a transaction!  Finally, we can do checkpoint
 	   processing: any buffers committed as a result of this
 	   transaction can be removed from any checkpoint list it was on
@@ -1048,7 +1091,7 @@ restart_loop:
 	jbd_debug(1, "JBD2: commit %d complete, head %d\n",
 		  journal->j_commit_sequence, journal->j_tail_sequence);
 	if (to_free)
-		kfree(commit_transaction);
+		jbd2_journal_free_transaction(commit_transaction);
 
 	wake_up(&journal->j_wait_done_commit);
 }
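
The commit path above computes how much space moving the tail would reclaim and
skips the superblock update when it is less than a quarter of j_maxlen, so small
tail movements do not cost an extra superblock write on every commit. A hedged,
user-space model of that decision (names are illustrative; the kernel operates
on the journal_t fields shown in the hunk):

#include <stdbool.h>
#include <stdio.h>

/* Model of the "update tail only if we free significant amount of space"
 * check: 'maxlen' stands in for j_maxlen, [first, last) bounds the log. */
static bool should_update_tail(unsigned long cur_tail, unsigned long new_tail,
			       unsigned long first, unsigned long last,
			       unsigned long maxlen)
{
	unsigned long freed = new_tail - cur_tail;

	if (new_tail < cur_tail)	/* wrapped around the end of the log */
		freed += last - first;
	return freed >= maxlen / 4;	/* small moves are not worth a sb write */
}

int main(void)
{
	printf("%d\n", should_update_tail(200, 300, 1, 1025, 1024));	/* 0: frees 100 < 256 */
	printf("%d\n", should_update_tail(200, 700, 1, 1025, 1024));	/* 1: frees 500 >= 256 */
	return 0;
}
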
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 839377e3d624..98ed6dbfe381 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -71,7 +71,6 @@ EXPORT_SYMBOL(jbd2_journal_revoke);
 
 EXPORT_SYMBOL(jbd2_journal_init_dev);
 EXPORT_SYMBOL(jbd2_journal_init_inode);
-EXPORT_SYMBOL(jbd2_journal_update_format);
 EXPORT_SYMBOL(jbd2_journal_check_used_features);
 EXPORT_SYMBOL(jbd2_journal_check_available_features);
 EXPORT_SYMBOL(jbd2_journal_set_features);
@@ -96,7 +95,6 @@ EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
 EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
 EXPORT_SYMBOL(jbd2_inode_cache);
 
-static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
 static int jbd2_journal_create_slab(size_t slab_size);
 
@@ -746,6 +744,98 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
 	return jbd2_journal_add_journal_head(bh);
 }
 
+/*
+ * Return tid of the oldest transaction in the journal and block in the journal
+ * where the transaction starts.
+ *
+ * If the journal is now empty, return which will be the next transaction ID
+ * we will write and where will that transaction start.
+ *
+ * The return value is 0 if journal tail cannot be pushed any further, 1 if
+ * it can.
+ */
+int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
+			      unsigned long *block)
+{
+	transaction_t *transaction;
+	int ret;
+
+	read_lock(&journal->j_state_lock);
+	spin_lock(&journal->j_list_lock);
+	transaction = journal->j_checkpoint_transactions;
+	if (transaction) {
+		*tid = transaction->t_tid;
+		*block = transaction->t_log_start;
+	} else if ((transaction = journal->j_committing_transaction) != NULL) {
+		*tid = transaction->t_tid;
+		*block = transaction->t_log_start;
+	} else if ((transaction = journal->j_running_transaction) != NULL) {
+		*tid = transaction->t_tid;
+		*block = journal->j_head;
+	} else {
+		*tid = journal->j_transaction_sequence;
+		*block = journal->j_head;
+	}
+	ret = tid_gt(*tid, journal->j_tail_sequence);
+	spin_unlock(&journal->j_list_lock);
+	read_unlock(&journal->j_state_lock);
+
+	return ret;
+}
+
+/*
+ * Update information in journal structure and in on disk journal superblock
+ * about log tail. This function does not check whether information passed in
+ * really pushes log tail further. It's responsibility of the caller to make
+ * sure provided log tail information is valid (e.g. by holding
+ * j_checkpoint_mutex all the time between computing log tail and calling this
+ * function as is the case with jbd2_cleanup_journal_tail()).
+ *
+ * Requires j_checkpoint_mutex
+ */
+void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
+{
+	unsigned long freed;
+
+	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
+
+	/*
+	 * We cannot afford for write to remain in drive's caches since as
+	 * soon as we update j_tail, next transaction can start reusing journal
+	 * space and if we lose sb update during power failure we'd replay
+	 * old transaction with possibly newly overwritten data.
+	 */
+	jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
+	write_lock(&journal->j_state_lock);
+	freed = block - journal->j_tail;
+	if (block < journal->j_tail)
+		freed += journal->j_last - journal->j_first;
+
+	trace_jbd2_update_log_tail(journal, tid, block, freed);
+	jbd_debug(1,
+		  "Cleaning journal tail from %d to %d (offset %lu), "
+		  "freeing %lu\n",
+		  journal->j_tail_sequence, tid, block, freed);
+
+	journal->j_free += freed;
+	journal->j_tail_sequence = tid;
+	journal->j_tail = block;
+	write_unlock(&journal->j_state_lock);
+}
+
+/*
+ * This is a variation of __jbd2_update_log_tail which checks for validity of
+ * provided log tail and locks j_checkpoint_mutex. So it is safe against races
+ * with other threads updating log tail.
+ */
+void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
+{
+	mutex_lock(&journal->j_checkpoint_mutex);
+	if (tid_gt(tid, journal->j_tail_sequence))
+		__jbd2_update_log_tail(journal, tid, block);
+	mutex_unlock(&journal->j_checkpoint_mutex);
+}
+
 struct jbd2_stats_proc_session {
 	journal_t *journal;
 	struct transaction_stats_s *stats;
@@ -1114,40 +1204,45 @@ static int journal_reset(journal_t *journal)
 
 	journal->j_max_transaction_buffers = journal->j_maxlen / 4;
 
-	/* Add the dynamic fields and write it to disk. */
-	jbd2_journal_update_superblock(journal, 1);
-	return jbd2_journal_start_thread(journal);
-}
-
-/**
- * void jbd2_journal_update_superblock() - Update journal sb on disk.
- * @journal: The journal to update.
- * @wait: Set to '0' if you don't want to wait for IO completion.
- *
- * Update a journal's dynamic superblock fields and write it to disk,
- * optionally waiting for the IO to complete.
- */
-void jbd2_journal_update_superblock(journal_t *journal, int wait)
-{
-	journal_superblock_t *sb = journal->j_superblock;
-	struct buffer_head *bh = journal->j_sb_buffer;
-
 	/*
 	 * As a special case, if the on-disk copy is already marked as needing
-	 * no recovery (s_start == 0) and there are no outstanding transactions
-	 * in the filesystem, then we can safely defer the superblock update
-	 * until the next commit by setting JBD2_FLUSHED. This avoids
+	 * no recovery (s_start == 0), then we can safely defer the superblock
+	 * update until the next commit by setting JBD2_FLUSHED. This avoids
 	 * attempting a write to a potential-readonly device.
 	 */
-	if (sb->s_start == 0 && journal->j_tail_sequence ==
-				journal->j_transaction_sequence) {
+	if (sb->s_start == 0) {
 		jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
 			"(start %ld, seq %d, errno %d)\n",
 			journal->j_tail, journal->j_tail_sequence,
 			journal->j_errno);
-		goto out;
+		journal->j_flags |= JBD2_FLUSHED;
+	} else {
+		/* Lock here to make assertions happy... */
+		mutex_lock(&journal->j_checkpoint_mutex);
+		/*
+		 * Update log tail information. We use WRITE_FUA since new
+		 * transaction will start reusing journal space and so we
+		 * must make sure information about current log tail is on
+		 * disk before that.
+		 */
+		jbd2_journal_update_sb_log_tail(journal,
+						journal->j_tail_sequence,
+						journal->j_tail,
+						WRITE_FUA);
+		mutex_unlock(&journal->j_checkpoint_mutex);
 	}
+	return jbd2_journal_start_thread(journal);
+}
 
+static void jbd2_write_superblock(journal_t *journal, int write_op)
+{
+	struct buffer_head *bh = journal->j_sb_buffer;
+	int ret;
+
+	trace_jbd2_write_superblock(journal, write_op);
+	if (!(journal->j_flags & JBD2_BARRIER))
+		write_op &= ~(REQ_FUA | REQ_FLUSH);
+	lock_buffer(bh);
 	if (buffer_write_io_error(bh)) {
 		/*
 		 * Oh, dear.  A previous attempt to write the journal
@@ -1163,48 +1258,106 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
 		clear_buffer_write_io_error(bh);
 		set_buffer_uptodate(bh);
 	}
+	get_bh(bh);
+	bh->b_end_io = end_buffer_write_sync;
+	ret = submit_bh(write_op, bh);
+	wait_on_buffer(bh);
+	if (buffer_write_io_error(bh)) {
+		clear_buffer_write_io_error(bh);
+		set_buffer_uptodate(bh);
+		ret = -EIO;
+	}
+	if (ret) {
+		printk(KERN_ERR "JBD2: Error %d detected when updating "
+		       "journal superblock for %s.\n", ret,
+		       journal->j_devname);
+	}
+}
+
+/**
+ * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk.
+ * @journal: The journal to update.
+ * @tail_tid: TID of the new transaction at the tail of the log
+ * @tail_block: The first block of the transaction at the tail of the log
+ * @write_op: With which operation should we write the journal sb
+ *
+ * Update a journal's superblock information about log tail and write it to
+ * disk, waiting for the IO to complete.
+ */
+void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
+				     unsigned long tail_block, int write_op)
+{
+	journal_superblock_t *sb = journal->j_superblock;
+
+	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
+	jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
+		  tail_block, tail_tid);
+
+	sb->s_sequence = cpu_to_be32(tail_tid);
+	sb->s_start    = cpu_to_be32(tail_block);
+
+	jbd2_write_superblock(journal, write_op);
+
+	/* Log is no longer empty */
+	write_lock(&journal->j_state_lock);
+	WARN_ON(!sb->s_sequence);
+	journal->j_flags &= ~JBD2_FLUSHED;
+	write_unlock(&journal->j_state_lock);
+}
+
+/**
+ * jbd2_mark_journal_empty() - Mark on disk journal as empty.
+ * @journal: The journal to update.
+ *
+ * Update a journal's dynamic superblock fields to show that journal is empty.
+ * Write updated superblock to disk waiting for IO to complete.
+ */
+static void jbd2_mark_journal_empty(journal_t *journal)
+{
+	journal_superblock_t *sb = journal->j_superblock;
 
+	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
 	read_lock(&journal->j_state_lock);
-	jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n",
-		  journal->j_tail, journal->j_tail_sequence, journal->j_errno);
+	jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
+		  journal->j_tail_sequence);
 
 	sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
-	sb->s_start    = cpu_to_be32(journal->j_tail);
-	sb->s_errno    = cpu_to_be32(journal->j_errno);
+	sb->s_start    = cpu_to_be32(0);
 	read_unlock(&journal->j_state_lock);
 
-	BUFFER_TRACE(bh, "marking dirty");
-	mark_buffer_dirty(bh);
-	if (wait) {
-		sync_dirty_buffer(bh);
-		if (buffer_write_io_error(bh)) {
-			printk(KERN_ERR "JBD2: I/O error detected "
-			       "when updating journal superblock for %s.\n",
-			       journal->j_devname);
-			clear_buffer_write_io_error(bh);
-			set_buffer_uptodate(bh);
-		}
-	} else
-		write_dirty_buffer(bh, WRITE);
-
-out:
-	/* If we have just flushed the log (by marking s_start==0), then
-	 * any future commit will have to be careful to update the
-	 * superblock again to re-record the true start of the log. */
+	jbd2_write_superblock(journal, WRITE_FUA);
 
+	/* Log is no longer empty */
 	write_lock(&journal->j_state_lock);
-	if (sb->s_start)
-		journal->j_flags &= ~JBD2_FLUSHED;
-	else
-		journal->j_flags |= JBD2_FLUSHED;
+	journal->j_flags |= JBD2_FLUSHED;
 	write_unlock(&journal->j_state_lock);
 }
 
+
+/**
+ * jbd2_journal_update_sb_errno() - Update error in the journal.
+ * @journal: The journal to update.
+ *
+ * Update a journal's errno.  Write updated superblock to disk waiting for IO
+ * to complete.
+ */
+static void jbd2_journal_update_sb_errno(journal_t *journal)
+{
+	journal_superblock_t *sb = journal->j_superblock;
+
+	read_lock(&journal->j_state_lock);
+	jbd_debug(1, "JBD2: updating superblock error (errno %d)\n",
+		  journal->j_errno);
+	sb->s_errno    = cpu_to_be32(journal->j_errno);
+	read_unlock(&journal->j_state_lock);
+
+	jbd2_write_superblock(journal, WRITE_SYNC);
+}
+
 /*
  * Read the superblock for a given journal, performing initial
  * validation of the format.
  */
-
 static int journal_get_superblock(journal_t *journal)
 {
 	struct buffer_head *bh;
@@ -1398,14 +1551,11 @@ int jbd2_journal_destroy(journal_t *journal)
 
 	if (journal->j_sb_buffer) {
 		if (!is_journal_aborted(journal)) {
-			/* We can now mark the journal as empty. */
-			journal->j_tail = 0;
-			journal->j_tail_sequence =
-				++journal->j_transaction_sequence;
-			jbd2_journal_update_superblock(journal, 1);
-		} else {
+			mutex_lock(&journal->j_checkpoint_mutex);
+			jbd2_mark_journal_empty(journal);
+			mutex_unlock(&journal->j_checkpoint_mutex);
+		} else
 			err = -EIO;
-		}
 		brelse(journal->j_sb_buffer);
 	}
 
@@ -1552,61 +1702,6 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
 EXPORT_SYMBOL(jbd2_journal_clear_features);
 
 /**
- * int jbd2_journal_update_format () - Update on-disk journal structure.
- * @journal: Journal to act on.
- *
- * Given an initialised but unloaded journal struct, poke about in the
- * on-disk structure to update it to the most recent supported version.
- */
-int jbd2_journal_update_format (journal_t *journal)
-{
-	journal_superblock_t *sb;
-	int err;
-
-	err = journal_get_superblock(journal);
-	if (err)
-		return err;
-
-	sb = journal->j_superblock;
-
-	switch (be32_to_cpu(sb->s_header.h_blocktype)) {
-	case JBD2_SUPERBLOCK_V2:
-		return 0;
-	case JBD2_SUPERBLOCK_V1:
-		return journal_convert_superblock_v1(journal, sb);
-	default:
-		break;
-	}
-	return -EINVAL;
-}
-
-static int journal_convert_superblock_v1(journal_t *journal,
-					 journal_superblock_t *sb)
-{
-	int offset, blocksize;
-	struct buffer_head *bh;
-
-	printk(KERN_WARNING
-		"JBD2: Converting superblock from version 1 to 2.\n");
-
-	/* Pre-initialise new fields to zero */
-	offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
-	blocksize = be32_to_cpu(sb->s_blocksize);
-	memset(&sb->s_feature_compat, 0, blocksize-offset);
-
-	sb->s_nr_users = cpu_to_be32(1);
-	sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2);
-	journal->j_format_version = 2;
-
-	bh = journal->j_sb_buffer;
-	BUFFER_TRACE(bh, "marking dirty");
-	mark_buffer_dirty(bh);
-	sync_dirty_buffer(bh);
-	return 0;
-}
-
-
-/**
  * int jbd2_journal_flush () - Flush journal
  * @journal: Journal to act on.
  *
@@ -1619,7 +1714,6 @@ int jbd2_journal_flush(journal_t *journal)
 {
 	int err = 0;
 	transaction_t *transaction = NULL;
-	unsigned long old_tail;
 
 	write_lock(&journal->j_state_lock);
 
@@ -1654,6 +1748,7 @@ int jbd2_journal_flush(journal_t *journal)
 	if (is_journal_aborted(journal))
 		return -EIO;
 
+	mutex_lock(&journal->j_checkpoint_mutex);
 	jbd2_cleanup_journal_tail(journal);
 
 	/* Finally, mark the journal as really needing no recovery.
@@ -1661,14 +1756,9 @@ int jbd2_journal_flush(journal_t *journal)
 	 * the magic code for a fully-recovered superblock.  Any future
 	 * commits of data to the journal will restore the current
 	 * s_start value. */
+	jbd2_mark_journal_empty(journal);
+	mutex_unlock(&journal->j_checkpoint_mutex);
 	write_lock(&journal->j_state_lock);
-	old_tail = journal->j_tail;
-	journal->j_tail = 0;
-	write_unlock(&journal->j_state_lock);
-	jbd2_journal_update_superblock(journal, 1);
-	write_lock(&journal->j_state_lock);
-	journal->j_tail = old_tail;
-
 	J_ASSERT(!journal->j_running_transaction);
 	J_ASSERT(!journal->j_committing_transaction);
 	J_ASSERT(!journal->j_checkpoint_transactions);
@@ -1708,8 +1798,12 @@ int jbd2_journal_wipe(journal_t *journal, int write)
 	       write ? "Clearing" : "Ignoring");
 
 	err = jbd2_journal_skip_recovery(journal);
-	if (write)
-		jbd2_journal_update_superblock(journal, 1);
+	if (write) {
+		/* Lock to make assertions happy... */
+		mutex_lock(&journal->j_checkpoint_mutex);
+		jbd2_mark_journal_empty(journal);
+		mutex_unlock(&journal->j_checkpoint_mutex);
+	}
 
  no_recovery:
 	return err;
@@ -1759,7 +1853,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
 		__jbd2_journal_abort_hard(journal);
 
 	if (errno)
-		jbd2_journal_update_superblock(journal, 1);
+		jbd2_journal_update_sb_errno(journal);
 }
 
 /**
@@ -2017,7 +2111,7 @@ static struct kmem_cache *jbd2_journal_head_cache;
 static atomic_t nr_journal_heads = ATOMIC_INIT(0);
 #endif
 
-static int journal_init_jbd2_journal_head_cache(void)
+static int jbd2_journal_init_journal_head_cache(void)
 {
 	int retval;
 
@@ -2035,7 +2129,7 @@ static int journal_init_jbd2_journal_head_cache(void)
 	return retval;
 }
 
-static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
+static void jbd2_journal_destroy_journal_head_cache(void)
 {
 	if (jbd2_journal_head_cache) {
 		kmem_cache_destroy(jbd2_journal_head_cache);
@@ -2323,7 +2417,7 @@ static void __exit jbd2_remove_jbd_stats_proc_entry(void)
 
 struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache;
 
-static int __init journal_init_handle_cache(void)
+static int __init jbd2_journal_init_handle_cache(void)
 {
 	jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
 	if (jbd2_handle_cache == NULL) {
@@ -2358,17 +2452,20 @@ static int __init journal_init_caches(void)
 
 	ret = jbd2_journal_init_revoke_caches();
 	if (ret == 0)
-		ret = journal_init_jbd2_journal_head_cache();
+		ret = jbd2_journal_init_journal_head_cache();
+	if (ret == 0)
+		ret = jbd2_journal_init_handle_cache();
 	if (ret == 0)
-		ret = journal_init_handle_cache();
+		ret = jbd2_journal_init_transaction_cache();
 	return ret;
 }
 
 static void jbd2_journal_destroy_caches(void)
 {
 	jbd2_journal_destroy_revoke_caches();
-	jbd2_journal_destroy_jbd2_journal_head_cache();
+	jbd2_journal_destroy_journal_head_cache();
 	jbd2_journal_destroy_handle_cache();
+	jbd2_journal_destroy_transaction_cache();
 	jbd2_journal_destroy_slabs();
 }
 
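
jbd2_journal_get_log_tail() and jbd2_update_log_tail() above compare transaction
IDs with tid_gt() rather than a plain '>' so the test stays correct when the
32-bit sequence counter wraps around. A small runnable illustration of that
style of comparison; the helper below is modeled on jbd2's tid_gt() (defined in
include/linux/jbd2.h), not copied from it:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t tid_t;

/* Wrap-safe "a is newer than b": look at the sign of the modular difference. */
static int tid_gt(tid_t a, tid_t b)
{
	return (int32_t)(a - b) > 0;
}

int main(void)
{
	assert(tid_gt(5, 3));
	assert(!tid_gt(3, 5));
	/* near the wrap point, 2 is still "newer" than 0xfffffffe */
	assert(tid_gt(2, 0xfffffffeU));
	assert(!tid_gt(0xfffffffeU, 2));
	printf("ok\n");
	return 0;
}
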
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index da6d7baf1390..c1a03354a22f 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -21,6 +21,7 @@
 #include <linux/jbd2.h>
 #include <linux/errno.h>
 #include <linux/crc32.h>
+#include <linux/blkdev.h>
 #endif
 
 /*
@@ -265,7 +266,9 @@ int jbd2_journal_recover(journal_t *journal)
 	err2 = sync_blockdev(journal->j_fs_dev);
 	if (!err)
 		err = err2;
-
+	/* Make sure all replayed data is on permanent storage */
+	if (journal->j_flags & JBD2_BARRIER)
+		blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
 	return err;
 }
 
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 30b2867d6cc9..6973705d6a3d 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -208,17 +208,13 @@ int __init jbd2_journal_init_revoke_caches(void)
 	J_ASSERT(!jbd2_revoke_record_cache);
 	J_ASSERT(!jbd2_revoke_table_cache);
 
-	jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
-					   sizeof(struct jbd2_revoke_record_s),
-					   0,
-					   SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
-					   NULL);
+	jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s,
+					SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY);
 	if (!jbd2_revoke_record_cache)
 		goto record_cache_failure;
 
-	jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
-					   sizeof(struct jbd2_revoke_table_s),
-					   0, SLAB_TEMPORARY, NULL);
+	jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s,
+					SLAB_TEMPORARY);
 	if (!jbd2_revoke_table_cache)
 		goto table_cache_failure;
 	return 0;
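
The revoke caches above switch from open-coded kmem_cache_create() calls to the
KMEM_CACHE() helper, which takes the struct tag and the SLAB flags and derives
the cache name and object size from the type itself. Both spellings appear in
the hunk; shown here side by side (kernel-style C, for illustration only):

/* Before: name and size written out by hand. */
jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
				sizeof(struct jbd2_revoke_record_s), 0,
				SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, NULL);

/* After: the macro supplies the name and sizeof() from the struct tag. */
jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s,
				      SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY);
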
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e5aba56e1fd5..ddcd3549c6c2 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -33,6 +33,35 @@
 static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
 static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
 
+static struct kmem_cache *transaction_cache;
+int __init jbd2_journal_init_transaction_cache(void)
+{
+	J_ASSERT(!transaction_cache);
+	transaction_cache = kmem_cache_create("jbd2_transaction_s",
+					sizeof(transaction_t),
+					0,
+					SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
+					NULL);
+	if (transaction_cache)
+		return 0;
+	return -ENOMEM;
+}
+
+void jbd2_journal_destroy_transaction_cache(void)
+{
+	if (transaction_cache) {
+		kmem_cache_destroy(transaction_cache);
+		transaction_cache = NULL;
+	}
+}
+
+void jbd2_journal_free_transaction(transaction_t *transaction)
+{
+	if (unlikely(ZERO_OR_NULL_PTR(transaction)))
+		return;
+	kmem_cache_free(transaction_cache, transaction);
+}
+
 /*
  * jbd2_get_transaction: obtain a new transaction_t object.
  *
@@ -133,7 +162,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
 
 alloc_transaction:
 	if (!journal->j_running_transaction) {
-		new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask);
+		new_transaction = kmem_cache_alloc(transaction_cache,
+						   gfp_mask | __GFP_ZERO);
 		if (!new_transaction) {
 			/*
 			 * If __GFP_FS is not present, then we may be
@@ -162,7 +192,7 @@ repeat:
 	if (is_journal_aborted(journal) ||
 	    (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
 		read_unlock(&journal->j_state_lock);
-		kfree(new_transaction);
+		jbd2_journal_free_transaction(new_transaction);
 		return -EROFS;
 	}
 
@@ -284,7 +314,7 @@ repeat:
 	read_unlock(&journal->j_state_lock);
 
 	lock_map_acquire(&handle->h_lockdep_map);
-	kfree(new_transaction);
+	jbd2_journal_free_transaction(new_transaction);
 	return 0;
 }
 
@@ -1549,9 +1579,9 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
  * of these pointers, it could go bad.  Generally the caller needs to re-read
  * the pointer from the transaction_t.
  *
- * Called under j_list_lock.  The journal may not be locked.
+ * Called under j_list_lock.
  */
-void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
+static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
 {
 	struct journal_head **list = NULL;
 	transaction_t *transaction;
@@ -1646,10 +1676,8 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
 	spin_lock(&journal->j_list_lock);
 	if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
 		/* written-back checkpointed metadata buffer */
-		if (jh->b_jlist == BJ_None) {
-			JBUFFER_TRACE(jh, "remove from checkpoint list");
-			__jbd2_journal_remove_checkpoint(jh);
-		}
+		JBUFFER_TRACE(jh, "remove from checkpoint list");
+		__jbd2_journal_remove_checkpoint(jh);
 	}
 	spin_unlock(&journal->j_list_lock);
 out:
@@ -1949,6 +1977,8 @@ zap_buffer_unlocked:
 	clear_buffer_mapped(bh);
 	clear_buffer_req(bh);
 	clear_buffer_new(bh);
+	clear_buffer_delay(bh);
+	clear_buffer_unwritten(bh);
 	bh->b_bdev = NULL;
 	return may_free;
 }
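
start_this_handle() now allocates transactions from the new transaction_cache
with __GFP_ZERO, and frees them through jbd2_journal_free_transaction(), which
treats a NULL (or ZERO_SIZE_PTR) argument as a no-op so error paths can free
unconditionally. A minimal user-space model of that pairing; the names are ours
and calloc()/free() stand in for the slab cache:

#include <stdio.h>
#include <stdlib.h>

struct transaction { unsigned int t_tid; /* ... */ };

/* Stand-in for kmem_cache_alloc(transaction_cache, gfp_mask | __GFP_ZERO). */
static struct transaction *transaction_alloc(void)
{
	return calloc(1, sizeof(struct transaction));	/* zeroed, like __GFP_ZERO */
}

/* Stand-in for jbd2_journal_free_transaction(): NULL is accepted and ignored,
 * so callers need not check whether the speculative allocation ever happened. */
static void transaction_free(struct transaction *t)
{
	if (!t)
		return;
	free(t);
}

int main(void)
{
	struct transaction *t = transaction_alloc();

	transaction_free(t);	/* normal path */
	transaction_free(NULL);	/* error path: safe no-op */
	printf("ok\n");
	return 0;
}
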