diff options
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- | fs/jbd2/checkpoint.c | 140 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 47 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 361 | ||||
-rw-r--r-- | fs/jbd2/recovery.c | 5 | ||||
-rw-r--r-- | fs/jbd2/revoke.c | 12 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 48 |
6 files changed, 343 insertions, 270 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index d49d202903fb..c78841ee81cf 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -88,14 +88,13 @@ static inline void __buffer_relink_io(struct journal_head *jh) | |||
88 | * whole transaction. | 88 | * whole transaction. |
89 | * | 89 | * |
90 | * Requires j_list_lock | 90 | * Requires j_list_lock |
91 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it | ||
92 | */ | 91 | */ |
93 | static int __try_to_free_cp_buf(struct journal_head *jh) | 92 | static int __try_to_free_cp_buf(struct journal_head *jh) |
94 | { | 93 | { |
95 | int ret = 0; | 94 | int ret = 0; |
96 | struct buffer_head *bh = jh2bh(jh); | 95 | struct buffer_head *bh = jh2bh(jh); |
97 | 96 | ||
98 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && | 97 | if (jh->b_transaction == NULL && !buffer_locked(bh) && |
99 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | 98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { |
100 | /* | 99 | /* |
101 | * Get our reference so that bh cannot be freed before | 100 | * Get our reference so that bh cannot be freed before |
@@ -104,11 +103,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
104 | get_bh(bh); | 103 | get_bh(bh); |
105 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 104 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
106 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; | 105 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; |
107 | jbd_unlock_bh_state(bh); | ||
108 | BUFFER_TRACE(bh, "release"); | 106 | BUFFER_TRACE(bh, "release"); |
109 | __brelse(bh); | 107 | __brelse(bh); |
110 | } else { | ||
111 | jbd_unlock_bh_state(bh); | ||
112 | } | 108 | } |
113 | return ret; | 109 | return ret; |
114 | } | 110 | } |
@@ -180,21 +176,6 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
180 | } | 176 | } |
181 | 177 | ||
182 | /* | 178 | /* |
183 | * We were unable to perform jbd_trylock_bh_state() inside j_list_lock. | ||
184 | * The caller must restart a list walk. Wait for someone else to run | ||
185 | * jbd_unlock_bh_state(). | ||
186 | */ | ||
187 | static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) | ||
188 | __releases(journal->j_list_lock) | ||
189 | { | ||
190 | get_bh(bh); | ||
191 | spin_unlock(&journal->j_list_lock); | ||
192 | jbd_lock_bh_state(bh); | ||
193 | jbd_unlock_bh_state(bh); | ||
194 | put_bh(bh); | ||
195 | } | ||
196 | |||
197 | /* | ||
198 | * Clean up transaction's list of buffers submitted for io. | 179 | * Clean up transaction's list of buffers submitted for io. |
199 | * We wait for any pending IO to complete and remove any clean | 180 | * We wait for any pending IO to complete and remove any clean |
200 | * buffers. Note that we take the buffers in the opposite ordering | 181 | * buffers. Note that we take the buffers in the opposite ordering |
@@ -222,15 +203,9 @@ restart: | |||
222 | while (!released && transaction->t_checkpoint_io_list) { | 203 | while (!released && transaction->t_checkpoint_io_list) { |
223 | jh = transaction->t_checkpoint_io_list; | 204 | jh = transaction->t_checkpoint_io_list; |
224 | bh = jh2bh(jh); | 205 | bh = jh2bh(jh); |
225 | if (!jbd_trylock_bh_state(bh)) { | ||
226 | jbd_sync_bh(journal, bh); | ||
227 | spin_lock(&journal->j_list_lock); | ||
228 | goto restart; | ||
229 | } | ||
230 | get_bh(bh); | 206 | get_bh(bh); |
231 | if (buffer_locked(bh)) { | 207 | if (buffer_locked(bh)) { |
232 | spin_unlock(&journal->j_list_lock); | 208 | spin_unlock(&journal->j_list_lock); |
233 | jbd_unlock_bh_state(bh); | ||
234 | wait_on_buffer(bh); | 209 | wait_on_buffer(bh); |
235 | /* the journal_head may have gone by now */ | 210 | /* the journal_head may have gone by now */ |
236 | BUFFER_TRACE(bh, "brelse"); | 211 | BUFFER_TRACE(bh, "brelse"); |
@@ -246,7 +221,6 @@ restart: | |||
246 | * it has been written out and so we can drop it from the list | 221 | * it has been written out and so we can drop it from the list |
247 | */ | 222 | */ |
248 | released = __jbd2_journal_remove_checkpoint(jh); | 223 | released = __jbd2_journal_remove_checkpoint(jh); |
249 | jbd_unlock_bh_state(bh); | ||
250 | __brelse(bh); | 224 | __brelse(bh); |
251 | } | 225 | } |
252 | 226 | ||
@@ -266,7 +240,6 @@ __flush_batch(journal_t *journal, int *batch_count) | |||
266 | 240 | ||
267 | for (i = 0; i < *batch_count; i++) { | 241 | for (i = 0; i < *batch_count; i++) { |
268 | struct buffer_head *bh = journal->j_chkpt_bhs[i]; | 242 | struct buffer_head *bh = journal->j_chkpt_bhs[i]; |
269 | clear_buffer_jwrite(bh); | ||
270 | BUFFER_TRACE(bh, "brelse"); | 243 | BUFFER_TRACE(bh, "brelse"); |
271 | __brelse(bh); | 244 | __brelse(bh); |
272 | } | 245 | } |
@@ -281,7 +254,6 @@ __flush_batch(journal_t *journal, int *batch_count) | |||
281 | * be written out. | 254 | * be written out. |
282 | * | 255 | * |
283 | * Called with j_list_lock held and drops it if 1 is returned | 256 | * Called with j_list_lock held and drops it if 1 is returned |
284 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it | ||
285 | */ | 257 | */ |
286 | static int __process_buffer(journal_t *journal, struct journal_head *jh, | 258 | static int __process_buffer(journal_t *journal, struct journal_head *jh, |
287 | int *batch_count, transaction_t *transaction) | 259 | int *batch_count, transaction_t *transaction) |
@@ -292,7 +264,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
292 | if (buffer_locked(bh)) { | 264 | if (buffer_locked(bh)) { |
293 | get_bh(bh); | 265 | get_bh(bh); |
294 | spin_unlock(&journal->j_list_lock); | 266 | spin_unlock(&journal->j_list_lock); |
295 | jbd_unlock_bh_state(bh); | ||
296 | wait_on_buffer(bh); | 267 | wait_on_buffer(bh); |
297 | /* the journal_head may have gone by now */ | 268 | /* the journal_head may have gone by now */ |
298 | BUFFER_TRACE(bh, "brelse"); | 269 | BUFFER_TRACE(bh, "brelse"); |
@@ -304,7 +275,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
304 | 275 | ||
305 | transaction->t_chp_stats.cs_forced_to_close++; | 276 | transaction->t_chp_stats.cs_forced_to_close++; |
306 | spin_unlock(&journal->j_list_lock); | 277 | spin_unlock(&journal->j_list_lock); |
307 | jbd_unlock_bh_state(bh); | ||
308 | if (unlikely(journal->j_flags & JBD2_UNMOUNT)) | 278 | if (unlikely(journal->j_flags & JBD2_UNMOUNT)) |
309 | /* | 279 | /* |
310 | * The journal thread is dead; so starting and | 280 | * The journal thread is dead; so starting and |
@@ -323,11 +293,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
323 | if (unlikely(buffer_write_io_error(bh))) | 293 | if (unlikely(buffer_write_io_error(bh))) |
324 | ret = -EIO; | 294 | ret = -EIO; |
325 | get_bh(bh); | 295 | get_bh(bh); |
326 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); | ||
327 | BUFFER_TRACE(bh, "remove from checkpoint"); | 296 | BUFFER_TRACE(bh, "remove from checkpoint"); |
328 | __jbd2_journal_remove_checkpoint(jh); | 297 | __jbd2_journal_remove_checkpoint(jh); |
329 | spin_unlock(&journal->j_list_lock); | 298 | spin_unlock(&journal->j_list_lock); |
330 | jbd_unlock_bh_state(bh); | ||
331 | __brelse(bh); | 299 | __brelse(bh); |
332 | } else { | 300 | } else { |
333 | /* | 301 | /* |
@@ -340,10 +308,8 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
340 | BUFFER_TRACE(bh, "queue"); | 308 | BUFFER_TRACE(bh, "queue"); |
341 | get_bh(bh); | 309 | get_bh(bh); |
342 | J_ASSERT_BH(bh, !buffer_jwrite(bh)); | 310 | J_ASSERT_BH(bh, !buffer_jwrite(bh)); |
343 | set_buffer_jwrite(bh); | ||
344 | journal->j_chkpt_bhs[*batch_count] = bh; | 311 | journal->j_chkpt_bhs[*batch_count] = bh; |
345 | __buffer_relink_io(jh); | 312 | __buffer_relink_io(jh); |
346 | jbd_unlock_bh_state(bh); | ||
347 | transaction->t_chp_stats.cs_written++; | 313 | transaction->t_chp_stats.cs_written++; |
348 | (*batch_count)++; | 314 | (*batch_count)++; |
349 | if (*batch_count == JBD2_NR_BATCH) { | 315 | if (*batch_count == JBD2_NR_BATCH) { |
@@ -407,15 +373,7 @@ restart: | |||
407 | int retry = 0, err; | 373 | int retry = 0, err; |
408 | 374 | ||
409 | while (!retry && transaction->t_checkpoint_list) { | 375 | while (!retry && transaction->t_checkpoint_list) { |
410 | struct buffer_head *bh; | ||
411 | |||
412 | jh = transaction->t_checkpoint_list; | 376 | jh = transaction->t_checkpoint_list; |
413 | bh = jh2bh(jh); | ||
414 | if (!jbd_trylock_bh_state(bh)) { | ||
415 | jbd_sync_bh(journal, bh); | ||
416 | retry = 1; | ||
417 | break; | ||
418 | } | ||
419 | retry = __process_buffer(journal, jh, &batch_count, | 377 | retry = __process_buffer(journal, jh, &batch_count, |
420 | transaction); | 378 | transaction); |
421 | if (retry < 0 && !result) | 379 | if (retry < 0 && !result) |
@@ -478,79 +436,28 @@ out: | |||
478 | 436 | ||
479 | int jbd2_cleanup_journal_tail(journal_t *journal) | 437 | int jbd2_cleanup_journal_tail(journal_t *journal) |
480 | { | 438 | { |
481 | transaction_t * transaction; | ||
482 | tid_t first_tid; | 439 | tid_t first_tid; |
483 | unsigned long blocknr, freed; | 440 | unsigned long blocknr; |
484 | 441 | ||
485 | if (is_journal_aborted(journal)) | 442 | if (is_journal_aborted(journal)) |
486 | return 1; | 443 | return 1; |
487 | 444 | ||
488 | /* OK, work out the oldest transaction remaining in the log, and | 445 | if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr)) |
489 | * the log block it starts at. | ||
490 | * | ||
491 | * If the log is now empty, we need to work out which is the | ||
492 | * next transaction ID we will write, and where it will | ||
493 | * start. */ | ||
494 | |||
495 | write_lock(&journal->j_state_lock); | ||
496 | spin_lock(&journal->j_list_lock); | ||
497 | transaction = journal->j_checkpoint_transactions; | ||
498 | if (transaction) { | ||
499 | first_tid = transaction->t_tid; | ||
500 | blocknr = transaction->t_log_start; | ||
501 | } else if ((transaction = journal->j_committing_transaction) != NULL) { | ||
502 | first_tid = transaction->t_tid; | ||
503 | blocknr = transaction->t_log_start; | ||
504 | } else if ((transaction = journal->j_running_transaction) != NULL) { | ||
505 | first_tid = transaction->t_tid; | ||
506 | blocknr = journal->j_head; | ||
507 | } else { | ||
508 | first_tid = journal->j_transaction_sequence; | ||
509 | blocknr = journal->j_head; | ||
510 | } | ||
511 | spin_unlock(&journal->j_list_lock); | ||
512 | J_ASSERT(blocknr != 0); | ||
513 | |||
514 | /* If the oldest pinned transaction is at the tail of the log | ||
515 | already then there's not much we can do right now. */ | ||
516 | if (journal->j_tail_sequence == first_tid) { | ||
517 | write_unlock(&journal->j_state_lock); | ||
518 | return 1; | 446 | return 1; |
519 | } | 447 | J_ASSERT(blocknr != 0); |
520 | |||
521 | /* OK, update the superblock to recover the freed space. | ||
522 | * Physical blocks come first: have we wrapped beyond the end of | ||
523 | * the log? */ | ||
524 | freed = blocknr - journal->j_tail; | ||
525 | if (blocknr < journal->j_tail) | ||
526 | freed = freed + journal->j_last - journal->j_first; | ||
527 | |||
528 | trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed); | ||
529 | jbd_debug(1, | ||
530 | "Cleaning journal tail from %d to %d (offset %lu), " | ||
531 | "freeing %lu\n", | ||
532 | journal->j_tail_sequence, first_tid, blocknr, freed); | ||
533 | |||
534 | journal->j_free += freed; | ||
535 | journal->j_tail_sequence = first_tid; | ||
536 | journal->j_tail = blocknr; | ||
537 | write_unlock(&journal->j_state_lock); | ||
538 | 448 | ||
539 | /* | 449 | /* |
540 | * If there is an external journal, we need to make sure that | 450 | * We need to make sure that any blocks that were recently written out |
541 | * any data blocks that were recently written out --- perhaps | 451 | * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before |
542 | * by jbd2_log_do_checkpoint() --- are flushed out before we | 452 | * we drop the transactions from the journal. It's unlikely this will |
543 | * drop the transactions from the external journal. It's | 453 | * be necessary, especially with an appropriately sized journal, but we |
544 | * unlikely this will be necessary, especially with a | 454 | * need this to guarantee correctness. Fortunately |
545 | * appropriately sized journal, but we need this to guarantee | 455 | * jbd2_cleanup_journal_tail() doesn't get called all that often. |
546 | * correctness. Fortunately jbd2_cleanup_journal_tail() | ||
547 | * doesn't get called all that often. | ||
548 | */ | 456 | */ |
549 | if ((journal->j_fs_dev != journal->j_dev) && | 457 | if (journal->j_flags & JBD2_BARRIER) |
550 | (journal->j_flags & JBD2_BARRIER)) | ||
551 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); | 458 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); |
552 | if (!(journal->j_flags & JBD2_ABORT)) | 459 | |
553 | jbd2_journal_update_superblock(journal, 1); | 460 | __jbd2_update_log_tail(journal, first_tid, blocknr); |
554 | return 0; | 461 | return 0; |
555 | } | 462 | } |
556 | 463 | ||
@@ -582,15 +489,12 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) | |||
582 | do { | 489 | do { |
583 | jh = next_jh; | 490 | jh = next_jh; |
584 | next_jh = jh->b_cpnext; | 491 | next_jh = jh->b_cpnext; |
585 | /* Use trylock because of the ranking */ | 492 | ret = __try_to_free_cp_buf(jh); |
586 | if (jbd_trylock_bh_state(jh2bh(jh))) { | 493 | if (ret) { |
587 | ret = __try_to_free_cp_buf(jh); | 494 | freed++; |
588 | if (ret) { | 495 | if (ret == 2) { |
589 | freed++; | 496 | *released = 1; |
590 | if (ret == 2) { | 497 | return freed; |
591 | *released = 1; | ||
592 | return freed; | ||
593 | } | ||
594 | } | 498 | } |
595 | } | 499 | } |
596 | /* | 500 | /* |
@@ -673,9 +577,7 @@ out: | |||
673 | * The function can free jh and bh. | 577 | * The function can free jh and bh. |
674 | * | 578 | * |
675 | * This function is called with j_list_lock held. | 579 | * This function is called with j_list_lock held. |
676 | * This function is called with jbd_lock_bh_state(jh2bh(jh)) | ||
677 | */ | 580 | */ |
678 | |||
679 | int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | 581 | int __jbd2_journal_remove_checkpoint(struct journal_head *jh) |
680 | { | 582 | { |
681 | struct transaction_chp_stats_s *stats; | 583 | struct transaction_chp_stats_s *stats; |
@@ -722,7 +624,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
722 | transaction->t_tid, stats); | 624 | transaction->t_tid, stats); |
723 | 625 | ||
724 | __jbd2_journal_drop_transaction(journal, transaction); | 626 | __jbd2_journal_drop_transaction(journal, transaction); |
725 | kfree(transaction); | 627 | jbd2_journal_free_transaction(transaction); |
726 | 628 | ||
727 | /* Just in case anybody was waiting for more transactions to be | 629 | /* Just in case anybody was waiting for more transactions to be |
728 | checkpointed... */ | 630 | checkpointed... */ |
@@ -797,5 +699,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact | |||
797 | J_ASSERT(journal->j_committing_transaction != transaction); | 699 | J_ASSERT(journal->j_committing_transaction != transaction); |
798 | J_ASSERT(journal->j_running_transaction != transaction); | 700 | J_ASSERT(journal->j_running_transaction != transaction); |
799 | 701 | ||
702 | trace_jbd2_drop_transaction(journal, transaction); | ||
703 | |||
800 | jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); | 704 | jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); |
801 | } | 705 | } |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index c067a8cae63b..17f557f01cf0 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -331,6 +331,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
331 | struct buffer_head *cbh = NULL; /* For transactional checksums */ | 331 | struct buffer_head *cbh = NULL; /* For transactional checksums */ |
332 | __u32 crc32_sum = ~0; | 332 | __u32 crc32_sum = ~0; |
333 | struct blk_plug plug; | 333 | struct blk_plug plug; |
334 | /* Tail of the journal */ | ||
335 | unsigned long first_block; | ||
336 | tid_t first_tid; | ||
337 | int update_tail; | ||
334 | 338 | ||
335 | /* | 339 | /* |
336 | * First job: lock down the current transaction and wait for | 340 | * First job: lock down the current transaction and wait for |
@@ -340,7 +344,18 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
340 | /* Do we need to erase the effects of a prior jbd2_journal_flush? */ | 344 | /* Do we need to erase the effects of a prior jbd2_journal_flush? */ |
341 | if (journal->j_flags & JBD2_FLUSHED) { | 345 | if (journal->j_flags & JBD2_FLUSHED) { |
342 | jbd_debug(3, "super block updated\n"); | 346 | jbd_debug(3, "super block updated\n"); |
343 | jbd2_journal_update_superblock(journal, 1); | 347 | mutex_lock(&journal->j_checkpoint_mutex); |
348 | /* | ||
349 | * We hold j_checkpoint_mutex so tail cannot change under us. | ||
350 | * We don't need any special data guarantees for writing sb | ||
351 | * since journal is empty and it is ok for write to be | ||
352 | * flushed only with transaction commit. | ||
353 | */ | ||
354 | jbd2_journal_update_sb_log_tail(journal, | ||
355 | journal->j_tail_sequence, | ||
356 | journal->j_tail, | ||
357 | WRITE_SYNC); | ||
358 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
344 | } else { | 359 | } else { |
345 | jbd_debug(3, "superblock not updated\n"); | 360 | jbd_debug(3, "superblock not updated\n"); |
346 | } | 361 | } |
@@ -677,10 +692,30 @@ start_journal_io: | |||
677 | err = 0; | 692 | err = 0; |
678 | } | 693 | } |
679 | 694 | ||
695 | /* | ||
696 | * Get current oldest transaction in the log before we issue flush | ||
697 | * to the filesystem device. After the flush we can be sure that | ||
698 | * blocks of all older transactions are checkpointed to persistent | ||
699 | * storage and we will be safe to update journal start in the | ||
700 | * superblock with the numbers we get here. | ||
701 | */ | ||
702 | update_tail = | ||
703 | jbd2_journal_get_log_tail(journal, &first_tid, &first_block); | ||
704 | |||
680 | write_lock(&journal->j_state_lock); | 705 | write_lock(&journal->j_state_lock); |
706 | if (update_tail) { | ||
707 | long freed = first_block - journal->j_tail; | ||
708 | |||
709 | if (first_block < journal->j_tail) | ||
710 | freed += journal->j_last - journal->j_first; | ||
711 | /* Update tail only if we free significant amount of space */ | ||
712 | if (freed < journal->j_maxlen / 4) | ||
713 | update_tail = 0; | ||
714 | } | ||
681 | J_ASSERT(commit_transaction->t_state == T_COMMIT); | 715 | J_ASSERT(commit_transaction->t_state == T_COMMIT); |
682 | commit_transaction->t_state = T_COMMIT_DFLUSH; | 716 | commit_transaction->t_state = T_COMMIT_DFLUSH; |
683 | write_unlock(&journal->j_state_lock); | 717 | write_unlock(&journal->j_state_lock); |
718 | |||
684 | /* | 719 | /* |
685 | * If the journal is not located on the file system device, | 720 | * If the journal is not located on the file system device, |
686 | * then we must flush the file system device before we issue | 721 | * then we must flush the file system device before we issue |
@@ -831,6 +866,14 @@ wait_for_iobuf: | |||
831 | if (err) | 866 | if (err) |
832 | jbd2_journal_abort(journal, err); | 867 | jbd2_journal_abort(journal, err); |
833 | 868 | ||
869 | /* | ||
870 | * Now disk caches for filesystem device are flushed so we are safe to | ||
871 | * erase checkpointed transactions from the log by updating journal | ||
872 | * superblock. | ||
873 | */ | ||
874 | if (update_tail) | ||
875 | jbd2_update_log_tail(journal, first_tid, first_block); | ||
876 | |||
834 | /* End of a transaction! Finally, we can do checkpoint | 877 | /* End of a transaction! Finally, we can do checkpoint |
835 | processing: any buffers committed as a result of this | 878 | processing: any buffers committed as a result of this |
836 | transaction can be removed from any checkpoint list it was on | 879 | transaction can be removed from any checkpoint list it was on |
@@ -1048,7 +1091,7 @@ restart_loop: | |||
1048 | jbd_debug(1, "JBD2: commit %d complete, head %d\n", | 1091 | jbd_debug(1, "JBD2: commit %d complete, head %d\n", |
1049 | journal->j_commit_sequence, journal->j_tail_sequence); | 1092 | journal->j_commit_sequence, journal->j_tail_sequence); |
1050 | if (to_free) | 1093 | if (to_free) |
1051 | kfree(commit_transaction); | 1094 | jbd2_journal_free_transaction(commit_transaction); |
1052 | 1095 | ||
1053 | wake_up(&journal->j_wait_done_commit); | 1096 | wake_up(&journal->j_wait_done_commit); |
1054 | } | 1097 | } |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 839377e3d624..98ed6dbfe381 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -71,7 +71,6 @@ EXPORT_SYMBOL(jbd2_journal_revoke); | |||
71 | 71 | ||
72 | EXPORT_SYMBOL(jbd2_journal_init_dev); | 72 | EXPORT_SYMBOL(jbd2_journal_init_dev); |
73 | EXPORT_SYMBOL(jbd2_journal_init_inode); | 73 | EXPORT_SYMBOL(jbd2_journal_init_inode); |
74 | EXPORT_SYMBOL(jbd2_journal_update_format); | ||
75 | EXPORT_SYMBOL(jbd2_journal_check_used_features); | 74 | EXPORT_SYMBOL(jbd2_journal_check_used_features); |
76 | EXPORT_SYMBOL(jbd2_journal_check_available_features); | 75 | EXPORT_SYMBOL(jbd2_journal_check_available_features); |
77 | EXPORT_SYMBOL(jbd2_journal_set_features); | 76 | EXPORT_SYMBOL(jbd2_journal_set_features); |
@@ -96,7 +95,6 @@ EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); | |||
96 | EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); | 95 | EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); |
97 | EXPORT_SYMBOL(jbd2_inode_cache); | 96 | EXPORT_SYMBOL(jbd2_inode_cache); |
98 | 97 | ||
99 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); | ||
100 | static void __journal_abort_soft (journal_t *journal, int errno); | 98 | static void __journal_abort_soft (journal_t *journal, int errno); |
101 | static int jbd2_journal_create_slab(size_t slab_size); | 99 | static int jbd2_journal_create_slab(size_t slab_size); |
102 | 100 | ||
@@ -746,6 +744,98 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) | |||
746 | return jbd2_journal_add_journal_head(bh); | 744 | return jbd2_journal_add_journal_head(bh); |
747 | } | 745 | } |
748 | 746 | ||
747 | /* | ||
748 | * Return tid of the oldest transaction in the journal and block in the journal | ||
749 | * where the transaction starts. | ||
750 | * | ||
751 | * If the journal is now empty, return the ID of the next transaction we | ||
752 | * will write and the block where that transaction will start. | ||
753 | * | ||
754 | * The return value is 0 if journal tail cannot be pushed any further, 1 if | ||
755 | * it can. | ||
756 | */ | ||
757 | int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, | ||
758 | unsigned long *block) | ||
759 | { | ||
760 | transaction_t *transaction; | ||
761 | int ret; | ||
762 | |||
763 | read_lock(&journal->j_state_lock); | ||
764 | spin_lock(&journal->j_list_lock); | ||
765 | transaction = journal->j_checkpoint_transactions; | ||
766 | if (transaction) { | ||
767 | *tid = transaction->t_tid; | ||
768 | *block = transaction->t_log_start; | ||
769 | } else if ((transaction = journal->j_committing_transaction) != NULL) { | ||
770 | *tid = transaction->t_tid; | ||
771 | *block = transaction->t_log_start; | ||
772 | } else if ((transaction = journal->j_running_transaction) != NULL) { | ||
773 | *tid = transaction->t_tid; | ||
774 | *block = journal->j_head; | ||
775 | } else { | ||
776 | *tid = journal->j_transaction_sequence; | ||
777 | *block = journal->j_head; | ||
778 | } | ||
779 | ret = tid_gt(*tid, journal->j_tail_sequence); | ||
780 | spin_unlock(&journal->j_list_lock); | ||
781 | read_unlock(&journal->j_state_lock); | ||
782 | |||
783 | return ret; | ||
784 | } | ||
785 | |||
786 | /* | ||
787 | * Update information in journal structure and in on disk journal superblock | ||
788 | * about log tail. This function does not check whether information passed in | ||
789 | * really pushes log tail further. It is the responsibility of the caller to make | ||
790 | * sure provided log tail information is valid (e.g. by holding | ||
791 | * j_checkpoint_mutex all the time between computing log tail and calling this | ||
792 | * function as is the case with jbd2_cleanup_journal_tail()). | ||
793 | * | ||
794 | * Requires j_checkpoint_mutex | ||
795 | */ | ||
796 | void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) | ||
797 | { | ||
798 | unsigned long freed; | ||
799 | |||
800 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); | ||
801 | |||
802 | /* | ||
803 | * We cannot afford for write to remain in drive's caches since as | ||
804 | * soon as we update j_tail, next transaction can start reusing journal | ||
805 | * space and if we lose sb update during power failure we'd replay | ||
806 | * old transaction with possibly newly overwritten data. | ||
807 | */ | ||
808 | jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); | ||
809 | write_lock(&journal->j_state_lock); | ||
810 | freed = block - journal->j_tail; | ||
811 | if (block < journal->j_tail) | ||
812 | freed += journal->j_last - journal->j_first; | ||
813 | |||
814 | trace_jbd2_update_log_tail(journal, tid, block, freed); | ||
815 | jbd_debug(1, | ||
816 | "Cleaning journal tail from %d to %d (offset %lu), " | ||
817 | "freeing %lu\n", | ||
818 | journal->j_tail_sequence, tid, block, freed); | ||
819 | |||
820 | journal->j_free += freed; | ||
821 | journal->j_tail_sequence = tid; | ||
822 | journal->j_tail = block; | ||
823 | write_unlock(&journal->j_state_lock); | ||
824 | } | ||
825 | |||
826 | /* | ||
827 | * This is a variation of __jbd2_update_log_tail which checks for validity of | ||
828 | * provided log tail and locks j_checkpoint_mutex. So it is safe against races | ||
829 | * with other threads updating log tail. | ||
830 | */ | ||
831 | void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) | ||
832 | { | ||
833 | mutex_lock(&journal->j_checkpoint_mutex); | ||
834 | if (tid_gt(tid, journal->j_tail_sequence)) | ||
835 | __jbd2_update_log_tail(journal, tid, block); | ||
836 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
837 | } | ||
838 | |||
749 | struct jbd2_stats_proc_session { | 839 | struct jbd2_stats_proc_session { |
750 | journal_t *journal; | 840 | journal_t *journal; |
751 | struct transaction_stats_s *stats; | 841 | struct transaction_stats_s *stats; |
@@ -1114,40 +1204,45 @@ static int journal_reset(journal_t *journal) | |||
1114 | 1204 | ||
1115 | journal->j_max_transaction_buffers = journal->j_maxlen / 4; | 1205 | journal->j_max_transaction_buffers = journal->j_maxlen / 4; |
1116 | 1206 | ||
1117 | /* Add the dynamic fields and write it to disk. */ | ||
1118 | jbd2_journal_update_superblock(journal, 1); | ||
1119 | return jbd2_journal_start_thread(journal); | ||
1120 | } | ||
1121 | |||
1122 | /** | ||
1123 | * void jbd2_journal_update_superblock() - Update journal sb on disk. | ||
1124 | * @journal: The journal to update. | ||
1125 | * @wait: Set to '0' if you don't want to wait for IO completion. | ||
1126 | * | ||
1127 | * Update a journal's dynamic superblock fields and write it to disk, | ||
1128 | * optionally waiting for the IO to complete. | ||
1129 | */ | ||
1130 | void jbd2_journal_update_superblock(journal_t *journal, int wait) | ||
1131 | { | ||
1132 | journal_superblock_t *sb = journal->j_superblock; | ||
1133 | struct buffer_head *bh = journal->j_sb_buffer; | ||
1134 | |||
1135 | /* | 1207 | /* |
1136 | * As a special case, if the on-disk copy is already marked as needing | 1208 | * As a special case, if the on-disk copy is already marked as needing |
1137 | * no recovery (s_start == 0) and there are no outstanding transactions | 1209 | * no recovery (s_start == 0), then we can safely defer the superblock |
1138 | * in the filesystem, then we can safely defer the superblock update | 1210 | * update until the next commit by setting JBD2_FLUSHED. This avoids |
1139 | * until the next commit by setting JBD2_FLUSHED. This avoids | ||
1140 | * attempting a write to a potential-readonly device. | 1211 | * attempting a write to a potential-readonly device. |
1141 | */ | 1212 | */ |
1142 | if (sb->s_start == 0 && journal->j_tail_sequence == | 1213 | if (sb->s_start == 0) { |
1143 | journal->j_transaction_sequence) { | ||
1144 | jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " | 1214 | jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " |
1145 | "(start %ld, seq %d, errno %d)\n", | 1215 | "(start %ld, seq %d, errno %d)\n", |
1146 | journal->j_tail, journal->j_tail_sequence, | 1216 | journal->j_tail, journal->j_tail_sequence, |
1147 | journal->j_errno); | 1217 | journal->j_errno); |
1148 | goto out; | 1218 | journal->j_flags |= JBD2_FLUSHED; |
1219 | } else { | ||
1220 | /* Lock here to make assertions happy... */ | ||
1221 | mutex_lock(&journal->j_checkpoint_mutex); | ||
1222 | /* | ||
1223 | * Update log tail information. We use WRITE_FUA since new | ||
1224 | * transaction will start reusing journal space and so we | ||
1225 | * must make sure information about current log tail is on | ||
1226 | * disk before that. | ||
1227 | */ | ||
1228 | jbd2_journal_update_sb_log_tail(journal, | ||
1229 | journal->j_tail_sequence, | ||
1230 | journal->j_tail, | ||
1231 | WRITE_FUA); | ||
1232 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
1149 | } | 1233 | } |
1234 | return jbd2_journal_start_thread(journal); | ||
1235 | } | ||
1150 | 1236 | ||
1237 | static void jbd2_write_superblock(journal_t *journal, int write_op) | ||
1238 | { | ||
1239 | struct buffer_head *bh = journal->j_sb_buffer; | ||
1240 | int ret; | ||
1241 | |||
1242 | trace_jbd2_write_superblock(journal, write_op); | ||
1243 | if (!(journal->j_flags & JBD2_BARRIER)) | ||
1244 | write_op &= ~(REQ_FUA | REQ_FLUSH); | ||
1245 | lock_buffer(bh); | ||
1151 | if (buffer_write_io_error(bh)) { | 1246 | if (buffer_write_io_error(bh)) { |
1152 | /* | 1247 | /* |
1153 | * Oh, dear. A previous attempt to write the journal | 1248 | * Oh, dear. A previous attempt to write the journal |
@@ -1163,48 +1258,106 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
1163 | clear_buffer_write_io_error(bh); | 1258 | clear_buffer_write_io_error(bh); |
1164 | set_buffer_uptodate(bh); | 1259 | set_buffer_uptodate(bh); |
1165 | } | 1260 | } |
1261 | get_bh(bh); | ||
1262 | bh->b_end_io = end_buffer_write_sync; | ||
1263 | ret = submit_bh(write_op, bh); | ||
1264 | wait_on_buffer(bh); | ||
1265 | if (buffer_write_io_error(bh)) { | ||
1266 | clear_buffer_write_io_error(bh); | ||
1267 | set_buffer_uptodate(bh); | ||
1268 | ret = -EIO; | ||
1269 | } | ||
1270 | if (ret) { | ||
1271 | printk(KERN_ERR "JBD2: Error %d detected when updating " | ||
1272 | "journal superblock for %s.\n", ret, | ||
1273 | journal->j_devname); | ||
1274 | } | ||
1275 | } | ||
1276 | |||
1277 | /** | ||
1278 | * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk. | ||
1279 | * @journal: The journal to update. | ||
1280 | * @tail_tid: TID of the new transaction at the tail of the log | ||
1281 | * @tail_block: The first block of the transaction at the tail of the log | ||
1282 | * @write_op: With which operation should we write the journal sb | ||
1283 | * | ||
1284 | * Update a journal's superblock information about log tail and write it to | ||
1285 | * disk, waiting for the IO to complete. | ||
1286 | */ | ||
1287 | void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, | ||
1288 | unsigned long tail_block, int write_op) | ||
1289 | { | ||
1290 | journal_superblock_t *sb = journal->j_superblock; | ||
1291 | |||
1292 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); | ||
1293 | jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", | ||
1294 | tail_block, tail_tid); | ||
1295 | |||
1296 | sb->s_sequence = cpu_to_be32(tail_tid); | ||
1297 | sb->s_start = cpu_to_be32(tail_block); | ||
1298 | |||
1299 | jbd2_write_superblock(journal, write_op); | ||
1300 | |||
1301 | /* Log is no longer empty */ | ||
1302 | write_lock(&journal->j_state_lock); | ||
1303 | WARN_ON(!sb->s_sequence); | ||
1304 | journal->j_flags &= ~JBD2_FLUSHED; | ||
1305 | write_unlock(&journal->j_state_lock); | ||
1306 | } | ||
1307 | |||
1308 | /** | ||
1309 | * jbd2_mark_journal_empty() - Mark on disk journal as empty. | ||
1310 | * @journal: The journal to update. | ||
1311 | * | ||
1312 | * Update a journal's dynamic superblock fields to show that journal is empty. | ||
1313 | * Write updated superblock to disk waiting for IO to complete. | ||
1314 | */ | ||
1315 | static void jbd2_mark_journal_empty(journal_t *journal) | ||
1316 | { | ||
1317 | journal_superblock_t *sb = journal->j_superblock; | ||
1166 | 1318 | ||
1319 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); | ||
1167 | read_lock(&journal->j_state_lock); | 1320 | read_lock(&journal->j_state_lock); |
1168 | jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n", | 1321 | jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", |
1169 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); | 1322 | journal->j_tail_sequence); |
1170 | 1323 | ||
1171 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); | 1324 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); |
1172 | sb->s_start = cpu_to_be32(journal->j_tail); | 1325 | sb->s_start = cpu_to_be32(0); |
1173 | sb->s_errno = cpu_to_be32(journal->j_errno); | ||
1174 | read_unlock(&journal->j_state_lock); | 1326 | read_unlock(&journal->j_state_lock); |
1175 | 1327 | ||
1176 | BUFFER_TRACE(bh, "marking dirty"); | 1328 | jbd2_write_superblock(journal, WRITE_FUA); |
1177 | mark_buffer_dirty(bh); | ||
1178 | if (wait) { | ||
1179 | sync_dirty_buffer(bh); | ||
1180 | if (buffer_write_io_error(bh)) { | ||
1181 | printk(KERN_ERR "JBD2: I/O error detected " | ||
1182 | "when updating journal superblock for %s.\n", | ||
1183 | journal->j_devname); | ||
1184 | clear_buffer_write_io_error(bh); | ||
1185 | set_buffer_uptodate(bh); | ||
1186 | } | ||
1187 | } else | ||
1188 | write_dirty_buffer(bh, WRITE); | ||
1189 | |||
1190 | out: | ||
1191 | /* If we have just flushed the log (by marking s_start==0), then | ||
1192 | * any future commit will have to be careful to update the | ||
1193 | * superblock again to re-record the true start of the log. */ | ||
1194 | 1329 | ||
1330 | /* Log is no longer empty */ | ||
1195 | write_lock(&journal->j_state_lock); | 1331 | write_lock(&journal->j_state_lock); |
1196 | if (sb->s_start) | 1332 | journal->j_flags |= JBD2_FLUSHED; |
1197 | journal->j_flags &= ~JBD2_FLUSHED; | ||
1198 | else | ||
1199 | journal->j_flags |= JBD2_FLUSHED; | ||
1200 | write_unlock(&journal->j_state_lock); | 1333 | write_unlock(&journal->j_state_lock); |
1201 | } | 1334 | } |
1202 | 1335 | ||
1336 | |||
1337 | /** | ||
1338 | * jbd2_journal_update_sb_errno() - Update error in the journal. | ||
1339 | * @journal: The journal to update. | ||
1340 | * | ||
1341 | * Update a journal's errno. Write updated superblock to disk waiting for IO | ||
1342 | * to complete. | ||
1343 | */ | ||
1344 | static void jbd2_journal_update_sb_errno(journal_t *journal) | ||
1345 | { | ||
1346 | journal_superblock_t *sb = journal->j_superblock; | ||
1347 | |||
1348 | read_lock(&journal->j_state_lock); | ||
1349 | jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", | ||
1350 | journal->j_errno); | ||
1351 | sb->s_errno = cpu_to_be32(journal->j_errno); | ||
1352 | read_unlock(&journal->j_state_lock); | ||
1353 | |||
1354 | jbd2_write_superblock(journal, WRITE_SYNC); | ||
1355 | } | ||
1356 | |||
1203 | /* | 1357 | /* |
1204 | * Read the superblock for a given journal, performing initial | 1358 | * Read the superblock for a given journal, performing initial |
1205 | * validation of the format. | 1359 | * validation of the format. |
1206 | */ | 1360 | */ |
1207 | |||
1208 | static int journal_get_superblock(journal_t *journal) | 1361 | static int journal_get_superblock(journal_t *journal) |
1209 | { | 1362 | { |
1210 | struct buffer_head *bh; | 1363 | struct buffer_head *bh; |
@@ -1398,14 +1551,11 @@ int jbd2_journal_destroy(journal_t *journal) | |||
1398 | 1551 | ||
1399 | if (journal->j_sb_buffer) { | 1552 | if (journal->j_sb_buffer) { |
1400 | if (!is_journal_aborted(journal)) { | 1553 | if (!is_journal_aborted(journal)) { |
1401 | /* We can now mark the journal as empty. */ | 1554 | mutex_lock(&journal->j_checkpoint_mutex); |
1402 | journal->j_tail = 0; | 1555 | jbd2_mark_journal_empty(journal); |
1403 | journal->j_tail_sequence = | 1556 | mutex_unlock(&journal->j_checkpoint_mutex); |
1404 | ++journal->j_transaction_sequence; | 1557 | } else |
1405 | jbd2_journal_update_superblock(journal, 1); | ||
1406 | } else { | ||
1407 | err = -EIO; | 1558 | err = -EIO; |
1408 | } | ||
1409 | brelse(journal->j_sb_buffer); | 1559 | brelse(journal->j_sb_buffer); |
1410 | } | 1560 | } |
1411 | 1561 | ||
@@ -1552,61 +1702,6 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat, | |||
1552 | EXPORT_SYMBOL(jbd2_journal_clear_features); | 1702 | EXPORT_SYMBOL(jbd2_journal_clear_features); |
1553 | 1703 | ||
1554 | /** | 1704 | /** |
1555 | * int jbd2_journal_update_format () - Update on-disk journal structure. | ||
1556 | * @journal: Journal to act on. | ||
1557 | * | ||
1558 | * Given an initialised but unloaded journal struct, poke about in the | ||
1559 | * on-disk structure to update it to the most recent supported version. | ||
1560 | */ | ||
1561 | int jbd2_journal_update_format (journal_t *journal) | ||
1562 | { | ||
1563 | journal_superblock_t *sb; | ||
1564 | int err; | ||
1565 | |||
1566 | err = journal_get_superblock(journal); | ||
1567 | if (err) | ||
1568 | return err; | ||
1569 | |||
1570 | sb = journal->j_superblock; | ||
1571 | |||
1572 | switch (be32_to_cpu(sb->s_header.h_blocktype)) { | ||
1573 | case JBD2_SUPERBLOCK_V2: | ||
1574 | return 0; | ||
1575 | case JBD2_SUPERBLOCK_V1: | ||
1576 | return journal_convert_superblock_v1(journal, sb); | ||
1577 | default: | ||
1578 | break; | ||
1579 | } | ||
1580 | return -EINVAL; | ||
1581 | } | ||
1582 | |||
1583 | static int journal_convert_superblock_v1(journal_t *journal, | ||
1584 | journal_superblock_t *sb) | ||
1585 | { | ||
1586 | int offset, blocksize; | ||
1587 | struct buffer_head *bh; | ||
1588 | |||
1589 | printk(KERN_WARNING | ||
1590 | "JBD2: Converting superblock from version 1 to 2.\n"); | ||
1591 | |||
1592 | /* Pre-initialise new fields to zero */ | ||
1593 | offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); | ||
1594 | blocksize = be32_to_cpu(sb->s_blocksize); | ||
1595 | memset(&sb->s_feature_compat, 0, blocksize-offset); | ||
1596 | |||
1597 | sb->s_nr_users = cpu_to_be32(1); | ||
1598 | sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2); | ||
1599 | journal->j_format_version = 2; | ||
1600 | |||
1601 | bh = journal->j_sb_buffer; | ||
1602 | BUFFER_TRACE(bh, "marking dirty"); | ||
1603 | mark_buffer_dirty(bh); | ||
1604 | sync_dirty_buffer(bh); | ||
1605 | return 0; | ||
1606 | } | ||
1607 | |||
1608 | |||
1609 | /** | ||
1610 | * int jbd2_journal_flush () - Flush journal | 1705 | * int jbd2_journal_flush () - Flush journal |
1611 | * @journal: Journal to act on. | 1706 | * @journal: Journal to act on. |
1612 | * | 1707 | * |
@@ -1619,7 +1714,6 @@ int jbd2_journal_flush(journal_t *journal) | |||
1619 | { | 1714 | { |
1620 | int err = 0; | 1715 | int err = 0; |
1621 | transaction_t *transaction = NULL; | 1716 | transaction_t *transaction = NULL; |
1622 | unsigned long old_tail; | ||
1623 | 1717 | ||
1624 | write_lock(&journal->j_state_lock); | 1718 | write_lock(&journal->j_state_lock); |
1625 | 1719 | ||
@@ -1654,6 +1748,7 @@ int jbd2_journal_flush(journal_t *journal) | |||
1654 | if (is_journal_aborted(journal)) | 1748 | if (is_journal_aborted(journal)) |
1655 | return -EIO; | 1749 | return -EIO; |
1656 | 1750 | ||
1751 | mutex_lock(&journal->j_checkpoint_mutex); | ||
1657 | jbd2_cleanup_journal_tail(journal); | 1752 | jbd2_cleanup_journal_tail(journal); |
1658 | 1753 | ||
1659 | /* Finally, mark the journal as really needing no recovery. | 1754 | /* Finally, mark the journal as really needing no recovery. |
@@ -1661,14 +1756,9 @@ int jbd2_journal_flush(journal_t *journal) | |||
1661 | * the magic code for a fully-recovered superblock. Any future | 1756 | * the magic code for a fully-recovered superblock. Any future |
1662 | * commits of data to the journal will restore the current | 1757 | * commits of data to the journal will restore the current |
1663 | * s_start value. */ | 1758 | * s_start value. */ |
1759 | jbd2_mark_journal_empty(journal); | ||
1760 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
1664 | write_lock(&journal->j_state_lock); | 1761 | write_lock(&journal->j_state_lock); |
1665 | old_tail = journal->j_tail; | ||
1666 | journal->j_tail = 0; | ||
1667 | write_unlock(&journal->j_state_lock); | ||
1668 | jbd2_journal_update_superblock(journal, 1); | ||
1669 | write_lock(&journal->j_state_lock); | ||
1670 | journal->j_tail = old_tail; | ||
1671 | |||
1672 | J_ASSERT(!journal->j_running_transaction); | 1762 | J_ASSERT(!journal->j_running_transaction); |
1673 | J_ASSERT(!journal->j_committing_transaction); | 1763 | J_ASSERT(!journal->j_committing_transaction); |
1674 | J_ASSERT(!journal->j_checkpoint_transactions); | 1764 | J_ASSERT(!journal->j_checkpoint_transactions); |
@@ -1708,8 +1798,12 @@ int jbd2_journal_wipe(journal_t *journal, int write) | |||
1708 | write ? "Clearing" : "Ignoring"); | 1798 | write ? "Clearing" : "Ignoring"); |
1709 | 1799 | ||
1710 | err = jbd2_journal_skip_recovery(journal); | 1800 | err = jbd2_journal_skip_recovery(journal); |
1711 | if (write) | 1801 | if (write) { |
1712 | jbd2_journal_update_superblock(journal, 1); | 1802 | /* Lock to make assertions happy... */ |
1803 | mutex_lock(&journal->j_checkpoint_mutex); | ||
1804 | jbd2_mark_journal_empty(journal); | ||
1805 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
1806 | } | ||
1713 | 1807 | ||
1714 | no_recovery: | 1808 | no_recovery: |
1715 | return err; | 1809 | return err; |
@@ -1759,7 +1853,7 @@ static void __journal_abort_soft (journal_t *journal, int errno) | |||
1759 | __jbd2_journal_abort_hard(journal); | 1853 | __jbd2_journal_abort_hard(journal); |
1760 | 1854 | ||
1761 | if (errno) | 1855 | if (errno) |
1762 | jbd2_journal_update_superblock(journal, 1); | 1856 | jbd2_journal_update_sb_errno(journal); |
1763 | } | 1857 | } |
1764 | 1858 | ||
1765 | /** | 1859 | /** |
@@ -2017,7 +2111,7 @@ static struct kmem_cache *jbd2_journal_head_cache; | |||
2017 | static atomic_t nr_journal_heads = ATOMIC_INIT(0); | 2111 | static atomic_t nr_journal_heads = ATOMIC_INIT(0); |
2018 | #endif | 2112 | #endif |
2019 | 2113 | ||
2020 | static int journal_init_jbd2_journal_head_cache(void) | 2114 | static int jbd2_journal_init_journal_head_cache(void) |
2021 | { | 2115 | { |
2022 | int retval; | 2116 | int retval; |
2023 | 2117 | ||
@@ -2035,7 +2129,7 @@ static int journal_init_jbd2_journal_head_cache(void) | |||
2035 | return retval; | 2129 | return retval; |
2036 | } | 2130 | } |
2037 | 2131 | ||
2038 | static void jbd2_journal_destroy_jbd2_journal_head_cache(void) | 2132 | static void jbd2_journal_destroy_journal_head_cache(void) |
2039 | { | 2133 | { |
2040 | if (jbd2_journal_head_cache) { | 2134 | if (jbd2_journal_head_cache) { |
2041 | kmem_cache_destroy(jbd2_journal_head_cache); | 2135 | kmem_cache_destroy(jbd2_journal_head_cache); |
@@ -2323,7 +2417,7 @@ static void __exit jbd2_remove_jbd_stats_proc_entry(void) | |||
2323 | 2417 | ||
2324 | struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache; | 2418 | struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache; |
2325 | 2419 | ||
2326 | static int __init journal_init_handle_cache(void) | 2420 | static int __init jbd2_journal_init_handle_cache(void) |
2327 | { | 2421 | { |
2328 | jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY); | 2422 | jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY); |
2329 | if (jbd2_handle_cache == NULL) { | 2423 | if (jbd2_handle_cache == NULL) { |
@@ -2358,17 +2452,20 @@ static int __init journal_init_caches(void) | |||
2358 | 2452 | ||
2359 | ret = jbd2_journal_init_revoke_caches(); | 2453 | ret = jbd2_journal_init_revoke_caches(); |
2360 | if (ret == 0) | 2454 | if (ret == 0) |
2361 | ret = journal_init_jbd2_journal_head_cache(); | 2455 | ret = jbd2_journal_init_journal_head_cache(); |
2456 | if (ret == 0) | ||
2457 | ret = jbd2_journal_init_handle_cache(); | ||
2362 | if (ret == 0) | 2458 | if (ret == 0) |
2363 | ret = journal_init_handle_cache(); | 2459 | ret = jbd2_journal_init_transaction_cache(); |
2364 | return ret; | 2460 | return ret; |
2365 | } | 2461 | } |
2366 | 2462 | ||
2367 | static void jbd2_journal_destroy_caches(void) | 2463 | static void jbd2_journal_destroy_caches(void) |
2368 | { | 2464 | { |
2369 | jbd2_journal_destroy_revoke_caches(); | 2465 | jbd2_journal_destroy_revoke_caches(); |
2370 | jbd2_journal_destroy_jbd2_journal_head_cache(); | 2466 | jbd2_journal_destroy_journal_head_cache(); |
2371 | jbd2_journal_destroy_handle_cache(); | 2467 | jbd2_journal_destroy_handle_cache(); |
2468 | jbd2_journal_destroy_transaction_cache(); | ||
2372 | jbd2_journal_destroy_slabs(); | 2469 | jbd2_journal_destroy_slabs(); |
2373 | } | 2470 | } |
2374 | 2471 | ||
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index da6d7baf1390..c1a03354a22f 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/jbd2.h> | 21 | #include <linux/jbd2.h> |
22 | #include <linux/errno.h> | 22 | #include <linux/errno.h> |
23 | #include <linux/crc32.h> | 23 | #include <linux/crc32.h> |
24 | #include <linux/blkdev.h> | ||
24 | #endif | 25 | #endif |
25 | 26 | ||
26 | /* | 27 | /* |
@@ -265,7 +266,9 @@ int jbd2_journal_recover(journal_t *journal) | |||
265 | err2 = sync_blockdev(journal->j_fs_dev); | 266 | err2 = sync_blockdev(journal->j_fs_dev); |
266 | if (!err) | 267 | if (!err) |
267 | err = err2; | 268 | err = err2; |
268 | 269 | /* Make sure all replayed data is on permanent storage */ | |
270 | if (journal->j_flags & JBD2_BARRIER) | ||
271 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); | ||
269 | return err; | 272 | return err; |
270 | } | 273 | } |
271 | 274 | ||
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 30b2867d6cc9..6973705d6a3d 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -208,17 +208,13 @@ int __init jbd2_journal_init_revoke_caches(void) | |||
208 | J_ASSERT(!jbd2_revoke_record_cache); | 208 | J_ASSERT(!jbd2_revoke_record_cache); |
209 | J_ASSERT(!jbd2_revoke_table_cache); | 209 | J_ASSERT(!jbd2_revoke_table_cache); |
210 | 210 | ||
211 | jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", | 211 | jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s, |
212 | sizeof(struct jbd2_revoke_record_s), | 212 | SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY); |
213 | 0, | ||
214 | SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, | ||
215 | NULL); | ||
216 | if (!jbd2_revoke_record_cache) | 213 | if (!jbd2_revoke_record_cache) |
217 | goto record_cache_failure; | 214 | goto record_cache_failure; |
218 | 215 | ||
219 | jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", | 216 | jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s, |
220 | sizeof(struct jbd2_revoke_table_s), | 217 | SLAB_TEMPORARY); |
221 | 0, SLAB_TEMPORARY, NULL); | ||
222 | if (!jbd2_revoke_table_cache) | 218 | if (!jbd2_revoke_table_cache) |
223 | goto table_cache_failure; | 219 | goto table_cache_failure; |
224 | return 0; | 220 | return 0; |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e5aba56e1fd5..ddcd3549c6c2 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -33,6 +33,35 @@ | |||
33 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); | 33 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); |
34 | static void __jbd2_journal_unfile_buffer(struct journal_head *jh); | 34 | static void __jbd2_journal_unfile_buffer(struct journal_head *jh); |
35 | 35 | ||
36 | static struct kmem_cache *transaction_cache; | ||
37 | int __init jbd2_journal_init_transaction_cache(void) | ||
38 | { | ||
39 | J_ASSERT(!transaction_cache); | ||
40 | transaction_cache = kmem_cache_create("jbd2_transaction_s", | ||
41 | sizeof(transaction_t), | ||
42 | 0, | ||
43 | SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, | ||
44 | NULL); | ||
45 | if (transaction_cache) | ||
46 | return 0; | ||
47 | return -ENOMEM; | ||
48 | } | ||
49 | |||
50 | void jbd2_journal_destroy_transaction_cache(void) | ||
51 | { | ||
52 | if (transaction_cache) { | ||
53 | kmem_cache_destroy(transaction_cache); | ||
54 | transaction_cache = NULL; | ||
55 | } | ||
56 | } | ||
57 | |||
58 | void jbd2_journal_free_transaction(transaction_t *transaction) | ||
59 | { | ||
60 | if (unlikely(ZERO_OR_NULL_PTR(transaction))) | ||
61 | return; | ||
62 | kmem_cache_free(transaction_cache, transaction); | ||
63 | } | ||
64 | |||
36 | /* | 65 | /* |
37 | * jbd2_get_transaction: obtain a new transaction_t object. | 66 | * jbd2_get_transaction: obtain a new transaction_t object. |
38 | * | 67 | * |
@@ -133,7 +162,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle, | |||
133 | 162 | ||
134 | alloc_transaction: | 163 | alloc_transaction: |
135 | if (!journal->j_running_transaction) { | 164 | if (!journal->j_running_transaction) { |
136 | new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask); | 165 | new_transaction = kmem_cache_alloc(transaction_cache, |
166 | gfp_mask | __GFP_ZERO); | ||
137 | if (!new_transaction) { | 167 | if (!new_transaction) { |
138 | /* | 168 | /* |
139 | * If __GFP_FS is not present, then we may be | 169 | * If __GFP_FS is not present, then we may be |
@@ -162,7 +192,7 @@ repeat: | |||
162 | if (is_journal_aborted(journal) || | 192 | if (is_journal_aborted(journal) || |
163 | (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { | 193 | (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { |
164 | read_unlock(&journal->j_state_lock); | 194 | read_unlock(&journal->j_state_lock); |
165 | kfree(new_transaction); | 195 | jbd2_journal_free_transaction(new_transaction); |
166 | return -EROFS; | 196 | return -EROFS; |
167 | } | 197 | } |
168 | 198 | ||
@@ -284,7 +314,7 @@ repeat: | |||
284 | read_unlock(&journal->j_state_lock); | 314 | read_unlock(&journal->j_state_lock); |
285 | 315 | ||
286 | lock_map_acquire(&handle->h_lockdep_map); | 316 | lock_map_acquire(&handle->h_lockdep_map); |
287 | kfree(new_transaction); | 317 | jbd2_journal_free_transaction(new_transaction); |
288 | return 0; | 318 | return 0; |
289 | } | 319 | } |
290 | 320 | ||
@@ -1549,9 +1579,9 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) | |||
1549 | * of these pointers, it could go bad. Generally the caller needs to re-read | 1579 | * of these pointers, it could go bad. Generally the caller needs to re-read |
1550 | * the pointer from the transaction_t. | 1580 | * the pointer from the transaction_t. |
1551 | * | 1581 | * |
1552 | * Called under j_list_lock. The journal may not be locked. | 1582 | * Called under j_list_lock. |
1553 | */ | 1583 | */ |
1554 | void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | 1584 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) |
1555 | { | 1585 | { |
1556 | struct journal_head **list = NULL; | 1586 | struct journal_head **list = NULL; |
1557 | transaction_t *transaction; | 1587 | transaction_t *transaction; |
@@ -1646,10 +1676,8 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
1646 | spin_lock(&journal->j_list_lock); | 1676 | spin_lock(&journal->j_list_lock); |
1647 | if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { | 1677 | if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { |
1648 | /* written-back checkpointed metadata buffer */ | 1678 | /* written-back checkpointed metadata buffer */ |
1649 | if (jh->b_jlist == BJ_None) { | 1679 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
1650 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1680 | __jbd2_journal_remove_checkpoint(jh); |
1651 | __jbd2_journal_remove_checkpoint(jh); | ||
1652 | } | ||
1653 | } | 1681 | } |
1654 | spin_unlock(&journal->j_list_lock); | 1682 | spin_unlock(&journal->j_list_lock); |
1655 | out: | 1683 | out: |
@@ -1949,6 +1977,8 @@ zap_buffer_unlocked: | |||
1949 | clear_buffer_mapped(bh); | 1977 | clear_buffer_mapped(bh); |
1950 | clear_buffer_req(bh); | 1978 | clear_buffer_req(bh); |
1951 | clear_buffer_new(bh); | 1979 | clear_buffer_new(bh); |
1980 | clear_buffer_delay(bh); | ||
1981 | clear_buffer_unwritten(bh); | ||
1952 | bh->b_bdev = NULL; | 1982 | bh->b_bdev = NULL; |
1953 | return may_free; | 1983 | return may_free; |
1954 | } | 1984 | } |