diff options
Diffstat (limited to 'fs/jbd2/checkpoint.c')
-rw-r--r-- | fs/jbd2/checkpoint.c | 140 |
1 files changed, 22 insertions, 118 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index d49d202903fb..c78841ee81cf 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -88,14 +88,13 @@ static inline void __buffer_relink_io(struct journal_head *jh) | |||
88 | * whole transaction. | 88 | * whole transaction. |
89 | * | 89 | * |
90 | * Requires j_list_lock | 90 | * Requires j_list_lock |
91 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it | ||
92 | */ | 91 | */ |
93 | static int __try_to_free_cp_buf(struct journal_head *jh) | 92 | static int __try_to_free_cp_buf(struct journal_head *jh) |
94 | { | 93 | { |
95 | int ret = 0; | 94 | int ret = 0; |
96 | struct buffer_head *bh = jh2bh(jh); | 95 | struct buffer_head *bh = jh2bh(jh); |
97 | 96 | ||
98 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && | 97 | if (jh->b_transaction == NULL && !buffer_locked(bh) && |
99 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | 98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { |
100 | /* | 99 | /* |
101 | * Get our reference so that bh cannot be freed before | 100 | * Get our reference so that bh cannot be freed before |
@@ -104,11 +103,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
104 | get_bh(bh); | 103 | get_bh(bh); |
105 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 104 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
106 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; | 105 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; |
107 | jbd_unlock_bh_state(bh); | ||
108 | BUFFER_TRACE(bh, "release"); | 106 | BUFFER_TRACE(bh, "release"); |
109 | __brelse(bh); | 107 | __brelse(bh); |
110 | } else { | ||
111 | jbd_unlock_bh_state(bh); | ||
112 | } | 108 | } |
113 | return ret; | 109 | return ret; |
114 | } | 110 | } |
@@ -180,21 +176,6 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
180 | } | 176 | } |
181 | 177 | ||
182 | /* | 178 | /* |
183 | * We were unable to perform jbd_trylock_bh_state() inside j_list_lock. | ||
184 | * The caller must restart a list walk. Wait for someone else to run | ||
185 | * jbd_unlock_bh_state(). | ||
186 | */ | ||
187 | static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) | ||
188 | __releases(journal->j_list_lock) | ||
189 | { | ||
190 | get_bh(bh); | ||
191 | spin_unlock(&journal->j_list_lock); | ||
192 | jbd_lock_bh_state(bh); | ||
193 | jbd_unlock_bh_state(bh); | ||
194 | put_bh(bh); | ||
195 | } | ||
196 | |||
197 | /* | ||
198 | * Clean up transaction's list of buffers submitted for io. | 179 | * Clean up transaction's list of buffers submitted for io. |
199 | * We wait for any pending IO to complete and remove any clean | 180 | * We wait for any pending IO to complete and remove any clean |
200 | * buffers. Note that we take the buffers in the opposite ordering | 181 | * buffers. Note that we take the buffers in the opposite ordering |
@@ -222,15 +203,9 @@ restart: | |||
222 | while (!released && transaction->t_checkpoint_io_list) { | 203 | while (!released && transaction->t_checkpoint_io_list) { |
223 | jh = transaction->t_checkpoint_io_list; | 204 | jh = transaction->t_checkpoint_io_list; |
224 | bh = jh2bh(jh); | 205 | bh = jh2bh(jh); |
225 | if (!jbd_trylock_bh_state(bh)) { | ||
226 | jbd_sync_bh(journal, bh); | ||
227 | spin_lock(&journal->j_list_lock); | ||
228 | goto restart; | ||
229 | } | ||
230 | get_bh(bh); | 206 | get_bh(bh); |
231 | if (buffer_locked(bh)) { | 207 | if (buffer_locked(bh)) { |
232 | spin_unlock(&journal->j_list_lock); | 208 | spin_unlock(&journal->j_list_lock); |
233 | jbd_unlock_bh_state(bh); | ||
234 | wait_on_buffer(bh); | 209 | wait_on_buffer(bh); |
235 | /* the journal_head may have gone by now */ | 210 | /* the journal_head may have gone by now */ |
236 | BUFFER_TRACE(bh, "brelse"); | 211 | BUFFER_TRACE(bh, "brelse"); |
@@ -246,7 +221,6 @@ restart: | |||
246 | * it has been written out and so we can drop it from the list | 221 | * it has been written out and so we can drop it from the list |
247 | */ | 222 | */ |
248 | released = __jbd2_journal_remove_checkpoint(jh); | 223 | released = __jbd2_journal_remove_checkpoint(jh); |
249 | jbd_unlock_bh_state(bh); | ||
250 | __brelse(bh); | 224 | __brelse(bh); |
251 | } | 225 | } |
252 | 226 | ||
@@ -266,7 +240,6 @@ __flush_batch(journal_t *journal, int *batch_count) | |||
266 | 240 | ||
267 | for (i = 0; i < *batch_count; i++) { | 241 | for (i = 0; i < *batch_count; i++) { |
268 | struct buffer_head *bh = journal->j_chkpt_bhs[i]; | 242 | struct buffer_head *bh = journal->j_chkpt_bhs[i]; |
269 | clear_buffer_jwrite(bh); | ||
270 | BUFFER_TRACE(bh, "brelse"); | 243 | BUFFER_TRACE(bh, "brelse"); |
271 | __brelse(bh); | 244 | __brelse(bh); |
272 | } | 245 | } |
@@ -281,7 +254,6 @@ __flush_batch(journal_t *journal, int *batch_count) | |||
281 | * be written out. | 254 | * be written out. |
282 | * | 255 | * |
283 | * Called with j_list_lock held and drops it if 1 is returned | 256 | * Called with j_list_lock held and drops it if 1 is returned |
284 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it | ||
285 | */ | 257 | */ |
286 | static int __process_buffer(journal_t *journal, struct journal_head *jh, | 258 | static int __process_buffer(journal_t *journal, struct journal_head *jh, |
287 | int *batch_count, transaction_t *transaction) | 259 | int *batch_count, transaction_t *transaction) |
@@ -292,7 +264,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
292 | if (buffer_locked(bh)) { | 264 | if (buffer_locked(bh)) { |
293 | get_bh(bh); | 265 | get_bh(bh); |
294 | spin_unlock(&journal->j_list_lock); | 266 | spin_unlock(&journal->j_list_lock); |
295 | jbd_unlock_bh_state(bh); | ||
296 | wait_on_buffer(bh); | 267 | wait_on_buffer(bh); |
297 | /* the journal_head may have gone by now */ | 268 | /* the journal_head may have gone by now */ |
298 | BUFFER_TRACE(bh, "brelse"); | 269 | BUFFER_TRACE(bh, "brelse"); |
@@ -304,7 +275,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
304 | 275 | ||
305 | transaction->t_chp_stats.cs_forced_to_close++; | 276 | transaction->t_chp_stats.cs_forced_to_close++; |
306 | spin_unlock(&journal->j_list_lock); | 277 | spin_unlock(&journal->j_list_lock); |
307 | jbd_unlock_bh_state(bh); | ||
308 | if (unlikely(journal->j_flags & JBD2_UNMOUNT)) | 278 | if (unlikely(journal->j_flags & JBD2_UNMOUNT)) |
309 | /* | 279 | /* |
310 | * The journal thread is dead; so starting and | 280 | * The journal thread is dead; so starting and |
@@ -323,11 +293,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
323 | if (unlikely(buffer_write_io_error(bh))) | 293 | if (unlikely(buffer_write_io_error(bh))) |
324 | ret = -EIO; | 294 | ret = -EIO; |
325 | get_bh(bh); | 295 | get_bh(bh); |
326 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); | ||
327 | BUFFER_TRACE(bh, "remove from checkpoint"); | 296 | BUFFER_TRACE(bh, "remove from checkpoint"); |
328 | __jbd2_journal_remove_checkpoint(jh); | 297 | __jbd2_journal_remove_checkpoint(jh); |
329 | spin_unlock(&journal->j_list_lock); | 298 | spin_unlock(&journal->j_list_lock); |
330 | jbd_unlock_bh_state(bh); | ||
331 | __brelse(bh); | 299 | __brelse(bh); |
332 | } else { | 300 | } else { |
333 | /* | 301 | /* |
@@ -340,10 +308,8 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
340 | BUFFER_TRACE(bh, "queue"); | 308 | BUFFER_TRACE(bh, "queue"); |
341 | get_bh(bh); | 309 | get_bh(bh); |
342 | J_ASSERT_BH(bh, !buffer_jwrite(bh)); | 310 | J_ASSERT_BH(bh, !buffer_jwrite(bh)); |
343 | set_buffer_jwrite(bh); | ||
344 | journal->j_chkpt_bhs[*batch_count] = bh; | 311 | journal->j_chkpt_bhs[*batch_count] = bh; |
345 | __buffer_relink_io(jh); | 312 | __buffer_relink_io(jh); |
346 | jbd_unlock_bh_state(bh); | ||
347 | transaction->t_chp_stats.cs_written++; | 313 | transaction->t_chp_stats.cs_written++; |
348 | (*batch_count)++; | 314 | (*batch_count)++; |
349 | if (*batch_count == JBD2_NR_BATCH) { | 315 | if (*batch_count == JBD2_NR_BATCH) { |
@@ -407,15 +373,7 @@ restart: | |||
407 | int retry = 0, err; | 373 | int retry = 0, err; |
408 | 374 | ||
409 | while (!retry && transaction->t_checkpoint_list) { | 375 | while (!retry && transaction->t_checkpoint_list) { |
410 | struct buffer_head *bh; | ||
411 | |||
412 | jh = transaction->t_checkpoint_list; | 376 | jh = transaction->t_checkpoint_list; |
413 | bh = jh2bh(jh); | ||
414 | if (!jbd_trylock_bh_state(bh)) { | ||
415 | jbd_sync_bh(journal, bh); | ||
416 | retry = 1; | ||
417 | break; | ||
418 | } | ||
419 | retry = __process_buffer(journal, jh, &batch_count, | 377 | retry = __process_buffer(journal, jh, &batch_count, |
420 | transaction); | 378 | transaction); |
421 | if (retry < 0 && !result) | 379 | if (retry < 0 && !result) |
@@ -478,79 +436,28 @@ out: | |||
478 | 436 | ||
479 | int jbd2_cleanup_journal_tail(journal_t *journal) | 437 | int jbd2_cleanup_journal_tail(journal_t *journal) |
480 | { | 438 | { |
481 | transaction_t * transaction; | ||
482 | tid_t first_tid; | 439 | tid_t first_tid; |
483 | unsigned long blocknr, freed; | 440 | unsigned long blocknr; |
484 | 441 | ||
485 | if (is_journal_aborted(journal)) | 442 | if (is_journal_aborted(journal)) |
486 | return 1; | 443 | return 1; |
487 | 444 | ||
488 | /* OK, work out the oldest transaction remaining in the log, and | 445 | if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr)) |
489 | * the log block it starts at. | ||
490 | * | ||
491 | * If the log is now empty, we need to work out which is the | ||
492 | * next transaction ID we will write, and where it will | ||
493 | * start. */ | ||
494 | |||
495 | write_lock(&journal->j_state_lock); | ||
496 | spin_lock(&journal->j_list_lock); | ||
497 | transaction = journal->j_checkpoint_transactions; | ||
498 | if (transaction) { | ||
499 | first_tid = transaction->t_tid; | ||
500 | blocknr = transaction->t_log_start; | ||
501 | } else if ((transaction = journal->j_committing_transaction) != NULL) { | ||
502 | first_tid = transaction->t_tid; | ||
503 | blocknr = transaction->t_log_start; | ||
504 | } else if ((transaction = journal->j_running_transaction) != NULL) { | ||
505 | first_tid = transaction->t_tid; | ||
506 | blocknr = journal->j_head; | ||
507 | } else { | ||
508 | first_tid = journal->j_transaction_sequence; | ||
509 | blocknr = journal->j_head; | ||
510 | } | ||
511 | spin_unlock(&journal->j_list_lock); | ||
512 | J_ASSERT(blocknr != 0); | ||
513 | |||
514 | /* If the oldest pinned transaction is at the tail of the log | ||
515 | already then there's not much we can do right now. */ | ||
516 | if (journal->j_tail_sequence == first_tid) { | ||
517 | write_unlock(&journal->j_state_lock); | ||
518 | return 1; | 446 | return 1; |
519 | } | 447 | J_ASSERT(blocknr != 0); |
520 | |||
521 | /* OK, update the superblock to recover the freed space. | ||
522 | * Physical blocks come first: have we wrapped beyond the end of | ||
523 | * the log? */ | ||
524 | freed = blocknr - journal->j_tail; | ||
525 | if (blocknr < journal->j_tail) | ||
526 | freed = freed + journal->j_last - journal->j_first; | ||
527 | |||
528 | trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed); | ||
529 | jbd_debug(1, | ||
530 | "Cleaning journal tail from %d to %d (offset %lu), " | ||
531 | "freeing %lu\n", | ||
532 | journal->j_tail_sequence, first_tid, blocknr, freed); | ||
533 | |||
534 | journal->j_free += freed; | ||
535 | journal->j_tail_sequence = first_tid; | ||
536 | journal->j_tail = blocknr; | ||
537 | write_unlock(&journal->j_state_lock); | ||
538 | 448 | ||
539 | /* | 449 | /* |
540 | * If there is an external journal, we need to make sure that | 450 | * We need to make sure that any blocks that were recently written out |
541 | * any data blocks that were recently written out --- perhaps | 451 | * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before |
542 | * by jbd2_log_do_checkpoint() --- are flushed out before we | 452 | * we drop the transactions from the journal. It's unlikely this will |
543 | * drop the transactions from the external journal. It's | 453 | * be necessary, especially with an appropriately sized journal, but we |
544 | * unlikely this will be necessary, especially with a | 454 | * need this to guarantee correctness. Fortunately |
545 | * appropriately sized journal, but we need this to guarantee | 455 | * jbd2_cleanup_journal_tail() doesn't get called all that often. |
546 | * correctness. Fortunately jbd2_cleanup_journal_tail() | ||
547 | * doesn't get called all that often. | ||
548 | */ | 456 | */ |
549 | if ((journal->j_fs_dev != journal->j_dev) && | 457 | if (journal->j_flags & JBD2_BARRIER) |
550 | (journal->j_flags & JBD2_BARRIER)) | ||
551 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); | 458 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); |
552 | if (!(journal->j_flags & JBD2_ABORT)) | 459 | |
553 | jbd2_journal_update_superblock(journal, 1); | 460 | __jbd2_update_log_tail(journal, first_tid, blocknr); |
554 | return 0; | 461 | return 0; |
555 | } | 462 | } |
556 | 463 | ||
@@ -582,15 +489,12 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) | |||
582 | do { | 489 | do { |
583 | jh = next_jh; | 490 | jh = next_jh; |
584 | next_jh = jh->b_cpnext; | 491 | next_jh = jh->b_cpnext; |
585 | /* Use trylock because of the ranking */ | 492 | ret = __try_to_free_cp_buf(jh); |
586 | if (jbd_trylock_bh_state(jh2bh(jh))) { | 493 | if (ret) { |
587 | ret = __try_to_free_cp_buf(jh); | 494 | freed++; |
588 | if (ret) { | 495 | if (ret == 2) { |
589 | freed++; | 496 | *released = 1; |
590 | if (ret == 2) { | 497 | return freed; |
591 | *released = 1; | ||
592 | return freed; | ||
593 | } | ||
594 | } | 498 | } |
595 | } | 499 | } |
596 | /* | 500 | /* |
@@ -673,9 +577,7 @@ out: | |||
673 | * The function can free jh and bh. | 577 | * The function can free jh and bh. |
674 | * | 578 | * |
675 | * This function is called with j_list_lock held. | 579 | * This function is called with j_list_lock held. |
676 | * This function is called with jbd_lock_bh_state(jh2bh(jh)) | ||
677 | */ | 580 | */ |
678 | |||
679 | int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | 581 | int __jbd2_journal_remove_checkpoint(struct journal_head *jh) |
680 | { | 582 | { |
681 | struct transaction_chp_stats_s *stats; | 583 | struct transaction_chp_stats_s *stats; |
@@ -722,7 +624,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
722 | transaction->t_tid, stats); | 624 | transaction->t_tid, stats); |
723 | 625 | ||
724 | __jbd2_journal_drop_transaction(journal, transaction); | 626 | __jbd2_journal_drop_transaction(journal, transaction); |
725 | kfree(transaction); | 627 | jbd2_journal_free_transaction(transaction); |
726 | 628 | ||
727 | /* Just in case anybody was waiting for more transactions to be | 629 | /* Just in case anybody was waiting for more transactions to be |
728 | checkpointed... */ | 630 | checkpointed... */ |
@@ -797,5 +699,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact | |||
797 | J_ASSERT(journal->j_committing_transaction != transaction); | 699 | J_ASSERT(journal->j_committing_transaction != transaction); |
798 | J_ASSERT(journal->j_running_transaction != transaction); | 700 | J_ASSERT(journal->j_running_transaction != transaction); |
799 | 701 | ||
702 | trace_jbd2_drop_transaction(journal, transaction); | ||
703 | |||
800 | jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); | 704 | jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); |
801 | } | 705 | } |