Diffstat (limited to 'fs/jbd2/journal.c')
-rw-r--r--  fs/jbd2/journal.c | 166
1 file changed, 91 insertions(+), 75 deletions(-)
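The first hunk below adds a __jbd2_debug() helper that funnels jbd2's debug output through a single printk() call, prefixed with the call site's file, function and line. The jbd_debug() macro that callers actually use lives in include/linux/jbd2.h and is not part of this diff; the sketch below is only an illustration, under that assumption, of how such a wrapper would forward the call-site information to the new helper.

/*
 * Illustrative only -- the real wrapper is defined in include/linux/jbd2.h,
 * outside this diff. It passes the call site's location to __jbd2_debug(),
 * which drops the message unless level <= jbd2_journal_enable_debug.
 */
#define jbd_debug(n, fmt, a...) \
	__jbd2_debug((n), __FILE__, __func__, __LINE__, (fmt), ##a)

/* Hypothetical call site: printed only when jbd2_journal_enable_debug >= 1. */
jbd_debug(1, "start commit of transaction %d\n", tid);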
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 95457576e434..02c7ad9d7a41 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -103,6 +103,24 @@ EXPORT_SYMBOL(jbd2_inode_cache);
 static void __journal_abort_soft (journal_t *journal, int errno);
 static int jbd2_journal_create_slab(size_t slab_size);
 
+#ifdef CONFIG_JBD2_DEBUG
+void __jbd2_debug(int level, const char *file, const char *func,
+		  unsigned int line, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	if (level > jbd2_journal_enable_debug)
+		return;
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	printk(KERN_DEBUG "%s: (%s, %u): %pV\n", file, func, line, &vaf);
+	va_end(args);
+}
+EXPORT_SYMBOL(__jbd2_debug);
+#endif
+
 /* Checksumming functions */
 int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
 {
@@ -310,14 +328,12 @@ static void journal_kill_thread(journal_t *journal)
  *
  * If the source buffer has already been modified by a new transaction
  * since we took the last commit snapshot, we use the frozen copy of
  * that data for IO. If we end up using the existing buffer_head's data
- * for the write, then we *have* to lock the buffer to prevent anyone
- * else from using and possibly modifying it while the IO is in
- * progress.
+ * for the write, then we have to make sure nobody modifies it while the
+ * IO is in progress. do_get_write_access() handles this.
  *
- * The function returns a pointer to the buffer_heads to be used for IO.
+ * The function returns a pointer to the buffer_head to be used for IO.
  *
- * We assume that the journal has already been locked in this function.
  *
  * Return value:
  * <0: Error
@@ -330,15 +346,14 @@ static void journal_kill_thread(journal_t *journal)
 
 int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
				       struct journal_head *jh_in,
-				       struct journal_head **jh_out,
-				       unsigned long long blocknr)
+				       struct buffer_head **bh_out,
+				       sector_t blocknr)
 {
	int need_copy_out = 0;
	int done_copy_out = 0;
	int do_escape = 0;
	char *mapped_data;
	struct buffer_head *new_bh;
-	struct journal_head *new_jh;
	struct page *new_page;
	unsigned int new_offset;
	struct buffer_head *bh_in = jh2bh(jh_in);
@@ -368,14 +383,13 @@ retry_alloc:
 
	/* keep subsequent assertions sane */
	atomic_set(&new_bh->b_count, 1);
-	new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */
 
+	jbd_lock_bh_state(bh_in);
+repeat:
	/*
	 * If a new transaction has already done a buffer copy-out, then
	 * we use that version of the data for the commit.
	 */
-	jbd_lock_bh_state(bh_in);
-repeat:
	if (jh_in->b_frozen_data) {
		done_copy_out = 1;
		new_page = virt_to_page(jh_in->b_frozen_data);
@@ -415,7 +429,7 @@ repeat:
		jbd_unlock_bh_state(bh_in);
		tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
		if (!tmp) {
-			jbd2_journal_put_journal_head(new_jh);
+			brelse(new_bh);
			return -ENOMEM;
		}
		jbd_lock_bh_state(bh_in);
@@ -426,7 +440,7 @@ repeat:
 
		jh_in->b_frozen_data = tmp;
		mapped_data = kmap_atomic(new_page);
-		memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
+		memcpy(tmp, mapped_data + new_offset, bh_in->b_size);
		kunmap_atomic(mapped_data);
 
		new_page = virt_to_page(tmp);
@@ -452,14 +466,14 @@ repeat:
	}
 
	set_bh_page(new_bh, new_page, new_offset);
-	new_jh->b_transaction = NULL;
-	new_bh->b_size = jh2bh(jh_in)->b_size;
-	new_bh->b_bdev = transaction->t_journal->j_dev;
+	new_bh->b_size = bh_in->b_size;
+	new_bh->b_bdev = journal->j_dev;
	new_bh->b_blocknr = blocknr;
+	new_bh->b_private = bh_in;
	set_buffer_mapped(new_bh);
	set_buffer_dirty(new_bh);
 
-	*jh_out = new_jh;
+	*bh_out = new_bh;
 
	/*
	 * The to-be-written buffer needs to get moved to the io queue,
@@ -470,11 +484,9 @@ repeat:
	spin_lock(&journal->j_list_lock);
	__jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
	spin_unlock(&journal->j_list_lock);
+	set_buffer_shadow(bh_in);
	jbd_unlock_bh_state(bh_in);
 
-	JBUFFER_TRACE(new_jh, "file as BJ_IO");
-	jbd2_journal_file_buffer(new_jh, transaction, BJ_IO);
-
	return do_escape | (done_copy_out << 1);
 }
 
@@ -484,35 +496,6 @@ repeat:
  */
 
 /*
- * __jbd2_log_space_left: Return the number of free blocks left in the journal.
- *
- * Called with the journal already locked.
- *
- * Called under j_state_lock
- */
-
-int __jbd2_log_space_left(journal_t *journal)
-{
-	int left = journal->j_free;
-
-	/* assert_spin_locked(&journal->j_state_lock); */
-
-	/*
-	 * Be pessimistic here about the number of those free blocks which
-	 * might be required for log descriptor control blocks.
-	 */
-
-#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */
-
-	left -= MIN_LOG_RESERVED_BLOCKS;
-
-	if (left <= 0)
-		return 0;
-	left -= (left >> 3);
-	return left;
-}
-
-/*
  * Called with j_state_lock locked for writing.
  * Returns true if a transaction commit was started.
  */
@@ -564,20 +547,17 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid)
 }
 
 /*
- * Force and wait upon a commit if the calling process is not within
- * transaction. This is used for forcing out undo-protected data which contains
- * bitmaps, when the fs is running out of space.
- *
- * We can only force the running transaction if we don't have an active handle;
- * otherwise, we will deadlock.
- *
- * Returns true if a transaction was started.
+ * Force and wait any uncommitted transactions. We can only force the running
+ * transaction if we don't have an active handle, otherwise, we will deadlock.
+ * Returns: <0 in case of error,
+ *           0 if nothing to commit,
+ *           1 if transaction was successfully committed.
  */
-int jbd2_journal_force_commit_nested(journal_t *journal)
+static int __jbd2_journal_force_commit(journal_t *journal)
 {
	transaction_t *transaction = NULL;
	tid_t tid;
-	int need_to_start = 0;
+	int need_to_start = 0, ret = 0;
 
	read_lock(&journal->j_state_lock);
	if (journal->j_running_transaction && !current->journal_info) {
@@ -588,16 +568,53 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
		transaction = journal->j_committing_transaction;
 
	if (!transaction) {
+		/* Nothing to commit */
		read_unlock(&journal->j_state_lock);
-		return 0;	/* Nothing to retry */
+		return 0;
	}
-
	tid = transaction->t_tid;
	read_unlock(&journal->j_state_lock);
	if (need_to_start)
		jbd2_log_start_commit(journal, tid);
-	jbd2_log_wait_commit(journal, tid);
-	return 1;
+	ret = jbd2_log_wait_commit(journal, tid);
+	if (!ret)
+		ret = 1;
+
+	return ret;
+}
+
+/**
+ * Force and wait upon a commit if the calling process is not within
+ * transaction. This is used for forcing out undo-protected data which contains
+ * bitmaps, when the fs is running out of space.
+ *
+ * @journal: journal to force
+ * Returns true if progress was made.
+ */
+int jbd2_journal_force_commit_nested(journal_t *journal)
+{
+	int ret;
+
+	ret = __jbd2_journal_force_commit(journal);
+	return ret > 0;
+}
+
+/**
+ * int journal_force_commit() - force any uncommitted transactions
+ * @journal: journal to force
+ *
+ * Caller want unconditional commit. We can only force the running transaction
+ * if we don't have an active handle, otherwise, we will deadlock.
+ */
+int jbd2_journal_force_commit(journal_t *journal)
+{
+	int ret;
+
+	J_ASSERT(!current->journal_info);
+	ret = __jbd2_journal_force_commit(journal);
+	if (ret > 0)
+		ret = 0;
+	return ret;
 }
 
 /*
@@ -798,7 +815,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
  * But we don't bother doing that, so there will be coherency problems with
  * mmaps of blockdevs which hold live JBD-controlled filesystems.
  */
-struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
+struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
 {
	struct buffer_head *bh;
	unsigned long long blocknr;
@@ -817,7 +834,7 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
	set_buffer_uptodate(bh);
	unlock_buffer(bh);
	BUFFER_TRACE(bh, "return this buffer");
-	return jbd2_journal_add_journal_head(bh);
+	return bh;
 }
 
 /*
@@ -1062,11 +1079,10 @@ static journal_t * journal_init_common (void)
		return NULL;
 
	init_waitqueue_head(&journal->j_wait_transaction_locked);
-	init_waitqueue_head(&journal->j_wait_logspace);
	init_waitqueue_head(&journal->j_wait_done_commit);
-	init_waitqueue_head(&journal->j_wait_checkpoint);
	init_waitqueue_head(&journal->j_wait_commit);
	init_waitqueue_head(&journal->j_wait_updates);
+	init_waitqueue_head(&journal->j_wait_reserved);
	mutex_init(&journal->j_barrier);
	mutex_init(&journal->j_checkpoint_mutex);
	spin_lock_init(&journal->j_revoke_lock);
@@ -1076,6 +1092,7 @@ static journal_t * journal_init_common (void)
	journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
	journal->j_min_batch_time = 0;
	journal->j_max_batch_time = 15000;	/* 15ms */
+	atomic_set(&journal->j_reserved_credits, 0);
 
	/* The journal is marked for error until we succeed with recovery! */
	journal->j_flags = JBD2_ABORT;
@@ -1318,6 +1335,7 @@ static int journal_reset(journal_t *journal)
 static void jbd2_write_superblock(journal_t *journal, int write_op)
 {
	struct buffer_head *bh = journal->j_sb_buffer;
+	journal_superblock_t *sb = journal->j_superblock;
	int ret;
 
	trace_jbd2_write_superblock(journal, write_op);
@@ -1339,6 +1357,7 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
		clear_buffer_write_io_error(bh);
		set_buffer_uptodate(bh);
	}
+	jbd2_superblock_csum_set(journal, sb);
	get_bh(bh);
	bh->b_end_io = end_buffer_write_sync;
	ret = submit_bh(write_op, bh);
@@ -1435,7 +1454,6 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
	jbd_debug(1, "JBD2: updating superblock error (errno %d)\n",
		  journal->j_errno);
	sb->s_errno    = cpu_to_be32(journal->j_errno);
-	jbd2_superblock_csum_set(journal, sb);
	read_unlock(&journal->j_state_lock);
 
	jbd2_write_superblock(journal, WRITE_SYNC);
@@ -2325,13 +2343,13 @@ static struct journal_head *journal_alloc_journal_head(void)
 #ifdef CONFIG_JBD2_DEBUG
	atomic_inc(&nr_journal_heads);
 #endif
-	ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
+	ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS);
	if (!ret) {
		jbd_debug(1, "out of memory for journal_head\n");
		pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
		while (!ret) {
			yield();
-			ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
+			ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS);
		}
	}
	return ret;
@@ -2393,10 +2411,8 @@ struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh)
	struct journal_head *new_jh = NULL;
 
 repeat:
-	if (!buffer_jbd(bh)) {
+	if (!buffer_jbd(bh))
		new_jh = journal_alloc_journal_head();
-		memset(new_jh, 0, sizeof(*new_jh));
-	}
 
	jbd_lock_bh_journal_head(bh);
	if (buffer_jbd(bh)) {