diff options
author | Theodore Ts'o <tytso@mit.edu> | 2008-10-16 20:00:24 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2008-10-16 20:00:24 -0400 |
commit | 3e624fc72fba09b6f999a9fbb87b64efccd38036 (patch) | |
tree | f0a1feac377788b27ea704519a33f367a6d415f9 | |
parent | 22359f5745eb26bd3205a1ede7968c8944398220 (diff) |
ext4: Replace hackish ext4_mb_poll_new_transaction with commit callback
The multiblock allocator needs to be able to release blocks (and issue
a blkdev discard request) when the transaction which freed those
blocks is committed. Previously this was done via a polling mechanism
whenever blocks were allocated or freed. A much better way of doing things
is to create a jbd2 callback function and attach the list of blocks
to be freed directly to the transaction structure.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- | fs/ext4/ext4_sb.h | 3 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 85 | ||||
-rw-r--r-- | fs/ext4/mballoc.h | 3 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 3 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 1 | ||||
-rw-r--r-- | include/linux/jbd2.h | 9 |
6 files changed, 29 insertions, 75 deletions
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 6a0b40d43264..445fde603df8 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h | |||
@@ -99,9 +99,6 @@ struct ext4_sb_info { | |||
99 | struct inode *s_buddy_cache; | 99 | struct inode *s_buddy_cache; |
100 | long s_blocks_reserved; | 100 | long s_blocks_reserved; |
101 | spinlock_t s_reserve_lock; | 101 | spinlock_t s_reserve_lock; |
102 | struct list_head s_active_transaction; | ||
103 | struct list_head s_closed_transaction; | ||
104 | struct list_head s_committed_transaction; | ||
105 | spinlock_t s_md_lock; | 102 | spinlock_t s_md_lock; |
106 | tid_t s_last_transaction; | 103 | tid_t s_last_transaction; |
107 | unsigned short *s_mb_offsets, *s_mb_maxs; | 104 | unsigned short *s_mb_offsets, *s_mb_maxs; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index da1da1fe2b14..dfe17a134052 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2523,9 +2523,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2523 | } | 2523 | } |
2524 | 2524 | ||
2525 | spin_lock_init(&sbi->s_md_lock); | 2525 | spin_lock_init(&sbi->s_md_lock); |
2526 | INIT_LIST_HEAD(&sbi->s_active_transaction); | ||
2527 | INIT_LIST_HEAD(&sbi->s_closed_transaction); | ||
2528 | INIT_LIST_HEAD(&sbi->s_committed_transaction); | ||
2529 | spin_lock_init(&sbi->s_bal_lock); | 2526 | spin_lock_init(&sbi->s_bal_lock); |
2530 | 2527 | ||
2531 | sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; | 2528 | sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; |
@@ -2554,6 +2551,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2554 | ext4_mb_init_per_dev_proc(sb); | 2551 | ext4_mb_init_per_dev_proc(sb); |
2555 | ext4_mb_history_init(sb); | 2552 | ext4_mb_history_init(sb); |
2556 | 2553 | ||
2554 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | ||
2555 | |||
2557 | printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); | 2556 | printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); |
2558 | return 0; | 2557 | return 0; |
2559 | } | 2558 | } |
@@ -2583,15 +2582,6 @@ int ext4_mb_release(struct super_block *sb) | |||
2583 | struct ext4_group_info *grinfo; | 2582 | struct ext4_group_info *grinfo; |
2584 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2583 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2585 | 2584 | ||
2586 | /* release freed, non-committed blocks */ | ||
2587 | spin_lock(&sbi->s_md_lock); | ||
2588 | list_splice_init(&sbi->s_closed_transaction, | ||
2589 | &sbi->s_committed_transaction); | ||
2590 | list_splice_init(&sbi->s_active_transaction, | ||
2591 | &sbi->s_committed_transaction); | ||
2592 | spin_unlock(&sbi->s_md_lock); | ||
2593 | ext4_mb_free_committed_blocks(sb); | ||
2594 | |||
2595 | if (sbi->s_group_info) { | 2585 | if (sbi->s_group_info) { |
2596 | for (i = 0; i < sbi->s_groups_count; i++) { | 2586 | for (i = 0; i < sbi->s_groups_count; i++) { |
2597 | grinfo = ext4_get_group_info(sb, i); | 2587 | grinfo = ext4_get_group_info(sb, i); |
@@ -2645,36 +2635,25 @@ int ext4_mb_release(struct super_block *sb) | |||
2645 | return 0; | 2635 | return 0; |
2646 | } | 2636 | } |
2647 | 2637 | ||
2648 | static noinline_for_stack void | 2638 | /* |
2649 | ext4_mb_free_committed_blocks(struct super_block *sb) | 2639 | * This function is called by the jbd2 layer once the commit has finished, |
2640 | * so we know we can free the blocks that were released with that commit. | ||
2641 | */ | ||
2642 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | ||
2650 | { | 2643 | { |
2644 | struct super_block *sb = journal->j_private; | ||
2651 | struct ext4_buddy e4b; | 2645 | struct ext4_buddy e4b; |
2652 | struct ext4_group_info *db; | 2646 | struct ext4_group_info *db; |
2653 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2654 | int err, count = 0, count2 = 0; | 2647 | int err, count = 0, count2 = 0; |
2655 | struct ext4_free_data *entry; | 2648 | struct ext4_free_data *entry; |
2656 | ext4_fsblk_t discard_block; | 2649 | ext4_fsblk_t discard_block; |
2650 | struct list_head *l, *ltmp; | ||
2657 | 2651 | ||
2658 | if (list_empty(&sbi->s_committed_transaction)) | 2652 | list_for_each_safe(l, ltmp, &txn->t_private_list) { |
2659 | return; | 2653 | entry = list_entry(l, struct ext4_free_data, list); |
2660 | |||
2661 | /* there is committed blocks to be freed yet */ | ||
2662 | do { | ||
2663 | /* get next array of blocks */ | ||
2664 | entry = NULL; | ||
2665 | spin_lock(&sbi->s_md_lock); | ||
2666 | if (!list_empty(&sbi->s_committed_transaction)) { | ||
2667 | entry = list_entry(sbi->s_committed_transaction.next, | ||
2668 | struct ext4_free_data, list); | ||
2669 | list_del(&entry->list); | ||
2670 | } | ||
2671 | spin_unlock(&sbi->s_md_lock); | ||
2672 | |||
2673 | if (entry == NULL) | ||
2674 | break; | ||
2675 | 2654 | ||
2676 | mb_debug("gonna free %u blocks in group %lu (0x%p):", | 2655 | mb_debug("gonna free %u blocks in group %lu (0x%p):", |
2677 | entry->count, entry->group, entry); | 2656 | entry->count, entry->group, entry); |
2678 | 2657 | ||
2679 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2658 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
2680 | /* we expect to find existing buddy because it's pinned */ | 2659 | /* we expect to find existing buddy because it's pinned */ |
@@ -2706,7 +2685,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb) | |||
2706 | 2685 | ||
2707 | kmem_cache_free(ext4_free_ext_cachep, entry); | 2686 | kmem_cache_free(ext4_free_ext_cachep, entry); |
2708 | ext4_mb_release_desc(&e4b); | 2687 | ext4_mb_release_desc(&e4b); |
2709 | } while (1); | 2688 | } |
2710 | 2689 | ||
2711 | mb_debug("freed %u blocks in %u structures\n", count, count2); | 2690 | mb_debug("freed %u blocks in %u structures\n", count, count2); |
2712 | } | 2691 | } |
@@ -4348,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4348 | goto out1; | 4327 | goto out1; |
4349 | } | 4328 | } |
4350 | 4329 | ||
4351 | ext4_mb_poll_new_transaction(sb, handle); | ||
4352 | |||
4353 | *errp = ext4_mb_initialize_context(ac, ar); | 4330 | *errp = ext4_mb_initialize_context(ac, ar); |
4354 | if (*errp) { | 4331 | if (*errp) { |
4355 | ar->len = 0; | 4332 | ar->len = 0; |
@@ -4408,36 +4385,6 @@ out1: | |||
4408 | 4385 | ||
4409 | return block; | 4386 | return block; |
4410 | } | 4387 | } |
4411 | static void ext4_mb_poll_new_transaction(struct super_block *sb, | ||
4412 | handle_t *handle) | ||
4413 | { | ||
4414 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
4415 | |||
4416 | if (sbi->s_last_transaction == handle->h_transaction->t_tid) | ||
4417 | return; | ||
4418 | |||
4419 | /* new transaction! time to close last one and free blocks for | ||
4420 | * committed transaction. we know that only transaction can be | ||
4421 | * active, so previos transaction can be being logged and we | ||
4422 | * know that transaction before previous is known to be already | ||
4423 | * logged. this means that now we may free blocks freed in all | ||
4424 | * transactions before previous one. hope I'm clear enough ... */ | ||
4425 | |||
4426 | spin_lock(&sbi->s_md_lock); | ||
4427 | if (sbi->s_last_transaction != handle->h_transaction->t_tid) { | ||
4428 | mb_debug("new transaction %lu, old %lu\n", | ||
4429 | (unsigned long) handle->h_transaction->t_tid, | ||
4430 | (unsigned long) sbi->s_last_transaction); | ||
4431 | list_splice_init(&sbi->s_closed_transaction, | ||
4432 | &sbi->s_committed_transaction); | ||
4433 | list_splice_init(&sbi->s_active_transaction, | ||
4434 | &sbi->s_closed_transaction); | ||
4435 | sbi->s_last_transaction = handle->h_transaction->t_tid; | ||
4436 | } | ||
4437 | spin_unlock(&sbi->s_md_lock); | ||
4438 | |||
4439 | ext4_mb_free_committed_blocks(sb); | ||
4440 | } | ||
4441 | 4388 | ||
4442 | /* | 4389 | /* |
4443 | * We can merge two free data extents only if the physical blocks | 4390 | * We can merge two free data extents only if the physical blocks |
@@ -4531,9 +4478,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4531 | kmem_cache_free(ext4_free_ext_cachep, entry); | 4478 | kmem_cache_free(ext4_free_ext_cachep, entry); |
4532 | } | 4479 | } |
4533 | } | 4480 | } |
4534 | /* Add the extent to active_transaction list */ | 4481 | /* Add the extent to transaction's private list */ |
4535 | spin_lock(&sbi->s_md_lock); | 4482 | spin_lock(&sbi->s_md_lock); |
4536 | list_add(&new_entry->list, &sbi->s_active_transaction); | 4483 | list_add(&new_entry->list, &handle->h_transaction->t_private_list); |
4537 | spin_unlock(&sbi->s_md_lock); | 4484 | spin_unlock(&sbi->s_md_lock); |
4538 | ext4_unlock_group(sb, group); | 4485 | ext4_unlock_group(sb, group); |
4539 | return 0; | 4486 | return 0; |
@@ -4562,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, | |||
4562 | 4509 | ||
4563 | *freed = 0; | 4510 | *freed = 0; |
4564 | 4511 | ||
4565 | ext4_mb_poll_new_transaction(sb, handle); | ||
4566 | |||
4567 | sbi = EXT4_SB(sb); | 4512 | sbi = EXT4_SB(sb); |
4568 | es = EXT4_SB(sb)->s_es; | 4513 | es = EXT4_SB(sb)->s_es; |
4569 | if (block < le32_to_cpu(es->s_first_data_block) || | 4514 | if (block < le32_to_cpu(es->s_first_data_block) || |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 94cb7b9fe3ee..b5dff1fff1e5 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -269,8 +269,6 @@ struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); | |||
269 | 269 | ||
270 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 270 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
271 | ext4_group_t group); | 271 | ext4_group_t group); |
272 | static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); | ||
273 | static void ext4_mb_free_committed_blocks(struct super_block *); | ||
274 | static void ext4_mb_return_to_preallocation(struct inode *inode, | 272 | static void ext4_mb_return_to_preallocation(struct inode *inode, |
275 | struct ext4_buddy *e4b, sector_t block, | 273 | struct ext4_buddy *e4b, sector_t block, |
276 | int count); | 274 | int count); |
@@ -278,6 +276,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *, | |||
278 | struct super_block *, struct ext4_prealloc_space *pa); | 276 | struct super_block *, struct ext4_prealloc_space *pa); |
279 | static int ext4_mb_init_per_dev_proc(struct super_block *sb); | 277 | static int ext4_mb_init_per_dev_proc(struct super_block *sb); |
280 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); | 278 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); |
279 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); | ||
281 | 280 | ||
282 | 281 | ||
283 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | 282 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0abe02c4242a..8b119e16aa36 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -995,6 +995,9 @@ restart_loop: | |||
995 | } | 995 | } |
996 | spin_unlock(&journal->j_list_lock); | 996 | spin_unlock(&journal->j_list_lock); |
997 | 997 | ||
998 | if (journal->j_commit_callback) | ||
999 | journal->j_commit_callback(journal, commit_transaction); | ||
1000 | |||
998 | trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", | 1001 | trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", |
999 | journal->j_devname, commit_transaction->t_tid, | 1002 | journal->j_devname, commit_transaction->t_tid, |
1000 | journal->j_tail_sequence); | 1003 | journal->j_tail_sequence); |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e5d540588fa9..39b7805a599a 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -52,6 +52,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
52 | transaction->t_expires = jiffies + journal->j_commit_interval; | 52 | transaction->t_expires = jiffies + journal->j_commit_interval; |
53 | spin_lock_init(&transaction->t_handle_lock); | 53 | spin_lock_init(&transaction->t_handle_lock); |
54 | INIT_LIST_HEAD(&transaction->t_inode_list); | 54 | INIT_LIST_HEAD(&transaction->t_inode_list); |
55 | INIT_LIST_HEAD(&transaction->t_private_list); | ||
55 | 56 | ||
56 | /* Set up the commit timer for the new transaction. */ | 57 | /* Set up the commit timer for the new transaction. */ |
57 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); | 58 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); |
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 463d6f10b64f..c7d106ef22e2 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
@@ -641,6 +641,11 @@ struct transaction_s | |||
641 | */ | 641 | */ |
642 | int t_handle_count; | 642 | int t_handle_count; |
643 | 643 | ||
644 | /* | ||
645 | * For use by the filesystem to store fs-specific data | ||
646 | * structures associated with the transaction | ||
647 | */ | ||
648 | struct list_head t_private_list; | ||
644 | }; | 649 | }; |
645 | 650 | ||
646 | struct transaction_run_stats_s { | 651 | struct transaction_run_stats_s { |
@@ -935,6 +940,10 @@ struct journal_s | |||
935 | 940 | ||
936 | pid_t j_last_sync_writer; | 941 | pid_t j_last_sync_writer; |
937 | 942 | ||
943 | /* This function is called when a transaction is closed */ | ||
944 | void (*j_commit_callback)(journal_t *, | ||
945 | transaction_t *); | ||
946 | |||
938 | /* | 947 | /* |
939 | * Journal statistics | 948 | * Journal statistics |
940 | */ | 949 | */ |