diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-02-17 17:05:05 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-02-17 17:05:05 -0500 |
commit | 3512a79dbcc90e6edac98717607bd821bba50a14 (patch) | |
tree | 9038e3545ccdfec77ec24d982f8cc24701fb532d /fs | |
parent | 39a65762d4c48fd8a498f34b7fec74a6b0aebd55 (diff) | |
parent | 090542641de833c6f756895fc2f139f046e298f9 (diff) |
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
ext4: Fix NULL dereference in ext4_ext_migrate()'s error handling
ext4: Implement range_cyclic in ext4_da_writepages instead of write_cache_pages
ext4: Initialize preallocation list_head's properly
ext4: Fix lockdep warning
ext4: Fix to read empty directory blocks correctly in 64k
jbd2: Avoid possible NULL dereference in jbd2_journal_begin_ordered_truncate()
Revert "ext4: wait on all pending commits in ext4_sync_fs()"
jbd2: Fix return value of jbd2_journal_start_commit()
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext4/ext4.h | 2 | ||||
-rw-r--r-- | fs/ext4/inode.c | 27 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 32 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 8 | ||||
-rw-r--r-- | fs/ext4/super.c | 11 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 17 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 42 | ||||
-rw-r--r-- | fs/ocfs2/journal.h | 6 |
8 files changed, 99 insertions, 46 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index aafc9eba1c25..b0c87dce66a3 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -868,7 +868,7 @@ static inline unsigned ext4_rec_len_from_disk(__le16 dlen) | |||
868 | { | 868 | { |
869 | unsigned len = le16_to_cpu(dlen); | 869 | unsigned len = le16_to_cpu(dlen); |
870 | 870 | ||
871 | if (len == EXT4_MAX_REC_LEN) | 871 | if (len == EXT4_MAX_REC_LEN || len == 0) |
872 | return 1 << 16; | 872 | return 1 << 16; |
873 | return len; | 873 | return len; |
874 | } | 874 | } |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 03ba20be1329..cbd2ca99d113 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -47,8 +47,10 @@ | |||
47 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | 47 | static inline int ext4_begin_ordered_truncate(struct inode *inode, |
48 | loff_t new_size) | 48 | loff_t new_size) |
49 | { | 49 | { |
50 | return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode, | 50 | return jbd2_journal_begin_ordered_truncate( |
51 | new_size); | 51 | EXT4_SB(inode->i_sb)->s_journal, |
52 | &EXT4_I(inode)->jinode, | ||
53 | new_size); | ||
52 | } | 54 | } |
53 | 55 | ||
54 | static void ext4_invalidatepage(struct page *page, unsigned long offset); | 56 | static void ext4_invalidatepage(struct page *page, unsigned long offset); |
@@ -2437,6 +2439,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2437 | int no_nrwrite_index_update; | 2439 | int no_nrwrite_index_update; |
2438 | int pages_written = 0; | 2440 | int pages_written = 0; |
2439 | long pages_skipped; | 2441 | long pages_skipped; |
2442 | int range_cyclic, cycled = 1, io_done = 0; | ||
2440 | int needed_blocks, ret = 0, nr_to_writebump = 0; | 2443 | int needed_blocks, ret = 0, nr_to_writebump = 0; |
2441 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2444 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2442 | 2445 | ||
@@ -2488,9 +2491,15 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2488 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | 2491 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) |
2489 | range_whole = 1; | 2492 | range_whole = 1; |
2490 | 2493 | ||
2491 | if (wbc->range_cyclic) | 2494 | range_cyclic = wbc->range_cyclic; |
2495 | if (wbc->range_cyclic) { | ||
2492 | index = mapping->writeback_index; | 2496 | index = mapping->writeback_index; |
2493 | else | 2497 | if (index) |
2498 | cycled = 0; | ||
2499 | wbc->range_start = index << PAGE_CACHE_SHIFT; | ||
2500 | wbc->range_end = LLONG_MAX; | ||
2501 | wbc->range_cyclic = 0; | ||
2502 | } else | ||
2494 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2503 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2495 | 2504 | ||
2496 | mpd.wbc = wbc; | 2505 | mpd.wbc = wbc; |
@@ -2504,6 +2513,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2504 | wbc->no_nrwrite_index_update = 1; | 2513 | wbc->no_nrwrite_index_update = 1; |
2505 | pages_skipped = wbc->pages_skipped; | 2514 | pages_skipped = wbc->pages_skipped; |
2506 | 2515 | ||
2516 | retry: | ||
2507 | while (!ret && wbc->nr_to_write > 0) { | 2517 | while (!ret && wbc->nr_to_write > 0) { |
2508 | 2518 | ||
2509 | /* | 2519 | /* |
@@ -2546,6 +2556,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2546 | pages_written += mpd.pages_written; | 2556 | pages_written += mpd.pages_written; |
2547 | wbc->pages_skipped = pages_skipped; | 2557 | wbc->pages_skipped = pages_skipped; |
2548 | ret = 0; | 2558 | ret = 0; |
2559 | io_done = 1; | ||
2549 | } else if (wbc->nr_to_write) | 2560 | } else if (wbc->nr_to_write) |
2550 | /* | 2561 | /* |
2551 | * There is no more writeout needed | 2562 | * There is no more writeout needed |
@@ -2554,6 +2565,13 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2554 | */ | 2565 | */ |
2555 | break; | 2566 | break; |
2556 | } | 2567 | } |
2568 | if (!io_done && !cycled) { | ||
2569 | cycled = 1; | ||
2570 | index = 0; | ||
2571 | wbc->range_start = index << PAGE_CACHE_SHIFT; | ||
2572 | wbc->range_end = mapping->writeback_index - 1; | ||
2573 | goto retry; | ||
2574 | } | ||
2557 | if (pages_skipped != wbc->pages_skipped) | 2575 | if (pages_skipped != wbc->pages_skipped) |
2558 | printk(KERN_EMERG "This should not happen leaving %s " | 2576 | printk(KERN_EMERG "This should not happen leaving %s " |
2559 | "with nr_to_write = %ld ret = %d\n", | 2577 | "with nr_to_write = %ld ret = %d\n", |
@@ -2561,6 +2579,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2561 | 2579 | ||
2562 | /* Update index */ | 2580 | /* Update index */ |
2563 | index += pages_written; | 2581 | index += pages_written; |
2582 | wbc->range_cyclic = range_cyclic; | ||
2564 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | 2583 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) |
2565 | /* | 2584 | /* |
2566 | * set the writeback_index so that range_cyclic | 2585 | * set the writeback_index so that range_cyclic |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index deba54f6cbed..4415beeb0b62 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -3693,6 +3693,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) | |||
3693 | pa->pa_free = pa->pa_len; | 3693 | pa->pa_free = pa->pa_len; |
3694 | atomic_set(&pa->pa_count, 1); | 3694 | atomic_set(&pa->pa_count, 1); |
3695 | spin_lock_init(&pa->pa_lock); | 3695 | spin_lock_init(&pa->pa_lock); |
3696 | INIT_LIST_HEAD(&pa->pa_inode_list); | ||
3697 | INIT_LIST_HEAD(&pa->pa_group_list); | ||
3696 | pa->pa_deleted = 0; | 3698 | pa->pa_deleted = 0; |
3697 | pa->pa_linear = 0; | 3699 | pa->pa_linear = 0; |
3698 | 3700 | ||
@@ -3755,6 +3757,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) | |||
3755 | atomic_set(&pa->pa_count, 1); | 3757 | atomic_set(&pa->pa_count, 1); |
3756 | spin_lock_init(&pa->pa_lock); | 3758 | spin_lock_init(&pa->pa_lock); |
3757 | INIT_LIST_HEAD(&pa->pa_inode_list); | 3759 | INIT_LIST_HEAD(&pa->pa_inode_list); |
3760 | INIT_LIST_HEAD(&pa->pa_group_list); | ||
3758 | pa->pa_deleted = 0; | 3761 | pa->pa_deleted = 0; |
3759 | pa->pa_linear = 1; | 3762 | pa->pa_linear = 1; |
3760 | 3763 | ||
@@ -4476,23 +4479,26 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) | |||
4476 | pa->pa_free -= ac->ac_b_ex.fe_len; | 4479 | pa->pa_free -= ac->ac_b_ex.fe_len; |
4477 | pa->pa_len -= ac->ac_b_ex.fe_len; | 4480 | pa->pa_len -= ac->ac_b_ex.fe_len; |
4478 | spin_unlock(&pa->pa_lock); | 4481 | spin_unlock(&pa->pa_lock); |
4479 | /* | ||
4480 | * We want to add the pa to the right bucket. | ||
4481 | * Remove it from the list and while adding | ||
4482 | * make sure the list to which we are adding | ||
4483 | * doesn't grow big. | ||
4484 | */ | ||
4485 | if (likely(pa->pa_free)) { | ||
4486 | spin_lock(pa->pa_obj_lock); | ||
4487 | list_del_rcu(&pa->pa_inode_list); | ||
4488 | spin_unlock(pa->pa_obj_lock); | ||
4489 | ext4_mb_add_n_trim(ac); | ||
4490 | } | ||
4491 | } | 4482 | } |
4492 | ext4_mb_put_pa(ac, ac->ac_sb, pa); | ||
4493 | } | 4483 | } |
4494 | if (ac->alloc_semp) | 4484 | if (ac->alloc_semp) |
4495 | up_read(ac->alloc_semp); | 4485 | up_read(ac->alloc_semp); |
4486 | if (pa) { | ||
4487 | /* | ||
4488 | * We want to add the pa to the right bucket. | ||
4489 | * Remove it from the list and while adding | ||
4490 | * make sure the list to which we are adding | ||
4491 | * doesn't grow big. We need to release | ||
4492 | * alloc_semp before calling ext4_mb_add_n_trim() | ||
4493 | */ | ||
4494 | if (pa->pa_linear && likely(pa->pa_free)) { | ||
4495 | spin_lock(pa->pa_obj_lock); | ||
4496 | list_del_rcu(&pa->pa_inode_list); | ||
4497 | spin_unlock(pa->pa_obj_lock); | ||
4498 | ext4_mb_add_n_trim(ac); | ||
4499 | } | ||
4500 | ext4_mb_put_pa(ac, ac->ac_sb, pa); | ||
4501 | } | ||
4496 | if (ac->ac_bitmap_page) | 4502 | if (ac->ac_bitmap_page) |
4497 | page_cache_release(ac->ac_bitmap_page); | 4503 | page_cache_release(ac->ac_bitmap_page); |
4498 | if (ac->ac_buddy_page) | 4504 | if (ac->ac_buddy_page) |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 734abca25e35..fe64d9f79852 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -481,7 +481,7 @@ int ext4_ext_migrate(struct inode *inode) | |||
481 | + 1); | 481 | + 1); |
482 | if (IS_ERR(handle)) { | 482 | if (IS_ERR(handle)) { |
483 | retval = PTR_ERR(handle); | 483 | retval = PTR_ERR(handle); |
484 | goto err_out; | 484 | return retval; |
485 | } | 485 | } |
486 | tmp_inode = ext4_new_inode(handle, | 486 | tmp_inode = ext4_new_inode(handle, |
487 | inode->i_sb->s_root->d_inode, | 487 | inode->i_sb->s_root->d_inode, |
@@ -489,8 +489,7 @@ int ext4_ext_migrate(struct inode *inode) | |||
489 | if (IS_ERR(tmp_inode)) { | 489 | if (IS_ERR(tmp_inode)) { |
490 | retval = -ENOMEM; | 490 | retval = -ENOMEM; |
491 | ext4_journal_stop(handle); | 491 | ext4_journal_stop(handle); |
492 | tmp_inode = NULL; | 492 | return retval; |
493 | goto err_out; | ||
494 | } | 493 | } |
495 | i_size_write(tmp_inode, i_size_read(inode)); | 494 | i_size_write(tmp_inode, i_size_read(inode)); |
496 | /* | 495 | /* |
@@ -618,8 +617,7 @@ err_out: | |||
618 | 617 | ||
619 | ext4_journal_stop(handle); | 618 | ext4_journal_stop(handle); |
620 | 619 | ||
621 | if (tmp_inode) | 620 | iput(tmp_inode); |
622 | iput(tmp_inode); | ||
623 | 621 | ||
624 | return retval; | 622 | return retval; |
625 | } | 623 | } |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e5f06a5f045e..a5732c58f676 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -3046,14 +3046,17 @@ static void ext4_write_super(struct super_block *sb) | |||
3046 | static int ext4_sync_fs(struct super_block *sb, int wait) | 3046 | static int ext4_sync_fs(struct super_block *sb, int wait) |
3047 | { | 3047 | { |
3048 | int ret = 0; | 3048 | int ret = 0; |
3049 | tid_t target; | ||
3049 | 3050 | ||
3050 | trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); | 3051 | trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); |
3051 | sb->s_dirt = 0; | 3052 | sb->s_dirt = 0; |
3052 | if (EXT4_SB(sb)->s_journal) { | 3053 | if (EXT4_SB(sb)->s_journal) { |
3053 | if (wait) | 3054 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, |
3054 | ret = ext4_force_commit(sb); | 3055 | &target)) { |
3055 | else | 3056 | if (wait) |
3056 | jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL); | 3057 | jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, |
3058 | target); | ||
3059 | } | ||
3057 | } else { | 3060 | } else { |
3058 | ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait); | 3061 | ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait); |
3059 | } | 3062 | } |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index eb343008eded..58144102bf25 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -450,7 +450,7 @@ int __jbd2_log_space_left(journal_t *journal) | |||
450 | } | 450 | } |
451 | 451 | ||
452 | /* | 452 | /* |
453 | * Called under j_state_lock. Returns true if a transaction was started. | 453 | * Called under j_state_lock. Returns true if a transaction commit was started. |
454 | */ | 454 | */ |
455 | int __jbd2_log_start_commit(journal_t *journal, tid_t target) | 455 | int __jbd2_log_start_commit(journal_t *journal, tid_t target) |
456 | { | 456 | { |
@@ -518,7 +518,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal) | |||
518 | 518 | ||
519 | /* | 519 | /* |
520 | * Start a commit of the current running transaction (if any). Returns true | 520 | * Start a commit of the current running transaction (if any). Returns true |
521 | * if a transaction was started, and fills its tid in at *ptid | 521 | * if a transaction is going to be committed (or is currently already |
522 | * committing), and fills its tid in at *ptid | ||
522 | */ | 523 | */ |
523 | int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) | 524 | int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) |
524 | { | 525 | { |
@@ -528,15 +529,19 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) | |||
528 | if (journal->j_running_transaction) { | 529 | if (journal->j_running_transaction) { |
529 | tid_t tid = journal->j_running_transaction->t_tid; | 530 | tid_t tid = journal->j_running_transaction->t_tid; |
530 | 531 | ||
531 | ret = __jbd2_log_start_commit(journal, tid); | 532 | __jbd2_log_start_commit(journal, tid); |
532 | if (ret && ptid) | 533 | /* There's a running transaction and we've just made sure |
534 | * its commit has been scheduled. */ | ||
535 | if (ptid) | ||
533 | *ptid = tid; | 536 | *ptid = tid; |
534 | } else if (journal->j_committing_transaction && ptid) { | 537 | ret = 1; |
538 | } else if (journal->j_committing_transaction) { | ||
535 | /* | 539 | /* |
536 | * If ext3_write_super() recently started a commit, then we | 540 | * If ext3_write_super() recently started a commit, then we |
537 | * have to wait for completion of that transaction | 541 | * have to wait for completion of that transaction |
538 | */ | 542 | */ |
539 | *ptid = journal->j_committing_transaction->t_tid; | 543 | if (ptid) |
544 | *ptid = journal->j_committing_transaction->t_tid; | ||
540 | ret = 1; | 545 | ret = 1; |
541 | } | 546 | } |
542 | spin_unlock(&journal->j_state_lock); | 547 | spin_unlock(&journal->j_state_lock); |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 46b4e347ed7d..28ce21d8598e 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -2129,26 +2129,46 @@ done: | |||
2129 | } | 2129 | } |
2130 | 2130 | ||
2131 | /* | 2131 | /* |
2132 | * This function must be called when inode is journaled in ordered mode | 2132 | * File truncate and transaction commit interact with each other in a |
2133 | * before truncation happens. It starts writeout of truncated part in | 2133 | * non-trivial way. If a transaction writing data block A is |
2134 | * case it is in the committing transaction so that we stand to ordered | 2134 | * committing, we cannot discard the data by truncate until we have |
2135 | * mode consistency guarantees. | 2135 | * written them. Otherwise if we crashed after the transaction with |
2136 | * write has committed but before the transaction with truncate has | ||
2137 | * committed, we could see stale data in block A. This function is a | ||
2138 | * helper to solve this problem. It starts writeout of the truncated | ||
2139 | * part in case it is in the committing transaction. | ||
2140 | * | ||
2141 | * Filesystem code must call this function when inode is journaled in | ||
2142 | * ordered mode before truncation happens and after the inode has been | ||
2143 | * placed on orphan list with the new inode size. The second condition | ||
2144 | * avoids the race that someone writes new data and we start | ||
2145 | * committing the transaction after this function has been called but | ||
2146 | * before a transaction for truncate is started (and furthermore it | ||
2147 | * allows us to optimize the case where the addition to orphan list | ||
2148 | * happens in the same transaction as write --- we don't have to write | ||
2149 | * any data in such case). | ||
2136 | */ | 2150 | */ |
2137 | int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, | 2151 | int jbd2_journal_begin_ordered_truncate(journal_t *journal, |
2152 | struct jbd2_inode *jinode, | ||
2138 | loff_t new_size) | 2153 | loff_t new_size) |
2139 | { | 2154 | { |
2140 | journal_t *journal; | 2155 | transaction_t *inode_trans, *commit_trans; |
2141 | transaction_t *commit_trans; | ||
2142 | int ret = 0; | 2156 | int ret = 0; |
2143 | 2157 | ||
2144 | if (!inode->i_transaction && !inode->i_next_transaction) | 2158 | /* This is a quick check to avoid locking if not necessary */ |
2159 | if (!jinode->i_transaction) | ||
2145 | goto out; | 2160 | goto out; |
2146 | journal = inode->i_transaction->t_journal; | 2161 | /* Locks are here just to force reading of recent values, it is |
2162 | * enough that the transaction was not committing before we started | ||
2163 | * a transaction adding the inode to orphan list */ | ||
2147 | spin_lock(&journal->j_state_lock); | 2164 | spin_lock(&journal->j_state_lock); |
2148 | commit_trans = journal->j_committing_transaction; | 2165 | commit_trans = journal->j_committing_transaction; |
2149 | spin_unlock(&journal->j_state_lock); | 2166 | spin_unlock(&journal->j_state_lock); |
2150 | if (inode->i_transaction == commit_trans) { | 2167 | spin_lock(&journal->j_list_lock); |
2151 | ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping, | 2168 | inode_trans = jinode->i_transaction; |
2169 | spin_unlock(&journal->j_list_lock); | ||
2170 | if (inode_trans == commit_trans) { | ||
2171 | ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping, | ||
2152 | new_size, LLONG_MAX); | 2172 | new_size, LLONG_MAX); |
2153 | if (ret) | 2173 | if (ret) |
2154 | jbd2_journal_abort(journal, ret); | 2174 | jbd2_journal_abort(journal, ret); |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 3c3532e1307c..172850a9a12a 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -513,8 +513,10 @@ static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode) | |||
513 | static inline int ocfs2_begin_ordered_truncate(struct inode *inode, | 513 | static inline int ocfs2_begin_ordered_truncate(struct inode *inode, |
514 | loff_t new_size) | 514 | loff_t new_size) |
515 | { | 515 | { |
516 | return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode, | 516 | return jbd2_journal_begin_ordered_truncate( |
517 | new_size); | 517 | OCFS2_SB(inode->i_sb)->journal->j_journal, |
518 | &OCFS2_I(inode)->ip_jinode, | ||
519 | new_size); | ||
518 | } | 520 | } |
519 | 521 | ||
520 | #endif /* OCFS2_JOURNAL_H */ | 522 | #endif /* OCFS2_JOURNAL_H */ |