aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-02-17 17:05:05 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-02-17 17:05:05 -0500
commit 3512a79dbcc90e6edac98717607bd821bba50a14 (patch)
tree 9038e3545ccdfec77ec24d982f8cc24701fb532d
parent 39a65762d4c48fd8a498f34b7fec74a6b0aebd55 (diff)
parent 090542641de833c6f756895fc2f139f046e298f9 (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: Fix NULL dereference in ext4_ext_migrate()'s error handling
  ext4: Implement range_cyclic in ext4_da_writepages instead of write_cache_pages
  ext4: Initialize preallocation list_head's properly
  ext4: Fix lockdep warning
  ext4: Fix to read empty directory blocks correctly in 64k
  jbd2: Avoid possible NULL dereference in jbd2_journal_begin_ordered_truncate()
  Revert "ext4: wait on all pending commits in ext4_sync_fs()"
  jbd2: Fix return value of jbd2_journal_start_commit()
-rw-r--r-- fs/ext4/ext4.h 2
-rw-r--r-- fs/ext4/inode.c 27
-rw-r--r-- fs/ext4/mballoc.c 32
-rw-r--r-- fs/ext4/migrate.c 8
-rw-r--r-- fs/ext4/super.c 11
-rw-r--r-- fs/jbd2/journal.c 17
-rw-r--r-- fs/jbd2/transaction.c 42
-rw-r--r-- fs/ocfs2/journal.h 6
-rw-r--r-- include/linux/jbd2.h 3
9 files changed, 101 insertions, 47 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index aafc9eba1c25..b0c87dce66a3 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -868,7 +868,7 @@ static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
868{ 868{
869 unsigned len = le16_to_cpu(dlen); 869 unsigned len = le16_to_cpu(dlen);
870 870
871 if (len == EXT4_MAX_REC_LEN) 871 if (len == EXT4_MAX_REC_LEN || len == 0)
872 return 1 << 16; 872 return 1 << 16;
873 return len; 873 return len;
874} 874}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 03ba20be1329..cbd2ca99d113 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -47,8 +47,10 @@
47static inline int ext4_begin_ordered_truncate(struct inode *inode, 47static inline int ext4_begin_ordered_truncate(struct inode *inode,
48 loff_t new_size) 48 loff_t new_size)
49{ 49{
50 return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode, 50 return jbd2_journal_begin_ordered_truncate(
51 new_size); 51 EXT4_SB(inode->i_sb)->s_journal,
52 &EXT4_I(inode)->jinode,
53 new_size);
52} 54}
53 55
54static void ext4_invalidatepage(struct page *page, unsigned long offset); 56static void ext4_invalidatepage(struct page *page, unsigned long offset);
@@ -2437,6 +2439,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2437 int no_nrwrite_index_update; 2439 int no_nrwrite_index_update;
2438 int pages_written = 0; 2440 int pages_written = 0;
2439 long pages_skipped; 2441 long pages_skipped;
2442 int range_cyclic, cycled = 1, io_done = 0;
2440 int needed_blocks, ret = 0, nr_to_writebump = 0; 2443 int needed_blocks, ret = 0, nr_to_writebump = 0;
2441 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2444 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2442 2445
@@ -2488,9 +2491,15 @@ static int ext4_da_writepages(struct address_space *mapping,
2488 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 2491 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2489 range_whole = 1; 2492 range_whole = 1;
2490 2493
2491 if (wbc->range_cyclic) 2494 range_cyclic = wbc->range_cyclic;
2495 if (wbc->range_cyclic) {
2492 index = mapping->writeback_index; 2496 index = mapping->writeback_index;
2493 else 2497 if (index)
2498 cycled = 0;
2499 wbc->range_start = index << PAGE_CACHE_SHIFT;
2500 wbc->range_end = LLONG_MAX;
2501 wbc->range_cyclic = 0;
2502 } else
2494 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2503 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2495 2504
2496 mpd.wbc = wbc; 2505 mpd.wbc = wbc;
@@ -2504,6 +2513,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2504 wbc->no_nrwrite_index_update = 1; 2513 wbc->no_nrwrite_index_update = 1;
2505 pages_skipped = wbc->pages_skipped; 2514 pages_skipped = wbc->pages_skipped;
2506 2515
2516retry:
2507 while (!ret && wbc->nr_to_write > 0) { 2517 while (!ret && wbc->nr_to_write > 0) {
2508 2518
2509 /* 2519 /*
@@ -2546,6 +2556,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2546 pages_written += mpd.pages_written; 2556 pages_written += mpd.pages_written;
2547 wbc->pages_skipped = pages_skipped; 2557 wbc->pages_skipped = pages_skipped;
2548 ret = 0; 2558 ret = 0;
2559 io_done = 1;
2549 } else if (wbc->nr_to_write) 2560 } else if (wbc->nr_to_write)
2550 /* 2561 /*
2551 * There is no more writeout needed 2562 * There is no more writeout needed
@@ -2554,6 +2565,13 @@ static int ext4_da_writepages(struct address_space *mapping,
2554 */ 2565 */
2555 break; 2566 break;
2556 } 2567 }
2568 if (!io_done && !cycled) {
2569 cycled = 1;
2570 index = 0;
2571 wbc->range_start = index << PAGE_CACHE_SHIFT;
2572 wbc->range_end = mapping->writeback_index - 1;
2573 goto retry;
2574 }
2557 if (pages_skipped != wbc->pages_skipped) 2575 if (pages_skipped != wbc->pages_skipped)
2558 printk(KERN_EMERG "This should not happen leaving %s " 2576 printk(KERN_EMERG "This should not happen leaving %s "
2559 "with nr_to_write = %ld ret = %d\n", 2577 "with nr_to_write = %ld ret = %d\n",
@@ -2561,6 +2579,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2561 2579
2562 /* Update index */ 2580 /* Update index */
2563 index += pages_written; 2581 index += pages_written;
2582 wbc->range_cyclic = range_cyclic;
2564 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 2583 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2565 /* 2584 /*
2566 * set the writeback_index so that range_cyclic 2585 * set the writeback_index so that range_cyclic
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index deba54f6cbed..4415beeb0b62 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3693,6 +3693,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3693 pa->pa_free = pa->pa_len; 3693 pa->pa_free = pa->pa_len;
3694 atomic_set(&pa->pa_count, 1); 3694 atomic_set(&pa->pa_count, 1);
3695 spin_lock_init(&pa->pa_lock); 3695 spin_lock_init(&pa->pa_lock);
3696 INIT_LIST_HEAD(&pa->pa_inode_list);
3697 INIT_LIST_HEAD(&pa->pa_group_list);
3696 pa->pa_deleted = 0; 3698 pa->pa_deleted = 0;
3697 pa->pa_linear = 0; 3699 pa->pa_linear = 0;
3698 3700
@@ -3755,6 +3757,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3755 atomic_set(&pa->pa_count, 1); 3757 atomic_set(&pa->pa_count, 1);
3756 spin_lock_init(&pa->pa_lock); 3758 spin_lock_init(&pa->pa_lock);
3757 INIT_LIST_HEAD(&pa->pa_inode_list); 3759 INIT_LIST_HEAD(&pa->pa_inode_list);
3760 INIT_LIST_HEAD(&pa->pa_group_list);
3758 pa->pa_deleted = 0; 3761 pa->pa_deleted = 0;
3759 pa->pa_linear = 1; 3762 pa->pa_linear = 1;
3760 3763
@@ -4476,23 +4479,26 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4476 pa->pa_free -= ac->ac_b_ex.fe_len; 4479 pa->pa_free -= ac->ac_b_ex.fe_len;
4477 pa->pa_len -= ac->ac_b_ex.fe_len; 4480 pa->pa_len -= ac->ac_b_ex.fe_len;
4478 spin_unlock(&pa->pa_lock); 4481 spin_unlock(&pa->pa_lock);
4479 /*
4480 * We want to add the pa to the right bucket.
4481 * Remove it from the list and while adding
4482 * make sure the list to which we are adding
4483 * doesn't grow big.
4484 */
4485 if (likely(pa->pa_free)) {
4486 spin_lock(pa->pa_obj_lock);
4487 list_del_rcu(&pa->pa_inode_list);
4488 spin_unlock(pa->pa_obj_lock);
4489 ext4_mb_add_n_trim(ac);
4490 }
4491 } 4482 }
4492 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4493 } 4483 }
4494 if (ac->alloc_semp) 4484 if (ac->alloc_semp)
4495 up_read(ac->alloc_semp); 4485 up_read(ac->alloc_semp);
4486 if (pa) {
4487 /*
4488 * We want to add the pa to the right bucket.
4489 * Remove it from the list and while adding
4490 * make sure the list to which we are adding
4491 * doesn't grow big. We need to release
4492 * alloc_semp before calling ext4_mb_add_n_trim()
4493 */
4494 if (pa->pa_linear && likely(pa->pa_free)) {
4495 spin_lock(pa->pa_obj_lock);
4496 list_del_rcu(&pa->pa_inode_list);
4497 spin_unlock(pa->pa_obj_lock);
4498 ext4_mb_add_n_trim(ac);
4499 }
4500 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4501 }
4496 if (ac->ac_bitmap_page) 4502 if (ac->ac_bitmap_page)
4497 page_cache_release(ac->ac_bitmap_page); 4503 page_cache_release(ac->ac_bitmap_page);
4498 if (ac->ac_buddy_page) 4504 if (ac->ac_buddy_page)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 734abca25e35..fe64d9f79852 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -481,7 +481,7 @@ int ext4_ext_migrate(struct inode *inode)
481 + 1); 481 + 1);
482 if (IS_ERR(handle)) { 482 if (IS_ERR(handle)) {
483 retval = PTR_ERR(handle); 483 retval = PTR_ERR(handle);
484 goto err_out; 484 return retval;
485 } 485 }
486 tmp_inode = ext4_new_inode(handle, 486 tmp_inode = ext4_new_inode(handle,
487 inode->i_sb->s_root->d_inode, 487 inode->i_sb->s_root->d_inode,
@@ -489,8 +489,7 @@ int ext4_ext_migrate(struct inode *inode)
489 if (IS_ERR(tmp_inode)) { 489 if (IS_ERR(tmp_inode)) {
490 retval = -ENOMEM; 490 retval = -ENOMEM;
491 ext4_journal_stop(handle); 491 ext4_journal_stop(handle);
492 tmp_inode = NULL; 492 return retval;
493 goto err_out;
494 } 493 }
495 i_size_write(tmp_inode, i_size_read(inode)); 494 i_size_write(tmp_inode, i_size_read(inode));
496 /* 495 /*
@@ -618,8 +617,7 @@ err_out:
618 617
619 ext4_journal_stop(handle); 618 ext4_journal_stop(handle);
620 619
621 if (tmp_inode) 620 iput(tmp_inode);
622 iput(tmp_inode);
623 621
624 return retval; 622 return retval;
625} 623}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e5f06a5f045e..a5732c58f676 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3046,14 +3046,17 @@ static void ext4_write_super(struct super_block *sb)
3046static int ext4_sync_fs(struct super_block *sb, int wait) 3046static int ext4_sync_fs(struct super_block *sb, int wait)
3047{ 3047{
3048 int ret = 0; 3048 int ret = 0;
3049 tid_t target;
3049 3050
3050 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 3051 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
3051 sb->s_dirt = 0; 3052 sb->s_dirt = 0;
3052 if (EXT4_SB(sb)->s_journal) { 3053 if (EXT4_SB(sb)->s_journal) {
3053 if (wait) 3054 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal,
3054 ret = ext4_force_commit(sb); 3055 &target)) {
3055 else 3056 if (wait)
3056 jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL); 3057 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
3058 target);
3059 }
3057 } else { 3060 } else {
3058 ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait); 3061 ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
3059 } 3062 }
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index eb343008eded..58144102bf25 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -450,7 +450,7 @@ int __jbd2_log_space_left(journal_t *journal)
450} 450}
451 451
452/* 452/*
453 * Called under j_state_lock. Returns true if a transaction was started. 453 * Called under j_state_lock. Returns true if a transaction commit was started.
454 */ 454 */
455int __jbd2_log_start_commit(journal_t *journal, tid_t target) 455int __jbd2_log_start_commit(journal_t *journal, tid_t target)
456{ 456{
@@ -518,7 +518,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
518 518
519/* 519/*
520 * Start a commit of the current running transaction (if any). Returns true 520 * Start a commit of the current running transaction (if any). Returns true
521 * if a transaction was started, and fills its tid in at *ptid 521 * if a transaction is going to be committed (or is currently already
522 * committing), and fills its tid in at *ptid
522 */ 523 */
523int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) 524int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
524{ 525{
@@ -528,15 +529,19 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
528 if (journal->j_running_transaction) { 529 if (journal->j_running_transaction) {
529 tid_t tid = journal->j_running_transaction->t_tid; 530 tid_t tid = journal->j_running_transaction->t_tid;
530 531
531 ret = __jbd2_log_start_commit(journal, tid); 532 __jbd2_log_start_commit(journal, tid);
532 if (ret && ptid) 533 /* There's a running transaction and we've just made sure
534 * it's commit has been scheduled. */
535 if (ptid)
533 *ptid = tid; 536 *ptid = tid;
534 } else if (journal->j_committing_transaction && ptid) { 537 ret = 1;
538 } else if (journal->j_committing_transaction) {
535 /* 539 /*
536 * If ext3_write_super() recently started a commit, then we 540 * If ext3_write_super() recently started a commit, then we
537 * have to wait for completion of that transaction 541 * have to wait for completion of that transaction
538 */ 542 */
539 *ptid = journal->j_committing_transaction->t_tid; 543 if (ptid)
544 *ptid = journal->j_committing_transaction->t_tid;
540 ret = 1; 545 ret = 1;
541 } 546 }
542 spin_unlock(&journal->j_state_lock); 547 spin_unlock(&journal->j_state_lock);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 46b4e347ed7d..28ce21d8598e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2129,26 +2129,46 @@ done:
2129} 2129}
2130 2130
2131/* 2131/*
2132 * This function must be called when inode is journaled in ordered mode 2132 * File truncate and transaction commit interact with each other in a
2133 * before truncation happens. It starts writeout of truncated part in 2133 * non-trivial way. If a transaction writing data block A is
2134 * case it is in the committing transaction so that we stand to ordered 2134 * committing, we cannot discard the data by truncate until we have
2135 * mode consistency guarantees. 2135 * written them. Otherwise if we crashed after the transaction with
2136 * write has committed but before the transaction with truncate has
2137 * committed, we could see stale data in block A. This function is a
2138 * helper to solve this problem. It starts writeout of the truncated
2139 * part in case it is in the committing transaction.
2140 *
2141 * Filesystem code must call this function when inode is journaled in
2142 * ordered mode before truncation happens and after the inode has been
2143 * placed on orphan list with the new inode size. The second condition
2144 * avoids the race that someone writes new data and we start
2145 * committing the transaction after this function has been called but
2146 * before a transaction for truncate is started (and furthermore it
2147 * allows us to optimize the case where the addition to orphan list
2148 * happens in the same transaction as write --- we don't have to write
2149 * any data in such case).
2136 */ 2150 */
2137int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, 2151int jbd2_journal_begin_ordered_truncate(journal_t *journal,
2152 struct jbd2_inode *jinode,
2138 loff_t new_size) 2153 loff_t new_size)
2139{ 2154{
2140 journal_t *journal; 2155 transaction_t *inode_trans, *commit_trans;
2141 transaction_t *commit_trans;
2142 int ret = 0; 2156 int ret = 0;
2143 2157
2144 if (!inode->i_transaction && !inode->i_next_transaction) 2158 /* This is a quick check to avoid locking if not necessary */
2159 if (!jinode->i_transaction)
2145 goto out; 2160 goto out;
2146 journal = inode->i_transaction->t_journal; 2161 /* Locks are here just to force reading of recent values, it is
2162 * enough that the transaction was not committing before we started
2163 * a transaction adding the inode to orphan list */
2147 spin_lock(&journal->j_state_lock); 2164 spin_lock(&journal->j_state_lock);
2148 commit_trans = journal->j_committing_transaction; 2165 commit_trans = journal->j_committing_transaction;
2149 spin_unlock(&journal->j_state_lock); 2166 spin_unlock(&journal->j_state_lock);
2150 if (inode->i_transaction == commit_trans) { 2167 spin_lock(&journal->j_list_lock);
2151 ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping, 2168 inode_trans = jinode->i_transaction;
2169 spin_unlock(&journal->j_list_lock);
2170 if (inode_trans == commit_trans) {
2171 ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
2152 new_size, LLONG_MAX); 2172 new_size, LLONG_MAX);
2153 if (ret) 2173 if (ret)
2154 jbd2_journal_abort(journal, ret); 2174 jbd2_journal_abort(journal, ret);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3c3532e1307c..172850a9a12a 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -513,8 +513,10 @@ static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
513static inline int ocfs2_begin_ordered_truncate(struct inode *inode, 513static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
514 loff_t new_size) 514 loff_t new_size)
515{ 515{
516 return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode, 516 return jbd2_journal_begin_ordered_truncate(
517 new_size); 517 OCFS2_SB(inode->i_sb)->journal->j_journal,
518 &OCFS2_I(inode)->ip_jinode,
519 new_size);
518} 520}
519 521
520#endif /* OCFS2_JOURNAL_H */ 522#endif /* OCFS2_JOURNAL_H */
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index b28b37eb11c6..4d248b3f1323 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1150,7 +1150,8 @@ extern int jbd2_journal_clear_err (journal_t *);
1150extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); 1150extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
1151extern int jbd2_journal_force_commit(journal_t *); 1151extern int jbd2_journal_force_commit(journal_t *);
1152extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode); 1152extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
1153extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size); 1153extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
1154 struct jbd2_inode *inode, loff_t new_size);
1154extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); 1155extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
1155extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode); 1156extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);
1156 1157