aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Mason <mason@suse.com> 2006-09-29 04:59:56 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-09-29 12:18:11 -0400
commit a3172027148120b8f8797cbecc7d0a0b215736a1 (patch)
tree 9da7b5eafe136c8c5b9e76e9a9cccd70899df252
parent 25736b1c692d436508585d1d710912e6f76be2d8 (diff)
[PATCH] Fix reiserfs latencies caused by data=ordered
ReiserFS does periodic cleanup of old transactions in order to limit the length of time a journal replay may take after a crash. Sometimes, writing metadata from an old (already committed) transaction may require committing a newer transaction, which also requires writing all data=ordered buffers. This can cause very long stalls on journal_begin.

This patch makes sure new transactions will not need to be committed before trying a periodic reclaim of an old transaction. It is low risk because if a bad decision is made, it just means a slightly longer journal replay after a crash.

Signed-off-by: Chris Mason <mason@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--fs/reiserfs/journal.c54
1 files changed, 43 insertions, 11 deletions
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 9b3672d6936..e6b5ccf23f1 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1186,6 +1186,21 @@ static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
1186 return NULL; 1186 return NULL;
1187} 1187}
1188 1188
1189static int newer_jl_done(struct reiserfs_journal_cnode *cn)
1190{
1191 struct super_block *sb = cn->sb;
1192 b_blocknr_t blocknr = cn->blocknr;
1193
1194 cn = cn->hprev;
1195 while (cn) {
1196 if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist &&
1197 atomic_read(&cn->jlist->j_commit_left) != 0)
1198 return 0;
1199 cn = cn->hprev;
1200 }
1201 return 1;
1202}
1203
1189static void remove_journal_hash(struct super_block *, 1204static void remove_journal_hash(struct super_block *,
1190 struct reiserfs_journal_cnode **, 1205 struct reiserfs_journal_cnode **,
1191 struct reiserfs_journal_list *, unsigned long, 1206 struct reiserfs_journal_list *, unsigned long,
@@ -1604,6 +1619,31 @@ static int flush_journal_list(struct super_block *s,
1604 return err; 1619 return err;
1605} 1620}
1606 1621
1622static int test_transaction(struct super_block *s,
1623 struct reiserfs_journal_list *jl)
1624{
1625 struct reiserfs_journal_cnode *cn;
1626
1627 if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
1628 return 1;
1629
1630 cn = jl->j_realblock;
1631 while (cn) {
1632 /* if the blocknr == 0, this has been cleared from the hash,
1633 ** skip it
1634 */
1635 if (cn->blocknr == 0) {
1636 goto next;
1637 }
1638 if (cn->bh && !newer_jl_done(cn))
1639 return 0;
1640 next:
1641 cn = cn->next;
1642 cond_resched();
1643 }
1644 return 0;
1645}
1646
1607static int write_one_transaction(struct super_block *s, 1647static int write_one_transaction(struct super_block *s,
1608 struct reiserfs_journal_list *jl, 1648 struct reiserfs_journal_list *jl,
1609 struct buffer_chunk *chunk) 1649 struct buffer_chunk *chunk)
@@ -3433,16 +3473,6 @@ static void flush_async_commits(void *p)
3433 flush_commit_list(p_s_sb, jl, 1); 3473 flush_commit_list(p_s_sb, jl, 1);
3434 } 3474 }
3435 unlock_kernel(); 3475 unlock_kernel();
3436 /*
3437 * this is a little racey, but there's no harm in missing
3438 * the filemap_fdata_write
3439 */
3440 if (!atomic_read(&journal->j_async_throttle)
3441 && !reiserfs_is_journal_aborted(journal)) {
3442 atomic_inc(&journal->j_async_throttle);
3443 filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping);
3444 atomic_dec(&journal->j_async_throttle);
3445 }
3446} 3476}
3447 3477
3448/* 3478/*
@@ -3844,7 +3874,9 @@ static void flush_old_journal_lists(struct super_block *s)
3844 entry = journal->j_journal_list.next; 3874 entry = journal->j_journal_list.next;
3845 jl = JOURNAL_LIST_ENTRY(entry); 3875 jl = JOURNAL_LIST_ENTRY(entry);
3846 /* this check should always be run, to send old lists to disk */ 3876 /* this check should always be run, to send old lists to disk */
3847 if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) { 3877 if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
3878 atomic_read(&jl->j_commit_left) == 0 &&
3879 test_transaction(s, jl)) {
3848 flush_used_journal_lists(s, jl); 3880 flush_used_journal_lists(s, jl);
3849 } else { 3881 } else {
3850 break; 3882 break;