diff options
author | Chris Mason <mason@suse.com> | 2006-09-29 04:59:56 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-09-29 12:18:11 -0400 |
commit | a3172027148120b8f8797cbecc7d0a0b215736a1 (patch) | |
tree | 9da7b5eafe136c8c5b9e76e9a9cccd70899df252 | |
parent | 25736b1c692d436508585d1d710912e6f76be2d8 (diff) |
[PATCH] Fix reiserfs latencies caused by data=ordered
ReiserFS does periodic cleanup of old transactions in order to limit the
length of time a journal replay may take after a crash. Sometimes, writing
metadata from an old (already committed) transaction may require committing
a newer transaction, which also requires writing all data=ordered buffers.
This can cause very long stalls on journal_begin.
This patch makes sure new transactions will not need to be committed before
trying a periodic reclaim of an old transaction. It is low risk because if
a bad decision is made, it just means a slightly longer journal replay
after a crash.
Signed-off-by: Chris Mason <mason@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/reiserfs/journal.c | 54 |
1 files changed, 43 insertions, 11 deletions
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 9b3672d6936..e6b5ccf23f1 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -1186,6 +1186,21 @@ static struct reiserfs_journal_list *find_newer_jl_for_cn(struct | |||
1186 | return NULL; | 1186 | return NULL; |
1187 | } | 1187 | } |
1188 | 1188 | ||
1189 | static int newer_jl_done(struct reiserfs_journal_cnode *cn) | ||
1190 | { | ||
1191 | struct super_block *sb = cn->sb; | ||
1192 | b_blocknr_t blocknr = cn->blocknr; | ||
1193 | |||
1194 | cn = cn->hprev; | ||
1195 | while (cn) { | ||
1196 | if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist && | ||
1197 | atomic_read(&cn->jlist->j_commit_left) != 0) | ||
1198 | return 0; | ||
1199 | cn = cn->hprev; | ||
1200 | } | ||
1201 | return 1; | ||
1202 | } | ||
1203 | |||
1189 | static void remove_journal_hash(struct super_block *, | 1204 | static void remove_journal_hash(struct super_block *, |
1190 | struct reiserfs_journal_cnode **, | 1205 | struct reiserfs_journal_cnode **, |
1191 | struct reiserfs_journal_list *, unsigned long, | 1206 | struct reiserfs_journal_list *, unsigned long, |
@@ -1604,6 +1619,31 @@ static int flush_journal_list(struct super_block *s, | |||
1604 | return err; | 1619 | return err; |
1605 | } | 1620 | } |
1606 | 1621 | ||
1622 | static int test_transaction(struct super_block *s, | ||
1623 | struct reiserfs_journal_list *jl) | ||
1624 | { | ||
1625 | struct reiserfs_journal_cnode *cn; | ||
1626 | |||
1627 | if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) | ||
1628 | return 1; | ||
1629 | |||
1630 | cn = jl->j_realblock; | ||
1631 | while (cn) { | ||
1632 | /* if the blocknr == 0, this has been cleared from the hash, | ||
1633 | ** skip it | ||
1634 | */ | ||
1635 | if (cn->blocknr == 0) { | ||
1636 | goto next; | ||
1637 | } | ||
1638 | if (cn->bh && !newer_jl_done(cn)) | ||
1639 | return 0; | ||
1640 | next: | ||
1641 | cn = cn->next; | ||
1642 | cond_resched(); | ||
1643 | } | ||
1644 | return 0; | ||
1645 | } | ||
1646 | |||
1607 | static int write_one_transaction(struct super_block *s, | 1647 | static int write_one_transaction(struct super_block *s, |
1608 | struct reiserfs_journal_list *jl, | 1648 | struct reiserfs_journal_list *jl, |
1609 | struct buffer_chunk *chunk) | 1649 | struct buffer_chunk *chunk) |
@@ -3433,16 +3473,6 @@ static void flush_async_commits(void *p) | |||
3433 | flush_commit_list(p_s_sb, jl, 1); | 3473 | flush_commit_list(p_s_sb, jl, 1); |
3434 | } | 3474 | } |
3435 | unlock_kernel(); | 3475 | unlock_kernel(); |
3436 | /* | ||
3437 | * this is a little racey, but there's no harm in missing | ||
3438 | * the filemap_fdata_write | ||
3439 | */ | ||
3440 | if (!atomic_read(&journal->j_async_throttle) | ||
3441 | && !reiserfs_is_journal_aborted(journal)) { | ||
3442 | atomic_inc(&journal->j_async_throttle); | ||
3443 | filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping); | ||
3444 | atomic_dec(&journal->j_async_throttle); | ||
3445 | } | ||
3446 | } | 3476 | } |
3447 | 3477 | ||
3448 | /* | 3478 | /* |
@@ -3844,7 +3874,9 @@ static void flush_old_journal_lists(struct super_block *s) | |||
3844 | entry = journal->j_journal_list.next; | 3874 | entry = journal->j_journal_list.next; |
3845 | jl = JOURNAL_LIST_ENTRY(entry); | 3875 | jl = JOURNAL_LIST_ENTRY(entry); |
3846 | /* this check should always be run, to send old lists to disk */ | 3876 | /* this check should always be run, to send old lists to disk */ |
3847 | if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) { | 3877 | if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) && |
3878 | atomic_read(&jl->j_commit_left) == 0 && | ||
3879 | test_transaction(s, jl)) { | ||
3848 | flush_used_journal_lists(s, jl); | 3880 | flush_used_journal_lists(s, jl); |
3849 | } else { | 3881 | } else { |
3850 | break; | 3882 | break; |