diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-04-08 11:25:42 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-04-08 11:26:00 -0400 |
commit | 5af8c4e0fac9838428bd718040b664043a05f37c (patch) | |
tree | 75a01d98ed244db45fe3c734c4a81c1a3d92ac37 /fs/jbd2 | |
parent | 46e0bb9c12f4bab539736f1714cbf16600f681ec (diff) | |
parent | 577c9c456f0e1371cbade38eaf91ae8e8a308555 (diff) |
Merge commit 'v2.6.30-rc1' into sched/urgent
Merge reason: update to latest upstream to queue up fix
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- | fs/jbd2/commit.c | 16 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 17 | ||||
-rw-r--r-- | fs/jbd2/revoke.c | 24 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 44 |
4 files changed, 75 insertions, 26 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 62804e57a44c..073c8c3df7cd 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -138,7 +138,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
138 | set_buffer_ordered(bh); | 138 | set_buffer_ordered(bh); |
139 | barrier_done = 1; | 139 | barrier_done = 1; |
140 | } | 140 | } |
141 | ret = submit_bh(WRITE_SYNC, bh); | 141 | ret = submit_bh(WRITE_SYNC_PLUG, bh); |
142 | if (barrier_done) | 142 | if (barrier_done) |
143 | clear_buffer_ordered(bh); | 143 | clear_buffer_ordered(bh); |
144 | 144 | ||
@@ -159,7 +159,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
159 | lock_buffer(bh); | 159 | lock_buffer(bh); |
160 | set_buffer_uptodate(bh); | 160 | set_buffer_uptodate(bh); |
161 | clear_buffer_dirty(bh); | 161 | clear_buffer_dirty(bh); |
162 | ret = submit_bh(WRITE_SYNC, bh); | 162 | ret = submit_bh(WRITE_SYNC_PLUG, bh); |
163 | } | 163 | } |
164 | *cbh = bh; | 164 | *cbh = bh; |
165 | return ret; | 165 | return ret; |
@@ -190,7 +190,7 @@ retry: | |||
190 | set_buffer_uptodate(bh); | 190 | set_buffer_uptodate(bh); |
191 | bh->b_end_io = journal_end_buffer_io_sync; | 191 | bh->b_end_io = journal_end_buffer_io_sync; |
192 | 192 | ||
193 | ret = submit_bh(WRITE_SYNC, bh); | 193 | ret = submit_bh(WRITE_SYNC_PLUG, bh); |
194 | if (ret) { | 194 | if (ret) { |
195 | unlock_buffer(bh); | 195 | unlock_buffer(bh); |
196 | return ret; | 196 | return ret; |
@@ -367,6 +367,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
367 | int tag_bytes = journal_tag_bytes(journal); | 367 | int tag_bytes = journal_tag_bytes(journal); |
368 | struct buffer_head *cbh = NULL; /* For transactional checksums */ | 368 | struct buffer_head *cbh = NULL; /* For transactional checksums */ |
369 | __u32 crc32_sum = ~0; | 369 | __u32 crc32_sum = ~0; |
370 | int write_op = WRITE; | ||
370 | 371 | ||
371 | /* | 372 | /* |
372 | * First job: lock down the current transaction and wait for | 373 | * First job: lock down the current transaction and wait for |
@@ -401,6 +402,13 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
401 | spin_lock(&journal->j_state_lock); | 402 | spin_lock(&journal->j_state_lock); |
402 | commit_transaction->t_state = T_LOCKED; | 403 | commit_transaction->t_state = T_LOCKED; |
403 | 404 | ||
405 | /* | ||
406 | * Use plugged writes here, since we want to submit several before | ||
407 | * we unplug the device. We don't do explicit unplugging in here, | ||
408 | * instead we rely on sync_buffer() doing the unplug for us. | ||
409 | */ | ||
410 | if (commit_transaction->t_synchronous_commit) | ||
411 | write_op = WRITE_SYNC_PLUG; | ||
404 | stats.u.run.rs_wait = commit_transaction->t_max_wait; | 412 | stats.u.run.rs_wait = commit_transaction->t_max_wait; |
405 | stats.u.run.rs_locked = jiffies; | 413 | stats.u.run.rs_locked = jiffies; |
406 | stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, | 414 | stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, |
@@ -680,7 +688,7 @@ start_journal_io: | |||
680 | clear_buffer_dirty(bh); | 688 | clear_buffer_dirty(bh); |
681 | set_buffer_uptodate(bh); | 689 | set_buffer_uptodate(bh); |
682 | bh->b_end_io = journal_end_buffer_io_sync; | 690 | bh->b_end_io = journal_end_buffer_io_sync; |
683 | submit_bh(WRITE, bh); | 691 | submit_bh(write_op, bh); |
684 | } | 692 | } |
685 | cond_resched(); | 693 | cond_resched(); |
686 | stats.u.run.rs_blocks_logged += bufs; | 694 | stats.u.run.rs_blocks_logged += bufs; |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index eb343008eded..58144102bf25 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -450,7 +450,7 @@ int __jbd2_log_space_left(journal_t *journal) | |||
450 | } | 450 | } |
451 | 451 | ||
452 | /* | 452 | /* |
453 | * Called under j_state_lock. Returns true if a transaction was started. | 453 | * Called under j_state_lock. Returns true if a transaction commit was started. |
454 | */ | 454 | */ |
455 | int __jbd2_log_start_commit(journal_t *journal, tid_t target) | 455 | int __jbd2_log_start_commit(journal_t *journal, tid_t target) |
456 | { | 456 | { |
@@ -518,7 +518,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal) | |||
518 | 518 | ||
519 | /* | 519 | /* |
520 | * Start a commit of the current running transaction (if any). Returns true | 520 | * Start a commit of the current running transaction (if any). Returns true |
521 | * if a transaction was started, and fills its tid in at *ptid | 521 | * if a transaction is going to be committed (or is currently already |
522 | * committing), and fills its tid in at *ptid | ||
522 | */ | 523 | */ |
523 | int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) | 524 | int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) |
524 | { | 525 | { |
@@ -528,15 +529,19 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) | |||
528 | if (journal->j_running_transaction) { | 529 | if (journal->j_running_transaction) { |
529 | tid_t tid = journal->j_running_transaction->t_tid; | 530 | tid_t tid = journal->j_running_transaction->t_tid; |
530 | 531 | ||
531 | ret = __jbd2_log_start_commit(journal, tid); | 532 | __jbd2_log_start_commit(journal, tid); |
532 | if (ret && ptid) | 533 | /* There's a running transaction and we've just made sure |
534 | * its commit has been scheduled. */ | ||
535 | if (ptid) | ||
533 | *ptid = tid; | 536 | *ptid = tid; |
534 | } else if (journal->j_committing_transaction && ptid) { | 537 | ret = 1; |
538 | } else if (journal->j_committing_transaction) { | ||
535 | /* | 539 | /* |
536 | * If ext3_write_super() recently started a commit, then we | 540 | * If ext3_write_super() recently started a commit, then we |
537 | * have to wait for completion of that transaction | 541 | * have to wait for completion of that transaction |
538 | */ | 542 | */ |
539 | *ptid = journal->j_committing_transaction->t_tid; | 543 | if (ptid) |
544 | *ptid = journal->j_committing_transaction->t_tid; | ||
540 | ret = 1; | 545 | ret = 1; |
541 | } | 546 | } |
542 | spin_unlock(&journal->j_state_lock); | 547 | spin_unlock(&journal->j_state_lock); |
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 257ff2625765..bbe6d592d8b3 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -55,6 +55,25 @@ | |||
55 | * need do nothing. | 55 | * need do nothing. |
56 | * RevokeValid set, Revoked set: | 56 | * RevokeValid set, Revoked set: |
57 | * buffer has been revoked. | 57 | * buffer has been revoked. |
58 | * | ||
59 | * Locking rules: | ||
60 | * We keep two hash tables of revoke records. One hashtable belongs to the | ||
61 | * running transaction (is pointed to by journal->j_revoke), the other one | ||
62 | * belongs to the committing transaction. Accesses to the second hash table | ||
63 | * happen only from the kjournald and no other thread touches this table. Also | ||
64 | * journal_switch_revoke_table() which switches which hashtable belongs to the | ||
65 | * running and which to the committing transaction is called only from | ||
66 | * kjournald. Therefore we need no locks when accessing the hashtable belonging | ||
67 | * to the committing transaction. | ||
68 | * | ||
69 | * All users operating on the hash table belonging to the running transaction | ||
70 | * have a handle to the transaction. Therefore they are safe from kjournald | ||
71 | * switching hash tables under them. For operations on the lists of entries in | ||
72 | * the hash table j_revoke_lock is used. | ||
73 | * | ||
74 | * Finally, also replay code uses the hash tables but at this moment no one else | ||
75 | * can touch them (filesystem isn't mounted yet) and hence no locking is | ||
76 | * needed. | ||
58 | */ | 77 | */ |
59 | 78 | ||
60 | #ifndef __KERNEL__ | 79 | #ifndef __KERNEL__ |
@@ -401,8 +420,6 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, | |||
401 | * the second time we would still have a pending revoke to cancel. So, | 420 | * the second time we would still have a pending revoke to cancel. So, |
402 | * do not trust the Revoked bit on buffers unless RevokeValid is also | 421 | * do not trust the Revoked bit on buffers unless RevokeValid is also |
403 | * set. | 422 | * set. |
404 | * | ||
405 | * The caller must have the journal locked. | ||
406 | */ | 423 | */ |
407 | int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) | 424 | int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) |
408 | { | 425 | { |
@@ -480,10 +497,7 @@ void jbd2_journal_switch_revoke_table(journal_t *journal) | |||
480 | /* | 497 | /* |
481 | * Write revoke records to the journal for all entries in the current | 498 | * Write revoke records to the journal for all entries in the current |
482 | * revoke hash, deleting the entries as we go. | 499 | * revoke hash, deleting the entries as we go. |
483 | * | ||
484 | * Called with the journal lock held. | ||
485 | */ | 500 | */ |
486 | |||
487 | void jbd2_journal_write_revoke_records(journal_t *journal, | 501 | void jbd2_journal_write_revoke_records(journal_t *journal, |
488 | transaction_t *transaction) | 502 | transaction_t *transaction) |
489 | { | 503 | { |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 46b4e347ed7d..996ffda06bf3 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -1315,6 +1315,8 @@ int jbd2_journal_stop(handle_t *handle) | |||
1315 | } | 1315 | } |
1316 | } | 1316 | } |
1317 | 1317 | ||
1318 | if (handle->h_sync) | ||
1319 | transaction->t_synchronous_commit = 1; | ||
1318 | current->journal_info = NULL; | 1320 | current->journal_info = NULL; |
1319 | spin_lock(&journal->j_state_lock); | 1321 | spin_lock(&journal->j_state_lock); |
1320 | spin_lock(&transaction->t_handle_lock); | 1322 | spin_lock(&transaction->t_handle_lock); |
@@ -2129,26 +2131,46 @@ done: | |||
2129 | } | 2131 | } |
2130 | 2132 | ||
2131 | /* | 2133 | /* |
2132 | * This function must be called when inode is journaled in ordered mode | 2134 | * File truncate and transaction commit interact with each other in a |
2133 | * before truncation happens. It starts writeout of truncated part in | 2135 | * non-trivial way. If a transaction writing data block A is |
2134 | * case it is in the committing transaction so that we stand to ordered | 2136 | * committing, we cannot discard the data by truncate until we have |
2135 | * mode consistency guarantees. | 2137 | * written them. Otherwise if we crashed after the transaction with |
2138 | * write has committed but before the transaction with truncate has | ||
2139 | * committed, we could see stale data in block A. This function is a | ||
2140 | * helper to solve this problem. It starts writeout of the truncated | ||
2141 | * part in case it is in the committing transaction. | ||
2142 | * | ||
2143 | * Filesystem code must call this function when inode is journaled in | ||
2144 | * ordered mode before truncation happens and after the inode has been | ||
2145 | * placed on orphan list with the new inode size. The second condition | ||
2146 | * avoids the race that someone writes new data and we start | ||
2147 | * committing the transaction after this function has been called but | ||
2148 | * before a transaction for truncate is started (and furthermore it | ||
2149 | * allows us to optimize the case where the addition to orphan list | ||
2150 | * happens in the same transaction as write --- we don't have to write | ||
2151 | * any data in such case). | ||
2136 | */ | 2152 | */ |
2137 | int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, | 2153 | int jbd2_journal_begin_ordered_truncate(journal_t *journal, |
2154 | struct jbd2_inode *jinode, | ||
2138 | loff_t new_size) | 2155 | loff_t new_size) |
2139 | { | 2156 | { |
2140 | journal_t *journal; | 2157 | transaction_t *inode_trans, *commit_trans; |
2141 | transaction_t *commit_trans; | ||
2142 | int ret = 0; | 2158 | int ret = 0; |
2143 | 2159 | ||
2144 | if (!inode->i_transaction && !inode->i_next_transaction) | 2160 | /* This is a quick check to avoid locking if not necessary */ |
2161 | if (!jinode->i_transaction) | ||
2145 | goto out; | 2162 | goto out; |
2146 | journal = inode->i_transaction->t_journal; | 2163 | /* Locks are here just to force reading of recent values, it is |
2164 | * enough that the transaction was not committing before we started | ||
2165 | * a transaction adding the inode to orphan list */ | ||
2147 | spin_lock(&journal->j_state_lock); | 2166 | spin_lock(&journal->j_state_lock); |
2148 | commit_trans = journal->j_committing_transaction; | 2167 | commit_trans = journal->j_committing_transaction; |
2149 | spin_unlock(&journal->j_state_lock); | 2168 | spin_unlock(&journal->j_state_lock); |
2150 | if (inode->i_transaction == commit_trans) { | 2169 | spin_lock(&journal->j_list_lock); |
2151 | ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping, | 2170 | inode_trans = jinode->i_transaction; |
2171 | spin_unlock(&journal->j_list_lock); | ||
2172 | if (inode_trans == commit_trans) { | ||
2173 | ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping, | ||
2152 | new_size, LLONG_MAX); | 2174 | new_size, LLONG_MAX); |
2153 | if (ret) | 2175 | if (ret) |
2154 | jbd2_journal_abort(journal, ret); | 2176 | jbd2_journal_abort(journal, ret); |