aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2009-04-08 11:25:42 -0400
committer Ingo Molnar <mingo@elte.hu> 2009-04-08 11:26:00 -0400
commit 5af8c4e0fac9838428bd718040b664043a05f37c (patch)
tree 75a01d98ed244db45fe3c734c4a81c1a3d92ac37 /fs/jbd2
parent 46e0bb9c12f4bab539736f1714cbf16600f681ec (diff)
parent 577c9c456f0e1371cbade38eaf91ae8e8a308555 (diff)
Merge commit 'v2.6.30-rc1' into sched/urgent
Merge reason: update to latest upstream to queue up fix
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- fs/jbd2/commit.c 16
-rw-r--r-- fs/jbd2/journal.c 17
-rw-r--r-- fs/jbd2/revoke.c 24
-rw-r--r-- fs/jbd2/transaction.c 44
4 files changed, 75 insertions, 26 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 62804e57a44c..073c8c3df7cd 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -138,7 +138,7 @@ static int journal_submit_commit_record(journal_t *journal,
138 set_buffer_ordered(bh); 138 set_buffer_ordered(bh);
139 barrier_done = 1; 139 barrier_done = 1;
140 } 140 }
141 ret = submit_bh(WRITE_SYNC, bh); 141 ret = submit_bh(WRITE_SYNC_PLUG, bh);
142 if (barrier_done) 142 if (barrier_done)
143 clear_buffer_ordered(bh); 143 clear_buffer_ordered(bh);
144 144
@@ -159,7 +159,7 @@ static int journal_submit_commit_record(journal_t *journal,
159 lock_buffer(bh); 159 lock_buffer(bh);
160 set_buffer_uptodate(bh); 160 set_buffer_uptodate(bh);
161 clear_buffer_dirty(bh); 161 clear_buffer_dirty(bh);
162 ret = submit_bh(WRITE_SYNC, bh); 162 ret = submit_bh(WRITE_SYNC_PLUG, bh);
163 } 163 }
164 *cbh = bh; 164 *cbh = bh;
165 return ret; 165 return ret;
@@ -190,7 +190,7 @@ retry:
190 set_buffer_uptodate(bh); 190 set_buffer_uptodate(bh);
191 bh->b_end_io = journal_end_buffer_io_sync; 191 bh->b_end_io = journal_end_buffer_io_sync;
192 192
193 ret = submit_bh(WRITE_SYNC, bh); 193 ret = submit_bh(WRITE_SYNC_PLUG, bh);
194 if (ret) { 194 if (ret) {
195 unlock_buffer(bh); 195 unlock_buffer(bh);
196 return ret; 196 return ret;
@@ -367,6 +367,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
367 int tag_bytes = journal_tag_bytes(journal); 367 int tag_bytes = journal_tag_bytes(journal);
368 struct buffer_head *cbh = NULL; /* For transactional checksums */ 368 struct buffer_head *cbh = NULL; /* For transactional checksums */
369 __u32 crc32_sum = ~0; 369 __u32 crc32_sum = ~0;
370 int write_op = WRITE;
370 371
371 /* 372 /*
372 * First job: lock down the current transaction and wait for 373 * First job: lock down the current transaction and wait for
@@ -401,6 +402,13 @@ void jbd2_journal_commit_transaction(journal_t *journal)
401 spin_lock(&journal->j_state_lock); 402 spin_lock(&journal->j_state_lock);
402 commit_transaction->t_state = T_LOCKED; 403 commit_transaction->t_state = T_LOCKED;
403 404
405 /*
406 * Use plugged writes here, since we want to submit several before
407 * we unplug the device. We don't do explicit unplugging in here,
408 * instead we rely on sync_buffer() doing the unplug for us.
409 */
410 if (commit_transaction->t_synchronous_commit)
411 write_op = WRITE_SYNC_PLUG;
404 stats.u.run.rs_wait = commit_transaction->t_max_wait; 412 stats.u.run.rs_wait = commit_transaction->t_max_wait;
405 stats.u.run.rs_locked = jiffies; 413 stats.u.run.rs_locked = jiffies;
406 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, 414 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
@@ -680,7 +688,7 @@ start_journal_io:
680 clear_buffer_dirty(bh); 688 clear_buffer_dirty(bh);
681 set_buffer_uptodate(bh); 689 set_buffer_uptodate(bh);
682 bh->b_end_io = journal_end_buffer_io_sync; 690 bh->b_end_io = journal_end_buffer_io_sync;
683 submit_bh(WRITE, bh); 691 submit_bh(write_op, bh);
684 } 692 }
685 cond_resched(); 693 cond_resched();
686 stats.u.run.rs_blocks_logged += bufs; 694 stats.u.run.rs_blocks_logged += bufs;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index eb343008eded..58144102bf25 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -450,7 +450,7 @@ int __jbd2_log_space_left(journal_t *journal)
450} 450}
451 451
452/* 452/*
453 * Called under j_state_lock. Returns true if a transaction was started. 453 * Called under j_state_lock. Returns true if a transaction commit was started.
454 */ 454 */
455int __jbd2_log_start_commit(journal_t *journal, tid_t target) 455int __jbd2_log_start_commit(journal_t *journal, tid_t target)
456{ 456{
@@ -518,7 +518,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
518 518
519/* 519/*
520 * Start a commit of the current running transaction (if any). Returns true 520 * Start a commit of the current running transaction (if any). Returns true
521 * if a transaction was started, and fills its tid in at *ptid 521 * if a transaction is going to be committed (or is currently already
522 * committing), and fills its tid in at *ptid
522 */ 523 */
523int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) 524int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
524{ 525{
@@ -528,15 +529,19 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
528 if (journal->j_running_transaction) { 529 if (journal->j_running_transaction) {
529 tid_t tid = journal->j_running_transaction->t_tid; 530 tid_t tid = journal->j_running_transaction->t_tid;
530 531
531 ret = __jbd2_log_start_commit(journal, tid); 532 __jbd2_log_start_commit(journal, tid);
532 if (ret && ptid) 533 /* There's a running transaction and we've just made sure
534 * its commit has been scheduled. */
535 if (ptid)
533 *ptid = tid; 536 *ptid = tid;
534 } else if (journal->j_committing_transaction && ptid) { 537 ret = 1;
538 } else if (journal->j_committing_transaction) {
535 /* 539 /*
536 * If ext3_write_super() recently started a commit, then we 540 * If ext3_write_super() recently started a commit, then we
537 * have to wait for completion of that transaction 541 * have to wait for completion of that transaction
538 */ 542 */
539 *ptid = journal->j_committing_transaction->t_tid; 543 if (ptid)
544 *ptid = journal->j_committing_transaction->t_tid;
540 ret = 1; 545 ret = 1;
541 } 546 }
542 spin_unlock(&journal->j_state_lock); 547 spin_unlock(&journal->j_state_lock);
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 257ff2625765..bbe6d592d8b3 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -55,6 +55,25 @@
55 * need do nothing. 55 * need do nothing.
56 * RevokeValid set, Revoked set: 56 * RevokeValid set, Revoked set:
57 * buffer has been revoked. 57 * buffer has been revoked.
58 *
59 * Locking rules:
60 * We keep two hash tables of revoke records. One hashtable belongs to the
61 * running transaction (is pointed to by journal->j_revoke), the other one
62 * belongs to the committing transaction. Accesses to the second hash table
63 * happen only from the kjournald and no other thread touches this table. Also
64 * journal_switch_revoke_table() which switches which hashtable belongs to the
65 * running and which to the committing transaction is called only from
66 * kjournald. Therefore we need no locks when accessing the hashtable belonging
67 * to the committing transaction.
68 *
69 * All users operating on the hash table belonging to the running transaction
70 * have a handle to the transaction. Therefore they are safe from kjournald
71 * switching hash tables under them. For operations on the lists of entries in
72 * the hash table j_revoke_lock is used.
73 *
74 * Finally, also replay code uses the hash tables but at this moment no one else
75 * can touch them (filesystem isn't mounted yet) and hence no locking is
76 * needed.
58 */ 77 */
59 78
60#ifndef __KERNEL__ 79#ifndef __KERNEL__
@@ -401,8 +420,6 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
401 * the second time we would still have a pending revoke to cancel. So, 420 * the second time we would still have a pending revoke to cancel. So,
402 * do not trust the Revoked bit on buffers unless RevokeValid is also 421 * do not trust the Revoked bit on buffers unless RevokeValid is also
403 * set. 422 * set.
404 *
405 * The caller must have the journal locked.
406 */ 423 */
407int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) 424int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
408{ 425{
@@ -480,10 +497,7 @@ void jbd2_journal_switch_revoke_table(journal_t *journal)
480/* 497/*
481 * Write revoke records to the journal for all entries in the current 498 * Write revoke records to the journal for all entries in the current
482 * revoke hash, deleting the entries as we go. 499 * revoke hash, deleting the entries as we go.
483 *
484 * Called with the journal lock held.
485 */ 500 */
486
487void jbd2_journal_write_revoke_records(journal_t *journal, 501void jbd2_journal_write_revoke_records(journal_t *journal,
488 transaction_t *transaction) 502 transaction_t *transaction)
489{ 503{
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 46b4e347ed7d..996ffda06bf3 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1315,6 +1315,8 @@ int jbd2_journal_stop(handle_t *handle)
1315 } 1315 }
1316 } 1316 }
1317 1317
1318 if (handle->h_sync)
1319 transaction->t_synchronous_commit = 1;
1318 current->journal_info = NULL; 1320 current->journal_info = NULL;
1319 spin_lock(&journal->j_state_lock); 1321 spin_lock(&journal->j_state_lock);
1320 spin_lock(&transaction->t_handle_lock); 1322 spin_lock(&transaction->t_handle_lock);
@@ -2129,26 +2131,46 @@ done:
2129} 2131}
2130 2132
2131/* 2133/*
2132 * This function must be called when inode is journaled in ordered mode 2134 * File truncate and transaction commit interact with each other in a
2133 * before truncation happens. It starts writeout of truncated part in 2135 * non-trivial way. If a transaction writing data block A is
2134 * case it is in the committing transaction so that we stand to ordered 2136 * committing, we cannot discard the data by truncate until we have
2135 * mode consistency guarantees. 2137 * written them. Otherwise if we crashed after the transaction with
2138 * write has committed but before the transaction with truncate has
2139 * committed, we could see stale data in block A. This function is a
2140 * helper to solve this problem. It starts writeout of the truncated
2141 * part in case it is in the committing transaction.
2142 *
2143 * Filesystem code must call this function when inode is journaled in
2144 * ordered mode before truncation happens and after the inode has been
2145 * placed on orphan list with the new inode size. The second condition
2146 * avoids the race that someone writes new data and we start
2147 * committing the transaction after this function has been called but
2148 * before a transaction for truncate is started (and furthermore it
2149 * allows us to optimize the case where the addition to orphan list
2150 * happens in the same transaction as write --- we don't have to write
2151 * any data in such case).
2136 */ 2152 */
2137int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, 2153int jbd2_journal_begin_ordered_truncate(journal_t *journal,
2154 struct jbd2_inode *jinode,
2138 loff_t new_size) 2155 loff_t new_size)
2139{ 2156{
2140 journal_t *journal; 2157 transaction_t *inode_trans, *commit_trans;
2141 transaction_t *commit_trans;
2142 int ret = 0; 2158 int ret = 0;
2143 2159
2144 if (!inode->i_transaction && !inode->i_next_transaction) 2160 /* This is a quick check to avoid locking if not necessary */
2161 if (!jinode->i_transaction)
2145 goto out; 2162 goto out;
2146 journal = inode->i_transaction->t_journal; 2163 /* Locks are here just to force reading of recent values, it is
2164 * enough that the transaction was not committing before we started
2165 * a transaction adding the inode to orphan list */
2147 spin_lock(&journal->j_state_lock); 2166 spin_lock(&journal->j_state_lock);
2148 commit_trans = journal->j_committing_transaction; 2167 commit_trans = journal->j_committing_transaction;
2149 spin_unlock(&journal->j_state_lock); 2168 spin_unlock(&journal->j_state_lock);
2150 if (inode->i_transaction == commit_trans) { 2169 spin_lock(&journal->j_list_lock);
2151 ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping, 2170 inode_trans = jinode->i_transaction;
2171 spin_unlock(&journal->j_list_lock);
2172 if (inode_trans == commit_trans) {
2173 ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
2152 new_size, LLONG_MAX); 2174 new_size, LLONG_MAX);
2153 if (ret) 2175 if (ret)
2154 jbd2_journal_abort(journal, ret); 2176 jbd2_journal_abort(journal, ret);