aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2/transaction.c
diff options
context:
space:
mode:
author: Theodore Ts'o <tytso@mit.edu>, 2010-08-03 21:35:12 -0400
committer: Theodore Ts'o <tytso@mit.edu>, 2010-08-03 21:35:12 -0400
commit: a931da6ac9331a6c80dd91c199105806f2336188 (patch)
tree: 2d4ea766def9a98d21c2379f41e796b0e57ccd6b /fs/jbd2/transaction.c
parent: a51dca9cd3bb4ec5a05bfb6feabf024a5c808a37 (diff)
jbd2: Change j_state_lock to be a rwlock_t
Lockstat reports have shown that j_state_lock is a major source of lock contention, especially on systems with more than 4 CPU cores. So change it to be a read/write spinlock.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2/transaction.c')
-rw-r--r-- fs/jbd2/transaction.c | 74
1 files changed, 40 insertions, 34 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 9c64c7ec48d4..663065142b42 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -124,36 +124,38 @@ alloc_transaction:
124 124
125 jbd_debug(3, "New handle %p going live.\n", handle); 125 jbd_debug(3, "New handle %p going live.\n", handle);
126 126
127repeat:
128
129 /* 127 /*
130 * We need to hold j_state_lock until t_updates has been incremented, 128 * We need to hold j_state_lock until t_updates has been incremented,
131 * for proper journal barrier handling 129 * for proper journal barrier handling
132 */ 130 */
133 spin_lock(&journal->j_state_lock); 131repeat:
134repeat_locked: 132 read_lock(&journal->j_state_lock);
135 if (is_journal_aborted(journal) || 133 if (is_journal_aborted(journal) ||
136 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { 134 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
137 spin_unlock(&journal->j_state_lock); 135 read_unlock(&journal->j_state_lock);
138 kfree(new_transaction); 136 kfree(new_transaction);
139 return -EROFS; 137 return -EROFS;
140 } 138 }
141 139
142 /* Wait on the journal's transaction barrier if necessary */ 140 /* Wait on the journal's transaction barrier if necessary */
143 if (journal->j_barrier_count) { 141 if (journal->j_barrier_count) {
144 spin_unlock(&journal->j_state_lock); 142 read_unlock(&journal->j_state_lock);
145 wait_event(journal->j_wait_transaction_locked, 143 wait_event(journal->j_wait_transaction_locked,
146 journal->j_barrier_count == 0); 144 journal->j_barrier_count == 0);
147 goto repeat; 145 goto repeat;
148 } 146 }
149 147
150 if (!journal->j_running_transaction) { 148 if (!journal->j_running_transaction) {
151 if (!new_transaction) { 149 read_unlock(&journal->j_state_lock);
152 spin_unlock(&journal->j_state_lock); 150 if (!new_transaction)
153 goto alloc_transaction; 151 goto alloc_transaction;
152 write_lock(&journal->j_state_lock);
153 if (!journal->j_running_transaction) {
154 jbd2_get_transaction(journal, new_transaction);
155 new_transaction = NULL;
154 } 156 }
155 jbd2_get_transaction(journal, new_transaction); 157 write_unlock(&journal->j_state_lock);
156 new_transaction = NULL; 158 goto repeat;
157 } 159 }
158 160
159 transaction = journal->j_running_transaction; 161 transaction = journal->j_running_transaction;
@@ -167,7 +169,7 @@ repeat_locked:
167 169
168 prepare_to_wait(&journal->j_wait_transaction_locked, 170 prepare_to_wait(&journal->j_wait_transaction_locked,
169 &wait, TASK_UNINTERRUPTIBLE); 171 &wait, TASK_UNINTERRUPTIBLE);
170 spin_unlock(&journal->j_state_lock); 172 read_unlock(&journal->j_state_lock);
171 schedule(); 173 schedule();
172 finish_wait(&journal->j_wait_transaction_locked, &wait); 174 finish_wait(&journal->j_wait_transaction_locked, &wait);
173 goto repeat; 175 goto repeat;
@@ -194,7 +196,7 @@ repeat_locked:
194 prepare_to_wait(&journal->j_wait_transaction_locked, &wait, 196 prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
195 TASK_UNINTERRUPTIBLE); 197 TASK_UNINTERRUPTIBLE);
196 __jbd2_log_start_commit(journal, transaction->t_tid); 198 __jbd2_log_start_commit(journal, transaction->t_tid);
197 spin_unlock(&journal->j_state_lock); 199 read_unlock(&journal->j_state_lock);
198 schedule(); 200 schedule();
199 finish_wait(&journal->j_wait_transaction_locked, &wait); 201 finish_wait(&journal->j_wait_transaction_locked, &wait);
200 goto repeat; 202 goto repeat;
@@ -228,8 +230,12 @@ repeat_locked:
228 if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { 230 if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
229 jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); 231 jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
230 spin_unlock(&transaction->t_handle_lock); 232 spin_unlock(&transaction->t_handle_lock);
231 __jbd2_log_wait_for_space(journal); 233 read_unlock(&journal->j_state_lock);
232 goto repeat_locked; 234 write_lock(&journal->j_state_lock);
235 if (__jbd2_log_space_left(journal) < jbd_space_needed(journal))
236 __jbd2_log_wait_for_space(journal);
237 write_unlock(&journal->j_state_lock);
238 goto repeat;
233 } 239 }
234 240
235 /* OK, account for the buffers that this operation expects to 241 /* OK, account for the buffers that this operation expects to
@@ -250,7 +256,7 @@ repeat_locked:
250 atomic_read(&transaction->t_outstanding_credits), 256 atomic_read(&transaction->t_outstanding_credits),
251 __jbd2_log_space_left(journal)); 257 __jbd2_log_space_left(journal));
252 spin_unlock(&transaction->t_handle_lock); 258 spin_unlock(&transaction->t_handle_lock);
253 spin_unlock(&journal->j_state_lock); 259 read_unlock(&journal->j_state_lock);
254 260
255 lock_map_acquire(&handle->h_lockdep_map); 261 lock_map_acquire(&handle->h_lockdep_map);
256 kfree(new_transaction); 262 kfree(new_transaction);
@@ -362,7 +368,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
362 368
363 result = 1; 369 result = 1;
364 370
365 spin_lock(&journal->j_state_lock); 371 read_lock(&journal->j_state_lock);
366 372
367 /* Don't extend a locked-down transaction! */ 373 /* Don't extend a locked-down transaction! */
368 if (handle->h_transaction->t_state != T_RUNNING) { 374 if (handle->h_transaction->t_state != T_RUNNING) {
@@ -394,7 +400,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
394unlock: 400unlock:
395 spin_unlock(&transaction->t_handle_lock); 401 spin_unlock(&transaction->t_handle_lock);
396error_out: 402error_out:
397 spin_unlock(&journal->j_state_lock); 403 read_unlock(&journal->j_state_lock);
398out: 404out:
399 return result; 405 return result;
400} 406}
@@ -432,7 +438,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
432 J_ASSERT(atomic_read(&transaction->t_updates) > 0); 438 J_ASSERT(atomic_read(&transaction->t_updates) > 0);
433 J_ASSERT(journal_current_handle() == handle); 439 J_ASSERT(journal_current_handle() == handle);
434 440
435 spin_lock(&journal->j_state_lock); 441 read_lock(&journal->j_state_lock);
436 spin_lock(&transaction->t_handle_lock); 442 spin_lock(&transaction->t_handle_lock);
437 atomic_sub(handle->h_buffer_credits, 443 atomic_sub(handle->h_buffer_credits,
438 &transaction->t_outstanding_credits); 444 &transaction->t_outstanding_credits);
@@ -442,7 +448,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
442 448
443 jbd_debug(2, "restarting handle %p\n", handle); 449 jbd_debug(2, "restarting handle %p\n", handle);
444 __jbd2_log_start_commit(journal, transaction->t_tid); 450 __jbd2_log_start_commit(journal, transaction->t_tid);
445 spin_unlock(&journal->j_state_lock); 451 read_unlock(&journal->j_state_lock);
446 452
447 lock_map_release(&handle->h_lockdep_map); 453 lock_map_release(&handle->h_lockdep_map);
448 handle->h_buffer_credits = nblocks; 454 handle->h_buffer_credits = nblocks;
@@ -472,7 +478,7 @@ void jbd2_journal_lock_updates(journal_t *journal)
472{ 478{
473 DEFINE_WAIT(wait); 479 DEFINE_WAIT(wait);
474 480
475 spin_lock(&journal->j_state_lock); 481 write_lock(&journal->j_state_lock);
476 ++journal->j_barrier_count; 482 ++journal->j_barrier_count;
477 483
478 /* Wait until there are no running updates */ 484 /* Wait until there are no running updates */
@@ -490,12 +496,12 @@ void jbd2_journal_lock_updates(journal_t *journal)
490 prepare_to_wait(&journal->j_wait_updates, &wait, 496 prepare_to_wait(&journal->j_wait_updates, &wait,
491 TASK_UNINTERRUPTIBLE); 497 TASK_UNINTERRUPTIBLE);
492 spin_unlock(&transaction->t_handle_lock); 498 spin_unlock(&transaction->t_handle_lock);
493 spin_unlock(&journal->j_state_lock); 499 write_unlock(&journal->j_state_lock);
494 schedule(); 500 schedule();
495 finish_wait(&journal->j_wait_updates, &wait); 501 finish_wait(&journal->j_wait_updates, &wait);
496 spin_lock(&journal->j_state_lock); 502 write_lock(&journal->j_state_lock);
497 } 503 }
498 spin_unlock(&journal->j_state_lock); 504 write_unlock(&journal->j_state_lock);
499 505
500 /* 506 /*
501 * We have now established a barrier against other normal updates, but 507 * We have now established a barrier against other normal updates, but
@@ -519,9 +525,9 @@ void jbd2_journal_unlock_updates (journal_t *journal)
519 J_ASSERT(journal->j_barrier_count != 0); 525 J_ASSERT(journal->j_barrier_count != 0);
520 526
521 mutex_unlock(&journal->j_barrier); 527 mutex_unlock(&journal->j_barrier);
522 spin_lock(&journal->j_state_lock); 528 write_lock(&journal->j_state_lock);
523 --journal->j_barrier_count; 529 --journal->j_barrier_count;
524 spin_unlock(&journal->j_state_lock); 530 write_unlock(&journal->j_state_lock);
525 wake_up(&journal->j_wait_transaction_locked); 531 wake_up(&journal->j_wait_transaction_locked);
526} 532}
527 533
@@ -1314,9 +1320,9 @@ int jbd2_journal_stop(handle_t *handle)
1314 1320
1315 journal->j_last_sync_writer = pid; 1321 journal->j_last_sync_writer = pid;
1316 1322
1317 spin_lock(&journal->j_state_lock); 1323 read_lock(&journal->j_state_lock);
1318 commit_time = journal->j_average_commit_time; 1324 commit_time = journal->j_average_commit_time;
1319 spin_unlock(&journal->j_state_lock); 1325 read_unlock(&journal->j_state_lock);
1320 1326
1321 trans_time = ktime_to_ns(ktime_sub(ktime_get(), 1327 trans_time = ktime_to_ns(ktime_sub(ktime_get(),
1322 transaction->t_start_time)); 1328 transaction->t_start_time));
@@ -1748,7 +1754,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1748 goto zap_buffer_unlocked; 1754 goto zap_buffer_unlocked;
1749 1755
1750 /* OK, we have data buffer in journaled mode */ 1756 /* OK, we have data buffer in journaled mode */
1751 spin_lock(&journal->j_state_lock); 1757 write_lock(&journal->j_state_lock);
1752 jbd_lock_bh_state(bh); 1758 jbd_lock_bh_state(bh);
1753 spin_lock(&journal->j_list_lock); 1759 spin_lock(&journal->j_list_lock);
1754 1760
@@ -1801,7 +1807,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1801 jbd2_journal_put_journal_head(jh); 1807 jbd2_journal_put_journal_head(jh);
1802 spin_unlock(&journal->j_list_lock); 1808 spin_unlock(&journal->j_list_lock);
1803 jbd_unlock_bh_state(bh); 1809 jbd_unlock_bh_state(bh);
1804 spin_unlock(&journal->j_state_lock); 1810 write_unlock(&journal->j_state_lock);
1805 return ret; 1811 return ret;
1806 } else { 1812 } else {
1807 /* There is no currently-running transaction. So the 1813 /* There is no currently-running transaction. So the
@@ -1815,7 +1821,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1815 jbd2_journal_put_journal_head(jh); 1821 jbd2_journal_put_journal_head(jh);
1816 spin_unlock(&journal->j_list_lock); 1822 spin_unlock(&journal->j_list_lock);
1817 jbd_unlock_bh_state(bh); 1823 jbd_unlock_bh_state(bh);
1818 spin_unlock(&journal->j_state_lock); 1824 write_unlock(&journal->j_state_lock);
1819 return ret; 1825 return ret;
1820 } else { 1826 } else {
1821 /* The orphan record's transaction has 1827 /* The orphan record's transaction has
@@ -1839,7 +1845,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1839 jbd2_journal_put_journal_head(jh); 1845 jbd2_journal_put_journal_head(jh);
1840 spin_unlock(&journal->j_list_lock); 1846 spin_unlock(&journal->j_list_lock);
1841 jbd_unlock_bh_state(bh); 1847 jbd_unlock_bh_state(bh);
1842 spin_unlock(&journal->j_state_lock); 1848 write_unlock(&journal->j_state_lock);
1843 return 0; 1849 return 0;
1844 } else { 1850 } else {
1845 /* Good, the buffer belongs to the running transaction. 1851 /* Good, the buffer belongs to the running transaction.
@@ -1858,7 +1864,7 @@ zap_buffer:
1858zap_buffer_no_jh: 1864zap_buffer_no_jh:
1859 spin_unlock(&journal->j_list_lock); 1865 spin_unlock(&journal->j_list_lock);
1860 jbd_unlock_bh_state(bh); 1866 jbd_unlock_bh_state(bh);
1861 spin_unlock(&journal->j_state_lock); 1867 write_unlock(&journal->j_state_lock);
1862zap_buffer_unlocked: 1868zap_buffer_unlocked:
1863 clear_buffer_dirty(bh); 1869 clear_buffer_dirty(bh);
1864 J_ASSERT_BH(bh, !buffer_jbddirty(bh)); 1870 J_ASSERT_BH(bh, !buffer_jbddirty(bh));
@@ -2165,9 +2171,9 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
2165 /* Locks are here just to force reading of recent values, it is 2171 /* Locks are here just to force reading of recent values, it is
2166 * enough that the transaction was not committing before we started 2172 * enough that the transaction was not committing before we started
2167 * a transaction adding the inode to orphan list */ 2173 * a transaction adding the inode to orphan list */
2168 spin_lock(&journal->j_state_lock); 2174 read_lock(&journal->j_state_lock);
2169 commit_trans = journal->j_committing_transaction; 2175 commit_trans = journal->j_committing_transaction;
2170 spin_unlock(&journal->j_state_lock); 2176 read_unlock(&journal->j_state_lock);
2171 spin_lock(&journal->j_list_lock); 2177 spin_lock(&journal->j_list_lock);
2172 inode_trans = jinode->i_transaction; 2178 inode_trans = jinode->i_transaction;
2173 spin_unlock(&journal->j_list_lock); 2179 spin_unlock(&journal->j_list_lock);