diff options
author | Theodore Ts'o <tytso@mit.edu> | 2010-08-03 21:35:12 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2010-08-03 21:35:12 -0400 |
commit | a931da6ac9331a6c80dd91c199105806f2336188 (patch) | |
tree | 2d4ea766def9a98d21c2379f41e796b0e57ccd6b /fs/jbd2/transaction.c | |
parent | a51dca9cd3bb4ec5a05bfb6feabf024a5c808a37 (diff) |
jbd2: Change j_state_lock to be a rwlock_t

Lockstat reports have shown that j_state_lock is a major source of
lock contention, especially on systems with more than 4 CPU cores. So
change it to be a read/write spinlock.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2/transaction.c')
-rw-r--r-- | fs/jbd2/transaction.c | 74 |
1 files changed, 40 insertions, 34 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 9c64c7ec48d4..663065142b42 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -124,36 +124,38 @@ alloc_transaction: | |||
124 | 124 | ||
125 | jbd_debug(3, "New handle %p going live.\n", handle); | 125 | jbd_debug(3, "New handle %p going live.\n", handle); |
126 | 126 | ||
127 | repeat: | ||
128 | |||
129 | /* | 127 | /* |
130 | * We need to hold j_state_lock until t_updates has been incremented, | 128 | * We need to hold j_state_lock until t_updates has been incremented, |
131 | * for proper journal barrier handling | 129 | * for proper journal barrier handling |
132 | */ | 130 | */ |
133 | spin_lock(&journal->j_state_lock); | 131 | repeat: |
134 | repeat_locked: | 132 | read_lock(&journal->j_state_lock); |
135 | if (is_journal_aborted(journal) || | 133 | if (is_journal_aborted(journal) || |
136 | (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { | 134 | (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { |
137 | spin_unlock(&journal->j_state_lock); | 135 | read_unlock(&journal->j_state_lock); |
138 | kfree(new_transaction); | 136 | kfree(new_transaction); |
139 | return -EROFS; | 137 | return -EROFS; |
140 | } | 138 | } |
141 | 139 | ||
142 | /* Wait on the journal's transaction barrier if necessary */ | 140 | /* Wait on the journal's transaction barrier if necessary */ |
143 | if (journal->j_barrier_count) { | 141 | if (journal->j_barrier_count) { |
144 | spin_unlock(&journal->j_state_lock); | 142 | read_unlock(&journal->j_state_lock); |
145 | wait_event(journal->j_wait_transaction_locked, | 143 | wait_event(journal->j_wait_transaction_locked, |
146 | journal->j_barrier_count == 0); | 144 | journal->j_barrier_count == 0); |
147 | goto repeat; | 145 | goto repeat; |
148 | } | 146 | } |
149 | 147 | ||
150 | if (!journal->j_running_transaction) { | 148 | if (!journal->j_running_transaction) { |
151 | if (!new_transaction) { | 149 | read_unlock(&journal->j_state_lock); |
152 | spin_unlock(&journal->j_state_lock); | 150 | if (!new_transaction) |
153 | goto alloc_transaction; | 151 | goto alloc_transaction; |
152 | write_lock(&journal->j_state_lock); | ||
153 | if (!journal->j_running_transaction) { | ||
154 | jbd2_get_transaction(journal, new_transaction); | ||
155 | new_transaction = NULL; | ||
154 | } | 156 | } |
155 | jbd2_get_transaction(journal, new_transaction); | 157 | write_unlock(&journal->j_state_lock); |
156 | new_transaction = NULL; | 158 | goto repeat; |
157 | } | 159 | } |
158 | 160 | ||
159 | transaction = journal->j_running_transaction; | 161 | transaction = journal->j_running_transaction; |
@@ -167,7 +169,7 @@ repeat_locked: | |||
167 | 169 | ||
168 | prepare_to_wait(&journal->j_wait_transaction_locked, | 170 | prepare_to_wait(&journal->j_wait_transaction_locked, |
169 | &wait, TASK_UNINTERRUPTIBLE); | 171 | &wait, TASK_UNINTERRUPTIBLE); |
170 | spin_unlock(&journal->j_state_lock); | 172 | read_unlock(&journal->j_state_lock); |
171 | schedule(); | 173 | schedule(); |
172 | finish_wait(&journal->j_wait_transaction_locked, &wait); | 174 | finish_wait(&journal->j_wait_transaction_locked, &wait); |
173 | goto repeat; | 175 | goto repeat; |
@@ -194,7 +196,7 @@ repeat_locked: | |||
194 | prepare_to_wait(&journal->j_wait_transaction_locked, &wait, | 196 | prepare_to_wait(&journal->j_wait_transaction_locked, &wait, |
195 | TASK_UNINTERRUPTIBLE); | 197 | TASK_UNINTERRUPTIBLE); |
196 | __jbd2_log_start_commit(journal, transaction->t_tid); | 198 | __jbd2_log_start_commit(journal, transaction->t_tid); |
197 | spin_unlock(&journal->j_state_lock); | 199 | read_unlock(&journal->j_state_lock); |
198 | schedule(); | 200 | schedule(); |
199 | finish_wait(&journal->j_wait_transaction_locked, &wait); | 201 | finish_wait(&journal->j_wait_transaction_locked, &wait); |
200 | goto repeat; | 202 | goto repeat; |
@@ -228,8 +230,12 @@ repeat_locked: | |||
228 | if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { | 230 | if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { |
229 | jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); | 231 | jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); |
230 | spin_unlock(&transaction->t_handle_lock); | 232 | spin_unlock(&transaction->t_handle_lock); |
231 | __jbd2_log_wait_for_space(journal); | 233 | read_unlock(&journal->j_state_lock); |
232 | goto repeat_locked; | 234 | write_lock(&journal->j_state_lock); |
235 | if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) | ||
236 | __jbd2_log_wait_for_space(journal); | ||
237 | write_unlock(&journal->j_state_lock); | ||
238 | goto repeat; | ||
233 | } | 239 | } |
234 | 240 | ||
235 | /* OK, account for the buffers that this operation expects to | 241 | /* OK, account for the buffers that this operation expects to |
@@ -250,7 +256,7 @@ repeat_locked: | |||
250 | atomic_read(&transaction->t_outstanding_credits), | 256 | atomic_read(&transaction->t_outstanding_credits), |
251 | __jbd2_log_space_left(journal)); | 257 | __jbd2_log_space_left(journal)); |
252 | spin_unlock(&transaction->t_handle_lock); | 258 | spin_unlock(&transaction->t_handle_lock); |
253 | spin_unlock(&journal->j_state_lock); | 259 | read_unlock(&journal->j_state_lock); |
254 | 260 | ||
255 | lock_map_acquire(&handle->h_lockdep_map); | 261 | lock_map_acquire(&handle->h_lockdep_map); |
256 | kfree(new_transaction); | 262 | kfree(new_transaction); |
@@ -362,7 +368,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks) | |||
362 | 368 | ||
363 | result = 1; | 369 | result = 1; |
364 | 370 | ||
365 | spin_lock(&journal->j_state_lock); | 371 | read_lock(&journal->j_state_lock); |
366 | 372 | ||
367 | /* Don't extend a locked-down transaction! */ | 373 | /* Don't extend a locked-down transaction! */ |
368 | if (handle->h_transaction->t_state != T_RUNNING) { | 374 | if (handle->h_transaction->t_state != T_RUNNING) { |
@@ -394,7 +400,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks) | |||
394 | unlock: | 400 | unlock: |
395 | spin_unlock(&transaction->t_handle_lock); | 401 | spin_unlock(&transaction->t_handle_lock); |
396 | error_out: | 402 | error_out: |
397 | spin_unlock(&journal->j_state_lock); | 403 | read_unlock(&journal->j_state_lock); |
398 | out: | 404 | out: |
399 | return result; | 405 | return result; |
400 | } | 406 | } |
@@ -432,7 +438,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) | |||
432 | J_ASSERT(atomic_read(&transaction->t_updates) > 0); | 438 | J_ASSERT(atomic_read(&transaction->t_updates) > 0); |
433 | J_ASSERT(journal_current_handle() == handle); | 439 | J_ASSERT(journal_current_handle() == handle); |
434 | 440 | ||
435 | spin_lock(&journal->j_state_lock); | 441 | read_lock(&journal->j_state_lock); |
436 | spin_lock(&transaction->t_handle_lock); | 442 | spin_lock(&transaction->t_handle_lock); |
437 | atomic_sub(handle->h_buffer_credits, | 443 | atomic_sub(handle->h_buffer_credits, |
438 | &transaction->t_outstanding_credits); | 444 | &transaction->t_outstanding_credits); |
@@ -442,7 +448,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) | |||
442 | 448 | ||
443 | jbd_debug(2, "restarting handle %p\n", handle); | 449 | jbd_debug(2, "restarting handle %p\n", handle); |
444 | __jbd2_log_start_commit(journal, transaction->t_tid); | 450 | __jbd2_log_start_commit(journal, transaction->t_tid); |
445 | spin_unlock(&journal->j_state_lock); | 451 | read_unlock(&journal->j_state_lock); |
446 | 452 | ||
447 | lock_map_release(&handle->h_lockdep_map); | 453 | lock_map_release(&handle->h_lockdep_map); |
448 | handle->h_buffer_credits = nblocks; | 454 | handle->h_buffer_credits = nblocks; |
@@ -472,7 +478,7 @@ void jbd2_journal_lock_updates(journal_t *journal) | |||
472 | { | 478 | { |
473 | DEFINE_WAIT(wait); | 479 | DEFINE_WAIT(wait); |
474 | 480 | ||
475 | spin_lock(&journal->j_state_lock); | 481 | write_lock(&journal->j_state_lock); |
476 | ++journal->j_barrier_count; | 482 | ++journal->j_barrier_count; |
477 | 483 | ||
478 | /* Wait until there are no running updates */ | 484 | /* Wait until there are no running updates */ |
@@ -490,12 +496,12 @@ void jbd2_journal_lock_updates(journal_t *journal) | |||
490 | prepare_to_wait(&journal->j_wait_updates, &wait, | 496 | prepare_to_wait(&journal->j_wait_updates, &wait, |
491 | TASK_UNINTERRUPTIBLE); | 497 | TASK_UNINTERRUPTIBLE); |
492 | spin_unlock(&transaction->t_handle_lock); | 498 | spin_unlock(&transaction->t_handle_lock); |
493 | spin_unlock(&journal->j_state_lock); | 499 | write_unlock(&journal->j_state_lock); |
494 | schedule(); | 500 | schedule(); |
495 | finish_wait(&journal->j_wait_updates, &wait); | 501 | finish_wait(&journal->j_wait_updates, &wait); |
496 | spin_lock(&journal->j_state_lock); | 502 | write_lock(&journal->j_state_lock); |
497 | } | 503 | } |
498 | spin_unlock(&journal->j_state_lock); | 504 | write_unlock(&journal->j_state_lock); |
499 | 505 | ||
500 | /* | 506 | /* |
501 | * We have now established a barrier against other normal updates, but | 507 | * We have now established a barrier against other normal updates, but |
@@ -519,9 +525,9 @@ void jbd2_journal_unlock_updates (journal_t *journal) | |||
519 | J_ASSERT(journal->j_barrier_count != 0); | 525 | J_ASSERT(journal->j_barrier_count != 0); |
520 | 526 | ||
521 | mutex_unlock(&journal->j_barrier); | 527 | mutex_unlock(&journal->j_barrier); |
522 | spin_lock(&journal->j_state_lock); | 528 | write_lock(&journal->j_state_lock); |
523 | --journal->j_barrier_count; | 529 | --journal->j_barrier_count; |
524 | spin_unlock(&journal->j_state_lock); | 530 | write_unlock(&journal->j_state_lock); |
525 | wake_up(&journal->j_wait_transaction_locked); | 531 | wake_up(&journal->j_wait_transaction_locked); |
526 | } | 532 | } |
527 | 533 | ||
@@ -1314,9 +1320,9 @@ int jbd2_journal_stop(handle_t *handle) | |||
1314 | 1320 | ||
1315 | journal->j_last_sync_writer = pid; | 1321 | journal->j_last_sync_writer = pid; |
1316 | 1322 | ||
1317 | spin_lock(&journal->j_state_lock); | 1323 | read_lock(&journal->j_state_lock); |
1318 | commit_time = journal->j_average_commit_time; | 1324 | commit_time = journal->j_average_commit_time; |
1319 | spin_unlock(&journal->j_state_lock); | 1325 | read_unlock(&journal->j_state_lock); |
1320 | 1326 | ||
1321 | trans_time = ktime_to_ns(ktime_sub(ktime_get(), | 1327 | trans_time = ktime_to_ns(ktime_sub(ktime_get(), |
1322 | transaction->t_start_time)); | 1328 | transaction->t_start_time)); |
@@ -1748,7 +1754,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1748 | goto zap_buffer_unlocked; | 1754 | goto zap_buffer_unlocked; |
1749 | 1755 | ||
1750 | /* OK, we have data buffer in journaled mode */ | 1756 | /* OK, we have data buffer in journaled mode */ |
1751 | spin_lock(&journal->j_state_lock); | 1757 | write_lock(&journal->j_state_lock); |
1752 | jbd_lock_bh_state(bh); | 1758 | jbd_lock_bh_state(bh); |
1753 | spin_lock(&journal->j_list_lock); | 1759 | spin_lock(&journal->j_list_lock); |
1754 | 1760 | ||
@@ -1801,7 +1807,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1801 | jbd2_journal_put_journal_head(jh); | 1807 | jbd2_journal_put_journal_head(jh); |
1802 | spin_unlock(&journal->j_list_lock); | 1808 | spin_unlock(&journal->j_list_lock); |
1803 | jbd_unlock_bh_state(bh); | 1809 | jbd_unlock_bh_state(bh); |
1804 | spin_unlock(&journal->j_state_lock); | 1810 | write_unlock(&journal->j_state_lock); |
1805 | return ret; | 1811 | return ret; |
1806 | } else { | 1812 | } else { |
1807 | /* There is no currently-running transaction. So the | 1813 | /* There is no currently-running transaction. So the |
@@ -1815,7 +1821,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1815 | jbd2_journal_put_journal_head(jh); | 1821 | jbd2_journal_put_journal_head(jh); |
1816 | spin_unlock(&journal->j_list_lock); | 1822 | spin_unlock(&journal->j_list_lock); |
1817 | jbd_unlock_bh_state(bh); | 1823 | jbd_unlock_bh_state(bh); |
1818 | spin_unlock(&journal->j_state_lock); | 1824 | write_unlock(&journal->j_state_lock); |
1819 | return ret; | 1825 | return ret; |
1820 | } else { | 1826 | } else { |
1821 | /* The orphan record's transaction has | 1827 | /* The orphan record's transaction has |
@@ -1839,7 +1845,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1839 | jbd2_journal_put_journal_head(jh); | 1845 | jbd2_journal_put_journal_head(jh); |
1840 | spin_unlock(&journal->j_list_lock); | 1846 | spin_unlock(&journal->j_list_lock); |
1841 | jbd_unlock_bh_state(bh); | 1847 | jbd_unlock_bh_state(bh); |
1842 | spin_unlock(&journal->j_state_lock); | 1848 | write_unlock(&journal->j_state_lock); |
1843 | return 0; | 1849 | return 0; |
1844 | } else { | 1850 | } else { |
1845 | /* Good, the buffer belongs to the running transaction. | 1851 | /* Good, the buffer belongs to the running transaction. |
@@ -1858,7 +1864,7 @@ zap_buffer: | |||
1858 | zap_buffer_no_jh: | 1864 | zap_buffer_no_jh: |
1859 | spin_unlock(&journal->j_list_lock); | 1865 | spin_unlock(&journal->j_list_lock); |
1860 | jbd_unlock_bh_state(bh); | 1866 | jbd_unlock_bh_state(bh); |
1861 | spin_unlock(&journal->j_state_lock); | 1867 | write_unlock(&journal->j_state_lock); |
1862 | zap_buffer_unlocked: | 1868 | zap_buffer_unlocked: |
1863 | clear_buffer_dirty(bh); | 1869 | clear_buffer_dirty(bh); |
1864 | J_ASSERT_BH(bh, !buffer_jbddirty(bh)); | 1870 | J_ASSERT_BH(bh, !buffer_jbddirty(bh)); |
@@ -2165,9 +2171,9 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal, | |||
2165 | /* Locks are here just to force reading of recent values, it is | 2171 | /* Locks are here just to force reading of recent values, it is |
2166 | * enough that the transaction was not committing before we started | 2172 | * enough that the transaction was not committing before we started |
2167 | * a transaction adding the inode to orphan list */ | 2173 | * a transaction adding the inode to orphan list */ |
2168 | spin_lock(&journal->j_state_lock); | 2174 | read_lock(&journal->j_state_lock); |
2169 | commit_trans = journal->j_committing_transaction; | 2175 | commit_trans = journal->j_committing_transaction; |
2170 | spin_unlock(&journal->j_state_lock); | 2176 | read_unlock(&journal->j_state_lock); |
2171 | spin_lock(&journal->j_list_lock); | 2177 | spin_lock(&journal->j_list_lock); |
2172 | inode_trans = jinode->i_transaction; | 2178 | inode_trans = jinode->i_transaction; |
2173 | spin_unlock(&journal->j_list_lock); | 2179 | spin_unlock(&journal->j_list_lock); |