aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2
diff options
context:
space:
mode:
author Theodore Ts'o <tytso@mit.edu> 2010-08-03 21:35:12 -0400
committer Theodore Ts'o <tytso@mit.edu> 2010-08-03 21:35:12 -0400
commit a931da6ac9331a6c80dd91c199105806f2336188 (patch)
tree 2d4ea766def9a98d21c2379f41e796b0e57ccd6b /fs/jbd2
parent a51dca9cd3bb4ec5a05bfb6feabf024a5c808a37 (diff)
jbd2: Change j_state_lock to be a rwlock_t
Lockstat reports have shown that j_state_lock is a major source of lock contention, especially on systems with more than 4 CPU cores. So change it to be a read/write spinlock.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- fs/jbd2/checkpoint.c 16
-rw-r--r-- fs/jbd2/commit.c 26
-rw-r--r-- fs/jbd2/journal.c 94
-rw-r--r-- fs/jbd2/transaction.c 74
4 files changed, 107 insertions, 103 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index f8cdc02520f9..1c23a0f4e8a3 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -118,13 +118,13 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
118void __jbd2_log_wait_for_space(journal_t *journal) 118void __jbd2_log_wait_for_space(journal_t *journal)
119{ 119{
120 int nblocks, space_left; 120 int nblocks, space_left;
121 assert_spin_locked(&journal->j_state_lock); 121 /* assert_spin_locked(&journal->j_state_lock); */
122 122
123 nblocks = jbd_space_needed(journal); 123 nblocks = jbd_space_needed(journal);
124 while (__jbd2_log_space_left(journal) < nblocks) { 124 while (__jbd2_log_space_left(journal) < nblocks) {
125 if (journal->j_flags & JBD2_ABORT) 125 if (journal->j_flags & JBD2_ABORT)
126 return; 126 return;
127 spin_unlock(&journal->j_state_lock); 127 write_unlock(&journal->j_state_lock);
128 mutex_lock(&journal->j_checkpoint_mutex); 128 mutex_lock(&journal->j_checkpoint_mutex);
129 129
130 /* 130 /*
@@ -138,7 +138,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
138 * filesystem, so abort the journal and leave a stack 138 * filesystem, so abort the journal and leave a stack
139 * trace for forensic evidence. 139 * trace for forensic evidence.
140 */ 140 */
141 spin_lock(&journal->j_state_lock); 141 write_lock(&journal->j_state_lock);
142 spin_lock(&journal->j_list_lock); 142 spin_lock(&journal->j_list_lock);
143 nblocks = jbd_space_needed(journal); 143 nblocks = jbd_space_needed(journal);
144 space_left = __jbd2_log_space_left(journal); 144 space_left = __jbd2_log_space_left(journal);
@@ -149,7 +149,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
149 if (journal->j_committing_transaction) 149 if (journal->j_committing_transaction)
150 tid = journal->j_committing_transaction->t_tid; 150 tid = journal->j_committing_transaction->t_tid;
151 spin_unlock(&journal->j_list_lock); 151 spin_unlock(&journal->j_list_lock);
152 spin_unlock(&journal->j_state_lock); 152 write_unlock(&journal->j_state_lock);
153 if (chkpt) { 153 if (chkpt) {
154 jbd2_log_do_checkpoint(journal); 154 jbd2_log_do_checkpoint(journal);
155 } else if (jbd2_cleanup_journal_tail(journal) == 0) { 155 } else if (jbd2_cleanup_journal_tail(journal) == 0) {
@@ -167,7 +167,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
167 WARN_ON(1); 167 WARN_ON(1);
168 jbd2_journal_abort(journal, 0); 168 jbd2_journal_abort(journal, 0);
169 } 169 }
170 spin_lock(&journal->j_state_lock); 170 write_lock(&journal->j_state_lock);
171 } else { 171 } else {
172 spin_unlock(&journal->j_list_lock); 172 spin_unlock(&journal->j_list_lock);
173 } 173 }
@@ -474,7 +474,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
474 * next transaction ID we will write, and where it will 474 * next transaction ID we will write, and where it will
475 * start. */ 475 * start. */
476 476
477 spin_lock(&journal->j_state_lock); 477 write_lock(&journal->j_state_lock);
478 spin_lock(&journal->j_list_lock); 478 spin_lock(&journal->j_list_lock);
479 transaction = journal->j_checkpoint_transactions; 479 transaction = journal->j_checkpoint_transactions;
480 if (transaction) { 480 if (transaction) {
@@ -496,7 +496,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
496 /* If the oldest pinned transaction is at the tail of the log 496 /* If the oldest pinned transaction is at the tail of the log
497 already then there's not much we can do right now. */ 497 already then there's not much we can do right now. */
498 if (journal->j_tail_sequence == first_tid) { 498 if (journal->j_tail_sequence == first_tid) {
499 spin_unlock(&journal->j_state_lock); 499 write_unlock(&journal->j_state_lock);
500 return 1; 500 return 1;
501 } 501 }
502 502
@@ -516,7 +516,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
516 journal->j_free += freed; 516 journal->j_free += freed;
517 journal->j_tail_sequence = first_tid; 517 journal->j_tail_sequence = first_tid;
518 journal->j_tail = blocknr; 518 journal->j_tail = blocknr;
519 spin_unlock(&journal->j_state_lock); 519 write_unlock(&journal->j_state_lock);
520 520
521 /* 521 /*
522 * If there is an external journal, we need to make sure that 522 * If there is an external journal, we need to make sure that
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index fbd2c564e916..67bb0a2f35e5 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -152,9 +152,9 @@ static int journal_submit_commit_record(journal_t *journal,
152 printk(KERN_WARNING 152 printk(KERN_WARNING
153 "JBD2: Disabling barriers on %s, " 153 "JBD2: Disabling barriers on %s, "
154 "not supported by device\n", journal->j_devname); 154 "not supported by device\n", journal->j_devname);
155 spin_lock(&journal->j_state_lock); 155 write_lock(&journal->j_state_lock);
156 journal->j_flags &= ~JBD2_BARRIER; 156 journal->j_flags &= ~JBD2_BARRIER;
157 spin_unlock(&journal->j_state_lock); 157 write_unlock(&journal->j_state_lock);
158 158
159 /* And try again, without the barrier */ 159 /* And try again, without the barrier */
160 lock_buffer(bh); 160 lock_buffer(bh);
@@ -182,9 +182,9 @@ retry:
182 printk(KERN_WARNING 182 printk(KERN_WARNING
183 "JBD2: %s: disabling barries on %s - not supported " 183 "JBD2: %s: disabling barries on %s - not supported "
184 "by device\n", __func__, journal->j_devname); 184 "by device\n", __func__, journal->j_devname);
185 spin_lock(&journal->j_state_lock); 185 write_lock(&journal->j_state_lock);
186 journal->j_flags &= ~JBD2_BARRIER; 186 journal->j_flags &= ~JBD2_BARRIER;
187 spin_unlock(&journal->j_state_lock); 187 write_unlock(&journal->j_state_lock);
188 188
189 lock_buffer(bh); 189 lock_buffer(bh);
190 clear_buffer_dirty(bh); 190 clear_buffer_dirty(bh);
@@ -400,7 +400,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
400 jbd_debug(1, "JBD: starting commit of transaction %d\n", 400 jbd_debug(1, "JBD: starting commit of transaction %d\n",
401 commit_transaction->t_tid); 401 commit_transaction->t_tid);
402 402
403 spin_lock(&journal->j_state_lock); 403 write_lock(&journal->j_state_lock);
404 commit_transaction->t_state = T_LOCKED; 404 commit_transaction->t_state = T_LOCKED;
405 405
406 /* 406 /*
@@ -424,9 +424,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
424 TASK_UNINTERRUPTIBLE); 424 TASK_UNINTERRUPTIBLE);
425 if (atomic_read(&commit_transaction->t_updates)) { 425 if (atomic_read(&commit_transaction->t_updates)) {
426 spin_unlock(&commit_transaction->t_handle_lock); 426 spin_unlock(&commit_transaction->t_handle_lock);
427 spin_unlock(&journal->j_state_lock); 427 write_unlock(&journal->j_state_lock);
428 schedule(); 428 schedule();
429 spin_lock(&journal->j_state_lock); 429 write_lock(&journal->j_state_lock);
430 spin_lock(&commit_transaction->t_handle_lock); 430 spin_lock(&commit_transaction->t_handle_lock);
431 } 431 }
432 finish_wait(&journal->j_wait_updates, &wait); 432 finish_wait(&journal->j_wait_updates, &wait);
@@ -497,7 +497,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
497 start_time = ktime_get(); 497 start_time = ktime_get();
498 commit_transaction->t_log_start = journal->j_head; 498 commit_transaction->t_log_start = journal->j_head;
499 wake_up(&journal->j_wait_transaction_locked); 499 wake_up(&journal->j_wait_transaction_locked);
500 spin_unlock(&journal->j_state_lock); 500 write_unlock(&journal->j_state_lock);
501 501
502 jbd_debug (3, "JBD: commit phase 2\n"); 502 jbd_debug (3, "JBD: commit phase 2\n");
503 503
@@ -519,9 +519,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
519 * transaction! Now comes the tricky part: we need to write out 519 * transaction! Now comes the tricky part: we need to write out
520 * metadata. Loop over the transaction's entire buffer list: 520 * metadata. Loop over the transaction's entire buffer list:
521 */ 521 */
522 spin_lock(&journal->j_state_lock); 522 write_lock(&journal->j_state_lock);
523 commit_transaction->t_state = T_COMMIT; 523 commit_transaction->t_state = T_COMMIT;
524 spin_unlock(&journal->j_state_lock); 524 write_unlock(&journal->j_state_lock);
525 525
526 trace_jbd2_commit_logging(journal, commit_transaction); 526 trace_jbd2_commit_logging(journal, commit_transaction);
527 stats.run.rs_logging = jiffies; 527 stats.run.rs_logging = jiffies;
@@ -978,7 +978,7 @@ restart_loop:
978 * __jbd2_journal_drop_transaction(). Otherwise we could race with 978 * __jbd2_journal_drop_transaction(). Otherwise we could race with
979 * other checkpointing code processing the transaction... 979 * other checkpointing code processing the transaction...
980 */ 980 */
981 spin_lock(&journal->j_state_lock); 981 write_lock(&journal->j_state_lock);
982 spin_lock(&journal->j_list_lock); 982 spin_lock(&journal->j_list_lock);
983 /* 983 /*
984 * Now recheck if some buffers did not get attached to the transaction 984 * Now recheck if some buffers did not get attached to the transaction
@@ -986,7 +986,7 @@ restart_loop:
986 */ 986 */
987 if (commit_transaction->t_forget) { 987 if (commit_transaction->t_forget) {
988 spin_unlock(&journal->j_list_lock); 988 spin_unlock(&journal->j_list_lock);
989 spin_unlock(&journal->j_state_lock); 989 write_unlock(&journal->j_state_lock);
990 goto restart_loop; 990 goto restart_loop;
991 } 991 }
992 992
@@ -1038,7 +1038,7 @@ restart_loop:
1038 journal->j_average_commit_time*3) / 4; 1038 journal->j_average_commit_time*3) / 4;
1039 else 1039 else
1040 journal->j_average_commit_time = commit_time; 1040 journal->j_average_commit_time = commit_time;
1041 spin_unlock(&journal->j_state_lock); 1041 write_unlock(&journal->j_state_lock);
1042 1042
1043 if (commit_transaction->t_checkpoint_list == NULL && 1043 if (commit_transaction->t_checkpoint_list == NULL &&
1044 commit_transaction->t_checkpoint_io_list == NULL) { 1044 commit_transaction->t_checkpoint_io_list == NULL) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index a79d3345b55a..e7bf0fd9cec7 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -142,7 +142,7 @@ static int kjournald2(void *arg)
142 /* 142 /*
143 * And now, wait forever for commit wakeup events. 143 * And now, wait forever for commit wakeup events.
144 */ 144 */
145 spin_lock(&journal->j_state_lock); 145 write_lock(&journal->j_state_lock);
146 146
147loop: 147loop:
148 if (journal->j_flags & JBD2_UNMOUNT) 148 if (journal->j_flags & JBD2_UNMOUNT)
@@ -153,10 +153,10 @@ loop:
153 153
154 if (journal->j_commit_sequence != journal->j_commit_request) { 154 if (journal->j_commit_sequence != journal->j_commit_request) {
155 jbd_debug(1, "OK, requests differ\n"); 155 jbd_debug(1, "OK, requests differ\n");
156 spin_unlock(&journal->j_state_lock); 156 write_unlock(&journal->j_state_lock);
157 del_timer_sync(&journal->j_commit_timer); 157 del_timer_sync(&journal->j_commit_timer);
158 jbd2_journal_commit_transaction(journal); 158 jbd2_journal_commit_transaction(journal);
159 spin_lock(&journal->j_state_lock); 159 write_lock(&journal->j_state_lock);
160 goto loop; 160 goto loop;
161 } 161 }
162 162
@@ -168,9 +168,9 @@ loop:
168 * be already stopped. 168 * be already stopped.
169 */ 169 */
170 jbd_debug(1, "Now suspending kjournald2\n"); 170 jbd_debug(1, "Now suspending kjournald2\n");
171 spin_unlock(&journal->j_state_lock); 171 write_unlock(&journal->j_state_lock);
172 refrigerator(); 172 refrigerator();
173 spin_lock(&journal->j_state_lock); 173 write_lock(&journal->j_state_lock);
174 } else { 174 } else {
175 /* 175 /*
176 * We assume on resume that commits are already there, 176 * We assume on resume that commits are already there,
@@ -190,9 +190,9 @@ loop:
190 if (journal->j_flags & JBD2_UNMOUNT) 190 if (journal->j_flags & JBD2_UNMOUNT)
191 should_sleep = 0; 191 should_sleep = 0;
192 if (should_sleep) { 192 if (should_sleep) {
193 spin_unlock(&journal->j_state_lock); 193 write_unlock(&journal->j_state_lock);
194 schedule(); 194 schedule();
195 spin_lock(&journal->j_state_lock); 195 write_lock(&journal->j_state_lock);
196 } 196 }
197 finish_wait(&journal->j_wait_commit, &wait); 197 finish_wait(&journal->j_wait_commit, &wait);
198 } 198 }
@@ -210,7 +210,7 @@ loop:
210 goto loop; 210 goto loop;
211 211
212end_loop: 212end_loop:
213 spin_unlock(&journal->j_state_lock); 213 write_unlock(&journal->j_state_lock);
214 del_timer_sync(&journal->j_commit_timer); 214 del_timer_sync(&journal->j_commit_timer);
215 journal->j_task = NULL; 215 journal->j_task = NULL;
216 wake_up(&journal->j_wait_done_commit); 216 wake_up(&journal->j_wait_done_commit);
@@ -233,16 +233,16 @@ static int jbd2_journal_start_thread(journal_t *journal)
233 233
234static void journal_kill_thread(journal_t *journal) 234static void journal_kill_thread(journal_t *journal)
235{ 235{
236 spin_lock(&journal->j_state_lock); 236 write_lock(&journal->j_state_lock);
237 journal->j_flags |= JBD2_UNMOUNT; 237 journal->j_flags |= JBD2_UNMOUNT;
238 238
239 while (journal->j_task) { 239 while (journal->j_task) {
240 wake_up(&journal->j_wait_commit); 240 wake_up(&journal->j_wait_commit);
241 spin_unlock(&journal->j_state_lock); 241 write_unlock(&journal->j_state_lock);
242 wait_event(journal->j_wait_done_commit, journal->j_task == NULL); 242 wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
243 spin_lock(&journal->j_state_lock); 243 write_lock(&journal->j_state_lock);
244 } 244 }
245 spin_unlock(&journal->j_state_lock); 245 write_unlock(&journal->j_state_lock);
246} 246}
247 247
248/* 248/*
@@ -452,7 +452,7 @@ int __jbd2_log_space_left(journal_t *journal)
452{ 452{
453 int left = journal->j_free; 453 int left = journal->j_free;
454 454
455 assert_spin_locked(&journal->j_state_lock); 455 /* assert_spin_locked(&journal->j_state_lock); */
456 456
457 /* 457 /*
458 * Be pessimistic here about the number of those free blocks which 458 * Be pessimistic here about the number of those free blocks which
@@ -497,9 +497,9 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid)
497{ 497{
498 int ret; 498 int ret;
499 499
500 spin_lock(&journal->j_state_lock); 500 write_lock(&journal->j_state_lock);
501 ret = __jbd2_log_start_commit(journal, tid); 501 ret = __jbd2_log_start_commit(journal, tid);
502 spin_unlock(&journal->j_state_lock); 502 write_unlock(&journal->j_state_lock);
503 return ret; 503 return ret;
504} 504}
505 505
@@ -518,7 +518,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
518 transaction_t *transaction = NULL; 518 transaction_t *transaction = NULL;
519 tid_t tid; 519 tid_t tid;
520 520
521 spin_lock(&journal->j_state_lock); 521 read_lock(&journal->j_state_lock);
522 if (journal->j_running_transaction && !current->journal_info) { 522 if (journal->j_running_transaction && !current->journal_info) {
523 transaction = journal->j_running_transaction; 523 transaction = journal->j_running_transaction;
524 __jbd2_log_start_commit(journal, transaction->t_tid); 524 __jbd2_log_start_commit(journal, transaction->t_tid);
@@ -526,12 +526,12 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
526 transaction = journal->j_committing_transaction; 526 transaction = journal->j_committing_transaction;
527 527
528 if (!transaction) { 528 if (!transaction) {
529 spin_unlock(&journal->j_state_lock); 529 read_unlock(&journal->j_state_lock);
530 return 0; /* Nothing to retry */ 530 return 0; /* Nothing to retry */
531 } 531 }
532 532
533 tid = transaction->t_tid; 533 tid = transaction->t_tid;
534 spin_unlock(&journal->j_state_lock); 534 read_unlock(&journal->j_state_lock);
535 jbd2_log_wait_commit(journal, tid); 535 jbd2_log_wait_commit(journal, tid);
536 return 1; 536 return 1;
537} 537}
@@ -545,7 +545,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
545{ 545{
546 int ret = 0; 546 int ret = 0;
547 547
548 spin_lock(&journal->j_state_lock); 548 write_lock(&journal->j_state_lock);
549 if (journal->j_running_transaction) { 549 if (journal->j_running_transaction) {
550 tid_t tid = journal->j_running_transaction->t_tid; 550 tid_t tid = journal->j_running_transaction->t_tid;
551 551
@@ -564,7 +564,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
564 *ptid = journal->j_committing_transaction->t_tid; 564 *ptid = journal->j_committing_transaction->t_tid;
565 ret = 1; 565 ret = 1;
566 } 566 }
567 spin_unlock(&journal->j_state_lock); 567 write_unlock(&journal->j_state_lock);
568 return ret; 568 return ret;
569} 569}
570 570
@@ -576,26 +576,24 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
576{ 576{
577 int err = 0; 577 int err = 0;
578 578
579 read_lock(&journal->j_state_lock);
579#ifdef CONFIG_JBD2_DEBUG 580#ifdef CONFIG_JBD2_DEBUG
580 spin_lock(&journal->j_state_lock);
581 if (!tid_geq(journal->j_commit_request, tid)) { 581 if (!tid_geq(journal->j_commit_request, tid)) {
582 printk(KERN_EMERG 582 printk(KERN_EMERG
583 "%s: error: j_commit_request=%d, tid=%d\n", 583 "%s: error: j_commit_request=%d, tid=%d\n",
584 __func__, journal->j_commit_request, tid); 584 __func__, journal->j_commit_request, tid);
585 } 585 }
586 spin_unlock(&journal->j_state_lock);
587#endif 586#endif
588 spin_lock(&journal->j_state_lock);
589 while (tid_gt(tid, journal->j_commit_sequence)) { 587 while (tid_gt(tid, journal->j_commit_sequence)) {
590 jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", 588 jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
591 tid, journal->j_commit_sequence); 589 tid, journal->j_commit_sequence);
592 wake_up(&journal->j_wait_commit); 590 wake_up(&journal->j_wait_commit);
593 spin_unlock(&journal->j_state_lock); 591 read_unlock(&journal->j_state_lock);
594 wait_event(journal->j_wait_done_commit, 592 wait_event(journal->j_wait_done_commit,
595 !tid_gt(tid, journal->j_commit_sequence)); 593 !tid_gt(tid, journal->j_commit_sequence));
596 spin_lock(&journal->j_state_lock); 594 read_lock(&journal->j_state_lock);
597 } 595 }
598 spin_unlock(&journal->j_state_lock); 596 read_unlock(&journal->j_state_lock);
599 597
600 if (unlikely(is_journal_aborted(journal))) { 598 if (unlikely(is_journal_aborted(journal))) {
601 printk(KERN_EMERG "journal commit I/O error\n"); 599 printk(KERN_EMERG "journal commit I/O error\n");
@@ -612,7 +610,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
612{ 610{
613 unsigned long blocknr; 611 unsigned long blocknr;
614 612
615 spin_lock(&journal->j_state_lock); 613 write_lock(&journal->j_state_lock);
616 J_ASSERT(journal->j_free > 1); 614 J_ASSERT(journal->j_free > 1);
617 615
618 blocknr = journal->j_head; 616 blocknr = journal->j_head;
@@ -620,7 +618,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
620 journal->j_free--; 618 journal->j_free--;
621 if (journal->j_head == journal->j_last) 619 if (journal->j_head == journal->j_last)
622 journal->j_head = journal->j_first; 620 journal->j_head = journal->j_first;
623 spin_unlock(&journal->j_state_lock); 621 write_unlock(&journal->j_state_lock);
624 return jbd2_journal_bmap(journal, blocknr, retp); 622 return jbd2_journal_bmap(journal, blocknr, retp);
625} 623}
626 624
@@ -840,7 +838,7 @@ static journal_t * journal_init_common (void)
840 mutex_init(&journal->j_checkpoint_mutex); 838 mutex_init(&journal->j_checkpoint_mutex);
841 spin_lock_init(&journal->j_revoke_lock); 839 spin_lock_init(&journal->j_revoke_lock);
842 spin_lock_init(&journal->j_list_lock); 840 spin_lock_init(&journal->j_list_lock);
843 spin_lock_init(&journal->j_state_lock); 841 rwlock_init(&journal->j_state_lock);
844 842
845 journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); 843 journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
846 journal->j_min_batch_time = 0; 844 journal->j_min_batch_time = 0;
@@ -1106,14 +1104,14 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
1106 set_buffer_uptodate(bh); 1104 set_buffer_uptodate(bh);
1107 } 1105 }
1108 1106
1109 spin_lock(&journal->j_state_lock); 1107 read_lock(&journal->j_state_lock);
1110 jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", 1108 jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
1111 journal->j_tail, journal->j_tail_sequence, journal->j_errno); 1109 journal->j_tail, journal->j_tail_sequence, journal->j_errno);
1112 1110
1113 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 1111 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
1114 sb->s_start = cpu_to_be32(journal->j_tail); 1112 sb->s_start = cpu_to_be32(journal->j_tail);
1115 sb->s_errno = cpu_to_be32(journal->j_errno); 1113 sb->s_errno = cpu_to_be32(journal->j_errno);
1116 spin_unlock(&journal->j_state_lock); 1114 read_unlock(&journal->j_state_lock);
1117 1115
1118 BUFFER_TRACE(bh, "marking dirty"); 1116 BUFFER_TRACE(bh, "marking dirty");
1119 mark_buffer_dirty(bh); 1117 mark_buffer_dirty(bh);
@@ -1134,12 +1132,12 @@ out:
1134 * any future commit will have to be careful to update the 1132 * any future commit will have to be careful to update the
1135 * superblock again to re-record the true start of the log. */ 1133 * superblock again to re-record the true start of the log. */
1136 1134
1137 spin_lock(&journal->j_state_lock); 1135 write_lock(&journal->j_state_lock);
1138 if (sb->s_start) 1136 if (sb->s_start)
1139 journal->j_flags &= ~JBD2_FLUSHED; 1137 journal->j_flags &= ~JBD2_FLUSHED;
1140 else 1138 else
1141 journal->j_flags |= JBD2_FLUSHED; 1139 journal->j_flags |= JBD2_FLUSHED;
1142 spin_unlock(&journal->j_state_lock); 1140 write_unlock(&journal->j_state_lock);
1143} 1141}
1144 1142
1145/* 1143/*
@@ -1551,7 +1549,7 @@ int jbd2_journal_flush(journal_t *journal)
1551 transaction_t *transaction = NULL; 1549 transaction_t *transaction = NULL;
1552 unsigned long old_tail; 1550 unsigned long old_tail;
1553 1551
1554 spin_lock(&journal->j_state_lock); 1552 write_lock(&journal->j_state_lock);
1555 1553
1556 /* Force everything buffered to the log... */ 1554 /* Force everything buffered to the log... */
1557 if (journal->j_running_transaction) { 1555 if (journal->j_running_transaction) {
@@ -1564,10 +1562,10 @@ int jbd2_journal_flush(journal_t *journal)
1564 if (transaction) { 1562 if (transaction) {
1565 tid_t tid = transaction->t_tid; 1563 tid_t tid = transaction->t_tid;
1566 1564
1567 spin_unlock(&journal->j_state_lock); 1565 write_unlock(&journal->j_state_lock);
1568 jbd2_log_wait_commit(journal, tid); 1566 jbd2_log_wait_commit(journal, tid);
1569 } else { 1567 } else {
1570 spin_unlock(&journal->j_state_lock); 1568 write_unlock(&journal->j_state_lock);
1571 } 1569 }
1572 1570
1573 /* ...and flush everything in the log out to disk. */ 1571 /* ...and flush everything in the log out to disk. */
@@ -1591,12 +1589,12 @@ int jbd2_journal_flush(journal_t *journal)
1591 * the magic code for a fully-recovered superblock. Any future 1589 * the magic code for a fully-recovered superblock. Any future
1592 * commits of data to the journal will restore the current 1590 * commits of data to the journal will restore the current
1593 * s_start value. */ 1591 * s_start value. */
1594 spin_lock(&journal->j_state_lock); 1592 write_lock(&journal->j_state_lock);
1595 old_tail = journal->j_tail; 1593 old_tail = journal->j_tail;
1596 journal->j_tail = 0; 1594 journal->j_tail = 0;
1597 spin_unlock(&journal->j_state_lock); 1595 write_unlock(&journal->j_state_lock);
1598 jbd2_journal_update_superblock(journal, 1); 1596 jbd2_journal_update_superblock(journal, 1);
1599 spin_lock(&journal->j_state_lock); 1597 write_lock(&journal->j_state_lock);
1600 journal->j_tail = old_tail; 1598 journal->j_tail = old_tail;
1601 1599
1602 J_ASSERT(!journal->j_running_transaction); 1600 J_ASSERT(!journal->j_running_transaction);
@@ -1604,7 +1602,7 @@ int jbd2_journal_flush(journal_t *journal)
1604 J_ASSERT(!journal->j_checkpoint_transactions); 1602 J_ASSERT(!journal->j_checkpoint_transactions);
1605 J_ASSERT(journal->j_head == journal->j_tail); 1603 J_ASSERT(journal->j_head == journal->j_tail);
1606 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); 1604 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
1607 spin_unlock(&journal->j_state_lock); 1605 write_unlock(&journal->j_state_lock);
1608 return 0; 1606 return 0;
1609} 1607}
1610 1608
@@ -1668,12 +1666,12 @@ void __jbd2_journal_abort_hard(journal_t *journal)
1668 printk(KERN_ERR "Aborting journal on device %s.\n", 1666 printk(KERN_ERR "Aborting journal on device %s.\n",
1669 journal->j_devname); 1667 journal->j_devname);
1670 1668
1671 spin_lock(&journal->j_state_lock); 1669 write_lock(&journal->j_state_lock);
1672 journal->j_flags |= JBD2_ABORT; 1670 journal->j_flags |= JBD2_ABORT;
1673 transaction = journal->j_running_transaction; 1671 transaction = journal->j_running_transaction;
1674 if (transaction) 1672 if (transaction)
1675 __jbd2_log_start_commit(journal, transaction->t_tid); 1673 __jbd2_log_start_commit(journal, transaction->t_tid);
1676 spin_unlock(&journal->j_state_lock); 1674 write_unlock(&journal->j_state_lock);
1677} 1675}
1678 1676
1679/* Soft abort: record the abort error status in the journal superblock, 1677/* Soft abort: record the abort error status in the journal superblock,
@@ -1758,12 +1756,12 @@ int jbd2_journal_errno(journal_t *journal)
1758{ 1756{
1759 int err; 1757 int err;
1760 1758
1761 spin_lock(&journal->j_state_lock); 1759 read_lock(&journal->j_state_lock);
1762 if (journal->j_flags & JBD2_ABORT) 1760 if (journal->j_flags & JBD2_ABORT)
1763 err = -EROFS; 1761 err = -EROFS;
1764 else 1762 else
1765 err = journal->j_errno; 1763 err = journal->j_errno;
1766 spin_unlock(&journal->j_state_lock); 1764 read_unlock(&journal->j_state_lock);
1767 return err; 1765 return err;
1768} 1766}
1769 1767
@@ -1778,12 +1776,12 @@ int jbd2_journal_clear_err(journal_t *journal)
1778{ 1776{
1779 int err = 0; 1777 int err = 0;
1780 1778
1781 spin_lock(&journal->j_state_lock); 1779 write_lock(&journal->j_state_lock);
1782 if (journal->j_flags & JBD2_ABORT) 1780 if (journal->j_flags & JBD2_ABORT)
1783 err = -EROFS; 1781 err = -EROFS;
1784 else 1782 else
1785 journal->j_errno = 0; 1783 journal->j_errno = 0;
1786 spin_unlock(&journal->j_state_lock); 1784 write_unlock(&journal->j_state_lock);
1787 return err; 1785 return err;
1788} 1786}
1789 1787
@@ -1796,10 +1794,10 @@ int jbd2_journal_clear_err(journal_t *journal)
1796 */ 1794 */
1797void jbd2_journal_ack_err(journal_t *journal) 1795void jbd2_journal_ack_err(journal_t *journal)
1798{ 1796{
1799 spin_lock(&journal->j_state_lock); 1797 write_lock(&journal->j_state_lock);
1800 if (journal->j_errno) 1798 if (journal->j_errno)
1801 journal->j_flags |= JBD2_ACK_ERR; 1799 journal->j_flags |= JBD2_ACK_ERR;
1802 spin_unlock(&journal->j_state_lock); 1800 write_unlock(&journal->j_state_lock);
1803} 1801}
1804 1802
1805int jbd2_journal_blocks_per_page(struct inode *inode) 1803int jbd2_journal_blocks_per_page(struct inode *inode)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 9c64c7ec48d4..663065142b42 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -124,36 +124,38 @@ alloc_transaction:
124 124
125 jbd_debug(3, "New handle %p going live.\n", handle); 125 jbd_debug(3, "New handle %p going live.\n", handle);
126 126
127repeat:
128
129 /* 127 /*
130 * We need to hold j_state_lock until t_updates has been incremented, 128 * We need to hold j_state_lock until t_updates has been incremented,
131 * for proper journal barrier handling 129 * for proper journal barrier handling
132 */ 130 */
133 spin_lock(&journal->j_state_lock); 131repeat:
134repeat_locked: 132 read_lock(&journal->j_state_lock);
135 if (is_journal_aborted(journal) || 133 if (is_journal_aborted(journal) ||
136 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { 134 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
137 spin_unlock(&journal->j_state_lock); 135 read_unlock(&journal->j_state_lock);
138 kfree(new_transaction); 136 kfree(new_transaction);
139 return -EROFS; 137 return -EROFS;
140 } 138 }
141 139
142 /* Wait on the journal's transaction barrier if necessary */ 140 /* Wait on the journal's transaction barrier if necessary */
143 if (journal->j_barrier_count) { 141 if (journal->j_barrier_count) {
144 spin_unlock(&journal->j_state_lock); 142 read_unlock(&journal->j_state_lock);
145 wait_event(journal->j_wait_transaction_locked, 143 wait_event(journal->j_wait_transaction_locked,
146 journal->j_barrier_count == 0); 144 journal->j_barrier_count == 0);
147 goto repeat; 145 goto repeat;
148 } 146 }
149 147
150 if (!journal->j_running_transaction) { 148 if (!journal->j_running_transaction) {
151 if (!new_transaction) { 149 read_unlock(&journal->j_state_lock);
152 spin_unlock(&journal->j_state_lock); 150 if (!new_transaction)
153 goto alloc_transaction; 151 goto alloc_transaction;
152 write_lock(&journal->j_state_lock);
153 if (!journal->j_running_transaction) {
154 jbd2_get_transaction(journal, new_transaction);
155 new_transaction = NULL;
154 } 156 }
155 jbd2_get_transaction(journal, new_transaction); 157 write_unlock(&journal->j_state_lock);
156 new_transaction = NULL; 158 goto repeat;
157 } 159 }
158 160
159 transaction = journal->j_running_transaction; 161 transaction = journal->j_running_transaction;
@@ -167,7 +169,7 @@ repeat_locked:
167 169
168 prepare_to_wait(&journal->j_wait_transaction_locked, 170 prepare_to_wait(&journal->j_wait_transaction_locked,
169 &wait, TASK_UNINTERRUPTIBLE); 171 &wait, TASK_UNINTERRUPTIBLE);
170 spin_unlock(&journal->j_state_lock); 172 read_unlock(&journal->j_state_lock);
171 schedule(); 173 schedule();
172 finish_wait(&journal->j_wait_transaction_locked, &wait); 174 finish_wait(&journal->j_wait_transaction_locked, &wait);
173 goto repeat; 175 goto repeat;
@@ -194,7 +196,7 @@ repeat_locked:
194 prepare_to_wait(&journal->j_wait_transaction_locked, &wait, 196 prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
195 TASK_UNINTERRUPTIBLE); 197 TASK_UNINTERRUPTIBLE);
196 __jbd2_log_start_commit(journal, transaction->t_tid); 198 __jbd2_log_start_commit(journal, transaction->t_tid);
197 spin_unlock(&journal->j_state_lock); 199 read_unlock(&journal->j_state_lock);
198 schedule(); 200 schedule();
199 finish_wait(&journal->j_wait_transaction_locked, &wait); 201 finish_wait(&journal->j_wait_transaction_locked, &wait);
200 goto repeat; 202 goto repeat;
@@ -228,8 +230,12 @@ repeat_locked:
228 if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { 230 if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
229 jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); 231 jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
230 spin_unlock(&transaction->t_handle_lock); 232 spin_unlock(&transaction->t_handle_lock);
231 __jbd2_log_wait_for_space(journal); 233 read_unlock(&journal->j_state_lock);
232 goto repeat_locked; 234 write_lock(&journal->j_state_lock);
235 if (__jbd2_log_space_left(journal) < jbd_space_needed(journal))
236 __jbd2_log_wait_for_space(journal);
237 write_unlock(&journal->j_state_lock);
238 goto repeat;
233 } 239 }
234 240
235 /* OK, account for the buffers that this operation expects to 241 /* OK, account for the buffers that this operation expects to
@@ -250,7 +256,7 @@ repeat_locked:
250 atomic_read(&transaction->t_outstanding_credits), 256 atomic_read(&transaction->t_outstanding_credits),
251 __jbd2_log_space_left(journal)); 257 __jbd2_log_space_left(journal));
252 spin_unlock(&transaction->t_handle_lock); 258 spin_unlock(&transaction->t_handle_lock);
253 spin_unlock(&journal->j_state_lock); 259 read_unlock(&journal->j_state_lock);
254 260
255 lock_map_acquire(&handle->h_lockdep_map); 261 lock_map_acquire(&handle->h_lockdep_map);
256 kfree(new_transaction); 262 kfree(new_transaction);
@@ -362,7 +368,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
362 368
363 result = 1; 369 result = 1;
364 370
365 spin_lock(&journal->j_state_lock); 371 read_lock(&journal->j_state_lock);
366 372
367 /* Don't extend a locked-down transaction! */ 373 /* Don't extend a locked-down transaction! */
368 if (handle->h_transaction->t_state != T_RUNNING) { 374 if (handle->h_transaction->t_state != T_RUNNING) {
@@ -394,7 +400,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
394unlock: 400unlock:
395 spin_unlock(&transaction->t_handle_lock); 401 spin_unlock(&transaction->t_handle_lock);
396error_out: 402error_out:
397 spin_unlock(&journal->j_state_lock); 403 read_unlock(&journal->j_state_lock);
398out: 404out:
399 return result; 405 return result;
400} 406}
@@ -432,7 +438,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
432 J_ASSERT(atomic_read(&transaction->t_updates) > 0); 438 J_ASSERT(atomic_read(&transaction->t_updates) > 0);
433 J_ASSERT(journal_current_handle() == handle); 439 J_ASSERT(journal_current_handle() == handle);
434 440
435 spin_lock(&journal->j_state_lock); 441 read_lock(&journal->j_state_lock);
436 spin_lock(&transaction->t_handle_lock); 442 spin_lock(&transaction->t_handle_lock);
437 atomic_sub(handle->h_buffer_credits, 443 atomic_sub(handle->h_buffer_credits,
438 &transaction->t_outstanding_credits); 444 &transaction->t_outstanding_credits);
@@ -442,7 +448,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
442 448
443 jbd_debug(2, "restarting handle %p\n", handle); 449 jbd_debug(2, "restarting handle %p\n", handle);
444 __jbd2_log_start_commit(journal, transaction->t_tid); 450 __jbd2_log_start_commit(journal, transaction->t_tid);
445 spin_unlock(&journal->j_state_lock); 451 read_unlock(&journal->j_state_lock);
446 452
447 lock_map_release(&handle->h_lockdep_map); 453 lock_map_release(&handle->h_lockdep_map);
448 handle->h_buffer_credits = nblocks; 454 handle->h_buffer_credits = nblocks;
@@ -472,7 +478,7 @@ void jbd2_journal_lock_updates(journal_t *journal)
472{ 478{
473 DEFINE_WAIT(wait); 479 DEFINE_WAIT(wait);
474 480
475 spin_lock(&journal->j_state_lock); 481 write_lock(&journal->j_state_lock);
476 ++journal->j_barrier_count; 482 ++journal->j_barrier_count;
477 483
478 /* Wait until there are no running updates */ 484 /* Wait until there are no running updates */
@@ -490,12 +496,12 @@ void jbd2_journal_lock_updates(journal_t *journal)
490 prepare_to_wait(&journal->j_wait_updates, &wait, 496 prepare_to_wait(&journal->j_wait_updates, &wait,
491 TASK_UNINTERRUPTIBLE); 497 TASK_UNINTERRUPTIBLE);
492 spin_unlock(&transaction->t_handle_lock); 498 spin_unlock(&transaction->t_handle_lock);
493 spin_unlock(&journal->j_state_lock); 499 write_unlock(&journal->j_state_lock);
494 schedule(); 500 schedule();
495 finish_wait(&journal->j_wait_updates, &wait); 501 finish_wait(&journal->j_wait_updates, &wait);
496 spin_lock(&journal->j_state_lock); 502 write_lock(&journal->j_state_lock);
497 } 503 }
498 spin_unlock(&journal->j_state_lock); 504 write_unlock(&journal->j_state_lock);
499 505
500 /* 506 /*
501 * We have now established a barrier against other normal updates, but 507 * We have now established a barrier against other normal updates, but
@@ -519,9 +525,9 @@ void jbd2_journal_unlock_updates (journal_t *journal)
519 J_ASSERT(journal->j_barrier_count != 0); 525 J_ASSERT(journal->j_barrier_count != 0);
520 526
521 mutex_unlock(&journal->j_barrier); 527 mutex_unlock(&journal->j_barrier);
522 spin_lock(&journal->j_state_lock); 528 write_lock(&journal->j_state_lock);
523 --journal->j_barrier_count; 529 --journal->j_barrier_count;
524 spin_unlock(&journal->j_state_lock); 530 write_unlock(&journal->j_state_lock);
525 wake_up(&journal->j_wait_transaction_locked); 531 wake_up(&journal->j_wait_transaction_locked);
526} 532}
527 533
@@ -1314,9 +1320,9 @@ int jbd2_journal_stop(handle_t *handle)
1314 1320
1315 journal->j_last_sync_writer = pid; 1321 journal->j_last_sync_writer = pid;
1316 1322
1317 spin_lock(&journal->j_state_lock); 1323 read_lock(&journal->j_state_lock);
1318 commit_time = journal->j_average_commit_time; 1324 commit_time = journal->j_average_commit_time;
1319 spin_unlock(&journal->j_state_lock); 1325 read_unlock(&journal->j_state_lock);
1320 1326
1321 trans_time = ktime_to_ns(ktime_sub(ktime_get(), 1327 trans_time = ktime_to_ns(ktime_sub(ktime_get(),
1322 transaction->t_start_time)); 1328 transaction->t_start_time));
@@ -1748,7 +1754,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1748 goto zap_buffer_unlocked; 1754 goto zap_buffer_unlocked;
1749 1755
1750 /* OK, we have data buffer in journaled mode */ 1756 /* OK, we have data buffer in journaled mode */
1751 spin_lock(&journal->j_state_lock); 1757 write_lock(&journal->j_state_lock);
1752 jbd_lock_bh_state(bh); 1758 jbd_lock_bh_state(bh);
1753 spin_lock(&journal->j_list_lock); 1759 spin_lock(&journal->j_list_lock);
1754 1760
@@ -1801,7 +1807,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1801 jbd2_journal_put_journal_head(jh); 1807 jbd2_journal_put_journal_head(jh);
1802 spin_unlock(&journal->j_list_lock); 1808 spin_unlock(&journal->j_list_lock);
1803 jbd_unlock_bh_state(bh); 1809 jbd_unlock_bh_state(bh);
1804 spin_unlock(&journal->j_state_lock); 1810 write_unlock(&journal->j_state_lock);
1805 return ret; 1811 return ret;
1806 } else { 1812 } else {
1807 /* There is no currently-running transaction. So the 1813 /* There is no currently-running transaction. So the
@@ -1815,7 +1821,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1815 jbd2_journal_put_journal_head(jh); 1821 jbd2_journal_put_journal_head(jh);
1816 spin_unlock(&journal->j_list_lock); 1822 spin_unlock(&journal->j_list_lock);
1817 jbd_unlock_bh_state(bh); 1823 jbd_unlock_bh_state(bh);
1818 spin_unlock(&journal->j_state_lock); 1824 write_unlock(&journal->j_state_lock);
1819 return ret; 1825 return ret;
1820 } else { 1826 } else {
1821 /* The orphan record's transaction has 1827 /* The orphan record's transaction has
@@ -1839,7 +1845,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1839 jbd2_journal_put_journal_head(jh); 1845 jbd2_journal_put_journal_head(jh);
1840 spin_unlock(&journal->j_list_lock); 1846 spin_unlock(&journal->j_list_lock);
1841 jbd_unlock_bh_state(bh); 1847 jbd_unlock_bh_state(bh);
1842 spin_unlock(&journal->j_state_lock); 1848 write_unlock(&journal->j_state_lock);
1843 return 0; 1849 return 0;
1844 } else { 1850 } else {
1845 /* Good, the buffer belongs to the running transaction. 1851 /* Good, the buffer belongs to the running transaction.
@@ -1858,7 +1864,7 @@ zap_buffer:
1858zap_buffer_no_jh: 1864zap_buffer_no_jh:
1859 spin_unlock(&journal->j_list_lock); 1865 spin_unlock(&journal->j_list_lock);
1860 jbd_unlock_bh_state(bh); 1866 jbd_unlock_bh_state(bh);
1861 spin_unlock(&journal->j_state_lock); 1867 write_unlock(&journal->j_state_lock);
1862zap_buffer_unlocked: 1868zap_buffer_unlocked:
1863 clear_buffer_dirty(bh); 1869 clear_buffer_dirty(bh);
1864 J_ASSERT_BH(bh, !buffer_jbddirty(bh)); 1870 J_ASSERT_BH(bh, !buffer_jbddirty(bh));
@@ -2165,9 +2171,9 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
2165 /* Locks are here just to force reading of recent values, it is 2171 /* Locks are here just to force reading of recent values, it is
2166 * enough that the transaction was not committing before we started 2172 * enough that the transaction was not committing before we started
2167 * a transaction adding the inode to orphan list */ 2173 * a transaction adding the inode to orphan list */
2168 spin_lock(&journal->j_state_lock); 2174 read_lock(&journal->j_state_lock);
2169 commit_trans = journal->j_committing_transaction; 2175 commit_trans = journal->j_committing_transaction;
2170 spin_unlock(&journal->j_state_lock); 2176 read_unlock(&journal->j_state_lock);
2171 spin_lock(&journal->j_list_lock); 2177 spin_lock(&journal->j_list_lock);
2172 inode_trans = jinode->i_transaction; 2178 inode_trans = jinode->i_transaction;
2173 spin_unlock(&journal->j_list_lock); 2179 spin_unlock(&journal->j_list_lock);