aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2/journal.c
diff options
context:
space:
mode:
author Theodore Ts'o <tytso@mit.edu> 2010-08-03 21:35:12 -0400
committer Theodore Ts'o <tytso@mit.edu> 2010-08-03 21:35:12 -0400
commit a931da6ac9331a6c80dd91c199105806f2336188 (patch)
tree 2d4ea766def9a98d21c2379f41e796b0e57ccd6b /fs/jbd2/journal.c
parent a51dca9cd3bb4ec5a05bfb6feabf024a5c808a37 (diff)
jbd2: Change j_state_lock to be a rwlock_t
Lockstat reports have shown that j_state_lock is a major source of lock contention, especially on systems with more than 4 CPU cores. So change it to be a read/write spinlock. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2/journal.c')
-rw-r--r-- fs/jbd2/journal.c 94
1 files changed, 46 insertions, 48 deletions
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index a79d3345b55a..e7bf0fd9cec7 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -142,7 +142,7 @@ static int kjournald2(void *arg)
142 /* 142 /*
143 * And now, wait forever for commit wakeup events. 143 * And now, wait forever for commit wakeup events.
144 */ 144 */
145 spin_lock(&journal->j_state_lock); 145 write_lock(&journal->j_state_lock);
146 146
147loop: 147loop:
148 if (journal->j_flags & JBD2_UNMOUNT) 148 if (journal->j_flags & JBD2_UNMOUNT)
@@ -153,10 +153,10 @@ loop:
153 153
154 if (journal->j_commit_sequence != journal->j_commit_request) { 154 if (journal->j_commit_sequence != journal->j_commit_request) {
155 jbd_debug(1, "OK, requests differ\n"); 155 jbd_debug(1, "OK, requests differ\n");
156 spin_unlock(&journal->j_state_lock); 156 write_unlock(&journal->j_state_lock);
157 del_timer_sync(&journal->j_commit_timer); 157 del_timer_sync(&journal->j_commit_timer);
158 jbd2_journal_commit_transaction(journal); 158 jbd2_journal_commit_transaction(journal);
159 spin_lock(&journal->j_state_lock); 159 write_lock(&journal->j_state_lock);
160 goto loop; 160 goto loop;
161 } 161 }
162 162
@@ -168,9 +168,9 @@ loop:
168 * be already stopped. 168 * be already stopped.
169 */ 169 */
170 jbd_debug(1, "Now suspending kjournald2\n"); 170 jbd_debug(1, "Now suspending kjournald2\n");
171 spin_unlock(&journal->j_state_lock); 171 write_unlock(&journal->j_state_lock);
172 refrigerator(); 172 refrigerator();
173 spin_lock(&journal->j_state_lock); 173 write_lock(&journal->j_state_lock);
174 } else { 174 } else {
175 /* 175 /*
176 * We assume on resume that commits are already there, 176 * We assume on resume that commits are already there,
@@ -190,9 +190,9 @@ loop:
190 if (journal->j_flags & JBD2_UNMOUNT) 190 if (journal->j_flags & JBD2_UNMOUNT)
191 should_sleep = 0; 191 should_sleep = 0;
192 if (should_sleep) { 192 if (should_sleep) {
193 spin_unlock(&journal->j_state_lock); 193 write_unlock(&journal->j_state_lock);
194 schedule(); 194 schedule();
195 spin_lock(&journal->j_state_lock); 195 write_lock(&journal->j_state_lock);
196 } 196 }
197 finish_wait(&journal->j_wait_commit, &wait); 197 finish_wait(&journal->j_wait_commit, &wait);
198 } 198 }
@@ -210,7 +210,7 @@ loop:
210 goto loop; 210 goto loop;
211 211
212end_loop: 212end_loop:
213 spin_unlock(&journal->j_state_lock); 213 write_unlock(&journal->j_state_lock);
214 del_timer_sync(&journal->j_commit_timer); 214 del_timer_sync(&journal->j_commit_timer);
215 journal->j_task = NULL; 215 journal->j_task = NULL;
216 wake_up(&journal->j_wait_done_commit); 216 wake_up(&journal->j_wait_done_commit);
@@ -233,16 +233,16 @@ static int jbd2_journal_start_thread(journal_t *journal)
233 233
234static void journal_kill_thread(journal_t *journal) 234static void journal_kill_thread(journal_t *journal)
235{ 235{
236 spin_lock(&journal->j_state_lock); 236 write_lock(&journal->j_state_lock);
237 journal->j_flags |= JBD2_UNMOUNT; 237 journal->j_flags |= JBD2_UNMOUNT;
238 238
239 while (journal->j_task) { 239 while (journal->j_task) {
240 wake_up(&journal->j_wait_commit); 240 wake_up(&journal->j_wait_commit);
241 spin_unlock(&journal->j_state_lock); 241 write_unlock(&journal->j_state_lock);
242 wait_event(journal->j_wait_done_commit, journal->j_task == NULL); 242 wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
243 spin_lock(&journal->j_state_lock); 243 write_lock(&journal->j_state_lock);
244 } 244 }
245 spin_unlock(&journal->j_state_lock); 245 write_unlock(&journal->j_state_lock);
246} 246}
247 247
248/* 248/*
@@ -452,7 +452,7 @@ int __jbd2_log_space_left(journal_t *journal)
452{ 452{
453 int left = journal->j_free; 453 int left = journal->j_free;
454 454
455 assert_spin_locked(&journal->j_state_lock); 455 /* assert_spin_locked(&journal->j_state_lock); */
456 456
457 /* 457 /*
458 * Be pessimistic here about the number of those free blocks which 458 * Be pessimistic here about the number of those free blocks which
@@ -497,9 +497,9 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid)
497{ 497{
498 int ret; 498 int ret;
499 499
500 spin_lock(&journal->j_state_lock); 500 write_lock(&journal->j_state_lock);
501 ret = __jbd2_log_start_commit(journal, tid); 501 ret = __jbd2_log_start_commit(journal, tid);
502 spin_unlock(&journal->j_state_lock); 502 write_unlock(&journal->j_state_lock);
503 return ret; 503 return ret;
504} 504}
505 505
@@ -518,7 +518,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
518 transaction_t *transaction = NULL; 518 transaction_t *transaction = NULL;
519 tid_t tid; 519 tid_t tid;
520 520
521 spin_lock(&journal->j_state_lock); 521 read_lock(&journal->j_state_lock);
522 if (journal->j_running_transaction && !current->journal_info) { 522 if (journal->j_running_transaction && !current->journal_info) {
523 transaction = journal->j_running_transaction; 523 transaction = journal->j_running_transaction;
524 __jbd2_log_start_commit(journal, transaction->t_tid); 524 __jbd2_log_start_commit(journal, transaction->t_tid);
@@ -526,12 +526,12 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
526 transaction = journal->j_committing_transaction; 526 transaction = journal->j_committing_transaction;
527 527
528 if (!transaction) { 528 if (!transaction) {
529 spin_unlock(&journal->j_state_lock); 529 read_unlock(&journal->j_state_lock);
530 return 0; /* Nothing to retry */ 530 return 0; /* Nothing to retry */
531 } 531 }
532 532
533 tid = transaction->t_tid; 533 tid = transaction->t_tid;
534 spin_unlock(&journal->j_state_lock); 534 read_unlock(&journal->j_state_lock);
535 jbd2_log_wait_commit(journal, tid); 535 jbd2_log_wait_commit(journal, tid);
536 return 1; 536 return 1;
537} 537}
@@ -545,7 +545,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
545{ 545{
546 int ret = 0; 546 int ret = 0;
547 547
548 spin_lock(&journal->j_state_lock); 548 write_lock(&journal->j_state_lock);
549 if (journal->j_running_transaction) { 549 if (journal->j_running_transaction) {
550 tid_t tid = journal->j_running_transaction->t_tid; 550 tid_t tid = journal->j_running_transaction->t_tid;
551 551
@@ -564,7 +564,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
564 *ptid = journal->j_committing_transaction->t_tid; 564 *ptid = journal->j_committing_transaction->t_tid;
565 ret = 1; 565 ret = 1;
566 } 566 }
567 spin_unlock(&journal->j_state_lock); 567 write_unlock(&journal->j_state_lock);
568 return ret; 568 return ret;
569} 569}
570 570
@@ -576,26 +576,24 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
576{ 576{
577 int err = 0; 577 int err = 0;
578 578
579 read_lock(&journal->j_state_lock);
579#ifdef CONFIG_JBD2_DEBUG 580#ifdef CONFIG_JBD2_DEBUG
580 spin_lock(&journal->j_state_lock);
581 if (!tid_geq(journal->j_commit_request, tid)) { 581 if (!tid_geq(journal->j_commit_request, tid)) {
582 printk(KERN_EMERG 582 printk(KERN_EMERG
583 "%s: error: j_commit_request=%d, tid=%d\n", 583 "%s: error: j_commit_request=%d, tid=%d\n",
584 __func__, journal->j_commit_request, tid); 584 __func__, journal->j_commit_request, tid);
585 } 585 }
586 spin_unlock(&journal->j_state_lock);
587#endif 586#endif
588 spin_lock(&journal->j_state_lock);
589 while (tid_gt(tid, journal->j_commit_sequence)) { 587 while (tid_gt(tid, journal->j_commit_sequence)) {
590 jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", 588 jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
591 tid, journal->j_commit_sequence); 589 tid, journal->j_commit_sequence);
592 wake_up(&journal->j_wait_commit); 590 wake_up(&journal->j_wait_commit);
593 spin_unlock(&journal->j_state_lock); 591 read_unlock(&journal->j_state_lock);
594 wait_event(journal->j_wait_done_commit, 592 wait_event(journal->j_wait_done_commit,
595 !tid_gt(tid, journal->j_commit_sequence)); 593 !tid_gt(tid, journal->j_commit_sequence));
596 spin_lock(&journal->j_state_lock); 594 read_lock(&journal->j_state_lock);
597 } 595 }
598 spin_unlock(&journal->j_state_lock); 596 read_unlock(&journal->j_state_lock);
599 597
600 if (unlikely(is_journal_aborted(journal))) { 598 if (unlikely(is_journal_aborted(journal))) {
601 printk(KERN_EMERG "journal commit I/O error\n"); 599 printk(KERN_EMERG "journal commit I/O error\n");
@@ -612,7 +610,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
612{ 610{
613 unsigned long blocknr; 611 unsigned long blocknr;
614 612
615 spin_lock(&journal->j_state_lock); 613 write_lock(&journal->j_state_lock);
616 J_ASSERT(journal->j_free > 1); 614 J_ASSERT(journal->j_free > 1);
617 615
618 blocknr = journal->j_head; 616 blocknr = journal->j_head;
@@ -620,7 +618,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
620 journal->j_free--; 618 journal->j_free--;
621 if (journal->j_head == journal->j_last) 619 if (journal->j_head == journal->j_last)
622 journal->j_head = journal->j_first; 620 journal->j_head = journal->j_first;
623 spin_unlock(&journal->j_state_lock); 621 write_unlock(&journal->j_state_lock);
624 return jbd2_journal_bmap(journal, blocknr, retp); 622 return jbd2_journal_bmap(journal, blocknr, retp);
625} 623}
626 624
@@ -840,7 +838,7 @@ static journal_t * journal_init_common (void)
840 mutex_init(&journal->j_checkpoint_mutex); 838 mutex_init(&journal->j_checkpoint_mutex);
841 spin_lock_init(&journal->j_revoke_lock); 839 spin_lock_init(&journal->j_revoke_lock);
842 spin_lock_init(&journal->j_list_lock); 840 spin_lock_init(&journal->j_list_lock);
843 spin_lock_init(&journal->j_state_lock); 841 rwlock_init(&journal->j_state_lock);
844 842
845 journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); 843 journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
846 journal->j_min_batch_time = 0; 844 journal->j_min_batch_time = 0;
@@ -1106,14 +1104,14 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
1106 set_buffer_uptodate(bh); 1104 set_buffer_uptodate(bh);
1107 } 1105 }
1108 1106
1109 spin_lock(&journal->j_state_lock); 1107 read_lock(&journal->j_state_lock);
1110 jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", 1108 jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
1111 journal->j_tail, journal->j_tail_sequence, journal->j_errno); 1109 journal->j_tail, journal->j_tail_sequence, journal->j_errno);
1112 1110
1113 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 1111 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
1114 sb->s_start = cpu_to_be32(journal->j_tail); 1112 sb->s_start = cpu_to_be32(journal->j_tail);
1115 sb->s_errno = cpu_to_be32(journal->j_errno); 1113 sb->s_errno = cpu_to_be32(journal->j_errno);
1116 spin_unlock(&journal->j_state_lock); 1114 read_unlock(&journal->j_state_lock);
1117 1115
1118 BUFFER_TRACE(bh, "marking dirty"); 1116 BUFFER_TRACE(bh, "marking dirty");
1119 mark_buffer_dirty(bh); 1117 mark_buffer_dirty(bh);
@@ -1134,12 +1132,12 @@ out:
1134 * any future commit will have to be careful to update the 1132 * any future commit will have to be careful to update the
1135 * superblock again to re-record the true start of the log. */ 1133 * superblock again to re-record the true start of the log. */
1136 1134
1137 spin_lock(&journal->j_state_lock); 1135 write_lock(&journal->j_state_lock);
1138 if (sb->s_start) 1136 if (sb->s_start)
1139 journal->j_flags &= ~JBD2_FLUSHED; 1137 journal->j_flags &= ~JBD2_FLUSHED;
1140 else 1138 else
1141 journal->j_flags |= JBD2_FLUSHED; 1139 journal->j_flags |= JBD2_FLUSHED;
1142 spin_unlock(&journal->j_state_lock); 1140 write_unlock(&journal->j_state_lock);
1143} 1141}
1144 1142
1145/* 1143/*
@@ -1551,7 +1549,7 @@ int jbd2_journal_flush(journal_t *journal)
1551 transaction_t *transaction = NULL; 1549 transaction_t *transaction = NULL;
1552 unsigned long old_tail; 1550 unsigned long old_tail;
1553 1551
1554 spin_lock(&journal->j_state_lock); 1552 write_lock(&journal->j_state_lock);
1555 1553
1556 /* Force everything buffered to the log... */ 1554 /* Force everything buffered to the log... */
1557 if (journal->j_running_transaction) { 1555 if (journal->j_running_transaction) {
@@ -1564,10 +1562,10 @@ int jbd2_journal_flush(journal_t *journal)
1564 if (transaction) { 1562 if (transaction) {
1565 tid_t tid = transaction->t_tid; 1563 tid_t tid = transaction->t_tid;
1566 1564
1567 spin_unlock(&journal->j_state_lock); 1565 write_unlock(&journal->j_state_lock);
1568 jbd2_log_wait_commit(journal, tid); 1566 jbd2_log_wait_commit(journal, tid);
1569 } else { 1567 } else {
1570 spin_unlock(&journal->j_state_lock); 1568 write_unlock(&journal->j_state_lock);
1571 } 1569 }
1572 1570
1573 /* ...and flush everything in the log out to disk. */ 1571 /* ...and flush everything in the log out to disk. */
@@ -1591,12 +1589,12 @@ int jbd2_journal_flush(journal_t *journal)
1591 * the magic code for a fully-recovered superblock. Any future 1589 * the magic code for a fully-recovered superblock. Any future
1592 * commits of data to the journal will restore the current 1590 * commits of data to the journal will restore the current
1593 * s_start value. */ 1591 * s_start value. */
1594 spin_lock(&journal->j_state_lock); 1592 write_lock(&journal->j_state_lock);
1595 old_tail = journal->j_tail; 1593 old_tail = journal->j_tail;
1596 journal->j_tail = 0; 1594 journal->j_tail = 0;
1597 spin_unlock(&journal->j_state_lock); 1595 write_unlock(&journal->j_state_lock);
1598 jbd2_journal_update_superblock(journal, 1); 1596 jbd2_journal_update_superblock(journal, 1);
1599 spin_lock(&journal->j_state_lock); 1597 write_lock(&journal->j_state_lock);
1600 journal->j_tail = old_tail; 1598 journal->j_tail = old_tail;
1601 1599
1602 J_ASSERT(!journal->j_running_transaction); 1600 J_ASSERT(!journal->j_running_transaction);
@@ -1604,7 +1602,7 @@ int jbd2_journal_flush(journal_t *journal)
1604 J_ASSERT(!journal->j_checkpoint_transactions); 1602 J_ASSERT(!journal->j_checkpoint_transactions);
1605 J_ASSERT(journal->j_head == journal->j_tail); 1603 J_ASSERT(journal->j_head == journal->j_tail);
1606 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); 1604 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
1607 spin_unlock(&journal->j_state_lock); 1605 write_unlock(&journal->j_state_lock);
1608 return 0; 1606 return 0;
1609} 1607}
1610 1608
@@ -1668,12 +1666,12 @@ void __jbd2_journal_abort_hard(journal_t *journal)
1668 printk(KERN_ERR "Aborting journal on device %s.\n", 1666 printk(KERN_ERR "Aborting journal on device %s.\n",
1669 journal->j_devname); 1667 journal->j_devname);
1670 1668
1671 spin_lock(&journal->j_state_lock); 1669 write_lock(&journal->j_state_lock);
1672 journal->j_flags |= JBD2_ABORT; 1670 journal->j_flags |= JBD2_ABORT;
1673 transaction = journal->j_running_transaction; 1671 transaction = journal->j_running_transaction;
1674 if (transaction) 1672 if (transaction)
1675 __jbd2_log_start_commit(journal, transaction->t_tid); 1673 __jbd2_log_start_commit(journal, transaction->t_tid);
1676 spin_unlock(&journal->j_state_lock); 1674 write_unlock(&journal->j_state_lock);
1677} 1675}
1678 1676
1679/* Soft abort: record the abort error status in the journal superblock, 1677/* Soft abort: record the abort error status in the journal superblock,
@@ -1758,12 +1756,12 @@ int jbd2_journal_errno(journal_t *journal)
1758{ 1756{
1759 int err; 1757 int err;
1760 1758
1761 spin_lock(&journal->j_state_lock); 1759 read_lock(&journal->j_state_lock);
1762 if (journal->j_flags & JBD2_ABORT) 1760 if (journal->j_flags & JBD2_ABORT)
1763 err = -EROFS; 1761 err = -EROFS;
1764 else 1762 else
1765 err = journal->j_errno; 1763 err = journal->j_errno;
1766 spin_unlock(&journal->j_state_lock); 1764 read_unlock(&journal->j_state_lock);
1767 return err; 1765 return err;
1768} 1766}
1769 1767
@@ -1778,12 +1776,12 @@ int jbd2_journal_clear_err(journal_t *journal)
1778{ 1776{
1779 int err = 0; 1777 int err = 0;
1780 1778
1781 spin_lock(&journal->j_state_lock); 1779 write_lock(&journal->j_state_lock);
1782 if (journal->j_flags & JBD2_ABORT) 1780 if (journal->j_flags & JBD2_ABORT)
1783 err = -EROFS; 1781 err = -EROFS;
1784 else 1782 else
1785 journal->j_errno = 0; 1783 journal->j_errno = 0;
1786 spin_unlock(&journal->j_state_lock); 1784 write_unlock(&journal->j_state_lock);
1787 return err; 1785 return err;
1788} 1786}
1789 1787
@@ -1796,10 +1794,10 @@ int jbd2_journal_clear_err(journal_t *journal)
1796 */ 1794 */
1797void jbd2_journal_ack_err(journal_t *journal) 1795void jbd2_journal_ack_err(journal_t *journal)
1798{ 1796{
1799 spin_lock(&journal->j_state_lock); 1797 write_lock(&journal->j_state_lock);
1800 if (journal->j_errno) 1798 if (journal->j_errno)
1801 journal->j_flags |= JBD2_ACK_ERR; 1799 journal->j_flags |= JBD2_ACK_ERR;
1802 spin_unlock(&journal->j_state_lock); 1800 write_unlock(&journal->j_state_lock);
1803} 1801}
1804 1802
1805int jbd2_journal_blocks_per_page(struct inode *inode) 1803int jbd2_journal_blocks_per_page(struct inode *inode)