Diffstat (limited to 'fs/reiserfs/journal.c')

 fs/reiserfs/journal.c | 134 ++++++++++++++++++++++++++++------
 1 file changed, 105 insertions(+), 29 deletions(-)
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 90622200b39c..438c71f0bc91 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -429,21 +429,6 @@ static void clear_prepared_bits(struct buffer_head *bh)
 	clear_buffer_journal_restore_dirty(bh);
 }
 
-/* utility function to force a BUG if it is called without the big
-** kernel lock held.  caller is the string printed just before calling BUG()
-*/
-void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
-{
-#ifdef CONFIG_SMP
-	if (current->lock_depth < 0) {
-		reiserfs_panic(sb, "journal-1", "%s called without kernel "
-			       "lock held", caller);
-	}
-#else
-	;
-#endif
-}
-
 /* return a cnode with same dev, block number and size in table, or null if not found */
 static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
 								  super_block
@@ -552,11 +537,48 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
 	journal_hash(table, cn->sb, cn->blocknr) = cn;
 }
 
+/*
+ * Several mutexes depend on the write lock.
+ * However, sometimes we want to relax the write lock while we hold
+ * these mutexes, following the release/reacquire-on-schedule()
+ * behaviour the Bkl used to provide.
+ * Reiserfs performance and locking were based on this scheme.
+ * Now that the write lock is a mutex and not the Bkl anymore, doing so
+ * may result in a deadlock:
+ *
+ * A acquires write_lock
+ * A acquires j_commit_mutex
+ * A releases write_lock and waits for something
+ * B acquires write_lock
+ * B can't acquire j_commit_mutex and sleeps
+ * A can't acquire the write lock anymore
+ * deadlock
+ *
+ * What we do here is avoid such deadlocks by playing the same game
+ * as the Bkl: if we can't acquire a mutex that depends on the write lock,
+ * we release the write lock, wait a bit and then retry.
+ *
+ * The mutexes concerned by this hack are:
+ * - The commit mutex of a journal list
+ * - The flush mutex
+ * - The journal lock
+ */
+static inline void reiserfs_mutex_lock_safe(struct mutex *m,
+					    struct super_block *s)
+{
+	while (!mutex_trylock(m)) {
+		reiserfs_write_unlock(s);
+		schedule();
+		reiserfs_write_lock(s);
+	}
+}
+
 /* lock the current transaction */
 static inline void lock_journal(struct super_block *sb)
 {
 	PROC_INFO_INC(sb, journal.lock_journal);
-	mutex_lock(&SB_JOURNAL(sb)->j_mutex);
+
+	reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb);
 }
 
 /* unlock the current transaction */
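The comment above describes the trylock/relax/retry scheme in kernel terms. As a self-contained illustration, the same pattern can be sketched in userspace with pthreads; everything below is illustrative (the names, and sched_yield() standing in for schedule()) and is not part of the patch. Callers are assumed to hold write_lock, just as reiserfs callers hold the per-superblock write lock:

#include <pthread.h>
#include <sched.h>

/* Outer lock playing the role of the reiserfs per-superblock write lock. */
static pthread_mutex_t write_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Take a mutex that "depends on" write_lock without deadlocking:
 * if the trylock fails, drop the outer lock so whoever owns 'm' can
 * make progress and release it, yield, retake the outer lock, retry.
 */
static void mutex_lock_safe(pthread_mutex_t *m)
{
	while (pthread_mutex_trylock(m) != 0) {
		pthread_mutex_unlock(&write_lock);
		sched_yield();		/* stand-in for schedule() */
		pthread_mutex_lock(&write_lock);
	}
}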
@@ -708,7 +730,9 @@ static void check_barrier_completion(struct super_block *s,
 		disable_barrier(s);
 		set_buffer_uptodate(bh);
 		set_buffer_dirty(bh);
+		reiserfs_write_unlock(s);
 		sync_dirty_buffer(bh);
+		reiserfs_write_lock(s);
 	}
 }
 
@@ -996,8 +1020,13 @@ static int reiserfs_async_progress_wait(struct super_block *s)
 {
 	DEFINE_WAIT(wait);
 	struct reiserfs_journal *j = SB_JOURNAL(s);
-	if (atomic_read(&j->j_async_throttle))
+
+	if (atomic_read(&j->j_async_throttle)) {
+		reiserfs_write_unlock(s);
 		congestion_wait(BLK_RW_ASYNC, HZ / 10);
+		reiserfs_write_lock(s);
+	}
+
 	return 0;
 }
 
@@ -1043,7 +1072,8 @@ static int flush_commit_list(struct super_block *s,
 	}
 
 	/* make sure nobody is trying to flush this one at the same time */
-	mutex_lock(&jl->j_commit_mutex);
+	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);
+
 	if (!journal_list_still_alive(s, trans_id)) {
 		mutex_unlock(&jl->j_commit_mutex);
 		goto put_jl;
@@ -1061,12 +1091,17 @@ static int flush_commit_list(struct super_block *s,
 
 	if (!list_empty(&jl->j_bh_list)) {
 		int ret;
-		unlock_kernel();
+
+		/*
+		 * We might sleep in numerous places inside
+		 * write_ordered_buffers. Relax the write lock.
+		 */
+		reiserfs_write_unlock(s);
 		ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
 					    journal, jl, &jl->j_bh_list);
 		if (ret < 0 && retval == 0)
 			retval = ret;
-		lock_kernel();
+		reiserfs_write_lock(s);
 	}
 	BUG_ON(!list_empty(&jl->j_bh_list));
 	/*
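The comment in the hunk above states the rule the rest of this patch applies mechanically: any call that can sleep for a while (write_ordered_buffers(), wait_on_buffer(), sync_dirty_buffer(), schedule()) gets bracketed by a write-lock release and reacquire, so other writers can make progress during the wait. The patch open-codes that at each call site; a hypothetical helper (not introduced by the patch, the name is made up) would capture the idiom:

/*
 * Hypothetical wrapper, not part of the patch: sleep on buffer I/O
 * without holding the per-superblock write lock across the wait.
 */
static inline void reiserfs_wait_on_buffer_safe(struct buffer_head *bh,
						struct super_block *s)
{
	reiserfs_write_unlock(s);
	wait_on_buffer(bh);	/* may sleep until the I/O completes */
	reiserfs_write_lock(s);
}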
@@ -1114,12 +1149,19 @@ static int flush_commit_list(struct super_block *s,
 		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
 		    (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
 		tbh = journal_find_get_block(s, bn);
+
+		reiserfs_write_unlock(s);
 		wait_on_buffer(tbh);
+		reiserfs_write_lock(s);
 		// since we're using ll_rw_blk above, it might have skipped over
 		// a locked buffer.  Double check here
 		//
-		if (buffer_dirty(tbh))	/* redundant, sync_dirty_buffer() checks */
+		/* redundant, sync_dirty_buffer() checks */
+		if (buffer_dirty(tbh)) {
+			reiserfs_write_unlock(s);
 			sync_dirty_buffer(tbh);
+			reiserfs_write_lock(s);
+		}
 		if (unlikely(!buffer_uptodate(tbh))) {
 #ifdef CONFIG_REISERFS_CHECK
 			reiserfs_warning(s, "journal-601",
@@ -1143,10 +1185,15 @@ static int flush_commit_list(struct super_block *s,
 		if (buffer_dirty(jl->j_commit_bh))
 			BUG();
 		mark_buffer_dirty(jl->j_commit_bh) ;
+		reiserfs_write_unlock(s);
 		sync_dirty_buffer(jl->j_commit_bh) ;
+		reiserfs_write_lock(s);
 	}
-	} else
+	} else {
+		reiserfs_write_unlock(s);
 		wait_on_buffer(jl->j_commit_bh);
+		reiserfs_write_lock(s);
+	}
 
 	check_barrier_completion(s, jl->j_commit_bh);
 
@@ -1286,7 +1333,9 @@ static int _update_journal_header_block(struct super_block *sb,
 
 	if (trans_id >= journal->j_last_flush_trans_id) {
 		if (buffer_locked((journal->j_header_bh))) {
+			reiserfs_write_unlock(sb);
 			wait_on_buffer((journal->j_header_bh));
+			reiserfs_write_lock(sb);
 			if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
 #ifdef CONFIG_REISERFS_CHECK
 				reiserfs_warning(sb, "journal-699",
@@ -1312,12 +1361,16 @@ static int _update_journal_header_block(struct super_block *sb,
 			disable_barrier(sb);
 			goto sync;
 		}
+		reiserfs_write_unlock(sb);
 		wait_on_buffer(journal->j_header_bh);
+		reiserfs_write_lock(sb);
 		check_barrier_completion(sb, journal->j_header_bh);
 	} else {
 	      sync:
 		set_buffer_dirty(journal->j_header_bh);
+		reiserfs_write_unlock(sb);
 		sync_dirty_buffer(journal->j_header_bh);
+		reiserfs_write_lock(sb);
 	}
 	if (!buffer_uptodate(journal->j_header_bh)) {
 		reiserfs_warning(sb, "journal-837",
@@ -1409,7 +1462,7 @@ static int flush_journal_list(struct super_block *s,
 
 	/* if flushall == 0, the lock is already held */
 	if (flushall) {
-		mutex_lock(&journal->j_flush_mutex);
+		reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
 	} else if (mutex_trylock(&journal->j_flush_mutex)) {
 		BUG();
 	}
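A reading note on the hunk above: the `else if` branch is an assertion, not an acquisition. When flushall == 0 the caller is required to already hold j_flush_mutex, so if mutex_trylock() succeeds the locking contract was violated and the code BUG()s. The shape of the idiom, annotated (illustrative comments only, same code as the hunk):

	if (flushall) {
		/* Take the flush mutex, relaxing the write lock as
		 * needed (see reiserfs_mutex_lock_safe() above). */
		reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
	} else if (mutex_trylock(&journal->j_flush_mutex)) {
		/* Succeeding here means nobody held the mutex, i.e.
		 * the caller broke the "lock already held" contract. */
		BUG();
	}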
@@ -1553,7 +1606,11 @@ static int flush_journal_list(struct super_block *s,
 					reiserfs_panic(s, "journal-1011",
 						       "cn->bh is NULL");
 				}
+
+				reiserfs_write_unlock(s);
 				wait_on_buffer(cn->bh);
+				reiserfs_write_lock(s);
+
 				if (!cn->bh) {
 					reiserfs_panic(s, "journal-1012",
 						       "cn->bh is NULL");
@@ -1973,11 +2030,19 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
 	reiserfs_mounted_fs_count--;
 	/* wait for all commits to finish */
 	cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
+
+	/*
+	 * We must release the write lock here because
+	 * the workqueue job (flush_async_commits) needs this lock
+	 */
+	reiserfs_write_unlock(sb);
 	flush_workqueue(commit_wq);
+
 	if (!reiserfs_mounted_fs_count) {
 		destroy_workqueue(commit_wq);
 		commit_wq = NULL;
 	}
+	reiserfs_write_lock(sb);
 
 	free_journal_ram(sb);
 
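The comment in this hunk is terse; the hazard it avoids is a flush-vs-worker inversion: flush_workqueue() blocks until flush_async_commits() has run, and that work item (converted near the bottom of this patch) now starts by taking the same per-superblock write lock. A runnable userspace analogue, with pthread_join() standing in for flush_workqueue() (all names illustrative, not kernel API):

#include <pthread.h>

static pthread_mutex_t write_lock = PTHREAD_MUTEX_INITIALIZER;

/* Plays the role of flush_async_commits(): takes the write lock first. */
static void *commit_worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&write_lock);
	/* ... commit the journal lists ... */
	pthread_mutex_unlock(&write_lock);
	return NULL;
}

int main(void)
{
	pthread_t worker;

	pthread_mutex_lock(&write_lock);
	pthread_create(&worker, NULL, commit_worker, NULL);

	/* Joining while still holding write_lock would deadlock, exactly
	 * like flush_workqueue() under the write lock. So, as in the
	 * patch, drop the lock around the wait and retake it after. */
	pthread_mutex_unlock(&write_lock);	/* reiserfs_write_unlock() */
	pthread_join(worker, NULL);		/* flush_workqueue() analogue */
	pthread_mutex_lock(&write_lock);	/* reiserfs_write_lock() */

	pthread_mutex_unlock(&write_lock);
	return 0;
}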
@@ -2243,7 +2308,11 @@ static int journal_read_transaction(struct super_block *sb,
 	/* read in the log blocks, memcpy to the corresponding real block */
 	ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
 	for (i = 0; i < get_desc_trans_len(desc); i++) {
+
+		reiserfs_write_unlock(sb);
 		wait_on_buffer(log_blocks[i]);
+		reiserfs_write_lock(sb);
+
 		if (!buffer_uptodate(log_blocks[i])) {
 			reiserfs_warning(sb, "journal-1212",
 					 "REPLAY FAILURE fsck required! "
@@ -2964,8 +3033,11 @@ static void queue_log_writer(struct super_block *s)
 	init_waitqueue_entry(&wait, current);
 	add_wait_queue(&journal->j_join_wait, &wait);
 	set_current_state(TASK_UNINTERRUPTIBLE);
-	if (test_bit(J_WRITERS_QUEUED, &journal->j_state))
+	if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) {
+		reiserfs_write_unlock(s);
 		schedule();
+		reiserfs_write_lock(s);
+	}
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&journal->j_join_wait, &wait);
 }
@@ -2982,7 +3054,9 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	unsigned long bcount = journal->j_bcount;
 	while (1) {
+		reiserfs_write_unlock(sb);
 		schedule_timeout_uninterruptible(1);
+		reiserfs_write_lock(sb);
 		journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
 		while ((atomic_read(&journal->j_wcount) > 0 ||
 			atomic_read(&journal->j_jlock)) &&
@@ -3033,7 +3107,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
 
 	if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
 		unlock_journal(sb);
+		reiserfs_write_unlock(sb);
 		reiserfs_wait_on_write_block(sb);
+		reiserfs_write_lock(sb);
 		PROC_INFO_INC(sb, journal.journal_relock_writers);
 		goto relock;
 	}
@@ -3506,14 +3582,14 @@ static void flush_async_commits(struct work_struct *work)
 	struct reiserfs_journal_list *jl;
 	struct list_head *entry;
 
-	lock_kernel();
+	reiserfs_write_lock(sb);
 	if (!list_empty(&journal->j_journal_list)) {
 		/* last entry is the youngest, commit it and you get everything */
 		entry = journal->j_journal_list.prev;
 		jl = JOURNAL_LIST_ENTRY(entry);
 		flush_commit_list(sb, jl, 1);
 	}
-	unlock_kernel();
+	reiserfs_write_unlock(sb);
 }
 
 /*
@@ -4041,7 +4117,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 	 * the new transaction is fully setup, and we've already flushed the
 	 * ordered bh list
 	 */
-	mutex_lock(&jl->j_commit_mutex);
+	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb);
 
 	/* save the transaction id in case we need to commit it later */
 	commit_trans_id = jl->j_trans_id;
@@ -4203,10 +4279,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 	 * is lost.
 	 */
 	if (!list_empty(&jl->j_tail_bh_list)) {
-		unlock_kernel();
+		reiserfs_write_unlock(sb);
 		write_ordered_buffers(&journal->j_dirty_buffers_lock,
 				      journal, jl, &jl->j_tail_bh_list);
-		lock_kernel();
+		reiserfs_write_lock(sb);
 	}
 	BUG_ON(!list_empty(&jl->j_tail_bh_list));
 	mutex_unlock(&jl->j_commit_mutex);