diff options
Diffstat (limited to 'fs/reiserfs/journal.c')
-rw-r--r-- | fs/reiserfs/journal.c | 134 |
1 files changed, 105 insertions, 29 deletions
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 90622200b39c..438c71f0bc91 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -429,21 +429,6 @@ static void clear_prepared_bits(struct buffer_head *bh) | |||
429 | clear_buffer_journal_restore_dirty(bh); | 429 | clear_buffer_journal_restore_dirty(bh); |
430 | } | 430 | } |
431 | 431 | ||
432 | /* utility function to force a BUG if it is called without the big | ||
433 | ** kernel lock held. caller is the string printed just before calling BUG() | ||
434 | */ | ||
435 | void reiserfs_check_lock_depth(struct super_block *sb, char *caller) | ||
436 | { | ||
437 | #ifdef CONFIG_SMP | ||
438 | if (current->lock_depth < 0) { | ||
439 | reiserfs_panic(sb, "journal-1", "%s called without kernel " | ||
440 | "lock held", caller); | ||
441 | } | ||
442 | #else | ||
443 | ; | ||
444 | #endif | ||
445 | } | ||
446 | |||
447 | /* return a cnode with same dev, block number and size in table, or null if not found */ | 432 | /* return a cnode with same dev, block number and size in table, or null if not found */ |
448 | static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct | 433 | static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct |
449 | super_block | 434 | super_block |
@@ -552,11 +537,48 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, | |||
552 | journal_hash(table, cn->sb, cn->blocknr) = cn; | 537 | journal_hash(table, cn->sb, cn->blocknr) = cn; |
553 | } | 538 | } |
554 | 539 | ||
540 | /* | ||
541 | * Several mutexes depend on the write lock. | ||
542 | * However sometimes we want to relax the write lock while we hold | ||
543 | * these mutexes, according to the release/reacquire on schedule() | ||
544 | * properties of the Bkl that were used. | ||
545 | * Reiserfs performance and locking were based on this scheme. | ||
546 | * Now that the write lock is a mutex and not the bkl anymore, doing so | ||
547 | * may result in a deadlock: | ||
548 | * | ||
549 | * A acquire write_lock | ||
550 | * A acquire j_commit_mutex | ||
551 | * A release write_lock and wait for something | ||
552 | * B acquire write_lock | ||
553 | * B can't acquire j_commit_mutex and sleep | ||
554 | * A can't acquire write lock anymore | ||
555 | * deadlock | ||
556 | * | ||
557 | * What we do here is avoid such a deadlock by playing the same game | ||
558 | * as the Bkl: if we can't acquire a mutex that depends on the write lock, | ||
559 | * we release the write lock, wait a bit and then retry. | ||
560 | * | ||
561 | * The mutexes concerned by this hack are: | ||
562 | * - The commit mutex of a journal list | ||
563 | * - The flush mutex | ||
564 | * - The journal lock | ||
565 | */ | ||
566 | static inline void reiserfs_mutex_lock_safe(struct mutex *m, | ||
567 | struct super_block *s) | ||
568 | { | ||
569 | while (!mutex_trylock(m)) { | ||
570 | reiserfs_write_unlock(s); | ||
571 | schedule(); | ||
572 | reiserfs_write_lock(s); | ||
573 | } | ||
574 | } | ||
575 | |||
555 | /* lock the current transaction */ | 576 | /* lock the current transaction */ |
556 | static inline void lock_journal(struct super_block *sb) | 577 | static inline void lock_journal(struct super_block *sb) |
557 | { | 578 | { |
558 | PROC_INFO_INC(sb, journal.lock_journal); | 579 | PROC_INFO_INC(sb, journal.lock_journal); |
559 | mutex_lock(&SB_JOURNAL(sb)->j_mutex); | 580 | |
581 | reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb); | ||
560 | } | 582 | } |
561 | 583 | ||
562 | /* unlock the current transaction */ | 584 | /* unlock the current transaction */ |
@@ -708,7 +730,9 @@ static void check_barrier_completion(struct super_block *s, | |||
708 | disable_barrier(s); | 730 | disable_barrier(s); |
709 | set_buffer_uptodate(bh); | 731 | set_buffer_uptodate(bh); |
710 | set_buffer_dirty(bh); | 732 | set_buffer_dirty(bh); |
733 | reiserfs_write_unlock(s); | ||
711 | sync_dirty_buffer(bh); | 734 | sync_dirty_buffer(bh); |
735 | reiserfs_write_lock(s); | ||
712 | } | 736 | } |
713 | } | 737 | } |
714 | 738 | ||
@@ -996,8 +1020,13 @@ static int reiserfs_async_progress_wait(struct super_block *s) | |||
996 | { | 1020 | { |
997 | DEFINE_WAIT(wait); | 1021 | DEFINE_WAIT(wait); |
998 | struct reiserfs_journal *j = SB_JOURNAL(s); | 1022 | struct reiserfs_journal *j = SB_JOURNAL(s); |
999 | if (atomic_read(&j->j_async_throttle)) | 1023 | |
1024 | if (atomic_read(&j->j_async_throttle)) { | ||
1025 | reiserfs_write_unlock(s); | ||
1000 | congestion_wait(BLK_RW_ASYNC, HZ / 10); | 1026 | congestion_wait(BLK_RW_ASYNC, HZ / 10); |
1027 | reiserfs_write_lock(s); | ||
1028 | } | ||
1029 | |||
1001 | return 0; | 1030 | return 0; |
1002 | } | 1031 | } |
1003 | 1032 | ||
@@ -1043,7 +1072,8 @@ static int flush_commit_list(struct super_block *s, | |||
1043 | } | 1072 | } |
1044 | 1073 | ||
1045 | /* make sure nobody is trying to flush this one at the same time */ | 1074 | /* make sure nobody is trying to flush this one at the same time */ |
1046 | mutex_lock(&jl->j_commit_mutex); | 1075 | reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s); |
1076 | |||
1047 | if (!journal_list_still_alive(s, trans_id)) { | 1077 | if (!journal_list_still_alive(s, trans_id)) { |
1048 | mutex_unlock(&jl->j_commit_mutex); | 1078 | mutex_unlock(&jl->j_commit_mutex); |
1049 | goto put_jl; | 1079 | goto put_jl; |
@@ -1061,12 +1091,17 @@ static int flush_commit_list(struct super_block *s, | |||
1061 | 1091 | ||
1062 | if (!list_empty(&jl->j_bh_list)) { | 1092 | if (!list_empty(&jl->j_bh_list)) { |
1063 | int ret; | 1093 | int ret; |
1064 | unlock_kernel(); | 1094 | |
1095 | /* | ||
1096 | * We might sleep in numerous places inside | ||
1097 | * write_ordered_buffers. Relax the write lock. | ||
1098 | */ | ||
1099 | reiserfs_write_unlock(s); | ||
1065 | ret = write_ordered_buffers(&journal->j_dirty_buffers_lock, | 1100 | ret = write_ordered_buffers(&journal->j_dirty_buffers_lock, |
1066 | journal, jl, &jl->j_bh_list); | 1101 | journal, jl, &jl->j_bh_list); |
1067 | if (ret < 0 && retval == 0) | 1102 | if (ret < 0 && retval == 0) |
1068 | retval = ret; | 1103 | retval = ret; |
1069 | lock_kernel(); | 1104 | reiserfs_write_lock(s); |
1070 | } | 1105 | } |
1071 | BUG_ON(!list_empty(&jl->j_bh_list)); | 1106 | BUG_ON(!list_empty(&jl->j_bh_list)); |
1072 | /* | 1107 | /* |
@@ -1114,12 +1149,19 @@ static int flush_commit_list(struct super_block *s, | |||
1114 | bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + | 1149 | bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + |
1115 | (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); | 1150 | (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); |
1116 | tbh = journal_find_get_block(s, bn); | 1151 | tbh = journal_find_get_block(s, bn); |
1152 | |||
1153 | reiserfs_write_unlock(s); | ||
1117 | wait_on_buffer(tbh); | 1154 | wait_on_buffer(tbh); |
1155 | reiserfs_write_lock(s); | ||
1118 | // since we're using ll_rw_blk above, it might have skipped over | 1156 | // since we're using ll_rw_blk above, it might have skipped over |
1119 | // a locked buffer. Double check here | 1157 | // a locked buffer. Double check here |
1120 | // | 1158 | // |
1121 | if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */ | 1159 | /* redundant, sync_dirty_buffer() checks */ |
1160 | if (buffer_dirty(tbh)) { | ||
1161 | reiserfs_write_unlock(s); | ||
1122 | sync_dirty_buffer(tbh); | 1162 | sync_dirty_buffer(tbh); |
1163 | reiserfs_write_lock(s); | ||
1164 | } | ||
1123 | if (unlikely(!buffer_uptodate(tbh))) { | 1165 | if (unlikely(!buffer_uptodate(tbh))) { |
1124 | #ifdef CONFIG_REISERFS_CHECK | 1166 | #ifdef CONFIG_REISERFS_CHECK |
1125 | reiserfs_warning(s, "journal-601", | 1167 | reiserfs_warning(s, "journal-601", |
@@ -1143,10 +1185,15 @@ static int flush_commit_list(struct super_block *s, | |||
1143 | if (buffer_dirty(jl->j_commit_bh)) | 1185 | if (buffer_dirty(jl->j_commit_bh)) |
1144 | BUG(); | 1186 | BUG(); |
1145 | mark_buffer_dirty(jl->j_commit_bh) ; | 1187 | mark_buffer_dirty(jl->j_commit_bh) ; |
1188 | reiserfs_write_unlock(s); | ||
1146 | sync_dirty_buffer(jl->j_commit_bh) ; | 1189 | sync_dirty_buffer(jl->j_commit_bh) ; |
1190 | reiserfs_write_lock(s); | ||
1147 | } | 1191 | } |
1148 | } else | 1192 | } else { |
1193 | reiserfs_write_unlock(s); | ||
1149 | wait_on_buffer(jl->j_commit_bh); | 1194 | wait_on_buffer(jl->j_commit_bh); |
1195 | reiserfs_write_lock(s); | ||
1196 | } | ||
1150 | 1197 | ||
1151 | check_barrier_completion(s, jl->j_commit_bh); | 1198 | check_barrier_completion(s, jl->j_commit_bh); |
1152 | 1199 | ||
@@ -1286,7 +1333,9 @@ static int _update_journal_header_block(struct super_block *sb, | |||
1286 | 1333 | ||
1287 | if (trans_id >= journal->j_last_flush_trans_id) { | 1334 | if (trans_id >= journal->j_last_flush_trans_id) { |
1288 | if (buffer_locked((journal->j_header_bh))) { | 1335 | if (buffer_locked((journal->j_header_bh))) { |
1336 | reiserfs_write_unlock(sb); | ||
1289 | wait_on_buffer((journal->j_header_bh)); | 1337 | wait_on_buffer((journal->j_header_bh)); |
1338 | reiserfs_write_lock(sb); | ||
1290 | if (unlikely(!buffer_uptodate(journal->j_header_bh))) { | 1339 | if (unlikely(!buffer_uptodate(journal->j_header_bh))) { |
1291 | #ifdef CONFIG_REISERFS_CHECK | 1340 | #ifdef CONFIG_REISERFS_CHECK |
1292 | reiserfs_warning(sb, "journal-699", | 1341 | reiserfs_warning(sb, "journal-699", |
@@ -1312,12 +1361,16 @@ static int _update_journal_header_block(struct super_block *sb, | |||
1312 | disable_barrier(sb); | 1361 | disable_barrier(sb); |
1313 | goto sync; | 1362 | goto sync; |
1314 | } | 1363 | } |
1364 | reiserfs_write_unlock(sb); | ||
1315 | wait_on_buffer(journal->j_header_bh); | 1365 | wait_on_buffer(journal->j_header_bh); |
1366 | reiserfs_write_lock(sb); | ||
1316 | check_barrier_completion(sb, journal->j_header_bh); | 1367 | check_barrier_completion(sb, journal->j_header_bh); |
1317 | } else { | 1368 | } else { |
1318 | sync: | 1369 | sync: |
1319 | set_buffer_dirty(journal->j_header_bh); | 1370 | set_buffer_dirty(journal->j_header_bh); |
1371 | reiserfs_write_unlock(sb); | ||
1320 | sync_dirty_buffer(journal->j_header_bh); | 1372 | sync_dirty_buffer(journal->j_header_bh); |
1373 | reiserfs_write_lock(sb); | ||
1321 | } | 1374 | } |
1322 | if (!buffer_uptodate(journal->j_header_bh)) { | 1375 | if (!buffer_uptodate(journal->j_header_bh)) { |
1323 | reiserfs_warning(sb, "journal-837", | 1376 | reiserfs_warning(sb, "journal-837", |
@@ -1409,7 +1462,7 @@ static int flush_journal_list(struct super_block *s, | |||
1409 | 1462 | ||
1410 | /* if flushall == 0, the lock is already held */ | 1463 | /* if flushall == 0, the lock is already held */ |
1411 | if (flushall) { | 1464 | if (flushall) { |
1412 | mutex_lock(&journal->j_flush_mutex); | 1465 | reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s); |
1413 | } else if (mutex_trylock(&journal->j_flush_mutex)) { | 1466 | } else if (mutex_trylock(&journal->j_flush_mutex)) { |
1414 | BUG(); | 1467 | BUG(); |
1415 | } | 1468 | } |
@@ -1553,7 +1606,11 @@ static int flush_journal_list(struct super_block *s, | |||
1553 | reiserfs_panic(s, "journal-1011", | 1606 | reiserfs_panic(s, "journal-1011", |
1554 | "cn->bh is NULL"); | 1607 | "cn->bh is NULL"); |
1555 | } | 1608 | } |
1609 | |||
1610 | reiserfs_write_unlock(s); | ||
1556 | wait_on_buffer(cn->bh); | 1611 | wait_on_buffer(cn->bh); |
1612 | reiserfs_write_lock(s); | ||
1613 | |||
1557 | if (!cn->bh) { | 1614 | if (!cn->bh) { |
1558 | reiserfs_panic(s, "journal-1012", | 1615 | reiserfs_panic(s, "journal-1012", |
1559 | "cn->bh is NULL"); | 1616 | "cn->bh is NULL"); |
@@ -1973,11 +2030,19 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, | |||
1973 | reiserfs_mounted_fs_count--; | 2030 | reiserfs_mounted_fs_count--; |
1974 | /* wait for all commits to finish */ | 2031 | /* wait for all commits to finish */ |
1975 | cancel_delayed_work(&SB_JOURNAL(sb)->j_work); | 2032 | cancel_delayed_work(&SB_JOURNAL(sb)->j_work); |
2033 | |||
2034 | /* | ||
2035 | * We must release the write lock here because | ||
2036 | * the workqueue job (flush_async_commit) needs this lock | ||
2037 | */ | ||
2038 | reiserfs_write_unlock(sb); | ||
1976 | flush_workqueue(commit_wq); | 2039 | flush_workqueue(commit_wq); |
2040 | |||
1977 | if (!reiserfs_mounted_fs_count) { | 2041 | if (!reiserfs_mounted_fs_count) { |
1978 | destroy_workqueue(commit_wq); | 2042 | destroy_workqueue(commit_wq); |
1979 | commit_wq = NULL; | 2043 | commit_wq = NULL; |
1980 | } | 2044 | } |
2045 | reiserfs_write_lock(sb); | ||
1981 | 2046 | ||
1982 | free_journal_ram(sb); | 2047 | free_journal_ram(sb); |
1983 | 2048 | ||
@@ -2243,7 +2308,11 @@ static int journal_read_transaction(struct super_block *sb, | |||
2243 | /* read in the log blocks, memcpy to the corresponding real block */ | 2308 | /* read in the log blocks, memcpy to the corresponding real block */ |
2244 | ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); | 2309 | ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); |
2245 | for (i = 0; i < get_desc_trans_len(desc); i++) { | 2310 | for (i = 0; i < get_desc_trans_len(desc); i++) { |
2311 | |||
2312 | reiserfs_write_unlock(sb); | ||
2246 | wait_on_buffer(log_blocks[i]); | 2313 | wait_on_buffer(log_blocks[i]); |
2314 | reiserfs_write_lock(sb); | ||
2315 | |||
2247 | if (!buffer_uptodate(log_blocks[i])) { | 2316 | if (!buffer_uptodate(log_blocks[i])) { |
2248 | reiserfs_warning(sb, "journal-1212", | 2317 | reiserfs_warning(sb, "journal-1212", |
2249 | "REPLAY FAILURE fsck required! " | 2318 | "REPLAY FAILURE fsck required! " |
@@ -2964,8 +3033,11 @@ static void queue_log_writer(struct super_block *s) | |||
2964 | init_waitqueue_entry(&wait, current); | 3033 | init_waitqueue_entry(&wait, current); |
2965 | add_wait_queue(&journal->j_join_wait, &wait); | 3034 | add_wait_queue(&journal->j_join_wait, &wait); |
2966 | set_current_state(TASK_UNINTERRUPTIBLE); | 3035 | set_current_state(TASK_UNINTERRUPTIBLE); |
2967 | if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) | 3036 | if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) { |
3037 | reiserfs_write_unlock(s); | ||
2968 | schedule(); | 3038 | schedule(); |
3039 | reiserfs_write_lock(s); | ||
3040 | } | ||
2969 | __set_current_state(TASK_RUNNING); | 3041 | __set_current_state(TASK_RUNNING); |
2970 | remove_wait_queue(&journal->j_join_wait, &wait); | 3042 | remove_wait_queue(&journal->j_join_wait, &wait); |
2971 | } | 3043 | } |
@@ -2982,7 +3054,9 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id) | |||
2982 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 3054 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
2983 | unsigned long bcount = journal->j_bcount; | 3055 | unsigned long bcount = journal->j_bcount; |
2984 | while (1) { | 3056 | while (1) { |
3057 | reiserfs_write_unlock(sb); | ||
2985 | schedule_timeout_uninterruptible(1); | 3058 | schedule_timeout_uninterruptible(1); |
3059 | reiserfs_write_lock(sb); | ||
2986 | journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; | 3060 | journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; |
2987 | while ((atomic_read(&journal->j_wcount) > 0 || | 3061 | while ((atomic_read(&journal->j_wcount) > 0 || |
2988 | atomic_read(&journal->j_jlock)) && | 3062 | atomic_read(&journal->j_jlock)) && |
@@ -3033,7 +3107,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | |||
3033 | 3107 | ||
3034 | if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { | 3108 | if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { |
3035 | unlock_journal(sb); | 3109 | unlock_journal(sb); |
3110 | reiserfs_write_unlock(sb); | ||
3036 | reiserfs_wait_on_write_block(sb); | 3111 | reiserfs_wait_on_write_block(sb); |
3112 | reiserfs_write_lock(sb); | ||
3037 | PROC_INFO_INC(sb, journal.journal_relock_writers); | 3113 | PROC_INFO_INC(sb, journal.journal_relock_writers); |
3038 | goto relock; | 3114 | goto relock; |
3039 | } | 3115 | } |
@@ -3506,14 +3582,14 @@ static void flush_async_commits(struct work_struct *work) | |||
3506 | struct reiserfs_journal_list *jl; | 3582 | struct reiserfs_journal_list *jl; |
3507 | struct list_head *entry; | 3583 | struct list_head *entry; |
3508 | 3584 | ||
3509 | lock_kernel(); | 3585 | reiserfs_write_lock(sb); |
3510 | if (!list_empty(&journal->j_journal_list)) { | 3586 | if (!list_empty(&journal->j_journal_list)) { |
3511 | /* last entry is the youngest, commit it and you get everything */ | 3587 | /* last entry is the youngest, commit it and you get everything */ |
3512 | entry = journal->j_journal_list.prev; | 3588 | entry = journal->j_journal_list.prev; |
3513 | jl = JOURNAL_LIST_ENTRY(entry); | 3589 | jl = JOURNAL_LIST_ENTRY(entry); |
3514 | flush_commit_list(sb, jl, 1); | 3590 | flush_commit_list(sb, jl, 1); |
3515 | } | 3591 | } |
3516 | unlock_kernel(); | 3592 | reiserfs_write_unlock(sb); |
3517 | } | 3593 | } |
3518 | 3594 | ||
3519 | /* | 3595 | /* |
@@ -4041,7 +4117,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4041 | * the new transaction is fully setup, and we've already flushed the | 4117 | * the new transaction is fully setup, and we've already flushed the |
4042 | * ordered bh list | 4118 | * ordered bh list |
4043 | */ | 4119 | */ |
4044 | mutex_lock(&jl->j_commit_mutex); | 4120 | reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb); |
4045 | 4121 | ||
4046 | /* save the transaction id in case we need to commit it later */ | 4122 | /* save the transaction id in case we need to commit it later */ |
4047 | commit_trans_id = jl->j_trans_id; | 4123 | commit_trans_id = jl->j_trans_id; |
@@ -4203,10 +4279,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4203 | * is lost. | 4279 | * is lost. |
4204 | */ | 4280 | */ |
4205 | if (!list_empty(&jl->j_tail_bh_list)) { | 4281 | if (!list_empty(&jl->j_tail_bh_list)) { |
4206 | unlock_kernel(); | 4282 | reiserfs_write_unlock(sb); |
4207 | write_ordered_buffers(&journal->j_dirty_buffers_lock, | 4283 | write_ordered_buffers(&journal->j_dirty_buffers_lock, |
4208 | journal, jl, &jl->j_tail_bh_list); | 4284 | journal, jl, &jl->j_tail_bh_list); |
4209 | lock_kernel(); | 4285 | reiserfs_write_lock(sb); |
4210 | } | 4286 | } |
4211 | BUG_ON(!list_empty(&jl->j_tail_bh_list)); | 4287 | BUG_ON(!list_empty(&jl->j_tail_bh_list)); |
4212 | mutex_unlock(&jl->j_commit_mutex); | 4288 | mutex_unlock(&jl->j_commit_mutex); |