aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/reiserfs/Makefile2
-rw-r--r--fs/reiserfs/bitmap.c2
-rw-r--r--fs/reiserfs/dir.c8
-rw-r--r--fs/reiserfs/fix_node.c10
-rw-r--r--fs/reiserfs/inode.c23
-rw-r--r--fs/reiserfs/ioctl.c6
-rw-r--r--fs/reiserfs/journal.c134
-rw-r--r--fs/reiserfs/lock.c63
-rw-r--r--fs/reiserfs/resize.c2
-rw-r--r--fs/reiserfs/stree.c2
-rw-r--r--fs/reiserfs/super.c37
-rw-r--r--include/linux/reiserfs_fs.h12
-rw-r--r--include/linux/reiserfs_fs_sb.h9
13 files changed, 261 insertions, 49 deletions
diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile
index 7c5ab6330dd6..6a9e30c041dd 100644
--- a/fs/reiserfs/Makefile
+++ b/fs/reiserfs/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_REISERFS_FS) += reiserfs.o
7reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \ 7reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \
8 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \ 8 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \
9 hashes.o tail_conversion.o journal.o resize.o \ 9 hashes.o tail_conversion.o journal.o resize.o \
10 item_ops.o ioctl.o procfs.o xattr.o 10 item_ops.o ioctl.o procfs.o xattr.o lock.o
11 11
12ifeq ($(CONFIG_REISERFS_FS_XATTR),y) 12ifeq ($(CONFIG_REISERFS_FS_XATTR),y)
13reiserfs-objs += xattr_user.o xattr_trusted.o 13reiserfs-objs += xattr_user.o xattr_trusted.o
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index e716161ab325..147033461b87 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1256,7 +1256,9 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1256 else { 1256 else {
1257 if (buffer_locked(bh)) { 1257 if (buffer_locked(bh)) {
1258 PROC_INFO_INC(sb, scan_bitmap.wait); 1258 PROC_INFO_INC(sb, scan_bitmap.wait);
1259 reiserfs_write_unlock(sb);
1259 __wait_on_buffer(bh); 1260 __wait_on_buffer(bh);
1261 reiserfs_write_lock(sb);
1260 } 1262 }
1261 BUG_ON(!buffer_uptodate(bh)); 1263 BUG_ON(!buffer_uptodate(bh));
1262 BUG_ON(atomic_read(&bh->b_count) == 0); 1264 BUG_ON(atomic_read(&bh->b_count) == 0);
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 6d2668fdc384..17f31ad379c8 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -174,14 +174,22 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
174 // user space buffer is swapped out. At that time 174 // user space buffer is swapped out. At that time
175 // entry can move to somewhere else 175 // entry can move to somewhere else
176 memcpy(local_buf, d_name, d_reclen); 176 memcpy(local_buf, d_name, d_reclen);
177
178 /*
179 * Since filldir might sleep, we can release
180 * the write lock here for other waiters
181 */
182 reiserfs_write_unlock(inode->i_sb);
177 if (filldir 183 if (filldir
178 (dirent, local_buf, d_reclen, d_off, d_ino, 184 (dirent, local_buf, d_reclen, d_off, d_ino,
179 DT_UNKNOWN) < 0) { 185 DT_UNKNOWN) < 0) {
186 reiserfs_write_lock(inode->i_sb);
180 if (local_buf != small_buf) { 187 if (local_buf != small_buf) {
181 kfree(local_buf); 188 kfree(local_buf);
182 } 189 }
183 goto end; 190 goto end;
184 } 191 }
192 reiserfs_write_lock(inode->i_sb);
185 if (local_buf != small_buf) { 193 if (local_buf != small_buf) {
186 kfree(local_buf); 194 kfree(local_buf);
187 } 195 }
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 5e5a4e6fbaf8..bf5f2cbdb063 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -1022,7 +1022,11 @@ static int get_far_parent(struct tree_balance *tb,
1022 /* Check whether the common parent is locked. */ 1022 /* Check whether the common parent is locked. */
1023 1023
1024 if (buffer_locked(*pcom_father)) { 1024 if (buffer_locked(*pcom_father)) {
1025
1026 /* Release the write lock while the buffer is busy */
1027 reiserfs_write_unlock(tb->tb_sb);
1025 __wait_on_buffer(*pcom_father); 1028 __wait_on_buffer(*pcom_father);
1029 reiserfs_write_lock(tb->tb_sb);
1026 if (FILESYSTEM_CHANGED_TB(tb)) { 1030 if (FILESYSTEM_CHANGED_TB(tb)) {
1027 brelse(*pcom_father); 1031 brelse(*pcom_father);
1028 return REPEAT_SEARCH; 1032 return REPEAT_SEARCH;
@@ -1927,7 +1931,9 @@ static int get_direct_parent(struct tree_balance *tb, int h)
1927 return REPEAT_SEARCH; 1931 return REPEAT_SEARCH;
1928 1932
1929 if (buffer_locked(bh)) { 1933 if (buffer_locked(bh)) {
1934 reiserfs_write_unlock(tb->tb_sb);
1930 __wait_on_buffer(bh); 1935 __wait_on_buffer(bh);
1936 reiserfs_write_lock(tb->tb_sb);
1931 if (FILESYSTEM_CHANGED_TB(tb)) 1937 if (FILESYSTEM_CHANGED_TB(tb))
1932 return REPEAT_SEARCH; 1938 return REPEAT_SEARCH;
1933 } 1939 }
@@ -2278,7 +2284,9 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
2278 REPEAT_SEARCH : CARRY_ON; 2284 REPEAT_SEARCH : CARRY_ON;
2279 } 2285 }
2280#endif 2286#endif
2287 reiserfs_write_unlock(tb->tb_sb);
2281 __wait_on_buffer(locked); 2288 __wait_on_buffer(locked);
2289 reiserfs_write_lock(tb->tb_sb);
2282 if (FILESYSTEM_CHANGED_TB(tb)) 2290 if (FILESYSTEM_CHANGED_TB(tb))
2283 return REPEAT_SEARCH; 2291 return REPEAT_SEARCH;
2284 } 2292 }
@@ -2349,7 +2357,9 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2349 2357
2350 /* if it possible in indirect_to_direct conversion */ 2358 /* if it possible in indirect_to_direct conversion */
2351 if (buffer_locked(tbS0)) { 2359 if (buffer_locked(tbS0)) {
2360 reiserfs_write_unlock(tb->tb_sb);
2352 __wait_on_buffer(tbS0); 2361 __wait_on_buffer(tbS0);
2362 reiserfs_write_lock(tb->tb_sb);
2353 if (FILESYSTEM_CHANGED_TB(tb)) 2363 if (FILESYSTEM_CHANGED_TB(tb))
2354 return REPEAT_SEARCH; 2364 return REPEAT_SEARCH;
2355 } 2365 }
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a14d6cd9eeda..1893c8198439 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -489,10 +489,14 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
489 disappeared */ 489 disappeared */
490 if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { 490 if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
491 int err; 491 int err;
492 lock_kernel(); 492
493 reiserfs_write_lock(inode->i_sb);
494
493 err = reiserfs_commit_for_inode(inode); 495 err = reiserfs_commit_for_inode(inode);
494 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; 496 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
495 unlock_kernel(); 497
498 reiserfs_write_unlock(inode->i_sb);
499
496 if (err < 0) 500 if (err < 0)
497 ret = err; 501 ret = err;
498 } 502 }
@@ -616,7 +620,6 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
616 loff_t new_offset = 620 loff_t new_offset =
617 (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; 621 (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;
618 622
619 /* bad.... */
620 reiserfs_write_lock(inode->i_sb); 623 reiserfs_write_lock(inode->i_sb);
621 version = get_inode_item_key_version(inode); 624 version = get_inode_item_key_version(inode);
622 625
@@ -997,10 +1000,14 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
997 if (retval) 1000 if (retval)
998 goto failure; 1001 goto failure;
999 } 1002 }
1000 /* inserting indirect pointers for a hole can take a 1003 /*
1001 ** long time. reschedule if needed 1004 * inserting indirect pointers for a hole can take a
1005 * long time. reschedule if needed and also release the write
1006 * lock for others.
1002 */ 1007 */
1008 reiserfs_write_unlock(inode->i_sb);
1003 cond_resched(); 1009 cond_resched();
1010 reiserfs_write_lock(inode->i_sb);
1004 1011
1005 retval = search_for_position_by_key(inode->i_sb, &key, &path); 1012 retval = search_for_position_by_key(inode->i_sb, &key, &path);
1006 if (retval == IO_ERROR) { 1013 if (retval == IO_ERROR) {
@@ -2608,7 +2615,10 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
2608 int ret; 2615 int ret;
2609 int old_ref = 0; 2616 int old_ref = 0;
2610 2617
2618 reiserfs_write_unlock(inode->i_sb);
2611 reiserfs_wait_on_write_block(inode->i_sb); 2619 reiserfs_wait_on_write_block(inode->i_sb);
2620 reiserfs_write_lock(inode->i_sb);
2621
2612 fix_tail_page_for_writing(page); 2622 fix_tail_page_for_writing(page);
2613 if (reiserfs_transaction_running(inode->i_sb)) { 2623 if (reiserfs_transaction_running(inode->i_sb)) {
2614 struct reiserfs_transaction_handle *th; 2624 struct reiserfs_transaction_handle *th;
@@ -2758,7 +2768,10 @@ int reiserfs_commit_write(struct file *f, struct page *page,
2758 int update_sd = 0; 2768 int update_sd = 0;
2759 struct reiserfs_transaction_handle *th = NULL; 2769 struct reiserfs_transaction_handle *th = NULL;
2760 2770
2771 reiserfs_write_unlock(inode->i_sb);
2761 reiserfs_wait_on_write_block(inode->i_sb); 2772 reiserfs_wait_on_write_block(inode->i_sb);
2773 reiserfs_write_lock(inode->i_sb);
2774
2762 if (reiserfs_transaction_running(inode->i_sb)) { 2775 if (reiserfs_transaction_running(inode->i_sb)) {
2763 th = current->journal_info; 2776 th = current->journal_info;
2764 } 2777 }
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 0ccc3fdda7bf..5e40b0cd4c3d 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -141,9 +141,11 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
141 default: 141 default:
142 return -ENOIOCTLCMD; 142 return -ENOIOCTLCMD;
143 } 143 }
144 lock_kernel(); 144
145 reiserfs_write_lock(inode->i_sb);
145 ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); 146 ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
146 unlock_kernel(); 147 reiserfs_write_unlock(inode->i_sb);
148
147 return ret; 149 return ret;
148} 150}
149#endif 151#endif
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 90622200b39c..438c71f0bc91 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -429,21 +429,6 @@ static void clear_prepared_bits(struct buffer_head *bh)
429 clear_buffer_journal_restore_dirty(bh); 429 clear_buffer_journal_restore_dirty(bh);
430} 430}
431 431
432/* utility function to force a BUG if it is called without the big
433** kernel lock held. caller is the string printed just before calling BUG()
434*/
435void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
436{
437#ifdef CONFIG_SMP
438 if (current->lock_depth < 0) {
439 reiserfs_panic(sb, "journal-1", "%s called without kernel "
440 "lock held", caller);
441 }
442#else
443 ;
444#endif
445}
446
447/* return a cnode with same dev, block number and size in table, or null if not found */ 432/* return a cnode with same dev, block number and size in table, or null if not found */
448static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct 433static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
449 super_block 434 super_block
@@ -552,11 +537,48 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
552 journal_hash(table, cn->sb, cn->blocknr) = cn; 537 journal_hash(table, cn->sb, cn->blocknr) = cn;
553} 538}
554 539
540/*
541 * Several mutexes depend on the write lock.
542 * However sometimes we want to relax the write lock while we hold
543 * these mutexes, according to the release/reacquire on schedule()
544 * properties of the Bkl that were used.
545 * Reiserfs performances and locking were based on this scheme.
546 * Now that the write lock is a mutex and not the bkl anymore, doing so
547 * may result in a deadlock:
548 *
549 * A acquire write_lock
550 * A acquire j_commit_mutex
551 * A release write_lock and wait for something
552 * B acquire write_lock
553 * B can't acquire j_commit_mutex and sleep
554 * A can't acquire write lock anymore
555 * deadlock
556 *
 557 * What we do here is avoid such deadlocks by playing the same game
 558 * as the Bkl: if we can't acquire a mutex that depends on the write lock,
559 * we release the write lock, wait a bit and then retry.
560 *
561 * The mutexes concerned by this hack are:
562 * - The commit mutex of a journal list
563 * - The flush mutex
564 * - The journal lock
565 */
static inline void reiserfs_mutex_lock_safe(struct mutex *m,
					    struct super_block *s)
{
	/*
	 * Mimic the Bkl's release-on-schedule() property: if the mutex
	 * is contended, drop the write lock so its holder can make
	 * progress, give up the CPU, then retry with the lock reacquired.
	 */
	for (;;) {
		if (mutex_trylock(m))
			return;
		reiserfs_write_unlock(s);
		schedule();
		reiserfs_write_lock(s);
	}
}
575
555/* lock the current transaction */ 576/* lock the current transaction */
556static inline void lock_journal(struct super_block *sb) 577static inline void lock_journal(struct super_block *sb)
557{ 578{
558 PROC_INFO_INC(sb, journal.lock_journal); 579 PROC_INFO_INC(sb, journal.lock_journal);
559 mutex_lock(&SB_JOURNAL(sb)->j_mutex); 580
581 reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb);
560} 582}
561 583
562/* unlock the current transaction */ 584/* unlock the current transaction */
@@ -708,7 +730,9 @@ static void check_barrier_completion(struct super_block *s,
708 disable_barrier(s); 730 disable_barrier(s);
709 set_buffer_uptodate(bh); 731 set_buffer_uptodate(bh);
710 set_buffer_dirty(bh); 732 set_buffer_dirty(bh);
733 reiserfs_write_unlock(s);
711 sync_dirty_buffer(bh); 734 sync_dirty_buffer(bh);
735 reiserfs_write_lock(s);
712 } 736 }
713} 737}
714 738
@@ -996,8 +1020,13 @@ static int reiserfs_async_progress_wait(struct super_block *s)
996{ 1020{
997 DEFINE_WAIT(wait); 1021 DEFINE_WAIT(wait);
998 struct reiserfs_journal *j = SB_JOURNAL(s); 1022 struct reiserfs_journal *j = SB_JOURNAL(s);
999 if (atomic_read(&j->j_async_throttle)) 1023
1024 if (atomic_read(&j->j_async_throttle)) {
1025 reiserfs_write_unlock(s);
1000 congestion_wait(BLK_RW_ASYNC, HZ / 10); 1026 congestion_wait(BLK_RW_ASYNC, HZ / 10);
1027 reiserfs_write_lock(s);
1028 }
1029
1001 return 0; 1030 return 0;
1002} 1031}
1003 1032
@@ -1043,7 +1072,8 @@ static int flush_commit_list(struct super_block *s,
1043 } 1072 }
1044 1073
1045 /* make sure nobody is trying to flush this one at the same time */ 1074 /* make sure nobody is trying to flush this one at the same time */
1046 mutex_lock(&jl->j_commit_mutex); 1075 reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);
1076
1047 if (!journal_list_still_alive(s, trans_id)) { 1077 if (!journal_list_still_alive(s, trans_id)) {
1048 mutex_unlock(&jl->j_commit_mutex); 1078 mutex_unlock(&jl->j_commit_mutex);
1049 goto put_jl; 1079 goto put_jl;
@@ -1061,12 +1091,17 @@ static int flush_commit_list(struct super_block *s,
1061 1091
1062 if (!list_empty(&jl->j_bh_list)) { 1092 if (!list_empty(&jl->j_bh_list)) {
1063 int ret; 1093 int ret;
1064 unlock_kernel(); 1094
1095 /*
1096 * We might sleep in numerous places inside
1097 * write_ordered_buffers. Relax the write lock.
1098 */
1099 reiserfs_write_unlock(s);
1065 ret = write_ordered_buffers(&journal->j_dirty_buffers_lock, 1100 ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
1066 journal, jl, &jl->j_bh_list); 1101 journal, jl, &jl->j_bh_list);
1067 if (ret < 0 && retval == 0) 1102 if (ret < 0 && retval == 0)
1068 retval = ret; 1103 retval = ret;
1069 lock_kernel(); 1104 reiserfs_write_lock(s);
1070 } 1105 }
1071 BUG_ON(!list_empty(&jl->j_bh_list)); 1106 BUG_ON(!list_empty(&jl->j_bh_list));
1072 /* 1107 /*
@@ -1114,12 +1149,19 @@ static int flush_commit_list(struct super_block *s,
1114 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + 1149 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
1115 (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); 1150 (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
1116 tbh = journal_find_get_block(s, bn); 1151 tbh = journal_find_get_block(s, bn);
1152
1153 reiserfs_write_unlock(s);
1117 wait_on_buffer(tbh); 1154 wait_on_buffer(tbh);
1155 reiserfs_write_lock(s);
1118 // since we're using ll_rw_blk above, it might have skipped over 1156 // since we're using ll_rw_blk above, it might have skipped over
1119 // a locked buffer. Double check here 1157 // a locked buffer. Double check here
1120 // 1158 //
1121 if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */ 1159 /* redundant, sync_dirty_buffer() checks */
1160 if (buffer_dirty(tbh)) {
1161 reiserfs_write_unlock(s);
1122 sync_dirty_buffer(tbh); 1162 sync_dirty_buffer(tbh);
1163 reiserfs_write_lock(s);
1164 }
1123 if (unlikely(!buffer_uptodate(tbh))) { 1165 if (unlikely(!buffer_uptodate(tbh))) {
1124#ifdef CONFIG_REISERFS_CHECK 1166#ifdef CONFIG_REISERFS_CHECK
1125 reiserfs_warning(s, "journal-601", 1167 reiserfs_warning(s, "journal-601",
@@ -1143,10 +1185,15 @@ static int flush_commit_list(struct super_block *s,
1143 if (buffer_dirty(jl->j_commit_bh)) 1185 if (buffer_dirty(jl->j_commit_bh))
1144 BUG(); 1186 BUG();
1145 mark_buffer_dirty(jl->j_commit_bh) ; 1187 mark_buffer_dirty(jl->j_commit_bh) ;
1188 reiserfs_write_unlock(s);
1146 sync_dirty_buffer(jl->j_commit_bh) ; 1189 sync_dirty_buffer(jl->j_commit_bh) ;
1190 reiserfs_write_lock(s);
1147 } 1191 }
1148 } else 1192 } else {
1193 reiserfs_write_unlock(s);
1149 wait_on_buffer(jl->j_commit_bh); 1194 wait_on_buffer(jl->j_commit_bh);
1195 reiserfs_write_lock(s);
1196 }
1150 1197
1151 check_barrier_completion(s, jl->j_commit_bh); 1198 check_barrier_completion(s, jl->j_commit_bh);
1152 1199
@@ -1286,7 +1333,9 @@ static int _update_journal_header_block(struct super_block *sb,
1286 1333
1287 if (trans_id >= journal->j_last_flush_trans_id) { 1334 if (trans_id >= journal->j_last_flush_trans_id) {
1288 if (buffer_locked((journal->j_header_bh))) { 1335 if (buffer_locked((journal->j_header_bh))) {
1336 reiserfs_write_unlock(sb);
1289 wait_on_buffer((journal->j_header_bh)); 1337 wait_on_buffer((journal->j_header_bh));
1338 reiserfs_write_lock(sb);
1290 if (unlikely(!buffer_uptodate(journal->j_header_bh))) { 1339 if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
1291#ifdef CONFIG_REISERFS_CHECK 1340#ifdef CONFIG_REISERFS_CHECK
1292 reiserfs_warning(sb, "journal-699", 1341 reiserfs_warning(sb, "journal-699",
@@ -1312,12 +1361,16 @@ static int _update_journal_header_block(struct super_block *sb,
1312 disable_barrier(sb); 1361 disable_barrier(sb);
1313 goto sync; 1362 goto sync;
1314 } 1363 }
1364 reiserfs_write_unlock(sb);
1315 wait_on_buffer(journal->j_header_bh); 1365 wait_on_buffer(journal->j_header_bh);
1366 reiserfs_write_lock(sb);
1316 check_barrier_completion(sb, journal->j_header_bh); 1367 check_barrier_completion(sb, journal->j_header_bh);
1317 } else { 1368 } else {
1318 sync: 1369 sync:
1319 set_buffer_dirty(journal->j_header_bh); 1370 set_buffer_dirty(journal->j_header_bh);
1371 reiserfs_write_unlock(sb);
1320 sync_dirty_buffer(journal->j_header_bh); 1372 sync_dirty_buffer(journal->j_header_bh);
1373 reiserfs_write_lock(sb);
1321 } 1374 }
1322 if (!buffer_uptodate(journal->j_header_bh)) { 1375 if (!buffer_uptodate(journal->j_header_bh)) {
1323 reiserfs_warning(sb, "journal-837", 1376 reiserfs_warning(sb, "journal-837",
@@ -1409,7 +1462,7 @@ static int flush_journal_list(struct super_block *s,
1409 1462
1410 /* if flushall == 0, the lock is already held */ 1463 /* if flushall == 0, the lock is already held */
1411 if (flushall) { 1464 if (flushall) {
1412 mutex_lock(&journal->j_flush_mutex); 1465 reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
1413 } else if (mutex_trylock(&journal->j_flush_mutex)) { 1466 } else if (mutex_trylock(&journal->j_flush_mutex)) {
1414 BUG(); 1467 BUG();
1415 } 1468 }
@@ -1553,7 +1606,11 @@ static int flush_journal_list(struct super_block *s,
1553 reiserfs_panic(s, "journal-1011", 1606 reiserfs_panic(s, "journal-1011",
1554 "cn->bh is NULL"); 1607 "cn->bh is NULL");
1555 } 1608 }
1609
1610 reiserfs_write_unlock(s);
1556 wait_on_buffer(cn->bh); 1611 wait_on_buffer(cn->bh);
1612 reiserfs_write_lock(s);
1613
1557 if (!cn->bh) { 1614 if (!cn->bh) {
1558 reiserfs_panic(s, "journal-1012", 1615 reiserfs_panic(s, "journal-1012",
1559 "cn->bh is NULL"); 1616 "cn->bh is NULL");
@@ -1973,11 +2030,19 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
1973 reiserfs_mounted_fs_count--; 2030 reiserfs_mounted_fs_count--;
1974 /* wait for all commits to finish */ 2031 /* wait for all commits to finish */
1975 cancel_delayed_work(&SB_JOURNAL(sb)->j_work); 2032 cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
2033
2034 /*
2035 * We must release the write lock here because
2036 * the workqueue job (flush_async_commit) needs this lock
2037 */
2038 reiserfs_write_unlock(sb);
1976 flush_workqueue(commit_wq); 2039 flush_workqueue(commit_wq);
2040
1977 if (!reiserfs_mounted_fs_count) { 2041 if (!reiserfs_mounted_fs_count) {
1978 destroy_workqueue(commit_wq); 2042 destroy_workqueue(commit_wq);
1979 commit_wq = NULL; 2043 commit_wq = NULL;
1980 } 2044 }
2045 reiserfs_write_lock(sb);
1981 2046
1982 free_journal_ram(sb); 2047 free_journal_ram(sb);
1983 2048
@@ -2243,7 +2308,11 @@ static int journal_read_transaction(struct super_block *sb,
2243 /* read in the log blocks, memcpy to the corresponding real block */ 2308 /* read in the log blocks, memcpy to the corresponding real block */
2244 ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); 2309 ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
2245 for (i = 0; i < get_desc_trans_len(desc); i++) { 2310 for (i = 0; i < get_desc_trans_len(desc); i++) {
2311
2312 reiserfs_write_unlock(sb);
2246 wait_on_buffer(log_blocks[i]); 2313 wait_on_buffer(log_blocks[i]);
2314 reiserfs_write_lock(sb);
2315
2247 if (!buffer_uptodate(log_blocks[i])) { 2316 if (!buffer_uptodate(log_blocks[i])) {
2248 reiserfs_warning(sb, "journal-1212", 2317 reiserfs_warning(sb, "journal-1212",
2249 "REPLAY FAILURE fsck required! " 2318 "REPLAY FAILURE fsck required! "
@@ -2964,8 +3033,11 @@ static void queue_log_writer(struct super_block *s)
2964 init_waitqueue_entry(&wait, current); 3033 init_waitqueue_entry(&wait, current);
2965 add_wait_queue(&journal->j_join_wait, &wait); 3034 add_wait_queue(&journal->j_join_wait, &wait);
2966 set_current_state(TASK_UNINTERRUPTIBLE); 3035 set_current_state(TASK_UNINTERRUPTIBLE);
2967 if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) 3036 if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) {
3037 reiserfs_write_unlock(s);
2968 schedule(); 3038 schedule();
3039 reiserfs_write_lock(s);
3040 }
2969 __set_current_state(TASK_RUNNING); 3041 __set_current_state(TASK_RUNNING);
2970 remove_wait_queue(&journal->j_join_wait, &wait); 3042 remove_wait_queue(&journal->j_join_wait, &wait);
2971} 3043}
@@ -2982,7 +3054,9 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
2982 struct reiserfs_journal *journal = SB_JOURNAL(sb); 3054 struct reiserfs_journal *journal = SB_JOURNAL(sb);
2983 unsigned long bcount = journal->j_bcount; 3055 unsigned long bcount = journal->j_bcount;
2984 while (1) { 3056 while (1) {
3057 reiserfs_write_unlock(sb);
2985 schedule_timeout_uninterruptible(1); 3058 schedule_timeout_uninterruptible(1);
3059 reiserfs_write_lock(sb);
2986 journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; 3060 journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
2987 while ((atomic_read(&journal->j_wcount) > 0 || 3061 while ((atomic_read(&journal->j_wcount) > 0 ||
2988 atomic_read(&journal->j_jlock)) && 3062 atomic_read(&journal->j_jlock)) &&
@@ -3033,7 +3107,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3033 3107
3034 if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { 3108 if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
3035 unlock_journal(sb); 3109 unlock_journal(sb);
3110 reiserfs_write_unlock(sb);
3036 reiserfs_wait_on_write_block(sb); 3111 reiserfs_wait_on_write_block(sb);
3112 reiserfs_write_lock(sb);
3037 PROC_INFO_INC(sb, journal.journal_relock_writers); 3113 PROC_INFO_INC(sb, journal.journal_relock_writers);
3038 goto relock; 3114 goto relock;
3039 } 3115 }
@@ -3506,14 +3582,14 @@ static void flush_async_commits(struct work_struct *work)
3506 struct reiserfs_journal_list *jl; 3582 struct reiserfs_journal_list *jl;
3507 struct list_head *entry; 3583 struct list_head *entry;
3508 3584
3509 lock_kernel(); 3585 reiserfs_write_lock(sb);
3510 if (!list_empty(&journal->j_journal_list)) { 3586 if (!list_empty(&journal->j_journal_list)) {
3511 /* last entry is the youngest, commit it and you get everything */ 3587 /* last entry is the youngest, commit it and you get everything */
3512 entry = journal->j_journal_list.prev; 3588 entry = journal->j_journal_list.prev;
3513 jl = JOURNAL_LIST_ENTRY(entry); 3589 jl = JOURNAL_LIST_ENTRY(entry);
3514 flush_commit_list(sb, jl, 1); 3590 flush_commit_list(sb, jl, 1);
3515 } 3591 }
3516 unlock_kernel(); 3592 reiserfs_write_unlock(sb);
3517} 3593}
3518 3594
3519/* 3595/*
@@ -4041,7 +4117,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4041 * the new transaction is fully setup, and we've already flushed the 4117 * the new transaction is fully setup, and we've already flushed the
4042 * ordered bh list 4118 * ordered bh list
4043 */ 4119 */
4044 mutex_lock(&jl->j_commit_mutex); 4120 reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb);
4045 4121
4046 /* save the transaction id in case we need to commit it later */ 4122 /* save the transaction id in case we need to commit it later */
4047 commit_trans_id = jl->j_trans_id; 4123 commit_trans_id = jl->j_trans_id;
@@ -4203,10 +4279,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4203 * is lost. 4279 * is lost.
4204 */ 4280 */
4205 if (!list_empty(&jl->j_tail_bh_list)) { 4281 if (!list_empty(&jl->j_tail_bh_list)) {
4206 unlock_kernel(); 4282 reiserfs_write_unlock(sb);
4207 write_ordered_buffers(&journal->j_dirty_buffers_lock, 4283 write_ordered_buffers(&journal->j_dirty_buffers_lock,
4208 journal, jl, &jl->j_tail_bh_list); 4284 journal, jl, &jl->j_tail_bh_list);
4209 lock_kernel(); 4285 reiserfs_write_lock(sb);
4210 } 4286 }
4211 BUG_ON(!list_empty(&jl->j_tail_bh_list)); 4287 BUG_ON(!list_empty(&jl->j_tail_bh_list));
4212 mutex_unlock(&jl->j_commit_mutex); 4288 mutex_unlock(&jl->j_commit_mutex);
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
new file mode 100644
index 000000000000..cdd8d9ef048e
--- /dev/null
+++ b/fs/reiserfs/lock.c
@@ -0,0 +1,63 @@
1#include <linux/reiserfs_fs.h>
2#include <linux/mutex.h>
3
4/*
5 * The previous reiserfs locking scheme was heavily based on
6 * the tricky properties of the Bkl:
7 *
8 * - it was acquired recursively by a same task
9 * - the performances relied on the release-while-schedule() property
10 *
11 * Now that we replace it by a mutex, we still want to keep the same
12 * recursive property to avoid big changes in the code structure.
13 * We use our own lock_owner here because the owner field on a mutex
14 * is only available in SMP or mutex debugging, also we only need this field
15 * for this mutex, no need for a system wide mutex facility.
16 *
17 * Also this lock is often released before a call that could block because
 18 * reiserfs performance was partially based on the release-while-schedule()
19 * property of the Bkl.
20 */
21void reiserfs_write_lock(struct super_block *s)
22{
23 struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
24
25 if (sb_i->lock_owner != current) {
26 mutex_lock(&sb_i->lock);
27 sb_i->lock_owner = current;
28 }
29
30 /* No need to protect it, only the current task touches it */
31 sb_i->lock_depth++;
32}
33
34void reiserfs_write_unlock(struct super_block *s)
35{
36 struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
37
38 /*
39 * Are we unlocking without even holding the lock?
40 * Such a situation could even raise a BUG() if we don't
41 * want the data become corrupted
42 */
43 WARN_ONCE(sb_i->lock_owner != current,
44 "Superblock write lock imbalance");
45
46 if (--sb_i->lock_depth == -1) {
47 sb_i->lock_owner = NULL;
48 mutex_unlock(&sb_i->lock);
49 }
50}
51
52/*
53 * Utility function to force a BUG if it is called without the superblock
54 * write lock held. caller is the string printed just before calling BUG()
55 */
56void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
57{
58 struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
59
60 if (sb_i->lock_depth < 0)
61 reiserfs_panic(sb, "%s called without kernel lock held %d",
62 caller);
63}
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 18b315d3d104..b3a94d20f0fc 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -141,7 +141,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
141 141
142 set_buffer_uptodate(bh); 142 set_buffer_uptodate(bh);
143 mark_buffer_dirty(bh); 143 mark_buffer_dirty(bh);
144 reiserfs_write_unlock(s);
144 sync_dirty_buffer(bh); 145 sync_dirty_buffer(bh);
146 reiserfs_write_lock(s);
145 // update bitmap_info stuff 147 // update bitmap_info stuff
146 bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; 148 bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
147 brelse(bh); 149 brelse(bh);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index d036ee5b1c81..6bd99a99a652 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -629,7 +629,9 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
629 search_by_key_reada(sb, reada_bh, 629 search_by_key_reada(sb, reada_bh,
630 reada_blocks, reada_count); 630 reada_blocks, reada_count);
631 ll_rw_block(READ, 1, &bh); 631 ll_rw_block(READ, 1, &bh);
632 reiserfs_write_unlock(sb);
632 wait_on_buffer(bh); 633 wait_on_buffer(bh);
634 reiserfs_write_lock(sb);
633 if (!buffer_uptodate(bh)) 635 if (!buffer_uptodate(bh))
634 goto io_error; 636 goto io_error;
635 } else { 637 } else {
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 7adea74d6a8a..e1cfb80d0bf3 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -465,7 +465,7 @@ static void reiserfs_put_super(struct super_block *s)
465 struct reiserfs_transaction_handle th; 465 struct reiserfs_transaction_handle th;
466 th.t_trans_id = 0; 466 th.t_trans_id = 0;
467 467
468 lock_kernel(); 468 reiserfs_write_lock(s);
469 469
470 if (s->s_dirt) 470 if (s->s_dirt)
471 reiserfs_write_super(s); 471 reiserfs_write_super(s);
@@ -499,10 +499,10 @@ static void reiserfs_put_super(struct super_block *s)
499 499
500 reiserfs_proc_info_done(s); 500 reiserfs_proc_info_done(s);
501 501
502 reiserfs_write_unlock(s);
503 mutex_destroy(&REISERFS_SB(s)->lock);
502 kfree(s->s_fs_info); 504 kfree(s->s_fs_info);
503 s->s_fs_info = NULL; 505 s->s_fs_info = NULL;
504
505 unlock_kernel();
506} 506}
507 507
508static struct kmem_cache *reiserfs_inode_cachep; 508static struct kmem_cache *reiserfs_inode_cachep;
@@ -1168,11 +1168,14 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1168 unsigned int qfmt = 0; 1168 unsigned int qfmt = 0;
1169#ifdef CONFIG_QUOTA 1169#ifdef CONFIG_QUOTA
1170 int i; 1170 int i;
1171#endif
1172
1173 reiserfs_write_lock(s);
1171 1174
1175#ifdef CONFIG_QUOTA
1172 memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); 1176 memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
1173#endif 1177#endif
1174 1178
1175 lock_kernel();
1176 rs = SB_DISK_SUPER_BLOCK(s); 1179 rs = SB_DISK_SUPER_BLOCK(s);
1177 1180
1178 if (!reiserfs_parse_options 1181 if (!reiserfs_parse_options
@@ -1295,12 +1298,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1295 1298
1296out_ok: 1299out_ok:
1297 replace_mount_options(s, new_opts); 1300 replace_mount_options(s, new_opts);
1298 unlock_kernel(); 1301 reiserfs_write_unlock(s);
1299 return 0; 1302 return 0;
1300 1303
1301out_err: 1304out_err:
1302 kfree(new_opts); 1305 kfree(new_opts);
1303 unlock_kernel(); 1306 reiserfs_write_unlock(s);
1304 return err; 1307 return err;
1305} 1308}
1306 1309
@@ -1404,7 +1407,9 @@ static int read_super_block(struct super_block *s, int offset)
1404static int reread_meta_blocks(struct super_block *s) 1407static int reread_meta_blocks(struct super_block *s)
1405{ 1408{
1406 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); 1409 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
1410 reiserfs_write_unlock(s);
1407 wait_on_buffer(SB_BUFFER_WITH_SB(s)); 1411 wait_on_buffer(SB_BUFFER_WITH_SB(s));
1412 reiserfs_write_lock(s);
1408 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { 1413 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
1409 reiserfs_warning(s, "reiserfs-2504", "error reading the super"); 1414 reiserfs_warning(s, "reiserfs-2504", "error reading the super");
1410 return 1; 1415 return 1;
@@ -1613,7 +1618,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1613 sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); 1618 sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
1614 if (!sbi) { 1619 if (!sbi) {
1615 errval = -ENOMEM; 1620 errval = -ENOMEM;
1616 goto error; 1621 goto error_alloc;
1617 } 1622 }
1618 s->s_fs_info = sbi; 1623 s->s_fs_info = sbi;
1619 /* Set default values for options: non-aggressive tails, RO on errors */ 1624 /* Set default values for options: non-aggressive tails, RO on errors */
@@ -1627,6 +1632,20 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1627 /* setup default block allocator options */ 1632 /* setup default block allocator options */
1628 reiserfs_init_alloc_options(s); 1633 reiserfs_init_alloc_options(s);
1629 1634
1635 mutex_init(&REISERFS_SB(s)->lock);
1636 REISERFS_SB(s)->lock_depth = -1;
1637
1638 /*
1639 * This function is called with the bkl, which also was the old
1640 * locking used here.
1641 * do_journal_begin() will soon check if we hold the lock (ie: was the
 1642 * bkl). This is likely because do_journal_begin() has several other
 1643 * callers and, at this time, it doesn't seem to be necessary to
 1644 * protect against anything.
1645 * Anyway, let's be conservative and lock for now.
1646 */
1647 reiserfs_write_lock(s);
1648
1630 jdev_name = NULL; 1649 jdev_name = NULL;
1631 if (reiserfs_parse_options 1650 if (reiserfs_parse_options
1632 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, 1651 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
@@ -1852,9 +1871,13 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1852 init_waitqueue_head(&(sbi->s_wait)); 1871 init_waitqueue_head(&(sbi->s_wait));
1853 spin_lock_init(&sbi->bitmap_lock); 1872 spin_lock_init(&sbi->bitmap_lock);
1854 1873
1874 reiserfs_write_unlock(s);
1875
1855 return (0); 1876 return (0);
1856 1877
1857error: 1878error:
1879 reiserfs_write_unlock(s);
1880error_alloc:
1858 if (jinit_done) { /* kill the commit thread, free journal ram */ 1881 if (jinit_done) { /* kill the commit thread, free journal ram */
1859 journal_release_error(NULL, s); 1882 journal_release_error(NULL, s);
1860 } 1883 }
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index dd31e7bae35c..e47328f51801 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -52,11 +52,13 @@
52#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION 52#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION
53#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION 53#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION
54 54
55/* Locking primitives */ 55/*
56/* Right now we are still falling back to (un)lock_kernel, but eventually that 56 * Locking primitives. The write lock is a per superblock
57 would evolve into real per-fs locks */ 57 * special mutex that has properties close to the Big Kernel Lock
58#define reiserfs_write_lock( sb ) lock_kernel() 58 * which was used in the previous locking scheme.
59#define reiserfs_write_unlock( sb ) unlock_kernel() 59 */
60void reiserfs_write_lock(struct super_block *s);
61void reiserfs_write_unlock(struct super_block *s);
60 62
61struct fid; 63struct fid;
62 64
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index dab68bbed675..045c37213675 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -7,6 +7,8 @@
7#ifdef __KERNEL__ 7#ifdef __KERNEL__
8#include <linux/workqueue.h> 8#include <linux/workqueue.h>
9#include <linux/rwsem.h> 9#include <linux/rwsem.h>
10#include <linux/mutex.h>
11#include <linux/sched.h>
10#endif 12#endif
11 13
12typedef enum { 14typedef enum {
@@ -355,6 +357,13 @@ struct reiserfs_sb_info {
355 struct reiserfs_journal *s_journal; /* pointer to journal information */ 357 struct reiserfs_journal *s_journal; /* pointer to journal information */
356 unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ 358 unsigned short s_mount_state; /* reiserfs state (valid, invalid) */
357 359
360 /* Serialize writers access, replace the old bkl */
361 struct mutex lock;
362 /* Owner of the lock (can be recursive) */
363 struct task_struct *lock_owner;
364 /* Depth of the lock, start from -1 like the bkl */
365 int lock_depth;
366
358 /* Comment? -Hans */ 367 /* Comment? -Hans */
359 void (*end_io_handler) (struct buffer_head *, int); 368 void (*end_io_handler) (struct buffer_head *, int);
360 hashf_t s_hash_function; /* pointer to function which is used 369 hashf_t s_hash_function; /* pointer to function which is used