diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-09 10:58:15 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-09 10:58:15 -0500 |
commit | a9280fed383082da08a40af3f38daccaed6d8d62 (patch) | |
tree | 7eeacfcd07f7f1de56b1b29f473cb31b7251d8b4 | |
parent | 2b876f95d03e226394b5d360c86127cbefaf614b (diff) | |
parent | 1d2c6cfd40b2dece3bb958cbbc405a2c1536ab75 (diff) |
Merge branch 'reiserfs/kill-bkl' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing
* 'reiserfs/kill-bkl' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing: (31 commits)
kill-the-bkl/reiserfs: turn GFP_ATOMIC flag to GFP_NOFS in reiserfs_get_block()
kill-the-bkl/reiserfs: drop the fs race watchdog from _get_block_create_0()
kill-the-bkl/reiserfs: definitely drop the bkl from reiserfs_ioctl()
kill-the-bkl/reiserfs: always lock the ioctl path
kill-the-bkl/reiserfs: fix reiserfs lock to cpu_add_remove_lock dependency
kill-the-bkl/reiserfs: Fix induced mm->mmap_sem to sysfs_mutex dependency
kill-the-bkl/reiserfs: panic in case of lock imbalance
kill-the-bkl/reiserfs: fix recursive reiserfs write lock in reiserfs_commit_write()
kill-the-bkl/reiserfs: fix recursive reiserfs lock in reiserfs_mkdir()
kill-the-bkl/reiserfs: fix "reiserfs lock" / "inode mutex" lock inversion dependency
kill-the-bkl/reiserfs: move the concurrent tree accesses checks per superblock
kill-the-bkl/reiserfs: acquire the inode mutex safely
kill-the-bkl/reiserfs: unlock only when needed in search_by_key
kill-the-bkl/reiserfs: use mutex_lock in reiserfs_mutex_lock_safe
kill-the-bkl/reiserfs: factorize the locking in reiserfs_write_end()
kill-the-bkl/reiserfs: reduce number of contentions in search_by_key()
kill-the-bkl/reiserfs: don't hold the write recursively in reiserfs_lookup()
kill-the-bkl/reiserfs: lock only once on reiserfs_get_block()
kill-the-bkl/reiserfs: conditionally release the write lock on fs_changed()
kill-the-BKL/reiserfs: add reiserfs_cond_resched()
...
-rw-r--r-- | fs/reiserfs/Makefile | 2 | ||||
-rw-r--r-- | fs/reiserfs/bitmap.c | 4 | ||||
-rw-r--r-- | fs/reiserfs/dir.c | 10 | ||||
-rw-r--r-- | fs/reiserfs/do_balan.c | 17 | ||||
-rw-r--r-- | fs/reiserfs/file.c | 2 | ||||
-rw-r--r-- | fs/reiserfs/fix_node.c | 19 | ||||
-rw-r--r-- | fs/reiserfs/inode.c | 97 | ||||
-rw-r--r-- | fs/reiserfs/ioctl.c | 77 | ||||
-rw-r--r-- | fs/reiserfs/journal.c | 130 | ||||
-rw-r--r-- | fs/reiserfs/lock.c | 88 | ||||
-rw-r--r-- | fs/reiserfs/namei.c | 20 | ||||
-rw-r--r-- | fs/reiserfs/prints.c | 4 | ||||
-rw-r--r-- | fs/reiserfs/resize.c | 2 | ||||
-rw-r--r-- | fs/reiserfs/stree.c | 53 | ||||
-rw-r--r-- | fs/reiserfs/super.c | 52 | ||||
-rw-r--r-- | fs/reiserfs/xattr.c | 6 | ||||
-rw-r--r-- | include/linux/reiserfs_fs.h | 71 | ||||
-rw-r--r-- | include/linux/reiserfs_fs_sb.h | 20 |
18 files changed, 503 insertions, 171 deletions
diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile index 7c5ab6330dd6..6a9e30c041dd 100644 --- a/fs/reiserfs/Makefile +++ b/fs/reiserfs/Makefile | |||
@@ -7,7 +7,7 @@ obj-$(CONFIG_REISERFS_FS) += reiserfs.o | |||
7 | reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \ | 7 | reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \ |
8 | super.o prints.o objectid.o lbalance.o ibalance.o stree.o \ | 8 | super.o prints.o objectid.o lbalance.o ibalance.o stree.o \ |
9 | hashes.o tail_conversion.o journal.o resize.o \ | 9 | hashes.o tail_conversion.o journal.o resize.o \ |
10 | item_ops.o ioctl.o procfs.o xattr.o | 10 | item_ops.o ioctl.o procfs.o xattr.o lock.o |
11 | 11 | ||
12 | ifeq ($(CONFIG_REISERFS_FS_XATTR),y) | 12 | ifeq ($(CONFIG_REISERFS_FS_XATTR),y) |
13 | reiserfs-objs += xattr_user.o xattr_trusted.o | 13 | reiserfs-objs += xattr_user.o xattr_trusted.o |
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index e716161ab325..685495707181 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c | |||
@@ -1249,14 +1249,18 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, | |||
1249 | else if (bitmap == 0) | 1249 | else if (bitmap == 0) |
1250 | block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; | 1250 | block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; |
1251 | 1251 | ||
1252 | reiserfs_write_unlock(sb); | ||
1252 | bh = sb_bread(sb, block); | 1253 | bh = sb_bread(sb, block); |
1254 | reiserfs_write_lock(sb); | ||
1253 | if (bh == NULL) | 1255 | if (bh == NULL) |
1254 | reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " | 1256 | reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " |
1255 | "reading failed", __func__, block); | 1257 | "reading failed", __func__, block); |
1256 | else { | 1258 | else { |
1257 | if (buffer_locked(bh)) { | 1259 | if (buffer_locked(bh)) { |
1258 | PROC_INFO_INC(sb, scan_bitmap.wait); | 1260 | PROC_INFO_INC(sb, scan_bitmap.wait); |
1261 | reiserfs_write_unlock(sb); | ||
1259 | __wait_on_buffer(bh); | 1262 | __wait_on_buffer(bh); |
1263 | reiserfs_write_lock(sb); | ||
1260 | } | 1264 | } |
1261 | BUG_ON(!buffer_uptodate(bh)); | 1265 | BUG_ON(!buffer_uptodate(bh)); |
1262 | BUG_ON(atomic_read(&bh->b_count) == 0); | 1266 | BUG_ON(atomic_read(&bh->b_count) == 0); |
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 6d2668fdc384..c094f58c7448 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c | |||
@@ -20,7 +20,7 @@ const struct file_operations reiserfs_dir_operations = { | |||
20 | .read = generic_read_dir, | 20 | .read = generic_read_dir, |
21 | .readdir = reiserfs_readdir, | 21 | .readdir = reiserfs_readdir, |
22 | .fsync = reiserfs_dir_fsync, | 22 | .fsync = reiserfs_dir_fsync, |
23 | .ioctl = reiserfs_ioctl, | 23 | .unlocked_ioctl = reiserfs_ioctl, |
24 | #ifdef CONFIG_COMPAT | 24 | #ifdef CONFIG_COMPAT |
25 | .compat_ioctl = reiserfs_compat_ioctl, | 25 | .compat_ioctl = reiserfs_compat_ioctl, |
26 | #endif | 26 | #endif |
@@ -174,14 +174,22 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, | |||
174 | // user space buffer is swapped out. At that time | 174 | // user space buffer is swapped out. At that time |
175 | // entry can move to somewhere else | 175 | // entry can move to somewhere else |
176 | memcpy(local_buf, d_name, d_reclen); | 176 | memcpy(local_buf, d_name, d_reclen); |
177 | |||
178 | /* | ||
179 | * Since filldir might sleep, we can release | ||
180 | * the write lock here for other waiters | ||
181 | */ | ||
182 | reiserfs_write_unlock(inode->i_sb); | ||
177 | if (filldir | 183 | if (filldir |
178 | (dirent, local_buf, d_reclen, d_off, d_ino, | 184 | (dirent, local_buf, d_reclen, d_off, d_ino, |
179 | DT_UNKNOWN) < 0) { | 185 | DT_UNKNOWN) < 0) { |
186 | reiserfs_write_lock(inode->i_sb); | ||
180 | if (local_buf != small_buf) { | 187 | if (local_buf != small_buf) { |
181 | kfree(local_buf); | 188 | kfree(local_buf); |
182 | } | 189 | } |
183 | goto end; | 190 | goto end; |
184 | } | 191 | } |
192 | reiserfs_write_lock(inode->i_sb); | ||
185 | if (local_buf != small_buf) { | 193 | if (local_buf != small_buf) { |
186 | kfree(local_buf); | 194 | kfree(local_buf); |
187 | } | 195 | } |
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c index 128d3f7c8aa5..60c080440661 100644 --- a/fs/reiserfs/do_balan.c +++ b/fs/reiserfs/do_balan.c | |||
@@ -21,14 +21,6 @@ | |||
21 | #include <linux/buffer_head.h> | 21 | #include <linux/buffer_head.h> |
22 | #include <linux/kernel.h> | 22 | #include <linux/kernel.h> |
23 | 23 | ||
24 | #ifdef CONFIG_REISERFS_CHECK | ||
25 | |||
26 | struct tree_balance *cur_tb = NULL; /* detects whether more than one | ||
27 | copy of tb exists as a means | ||
28 | of checking whether schedule | ||
29 | is interrupting do_balance */ | ||
30 | #endif | ||
31 | |||
32 | static inline void buffer_info_init_left(struct tree_balance *tb, | 24 | static inline void buffer_info_init_left(struct tree_balance *tb, |
33 | struct buffer_info *bi) | 25 | struct buffer_info *bi) |
34 | { | 26 | { |
@@ -1840,11 +1832,12 @@ static int check_before_balancing(struct tree_balance *tb) | |||
1840 | { | 1832 | { |
1841 | int retval = 0; | 1833 | int retval = 0; |
1842 | 1834 | ||
1843 | if (cur_tb) { | 1835 | if (REISERFS_SB(tb->tb_sb)->cur_tb) { |
1844 | reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule " | 1836 | reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule " |
1845 | "occurred based on cur_tb not being null at " | 1837 | "occurred based on cur_tb not being null at " |
1846 | "this point in code. do_balance cannot properly " | 1838 | "this point in code. do_balance cannot properly " |
1847 | "handle schedule occurring while it runs."); | 1839 | "handle concurrent tree accesses on a same " |
1840 | "mount point."); | ||
1848 | } | 1841 | } |
1849 | 1842 | ||
1850 | /* double check that buffers that we will modify are unlocked. (fix_nodes should already have | 1843 | /* double check that buffers that we will modify are unlocked. (fix_nodes should already have |
@@ -1986,7 +1979,7 @@ static inline void do_balance_starts(struct tree_balance *tb) | |||
1986 | "check");*/ | 1979 | "check");*/ |
1987 | RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); | 1980 | RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); |
1988 | #ifdef CONFIG_REISERFS_CHECK | 1981 | #ifdef CONFIG_REISERFS_CHECK |
1989 | cur_tb = tb; | 1982 | REISERFS_SB(tb->tb_sb)->cur_tb = tb; |
1990 | #endif | 1983 | #endif |
1991 | } | 1984 | } |
1992 | 1985 | ||
@@ -1996,7 +1989,7 @@ static inline void do_balance_completed(struct tree_balance *tb) | |||
1996 | #ifdef CONFIG_REISERFS_CHECK | 1989 | #ifdef CONFIG_REISERFS_CHECK |
1997 | check_leaf_level(tb); | 1990 | check_leaf_level(tb); |
1998 | check_internal_levels(tb); | 1991 | check_internal_levels(tb); |
1999 | cur_tb = NULL; | 1992 | REISERFS_SB(tb->tb_sb)->cur_tb = NULL; |
2000 | #endif | 1993 | #endif |
2001 | 1994 | ||
2002 | /* reiserfs_free_block is no longer schedule safe. So, we need to | 1995 | /* reiserfs_free_block is no longer schedule safe. So, we need to |
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 9f436668b7f8..da2dba082e2d 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -284,7 +284,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t | |||
284 | const struct file_operations reiserfs_file_operations = { | 284 | const struct file_operations reiserfs_file_operations = { |
285 | .read = do_sync_read, | 285 | .read = do_sync_read, |
286 | .write = reiserfs_file_write, | 286 | .write = reiserfs_file_write, |
287 | .ioctl = reiserfs_ioctl, | 287 | .unlocked_ioctl = reiserfs_ioctl, |
288 | #ifdef CONFIG_COMPAT | 288 | #ifdef CONFIG_COMPAT |
289 | .compat_ioctl = reiserfs_compat_ioctl, | 289 | .compat_ioctl = reiserfs_compat_ioctl, |
290 | #endif | 290 | #endif |
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c index 5e5a4e6fbaf8..d2f31330dcae 100644 --- a/fs/reiserfs/fix_node.c +++ b/fs/reiserfs/fix_node.c | |||
@@ -563,9 +563,6 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
563 | return needed_nodes; | 563 | return needed_nodes; |
564 | } | 564 | } |
565 | 565 | ||
566 | #ifdef CONFIG_REISERFS_CHECK | ||
567 | extern struct tree_balance *cur_tb; | ||
568 | #endif | ||
569 | 566 | ||
570 | /* Set parameters for balancing. | 567 | /* Set parameters for balancing. |
571 | * Performs write of results of analysis of balancing into structure tb, | 568 | * Performs write of results of analysis of balancing into structure tb, |
@@ -1022,7 +1019,11 @@ static int get_far_parent(struct tree_balance *tb, | |||
1022 | /* Check whether the common parent is locked. */ | 1019 | /* Check whether the common parent is locked. */ |
1023 | 1020 | ||
1024 | if (buffer_locked(*pcom_father)) { | 1021 | if (buffer_locked(*pcom_father)) { |
1022 | |||
1023 | /* Release the write lock while the buffer is busy */ | ||
1024 | reiserfs_write_unlock(tb->tb_sb); | ||
1025 | __wait_on_buffer(*pcom_father); | 1025 | __wait_on_buffer(*pcom_father); |
1026 | reiserfs_write_lock(tb->tb_sb); | ||
1026 | if (FILESYSTEM_CHANGED_TB(tb)) { | 1027 | if (FILESYSTEM_CHANGED_TB(tb)) { |
1027 | brelse(*pcom_father); | 1028 | brelse(*pcom_father); |
1028 | return REPEAT_SEARCH; | 1029 | return REPEAT_SEARCH; |
@@ -1927,7 +1928,9 @@ static int get_direct_parent(struct tree_balance *tb, int h) | |||
1927 | return REPEAT_SEARCH; | 1928 | return REPEAT_SEARCH; |
1928 | 1929 | ||
1929 | if (buffer_locked(bh)) { | 1930 | if (buffer_locked(bh)) { |
1931 | reiserfs_write_unlock(tb->tb_sb); | ||
1930 | __wait_on_buffer(bh); | 1932 | __wait_on_buffer(bh); |
1933 | reiserfs_write_lock(tb->tb_sb); | ||
1931 | if (FILESYSTEM_CHANGED_TB(tb)) | 1934 | if (FILESYSTEM_CHANGED_TB(tb)) |
1932 | return REPEAT_SEARCH; | 1935 | return REPEAT_SEARCH; |
1933 | } | 1936 | } |
@@ -1965,7 +1968,9 @@ static int get_neighbors(struct tree_balance *tb, int h) | |||
1965 | tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb-> | 1968 | tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb-> |
1966 | FL[h]); | 1969 | FL[h]); |
1967 | son_number = B_N_CHILD_NUM(tb->FL[h], child_position); | 1970 | son_number = B_N_CHILD_NUM(tb->FL[h], child_position); |
1971 | reiserfs_write_unlock(sb); | ||
1968 | bh = sb_bread(sb, son_number); | 1972 | bh = sb_bread(sb, son_number); |
1973 | reiserfs_write_lock(sb); | ||
1969 | if (!bh) | 1974 | if (!bh) |
1970 | return IO_ERROR; | 1975 | return IO_ERROR; |
1971 | if (FILESYSTEM_CHANGED_TB(tb)) { | 1976 | if (FILESYSTEM_CHANGED_TB(tb)) { |
@@ -2003,7 +2008,9 @@ static int get_neighbors(struct tree_balance *tb, int h) | |||
2003 | child_position = | 2008 | child_position = |
2004 | (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0; | 2009 | (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0; |
2005 | son_number = B_N_CHILD_NUM(tb->FR[h], child_position); | 2010 | son_number = B_N_CHILD_NUM(tb->FR[h], child_position); |
2011 | reiserfs_write_unlock(sb); | ||
2006 | bh = sb_bread(sb, son_number); | 2012 | bh = sb_bread(sb, son_number); |
2013 | reiserfs_write_lock(sb); | ||
2007 | if (!bh) | 2014 | if (!bh) |
2008 | return IO_ERROR; | 2015 | return IO_ERROR; |
2009 | if (FILESYSTEM_CHANGED_TB(tb)) { | 2016 | if (FILESYSTEM_CHANGED_TB(tb)) { |
@@ -2278,7 +2285,9 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) | |||
2278 | REPEAT_SEARCH : CARRY_ON; | 2285 | REPEAT_SEARCH : CARRY_ON; |
2279 | } | 2286 | } |
2280 | #endif | 2287 | #endif |
2288 | reiserfs_write_unlock(tb->tb_sb); | ||
2281 | __wait_on_buffer(locked); | 2289 | __wait_on_buffer(locked); |
2290 | reiserfs_write_lock(tb->tb_sb); | ||
2282 | if (FILESYSTEM_CHANGED_TB(tb)) | 2291 | if (FILESYSTEM_CHANGED_TB(tb)) |
2283 | return REPEAT_SEARCH; | 2292 | return REPEAT_SEARCH; |
2284 | } | 2293 | } |
@@ -2349,12 +2358,14 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2349 | 2358 | ||
2350 | /* if it possible in indirect_to_direct conversion */ | 2359 | /* if it possible in indirect_to_direct conversion */ |
2351 | if (buffer_locked(tbS0)) { | 2360 | if (buffer_locked(tbS0)) { |
2361 | reiserfs_write_unlock(tb->tb_sb); | ||
2352 | __wait_on_buffer(tbS0); | 2362 | __wait_on_buffer(tbS0); |
2363 | reiserfs_write_lock(tb->tb_sb); | ||
2353 | if (FILESYSTEM_CHANGED_TB(tb)) | 2364 | if (FILESYSTEM_CHANGED_TB(tb)) |
2354 | return REPEAT_SEARCH; | 2365 | return REPEAT_SEARCH; |
2355 | } | 2366 | } |
2356 | #ifdef CONFIG_REISERFS_CHECK | 2367 | #ifdef CONFIG_REISERFS_CHECK |
2357 | if (cur_tb) { | 2368 | if (REISERFS_SB(tb->tb_sb)->cur_tb) { |
2358 | print_cur_tb("fix_nodes"); | 2369 | print_cur_tb("fix_nodes"); |
2359 | reiserfs_panic(tb->tb_sb, "PAP-8305", | 2370 | reiserfs_panic(tb->tb_sb, "PAP-8305", |
2360 | "there is pending do_balance"); | 2371 | "there is pending do_balance"); |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a14d6cd9eeda..3a28e7751b3c 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -251,7 +251,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
251 | struct cpu_key key; | 251 | struct cpu_key key; |
252 | struct buffer_head *bh; | 252 | struct buffer_head *bh; |
253 | struct item_head *ih, tmp_ih; | 253 | struct item_head *ih, tmp_ih; |
254 | int fs_gen; | ||
255 | b_blocknr_t blocknr; | 254 | b_blocknr_t blocknr; |
256 | char *p = NULL; | 255 | char *p = NULL; |
257 | int chars; | 256 | int chars; |
@@ -265,7 +264,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
265 | (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, | 264 | (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, |
266 | 3); | 265 | 3); |
267 | 266 | ||
268 | research: | ||
269 | result = search_for_position_by_key(inode->i_sb, &key, &path); | 267 | result = search_for_position_by_key(inode->i_sb, &key, &path); |
270 | if (result != POSITION_FOUND) { | 268 | if (result != POSITION_FOUND) { |
271 | pathrelse(&path); | 269 | pathrelse(&path); |
@@ -340,7 +338,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
340 | } | 338 | } |
341 | // read file tail into part of page | 339 | // read file tail into part of page |
342 | offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); | 340 | offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); |
343 | fs_gen = get_generation(inode->i_sb); | ||
344 | copy_item_head(&tmp_ih, ih); | 341 | copy_item_head(&tmp_ih, ih); |
345 | 342 | ||
346 | /* we only want to kmap if we are reading the tail into the page. | 343 | /* we only want to kmap if we are reading the tail into the page. |
@@ -348,13 +345,9 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
348 | ** sure we need to. But, this means the item might move if | 345 | ** sure we need to. But, this means the item might move if |
349 | ** kmap schedules | 346 | ** kmap schedules |
350 | */ | 347 | */ |
351 | if (!p) { | 348 | if (!p) |
352 | p = (char *)kmap(bh_result->b_page); | 349 | p = (char *)kmap(bh_result->b_page); |
353 | if (fs_changed(fs_gen, inode->i_sb) | 350 | |
354 | && item_moved(&tmp_ih, &path)) { | ||
355 | goto research; | ||
356 | } | ||
357 | } | ||
358 | p += offset; | 351 | p += offset; |
359 | memset(p, 0, inode->i_sb->s_blocksize); | 352 | memset(p, 0, inode->i_sb->s_blocksize); |
360 | do { | 353 | do { |
@@ -489,10 +482,14 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode, | |||
489 | disappeared */ | 482 | disappeared */ |
490 | if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { | 483 | if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { |
491 | int err; | 484 | int err; |
492 | lock_kernel(); | 485 | |
486 | reiserfs_write_lock(inode->i_sb); | ||
487 | |||
493 | err = reiserfs_commit_for_inode(inode); | 488 | err = reiserfs_commit_for_inode(inode); |
494 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; | 489 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; |
495 | unlock_kernel(); | 490 | |
491 | reiserfs_write_unlock(inode->i_sb); | ||
492 | |||
496 | if (err < 0) | 493 | if (err < 0) |
497 | ret = err; | 494 | ret = err; |
498 | } | 495 | } |
@@ -601,6 +598,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
601 | __le32 *item; | 598 | __le32 *item; |
602 | int done; | 599 | int done; |
603 | int fs_gen; | 600 | int fs_gen; |
601 | int lock_depth; | ||
604 | struct reiserfs_transaction_handle *th = NULL; | 602 | struct reiserfs_transaction_handle *th = NULL; |
605 | /* space reserved in transaction batch: | 603 | /* space reserved in transaction batch: |
606 | . 3 balancings in direct->indirect conversion | 604 | . 3 balancings in direct->indirect conversion |
@@ -616,12 +614,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
616 | loff_t new_offset = | 614 | loff_t new_offset = |
617 | (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; | 615 | (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; |
618 | 616 | ||
619 | /* bad.... */ | 617 | lock_depth = reiserfs_write_lock_once(inode->i_sb); |
620 | reiserfs_write_lock(inode->i_sb); | ||
621 | version = get_inode_item_key_version(inode); | 618 | version = get_inode_item_key_version(inode); |
622 | 619 | ||
623 | if (!file_capable(inode, block)) { | 620 | if (!file_capable(inode, block)) { |
624 | reiserfs_write_unlock(inode->i_sb); | 621 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); |
625 | return -EFBIG; | 622 | return -EFBIG; |
626 | } | 623 | } |
627 | 624 | ||
@@ -633,7 +630,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
633 | /* find number of block-th logical block of the file */ | 630 | /* find number of block-th logical block of the file */ |
634 | ret = _get_block_create_0(inode, block, bh_result, | 631 | ret = _get_block_create_0(inode, block, bh_result, |
635 | create | GET_BLOCK_READ_DIRECT); | 632 | create | GET_BLOCK_READ_DIRECT); |
636 | reiserfs_write_unlock(inode->i_sb); | 633 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); |
637 | return ret; | 634 | return ret; |
638 | } | 635 | } |
639 | /* | 636 | /* |
@@ -751,7 +748,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
751 | if (!dangle && th) | 748 | if (!dangle && th) |
752 | retval = reiserfs_end_persistent_transaction(th); | 749 | retval = reiserfs_end_persistent_transaction(th); |
753 | 750 | ||
754 | reiserfs_write_unlock(inode->i_sb); | 751 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); |
755 | 752 | ||
756 | /* the item was found, so new blocks were not added to the file | 753 | /* the item was found, so new blocks were not added to the file |
757 | ** there is no need to make sure the inode is updated with this | 754 | ** there is no need to make sure the inode is updated with this |
@@ -935,7 +932,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
935 | if (blocks_needed == 1) { | 932 | if (blocks_needed == 1) { |
936 | un = &unf_single; | 933 | un = &unf_single; |
937 | } else { | 934 | } else { |
938 | un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC); // We need to avoid scheduling. | 935 | un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_NOFS); |
939 | if (!un) { | 936 | if (!un) { |
940 | un = &unf_single; | 937 | un = &unf_single; |
941 | blocks_needed = 1; | 938 | blocks_needed = 1; |
@@ -997,10 +994,16 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
997 | if (retval) | 994 | if (retval) |
998 | goto failure; | 995 | goto failure; |
999 | } | 996 | } |
1000 | /* inserting indirect pointers for a hole can take a | 997 | /* |
1001 | ** long time. reschedule if needed | 998 | * inserting indirect pointers for a hole can take a |
999 | * long time. reschedule if needed and also release the write | ||
1000 | * lock for others. | ||
1002 | */ | 1001 | */ |
1003 | cond_resched(); | 1002 | if (need_resched()) { |
1003 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); | ||
1004 | schedule(); | ||
1005 | lock_depth = reiserfs_write_lock_once(inode->i_sb); | ||
1006 | } | ||
1004 | 1007 | ||
1005 | retval = search_for_position_by_key(inode->i_sb, &key, &path); | 1008 | retval = search_for_position_by_key(inode->i_sb, &key, &path); |
1006 | if (retval == IO_ERROR) { | 1009 | if (retval == IO_ERROR) { |
@@ -1035,7 +1038,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
1035 | retval = err; | 1038 | retval = err; |
1036 | } | 1039 | } |
1037 | 1040 | ||
1038 | reiserfs_write_unlock(inode->i_sb); | 1041 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); |
1039 | reiserfs_check_path(&path); | 1042 | reiserfs_check_path(&path); |
1040 | return retval; | 1043 | return retval; |
1041 | } | 1044 | } |
@@ -2072,8 +2075,9 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps) | |||
2072 | int error; | 2075 | int error; |
2073 | struct buffer_head *bh = NULL; | 2076 | struct buffer_head *bh = NULL; |
2074 | int err2; | 2077 | int err2; |
2078 | int lock_depth; | ||
2075 | 2079 | ||
2076 | reiserfs_write_lock(inode->i_sb); | 2080 | lock_depth = reiserfs_write_lock_once(inode->i_sb); |
2077 | 2081 | ||
2078 | if (inode->i_size > 0) { | 2082 | if (inode->i_size > 0) { |
2079 | error = grab_tail_page(inode, &page, &bh); | 2083 | error = grab_tail_page(inode, &page, &bh); |
@@ -2142,14 +2146,17 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps) | |||
2142 | page_cache_release(page); | 2146 | page_cache_release(page); |
2143 | } | 2147 | } |
2144 | 2148 | ||
2145 | reiserfs_write_unlock(inode->i_sb); | 2149 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); |
2150 | |||
2146 | return 0; | 2151 | return 0; |
2147 | out: | 2152 | out: |
2148 | if (page) { | 2153 | if (page) { |
2149 | unlock_page(page); | 2154 | unlock_page(page); |
2150 | page_cache_release(page); | 2155 | page_cache_release(page); |
2151 | } | 2156 | } |
2152 | reiserfs_write_unlock(inode->i_sb); | 2157 | |
2158 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); | ||
2159 | |||
2153 | return error; | 2160 | return error; |
2154 | } | 2161 | } |
2155 | 2162 | ||
@@ -2608,7 +2615,10 @@ int reiserfs_prepare_write(struct file *f, struct page *page, | |||
2608 | int ret; | 2615 | int ret; |
2609 | int old_ref = 0; | 2616 | int old_ref = 0; |
2610 | 2617 | ||
2618 | reiserfs_write_unlock(inode->i_sb); | ||
2611 | reiserfs_wait_on_write_block(inode->i_sb); | 2619 | reiserfs_wait_on_write_block(inode->i_sb); |
2620 | reiserfs_write_lock(inode->i_sb); | ||
2621 | |||
2612 | fix_tail_page_for_writing(page); | 2622 | fix_tail_page_for_writing(page); |
2613 | if (reiserfs_transaction_running(inode->i_sb)) { | 2623 | if (reiserfs_transaction_running(inode->i_sb)) { |
2614 | struct reiserfs_transaction_handle *th; | 2624 | struct reiserfs_transaction_handle *th; |
@@ -2664,6 +2674,8 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, | |||
2664 | int update_sd = 0; | 2674 | int update_sd = 0; |
2665 | struct reiserfs_transaction_handle *th; | 2675 | struct reiserfs_transaction_handle *th; |
2666 | unsigned start; | 2676 | unsigned start; |
2677 | int lock_depth = 0; | ||
2678 | bool locked = false; | ||
2667 | 2679 | ||
2668 | if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND) | 2680 | if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND) |
2669 | pos ++; | 2681 | pos ++; |
@@ -2690,9 +2702,11 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, | |||
2690 | ** to do the i_size updates here. | 2702 | ** to do the i_size updates here. |
2691 | */ | 2703 | */ |
2692 | pos += copied; | 2704 | pos += copied; |
2705 | |||
2693 | if (pos > inode->i_size) { | 2706 | if (pos > inode->i_size) { |
2694 | struct reiserfs_transaction_handle myth; | 2707 | struct reiserfs_transaction_handle myth; |
2695 | reiserfs_write_lock(inode->i_sb); | 2708 | lock_depth = reiserfs_write_lock_once(inode->i_sb); |
2709 | locked = true; | ||
2696 | /* If the file have grown beyond the border where it | 2710 | /* If the file have grown beyond the border where it |
2697 | can have a tail, unmark it as needing a tail | 2711 | can have a tail, unmark it as needing a tail |
2698 | packing */ | 2712 | packing */ |
@@ -2703,10 +2717,9 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, | |||
2703 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; | 2717 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; |
2704 | 2718 | ||
2705 | ret = journal_begin(&myth, inode->i_sb, 1); | 2719 | ret = journal_begin(&myth, inode->i_sb, 1); |
2706 | if (ret) { | 2720 | if (ret) |
2707 | reiserfs_write_unlock(inode->i_sb); | ||
2708 | goto journal_error; | 2721 | goto journal_error; |
2709 | } | 2722 | |
2710 | reiserfs_update_inode_transaction(inode); | 2723 | reiserfs_update_inode_transaction(inode); |
2711 | inode->i_size = pos; | 2724 | inode->i_size = pos; |
2712 | /* | 2725 | /* |
@@ -2718,34 +2731,36 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, | |||
2718 | reiserfs_update_sd(&myth, inode); | 2731 | reiserfs_update_sd(&myth, inode); |
2719 | update_sd = 1; | 2732 | update_sd = 1; |
2720 | ret = journal_end(&myth, inode->i_sb, 1); | 2733 | ret = journal_end(&myth, inode->i_sb, 1); |
2721 | reiserfs_write_unlock(inode->i_sb); | ||
2722 | if (ret) | 2734 | if (ret) |
2723 | goto journal_error; | 2735 | goto journal_error; |
2724 | } | 2736 | } |
2725 | if (th) { | 2737 | if (th) { |
2726 | reiserfs_write_lock(inode->i_sb); | 2738 | if (!locked) { |
2739 | lock_depth = reiserfs_write_lock_once(inode->i_sb); | ||
2740 | locked = true; | ||
2741 | } | ||
2727 | if (!update_sd) | 2742 | if (!update_sd) |
2728 | mark_inode_dirty(inode); | 2743 | mark_inode_dirty(inode); |
2729 | ret = reiserfs_end_persistent_transaction(th); | 2744 | ret = reiserfs_end_persistent_transaction(th); |
2730 | reiserfs_write_unlock(inode->i_sb); | ||
2731 | if (ret) | 2745 | if (ret) |
2732 | goto out; | 2746 | goto out; |
2733 | } | 2747 | } |
2734 | 2748 | ||
2735 | out: | 2749 | out: |
2750 | if (locked) | ||
2751 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); | ||
2736 | unlock_page(page); | 2752 | unlock_page(page); |
2737 | page_cache_release(page); | 2753 | page_cache_release(page); |
2738 | return ret == 0 ? copied : ret; | 2754 | return ret == 0 ? copied : ret; |
2739 | 2755 | ||
2740 | journal_error: | 2756 | journal_error: |
2757 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); | ||
2758 | locked = false; | ||
2741 | if (th) { | 2759 | if (th) { |
2742 | reiserfs_write_lock(inode->i_sb); | ||
2743 | if (!update_sd) | 2760 | if (!update_sd) |
2744 | reiserfs_update_sd(th, inode); | 2761 | reiserfs_update_sd(th, inode); |
2745 | ret = reiserfs_end_persistent_transaction(th); | 2762 | ret = reiserfs_end_persistent_transaction(th); |
2746 | reiserfs_write_unlock(inode->i_sb); | ||
2747 | } | 2763 | } |
2748 | |||
2749 | goto out; | 2764 | goto out; |
2750 | } | 2765 | } |
2751 | 2766 | ||
@@ -2758,7 +2773,10 @@ int reiserfs_commit_write(struct file *f, struct page *page, | |||
2758 | int update_sd = 0; | 2773 | int update_sd = 0; |
2759 | struct reiserfs_transaction_handle *th = NULL; | 2774 | struct reiserfs_transaction_handle *th = NULL; |
2760 | 2775 | ||
2776 | reiserfs_write_unlock(inode->i_sb); | ||
2761 | reiserfs_wait_on_write_block(inode->i_sb); | 2777 | reiserfs_wait_on_write_block(inode->i_sb); |
2778 | reiserfs_write_lock(inode->i_sb); | ||
2779 | |||
2762 | if (reiserfs_transaction_running(inode->i_sb)) { | 2780 | if (reiserfs_transaction_running(inode->i_sb)) { |
2763 | th = current->journal_info; | 2781 | th = current->journal_info; |
2764 | } | 2782 | } |
@@ -2770,7 +2788,6 @@ int reiserfs_commit_write(struct file *f, struct page *page, | |||
2770 | */ | 2788 | */ |
2771 | if (pos > inode->i_size) { | 2789 | if (pos > inode->i_size) { |
2772 | struct reiserfs_transaction_handle myth; | 2790 | struct reiserfs_transaction_handle myth; |
2773 | reiserfs_write_lock(inode->i_sb); | ||
2774 | /* If the file have grown beyond the border where it | 2791 | /* If the file have grown beyond the border where it |
2775 | can have a tail, unmark it as needing a tail | 2792 | can have a tail, unmark it as needing a tail |
2776 | packing */ | 2793 | packing */ |
@@ -2781,10 +2798,9 @@ int reiserfs_commit_write(struct file *f, struct page *page, | |||
2781 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; | 2798 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; |
2782 | 2799 | ||
2783 | ret = journal_begin(&myth, inode->i_sb, 1); | 2800 | ret = journal_begin(&myth, inode->i_sb, 1); |
2784 | if (ret) { | 2801 | if (ret) |
2785 | reiserfs_write_unlock(inode->i_sb); | ||
2786 | goto journal_error; | 2802 | goto journal_error; |
2787 | } | 2803 | |
2788 | reiserfs_update_inode_transaction(inode); | 2804 | reiserfs_update_inode_transaction(inode); |
2789 | inode->i_size = pos; | 2805 | inode->i_size = pos; |
2790 | /* | 2806 | /* |
@@ -2796,16 +2812,13 @@ int reiserfs_commit_write(struct file *f, struct page *page, | |||
2796 | reiserfs_update_sd(&myth, inode); | 2812 | reiserfs_update_sd(&myth, inode); |
2797 | update_sd = 1; | 2813 | update_sd = 1; |
2798 | ret = journal_end(&myth, inode->i_sb, 1); | 2814 | ret = journal_end(&myth, inode->i_sb, 1); |
2799 | reiserfs_write_unlock(inode->i_sb); | ||
2800 | if (ret) | 2815 | if (ret) |
2801 | goto journal_error; | 2816 | goto journal_error; |
2802 | } | 2817 | } |
2803 | if (th) { | 2818 | if (th) { |
2804 | reiserfs_write_lock(inode->i_sb); | ||
2805 | if (!update_sd) | 2819 | if (!update_sd) |
2806 | mark_inode_dirty(inode); | 2820 | mark_inode_dirty(inode); |
2807 | ret = reiserfs_end_persistent_transaction(th); | 2821 | ret = reiserfs_end_persistent_transaction(th); |
2808 | reiserfs_write_unlock(inode->i_sb); | ||
2809 | if (ret) | 2822 | if (ret) |
2810 | goto out; | 2823 | goto out; |
2811 | } | 2824 | } |
@@ -2815,11 +2828,9 @@ int reiserfs_commit_write(struct file *f, struct page *page, | |||
2815 | 2828 | ||
2816 | journal_error: | 2829 | journal_error: |
2817 | if (th) { | 2830 | if (th) { |
2818 | reiserfs_write_lock(inode->i_sb); | ||
2819 | if (!update_sd) | 2831 | if (!update_sd) |
2820 | reiserfs_update_sd(th, inode); | 2832 | reiserfs_update_sd(th, inode); |
2821 | ret = reiserfs_end_persistent_transaction(th); | 2833 | ret = reiserfs_end_persistent_transaction(th); |
2822 | reiserfs_write_unlock(inode->i_sb); | ||
2823 | } | 2834 | } |
2824 | 2835 | ||
2825 | return ret; | 2836 | return ret; |
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 0ccc3fdda7bf..ace77451ceb1 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c | |||
@@ -13,44 +13,52 @@ | |||
13 | #include <linux/compat.h> | 13 | #include <linux/compat.h> |
14 | 14 | ||
15 | /* | 15 | /* |
16 | ** reiserfs_ioctl - handler for ioctl for inode | 16 | * reiserfs_ioctl - handler for ioctl for inode |
17 | ** supported commands: | 17 | * supported commands: |
18 | ** 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect | 18 | * 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect |
19 | ** and prevent packing file (argument arg has to be non-zero) | 19 | * and prevent packing file (argument arg has to be non-zero) |
20 | ** 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION | 20 | * 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION |
21 | ** 3) That's all for a while ... | 21 | * 3) That's all for a while ... |
22 | */ | 22 | */ |
23 | int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | 23 | long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
24 | unsigned long arg) | ||
25 | { | 24 | { |
25 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
26 | unsigned int flags; | 26 | unsigned int flags; |
27 | int err = 0; | 27 | int err = 0; |
28 | 28 | ||
29 | reiserfs_write_lock(inode->i_sb); | ||
30 | |||
29 | switch (cmd) { | 31 | switch (cmd) { |
30 | case REISERFS_IOC_UNPACK: | 32 | case REISERFS_IOC_UNPACK: |
31 | if (S_ISREG(inode->i_mode)) { | 33 | if (S_ISREG(inode->i_mode)) { |
32 | if (arg) | 34 | if (arg) |
33 | return reiserfs_unpack(inode, filp); | 35 | err = reiserfs_unpack(inode, filp); |
34 | else | ||
35 | return 0; | ||
36 | } else | 36 | } else |
37 | return -ENOTTY; | 37 | err = -ENOTTY; |
38 | /* following two cases are taken from fs/ext2/ioctl.c by Remy | 38 | break; |
39 | Card (card@masi.ibp.fr) */ | 39 | /* |
40 | * following two cases are taken from fs/ext2/ioctl.c by Remy | ||
41 | * Card (card@masi.ibp.fr) | ||
42 | */ | ||
40 | case REISERFS_IOC_GETFLAGS: | 43 | case REISERFS_IOC_GETFLAGS: |
41 | if (!reiserfs_attrs(inode->i_sb)) | 44 | if (!reiserfs_attrs(inode->i_sb)) { |
42 | return -ENOTTY; | 45 | err = -ENOTTY; |
46 | break; | ||
47 | } | ||
43 | 48 | ||
44 | flags = REISERFS_I(inode)->i_attrs; | 49 | flags = REISERFS_I(inode)->i_attrs; |
45 | i_attrs_to_sd_attrs(inode, (__u16 *) & flags); | 50 | i_attrs_to_sd_attrs(inode, (__u16 *) & flags); |
46 | return put_user(flags, (int __user *)arg); | 51 | err = put_user(flags, (int __user *)arg); |
52 | break; | ||
47 | case REISERFS_IOC_SETFLAGS:{ | 53 | case REISERFS_IOC_SETFLAGS:{ |
48 | if (!reiserfs_attrs(inode->i_sb)) | 54 | if (!reiserfs_attrs(inode->i_sb)) { |
49 | return -ENOTTY; | 55 | err = -ENOTTY; |
56 | break; | ||
57 | } | ||
50 | 58 | ||
51 | err = mnt_want_write(filp->f_path.mnt); | 59 | err = mnt_want_write(filp->f_path.mnt); |
52 | if (err) | 60 | if (err) |
53 | return err; | 61 | break; |
54 | 62 | ||
55 | if (!is_owner_or_cap(inode)) { | 63 | if (!is_owner_or_cap(inode)) { |
56 | err = -EPERM; | 64 | err = -EPERM; |
@@ -90,16 +98,18 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | |||
90 | mark_inode_dirty(inode); | 98 | mark_inode_dirty(inode); |
91 | setflags_out: | 99 | setflags_out: |
92 | mnt_drop_write(filp->f_path.mnt); | 100 | mnt_drop_write(filp->f_path.mnt); |
93 | return err; | 101 | break; |
94 | } | 102 | } |
95 | case REISERFS_IOC_GETVERSION: | 103 | case REISERFS_IOC_GETVERSION: |
96 | return put_user(inode->i_generation, (int __user *)arg); | 104 | err = put_user(inode->i_generation, (int __user *)arg); |
105 | break; | ||
97 | case REISERFS_IOC_SETVERSION: | 106 | case REISERFS_IOC_SETVERSION: |
98 | if (!is_owner_or_cap(inode)) | 107 | if (!is_owner_or_cap(inode)) |
99 | return -EPERM; | 108 | err = -EPERM; |
109 | break; | ||
100 | err = mnt_want_write(filp->f_path.mnt); | 110 | err = mnt_want_write(filp->f_path.mnt); |
101 | if (err) | 111 | if (err) |
102 | return err; | 112 | break; |
103 | if (get_user(inode->i_generation, (int __user *)arg)) { | 113 | if (get_user(inode->i_generation, (int __user *)arg)) { |
104 | err = -EFAULT; | 114 | err = -EFAULT; |
105 | goto setversion_out; | 115 | goto setversion_out; |
@@ -108,19 +118,20 @@ setflags_out: | |||
108 | mark_inode_dirty(inode); | 118 | mark_inode_dirty(inode); |
109 | setversion_out: | 119 | setversion_out: |
110 | mnt_drop_write(filp->f_path.mnt); | 120 | mnt_drop_write(filp->f_path.mnt); |
111 | return err; | 121 | break; |
112 | default: | 122 | default: |
113 | return -ENOTTY; | 123 | err = -ENOTTY; |
114 | } | 124 | } |
125 | |||
126 | reiserfs_write_unlock(inode->i_sb); | ||
127 | |||
128 | return err; | ||
115 | } | 129 | } |
116 | 130 | ||
117 | #ifdef CONFIG_COMPAT | 131 | #ifdef CONFIG_COMPAT |
118 | long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, | 132 | long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, |
119 | unsigned long arg) | 133 | unsigned long arg) |
120 | { | 134 | { |
121 | struct inode *inode = file->f_path.dentry->d_inode; | ||
122 | int ret; | ||
123 | |||
124 | /* These are just misnamed, they actually get/put from/to user an int */ | 135 | /* These are just misnamed, they actually get/put from/to user an int */ |
125 | switch (cmd) { | 136 | switch (cmd) { |
126 | case REISERFS_IOC32_UNPACK: | 137 | case REISERFS_IOC32_UNPACK: |
@@ -141,10 +152,8 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, | |||
141 | default: | 152 | default: |
142 | return -ENOIOCTLCMD; | 153 | return -ENOIOCTLCMD; |
143 | } | 154 | } |
144 | lock_kernel(); | 155 | |
145 | ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); | 156 | return reiserfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); |
146 | unlock_kernel(); | ||
147 | return ret; | ||
148 | } | 157 | } |
149 | #endif | 158 | #endif |
150 | 159 | ||
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 90622200b39c..2f8a7e7b8dab 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -429,21 +429,6 @@ static void clear_prepared_bits(struct buffer_head *bh) | |||
429 | clear_buffer_journal_restore_dirty(bh); | 429 | clear_buffer_journal_restore_dirty(bh); |
430 | } | 430 | } |
431 | 431 | ||
432 | /* utility function to force a BUG if it is called without the big | ||
433 | ** kernel lock held. caller is the string printed just before calling BUG() | ||
434 | */ | ||
435 | void reiserfs_check_lock_depth(struct super_block *sb, char *caller) | ||
436 | { | ||
437 | #ifdef CONFIG_SMP | ||
438 | if (current->lock_depth < 0) { | ||
439 | reiserfs_panic(sb, "journal-1", "%s called without kernel " | ||
440 | "lock held", caller); | ||
441 | } | ||
442 | #else | ||
443 | ; | ||
444 | #endif | ||
445 | } | ||
446 | |||
447 | /* return a cnode with same dev, block number and size in table, or null if not found */ | 432 | /* return a cnode with same dev, block number and size in table, or null if not found */ |
448 | static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct | 433 | static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct |
449 | super_block | 434 | super_block |
@@ -556,7 +541,8 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, | |||
556 | static inline void lock_journal(struct super_block *sb) | 541 | static inline void lock_journal(struct super_block *sb) |
557 | { | 542 | { |
558 | PROC_INFO_INC(sb, journal.lock_journal); | 543 | PROC_INFO_INC(sb, journal.lock_journal); |
559 | mutex_lock(&SB_JOURNAL(sb)->j_mutex); | 544 | |
545 | reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb); | ||
560 | } | 546 | } |
561 | 547 | ||
562 | /* unlock the current transaction */ | 548 | /* unlock the current transaction */ |
@@ -708,7 +694,9 @@ static void check_barrier_completion(struct super_block *s, | |||
708 | disable_barrier(s); | 694 | disable_barrier(s); |
709 | set_buffer_uptodate(bh); | 695 | set_buffer_uptodate(bh); |
710 | set_buffer_dirty(bh); | 696 | set_buffer_dirty(bh); |
697 | reiserfs_write_unlock(s); | ||
711 | sync_dirty_buffer(bh); | 698 | sync_dirty_buffer(bh); |
699 | reiserfs_write_lock(s); | ||
712 | } | 700 | } |
713 | } | 701 | } |
714 | 702 | ||
@@ -996,8 +984,13 @@ static int reiserfs_async_progress_wait(struct super_block *s) | |||
996 | { | 984 | { |
997 | DEFINE_WAIT(wait); | 985 | DEFINE_WAIT(wait); |
998 | struct reiserfs_journal *j = SB_JOURNAL(s); | 986 | struct reiserfs_journal *j = SB_JOURNAL(s); |
999 | if (atomic_read(&j->j_async_throttle)) | 987 | |
988 | if (atomic_read(&j->j_async_throttle)) { | ||
989 | reiserfs_write_unlock(s); | ||
1000 | congestion_wait(BLK_RW_ASYNC, HZ / 10); | 990 | congestion_wait(BLK_RW_ASYNC, HZ / 10); |
991 | reiserfs_write_lock(s); | ||
992 | } | ||
993 | |||
1001 | return 0; | 994 | return 0; |
1002 | } | 995 | } |
1003 | 996 | ||
@@ -1043,7 +1036,8 @@ static int flush_commit_list(struct super_block *s, | |||
1043 | } | 1036 | } |
1044 | 1037 | ||
1045 | /* make sure nobody is trying to flush this one at the same time */ | 1038 | /* make sure nobody is trying to flush this one at the same time */ |
1046 | mutex_lock(&jl->j_commit_mutex); | 1039 | reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s); |
1040 | |||
1047 | if (!journal_list_still_alive(s, trans_id)) { | 1041 | if (!journal_list_still_alive(s, trans_id)) { |
1048 | mutex_unlock(&jl->j_commit_mutex); | 1042 | mutex_unlock(&jl->j_commit_mutex); |
1049 | goto put_jl; | 1043 | goto put_jl; |
@@ -1061,12 +1055,17 @@ static int flush_commit_list(struct super_block *s, | |||
1061 | 1055 | ||
1062 | if (!list_empty(&jl->j_bh_list)) { | 1056 | if (!list_empty(&jl->j_bh_list)) { |
1063 | int ret; | 1057 | int ret; |
1064 | unlock_kernel(); | 1058 | |
1059 | /* | ||
1060 | * We might sleep in numerous places inside | ||
1061 | * write_ordered_buffers. Relax the write lock. | ||
1062 | */ | ||
1063 | reiserfs_write_unlock(s); | ||
1065 | ret = write_ordered_buffers(&journal->j_dirty_buffers_lock, | 1064 | ret = write_ordered_buffers(&journal->j_dirty_buffers_lock, |
1066 | journal, jl, &jl->j_bh_list); | 1065 | journal, jl, &jl->j_bh_list); |
1067 | if (ret < 0 && retval == 0) | 1066 | if (ret < 0 && retval == 0) |
1068 | retval = ret; | 1067 | retval = ret; |
1069 | lock_kernel(); | 1068 | reiserfs_write_lock(s); |
1070 | } | 1069 | } |
1071 | BUG_ON(!list_empty(&jl->j_bh_list)); | 1070 | BUG_ON(!list_empty(&jl->j_bh_list)); |
1072 | /* | 1071 | /* |
@@ -1085,8 +1084,11 @@ static int flush_commit_list(struct super_block *s, | |||
1085 | SB_ONDISK_JOURNAL_SIZE(s); | 1084 | SB_ONDISK_JOURNAL_SIZE(s); |
1086 | tbh = journal_find_get_block(s, bn); | 1085 | tbh = journal_find_get_block(s, bn); |
1087 | if (tbh) { | 1086 | if (tbh) { |
1088 | if (buffer_dirty(tbh)) | 1087 | if (buffer_dirty(tbh)) { |
1089 | ll_rw_block(WRITE, 1, &tbh) ; | 1088 | reiserfs_write_unlock(s); |
1089 | ll_rw_block(WRITE, 1, &tbh); | ||
1090 | reiserfs_write_lock(s); | ||
1091 | } | ||
1090 | put_bh(tbh) ; | 1092 | put_bh(tbh) ; |
1091 | } | 1093 | } |
1092 | } | 1094 | } |
@@ -1114,12 +1116,19 @@ static int flush_commit_list(struct super_block *s, | |||
1114 | bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + | 1116 | bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + |
1115 | (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); | 1117 | (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); |
1116 | tbh = journal_find_get_block(s, bn); | 1118 | tbh = journal_find_get_block(s, bn); |
1119 | |||
1120 | reiserfs_write_unlock(s); | ||
1117 | wait_on_buffer(tbh); | 1121 | wait_on_buffer(tbh); |
1122 | reiserfs_write_lock(s); | ||
1118 | // since we're using ll_rw_blk above, it might have skipped over | 1123 | // since we're using ll_rw_blk above, it might have skipped over |
1119 | // a locked buffer. Double check here | 1124 | // a locked buffer. Double check here |
1120 | // | 1125 | // |
1121 | if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */ | 1126 | /* redundant, sync_dirty_buffer() checks */ |
1127 | if (buffer_dirty(tbh)) { | ||
1128 | reiserfs_write_unlock(s); | ||
1122 | sync_dirty_buffer(tbh); | 1129 | sync_dirty_buffer(tbh); |
1130 | reiserfs_write_lock(s); | ||
1131 | } | ||
1123 | if (unlikely(!buffer_uptodate(tbh))) { | 1132 | if (unlikely(!buffer_uptodate(tbh))) { |
1124 | #ifdef CONFIG_REISERFS_CHECK | 1133 | #ifdef CONFIG_REISERFS_CHECK |
1125 | reiserfs_warning(s, "journal-601", | 1134 | reiserfs_warning(s, "journal-601", |
@@ -1143,10 +1152,15 @@ static int flush_commit_list(struct super_block *s, | |||
1143 | if (buffer_dirty(jl->j_commit_bh)) | 1152 | if (buffer_dirty(jl->j_commit_bh)) |
1144 | BUG(); | 1153 | BUG(); |
1145 | mark_buffer_dirty(jl->j_commit_bh) ; | 1154 | mark_buffer_dirty(jl->j_commit_bh) ; |
1155 | reiserfs_write_unlock(s); | ||
1146 | sync_dirty_buffer(jl->j_commit_bh) ; | 1156 | sync_dirty_buffer(jl->j_commit_bh) ; |
1157 | reiserfs_write_lock(s); | ||
1147 | } | 1158 | } |
1148 | } else | 1159 | } else { |
1160 | reiserfs_write_unlock(s); | ||
1149 | wait_on_buffer(jl->j_commit_bh); | 1161 | wait_on_buffer(jl->j_commit_bh); |
1162 | reiserfs_write_lock(s); | ||
1163 | } | ||
1150 | 1164 | ||
1151 | check_barrier_completion(s, jl->j_commit_bh); | 1165 | check_barrier_completion(s, jl->j_commit_bh); |
1152 | 1166 | ||
@@ -1286,7 +1300,9 @@ static int _update_journal_header_block(struct super_block *sb, | |||
1286 | 1300 | ||
1287 | if (trans_id >= journal->j_last_flush_trans_id) { | 1301 | if (trans_id >= journal->j_last_flush_trans_id) { |
1288 | if (buffer_locked((journal->j_header_bh))) { | 1302 | if (buffer_locked((journal->j_header_bh))) { |
1303 | reiserfs_write_unlock(sb); | ||
1289 | wait_on_buffer((journal->j_header_bh)); | 1304 | wait_on_buffer((journal->j_header_bh)); |
1305 | reiserfs_write_lock(sb); | ||
1290 | if (unlikely(!buffer_uptodate(journal->j_header_bh))) { | 1306 | if (unlikely(!buffer_uptodate(journal->j_header_bh))) { |
1291 | #ifdef CONFIG_REISERFS_CHECK | 1307 | #ifdef CONFIG_REISERFS_CHECK |
1292 | reiserfs_warning(sb, "journal-699", | 1308 | reiserfs_warning(sb, "journal-699", |
@@ -1312,12 +1328,16 @@ static int _update_journal_header_block(struct super_block *sb, | |||
1312 | disable_barrier(sb); | 1328 | disable_barrier(sb); |
1313 | goto sync; | 1329 | goto sync; |
1314 | } | 1330 | } |
1331 | reiserfs_write_unlock(sb); | ||
1315 | wait_on_buffer(journal->j_header_bh); | 1332 | wait_on_buffer(journal->j_header_bh); |
1333 | reiserfs_write_lock(sb); | ||
1316 | check_barrier_completion(sb, journal->j_header_bh); | 1334 | check_barrier_completion(sb, journal->j_header_bh); |
1317 | } else { | 1335 | } else { |
1318 | sync: | 1336 | sync: |
1319 | set_buffer_dirty(journal->j_header_bh); | 1337 | set_buffer_dirty(journal->j_header_bh); |
1338 | reiserfs_write_unlock(sb); | ||
1320 | sync_dirty_buffer(journal->j_header_bh); | 1339 | sync_dirty_buffer(journal->j_header_bh); |
1340 | reiserfs_write_lock(sb); | ||
1321 | } | 1341 | } |
1322 | if (!buffer_uptodate(journal->j_header_bh)) { | 1342 | if (!buffer_uptodate(journal->j_header_bh)) { |
1323 | reiserfs_warning(sb, "journal-837", | 1343 | reiserfs_warning(sb, "journal-837", |
@@ -1409,7 +1429,7 @@ static int flush_journal_list(struct super_block *s, | |||
1409 | 1429 | ||
1410 | /* if flushall == 0, the lock is already held */ | 1430 | /* if flushall == 0, the lock is already held */ |
1411 | if (flushall) { | 1431 | if (flushall) { |
1412 | mutex_lock(&journal->j_flush_mutex); | 1432 | reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s); |
1413 | } else if (mutex_trylock(&journal->j_flush_mutex)) { | 1433 | } else if (mutex_trylock(&journal->j_flush_mutex)) { |
1414 | BUG(); | 1434 | BUG(); |
1415 | } | 1435 | } |
@@ -1553,7 +1573,11 @@ static int flush_journal_list(struct super_block *s, | |||
1553 | reiserfs_panic(s, "journal-1011", | 1573 | reiserfs_panic(s, "journal-1011", |
1554 | "cn->bh is NULL"); | 1574 | "cn->bh is NULL"); |
1555 | } | 1575 | } |
1576 | |||
1577 | reiserfs_write_unlock(s); | ||
1556 | wait_on_buffer(cn->bh); | 1578 | wait_on_buffer(cn->bh); |
1579 | reiserfs_write_lock(s); | ||
1580 | |||
1557 | if (!cn->bh) { | 1581 | if (!cn->bh) { |
1558 | reiserfs_panic(s, "journal-1012", | 1582 | reiserfs_panic(s, "journal-1012", |
1559 | "cn->bh is NULL"); | 1583 | "cn->bh is NULL"); |
@@ -1769,7 +1793,7 @@ static int kupdate_transactions(struct super_block *s, | |||
1769 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 1793 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
1770 | chunk.nr = 0; | 1794 | chunk.nr = 0; |
1771 | 1795 | ||
1772 | mutex_lock(&journal->j_flush_mutex); | 1796 | reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s); |
1773 | if (!journal_list_still_alive(s, orig_trans_id)) { | 1797 | if (!journal_list_still_alive(s, orig_trans_id)) { |
1774 | goto done; | 1798 | goto done; |
1775 | } | 1799 | } |
@@ -1973,11 +1997,19 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, | |||
1973 | reiserfs_mounted_fs_count--; | 1997 | reiserfs_mounted_fs_count--; |
1974 | /* wait for all commits to finish */ | 1998 | /* wait for all commits to finish */ |
1975 | cancel_delayed_work(&SB_JOURNAL(sb)->j_work); | 1999 | cancel_delayed_work(&SB_JOURNAL(sb)->j_work); |
2000 | |||
2001 | /* | ||
2002 | * We must release the write lock here because | ||
2003 | * the workqueue job (flush_async_commit) needs this lock | ||
2004 | */ | ||
2005 | reiserfs_write_unlock(sb); | ||
1976 | flush_workqueue(commit_wq); | 2006 | flush_workqueue(commit_wq); |
2007 | |||
1977 | if (!reiserfs_mounted_fs_count) { | 2008 | if (!reiserfs_mounted_fs_count) { |
1978 | destroy_workqueue(commit_wq); | 2009 | destroy_workqueue(commit_wq); |
1979 | commit_wq = NULL; | 2010 | commit_wq = NULL; |
1980 | } | 2011 | } |
2012 | reiserfs_write_lock(sb); | ||
1981 | 2013 | ||
1982 | free_journal_ram(sb); | 2014 | free_journal_ram(sb); |
1983 | 2015 | ||
@@ -2243,7 +2275,11 @@ static int journal_read_transaction(struct super_block *sb, | |||
2243 | /* read in the log blocks, memcpy to the corresponding real block */ | 2275 | /* read in the log blocks, memcpy to the corresponding real block */ |
2244 | ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); | 2276 | ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); |
2245 | for (i = 0; i < get_desc_trans_len(desc); i++) { | 2277 | for (i = 0; i < get_desc_trans_len(desc); i++) { |
2278 | |||
2279 | reiserfs_write_unlock(sb); | ||
2246 | wait_on_buffer(log_blocks[i]); | 2280 | wait_on_buffer(log_blocks[i]); |
2281 | reiserfs_write_lock(sb); | ||
2282 | |||
2247 | if (!buffer_uptodate(log_blocks[i])) { | 2283 | if (!buffer_uptodate(log_blocks[i])) { |
2248 | reiserfs_warning(sb, "journal-1212", | 2284 | reiserfs_warning(sb, "journal-1212", |
2249 | "REPLAY FAILURE fsck required! " | 2285 | "REPLAY FAILURE fsck required! " |
@@ -2765,11 +2801,27 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2765 | goto free_and_return; | 2801 | goto free_and_return; |
2766 | } | 2802 | } |
2767 | 2803 | ||
2804 | /* | ||
2805 | * We need to unlock here to avoid creating the following | ||
2806 | * dependency: | ||
2807 | * reiserfs_lock -> sysfs_mutex | ||
2808 | * Because the reiserfs mmap path creates the following dependency: | ||
2809 | * mm->mmap_sem -> reiserfs_lock, hence we have | ||
2810 | * mm->mmap_sem -> reiserfs_lock -> sysfs_mutex | ||
2811 | * This would end up in a circular dependency with the sysfs readdir path | ||
2812 | * which does sysfs_mutex -> mm->mmap_sem | ||
2813 | * This is fine because the reiserfs lock is useless in mount path, | ||
2814 | * at least until we call journal_begin. We keep it for paranoid | ||
2815 | * reasons. | ||
2816 | */ | ||
2817 | reiserfs_write_unlock(sb); | ||
2768 | if (journal_init_dev(sb, journal, j_dev_name) != 0) { | 2818 | if (journal_init_dev(sb, journal, j_dev_name) != 0) { |
2819 | reiserfs_write_lock(sb); | ||
2769 | reiserfs_warning(sb, "sh-462", | 2820 | reiserfs_warning(sb, "sh-462", |
2770 | "unable to initialize jornal device"); | 2821 | "unable to initialize jornal device"); |
2771 | goto free_and_return; | 2822 | goto free_and_return; |
2772 | } | 2823 | } |
2824 | reiserfs_write_lock(sb); | ||
2773 | 2825 | ||
2774 | rs = SB_DISK_SUPER_BLOCK(sb); | 2826 | rs = SB_DISK_SUPER_BLOCK(sb); |
2775 | 2827 | ||
@@ -2881,8 +2933,11 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2881 | } | 2933 | } |
2882 | 2934 | ||
2883 | reiserfs_mounted_fs_count++; | 2935 | reiserfs_mounted_fs_count++; |
2884 | if (reiserfs_mounted_fs_count <= 1) | 2936 | if (reiserfs_mounted_fs_count <= 1) { |
2937 | reiserfs_write_unlock(sb); | ||
2885 | commit_wq = create_workqueue("reiserfs"); | 2938 | commit_wq = create_workqueue("reiserfs"); |
2939 | reiserfs_write_lock(sb); | ||
2940 | } | ||
2886 | 2941 | ||
2887 | INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); | 2942 | INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); |
2888 | journal->j_work_sb = sb; | 2943 | journal->j_work_sb = sb; |
@@ -2964,8 +3019,11 @@ static void queue_log_writer(struct super_block *s) | |||
2964 | init_waitqueue_entry(&wait, current); | 3019 | init_waitqueue_entry(&wait, current); |
2965 | add_wait_queue(&journal->j_join_wait, &wait); | 3020 | add_wait_queue(&journal->j_join_wait, &wait); |
2966 | set_current_state(TASK_UNINTERRUPTIBLE); | 3021 | set_current_state(TASK_UNINTERRUPTIBLE); |
2967 | if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) | 3022 | if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) { |
3023 | reiserfs_write_unlock(s); | ||
2968 | schedule(); | 3024 | schedule(); |
3025 | reiserfs_write_lock(s); | ||
3026 | } | ||
2969 | __set_current_state(TASK_RUNNING); | 3027 | __set_current_state(TASK_RUNNING); |
2970 | remove_wait_queue(&journal->j_join_wait, &wait); | 3028 | remove_wait_queue(&journal->j_join_wait, &wait); |
2971 | } | 3029 | } |
@@ -2982,7 +3040,9 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id) | |||
2982 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 3040 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
2983 | unsigned long bcount = journal->j_bcount; | 3041 | unsigned long bcount = journal->j_bcount; |
2984 | while (1) { | 3042 | while (1) { |
3043 | reiserfs_write_unlock(sb); | ||
2985 | schedule_timeout_uninterruptible(1); | 3044 | schedule_timeout_uninterruptible(1); |
3045 | reiserfs_write_lock(sb); | ||
2986 | journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; | 3046 | journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; |
2987 | while ((atomic_read(&journal->j_wcount) > 0 || | 3047 | while ((atomic_read(&journal->j_wcount) > 0 || |
2988 | atomic_read(&journal->j_jlock)) && | 3048 | atomic_read(&journal->j_jlock)) && |
@@ -3033,7 +3093,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | |||
3033 | 3093 | ||
3034 | if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { | 3094 | if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { |
3035 | unlock_journal(sb); | 3095 | unlock_journal(sb); |
3096 | reiserfs_write_unlock(sb); | ||
3036 | reiserfs_wait_on_write_block(sb); | 3097 | reiserfs_wait_on_write_block(sb); |
3098 | reiserfs_write_lock(sb); | ||
3037 | PROC_INFO_INC(sb, journal.journal_relock_writers); | 3099 | PROC_INFO_INC(sb, journal.journal_relock_writers); |
3038 | goto relock; | 3100 | goto relock; |
3039 | } | 3101 | } |
@@ -3506,14 +3568,14 @@ static void flush_async_commits(struct work_struct *work) | |||
3506 | struct reiserfs_journal_list *jl; | 3568 | struct reiserfs_journal_list *jl; |
3507 | struct list_head *entry; | 3569 | struct list_head *entry; |
3508 | 3570 | ||
3509 | lock_kernel(); | 3571 | reiserfs_write_lock(sb); |
3510 | if (!list_empty(&journal->j_journal_list)) { | 3572 | if (!list_empty(&journal->j_journal_list)) { |
3511 | /* last entry is the youngest, commit it and you get everything */ | 3573 | /* last entry is the youngest, commit it and you get everything */ |
3512 | entry = journal->j_journal_list.prev; | 3574 | entry = journal->j_journal_list.prev; |
3513 | jl = JOURNAL_LIST_ENTRY(entry); | 3575 | jl = JOURNAL_LIST_ENTRY(entry); |
3514 | flush_commit_list(sb, jl, 1); | 3576 | flush_commit_list(sb, jl, 1); |
3515 | } | 3577 | } |
3516 | unlock_kernel(); | 3578 | reiserfs_write_unlock(sb); |
3517 | } | 3579 | } |
3518 | 3580 | ||
3519 | /* | 3581 | /* |
@@ -4041,7 +4103,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4041 | * the new transaction is fully setup, and we've already flushed the | 4103 | * the new transaction is fully setup, and we've already flushed the |
4042 | * ordered bh list | 4104 | * ordered bh list |
4043 | */ | 4105 | */ |
4044 | mutex_lock(&jl->j_commit_mutex); | 4106 | reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb); |
4045 | 4107 | ||
4046 | /* save the transaction id in case we need to commit it later */ | 4108 | /* save the transaction id in case we need to commit it later */ |
4047 | commit_trans_id = jl->j_trans_id; | 4109 | commit_trans_id = jl->j_trans_id; |
@@ -4156,7 +4218,9 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4156 | next = cn->next; | 4218 | next = cn->next; |
4157 | free_cnode(sb, cn); | 4219 | free_cnode(sb, cn); |
4158 | cn = next; | 4220 | cn = next; |
4221 | reiserfs_write_unlock(sb); | ||
4159 | cond_resched(); | 4222 | cond_resched(); |
4223 | reiserfs_write_lock(sb); | ||
4160 | } | 4224 | } |
4161 | 4225 | ||
4162 | /* we are done with both the c_bh and d_bh, but | 4226 | /* we are done with both the c_bh and d_bh, but |
@@ -4203,10 +4267,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4203 | * is lost. | 4267 | * is lost. |
4204 | */ | 4268 | */ |
4205 | if (!list_empty(&jl->j_tail_bh_list)) { | 4269 | if (!list_empty(&jl->j_tail_bh_list)) { |
4206 | unlock_kernel(); | 4270 | reiserfs_write_unlock(sb); |
4207 | write_ordered_buffers(&journal->j_dirty_buffers_lock, | 4271 | write_ordered_buffers(&journal->j_dirty_buffers_lock, |
4208 | journal, jl, &jl->j_tail_bh_list); | 4272 | journal, jl, &jl->j_tail_bh_list); |
4209 | lock_kernel(); | 4273 | reiserfs_write_lock(sb); |
4210 | } | 4274 | } |
4211 | BUG_ON(!list_empty(&jl->j_tail_bh_list)); | 4275 | BUG_ON(!list_empty(&jl->j_tail_bh_list)); |
4212 | mutex_unlock(&jl->j_commit_mutex); | 4276 | mutex_unlock(&jl->j_commit_mutex); |
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c new file mode 100644 index 000000000000..ee2cfc0fd8a7 --- /dev/null +++ b/fs/reiserfs/lock.c | |||
@@ -0,0 +1,88 @@ | |||
1 | #include <linux/reiserfs_fs.h> | ||
2 | #include <linux/mutex.h> | ||
3 | |||
4 | /* | ||
5 | * The previous reiserfs locking scheme was heavily based on | ||
6 | * the tricky properties of the BKL: | ||
7 | * | ||
8 | * - it could be acquired recursively by the same task | ||
9 | * - performance relied on the release-while-schedule() property | ||
10 | * | ||
11 | * Now that we replace it by a mutex, we still want to keep the same | ||
12 | * recursive property to avoid big changes in the code structure. | ||
13 | * We use our own lock_owner here because the owner field on a mutex | ||
14 | * is only available in SMP or mutex debugging, also we only need this field | ||
15 | * for this mutex, no need for a system wide mutex facility. | ||
16 | * | ||
17 | * Also this lock is often released before a call that could block because | ||
18 | * reiserfs performance was partially based on the release-while-schedule() | ||
19 | * property of the BKL. | ||
20 | */ | ||
21 | void reiserfs_write_lock(struct super_block *s) | ||
22 | { | ||
23 | struct reiserfs_sb_info *sb_i = REISERFS_SB(s); | ||
24 | |||
25 | if (sb_i->lock_owner != current) { | ||
26 | mutex_lock(&sb_i->lock); | ||
27 | sb_i->lock_owner = current; | ||
28 | } | ||
29 | |||
30 | /* No need to protect it, only the current task touches it */ | ||
31 | sb_i->lock_depth++; | ||
32 | } | ||
33 | |||
34 | void reiserfs_write_unlock(struct super_block *s) | ||
35 | { | ||
36 | struct reiserfs_sb_info *sb_i = REISERFS_SB(s); | ||
37 | |||
38 | /* | ||
39 | * Are we unlocking without even holding the lock? | ||
40 | * Such a situation must raise a BUG() if we don't want | ||
41 | * to corrupt the data. | ||
42 | */ | ||
43 | BUG_ON(sb_i->lock_owner != current); | ||
44 | |||
45 | if (--sb_i->lock_depth == -1) { | ||
46 | sb_i->lock_owner = NULL; | ||
47 | mutex_unlock(&sb_i->lock); | ||
48 | } | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * If we already own the lock, just exit and don't increase the depth. | ||
53 | * Useful when we don't want to lock more than once. | ||
54 | * | ||
55 | * We always return the lock_depth we had before calling | ||
56 | * this function. | ||
57 | */ | ||
58 | int reiserfs_write_lock_once(struct super_block *s) | ||
59 | { | ||
60 | struct reiserfs_sb_info *sb_i = REISERFS_SB(s); | ||
61 | |||
62 | if (sb_i->lock_owner != current) { | ||
63 | mutex_lock(&sb_i->lock); | ||
64 | sb_i->lock_owner = current; | ||
65 | return sb_i->lock_depth++; | ||
66 | } | ||
67 | |||
68 | return sb_i->lock_depth; | ||
69 | } | ||
70 | |||
71 | void reiserfs_write_unlock_once(struct super_block *s, int lock_depth) | ||
72 | { | ||
73 | if (lock_depth == -1) | ||
74 | reiserfs_write_unlock(s); | ||
75 | } | ||
76 | |||
77 | /* | ||
78 | * Utility function to force a BUG if it is called without the superblock | ||
79 | * write lock held. caller is the string printed just before calling BUG() | ||
80 | */ | ||
81 | void reiserfs_check_lock_depth(struct super_block *sb, char *caller) | ||
82 | { | ||
83 | struct reiserfs_sb_info *sb_i = REISERFS_SB(sb); | ||
84 | |||
85 | if (sb_i->lock_depth < 0) | ||
86 | reiserfs_panic(sb, "%s called without kernel lock held %d", | ||
87 | caller); | ||
88 | } | ||
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 271579128634..e296ff72a6cc 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c | |||
@@ -324,6 +324,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, | |||
324 | struct nameidata *nd) | 324 | struct nameidata *nd) |
325 | { | 325 | { |
326 | int retval; | 326 | int retval; |
327 | int lock_depth; | ||
327 | struct inode *inode = NULL; | 328 | struct inode *inode = NULL; |
328 | struct reiserfs_dir_entry de; | 329 | struct reiserfs_dir_entry de; |
329 | INITIALIZE_PATH(path_to_entry); | 330 | INITIALIZE_PATH(path_to_entry); |
@@ -331,7 +332,13 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, | |||
331 | if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len) | 332 | if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len) |
332 | return ERR_PTR(-ENAMETOOLONG); | 333 | return ERR_PTR(-ENAMETOOLONG); |
333 | 334 | ||
334 | reiserfs_write_lock(dir->i_sb); | 335 | /* |
336 | * Might be called with or without the write lock, must be careful | ||
337 | * to not recursively hold it in case we want to release the lock | ||
338 | * before rescheduling. | ||
339 | */ | ||
340 | lock_depth = reiserfs_write_lock_once(dir->i_sb); | ||
341 | |||
335 | de.de_gen_number_bit_string = NULL; | 342 | de.de_gen_number_bit_string = NULL; |
336 | retval = | 343 | retval = |
337 | reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, | 344 | reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, |
@@ -341,7 +348,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, | |||
341 | inode = reiserfs_iget(dir->i_sb, | 348 | inode = reiserfs_iget(dir->i_sb, |
342 | (struct cpu_key *)&(de.de_dir_id)); | 349 | (struct cpu_key *)&(de.de_dir_id)); |
343 | if (!inode || IS_ERR(inode)) { | 350 | if (!inode || IS_ERR(inode)) { |
344 | reiserfs_write_unlock(dir->i_sb); | 351 | reiserfs_write_unlock_once(dir->i_sb, lock_depth); |
345 | return ERR_PTR(-EACCES); | 352 | return ERR_PTR(-EACCES); |
346 | } | 353 | } |
347 | 354 | ||
@@ -350,7 +357,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, | |||
350 | if (IS_PRIVATE(dir)) | 357 | if (IS_PRIVATE(dir)) |
351 | inode->i_flags |= S_PRIVATE; | 358 | inode->i_flags |= S_PRIVATE; |
352 | } | 359 | } |
353 | reiserfs_write_unlock(dir->i_sb); | 360 | reiserfs_write_unlock_once(dir->i_sb, lock_depth); |
354 | if (retval == IO_ERROR) { | 361 | if (retval == IO_ERROR) { |
355 | return ERR_PTR(-EIO); | 362 | return ERR_PTR(-EIO); |
356 | } | 363 | } |
@@ -725,6 +732,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
725 | struct inode *inode; | 732 | struct inode *inode; |
726 | struct reiserfs_transaction_handle th; | 733 | struct reiserfs_transaction_handle th; |
727 | struct reiserfs_security_handle security; | 734 | struct reiserfs_security_handle security; |
735 | int lock_depth; | ||
728 | /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ | 736 | /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ |
729 | int jbegin_count = | 737 | int jbegin_count = |
730 | JOURNAL_PER_BALANCE_CNT * 3 + | 738 | JOURNAL_PER_BALANCE_CNT * 3 + |
@@ -748,7 +756,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
748 | return retval; | 756 | return retval; |
749 | } | 757 | } |
750 | jbegin_count += retval; | 758 | jbegin_count += retval; |
751 | reiserfs_write_lock(dir->i_sb); | 759 | lock_depth = reiserfs_write_lock_once(dir->i_sb); |
752 | 760 | ||
753 | retval = journal_begin(&th, dir->i_sb, jbegin_count); | 761 | retval = journal_begin(&th, dir->i_sb, jbegin_count); |
754 | if (retval) { | 762 | if (retval) { |
@@ -798,8 +806,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
798 | d_instantiate(dentry, inode); | 806 | d_instantiate(dentry, inode); |
799 | unlock_new_inode(inode); | 807 | unlock_new_inode(inode); |
800 | retval = journal_end(&th, dir->i_sb, jbegin_count); | 808 | retval = journal_end(&th, dir->i_sb, jbegin_count); |
801 | out_failed: | 809 | out_failed: |
802 | reiserfs_write_unlock(dir->i_sb); | 810 | reiserfs_write_unlock_once(dir->i_sb, lock_depth); |
803 | return retval; | 811 | return retval; |
804 | } | 812 | } |
805 | 813 | ||
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 536eacaeb710..adbc6f538515 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c | |||
@@ -349,10 +349,6 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...) | |||
349 | 349 | ||
350 | . */ | 350 | . */ |
351 | 351 | ||
352 | #ifdef CONFIG_REISERFS_CHECK | ||
353 | extern struct tree_balance *cur_tb; | ||
354 | #endif | ||
355 | |||
356 | void __reiserfs_panic(struct super_block *sb, const char *id, | 352 | void __reiserfs_panic(struct super_block *sb, const char *id, |
357 | const char *function, const char *fmt, ...) | 353 | const char *function, const char *fmt, ...) |
358 | { | 354 | { |
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index 18b315d3d104..b3a94d20f0fc 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c | |||
@@ -141,7 +141,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
141 | 141 | ||
142 | set_buffer_uptodate(bh); | 142 | set_buffer_uptodate(bh); |
143 | mark_buffer_dirty(bh); | 143 | mark_buffer_dirty(bh); |
144 | reiserfs_write_unlock(s); | ||
144 | sync_dirty_buffer(bh); | 145 | sync_dirty_buffer(bh); |
146 | reiserfs_write_lock(s); | ||
145 | // update bitmap_info stuff | 147 | // update bitmap_info stuff |
146 | bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; | 148 | bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; |
147 | brelse(bh); | 149 | brelse(bh); |
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index d036ee5b1c81..5fa7118f04e1 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c | |||
@@ -222,9 +222,6 @@ static inline int bin_search(const void *key, /* Key to search for. */ | |||
222 | return ITEM_NOT_FOUND; | 222 | return ITEM_NOT_FOUND; |
223 | } | 223 | } |
224 | 224 | ||
225 | #ifdef CONFIG_REISERFS_CHECK | ||
226 | extern struct tree_balance *cur_tb; | ||
227 | #endif | ||
228 | 225 | ||
229 | /* Minimal possible key. It is never in the tree. */ | 226 | /* Minimal possible key. It is never in the tree. */ |
230 | const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} }; | 227 | const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} }; |
@@ -519,25 +516,48 @@ static int is_tree_node(struct buffer_head *bh, int level) | |||
519 | 516 | ||
520 | #define SEARCH_BY_KEY_READA 16 | 517 | #define SEARCH_BY_KEY_READA 16 |
521 | 518 | ||
522 | /* The function is NOT SCHEDULE-SAFE! */ | 519 | /* |
523 | static void search_by_key_reada(struct super_block *s, | 520 | * The function is NOT SCHEDULE-SAFE! |
521 | * It might unlock the write lock if we needed to wait for a block | ||
522 | * to be read. Note that in this case it won't retake the lock, to avoid | ||
523 | * high contention resulting from too many lock requests, especially as | ||
524 | * the caller (search_by_key) will perform other schedule-unsafe | ||
525 | * operations just after calling this function. | ||
526 | * | ||
527 | * @return true if we have unlocked | ||
528 | */ | ||
529 | static bool search_by_key_reada(struct super_block *s, | ||
524 | struct buffer_head **bh, | 530 | struct buffer_head **bh, |
525 | b_blocknr_t *b, int num) | 531 | b_blocknr_t *b, int num) |
526 | { | 532 | { |
527 | int i, j; | 533 | int i, j; |
534 | bool unlocked = false; | ||
528 | 535 | ||
529 | for (i = 0; i < num; i++) { | 536 | for (i = 0; i < num; i++) { |
530 | bh[i] = sb_getblk(s, b[i]); | 537 | bh[i] = sb_getblk(s, b[i]); |
531 | } | 538 | } |
539 | /* | ||
540 | * We are going to read some blocks on which we | ||
541 | * have a reference. It's safe, though we might be | ||
542 | * reading blocks concurrently changed if we release | ||
543 | * the lock. But it's still fine because we check later | ||
544 | * if the tree changed | ||
545 | */ | ||
532 | for (j = 0; j < i; j++) { | 546 | for (j = 0; j < i; j++) { |
533 | /* | 547 | /* |
534 | * note, this needs attention if we are getting rid of the BKL | 548 | * note, this needs attention if we are getting rid of the BKL |
535 | * you have to make sure the prepared bit isn't set on this buffer | 549 | * you have to make sure the prepared bit isn't set on this buffer |
536 | */ | 550 | */ |
537 | if (!buffer_uptodate(bh[j])) | 551 | if (!buffer_uptodate(bh[j])) { |
552 | if (!unlocked) { | ||
553 | reiserfs_write_unlock(s); | ||
554 | unlocked = true; | ||
555 | } | ||
538 | ll_rw_block(READA, 1, bh + j); | 556 | ll_rw_block(READA, 1, bh + j); |
557 | } | ||
539 | brelse(bh[j]); | 558 | brelse(bh[j]); |
540 | } | 559 | } |
560 | return unlocked; | ||
541 | } | 561 | } |
542 | 562 | ||
543 | /************************************************************************** | 563 | /************************************************************************** |
@@ -625,11 +645,26 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
625 | have a pointer to it. */ | 645 | have a pointer to it. */ |
626 | if ((bh = last_element->pe_buffer = | 646 | if ((bh = last_element->pe_buffer = |
627 | sb_getblk(sb, block_number))) { | 647 | sb_getblk(sb, block_number))) { |
648 | bool unlocked = false; | ||
649 | |||
628 | if (!buffer_uptodate(bh) && reada_count > 1) | 650 | if (!buffer_uptodate(bh) && reada_count > 1) |
629 | search_by_key_reada(sb, reada_bh, | 651 | /* may unlock the write lock */ |
652 | unlocked = search_by_key_reada(sb, reada_bh, | ||
630 | reada_blocks, reada_count); | 653 | reada_blocks, reada_count); |
654 | /* | ||
655 | * If we haven't already unlocked the write lock, | ||
656 | * then we need to do that here before reading | ||
657 | * the current block | ||
658 | */ | ||
659 | if (!buffer_uptodate(bh) && !unlocked) { | ||
660 | reiserfs_write_unlock(sb); | ||
661 | unlocked = true; | ||
662 | } | ||
631 | ll_rw_block(READ, 1, &bh); | 663 | ll_rw_block(READ, 1, &bh); |
632 | wait_on_buffer(bh); | 664 | wait_on_buffer(bh); |
665 | |||
666 | if (unlocked) | ||
667 | reiserfs_write_lock(sb); | ||
633 | if (!buffer_uptodate(bh)) | 668 | if (!buffer_uptodate(bh)) |
634 | goto io_error; | 669 | goto io_error; |
635 | } else { | 670 | } else { |
@@ -673,7 +708,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
673 | !key_in_buffer(search_path, key, sb), | 708 | !key_in_buffer(search_path, key, sb), |
674 | "PAP-5130: key is not in the buffer"); | 709 | "PAP-5130: key is not in the buffer"); |
675 | #ifdef CONFIG_REISERFS_CHECK | 710 | #ifdef CONFIG_REISERFS_CHECK |
676 | if (cur_tb) { | 711 | if (REISERFS_SB(sb)->cur_tb) { |
677 | print_cur_tb("5140"); | 712 | print_cur_tb("5140"); |
678 | reiserfs_panic(sb, "PAP-5140", | 713 | reiserfs_panic(sb, "PAP-5140", |
679 | "schedule occurred in do_balance!"); | 714 | "schedule occurred in do_balance!"); |
@@ -1024,7 +1059,9 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st | |||
1024 | reiserfs_free_block(th, inode, block, 1); | 1059 | reiserfs_free_block(th, inode, block, 1); |
1025 | } | 1060 | } |
1026 | 1061 | ||
1062 | reiserfs_write_unlock(sb); | ||
1027 | cond_resched(); | 1063 | cond_resched(); |
1064 | reiserfs_write_lock(sb); | ||
1028 | 1065 | ||
1029 | if (item_moved (&s_ih, path)) { | 1066 | if (item_moved (&s_ih, path)) { |
1030 | need_re_search = 1; | 1067 | need_re_search = 1; |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index f0ad05f38022..339b0baf2af6 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -465,7 +465,7 @@ static void reiserfs_put_super(struct super_block *s) | |||
465 | struct reiserfs_transaction_handle th; | 465 | struct reiserfs_transaction_handle th; |
466 | th.t_trans_id = 0; | 466 | th.t_trans_id = 0; |
467 | 467 | ||
468 | lock_kernel(); | 468 | reiserfs_write_lock(s); |
469 | 469 | ||
470 | if (s->s_dirt) | 470 | if (s->s_dirt) |
471 | reiserfs_write_super(s); | 471 | reiserfs_write_super(s); |
@@ -499,10 +499,10 @@ static void reiserfs_put_super(struct super_block *s) | |||
499 | 499 | ||
500 | reiserfs_proc_info_done(s); | 500 | reiserfs_proc_info_done(s); |
501 | 501 | ||
502 | reiserfs_write_unlock(s); | ||
503 | mutex_destroy(&REISERFS_SB(s)->lock); | ||
502 | kfree(s->s_fs_info); | 504 | kfree(s->s_fs_info); |
503 | s->s_fs_info = NULL; | 505 | s->s_fs_info = NULL; |
504 | |||
505 | unlock_kernel(); | ||
506 | } | 506 | } |
507 | 507 | ||
508 | static struct kmem_cache *reiserfs_inode_cachep; | 508 | static struct kmem_cache *reiserfs_inode_cachep; |
@@ -554,25 +554,28 @@ static void reiserfs_dirty_inode(struct inode *inode) | |||
554 | struct reiserfs_transaction_handle th; | 554 | struct reiserfs_transaction_handle th; |
555 | 555 | ||
556 | int err = 0; | 556 | int err = 0; |
557 | int lock_depth; | ||
558 | |||
557 | if (inode->i_sb->s_flags & MS_RDONLY) { | 559 | if (inode->i_sb->s_flags & MS_RDONLY) { |
558 | reiserfs_warning(inode->i_sb, "clm-6006", | 560 | reiserfs_warning(inode->i_sb, "clm-6006", |
559 | "writing inode %lu on readonly FS", | 561 | "writing inode %lu on readonly FS", |
560 | inode->i_ino); | 562 | inode->i_ino); |
561 | return; | 563 | return; |
562 | } | 564 | } |
563 | reiserfs_write_lock(inode->i_sb); | 565 | lock_depth = reiserfs_write_lock_once(inode->i_sb); |
564 | 566 | ||
565 | /* this is really only used for atime updates, so they don't have | 567 | /* this is really only used for atime updates, so they don't have |
566 | ** to be included in O_SYNC or fsync | 568 | ** to be included in O_SYNC or fsync |
567 | */ | 569 | */ |
568 | err = journal_begin(&th, inode->i_sb, 1); | 570 | err = journal_begin(&th, inode->i_sb, 1); |
569 | if (err) { | 571 | if (err) |
570 | reiserfs_write_unlock(inode->i_sb); | 572 | goto out; |
571 | return; | 573 | |
572 | } | ||
573 | reiserfs_update_sd(&th, inode); | 574 | reiserfs_update_sd(&th, inode); |
574 | journal_end(&th, inode->i_sb, 1); | 575 | journal_end(&th, inode->i_sb, 1); |
575 | reiserfs_write_unlock(inode->i_sb); | 576 | |
577 | out: | ||
578 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); | ||
576 | } | 579 | } |
577 | 580 | ||
578 | #ifdef CONFIG_QUOTA | 581 | #ifdef CONFIG_QUOTA |
@@ -1168,11 +1171,14 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1168 | unsigned int qfmt = 0; | 1171 | unsigned int qfmt = 0; |
1169 | #ifdef CONFIG_QUOTA | 1172 | #ifdef CONFIG_QUOTA |
1170 | int i; | 1173 | int i; |
1174 | #endif | ||
1175 | |||
1176 | reiserfs_write_lock(s); | ||
1171 | 1177 | ||
1178 | #ifdef CONFIG_QUOTA | ||
1172 | memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); | 1179 | memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); |
1173 | #endif | 1180 | #endif |
1174 | 1181 | ||
1175 | lock_kernel(); | ||
1176 | rs = SB_DISK_SUPER_BLOCK(s); | 1182 | rs = SB_DISK_SUPER_BLOCK(s); |
1177 | 1183 | ||
1178 | if (!reiserfs_parse_options | 1184 | if (!reiserfs_parse_options |
@@ -1295,12 +1301,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1295 | 1301 | ||
1296 | out_ok: | 1302 | out_ok: |
1297 | replace_mount_options(s, new_opts); | 1303 | replace_mount_options(s, new_opts); |
1298 | unlock_kernel(); | 1304 | reiserfs_write_unlock(s); |
1299 | return 0; | 1305 | return 0; |
1300 | 1306 | ||
1301 | out_err: | 1307 | out_err: |
1302 | kfree(new_opts); | 1308 | kfree(new_opts); |
1303 | unlock_kernel(); | 1309 | reiserfs_write_unlock(s); |
1304 | return err; | 1310 | return err; |
1305 | } | 1311 | } |
1306 | 1312 | ||
@@ -1404,7 +1410,9 @@ static int read_super_block(struct super_block *s, int offset) | |||
1404 | static int reread_meta_blocks(struct super_block *s) | 1410 | static int reread_meta_blocks(struct super_block *s) |
1405 | { | 1411 | { |
1406 | ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); | 1412 | ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); |
1413 | reiserfs_write_unlock(s); | ||
1407 | wait_on_buffer(SB_BUFFER_WITH_SB(s)); | 1414 | wait_on_buffer(SB_BUFFER_WITH_SB(s)); |
1415 | reiserfs_write_lock(s); | ||
1408 | if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { | 1416 | if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { |
1409 | reiserfs_warning(s, "reiserfs-2504", "error reading the super"); | 1417 | reiserfs_warning(s, "reiserfs-2504", "error reading the super"); |
1410 | return 1; | 1418 | return 1; |
@@ -1613,7 +1621,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1613 | sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); | 1621 | sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); |
1614 | if (!sbi) { | 1622 | if (!sbi) { |
1615 | errval = -ENOMEM; | 1623 | errval = -ENOMEM; |
1616 | goto error; | 1624 | goto error_alloc; |
1617 | } | 1625 | } |
1618 | s->s_fs_info = sbi; | 1626 | s->s_fs_info = sbi; |
1619 | /* Set default values for options: non-aggressive tails, RO on errors */ | 1627 | /* Set default values for options: non-aggressive tails, RO on errors */ |
@@ -1627,6 +1635,20 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1627 | /* setup default block allocator options */ | 1635 | /* setup default block allocator options */ |
1628 | reiserfs_init_alloc_options(s); | 1636 | reiserfs_init_alloc_options(s); |
1629 | 1637 | ||
1638 | mutex_init(&REISERFS_SB(s)->lock); | ||
1639 | REISERFS_SB(s)->lock_depth = -1; | ||
1640 | |||
1641 | /* | ||
1642 | * This function is called with the bkl, which also was the old | ||
1643 | * locking used here. | ||
1644 | * do_journal_begin() will soon check if we hold the lock (ie: what was | ||
1645 | * the bkl). That check is likely there for do_journal_begin()'s several | ||
1646 | * other callers, because at this time it doesn't seem to be necessary | ||
1647 | * to protect against anything here. | ||
1648 | * Anyway, let's be conservative and lock for now. | ||
1649 | */ | ||
1650 | reiserfs_write_lock(s); | ||
1651 | |||
1630 | jdev_name = NULL; | 1652 | jdev_name = NULL; |
1631 | if (reiserfs_parse_options | 1653 | if (reiserfs_parse_options |
1632 | (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, | 1654 | (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, |
@@ -1852,9 +1874,13 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1852 | init_waitqueue_head(&(sbi->s_wait)); | 1874 | init_waitqueue_head(&(sbi->s_wait)); |
1853 | spin_lock_init(&sbi->bitmap_lock); | 1875 | spin_lock_init(&sbi->bitmap_lock); |
1854 | 1876 | ||
1877 | reiserfs_write_unlock(s); | ||
1878 | |||
1855 | return (0); | 1879 | return (0); |
1856 | 1880 | ||
1857 | error: | 1881 | error: |
1882 | reiserfs_write_unlock(s); | ||
1883 | error_alloc: | ||
1858 | if (jinit_done) { /* kill the commit thread, free journal ram */ | 1884 | if (jinit_done) { /* kill the commit thread, free journal ram */ |
1859 | journal_release_error(NULL, s); | 1885 | journal_release_error(NULL, s); |
1860 | } | 1886 | } |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 6925b835a43b..58aa8e75f7f5 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -975,7 +975,7 @@ int reiserfs_lookup_privroot(struct super_block *s) | |||
975 | int err = 0; | 975 | int err = 0; |
976 | 976 | ||
977 | /* If we don't have the privroot located yet - go find it */ | 977 | /* If we don't have the privroot located yet - go find it */ |
978 | mutex_lock(&s->s_root->d_inode->i_mutex); | 978 | reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s); |
979 | dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, | 979 | dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, |
980 | strlen(PRIVROOT_NAME)); | 980 | strlen(PRIVROOT_NAME)); |
981 | if (!IS_ERR(dentry)) { | 981 | if (!IS_ERR(dentry)) { |
@@ -1004,14 +1004,14 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) | |||
1004 | goto error; | 1004 | goto error; |
1005 | 1005 | ||
1006 | if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) { | 1006 | if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) { |
1007 | mutex_lock(&s->s_root->d_inode->i_mutex); | 1007 | reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s); |
1008 | err = create_privroot(REISERFS_SB(s)->priv_root); | 1008 | err = create_privroot(REISERFS_SB(s)->priv_root); |
1009 | mutex_unlock(&s->s_root->d_inode->i_mutex); | 1009 | mutex_unlock(&s->s_root->d_inode->i_mutex); |
1010 | } | 1010 | } |
1011 | 1011 | ||
1012 | if (privroot->d_inode) { | 1012 | if (privroot->d_inode) { |
1013 | s->s_xattr = reiserfs_xattr_handlers; | 1013 | s->s_xattr = reiserfs_xattr_handlers; |
1014 | mutex_lock(&privroot->d_inode->i_mutex); | 1014 | reiserfs_mutex_lock_safe(&privroot->d_inode->i_mutex, s); |
1015 | if (!REISERFS_SB(s)->xattr_root) { | 1015 | if (!REISERFS_SB(s)->xattr_root) { |
1016 | struct dentry *dentry; | 1016 | struct dentry *dentry; |
1017 | dentry = lookup_one_len(XAROOT_NAME, privroot, | 1017 | dentry = lookup_one_len(XAROOT_NAME, privroot, |
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index dd31e7bae35c..a05b4a20768d 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h | |||
@@ -52,11 +52,63 @@ | |||
52 | #define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION | 52 | #define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION |
53 | #define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION | 53 | #define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION |
54 | 54 | ||
55 | /* Locking primitives */ | 55 | /* |
56 | /* Right now we are still falling back to (un)lock_kernel, but eventually that | 56 | * Locking primitives. The write lock is a per superblock |
57 | would evolve into real per-fs locks */ | 57 | * special mutex that has properties close to the Big Kernel Lock |
58 | #define reiserfs_write_lock( sb ) lock_kernel() | 58 | * which was used in the previous locking scheme. |
59 | #define reiserfs_write_unlock( sb ) unlock_kernel() | 59 | */ |
60 | void reiserfs_write_lock(struct super_block *s); | ||
61 | void reiserfs_write_unlock(struct super_block *s); | ||
62 | int reiserfs_write_lock_once(struct super_block *s); | ||
63 | void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); | ||
64 | |||
65 | /* | ||
66 | * Several mutexes depend on the write lock. | ||
67 | * However sometimes we want to relax the write lock while we hold | ||
68 | * these mutexes, according to the release/reacquire on schedule() | ||
69 | * properties of the Bkl that were used. | ||
70 | * Reiserfs performance and locking were based on this scheme. | ||
71 | * Now that the write lock is a mutex and not the bkl anymore, doing so | ||
72 | * may result in a deadlock: | ||
73 | * | ||
74 | * A acquire write_lock | ||
75 | * A acquire j_commit_mutex | ||
76 | * A release write_lock and wait for something | ||
77 | * B acquire write_lock | ||
78 | * B can't acquire j_commit_mutex and sleep | ||
79 | * A can't acquire write lock anymore | ||
80 | * deadlock | ||
81 | * | ||
82 | * What we do here is avoid such a deadlock by playing the same game | ||
83 | * as the Bkl: we release the write lock before sleeping on a mutex that | ||
84 | * depends on it, and take the write lock back once we own the mutex. | ||
85 | * | ||
86 | * The mutexes concerned by this hack are: | ||
87 | * - The commit mutex of a journal list | ||
88 | * - The flush mutex | ||
89 | * - The journal lock | ||
90 | * - The inode mutex | ||
91 | */ | ||
92 | static inline void reiserfs_mutex_lock_safe(struct mutex *m, | ||
93 | struct super_block *s) | ||
94 | { | ||
95 | reiserfs_write_unlock(s); /* drop our hold on the write lock before we may sleep on m */ | ||
96 | mutex_lock(m); | ||
97 | reiserfs_write_lock(s); /* retaken only once m is ours; m stays locked on return */ | ||
98 | } | ||
99 | |||
100 | /* | ||
101 | * If a reschedule is pending, release the write lock around schedule(), | ||
102 | * according to the previous bkl based locking scheme of reiserfs. | ||
103 | */ | ||
104 | static inline void reiserfs_cond_resched(struct super_block *s) | ||
105 | { | ||
106 | if (need_resched()) { | ||
107 | reiserfs_write_unlock(s); /* NOTE(review): if held recursively this presumably drops one level only - confirm */ | ||
108 | schedule(); | ||
109 | reiserfs_write_lock(s); /* retake it before returning to the caller */ | ||
110 | } | ||
111 | } | ||
60 | 112 | ||
61 | struct fid; | 113 | struct fid; |
62 | 114 | ||
@@ -1329,7 +1381,11 @@ static inline loff_t max_reiserfs_offset(struct inode *inode) | |||
1329 | #define get_generation(s) atomic_read (&fs_generation(s)) | 1381 | #define get_generation(s) atomic_read (&fs_generation(s)) |
1330 | #define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen) | 1382 | #define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen) |
1331 | #define __fs_changed(gen,s) (gen != get_generation (s)) | 1383 | #define __fs_changed(gen,s) (gen != get_generation (s)) |
1332 | #define fs_changed(gen,s) ({cond_resched(); __fs_changed(gen, s);}) | 1384 | #define fs_changed(gen,s) \ |
1385 | ({ \ | ||
1386 | reiserfs_cond_resched(s); \ | ||
1387 | __fs_changed(gen, s); \ | ||
1388 | }) | ||
1333 | 1389 | ||
1334 | /***************************************************************************/ | 1390 | /***************************************************************************/ |
1335 | /* FIXATE NODES */ | 1391 | /* FIXATE NODES */ |
@@ -2258,8 +2314,7 @@ __u32 r5_hash(const signed char *msg, int len); | |||
2258 | #define SPARE_SPACE 500 | 2314 | #define SPARE_SPACE 500 |
2259 | 2315 | ||
2260 | /* prototypes from ioctl.c */ | 2316 | /* prototypes from ioctl.c */ |
2261 | int reiserfs_ioctl(struct inode *inode, struct file *filp, | 2317 | long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); |
2262 | unsigned int cmd, unsigned long arg); | ||
2263 | long reiserfs_compat_ioctl(struct file *filp, | 2318 | long reiserfs_compat_ioctl(struct file *filp, |
2264 | unsigned int cmd, unsigned long arg); | 2319 | unsigned int cmd, unsigned long arg); |
2265 | int reiserfs_unpack(struct inode *inode, struct file *filp); | 2320 | int reiserfs_unpack(struct inode *inode, struct file *filp); |
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index dab68bbed675..52c83b6a758a 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h | |||
@@ -7,6 +7,8 @@ | |||
7 | #ifdef __KERNEL__ | 7 | #ifdef __KERNEL__ |
8 | #include <linux/workqueue.h> | 8 | #include <linux/workqueue.h> |
9 | #include <linux/rwsem.h> | 9 | #include <linux/rwsem.h> |
10 | #include <linux/mutex.h> | ||
11 | #include <linux/sched.h> | ||
10 | #endif | 12 | #endif |
11 | 13 | ||
12 | typedef enum { | 14 | typedef enum { |
@@ -355,6 +357,13 @@ struct reiserfs_sb_info { | |||
355 | struct reiserfs_journal *s_journal; /* pointer to journal information */ | 357 | struct reiserfs_journal *s_journal; /* pointer to journal information */ |
356 | unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ | 358 | unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ |
357 | 359 | ||
360 | /* Serialize writers access, replace the old bkl */ | ||
361 | struct mutex lock; | ||
362 | /* Owner of the lock (can be recursive) */ | ||
363 | struct task_struct *lock_owner; | ||
364 | /* Depth of the lock, start from -1 like the bkl */ | ||
365 | int lock_depth; | ||
366 | |||
358 | /* Comment? -Hans */ | 367 | /* Comment? -Hans */ |
359 | void (*end_io_handler) (struct buffer_head *, int); | 368 | void (*end_io_handler) (struct buffer_head *, int); |
360 | hashf_t s_hash_function; /* pointer to function which is used | 369 | hashf_t s_hash_function; /* pointer to function which is used |
@@ -408,6 +417,17 @@ struct reiserfs_sb_info { | |||
408 | char *s_qf_names[MAXQUOTAS]; | 417 | char *s_qf_names[MAXQUOTAS]; |
409 | int s_jquota_fmt; | 418 | int s_jquota_fmt; |
410 | #endif | 419 | #endif |
420 | #ifdef CONFIG_REISERFS_CHECK | ||
421 | |||
422 | struct tree_balance *cur_tb; /* | ||
423 | * Detects whether more than one | ||
424 | * copy of tb exists per superblock | ||
425 | * as a means of checking whether | ||
426 | * do_balance is executing concurrently | ||
427 | * against another tree reader/writer | ||
428 | * on a same mount point. | ||
429 | */ | ||
430 | #endif | ||
411 | }; | 431 | }; |
412 | 432 | ||
413 | /* Definitions of reiserfs on-disk properties: */ | 433 | /* Definitions of reiserfs on-disk properties: */ |