aboutsummaryrefslogtreecommitdiffstats
path: root/fs/reiserfs/super.c
diff options
context:
space:
mode:
authorFrederic Weisbecker <fweisbec@gmail.com>2009-04-06 22:19:49 -0400
committerFrederic Weisbecker <fweisbec@gmail.com>2009-09-14 01:17:59 -0400
commit8ebc423238341b52912c7295b045a32477b33f09 (patch)
tree39677401de0df98c09ca888c87d85033b6fe93c9 /fs/reiserfs/super.c
parent74fca6a42863ffacaf7ba6f1936a9f228950f657 (diff)
reiserfs: kill-the-BKL
This patch is an attempt to remove the BKL-based locking scheme from reiserfs and is intended. It is loosely inspired by an old attempt by Peter Zijlstra: http://lkml.indiana.edu/hypermail/linux/kernel/0704.2/2174.html

The BKL is heavily used in this filesystem to prevent concurrent write accesses on the filesystem.

Reiserfs makes deep use of the specific properties of the BKL:
- It can be acquired recursively by the same task
- It is released on the schedule() calls and reacquired when schedule() returns

The two properties above are a roadmap for the reiserfs write locking, so it's very hard to simply replace it with a common mutex.
- We need a recursive-able locking unless we want to restructure several blocks of the code.
- We need to identify the sites where the BKL was implicitly relaxed (schedule, wait, sync, etc...) so that we can in turn release and reacquire our new lock explicitly. Such implicit releases of the lock are often required to let other resource producers/consumers do their job, or we can suffer unexpected starvations or deadlocks.

So the new lock that replaces the BKL here is a per-superblock mutex with a specific property: it can be acquired recursively by the same task, like the BKL. For this purpose, we integrate a lock owner and a lock depth field in the superblock information structure.

The first axis of this patch is to turn the reiserfs_write_(un)lock() functions into wrappers that manage this mutex. Also, some explicit calls to lock_kernel() have been converted to reiserfs_write_lock() helpers.

The second axis is to find the important blocking sites (schedule...(), wait_on_buffer(), sync_dirty_buffer(), etc...) and then apply an explicit release of the write lock at these locations before blocking. Then we can safely wait for those who can give us resources or those who need some. Typically this is a fight between the current writer, the reiserfs workqueue (aka the async committer) and the pdflush threads.
The third axis is a consequence of the second. The write lock is usually on top of a lock dependency chain which can include the journal lock, the flush lock or the commit lock. So it's dangerous to release and try to reacquire the write lock while we still hold other locks.

This is fine with the BKL:

      T1                              T2

    lock_kernel()
    mutex_lock(A)
    unlock_kernel()
    // do something
                                      lock_kernel()
                                      mutex_lock(A) -> already locked by T1
                                      schedule() (and then unlock_kernel())
    lock_kernel()
    mutex_unlock(A)
    ....

This is not fine with a mutex:

      T1                              T2

    mutex_lock(write)
    mutex_lock(A)
    mutex_unlock(write)
    // do something
                                      mutex_lock(write)
                                      mutex_lock(A) -> already locked by T1
                                      schedule()
    mutex_lock(write) -> already locked by T2
    deadlock

The solution in this patch is to provide a helper which releases the write lock and sleeps a bit if we can't lock a mutex that depends on it. It's another simulation of the BKL behaviour.

The last axis is to locate the fs callbacks that are called with the BKL held, according to Documentation/filesystems/Locking. Those are:
- reiserfs_remount
- reiserfs_fill_super
- reiserfs_put_super

Reiserfs didn't need to explicitly lock because of the context of these callbacks. But now we must take care of that with the new locking.

After this patch, reiserfs suffers from a slight performance regression (for now). On UP, a high volume write with dd reports an average of 27 MB/s instead of 30 MB/s without the patch applied.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Bron Gondwana <brong@fastmail.fm>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
LKML-Reference: <1239070789-13354-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs/reiserfs/super.c')
-rw-r--r--fs/reiserfs/super.c37
1 files changed, 30 insertions, 7 deletions
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 7adea74d6a8a..e1cfb80d0bf3 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -465,7 +465,7 @@ static void reiserfs_put_super(struct super_block *s)
465 struct reiserfs_transaction_handle th; 465 struct reiserfs_transaction_handle th;
466 th.t_trans_id = 0; 466 th.t_trans_id = 0;
467 467
468 lock_kernel(); 468 reiserfs_write_lock(s);
469 469
470 if (s->s_dirt) 470 if (s->s_dirt)
471 reiserfs_write_super(s); 471 reiserfs_write_super(s);
@@ -499,10 +499,10 @@ static void reiserfs_put_super(struct super_block *s)
499 499
500 reiserfs_proc_info_done(s); 500 reiserfs_proc_info_done(s);
501 501
502 reiserfs_write_unlock(s);
503 mutex_destroy(&REISERFS_SB(s)->lock);
502 kfree(s->s_fs_info); 504 kfree(s->s_fs_info);
503 s->s_fs_info = NULL; 505 s->s_fs_info = NULL;
504
505 unlock_kernel();
506} 506}
507 507
508static struct kmem_cache *reiserfs_inode_cachep; 508static struct kmem_cache *reiserfs_inode_cachep;
@@ -1168,11 +1168,14 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1168 unsigned int qfmt = 0; 1168 unsigned int qfmt = 0;
1169#ifdef CONFIG_QUOTA 1169#ifdef CONFIG_QUOTA
1170 int i; 1170 int i;
1171#endif
1172
1173 reiserfs_write_lock(s);
1171 1174
1175#ifdef CONFIG_QUOTA
1172 memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); 1176 memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
1173#endif 1177#endif
1174 1178
1175 lock_kernel();
1176 rs = SB_DISK_SUPER_BLOCK(s); 1179 rs = SB_DISK_SUPER_BLOCK(s);
1177 1180
1178 if (!reiserfs_parse_options 1181 if (!reiserfs_parse_options
@@ -1295,12 +1298,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1295 1298
1296out_ok: 1299out_ok:
1297 replace_mount_options(s, new_opts); 1300 replace_mount_options(s, new_opts);
1298 unlock_kernel(); 1301 reiserfs_write_unlock(s);
1299 return 0; 1302 return 0;
1300 1303
1301out_err: 1304out_err:
1302 kfree(new_opts); 1305 kfree(new_opts);
1303 unlock_kernel(); 1306 reiserfs_write_unlock(s);
1304 return err; 1307 return err;
1305} 1308}
1306 1309
@@ -1404,7 +1407,9 @@ static int read_super_block(struct super_block *s, int offset)
1404static int reread_meta_blocks(struct super_block *s) 1407static int reread_meta_blocks(struct super_block *s)
1405{ 1408{
1406 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); 1409 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
1410 reiserfs_write_unlock(s);
1407 wait_on_buffer(SB_BUFFER_WITH_SB(s)); 1411 wait_on_buffer(SB_BUFFER_WITH_SB(s));
1412 reiserfs_write_lock(s);
1408 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { 1413 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
1409 reiserfs_warning(s, "reiserfs-2504", "error reading the super"); 1414 reiserfs_warning(s, "reiserfs-2504", "error reading the super");
1410 return 1; 1415 return 1;
@@ -1613,7 +1618,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1613 sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); 1618 sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
1614 if (!sbi) { 1619 if (!sbi) {
1615 errval = -ENOMEM; 1620 errval = -ENOMEM;
1616 goto error; 1621 goto error_alloc;
1617 } 1622 }
1618 s->s_fs_info = sbi; 1623 s->s_fs_info = sbi;
1619 /* Set default values for options: non-aggressive tails, RO on errors */ 1624 /* Set default values for options: non-aggressive tails, RO on errors */
@@ -1627,6 +1632,20 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1627 /* setup default block allocator options */ 1632 /* setup default block allocator options */
1628 reiserfs_init_alloc_options(s); 1633 reiserfs_init_alloc_options(s);
1629 1634
1635 mutex_init(&REISERFS_SB(s)->lock);
1636 REISERFS_SB(s)->lock_depth = -1;
1637
1638 /*
1639 * This function is called with the bkl, which also was the old
1640 * locking used here.
1641 * do_journal_begin() will soon check if we hold the lock (ie: was the
1642 * bkl). This is likely because do_journal_begin() has several other
1643 * callers because at this time, it doesn't seem to be necessary to
1644 * protect against anything.
1645 * Anyway, let's be conservative and lock for now.
1646 */
1647 reiserfs_write_lock(s);
1648
1630 jdev_name = NULL; 1649 jdev_name = NULL;
1631 if (reiserfs_parse_options 1650 if (reiserfs_parse_options
1632 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, 1651 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
@@ -1852,9 +1871,13 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1852 init_waitqueue_head(&(sbi->s_wait)); 1871 init_waitqueue_head(&(sbi->s_wait));
1853 spin_lock_init(&sbi->bitmap_lock); 1872 spin_lock_init(&sbi->bitmap_lock);
1854 1873
1874 reiserfs_write_unlock(s);
1875
1855 return (0); 1876 return (0);
1856 1877
1857error: 1878error:
1879 reiserfs_write_unlock(s);
1880error_alloc:
1858 if (jinit_done) { /* kill the commit thread, free journal ram */ 1881 if (jinit_done) { /* kill the commit thread, free journal ram */
1859 journal_release_error(NULL, s); 1882 journal_release_error(NULL, s);
1860 } 1883 }