aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Frederic Weisbecker <fweisbec@gmail.com> 2009-05-16 12:12:08 -0400
committer Frederic Weisbecker <fweisbec@gmail.com> 2009-09-14 01:18:24 -0400
commit c72e05756b900b3be24cd73a16de52bab80984c0 (patch)
tree 4fc35ad9efc1a6a9ca14baa3612e551fb4da793e
parent 2ac626955ed62ee8596f00581f959cc86e6198d1 (diff)
kill-the-bkl/reiserfs: acquire the inode mutex safely
While searching a pathname, an inode mutex can be acquired in do_lookup() which calls reiserfs_lookup() which in turn acquires the write lock.

On the other side reiserfs_fill_super() can acquire the write_lock and then call reiserfs_lookup_privroot() which can acquire an inode mutex (the root of the mount point).

So we theoretically risk an AB - BA lock inversion that could lead to a deadlock.

As for other lock dependencies found since the bkl to mutex conversion, the fix is to use reiserfs_mutex_lock_safe() which drops the lock dependency to the write lock.

[ Impact: fix a possible deadlock with reiserfs ]

Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
-rw-r--r-- fs/reiserfs/journal.c 34
-rw-r--r-- fs/reiserfs/xattr.c 4
-rw-r--r-- include/linux/reiserfs_fs.h 35
3 files changed, 37 insertions, 36 deletions
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index e9a972bd0323..d23d6d7a45a6 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -537,40 +537,6 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
537 journal_hash(table, cn->sb, cn->blocknr) = cn; 537 journal_hash(table, cn->sb, cn->blocknr) = cn;
538} 538}
539 539
540/*
541 * Several mutexes depend on the write lock.
542 * However sometimes we want to relax the write lock while we hold
543 * these mutexes, according to the release/reacquire on schedule()
544 * properties of the Bkl that were used.
545 * Reiserfs performances and locking were based on this scheme.
546 * Now that the write lock is a mutex and not the bkl anymore, doing so
547 * may result in a deadlock:
548 *
549 * A acquire write_lock
550 * A acquire j_commit_mutex
551 * A release write_lock and wait for something
552 * B acquire write_lock
553 * B can't acquire j_commit_mutex and sleep
554 * A can't acquire write lock anymore
555 * deadlock
556 *
557 * What we do here is avoiding such deadlock by playing the same game
558 * than the Bkl: if we can't acquire a mutex that depends on the write lock,
559 * we release the write lock, wait a bit and then retry.
560 *
561 * The mutexes concerned by this hack are:
562 * - The commit mutex of a journal list
563 * - The flush mutex
564 * - The journal lock
565 */
566static inline void reiserfs_mutex_lock_safe(struct mutex *m,
567 struct super_block *s)
568{
569 reiserfs_write_unlock(s);
570 mutex_lock(m);
571 reiserfs_write_lock(s);
572}
573
574/* lock the current transaction */ 540/* lock the current transaction */
575static inline void lock_journal(struct super_block *sb) 541static inline void lock_journal(struct super_block *sb)
576{ 542{
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 6925b835a43b..59870a4751cc 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -975,7 +975,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
975 int err = 0; 975 int err = 0;
976 976
977 /* If we don't have the privroot located yet - go find it */ 977 /* If we don't have the privroot located yet - go find it */
978 mutex_lock(&s->s_root->d_inode->i_mutex); 978 reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s);
979 dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, 979 dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
980 strlen(PRIVROOT_NAME)); 980 strlen(PRIVROOT_NAME));
981 if (!IS_ERR(dentry)) { 981 if (!IS_ERR(dentry)) {
@@ -1011,7 +1011,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
1011 1011
1012 if (privroot->d_inode) { 1012 if (privroot->d_inode) {
1013 s->s_xattr = reiserfs_xattr_handlers; 1013 s->s_xattr = reiserfs_xattr_handlers;
1014 mutex_lock(&privroot->d_inode->i_mutex); 1014 reiserfs_mutex_lock_safe(&privroot->d_inode->i_mutex, s);
1015 if (!REISERFS_SB(s)->xattr_root) { 1015 if (!REISERFS_SB(s)->xattr_root) {
1016 struct dentry *dentry; 1016 struct dentry *dentry;
1017 dentry = lookup_one_len(XAROOT_NAME, privroot, 1017 dentry = lookup_one_len(XAROOT_NAME, privroot,
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 508fb523863e..a498d9266d8c 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -63,6 +63,41 @@ int reiserfs_write_lock_once(struct super_block *s);
63void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); 63void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
64 64
65/* 65/*
66 * Several mutexes depend on the write lock.
67 * However sometimes we want to relax the write lock while we hold
68 * these mutexes, according to the release/reacquire on schedule()
69 * properties of the Bkl that were used.
70 * Reiserfs performances and locking were based on this scheme.
71 * Now that the write lock is a mutex and not the bkl anymore, doing so
72 * may result in a deadlock:
73 *
74 * A acquire write_lock
75 * A acquire j_commit_mutex
76 * A release write_lock and wait for something
77 * B acquire write_lock
78 * B can't acquire j_commit_mutex and sleep
79 * A can't acquire write lock anymore
80 * deadlock
81 *
82 * What we do here is avoiding such deadlock by playing the same game
83 * than the Bkl: if we can't acquire a mutex that depends on the write lock,
84 * we release the write lock, wait a bit and then retry.
85 *
86 * The mutexes concerned by this hack are:
87 * - The commit mutex of a journal list
88 * - The flush mutex
89 * - The journal lock
90 * - The inode mutex
91 */
92static inline void reiserfs_mutex_lock_safe(struct mutex *m,
93 struct super_block *s)
94{
95 reiserfs_write_unlock(s);
96 mutex_lock(m);
97 reiserfs_write_lock(s);
98}
99
100/*
66 * When we schedule, we usually want to also release the write lock, 101 * When we schedule, we usually want to also release the write lock,
67 * according to the previous bkl based locking scheme of reiserfs. 102 * according to the previous bkl based locking scheme of reiserfs.
68 */ 103 */