diff options
author | Frederic Weisbecker <fweisbec@gmail.com> | 2009-05-16 12:12:08 -0400 |
---|---|---|
committer | Frederic Weisbecker <fweisbec@gmail.com> | 2009-09-14 01:18:24 -0400 |
commit | c72e05756b900b3be24cd73a16de52bab80984c0 (patch) | |
tree | 4fc35ad9efc1a6a9ca14baa3612e551fb4da793e | |
parent | 2ac626955ed62ee8596f00581f959cc86e6198d1 (diff) |
kill-the-bkl/reiserfs: acquire the inode mutex safely
While searching a pathname, an inode mutex can be acquired
in do_lookup() which calls reiserfs_lookup() which in turn
acquires the write lock.
On the other side reiserfs_fill_super() can acquire the write_lock
and then call reiserfs_lookup_privroot() which can acquire an
inode mutex (the root of the mount point).
So we theoretically risk an AB-BA lock inversion that could lead
to a deadlock.
As for other lock dependencies found since the bkl to mutex
conversion, the fix is to use reiserfs_mutex_lock_safe() which
drops the lock dependency to the write lock.
[ Impact: fix a possible deadlock with reiserfs ]
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
-rw-r--r-- | fs/reiserfs/journal.c | 34 | ||||
-rw-r--r-- | fs/reiserfs/xattr.c | 4 | ||||
-rw-r--r-- | include/linux/reiserfs_fs.h | 35 |
3 files changed, 37 insertions, 36 deletions
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index e9a972bd0323..d23d6d7a45a6 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -537,40 +537,6 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, | |||
537 | journal_hash(table, cn->sb, cn->blocknr) = cn; | 537 | journal_hash(table, cn->sb, cn->blocknr) = cn; |
538 | } | 538 | } |
539 | 539 | ||
540 | /* | ||
541 | * Several mutexes depend on the write lock. | ||
542 | * However sometimes we want to relax the write lock while we hold | ||
543 | * these mutexes, according to the release/reacquire on schedule() | ||
544 | * properties of the Bkl that were used. | ||
545 | * Reiserfs performances and locking were based on this scheme. | ||
546 | * Now that the write lock is a mutex and not the bkl anymore, doing so | ||
547 | * may result in a deadlock: | ||
548 | * | ||
549 | * A acquire write_lock | ||
550 | * A acquire j_commit_mutex | ||
551 | * A release write_lock and wait for something | ||
552 | * B acquire write_lock | ||
553 | * B can't acquire j_commit_mutex and sleep | ||
554 | * A can't acquire write lock anymore | ||
555 | * deadlock | ||
556 | * | ||
557 | * What we do here is avoiding such deadlock by playing the same game | ||
558 | * than the Bkl: if we can't acquire a mutex that depends on the write lock, | ||
559 | * we release the write lock, wait a bit and then retry. | ||
560 | * | ||
561 | * The mutexes concerned by this hack are: | ||
562 | * - The commit mutex of a journal list | ||
563 | * - The flush mutex | ||
564 | * - The journal lock | ||
565 | */ | ||
566 | static inline void reiserfs_mutex_lock_safe(struct mutex *m, | ||
567 | struct super_block *s) | ||
568 | { | ||
569 | reiserfs_write_unlock(s); | ||
570 | mutex_lock(m); | ||
571 | reiserfs_write_lock(s); | ||
572 | } | ||
573 | |||
574 | /* lock the current transaction */ | 540 | /* lock the current transaction */ |
575 | static inline void lock_journal(struct super_block *sb) | 541 | static inline void lock_journal(struct super_block *sb) |
576 | { | 542 | { |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 6925b835a43b..59870a4751cc 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -975,7 +975,7 @@ int reiserfs_lookup_privroot(struct super_block *s) | |||
975 | int err = 0; | 975 | int err = 0; |
976 | 976 | ||
977 | /* If we don't have the privroot located yet - go find it */ | 977 | /* If we don't have the privroot located yet - go find it */ |
978 | mutex_lock(&s->s_root->d_inode->i_mutex); | 978 | reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s); |
979 | dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, | 979 | dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, |
980 | strlen(PRIVROOT_NAME)); | 980 | strlen(PRIVROOT_NAME)); |
981 | if (!IS_ERR(dentry)) { | 981 | if (!IS_ERR(dentry)) { |
@@ -1011,7 +1011,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) | |||
1011 | 1011 | ||
1012 | if (privroot->d_inode) { | 1012 | if (privroot->d_inode) { |
1013 | s->s_xattr = reiserfs_xattr_handlers; | 1013 | s->s_xattr = reiserfs_xattr_handlers; |
1014 | mutex_lock(&privroot->d_inode->i_mutex); | 1014 | reiserfs_mutex_lock_safe(&privroot->d_inode->i_mutex, s); |
1015 | if (!REISERFS_SB(s)->xattr_root) { | 1015 | if (!REISERFS_SB(s)->xattr_root) { |
1016 | struct dentry *dentry; | 1016 | struct dentry *dentry; |
1017 | dentry = lookup_one_len(XAROOT_NAME, privroot, | 1017 | dentry = lookup_one_len(XAROOT_NAME, privroot, |
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 508fb523863e..a498d9266d8c 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h | |||
@@ -63,6 +63,41 @@ int reiserfs_write_lock_once(struct super_block *s); | |||
63 | void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); | 63 | void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); |
64 | 64 | ||
65 | /* | 65 | /* |
66 | * Several mutexes depend on the write lock. | ||
67 | * However sometimes we want to relax the write lock while we hold | ||
68 | * these mutexes, according to the release/reacquire on schedule() | ||
69 | * properties of the Bkl that were used. | ||
70 | * Reiserfs performances and locking were based on this scheme. | ||
71 | * Now that the write lock is a mutex and not the bkl anymore, doing so | ||
72 | * may result in a deadlock: | ||
73 | * | ||
74 | * A acquire write_lock | ||
75 | * A acquire j_commit_mutex | ||
76 | * A release write_lock and wait for something | ||
77 | * B acquire write_lock | ||
78 | * B can't acquire j_commit_mutex and sleep | ||
79 | * A can't acquire write lock anymore | ||
80 | * deadlock | ||
81 | * | ||
82 | * What we do here is avoiding such deadlock by playing the same game | ||
83 | * than the Bkl: if we can't acquire a mutex that depends on the write lock, | ||
84 | * we release the write lock, wait a bit and then retry. | ||
85 | * | ||
86 | * The mutexes concerned by this hack are: | ||
87 | * - The commit mutex of a journal list | ||
88 | * - The flush mutex | ||
89 | * - The journal lock | ||
90 | * - The inode mutex | ||
91 | */ | ||
92 | static inline void reiserfs_mutex_lock_safe(struct mutex *m, | ||
93 | struct super_block *s) | ||
94 | { | ||
95 | reiserfs_write_unlock(s); | ||
96 | mutex_lock(m); | ||
97 | reiserfs_write_lock(s); | ||
98 | } | ||
99 | |||
100 | /* | ||
66 | * When we schedule, we usually want to also release the write lock, | 101 | * When we schedule, we usually want to also release the write lock, |
67 | * according to the previous bkl based locking scheme of reiserfs. | 102 | * according to the previous bkl based locking scheme of reiserfs. |
68 | */ | 103 | */ |