summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2016-04-15 15:08:36 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2016-05-02 19:49:28 -0400
commit9902af79c01a8e39bb99b922fa3eef6d4ea23d69 (patch)
treeb04cc75b5e4a028bfdb619e0a0a0f8cd71113ff2
parentd9171b9345261e0d941d92fdda5672b5db67f968 (diff)
parallel lookups: actual switch to rwsem
ta-da! The main issue is the lack of down_write_killable(), so the places like readdir.c switched to plain inode_lock(); once killable variants of rwsem primitives appear, that'll be dealt with. lockdep side also might need more work. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--Documentation/filesystems/porting18
-rw-r--r--fs/btrfs/ioctl.c18
-rw-r--r--fs/configfs/inode.c2
-rw-r--r--fs/dcache.c9
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/inode.c12
-rw-r--r--fs/namei.c4
-rw-r--r--fs/ocfs2/inode.c2
-rw-r--r--fs/overlayfs/readdir.c4
-rw-r--r--fs/readdir.c7
-rw-r--r--include/linux/fs.h27
11 files changed, 73 insertions, 32 deletions
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 8810e2367fe6..1567a53857bd 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -539,3 +539,21 @@ in your dentry operations instead.
539 it's a symlink. Checking ->i_mode is really needed now. In-tree we had 539 it's a symlink. Checking ->i_mode is really needed now. In-tree we had
540 to fix shmem_destroy_callback() that used to take that kind of shortcut; 540 to fix shmem_destroy_callback() that used to take that kind of shortcut;
541 watch out, since that shortcut is no longer valid. 541 watch out, since that shortcut is no longer valid.
542--
543[mandatory]
544 ->i_mutex is replaced with ->i_rwsem now. inode_lock() et al. work as
545 they used to - they just take it exclusive. However, ->lookup() may be
546 called with parent locked shared. Its instances must not
547 * use d_instantiate() and d_rehash() separately - use d_add() or
548 d_splice_alias() instead.
549 * use d_rehash() alone - call d_add(new_dentry, NULL) instead.
550 * in the unlikely case when (read-only) access to filesystem
551 data structures needs exclusion for some reason, arrange it
552 yourself. None of the in-tree filesystems needed that.
553 * rely on ->d_parent and ->d_name not changing after dentry has
554 been fed to d_add() or d_splice_alias(). Again, none of the
555 in-tree instances relied upon that.
556 We are guaranteed that lookups of the same name in the same directory
557 will not happen in parallel ("same" in the sense of your ->d_compare()).
558 Lookups on different names in the same directory can and do happen in
559 parallel now.
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5a23806ae418..0b8ba717175b 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -837,9 +837,11 @@ static noinline int btrfs_mksubvol(struct path *parent,
837 struct dentry *dentry; 837 struct dentry *dentry;
838 int error; 838 int error;
839 839
840 error = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); 840 inode_lock_nested(dir, I_MUTEX_PARENT);
841 if (error == -EINTR) 841 // XXX: should've been
842 return error; 842 // mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
843 // if (error == -EINTR)
844 // return error;
843 845
844 dentry = lookup_one_len(name, parent->dentry, namelen); 846 dentry = lookup_one_len(name, parent->dentry, namelen);
845 error = PTR_ERR(dentry); 847 error = PTR_ERR(dentry);
@@ -2366,9 +2368,11 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2366 goto out; 2368 goto out;
2367 2369
2368 2370
2369 err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); 2371 inode_lock_nested(dir, I_MUTEX_PARENT);
2370 if (err == -EINTR) 2372 // XXX: should've been
2371 goto out_drop_write; 2373 // err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
2374 // if (err == -EINTR)
2375 // goto out_drop_write;
2372 dentry = lookup_one_len(vol_args->name, parent, namelen); 2376 dentry = lookup_one_len(vol_args->name, parent, namelen);
2373 if (IS_ERR(dentry)) { 2377 if (IS_ERR(dentry)) {
2374 err = PTR_ERR(dentry); 2378 err = PTR_ERR(dentry);
@@ -2558,7 +2562,7 @@ out_dput:
2558 dput(dentry); 2562 dput(dentry);
2559out_unlock_dir: 2563out_unlock_dir:
2560 inode_unlock(dir); 2564 inode_unlock(dir);
2561out_drop_write: 2565//out_drop_write:
2562 mnt_drop_write_file(file); 2566 mnt_drop_write_file(file);
2563out: 2567out:
2564 kfree(vol_args); 2568 kfree(vol_args);
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 03d124ae27d7..0387968e6f47 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -156,7 +156,7 @@ static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
156 156
157 if (depth > 0) { 157 if (depth > 0) {
158 if (depth <= ARRAY_SIZE(default_group_class)) { 158 if (depth <= ARRAY_SIZE(default_group_class)) {
159 lockdep_set_class(&inode->i_mutex, 159 lockdep_set_class(&inode->i_rwsem,
160 &default_group_class[depth - 1]); 160 &default_group_class[depth - 1]);
161 } else { 161 } else {
162 /* 162 /*
diff --git a/fs/dcache.c b/fs/dcache.c
index 59fcffcbf096..e49ba7d1b957 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2932,7 +2932,8 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2932static int __d_unalias(struct inode *inode, 2932static int __d_unalias(struct inode *inode,
2933 struct dentry *dentry, struct dentry *alias) 2933 struct dentry *dentry, struct dentry *alias)
2934{ 2934{
2935 struct mutex *m1 = NULL, *m2 = NULL; 2935 struct mutex *m1 = NULL;
2936 struct rw_semaphore *m2 = NULL;
2936 int ret = -ESTALE; 2937 int ret = -ESTALE;
2937 2938
2938 /* If alias and dentry share a parent, then no extra locks required */ 2939 /* If alias and dentry share a parent, then no extra locks required */
@@ -2943,15 +2944,15 @@ static int __d_unalias(struct inode *inode,
2943 if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) 2944 if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
2944 goto out_err; 2945 goto out_err;
2945 m1 = &dentry->d_sb->s_vfs_rename_mutex; 2946 m1 = &dentry->d_sb->s_vfs_rename_mutex;
2946 if (!inode_trylock(alias->d_parent->d_inode)) 2947 if (!inode_trylock_shared(alias->d_parent->d_inode))
2947 goto out_err; 2948 goto out_err;
2948 m2 = &alias->d_parent->d_inode->i_mutex; 2949 m2 = &alias->d_parent->d_inode->i_rwsem;
2949out_unalias: 2950out_unalias:
2950 __d_move(alias, dentry, false); 2951 __d_move(alias, dentry, false);
2951 ret = 0; 2952 ret = 0;
2952out_err: 2953out_err:
2953 if (m2) 2954 if (m2)
2954 mutex_unlock(m2); 2955 up_read(m2);
2955 if (m1) 2956 if (m1)
2956 mutex_unlock(m1); 2957 mutex_unlock(m1);
2957 return ret; 2958 return ret;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index c09c63dcd7a2..45463600fb81 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -824,7 +824,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
824 * i_mutex on quota files is special. Since this inode is hidden system 824 * i_mutex on quota files is special. Since this inode is hidden system
825 * file, we are safe to define locking ourselves. 825 * file, we are safe to define locking ourselves.
826 */ 826 */
827 lockdep_set_class(&sdp->sd_quota_inode->i_mutex, 827 lockdep_set_class(&sdp->sd_quota_inode->i_rwsem,
828 &gfs2_quota_imutex_key); 828 &gfs2_quota_imutex_key);
829 829
830 error = gfs2_rindex_update(sdp); 830 error = gfs2_rindex_update(sdp);
diff --git a/fs/inode.c b/fs/inode.c
index 4b884f73214e..4ccbc21b30ce 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -166,8 +166,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
166 spin_lock_init(&inode->i_lock); 166 spin_lock_init(&inode->i_lock);
167 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); 167 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
168 168
169 mutex_init(&inode->i_mutex); 169 init_rwsem(&inode->i_rwsem);
170 lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); 170 lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key);
171 171
172 atomic_set(&inode->i_dio_count, 0); 172 atomic_set(&inode->i_dio_count, 0);
173 173
@@ -925,13 +925,13 @@ void lockdep_annotate_inode_mutex_key(struct inode *inode)
925 struct file_system_type *type = inode->i_sb->s_type; 925 struct file_system_type *type = inode->i_sb->s_type;
926 926
927 /* Set new key only if filesystem hasn't already changed it */ 927 /* Set new key only if filesystem hasn't already changed it */
928 if (lockdep_match_class(&inode->i_mutex, &type->i_mutex_key)) { 928 if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) {
929 /* 929 /*
930 * ensure nobody is actually holding i_mutex 930 * ensure nobody is actually holding i_mutex
931 */ 931 */
932 mutex_destroy(&inode->i_mutex); 932 // mutex_destroy(&inode->i_mutex);
933 mutex_init(&inode->i_mutex); 933 init_rwsem(&inode->i_rwsem);
934 lockdep_set_class(&inode->i_mutex, 934 lockdep_set_class(&inode->i_rwsem,
935 &type->i_mutex_dir_key); 935 &type->i_mutex_dir_key);
936 } 936 }
937 } 937 }
diff --git a/fs/namei.c b/fs/namei.c
index 7babb5e5f276..8249852b5fc6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1607,7 +1607,7 @@ static struct dentry *lookup_slow(const struct qstr *name,
1607 struct inode *inode = dir->d_inode; 1607 struct inode *inode = dir->d_inode;
1608 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); 1608 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
1609 1609
1610 inode_lock(inode); 1610 inode_lock_shared(inode);
1611 /* Don't go there if it's already dead */ 1611 /* Don't go there if it's already dead */
1612 if (unlikely(IS_DEADDIR(inode))) 1612 if (unlikely(IS_DEADDIR(inode)))
1613 goto out; 1613 goto out;
@@ -1638,7 +1638,7 @@ again:
1638 } 1638 }
1639 } 1639 }
1640out: 1640out:
1641 inode_unlock(inode); 1641 inode_unlock_shared(inode);
1642 return dentry; 1642 return dentry;
1643} 1643}
1644 1644
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 12f4a9e9800f..0748777f2e2a 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -262,7 +262,7 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
262 inode->i_ino = args->fi_ino; 262 inode->i_ino = args->fi_ino;
263 OCFS2_I(inode)->ip_blkno = args->fi_blkno; 263 OCFS2_I(inode)->ip_blkno = args->fi_blkno;
264 if (args->fi_sysfile_type != 0) 264 if (args->fi_sysfile_type != 0)
265 lockdep_set_class(&inode->i_mutex, 265 lockdep_set_class(&inode->i_rwsem,
266 &ocfs2_sysfile_lock_key[args->fi_sysfile_type]); 266 &ocfs2_sysfile_lock_key[args->fi_sysfile_type]);
267 if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE || 267 if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE ||
268 args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE || 268 args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE ||
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 6ec1e43a9a54..da186ee4f846 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -218,7 +218,9 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
218 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); 218 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
219 old_cred = override_creds(override_cred); 219 old_cred = override_creds(override_cred);
220 220
221 err = mutex_lock_killable(&dir->d_inode->i_mutex); 221 inode_lock(dir->d_inode);
222 err = 0;
223 // XXX: err = mutex_lock_killable(&dir->d_inode->i_mutex);
222 if (!err) { 224 if (!err) {
223 while (rdd->first_maybe_whiteout) { 225 while (rdd->first_maybe_whiteout) {
224 p = rdd->first_maybe_whiteout; 226 p = rdd->first_maybe_whiteout;
diff --git a/fs/readdir.c b/fs/readdir.c
index e69ef3b79787..bf583e848a1a 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -32,9 +32,10 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
32 if (res) 32 if (res)
33 goto out; 33 goto out;
34 34
35 res = mutex_lock_killable(&inode->i_mutex); 35 inode_lock(inode);
36 if (res) 36 // res = mutex_lock_killable(&inode->i_mutex);
37 goto out; 37 // if (res)
38 // goto out;
38 39
39 res = -ENOENT; 40 res = -ENOENT;
40 if (!IS_DEADDIR(inode)) { 41 if (!IS_DEADDIR(inode)) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 00cecc5a2f75..3018f31f7aa0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -647,7 +647,7 @@ struct inode {
647 647
648 /* Misc */ 648 /* Misc */
649 unsigned long i_state; 649 unsigned long i_state;
650 struct mutex i_mutex; 650 struct rw_semaphore i_rwsem;
651 651
652 unsigned long dirtied_when; /* jiffies of first dirtying */ 652 unsigned long dirtied_when; /* jiffies of first dirtying */
653 unsigned long dirtied_time_when; 653 unsigned long dirtied_time_when;
@@ -734,27 +734,42 @@ enum inode_i_mutex_lock_class
734 734
735static inline void inode_lock(struct inode *inode) 735static inline void inode_lock(struct inode *inode)
736{ 736{
737 mutex_lock(&inode->i_mutex); 737 down_write(&inode->i_rwsem);
738} 738}
739 739
740static inline void inode_unlock(struct inode *inode) 740static inline void inode_unlock(struct inode *inode)
741{ 741{
742 mutex_unlock(&inode->i_mutex); 742 up_write(&inode->i_rwsem);
743}
744
745static inline void inode_lock_shared(struct inode *inode)
746{
747 down_read(&inode->i_rwsem);
748}
749
750static inline void inode_unlock_shared(struct inode *inode)
751{
752 up_read(&inode->i_rwsem);
743} 753}
744 754
745static inline int inode_trylock(struct inode *inode) 755static inline int inode_trylock(struct inode *inode)
746{ 756{
747 return mutex_trylock(&inode->i_mutex); 757 return down_write_trylock(&inode->i_rwsem);
758}
759
760static inline int inode_trylock_shared(struct inode *inode)
761{
762 return down_read_trylock(&inode->i_rwsem);
748} 763}
749 764
750static inline int inode_is_locked(struct inode *inode) 765static inline int inode_is_locked(struct inode *inode)
751{ 766{
752 return mutex_is_locked(&inode->i_mutex); 767 return rwsem_is_locked(&inode->i_rwsem);
753} 768}
754 769
755static inline void inode_lock_nested(struct inode *inode, unsigned subclass) 770static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
756{ 771{
757 mutex_lock_nested(&inode->i_mutex, subclass); 772 down_write_nested(&inode->i_rwsem, subclass);
758} 773}
759 774
760void lock_two_nondirectories(struct inode *, struct inode*); 775void lock_two_nondirectories(struct inode *, struct inode*);