diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2016-04-15 15:08:36 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2016-05-02 19:49:28 -0400 |
commit | 9902af79c01a8e39bb99b922fa3eef6d4ea23d69 (patch) | |
tree | b04cc75b5e4a028bfdb619e0a0a0f8cd71113ff2 | |
parent | d9171b9345261e0d941d92fdda5672b5db67f968 (diff) |
parallel lookups: actual switch to rwsem
ta-da!
The main issue is the lack of down_write_killable(), so the places
like readdir.c switched to plain inode_lock(); once killable
variants of rwsem primitives appear, that'll be dealt with.
The lockdep side might also need more work.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | Documentation/filesystems/porting | 18 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 18 | ||||
-rw-r--r-- | fs/configfs/inode.c | 2 | ||||
-rw-r--r-- | fs/dcache.c | 9 | ||||
-rw-r--r-- | fs/gfs2/ops_fstype.c | 2 | ||||
-rw-r--r-- | fs/inode.c | 12 | ||||
-rw-r--r-- | fs/namei.c | 4 | ||||
-rw-r--r-- | fs/ocfs2/inode.c | 2 | ||||
-rw-r--r-- | fs/overlayfs/readdir.c | 4 | ||||
-rw-r--r-- | fs/readdir.c | 7 | ||||
-rw-r--r-- | include/linux/fs.h | 27 |
11 files changed, 73 insertions, 32 deletions
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 8810e2367fe6..1567a53857bd 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting | |||
@@ -539,3 +539,21 @@ in your dentry operations instead. | |||
539 | it's a symlink. Checking ->i_mode is really needed now. In-tree we had | 539 | it's a symlink. Checking ->i_mode is really needed now. In-tree we had |
540 | to fix shmem_destroy_callback() that used to take that kind of shortcut; | 540 | to fix shmem_destroy_callback() that used to take that kind of shortcut; |
541 | watch out, since that shortcut is no longer valid. | 541 | watch out, since that shortcut is no longer valid. |
542 | -- | ||
543 | [mandatory] | ||
544 | ->i_mutex is replaced with ->i_rwsem now. inode_lock() et al. work as | ||
545 | they used to - they just take it exclusive. However, ->lookup() may be | ||
546 | called with parent locked shared. Its instances must not | ||
547 | * use d_instantiate() and d_rehash() separately - use d_add() or | ||
548 | d_splice_alias() instead. | ||
549 | * use d_rehash() alone - call d_add(new_dentry, NULL) instead. | ||
550 | * in the unlikely case when (read-only) access to filesystem | ||
551 | data structures needs exclusion for some reason, arrange it | ||
552 | yourself. None of the in-tree filesystems needed that. | ||
553 | * rely on ->d_parent and ->d_name not changing after dentry has | ||
554 | been fed to d_add() or d_splice_alias(). Again, none of the | ||
555 | in-tree instances relied upon that. | ||
556 | We are guaranteed that lookups of the same name in the same directory | ||
557 | will not happen in parallel ("same" in the sense of your ->d_compare()). | ||
558 | Lookups on different names in the same directory can and do happen in | ||
559 | parallel now. | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5a23806ae418..0b8ba717175b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -837,9 +837,11 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
837 | struct dentry *dentry; | 837 | struct dentry *dentry; |
838 | int error; | 838 | int error; |
839 | 839 | ||
840 | error = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); | 840 | inode_lock_nested(dir, I_MUTEX_PARENT); |
841 | if (error == -EINTR) | 841 | // XXX: should've been |
842 | return error; | 842 | // mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); |
843 | // if (error == -EINTR) | ||
844 | // return error; | ||
843 | 845 | ||
844 | dentry = lookup_one_len(name, parent->dentry, namelen); | 846 | dentry = lookup_one_len(name, parent->dentry, namelen); |
845 | error = PTR_ERR(dentry); | 847 | error = PTR_ERR(dentry); |
@@ -2366,9 +2368,11 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
2366 | goto out; | 2368 | goto out; |
2367 | 2369 | ||
2368 | 2370 | ||
2369 | err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); | 2371 | inode_lock_nested(dir, I_MUTEX_PARENT); |
2370 | if (err == -EINTR) | 2372 | // XXX: should've been |
2371 | goto out_drop_write; | 2373 | // err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); |
2374 | // if (err == -EINTR) | ||
2375 | // goto out_drop_write; | ||
2372 | dentry = lookup_one_len(vol_args->name, parent, namelen); | 2376 | dentry = lookup_one_len(vol_args->name, parent, namelen); |
2373 | if (IS_ERR(dentry)) { | 2377 | if (IS_ERR(dentry)) { |
2374 | err = PTR_ERR(dentry); | 2378 | err = PTR_ERR(dentry); |
@@ -2558,7 +2562,7 @@ out_dput: | |||
2558 | dput(dentry); | 2562 | dput(dentry); |
2559 | out_unlock_dir: | 2563 | out_unlock_dir: |
2560 | inode_unlock(dir); | 2564 | inode_unlock(dir); |
2561 | out_drop_write: | 2565 | //out_drop_write: |
2562 | mnt_drop_write_file(file); | 2566 | mnt_drop_write_file(file); |
2563 | out: | 2567 | out: |
2564 | kfree(vol_args); | 2568 | kfree(vol_args); |
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 03d124ae27d7..0387968e6f47 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c | |||
@@ -156,7 +156,7 @@ static void configfs_set_inode_lock_class(struct configfs_dirent *sd, | |||
156 | 156 | ||
157 | if (depth > 0) { | 157 | if (depth > 0) { |
158 | if (depth <= ARRAY_SIZE(default_group_class)) { | 158 | if (depth <= ARRAY_SIZE(default_group_class)) { |
159 | lockdep_set_class(&inode->i_mutex, | 159 | lockdep_set_class(&inode->i_rwsem, |
160 | &default_group_class[depth - 1]); | 160 | &default_group_class[depth - 1]); |
161 | } else { | 161 | } else { |
162 | /* | 162 | /* |
diff --git a/fs/dcache.c b/fs/dcache.c index 59fcffcbf096..e49ba7d1b957 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -2932,7 +2932,8 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) | |||
2932 | static int __d_unalias(struct inode *inode, | 2932 | static int __d_unalias(struct inode *inode, |
2933 | struct dentry *dentry, struct dentry *alias) | 2933 | struct dentry *dentry, struct dentry *alias) |
2934 | { | 2934 | { |
2935 | struct mutex *m1 = NULL, *m2 = NULL; | 2935 | struct mutex *m1 = NULL; |
2936 | struct rw_semaphore *m2 = NULL; | ||
2936 | int ret = -ESTALE; | 2937 | int ret = -ESTALE; |
2937 | 2938 | ||
2938 | /* If alias and dentry share a parent, then no extra locks required */ | 2939 | /* If alias and dentry share a parent, then no extra locks required */ |
@@ -2943,15 +2944,15 @@ static int __d_unalias(struct inode *inode, | |||
2943 | if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) | 2944 | if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) |
2944 | goto out_err; | 2945 | goto out_err; |
2945 | m1 = &dentry->d_sb->s_vfs_rename_mutex; | 2946 | m1 = &dentry->d_sb->s_vfs_rename_mutex; |
2946 | if (!inode_trylock(alias->d_parent->d_inode)) | 2947 | if (!inode_trylock_shared(alias->d_parent->d_inode)) |
2947 | goto out_err; | 2948 | goto out_err; |
2948 | m2 = &alias->d_parent->d_inode->i_mutex; | 2949 | m2 = &alias->d_parent->d_inode->i_rwsem; |
2949 | out_unalias: | 2950 | out_unalias: |
2950 | __d_move(alias, dentry, false); | 2951 | __d_move(alias, dentry, false); |
2951 | ret = 0; | 2952 | ret = 0; |
2952 | out_err: | 2953 | out_err: |
2953 | if (m2) | 2954 | if (m2) |
2954 | mutex_unlock(m2); | 2955 | up_read(m2); |
2955 | if (m1) | 2956 | if (m1) |
2956 | mutex_unlock(m1); | 2957 | mutex_unlock(m1); |
2957 | return ret; | 2958 | return ret; |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index c09c63dcd7a2..45463600fb81 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -824,7 +824,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
824 | * i_mutex on quota files is special. Since this inode is hidden system | 824 | * i_mutex on quota files is special. Since this inode is hidden system |
825 | * file, we are safe to define locking ourselves. | 825 | * file, we are safe to define locking ourselves. |
826 | */ | 826 | */ |
827 | lockdep_set_class(&sdp->sd_quota_inode->i_mutex, | 827 | lockdep_set_class(&sdp->sd_quota_inode->i_rwsem, |
828 | &gfs2_quota_imutex_key); | 828 | &gfs2_quota_imutex_key); |
829 | 829 | ||
830 | error = gfs2_rindex_update(sdp); | 830 | error = gfs2_rindex_update(sdp); |
diff --git a/fs/inode.c b/fs/inode.c index 4b884f73214e..4ccbc21b30ce 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -166,8 +166,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode) | |||
166 | spin_lock_init(&inode->i_lock); | 166 | spin_lock_init(&inode->i_lock); |
167 | lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); | 167 | lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); |
168 | 168 | ||
169 | mutex_init(&inode->i_mutex); | 169 | init_rwsem(&inode->i_rwsem); |
170 | lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); | 170 | lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key); |
171 | 171 | ||
172 | atomic_set(&inode->i_dio_count, 0); | 172 | atomic_set(&inode->i_dio_count, 0); |
173 | 173 | ||
@@ -925,13 +925,13 @@ void lockdep_annotate_inode_mutex_key(struct inode *inode) | |||
925 | struct file_system_type *type = inode->i_sb->s_type; | 925 | struct file_system_type *type = inode->i_sb->s_type; |
926 | 926 | ||
927 | /* Set new key only if filesystem hasn't already changed it */ | 927 | /* Set new key only if filesystem hasn't already changed it */ |
928 | if (lockdep_match_class(&inode->i_mutex, &type->i_mutex_key)) { | 928 | if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) { |
929 | /* | 929 | /* |
930 | * ensure nobody is actually holding i_mutex | 930 | * ensure nobody is actually holding i_mutex |
931 | */ | 931 | */ |
932 | mutex_destroy(&inode->i_mutex); | 932 | // mutex_destroy(&inode->i_mutex); |
933 | mutex_init(&inode->i_mutex); | 933 | init_rwsem(&inode->i_rwsem); |
934 | lockdep_set_class(&inode->i_mutex, | 934 | lockdep_set_class(&inode->i_rwsem, |
935 | &type->i_mutex_dir_key); | 935 | &type->i_mutex_dir_key); |
936 | } | 936 | } |
937 | } | 937 | } |
diff --git a/fs/namei.c b/fs/namei.c index 7babb5e5f276..8249852b5fc6 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1607,7 +1607,7 @@ static struct dentry *lookup_slow(const struct qstr *name, | |||
1607 | struct inode *inode = dir->d_inode; | 1607 | struct inode *inode = dir->d_inode; |
1608 | DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | 1608 | DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); |
1609 | 1609 | ||
1610 | inode_lock(inode); | 1610 | inode_lock_shared(inode); |
1611 | /* Don't go there if it's already dead */ | 1611 | /* Don't go there if it's already dead */ |
1612 | if (unlikely(IS_DEADDIR(inode))) | 1612 | if (unlikely(IS_DEADDIR(inode))) |
1613 | goto out; | 1613 | goto out; |
@@ -1638,7 +1638,7 @@ again: | |||
1638 | } | 1638 | } |
1639 | } | 1639 | } |
1640 | out: | 1640 | out: |
1641 | inode_unlock(inode); | 1641 | inode_unlock_shared(inode); |
1642 | return dentry; | 1642 | return dentry; |
1643 | } | 1643 | } |
1644 | 1644 | ||
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 12f4a9e9800f..0748777f2e2a 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -262,7 +262,7 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) | |||
262 | inode->i_ino = args->fi_ino; | 262 | inode->i_ino = args->fi_ino; |
263 | OCFS2_I(inode)->ip_blkno = args->fi_blkno; | 263 | OCFS2_I(inode)->ip_blkno = args->fi_blkno; |
264 | if (args->fi_sysfile_type != 0) | 264 | if (args->fi_sysfile_type != 0) |
265 | lockdep_set_class(&inode->i_mutex, | 265 | lockdep_set_class(&inode->i_rwsem, |
266 | &ocfs2_sysfile_lock_key[args->fi_sysfile_type]); | 266 | &ocfs2_sysfile_lock_key[args->fi_sysfile_type]); |
267 | if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE || | 267 | if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE || |
268 | args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE || | 268 | args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE || |
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 6ec1e43a9a54..da186ee4f846 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c | |||
@@ -218,7 +218,9 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) | |||
218 | cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); | 218 | cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); |
219 | old_cred = override_creds(override_cred); | 219 | old_cred = override_creds(override_cred); |
220 | 220 | ||
221 | err = mutex_lock_killable(&dir->d_inode->i_mutex); | 221 | inode_lock(dir->d_inode); |
222 | err = 0; | ||
223 | // XXX: err = mutex_lock_killable(&dir->d_inode->i_mutex); | ||
222 | if (!err) { | 224 | if (!err) { |
223 | while (rdd->first_maybe_whiteout) { | 225 | while (rdd->first_maybe_whiteout) { |
224 | p = rdd->first_maybe_whiteout; | 226 | p = rdd->first_maybe_whiteout; |
diff --git a/fs/readdir.c b/fs/readdir.c index e69ef3b79787..bf583e848a1a 100644 --- a/fs/readdir.c +++ b/fs/readdir.c | |||
@@ -32,9 +32,10 @@ int iterate_dir(struct file *file, struct dir_context *ctx) | |||
32 | if (res) | 32 | if (res) |
33 | goto out; | 33 | goto out; |
34 | 34 | ||
35 | res = mutex_lock_killable(&inode->i_mutex); | 35 | inode_lock(inode); |
36 | if (res) | 36 | // res = mutex_lock_killable(&inode->i_mutex); |
37 | goto out; | 37 | // if (res) |
38 | // goto out; | ||
38 | 39 | ||
39 | res = -ENOENT; | 40 | res = -ENOENT; |
40 | if (!IS_DEADDIR(inode)) { | 41 | if (!IS_DEADDIR(inode)) { |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 00cecc5a2f75..3018f31f7aa0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -647,7 +647,7 @@ struct inode { | |||
647 | 647 | ||
648 | /* Misc */ | 648 | /* Misc */ |
649 | unsigned long i_state; | 649 | unsigned long i_state; |
650 | struct mutex i_mutex; | 650 | struct rw_semaphore i_rwsem; |
651 | 651 | ||
652 | unsigned long dirtied_when; /* jiffies of first dirtying */ | 652 | unsigned long dirtied_when; /* jiffies of first dirtying */ |
653 | unsigned long dirtied_time_when; | 653 | unsigned long dirtied_time_when; |
@@ -734,27 +734,42 @@ enum inode_i_mutex_lock_class | |||
734 | 734 | ||
735 | static inline void inode_lock(struct inode *inode) | 735 | static inline void inode_lock(struct inode *inode) |
736 | { | 736 | { |
737 | mutex_lock(&inode->i_mutex); | 737 | down_write(&inode->i_rwsem); |
738 | } | 738 | } |
739 | 739 | ||
740 | static inline void inode_unlock(struct inode *inode) | 740 | static inline void inode_unlock(struct inode *inode) |
741 | { | 741 | { |
742 | mutex_unlock(&inode->i_mutex); | 742 | up_write(&inode->i_rwsem); |
743 | } | ||
744 | |||
745 | static inline void inode_lock_shared(struct inode *inode) | ||
746 | { | ||
747 | down_read(&inode->i_rwsem); | ||
748 | } | ||
749 | |||
750 | static inline void inode_unlock_shared(struct inode *inode) | ||
751 | { | ||
752 | up_read(&inode->i_rwsem); | ||
743 | } | 753 | } |
744 | 754 | ||
745 | static inline int inode_trylock(struct inode *inode) | 755 | static inline int inode_trylock(struct inode *inode) |
746 | { | 756 | { |
747 | return mutex_trylock(&inode->i_mutex); | 757 | return down_write_trylock(&inode->i_rwsem); |
758 | } | ||
759 | |||
760 | static inline int inode_trylock_shared(struct inode *inode) | ||
761 | { | ||
762 | return down_read_trylock(&inode->i_rwsem); | ||
748 | } | 763 | } |
749 | 764 | ||
750 | static inline int inode_is_locked(struct inode *inode) | 765 | static inline int inode_is_locked(struct inode *inode) |
751 | { | 766 | { |
752 | return mutex_is_locked(&inode->i_mutex); | 767 | return rwsem_is_locked(&inode->i_rwsem); |
753 | } | 768 | } |
754 | 769 | ||
755 | static inline void inode_lock_nested(struct inode *inode, unsigned subclass) | 770 | static inline void inode_lock_nested(struct inode *inode, unsigned subclass) |
756 | { | 771 | { |
757 | mutex_lock_nested(&inode->i_mutex, subclass); | 772 | down_write_nested(&inode->i_rwsem, subclass); |
758 | } | 773 | } |
759 | 774 | ||
760 | void lock_two_nondirectories(struct inode *, struct inode*); | 775 | void lock_two_nondirectories(struct inode *, struct inode*); |