diff options
author | Nick Piggin <npiggin@kernel.dk> | 2011-01-07 01:49:53 -0500 |
---|---|---|
committer | Nick Piggin <npiggin@kernel.dk> | 2011-01-07 01:50:27 -0500 |
commit | c28cc36469554dc55540f059fbdc7fa22a2c31fc (patch) | |
tree | 6b867456be48b8633a2d56a99e00bb3faf9dccc7 | |
parent | 31e6b01f4183ff419a6d1f86177cbf4662347cec (diff) |
fs: fs_struct use seqlock
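Use a seqlock in the fs_struct (a seqcount_t whose writers are still serialised
by the existing fs->lock spinlock) so that readers can take a consistent copy of
the complete cwd and root paths without taking the lock, retrying if a writer
raced with them. Use this in the RCU lookup path to avoid a thread-shared
spinlock in RCU lookup operations.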
Multi-threaded apps may now perform path lookups with scalability matching
multi-process apps. Operations such as stat(2) become very scalable for
multi-threaded workloads.
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
-rw-r--r-- | fs/fs_struct.c | 10 | ||||
-rw-r--r-- | fs/namei.c | 34 | ||||
-rw-r--r-- | include/linux/fs_struct.h | 3 |
3 files changed, 34 insertions, 13 deletions
diff --git a/fs/fs_struct.c b/fs/fs_struct.c index ed45a9cf5f3..60b8531f41c 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c | |||
@@ -14,9 +14,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) | |||
14 | struct path old_root; | 14 | struct path old_root; |
15 | 15 | ||
16 | spin_lock(&fs->lock); | 16 | spin_lock(&fs->lock); |
17 | write_seqcount_begin(&fs->seq); | ||
17 | old_root = fs->root; | 18 | old_root = fs->root; |
18 | fs->root = *path; | 19 | fs->root = *path; |
19 | path_get(path); | 20 | path_get(path); |
21 | write_seqcount_end(&fs->seq); | ||
20 | spin_unlock(&fs->lock); | 22 | spin_unlock(&fs->lock); |
21 | if (old_root.dentry) | 23 | if (old_root.dentry) |
22 | path_put(&old_root); | 24 | path_put(&old_root); |
@@ -31,9 +33,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) | |||
31 | struct path old_pwd; | 33 | struct path old_pwd; |
32 | 34 | ||
33 | spin_lock(&fs->lock); | 35 | spin_lock(&fs->lock); |
36 | write_seqcount_begin(&fs->seq); | ||
34 | old_pwd = fs->pwd; | 37 | old_pwd = fs->pwd; |
35 | fs->pwd = *path; | 38 | fs->pwd = *path; |
36 | path_get(path); | 39 | path_get(path); |
40 | write_seqcount_end(&fs->seq); | ||
37 | spin_unlock(&fs->lock); | 41 | spin_unlock(&fs->lock); |
38 | 42 | ||
39 | if (old_pwd.dentry) | 43 | if (old_pwd.dentry) |
@@ -52,6 +56,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) | |||
52 | fs = p->fs; | 56 | fs = p->fs; |
53 | if (fs) { | 57 | if (fs) { |
54 | spin_lock(&fs->lock); | 58 | spin_lock(&fs->lock); |
59 | write_seqcount_begin(&fs->seq); | ||
55 | if (fs->root.dentry == old_root->dentry | 60 | if (fs->root.dentry == old_root->dentry |
56 | && fs->root.mnt == old_root->mnt) { | 61 | && fs->root.mnt == old_root->mnt) { |
57 | path_get(new_root); | 62 | path_get(new_root); |
@@ -64,6 +69,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) | |||
64 | fs->pwd = *new_root; | 69 | fs->pwd = *new_root; |
65 | count++; | 70 | count++; |
66 | } | 71 | } |
72 | write_seqcount_end(&fs->seq); | ||
67 | spin_unlock(&fs->lock); | 73 | spin_unlock(&fs->lock); |
68 | } | 74 | } |
69 | task_unlock(p); | 75 | task_unlock(p); |
@@ -88,8 +94,10 @@ void exit_fs(struct task_struct *tsk) | |||
88 | int kill; | 94 | int kill; |
89 | task_lock(tsk); | 95 | task_lock(tsk); |
90 | spin_lock(&fs->lock); | 96 | spin_lock(&fs->lock); |
97 | write_seqcount_begin(&fs->seq); | ||
91 | tsk->fs = NULL; | 98 | tsk->fs = NULL; |
92 | kill = !--fs->users; | 99 | kill = !--fs->users; |
100 | write_seqcount_end(&fs->seq); | ||
93 | spin_unlock(&fs->lock); | 101 | spin_unlock(&fs->lock); |
94 | task_unlock(tsk); | 102 | task_unlock(tsk); |
95 | if (kill) | 103 | if (kill) |
@@ -105,6 +113,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) | |||
105 | fs->users = 1; | 113 | fs->users = 1; |
106 | fs->in_exec = 0; | 114 | fs->in_exec = 0; |
107 | spin_lock_init(&fs->lock); | 115 | spin_lock_init(&fs->lock); |
116 | seqcount_init(&fs->seq); | ||
108 | fs->umask = old->umask; | 117 | fs->umask = old->umask; |
109 | get_fs_root_and_pwd(old, &fs->root, &fs->pwd); | 118 | get_fs_root_and_pwd(old, &fs->root, &fs->pwd); |
110 | } | 119 | } |
@@ -144,6 +153,7 @@ EXPORT_SYMBOL(current_umask); | |||
144 | struct fs_struct init_fs = { | 153 | struct fs_struct init_fs = { |
145 | .users = 1, | 154 | .users = 1, |
146 | .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), | 155 | .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), |
156 | .seq = SEQCNT_ZERO, | ||
147 | .umask = 0022, | 157 | .umask = 0022, |
148 | }; | 158 | }; |
149 | 159 | ||
diff --git a/fs/namei.c b/fs/namei.c index 8d3f15b3a54..c731b50a618 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -684,9 +684,12 @@ static __always_inline void set_root_rcu(struct nameidata *nd) | |||
684 | { | 684 | { |
685 | if (!nd->root.mnt) { | 685 | if (!nd->root.mnt) { |
686 | struct fs_struct *fs = current->fs; | 686 | struct fs_struct *fs = current->fs; |
687 | spin_lock(&fs->lock); | 687 | unsigned seq; |
688 | nd->root = fs->root; | 688 | |
689 | spin_unlock(&fs->lock); | 689 | do { |
690 | seq = read_seqcount_begin(&fs->seq); | ||
691 | nd->root = fs->root; | ||
692 | } while (read_seqcount_retry(&fs->seq, seq)); | ||
690 | } | 693 | } |
691 | } | 694 | } |
692 | 695 | ||
@@ -1369,26 +1372,31 @@ static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct n | |||
1369 | 1372 | ||
1370 | if (*name=='/') { | 1373 | if (*name=='/') { |
1371 | struct fs_struct *fs = current->fs; | 1374 | struct fs_struct *fs = current->fs; |
1375 | unsigned seq; | ||
1372 | 1376 | ||
1373 | br_read_lock(vfsmount_lock); | 1377 | br_read_lock(vfsmount_lock); |
1374 | rcu_read_lock(); | 1378 | rcu_read_lock(); |
1375 | 1379 | ||
1376 | spin_lock(&fs->lock); | 1380 | do { |
1377 | nd->root = fs->root; | 1381 | seq = read_seqcount_begin(&fs->seq); |
1378 | nd->path = nd->root; | 1382 | nd->root = fs->root; |
1379 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); | 1383 | nd->path = nd->root; |
1380 | spin_unlock(&fs->lock); | 1384 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); |
1385 | } while (read_seqcount_retry(&fs->seq, seq)); | ||
1381 | 1386 | ||
1382 | } else if (dfd == AT_FDCWD) { | 1387 | } else if (dfd == AT_FDCWD) { |
1383 | struct fs_struct *fs = current->fs; | 1388 | struct fs_struct *fs = current->fs; |
1389 | unsigned seq; | ||
1384 | 1390 | ||
1385 | br_read_lock(vfsmount_lock); | 1391 | br_read_lock(vfsmount_lock); |
1386 | rcu_read_lock(); | 1392 | rcu_read_lock(); |
1387 | 1393 | ||
1388 | spin_lock(&fs->lock); | 1394 | do { |
1389 | nd->path = fs->pwd; | 1395 | seq = read_seqcount_begin(&fs->seq); |
1390 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); | 1396 | nd->path = fs->pwd; |
1391 | spin_unlock(&fs->lock); | 1397 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); |
1398 | } while (read_seqcount_retry(&fs->seq, seq)); | ||
1399 | |||
1392 | } else { | 1400 | } else { |
1393 | struct dentry *dentry; | 1401 | struct dentry *dentry; |
1394 | 1402 | ||
@@ -1411,7 +1419,7 @@ static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct n | |||
1411 | if (fput_needed) | 1419 | if (fput_needed) |
1412 | nd->file = file; | 1420 | nd->file = file; |
1413 | 1421 | ||
1414 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); | 1422 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); |
1415 | br_read_lock(vfsmount_lock); | 1423 | br_read_lock(vfsmount_lock); |
1416 | rcu_read_lock(); | 1424 | rcu_read_lock(); |
1417 | } | 1425 | } |
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index a42b5bf02f8..003dc0fd734 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h | |||
@@ -2,10 +2,13 @@ | |||
2 | #define _LINUX_FS_STRUCT_H | 2 | #define _LINUX_FS_STRUCT_H |
3 | 3 | ||
4 | #include <linux/path.h> | 4 | #include <linux/path.h> |
5 | #include <linux/spinlock.h> | ||
6 | #include <linux/seqlock.h> | ||
5 | 7 | ||
6 | struct fs_struct { | 8 | struct fs_struct { |
7 | int users; | 9 | int users; |
8 | spinlock_t lock; | 10 | spinlock_t lock; |
11 | seqcount_t seq; | ||
9 | int umask; | 12 | int umask; |
10 | int in_exec; | 13 | int in_exec; |
11 | struct path root, pwd; | 14 | struct path root, pwd; |