diff options
author | Dipankar Sarma <dipankar@in.ibm.com> | 2005-09-09 16:04:14 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-09-09 16:57:55 -0400 |
commit | b835996f628eadb55c5fb222ba46fe9395bf73c7 (patch) | |
tree | d63d80585d197e1ffc299af4a0034049790fb197 | |
parent | ab2af1f5005069321c5d130f09cce577b03f43ef (diff) |
[PATCH] files: lock-free fd look-up
With the use of RCU in files structure, the look-up of files using fds can now
be lock-free. The lookup is protected by rcu_read_lock()/rcu_read_unlock().
This patch changes the readers to use lock-free lookup.
Signed-off-by: Maneesh Soni <maneesh@in.ibm.com>
Signed-off-by: Ravikiran Thirumalai <kiran_th@gmail.com>
Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | arch/mips/kernel/irixioctl.c | 5 | ||||
-rw-r--r-- | arch/sparc64/solaris/ioctl.c | 7 | ||||
-rw-r--r-- | drivers/char/tty_io.c | 4 | ||||
-rw-r--r-- | fs/fcntl.c | 4 | ||||
-rw-r--r-- | fs/proc/base.c | 29 | ||||
-rw-r--r-- | fs/select.c | 13 | ||||
-rw-r--r-- | net/ipv4/netfilter/ipt_owner.c | 1 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6t_owner.c | 1 | ||||
-rw-r--r-- | security/selinux/hooks.c | 2 |
9 files changed, 39 insertions, 27 deletions
diff --git a/arch/mips/kernel/irixioctl.c b/arch/mips/kernel/irixioctl.c index 4cd3d38a22c2..3cdc22346f4c 100644 --- a/arch/mips/kernel/irixioctl.c +++ b/arch/mips/kernel/irixioctl.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/syscalls.h> | 14 | #include <linux/syscalls.h> |
15 | #include <linux/tty.h> | 15 | #include <linux/tty.h> |
16 | #include <linux/file.h> | 16 | #include <linux/file.h> |
17 | #include <linux/rcupdate.h> | ||
17 | 18 | ||
18 | #include <asm/uaccess.h> | 19 | #include <asm/uaccess.h> |
19 | #include <asm/ioctl.h> | 20 | #include <asm/ioctl.h> |
@@ -33,7 +34,7 @@ static struct tty_struct *get_tty(int fd) | |||
33 | struct file *filp; | 34 | struct file *filp; |
34 | struct tty_struct *ttyp = NULL; | 35 | struct tty_struct *ttyp = NULL; |
35 | 36 | ||
36 | spin_lock(&current->files->file_lock); | 37 | rcu_read_lock(); |
37 | filp = fcheck(fd); | 38 | filp = fcheck(fd); |
38 | if(filp && filp->private_data) { | 39 | if(filp && filp->private_data) { |
39 | ttyp = (struct tty_struct *) filp->private_data; | 40 | ttyp = (struct tty_struct *) filp->private_data; |
@@ -41,7 +42,7 @@ static struct tty_struct *get_tty(int fd) | |||
41 | if(ttyp->magic != TTY_MAGIC) | 42 | if(ttyp->magic != TTY_MAGIC) |
42 | ttyp =NULL; | 43 | ttyp =NULL; |
43 | } | 44 | } |
44 | spin_unlock(&current->files->file_lock); | 45 | rcu_read_unlock(); |
45 | return ttyp; | 46 | return ttyp; |
46 | } | 47 | } |
47 | 48 | ||
diff --git a/arch/sparc64/solaris/ioctl.c b/arch/sparc64/solaris/ioctl.c index 374766455f5e..be0a054e3ed6 100644 --- a/arch/sparc64/solaris/ioctl.c +++ b/arch/sparc64/solaris/ioctl.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/netdevice.h> | 24 | #include <linux/netdevice.h> |
25 | #include <linux/mtio.h> | 25 | #include <linux/mtio.h> |
26 | #include <linux/time.h> | 26 | #include <linux/time.h> |
27 | #include <linux/rcupdate.h> | ||
27 | #include <linux/compat.h> | 28 | #include <linux/compat.h> |
28 | 29 | ||
29 | #include <net/sock.h> | 30 | #include <net/sock.h> |
@@ -295,16 +296,16 @@ static inline int solaris_sockmod(unsigned int fd, unsigned int cmd, u32 arg) | |||
295 | struct inode *ino; | 296 | struct inode *ino; |
296 | struct fdtable *fdt; | 297 | struct fdtable *fdt; |
297 | /* I wonder which of these tests are superfluous... --patrik */ | 298 | /* I wonder which of these tests are superfluous... --patrik */ |
298 | spin_lock(&current->files->file_lock); | 299 | rcu_read_lock(); |
299 | fdt = files_fdtable(current->files); | 300 | fdt = files_fdtable(current->files); |
300 | if (! fdt->fd[fd] || | 301 | if (! fdt->fd[fd] || |
301 | ! fdt->fd[fd]->f_dentry || | 302 | ! fdt->fd[fd]->f_dentry || |
302 | ! (ino = fdt->fd[fd]->f_dentry->d_inode) || | 303 | ! (ino = fdt->fd[fd]->f_dentry->d_inode) || |
303 | ! S_ISSOCK(ino->i_mode)) { | 304 | ! S_ISSOCK(ino->i_mode)) { |
304 | spin_unlock(&current->files->file_lock); | 305 | rcu_read_unlock(); |
305 | return TBADF; | 306 | return TBADF; |
306 | } | 307 | } |
307 | spin_unlock(&current->files->file_lock); | 308 | rcu_read_unlock(); |
308 | 309 | ||
309 | switch (cmd & 0xff) { | 310 | switch (cmd & 0xff) { |
310 | case 109: /* SI_SOCKPARAMS */ | 311 | case 109: /* SI_SOCKPARAMS */ |
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 0bfc7af68917..e5953f3433f3 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c | |||
@@ -2480,7 +2480,7 @@ static void __do_SAK(void *arg) | |||
2480 | } | 2480 | } |
2481 | task_lock(p); | 2481 | task_lock(p); |
2482 | if (p->files) { | 2482 | if (p->files) { |
2483 | spin_lock(&p->files->file_lock); | 2483 | rcu_read_lock(); |
2484 | fdt = files_fdtable(p->files); | 2484 | fdt = files_fdtable(p->files); |
2485 | for (i=0; i < fdt->max_fds; i++) { | 2485 | for (i=0; i < fdt->max_fds; i++) { |
2486 | filp = fcheck_files(p->files, i); | 2486 | filp = fcheck_files(p->files, i); |
@@ -2495,7 +2495,7 @@ static void __do_SAK(void *arg) | |||
2495 | break; | 2495 | break; |
2496 | } | 2496 | } |
2497 | } | 2497 | } |
2498 | spin_unlock(&p->files->file_lock); | 2498 | rcu_read_unlock(); |
2499 | } | 2499 | } |
2500 | task_unlock(p); | 2500 | task_unlock(p); |
2501 | } while_each_task_pid(session, PIDTYPE_SID, p); | 2501 | } while_each_task_pid(session, PIDTYPE_SID, p); |
diff --git a/fs/fcntl.c b/fs/fcntl.c index d2f3ed8acd93..863b46e0d78a 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -40,10 +40,10 @@ static inline int get_close_on_exec(unsigned int fd) | |||
40 | struct files_struct *files = current->files; | 40 | struct files_struct *files = current->files; |
41 | struct fdtable *fdt; | 41 | struct fdtable *fdt; |
42 | int res; | 42 | int res; |
43 | spin_lock(&files->file_lock); | 43 | rcu_read_lock(); |
44 | fdt = files_fdtable(files); | 44 | fdt = files_fdtable(files); |
45 | res = FD_ISSET(fd, fdt->close_on_exec); | 45 | res = FD_ISSET(fd, fdt->close_on_exec); |
46 | spin_unlock(&files->file_lock); | 46 | rcu_read_unlock(); |
47 | return res; | 47 | return res; |
48 | } | 48 | } |
49 | 49 | ||
diff --git a/fs/proc/base.c b/fs/proc/base.c index d0087a0b024b..23db452ab428 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -62,6 +62,7 @@ | |||
62 | #include <linux/namespace.h> | 62 | #include <linux/namespace.h> |
63 | #include <linux/mm.h> | 63 | #include <linux/mm.h> |
64 | #include <linux/smp_lock.h> | 64 | #include <linux/smp_lock.h> |
65 | #include <linux/rcupdate.h> | ||
65 | #include <linux/kallsyms.h> | 66 | #include <linux/kallsyms.h> |
66 | #include <linux/mount.h> | 67 | #include <linux/mount.h> |
67 | #include <linux/security.h> | 68 | #include <linux/security.h> |
@@ -283,16 +284,16 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm | |||
283 | 284 | ||
284 | files = get_files_struct(task); | 285 | files = get_files_struct(task); |
285 | if (files) { | 286 | if (files) { |
286 | spin_lock(&files->file_lock); | 287 | rcu_read_lock(); |
287 | file = fcheck_files(files, fd); | 288 | file = fcheck_files(files, fd); |
288 | if (file) { | 289 | if (file) { |
289 | *mnt = mntget(file->f_vfsmnt); | 290 | *mnt = mntget(file->f_vfsmnt); |
290 | *dentry = dget(file->f_dentry); | 291 | *dentry = dget(file->f_dentry); |
291 | spin_unlock(&files->file_lock); | 292 | rcu_read_unlock(); |
292 | put_files_struct(files); | 293 | put_files_struct(files); |
293 | return 0; | 294 | return 0; |
294 | } | 295 | } |
295 | spin_unlock(&files->file_lock); | 296 | rcu_read_unlock(); |
296 | put_files_struct(files); | 297 | put_files_struct(files); |
297 | } | 298 | } |
298 | return -ENOENT; | 299 | return -ENOENT; |
@@ -1062,7 +1063,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
1062 | files = get_files_struct(p); | 1063 | files = get_files_struct(p); |
1063 | if (!files) | 1064 | if (!files) |
1064 | goto out; | 1065 | goto out; |
1065 | spin_lock(&files->file_lock); | 1066 | rcu_read_lock(); |
1066 | fdt = files_fdtable(files); | 1067 | fdt = files_fdtable(files); |
1067 | for (fd = filp->f_pos-2; | 1068 | for (fd = filp->f_pos-2; |
1068 | fd < fdt->max_fds; | 1069 | fd < fdt->max_fds; |
@@ -1071,7 +1072,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
1071 | 1072 | ||
1072 | if (!fcheck_files(files, fd)) | 1073 | if (!fcheck_files(files, fd)) |
1073 | continue; | 1074 | continue; |
1074 | spin_unlock(&files->file_lock); | 1075 | rcu_read_unlock(); |
1075 | 1076 | ||
1076 | j = NUMBUF; | 1077 | j = NUMBUF; |
1077 | i = fd; | 1078 | i = fd; |
@@ -1083,12 +1084,12 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
1083 | 1084 | ||
1084 | ino = fake_ino(tid, PROC_TID_FD_DIR + fd); | 1085 | ino = fake_ino(tid, PROC_TID_FD_DIR + fd); |
1085 | if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { | 1086 | if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { |
1086 | spin_lock(&files->file_lock); | 1087 | rcu_read_lock(); |
1087 | break; | 1088 | break; |
1088 | } | 1089 | } |
1089 | spin_lock(&files->file_lock); | 1090 | rcu_read_lock(); |
1090 | } | 1091 | } |
1091 | spin_unlock(&files->file_lock); | 1092 | rcu_read_unlock(); |
1092 | put_files_struct(files); | 1093 | put_files_struct(files); |
1093 | } | 1094 | } |
1094 | out: | 1095 | out: |
@@ -1263,9 +1264,9 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1263 | 1264 | ||
1264 | files = get_files_struct(task); | 1265 | files = get_files_struct(task); |
1265 | if (files) { | 1266 | if (files) { |
1266 | spin_lock(&files->file_lock); | 1267 | rcu_read_lock(); |
1267 | if (fcheck_files(files, fd)) { | 1268 | if (fcheck_files(files, fd)) { |
1268 | spin_unlock(&files->file_lock); | 1269 | rcu_read_unlock(); |
1269 | put_files_struct(files); | 1270 | put_files_struct(files); |
1270 | if (task_dumpable(task)) { | 1271 | if (task_dumpable(task)) { |
1271 | inode->i_uid = task->euid; | 1272 | inode->i_uid = task->euid; |
@@ -1277,7 +1278,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1277 | security_task_to_inode(task, inode); | 1278 | security_task_to_inode(task, inode); |
1278 | return 1; | 1279 | return 1; |
1279 | } | 1280 | } |
1280 | spin_unlock(&files->file_lock); | 1281 | rcu_read_unlock(); |
1281 | put_files_struct(files); | 1282 | put_files_struct(files); |
1282 | } | 1283 | } |
1283 | d_drop(dentry); | 1284 | d_drop(dentry); |
@@ -1369,7 +1370,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, | |||
1369 | if (!files) | 1370 | if (!files) |
1370 | goto out_unlock; | 1371 | goto out_unlock; |
1371 | inode->i_mode = S_IFLNK; | 1372 | inode->i_mode = S_IFLNK; |
1372 | spin_lock(&files->file_lock); | 1373 | rcu_read_lock(); |
1373 | file = fcheck_files(files, fd); | 1374 | file = fcheck_files(files, fd); |
1374 | if (!file) | 1375 | if (!file) |
1375 | goto out_unlock2; | 1376 | goto out_unlock2; |
@@ -1377,7 +1378,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, | |||
1377 | inode->i_mode |= S_IRUSR | S_IXUSR; | 1378 | inode->i_mode |= S_IRUSR | S_IXUSR; |
1378 | if (file->f_mode & 2) | 1379 | if (file->f_mode & 2) |
1379 | inode->i_mode |= S_IWUSR | S_IXUSR; | 1380 | inode->i_mode |= S_IWUSR | S_IXUSR; |
1380 | spin_unlock(&files->file_lock); | 1381 | rcu_read_unlock(); |
1381 | put_files_struct(files); | 1382 | put_files_struct(files); |
1382 | inode->i_op = &proc_pid_link_inode_operations; | 1383 | inode->i_op = &proc_pid_link_inode_operations; |
1383 | inode->i_size = 64; | 1384 | inode->i_size = 64; |
@@ -1387,7 +1388,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, | |||
1387 | return NULL; | 1388 | return NULL; |
1388 | 1389 | ||
1389 | out_unlock2: | 1390 | out_unlock2: |
1390 | spin_unlock(&files->file_lock); | 1391 | rcu_read_unlock(); |
1391 | put_files_struct(files); | 1392 | put_files_struct(files); |
1392 | out_unlock: | 1393 | out_unlock: |
1393 | iput(inode); | 1394 | iput(inode); |
diff --git a/fs/select.c b/fs/select.c index 2e56325c73c4..f10a10317d54 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/personality.h> /* for STICKY_TIMEOUTS */ | 22 | #include <linux/personality.h> /* for STICKY_TIMEOUTS */ |
23 | #include <linux/file.h> | 23 | #include <linux/file.h> |
24 | #include <linux/fs.h> | 24 | #include <linux/fs.h> |
25 | #include <linux/rcupdate.h> | ||
25 | 26 | ||
26 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
27 | 28 | ||
@@ -185,9 +186,9 @@ int do_select(int n, fd_set_bits *fds, long *timeout) | |||
185 | int retval, i; | 186 | int retval, i; |
186 | long __timeout = *timeout; | 187 | long __timeout = *timeout; |
187 | 188 | ||
188 | spin_lock(&current->files->file_lock); | 189 | rcu_read_lock(); |
189 | retval = max_select_fd(n, fds); | 190 | retval = max_select_fd(n, fds); |
190 | spin_unlock(&current->files->file_lock); | 191 | rcu_read_unlock(); |
191 | 192 | ||
192 | if (retval < 0) | 193 | if (retval < 0) |
193 | return retval; | 194 | return retval; |
@@ -329,8 +330,10 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s | |||
329 | goto out_nofds; | 330 | goto out_nofds; |
330 | 331 | ||
331 | /* max_fdset can increase, so grab it once to avoid race */ | 332 | /* max_fdset can increase, so grab it once to avoid race */ |
333 | rcu_read_lock(); | ||
332 | fdt = files_fdtable(current->files); | 334 | fdt = files_fdtable(current->files); |
333 | max_fdset = fdt->max_fdset; | 335 | max_fdset = fdt->max_fdset; |
336 | rcu_read_unlock(); | ||
334 | if (n > max_fdset) | 337 | if (n > max_fdset) |
335 | n = max_fdset; | 338 | n = max_fdset; |
336 | 339 | ||
@@ -469,10 +472,14 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti | |||
469 | struct poll_list *head; | 472 | struct poll_list *head; |
470 | struct poll_list *walk; | 473 | struct poll_list *walk; |
471 | struct fdtable *fdt; | 474 | struct fdtable *fdt; |
475 | int max_fdset; | ||
472 | 476 | ||
473 | /* Do a sanity check on nfds ... */ | 477 | /* Do a sanity check on nfds ... */ |
478 | rcu_read_lock(); | ||
474 | fdt = files_fdtable(current->files); | 479 | fdt = files_fdtable(current->files); |
475 | if (nfds > fdt->max_fdset && nfds > OPEN_MAX) | 480 | max_fdset = fdt->max_fdset; |
481 | rcu_read_unlock(); | ||
482 | if (nfds > max_fdset && nfds > OPEN_MAX) | ||
476 | return -EINVAL; | 483 | return -EINVAL; |
477 | 484 | ||
478 | if (timeout) { | 485 | if (timeout) { |
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index c1889f88262b..0cee2862ed85 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/skbuff.h> | 12 | #include <linux/skbuff.h> |
13 | #include <linux/file.h> | 13 | #include <linux/file.h> |
14 | #include <linux/rcupdate.h> | ||
14 | #include <net/sock.h> | 15 | #include <net/sock.h> |
15 | 16 | ||
16 | #include <linux/netfilter_ipv4/ipt_owner.h> | 17 | #include <linux/netfilter_ipv4/ipt_owner.h> |
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 9b91decbfddb..4de4cdad4b7d 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/skbuff.h> | 12 | #include <linux/skbuff.h> |
13 | #include <linux/file.h> | 13 | #include <linux/file.h> |
14 | #include <linux/rcupdate.h> | ||
14 | #include <net/sock.h> | 15 | #include <net/sock.h> |
15 | 16 | ||
16 | #include <linux/netfilter_ipv6/ip6t_owner.h> | 17 | #include <linux/netfilter_ipv6/ip6t_owner.h> |
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index acb5a495a902..f40c8221ec1b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c | |||
@@ -1652,7 +1652,7 @@ static inline void flush_unauthorized_files(struct files_struct * files) | |||
1652 | continue; | 1652 | continue; |
1653 | } | 1653 | } |
1654 | if (devnull) { | 1654 | if (devnull) { |
1655 | atomic_inc(&devnull->f_count); | 1655 | rcuref_inc(&devnull->f_count); |
1656 | } else { | 1656 | } else { |
1657 | devnull = dentry_open(dget(selinux_null), mntget(selinuxfs_mount), O_RDWR); | 1657 | devnull = dentry_open(dget(selinux_null), mntget(selinuxfs_mount), O_RDWR); |
1658 | if (!devnull) { | 1658 | if (!devnull) { |