aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Dipankar Sarma <dipankar@in.ibm.com> 2005-09-09 16:04:14 -0400
committer: Linus Torvalds <torvalds@g5.osdl.org> 2005-09-09 16:57:55 -0400
commit: b835996f628eadb55c5fb222ba46fe9395bf73c7 (patch)
tree: d63d80585d197e1ffc299af4a0034049790fb197
parent: ab2af1f5005069321c5d130f09cce577b03f43ef (diff)
[PATCH] files: lock-free fd look-up
With the use of RCU in the files structure, the look-up of files using fds can now be lock-free. The lookup is protected by rcu_read_lock()/rcu_read_unlock(). This patch changes the readers to use lock-free lookup.

Signed-off-by: Maneesh Soni <maneesh@in.ibm.com>
Signed-off-by: Ravikiran Thirumalai <kiran_th@gmail.com>
Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- arch/mips/kernel/irixioctl.c 5
-rw-r--r-- arch/sparc64/solaris/ioctl.c 7
-rw-r--r-- drivers/char/tty_io.c 4
-rw-r--r-- fs/fcntl.c 4
-rw-r--r-- fs/proc/base.c 29
-rw-r--r-- fs/select.c 13
-rw-r--r-- net/ipv4/netfilter/ipt_owner.c 1
-rw-r--r-- net/ipv6/netfilter/ip6t_owner.c 1
-rw-r--r-- security/selinux/hooks.c 2
9 files changed, 39 insertions, 27 deletions
diff --git a/arch/mips/kernel/irixioctl.c b/arch/mips/kernel/irixioctl.c
index 4cd3d38a22c2..3cdc22346f4c 100644
--- a/arch/mips/kernel/irixioctl.c
+++ b/arch/mips/kernel/irixioctl.c
@@ -14,6 +14,7 @@
14#include <linux/syscalls.h> 14#include <linux/syscalls.h>
15#include <linux/tty.h> 15#include <linux/tty.h>
16#include <linux/file.h> 16#include <linux/file.h>
17#include <linux/rcupdate.h>
17 18
18#include <asm/uaccess.h> 19#include <asm/uaccess.h>
19#include <asm/ioctl.h> 20#include <asm/ioctl.h>
@@ -33,7 +34,7 @@ static struct tty_struct *get_tty(int fd)
33 struct file *filp; 34 struct file *filp;
34 struct tty_struct *ttyp = NULL; 35 struct tty_struct *ttyp = NULL;
35 36
36 spin_lock(&current->files->file_lock); 37 rcu_read_lock();
37 filp = fcheck(fd); 38 filp = fcheck(fd);
38 if(filp && filp->private_data) { 39 if(filp && filp->private_data) {
39 ttyp = (struct tty_struct *) filp->private_data; 40 ttyp = (struct tty_struct *) filp->private_data;
@@ -41,7 +42,7 @@ static struct tty_struct *get_tty(int fd)
41 if(ttyp->magic != TTY_MAGIC) 42 if(ttyp->magic != TTY_MAGIC)
42 ttyp =NULL; 43 ttyp =NULL;
43 } 44 }
44 spin_unlock(&current->files->file_lock); 45 rcu_read_unlock();
45 return ttyp; 46 return ttyp;
46} 47}
47 48
diff --git a/arch/sparc64/solaris/ioctl.c b/arch/sparc64/solaris/ioctl.c
index 374766455f5e..be0a054e3ed6 100644
--- a/arch/sparc64/solaris/ioctl.c
+++ b/arch/sparc64/solaris/ioctl.c
@@ -24,6 +24,7 @@
24#include <linux/netdevice.h> 24#include <linux/netdevice.h>
25#include <linux/mtio.h> 25#include <linux/mtio.h>
26#include <linux/time.h> 26#include <linux/time.h>
27#include <linux/rcupdate.h>
27#include <linux/compat.h> 28#include <linux/compat.h>
28 29
29#include <net/sock.h> 30#include <net/sock.h>
@@ -295,16 +296,16 @@ static inline int solaris_sockmod(unsigned int fd, unsigned int cmd, u32 arg)
295 struct inode *ino; 296 struct inode *ino;
296 struct fdtable *fdt; 297 struct fdtable *fdt;
297 /* I wonder which of these tests are superfluous... --patrik */ 298 /* I wonder which of these tests are superfluous... --patrik */
298 spin_lock(&current->files->file_lock); 299 rcu_read_lock();
299 fdt = files_fdtable(current->files); 300 fdt = files_fdtable(current->files);
300 if (! fdt->fd[fd] || 301 if (! fdt->fd[fd] ||
301 ! fdt->fd[fd]->f_dentry || 302 ! fdt->fd[fd]->f_dentry ||
302 ! (ino = fdt->fd[fd]->f_dentry->d_inode) || 303 ! (ino = fdt->fd[fd]->f_dentry->d_inode) ||
303 ! S_ISSOCK(ino->i_mode)) { 304 ! S_ISSOCK(ino->i_mode)) {
304 spin_unlock(&current->files->file_lock); 305 rcu_read_unlock();
305 return TBADF; 306 return TBADF;
306 } 307 }
307 spin_unlock(&current->files->file_lock); 308 rcu_read_unlock();
308 309
309 switch (cmd & 0xff) { 310 switch (cmd & 0xff) {
310 case 109: /* SI_SOCKPARAMS */ 311 case 109: /* SI_SOCKPARAMS */
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 0bfc7af68917..e5953f3433f3 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -2480,7 +2480,7 @@ static void __do_SAK(void *arg)
2480 } 2480 }
2481 task_lock(p); 2481 task_lock(p);
2482 if (p->files) { 2482 if (p->files) {
2483 spin_lock(&p->files->file_lock); 2483 rcu_read_lock();
2484 fdt = files_fdtable(p->files); 2484 fdt = files_fdtable(p->files);
2485 for (i=0; i < fdt->max_fds; i++) { 2485 for (i=0; i < fdt->max_fds; i++) {
2486 filp = fcheck_files(p->files, i); 2486 filp = fcheck_files(p->files, i);
@@ -2495,7 +2495,7 @@ static void __do_SAK(void *arg)
2495 break; 2495 break;
2496 } 2496 }
2497 } 2497 }
2498 spin_unlock(&p->files->file_lock); 2498 rcu_read_unlock();
2499 } 2499 }
2500 task_unlock(p); 2500 task_unlock(p);
2501 } while_each_task_pid(session, PIDTYPE_SID, p); 2501 } while_each_task_pid(session, PIDTYPE_SID, p);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index d2f3ed8acd93..863b46e0d78a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -40,10 +40,10 @@ static inline int get_close_on_exec(unsigned int fd)
40 struct files_struct *files = current->files; 40 struct files_struct *files = current->files;
41 struct fdtable *fdt; 41 struct fdtable *fdt;
42 int res; 42 int res;
43 spin_lock(&files->file_lock); 43 rcu_read_lock();
44 fdt = files_fdtable(files); 44 fdt = files_fdtable(files);
45 res = FD_ISSET(fd, fdt->close_on_exec); 45 res = FD_ISSET(fd, fdt->close_on_exec);
46 spin_unlock(&files->file_lock); 46 rcu_read_unlock();
47 return res; 47 return res;
48} 48}
49 49
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d0087a0b024b..23db452ab428 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -62,6 +62,7 @@
62#include <linux/namespace.h> 62#include <linux/namespace.h>
63#include <linux/mm.h> 63#include <linux/mm.h>
64#include <linux/smp_lock.h> 64#include <linux/smp_lock.h>
65#include <linux/rcupdate.h>
65#include <linux/kallsyms.h> 66#include <linux/kallsyms.h>
66#include <linux/mount.h> 67#include <linux/mount.h>
67#include <linux/security.h> 68#include <linux/security.h>
@@ -283,16 +284,16 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
283 284
284 files = get_files_struct(task); 285 files = get_files_struct(task);
285 if (files) { 286 if (files) {
286 spin_lock(&files->file_lock); 287 rcu_read_lock();
287 file = fcheck_files(files, fd); 288 file = fcheck_files(files, fd);
288 if (file) { 289 if (file) {
289 *mnt = mntget(file->f_vfsmnt); 290 *mnt = mntget(file->f_vfsmnt);
290 *dentry = dget(file->f_dentry); 291 *dentry = dget(file->f_dentry);
291 spin_unlock(&files->file_lock); 292 rcu_read_unlock();
292 put_files_struct(files); 293 put_files_struct(files);
293 return 0; 294 return 0;
294 } 295 }
295 spin_unlock(&files->file_lock); 296 rcu_read_unlock();
296 put_files_struct(files); 297 put_files_struct(files);
297 } 298 }
298 return -ENOENT; 299 return -ENOENT;
@@ -1062,7 +1063,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1062 files = get_files_struct(p); 1063 files = get_files_struct(p);
1063 if (!files) 1064 if (!files)
1064 goto out; 1065 goto out;
1065 spin_lock(&files->file_lock); 1066 rcu_read_lock();
1066 fdt = files_fdtable(files); 1067 fdt = files_fdtable(files);
1067 for (fd = filp->f_pos-2; 1068 for (fd = filp->f_pos-2;
1068 fd < fdt->max_fds; 1069 fd < fdt->max_fds;
@@ -1071,7 +1072,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1071 1072
1072 if (!fcheck_files(files, fd)) 1073 if (!fcheck_files(files, fd))
1073 continue; 1074 continue;
1074 spin_unlock(&files->file_lock); 1075 rcu_read_unlock();
1075 1076
1076 j = NUMBUF; 1077 j = NUMBUF;
1077 i = fd; 1078 i = fd;
@@ -1083,12 +1084,12 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1083 1084
1084 ino = fake_ino(tid, PROC_TID_FD_DIR + fd); 1085 ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
1085 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { 1086 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
1086 spin_lock(&files->file_lock); 1087 rcu_read_lock();
1087 break; 1088 break;
1088 } 1089 }
1089 spin_lock(&files->file_lock); 1090 rcu_read_lock();
1090 } 1091 }
1091 spin_unlock(&files->file_lock); 1092 rcu_read_unlock();
1092 put_files_struct(files); 1093 put_files_struct(files);
1093 } 1094 }
1094out: 1095out:
@@ -1263,9 +1264,9 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1263 1264
1264 files = get_files_struct(task); 1265 files = get_files_struct(task);
1265 if (files) { 1266 if (files) {
1266 spin_lock(&files->file_lock); 1267 rcu_read_lock();
1267 if (fcheck_files(files, fd)) { 1268 if (fcheck_files(files, fd)) {
1268 spin_unlock(&files->file_lock); 1269 rcu_read_unlock();
1269 put_files_struct(files); 1270 put_files_struct(files);
1270 if (task_dumpable(task)) { 1271 if (task_dumpable(task)) {
1271 inode->i_uid = task->euid; 1272 inode->i_uid = task->euid;
@@ -1277,7 +1278,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1277 security_task_to_inode(task, inode); 1278 security_task_to_inode(task, inode);
1278 return 1; 1279 return 1;
1279 } 1280 }
1280 spin_unlock(&files->file_lock); 1281 rcu_read_unlock();
1281 put_files_struct(files); 1282 put_files_struct(files);
1282 } 1283 }
1283 d_drop(dentry); 1284 d_drop(dentry);
@@ -1369,7 +1370,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1369 if (!files) 1370 if (!files)
1370 goto out_unlock; 1371 goto out_unlock;
1371 inode->i_mode = S_IFLNK; 1372 inode->i_mode = S_IFLNK;
1372 spin_lock(&files->file_lock); 1373 rcu_read_lock();
1373 file = fcheck_files(files, fd); 1374 file = fcheck_files(files, fd);
1374 if (!file) 1375 if (!file)
1375 goto out_unlock2; 1376 goto out_unlock2;
@@ -1377,7 +1378,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1377 inode->i_mode |= S_IRUSR | S_IXUSR; 1378 inode->i_mode |= S_IRUSR | S_IXUSR;
1378 if (file->f_mode & 2) 1379 if (file->f_mode & 2)
1379 inode->i_mode |= S_IWUSR | S_IXUSR; 1380 inode->i_mode |= S_IWUSR | S_IXUSR;
1380 spin_unlock(&files->file_lock); 1381 rcu_read_unlock();
1381 put_files_struct(files); 1382 put_files_struct(files);
1382 inode->i_op = &proc_pid_link_inode_operations; 1383 inode->i_op = &proc_pid_link_inode_operations;
1383 inode->i_size = 64; 1384 inode->i_size = 64;
@@ -1387,7 +1388,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1387 return NULL; 1388 return NULL;
1388 1389
1389out_unlock2: 1390out_unlock2:
1390 spin_unlock(&files->file_lock); 1391 rcu_read_unlock();
1391 put_files_struct(files); 1392 put_files_struct(files);
1392out_unlock: 1393out_unlock:
1393 iput(inode); 1394 iput(inode);
diff --git a/fs/select.c b/fs/select.c
index 2e56325c73c4..f10a10317d54 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -22,6 +22,7 @@
22#include <linux/personality.h> /* for STICKY_TIMEOUTS */ 22#include <linux/personality.h> /* for STICKY_TIMEOUTS */
23#include <linux/file.h> 23#include <linux/file.h>
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/rcupdate.h>
25 26
26#include <asm/uaccess.h> 27#include <asm/uaccess.h>
27 28
@@ -185,9 +186,9 @@ int do_select(int n, fd_set_bits *fds, long *timeout)
185 int retval, i; 186 int retval, i;
186 long __timeout = *timeout; 187 long __timeout = *timeout;
187 188
188 spin_lock(&current->files->file_lock); 189 rcu_read_lock();
189 retval = max_select_fd(n, fds); 190 retval = max_select_fd(n, fds);
190 spin_unlock(&current->files->file_lock); 191 rcu_read_unlock();
191 192
192 if (retval < 0) 193 if (retval < 0)
193 return retval; 194 return retval;
@@ -329,8 +330,10 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
329 goto out_nofds; 330 goto out_nofds;
330 331
331 /* max_fdset can increase, so grab it once to avoid race */ 332 /* max_fdset can increase, so grab it once to avoid race */
333 rcu_read_lock();
332 fdt = files_fdtable(current->files); 334 fdt = files_fdtable(current->files);
333 max_fdset = fdt->max_fdset; 335 max_fdset = fdt->max_fdset;
336 rcu_read_unlock();
334 if (n > max_fdset) 337 if (n > max_fdset)
335 n = max_fdset; 338 n = max_fdset;
336 339
@@ -469,10 +472,14 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
469 struct poll_list *head; 472 struct poll_list *head;
470 struct poll_list *walk; 473 struct poll_list *walk;
471 struct fdtable *fdt; 474 struct fdtable *fdt;
475 int max_fdset;
472 476
473 /* Do a sanity check on nfds ... */ 477 /* Do a sanity check on nfds ... */
478 rcu_read_lock();
474 fdt = files_fdtable(current->files); 479 fdt = files_fdtable(current->files);
475 if (nfds > fdt->max_fdset && nfds > OPEN_MAX) 480 max_fdset = fdt->max_fdset;
481 rcu_read_unlock();
482 if (nfds > max_fdset && nfds > OPEN_MAX)
476 return -EINVAL; 483 return -EINVAL;
477 484
478 if (timeout) { 485 if (timeout) {
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index c1889f88262b..0cee2862ed85 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -11,6 +11,7 @@
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/skbuff.h> 12#include <linux/skbuff.h>
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/rcupdate.h>
14#include <net/sock.h> 15#include <net/sock.h>
15 16
16#include <linux/netfilter_ipv4/ipt_owner.h> 17#include <linux/netfilter_ipv4/ipt_owner.h>
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
index 9b91decbfddb..4de4cdad4b7d 100644
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -11,6 +11,7 @@
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/skbuff.h> 12#include <linux/skbuff.h>
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/rcupdate.h>
14#include <net/sock.h> 15#include <net/sock.h>
15 16
16#include <linux/netfilter_ipv6/ip6t_owner.h> 17#include <linux/netfilter_ipv6/ip6t_owner.h>
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index acb5a495a902..f40c8221ec1b 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1652,7 +1652,7 @@ static inline void flush_unauthorized_files(struct files_struct * files)
1652 continue; 1652 continue;
1653 } 1653 }
1654 if (devnull) { 1654 if (devnull) {
1655 atomic_inc(&devnull->f_count); 1655 rcuref_inc(&devnull->f_count);
1656 } else { 1656 } else {
1657 devnull = dentry_open(dget(selinux_null), mntget(selinuxfs_mount), O_RDWR); 1657 devnull = dentry_open(dget(selinux_null), mntget(selinuxfs_mount), O_RDWR);
1658 if (!devnull) { 1658 if (!devnull) {