aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Al Viro <viro@zeniv.linux.org.uk> 2011-03-13 03:51:11 -0400
committer Al Viro <viro@zeniv.linux.org.uk> 2011-03-15 02:21:45 -0400
commit 1abf0c718f15a56a0a435588d1b104c7a37dc9bd (patch)
tree 91a6fae3218686b9a945569a7fa7fad120f64e94 /fs
parent f2fa2ffc2046fdc35f96366d1ec8675f4d578522 (diff)
New kind of open files - "location only".
New flag for open(2) - O_PATH. Semantics:

  * pathname is resolved, but the file itself is _NOT_ opened as far as
    filesystem is concerned.
  * almost all operations on the resulting descriptors shall fail with
    -EBADF. Exceptions are:
      1) operations on descriptors themselves (i.e. close(), dup(), dup2(),
         dup3(), fcntl(fd, F_DUPFD), fcntl(fd, F_DUPFD_CLOEXEC, ...),
         fcntl(fd, F_GETFD), fcntl(fd, F_SETFD, ...))
      2) fcntl(fd, F_GETFL), for a common non-destructive way to check if
         descriptor is open
      3) "dfd" arguments of ...at(2) syscalls, i.e. the starting points of
         pathname resolution
  * closing such descriptor does *NOT* affect dnotify or posix locks.
  * permissions are checked as usual along the way to file; no permission
    checks are applied to the file itself. Of course, giving such thing to
    syscall will result in permission checks (at the moment it means
    checking that starting point of ...at() is a directory and caller has
    exec permissions on it).

fget() and fget_light() return NULL on such descriptors; use of fget_raw()
and fget_raw_light() is needed to get them. That protects existing code from
dealing with those things.

There are two things still missing (they come in the next commits): one is
handling of symlinks (right now we refuse to open them that way; see the next
commit for semantics related to those) and another is descriptor passing via
SCM_RIGHTS datagrams.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs')
-rw-r--r-- fs/fcntl.c 37
-rw-r--r-- fs/file_table.c 53
-rw-r--r-- fs/namei.c 2
-rw-r--r-- fs/open.c 35
4 files changed, 110 insertions, 17 deletions
diff --git a/fs/fcntl.c b/fs/fcntl.c
index cb1026181bdc..6c82e5bac039 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -131,7 +131,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
131SYSCALL_DEFINE1(dup, unsigned int, fildes) 131SYSCALL_DEFINE1(dup, unsigned int, fildes)
132{ 132{
133 int ret = -EBADF; 133 int ret = -EBADF;
134 struct file *file = fget(fildes); 134 struct file *file = fget_raw(fildes);
135 135
136 if (file) { 136 if (file) {
137 ret = get_unused_fd(); 137 ret = get_unused_fd();
@@ -426,15 +426,35 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
426 return err; 426 return err;
427} 427}
428 428
429static int check_fcntl_cmd(unsigned cmd)
430{
431 switch (cmd) {
432 case F_DUPFD:
433 case F_DUPFD_CLOEXEC:
434 case F_GETFD:
435 case F_SETFD:
436 case F_GETFL:
437 return 1;
438 }
439 return 0;
440}
441
429SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 442SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
430{ 443{
431 struct file *filp; 444 struct file *filp;
432 long err = -EBADF; 445 long err = -EBADF;
433 446
434 filp = fget(fd); 447 filp = fget_raw(fd);
435 if (!filp) 448 if (!filp)
436 goto out; 449 goto out;
437 450
451 if (unlikely(filp->f_mode & FMODE_PATH)) {
452 if (!check_fcntl_cmd(cmd)) {
453 fput(filp);
454 goto out;
455 }
456 }
457
438 err = security_file_fcntl(filp, cmd, arg); 458 err = security_file_fcntl(filp, cmd, arg);
439 if (err) { 459 if (err) {
440 fput(filp); 460 fput(filp);
@@ -456,10 +476,17 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
456 long err; 476 long err;
457 477
458 err = -EBADF; 478 err = -EBADF;
459 filp = fget(fd); 479 filp = fget_raw(fd);
460 if (!filp) 480 if (!filp)
461 goto out; 481 goto out;
462 482
483 if (unlikely(filp->f_mode & FMODE_PATH)) {
484 if (!check_fcntl_cmd(cmd)) {
485 fput(filp);
486 goto out;
487 }
488 }
489
463 err = security_file_fcntl(filp, cmd, arg); 490 err = security_file_fcntl(filp, cmd, arg);
464 if (err) { 491 if (err) {
465 fput(filp); 492 fput(filp);
@@ -808,14 +835,14 @@ static int __init fcntl_init(void)
808 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY 835 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
809 * is defined as O_NONBLOCK on some platforms and not on others. 836 * is defined as O_NONBLOCK on some platforms and not on others.
810 */ 837 */
811 BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( 838 BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
812 O_RDONLY | O_WRONLY | O_RDWR | 839 O_RDONLY | O_WRONLY | O_RDWR |
813 O_CREAT | O_EXCL | O_NOCTTY | 840 O_CREAT | O_EXCL | O_NOCTTY |
814 O_TRUNC | O_APPEND | /* O_NONBLOCK | */ 841 O_TRUNC | O_APPEND | /* O_NONBLOCK | */
815 __O_SYNC | O_DSYNC | FASYNC | 842 __O_SYNC | O_DSYNC | FASYNC |
816 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 843 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
817 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 844 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
818 __FMODE_EXEC 845 __FMODE_EXEC | O_PATH
819 )); 846 ));
820 847
821 fasync_cache = kmem_cache_create("fasync_cache", 848 fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/file_table.c b/fs/file_table.c
index eb36b6b17e26..3c16e1ca163e 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -276,11 +276,10 @@ struct file *fget(unsigned int fd)
276 rcu_read_lock(); 276 rcu_read_lock();
277 file = fcheck_files(files, fd); 277 file = fcheck_files(files, fd);
278 if (file) { 278 if (file) {
279 if (!atomic_long_inc_not_zero(&file->f_count)) { 279 /* File object ref couldn't be taken */
280 /* File object ref couldn't be taken */ 280 if (file->f_mode & FMODE_PATH ||
281 rcu_read_unlock(); 281 !atomic_long_inc_not_zero(&file->f_count))
282 return NULL; 282 file = NULL;
283 }
284 } 283 }
285 rcu_read_unlock(); 284 rcu_read_unlock();
286 285
@@ -289,6 +288,23 @@ struct file *fget(unsigned int fd)
289 288
290EXPORT_SYMBOL(fget); 289EXPORT_SYMBOL(fget);
291 290
291struct file *fget_raw(unsigned int fd)
292{
293 struct file *file;
294 struct files_struct *files = current->files;
295
296 rcu_read_lock();
297 file = fcheck_files(files, fd);
298 if (file) {
299 /* File object ref couldn't be taken */
300 if (!atomic_long_inc_not_zero(&file->f_count))
301 file = NULL;
302 }
303 rcu_read_unlock();
304
305 return file;
306}
307
292/* 308/*
293 * Lightweight file lookup - no refcnt increment if fd table isn't shared. 309 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
294 * 310 *
@@ -313,6 +329,33 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
313 *fput_needed = 0; 329 *fput_needed = 0;
314 if (atomic_read(&files->count) == 1) { 330 if (atomic_read(&files->count) == 1) {
315 file = fcheck_files(files, fd); 331 file = fcheck_files(files, fd);
332 if (file && (file->f_mode & FMODE_PATH))
333 file = NULL;
334 } else {
335 rcu_read_lock();
336 file = fcheck_files(files, fd);
337 if (file) {
338 if (!(file->f_mode & FMODE_PATH) &&
339 atomic_long_inc_not_zero(&file->f_count))
340 *fput_needed = 1;
341 else
342 /* Didn't get the reference, someone's freed */
343 file = NULL;
344 }
345 rcu_read_unlock();
346 }
347
348 return file;
349}
350
351struct file *fget_raw_light(unsigned int fd, int *fput_needed)
352{
353 struct file *file;
354 struct files_struct *files = current->files;
355
356 *fput_needed = 0;
357 if (atomic_read(&files->count) == 1) {
358 file = fcheck_files(files, fd);
316 } else { 359 } else {
317 rcu_read_lock(); 360 rcu_read_lock();
318 file = fcheck_files(files, fd); 361 file = fcheck_files(files, fd);
diff --git a/fs/namei.c b/fs/namei.c
index 33be51a2ddb7..e1d9f90d9776 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1544,7 +1544,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1544 } else { 1544 } else {
1545 struct dentry *dentry; 1545 struct dentry *dentry;
1546 1546
1547 file = fget_light(dfd, &fput_needed); 1547 file = fget_raw_light(dfd, &fput_needed);
1548 retval = -EBADF; 1548 retval = -EBADF;
1549 if (!file) 1549 if (!file)
1550 goto out_fail; 1550 goto out_fail;
diff --git a/fs/open.c b/fs/open.c
index 48afc5c139d2..14a51de01f54 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -669,11 +669,16 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
669 int (*open)(struct inode *, struct file *), 669 int (*open)(struct inode *, struct file *),
670 const struct cred *cred) 670 const struct cred *cred)
671{ 671{
672 static const struct file_operations empty_fops = {};
672 struct inode *inode; 673 struct inode *inode;
673 int error; 674 int error;
674 675
675 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | 676 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
676 FMODE_PREAD | FMODE_PWRITE; 677 FMODE_PREAD | FMODE_PWRITE;
678
679 if (unlikely(f->f_flags & O_PATH))
680 f->f_mode = FMODE_PATH;
681
677 inode = dentry->d_inode; 682 inode = dentry->d_inode;
678 if (f->f_mode & FMODE_WRITE) { 683 if (f->f_mode & FMODE_WRITE) {
679 error = __get_file_write_access(inode, mnt); 684 error = __get_file_write_access(inode, mnt);
@@ -687,9 +692,15 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
687 f->f_path.dentry = dentry; 692 f->f_path.dentry = dentry;
688 f->f_path.mnt = mnt; 693 f->f_path.mnt = mnt;
689 f->f_pos = 0; 694 f->f_pos = 0;
690 f->f_op = fops_get(inode->i_fop);
691 file_sb_list_add(f, inode->i_sb); 695 file_sb_list_add(f, inode->i_sb);
692 696
697 if (unlikely(f->f_mode & FMODE_PATH)) {
698 f->f_op = &empty_fops;
699 return f;
700 }
701
702 f->f_op = fops_get(inode->i_fop);
703
693 error = security_dentry_open(f, cred); 704 error = security_dentry_open(f, cred);
694 if (error) 705 if (error)
695 goto cleanup_all; 706 goto cleanup_all;
@@ -911,9 +922,18 @@ static inline int build_open_flags(int flags, int mode, struct open_flags *op)
911 if (flags & __O_SYNC) 922 if (flags & __O_SYNC)
912 flags |= O_DSYNC; 923 flags |= O_DSYNC;
913 924
914 op->open_flag = flags; 925 /*
926 * If we have O_PATH in the open flag. Then we
927 * cannot have anything other than the below set of flags
928 */
929 if (flags & O_PATH) {
930 flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
931 acc_mode = 0;
932 } else {
933 acc_mode = MAY_OPEN | ACC_MODE(flags);
934 }
915 935
916 acc_mode = MAY_OPEN | ACC_MODE(flags); 936 op->open_flag = flags;
917 937
918 /* O_TRUNC implies we need access checks for write permissions */ 938 /* O_TRUNC implies we need access checks for write permissions */
919 if (flags & O_TRUNC) 939 if (flags & O_TRUNC)
@@ -926,7 +946,8 @@ static inline int build_open_flags(int flags, int mode, struct open_flags *op)
926 946
927 op->acc_mode = acc_mode; 947 op->acc_mode = acc_mode;
928 948
929 op->intent = LOOKUP_OPEN; 949 op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
950
930 if (flags & O_CREAT) { 951 if (flags & O_CREAT) {
931 op->intent |= LOOKUP_CREATE; 952 op->intent |= LOOKUP_CREATE;
932 if (flags & O_EXCL) 953 if (flags & O_EXCL)
@@ -1053,8 +1074,10 @@ int filp_close(struct file *filp, fl_owner_t id)
1053 if (filp->f_op && filp->f_op->flush) 1074 if (filp->f_op && filp->f_op->flush)
1054 retval = filp->f_op->flush(filp, id); 1075 retval = filp->f_op->flush(filp, id);
1055 1076
1056 dnotify_flush(filp, id); 1077 if (likely(!(filp->f_mode & FMODE_PATH))) {
1057 locks_remove_posix(filp, id); 1078 dnotify_flush(filp, id);
1079 locks_remove_posix(filp, id);
1080 }
1058 fput(filp); 1081 fput(filp);
1059 return retval; 1082 return retval;
1060} 1083}