aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- fs/namei.c 991
1 files changed, 486 insertions, 505 deletions
diff --git a/fs/namei.c b/fs/namei.c
index d11f404667e9..b86b96fe1dc3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -19,7 +19,6 @@
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/namei.h> 21#include <linux/namei.h>
22#include <linux/quotaops.h>
23#include <linux/pagemap.h> 22#include <linux/pagemap.h>
24#include <linux/fsnotify.h> 23#include <linux/fsnotify.h>
25#include <linux/personality.h> 24#include <linux/personality.h>
@@ -35,7 +34,7 @@
35#include <linux/fs_struct.h> 34#include <linux/fs_struct.h>
36#include <asm/uaccess.h> 35#include <asm/uaccess.h>
37 36
38#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 37#include "internal.h"
39 38
40/* [Feb-1997 T. Schoebel-Theuer] 39/* [Feb-1997 T. Schoebel-Theuer]
41 * Fundamental changes in the pathname lookup mechanisms (namei) 40 * Fundamental changes in the pathname lookup mechanisms (namei)
@@ -108,8 +107,6 @@
108 * any extra contention... 107 * any extra contention...
109 */ 108 */
110 109
111static int __link_path_walk(const char *name, struct nameidata *nd);
112
113/* In order to reduce some races, while at the same time doing additional 110/* In order to reduce some races, while at the same time doing additional
114 * checking and hopefully speeding things up, we copy filenames to the 111 * checking and hopefully speeding things up, we copy filenames to the
115 * kernel data space before using them.. 112 * kernel data space before using them..
@@ -234,6 +231,7 @@ int generic_permission(struct inode *inode, int mask,
234 /* 231 /*
235 * Searching includes executable on directories, else just read. 232 * Searching includes executable on directories, else just read.
236 */ 233 */
234 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
237 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 235 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
238 if (capable(CAP_DAC_READ_SEARCH)) 236 if (capable(CAP_DAC_READ_SEARCH))
239 return 0; 237 return 0;
@@ -414,36 +412,55 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
414} 412}
415 413
416/* 414/*
417 * Internal lookup() using the new generic dcache. 415 * force_reval_path - force revalidation of a dentry
418 * SMP-safe 416 *
417 * In some situations the path walking code will trust dentries without
418 * revalidating them. This causes problems for filesystems that depend on
419 * d_revalidate to handle file opens (e.g. NFSv4). When FS_REVAL_DOT is set
420 * (which indicates that it's possible for the dentry to go stale), force
421 * a d_revalidate call before proceeding.
422 *
423 * Returns 0 if the revalidation was successful. If the revalidation fails,
424 * either return the error returned by d_revalidate or -ESTALE if the
425 * revalidation just returned 0. If d_revalidate returns 0, we attempt to
426 * invalidate the dentry. It's up to the caller to handle putting references
427 * to the path if necessary.
419 */ 428 */
420static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 429static int
430force_reval_path(struct path *path, struct nameidata *nd)
421{ 431{
422 struct dentry * dentry = __d_lookup(parent, name); 432 int status;
433 struct dentry *dentry = path->dentry;
423 434
424 /* lockless __d_lookup may fail due to concurrent d_move() 435 /*
425 * in some unrelated directory, so try with d_lookup 436 * only check on filesystems where it's possible for the dentry to
437 * become stale. It's assumed that if this flag is set then the
438 * d_revalidate op will also be defined.
426 */ 439 */
427 if (!dentry) 440 if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))
428 dentry = d_lookup(parent, name); 441 return 0;
429 442
430 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) 443 status = dentry->d_op->d_revalidate(dentry, nd);
431 dentry = do_revalidate(dentry, nd); 444 if (status > 0)
445 return 0;
432 446
433 return dentry; 447 if (!status) {
448 d_invalidate(dentry);
449 status = -ESTALE;
450 }
451 return status;
434} 452}
435 453
436/* 454/*
437 * Short-cut version of permission(), for calling by 455 * Short-cut version of permission(), for calling on directories
438 * path_walk(), when dcache lock is held. Combines parts 456 * during pathname resolution. Combines parts of permission()
439 * of permission() and generic_permission(), and tests ONLY for 457 * and generic_permission(), and tests ONLY for MAY_EXEC permission.
440 * MAY_EXEC permission.
441 * 458 *
442 * If appropriate, check DAC only. If not appropriate, or 459 * If appropriate, check DAC only. If not appropriate, or
443 * short-cut DAC fails, then call permission() to do more 460 * short-cut DAC fails, then call ->permission() to do more
444 * complete permission check. 461 * complete permission check.
445 */ 462 */
446static int exec_permission_lite(struct inode *inode) 463static int exec_permission(struct inode *inode)
447{ 464{
448 int ret; 465 int ret;
449 466
@@ -465,99 +482,6 @@ ok:
465 return security_inode_permission(inode, MAY_EXEC); 482 return security_inode_permission(inode, MAY_EXEC);
466} 483}
467 484
468/*
469 * This is called when everything else fails, and we actually have
470 * to go to the low-level filesystem to find out what we should do..
471 *
472 * We get the directory semaphore, and after getting that we also
473 * make sure that nobody added the entry to the dcache in the meantime..
474 * SMP-safe
475 */
476static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
477{
478 struct dentry * result;
479 struct inode *dir = parent->d_inode;
480
481 mutex_lock(&dir->i_mutex);
482 /*
483 * First re-do the cached lookup just in case it was created
484 * while we waited for the directory semaphore..
485 *
486 * FIXME! This could use version numbering or similar to
487 * avoid unnecessary cache lookups.
488 *
489 * The "dcache_lock" is purely to protect the RCU list walker
490 * from concurrent renames at this point (we mustn't get false
491 * negatives from the RCU list walk here, unlike the optimistic
492 * fast walk).
493 *
494 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
495 */
496 result = d_lookup(parent, name);
497 if (!result) {
498 struct dentry *dentry;
499
500 /* Don't create child dentry for a dead directory. */
501 result = ERR_PTR(-ENOENT);
502 if (IS_DEADDIR(dir))
503 goto out_unlock;
504
505 dentry = d_alloc(parent, name);
506 result = ERR_PTR(-ENOMEM);
507 if (dentry) {
508 result = dir->i_op->lookup(dir, dentry, nd);
509 if (result)
510 dput(dentry);
511 else
512 result = dentry;
513 }
514out_unlock:
515 mutex_unlock(&dir->i_mutex);
516 return result;
517 }
518
519 /*
520 * Uhhuh! Nasty case: the cache was re-populated while
521 * we waited on the semaphore. Need to revalidate.
522 */
523 mutex_unlock(&dir->i_mutex);
524 if (result->d_op && result->d_op->d_revalidate) {
525 result = do_revalidate(result, nd);
526 if (!result)
527 result = ERR_PTR(-ENOENT);
528 }
529 return result;
530}
531
532/*
533 * Wrapper to retry pathname resolution whenever the underlying
534 * file system returns an ESTALE.
535 *
536 * Retry the whole path once, forcing real lookup requests
537 * instead of relying on the dcache.
538 */
539static __always_inline int link_path_walk(const char *name, struct nameidata *nd)
540{
541 struct path save = nd->path;
542 int result;
543
544 /* make sure the stuff we saved doesn't go away */
545 path_get(&save);
546
547 result = __link_path_walk(name, nd);
548 if (result == -ESTALE) {
549 /* nd->path had been dropped */
550 nd->path = save;
551 path_get(&nd->path);
552 nd->flags |= LOOKUP_REVAL;
553 result = __link_path_walk(name, nd);
554 }
555
556 path_put(&save);
557
558 return result;
559}
560
561static __always_inline void set_root(struct nameidata *nd) 485static __always_inline void set_root(struct nameidata *nd)
562{ 486{
563 if (!nd->root.mnt) { 487 if (!nd->root.mnt) {
@@ -569,10 +493,10 @@ static __always_inline void set_root(struct nameidata *nd)
569 } 493 }
570} 494}
571 495
496static int link_path_walk(const char *, struct nameidata *);
497
572static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 498static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
573{ 499{
574 int res = 0;
575 char *name;
576 if (IS_ERR(link)) 500 if (IS_ERR(link))
577 goto fail; 501 goto fail;
578 502
@@ -583,22 +507,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
583 path_get(&nd->root); 507 path_get(&nd->root);
584 } 508 }
585 509
586 res = link_path_walk(link, nd); 510 return link_path_walk(link, nd);
587 if (nd->depth || res || nd->last_type!=LAST_NORM)
588 return res;
589 /*
590 * If it is an iterative symlinks resolution in open_namei() we
591 * have to copy the last component. And all that crap because of
592 * bloody create() on broken symlinks. Furrfu...
593 */
594 name = __getname();
595 if (unlikely(!name)) {
596 path_put(&nd->path);
597 return -ENOMEM;
598 }
599 strcpy(name, nd->last.name);
600 nd->last.name = name;
601 return 0;
602fail: 511fail:
603 path_put(&nd->path); 512 path_put(&nd->path);
604 return PTR_ERR(link); 513 return PTR_ERR(link);
@@ -620,10 +529,10 @@ static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
620 nd->path.dentry = path->dentry; 529 nd->path.dentry = path->dentry;
621} 530}
622 531
623static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd) 532static __always_inline int
533__do_follow_link(struct path *path, struct nameidata *nd, void **p)
624{ 534{
625 int error; 535 int error;
626 void *cookie;
627 struct dentry *dentry = path->dentry; 536 struct dentry *dentry = path->dentry;
628 537
629 touch_atime(path->mnt, dentry); 538 touch_atime(path->mnt, dentry);
@@ -634,18 +543,20 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
634 dget(dentry); 543 dget(dentry);
635 } 544 }
636 mntget(path->mnt); 545 mntget(path->mnt);
637 cookie = dentry->d_inode->i_op->follow_link(dentry, nd); 546 nd->last_type = LAST_BIND;
638 error = PTR_ERR(cookie); 547 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
639 if (!IS_ERR(cookie)) { 548 error = PTR_ERR(*p);
549 if (!IS_ERR(*p)) {
640 char *s = nd_get_link(nd); 550 char *s = nd_get_link(nd);
641 error = 0; 551 error = 0;
642 if (s) 552 if (s)
643 error = __vfs_follow_link(nd, s); 553 error = __vfs_follow_link(nd, s);
644 if (dentry->d_inode->i_op->put_link) 554 else if (nd->last_type == LAST_BIND) {
645 dentry->d_inode->i_op->put_link(dentry, nd, cookie); 555 error = force_reval_path(&nd->path, nd);
556 if (error)
557 path_put(&nd->path);
558 }
646 } 559 }
647 path_put(path);
648
649 return error; 560 return error;
650} 561}
651 562
@@ -658,6 +569,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
658 */ 569 */
659static inline int do_follow_link(struct path *path, struct nameidata *nd) 570static inline int do_follow_link(struct path *path, struct nameidata *nd)
660{ 571{
572 void *cookie;
661 int err = -ELOOP; 573 int err = -ELOOP;
662 if (current->link_count >= MAX_NESTED_LINKS) 574 if (current->link_count >= MAX_NESTED_LINKS)
663 goto loop; 575 goto loop;
@@ -671,7 +583,10 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd)
671 current->link_count++; 583 current->link_count++;
672 current->total_link_count++; 584 current->total_link_count++;
673 nd->depth++; 585 nd->depth++;
674 err = __do_follow_link(path, nd); 586 err = __do_follow_link(path, nd, &cookie);
587 if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
588 path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
589 path_put(path);
675 current->link_count--; 590 current->link_count--;
676 nd->depth--; 591 nd->depth--;
677 return err; 592 return err;
@@ -757,33 +672,20 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
757 set_root(nd); 672 set_root(nd);
758 673
759 while(1) { 674 while(1) {
760 struct vfsmount *parent;
761 struct dentry *old = nd->path.dentry; 675 struct dentry *old = nd->path.dentry;
762 676
763 if (nd->path.dentry == nd->root.dentry && 677 if (nd->path.dentry == nd->root.dentry &&
764 nd->path.mnt == nd->root.mnt) { 678 nd->path.mnt == nd->root.mnt) {
765 break; 679 break;
766 } 680 }
767 spin_lock(&dcache_lock);
768 if (nd->path.dentry != nd->path.mnt->mnt_root) { 681 if (nd->path.dentry != nd->path.mnt->mnt_root) {
769 nd->path.dentry = dget(nd->path.dentry->d_parent); 682 /* rare case of legitimate dget_parent()... */
770 spin_unlock(&dcache_lock); 683 nd->path.dentry = dget_parent(nd->path.dentry);
771 dput(old); 684 dput(old);
772 break; 685 break;
773 } 686 }
774 spin_unlock(&dcache_lock); 687 if (!follow_up(&nd->path))
775 spin_lock(&vfsmount_lock);
776 parent = nd->path.mnt->mnt_parent;
777 if (parent == nd->path.mnt) {
778 spin_unlock(&vfsmount_lock);
779 break; 688 break;
780 }
781 mntget(parent);
782 nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint);
783 spin_unlock(&vfsmount_lock);
784 dput(old);
785 mntput(nd->path.mnt);
786 nd->path.mnt = parent;
787 } 689 }
788 follow_mount(&nd->path); 690 follow_mount(&nd->path);
789} 691}
@@ -797,8 +699,19 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
797 struct path *path) 699 struct path *path)
798{ 700{
799 struct vfsmount *mnt = nd->path.mnt; 701 struct vfsmount *mnt = nd->path.mnt;
800 struct dentry *dentry = __d_lookup(nd->path.dentry, name); 702 struct dentry *dentry, *parent;
703 struct inode *dir;
704 /*
705 * See if the low-level filesystem might want
706 * to use its own hash..
707 */
708 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
709 int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name);
710 if (err < 0)
711 return err;
712 }
801 713
714 dentry = __d_lookup(nd->path.dentry, name);
802 if (!dentry) 715 if (!dentry)
803 goto need_lookup; 716 goto need_lookup;
804 if (dentry->d_op && dentry->d_op->d_revalidate) 717 if (dentry->d_op && dentry->d_op->d_revalidate)
@@ -810,7 +723,59 @@ done:
810 return 0; 723 return 0;
811 724
812need_lookup: 725need_lookup:
813 dentry = real_lookup(nd->path.dentry, name, nd); 726 parent = nd->path.dentry;
727 dir = parent->d_inode;
728
729 mutex_lock(&dir->i_mutex);
730 /*
731 * First re-do the cached lookup just in case it was created
732 * while we waited for the directory semaphore..
733 *
734 * FIXME! This could use version numbering or similar to
735 * avoid unnecessary cache lookups.
736 *
737 * The "dcache_lock" is purely to protect the RCU list walker
738 * from concurrent renames at this point (we mustn't get false
739 * negatives from the RCU list walk here, unlike the optimistic
740 * fast walk).
741 *
742 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
743 */
744 dentry = d_lookup(parent, name);
745 if (!dentry) {
746 struct dentry *new;
747
748 /* Don't create child dentry for a dead directory. */
749 dentry = ERR_PTR(-ENOENT);
750 if (IS_DEADDIR(dir))
751 goto out_unlock;
752
753 new = d_alloc(parent, name);
754 dentry = ERR_PTR(-ENOMEM);
755 if (new) {
756 dentry = dir->i_op->lookup(dir, new, nd);
757 if (dentry)
758 dput(new);
759 else
760 dentry = new;
761 }
762out_unlock:
763 mutex_unlock(&dir->i_mutex);
764 if (IS_ERR(dentry))
765 goto fail;
766 goto done;
767 }
768
769 /*
770 * Uhhuh! Nasty case: the cache was re-populated while
771 * we waited on the semaphore. Need to revalidate.
772 */
773 mutex_unlock(&dir->i_mutex);
774 if (dentry->d_op && dentry->d_op->d_revalidate) {
775 dentry = do_revalidate(dentry, nd);
776 if (!dentry)
777 dentry = ERR_PTR(-ENOENT);
778 }
814 if (IS_ERR(dentry)) 779 if (IS_ERR(dentry))
815 goto fail; 780 goto fail;
816 goto done; 781 goto done;
@@ -828,6 +793,17 @@ fail:
828} 793}
829 794
830/* 795/*
796 * This is a temporary kludge to deal with "automount" symlinks; proper
797 * solution is to trigger them on follow_mount(), so that do_lookup()
798 * would DTRT. To be killed before 2.6.34-final.
799 */
800static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
801{
802 return inode && unlikely(inode->i_op->follow_link) &&
803 ((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode));
804}
805
806/*
831 * Name resolution. 807 * Name resolution.
832 * This is the basic name resolution function, turning a pathname into 808 * This is the basic name resolution function, turning a pathname into
833 * the final dentry. We expect 'base' to be positive and a directory. 809 * the final dentry. We expect 'base' to be positive and a directory.
@@ -835,7 +811,7 @@ fail:
835 * Returns 0 and nd will have valid dentry and mnt on success. 811 * Returns 0 and nd will have valid dentry and mnt on success.
836 * Returns error and drops reference to input namei data on failure. 812 * Returns error and drops reference to input namei data on failure.
837 */ 813 */
838static int __link_path_walk(const char *name, struct nameidata *nd) 814static int link_path_walk(const char *name, struct nameidata *nd)
839{ 815{
840 struct path next; 816 struct path next;
841 struct inode *inode; 817 struct inode *inode;
@@ -858,7 +834,7 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
858 unsigned int c; 834 unsigned int c;
859 835
860 nd->flags |= LOOKUP_CONTINUE; 836 nd->flags |= LOOKUP_CONTINUE;
861 err = exec_permission_lite(inode); 837 err = exec_permission(inode);
862 if (err) 838 if (err)
863 break; 839 break;
864 840
@@ -898,16 +874,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
898 case 1: 874 case 1:
899 continue; 875 continue;
900 } 876 }
901 /*
902 * See if the low-level filesystem might want
903 * to use its own hash..
904 */
905 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
906 err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
907 &this);
908 if (err < 0)
909 break;
910 }
911 /* This does the actual lookups.. */ 877 /* This does the actual lookups.. */
912 err = do_lookup(nd, &this, &next); 878 err = do_lookup(nd, &this, &next);
913 if (err) 879 if (err)
@@ -953,18 +919,11 @@ last_component:
953 case 1: 919 case 1:
954 goto return_reval; 920 goto return_reval;
955 } 921 }
956 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
957 err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
958 &this);
959 if (err < 0)
960 break;
961 }
962 err = do_lookup(nd, &this, &next); 922 err = do_lookup(nd, &this, &next);
963 if (err) 923 if (err)
964 break; 924 break;
965 inode = next.dentry->d_inode; 925 inode = next.dentry->d_inode;
966 if ((lookup_flags & LOOKUP_FOLLOW) 926 if (follow_on_final(inode, lookup_flags)) {
967 && inode && inode->i_op->follow_link) {
968 err = do_follow_link(&next, nd); 927 err = do_follow_link(&next, nd);
969 if (err) 928 if (err)
970 goto return_err; 929 goto return_err;
@@ -1017,8 +976,27 @@ return_err:
1017 976
1018static int path_walk(const char *name, struct nameidata *nd) 977static int path_walk(const char *name, struct nameidata *nd)
1019{ 978{
979 struct path save = nd->path;
980 int result;
981
1020 current->total_link_count = 0; 982 current->total_link_count = 0;
1021 return link_path_walk(name, nd); 983
984 /* make sure the stuff we saved doesn't go away */
985 path_get(&save);
986
987 result = link_path_walk(name, nd);
988 if (result == -ESTALE) {
989 /* nd->path had been dropped */
990 current->total_link_count = 0;
991 nd->path = save;
992 path_get(&nd->path);
993 nd->flags |= LOOKUP_REVAL;
994 result = link_path_walk(name, nd);
995 }
996
997 path_put(&save);
998
999 return result;
1022} 1000}
1023 1001
1024static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1002static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
@@ -1141,36 +1119,6 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1141 return retval; 1119 return retval;
1142} 1120}
1143 1121
1144/**
1145 * path_lookup_open - lookup a file path with open intent
1146 * @dfd: the directory to use as base, or AT_FDCWD
1147 * @name: pointer to file name
1148 * @lookup_flags: lookup intent flags
1149 * @nd: pointer to nameidata
1150 * @open_flags: open intent flags
1151 */
1152static int path_lookup_open(int dfd, const char *name,
1153 unsigned int lookup_flags, struct nameidata *nd, int open_flags)
1154{
1155 struct file *filp = get_empty_filp();
1156 int err;
1157
1158 if (filp == NULL)
1159 return -ENFILE;
1160 nd->intent.open.file = filp;
1161 nd->intent.open.flags = open_flags;
1162 nd->intent.open.create_mode = 0;
1163 err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);
1164 if (IS_ERR(nd->intent.open.file)) {
1165 if (err == 0) {
1166 err = PTR_ERR(nd->intent.open.file);
1167 path_put(&nd->path);
1168 }
1169 } else if (err != 0)
1170 release_open_intent(nd);
1171 return err;
1172}
1173
1174static struct dentry *__lookup_hash(struct qstr *name, 1122static struct dentry *__lookup_hash(struct qstr *name,
1175 struct dentry *base, struct nameidata *nd) 1123 struct dentry *base, struct nameidata *nd)
1176{ 1124{
@@ -1191,7 +1139,17 @@ static struct dentry *__lookup_hash(struct qstr *name,
1191 goto out; 1139 goto out;
1192 } 1140 }
1193 1141
1194 dentry = cached_lookup(base, name, nd); 1142 dentry = __d_lookup(base, name);
1143
1144 /* lockless __d_lookup may fail due to concurrent d_move()
1145 * in some unrelated directory, so try with d_lookup
1146 */
1147 if (!dentry)
1148 dentry = d_lookup(base, name);
1149
1150 if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
1151 dentry = do_revalidate(dentry, nd);
1152
1195 if (!dentry) { 1153 if (!dentry) {
1196 struct dentry *new; 1154 struct dentry *new;
1197 1155
@@ -1223,7 +1181,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1223{ 1181{
1224 int err; 1182 int err;
1225 1183
1226 err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC); 1184 err = exec_permission(nd->path.dentry->d_inode);
1227 if (err) 1185 if (err)
1228 return ERR_PTR(err); 1186 return ERR_PTR(err);
1229 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1187 return __lookup_hash(&nd->last, nd->path.dentry, nd);
@@ -1273,29 +1231,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1273 if (err) 1231 if (err)
1274 return ERR_PTR(err); 1232 return ERR_PTR(err);
1275 1233
1276 err = inode_permission(base->d_inode, MAY_EXEC); 1234 err = exec_permission(base->d_inode);
1277 if (err)
1278 return ERR_PTR(err);
1279 return __lookup_hash(&this, base, NULL);
1280}
1281
1282/**
1283 * lookup_one_noperm - bad hack for sysfs
1284 * @name: pathname component to lookup
1285 * @base: base directory to lookup from
1286 *
1287 * This is a variant of lookup_one_len that doesn't perform any permission
1288 * checks. It's a horrible hack to work around the braindead sysfs
1289 * architecture and should not be used anywhere else.
1290 *
1291 * DON'T USE THIS FUNCTION EVER, thanks.
1292 */
1293struct dentry *lookup_one_noperm(const char *name, struct dentry *base)
1294{
1295 int err;
1296 struct qstr this;
1297
1298 err = __lookup_one_len(name, &this, base, strlen(name));
1299 if (err) 1235 if (err)
1300 return ERR_PTR(err); 1236 return ERR_PTR(err);
1301 return __lookup_hash(&this, base, NULL); 1237 return __lookup_hash(&this, base, NULL);
@@ -1381,7 +1317,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1381 return -ENOENT; 1317 return -ENOENT;
1382 1318
1383 BUG_ON(victim->d_parent->d_inode != dir); 1319 BUG_ON(victim->d_parent->d_inode != dir);
1384 audit_inode_child(victim->d_name.name, victim, dir); 1320 audit_inode_child(victim, dir);
1385 1321
1386 error = inode_permission(dir, MAY_WRITE | MAY_EXEC); 1322 error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
1387 if (error) 1323 if (error)
@@ -1422,22 +1358,6 @@ static inline int may_create(struct inode *dir, struct dentry *child)
1422 return inode_permission(dir, MAY_WRITE | MAY_EXEC); 1358 return inode_permission(dir, MAY_WRITE | MAY_EXEC);
1423} 1359}
1424 1360
1425/*
1426 * O_DIRECTORY translates into forcing a directory lookup.
1427 */
1428static inline int lookup_flags(unsigned int f)
1429{
1430 unsigned long retval = LOOKUP_FOLLOW;
1431
1432 if (f & O_NOFOLLOW)
1433 retval &= ~LOOKUP_FOLLOW;
1434
1435 if (f & O_DIRECTORY)
1436 retval |= LOOKUP_DIRECTORY;
1437
1438 return retval;
1439}
1440
1441/* 1361/*
1442 * p1 and p2 should be directories on the same fs. 1362 * p1 and p2 should be directories on the same fs.
1443 */ 1363 */
@@ -1495,7 +1415,6 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1495 error = security_inode_create(dir, dentry, mode); 1415 error = security_inode_create(dir, dentry, mode);
1496 if (error) 1416 if (error)
1497 return error; 1417 return error;
1498 vfs_dq_init(dir);
1499 error = dir->i_op->create(dir, dentry, mode, nd); 1418 error = dir->i_op->create(dir, dentry, mode, nd);
1500 if (!error) 1419 if (!error)
1501 fsnotify_create(dir, dentry); 1420 fsnotify_create(dir, dentry);
@@ -1533,69 +1452,45 @@ int may_open(struct path *path, int acc_mode, int flag)
1533 if (error) 1452 if (error)
1534 return error; 1453 return error;
1535 1454
1536 error = ima_path_check(path, acc_mode ?
1537 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
1538 ACC_MODE(flag) & (MAY_READ | MAY_WRITE),
1539 IMA_COUNT_UPDATE);
1540
1541 if (error)
1542 return error;
1543 /* 1455 /*
1544 * An append-only file must be opened in append mode for writing. 1456 * An append-only file must be opened in append mode for writing.
1545 */ 1457 */
1546 if (IS_APPEND(inode)) { 1458 if (IS_APPEND(inode)) {
1547 error = -EPERM; 1459 if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
1548 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 1460 return -EPERM;
1549 goto err_out;
1550 if (flag & O_TRUNC) 1461 if (flag & O_TRUNC)
1551 goto err_out; 1462 return -EPERM;
1552 } 1463 }
1553 1464
1554 /* O_NOATIME can only be set by the owner or superuser */ 1465 /* O_NOATIME can only be set by the owner or superuser */
1555 if (flag & O_NOATIME) 1466 if (flag & O_NOATIME && !is_owner_or_cap(inode))
1556 if (!is_owner_or_cap(inode)) { 1467 return -EPERM;
1557 error = -EPERM;
1558 goto err_out;
1559 }
1560 1468
1561 /* 1469 /*
1562 * Ensure there are no outstanding leases on the file. 1470 * Ensure there are no outstanding leases on the file.
1563 */ 1471 */
1564 error = break_lease(inode, flag); 1472 return break_lease(inode, flag);
1565 if (error) 1473}
1566 goto err_out;
1567
1568 if (flag & O_TRUNC) {
1569 error = get_write_access(inode);
1570 if (error)
1571 goto err_out;
1572
1573 /*
1574 * Refuse to truncate files with mandatory locks held on them.
1575 */
1576 error = locks_verify_locked(inode);
1577 if (!error)
1578 error = security_path_truncate(path, 0,
1579 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
1580 if (!error) {
1581 vfs_dq_init(inode);
1582
1583 error = do_truncate(dentry, 0,
1584 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
1585 NULL);
1586 }
1587 put_write_access(inode);
1588 if (error)
1589 goto err_out;
1590 } else
1591 if (flag & FMODE_WRITE)
1592 vfs_dq_init(inode);
1593 1474
1594 return 0; 1475static int handle_truncate(struct path *path)
1595err_out: 1476{
1596 ima_counts_put(path, acc_mode ? 1477 struct inode *inode = path->dentry->d_inode;
1597 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) : 1478 int error = get_write_access(inode);
1598 ACC_MODE(flag) & (MAY_READ | MAY_WRITE)); 1479 if (error)
1480 return error;
1481 /*
1482 * Refuse to truncate files with mandatory locks held on them.
1483 */
1484 error = locks_verify_locked(inode);
1485 if (!error)
1486 error = security_path_truncate(path, 0,
1487 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
1488 if (!error) {
1489 error = do_truncate(path->dentry, 0,
1490 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
1491 NULL);
1492 }
1493 put_write_access(inode);
1599 return error; 1494 return error;
1600} 1495}
1601 1496
@@ -1605,7 +1500,7 @@ err_out:
1605 * what gets passed to sys_open(). 1500 * what gets passed to sys_open().
1606 */ 1501 */
1607static int __open_namei_create(struct nameidata *nd, struct path *path, 1502static int __open_namei_create(struct nameidata *nd, struct path *path,
1608 int flag, int mode) 1503 int open_flag, int mode)
1609{ 1504{
1610 int error; 1505 int error;
1611 struct dentry *dir = nd->path.dentry; 1506 struct dentry *dir = nd->path.dentry;
@@ -1623,7 +1518,7 @@ out_unlock:
1623 if (error) 1518 if (error)
1624 return error; 1519 return error;
1625 /* Don't check for write permission, don't truncate */ 1520 /* Don't check for write permission, don't truncate */
1626 return may_open(&nd->path, 0, flag & ~O_TRUNC); 1521 return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
1627} 1522}
1628 1523
1629/* 1524/*
@@ -1650,7 +1545,7 @@ static inline int open_to_namei_flags(int flag)
1650 return flag; 1545 return flag;
1651} 1546}
1652 1547
1653static int open_will_write_to_fs(int flag, struct inode *inode) 1548static int open_will_truncate(int flag, struct inode *inode)
1654{ 1549{
1655 /* 1550 /*
1656 * We'll never write to the fs underlying 1551 * We'll never write to the fs underlying
@@ -1661,100 +1556,133 @@ static int open_will_write_to_fs(int flag, struct inode *inode)
1661 return (flag & O_TRUNC); 1556 return (flag & O_TRUNC);
1662} 1557}
1663 1558
1664/* 1559static struct file *finish_open(struct nameidata *nd,
1665 * Note that the low bits of the passed in "open_flag" 1560 int open_flag, int acc_mode)
1666 * are not the same as in the local variable "flag". See
1667 * open_to_namei_flags() for more details.
1668 */
1669struct file *do_filp_open(int dfd, const char *pathname,
1670 int open_flag, int mode, int acc_mode)
1671{ 1561{
1672 struct file *filp; 1562 struct file *filp;
1673 struct nameidata nd; 1563 int will_truncate;
1674 int error; 1564 int error;
1675 struct path path;
1676 struct dentry *dir;
1677 int count = 0;
1678 int will_write;
1679 int flag = open_to_namei_flags(open_flag);
1680 1565
1681 if (!acc_mode) 1566 will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
1682 acc_mode = MAY_OPEN | ACC_MODE(flag); 1567 if (will_truncate) {
1568 error = mnt_want_write(nd->path.mnt);
1569 if (error)
1570 goto exit;
1571 }
1572 error = may_open(&nd->path, acc_mode, open_flag);
1573 if (error) {
1574 if (will_truncate)
1575 mnt_drop_write(nd->path.mnt);
1576 goto exit;
1577 }
1578 filp = nameidata_to_filp(nd);
1579 if (!IS_ERR(filp)) {
1580 error = ima_file_check(filp, acc_mode);
1581 if (error) {
1582 fput(filp);
1583 filp = ERR_PTR(error);
1584 }
1585 }
1586 if (!IS_ERR(filp)) {
1587 if (will_truncate) {
1588 error = handle_truncate(&nd->path);
1589 if (error) {
1590 fput(filp);
1591 filp = ERR_PTR(error);
1592 }
1593 }
1594 }
1595 /*
1596 * It is now safe to drop the mnt write
1597 * because the filp has had a write taken
1598 * on its behalf.
1599 */
1600 if (will_truncate)
1601 mnt_drop_write(nd->path.mnt);
1602 return filp;
1683 1603
1684 /* O_TRUNC implies we need access checks for write permissions */ 1604exit:
1685 if (flag & O_TRUNC) 1605 if (!IS_ERR(nd->intent.open.file))
1686 acc_mode |= MAY_WRITE; 1606 release_open_intent(nd);
1607 path_put(&nd->path);
1608 return ERR_PTR(error);
1609}
1687 1610
1688 /* Allow the LSM permission hook to distinguish append 1611static struct file *do_last(struct nameidata *nd, struct path *path,
1689 access from general write access. */ 1612 int open_flag, int acc_mode,
1690 if (flag & O_APPEND) 1613 int mode, const char *pathname)
1691 acc_mode |= MAY_APPEND; 1614{
1615 struct dentry *dir = nd->path.dentry;
1616 struct file *filp;
1617 int error = -EISDIR;
1692 1618
1693 /* 1619 switch (nd->last_type) {
1694 * The simplest case - just a plain lookup. 1620 case LAST_DOTDOT:
1695 */ 1621 follow_dotdot(nd);
1696 if (!(flag & O_CREAT)) { 1622 dir = nd->path.dentry;
1697 error = path_lookup_open(dfd, pathname, lookup_flags(flag), 1623 if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) {
1698 &nd, flag); 1624 if (!dir->d_op->d_revalidate(dir, nd)) {
1699 if (error) 1625 error = -ESTALE;
1700 return ERR_PTR(error); 1626 goto exit;
1627 }
1628 }
1629 /* fallthrough */
1630 case LAST_DOT:
1631 case LAST_ROOT:
1632 if (open_flag & O_CREAT)
1633 goto exit;
1634 /* fallthrough */
1635 case LAST_BIND:
1636 audit_inode(pathname, dir);
1701 goto ok; 1637 goto ok;
1702 } 1638 }
1703 1639
1704 /* 1640 /* trailing slashes? */
1705 * Create - we need to know the parent. 1641 if (nd->last.name[nd->last.len]) {
1706 */ 1642 if (open_flag & O_CREAT)
1707 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 1643 goto exit;
1708 if (error) 1644 nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
1709 return ERR_PTR(error);
1710 error = path_walk(pathname, &nd);
1711 if (error) {
1712 if (nd.root.mnt)
1713 path_put(&nd.root);
1714 return ERR_PTR(error);
1715 } 1645 }
1716 if (unlikely(!audit_dummy_context()))
1717 audit_inode(pathname, nd.path.dentry);
1718 1646
1719 /* 1647 /* just plain open? */
1720 * We have the parent and last component. First of all, check 1648 if (!(open_flag & O_CREAT)) {
1721 * that we are not asked to creat(2) an obvious directory - that 1649 error = do_lookup(nd, &nd->last, path);
1722 * will not do. 1650 if (error)
1723 */ 1651 goto exit;
1724 error = -EISDIR; 1652 error = -ENOENT;
1725 if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len]) 1653 if (!path->dentry->d_inode)
1726 goto exit_parent; 1654 goto exit_dput;
1655 if (path->dentry->d_inode->i_op->follow_link)
1656 return NULL;
1657 error = -ENOTDIR;
1658 if (nd->flags & LOOKUP_DIRECTORY) {
1659 if (!path->dentry->d_inode->i_op->lookup)
1660 goto exit_dput;
1661 }
1662 path_to_nameidata(path, nd);
1663 audit_inode(pathname, nd->path.dentry);
1664 goto ok;
1665 }
1727 1666
1728 error = -ENFILE; 1667 /* OK, it's O_CREAT */
1729 filp = get_empty_filp();
1730 if (filp == NULL)
1731 goto exit_parent;
1732 nd.intent.open.file = filp;
1733 nd.intent.open.flags = flag;
1734 nd.intent.open.create_mode = mode;
1735 dir = nd.path.dentry;
1736 nd.flags &= ~LOOKUP_PARENT;
1737 nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN;
1738 if (flag & O_EXCL)
1739 nd.flags |= LOOKUP_EXCL;
1740 mutex_lock(&dir->d_inode->i_mutex); 1668 mutex_lock(&dir->d_inode->i_mutex);
1741 path.dentry = lookup_hash(&nd);
1742 path.mnt = nd.path.mnt;
1743 1669
1744do_last: 1670 path->dentry = lookup_hash(nd);
1745 error = PTR_ERR(path.dentry); 1671 path->mnt = nd->path.mnt;
1746 if (IS_ERR(path.dentry)) { 1672
1673 error = PTR_ERR(path->dentry);
1674 if (IS_ERR(path->dentry)) {
1747 mutex_unlock(&dir->d_inode->i_mutex); 1675 mutex_unlock(&dir->d_inode->i_mutex);
1748 goto exit; 1676 goto exit;
1749 } 1677 }
1750 1678
1751 if (IS_ERR(nd.intent.open.file)) { 1679 if (IS_ERR(nd->intent.open.file)) {
1752 error = PTR_ERR(nd.intent.open.file); 1680 error = PTR_ERR(nd->intent.open.file);
1753 goto exit_mutex_unlock; 1681 goto exit_mutex_unlock;
1754 } 1682 }
1755 1683
1756 /* Negative dentry, just create the file */ 1684 /* Negative dentry, just create the file */
1757 if (!path.dentry->d_inode) { 1685 if (!path->dentry->d_inode) {
1758 /* 1686 /*
1759 * This write is needed to ensure that a 1687 * This write is needed to ensure that a
1760 * ro->rw transition does not occur between 1688 * ro->rw transition does not occur between
@@ -1762,22 +1690,23 @@ do_last:
1762 * a permanent write count is taken through 1690 * a permanent write count is taken through
1763 * the 'struct file' in nameidata_to_filp(). 1691 * the 'struct file' in nameidata_to_filp().
1764 */ 1692 */
1765 error = mnt_want_write(nd.path.mnt); 1693 error = mnt_want_write(nd->path.mnt);
1766 if (error) 1694 if (error)
1767 goto exit_mutex_unlock; 1695 goto exit_mutex_unlock;
1768 error = __open_namei_create(&nd, &path, flag, mode); 1696 error = __open_namei_create(nd, path, open_flag, mode);
1769 if (error) { 1697 if (error) {
1770 mnt_drop_write(nd.path.mnt); 1698 mnt_drop_write(nd->path.mnt);
1771 goto exit; 1699 goto exit;
1772 } 1700 }
1773 filp = nameidata_to_filp(&nd, open_flag); 1701 filp = nameidata_to_filp(nd);
1774 if (IS_ERR(filp)) 1702 mnt_drop_write(nd->path.mnt);
1775 ima_counts_put(&nd.path, 1703 if (!IS_ERR(filp)) {
1776 acc_mode & (MAY_READ | MAY_WRITE | 1704 error = ima_file_check(filp, acc_mode);
1777 MAY_EXEC)); 1705 if (error) {
1778 mnt_drop_write(nd.path.mnt); 1706 fput(filp);
1779 if (nd.root.mnt) 1707 filp = ERR_PTR(error);
1780 path_put(&nd.root); 1708 }
1709 }
1781 return filp; 1710 return filp;
1782 } 1711 }
1783 1712
@@ -1785,129 +1714,184 @@ do_last:
1785 * It already exists. 1714 * It already exists.
1786 */ 1715 */
1787 mutex_unlock(&dir->d_inode->i_mutex); 1716 mutex_unlock(&dir->d_inode->i_mutex);
1788 audit_inode(pathname, path.dentry); 1717 audit_inode(pathname, path->dentry);
1789 1718
1790 error = -EEXIST; 1719 error = -EEXIST;
1791 if (flag & O_EXCL) 1720 if (open_flag & O_EXCL)
1792 goto exit_dput; 1721 goto exit_dput;
1793 1722
1794 if (__follow_mount(&path)) { 1723 if (__follow_mount(path)) {
1795 error = -ELOOP; 1724 error = -ELOOP;
1796 if (flag & O_NOFOLLOW) 1725 if (open_flag & O_NOFOLLOW)
1797 goto exit_dput; 1726 goto exit_dput;
1798 } 1727 }
1799 1728
1800 error = -ENOENT; 1729 error = -ENOENT;
1801 if (!path.dentry->d_inode) 1730 if (!path->dentry->d_inode)
1802 goto exit_dput; 1731 goto exit_dput;
1803 if (path.dentry->d_inode->i_op->follow_link)
1804 goto do_link;
1805 1732
1806 path_to_nameidata(&path, &nd); 1733 if (path->dentry->d_inode->i_op->follow_link)
1734 return NULL;
1735
1736 path_to_nameidata(path, nd);
1807 error = -EISDIR; 1737 error = -EISDIR;
1808 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) 1738 if (S_ISDIR(path->dentry->d_inode->i_mode))
1809 goto exit; 1739 goto exit;
1810ok: 1740ok:
1811 /* 1741 filp = finish_open(nd, open_flag, acc_mode);
1812 * Consider:
1813 * 1. may_open() truncates a file
1814 * 2. a rw->ro mount transition occurs
1815 * 3. nameidata_to_filp() fails due to
1816 * the ro mount.
1817 * That would be inconsistent, and should
1818 * be avoided. Taking this mnt write here
1819 * ensures that (2) can not occur.
1820 */
1821 will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
1822 if (will_write) {
1823 error = mnt_want_write(nd.path.mnt);
1824 if (error)
1825 goto exit;
1826 }
1827 error = may_open(&nd.path, acc_mode, flag);
1828 if (error) {
1829 if (will_write)
1830 mnt_drop_write(nd.path.mnt);
1831 goto exit;
1832 }
1833 filp = nameidata_to_filp(&nd, open_flag);
1834 if (IS_ERR(filp))
1835 ima_counts_put(&nd.path,
1836 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
1837 /*
1838 * It is now safe to drop the mnt write
1839 * because the filp has had a write taken
1840 * on its behalf.
1841 */
1842 if (will_write)
1843 mnt_drop_write(nd.path.mnt);
1844 if (nd.root.mnt)
1845 path_put(&nd.root);
1846 return filp; 1742 return filp;
1847 1743
1848exit_mutex_unlock: 1744exit_mutex_unlock:
1849 mutex_unlock(&dir->d_inode->i_mutex); 1745 mutex_unlock(&dir->d_inode->i_mutex);
1850exit_dput: 1746exit_dput:
1851 path_put_conditional(&path, &nd); 1747 path_put_conditional(path, nd);
1852exit: 1748exit:
1853 if (!IS_ERR(nd.intent.open.file)) 1749 if (!IS_ERR(nd->intent.open.file))
1854 release_open_intent(&nd); 1750 release_open_intent(nd);
1855exit_parent: 1751 path_put(&nd->path);
1856 if (nd.root.mnt)
1857 path_put(&nd.root);
1858 path_put(&nd.path);
1859 return ERR_PTR(error); 1752 return ERR_PTR(error);
1753}
1754
1755/*
1756 * Note that the low bits of the passed in "open_flag"
1757 * are not the same as in the local variable "flag". See
1758 * open_to_namei_flags() for more details.
1759 */
1760struct file *do_filp_open(int dfd, const char *pathname,
1761 int open_flag, int mode, int acc_mode)
1762{
1763 struct file *filp;
1764 struct nameidata nd;
1765 int error;
1766 struct path path;
1767 int count = 0;
1768 int flag = open_to_namei_flags(open_flag);
1769 int force_reval = 0;
1770
1771 if (!(open_flag & O_CREAT))
1772 mode = 0;
1860 1773
1861do_link:
1862 error = -ELOOP;
1863 if (flag & O_NOFOLLOW)
1864 goto exit_dput;
1865 /* 1774 /*
1866 * This is subtle. Instead of calling do_follow_link() we do the 1775 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
1867 * thing by hands. The reason is that this way we have zero link_count 1776 * check for O_DSYNC if the need any syncing at all we enforce it's
1868 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT. 1777 * always set instead of having to deal with possibly weird behaviour
1869 * After that we have the parent and last component, i.e. 1778 * for malicious applications setting only __O_SYNC.
1870 * we are in the same situation as after the first path_walk().
1871 * Well, almost - if the last component is normal we get its copy
1872 * stored in nd->last.name and we will have to putname() it when we
1873 * are done. Procfs-like symlinks just set LAST_BIND.
1874 */ 1779 */
1875 nd.flags |= LOOKUP_PARENT; 1780 if (open_flag & __O_SYNC)
1876 error = security_inode_follow_link(path.dentry, &nd); 1781 open_flag |= O_DSYNC;
1782
1783 if (!acc_mode)
1784 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
1785
1786 /* O_TRUNC implies we need access checks for write permissions */
1787 if (open_flag & O_TRUNC)
1788 acc_mode |= MAY_WRITE;
1789
1790 /* Allow the LSM permission hook to distinguish append
1791 access from general write access. */
1792 if (open_flag & O_APPEND)
1793 acc_mode |= MAY_APPEND;
1794
1795 /* find the parent */
1796reval:
1797 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
1877 if (error) 1798 if (error)
1878 goto exit_dput;
1879 error = __do_follow_link(&path, &nd);
1880 if (error) {
1881 /* Does someone understand code flow here? Or it is only
1882 * me so stupid? Anathema to whoever designed this non-sense
1883 * with "intent.open".
1884 */
1885 release_open_intent(&nd);
1886 if (nd.root.mnt)
1887 path_put(&nd.root);
1888 return ERR_PTR(error); 1799 return ERR_PTR(error);
1800 if (force_reval)
1801 nd.flags |= LOOKUP_REVAL;
1802
1803 current->total_link_count = 0;
1804 error = link_path_walk(pathname, &nd);
1805 if (error) {
1806 filp = ERR_PTR(error);
1807 goto out;
1889 } 1808 }
1809 if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT))
1810 audit_inode(pathname, nd.path.dentry);
1811
1812 /*
1813 * We have the parent and last component.
1814 */
1815
1816 error = -ENFILE;
1817 filp = get_empty_filp();
1818 if (filp == NULL)
1819 goto exit_parent;
1820 nd.intent.open.file = filp;
1821 filp->f_flags = open_flag;
1822 nd.intent.open.flags = flag;
1823 nd.intent.open.create_mode = mode;
1890 nd.flags &= ~LOOKUP_PARENT; 1824 nd.flags &= ~LOOKUP_PARENT;
1891 if (nd.last_type == LAST_BIND) 1825 nd.flags |= LOOKUP_OPEN;
1892 goto ok; 1826 if (open_flag & O_CREAT) {
1893 error = -EISDIR; 1827 nd.flags |= LOOKUP_CREATE;
1894 if (nd.last_type != LAST_NORM) 1828 if (open_flag & O_EXCL)
1895 goto exit; 1829 nd.flags |= LOOKUP_EXCL;
1896 if (nd.last.name[nd.last.len]) {
1897 __putname(nd.last.name);
1898 goto exit;
1899 } 1830 }
1900 error = -ELOOP; 1831 if (open_flag & O_DIRECTORY)
1901 if (count++==32) { 1832 nd.flags |= LOOKUP_DIRECTORY;
1902 __putname(nd.last.name); 1833 if (!(open_flag & O_NOFOLLOW))
1903 goto exit; 1834 nd.flags |= LOOKUP_FOLLOW;
1835 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1836 while (unlikely(!filp)) { /* trailing symlink */
1837 struct path holder;
1838 struct inode *inode = path.dentry->d_inode;
1839 void *cookie;
1840 error = -ELOOP;
1841 /* S_ISDIR part is a temporary automount kludge */
1842 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode))
1843 goto exit_dput;
1844 if (count++ == 32)
1845 goto exit_dput;
1846 /*
1847 * This is subtle. Instead of calling do_follow_link() we do
1848 * the thing by hands. The reason is that this way we have zero
1849 * link_count and path_walk() (called from ->follow_link)
1850 * honoring LOOKUP_PARENT. After that we have the parent and
1851 * last component, i.e. we are in the same situation as after
1852 * the first path_walk(). Well, almost - if the last component
1853 * is normal we get its copy stored in nd->last.name and we will
1854 * have to putname() it when we are done. Procfs-like symlinks
1855 * just set LAST_BIND.
1856 */
1857 nd.flags |= LOOKUP_PARENT;
1858 error = security_inode_follow_link(path.dentry, &nd);
1859 if (error)
1860 goto exit_dput;
1861 error = __do_follow_link(&path, &nd, &cookie);
1862 if (unlikely(error)) {
1863 /* nd.path had been dropped */
1864 if (!IS_ERR(cookie) && inode->i_op->put_link)
1865 inode->i_op->put_link(path.dentry, &nd, cookie);
1866 path_put(&path);
1867 release_open_intent(&nd);
1868 filp = ERR_PTR(error);
1869 goto out;
1870 }
1871 holder = path;
1872 nd.flags &= ~LOOKUP_PARENT;
1873 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1874 if (inode->i_op->put_link)
1875 inode->i_op->put_link(holder.dentry, &nd, cookie);
1876 path_put(&holder);
1904 } 1877 }
1905 dir = nd.path.dentry; 1878out:
1906 mutex_lock(&dir->d_inode->i_mutex); 1879 if (nd.root.mnt)
1907 path.dentry = lookup_hash(&nd); 1880 path_put(&nd.root);
1908 path.mnt = nd.path.mnt; 1881 if (filp == ERR_PTR(-ESTALE) && !force_reval) {
1909 __putname(nd.last.name); 1882 force_reval = 1;
1910 goto do_last; 1883 goto reval;
1884 }
1885 return filp;
1886
1887exit_dput:
1888 path_put_conditional(&path, &nd);
1889 if (!IS_ERR(nd.intent.open.file))
1890 release_open_intent(&nd);
1891exit_parent:
1892 path_put(&nd.path);
1893 filp = ERR_PTR(error);
1894 goto out;
1911} 1895}
1912 1896
1913/** 1897/**
@@ -2001,7 +1985,6 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2001 if (error) 1985 if (error)
2002 return error; 1986 return error;
2003 1987
2004 vfs_dq_init(dir);
2005 error = dir->i_op->mknod(dir, dentry, mode, dev); 1988 error = dir->i_op->mknod(dir, dentry, mode, dev);
2006 if (!error) 1989 if (!error)
2007 fsnotify_create(dir, dentry); 1990 fsnotify_create(dir, dentry);
@@ -2100,7 +2083,6 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2100 if (error) 2083 if (error)
2101 return error; 2084 return error;
2102 2085
2103 vfs_dq_init(dir);
2104 error = dir->i_op->mkdir(dir, dentry, mode); 2086 error = dir->i_op->mkdir(dir, dentry, mode);
2105 if (!error) 2087 if (!error)
2106 fsnotify_mkdir(dir, dentry); 2088 fsnotify_mkdir(dir, dentry);
@@ -2186,8 +2168,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
2186 if (!dir->i_op->rmdir) 2168 if (!dir->i_op->rmdir)
2187 return -EPERM; 2169 return -EPERM;
2188 2170
2189 vfs_dq_init(dir);
2190
2191 mutex_lock(&dentry->d_inode->i_mutex); 2171 mutex_lock(&dentry->d_inode->i_mutex);
2192 dentry_unhash(dentry); 2172 dentry_unhash(dentry);
2193 if (d_mountpoint(dentry)) 2173 if (d_mountpoint(dentry))
@@ -2196,8 +2176,10 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
2196 error = security_inode_rmdir(dir, dentry); 2176 error = security_inode_rmdir(dir, dentry);
2197 if (!error) { 2177 if (!error) {
2198 error = dir->i_op->rmdir(dir, dentry); 2178 error = dir->i_op->rmdir(dir, dentry);
2199 if (!error) 2179 if (!error) {
2200 dentry->d_inode->i_flags |= S_DEAD; 2180 dentry->d_inode->i_flags |= S_DEAD;
2181 dont_mount(dentry);
2182 }
2201 } 2183 }
2202 } 2184 }
2203 mutex_unlock(&dentry->d_inode->i_mutex); 2185 mutex_unlock(&dentry->d_inode->i_mutex);
@@ -2273,15 +2255,16 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
2273 if (!dir->i_op->unlink) 2255 if (!dir->i_op->unlink)
2274 return -EPERM; 2256 return -EPERM;
2275 2257
2276 vfs_dq_init(dir);
2277
2278 mutex_lock(&dentry->d_inode->i_mutex); 2258 mutex_lock(&dentry->d_inode->i_mutex);
2279 if (d_mountpoint(dentry)) 2259 if (d_mountpoint(dentry))
2280 error = -EBUSY; 2260 error = -EBUSY;
2281 else { 2261 else {
2282 error = security_inode_unlink(dir, dentry); 2262 error = security_inode_unlink(dir, dentry);
2283 if (!error) 2263 if (!error) {
2284 error = dir->i_op->unlink(dir, dentry); 2264 error = dir->i_op->unlink(dir, dentry);
2265 if (!error)
2266 dont_mount(dentry);
2267 }
2285 } 2268 }
2286 mutex_unlock(&dentry->d_inode->i_mutex); 2269 mutex_unlock(&dentry->d_inode->i_mutex);
2287 2270
@@ -2384,7 +2367,6 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
2384 if (error) 2367 if (error)
2385 return error; 2368 return error;
2386 2369
2387 vfs_dq_init(dir);
2388 error = dir->i_op->symlink(dir, dentry, oldname); 2370 error = dir->i_op->symlink(dir, dentry, oldname);
2389 if (!error) 2371 if (!error)
2390 fsnotify_create(dir, dentry); 2372 fsnotify_create(dir, dentry);
@@ -2468,7 +2450,6 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2468 return error; 2450 return error;
2469 2451
2470 mutex_lock(&inode->i_mutex); 2452 mutex_lock(&inode->i_mutex);
2471 vfs_dq_init(dir);
2472 error = dir->i_op->link(old_dentry, dir, new_dentry); 2453 error = dir->i_op->link(old_dentry, dir, new_dentry);
2473 mutex_unlock(&inode->i_mutex); 2454 mutex_unlock(&inode->i_mutex);
2474 if (!error) 2455 if (!error)
@@ -2569,7 +2550,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
2569 * e) conversion from fhandle to dentry may come in the wrong moment - when 2550 * e) conversion from fhandle to dentry may come in the wrong moment - when
2570 * we are removing the target. Solution: we will have to grab ->i_mutex 2551 * we are removing the target. Solution: we will have to grab ->i_mutex
2571 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on 2552 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
2572 * ->i_mutex on parents, which works but leads to some truely excessive 2553 * ->i_mutex on parents, which works but leads to some truly excessive
2573 * locking]. 2554 * locking].
2574 */ 2555 */
2575static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, 2556static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
@@ -2593,17 +2574,20 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2593 return error; 2574 return error;
2594 2575
2595 target = new_dentry->d_inode; 2576 target = new_dentry->d_inode;
2596 if (target) { 2577 if (target)
2597 mutex_lock(&target->i_mutex); 2578 mutex_lock(&target->i_mutex);
2598 dentry_unhash(new_dentry);
2599 }
2600 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2579 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
2601 error = -EBUSY; 2580 error = -EBUSY;
2602 else 2581 else {
2582 if (target)
2583 dentry_unhash(new_dentry);
2603 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2584 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2585 }
2604 if (target) { 2586 if (target) {
2605 if (!error) 2587 if (!error) {
2606 target->i_flags |= S_DEAD; 2588 target->i_flags |= S_DEAD;
2589 dont_mount(new_dentry);
2590 }
2607 mutex_unlock(&target->i_mutex); 2591 mutex_unlock(&target->i_mutex);
2608 if (d_unhashed(new_dentry)) 2592 if (d_unhashed(new_dentry))
2609 d_rehash(new_dentry); 2593 d_rehash(new_dentry);
@@ -2634,6 +2618,8 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2634 else 2618 else
2635 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2619 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2636 if (!error) { 2620 if (!error) {
2621 if (target)
2622 dont_mount(new_dentry);
2637 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 2623 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
2638 d_move(old_dentry, new_dentry); 2624 d_move(old_dentry, new_dentry);
2639 } 2625 }
@@ -2667,20 +2653,15 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2667 if (!old_dir->i_op->rename) 2653 if (!old_dir->i_op->rename)
2668 return -EPERM; 2654 return -EPERM;
2669 2655
2670 vfs_dq_init(old_dir);
2671 vfs_dq_init(new_dir);
2672
2673 old_name = fsnotify_oldname_init(old_dentry->d_name.name); 2656 old_name = fsnotify_oldname_init(old_dentry->d_name.name);
2674 2657
2675 if (is_dir) 2658 if (is_dir)
2676 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 2659 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
2677 else 2660 else
2678 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 2661 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
2679 if (!error) { 2662 if (!error)
2680 const char *new_name = old_dentry->d_name.name; 2663 fsnotify_move(old_dir, new_dir, old_name, is_dir,
2681 fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir,
2682 new_dentry->d_inode, old_dentry); 2664 new_dentry->d_inode, old_dentry);
2683 }
2684 fsnotify_oldname_free(old_name); 2665 fsnotify_oldname_free(old_name);
2685 2666
2686 return error; 2667 return error;