aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r--fs/namei.c1557
1 files changed, 699 insertions, 858 deletions
diff --git a/fs/namei.c b/fs/namei.c
index a4689eb2df28..3cb616d38d9c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page)
136 return retval; 136 return retval;
137} 137}
138 138
139char * getname(const char __user * filename) 139static char *getname_flags(const char __user * filename, int flags)
140{ 140{
141 char *tmp, *result; 141 char *tmp, *result;
142 142
@@ -147,14 +147,21 @@ char * getname(const char __user * filename)
147 147
148 result = tmp; 148 result = tmp;
149 if (retval < 0) { 149 if (retval < 0) {
150 __putname(tmp); 150 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
151 result = ERR_PTR(retval); 151 __putname(tmp);
152 result = ERR_PTR(retval);
153 }
152 } 154 }
153 } 155 }
154 audit_getname(result); 156 audit_getname(result);
155 return result; 157 return result;
156} 158}
157 159
160char *getname(const char __user * filename)
161{
162 return getname_flags(filename, 0);
163}
164
158#ifdef CONFIG_AUDITSYSCALL 165#ifdef CONFIG_AUDITSYSCALL
159void putname(const char *name) 166void putname(const char *name)
160{ 167{
@@ -176,6 +183,9 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag
176 183
177 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 184 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
178 185
186 if (current_user_ns() != inode_userns(inode))
187 goto other_perms;
188
179 if (current_fsuid() == inode->i_uid) 189 if (current_fsuid() == inode->i_uid)
180 mode >>= 6; 190 mode >>= 6;
181 else { 191 else {
@@ -189,6 +199,7 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag
189 mode >>= 3; 199 mode >>= 3;
190 } 200 }
191 201
202other_perms:
192 /* 203 /*
193 * If the DACs are ok we don't need any capability check. 204 * If the DACs are ok we don't need any capability check.
194 */ 205 */
@@ -230,7 +241,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
230 * Executable DACs are overridable if at least one exec bit is set. 241 * Executable DACs are overridable if at least one exec bit is set.
231 */ 242 */
232 if (!(mask & MAY_EXEC) || execute_ok(inode)) 243 if (!(mask & MAY_EXEC) || execute_ok(inode))
233 if (capable(CAP_DAC_OVERRIDE)) 244 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
234 return 0; 245 return 0;
235 246
236 /* 247 /*
@@ -238,7 +249,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
238 */ 249 */
239 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 250 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
240 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 251 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
241 if (capable(CAP_DAC_READ_SEARCH)) 252 if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
242 return 0; 253 return 0;
243 254
244 return -EACCES; 255 return -EACCES;
@@ -401,9 +412,11 @@ static int nameidata_drop_rcu(struct nameidata *nd)
401{ 412{
402 struct fs_struct *fs = current->fs; 413 struct fs_struct *fs = current->fs;
403 struct dentry *dentry = nd->path.dentry; 414 struct dentry *dentry = nd->path.dentry;
415 int want_root = 0;
404 416
405 BUG_ON(!(nd->flags & LOOKUP_RCU)); 417 BUG_ON(!(nd->flags & LOOKUP_RCU));
406 if (nd->root.mnt) { 418 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
419 want_root = 1;
407 spin_lock(&fs->lock); 420 spin_lock(&fs->lock);
408 if (nd->root.mnt != fs->root.mnt || 421 if (nd->root.mnt != fs->root.mnt ||
409 nd->root.dentry != fs->root.dentry) 422 nd->root.dentry != fs->root.dentry)
@@ -414,7 +427,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
414 goto err; 427 goto err;
415 BUG_ON(nd->inode != dentry->d_inode); 428 BUG_ON(nd->inode != dentry->d_inode);
416 spin_unlock(&dentry->d_lock); 429 spin_unlock(&dentry->d_lock);
417 if (nd->root.mnt) { 430 if (want_root) {
418 path_get(&nd->root); 431 path_get(&nd->root);
419 spin_unlock(&fs->lock); 432 spin_unlock(&fs->lock);
420 } 433 }
@@ -427,7 +440,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
427err: 440err:
428 spin_unlock(&dentry->d_lock); 441 spin_unlock(&dentry->d_lock);
429err_root: 442err_root:
430 if (nd->root.mnt) 443 if (want_root)
431 spin_unlock(&fs->lock); 444 spin_unlock(&fs->lock);
432 return -ECHILD; 445 return -ECHILD;
433} 446}
@@ -454,9 +467,11 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
454{ 467{
455 struct fs_struct *fs = current->fs; 468 struct fs_struct *fs = current->fs;
456 struct dentry *parent = nd->path.dentry; 469 struct dentry *parent = nd->path.dentry;
470 int want_root = 0;
457 471
458 BUG_ON(!(nd->flags & LOOKUP_RCU)); 472 BUG_ON(!(nd->flags & LOOKUP_RCU));
459 if (nd->root.mnt) { 473 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
474 want_root = 1;
460 spin_lock(&fs->lock); 475 spin_lock(&fs->lock);
461 if (nd->root.mnt != fs->root.mnt || 476 if (nd->root.mnt != fs->root.mnt ||
462 nd->root.dentry != fs->root.dentry) 477 nd->root.dentry != fs->root.dentry)
@@ -476,7 +491,7 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
476 parent->d_count++; 491 parent->d_count++;
477 spin_unlock(&dentry->d_lock); 492 spin_unlock(&dentry->d_lock);
478 spin_unlock(&parent->d_lock); 493 spin_unlock(&parent->d_lock);
479 if (nd->root.mnt) { 494 if (want_root) {
480 path_get(&nd->root); 495 path_get(&nd->root);
481 spin_unlock(&fs->lock); 496 spin_unlock(&fs->lock);
482 } 497 }
@@ -490,7 +505,7 @@ err:
490 spin_unlock(&dentry->d_lock); 505 spin_unlock(&dentry->d_lock);
491 spin_unlock(&parent->d_lock); 506 spin_unlock(&parent->d_lock);
492err_root: 507err_root:
493 if (nd->root.mnt) 508 if (want_root)
494 spin_unlock(&fs->lock); 509 spin_unlock(&fs->lock);
495 return -ECHILD; 510 return -ECHILD;
496} 511}
@@ -498,8 +513,16 @@ err_root:
498/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ 513/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
499static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) 514static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
500{ 515{
501 if (nd->flags & LOOKUP_RCU) 516 if (nd->flags & LOOKUP_RCU) {
502 return nameidata_dentry_drop_rcu(nd, dentry); 517 if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) {
518 nd->flags &= ~LOOKUP_RCU;
519 if (!(nd->flags & LOOKUP_ROOT))
520 nd->root.mnt = NULL;
521 rcu_read_unlock();
522 br_read_unlock(vfsmount_lock);
523 return -ECHILD;
524 }
525 }
503 return 0; 526 return 0;
504} 527}
505 528
@@ -518,7 +541,8 @@ static int nameidata_drop_rcu_last(struct nameidata *nd)
518 541
519 BUG_ON(!(nd->flags & LOOKUP_RCU)); 542 BUG_ON(!(nd->flags & LOOKUP_RCU));
520 nd->flags &= ~LOOKUP_RCU; 543 nd->flags &= ~LOOKUP_RCU;
521 nd->root.mnt = NULL; 544 if (!(nd->flags & LOOKUP_ROOT))
545 nd->root.mnt = NULL;
522 spin_lock(&dentry->d_lock); 546 spin_lock(&dentry->d_lock);
523 if (!__d_rcu_to_refcount(dentry, nd->seq)) 547 if (!__d_rcu_to_refcount(dentry, nd->seq))
524 goto err_unlock; 548 goto err_unlock;
@@ -539,14 +563,6 @@ err_unlock:
539 return -ECHILD; 563 return -ECHILD;
540} 564}
541 565
542/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
543static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
544{
545 if (likely(nd->flags & LOOKUP_RCU))
546 return nameidata_drop_rcu_last(nd);
547 return 0;
548}
549
550/** 566/**
551 * release_open_intent - free up open intent resources 567 * release_open_intent - free up open intent resources
552 * @nd: pointer to nameidata 568 * @nd: pointer to nameidata
@@ -590,42 +606,8 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
590 return dentry; 606 return dentry;
591} 607}
592 608
593static inline struct dentry *
594do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
595{
596 int status = d_revalidate(dentry, nd);
597 if (likely(status > 0))
598 return dentry;
599 if (status == -ECHILD) {
600 if (nameidata_dentry_drop_rcu(nd, dentry))
601 return ERR_PTR(-ECHILD);
602 return do_revalidate(dentry, nd);
603 }
604 if (status < 0)
605 return ERR_PTR(status);
606 /* Don't d_invalidate in rcu-walk mode */
607 if (nameidata_dentry_drop_rcu(nd, dentry))
608 return ERR_PTR(-ECHILD);
609 if (!d_invalidate(dentry)) {
610 dput(dentry);
611 dentry = NULL;
612 }
613 return dentry;
614}
615
616static inline int need_reval_dot(struct dentry *dentry)
617{
618 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
619 return 0;
620
621 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
622 return 0;
623
624 return 1;
625}
626
627/* 609/*
628 * force_reval_path - force revalidation of a dentry 610 * handle_reval_path - force revalidation of a dentry
629 * 611 *
630 * In some situations the path walking code will trust dentries without 612 * In some situations the path walking code will trust dentries without
631 * revalidating them. This causes problems for filesystems that depend on 613 * revalidating them. This causes problems for filesystems that depend on
@@ -639,27 +621,28 @@ static inline int need_reval_dot(struct dentry *dentry)
639 * invalidate the dentry. It's up to the caller to handle putting references 621 * invalidate the dentry. It's up to the caller to handle putting references
640 * to the path if necessary. 622 * to the path if necessary.
641 */ 623 */
642static int 624static inline int handle_reval_path(struct nameidata *nd)
643force_reval_path(struct path *path, struct nameidata *nd)
644{ 625{
626 struct dentry *dentry = nd->path.dentry;
645 int status; 627 int status;
646 struct dentry *dentry = path->dentry;
647 628
648 /* 629 if (likely(!(nd->flags & LOOKUP_JUMPED)))
649 * only check on filesystems where it's possible for the dentry to 630 return 0;
650 * become stale. 631
651 */ 632 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
652 if (!need_reval_dot(dentry)) 633 return 0;
634
635 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
653 return 0; 636 return 0;
654 637
638 /* Note: we do not d_invalidate() */
655 status = d_revalidate(dentry, nd); 639 status = d_revalidate(dentry, nd);
656 if (status > 0) 640 if (status > 0)
657 return 0; 641 return 0;
658 642
659 if (!status) { 643 if (!status)
660 d_invalidate(dentry);
661 status = -ESTALE; 644 status = -ESTALE;
662 } 645
663 return status; 646 return status;
664} 647}
665 648
@@ -675,6 +658,7 @@ force_reval_path(struct path *path, struct nameidata *nd)
675static inline int exec_permission(struct inode *inode, unsigned int flags) 658static inline int exec_permission(struct inode *inode, unsigned int flags)
676{ 659{
677 int ret; 660 int ret;
661 struct user_namespace *ns = inode_userns(inode);
678 662
679 if (inode->i_op->permission) { 663 if (inode->i_op->permission) {
680 ret = inode->i_op->permission(inode, MAY_EXEC, flags); 664 ret = inode->i_op->permission(inode, MAY_EXEC, flags);
@@ -687,7 +671,8 @@ static inline int exec_permission(struct inode *inode, unsigned int flags)
687 if (ret == -ECHILD) 671 if (ret == -ECHILD)
688 return ret; 672 return ret;
689 673
690 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) 674 if (ns_capable(ns, CAP_DAC_OVERRIDE) ||
675 ns_capable(ns, CAP_DAC_READ_SEARCH))
691 goto ok; 676 goto ok;
692 677
693 return ret; 678 return ret;
@@ -728,6 +713,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
728 path_put(&nd->path); 713 path_put(&nd->path);
729 nd->path = nd->root; 714 nd->path = nd->root;
730 path_get(&nd->root); 715 path_get(&nd->root);
716 nd->flags |= LOOKUP_JUMPED;
731 } 717 }
732 nd->inode = nd->path.dentry->d_inode; 718 nd->inode = nd->path.dentry->d_inode;
733 719
@@ -757,19 +743,42 @@ static inline void path_to_nameidata(const struct path *path,
757 nd->path.dentry = path->dentry; 743 nd->path.dentry = path->dentry;
758} 744}
759 745
746static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
747{
748 struct inode *inode = link->dentry->d_inode;
749 if (!IS_ERR(cookie) && inode->i_op->put_link)
750 inode->i_op->put_link(link->dentry, nd, cookie);
751 path_put(link);
752}
753
760static __always_inline int 754static __always_inline int
761__do_follow_link(const struct path *link, struct nameidata *nd, void **p) 755follow_link(struct path *link, struct nameidata *nd, void **p)
762{ 756{
763 int error; 757 int error;
764 struct dentry *dentry = link->dentry; 758 struct dentry *dentry = link->dentry;
765 759
766 BUG_ON(nd->flags & LOOKUP_RCU); 760 BUG_ON(nd->flags & LOOKUP_RCU);
767 761
762 if (link->mnt == nd->path.mnt)
763 mntget(link->mnt);
764
765 if (unlikely(current->total_link_count >= 40)) {
766 *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
767 path_put(&nd->path);
768 return -ELOOP;
769 }
770 cond_resched();
771 current->total_link_count++;
772
768 touch_atime(link->mnt, dentry); 773 touch_atime(link->mnt, dentry);
769 nd_set_link(nd, NULL); 774 nd_set_link(nd, NULL);
770 775
771 if (link->mnt == nd->path.mnt) 776 error = security_inode_follow_link(link->dentry, nd);
772 mntget(link->mnt); 777 if (error) {
778 *p = ERR_PTR(error); /* no ->put_link(), please */
779 path_put(&nd->path);
780 return error;
781 }
773 782
774 nd->last_type = LAST_BIND; 783 nd->last_type = LAST_BIND;
775 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 784 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
@@ -780,56 +789,18 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
780 if (s) 789 if (s)
781 error = __vfs_follow_link(nd, s); 790 error = __vfs_follow_link(nd, s);
782 else if (nd->last_type == LAST_BIND) { 791 else if (nd->last_type == LAST_BIND) {
783 error = force_reval_path(&nd->path, nd); 792 nd->flags |= LOOKUP_JUMPED;
784 if (error) 793 nd->inode = nd->path.dentry->d_inode;
794 if (nd->inode->i_op->follow_link) {
795 /* stepped on a _really_ weird one */
785 path_put(&nd->path); 796 path_put(&nd->path);
797 error = -ELOOP;
798 }
786 } 799 }
787 } 800 }
788 return error; 801 return error;
789} 802}
790 803
791/*
792 * This limits recursive symlink follows to 8, while
793 * limiting consecutive symlinks to 40.
794 *
795 * Without that kind of total limit, nasty chains of consecutive
796 * symlinks can cause almost arbitrarily long lookups.
797 */
798static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd)
799{
800 void *cookie;
801 int err = -ELOOP;
802
803 /* We drop rcu-walk here */
804 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
805 return -ECHILD;
806 BUG_ON(inode != path->dentry->d_inode);
807
808 if (current->link_count >= MAX_NESTED_LINKS)
809 goto loop;
810 if (current->total_link_count >= 40)
811 goto loop;
812 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
813 cond_resched();
814 err = security_inode_follow_link(path->dentry, nd);
815 if (err)
816 goto loop;
817 current->link_count++;
818 current->total_link_count++;
819 nd->depth++;
820 err = __do_follow_link(path, nd, &cookie);
821 if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
822 path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
823 path_put(path);
824 current->link_count--;
825 nd->depth--;
826 return err;
827loop:
828 path_put_conditional(path, nd);
829 path_put(&nd->path);
830 return err;
831}
832
833static int follow_up_rcu(struct path *path) 804static int follow_up_rcu(struct path *path)
834{ 805{
835 struct vfsmount *parent; 806 struct vfsmount *parent;
@@ -968,8 +939,7 @@ static int follow_managed(struct path *path, unsigned flags)
968 if (managed & DCACHE_MANAGE_TRANSIT) { 939 if (managed & DCACHE_MANAGE_TRANSIT) {
969 BUG_ON(!path->dentry->d_op); 940 BUG_ON(!path->dentry->d_op);
970 BUG_ON(!path->dentry->d_op->d_manage); 941 BUG_ON(!path->dentry->d_op->d_manage);
971 ret = path->dentry->d_op->d_manage(path->dentry, 942 ret = path->dentry->d_op->d_manage(path->dentry, false);
972 false, false);
973 if (ret < 0) 943 if (ret < 0)
974 return ret == -EISDIR ? 0 : ret; 944 return ret == -EISDIR ? 0 : ret;
975 } 945 }
@@ -1022,6 +992,12 @@ int follow_down_one(struct path *path)
1022 return 0; 992 return 0;
1023} 993}
1024 994
995static inline bool managed_dentry_might_block(struct dentry *dentry)
996{
997 return (dentry->d_flags & DCACHE_MANAGE_TRANSIT &&
998 dentry->d_op->d_manage(dentry, true) < 0);
999}
1000
1025/* 1001/*
1026 * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we 1002 * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we
1027 * meet a managed dentry and we're not walking to "..". True is returned to 1003 * meet a managed dentry and we're not walking to "..". True is returned to
@@ -1030,19 +1006,26 @@ int follow_down_one(struct path *path)
1030static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, 1006static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
1031 struct inode **inode, bool reverse_transit) 1007 struct inode **inode, bool reverse_transit)
1032{ 1008{
1033 while (d_mountpoint(path->dentry)) { 1009 for (;;) {
1034 struct vfsmount *mounted; 1010 struct vfsmount *mounted;
1035 if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) && 1011 /*
1036 !reverse_transit && 1012 * Don't forget we might have a non-mountpoint managed dentry
1037 path->dentry->d_op->d_manage(path->dentry, false, true) < 0) 1013 * that wants to block transit.
1014 */
1015 *inode = path->dentry->d_inode;
1016 if (!reverse_transit &&
1017 unlikely(managed_dentry_might_block(path->dentry)))
1038 return false; 1018 return false;
1019
1020 if (!d_mountpoint(path->dentry))
1021 break;
1022
1039 mounted = __lookup_mnt(path->mnt, path->dentry, 1); 1023 mounted = __lookup_mnt(path->mnt, path->dentry, 1);
1040 if (!mounted) 1024 if (!mounted)
1041 break; 1025 break;
1042 path->mnt = mounted; 1026 path->mnt = mounted;
1043 path->dentry = mounted->mnt_root; 1027 path->dentry = mounted->mnt_root;
1044 nd->seq = read_seqcount_begin(&path->dentry->d_seq); 1028 nd->seq = read_seqcount_begin(&path->dentry->d_seq);
1045 *inode = path->dentry->d_inode;
1046 } 1029 }
1047 1030
1048 if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) 1031 if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
@@ -1068,7 +1051,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1068 1051
1069 seq = read_seqcount_begin(&parent->d_seq); 1052 seq = read_seqcount_begin(&parent->d_seq);
1070 if (read_seqcount_retry(&old->d_seq, nd->seq)) 1053 if (read_seqcount_retry(&old->d_seq, nd->seq))
1071 return -ECHILD; 1054 goto failed;
1072 inode = parent->d_inode; 1055 inode = parent->d_inode;
1073 nd->path.dentry = parent; 1056 nd->path.dentry = parent;
1074 nd->seq = seq; 1057 nd->seq = seq;
@@ -1081,8 +1064,15 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1081 } 1064 }
1082 __follow_mount_rcu(nd, &nd->path, &inode, true); 1065 __follow_mount_rcu(nd, &nd->path, &inode, true);
1083 nd->inode = inode; 1066 nd->inode = inode;
1084
1085 return 0; 1067 return 0;
1068
1069failed:
1070 nd->flags &= ~LOOKUP_RCU;
1071 if (!(nd->flags & LOOKUP_ROOT))
1072 nd->root.mnt = NULL;
1073 rcu_read_unlock();
1074 br_read_unlock(vfsmount_lock);
1075 return -ECHILD;
1086} 1076}
1087 1077
1088/* 1078/*
@@ -1093,7 +1083,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1093 * Care must be taken as namespace_sem may be held (indicated by mounting_here 1083 * Care must be taken as namespace_sem may be held (indicated by mounting_here
1094 * being true). 1084 * being true).
1095 */ 1085 */
1096int follow_down(struct path *path, bool mounting_here) 1086int follow_down(struct path *path)
1097{ 1087{
1098 unsigned managed; 1088 unsigned managed;
1099 int ret; 1089 int ret;
@@ -1114,7 +1104,7 @@ int follow_down(struct path *path, bool mounting_here)
1114 BUG_ON(!path->dentry->d_op); 1104 BUG_ON(!path->dentry->d_op);
1115 BUG_ON(!path->dentry->d_op->d_manage); 1105 BUG_ON(!path->dentry->d_op->d_manage);
1116 ret = path->dentry->d_op->d_manage( 1106 ret = path->dentry->d_op->d_manage(
1117 path->dentry, mounting_here, false); 1107 path->dentry, false);
1118 if (ret < 0) 1108 if (ret < 0)
1119 return ret == -EISDIR ? 0 : ret; 1109 return ret == -EISDIR ? 0 : ret;
1120 } 1110 }
@@ -1216,68 +1206,85 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1216{ 1206{
1217 struct vfsmount *mnt = nd->path.mnt; 1207 struct vfsmount *mnt = nd->path.mnt;
1218 struct dentry *dentry, *parent = nd->path.dentry; 1208 struct dentry *dentry, *parent = nd->path.dentry;
1219 struct inode *dir; 1209 int need_reval = 1;
1210 int status = 1;
1220 int err; 1211 int err;
1221 1212
1222 /* 1213 /*
1223 * See if the low-level filesystem might want
1224 * to use its own hash..
1225 */
1226 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1227 err = parent->d_op->d_hash(parent, nd->inode, name);
1228 if (err < 0)
1229 return err;
1230 }
1231
1232 /*
1233 * Rename seqlock is not required here because in the off chance 1214 * Rename seqlock is not required here because in the off chance
1234 * of a false negative due to a concurrent rename, we're going to 1215 * of a false negative due to a concurrent rename, we're going to
1235 * do the non-racy lookup, below. 1216 * do the non-racy lookup, below.
1236 */ 1217 */
1237 if (nd->flags & LOOKUP_RCU) { 1218 if (nd->flags & LOOKUP_RCU) {
1238 unsigned seq; 1219 unsigned seq;
1239
1240 *inode = nd->inode; 1220 *inode = nd->inode;
1241 dentry = __d_lookup_rcu(parent, name, &seq, inode); 1221 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1242 if (!dentry) { 1222 if (!dentry)
1243 if (nameidata_drop_rcu(nd)) 1223 goto unlazy;
1244 return -ECHILD; 1224
1245 goto need_lookup;
1246 }
1247 /* Memory barrier in read_seqcount_begin of child is enough */ 1225 /* Memory barrier in read_seqcount_begin of child is enough */
1248 if (__read_seqcount_retry(&parent->d_seq, nd->seq)) 1226 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1249 return -ECHILD; 1227 return -ECHILD;
1250
1251 nd->seq = seq; 1228 nd->seq = seq;
1229
1252 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1230 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1253 dentry = do_revalidate_rcu(dentry, nd); 1231 status = d_revalidate(dentry, nd);
1254 if (!dentry) 1232 if (unlikely(status <= 0)) {
1255 goto need_lookup; 1233 if (status != -ECHILD)
1256 if (IS_ERR(dentry)) 1234 need_reval = 0;
1257 goto fail; 1235 goto unlazy;
1258 if (!(nd->flags & LOOKUP_RCU)) 1236 }
1259 goto done;
1260 } 1237 }
1261 path->mnt = mnt; 1238 path->mnt = mnt;
1262 path->dentry = dentry; 1239 path->dentry = dentry;
1263 if (likely(__follow_mount_rcu(nd, path, inode, false))) 1240 if (likely(__follow_mount_rcu(nd, path, inode, false)))
1264 return 0; 1241 return 0;
1265 if (nameidata_drop_rcu(nd)) 1242unlazy:
1266 return -ECHILD; 1243 if (dentry) {
1267 /* fallthru */ 1244 if (nameidata_dentry_drop_rcu(nd, dentry))
1245 return -ECHILD;
1246 } else {
1247 if (nameidata_drop_rcu(nd))
1248 return -ECHILD;
1249 }
1250 } else {
1251 dentry = __d_lookup(parent, name);
1268 } 1252 }
1269 dentry = __d_lookup(parent, name); 1253
1270 if (!dentry) 1254retry:
1271 goto need_lookup; 1255 if (unlikely(!dentry)) {
1272found: 1256 struct inode *dir = parent->d_inode;
1273 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1257 BUG_ON(nd->inode != dir);
1274 dentry = do_revalidate(dentry, nd); 1258
1275 if (!dentry) 1259 mutex_lock(&dir->i_mutex);
1276 goto need_lookup; 1260 dentry = d_lookup(parent, name);
1277 if (IS_ERR(dentry)) 1261 if (likely(!dentry)) {
1278 goto fail; 1262 dentry = d_alloc_and_lookup(parent, name, nd);
1263 if (IS_ERR(dentry)) {
1264 mutex_unlock(&dir->i_mutex);
1265 return PTR_ERR(dentry);
1266 }
1267 /* known good */
1268 need_reval = 0;
1269 status = 1;
1270 }
1271 mutex_unlock(&dir->i_mutex);
1279 } 1272 }
1280done: 1273 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
1274 status = d_revalidate(dentry, nd);
1275 if (unlikely(status <= 0)) {
1276 if (status < 0) {
1277 dput(dentry);
1278 return status;
1279 }
1280 if (!d_invalidate(dentry)) {
1281 dput(dentry);
1282 dentry = NULL;
1283 need_reval = 1;
1284 goto retry;
1285 }
1286 }
1287
1281 path->mnt = mnt; 1288 path->mnt = mnt;
1282 path->dentry = dentry; 1289 path->dentry = dentry;
1283 err = follow_managed(path, nd->flags); 1290 err = follow_managed(path, nd->flags);
@@ -1287,39 +1294,113 @@ done:
1287 } 1294 }
1288 *inode = path->dentry->d_inode; 1295 *inode = path->dentry->d_inode;
1289 return 0; 1296 return 0;
1297}
1290 1298
1291need_lookup: 1299static inline int may_lookup(struct nameidata *nd)
1292 dir = parent->d_inode; 1300{
1293 BUG_ON(nd->inode != dir); 1301 if (nd->flags & LOOKUP_RCU) {
1302 int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
1303 if (err != -ECHILD)
1304 return err;
1305 if (nameidata_drop_rcu(nd))
1306 return -ECHILD;
1307 }
1308 return exec_permission(nd->inode, 0);
1309}
1294 1310
1295 mutex_lock(&dir->i_mutex); 1311static inline int handle_dots(struct nameidata *nd, int type)
1296 /* 1312{
1297 * First re-do the cached lookup just in case it was created 1313 if (type == LAST_DOTDOT) {
1298 * while we waited for the directory semaphore, or the first 1314 if (nd->flags & LOOKUP_RCU) {
1299 * lookup failed due to an unrelated rename. 1315 if (follow_dotdot_rcu(nd))
1300 * 1316 return -ECHILD;
1301 * This could use version numbering or similar to avoid unnecessary 1317 } else
1302 * cache lookups, but then we'd have to do the first lookup in the 1318 follow_dotdot(nd);
1303 * non-racy way. However in the common case here, everything should 1319 }
1304 * be hot in cache, so would it be a big win? 1320 return 0;
1305 */ 1321}
1306 dentry = d_lookup(parent, name); 1322
1307 if (likely(!dentry)) { 1323static void terminate_walk(struct nameidata *nd)
1308 dentry = d_alloc_and_lookup(parent, name, nd); 1324{
1309 mutex_unlock(&dir->i_mutex); 1325 if (!(nd->flags & LOOKUP_RCU)) {
1310 if (IS_ERR(dentry)) 1326 path_put(&nd->path);
1311 goto fail; 1327 } else {
1312 goto done; 1328 nd->flags &= ~LOOKUP_RCU;
1329 if (!(nd->flags & LOOKUP_ROOT))
1330 nd->root.mnt = NULL;
1331 rcu_read_unlock();
1332 br_read_unlock(vfsmount_lock);
1313 } 1333 }
1334}
1335
1336static inline int walk_component(struct nameidata *nd, struct path *path,
1337 struct qstr *name, int type, int follow)
1338{
1339 struct inode *inode;
1340 int err;
1314 /* 1341 /*
1315 * Uhhuh! Nasty case: the cache was re-populated while 1342 * "." and ".." are special - ".." especially so because it has
1316 * we waited on the semaphore. Need to revalidate. 1343 * to be able to know about the current root directory and
1344 * parent relationships.
1317 */ 1345 */
1318 mutex_unlock(&dir->i_mutex); 1346 if (unlikely(type != LAST_NORM))
1319 goto found; 1347 return handle_dots(nd, type);
1348 err = do_lookup(nd, name, path, &inode);
1349 if (unlikely(err)) {
1350 terminate_walk(nd);
1351 return err;
1352 }
1353 if (!inode) {
1354 path_to_nameidata(path, nd);
1355 terminate_walk(nd);
1356 return -ENOENT;
1357 }
1358 if (unlikely(inode->i_op->follow_link) && follow) {
1359 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
1360 return -ECHILD;
1361 BUG_ON(inode != path->dentry->d_inode);
1362 return 1;
1363 }
1364 path_to_nameidata(path, nd);
1365 nd->inode = inode;
1366 return 0;
1367}
1320 1368
1321fail: 1369/*
1322 return PTR_ERR(dentry); 1370 * This limits recursive symlink follows to 8, while
1371 * limiting consecutive symlinks to 40.
1372 *
1373 * Without that kind of total limit, nasty chains of consecutive
1374 * symlinks can cause almost arbitrarily long lookups.
1375 */
1376static inline int nested_symlink(struct path *path, struct nameidata *nd)
1377{
1378 int res;
1379
1380 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1381 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
1382 path_put_conditional(path, nd);
1383 path_put(&nd->path);
1384 return -ELOOP;
1385 }
1386
1387 nd->depth++;
1388 current->link_count++;
1389
1390 do {
1391 struct path link = *path;
1392 void *cookie;
1393
1394 res = follow_link(&link, nd, &cookie);
1395 if (!res)
1396 res = walk_component(nd, path, &nd->last,
1397 nd->last_type, LOOKUP_FOLLOW);
1398 put_link(nd, &link, cookie);
1399 } while (res > 0);
1400
1401 current->link_count--;
1402 nd->depth--;
1403 return res;
1323} 1404}
1324 1405
1325/* 1406/*
@@ -1339,30 +1420,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1339 while (*name=='/') 1420 while (*name=='/')
1340 name++; 1421 name++;
1341 if (!*name) 1422 if (!*name)
1342 goto return_reval; 1423 return 0;
1343
1344 if (nd->depth)
1345 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
1346 1424
1347 /* At this point we know we have a real path component. */ 1425 /* At this point we know we have a real path component. */
1348 for(;;) { 1426 for(;;) {
1349 struct inode *inode;
1350 unsigned long hash; 1427 unsigned long hash;
1351 struct qstr this; 1428 struct qstr this;
1352 unsigned int c; 1429 unsigned int c;
1430 int type;
1353 1431
1354 nd->flags |= LOOKUP_CONTINUE; 1432 nd->flags |= LOOKUP_CONTINUE;
1355 if (nd->flags & LOOKUP_RCU) { 1433
1356 err = exec_permission(nd->inode, IPERM_FLAG_RCU); 1434 err = may_lookup(nd);
1357 if (err == -ECHILD) {
1358 if (nameidata_drop_rcu(nd))
1359 return -ECHILD;
1360 goto exec_again;
1361 }
1362 } else {
1363exec_again:
1364 err = exec_permission(nd->inode, 0);
1365 }
1366 if (err) 1435 if (err)
1367 break; 1436 break;
1368 1437
@@ -1378,52 +1447,43 @@ exec_again:
1378 this.len = name - (const char *) this.name; 1447 this.len = name - (const char *) this.name;
1379 this.hash = end_name_hash(hash); 1448 this.hash = end_name_hash(hash);
1380 1449
1450 type = LAST_NORM;
1451 if (this.name[0] == '.') switch (this.len) {
1452 case 2:
1453 if (this.name[1] == '.') {
1454 type = LAST_DOTDOT;
1455 nd->flags |= LOOKUP_JUMPED;
1456 }
1457 break;
1458 case 1:
1459 type = LAST_DOT;
1460 }
1461 if (likely(type == LAST_NORM)) {
1462 struct dentry *parent = nd->path.dentry;
1463 nd->flags &= ~LOOKUP_JUMPED;
1464 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1465 err = parent->d_op->d_hash(parent, nd->inode,
1466 &this);
1467 if (err < 0)
1468 break;
1469 }
1470 }
1471
1381 /* remove trailing slashes? */ 1472 /* remove trailing slashes? */
1382 if (!c) 1473 if (!c)
1383 goto last_component; 1474 goto last_component;
1384 while (*++name == '/'); 1475 while (*++name == '/');
1385 if (!*name) 1476 if (!*name)
1386 goto last_with_slashes; 1477 goto last_component;
1387 1478
1388 /* 1479 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
1389 * "." and ".." are special - ".." especially so because it has 1480 if (err < 0)
1390 * to be able to know about the current root directory and 1481 return err;
1391 * parent relationships.
1392 */
1393 if (this.name[0] == '.') switch (this.len) {
1394 default:
1395 break;
1396 case 2:
1397 if (this.name[1] != '.')
1398 break;
1399 if (nd->flags & LOOKUP_RCU) {
1400 if (follow_dotdot_rcu(nd))
1401 return -ECHILD;
1402 } else
1403 follow_dotdot(nd);
1404 /* fallthrough */
1405 case 1:
1406 continue;
1407 }
1408 /* This does the actual lookups.. */
1409 err = do_lookup(nd, &this, &next, &inode);
1410 if (err)
1411 break;
1412 err = -ENOENT;
1413 if (!inode)
1414 goto out_dput;
1415 1482
1416 if (inode->i_op->follow_link) { 1483 if (err) {
1417 err = do_follow_link(inode, &next, nd); 1484 err = nested_symlink(&next, nd);
1418 if (err) 1485 if (err)
1419 goto return_err; 1486 return err;
1420 nd->inode = nd->path.dentry->d_inode;
1421 err = -ENOENT;
1422 if (!nd->inode)
1423 break;
1424 } else {
1425 path_to_nameidata(&next, nd);
1426 nd->inode = inode;
1427 } 1487 }
1428 err = -ENOTDIR; 1488 err = -ENOTDIR;
1429 if (!nd->inode->i_op->lookup) 1489 if (!nd->inode->i_op->lookup)
@@ -1431,210 +1491,109 @@ exec_again:
1431 continue; 1491 continue;
1432 /* here ends the main loop */ 1492 /* here ends the main loop */
1433 1493
1434last_with_slashes:
1435 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1436last_component: 1494last_component:
1437 /* Clear LOOKUP_CONTINUE iff it was previously unset */ 1495 /* Clear LOOKUP_CONTINUE iff it was previously unset */
1438 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; 1496 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
1439 if (lookup_flags & LOOKUP_PARENT)
1440 goto lookup_parent;
1441 if (this.name[0] == '.') switch (this.len) {
1442 default:
1443 break;
1444 case 2:
1445 if (this.name[1] != '.')
1446 break;
1447 if (nd->flags & LOOKUP_RCU) {
1448 if (follow_dotdot_rcu(nd))
1449 return -ECHILD;
1450 } else
1451 follow_dotdot(nd);
1452 /* fallthrough */
1453 case 1:
1454 goto return_reval;
1455 }
1456 err = do_lookup(nd, &this, &next, &inode);
1457 if (err)
1458 break;
1459 if (inode && unlikely(inode->i_op->follow_link) &&
1460 (lookup_flags & LOOKUP_FOLLOW)) {
1461 err = do_follow_link(inode, &next, nd);
1462 if (err)
1463 goto return_err;
1464 nd->inode = nd->path.dentry->d_inode;
1465 } else {
1466 path_to_nameidata(&next, nd);
1467 nd->inode = inode;
1468 }
1469 err = -ENOENT;
1470 if (!nd->inode)
1471 break;
1472 if (lookup_flags & LOOKUP_DIRECTORY) {
1473 err = -ENOTDIR;
1474 if (!nd->inode->i_op->lookup)
1475 break;
1476 }
1477 goto return_base;
1478lookup_parent:
1479 nd->last = this; 1497 nd->last = this;
1480 nd->last_type = LAST_NORM; 1498 nd->last_type = type;
1481 if (this.name[0] != '.')
1482 goto return_base;
1483 if (this.len == 1)
1484 nd->last_type = LAST_DOT;
1485 else if (this.len == 2 && this.name[1] == '.')
1486 nd->last_type = LAST_DOTDOT;
1487 else
1488 goto return_base;
1489return_reval:
1490 /*
1491 * We bypassed the ordinary revalidation routines.
1492 * We may need to check the cached dentry for staleness.
1493 */
1494 if (need_reval_dot(nd->path.dentry)) {
1495 if (nameidata_drop_rcu_last_maybe(nd))
1496 return -ECHILD;
1497 /* Note: we do not d_invalidate() */
1498 err = d_revalidate(nd->path.dentry, nd);
1499 if (!err)
1500 err = -ESTALE;
1501 if (err < 0)
1502 break;
1503 return 0;
1504 }
1505return_base:
1506 if (nameidata_drop_rcu_last_maybe(nd))
1507 return -ECHILD;
1508 return 0; 1499 return 0;
1509out_dput:
1510 if (!(nd->flags & LOOKUP_RCU))
1511 path_put_conditional(&next, nd);
1512 break;
1513 } 1500 }
1514 if (!(nd->flags & LOOKUP_RCU)) 1501 terminate_walk(nd);
1515 path_put(&nd->path);
1516return_err:
1517 return err; 1502 return err;
1518} 1503}
1519 1504
1520static inline int path_walk_rcu(const char *name, struct nameidata *nd) 1505static int path_init(int dfd, const char *name, unsigned int flags,
1521{ 1506 struct nameidata *nd, struct file **fp)
1522 current->total_link_count = 0;
1523
1524 return link_path_walk(name, nd);
1525}
1526
1527static inline int path_walk_simple(const char *name, struct nameidata *nd)
1528{
1529 current->total_link_count = 0;
1530
1531 return link_path_walk(name, nd);
1532}
1533
1534static int path_walk(const char *name, struct nameidata *nd)
1535{
1536 struct path save = nd->path;
1537 int result;
1538
1539 current->total_link_count = 0;
1540
1541 /* make sure the stuff we saved doesn't go away */
1542 path_get(&save);
1543
1544 result = link_path_walk(name, nd);
1545 if (result == -ESTALE) {
1546 /* nd->path had been dropped */
1547 current->total_link_count = 0;
1548 nd->path = save;
1549 nd->inode = save.dentry->d_inode;
1550 path_get(&nd->path);
1551 nd->flags |= LOOKUP_REVAL;
1552 result = link_path_walk(name, nd);
1553 }
1554
1555 path_put(&save);
1556
1557 return result;
1558}
1559
1560static void path_finish_rcu(struct nameidata *nd)
1561{
1562 if (nd->flags & LOOKUP_RCU) {
1563 /* RCU dangling. Cancel it. */
1564 nd->flags &= ~LOOKUP_RCU;
1565 nd->root.mnt = NULL;
1566 rcu_read_unlock();
1567 br_read_unlock(vfsmount_lock);
1568 }
1569 if (nd->file)
1570 fput(nd->file);
1571}
1572
1573static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1574{ 1507{
1575 int retval = 0; 1508 int retval = 0;
1576 int fput_needed; 1509 int fput_needed;
1577 struct file *file; 1510 struct file *file;
1578 1511
1579 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1512 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1580 nd->flags = flags | LOOKUP_RCU; 1513 nd->flags = flags | LOOKUP_JUMPED;
1581 nd->depth = 0; 1514 nd->depth = 0;
1515 if (flags & LOOKUP_ROOT) {
1516 struct inode *inode = nd->root.dentry->d_inode;
1517 if (*name) {
1518 if (!inode->i_op->lookup)
1519 return -ENOTDIR;
1520 retval = inode_permission(inode, MAY_EXEC);
1521 if (retval)
1522 return retval;
1523 }
1524 nd->path = nd->root;
1525 nd->inode = inode;
1526 if (flags & LOOKUP_RCU) {
1527 br_read_lock(vfsmount_lock);
1528 rcu_read_lock();
1529 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1530 } else {
1531 path_get(&nd->path);
1532 }
1533 return 0;
1534 }
1535
1582 nd->root.mnt = NULL; 1536 nd->root.mnt = NULL;
1583 nd->file = NULL;
1584 1537
1585 if (*name=='/') { 1538 if (*name=='/') {
1586 struct fs_struct *fs = current->fs; 1539 if (flags & LOOKUP_RCU) {
1587 unsigned seq; 1540 br_read_lock(vfsmount_lock);
1588 1541 rcu_read_lock();
1589 br_read_lock(vfsmount_lock); 1542 set_root_rcu(nd);
1590 rcu_read_lock(); 1543 } else {
1591 1544 set_root(nd);
1592 do { 1545 path_get(&nd->root);
1593 seq = read_seqcount_begin(&fs->seq); 1546 }
1594 nd->root = fs->root; 1547 nd->path = nd->root;
1595 nd->path = nd->root;
1596 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1597 } while (read_seqcount_retry(&fs->seq, seq));
1598
1599 } else if (dfd == AT_FDCWD) { 1548 } else if (dfd == AT_FDCWD) {
1600 struct fs_struct *fs = current->fs; 1549 if (flags & LOOKUP_RCU) {
1601 unsigned seq; 1550 struct fs_struct *fs = current->fs;
1602 1551 unsigned seq;
1603 br_read_lock(vfsmount_lock);
1604 rcu_read_lock();
1605 1552
1606 do { 1553 br_read_lock(vfsmount_lock);
1607 seq = read_seqcount_begin(&fs->seq); 1554 rcu_read_lock();
1608 nd->path = fs->pwd;
1609 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1610 } while (read_seqcount_retry(&fs->seq, seq));
1611 1555
1556 do {
1557 seq = read_seqcount_begin(&fs->seq);
1558 nd->path = fs->pwd;
1559 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1560 } while (read_seqcount_retry(&fs->seq, seq));
1561 } else {
1562 get_fs_pwd(current->fs, &nd->path);
1563 }
1612 } else { 1564 } else {
1613 struct dentry *dentry; 1565 struct dentry *dentry;
1614 1566
1615 file = fget_light(dfd, &fput_needed); 1567 file = fget_raw_light(dfd, &fput_needed);
1616 retval = -EBADF; 1568 retval = -EBADF;
1617 if (!file) 1569 if (!file)
1618 goto out_fail; 1570 goto out_fail;
1619 1571
1620 dentry = file->f_path.dentry; 1572 dentry = file->f_path.dentry;
1621 1573
1622 retval = -ENOTDIR; 1574 if (*name) {
1623 if (!S_ISDIR(dentry->d_inode->i_mode)) 1575 retval = -ENOTDIR;
1624 goto fput_fail; 1576 if (!S_ISDIR(dentry->d_inode->i_mode))
1577 goto fput_fail;
1625 1578
1626 retval = file_permission(file, MAY_EXEC); 1579 retval = file_permission(file, MAY_EXEC);
1627 if (retval) 1580 if (retval)
1628 goto fput_fail; 1581 goto fput_fail;
1582 }
1629 1583
1630 nd->path = file->f_path; 1584 nd->path = file->f_path;
1631 if (fput_needed) 1585 if (flags & LOOKUP_RCU) {
1632 nd->file = file; 1586 if (fput_needed)
1633 1587 *fp = file;
1634 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1588 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1635 br_read_lock(vfsmount_lock); 1589 br_read_lock(vfsmount_lock);
1636 rcu_read_lock(); 1590 rcu_read_lock();
1591 } else {
1592 path_get(&file->f_path);
1593 fput_light(file, fput_needed);
1594 }
1637 } 1595 }
1596
1638 nd->inode = nd->path.dentry->d_inode; 1597 nd->inode = nd->path.dentry->d_inode;
1639 return 0; 1598 return 0;
1640 1599
@@ -1644,60 +1603,23 @@ out_fail:
1644 return retval; 1603 return retval;
1645} 1604}
1646 1605
1647static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1606static inline int lookup_last(struct nameidata *nd, struct path *path)
1648{ 1607{
1649 int retval = 0; 1608 if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
1650 int fput_needed; 1609 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1651 struct file *file;
1652
1653 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1654 nd->flags = flags;
1655 nd->depth = 0;
1656 nd->root.mnt = NULL;
1657 1610
1658 if (*name=='/') { 1611 nd->flags &= ~LOOKUP_PARENT;
1659 set_root(nd); 1612 return walk_component(nd, path, &nd->last, nd->last_type,
1660 nd->path = nd->root; 1613 nd->flags & LOOKUP_FOLLOW);
1661 path_get(&nd->root);
1662 } else if (dfd == AT_FDCWD) {
1663 get_fs_pwd(current->fs, &nd->path);
1664 } else {
1665 struct dentry *dentry;
1666
1667 file = fget_light(dfd, &fput_needed);
1668 retval = -EBADF;
1669 if (!file)
1670 goto out_fail;
1671
1672 dentry = file->f_path.dentry;
1673
1674 retval = -ENOTDIR;
1675 if (!S_ISDIR(dentry->d_inode->i_mode))
1676 goto fput_fail;
1677
1678 retval = file_permission(file, MAY_EXEC);
1679 if (retval)
1680 goto fput_fail;
1681
1682 nd->path = file->f_path;
1683 path_get(&file->f_path);
1684
1685 fput_light(file, fput_needed);
1686 }
1687 nd->inode = nd->path.dentry->d_inode;
1688 return 0;
1689
1690fput_fail:
1691 fput_light(file, fput_needed);
1692out_fail:
1693 return retval;
1694} 1614}
1695 1615
1696/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1616/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
1697static int do_path_lookup(int dfd, const char *name, 1617static int path_lookupat(int dfd, const char *name,
1698 unsigned int flags, struct nameidata *nd) 1618 unsigned int flags, struct nameidata *nd)
1699{ 1619{
1700 int retval; 1620 struct file *base = NULL;
1621 struct path path;
1622 int err;
1701 1623
1702 /* 1624 /*
1703 * Path walking is largely split up into 2 different synchronisation 1625 * Path walking is largely split up into 2 different synchronisation
@@ -1713,44 +1635,78 @@ static int do_path_lookup(int dfd, const char *name,
1713 * be handled by restarting a traditional ref-walk (which will always 1635 * be handled by restarting a traditional ref-walk (which will always
1714 * be able to complete). 1636 * be able to complete).
1715 */ 1637 */
1716 retval = path_init_rcu(dfd, name, flags, nd); 1638 err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
1717 if (unlikely(retval)) 1639
1718 return retval; 1640 if (unlikely(err))
1719 retval = path_walk_rcu(name, nd); 1641 return err;
1720 path_finish_rcu(nd); 1642
1721 if (nd->root.mnt) { 1643 current->total_link_count = 0;
1722 path_put(&nd->root); 1644 err = link_path_walk(name, nd);
1723 nd->root.mnt = NULL; 1645
1646 if (!err && !(flags & LOOKUP_PARENT)) {
1647 err = lookup_last(nd, &path);
1648 while (err > 0) {
1649 void *cookie;
1650 struct path link = path;
1651 nd->flags |= LOOKUP_PARENT;
1652 err = follow_link(&link, nd, &cookie);
1653 if (!err)
1654 err = lookup_last(nd, &path);
1655 put_link(nd, &link, cookie);
1656 }
1724 } 1657 }
1725 1658
1726 if (unlikely(retval == -ECHILD || retval == -ESTALE)) { 1659 if (nd->flags & LOOKUP_RCU) {
1727 /* slower, locked walk */ 1660 /* went all way through without dropping RCU */
1728 if (retval == -ESTALE) 1661 BUG_ON(err);
1729 flags |= LOOKUP_REVAL; 1662 if (nameidata_drop_rcu_last(nd))
1730 retval = path_init(dfd, name, flags, nd); 1663 err = -ECHILD;
1731 if (unlikely(retval)) 1664 }
1732 return retval; 1665
1733 retval = path_walk(name, nd); 1666 if (!err) {
1734 if (nd->root.mnt) { 1667 err = handle_reval_path(nd);
1735 path_put(&nd->root); 1668 if (err)
1736 nd->root.mnt = NULL; 1669 path_put(&nd->path);
1670 }
1671
1672 if (!err && nd->flags & LOOKUP_DIRECTORY) {
1673 if (!nd->inode->i_op->lookup) {
1674 path_put(&nd->path);
1675 err = -ENOTDIR;
1737 } 1676 }
1738 } 1677 }
1739 1678
1679 if (base)
1680 fput(base);
1681
1682 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
1683 path_put(&nd->root);
1684 nd->root.mnt = NULL;
1685 }
1686 return err;
1687}
1688
1689static int do_path_lookup(int dfd, const char *name,
1690 unsigned int flags, struct nameidata *nd)
1691{
1692 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
1693 if (unlikely(retval == -ECHILD))
1694 retval = path_lookupat(dfd, name, flags, nd);
1695 if (unlikely(retval == -ESTALE))
1696 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
1697
1740 if (likely(!retval)) { 1698 if (likely(!retval)) {
1741 if (unlikely(!audit_dummy_context())) { 1699 if (unlikely(!audit_dummy_context())) {
1742 if (nd->path.dentry && nd->inode) 1700 if (nd->path.dentry && nd->inode)
1743 audit_inode(name, nd->path.dentry); 1701 audit_inode(name, nd->path.dentry);
1744 } 1702 }
1745 } 1703 }
1746
1747 return retval; 1704 return retval;
1748} 1705}
1749 1706
1750int path_lookup(const char *name, unsigned int flags, 1707int kern_path_parent(const char *name, struct nameidata *nd)
1751 struct nameidata *nd)
1752{ 1708{
1753 return do_path_lookup(AT_FDCWD, name, flags, nd); 1709 return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
1754} 1710}
1755 1711
1756int kern_path(const char *name, unsigned int flags, struct path *path) 1712int kern_path(const char *name, unsigned int flags, struct path *path)
@@ -1774,29 +1730,10 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1774 const char *name, unsigned int flags, 1730 const char *name, unsigned int flags,
1775 struct nameidata *nd) 1731 struct nameidata *nd)
1776{ 1732{
1777 int retval; 1733 nd->root.dentry = dentry;
1778 1734 nd->root.mnt = mnt;
1779 /* same as do_path_lookup */ 1735 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
1780 nd->last_type = LAST_ROOT; 1736 return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd);
1781 nd->flags = flags;
1782 nd->depth = 0;
1783
1784 nd->path.dentry = dentry;
1785 nd->path.mnt = mnt;
1786 path_get(&nd->path);
1787 nd->root = nd->path;
1788 path_get(&nd->root);
1789 nd->inode = nd->path.dentry->d_inode;
1790
1791 retval = path_walk(name, nd);
1792 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1793 nd->inode))
1794 audit_inode(name, nd->path.dentry);
1795
1796 path_put(&nd->root);
1797 nd->root.mnt = NULL;
1798
1799 return retval;
1800} 1737}
1801 1738
1802static struct dentry *__lookup_hash(struct qstr *name, 1739static struct dentry *__lookup_hash(struct qstr *name,
@@ -1811,17 +1748,6 @@ static struct dentry *__lookup_hash(struct qstr *name,
1811 return ERR_PTR(err); 1748 return ERR_PTR(err);
1812 1749
1813 /* 1750 /*
1814 * See if the low-level filesystem might want
1815 * to use its own hash..
1816 */
1817 if (base->d_flags & DCACHE_OP_HASH) {
1818 err = base->d_op->d_hash(base, inode, name);
1819 dentry = ERR_PTR(err);
1820 if (err < 0)
1821 goto out;
1822 }
1823
1824 /*
1825 * Don't bother with __d_lookup: callers are for creat as 1751 * Don't bother with __d_lookup: callers are for creat as
1826 * well as unlink, so a lot of the time it would cost 1752 * well as unlink, so a lot of the time it would cost
1827 * a double lookup. 1753 * a double lookup.
@@ -1833,7 +1759,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1833 1759
1834 if (!dentry) 1760 if (!dentry)
1835 dentry = d_alloc_and_lookup(base, name, nd); 1761 dentry = d_alloc_and_lookup(base, name, nd);
1836out: 1762
1837 return dentry; 1763 return dentry;
1838} 1764}
1839 1765
@@ -1847,28 +1773,6 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1847 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1773 return __lookup_hash(&nd->last, nd->path.dentry, nd);
1848} 1774}
1849 1775
1850static int __lookup_one_len(const char *name, struct qstr *this,
1851 struct dentry *base, int len)
1852{
1853 unsigned long hash;
1854 unsigned int c;
1855
1856 this->name = name;
1857 this->len = len;
1858 if (!len)
1859 return -EACCES;
1860
1861 hash = init_name_hash();
1862 while (len--) {
1863 c = *(const unsigned char *)name++;
1864 if (c == '/' || c == '\0')
1865 return -EACCES;
1866 hash = partial_name_hash(c, hash);
1867 }
1868 this->hash = end_name_hash(hash);
1869 return 0;
1870}
1871
1872/** 1776/**
1873 * lookup_one_len - filesystem helper to lookup single pathname component 1777 * lookup_one_len - filesystem helper to lookup single pathname component
1874 * @name: pathname component to lookup 1778 * @name: pathname component to lookup
@@ -1882,14 +1786,34 @@ static int __lookup_one_len(const char *name, struct qstr *this,
1882 */ 1786 */
1883struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1787struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1884{ 1788{
1885 int err;
1886 struct qstr this; 1789 struct qstr this;
1790 unsigned long hash;
1791 unsigned int c;
1887 1792
1888 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); 1793 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1889 1794
1890 err = __lookup_one_len(name, &this, base, len); 1795 this.name = name;
1891 if (err) 1796 this.len = len;
1892 return ERR_PTR(err); 1797 if (!len)
1798 return ERR_PTR(-EACCES);
1799
1800 hash = init_name_hash();
1801 while (len--) {
1802 c = *(const unsigned char *)name++;
1803 if (c == '/' || c == '\0')
1804 return ERR_PTR(-EACCES);
1805 hash = partial_name_hash(c, hash);
1806 }
1807 this.hash = end_name_hash(hash);
1808 /*
1809 * See if the low-level filesystem might want
1810 * to use its own hash..
1811 */
1812 if (base->d_flags & DCACHE_OP_HASH) {
1813 int err = base->d_op->d_hash(base, base->d_inode, &this);
1814 if (err < 0)
1815 return ERR_PTR(err);
1816 }
1893 1817
1894 return __lookup_hash(&this, base, NULL); 1818 return __lookup_hash(&this, base, NULL);
1895} 1819}
@@ -1898,7 +1822,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
1898 struct path *path) 1822 struct path *path)
1899{ 1823{
1900 struct nameidata nd; 1824 struct nameidata nd;
1901 char *tmp = getname(name); 1825 char *tmp = getname_flags(name, flags);
1902 int err = PTR_ERR(tmp); 1826 int err = PTR_ERR(tmp);
1903 if (!IS_ERR(tmp)) { 1827 if (!IS_ERR(tmp)) {
1904 1828
@@ -1940,11 +1864,15 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
1940 1864
1941 if (!(dir->i_mode & S_ISVTX)) 1865 if (!(dir->i_mode & S_ISVTX))
1942 return 0; 1866 return 0;
1867 if (current_user_ns() != inode_userns(inode))
1868 goto other_userns;
1943 if (inode->i_uid == fsuid) 1869 if (inode->i_uid == fsuid)
1944 return 0; 1870 return 0;
1945 if (dir->i_uid == fsuid) 1871 if (dir->i_uid == fsuid)
1946 return 0; 1872 return 0;
1947 return !capable(CAP_FOWNER); 1873
1874other_userns:
1875 return !ns_capable(inode_userns(inode), CAP_FOWNER);
1948} 1876}
1949 1877
1950/* 1878/*
@@ -2078,12 +2006,16 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
2078 return error; 2006 return error;
2079} 2007}
2080 2008
2081int may_open(struct path *path, int acc_mode, int flag) 2009static int may_open(struct path *path, int acc_mode, int flag)
2082{ 2010{
2083 struct dentry *dentry = path->dentry; 2011 struct dentry *dentry = path->dentry;
2084 struct inode *inode = dentry->d_inode; 2012 struct inode *inode = dentry->d_inode;
2085 int error; 2013 int error;
2086 2014
2015 /* O_PATH? */
2016 if (!acc_mode)
2017 return 0;
2018
2087 if (!inode) 2019 if (!inode)
2088 return -ENOENT; 2020 return -ENOENT;
2089 2021
@@ -2120,7 +2052,7 @@ int may_open(struct path *path, int acc_mode, int flag)
2120 } 2052 }
2121 2053
2122 /* O_NOATIME can only be set by the owner or superuser */ 2054 /* O_NOATIME can only be set by the owner or superuser */
2123 if (flag & O_NOATIME && !is_owner_or_cap(inode)) 2055 if (flag & O_NOATIME && !inode_owner_or_capable(inode))
2124 return -EPERM; 2056 return -EPERM;
2125 2057
2126 /* 2058 /*
@@ -2152,34 +2084,6 @@ static int handle_truncate(struct file *filp)
2152} 2084}
2153 2085
2154/* 2086/*
2155 * Be careful about ever adding any more callers of this
2156 * function. Its flags must be in the namei format, not
2157 * what get passed to sys_open().
2158 */
2159static int __open_namei_create(struct nameidata *nd, struct path *path,
2160 int open_flag, int mode)
2161{
2162 int error;
2163 struct dentry *dir = nd->path.dentry;
2164
2165 if (!IS_POSIXACL(dir->d_inode))
2166 mode &= ~current_umask();
2167 error = security_path_mknod(&nd->path, path->dentry, mode, 0);
2168 if (error)
2169 goto out_unlock;
2170 error = vfs_create(dir->d_inode, path->dentry, mode, nd);
2171out_unlock:
2172 mutex_unlock(&dir->d_inode->i_mutex);
2173 dput(nd->path.dentry);
2174 nd->path.dentry = path->dentry;
2175
2176 if (error)
2177 return error;
2178 /* Don't check for write permission, don't truncate */
2179 return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
2180}
2181
2182/*
2183 * Note that while the flag value (low two bits) for sys_open means: 2087 * Note that while the flag value (low two bits) for sys_open means:
2184 * 00 - read-only 2088 * 00 - read-only
2185 * 01 - write-only 2089 * 01 - write-only
@@ -2203,126 +2107,115 @@ static inline int open_to_namei_flags(int flag)
2203 return flag; 2107 return flag;
2204} 2108}
2205 2109
2206static int open_will_truncate(int flag, struct inode *inode)
2207{
2208 /*
2209 * We'll never write to the fs underlying
2210 * a device file.
2211 */
2212 if (special_file(inode->i_mode))
2213 return 0;
2214 return (flag & O_TRUNC);
2215}
2216
2217static struct file *finish_open(struct nameidata *nd,
2218 int open_flag, int acc_mode)
2219{
2220 struct file *filp;
2221 int will_truncate;
2222 int error;
2223
2224 will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
2225 if (will_truncate) {
2226 error = mnt_want_write(nd->path.mnt);
2227 if (error)
2228 goto exit;
2229 }
2230 error = may_open(&nd->path, acc_mode, open_flag);
2231 if (error) {
2232 if (will_truncate)
2233 mnt_drop_write(nd->path.mnt);
2234 goto exit;
2235 }
2236 filp = nameidata_to_filp(nd);
2237 if (!IS_ERR(filp)) {
2238 error = ima_file_check(filp, acc_mode);
2239 if (error) {
2240 fput(filp);
2241 filp = ERR_PTR(error);
2242 }
2243 }
2244 if (!IS_ERR(filp)) {
2245 if (will_truncate) {
2246 error = handle_truncate(filp);
2247 if (error) {
2248 fput(filp);
2249 filp = ERR_PTR(error);
2250 }
2251 }
2252 }
2253 /*
2254 * It is now safe to drop the mnt write
2255 * because the filp has had a write taken
2256 * on its behalf.
2257 */
2258 if (will_truncate)
2259 mnt_drop_write(nd->path.mnt);
2260 path_put(&nd->path);
2261 return filp;
2262
2263exit:
2264 path_put(&nd->path);
2265 return ERR_PTR(error);
2266}
2267
2268/* 2110/*
2269 * Handle O_CREAT case for do_filp_open 2111 * Handle the last step of open()
2270 */ 2112 */
2271static struct file *do_last(struct nameidata *nd, struct path *path, 2113static struct file *do_last(struct nameidata *nd, struct path *path,
2272 int open_flag, int acc_mode, 2114 const struct open_flags *op, const char *pathname)
2273 int mode, const char *pathname)
2274{ 2115{
2275 struct dentry *dir = nd->path.dentry; 2116 struct dentry *dir = nd->path.dentry;
2117 struct dentry *dentry;
2118 int open_flag = op->open_flag;
2119 int will_truncate = open_flag & O_TRUNC;
2120 int want_write = 0;
2121 int acc_mode = op->acc_mode;
2276 struct file *filp; 2122 struct file *filp;
2277 int error = -EISDIR; 2123 int error;
2124
2125 nd->flags &= ~LOOKUP_PARENT;
2126 nd->flags |= op->intent;
2278 2127
2279 switch (nd->last_type) { 2128 switch (nd->last_type) {
2280 case LAST_DOTDOT: 2129 case LAST_DOTDOT:
2281 follow_dotdot(nd);
2282 dir = nd->path.dentry;
2283 case LAST_DOT: 2130 case LAST_DOT:
2284 if (need_reval_dot(dir)) { 2131 error = handle_dots(nd, nd->last_type);
2285 int status = d_revalidate(nd->path.dentry, nd); 2132 if (error)
2286 if (!status) 2133 return ERR_PTR(error);
2287 status = -ESTALE;
2288 if (status < 0) {
2289 error = status;
2290 goto exit;
2291 }
2292 }
2293 /* fallthrough */ 2134 /* fallthrough */
2294 case LAST_ROOT: 2135 case LAST_ROOT:
2295 goto exit; 2136 if (nd->flags & LOOKUP_RCU) {
2137 if (nameidata_drop_rcu_last(nd))
2138 return ERR_PTR(-ECHILD);
2139 }
2140 error = handle_reval_path(nd);
2141 if (error)
2142 goto exit;
2143 audit_inode(pathname, nd->path.dentry);
2144 if (open_flag & O_CREAT) {
2145 error = -EISDIR;
2146 goto exit;
2147 }
2148 goto ok;
2296 case LAST_BIND: 2149 case LAST_BIND:
2150 /* can't be RCU mode here */
2151 error = handle_reval_path(nd);
2152 if (error)
2153 goto exit;
2297 audit_inode(pathname, dir); 2154 audit_inode(pathname, dir);
2298 goto ok; 2155 goto ok;
2299 } 2156 }
2300 2157
2158 if (!(open_flag & O_CREAT)) {
2159 int symlink_ok = 0;
2160 if (nd->last.name[nd->last.len])
2161 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
2162 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
2163 symlink_ok = 1;
2164 /* we _can_ be in RCU mode here */
2165 error = walk_component(nd, path, &nd->last, LAST_NORM,
2166 !symlink_ok);
2167 if (error < 0)
2168 return ERR_PTR(error);
2169 if (error) /* symlink */
2170 return NULL;
2171 /* sayonara */
2172 if (nd->flags & LOOKUP_RCU) {
2173 if (nameidata_drop_rcu_last(nd))
2174 return ERR_PTR(-ECHILD);
2175 }
2176
2177 error = -ENOTDIR;
2178 if (nd->flags & LOOKUP_DIRECTORY) {
2179 if (!nd->inode->i_op->lookup)
2180 goto exit;
2181 }
2182 audit_inode(pathname, nd->path.dentry);
2183 goto ok;
2184 }
2185
2186 /* create side of things */
2187
2188 if (nd->flags & LOOKUP_RCU) {
2189 if (nameidata_drop_rcu_last(nd))
2190 return ERR_PTR(-ECHILD);
2191 }
2192
2193 audit_inode(pathname, dir);
2194 error = -EISDIR;
2301 /* trailing slashes? */ 2195 /* trailing slashes? */
2302 if (nd->last.name[nd->last.len]) 2196 if (nd->last.name[nd->last.len])
2303 goto exit; 2197 goto exit;
2304 2198
2305 mutex_lock(&dir->d_inode->i_mutex); 2199 mutex_lock(&dir->d_inode->i_mutex);
2306 2200
2307 path->dentry = lookup_hash(nd); 2201 dentry = lookup_hash(nd);
2308 path->mnt = nd->path.mnt; 2202 error = PTR_ERR(dentry);
2309 2203 if (IS_ERR(dentry)) {
2310 error = PTR_ERR(path->dentry);
2311 if (IS_ERR(path->dentry)) {
2312 mutex_unlock(&dir->d_inode->i_mutex); 2204 mutex_unlock(&dir->d_inode->i_mutex);
2313 goto exit; 2205 goto exit;
2314 } 2206 }
2315 2207
2316 if (IS_ERR(nd->intent.open.file)) { 2208 path->dentry = dentry;
2317 error = PTR_ERR(nd->intent.open.file); 2209 path->mnt = nd->path.mnt;
2318 goto exit_mutex_unlock;
2319 }
2320 2210
2321 /* Negative dentry, just create the file */ 2211 /* Negative dentry, just create the file */
2322 if (!path->dentry->d_inode) { 2212 if (!dentry->d_inode) {
2213 int mode = op->mode;
2214 if (!IS_POSIXACL(dir->d_inode))
2215 mode &= ~current_umask();
2323 /* 2216 /*
2324 * This write is needed to ensure that a 2217 * This write is needed to ensure that a
2325 * ro->rw transition does not occur between 2218 * rw->ro transition does not occur between
2326 * the time when the file is created and when 2219 * the time when the file is created and when
2327 * a permanent write count is taken through 2220 * a permanent write count is taken through
2328 * the 'struct file' in nameidata_to_filp(). 2221 * the 'struct file' in nameidata_to_filp().
@@ -2330,22 +2223,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2330 error = mnt_want_write(nd->path.mnt); 2223 error = mnt_want_write(nd->path.mnt);
2331 if (error) 2224 if (error)
2332 goto exit_mutex_unlock; 2225 goto exit_mutex_unlock;
2333 error = __open_namei_create(nd, path, open_flag, mode); 2226 want_write = 1;
2334 if (error) { 2227 /* Don't check for write permission, don't truncate */
2335 mnt_drop_write(nd->path.mnt); 2228 open_flag &= ~O_TRUNC;
2336 goto exit; 2229 will_truncate = 0;
2337 } 2230 acc_mode = MAY_OPEN;
2338 filp = nameidata_to_filp(nd); 2231 error = security_path_mknod(&nd->path, dentry, mode, 0);
2339 mnt_drop_write(nd->path.mnt); 2232 if (error)
2340 path_put(&nd->path); 2233 goto exit_mutex_unlock;
2341 if (!IS_ERR(filp)) { 2234 error = vfs_create(dir->d_inode, dentry, mode, nd);
2342 error = ima_file_check(filp, acc_mode); 2235 if (error)
2343 if (error) { 2236 goto exit_mutex_unlock;
2344 fput(filp); 2237 mutex_unlock(&dir->d_inode->i_mutex);
2345 filp = ERR_PTR(error); 2238 dput(nd->path.dentry);
2346 } 2239 nd->path.dentry = dentry;
2347 } 2240 goto common;
2348 return filp;
2349 } 2241 }
2350 2242
2351 /* 2243 /*
@@ -2375,7 +2267,40 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2375 if (S_ISDIR(nd->inode->i_mode)) 2267 if (S_ISDIR(nd->inode->i_mode))
2376 goto exit; 2268 goto exit;
2377ok: 2269ok:
2378 filp = finish_open(nd, open_flag, acc_mode); 2270 if (!S_ISREG(nd->inode->i_mode))
2271 will_truncate = 0;
2272
2273 if (will_truncate) {
2274 error = mnt_want_write(nd->path.mnt);
2275 if (error)
2276 goto exit;
2277 want_write = 1;
2278 }
2279common:
2280 error = may_open(&nd->path, acc_mode, open_flag);
2281 if (error)
2282 goto exit;
2283 filp = nameidata_to_filp(nd);
2284 if (!IS_ERR(filp)) {
2285 error = ima_file_check(filp, op->acc_mode);
2286 if (error) {
2287 fput(filp);
2288 filp = ERR_PTR(error);
2289 }
2290 }
2291 if (!IS_ERR(filp)) {
2292 if (will_truncate) {
2293 error = handle_truncate(filp);
2294 if (error) {
2295 fput(filp);
2296 filp = ERR_PTR(error);
2297 }
2298 }
2299 }
2300out:
2301 if (want_write)
2302 mnt_drop_write(nd->path.mnt);
2303 path_put(&nd->path);
2379 return filp; 2304 return filp;
2380 2305
2381exit_mutex_unlock: 2306exit_mutex_unlock:
@@ -2383,204 +2308,103 @@ exit_mutex_unlock:
2383exit_dput: 2308exit_dput:
2384 path_put_conditional(path, nd); 2309 path_put_conditional(path, nd);
2385exit: 2310exit:
2386 path_put(&nd->path); 2311 filp = ERR_PTR(error);
2387 return ERR_PTR(error); 2312 goto out;
2388} 2313}
2389 2314
2390/* 2315static struct file *path_openat(int dfd, const char *pathname,
2391 * Note that the low bits of the passed in "open_flag" 2316 struct nameidata *nd, const struct open_flags *op, int flags)
2392 * are not the same as in the local variable "flag". See
2393 * open_to_namei_flags() for more details.
2394 */
2395struct file *do_filp_open(int dfd, const char *pathname,
2396 int open_flag, int mode, int acc_mode)
2397{ 2317{
2318 struct file *base = NULL;
2398 struct file *filp; 2319 struct file *filp;
2399 struct nameidata nd;
2400 int error;
2401 struct path path; 2320 struct path path;
2402 int count = 0; 2321 int error;
2403 int flag = open_to_namei_flags(open_flag);
2404 int flags;
2405
2406 if (!(open_flag & O_CREAT))
2407 mode = 0;
2408
2409 /* Must never be set by userspace */
2410 open_flag &= ~FMODE_NONOTIFY;
2411
2412 /*
2413 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
2414 * check for O_DSYNC if the need any syncing at all we enforce it's
2415 * always set instead of having to deal with possibly weird behaviour
2416 * for malicious applications setting only __O_SYNC.
2417 */
2418 if (open_flag & __O_SYNC)
2419 open_flag |= O_DSYNC;
2420
2421 if (!acc_mode)
2422 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
2423
2424 /* O_TRUNC implies we need access checks for write permissions */
2425 if (open_flag & O_TRUNC)
2426 acc_mode |= MAY_WRITE;
2427
2428 /* Allow the LSM permission hook to distinguish append
2429 access from general write access. */
2430 if (open_flag & O_APPEND)
2431 acc_mode |= MAY_APPEND;
2432
2433 flags = LOOKUP_OPEN;
2434 if (open_flag & O_CREAT) {
2435 flags |= LOOKUP_CREATE;
2436 if (open_flag & O_EXCL)
2437 flags |= LOOKUP_EXCL;
2438 }
2439 if (open_flag & O_DIRECTORY)
2440 flags |= LOOKUP_DIRECTORY;
2441 if (!(open_flag & O_NOFOLLOW))
2442 flags |= LOOKUP_FOLLOW;
2443 2322
2444 filp = get_empty_filp(); 2323 filp = get_empty_filp();
2445 if (!filp) 2324 if (!filp)
2446 return ERR_PTR(-ENFILE); 2325 return ERR_PTR(-ENFILE);
2447 2326
2448 filp->f_flags = open_flag; 2327 filp->f_flags = op->open_flag;
2449 nd.intent.open.file = filp; 2328 nd->intent.open.file = filp;
2450 nd.intent.open.flags = flag; 2329 nd->intent.open.flags = open_to_namei_flags(op->open_flag);
2451 nd.intent.open.create_mode = mode; 2330 nd->intent.open.create_mode = op->mode;
2452
2453 if (open_flag & O_CREAT)
2454 goto creat;
2455 2331
2456 /* !O_CREAT, simple open */ 2332 error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
2457 error = do_path_lookup(dfd, pathname, flags, &nd);
2458 if (unlikely(error)) 2333 if (unlikely(error))
2459 goto out_filp2;
2460 error = -ELOOP;
2461 if (!(nd.flags & LOOKUP_FOLLOW)) {
2462 if (nd.inode->i_op->follow_link)
2463 goto out_path2;
2464 }
2465 error = -ENOTDIR;
2466 if (nd.flags & LOOKUP_DIRECTORY) {
2467 if (!nd.inode->i_op->lookup)
2468 goto out_path2;
2469 }
2470 audit_inode(pathname, nd.path.dentry);
2471 filp = finish_open(&nd, open_flag, acc_mode);
2472out2:
2473 release_open_intent(&nd);
2474 return filp;
2475
2476out_path2:
2477 path_put(&nd.path);
2478out_filp2:
2479 filp = ERR_PTR(error);
2480 goto out2;
2481
2482creat:
2483 /* OK, have to create the file. Find the parent. */
2484 error = path_init_rcu(dfd, pathname,
2485 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2486 if (error)
2487 goto out_filp; 2334 goto out_filp;
2488 error = path_walk_rcu(pathname, &nd);
2489 path_finish_rcu(&nd);
2490 if (unlikely(error == -ECHILD || error == -ESTALE)) {
2491 /* slower, locked walk */
2492 if (error == -ESTALE) {
2493reval:
2494 flags |= LOOKUP_REVAL;
2495 }
2496 error = path_init(dfd, pathname,
2497 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2498 if (error)
2499 goto out_filp;
2500 2335
2501 error = path_walk_simple(pathname, &nd); 2336 current->total_link_count = 0;
2502 } 2337 error = link_path_walk(pathname, nd);
2503 if (unlikely(error)) 2338 if (unlikely(error))
2504 goto out_filp; 2339 goto out_filp;
2505 if (unlikely(!audit_dummy_context()))
2506 audit_inode(pathname, nd.path.dentry);
2507 2340
2508 /* 2341 filp = do_last(nd, &path, op, pathname);
2509 * We have the parent and last component.
2510 */
2511 nd.flags = flags;
2512 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
2513 while (unlikely(!filp)) { /* trailing symlink */ 2342 while (unlikely(!filp)) { /* trailing symlink */
2514 struct path link = path; 2343 struct path link = path;
2515 struct inode *linki = link.dentry->d_inode;
2516 void *cookie; 2344 void *cookie;
2517 error = -ELOOP; 2345 if (!(nd->flags & LOOKUP_FOLLOW)) {
2518 if (!(nd.flags & LOOKUP_FOLLOW)) 2346 path_put_conditional(&path, nd);
2519 goto exit_dput; 2347 path_put(&nd->path);
2520 if (count++ == 32) 2348 filp = ERR_PTR(-ELOOP);
2521 goto exit_dput; 2349 break;
2522 /*
2523 * This is subtle. Instead of calling do_follow_link() we do
2524 * the thing by hands. The reason is that this way we have zero
2525 * link_count and path_walk() (called from ->follow_link)
2526 * honoring LOOKUP_PARENT. After that we have the parent and
2527 * last component, i.e. we are in the same situation as after
2528 * the first path_walk(). Well, almost - if the last component
2529 * is normal we get its copy stored in nd->last.name and we will
2530 * have to putname() it when we are done. Procfs-like symlinks
2531 * just set LAST_BIND.
2532 */
2533 nd.flags |= LOOKUP_PARENT;
2534 error = security_inode_follow_link(link.dentry, &nd);
2535 if (error)
2536 goto exit_dput;
2537 error = __do_follow_link(&link, &nd, &cookie);
2538 if (unlikely(error)) {
2539 if (!IS_ERR(cookie) && linki->i_op->put_link)
2540 linki->i_op->put_link(link.dentry, &nd, cookie);
2541 /* nd.path had been dropped */
2542 nd.path = link;
2543 goto out_path;
2544 } 2350 }
2545 nd.flags &= ~LOOKUP_PARENT; 2351 nd->flags |= LOOKUP_PARENT;
2546 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2352 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
2547 if (linki->i_op->put_link) 2353 error = follow_link(&link, nd, &cookie);
2548 linki->i_op->put_link(link.dentry, &nd, cookie); 2354 if (unlikely(error))
2549 path_put(&link); 2355 filp = ERR_PTR(error);
2356 else
2357 filp = do_last(nd, &path, op, pathname);
2358 put_link(nd, &link, cookie);
2550 } 2359 }
2551out: 2360out:
2552 if (nd.root.mnt) 2361 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
2553 path_put(&nd.root); 2362 path_put(&nd->root);
2554 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) 2363 if (base)
2555 goto reval; 2364 fput(base);
2556 release_open_intent(&nd); 2365 release_open_intent(nd);
2557 return filp; 2366 return filp;
2558 2367
2559exit_dput:
2560 path_put_conditional(&path, &nd);
2561out_path:
2562 path_put(&nd.path);
2563out_filp: 2368out_filp:
2564 filp = ERR_PTR(error); 2369 filp = ERR_PTR(error);
2565 goto out; 2370 goto out;
2566} 2371}
2567 2372
2568/** 2373struct file *do_filp_open(int dfd, const char *pathname,
2569 * filp_open - open file and return file pointer 2374 const struct open_flags *op, int flags)
2570 *
2571 * @filename: path to open
2572 * @flags: open flags as per the open(2) second argument
2573 * @mode: mode for the new file if O_CREAT is set, else ignored
2574 *
2575 * This is the helper to open a file from kernelspace if you really
2576 * have to. But in generally you should not do this, so please move
2577 * along, nothing to see here..
2578 */
2579struct file *filp_open(const char *filename, int flags, int mode)
2580{ 2375{
2581 return do_filp_open(AT_FDCWD, filename, flags, mode, 0); 2376 struct nameidata nd;
2377 struct file *filp;
2378
2379 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
2380 if (unlikely(filp == ERR_PTR(-ECHILD)))
2381 filp = path_openat(dfd, pathname, &nd, op, flags);
2382 if (unlikely(filp == ERR_PTR(-ESTALE)))
2383 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
2384 return filp;
2385}
2386
2387struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
2388 const char *name, const struct open_flags *op, int flags)
2389{
2390 struct nameidata nd;
2391 struct file *file;
2392
2393 nd.root.mnt = mnt;
2394 nd.root.dentry = dentry;
2395
2396 flags |= LOOKUP_ROOT;
2397
2398 if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
2399 return ERR_PTR(-ELOOP);
2400
2401 file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU);
2402 if (unlikely(file == ERR_PTR(-ECHILD)))
2403 file = path_openat(-1, name, &nd, op, flags);
2404 if (unlikely(file == ERR_PTR(-ESTALE)))
2405 file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL);
2406 return file;
2582} 2407}
2583EXPORT_SYMBOL(filp_open);
2584 2408
2585/** 2409/**
2586 * lookup_create - lookup a dentry, creating it if it doesn't exist 2410 * lookup_create - lookup a dentry, creating it if it doesn't exist
@@ -2642,7 +2466,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2642 if (error) 2466 if (error)
2643 return error; 2467 return error;
2644 2468
2645 if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) 2469 if ((S_ISCHR(mode) || S_ISBLK(mode)) &&
2470 !ns_capable(inode_userns(dir), CAP_MKNOD))
2646 return -EPERM; 2471 return -EPERM;
2647 2472
2648 if (!dir->i_op->mknod) 2473 if (!dir->i_op->mknod)
@@ -3119,7 +2944,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
3119 return error; 2944 return error;
3120 2945
3121 mutex_lock(&inode->i_mutex); 2946 mutex_lock(&inode->i_mutex);
3122 error = dir->i_op->link(old_dentry, dir, new_dentry); 2947 /* Make sure we don't allow creating hardlink to an unlinked file */
2948 if (inode->i_nlink == 0)
2949 error = -ENOENT;
2950 else
2951 error = dir->i_op->link(old_dentry, dir, new_dentry);
3123 mutex_unlock(&inode->i_mutex); 2952 mutex_unlock(&inode->i_mutex);
3124 if (!error) 2953 if (!error)
3125 fsnotify_link(dir, inode, new_dentry); 2954 fsnotify_link(dir, inode, new_dentry);
@@ -3141,15 +2970,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3141 struct dentry *new_dentry; 2970 struct dentry *new_dentry;
3142 struct nameidata nd; 2971 struct nameidata nd;
3143 struct path old_path; 2972 struct path old_path;
2973 int how = 0;
3144 int error; 2974 int error;
3145 char *to; 2975 char *to;
3146 2976
3147 if ((flags & ~AT_SYMLINK_FOLLOW) != 0) 2977 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
3148 return -EINVAL; 2978 return -EINVAL;
2979 /*
2980 * To use null names we require CAP_DAC_READ_SEARCH
2981 * This ensures that not everyone will be able to create
2982 * handlink using the passed filedescriptor.
2983 */
2984 if (flags & AT_EMPTY_PATH) {
2985 if (!capable(CAP_DAC_READ_SEARCH))
2986 return -ENOENT;
2987 how = LOOKUP_EMPTY;
2988 }
2989
2990 if (flags & AT_SYMLINK_FOLLOW)
2991 how |= LOOKUP_FOLLOW;
3149 2992
3150 error = user_path_at(olddfd, oldname, 2993 error = user_path_at(olddfd, oldname, how, &old_path);
3151 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
3152 &old_path);
3153 if (error) 2994 if (error)
3154 return error; 2995 return error;
3155 2996
@@ -3586,7 +3427,7 @@ EXPORT_SYMBOL(page_readlink);
3586EXPORT_SYMBOL(__page_symlink); 3427EXPORT_SYMBOL(__page_symlink);
3587EXPORT_SYMBOL(page_symlink); 3428EXPORT_SYMBOL(page_symlink);
3588EXPORT_SYMBOL(page_symlink_inode_operations); 3429EXPORT_SYMBOL(page_symlink_inode_operations);
3589EXPORT_SYMBOL(path_lookup); 3430EXPORT_SYMBOL(kern_path_parent);
3590EXPORT_SYMBOL(kern_path); 3431EXPORT_SYMBOL(kern_path);
3591EXPORT_SYMBOL(vfs_path_lookup); 3432EXPORT_SYMBOL(vfs_path_lookup);
3592EXPORT_SYMBOL(inode_permission); 3433EXPORT_SYMBOL(inode_permission);