aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r--fs/namei.c743
1 files changed, 606 insertions, 137 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 5642bc2be418..8d3f15b3a541 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -169,8 +169,8 @@ EXPORT_SYMBOL(putname);
169/* 169/*
170 * This does basic POSIX ACL permission checking 170 * This does basic POSIX ACL permission checking
171 */ 171 */
172static int acl_permission_check(struct inode *inode, int mask, 172static inline int __acl_permission_check(struct inode *inode, int mask,
173 int (*check_acl)(struct inode *inode, int mask)) 173 int (*check_acl)(struct inode *inode, int mask), int rcu)
174{ 174{
175 umode_t mode = inode->i_mode; 175 umode_t mode = inode->i_mode;
176 176
@@ -180,9 +180,13 @@ static int acl_permission_check(struct inode *inode, int mask,
180 mode >>= 6; 180 mode >>= 6;
181 else { 181 else {
182 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 182 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
183 int error = check_acl(inode, mask); 183 if (rcu) {
184 if (error != -EAGAIN) 184 return -ECHILD;
185 return error; 185 } else {
186 int error = check_acl(inode, mask);
187 if (error != -EAGAIN)
188 return error;
189 }
186 } 190 }
187 191
188 if (in_group_p(inode->i_gid)) 192 if (in_group_p(inode->i_gid))
@@ -197,6 +201,12 @@ static int acl_permission_check(struct inode *inode, int mask,
197 return -EACCES; 201 return -EACCES;
198} 202}
199 203
204static inline int acl_permission_check(struct inode *inode, int mask,
205 int (*check_acl)(struct inode *inode, int mask))
206{
207 return __acl_permission_check(inode, mask, check_acl, 0);
208}
209
200/** 210/**
201 * generic_permission - check for access rights on a Posix-like filesystem 211 * generic_permission - check for access rights on a Posix-like filesystem
202 * @inode: inode to check access rights for 212 * @inode: inode to check access rights for
@@ -375,6 +385,173 @@ void path_put(struct path *path)
375EXPORT_SYMBOL(path_put); 385EXPORT_SYMBOL(path_put);
376 386
377/** 387/**
388 * nameidata_drop_rcu - drop this nameidata out of rcu-walk
389 * @nd: nameidata pathwalk data to drop
390 * @Returns: 0 on success, -ECHLID on failure
391 *
392 * Path walking has 2 modes, rcu-walk and ref-walk (see
393 * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt
394 * to drop out of rcu-walk mode and take normal reference counts on dentries
395 * and vfsmounts to transition to rcu-walk mode. __drop_rcu* functions take
396 * refcounts at the last known good point before rcu-walk got stuck, so
397 * ref-walk may continue from there. If this is not successful (eg. a seqcount
398 * has changed), then failure is returned and path walk restarts from the
399 * beginning in ref-walk mode.
400 *
401 * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into
402 * ref-walk. Must be called from rcu-walk context.
403 */
404static int nameidata_drop_rcu(struct nameidata *nd)
405{
406 struct fs_struct *fs = current->fs;
407 struct dentry *dentry = nd->path.dentry;
408
409 BUG_ON(!(nd->flags & LOOKUP_RCU));
410 if (nd->root.mnt) {
411 spin_lock(&fs->lock);
412 if (nd->root.mnt != fs->root.mnt ||
413 nd->root.dentry != fs->root.dentry)
414 goto err_root;
415 }
416 spin_lock(&dentry->d_lock);
417 if (!__d_rcu_to_refcount(dentry, nd->seq))
418 goto err;
419 BUG_ON(nd->inode != dentry->d_inode);
420 spin_unlock(&dentry->d_lock);
421 if (nd->root.mnt) {
422 path_get(&nd->root);
423 spin_unlock(&fs->lock);
424 }
425 mntget(nd->path.mnt);
426
427 rcu_read_unlock();
428 br_read_unlock(vfsmount_lock);
429 nd->flags &= ~LOOKUP_RCU;
430 return 0;
431err:
432 spin_unlock(&dentry->d_lock);
433err_root:
434 if (nd->root.mnt)
435 spin_unlock(&fs->lock);
436 return -ECHILD;
437}
438
439/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
440static inline int nameidata_drop_rcu_maybe(struct nameidata *nd)
441{
442 if (nd->flags & LOOKUP_RCU)
443 return nameidata_drop_rcu(nd);
444 return 0;
445}
446
447/**
448 * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk
449 * @nd: nameidata pathwalk data to drop
450 * @dentry: dentry to drop
451 * @Returns: 0 on success, -ECHLID on failure
452 *
453 * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root,
454 * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on
455 * @nd. Must be called from rcu-walk context.
456 */
457static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry)
458{
459 struct fs_struct *fs = current->fs;
460 struct dentry *parent = nd->path.dentry;
461
462 BUG_ON(!(nd->flags & LOOKUP_RCU));
463 if (nd->root.mnt) {
464 spin_lock(&fs->lock);
465 if (nd->root.mnt != fs->root.mnt ||
466 nd->root.dentry != fs->root.dentry)
467 goto err_root;
468 }
469 spin_lock(&parent->d_lock);
470 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
471 if (!__d_rcu_to_refcount(dentry, nd->seq))
472 goto err;
473 /*
474 * If the sequence check on the child dentry passed, then the child has
475 * not been removed from its parent. This means the parent dentry must
476 * be valid and able to take a reference at this point.
477 */
478 BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
479 BUG_ON(!parent->d_count);
480 parent->d_count++;
481 spin_unlock(&dentry->d_lock);
482 spin_unlock(&parent->d_lock);
483 if (nd->root.mnt) {
484 path_get(&nd->root);
485 spin_unlock(&fs->lock);
486 }
487 mntget(nd->path.mnt);
488
489 rcu_read_unlock();
490 br_read_unlock(vfsmount_lock);
491 nd->flags &= ~LOOKUP_RCU;
492 return 0;
493err:
494 spin_unlock(&dentry->d_lock);
495 spin_unlock(&parent->d_lock);
496err_root:
497 if (nd->root.mnt)
498 spin_unlock(&fs->lock);
499 return -ECHILD;
500}
501
502/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
503static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
504{
505 if (nd->flags & LOOKUP_RCU)
506 return nameidata_dentry_drop_rcu(nd, dentry);
507 return 0;
508}
509
510/**
511 * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk
512 * @nd: nameidata pathwalk data to drop
513 * @Returns: 0 on success, -ECHLID on failure
514 *
515 * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk.
516 * nd->path should be the final element of the lookup, so nd->root is discarded.
517 * Must be called from rcu-walk context.
518 */
519static int nameidata_drop_rcu_last(struct nameidata *nd)
520{
521 struct dentry *dentry = nd->path.dentry;
522
523 BUG_ON(!(nd->flags & LOOKUP_RCU));
524 nd->flags &= ~LOOKUP_RCU;
525 nd->root.mnt = NULL;
526 spin_lock(&dentry->d_lock);
527 if (!__d_rcu_to_refcount(dentry, nd->seq))
528 goto err_unlock;
529 BUG_ON(nd->inode != dentry->d_inode);
530 spin_unlock(&dentry->d_lock);
531
532 mntget(nd->path.mnt);
533
534 rcu_read_unlock();
535 br_read_unlock(vfsmount_lock);
536
537 return 0;
538
539err_unlock:
540 spin_unlock(&dentry->d_lock);
541 rcu_read_unlock();
542 br_read_unlock(vfsmount_lock);
543 return -ECHILD;
544}
545
546/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
547static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
548{
549 if (likely(nd->flags & LOOKUP_RCU))
550 return nameidata_drop_rcu_last(nd);
551 return 0;
552}
553
554/**
378 * release_open_intent - free up open intent resources 555 * release_open_intent - free up open intent resources
379 * @nd: pointer to nameidata 556 * @nd: pointer to nameidata
380 */ 557 */
@@ -459,26 +636,40 @@ force_reval_path(struct path *path, struct nameidata *nd)
459 * short-cut DAC fails, then call ->permission() to do more 636 * short-cut DAC fails, then call ->permission() to do more
460 * complete permission check. 637 * complete permission check.
461 */ 638 */
462static int exec_permission(struct inode *inode) 639static inline int __exec_permission(struct inode *inode, int rcu)
463{ 640{
464 int ret; 641 int ret;
465 642
466 if (inode->i_op->permission) { 643 if (inode->i_op->permission) {
644 if (rcu)
645 return -ECHILD;
467 ret = inode->i_op->permission(inode, MAY_EXEC); 646 ret = inode->i_op->permission(inode, MAY_EXEC);
468 if (!ret) 647 if (!ret)
469 goto ok; 648 goto ok;
470 return ret; 649 return ret;
471 } 650 }
472 ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl); 651 ret = __acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl, rcu);
473 if (!ret) 652 if (!ret)
474 goto ok; 653 goto ok;
654 if (rcu && ret == -ECHILD)
655 return ret;
475 656
476 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) 657 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
477 goto ok; 658 goto ok;
478 659
479 return ret; 660 return ret;
480ok: 661ok:
481 return security_inode_permission(inode, MAY_EXEC); 662 return security_inode_exec_permission(inode, rcu);
663}
664
665static int exec_permission(struct inode *inode)
666{
667 return __exec_permission(inode, 0);
668}
669
670static int exec_permission_rcu(struct inode *inode)
671{
672 return __exec_permission(inode, 1);
482} 673}
483 674
484static __always_inline void set_root(struct nameidata *nd) 675static __always_inline void set_root(struct nameidata *nd)
@@ -489,8 +680,20 @@ static __always_inline void set_root(struct nameidata *nd)
489 680
490static int link_path_walk(const char *, struct nameidata *); 681static int link_path_walk(const char *, struct nameidata *);
491 682
683static __always_inline void set_root_rcu(struct nameidata *nd)
684{
685 if (!nd->root.mnt) {
686 struct fs_struct *fs = current->fs;
687 spin_lock(&fs->lock);
688 nd->root = fs->root;
689 spin_unlock(&fs->lock);
690 }
691}
692
492static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 693static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
493{ 694{
695 int ret;
696
494 if (IS_ERR(link)) 697 if (IS_ERR(link))
495 goto fail; 698 goto fail;
496 699
@@ -500,8 +703,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
500 nd->path = nd->root; 703 nd->path = nd->root;
501 path_get(&nd->root); 704 path_get(&nd->root);
502 } 705 }
706 nd->inode = nd->path.dentry->d_inode;
503 707
504 return link_path_walk(link, nd); 708 ret = link_path_walk(link, nd);
709 return ret;
505fail: 710fail:
506 path_put(&nd->path); 711 path_put(&nd->path);
507 return PTR_ERR(link); 712 return PTR_ERR(link);
@@ -516,11 +721,12 @@ static void path_put_conditional(struct path *path, struct nameidata *nd)
516 721
517static inline void path_to_nameidata(struct path *path, struct nameidata *nd) 722static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
518{ 723{
519 dput(nd->path.dentry); 724 if (!(nd->flags & LOOKUP_RCU)) {
520 if (nd->path.mnt != path->mnt) { 725 dput(nd->path.dentry);
521 mntput(nd->path.mnt); 726 if (nd->path.mnt != path->mnt)
522 nd->path.mnt = path->mnt; 727 mntput(nd->path.mnt);
523 } 728 }
729 nd->path.mnt = path->mnt;
524 nd->path.dentry = path->dentry; 730 nd->path.dentry = path->dentry;
525} 731}
526 732
@@ -535,9 +741,11 @@ __do_follow_link(struct path *path, struct nameidata *nd, void **p)
535 741
536 if (path->mnt != nd->path.mnt) { 742 if (path->mnt != nd->path.mnt) {
537 path_to_nameidata(path, nd); 743 path_to_nameidata(path, nd);
744 nd->inode = nd->path.dentry->d_inode;
538 dget(dentry); 745 dget(dentry);
539 } 746 }
540 mntget(path->mnt); 747 mntget(path->mnt);
748
541 nd->last_type = LAST_BIND; 749 nd->last_type = LAST_BIND;
542 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 750 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
543 error = PTR_ERR(*p); 751 error = PTR_ERR(*p);
@@ -591,6 +799,20 @@ loop:
591 return err; 799 return err;
592} 800}
593 801
802static int follow_up_rcu(struct path *path)
803{
804 struct vfsmount *parent;
805 struct dentry *mountpoint;
806
807 parent = path->mnt->mnt_parent;
808 if (parent == path->mnt)
809 return 0;
810 mountpoint = path->mnt->mnt_mountpoint;
811 path->dentry = mountpoint;
812 path->mnt = parent;
813 return 1;
814}
815
594int follow_up(struct path *path) 816int follow_up(struct path *path)
595{ 817{
596 struct vfsmount *parent; 818 struct vfsmount *parent;
@@ -615,6 +837,21 @@ int follow_up(struct path *path)
615/* 837/*
616 * serialization is taken care of in namespace.c 838 * serialization is taken care of in namespace.c
617 */ 839 */
840static void __follow_mount_rcu(struct nameidata *nd, struct path *path,
841 struct inode **inode)
842{
843 while (d_mountpoint(path->dentry)) {
844 struct vfsmount *mounted;
845 mounted = __lookup_mnt(path->mnt, path->dentry, 1);
846 if (!mounted)
847 return;
848 path->mnt = mounted;
849 path->dentry = mounted->mnt_root;
850 nd->seq = read_seqcount_begin(&path->dentry->d_seq);
851 *inode = path->dentry->d_inode;
852 }
853}
854
618static int __follow_mount(struct path *path) 855static int __follow_mount(struct path *path)
619{ 856{
620 int res = 0; 857 int res = 0;
@@ -660,7 +897,42 @@ int follow_down(struct path *path)
660 return 0; 897 return 0;
661} 898}
662 899
663static __always_inline void follow_dotdot(struct nameidata *nd) 900static int follow_dotdot_rcu(struct nameidata *nd)
901{
902 struct inode *inode = nd->inode;
903
904 set_root_rcu(nd);
905
906 while(1) {
907 if (nd->path.dentry == nd->root.dentry &&
908 nd->path.mnt == nd->root.mnt) {
909 break;
910 }
911 if (nd->path.dentry != nd->path.mnt->mnt_root) {
912 struct dentry *old = nd->path.dentry;
913 struct dentry *parent = old->d_parent;
914 unsigned seq;
915
916 seq = read_seqcount_begin(&parent->d_seq);
917 if (read_seqcount_retry(&old->d_seq, nd->seq))
918 return -ECHILD;
919 inode = parent->d_inode;
920 nd->path.dentry = parent;
921 nd->seq = seq;
922 break;
923 }
924 if (!follow_up_rcu(&nd->path))
925 break;
926 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
927 inode = nd->path.dentry->d_inode;
928 }
929 __follow_mount_rcu(nd, &nd->path, &inode);
930 nd->inode = inode;
931
932 return 0;
933}
934
935static void follow_dotdot(struct nameidata *nd)
664{ 936{
665 set_root(nd); 937 set_root(nd);
666 938
@@ -681,6 +953,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
681 break; 953 break;
682 } 954 }
683 follow_mount(&nd->path); 955 follow_mount(&nd->path);
956 nd->inode = nd->path.dentry->d_inode;
684} 957}
685 958
686/* 959/*
@@ -718,18 +991,17 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent,
718 * It _is_ time-critical. 991 * It _is_ time-critical.
719 */ 992 */
720static int do_lookup(struct nameidata *nd, struct qstr *name, 993static int do_lookup(struct nameidata *nd, struct qstr *name,
721 struct path *path) 994 struct path *path, struct inode **inode)
722{ 995{
723 struct vfsmount *mnt = nd->path.mnt; 996 struct vfsmount *mnt = nd->path.mnt;
724 struct dentry *dentry, *parent; 997 struct dentry *dentry, *parent = nd->path.dentry;
725 struct inode *dir; 998 struct inode *dir;
726 /* 999 /*
727 * See if the low-level filesystem might want 1000 * See if the low-level filesystem might want
728 * to use its own hash.. 1001 * to use its own hash..
729 */ 1002 */
730 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { 1003 if (parent->d_op && parent->d_op->d_hash) {
731 int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, 1004 int err = parent->d_op->d_hash(parent, nd->inode, name);
732 nd->path.dentry->d_inode, name);
733 if (err < 0) 1005 if (err < 0)
734 return err; 1006 return err;
735 } 1007 }
@@ -739,21 +1011,48 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
739 * of a false negative due to a concurrent rename, we're going to 1011 * of a false negative due to a concurrent rename, we're going to
740 * do the non-racy lookup, below. 1012 * do the non-racy lookup, below.
741 */ 1013 */
742 dentry = __d_lookup(nd->path.dentry, name); 1014 if (nd->flags & LOOKUP_RCU) {
743 if (!dentry) 1015 unsigned seq;
744 goto need_lookup; 1016
1017 *inode = nd->inode;
1018 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1019 if (!dentry) {
1020 if (nameidata_drop_rcu(nd))
1021 return -ECHILD;
1022 goto need_lookup;
1023 }
1024 /* Memory barrier in read_seqcount_begin of child is enough */
1025 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1026 return -ECHILD;
1027
1028 nd->seq = seq;
1029 if (dentry->d_op && dentry->d_op->d_revalidate) {
1030 /* We commonly drop rcu-walk here */
1031 if (nameidata_dentry_drop_rcu(nd, dentry))
1032 return -ECHILD;
1033 goto need_revalidate;
1034 }
1035 path->mnt = mnt;
1036 path->dentry = dentry;
1037 __follow_mount_rcu(nd, path, inode);
1038 } else {
1039 dentry = __d_lookup(parent, name);
1040 if (!dentry)
1041 goto need_lookup;
745found: 1042found:
746 if (dentry->d_op && dentry->d_op->d_revalidate) 1043 if (dentry->d_op && dentry->d_op->d_revalidate)
747 goto need_revalidate; 1044 goto need_revalidate;
748done: 1045done:
749 path->mnt = mnt; 1046 path->mnt = mnt;
750 path->dentry = dentry; 1047 path->dentry = dentry;
751 __follow_mount(path); 1048 __follow_mount(path);
1049 *inode = path->dentry->d_inode;
1050 }
752 return 0; 1051 return 0;
753 1052
754need_lookup: 1053need_lookup:
755 parent = nd->path.dentry;
756 dir = parent->d_inode; 1054 dir = parent->d_inode;
1055 BUG_ON(nd->inode != dir);
757 1056
758 mutex_lock(&dir->i_mutex); 1057 mutex_lock(&dir->i_mutex);
759 /* 1058 /*
@@ -815,7 +1114,6 @@ static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
815static int link_path_walk(const char *name, struct nameidata *nd) 1114static int link_path_walk(const char *name, struct nameidata *nd)
816{ 1115{
817 struct path next; 1116 struct path next;
818 struct inode *inode;
819 int err; 1117 int err;
820 unsigned int lookup_flags = nd->flags; 1118 unsigned int lookup_flags = nd->flags;
821 1119
@@ -824,18 +1122,28 @@ static int link_path_walk(const char *name, struct nameidata *nd)
824 if (!*name) 1122 if (!*name)
825 goto return_reval; 1123 goto return_reval;
826 1124
827 inode = nd->path.dentry->d_inode;
828 if (nd->depth) 1125 if (nd->depth)
829 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); 1126 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
830 1127
831 /* At this point we know we have a real path component. */ 1128 /* At this point we know we have a real path component. */
832 for(;;) { 1129 for(;;) {
1130 struct inode *inode;
833 unsigned long hash; 1131 unsigned long hash;
834 struct qstr this; 1132 struct qstr this;
835 unsigned int c; 1133 unsigned int c;
836 1134
837 nd->flags |= LOOKUP_CONTINUE; 1135 nd->flags |= LOOKUP_CONTINUE;
838 err = exec_permission(inode); 1136 if (nd->flags & LOOKUP_RCU) {
1137 err = exec_permission_rcu(nd->inode);
1138 if (err == -ECHILD) {
1139 if (nameidata_drop_rcu(nd))
1140 return -ECHILD;
1141 goto exec_again;
1142 }
1143 } else {
1144exec_again:
1145 err = exec_permission(nd->inode);
1146 }
839 if (err) 1147 if (err)
840 break; 1148 break;
841 1149
@@ -866,37 +1174,44 @@ static int link_path_walk(const char *name, struct nameidata *nd)
866 if (this.name[0] == '.') switch (this.len) { 1174 if (this.name[0] == '.') switch (this.len) {
867 default: 1175 default:
868 break; 1176 break;
869 case 2: 1177 case 2:
870 if (this.name[1] != '.') 1178 if (this.name[1] != '.')
871 break; 1179 break;
872 follow_dotdot(nd); 1180 if (nd->flags & LOOKUP_RCU) {
873 inode = nd->path.dentry->d_inode; 1181 if (follow_dotdot_rcu(nd))
1182 return -ECHILD;
1183 } else
1184 follow_dotdot(nd);
874 /* fallthrough */ 1185 /* fallthrough */
875 case 1: 1186 case 1:
876 continue; 1187 continue;
877 } 1188 }
878 /* This does the actual lookups.. */ 1189 /* This does the actual lookups.. */
879 err = do_lookup(nd, &this, &next); 1190 err = do_lookup(nd, &this, &next, &inode);
880 if (err) 1191 if (err)
881 break; 1192 break;
882
883 err = -ENOENT; 1193 err = -ENOENT;
884 inode = next.dentry->d_inode;
885 if (!inode) 1194 if (!inode)
886 goto out_dput; 1195 goto out_dput;
887 1196
888 if (inode->i_op->follow_link) { 1197 if (inode->i_op->follow_link) {
1198 /* We commonly drop rcu-walk here */
1199 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1200 return -ECHILD;
1201 BUG_ON(inode != next.dentry->d_inode);
889 err = do_follow_link(&next, nd); 1202 err = do_follow_link(&next, nd);
890 if (err) 1203 if (err)
891 goto return_err; 1204 goto return_err;
1205 nd->inode = nd->path.dentry->d_inode;
892 err = -ENOENT; 1206 err = -ENOENT;
893 inode = nd->path.dentry->d_inode; 1207 if (!nd->inode)
894 if (!inode)
895 break; 1208 break;
896 } else 1209 } else {
897 path_to_nameidata(&next, nd); 1210 path_to_nameidata(&next, nd);
1211 nd->inode = inode;
1212 }
898 err = -ENOTDIR; 1213 err = -ENOTDIR;
899 if (!inode->i_op->lookup) 1214 if (!nd->inode->i_op->lookup)
900 break; 1215 break;
901 continue; 1216 continue;
902 /* here ends the main loop */ 1217 /* here ends the main loop */
@@ -911,32 +1226,39 @@ last_component:
911 if (this.name[0] == '.') switch (this.len) { 1226 if (this.name[0] == '.') switch (this.len) {
912 default: 1227 default:
913 break; 1228 break;
914 case 2: 1229 case 2:
915 if (this.name[1] != '.') 1230 if (this.name[1] != '.')
916 break; 1231 break;
917 follow_dotdot(nd); 1232 if (nd->flags & LOOKUP_RCU) {
918 inode = nd->path.dentry->d_inode; 1233 if (follow_dotdot_rcu(nd))
1234 return -ECHILD;
1235 } else
1236 follow_dotdot(nd);
919 /* fallthrough */ 1237 /* fallthrough */
920 case 1: 1238 case 1:
921 goto return_reval; 1239 goto return_reval;
922 } 1240 }
923 err = do_lookup(nd, &this, &next); 1241 err = do_lookup(nd, &this, &next, &inode);
924 if (err) 1242 if (err)
925 break; 1243 break;
926 inode = next.dentry->d_inode;
927 if (follow_on_final(inode, lookup_flags)) { 1244 if (follow_on_final(inode, lookup_flags)) {
1245 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1246 return -ECHILD;
1247 BUG_ON(inode != next.dentry->d_inode);
928 err = do_follow_link(&next, nd); 1248 err = do_follow_link(&next, nd);
929 if (err) 1249 if (err)
930 goto return_err; 1250 goto return_err;
931 inode = nd->path.dentry->d_inode; 1251 nd->inode = nd->path.dentry->d_inode;
932 } else 1252 } else {
933 path_to_nameidata(&next, nd); 1253 path_to_nameidata(&next, nd);
1254 nd->inode = inode;
1255 }
934 err = -ENOENT; 1256 err = -ENOENT;
935 if (!inode) 1257 if (!nd->inode)
936 break; 1258 break;
937 if (lookup_flags & LOOKUP_DIRECTORY) { 1259 if (lookup_flags & LOOKUP_DIRECTORY) {
938 err = -ENOTDIR; 1260 err = -ENOTDIR;
939 if (!inode->i_op->lookup) 1261 if (!nd->inode->i_op->lookup)
940 break; 1262 break;
941 } 1263 }
942 goto return_base; 1264 goto return_base;
@@ -958,6 +1280,8 @@ return_reval:
958 */ 1280 */
959 if (nd->path.dentry && nd->path.dentry->d_sb && 1281 if (nd->path.dentry && nd->path.dentry->d_sb &&
960 (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { 1282 (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
1283 if (nameidata_drop_rcu_maybe(nd))
1284 return -ECHILD;
961 err = -ESTALE; 1285 err = -ESTALE;
962 /* Note: we do not d_invalidate() */ 1286 /* Note: we do not d_invalidate() */
963 if (!nd->path.dentry->d_op->d_revalidate( 1287 if (!nd->path.dentry->d_op->d_revalidate(
@@ -965,16 +1289,34 @@ return_reval:
965 break; 1289 break;
966 } 1290 }
967return_base: 1291return_base:
1292 if (nameidata_drop_rcu_last_maybe(nd))
1293 return -ECHILD;
968 return 0; 1294 return 0;
969out_dput: 1295out_dput:
970 path_put_conditional(&next, nd); 1296 if (!(nd->flags & LOOKUP_RCU))
1297 path_put_conditional(&next, nd);
971 break; 1298 break;
972 } 1299 }
973 path_put(&nd->path); 1300 if (!(nd->flags & LOOKUP_RCU))
1301 path_put(&nd->path);
974return_err: 1302return_err:
975 return err; 1303 return err;
976} 1304}
977 1305
1306static inline int path_walk_rcu(const char *name, struct nameidata *nd)
1307{
1308 current->total_link_count = 0;
1309
1310 return link_path_walk(name, nd);
1311}
1312
1313static inline int path_walk_simple(const char *name, struct nameidata *nd)
1314{
1315 current->total_link_count = 0;
1316
1317 return link_path_walk(name, nd);
1318}
1319
978static int path_walk(const char *name, struct nameidata *nd) 1320static int path_walk(const char *name, struct nameidata *nd)
979{ 1321{
980 struct path save = nd->path; 1322 struct path save = nd->path;
@@ -1000,6 +1342,88 @@ static int path_walk(const char *name, struct nameidata *nd)
1000 return result; 1342 return result;
1001} 1343}
1002 1344
1345static void path_finish_rcu(struct nameidata *nd)
1346{
1347 if (nd->flags & LOOKUP_RCU) {
1348 /* RCU dangling. Cancel it. */
1349 nd->flags &= ~LOOKUP_RCU;
1350 nd->root.mnt = NULL;
1351 rcu_read_unlock();
1352 br_read_unlock(vfsmount_lock);
1353 }
1354 if (nd->file)
1355 fput(nd->file);
1356}
1357
1358static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1359{
1360 int retval = 0;
1361 int fput_needed;
1362 struct file *file;
1363
1364 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1365 nd->flags = flags | LOOKUP_RCU;
1366 nd->depth = 0;
1367 nd->root.mnt = NULL;
1368 nd->file = NULL;
1369
1370 if (*name=='/') {
1371 struct fs_struct *fs = current->fs;
1372
1373 br_read_lock(vfsmount_lock);
1374 rcu_read_lock();
1375
1376 spin_lock(&fs->lock);
1377 nd->root = fs->root;
1378 nd->path = nd->root;
1379 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
1380 spin_unlock(&fs->lock);
1381
1382 } else if (dfd == AT_FDCWD) {
1383 struct fs_struct *fs = current->fs;
1384
1385 br_read_lock(vfsmount_lock);
1386 rcu_read_lock();
1387
1388 spin_lock(&fs->lock);
1389 nd->path = fs->pwd;
1390 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
1391 spin_unlock(&fs->lock);
1392 } else {
1393 struct dentry *dentry;
1394
1395 file = fget_light(dfd, &fput_needed);
1396 retval = -EBADF;
1397 if (!file)
1398 goto out_fail;
1399
1400 dentry = file->f_path.dentry;
1401
1402 retval = -ENOTDIR;
1403 if (!S_ISDIR(dentry->d_inode->i_mode))
1404 goto fput_fail;
1405
1406 retval = file_permission(file, MAY_EXEC);
1407 if (retval)
1408 goto fput_fail;
1409
1410 nd->path = file->f_path;
1411 if (fput_needed)
1412 nd->file = file;
1413
1414 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
1415 br_read_lock(vfsmount_lock);
1416 rcu_read_lock();
1417 }
1418 nd->inode = nd->path.dentry->d_inode;
1419 return 0;
1420
1421fput_fail:
1422 fput_light(file, fput_needed);
1423out_fail:
1424 return retval;
1425}
1426
1003static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1427static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1004{ 1428{
1005 int retval = 0; 1429 int retval = 0;
@@ -1040,6 +1464,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei
1040 1464
1041 fput_light(file, fput_needed); 1465 fput_light(file, fput_needed);
1042 } 1466 }
1467 nd->inode = nd->path.dentry->d_inode;
1043 return 0; 1468 return 0;
1044 1469
1045fput_fail: 1470fput_fail:
@@ -1052,16 +1477,53 @@ out_fail:
1052static int do_path_lookup(int dfd, const char *name, 1477static int do_path_lookup(int dfd, const char *name,
1053 unsigned int flags, struct nameidata *nd) 1478 unsigned int flags, struct nameidata *nd)
1054{ 1479{
1055 int retval = path_init(dfd, name, flags, nd); 1480 int retval;
1056 if (!retval) 1481
1057 retval = path_walk(name, nd); 1482 /*
1058 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1483 * Path walking is largely split up into 2 different synchronisation
1059 nd->path.dentry->d_inode)) 1484 * schemes, rcu-walk and ref-walk (explained in
1060 audit_inode(name, nd->path.dentry); 1485 * Documentation/filesystems/path-lookup.txt). These share much of the
1486 * path walk code, but some things particularly setup, cleanup, and
1487 * following mounts are sufficiently divergent that functions are
1488 * duplicated. Typically there is a function foo(), and its RCU
1489 * analogue, foo_rcu().
1490 *
1491 * -ECHILD is the error number of choice (just to avoid clashes) that
1492 * is returned if some aspect of an rcu-walk fails. Such an error must
1493 * be handled by restarting a traditional ref-walk (which will always
1494 * be able to complete).
1495 */
1496 retval = path_init_rcu(dfd, name, flags, nd);
1497 if (unlikely(retval))
1498 return retval;
1499 retval = path_walk_rcu(name, nd);
1500 path_finish_rcu(nd);
1061 if (nd->root.mnt) { 1501 if (nd->root.mnt) {
1062 path_put(&nd->root); 1502 path_put(&nd->root);
1063 nd->root.mnt = NULL; 1503 nd->root.mnt = NULL;
1064 } 1504 }
1505
1506 if (unlikely(retval == -ECHILD || retval == -ESTALE)) {
1507 /* slower, locked walk */
1508 if (retval == -ESTALE)
1509 flags |= LOOKUP_REVAL;
1510 retval = path_init(dfd, name, flags, nd);
1511 if (unlikely(retval))
1512 return retval;
1513 retval = path_walk(name, nd);
1514 if (nd->root.mnt) {
1515 path_put(&nd->root);
1516 nd->root.mnt = NULL;
1517 }
1518 }
1519
1520 if (likely(!retval)) {
1521 if (unlikely(!audit_dummy_context())) {
1522 if (nd->path.dentry && nd->inode)
1523 audit_inode(name, nd->path.dentry);
1524 }
1525 }
1526
1065 return retval; 1527 return retval;
1066} 1528}
1067 1529
@@ -1104,10 +1566,11 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1104 path_get(&nd->path); 1566 path_get(&nd->path);
1105 nd->root = nd->path; 1567 nd->root = nd->path;
1106 path_get(&nd->root); 1568 path_get(&nd->root);
1569 nd->inode = nd->path.dentry->d_inode;
1107 1570
1108 retval = path_walk(name, nd); 1571 retval = path_walk(name, nd);
1109 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1572 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1110 nd->path.dentry->d_inode)) 1573 nd->inode))
1111 audit_inode(name, nd->path.dentry); 1574 audit_inode(name, nd->path.dentry);
1112 1575
1113 path_put(&nd->root); 1576 path_put(&nd->root);
@@ -1488,6 +1951,7 @@ out_unlock:
1488 mutex_unlock(&dir->d_inode->i_mutex); 1951 mutex_unlock(&dir->d_inode->i_mutex);
1489 dput(nd->path.dentry); 1952 dput(nd->path.dentry);
1490 nd->path.dentry = path->dentry; 1953 nd->path.dentry = path->dentry;
1954
1491 if (error) 1955 if (error)
1492 return error; 1956 return error;
1493 /* Don't check for write permission, don't truncate */ 1957 /* Don't check for write permission, don't truncate */
@@ -1582,6 +2046,9 @@ exit:
1582 return ERR_PTR(error); 2046 return ERR_PTR(error);
1583} 2047}
1584 2048
2049/*
2050 * Handle O_CREAT case for do_filp_open
2051 */
1585static struct file *do_last(struct nameidata *nd, struct path *path, 2052static struct file *do_last(struct nameidata *nd, struct path *path,
1586 int open_flag, int acc_mode, 2053 int open_flag, int acc_mode,
1587 int mode, const char *pathname) 2054 int mode, const char *pathname)
@@ -1603,42 +2070,16 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1603 } 2070 }
1604 /* fallthrough */ 2071 /* fallthrough */
1605 case LAST_ROOT: 2072 case LAST_ROOT:
1606 if (open_flag & O_CREAT) 2073 goto exit;
1607 goto exit;
1608 /* fallthrough */
1609 case LAST_BIND: 2074 case LAST_BIND:
1610 audit_inode(pathname, dir); 2075 audit_inode(pathname, dir);
1611 goto ok; 2076 goto ok;
1612 } 2077 }
1613 2078
1614 /* trailing slashes? */ 2079 /* trailing slashes? */
1615 if (nd->last.name[nd->last.len]) { 2080 if (nd->last.name[nd->last.len])
1616 if (open_flag & O_CREAT) 2081 goto exit;
1617 goto exit;
1618 nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
1619 }
1620
1621 /* just plain open? */
1622 if (!(open_flag & O_CREAT)) {
1623 error = do_lookup(nd, &nd->last, path);
1624 if (error)
1625 goto exit;
1626 error = -ENOENT;
1627 if (!path->dentry->d_inode)
1628 goto exit_dput;
1629 if (path->dentry->d_inode->i_op->follow_link)
1630 return NULL;
1631 error = -ENOTDIR;
1632 if (nd->flags & LOOKUP_DIRECTORY) {
1633 if (!path->dentry->d_inode->i_op->lookup)
1634 goto exit_dput;
1635 }
1636 path_to_nameidata(path, nd);
1637 audit_inode(pathname, nd->path.dentry);
1638 goto ok;
1639 }
1640 2082
1641 /* OK, it's O_CREAT */
1642 mutex_lock(&dir->d_inode->i_mutex); 2083 mutex_lock(&dir->d_inode->i_mutex);
1643 2084
1644 path->dentry = lookup_hash(nd); 2085 path->dentry = lookup_hash(nd);
@@ -1709,8 +2150,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1709 return NULL; 2150 return NULL;
1710 2151
1711 path_to_nameidata(path, nd); 2152 path_to_nameidata(path, nd);
2153 nd->inode = path->dentry->d_inode;
1712 error = -EISDIR; 2154 error = -EISDIR;
1713 if (S_ISDIR(path->dentry->d_inode->i_mode)) 2155 if (S_ISDIR(nd->inode->i_mode))
1714 goto exit; 2156 goto exit;
1715ok: 2157ok:
1716 filp = finish_open(nd, open_flag, acc_mode); 2158 filp = finish_open(nd, open_flag, acc_mode);
@@ -1741,7 +2183,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1741 struct path path; 2183 struct path path;
1742 int count = 0; 2184 int count = 0;
1743 int flag = open_to_namei_flags(open_flag); 2185 int flag = open_to_namei_flags(open_flag);
1744 int force_reval = 0; 2186 int flags;
1745 2187
1746 if (!(open_flag & O_CREAT)) 2188 if (!(open_flag & O_CREAT))
1747 mode = 0; 2189 mode = 0;
@@ -1770,54 +2212,84 @@ struct file *do_filp_open(int dfd, const char *pathname,
1770 if (open_flag & O_APPEND) 2212 if (open_flag & O_APPEND)
1771 acc_mode |= MAY_APPEND; 2213 acc_mode |= MAY_APPEND;
1772 2214
1773 /* find the parent */ 2215 flags = LOOKUP_OPEN;
1774reval: 2216 if (open_flag & O_CREAT) {
1775 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 2217 flags |= LOOKUP_CREATE;
2218 if (open_flag & O_EXCL)
2219 flags |= LOOKUP_EXCL;
2220 }
2221 if (open_flag & O_DIRECTORY)
2222 flags |= LOOKUP_DIRECTORY;
2223 if (!(open_flag & O_NOFOLLOW))
2224 flags |= LOOKUP_FOLLOW;
2225
2226 filp = get_empty_filp();
2227 if (!filp)
2228 return ERR_PTR(-ENFILE);
2229
2230 filp->f_flags = open_flag;
2231 nd.intent.open.file = filp;
2232 nd.intent.open.flags = flag;
2233 nd.intent.open.create_mode = mode;
2234
2235 if (open_flag & O_CREAT)
2236 goto creat;
2237
2238 /* !O_CREAT, simple open */
2239 error = do_path_lookup(dfd, pathname, flags, &nd);
2240 if (unlikely(error))
2241 goto out_filp;
2242 error = -ELOOP;
2243 if (!(nd.flags & LOOKUP_FOLLOW)) {
2244 if (nd.inode->i_op->follow_link)
2245 goto out_path;
2246 }
2247 error = -ENOTDIR;
2248 if (nd.flags & LOOKUP_DIRECTORY) {
2249 if (!nd.inode->i_op->lookup)
2250 goto out_path;
2251 }
2252 audit_inode(pathname, nd.path.dentry);
2253 filp = finish_open(&nd, open_flag, acc_mode);
2254 return filp;
2255
2256creat:
2257 /* OK, have to create the file. Find the parent. */
2258 error = path_init_rcu(dfd, pathname,
2259 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
1776 if (error) 2260 if (error)
1777 return ERR_PTR(error); 2261 goto out_filp;
1778 if (force_reval) 2262 error = path_walk_rcu(pathname, &nd);
1779 nd.flags |= LOOKUP_REVAL; 2263 path_finish_rcu(&nd);
2264 if (unlikely(error == -ECHILD || error == -ESTALE)) {
2265 /* slower, locked walk */
2266 if (error == -ESTALE) {
2267reval:
2268 flags |= LOOKUP_REVAL;
2269 }
2270 error = path_init(dfd, pathname,
2271 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2272 if (error)
2273 goto out_filp;
1780 2274
1781 current->total_link_count = 0; 2275 error = path_walk_simple(pathname, &nd);
1782 error = link_path_walk(pathname, &nd);
1783 if (error) {
1784 filp = ERR_PTR(error);
1785 goto out;
1786 } 2276 }
1787 if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT)) 2277 if (unlikely(error))
2278 goto out_filp;
2279 if (unlikely(!audit_dummy_context()))
1788 audit_inode(pathname, nd.path.dentry); 2280 audit_inode(pathname, nd.path.dentry);
1789 2281
1790 /* 2282 /*
1791 * We have the parent and last component. 2283 * We have the parent and last component.
1792 */ 2284 */
1793 2285 nd.flags = flags;
1794 error = -ENFILE;
1795 filp = get_empty_filp();
1796 if (filp == NULL)
1797 goto exit_parent;
1798 nd.intent.open.file = filp;
1799 filp->f_flags = open_flag;
1800 nd.intent.open.flags = flag;
1801 nd.intent.open.create_mode = mode;
1802 nd.flags &= ~LOOKUP_PARENT;
1803 nd.flags |= LOOKUP_OPEN;
1804 if (open_flag & O_CREAT) {
1805 nd.flags |= LOOKUP_CREATE;
1806 if (open_flag & O_EXCL)
1807 nd.flags |= LOOKUP_EXCL;
1808 }
1809 if (open_flag & O_DIRECTORY)
1810 nd.flags |= LOOKUP_DIRECTORY;
1811 if (!(open_flag & O_NOFOLLOW))
1812 nd.flags |= LOOKUP_FOLLOW;
1813 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2286 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1814 while (unlikely(!filp)) { /* trailing symlink */ 2287 while (unlikely(!filp)) { /* trailing symlink */
1815 struct path holder; 2288 struct path holder;
1816 struct inode *inode = path.dentry->d_inode;
1817 void *cookie; 2289 void *cookie;
1818 error = -ELOOP; 2290 error = -ELOOP;
1819 /* S_ISDIR part is a temporary automount kludge */ 2291 /* S_ISDIR part is a temporary automount kludge */
1820 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode)) 2292 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(nd.inode->i_mode))
1821 goto exit_dput; 2293 goto exit_dput;
1822 if (count++ == 32) 2294 if (count++ == 32)
1823 goto exit_dput; 2295 goto exit_dput;
@@ -1838,36 +2310,33 @@ reval:
1838 goto exit_dput; 2310 goto exit_dput;
1839 error = __do_follow_link(&path, &nd, &cookie); 2311 error = __do_follow_link(&path, &nd, &cookie);
1840 if (unlikely(error)) { 2312 if (unlikely(error)) {
2313 if (!IS_ERR(cookie) && nd.inode->i_op->put_link)
2314 nd.inode->i_op->put_link(path.dentry, &nd, cookie);
1841 /* nd.path had been dropped */ 2315 /* nd.path had been dropped */
1842 if (!IS_ERR(cookie) && inode->i_op->put_link) 2316 nd.path = path;
1843 inode->i_op->put_link(path.dentry, &nd, cookie); 2317 goto out_path;
1844 path_put(&path);
1845 release_open_intent(&nd);
1846 filp = ERR_PTR(error);
1847 goto out;
1848 } 2318 }
1849 holder = path; 2319 holder = path;
1850 nd.flags &= ~LOOKUP_PARENT; 2320 nd.flags &= ~LOOKUP_PARENT;
1851 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2321 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1852 if (inode->i_op->put_link) 2322 if (nd.inode->i_op->put_link)
1853 inode->i_op->put_link(holder.dentry, &nd, cookie); 2323 nd.inode->i_op->put_link(holder.dentry, &nd, cookie);
1854 path_put(&holder); 2324 path_put(&holder);
1855 } 2325 }
1856out: 2326out:
1857 if (nd.root.mnt) 2327 if (nd.root.mnt)
1858 path_put(&nd.root); 2328 path_put(&nd.root);
1859 if (filp == ERR_PTR(-ESTALE) && !force_reval) { 2329 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
1860 force_reval = 1;
1861 goto reval; 2330 goto reval;
1862 }
1863 return filp; 2331 return filp;
1864 2332
1865exit_dput: 2333exit_dput:
1866 path_put_conditional(&path, &nd); 2334 path_put_conditional(&path, &nd);
2335out_path:
2336 path_put(&nd.path);
2337out_filp:
1867 if (!IS_ERR(nd.intent.open.file)) 2338 if (!IS_ERR(nd.intent.open.file))
1868 release_open_intent(&nd); 2339 release_open_intent(&nd);
1869exit_parent:
1870 path_put(&nd.path);
1871 filp = ERR_PTR(error); 2340 filp = ERR_PTR(error);
1872 goto out; 2341 goto out;
1873} 2342}