aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namei.c
diff options
context:
space:
mode:
author Nick Piggin <npiggin@kernel.dk> 2011-01-07 01:49:52 -0500
committer Nick Piggin <npiggin@kernel.dk> 2011-01-07 01:50:27 -0500
commit 31e6b01f4183ff419a6d1f86177cbf4662347cec (patch)
tree e215ec9af88352c55e024f784f3d9f8eb13fab85 /fs/namei.c
parent 3c22cd5709e8143444a6d08682a87f4c57902df3 (diff)
fs: rcu-walk for path lookup
Perform common cases of path lookups without any stores or locking in the ancestor dentry elements. This is called rcu-walk, as opposed to the current algorithm which is a refcount based walk, or ref-walk. This results in far fewer atomic operations on every path element, significantly improving path lookup performance. It also avoids cacheline bouncing on common dentries, significantly improving scalability. The overall design is like this: * LOOKUP_RCU is set in nd->flags, which distinguishes rcu-walk from ref-walk. * Take the RCU lock for the entire path walk, starting with the acquiring of the starting path (eg. root/cwd/fd-path). So now dentry refcounts are not required for dentry persistence. * synchronize_rcu is called when unregistering a filesystem, so we can access d_ops and i_ops during rcu-walk. * Similarly take the vfsmount lock for the entire path walk. So now mnt refcounts are not required for persistence. Also we are free to perform mount lookups, and to assume dentry mount points and mount roots are stable up and down the path. * Have a per-dentry seqlock to protect the dentry name, parent, and inode, so we can load this tuple atomically, and also check whether any of its members have changed. * Dentry lookups (based on parent, candidate string tuple) recheck the parent sequence after the child is found in case anything changed in the parent during the path walk. * inode is also RCU protected so we can load d_inode and use the inode for limited things. * i_mode, i_uid, i_gid can be tested for exec permissions during path walk. * i_op can be loaded. When we reach the destination dentry, we lock it, recheck lookup sequence, and increment its refcount and mountpoint refcount. RCU and vfsmount locks are dropped. This is termed "dropping rcu-walk". If the dentry refcount does not match, we cannot drop rcu-walk gracefully at the current point in the lookup, so instead return -ECHILD (for want of a better errno). 
This signals the path walking code to re-do the entire lookup with a ref-walk. Aside from the final dentry, there are other situations that may be encountered where we cannot continue rcu-walk. In that case, we drop rcu-walk (ie. take a reference on the last good dentry) and continue with a ref-walk. Again, if we cannot drop rcu-walk gracefully, we return -ECHILD and do the whole lookup using ref-walk. But it is very important that we can continue with ref-walk for most cases, particularly to avoid the overhead of double lookups, and to gain the scalability advantages on common path elements (like cwd and root). The cases where rcu-walk cannot continue are: * NULL dentry (ie. any uncached path element) * parent with d_inode->i_op->permission or ACLs * dentries with d_revalidate * Following links In future patches, permission checks and d_revalidate become rcu-walk aware. It may be possible eventually to make following links rcu-walk aware. Uncached path elements will always require dropping to ref-walk mode, at the very least because i_mutex needs to be grabbed, and objects allocated. Signed-off-by: Nick Piggin <npiggin@kernel.dk>
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- fs/namei.c 743
1 files changed, 606 insertions, 137 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 5642bc2be418..8d3f15b3a541 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -169,8 +169,8 @@ EXPORT_SYMBOL(putname);
169/* 169/*
170 * This does basic POSIX ACL permission checking 170 * This does basic POSIX ACL permission checking
171 */ 171 */
172static int acl_permission_check(struct inode *inode, int mask, 172static inline int __acl_permission_check(struct inode *inode, int mask,
173 int (*check_acl)(struct inode *inode, int mask)) 173 int (*check_acl)(struct inode *inode, int mask), int rcu)
174{ 174{
175 umode_t mode = inode->i_mode; 175 umode_t mode = inode->i_mode;
176 176
@@ -180,9 +180,13 @@ static int acl_permission_check(struct inode *inode, int mask,
180 mode >>= 6; 180 mode >>= 6;
181 else { 181 else {
182 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 182 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
183 int error = check_acl(inode, mask); 183 if (rcu) {
184 if (error != -EAGAIN) 184 return -ECHILD;
185 return error; 185 } else {
186 int error = check_acl(inode, mask);
187 if (error != -EAGAIN)
188 return error;
189 }
186 } 190 }
187 191
188 if (in_group_p(inode->i_gid)) 192 if (in_group_p(inode->i_gid))
@@ -197,6 +201,12 @@ static int acl_permission_check(struct inode *inode, int mask,
197 return -EACCES; 201 return -EACCES;
198} 202}
199 203
204static inline int acl_permission_check(struct inode *inode, int mask,
205 int (*check_acl)(struct inode *inode, int mask))
206{
207 return __acl_permission_check(inode, mask, check_acl, 0);
208}
209
200/** 210/**
201 * generic_permission - check for access rights on a Posix-like filesystem 211 * generic_permission - check for access rights on a Posix-like filesystem
202 * @inode: inode to check access rights for 212 * @inode: inode to check access rights for
@@ -375,6 +385,173 @@ void path_put(struct path *path)
375EXPORT_SYMBOL(path_put); 385EXPORT_SYMBOL(path_put);
376 386
377/** 387/**
388 * nameidata_drop_rcu - drop this nameidata out of rcu-walk
389 * @nd: nameidata pathwalk data to drop
390 * @Returns: 0 on success, -ECHILD on failure
391 *
392 * Path walking has 2 modes, rcu-walk and ref-walk (see
393 * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt
394 * to drop out of rcu-walk mode and take normal reference counts on dentries
395 * and vfsmounts to transition to ref-walk mode. __drop_rcu* functions take
396 * refcounts at the last known good point before rcu-walk got stuck, so
397 * ref-walk may continue from there. If this is not successful (eg. a seqcount
398 * has changed), then failure is returned and path walk restarts from the
399 * beginning in ref-walk mode.
400 *
401 * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into
402 * ref-walk. Must be called from rcu-walk context.
403 */
404static int nameidata_drop_rcu(struct nameidata *nd)
405{
406 struct fs_struct *fs = current->fs;
407 struct dentry *dentry = nd->path.dentry;
408
409 BUG_ON(!(nd->flags & LOOKUP_RCU));
410 if (nd->root.mnt) {
411 spin_lock(&fs->lock);
412 if (nd->root.mnt != fs->root.mnt ||
413 nd->root.dentry != fs->root.dentry)
414 goto err_root;
415 }
416 spin_lock(&dentry->d_lock);
417 if (!__d_rcu_to_refcount(dentry, nd->seq))
418 goto err;
419 BUG_ON(nd->inode != dentry->d_inode);
420 spin_unlock(&dentry->d_lock);
421 if (nd->root.mnt) {
422 path_get(&nd->root);
423 spin_unlock(&fs->lock);
424 }
425 mntget(nd->path.mnt);
426
427 rcu_read_unlock();
428 br_read_unlock(vfsmount_lock);
429 nd->flags &= ~LOOKUP_RCU;
430 return 0;
431err:
432 spin_unlock(&dentry->d_lock);
433err_root:
434 if (nd->root.mnt)
435 spin_unlock(&fs->lock);
436 return -ECHILD;
437}
438
439/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
440static inline int nameidata_drop_rcu_maybe(struct nameidata *nd)
441{
442 if (nd->flags & LOOKUP_RCU)
443 return nameidata_drop_rcu(nd);
444 return 0;
445}
446
447/**
448 * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk
449 * @nd: nameidata pathwalk data to drop
450 * @dentry: dentry to drop
451 * @Returns: 0 on success, -ECHILD on failure
452 *
453 * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root,
454 * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on
455 * @nd. Must be called from rcu-walk context.
456 */
457static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry)
458{
459 struct fs_struct *fs = current->fs;
460 struct dentry *parent = nd->path.dentry;
461
462 BUG_ON(!(nd->flags & LOOKUP_RCU));
463 if (nd->root.mnt) {
464 spin_lock(&fs->lock);
465 if (nd->root.mnt != fs->root.mnt ||
466 nd->root.dentry != fs->root.dentry)
467 goto err_root;
468 }
469 spin_lock(&parent->d_lock);
470 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
471 if (!__d_rcu_to_refcount(dentry, nd->seq))
472 goto err;
473 /*
474 * If the sequence check on the child dentry passed, then the child has
475 * not been removed from its parent. This means the parent dentry must
476 * be valid and able to take a reference at this point.
477 */
478 BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
479 BUG_ON(!parent->d_count);
480 parent->d_count++;
481 spin_unlock(&dentry->d_lock);
482 spin_unlock(&parent->d_lock);
483 if (nd->root.mnt) {
484 path_get(&nd->root);
485 spin_unlock(&fs->lock);
486 }
487 mntget(nd->path.mnt);
488
489 rcu_read_unlock();
490 br_read_unlock(vfsmount_lock);
491 nd->flags &= ~LOOKUP_RCU;
492 return 0;
493err:
494 spin_unlock(&dentry->d_lock);
495 spin_unlock(&parent->d_lock);
496err_root:
497 if (nd->root.mnt)
498 spin_unlock(&fs->lock);
499 return -ECHILD;
500}
501
502/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
503static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
504{
505 if (nd->flags & LOOKUP_RCU)
506 return nameidata_dentry_drop_rcu(nd, dentry);
507 return 0;
508}
509
510/**
511 * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk
512 * @nd: nameidata pathwalk data to drop
513 * @Returns: 0 on success, -ECHILD on failure
514 *
515 * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk.
516 * nd->path should be the final element of the lookup, so nd->root is discarded.
517 * Must be called from rcu-walk context.
518 */
519static int nameidata_drop_rcu_last(struct nameidata *nd)
520{
521 struct dentry *dentry = nd->path.dentry;
522
523 BUG_ON(!(nd->flags & LOOKUP_RCU));
524 nd->flags &= ~LOOKUP_RCU;
525 nd->root.mnt = NULL;
526 spin_lock(&dentry->d_lock);
527 if (!__d_rcu_to_refcount(dentry, nd->seq))
528 goto err_unlock;
529 BUG_ON(nd->inode != dentry->d_inode);
530 spin_unlock(&dentry->d_lock);
531
532 mntget(nd->path.mnt);
533
534 rcu_read_unlock();
535 br_read_unlock(vfsmount_lock);
536
537 return 0;
538
539err_unlock:
540 spin_unlock(&dentry->d_lock);
541 rcu_read_unlock();
542 br_read_unlock(vfsmount_lock);
543 return -ECHILD;
544}
545
546/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
547static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
548{
549 if (likely(nd->flags & LOOKUP_RCU))
550 return nameidata_drop_rcu_last(nd);
551 return 0;
552}
553
554/**
378 * release_open_intent - free up open intent resources 555 * release_open_intent - free up open intent resources
379 * @nd: pointer to nameidata 556 * @nd: pointer to nameidata
380 */ 557 */
@@ -459,26 +636,40 @@ force_reval_path(struct path *path, struct nameidata *nd)
459 * short-cut DAC fails, then call ->permission() to do more 636 * short-cut DAC fails, then call ->permission() to do more
460 * complete permission check. 637 * complete permission check.
461 */ 638 */
462static int exec_permission(struct inode *inode) 639static inline int __exec_permission(struct inode *inode, int rcu)
463{ 640{
464 int ret; 641 int ret;
465 642
466 if (inode->i_op->permission) { 643 if (inode->i_op->permission) {
644 if (rcu)
645 return -ECHILD;
467 ret = inode->i_op->permission(inode, MAY_EXEC); 646 ret = inode->i_op->permission(inode, MAY_EXEC);
468 if (!ret) 647 if (!ret)
469 goto ok; 648 goto ok;
470 return ret; 649 return ret;
471 } 650 }
472 ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl); 651 ret = __acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl, rcu);
473 if (!ret) 652 if (!ret)
474 goto ok; 653 goto ok;
654 if (rcu && ret == -ECHILD)
655 return ret;
475 656
476 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) 657 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
477 goto ok; 658 goto ok;
478 659
479 return ret; 660 return ret;
480ok: 661ok:
481 return security_inode_permission(inode, MAY_EXEC); 662 return security_inode_exec_permission(inode, rcu);
663}
664
665static int exec_permission(struct inode *inode)
666{
667 return __exec_permission(inode, 0);
668}
669
670static int exec_permission_rcu(struct inode *inode)
671{
672 return __exec_permission(inode, 1);
482} 673}
483 674
484static __always_inline void set_root(struct nameidata *nd) 675static __always_inline void set_root(struct nameidata *nd)
@@ -489,8 +680,20 @@ static __always_inline void set_root(struct nameidata *nd)
489 680
490static int link_path_walk(const char *, struct nameidata *); 681static int link_path_walk(const char *, struct nameidata *);
491 682
683static __always_inline void set_root_rcu(struct nameidata *nd)
684{
685 if (!nd->root.mnt) {
686 struct fs_struct *fs = current->fs;
687 spin_lock(&fs->lock);
688 nd->root = fs->root;
689 spin_unlock(&fs->lock);
690 }
691}
692
492static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 693static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
493{ 694{
695 int ret;
696
494 if (IS_ERR(link)) 697 if (IS_ERR(link))
495 goto fail; 698 goto fail;
496 699
@@ -500,8 +703,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
500 nd->path = nd->root; 703 nd->path = nd->root;
501 path_get(&nd->root); 704 path_get(&nd->root);
502 } 705 }
706 nd->inode = nd->path.dentry->d_inode;
503 707
504 return link_path_walk(link, nd); 708 ret = link_path_walk(link, nd);
709 return ret;
505fail: 710fail:
506 path_put(&nd->path); 711 path_put(&nd->path);
507 return PTR_ERR(link); 712 return PTR_ERR(link);
@@ -516,11 +721,12 @@ static void path_put_conditional(struct path *path, struct nameidata *nd)
516 721
517static inline void path_to_nameidata(struct path *path, struct nameidata *nd) 722static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
518{ 723{
519 dput(nd->path.dentry); 724 if (!(nd->flags & LOOKUP_RCU)) {
520 if (nd->path.mnt != path->mnt) { 725 dput(nd->path.dentry);
521 mntput(nd->path.mnt); 726 if (nd->path.mnt != path->mnt)
522 nd->path.mnt = path->mnt; 727 mntput(nd->path.mnt);
523 } 728 }
729 nd->path.mnt = path->mnt;
524 nd->path.dentry = path->dentry; 730 nd->path.dentry = path->dentry;
525} 731}
526 732
@@ -535,9 +741,11 @@ __do_follow_link(struct path *path, struct nameidata *nd, void **p)
535 741
536 if (path->mnt != nd->path.mnt) { 742 if (path->mnt != nd->path.mnt) {
537 path_to_nameidata(path, nd); 743 path_to_nameidata(path, nd);
744 nd->inode = nd->path.dentry->d_inode;
538 dget(dentry); 745 dget(dentry);
539 } 746 }
540 mntget(path->mnt); 747 mntget(path->mnt);
748
541 nd->last_type = LAST_BIND; 749 nd->last_type = LAST_BIND;
542 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 750 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
543 error = PTR_ERR(*p); 751 error = PTR_ERR(*p);
@@ -591,6 +799,20 @@ loop:
591 return err; 799 return err;
592} 800}
593 801
802static int follow_up_rcu(struct path *path)
803{
804 struct vfsmount *parent;
805 struct dentry *mountpoint;
806
807 parent = path->mnt->mnt_parent;
808 if (parent == path->mnt)
809 return 0;
810 mountpoint = path->mnt->mnt_mountpoint;
811 path->dentry = mountpoint;
812 path->mnt = parent;
813 return 1;
814}
815
594int follow_up(struct path *path) 816int follow_up(struct path *path)
595{ 817{
596 struct vfsmount *parent; 818 struct vfsmount *parent;
@@ -615,6 +837,21 @@ int follow_up(struct path *path)
615/* 837/*
616 * serialization is taken care of in namespace.c 838 * serialization is taken care of in namespace.c
617 */ 839 */
840static void __follow_mount_rcu(struct nameidata *nd, struct path *path,
841 struct inode **inode)
842{
843 while (d_mountpoint(path->dentry)) {
844 struct vfsmount *mounted;
845 mounted = __lookup_mnt(path->mnt, path->dentry, 1);
846 if (!mounted)
847 return;
848 path->mnt = mounted;
849 path->dentry = mounted->mnt_root;
850 nd->seq = read_seqcount_begin(&path->dentry->d_seq);
851 *inode = path->dentry->d_inode;
852 }
853}
854
618static int __follow_mount(struct path *path) 855static int __follow_mount(struct path *path)
619{ 856{
620 int res = 0; 857 int res = 0;
@@ -660,7 +897,42 @@ int follow_down(struct path *path)
660 return 0; 897 return 0;
661} 898}
662 899
663static __always_inline void follow_dotdot(struct nameidata *nd) 900static int follow_dotdot_rcu(struct nameidata *nd)
901{
902 struct inode *inode = nd->inode;
903
904 set_root_rcu(nd);
905
906 while(1) {
907 if (nd->path.dentry == nd->root.dentry &&
908 nd->path.mnt == nd->root.mnt) {
909 break;
910 }
911 if (nd->path.dentry != nd->path.mnt->mnt_root) {
912 struct dentry *old = nd->path.dentry;
913 struct dentry *parent = old->d_parent;
914 unsigned seq;
915
916 seq = read_seqcount_begin(&parent->d_seq);
917 if (read_seqcount_retry(&old->d_seq, nd->seq))
918 return -ECHILD;
919 inode = parent->d_inode;
920 nd->path.dentry = parent;
921 nd->seq = seq;
922 break;
923 }
924 if (!follow_up_rcu(&nd->path))
925 break;
926 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
927 inode = nd->path.dentry->d_inode;
928 }
929 __follow_mount_rcu(nd, &nd->path, &inode);
930 nd->inode = inode;
931
932 return 0;
933}
934
935static void follow_dotdot(struct nameidata *nd)
664{ 936{
665 set_root(nd); 937 set_root(nd);
666 938
@@ -681,6 +953,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
681 break; 953 break;
682 } 954 }
683 follow_mount(&nd->path); 955 follow_mount(&nd->path);
956 nd->inode = nd->path.dentry->d_inode;
684} 957}
685 958
686/* 959/*
@@ -718,18 +991,17 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent,
718 * It _is_ time-critical. 991 * It _is_ time-critical.
719 */ 992 */
720static int do_lookup(struct nameidata *nd, struct qstr *name, 993static int do_lookup(struct nameidata *nd, struct qstr *name,
721 struct path *path) 994 struct path *path, struct inode **inode)
722{ 995{
723 struct vfsmount *mnt = nd->path.mnt; 996 struct vfsmount *mnt = nd->path.mnt;
724 struct dentry *dentry, *parent; 997 struct dentry *dentry, *parent = nd->path.dentry;
725 struct inode *dir; 998 struct inode *dir;
726 /* 999 /*
727 * See if the low-level filesystem might want 1000 * See if the low-level filesystem might want
728 * to use its own hash.. 1001 * to use its own hash..
729 */ 1002 */
730 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { 1003 if (parent->d_op && parent->d_op->d_hash) {
731 int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, 1004 int err = parent->d_op->d_hash(parent, nd->inode, name);
732 nd->path.dentry->d_inode, name);
733 if (err < 0) 1005 if (err < 0)
734 return err; 1006 return err;
735 } 1007 }
@@ -739,21 +1011,48 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
739 * of a false negative due to a concurrent rename, we're going to 1011 * of a false negative due to a concurrent rename, we're going to
740 * do the non-racy lookup, below. 1012 * do the non-racy lookup, below.
741 */ 1013 */
742 dentry = __d_lookup(nd->path.dentry, name); 1014 if (nd->flags & LOOKUP_RCU) {
743 if (!dentry) 1015 unsigned seq;
744 goto need_lookup; 1016
1017 *inode = nd->inode;
1018 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1019 if (!dentry) {
1020 if (nameidata_drop_rcu(nd))
1021 return -ECHILD;
1022 goto need_lookup;
1023 }
1024 /* Memory barrier in read_seqcount_begin of child is enough */
1025 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1026 return -ECHILD;
1027
1028 nd->seq = seq;
1029 if (dentry->d_op && dentry->d_op->d_revalidate) {
1030 /* We commonly drop rcu-walk here */
1031 if (nameidata_dentry_drop_rcu(nd, dentry))
1032 return -ECHILD;
1033 goto need_revalidate;
1034 }
1035 path->mnt = mnt;
1036 path->dentry = dentry;
1037 __follow_mount_rcu(nd, path, inode);
1038 } else {
1039 dentry = __d_lookup(parent, name);
1040 if (!dentry)
1041 goto need_lookup;
745found: 1042found:
746 if (dentry->d_op && dentry->d_op->d_revalidate) 1043 if (dentry->d_op && dentry->d_op->d_revalidate)
747 goto need_revalidate; 1044 goto need_revalidate;
748done: 1045done:
749 path->mnt = mnt; 1046 path->mnt = mnt;
750 path->dentry = dentry; 1047 path->dentry = dentry;
751 __follow_mount(path); 1048 __follow_mount(path);
1049 *inode = path->dentry->d_inode;
1050 }
752 return 0; 1051 return 0;
753 1052
754need_lookup: 1053need_lookup:
755 parent = nd->path.dentry;
756 dir = parent->d_inode; 1054 dir = parent->d_inode;
1055 BUG_ON(nd->inode != dir);
757 1056
758 mutex_lock(&dir->i_mutex); 1057 mutex_lock(&dir->i_mutex);
759 /* 1058 /*
@@ -815,7 +1114,6 @@ static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
815static int link_path_walk(const char *name, struct nameidata *nd) 1114static int link_path_walk(const char *name, struct nameidata *nd)
816{ 1115{
817 struct path next; 1116 struct path next;
818 struct inode *inode;
819 int err; 1117 int err;
820 unsigned int lookup_flags = nd->flags; 1118 unsigned int lookup_flags = nd->flags;
821 1119
@@ -824,18 +1122,28 @@ static int link_path_walk(const char *name, struct nameidata *nd)
824 if (!*name) 1122 if (!*name)
825 goto return_reval; 1123 goto return_reval;
826 1124
827 inode = nd->path.dentry->d_inode;
828 if (nd->depth) 1125 if (nd->depth)
829 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); 1126 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
830 1127
831 /* At this point we know we have a real path component. */ 1128 /* At this point we know we have a real path component. */
832 for(;;) { 1129 for(;;) {
1130 struct inode *inode;
833 unsigned long hash; 1131 unsigned long hash;
834 struct qstr this; 1132 struct qstr this;
835 unsigned int c; 1133 unsigned int c;
836 1134
837 nd->flags |= LOOKUP_CONTINUE; 1135 nd->flags |= LOOKUP_CONTINUE;
838 err = exec_permission(inode); 1136 if (nd->flags & LOOKUP_RCU) {
1137 err = exec_permission_rcu(nd->inode);
1138 if (err == -ECHILD) {
1139 if (nameidata_drop_rcu(nd))
1140 return -ECHILD;
1141 goto exec_again;
1142 }
1143 } else {
1144exec_again:
1145 err = exec_permission(nd->inode);
1146 }
839 if (err) 1147 if (err)
840 break; 1148 break;
841 1149
@@ -866,37 +1174,44 @@ static int link_path_walk(const char *name, struct nameidata *nd)
866 if (this.name[0] == '.') switch (this.len) { 1174 if (this.name[0] == '.') switch (this.len) {
867 default: 1175 default:
868 break; 1176 break;
869 case 2: 1177 case 2:
870 if (this.name[1] != '.') 1178 if (this.name[1] != '.')
871 break; 1179 break;
872 follow_dotdot(nd); 1180 if (nd->flags & LOOKUP_RCU) {
873 inode = nd->path.dentry->d_inode; 1181 if (follow_dotdot_rcu(nd))
1182 return -ECHILD;
1183 } else
1184 follow_dotdot(nd);
874 /* fallthrough */ 1185 /* fallthrough */
875 case 1: 1186 case 1:
876 continue; 1187 continue;
877 } 1188 }
878 /* This does the actual lookups.. */ 1189 /* This does the actual lookups.. */
879 err = do_lookup(nd, &this, &next); 1190 err = do_lookup(nd, &this, &next, &inode);
880 if (err) 1191 if (err)
881 break; 1192 break;
882
883 err = -ENOENT; 1193 err = -ENOENT;
884 inode = next.dentry->d_inode;
885 if (!inode) 1194 if (!inode)
886 goto out_dput; 1195 goto out_dput;
887 1196
888 if (inode->i_op->follow_link) { 1197 if (inode->i_op->follow_link) {
1198 /* We commonly drop rcu-walk here */
1199 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1200 return -ECHILD;
1201 BUG_ON(inode != next.dentry->d_inode);
889 err = do_follow_link(&next, nd); 1202 err = do_follow_link(&next, nd);
890 if (err) 1203 if (err)
891 goto return_err; 1204 goto return_err;
1205 nd->inode = nd->path.dentry->d_inode;
892 err = -ENOENT; 1206 err = -ENOENT;
893 inode = nd->path.dentry->d_inode; 1207 if (!nd->inode)
894 if (!inode)
895 break; 1208 break;
896 } else 1209 } else {
897 path_to_nameidata(&next, nd); 1210 path_to_nameidata(&next, nd);
1211 nd->inode = inode;
1212 }
898 err = -ENOTDIR; 1213 err = -ENOTDIR;
899 if (!inode->i_op->lookup) 1214 if (!nd->inode->i_op->lookup)
900 break; 1215 break;
901 continue; 1216 continue;
902 /* here ends the main loop */ 1217 /* here ends the main loop */
@@ -911,32 +1226,39 @@ last_component:
911 if (this.name[0] == '.') switch (this.len) { 1226 if (this.name[0] == '.') switch (this.len) {
912 default: 1227 default:
913 break; 1228 break;
914 case 2: 1229 case 2:
915 if (this.name[1] != '.') 1230 if (this.name[1] != '.')
916 break; 1231 break;
917 follow_dotdot(nd); 1232 if (nd->flags & LOOKUP_RCU) {
918 inode = nd->path.dentry->d_inode; 1233 if (follow_dotdot_rcu(nd))
1234 return -ECHILD;
1235 } else
1236 follow_dotdot(nd);
919 /* fallthrough */ 1237 /* fallthrough */
920 case 1: 1238 case 1:
921 goto return_reval; 1239 goto return_reval;
922 } 1240 }
923 err = do_lookup(nd, &this, &next); 1241 err = do_lookup(nd, &this, &next, &inode);
924 if (err) 1242 if (err)
925 break; 1243 break;
926 inode = next.dentry->d_inode;
927 if (follow_on_final(inode, lookup_flags)) { 1244 if (follow_on_final(inode, lookup_flags)) {
1245 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1246 return -ECHILD;
1247 BUG_ON(inode != next.dentry->d_inode);
928 err = do_follow_link(&next, nd); 1248 err = do_follow_link(&next, nd);
929 if (err) 1249 if (err)
930 goto return_err; 1250 goto return_err;
931 inode = nd->path.dentry->d_inode; 1251 nd->inode = nd->path.dentry->d_inode;
932 } else 1252 } else {
933 path_to_nameidata(&next, nd); 1253 path_to_nameidata(&next, nd);
1254 nd->inode = inode;
1255 }
934 err = -ENOENT; 1256 err = -ENOENT;
935 if (!inode) 1257 if (!nd->inode)
936 break; 1258 break;
937 if (lookup_flags & LOOKUP_DIRECTORY) { 1259 if (lookup_flags & LOOKUP_DIRECTORY) {
938 err = -ENOTDIR; 1260 err = -ENOTDIR;
939 if (!inode->i_op->lookup) 1261 if (!nd->inode->i_op->lookup)
940 break; 1262 break;
941 } 1263 }
942 goto return_base; 1264 goto return_base;
@@ -958,6 +1280,8 @@ return_reval:
958 */ 1280 */
959 if (nd->path.dentry && nd->path.dentry->d_sb && 1281 if (nd->path.dentry && nd->path.dentry->d_sb &&
960 (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { 1282 (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
1283 if (nameidata_drop_rcu_maybe(nd))
1284 return -ECHILD;
961 err = -ESTALE; 1285 err = -ESTALE;
962 /* Note: we do not d_invalidate() */ 1286 /* Note: we do not d_invalidate() */
963 if (!nd->path.dentry->d_op->d_revalidate( 1287 if (!nd->path.dentry->d_op->d_revalidate(
@@ -965,16 +1289,34 @@ return_reval:
965 break; 1289 break;
966 } 1290 }
967return_base: 1291return_base:
1292 if (nameidata_drop_rcu_last_maybe(nd))
1293 return -ECHILD;
968 return 0; 1294 return 0;
969out_dput: 1295out_dput:
970 path_put_conditional(&next, nd); 1296 if (!(nd->flags & LOOKUP_RCU))
1297 path_put_conditional(&next, nd);
971 break; 1298 break;
972 } 1299 }
973 path_put(&nd->path); 1300 if (!(nd->flags & LOOKUP_RCU))
1301 path_put(&nd->path);
974return_err: 1302return_err:
975 return err; 1303 return err;
976} 1304}
977 1305
1306static inline int path_walk_rcu(const char *name, struct nameidata *nd)
1307{
1308 current->total_link_count = 0;
1309
1310 return link_path_walk(name, nd);
1311}
1312
1313static inline int path_walk_simple(const char *name, struct nameidata *nd)
1314{
1315 current->total_link_count = 0;
1316
1317 return link_path_walk(name, nd);
1318}
1319
978static int path_walk(const char *name, struct nameidata *nd) 1320static int path_walk(const char *name, struct nameidata *nd)
979{ 1321{
980 struct path save = nd->path; 1322 struct path save = nd->path;
@@ -1000,6 +1342,88 @@ static int path_walk(const char *name, struct nameidata *nd)
1000 return result; 1342 return result;
1001} 1343}
1002 1344
1345static void path_finish_rcu(struct nameidata *nd)
1346{
1347 if (nd->flags & LOOKUP_RCU) {
1348 /* RCU dangling. Cancel it. */
1349 nd->flags &= ~LOOKUP_RCU;
1350 nd->root.mnt = NULL;
1351 rcu_read_unlock();
1352 br_read_unlock(vfsmount_lock);
1353 }
1354 if (nd->file)
1355 fput(nd->file);
1356}
1357
1358static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1359{
1360 int retval = 0;
1361 int fput_needed;
1362 struct file *file;
1363
1364 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1365 nd->flags = flags | LOOKUP_RCU;
1366 nd->depth = 0;
1367 nd->root.mnt = NULL;
1368 nd->file = NULL;
1369
1370 if (*name=='/') {
1371 struct fs_struct *fs = current->fs;
1372
1373 br_read_lock(vfsmount_lock);
1374 rcu_read_lock();
1375
1376 spin_lock(&fs->lock);
1377 nd->root = fs->root;
1378 nd->path = nd->root;
1379 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
1380 spin_unlock(&fs->lock);
1381
1382 } else if (dfd == AT_FDCWD) {
1383 struct fs_struct *fs = current->fs;
1384
1385 br_read_lock(vfsmount_lock);
1386 rcu_read_lock();
1387
1388 spin_lock(&fs->lock);
1389 nd->path = fs->pwd;
1390 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
1391 spin_unlock(&fs->lock);
1392 } else {
1393 struct dentry *dentry;
1394
1395 file = fget_light(dfd, &fput_needed);
1396 retval = -EBADF;
1397 if (!file)
1398 goto out_fail;
1399
1400 dentry = file->f_path.dentry;
1401
1402 retval = -ENOTDIR;
1403 if (!S_ISDIR(dentry->d_inode->i_mode))
1404 goto fput_fail;
1405
1406 retval = file_permission(file, MAY_EXEC);
1407 if (retval)
1408 goto fput_fail;
1409
1410 nd->path = file->f_path;
1411 if (fput_needed)
1412 nd->file = file;
1413
1414 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
1415 br_read_lock(vfsmount_lock);
1416 rcu_read_lock();
1417 }
1418 nd->inode = nd->path.dentry->d_inode;
1419 return 0;
1420
1421fput_fail:
1422 fput_light(file, fput_needed);
1423out_fail:
1424 return retval;
1425}
1426
1003static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1427static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1004{ 1428{
1005 int retval = 0; 1429 int retval = 0;
@@ -1040,6 +1464,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei
1040 1464
1041 fput_light(file, fput_needed); 1465 fput_light(file, fput_needed);
1042 } 1466 }
1467 nd->inode = nd->path.dentry->d_inode;
1043 return 0; 1468 return 0;
1044 1469
1045fput_fail: 1470fput_fail:
@@ -1052,16 +1477,53 @@ out_fail:
1052static int do_path_lookup(int dfd, const char *name, 1477static int do_path_lookup(int dfd, const char *name,
1053 unsigned int flags, struct nameidata *nd) 1478 unsigned int flags, struct nameidata *nd)
1054{ 1479{
1055 int retval = path_init(dfd, name, flags, nd); 1480 int retval;
1056 if (!retval) 1481
1057 retval = path_walk(name, nd); 1482 /*
1058 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1483 * Path walking is largely split up into 2 different synchronisation
1059 nd->path.dentry->d_inode)) 1484 * schemes, rcu-walk and ref-walk (explained in
1060 audit_inode(name, nd->path.dentry); 1485 * Documentation/filesystems/path-lookup.txt). These share much of the
1486 * path walk code, but some things particularly setup, cleanup, and
1487 * following mounts are sufficiently divergent that functions are
1488 * duplicated. Typically there is a function foo(), and its RCU
1489 * analogue, foo_rcu().
1490 *
1491 * -ECHILD is the error number of choice (just to avoid clashes) that
1492 * is returned if some aspect of an rcu-walk fails. Such an error must
1493 * be handled by restarting a traditional ref-walk (which will always
1494 * be able to complete).
1495 */
1496 retval = path_init_rcu(dfd, name, flags, nd);
1497 if (unlikely(retval))
1498 return retval;
1499 retval = path_walk_rcu(name, nd);
1500 path_finish_rcu(nd);
1061 if (nd->root.mnt) { 1501 if (nd->root.mnt) {
1062 path_put(&nd->root); 1502 path_put(&nd->root);
1063 nd->root.mnt = NULL; 1503 nd->root.mnt = NULL;
1064 } 1504 }
1505
1506 if (unlikely(retval == -ECHILD || retval == -ESTALE)) {
1507 /* slower, locked walk */
1508 if (retval == -ESTALE)
1509 flags |= LOOKUP_REVAL;
1510 retval = path_init(dfd, name, flags, nd);
1511 if (unlikely(retval))
1512 return retval;
1513 retval = path_walk(name, nd);
1514 if (nd->root.mnt) {
1515 path_put(&nd->root);
1516 nd->root.mnt = NULL;
1517 }
1518 }
1519
1520 if (likely(!retval)) {
1521 if (unlikely(!audit_dummy_context())) {
1522 if (nd->path.dentry && nd->inode)
1523 audit_inode(name, nd->path.dentry);
1524 }
1525 }
1526
1065 return retval; 1527 return retval;
1066} 1528}
1067 1529
@@ -1104,10 +1566,11 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1104 path_get(&nd->path); 1566 path_get(&nd->path);
1105 nd->root = nd->path; 1567 nd->root = nd->path;
1106 path_get(&nd->root); 1568 path_get(&nd->root);
1569 nd->inode = nd->path.dentry->d_inode;
1107 1570
1108 retval = path_walk(name, nd); 1571 retval = path_walk(name, nd);
1109 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1572 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1110 nd->path.dentry->d_inode)) 1573 nd->inode))
1111 audit_inode(name, nd->path.dentry); 1574 audit_inode(name, nd->path.dentry);
1112 1575
1113 path_put(&nd->root); 1576 path_put(&nd->root);
@@ -1488,6 +1951,7 @@ out_unlock:
1488 mutex_unlock(&dir->d_inode->i_mutex); 1951 mutex_unlock(&dir->d_inode->i_mutex);
1489 dput(nd->path.dentry); 1952 dput(nd->path.dentry);
1490 nd->path.dentry = path->dentry; 1953 nd->path.dentry = path->dentry;
1954
1491 if (error) 1955 if (error)
1492 return error; 1956 return error;
1493 /* Don't check for write permission, don't truncate */ 1957 /* Don't check for write permission, don't truncate */
@@ -1582,6 +2046,9 @@ exit:
1582 return ERR_PTR(error); 2046 return ERR_PTR(error);
1583} 2047}
1584 2048
2049/*
2050 * Handle O_CREAT case for do_filp_open
2051 */
1585static struct file *do_last(struct nameidata *nd, struct path *path, 2052static struct file *do_last(struct nameidata *nd, struct path *path,
1586 int open_flag, int acc_mode, 2053 int open_flag, int acc_mode,
1587 int mode, const char *pathname) 2054 int mode, const char *pathname)
@@ -1603,42 +2070,16 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1603 } 2070 }
1604 /* fallthrough */ 2071 /* fallthrough */
1605 case LAST_ROOT: 2072 case LAST_ROOT:
1606 if (open_flag & O_CREAT) 2073 goto exit;
1607 goto exit;
1608 /* fallthrough */
1609 case LAST_BIND: 2074 case LAST_BIND:
1610 audit_inode(pathname, dir); 2075 audit_inode(pathname, dir);
1611 goto ok; 2076 goto ok;
1612 } 2077 }
1613 2078
1614 /* trailing slashes? */ 2079 /* trailing slashes? */
1615 if (nd->last.name[nd->last.len]) { 2080 if (nd->last.name[nd->last.len])
1616 if (open_flag & O_CREAT) 2081 goto exit;
1617 goto exit;
1618 nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
1619 }
1620
1621 /* just plain open? */
1622 if (!(open_flag & O_CREAT)) {
1623 error = do_lookup(nd, &nd->last, path);
1624 if (error)
1625 goto exit;
1626 error = -ENOENT;
1627 if (!path->dentry->d_inode)
1628 goto exit_dput;
1629 if (path->dentry->d_inode->i_op->follow_link)
1630 return NULL;
1631 error = -ENOTDIR;
1632 if (nd->flags & LOOKUP_DIRECTORY) {
1633 if (!path->dentry->d_inode->i_op->lookup)
1634 goto exit_dput;
1635 }
1636 path_to_nameidata(path, nd);
1637 audit_inode(pathname, nd->path.dentry);
1638 goto ok;
1639 }
1640 2082
1641 /* OK, it's O_CREAT */
1642 mutex_lock(&dir->d_inode->i_mutex); 2083 mutex_lock(&dir->d_inode->i_mutex);
1643 2084
1644 path->dentry = lookup_hash(nd); 2085 path->dentry = lookup_hash(nd);
@@ -1709,8 +2150,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1709 return NULL; 2150 return NULL;
1710 2151
1711 path_to_nameidata(path, nd); 2152 path_to_nameidata(path, nd);
2153 nd->inode = path->dentry->d_inode;
1712 error = -EISDIR; 2154 error = -EISDIR;
1713 if (S_ISDIR(path->dentry->d_inode->i_mode)) 2155 if (S_ISDIR(nd->inode->i_mode))
1714 goto exit; 2156 goto exit;
1715ok: 2157ok:
1716 filp = finish_open(nd, open_flag, acc_mode); 2158 filp = finish_open(nd, open_flag, acc_mode);
@@ -1741,7 +2183,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1741 struct path path; 2183 struct path path;
1742 int count = 0; 2184 int count = 0;
1743 int flag = open_to_namei_flags(open_flag); 2185 int flag = open_to_namei_flags(open_flag);
1744 int force_reval = 0; 2186 int flags;
1745 2187
1746 if (!(open_flag & O_CREAT)) 2188 if (!(open_flag & O_CREAT))
1747 mode = 0; 2189 mode = 0;
@@ -1770,54 +2212,84 @@ struct file *do_filp_open(int dfd, const char *pathname,
1770 if (open_flag & O_APPEND) 2212 if (open_flag & O_APPEND)
1771 acc_mode |= MAY_APPEND; 2213 acc_mode |= MAY_APPEND;
1772 2214
1773 /* find the parent */ 2215 flags = LOOKUP_OPEN;
1774reval: 2216 if (open_flag & O_CREAT) {
1775 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 2217 flags |= LOOKUP_CREATE;
2218 if (open_flag & O_EXCL)
2219 flags |= LOOKUP_EXCL;
2220 }
2221 if (open_flag & O_DIRECTORY)
2222 flags |= LOOKUP_DIRECTORY;
2223 if (!(open_flag & O_NOFOLLOW))
2224 flags |= LOOKUP_FOLLOW;
2225
2226 filp = get_empty_filp();
2227 if (!filp)
2228 return ERR_PTR(-ENFILE);
2229
2230 filp->f_flags = open_flag;
2231 nd.intent.open.file = filp;
2232 nd.intent.open.flags = flag;
2233 nd.intent.open.create_mode = mode;
2234
2235 if (open_flag & O_CREAT)
2236 goto creat;
2237
2238 /* !O_CREAT, simple open */
2239 error = do_path_lookup(dfd, pathname, flags, &nd);
2240 if (unlikely(error))
2241 goto out_filp;
2242 error = -ELOOP;
2243 if (!(nd.flags & LOOKUP_FOLLOW)) {
2244 if (nd.inode->i_op->follow_link)
2245 goto out_path;
2246 }
2247 error = -ENOTDIR;
2248 if (nd.flags & LOOKUP_DIRECTORY) {
2249 if (!nd.inode->i_op->lookup)
2250 goto out_path;
2251 }
2252 audit_inode(pathname, nd.path.dentry);
2253 filp = finish_open(&nd, open_flag, acc_mode);
2254 return filp;
2255
2256creat:
2257 /* OK, have to create the file. Find the parent. */
2258 error = path_init_rcu(dfd, pathname,
2259 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
1776 if (error) 2260 if (error)
1777 return ERR_PTR(error); 2261 goto out_filp;
1778 if (force_reval) 2262 error = path_walk_rcu(pathname, &nd);
1779 nd.flags |= LOOKUP_REVAL; 2263 path_finish_rcu(&nd);
2264 if (unlikely(error == -ECHILD || error == -ESTALE)) {
2265 /* slower, locked walk */
2266 if (error == -ESTALE) {
2267reval:
2268 flags |= LOOKUP_REVAL;
2269 }
2270 error = path_init(dfd, pathname,
2271 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2272 if (error)
2273 goto out_filp;
1780 2274
1781 current->total_link_count = 0; 2275 error = path_walk_simple(pathname, &nd);
1782 error = link_path_walk(pathname, &nd);
1783 if (error) {
1784 filp = ERR_PTR(error);
1785 goto out;
1786 } 2276 }
1787 if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT)) 2277 if (unlikely(error))
2278 goto out_filp;
2279 if (unlikely(!audit_dummy_context()))
1788 audit_inode(pathname, nd.path.dentry); 2280 audit_inode(pathname, nd.path.dentry);
1789 2281
1790 /* 2282 /*
1791 * We have the parent and last component. 2283 * We have the parent and last component.
1792 */ 2284 */
1793 2285 nd.flags = flags;
1794 error = -ENFILE;
1795 filp = get_empty_filp();
1796 if (filp == NULL)
1797 goto exit_parent;
1798 nd.intent.open.file = filp;
1799 filp->f_flags = open_flag;
1800 nd.intent.open.flags = flag;
1801 nd.intent.open.create_mode = mode;
1802 nd.flags &= ~LOOKUP_PARENT;
1803 nd.flags |= LOOKUP_OPEN;
1804 if (open_flag & O_CREAT) {
1805 nd.flags |= LOOKUP_CREATE;
1806 if (open_flag & O_EXCL)
1807 nd.flags |= LOOKUP_EXCL;
1808 }
1809 if (open_flag & O_DIRECTORY)
1810 nd.flags |= LOOKUP_DIRECTORY;
1811 if (!(open_flag & O_NOFOLLOW))
1812 nd.flags |= LOOKUP_FOLLOW;
1813 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2286 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1814 while (unlikely(!filp)) { /* trailing symlink */ 2287 while (unlikely(!filp)) { /* trailing symlink */
1815 struct path holder; 2288 struct path holder;
1816 struct inode *inode = path.dentry->d_inode;
1817 void *cookie; 2289 void *cookie;
1818 error = -ELOOP; 2290 error = -ELOOP;
1819 /* S_ISDIR part is a temporary automount kludge */ 2291 /* S_ISDIR part is a temporary automount kludge */
1820 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode)) 2292 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(nd.inode->i_mode))
1821 goto exit_dput; 2293 goto exit_dput;
1822 if (count++ == 32) 2294 if (count++ == 32)
1823 goto exit_dput; 2295 goto exit_dput;
@@ -1838,36 +2310,33 @@ reval:
1838 goto exit_dput; 2310 goto exit_dput;
1839 error = __do_follow_link(&path, &nd, &cookie); 2311 error = __do_follow_link(&path, &nd, &cookie);
1840 if (unlikely(error)) { 2312 if (unlikely(error)) {
2313 if (!IS_ERR(cookie) && nd.inode->i_op->put_link)
2314 nd.inode->i_op->put_link(path.dentry, &nd, cookie);
1841 /* nd.path had been dropped */ 2315 /* nd.path had been dropped */
1842 if (!IS_ERR(cookie) && inode->i_op->put_link) 2316 nd.path = path;
1843 inode->i_op->put_link(path.dentry, &nd, cookie); 2317 goto out_path;
1844 path_put(&path);
1845 release_open_intent(&nd);
1846 filp = ERR_PTR(error);
1847 goto out;
1848 } 2318 }
1849 holder = path; 2319 holder = path;
1850 nd.flags &= ~LOOKUP_PARENT; 2320 nd.flags &= ~LOOKUP_PARENT;
1851 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2321 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1852 if (inode->i_op->put_link) 2322 if (nd.inode->i_op->put_link)
1853 inode->i_op->put_link(holder.dentry, &nd, cookie); 2323 nd.inode->i_op->put_link(holder.dentry, &nd, cookie);
1854 path_put(&holder); 2324 path_put(&holder);
1855 } 2325 }
1856out: 2326out:
1857 if (nd.root.mnt) 2327 if (nd.root.mnt)
1858 path_put(&nd.root); 2328 path_put(&nd.root);
1859 if (filp == ERR_PTR(-ESTALE) && !force_reval) { 2329 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
1860 force_reval = 1;
1861 goto reval; 2330 goto reval;
1862 }
1863 return filp; 2331 return filp;
1864 2332
1865exit_dput: 2333exit_dput:
1866 path_put_conditional(&path, &nd); 2334 path_put_conditional(&path, &nd);
2335out_path:
2336 path_put(&nd.path);
2337out_filp:
1867 if (!IS_ERR(nd.intent.open.file)) 2338 if (!IS_ERR(nd.intent.open.file))
1868 release_open_intent(&nd); 2339 release_open_intent(&nd);
1869exit_parent:
1870 path_put(&nd.path);
1871 filp = ERR_PTR(error); 2340 filp = ERR_PTR(error);
1872 goto out; 2341 goto out;
1873} 2342}