about summary refs log tree commit diff stats
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- fs/namei.c 857
1 files changed, 682 insertions, 175 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 4ff7ca53053..24ece10470b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -169,8 +169,8 @@ EXPORT_SYMBOL(putname);
169/* 169/*
170 * This does basic POSIX ACL permission checking 170 * This does basic POSIX ACL permission checking
171 */ 171 */
172static int acl_permission_check(struct inode *inode, int mask, 172static int acl_permission_check(struct inode *inode, int mask, unsigned int flags,
173 int (*check_acl)(struct inode *inode, int mask)) 173 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
174{ 174{
175 umode_t mode = inode->i_mode; 175 umode_t mode = inode->i_mode;
176 176
@@ -180,7 +180,7 @@ static int acl_permission_check(struct inode *inode, int mask,
180 mode >>= 6; 180 mode >>= 6;
181 else { 181 else {
182 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 182 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
183 int error = check_acl(inode, mask); 183 int error = check_acl(inode, mask, flags);
184 if (error != -EAGAIN) 184 if (error != -EAGAIN)
185 return error; 185 return error;
186 } 186 }
@@ -198,25 +198,30 @@ static int acl_permission_check(struct inode *inode, int mask,
198} 198}
199 199
200/** 200/**
201 * generic_permission - check for access rights on a Posix-like filesystem 201 * generic_permission - check for access rights on a Posix-like filesystem
202 * @inode: inode to check access rights for 202 * @inode: inode to check access rights for
203 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 203 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
204 * @check_acl: optional callback to check for Posix ACLs 204 * @check_acl: optional callback to check for Posix ACLs
205 * @flags: IPERM_FLAG_ flags.
205 * 206 *
206 * Used to check for read/write/execute permissions on a file. 207 * Used to check for read/write/execute permissions on a file.
207 * We use "fsuid" for this, letting us set arbitrary permissions 208 * We use "fsuid" for this, letting us set arbitrary permissions
208 * for filesystem access without changing the "normal" uids which 209 * for filesystem access without changing the "normal" uids which
209 * are used for other things.. 210 * are used for other things.
211 *
212 * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
213 * request cannot be satisfied (eg. requires blocking or too much complexity).
214 * It would then be called again in ref-walk mode.
210 */ 215 */
211int generic_permission(struct inode *inode, int mask, 216int generic_permission(struct inode *inode, int mask, unsigned int flags,
212 int (*check_acl)(struct inode *inode, int mask)) 217 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
213{ 218{
214 int ret; 219 int ret;
215 220
216 /* 221 /*
217 * Do the basic POSIX ACL permission checks. 222 * Do the basic POSIX ACL permission checks.
218 */ 223 */
219 ret = acl_permission_check(inode, mask, check_acl); 224 ret = acl_permission_check(inode, mask, flags, check_acl);
220 if (ret != -EACCES) 225 if (ret != -EACCES)
221 return ret; 226 return ret;
222 227
@@ -271,9 +276,10 @@ int inode_permission(struct inode *inode, int mask)
271 } 276 }
272 277
273 if (inode->i_op->permission) 278 if (inode->i_op->permission)
274 retval = inode->i_op->permission(inode, mask); 279 retval = inode->i_op->permission(inode, mask, 0);
275 else 280 else
276 retval = generic_permission(inode, mask, inode->i_op->check_acl); 281 retval = generic_permission(inode, mask, 0,
282 inode->i_op->check_acl);
277 283
278 if (retval) 284 if (retval)
279 return retval; 285 return retval;
@@ -362,6 +368,18 @@ void path_get(struct path *path)
362EXPORT_SYMBOL(path_get); 368EXPORT_SYMBOL(path_get);
363 369
364/** 370/**
371 * path_get_long - get a long reference to a path
372 * @path: path to get the reference to
373 *
374 * Given a path increment the reference count to the dentry and the vfsmount.
375 */
376void path_get_long(struct path *path)
377{
	/* The mount reference is taken first, then the dentry reference. */
378 mntget_long(path->mnt);
379 dget(path->dentry);
380}
381
382/**
365 * path_put - put a reference to a path 383 * path_put - put a reference to a path
366 * @path: path to put the reference to 384 * @path: path to put the reference to
367 * 385 *
@@ -375,6 +393,185 @@ void path_put(struct path *path)
375EXPORT_SYMBOL(path_put); 393EXPORT_SYMBOL(path_put);
376 394
377/** 395/**
396 * path_put_long - put a long reference to a path
397 * @path: path to put the reference to
398 *
399 * Given a path decrement the reference count to the dentry and the vfsmount.
400 */
401void path_put_long(struct path *path)
402{
	/* The dentry reference is dropped first, then the mount reference. */
403 dput(path->dentry);
404 mntput_long(path->mnt);
405}
406
407/**
408 * nameidata_drop_rcu - drop this nameidata out of rcu-walk
409 * @nd: nameidata pathwalk data to drop
410 * Returns: 0 on success, -ECHILD on failure
411 *
412 * Path walking has 2 modes, rcu-walk and ref-walk (see
413 * Documentation/filesystems/path-lookup.txt). nameidata_*drop_rcu* functions
414 * attempt to drop out of rcu-walk mode and take normal reference counts on dentries
415 * and vfsmounts to transition to ref-walk mode. These functions take
416 * refcounts at the last known good point before rcu-walk got stuck, so
417 * ref-walk may continue from there. If this is not successful (eg. a seqcount
418 * has changed), then failure is returned and path walk restarts from the
419 * beginning in ref-walk mode.
420 *
421 * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into
422 * ref-walk. Must be called from rcu-walk context.
 *
 * On failure this function leaves LOOKUP_RCU set and does not call
 * rcu_read_unlock() or br_read_unlock(vfsmount_lock); only the spinlocks it
 * took itself are released. Leaving rcu-walk is then up to the caller
 * (path_finish_rcu() does this when LOOKUP_RCU is still set).
423 */
424static int nameidata_drop_rcu(struct nameidata *nd)
425{
426 struct fs_struct *fs = current->fs;
427 struct dentry *dentry = nd->path.dentry;
428
429 BUG_ON(!(nd->flags & LOOKUP_RCU));
	/*
	 * If a root was recorded in @nd, it must still match current->fs->root.
	 * fs->lock stays held until the root reference has been taken below.
	 */
430 if (nd->root.mnt) {
431 spin_lock(&fs->lock);
432 if (nd->root.mnt != fs->root.mnt ||
433 nd->root.dentry != fs->root.dentry)
434 goto err_root;
435 }
436 spin_lock(&dentry->d_lock);
	/*
	 * __d_rcu_to_refcount() is defined outside this view; presumably it
	 * checks nd->seq against the dentry and takes a reference on success.
	 * A zero return is treated as failure here.
	 */
437 if (!__d_rcu_to_refcount(dentry, nd->seq))
438 goto err;
439 BUG_ON(nd->inode != dentry->d_inode);
440 spin_unlock(&dentry->d_lock);
441 if (nd->root.mnt) {
442 path_get(&nd->root);
443 spin_unlock(&fs->lock);
444 }
445 mntget(nd->path.mnt);
446
	/* All references are held: leave rcu-walk and mark @nd as ref-walk. */
447 rcu_read_unlock();
448 br_read_unlock(vfsmount_lock);
449 nd->flags &= ~LOOKUP_RCU;
450 return 0;
451err:
452 spin_unlock(&dentry->d_lock);
453err_root:
	/* fs->lock was only taken when a root was recorded. */
454 if (nd->root.mnt)
455 spin_unlock(&fs->lock);
456 return -ECHILD;
457}
458
459/*
 * Try to drop out of rcu-walk mode if we were in it, otherwise do nothing.
 * Returns 0 when @nd is not in rcu-walk mode (LOOKUP_RCU clear); otherwise
 * returns whatever nameidata_drop_rcu() returns.
 */
460static inline int nameidata_drop_rcu_maybe(struct nameidata *nd)
461{
462 if (nd->flags & LOOKUP_RCU)
463 return nameidata_drop_rcu(nd);
464 return 0;
465}
466
467/**
468 * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk
469 * @nd: nameidata pathwalk data to drop
470 * @dentry: dentry to drop
471 * Returns: 0 on success, -ECHILD on failure
472 *
473 * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root,
474 * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on
475 * @nd. Must be called from rcu-walk context.
 *
 * On failure LOOKUP_RCU stays set and neither rcu_read_unlock() nor
 * br_read_unlock(vfsmount_lock) is called; only the spinlocks taken here
 * are released.
 *
 * NOTE(review): nd->path.dentry's d_lock is taken first and @dentry's d_lock
 * second. If @dentry == nd->path.dentry that is the same lock taken twice.
 * d_revalidate() forwards its dentry argument here on -ECHILD, and
 * link_path_walk()'s return_reval path calls d_revalidate(nd->path.dentry, nd).
 * Confirm whether ->d_revalidate can return -ECHILD on that path.
476 */
477static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry)
478{
479 struct fs_struct *fs = current->fs;
480 struct dentry *parent = nd->path.dentry;
481
482 BUG_ON(!(nd->flags & LOOKUP_RCU));
	/*
	 * If a root was recorded in @nd, it must still match current->fs->root.
	 * fs->lock stays held until the root reference has been taken below.
	 */
483 if (nd->root.mnt) {
484 spin_lock(&fs->lock);
485 if (nd->root.mnt != fs->root.mnt ||
486 nd->root.dentry != fs->root.dentry)
487 goto err_root;
488 }
	/* Lock order: parent (nd->path.dentry) first, then the child, nested. */
489 spin_lock(&parent->d_lock);
490 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	/*
	 * __d_rcu_to_refcount() is defined outside this view; presumably it
	 * checks nd->seq against @dentry and takes a reference on success.
	 * A zero return is treated as failure here.
	 */
491 if (!__d_rcu_to_refcount(dentry, nd->seq))
492 goto err;
493 /*
494 * If the sequence check on the child dentry passed, then the child has
495 * not been removed from its parent. This means the parent dentry must
496 * be valid and able to take a reference at this point.
497 */
498 BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
499 BUG_ON(!parent->d_count);
	/* The parent's reference is taken by bumping d_count directly under its d_lock. */
500 parent->d_count++;
501 spin_unlock(&dentry->d_lock);
502 spin_unlock(&parent->d_lock);
503 if (nd->root.mnt) {
504 path_get(&nd->root);
505 spin_unlock(&fs->lock);
506 }
507 mntget(nd->path.mnt);
508
	/* All references are held: leave rcu-walk and mark @nd as ref-walk. */
509 rcu_read_unlock();
510 br_read_unlock(vfsmount_lock);
511 nd->flags &= ~LOOKUP_RCU;
512 return 0;
513err:
514 spin_unlock(&dentry->d_lock);
515 spin_unlock(&parent->d_lock);
516err_root:
	/* fs->lock was only taken when a root was recorded. */
517 if (nd->root.mnt)
518 spin_unlock(&fs->lock);
519 return -ECHILD;
520}
521
522/*
 * Try to drop out of rcu-walk mode if we were in it, otherwise do nothing.
 * Returns 0 when @nd is not in rcu-walk mode (LOOKUP_RCU clear); otherwise
 * returns whatever nameidata_dentry_drop_rcu() returns for @nd and @dentry.
 */
523static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
524{
525 if (nd->flags & LOOKUP_RCU)
526 return nameidata_dentry_drop_rcu(nd, dentry);
527 return 0;
528}
529
530/**
531 * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk
532 * @nd: nameidata pathwalk data to drop
533 * Returns: 0 on success, -ECHILD on failure
534 *
535 * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk.
536 * nd->path should be the final element of the lookup, so nd->root is discarded.
537 * Must be called from rcu-walk context.
 *
 * Unlike nameidata_drop_rcu(), this function clears LOOKUP_RCU up front and
 * calls rcu_read_unlock() and br_read_unlock(vfsmount_lock) on both the
 * success and the failure path.
538 */
539static int nameidata_drop_rcu_last(struct nameidata *nd)
540{
541 struct dentry *dentry = nd->path.dentry;
542
543 BUG_ON(!(nd->flags & LOOKUP_RCU));
544 nd->flags &= ~LOOKUP_RCU;
	/* nd->root is forgotten without a put; no reference is taken on it in this function. */
545 nd->root.mnt = NULL;
546 spin_lock(&dentry->d_lock);
	/*
	 * __d_rcu_to_refcount() is defined outside this view; presumably it
	 * checks nd->seq against the dentry and takes a reference on success.
	 * A zero return is treated as failure here.
	 */
547 if (!__d_rcu_to_refcount(dentry, nd->seq))
548 goto err_unlock;
549 BUG_ON(nd->inode != dentry->d_inode);
550 spin_unlock(&dentry->d_lock);
551
552 mntget(nd->path.mnt);
553
554 rcu_read_unlock();
555 br_read_unlock(vfsmount_lock);
556
557 return 0;
558
559err_unlock:
	/* Failure still leaves rcu-walk completely; LOOKUP_RCU was cleared above. */
560 spin_unlock(&dentry->d_lock);
561 rcu_read_unlock();
562 br_read_unlock(vfsmount_lock);
563 return -ECHILD;
564}
565
566/*
 * Try to drop out of rcu-walk mode if we were in it, otherwise do nothing.
 * Returns 0 when @nd is not in rcu-walk mode (LOOKUP_RCU clear); otherwise
 * returns whatever nameidata_drop_rcu_last() returns. The rcu-walk case is
 * annotated as the likely one.
 */
567static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
568{
569 if (likely(nd->flags & LOOKUP_RCU))
570 return nameidata_drop_rcu_last(nd);
571 return 0;
572}
573
574/**
378 * release_open_intent - free up open intent resources 575 * release_open_intent - free up open intent resources
379 * @nd: pointer to nameidata 576 * @nd: pointer to nameidata
380 */ 577 */
@@ -386,10 +583,26 @@ void release_open_intent(struct nameidata *nd)
386 fput(nd->intent.open.file); 583 fput(nd->intent.open.file);
387} 584}
388 585
/*
 * Call the filesystem's ->d_revalidate for @dentry. If it returns -ECHILD,
 * try to drop @nd and @dentry out of rcu-walk and call ->d_revalidate once
 * more. Returns the (last) ->d_revalidate status, or -ECHILD unchanged if
 * dropping out of rcu-walk failed.
 *
 * The caller must ensure dentry->d_op->d_revalidate is set; it is
 * dereferenced unconditionally.
 *
 * NOTE(review): nameidata_dentry_drop_rcu() BUG()s when LOOKUP_RCU is clear,
 * so this assumes ->d_revalidate only returns -ECHILD in rcu-walk mode -
 * confirm against the filesystems' implementations.
 */
586static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
587{
588 int status;
589
590 status = dentry->d_op->d_revalidate(dentry, nd);
591 if (status == -ECHILD) {
592 if (nameidata_dentry_drop_rcu(nd, dentry))
593 return status;
		/* Now in ref-walk mode: retry the revalidation. */
594 status = dentry->d_op->d_revalidate(dentry, nd);
595 }
596
597 return status;
598}
599
389static inline struct dentry * 600static inline struct dentry *
390do_revalidate(struct dentry *dentry, struct nameidata *nd) 601do_revalidate(struct dentry *dentry, struct nameidata *nd)
391{ 602{
392 int status = dentry->d_op->d_revalidate(dentry, nd); 603 int status;
604
605 status = d_revalidate(dentry, nd);
393 if (unlikely(status <= 0)) { 606 if (unlikely(status <= 0)) {
394 /* 607 /*
395 * The dentry failed validation. 608 * The dentry failed validation.
@@ -397,19 +610,36 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
397 * the dentry otherwise d_revalidate is asking us 610 * the dentry otherwise d_revalidate is asking us
398 * to return a fail status. 611 * to return a fail status.
399 */ 612 */
400 if (!status) { 613 if (status < 0) {
614 /* If we're in rcu-walk, we don't have a ref */
615 if (!(nd->flags & LOOKUP_RCU))
616 dput(dentry);
617 dentry = ERR_PTR(status);
618
619 } else {
620 /* Don't d_invalidate in rcu-walk mode */
621 if (nameidata_dentry_drop_rcu_maybe(nd, dentry))
622 return ERR_PTR(-ECHILD);
401 if (!d_invalidate(dentry)) { 623 if (!d_invalidate(dentry)) {
402 dput(dentry); 624 dput(dentry);
403 dentry = NULL; 625 dentry = NULL;
404 } 626 }
405 } else {
406 dput(dentry);
407 dentry = ERR_PTR(status);
408 } 627 }
409 } 628 }
410 return dentry; 629 return dentry;
411} 630}
412 631
/*
 * Return 1 if @dentry has DCACHE_OP_REVALIDATE set in d_flags and its
 * superblock's filesystem type has FS_REVAL_DOT set; return 0 otherwise.
 * Both "not set" cases are annotated as the likely ones.
 */
632static inline int need_reval_dot(struct dentry *dentry)
633{
634 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
635 return 0;
636
637 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
638 return 0;
639
640 return 1;
641}
642
413/* 643/*
414 * force_reval_path - force revalidation of a dentry 644 * force_reval_path - force revalidation of a dentry
415 * 645 *
@@ -433,13 +663,12 @@ force_reval_path(struct path *path, struct nameidata *nd)
433 663
434 /* 664 /*
435 * only check on filesystems where it's possible for the dentry to 665 * only check on filesystems where it's possible for the dentry to
436 * become stale. It's assumed that if this flag is set then the 666 * become stale.
437 * d_revalidate op will also be defined.
438 */ 667 */
439 if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) 668 if (!need_reval_dot(dentry))
440 return 0; 669 return 0;
441 670
442 status = dentry->d_op->d_revalidate(dentry, nd); 671 status = d_revalidate(dentry, nd);
443 if (status > 0) 672 if (status > 0)
444 return 0; 673 return 0;
445 674
@@ -459,26 +688,27 @@ force_reval_path(struct path *path, struct nameidata *nd)
459 * short-cut DAC fails, then call ->permission() to do more 688 * short-cut DAC fails, then call ->permission() to do more
460 * complete permission check. 689 * complete permission check.
461 */ 690 */
462static int exec_permission(struct inode *inode) 691static inline int exec_permission(struct inode *inode, unsigned int flags)
463{ 692{
464 int ret; 693 int ret;
465 694
466 if (inode->i_op->permission) { 695 if (inode->i_op->permission) {
467 ret = inode->i_op->permission(inode, MAY_EXEC); 696 ret = inode->i_op->permission(inode, MAY_EXEC, flags);
468 if (!ret) 697 } else {
469 goto ok; 698 ret = acl_permission_check(inode, MAY_EXEC, flags,
470 return ret; 699 inode->i_op->check_acl);
471 } 700 }
472 ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl); 701 if (likely(!ret))
473 if (!ret)
474 goto ok; 702 goto ok;
703 if (ret == -ECHILD)
704 return ret;
475 705
476 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) 706 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
477 goto ok; 707 goto ok;
478 708
479 return ret; 709 return ret;
480ok: 710ok:
481 return security_inode_permission(inode, MAY_EXEC); 711 return security_inode_exec_permission(inode, flags);
482} 712}
483 713
484static __always_inline void set_root(struct nameidata *nd) 714static __always_inline void set_root(struct nameidata *nd)
@@ -489,8 +719,23 @@ static __always_inline void set_root(struct nameidata *nd)
489 719
490static int link_path_walk(const char *, struct nameidata *); 720static int link_path_walk(const char *, struct nameidata *);
491 721
/*
 * If @nd has no root recorded yet (nd->root.mnt is NULL), copy
 * current->fs->root into nd->root. The copy is repeated until the fs->seq
 * seqcount shows no change during the read. No reference counts are taken
 * in this function.
 */
722static __always_inline void set_root_rcu(struct nameidata *nd)
723{
724 if (!nd->root.mnt) {
725 struct fs_struct *fs = current->fs;
726 unsigned seq;
727
728 do {
729 seq = read_seqcount_begin(&fs->seq);
730 nd->root = fs->root;
731 } while (read_seqcount_retry(&fs->seq, seq));
732 }
733}
734
492static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 735static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
493{ 736{
737 int ret;
738
494 if (IS_ERR(link)) 739 if (IS_ERR(link))
495 goto fail; 740 goto fail;
496 741
@@ -500,8 +745,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
500 nd->path = nd->root; 745 nd->path = nd->root;
501 path_get(&nd->root); 746 path_get(&nd->root);
502 } 747 }
748 nd->inode = nd->path.dentry->d_inode;
503 749
504 return link_path_walk(link, nd); 750 ret = link_path_walk(link, nd);
751 return ret;
505fail: 752fail:
506 path_put(&nd->path); 753 path_put(&nd->path);
507 return PTR_ERR(link); 754 return PTR_ERR(link);
@@ -516,11 +763,12 @@ static void path_put_conditional(struct path *path, struct nameidata *nd)
516 763
517static inline void path_to_nameidata(struct path *path, struct nameidata *nd) 764static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
518{ 765{
519 dput(nd->path.dentry); 766 if (!(nd->flags & LOOKUP_RCU)) {
520 if (nd->path.mnt != path->mnt) { 767 dput(nd->path.dentry);
521 mntput(nd->path.mnt); 768 if (nd->path.mnt != path->mnt)
522 nd->path.mnt = path->mnt; 769 mntput(nd->path.mnt);
523 } 770 }
771 nd->path.mnt = path->mnt;
524 nd->path.dentry = path->dentry; 772 nd->path.dentry = path->dentry;
525} 773}
526 774
@@ -535,9 +783,11 @@ __do_follow_link(struct path *path, struct nameidata *nd, void **p)
535 783
536 if (path->mnt != nd->path.mnt) { 784 if (path->mnt != nd->path.mnt) {
537 path_to_nameidata(path, nd); 785 path_to_nameidata(path, nd);
786 nd->inode = nd->path.dentry->d_inode;
538 dget(dentry); 787 dget(dentry);
539 } 788 }
540 mntget(path->mnt); 789 mntget(path->mnt);
790
541 nd->last_type = LAST_BIND; 791 nd->last_type = LAST_BIND;
542 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 792 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
543 error = PTR_ERR(*p); 793 error = PTR_ERR(*p);
@@ -591,6 +841,20 @@ loop:
591 return err; 841 return err;
592} 842}
593 843
/*
 * Step @path from its current mount to the mountpoint in the parent mount.
 * Returns 0, leaving @path untouched, when path->mnt is its own mnt_parent.
 * Otherwise sets path->dentry to mnt_mountpoint and path->mnt to the parent
 * mount, and returns 1. This function itself takes no locks or references.
 */
844static int follow_up_rcu(struct path *path)
845{
846 struct vfsmount *parent;
847 struct dentry *mountpoint;
848
849 parent = path->mnt->mnt_parent;
850 if (parent == path->mnt)
851 return 0;
852 mountpoint = path->mnt->mnt_mountpoint;
853 path->dentry = mountpoint;
854 path->mnt = parent;
855 return 1;
856}
857
594int follow_up(struct path *path) 858int follow_up(struct path *path)
595{ 859{
596 struct vfsmount *parent; 860 struct vfsmount *parent;
@@ -612,9 +876,24 @@ int follow_up(struct path *path)
612 return 1; 876 return 1;
613} 877}
614 878
615/* no need for dcache_lock, as serialization is taken care in 879/*
616 * namespace.c 880 * serialization is taken care of in namespace.c
617 */ 881 */
/*
 * While path->dentry is a mountpoint, look up the mount on it and step @path
 * into that mount's root dentry. For every step, nd->seq is re-sampled from
 * the new dentry's d_seq and *@inode is set to the new dentry's d_inode.
 * Stops early, leaving @path at the mountpoint, if __lookup_mnt() finds
 * nothing. No references are taken in this function.
 */
882static void __follow_mount_rcu(struct nameidata *nd, struct path *path,
883 struct inode **inode)
884{
885 while (d_mountpoint(path->dentry)) {
886 struct vfsmount *mounted;
		/* The meaning of the final argument (1) is not visible from here. */
887 mounted = __lookup_mnt(path->mnt, path->dentry, 1);
888 if (!mounted)
889 return;
890 path->mnt = mounted;
891 path->dentry = mounted->mnt_root;
892 nd->seq = read_seqcount_begin(&path->dentry->d_seq);
893 *inode = path->dentry->d_inode;
894 }
895}
896
618static int __follow_mount(struct path *path) 897static int __follow_mount(struct path *path)
619{ 898{
620 int res = 0; 899 int res = 0;
@@ -645,9 +924,6 @@ static void follow_mount(struct path *path)
645 } 924 }
646} 925}
647 926
648/* no need for dcache_lock, as serialization is taken care in
649 * namespace.c
650 */
651int follow_down(struct path *path) 927int follow_down(struct path *path)
652{ 928{
653 struct vfsmount *mounted; 929 struct vfsmount *mounted;
@@ -663,7 +939,42 @@ int follow_down(struct path *path)
663 return 0; 939 return 0;
664} 940}
665 941
666static __always_inline void follow_dotdot(struct nameidata *nd) 942static int follow_dotdot_rcu(struct nameidata *nd)
943{
	/*
	 * Handle ".." without taking references. Returns 0 on success, with
	 * nd->path, nd->seq and nd->inode updated, or -ECHILD when the current
	 * dentry's d_seq no longer matches nd->seq.
	 */
944 struct inode *inode = nd->inode;
945
946 set_root_rcu(nd);
947
948 while(1) {
		/* Already at the recorded root: ".." stays where it is. */
949 if (nd->path.dentry == nd->root.dentry &&
950 nd->path.mnt == nd->root.mnt) {
951 break;
952 }
		/* Not at the root of this mount: step to d_parent. */
953 if (nd->path.dentry != nd->path.mnt->mnt_root) {
954 struct dentry *old = nd->path.dentry;
955 struct dentry *parent = old->d_parent;
956 unsigned seq;
957
			/*
			 * Sample the parent's d_seq first, then check that the
			 * child's d_seq still matches nd->seq.
			 */
958 seq = read_seqcount_begin(&parent->d_seq);
959 if (read_seqcount_retry(&old->d_seq, nd->seq))
960 return -ECHILD;
961 inode = parent->d_inode;
962 nd->path.dentry = parent;
963 nd->seq = seq;
964 break;
965 }
		/* At the root of this mount: cross to the parent mount, if any, and loop. */
966 if (!follow_up_rcu(&nd->path))
967 break;
968 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
969 inode = nd->path.dentry->d_inode;
970 }
	/* Step into anything mounted on the resulting dentry. */
971 __follow_mount_rcu(nd, &nd->path, &inode);
972 nd->inode = inode;
973
974 return 0;
975}
976
977static void follow_dotdot(struct nameidata *nd)
667{ 978{
668 set_root(nd); 979 set_root(nd);
669 980
@@ -684,6 +995,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
684 break; 995 break;
685 } 996 }
686 follow_mount(&nd->path); 997 follow_mount(&nd->path);
998 nd->inode = nd->path.dentry->d_inode;
687} 999}
688 1000
689/* 1001/*
@@ -721,17 +1033,17 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent,
721 * It _is_ time-critical. 1033 * It _is_ time-critical.
722 */ 1034 */
723static int do_lookup(struct nameidata *nd, struct qstr *name, 1035static int do_lookup(struct nameidata *nd, struct qstr *name,
724 struct path *path) 1036 struct path *path, struct inode **inode)
725{ 1037{
726 struct vfsmount *mnt = nd->path.mnt; 1038 struct vfsmount *mnt = nd->path.mnt;
727 struct dentry *dentry, *parent; 1039 struct dentry *dentry, *parent = nd->path.dentry;
728 struct inode *dir; 1040 struct inode *dir;
729 /* 1041 /*
730 * See if the low-level filesystem might want 1042 * See if the low-level filesystem might want
731 * to use its own hash.. 1043 * to use its own hash..
732 */ 1044 */
733 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { 1045 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
734 int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name); 1046 int err = parent->d_op->d_hash(parent, nd->inode, name);
735 if (err < 0) 1047 if (err < 0)
736 return err; 1048 return err;
737 } 1049 }
@@ -741,21 +1053,44 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
741 * of a false negative due to a concurrent rename, we're going to 1053 * of a false negative due to a concurrent rename, we're going to
742 * do the non-racy lookup, below. 1054 * do the non-racy lookup, below.
743 */ 1055 */
744 dentry = __d_lookup(nd->path.dentry, name); 1056 if (nd->flags & LOOKUP_RCU) {
745 if (!dentry) 1057 unsigned seq;
746 goto need_lookup; 1058
1059 *inode = nd->inode;
1060 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1061 if (!dentry) {
1062 if (nameidata_drop_rcu(nd))
1063 return -ECHILD;
1064 goto need_lookup;
1065 }
1066 /* Memory barrier in read_seqcount_begin of child is enough */
1067 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1068 return -ECHILD;
1069
1070 nd->seq = seq;
1071 if (dentry->d_flags & DCACHE_OP_REVALIDATE)
1072 goto need_revalidate;
1073 path->mnt = mnt;
1074 path->dentry = dentry;
1075 __follow_mount_rcu(nd, path, inode);
1076 } else {
1077 dentry = __d_lookup(parent, name);
1078 if (!dentry)
1079 goto need_lookup;
747found: 1080found:
748 if (dentry->d_op && dentry->d_op->d_revalidate) 1081 if (dentry->d_flags & DCACHE_OP_REVALIDATE)
749 goto need_revalidate; 1082 goto need_revalidate;
750done: 1083done:
751 path->mnt = mnt; 1084 path->mnt = mnt;
752 path->dentry = dentry; 1085 path->dentry = dentry;
753 __follow_mount(path); 1086 __follow_mount(path);
1087 *inode = path->dentry->d_inode;
1088 }
754 return 0; 1089 return 0;
755 1090
756need_lookup: 1091need_lookup:
757 parent = nd->path.dentry;
758 dir = parent->d_inode; 1092 dir = parent->d_inode;
1093 BUG_ON(nd->inode != dir);
759 1094
760 mutex_lock(&dir->i_mutex); 1095 mutex_lock(&dir->i_mutex);
761 /* 1096 /*
@@ -817,7 +1152,6 @@ static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
817static int link_path_walk(const char *name, struct nameidata *nd) 1152static int link_path_walk(const char *name, struct nameidata *nd)
818{ 1153{
819 struct path next; 1154 struct path next;
820 struct inode *inode;
821 int err; 1155 int err;
822 unsigned int lookup_flags = nd->flags; 1156 unsigned int lookup_flags = nd->flags;
823 1157
@@ -826,18 +1160,28 @@ static int link_path_walk(const char *name, struct nameidata *nd)
826 if (!*name) 1160 if (!*name)
827 goto return_reval; 1161 goto return_reval;
828 1162
829 inode = nd->path.dentry->d_inode;
830 if (nd->depth) 1163 if (nd->depth)
831 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); 1164 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
832 1165
833 /* At this point we know we have a real path component. */ 1166 /* At this point we know we have a real path component. */
834 for(;;) { 1167 for(;;) {
1168 struct inode *inode;
835 unsigned long hash; 1169 unsigned long hash;
836 struct qstr this; 1170 struct qstr this;
837 unsigned int c; 1171 unsigned int c;
838 1172
839 nd->flags |= LOOKUP_CONTINUE; 1173 nd->flags |= LOOKUP_CONTINUE;
840 err = exec_permission(inode); 1174 if (nd->flags & LOOKUP_RCU) {
1175 err = exec_permission(nd->inode, IPERM_FLAG_RCU);
1176 if (err == -ECHILD) {
1177 if (nameidata_drop_rcu(nd))
1178 return -ECHILD;
1179 goto exec_again;
1180 }
1181 } else {
1182exec_again:
1183 err = exec_permission(nd->inode, 0);
1184 }
841 if (err) 1185 if (err)
842 break; 1186 break;
843 1187
@@ -868,37 +1212,44 @@ static int link_path_walk(const char *name, struct nameidata *nd)
868 if (this.name[0] == '.') switch (this.len) { 1212 if (this.name[0] == '.') switch (this.len) {
869 default: 1213 default:
870 break; 1214 break;
871 case 2: 1215 case 2:
872 if (this.name[1] != '.') 1216 if (this.name[1] != '.')
873 break; 1217 break;
874 follow_dotdot(nd); 1218 if (nd->flags & LOOKUP_RCU) {
875 inode = nd->path.dentry->d_inode; 1219 if (follow_dotdot_rcu(nd))
1220 return -ECHILD;
1221 } else
1222 follow_dotdot(nd);
876 /* fallthrough */ 1223 /* fallthrough */
877 case 1: 1224 case 1:
878 continue; 1225 continue;
879 } 1226 }
880 /* This does the actual lookups.. */ 1227 /* This does the actual lookups.. */
881 err = do_lookup(nd, &this, &next); 1228 err = do_lookup(nd, &this, &next, &inode);
882 if (err) 1229 if (err)
883 break; 1230 break;
884
885 err = -ENOENT; 1231 err = -ENOENT;
886 inode = next.dentry->d_inode;
887 if (!inode) 1232 if (!inode)
888 goto out_dput; 1233 goto out_dput;
889 1234
890 if (inode->i_op->follow_link) { 1235 if (inode->i_op->follow_link) {
1236 /* We commonly drop rcu-walk here */
1237 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1238 return -ECHILD;
1239 BUG_ON(inode != next.dentry->d_inode);
891 err = do_follow_link(&next, nd); 1240 err = do_follow_link(&next, nd);
892 if (err) 1241 if (err)
893 goto return_err; 1242 goto return_err;
1243 nd->inode = nd->path.dentry->d_inode;
894 err = -ENOENT; 1244 err = -ENOENT;
895 inode = nd->path.dentry->d_inode; 1245 if (!nd->inode)
896 if (!inode)
897 break; 1246 break;
898 } else 1247 } else {
899 path_to_nameidata(&next, nd); 1248 path_to_nameidata(&next, nd);
1249 nd->inode = inode;
1250 }
900 err = -ENOTDIR; 1251 err = -ENOTDIR;
901 if (!inode->i_op->lookup) 1252 if (!nd->inode->i_op->lookup)
902 break; 1253 break;
903 continue; 1254 continue;
904 /* here ends the main loop */ 1255 /* here ends the main loop */
@@ -913,32 +1264,39 @@ last_component:
913 if (this.name[0] == '.') switch (this.len) { 1264 if (this.name[0] == '.') switch (this.len) {
914 default: 1265 default:
915 break; 1266 break;
916 case 2: 1267 case 2:
917 if (this.name[1] != '.') 1268 if (this.name[1] != '.')
918 break; 1269 break;
919 follow_dotdot(nd); 1270 if (nd->flags & LOOKUP_RCU) {
920 inode = nd->path.dentry->d_inode; 1271 if (follow_dotdot_rcu(nd))
1272 return -ECHILD;
1273 } else
1274 follow_dotdot(nd);
921 /* fallthrough */ 1275 /* fallthrough */
922 case 1: 1276 case 1:
923 goto return_reval; 1277 goto return_reval;
924 } 1278 }
925 err = do_lookup(nd, &this, &next); 1279 err = do_lookup(nd, &this, &next, &inode);
926 if (err) 1280 if (err)
927 break; 1281 break;
928 inode = next.dentry->d_inode;
929 if (follow_on_final(inode, lookup_flags)) { 1282 if (follow_on_final(inode, lookup_flags)) {
1283 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1284 return -ECHILD;
1285 BUG_ON(inode != next.dentry->d_inode);
930 err = do_follow_link(&next, nd); 1286 err = do_follow_link(&next, nd);
931 if (err) 1287 if (err)
932 goto return_err; 1288 goto return_err;
933 inode = nd->path.dentry->d_inode; 1289 nd->inode = nd->path.dentry->d_inode;
934 } else 1290 } else {
935 path_to_nameidata(&next, nd); 1291 path_to_nameidata(&next, nd);
1292 nd->inode = inode;
1293 }
936 err = -ENOENT; 1294 err = -ENOENT;
937 if (!inode) 1295 if (!nd->inode)
938 break; 1296 break;
939 if (lookup_flags & LOOKUP_DIRECTORY) { 1297 if (lookup_flags & LOOKUP_DIRECTORY) {
940 err = -ENOTDIR; 1298 err = -ENOTDIR;
941 if (!inode->i_op->lookup) 1299 if (!nd->inode->i_op->lookup)
942 break; 1300 break;
943 } 1301 }
944 goto return_base; 1302 goto return_base;
@@ -958,25 +1316,43 @@ return_reval:
958 * We bypassed the ordinary revalidation routines. 1316 * We bypassed the ordinary revalidation routines.
959 * We may need to check the cached dentry for staleness. 1317 * We may need to check the cached dentry for staleness.
960 */ 1318 */
961 if (nd->path.dentry && nd->path.dentry->d_sb && 1319 if (need_reval_dot(nd->path.dentry)) {
962 (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
963 err = -ESTALE;
964 /* Note: we do not d_invalidate() */ 1320 /* Note: we do not d_invalidate() */
965 if (!nd->path.dentry->d_op->d_revalidate( 1321 err = d_revalidate(nd->path.dentry, nd);
966 nd->path.dentry, nd)) 1322 if (!err)
1323 err = -ESTALE;
1324 if (err < 0)
967 break; 1325 break;
968 } 1326 }
969return_base: 1327return_base:
1328 if (nameidata_drop_rcu_last_maybe(nd))
1329 return -ECHILD;
970 return 0; 1330 return 0;
971out_dput: 1331out_dput:
972 path_put_conditional(&next, nd); 1332 if (!(nd->flags & LOOKUP_RCU))
1333 path_put_conditional(&next, nd);
973 break; 1334 break;
974 } 1335 }
975 path_put(&nd->path); 1336 if (!(nd->flags & LOOKUP_RCU))
1337 path_put(&nd->path);
976return_err: 1338return_err:
977 return err; 1339 return err;
978} 1340}
979 1341
/*
 * Zero current->total_link_count and run link_path_walk() on @name.
 * Returns link_path_walk()'s result. The body is identical to
 * path_walk_simple() in this file.
 */
1342static inline int path_walk_rcu(const char *name, struct nameidata *nd)
1343{
1344 current->total_link_count = 0;
1345
1346 return link_path_walk(name, nd);
1347}
1348
/*
 * Zero current->total_link_count and run link_path_walk() on @name.
 * Returns link_path_walk()'s result. The body is identical to
 * path_walk_rcu() in this file.
 */
1349static inline int path_walk_simple(const char *name, struct nameidata *nd)
1350{
1351 current->total_link_count = 0;
1352
1353 return link_path_walk(name, nd);
1354}
1355
980static int path_walk(const char *name, struct nameidata *nd) 1356static int path_walk(const char *name, struct nameidata *nd)
981{ 1357{
982 struct path save = nd->path; 1358 struct path save = nd->path;
@@ -1002,6 +1378,93 @@ static int path_walk(const char *name, struct nameidata *nd)
1002 return result; 1378 return result;
1003} 1379}
1004 1380
/*
 * Clean up after a walk started by path_init_rcu(). If @nd is still in
 * rcu-walk mode, clear LOOKUP_RCU, forget nd->root (without a put) and leave
 * the RCU read section and vfsmount_lock. In either mode, fput() nd->file if
 * one was recorded.
 */
1381static void path_finish_rcu(struct nameidata *nd)
1382{
1383 if (nd->flags & LOOKUP_RCU) {
1384 /* RCU dangling. Cancel it. */
1385 nd->flags &= ~LOOKUP_RCU;
1386 nd->root.mnt = NULL;
1387 rcu_read_unlock();
1388 br_read_unlock(vfsmount_lock);
1389 }
1390 if (nd->file)
1391 fput(nd->file);
1392}
1393
/*
 * Set up @nd to start an rcu-walk lookup of @name.
 *
 * The starting point is current->fs->root when @name begins with '/',
 * current->fs->pwd when @dfd is AT_FDCWD, and otherwise the directory
 * open on @dfd.
 *
 * Returns 0 on success, with LOOKUP_RCU set in nd->flags and both
 * br_read_lock(vfsmount_lock) and rcu_read_lock() held. Returns -EBADF,
 * -ENOTDIR or the file_permission() error on failure; every failure exit is
 * taken before those locks are acquired.
 */
1394static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1395{
1396 int retval = 0;
1397 int fput_needed;
1398 struct file *file;
1399
1400 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1401 nd->flags = flags | LOOKUP_RCU;
1402 nd->depth = 0;
1403 nd->root.mnt = NULL;
1404 nd->file = NULL;
1405
1406 if (*name=='/') {
1407 struct fs_struct *fs = current->fs;
1408 unsigned seq;
1409
1410 br_read_lock(vfsmount_lock);
1411 rcu_read_lock();
1412
		/* Snapshot fs->root and its dentry's d_seq; retry if fs->seq changed meanwhile. */
1413 do {
1414 seq = read_seqcount_begin(&fs->seq);
1415 nd->root = fs->root;
1416 nd->path = nd->root;
1417 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1418 } while (read_seqcount_retry(&fs->seq, seq));
1419
1420 } else if (dfd == AT_FDCWD) {
1421 struct fs_struct *fs = current->fs;
1422 unsigned seq;
1423
1424 br_read_lock(vfsmount_lock);
1425 rcu_read_lock();
1426
		/* Snapshot fs->pwd and its dentry's d_seq; retry if fs->seq changed meanwhile. */
1427 do {
1428 seq = read_seqcount_begin(&fs->seq);
1429 nd->path = fs->pwd;
1430 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1431 } while (read_seqcount_retry(&fs->seq, seq));
1432
1433 } else {
1434 struct dentry *dentry;
1435
1436 file = fget_light(dfd, &fput_needed);
1437 retval = -EBADF;
1438 if (!file)
1439 goto out_fail;
1440
1441 dentry = file->f_path.dentry;
1442
1443 retval = -ENOTDIR;
1444 if (!S_ISDIR(dentry->d_inode->i_mode))
1445 goto fput_fail;
1446
1447 retval = file_permission(file, MAY_EXEC);
1448 if (retval)
1449 goto fput_fail;
1450
1451 nd->path = file->f_path;
		/*
		 * If fget_light() took a reference, remember the file in
		 * nd->file; path_finish_rcu() fput()s it later.
		 */
1452 if (fput_needed)
1453 nd->file = file;
1454
		/* In this branch d_seq is sampled before the locks below are taken. */
1455 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1456 br_read_lock(vfsmount_lock);
1457 rcu_read_lock();
1458 }
1459 nd->inode = nd->path.dentry->d_inode;
1460 return 0;
1461
1462fput_fail:
1463 fput_light(file, fput_needed);
1464out_fail:
1465 return retval;
1466}
1467
1005static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1468static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1006{ 1469{
1007 int retval = 0; 1470 int retval = 0;
@@ -1042,6 +1505,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei
1042 1505
1043 fput_light(file, fput_needed); 1506 fput_light(file, fput_needed);
1044 } 1507 }
1508 nd->inode = nd->path.dentry->d_inode;
1045 return 0; 1509 return 0;
1046 1510
1047fput_fail: 1511fput_fail:
@@ -1054,16 +1518,53 @@ out_fail:
1054static int do_path_lookup(int dfd, const char *name, 1518static int do_path_lookup(int dfd, const char *name,
1055 unsigned int flags, struct nameidata *nd) 1519 unsigned int flags, struct nameidata *nd)
1056{ 1520{
1057 int retval = path_init(dfd, name, flags, nd); 1521 int retval;
1058 if (!retval) 1522
1059 retval = path_walk(name, nd); 1523 /*
1060 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1524 * Path walking is largely split up into 2 different synchronisation
1061 nd->path.dentry->d_inode)) 1525 * schemes, rcu-walk and ref-walk (explained in
1062 audit_inode(name, nd->path.dentry); 1526 * Documentation/filesystems/path-lookup.txt). These share much of the
1527 * path walk code, but some things, particularly setup, cleanup, and
1528 * following mounts, are sufficiently divergent that functions are
1529 * duplicated. Typically there is a function foo(), and its RCU
1530 * analogue, foo_rcu().
1531 *
1532 * -ECHILD is the error number of choice (just to avoid clashes) that
1533 * is returned if some aspect of an rcu-walk fails. Such an error must
1534 * be handled by restarting a traditional ref-walk (which will always
1535 * be able to complete).
1536 */
1537 retval = path_init_rcu(dfd, name, flags, nd);
1538 if (unlikely(retval))
1539 return retval;
1540 retval = path_walk_rcu(name, nd);
1541 path_finish_rcu(nd);
1063 if (nd->root.mnt) { 1542 if (nd->root.mnt) {
1064 path_put(&nd->root); 1543 path_put(&nd->root);
1065 nd->root.mnt = NULL; 1544 nd->root.mnt = NULL;
1066 } 1545 }
1546
1547 if (unlikely(retval == -ECHILD || retval == -ESTALE)) {
1548 /* slower, locked walk */
1549 if (retval == -ESTALE)
1550 flags |= LOOKUP_REVAL;
1551 retval = path_init(dfd, name, flags, nd);
1552 if (unlikely(retval))
1553 return retval;
1554 retval = path_walk(name, nd);
1555 if (nd->root.mnt) {
1556 path_put(&nd->root);
1557 nd->root.mnt = NULL;
1558 }
1559 }
1560
1561 if (likely(!retval)) {
1562 if (unlikely(!audit_dummy_context())) {
1563 if (nd->path.dentry && nd->inode)
1564 audit_inode(name, nd->path.dentry);
1565 }
1566 }
1567
1067 return retval; 1568 return retval;
1068} 1569}
1069 1570
@@ -1106,10 +1607,11 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1106 path_get(&nd->path); 1607 path_get(&nd->path);
1107 nd->root = nd->path; 1608 nd->root = nd->path;
1108 path_get(&nd->root); 1609 path_get(&nd->root);
1610 nd->inode = nd->path.dentry->d_inode;
1109 1611
1110 retval = path_walk(name, nd); 1612 retval = path_walk(name, nd);
1111 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1613 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1112 nd->path.dentry->d_inode)) 1614 nd->inode))
1113 audit_inode(name, nd->path.dentry); 1615 audit_inode(name, nd->path.dentry);
1114 1616
1115 path_put(&nd->root); 1617 path_put(&nd->root);
@@ -1125,7 +1627,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1125 struct dentry *dentry; 1627 struct dentry *dentry;
1126 int err; 1628 int err;
1127 1629
1128 err = exec_permission(inode); 1630 err = exec_permission(inode, 0);
1129 if (err) 1631 if (err)
1130 return ERR_PTR(err); 1632 return ERR_PTR(err);
1131 1633
@@ -1133,8 +1635,8 @@ static struct dentry *__lookup_hash(struct qstr *name,
1133 * See if the low-level filesystem might want 1635 * See if the low-level filesystem might want
1134 * to use its own hash.. 1636 * to use its own hash..
1135 */ 1637 */
1136 if (base->d_op && base->d_op->d_hash) { 1638 if (base->d_flags & DCACHE_OP_HASH) {
1137 err = base->d_op->d_hash(base, name); 1639 err = base->d_op->d_hash(base, inode, name);
1138 dentry = ERR_PTR(err); 1640 dentry = ERR_PTR(err);
1139 if (err < 0) 1641 if (err < 0)
1140 goto out; 1642 goto out;
@@ -1147,7 +1649,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1147 */ 1649 */
1148 dentry = d_lookup(base, name); 1650 dentry = d_lookup(base, name);
1149 1651
1150 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) 1652 if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE))
1151 dentry = do_revalidate(dentry, nd); 1653 dentry = do_revalidate(dentry, nd);
1152 1654
1153 if (!dentry) 1655 if (!dentry)
@@ -1490,6 +1992,7 @@ out_unlock:
1490 mutex_unlock(&dir->d_inode->i_mutex); 1992 mutex_unlock(&dir->d_inode->i_mutex);
1491 dput(nd->path.dentry); 1993 dput(nd->path.dentry);
1492 nd->path.dentry = path->dentry; 1994 nd->path.dentry = path->dentry;
1995
1493 if (error) 1996 if (error)
1494 return error; 1997 return error;
1495 /* Don't check for write permission, don't truncate */ 1998 /* Don't check for write permission, don't truncate */
@@ -1584,6 +2087,9 @@ exit:
1584 return ERR_PTR(error); 2087 return ERR_PTR(error);
1585} 2088}
1586 2089
2090/*
2091 * Handle O_CREAT case for do_filp_open
2092 */
1587static struct file *do_last(struct nameidata *nd, struct path *path, 2093static struct file *do_last(struct nameidata *nd, struct path *path,
1588 int open_flag, int acc_mode, 2094 int open_flag, int acc_mode,
1589 int mode, const char *pathname) 2095 int mode, const char *pathname)
@@ -1597,50 +2103,25 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1597 follow_dotdot(nd); 2103 follow_dotdot(nd);
1598 dir = nd->path.dentry; 2104 dir = nd->path.dentry;
1599 case LAST_DOT: 2105 case LAST_DOT:
1600 if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) { 2106 if (need_reval_dot(dir)) {
1601 if (!dir->d_op->d_revalidate(dir, nd)) { 2107 error = d_revalidate(nd->path.dentry, nd);
2108 if (!error)
1602 error = -ESTALE; 2109 error = -ESTALE;
2110 if (error < 0)
1603 goto exit; 2111 goto exit;
1604 }
1605 } 2112 }
1606 /* fallthrough */ 2113 /* fallthrough */
1607 case LAST_ROOT: 2114 case LAST_ROOT:
1608 if (open_flag & O_CREAT) 2115 goto exit;
1609 goto exit;
1610 /* fallthrough */
1611 case LAST_BIND: 2116 case LAST_BIND:
1612 audit_inode(pathname, dir); 2117 audit_inode(pathname, dir);
1613 goto ok; 2118 goto ok;
1614 } 2119 }
1615 2120
1616 /* trailing slashes? */ 2121 /* trailing slashes? */
1617 if (nd->last.name[nd->last.len]) { 2122 if (nd->last.name[nd->last.len])
1618 if (open_flag & O_CREAT) 2123 goto exit;
1619 goto exit;
1620 nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
1621 }
1622
1623 /* just plain open? */
1624 if (!(open_flag & O_CREAT)) {
1625 error = do_lookup(nd, &nd->last, path);
1626 if (error)
1627 goto exit;
1628 error = -ENOENT;
1629 if (!path->dentry->d_inode)
1630 goto exit_dput;
1631 if (path->dentry->d_inode->i_op->follow_link)
1632 return NULL;
1633 error = -ENOTDIR;
1634 if (nd->flags & LOOKUP_DIRECTORY) {
1635 if (!path->dentry->d_inode->i_op->lookup)
1636 goto exit_dput;
1637 }
1638 path_to_nameidata(path, nd);
1639 audit_inode(pathname, nd->path.dentry);
1640 goto ok;
1641 }
1642 2124
1643 /* OK, it's O_CREAT */
1644 mutex_lock(&dir->d_inode->i_mutex); 2125 mutex_lock(&dir->d_inode->i_mutex);
1645 2126
1646 path->dentry = lookup_hash(nd); 2127 path->dentry = lookup_hash(nd);
@@ -1711,8 +2192,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1711 return NULL; 2192 return NULL;
1712 2193
1713 path_to_nameidata(path, nd); 2194 path_to_nameidata(path, nd);
2195 nd->inode = path->dentry->d_inode;
1714 error = -EISDIR; 2196 error = -EISDIR;
1715 if (S_ISDIR(path->dentry->d_inode->i_mode)) 2197 if (S_ISDIR(nd->inode->i_mode))
1716 goto exit; 2198 goto exit;
1717ok: 2199ok:
1718 filp = finish_open(nd, open_flag, acc_mode); 2200 filp = finish_open(nd, open_flag, acc_mode);
@@ -1743,7 +2225,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1743 struct path path; 2225 struct path path;
1744 int count = 0; 2226 int count = 0;
1745 int flag = open_to_namei_flags(open_flag); 2227 int flag = open_to_namei_flags(open_flag);
1746 int force_reval = 0; 2228 int flags;
1747 2229
1748 if (!(open_flag & O_CREAT)) 2230 if (!(open_flag & O_CREAT))
1749 mode = 0; 2231 mode = 0;
@@ -1772,54 +2254,84 @@ struct file *do_filp_open(int dfd, const char *pathname,
1772 if (open_flag & O_APPEND) 2254 if (open_flag & O_APPEND)
1773 acc_mode |= MAY_APPEND; 2255 acc_mode |= MAY_APPEND;
1774 2256
1775 /* find the parent */ 2257 flags = LOOKUP_OPEN;
1776reval: 2258 if (open_flag & O_CREAT) {
1777 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 2259 flags |= LOOKUP_CREATE;
2260 if (open_flag & O_EXCL)
2261 flags |= LOOKUP_EXCL;
2262 }
2263 if (open_flag & O_DIRECTORY)
2264 flags |= LOOKUP_DIRECTORY;
2265 if (!(open_flag & O_NOFOLLOW))
2266 flags |= LOOKUP_FOLLOW;
2267
2268 filp = get_empty_filp();
2269 if (!filp)
2270 return ERR_PTR(-ENFILE);
2271
2272 filp->f_flags = open_flag;
2273 nd.intent.open.file = filp;
2274 nd.intent.open.flags = flag;
2275 nd.intent.open.create_mode = mode;
2276
2277 if (open_flag & O_CREAT)
2278 goto creat;
2279
2280 /* !O_CREAT, simple open */
2281 error = do_path_lookup(dfd, pathname, flags, &nd);
2282 if (unlikely(error))
2283 goto out_filp;
2284 error = -ELOOP;
2285 if (!(nd.flags & LOOKUP_FOLLOW)) {
2286 if (nd.inode->i_op->follow_link)
2287 goto out_path;
2288 }
2289 error = -ENOTDIR;
2290 if (nd.flags & LOOKUP_DIRECTORY) {
2291 if (!nd.inode->i_op->lookup)
2292 goto out_path;
2293 }
2294 audit_inode(pathname, nd.path.dentry);
2295 filp = finish_open(&nd, open_flag, acc_mode);
2296 return filp;
2297
2298creat:
2299 /* OK, have to create the file. Find the parent. */
2300 error = path_init_rcu(dfd, pathname,
2301 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
1778 if (error) 2302 if (error)
1779 return ERR_PTR(error); 2303 goto out_filp;
1780 if (force_reval) 2304 error = path_walk_rcu(pathname, &nd);
1781 nd.flags |= LOOKUP_REVAL; 2305 path_finish_rcu(&nd);
2306 if (unlikely(error == -ECHILD || error == -ESTALE)) {
2307 /* slower, locked walk */
2308 if (error == -ESTALE) {
2309reval:
2310 flags |= LOOKUP_REVAL;
2311 }
2312 error = path_init(dfd, pathname,
2313 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2314 if (error)
2315 goto out_filp;
1782 2316
1783 current->total_link_count = 0; 2317 error = path_walk_simple(pathname, &nd);
1784 error = link_path_walk(pathname, &nd);
1785 if (error) {
1786 filp = ERR_PTR(error);
1787 goto out;
1788 } 2318 }
1789 if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT)) 2319 if (unlikely(error))
2320 goto out_filp;
2321 if (unlikely(!audit_dummy_context()))
1790 audit_inode(pathname, nd.path.dentry); 2322 audit_inode(pathname, nd.path.dentry);
1791 2323
1792 /* 2324 /*
1793 * We have the parent and last component. 2325 * We have the parent and last component.
1794 */ 2326 */
1795 2327 nd.flags = flags;
1796 error = -ENFILE;
1797 filp = get_empty_filp();
1798 if (filp == NULL)
1799 goto exit_parent;
1800 nd.intent.open.file = filp;
1801 filp->f_flags = open_flag;
1802 nd.intent.open.flags = flag;
1803 nd.intent.open.create_mode = mode;
1804 nd.flags &= ~LOOKUP_PARENT;
1805 nd.flags |= LOOKUP_OPEN;
1806 if (open_flag & O_CREAT) {
1807 nd.flags |= LOOKUP_CREATE;
1808 if (open_flag & O_EXCL)
1809 nd.flags |= LOOKUP_EXCL;
1810 }
1811 if (open_flag & O_DIRECTORY)
1812 nd.flags |= LOOKUP_DIRECTORY;
1813 if (!(open_flag & O_NOFOLLOW))
1814 nd.flags |= LOOKUP_FOLLOW;
1815 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2328 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1816 while (unlikely(!filp)) { /* trailing symlink */ 2329 while (unlikely(!filp)) { /* trailing symlink */
1817 struct path holder; 2330 struct path holder;
1818 struct inode *inode = path.dentry->d_inode;
1819 void *cookie; 2331 void *cookie;
1820 error = -ELOOP; 2332 error = -ELOOP;
1821 /* S_ISDIR part is a temporary automount kludge */ 2333 /* S_ISDIR part is a temporary automount kludge */
1822 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode)) 2334 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(nd.inode->i_mode))
1823 goto exit_dput; 2335 goto exit_dput;
1824 if (count++ == 32) 2336 if (count++ == 32)
1825 goto exit_dput; 2337 goto exit_dput;
@@ -1840,36 +2352,33 @@ reval:
1840 goto exit_dput; 2352 goto exit_dput;
1841 error = __do_follow_link(&path, &nd, &cookie); 2353 error = __do_follow_link(&path, &nd, &cookie);
1842 if (unlikely(error)) { 2354 if (unlikely(error)) {
2355 if (!IS_ERR(cookie) && nd.inode->i_op->put_link)
2356 nd.inode->i_op->put_link(path.dentry, &nd, cookie);
1843 /* nd.path had been dropped */ 2357 /* nd.path had been dropped */
1844 if (!IS_ERR(cookie) && inode->i_op->put_link) 2358 nd.path = path;
1845 inode->i_op->put_link(path.dentry, &nd, cookie); 2359 goto out_path;
1846 path_put(&path);
1847 release_open_intent(&nd);
1848 filp = ERR_PTR(error);
1849 goto out;
1850 } 2360 }
1851 holder = path; 2361 holder = path;
1852 nd.flags &= ~LOOKUP_PARENT; 2362 nd.flags &= ~LOOKUP_PARENT;
1853 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2363 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1854 if (inode->i_op->put_link) 2364 if (nd.inode->i_op->put_link)
1855 inode->i_op->put_link(holder.dentry, &nd, cookie); 2365 nd.inode->i_op->put_link(holder.dentry, &nd, cookie);
1856 path_put(&holder); 2366 path_put(&holder);
1857 } 2367 }
1858out: 2368out:
1859 if (nd.root.mnt) 2369 if (nd.root.mnt)
1860 path_put(&nd.root); 2370 path_put(&nd.root);
1861 if (filp == ERR_PTR(-ESTALE) && !force_reval) { 2371 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
1862 force_reval = 1;
1863 goto reval; 2372 goto reval;
1864 }
1865 return filp; 2373 return filp;
1866 2374
1867exit_dput: 2375exit_dput:
1868 path_put_conditional(&path, &nd); 2376 path_put_conditional(&path, &nd);
2377out_path:
2378 path_put(&nd.path);
2379out_filp:
1869 if (!IS_ERR(nd.intent.open.file)) 2380 if (!IS_ERR(nd.intent.open.file))
1870 release_open_intent(&nd); 2381 release_open_intent(&nd);
1871exit_parent:
1872 path_put(&nd.path);
1873 filp = ERR_PTR(error); 2382 filp = ERR_PTR(error);
1874 goto out; 2383 goto out;
1875} 2384}
@@ -2130,12 +2639,10 @@ void dentry_unhash(struct dentry *dentry)
2130{ 2639{
2131 dget(dentry); 2640 dget(dentry);
2132 shrink_dcache_parent(dentry); 2641 shrink_dcache_parent(dentry);
2133 spin_lock(&dcache_lock);
2134 spin_lock(&dentry->d_lock); 2642 spin_lock(&dentry->d_lock);
2135 if (atomic_read(&dentry->d_count) == 2) 2643 if (dentry->d_count == 2)
2136 __d_drop(dentry); 2644 __d_drop(dentry);
2137 spin_unlock(&dentry->d_lock); 2645 spin_unlock(&dentry->d_lock);
2138 spin_unlock(&dcache_lock);
2139} 2646}
2140 2647
2141int vfs_rmdir(struct inode *dir, struct dentry *dentry) 2648int vfs_rmdir(struct inode *dir, struct dentry *dentry)