diff options
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 1557 |
1 files changed, 699 insertions, 858 deletions
diff --git a/fs/namei.c b/fs/namei.c index a4689eb2df28..3cb616d38d9c 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page) | |||
136 | return retval; | 136 | return retval; |
137 | } | 137 | } |
138 | 138 | ||
139 | char * getname(const char __user * filename) | 139 | static char *getname_flags(const char __user * filename, int flags) |
140 | { | 140 | { |
141 | char *tmp, *result; | 141 | char *tmp, *result; |
142 | 142 | ||
@@ -147,14 +147,21 @@ char * getname(const char __user * filename) | |||
147 | 147 | ||
148 | result = tmp; | 148 | result = tmp; |
149 | if (retval < 0) { | 149 | if (retval < 0) { |
150 | __putname(tmp); | 150 | if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) { |
151 | result = ERR_PTR(retval); | 151 | __putname(tmp); |
152 | result = ERR_PTR(retval); | ||
153 | } | ||
152 | } | 154 | } |
153 | } | 155 | } |
154 | audit_getname(result); | 156 | audit_getname(result); |
155 | return result; | 157 | return result; |
156 | } | 158 | } |
157 | 159 | ||
160 | char *getname(const char __user * filename) | ||
161 | { | ||
162 | return getname_flags(filename, 0); | ||
163 | } | ||
164 | |||
158 | #ifdef CONFIG_AUDITSYSCALL | 165 | #ifdef CONFIG_AUDITSYSCALL |
159 | void putname(const char *name) | 166 | void putname(const char *name) |
160 | { | 167 | { |
@@ -176,6 +183,9 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag | |||
176 | 183 | ||
177 | mask &= MAY_READ | MAY_WRITE | MAY_EXEC; | 184 | mask &= MAY_READ | MAY_WRITE | MAY_EXEC; |
178 | 185 | ||
186 | if (current_user_ns() != inode_userns(inode)) | ||
187 | goto other_perms; | ||
188 | |||
179 | if (current_fsuid() == inode->i_uid) | 189 | if (current_fsuid() == inode->i_uid) |
180 | mode >>= 6; | 190 | mode >>= 6; |
181 | else { | 191 | else { |
@@ -189,6 +199,7 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag | |||
189 | mode >>= 3; | 199 | mode >>= 3; |
190 | } | 200 | } |
191 | 201 | ||
202 | other_perms: | ||
192 | /* | 203 | /* |
193 | * If the DACs are ok we don't need any capability check. | 204 | * If the DACs are ok we don't need any capability check. |
194 | */ | 205 | */ |
@@ -230,7 +241,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags, | |||
230 | * Executable DACs are overridable if at least one exec bit is set. | 241 | * Executable DACs are overridable if at least one exec bit is set. |
231 | */ | 242 | */ |
232 | if (!(mask & MAY_EXEC) || execute_ok(inode)) | 243 | if (!(mask & MAY_EXEC) || execute_ok(inode)) |
233 | if (capable(CAP_DAC_OVERRIDE)) | 244 | if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE)) |
234 | return 0; | 245 | return 0; |
235 | 246 | ||
236 | /* | 247 | /* |
@@ -238,7 +249,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags, | |||
238 | */ | 249 | */ |
239 | mask &= MAY_READ | MAY_WRITE | MAY_EXEC; | 250 | mask &= MAY_READ | MAY_WRITE | MAY_EXEC; |
240 | if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) | 251 | if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) |
241 | if (capable(CAP_DAC_READ_SEARCH)) | 252 | if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH)) |
242 | return 0; | 253 | return 0; |
243 | 254 | ||
244 | return -EACCES; | 255 | return -EACCES; |
@@ -401,9 +412,11 @@ static int nameidata_drop_rcu(struct nameidata *nd) | |||
401 | { | 412 | { |
402 | struct fs_struct *fs = current->fs; | 413 | struct fs_struct *fs = current->fs; |
403 | struct dentry *dentry = nd->path.dentry; | 414 | struct dentry *dentry = nd->path.dentry; |
415 | int want_root = 0; | ||
404 | 416 | ||
405 | BUG_ON(!(nd->flags & LOOKUP_RCU)); | 417 | BUG_ON(!(nd->flags & LOOKUP_RCU)); |
406 | if (nd->root.mnt) { | 418 | if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { |
419 | want_root = 1; | ||
407 | spin_lock(&fs->lock); | 420 | spin_lock(&fs->lock); |
408 | if (nd->root.mnt != fs->root.mnt || | 421 | if (nd->root.mnt != fs->root.mnt || |
409 | nd->root.dentry != fs->root.dentry) | 422 | nd->root.dentry != fs->root.dentry) |
@@ -414,7 +427,7 @@ static int nameidata_drop_rcu(struct nameidata *nd) | |||
414 | goto err; | 427 | goto err; |
415 | BUG_ON(nd->inode != dentry->d_inode); | 428 | BUG_ON(nd->inode != dentry->d_inode); |
416 | spin_unlock(&dentry->d_lock); | 429 | spin_unlock(&dentry->d_lock); |
417 | if (nd->root.mnt) { | 430 | if (want_root) { |
418 | path_get(&nd->root); | 431 | path_get(&nd->root); |
419 | spin_unlock(&fs->lock); | 432 | spin_unlock(&fs->lock); |
420 | } | 433 | } |
@@ -427,7 +440,7 @@ static int nameidata_drop_rcu(struct nameidata *nd) | |||
427 | err: | 440 | err: |
428 | spin_unlock(&dentry->d_lock); | 441 | spin_unlock(&dentry->d_lock); |
429 | err_root: | 442 | err_root: |
430 | if (nd->root.mnt) | 443 | if (want_root) |
431 | spin_unlock(&fs->lock); | 444 | spin_unlock(&fs->lock); |
432 | return -ECHILD; | 445 | return -ECHILD; |
433 | } | 446 | } |
@@ -454,9 +467,11 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry | |||
454 | { | 467 | { |
455 | struct fs_struct *fs = current->fs; | 468 | struct fs_struct *fs = current->fs; |
456 | struct dentry *parent = nd->path.dentry; | 469 | struct dentry *parent = nd->path.dentry; |
470 | int want_root = 0; | ||
457 | 471 | ||
458 | BUG_ON(!(nd->flags & LOOKUP_RCU)); | 472 | BUG_ON(!(nd->flags & LOOKUP_RCU)); |
459 | if (nd->root.mnt) { | 473 | if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { |
474 | want_root = 1; | ||
460 | spin_lock(&fs->lock); | 475 | spin_lock(&fs->lock); |
461 | if (nd->root.mnt != fs->root.mnt || | 476 | if (nd->root.mnt != fs->root.mnt || |
462 | nd->root.dentry != fs->root.dentry) | 477 | nd->root.dentry != fs->root.dentry) |
@@ -476,7 +491,7 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry | |||
476 | parent->d_count++; | 491 | parent->d_count++; |
477 | spin_unlock(&dentry->d_lock); | 492 | spin_unlock(&dentry->d_lock); |
478 | spin_unlock(&parent->d_lock); | 493 | spin_unlock(&parent->d_lock); |
479 | if (nd->root.mnt) { | 494 | if (want_root) { |
480 | path_get(&nd->root); | 495 | path_get(&nd->root); |
481 | spin_unlock(&fs->lock); | 496 | spin_unlock(&fs->lock); |
482 | } | 497 | } |
@@ -490,7 +505,7 @@ err: | |||
490 | spin_unlock(&dentry->d_lock); | 505 | spin_unlock(&dentry->d_lock); |
491 | spin_unlock(&parent->d_lock); | 506 | spin_unlock(&parent->d_lock); |
492 | err_root: | 507 | err_root: |
493 | if (nd->root.mnt) | 508 | if (want_root) |
494 | spin_unlock(&fs->lock); | 509 | spin_unlock(&fs->lock); |
495 | return -ECHILD; | 510 | return -ECHILD; |
496 | } | 511 | } |
@@ -498,8 +513,16 @@ err_root: | |||
498 | /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ | 513 | /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ |
499 | static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) | 514 | static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) |
500 | { | 515 | { |
501 | if (nd->flags & LOOKUP_RCU) | 516 | if (nd->flags & LOOKUP_RCU) { |
502 | return nameidata_dentry_drop_rcu(nd, dentry); | 517 | if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) { |
518 | nd->flags &= ~LOOKUP_RCU; | ||
519 | if (!(nd->flags & LOOKUP_ROOT)) | ||
520 | nd->root.mnt = NULL; | ||
521 | rcu_read_unlock(); | ||
522 | br_read_unlock(vfsmount_lock); | ||
523 | return -ECHILD; | ||
524 | } | ||
525 | } | ||
503 | return 0; | 526 | return 0; |
504 | } | 527 | } |
505 | 528 | ||
@@ -518,7 +541,8 @@ static int nameidata_drop_rcu_last(struct nameidata *nd) | |||
518 | 541 | ||
519 | BUG_ON(!(nd->flags & LOOKUP_RCU)); | 542 | BUG_ON(!(nd->flags & LOOKUP_RCU)); |
520 | nd->flags &= ~LOOKUP_RCU; | 543 | nd->flags &= ~LOOKUP_RCU; |
521 | nd->root.mnt = NULL; | 544 | if (!(nd->flags & LOOKUP_ROOT)) |
545 | nd->root.mnt = NULL; | ||
522 | spin_lock(&dentry->d_lock); | 546 | spin_lock(&dentry->d_lock); |
523 | if (!__d_rcu_to_refcount(dentry, nd->seq)) | 547 | if (!__d_rcu_to_refcount(dentry, nd->seq)) |
524 | goto err_unlock; | 548 | goto err_unlock; |
@@ -539,14 +563,6 @@ err_unlock: | |||
539 | return -ECHILD; | 563 | return -ECHILD; |
540 | } | 564 | } |
541 | 565 | ||
542 | /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ | ||
543 | static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd) | ||
544 | { | ||
545 | if (likely(nd->flags & LOOKUP_RCU)) | ||
546 | return nameidata_drop_rcu_last(nd); | ||
547 | return 0; | ||
548 | } | ||
549 | |||
550 | /** | 566 | /** |
551 | * release_open_intent - free up open intent resources | 567 | * release_open_intent - free up open intent resources |
552 | * @nd: pointer to nameidata | 568 | * @nd: pointer to nameidata |
@@ -590,42 +606,8 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
590 | return dentry; | 606 | return dentry; |
591 | } | 607 | } |
592 | 608 | ||
593 | static inline struct dentry * | ||
594 | do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd) | ||
595 | { | ||
596 | int status = d_revalidate(dentry, nd); | ||
597 | if (likely(status > 0)) | ||
598 | return dentry; | ||
599 | if (status == -ECHILD) { | ||
600 | if (nameidata_dentry_drop_rcu(nd, dentry)) | ||
601 | return ERR_PTR(-ECHILD); | ||
602 | return do_revalidate(dentry, nd); | ||
603 | } | ||
604 | if (status < 0) | ||
605 | return ERR_PTR(status); | ||
606 | /* Don't d_invalidate in rcu-walk mode */ | ||
607 | if (nameidata_dentry_drop_rcu(nd, dentry)) | ||
608 | return ERR_PTR(-ECHILD); | ||
609 | if (!d_invalidate(dentry)) { | ||
610 | dput(dentry); | ||
611 | dentry = NULL; | ||
612 | } | ||
613 | return dentry; | ||
614 | } | ||
615 | |||
616 | static inline int need_reval_dot(struct dentry *dentry) | ||
617 | { | ||
618 | if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) | ||
619 | return 0; | ||
620 | |||
621 | if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) | ||
622 | return 0; | ||
623 | |||
624 | return 1; | ||
625 | } | ||
626 | |||
627 | /* | 609 | /* |
628 | * force_reval_path - force revalidation of a dentry | 610 | * handle_reval_path - force revalidation of a dentry |
629 | * | 611 | * |
630 | * In some situations the path walking code will trust dentries without | 612 | * In some situations the path walking code will trust dentries without |
631 | * revalidating them. This causes problems for filesystems that depend on | 613 | * revalidating them. This causes problems for filesystems that depend on |
@@ -639,27 +621,28 @@ static inline int need_reval_dot(struct dentry *dentry) | |||
639 | * invalidate the dentry. It's up to the caller to handle putting references | 621 | * invalidate the dentry. It's up to the caller to handle putting references |
640 | * to the path if necessary. | 622 | * to the path if necessary. |
641 | */ | 623 | */ |
642 | static int | 624 | static inline int handle_reval_path(struct nameidata *nd) |
643 | force_reval_path(struct path *path, struct nameidata *nd) | ||
644 | { | 625 | { |
626 | struct dentry *dentry = nd->path.dentry; | ||
645 | int status; | 627 | int status; |
646 | struct dentry *dentry = path->dentry; | ||
647 | 628 | ||
648 | /* | 629 | if (likely(!(nd->flags & LOOKUP_JUMPED))) |
649 | * only check on filesystems where it's possible for the dentry to | 630 | return 0; |
650 | * become stale. | 631 | |
651 | */ | 632 | if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) |
652 | if (!need_reval_dot(dentry)) | 633 | return 0; |
634 | |||
635 | if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) | ||
653 | return 0; | 636 | return 0; |
654 | 637 | ||
638 | /* Note: we do not d_invalidate() */ | ||
655 | status = d_revalidate(dentry, nd); | 639 | status = d_revalidate(dentry, nd); |
656 | if (status > 0) | 640 | if (status > 0) |
657 | return 0; | 641 | return 0; |
658 | 642 | ||
659 | if (!status) { | 643 | if (!status) |
660 | d_invalidate(dentry); | ||
661 | status = -ESTALE; | 644 | status = -ESTALE; |
662 | } | 645 | |
663 | return status; | 646 | return status; |
664 | } | 647 | } |
665 | 648 | ||
@@ -675,6 +658,7 @@ force_reval_path(struct path *path, struct nameidata *nd) | |||
675 | static inline int exec_permission(struct inode *inode, unsigned int flags) | 658 | static inline int exec_permission(struct inode *inode, unsigned int flags) |
676 | { | 659 | { |
677 | int ret; | 660 | int ret; |
661 | struct user_namespace *ns = inode_userns(inode); | ||
678 | 662 | ||
679 | if (inode->i_op->permission) { | 663 | if (inode->i_op->permission) { |
680 | ret = inode->i_op->permission(inode, MAY_EXEC, flags); | 664 | ret = inode->i_op->permission(inode, MAY_EXEC, flags); |
@@ -687,7 +671,8 @@ static inline int exec_permission(struct inode *inode, unsigned int flags) | |||
687 | if (ret == -ECHILD) | 671 | if (ret == -ECHILD) |
688 | return ret; | 672 | return ret; |
689 | 673 | ||
690 | if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) | 674 | if (ns_capable(ns, CAP_DAC_OVERRIDE) || |
675 | ns_capable(ns, CAP_DAC_READ_SEARCH)) | ||
691 | goto ok; | 676 | goto ok; |
692 | 677 | ||
693 | return ret; | 678 | return ret; |
@@ -728,6 +713,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l | |||
728 | path_put(&nd->path); | 713 | path_put(&nd->path); |
729 | nd->path = nd->root; | 714 | nd->path = nd->root; |
730 | path_get(&nd->root); | 715 | path_get(&nd->root); |
716 | nd->flags |= LOOKUP_JUMPED; | ||
731 | } | 717 | } |
732 | nd->inode = nd->path.dentry->d_inode; | 718 | nd->inode = nd->path.dentry->d_inode; |
733 | 719 | ||
@@ -757,19 +743,42 @@ static inline void path_to_nameidata(const struct path *path, | |||
757 | nd->path.dentry = path->dentry; | 743 | nd->path.dentry = path->dentry; |
758 | } | 744 | } |
759 | 745 | ||
746 | static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) | ||
747 | { | ||
748 | struct inode *inode = link->dentry->d_inode; | ||
749 | if (!IS_ERR(cookie) && inode->i_op->put_link) | ||
750 | inode->i_op->put_link(link->dentry, nd, cookie); | ||
751 | path_put(link); | ||
752 | } | ||
753 | |||
760 | static __always_inline int | 754 | static __always_inline int |
761 | __do_follow_link(const struct path *link, struct nameidata *nd, void **p) | 755 | follow_link(struct path *link, struct nameidata *nd, void **p) |
762 | { | 756 | { |
763 | int error; | 757 | int error; |
764 | struct dentry *dentry = link->dentry; | 758 | struct dentry *dentry = link->dentry; |
765 | 759 | ||
766 | BUG_ON(nd->flags & LOOKUP_RCU); | 760 | BUG_ON(nd->flags & LOOKUP_RCU); |
767 | 761 | ||
762 | if (link->mnt == nd->path.mnt) | ||
763 | mntget(link->mnt); | ||
764 | |||
765 | if (unlikely(current->total_link_count >= 40)) { | ||
766 | *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */ | ||
767 | path_put(&nd->path); | ||
768 | return -ELOOP; | ||
769 | } | ||
770 | cond_resched(); | ||
771 | current->total_link_count++; | ||
772 | |||
768 | touch_atime(link->mnt, dentry); | 773 | touch_atime(link->mnt, dentry); |
769 | nd_set_link(nd, NULL); | 774 | nd_set_link(nd, NULL); |
770 | 775 | ||
771 | if (link->mnt == nd->path.mnt) | 776 | error = security_inode_follow_link(link->dentry, nd); |
772 | mntget(link->mnt); | 777 | if (error) { |
778 | *p = ERR_PTR(error); /* no ->put_link(), please */ | ||
779 | path_put(&nd->path); | ||
780 | return error; | ||
781 | } | ||
773 | 782 | ||
774 | nd->last_type = LAST_BIND; | 783 | nd->last_type = LAST_BIND; |
775 | *p = dentry->d_inode->i_op->follow_link(dentry, nd); | 784 | *p = dentry->d_inode->i_op->follow_link(dentry, nd); |
@@ -780,56 +789,18 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p) | |||
780 | if (s) | 789 | if (s) |
781 | error = __vfs_follow_link(nd, s); | 790 | error = __vfs_follow_link(nd, s); |
782 | else if (nd->last_type == LAST_BIND) { | 791 | else if (nd->last_type == LAST_BIND) { |
783 | error = force_reval_path(&nd->path, nd); | 792 | nd->flags |= LOOKUP_JUMPED; |
784 | if (error) | 793 | nd->inode = nd->path.dentry->d_inode; |
794 | if (nd->inode->i_op->follow_link) { | ||
795 | /* stepped on a _really_ weird one */ | ||
785 | path_put(&nd->path); | 796 | path_put(&nd->path); |
797 | error = -ELOOP; | ||
798 | } | ||
786 | } | 799 | } |
787 | } | 800 | } |
788 | return error; | 801 | return error; |
789 | } | 802 | } |
790 | 803 | ||
791 | /* | ||
792 | * This limits recursive symlink follows to 8, while | ||
793 | * limiting consecutive symlinks to 40. | ||
794 | * | ||
795 | * Without that kind of total limit, nasty chains of consecutive | ||
796 | * symlinks can cause almost arbitrarily long lookups. | ||
797 | */ | ||
798 | static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd) | ||
799 | { | ||
800 | void *cookie; | ||
801 | int err = -ELOOP; | ||
802 | |||
803 | /* We drop rcu-walk here */ | ||
804 | if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry)) | ||
805 | return -ECHILD; | ||
806 | BUG_ON(inode != path->dentry->d_inode); | ||
807 | |||
808 | if (current->link_count >= MAX_NESTED_LINKS) | ||
809 | goto loop; | ||
810 | if (current->total_link_count >= 40) | ||
811 | goto loop; | ||
812 | BUG_ON(nd->depth >= MAX_NESTED_LINKS); | ||
813 | cond_resched(); | ||
814 | err = security_inode_follow_link(path->dentry, nd); | ||
815 | if (err) | ||
816 | goto loop; | ||
817 | current->link_count++; | ||
818 | current->total_link_count++; | ||
819 | nd->depth++; | ||
820 | err = __do_follow_link(path, nd, &cookie); | ||
821 | if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link) | ||
822 | path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie); | ||
823 | path_put(path); | ||
824 | current->link_count--; | ||
825 | nd->depth--; | ||
826 | return err; | ||
827 | loop: | ||
828 | path_put_conditional(path, nd); | ||
829 | path_put(&nd->path); | ||
830 | return err; | ||
831 | } | ||
832 | |||
833 | static int follow_up_rcu(struct path *path) | 804 | static int follow_up_rcu(struct path *path) |
834 | { | 805 | { |
835 | struct vfsmount *parent; | 806 | struct vfsmount *parent; |
@@ -968,8 +939,7 @@ static int follow_managed(struct path *path, unsigned flags) | |||
968 | if (managed & DCACHE_MANAGE_TRANSIT) { | 939 | if (managed & DCACHE_MANAGE_TRANSIT) { |
969 | BUG_ON(!path->dentry->d_op); | 940 | BUG_ON(!path->dentry->d_op); |
970 | BUG_ON(!path->dentry->d_op->d_manage); | 941 | BUG_ON(!path->dentry->d_op->d_manage); |
971 | ret = path->dentry->d_op->d_manage(path->dentry, | 942 | ret = path->dentry->d_op->d_manage(path->dentry, false); |
972 | false, false); | ||
973 | if (ret < 0) | 943 | if (ret < 0) |
974 | return ret == -EISDIR ? 0 : ret; | 944 | return ret == -EISDIR ? 0 : ret; |
975 | } | 945 | } |
@@ -1022,6 +992,12 @@ int follow_down_one(struct path *path) | |||
1022 | return 0; | 992 | return 0; |
1023 | } | 993 | } |
1024 | 994 | ||
995 | static inline bool managed_dentry_might_block(struct dentry *dentry) | ||
996 | { | ||
997 | return (dentry->d_flags & DCACHE_MANAGE_TRANSIT && | ||
998 | dentry->d_op->d_manage(dentry, true) < 0); | ||
999 | } | ||
1000 | |||
1025 | /* | 1001 | /* |
1026 | * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we | 1002 | * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we |
1027 | * meet a managed dentry and we're not walking to "..". True is returned to | 1003 | * meet a managed dentry and we're not walking to "..". True is returned to |
@@ -1030,19 +1006,26 @@ int follow_down_one(struct path *path) | |||
1030 | static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, | 1006 | static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, |
1031 | struct inode **inode, bool reverse_transit) | 1007 | struct inode **inode, bool reverse_transit) |
1032 | { | 1008 | { |
1033 | while (d_mountpoint(path->dentry)) { | 1009 | for (;;) { |
1034 | struct vfsmount *mounted; | 1010 | struct vfsmount *mounted; |
1035 | if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) && | 1011 | /* |
1036 | !reverse_transit && | 1012 | * Don't forget we might have a non-mountpoint managed dentry |
1037 | path->dentry->d_op->d_manage(path->dentry, false, true) < 0) | 1013 | * that wants to block transit. |
1014 | */ | ||
1015 | *inode = path->dentry->d_inode; | ||
1016 | if (!reverse_transit && | ||
1017 | unlikely(managed_dentry_might_block(path->dentry))) | ||
1038 | return false; | 1018 | return false; |
1019 | |||
1020 | if (!d_mountpoint(path->dentry)) | ||
1021 | break; | ||
1022 | |||
1039 | mounted = __lookup_mnt(path->mnt, path->dentry, 1); | 1023 | mounted = __lookup_mnt(path->mnt, path->dentry, 1); |
1040 | if (!mounted) | 1024 | if (!mounted) |
1041 | break; | 1025 | break; |
1042 | path->mnt = mounted; | 1026 | path->mnt = mounted; |
1043 | path->dentry = mounted->mnt_root; | 1027 | path->dentry = mounted->mnt_root; |
1044 | nd->seq = read_seqcount_begin(&path->dentry->d_seq); | 1028 | nd->seq = read_seqcount_begin(&path->dentry->d_seq); |
1045 | *inode = path->dentry->d_inode; | ||
1046 | } | 1029 | } |
1047 | 1030 | ||
1048 | if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) | 1031 | if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) |
@@ -1068,7 +1051,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) | |||
1068 | 1051 | ||
1069 | seq = read_seqcount_begin(&parent->d_seq); | 1052 | seq = read_seqcount_begin(&parent->d_seq); |
1070 | if (read_seqcount_retry(&old->d_seq, nd->seq)) | 1053 | if (read_seqcount_retry(&old->d_seq, nd->seq)) |
1071 | return -ECHILD; | 1054 | goto failed; |
1072 | inode = parent->d_inode; | 1055 | inode = parent->d_inode; |
1073 | nd->path.dentry = parent; | 1056 | nd->path.dentry = parent; |
1074 | nd->seq = seq; | 1057 | nd->seq = seq; |
@@ -1081,8 +1064,15 @@ static int follow_dotdot_rcu(struct nameidata *nd) | |||
1081 | } | 1064 | } |
1082 | __follow_mount_rcu(nd, &nd->path, &inode, true); | 1065 | __follow_mount_rcu(nd, &nd->path, &inode, true); |
1083 | nd->inode = inode; | 1066 | nd->inode = inode; |
1084 | |||
1085 | return 0; | 1067 | return 0; |
1068 | |||
1069 | failed: | ||
1070 | nd->flags &= ~LOOKUP_RCU; | ||
1071 | if (!(nd->flags & LOOKUP_ROOT)) | ||
1072 | nd->root.mnt = NULL; | ||
1073 | rcu_read_unlock(); | ||
1074 | br_read_unlock(vfsmount_lock); | ||
1075 | return -ECHILD; | ||
1086 | } | 1076 | } |
1087 | 1077 | ||
1088 | /* | 1078 | /* |
@@ -1093,7 +1083,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) | |||
1093 | * Care must be taken as namespace_sem may be held (indicated by mounting_here | 1083 | * Care must be taken as namespace_sem may be held (indicated by mounting_here |
1094 | * being true). | 1084 | * being true). |
1095 | */ | 1085 | */ |
1096 | int follow_down(struct path *path, bool mounting_here) | 1086 | int follow_down(struct path *path) |
1097 | { | 1087 | { |
1098 | unsigned managed; | 1088 | unsigned managed; |
1099 | int ret; | 1089 | int ret; |
@@ -1114,7 +1104,7 @@ int follow_down(struct path *path, bool mounting_here) | |||
1114 | BUG_ON(!path->dentry->d_op); | 1104 | BUG_ON(!path->dentry->d_op); |
1115 | BUG_ON(!path->dentry->d_op->d_manage); | 1105 | BUG_ON(!path->dentry->d_op->d_manage); |
1116 | ret = path->dentry->d_op->d_manage( | 1106 | ret = path->dentry->d_op->d_manage( |
1117 | path->dentry, mounting_here, false); | 1107 | path->dentry, false); |
1118 | if (ret < 0) | 1108 | if (ret < 0) |
1119 | return ret == -EISDIR ? 0 : ret; | 1109 | return ret == -EISDIR ? 0 : ret; |
1120 | } | 1110 | } |
@@ -1216,68 +1206,85 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, | |||
1216 | { | 1206 | { |
1217 | struct vfsmount *mnt = nd->path.mnt; | 1207 | struct vfsmount *mnt = nd->path.mnt; |
1218 | struct dentry *dentry, *parent = nd->path.dentry; | 1208 | struct dentry *dentry, *parent = nd->path.dentry; |
1219 | struct inode *dir; | 1209 | int need_reval = 1; |
1210 | int status = 1; | ||
1220 | int err; | 1211 | int err; |
1221 | 1212 | ||
1222 | /* | 1213 | /* |
1223 | * See if the low-level filesystem might want | ||
1224 | * to use its own hash.. | ||
1225 | */ | ||
1226 | if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { | ||
1227 | err = parent->d_op->d_hash(parent, nd->inode, name); | ||
1228 | if (err < 0) | ||
1229 | return err; | ||
1230 | } | ||
1231 | |||
1232 | /* | ||
1233 | * Rename seqlock is not required here because in the off chance | 1214 | * Rename seqlock is not required here because in the off chance |
1234 | * of a false negative due to a concurrent rename, we're going to | 1215 | * of a false negative due to a concurrent rename, we're going to |
1235 | * do the non-racy lookup, below. | 1216 | * do the non-racy lookup, below. |
1236 | */ | 1217 | */ |
1237 | if (nd->flags & LOOKUP_RCU) { | 1218 | if (nd->flags & LOOKUP_RCU) { |
1238 | unsigned seq; | 1219 | unsigned seq; |
1239 | |||
1240 | *inode = nd->inode; | 1220 | *inode = nd->inode; |
1241 | dentry = __d_lookup_rcu(parent, name, &seq, inode); | 1221 | dentry = __d_lookup_rcu(parent, name, &seq, inode); |
1242 | if (!dentry) { | 1222 | if (!dentry) |
1243 | if (nameidata_drop_rcu(nd)) | 1223 | goto unlazy; |
1244 | return -ECHILD; | 1224 | |
1245 | goto need_lookup; | ||
1246 | } | ||
1247 | /* Memory barrier in read_seqcount_begin of child is enough */ | 1225 | /* Memory barrier in read_seqcount_begin of child is enough */ |
1248 | if (__read_seqcount_retry(&parent->d_seq, nd->seq)) | 1226 | if (__read_seqcount_retry(&parent->d_seq, nd->seq)) |
1249 | return -ECHILD; | 1227 | return -ECHILD; |
1250 | |||
1251 | nd->seq = seq; | 1228 | nd->seq = seq; |
1229 | |||
1252 | if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { | 1230 | if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { |
1253 | dentry = do_revalidate_rcu(dentry, nd); | 1231 | status = d_revalidate(dentry, nd); |
1254 | if (!dentry) | 1232 | if (unlikely(status <= 0)) { |
1255 | goto need_lookup; | 1233 | if (status != -ECHILD) |
1256 | if (IS_ERR(dentry)) | 1234 | need_reval = 0; |
1257 | goto fail; | 1235 | goto unlazy; |
1258 | if (!(nd->flags & LOOKUP_RCU)) | 1236 | } |
1259 | goto done; | ||
1260 | } | 1237 | } |
1261 | path->mnt = mnt; | 1238 | path->mnt = mnt; |
1262 | path->dentry = dentry; | 1239 | path->dentry = dentry; |
1263 | if (likely(__follow_mount_rcu(nd, path, inode, false))) | 1240 | if (likely(__follow_mount_rcu(nd, path, inode, false))) |
1264 | return 0; | 1241 | return 0; |
1265 | if (nameidata_drop_rcu(nd)) | 1242 | unlazy: |
1266 | return -ECHILD; | 1243 | if (dentry) { |
1267 | /* fallthru */ | 1244 | if (nameidata_dentry_drop_rcu(nd, dentry)) |
1245 | return -ECHILD; | ||
1246 | } else { | ||
1247 | if (nameidata_drop_rcu(nd)) | ||
1248 | return -ECHILD; | ||
1249 | } | ||
1250 | } else { | ||
1251 | dentry = __d_lookup(parent, name); | ||
1268 | } | 1252 | } |
1269 | dentry = __d_lookup(parent, name); | 1253 | |
1270 | if (!dentry) | 1254 | retry: |
1271 | goto need_lookup; | 1255 | if (unlikely(!dentry)) { |
1272 | found: | 1256 | struct inode *dir = parent->d_inode; |
1273 | if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { | 1257 | BUG_ON(nd->inode != dir); |
1274 | dentry = do_revalidate(dentry, nd); | 1258 | |
1275 | if (!dentry) | 1259 | mutex_lock(&dir->i_mutex); |
1276 | goto need_lookup; | 1260 | dentry = d_lookup(parent, name); |
1277 | if (IS_ERR(dentry)) | 1261 | if (likely(!dentry)) { |
1278 | goto fail; | 1262 | dentry = d_alloc_and_lookup(parent, name, nd); |
1263 | if (IS_ERR(dentry)) { | ||
1264 | mutex_unlock(&dir->i_mutex); | ||
1265 | return PTR_ERR(dentry); | ||
1266 | } | ||
1267 | /* known good */ | ||
1268 | need_reval = 0; | ||
1269 | status = 1; | ||
1270 | } | ||
1271 | mutex_unlock(&dir->i_mutex); | ||
1279 | } | 1272 | } |
1280 | done: | 1273 | if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval) |
1274 | status = d_revalidate(dentry, nd); | ||
1275 | if (unlikely(status <= 0)) { | ||
1276 | if (status < 0) { | ||
1277 | dput(dentry); | ||
1278 | return status; | ||
1279 | } | ||
1280 | if (!d_invalidate(dentry)) { | ||
1281 | dput(dentry); | ||
1282 | dentry = NULL; | ||
1283 | need_reval = 1; | ||
1284 | goto retry; | ||
1285 | } | ||
1286 | } | ||
1287 | |||
1281 | path->mnt = mnt; | 1288 | path->mnt = mnt; |
1282 | path->dentry = dentry; | 1289 | path->dentry = dentry; |
1283 | err = follow_managed(path, nd->flags); | 1290 | err = follow_managed(path, nd->flags); |
@@ -1287,39 +1294,113 @@ done: | |||
1287 | } | 1294 | } |
1288 | *inode = path->dentry->d_inode; | 1295 | *inode = path->dentry->d_inode; |
1289 | return 0; | 1296 | return 0; |
1297 | } | ||
1290 | 1298 | ||
1291 | need_lookup: | 1299 | static inline int may_lookup(struct nameidata *nd) |
1292 | dir = parent->d_inode; | 1300 | { |
1293 | BUG_ON(nd->inode != dir); | 1301 | if (nd->flags & LOOKUP_RCU) { |
1302 | int err = exec_permission(nd->inode, IPERM_FLAG_RCU); | ||
1303 | if (err != -ECHILD) | ||
1304 | return err; | ||
1305 | if (nameidata_drop_rcu(nd)) | ||
1306 | return -ECHILD; | ||
1307 | } | ||
1308 | return exec_permission(nd->inode, 0); | ||
1309 | } | ||
1294 | 1310 | ||
1295 | mutex_lock(&dir->i_mutex); | 1311 | static inline int handle_dots(struct nameidata *nd, int type) |
1296 | /* | 1312 | { |
1297 | * First re-do the cached lookup just in case it was created | 1313 | if (type == LAST_DOTDOT) { |
1298 | * while we waited for the directory semaphore, or the first | 1314 | if (nd->flags & LOOKUP_RCU) { |
1299 | * lookup failed due to an unrelated rename. | 1315 | if (follow_dotdot_rcu(nd)) |
1300 | * | 1316 | return -ECHILD; |
1301 | * This could use version numbering or similar to avoid unnecessary | 1317 | } else |
1302 | * cache lookups, but then we'd have to do the first lookup in the | 1318 | follow_dotdot(nd); |
1303 | * non-racy way. However in the common case here, everything should | 1319 | } |
1304 | * be hot in cache, so would it be a big win? | 1320 | return 0; |
1305 | */ | 1321 | } |
1306 | dentry = d_lookup(parent, name); | 1322 | |
1307 | if (likely(!dentry)) { | 1323 | static void terminate_walk(struct nameidata *nd) |
1308 | dentry = d_alloc_and_lookup(parent, name, nd); | 1324 | { |
1309 | mutex_unlock(&dir->i_mutex); | 1325 | if (!(nd->flags & LOOKUP_RCU)) { |
1310 | if (IS_ERR(dentry)) | 1326 | path_put(&nd->path); |
1311 | goto fail; | 1327 | } else { |
1312 | goto done; | 1328 | nd->flags &= ~LOOKUP_RCU; |
1329 | if (!(nd->flags & LOOKUP_ROOT)) | ||
1330 | nd->root.mnt = NULL; | ||
1331 | rcu_read_unlock(); | ||
1332 | br_read_unlock(vfsmount_lock); | ||
1313 | } | 1333 | } |
1334 | } | ||
1335 | |||
1336 | static inline int walk_component(struct nameidata *nd, struct path *path, | ||
1337 | struct qstr *name, int type, int follow) | ||
1338 | { | ||
1339 | struct inode *inode; | ||
1340 | int err; | ||
1314 | /* | 1341 | /* |
1315 | * Uhhuh! Nasty case: the cache was re-populated while | 1342 | * "." and ".." are special - ".." especially so because it has |
1316 | * we waited on the semaphore. Need to revalidate. | 1343 | * to be able to know about the current root directory and |
1344 | * parent relationships. | ||
1317 | */ | 1345 | */ |
1318 | mutex_unlock(&dir->i_mutex); | 1346 | if (unlikely(type != LAST_NORM)) |
1319 | goto found; | 1347 | return handle_dots(nd, type); |
1348 | err = do_lookup(nd, name, path, &inode); | ||
1349 | if (unlikely(err)) { | ||
1350 | terminate_walk(nd); | ||
1351 | return err; | ||
1352 | } | ||
1353 | if (!inode) { | ||
1354 | path_to_nameidata(path, nd); | ||
1355 | terminate_walk(nd); | ||
1356 | return -ENOENT; | ||
1357 | } | ||
1358 | if (unlikely(inode->i_op->follow_link) && follow) { | ||
1359 | if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry)) | ||
1360 | return -ECHILD; | ||
1361 | BUG_ON(inode != path->dentry->d_inode); | ||
1362 | return 1; | ||
1363 | } | ||
1364 | path_to_nameidata(path, nd); | ||
1365 | nd->inode = inode; | ||
1366 | return 0; | ||
1367 | } | ||
1320 | 1368 | ||
1321 | fail: | 1369 | /* |
1322 | return PTR_ERR(dentry); | 1370 | * This limits recursive symlink follows to 8, while |
1371 | * limiting consecutive symlinks to 40. | ||
1372 | * | ||
1373 | * Without that kind of total limit, nasty chains of consecutive | ||
1374 | * symlinks can cause almost arbitrarily long lookups. | ||
1375 | */ | ||
1376 | static inline int nested_symlink(struct path *path, struct nameidata *nd) | ||
1377 | { | ||
1378 | int res; | ||
1379 | |||
1380 | BUG_ON(nd->depth >= MAX_NESTED_LINKS); | ||
1381 | if (unlikely(current->link_count >= MAX_NESTED_LINKS)) { | ||
1382 | path_put_conditional(path, nd); | ||
1383 | path_put(&nd->path); | ||
1384 | return -ELOOP; | ||
1385 | } | ||
1386 | |||
1387 | nd->depth++; | ||
1388 | current->link_count++; | ||
1389 | |||
1390 | do { | ||
1391 | struct path link = *path; | ||
1392 | void *cookie; | ||
1393 | |||
1394 | res = follow_link(&link, nd, &cookie); | ||
1395 | if (!res) | ||
1396 | res = walk_component(nd, path, &nd->last, | ||
1397 | nd->last_type, LOOKUP_FOLLOW); | ||
1398 | put_link(nd, &link, cookie); | ||
1399 | } while (res > 0); | ||
1400 | |||
1401 | current->link_count--; | ||
1402 | nd->depth--; | ||
1403 | return res; | ||
1323 | } | 1404 | } |
1324 | 1405 | ||
1325 | /* | 1406 | /* |
@@ -1339,30 +1420,18 @@ static int link_path_walk(const char *name, struct nameidata *nd) | |||
1339 | while (*name=='/') | 1420 | while (*name=='/') |
1340 | name++; | 1421 | name++; |
1341 | if (!*name) | 1422 | if (!*name) |
1342 | goto return_reval; | 1423 | return 0; |
1343 | |||
1344 | if (nd->depth) | ||
1345 | lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); | ||
1346 | 1424 | ||
1347 | /* At this point we know we have a real path component. */ | 1425 | /* At this point we know we have a real path component. */ |
1348 | for(;;) { | 1426 | for(;;) { |
1349 | struct inode *inode; | ||
1350 | unsigned long hash; | 1427 | unsigned long hash; |
1351 | struct qstr this; | 1428 | struct qstr this; |
1352 | unsigned int c; | 1429 | unsigned int c; |
1430 | int type; | ||
1353 | 1431 | ||
1354 | nd->flags |= LOOKUP_CONTINUE; | 1432 | nd->flags |= LOOKUP_CONTINUE; |
1355 | if (nd->flags & LOOKUP_RCU) { | 1433 | |
1356 | err = exec_permission(nd->inode, IPERM_FLAG_RCU); | 1434 | err = may_lookup(nd); |
1357 | if (err == -ECHILD) { | ||
1358 | if (nameidata_drop_rcu(nd)) | ||
1359 | return -ECHILD; | ||
1360 | goto exec_again; | ||
1361 | } | ||
1362 | } else { | ||
1363 | exec_again: | ||
1364 | err = exec_permission(nd->inode, 0); | ||
1365 | } | ||
1366 | if (err) | 1435 | if (err) |
1367 | break; | 1436 | break; |
1368 | 1437 | ||
@@ -1378,52 +1447,43 @@ exec_again: | |||
1378 | this.len = name - (const char *) this.name; | 1447 | this.len = name - (const char *) this.name; |
1379 | this.hash = end_name_hash(hash); | 1448 | this.hash = end_name_hash(hash); |
1380 | 1449 | ||
1450 | type = LAST_NORM; | ||
1451 | if (this.name[0] == '.') switch (this.len) { | ||
1452 | case 2: | ||
1453 | if (this.name[1] == '.') { | ||
1454 | type = LAST_DOTDOT; | ||
1455 | nd->flags |= LOOKUP_JUMPED; | ||
1456 | } | ||
1457 | break; | ||
1458 | case 1: | ||
1459 | type = LAST_DOT; | ||
1460 | } | ||
1461 | if (likely(type == LAST_NORM)) { | ||
1462 | struct dentry *parent = nd->path.dentry; | ||
1463 | nd->flags &= ~LOOKUP_JUMPED; | ||
1464 | if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { | ||
1465 | err = parent->d_op->d_hash(parent, nd->inode, | ||
1466 | &this); | ||
1467 | if (err < 0) | ||
1468 | break; | ||
1469 | } | ||
1470 | } | ||
1471 | |||
1381 | /* remove trailing slashes? */ | 1472 | /* remove trailing slashes? */ |
1382 | if (!c) | 1473 | if (!c) |
1383 | goto last_component; | 1474 | goto last_component; |
1384 | while (*++name == '/'); | 1475 | while (*++name == '/'); |
1385 | if (!*name) | 1476 | if (!*name) |
1386 | goto last_with_slashes; | 1477 | goto last_component; |
1387 | 1478 | ||
1388 | /* | 1479 | err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW); |
1389 | * "." and ".." are special - ".." especially so because it has | 1480 | if (err < 0) |
1390 | * to be able to know about the current root directory and | 1481 | return err; |
1391 | * parent relationships. | ||
1392 | */ | ||
1393 | if (this.name[0] == '.') switch (this.len) { | ||
1394 | default: | ||
1395 | break; | ||
1396 | case 2: | ||
1397 | if (this.name[1] != '.') | ||
1398 | break; | ||
1399 | if (nd->flags & LOOKUP_RCU) { | ||
1400 | if (follow_dotdot_rcu(nd)) | ||
1401 | return -ECHILD; | ||
1402 | } else | ||
1403 | follow_dotdot(nd); | ||
1404 | /* fallthrough */ | ||
1405 | case 1: | ||
1406 | continue; | ||
1407 | } | ||
1408 | /* This does the actual lookups.. */ | ||
1409 | err = do_lookup(nd, &this, &next, &inode); | ||
1410 | if (err) | ||
1411 | break; | ||
1412 | err = -ENOENT; | ||
1413 | if (!inode) | ||
1414 | goto out_dput; | ||
1415 | 1482 | ||
1416 | if (inode->i_op->follow_link) { | 1483 | if (err) { |
1417 | err = do_follow_link(inode, &next, nd); | 1484 | err = nested_symlink(&next, nd); |
1418 | if (err) | 1485 | if (err) |
1419 | goto return_err; | 1486 | return err; |
1420 | nd->inode = nd->path.dentry->d_inode; | ||
1421 | err = -ENOENT; | ||
1422 | if (!nd->inode) | ||
1423 | break; | ||
1424 | } else { | ||
1425 | path_to_nameidata(&next, nd); | ||
1426 | nd->inode = inode; | ||
1427 | } | 1487 | } |
1428 | err = -ENOTDIR; | 1488 | err = -ENOTDIR; |
1429 | if (!nd->inode->i_op->lookup) | 1489 | if (!nd->inode->i_op->lookup) |
@@ -1431,210 +1491,109 @@ exec_again: | |||
1431 | continue; | 1491 | continue; |
1432 | /* here ends the main loop */ | 1492 | /* here ends the main loop */ |
1433 | 1493 | ||
1434 | last_with_slashes: | ||
1435 | lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; | ||
1436 | last_component: | 1494 | last_component: |
1437 | /* Clear LOOKUP_CONTINUE iff it was previously unset */ | 1495 | /* Clear LOOKUP_CONTINUE iff it was previously unset */ |
1438 | nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; | 1496 | nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; |
1439 | if (lookup_flags & LOOKUP_PARENT) | ||
1440 | goto lookup_parent; | ||
1441 | if (this.name[0] == '.') switch (this.len) { | ||
1442 | default: | ||
1443 | break; | ||
1444 | case 2: | ||
1445 | if (this.name[1] != '.') | ||
1446 | break; | ||
1447 | if (nd->flags & LOOKUP_RCU) { | ||
1448 | if (follow_dotdot_rcu(nd)) | ||
1449 | return -ECHILD; | ||
1450 | } else | ||
1451 | follow_dotdot(nd); | ||
1452 | /* fallthrough */ | ||
1453 | case 1: | ||
1454 | goto return_reval; | ||
1455 | } | ||
1456 | err = do_lookup(nd, &this, &next, &inode); | ||
1457 | if (err) | ||
1458 | break; | ||
1459 | if (inode && unlikely(inode->i_op->follow_link) && | ||
1460 | (lookup_flags & LOOKUP_FOLLOW)) { | ||
1461 | err = do_follow_link(inode, &next, nd); | ||
1462 | if (err) | ||
1463 | goto return_err; | ||
1464 | nd->inode = nd->path.dentry->d_inode; | ||
1465 | } else { | ||
1466 | path_to_nameidata(&next, nd); | ||
1467 | nd->inode = inode; | ||
1468 | } | ||
1469 | err = -ENOENT; | ||
1470 | if (!nd->inode) | ||
1471 | break; | ||
1472 | if (lookup_flags & LOOKUP_DIRECTORY) { | ||
1473 | err = -ENOTDIR; | ||
1474 | if (!nd->inode->i_op->lookup) | ||
1475 | break; | ||
1476 | } | ||
1477 | goto return_base; | ||
1478 | lookup_parent: | ||
1479 | nd->last = this; | 1497 | nd->last = this; |
1480 | nd->last_type = LAST_NORM; | 1498 | nd->last_type = type; |
1481 | if (this.name[0] != '.') | ||
1482 | goto return_base; | ||
1483 | if (this.len == 1) | ||
1484 | nd->last_type = LAST_DOT; | ||
1485 | else if (this.len == 2 && this.name[1] == '.') | ||
1486 | nd->last_type = LAST_DOTDOT; | ||
1487 | else | ||
1488 | goto return_base; | ||
1489 | return_reval: | ||
1490 | /* | ||
1491 | * We bypassed the ordinary revalidation routines. | ||
1492 | * We may need to check the cached dentry for staleness. | ||
1493 | */ | ||
1494 | if (need_reval_dot(nd->path.dentry)) { | ||
1495 | if (nameidata_drop_rcu_last_maybe(nd)) | ||
1496 | return -ECHILD; | ||
1497 | /* Note: we do not d_invalidate() */ | ||
1498 | err = d_revalidate(nd->path.dentry, nd); | ||
1499 | if (!err) | ||
1500 | err = -ESTALE; | ||
1501 | if (err < 0) | ||
1502 | break; | ||
1503 | return 0; | ||
1504 | } | ||
1505 | return_base: | ||
1506 | if (nameidata_drop_rcu_last_maybe(nd)) | ||
1507 | return -ECHILD; | ||
1508 | return 0; | 1499 | return 0; |
1509 | out_dput: | ||
1510 | if (!(nd->flags & LOOKUP_RCU)) | ||
1511 | path_put_conditional(&next, nd); | ||
1512 | break; | ||
1513 | } | 1500 | } |
1514 | if (!(nd->flags & LOOKUP_RCU)) | 1501 | terminate_walk(nd); |
1515 | path_put(&nd->path); | ||
1516 | return_err: | ||
1517 | return err; | 1502 | return err; |
1518 | } | 1503 | } |
1519 | 1504 | ||
1520 | static inline int path_walk_rcu(const char *name, struct nameidata *nd) | 1505 | static int path_init(int dfd, const char *name, unsigned int flags, |
1521 | { | 1506 | struct nameidata *nd, struct file **fp) |
1522 | current->total_link_count = 0; | ||
1523 | |||
1524 | return link_path_walk(name, nd); | ||
1525 | } | ||
1526 | |||
1527 | static inline int path_walk_simple(const char *name, struct nameidata *nd) | ||
1528 | { | ||
1529 | current->total_link_count = 0; | ||
1530 | |||
1531 | return link_path_walk(name, nd); | ||
1532 | } | ||
1533 | |||
1534 | static int path_walk(const char *name, struct nameidata *nd) | ||
1535 | { | ||
1536 | struct path save = nd->path; | ||
1537 | int result; | ||
1538 | |||
1539 | current->total_link_count = 0; | ||
1540 | |||
1541 | /* make sure the stuff we saved doesn't go away */ | ||
1542 | path_get(&save); | ||
1543 | |||
1544 | result = link_path_walk(name, nd); | ||
1545 | if (result == -ESTALE) { | ||
1546 | /* nd->path had been dropped */ | ||
1547 | current->total_link_count = 0; | ||
1548 | nd->path = save; | ||
1549 | nd->inode = save.dentry->d_inode; | ||
1550 | path_get(&nd->path); | ||
1551 | nd->flags |= LOOKUP_REVAL; | ||
1552 | result = link_path_walk(name, nd); | ||
1553 | } | ||
1554 | |||
1555 | path_put(&save); | ||
1556 | |||
1557 | return result; | ||
1558 | } | ||
1559 | |||
1560 | static void path_finish_rcu(struct nameidata *nd) | ||
1561 | { | ||
1562 | if (nd->flags & LOOKUP_RCU) { | ||
1563 | /* RCU dangling. Cancel it. */ | ||
1564 | nd->flags &= ~LOOKUP_RCU; | ||
1565 | nd->root.mnt = NULL; | ||
1566 | rcu_read_unlock(); | ||
1567 | br_read_unlock(vfsmount_lock); | ||
1568 | } | ||
1569 | if (nd->file) | ||
1570 | fput(nd->file); | ||
1571 | } | ||
1572 | |||
1573 | static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd) | ||
1574 | { | 1507 | { |
1575 | int retval = 0; | 1508 | int retval = 0; |
1576 | int fput_needed; | 1509 | int fput_needed; |
1577 | struct file *file; | 1510 | struct file *file; |
1578 | 1511 | ||
1579 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ | 1512 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ |
1580 | nd->flags = flags | LOOKUP_RCU; | 1513 | nd->flags = flags | LOOKUP_JUMPED; |
1581 | nd->depth = 0; | 1514 | nd->depth = 0; |
1515 | if (flags & LOOKUP_ROOT) { | ||
1516 | struct inode *inode = nd->root.dentry->d_inode; | ||
1517 | if (*name) { | ||
1518 | if (!inode->i_op->lookup) | ||
1519 | return -ENOTDIR; | ||
1520 | retval = inode_permission(inode, MAY_EXEC); | ||
1521 | if (retval) | ||
1522 | return retval; | ||
1523 | } | ||
1524 | nd->path = nd->root; | ||
1525 | nd->inode = inode; | ||
1526 | if (flags & LOOKUP_RCU) { | ||
1527 | br_read_lock(vfsmount_lock); | ||
1528 | rcu_read_lock(); | ||
1529 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | ||
1530 | } else { | ||
1531 | path_get(&nd->path); | ||
1532 | } | ||
1533 | return 0; | ||
1534 | } | ||
1535 | |||
1582 | nd->root.mnt = NULL; | 1536 | nd->root.mnt = NULL; |
1583 | nd->file = NULL; | ||
1584 | 1537 | ||
1585 | if (*name=='/') { | 1538 | if (*name=='/') { |
1586 | struct fs_struct *fs = current->fs; | 1539 | if (flags & LOOKUP_RCU) { |
1587 | unsigned seq; | 1540 | br_read_lock(vfsmount_lock); |
1588 | 1541 | rcu_read_lock(); | |
1589 | br_read_lock(vfsmount_lock); | 1542 | set_root_rcu(nd); |
1590 | rcu_read_lock(); | 1543 | } else { |
1591 | 1544 | set_root(nd); | |
1592 | do { | 1545 | path_get(&nd->root); |
1593 | seq = read_seqcount_begin(&fs->seq); | 1546 | } |
1594 | nd->root = fs->root; | 1547 | nd->path = nd->root; |
1595 | nd->path = nd->root; | ||
1596 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | ||
1597 | } while (read_seqcount_retry(&fs->seq, seq)); | ||
1598 | |||
1599 | } else if (dfd == AT_FDCWD) { | 1548 | } else if (dfd == AT_FDCWD) { |
1600 | struct fs_struct *fs = current->fs; | 1549 | if (flags & LOOKUP_RCU) { |
1601 | unsigned seq; | 1550 | struct fs_struct *fs = current->fs; |
1602 | 1551 | unsigned seq; | |
1603 | br_read_lock(vfsmount_lock); | ||
1604 | rcu_read_lock(); | ||
1605 | 1552 | ||
1606 | do { | 1553 | br_read_lock(vfsmount_lock); |
1607 | seq = read_seqcount_begin(&fs->seq); | 1554 | rcu_read_lock(); |
1608 | nd->path = fs->pwd; | ||
1609 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | ||
1610 | } while (read_seqcount_retry(&fs->seq, seq)); | ||
1611 | 1555 | ||
1556 | do { | ||
1557 | seq = read_seqcount_begin(&fs->seq); | ||
1558 | nd->path = fs->pwd; | ||
1559 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | ||
1560 | } while (read_seqcount_retry(&fs->seq, seq)); | ||
1561 | } else { | ||
1562 | get_fs_pwd(current->fs, &nd->path); | ||
1563 | } | ||
1612 | } else { | 1564 | } else { |
1613 | struct dentry *dentry; | 1565 | struct dentry *dentry; |
1614 | 1566 | ||
1615 | file = fget_light(dfd, &fput_needed); | 1567 | file = fget_raw_light(dfd, &fput_needed); |
1616 | retval = -EBADF; | 1568 | retval = -EBADF; |
1617 | if (!file) | 1569 | if (!file) |
1618 | goto out_fail; | 1570 | goto out_fail; |
1619 | 1571 | ||
1620 | dentry = file->f_path.dentry; | 1572 | dentry = file->f_path.dentry; |
1621 | 1573 | ||
1622 | retval = -ENOTDIR; | 1574 | if (*name) { |
1623 | if (!S_ISDIR(dentry->d_inode->i_mode)) | 1575 | retval = -ENOTDIR; |
1624 | goto fput_fail; | 1576 | if (!S_ISDIR(dentry->d_inode->i_mode)) |
1577 | goto fput_fail; | ||
1625 | 1578 | ||
1626 | retval = file_permission(file, MAY_EXEC); | 1579 | retval = file_permission(file, MAY_EXEC); |
1627 | if (retval) | 1580 | if (retval) |
1628 | goto fput_fail; | 1581 | goto fput_fail; |
1582 | } | ||
1629 | 1583 | ||
1630 | nd->path = file->f_path; | 1584 | nd->path = file->f_path; |
1631 | if (fput_needed) | 1585 | if (flags & LOOKUP_RCU) { |
1632 | nd->file = file; | 1586 | if (fput_needed) |
1633 | 1587 | *fp = file; | |
1634 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | 1588 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); |
1635 | br_read_lock(vfsmount_lock); | 1589 | br_read_lock(vfsmount_lock); |
1636 | rcu_read_lock(); | 1590 | rcu_read_lock(); |
1591 | } else { | ||
1592 | path_get(&file->f_path); | ||
1593 | fput_light(file, fput_needed); | ||
1594 | } | ||
1637 | } | 1595 | } |
1596 | |||
1638 | nd->inode = nd->path.dentry->d_inode; | 1597 | nd->inode = nd->path.dentry->d_inode; |
1639 | return 0; | 1598 | return 0; |
1640 | 1599 | ||
@@ -1644,60 +1603,23 @@ out_fail: | |||
1644 | return retval; | 1603 | return retval; |
1645 | } | 1604 | } |
1646 | 1605 | ||
1647 | static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) | 1606 | static inline int lookup_last(struct nameidata *nd, struct path *path) |
1648 | { | 1607 | { |
1649 | int retval = 0; | 1608 | if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len]) |
1650 | int fput_needed; | 1609 | nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; |
1651 | struct file *file; | ||
1652 | |||
1653 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ | ||
1654 | nd->flags = flags; | ||
1655 | nd->depth = 0; | ||
1656 | nd->root.mnt = NULL; | ||
1657 | 1610 | ||
1658 | if (*name=='/') { | 1611 | nd->flags &= ~LOOKUP_PARENT; |
1659 | set_root(nd); | 1612 | return walk_component(nd, path, &nd->last, nd->last_type, |
1660 | nd->path = nd->root; | 1613 | nd->flags & LOOKUP_FOLLOW); |
1661 | path_get(&nd->root); | ||
1662 | } else if (dfd == AT_FDCWD) { | ||
1663 | get_fs_pwd(current->fs, &nd->path); | ||
1664 | } else { | ||
1665 | struct dentry *dentry; | ||
1666 | |||
1667 | file = fget_light(dfd, &fput_needed); | ||
1668 | retval = -EBADF; | ||
1669 | if (!file) | ||
1670 | goto out_fail; | ||
1671 | |||
1672 | dentry = file->f_path.dentry; | ||
1673 | |||
1674 | retval = -ENOTDIR; | ||
1675 | if (!S_ISDIR(dentry->d_inode->i_mode)) | ||
1676 | goto fput_fail; | ||
1677 | |||
1678 | retval = file_permission(file, MAY_EXEC); | ||
1679 | if (retval) | ||
1680 | goto fput_fail; | ||
1681 | |||
1682 | nd->path = file->f_path; | ||
1683 | path_get(&file->f_path); | ||
1684 | |||
1685 | fput_light(file, fput_needed); | ||
1686 | } | ||
1687 | nd->inode = nd->path.dentry->d_inode; | ||
1688 | return 0; | ||
1689 | |||
1690 | fput_fail: | ||
1691 | fput_light(file, fput_needed); | ||
1692 | out_fail: | ||
1693 | return retval; | ||
1694 | } | 1614 | } |
1695 | 1615 | ||
1696 | /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ | 1616 | /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ |
1697 | static int do_path_lookup(int dfd, const char *name, | 1617 | static int path_lookupat(int dfd, const char *name, |
1698 | unsigned int flags, struct nameidata *nd) | 1618 | unsigned int flags, struct nameidata *nd) |
1699 | { | 1619 | { |
1700 | int retval; | 1620 | struct file *base = NULL; |
1621 | struct path path; | ||
1622 | int err; | ||
1701 | 1623 | ||
1702 | /* | 1624 | /* |
1703 | * Path walking is largely split up into 2 different synchronisation | 1625 | * Path walking is largely split up into 2 different synchronisation |
@@ -1713,44 +1635,78 @@ static int do_path_lookup(int dfd, const char *name, | |||
1713 | * be handled by restarting a traditional ref-walk (which will always | 1635 | * be handled by restarting a traditional ref-walk (which will always |
1714 | * be able to complete). | 1636 | * be able to complete). |
1715 | */ | 1637 | */ |
1716 | retval = path_init_rcu(dfd, name, flags, nd); | 1638 | err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base); |
1717 | if (unlikely(retval)) | 1639 | |
1718 | return retval; | 1640 | if (unlikely(err)) |
1719 | retval = path_walk_rcu(name, nd); | 1641 | return err; |
1720 | path_finish_rcu(nd); | 1642 | |
1721 | if (nd->root.mnt) { | 1643 | current->total_link_count = 0; |
1722 | path_put(&nd->root); | 1644 | err = link_path_walk(name, nd); |
1723 | nd->root.mnt = NULL; | 1645 | |
1646 | if (!err && !(flags & LOOKUP_PARENT)) { | ||
1647 | err = lookup_last(nd, &path); | ||
1648 | while (err > 0) { | ||
1649 | void *cookie; | ||
1650 | struct path link = path; | ||
1651 | nd->flags |= LOOKUP_PARENT; | ||
1652 | err = follow_link(&link, nd, &cookie); | ||
1653 | if (!err) | ||
1654 | err = lookup_last(nd, &path); | ||
1655 | put_link(nd, &link, cookie); | ||
1656 | } | ||
1724 | } | 1657 | } |
1725 | 1658 | ||
1726 | if (unlikely(retval == -ECHILD || retval == -ESTALE)) { | 1659 | if (nd->flags & LOOKUP_RCU) { |
1727 | /* slower, locked walk */ | 1660 | /* went all way through without dropping RCU */ |
1728 | if (retval == -ESTALE) | 1661 | BUG_ON(err); |
1729 | flags |= LOOKUP_REVAL; | 1662 | if (nameidata_drop_rcu_last(nd)) |
1730 | retval = path_init(dfd, name, flags, nd); | 1663 | err = -ECHILD; |
1731 | if (unlikely(retval)) | 1664 | } |
1732 | return retval; | 1665 | |
1733 | retval = path_walk(name, nd); | 1666 | if (!err) { |
1734 | if (nd->root.mnt) { | 1667 | err = handle_reval_path(nd); |
1735 | path_put(&nd->root); | 1668 | if (err) |
1736 | nd->root.mnt = NULL; | 1669 | path_put(&nd->path); |
1670 | } | ||
1671 | |||
1672 | if (!err && nd->flags & LOOKUP_DIRECTORY) { | ||
1673 | if (!nd->inode->i_op->lookup) { | ||
1674 | path_put(&nd->path); | ||
1675 | err = -ENOTDIR; | ||
1737 | } | 1676 | } |
1738 | } | 1677 | } |
1739 | 1678 | ||
1679 | if (base) | ||
1680 | fput(base); | ||
1681 | |||
1682 | if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { | ||
1683 | path_put(&nd->root); | ||
1684 | nd->root.mnt = NULL; | ||
1685 | } | ||
1686 | return err; | ||
1687 | } | ||
1688 | |||
1689 | static int do_path_lookup(int dfd, const char *name, | ||
1690 | unsigned int flags, struct nameidata *nd) | ||
1691 | { | ||
1692 | int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd); | ||
1693 | if (unlikely(retval == -ECHILD)) | ||
1694 | retval = path_lookupat(dfd, name, flags, nd); | ||
1695 | if (unlikely(retval == -ESTALE)) | ||
1696 | retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd); | ||
1697 | |||
1740 | if (likely(!retval)) { | 1698 | if (likely(!retval)) { |
1741 | if (unlikely(!audit_dummy_context())) { | 1699 | if (unlikely(!audit_dummy_context())) { |
1742 | if (nd->path.dentry && nd->inode) | 1700 | if (nd->path.dentry && nd->inode) |
1743 | audit_inode(name, nd->path.dentry); | 1701 | audit_inode(name, nd->path.dentry); |
1744 | } | 1702 | } |
1745 | } | 1703 | } |
1746 | |||
1747 | return retval; | 1704 | return retval; |
1748 | } | 1705 | } |
1749 | 1706 | ||
1750 | int path_lookup(const char *name, unsigned int flags, | 1707 | int kern_path_parent(const char *name, struct nameidata *nd) |
1751 | struct nameidata *nd) | ||
1752 | { | 1708 | { |
1753 | return do_path_lookup(AT_FDCWD, name, flags, nd); | 1709 | return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd); |
1754 | } | 1710 | } |
1755 | 1711 | ||
1756 | int kern_path(const char *name, unsigned int flags, struct path *path) | 1712 | int kern_path(const char *name, unsigned int flags, struct path *path) |
@@ -1774,29 +1730,10 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, | |||
1774 | const char *name, unsigned int flags, | 1730 | const char *name, unsigned int flags, |
1775 | struct nameidata *nd) | 1731 | struct nameidata *nd) |
1776 | { | 1732 | { |
1777 | int retval; | 1733 | nd->root.dentry = dentry; |
1778 | 1734 | nd->root.mnt = mnt; | |
1779 | /* same as do_path_lookup */ | 1735 | /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */ |
1780 | nd->last_type = LAST_ROOT; | 1736 | return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd); |
1781 | nd->flags = flags; | ||
1782 | nd->depth = 0; | ||
1783 | |||
1784 | nd->path.dentry = dentry; | ||
1785 | nd->path.mnt = mnt; | ||
1786 | path_get(&nd->path); | ||
1787 | nd->root = nd->path; | ||
1788 | path_get(&nd->root); | ||
1789 | nd->inode = nd->path.dentry->d_inode; | ||
1790 | |||
1791 | retval = path_walk(name, nd); | ||
1792 | if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && | ||
1793 | nd->inode)) | ||
1794 | audit_inode(name, nd->path.dentry); | ||
1795 | |||
1796 | path_put(&nd->root); | ||
1797 | nd->root.mnt = NULL; | ||
1798 | |||
1799 | return retval; | ||
1800 | } | 1737 | } |
1801 | 1738 | ||
1802 | static struct dentry *__lookup_hash(struct qstr *name, | 1739 | static struct dentry *__lookup_hash(struct qstr *name, |
@@ -1811,17 +1748,6 @@ static struct dentry *__lookup_hash(struct qstr *name, | |||
1811 | return ERR_PTR(err); | 1748 | return ERR_PTR(err); |
1812 | 1749 | ||
1813 | /* | 1750 | /* |
1814 | * See if the low-level filesystem might want | ||
1815 | * to use its own hash.. | ||
1816 | */ | ||
1817 | if (base->d_flags & DCACHE_OP_HASH) { | ||
1818 | err = base->d_op->d_hash(base, inode, name); | ||
1819 | dentry = ERR_PTR(err); | ||
1820 | if (err < 0) | ||
1821 | goto out; | ||
1822 | } | ||
1823 | |||
1824 | /* | ||
1825 | * Don't bother with __d_lookup: callers are for creat as | 1751 | * Don't bother with __d_lookup: callers are for creat as |
1826 | * well as unlink, so a lot of the time it would cost | 1752 | * well as unlink, so a lot of the time it would cost |
1827 | * a double lookup. | 1753 | * a double lookup. |
@@ -1833,7 +1759,7 @@ static struct dentry *__lookup_hash(struct qstr *name, | |||
1833 | 1759 | ||
1834 | if (!dentry) | 1760 | if (!dentry) |
1835 | dentry = d_alloc_and_lookup(base, name, nd); | 1761 | dentry = d_alloc_and_lookup(base, name, nd); |
1836 | out: | 1762 | |
1837 | return dentry; | 1763 | return dentry; |
1838 | } | 1764 | } |
1839 | 1765 | ||
@@ -1847,28 +1773,6 @@ static struct dentry *lookup_hash(struct nameidata *nd) | |||
1847 | return __lookup_hash(&nd->last, nd->path.dentry, nd); | 1773 | return __lookup_hash(&nd->last, nd->path.dentry, nd); |
1848 | } | 1774 | } |
1849 | 1775 | ||
1850 | static int __lookup_one_len(const char *name, struct qstr *this, | ||
1851 | struct dentry *base, int len) | ||
1852 | { | ||
1853 | unsigned long hash; | ||
1854 | unsigned int c; | ||
1855 | |||
1856 | this->name = name; | ||
1857 | this->len = len; | ||
1858 | if (!len) | ||
1859 | return -EACCES; | ||
1860 | |||
1861 | hash = init_name_hash(); | ||
1862 | while (len--) { | ||
1863 | c = *(const unsigned char *)name++; | ||
1864 | if (c == '/' || c == '\0') | ||
1865 | return -EACCES; | ||
1866 | hash = partial_name_hash(c, hash); | ||
1867 | } | ||
1868 | this->hash = end_name_hash(hash); | ||
1869 | return 0; | ||
1870 | } | ||
1871 | |||
1872 | /** | 1776 | /** |
1873 | * lookup_one_len - filesystem helper to lookup single pathname component | 1777 | * lookup_one_len - filesystem helper to lookup single pathname component |
1874 | * @name: pathname component to lookup | 1778 | * @name: pathname component to lookup |
@@ -1882,14 +1786,34 @@ static int __lookup_one_len(const char *name, struct qstr *this, | |||
1882 | */ | 1786 | */ |
1883 | struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) | 1787 | struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) |
1884 | { | 1788 | { |
1885 | int err; | ||
1886 | struct qstr this; | 1789 | struct qstr this; |
1790 | unsigned long hash; | ||
1791 | unsigned int c; | ||
1887 | 1792 | ||
1888 | WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); | 1793 | WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); |
1889 | 1794 | ||
1890 | err = __lookup_one_len(name, &this, base, len); | 1795 | this.name = name; |
1891 | if (err) | 1796 | this.len = len; |
1892 | return ERR_PTR(err); | 1797 | if (!len) |
1798 | return ERR_PTR(-EACCES); | ||
1799 | |||
1800 | hash = init_name_hash(); | ||
1801 | while (len--) { | ||
1802 | c = *(const unsigned char *)name++; | ||
1803 | if (c == '/' || c == '\0') | ||
1804 | return ERR_PTR(-EACCES); | ||
1805 | hash = partial_name_hash(c, hash); | ||
1806 | } | ||
1807 | this.hash = end_name_hash(hash); | ||
1808 | /* | ||
1809 | * See if the low-level filesystem might want | ||
1810 | * to use its own hash.. | ||
1811 | */ | ||
1812 | if (base->d_flags & DCACHE_OP_HASH) { | ||
1813 | int err = base->d_op->d_hash(base, base->d_inode, &this); | ||
1814 | if (err < 0) | ||
1815 | return ERR_PTR(err); | ||
1816 | } | ||
1893 | 1817 | ||
1894 | return __lookup_hash(&this, base, NULL); | 1818 | return __lookup_hash(&this, base, NULL); |
1895 | } | 1819 | } |
@@ -1898,7 +1822,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags, | |||
1898 | struct path *path) | 1822 | struct path *path) |
1899 | { | 1823 | { |
1900 | struct nameidata nd; | 1824 | struct nameidata nd; |
1901 | char *tmp = getname(name); | 1825 | char *tmp = getname_flags(name, flags); |
1902 | int err = PTR_ERR(tmp); | 1826 | int err = PTR_ERR(tmp); |
1903 | if (!IS_ERR(tmp)) { | 1827 | if (!IS_ERR(tmp)) { |
1904 | 1828 | ||
@@ -1940,11 +1864,15 @@ static inline int check_sticky(struct inode *dir, struct inode *inode) | |||
1940 | 1864 | ||
1941 | if (!(dir->i_mode & S_ISVTX)) | 1865 | if (!(dir->i_mode & S_ISVTX)) |
1942 | return 0; | 1866 | return 0; |
1867 | if (current_user_ns() != inode_userns(inode)) | ||
1868 | goto other_userns; | ||
1943 | if (inode->i_uid == fsuid) | 1869 | if (inode->i_uid == fsuid) |
1944 | return 0; | 1870 | return 0; |
1945 | if (dir->i_uid == fsuid) | 1871 | if (dir->i_uid == fsuid) |
1946 | return 0; | 1872 | return 0; |
1947 | return !capable(CAP_FOWNER); | 1873 | |
1874 | other_userns: | ||
1875 | return !ns_capable(inode_userns(inode), CAP_FOWNER); | ||
1948 | } | 1876 | } |
1949 | 1877 | ||
1950 | /* | 1878 | /* |
@@ -2078,12 +2006,16 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
2078 | return error; | 2006 | return error; |
2079 | } | 2007 | } |
2080 | 2008 | ||
2081 | int may_open(struct path *path, int acc_mode, int flag) | 2009 | static int may_open(struct path *path, int acc_mode, int flag) |
2082 | { | 2010 | { |
2083 | struct dentry *dentry = path->dentry; | 2011 | struct dentry *dentry = path->dentry; |
2084 | struct inode *inode = dentry->d_inode; | 2012 | struct inode *inode = dentry->d_inode; |
2085 | int error; | 2013 | int error; |
2086 | 2014 | ||
2015 | /* O_PATH? */ | ||
2016 | if (!acc_mode) | ||
2017 | return 0; | ||
2018 | |||
2087 | if (!inode) | 2019 | if (!inode) |
2088 | return -ENOENT; | 2020 | return -ENOENT; |
2089 | 2021 | ||
@@ -2120,7 +2052,7 @@ int may_open(struct path *path, int acc_mode, int flag) | |||
2120 | } | 2052 | } |
2121 | 2053 | ||
2122 | /* O_NOATIME can only be set by the owner or superuser */ | 2054 | /* O_NOATIME can only be set by the owner or superuser */ |
2123 | if (flag & O_NOATIME && !is_owner_or_cap(inode)) | 2055 | if (flag & O_NOATIME && !inode_owner_or_capable(inode)) |
2124 | return -EPERM; | 2056 | return -EPERM; |
2125 | 2057 | ||
2126 | /* | 2058 | /* |
@@ -2152,34 +2084,6 @@ static int handle_truncate(struct file *filp) | |||
2152 | } | 2084 | } |
2153 | 2085 | ||
2154 | /* | 2086 | /* |
2155 | * Be careful about ever adding any more callers of this | ||
2156 | * function. Its flags must be in the namei format, not | ||
2157 | * what get passed to sys_open(). | ||
2158 | */ | ||
2159 | static int __open_namei_create(struct nameidata *nd, struct path *path, | ||
2160 | int open_flag, int mode) | ||
2161 | { | ||
2162 | int error; | ||
2163 | struct dentry *dir = nd->path.dentry; | ||
2164 | |||
2165 | if (!IS_POSIXACL(dir->d_inode)) | ||
2166 | mode &= ~current_umask(); | ||
2167 | error = security_path_mknod(&nd->path, path->dentry, mode, 0); | ||
2168 | if (error) | ||
2169 | goto out_unlock; | ||
2170 | error = vfs_create(dir->d_inode, path->dentry, mode, nd); | ||
2171 | out_unlock: | ||
2172 | mutex_unlock(&dir->d_inode->i_mutex); | ||
2173 | dput(nd->path.dentry); | ||
2174 | nd->path.dentry = path->dentry; | ||
2175 | |||
2176 | if (error) | ||
2177 | return error; | ||
2178 | /* Don't check for write permission, don't truncate */ | ||
2179 | return may_open(&nd->path, 0, open_flag & ~O_TRUNC); | ||
2180 | } | ||
2181 | |||
2182 | /* | ||
2183 | * Note that while the flag value (low two bits) for sys_open means: | 2087 | * Note that while the flag value (low two bits) for sys_open means: |
2184 | * 00 - read-only | 2088 | * 00 - read-only |
2185 | * 01 - write-only | 2089 | * 01 - write-only |
@@ -2203,126 +2107,115 @@ static inline int open_to_namei_flags(int flag) | |||
2203 | return flag; | 2107 | return flag; |
2204 | } | 2108 | } |
2205 | 2109 | ||
2206 | static int open_will_truncate(int flag, struct inode *inode) | ||
2207 | { | ||
2208 | /* | ||
2209 | * We'll never write to the fs underlying | ||
2210 | * a device file. | ||
2211 | */ | ||
2212 | if (special_file(inode->i_mode)) | ||
2213 | return 0; | ||
2214 | return (flag & O_TRUNC); | ||
2215 | } | ||
2216 | |||
2217 | static struct file *finish_open(struct nameidata *nd, | ||
2218 | int open_flag, int acc_mode) | ||
2219 | { | ||
2220 | struct file *filp; | ||
2221 | int will_truncate; | ||
2222 | int error; | ||
2223 | |||
2224 | will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode); | ||
2225 | if (will_truncate) { | ||
2226 | error = mnt_want_write(nd->path.mnt); | ||
2227 | if (error) | ||
2228 | goto exit; | ||
2229 | } | ||
2230 | error = may_open(&nd->path, acc_mode, open_flag); | ||
2231 | if (error) { | ||
2232 | if (will_truncate) | ||
2233 | mnt_drop_write(nd->path.mnt); | ||
2234 | goto exit; | ||
2235 | } | ||
2236 | filp = nameidata_to_filp(nd); | ||
2237 | if (!IS_ERR(filp)) { | ||
2238 | error = ima_file_check(filp, acc_mode); | ||
2239 | if (error) { | ||
2240 | fput(filp); | ||
2241 | filp = ERR_PTR(error); | ||
2242 | } | ||
2243 | } | ||
2244 | if (!IS_ERR(filp)) { | ||
2245 | if (will_truncate) { | ||
2246 | error = handle_truncate(filp); | ||
2247 | if (error) { | ||
2248 | fput(filp); | ||
2249 | filp = ERR_PTR(error); | ||
2250 | } | ||
2251 | } | ||
2252 | } | ||
2253 | /* | ||
2254 | * It is now safe to drop the mnt write | ||
2255 | * because the filp has had a write taken | ||
2256 | * on its behalf. | ||
2257 | */ | ||
2258 | if (will_truncate) | ||
2259 | mnt_drop_write(nd->path.mnt); | ||
2260 | path_put(&nd->path); | ||
2261 | return filp; | ||
2262 | |||
2263 | exit: | ||
2264 | path_put(&nd->path); | ||
2265 | return ERR_PTR(error); | ||
2266 | } | ||
2267 | |||
2268 | /* | 2110 | /* |
2269 | * Handle O_CREAT case for do_filp_open | 2111 | * Handle the last step of open() |
2270 | */ | 2112 | */ |
2271 | static struct file *do_last(struct nameidata *nd, struct path *path, | 2113 | static struct file *do_last(struct nameidata *nd, struct path *path, |
2272 | int open_flag, int acc_mode, | 2114 | const struct open_flags *op, const char *pathname) |
2273 | int mode, const char *pathname) | ||
2274 | { | 2115 | { |
2275 | struct dentry *dir = nd->path.dentry; | 2116 | struct dentry *dir = nd->path.dentry; |
2117 | struct dentry *dentry; | ||
2118 | int open_flag = op->open_flag; | ||
2119 | int will_truncate = open_flag & O_TRUNC; | ||
2120 | int want_write = 0; | ||
2121 | int acc_mode = op->acc_mode; | ||
2276 | struct file *filp; | 2122 | struct file *filp; |
2277 | int error = -EISDIR; | 2123 | int error; |
2124 | |||
2125 | nd->flags &= ~LOOKUP_PARENT; | ||
2126 | nd->flags |= op->intent; | ||
2278 | 2127 | ||
2279 | switch (nd->last_type) { | 2128 | switch (nd->last_type) { |
2280 | case LAST_DOTDOT: | 2129 | case LAST_DOTDOT: |
2281 | follow_dotdot(nd); | ||
2282 | dir = nd->path.dentry; | ||
2283 | case LAST_DOT: | 2130 | case LAST_DOT: |
2284 | if (need_reval_dot(dir)) { | 2131 | error = handle_dots(nd, nd->last_type); |
2285 | int status = d_revalidate(nd->path.dentry, nd); | 2132 | if (error) |
2286 | if (!status) | 2133 | return ERR_PTR(error); |
2287 | status = -ESTALE; | ||
2288 | if (status < 0) { | ||
2289 | error = status; | ||
2290 | goto exit; | ||
2291 | } | ||
2292 | } | ||
2293 | /* fallthrough */ | 2134 | /* fallthrough */ |
2294 | case LAST_ROOT: | 2135 | case LAST_ROOT: |
2295 | goto exit; | 2136 | if (nd->flags & LOOKUP_RCU) { |
2137 | if (nameidata_drop_rcu_last(nd)) | ||
2138 | return ERR_PTR(-ECHILD); | ||
2139 | } | ||
2140 | error = handle_reval_path(nd); | ||
2141 | if (error) | ||
2142 | goto exit; | ||
2143 | audit_inode(pathname, nd->path.dentry); | ||
2144 | if (open_flag & O_CREAT) { | ||
2145 | error = -EISDIR; | ||
2146 | goto exit; | ||
2147 | } | ||
2148 | goto ok; | ||
2296 | case LAST_BIND: | 2149 | case LAST_BIND: |
2150 | /* can't be RCU mode here */ | ||
2151 | error = handle_reval_path(nd); | ||
2152 | if (error) | ||
2153 | goto exit; | ||
2297 | audit_inode(pathname, dir); | 2154 | audit_inode(pathname, dir); |
2298 | goto ok; | 2155 | goto ok; |
2299 | } | 2156 | } |
2300 | 2157 | ||
2158 | if (!(open_flag & O_CREAT)) { | ||
2159 | int symlink_ok = 0; | ||
2160 | if (nd->last.name[nd->last.len]) | ||
2161 | nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; | ||
2162 | if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) | ||
2163 | symlink_ok = 1; | ||
2164 | /* we _can_ be in RCU mode here */ | ||
2165 | error = walk_component(nd, path, &nd->last, LAST_NORM, | ||
2166 | !symlink_ok); | ||
2167 | if (error < 0) | ||
2168 | return ERR_PTR(error); | ||
2169 | if (error) /* symlink */ | ||
2170 | return NULL; | ||
2171 | /* sayonara */ | ||
2172 | if (nd->flags & LOOKUP_RCU) { | ||
2173 | if (nameidata_drop_rcu_last(nd)) | ||
2174 | return ERR_PTR(-ECHILD); | ||
2175 | } | ||
2176 | |||
2177 | error = -ENOTDIR; | ||
2178 | if (nd->flags & LOOKUP_DIRECTORY) { | ||
2179 | if (!nd->inode->i_op->lookup) | ||
2180 | goto exit; | ||
2181 | } | ||
2182 | audit_inode(pathname, nd->path.dentry); | ||
2183 | goto ok; | ||
2184 | } | ||
2185 | |||
2186 | /* create side of things */ | ||
2187 | |||
2188 | if (nd->flags & LOOKUP_RCU) { | ||
2189 | if (nameidata_drop_rcu_last(nd)) | ||
2190 | return ERR_PTR(-ECHILD); | ||
2191 | } | ||
2192 | |||
2193 | audit_inode(pathname, dir); | ||
2194 | error = -EISDIR; | ||
2301 | /* trailing slashes? */ | 2195 | /* trailing slashes? */ |
2302 | if (nd->last.name[nd->last.len]) | 2196 | if (nd->last.name[nd->last.len]) |
2303 | goto exit; | 2197 | goto exit; |
2304 | 2198 | ||
2305 | mutex_lock(&dir->d_inode->i_mutex); | 2199 | mutex_lock(&dir->d_inode->i_mutex); |
2306 | 2200 | ||
2307 | path->dentry = lookup_hash(nd); | 2201 | dentry = lookup_hash(nd); |
2308 | path->mnt = nd->path.mnt; | 2202 | error = PTR_ERR(dentry); |
2309 | 2203 | if (IS_ERR(dentry)) { | |
2310 | error = PTR_ERR(path->dentry); | ||
2311 | if (IS_ERR(path->dentry)) { | ||
2312 | mutex_unlock(&dir->d_inode->i_mutex); | 2204 | mutex_unlock(&dir->d_inode->i_mutex); |
2313 | goto exit; | 2205 | goto exit; |
2314 | } | 2206 | } |
2315 | 2207 | ||
2316 | if (IS_ERR(nd->intent.open.file)) { | 2208 | path->dentry = dentry; |
2317 | error = PTR_ERR(nd->intent.open.file); | 2209 | path->mnt = nd->path.mnt; |
2318 | goto exit_mutex_unlock; | ||
2319 | } | ||
2320 | 2210 | ||
2321 | /* Negative dentry, just create the file */ | 2211 | /* Negative dentry, just create the file */ |
2322 | if (!path->dentry->d_inode) { | 2212 | if (!dentry->d_inode) { |
2213 | int mode = op->mode; | ||
2214 | if (!IS_POSIXACL(dir->d_inode)) | ||
2215 | mode &= ~current_umask(); | ||
2323 | /* | 2216 | /* |
2324 | * This write is needed to ensure that a | 2217 | * This write is needed to ensure that a |
2325 | * ro->rw transition does not occur between | 2218 | * rw->ro transition does not occur between |
2326 | * the time when the file is created and when | 2219 | * the time when the file is created and when |
2327 | * a permanent write count is taken through | 2220 | * a permanent write count is taken through |
2328 | * the 'struct file' in nameidata_to_filp(). | 2221 | * the 'struct file' in nameidata_to_filp(). |
@@ -2330,22 +2223,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
2330 | error = mnt_want_write(nd->path.mnt); | 2223 | error = mnt_want_write(nd->path.mnt); |
2331 | if (error) | 2224 | if (error) |
2332 | goto exit_mutex_unlock; | 2225 | goto exit_mutex_unlock; |
2333 | error = __open_namei_create(nd, path, open_flag, mode); | 2226 | want_write = 1; |
2334 | if (error) { | 2227 | /* Don't check for write permission, don't truncate */ |
2335 | mnt_drop_write(nd->path.mnt); | 2228 | open_flag &= ~O_TRUNC; |
2336 | goto exit; | 2229 | will_truncate = 0; |
2337 | } | 2230 | acc_mode = MAY_OPEN; |
2338 | filp = nameidata_to_filp(nd); | 2231 | error = security_path_mknod(&nd->path, dentry, mode, 0); |
2339 | mnt_drop_write(nd->path.mnt); | 2232 | if (error) |
2340 | path_put(&nd->path); | 2233 | goto exit_mutex_unlock; |
2341 | if (!IS_ERR(filp)) { | 2234 | error = vfs_create(dir->d_inode, dentry, mode, nd); |
2342 | error = ima_file_check(filp, acc_mode); | 2235 | if (error) |
2343 | if (error) { | 2236 | goto exit_mutex_unlock; |
2344 | fput(filp); | 2237 | mutex_unlock(&dir->d_inode->i_mutex); |
2345 | filp = ERR_PTR(error); | 2238 | dput(nd->path.dentry); |
2346 | } | 2239 | nd->path.dentry = dentry; |
2347 | } | 2240 | goto common; |
2348 | return filp; | ||
2349 | } | 2241 | } |
2350 | 2242 | ||
2351 | /* | 2243 | /* |
@@ -2375,7 +2267,40 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
2375 | if (S_ISDIR(nd->inode->i_mode)) | 2267 | if (S_ISDIR(nd->inode->i_mode)) |
2376 | goto exit; | 2268 | goto exit; |
2377 | ok: | 2269 | ok: |
2378 | filp = finish_open(nd, open_flag, acc_mode); | 2270 | if (!S_ISREG(nd->inode->i_mode)) |
2271 | will_truncate = 0; | ||
2272 | |||
2273 | if (will_truncate) { | ||
2274 | error = mnt_want_write(nd->path.mnt); | ||
2275 | if (error) | ||
2276 | goto exit; | ||
2277 | want_write = 1; | ||
2278 | } | ||
2279 | common: | ||
2280 | error = may_open(&nd->path, acc_mode, open_flag); | ||
2281 | if (error) | ||
2282 | goto exit; | ||
2283 | filp = nameidata_to_filp(nd); | ||
2284 | if (!IS_ERR(filp)) { | ||
2285 | error = ima_file_check(filp, op->acc_mode); | ||
2286 | if (error) { | ||
2287 | fput(filp); | ||
2288 | filp = ERR_PTR(error); | ||
2289 | } | ||
2290 | } | ||
2291 | if (!IS_ERR(filp)) { | ||
2292 | if (will_truncate) { | ||
2293 | error = handle_truncate(filp); | ||
2294 | if (error) { | ||
2295 | fput(filp); | ||
2296 | filp = ERR_PTR(error); | ||
2297 | } | ||
2298 | } | ||
2299 | } | ||
2300 | out: | ||
2301 | if (want_write) | ||
2302 | mnt_drop_write(nd->path.mnt); | ||
2303 | path_put(&nd->path); | ||
2379 | return filp; | 2304 | return filp; |
2380 | 2305 | ||
2381 | exit_mutex_unlock: | 2306 | exit_mutex_unlock: |
@@ -2383,204 +2308,103 @@ exit_mutex_unlock: | |||
2383 | exit_dput: | 2308 | exit_dput: |
2384 | path_put_conditional(path, nd); | 2309 | path_put_conditional(path, nd); |
2385 | exit: | 2310 | exit: |
2386 | path_put(&nd->path); | 2311 | filp = ERR_PTR(error); |
2387 | return ERR_PTR(error); | 2312 | goto out; |
2388 | } | 2313 | } |
2389 | 2314 | ||
2390 | /* | 2315 | static struct file *path_openat(int dfd, const char *pathname, |
2391 | * Note that the low bits of the passed in "open_flag" | 2316 | struct nameidata *nd, const struct open_flags *op, int flags) |
2392 | * are not the same as in the local variable "flag". See | ||
2393 | * open_to_namei_flags() for more details. | ||
2394 | */ | ||
2395 | struct file *do_filp_open(int dfd, const char *pathname, | ||
2396 | int open_flag, int mode, int acc_mode) | ||
2397 | { | 2317 | { |
2318 | struct file *base = NULL; | ||
2398 | struct file *filp; | 2319 | struct file *filp; |
2399 | struct nameidata nd; | ||
2400 | int error; | ||
2401 | struct path path; | 2320 | struct path path; |
2402 | int count = 0; | 2321 | int error; |
2403 | int flag = open_to_namei_flags(open_flag); | ||
2404 | int flags; | ||
2405 | |||
2406 | if (!(open_flag & O_CREAT)) | ||
2407 | mode = 0; | ||
2408 | |||
2409 | /* Must never be set by userspace */ | ||
2410 | open_flag &= ~FMODE_NONOTIFY; | ||
2411 | |||
2412 | /* | ||
2413 | * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only | ||
2414 | * check for O_DSYNC if they need any syncing at all, we enforce it's | ||
2415 | * always set instead of having to deal with possibly weird behaviour | ||
2416 | * for malicious applications setting only __O_SYNC. | ||
2417 | */ | ||
2418 | if (open_flag & __O_SYNC) | ||
2419 | open_flag |= O_DSYNC; | ||
2420 | |||
2421 | if (!acc_mode) | ||
2422 | acc_mode = MAY_OPEN | ACC_MODE(open_flag); | ||
2423 | |||
2424 | /* O_TRUNC implies we need access checks for write permissions */ | ||
2425 | if (open_flag & O_TRUNC) | ||
2426 | acc_mode |= MAY_WRITE; | ||
2427 | |||
2428 | /* Allow the LSM permission hook to distinguish append | ||
2429 | access from general write access. */ | ||
2430 | if (open_flag & O_APPEND) | ||
2431 | acc_mode |= MAY_APPEND; | ||
2432 | |||
2433 | flags = LOOKUP_OPEN; | ||
2434 | if (open_flag & O_CREAT) { | ||
2435 | flags |= LOOKUP_CREATE; | ||
2436 | if (open_flag & O_EXCL) | ||
2437 | flags |= LOOKUP_EXCL; | ||
2438 | } | ||
2439 | if (open_flag & O_DIRECTORY) | ||
2440 | flags |= LOOKUP_DIRECTORY; | ||
2441 | if (!(open_flag & O_NOFOLLOW)) | ||
2442 | flags |= LOOKUP_FOLLOW; | ||
2443 | 2322 | ||
2444 | filp = get_empty_filp(); | 2323 | filp = get_empty_filp(); |
2445 | if (!filp) | 2324 | if (!filp) |
2446 | return ERR_PTR(-ENFILE); | 2325 | return ERR_PTR(-ENFILE); |
2447 | 2326 | ||
2448 | filp->f_flags = open_flag; | 2327 | filp->f_flags = op->open_flag; |
2449 | nd.intent.open.file = filp; | 2328 | nd->intent.open.file = filp; |
2450 | nd.intent.open.flags = flag; | 2329 | nd->intent.open.flags = open_to_namei_flags(op->open_flag); |
2451 | nd.intent.open.create_mode = mode; | 2330 | nd->intent.open.create_mode = op->mode; |
2452 | |||
2453 | if (open_flag & O_CREAT) | ||
2454 | goto creat; | ||
2455 | 2331 | ||
2456 | /* !O_CREAT, simple open */ | 2332 | error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); |
2457 | error = do_path_lookup(dfd, pathname, flags, &nd); | ||
2458 | if (unlikely(error)) | 2333 | if (unlikely(error)) |
2459 | goto out_filp2; | ||
2460 | error = -ELOOP; | ||
2461 | if (!(nd.flags & LOOKUP_FOLLOW)) { | ||
2462 | if (nd.inode->i_op->follow_link) | ||
2463 | goto out_path2; | ||
2464 | } | ||
2465 | error = -ENOTDIR; | ||
2466 | if (nd.flags & LOOKUP_DIRECTORY) { | ||
2467 | if (!nd.inode->i_op->lookup) | ||
2468 | goto out_path2; | ||
2469 | } | ||
2470 | audit_inode(pathname, nd.path.dentry); | ||
2471 | filp = finish_open(&nd, open_flag, acc_mode); | ||
2472 | out2: | ||
2473 | release_open_intent(&nd); | ||
2474 | return filp; | ||
2475 | |||
2476 | out_path2: | ||
2477 | path_put(&nd.path); | ||
2478 | out_filp2: | ||
2479 | filp = ERR_PTR(error); | ||
2480 | goto out2; | ||
2481 | |||
2482 | creat: | ||
2483 | /* OK, have to create the file. Find the parent. */ | ||
2484 | error = path_init_rcu(dfd, pathname, | ||
2485 | LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd); | ||
2486 | if (error) | ||
2487 | goto out_filp; | 2334 | goto out_filp; |
2488 | error = path_walk_rcu(pathname, &nd); | ||
2489 | path_finish_rcu(&nd); | ||
2490 | if (unlikely(error == -ECHILD || error == -ESTALE)) { | ||
2491 | /* slower, locked walk */ | ||
2492 | if (error == -ESTALE) { | ||
2493 | reval: | ||
2494 | flags |= LOOKUP_REVAL; | ||
2495 | } | ||
2496 | error = path_init(dfd, pathname, | ||
2497 | LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd); | ||
2498 | if (error) | ||
2499 | goto out_filp; | ||
2500 | 2335 | ||
2501 | error = path_walk_simple(pathname, &nd); | 2336 | current->total_link_count = 0; |
2502 | } | 2337 | error = link_path_walk(pathname, nd); |
2503 | if (unlikely(error)) | 2338 | if (unlikely(error)) |
2504 | goto out_filp; | 2339 | goto out_filp; |
2505 | if (unlikely(!audit_dummy_context())) | ||
2506 | audit_inode(pathname, nd.path.dentry); | ||
2507 | 2340 | ||
2508 | /* | 2341 | filp = do_last(nd, &path, op, pathname); |
2509 | * We have the parent and last component. | ||
2510 | */ | ||
2511 | nd.flags = flags; | ||
2512 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); | ||
2513 | while (unlikely(!filp)) { /* trailing symlink */ | 2342 | while (unlikely(!filp)) { /* trailing symlink */ |
2514 | struct path link = path; | 2343 | struct path link = path; |
2515 | struct inode *linki = link.dentry->d_inode; | ||
2516 | void *cookie; | 2344 | void *cookie; |
2517 | error = -ELOOP; | 2345 | if (!(nd->flags & LOOKUP_FOLLOW)) { |
2518 | if (!(nd.flags & LOOKUP_FOLLOW)) | 2346 | path_put_conditional(&path, nd); |
2519 | goto exit_dput; | 2347 | path_put(&nd->path); |
2520 | if (count++ == 32) | 2348 | filp = ERR_PTR(-ELOOP); |
2521 | goto exit_dput; | 2349 | break; |
2522 | /* | ||
2523 | * This is subtle. Instead of calling do_follow_link() we do | ||
2524 | * the thing by hands. The reason is that this way we have zero | ||
2525 | * link_count and path_walk() (called from ->follow_link) | ||
2526 | * honoring LOOKUP_PARENT. After that we have the parent and | ||
2527 | * last component, i.e. we are in the same situation as after | ||
2528 | * the first path_walk(). Well, almost - if the last component | ||
2529 | * is normal we get its copy stored in nd->last.name and we will | ||
2530 | * have to putname() it when we are done. Procfs-like symlinks | ||
2531 | * just set LAST_BIND. | ||
2532 | */ | ||
2533 | nd.flags |= LOOKUP_PARENT; | ||
2534 | error = security_inode_follow_link(link.dentry, &nd); | ||
2535 | if (error) | ||
2536 | goto exit_dput; | ||
2537 | error = __do_follow_link(&link, &nd, &cookie); | ||
2538 | if (unlikely(error)) { | ||
2539 | if (!IS_ERR(cookie) && linki->i_op->put_link) | ||
2540 | linki->i_op->put_link(link.dentry, &nd, cookie); | ||
2541 | /* nd.path had been dropped */ | ||
2542 | nd.path = link; | ||
2543 | goto out_path; | ||
2544 | } | 2350 | } |
2545 | nd.flags &= ~LOOKUP_PARENT; | 2351 | nd->flags |= LOOKUP_PARENT; |
2546 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); | 2352 | nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); |
2547 | if (linki->i_op->put_link) | 2353 | error = follow_link(&link, nd, &cookie); |
2548 | linki->i_op->put_link(link.dentry, &nd, cookie); | 2354 | if (unlikely(error)) |
2549 | path_put(&link); | 2355 | filp = ERR_PTR(error); |
2356 | else | ||
2357 | filp = do_last(nd, &path, op, pathname); | ||
2358 | put_link(nd, &link, cookie); | ||
2550 | } | 2359 | } |
2551 | out: | 2360 | out: |
2552 | if (nd.root.mnt) | 2361 | if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) |
2553 | path_put(&nd.root); | 2362 | path_put(&nd->root); |
2554 | if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) | 2363 | if (base) |
2555 | goto reval; | 2364 | fput(base); |
2556 | release_open_intent(&nd); | 2365 | release_open_intent(nd); |
2557 | return filp; | 2366 | return filp; |
2558 | 2367 | ||
2559 | exit_dput: | ||
2560 | path_put_conditional(&path, &nd); | ||
2561 | out_path: | ||
2562 | path_put(&nd.path); | ||
2563 | out_filp: | 2368 | out_filp: |
2564 | filp = ERR_PTR(error); | 2369 | filp = ERR_PTR(error); |
2565 | goto out; | 2370 | goto out; |
2566 | } | 2371 | } |
2567 | 2372 | ||
2568 | /** | 2373 | struct file *do_filp_open(int dfd, const char *pathname, |
2569 | * filp_open - open file and return file pointer | 2374 | const struct open_flags *op, int flags) |
2570 | * | ||
2571 | * @filename: path to open | ||
2572 | * @flags: open flags as per the open(2) second argument | ||
2573 | * @mode: mode for the new file if O_CREAT is set, else ignored | ||
2574 | * | ||
2575 | * This is the helper to open a file from kernelspace if you really | ||
2576 | * have to. But in general you should not do this, so please move | ||
2577 | * along, nothing to see here.. | ||
2578 | */ | ||
2579 | struct file *filp_open(const char *filename, int flags, int mode) | ||
2580 | { | 2375 | { |
2581 | return do_filp_open(AT_FDCWD, filename, flags, mode, 0); | 2376 | struct nameidata nd; |
2377 | struct file *filp; | ||
2378 | |||
2379 | filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU); | ||
2380 | if (unlikely(filp == ERR_PTR(-ECHILD))) | ||
2381 | filp = path_openat(dfd, pathname, &nd, op, flags); | ||
2382 | if (unlikely(filp == ERR_PTR(-ESTALE))) | ||
2383 | filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL); | ||
2384 | return filp; | ||
2385 | } | ||
2386 | |||
2387 | struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, | ||
2388 | const char *name, const struct open_flags *op, int flags) | ||
2389 | { | ||
2390 | struct nameidata nd; | ||
2391 | struct file *file; | ||
2392 | |||
2393 | nd.root.mnt = mnt; | ||
2394 | nd.root.dentry = dentry; | ||
2395 | |||
2396 | flags |= LOOKUP_ROOT; | ||
2397 | |||
2398 | if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN) | ||
2399 | return ERR_PTR(-ELOOP); | ||
2400 | |||
2401 | file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU); | ||
2402 | if (unlikely(file == ERR_PTR(-ECHILD))) | ||
2403 | file = path_openat(-1, name, &nd, op, flags); | ||
2404 | if (unlikely(file == ERR_PTR(-ESTALE))) | ||
2405 | file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL); | ||
2406 | return file; | ||
2582 | } | 2407 | } |
2583 | EXPORT_SYMBOL(filp_open); | ||
2584 | 2408 | ||
2585 | /** | 2409 | /** |
2586 | * lookup_create - lookup a dentry, creating it if it doesn't exist | 2410 | * lookup_create - lookup a dentry, creating it if it doesn't exist |
@@ -2642,7 +2466,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) | |||
2642 | if (error) | 2466 | if (error) |
2643 | return error; | 2467 | return error; |
2644 | 2468 | ||
2645 | if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) | 2469 | if ((S_ISCHR(mode) || S_ISBLK(mode)) && |
2470 | !ns_capable(inode_userns(dir), CAP_MKNOD)) | ||
2646 | return -EPERM; | 2471 | return -EPERM; |
2647 | 2472 | ||
2648 | if (!dir->i_op->mknod) | 2473 | if (!dir->i_op->mknod) |
@@ -3119,7 +2944,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de | |||
3119 | return error; | 2944 | return error; |
3120 | 2945 | ||
3121 | mutex_lock(&inode->i_mutex); | 2946 | mutex_lock(&inode->i_mutex); |
3122 | error = dir->i_op->link(old_dentry, dir, new_dentry); | 2947 | /* Make sure we don't allow creating hardlink to an unlinked file */ |
2948 | if (inode->i_nlink == 0) | ||
2949 | error = -ENOENT; | ||
2950 | else | ||
2951 | error = dir->i_op->link(old_dentry, dir, new_dentry); | ||
3123 | mutex_unlock(&inode->i_mutex); | 2952 | mutex_unlock(&inode->i_mutex); |
3124 | if (!error) | 2953 | if (!error) |
3125 | fsnotify_link(dir, inode, new_dentry); | 2954 | fsnotify_link(dir, inode, new_dentry); |
@@ -3141,15 +2970,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, | |||
3141 | struct dentry *new_dentry; | 2970 | struct dentry *new_dentry; |
3142 | struct nameidata nd; | 2971 | struct nameidata nd; |
3143 | struct path old_path; | 2972 | struct path old_path; |
2973 | int how = 0; | ||
3144 | int error; | 2974 | int error; |
3145 | char *to; | 2975 | char *to; |
3146 | 2976 | ||
3147 | if ((flags & ~AT_SYMLINK_FOLLOW) != 0) | 2977 | if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) |
3148 | return -EINVAL; | 2978 | return -EINVAL; |
2979 | /* | ||
2980 | * To use null names we require CAP_DAC_READ_SEARCH. | ||
2981 | * This ensures that not everyone will be able to create a | ||
2982 | * hard link using the passed file descriptor. | ||
2983 | */ | ||
2984 | if (flags & AT_EMPTY_PATH) { | ||
2985 | if (!capable(CAP_DAC_READ_SEARCH)) | ||
2986 | return -ENOENT; | ||
2987 | how = LOOKUP_EMPTY; | ||
2988 | } | ||
2989 | |||
2990 | if (flags & AT_SYMLINK_FOLLOW) | ||
2991 | how |= LOOKUP_FOLLOW; | ||
3149 | 2992 | ||
3150 | error = user_path_at(olddfd, oldname, | 2993 | error = user_path_at(olddfd, oldname, how, &old_path); |
3151 | flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0, | ||
3152 | &old_path); | ||
3153 | if (error) | 2994 | if (error) |
3154 | return error; | 2995 | return error; |
3155 | 2996 | ||
@@ -3586,7 +3427,7 @@ EXPORT_SYMBOL(page_readlink); | |||
3586 | EXPORT_SYMBOL(__page_symlink); | 3427 | EXPORT_SYMBOL(__page_symlink); |
3587 | EXPORT_SYMBOL(page_symlink); | 3428 | EXPORT_SYMBOL(page_symlink); |
3588 | EXPORT_SYMBOL(page_symlink_inode_operations); | 3429 | EXPORT_SYMBOL(page_symlink_inode_operations); |
3589 | EXPORT_SYMBOL(path_lookup); | 3430 | EXPORT_SYMBOL(kern_path_parent); |
3590 | EXPORT_SYMBOL(kern_path); | 3431 | EXPORT_SYMBOL(kern_path); |
3591 | EXPORT_SYMBOL(vfs_path_lookup); | 3432 | EXPORT_SYMBOL(vfs_path_lookup); |
3592 | EXPORT_SYMBOL(inode_permission); | 3433 | EXPORT_SYMBOL(inode_permission); |