aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r--fs/namei.c1863
1 files changed, 800 insertions, 1063 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 7d77f24d32a9..2358b326b221 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -70,7 +70,7 @@
70 * name indicated by the symlink. The old code always complained that the 70 * name indicated by the symlink. The old code always complained that the
71 * name already exists, due to not following the symlink even if its target 71 * name already exists, due to not following the symlink even if its target
72 * is nonexistent. The new semantics affects also mknod() and link() when 72 * is nonexistent. The new semantics affects also mknod() and link() when
73 * the name is a symlink pointing to a non-existant name. 73 * the name is a symlink pointing to a non-existent name.
74 * 74 *
75 * I don't know which semantics is the right one, since I have no access 75 * I don't know which semantics is the right one, since I have no access
76 * to standards. But I found by trial that HP-UX 9.0 has the full "new" 76 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page)
136 return retval; 136 return retval;
137} 137}
138 138
139char * getname(const char __user * filename) 139static char *getname_flags(const char __user * filename, int flags)
140{ 140{
141 char *tmp, *result; 141 char *tmp, *result;
142 142
@@ -147,14 +147,21 @@ char * getname(const char __user * filename)
147 147
148 result = tmp; 148 result = tmp;
149 if (retval < 0) { 149 if (retval < 0) {
150 __putname(tmp); 150 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
151 result = ERR_PTR(retval); 151 __putname(tmp);
152 result = ERR_PTR(retval);
153 }
152 } 154 }
153 } 155 }
154 audit_getname(result); 156 audit_getname(result);
155 return result; 157 return result;
156} 158}
157 159
160char *getname(const char __user * filename)
161{
162 return getname_flags(filename, 0);
163}
164
158#ifdef CONFIG_AUDITSYSCALL 165#ifdef CONFIG_AUDITSYSCALL
159void putname(const char *name) 166void putname(const char *name)
160{ 167{
@@ -172,10 +179,13 @@ EXPORT_SYMBOL(putname);
172static int acl_permission_check(struct inode *inode, int mask, unsigned int flags, 179static int acl_permission_check(struct inode *inode, int mask, unsigned int flags,
173 int (*check_acl)(struct inode *inode, int mask, unsigned int flags)) 180 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
174{ 181{
175 umode_t mode = inode->i_mode; 182 unsigned int mode = inode->i_mode;
176 183
177 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 184 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
178 185
186 if (current_user_ns() != inode_userns(inode))
187 goto other_perms;
188
179 if (current_fsuid() == inode->i_uid) 189 if (current_fsuid() == inode->i_uid)
180 mode >>= 6; 190 mode >>= 6;
181 else { 191 else {
@@ -189,6 +199,7 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag
189 mode >>= 3; 199 mode >>= 3;
190 } 200 }
191 201
202other_perms:
192 /* 203 /*
193 * If the DACs are ok we don't need any capability check. 204 * If the DACs are ok we don't need any capability check.
194 */ 205 */
@@ -230,7 +241,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
230 * Executable DACs are overridable if at least one exec bit is set. 241 * Executable DACs are overridable if at least one exec bit is set.
231 */ 242 */
232 if (!(mask & MAY_EXEC) || execute_ok(inode)) 243 if (!(mask & MAY_EXEC) || execute_ok(inode))
233 if (capable(CAP_DAC_OVERRIDE)) 244 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
234 return 0; 245 return 0;
235 246
236 /* 247 /*
@@ -238,7 +249,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
238 */ 249 */
239 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 250 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
240 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 251 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
241 if (capable(CAP_DAC_READ_SEARCH)) 252 if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
242 return 0; 253 return 0;
243 254
244 return -EACCES; 255 return -EACCES;
@@ -380,111 +391,63 @@ void path_put(struct path *path)
380} 391}
381EXPORT_SYMBOL(path_put); 392EXPORT_SYMBOL(path_put);
382 393
383/** 394/*
384 * nameidata_drop_rcu - drop this nameidata out of rcu-walk
385 * @nd: nameidata pathwalk data to drop
386 * Returns: 0 on success, -ECHILD on failure
387 *
388 * Path walking has 2 modes, rcu-walk and ref-walk (see 395 * Path walking has 2 modes, rcu-walk and ref-walk (see
389 * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt 396 * Documentation/filesystems/path-lookup.txt). In situations when we can't
390 * to drop out of rcu-walk mode and take normal reference counts on dentries 397 * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab
391 * and vfsmounts to transition to rcu-walk mode. __drop_rcu* functions take 398 * normal reference counts on dentries and vfsmounts to transition to ref-walk
392 * refcounts at the last known good point before rcu-walk got stuck, so 399 * mode. Refcounts are grabbed at the last known good point before rcu-walk
393 * ref-walk may continue from there. If this is not successful (eg. a seqcount 400 * got stuck, so ref-walk may continue from there. If this is not successful
394 * has changed), then failure is returned and path walk restarts from the 401 * (eg. a seqcount has changed), then failure is returned and it's up to caller
395 * beginning in ref-walk mode. 402 * to restart the path walk from the beginning in ref-walk mode.
396 *
397 * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into
398 * ref-walk. Must be called from rcu-walk context.
399 */ 403 */
400static int nameidata_drop_rcu(struct nameidata *nd)
401{
402 struct fs_struct *fs = current->fs;
403 struct dentry *dentry = nd->path.dentry;
404
405 BUG_ON(!(nd->flags & LOOKUP_RCU));
406 if (nd->root.mnt) {
407 spin_lock(&fs->lock);
408 if (nd->root.mnt != fs->root.mnt ||
409 nd->root.dentry != fs->root.dentry)
410 goto err_root;
411 }
412 spin_lock(&dentry->d_lock);
413 if (!__d_rcu_to_refcount(dentry, nd->seq))
414 goto err;
415 BUG_ON(nd->inode != dentry->d_inode);
416 spin_unlock(&dentry->d_lock);
417 if (nd->root.mnt) {
418 path_get(&nd->root);
419 spin_unlock(&fs->lock);
420 }
421 mntget(nd->path.mnt);
422
423 rcu_read_unlock();
424 br_read_unlock(vfsmount_lock);
425 nd->flags &= ~LOOKUP_RCU;
426 return 0;
427err:
428 spin_unlock(&dentry->d_lock);
429err_root:
430 if (nd->root.mnt)
431 spin_unlock(&fs->lock);
432 return -ECHILD;
433}
434
435/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
436static inline int nameidata_drop_rcu_maybe(struct nameidata *nd)
437{
438 if (nd->flags & LOOKUP_RCU)
439 return nameidata_drop_rcu(nd);
440 return 0;
441}
442 404
443/** 405/**
444 * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk 406 * unlazy_walk - try to switch to ref-walk mode.
445 * @nd: nameidata pathwalk data to drop 407 * @nd: nameidata pathwalk data
446 * @dentry: dentry to drop 408 * @dentry: child of nd->path.dentry or NULL
447 * Returns: 0 on success, -ECHILD on failure 409 * Returns: 0 on success, -ECHILD on failure
448 * 410 *
449 * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root, 411 * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry
450 * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on 412 * for ref-walk mode. @dentry must be a path found by a do_lookup call on
451 * @nd. Must be called from rcu-walk context. 413 * @nd or NULL. Must be called from rcu-walk context.
452 */ 414 */
453static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry) 415static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
454{ 416{
455 struct fs_struct *fs = current->fs; 417 struct fs_struct *fs = current->fs;
456 struct dentry *parent = nd->path.dentry; 418 struct dentry *parent = nd->path.dentry;
457 419 int want_root = 0;
458 /*
459 * It can be possible to revalidate the dentry that we started
460 * the path walk with. force_reval_path may also revalidate the
461 * dentry already committed to the nameidata.
462 */
463 if (unlikely(parent == dentry))
464 return nameidata_drop_rcu(nd);
465 420
466 BUG_ON(!(nd->flags & LOOKUP_RCU)); 421 BUG_ON(!(nd->flags & LOOKUP_RCU));
467 if (nd->root.mnt) { 422 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
423 want_root = 1;
468 spin_lock(&fs->lock); 424 spin_lock(&fs->lock);
469 if (nd->root.mnt != fs->root.mnt || 425 if (nd->root.mnt != fs->root.mnt ||
470 nd->root.dentry != fs->root.dentry) 426 nd->root.dentry != fs->root.dentry)
471 goto err_root; 427 goto err_root;
472 } 428 }
473 spin_lock(&parent->d_lock); 429 spin_lock(&parent->d_lock);
474 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 430 if (!dentry) {
475 if (!__d_rcu_to_refcount(dentry, nd->seq)) 431 if (!__d_rcu_to_refcount(parent, nd->seq))
476 goto err; 432 goto err_parent;
477 /* 433 BUG_ON(nd->inode != parent->d_inode);
478 * If the sequence check on the child dentry passed, then the child has 434 } else {
479 * not been removed from its parent. This means the parent dentry must 435 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
480 * be valid and able to take a reference at this point. 436 if (!__d_rcu_to_refcount(dentry, nd->seq))
481 */ 437 goto err_child;
482 BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); 438 /*
483 BUG_ON(!parent->d_count); 439 * If the sequence check on the child dentry passed, then
484 parent->d_count++; 440 * the child has not been removed from its parent. This
485 spin_unlock(&dentry->d_lock); 441 * means the parent dentry must be valid and able to take
442 * a reference at this point.
443 */
444 BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
445 BUG_ON(!parent->d_count);
446 parent->d_count++;
447 spin_unlock(&dentry->d_lock);
448 }
486 spin_unlock(&parent->d_lock); 449 spin_unlock(&parent->d_lock);
487 if (nd->root.mnt) { 450 if (want_root) {
488 path_get(&nd->root); 451 path_get(&nd->root);
489 spin_unlock(&fs->lock); 452 spin_unlock(&fs->lock);
490 } 453 }
@@ -494,106 +457,42 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
494 br_read_unlock(vfsmount_lock); 457 br_read_unlock(vfsmount_lock);
495 nd->flags &= ~LOOKUP_RCU; 458 nd->flags &= ~LOOKUP_RCU;
496 return 0; 459 return 0;
497err: 460
461err_child:
498 spin_unlock(&dentry->d_lock); 462 spin_unlock(&dentry->d_lock);
463err_parent:
499 spin_unlock(&parent->d_lock); 464 spin_unlock(&parent->d_lock);
500err_root: 465err_root:
501 if (nd->root.mnt) 466 if (want_root)
502 spin_unlock(&fs->lock); 467 spin_unlock(&fs->lock);
503 return -ECHILD; 468 return -ECHILD;
504} 469}
505 470
506/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
507static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
508{
509 if (nd->flags & LOOKUP_RCU)
510 return nameidata_dentry_drop_rcu(nd, dentry);
511 return 0;
512}
513
514/**
515 * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk
516 * @nd: nameidata pathwalk data to drop
517 * Returns: 0 on success, -ECHILD on failure
518 *
519 * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk.
520 * nd->path should be the final element of the lookup, so nd->root is discarded.
521 * Must be called from rcu-walk context.
522 */
523static int nameidata_drop_rcu_last(struct nameidata *nd)
524{
525 struct dentry *dentry = nd->path.dentry;
526
527 BUG_ON(!(nd->flags & LOOKUP_RCU));
528 nd->flags &= ~LOOKUP_RCU;
529 nd->root.mnt = NULL;
530 spin_lock(&dentry->d_lock);
531 if (!__d_rcu_to_refcount(dentry, nd->seq))
532 goto err_unlock;
533 BUG_ON(nd->inode != dentry->d_inode);
534 spin_unlock(&dentry->d_lock);
535
536 mntget(nd->path.mnt);
537
538 rcu_read_unlock();
539 br_read_unlock(vfsmount_lock);
540
541 return 0;
542
543err_unlock:
544 spin_unlock(&dentry->d_lock);
545 rcu_read_unlock();
546 br_read_unlock(vfsmount_lock);
547 return -ECHILD;
548}
549
550/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
551static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
552{
553 if (likely(nd->flags & LOOKUP_RCU))
554 return nameidata_drop_rcu_last(nd);
555 return 0;
556}
557
558/** 471/**
559 * release_open_intent - free up open intent resources 472 * release_open_intent - free up open intent resources
560 * @nd: pointer to nameidata 473 * @nd: pointer to nameidata
561 */ 474 */
562void release_open_intent(struct nameidata *nd) 475void release_open_intent(struct nameidata *nd)
563{ 476{
564 if (nd->intent.open.file->f_path.dentry == NULL) 477 struct file *file = nd->intent.open.file;
565 put_filp(nd->intent.open.file);
566 else
567 fput(nd->intent.open.file);
568}
569 478
570/* 479 if (file && !IS_ERR(file)) {
571 * Call d_revalidate and handle filesystems that request rcu-walk 480 if (file->f_path.dentry == NULL)
572 * to be dropped. This may be called and return in rcu-walk mode, 481 put_filp(file);
573 * regardless of success or error. If -ECHILD is returned, the caller 482 else
574 * must return -ECHILD back up the path walk stack so path walk may 483 fput(file);
575 * be restarted in ref-walk mode.
576 */
577static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
578{
579 int status;
580
581 status = dentry->d_op->d_revalidate(dentry, nd);
582 if (status == -ECHILD) {
583 if (nameidata_dentry_drop_rcu(nd, dentry))
584 return status;
585 status = dentry->d_op->d_revalidate(dentry, nd);
586 } 484 }
485}
587 486
588 return status; 487static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
488{
489 return dentry->d_op->d_revalidate(dentry, nd);
589} 490}
590 491
591static inline struct dentry * 492static struct dentry *
592do_revalidate(struct dentry *dentry, struct nameidata *nd) 493do_revalidate(struct dentry *dentry, struct nameidata *nd)
593{ 494{
594 int status; 495 int status = d_revalidate(dentry, nd);
595
596 status = d_revalidate(dentry, nd);
597 if (unlikely(status <= 0)) { 496 if (unlikely(status <= 0)) {
598 /* 497 /*
599 * The dentry failed validation. 498 * The dentry failed validation.
@@ -602,74 +501,67 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
602 * to return a fail status. 501 * to return a fail status.
603 */ 502 */
604 if (status < 0) { 503 if (status < 0) {
605 /* If we're in rcu-walk, we don't have a ref */ 504 dput(dentry);
606 if (!(nd->flags & LOOKUP_RCU))
607 dput(dentry);
608 dentry = ERR_PTR(status); 505 dentry = ERR_PTR(status);
609 506 } else if (!d_invalidate(dentry)) {
610 } else { 507 dput(dentry);
611 /* Don't d_invalidate in rcu-walk mode */ 508 dentry = NULL;
612 if (nameidata_dentry_drop_rcu_maybe(nd, dentry))
613 return ERR_PTR(-ECHILD);
614 if (!d_invalidate(dentry)) {
615 dput(dentry);
616 dentry = NULL;
617 }
618 } 509 }
619 } 510 }
620 return dentry; 511 return dentry;
621} 512}
622 513
623static inline int need_reval_dot(struct dentry *dentry) 514/**
624{ 515 * complete_walk - successful completion of path walk
625 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) 516 * @nd: pointer to nameidata
626 return 0;
627
628 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
629 return 0;
630
631 return 1;
632}
633
634/*
635 * force_reval_path - force revalidation of a dentry
636 *
637 * In some situations the path walking code will trust dentries without
638 * revalidating them. This causes problems for filesystems that depend on
639 * d_revalidate to handle file opens (e.g. NFSv4). When FS_REVAL_DOT is set
640 * (which indicates that it's possible for the dentry to go stale), force
641 * a d_revalidate call before proceeding.
642 * 517 *
643 * Returns 0 if the revalidation was successful. If the revalidation fails, 518 * If we had been in RCU mode, drop out of it and legitimize nd->path.
644 * either return the error returned by d_revalidate or -ESTALE if the 519 * Revalidate the final result, unless we'd already done that during
645 * revalidation it just returned 0. If d_revalidate returns 0, we attempt to 520 * the path walk or the filesystem doesn't ask for it. Return 0 on
646 * invalidate the dentry. It's up to the caller to handle putting references 521 * success, -error on failure. In case of failure caller does not
647 * to the path if necessary. 522 * need to drop nd->path.
648 */ 523 */
649static int 524static int complete_walk(struct nameidata *nd)
650force_reval_path(struct path *path, struct nameidata *nd)
651{ 525{
526 struct dentry *dentry = nd->path.dentry;
652 int status; 527 int status;
653 struct dentry *dentry = path->dentry;
654 528
655 /* 529 if (nd->flags & LOOKUP_RCU) {
656 * only check on filesystems where it's possible for the dentry to 530 nd->flags &= ~LOOKUP_RCU;
657 * become stale. 531 if (!(nd->flags & LOOKUP_ROOT))
658 */ 532 nd->root.mnt = NULL;
659 if (!need_reval_dot(dentry)) 533 spin_lock(&dentry->d_lock);
534 if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
535 spin_unlock(&dentry->d_lock);
536 rcu_read_unlock();
537 br_read_unlock(vfsmount_lock);
538 return -ECHILD;
539 }
540 BUG_ON(nd->inode != dentry->d_inode);
541 spin_unlock(&dentry->d_lock);
542 mntget(nd->path.mnt);
543 rcu_read_unlock();
544 br_read_unlock(vfsmount_lock);
545 }
546
547 if (likely(!(nd->flags & LOOKUP_JUMPED)))
548 return 0;
549
550 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
551 return 0;
552
553 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
660 return 0; 554 return 0;
661 555
556 /* Note: we do not d_invalidate() */
662 status = d_revalidate(dentry, nd); 557 status = d_revalidate(dentry, nd);
663 if (status > 0) 558 if (status > 0)
664 return 0; 559 return 0;
665 560
666 if (!status) { 561 if (!status)
667 /* Don't d_invalidate in rcu-walk mode */
668 if (nameidata_drop_rcu(nd))
669 return -ECHILD;
670 d_invalidate(dentry);
671 status = -ESTALE; 562 status = -ESTALE;
672 } 563
564 path_put(&nd->path);
673 return status; 565 return status;
674} 566}
675 567
@@ -685,6 +577,7 @@ force_reval_path(struct path *path, struct nameidata *nd)
685static inline int exec_permission(struct inode *inode, unsigned int flags) 577static inline int exec_permission(struct inode *inode, unsigned int flags)
686{ 578{
687 int ret; 579 int ret;
580 struct user_namespace *ns = inode_userns(inode);
688 581
689 if (inode->i_op->permission) { 582 if (inode->i_op->permission) {
690 ret = inode->i_op->permission(inode, MAY_EXEC, flags); 583 ret = inode->i_op->permission(inode, MAY_EXEC, flags);
@@ -697,7 +590,8 @@ static inline int exec_permission(struct inode *inode, unsigned int flags)
697 if (ret == -ECHILD) 590 if (ret == -ECHILD)
698 return ret; 591 return ret;
699 592
700 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) 593 if (ns_capable(ns, CAP_DAC_OVERRIDE) ||
594 ns_capable(ns, CAP_DAC_READ_SEARCH))
701 goto ok; 595 goto ok;
702 596
703 return ret; 597 return ret;
@@ -722,6 +616,7 @@ static __always_inline void set_root_rcu(struct nameidata *nd)
722 do { 616 do {
723 seq = read_seqcount_begin(&fs->seq); 617 seq = read_seqcount_begin(&fs->seq);
724 nd->root = fs->root; 618 nd->root = fs->root;
619 nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
725 } while (read_seqcount_retry(&fs->seq, seq)); 620 } while (read_seqcount_retry(&fs->seq, seq));
726 } 621 }
727} 622}
@@ -738,6 +633,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
738 path_put(&nd->path); 633 path_put(&nd->path);
739 nd->path = nd->root; 634 nd->path = nd->root;
740 path_get(&nd->root); 635 path_get(&nd->root);
636 nd->flags |= LOOKUP_JUMPED;
741 } 637 }
742 nd->inode = nd->path.dentry->d_inode; 638 nd->inode = nd->path.dentry->d_inode;
743 639
@@ -767,18 +663,43 @@ static inline void path_to_nameidata(const struct path *path,
767 nd->path.dentry = path->dentry; 663 nd->path.dentry = path->dentry;
768} 664}
769 665
666static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
667{
668 struct inode *inode = link->dentry->d_inode;
669 if (!IS_ERR(cookie) && inode->i_op->put_link)
670 inode->i_op->put_link(link->dentry, nd, cookie);
671 path_put(link);
672}
673
770static __always_inline int 674static __always_inline int
771__do_follow_link(const struct path *link, struct nameidata *nd, void **p) 675follow_link(struct path *link, struct nameidata *nd, void **p)
772{ 676{
773 int error; 677 int error;
774 struct dentry *dentry = link->dentry; 678 struct dentry *dentry = link->dentry;
775 679
776 touch_atime(link->mnt, dentry); 680 BUG_ON(nd->flags & LOOKUP_RCU);
777 nd_set_link(nd, NULL);
778 681
779 if (link->mnt == nd->path.mnt) 682 if (link->mnt == nd->path.mnt)
780 mntget(link->mnt); 683 mntget(link->mnt);
781 684
685 if (unlikely(current->total_link_count >= 40)) {
686 *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
687 path_put(&nd->path);
688 return -ELOOP;
689 }
690 cond_resched();
691 current->total_link_count++;
692
693 touch_atime(link->mnt, dentry);
694 nd_set_link(nd, NULL);
695
696 error = security_inode_follow_link(link->dentry, nd);
697 if (error) {
698 *p = ERR_PTR(error); /* no ->put_link(), please */
699 path_put(&nd->path);
700 return error;
701 }
702
782 nd->last_type = LAST_BIND; 703 nd->last_type = LAST_BIND;
783 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 704 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
784 error = PTR_ERR(*p); 705 error = PTR_ERR(*p);
@@ -788,50 +709,18 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
788 if (s) 709 if (s)
789 error = __vfs_follow_link(nd, s); 710 error = __vfs_follow_link(nd, s);
790 else if (nd->last_type == LAST_BIND) { 711 else if (nd->last_type == LAST_BIND) {
791 error = force_reval_path(&nd->path, nd); 712 nd->flags |= LOOKUP_JUMPED;
792 if (error) 713 nd->inode = nd->path.dentry->d_inode;
714 if (nd->inode->i_op->follow_link) {
715 /* stepped on a _really_ weird one */
793 path_put(&nd->path); 716 path_put(&nd->path);
717 error = -ELOOP;
718 }
794 } 719 }
795 } 720 }
796 return error; 721 return error;
797} 722}
798 723
799/*
800 * This limits recursive symlink follows to 8, while
801 * limiting consecutive symlinks to 40.
802 *
803 * Without that kind of total limit, nasty chains of consecutive
804 * symlinks can cause almost arbitrarily long lookups.
805 */
806static inline int do_follow_link(struct path *path, struct nameidata *nd)
807{
808 void *cookie;
809 int err = -ELOOP;
810 if (current->link_count >= MAX_NESTED_LINKS)
811 goto loop;
812 if (current->total_link_count >= 40)
813 goto loop;
814 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
815 cond_resched();
816 err = security_inode_follow_link(path->dentry, nd);
817 if (err)
818 goto loop;
819 current->link_count++;
820 current->total_link_count++;
821 nd->depth++;
822 err = __do_follow_link(path, nd, &cookie);
823 if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
824 path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
825 path_put(path);
826 current->link_count--;
827 nd->depth--;
828 return err;
829loop:
830 path_put_conditional(path, nd);
831 path_put(&nd->path);
832 return err;
833}
834
835static int follow_up_rcu(struct path *path) 724static int follow_up_rcu(struct path *path)
836{ 725{
837 struct vfsmount *parent; 726 struct vfsmount *parent;
@@ -970,8 +859,7 @@ static int follow_managed(struct path *path, unsigned flags)
970 if (managed & DCACHE_MANAGE_TRANSIT) { 859 if (managed & DCACHE_MANAGE_TRANSIT) {
971 BUG_ON(!path->dentry->d_op); 860 BUG_ON(!path->dentry->d_op);
972 BUG_ON(!path->dentry->d_op->d_manage); 861 BUG_ON(!path->dentry->d_op->d_manage);
973 ret = path->dentry->d_op->d_manage(path->dentry, 862 ret = path->dentry->d_op->d_manage(path->dentry, false);
974 false, false);
975 if (ret < 0) 863 if (ret < 0)
976 return ret == -EISDIR ? 0 : ret; 864 return ret == -EISDIR ? 0 : ret;
977 } 865 }
@@ -1024,6 +912,12 @@ int follow_down_one(struct path *path)
1024 return 0; 912 return 0;
1025} 913}
1026 914
915static inline bool managed_dentry_might_block(struct dentry *dentry)
916{
917 return (dentry->d_flags & DCACHE_MANAGE_TRANSIT &&
918 dentry->d_op->d_manage(dentry, true) < 0);
919}
920
1027/* 921/*
1028 * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we 922 * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we
1029 * meet a managed dentry and we're not walking to "..". True is returned to 923 * meet a managed dentry and we're not walking to "..". True is returned to
@@ -1032,19 +926,26 @@ int follow_down_one(struct path *path)
1032static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, 926static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
1033 struct inode **inode, bool reverse_transit) 927 struct inode **inode, bool reverse_transit)
1034{ 928{
1035 while (d_mountpoint(path->dentry)) { 929 for (;;) {
1036 struct vfsmount *mounted; 930 struct vfsmount *mounted;
1037 if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) && 931 /*
1038 !reverse_transit && 932 * Don't forget we might have a non-mountpoint managed dentry
1039 path->dentry->d_op->d_manage(path->dentry, false, true) < 0) 933 * that wants to block transit.
934 */
935 *inode = path->dentry->d_inode;
936 if (!reverse_transit &&
937 unlikely(managed_dentry_might_block(path->dentry)))
1040 return false; 938 return false;
939
940 if (!d_mountpoint(path->dentry))
941 break;
942
1041 mounted = __lookup_mnt(path->mnt, path->dentry, 1); 943 mounted = __lookup_mnt(path->mnt, path->dentry, 1);
1042 if (!mounted) 944 if (!mounted)
1043 break; 945 break;
1044 path->mnt = mounted; 946 path->mnt = mounted;
1045 path->dentry = mounted->mnt_root; 947 path->dentry = mounted->mnt_root;
1046 nd->seq = read_seqcount_begin(&path->dentry->d_seq); 948 nd->seq = read_seqcount_begin(&path->dentry->d_seq);
1047 *inode = path->dentry->d_inode;
1048 } 949 }
1049 950
1050 if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) 951 if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
@@ -1070,7 +971,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1070 971
1071 seq = read_seqcount_begin(&parent->d_seq); 972 seq = read_seqcount_begin(&parent->d_seq);
1072 if (read_seqcount_retry(&old->d_seq, nd->seq)) 973 if (read_seqcount_retry(&old->d_seq, nd->seq))
1073 return -ECHILD; 974 goto failed;
1074 inode = parent->d_inode; 975 inode = parent->d_inode;
1075 nd->path.dentry = parent; 976 nd->path.dentry = parent;
1076 nd->seq = seq; 977 nd->seq = seq;
@@ -1083,8 +984,15 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1083 } 984 }
1084 __follow_mount_rcu(nd, &nd->path, &inode, true); 985 __follow_mount_rcu(nd, &nd->path, &inode, true);
1085 nd->inode = inode; 986 nd->inode = inode;
1086
1087 return 0; 987 return 0;
988
989failed:
990 nd->flags &= ~LOOKUP_RCU;
991 if (!(nd->flags & LOOKUP_ROOT))
992 nd->root.mnt = NULL;
993 rcu_read_unlock();
994 br_read_unlock(vfsmount_lock);
995 return -ECHILD;
1088} 996}
1089 997
1090/* 998/*
@@ -1095,7 +1003,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1095 * Care must be taken as namespace_sem may be held (indicated by mounting_here 1003 * Care must be taken as namespace_sem may be held (indicated by mounting_here
1096 * being true). 1004 * being true).
1097 */ 1005 */
1098int follow_down(struct path *path, bool mounting_here) 1006int follow_down(struct path *path)
1099{ 1007{
1100 unsigned managed; 1008 unsigned managed;
1101 int ret; 1009 int ret;
@@ -1116,7 +1024,7 @@ int follow_down(struct path *path, bool mounting_here)
1116 BUG_ON(!path->dentry->d_op); 1024 BUG_ON(!path->dentry->d_op);
1117 BUG_ON(!path->dentry->d_op->d_manage); 1025 BUG_ON(!path->dentry->d_op->d_manage);
1118 ret = path->dentry->d_op->d_manage( 1026 ret = path->dentry->d_op->d_manage(
1119 path->dentry, mounting_here, false); 1027 path->dentry, false);
1120 if (ret < 0) 1028 if (ret < 0)
1121 return ret == -EISDIR ? 0 : ret; 1029 return ret == -EISDIR ? 0 : ret;
1122 } 1030 }
@@ -1218,57 +1126,80 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1218{ 1126{
1219 struct vfsmount *mnt = nd->path.mnt; 1127 struct vfsmount *mnt = nd->path.mnt;
1220 struct dentry *dentry, *parent = nd->path.dentry; 1128 struct dentry *dentry, *parent = nd->path.dentry;
1221 struct inode *dir; 1129 int need_reval = 1;
1130 int status = 1;
1222 int err; 1131 int err;
1223 1132
1224 /* 1133 /*
1225 * See if the low-level filesystem might want
1226 * to use its own hash..
1227 */
1228 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1229 err = parent->d_op->d_hash(parent, nd->inode, name);
1230 if (err < 0)
1231 return err;
1232 }
1233
1234 /*
1235 * Rename seqlock is not required here because in the off chance 1134 * Rename seqlock is not required here because in the off chance
1236 * of a false negative due to a concurrent rename, we're going to 1135 * of a false negative due to a concurrent rename, we're going to
1237 * do the non-racy lookup, below. 1136 * do the non-racy lookup, below.
1238 */ 1137 */
1239 if (nd->flags & LOOKUP_RCU) { 1138 if (nd->flags & LOOKUP_RCU) {
1240 unsigned seq; 1139 unsigned seq;
1241
1242 *inode = nd->inode; 1140 *inode = nd->inode;
1243 dentry = __d_lookup_rcu(parent, name, &seq, inode); 1141 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1244 if (!dentry) { 1142 if (!dentry)
1245 if (nameidata_drop_rcu(nd)) 1143 goto unlazy;
1246 return -ECHILD; 1144
1247 goto need_lookup;
1248 }
1249 /* Memory barrier in read_seqcount_begin of child is enough */ 1145 /* Memory barrier in read_seqcount_begin of child is enough */
1250 if (__read_seqcount_retry(&parent->d_seq, nd->seq)) 1146 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1251 return -ECHILD; 1147 return -ECHILD;
1252
1253 nd->seq = seq; 1148 nd->seq = seq;
1254 if (dentry->d_flags & DCACHE_OP_REVALIDATE) 1149
1255 goto need_revalidate; 1150 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1256done2: 1151 status = d_revalidate(dentry, nd);
1152 if (unlikely(status <= 0)) {
1153 if (status != -ECHILD)
1154 need_reval = 0;
1155 goto unlazy;
1156 }
1157 }
1257 path->mnt = mnt; 1158 path->mnt = mnt;
1258 path->dentry = dentry; 1159 path->dentry = dentry;
1259 if (likely(__follow_mount_rcu(nd, path, inode, false))) 1160 if (likely(__follow_mount_rcu(nd, path, inode, false)))
1260 return 0; 1161 return 0;
1261 if (nameidata_drop_rcu(nd)) 1162unlazy:
1163 if (unlazy_walk(nd, dentry))
1262 return -ECHILD; 1164 return -ECHILD;
1263 /* fallthru */ 1165 } else {
1166 dentry = __d_lookup(parent, name);
1264 } 1167 }
1265 dentry = __d_lookup(parent, name); 1168
1266 if (!dentry) 1169retry:
1267 goto need_lookup; 1170 if (unlikely(!dentry)) {
1268found: 1171 struct inode *dir = parent->d_inode;
1269 if (dentry->d_flags & DCACHE_OP_REVALIDATE) 1172 BUG_ON(nd->inode != dir);
1270 goto need_revalidate; 1173
1271done: 1174 mutex_lock(&dir->i_mutex);
1175 dentry = d_lookup(parent, name);
1176 if (likely(!dentry)) {
1177 dentry = d_alloc_and_lookup(parent, name, nd);
1178 if (IS_ERR(dentry)) {
1179 mutex_unlock(&dir->i_mutex);
1180 return PTR_ERR(dentry);
1181 }
1182 /* known good */
1183 need_reval = 0;
1184 status = 1;
1185 }
1186 mutex_unlock(&dir->i_mutex);
1187 }
1188 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
1189 status = d_revalidate(dentry, nd);
1190 if (unlikely(status <= 0)) {
1191 if (status < 0) {
1192 dput(dentry);
1193 return status;
1194 }
1195 if (!d_invalidate(dentry)) {
1196 dput(dentry);
1197 dentry = NULL;
1198 need_reval = 1;
1199 goto retry;
1200 }
1201 }
1202
1272 path->mnt = mnt; 1203 path->mnt = mnt;
1273 path->dentry = dentry; 1204 path->dentry = dentry;
1274 err = follow_managed(path, nd->flags); 1205 err = follow_managed(path, nd->flags);
@@ -1278,49 +1209,117 @@ done:
1278 } 1209 }
1279 *inode = path->dentry->d_inode; 1210 *inode = path->dentry->d_inode;
1280 return 0; 1211 return 0;
1212}
1281 1213
1282need_lookup: 1214static inline int may_lookup(struct nameidata *nd)
1283 dir = parent->d_inode; 1215{
1284 BUG_ON(nd->inode != dir); 1216 if (nd->flags & LOOKUP_RCU) {
1217 int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
1218 if (err != -ECHILD)
1219 return err;
1220 if (unlazy_walk(nd, NULL))
1221 return -ECHILD;
1222 }
1223 return exec_permission(nd->inode, 0);
1224}
1285 1225
1286 mutex_lock(&dir->i_mutex); 1226static inline int handle_dots(struct nameidata *nd, int type)
1287 /* 1227{
1288 * First re-do the cached lookup just in case it was created 1228 if (type == LAST_DOTDOT) {
1289 * while we waited for the directory semaphore, or the first 1229 if (nd->flags & LOOKUP_RCU) {
1290 * lookup failed due to an unrelated rename. 1230 if (follow_dotdot_rcu(nd))
1291 * 1231 return -ECHILD;
1292 * This could use version numbering or similar to avoid unnecessary 1232 } else
1293 * cache lookups, but then we'd have to do the first lookup in the 1233 follow_dotdot(nd);
1294 * non-racy way. However in the common case here, everything should
1295 * be hot in cache, so would it be a big win?
1296 */
1297 dentry = d_lookup(parent, name);
1298 if (likely(!dentry)) {
1299 dentry = d_alloc_and_lookup(parent, name, nd);
1300 mutex_unlock(&dir->i_mutex);
1301 if (IS_ERR(dentry))
1302 goto fail;
1303 goto done;
1304 } 1234 }
1235 return 0;
1236}
1237
1238static void terminate_walk(struct nameidata *nd)
1239{
1240 if (!(nd->flags & LOOKUP_RCU)) {
1241 path_put(&nd->path);
1242 } else {
1243 nd->flags &= ~LOOKUP_RCU;
1244 if (!(nd->flags & LOOKUP_ROOT))
1245 nd->root.mnt = NULL;
1246 rcu_read_unlock();
1247 br_read_unlock(vfsmount_lock);
1248 }
1249}
1250
1251static inline int walk_component(struct nameidata *nd, struct path *path,
1252 struct qstr *name, int type, int follow)
1253{
1254 struct inode *inode;
1255 int err;
1305 /* 1256 /*
1306 * Uhhuh! Nasty case: the cache was re-populated while 1257 * "." and ".." are special - ".." especially so because it has
1307 * we waited on the semaphore. Need to revalidate. 1258 * to be able to know about the current root directory and
1259 * parent relationships.
1308 */ 1260 */
1309 mutex_unlock(&dir->i_mutex); 1261 if (unlikely(type != LAST_NORM))
1310 goto found; 1262 return handle_dots(nd, type);
1263 err = do_lookup(nd, name, path, &inode);
1264 if (unlikely(err)) {
1265 terminate_walk(nd);
1266 return err;
1267 }
1268 if (!inode) {
1269 path_to_nameidata(path, nd);
1270 terminate_walk(nd);
1271 return -ENOENT;
1272 }
1273 if (unlikely(inode->i_op->follow_link) && follow) {
1274 if (nd->flags & LOOKUP_RCU) {
1275 if (unlikely(unlazy_walk(nd, path->dentry))) {
1276 terminate_walk(nd);
1277 return -ECHILD;
1278 }
1279 }
1280 BUG_ON(inode != path->dentry->d_inode);
1281 return 1;
1282 }
1283 path_to_nameidata(path, nd);
1284 nd->inode = inode;
1285 return 0;
1286}
1311 1287
1312need_revalidate: 1288/*
1313 dentry = do_revalidate(dentry, nd); 1289 * This limits recursive symlink follows to 8, while
1314 if (!dentry) 1290 * limiting consecutive symlinks to 40.
1315 goto need_lookup; 1291 *
1316 if (IS_ERR(dentry)) 1292 * Without that kind of total limit, nasty chains of consecutive
1317 goto fail; 1293 * symlinks can cause almost arbitrarily long lookups.
1318 if (nd->flags & LOOKUP_RCU) 1294 */
1319 goto done2; 1295static inline int nested_symlink(struct path *path, struct nameidata *nd)
1320 goto done; 1296{
1297 int res;
1321 1298
1322fail: 1299 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
1323 return PTR_ERR(dentry); 1300 path_put_conditional(path, nd);
1301 path_put(&nd->path);
1302 return -ELOOP;
1303 }
1304 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1305
1306 nd->depth++;
1307 current->link_count++;
1308
1309 do {
1310 struct path link = *path;
1311 void *cookie;
1312
1313 res = follow_link(&link, nd, &cookie);
1314 if (!res)
1315 res = walk_component(nd, path, &nd->last,
1316 nd->last_type, LOOKUP_FOLLOW);
1317 put_link(nd, &link, cookie);
1318 } while (res > 0);
1319
1320 current->link_count--;
1321 nd->depth--;
1322 return res;
1324} 1323}
1325 1324
1326/* 1325/*
@@ -1340,30 +1339,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1340 while (*name=='/') 1339 while (*name=='/')
1341 name++; 1340 name++;
1342 if (!*name) 1341 if (!*name)
1343 goto return_reval; 1342 return 0;
1344
1345 if (nd->depth)
1346 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
1347 1343
1348 /* At this point we know we have a real path component. */ 1344 /* At this point we know we have a real path component. */
1349 for(;;) { 1345 for(;;) {
1350 struct inode *inode;
1351 unsigned long hash; 1346 unsigned long hash;
1352 struct qstr this; 1347 struct qstr this;
1353 unsigned int c; 1348 unsigned int c;
1349 int type;
1354 1350
1355 nd->flags |= LOOKUP_CONTINUE; 1351 nd->flags |= LOOKUP_CONTINUE;
1356 if (nd->flags & LOOKUP_RCU) { 1352
1357 err = exec_permission(nd->inode, IPERM_FLAG_RCU); 1353 err = may_lookup(nd);
1358 if (err == -ECHILD) {
1359 if (nameidata_drop_rcu(nd))
1360 return -ECHILD;
1361 goto exec_again;
1362 }
1363 } else {
1364exec_again:
1365 err = exec_permission(nd->inode, 0);
1366 }
1367 if (err) 1354 if (err)
1368 break; 1355 break;
1369 1356
@@ -1379,56 +1366,43 @@ exec_again:
1379 this.len = name - (const char *) this.name; 1366 this.len = name - (const char *) this.name;
1380 this.hash = end_name_hash(hash); 1367 this.hash = end_name_hash(hash);
1381 1368
1369 type = LAST_NORM;
1370 if (this.name[0] == '.') switch (this.len) {
1371 case 2:
1372 if (this.name[1] == '.') {
1373 type = LAST_DOTDOT;
1374 nd->flags |= LOOKUP_JUMPED;
1375 }
1376 break;
1377 case 1:
1378 type = LAST_DOT;
1379 }
1380 if (likely(type == LAST_NORM)) {
1381 struct dentry *parent = nd->path.dentry;
1382 nd->flags &= ~LOOKUP_JUMPED;
1383 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1384 err = parent->d_op->d_hash(parent, nd->inode,
1385 &this);
1386 if (err < 0)
1387 break;
1388 }
1389 }
1390
1382 /* remove trailing slashes? */ 1391 /* remove trailing slashes? */
1383 if (!c) 1392 if (!c)
1384 goto last_component; 1393 goto last_component;
1385 while (*++name == '/'); 1394 while (*++name == '/');
1386 if (!*name) 1395 if (!*name)
1387 goto last_with_slashes; 1396 goto last_component;
1388 1397
1389 /* 1398 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
1390 * "." and ".." are special - ".." especially so because it has 1399 if (err < 0)
1391 * to be able to know about the current root directory and 1400 return err;
1392 * parent relationships.
1393 */
1394 if (this.name[0] == '.') switch (this.len) {
1395 default:
1396 break;
1397 case 2:
1398 if (this.name[1] != '.')
1399 break;
1400 if (nd->flags & LOOKUP_RCU) {
1401 if (follow_dotdot_rcu(nd))
1402 return -ECHILD;
1403 } else
1404 follow_dotdot(nd);
1405 /* fallthrough */
1406 case 1:
1407 continue;
1408 }
1409 /* This does the actual lookups.. */
1410 err = do_lookup(nd, &this, &next, &inode);
1411 if (err)
1412 break;
1413 err = -ENOENT;
1414 if (!inode)
1415 goto out_dput;
1416 1401
1417 if (inode->i_op->follow_link) { 1402 if (err) {
1418 /* We commonly drop rcu-walk here */ 1403 err = nested_symlink(&next, nd);
1419 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1420 return -ECHILD;
1421 BUG_ON(inode != next.dentry->d_inode);
1422 err = do_follow_link(&next, nd);
1423 if (err) 1404 if (err)
1424 goto return_err; 1405 return err;
1425 nd->inode = nd->path.dentry->d_inode;
1426 err = -ENOENT;
1427 if (!nd->inode)
1428 break;
1429 } else {
1430 path_to_nameidata(&next, nd);
1431 nd->inode = inode;
1432 } 1406 }
1433 err = -ENOTDIR; 1407 err = -ENOTDIR;
1434 if (!nd->inode->i_op->lookup) 1408 if (!nd->inode->i_op->lookup)
@@ -1436,209 +1410,109 @@ exec_again:
1436 continue; 1410 continue;
1437 /* here ends the main loop */ 1411 /* here ends the main loop */
1438 1412
1439last_with_slashes:
1440 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1441last_component: 1413last_component:
1442 /* Clear LOOKUP_CONTINUE iff it was previously unset */ 1414 /* Clear LOOKUP_CONTINUE iff it was previously unset */
1443 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; 1415 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
1444 if (lookup_flags & LOOKUP_PARENT)
1445 goto lookup_parent;
1446 if (this.name[0] == '.') switch (this.len) {
1447 default:
1448 break;
1449 case 2:
1450 if (this.name[1] != '.')
1451 break;
1452 if (nd->flags & LOOKUP_RCU) {
1453 if (follow_dotdot_rcu(nd))
1454 return -ECHILD;
1455 } else
1456 follow_dotdot(nd);
1457 /* fallthrough */
1458 case 1:
1459 goto return_reval;
1460 }
1461 err = do_lookup(nd, &this, &next, &inode);
1462 if (err)
1463 break;
1464 if (inode && unlikely(inode->i_op->follow_link) &&
1465 (lookup_flags & LOOKUP_FOLLOW)) {
1466 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1467 return -ECHILD;
1468 BUG_ON(inode != next.dentry->d_inode);
1469 err = do_follow_link(&next, nd);
1470 if (err)
1471 goto return_err;
1472 nd->inode = nd->path.dentry->d_inode;
1473 } else {
1474 path_to_nameidata(&next, nd);
1475 nd->inode = inode;
1476 }
1477 err = -ENOENT;
1478 if (!nd->inode)
1479 break;
1480 if (lookup_flags & LOOKUP_DIRECTORY) {
1481 err = -ENOTDIR;
1482 if (!nd->inode->i_op->lookup)
1483 break;
1484 }
1485 goto return_base;
1486lookup_parent:
1487 nd->last = this; 1416 nd->last = this;
1488 nd->last_type = LAST_NORM; 1417 nd->last_type = type;
1489 if (this.name[0] != '.')
1490 goto return_base;
1491 if (this.len == 1)
1492 nd->last_type = LAST_DOT;
1493 else if (this.len == 2 && this.name[1] == '.')
1494 nd->last_type = LAST_DOTDOT;
1495 else
1496 goto return_base;
1497return_reval:
1498 /*
1499 * We bypassed the ordinary revalidation routines.
1500 * We may need to check the cached dentry for staleness.
1501 */
1502 if (need_reval_dot(nd->path.dentry)) {
1503 /* Note: we do not d_invalidate() */
1504 err = d_revalidate(nd->path.dentry, nd);
1505 if (!err)
1506 err = -ESTALE;
1507 if (err < 0)
1508 break;
1509 }
1510return_base:
1511 if (nameidata_drop_rcu_last_maybe(nd))
1512 return -ECHILD;
1513 return 0; 1418 return 0;
1514out_dput:
1515 if (!(nd->flags & LOOKUP_RCU))
1516 path_put_conditional(&next, nd);
1517 break;
1518 } 1419 }
1519 if (!(nd->flags & LOOKUP_RCU)) 1420 terminate_walk(nd);
1520 path_put(&nd->path);
1521return_err:
1522 return err; 1421 return err;
1523} 1422}
1524 1423
1525static inline int path_walk_rcu(const char *name, struct nameidata *nd) 1424static int path_init(int dfd, const char *name, unsigned int flags,
1526{ 1425 struct nameidata *nd, struct file **fp)
1527 current->total_link_count = 0;
1528
1529 return link_path_walk(name, nd);
1530}
1531
1532static inline int path_walk_simple(const char *name, struct nameidata *nd)
1533{
1534 current->total_link_count = 0;
1535
1536 return link_path_walk(name, nd);
1537}
1538
1539static int path_walk(const char *name, struct nameidata *nd)
1540{
1541 struct path save = nd->path;
1542 int result;
1543
1544 current->total_link_count = 0;
1545
1546 /* make sure the stuff we saved doesn't go away */
1547 path_get(&save);
1548
1549 result = link_path_walk(name, nd);
1550 if (result == -ESTALE) {
1551 /* nd->path had been dropped */
1552 current->total_link_count = 0;
1553 nd->path = save;
1554 path_get(&nd->path);
1555 nd->flags |= LOOKUP_REVAL;
1556 result = link_path_walk(name, nd);
1557 }
1558
1559 path_put(&save);
1560
1561 return result;
1562}
1563
1564static void path_finish_rcu(struct nameidata *nd)
1565{
1566 if (nd->flags & LOOKUP_RCU) {
1567 /* RCU dangling. Cancel it. */
1568 nd->flags &= ~LOOKUP_RCU;
1569 nd->root.mnt = NULL;
1570 rcu_read_unlock();
1571 br_read_unlock(vfsmount_lock);
1572 }
1573 if (nd->file)
1574 fput(nd->file);
1575}
1576
1577static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1578{ 1426{
1579 int retval = 0; 1427 int retval = 0;
1580 int fput_needed; 1428 int fput_needed;
1581 struct file *file; 1429 struct file *file;
1582 1430
1583 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1431 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1584 nd->flags = flags | LOOKUP_RCU; 1432 nd->flags = flags | LOOKUP_JUMPED;
1585 nd->depth = 0; 1433 nd->depth = 0;
1434 if (flags & LOOKUP_ROOT) {
1435 struct inode *inode = nd->root.dentry->d_inode;
1436 if (*name) {
1437 if (!inode->i_op->lookup)
1438 return -ENOTDIR;
1439 retval = inode_permission(inode, MAY_EXEC);
1440 if (retval)
1441 return retval;
1442 }
1443 nd->path = nd->root;
1444 nd->inode = inode;
1445 if (flags & LOOKUP_RCU) {
1446 br_read_lock(vfsmount_lock);
1447 rcu_read_lock();
1448 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1449 } else {
1450 path_get(&nd->path);
1451 }
1452 return 0;
1453 }
1454
1586 nd->root.mnt = NULL; 1455 nd->root.mnt = NULL;
1587 nd->file = NULL;
1588 1456
1589 if (*name=='/') { 1457 if (*name=='/') {
1590 struct fs_struct *fs = current->fs; 1458 if (flags & LOOKUP_RCU) {
1591 unsigned seq; 1459 br_read_lock(vfsmount_lock);
1592 1460 rcu_read_lock();
1593 br_read_lock(vfsmount_lock); 1461 set_root_rcu(nd);
1594 rcu_read_lock(); 1462 } else {
1595 1463 set_root(nd);
1596 do { 1464 path_get(&nd->root);
1597 seq = read_seqcount_begin(&fs->seq); 1465 }
1598 nd->root = fs->root; 1466 nd->path = nd->root;
1599 nd->path = nd->root;
1600 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1601 } while (read_seqcount_retry(&fs->seq, seq));
1602
1603 } else if (dfd == AT_FDCWD) { 1467 } else if (dfd == AT_FDCWD) {
1604 struct fs_struct *fs = current->fs; 1468 if (flags & LOOKUP_RCU) {
1605 unsigned seq; 1469 struct fs_struct *fs = current->fs;
1606 1470 unsigned seq;
1607 br_read_lock(vfsmount_lock);
1608 rcu_read_lock();
1609 1471
1610 do { 1472 br_read_lock(vfsmount_lock);
1611 seq = read_seqcount_begin(&fs->seq); 1473 rcu_read_lock();
1612 nd->path = fs->pwd;
1613 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1614 } while (read_seqcount_retry(&fs->seq, seq));
1615 1474
1475 do {
1476 seq = read_seqcount_begin(&fs->seq);
1477 nd->path = fs->pwd;
1478 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1479 } while (read_seqcount_retry(&fs->seq, seq));
1480 } else {
1481 get_fs_pwd(current->fs, &nd->path);
1482 }
1616 } else { 1483 } else {
1617 struct dentry *dentry; 1484 struct dentry *dentry;
1618 1485
1619 file = fget_light(dfd, &fput_needed); 1486 file = fget_raw_light(dfd, &fput_needed);
1620 retval = -EBADF; 1487 retval = -EBADF;
1621 if (!file) 1488 if (!file)
1622 goto out_fail; 1489 goto out_fail;
1623 1490
1624 dentry = file->f_path.dentry; 1491 dentry = file->f_path.dentry;
1625 1492
1626 retval = -ENOTDIR; 1493 if (*name) {
1627 if (!S_ISDIR(dentry->d_inode->i_mode)) 1494 retval = -ENOTDIR;
1628 goto fput_fail; 1495 if (!S_ISDIR(dentry->d_inode->i_mode))
1496 goto fput_fail;
1629 1497
1630 retval = file_permission(file, MAY_EXEC); 1498 retval = file_permission(file, MAY_EXEC);
1631 if (retval) 1499 if (retval)
1632 goto fput_fail; 1500 goto fput_fail;
1501 }
1633 1502
1634 nd->path = file->f_path; 1503 nd->path = file->f_path;
1635 if (fput_needed) 1504 if (flags & LOOKUP_RCU) {
1636 nd->file = file; 1505 if (fput_needed)
1637 1506 *fp = file;
1638 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1507 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1639 br_read_lock(vfsmount_lock); 1508 br_read_lock(vfsmount_lock);
1640 rcu_read_lock(); 1509 rcu_read_lock();
1510 } else {
1511 path_get(&file->f_path);
1512 fput_light(file, fput_needed);
1513 }
1641 } 1514 }
1515
1642 nd->inode = nd->path.dentry->d_inode; 1516 nd->inode = nd->path.dentry->d_inode;
1643 return 0; 1517 return 0;
1644 1518
@@ -1648,60 +1522,23 @@ out_fail:
1648 return retval; 1522 return retval;
1649} 1523}
1650 1524
1651static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1525static inline int lookup_last(struct nameidata *nd, struct path *path)
1652{ 1526{
1653 int retval = 0; 1527 if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
1654 int fput_needed; 1528 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1655 struct file *file;
1656
1657 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1658 nd->flags = flags;
1659 nd->depth = 0;
1660 nd->root.mnt = NULL;
1661
1662 if (*name=='/') {
1663 set_root(nd);
1664 nd->path = nd->root;
1665 path_get(&nd->root);
1666 } else if (dfd == AT_FDCWD) {
1667 get_fs_pwd(current->fs, &nd->path);
1668 } else {
1669 struct dentry *dentry;
1670
1671 file = fget_light(dfd, &fput_needed);
1672 retval = -EBADF;
1673 if (!file)
1674 goto out_fail;
1675
1676 dentry = file->f_path.dentry;
1677
1678 retval = -ENOTDIR;
1679 if (!S_ISDIR(dentry->d_inode->i_mode))
1680 goto fput_fail;
1681
1682 retval = file_permission(file, MAY_EXEC);
1683 if (retval)
1684 goto fput_fail;
1685
1686 nd->path = file->f_path;
1687 path_get(&file->f_path);
1688
1689 fput_light(file, fput_needed);
1690 }
1691 nd->inode = nd->path.dentry->d_inode;
1692 return 0;
1693 1529
1694fput_fail: 1530 nd->flags &= ~LOOKUP_PARENT;
1695 fput_light(file, fput_needed); 1531 return walk_component(nd, path, &nd->last, nd->last_type,
1696out_fail: 1532 nd->flags & LOOKUP_FOLLOW);
1697 return retval;
1698} 1533}
1699 1534
1700/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1535/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
1701static int do_path_lookup(int dfd, const char *name, 1536static int path_lookupat(int dfd, const char *name,
1702 unsigned int flags, struct nameidata *nd) 1537 unsigned int flags, struct nameidata *nd)
1703{ 1538{
1704 int retval; 1539 struct file *base = NULL;
1540 struct path path;
1541 int err;
1705 1542
1706 /* 1543 /*
1707 * Path walking is largely split up into 2 different synchronisation 1544 * Path walking is largely split up into 2 different synchronisation
@@ -1717,44 +1554,68 @@ static int do_path_lookup(int dfd, const char *name,
1717 * be handled by restarting a traditional ref-walk (which will always 1554 * be handled by restarting a traditional ref-walk (which will always
1718 * be able to complete). 1555 * be able to complete).
1719 */ 1556 */
1720 retval = path_init_rcu(dfd, name, flags, nd); 1557 err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
1721 if (unlikely(retval)) 1558
1722 return retval; 1559 if (unlikely(err))
1723 retval = path_walk_rcu(name, nd); 1560 return err;
1724 path_finish_rcu(nd); 1561
1725 if (nd->root.mnt) { 1562 current->total_link_count = 0;
1726 path_put(&nd->root); 1563 err = link_path_walk(name, nd);
1727 nd->root.mnt = NULL; 1564
1565 if (!err && !(flags & LOOKUP_PARENT)) {
1566 err = lookup_last(nd, &path);
1567 while (err > 0) {
1568 void *cookie;
1569 struct path link = path;
1570 nd->flags |= LOOKUP_PARENT;
1571 err = follow_link(&link, nd, &cookie);
1572 if (!err)
1573 err = lookup_last(nd, &path);
1574 put_link(nd, &link, cookie);
1575 }
1728 } 1576 }
1729 1577
1730 if (unlikely(retval == -ECHILD || retval == -ESTALE)) { 1578 if (!err)
1731 /* slower, locked walk */ 1579 err = complete_walk(nd);
1732 if (retval == -ESTALE) 1580
1733 flags |= LOOKUP_REVAL; 1581 if (!err && nd->flags & LOOKUP_DIRECTORY) {
1734 retval = path_init(dfd, name, flags, nd); 1582 if (!nd->inode->i_op->lookup) {
1735 if (unlikely(retval)) 1583 path_put(&nd->path);
1736 return retval; 1584 err = -ENOTDIR;
1737 retval = path_walk(name, nd);
1738 if (nd->root.mnt) {
1739 path_put(&nd->root);
1740 nd->root.mnt = NULL;
1741 } 1585 }
1742 } 1586 }
1743 1587
1588 if (base)
1589 fput(base);
1590
1591 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
1592 path_put(&nd->root);
1593 nd->root.mnt = NULL;
1594 }
1595 return err;
1596}
1597
1598static int do_path_lookup(int dfd, const char *name,
1599 unsigned int flags, struct nameidata *nd)
1600{
1601 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
1602 if (unlikely(retval == -ECHILD))
1603 retval = path_lookupat(dfd, name, flags, nd);
1604 if (unlikely(retval == -ESTALE))
1605 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
1606
1744 if (likely(!retval)) { 1607 if (likely(!retval)) {
1745 if (unlikely(!audit_dummy_context())) { 1608 if (unlikely(!audit_dummy_context())) {
1746 if (nd->path.dentry && nd->inode) 1609 if (nd->path.dentry && nd->inode)
1747 audit_inode(name, nd->path.dentry); 1610 audit_inode(name, nd->path.dentry);
1748 } 1611 }
1749 } 1612 }
1750
1751 return retval; 1613 return retval;
1752} 1614}
1753 1615
1754int path_lookup(const char *name, unsigned int flags, 1616int kern_path_parent(const char *name, struct nameidata *nd)
1755 struct nameidata *nd)
1756{ 1617{
1757 return do_path_lookup(AT_FDCWD, name, flags, nd); 1618 return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
1758} 1619}
1759 1620
1760int kern_path(const char *name, unsigned int flags, struct path *path) 1621int kern_path(const char *name, unsigned int flags, struct path *path)
@@ -1778,29 +1639,10 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1778 const char *name, unsigned int flags, 1639 const char *name, unsigned int flags,
1779 struct nameidata *nd) 1640 struct nameidata *nd)
1780{ 1641{
1781 int retval; 1642 nd->root.dentry = dentry;
1782 1643 nd->root.mnt = mnt;
1783 /* same as do_path_lookup */ 1644 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
1784 nd->last_type = LAST_ROOT; 1645 return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd);
1785 nd->flags = flags;
1786 nd->depth = 0;
1787
1788 nd->path.dentry = dentry;
1789 nd->path.mnt = mnt;
1790 path_get(&nd->path);
1791 nd->root = nd->path;
1792 path_get(&nd->root);
1793 nd->inode = nd->path.dentry->d_inode;
1794
1795 retval = path_walk(name, nd);
1796 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1797 nd->inode))
1798 audit_inode(name, nd->path.dentry);
1799
1800 path_put(&nd->root);
1801 nd->root.mnt = NULL;
1802
1803 return retval;
1804} 1646}
1805 1647
1806static struct dentry *__lookup_hash(struct qstr *name, 1648static struct dentry *__lookup_hash(struct qstr *name,
@@ -1815,17 +1657,6 @@ static struct dentry *__lookup_hash(struct qstr *name,
1815 return ERR_PTR(err); 1657 return ERR_PTR(err);
1816 1658
1817 /* 1659 /*
1818 * See if the low-level filesystem might want
1819 * to use its own hash..
1820 */
1821 if (base->d_flags & DCACHE_OP_HASH) {
1822 err = base->d_op->d_hash(base, inode, name);
1823 dentry = ERR_PTR(err);
1824 if (err < 0)
1825 goto out;
1826 }
1827
1828 /*
1829 * Don't bother with __d_lookup: callers are for creat as 1660 * Don't bother with __d_lookup: callers are for creat as
1830 * well as unlink, so a lot of the time it would cost 1661 * well as unlink, so a lot of the time it would cost
1831 * a double lookup. 1662 * a double lookup.
@@ -1837,7 +1668,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1837 1668
1838 if (!dentry) 1669 if (!dentry)
1839 dentry = d_alloc_and_lookup(base, name, nd); 1670 dentry = d_alloc_and_lookup(base, name, nd);
1840out: 1671
1841 return dentry; 1672 return dentry;
1842} 1673}
1843 1674
@@ -1851,28 +1682,6 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1851 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1682 return __lookup_hash(&nd->last, nd->path.dentry, nd);
1852} 1683}
1853 1684
1854static int __lookup_one_len(const char *name, struct qstr *this,
1855 struct dentry *base, int len)
1856{
1857 unsigned long hash;
1858 unsigned int c;
1859
1860 this->name = name;
1861 this->len = len;
1862 if (!len)
1863 return -EACCES;
1864
1865 hash = init_name_hash();
1866 while (len--) {
1867 c = *(const unsigned char *)name++;
1868 if (c == '/' || c == '\0')
1869 return -EACCES;
1870 hash = partial_name_hash(c, hash);
1871 }
1872 this->hash = end_name_hash(hash);
1873 return 0;
1874}
1875
1876/** 1685/**
1877 * lookup_one_len - filesystem helper to lookup single pathname component 1686 * lookup_one_len - filesystem helper to lookup single pathname component
1878 * @name: pathname component to lookup 1687 * @name: pathname component to lookup
@@ -1886,14 +1695,34 @@ static int __lookup_one_len(const char *name, struct qstr *this,
1886 */ 1695 */
1887struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1696struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1888{ 1697{
1889 int err;
1890 struct qstr this; 1698 struct qstr this;
1699 unsigned long hash;
1700 unsigned int c;
1891 1701
1892 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); 1702 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1893 1703
1894 err = __lookup_one_len(name, &this, base, len); 1704 this.name = name;
1895 if (err) 1705 this.len = len;
1896 return ERR_PTR(err); 1706 if (!len)
1707 return ERR_PTR(-EACCES);
1708
1709 hash = init_name_hash();
1710 while (len--) {
1711 c = *(const unsigned char *)name++;
1712 if (c == '/' || c == '\0')
1713 return ERR_PTR(-EACCES);
1714 hash = partial_name_hash(c, hash);
1715 }
1716 this.hash = end_name_hash(hash);
1717 /*
1718 * See if the low-level filesystem might want
1719 * to use its own hash..
1720 */
1721 if (base->d_flags & DCACHE_OP_HASH) {
1722 int err = base->d_op->d_hash(base, base->d_inode, &this);
1723 if (err < 0)
1724 return ERR_PTR(err);
1725 }
1897 1726
1898 return __lookup_hash(&this, base, NULL); 1727 return __lookup_hash(&this, base, NULL);
1899} 1728}
@@ -1902,7 +1731,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
1902 struct path *path) 1731 struct path *path)
1903{ 1732{
1904 struct nameidata nd; 1733 struct nameidata nd;
1905 char *tmp = getname(name); 1734 char *tmp = getname_flags(name, flags);
1906 int err = PTR_ERR(tmp); 1735 int err = PTR_ERR(tmp);
1907 if (!IS_ERR(tmp)) { 1736 if (!IS_ERR(tmp)) {
1908 1737
@@ -1944,11 +1773,15 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
1944 1773
1945 if (!(dir->i_mode & S_ISVTX)) 1774 if (!(dir->i_mode & S_ISVTX))
1946 return 0; 1775 return 0;
1776 if (current_user_ns() != inode_userns(inode))
1777 goto other_userns;
1947 if (inode->i_uid == fsuid) 1778 if (inode->i_uid == fsuid)
1948 return 0; 1779 return 0;
1949 if (dir->i_uid == fsuid) 1780 if (dir->i_uid == fsuid)
1950 return 0; 1781 return 0;
1951 return !capable(CAP_FOWNER); 1782
1783other_userns:
1784 return !ns_capable(inode_userns(inode), CAP_FOWNER);
1952} 1785}
1953 1786
1954/* 1787/*
@@ -2082,12 +1915,16 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
2082 return error; 1915 return error;
2083} 1916}
2084 1917
2085int may_open(struct path *path, int acc_mode, int flag) 1918static int may_open(struct path *path, int acc_mode, int flag)
2086{ 1919{
2087 struct dentry *dentry = path->dentry; 1920 struct dentry *dentry = path->dentry;
2088 struct inode *inode = dentry->d_inode; 1921 struct inode *inode = dentry->d_inode;
2089 int error; 1922 int error;
2090 1923
1924 /* O_PATH? */
1925 if (!acc_mode)
1926 return 0;
1927
2091 if (!inode) 1928 if (!inode)
2092 return -ENOENT; 1929 return -ENOENT;
2093 1930
@@ -2124,7 +1961,7 @@ int may_open(struct path *path, int acc_mode, int flag)
2124 } 1961 }
2125 1962
2126 /* O_NOATIME can only be set by the owner or superuser */ 1963 /* O_NOATIME can only be set by the owner or superuser */
2127 if (flag & O_NOATIME && !is_owner_or_cap(inode)) 1964 if (flag & O_NOATIME && !inode_owner_or_capable(inode))
2128 return -EPERM; 1965 return -EPERM;
2129 1966
2130 /* 1967 /*
@@ -2156,34 +1993,6 @@ static int handle_truncate(struct file *filp)
2156} 1993}
2157 1994
2158/* 1995/*
2159 * Be careful about ever adding any more callers of this
2160 * function. Its flags must be in the namei format, not
2161 * what get passed to sys_open().
2162 */
2163static int __open_namei_create(struct nameidata *nd, struct path *path,
2164 int open_flag, int mode)
2165{
2166 int error;
2167 struct dentry *dir = nd->path.dentry;
2168
2169 if (!IS_POSIXACL(dir->d_inode))
2170 mode &= ~current_umask();
2171 error = security_path_mknod(&nd->path, path->dentry, mode, 0);
2172 if (error)
2173 goto out_unlock;
2174 error = vfs_create(dir->d_inode, path->dentry, mode, nd);
2175out_unlock:
2176 mutex_unlock(&dir->d_inode->i_mutex);
2177 dput(nd->path.dentry);
2178 nd->path.dentry = path->dentry;
2179
2180 if (error)
2181 return error;
2182 /* Don't check for write permission, don't truncate */
2183 return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
2184}
2185
2186/*
2187 * Note that while the flag value (low two bits) for sys_open means: 1996 * Note that while the flag value (low two bits) for sys_open means:
2188 * 00 - read-only 1997 * 00 - read-only
2189 * 01 - write-only 1998 * 01 - write-only
@@ -2207,128 +2016,107 @@ static inline int open_to_namei_flags(int flag)
2207 return flag; 2016 return flag;
2208} 2017}
2209 2018
2210static int open_will_truncate(int flag, struct inode *inode)
2211{
2212 /*
2213 * We'll never write to the fs underlying
2214 * a device file.
2215 */
2216 if (special_file(inode->i_mode))
2217 return 0;
2218 return (flag & O_TRUNC);
2219}
2220
2221static struct file *finish_open(struct nameidata *nd,
2222 int open_flag, int acc_mode)
2223{
2224 struct file *filp;
2225 int will_truncate;
2226 int error;
2227
2228 will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
2229 if (will_truncate) {
2230 error = mnt_want_write(nd->path.mnt);
2231 if (error)
2232 goto exit;
2233 }
2234 error = may_open(&nd->path, acc_mode, open_flag);
2235 if (error) {
2236 if (will_truncate)
2237 mnt_drop_write(nd->path.mnt);
2238 goto exit;
2239 }
2240 filp = nameidata_to_filp(nd);
2241 if (!IS_ERR(filp)) {
2242 error = ima_file_check(filp, acc_mode);
2243 if (error) {
2244 fput(filp);
2245 filp = ERR_PTR(error);
2246 }
2247 }
2248 if (!IS_ERR(filp)) {
2249 if (will_truncate) {
2250 error = handle_truncate(filp);
2251 if (error) {
2252 fput(filp);
2253 filp = ERR_PTR(error);
2254 }
2255 }
2256 }
2257 /*
2258 * It is now safe to drop the mnt write
2259 * because the filp has had a write taken
2260 * on its behalf.
2261 */
2262 if (will_truncate)
2263 mnt_drop_write(nd->path.mnt);
2264 path_put(&nd->path);
2265 return filp;
2266
2267exit:
2268 if (!IS_ERR(nd->intent.open.file))
2269 release_open_intent(nd);
2270 path_put(&nd->path);
2271 return ERR_PTR(error);
2272}
2273
2274/* 2019/*
2275 * Handle O_CREAT case for do_filp_open 2020 * Handle the last step of open()
2276 */ 2021 */
2277static struct file *do_last(struct nameidata *nd, struct path *path, 2022static struct file *do_last(struct nameidata *nd, struct path *path,
2278 int open_flag, int acc_mode, 2023 const struct open_flags *op, const char *pathname)
2279 int mode, const char *pathname)
2280{ 2024{
2281 struct dentry *dir = nd->path.dentry; 2025 struct dentry *dir = nd->path.dentry;
2026 struct dentry *dentry;
2027 int open_flag = op->open_flag;
2028 int will_truncate = open_flag & O_TRUNC;
2029 int want_write = 0;
2030 int acc_mode = op->acc_mode;
2282 struct file *filp; 2031 struct file *filp;
2283 int error = -EISDIR; 2032 int error;
2033
2034 nd->flags &= ~LOOKUP_PARENT;
2035 nd->flags |= op->intent;
2284 2036
2285 switch (nd->last_type) { 2037 switch (nd->last_type) {
2286 case LAST_DOTDOT: 2038 case LAST_DOTDOT:
2287 follow_dotdot(nd);
2288 dir = nd->path.dentry;
2289 case LAST_DOT: 2039 case LAST_DOT:
2290 if (need_reval_dot(dir)) { 2040 error = handle_dots(nd, nd->last_type);
2291 int status = d_revalidate(nd->path.dentry, nd); 2041 if (error)
2292 if (!status) 2042 return ERR_PTR(error);
2293 status = -ESTALE;
2294 if (status < 0) {
2295 error = status;
2296 goto exit;
2297 }
2298 }
2299 /* fallthrough */ 2043 /* fallthrough */
2300 case LAST_ROOT: 2044 case LAST_ROOT:
2301 goto exit; 2045 error = complete_walk(nd);
2046 if (error)
2047 return ERR_PTR(error);
2048 audit_inode(pathname, nd->path.dentry);
2049 if (open_flag & O_CREAT) {
2050 error = -EISDIR;
2051 goto exit;
2052 }
2053 goto ok;
2302 case LAST_BIND: 2054 case LAST_BIND:
2055 error = complete_walk(nd);
2056 if (error)
2057 return ERR_PTR(error);
2303 audit_inode(pathname, dir); 2058 audit_inode(pathname, dir);
2304 goto ok; 2059 goto ok;
2305 } 2060 }
2306 2061
2062 if (!(open_flag & O_CREAT)) {
2063 int symlink_ok = 0;
2064 if (nd->last.name[nd->last.len])
2065 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
2066 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
2067 symlink_ok = 1;
2068 /* we _can_ be in RCU mode here */
2069 error = walk_component(nd, path, &nd->last, LAST_NORM,
2070 !symlink_ok);
2071 if (error < 0)
2072 return ERR_PTR(error);
2073 if (error) /* symlink */
2074 return NULL;
2075 /* sayonara */
2076 error = complete_walk(nd);
2077 if (error)
2078 return ERR_PTR(-ECHILD);
2079
2080 error = -ENOTDIR;
2081 if (nd->flags & LOOKUP_DIRECTORY) {
2082 if (!nd->inode->i_op->lookup)
2083 goto exit;
2084 }
2085 audit_inode(pathname, nd->path.dentry);
2086 goto ok;
2087 }
2088
2089 /* create side of things */
2090 error = complete_walk(nd);
2091 if (error)
2092 return ERR_PTR(error);
2093
2094 audit_inode(pathname, dir);
2095 error = -EISDIR;
2307 /* trailing slashes? */ 2096 /* trailing slashes? */
2308 if (nd->last.name[nd->last.len]) 2097 if (nd->last.name[nd->last.len])
2309 goto exit; 2098 goto exit;
2310 2099
2311 mutex_lock(&dir->d_inode->i_mutex); 2100 mutex_lock(&dir->d_inode->i_mutex);
2312 2101
2313 path->dentry = lookup_hash(nd); 2102 dentry = lookup_hash(nd);
2314 path->mnt = nd->path.mnt; 2103 error = PTR_ERR(dentry);
2315 2104 if (IS_ERR(dentry)) {
2316 error = PTR_ERR(path->dentry);
2317 if (IS_ERR(path->dentry)) {
2318 mutex_unlock(&dir->d_inode->i_mutex); 2105 mutex_unlock(&dir->d_inode->i_mutex);
2319 goto exit; 2106 goto exit;
2320 } 2107 }
2321 2108
2322 if (IS_ERR(nd->intent.open.file)) { 2109 path->dentry = dentry;
2323 error = PTR_ERR(nd->intent.open.file); 2110 path->mnt = nd->path.mnt;
2324 goto exit_mutex_unlock;
2325 }
2326 2111
2327 /* Negative dentry, just create the file */ 2112 /* Negative dentry, just create the file */
2328 if (!path->dentry->d_inode) { 2113 if (!dentry->d_inode) {
2114 int mode = op->mode;
2115 if (!IS_POSIXACL(dir->d_inode))
2116 mode &= ~current_umask();
2329 /* 2117 /*
2330 * This write is needed to ensure that a 2118 * This write is needed to ensure that a
2331 * ro->rw transition does not occur between 2119 * rw->ro transition does not occur between
2332 * the time when the file is created and when 2120 * the time when the file is created and when
2333 * a permanent write count is taken through 2121 * a permanent write count is taken through
2334 * the 'struct file' in nameidata_to_filp(). 2122 * the 'struct file' in nameidata_to_filp().
@@ -2336,22 +2124,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2336 error = mnt_want_write(nd->path.mnt); 2124 error = mnt_want_write(nd->path.mnt);
2337 if (error) 2125 if (error)
2338 goto exit_mutex_unlock; 2126 goto exit_mutex_unlock;
2339 error = __open_namei_create(nd, path, open_flag, mode); 2127 want_write = 1;
2340 if (error) { 2128 /* Don't check for write permission, don't truncate */
2341 mnt_drop_write(nd->path.mnt); 2129 open_flag &= ~O_TRUNC;
2342 goto exit; 2130 will_truncate = 0;
2343 } 2131 acc_mode = MAY_OPEN;
2344 filp = nameidata_to_filp(nd); 2132 error = security_path_mknod(&nd->path, dentry, mode, 0);
2345 mnt_drop_write(nd->path.mnt); 2133 if (error)
2346 path_put(&nd->path); 2134 goto exit_mutex_unlock;
2347 if (!IS_ERR(filp)) { 2135 error = vfs_create(dir->d_inode, dentry, mode, nd);
2348 error = ima_file_check(filp, acc_mode); 2136 if (error)
2349 if (error) { 2137 goto exit_mutex_unlock;
2350 fput(filp); 2138 mutex_unlock(&dir->d_inode->i_mutex);
2351 filp = ERR_PTR(error); 2139 dput(nd->path.dentry);
2352 } 2140 nd->path.dentry = dentry;
2353 } 2141 goto common;
2354 return filp;
2355 } 2142 }
2356 2143
2357 /* 2144 /*
@@ -2381,7 +2168,40 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2381 if (S_ISDIR(nd->inode->i_mode)) 2168 if (S_ISDIR(nd->inode->i_mode))
2382 goto exit; 2169 goto exit;
2383ok: 2170ok:
2384 filp = finish_open(nd, open_flag, acc_mode); 2171 if (!S_ISREG(nd->inode->i_mode))
2172 will_truncate = 0;
2173
2174 if (will_truncate) {
2175 error = mnt_want_write(nd->path.mnt);
2176 if (error)
2177 goto exit;
2178 want_write = 1;
2179 }
2180common:
2181 error = may_open(&nd->path, acc_mode, open_flag);
2182 if (error)
2183 goto exit;
2184 filp = nameidata_to_filp(nd);
2185 if (!IS_ERR(filp)) {
2186 error = ima_file_check(filp, op->acc_mode);
2187 if (error) {
2188 fput(filp);
2189 filp = ERR_PTR(error);
2190 }
2191 }
2192 if (!IS_ERR(filp)) {
2193 if (will_truncate) {
2194 error = handle_truncate(filp);
2195 if (error) {
2196 fput(filp);
2197 filp = ERR_PTR(error);
2198 }
2199 }
2200 }
2201out:
2202 if (want_write)
2203 mnt_drop_write(nd->path.mnt);
2204 path_put(&nd->path);
2385 return filp; 2205 return filp;
2386 2206
2387exit_mutex_unlock: 2207exit_mutex_unlock:
@@ -2389,199 +2209,103 @@ exit_mutex_unlock:
2389exit_dput: 2209exit_dput:
2390 path_put_conditional(path, nd); 2210 path_put_conditional(path, nd);
2391exit: 2211exit:
2392 if (!IS_ERR(nd->intent.open.file)) 2212 filp = ERR_PTR(error);
2393 release_open_intent(nd); 2213 goto out;
2394 path_put(&nd->path);
2395 return ERR_PTR(error);
2396} 2214}
2397 2215
2398/* 2216static struct file *path_openat(int dfd, const char *pathname,
2399 * Note that the low bits of the passed in "open_flag" 2217 struct nameidata *nd, const struct open_flags *op, int flags)
2400 * are not the same as in the local variable "flag". See
2401 * open_to_namei_flags() for more details.
2402 */
2403struct file *do_filp_open(int dfd, const char *pathname,
2404 int open_flag, int mode, int acc_mode)
2405{ 2218{
2219 struct file *base = NULL;
2406 struct file *filp; 2220 struct file *filp;
2407 struct nameidata nd;
2408 int error;
2409 struct path path; 2221 struct path path;
2410 int count = 0; 2222 int error;
2411 int flag = open_to_namei_flags(open_flag);
2412 int flags;
2413
2414 if (!(open_flag & O_CREAT))
2415 mode = 0;
2416
2417 /* Must never be set by userspace */
2418 open_flag &= ~FMODE_NONOTIFY;
2419
2420 /*
2421 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
2422 * check for O_DSYNC if they need any syncing at all we enforce it's
2423 * always set instead of having to deal with possibly weird behaviour
2424 * for malicious applications setting only __O_SYNC.
2425 */
2426 if (open_flag & __O_SYNC)
2427 open_flag |= O_DSYNC;
2428
2429 if (!acc_mode)
2430 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
2431
2432 /* O_TRUNC implies we need access checks for write permissions */
2433 if (open_flag & O_TRUNC)
2434 acc_mode |= MAY_WRITE;
2435
2436 /* Allow the LSM permission hook to distinguish append
2437 access from general write access. */
2438 if (open_flag & O_APPEND)
2439 acc_mode |= MAY_APPEND;
2440
2441 flags = LOOKUP_OPEN;
2442 if (open_flag & O_CREAT) {
2443 flags |= LOOKUP_CREATE;
2444 if (open_flag & O_EXCL)
2445 flags |= LOOKUP_EXCL;
2446 }
2447 if (open_flag & O_DIRECTORY)
2448 flags |= LOOKUP_DIRECTORY;
2449 if (!(open_flag & O_NOFOLLOW))
2450 flags |= LOOKUP_FOLLOW;
2451 2223
2452 filp = get_empty_filp(); 2224 filp = get_empty_filp();
2453 if (!filp) 2225 if (!filp)
2454 return ERR_PTR(-ENFILE); 2226 return ERR_PTR(-ENFILE);
2455 2227
2456 filp->f_flags = open_flag; 2228 filp->f_flags = op->open_flag;
2457 nd.intent.open.file = filp; 2229 nd->intent.open.file = filp;
2458 nd.intent.open.flags = flag; 2230 nd->intent.open.flags = open_to_namei_flags(op->open_flag);
2459 nd.intent.open.create_mode = mode; 2231 nd->intent.open.create_mode = op->mode;
2460 2232
2461 if (open_flag & O_CREAT) 2233 error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
2462 goto creat;
2463
2464 /* !O_CREAT, simple open */
2465 error = do_path_lookup(dfd, pathname, flags, &nd);
2466 if (unlikely(error)) 2234 if (unlikely(error))
2467 goto out_filp; 2235 goto out_filp;
2468 error = -ELOOP;
2469 if (!(nd.flags & LOOKUP_FOLLOW)) {
2470 if (nd.inode->i_op->follow_link)
2471 goto out_path;
2472 }
2473 error = -ENOTDIR;
2474 if (nd.flags & LOOKUP_DIRECTORY) {
2475 if (!nd.inode->i_op->lookup)
2476 goto out_path;
2477 }
2478 audit_inode(pathname, nd.path.dentry);
2479 filp = finish_open(&nd, open_flag, acc_mode);
2480 return filp;
2481 2236
2482creat: 2237 current->total_link_count = 0;
2483 /* OK, have to create the file. Find the parent. */ 2238 error = link_path_walk(pathname, nd);
2484 error = path_init_rcu(dfd, pathname,
2485 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2486 if (error)
2487 goto out_filp;
2488 error = path_walk_rcu(pathname, &nd);
2489 path_finish_rcu(&nd);
2490 if (unlikely(error == -ECHILD || error == -ESTALE)) {
2491 /* slower, locked walk */
2492 if (error == -ESTALE) {
2493reval:
2494 flags |= LOOKUP_REVAL;
2495 }
2496 error = path_init(dfd, pathname,
2497 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2498 if (error)
2499 goto out_filp;
2500
2501 error = path_walk_simple(pathname, &nd);
2502 }
2503 if (unlikely(error)) 2239 if (unlikely(error))
2504 goto out_filp; 2240 goto out_filp;
2505 if (unlikely(!audit_dummy_context()))
2506 audit_inode(pathname, nd.path.dentry);
2507 2241
2508 /* 2242 filp = do_last(nd, &path, op, pathname);
2509 * We have the parent and last component.
2510 */
2511 nd.flags = flags;
2512 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
2513 while (unlikely(!filp)) { /* trailing symlink */ 2243 while (unlikely(!filp)) { /* trailing symlink */
2514 struct path link = path; 2244 struct path link = path;
2515 struct inode *linki = link.dentry->d_inode;
2516 void *cookie; 2245 void *cookie;
2517 error = -ELOOP; 2246 if (!(nd->flags & LOOKUP_FOLLOW)) {
2518 if (!(nd.flags & LOOKUP_FOLLOW)) 2247 path_put_conditional(&path, nd);
2519 goto exit_dput; 2248 path_put(&nd->path);
2520 if (count++ == 32) 2249 filp = ERR_PTR(-ELOOP);
2521 goto exit_dput; 2250 break;
2522 /*
2523 * This is subtle. Instead of calling do_follow_link() we do
2524 * the thing by hands. The reason is that this way we have zero
2525 * link_count and path_walk() (called from ->follow_link)
2526 * honoring LOOKUP_PARENT. After that we have the parent and
2527 * last component, i.e. we are in the same situation as after
2528 * the first path_walk(). Well, almost - if the last component
2529 * is normal we get its copy stored in nd->last.name and we will
2530 * have to putname() it when we are done. Procfs-like symlinks
2531 * just set LAST_BIND.
2532 */
2533 nd.flags |= LOOKUP_PARENT;
2534 error = security_inode_follow_link(link.dentry, &nd);
2535 if (error)
2536 goto exit_dput;
2537 error = __do_follow_link(&link, &nd, &cookie);
2538 if (unlikely(error)) {
2539 if (!IS_ERR(cookie) && linki->i_op->put_link)
2540 linki->i_op->put_link(link.dentry, &nd, cookie);
2541 /* nd.path had been dropped */
2542 nd.path = link;
2543 goto out_path;
2544 } 2251 }
2545 nd.flags &= ~LOOKUP_PARENT; 2252 nd->flags |= LOOKUP_PARENT;
2546 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2253 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
2547 if (linki->i_op->put_link) 2254 error = follow_link(&link, nd, &cookie);
2548 linki->i_op->put_link(link.dentry, &nd, cookie); 2255 if (unlikely(error))
2549 path_put(&link); 2256 filp = ERR_PTR(error);
2257 else
2258 filp = do_last(nd, &path, op, pathname);
2259 put_link(nd, &link, cookie);
2550 } 2260 }
2551out: 2261out:
2552 if (nd.root.mnt) 2262 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
2553 path_put(&nd.root); 2263 path_put(&nd->root);
2554 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) 2264 if (base)
2555 goto reval; 2265 fput(base);
2266 release_open_intent(nd);
2556 return filp; 2267 return filp;
2557 2268
2558exit_dput:
2559 path_put_conditional(&path, &nd);
2560out_path:
2561 path_put(&nd.path);
2562out_filp: 2269out_filp:
2563 if (!IS_ERR(nd.intent.open.file))
2564 release_open_intent(&nd);
2565 filp = ERR_PTR(error); 2270 filp = ERR_PTR(error);
2566 goto out; 2271 goto out;
2567} 2272}
2568 2273
2569/** 2274struct file *do_filp_open(int dfd, const char *pathname,
2570 * filp_open - open file and return file pointer 2275 const struct open_flags *op, int flags)
2571 *
2572 * @filename: path to open
2573 * @flags: open flags as per the open(2) second argument
2574 * @mode: mode for the new file if O_CREAT is set, else ignored
2575 *
2576 * This is the helper to open a file from kernelspace if you really
2577 * have to. But in general you should not do this, so please move
2578 * along, nothing to see here..
2579 */
2580struct file *filp_open(const char *filename, int flags, int mode)
2581{ 2276{
2582 return do_filp_open(AT_FDCWD, filename, flags, mode, 0); 2277 struct nameidata nd;
2278 struct file *filp;
2279
2280 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
2281 if (unlikely(filp == ERR_PTR(-ECHILD)))
2282 filp = path_openat(dfd, pathname, &nd, op, flags);
2283 if (unlikely(filp == ERR_PTR(-ESTALE)))
2284 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
2285 return filp;
2286}
2287
2288struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
2289 const char *name, const struct open_flags *op, int flags)
2290{
2291 struct nameidata nd;
2292 struct file *file;
2293
2294 nd.root.mnt = mnt;
2295 nd.root.dentry = dentry;
2296
2297 flags |= LOOKUP_ROOT;
2298
2299 if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
2300 return ERR_PTR(-ELOOP);
2301
2302 file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU);
2303 if (unlikely(file == ERR_PTR(-ECHILD)))
2304 file = path_openat(-1, name, &nd, op, flags);
2305 if (unlikely(file == ERR_PTR(-ESTALE)))
2306 file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL);
2307 return file;
2583} 2308}
2584EXPORT_SYMBOL(filp_open);
2585 2309
2586/** 2310/**
2587 * lookup_create - lookup a dentry, creating it if it doesn't exist 2311 * lookup_create - lookup a dentry, creating it if it doesn't exist
@@ -2643,7 +2367,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2643 if (error) 2367 if (error)
2644 return error; 2368 return error;
2645 2369
2646 if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) 2370 if ((S_ISCHR(mode) || S_ISBLK(mode)) &&
2371 !ns_capable(inode_userns(dir), CAP_MKNOD))
2647 return -EPERM; 2372 return -EPERM;
2648 2373
2649 if (!dir->i_op->mknod) 2374 if (!dir->i_op->mknod)
@@ -2804,10 +2529,10 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode)
2804} 2529}
2805 2530
2806/* 2531/*
2807 * We try to drop the dentry early: we should have 2532 * The dentry_unhash() helper will try to drop the dentry early: we
2808 * a usage count of 2 if we're the only user of this 2533 * should have a usage count of 2 if we're the only user of this
2809 * dentry, and if that is true (possibly after pruning 2534 * dentry, and if that is true (possibly after pruning the dcache),
2810 * the dcache), then we drop the dentry now. 2535 * then we drop the dentry now.
2811 * 2536 *
2812 * A low-level filesystem can, if it chooses, legally 2537 * A low-level filesystem can, if it chooses, legally
2813 * do a 2538 * do a
@@ -2820,10 +2545,9 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode)
2820 */ 2545 */
2821void dentry_unhash(struct dentry *dentry) 2546void dentry_unhash(struct dentry *dentry)
2822{ 2547{
2823 dget(dentry);
2824 shrink_dcache_parent(dentry); 2548 shrink_dcache_parent(dentry);
2825 spin_lock(&dentry->d_lock); 2549 spin_lock(&dentry->d_lock);
2826 if (dentry->d_count == 2) 2550 if (dentry->d_count == 1)
2827 __d_drop(dentry); 2551 __d_drop(dentry);
2828 spin_unlock(&dentry->d_lock); 2552 spin_unlock(&dentry->d_lock);
2829} 2553}
@@ -2839,25 +2563,26 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
2839 return -EPERM; 2563 return -EPERM;
2840 2564
2841 mutex_lock(&dentry->d_inode->i_mutex); 2565 mutex_lock(&dentry->d_inode->i_mutex);
2842 dentry_unhash(dentry); 2566
2567 error = -EBUSY;
2843 if (d_mountpoint(dentry)) 2568 if (d_mountpoint(dentry))
2844 error = -EBUSY; 2569 goto out;
2845 else { 2570
2846 error = security_inode_rmdir(dir, dentry); 2571 error = security_inode_rmdir(dir, dentry);
2847 if (!error) { 2572 if (error)
2848 error = dir->i_op->rmdir(dir, dentry); 2573 goto out;
2849 if (!error) { 2574
2850 dentry->d_inode->i_flags |= S_DEAD; 2575 error = dir->i_op->rmdir(dir, dentry);
2851 dont_mount(dentry); 2576 if (error)
2852 } 2577 goto out;
2853 } 2578
2854 } 2579 dentry->d_inode->i_flags |= S_DEAD;
2580 dont_mount(dentry);
2581
2582out:
2855 mutex_unlock(&dentry->d_inode->i_mutex); 2583 mutex_unlock(&dentry->d_inode->i_mutex);
2856 if (!error) { 2584 if (!error)
2857 d_delete(dentry); 2585 d_delete(dentry);
2858 }
2859 dput(dentry);
2860
2861 return error; 2586 return error;
2862} 2587}
2863 2588
@@ -3120,7 +2845,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
3120 return error; 2845 return error;
3121 2846
3122 mutex_lock(&inode->i_mutex); 2847 mutex_lock(&inode->i_mutex);
3123 error = dir->i_op->link(old_dentry, dir, new_dentry); 2848 /* Make sure we don't allow creating hardlink to an unlinked file */
2849 if (inode->i_nlink == 0)
2850 error = -ENOENT;
2851 else
2852 error = dir->i_op->link(old_dentry, dir, new_dentry);
3124 mutex_unlock(&inode->i_mutex); 2853 mutex_unlock(&inode->i_mutex);
3125 if (!error) 2854 if (!error)
3126 fsnotify_link(dir, inode, new_dentry); 2855 fsnotify_link(dir, inode, new_dentry);
@@ -3142,15 +2871,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3142 struct dentry *new_dentry; 2871 struct dentry *new_dentry;
3143 struct nameidata nd; 2872 struct nameidata nd;
3144 struct path old_path; 2873 struct path old_path;
2874 int how = 0;
3145 int error; 2875 int error;
3146 char *to; 2876 char *to;
3147 2877
3148 if ((flags & ~AT_SYMLINK_FOLLOW) != 0) 2878 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
3149 return -EINVAL; 2879 return -EINVAL;
2880 /*
2881 * To use null names we require CAP_DAC_READ_SEARCH
2882 * This ensures that not everyone will be able to create
2883 * hardlink using the passed file descriptor.
2884 */
2885 if (flags & AT_EMPTY_PATH) {
2886 if (!capable(CAP_DAC_READ_SEARCH))
2887 return -ENOENT;
2888 how = LOOKUP_EMPTY;
2889 }
2890
2891 if (flags & AT_SYMLINK_FOLLOW)
2892 how |= LOOKUP_FOLLOW;
3150 2893
3151 error = user_path_at(olddfd, oldname, 2894 error = user_path_at(olddfd, oldname, how, &old_path);
3152 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
3153 &old_path);
3154 if (error) 2895 if (error)
3155 return error; 2896 return error;
3156 2897
@@ -3212,12 +2953,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
3212 * HOWEVER, it relies on the assumption that any object with ->lookup() 2953 * HOWEVER, it relies on the assumption that any object with ->lookup()
3213 * has no more than 1 dentry. If "hybrid" objects will ever appear, 2954 * has no more than 1 dentry. If "hybrid" objects will ever appear,
3214 * we'd better make sure that there's no link(2) for them. 2955 * we'd better make sure that there's no link(2) for them.
3215 * d) some filesystems don't support opened-but-unlinked directories, 2956 * d) conversion from fhandle to dentry may come in the wrong moment - when
3216 * either because of layout or because they are not ready to deal with
3217 * all cases correctly. The latter will be fixed (taking this sort of
3218 * stuff into VFS), but the former is not going away. Solution: the same
3219 * trick as in rmdir().
3220 * e) conversion from fhandle to dentry may come in the wrong moment - when
3221 * we are removing the target. Solution: we will have to grab ->i_mutex 2957 * we are removing the target. Solution: we will have to grab ->i_mutex
3222 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on 2958 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
3223 * ->i_mutex on parents, which works but leads to some truly excessive 2959 * ->i_mutex on parents, which works but leads to some truly excessive
@@ -3227,7 +2963,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
3227 struct inode *new_dir, struct dentry *new_dentry) 2963 struct inode *new_dir, struct dentry *new_dentry)
3228{ 2964{
3229 int error = 0; 2965 int error = 0;
3230 struct inode *target; 2966 struct inode *target = new_dentry->d_inode;
3231 2967
3232 /* 2968 /*
3233 * If we are going to change the parent - check write permissions, 2969 * If we are going to change the parent - check write permissions,
@@ -3243,26 +2979,24 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
3243 if (error) 2979 if (error)
3244 return error; 2980 return error;
3245 2981
3246 target = new_dentry->d_inode;
3247 if (target) 2982 if (target)
3248 mutex_lock(&target->i_mutex); 2983 mutex_lock(&target->i_mutex);
3249 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2984
3250 error = -EBUSY; 2985 error = -EBUSY;
3251 else { 2986 if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
3252 if (target) 2987 goto out;
3253 dentry_unhash(new_dentry); 2988
3254 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2989 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
3255 } 2990 if (error)
2991 goto out;
2992
3256 if (target) { 2993 if (target) {
3257 if (!error) { 2994 target->i_flags |= S_DEAD;
3258 target->i_flags |= S_DEAD; 2995 dont_mount(new_dentry);
3259 dont_mount(new_dentry);
3260 }
3261 mutex_unlock(&target->i_mutex);
3262 if (d_unhashed(new_dentry))
3263 d_rehash(new_dentry);
3264 dput(new_dentry);
3265 } 2996 }
2997out:
2998 if (target)
2999 mutex_unlock(&target->i_mutex);
3266 if (!error) 3000 if (!error)
3267 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 3001 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
3268 d_move(old_dentry,new_dentry); 3002 d_move(old_dentry,new_dentry);
@@ -3272,7 +3006,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
3272static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, 3006static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
3273 struct inode *new_dir, struct dentry *new_dentry) 3007 struct inode *new_dir, struct dentry *new_dentry)
3274{ 3008{
3275 struct inode *target; 3009 struct inode *target = new_dentry->d_inode;
3276 int error; 3010 int error;
3277 3011
3278 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 3012 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
@@ -3280,19 +3014,22 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
3280 return error; 3014 return error;
3281 3015
3282 dget(new_dentry); 3016 dget(new_dentry);
3283 target = new_dentry->d_inode;
3284 if (target) 3017 if (target)
3285 mutex_lock(&target->i_mutex); 3018 mutex_lock(&target->i_mutex);
3019
3020 error = -EBUSY;
3286 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 3021 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
3287 error = -EBUSY; 3022 goto out;
3288 else 3023
3289 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 3024 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
3290 if (!error) { 3025 if (error)
3291 if (target) 3026 goto out;
3292 dont_mount(new_dentry); 3027
3293 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 3028 if (target)
3294 d_move(old_dentry, new_dentry); 3029 dont_mount(new_dentry);
3295 } 3030 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
3031 d_move(old_dentry, new_dentry);
3032out:
3296 if (target) 3033 if (target)
3297 mutex_unlock(&target->i_mutex); 3034 mutex_unlock(&target->i_mutex);
3298 dput(new_dentry); 3035 dput(new_dentry);
@@ -3587,7 +3324,7 @@ EXPORT_SYMBOL(page_readlink);
3587EXPORT_SYMBOL(__page_symlink); 3324EXPORT_SYMBOL(__page_symlink);
3588EXPORT_SYMBOL(page_symlink); 3325EXPORT_SYMBOL(page_symlink);
3589EXPORT_SYMBOL(page_symlink_inode_operations); 3326EXPORT_SYMBOL(page_symlink_inode_operations);
3590EXPORT_SYMBOL(path_lookup); 3327EXPORT_SYMBOL(kern_path_parent);
3591EXPORT_SYMBOL(kern_path); 3328EXPORT_SYMBOL(kern_path);
3592EXPORT_SYMBOL(vfs_path_lookup); 3329EXPORT_SYMBOL(vfs_path_lookup);
3593EXPORT_SYMBOL(inode_permission); 3330EXPORT_SYMBOL(inode_permission);