diff options
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 857 |
1 files changed, 682 insertions, 175 deletions
diff --git a/fs/namei.c b/fs/namei.c index 4ff7ca530533..19433cdba011 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -169,8 +169,8 @@ EXPORT_SYMBOL(putname); | |||
169 | /* | 169 | /* |
170 | * This does basic POSIX ACL permission checking | 170 | * This does basic POSIX ACL permission checking |
171 | */ | 171 | */ |
172 | static int acl_permission_check(struct inode *inode, int mask, | 172 | static int acl_permission_check(struct inode *inode, int mask, unsigned int flags, |
173 | int (*check_acl)(struct inode *inode, int mask)) | 173 | int (*check_acl)(struct inode *inode, int mask, unsigned int flags)) |
174 | { | 174 | { |
175 | umode_t mode = inode->i_mode; | 175 | umode_t mode = inode->i_mode; |
176 | 176 | ||
@@ -180,7 +180,7 @@ static int acl_permission_check(struct inode *inode, int mask, | |||
180 | mode >>= 6; | 180 | mode >>= 6; |
181 | else { | 181 | else { |
182 | if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { | 182 | if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { |
183 | int error = check_acl(inode, mask); | 183 | int error = check_acl(inode, mask, flags); |
184 | if (error != -EAGAIN) | 184 | if (error != -EAGAIN) |
185 | return error; | 185 | return error; |
186 | } | 186 | } |
@@ -198,25 +198,30 @@ static int acl_permission_check(struct inode *inode, int mask, | |||
198 | } | 198 | } |
199 | 199 | ||
200 | /** | 200 | /** |
201 | * generic_permission - check for access rights on a Posix-like filesystem | 201 | * generic_permission - check for access rights on a Posix-like filesystem |
202 | * @inode: inode to check access rights for | 202 | * @inode: inode to check access rights for |
203 | * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) | 203 | * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) |
204 | * @check_acl: optional callback to check for Posix ACLs | 204 | * @check_acl: optional callback to check for Posix ACLs |
205 | * @flags: IPERM_FLAG_ flags. | |
205 | * | 206 | * |
206 | * Used to check for read/write/execute permissions on a file. | 207 | * Used to check for read/write/execute permissions on a file. |
207 | * We use "fsuid" for this, letting us set arbitrary permissions | 208 | * We use "fsuid" for this, letting us set arbitrary permissions |
208 | * for filesystem access without changing the "normal" uids which | 209 | * for filesystem access without changing the "normal" uids which |
209 | * are used for other things.. | 210 | * are used for other things. |
211 | * | ||
212 | * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk | ||
213 | * request cannot be satisfied (eg. requires blocking or too much complexity). | ||
214 | * It would then be called again in ref-walk mode. | ||
210 | */ | 215 | */ |
211 | int generic_permission(struct inode *inode, int mask, | 216 | int generic_permission(struct inode *inode, int mask, unsigned int flags, |
212 | int (*check_acl)(struct inode *inode, int mask)) | 217 | int (*check_acl)(struct inode *inode, int mask, unsigned int flags)) |
213 | { | 218 | { |
214 | int ret; | 219 | int ret; |
215 | 220 | ||
216 | /* | 221 | /* |
217 | * Do the basic POSIX ACL permission checks. | 222 | * Do the basic POSIX ACL permission checks. |
218 | */ | 223 | */ |
219 | ret = acl_permission_check(inode, mask, check_acl); | 224 | ret = acl_permission_check(inode, mask, flags, check_acl); |
220 | if (ret != -EACCES) | 225 | if (ret != -EACCES) |
221 | return ret; | 226 | return ret; |
222 | 227 | ||
@@ -271,9 +276,10 @@ int inode_permission(struct inode *inode, int mask) | |||
271 | } | 276 | } |
272 | 277 | ||
273 | if (inode->i_op->permission) | 278 | if (inode->i_op->permission) |
274 | retval = inode->i_op->permission(inode, mask); | 279 | retval = inode->i_op->permission(inode, mask, 0); |
275 | else | 280 | else |
276 | retval = generic_permission(inode, mask, inode->i_op->check_acl); | 281 | retval = generic_permission(inode, mask, 0, |
282 | inode->i_op->check_acl); | ||
277 | 283 | ||
278 | if (retval) | 284 | if (retval) |
279 | return retval; | 285 | return retval; |
@@ -362,6 +368,18 @@ void path_get(struct path *path) | |||
362 | EXPORT_SYMBOL(path_get); | 368 | EXPORT_SYMBOL(path_get); |
363 | 369 | ||
364 | /** | 370 | /** |
371 | * path_get_long - get a long reference to a path | ||
372 | * @path: path to get the reference to | ||
373 | * | ||
374 | * Given a path increment the reference count to the dentry and the vfsmount. | ||
375 | */ | ||
376 | void path_get_long(struct path *path) | ||
377 | { | ||
378 | mntget_long(path->mnt); | ||
379 | dget(path->dentry); | ||
380 | } | ||
381 | |||
382 | /** | ||
365 | * path_put - put a reference to a path | 383 | * path_put - put a reference to a path |
366 | * @path: path to put the reference to | 384 | * @path: path to put the reference to |
367 | * | 385 | * |
@@ -375,6 +393,185 @@ void path_put(struct path *path) | |||
375 | EXPORT_SYMBOL(path_put); | 393 | EXPORT_SYMBOL(path_put); |
376 | 394 | ||
377 | /** | 395 | /** |
396 | * path_put_long - put a long reference to a path | ||
397 | * @path: path to put the reference to | ||
398 | * | ||
399 | * Given a path decrement the reference count to the dentry and the vfsmount. | ||
400 | */ | ||
401 | void path_put_long(struct path *path) | ||
402 | { | ||
403 | dput(path->dentry); | ||
404 | mntput_long(path->mnt); | ||
405 | } | ||
406 | |||
407 | /** | ||
408 | * nameidata_drop_rcu - drop this nameidata out of rcu-walk | ||
409 | * @nd: nameidata pathwalk data to drop | ||
410 | * @Returns: 0 on success, -ECHILD on failure | |
411 | * | ||
412 | * Path walking has 2 modes, rcu-walk and ref-walk (see | ||
413 | * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt | ||
414 | * to drop out of rcu-walk mode and take normal reference counts on dentries | ||
415 | * and vfsmounts to transition to ref-walk mode. __drop_rcu* functions take | |
416 | * refcounts at the last known good point before rcu-walk got stuck, so | ||
417 | * ref-walk may continue from there. If this is not successful (eg. a seqcount | ||
418 | * has changed), then failure is returned and path walk restarts from the | ||
419 | * beginning in ref-walk mode. | ||
420 | * | ||
421 | * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into | ||
422 | * ref-walk. Must be called from rcu-walk context. | ||
423 | */ | ||
424 | static int nameidata_drop_rcu(struct nameidata *nd) | ||
425 | { | ||
426 | struct fs_struct *fs = current->fs; | ||
427 | struct dentry *dentry = nd->path.dentry; | ||
428 | |||
429 | BUG_ON(!(nd->flags & LOOKUP_RCU)); | ||
430 | if (nd->root.mnt) { | ||
431 | spin_lock(&fs->lock); | ||
432 | if (nd->root.mnt != fs->root.mnt || | ||
433 | nd->root.dentry != fs->root.dentry) | ||
434 | goto err_root; | ||
435 | } | ||
436 | spin_lock(&dentry->d_lock); | ||
437 | if (!__d_rcu_to_refcount(dentry, nd->seq)) | ||
438 | goto err; | ||
439 | BUG_ON(nd->inode != dentry->d_inode); | ||
440 | spin_unlock(&dentry->d_lock); | ||
441 | if (nd->root.mnt) { | ||
442 | path_get(&nd->root); | ||
443 | spin_unlock(&fs->lock); | ||
444 | } | ||
445 | mntget(nd->path.mnt); | ||
446 | |||
447 | rcu_read_unlock(); | ||
448 | br_read_unlock(vfsmount_lock); | ||
449 | nd->flags &= ~LOOKUP_RCU; | ||
450 | return 0; | ||
451 | err: | ||
452 | spin_unlock(&dentry->d_lock); | ||
453 | err_root: | ||
454 | if (nd->root.mnt) | ||
455 | spin_unlock(&fs->lock); | ||
456 | return -ECHILD; | ||
457 | } | ||
458 | |||
459 | /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ | ||
460 | static inline int nameidata_drop_rcu_maybe(struct nameidata *nd) | ||
461 | { | ||
462 | if (nd->flags & LOOKUP_RCU) | ||
463 | return nameidata_drop_rcu(nd); | ||
464 | return 0; | ||
465 | } | ||
466 | |||
467 | /** | ||
468 | * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk | ||
469 | * @nd: nameidata pathwalk data to drop | ||
470 | * @dentry: dentry to drop | ||
471 | * @Returns: 0 on success, -ECHILD on failure | |
472 | * | ||
473 | * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root, | ||
474 | * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on | ||
475 | * @nd. Must be called from rcu-walk context. | ||
476 | */ | ||
477 | static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry) | ||
478 | { | ||
479 | struct fs_struct *fs = current->fs; | ||
480 | struct dentry *parent = nd->path.dentry; | ||
481 | |||
482 | BUG_ON(!(nd->flags & LOOKUP_RCU)); | ||
483 | if (nd->root.mnt) { | ||
484 | spin_lock(&fs->lock); | ||
485 | if (nd->root.mnt != fs->root.mnt || | ||
486 | nd->root.dentry != fs->root.dentry) | ||
487 | goto err_root; | ||
488 | } | ||
489 | spin_lock(&parent->d_lock); | ||
490 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); | ||
491 | if (!__d_rcu_to_refcount(dentry, nd->seq)) | ||
492 | goto err; | ||
493 | /* | ||
494 | * If the sequence check on the child dentry passed, then the child has | ||
495 | * not been removed from its parent. This means the parent dentry must | ||
496 | * be valid and able to take a reference at this point. | ||
497 | */ | ||
498 | BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); | ||
499 | BUG_ON(!parent->d_count); | ||
500 | parent->d_count++; | ||
501 | spin_unlock(&dentry->d_lock); | ||
502 | spin_unlock(&parent->d_lock); | ||
503 | if (nd->root.mnt) { | ||
504 | path_get(&nd->root); | ||
505 | spin_unlock(&fs->lock); | ||
506 | } | ||
507 | mntget(nd->path.mnt); | ||
508 | |||
509 | rcu_read_unlock(); | ||
510 | br_read_unlock(vfsmount_lock); | ||
511 | nd->flags &= ~LOOKUP_RCU; | ||
512 | return 0; | ||
513 | err: | ||
514 | spin_unlock(&dentry->d_lock); | ||
515 | spin_unlock(&parent->d_lock); | ||
516 | err_root: | ||
517 | if (nd->root.mnt) | ||
518 | spin_unlock(&fs->lock); | ||
519 | return -ECHILD; | ||
520 | } | ||
521 | |||
522 | /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ | ||
523 | static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) | ||
524 | { | ||
525 | if (nd->flags & LOOKUP_RCU) | ||
526 | return nameidata_dentry_drop_rcu(nd, dentry); | ||
527 | return 0; | ||
528 | } | ||
529 | |||
530 | /** | ||
531 | * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk | ||
532 | * @nd: nameidata pathwalk data to drop | ||
533 | * @Returns: 0 on success, -ECHILD on failure | |
534 | * | ||
535 | * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk. | ||
536 | * nd->path should be the final element of the lookup, so nd->root is discarded. | ||
537 | * Must be called from rcu-walk context. | ||
538 | */ | ||
539 | static int nameidata_drop_rcu_last(struct nameidata *nd) | ||
540 | { | ||
541 | struct dentry *dentry = nd->path.dentry; | ||
542 | |||
543 | BUG_ON(!(nd->flags & LOOKUP_RCU)); | ||
544 | nd->flags &= ~LOOKUP_RCU; | ||
545 | nd->root.mnt = NULL; | ||
546 | spin_lock(&dentry->d_lock); | ||
547 | if (!__d_rcu_to_refcount(dentry, nd->seq)) | ||
548 | goto err_unlock; | ||
549 | BUG_ON(nd->inode != dentry->d_inode); | ||
550 | spin_unlock(&dentry->d_lock); | ||
551 | |||
552 | mntget(nd->path.mnt); | ||
553 | |||
554 | rcu_read_unlock(); | ||
555 | br_read_unlock(vfsmount_lock); | ||
556 | |||
557 | return 0; | ||
558 | |||
559 | err_unlock: | ||
560 | spin_unlock(&dentry->d_lock); | ||
561 | rcu_read_unlock(); | ||
562 | br_read_unlock(vfsmount_lock); | ||
563 | return -ECHILD; | ||
564 | } | ||
565 | |||
566 | /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ | ||
567 | static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd) | ||
568 | { | ||
569 | if (likely(nd->flags & LOOKUP_RCU)) | ||
570 | return nameidata_drop_rcu_last(nd); | ||
571 | return 0; | ||
572 | } | ||
573 | |||
574 | /** | ||
378 | * release_open_intent - free up open intent resources | 575 | * release_open_intent - free up open intent resources |
379 | * @nd: pointer to nameidata | 576 | * @nd: pointer to nameidata |
380 | */ | 577 | */ |
@@ -386,10 +583,26 @@ void release_open_intent(struct nameidata *nd) | |||
386 | fput(nd->intent.open.file); | 583 | fput(nd->intent.open.file); |
387 | } | 584 | } |
388 | 585 | ||
586 | static int d_revalidate(struct dentry *dentry, struct nameidata *nd) | ||
587 | { | ||
588 | int status; | ||
589 | |||
590 | status = dentry->d_op->d_revalidate(dentry, nd); | ||
591 | if (status == -ECHILD) { | ||
592 | if (nameidata_dentry_drop_rcu(nd, dentry)) | ||
593 | return status; | ||
594 | status = dentry->d_op->d_revalidate(dentry, nd); | ||
595 | } | ||
596 | |||
597 | return status; | ||
598 | } | ||
599 | |||
389 | static inline struct dentry * | 600 | static inline struct dentry * |
390 | do_revalidate(struct dentry *dentry, struct nameidata *nd) | 601 | do_revalidate(struct dentry *dentry, struct nameidata *nd) |
391 | { | 602 | { |
392 | int status = dentry->d_op->d_revalidate(dentry, nd); | 603 | int status; |
604 | |||
605 | status = d_revalidate(dentry, nd); | ||
393 | if (unlikely(status <= 0)) { | 606 | if (unlikely(status <= 0)) { |
394 | /* | 607 | /* |
395 | * The dentry failed validation. | 608 | * The dentry failed validation. |
@@ -397,19 +610,36 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
397 | * the dentry otherwise d_revalidate is asking us | 610 | * the dentry otherwise d_revalidate is asking us |
398 | * to return a fail status. | 611 | * to return a fail status. |
399 | */ | 612 | */ |
400 | if (!status) { | 613 | if (status < 0) { |
614 | /* If we're in rcu-walk, we don't have a ref */ | ||
615 | if (!(nd->flags & LOOKUP_RCU)) | ||
616 | dput(dentry); | ||
617 | dentry = ERR_PTR(status); | ||
618 | |||
619 | } else { | ||
620 | /* Don't d_invalidate in rcu-walk mode */ | ||
621 | if (nameidata_dentry_drop_rcu_maybe(nd, dentry)) | ||
622 | return ERR_PTR(-ECHILD); | ||
401 | if (!d_invalidate(dentry)) { | 623 | if (!d_invalidate(dentry)) { |
402 | dput(dentry); | 624 | dput(dentry); |
403 | dentry = NULL; | 625 | dentry = NULL; |
404 | } | 626 | } |
405 | } else { | ||
406 | dput(dentry); | ||
407 | dentry = ERR_PTR(status); | ||
408 | } | 627 | } |
409 | } | 628 | } |
410 | return dentry; | 629 | return dentry; |
411 | } | 630 | } |
412 | 631 | ||
632 | static inline int need_reval_dot(struct dentry *dentry) | ||
633 | { | ||
634 | if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) | ||
635 | return 0; | ||
636 | |||
637 | if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) | ||
638 | return 0; | ||
639 | |||
640 | return 1; | ||
641 | } | ||
642 | |||
413 | /* | 643 | /* |
414 | * force_reval_path - force revalidation of a dentry | 644 | * force_reval_path - force revalidation of a dentry |
415 | * | 645 | * |
@@ -433,13 +663,12 @@ force_reval_path(struct path *path, struct nameidata *nd) | |||
433 | 663 | ||
434 | /* | 664 | /* |
435 | * only check on filesystems where it's possible for the dentry to | 665 | * only check on filesystems where it's possible for the dentry to |
436 | * become stale. It's assumed that if this flag is set then the | 666 | * become stale. |
437 | * d_revalidate op will also be defined. | ||
438 | */ | 667 | */ |
439 | if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) | 668 | if (!need_reval_dot(dentry)) |
440 | return 0; | 669 | return 0; |
441 | 670 | ||
442 | status = dentry->d_op->d_revalidate(dentry, nd); | 671 | status = d_revalidate(dentry, nd); |
443 | if (status > 0) | 672 | if (status > 0) |
444 | return 0; | 673 | return 0; |
445 | 674 | ||
@@ -459,26 +688,27 @@ force_reval_path(struct path *path, struct nameidata *nd) | |||
459 | * short-cut DAC fails, then call ->permission() to do more | 688 | * short-cut DAC fails, then call ->permission() to do more |
460 | * complete permission check. | 689 | * complete permission check. |
461 | */ | 690 | */ |
462 | static int exec_permission(struct inode *inode) | 691 | static inline int exec_permission(struct inode *inode, unsigned int flags) |
463 | { | 692 | { |
464 | int ret; | 693 | int ret; |
465 | 694 | ||
466 | if (inode->i_op->permission) { | 695 | if (inode->i_op->permission) { |
467 | ret = inode->i_op->permission(inode, MAY_EXEC); | 696 | ret = inode->i_op->permission(inode, MAY_EXEC, flags); |
468 | if (!ret) | 697 | } else { |
469 | goto ok; | 698 | ret = acl_permission_check(inode, MAY_EXEC, flags, |
470 | return ret; | 699 | inode->i_op->check_acl); |
471 | } | 700 | } |
472 | ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl); | 701 | if (likely(!ret)) |
473 | if (!ret) | ||
474 | goto ok; | 702 | goto ok; |
703 | if (ret == -ECHILD) | ||
704 | return ret; | ||
475 | 705 | ||
476 | if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) | 706 | if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) |
477 | goto ok; | 707 | goto ok; |
478 | 708 | ||
479 | return ret; | 709 | return ret; |
480 | ok: | 710 | ok: |
481 | return security_inode_permission(inode, MAY_EXEC); | 711 | return security_inode_exec_permission(inode, flags); |
482 | } | 712 | } |
483 | 713 | ||
484 | static __always_inline void set_root(struct nameidata *nd) | 714 | static __always_inline void set_root(struct nameidata *nd) |
@@ -489,8 +719,23 @@ static __always_inline void set_root(struct nameidata *nd) | |||
489 | 719 | ||
490 | static int link_path_walk(const char *, struct nameidata *); | 720 | static int link_path_walk(const char *, struct nameidata *); |
491 | 721 | ||
722 | static __always_inline void set_root_rcu(struct nameidata *nd) | ||
723 | { | ||
724 | if (!nd->root.mnt) { | ||
725 | struct fs_struct *fs = current->fs; | ||
726 | unsigned seq; | ||
727 | |||
728 | do { | ||
729 | seq = read_seqcount_begin(&fs->seq); | ||
730 | nd->root = fs->root; | ||
731 | } while (read_seqcount_retry(&fs->seq, seq)); | ||
732 | } | ||
733 | } | ||
734 | |||
492 | static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) | 735 | static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) |
493 | { | 736 | { |
737 | int ret; | ||
738 | |||
494 | if (IS_ERR(link)) | 739 | if (IS_ERR(link)) |
495 | goto fail; | 740 | goto fail; |
496 | 741 | ||
@@ -500,8 +745,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l | |||
500 | nd->path = nd->root; | 745 | nd->path = nd->root; |
501 | path_get(&nd->root); | 746 | path_get(&nd->root); |
502 | } | 747 | } |
748 | nd->inode = nd->path.dentry->d_inode; | ||
503 | 749 | ||
504 | return link_path_walk(link, nd); | 750 | ret = link_path_walk(link, nd); |
751 | return ret; | ||
505 | fail: | 752 | fail: |
506 | path_put(&nd->path); | 753 | path_put(&nd->path); |
507 | return PTR_ERR(link); | 754 | return PTR_ERR(link); |
@@ -516,11 +763,12 @@ static void path_put_conditional(struct path *path, struct nameidata *nd) | |||
516 | 763 | ||
517 | static inline void path_to_nameidata(struct path *path, struct nameidata *nd) | 764 | static inline void path_to_nameidata(struct path *path, struct nameidata *nd) |
518 | { | 765 | { |
519 | dput(nd->path.dentry); | 766 | if (!(nd->flags & LOOKUP_RCU)) { |
520 | if (nd->path.mnt != path->mnt) { | 767 | dput(nd->path.dentry); |
521 | mntput(nd->path.mnt); | 768 | if (nd->path.mnt != path->mnt) |
522 | nd->path.mnt = path->mnt; | 769 | mntput(nd->path.mnt); |
523 | } | 770 | } |
771 | nd->path.mnt = path->mnt; | ||
524 | nd->path.dentry = path->dentry; | 772 | nd->path.dentry = path->dentry; |
525 | } | 773 | } |
526 | 774 | ||
@@ -535,9 +783,11 @@ __do_follow_link(struct path *path, struct nameidata *nd, void **p) | |||
535 | 783 | ||
536 | if (path->mnt != nd->path.mnt) { | 784 | if (path->mnt != nd->path.mnt) { |
537 | path_to_nameidata(path, nd); | 785 | path_to_nameidata(path, nd); |
786 | nd->inode = nd->path.dentry->d_inode; | ||
538 | dget(dentry); | 787 | dget(dentry); |
539 | } | 788 | } |
540 | mntget(path->mnt); | 789 | mntget(path->mnt); |
790 | |||
541 | nd->last_type = LAST_BIND; | 791 | nd->last_type = LAST_BIND; |
542 | *p = dentry->d_inode->i_op->follow_link(dentry, nd); | 792 | *p = dentry->d_inode->i_op->follow_link(dentry, nd); |
543 | error = PTR_ERR(*p); | 793 | error = PTR_ERR(*p); |
@@ -591,6 +841,20 @@ loop: | |||
591 | return err; | 841 | return err; |
592 | } | 842 | } |
593 | 843 | ||
844 | static int follow_up_rcu(struct path *path) | ||
845 | { | ||
846 | struct vfsmount *parent; | ||
847 | struct dentry *mountpoint; | ||
848 | |||
849 | parent = path->mnt->mnt_parent; | ||
850 | if (parent == path->mnt) | ||
851 | return 0; | ||
852 | mountpoint = path->mnt->mnt_mountpoint; | ||
853 | path->dentry = mountpoint; | ||
854 | path->mnt = parent; | ||
855 | return 1; | ||
856 | } | ||
857 | |||
594 | int follow_up(struct path *path) | 858 | int follow_up(struct path *path) |
595 | { | 859 | { |
596 | struct vfsmount *parent; | 860 | struct vfsmount *parent; |
@@ -612,9 +876,24 @@ int follow_up(struct path *path) | |||
612 | return 1; | 876 | return 1; |
613 | } | 877 | } |
614 | 878 | ||
615 | /* no need for dcache_lock, as serialization is taken care in | 879 | /* |
616 | * namespace.c | 880 | * serialization is taken care of in namespace.c |
617 | */ | 881 | */ |
882 | static void __follow_mount_rcu(struct nameidata *nd, struct path *path, | ||
883 | struct inode **inode) | ||
884 | { | ||
885 | while (d_mountpoint(path->dentry)) { | ||
886 | struct vfsmount *mounted; | ||
887 | mounted = __lookup_mnt(path->mnt, path->dentry, 1); | ||
888 | if (!mounted) | ||
889 | return; | ||
890 | path->mnt = mounted; | ||
891 | path->dentry = mounted->mnt_root; | ||
892 | nd->seq = read_seqcount_begin(&path->dentry->d_seq); | ||
893 | *inode = path->dentry->d_inode; | ||
894 | } | ||
895 | } | ||
896 | |||
618 | static int __follow_mount(struct path *path) | 897 | static int __follow_mount(struct path *path) |
619 | { | 898 | { |
620 | int res = 0; | 899 | int res = 0; |
@@ -645,9 +924,6 @@ static void follow_mount(struct path *path) | |||
645 | } | 924 | } |
646 | } | 925 | } |
647 | 926 | ||
648 | /* no need for dcache_lock, as serialization is taken care in | ||
649 | * namespace.c | ||
650 | */ | ||
651 | int follow_down(struct path *path) | 927 | int follow_down(struct path *path) |
652 | { | 928 | { |
653 | struct vfsmount *mounted; | 929 | struct vfsmount *mounted; |
@@ -663,7 +939,42 @@ int follow_down(struct path *path) | |||
663 | return 0; | 939 | return 0; |
664 | } | 940 | } |
665 | 941 | ||
666 | static __always_inline void follow_dotdot(struct nameidata *nd) | 942 | static int follow_dotdot_rcu(struct nameidata *nd) |
943 | { | ||
944 | struct inode *inode = nd->inode; | ||
945 | |||
946 | set_root_rcu(nd); | ||
947 | |||
948 | while(1) { | ||
949 | if (nd->path.dentry == nd->root.dentry && | ||
950 | nd->path.mnt == nd->root.mnt) { | ||
951 | break; | ||
952 | } | ||
953 | if (nd->path.dentry != nd->path.mnt->mnt_root) { | ||
954 | struct dentry *old = nd->path.dentry; | ||
955 | struct dentry *parent = old->d_parent; | ||
956 | unsigned seq; | ||
957 | |||
958 | seq = read_seqcount_begin(&parent->d_seq); | ||
959 | if (read_seqcount_retry(&old->d_seq, nd->seq)) | ||
960 | return -ECHILD; | ||
961 | inode = parent->d_inode; | ||
962 | nd->path.dentry = parent; | ||
963 | nd->seq = seq; | ||
964 | break; | ||
965 | } | ||
966 | if (!follow_up_rcu(&nd->path)) | ||
967 | break; | ||
968 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); | ||
969 | inode = nd->path.dentry->d_inode; | ||
970 | } | ||
971 | __follow_mount_rcu(nd, &nd->path, &inode); | ||
972 | nd->inode = inode; | ||
973 | |||
974 | return 0; | ||
975 | } | ||
976 | |||
977 | static void follow_dotdot(struct nameidata *nd) | ||
667 | { | 978 | { |
668 | set_root(nd); | 979 | set_root(nd); |
669 | 980 | ||
@@ -684,6 +995,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd) | |||
684 | break; | 995 | break; |
685 | } | 996 | } |
686 | follow_mount(&nd->path); | 997 | follow_mount(&nd->path); |
998 | nd->inode = nd->path.dentry->d_inode; | ||
687 | } | 999 | } |
688 | 1000 | ||
689 | /* | 1001 | /* |
@@ -721,17 +1033,17 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent, | |||
721 | * It _is_ time-critical. | 1033 | * It _is_ time-critical. |
722 | */ | 1034 | */ |
723 | static int do_lookup(struct nameidata *nd, struct qstr *name, | 1035 | static int do_lookup(struct nameidata *nd, struct qstr *name, |
724 | struct path *path) | 1036 | struct path *path, struct inode **inode) |
725 | { | 1037 | { |
726 | struct vfsmount *mnt = nd->path.mnt; | 1038 | struct vfsmount *mnt = nd->path.mnt; |
727 | struct dentry *dentry, *parent; | 1039 | struct dentry *dentry, *parent = nd->path.dentry; |
728 | struct inode *dir; | 1040 | struct inode *dir; |
729 | /* | 1041 | /* |
730 | * See if the low-level filesystem might want | 1042 | * See if the low-level filesystem might want |
731 | * to use its own hash.. | 1043 | * to use its own hash.. |
732 | */ | 1044 | */ |
733 | if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { | 1045 | if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { |
734 | int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name); | 1046 | int err = parent->d_op->d_hash(parent, nd->inode, name); |
735 | if (err < 0) | 1047 | if (err < 0) |
736 | return err; | 1048 | return err; |
737 | } | 1049 | } |
@@ -741,21 +1053,44 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, | |||
741 | * of a false negative due to a concurrent rename, we're going to | 1053 | * of a false negative due to a concurrent rename, we're going to |
742 | * do the non-racy lookup, below. | 1054 | * do the non-racy lookup, below. |
743 | */ | 1055 | */ |
744 | dentry = __d_lookup(nd->path.dentry, name); | 1056 | if (nd->flags & LOOKUP_RCU) { |
745 | if (!dentry) | 1057 | unsigned seq; |
746 | goto need_lookup; | 1058 | |
1059 | *inode = nd->inode; | ||
1060 | dentry = __d_lookup_rcu(parent, name, &seq, inode); | ||
1061 | if (!dentry) { | ||
1062 | if (nameidata_drop_rcu(nd)) | ||
1063 | return -ECHILD; | ||
1064 | goto need_lookup; | ||
1065 | } | ||
1066 | /* Memory barrier in read_seqcount_begin of child is enough */ | ||
1067 | if (__read_seqcount_retry(&parent->d_seq, nd->seq)) | ||
1068 | return -ECHILD; | ||
1069 | |||
1070 | nd->seq = seq; | ||
1071 | if (dentry->d_flags & DCACHE_OP_REVALIDATE) | ||
1072 | goto need_revalidate; | ||
1073 | path->mnt = mnt; | ||
1074 | path->dentry = dentry; | ||
1075 | __follow_mount_rcu(nd, path, inode); | ||
1076 | } else { | ||
1077 | dentry = __d_lookup(parent, name); | ||
1078 | if (!dentry) | ||
1079 | goto need_lookup; | ||
747 | found: | 1080 | found: |
748 | if (dentry->d_op && dentry->d_op->d_revalidate) | 1081 | if (dentry->d_flags & DCACHE_OP_REVALIDATE) |
749 | goto need_revalidate; | 1082 | goto need_revalidate; |
750 | done: | 1083 | done: |
751 | path->mnt = mnt; | 1084 | path->mnt = mnt; |
752 | path->dentry = dentry; | 1085 | path->dentry = dentry; |
753 | __follow_mount(path); | 1086 | __follow_mount(path); |
1087 | *inode = path->dentry->d_inode; | ||
1088 | } | ||
754 | return 0; | 1089 | return 0; |
755 | 1090 | ||
756 | need_lookup: | 1091 | need_lookup: |
757 | parent = nd->path.dentry; | ||
758 | dir = parent->d_inode; | 1092 | dir = parent->d_inode; |
1093 | BUG_ON(nd->inode != dir); | ||
759 | 1094 | ||
760 | mutex_lock(&dir->i_mutex); | 1095 | mutex_lock(&dir->i_mutex); |
761 | /* | 1096 | /* |
@@ -817,7 +1152,6 @@ static inline int follow_on_final(struct inode *inode, unsigned lookup_flags) | |||
817 | static int link_path_walk(const char *name, struct nameidata *nd) | 1152 | static int link_path_walk(const char *name, struct nameidata *nd) |
818 | { | 1153 | { |
819 | struct path next; | 1154 | struct path next; |
820 | struct inode *inode; | ||
821 | int err; | 1155 | int err; |
822 | unsigned int lookup_flags = nd->flags; | 1156 | unsigned int lookup_flags = nd->flags; |
823 | 1157 | ||
@@ -826,18 +1160,28 @@ static int link_path_walk(const char *name, struct nameidata *nd) | |||
826 | if (!*name) | 1160 | if (!*name) |
827 | goto return_reval; | 1161 | goto return_reval; |
828 | 1162 | ||
829 | inode = nd->path.dentry->d_inode; | ||
830 | if (nd->depth) | 1163 | if (nd->depth) |
831 | lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); | 1164 | lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); |
832 | 1165 | ||
833 | /* At this point we know we have a real path component. */ | 1166 | /* At this point we know we have a real path component. */ |
834 | for(;;) { | 1167 | for(;;) { |
1168 | struct inode *inode; | ||
835 | unsigned long hash; | 1169 | unsigned long hash; |
836 | struct qstr this; | 1170 | struct qstr this; |
837 | unsigned int c; | 1171 | unsigned int c; |
838 | 1172 | ||
839 | nd->flags |= LOOKUP_CONTINUE; | 1173 | nd->flags |= LOOKUP_CONTINUE; |
840 | err = exec_permission(inode); | 1174 | if (nd->flags & LOOKUP_RCU) { |
1175 | err = exec_permission(nd->inode, IPERM_FLAG_RCU); | ||
1176 | if (err == -ECHILD) { | ||
1177 | if (nameidata_drop_rcu(nd)) | ||
1178 | return -ECHILD; | ||
1179 | goto exec_again; | ||
1180 | } | ||
1181 | } else { | ||
1182 | exec_again: | ||
1183 | err = exec_permission(nd->inode, 0); | ||
1184 | } | ||
841 | if (err) | 1185 | if (err) |
842 | break; | 1186 | break; |
843 | 1187 | ||
@@ -868,37 +1212,44 @@ static int link_path_walk(const char *name, struct nameidata *nd) | |||
868 | if (this.name[0] == '.') switch (this.len) { | 1212 | if (this.name[0] == '.') switch (this.len) { |
869 | default: | 1213 | default: |
870 | break; | 1214 | break; |
871 | case 2: | 1215 | case 2: |
872 | if (this.name[1] != '.') | 1216 | if (this.name[1] != '.') |
873 | break; | 1217 | break; |
874 | follow_dotdot(nd); | 1218 | if (nd->flags & LOOKUP_RCU) { |
875 | inode = nd->path.dentry->d_inode; | 1219 | if (follow_dotdot_rcu(nd)) |
1220 | return -ECHILD; | ||
1221 | } else | ||
1222 | follow_dotdot(nd); | ||
876 | /* fallthrough */ | 1223 | /* fallthrough */ |
877 | case 1: | 1224 | case 1: |
878 | continue; | 1225 | continue; |
879 | } | 1226 | } |
880 | /* This does the actual lookups.. */ | 1227 | /* This does the actual lookups.. */ |
881 | err = do_lookup(nd, &this, &next); | 1228 | err = do_lookup(nd, &this, &next, &inode); |
882 | if (err) | 1229 | if (err) |
883 | break; | 1230 | break; |
884 | |||
885 | err = -ENOENT; | 1231 | err = -ENOENT; |
886 | inode = next.dentry->d_inode; | ||
887 | if (!inode) | 1232 | if (!inode) |
888 | goto out_dput; | 1233 | goto out_dput; |
889 | 1234 | ||
890 | if (inode->i_op->follow_link) { | 1235 | if (inode->i_op->follow_link) { |
1236 | /* We commonly drop rcu-walk here */ | ||
1237 | if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) | ||
1238 | return -ECHILD; | ||
1239 | BUG_ON(inode != next.dentry->d_inode); | ||
891 | err = do_follow_link(&next, nd); | 1240 | err = do_follow_link(&next, nd); |
892 | if (err) | 1241 | if (err) |
893 | goto return_err; | 1242 | goto return_err; |
1243 | nd->inode = nd->path.dentry->d_inode; | ||
894 | err = -ENOENT; | 1244 | err = -ENOENT; |
895 | inode = nd->path.dentry->d_inode; | 1245 | if (!nd->inode) |
896 | if (!inode) | ||
897 | break; | 1246 | break; |
898 | } else | 1247 | } else { |
899 | path_to_nameidata(&next, nd); | 1248 | path_to_nameidata(&next, nd); |
1249 | nd->inode = inode; | ||
1250 | } | ||
900 | err = -ENOTDIR; | 1251 | err = -ENOTDIR; |
901 | if (!inode->i_op->lookup) | 1252 | if (!nd->inode->i_op->lookup) |
902 | break; | 1253 | break; |
903 | continue; | 1254 | continue; |
904 | /* here ends the main loop */ | 1255 | /* here ends the main loop */ |
@@ -913,32 +1264,39 @@ last_component: | |||
913 | if (this.name[0] == '.') switch (this.len) { | 1264 | if (this.name[0] == '.') switch (this.len) { |
914 | default: | 1265 | default: |
915 | break; | 1266 | break; |
916 | case 2: | 1267 | case 2: |
917 | if (this.name[1] != '.') | 1268 | if (this.name[1] != '.') |
918 | break; | 1269 | break; |
919 | follow_dotdot(nd); | 1270 | if (nd->flags & LOOKUP_RCU) { |
920 | inode = nd->path.dentry->d_inode; | 1271 | if (follow_dotdot_rcu(nd)) |
1272 | return -ECHILD; | ||
1273 | } else | ||
1274 | follow_dotdot(nd); | ||
921 | /* fallthrough */ | 1275 | /* fallthrough */ |
922 | case 1: | 1276 | case 1: |
923 | goto return_reval; | 1277 | goto return_reval; |
924 | } | 1278 | } |
925 | err = do_lookup(nd, &this, &next); | 1279 | err = do_lookup(nd, &this, &next, &inode); |
926 | if (err) | 1280 | if (err) |
927 | break; | 1281 | break; |
928 | inode = next.dentry->d_inode; | ||
929 | if (follow_on_final(inode, lookup_flags)) { | 1282 | if (follow_on_final(inode, lookup_flags)) { |
1283 | if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) | ||
1284 | return -ECHILD; | ||
1285 | BUG_ON(inode != next.dentry->d_inode); | ||
930 | err = do_follow_link(&next, nd); | 1286 | err = do_follow_link(&next, nd); |
931 | if (err) | 1287 | if (err) |
932 | goto return_err; | 1288 | goto return_err; |
933 | inode = nd->path.dentry->d_inode; | 1289 | nd->inode = nd->path.dentry->d_inode; |
934 | } else | 1290 | } else { |
935 | path_to_nameidata(&next, nd); | 1291 | path_to_nameidata(&next, nd); |
1292 | nd->inode = inode; | ||
1293 | } | ||
936 | err = -ENOENT; | 1294 | err = -ENOENT; |
937 | if (!inode) | 1295 | if (!nd->inode) |
938 | break; | 1296 | break; |
939 | if (lookup_flags & LOOKUP_DIRECTORY) { | 1297 | if (lookup_flags & LOOKUP_DIRECTORY) { |
940 | err = -ENOTDIR; | 1298 | err = -ENOTDIR; |
941 | if (!inode->i_op->lookup) | 1299 | if (!nd->inode->i_op->lookup) |
942 | break; | 1300 | break; |
943 | } | 1301 | } |
944 | goto return_base; | 1302 | goto return_base; |
@@ -958,25 +1316,43 @@ return_reval: | |||
958 | * We bypassed the ordinary revalidation routines. | 1316 | * We bypassed the ordinary revalidation routines. |
959 | * We may need to check the cached dentry for staleness. | 1317 | * We may need to check the cached dentry for staleness. |
960 | */ | 1318 | */ |
961 | if (nd->path.dentry && nd->path.dentry->d_sb && | 1319 | if (need_reval_dot(nd->path.dentry)) { |
962 | (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { | ||
963 | err = -ESTALE; | ||
964 | /* Note: we do not d_invalidate() */ | 1320 | /* Note: we do not d_invalidate() */ |
965 | if (!nd->path.dentry->d_op->d_revalidate( | 1321 | err = d_revalidate(nd->path.dentry, nd); |
966 | nd->path.dentry, nd)) | 1322 | if (!err) |
1323 | err = -ESTALE; | ||
1324 | if (err < 0) | ||
967 | break; | 1325 | break; |
968 | } | 1326 | } |
969 | return_base: | 1327 | return_base: |
1328 | if (nameidata_drop_rcu_last_maybe(nd)) | ||
1329 | return -ECHILD; | ||
970 | return 0; | 1330 | return 0; |
971 | out_dput: | 1331 | out_dput: |
972 | path_put_conditional(&next, nd); | 1332 | if (!(nd->flags & LOOKUP_RCU)) |
1333 | path_put_conditional(&next, nd); | ||
973 | break; | 1334 | break; |
974 | } | 1335 | } |
975 | path_put(&nd->path); | 1336 | if (!(nd->flags & LOOKUP_RCU)) |
1337 | path_put(&nd->path); | ||
976 | return_err: | 1338 | return_err: |
977 | return err; | 1339 | return err; |
978 | } | 1340 | } |
979 | 1341 | ||
1342 | static inline int path_walk_rcu(const char *name, struct nameidata *nd) | ||
1343 | { | ||
1344 | current->total_link_count = 0; | ||
1345 | |||
1346 | return link_path_walk(name, nd); | ||
1347 | } | ||
1348 | |||
1349 | static inline int path_walk_simple(const char *name, struct nameidata *nd) | ||
1350 | { | ||
1351 | current->total_link_count = 0; | ||
1352 | |||
1353 | return link_path_walk(name, nd); | ||
1354 | } | ||
1355 | |||
980 | static int path_walk(const char *name, struct nameidata *nd) | 1356 | static int path_walk(const char *name, struct nameidata *nd) |
981 | { | 1357 | { |
982 | struct path save = nd->path; | 1358 | struct path save = nd->path; |
@@ -1002,6 +1378,93 @@ static int path_walk(const char *name, struct nameidata *nd) | |||
1002 | return result; | 1378 | return result; |
1003 | } | 1379 | } |
1004 | 1380 | ||
1381 | static void path_finish_rcu(struct nameidata *nd) | ||
1382 | { | ||
1383 | if (nd->flags & LOOKUP_RCU) { | ||
1384 | /* RCU dangling. Cancel it. */ | ||
1385 | nd->flags &= ~LOOKUP_RCU; | ||
1386 | nd->root.mnt = NULL; | ||
1387 | rcu_read_unlock(); | ||
1388 | br_read_unlock(vfsmount_lock); | ||
1389 | } | ||
1390 | if (nd->file) | ||
1391 | fput(nd->file); | ||
1392 | } | ||
1393 | |||
1394 | static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd) | ||
1395 | { | ||
1396 | int retval = 0; | ||
1397 | int fput_needed; | ||
1398 | struct file *file; | ||
1399 | |||
1400 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ | ||
1401 | nd->flags = flags | LOOKUP_RCU; | ||
1402 | nd->depth = 0; | ||
1403 | nd->root.mnt = NULL; | ||
1404 | nd->file = NULL; | ||
1405 | |||
1406 | if (*name=='/') { | ||
1407 | struct fs_struct *fs = current->fs; | ||
1408 | unsigned seq; | ||
1409 | |||
1410 | br_read_lock(vfsmount_lock); | ||
1411 | rcu_read_lock(); | ||
1412 | |||
1413 | do { | ||
1414 | seq = read_seqcount_begin(&fs->seq); | ||
1415 | nd->root = fs->root; | ||
1416 | nd->path = nd->root; | ||
1417 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | ||
1418 | } while (read_seqcount_retry(&fs->seq, seq)); | ||
1419 | |||
1420 | } else if (dfd == AT_FDCWD) { | ||
1421 | struct fs_struct *fs = current->fs; | ||
1422 | unsigned seq; | ||
1423 | |||
1424 | br_read_lock(vfsmount_lock); | ||
1425 | rcu_read_lock(); | ||
1426 | |||
1427 | do { | ||
1428 | seq = read_seqcount_begin(&fs->seq); | ||
1429 | nd->path = fs->pwd; | ||
1430 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | ||
1431 | } while (read_seqcount_retry(&fs->seq, seq)); | ||
1432 | |||
1433 | } else { | ||
1434 | struct dentry *dentry; | ||
1435 | |||
1436 | file = fget_light(dfd, &fput_needed); | ||
1437 | retval = -EBADF; | ||
1438 | if (!file) | ||
1439 | goto out_fail; | ||
1440 | |||
1441 | dentry = file->f_path.dentry; | ||
1442 | |||
1443 | retval = -ENOTDIR; | ||
1444 | if (!S_ISDIR(dentry->d_inode->i_mode)) | ||
1445 | goto fput_fail; | ||
1446 | |||
1447 | retval = file_permission(file, MAY_EXEC); | ||
1448 | if (retval) | ||
1449 | goto fput_fail; | ||
1450 | |||
1451 | nd->path = file->f_path; | ||
1452 | if (fput_needed) | ||
1453 | nd->file = file; | ||
1454 | |||
1455 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | ||
1456 | br_read_lock(vfsmount_lock); | ||
1457 | rcu_read_lock(); | ||
1458 | } | ||
1459 | nd->inode = nd->path.dentry->d_inode; | ||
1460 | return 0; | ||
1461 | |||
1462 | fput_fail: | ||
1463 | fput_light(file, fput_needed); | ||
1464 | out_fail: | ||
1465 | return retval; | ||
1466 | } | ||
1467 | |||
1005 | static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) | 1468 | static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) |
1006 | { | 1469 | { |
1007 | int retval = 0; | 1470 | int retval = 0; |
@@ -1042,6 +1505,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei | |||
1042 | 1505 | ||
1043 | fput_light(file, fput_needed); | 1506 | fput_light(file, fput_needed); |
1044 | } | 1507 | } |
1508 | nd->inode = nd->path.dentry->d_inode; | ||
1045 | return 0; | 1509 | return 0; |
1046 | 1510 | ||
1047 | fput_fail: | 1511 | fput_fail: |
@@ -1054,16 +1518,53 @@ out_fail: | |||
1054 | static int do_path_lookup(int dfd, const char *name, | 1518 | static int do_path_lookup(int dfd, const char *name, |
1055 | unsigned int flags, struct nameidata *nd) | 1519 | unsigned int flags, struct nameidata *nd) |
1056 | { | 1520 | { |
1057 | int retval = path_init(dfd, name, flags, nd); | 1521 | int retval; |
1058 | if (!retval) | 1522 | |
1059 | retval = path_walk(name, nd); | 1523 | /* |
1060 | if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && | 1524 | * Path walking is largely split up into 2 different synchronisation |
1061 | nd->path.dentry->d_inode)) | 1525 | * schemes, rcu-walk and ref-walk (explained in |
1062 | audit_inode(name, nd->path.dentry); | 1526 | * Documentation/filesystems/path-lookup.txt). These share much of the |
1527 | * path walk code, but some things particularly setup, cleanup, and | ||
1528 | * following mounts are sufficiently divergent that functions are | ||
1529 | * duplicated. Typically there is a function foo(), and its RCU | ||
1530 | * analogue, foo_rcu(). | ||
1531 | * | ||
1532 | * -ECHILD is the error number of choice (just to avoid clashes) that | ||
1533 | * is returned if some aspect of an rcu-walk fails. Such an error must | ||
1534 | * be handled by restarting a traditional ref-walk (which will always | ||
1535 | * be able to complete). | ||
1536 | */ | ||
1537 | retval = path_init_rcu(dfd, name, flags, nd); | ||
1538 | if (unlikely(retval)) | ||
1539 | return retval; | ||
1540 | retval = path_walk_rcu(name, nd); | ||
1541 | path_finish_rcu(nd); | ||
1063 | if (nd->root.mnt) { | 1542 | if (nd->root.mnt) { |
1064 | path_put(&nd->root); | 1543 | path_put(&nd->root); |
1065 | nd->root.mnt = NULL; | 1544 | nd->root.mnt = NULL; |
1066 | } | 1545 | } |
1546 | |||
1547 | if (unlikely(retval == -ECHILD || retval == -ESTALE)) { | ||
1548 | /* slower, locked walk */ | ||
1549 | if (retval == -ESTALE) | ||
1550 | flags |= LOOKUP_REVAL; | ||
1551 | retval = path_init(dfd, name, flags, nd); | ||
1552 | if (unlikely(retval)) | ||
1553 | return retval; | ||
1554 | retval = path_walk(name, nd); | ||
1555 | if (nd->root.mnt) { | ||
1556 | path_put(&nd->root); | ||
1557 | nd->root.mnt = NULL; | ||
1558 | } | ||
1559 | } | ||
1560 | |||
1561 | if (likely(!retval)) { | ||
1562 | if (unlikely(!audit_dummy_context())) { | ||
1563 | if (nd->path.dentry && nd->inode) | ||
1564 | audit_inode(name, nd->path.dentry); | ||
1565 | } | ||
1566 | } | ||
1567 | |||
1067 | return retval; | 1568 | return retval; |
1068 | } | 1569 | } |
1069 | 1570 | ||
@@ -1106,10 +1607,11 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, | |||
1106 | path_get(&nd->path); | 1607 | path_get(&nd->path); |
1107 | nd->root = nd->path; | 1608 | nd->root = nd->path; |
1108 | path_get(&nd->root); | 1609 | path_get(&nd->root); |
1610 | nd->inode = nd->path.dentry->d_inode; | ||
1109 | 1611 | ||
1110 | retval = path_walk(name, nd); | 1612 | retval = path_walk(name, nd); |
1111 | if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && | 1613 | if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && |
1112 | nd->path.dentry->d_inode)) | 1614 | nd->inode)) |
1113 | audit_inode(name, nd->path.dentry); | 1615 | audit_inode(name, nd->path.dentry); |
1114 | 1616 | ||
1115 | path_put(&nd->root); | 1617 | path_put(&nd->root); |
@@ -1125,7 +1627,7 @@ static struct dentry *__lookup_hash(struct qstr *name, | |||
1125 | struct dentry *dentry; | 1627 | struct dentry *dentry; |
1126 | int err; | 1628 | int err; |
1127 | 1629 | ||
1128 | err = exec_permission(inode); | 1630 | err = exec_permission(inode, 0); |
1129 | if (err) | 1631 | if (err) |
1130 | return ERR_PTR(err); | 1632 | return ERR_PTR(err); |
1131 | 1633 | ||
@@ -1133,8 +1635,8 @@ static struct dentry *__lookup_hash(struct qstr *name, | |||
1133 | * See if the low-level filesystem might want | 1635 | * See if the low-level filesystem might want |
1134 | * to use its own hash.. | 1636 | * to use its own hash.. |
1135 | */ | 1637 | */ |
1136 | if (base->d_op && base->d_op->d_hash) { | 1638 | if (base->d_flags & DCACHE_OP_HASH) { |
1137 | err = base->d_op->d_hash(base, name); | 1639 | err = base->d_op->d_hash(base, inode, name); |
1138 | dentry = ERR_PTR(err); | 1640 | dentry = ERR_PTR(err); |
1139 | if (err < 0) | 1641 | if (err < 0) |
1140 | goto out; | 1642 | goto out; |
@@ -1147,7 +1649,7 @@ static struct dentry *__lookup_hash(struct qstr *name, | |||
1147 | */ | 1649 | */ |
1148 | dentry = d_lookup(base, name); | 1650 | dentry = d_lookup(base, name); |
1149 | 1651 | ||
1150 | if (dentry && dentry->d_op && dentry->d_op->d_revalidate) | 1652 | if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) |
1151 | dentry = do_revalidate(dentry, nd); | 1653 | dentry = do_revalidate(dentry, nd); |
1152 | 1654 | ||
1153 | if (!dentry) | 1655 | if (!dentry) |
@@ -1490,6 +1992,7 @@ out_unlock: | |||
1490 | mutex_unlock(&dir->d_inode->i_mutex); | 1992 | mutex_unlock(&dir->d_inode->i_mutex); |
1491 | dput(nd->path.dentry); | 1993 | dput(nd->path.dentry); |
1492 | nd->path.dentry = path->dentry; | 1994 | nd->path.dentry = path->dentry; |
1995 | |||
1493 | if (error) | 1996 | if (error) |
1494 | return error; | 1997 | return error; |
1495 | /* Don't check for write permission, don't truncate */ | 1998 | /* Don't check for write permission, don't truncate */ |
@@ -1584,6 +2087,9 @@ exit: | |||
1584 | return ERR_PTR(error); | 2087 | return ERR_PTR(error); |
1585 | } | 2088 | } |
1586 | 2089 | ||
2090 | /* | ||
2091 | * Handle O_CREAT case for do_filp_open | ||
2092 | */ | ||
1587 | static struct file *do_last(struct nameidata *nd, struct path *path, | 2093 | static struct file *do_last(struct nameidata *nd, struct path *path, |
1588 | int open_flag, int acc_mode, | 2094 | int open_flag, int acc_mode, |
1589 | int mode, const char *pathname) | 2095 | int mode, const char *pathname) |
@@ -1597,50 +2103,25 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
1597 | follow_dotdot(nd); | 2103 | follow_dotdot(nd); |
1598 | dir = nd->path.dentry; | 2104 | dir = nd->path.dentry; |
1599 | case LAST_DOT: | 2105 | case LAST_DOT: |
1600 | if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) { | 2106 | if (need_reval_dot(dir)) { |
1601 | if (!dir->d_op->d_revalidate(dir, nd)) { | 2107 | error = d_revalidate(nd->path.dentry, nd); |
2108 | if (!error) | ||
1602 | error = -ESTALE; | 2109 | error = -ESTALE; |
2110 | if (error < 0) | ||
1603 | goto exit; | 2111 | goto exit; |
1604 | } | ||
1605 | } | 2112 | } |
1606 | /* fallthrough */ | 2113 | /* fallthrough */ |
1607 | case LAST_ROOT: | 2114 | case LAST_ROOT: |
1608 | if (open_flag & O_CREAT) | 2115 | goto exit; |
1609 | goto exit; | ||
1610 | /* fallthrough */ | ||
1611 | case LAST_BIND: | 2116 | case LAST_BIND: |
1612 | audit_inode(pathname, dir); | 2117 | audit_inode(pathname, dir); |
1613 | goto ok; | 2118 | goto ok; |
1614 | } | 2119 | } |
1615 | 2120 | ||
1616 | /* trailing slashes? */ | 2121 | /* trailing slashes? */ |
1617 | if (nd->last.name[nd->last.len]) { | 2122 | if (nd->last.name[nd->last.len]) |
1618 | if (open_flag & O_CREAT) | 2123 | goto exit; |
1619 | goto exit; | ||
1620 | nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW; | ||
1621 | } | ||
1622 | |||
1623 | /* just plain open? */ | ||
1624 | if (!(open_flag & O_CREAT)) { | ||
1625 | error = do_lookup(nd, &nd->last, path); | ||
1626 | if (error) | ||
1627 | goto exit; | ||
1628 | error = -ENOENT; | ||
1629 | if (!path->dentry->d_inode) | ||
1630 | goto exit_dput; | ||
1631 | if (path->dentry->d_inode->i_op->follow_link) | ||
1632 | return NULL; | ||
1633 | error = -ENOTDIR; | ||
1634 | if (nd->flags & LOOKUP_DIRECTORY) { | ||
1635 | if (!path->dentry->d_inode->i_op->lookup) | ||
1636 | goto exit_dput; | ||
1637 | } | ||
1638 | path_to_nameidata(path, nd); | ||
1639 | audit_inode(pathname, nd->path.dentry); | ||
1640 | goto ok; | ||
1641 | } | ||
1642 | 2124 | ||
1643 | /* OK, it's O_CREAT */ | ||
1644 | mutex_lock(&dir->d_inode->i_mutex); | 2125 | mutex_lock(&dir->d_inode->i_mutex); |
1645 | 2126 | ||
1646 | path->dentry = lookup_hash(nd); | 2127 | path->dentry = lookup_hash(nd); |
@@ -1711,8 +2192,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
1711 | return NULL; | 2192 | return NULL; |
1712 | 2193 | ||
1713 | path_to_nameidata(path, nd); | 2194 | path_to_nameidata(path, nd); |
2195 | nd->inode = path->dentry->d_inode; | ||
1714 | error = -EISDIR; | 2196 | error = -EISDIR; |
1715 | if (S_ISDIR(path->dentry->d_inode->i_mode)) | 2197 | if (S_ISDIR(nd->inode->i_mode)) |
1716 | goto exit; | 2198 | goto exit; |
1717 | ok: | 2199 | ok: |
1718 | filp = finish_open(nd, open_flag, acc_mode); | 2200 | filp = finish_open(nd, open_flag, acc_mode); |
@@ -1743,7 +2225,7 @@ struct file *do_filp_open(int dfd, const char *pathname, | |||
1743 | struct path path; | 2225 | struct path path; |
1744 | int count = 0; | 2226 | int count = 0; |
1745 | int flag = open_to_namei_flags(open_flag); | 2227 | int flag = open_to_namei_flags(open_flag); |
1746 | int force_reval = 0; | 2228 | int flags; |
1747 | 2229 | ||
1748 | if (!(open_flag & O_CREAT)) | 2230 | if (!(open_flag & O_CREAT)) |
1749 | mode = 0; | 2231 | mode = 0; |
@@ -1772,54 +2254,84 @@ struct file *do_filp_open(int dfd, const char *pathname, | |||
1772 | if (open_flag & O_APPEND) | 2254 | if (open_flag & O_APPEND) |
1773 | acc_mode |= MAY_APPEND; | 2255 | acc_mode |= MAY_APPEND; |
1774 | 2256 | ||
1775 | /* find the parent */ | 2257 | flags = LOOKUP_OPEN; |
1776 | reval: | 2258 | if (open_flag & O_CREAT) { |
1777 | error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); | 2259 | flags |= LOOKUP_CREATE; |
2260 | if (open_flag & O_EXCL) | ||
2261 | flags |= LOOKUP_EXCL; | ||
2262 | } | ||
2263 | if (open_flag & O_DIRECTORY) | ||
2264 | flags |= LOOKUP_DIRECTORY; | ||
2265 | if (!(open_flag & O_NOFOLLOW)) | ||
2266 | flags |= LOOKUP_FOLLOW; | ||
2267 | |||
2268 | filp = get_empty_filp(); | ||
2269 | if (!filp) | ||
2270 | return ERR_PTR(-ENFILE); | ||
2271 | |||
2272 | filp->f_flags = open_flag; | ||
2273 | nd.intent.open.file = filp; | ||
2274 | nd.intent.open.flags = flag; | ||
2275 | nd.intent.open.create_mode = mode; | ||
2276 | |||
2277 | if (open_flag & O_CREAT) | ||
2278 | goto creat; | ||
2279 | |||
2280 | /* !O_CREAT, simple open */ | ||
2281 | error = do_path_lookup(dfd, pathname, flags, &nd); | ||
2282 | if (unlikely(error)) | ||
2283 | goto out_filp; | ||
2284 | error = -ELOOP; | ||
2285 | if (!(nd.flags & LOOKUP_FOLLOW)) { | ||
2286 | if (nd.inode->i_op->follow_link) | ||
2287 | goto out_path; | ||
2288 | } | ||
2289 | error = -ENOTDIR; | ||
2290 | if (nd.flags & LOOKUP_DIRECTORY) { | ||
2291 | if (!nd.inode->i_op->lookup) | ||
2292 | goto out_path; | ||
2293 | } | ||
2294 | audit_inode(pathname, nd.path.dentry); | ||
2295 | filp = finish_open(&nd, open_flag, acc_mode); | ||
2296 | return filp; | ||
2297 | |||
2298 | creat: | ||
2299 | /* OK, have to create the file. Find the parent. */ | ||
2300 | error = path_init_rcu(dfd, pathname, | ||
2301 | LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd); | ||
1778 | if (error) | 2302 | if (error) |
1779 | return ERR_PTR(error); | 2303 | goto out_filp; |
1780 | if (force_reval) | 2304 | error = path_walk_rcu(pathname, &nd); |
1781 | nd.flags |= LOOKUP_REVAL; | 2305 | path_finish_rcu(&nd); |
2306 | if (unlikely(error == -ECHILD || error == -ESTALE)) { | ||
2307 | /* slower, locked walk */ | ||
2308 | if (error == -ESTALE) { | ||
2309 | reval: | ||
2310 | flags |= LOOKUP_REVAL; | ||
2311 | } | ||
2312 | error = path_init(dfd, pathname, | ||
2313 | LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd); | ||
2314 | if (error) | ||
2315 | goto out_filp; | ||
1782 | 2316 | ||
1783 | current->total_link_count = 0; | 2317 | error = path_walk_simple(pathname, &nd); |
1784 | error = link_path_walk(pathname, &nd); | ||
1785 | if (error) { | ||
1786 | filp = ERR_PTR(error); | ||
1787 | goto out; | ||
1788 | } | 2318 | } |
1789 | if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT)) | 2319 | if (unlikely(error)) |
2320 | goto out_filp; | ||
2321 | if (unlikely(!audit_dummy_context())) | ||
1790 | audit_inode(pathname, nd.path.dentry); | 2322 | audit_inode(pathname, nd.path.dentry); |
1791 | 2323 | ||
1792 | /* | 2324 | /* |
1793 | * We have the parent and last component. | 2325 | * We have the parent and last component. |
1794 | */ | 2326 | */ |
1795 | 2327 | nd.flags = flags; | |
1796 | error = -ENFILE; | ||
1797 | filp = get_empty_filp(); | ||
1798 | if (filp == NULL) | ||
1799 | goto exit_parent; | ||
1800 | nd.intent.open.file = filp; | ||
1801 | filp->f_flags = open_flag; | ||
1802 | nd.intent.open.flags = flag; | ||
1803 | nd.intent.open.create_mode = mode; | ||
1804 | nd.flags &= ~LOOKUP_PARENT; | ||
1805 | nd.flags |= LOOKUP_OPEN; | ||
1806 | if (open_flag & O_CREAT) { | ||
1807 | nd.flags |= LOOKUP_CREATE; | ||
1808 | if (open_flag & O_EXCL) | ||
1809 | nd.flags |= LOOKUP_EXCL; | ||
1810 | } | ||
1811 | if (open_flag & O_DIRECTORY) | ||
1812 | nd.flags |= LOOKUP_DIRECTORY; | ||
1813 | if (!(open_flag & O_NOFOLLOW)) | ||
1814 | nd.flags |= LOOKUP_FOLLOW; | ||
1815 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); | 2328 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); |
1816 | while (unlikely(!filp)) { /* trailing symlink */ | 2329 | while (unlikely(!filp)) { /* trailing symlink */ |
1817 | struct path holder; | 2330 | struct path holder; |
1818 | struct inode *inode = path.dentry->d_inode; | ||
1819 | void *cookie; | 2331 | void *cookie; |
1820 | error = -ELOOP; | 2332 | error = -ELOOP; |
1821 | /* S_ISDIR part is a temporary automount kludge */ | 2333 | /* S_ISDIR part is a temporary automount kludge */ |
1822 | if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode)) | 2334 | if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(nd.inode->i_mode)) |
1823 | goto exit_dput; | 2335 | goto exit_dput; |
1824 | if (count++ == 32) | 2336 | if (count++ == 32) |
1825 | goto exit_dput; | 2337 | goto exit_dput; |
@@ -1840,36 +2352,33 @@ reval: | |||
1840 | goto exit_dput; | 2352 | goto exit_dput; |
1841 | error = __do_follow_link(&path, &nd, &cookie); | 2353 | error = __do_follow_link(&path, &nd, &cookie); |
1842 | if (unlikely(error)) { | 2354 | if (unlikely(error)) { |
2355 | if (!IS_ERR(cookie) && nd.inode->i_op->put_link) | ||
2356 | nd.inode->i_op->put_link(path.dentry, &nd, cookie); | ||
1843 | /* nd.path had been dropped */ | 2357 | /* nd.path had been dropped */ |
1844 | if (!IS_ERR(cookie) && inode->i_op->put_link) | 2358 | nd.path = path; |
1845 | inode->i_op->put_link(path.dentry, &nd, cookie); | 2359 | goto out_path; |
1846 | path_put(&path); | ||
1847 | release_open_intent(&nd); | ||
1848 | filp = ERR_PTR(error); | ||
1849 | goto out; | ||
1850 | } | 2360 | } |
1851 | holder = path; | 2361 | holder = path; |
1852 | nd.flags &= ~LOOKUP_PARENT; | 2362 | nd.flags &= ~LOOKUP_PARENT; |
1853 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); | 2363 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); |
1854 | if (inode->i_op->put_link) | 2364 | if (nd.inode->i_op->put_link) |
1855 | inode->i_op->put_link(holder.dentry, &nd, cookie); | 2365 | nd.inode->i_op->put_link(holder.dentry, &nd, cookie); |
1856 | path_put(&holder); | 2366 | path_put(&holder); |
1857 | } | 2367 | } |
1858 | out: | 2368 | out: |
1859 | if (nd.root.mnt) | 2369 | if (nd.root.mnt) |
1860 | path_put(&nd.root); | 2370 | path_put(&nd.root); |
1861 | if (filp == ERR_PTR(-ESTALE) && !force_reval) { | 2371 | if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) |
1862 | force_reval = 1; | ||
1863 | goto reval; | 2372 | goto reval; |
1864 | } | ||
1865 | return filp; | 2373 | return filp; |
1866 | 2374 | ||
1867 | exit_dput: | 2375 | exit_dput: |
1868 | path_put_conditional(&path, &nd); | 2376 | path_put_conditional(&path, &nd); |
2377 | out_path: | ||
2378 | path_put(&nd.path); | ||
2379 | out_filp: | ||
1869 | if (!IS_ERR(nd.intent.open.file)) | 2380 | if (!IS_ERR(nd.intent.open.file)) |
1870 | release_open_intent(&nd); | 2381 | release_open_intent(&nd); |
1871 | exit_parent: | ||
1872 | path_put(&nd.path); | ||
1873 | filp = ERR_PTR(error); | 2382 | filp = ERR_PTR(error); |
1874 | goto out; | 2383 | goto out; |
1875 | } | 2384 | } |
@@ -2130,12 +2639,10 @@ void dentry_unhash(struct dentry *dentry) | |||
2130 | { | 2639 | { |
2131 | dget(dentry); | 2640 | dget(dentry); |
2132 | shrink_dcache_parent(dentry); | 2641 | shrink_dcache_parent(dentry); |
2133 | spin_lock(&dcache_lock); | ||
2134 | spin_lock(&dentry->d_lock); | 2642 | spin_lock(&dentry->d_lock); |
2135 | if (atomic_read(&dentry->d_count) == 2) | 2643 | if (dentry->d_count == 2) |
2136 | __d_drop(dentry); | 2644 | __d_drop(dentry); |
2137 | spin_unlock(&dentry->d_lock); | 2645 | spin_unlock(&dentry->d_lock); |
2138 | spin_unlock(&dcache_lock); | ||
2139 | } | 2646 | } |
2140 | 2647 | ||
2141 | int vfs_rmdir(struct inode *dir, struct dentry *dentry) | 2648 | int vfs_rmdir(struct inode *dir, struct dentry *dentry) |