summary | refs | log | tree | commit | diff | stats
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- fs/namei.c 322
1 files changed, 196 insertions, 126 deletions
diff --git a/fs/namei.c b/fs/namei.c
index caa28051e197..e029a4cbff7d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -482,18 +482,6 @@ EXPORT_SYMBOL(path_put);
482 * to restart the path walk from the beginning in ref-walk mode. 482 * to restart the path walk from the beginning in ref-walk mode.
483 */ 483 */
484 484
485static inline void lock_rcu_walk(void)
486{
487 br_read_lock(&vfsmount_lock);
488 rcu_read_lock();
489}
490
491static inline void unlock_rcu_walk(void)
492{
493 rcu_read_unlock();
494 br_read_unlock(&vfsmount_lock);
495}
496
497/** 485/**
498 * unlazy_walk - try to switch to ref-walk mode. 486 * unlazy_walk - try to switch to ref-walk mode.
499 * @nd: nameidata pathwalk data 487 * @nd: nameidata pathwalk data
@@ -512,26 +500,23 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
512 BUG_ON(!(nd->flags & LOOKUP_RCU)); 500 BUG_ON(!(nd->flags & LOOKUP_RCU));
513 501
514 /* 502 /*
515 * Get a reference to the parent first: we're 503 * After legitimizing the bastards, terminate_walk()
516 * going to make "path_put(nd->path)" valid in 504 * will do the right thing for non-RCU mode, and all our
517 * non-RCU context for "terminate_walk()". 505 * subsequent exit cases should rcu_read_unlock()
518 * 506 * before returning. Do vfsmount first; if dentry
519 * If this doesn't work, return immediately with 507 * can't be legitimized, just set nd->path.dentry to NULL
520 * RCU walking still active (and then we will do 508 * and rely on dput(NULL) being a no-op.
521 * the RCU walk cleanup in terminate_walk()).
522 */ 509 */
523 if (!lockref_get_not_dead(&parent->d_lockref)) 510 if (!legitimize_mnt(nd->path.mnt, nd->m_seq))
524 return -ECHILD; 511 return -ECHILD;
525
526 /*
527 * After the mntget(), we terminate_walk() will do
528 * the right thing for non-RCU mode, and all our
529 * subsequent exit cases should unlock_rcu_walk()
530 * before returning.
531 */
532 mntget(nd->path.mnt);
533 nd->flags &= ~LOOKUP_RCU; 512 nd->flags &= ~LOOKUP_RCU;
534 513
514 if (!lockref_get_not_dead(&parent->d_lockref)) {
515 nd->path.dentry = NULL;
516 rcu_read_unlock();
517 return -ECHILD;
518 }
519
535 /* 520 /*
536 * For a negative lookup, the lookup sequence point is the parents 521 * For a negative lookup, the lookup sequence point is the parents
537 * sequence point, and it only needs to revalidate the parent dentry. 522 * sequence point, and it only needs to revalidate the parent dentry.
@@ -566,17 +551,17 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
566 spin_unlock(&fs->lock); 551 spin_unlock(&fs->lock);
567 } 552 }
568 553
569 unlock_rcu_walk(); 554 rcu_read_unlock();
570 return 0; 555 return 0;
571 556
572unlock_and_drop_dentry: 557unlock_and_drop_dentry:
573 spin_unlock(&fs->lock); 558 spin_unlock(&fs->lock);
574drop_dentry: 559drop_dentry:
575 unlock_rcu_walk(); 560 rcu_read_unlock();
576 dput(dentry); 561 dput(dentry);
577 goto drop_root_mnt; 562 goto drop_root_mnt;
578out: 563out:
579 unlock_rcu_walk(); 564 rcu_read_unlock();
580drop_root_mnt: 565drop_root_mnt:
581 if (!(nd->flags & LOOKUP_ROOT)) 566 if (!(nd->flags & LOOKUP_ROOT))
582 nd->root.mnt = NULL; 567 nd->root.mnt = NULL;
@@ -608,17 +593,22 @@ static int complete_walk(struct nameidata *nd)
608 if (!(nd->flags & LOOKUP_ROOT)) 593 if (!(nd->flags & LOOKUP_ROOT))
609 nd->root.mnt = NULL; 594 nd->root.mnt = NULL;
610 595
596 if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) {
597 rcu_read_unlock();
598 return -ECHILD;
599 }
611 if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) { 600 if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
612 unlock_rcu_walk(); 601 rcu_read_unlock();
602 mntput(nd->path.mnt);
613 return -ECHILD; 603 return -ECHILD;
614 } 604 }
615 if (read_seqcount_retry(&dentry->d_seq, nd->seq)) { 605 if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
616 unlock_rcu_walk(); 606 rcu_read_unlock();
617 dput(dentry); 607 dput(dentry);
608 mntput(nd->path.mnt);
618 return -ECHILD; 609 return -ECHILD;
619 } 610 }
620 mntget(nd->path.mnt); 611 rcu_read_unlock();
621 unlock_rcu_walk();
622 } 612 }
623 613
624 if (likely(!(nd->flags & LOOKUP_JUMPED))) 614 if (likely(!(nd->flags & LOOKUP_JUMPED)))
@@ -909,15 +899,15 @@ int follow_up(struct path *path)
909 struct mount *parent; 899 struct mount *parent;
910 struct dentry *mountpoint; 900 struct dentry *mountpoint;
911 901
912 br_read_lock(&vfsmount_lock); 902 read_seqlock_excl(&mount_lock);
913 parent = mnt->mnt_parent; 903 parent = mnt->mnt_parent;
914 if (parent == mnt) { 904 if (parent == mnt) {
915 br_read_unlock(&vfsmount_lock); 905 read_sequnlock_excl(&mount_lock);
916 return 0; 906 return 0;
917 } 907 }
918 mntget(&parent->mnt); 908 mntget(&parent->mnt);
919 mountpoint = dget(mnt->mnt_mountpoint); 909 mountpoint = dget(mnt->mnt_mountpoint);
920 br_read_unlock(&vfsmount_lock); 910 read_sequnlock_excl(&mount_lock);
921 dput(path->dentry); 911 dput(path->dentry);
922 path->dentry = mountpoint; 912 path->dentry = mountpoint;
923 mntput(path->mnt); 913 mntput(path->mnt);
@@ -1048,8 +1038,8 @@ static int follow_managed(struct path *path, unsigned flags)
1048 1038
1049 /* Something is mounted on this dentry in another 1039 /* Something is mounted on this dentry in another
1050 * namespace and/or whatever was mounted there in this 1040 * namespace and/or whatever was mounted there in this
1051 * namespace got unmounted before we managed to get the 1041 * namespace got unmounted before lookup_mnt() could
1052 * vfsmount_lock */ 1042 * get it */
1053 } 1043 }
1054 1044
1055 /* Handle an automount point */ 1045 /* Handle an automount point */
@@ -1111,7 +1101,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
1111 if (!d_mountpoint(path->dentry)) 1101 if (!d_mountpoint(path->dentry))
1112 break; 1102 break;
1113 1103
1114 mounted = __lookup_mnt(path->mnt, path->dentry, 1); 1104 mounted = __lookup_mnt(path->mnt, path->dentry);
1115 if (!mounted) 1105 if (!mounted)
1116 break; 1106 break;
1117 path->mnt = &mounted->mnt; 1107 path->mnt = &mounted->mnt;
@@ -1132,7 +1122,7 @@ static void follow_mount_rcu(struct nameidata *nd)
1132{ 1122{
1133 while (d_mountpoint(nd->path.dentry)) { 1123 while (d_mountpoint(nd->path.dentry)) {
1134 struct mount *mounted; 1124 struct mount *mounted;
1135 mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1); 1125 mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
1136 if (!mounted) 1126 if (!mounted)
1137 break; 1127 break;
1138 nd->path.mnt = &mounted->mnt; 1128 nd->path.mnt = &mounted->mnt;
@@ -1174,7 +1164,7 @@ failed:
1174 nd->flags &= ~LOOKUP_RCU; 1164 nd->flags &= ~LOOKUP_RCU;
1175 if (!(nd->flags & LOOKUP_ROOT)) 1165 if (!(nd->flags & LOOKUP_ROOT))
1176 nd->root.mnt = NULL; 1166 nd->root.mnt = NULL;
1177 unlock_rcu_walk(); 1167 rcu_read_unlock();
1178 return -ECHILD; 1168 return -ECHILD;
1179} 1169}
1180 1170
@@ -1308,8 +1298,8 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
1308} 1298}
1309 1299
1310/* 1300/*
1311 * Call i_op->lookup on the dentry. The dentry must be negative but may be 1301 * Call i_op->lookup on the dentry. The dentry must be negative and
1312 * hashed if it was pouplated with DCACHE_NEED_LOOKUP. 1302 * unhashed.
1313 * 1303 *
1314 * dir->d_inode->i_mutex must be held 1304 * dir->d_inode->i_mutex must be held
1315 */ 1305 */
@@ -1501,7 +1491,7 @@ static void terminate_walk(struct nameidata *nd)
1501 nd->flags &= ~LOOKUP_RCU; 1491 nd->flags &= ~LOOKUP_RCU;
1502 if (!(nd->flags & LOOKUP_ROOT)) 1492 if (!(nd->flags & LOOKUP_ROOT))
1503 nd->root.mnt = NULL; 1493 nd->root.mnt = NULL;
1504 unlock_rcu_walk(); 1494 rcu_read_unlock();
1505 } 1495 }
1506} 1496}
1507 1497
@@ -1511,18 +1501,9 @@ static void terminate_walk(struct nameidata *nd)
1511 * so we keep a cache of "no, this doesn't need follow_link" 1501 * so we keep a cache of "no, this doesn't need follow_link"
1512 * for the common case. 1502 * for the common case.
1513 */ 1503 */
1514static inline int should_follow_link(struct inode *inode, int follow) 1504static inline int should_follow_link(struct dentry *dentry, int follow)
1515{ 1505{
1516 if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { 1506 return unlikely(d_is_symlink(dentry)) ? follow : 0;
1517 if (likely(inode->i_op->follow_link))
1518 return follow;
1519
1520 /* This gets set once for the inode lifetime */
1521 spin_lock(&inode->i_lock);
1522 inode->i_opflags |= IOP_NOFOLLOW;
1523 spin_unlock(&inode->i_lock);
1524 }
1525 return 0;
1526} 1507}
1527 1508
1528static inline int walk_component(struct nameidata *nd, struct path *path, 1509static inline int walk_component(struct nameidata *nd, struct path *path,
@@ -1552,7 +1533,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
1552 if (!inode) 1533 if (!inode)
1553 goto out_path_put; 1534 goto out_path_put;
1554 1535
1555 if (should_follow_link(inode, follow)) { 1536 if (should_follow_link(path->dentry, follow)) {
1556 if (nd->flags & LOOKUP_RCU) { 1537 if (nd->flags & LOOKUP_RCU) {
1557 if (unlikely(unlazy_walk(nd, path->dentry))) { 1538 if (unlikely(unlazy_walk(nd, path->dentry))) {
1558 err = -ECHILD; 1539 err = -ECHILD;
@@ -1611,26 +1592,6 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
1611} 1592}
1612 1593
1613/* 1594/*
1614 * We really don't want to look at inode->i_op->lookup
1615 * when we don't have to. So we keep a cache bit in
1616 * the inode ->i_opflags field that says "yes, we can
1617 * do lookup on this inode".
1618 */
1619static inline int can_lookup(struct inode *inode)
1620{
1621 if (likely(inode->i_opflags & IOP_LOOKUP))
1622 return 1;
1623 if (likely(!inode->i_op->lookup))
1624 return 0;
1625
1626 /* We do this once for the lifetime of the inode */
1627 spin_lock(&inode->i_lock);
1628 inode->i_opflags |= IOP_LOOKUP;
1629 spin_unlock(&inode->i_lock);
1630 return 1;
1631}
1632
1633/*
1634 * We can do the critical dentry name comparison and hashing 1595 * We can do the critical dentry name comparison and hashing
1635 * operations one word at a time, but we are limited to: 1596 * operations one word at a time, but we are limited to:
1636 * 1597 *
@@ -1833,7 +1794,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1833 if (err) 1794 if (err)
1834 return err; 1795 return err;
1835 } 1796 }
1836 if (!can_lookup(nd->inode)) { 1797 if (!d_is_directory(nd->path.dentry)) {
1837 err = -ENOTDIR; 1798 err = -ENOTDIR;
1838 break; 1799 break;
1839 } 1800 }
@@ -1851,9 +1812,10 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1851 nd->flags = flags | LOOKUP_JUMPED; 1812 nd->flags = flags | LOOKUP_JUMPED;
1852 nd->depth = 0; 1813 nd->depth = 0;
1853 if (flags & LOOKUP_ROOT) { 1814 if (flags & LOOKUP_ROOT) {
1854 struct inode *inode = nd->root.dentry->d_inode; 1815 struct dentry *root = nd->root.dentry;
1816 struct inode *inode = root->d_inode;
1855 if (*name) { 1817 if (*name) {
1856 if (!can_lookup(inode)) 1818 if (!d_is_directory(root))
1857 return -ENOTDIR; 1819 return -ENOTDIR;
1858 retval = inode_permission(inode, MAY_EXEC); 1820 retval = inode_permission(inode, MAY_EXEC);
1859 if (retval) 1821 if (retval)
@@ -1862,8 +1824,9 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1862 nd->path = nd->root; 1824 nd->path = nd->root;
1863 nd->inode = inode; 1825 nd->inode = inode;
1864 if (flags & LOOKUP_RCU) { 1826 if (flags & LOOKUP_RCU) {
1865 lock_rcu_walk(); 1827 rcu_read_lock();
1866 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1828 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1829 nd->m_seq = read_seqbegin(&mount_lock);
1867 } else { 1830 } else {
1868 path_get(&nd->path); 1831 path_get(&nd->path);
1869 } 1832 }
@@ -1872,9 +1835,10 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1872 1835
1873 nd->root.mnt = NULL; 1836 nd->root.mnt = NULL;
1874 1837
1838 nd->m_seq = read_seqbegin(&mount_lock);
1875 if (*name=='/') { 1839 if (*name=='/') {
1876 if (flags & LOOKUP_RCU) { 1840 if (flags & LOOKUP_RCU) {
1877 lock_rcu_walk(); 1841 rcu_read_lock();
1878 set_root_rcu(nd); 1842 set_root_rcu(nd);
1879 } else { 1843 } else {
1880 set_root(nd); 1844 set_root(nd);
@@ -1886,7 +1850,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1886 struct fs_struct *fs = current->fs; 1850 struct fs_struct *fs = current->fs;
1887 unsigned seq; 1851 unsigned seq;
1888 1852
1889 lock_rcu_walk(); 1853 rcu_read_lock();
1890 1854
1891 do { 1855 do {
1892 seq = read_seqcount_begin(&fs->seq); 1856 seq = read_seqcount_begin(&fs->seq);
@@ -1907,7 +1871,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1907 dentry = f.file->f_path.dentry; 1871 dentry = f.file->f_path.dentry;
1908 1872
1909 if (*name) { 1873 if (*name) {
1910 if (!can_lookup(dentry->d_inode)) { 1874 if (!d_is_directory(dentry)) {
1911 fdput(f); 1875 fdput(f);
1912 return -ENOTDIR; 1876 return -ENOTDIR;
1913 } 1877 }
@@ -1918,7 +1882,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1918 if (f.need_put) 1882 if (f.need_put)
1919 *fp = f.file; 1883 *fp = f.file;
1920 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1884 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1921 lock_rcu_walk(); 1885 rcu_read_lock();
1922 } else { 1886 } else {
1923 path_get(&nd->path); 1887 path_get(&nd->path);
1924 fdput(f); 1888 fdput(f);
@@ -1989,7 +1953,7 @@ static int path_lookupat(int dfd, const char *name,
1989 err = complete_walk(nd); 1953 err = complete_walk(nd);
1990 1954
1991 if (!err && nd->flags & LOOKUP_DIRECTORY) { 1955 if (!err && nd->flags & LOOKUP_DIRECTORY) {
1992 if (!can_lookup(nd->inode)) { 1956 if (!d_is_directory(nd->path.dentry)) {
1993 path_put(&nd->path); 1957 path_put(&nd->path);
1994 err = -ENOTDIR; 1958 err = -ENOTDIR;
1995 } 1959 }
@@ -2281,7 +2245,7 @@ done:
2281 } 2245 }
2282 path->dentry = dentry; 2246 path->dentry = dentry;
2283 path->mnt = mntget(nd->path.mnt); 2247 path->mnt = mntget(nd->path.mnt);
2284 if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW)) 2248 if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW))
2285 return 1; 2249 return 1;
2286 follow_mount(path); 2250 follow_mount(path);
2287 error = 0; 2251 error = 0;
@@ -2426,12 +2390,14 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
2426 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 2390 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
2427 * nfs_async_unlink(). 2391 * nfs_async_unlink().
2428 */ 2392 */
2429static int may_delete(struct inode *dir,struct dentry *victim,int isdir) 2393static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
2430{ 2394{
2395 struct inode *inode = victim->d_inode;
2431 int error; 2396 int error;
2432 2397
2433 if (!victim->d_inode) 2398 if (d_is_negative(victim))
2434 return -ENOENT; 2399 return -ENOENT;
2400 BUG_ON(!inode);
2435 2401
2436 BUG_ON(victim->d_parent->d_inode != dir); 2402 BUG_ON(victim->d_parent->d_inode != dir);
2437 audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); 2403 audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
@@ -2441,15 +2407,16 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
2441 return error; 2407 return error;
2442 if (IS_APPEND(dir)) 2408 if (IS_APPEND(dir))
2443 return -EPERM; 2409 return -EPERM;
2444 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| 2410
2445 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) 2411 if (check_sticky(dir, inode) || IS_APPEND(inode) ||
2412 IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
2446 return -EPERM; 2413 return -EPERM;
2447 if (isdir) { 2414 if (isdir) {
2448 if (!S_ISDIR(victim->d_inode->i_mode)) 2415 if (!d_is_directory(victim) && !d_is_autodir(victim))
2449 return -ENOTDIR; 2416 return -ENOTDIR;
2450 if (IS_ROOT(victim)) 2417 if (IS_ROOT(victim))
2451 return -EBUSY; 2418 return -EBUSY;
2452 } else if (S_ISDIR(victim->d_inode->i_mode)) 2419 } else if (d_is_directory(victim) || d_is_autodir(victim))
2453 return -EISDIR; 2420 return -EISDIR;
2454 if (IS_DEADDIR(dir)) 2421 if (IS_DEADDIR(dir))
2455 return -ENOENT; 2422 return -ENOENT;
@@ -2983,7 +2950,7 @@ retry_lookup:
2983 /* 2950 /*
2984 * create/update audit record if it already exists. 2951 * create/update audit record if it already exists.
2985 */ 2952 */
2986 if (path->dentry->d_inode) 2953 if (d_is_positive(path->dentry))
2987 audit_inode(name, path->dentry, 0); 2954 audit_inode(name, path->dentry, 0);
2988 2955
2989 /* 2956 /*
@@ -3012,12 +2979,12 @@ retry_lookup:
3012finish_lookup: 2979finish_lookup:
3013 /* we _can_ be in RCU mode here */ 2980 /* we _can_ be in RCU mode here */
3014 error = -ENOENT; 2981 error = -ENOENT;
3015 if (!inode) { 2982 if (d_is_negative(path->dentry)) {
3016 path_to_nameidata(path, nd); 2983 path_to_nameidata(path, nd);
3017 goto out; 2984 goto out;
3018 } 2985 }
3019 2986
3020 if (should_follow_link(inode, !symlink_ok)) { 2987 if (should_follow_link(path->dentry, !symlink_ok)) {
3021 if (nd->flags & LOOKUP_RCU) { 2988 if (nd->flags & LOOKUP_RCU) {
3022 if (unlikely(unlazy_walk(nd, path->dentry))) { 2989 if (unlikely(unlazy_walk(nd, path->dentry))) {
3023 error = -ECHILD; 2990 error = -ECHILD;
@@ -3046,10 +3013,11 @@ finish_open:
3046 } 3013 }
3047 audit_inode(name, nd->path.dentry, 0); 3014 audit_inode(name, nd->path.dentry, 0);
3048 error = -EISDIR; 3015 error = -EISDIR;
3049 if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) 3016 if ((open_flag & O_CREAT) &&
3017 (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry)))
3050 goto out; 3018 goto out;
3051 error = -ENOTDIR; 3019 error = -ENOTDIR;
3052 if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode)) 3020 if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry))
3053 goto out; 3021 goto out;
3054 if (!S_ISREG(nd->inode->i_mode)) 3022 if (!S_ISREG(nd->inode->i_mode))
3055 will_truncate = false; 3023 will_truncate = false;
@@ -3275,7 +3243,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
3275 nd.root.mnt = mnt; 3243 nd.root.mnt = mnt;
3276 nd.root.dentry = dentry; 3244 nd.root.dentry = dentry;
3277 3245
3278 if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN) 3246 if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN)
3279 return ERR_PTR(-ELOOP); 3247 return ERR_PTR(-ELOOP);
3280 3248
3281 file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU); 3249 file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU);
@@ -3325,8 +3293,9 @@ struct dentry *kern_path_create(int dfd, const char *pathname,
3325 goto unlock; 3293 goto unlock;
3326 3294
3327 error = -EEXIST; 3295 error = -EEXIST;
3328 if (dentry->d_inode) 3296 if (d_is_positive(dentry))
3329 goto fail; 3297 goto fail;
3298
3330 /* 3299 /*
3331 * Special case - lookup gave negative, but... we had foo/bar/ 3300 * Special case - lookup gave negative, but... we had foo/bar/
3332 * From the vfs_mknod() POV we just have a negative dentry - 3301 * From the vfs_mknod() POV we just have a negative dentry -
@@ -3647,8 +3616,27 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
3647 return do_rmdir(AT_FDCWD, pathname); 3616 return do_rmdir(AT_FDCWD, pathname);
3648} 3617}
3649 3618
3650int vfs_unlink(struct inode *dir, struct dentry *dentry) 3619/**
3620 * vfs_unlink - unlink a filesystem object
3621 * @dir: parent directory
3622 * @dentry: victim
3623 * @delegated_inode: returns victim inode, if the inode is delegated.
3624 *
3625 * The caller must hold dir->i_mutex.
3626 *
3627 * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
3628 * return a reference to the inode in delegated_inode. The caller
3629 * should then break the delegation on that inode and retry. Because
3630 * breaking a delegation may take a long time, the caller should drop
3631 * dir->i_mutex before doing so.
3632 *
3633 * Alternatively, a caller may pass NULL for delegated_inode. This may
3634 * be appropriate for callers that expect the underlying filesystem not
3635 * to be NFS exported.
3636 */
3637int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
3651{ 3638{
3639 struct inode *target = dentry->d_inode;
3652 int error = may_delete(dir, dentry, 0); 3640 int error = may_delete(dir, dentry, 0);
3653 3641
3654 if (error) 3642 if (error)
@@ -3657,22 +3645,26 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
3657 if (!dir->i_op->unlink) 3645 if (!dir->i_op->unlink)
3658 return -EPERM; 3646 return -EPERM;
3659 3647
3660 mutex_lock(&dentry->d_inode->i_mutex); 3648 mutex_lock(&target->i_mutex);
3661 if (d_mountpoint(dentry)) 3649 if (d_mountpoint(dentry))
3662 error = -EBUSY; 3650 error = -EBUSY;
3663 else { 3651 else {
3664 error = security_inode_unlink(dir, dentry); 3652 error = security_inode_unlink(dir, dentry);
3665 if (!error) { 3653 if (!error) {
3654 error = try_break_deleg(target, delegated_inode);
3655 if (error)
3656 goto out;
3666 error = dir->i_op->unlink(dir, dentry); 3657 error = dir->i_op->unlink(dir, dentry);
3667 if (!error) 3658 if (!error)
3668 dont_mount(dentry); 3659 dont_mount(dentry);
3669 } 3660 }
3670 } 3661 }
3671 mutex_unlock(&dentry->d_inode->i_mutex); 3662out:
3663 mutex_unlock(&target->i_mutex);
3672 3664
3673 /* We don't d_delete() NFS sillyrenamed files--they still exist. */ 3665 /* We don't d_delete() NFS sillyrenamed files--they still exist. */
3674 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { 3666 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
3675 fsnotify_link_count(dentry->d_inode); 3667 fsnotify_link_count(target);
3676 d_delete(dentry); 3668 d_delete(dentry);
3677 } 3669 }
3678 3670
@@ -3692,6 +3684,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
3692 struct dentry *dentry; 3684 struct dentry *dentry;
3693 struct nameidata nd; 3685 struct nameidata nd;
3694 struct inode *inode = NULL; 3686 struct inode *inode = NULL;
3687 struct inode *delegated_inode = NULL;
3695 unsigned int lookup_flags = 0; 3688 unsigned int lookup_flags = 0;
3696retry: 3689retry:
3697 name = user_path_parent(dfd, pathname, &nd, lookup_flags); 3690 name = user_path_parent(dfd, pathname, &nd, lookup_flags);
@@ -3706,7 +3699,7 @@ retry:
3706 error = mnt_want_write(nd.path.mnt); 3699 error = mnt_want_write(nd.path.mnt);
3707 if (error) 3700 if (error)
3708 goto exit1; 3701 goto exit1;
3709 3702retry_deleg:
3710 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 3703 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
3711 dentry = lookup_hash(&nd); 3704 dentry = lookup_hash(&nd);
3712 error = PTR_ERR(dentry); 3705 error = PTR_ERR(dentry);
@@ -3715,19 +3708,25 @@ retry:
3715 if (nd.last.name[nd.last.len]) 3708 if (nd.last.name[nd.last.len])
3716 goto slashes; 3709 goto slashes;
3717 inode = dentry->d_inode; 3710 inode = dentry->d_inode;
3718 if (!inode) 3711 if (d_is_negative(dentry))
3719 goto slashes; 3712 goto slashes;
3720 ihold(inode); 3713 ihold(inode);
3721 error = security_path_unlink(&nd.path, dentry); 3714 error = security_path_unlink(&nd.path, dentry);
3722 if (error) 3715 if (error)
3723 goto exit2; 3716 goto exit2;
3724 error = vfs_unlink(nd.path.dentry->d_inode, dentry); 3717 error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode);
3725exit2: 3718exit2:
3726 dput(dentry); 3719 dput(dentry);
3727 } 3720 }
3728 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 3721 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
3729 if (inode) 3722 if (inode)
3730 iput(inode); /* truncate the inode here */ 3723 iput(inode); /* truncate the inode here */
3724 inode = NULL;
3725 if (delegated_inode) {
3726 error = break_deleg_wait(&delegated_inode);
3727 if (!error)
3728 goto retry_deleg;
3729 }
3731 mnt_drop_write(nd.path.mnt); 3730 mnt_drop_write(nd.path.mnt);
3732exit1: 3731exit1:
3733 path_put(&nd.path); 3732 path_put(&nd.path);
@@ -3740,8 +3739,12 @@ exit1:
3740 return error; 3739 return error;
3741 3740
3742slashes: 3741slashes:
3743 error = !dentry->d_inode ? -ENOENT : 3742 if (d_is_negative(dentry))
3744 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; 3743 error = -ENOENT;
3744 else if (d_is_directory(dentry) || d_is_autodir(dentry))
3745 error = -EISDIR;
3746 else
3747 error = -ENOTDIR;
3745 goto exit2; 3748 goto exit2;
3746} 3749}
3747 3750
@@ -3817,7 +3820,26 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
3817 return sys_symlinkat(oldname, AT_FDCWD, newname); 3820 return sys_symlinkat(oldname, AT_FDCWD, newname);
3818} 3821}
3819 3822
3820int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) 3823/**
3824 * vfs_link - create a new link
3825 * @old_dentry: object to be linked
3826 * @dir: new parent
3827 * @new_dentry: where to create the new link
3828 * @delegated_inode: returns inode needing a delegation break
3829 *
3830 * The caller must hold dir->i_mutex
3831 *
3832 * If vfs_link discovers a delegation on the to-be-linked file in need
3833 * of breaking, it will return -EWOULDBLOCK and return a reference to the
3834 * inode in delegated_inode. The caller should then break the delegation
3835 * and retry. Because breaking a delegation may take a long time, the
3836 * caller should drop the i_mutex before doing so.
3837 *
3838 * Alternatively, a caller may pass NULL for delegated_inode. This may
3839 * be appropriate for callers that expect the underlying filesystem not
3840 * to be NFS exported.
3841 */
3842int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
3821{ 3843{
3822 struct inode *inode = old_dentry->d_inode; 3844 struct inode *inode = old_dentry->d_inode;
3823 unsigned max_links = dir->i_sb->s_max_links; 3845 unsigned max_links = dir->i_sb->s_max_links;
@@ -3853,8 +3875,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
3853 error = -ENOENT; 3875 error = -ENOENT;
3854 else if (max_links && inode->i_nlink >= max_links) 3876 else if (max_links && inode->i_nlink >= max_links)
3855 error = -EMLINK; 3877 error = -EMLINK;
3856 else 3878 else {
3857 error = dir->i_op->link(old_dentry, dir, new_dentry); 3879 error = try_break_deleg(inode, delegated_inode);
3880 if (!error)
3881 error = dir->i_op->link(old_dentry, dir, new_dentry);
3882 }
3858 3883
3859 if (!error && (inode->i_state & I_LINKABLE)) { 3884 if (!error && (inode->i_state & I_LINKABLE)) {
3860 spin_lock(&inode->i_lock); 3885 spin_lock(&inode->i_lock);
@@ -3881,6 +3906,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3881{ 3906{
3882 struct dentry *new_dentry; 3907 struct dentry *new_dentry;
3883 struct path old_path, new_path; 3908 struct path old_path, new_path;
3909 struct inode *delegated_inode = NULL;
3884 int how = 0; 3910 int how = 0;
3885 int error; 3911 int error;
3886 3912
@@ -3919,9 +3945,14 @@ retry:
3919 error = security_path_link(old_path.dentry, &new_path, new_dentry); 3945 error = security_path_link(old_path.dentry, &new_path, new_dentry);
3920 if (error) 3946 if (error)
3921 goto out_dput; 3947 goto out_dput;
3922 error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry); 3948 error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
3923out_dput: 3949out_dput:
3924 done_path_create(&new_path, new_dentry); 3950 done_path_create(&new_path, new_dentry);
3951 if (delegated_inode) {
3952 error = break_deleg_wait(&delegated_inode);
3953 if (!error)
3954 goto retry;
3955 }
3925 if (retry_estale(error, how)) { 3956 if (retry_estale(error, how)) {
3926 how |= LOOKUP_REVAL; 3957 how |= LOOKUP_REVAL;
3927 goto retry; 3958 goto retry;
@@ -3946,7 +3977,8 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
3946 * That's where 4.4 screws up. Current fix: serialization on 3977 * That's where 4.4 screws up. Current fix: serialization on
3947 * sb->s_vfs_rename_mutex. We might be more accurate, but that's another 3978 * sb->s_vfs_rename_mutex. We might be more accurate, but that's another
3948 * story. 3979 * story.
3949 * c) we have to lock _three_ objects - parents and victim (if it exists). 3980 * c) we have to lock _four_ objects - parents and victim (if it exists),
3981 * and source (if it is not a directory).
3950 * And that - after we got ->i_mutex on parents (until then we don't know 3982 * And that - after we got ->i_mutex on parents (until then we don't know
3951 * whether the target exists). Solution: try to be smart with locking 3983 * whether the target exists). Solution: try to be smart with locking
3952 * order for inodes. We rely on the fact that tree topology may change 3984 * order for inodes. We rely on the fact that tree topology may change
@@ -4019,9 +4051,11 @@ out:
4019} 4051}
4020 4052
4021static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, 4053static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
4022 struct inode *new_dir, struct dentry *new_dentry) 4054 struct inode *new_dir, struct dentry *new_dentry,
4055 struct inode **delegated_inode)
4023{ 4056{
4024 struct inode *target = new_dentry->d_inode; 4057 struct inode *target = new_dentry->d_inode;
4058 struct inode *source = old_dentry->d_inode;
4025 int error; 4059 int error;
4026 4060
4027 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 4061 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
@@ -4029,13 +4063,20 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
4029 return error; 4063 return error;
4030 4064
4031 dget(new_dentry); 4065 dget(new_dentry);
4032 if (target) 4066 lock_two_nondirectories(source, target);
4033 mutex_lock(&target->i_mutex);
4034 4067
4035 error = -EBUSY; 4068 error = -EBUSY;
4036 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 4069 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
4037 goto out; 4070 goto out;
4038 4071
4072 error = try_break_deleg(source, delegated_inode);
4073 if (error)
4074 goto out;
4075 if (target) {
4076 error = try_break_deleg(target, delegated_inode);
4077 if (error)
4078 goto out;
4079 }
4039 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 4080 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
4040 if (error) 4081 if (error)
4041 goto out; 4082 goto out;
@@ -4045,17 +4086,38 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
4045 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 4086 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
4046 d_move(old_dentry, new_dentry); 4087 d_move(old_dentry, new_dentry);
4047out: 4088out:
4048 if (target) 4089 unlock_two_nondirectories(source, target);
4049 mutex_unlock(&target->i_mutex);
4050 dput(new_dentry); 4090 dput(new_dentry);
4051 return error; 4091 return error;
4052} 4092}
4053 4093
4094/**
4095 * vfs_rename - rename a filesystem object
4096 * @old_dir: parent of source
4097 * @old_dentry: source
4098 * @new_dir: parent of destination
4099 * @new_dentry: destination
4100 * @delegated_inode: returns an inode needing a delegation break
4101 *
4102 * The caller must hold multiple mutexes--see lock_rename()).
4103 *
4104 * If vfs_rename discovers a delegation in need of breaking at either
4105 * the source or destination, it will return -EWOULDBLOCK and return a
4106 * reference to the inode in delegated_inode. The caller should then
4107 * break the delegation and retry. Because breaking a delegation may
4108 * take a long time, the caller should drop all locks before doing
4109 * so.
4110 *
4111 * Alternatively, a caller may pass NULL for delegated_inode. This may
4112 * be appropriate for callers that expect the underlying filesystem not
4113 * to be NFS exported.
4114 */
4054int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 4115int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4055 struct inode *new_dir, struct dentry *new_dentry) 4116 struct inode *new_dir, struct dentry *new_dentry,
4117 struct inode **delegated_inode)
4056{ 4118{
4057 int error; 4119 int error;
4058 int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); 4120 int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry);
4059 const unsigned char *old_name; 4121 const unsigned char *old_name;
4060 4122
4061 if (old_dentry->d_inode == new_dentry->d_inode) 4123 if (old_dentry->d_inode == new_dentry->d_inode)
@@ -4080,7 +4142,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4080 if (is_dir) 4142 if (is_dir)
4081 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 4143 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
4082 else 4144 else
4083 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 4145 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode);
4084 if (!error) 4146 if (!error)
4085 fsnotify_move(old_dir, new_dir, old_name, is_dir, 4147 fsnotify_move(old_dir, new_dir, old_name, is_dir,
4086 new_dentry->d_inode, old_dentry); 4148 new_dentry->d_inode, old_dentry);
@@ -4096,6 +4158,7 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
4096 struct dentry *old_dentry, *new_dentry; 4158 struct dentry *old_dentry, *new_dentry;
4097 struct dentry *trap; 4159 struct dentry *trap;
4098 struct nameidata oldnd, newnd; 4160 struct nameidata oldnd, newnd;
4161 struct inode *delegated_inode = NULL;
4099 struct filename *from; 4162 struct filename *from;
4100 struct filename *to; 4163 struct filename *to;
4101 unsigned int lookup_flags = 0; 4164 unsigned int lookup_flags = 0;
@@ -4135,6 +4198,7 @@ retry:
4135 newnd.flags &= ~LOOKUP_PARENT; 4198 newnd.flags &= ~LOOKUP_PARENT;
4136 newnd.flags |= LOOKUP_RENAME_TARGET; 4199 newnd.flags |= LOOKUP_RENAME_TARGET;
4137 4200
4201retry_deleg:
4138 trap = lock_rename(new_dir, old_dir); 4202 trap = lock_rename(new_dir, old_dir);
4139 4203
4140 old_dentry = lookup_hash(&oldnd); 4204 old_dentry = lookup_hash(&oldnd);
@@ -4143,10 +4207,10 @@ retry:
4143 goto exit3; 4207 goto exit3;
4144 /* source must exist */ 4208 /* source must exist */
4145 error = -ENOENT; 4209 error = -ENOENT;
4146 if (!old_dentry->d_inode) 4210 if (d_is_negative(old_dentry))
4147 goto exit4; 4211 goto exit4;
4148 /* unless the source is a directory trailing slashes give -ENOTDIR */ 4212 /* unless the source is a directory trailing slashes give -ENOTDIR */
4149 if (!S_ISDIR(old_dentry->d_inode->i_mode)) { 4213 if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) {
4150 error = -ENOTDIR; 4214 error = -ENOTDIR;
4151 if (oldnd.last.name[oldnd.last.len]) 4215 if (oldnd.last.name[oldnd.last.len])
4152 goto exit4; 4216 goto exit4;
@@ -4171,13 +4235,19 @@ retry:
4171 if (error) 4235 if (error)
4172 goto exit5; 4236 goto exit5;
4173 error = vfs_rename(old_dir->d_inode, old_dentry, 4237 error = vfs_rename(old_dir->d_inode, old_dentry,
4174 new_dir->d_inode, new_dentry); 4238 new_dir->d_inode, new_dentry,
4239 &delegated_inode);
4175exit5: 4240exit5:
4176 dput(new_dentry); 4241 dput(new_dentry);
4177exit4: 4242exit4:
4178 dput(old_dentry); 4243 dput(old_dentry);
4179exit3: 4244exit3:
4180 unlock_rename(new_dir, old_dir); 4245 unlock_rename(new_dir, old_dir);
4246 if (delegated_inode) {
4247 error = break_deleg_wait(&delegated_inode);
4248 if (!error)
4249 goto retry_deleg;
4250 }
4181 mnt_drop_write(oldnd.path.mnt); 4251 mnt_drop_write(oldnd.path.mnt);
4182exit2: 4252exit2:
4183 if (retry_estale(error, lookup_flags)) 4253 if (retry_estale(error, lookup_flags))