1 files changed, 280 insertions, 99 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 8df7a78ace58..8f7b41a14882 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -368,18 +368,6 @@ void path_get(struct path *path)
 EXPORT_SYMBOL(path_get);
 /**
- * path_get_long - get a long reference to a path
- * @path: path to get the reference to
- *
- * Given a path increment the reference count to the dentry and the vfsmount.
- */
-void path_get_long(struct path *path)
-{
-        mntget_long(path->mnt);
-        dget(path->dentry);
-}
-/**
 * path_put - put a reference to a path
 * @path: path to put the reference to
 *
@@ -393,18 +381,6 @@ void path_put(struct path *path)
 EXPORT_SYMBOL(path_put);
 /**
- * path_put_long - put a long reference to a path
- * @path: path to put the reference to
- *
- * Given a path decrement the reference count to the dentry and the vfsmount.
- */
-void path_put_long(struct path *path)
-{
-        dput(path->dentry);
-        mntput_long(path->mnt);
-}
-/**
 * nameidata_drop_rcu - drop this nameidata out of rcu-walk
 * @nd: nameidata pathwalk data to drop
 * Returns: 0 on success, -ECHILD on failure
@@ -800,12 +776,8 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
        touch_atime(link->mnt, dentry);
        nd_set_link(nd, NULL);
-        if (link->mnt != nd->path.mnt) {
+        if (link->mnt == nd->path.mnt)
-                path_to_nameidata(link, nd);
+                mntget(link->mnt);
-                nd->inode = nd->path.dentry->d_inode;
-                dget(dentry);
-        }
-        mntget(link->mnt);
        nd->last_type = LAST_BIND;
        *p = dentry->d_inode->i_op->follow_link(dentry, nd);
@@ -896,54 +868,169 @@ int follow_up(struct path *path)
 }
 /*
- * serialization is taken care of in namespace.c
+ * Perform an automount
+ * - return -EISDIR to tell follow_managed() to stop and return the path we
+ *   were called with.
 */
-static void __follow_mount_rcu(struct nameidata *nd, struct path *path,
+static int follow_automount(struct path *path, unsigned flags,
-                                struct inode **inode)
+                            bool *need_mntput)
 {
-        while (d_mountpoint(path->dentry)) {
+        struct vfsmount *mnt;
-                struct vfsmount *mounted;
+        int err;
-                mounted = __lookup_mnt(path->mnt, path->dentry, 1);
-                if (!mounted)
+        if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
-                        return;
+                return -EREMOTE;
-                path->mnt = mounted;
-                path->dentry = mounted->mnt_root;
+        /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT
-                nd->seq = read_seqcount_begin(&path->dentry->d_seq);
+         * and this is the terminal part of the path.
-                *inode = path->dentry->d_inode;
+         */
+        if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_CONTINUE))
+                return -EISDIR; /* we actually want to stop here */
+        /* We want to mount if someone is trying to open/create a file of any
+         * type under the mountpoint, wants to traverse through the mountpoint
+         * or wants to open the mounted directory.
+         *
+         * We don't want to mount if someone's just doing a stat and they've
+         * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and
+         * appended a '/' to the name.
+         */
+        if (!(flags & LOOKUP_FOLLOW) &&
+            !(flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY |
+                       LOOKUP_OPEN | LOOKUP_CREATE)))
+                return -EISDIR;
+        current->total_link_count++;
+        if (current->total_link_count >= 40)
+                return -ELOOP;
+        mnt = path->dentry->d_op->d_automount(path);
+        if (IS_ERR(mnt)) {
+                /*
+                 * The filesystem is allowed to return -EISDIR here to indicate
+                 * it doesn't want to automount.  For instance, autofs would do
+                 * this so that its userspace daemon can mount on this dentry.
+                 *
+                 * However, we can only permit this if it's a terminal point in
+                 * the path being looked up; if it wasn't then the remainder of
+                 * the path is inaccessible and we should say so.
+                 */
+                if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_CONTINUE))
+                        return -EREMOTE;
+                return PTR_ERR(mnt);
        }
-}
-static int __follow_mount(struct path *path)
+        if (!mnt) /* mount collision */
-{
+                return 0;
-        int res = 0;
-        while (d_mountpoint(path->dentry)) {
+        /* The new mount record should have at least 2 refs to prevent it being
-                struct vfsmount *mounted = lookup_mnt(path);
+         * expired before we get a chance to add it
-                if (!mounted)
+         */
-                        break;
+        BUG_ON(mnt_get_count(mnt) < 2);
+        if (mnt->mnt_sb == path->mnt->mnt_sb &&
+            mnt->mnt_root == path->dentry) {
+                mnt_clear_expiry(mnt);
+                mntput(mnt);
+                mntput(mnt);
+                return -ELOOP;
+        }
+        /* We need to add the mountpoint to the parent.  The filesystem may
+         * have placed it on an expiry list, and so we need to make sure it
+         * won't be expired under us if do_add_mount() fails (do_add_mount()
+         * will eat a reference unconditionally).
+         */
+        mntget(mnt);
+        err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
+        switch (err) {
+        case -EBUSY:
+                /* Someone else made a mount here whilst we were busy */
+                err = 0;
+        default:
+                mnt_clear_expiry(mnt);
+                mntput(mnt);
+                mntput(mnt);
+                return err;
+        case 0:
+                mntput(mnt);
                dput(path->dentry);
-                if (res)
+                if (*need_mntput)
                        mntput(path->mnt);
-                path->mnt = mounted;
+                path->mnt = mnt;
-                path->dentry = dget(mounted->mnt_root);
+                path->dentry = dget(mnt->mnt_root);
-                res = 1;
+                *need_mntput = true;
+                return 0;
        }
-        return res;
 }
-static void follow_mount(struct path *path)
+/*
+ * Handle a dentry that is managed in some way.
+ * - Flagged for transit management (autofs)
+ * - Flagged as mountpoint
+ * - Flagged as automount point
+ *
+ * This may only be called in refwalk mode.
+ *
+ * Serialization is taken care of in namespace.c
+ */
+static int follow_managed(struct path *path, unsigned flags)
 {
-        while (d_mountpoint(path->dentry)) {
+        unsigned managed;
-                struct vfsmount *mounted = lookup_mnt(path);
+        bool need_mntput = false;
-                if (!mounted)
+        int ret;
-                        break;
-                dput(path->dentry);
+        /* Given that we're not holding a lock here, we retain the value in a
-                mntput(path->mnt);
+         * local variable for each dentry as we look at it so that we don't see
-                path->mnt = mounted;
+         * the components of that value change under us */
-                path->dentry = dget(mounted->mnt_root);
+        while (managed = ACCESS_ONCE(path->dentry->d_flags),
+               managed &= DCACHE_MANAGED_DENTRY,
+               unlikely(managed != 0)) {
+                /* Allow the filesystem to manage the transit without i_mutex
+                 * being held. */
+                if (managed & DCACHE_MANAGE_TRANSIT) {
+                        BUG_ON(!path->dentry->d_op);
+                        BUG_ON(!path->dentry->d_op->d_manage);
+                        ret = path->dentry->d_op->d_manage(path->dentry,
+                                                           false, false);
+                        if (ret < 0)
+                                return ret == -EISDIR ? 0 : ret;
+                }
+                /* Transit to a mounted filesystem. */
+                if (managed & DCACHE_MOUNTED) {
+                        struct vfsmount *mounted = lookup_mnt(path);
+                        if (mounted) {
+                                dput(path->dentry);
+                                if (need_mntput)
+                                        mntput(path->mnt);
+                                path->mnt = mounted;
+                                path->dentry = dget(mounted->mnt_root);
+                                need_mntput = true;
+                                continue;
+                        }
+                        /* Something is mounted on this dentry in another
+                         * namespace and/or whatever was mounted there in this
+                         * namespace got unmounted before we managed to get the
+                         * vfsmount_lock */
+                }
+                /* Handle an automount point */
+                if (managed & DCACHE_NEED_AUTOMOUNT) {
+                        ret = follow_automount(path, flags, &need_mntput);
+                        if (ret < 0)
+                                return ret == -EISDIR ? 0 : ret;
+                        continue;
+                }
+                /* We didn't change the current path point */
+                break;
        }
+        return 0;
 }
-int follow_down(struct path *path)
+int follow_down_one(struct path *path)
 {
        struct vfsmount *mounted;
@@ -958,13 +1045,41 @@ int follow_down(struct path *path)
        return 0;
 }
+/*
+ * Skip to top of mountpoint pile in rcuwalk mode.  We abort the rcu-walk if we
+ * meet a managed dentry and we're not walking to "..".  True is returned to
+ * continue, false to abort.
+ */
+static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
+                               struct inode **inode, bool reverse_transit)
+{
+        while (d_mountpoint(path->dentry)) {
+                struct vfsmount *mounted;
+                if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
+                    !reverse_transit &&
+                    path->dentry->d_op->d_manage(path->dentry, false, true) < 0)
+                        return false;
+                mounted = __lookup_mnt(path->mnt, path->dentry, 1);
+                if (!mounted)
+                        break;
+                path->mnt = mounted;
+                path->dentry = mounted->mnt_root;
+                nd->seq = read_seqcount_begin(&path->dentry->d_seq);
+                *inode = path->dentry->d_inode;
+        }
+        if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
+                return reverse_transit;
+        return true;
+}
 static int follow_dotdot_rcu(struct nameidata *nd)
 {
        struct inode *inode = nd->inode;
        set_root_rcu(nd);
-        while(1) {
+        while (1) {
                if (nd->path.dentry == nd->root.dentry &&
                    nd->path.mnt == nd->root.mnt) {
                        break;
@@ -987,12 +1102,80 @@ static int follow_dotdot_rcu(struct nameidata *nd)
                nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
                inode = nd->path.dentry->d_inode;
        }
-        __follow_mount_rcu(nd, &nd->path, &inode);
+        __follow_mount_rcu(nd, &nd->path, &inode, true);
        nd->inode = inode;
        return 0;
 }
+/*
+ * Follow down to the covering mount currently visible to userspace.  At each
+ * point, the filesystem owning that dentry may be queried as to whether the
+ * caller is permitted to proceed or not.
+ *
+ * Care must be taken as namespace_sem may be held (indicated by mounting_here
+ * being true).
+ */
+int follow_down(struct path *path, bool mounting_here)
+{
+        unsigned managed;
+        int ret;
+        while (managed = ACCESS_ONCE(path->dentry->d_flags),
+               unlikely(managed & DCACHE_MANAGED_DENTRY)) {
+                /* Allow the filesystem to manage the transit without i_mutex
+                 * being held.
+                 *
+                 * We indicate to the filesystem if someone is trying to mount
+                 * something here.  This gives autofs the chance to deny anyone
+                 * other than its daemon the right to mount on its
+                 * superstructure.
+                 *
+                 * The filesystem may sleep at this point.
+                 */
+                if (managed & DCACHE_MANAGE_TRANSIT) {
+                        BUG_ON(!path->dentry->d_op);
+                        BUG_ON(!path->dentry->d_op->d_manage);
+                        ret = path->dentry->d_op->d_manage(
+                                path->dentry, mounting_here, false);
+                        if (ret < 0)
+                                return ret == -EISDIR ? 0 : ret;
+                }
+                /* Transit to a mounted filesystem. */
+                if (managed & DCACHE_MOUNTED) {
+                        struct vfsmount *mounted = lookup_mnt(path);
+                        if (!mounted)
+                                break;
+                        dput(path->dentry);
+                        mntput(path->mnt);
+                        path->mnt = mounted;
+                        path->dentry = dget(mounted->mnt_root);
+                        continue;
+                }
+                /* Don't handle automount points here */
+                break;
+        }
+        return 0;
+}
+/*
+ * Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
+ */
+static void follow_mount(struct path *path)
+{
+        while (d_mountpoint(path->dentry)) {
+                struct vfsmount *mounted = lookup_mnt(path);
+                if (!mounted)
+                        break;
+                dput(path->dentry);
+                mntput(path->mnt);
+                path->mnt = mounted;
+                path->dentry = dget(mounted->mnt_root);
+        }
+}
 static void follow_dotdot(struct nameidata *nd)
 {
        set_root(nd);
@@ -1057,12 +1240,14 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
        struct vfsmount *mnt = nd->path.mnt;
        struct dentry *dentry, *parent = nd->path.dentry;
        struct inode *dir;
+        int err;
        /*
         * See if the low-level filesystem might want
         * to use its own hash..
         */
        if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
-                int err = parent->d_op->d_hash(parent, nd->inode, name);
+                err = parent->d_op->d_hash(parent, nd->inode, name);
                if (err < 0)
                        return err;
        }
@@ -1089,22 +1274,28 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
                nd->seq = seq;
                if (dentry->d_flags & DCACHE_OP_REVALIDATE)
                        goto need_revalidate;
+done2:
                path->mnt = mnt;
                path->dentry = dentry;
-                __follow_mount_rcu(nd, path, inode);
+                if (likely(__follow_mount_rcu(nd, path, inode, false)))
-        } else {
+                        return 0;
-                dentry = __d_lookup(parent, name);
+                if (nameidata_drop_rcu(nd))
-                if (!dentry)
+                        return -ECHILD;
-                        goto need_lookup;
+                /* fallthru */
+        }
+        dentry = __d_lookup(parent, name);
+        if (!dentry)
+                goto need_lookup;
 found:
-                if (dentry->d_flags & DCACHE_OP_REVALIDATE)
+        if (dentry->d_flags & DCACHE_OP_REVALIDATE)
-                        goto need_revalidate;
+                goto need_revalidate;
 done:
-                path->mnt = mnt;
+        path->mnt = mnt;
-                path->dentry = dentry;
+        path->dentry = dentry;
-                __follow_mount(path);
+        err = follow_managed(path, nd->flags);
-                *inode = path->dentry->d_inode;
+        if (unlikely(err < 0))
-        }
+                return err;
+        *inode = path->dentry->d_inode;
        return 0;
 need_lookup:
@@ -1143,6 +1334,8 @@ need_revalidate:
                goto need_lookup;
        if (IS_ERR(dentry))
                goto fail;
+        if (nd->flags & LOOKUP_RCU)
+                goto done2;
        goto done;
 fail:
@@ -1150,17 +1343,6 @@ fail:
 }
 /*
- * This is a temporary kludge to deal with "automount" symlinks; proper
- * solution is to trigger them on follow_mount(), so that do_lookup()
- * would DTRT.  To be killed before 2.6.34-final.
- */
-static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
-{
-        return inode && unlikely(inode->i_op->follow_link) &&
-                ((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode));
-}
-/*
 * Name resolution.
 * This is the basic name resolution function, turning a pathname into
 * the final dentry. We expect 'base' to be positive and a directory.
@@ -1298,7 +1480,8 @@ last_component:
                err = do_lookup(nd, &this, &next, &inode);
                if (err)
                        break;
-                if (follow_on_final(inode, lookup_flags)) {
+                if (inode && unlikely(inode->i_op->follow_link) &&
+                    (lookup_flags & LOOKUP_FOLLOW)) {
                        if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
                                return -ECHILD;
                        BUG_ON(inode != next.dentry->d_inode);
@@ -2200,11 +2383,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
        if (open_flag & O_EXCL)
                goto exit_dput;
-        if (__follow_mount(path)) {
+        error = follow_managed(path, nd->flags);
-                error = -ELOOP;
+        if (error < 0)
-                if (open_flag & O_NOFOLLOW)
+                goto exit_dput;
-                        goto exit_dput;
-        }
        error = -ENOENT;
        if (!path->dentry->d_inode)
@@ -2353,8 +2534,7 @@ reval:
                struct inode *linki = link.dentry->d_inode;
                void *cookie;
                error = -ELOOP;
-                /* S_ISDIR part is a temporary automount kludge */
+                if (!(nd.flags & LOOKUP_FOLLOW))
-                if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(linki->i_mode))
                        goto exit_dput;
                if (count++ == 32)
                        goto exit_dput;
@@ -3413,6 +3593,7 @@ const struct inode_operations page_symlink_inode_operations = {
 };
 EXPORT_SYMBOL(user_path_at);
+EXPORT_SYMBOL(follow_down_one);
 EXPORT_SYMBOL(follow_down);
 EXPORT_SYMBOL(follow_up);
 EXPORT_SYMBOL(get_write_access); /* binfmt_aout */

diff --git a/fs/namei.c b/fs/namei.c index 8df7a78ace58..8f7b41a14882 100644 --- a/fs/namei.c +++ b/fs/namei.c
@@ -368,18 +368,6 @@ void path_get(struct path *path)
368	EXPORT_SYMBOL(path_get);	368	EXPORT_SYMBOL(path_get);
369		369
370	/**	370	/**
371	* path_get_long - get a long reference to a path
372	* @path: path to get the reference to
373	*
374	* Given a path increment the reference count to the dentry and the vfsmount.
375	*/
376	void path_get_long(struct path *path)
377	{
378	mntget_long(path->mnt);
379	dget(path->dentry);
380	}
381
382	/**
383	* path_put - put a reference to a path	371	* path_put - put a reference to a path
384	* @path: path to put the reference to	372	* @path: path to put the reference to
385	*	373	*
@@ -393,18 +381,6 @@ void path_put(struct path *path)
393	EXPORT_SYMBOL(path_put);	381	EXPORT_SYMBOL(path_put);
394		382
395	/**	383	/**
396	* path_put_long - put a long reference to a path
397	* @path: path to put the reference to
398	*
399	* Given a path decrement the reference count to the dentry and the vfsmount.
400	*/
401	void path_put_long(struct path *path)
402	{
403	dput(path->dentry);
404	mntput_long(path->mnt);
405	}
406
407	/**
408	* nameidata_drop_rcu - drop this nameidata out of rcu-walk	384	* nameidata_drop_rcu - drop this nameidata out of rcu-walk
409	* @nd: nameidata pathwalk data to drop	385	* @nd: nameidata pathwalk data to drop
410	* Returns: 0 on success, -ECHILD on failure	386	* Returns: 0 on success, -ECHILD on failure
@@ -800,12 +776,8 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
800	touch_atime(link->mnt, dentry);	776	touch_atime(link->mnt, dentry);
801	nd_set_link(nd, NULL);	777	nd_set_link(nd, NULL);
802		778
803	if (link->mnt != nd->path.mnt) {	779	if (link->mnt == nd->path.mnt)
804	path_to_nameidata(link, nd);	780	mntget(link->mnt);
805	nd->inode = nd->path.dentry->d_inode;
806	dget(dentry);
807	}
808	mntget(link->mnt);
809		781
810	nd->last_type = LAST_BIND;	782	nd->last_type = LAST_BIND;
811	*p = dentry->d_inode->i_op->follow_link(dentry, nd);	783	*p = dentry->d_inode->i_op->follow_link(dentry, nd);
@@ -896,54 +868,169 @@ int follow_up(struct path *path)
896	}	868	}
897		869
898	/*	870	/*
899	* serialization is taken care of in namespace.c	871	* Perform an automount
		872	* - return -EISDIR to tell follow_managed() to stop and return the path we
		873	* were called with.
900	*/	874	*/
901	static void __follow_mount_rcu(struct nameidata *nd, struct path *path,	875	static int follow_automount(struct path *path, unsigned flags,
902	struct inode **inode)	876	bool *need_mntput)
903	{	877	{
904	while (d_mountpoint(path->dentry)) {	878	struct vfsmount *mnt;
905	struct vfsmount *mounted;	879	int err;
906	mounted = __lookup_mnt(path->mnt, path->dentry, 1);	880
907	if (!mounted)	881	if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
908	return;	882	return -EREMOTE;
909	path->mnt = mounted;	883
910	path->dentry = mounted->mnt_root;	884	/* We don't want to mount if someone supplied AT_NO_AUTOMOUNT
911	nd->seq = read_seqcount_begin(&path->dentry->d_seq);	885	* and this is the terminal part of the path.
912	*inode = path->dentry->d_inode;	886	*/
		887	if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_CONTINUE))
		888	return -EISDIR; /* we actually want to stop here */
		889
		890	/* We want to mount if someone is trying to open/create a file of any
		891	* type under the mountpoint, wants to traverse through the mountpoint
		892	* or wants to open the mounted directory.
		893	*
		894	* We don't want to mount if someone's just doing a stat and they've
		895	* set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and
		896	* appended a '/' to the name.
		897	*/
		898	if (!(flags & LOOKUP_FOLLOW) &&
		899	!(flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY |
		900	LOOKUP_OPEN | LOOKUP_CREATE)))
		901	return -EISDIR;
		902
		903	current->total_link_count++;
		904	if (current->total_link_count >= 40)
		905	return -ELOOP;
		906
		907	mnt = path->dentry->d_op->d_automount(path);
		908	if (IS_ERR(mnt)) {
		909	/*
		910	* The filesystem is allowed to return -EISDIR here to indicate
		911	* it doesn't want to automount. For instance, autofs would do
		912	* this so that its userspace daemon can mount on this dentry.
		913	*
		914	* However, we can only permit this if it's a terminal point in
		915	* the path being looked up; if it wasn't then the remainder of
		916	* the path is inaccessible and we should say so.
		917	*/
		918	if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_CONTINUE))
		919	return -EREMOTE;
		920	return PTR_ERR(mnt);
913	}	921	}
914	}
915		922
916	static int __follow_mount(struct path *path)	923	if (!mnt) /* mount collision */
917	{	924	return 0;
918	int res = 0;	925
919	while (d_mountpoint(path->dentry)) {	926	/* The new mount record should have at least 2 refs to prevent it being
920	struct vfsmount *mounted = lookup_mnt(path);	927	* expired before we get a chance to add it
921	if (!mounted)	928	*/
922	break;	929	BUG_ON(mnt_get_count(mnt) < 2);
		930
		931	if (mnt->mnt_sb == path->mnt->mnt_sb &&
		932	mnt->mnt_root == path->dentry) {
		933	mnt_clear_expiry(mnt);
		934	mntput(mnt);
		935	mntput(mnt);
		936	return -ELOOP;
		937	}
		938
		939	/* We need to add the mountpoint to the parent. The filesystem may
		940	* have placed it on an expiry list, and so we need to make sure it
		941	* won't be expired under us if do_add_mount() fails (do_add_mount()
		942	* will eat a reference unconditionally).
		943	*/
		944	mntget(mnt);
		945	err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
		946	switch (err) {
		947	case -EBUSY:
		948	/* Someone else made a mount here whilst we were busy */
		949	err = 0;
		950	default:
		951	mnt_clear_expiry(mnt);
		952	mntput(mnt);
		953	mntput(mnt);
		954	return err;
		955	case 0:
		956	mntput(mnt);
923	dput(path->dentry);	957	dput(path->dentry);
924	if (res)	958	if (*need_mntput)
925	mntput(path->mnt);	959	mntput(path->mnt);
926	path->mnt = mounted;	960	path->mnt = mnt;
927	path->dentry = dget(mounted->mnt_root);	961	path->dentry = dget(mnt->mnt_root);
928	res = 1;	962	*need_mntput = true;
		963	return 0;
929	}	964	}
930	return res;
931	}	965	}
932		966
933	static void follow_mount(struct path *path)	967	/*
		968	* Handle a dentry that is managed in some way.
		969	* - Flagged for transit management (autofs)
		970	* - Flagged as mountpoint
		971	* - Flagged as automount point
		972	*
		973	* This may only be called in refwalk mode.
		974	*
		975	* Serialization is taken care of in namespace.c
		976	*/
		977	static int follow_managed(struct path *path, unsigned flags)
934	{	978	{
935	while (d_mountpoint(path->dentry)) {	979	unsigned managed;
936	struct vfsmount *mounted = lookup_mnt(path);	980	bool need_mntput = false;
937	if (!mounted)	981	int ret;
938	break;	982
939	dput(path->dentry);	983	/* Given that we're not holding a lock here, we retain the value in a
940	mntput(path->mnt);	984	* local variable for each dentry as we look at it so that we don't see
941	path->mnt = mounted;	985	* the components of that value change under us */
942	path->dentry = dget(mounted->mnt_root);	986	while (managed = ACCESS_ONCE(path->dentry->d_flags),
		987	managed &= DCACHE_MANAGED_DENTRY,
		988	unlikely(managed != 0)) {
		989	/* Allow the filesystem to manage the transit without i_mutex
		990	* being held. */
		991	if (managed & DCACHE_MANAGE_TRANSIT) {
		992	BUG_ON(!path->dentry->d_op);
		993	BUG_ON(!path->dentry->d_op->d_manage);
		994	ret = path->dentry->d_op->d_manage(path->dentry,
		995	false, false);
		996	if (ret < 0)
		997	return ret == -EISDIR ? 0 : ret;
		998	}
		999
		1000	/* Transit to a mounted filesystem. */
		1001	if (managed & DCACHE_MOUNTED) {
		1002	struct vfsmount *mounted = lookup_mnt(path);
		1003	if (mounted) {
		1004	dput(path->dentry);
		1005	if (need_mntput)
		1006	mntput(path->mnt);
		1007	path->mnt = mounted;
		1008	path->dentry = dget(mounted->mnt_root);
		1009	need_mntput = true;
		1010	continue;
		1011	}
		1012
		1013	/* Something is mounted on this dentry in another
		1014	* namespace and/or whatever was mounted there in this
		1015	* namespace got unmounted before we managed to get the
		1016	* vfsmount_lock */
		1017	}
		1018
		1019	/* Handle an automount point */
		1020	if (managed & DCACHE_NEED_AUTOMOUNT) {
		1021	ret = follow_automount(path, flags, &need_mntput);
		1022	if (ret < 0)
		1023	return ret == -EISDIR ? 0 : ret;
		1024	continue;
		1025	}
		1026
		1027	/* We didn't change the current path point */
		1028	break;
943	}	1029	}
		1030	return 0;
944	}	1031	}
945		1032
946	int follow_down(struct path *path)	1033	int follow_down_one(struct path *path)
947	{	1034	{
948	struct vfsmount *mounted;	1035	struct vfsmount *mounted;
949		1036
@@ -958,13 +1045,41 @@ int follow_down(struct path *path)
958	return 0;	1045	return 0;
959	}	1046	}
960		1047
		1048	/*
		1049	* Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we
		1050	* meet a managed dentry and we're not walking to "..". True is returned to
		1051	* continue, false to abort.
		1052	*/
		1053	static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
		1054	struct inode **inode, bool reverse_transit)
		1055	{
		1056	while (d_mountpoint(path->dentry)) {
		1057	struct vfsmount *mounted;
		1058	if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
		1059	!reverse_transit &&
		1060	path->dentry->d_op->d_manage(path->dentry, false, true) < 0)
		1061	return false;
		1062	mounted = __lookup_mnt(path->mnt, path->dentry, 1);
		1063	if (!mounted)
		1064	break;
		1065	path->mnt = mounted;
		1066	path->dentry = mounted->mnt_root;
		1067	nd->seq = read_seqcount_begin(&path->dentry->d_seq);
		1068	*inode = path->dentry->d_inode;
		1069	}
		1070
		1071	if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
		1072	return reverse_transit;
		1073	return true;
		1074	}
		1075
961	static int follow_dotdot_rcu(struct nameidata *nd)	1076	static int follow_dotdot_rcu(struct nameidata *nd)
962	{	1077	{
963	struct inode *inode = nd->inode;	1078	struct inode *inode = nd->inode;
964		1079
965	set_root_rcu(nd);	1080	set_root_rcu(nd);
966		1081
967	while(1) {	1082	while (1) {
968	if (nd->path.dentry == nd->root.dentry &&	1083	if (nd->path.dentry == nd->root.dentry &&
969	nd->path.mnt == nd->root.mnt) {	1084	nd->path.mnt == nd->root.mnt) {
970	break;	1085	break;
@@ -987,12 +1102,80 @@ static int follow_dotdot_rcu(struct nameidata *nd)
987	nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);	1102	nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
988	inode = nd->path.dentry->d_inode;	1103	inode = nd->path.dentry->d_inode;
989	}	1104	}
990	__follow_mount_rcu(nd, &nd->path, &inode);	1105	__follow_mount_rcu(nd, &nd->path, &inode, true);
991	nd->inode = inode;	1106	nd->inode = inode;
992		1107
993	return 0;	1108	return 0;
994	}	1109	}
995		1110
		1111	/*
		1112	* Follow down to the covering mount currently visible to userspace. At each
		1113	* point, the filesystem owning that dentry may be queried as to whether the
		1114	* caller is permitted to proceed or not.
		1115	*
		1116	* Care must be taken as namespace_sem may be held (indicated by mounting_here
		1117	* being true).
		1118	*/
		1119	int follow_down(struct path *path, bool mounting_here)
		1120	{
		1121	unsigned managed;
		1122	int ret;
		1123
		1124	while (managed = ACCESS_ONCE(path->dentry->d_flags),
		1125	unlikely(managed & DCACHE_MANAGED_DENTRY)) {
		1126	/* Allow the filesystem to manage the transit without i_mutex
		1127	* being held.
		1128	*
		1129	* We indicate to the filesystem if someone is trying to mount
		1130	* something here. This gives autofs the chance to deny anyone
		1131	* other than its daemon the right to mount on its
		1132	* superstructure.
		1133	*
		1134	* The filesystem may sleep at this point.
		1135	*/
		1136	if (managed & DCACHE_MANAGE_TRANSIT) {
		1137	BUG_ON(!path->dentry->d_op);
		1138	BUG_ON(!path->dentry->d_op->d_manage);
		1139	ret = path->dentry->d_op->d_manage(
		1140	path->dentry, mounting_here, false);
		1141	if (ret < 0)
		1142	return ret == -EISDIR ? 0 : ret;
		1143	}
		1144
		1145	/* Transit to a mounted filesystem. */
		1146	if (managed & DCACHE_MOUNTED) {
		1147	struct vfsmount *mounted = lookup_mnt(path);
		1148	if (!mounted)
		1149	break;
		1150	dput(path->dentry);
		1151	mntput(path->mnt);
		1152	path->mnt = mounted;
		1153	path->dentry = dget(mounted->mnt_root);
		1154	continue;
		1155	}
		1156
		1157	/* Don't handle automount points here */
		1158	break;
		1159	}
		1160	return 0;
		1161	}
		1162
		1163	/*
		1164	* Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
		1165	*/
		1166	static void follow_mount(struct path *path)
		1167	{
		1168	while (d_mountpoint(path->dentry)) {
		1169	struct vfsmount *mounted = lookup_mnt(path);
		1170	if (!mounted)
		1171	break;
		1172	dput(path->dentry);
		1173	mntput(path->mnt);
		1174	path->mnt = mounted;
		1175	path->dentry = dget(mounted->mnt_root);
		1176	}
		1177	}
		1178
996	static void follow_dotdot(struct nameidata *nd)	1179	static void follow_dotdot(struct nameidata *nd)
997	{	1180	{
998	set_root(nd);	1181	set_root(nd);
@@ -1057,12 +1240,14 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1057	struct vfsmount *mnt = nd->path.mnt;	1240	struct vfsmount *mnt = nd->path.mnt;
1058	struct dentry dentry, parent = nd->path.dentry;	1241	struct dentry dentry, parent = nd->path.dentry;
1059	struct inode *dir;	1242	struct inode *dir;
		1243	int err;
		1244
1060	/*	1245	/*
1061	* See if the low-level filesystem might want	1246	* See if the low-level filesystem might want
1062	* to use its own hash..	1247	* to use its own hash..
1063	*/	1248	*/
1064	if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {	1249	if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1065	int err = parent->d_op->d_hash(parent, nd->inode, name);	1250	err = parent->d_op->d_hash(parent, nd->inode, name);
1066	if (err < 0)	1251	if (err < 0)
1067	return err;	1252	return err;
1068	}	1253	}
@@ -1089,22 +1274,28 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1089	nd->seq = seq;	1274	nd->seq = seq;
1090	if (dentry->d_flags & DCACHE_OP_REVALIDATE)	1275	if (dentry->d_flags & DCACHE_OP_REVALIDATE)
1091	goto need_revalidate;	1276	goto need_revalidate;
		1277	done2:
1092	path->mnt = mnt;	1278	path->mnt = mnt;
1093	path->dentry = dentry;	1279	path->dentry = dentry;
1094	__follow_mount_rcu(nd, path, inode);	1280	if (likely(__follow_mount_rcu(nd, path, inode, false)))
1095	} else {	1281	return 0;
1096	dentry = __d_lookup(parent, name);	1282	if (nameidata_drop_rcu(nd))
1097	if (!dentry)	1283	return -ECHILD;
1098	goto need_lookup;	1284	/* fallthru */
		1285	}
		1286	dentry = __d_lookup(parent, name);
		1287	if (!dentry)
		1288	goto need_lookup;
1099	found:	1289	found:
1100	if (dentry->d_flags & DCACHE_OP_REVALIDATE)	1290	if (dentry->d_flags & DCACHE_OP_REVALIDATE)
1101	goto need_revalidate;	1291	goto need_revalidate;
1102	done:	1292	done:
1103	path->mnt = mnt;	1293	path->mnt = mnt;
1104	path->dentry = dentry;	1294	path->dentry = dentry;
1105	__follow_mount(path);	1295	err = follow_managed(path, nd->flags);
1106	*inode = path->dentry->d_inode;	1296	if (unlikely(err < 0))
1107	}	1297	return err;
		1298	*inode = path->dentry->d_inode;
1108	return 0;	1299	return 0;
1109		1300
1110	need_lookup:	1301	need_lookup:
@@ -1143,6 +1334,8 @@ need_revalidate:
1143	goto need_lookup;	1334	goto need_lookup;
1144	if (IS_ERR(dentry))	1335	if (IS_ERR(dentry))
1145	goto fail;	1336	goto fail;
		1337	if (nd->flags & LOOKUP_RCU)
		1338	goto done2;
1146	goto done;	1339	goto done;
1147		1340
1148	fail:	1341	fail:
@@ -1150,17 +1343,6 @@ fail:
1150	}	1343	}
1151		1344
1152	/*	1345	/*
1153	* This is a temporary kludge to deal with "automount" symlinks; proper
1154	* solution is to trigger them on follow_mount(), so that do_lookup()
1155	* would DTRT. To be killed before 2.6.34-final.
1156	*/
1157	static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
1158	{
1159	return inode && unlikely(inode->i_op->follow_link) &&
1160	((lookup_flags & LOOKUP_FOLLOW) \|\| S_ISDIR(inode->i_mode));
1161	}
1162
1163	/*
1164	* Name resolution.	1346	* Name resolution.
1165	* This is the basic name resolution function, turning a pathname into	1347	* This is the basic name resolution function, turning a pathname into
1166	* the final dentry. We expect 'base' to be positive and a directory.	1348	* the final dentry. We expect 'base' to be positive and a directory.
@@ -1298,7 +1480,8 @@ last_component:
1298	err = do_lookup(nd, &this, &next, &inode);	1480	err = do_lookup(nd, &this, &next, &inode);
1299	if (err)	1481	if (err)
1300	break;	1482	break;
1301	if (follow_on_final(inode, lookup_flags)) {	1483	if (inode && unlikely(inode->i_op->follow_link) &&
		1484	(lookup_flags & LOOKUP_FOLLOW)) {
1302	if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))	1485	if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1303	return -ECHILD;	1486	return -ECHILD;
1304	BUG_ON(inode != next.dentry->d_inode);	1487	BUG_ON(inode != next.dentry->d_inode);
@@ -2200,11 +2383,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2200	if (open_flag & O_EXCL)	2383	if (open_flag & O_EXCL)
2201	goto exit_dput;	2384	goto exit_dput;
2202		2385
2203	if (__follow_mount(path)) {	2386	error = follow_managed(path, nd->flags);
2204	error = -ELOOP;	2387	if (error < 0)
2205	if (open_flag & O_NOFOLLOW)	2388	goto exit_dput;
2206	goto exit_dput;
2207	}
2208		2389
2209	error = -ENOENT;	2390	error = -ENOENT;
2210	if (!path->dentry->d_inode)	2391	if (!path->dentry->d_inode)
@@ -2353,8 +2534,7 @@ reval:
2353	struct inode *linki = link.dentry->d_inode;	2534	struct inode *linki = link.dentry->d_inode;
2354	void *cookie;	2535	void *cookie;
2355	error = -ELOOP;	2536	error = -ELOOP;
2356	/* S_ISDIR part is a temporary automount kludge */	2537	if (!(nd.flags & LOOKUP_FOLLOW))
2357	if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(linki->i_mode))
2358	goto exit_dput;	2538	goto exit_dput;
2359	if (count++ == 32)	2539	if (count++ == 32)
2360	goto exit_dput;	2540	goto exit_dput;
@@ -3413,6 +3593,7 @@ const struct inode_operations page_symlink_inode_operations = {
3413	};	3593	};
3414		3594
3415	EXPORT_SYMBOL(user_path_at);	3595	EXPORT_SYMBOL(user_path_at);
		3596	EXPORT_SYMBOL(follow_down_one);
3416	EXPORT_SYMBOL(follow_down);	3597	EXPORT_SYMBOL(follow_down);
3417	EXPORT_SYMBOL(follow_up);	3598	EXPORT_SYMBOL(follow_up);
3418	EXPORT_SYMBOL(get_write_access); /* binfmt_aout */	3599	EXPORT_SYMBOL(get_write_access); /* binfmt_aout */