aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederman@twitter.com>2013-10-01 21:33:48 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2014-10-09 02:38:56 -0400
commit8ed936b5671bfb33d89bc60bdcc7cf0470ba52fe (patch)
treeb20ff83dba79142efd005327dee38289f5c5b5cc
parent80b5dce8c59b0de1ed6e403b8298e02dcb4db64b (diff)
vfs: Lazily remove mounts on unlinked files and directories.
With the introduction of mount namespaces and bind mounts it became possible to access files and directories that on some paths are mount points but are not mount points on other paths. It is very confusing when rm -rf somedir returns -EBUSY simply because somedir is mounted somewhere else. With the addition of user namespaces allowing unprivileged mounts this condition has gone from annoying to allowing a DoS attack on other users in the system. The possibility for mischief is removed by updating the vfs to support rename, unlink and rmdir on a dentry that is a mountpoint and by lazily unmounting mountpoints on deleted dentries. In particular this change allows rename, unlink and rmdir system calls on a dentry without a mountpoint in the current mount namespace to succeed, and it allows rename, unlink, and rmdir performed on a distributed filesystem to update the vfs cache even when there is a mount in some namespace on the original dentry. There are two common patterns of maintaining mounts: Mounts on trusted paths with the parent directory of the mount point and all ancestor directories up to / owned by root and modifiable only by root (e.g. /media/xxx, /dev, /dev/pts, /proc, /sys, /sys/fs/cgroup/{cpu, cpuacct, ...}, /usr, /usr/local). Mounts on unprivileged directories maintained by fusermount. In the case of mounts in trusted directories owned by root and modifiable only by root the current parent directory permissions are sufficient to ensure a mount point on a trusted path is not removed or renamed by anyone other than root, even if there is a context where there are no mount points to prevent this. In the case of mounts in directories owned by less privileged users races with users modifying the path of a mount point are already a danger. fusermount already uses a combination of chdir, /proc/<pid>/fd/NNN, and UMOUNT_NOFOLLOW to prevent these races. 
The removal of global rename, unlink, and rmdir protection really adds nothing new to consider, only a widening of the attack window, and fusermount is already safe against unprivileged users modifying the directory simultaneously. In principle for perfect userspace programs returning -EBUSY for unlink, rmdir, and rename of dentries that have mounts in the local namespace is actually unnecessary. Unfortunately not all userspace programs are perfect so retaining -EBUSY for unlink, rmdir and rename of dentries that have mounts in the current mount namespace plays an important role in maintaining consistency with historical behavior and making imperfect userspace applications hard to exploit. v2: Remove spurious old_dentry. v3: Optimized shrink_submounts_and_drop. Removed unused afs label. v4: Simplified the changes to check_submounts_and_drop. Do not rename check_submounts_and_drop to shrink_submounts_and_drop. Document why we need atomicity in check_submounts_and_drop. Rely on the parent inode mutex to make d_revalidate and d_invalidate an atomic unit. v5: Refcount the mountpoint to detach in case of simultaneous renames. Reviewed-by: Miklos Szeredi <miklos@szeredi.hu> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--fs/dcache.c60
-rw-r--r--fs/namei.c12
2 files changed, 39 insertions, 33 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index 8150e4e9e88b..484114a4db93 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1343,36 +1343,39 @@ void shrink_dcache_for_umount(struct super_block *sb)
1343 } 1343 }
1344} 1344}
1345 1345
1346static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry) 1346struct detach_data {
1347 struct select_data select;
1348 struct dentry *mountpoint;
1349};
1350static enum d_walk_ret detach_and_collect(void *_data, struct dentry *dentry)
1347{ 1351{
1348 struct select_data *data = _data; 1352 struct detach_data *data = _data;
1349 1353
1350 if (d_mountpoint(dentry)) { 1354 if (d_mountpoint(dentry)) {
1351 data->found = -EBUSY; 1355 __dget_dlock(dentry);
1356 data->mountpoint = dentry;
1352 return D_WALK_QUIT; 1357 return D_WALK_QUIT;
1353 } 1358 }
1354 1359
1355 return select_collect(_data, dentry); 1360 return select_collect(&data->select, dentry);
1356} 1361}
1357 1362
1358static void check_and_drop(void *_data) 1363static void check_and_drop(void *_data)
1359{ 1364{
1360 struct select_data *data = _data; 1365 struct detach_data *data = _data;
1361 1366
1362 if (d_mountpoint(data->start)) 1367 if (!data->mountpoint && !data->select.found)
1363 data->found = -EBUSY; 1368 __d_drop(data->select.start);
1364 if (!data->found)
1365 __d_drop(data->start);
1366} 1369}
1367 1370
1368/** 1371/**
1369 * check_submounts_and_drop - prune dcache, check for submounts and drop 1372 * check_submounts_and_drop - detach submounts, prune dcache, and drop
1370 * 1373 *
1371 * All done as a single atomic operation relative to has_unlinked_ancestor(). 1374 * The final d_drop is done as an atomic operation relative to
1372 * Returns 0 if successfully unhashed @parent. If there were submounts then 1375 * rename_lock ensuring there are no races with d_set_mounted. This
1373 * return -EBUSY. 1376 * ensures there are no unhashed dentries on the path to a mountpoint.
1374 * 1377 *
1375 * @dentry: dentry to prune and drop 1378 * @dentry: dentry to detach, prune and drop
1376 */ 1379 */
1377int check_submounts_and_drop(struct dentry *dentry) 1380int check_submounts_and_drop(struct dentry *dentry)
1378{ 1381{
@@ -1385,19 +1388,24 @@ int check_submounts_and_drop(struct dentry *dentry)
1385 } 1388 }
1386 1389
1387 for (;;) { 1390 for (;;) {
1388 struct select_data data; 1391 struct detach_data data;
1389 1392
1390 INIT_LIST_HEAD(&data.dispose); 1393 data.mountpoint = NULL;
1391 data.start = dentry; 1394 INIT_LIST_HEAD(&data.select.dispose);
1392 data.found = 0; 1395 data.select.start = dentry;
1396 data.select.found = 0;
1397
1398 d_walk(dentry, &data, detach_and_collect, check_and_drop);
1393 1399
1394 d_walk(dentry, &data, check_and_collect, check_and_drop); 1400 if (data.select.found)
1395 ret = data.found; 1401 shrink_dentry_list(&data.select.dispose);
1396 1402
1397 if (!list_empty(&data.dispose)) 1403 if (data.mountpoint) {
1398 shrink_dentry_list(&data.dispose); 1404 detach_mounts(data.mountpoint);
1405 dput(data.mountpoint);
1406 }
1399 1407
1400 if (ret <= 0) 1408 if (!data.mountpoint && !data.select.found)
1401 break; 1409 break;
1402 1410
1403 cond_resched(); 1411 cond_resched();
@@ -2639,10 +2647,8 @@ static struct dentry *__d_unalias(struct inode *inode,
2639 goto out_err; 2647 goto out_err;
2640 m2 = &alias->d_parent->d_inode->i_mutex; 2648 m2 = &alias->d_parent->d_inode->i_mutex;
2641out_unalias: 2649out_unalias:
2642 if (likely(!d_mountpoint(alias))) { 2650 __d_move(alias, dentry, false);
2643 __d_move(alias, dentry, false); 2651 ret = alias;
2644 ret = alias;
2645 }
2646out_err: 2652out_err:
2647 spin_unlock(&inode->i_lock); 2653 spin_unlock(&inode->i_lock);
2648 if (m2) 2654 if (m2)
diff --git a/fs/namei.c b/fs/namei.c
index a3a14b033b0d..2ba10904dba0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3567,8 +3567,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
3567 error = -EBUSY; 3567 error = -EBUSY;
3568 if (is_local_mountpoint(dentry)) 3568 if (is_local_mountpoint(dentry))
3569 goto out; 3569 goto out;
3570 if (d_mountpoint(dentry))
3571 goto out;
3572 3570
3573 error = security_inode_rmdir(dir, dentry); 3571 error = security_inode_rmdir(dir, dentry);
3574 if (error) 3572 if (error)
@@ -3581,6 +3579,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
3581 3579
3582 dentry->d_inode->i_flags |= S_DEAD; 3580 dentry->d_inode->i_flags |= S_DEAD;
3583 dont_mount(dentry); 3581 dont_mount(dentry);
3582 detach_mounts(dentry);
3584 3583
3585out: 3584out:
3586 mutex_unlock(&dentry->d_inode->i_mutex); 3585 mutex_unlock(&dentry->d_inode->i_mutex);
@@ -3683,7 +3682,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
3683 return -EPERM; 3682 return -EPERM;
3684 3683
3685 mutex_lock(&target->i_mutex); 3684 mutex_lock(&target->i_mutex);
3686 if (is_local_mountpoint(dentry) || d_mountpoint(dentry)) 3685 if (is_local_mountpoint(dentry))
3687 error = -EBUSY; 3686 error = -EBUSY;
3688 else { 3687 else {
3689 error = security_inode_unlink(dir, dentry); 3688 error = security_inode_unlink(dir, dentry);
@@ -3692,8 +3691,10 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
3692 if (error) 3691 if (error)
3693 goto out; 3692 goto out;
3694 error = dir->i_op->unlink(dir, dentry); 3693 error = dir->i_op->unlink(dir, dentry);
3695 if (!error) 3694 if (!error) {
3696 dont_mount(dentry); 3695 dont_mount(dentry);
3696 detach_mounts(dentry);
3697 }
3697 } 3698 }
3698 } 3699 }
3699out: 3700out:
@@ -4130,8 +4131,6 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4130 error = -EBUSY; 4131 error = -EBUSY;
4131 if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry)) 4132 if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
4132 goto out; 4133 goto out;
4133 if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
4134 goto out;
4135 4134
4136 if (max_links && new_dir != old_dir) { 4135 if (max_links && new_dir != old_dir) {
4137 error = -EMLINK; 4136 error = -EMLINK;
@@ -4168,6 +4167,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4168 if (is_dir) 4167 if (is_dir)
4169 target->i_flags |= S_DEAD; 4168 target->i_flags |= S_DEAD;
4170 dont_mount(new_dentry); 4169 dont_mount(new_dentry);
4170 detach_mounts(new_dentry);
4171 } 4171 }
4172 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) { 4172 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) {
4173 if (!(flags & RENAME_EXCHANGE)) 4173 if (!(flags & RENAME_EXCHANGE))