aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMiklos Szeredi <mszeredi@suse.cz>2014-10-23 18:14:38 -0400
committerMiklos Szeredi <mszeredi@suse.cz>2014-10-23 18:14:38 -0400
commite9be9d5e76e34872f0c37d72e25bc27fe9e2c54c (patch)
tree38a55884997b3f796ce41faeea167e65d38e982f
parent46fdb794e3f52ef18b859ebc92f0a9d7db21c5df (diff)
overlay filesystem
Overlayfs allows one, usually read-write, directory tree to be overlaid onto another, read-only directory tree. All modifications go to the upper, writable layer. This type of mechanism is most often used for live CDs but there's a wide variety of other uses. The implementation differs from other "union filesystem" implementations in that after a file is opened all operations go directly to the underlying, lower or upper, filesystems. This simplifies the implementation and allows native performance in these cases. The dentry tree is duplicated from the underlying filesystems, this enables fast cached lookups without adding special support into the VFS. This uses slightly more memory than union mounts, but dentries are relatively small. Currently inodes are duplicated as well, but it is a possible optimization to share inodes for non-directories. Opening non directories results in the open forwarded to the underlying filesystem. This makes the behavior very similar to union mounts (with the same limitations vs. fchmod/fchown on O_RDONLY file descriptors). 
Usage: mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper/upper,workdir=/upper/work /overlay The following contributions have been folded into this patch: Neil Brown <neilb@suse.de>: - minimal remount support - use correct seek function for directories - initialise is_real before use - rename ovl_fill_cache to ovl_dir_read Felix Fietkau <nbd@openwrt.org>: - fix a deadlock in ovl_dir_read_merged - fix a deadlock in ovl_remove_whiteouts Erez Zadok <ezk@fsl.cs.sunysb.edu> - fix cleanup after WARN_ON Sedat Dilek <sedat.dilek@googlemail.com> - fix up permission to conform to new API Robin Dong <hao.bigrat@gmail.com> - fix possible leak in ovl_new_inode - create new inode in ovl_link Andy Whitcroft <apw@canonical.com> - switch to __inode_permission() - copy up i_uid/i_gid from the underlying inode AV: - ovl_copy_up_locked() - dput(ERR_PTR(...)) on two failure exits - ovl_clear_empty() - one failure exit forgetting to do unlock_rename(), lack of check for udir being the parent of upper, dropping and regaining the lock on udir (which would require _another_ check for parent being right). - bogus d_drop() in copyup and rename [fix from your mail] - copyup/remove and copyup/rename races [fix from your mail] - ovl_dir_fsync() leaving ERR_PTR() in ->realfile - ovl_entry_free() is pointless - it's just a kfree_rcu() - fold ovl_do_lookup() into ovl_lookup() - manually assigning ->d_op is wrong. Just use ->s_d_op. [patches picked from Miklos]: * copyup/remove and copyup/rename races * bogus d_drop() in copyup and rename Also thanks to the following people for testing and reporting bugs: Jordi Pujol <jordipujolp@gmail.com> Andy Whitcroft <apw@canonical.com> Michal Suchanek <hramrach@centrum.cz> Felix Fietkau <nbd@openwrt.org> Erez Zadok <ezk@fsl.cs.sunysb.edu> Randy Dunlap <rdunlap@xenotime.net> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/Makefile1
-rw-r--r--fs/overlayfs/Kconfig10
-rw-r--r--fs/overlayfs/Makefile7
-rw-r--r--fs/overlayfs/copy_up.c414
-rw-r--r--fs/overlayfs/dir.c921
-rw-r--r--fs/overlayfs/inode.c425
-rw-r--r--fs/overlayfs/overlayfs.h191
-rw-r--r--fs/overlayfs/readdir.c587
-rw-r--r--fs/overlayfs/super.c727
10 files changed, 3284 insertions, 0 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index db5dc1598716..664991afe0c0 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -67,6 +67,7 @@ source "fs/quota/Kconfig"
67 67
68source "fs/autofs4/Kconfig" 68source "fs/autofs4/Kconfig"
69source "fs/fuse/Kconfig" 69source "fs/fuse/Kconfig"
70source "fs/overlayfs/Kconfig"
70 71
71menu "Caches" 72menu "Caches"
72 73
diff --git a/fs/Makefile b/fs/Makefile
index 90c88529892b..34a1b9dea6dd 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -104,6 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/
104obj-$(CONFIG_AUTOFS4_FS) += autofs4/ 104obj-$(CONFIG_AUTOFS4_FS) += autofs4/
105obj-$(CONFIG_ADFS_FS) += adfs/ 105obj-$(CONFIG_ADFS_FS) += adfs/
106obj-$(CONFIG_FUSE_FS) += fuse/ 106obj-$(CONFIG_FUSE_FS) += fuse/
107obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/
107obj-$(CONFIG_UDF_FS) += udf/ 108obj-$(CONFIG_UDF_FS) += udf/
108obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ 109obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
109obj-$(CONFIG_OMFS_FS) += omfs/ 110obj-$(CONFIG_OMFS_FS) += omfs/
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
new file mode 100644
index 000000000000..e60125976873
--- /dev/null
+++ b/fs/overlayfs/Kconfig
@@ -0,0 +1,10 @@
1config OVERLAYFS_FS
2 tristate "Overlay filesystem support"
3 help
4 An overlay filesystem combines two filesystems - an 'upper' filesystem
5 and a 'lower' filesystem. When a name exists in both filesystems, the
6 object in the 'upper' filesystem is visible while the object in the
7 'lower' filesystem is either hidden or, in the case of directories,
8 merged with the 'upper' object.
9
10 For more information see Documentation/filesystems/overlayfs.txt
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
new file mode 100644
index 000000000000..8f91889480d0
--- /dev/null
+++ b/fs/overlayfs/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the overlay filesystem.
3#
4
5obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o
6
7overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
new file mode 100644
index 000000000000..ea10a8719107
--- /dev/null
+++ b/fs/overlayfs/copy_up.c
@@ -0,0 +1,414 @@
1/*
2 *
3 * Copyright (C) 2011 Novell Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
8 */
9
10#include <linux/fs.h>
11#include <linux/slab.h>
12#include <linux/file.h>
13#include <linux/splice.h>
14#include <linux/xattr.h>
15#include <linux/security.h>
16#include <linux/uaccess.h>
17#include <linux/sched.h>
18#include <linux/namei.h>
19#include "overlayfs.h"
20
21#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
22
23int ovl_copy_xattr(struct dentry *old, struct dentry *new)
24{
25 ssize_t list_size, size;
26 char *buf, *name, *value;
27 int error;
28
29 if (!old->d_inode->i_op->getxattr ||
30 !new->d_inode->i_op->getxattr)
31 return 0;
32
33 list_size = vfs_listxattr(old, NULL, 0);
34 if (list_size <= 0) {
35 if (list_size == -EOPNOTSUPP)
36 return 0;
37 return list_size;
38 }
39
40 buf = kzalloc(list_size, GFP_KERNEL);
41 if (!buf)
42 return -ENOMEM;
43
44 error = -ENOMEM;
45 value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL);
46 if (!value)
47 goto out;
48
49 list_size = vfs_listxattr(old, buf, list_size);
50 if (list_size <= 0) {
51 error = list_size;
52 goto out_free_value;
53 }
54
55 for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
56 size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
57 if (size <= 0) {
58 error = size;
59 goto out_free_value;
60 }
61 error = vfs_setxattr(new, name, value, size, 0);
62 if (error)
63 goto out_free_value;
64 }
65
66out_free_value:
67 kfree(value);
68out:
69 kfree(buf);
70 return error;
71}
72
73static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
74{
75 struct file *old_file;
76 struct file *new_file;
77 loff_t old_pos = 0;
78 loff_t new_pos = 0;
79 int error = 0;
80
81 if (len == 0)
82 return 0;
83
84 old_file = ovl_path_open(old, O_RDONLY);
85 if (IS_ERR(old_file))
86 return PTR_ERR(old_file);
87
88 new_file = ovl_path_open(new, O_WRONLY);
89 if (IS_ERR(new_file)) {
90 error = PTR_ERR(new_file);
91 goto out_fput;
92 }
93
94 /* FIXME: copy up sparse files efficiently */
95 while (len) {
96 size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
97 long bytes;
98
99 if (len < this_len)
100 this_len = len;
101
102 if (signal_pending_state(TASK_KILLABLE, current)) {
103 error = -EINTR;
104 break;
105 }
106
107 bytes = do_splice_direct(old_file, &old_pos,
108 new_file, &new_pos,
109 this_len, SPLICE_F_MOVE);
110 if (bytes <= 0) {
111 error = bytes;
112 break;
113 }
114 WARN_ON(old_pos != new_pos);
115
116 len -= bytes;
117 }
118
119 fput(new_file);
120out_fput:
121 fput(old_file);
122 return error;
123}
124
125static char *ovl_read_symlink(struct dentry *realdentry)
126{
127 int res;
128 char *buf;
129 struct inode *inode = realdentry->d_inode;
130 mm_segment_t old_fs;
131
132 res = -EINVAL;
133 if (!inode->i_op->readlink)
134 goto err;
135
136 res = -ENOMEM;
137 buf = (char *) __get_free_page(GFP_KERNEL);
138 if (!buf)
139 goto err;
140
141 old_fs = get_fs();
142 set_fs(get_ds());
143 /* The cast to a user pointer is valid due to the set_fs() */
144 res = inode->i_op->readlink(realdentry,
145 (char __user *)buf, PAGE_SIZE - 1);
146 set_fs(old_fs);
147 if (res < 0) {
148 free_page((unsigned long) buf);
149 goto err;
150 }
151 buf[res] = '\0';
152
153 return buf;
154
155err:
156 return ERR_PTR(res);
157}
158
159static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
160{
161 struct iattr attr = {
162 .ia_valid =
163 ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
164 .ia_atime = stat->atime,
165 .ia_mtime = stat->mtime,
166 };
167
168 return notify_change(upperdentry, &attr, NULL);
169}
170
171int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
172{
173 int err = 0;
174
175 if (!S_ISLNK(stat->mode)) {
176 struct iattr attr = {
177 .ia_valid = ATTR_MODE,
178 .ia_mode = stat->mode,
179 };
180 err = notify_change(upperdentry, &attr, NULL);
181 }
182 if (!err) {
183 struct iattr attr = {
184 .ia_valid = ATTR_UID | ATTR_GID,
185 .ia_uid = stat->uid,
186 .ia_gid = stat->gid,
187 };
188 err = notify_change(upperdentry, &attr, NULL);
189 }
190 if (!err)
191 ovl_set_timestamps(upperdentry, stat);
192
193 return err;
194
195}
196
197static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
198 struct dentry *dentry, struct path *lowerpath,
199 struct kstat *stat, struct iattr *attr,
200 const char *link)
201{
202 struct inode *wdir = workdir->d_inode;
203 struct inode *udir = upperdir->d_inode;
204 struct dentry *newdentry = NULL;
205 struct dentry *upper = NULL;
206 umode_t mode = stat->mode;
207 int err;
208
209 newdentry = ovl_lookup_temp(workdir, dentry);
210 err = PTR_ERR(newdentry);
211 if (IS_ERR(newdentry))
212 goto out;
213
214 upper = lookup_one_len(dentry->d_name.name, upperdir,
215 dentry->d_name.len);
216 err = PTR_ERR(upper);
217 if (IS_ERR(upper))
218 goto out1;
219
220 /* Can't properly set mode on creation because of the umask */
221 stat->mode &= S_IFMT;
222 err = ovl_create_real(wdir, newdentry, stat, link, NULL, true);
223 stat->mode = mode;
224 if (err)
225 goto out2;
226
227 if (S_ISREG(stat->mode)) {
228 struct path upperpath;
229 ovl_path_upper(dentry, &upperpath);
230 BUG_ON(upperpath.dentry != NULL);
231 upperpath.dentry = newdentry;
232
233 err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
234 if (err)
235 goto out_cleanup;
236 }
237
238 err = ovl_copy_xattr(lowerpath->dentry, newdentry);
239 if (err)
240 goto out_cleanup;
241
242 mutex_lock(&newdentry->d_inode->i_mutex);
243 err = ovl_set_attr(newdentry, stat);
244 if (!err && attr)
245 err = notify_change(newdentry, attr, NULL);
246 mutex_unlock(&newdentry->d_inode->i_mutex);
247 if (err)
248 goto out_cleanup;
249
250 err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
251 if (err)
252 goto out_cleanup;
253
254 ovl_dentry_update(dentry, newdentry);
255 newdentry = NULL;
256
257 /*
258 * Non-directores become opaque when copied up.
259 */
260 if (!S_ISDIR(stat->mode))
261 ovl_dentry_set_opaque(dentry, true);
262out2:
263 dput(upper);
264out1:
265 dput(newdentry);
266out:
267 return err;
268
269out_cleanup:
270 ovl_cleanup(wdir, newdentry);
271 goto out;
272}
273
274/*
275 * Copy up a single dentry
276 *
277 * Directory renames only allowed on "pure upper" (already created on
278 * upper filesystem, never copied up). Directories which are on lower or
279 * are merged may not be renamed. For these -EXDEV is returned and
280 * userspace has to deal with it. This means, when copying up a
281 * directory we can rely on it and ancestors being stable.
282 *
283 * Non-directory renames start with copy up of source if necessary. The
284 * actual rename will only proceed once the copy up was successful. Copy
285 * up uses upper parent i_mutex for exclusion. Since rename can change
286 * d_parent it is possible that the copy up will lock the old parent. At
287 * that point the file will have already been copied up anyway.
288 */
289int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
290 struct path *lowerpath, struct kstat *stat,
291 struct iattr *attr)
292{
293 struct dentry *workdir = ovl_workdir(dentry);
294 int err;
295 struct kstat pstat;
296 struct path parentpath;
297 struct dentry *upperdir;
298 struct dentry *upperdentry;
299 const struct cred *old_cred;
300 struct cred *override_cred;
301 char *link = NULL;
302
303 ovl_path_upper(parent, &parentpath);
304 upperdir = parentpath.dentry;
305
306 err = vfs_getattr(&parentpath, &pstat);
307 if (err)
308 return err;
309
310 if (S_ISLNK(stat->mode)) {
311 link = ovl_read_symlink(lowerpath->dentry);
312 if (IS_ERR(link))
313 return PTR_ERR(link);
314 }
315
316 err = -ENOMEM;
317 override_cred = prepare_creds();
318 if (!override_cred)
319 goto out_free_link;
320
321 override_cred->fsuid = stat->uid;
322 override_cred->fsgid = stat->gid;
323 /*
324 * CAP_SYS_ADMIN for copying up extended attributes
325 * CAP_DAC_OVERRIDE for create
326 * CAP_FOWNER for chmod, timestamp update
327 * CAP_FSETID for chmod
328 * CAP_CHOWN for chown
329 * CAP_MKNOD for mknod
330 */
331 cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
332 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
333 cap_raise(override_cred->cap_effective, CAP_FOWNER);
334 cap_raise(override_cred->cap_effective, CAP_FSETID);
335 cap_raise(override_cred->cap_effective, CAP_CHOWN);
336 cap_raise(override_cred->cap_effective, CAP_MKNOD);
337 old_cred = override_creds(override_cred);
338
339 err = -EIO;
340 if (lock_rename(workdir, upperdir) != NULL) {
341 pr_err("overlayfs: failed to lock workdir+upperdir\n");
342 goto out_unlock;
343 }
344 upperdentry = ovl_dentry_upper(dentry);
345 if (upperdentry) {
346 unlock_rename(workdir, upperdir);
347 err = 0;
348 /* Raced with another copy-up? Do the setattr here */
349 if (attr) {
350 mutex_lock(&upperdentry->d_inode->i_mutex);
351 err = notify_change(upperdentry, attr, NULL);
352 mutex_unlock(&upperdentry->d_inode->i_mutex);
353 }
354 goto out_put_cred;
355 }
356
357 err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
358 stat, attr, link);
359 if (!err) {
360 /* Restore timestamps on parent (best effort) */
361 ovl_set_timestamps(upperdir, &pstat);
362 }
363out_unlock:
364 unlock_rename(workdir, upperdir);
365out_put_cred:
366 revert_creds(old_cred);
367 put_cred(override_cred);
368
369out_free_link:
370 if (link)
371 free_page((unsigned long) link);
372
373 return err;
374}
375
376int ovl_copy_up(struct dentry *dentry)
377{
378 int err;
379
380 err = 0;
381 while (!err) {
382 struct dentry *next;
383 struct dentry *parent;
384 struct path lowerpath;
385 struct kstat stat;
386 enum ovl_path_type type = ovl_path_type(dentry);
387
388 if (type != OVL_PATH_LOWER)
389 break;
390
391 next = dget(dentry);
392 /* find the topmost dentry not yet copied up */
393 for (;;) {
394 parent = dget_parent(next);
395
396 type = ovl_path_type(parent);
397 if (type != OVL_PATH_LOWER)
398 break;
399
400 dput(next);
401 next = parent;
402 }
403
404 ovl_path_lower(next, &lowerpath);
405 err = vfs_getattr(&lowerpath, &stat);
406 if (!err)
407 err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL);
408
409 dput(parent);
410 dput(next);
411 }
412
413 return err;
414}
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
new file mode 100644
index 000000000000..15cd91ad9940
--- /dev/null
+++ b/fs/overlayfs/dir.c
@@ -0,0 +1,921 @@
1/*
2 *
3 * Copyright (C) 2011 Novell Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
8 */
9
10#include <linux/fs.h>
11#include <linux/namei.h>
12#include <linux/xattr.h>
13#include <linux/security.h>
14#include <linux/cred.h>
15#include "overlayfs.h"
16
17void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
18{
19 int err;
20
21 dget(wdentry);
22 if (S_ISDIR(wdentry->d_inode->i_mode))
23 err = ovl_do_rmdir(wdir, wdentry);
24 else
25 err = ovl_do_unlink(wdir, wdentry);
26 dput(wdentry);
27
28 if (err) {
29 pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
30 wdentry, err);
31 }
32}
33
34struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
35{
36 struct dentry *temp;
37 char name[20];
38
39 snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry);
40
41 temp = lookup_one_len(name, workdir, strlen(name));
42 if (!IS_ERR(temp) && temp->d_inode) {
43 pr_err("overlayfs: workdir/%s already exists\n", name);
44 dput(temp);
45 temp = ERR_PTR(-EIO);
46 }
47
48 return temp;
49}
50
51/* caller holds i_mutex on workdir */
52static struct dentry *ovl_whiteout(struct dentry *workdir,
53 struct dentry *dentry)
54{
55 int err;
56 struct dentry *whiteout;
57 struct inode *wdir = workdir->d_inode;
58
59 whiteout = ovl_lookup_temp(workdir, dentry);
60 if (IS_ERR(whiteout))
61 return whiteout;
62
63 err = ovl_do_whiteout(wdir, whiteout);
64 if (err) {
65 dput(whiteout);
66 whiteout = ERR_PTR(err);
67 }
68
69 return whiteout;
70}
71
72int ovl_create_real(struct inode *dir, struct dentry *newdentry,
73 struct kstat *stat, const char *link,
74 struct dentry *hardlink, bool debug)
75{
76 int err;
77
78 if (newdentry->d_inode)
79 return -ESTALE;
80
81 if (hardlink) {
82 err = ovl_do_link(hardlink, dir, newdentry, debug);
83 } else {
84 switch (stat->mode & S_IFMT) {
85 case S_IFREG:
86 err = ovl_do_create(dir, newdentry, stat->mode, debug);
87 break;
88
89 case S_IFDIR:
90 err = ovl_do_mkdir(dir, newdentry, stat->mode, debug);
91 break;
92
93 case S_IFCHR:
94 case S_IFBLK:
95 case S_IFIFO:
96 case S_IFSOCK:
97 err = ovl_do_mknod(dir, newdentry,
98 stat->mode, stat->rdev, debug);
99 break;
100
101 case S_IFLNK:
102 err = ovl_do_symlink(dir, newdentry, link, debug);
103 break;
104
105 default:
106 err = -EPERM;
107 }
108 }
109 if (!err && WARN_ON(!newdentry->d_inode)) {
110 /*
111 * Not quite sure if non-instantiated dentry is legal or not.
112 * VFS doesn't seem to care so check and warn here.
113 */
114 err = -ENOENT;
115 }
116 return err;
117}
118
119static int ovl_set_opaque(struct dentry *upperdentry)
120{
121 return ovl_do_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0);
122}
123
124static void ovl_remove_opaque(struct dentry *upperdentry)
125{
126 int err;
127
128 err = ovl_do_removexattr(upperdentry, ovl_opaque_xattr);
129 if (err) {
130 pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n",
131 upperdentry->d_name.name, err);
132 }
133}
134
135static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
136 struct kstat *stat)
137{
138 int err;
139 enum ovl_path_type type;
140 struct path realpath;
141
142 type = ovl_path_real(dentry, &realpath);
143 err = vfs_getattr(&realpath, stat);
144 if (err)
145 return err;
146
147 stat->dev = dentry->d_sb->s_dev;
148 stat->ino = dentry->d_inode->i_ino;
149
150 /*
151 * It's probably not worth it to count subdirs to get the
152 * correct link count. nlink=1 seems to pacify 'find' and
153 * other utilities.
154 */
155 if (type == OVL_PATH_MERGE)
156 stat->nlink = 1;
157
158 return 0;
159}
160
161static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
162 struct kstat *stat, const char *link,
163 struct dentry *hardlink)
164{
165 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
166 struct inode *udir = upperdir->d_inode;
167 struct dentry *newdentry;
168 int err;
169
170 mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT);
171 newdentry = lookup_one_len(dentry->d_name.name, upperdir,
172 dentry->d_name.len);
173 err = PTR_ERR(newdentry);
174 if (IS_ERR(newdentry))
175 goto out_unlock;
176 err = ovl_create_real(udir, newdentry, stat, link, hardlink, false);
177 if (err)
178 goto out_dput;
179
180 ovl_dentry_version_inc(dentry->d_parent);
181 ovl_dentry_update(dentry, newdentry);
182 ovl_copyattr(newdentry->d_inode, inode);
183 d_instantiate(dentry, inode);
184 newdentry = NULL;
185out_dput:
186 dput(newdentry);
187out_unlock:
188 mutex_unlock(&udir->i_mutex);
189 return err;
190}
191
192static int ovl_lock_rename_workdir(struct dentry *workdir,
193 struct dentry *upperdir)
194{
195 /* Workdir should not be the same as upperdir */
196 if (workdir == upperdir)
197 goto err;
198
199 /* Workdir should not be subdir of upperdir and vice versa */
200 if (lock_rename(workdir, upperdir) != NULL)
201 goto err_unlock;
202
203 return 0;
204
205err_unlock:
206 unlock_rename(workdir, upperdir);
207err:
208 pr_err("overlayfs: failed to lock workdir+upperdir\n");
209 return -EIO;
210}
211
212static struct dentry *ovl_clear_empty(struct dentry *dentry,
213 struct list_head *list)
214{
215 struct dentry *workdir = ovl_workdir(dentry);
216 struct inode *wdir = workdir->d_inode;
217 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
218 struct inode *udir = upperdir->d_inode;
219 struct path upperpath;
220 struct dentry *upper;
221 struct dentry *opaquedir;
222 struct kstat stat;
223 int err;
224
225 err = ovl_lock_rename_workdir(workdir, upperdir);
226 if (err)
227 goto out;
228
229 ovl_path_upper(dentry, &upperpath);
230 err = vfs_getattr(&upperpath, &stat);
231 if (err)
232 goto out_unlock;
233
234 err = -ESTALE;
235 if (!S_ISDIR(stat.mode))
236 goto out_unlock;
237 upper = upperpath.dentry;
238 if (upper->d_parent->d_inode != udir)
239 goto out_unlock;
240
241 opaquedir = ovl_lookup_temp(workdir, dentry);
242 err = PTR_ERR(opaquedir);
243 if (IS_ERR(opaquedir))
244 goto out_unlock;
245
246 err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true);
247 if (err)
248 goto out_dput;
249
250 err = ovl_copy_xattr(upper, opaquedir);
251 if (err)
252 goto out_cleanup;
253
254 err = ovl_set_opaque(opaquedir);
255 if (err)
256 goto out_cleanup;
257
258 mutex_lock(&opaquedir->d_inode->i_mutex);
259 err = ovl_set_attr(opaquedir, &stat);
260 mutex_unlock(&opaquedir->d_inode->i_mutex);
261 if (err)
262 goto out_cleanup;
263
264 err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
265 if (err)
266 goto out_cleanup;
267
268 ovl_cleanup_whiteouts(upper, list);
269 ovl_cleanup(wdir, upper);
270 unlock_rename(workdir, upperdir);
271
272 /* dentry's upper doesn't match now, get rid of it */
273 d_drop(dentry);
274
275 return opaquedir;
276
277out_cleanup:
278 ovl_cleanup(wdir, opaquedir);
279out_dput:
280 dput(opaquedir);
281out_unlock:
282 unlock_rename(workdir, upperdir);
283out:
284 return ERR_PTR(err);
285}
286
287static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry,
288 enum ovl_path_type type)
289{
290 int err;
291 struct dentry *ret = NULL;
292 LIST_HEAD(list);
293
294 err = ovl_check_empty_dir(dentry, &list);
295 if (err)
296 ret = ERR_PTR(err);
297 else if (type == OVL_PATH_MERGE)
298 ret = ovl_clear_empty(dentry, &list);
299
300 ovl_cache_free(&list);
301
302 return ret;
303}
304
305static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
306 struct kstat *stat, const char *link,
307 struct dentry *hardlink)
308{
309 struct dentry *workdir = ovl_workdir(dentry);
310 struct inode *wdir = workdir->d_inode;
311 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
312 struct inode *udir = upperdir->d_inode;
313 struct dentry *upper;
314 struct dentry *newdentry;
315 int err;
316
317 err = ovl_lock_rename_workdir(workdir, upperdir);
318 if (err)
319 goto out;
320
321 newdentry = ovl_lookup_temp(workdir, dentry);
322 err = PTR_ERR(newdentry);
323 if (IS_ERR(newdentry))
324 goto out_unlock;
325
326 upper = lookup_one_len(dentry->d_name.name, upperdir,
327 dentry->d_name.len);
328 err = PTR_ERR(upper);
329 if (IS_ERR(upper))
330 goto out_dput;
331
332 err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true);
333 if (err)
334 goto out_dput2;
335
336 if (S_ISDIR(stat->mode)) {
337 err = ovl_set_opaque(newdentry);
338 if (err)
339 goto out_cleanup;
340
341 err = ovl_do_rename(wdir, newdentry, udir, upper,
342 RENAME_EXCHANGE);
343 if (err)
344 goto out_cleanup;
345
346 ovl_cleanup(wdir, upper);
347 } else {
348 err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
349 if (err)
350 goto out_cleanup;
351 }
352 ovl_dentry_version_inc(dentry->d_parent);
353 ovl_dentry_update(dentry, newdentry);
354 ovl_copyattr(newdentry->d_inode, inode);
355 d_instantiate(dentry, inode);
356 newdentry = NULL;
357out_dput2:
358 dput(upper);
359out_dput:
360 dput(newdentry);
361out_unlock:
362 unlock_rename(workdir, upperdir);
363out:
364 return err;
365
366out_cleanup:
367 ovl_cleanup(wdir, newdentry);
368 goto out_dput2;
369}
370
371static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
372 const char *link, struct dentry *hardlink)
373{
374 int err;
375 struct inode *inode;
376 struct kstat stat = {
377 .mode = mode,
378 .rdev = rdev,
379 };
380
381 err = -ENOMEM;
382 inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata);
383 if (!inode)
384 goto out;
385
386 err = ovl_copy_up(dentry->d_parent);
387 if (err)
388 goto out_iput;
389
390 if (!ovl_dentry_is_opaque(dentry)) {
391 err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
392 } else {
393 const struct cred *old_cred;
394 struct cred *override_cred;
395
396 err = -ENOMEM;
397 override_cred = prepare_creds();
398 if (!override_cred)
399 goto out_iput;
400
401 /*
402 * CAP_SYS_ADMIN for setting opaque xattr
403 * CAP_DAC_OVERRIDE for create in workdir, rename
404 * CAP_FOWNER for removing whiteout from sticky dir
405 */
406 cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
407 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
408 cap_raise(override_cred->cap_effective, CAP_FOWNER);
409 old_cred = override_creds(override_cred);
410
411 err = ovl_create_over_whiteout(dentry, inode, &stat, link,
412 hardlink);
413
414 revert_creds(old_cred);
415 put_cred(override_cred);
416 }
417
418 if (!err)
419 inode = NULL;
420out_iput:
421 iput(inode);
422out:
423 return err;
424}
425
426static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
427 const char *link)
428{
429 int err;
430
431 err = ovl_want_write(dentry);
432 if (!err) {
433 err = ovl_create_or_link(dentry, mode, rdev, link, NULL);
434 ovl_drop_write(dentry);
435 }
436
437 return err;
438}
439
440static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
441 bool excl)
442{
443 return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
444}
445
446static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
447{
448 return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
449}
450
451static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
452 dev_t rdev)
453{
454 /* Don't allow creation of "whiteout" on overlay */
455 if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
456 return -EPERM;
457
458 return ovl_create_object(dentry, mode, rdev, NULL);
459}
460
461static int ovl_symlink(struct inode *dir, struct dentry *dentry,
462 const char *link)
463{
464 return ovl_create_object(dentry, S_IFLNK, 0, link);
465}
466
467static int ovl_link(struct dentry *old, struct inode *newdir,
468 struct dentry *new)
469{
470 int err;
471 struct dentry *upper;
472
473 err = ovl_want_write(old);
474 if (err)
475 goto out;
476
477 err = ovl_copy_up(old);
478 if (err)
479 goto out_drop_write;
480
481 upper = ovl_dentry_upper(old);
482 err = ovl_create_or_link(new, upper->d_inode->i_mode, 0, NULL, upper);
483
484out_drop_write:
485 ovl_drop_write(old);
486out:
487 return err;
488}
489
490static int ovl_remove_and_whiteout(struct dentry *dentry,
491 enum ovl_path_type type, bool is_dir)
492{
493 struct dentry *workdir = ovl_workdir(dentry);
494 struct inode *wdir = workdir->d_inode;
495 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
496 struct inode *udir = upperdir->d_inode;
497 struct dentry *whiteout;
498 struct dentry *upper;
499 struct dentry *opaquedir = NULL;
500 int err;
501
502 if (is_dir) {
503 opaquedir = ovl_check_empty_and_clear(dentry, type);
504 err = PTR_ERR(opaquedir);
505 if (IS_ERR(opaquedir))
506 goto out;
507 }
508
509 err = ovl_lock_rename_workdir(workdir, upperdir);
510 if (err)
511 goto out_dput;
512
513 whiteout = ovl_whiteout(workdir, dentry);
514 err = PTR_ERR(whiteout);
515 if (IS_ERR(whiteout))
516 goto out_unlock;
517
518 if (type == OVL_PATH_LOWER) {
519 upper = lookup_one_len(dentry->d_name.name, upperdir,
520 dentry->d_name.len);
521 err = PTR_ERR(upper);
522 if (IS_ERR(upper))
523 goto kill_whiteout;
524
525 err = ovl_do_rename(wdir, whiteout, udir, upper, 0);
526 dput(upper);
527 if (err)
528 goto kill_whiteout;
529 } else {
530 int flags = 0;
531
532 upper = ovl_dentry_upper(dentry);
533 if (opaquedir)
534 upper = opaquedir;
535 err = -ESTALE;
536 if (upper->d_parent != upperdir)
537 goto kill_whiteout;
538
539 if (is_dir)
540 flags |= RENAME_EXCHANGE;
541
542 err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
543 if (err)
544 goto kill_whiteout;
545
546 if (is_dir)
547 ovl_cleanup(wdir, upper);
548 }
549 ovl_dentry_version_inc(dentry->d_parent);
550out_d_drop:
551 d_drop(dentry);
552 dput(whiteout);
553out_unlock:
554 unlock_rename(workdir, upperdir);
555out_dput:
556 dput(opaquedir);
557out:
558 return err;
559
560kill_whiteout:
561 ovl_cleanup(wdir, whiteout);
562 goto out_d_drop;
563}
564
565static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
566{
567 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
568 struct inode *dir = upperdir->d_inode;
569 struct dentry *upper = ovl_dentry_upper(dentry);
570 int err;
571
572 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
573 err = -ESTALE;
574 if (upper->d_parent == upperdir) {
575 /* Don't let d_delete() think it can reset d_inode */
576 dget(upper);
577 if (is_dir)
578 err = vfs_rmdir(dir, upper);
579 else
580 err = vfs_unlink(dir, upper, NULL);
581 dput(upper);
582 ovl_dentry_version_inc(dentry->d_parent);
583 }
584
585 /*
586 * Keeping this dentry hashed would mean having to release
587 * upperpath/lowerpath, which could only be done if we are the
588 * sole user of this dentry. Too tricky... Just unhash for
589 * now.
590 */
591 d_drop(dentry);
592 mutex_unlock(&dir->i_mutex);
593
594 return err;
595}
596
597static inline int ovl_check_sticky(struct dentry *dentry)
598{
599 struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode;
600 struct inode *inode = ovl_dentry_real(dentry)->d_inode;
601
602 if (check_sticky(dir, inode))
603 return -EPERM;
604
605 return 0;
606}
607
608static int ovl_do_remove(struct dentry *dentry, bool is_dir)
609{
610 enum ovl_path_type type;
611 int err;
612
613 err = ovl_check_sticky(dentry);
614 if (err)
615 goto out;
616
617 err = ovl_want_write(dentry);
618 if (err)
619 goto out;
620
621 err = ovl_copy_up(dentry->d_parent);
622 if (err)
623 goto out_drop_write;
624
625 type = ovl_path_type(dentry);
626 if (type == OVL_PATH_PURE_UPPER) {
627 err = ovl_remove_upper(dentry, is_dir);
628 } else {
629 const struct cred *old_cred;
630 struct cred *override_cred;
631
632 err = -ENOMEM;
633 override_cred = prepare_creds();
634 if (!override_cred)
635 goto out_drop_write;
636
637 /*
638 * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
639 * CAP_DAC_OVERRIDE for create in workdir, rename
640 * CAP_FOWNER for removing whiteout from sticky dir
641 * CAP_FSETID for chmod of opaque dir
642 * CAP_CHOWN for chown of opaque dir
643 */
644 cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
645 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
646 cap_raise(override_cred->cap_effective, CAP_FOWNER);
647 cap_raise(override_cred->cap_effective, CAP_FSETID);
648 cap_raise(override_cred->cap_effective, CAP_CHOWN);
649 old_cred = override_creds(override_cred);
650
651 err = ovl_remove_and_whiteout(dentry, type, is_dir);
652
653 revert_creds(old_cred);
654 put_cred(override_cred);
655 }
656out_drop_write:
657 ovl_drop_write(dentry);
658out:
659 return err;
660}
661
662static int ovl_unlink(struct inode *dir, struct dentry *dentry)
663{
664 return ovl_do_remove(dentry, false);
665}
666
667static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
668{
669 return ovl_do_remove(dentry, true);
670}
671
/*
 * ovl_rename2 - rename (or exchange) overlay entries through the upper layer.
 *
 * Only RENAME_EXCHANGE and RENAME_NOREPLACE are accepted in @flags; anything
 * else fails with -EINVAL.  RENAME_NOREPLACE is cleared before the
 * underlying rename is issued.
 *
 * A source directory of type OVL_PATH_LOWER or OVL_PATH_MERGE is refused
 * with -EXDEV, and so is such a target directory when exchanging.
 *
 * Returns 0 on success (including the early exit taken when source and
 * target resolve to the same underlying inode) or a negative errno.
 */
672static int ovl_rename2(struct inode *olddir, struct dentry *old,
673 struct inode *newdir, struct dentry *new,
674 unsigned int flags)
675{
676 int err;
677 enum ovl_path_type old_type;
678 enum ovl_path_type new_type;
679 struct dentry *old_upperdir;
680 struct dentry *new_upperdir;
681 struct dentry *olddentry;
682 struct dentry *newdentry;
683 struct dentry *trap;
684 bool old_opaque;
685 bool new_opaque;
686 bool new_create = false;
687 bool cleanup_whiteout = false;
688 bool overwrite = !(flags & RENAME_EXCHANGE);
689 bool is_dir = S_ISDIR(old->d_inode->i_mode);
690 bool new_is_dir = false;
691 struct dentry *opaquedir = NULL;
692 const struct cred *old_cred = NULL;
693 struct cred *override_cred = NULL;
694
695 err = -EINVAL;
696 if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
697 goto out;
698
699 flags &= ~RENAME_NOREPLACE;
700
701 err = ovl_check_sticky(old);
702 if (err)
703 goto out;
704
705 /* Don't copy up directory trees */
706 old_type = ovl_path_type(old);
707 err = -EXDEV;
708 if ((old_type == OVL_PATH_LOWER || old_type == OVL_PATH_MERGE) && is_dir)
709 goto out;
710
711 if (new->d_inode) {
712 err = ovl_check_sticky(new);
713 if (err)
714 goto out;
715
716 if (S_ISDIR(new->d_inode->i_mode))
717 new_is_dir = true;
718
719 new_type = ovl_path_type(new);
720 err = -EXDEV;
721 if (!overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir)
722 goto out;
723
/* Source and target sharing one underlying inode: succeed without doing anything */
724 err = 0;
725 if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) {
726 if (ovl_dentry_lower(old)->d_inode ==
727 ovl_dentry_lower(new)->d_inode)
728 goto out;
729 }
730 if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) {
731 if (ovl_dentry_upper(old)->d_inode ==
732 ovl_dentry_upper(new)->d_inode)
733 goto out;
734 }
735 } else {
736 if (ovl_dentry_is_opaque(new))
737 new_type = OVL_PATH_UPPER;
738 else
739 new_type = OVL_PATH_PURE_UPPER;
740 }
741
742 err = ovl_want_write(old);
743 if (err)
744 goto out;
745
746 err = ovl_copy_up(old);
747 if (err)
748 goto out_drop_write;
749
750 err = ovl_copy_up(new->d_parent);
751 if (err)
752 goto out_drop_write;
753 if (!overwrite) {
754 err = ovl_copy_up(new);
755 if (err)
756 goto out_drop_write;
757 }
758
/* In this function "opaque" means any type other than OVL_PATH_PURE_UPPER */
759 old_opaque = old_type != OVL_PATH_PURE_UPPER;
760 new_opaque = new_type != OVL_PATH_PURE_UPPER;
761
762 if (old_opaque || new_opaque) {
763 err = -ENOMEM;
764 override_cred = prepare_creds();
765 if (!override_cred)
766 goto out_drop_write;
767
768 /*
769 * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
770 * CAP_DAC_OVERRIDE for create in workdir
771 * CAP_FOWNER for removing whiteout from sticky dir
772 * CAP_FSETID for chmod of opaque dir
773 * CAP_CHOWN for chown of opaque dir
774 */
775 cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
776 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
777 cap_raise(override_cred->cap_effective, CAP_FOWNER);
778 cap_raise(override_cred->cap_effective, CAP_FSETID);
779 cap_raise(override_cred->cap_effective, CAP_CHOWN);
780 old_cred = override_creds(override_cred);
781 }
782
/*
 * Overwriting a lower/merged directory: ovl_check_empty_and_clear() either
 * fails or returns a dentry that is used as the rename target further down.
 */
783 if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) {
784 opaquedir = ovl_check_empty_and_clear(new, new_type);
785 err = PTR_ERR(opaquedir);
786 if (IS_ERR(opaquedir)) {
787 opaquedir = NULL;
788 goto out_revert_creds;
789 }
790 }
791
792 if (overwrite) {
793 if (old_opaque) {
794 if (new->d_inode || !new_opaque) {
795 /* Whiteout source */
796 flags |= RENAME_WHITEOUT;
797 } else {
798 /* Switch whiteouts */
799 flags |= RENAME_EXCHANGE;
800 }
801 } else if (is_dir && !new->d_inode && new_opaque) {
802 flags |= RENAME_EXCHANGE;
803 cleanup_whiteout = true;
804 }
805 }
806
807 old_upperdir = ovl_dentry_upper(old->d_parent);
808 new_upperdir = ovl_dentry_upper(new->d_parent);
809
/* Both upper parents stay locked until out_unlock */
810 trap = lock_rename(new_upperdir, old_upperdir);
811
812 olddentry = ovl_dentry_upper(old);
813 newdentry = ovl_dentry_upper(new);
814 if (newdentry) {
815 if (opaquedir) {
816 newdentry = opaquedir;
817 opaquedir = NULL;
818 } else {
819 dget(newdentry);
820 }
821 } else {
822 new_create = true;
823 newdentry = lookup_one_len(new->d_name.name, new_upperdir,
824 new->d_name.len);
825 err = PTR_ERR(newdentry);
826 if (IS_ERR(newdentry))
827 goto out_unlock;
828 }
829
/*
 * Re-check under the lock: fail with -ESTALE if either upper dentry is no
 * longer a child of its expected parent, or is the lock_rename() trap.
 */
830 err = -ESTALE;
831 if (olddentry->d_parent != old_upperdir)
832 goto out_dput;
833 if (newdentry->d_parent != new_upperdir)
834 goto out_dput;
835 if (olddentry == trap)
836 goto out_dput;
837 if (newdentry == trap)
838 goto out_dput;
839
840 if (is_dir && !old_opaque && new_opaque) {
841 err = ovl_set_opaque(olddentry);
842 if (err)
843 goto out_dput;
844 }
845 if (!overwrite && new_is_dir && old_opaque && !new_opaque) {
846 err = ovl_set_opaque(newdentry);
847 if (err)
848 goto out_dput;
849 }
850
851 if (old_opaque || new_opaque) {
852 err = ovl_do_rename(old_upperdir->d_inode, olddentry,
853 new_upperdir->d_inode, newdentry,
854 flags);
855 } else {
856 /* No debug for the plain case */
857 BUG_ON(flags & ~RENAME_EXCHANGE);
858 err = vfs_rename(old_upperdir->d_inode, olddentry,
859 new_upperdir->d_inode, newdentry,
860 NULL, flags);
861 }
862
/* On failure, undo the opaque marking applied just before the rename */
863 if (err) {
864 if (is_dir && !old_opaque && new_opaque)
865 ovl_remove_opaque(olddentry);
866 if (!overwrite && new_is_dir && old_opaque && !new_opaque)
867 ovl_remove_opaque(newdentry);
868 goto out_dput;
869 }
870
871 if (is_dir && old_opaque && !new_opaque)
872 ovl_remove_opaque(olddentry);
873 if (!overwrite && new_is_dir && !old_opaque && new_opaque)
874 ovl_remove_opaque(newdentry);
875
876 if (old_opaque != new_opaque) {
877 ovl_dentry_set_opaque(old, new_opaque);
878 if (!overwrite)
879 ovl_dentry_set_opaque(new, old_opaque);
880 }
881
882 if (cleanup_whiteout)
883 ovl_cleanup(old_upperdir->d_inode, newdentry);
884
885 ovl_dentry_version_inc(old->d_parent);
886 ovl_dentry_version_inc(new->d_parent);
887
888out_dput:
889 dput(newdentry);
890out_unlock:
891 unlock_rename(new_upperdir, old_upperdir);
892out_revert_creds:
893 if (old_opaque || new_opaque) {
894 revert_creds(old_cred);
895 put_cred(override_cred);
896 }
897out_drop_write:
898 ovl_drop_write(old);
899out:
900 dput(opaquedir);
901 return err;
902}
903
904const struct inode_operations ovl_dir_inode_operations = {
905 .lookup = ovl_lookup,
906 .mkdir = ovl_mkdir,
907 .symlink = ovl_symlink,
908 .unlink = ovl_unlink,
909 .rmdir = ovl_rmdir,
910 .rename2 = ovl_rename2,
911 .link = ovl_link,
912 .setattr = ovl_setattr,
913 .create = ovl_create,
914 .mknod = ovl_mknod,
915 .permission = ovl_permission,
916 .getattr = ovl_dir_getattr,
917 .setxattr = ovl_setxattr,
918 .getxattr = ovl_getxattr,
919 .listxattr = ovl_listxattr,
920 .removexattr = ovl_removexattr,
921};
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
new file mode 100644
index 000000000000..af2d18c9fcee
--- /dev/null
+++ b/fs/overlayfs/inode.c
@@ -0,0 +1,425 @@
1/*
2 *
3 * Copyright (C) 2011 Novell Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
8 */
9
10#include <linux/fs.h>
11#include <linux/slab.h>
12#include <linux/xattr.h>
13#include "overlayfs.h"
14
15static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr,
16 bool no_data)
17{
18 int err;
19 struct dentry *parent;
20 struct kstat stat;
21 struct path lowerpath;
22
23 parent = dget_parent(dentry);
24 err = ovl_copy_up(parent);
25 if (err)
26 goto out_dput_parent;
27
28 ovl_path_lower(dentry, &lowerpath);
29 err = vfs_getattr(&lowerpath, &stat);
30 if (err)
31 goto out_dput_parent;
32
33 if (no_data)
34 stat.size = 0;
35
36 err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr);
37
38out_dput_parent:
39 dput(parent);
40 return err;
41}
42
43int ovl_setattr(struct dentry *dentry, struct iattr *attr)
44{
45 int err;
46 struct dentry *upperdentry;
47
48 err = ovl_want_write(dentry);
49 if (err)
50 goto out;
51
52 upperdentry = ovl_dentry_upper(dentry);
53 if (upperdentry) {
54 mutex_lock(&upperdentry->d_inode->i_mutex);
55 err = notify_change(upperdentry, attr, NULL);
56 mutex_unlock(&upperdentry->d_inode->i_mutex);
57 } else {
58 err = ovl_copy_up_last(dentry, attr, false);
59 }
60 ovl_drop_write(dentry);
61out:
62 return err;
63}
64
65static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
66 struct kstat *stat)
67{
68 struct path realpath;
69
70 ovl_path_real(dentry, &realpath);
71 return vfs_getattr(&realpath, stat);
72}
73
74int ovl_permission(struct inode *inode, int mask)
75{
76 struct ovl_entry *oe;
77 struct dentry *alias = NULL;
78 struct inode *realinode;
79 struct dentry *realdentry;
80 bool is_upper;
81 int err;
82
83 if (S_ISDIR(inode->i_mode)) {
84 oe = inode->i_private;
85 } else if (mask & MAY_NOT_BLOCK) {
86 return -ECHILD;
87 } else {
88 /*
89 * For non-directories find an alias and get the info
90 * from there.
91 */
92 alias = d_find_any_alias(inode);
93 if (WARN_ON(!alias))
94 return -ENOENT;
95
96 oe = alias->d_fsdata;
97 }
98
99 realdentry = ovl_entry_real(oe, &is_upper);
100
101 /* Careful in RCU walk mode */
102 realinode = ACCESS_ONCE(realdentry->d_inode);
103 if (!realinode) {
104 WARN_ON(!(mask & MAY_NOT_BLOCK));
105 err = -ENOENT;
106 goto out_dput;
107 }
108
109 if (mask & MAY_WRITE) {
110 umode_t mode = realinode->i_mode;
111
112 /*
113 * Writes will always be redirected to upper layer, so
114 * ignore lower layer being read-only.
115 *
116 * If the overlay itself is read-only then proceed
117 * with the permission check, don't return EROFS.
118 * This will only happen if this is the lower layer of
119 * another overlayfs.
120 *
121 * If upper fs becomes read-only after the overlay was
122 * constructed return EROFS to prevent modification of
123 * upper layer.
124 */
125 err = -EROFS;
126 if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
127 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
128 goto out_dput;
129 }
130
131 err = __inode_permission(realinode, mask);
132out_dput:
133 dput(alias);
134 return err;
135}
136
137
/*
 * Cookie handed from ovl_follow_link() to ovl_put_link().
 *
 * realdentry: real dentry whose ->follow_link was called
 * cookie:     value that ->follow_link returned, passed back to the real
 *             inode's ->put_link
 */
138struct ovl_link_data {
139 struct dentry *realdentry;
140 void *cookie;
141};
142
143static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
144{
145 void *ret;
146 struct dentry *realdentry;
147 struct inode *realinode;
148
149 realdentry = ovl_dentry_real(dentry);
150 realinode = realdentry->d_inode;
151
152 if (WARN_ON(!realinode->i_op->follow_link))
153 return ERR_PTR(-EPERM);
154
155 ret = realinode->i_op->follow_link(realdentry, nd);
156 if (IS_ERR(ret))
157 return ret;
158
159 if (realinode->i_op->put_link) {
160 struct ovl_link_data *data;
161
162 data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
163 if (!data) {
164 realinode->i_op->put_link(realdentry, nd, ret);
165 return ERR_PTR(-ENOMEM);
166 }
167 data->realdentry = realdentry;
168 data->cookie = ret;
169
170 return data;
171 } else {
172 return NULL;
173 }
174}
175
176static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
177{
178 struct inode *realinode;
179 struct ovl_link_data *data = c;
180
181 if (!data)
182 return;
183
184 realinode = data->realdentry->d_inode;
185 realinode->i_op->put_link(data->realdentry, nd, data->cookie);
186 kfree(data);
187}
188
189static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
190{
191 struct path realpath;
192 struct inode *realinode;
193
194 ovl_path_real(dentry, &realpath);
195 realinode = realpath.dentry->d_inode;
196
197 if (!realinode->i_op->readlink)
198 return -EINVAL;
199
200 touch_atime(&realpath);
201
202 return realinode->i_op->readlink(realpath.dentry, buf, bufsiz);
203}
204
205
/*
 * Return true if @name lies in overlayfs' private "trusted.overlay."
 * xattr namespace.
 *
 * The comparison length is derived from the prefix itself so the full
 * prefix, including the trailing dot, has to match.
 */
static bool ovl_is_private_xattr(const char *name)
{
	static const char prefix[] = "trusted.overlay.";

	return strncmp(name, prefix, sizeof(prefix) - 1) == 0;
}
210
211int ovl_setxattr(struct dentry *dentry, const char *name,
212 const void *value, size_t size, int flags)
213{
214 int err;
215 struct dentry *upperdentry;
216
217 err = ovl_want_write(dentry);
218 if (err)
219 goto out;
220
221 err = -EPERM;
222 if (ovl_is_private_xattr(name))
223 goto out_drop_write;
224
225 err = ovl_copy_up(dentry);
226 if (err)
227 goto out_drop_write;
228
229 upperdentry = ovl_dentry_upper(dentry);
230 err = vfs_setxattr(upperdentry, name, value, size, flags);
231
232out_drop_write:
233 ovl_drop_write(dentry);
234out:
235 return err;
236}
237
238ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
239 void *value, size_t size)
240{
241 if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
242 ovl_is_private_xattr(name))
243 return -ENODATA;
244
245 return vfs_getxattr(ovl_dentry_real(dentry), name, value, size);
246}
247
248ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
249{
250 ssize_t res;
251 int off;
252
253 res = vfs_listxattr(ovl_dentry_real(dentry), list, size);
254 if (res <= 0 || size == 0)
255 return res;
256
257 if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE)
258 return res;
259
260 /* filter out private xattrs */
261 for (off = 0; off < res;) {
262 char *s = list + off;
263 size_t slen = strlen(s) + 1;
264
265 BUG_ON(off + slen > res);
266
267 if (ovl_is_private_xattr(s)) {
268 res -= slen;
269 memmove(s, s + slen, res - off);
270 } else {
271 off += slen;
272 }
273 }
274
275 return res;
276}
277
278int ovl_removexattr(struct dentry *dentry, const char *name)
279{
280 int err;
281 struct path realpath;
282 enum ovl_path_type type;
283
284 err = ovl_want_write(dentry);
285 if (err)
286 goto out;
287
288 if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
289 ovl_is_private_xattr(name))
290 goto out_drop_write;
291
292 type = ovl_path_real(dentry, &realpath);
293 if (type == OVL_PATH_LOWER) {
294 err = vfs_getxattr(realpath.dentry, name, NULL, 0);
295 if (err < 0)
296 goto out_drop_write;
297
298 err = ovl_copy_up(dentry);
299 if (err)
300 goto out_drop_write;
301
302 ovl_path_upper(dentry, &realpath);
303 }
304
305 err = vfs_removexattr(realpath.dentry, name);
306out_drop_write:
307 ovl_drop_write(dentry);
308out:
309 return err;
310}
311
312static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
313 struct dentry *realdentry)
314{
315 if (type != OVL_PATH_LOWER)
316 return false;
317
318 if (special_file(realdentry->d_inode->i_mode))
319 return false;
320
321 if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
322 return false;
323
324 return true;
325}
326
327static int ovl_dentry_open(struct dentry *dentry, struct file *file,
328 const struct cred *cred)
329{
330 int err;
331 struct path realpath;
332 enum ovl_path_type type;
333 bool want_write = false;
334
335 type = ovl_path_real(dentry, &realpath);
336 if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) {
337 want_write = true;
338 err = ovl_want_write(dentry);
339 if (err)
340 goto out;
341
342 if (file->f_flags & O_TRUNC)
343 err = ovl_copy_up_last(dentry, NULL, true);
344 else
345 err = ovl_copy_up(dentry);
346 if (err)
347 goto out_drop_write;
348
349 ovl_path_upper(dentry, &realpath);
350 }
351
352 err = vfs_open(&realpath, file, cred);
353out_drop_write:
354 if (want_write)
355 ovl_drop_write(dentry);
356out:
357 return err;
358}
359
360static const struct inode_operations ovl_file_inode_operations = {
361 .setattr = ovl_setattr,
362 .permission = ovl_permission,
363 .getattr = ovl_getattr,
364 .setxattr = ovl_setxattr,
365 .getxattr = ovl_getxattr,
366 .listxattr = ovl_listxattr,
367 .removexattr = ovl_removexattr,
368 .dentry_open = ovl_dentry_open,
369};
370
371static const struct inode_operations ovl_symlink_inode_operations = {
372 .setattr = ovl_setattr,
373 .follow_link = ovl_follow_link,
374 .put_link = ovl_put_link,
375 .readlink = ovl_readlink,
376 .getattr = ovl_getattr,
377 .setxattr = ovl_setxattr,
378 .getxattr = ovl_getxattr,
379 .listxattr = ovl_listxattr,
380 .removexattr = ovl_removexattr,
381};
382
383struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
384 struct ovl_entry *oe)
385{
386 struct inode *inode;
387
388 inode = new_inode(sb);
389 if (!inode)
390 return NULL;
391
392 mode &= S_IFMT;
393
394 inode->i_ino = get_next_ino();
395 inode->i_mode = mode;
396 inode->i_flags |= S_NOATIME | S_NOCMTIME;
397
398 switch (mode) {
399 case S_IFDIR:
400 inode->i_private = oe;
401 inode->i_op = &ovl_dir_inode_operations;
402 inode->i_fop = &ovl_dir_operations;
403 break;
404
405 case S_IFLNK:
406 inode->i_op = &ovl_symlink_inode_operations;
407 break;
408
409 case S_IFREG:
410 case S_IFSOCK:
411 case S_IFBLK:
412 case S_IFCHR:
413 case S_IFIFO:
414 inode->i_op = &ovl_file_inode_operations;
415 break;
416
417 default:
418 WARN(1, "illegal file type: %i\n", mode);
419 iput(inode);
420 inode = NULL;
421 }
422
423 return inode;
424
425}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
new file mode 100644
index 000000000000..814bed33dd07
--- /dev/null
+++ b/fs/overlayfs/overlayfs.h
@@ -0,0 +1,191 @@
1/*
2 *
3 * Copyright (C) 2011 Novell Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
8 */
9
10#include <linux/kernel.h>
11
12struct ovl_entry;
13
/*
 * Classification of an overlay dentry, as returned by ovl_path_type() and
 * ovl_path_real().
 *
 * NOTE(review): the exact meaning of each value is defined where
 * ovl_path_type() is implemented, outside this header.  Presumably the
 * values describe which of the upper/lower layers back the dentry --
 * confirm against that implementation.
 */
14enum ovl_path_type {
15 OVL_PATH_PURE_UPPER,
16 OVL_PATH_UPPER,
17 OVL_PATH_MERGE,
18 OVL_PATH_LOWER,
19};
20
21extern const char *ovl_opaque_xattr;
22
/* vfs_rmdir() wrapper that logs the call and its result via pr_debug(). */
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
	int ret;

	ret = vfs_rmdir(dir, dentry);
	pr_debug("rmdir(%pd2) = %i\n", dentry, ret);

	return ret;
}
29
30static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
31{
32 int err = vfs_unlink(dir, dentry, NULL);
33 pr_debug("unlink(%pd2) = %i\n", dentry, err);
34 return err;
35}
36
37static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
38 struct dentry *new_dentry, bool debug)
39{
40 int err = vfs_link(old_dentry, dir, new_dentry, NULL);
41 if (debug) {
42 pr_debug("link(%pd2, %pd2) = %i\n",
43 old_dentry, new_dentry, err);
44 }
45 return err;
46}
47
48static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
49 umode_t mode, bool debug)
50{
51 int err = vfs_create(dir, dentry, mode, true);
52 if (debug)
53 pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
54 return err;
55}
56
57static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
58 umode_t mode, bool debug)
59{
60 int err = vfs_mkdir(dir, dentry, mode);
61 if (debug)
62 pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
63 return err;
64}
65
66static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
67 umode_t mode, dev_t dev, bool debug)
68{
69 int err = vfs_mknod(dir, dentry, mode, dev);
70 if (debug) {
71 pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n",
72 dentry, mode, dev, err);
73 }
74 return err;
75}
76
77static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
78 const char *oldname, bool debug)
79{
80 int err = vfs_symlink(dir, dentry, oldname);
81 if (debug)
82 pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
83 return err;
84}
85
86static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
87 const void *value, size_t size, int flags)
88{
89 int err = vfs_setxattr(dentry, name, value, size, flags);
90 pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n",
91 dentry, name, (int) size, (char *) value, flags, err);
92 return err;
93}
94
/* vfs_removexattr() wrapper that logs the call and its result via pr_debug(). */
static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
{
	int ret;

	ret = vfs_removexattr(dentry, name);
	pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, ret);

	return ret;
}
101
102static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
103 struct inode *newdir, struct dentry *newdentry,
104 unsigned int flags)
105{
106 int err;
107
108 pr_debug("rename2(%pd2, %pd2, 0x%x)\n",
109 olddentry, newdentry, flags);
110
111 err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
112
113 if (err) {
114 pr_debug("...rename2(%pd2, %pd2, ...) = %i\n",
115 olddentry, newdentry, err);
116 }
117 return err;
118}
119
/* vfs_whiteout() wrapper that logs the call and its result via pr_debug(). */
static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
{
	int ret;

	ret = vfs_whiteout(dir, dentry);
	pr_debug("whiteout(%pd2) = %i\n", dentry, ret);

	return ret;
}
126
127enum ovl_path_type ovl_path_type(struct dentry *dentry);
128u64 ovl_dentry_version_get(struct dentry *dentry);
129void ovl_dentry_version_inc(struct dentry *dentry);
130void ovl_path_upper(struct dentry *dentry, struct path *path);
131void ovl_path_lower(struct dentry *dentry, struct path *path);
132enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
133struct dentry *ovl_dentry_upper(struct dentry *dentry);
134struct dentry *ovl_dentry_lower(struct dentry *dentry);
135struct dentry *ovl_dentry_real(struct dentry *dentry);
136struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper);
137struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
138void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
139struct dentry *ovl_workdir(struct dentry *dentry);
140int ovl_want_write(struct dentry *dentry);
141void ovl_drop_write(struct dentry *dentry);
142bool ovl_dentry_is_opaque(struct dentry *dentry);
143void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque);
144bool ovl_is_whiteout(struct dentry *dentry);
145void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
146struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
147 unsigned int flags);
148struct file *ovl_path_open(struct path *path, int flags);
149
150struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
151 struct kstat *stat, const char *link);
152
153/* readdir.c */
154extern const struct file_operations ovl_dir_operations;
155int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
156void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
157void ovl_cache_free(struct list_head *list);
158
159/* inode.c */
160int ovl_setattr(struct dentry *dentry, struct iattr *attr);
161int ovl_permission(struct inode *inode, int mask);
162int ovl_setxattr(struct dentry *dentry, const char *name,
163 const void *value, size_t size, int flags);
164ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
165 void *value, size_t size);
166ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
167int ovl_removexattr(struct dentry *dentry, const char *name);
168
169struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
170 struct ovl_entry *oe);
171static inline void ovl_copyattr(struct inode *from, struct inode *to)
172{
173 to->i_uid = from->i_uid;
174 to->i_gid = from->i_gid;
175}
176
177/* dir.c */
178extern const struct inode_operations ovl_dir_inode_operations;
179struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry);
180int ovl_create_real(struct inode *dir, struct dentry *newdentry,
181 struct kstat *stat, const char *link,
182 struct dentry *hardlink, bool debug);
183void ovl_cleanup(struct inode *dir, struct dentry *dentry);
184
185/* copy_up.c */
186int ovl_copy_up(struct dentry *dentry);
187int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
188 struct path *lowerpath, struct kstat *stat,
189 struct iattr *attr);
190int ovl_copy_xattr(struct dentry *old, struct dentry *new);
191int ovl_set_attr(struct dentry *upper, struct kstat *stat);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
new file mode 100644
index 000000000000..c6787f84ece9
--- /dev/null
+++ b/fs/overlayfs/readdir.c
@@ -0,0 +1,587 @@
1/*
2 *
3 * Copyright (C) 2011 Novell Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
8 */
9
10#include <linux/fs.h>
11#include <linux/slab.h>
12#include <linux/namei.h>
13#include <linux/file.h>
14#include <linux/xattr.h>
15#include <linux/rbtree.h>
16#include <linux/security.h>
17#include <linux/cred.h>
18#include "overlayfs.h"
19
/*
 * One directory entry in a readdir cache.
 *
 * name:        NUL-terminated copy of the entry name, stored directly after
 *              the struct (see ovl_cache_entry_new()); NULL marks a cursor,
 *              which list walkers skip
 * len:         length of name, not counting the terminator
 * type:        d_type reported by the underlying directory
 * ino:         inode number reported by the underlying directory
 * is_whiteout: set by ovl_dir_mark_whiteouts()
 * l_node:      link in the cache's entry list
 * node:        link in the name-ordered rbtree used while filling
 */
20struct ovl_cache_entry {
21 const char *name;
22 unsigned int len;
23 unsigned int type;
24 u64 ino;
25 bool is_whiteout;
26 struct list_head l_node;
27 struct rb_node node;
28};
29
/*
 * Cached listing of a directory.
 *
 * refcount: number of users; taken in ovl_cache_get(), dropped in
 *           ovl_cache_put(), which frees the cache when it reaches zero
 * version:  value of ovl_dentry_version_get() when the cache was filled;
 *           a mismatch means the cache is stale
 * entries:  list of ovl_cache_entry, linked through l_node
 */
30struct ovl_dir_cache {
31 long refcount;
32 u64 version;
33 struct list_head entries;
34};
35
/*
 * State passed to the ovl_fill_merge() actor while reading a directory.
 *
 * ctx:      dir_context handed to iterate_dir()
 * is_merge: false: entries are added via ovl_cache_entry_add_rb();
 *           true: entries are handled by ovl_fill_lower()
 * root:     rbtree of entries, used to look up names already seen
 * list:     list that collects the entries
 * middle:   list position at which ovl_fill_lower() places entries
 * count:    entries seen in the current iterate_dir() pass
 * err:      error recorded by the actor
 */
36struct ovl_readdir_data {
37 struct dir_context ctx;
38 bool is_merge;
39 struct rb_root *root;
40 struct list_head *list;
41 struct list_head *middle;
42 int count;
43 int err;
44};
45
/*
 * Per-open-file state of an overlay directory (file->private_data).
 *
 * is_real:   when set, ovl_iterate() forwards straight to realfile
 * cache:     readdir cache in use, or NULL
 * cursor:    placeholder entry (name == NULL) linked into the cache list to
 *            mark the current position
 * realfile:  underlying file used when is_real is set
 *
 * NOTE(review): is_upper and upperfile are not used in the visible part of
 * this file; presumably they track whether realfile is on the upper layer
 * and a separately opened upper file -- confirm against the rest of
 * readdir.c.
 */
46struct ovl_dir_file {
47 bool is_real;
48 bool is_upper;
49 struct ovl_dir_cache *cache;
50 struct ovl_cache_entry cursor;
51 struct file *realfile;
52 struct file *upperfile;
53};
54
55static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
56{
57 return container_of(n, struct ovl_cache_entry, node);
58}
59
60static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
61 const char *name, int len)
62{
63 struct rb_node *node = root->rb_node;
64 int cmp;
65
66 while (node) {
67 struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
68
69 cmp = strncmp(name, p->name, len);
70 if (cmp > 0)
71 node = p->node.rb_right;
72 else if (cmp < 0 || len < p->len)
73 node = p->node.rb_left;
74 else
75 return p;
76 }
77
78 return NULL;
79}
80
81static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len,
82 u64 ino, unsigned int d_type)
83{
84 struct ovl_cache_entry *p;
85
86 p = kmalloc(sizeof(*p) + len + 1, GFP_KERNEL);
87 if (p) {
88 char *name_copy = (char *) (p + 1);
89 memcpy(name_copy, name, len);
90 name_copy[len] = '\0';
91 p->name = name_copy;
92 p->len = len;
93 p->type = d_type;
94 p->ino = ino;
95 p->is_whiteout = false;
96 }
97
98 return p;
99}
100
101static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
102 const char *name, int len, u64 ino,
103 unsigned int d_type)
104{
105 struct rb_node **newp = &rdd->root->rb_node;
106 struct rb_node *parent = NULL;
107 struct ovl_cache_entry *p;
108
109 while (*newp) {
110 int cmp;
111 struct ovl_cache_entry *tmp;
112
113 parent = *newp;
114 tmp = ovl_cache_entry_from_node(*newp);
115 cmp = strncmp(name, tmp->name, len);
116 if (cmp > 0)
117 newp = &tmp->node.rb_right;
118 else if (cmp < 0 || len < tmp->len)
119 newp = &tmp->node.rb_left;
120 else
121 return 0;
122 }
123
124 p = ovl_cache_entry_new(name, len, ino, d_type);
125 if (p == NULL)
126 return -ENOMEM;
127
128 list_add_tail(&p->l_node, rdd->list);
129 rb_link_node(&p->node, parent, newp);
130 rb_insert_color(&p->node, rdd->root);
131
132 return 0;
133}
134
135static int ovl_fill_lower(struct ovl_readdir_data *rdd,
136 const char *name, int namelen,
137 loff_t offset, u64 ino, unsigned int d_type)
138{
139 struct ovl_cache_entry *p;
140
141 p = ovl_cache_entry_find(rdd->root, name, namelen);
142 if (p) {
143 list_move_tail(&p->l_node, rdd->middle);
144 } else {
145 p = ovl_cache_entry_new(name, namelen, ino, d_type);
146 if (p == NULL)
147 rdd->err = -ENOMEM;
148 else
149 list_add_tail(&p->l_node, rdd->middle);
150 }
151
152 return rdd->err;
153}
154
155void ovl_cache_free(struct list_head *list)
156{
157 struct ovl_cache_entry *p;
158 struct ovl_cache_entry *n;
159
160 list_for_each_entry_safe(p, n, list, l_node)
161 kfree(p);
162
163 INIT_LIST_HEAD(list);
164}
165
166static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
167{
168 struct ovl_dir_cache *cache = od->cache;
169
170 list_del(&od->cursor.l_node);
171 WARN_ON(cache->refcount <= 0);
172 cache->refcount--;
173 if (!cache->refcount) {
174 if (ovl_dir_cache(dentry) == cache)
175 ovl_set_dir_cache(dentry, NULL);
176
177 ovl_cache_free(&cache->entries);
178 kfree(cache);
179 }
180}
181
182static int ovl_fill_merge(void *buf, const char *name, int namelen,
183 loff_t offset, u64 ino, unsigned int d_type)
184{
185 struct ovl_readdir_data *rdd = buf;
186
187 rdd->count++;
188 if (!rdd->is_merge)
189 return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
190 else
191 return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type);
192}
193
194static inline int ovl_dir_read(struct path *realpath,
195 struct ovl_readdir_data *rdd)
196{
197 struct file *realfile;
198 int err;
199
200 realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
201 if (IS_ERR(realfile))
202 return PTR_ERR(realfile);
203
204 rdd->ctx.pos = 0;
205 do {
206 rdd->count = 0;
207 rdd->err = 0;
208 err = iterate_dir(realfile, &rdd->ctx);
209 if (err >= 0)
210 err = rdd->err;
211 } while (!err && rdd->count);
212 fput(realfile);
213
214 return err;
215}
216
217static void ovl_dir_reset(struct file *file)
218{
219 struct ovl_dir_file *od = file->private_data;
220 struct ovl_dir_cache *cache = od->cache;
221 struct dentry *dentry = file->f_path.dentry;
222 enum ovl_path_type type = ovl_path_type(dentry);
223
224 if (cache && ovl_dentry_version_get(dentry) != cache->version) {
225 ovl_cache_put(od, dentry);
226 od->cache = NULL;
227 }
228 WARN_ON(!od->is_real && type != OVL_PATH_MERGE);
229 if (od->is_real && type == OVL_PATH_MERGE)
230 od->is_real = false;
231}
232
233static int ovl_dir_mark_whiteouts(struct dentry *dir,
234 struct ovl_readdir_data *rdd)
235{
236 struct ovl_cache_entry *p;
237 struct dentry *dentry;
238 const struct cred *old_cred;
239 struct cred *override_cred;
240
241 override_cred = prepare_creds();
242 if (!override_cred) {
243 ovl_cache_free(rdd->list);
244 return -ENOMEM;
245 }
246
247 /*
248 * CAP_DAC_OVERRIDE for lookup
249 */
250 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
251 old_cred = override_creds(override_cred);
252
253 mutex_lock(&dir->d_inode->i_mutex);
254 list_for_each_entry(p, rdd->list, l_node) {
255 if (!p->name)
256 continue;
257
258 if (p->type != DT_CHR)
259 continue;
260
261 dentry = lookup_one_len(p->name, dir, p->len);
262 if (IS_ERR(dentry))
263 continue;
264
265 p->is_whiteout = ovl_is_whiteout(dentry);
266 dput(dentry);
267 }
268 mutex_unlock(&dir->d_inode->i_mutex);
269
270 revert_creds(old_cred);
271 put_cred(override_cred);
272
273 return 0;
274}
275
276static inline int ovl_dir_read_merged(struct path *upperpath,
277 struct path *lowerpath,
278 struct list_head *list)
279{
280 int err;
281 struct rb_root root = RB_ROOT;
282 struct list_head middle;
283 struct ovl_readdir_data rdd = {
284 .ctx.actor = ovl_fill_merge,
285 .list = list,
286 .root = &root,
287 .is_merge = false,
288 };
289
290 if (upperpath->dentry) {
291 err = ovl_dir_read(upperpath, &rdd);
292 if (err)
293 goto out;
294
295 if (lowerpath->dentry) {
296 err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd);
297 if (err)
298 goto out;
299 }
300 }
301 if (lowerpath->dentry) {
302 /*
303 * Insert lowerpath entries before upperpath ones, this allows
304 * offsets to be reasonably constant
305 */
306 list_add(&middle, rdd.list);
307 rdd.middle = &middle;
308 rdd.is_merge = true;
309 err = ovl_dir_read(lowerpath, &rdd);
310 list_del(&middle);
311 }
312out:
313 return err;
314
315}
316
317static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
318{
319 struct ovl_cache_entry *p;
320 loff_t off = 0;
321
322 list_for_each_entry(p, &od->cache->entries, l_node) {
323 if (!p->name)
324 continue;
325 if (off >= pos)
326 break;
327 off++;
328 }
329 list_move_tail(&od->cursor.l_node, &p->l_node);
330}
331
332static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
333{
334 int res;
335 struct path lowerpath;
336 struct path upperpath;
337 struct ovl_dir_cache *cache;
338
339 cache = ovl_dir_cache(dentry);
340 if (cache && ovl_dentry_version_get(dentry) == cache->version) {
341 cache->refcount++;
342 return cache;
343 }
344 ovl_set_dir_cache(dentry, NULL);
345
346 cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
347 if (!cache)
348 return ERR_PTR(-ENOMEM);
349
350 cache->refcount = 1;
351 INIT_LIST_HEAD(&cache->entries);
352
353 ovl_path_lower(dentry, &lowerpath);
354 ovl_path_upper(dentry, &upperpath);
355
356 res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries);
357 if (res) {
358 ovl_cache_free(&cache->entries);
359 kfree(cache);
360 return ERR_PTR(res);
361 }
362
363 cache->version = ovl_dentry_version_get(dentry);
364 ovl_set_dir_cache(dentry, cache);
365
366 return cache;
367}
368
369static int ovl_iterate(struct file *file, struct dir_context *ctx)
370{
371 struct ovl_dir_file *od = file->private_data;
372 struct dentry *dentry = file->f_path.dentry;
373
374 if (!ctx->pos)
375 ovl_dir_reset(file);
376
377 if (od->is_real)
378 return iterate_dir(od->realfile, ctx);
379
380 if (!od->cache) {
381 struct ovl_dir_cache *cache;
382
383 cache = ovl_cache_get(dentry);
384 if (IS_ERR(cache))
385 return PTR_ERR(cache);
386
387 od->cache = cache;
388 ovl_seek_cursor(od, ctx->pos);
389 }
390
391 while (od->cursor.l_node.next != &od->cache->entries) {
392 struct ovl_cache_entry *p;
393
394 p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node);
395 /* Skip cursors */
396 if (p->name) {
397 if (!p->is_whiteout) {
398 if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
399 break;
400 }
401 ctx->pos++;
402 }
403 list_move(&od->cursor.l_node, &p->l_node);
404 }
405 return 0;
406}
407
408static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
409{
410 loff_t res;
411 struct ovl_dir_file *od = file->private_data;
412
413 mutex_lock(&file_inode(file)->i_mutex);
414 if (!file->f_pos)
415 ovl_dir_reset(file);
416
417 if (od->is_real) {
418 res = vfs_llseek(od->realfile, offset, origin);
419 file->f_pos = od->realfile->f_pos;
420 } else {
421 res = -EINVAL;
422
423 switch (origin) {
424 case SEEK_CUR:
425 offset += file->f_pos;
426 break;
427 case SEEK_SET:
428 break;
429 default:
430 goto out_unlock;
431 }
432 if (offset < 0)
433 goto out_unlock;
434
435 if (offset != file->f_pos) {
436 file->f_pos = offset;
437 if (od->cache)
438 ovl_seek_cursor(od, offset);
439 }
440 res = offset;
441 }
442out_unlock:
443 mutex_unlock(&file_inode(file)->i_mutex);
444
445 return res;
446}
447
448static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
449 int datasync)
450{
451 struct ovl_dir_file *od = file->private_data;
452 struct dentry *dentry = file->f_path.dentry;
453 struct file *realfile = od->realfile;
454
455 /*
456 * Need to check if we started out being a lower dir, but got copied up
457 */
458 if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) {
459 struct inode *inode = file_inode(file);
460
461 mutex_lock(&inode->i_mutex);
462 realfile = od->upperfile;
463 if (!realfile) {
464 struct path upperpath;
465
466 ovl_path_upper(dentry, &upperpath);
467 realfile = ovl_path_open(&upperpath, O_RDONLY);
468 if (IS_ERR(realfile)) {
469 mutex_unlock(&inode->i_mutex);
470 return PTR_ERR(realfile);
471 }
472 od->upperfile = realfile;
473 }
474 mutex_unlock(&inode->i_mutex);
475 }
476
477 return vfs_fsync_range(realfile, start, end, datasync);
478}
479
480static int ovl_dir_release(struct inode *inode, struct file *file)
481{
482 struct ovl_dir_file *od = file->private_data;
483
484 if (od->cache) {
485 mutex_lock(&inode->i_mutex);
486 ovl_cache_put(od, file->f_path.dentry);
487 mutex_unlock(&inode->i_mutex);
488 }
489 fput(od->realfile);
490 if (od->upperfile)
491 fput(od->upperfile);
492 kfree(od);
493
494 return 0;
495}
496
497static int ovl_dir_open(struct inode *inode, struct file *file)
498{
499 struct path realpath;
500 struct file *realfile;
501 struct ovl_dir_file *od;
502 enum ovl_path_type type;
503
504 od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
505 if (!od)
506 return -ENOMEM;
507
508 type = ovl_path_real(file->f_path.dentry, &realpath);
509 realfile = ovl_path_open(&realpath, file->f_flags);
510 if (IS_ERR(realfile)) {
511 kfree(od);
512 return PTR_ERR(realfile);
513 }
514 INIT_LIST_HEAD(&od->cursor.l_node);
515 od->realfile = realfile;
516 od->is_real = (type != OVL_PATH_MERGE);
517 od->is_upper = (type != OVL_PATH_LOWER);
518 file->private_data = od;
519
520 return 0;
521}
522
523const struct file_operations ovl_dir_operations = {
524 .read = generic_read_dir,
525 .open = ovl_dir_open,
526 .iterate = ovl_iterate,
527 .llseek = ovl_dir_llseek,
528 .fsync = ovl_dir_fsync,
529 .release = ovl_dir_release,
530};
531
532int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
533{
534 int err;
535 struct path lowerpath;
536 struct path upperpath;
537 struct ovl_cache_entry *p;
538
539 ovl_path_upper(dentry, &upperpath);
540 ovl_path_lower(dentry, &lowerpath);
541
542 err = ovl_dir_read_merged(&upperpath, &lowerpath, list);
543 if (err)
544 return err;
545
546 err = 0;
547
548 list_for_each_entry(p, list, l_node) {
549 if (p->is_whiteout)
550 continue;
551
552 if (p->name[0] == '.') {
553 if (p->len == 1)
554 continue;
555 if (p->len == 2 && p->name[1] == '.')
556 continue;
557 }
558 err = -ENOTEMPTY;
559 break;
560 }
561
562 return err;
563}
564
565void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
566{
567 struct ovl_cache_entry *p;
568
569 mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_PARENT);
570 list_for_each_entry(p, list, l_node) {
571 struct dentry *dentry;
572
573 if (!p->is_whiteout)
574 continue;
575
576 dentry = lookup_one_len(p->name, upper, p->len);
577 if (IS_ERR(dentry)) {
578 pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
579 upper->d_name.name, p->len, p->name,
580 (int) PTR_ERR(dentry));
581 continue;
582 }
583 ovl_cleanup(upper->d_inode, dentry);
584 dput(dentry);
585 }
586 mutex_unlock(&upper->d_inode->i_mutex);
587}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
new file mode 100644
index 000000000000..227710aad781
--- /dev/null
+++ b/fs/overlayfs/super.c
@@ -0,0 +1,727 @@
1/*
2 *
3 * Copyright (C) 2011 Novell Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
8 */
9
10#include <linux/fs.h>
11#include <linux/namei.h>
12#include <linux/xattr.h>
13#include <linux/security.h>
14#include <linux/mount.h>
15#include <linux/slab.h>
16#include <linux/parser.h>
17#include <linux/module.h>
18#include <linux/sched.h>
19#include "overlayfs.h"
20
21MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
22MODULE_DESCRIPTION("Overlay filesystem");
23MODULE_LICENSE("GPL");
24
/*
 * Per-superblock private data, stored in sb->s_fs_info by ovl_fill_super()
 * and released by ovl_put_super().
 */
struct ovl_fs {
	/* private clone of the mount holding the upper directory */
	struct vfsmount *upper_mnt;
	/* private clone of the mount holding the lower directory */
	struct vfsmount *lower_mnt;
	/* directory created by ovl_workdir_create() under the workdir= path */
	struct dentry *workdir;
};
31
32struct ovl_dir_cache;
33
34/* private information held for every overlayfs dentry */
35struct ovl_entry {
36 struct dentry *__upperdentry;
37 struct dentry *lowerdentry;
38 struct ovl_dir_cache *cache;
39 union {
40 struct {
41 u64 version;
42 bool opaque;
43 };
44 struct rcu_head rcu;
45 };
46};
47
48const char *ovl_opaque_xattr = "trusted.overlay.opaque";
49
50
51enum ovl_path_type ovl_path_type(struct dentry *dentry)
52{
53 struct ovl_entry *oe = dentry->d_fsdata;
54
55 if (oe->__upperdentry) {
56 if (oe->lowerdentry) {
57 if (S_ISDIR(dentry->d_inode->i_mode))
58 return OVL_PATH_MERGE;
59 else
60 return OVL_PATH_UPPER;
61 } else {
62 if (oe->opaque)
63 return OVL_PATH_UPPER;
64 else
65 return OVL_PATH_PURE_UPPER;
66 }
67 } else {
68 return OVL_PATH_LOWER;
69 }
70}
71
72static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
73{
74 struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry);
75 /*
76 * Make sure to order reads to upperdentry wrt ovl_dentry_update()
77 */
78 smp_read_barrier_depends();
79 return upperdentry;
80}
81
82void ovl_path_upper(struct dentry *dentry, struct path *path)
83{
84 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
85 struct ovl_entry *oe = dentry->d_fsdata;
86
87 path->mnt = ofs->upper_mnt;
88 path->dentry = ovl_upperdentry_dereference(oe);
89}
90
91enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
92{
93
94 enum ovl_path_type type = ovl_path_type(dentry);
95
96 if (type == OVL_PATH_LOWER)
97 ovl_path_lower(dentry, path);
98 else
99 ovl_path_upper(dentry, path);
100
101 return type;
102}
103
104struct dentry *ovl_dentry_upper(struct dentry *dentry)
105{
106 struct ovl_entry *oe = dentry->d_fsdata;
107
108 return ovl_upperdentry_dereference(oe);
109}
110
111struct dentry *ovl_dentry_lower(struct dentry *dentry)
112{
113 struct ovl_entry *oe = dentry->d_fsdata;
114
115 return oe->lowerdentry;
116}
117
118struct dentry *ovl_dentry_real(struct dentry *dentry)
119{
120 struct ovl_entry *oe = dentry->d_fsdata;
121 struct dentry *realdentry;
122
123 realdentry = ovl_upperdentry_dereference(oe);
124 if (!realdentry)
125 realdentry = oe->lowerdentry;
126
127 return realdentry;
128}
129
130struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper)
131{
132 struct dentry *realdentry;
133
134 realdentry = ovl_upperdentry_dereference(oe);
135 if (realdentry) {
136 *is_upper = true;
137 } else {
138 realdentry = oe->lowerdentry;
139 *is_upper = false;
140 }
141 return realdentry;
142}
143
144struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
145{
146 struct ovl_entry *oe = dentry->d_fsdata;
147
148 return oe->cache;
149}
150
151void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache)
152{
153 struct ovl_entry *oe = dentry->d_fsdata;
154
155 oe->cache = cache;
156}
157
158void ovl_path_lower(struct dentry *dentry, struct path *path)
159{
160 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
161 struct ovl_entry *oe = dentry->d_fsdata;
162
163 path->mnt = ofs->lower_mnt;
164 path->dentry = oe->lowerdentry;
165}
166
167int ovl_want_write(struct dentry *dentry)
168{
169 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
170 return mnt_want_write(ofs->upper_mnt);
171}
172
173void ovl_drop_write(struct dentry *dentry)
174{
175 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
176 mnt_drop_write(ofs->upper_mnt);
177}
178
179struct dentry *ovl_workdir(struct dentry *dentry)
180{
181 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
182 return ofs->workdir;
183}
184
185bool ovl_dentry_is_opaque(struct dentry *dentry)
186{
187 struct ovl_entry *oe = dentry->d_fsdata;
188 return oe->opaque;
189}
190
191void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque)
192{
193 struct ovl_entry *oe = dentry->d_fsdata;
194 oe->opaque = opaque;
195}
196
197void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
198{
199 struct ovl_entry *oe = dentry->d_fsdata;
200
201 WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex));
202 WARN_ON(oe->__upperdentry);
203 BUG_ON(!upperdentry->d_inode);
204 /*
205 * Make sure upperdentry is consistent before making it visible to
206 * ovl_upperdentry_dereference().
207 */
208 smp_wmb();
209 oe->__upperdentry = upperdentry;
210}
211
212void ovl_dentry_version_inc(struct dentry *dentry)
213{
214 struct ovl_entry *oe = dentry->d_fsdata;
215
216 WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
217 oe->version++;
218}
219
220u64 ovl_dentry_version_get(struct dentry *dentry)
221{
222 struct ovl_entry *oe = dentry->d_fsdata;
223
224 WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
225 return oe->version;
226}
227
228bool ovl_is_whiteout(struct dentry *dentry)
229{
230 struct inode *inode = dentry->d_inode;
231
232 return inode && IS_WHITEOUT(inode);
233}
234
235static bool ovl_is_opaquedir(struct dentry *dentry)
236{
237 int res;
238 char val;
239 struct inode *inode = dentry->d_inode;
240
241 if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr)
242 return false;
243
244 res = inode->i_op->getxattr(dentry, ovl_opaque_xattr, &val, 1);
245 if (res == 1 && val == 'y')
246 return true;
247
248 return false;
249}
250
251static void ovl_dentry_release(struct dentry *dentry)
252{
253 struct ovl_entry *oe = dentry->d_fsdata;
254
255 if (oe) {
256 dput(oe->__upperdentry);
257 dput(oe->lowerdentry);
258 kfree_rcu(oe, rcu);
259 }
260}
261
262static const struct dentry_operations ovl_dentry_operations = {
263 .d_release = ovl_dentry_release,
264};
265
266static struct ovl_entry *ovl_alloc_entry(void)
267{
268 return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL);
269}
270
271static inline struct dentry *ovl_lookup_real(struct dentry *dir,
272 struct qstr *name)
273{
274 struct dentry *dentry;
275
276 mutex_lock(&dir->d_inode->i_mutex);
277 dentry = lookup_one_len(name->name, dir, name->len);
278 mutex_unlock(&dir->d_inode->i_mutex);
279
280 if (IS_ERR(dentry)) {
281 if (PTR_ERR(dentry) == -ENOENT)
282 dentry = NULL;
283 } else if (!dentry->d_inode) {
284 dput(dentry);
285 dentry = NULL;
286 }
287 return dentry;
288}
289
290struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
291 unsigned int flags)
292{
293 struct ovl_entry *oe;
294 struct dentry *upperdir;
295 struct dentry *lowerdir;
296 struct dentry *upperdentry = NULL;
297 struct dentry *lowerdentry = NULL;
298 struct inode *inode = NULL;
299 int err;
300
301 err = -ENOMEM;
302 oe = ovl_alloc_entry();
303 if (!oe)
304 goto out;
305
306 upperdir = ovl_dentry_upper(dentry->d_parent);
307 lowerdir = ovl_dentry_lower(dentry->d_parent);
308
309 if (upperdir) {
310 upperdentry = ovl_lookup_real(upperdir, &dentry->d_name);
311 err = PTR_ERR(upperdentry);
312 if (IS_ERR(upperdentry))
313 goto out_put_dir;
314
315 if (lowerdir && upperdentry) {
316 if (ovl_is_whiteout(upperdentry)) {
317 dput(upperdentry);
318 upperdentry = NULL;
319 oe->opaque = true;
320 } else if (ovl_is_opaquedir(upperdentry)) {
321 oe->opaque = true;
322 }
323 }
324 }
325 if (lowerdir && !oe->opaque) {
326 lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name);
327 err = PTR_ERR(lowerdentry);
328 if (IS_ERR(lowerdentry))
329 goto out_dput_upper;
330 }
331
332 if (lowerdentry && upperdentry &&
333 (!S_ISDIR(upperdentry->d_inode->i_mode) ||
334 !S_ISDIR(lowerdentry->d_inode->i_mode))) {
335 dput(lowerdentry);
336 lowerdentry = NULL;
337 oe->opaque = true;
338 }
339
340 if (lowerdentry || upperdentry) {
341 struct dentry *realdentry;
342
343 realdentry = upperdentry ? upperdentry : lowerdentry;
344 err = -ENOMEM;
345 inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode,
346 oe);
347 if (!inode)
348 goto out_dput;
349 ovl_copyattr(realdentry->d_inode, inode);
350 }
351
352 oe->__upperdentry = upperdentry;
353 oe->lowerdentry = lowerdentry;
354
355 dentry->d_fsdata = oe;
356 d_add(dentry, inode);
357
358 return NULL;
359
360out_dput:
361 dput(lowerdentry);
362out_dput_upper:
363 dput(upperdentry);
364out_put_dir:
365 kfree(oe);
366out:
367 return ERR_PTR(err);
368}
369
/*
 * Open @path with @flags using the current task's credentials.  Returns
 * whatever dentry_open() returns.
 */
struct file *ovl_path_open(struct path *path, int flags)
{
	const struct cred *cred = current_cred();

	return dentry_open(path, flags, cred);
}
374
375static void ovl_put_super(struct super_block *sb)
376{
377 struct ovl_fs *ufs = sb->s_fs_info;
378
379 dput(ufs->workdir);
380 mntput(ufs->upper_mnt);
381 mntput(ufs->lower_mnt);
382
383 kfree(ufs);
384}
385
386static const struct super_operations ovl_super_operations = {
387 .put_super = ovl_put_super,
388};
389
390struct ovl_config {
391 char *lowerdir;
392 char *upperdir;
393 char *workdir;
394};
395
396enum {
397 OPT_LOWERDIR,
398 OPT_UPPERDIR,
399 OPT_WORKDIR,
400 OPT_ERR,
401};
402
403static const match_table_t ovl_tokens = {
404 {OPT_LOWERDIR, "lowerdir=%s"},
405 {OPT_UPPERDIR, "upperdir=%s"},
406 {OPT_WORKDIR, "workdir=%s"},
407 {OPT_ERR, NULL}
408};
409
410static int ovl_parse_opt(char *opt, struct ovl_config *config)
411{
412 char *p;
413
414 config->upperdir = NULL;
415 config->lowerdir = NULL;
416 config->workdir = NULL;
417
418 while ((p = strsep(&opt, ",")) != NULL) {
419 int token;
420 substring_t args[MAX_OPT_ARGS];
421
422 if (!*p)
423 continue;
424
425 token = match_token(p, ovl_tokens, args);
426 switch (token) {
427 case OPT_UPPERDIR:
428 kfree(config->upperdir);
429 config->upperdir = match_strdup(&args[0]);
430 if (!config->upperdir)
431 return -ENOMEM;
432 break;
433
434 case OPT_LOWERDIR:
435 kfree(config->lowerdir);
436 config->lowerdir = match_strdup(&args[0]);
437 if (!config->lowerdir)
438 return -ENOMEM;
439 break;
440
441 case OPT_WORKDIR:
442 kfree(config->workdir);
443 config->workdir = match_strdup(&args[0]);
444 if (!config->workdir)
445 return -ENOMEM;
446 break;
447
448 default:
449 return -EINVAL;
450 }
451 }
452 return 0;
453}
454
455#define OVL_WORKDIR_NAME "work"
456
457static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
458 struct dentry *dentry)
459{
460 struct inode *dir = dentry->d_inode;
461 struct dentry *work;
462 int err;
463 bool retried = false;
464
465 err = mnt_want_write(mnt);
466 if (err)
467 return ERR_PTR(err);
468
469 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
470retry:
471 work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
472 strlen(OVL_WORKDIR_NAME));
473
474 if (!IS_ERR(work)) {
475 struct kstat stat = {
476 .mode = S_IFDIR | 0,
477 };
478
479 if (work->d_inode) {
480 err = -EEXIST;
481 if (retried)
482 goto out_dput;
483
484 retried = true;
485 ovl_cleanup(dir, work);
486 dput(work);
487 goto retry;
488 }
489
490 err = ovl_create_real(dir, work, &stat, NULL, NULL, true);
491 if (err)
492 goto out_dput;
493 }
494out_unlock:
495 mutex_unlock(&dir->i_mutex);
496 mnt_drop_write(mnt);
497
498 return work;
499
500out_dput:
501 dput(work);
502 work = ERR_PTR(err);
503 goto out_unlock;
504}
505
506static int ovl_mount_dir(const char *name, struct path *path)
507{
508 int err;
509
510 err = kern_path(name, LOOKUP_FOLLOW, path);
511 if (err) {
512 pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
513 err = -EINVAL;
514 }
515 return err;
516}
517
518static bool ovl_is_allowed_fs_type(struct dentry *root)
519{
520 const struct dentry_operations *dop = root->d_op;
521
522 /*
523 * We don't support:
524 * - automount filesystems
525 * - filesystems with revalidate (FIXME for lower layer)
526 * - filesystems with case insensitive names
527 */
528 if (dop &&
529 (dop->d_manage || dop->d_automount ||
530 dop->d_revalidate || dop->d_weak_revalidate ||
531 dop->d_compare || dop->d_hash)) {
532 return false;
533 }
534 return true;
535}
536
537/* Workdir should not be subdir of upperdir and vice versa */
538static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
539{
540 bool ok = false;
541
542 if (workdir != upperdir) {
543 ok = (lock_rename(workdir, upperdir) == NULL);
544 unlock_rename(workdir, upperdir);
545 }
546 return ok;
547}
548
549static int ovl_fill_super(struct super_block *sb, void *data, int silent)
550{
551 struct path lowerpath;
552 struct path upperpath;
553 struct path workpath;
554 struct inode *root_inode;
555 struct dentry *root_dentry;
556 struct ovl_entry *oe;
557 struct ovl_fs *ufs;
558 struct ovl_config config;
559 int err;
560
561 err = ovl_parse_opt((char *) data, &config);
562 if (err)
563 goto out;
564
565 /* FIXME: workdir is not needed for a R/O mount */
566 err = -EINVAL;
567 if (!config.upperdir || !config.lowerdir || !config.workdir) {
568 pr_err("overlayfs: missing upperdir or lowerdir or workdir\n");
569 goto out_free_config;
570 }
571
572 err = -ENOMEM;
573 ufs = kmalloc(sizeof(struct ovl_fs), GFP_KERNEL);
574 if (!ufs)
575 goto out_free_config;
576
577 oe = ovl_alloc_entry();
578 if (oe == NULL)
579 goto out_free_ufs;
580
581 err = ovl_mount_dir(config.upperdir, &upperpath);
582 if (err)
583 goto out_free_oe;
584
585 err = ovl_mount_dir(config.lowerdir, &lowerpath);
586 if (err)
587 goto out_put_upperpath;
588
589 err = ovl_mount_dir(config.workdir, &workpath);
590 if (err)
591 goto out_put_lowerpath;
592
593 err = -EINVAL;
594 if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) ||
595 !S_ISDIR(lowerpath.dentry->d_inode->i_mode) ||
596 !S_ISDIR(workpath.dentry->d_inode->i_mode)) {
597 pr_err("overlayfs: upperdir or lowerdir or workdir not a directory\n");
598 goto out_put_workpath;
599 }
600
601 if (upperpath.mnt != workpath.mnt) {
602 pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
603 goto out_put_workpath;
604 }
605 if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) {
606 pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
607 goto out_put_workpath;
608 }
609
610 if (!ovl_is_allowed_fs_type(upperpath.dentry)) {
611 pr_err("overlayfs: filesystem of upperdir is not supported\n");
612 goto out_put_workpath;
613 }
614
615 if (!ovl_is_allowed_fs_type(lowerpath.dentry)) {
616 pr_err("overlayfs: filesystem of lowerdir is not supported\n");
617 goto out_put_workpath;
618 }
619
620 ufs->upper_mnt = clone_private_mount(&upperpath);
621 err = PTR_ERR(ufs->upper_mnt);
622 if (IS_ERR(ufs->upper_mnt)) {
623 pr_err("overlayfs: failed to clone upperpath\n");
624 goto out_put_workpath;
625 }
626
627 ufs->lower_mnt = clone_private_mount(&lowerpath);
628 err = PTR_ERR(ufs->lower_mnt);
629 if (IS_ERR(ufs->lower_mnt)) {
630 pr_err("overlayfs: failed to clone lowerpath\n");
631 goto out_put_upper_mnt;
632 }
633
634 ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
635 err = PTR_ERR(ufs->workdir);
636 if (IS_ERR(ufs->workdir)) {
637 pr_err("overlayfs: failed to create directory %s/%s\n",
638 config.workdir, OVL_WORKDIR_NAME);
639 goto out_put_lower_mnt;
640 }
641
642 /*
643 * Make lower_mnt R/O. That way fchmod/fchown on lower file
644 * will fail instead of modifying lower fs.
645 */
646 ufs->lower_mnt->mnt_flags |= MNT_READONLY;
647
648 /* If the upper fs is r/o, we mark overlayfs r/o too */
649 if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)
650 sb->s_flags |= MS_RDONLY;
651
652 sb->s_d_op = &ovl_dentry_operations;
653
654 err = -ENOMEM;
655 root_inode = ovl_new_inode(sb, S_IFDIR, oe);
656 if (!root_inode)
657 goto out_put_workdir;
658
659 root_dentry = d_make_root(root_inode);
660 if (!root_dentry)
661 goto out_put_workdir;
662
663 mntput(upperpath.mnt);
664 mntput(lowerpath.mnt);
665 path_put(&workpath);
666
667 oe->__upperdentry = upperpath.dentry;
668 oe->lowerdentry = lowerpath.dentry;
669
670 root_dentry->d_fsdata = oe;
671
672 sb->s_op = &ovl_super_operations;
673 sb->s_root = root_dentry;
674 sb->s_fs_info = ufs;
675
676 return 0;
677
678out_put_workdir:
679 dput(ufs->workdir);
680out_put_lower_mnt:
681 mntput(ufs->lower_mnt);
682out_put_upper_mnt:
683 mntput(ufs->upper_mnt);
684out_put_workpath:
685 path_put(&workpath);
686out_put_lowerpath:
687 path_put(&lowerpath);
688out_put_upperpath:
689 path_put(&upperpath);
690out_free_oe:
691 kfree(oe);
692out_free_ufs:
693 kfree(ufs);
694out_free_config:
695 kfree(config.lowerdir);
696 kfree(config.upperdir);
697 kfree(config.workdir);
698out:
699 return err;
700}
701
702static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
703 const char *dev_name, void *raw_data)
704{
705 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
706}
707
708static struct file_system_type ovl_fs_type = {
709 .owner = THIS_MODULE,
710 .name = "overlayfs",
711 .mount = ovl_mount,
712 .kill_sb = kill_anon_super,
713};
714MODULE_ALIAS_FS("overlayfs");
715
716static int __init ovl_init(void)
717{
718 return register_filesystem(&ovl_fs_type);
719}
720
721static void __exit ovl_exit(void)
722{
723 unregister_filesystem(&ovl_fs_type);
724}
725
/* Module entry and exit points */
module_exit(ovl_exit);
module_init(ovl_init);