diff options
-rw-r--r-- | Documentation/filesystems/Locking | 2 | ||||
-rw-r--r-- | Documentation/filesystems/overlayfs.txt | 198 | ||||
-rw-r--r-- | Documentation/filesystems/vfs.txt | 7 | ||||
-rw-r--r-- | MAINTAINERS | 7 | ||||
-rw-r--r-- | fs/Kconfig | 1 | ||||
-rw-r--r-- | fs/Makefile | 1 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 20 | ||||
-rw-r--r-- | fs/ecryptfs/main.c | 7 | ||||
-rw-r--r-- | fs/ext4/namei.c | 95 | ||||
-rw-r--r-- | fs/internal.h | 7 | ||||
-rw-r--r-- | fs/namei.c | 41 | ||||
-rw-r--r-- | fs/namespace.c | 27 | ||||
-rw-r--r-- | fs/open.c | 23 | ||||
-rw-r--r-- | fs/overlayfs/Kconfig | 10 | ||||
-rw-r--r-- | fs/overlayfs/Makefile | 7 | ||||
-rw-r--r-- | fs/overlayfs/copy_up.c | 414 | ||||
-rw-r--r-- | fs/overlayfs/dir.c | 921 | ||||
-rw-r--r-- | fs/overlayfs/inode.c | 425 | ||||
-rw-r--r-- | fs/overlayfs/overlayfs.h | 191 | ||||
-rw-r--r-- | fs/overlayfs/readdir.c | 587 | ||||
-rw-r--r-- | fs/overlayfs/super.c | 796 | ||||
-rw-r--r-- | fs/splice.c | 1 | ||||
-rw-r--r-- | include/linux/fs.h | 39 | ||||
-rw-r--r-- | include/linux/mount.h | 3 | ||||
-rw-r--r-- | include/uapi/linux/fs.h | 1 | ||||
-rw-r--r-- | mm/shmem.c | 36 |
26 files changed, 3809 insertions, 58 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 94d93b1f8b53..b30753cbf431 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -67,6 +67,7 @@ prototypes: | |||
67 | struct file *, unsigned open_flag, | 67 | struct file *, unsigned open_flag, |
68 | umode_t create_mode, int *opened); | 68 | umode_t create_mode, int *opened); |
69 | int (*tmpfile) (struct inode *, struct dentry *, umode_t); | 69 | int (*tmpfile) (struct inode *, struct dentry *, umode_t); |
70 | int (*dentry_open)(struct dentry *, struct file *, const struct cred *); | ||
70 | 71 | ||
71 | locking rules: | 72 | locking rules: |
72 | all may block | 73 | all may block |
@@ -96,6 +97,7 @@ fiemap: no | |||
96 | update_time: no | 97 | update_time: no |
97 | atomic_open: yes | 98 | atomic_open: yes |
98 | tmpfile: no | 99 | tmpfile: no |
100 | dentry_open: no | ||
99 | 101 | ||
100 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on | 102 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on |
101 | victim. | 103 | victim. |
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt new file mode 100644 index 000000000000..530850a72735 --- /dev/null +++ b/Documentation/filesystems/overlayfs.txt | |||
@@ -0,0 +1,198 @@ | |||
1 | Written by: Neil Brown <neilb@suse.de> | ||
2 | |||
3 | Overlay Filesystem | ||
4 | ================== | ||
5 | |||
6 | This document describes a prototype for a new approach to providing | ||
7 | overlay-filesystem functionality in Linux (sometimes referred to as | ||
8 | union-filesystems). An overlay-filesystem tries to present a | ||
9 | filesystem which is the result of overlaying one filesystem on top | ||
10 | of the other. | ||
11 | |||
12 | The result will inevitably fail to look exactly like a normal | ||
13 | filesystem for various technical reasons. The expectation is that | ||
14 | many use cases will be able to ignore these differences. | ||
15 | |||
16 | This approach is 'hybrid' because the objects that appear in the | ||
17 | filesystem do not all appear to belong to that filesystem. In many | ||
18 | cases an object accessed in the union will be indistinguishable | ||
19 | from accessing the corresponding object from the original filesystem. | ||
20 | This is most obvious from the 'st_dev' field returned by stat(2). | ||
21 | |||
22 | While directories will report an st_dev from the overlay-filesystem, | ||
23 | all non-directory objects will report an st_dev from the lower or | ||
24 | upper filesystem that is providing the object. Similarly st_ino will | ||
25 | only be unique when combined with st_dev, and both of these can change | ||
26 | over the lifetime of a non-directory object. Many applications and | ||
27 | tools ignore these values and will not be affected. | ||
28 | |||
29 | Upper and Lower | ||
30 | --------------- | ||
31 | |||
32 | An overlay filesystem combines two filesystems - an 'upper' filesystem | ||
33 | and a 'lower' filesystem. When a name exists in both filesystems, the | ||
34 | object in the 'upper' filesystem is visible while the object in the | ||
35 | 'lower' filesystem is either hidden or, in the case of directories, | ||
36 | merged with the 'upper' object. | ||
37 | |||
38 | It would be more correct to refer to an upper and lower 'directory | ||
39 | tree' rather than 'filesystem' as it is quite possible for both | ||
40 | directory trees to be in the same filesystem and there is no | ||
41 | requirement that the root of a filesystem be given for either upper or | ||
42 | lower. | ||
43 | |||
44 | The lower filesystem can be any filesystem supported by Linux and does | ||
45 | not need to be writable. The lower filesystem can even be another | ||
46 | overlayfs. The upper filesystem will normally be writable and if it | ||
47 | is it must support the creation of trusted.* extended attributes, and | ||
48 | must provide valid d_type in readdir responses, so NFS is not suitable. | ||
49 | |||
50 | A read-only overlay of two read-only filesystems may use any | ||
51 | filesystem type. | ||
52 | |||
53 | Directories | ||
54 | ----------- | ||
55 | |||
56 | Overlaying mainly involves directories. If a given name appears in both | ||
57 | upper and lower filesystems and refers to a non-directory in either, | ||
58 | then the lower object is hidden - the name refers only to the upper | ||
59 | object. | ||
60 | |||
61 | Where both upper and lower objects are directories, a merged directory | ||
62 | is formed. | ||
63 | |||
64 | At mount time, the two directories given as mount options "lowerdir" and | ||
65 | "upperdir" are combined into a merged directory: | ||
66 | |||
67 | mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper,\ | ||
68 | workdir=/work /merged | ||
69 | |||
70 | The "workdir" needs to be an empty directory on the same filesystem | ||
71 | as upperdir. | ||
72 | |||
73 | Then whenever a lookup is requested in such a merged directory, the | ||
74 | lookup is performed in each actual directory and the combined result | ||
75 | is cached in the dentry belonging to the overlay filesystem. If both | ||
76 | actual lookups find directories, both are stored and a merged | ||
77 | directory is created, otherwise only one is stored: the upper if it | ||
78 | exists, else the lower. | ||
79 | |||
80 | Only the lists of names from directories are merged. Other content | ||
81 | such as metadata and extended attributes are reported for the upper | ||
82 | directory only. These attributes of the lower directory are hidden. | ||
83 | |||
84 | whiteouts and opaque directories | ||
85 | -------------------------------- | ||
86 | |||
87 | In order to support rm and rmdir without changing the lower | ||
88 | filesystem, an overlay filesystem needs to record in the upper filesystem | ||
89 | that files have been removed. This is done using whiteouts and opaque | ||
90 | directories (non-directories are always opaque). | ||
91 | |||
92 | A whiteout is created as a character device with 0/0 device number. | ||
93 | When a whiteout is found in the upper level of a merged directory, any | ||
94 | matching name in the lower level is ignored, and the whiteout itself | ||
95 | is also hidden. | ||
96 | |||
97 | A directory is made opaque by setting the xattr "trusted.overlay.opaque" | ||
98 | to "y". Where the upper filesystem contains an opaque directory, any | ||
99 | directory in the lower filesystem with the same name is ignored. | ||
100 | |||
101 | readdir | ||
102 | ------- | ||
103 | |||
104 | When a 'readdir' request is made on a merged directory, the upper and | ||
105 | lower directories are each read and the name lists merged in the | ||
106 | obvious way (upper is read first, then lower - entries that already | ||
107 | exist are not re-added). This merged name list is cached in the | ||
108 | 'struct file' and so remains as long as the file is kept open. If the | ||
109 | directory is opened and read by two processes at the same time, they | ||
110 | will each have separate caches. A seekdir to the start of the | ||
111 | directory (offset 0) followed by a readdir will cause the cache to be | ||
112 | discarded and rebuilt. | ||
113 | |||
114 | This means that changes to the merged directory do not appear while a | ||
115 | directory is being read. This is unlikely to be noticed by many | ||
116 | programs. | ||
117 | |||
118 | seek offsets are assigned sequentially when the directories are read. | ||
119 | Thus if | ||
120 | - read part of a directory | ||
121 | - remember an offset, and close the directory | ||
122 | - re-open the directory some time later | ||
123 | - seek to the remembered offset | ||
124 | |||
125 | there may be little correlation between the old and new locations in | ||
126 | the list of filenames, particularly if anything has changed in the | ||
127 | directory. | ||
128 | |||
129 | Readdir on directories that are not merged is simply handled by the | ||
130 | underlying directory (upper or lower). | ||
131 | |||
132 | |||
133 | Non-directories | ||
134 | --------------- | ||
135 | |||
136 | Objects that are not directories (files, symlinks, device-special | ||
137 | files etc.) are presented either from the upper or lower filesystem as | ||
138 | appropriate. When a file in the lower filesystem is accessed in a way | ||
139 | that requires write-access, such as opening for write access, changing | ||
140 | some metadata etc., the file is first copied from the lower filesystem | ||
141 | to the upper filesystem (copy_up). Note that creating a hard-link | ||
142 | also requires copy_up, though of course creation of a symlink does | ||
143 | not. | ||
144 | |||
145 | The copy_up may turn out to be unnecessary, for example if the file is | ||
146 | opened for read-write but the data is not modified. | ||
147 | |||
148 | The copy_up process first makes sure that the containing directory | ||
149 | exists in the upper filesystem - creating it and any parents as | ||
150 | necessary. It then creates the object with the same metadata (owner, | ||
151 | mode, mtime, symlink-target etc.) and then if the object is a file, the | ||
152 | data is copied from the lower to the upper filesystem. Finally any | ||
153 | extended attributes are copied up. | ||
154 | |||
155 | Once the copy_up is complete, the overlay filesystem simply | ||
156 | provides direct access to the newly created file in the upper | ||
157 | filesystem - future operations on the file are barely noticed by the | ||
158 | overlay filesystem (though an operation on the name of the file such as | ||
159 | rename or unlink will of course be noticed and handled). | ||
160 | |||
161 | |||
162 | Non-standard behavior | ||
163 | --------------------- | ||
164 | |||
165 | The copy_up operation essentially creates a new, identical file and | ||
166 | moves it over to the old name. The new file may be on a different | ||
167 | filesystem, so both st_dev and st_ino of the file may change. | ||
168 | |||
169 | Any open files referring to this inode will access the old data and | ||
170 | metadata. Similarly any file locks obtained before copy_up will not | ||
171 | apply to the copied up file. | ||
172 | |||
173 | On a file opened with O_RDONLY, fchmod(2), fchown(2), futimesat(2) and | ||
174 | fsetxattr(2) will fail with EROFS. | ||
175 | |||
176 | If a file with multiple hard links is copied up, then this will | ||
177 | "break" the link. Changes will not be propagated to other names | ||
178 | referring to the same inode. | ||
179 | |||
180 | Symlinks in /proc/PID/ and /proc/PID/fd which point to a non-directory | ||
181 | object in overlayfs will not contain valid absolute paths, only | ||
182 | relative paths leading up to the filesystem's root. This will be | ||
183 | fixed in the future. | ||
184 | |||
185 | Some operations are not atomic, for example a crash during copy_up or | ||
186 | rename will leave the filesystem in an inconsistent state. This will | ||
187 | be addressed in the future. | ||
188 | |||
189 | Changes to underlying filesystems | ||
190 | --------------------------------- | ||
191 | |||
192 | Offline changes, when the overlay is not mounted, are allowed to either | ||
193 | the upper or the lower trees. | ||
194 | |||
195 | Changes to the underlying filesystems while part of a mounted overlay | ||
196 | filesystem are not allowed. If the underlying filesystem is changed, | ||
197 | the behavior of the overlay is undefined, though it will not result in | ||
198 | a crash or deadlock. | ||
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index fceff7c00a3c..20bf204426ca 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
@@ -364,6 +364,7 @@ struct inode_operations { | |||
364 | int (*atomic_open)(struct inode *, struct dentry *, struct file *, | 364 | int (*atomic_open)(struct inode *, struct dentry *, struct file *, |
365 | unsigned open_flag, umode_t create_mode, int *opened); | 365 | unsigned open_flag, umode_t create_mode, int *opened); |
366 | int (*tmpfile) (struct inode *, struct dentry *, umode_t); | 366 | int (*tmpfile) (struct inode *, struct dentry *, umode_t); |
367 | int (*dentry_open)(struct dentry *, struct file *, const struct cred *); | ||
367 | }; | 368 | }; |
368 | 369 | ||
369 | Again, all methods are called without any locks being held, unless | 370 | Again, all methods are called without any locks being held, unless |
@@ -696,6 +697,12 @@ struct address_space_operations { | |||
696 | but instead uses bmap to find out where the blocks in the file | 697 | but instead uses bmap to find out where the blocks in the file |
697 | are and uses those addresses directly. | 698 | are and uses those addresses directly. |
698 | 699 | ||
700 | dentry_open: *WARNING: probably going away soon, do not use!* This is an | ||
701 | alternative to f_op->open(), the difference is that this method may open | ||
702 | a file not necessarily originating from the same filesystem as the one | ||
703 | i_op->dentry_open() was called on. It may be useful for stacking filesystems | ||
704 | which want to allow native I/O directly on underlying files. | ||
705 | |||
699 | 706 | ||
700 | invalidatepage: If a page has PagePrivate set, then invalidatepage | 707 | invalidatepage: If a page has PagePrivate set, then invalidatepage |
701 | will be called when part or all of the page is to be removed | 708 | will be called when part or all of the page is to be removed |
diff --git a/MAINTAINERS b/MAINTAINERS index a20df9bf8ab0..aa974d445bfd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -6832,6 +6832,13 @@ F: drivers/scsi/osd/ | |||
6832 | F: include/scsi/osd_* | 6832 | F: include/scsi/osd_* |
6833 | F: fs/exofs/ | 6833 | F: fs/exofs/ |
6834 | 6834 | ||
6835 | OVERLAYFS FILESYSTEM | ||
6836 | M: Miklos Szeredi <miklos@szeredi.hu> | ||
6837 | L: linux-fsdevel@vger.kernel.org | ||
6838 | S: Supported | ||
6839 | F: fs/overlayfs/* | ||
6840 | F: Documentation/filesystems/overlayfs.txt | ||
6841 | |||
6835 | P54 WIRELESS DRIVER | 6842 | P54 WIRELESS DRIVER |
6836 | M: Christian Lamparter <chunkeey@googlemail.com> | 6843 | M: Christian Lamparter <chunkeey@googlemail.com> |
6837 | L: linux-wireless@vger.kernel.org | 6844 | L: linux-wireless@vger.kernel.org |
diff --git a/fs/Kconfig b/fs/Kconfig index db5dc1598716..664991afe0c0 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -67,6 +67,7 @@ source "fs/quota/Kconfig" | |||
67 | 67 | ||
68 | source "fs/autofs4/Kconfig" | 68 | source "fs/autofs4/Kconfig" |
69 | source "fs/fuse/Kconfig" | 69 | source "fs/fuse/Kconfig" |
70 | source "fs/overlayfs/Kconfig" | ||
70 | 71 | ||
71 | menu "Caches" | 72 | menu "Caches" |
72 | 73 | ||
diff --git a/fs/Makefile b/fs/Makefile index 90c88529892b..34a1b9dea6dd 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -104,6 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/ | |||
104 | obj-$(CONFIG_AUTOFS4_FS) += autofs4/ | 104 | obj-$(CONFIG_AUTOFS4_FS) += autofs4/ |
105 | obj-$(CONFIG_ADFS_FS) += adfs/ | 105 | obj-$(CONFIG_ADFS_FS) += adfs/ |
106 | obj-$(CONFIG_FUSE_FS) += fuse/ | 106 | obj-$(CONFIG_FUSE_FS) += fuse/ |
107 | obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/ | ||
107 | obj-$(CONFIG_UDF_FS) += udf/ | 108 | obj-$(CONFIG_UDF_FS) += udf/ |
108 | obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ | 109 | obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ |
109 | obj-$(CONFIG_OMFS_FS) += omfs/ | 110 | obj-$(CONFIG_OMFS_FS) += omfs/ |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8d2b76e29d3b..4399f0c3a4ce 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -765,23 +765,6 @@ out: | |||
765 | return ret; | 765 | return ret; |
766 | } | 766 | } |
767 | 767 | ||
768 | /* copy of check_sticky in fs/namei.c() | ||
769 | * It's inline, so penalty for filesystems that don't use sticky bit is | ||
770 | * minimal. | ||
771 | */ | ||
772 | static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) | ||
773 | { | ||
774 | kuid_t fsuid = current_fsuid(); | ||
775 | |||
776 | if (!(dir->i_mode & S_ISVTX)) | ||
777 | return 0; | ||
778 | if (uid_eq(inode->i_uid, fsuid)) | ||
779 | return 0; | ||
780 | if (uid_eq(dir->i_uid, fsuid)) | ||
781 | return 0; | ||
782 | return !capable(CAP_FOWNER); | ||
783 | } | ||
784 | |||
785 | /* copy of may_delete in fs/namei.c() | 768 | /* copy of may_delete in fs/namei.c() |
786 | * Check whether we can remove a link victim from directory dir, check | 769 | * Check whether we can remove a link victim from directory dir, check |
787 | * whether the type of victim is right. | 770 | * whether the type of victim is right. |
@@ -817,8 +800,7 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir) | |||
817 | return error; | 800 | return error; |
818 | if (IS_APPEND(dir)) | 801 | if (IS_APPEND(dir)) |
819 | return -EPERM; | 802 | return -EPERM; |
820 | if (btrfs_check_sticky(dir, victim->d_inode)|| | 803 | if (check_sticky(dir, victim->d_inode) || IS_APPEND(victim->d_inode) || |
821 | IS_APPEND(victim->d_inode)|| | ||
822 | IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) | 804 | IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) |
823 | return -EPERM; | 805 | return -EPERM; |
824 | if (isdir) { | 806 | if (isdir) { |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 1b119d3bf924..c4cd1fd86cc2 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -566,6 +566,13 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags | |||
566 | s->s_maxbytes = path.dentry->d_sb->s_maxbytes; | 566 | s->s_maxbytes = path.dentry->d_sb->s_maxbytes; |
567 | s->s_blocksize = path.dentry->d_sb->s_blocksize; | 567 | s->s_blocksize = path.dentry->d_sb->s_blocksize; |
568 | s->s_magic = ECRYPTFS_SUPER_MAGIC; | 568 | s->s_magic = ECRYPTFS_SUPER_MAGIC; |
569 | s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1; | ||
570 | |||
571 | rc = -EINVAL; | ||
572 | if (s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { | ||
573 | pr_err("eCryptfs: maximum fs stacking depth exceeded\n"); | ||
574 | goto out_free; | ||
575 | } | ||
569 | 576 | ||
570 | inode = ecryptfs_get_inode(path.dentry->d_inode, s); | 577 | inode = ecryptfs_get_inode(path.dentry->d_inode, s); |
571 | rc = PTR_ERR(inode); | 578 | rc = PTR_ERR(inode); |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 603e4ebbd0ac..aba86e8ef1ef 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -3190,6 +3190,39 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent) | |||
3190 | } | 3190 | } |
3191 | } | 3191 | } |
3192 | 3192 | ||
3193 | static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent, | ||
3194 | int credits, handle_t **h) | ||
3195 | { | ||
3196 | struct inode *wh; | ||
3197 | handle_t *handle; | ||
3198 | int retries = 0; | ||
3199 | |||
3200 | /* | ||
3201 | * for inode block, sb block, group summaries, | ||
3202 | * and inode bitmap | ||
3203 | */ | ||
3204 | credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) + | ||
3205 | EXT4_XATTR_TRANS_BLOCKS + 4); | ||
3206 | retry: | ||
3207 | wh = ext4_new_inode_start_handle(ent->dir, S_IFCHR | WHITEOUT_MODE, | ||
3208 | &ent->dentry->d_name, 0, NULL, | ||
3209 | EXT4_HT_DIR, credits); | ||
3210 | |||
3211 | handle = ext4_journal_current_handle(); | ||
3212 | if (IS_ERR(wh)) { | ||
3213 | if (handle) | ||
3214 | ext4_journal_stop(handle); | ||
3215 | if (PTR_ERR(wh) == -ENOSPC && | ||
3216 | ext4_should_retry_alloc(ent->dir->i_sb, &retries)) | ||
3217 | goto retry; | ||
3218 | } else { | ||
3219 | *h = handle; | ||
3220 | init_special_inode(wh, wh->i_mode, WHITEOUT_DEV); | ||
3221 | wh->i_op = &ext4_special_inode_operations; | ||
3222 | } | ||
3223 | return wh; | ||
3224 | } | ||
3225 | |||
3193 | /* | 3226 | /* |
3194 | * Anybody can rename anything with this: the permission checks are left to the | 3227 | * Anybody can rename anything with this: the permission checks are left to the |
3195 | * higher-level routines. | 3228 | * higher-level routines. |
@@ -3199,7 +3232,8 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent) | |||
3199 | * This comes from rename(const char *oldpath, const char *newpath) | 3232 | * This comes from rename(const char *oldpath, const char *newpath) |
3200 | */ | 3233 | */ |
3201 | static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | 3234 | static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, |
3202 | struct inode *new_dir, struct dentry *new_dentry) | 3235 | struct inode *new_dir, struct dentry *new_dentry, |
3236 | unsigned int flags) | ||
3203 | { | 3237 | { |
3204 | handle_t *handle = NULL; | 3238 | handle_t *handle = NULL; |
3205 | struct ext4_renament old = { | 3239 | struct ext4_renament old = { |
@@ -3214,6 +3248,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3214 | }; | 3248 | }; |
3215 | int force_reread; | 3249 | int force_reread; |
3216 | int retval; | 3250 | int retval; |
3251 | struct inode *whiteout = NULL; | ||
3252 | int credits; | ||
3253 | u8 old_file_type; | ||
3217 | 3254 | ||
3218 | dquot_initialize(old.dir); | 3255 | dquot_initialize(old.dir); |
3219 | dquot_initialize(new.dir); | 3256 | dquot_initialize(new.dir); |
@@ -3252,11 +3289,17 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3252 | if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC)) | 3289 | if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC)) |
3253 | ext4_alloc_da_blocks(old.inode); | 3290 | ext4_alloc_da_blocks(old.inode); |
3254 | 3291 | ||
3255 | handle = ext4_journal_start(old.dir, EXT4_HT_DIR, | 3292 | credits = (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + |
3256 | (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + | 3293 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); |
3257 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); | 3294 | if (!(flags & RENAME_WHITEOUT)) { |
3258 | if (IS_ERR(handle)) | 3295 | handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits); |
3259 | return PTR_ERR(handle); | 3296 | if (IS_ERR(handle)) |
3297 | return PTR_ERR(handle); | ||
3298 | } else { | ||
3299 | whiteout = ext4_whiteout_for_rename(&old, credits, &handle); | ||
3300 | if (IS_ERR(whiteout)) | ||
3301 | return PTR_ERR(whiteout); | ||
3302 | } | ||
3260 | 3303 | ||
3261 | if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) | 3304 | if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) |
3262 | ext4_handle_sync(handle); | 3305 | ext4_handle_sync(handle); |
@@ -3284,13 +3327,26 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3284 | */ | 3327 | */ |
3285 | force_reread = (new.dir->i_ino == old.dir->i_ino && | 3328 | force_reread = (new.dir->i_ino == old.dir->i_ino && |
3286 | ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); | 3329 | ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); |
3330 | |||
3331 | old_file_type = old.de->file_type; | ||
3332 | if (whiteout) { | ||
3333 | /* | ||
3334 | * Do this before adding a new entry, so the old entry is sure | ||
3335 | * to be still pointing to the valid old entry. | ||
3336 | */ | ||
3337 | retval = ext4_setent(handle, &old, whiteout->i_ino, | ||
3338 | EXT4_FT_CHRDEV); | ||
3339 | if (retval) | ||
3340 | goto end_rename; | ||
3341 | ext4_mark_inode_dirty(handle, whiteout); | ||
3342 | } | ||
3287 | if (!new.bh) { | 3343 | if (!new.bh) { |
3288 | retval = ext4_add_entry(handle, new.dentry, old.inode); | 3344 | retval = ext4_add_entry(handle, new.dentry, old.inode); |
3289 | if (retval) | 3345 | if (retval) |
3290 | goto end_rename; | 3346 | goto end_rename; |
3291 | } else { | 3347 | } else { |
3292 | retval = ext4_setent(handle, &new, | 3348 | retval = ext4_setent(handle, &new, |
3293 | old.inode->i_ino, old.de->file_type); | 3349 | old.inode->i_ino, old_file_type); |
3294 | if (retval) | 3350 | if (retval) |
3295 | goto end_rename; | 3351 | goto end_rename; |
3296 | } | 3352 | } |
@@ -3305,10 +3361,12 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3305 | old.inode->i_ctime = ext4_current_time(old.inode); | 3361 | old.inode->i_ctime = ext4_current_time(old.inode); |
3306 | ext4_mark_inode_dirty(handle, old.inode); | 3362 | ext4_mark_inode_dirty(handle, old.inode); |
3307 | 3363 | ||
3308 | /* | 3364 | if (!whiteout) { |
3309 | * ok, that's it | 3365 | /* |
3310 | */ | 3366 | * ok, that's it |
3311 | ext4_rename_delete(handle, &old, force_reread); | 3367 | */ |
3368 | ext4_rename_delete(handle, &old, force_reread); | ||
3369 | } | ||
3312 | 3370 | ||
3313 | if (new.inode) { | 3371 | if (new.inode) { |
3314 | ext4_dec_count(handle, new.inode); | 3372 | ext4_dec_count(handle, new.inode); |
@@ -3344,6 +3402,12 @@ end_rename: | |||
3344 | brelse(old.dir_bh); | 3402 | brelse(old.dir_bh); |
3345 | brelse(old.bh); | 3403 | brelse(old.bh); |
3346 | brelse(new.bh); | 3404 | brelse(new.bh); |
3405 | if (whiteout) { | ||
3406 | if (retval) | ||
3407 | drop_nlink(whiteout); | ||
3408 | unlock_new_inode(whiteout); | ||
3409 | iput(whiteout); | ||
3410 | } | ||
3347 | if (handle) | 3411 | if (handle) |
3348 | ext4_journal_stop(handle); | 3412 | ext4_journal_stop(handle); |
3349 | return retval; | 3413 | return retval; |
@@ -3476,18 +3540,15 @@ static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry, | |||
3476 | struct inode *new_dir, struct dentry *new_dentry, | 3540 | struct inode *new_dir, struct dentry *new_dentry, |
3477 | unsigned int flags) | 3541 | unsigned int flags) |
3478 | { | 3542 | { |
3479 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) | 3543 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) |
3480 | return -EINVAL; | 3544 | return -EINVAL; |
3481 | 3545 | ||
3482 | if (flags & RENAME_EXCHANGE) { | 3546 | if (flags & RENAME_EXCHANGE) { |
3483 | return ext4_cross_rename(old_dir, old_dentry, | 3547 | return ext4_cross_rename(old_dir, old_dentry, |
3484 | new_dir, new_dentry); | 3548 | new_dir, new_dentry); |
3485 | } | 3549 | } |
3486 | /* | 3550 | |
3487 | * Existence checking was done by the VFS, otherwise "RENAME_NOREPLACE" | 3551 | return ext4_rename(old_dir, old_dentry, new_dir, new_dentry, flags); |
3488 | * is equivalent to regular rename. | ||
3489 | */ | ||
3490 | return ext4_rename(old_dir, old_dentry, new_dir, new_dentry); | ||
3491 | } | 3552 | } |
3492 | 3553 | ||
3493 | /* | 3554 | /* |
diff --git a/fs/internal.h b/fs/internal.h index 9477f8f6aefc..757ba2abf21e 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -47,7 +47,6 @@ extern void __init chrdev_init(void); | |||
47 | /* | 47 | /* |
48 | * namei.c | 48 | * namei.c |
49 | */ | 49 | */ |
50 | extern int __inode_permission(struct inode *, int); | ||
51 | extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); | 50 | extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); |
52 | extern int vfs_path_lookup(struct dentry *, struct vfsmount *, | 51 | extern int vfs_path_lookup(struct dentry *, struct vfsmount *, |
53 | const char *, unsigned int, struct path *); | 52 | const char *, unsigned int, struct path *); |
@@ -139,12 +138,6 @@ extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, | |||
139 | extern int rw_verify_area(int, struct file *, const loff_t *, size_t); | 138 | extern int rw_verify_area(int, struct file *, const loff_t *, size_t); |
140 | 139 | ||
141 | /* | 140 | /* |
142 | * splice.c | ||
143 | */ | ||
144 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | ||
145 | loff_t *opos, size_t len, unsigned int flags); | ||
146 | |||
147 | /* | ||
148 | * pipe.c | 141 | * pipe.c |
149 | */ | 142 | */ |
150 | extern const struct file_operations pipefifo_fops; | 143 | extern const struct file_operations pipefifo_fops; |
diff --git a/fs/namei.c b/fs/namei.c index 43927d14db67..42df664e95e5 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -416,6 +416,7 @@ int __inode_permission(struct inode *inode, int mask) | |||
416 | 416 | ||
417 | return security_inode_permission(inode, mask); | 417 | return security_inode_permission(inode, mask); |
418 | } | 418 | } |
419 | EXPORT_SYMBOL(__inode_permission); | ||
419 | 420 | ||
420 | /** | 421 | /** |
421 | * sb_permission - Check superblock-level permissions | 422 | * sb_permission - Check superblock-level permissions |
@@ -2383,22 +2384,17 @@ kern_path_mountpoint(int dfd, const char *name, struct path *path, | |||
2383 | } | 2384 | } |
2384 | EXPORT_SYMBOL(kern_path_mountpoint); | 2385 | EXPORT_SYMBOL(kern_path_mountpoint); |
2385 | 2386 | ||
2386 | /* | 2387 | int __check_sticky(struct inode *dir, struct inode *inode) |
2387 | * It's inline, so penalty for filesystems that don't use sticky bit is | ||
2388 | * minimal. | ||
2389 | */ | ||
2390 | static inline int check_sticky(struct inode *dir, struct inode *inode) | ||
2391 | { | 2388 | { |
2392 | kuid_t fsuid = current_fsuid(); | 2389 | kuid_t fsuid = current_fsuid(); |
2393 | 2390 | ||
2394 | if (!(dir->i_mode & S_ISVTX)) | ||
2395 | return 0; | ||
2396 | if (uid_eq(inode->i_uid, fsuid)) | 2391 | if (uid_eq(inode->i_uid, fsuid)) |
2397 | return 0; | 2392 | return 0; |
2398 | if (uid_eq(dir->i_uid, fsuid)) | 2393 | if (uid_eq(dir->i_uid, fsuid)) |
2399 | return 0; | 2394 | return 0; |
2400 | return !capable_wrt_inode_uidgid(inode, CAP_FOWNER); | 2395 | return !capable_wrt_inode_uidgid(inode, CAP_FOWNER); |
2401 | } | 2396 | } |
2397 | EXPORT_SYMBOL(__check_sticky); | ||
2402 | 2398 | ||
2403 | /* | 2399 | /* |
2404 | * Check whether we can remove a link victim from directory dir, check | 2400 | * Check whether we can remove a link victim from directory dir, check |
@@ -3064,9 +3060,12 @@ finish_open_created: | |||
3064 | error = may_open(&nd->path, acc_mode, open_flag); | 3060 | error = may_open(&nd->path, acc_mode, open_flag); |
3065 | if (error) | 3061 | if (error) |
3066 | goto out; | 3062 | goto out; |
3067 | file->f_path.mnt = nd->path.mnt; | 3063 | |
3068 | error = finish_open(file, nd->path.dentry, NULL, opened); | 3064 | BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ |
3069 | if (error) { | 3065 | error = vfs_open(&nd->path, file, current_cred()); |
3066 | if (!error) { | ||
3067 | *opened |= FILE_OPENED; | ||
3068 | } else { | ||
3070 | if (error == -EOPENSTALE) | 3069 | if (error == -EOPENSTALE) |
3071 | goto stale_open; | 3070 | goto stale_open; |
3072 | goto out; | 3071 | goto out; |
@@ -4210,12 +4209,16 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, | |||
4210 | bool should_retry = false; | 4209 | bool should_retry = false; |
4211 | int error; | 4210 | int error; |
4212 | 4211 | ||
4213 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) | 4212 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) |
4214 | return -EINVAL; | 4213 | return -EINVAL; |
4215 | 4214 | ||
4216 | if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE)) | 4215 | if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) && |
4216 | (flags & RENAME_EXCHANGE)) | ||
4217 | return -EINVAL; | 4217 | return -EINVAL; |
4218 | 4218 | ||
4219 | if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD)) | ||
4220 | return -EPERM; | ||
4221 | |||
4219 | retry: | 4222 | retry: |
4220 | from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); | 4223 | from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); |
4221 | if (IS_ERR(from)) { | 4224 | if (IS_ERR(from)) { |
@@ -4347,6 +4350,20 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna | |||
4347 | return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); | 4350 | return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); |
4348 | } | 4351 | } |
4349 | 4352 | ||
4353 | int vfs_whiteout(struct inode *dir, struct dentry *dentry) | ||
4354 | { | ||
4355 | int error = may_create(dir, dentry); | ||
4356 | if (error) | ||
4357 | return error; | ||
4358 | |||
4359 | if (!dir->i_op->mknod) | ||
4360 | return -EPERM; | ||
4361 | |||
4362 | return dir->i_op->mknod(dir, dentry, | ||
4363 | S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); | ||
4364 | } | ||
4365 | EXPORT_SYMBOL(vfs_whiteout); | ||
4366 | |||
4350 | int readlink_copy(char __user *buffer, int buflen, const char *link) | 4367 | int readlink_copy(char __user *buffer, int buflen, const char *link) |
4351 | { | 4368 | { |
4352 | int len = PTR_ERR(link); | 4369 | int len = PTR_ERR(link); |
diff --git a/fs/namespace.c b/fs/namespace.c index fbba8b17330d..5b66b2b3624d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1686,6 +1686,33 @@ void drop_collected_mounts(struct vfsmount *mnt) | |||
1686 | namespace_unlock(); | 1686 | namespace_unlock(); |
1687 | } | 1687 | } |
1688 | 1688 | ||
1689 | /** | ||
1690 | * clone_private_mount - create a private clone of a path | ||
1691 | * | ||
1692 | * This creates a new vfsmount, which will be the clone of @path. The new will | ||
1693 | * not be attached anywhere in the namespace and will be private (i.e. changes | ||
1694 | * to the originating mount won't be propagated into this). | ||
1695 | * | ||
1696 | * Release with mntput(). | ||
1697 | */ | ||
1698 | struct vfsmount *clone_private_mount(struct path *path) | ||
1699 | { | ||
1700 | struct mount *old_mnt = real_mount(path->mnt); | ||
1701 | struct mount *new_mnt; | ||
1702 | |||
1703 | if (IS_MNT_UNBINDABLE(old_mnt)) | ||
1704 | return ERR_PTR(-EINVAL); | ||
1705 | |||
1706 | down_read(&namespace_sem); | ||
1707 | new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); | ||
1708 | up_read(&namespace_sem); | ||
1709 | if (IS_ERR(new_mnt)) | ||
1710 | return ERR_CAST(new_mnt); | ||
1711 | |||
1712 | return &new_mnt->mnt; | ||
1713 | } | ||
1714 | EXPORT_SYMBOL_GPL(clone_private_mount); | ||
1715 | |||
1689 | int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, | 1716 | int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, |
1690 | struct vfsmount *root) | 1717 | struct vfsmount *root) |
1691 | { | 1718 | { |
@@ -823,8 +823,7 @@ struct file *dentry_open(const struct path *path, int flags, | |||
823 | f = get_empty_filp(); | 823 | f = get_empty_filp(); |
824 | if (!IS_ERR(f)) { | 824 | if (!IS_ERR(f)) { |
825 | f->f_flags = flags; | 825 | f->f_flags = flags; |
826 | f->f_path = *path; | 826 | error = vfs_open(path, f, cred); |
827 | error = do_dentry_open(f, NULL, cred); | ||
828 | if (!error) { | 827 | if (!error) { |
829 | /* from now on we need fput() to dispose of f */ | 828 | /* from now on we need fput() to dispose of f */ |
830 | error = open_check_o_direct(f); | 829 | error = open_check_o_direct(f); |
@@ -841,6 +840,26 @@ struct file *dentry_open(const struct path *path, int flags, | |||
841 | } | 840 | } |
842 | EXPORT_SYMBOL(dentry_open); | 841 | EXPORT_SYMBOL(dentry_open); |
843 | 842 | ||
843 | /** | ||
844 | * vfs_open - open the file at the given path | ||
845 | * @path: path to open | ||
846 | * @filp: newly allocated file with f_flag initialized | ||
847 | * @cred: credentials to use | ||
848 | */ | ||
849 | int vfs_open(const struct path *path, struct file *filp, | ||
850 | const struct cred *cred) | ||
851 | { | ||
852 | struct inode *inode = path->dentry->d_inode; | ||
853 | |||
854 | if (inode->i_op->dentry_open) | ||
855 | return inode->i_op->dentry_open(path->dentry, filp, cred); | ||
856 | else { | ||
857 | filp->f_path = *path; | ||
858 | return do_dentry_open(filp, NULL, cred); | ||
859 | } | ||
860 | } | ||
861 | EXPORT_SYMBOL(vfs_open); | ||
862 | |||
844 | static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) | 863 | static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) |
845 | { | 864 | { |
846 | int lookup_flags = 0; | 865 | int lookup_flags = 0; |
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig new file mode 100644 index 000000000000..e60125976873 --- /dev/null +++ b/fs/overlayfs/Kconfig | |||
@@ -0,0 +1,10 @@ | |||
1 | config OVERLAYFS_FS | ||
2 | tristate "Overlay filesystem support" | ||
3 | help | ||
4 | An overlay filesystem combines two filesystems - an 'upper' filesystem | ||
5 | and a 'lower' filesystem. When a name exists in both filesystems, the | ||
6 | object in the 'upper' filesystem is visible while the object in the | ||
7 | 'lower' filesystem is either hidden or, in the case of directories, | ||
8 | merged with the 'upper' object. | ||
9 | |||
10 | For more information see Documentation/filesystems/overlayfs.txt | ||
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile new file mode 100644 index 000000000000..8f91889480d0 --- /dev/null +++ b/fs/overlayfs/Makefile | |||
@@ -0,0 +1,7 @@ | |||
1 | # | ||
2 | # Makefile for the overlay filesystem. | ||
3 | # | ||
4 | |||
5 | obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o | ||
6 | |||
7 | overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o | ||
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c new file mode 100644 index 000000000000..ea10a8719107 --- /dev/null +++ b/fs/overlayfs/copy_up.c | |||
@@ -0,0 +1,414 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Copyright (C) 2011 Novell Inc. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published by | ||
7 | * the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/fs.h> | ||
11 | #include <linux/slab.h> | ||
12 | #include <linux/file.h> | ||
13 | #include <linux/splice.h> | ||
14 | #include <linux/xattr.h> | ||
15 | #include <linux/security.h> | ||
16 | #include <linux/uaccess.h> | ||
17 | #include <linux/sched.h> | ||
18 | #include <linux/namei.h> | ||
19 | #include "overlayfs.h" | ||
20 | |||
21 | #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) | ||
22 | |||
23 | int ovl_copy_xattr(struct dentry *old, struct dentry *new) | ||
24 | { | ||
25 | ssize_t list_size, size; | ||
26 | char *buf, *name, *value; | ||
27 | int error; | ||
28 | |||
29 | if (!old->d_inode->i_op->getxattr || | ||
30 | !new->d_inode->i_op->getxattr) | ||
31 | return 0; | ||
32 | |||
33 | list_size = vfs_listxattr(old, NULL, 0); | ||
34 | if (list_size <= 0) { | ||
35 | if (list_size == -EOPNOTSUPP) | ||
36 | return 0; | ||
37 | return list_size; | ||
38 | } | ||
39 | |||
40 | buf = kzalloc(list_size, GFP_KERNEL); | ||
41 | if (!buf) | ||
42 | return -ENOMEM; | ||
43 | |||
44 | error = -ENOMEM; | ||
45 | value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); | ||
46 | if (!value) | ||
47 | goto out; | ||
48 | |||
49 | list_size = vfs_listxattr(old, buf, list_size); | ||
50 | if (list_size <= 0) { | ||
51 | error = list_size; | ||
52 | goto out_free_value; | ||
53 | } | ||
54 | |||
55 | for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { | ||
56 | size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); | ||
57 | if (size <= 0) { | ||
58 | error = size; | ||
59 | goto out_free_value; | ||
60 | } | ||
61 | error = vfs_setxattr(new, name, value, size, 0); | ||
62 | if (error) | ||
63 | goto out_free_value; | ||
64 | } | ||
65 | |||
66 | out_free_value: | ||
67 | kfree(value); | ||
68 | out: | ||
69 | kfree(buf); | ||
70 | return error; | ||
71 | } | ||
72 | |||
73 | static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) | ||
74 | { | ||
75 | struct file *old_file; | ||
76 | struct file *new_file; | ||
77 | loff_t old_pos = 0; | ||
78 | loff_t new_pos = 0; | ||
79 | int error = 0; | ||
80 | |||
81 | if (len == 0) | ||
82 | return 0; | ||
83 | |||
84 | old_file = ovl_path_open(old, O_RDONLY); | ||
85 | if (IS_ERR(old_file)) | ||
86 | return PTR_ERR(old_file); | ||
87 | |||
88 | new_file = ovl_path_open(new, O_WRONLY); | ||
89 | if (IS_ERR(new_file)) { | ||
90 | error = PTR_ERR(new_file); | ||
91 | goto out_fput; | ||
92 | } | ||
93 | |||
94 | /* FIXME: copy up sparse files efficiently */ | ||
95 | while (len) { | ||
96 | size_t this_len = OVL_COPY_UP_CHUNK_SIZE; | ||
97 | long bytes; | ||
98 | |||
99 | if (len < this_len) | ||
100 | this_len = len; | ||
101 | |||
102 | if (signal_pending_state(TASK_KILLABLE, current)) { | ||
103 | error = -EINTR; | ||
104 | break; | ||
105 | } | ||
106 | |||
107 | bytes = do_splice_direct(old_file, &old_pos, | ||
108 | new_file, &new_pos, | ||
109 | this_len, SPLICE_F_MOVE); | ||
110 | if (bytes <= 0) { | ||
111 | error = bytes; | ||
112 | break; | ||
113 | } | ||
114 | WARN_ON(old_pos != new_pos); | ||
115 | |||
116 | len -= bytes; | ||
117 | } | ||
118 | |||
119 | fput(new_file); | ||
120 | out_fput: | ||
121 | fput(old_file); | ||
122 | return error; | ||
123 | } | ||
124 | |||
125 | static char *ovl_read_symlink(struct dentry *realdentry) | ||
126 | { | ||
127 | int res; | ||
128 | char *buf; | ||
129 | struct inode *inode = realdentry->d_inode; | ||
130 | mm_segment_t old_fs; | ||
131 | |||
132 | res = -EINVAL; | ||
133 | if (!inode->i_op->readlink) | ||
134 | goto err; | ||
135 | |||
136 | res = -ENOMEM; | ||
137 | buf = (char *) __get_free_page(GFP_KERNEL); | ||
138 | if (!buf) | ||
139 | goto err; | ||
140 | |||
141 | old_fs = get_fs(); | ||
142 | set_fs(get_ds()); | ||
143 | /* The cast to a user pointer is valid due to the set_fs() */ | ||
144 | res = inode->i_op->readlink(realdentry, | ||
145 | (char __user *)buf, PAGE_SIZE - 1); | ||
146 | set_fs(old_fs); | ||
147 | if (res < 0) { | ||
148 | free_page((unsigned long) buf); | ||
149 | goto err; | ||
150 | } | ||
151 | buf[res] = '\0'; | ||
152 | |||
153 | return buf; | ||
154 | |||
155 | err: | ||
156 | return ERR_PTR(res); | ||
157 | } | ||
158 | |||
159 | static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) | ||
160 | { | ||
161 | struct iattr attr = { | ||
162 | .ia_valid = | ||
163 | ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET, | ||
164 | .ia_atime = stat->atime, | ||
165 | .ia_mtime = stat->mtime, | ||
166 | }; | ||
167 | |||
168 | return notify_change(upperdentry, &attr, NULL); | ||
169 | } | ||
170 | |||
171 | int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) | ||
172 | { | ||
173 | int err = 0; | ||
174 | |||
175 | if (!S_ISLNK(stat->mode)) { | ||
176 | struct iattr attr = { | ||
177 | .ia_valid = ATTR_MODE, | ||
178 | .ia_mode = stat->mode, | ||
179 | }; | ||
180 | err = notify_change(upperdentry, &attr, NULL); | ||
181 | } | ||
182 | if (!err) { | ||
183 | struct iattr attr = { | ||
184 | .ia_valid = ATTR_UID | ATTR_GID, | ||
185 | .ia_uid = stat->uid, | ||
186 | .ia_gid = stat->gid, | ||
187 | }; | ||
188 | err = notify_change(upperdentry, &attr, NULL); | ||
189 | } | ||
190 | if (!err) | ||
191 | ovl_set_timestamps(upperdentry, stat); | ||
192 | |||
193 | return err; | ||
194 | |||
195 | } | ||
196 | |||
197 | static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, | ||
198 | struct dentry *dentry, struct path *lowerpath, | ||
199 | struct kstat *stat, struct iattr *attr, | ||
200 | const char *link) | ||
201 | { | ||
202 | struct inode *wdir = workdir->d_inode; | ||
203 | struct inode *udir = upperdir->d_inode; | ||
204 | struct dentry *newdentry = NULL; | ||
205 | struct dentry *upper = NULL; | ||
206 | umode_t mode = stat->mode; | ||
207 | int err; | ||
208 | |||
209 | newdentry = ovl_lookup_temp(workdir, dentry); | ||
210 | err = PTR_ERR(newdentry); | ||
211 | if (IS_ERR(newdentry)) | ||
212 | goto out; | ||
213 | |||
214 | upper = lookup_one_len(dentry->d_name.name, upperdir, | ||
215 | dentry->d_name.len); | ||
216 | err = PTR_ERR(upper); | ||
217 | if (IS_ERR(upper)) | ||
218 | goto out1; | ||
219 | |||
220 | /* Can't properly set mode on creation because of the umask */ | ||
221 | stat->mode &= S_IFMT; | ||
222 | err = ovl_create_real(wdir, newdentry, stat, link, NULL, true); | ||
223 | stat->mode = mode; | ||
224 | if (err) | ||
225 | goto out2; | ||
226 | |||
227 | if (S_ISREG(stat->mode)) { | ||
228 | struct path upperpath; | ||
229 | ovl_path_upper(dentry, &upperpath); | ||
230 | BUG_ON(upperpath.dentry != NULL); | ||
231 | upperpath.dentry = newdentry; | ||
232 | |||
233 | err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); | ||
234 | if (err) | ||
235 | goto out_cleanup; | ||
236 | } | ||
237 | |||
238 | err = ovl_copy_xattr(lowerpath->dentry, newdentry); | ||
239 | if (err) | ||
240 | goto out_cleanup; | ||
241 | |||
242 | mutex_lock(&newdentry->d_inode->i_mutex); | ||
243 | err = ovl_set_attr(newdentry, stat); | ||
244 | if (!err && attr) | ||
245 | err = notify_change(newdentry, attr, NULL); | ||
246 | mutex_unlock(&newdentry->d_inode->i_mutex); | ||
247 | if (err) | ||
248 | goto out_cleanup; | ||
249 | |||
250 | err = ovl_do_rename(wdir, newdentry, udir, upper, 0); | ||
251 | if (err) | ||
252 | goto out_cleanup; | ||
253 | |||
254 | ovl_dentry_update(dentry, newdentry); | ||
255 | newdentry = NULL; | ||
256 | |||
257 | /* | ||
258 | * Non-directores become opaque when copied up. | ||
259 | */ | ||
260 | if (!S_ISDIR(stat->mode)) | ||
261 | ovl_dentry_set_opaque(dentry, true); | ||
262 | out2: | ||
263 | dput(upper); | ||
264 | out1: | ||
265 | dput(newdentry); | ||
266 | out: | ||
267 | return err; | ||
268 | |||
269 | out_cleanup: | ||
270 | ovl_cleanup(wdir, newdentry); | ||
271 | goto out; | ||
272 | } | ||
273 | |||
274 | /* | ||
275 | * Copy up a single dentry | ||
276 | * | ||
277 | * Directory renames only allowed on "pure upper" (already created on | ||
278 | * upper filesystem, never copied up). Directories which are on lower or | ||
279 | * are merged may not be renamed. For these -EXDEV is returned and | ||
280 | * userspace has to deal with it. This means, when copying up a | ||
281 | * directory we can rely on it and ancestors being stable. | ||
282 | * | ||
283 | * Non-directory renames start with copy up of source if necessary. The | ||
284 | * actual rename will only proceed once the copy up was successful. Copy | ||
285 | * up uses upper parent i_mutex for exclusion. Since rename can change | ||
286 | * d_parent it is possible that the copy up will lock the old parent. At | ||
287 | * that point the file will have already been copied up anyway. | ||
288 | */ | ||
289 | int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, | ||
290 | struct path *lowerpath, struct kstat *stat, | ||
291 | struct iattr *attr) | ||
292 | { | ||
293 | struct dentry *workdir = ovl_workdir(dentry); | ||
294 | int err; | ||
295 | struct kstat pstat; | ||
296 | struct path parentpath; | ||
297 | struct dentry *upperdir; | ||
298 | struct dentry *upperdentry; | ||
299 | const struct cred *old_cred; | ||
300 | struct cred *override_cred; | ||
301 | char *link = NULL; | ||
302 | |||
303 | ovl_path_upper(parent, &parentpath); | ||
304 | upperdir = parentpath.dentry; | ||
305 | |||
306 | err = vfs_getattr(&parentpath, &pstat); | ||
307 | if (err) | ||
308 | return err; | ||
309 | |||
310 | if (S_ISLNK(stat->mode)) { | ||
311 | link = ovl_read_symlink(lowerpath->dentry); | ||
312 | if (IS_ERR(link)) | ||
313 | return PTR_ERR(link); | ||
314 | } | ||
315 | |||
316 | err = -ENOMEM; | ||
317 | override_cred = prepare_creds(); | ||
318 | if (!override_cred) | ||
319 | goto out_free_link; | ||
320 | |||
321 | override_cred->fsuid = stat->uid; | ||
322 | override_cred->fsgid = stat->gid; | ||
323 | /* | ||
324 | * CAP_SYS_ADMIN for copying up extended attributes | ||
325 | * CAP_DAC_OVERRIDE for create | ||
326 | * CAP_FOWNER for chmod, timestamp update | ||
327 | * CAP_FSETID for chmod | ||
328 | * CAP_CHOWN for chown | ||
329 | * CAP_MKNOD for mknod | ||
330 | */ | ||
331 | cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); | ||
332 | cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); | ||
333 | cap_raise(override_cred->cap_effective, CAP_FOWNER); | ||
334 | cap_raise(override_cred->cap_effective, CAP_FSETID); | ||
335 | cap_raise(override_cred->cap_effective, CAP_CHOWN); | ||
336 | cap_raise(override_cred->cap_effective, CAP_MKNOD); | ||
337 | old_cred = override_creds(override_cred); | ||
338 | |||
339 | err = -EIO; | ||
340 | if (lock_rename(workdir, upperdir) != NULL) { | ||
341 | pr_err("overlayfs: failed to lock workdir+upperdir\n"); | ||
342 | goto out_unlock; | ||
343 | } | ||
344 | upperdentry = ovl_dentry_upper(dentry); | ||
345 | if (upperdentry) { | ||
346 | unlock_rename(workdir, upperdir); | ||
347 | err = 0; | ||
348 | /* Raced with another copy-up? Do the setattr here */ | ||
349 | if (attr) { | ||
350 | mutex_lock(&upperdentry->d_inode->i_mutex); | ||
351 | err = notify_change(upperdentry, attr, NULL); | ||
352 | mutex_unlock(&upperdentry->d_inode->i_mutex); | ||
353 | } | ||
354 | goto out_put_cred; | ||
355 | } | ||
356 | |||
357 | err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, | ||
358 | stat, attr, link); | ||
359 | if (!err) { | ||
360 | /* Restore timestamps on parent (best effort) */ | ||
361 | ovl_set_timestamps(upperdir, &pstat); | ||
362 | } | ||
363 | out_unlock: | ||
364 | unlock_rename(workdir, upperdir); | ||
365 | out_put_cred: | ||
366 | revert_creds(old_cred); | ||
367 | put_cred(override_cred); | ||
368 | |||
369 | out_free_link: | ||
370 | if (link) | ||
371 | free_page((unsigned long) link); | ||
372 | |||
373 | return err; | ||
374 | } | ||
375 | |||
376 | int ovl_copy_up(struct dentry *dentry) | ||
377 | { | ||
378 | int err; | ||
379 | |||
380 | err = 0; | ||
381 | while (!err) { | ||
382 | struct dentry *next; | ||
383 | struct dentry *parent; | ||
384 | struct path lowerpath; | ||
385 | struct kstat stat; | ||
386 | enum ovl_path_type type = ovl_path_type(dentry); | ||
387 | |||
388 | if (type != OVL_PATH_LOWER) | ||
389 | break; | ||
390 | |||
391 | next = dget(dentry); | ||
392 | /* find the topmost dentry not yet copied up */ | ||
393 | for (;;) { | ||
394 | parent = dget_parent(next); | ||
395 | |||
396 | type = ovl_path_type(parent); | ||
397 | if (type != OVL_PATH_LOWER) | ||
398 | break; | ||
399 | |||
400 | dput(next); | ||
401 | next = parent; | ||
402 | } | ||
403 | |||
404 | ovl_path_lower(next, &lowerpath); | ||
405 | err = vfs_getattr(&lowerpath, &stat); | ||
406 | if (!err) | ||
407 | err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL); | ||
408 | |||
409 | dput(parent); | ||
410 | dput(next); | ||
411 | } | ||
412 | |||
413 | return err; | ||
414 | } | ||
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c new file mode 100644 index 000000000000..15cd91ad9940 --- /dev/null +++ b/fs/overlayfs/dir.c | |||
@@ -0,0 +1,921 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Copyright (C) 2011 Novell Inc. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published by | ||
7 | * the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/fs.h> | ||
11 | #include <linux/namei.h> | ||
12 | #include <linux/xattr.h> | ||
13 | #include <linux/security.h> | ||
14 | #include <linux/cred.h> | ||
15 | #include "overlayfs.h" | ||
16 | |||
17 | void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) | ||
18 | { | ||
19 | int err; | ||
20 | |||
21 | dget(wdentry); | ||
22 | if (S_ISDIR(wdentry->d_inode->i_mode)) | ||
23 | err = ovl_do_rmdir(wdir, wdentry); | ||
24 | else | ||
25 | err = ovl_do_unlink(wdir, wdentry); | ||
26 | dput(wdentry); | ||
27 | |||
28 | if (err) { | ||
29 | pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n", | ||
30 | wdentry, err); | ||
31 | } | ||
32 | } | ||
33 | |||
34 | struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry) | ||
35 | { | ||
36 | struct dentry *temp; | ||
37 | char name[20]; | ||
38 | |||
39 | snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry); | ||
40 | |||
41 | temp = lookup_one_len(name, workdir, strlen(name)); | ||
42 | if (!IS_ERR(temp) && temp->d_inode) { | ||
43 | pr_err("overlayfs: workdir/%s already exists\n", name); | ||
44 | dput(temp); | ||
45 | temp = ERR_PTR(-EIO); | ||
46 | } | ||
47 | |||
48 | return temp; | ||
49 | } | ||
50 | |||
51 | /* caller holds i_mutex on workdir */ | ||
52 | static struct dentry *ovl_whiteout(struct dentry *workdir, | ||
53 | struct dentry *dentry) | ||
54 | { | ||
55 | int err; | ||
56 | struct dentry *whiteout; | ||
57 | struct inode *wdir = workdir->d_inode; | ||
58 | |||
59 | whiteout = ovl_lookup_temp(workdir, dentry); | ||
60 | if (IS_ERR(whiteout)) | ||
61 | return whiteout; | ||
62 | |||
63 | err = ovl_do_whiteout(wdir, whiteout); | ||
64 | if (err) { | ||
65 | dput(whiteout); | ||
66 | whiteout = ERR_PTR(err); | ||
67 | } | ||
68 | |||
69 | return whiteout; | ||
70 | } | ||
71 | |||
72 | int ovl_create_real(struct inode *dir, struct dentry *newdentry, | ||
73 | struct kstat *stat, const char *link, | ||
74 | struct dentry *hardlink, bool debug) | ||
75 | { | ||
76 | int err; | ||
77 | |||
78 | if (newdentry->d_inode) | ||
79 | return -ESTALE; | ||
80 | |||
81 | if (hardlink) { | ||
82 | err = ovl_do_link(hardlink, dir, newdentry, debug); | ||
83 | } else { | ||
84 | switch (stat->mode & S_IFMT) { | ||
85 | case S_IFREG: | ||
86 | err = ovl_do_create(dir, newdentry, stat->mode, debug); | ||
87 | break; | ||
88 | |||
89 | case S_IFDIR: | ||
90 | err = ovl_do_mkdir(dir, newdentry, stat->mode, debug); | ||
91 | break; | ||
92 | |||
93 | case S_IFCHR: | ||
94 | case S_IFBLK: | ||
95 | case S_IFIFO: | ||
96 | case S_IFSOCK: | ||
97 | err = ovl_do_mknod(dir, newdentry, | ||
98 | stat->mode, stat->rdev, debug); | ||
99 | break; | ||
100 | |||
101 | case S_IFLNK: | ||
102 | err = ovl_do_symlink(dir, newdentry, link, debug); | ||
103 | break; | ||
104 | |||
105 | default: | ||
106 | err = -EPERM; | ||
107 | } | ||
108 | } | ||
109 | if (!err && WARN_ON(!newdentry->d_inode)) { | ||
110 | /* | ||
111 | * Not quite sure if non-instantiated dentry is legal or not. | ||
112 | * VFS doesn't seem to care so check and warn here. | ||
113 | */ | ||
114 | err = -ENOENT; | ||
115 | } | ||
116 | return err; | ||
117 | } | ||
118 | |||
119 | static int ovl_set_opaque(struct dentry *upperdentry) | ||
120 | { | ||
121 | return ovl_do_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0); | ||
122 | } | ||
123 | |||
124 | static void ovl_remove_opaque(struct dentry *upperdentry) | ||
125 | { | ||
126 | int err; | ||
127 | |||
128 | err = ovl_do_removexattr(upperdentry, ovl_opaque_xattr); | ||
129 | if (err) { | ||
130 | pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n", | ||
131 | upperdentry->d_name.name, err); | ||
132 | } | ||
133 | } | ||
134 | |||
135 | static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
136 | struct kstat *stat) | ||
137 | { | ||
138 | int err; | ||
139 | enum ovl_path_type type; | ||
140 | struct path realpath; | ||
141 | |||
142 | type = ovl_path_real(dentry, &realpath); | ||
143 | err = vfs_getattr(&realpath, stat); | ||
144 | if (err) | ||
145 | return err; | ||
146 | |||
147 | stat->dev = dentry->d_sb->s_dev; | ||
148 | stat->ino = dentry->d_inode->i_ino; | ||
149 | |||
150 | /* | ||
151 | * It's probably not worth it to count subdirs to get the | ||
152 | * correct link count. nlink=1 seems to pacify 'find' and | ||
153 | * other utilities. | ||
154 | */ | ||
155 | if (type == OVL_PATH_MERGE) | ||
156 | stat->nlink = 1; | ||
157 | |||
158 | return 0; | ||
159 | } | ||
160 | |||
161 | static int ovl_create_upper(struct dentry *dentry, struct inode *inode, | ||
162 | struct kstat *stat, const char *link, | ||
163 | struct dentry *hardlink) | ||
164 | { | ||
165 | struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); | ||
166 | struct inode *udir = upperdir->d_inode; | ||
167 | struct dentry *newdentry; | ||
168 | int err; | ||
169 | |||
170 | mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT); | ||
171 | newdentry = lookup_one_len(dentry->d_name.name, upperdir, | ||
172 | dentry->d_name.len); | ||
173 | err = PTR_ERR(newdentry); | ||
174 | if (IS_ERR(newdentry)) | ||
175 | goto out_unlock; | ||
176 | err = ovl_create_real(udir, newdentry, stat, link, hardlink, false); | ||
177 | if (err) | ||
178 | goto out_dput; | ||
179 | |||
180 | ovl_dentry_version_inc(dentry->d_parent); | ||
181 | ovl_dentry_update(dentry, newdentry); | ||
182 | ovl_copyattr(newdentry->d_inode, inode); | ||
183 | d_instantiate(dentry, inode); | ||
184 | newdentry = NULL; | ||
185 | out_dput: | ||
186 | dput(newdentry); | ||
187 | out_unlock: | ||
188 | mutex_unlock(&udir->i_mutex); | ||
189 | return err; | ||
190 | } | ||
191 | |||
192 | static int ovl_lock_rename_workdir(struct dentry *workdir, | ||
193 | struct dentry *upperdir) | ||
194 | { | ||
195 | /* Workdir should not be the same as upperdir */ | ||
196 | if (workdir == upperdir) | ||
197 | goto err; | ||
198 | |||
199 | /* Workdir should not be subdir of upperdir and vice versa */ | ||
200 | if (lock_rename(workdir, upperdir) != NULL) | ||
201 | goto err_unlock; | ||
202 | |||
203 | return 0; | ||
204 | |||
205 | err_unlock: | ||
206 | unlock_rename(workdir, upperdir); | ||
207 | err: | ||
208 | pr_err("overlayfs: failed to lock workdir+upperdir\n"); | ||
209 | return -EIO; | ||
210 | } | ||
211 | |||
212 | static struct dentry *ovl_clear_empty(struct dentry *dentry, | ||
213 | struct list_head *list) | ||
214 | { | ||
215 | struct dentry *workdir = ovl_workdir(dentry); | ||
216 | struct inode *wdir = workdir->d_inode; | ||
217 | struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); | ||
218 | struct inode *udir = upperdir->d_inode; | ||
219 | struct path upperpath; | ||
220 | struct dentry *upper; | ||
221 | struct dentry *opaquedir; | ||
222 | struct kstat stat; | ||
223 | int err; | ||
224 | |||
225 | err = ovl_lock_rename_workdir(workdir, upperdir); | ||
226 | if (err) | ||
227 | goto out; | ||
228 | |||
229 | ovl_path_upper(dentry, &upperpath); | ||
230 | err = vfs_getattr(&upperpath, &stat); | ||
231 | if (err) | ||
232 | goto out_unlock; | ||
233 | |||
234 | err = -ESTALE; | ||
235 | if (!S_ISDIR(stat.mode)) | ||
236 | goto out_unlock; | ||
237 | upper = upperpath.dentry; | ||
238 | if (upper->d_parent->d_inode != udir) | ||
239 | goto out_unlock; | ||
240 | |||
241 | opaquedir = ovl_lookup_temp(workdir, dentry); | ||
242 | err = PTR_ERR(opaquedir); | ||
243 | if (IS_ERR(opaquedir)) | ||
244 | goto out_unlock; | ||
245 | |||
246 | err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true); | ||
247 | if (err) | ||
248 | goto out_dput; | ||
249 | |||
250 | err = ovl_copy_xattr(upper, opaquedir); | ||
251 | if (err) | ||
252 | goto out_cleanup; | ||
253 | |||
254 | err = ovl_set_opaque(opaquedir); | ||
255 | if (err) | ||
256 | goto out_cleanup; | ||
257 | |||
258 | mutex_lock(&opaquedir->d_inode->i_mutex); | ||
259 | err = ovl_set_attr(opaquedir, &stat); | ||
260 | mutex_unlock(&opaquedir->d_inode->i_mutex); | ||
261 | if (err) | ||
262 | goto out_cleanup; | ||
263 | |||
264 | err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE); | ||
265 | if (err) | ||
266 | goto out_cleanup; | ||
267 | |||
268 | ovl_cleanup_whiteouts(upper, list); | ||
269 | ovl_cleanup(wdir, upper); | ||
270 | unlock_rename(workdir, upperdir); | ||
271 | |||
272 | /* dentry's upper doesn't match now, get rid of it */ | ||
273 | d_drop(dentry); | ||
274 | |||
275 | return opaquedir; | ||
276 | |||
277 | out_cleanup: | ||
278 | ovl_cleanup(wdir, opaquedir); | ||
279 | out_dput: | ||
280 | dput(opaquedir); | ||
281 | out_unlock: | ||
282 | unlock_rename(workdir, upperdir); | ||
283 | out: | ||
284 | return ERR_PTR(err); | ||
285 | } | ||
286 | |||
287 | static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, | ||
288 | enum ovl_path_type type) | ||
289 | { | ||
290 | int err; | ||
291 | struct dentry *ret = NULL; | ||
292 | LIST_HEAD(list); | ||
293 | |||
294 | err = ovl_check_empty_dir(dentry, &list); | ||
295 | if (err) | ||
296 | ret = ERR_PTR(err); | ||
297 | else if (type == OVL_PATH_MERGE) | ||
298 | ret = ovl_clear_empty(dentry, &list); | ||
299 | |||
300 | ovl_cache_free(&list); | ||
301 | |||
302 | return ret; | ||
303 | } | ||
304 | |||
305 | static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, | ||
306 | struct kstat *stat, const char *link, | ||
307 | struct dentry *hardlink) | ||
308 | { | ||
309 | struct dentry *workdir = ovl_workdir(dentry); | ||
310 | struct inode *wdir = workdir->d_inode; | ||
311 | struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); | ||
312 | struct inode *udir = upperdir->d_inode; | ||
313 | struct dentry *upper; | ||
314 | struct dentry *newdentry; | ||
315 | int err; | ||
316 | |||
317 | err = ovl_lock_rename_workdir(workdir, upperdir); | ||
318 | if (err) | ||
319 | goto out; | ||
320 | |||
321 | newdentry = ovl_lookup_temp(workdir, dentry); | ||
322 | err = PTR_ERR(newdentry); | ||
323 | if (IS_ERR(newdentry)) | ||
324 | goto out_unlock; | ||
325 | |||
326 | upper = lookup_one_len(dentry->d_name.name, upperdir, | ||
327 | dentry->d_name.len); | ||
328 | err = PTR_ERR(upper); | ||
329 | if (IS_ERR(upper)) | ||
330 | goto out_dput; | ||
331 | |||
332 | err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true); | ||
333 | if (err) | ||
334 | goto out_dput2; | ||
335 | |||
336 | if (S_ISDIR(stat->mode)) { | ||
337 | err = ovl_set_opaque(newdentry); | ||
338 | if (err) | ||
339 | goto out_cleanup; | ||
340 | |||
341 | err = ovl_do_rename(wdir, newdentry, udir, upper, | ||
342 | RENAME_EXCHANGE); | ||
343 | if (err) | ||
344 | goto out_cleanup; | ||
345 | |||
346 | ovl_cleanup(wdir, upper); | ||
347 | } else { | ||
348 | err = ovl_do_rename(wdir, newdentry, udir, upper, 0); | ||
349 | if (err) | ||
350 | goto out_cleanup; | ||
351 | } | ||
352 | ovl_dentry_version_inc(dentry->d_parent); | ||
353 | ovl_dentry_update(dentry, newdentry); | ||
354 | ovl_copyattr(newdentry->d_inode, inode); | ||
355 | d_instantiate(dentry, inode); | ||
356 | newdentry = NULL; | ||
357 | out_dput2: | ||
358 | dput(upper); | ||
359 | out_dput: | ||
360 | dput(newdentry); | ||
361 | out_unlock: | ||
362 | unlock_rename(workdir, upperdir); | ||
363 | out: | ||
364 | return err; | ||
365 | |||
366 | out_cleanup: | ||
367 | ovl_cleanup(wdir, newdentry); | ||
368 | goto out_dput2; | ||
369 | } | ||
370 | |||
371 | static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev, | ||
372 | const char *link, struct dentry *hardlink) | ||
373 | { | ||
374 | int err; | ||
375 | struct inode *inode; | ||
376 | struct kstat stat = { | ||
377 | .mode = mode, | ||
378 | .rdev = rdev, | ||
379 | }; | ||
380 | |||
381 | err = -ENOMEM; | ||
382 | inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata); | ||
383 | if (!inode) | ||
384 | goto out; | ||
385 | |||
386 | err = ovl_copy_up(dentry->d_parent); | ||
387 | if (err) | ||
388 | goto out_iput; | ||
389 | |||
390 | if (!ovl_dentry_is_opaque(dentry)) { | ||
391 | err = ovl_create_upper(dentry, inode, &stat, link, hardlink); | ||
392 | } else { | ||
393 | const struct cred *old_cred; | ||
394 | struct cred *override_cred; | ||
395 | |||
396 | err = -ENOMEM; | ||
397 | override_cred = prepare_creds(); | ||
398 | if (!override_cred) | ||
399 | goto out_iput; | ||
400 | |||
401 | /* | ||
402 | * CAP_SYS_ADMIN for setting opaque xattr | ||
403 | * CAP_DAC_OVERRIDE for create in workdir, rename | ||
404 | * CAP_FOWNER for removing whiteout from sticky dir | ||
405 | */ | ||
406 | cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); | ||
407 | cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); | ||
408 | cap_raise(override_cred->cap_effective, CAP_FOWNER); | ||
409 | old_cred = override_creds(override_cred); | ||
410 | |||
411 | err = ovl_create_over_whiteout(dentry, inode, &stat, link, | ||
412 | hardlink); | ||
413 | |||
414 | revert_creds(old_cred); | ||
415 | put_cred(override_cred); | ||
416 | } | ||
417 | |||
418 | if (!err) | ||
419 | inode = NULL; | ||
420 | out_iput: | ||
421 | iput(inode); | ||
422 | out: | ||
423 | return err; | ||
424 | } | ||
425 | |||
426 | static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, | ||
427 | const char *link) | ||
428 | { | ||
429 | int err; | ||
430 | |||
431 | err = ovl_want_write(dentry); | ||
432 | if (!err) { | ||
433 | err = ovl_create_or_link(dentry, mode, rdev, link, NULL); | ||
434 | ovl_drop_write(dentry); | ||
435 | } | ||
436 | |||
437 | return err; | ||
438 | } | ||
439 | |||
440 | static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode, | ||
441 | bool excl) | ||
442 | { | ||
443 | return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL); | ||
444 | } | ||
445 | |||
446 | static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
447 | { | ||
448 | return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL); | ||
449 | } | ||
450 | |||
451 | static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, | ||
452 | dev_t rdev) | ||
453 | { | ||
454 | /* Don't allow creation of "whiteout" on overlay */ | ||
455 | if (S_ISCHR(mode) && rdev == WHITEOUT_DEV) | ||
456 | return -EPERM; | ||
457 | |||
458 | return ovl_create_object(dentry, mode, rdev, NULL); | ||
459 | } | ||
460 | |||
461 | static int ovl_symlink(struct inode *dir, struct dentry *dentry, | ||
462 | const char *link) | ||
463 | { | ||
464 | return ovl_create_object(dentry, S_IFLNK, 0, link); | ||
465 | } | ||
466 | |||
467 | static int ovl_link(struct dentry *old, struct inode *newdir, | ||
468 | struct dentry *new) | ||
469 | { | ||
470 | int err; | ||
471 | struct dentry *upper; | ||
472 | |||
473 | err = ovl_want_write(old); | ||
474 | if (err) | ||
475 | goto out; | ||
476 | |||
477 | err = ovl_copy_up(old); | ||
478 | if (err) | ||
479 | goto out_drop_write; | ||
480 | |||
481 | upper = ovl_dentry_upper(old); | ||
482 | err = ovl_create_or_link(new, upper->d_inode->i_mode, 0, NULL, upper); | ||
483 | |||
484 | out_drop_write: | ||
485 | ovl_drop_write(old); | ||
486 | out: | ||
487 | return err; | ||
488 | } | ||
489 | |||
/*
 * Remove @dentry by putting a whiteout in its place in the upper directory.
 *
 * @type:   overlay path type of @dentry as seen by the caller
 * @is_dir: true when removing a directory
 *
 * The whiteout is created in the work directory by ovl_whiteout() and then
 * renamed into the upper directory while both directories are locked via
 * ovl_lock_rename_workdir().  For directories ovl_check_empty_and_clear()
 * is called first and the rename uses RENAME_EXCHANGE, after which the
 * directory that ended up in the work directory is cleaned up.
 *
 * Once the whiteout has been created, @dentry is unhashed on both the
 * success and the failure path.  Returns 0 or a negative errno.
 */
490 | static int ovl_remove_and_whiteout(struct dentry *dentry, | ||
491 | enum ovl_path_type type, bool is_dir) | ||
492 | { | ||
493 | struct dentry *workdir = ovl_workdir(dentry); | ||
494 | struct inode *wdir = workdir->d_inode; | ||
495 | struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); | ||
496 | struct inode *udir = upperdir->d_inode; | ||
497 | struct dentry *whiteout; | ||
498 | struct dentry *upper; | ||
499 | struct dentry *opaquedir = NULL; | ||
500 | int err; | ||
501 | |||
502 | if (is_dir) { | ||
503 | opaquedir = ovl_check_empty_and_clear(dentry, type); | ||
504 | err = PTR_ERR(opaquedir); | ||
505 | if (IS_ERR(opaquedir)) | ||
506 | goto out; | ||
507 | } | ||
508 | |||
509 | err = ovl_lock_rename_workdir(workdir, upperdir); | ||
510 | if (err) | ||
511 | goto out_dput; | ||
512 | |||
513 | whiteout = ovl_whiteout(workdir, dentry); | ||
514 | err = PTR_ERR(whiteout); | ||
515 | if (IS_ERR(whiteout)) | ||
516 | goto out_unlock; | ||
517 | |||
/* Lower-only object: look the name up in upperdir and rename the whiteout onto it */
518 | if (type == OVL_PATH_LOWER) { | ||
519 | upper = lookup_one_len(dentry->d_name.name, upperdir, | ||
520 | dentry->d_name.len); | ||
521 | err = PTR_ERR(upper); | ||
522 | if (IS_ERR(upper)) | ||
523 | goto kill_whiteout; | ||
524 | |||
525 | err = ovl_do_rename(wdir, whiteout, udir, upper, 0); | ||
526 | dput(upper); | ||
527 | if (err) | ||
528 | goto kill_whiteout; | ||
529 | } else { | ||
530 | int flags = 0; | ||
531 | |||
532 | upper = ovl_dentry_upper(dentry); | ||
533 | if (opaquedir) | ||
534 | upper = opaquedir; | ||
/* -ESTALE if the upper dentry is no longer a child of upperdir */
535 | err = -ESTALE; | ||
536 | if (upper->d_parent != upperdir) | ||
537 | goto kill_whiteout; | ||
538 | |||
539 | if (is_dir) | ||
540 | flags |= RENAME_EXCHANGE; | ||
541 | |||
542 | err = ovl_do_rename(wdir, whiteout, udir, upper, flags); | ||
543 | if (err) | ||
544 | goto kill_whiteout; | ||
545 | |||
/* After the exchange the old directory sits in workdir; remove it there */
546 | if (is_dir) | ||
547 | ovl_cleanup(wdir, upper); | ||
548 | } | ||
549 | ovl_dentry_version_inc(dentry->d_parent); | ||
550 | out_d_drop: | ||
551 | d_drop(dentry); | ||
552 | dput(whiteout); | ||
553 | out_unlock: | ||
554 | unlock_rename(workdir, upperdir); | ||
555 | out_dput: | ||
556 | dput(opaquedir); | ||
557 | out: | ||
558 | return err; | ||
559 | |||
/* Error path: remove the unused whiteout from workdir, then unwind as usual */
560 | kill_whiteout: | ||
561 | ovl_cleanup(wdir, whiteout); | ||
562 | goto out_d_drop; | ||
563 | } | ||
564 | |||
565 | static int ovl_remove_upper(struct dentry *dentry, bool is_dir) | ||
566 | { | ||
567 | struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); | ||
568 | struct inode *dir = upperdir->d_inode; | ||
569 | struct dentry *upper = ovl_dentry_upper(dentry); | ||
570 | int err; | ||
571 | |||
572 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | ||
573 | err = -ESTALE; | ||
574 | if (upper->d_parent == upperdir) { | ||
575 | /* Don't let d_delete() think it can reset d_inode */ | ||
576 | dget(upper); | ||
577 | if (is_dir) | ||
578 | err = vfs_rmdir(dir, upper); | ||
579 | else | ||
580 | err = vfs_unlink(dir, upper, NULL); | ||
581 | dput(upper); | ||
582 | ovl_dentry_version_inc(dentry->d_parent); | ||
583 | } | ||
584 | |||
585 | /* | ||
586 | * Keeping this dentry hashed would mean having to release | ||
587 | * upperpath/lowerpath, which could only be done if we are the | ||
588 | * sole user of this dentry. Too tricky... Just unhash for | ||
589 | * now. | ||
590 | */ | ||
591 | d_drop(dentry); | ||
592 | mutex_unlock(&dir->i_mutex); | ||
593 | |||
594 | return err; | ||
595 | } | ||
596 | |||
597 | static inline int ovl_check_sticky(struct dentry *dentry) | ||
598 | { | ||
599 | struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode; | ||
600 | struct inode *inode = ovl_dentry_real(dentry)->d_inode; | ||
601 | |||
602 | if (check_sticky(dir, inode)) | ||
603 | return -EPERM; | ||
604 | |||
605 | return 0; | ||
606 | } | ||
607 | |||
608 | static int ovl_do_remove(struct dentry *dentry, bool is_dir) | ||
609 | { | ||
610 | enum ovl_path_type type; | ||
611 | int err; | ||
612 | |||
613 | err = ovl_check_sticky(dentry); | ||
614 | if (err) | ||
615 | goto out; | ||
616 | |||
617 | err = ovl_want_write(dentry); | ||
618 | if (err) | ||
619 | goto out; | ||
620 | |||
621 | err = ovl_copy_up(dentry->d_parent); | ||
622 | if (err) | ||
623 | goto out_drop_write; | ||
624 | |||
625 | type = ovl_path_type(dentry); | ||
626 | if (type == OVL_PATH_PURE_UPPER) { | ||
627 | err = ovl_remove_upper(dentry, is_dir); | ||
628 | } else { | ||
629 | const struct cred *old_cred; | ||
630 | struct cred *override_cred; | ||
631 | |||
632 | err = -ENOMEM; | ||
633 | override_cred = prepare_creds(); | ||
634 | if (!override_cred) | ||
635 | goto out_drop_write; | ||
636 | |||
637 | /* | ||
638 | * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir | ||
639 | * CAP_DAC_OVERRIDE for create in workdir, rename | ||
640 | * CAP_FOWNER for removing whiteout from sticky dir | ||
641 | * CAP_FSETID for chmod of opaque dir | ||
642 | * CAP_CHOWN for chown of opaque dir | ||
643 | */ | ||
644 | cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); | ||
645 | cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); | ||
646 | cap_raise(override_cred->cap_effective, CAP_FOWNER); | ||
647 | cap_raise(override_cred->cap_effective, CAP_FSETID); | ||
648 | cap_raise(override_cred->cap_effective, CAP_CHOWN); | ||
649 | old_cred = override_creds(override_cred); | ||
650 | |||
651 | err = ovl_remove_and_whiteout(dentry, type, is_dir); | ||
652 | |||
653 | revert_creds(old_cred); | ||
654 | put_cred(override_cred); | ||
655 | } | ||
656 | out_drop_write: | ||
657 | ovl_drop_write(dentry); | ||
658 | out: | ||
659 | return err; | ||
660 | } | ||
661 | |||
662 | static int ovl_unlink(struct inode *dir, struct dentry *dentry) | ||
663 | { | ||
664 | return ovl_do_remove(dentry, false); | ||
665 | } | ||
666 | |||
667 | static int ovl_rmdir(struct inode *dir, struct dentry *dentry) | ||
668 | { | ||
669 | return ovl_do_remove(dentry, true); | ||
670 | } | ||
671 | |||
/*
 * ->rename2: rename @old to @new on the overlay.
 *
 * @flags may contain only RENAME_EXCHANGE and RENAME_NOREPLACE; anything
 * else yields -EINVAL.  RENAME_NOREPLACE is cleared before any further
 * processing in this function.
 *
 * -EXDEV is returned when @old is a directory of type OVL_PATH_LOWER or
 * OVL_PATH_MERGE, and for an exchange when @new is such a directory.
 * If both names already refer to the same lower inode (both lower) or the
 * same upper inode (neither lower), 0 is returned without doing anything.
 *
 * Otherwise @old, the parent of @new and (for an exchange) @new itself are
 * copied up, and the rename is carried out on the upper dentries under
 * lock_rename().  When either side is not OVL_PATH_PURE_UPPER the work is
 * done with temporarily raised capabilities.
 *
 * Returns 0 or a negative errno.
 */
672 | static int ovl_rename2(struct inode *olddir, struct dentry *old, | ||
673 | struct inode *newdir, struct dentry *new, | ||
674 | unsigned int flags) | ||
675 | { | ||
676 | int err; | ||
677 | enum ovl_path_type old_type; | ||
678 | enum ovl_path_type new_type; | ||
679 | struct dentry *old_upperdir; | ||
680 | struct dentry *new_upperdir; | ||
681 | struct dentry *olddentry; | ||
682 | struct dentry *newdentry; | ||
683 | struct dentry *trap; | ||
684 | bool old_opaque; | ||
685 | bool new_opaque; | ||
686 | bool new_create = false; | ||
687 | bool cleanup_whiteout = false; | ||
688 | bool overwrite = !(flags & RENAME_EXCHANGE); | ||
689 | bool is_dir = S_ISDIR(old->d_inode->i_mode); | ||
690 | bool new_is_dir = false; | ||
691 | struct dentry *opaquedir = NULL; | ||
692 | const struct cred *old_cred = NULL; | ||
693 | struct cred *override_cred = NULL; | ||
694 | |||
695 | err = -EINVAL; | ||
696 | if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE)) | ||
697 | goto out; | ||
698 | |||
699 | flags &= ~RENAME_NOREPLACE; | ||
700 | |||
701 | err = ovl_check_sticky(old); | ||
702 | if (err) | ||
703 | goto out; | ||
704 | |||
705 | /* Don't copy up directory trees */ | ||
706 | old_type = ovl_path_type(old); | ||
707 | err = -EXDEV; | ||
708 | if ((old_type == OVL_PATH_LOWER || old_type == OVL_PATH_MERGE) && is_dir) | ||
709 | goto out; | ||
710 | |||
711 | if (new->d_inode) { | ||
712 | err = ovl_check_sticky(new); | ||
713 | if (err) | ||
714 | goto out; | ||
715 | |||
716 | if (S_ISDIR(new->d_inode->i_mode)) | ||
717 | new_is_dir = true; | ||
718 | |||
719 | new_type = ovl_path_type(new); | ||
720 | err = -EXDEV; | ||
721 | if (!overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) | ||
722 | goto out; | ||
723 | |||
/* Source and target already share an inode: report success, do nothing */
724 | err = 0; | ||
725 | if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) { | ||
726 | if (ovl_dentry_lower(old)->d_inode == | ||
727 | ovl_dentry_lower(new)->d_inode) | ||
728 | goto out; | ||
729 | } | ||
730 | if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) { | ||
731 | if (ovl_dentry_upper(old)->d_inode == | ||
732 | ovl_dentry_upper(new)->d_inode) | ||
733 | goto out; | ||
734 | } | ||
735 | } else { | ||
/* Negative target: derive its type from the opaque flag alone */
736 | if (ovl_dentry_is_opaque(new)) | ||
737 | new_type = OVL_PATH_UPPER; | ||
738 | else | ||
739 | new_type = OVL_PATH_PURE_UPPER; | ||
740 | } | ||
741 | |||
742 | err = ovl_want_write(old); | ||
743 | if (err) | ||
744 | goto out; | ||
745 | |||
746 | err = ovl_copy_up(old); | ||
747 | if (err) | ||
748 | goto out_drop_write; | ||
749 | |||
750 | err = ovl_copy_up(new->d_parent); | ||
751 | if (err) | ||
752 | goto out_drop_write; | ||
753 | if (!overwrite) { | ||
754 | err = ovl_copy_up(new); | ||
755 | if (err) | ||
756 | goto out_drop_write; | ||
757 | } | ||
758 | |||
/* "opaque" here means: anything that is not OVL_PATH_PURE_UPPER */
759 | old_opaque = old_type != OVL_PATH_PURE_UPPER; | ||
760 | new_opaque = new_type != OVL_PATH_PURE_UPPER; | ||
761 | |||
762 | if (old_opaque || new_opaque) { | ||
763 | err = -ENOMEM; | ||
764 | override_cred = prepare_creds(); | ||
765 | if (!override_cred) | ||
766 | goto out_drop_write; | ||
767 | |||
768 | /* | ||
769 | * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir | ||
770 | * CAP_DAC_OVERRIDE for create in workdir | ||
771 | * CAP_FOWNER for removing whiteout from sticky dir | ||
772 | * CAP_FSETID for chmod of opaque dir | ||
773 | * CAP_CHOWN for chown of opaque dir | ||
774 | */ | ||
775 | cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); | ||
776 | cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); | ||
777 | cap_raise(override_cred->cap_effective, CAP_FOWNER); | ||
778 | cap_raise(override_cred->cap_effective, CAP_FSETID); | ||
779 | cap_raise(override_cred->cap_effective, CAP_CHOWN); | ||
780 | old_cred = override_creds(override_cred); | ||
781 | } | ||
782 | |||
783 | if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) { | ||
784 | opaquedir = ovl_check_empty_and_clear(new, new_type); | ||
785 | err = PTR_ERR(opaquedir); | ||
786 | if (IS_ERR(opaquedir)) { | ||
/* Reset to NULL so the dput(opaquedir) at "out" is not handed an error pointer */
787 | opaquedir = NULL; | ||
788 | goto out_revert_creds; | ||
789 | } | ||
790 | } | ||
791 | |||
792 | if (overwrite) { | ||
793 | if (old_opaque) { | ||
794 | if (new->d_inode || !new_opaque) { | ||
795 | /* Whiteout source */ | ||
796 | flags |= RENAME_WHITEOUT; | ||
797 | } else { | ||
798 | /* Switch whiteouts */ | ||
799 | flags |= RENAME_EXCHANGE; | ||
800 | } | ||
801 | } else if (is_dir && !new->d_inode && new_opaque) { | ||
802 | flags |= RENAME_EXCHANGE; | ||
803 | cleanup_whiteout = true; | ||
804 | } | ||
805 | } | ||
806 | |||
807 | old_upperdir = ovl_dentry_upper(old->d_parent); | ||
808 | new_upperdir = ovl_dentry_upper(new->d_parent); | ||
809 | |||
810 | trap = lock_rename(new_upperdir, old_upperdir); | ||
811 | |||
812 | olddentry = ovl_dentry_upper(old); | ||
813 | newdentry = ovl_dentry_upper(new); | ||
814 | if (newdentry) { | ||
815 | if (opaquedir) { | ||
/* Reference on opaquedir moves to newdentry; dropped at out_dput */
816 | newdentry = opaquedir; | ||
817 | opaquedir = NULL; | ||
818 | } else { | ||
819 | dget(newdentry); | ||
820 | } | ||
821 | } else { | ||
822 | new_create = true; | ||
823 | newdentry = lookup_one_len(new->d_name.name, new_upperdir, | ||
824 | new->d_name.len); | ||
825 | err = PTR_ERR(newdentry); | ||
826 | if (IS_ERR(newdentry)) | ||
827 | goto out_unlock; | ||
828 | } | ||
829 | |||
/* -ESTALE if an upper dentry has left its expected parent or equals the lock_rename() trap */
830 | err = -ESTALE; | ||
831 | if (olddentry->d_parent != old_upperdir) | ||
832 | goto out_dput; | ||
833 | if (newdentry->d_parent != new_upperdir) | ||
834 | goto out_dput; | ||
835 | if (olddentry == trap) | ||
836 | goto out_dput; | ||
837 | if (newdentry == trap) | ||
838 | goto out_dput; | ||
839 | |||
/* Opaque marker is set before the rename and removed again below if the rename fails */
840 | if (is_dir && !old_opaque && new_opaque) { | ||
841 | err = ovl_set_opaque(olddentry); | ||
842 | if (err) | ||
843 | goto out_dput; | ||
844 | } | ||
845 | if (!overwrite && new_is_dir && old_opaque && !new_opaque) { | ||
846 | err = ovl_set_opaque(newdentry); | ||
847 | if (err) | ||
848 | goto out_dput; | ||
849 | } | ||
850 | |||
851 | if (old_opaque || new_opaque) { | ||
852 | err = ovl_do_rename(old_upperdir->d_inode, olddentry, | ||
853 | new_upperdir->d_inode, newdentry, | ||
854 | flags); | ||
855 | } else { | ||
856 | /* No debug for the plain case */ | ||
857 | BUG_ON(flags & ~RENAME_EXCHANGE); | ||
858 | err = vfs_rename(old_upperdir->d_inode, olddentry, | ||
859 | new_upperdir->d_inode, newdentry, | ||
860 | NULL, flags); | ||
861 | } | ||
862 | |||
863 | if (err) { | ||
864 | if (is_dir && !old_opaque && new_opaque) | ||
865 | ovl_remove_opaque(olddentry); | ||
866 | if (!overwrite && new_is_dir && old_opaque && !new_opaque) | ||
867 | ovl_remove_opaque(newdentry); | ||
868 | goto out_dput; | ||
869 | } | ||
870 | |||
871 | if (is_dir && old_opaque && !new_opaque) | ||
872 | ovl_remove_opaque(olddentry); | ||
873 | if (!overwrite && new_is_dir && !old_opaque && new_opaque) | ||
874 | ovl_remove_opaque(newdentry); | ||
875 | |||
876 | if (old_opaque != new_opaque) { | ||
877 | ovl_dentry_set_opaque(old, new_opaque); | ||
878 | if (!overwrite) | ||
879 | ovl_dentry_set_opaque(new, old_opaque); | ||
880 | } | ||
881 | |||
882 | if (cleanup_whiteout) | ||
883 | ovl_cleanup(old_upperdir->d_inode, newdentry); | ||
884 | |||
885 | ovl_dentry_version_inc(old->d_parent); | ||
886 | ovl_dentry_version_inc(new->d_parent); | ||
887 | |||
888 | out_dput: | ||
889 | dput(newdentry); | ||
890 | out_unlock: | ||
891 | unlock_rename(new_upperdir, old_upperdir); | ||
892 | out_revert_creds: | ||
893 | if (old_opaque || new_opaque) { | ||
894 | revert_creds(old_cred); | ||
895 | put_cred(override_cred); | ||
896 | } | ||
897 | out_drop_write: | ||
898 | ovl_drop_write(old); | ||
899 | out: | ||
900 | dput(opaquedir); | ||
901 | return err; | ||
902 | } | ||
903 | |||
904 | const struct inode_operations ovl_dir_inode_operations = { | ||
905 | .lookup = ovl_lookup, | ||
906 | .mkdir = ovl_mkdir, | ||
907 | .symlink = ovl_symlink, | ||
908 | .unlink = ovl_unlink, | ||
909 | .rmdir = ovl_rmdir, | ||
910 | .rename2 = ovl_rename2, | ||
911 | .link = ovl_link, | ||
912 | .setattr = ovl_setattr, | ||
913 | .create = ovl_create, | ||
914 | .mknod = ovl_mknod, | ||
915 | .permission = ovl_permission, | ||
916 | .getattr = ovl_dir_getattr, | ||
917 | .setxattr = ovl_setxattr, | ||
918 | .getxattr = ovl_getxattr, | ||
919 | .listxattr = ovl_listxattr, | ||
920 | .removexattr = ovl_removexattr, | ||
921 | }; | ||
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c new file mode 100644 index 000000000000..af2d18c9fcee --- /dev/null +++ b/fs/overlayfs/inode.c | |||
@@ -0,0 +1,425 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Copyright (C) 2011 Novell Inc. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published by | ||
7 | * the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/fs.h> | ||
11 | #include <linux/slab.h> | ||
12 | #include <linux/xattr.h> | ||
13 | #include "overlayfs.h" | ||
14 | |||
15 | static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr, | ||
16 | bool no_data) | ||
17 | { | ||
18 | int err; | ||
19 | struct dentry *parent; | ||
20 | struct kstat stat; | ||
21 | struct path lowerpath; | ||
22 | |||
23 | parent = dget_parent(dentry); | ||
24 | err = ovl_copy_up(parent); | ||
25 | if (err) | ||
26 | goto out_dput_parent; | ||
27 | |||
28 | ovl_path_lower(dentry, &lowerpath); | ||
29 | err = vfs_getattr(&lowerpath, &stat); | ||
30 | if (err) | ||
31 | goto out_dput_parent; | ||
32 | |||
33 | if (no_data) | ||
34 | stat.size = 0; | ||
35 | |||
36 | err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr); | ||
37 | |||
38 | out_dput_parent: | ||
39 | dput(parent); | ||
40 | return err; | ||
41 | } | ||
42 | |||
43 | int ovl_setattr(struct dentry *dentry, struct iattr *attr) | ||
44 | { | ||
45 | int err; | ||
46 | struct dentry *upperdentry; | ||
47 | |||
48 | err = ovl_want_write(dentry); | ||
49 | if (err) | ||
50 | goto out; | ||
51 | |||
52 | upperdentry = ovl_dentry_upper(dentry); | ||
53 | if (upperdentry) { | ||
54 | mutex_lock(&upperdentry->d_inode->i_mutex); | ||
55 | err = notify_change(upperdentry, attr, NULL); | ||
56 | mutex_unlock(&upperdentry->d_inode->i_mutex); | ||
57 | } else { | ||
58 | err = ovl_copy_up_last(dentry, attr, false); | ||
59 | } | ||
60 | ovl_drop_write(dentry); | ||
61 | out: | ||
62 | return err; | ||
63 | } | ||
64 | |||
65 | static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
66 | struct kstat *stat) | ||
67 | { | ||
68 | struct path realpath; | ||
69 | |||
70 | ovl_path_real(dentry, &realpath); | ||
71 | return vfs_getattr(&realpath, stat); | ||
72 | } | ||
73 | |||
74 | int ovl_permission(struct inode *inode, int mask) | ||
75 | { | ||
76 | struct ovl_entry *oe; | ||
77 | struct dentry *alias = NULL; | ||
78 | struct inode *realinode; | ||
79 | struct dentry *realdentry; | ||
80 | bool is_upper; | ||
81 | int err; | ||
82 | |||
83 | if (S_ISDIR(inode->i_mode)) { | ||
84 | oe = inode->i_private; | ||
85 | } else if (mask & MAY_NOT_BLOCK) { | ||
86 | return -ECHILD; | ||
87 | } else { | ||
88 | /* | ||
89 | * For non-directories find an alias and get the info | ||
90 | * from there. | ||
91 | */ | ||
92 | alias = d_find_any_alias(inode); | ||
93 | if (WARN_ON(!alias)) | ||
94 | return -ENOENT; | ||
95 | |||
96 | oe = alias->d_fsdata; | ||
97 | } | ||
98 | |||
99 | realdentry = ovl_entry_real(oe, &is_upper); | ||
100 | |||
101 | /* Careful in RCU walk mode */ | ||
102 | realinode = ACCESS_ONCE(realdentry->d_inode); | ||
103 | if (!realinode) { | ||
104 | WARN_ON(!(mask & MAY_NOT_BLOCK)); | ||
105 | err = -ENOENT; | ||
106 | goto out_dput; | ||
107 | } | ||
108 | |||
109 | if (mask & MAY_WRITE) { | ||
110 | umode_t mode = realinode->i_mode; | ||
111 | |||
112 | /* | ||
113 | * Writes will always be redirected to upper layer, so | ||
114 | * ignore lower layer being read-only. | ||
115 | * | ||
116 | * If the overlay itself is read-only then proceed | ||
117 | * with the permission check, don't return EROFS. | ||
118 | * This will only happen if this is the lower layer of | ||
119 | * another overlayfs. | ||
120 | * | ||
121 | * If upper fs becomes read-only after the overlay was | ||
122 | * constructed return EROFS to prevent modification of | ||
123 | * upper layer. | ||
124 | */ | ||
125 | err = -EROFS; | ||
126 | if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) && | ||
127 | (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) | ||
128 | goto out_dput; | ||
129 | } | ||
130 | |||
131 | err = __inode_permission(realinode, mask); | ||
132 | out_dput: | ||
133 | dput(alias); | ||
134 | return err; | ||
135 | } | ||
136 | |||
137 | |||
/*
 * Cookie that ovl_follow_link() allocates and ovl_put_link() frees: it
 * records the real dentry and the cookie returned by the real inode's
 * ->follow_link so that the real ->put_link can be called later.
 */
138 | struct ovl_link_data { | ||
139 | struct dentry *realdentry; | ||
140 | void *cookie; | ||
141 | }; | ||
142 | |||
143 | static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
144 | { | ||
145 | void *ret; | ||
146 | struct dentry *realdentry; | ||
147 | struct inode *realinode; | ||
148 | |||
149 | realdentry = ovl_dentry_real(dentry); | ||
150 | realinode = realdentry->d_inode; | ||
151 | |||
152 | if (WARN_ON(!realinode->i_op->follow_link)) | ||
153 | return ERR_PTR(-EPERM); | ||
154 | |||
155 | ret = realinode->i_op->follow_link(realdentry, nd); | ||
156 | if (IS_ERR(ret)) | ||
157 | return ret; | ||
158 | |||
159 | if (realinode->i_op->put_link) { | ||
160 | struct ovl_link_data *data; | ||
161 | |||
162 | data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL); | ||
163 | if (!data) { | ||
164 | realinode->i_op->put_link(realdentry, nd, ret); | ||
165 | return ERR_PTR(-ENOMEM); | ||
166 | } | ||
167 | data->realdentry = realdentry; | ||
168 | data->cookie = ret; | ||
169 | |||
170 | return data; | ||
171 | } else { | ||
172 | return NULL; | ||
173 | } | ||
174 | } | ||
175 | |||
176 | static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c) | ||
177 | { | ||
178 | struct inode *realinode; | ||
179 | struct ovl_link_data *data = c; | ||
180 | |||
181 | if (!data) | ||
182 | return; | ||
183 | |||
184 | realinode = data->realdentry->d_inode; | ||
185 | realinode->i_op->put_link(data->realdentry, nd, data->cookie); | ||
186 | kfree(data); | ||
187 | } | ||
188 | |||
189 | static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | ||
190 | { | ||
191 | struct path realpath; | ||
192 | struct inode *realinode; | ||
193 | |||
194 | ovl_path_real(dentry, &realpath); | ||
195 | realinode = realpath.dentry->d_inode; | ||
196 | |||
197 | if (!realinode->i_op->readlink) | ||
198 | return -EINVAL; | ||
199 | |||
200 | touch_atime(&realpath); | ||
201 | |||
202 | return realinode->i_op->readlink(realpath.dentry, buf, bufsiz); | ||
203 | } | ||
204 | |||
205 | |||
/*
 * Return true if @name lies in overlayfs' private xattr namespace, i.e.
 * starts with "trusted.overlay.".
 *
 * The comparison length is derived from the prefix itself so that the
 * whole prefix, including the trailing dot, has to match.  A hard-coded
 * length shorter than the prefix would also match unrelated names such as
 * "trusted.overlaX".
 */
static bool ovl_is_private_xattr(const char *name)
{
	static const char prefix[] = "trusted.overlay.";

	return strncmp(name, prefix, sizeof(prefix) - 1) == 0;
}
210 | |||
211 | int ovl_setxattr(struct dentry *dentry, const char *name, | ||
212 | const void *value, size_t size, int flags) | ||
213 | { | ||
214 | int err; | ||
215 | struct dentry *upperdentry; | ||
216 | |||
217 | err = ovl_want_write(dentry); | ||
218 | if (err) | ||
219 | goto out; | ||
220 | |||
221 | err = -EPERM; | ||
222 | if (ovl_is_private_xattr(name)) | ||
223 | goto out_drop_write; | ||
224 | |||
225 | err = ovl_copy_up(dentry); | ||
226 | if (err) | ||
227 | goto out_drop_write; | ||
228 | |||
229 | upperdentry = ovl_dentry_upper(dentry); | ||
230 | err = vfs_setxattr(upperdentry, name, value, size, flags); | ||
231 | |||
232 | out_drop_write: | ||
233 | ovl_drop_write(dentry); | ||
234 | out: | ||
235 | return err; | ||
236 | } | ||
237 | |||
238 | ssize_t ovl_getxattr(struct dentry *dentry, const char *name, | ||
239 | void *value, size_t size) | ||
240 | { | ||
241 | if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && | ||
242 | ovl_is_private_xattr(name)) | ||
243 | return -ENODATA; | ||
244 | |||
245 | return vfs_getxattr(ovl_dentry_real(dentry), name, value, size); | ||
246 | } | ||
247 | |||
248 | ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) | ||
249 | { | ||
250 | ssize_t res; | ||
251 | int off; | ||
252 | |||
253 | res = vfs_listxattr(ovl_dentry_real(dentry), list, size); | ||
254 | if (res <= 0 || size == 0) | ||
255 | return res; | ||
256 | |||
257 | if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE) | ||
258 | return res; | ||
259 | |||
260 | /* filter out private xattrs */ | ||
261 | for (off = 0; off < res;) { | ||
262 | char *s = list + off; | ||
263 | size_t slen = strlen(s) + 1; | ||
264 | |||
265 | BUG_ON(off + slen > res); | ||
266 | |||
267 | if (ovl_is_private_xattr(s)) { | ||
268 | res -= slen; | ||
269 | memmove(s, s + slen, res - off); | ||
270 | } else { | ||
271 | off += slen; | ||
272 | } | ||
273 | } | ||
274 | |||
275 | return res; | ||
276 | } | ||
277 | |||
278 | int ovl_removexattr(struct dentry *dentry, const char *name) | ||
279 | { | ||
280 | int err; | ||
281 | struct path realpath; | ||
282 | enum ovl_path_type type; | ||
283 | |||
284 | err = ovl_want_write(dentry); | ||
285 | if (err) | ||
286 | goto out; | ||
287 | |||
288 | if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && | ||
289 | ovl_is_private_xattr(name)) | ||
290 | goto out_drop_write; | ||
291 | |||
292 | type = ovl_path_real(dentry, &realpath); | ||
293 | if (type == OVL_PATH_LOWER) { | ||
294 | err = vfs_getxattr(realpath.dentry, name, NULL, 0); | ||
295 | if (err < 0) | ||
296 | goto out_drop_write; | ||
297 | |||
298 | err = ovl_copy_up(dentry); | ||
299 | if (err) | ||
300 | goto out_drop_write; | ||
301 | |||
302 | ovl_path_upper(dentry, &realpath); | ||
303 | } | ||
304 | |||
305 | err = vfs_removexattr(realpath.dentry, name); | ||
306 | out_drop_write: | ||
307 | ovl_drop_write(dentry); | ||
308 | out: | ||
309 | return err; | ||
310 | } | ||
311 | |||
312 | static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, | ||
313 | struct dentry *realdentry) | ||
314 | { | ||
315 | if (type != OVL_PATH_LOWER) | ||
316 | return false; | ||
317 | |||
318 | if (special_file(realdentry->d_inode->i_mode)) | ||
319 | return false; | ||
320 | |||
321 | if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC)) | ||
322 | return false; | ||
323 | |||
324 | return true; | ||
325 | } | ||
326 | |||
327 | static int ovl_dentry_open(struct dentry *dentry, struct file *file, | ||
328 | const struct cred *cred) | ||
329 | { | ||
330 | int err; | ||
331 | struct path realpath; | ||
332 | enum ovl_path_type type; | ||
333 | bool want_write = false; | ||
334 | |||
335 | type = ovl_path_real(dentry, &realpath); | ||
336 | if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) { | ||
337 | want_write = true; | ||
338 | err = ovl_want_write(dentry); | ||
339 | if (err) | ||
340 | goto out; | ||
341 | |||
342 | if (file->f_flags & O_TRUNC) | ||
343 | err = ovl_copy_up_last(dentry, NULL, true); | ||
344 | else | ||
345 | err = ovl_copy_up(dentry); | ||
346 | if (err) | ||
347 | goto out_drop_write; | ||
348 | |||
349 | ovl_path_upper(dentry, &realpath); | ||
350 | } | ||
351 | |||
352 | err = vfs_open(&realpath, file, cred); | ||
353 | out_drop_write: | ||
354 | if (want_write) | ||
355 | ovl_drop_write(dentry); | ||
356 | out: | ||
357 | return err; | ||
358 | } | ||
359 | |||
360 | static const struct inode_operations ovl_file_inode_operations = { | ||
361 | .setattr = ovl_setattr, | ||
362 | .permission = ovl_permission, | ||
363 | .getattr = ovl_getattr, | ||
364 | .setxattr = ovl_setxattr, | ||
365 | .getxattr = ovl_getxattr, | ||
366 | .listxattr = ovl_listxattr, | ||
367 | .removexattr = ovl_removexattr, | ||
368 | .dentry_open = ovl_dentry_open, | ||
369 | }; | ||
370 | |||
371 | static const struct inode_operations ovl_symlink_inode_operations = { | ||
372 | .setattr = ovl_setattr, | ||
373 | .follow_link = ovl_follow_link, | ||
374 | .put_link = ovl_put_link, | ||
375 | .readlink = ovl_readlink, | ||
376 | .getattr = ovl_getattr, | ||
377 | .setxattr = ovl_setxattr, | ||
378 | .getxattr = ovl_getxattr, | ||
379 | .listxattr = ovl_listxattr, | ||
380 | .removexattr = ovl_removexattr, | ||
381 | }; | ||
382 | |||
383 | struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, | ||
384 | struct ovl_entry *oe) | ||
385 | { | ||
386 | struct inode *inode; | ||
387 | |||
388 | inode = new_inode(sb); | ||
389 | if (!inode) | ||
390 | return NULL; | ||
391 | |||
392 | mode &= S_IFMT; | ||
393 | |||
394 | inode->i_ino = get_next_ino(); | ||
395 | inode->i_mode = mode; | ||
396 | inode->i_flags |= S_NOATIME | S_NOCMTIME; | ||
397 | |||
398 | switch (mode) { | ||
399 | case S_IFDIR: | ||
400 | inode->i_private = oe; | ||
401 | inode->i_op = &ovl_dir_inode_operations; | ||
402 | inode->i_fop = &ovl_dir_operations; | ||
403 | break; | ||
404 | |||
405 | case S_IFLNK: | ||
406 | inode->i_op = &ovl_symlink_inode_operations; | ||
407 | break; | ||
408 | |||
409 | case S_IFREG: | ||
410 | case S_IFSOCK: | ||
411 | case S_IFBLK: | ||
412 | case S_IFCHR: | ||
413 | case S_IFIFO: | ||
414 | inode->i_op = &ovl_file_inode_operations; | ||
415 | break; | ||
416 | |||
417 | default: | ||
418 | WARN(1, "illegal file type: %i\n", mode); | ||
419 | iput(inode); | ||
420 | inode = NULL; | ||
421 | } | ||
422 | |||
423 | return inode; | ||
424 | |||
425 | } | ||
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h new file mode 100644 index 000000000000..814bed33dd07 --- /dev/null +++ b/fs/overlayfs/overlayfs.h | |||
@@ -0,0 +1,191 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Copyright (C) 2011 Novell Inc. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published by | ||
7 | * the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | |||
12 | struct ovl_entry; | ||
13 | |||
/*
 * Classification of an overlay dentry, as computed by ovl_path_type():
 *
 *   OVL_PATH_PURE_UPPER - upper dentry only, and the entry is not opaque
 *   OVL_PATH_UPPER      - upper dentry present; either a lower dentry also
 *                         exists and the inode is not a directory, or there
 *                         is no lower dentry and the entry is opaque
 *   OVL_PATH_MERGE      - both upper and lower dentries exist and the inode
 *                         is a directory
 *   OVL_PATH_LOWER      - no upper dentry
 */
14 | enum ovl_path_type { | ||
15 | OVL_PATH_PURE_UPPER, | ||
16 | OVL_PATH_UPPER, | ||
17 | OVL_PATH_MERGE, | ||
18 | OVL_PATH_LOWER, | ||
19 | }; | ||
20 | |||
21 | extern const char *ovl_opaque_xattr; | ||
22 | |||
/* Call vfs_rmdir() on @dentry in @dir and log the result with pr_debug(). */
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
	int ret;

	ret = vfs_rmdir(dir, dentry);
	pr_debug("rmdir(%pd2) = %i\n", dentry, ret);

	return ret;
}
29 | |||
30 | static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry) | ||
31 | { | ||
32 | int err = vfs_unlink(dir, dentry, NULL); | ||
33 | pr_debug("unlink(%pd2) = %i\n", dentry, err); | ||
34 | return err; | ||
35 | } | ||
36 | |||
37 | static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir, | ||
38 | struct dentry *new_dentry, bool debug) | ||
39 | { | ||
40 | int err = vfs_link(old_dentry, dir, new_dentry, NULL); | ||
41 | if (debug) { | ||
42 | pr_debug("link(%pd2, %pd2) = %i\n", | ||
43 | old_dentry, new_dentry, err); | ||
44 | } | ||
45 | return err; | ||
46 | } | ||
47 | |||
48 | static inline int ovl_do_create(struct inode *dir, struct dentry *dentry, | ||
49 | umode_t mode, bool debug) | ||
50 | { | ||
51 | int err = vfs_create(dir, dentry, mode, true); | ||
52 | if (debug) | ||
53 | pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err); | ||
54 | return err; | ||
55 | } | ||
56 | |||
57 | static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry, | ||
58 | umode_t mode, bool debug) | ||
59 | { | ||
60 | int err = vfs_mkdir(dir, dentry, mode); | ||
61 | if (debug) | ||
62 | pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); | ||
63 | return err; | ||
64 | } | ||
65 | |||
66 | static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry, | ||
67 | umode_t mode, dev_t dev, bool debug) | ||
68 | { | ||
69 | int err = vfs_mknod(dir, dentry, mode, dev); | ||
70 | if (debug) { | ||
71 | pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", | ||
72 | dentry, mode, dev, err); | ||
73 | } | ||
74 | return err; | ||
75 | } | ||
76 | |||
77 | static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry, | ||
78 | const char *oldname, bool debug) | ||
79 | { | ||
80 | int err = vfs_symlink(dir, dentry, oldname); | ||
81 | if (debug) | ||
82 | pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err); | ||
83 | return err; | ||
84 | } | ||
85 | |||
86 | static inline int ovl_do_setxattr(struct dentry *dentry, const char *name, | ||
87 | const void *value, size_t size, int flags) | ||
88 | { | ||
89 | int err = vfs_setxattr(dentry, name, value, size, flags); | ||
90 | pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n", | ||
91 | dentry, name, (int) size, (char *) value, flags, err); | ||
92 | return err; | ||
93 | } | ||
94 | |||
/* Call vfs_removexattr() for @name on @dentry and log the result. */
static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
{
	int ret;

	ret = vfs_removexattr(dentry, name);
	pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, ret);

	return ret;
}
101 | |||
102 | static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry, | ||
103 | struct inode *newdir, struct dentry *newdentry, | ||
104 | unsigned int flags) | ||
105 | { | ||
106 | int err; | ||
107 | |||
108 | pr_debug("rename2(%pd2, %pd2, 0x%x)\n", | ||
109 | olddentry, newdentry, flags); | ||
110 | |||
111 | err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags); | ||
112 | |||
113 | if (err) { | ||
114 | pr_debug("...rename2(%pd2, %pd2, ...) = %i\n", | ||
115 | olddentry, newdentry, err); | ||
116 | } | ||
117 | return err; | ||
118 | } | ||
119 | |||
/* Call vfs_whiteout() for @dentry in @dir and log the result. */
static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
{
	int ret;

	ret = vfs_whiteout(dir, dentry);
	pr_debug("whiteout(%pd2) = %i\n", dentry, ret);

	return ret;
}
126 | |||
127 | enum ovl_path_type ovl_path_type(struct dentry *dentry); | ||
128 | u64 ovl_dentry_version_get(struct dentry *dentry); | ||
129 | void ovl_dentry_version_inc(struct dentry *dentry); | ||
130 | void ovl_path_upper(struct dentry *dentry, struct path *path); | ||
131 | void ovl_path_lower(struct dentry *dentry, struct path *path); | ||
132 | enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); | ||
133 | struct dentry *ovl_dentry_upper(struct dentry *dentry); | ||
134 | struct dentry *ovl_dentry_lower(struct dentry *dentry); | ||
135 | struct dentry *ovl_dentry_real(struct dentry *dentry); | ||
136 | struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper); | ||
137 | struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry); | ||
138 | void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache); | ||
139 | struct dentry *ovl_workdir(struct dentry *dentry); | ||
140 | int ovl_want_write(struct dentry *dentry); | ||
141 | void ovl_drop_write(struct dentry *dentry); | ||
142 | bool ovl_dentry_is_opaque(struct dentry *dentry); | ||
143 | void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque); | ||
144 | bool ovl_is_whiteout(struct dentry *dentry); | ||
145 | void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); | ||
146 | struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, | ||
147 | unsigned int flags); | ||
148 | struct file *ovl_path_open(struct path *path, int flags); | ||
149 | |||
150 | struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry, | ||
151 | struct kstat *stat, const char *link); | ||
152 | |||
153 | /* readdir.c */ | ||
154 | extern const struct file_operations ovl_dir_operations; | ||
155 | int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); | ||
156 | void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list); | ||
157 | void ovl_cache_free(struct list_head *list); | ||
158 | |||
159 | /* inode.c */ | ||
160 | int ovl_setattr(struct dentry *dentry, struct iattr *attr); | ||
161 | int ovl_permission(struct inode *inode, int mask); | ||
162 | int ovl_setxattr(struct dentry *dentry, const char *name, | ||
163 | const void *value, size_t size, int flags); | ||
164 | ssize_t ovl_getxattr(struct dentry *dentry, const char *name, | ||
165 | void *value, size_t size); | ||
166 | ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); | ||
167 | int ovl_removexattr(struct dentry *dentry, const char *name); | ||
168 | |||
169 | struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, | ||
170 | struct ovl_entry *oe); | ||
171 | static inline void ovl_copyattr(struct inode *from, struct inode *to) | ||
172 | { | ||
173 | to->i_uid = from->i_uid; | ||
174 | to->i_gid = from->i_gid; | ||
175 | } | ||
176 | |||
177 | /* dir.c */ | ||
178 | extern const struct inode_operations ovl_dir_inode_operations; | ||
179 | struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry); | ||
180 | int ovl_create_real(struct inode *dir, struct dentry *newdentry, | ||
181 | struct kstat *stat, const char *link, | ||
182 | struct dentry *hardlink, bool debug); | ||
183 | void ovl_cleanup(struct inode *dir, struct dentry *dentry); | ||
184 | |||
185 | /* copy_up.c */ | ||
186 | int ovl_copy_up(struct dentry *dentry); | ||
187 | int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, | ||
188 | struct path *lowerpath, struct kstat *stat, | ||
189 | struct iattr *attr); | ||
190 | int ovl_copy_xattr(struct dentry *old, struct dentry *new); | ||
191 | int ovl_set_attr(struct dentry *upper, struct kstat *stat); | ||
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c new file mode 100644 index 000000000000..c6787f84ece9 --- /dev/null +++ b/fs/overlayfs/readdir.c | |||
@@ -0,0 +1,587 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Copyright (C) 2011 Novell Inc. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published by | ||
7 | * the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/fs.h> | ||
11 | #include <linux/slab.h> | ||
12 | #include <linux/namei.h> | ||
13 | #include <linux/file.h> | ||
14 | #include <linux/xattr.h> | ||
15 | #include <linux/rbtree.h> | ||
16 | #include <linux/security.h> | ||
17 | #include <linux/cred.h> | ||
18 | #include "overlayfs.h" | ||
19 | |||
/*
 * One entry of a cached directory listing.
 *
 * @name:        NUL-terminated copy of the entry name; ovl_cache_entry_new()
 *               stores it directly after the struct.  A NULL name marks a
 *               cursor (see struct ovl_dir_file), which list walkers skip.
 * @len:         length of @name, excluding the terminator
 * @type:        d_type reported by the underlying directory
 * @ino:         inode number reported by the underlying directory
 * @is_whiteout: set by ovl_dir_mark_whiteouts(); such entries are not
 *               emitted by ovl_iterate()
 * @l_node:      linkage in the ordered entry list
 * @node:        linkage in the rb-tree used to detect duplicate names
 */
20 | struct ovl_cache_entry { | ||
21 | const char *name; | ||
22 | unsigned int len; | ||
23 | unsigned int type; | ||
24 | u64 ino; | ||
25 | bool is_whiteout; | ||
26 | struct list_head l_node; | ||
27 | struct rb_node node; | ||
28 | }; | ||
29 | |||
/*
 * Cached merged listing of a directory.
 *
 * @refcount: number of users; ovl_cache_put() frees the cache at zero
 * @version:  value of ovl_dentry_version_get() when the cache was built,
 *            compared later to detect a stale cache
 * @entries:  list of struct ovl_cache_entry (linked through l_node)
 */
30 | struct ovl_dir_cache { | ||
31 | long refcount; | ||
32 | u64 version; | ||
33 | struct list_head entries; | ||
34 | }; | ||
35 | |||
/*
 * State passed to the ovl_fill_merge() actor while reading a directory.
 *
 * @ctx:      dir_context handed to iterate_dir()
 * @is_merge: false: entries are inserted via ovl_cache_entry_add_rb();
 *            true: entries are handled by ovl_fill_lower()
 * @root:     rb-tree of names collected so far
 * @list:     list receiving the entries
 * @middle:   list position that ovl_fill_lower() inserts before
 * @count:    number of actor calls in the current iterate_dir() pass
 * @err:      error recorded by the actor
 */
36 | struct ovl_readdir_data { | ||
37 | struct dir_context ctx; | ||
38 | bool is_merge; | ||
39 | struct rb_root *root; | ||
40 | struct list_head *list; | ||
41 | struct list_head *middle; | ||
42 | int count; | ||
43 | int err; | ||
44 | }; | ||
45 | |||
/*
 * Per-open-file state of an overlay directory (file->private_data).
 *
 * @is_real:   when true, iteration and seeking are forwarded to @realfile
 *             instead of using the merged cache
 * @is_upper:  set at open time when the path type was not OVL_PATH_LOWER
 * @cache:     merged listing in use, or NULL if none has been built yet
 * @cursor:    placeholder entry (name == NULL) marking the read position
 *             inside @cache->entries
 * @realfile:  underlying directory opened by ovl_dir_open()
 * @upperfile: upper directory opened lazily by ovl_dir_fsync()
 */
46 | struct ovl_dir_file { | ||
47 | bool is_real; | ||
48 | bool is_upper; | ||
49 | struct ovl_dir_cache *cache; | ||
50 | struct ovl_cache_entry cursor; | ||
51 | struct file *realfile; | ||
52 | struct file *upperfile; | ||
53 | }; | ||
54 | |||
55 | static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n) | ||
56 | { | ||
57 | return container_of(n, struct ovl_cache_entry, node); | ||
58 | } | ||
59 | |||
60 | static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, | ||
61 | const char *name, int len) | ||
62 | { | ||
63 | struct rb_node *node = root->rb_node; | ||
64 | int cmp; | ||
65 | |||
66 | while (node) { | ||
67 | struct ovl_cache_entry *p = ovl_cache_entry_from_node(node); | ||
68 | |||
69 | cmp = strncmp(name, p->name, len); | ||
70 | if (cmp > 0) | ||
71 | node = p->node.rb_right; | ||
72 | else if (cmp < 0 || len < p->len) | ||
73 | node = p->node.rb_left; | ||
74 | else | ||
75 | return p; | ||
76 | } | ||
77 | |||
78 | return NULL; | ||
79 | } | ||
80 | |||
81 | static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len, | ||
82 | u64 ino, unsigned int d_type) | ||
83 | { | ||
84 | struct ovl_cache_entry *p; | ||
85 | |||
86 | p = kmalloc(sizeof(*p) + len + 1, GFP_KERNEL); | ||
87 | if (p) { | ||
88 | char *name_copy = (char *) (p + 1); | ||
89 | memcpy(name_copy, name, len); | ||
90 | name_copy[len] = '\0'; | ||
91 | p->name = name_copy; | ||
92 | p->len = len; | ||
93 | p->type = d_type; | ||
94 | p->ino = ino; | ||
95 | p->is_whiteout = false; | ||
96 | } | ||
97 | |||
98 | return p; | ||
99 | } | ||
100 | |||
101 | static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, | ||
102 | const char *name, int len, u64 ino, | ||
103 | unsigned int d_type) | ||
104 | { | ||
105 | struct rb_node **newp = &rdd->root->rb_node; | ||
106 | struct rb_node *parent = NULL; | ||
107 | struct ovl_cache_entry *p; | ||
108 | |||
109 | while (*newp) { | ||
110 | int cmp; | ||
111 | struct ovl_cache_entry *tmp; | ||
112 | |||
113 | parent = *newp; | ||
114 | tmp = ovl_cache_entry_from_node(*newp); | ||
115 | cmp = strncmp(name, tmp->name, len); | ||
116 | if (cmp > 0) | ||
117 | newp = &tmp->node.rb_right; | ||
118 | else if (cmp < 0 || len < tmp->len) | ||
119 | newp = &tmp->node.rb_left; | ||
120 | else | ||
121 | return 0; | ||
122 | } | ||
123 | |||
124 | p = ovl_cache_entry_new(name, len, ino, d_type); | ||
125 | if (p == NULL) | ||
126 | return -ENOMEM; | ||
127 | |||
128 | list_add_tail(&p->l_node, rdd->list); | ||
129 | rb_link_node(&p->node, parent, newp); | ||
130 | rb_insert_color(&p->node, rdd->root); | ||
131 | |||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | static int ovl_fill_lower(struct ovl_readdir_data *rdd, | ||
136 | const char *name, int namelen, | ||
137 | loff_t offset, u64 ino, unsigned int d_type) | ||
138 | { | ||
139 | struct ovl_cache_entry *p; | ||
140 | |||
141 | p = ovl_cache_entry_find(rdd->root, name, namelen); | ||
142 | if (p) { | ||
143 | list_move_tail(&p->l_node, rdd->middle); | ||
144 | } else { | ||
145 | p = ovl_cache_entry_new(name, namelen, ino, d_type); | ||
146 | if (p == NULL) | ||
147 | rdd->err = -ENOMEM; | ||
148 | else | ||
149 | list_add_tail(&p->l_node, rdd->middle); | ||
150 | } | ||
151 | |||
152 | return rdd->err; | ||
153 | } | ||
154 | |||
155 | void ovl_cache_free(struct list_head *list) | ||
156 | { | ||
157 | struct ovl_cache_entry *p; | ||
158 | struct ovl_cache_entry *n; | ||
159 | |||
160 | list_for_each_entry_safe(p, n, list, l_node) | ||
161 | kfree(p); | ||
162 | |||
163 | INIT_LIST_HEAD(list); | ||
164 | } | ||
165 | |||
166 | static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) | ||
167 | { | ||
168 | struct ovl_dir_cache *cache = od->cache; | ||
169 | |||
170 | list_del(&od->cursor.l_node); | ||
171 | WARN_ON(cache->refcount <= 0); | ||
172 | cache->refcount--; | ||
173 | if (!cache->refcount) { | ||
174 | if (ovl_dir_cache(dentry) == cache) | ||
175 | ovl_set_dir_cache(dentry, NULL); | ||
176 | |||
177 | ovl_cache_free(&cache->entries); | ||
178 | kfree(cache); | ||
179 | } | ||
180 | } | ||
181 | |||
182 | static int ovl_fill_merge(void *buf, const char *name, int namelen, | ||
183 | loff_t offset, u64 ino, unsigned int d_type) | ||
184 | { | ||
185 | struct ovl_readdir_data *rdd = buf; | ||
186 | |||
187 | rdd->count++; | ||
188 | if (!rdd->is_merge) | ||
189 | return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type); | ||
190 | else | ||
191 | return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type); | ||
192 | } | ||
193 | |||
194 | static inline int ovl_dir_read(struct path *realpath, | ||
195 | struct ovl_readdir_data *rdd) | ||
196 | { | ||
197 | struct file *realfile; | ||
198 | int err; | ||
199 | |||
200 | realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY); | ||
201 | if (IS_ERR(realfile)) | ||
202 | return PTR_ERR(realfile); | ||
203 | |||
204 | rdd->ctx.pos = 0; | ||
205 | do { | ||
206 | rdd->count = 0; | ||
207 | rdd->err = 0; | ||
208 | err = iterate_dir(realfile, &rdd->ctx); | ||
209 | if (err >= 0) | ||
210 | err = rdd->err; | ||
211 | } while (!err && rdd->count); | ||
212 | fput(realfile); | ||
213 | |||
214 | return err; | ||
215 | } | ||
216 | |||
217 | static void ovl_dir_reset(struct file *file) | ||
218 | { | ||
219 | struct ovl_dir_file *od = file->private_data; | ||
220 | struct ovl_dir_cache *cache = od->cache; | ||
221 | struct dentry *dentry = file->f_path.dentry; | ||
222 | enum ovl_path_type type = ovl_path_type(dentry); | ||
223 | |||
224 | if (cache && ovl_dentry_version_get(dentry) != cache->version) { | ||
225 | ovl_cache_put(od, dentry); | ||
226 | od->cache = NULL; | ||
227 | } | ||
228 | WARN_ON(!od->is_real && type != OVL_PATH_MERGE); | ||
229 | if (od->is_real && type == OVL_PATH_MERGE) | ||
230 | od->is_real = false; | ||
231 | } | ||
232 | |||
233 | static int ovl_dir_mark_whiteouts(struct dentry *dir, | ||
234 | struct ovl_readdir_data *rdd) | ||
235 | { | ||
236 | struct ovl_cache_entry *p; | ||
237 | struct dentry *dentry; | ||
238 | const struct cred *old_cred; | ||
239 | struct cred *override_cred; | ||
240 | |||
241 | override_cred = prepare_creds(); | ||
242 | if (!override_cred) { | ||
243 | ovl_cache_free(rdd->list); | ||
244 | return -ENOMEM; | ||
245 | } | ||
246 | |||
247 | /* | ||
248 | * CAP_DAC_OVERRIDE for lookup | ||
249 | */ | ||
250 | cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); | ||
251 | old_cred = override_creds(override_cred); | ||
252 | |||
253 | mutex_lock(&dir->d_inode->i_mutex); | ||
254 | list_for_each_entry(p, rdd->list, l_node) { | ||
255 | if (!p->name) | ||
256 | continue; | ||
257 | |||
258 | if (p->type != DT_CHR) | ||
259 | continue; | ||
260 | |||
261 | dentry = lookup_one_len(p->name, dir, p->len); | ||
262 | if (IS_ERR(dentry)) | ||
263 | continue; | ||
264 | |||
265 | p->is_whiteout = ovl_is_whiteout(dentry); | ||
266 | dput(dentry); | ||
267 | } | ||
268 | mutex_unlock(&dir->d_inode->i_mutex); | ||
269 | |||
270 | revert_creds(old_cred); | ||
271 | put_cred(override_cred); | ||
272 | |||
273 | return 0; | ||
274 | } | ||
275 | |||
276 | static inline int ovl_dir_read_merged(struct path *upperpath, | ||
277 | struct path *lowerpath, | ||
278 | struct list_head *list) | ||
279 | { | ||
280 | int err; | ||
281 | struct rb_root root = RB_ROOT; | ||
282 | struct list_head middle; | ||
283 | struct ovl_readdir_data rdd = { | ||
284 | .ctx.actor = ovl_fill_merge, | ||
285 | .list = list, | ||
286 | .root = &root, | ||
287 | .is_merge = false, | ||
288 | }; | ||
289 | |||
290 | if (upperpath->dentry) { | ||
291 | err = ovl_dir_read(upperpath, &rdd); | ||
292 | if (err) | ||
293 | goto out; | ||
294 | |||
295 | if (lowerpath->dentry) { | ||
296 | err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd); | ||
297 | if (err) | ||
298 | goto out; | ||
299 | } | ||
300 | } | ||
301 | if (lowerpath->dentry) { | ||
302 | /* | ||
303 | * Insert lowerpath entries before upperpath ones, this allows | ||
304 | * offsets to be reasonably constant | ||
305 | */ | ||
306 | list_add(&middle, rdd.list); | ||
307 | rdd.middle = &middle; | ||
308 | rdd.is_merge = true; | ||
309 | err = ovl_dir_read(lowerpath, &rdd); | ||
310 | list_del(&middle); | ||
311 | } | ||
312 | out: | ||
313 | return err; | ||
314 | |||
315 | } | ||
316 | |||
317 | static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) | ||
318 | { | ||
319 | struct ovl_cache_entry *p; | ||
320 | loff_t off = 0; | ||
321 | |||
322 | list_for_each_entry(p, &od->cache->entries, l_node) { | ||
323 | if (!p->name) | ||
324 | continue; | ||
325 | if (off >= pos) | ||
326 | break; | ||
327 | off++; | ||
328 | } | ||
329 | list_move_tail(&od->cursor.l_node, &p->l_node); | ||
330 | } | ||
331 | |||
332 | static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) | ||
333 | { | ||
334 | int res; | ||
335 | struct path lowerpath; | ||
336 | struct path upperpath; | ||
337 | struct ovl_dir_cache *cache; | ||
338 | |||
339 | cache = ovl_dir_cache(dentry); | ||
340 | if (cache && ovl_dentry_version_get(dentry) == cache->version) { | ||
341 | cache->refcount++; | ||
342 | return cache; | ||
343 | } | ||
344 | ovl_set_dir_cache(dentry, NULL); | ||
345 | |||
346 | cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL); | ||
347 | if (!cache) | ||
348 | return ERR_PTR(-ENOMEM); | ||
349 | |||
350 | cache->refcount = 1; | ||
351 | INIT_LIST_HEAD(&cache->entries); | ||
352 | |||
353 | ovl_path_lower(dentry, &lowerpath); | ||
354 | ovl_path_upper(dentry, &upperpath); | ||
355 | |||
356 | res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries); | ||
357 | if (res) { | ||
358 | ovl_cache_free(&cache->entries); | ||
359 | kfree(cache); | ||
360 | return ERR_PTR(res); | ||
361 | } | ||
362 | |||
363 | cache->version = ovl_dentry_version_get(dentry); | ||
364 | ovl_set_dir_cache(dentry, cache); | ||
365 | |||
366 | return cache; | ||
367 | } | ||
368 | |||
369 | static int ovl_iterate(struct file *file, struct dir_context *ctx) | ||
370 | { | ||
371 | struct ovl_dir_file *od = file->private_data; | ||
372 | struct dentry *dentry = file->f_path.dentry; | ||
373 | |||
374 | if (!ctx->pos) | ||
375 | ovl_dir_reset(file); | ||
376 | |||
377 | if (od->is_real) | ||
378 | return iterate_dir(od->realfile, ctx); | ||
379 | |||
380 | if (!od->cache) { | ||
381 | struct ovl_dir_cache *cache; | ||
382 | |||
383 | cache = ovl_cache_get(dentry); | ||
384 | if (IS_ERR(cache)) | ||
385 | return PTR_ERR(cache); | ||
386 | |||
387 | od->cache = cache; | ||
388 | ovl_seek_cursor(od, ctx->pos); | ||
389 | } | ||
390 | |||
391 | while (od->cursor.l_node.next != &od->cache->entries) { | ||
392 | struct ovl_cache_entry *p; | ||
393 | |||
394 | p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node); | ||
395 | /* Skip cursors */ | ||
396 | if (p->name) { | ||
397 | if (!p->is_whiteout) { | ||
398 | if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) | ||
399 | break; | ||
400 | } | ||
401 | ctx->pos++; | ||
402 | } | ||
403 | list_move(&od->cursor.l_node, &p->l_node); | ||
404 | } | ||
405 | return 0; | ||
406 | } | ||
407 | |||
408 | static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin) | ||
409 | { | ||
410 | loff_t res; | ||
411 | struct ovl_dir_file *od = file->private_data; | ||
412 | |||
413 | mutex_lock(&file_inode(file)->i_mutex); | ||
414 | if (!file->f_pos) | ||
415 | ovl_dir_reset(file); | ||
416 | |||
417 | if (od->is_real) { | ||
418 | res = vfs_llseek(od->realfile, offset, origin); | ||
419 | file->f_pos = od->realfile->f_pos; | ||
420 | } else { | ||
421 | res = -EINVAL; | ||
422 | |||
423 | switch (origin) { | ||
424 | case SEEK_CUR: | ||
425 | offset += file->f_pos; | ||
426 | break; | ||
427 | case SEEK_SET: | ||
428 | break; | ||
429 | default: | ||
430 | goto out_unlock; | ||
431 | } | ||
432 | if (offset < 0) | ||
433 | goto out_unlock; | ||
434 | |||
435 | if (offset != file->f_pos) { | ||
436 | file->f_pos = offset; | ||
437 | if (od->cache) | ||
438 | ovl_seek_cursor(od, offset); | ||
439 | } | ||
440 | res = offset; | ||
441 | } | ||
442 | out_unlock: | ||
443 | mutex_unlock(&file_inode(file)->i_mutex); | ||
444 | |||
445 | return res; | ||
446 | } | ||
447 | |||
448 | static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, | ||
449 | int datasync) | ||
450 | { | ||
451 | struct ovl_dir_file *od = file->private_data; | ||
452 | struct dentry *dentry = file->f_path.dentry; | ||
453 | struct file *realfile = od->realfile; | ||
454 | |||
455 | /* | ||
456 | * Need to check if we started out being a lower dir, but got copied up | ||
457 | */ | ||
458 | if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { | ||
459 | struct inode *inode = file_inode(file); | ||
460 | |||
461 | mutex_lock(&inode->i_mutex); | ||
462 | realfile = od->upperfile; | ||
463 | if (!realfile) { | ||
464 | struct path upperpath; | ||
465 | |||
466 | ovl_path_upper(dentry, &upperpath); | ||
467 | realfile = ovl_path_open(&upperpath, O_RDONLY); | ||
468 | if (IS_ERR(realfile)) { | ||
469 | mutex_unlock(&inode->i_mutex); | ||
470 | return PTR_ERR(realfile); | ||
471 | } | ||
472 | od->upperfile = realfile; | ||
473 | } | ||
474 | mutex_unlock(&inode->i_mutex); | ||
475 | } | ||
476 | |||
477 | return vfs_fsync_range(realfile, start, end, datasync); | ||
478 | } | ||
479 | |||
480 | static int ovl_dir_release(struct inode *inode, struct file *file) | ||
481 | { | ||
482 | struct ovl_dir_file *od = file->private_data; | ||
483 | |||
484 | if (od->cache) { | ||
485 | mutex_lock(&inode->i_mutex); | ||
486 | ovl_cache_put(od, file->f_path.dentry); | ||
487 | mutex_unlock(&inode->i_mutex); | ||
488 | } | ||
489 | fput(od->realfile); | ||
490 | if (od->upperfile) | ||
491 | fput(od->upperfile); | ||
492 | kfree(od); | ||
493 | |||
494 | return 0; | ||
495 | } | ||
496 | |||
497 | static int ovl_dir_open(struct inode *inode, struct file *file) | ||
498 | { | ||
499 | struct path realpath; | ||
500 | struct file *realfile; | ||
501 | struct ovl_dir_file *od; | ||
502 | enum ovl_path_type type; | ||
503 | |||
504 | od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL); | ||
505 | if (!od) | ||
506 | return -ENOMEM; | ||
507 | |||
508 | type = ovl_path_real(file->f_path.dentry, &realpath); | ||
509 | realfile = ovl_path_open(&realpath, file->f_flags); | ||
510 | if (IS_ERR(realfile)) { | ||
511 | kfree(od); | ||
512 | return PTR_ERR(realfile); | ||
513 | } | ||
514 | INIT_LIST_HEAD(&od->cursor.l_node); | ||
515 | od->realfile = realfile; | ||
516 | od->is_real = (type != OVL_PATH_MERGE); | ||
517 | od->is_upper = (type != OVL_PATH_LOWER); | ||
518 | file->private_data = od; | ||
519 | |||
520 | return 0; | ||
521 | } | ||
522 | |||
/*
 * File operations for overlay directories; ovl_new_inode() installs this
 * table as ->i_fop for S_IFDIR inodes.
 */
523 | const struct file_operations ovl_dir_operations = { | ||
524 | .read = generic_read_dir, | ||
525 | .open = ovl_dir_open, | ||
526 | .iterate = ovl_iterate, | ||
527 | .llseek = ovl_dir_llseek, | ||
528 | .fsync = ovl_dir_fsync, | ||
529 | .release = ovl_dir_release, | ||
530 | }; | ||
531 | |||
532 | int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) | ||
533 | { | ||
534 | int err; | ||
535 | struct path lowerpath; | ||
536 | struct path upperpath; | ||
537 | struct ovl_cache_entry *p; | ||
538 | |||
539 | ovl_path_upper(dentry, &upperpath); | ||
540 | ovl_path_lower(dentry, &lowerpath); | ||
541 | |||
542 | err = ovl_dir_read_merged(&upperpath, &lowerpath, list); | ||
543 | if (err) | ||
544 | return err; | ||
545 | |||
546 | err = 0; | ||
547 | |||
548 | list_for_each_entry(p, list, l_node) { | ||
549 | if (p->is_whiteout) | ||
550 | continue; | ||
551 | |||
552 | if (p->name[0] == '.') { | ||
553 | if (p->len == 1) | ||
554 | continue; | ||
555 | if (p->len == 2 && p->name[1] == '.') | ||
556 | continue; | ||
557 | } | ||
558 | err = -ENOTEMPTY; | ||
559 | break; | ||
560 | } | ||
561 | |||
562 | return err; | ||
563 | } | ||
564 | |||
565 | void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) | ||
566 | { | ||
567 | struct ovl_cache_entry *p; | ||
568 | |||
569 | mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_PARENT); | ||
570 | list_for_each_entry(p, list, l_node) { | ||
571 | struct dentry *dentry; | ||
572 | |||
573 | if (!p->is_whiteout) | ||
574 | continue; | ||
575 | |||
576 | dentry = lookup_one_len(p->name, upper, p->len); | ||
577 | if (IS_ERR(dentry)) { | ||
578 | pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n", | ||
579 | upper->d_name.name, p->len, p->name, | ||
580 | (int) PTR_ERR(dentry)); | ||
581 | continue; | ||
582 | } | ||
583 | ovl_cleanup(upper->d_inode, dentry); | ||
584 | dput(dentry); | ||
585 | } | ||
586 | mutex_unlock(&upper->d_inode->i_mutex); | ||
587 | } | ||
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c new file mode 100644 index 000000000000..08b704cebfc4 --- /dev/null +++ b/fs/overlayfs/super.c | |||
@@ -0,0 +1,796 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Copyright (C) 2011 Novell Inc. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published by | ||
7 | * the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/fs.h> | ||
11 | #include <linux/namei.h> | ||
12 | #include <linux/xattr.h> | ||
13 | #include <linux/security.h> | ||
14 | #include <linux/mount.h> | ||
15 | #include <linux/slab.h> | ||
16 | #include <linux/parser.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/sched.h> | ||
19 | #include <linux/statfs.h> | ||
20 | #include <linux/seq_file.h> | ||
21 | #include "overlayfs.h" | ||
22 | |||
23 | MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); | ||
24 | MODULE_DESCRIPTION("Overlay filesystem"); | ||
25 | MODULE_LICENSE("GPL"); | ||
26 | |||
27 | #define OVERLAYFS_SUPER_MAGIC 0x794c764f | ||
28 | |||
/*
 * Directory pathnames kept in struct ovl_fs for show_options.
 * NOTE(review): presumably these hold the lowerdir/upperdir/workdir option
 * strings; the code that fills them is outside this excerpt.
 */
29 | struct ovl_config { | ||
30 | char *lowerdir; | ||
31 | char *upperdir; | ||
32 | char *workdir; | ||
33 | }; | ||
34 | |||
35 | /* private information held for overlayfs's superblock */ | ||
/*
 * Reached through sb->s_fs_info.  upper_mnt and lower_mnt supply the mount
 * used by ovl_path_upper() and ovl_path_lower(); upper_mnt is also what
 * ovl_want_write()/ovl_drop_write() act on.  workdir is returned by
 * ovl_workdir().
 */
36 | struct ovl_fs { | ||
37 | struct vfsmount *upper_mnt; | ||
38 | struct vfsmount *lower_mnt; | ||
39 | struct dentry *workdir; | ||
40 | long lower_namelen; | ||
41 | /* pathnames of lower and upper dirs, for show_options */ | ||
42 | struct ovl_config config; | ||
43 | }; | ||
44 | |||
45 | struct ovl_dir_cache; | ||
46 | |||
47 | /* private information held for every overlayfs dentry */ | ||
/*
 * Reached through dentry->d_fsdata.
 *
 * __upperdentry is published by ovl_dentry_update() and should be read
 * through ovl_upperdentry_dereference().  cache is the directory listing
 * cache managed via ovl_dir_cache()/ovl_set_dir_cache().  version and
 * opaque share storage with the rcu head.
 */
48 | struct ovl_entry { | ||
49 | struct dentry *__upperdentry; | ||
50 | struct dentry *lowerdentry; | ||
51 | struct ovl_dir_cache *cache; | ||
52 | union { | ||
53 | struct { | ||
54 | u64 version; | ||
55 | bool opaque; | ||
56 | }; | ||
57 | struct rcu_head rcu; | ||
58 | }; | ||
59 | }; | ||
60 | |||
61 | const char *ovl_opaque_xattr = "trusted.overlay.opaque"; | ||
62 | |||
63 | |||
64 | enum ovl_path_type ovl_path_type(struct dentry *dentry) | ||
65 | { | ||
66 | struct ovl_entry *oe = dentry->d_fsdata; | ||
67 | |||
68 | if (oe->__upperdentry) { | ||
69 | if (oe->lowerdentry) { | ||
70 | if (S_ISDIR(dentry->d_inode->i_mode)) | ||
71 | return OVL_PATH_MERGE; | ||
72 | else | ||
73 | return OVL_PATH_UPPER; | ||
74 | } else { | ||
75 | if (oe->opaque) | ||
76 | return OVL_PATH_UPPER; | ||
77 | else | ||
78 | return OVL_PATH_PURE_UPPER; | ||
79 | } | ||
80 | } else { | ||
81 | return OVL_PATH_LOWER; | ||
82 | } | ||
83 | } | ||
84 | |||
/*
 * Read oe->__upperdentry exactly once and return it (possibly NULL).  The
 * dependent-read barrier pairs with the smp_wmb() in ovl_dentry_update().
 */
85 | static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) | ||
86 | { | ||
87 | struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry); | ||
88 | /* | ||
89 | * Make sure to order reads to upperdentry wrt ovl_dentry_update() | ||
90 | */ | ||
91 | smp_read_barrier_depends(); | ||
92 | return upperdentry; | ||
93 | } | ||
94 | |||
95 | void ovl_path_upper(struct dentry *dentry, struct path *path) | ||
96 | { | ||
97 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
98 | struct ovl_entry *oe = dentry->d_fsdata; | ||
99 | |||
100 | path->mnt = ofs->upper_mnt; | ||
101 | path->dentry = ovl_upperdentry_dereference(oe); | ||
102 | } | ||
103 | |||
104 | enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) | ||
105 | { | ||
106 | |||
107 | enum ovl_path_type type = ovl_path_type(dentry); | ||
108 | |||
109 | if (type == OVL_PATH_LOWER) | ||
110 | ovl_path_lower(dentry, path); | ||
111 | else | ||
112 | ovl_path_upper(dentry, path); | ||
113 | |||
114 | return type; | ||
115 | } | ||
116 | |||
117 | struct dentry *ovl_dentry_upper(struct dentry *dentry) | ||
118 | { | ||
119 | struct ovl_entry *oe = dentry->d_fsdata; | ||
120 | |||
121 | return ovl_upperdentry_dereference(oe); | ||
122 | } | ||
123 | |||
124 | struct dentry *ovl_dentry_lower(struct dentry *dentry) | ||
125 | { | ||
126 | struct ovl_entry *oe = dentry->d_fsdata; | ||
127 | |||
128 | return oe->lowerdentry; | ||
129 | } | ||
130 | |||
131 | struct dentry *ovl_dentry_real(struct dentry *dentry) | ||
132 | { | ||
133 | struct ovl_entry *oe = dentry->d_fsdata; | ||
134 | struct dentry *realdentry; | ||
135 | |||
136 | realdentry = ovl_upperdentry_dereference(oe); | ||
137 | if (!realdentry) | ||
138 | realdentry = oe->lowerdentry; | ||
139 | |||
140 | return realdentry; | ||
141 | } | ||
142 | |||
143 | struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper) | ||
144 | { | ||
145 | struct dentry *realdentry; | ||
146 | |||
147 | realdentry = ovl_upperdentry_dereference(oe); | ||
148 | if (realdentry) { | ||
149 | *is_upper = true; | ||
150 | } else { | ||
151 | realdentry = oe->lowerdentry; | ||
152 | *is_upper = false; | ||
153 | } | ||
154 | return realdentry; | ||
155 | } | ||
156 | |||
157 | struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry) | ||
158 | { | ||
159 | struct ovl_entry *oe = dentry->d_fsdata; | ||
160 | |||
161 | return oe->cache; | ||
162 | } | ||
163 | |||
164 | void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache) | ||
165 | { | ||
166 | struct ovl_entry *oe = dentry->d_fsdata; | ||
167 | |||
168 | oe->cache = cache; | ||
169 | } | ||
170 | |||
171 | void ovl_path_lower(struct dentry *dentry, struct path *path) | ||
172 | { | ||
173 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
174 | struct ovl_entry *oe = dentry->d_fsdata; | ||
175 | |||
176 | path->mnt = ofs->lower_mnt; | ||
177 | path->dentry = oe->lowerdentry; | ||
178 | } | ||
179 | |||
180 | int ovl_want_write(struct dentry *dentry) | ||
181 | { | ||
182 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
183 | return mnt_want_write(ofs->upper_mnt); | ||
184 | } | ||
185 | |||
186 | void ovl_drop_write(struct dentry *dentry) | ||
187 | { | ||
188 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
189 | mnt_drop_write(ofs->upper_mnt); | ||
190 | } | ||
191 | |||
192 | struct dentry *ovl_workdir(struct dentry *dentry) | ||
193 | { | ||
194 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
195 | return ofs->workdir; | ||
196 | } | ||
197 | |||
198 | bool ovl_dentry_is_opaque(struct dentry *dentry) | ||
199 | { | ||
200 | struct ovl_entry *oe = dentry->d_fsdata; | ||
201 | return oe->opaque; | ||
202 | } | ||
203 | |||
204 | void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque) | ||
205 | { | ||
206 | struct ovl_entry *oe = dentry->d_fsdata; | ||
207 | oe->opaque = opaque; | ||
208 | } | ||
209 | |||
210 | void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry) | ||
211 | { | ||
212 | struct ovl_entry *oe = dentry->d_fsdata; | ||
213 | |||
214 | WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex)); | ||
215 | WARN_ON(oe->__upperdentry); | ||
216 | BUG_ON(!upperdentry->d_inode); | ||
217 | /* | ||
218 | * Make sure upperdentry is consistent before making it visible to | ||
219 | * ovl_upperdentry_dereference(). | ||
220 | */ | ||
221 | smp_wmb(); | ||
222 | oe->__upperdentry = upperdentry; | ||
223 | } | ||
224 | |||
225 | void ovl_dentry_version_inc(struct dentry *dentry) | ||
226 | { | ||
227 | struct ovl_entry *oe = dentry->d_fsdata; | ||
228 | |||
229 | WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); | ||
230 | oe->version++; | ||
231 | } | ||
232 | |||
233 | u64 ovl_dentry_version_get(struct dentry *dentry) | ||
234 | { | ||
235 | struct ovl_entry *oe = dentry->d_fsdata; | ||
236 | |||
237 | WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); | ||
238 | return oe->version; | ||
239 | } | ||
240 | |||
241 | bool ovl_is_whiteout(struct dentry *dentry) | ||
242 | { | ||
243 | struct inode *inode = dentry->d_inode; | ||
244 | |||
245 | return inode && IS_WHITEOUT(inode); | ||
246 | } | ||
247 | |||
248 | static bool ovl_is_opaquedir(struct dentry *dentry) | ||
249 | { | ||
250 | int res; | ||
251 | char val; | ||
252 | struct inode *inode = dentry->d_inode; | ||
253 | |||
254 | if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr) | ||
255 | return false; | ||
256 | |||
257 | res = inode->i_op->getxattr(dentry, ovl_opaque_xattr, &val, 1); | ||
258 | if (res == 1 && val == 'y') | ||
259 | return true; | ||
260 | |||
261 | return false; | ||
262 | } | ||
263 | |||
264 | static void ovl_dentry_release(struct dentry *dentry) | ||
265 | { | ||
266 | struct ovl_entry *oe = dentry->d_fsdata; | ||
267 | |||
268 | if (oe) { | ||
269 | dput(oe->__upperdentry); | ||
270 | dput(oe->lowerdentry); | ||
271 | kfree_rcu(oe, rcu); | ||
272 | } | ||
273 | } | ||
274 | |||
275 | static const struct dentry_operations ovl_dentry_operations = { | ||
276 | .d_release = ovl_dentry_release, | ||
277 | }; | ||
278 | |||
279 | static struct ovl_entry *ovl_alloc_entry(void) | ||
280 | { | ||
281 | return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL); | ||
282 | } | ||
283 | |||
284 | static inline struct dentry *ovl_lookup_real(struct dentry *dir, | ||
285 | struct qstr *name) | ||
286 | { | ||
287 | struct dentry *dentry; | ||
288 | |||
289 | mutex_lock(&dir->d_inode->i_mutex); | ||
290 | dentry = lookup_one_len(name->name, dir, name->len); | ||
291 | mutex_unlock(&dir->d_inode->i_mutex); | ||
292 | |||
293 | if (IS_ERR(dentry)) { | ||
294 | if (PTR_ERR(dentry) == -ENOENT) | ||
295 | dentry = NULL; | ||
296 | } else if (!dentry->d_inode) { | ||
297 | dput(dentry); | ||
298 | dentry = NULL; | ||
299 | } | ||
300 | return dentry; | ||
301 | } | ||
302 | |||
303 | struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, | ||
304 | unsigned int flags) | ||
305 | { | ||
306 | struct ovl_entry *oe; | ||
307 | struct dentry *upperdir; | ||
308 | struct dentry *lowerdir; | ||
309 | struct dentry *upperdentry = NULL; | ||
310 | struct dentry *lowerdentry = NULL; | ||
311 | struct inode *inode = NULL; | ||
312 | int err; | ||
313 | |||
314 | err = -ENOMEM; | ||
315 | oe = ovl_alloc_entry(); | ||
316 | if (!oe) | ||
317 | goto out; | ||
318 | |||
319 | upperdir = ovl_dentry_upper(dentry->d_parent); | ||
320 | lowerdir = ovl_dentry_lower(dentry->d_parent); | ||
321 | |||
322 | if (upperdir) { | ||
323 | upperdentry = ovl_lookup_real(upperdir, &dentry->d_name); | ||
324 | err = PTR_ERR(upperdentry); | ||
325 | if (IS_ERR(upperdentry)) | ||
326 | goto out_put_dir; | ||
327 | |||
328 | if (lowerdir && upperdentry) { | ||
329 | if (ovl_is_whiteout(upperdentry)) { | ||
330 | dput(upperdentry); | ||
331 | upperdentry = NULL; | ||
332 | oe->opaque = true; | ||
333 | } else if (ovl_is_opaquedir(upperdentry)) { | ||
334 | oe->opaque = true; | ||
335 | } | ||
336 | } | ||
337 | } | ||
338 | if (lowerdir && !oe->opaque) { | ||
339 | lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name); | ||
340 | err = PTR_ERR(lowerdentry); | ||
341 | if (IS_ERR(lowerdentry)) | ||
342 | goto out_dput_upper; | ||
343 | } | ||
344 | |||
345 | if (lowerdentry && upperdentry && | ||
346 | (!S_ISDIR(upperdentry->d_inode->i_mode) || | ||
347 | !S_ISDIR(lowerdentry->d_inode->i_mode))) { | ||
348 | dput(lowerdentry); | ||
349 | lowerdentry = NULL; | ||
350 | oe->opaque = true; | ||
351 | } | ||
352 | |||
353 | if (lowerdentry || upperdentry) { | ||
354 | struct dentry *realdentry; | ||
355 | |||
356 | realdentry = upperdentry ? upperdentry : lowerdentry; | ||
357 | err = -ENOMEM; | ||
358 | inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode, | ||
359 | oe); | ||
360 | if (!inode) | ||
361 | goto out_dput; | ||
362 | ovl_copyattr(realdentry->d_inode, inode); | ||
363 | } | ||
364 | |||
365 | oe->__upperdentry = upperdentry; | ||
366 | oe->lowerdentry = lowerdentry; | ||
367 | |||
368 | dentry->d_fsdata = oe; | ||
369 | d_add(dentry, inode); | ||
370 | |||
371 | return NULL; | ||
372 | |||
373 | out_dput: | ||
374 | dput(lowerdentry); | ||
375 | out_dput_upper: | ||
376 | dput(upperdentry); | ||
377 | out_put_dir: | ||
378 | kfree(oe); | ||
379 | out: | ||
380 | return ERR_PTR(err); | ||
381 | } | ||
382 | |||
/*
 * Open @path with @flags using the credentials of the current task.
 * Returns whatever dentry_open() returns.
 */
struct file *ovl_path_open(struct path *path, int flags)
{
	const struct cred *cred = current_cred();

	return dentry_open(path, flags, cred);
}
387 | |||
388 | static void ovl_put_super(struct super_block *sb) | ||
389 | { | ||
390 | struct ovl_fs *ufs = sb->s_fs_info; | ||
391 | |||
392 | dput(ufs->workdir); | ||
393 | mntput(ufs->upper_mnt); | ||
394 | mntput(ufs->lower_mnt); | ||
395 | |||
396 | kfree(ufs->config.lowerdir); | ||
397 | kfree(ufs->config.upperdir); | ||
398 | kfree(ufs->config.workdir); | ||
399 | kfree(ufs); | ||
400 | } | ||
401 | |||
402 | /** | ||
403 | * ovl_statfs | ||
404 | * @sb: The overlayfs super block | ||
405 | * @buf: The struct kstatfs to fill in with stats | ||
406 | * | ||
407 | * Get the filesystem statistics. As writes always target the upper layer | ||
408 | * filesystem pass the statfs to the same filesystem. | ||
409 | */ | ||
410 | static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
411 | { | ||
412 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
413 | struct dentry *root_dentry = dentry->d_sb->s_root; | ||
414 | struct path path; | ||
415 | int err; | ||
416 | |||
417 | ovl_path_upper(root_dentry, &path); | ||
418 | |||
419 | err = vfs_statfs(&path, buf); | ||
420 | if (!err) { | ||
421 | buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen); | ||
422 | buf->f_type = OVERLAYFS_SUPER_MAGIC; | ||
423 | } | ||
424 | |||
425 | return err; | ||
426 | } | ||
427 | |||
428 | /** | ||
429 | * ovl_show_options | ||
430 | * | ||
431 | * Prints the mount options for a given superblock. | ||
432 | * Returns zero; does not fail. | ||
433 | */ | ||
434 | static int ovl_show_options(struct seq_file *m, struct dentry *dentry) | ||
435 | { | ||
436 | struct super_block *sb = dentry->d_sb; | ||
437 | struct ovl_fs *ufs = sb->s_fs_info; | ||
438 | |||
439 | seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir); | ||
440 | seq_printf(m, ",upperdir=%s", ufs->config.upperdir); | ||
441 | seq_printf(m, ",workdir=%s", ufs->config.workdir); | ||
442 | return 0; | ||
443 | } | ||
444 | |||
445 | static const struct super_operations ovl_super_operations = { | ||
446 | .put_super = ovl_put_super, | ||
447 | .statfs = ovl_statfs, | ||
448 | .show_options = ovl_show_options, | ||
449 | }; | ||
450 | |||
/* Mount option tokens produced by match_token() in ovl_parse_opt(). */
enum {
	OPT_LOWERDIR,	/* lowerdir=<path> */
	OPT_UPPERDIR,	/* upperdir=<path> */
	OPT_WORKDIR,	/* workdir=<path> */
	OPT_ERR,	/* anything that matches no pattern */
};
457 | |||
458 | static const match_table_t ovl_tokens = { | ||
459 | {OPT_LOWERDIR, "lowerdir=%s"}, | ||
460 | {OPT_UPPERDIR, "upperdir=%s"}, | ||
461 | {OPT_WORKDIR, "workdir=%s"}, | ||
462 | {OPT_ERR, NULL} | ||
463 | }; | ||
464 | |||
465 | static int ovl_parse_opt(char *opt, struct ovl_config *config) | ||
466 | { | ||
467 | char *p; | ||
468 | |||
469 | while ((p = strsep(&opt, ",")) != NULL) { | ||
470 | int token; | ||
471 | substring_t args[MAX_OPT_ARGS]; | ||
472 | |||
473 | if (!*p) | ||
474 | continue; | ||
475 | |||
476 | token = match_token(p, ovl_tokens, args); | ||
477 | switch (token) { | ||
478 | case OPT_UPPERDIR: | ||
479 | kfree(config->upperdir); | ||
480 | config->upperdir = match_strdup(&args[0]); | ||
481 | if (!config->upperdir) | ||
482 | return -ENOMEM; | ||
483 | break; | ||
484 | |||
485 | case OPT_LOWERDIR: | ||
486 | kfree(config->lowerdir); | ||
487 | config->lowerdir = match_strdup(&args[0]); | ||
488 | if (!config->lowerdir) | ||
489 | return -ENOMEM; | ||
490 | break; | ||
491 | |||
492 | case OPT_WORKDIR: | ||
493 | kfree(config->workdir); | ||
494 | config->workdir = match_strdup(&args[0]); | ||
495 | if (!config->workdir) | ||
496 | return -ENOMEM; | ||
497 | break; | ||
498 | |||
499 | default: | ||
500 | return -EINVAL; | ||
501 | } | ||
502 | } | ||
503 | return 0; | ||
504 | } | ||
505 | |||
506 | #define OVL_WORKDIR_NAME "work" | ||
507 | |||
508 | static struct dentry *ovl_workdir_create(struct vfsmount *mnt, | ||
509 | struct dentry *dentry) | ||
510 | { | ||
511 | struct inode *dir = dentry->d_inode; | ||
512 | struct dentry *work; | ||
513 | int err; | ||
514 | bool retried = false; | ||
515 | |||
516 | err = mnt_want_write(mnt); | ||
517 | if (err) | ||
518 | return ERR_PTR(err); | ||
519 | |||
520 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | ||
521 | retry: | ||
522 | work = lookup_one_len(OVL_WORKDIR_NAME, dentry, | ||
523 | strlen(OVL_WORKDIR_NAME)); | ||
524 | |||
525 | if (!IS_ERR(work)) { | ||
526 | struct kstat stat = { | ||
527 | .mode = S_IFDIR | 0, | ||
528 | }; | ||
529 | |||
530 | if (work->d_inode) { | ||
531 | err = -EEXIST; | ||
532 | if (retried) | ||
533 | goto out_dput; | ||
534 | |||
535 | retried = true; | ||
536 | ovl_cleanup(dir, work); | ||
537 | dput(work); | ||
538 | goto retry; | ||
539 | } | ||
540 | |||
541 | err = ovl_create_real(dir, work, &stat, NULL, NULL, true); | ||
542 | if (err) | ||
543 | goto out_dput; | ||
544 | } | ||
545 | out_unlock: | ||
546 | mutex_unlock(&dir->i_mutex); | ||
547 | mnt_drop_write(mnt); | ||
548 | |||
549 | return work; | ||
550 | |||
551 | out_dput: | ||
552 | dput(work); | ||
553 | work = ERR_PTR(err); | ||
554 | goto out_unlock; | ||
555 | } | ||
556 | |||
557 | static int ovl_mount_dir(const char *name, struct path *path) | ||
558 | { | ||
559 | int err; | ||
560 | |||
561 | err = kern_path(name, LOOKUP_FOLLOW, path); | ||
562 | if (err) { | ||
563 | pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); | ||
564 | err = -EINVAL; | ||
565 | } | ||
566 | return err; | ||
567 | } | ||
568 | |||
569 | static bool ovl_is_allowed_fs_type(struct dentry *root) | ||
570 | { | ||
571 | const struct dentry_operations *dop = root->d_op; | ||
572 | |||
573 | /* | ||
574 | * We don't support: | ||
575 | * - automount filesystems | ||
576 | * - filesystems with revalidate (FIXME for lower layer) | ||
577 | * - filesystems with case insensitive names | ||
578 | */ | ||
579 | if (dop && | ||
580 | (dop->d_manage || dop->d_automount || | ||
581 | dop->d_revalidate || dop->d_weak_revalidate || | ||
582 | dop->d_compare || dop->d_hash)) { | ||
583 | return false; | ||
584 | } | ||
585 | return true; | ||
586 | } | ||
587 | |||
588 | /* Workdir should not be subdir of upperdir and vice versa */ | ||
589 | static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) | ||
590 | { | ||
591 | bool ok = false; | ||
592 | |||
593 | if (workdir != upperdir) { | ||
594 | ok = (lock_rename(workdir, upperdir) == NULL); | ||
595 | unlock_rename(workdir, upperdir); | ||
596 | } | ||
597 | return ok; | ||
598 | } | ||
599 | |||
600 | static int ovl_fill_super(struct super_block *sb, void *data, int silent) | ||
601 | { | ||
602 | struct path lowerpath; | ||
603 | struct path upperpath; | ||
604 | struct path workpath; | ||
605 | struct inode *root_inode; | ||
606 | struct dentry *root_dentry; | ||
607 | struct ovl_entry *oe; | ||
608 | struct ovl_fs *ufs; | ||
609 | struct kstatfs statfs; | ||
610 | int err; | ||
611 | |||
612 | err = -ENOMEM; | ||
613 | ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); | ||
614 | if (!ufs) | ||
615 | goto out; | ||
616 | |||
617 | err = ovl_parse_opt((char *) data, &ufs->config); | ||
618 | if (err) | ||
619 | goto out_free_config; | ||
620 | |||
621 | /* FIXME: workdir is not needed for a R/O mount */ | ||
622 | err = -EINVAL; | ||
623 | if (!ufs->config.upperdir || !ufs->config.lowerdir || | ||
624 | !ufs->config.workdir) { | ||
625 | pr_err("overlayfs: missing upperdir or lowerdir or workdir\n"); | ||
626 | goto out_free_config; | ||
627 | } | ||
628 | |||
629 | err = -ENOMEM; | ||
630 | oe = ovl_alloc_entry(); | ||
631 | if (oe == NULL) | ||
632 | goto out_free_config; | ||
633 | |||
634 | err = ovl_mount_dir(ufs->config.upperdir, &upperpath); | ||
635 | if (err) | ||
636 | goto out_free_oe; | ||
637 | |||
638 | err = ovl_mount_dir(ufs->config.lowerdir, &lowerpath); | ||
639 | if (err) | ||
640 | goto out_put_upperpath; | ||
641 | |||
642 | err = ovl_mount_dir(ufs->config.workdir, &workpath); | ||
643 | if (err) | ||
644 | goto out_put_lowerpath; | ||
645 | |||
646 | err = -EINVAL; | ||
647 | if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) || | ||
648 | !S_ISDIR(lowerpath.dentry->d_inode->i_mode) || | ||
649 | !S_ISDIR(workpath.dentry->d_inode->i_mode)) { | ||
650 | pr_err("overlayfs: upperdir or lowerdir or workdir not a directory\n"); | ||
651 | goto out_put_workpath; | ||
652 | } | ||
653 | |||
654 | if (upperpath.mnt != workpath.mnt) { | ||
655 | pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); | ||
656 | goto out_put_workpath; | ||
657 | } | ||
658 | if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { | ||
659 | pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); | ||
660 | goto out_put_workpath; | ||
661 | } | ||
662 | |||
663 | if (!ovl_is_allowed_fs_type(upperpath.dentry)) { | ||
664 | pr_err("overlayfs: filesystem of upperdir is not supported\n"); | ||
665 | goto out_put_workpath; | ||
666 | } | ||
667 | |||
668 | if (!ovl_is_allowed_fs_type(lowerpath.dentry)) { | ||
669 | pr_err("overlayfs: filesystem of lowerdir is not supported\n"); | ||
670 | goto out_put_workpath; | ||
671 | } | ||
672 | |||
673 | err = vfs_statfs(&lowerpath, &statfs); | ||
674 | if (err) { | ||
675 | pr_err("overlayfs: statfs failed on lowerpath\n"); | ||
676 | goto out_put_workpath; | ||
677 | } | ||
678 | ufs->lower_namelen = statfs.f_namelen; | ||
679 | |||
680 | sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth, | ||
681 | lowerpath.mnt->mnt_sb->s_stack_depth) + 1; | ||
682 | |||
683 | err = -EINVAL; | ||
684 | if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { | ||
685 | pr_err("overlayfs: maximum fs stacking depth exceeded\n"); | ||
686 | goto out_put_workpath; | ||
687 | } | ||
688 | |||
689 | ufs->upper_mnt = clone_private_mount(&upperpath); | ||
690 | err = PTR_ERR(ufs->upper_mnt); | ||
691 | if (IS_ERR(ufs->upper_mnt)) { | ||
692 | pr_err("overlayfs: failed to clone upperpath\n"); | ||
693 | goto out_put_workpath; | ||
694 | } | ||
695 | |||
696 | ufs->lower_mnt = clone_private_mount(&lowerpath); | ||
697 | err = PTR_ERR(ufs->lower_mnt); | ||
698 | if (IS_ERR(ufs->lower_mnt)) { | ||
699 | pr_err("overlayfs: failed to clone lowerpath\n"); | ||
700 | goto out_put_upper_mnt; | ||
701 | } | ||
702 | |||
703 | ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); | ||
704 | err = PTR_ERR(ufs->workdir); | ||
705 | if (IS_ERR(ufs->workdir)) { | ||
706 | pr_err("overlayfs: failed to create directory %s/%s\n", | ||
707 | ufs->config.workdir, OVL_WORKDIR_NAME); | ||
708 | goto out_put_lower_mnt; | ||
709 | } | ||
710 | |||
711 | /* | ||
712 | * Make lower_mnt R/O. That way fchmod/fchown on lower file | ||
713 | * will fail instead of modifying lower fs. | ||
714 | */ | ||
715 | ufs->lower_mnt->mnt_flags |= MNT_READONLY; | ||
716 | |||
717 | /* If the upper fs is r/o, we mark overlayfs r/o too */ | ||
718 | if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY) | ||
719 | sb->s_flags |= MS_RDONLY; | ||
720 | |||
721 | sb->s_d_op = &ovl_dentry_operations; | ||
722 | |||
723 | err = -ENOMEM; | ||
724 | root_inode = ovl_new_inode(sb, S_IFDIR, oe); | ||
725 | if (!root_inode) | ||
726 | goto out_put_workdir; | ||
727 | |||
728 | root_dentry = d_make_root(root_inode); | ||
729 | if (!root_dentry) | ||
730 | goto out_put_workdir; | ||
731 | |||
732 | mntput(upperpath.mnt); | ||
733 | mntput(lowerpath.mnt); | ||
734 | path_put(&workpath); | ||
735 | |||
736 | oe->__upperdentry = upperpath.dentry; | ||
737 | oe->lowerdentry = lowerpath.dentry; | ||
738 | |||
739 | root_dentry->d_fsdata = oe; | ||
740 | |||
741 | sb->s_magic = OVERLAYFS_SUPER_MAGIC; | ||
742 | sb->s_op = &ovl_super_operations; | ||
743 | sb->s_root = root_dentry; | ||
744 | sb->s_fs_info = ufs; | ||
745 | |||
746 | return 0; | ||
747 | |||
748 | out_put_workdir: | ||
749 | dput(ufs->workdir); | ||
750 | out_put_lower_mnt: | ||
751 | mntput(ufs->lower_mnt); | ||
752 | out_put_upper_mnt: | ||
753 | mntput(ufs->upper_mnt); | ||
754 | out_put_workpath: | ||
755 | path_put(&workpath); | ||
756 | out_put_lowerpath: | ||
757 | path_put(&lowerpath); | ||
758 | out_put_upperpath: | ||
759 | path_put(&upperpath); | ||
760 | out_free_oe: | ||
761 | kfree(oe); | ||
762 | out_free_config: | ||
763 | kfree(ufs->config.lowerdir); | ||
764 | kfree(ufs->config.upperdir); | ||
765 | kfree(ufs->config.workdir); | ||
766 | kfree(ufs); | ||
767 | out: | ||
768 | return err; | ||
769 | } | ||
770 | |||
771 | static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, | ||
772 | const char *dev_name, void *raw_data) | ||
773 | { | ||
774 | return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); | ||
775 | } | ||
776 | |||
777 | static struct file_system_type ovl_fs_type = { | ||
778 | .owner = THIS_MODULE, | ||
779 | .name = "overlayfs", | ||
780 | .mount = ovl_mount, | ||
781 | .kill_sb = kill_anon_super, | ||
782 | }; | ||
783 | MODULE_ALIAS_FS("overlayfs"); | ||
784 | |||
785 | static int __init ovl_init(void) | ||
786 | { | ||
787 | return register_filesystem(&ovl_fs_type); | ||
788 | } | ||
789 | |||
790 | static void __exit ovl_exit(void) | ||
791 | { | ||
792 | unregister_filesystem(&ovl_fs_type); | ||
793 | } | ||
794 | |||
795 | module_init(ovl_init); | ||
796 | module_exit(ovl_exit); | ||
diff --git a/fs/splice.c b/fs/splice.c index f5cb9ba84510..75c6058eabf2 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -1330,6 +1330,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | |||
1330 | 1330 | ||
1331 | return ret; | 1331 | return ret; |
1332 | } | 1332 | } |
1333 | EXPORT_SYMBOL(do_splice_direct); | ||
1333 | 1334 | ||
1334 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, | 1335 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, |
1335 | struct pipe_inode_info *opipe, | 1336 | struct pipe_inode_info *opipe, |
diff --git a/include/linux/fs.h b/include/linux/fs.h index a957d4366c24..4e41a4a331bb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -223,6 +223,13 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, | |||
223 | #define ATTR_TIMES_SET (1 << 16) | 223 | #define ATTR_TIMES_SET (1 << 16) |
224 | 224 | ||
225 | /* | 225 | /* |
226 | * Whiteout is represented by a char device. The following constants define the | ||
227 | * mode and device number to use. | ||
228 | */ | ||
229 | #define WHITEOUT_MODE 0 | ||
230 | #define WHITEOUT_DEV 0 | ||
231 | |||
232 | /* | ||
226 | * This is the Inode Attributes structure, used for notify_change(). It | 233 | * This is the Inode Attributes structure, used for notify_change(). It |
227 | * uses the above definitions as flags, to know which values have changed. | 234 | * uses the above definitions as flags, to know which values have changed. |
228 | * Also, in this manner, a Filesystem can look at only the values it cares | 235 | * Also, in this manner, a Filesystem can look at only the values it cares |
@@ -254,6 +261,12 @@ struct iattr { | |||
254 | */ | 261 | */ |
255 | #include <linux/quota.h> | 262 | #include <linux/quota.h> |
256 | 263 | ||
264 | /* | ||
265 | * Maximum number of layers of fs stack. Needs to be limited to | ||
266 | * prevent kernel stack overflow | ||
267 | */ | ||
268 | #define FILESYSTEM_MAX_STACK_DEPTH 2 | ||
269 | |||
257 | /** | 270 | /** |
258 | * enum positive_aop_returns - aop return codes with specific semantics | 271 | * enum positive_aop_returns - aop return codes with specific semantics |
259 | * | 272 | * |
@@ -1266,6 +1279,11 @@ struct super_block { | |||
1266 | struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; | 1279 | struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; |
1267 | struct list_lru s_inode_lru ____cacheline_aligned_in_smp; | 1280 | struct list_lru s_inode_lru ____cacheline_aligned_in_smp; |
1268 | struct rcu_head rcu; | 1281 | struct rcu_head rcu; |
1282 | |||
1283 | /* | ||
1284 | * Indicates how deep in a filesystem stack this SB is | ||
1285 | */ | ||
1286 | int s_stack_depth; | ||
1269 | }; | 1287 | }; |
1270 | 1288 | ||
1271 | extern struct timespec current_fs_time(struct super_block *sb); | 1289 | extern struct timespec current_fs_time(struct super_block *sb); |
@@ -1398,6 +1416,7 @@ extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct ino | |||
1398 | extern int vfs_rmdir(struct inode *, struct dentry *); | 1416 | extern int vfs_rmdir(struct inode *, struct dentry *); |
1399 | extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); | 1417 | extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); |
1400 | extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); | 1418 | extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); |
1419 | extern int vfs_whiteout(struct inode *, struct dentry *); | ||
1401 | 1420 | ||
1402 | /* | 1421 | /* |
1403 | * VFS dentry helper functions. | 1422 | * VFS dentry helper functions. |
@@ -1528,6 +1547,9 @@ struct inode_operations { | |||
1528 | umode_t create_mode, int *opened); | 1547 | umode_t create_mode, int *opened); |
1529 | int (*tmpfile) (struct inode *, struct dentry *, umode_t); | 1548 | int (*tmpfile) (struct inode *, struct dentry *, umode_t); |
1530 | int (*set_acl)(struct inode *, struct posix_acl *, int); | 1549 | int (*set_acl)(struct inode *, struct posix_acl *, int); |
1550 | |||
1551 | /* WARNING: probably going away soon, do not use! */ | ||
1552 | int (*dentry_open)(struct dentry *, struct file *, const struct cred *); | ||
1531 | } ____cacheline_aligned; | 1553 | } ____cacheline_aligned; |
1532 | 1554 | ||
1533 | ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, | 1555 | ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, |
@@ -1625,6 +1647,9 @@ struct super_operations { | |||
1625 | #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) | 1647 | #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) |
1626 | #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) | 1648 | #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) |
1627 | 1649 | ||
/*
 * True when @inode is a character device whose device number is
 * WHITEOUT_DEV.  The argument is parenthesised at every use so that any
 * pointer expression (e.g. "p + 1" or "a ? b : c") can be passed safely.
 * Note that @inode is evaluated twice.
 */
#define IS_WHITEOUT(inode)	(S_ISCHR((inode)->i_mode) && \
				 (inode)->i_rdev == WHITEOUT_DEV)
1652 | |||
1628 | /* | 1653 | /* |
1629 | * Inode state bits. Protected by inode->i_lock | 1654 | * Inode state bits. Protected by inode->i_lock |
1630 | * | 1655 | * |
@@ -2040,6 +2065,7 @@ extern struct file *file_open_name(struct filename *, int, umode_t); | |||
2040 | extern struct file *filp_open(const char *, int, umode_t); | 2065 | extern struct file *filp_open(const char *, int, umode_t); |
2041 | extern struct file *file_open_root(struct dentry *, struct vfsmount *, | 2066 | extern struct file *file_open_root(struct dentry *, struct vfsmount *, |
2042 | const char *, int); | 2067 | const char *, int); |
2068 | extern int vfs_open(const struct path *, struct file *, const struct cred *); | ||
2043 | extern struct file * dentry_open(const struct path *, int, const struct cred *); | 2069 | extern struct file * dentry_open(const struct path *, int, const struct cred *); |
2044 | extern int filp_close(struct file *, fl_owner_t id); | 2070 | extern int filp_close(struct file *, fl_owner_t id); |
2045 | 2071 | ||
@@ -2253,7 +2279,9 @@ extern sector_t bmap(struct inode *, sector_t); | |||
2253 | #endif | 2279 | #endif |
2254 | extern int notify_change(struct dentry *, struct iattr *, struct inode **); | 2280 | extern int notify_change(struct dentry *, struct iattr *, struct inode **); |
2255 | extern int inode_permission(struct inode *, int); | 2281 | extern int inode_permission(struct inode *, int); |
2282 | extern int __inode_permission(struct inode *, int); | ||
2256 | extern int generic_permission(struct inode *, int); | 2283 | extern int generic_permission(struct inode *, int); |
2284 | extern int __check_sticky(struct inode *dir, struct inode *inode); | ||
2257 | 2285 | ||
2258 | static inline bool execute_ok(struct inode *inode) | 2286 | static inline bool execute_ok(struct inode *inode) |
2259 | { | 2287 | { |
@@ -2452,6 +2480,9 @@ extern ssize_t iter_file_splice_write(struct pipe_inode_info *, | |||
2452 | struct file *, loff_t *, size_t, unsigned int); | 2480 | struct file *, loff_t *, size_t, unsigned int); |
2453 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, | 2481 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, |
2454 | struct file *out, loff_t *, size_t len, unsigned int flags); | 2482 | struct file *out, loff_t *, size_t len, unsigned int flags); |
2483 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | ||
2484 | loff_t *opos, size_t len, unsigned int flags); | ||
2485 | |||
2455 | 2486 | ||
2456 | extern void | 2487 | extern void |
2457 | file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); | 2488 | file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); |
@@ -2737,6 +2768,14 @@ static inline int is_sxid(umode_t mode) | |||
2737 | return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); | 2768 | return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); |
2738 | } | 2769 | } |
2739 | 2770 | ||
2771 | static inline int check_sticky(struct inode *dir, struct inode *inode) | ||
2772 | { | ||
2773 | if (!(dir->i_mode & S_ISVTX)) | ||
2774 | return 0; | ||
2775 | |||
2776 | return __check_sticky(dir, inode); | ||
2777 | } | ||
2778 | |||
2740 | static inline void inode_has_no_xattr(struct inode *inode) | 2779 | static inline void inode_has_no_xattr(struct inode *inode) |
2741 | { | 2780 | { |
2742 | if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) | 2781 | if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) |
diff --git a/include/linux/mount.h b/include/linux/mount.h index 9262e4bf0cc3..c2c561dc0114 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h | |||
@@ -81,6 +81,9 @@ extern struct vfsmount *mntget(struct vfsmount *mnt); | |||
81 | extern struct vfsmount *mnt_clone_internal(struct path *path); | 81 | extern struct vfsmount *mnt_clone_internal(struct path *path); |
82 | extern int __mnt_is_readonly(struct vfsmount *mnt); | 82 | extern int __mnt_is_readonly(struct vfsmount *mnt); |
83 | 83 | ||
84 | struct path; | ||
85 | extern struct vfsmount *clone_private_mount(struct path *path); | ||
86 | |||
84 | struct file_system_type; | 87 | struct file_system_type; |
85 | extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, | 88 | extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, |
86 | int flags, const char *name, | 89 | int flags, const char *name, |
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index ca1a11bb4443..3735fa0a6784 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h | |||
@@ -37,6 +37,7 @@ | |||
37 | 37 | ||
38 | #define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ | 38 | #define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ |
39 | #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ | 39 | #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ |
40 | #define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ | ||
40 | 41 | ||
41 | struct fstrim_range { | 42 | struct fstrim_range { |
42 | __u64 start; | 43 | __u64 start; |
diff --git a/mm/shmem.c b/mm/shmem.c index cd6fc7590e54..185836ba53ef 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -2345,6 +2345,32 @@ static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, stru | |||
2345 | return 0; | 2345 | return 0; |
2346 | } | 2346 | } |
2347 | 2347 | ||
2348 | static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry) | ||
2349 | { | ||
2350 | struct dentry *whiteout; | ||
2351 | int error; | ||
2352 | |||
2353 | whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name); | ||
2354 | if (!whiteout) | ||
2355 | return -ENOMEM; | ||
2356 | |||
2357 | error = shmem_mknod(old_dir, whiteout, | ||
2358 | S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); | ||
2359 | dput(whiteout); | ||
2360 | if (error) | ||
2361 | return error; | ||
2362 | |||
2363 | /* | ||
2364 | * Cheat and hash the whiteout while the old dentry is still in | ||
2365 | * place, instead of playing games with FS_RENAME_DOES_D_MOVE. | ||
2366 | * | ||
2367 | * d_lookup() will consistently find one of them at this point, | ||
2368 | * not sure which one, but that isn't even important. | ||
2369 | */ | ||
2370 | d_rehash(whiteout); | ||
2371 | return 0; | ||
2372 | } | ||
2373 | |||
2348 | /* | 2374 | /* |
2349 | * The VFS layer already does all the dentry stuff for rename, | 2375 | * The VFS layer already does all the dentry stuff for rename, |
2350 | * we just have to decrement the usage count for the target if | 2376 | * we just have to decrement the usage count for the target if |
@@ -2356,7 +2382,7 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc | |||
2356 | struct inode *inode = old_dentry->d_inode; | 2382 | struct inode *inode = old_dentry->d_inode; |
2357 | int they_are_dirs = S_ISDIR(inode->i_mode); | 2383 | int they_are_dirs = S_ISDIR(inode->i_mode); |
2358 | 2384 | ||
2359 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) | 2385 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) |
2360 | return -EINVAL; | 2386 | return -EINVAL; |
2361 | 2387 | ||
2362 | if (flags & RENAME_EXCHANGE) | 2388 | if (flags & RENAME_EXCHANGE) |
@@ -2365,6 +2391,14 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc | |||
2365 | if (!simple_empty(new_dentry)) | 2391 | if (!simple_empty(new_dentry)) |
2366 | return -ENOTEMPTY; | 2392 | return -ENOTEMPTY; |
2367 | 2393 | ||
2394 | if (flags & RENAME_WHITEOUT) { | ||
2395 | int error; | ||
2396 | |||
2397 | error = shmem_whiteout(old_dir, old_dentry); | ||
2398 | if (error) | ||
2399 | return error; | ||
2400 | } | ||
2401 | |||
2368 | if (new_dentry->d_inode) { | 2402 | if (new_dentry->d_inode) { |
2369 | (void) shmem_unlink(new_dir, new_dentry); | 2403 | (void) shmem_unlink(new_dir, new_dentry); |
2370 | if (they_are_dirs) { | 2404 | if (they_are_dirs) { |