diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-26 14:19:18 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-26 14:19:18 -0400 |
| commit | d1e14f1d63eb15ebe97d1a8544ddc143486b0204 (patch) | |
| tree | a73e7c751f23835483f1e8029d04547192611975 | |
| parent | 2cc91884b6b3f7328680b8ea7563016d3aee3d19 (diff) | |
| parent | db6ec212b53abc29a5bb6ac8c810010fc28d5191 (diff) | |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs updates from Al Viro:
"overlayfs merge + leak fix for d_splice_alias() failure exits"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
overlayfs: embed middle into overlay_readdir_data
overlayfs: embed root into overlay_readdir_data
overlayfs: make ovl_cache_entry->name an array instead of pointer
overlayfs: don't hold ->i_mutex over opening the real directory
fix inode leaks on d_splice_alias() failure exits
fs: limit filesystem stacking depth
overlay: overlay filesystem documentation
overlayfs: implement show_options
overlayfs: add statfs support
overlay filesystem
shmem: support RENAME_WHITEOUT
ext4: support RENAME_WHITEOUT
vfs: add RENAME_WHITEOUT
vfs: add whiteout support
vfs: export check_sticky()
vfs: introduce clone_private_mount()
vfs: export __inode_permission() to modules
vfs: export do_splice_direct() to modules
vfs: add i_op->dentry_open()
| -rw-r--r-- | Documentation/filesystems/Locking | 2 | ||||
| -rw-r--r-- | Documentation/filesystems/overlayfs.txt | 198 | ||||
| -rw-r--r-- | Documentation/filesystems/vfs.txt | 7 | ||||
| -rw-r--r-- | MAINTAINERS | 7 | ||||
| -rw-r--r-- | fs/Kconfig | 1 | ||||
| -rw-r--r-- | fs/Makefile | 1 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 20 | ||||
| -rw-r--r-- | fs/dcache.c | 2 | ||||
| -rw-r--r-- | fs/ecryptfs/main.c | 7 | ||||
| -rw-r--r-- | fs/ext4/namei.c | 95 | ||||
| -rw-r--r-- | fs/internal.h | 7 | ||||
| -rw-r--r-- | fs/namei.c | 41 | ||||
| -rw-r--r-- | fs/namespace.c | 27 | ||||
| -rw-r--r-- | fs/open.c | 23 | ||||
| -rw-r--r-- | fs/overlayfs/Kconfig | 10 | ||||
| -rw-r--r-- | fs/overlayfs/Makefile | 7 | ||||
| -rw-r--r-- | fs/overlayfs/copy_up.c | 414 | ||||
| -rw-r--r-- | fs/overlayfs/dir.c | 921 | ||||
| -rw-r--r-- | fs/overlayfs/inode.c | 425 | ||||
| -rw-r--r-- | fs/overlayfs/overlayfs.h | 191 | ||||
| -rw-r--r-- | fs/overlayfs/readdir.c | 590 | ||||
| -rw-r--r-- | fs/overlayfs/super.c | 796 | ||||
| -rw-r--r-- | fs/splice.c | 1 | ||||
| -rw-r--r-- | include/linux/fs.h | 39 | ||||
| -rw-r--r-- | include/linux/mount.h | 3 | ||||
| -rw-r--r-- | include/uapi/linux/fs.h | 1 | ||||
| -rw-r--r-- | mm/shmem.c | 36 |
27 files changed, 3814 insertions, 58 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 94d93b1f8b53..b30753cbf431 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
| @@ -67,6 +67,7 @@ prototypes: | |||
| 67 | struct file *, unsigned open_flag, | 67 | struct file *, unsigned open_flag, |
| 68 | umode_t create_mode, int *opened); | 68 | umode_t create_mode, int *opened); |
| 69 | int (*tmpfile) (struct inode *, struct dentry *, umode_t); | 69 | int (*tmpfile) (struct inode *, struct dentry *, umode_t); |
| 70 | int (*dentry_open)(struct dentry *, struct file *, const struct cred *); | ||
| 70 | 71 | ||
| 71 | locking rules: | 72 | locking rules: |
| 72 | all may block | 73 | all may block |
| @@ -96,6 +97,7 @@ fiemap: no | |||
| 96 | update_time: no | 97 | update_time: no |
| 97 | atomic_open: yes | 98 | atomic_open: yes |
| 98 | tmpfile: no | 99 | tmpfile: no |
| 100 | dentry_open: no | ||
| 99 | 101 | ||
| 100 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on | 102 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on |
| 101 | victim. | 103 | victim. |
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt new file mode 100644 index 000000000000..530850a72735 --- /dev/null +++ b/Documentation/filesystems/overlayfs.txt | |||
| @@ -0,0 +1,198 @@ | |||
| 1 | Written by: Neil Brown <neilb@suse.de> | ||
| 2 | |||
| 3 | Overlay Filesystem | ||
| 4 | ================== | ||
| 5 | |||
| 6 | This document describes a prototype for a new approach to providing | ||
| 7 | overlay-filesystem functionality in Linux (sometimes referred to as | ||
| 8 | union-filesystems). An overlay-filesystem tries to present a | ||
| 9 | filesystem which is the result of overlaying one filesystem on top | ||
| 10 | of the other. | ||
| 11 | |||
| 12 | The result will inevitably fail to look exactly like a normal | ||
| 13 | filesystem for various technical reasons. The expectation is that | ||
| 14 | many use cases will be able to ignore these differences. | ||
| 15 | |||
| 16 | This approach is 'hybrid' because the objects that appear in the | ||
| 17 | filesystem do not all appear to belong to that filesystem. In many | ||
| 18 | cases an object accessed in the union will be indistinguishable | ||
| 19 | from accessing the corresponding object from the original filesystem. | ||
| 20 | This is most obvious from the 'st_dev' field returned by stat(2). | ||
| 21 | |||
| 22 | While directories will report an st_dev from the overlay-filesystem, | ||
| 23 | all non-directory objects will report an st_dev from the lower or | ||
| 24 | upper filesystem that is providing the object. Similarly st_ino will | ||
| 25 | only be unique when combined with st_dev, and both of these can change | ||
| 26 | over the lifetime of a non-directory object. Many applications and | ||
| 27 | tools ignore these values and will not be affected. | ||
| 28 | |||
| 29 | Upper and Lower | ||
| 30 | --------------- | ||
| 31 | |||
| 32 | An overlay filesystem combines two filesystems - an 'upper' filesystem | ||
| 33 | and a 'lower' filesystem. When a name exists in both filesystems, the | ||
| 34 | object in the 'upper' filesystem is visible while the object in the | ||
| 35 | 'lower' filesystem is either hidden or, in the case of directories, | ||
| 36 | merged with the 'upper' object. | ||
| 37 | |||
| 38 | It would be more correct to refer to an upper and lower 'directory | ||
| 39 | tree' rather than 'filesystem' as it is quite possible for both | ||
| 40 | directory trees to be in the same filesystem and there is no | ||
| 41 | requirement that the root of a filesystem be given for either upper or | ||
| 42 | lower. | ||
| 43 | |||
| 44 | The lower filesystem can be any filesystem supported by Linux and does | ||
| 45 | not need to be writable. The lower filesystem can even be another | ||
| 46 | overlayfs. The upper filesystem will normally be writable and if it | ||
| 47 | is it must support the creation of trusted.* extended attributes, and | ||
| 48 | must provide valid d_type in readdir responses, so NFS is not suitable. | ||
| 49 | |||
| 50 | A read-only overlay of two read-only filesystems may use any | ||
| 51 | filesystem type. | ||
| 52 | |||
| 53 | Directories | ||
| 54 | ----------- | ||
| 55 | |||
| 56 | Overlaying mainly involves directories. If a given name appears in both | ||
| 57 | upper and lower filesystems and refers to a non-directory in either, | ||
| 58 | then the lower object is hidden - the name refers only to the upper | ||
| 59 | object. | ||
| 60 | |||
| 61 | Where both upper and lower objects are directories, a merged directory | ||
| 62 | is formed. | ||
| 63 | |||
| 64 | At mount time, the two directories given as mount options "lowerdir" and | ||
| 65 | "upperdir" are combined into a merged directory: | ||
| 66 | |||
| 67 | mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper,\ | ||
| 68 | workdir=/work /merged | ||
| 69 | |||
| 70 | The "workdir" needs to be an empty directory on the same filesystem | ||
| 71 | as upperdir. | ||
| 72 | |||
| 73 | Then whenever a lookup is requested in such a merged directory, the | ||
| 74 | lookup is performed in each actual directory and the combined result | ||
| 75 | is cached in the dentry belonging to the overlay filesystem. If both | ||
| 76 | actual lookups find directories, both are stored and a merged | ||
| 77 | directory is created, otherwise only one is stored: the upper if it | ||
| 78 | exists, else the lower. | ||
| 79 | |||
| 80 | Only the lists of names from directories are merged. Other content | ||
| 81 | such as metadata and extended attributes are reported for the upper | ||
| 82 | directory only. These attributes of the lower directory are hidden. | ||
| 83 | |||
| 84 | whiteouts and opaque directories | ||
| 85 | -------------------------------- | ||
| 86 | |||
| 87 | In order to support rm and rmdir without changing the lower | ||
| 88 | filesystem, an overlay filesystem needs to record in the upper filesystem | ||
| 89 | that files have been removed. This is done using whiteouts and opaque | ||
| 90 | directories (non-directories are always opaque). | ||
| 91 | |||
| 92 | A whiteout is created as a character device with 0/0 device number. | ||
| 93 | When a whiteout is found in the upper level of a merged directory, any | ||
| 94 | matching name in the lower level is ignored, and the whiteout itself | ||
| 95 | is also hidden. | ||
| 96 | |||
| 97 | A directory is made opaque by setting the xattr "trusted.overlay.opaque" | ||
| 98 | to "y". Where the upper filesystem contains an opaque directory, any | ||
| 99 | directory in the lower filesystem with the same name is ignored. | ||
| 100 | |||
| 101 | readdir | ||
| 102 | ------- | ||
| 103 | |||
| 104 | When a 'readdir' request is made on a merged directory, the upper and | ||
| 105 | lower directories are each read and the name lists merged in the | ||
| 106 | obvious way (upper is read first, then lower - entries that already | ||
| 107 | exist are not re-added). This merged name list is cached in the | ||
| 108 | 'struct file' and so remains as long as the file is kept open. If the | ||
| 109 | directory is opened and read by two processes at the same time, they | ||
| 110 | will each have separate caches. A seekdir to the start of the | ||
| 111 | directory (offset 0) followed by a readdir will cause the cache to be | ||
| 112 | discarded and rebuilt. | ||
| 113 | |||
| 114 | This means that changes to the merged directory do not appear while a | ||
| 115 | directory is being read. This is unlikely to be noticed by many | ||
| 116 | programs. | ||
| 117 | |||
| 118 | seek offsets are assigned sequentially when the directories are read. | ||
| 119 | Thus if a program were to: | ||
| 120 | - read part of a directory | ||
| 121 | - remember an offset, and close the directory | ||
| 122 | - re-open the directory some time later | ||
| 123 | - seek to the remembered offset | ||
| 124 | |||
| 125 | there may be little correlation between the old and new locations in | ||
| 126 | the list of filenames, particularly if anything has changed in the | ||
| 127 | directory. | ||
| 128 | |||
| 129 | Readdir on directories that are not merged is simply handled by the | ||
| 130 | underlying directory (upper or lower). | ||
| 131 | |||
| 132 | |||
| 133 | Non-directories | ||
| 134 | --------------- | ||
| 135 | |||
| 136 | Objects that are not directories (files, symlinks, device-special | ||
| 137 | files etc.) are presented either from the upper or lower filesystem as | ||
| 138 | appropriate. When a file in the lower filesystem is accessed in a way | ||
| 139 | that requires write-access, such as opening for write access, changing | ||
| 140 | some metadata etc., the file is first copied from the lower filesystem | ||
| 141 | to the upper filesystem (copy_up). Note that creating a hard-link | ||
| 142 | also requires copy_up, though of course creation of a symlink does | ||
| 143 | not. | ||
| 144 | |||
| 145 | The copy_up may turn out to be unnecessary, for example if the file is | ||
| 146 | opened for read-write but the data is not modified. | ||
| 147 | |||
| 148 | The copy_up process first makes sure that the containing directory | ||
| 149 | exists in the upper filesystem - creating it and any parents as | ||
| 150 | necessary. It then creates the object with the same metadata (owner, | ||
| 151 | mode, mtime, symlink-target etc.) and then if the object is a file, the | ||
| 152 | data is copied from the lower to the upper filesystem. Finally any | ||
| 153 | extended attributes are copied up. | ||
| 154 | |||
| 155 | Once the copy_up is complete, the overlay filesystem simply | ||
| 156 | provides direct access to the newly created file in the upper | ||
| 157 | filesystem - future operations on the file are barely noticed by the | ||
| 158 | overlay filesystem (though an operation on the name of the file such as | ||
| 159 | rename or unlink will of course be noticed and handled). | ||
| 160 | |||
| 161 | |||
| 162 | Non-standard behavior | ||
| 163 | --------------------- | ||
| 164 | |||
| 165 | The copy_up operation essentially creates a new, identical file and | ||
| 166 | moves it over to the old name. The new file may be on a different | ||
| 167 | filesystem, so both st_dev and st_ino of the file may change. | ||
| 168 | |||
| 169 | Any open files referring to this inode will access the old data and | ||
| 170 | metadata. Similarly any file locks obtained before copy_up will not | ||
| 171 | apply to the copied up file. | ||
| 172 | |||
| 173 | On a file opened with O_RDONLY, fchmod(2), fchown(2), futimesat(2) and | ||
| 174 | fsetxattr(2) will fail with EROFS. | ||
| 175 | |||
| 176 | If a file with multiple hard links is copied up, then this will | ||
| 177 | "break" the link. Changes will not be propagated to other names | ||
| 178 | referring to the same inode. | ||
| 179 | |||
| 180 | Symlinks in /proc/PID/ and /proc/PID/fd which point to a non-directory | ||
| 181 | object in overlayfs will not contain valid absolute paths, only | ||
| 182 | relative paths leading up to the filesystem's root. This will be | ||
| 183 | fixed in the future. | ||
| 184 | |||
| 185 | Some operations are not atomic, for example a crash during copy_up or | ||
| 186 | rename will leave the filesystem in an inconsistent state. This will | ||
| 187 | be addressed in the future. | ||
| 188 | |||
| 189 | Changes to underlying filesystems | ||
| 190 | --------------------------------- | ||
| 191 | |||
| 192 | Offline changes, when the overlay is not mounted, are allowed to either | ||
| 193 | the upper or the lower trees. | ||
| 194 | |||
| 195 | Changes to the underlying filesystems while part of a mounted overlay | ||
| 196 | filesystem are not allowed. If the underlying filesystem is changed, | ||
| 197 | the behavior of the overlay is undefined, though it will not result in | ||
| 198 | a crash or deadlock. | ||
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index fceff7c00a3c..20bf204426ca 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
| @@ -364,6 +364,7 @@ struct inode_operations { | |||
| 364 | int (*atomic_open)(struct inode *, struct dentry *, struct file *, | 364 | int (*atomic_open)(struct inode *, struct dentry *, struct file *, |
| 365 | unsigned open_flag, umode_t create_mode, int *opened); | 365 | unsigned open_flag, umode_t create_mode, int *opened); |
| 366 | int (*tmpfile) (struct inode *, struct dentry *, umode_t); | 366 | int (*tmpfile) (struct inode *, struct dentry *, umode_t); |
| 367 | int (*dentry_open)(struct dentry *, struct file *, const struct cred *); | ||
| 367 | }; | 368 | }; |
| 368 | 369 | ||
| 369 | Again, all methods are called without any locks being held, unless | 370 | Again, all methods are called without any locks being held, unless |
| @@ -696,6 +697,12 @@ struct address_space_operations { | |||
| 696 | but instead uses bmap to find out where the blocks in the file | 697 | but instead uses bmap to find out where the blocks in the file |
| 697 | are and uses those addresses directly. | 698 | are and uses those addresses directly. |
| 698 | 699 | ||
| 700 | dentry_open: *WARNING: probably going away soon, do not use!* This is an | ||
| 701 | alternative to f_op->open(), the difference is that this method may open | ||
| 702 | a file not necessarily originating from the same filesystem as the one | ||
| 703 | i_op->dentry_open() was called on. It may be useful for stacking filesystems | ||
| 704 | which want to allow native I/O directly on underlying files. | ||
| 705 | |||
| 699 | 706 | ||
| 700 | invalidatepage: If a page has PagePrivate set, then invalidatepage | 707 | invalidatepage: If a page has PagePrivate set, then invalidatepage |
| 701 | will be called when part or all of the page is to be removed | 708 | will be called when part or all of the page is to be removed |
diff --git a/MAINTAINERS b/MAINTAINERS index 0484f2c526f0..ec25b0e1e745 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
| @@ -6840,6 +6840,13 @@ F: drivers/scsi/osd/ | |||
| 6840 | F: include/scsi/osd_* | 6840 | F: include/scsi/osd_* |
| 6841 | F: fs/exofs/ | 6841 | F: fs/exofs/ |
| 6842 | 6842 | ||
| 6843 | OVERLAYFS FILESYSTEM | ||
| 6844 | M: Miklos Szeredi <miklos@szeredi.hu> | ||
| 6845 | L: linux-fsdevel@vger.kernel.org | ||
| 6846 | S: Supported | ||
| 6847 | F: fs/overlayfs/* | ||
| 6848 | F: Documentation/filesystems/overlayfs.txt | ||
| 6849 | |||
| 6843 | P54 WIRELESS DRIVER | 6850 | P54 WIRELESS DRIVER |
| 6844 | M: Christian Lamparter <chunkeey@googlemail.com> | 6851 | M: Christian Lamparter <chunkeey@googlemail.com> |
| 6845 | L: linux-wireless@vger.kernel.org | 6852 | L: linux-wireless@vger.kernel.org |
diff --git a/fs/Kconfig b/fs/Kconfig index db5dc1598716..664991afe0c0 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
| @@ -67,6 +67,7 @@ source "fs/quota/Kconfig" | |||
| 67 | 67 | ||
| 68 | source "fs/autofs4/Kconfig" | 68 | source "fs/autofs4/Kconfig" |
| 69 | source "fs/fuse/Kconfig" | 69 | source "fs/fuse/Kconfig" |
| 70 | source "fs/overlayfs/Kconfig" | ||
| 70 | 71 | ||
| 71 | menu "Caches" | 72 | menu "Caches" |
| 72 | 73 | ||
diff --git a/fs/Makefile b/fs/Makefile index 90c88529892b..34a1b9dea6dd 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
| @@ -104,6 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/ | |||
| 104 | obj-$(CONFIG_AUTOFS4_FS) += autofs4/ | 104 | obj-$(CONFIG_AUTOFS4_FS) += autofs4/ |
| 105 | obj-$(CONFIG_ADFS_FS) += adfs/ | 105 | obj-$(CONFIG_ADFS_FS) += adfs/ |
| 106 | obj-$(CONFIG_FUSE_FS) += fuse/ | 106 | obj-$(CONFIG_FUSE_FS) += fuse/ |
| 107 | obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/ | ||
| 107 | obj-$(CONFIG_UDF_FS) += udf/ | 108 | obj-$(CONFIG_UDF_FS) += udf/ |
| 108 | obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ | 109 | obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ |
| 109 | obj-$(CONFIG_OMFS_FS) += omfs/ | 110 | obj-$(CONFIG_OMFS_FS) += omfs/ |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8d2b76e29d3b..4399f0c3a4ce 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -765,23 +765,6 @@ out: | |||
| 765 | return ret; | 765 | return ret; |
| 766 | } | 766 | } |
| 767 | 767 | ||
| 768 | /* copy of check_sticky in fs/namei.c() | ||
| 769 | * It's inline, so penalty for filesystems that don't use sticky bit is | ||
| 770 | * minimal. | ||
| 771 | */ | ||
| 772 | static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) | ||
| 773 | { | ||
| 774 | kuid_t fsuid = current_fsuid(); | ||
| 775 | |||
| 776 | if (!(dir->i_mode & S_ISVTX)) | ||
| 777 | return 0; | ||
| 778 | if (uid_eq(inode->i_uid, fsuid)) | ||
| 779 | return 0; | ||
| 780 | if (uid_eq(dir->i_uid, fsuid)) | ||
| 781 | return 0; | ||
| 782 | return !capable(CAP_FOWNER); | ||
| 783 | } | ||
| 784 | |||
| 785 | /* copy of may_delete in fs/namei.c() | 768 | /* copy of may_delete in fs/namei.c() |
| 786 | * Check whether we can remove a link victim from directory dir, check | 769 | * Check whether we can remove a link victim from directory dir, check |
| 787 | * whether the type of victim is right. | 770 | * whether the type of victim is right. |
| @@ -817,8 +800,7 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir) | |||
| 817 | return error; | 800 | return error; |
| 818 | if (IS_APPEND(dir)) | 801 | if (IS_APPEND(dir)) |
| 819 | return -EPERM; | 802 | return -EPERM; |
| 820 | if (btrfs_check_sticky(dir, victim->d_inode)|| | 803 | if (check_sticky(dir, victim->d_inode) || IS_APPEND(victim->d_inode) || |
| 821 | IS_APPEND(victim->d_inode)|| | ||
| 822 | IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) | 804 | IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) |
| 823 | return -EPERM; | 805 | return -EPERM; |
| 824 | if (isdir) { | 806 | if (isdir) { |
diff --git a/fs/dcache.c b/fs/dcache.c index d5a23fd0da90..3ffef7f4e5cd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -2673,11 +2673,13 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
| 2673 | if (!IS_ROOT(new)) { | 2673 | if (!IS_ROOT(new)) { |
| 2674 | spin_unlock(&inode->i_lock); | 2674 | spin_unlock(&inode->i_lock); |
| 2675 | dput(new); | 2675 | dput(new); |
| 2676 | iput(inode); | ||
| 2676 | return ERR_PTR(-EIO); | 2677 | return ERR_PTR(-EIO); |
| 2677 | } | 2678 | } |
| 2678 | if (d_ancestor(new, dentry)) { | 2679 | if (d_ancestor(new, dentry)) { |
| 2679 | spin_unlock(&inode->i_lock); | 2680 | spin_unlock(&inode->i_lock); |
| 2680 | dput(new); | 2681 | dput(new); |
| 2682 | iput(inode); | ||
| 2681 | return ERR_PTR(-EIO); | 2683 | return ERR_PTR(-EIO); |
| 2682 | } | 2684 | } |
| 2683 | write_seqlock(&rename_lock); | 2685 | write_seqlock(&rename_lock); |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 1b119d3bf924..c4cd1fd86cc2 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
| @@ -566,6 +566,13 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags | |||
| 566 | s->s_maxbytes = path.dentry->d_sb->s_maxbytes; | 566 | s->s_maxbytes = path.dentry->d_sb->s_maxbytes; |
| 567 | s->s_blocksize = path.dentry->d_sb->s_blocksize; | 567 | s->s_blocksize = path.dentry->d_sb->s_blocksize; |
| 568 | s->s_magic = ECRYPTFS_SUPER_MAGIC; | 568 | s->s_magic = ECRYPTFS_SUPER_MAGIC; |
| 569 | s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1; | ||
| 570 | |||
| 571 | rc = -EINVAL; | ||
| 572 | if (s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { | ||
| 573 | pr_err("eCryptfs: maximum fs stacking depth exceeded\n"); | ||
| 574 | goto out_free; | ||
| 575 | } | ||
| 569 | 576 | ||
| 570 | inode = ecryptfs_get_inode(path.dentry->d_inode, s); | 577 | inode = ecryptfs_get_inode(path.dentry->d_inode, s); |
| 571 | rc = PTR_ERR(inode); | 578 | rc = PTR_ERR(inode); |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index adb559de23c1..123798c5ac31 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
| @@ -3148,6 +3148,39 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent) | |||
| 3148 | } | 3148 | } |
| 3149 | } | 3149 | } |
| 3150 | 3150 | ||
| 3151 | static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent, | ||
| 3152 | int credits, handle_t **h) | ||
| 3153 | { | ||
| 3154 | struct inode *wh; | ||
| 3155 | handle_t *handle; | ||
| 3156 | int retries = 0; | ||
| 3157 | |||
| 3158 | /* | ||
| 3159 | * for inode block, sb block, group summaries, | ||
| 3160 | * and inode bitmap | ||
| 3161 | */ | ||
| 3162 | credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) + | ||
| 3163 | EXT4_XATTR_TRANS_BLOCKS + 4); | ||
| 3164 | retry: | ||
| 3165 | wh = ext4_new_inode_start_handle(ent->dir, S_IFCHR | WHITEOUT_MODE, | ||
| 3166 | &ent->dentry->d_name, 0, NULL, | ||
| 3167 | EXT4_HT_DIR, credits); | ||
| 3168 | |||
| 3169 | handle = ext4_journal_current_handle(); | ||
| 3170 | if (IS_ERR(wh)) { | ||
| 3171 | if (handle) | ||
| 3172 | ext4_journal_stop(handle); | ||
| 3173 | if (PTR_ERR(wh) == -ENOSPC && | ||
| 3174 | ext4_should_retry_alloc(ent->dir->i_sb, &retries)) | ||
| 3175 | goto retry; | ||
| 3176 | } else { | ||
| 3177 | *h = handle; | ||
| 3178 | init_special_inode(wh, wh->i_mode, WHITEOUT_DEV); | ||
| 3179 | wh->i_op = &ext4_special_inode_operations; | ||
| 3180 | } | ||
| 3181 | return wh; | ||
| 3182 | } | ||
| 3183 | |||
| 3151 | /* | 3184 | /* |
| 3152 | * Anybody can rename anything with this: the permission checks are left to the | 3185 | * Anybody can rename anything with this: the permission checks are left to the |
| 3153 | * higher-level routines. | 3186 | * higher-level routines. |
| @@ -3157,7 +3190,8 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent) | |||
| 3157 | * This comes from rename(const char *oldpath, const char *newpath) | 3190 | * This comes from rename(const char *oldpath, const char *newpath) |
| 3158 | */ | 3191 | */ |
| 3159 | static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | 3192 | static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, |
| 3160 | struct inode *new_dir, struct dentry *new_dentry) | 3193 | struct inode *new_dir, struct dentry *new_dentry, |
| 3194 | unsigned int flags) | ||
| 3161 | { | 3195 | { |
| 3162 | handle_t *handle = NULL; | 3196 | handle_t *handle = NULL; |
| 3163 | struct ext4_renament old = { | 3197 | struct ext4_renament old = { |
| @@ -3172,6 +3206,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 3172 | }; | 3206 | }; |
| 3173 | int force_reread; | 3207 | int force_reread; |
| 3174 | int retval; | 3208 | int retval; |
| 3209 | struct inode *whiteout = NULL; | ||
| 3210 | int credits; | ||
| 3211 | u8 old_file_type; | ||
| 3175 | 3212 | ||
| 3176 | dquot_initialize(old.dir); | 3213 | dquot_initialize(old.dir); |
| 3177 | dquot_initialize(new.dir); | 3214 | dquot_initialize(new.dir); |
| @@ -3210,11 +3247,17 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 3210 | if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC)) | 3247 | if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC)) |
| 3211 | ext4_alloc_da_blocks(old.inode); | 3248 | ext4_alloc_da_blocks(old.inode); |
| 3212 | 3249 | ||
| 3213 | handle = ext4_journal_start(old.dir, EXT4_HT_DIR, | 3250 | credits = (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + |
| 3214 | (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + | 3251 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); |
| 3215 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); | 3252 | if (!(flags & RENAME_WHITEOUT)) { |
| 3216 | if (IS_ERR(handle)) | 3253 | handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits); |
| 3217 | return PTR_ERR(handle); | 3254 | if (IS_ERR(handle)) |
| 3255 | return PTR_ERR(handle); | ||
| 3256 | } else { | ||
| 3257 | whiteout = ext4_whiteout_for_rename(&old, credits, &handle); | ||
| 3258 | if (IS_ERR(whiteout)) | ||
| 3259 | return PTR_ERR(whiteout); | ||
| 3260 | } | ||
| 3218 | 3261 | ||
| 3219 | if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) | 3262 | if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) |
| 3220 | ext4_handle_sync(handle); | 3263 | ext4_handle_sync(handle); |
| @@ -3242,13 +3285,26 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 3242 | */ | 3285 | */ |
| 3243 | force_reread = (new.dir->i_ino == old.dir->i_ino && | 3286 | force_reread = (new.dir->i_ino == old.dir->i_ino && |
| 3244 | ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); | 3287 | ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); |
| 3288 | |||
| 3289 | old_file_type = old.de->file_type; | ||
| 3290 | if (whiteout) { | ||
| 3291 | /* | ||
| 3292 | * Do this before adding a new entry, so the old entry is sure | ||
| 3293 | * to be still pointing to the valid old entry. | ||
| 3294 | */ | ||
| 3295 | retval = ext4_setent(handle, &old, whiteout->i_ino, | ||
| 3296 | EXT4_FT_CHRDEV); | ||
| 3297 | if (retval) | ||
| 3298 | goto end_rename; | ||
| 3299 | ext4_mark_inode_dirty(handle, whiteout); | ||
| 3300 | } | ||
| 3245 | if (!new.bh) { | 3301 | if (!new.bh) { |
| 3246 | retval = ext4_add_entry(handle, new.dentry, old.inode); | 3302 | retval = ext4_add_entry(handle, new.dentry, old.inode); |
| 3247 | if (retval) | 3303 | if (retval) |
| 3248 | goto end_rename; | 3304 | goto end_rename; |
| 3249 | } else { | 3305 | } else { |
| 3250 | retval = ext4_setent(handle, &new, | 3306 | retval = ext4_setent(handle, &new, |
| 3251 | old.inode->i_ino, old.de->file_type); | 3307 | old.inode->i_ino, old_file_type); |
| 3252 | if (retval) | 3308 | if (retval) |
| 3253 | goto end_rename; | 3309 | goto end_rename; |
| 3254 | } | 3310 | } |
| @@ -3263,10 +3319,12 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 3263 | old.inode->i_ctime = ext4_current_time(old.inode); | 3319 | old.inode->i_ctime = ext4_current_time(old.inode); |
| 3264 | ext4_mark_inode_dirty(handle, old.inode); | 3320 | ext4_mark_inode_dirty(handle, old.inode); |
| 3265 | 3321 | ||
| 3266 | /* | 3322 | if (!whiteout) { |
| 3267 | * ok, that's it | 3323 | /* |
| 3268 | */ | 3324 | * ok, that's it |
| 3269 | ext4_rename_delete(handle, &old, force_reread); | 3325 | */ |
| 3326 | ext4_rename_delete(handle, &old, force_reread); | ||
| 3327 | } | ||
| 3270 | 3328 | ||
| 3271 | if (new.inode) { | 3329 | if (new.inode) { |
| 3272 | ext4_dec_count(handle, new.inode); | 3330 | ext4_dec_count(handle, new.inode); |
| @@ -3302,6 +3360,12 @@ end_rename: | |||
| 3302 | brelse(old.dir_bh); | 3360 | brelse(old.dir_bh); |
| 3303 | brelse(old.bh); | 3361 | brelse(old.bh); |
| 3304 | brelse(new.bh); | 3362 | brelse(new.bh); |
| 3363 | if (whiteout) { | ||
| 3364 | if (retval) | ||
| 3365 | drop_nlink(whiteout); | ||
| 3366 | unlock_new_inode(whiteout); | ||
| 3367 | iput(whiteout); | ||
| 3368 | } | ||
| 3305 | if (handle) | 3369 | if (handle) |
| 3306 | ext4_journal_stop(handle); | 3370 | ext4_journal_stop(handle); |
| 3307 | return retval; | 3371 | return retval; |
| @@ -3434,18 +3498,15 @@ static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry, | |||
| 3434 | struct inode *new_dir, struct dentry *new_dentry, | 3498 | struct inode *new_dir, struct dentry *new_dentry, |
| 3435 | unsigned int flags) | 3499 | unsigned int flags) |
| 3436 | { | 3500 | { |
| 3437 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) | 3501 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) |
| 3438 | return -EINVAL; | 3502 | return -EINVAL; |
| 3439 | 3503 | ||
| 3440 | if (flags & RENAME_EXCHANGE) { | 3504 | if (flags & RENAME_EXCHANGE) { |
| 3441 | return ext4_cross_rename(old_dir, old_dentry, | 3505 | return ext4_cross_rename(old_dir, old_dentry, |
| 3442 | new_dir, new_dentry); | 3506 | new_dir, new_dentry); |
| 3443 | } | 3507 | } |
| 3444 | /* | 3508 | |
| 3445 | * Existence checking was done by the VFS, otherwise "RENAME_NOREPLACE" | 3509 | return ext4_rename(old_dir, old_dentry, new_dir, new_dentry, flags); |
| 3446 | * is equivalent to regular rename. | ||
| 3447 | */ | ||
| 3448 | return ext4_rename(old_dir, old_dentry, new_dir, new_dentry); | ||
| 3449 | } | 3510 | } |
| 3450 | 3511 | ||
| 3451 | /* | 3512 | /* |
diff --git a/fs/internal.h b/fs/internal.h index 9477f8f6aefc..757ba2abf21e 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
| @@ -47,7 +47,6 @@ extern void __init chrdev_init(void); | |||
| 47 | /* | 47 | /* |
| 48 | * namei.c | 48 | * namei.c |
| 49 | */ | 49 | */ |
| 50 | extern int __inode_permission(struct inode *, int); | ||
| 51 | extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); | 50 | extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); |
| 52 | extern int vfs_path_lookup(struct dentry *, struct vfsmount *, | 51 | extern int vfs_path_lookup(struct dentry *, struct vfsmount *, |
| 53 | const char *, unsigned int, struct path *); | 52 | const char *, unsigned int, struct path *); |
| @@ -139,12 +138,6 @@ extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, | |||
| 139 | extern int rw_verify_area(int, struct file *, const loff_t *, size_t); | 138 | extern int rw_verify_area(int, struct file *, const loff_t *, size_t); |
| 140 | 139 | ||
| 141 | /* | 140 | /* |
| 142 | * splice.c | ||
| 143 | */ | ||
| 144 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | ||
| 145 | loff_t *opos, size_t len, unsigned int flags); | ||
| 146 | |||
| 147 | /* | ||
| 148 | * pipe.c | 141 | * pipe.c |
| 149 | */ | 142 | */ |
| 150 | extern const struct file_operations pipefifo_fops; | 143 | extern const struct file_operations pipefifo_fops; |
diff --git a/fs/namei.c b/fs/namei.c index 43927d14db67..42df664e95e5 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
| @@ -416,6 +416,7 @@ int __inode_permission(struct inode *inode, int mask) | |||
| 416 | 416 | ||
| 417 | return security_inode_permission(inode, mask); | 417 | return security_inode_permission(inode, mask); |
| 418 | } | 418 | } |
| 419 | EXPORT_SYMBOL(__inode_permission); | ||
| 419 | 420 | ||
| 420 | /** | 421 | /** |
| 421 | * sb_permission - Check superblock-level permissions | 422 | * sb_permission - Check superblock-level permissions |
| @@ -2383,22 +2384,17 @@ kern_path_mountpoint(int dfd, const char *name, struct path *path, | |||
| 2383 | } | 2384 | } |
| 2384 | EXPORT_SYMBOL(kern_path_mountpoint); | 2385 | EXPORT_SYMBOL(kern_path_mountpoint); |
| 2385 | 2386 | ||
| 2386 | /* | 2387 | int __check_sticky(struct inode *dir, struct inode *inode) |
| 2387 | * It's inline, so penalty for filesystems that don't use sticky bit is | ||
| 2388 | * minimal. | ||
| 2389 | */ | ||
| 2390 | static inline int check_sticky(struct inode *dir, struct inode *inode) | ||
| 2391 | { | 2388 | { |
| 2392 | kuid_t fsuid = current_fsuid(); | 2389 | kuid_t fsuid = current_fsuid(); |
| 2393 | 2390 | ||
| 2394 | if (!(dir->i_mode & S_ISVTX)) | ||
| 2395 | return 0; | ||
| 2396 | if (uid_eq(inode->i_uid, fsuid)) | 2391 | if (uid_eq(inode->i_uid, fsuid)) |
| 2397 | return 0; | 2392 | return 0; |
| 2398 | if (uid_eq(dir->i_uid, fsuid)) | 2393 | if (uid_eq(dir->i_uid, fsuid)) |
| 2399 | return 0; | 2394 | return 0; |
| 2400 | return !capable_wrt_inode_uidgid(inode, CAP_FOWNER); | 2395 | return !capable_wrt_inode_uidgid(inode, CAP_FOWNER); |
| 2401 | } | 2396 | } |
| 2397 | EXPORT_SYMBOL(__check_sticky); | ||
| 2402 | 2398 | ||
| 2403 | /* | 2399 | /* |
| 2404 | * Check whether we can remove a link victim from directory dir, check | 2400 | * Check whether we can remove a link victim from directory dir, check |
| @@ -3064,9 +3060,12 @@ finish_open_created: | |||
| 3064 | error = may_open(&nd->path, acc_mode, open_flag); | 3060 | error = may_open(&nd->path, acc_mode, open_flag); |
| 3065 | if (error) | 3061 | if (error) |
| 3066 | goto out; | 3062 | goto out; |
| 3067 | file->f_path.mnt = nd->path.mnt; | 3063 | |
| 3068 | error = finish_open(file, nd->path.dentry, NULL, opened); | 3064 | BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ |
| 3069 | if (error) { | 3065 | error = vfs_open(&nd->path, file, current_cred()); |
| 3066 | if (!error) { | ||
| 3067 | *opened |= FILE_OPENED; | ||
| 3068 | } else { | ||
| 3070 | if (error == -EOPENSTALE) | 3069 | if (error == -EOPENSTALE) |
| 3071 | goto stale_open; | 3070 | goto stale_open; |
| 3072 | goto out; | 3071 | goto out; |
| @@ -4210,12 +4209,16 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, | |||
| 4210 | bool should_retry = false; | 4209 | bool should_retry = false; |
| 4211 | int error; | 4210 | int error; |
| 4212 | 4211 | ||
| 4213 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) | 4212 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) |
| 4214 | return -EINVAL; | 4213 | return -EINVAL; |
| 4215 | 4214 | ||
| 4216 | if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE)) | 4215 | if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) && |
| 4216 | (flags & RENAME_EXCHANGE)) | ||
| 4217 | return -EINVAL; | 4217 | return -EINVAL; |
| 4218 | 4218 | ||
| 4219 | if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD)) | ||
| 4220 | return -EPERM; | ||
| 4221 | |||
| 4219 | retry: | 4222 | retry: |
| 4220 | from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); | 4223 | from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); |
| 4221 | if (IS_ERR(from)) { | 4224 | if (IS_ERR(from)) { |
| @@ -4347,6 +4350,20 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna | |||
| 4347 | return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); | 4350 | return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); |
| 4348 | } | 4351 | } |
| 4349 | 4352 | ||
| 4353 | int vfs_whiteout(struct inode *dir, struct dentry *dentry) | ||
| 4354 | { | ||
| 4355 | int error = may_create(dir, dentry); | ||
| 4356 | if (error) | ||
| 4357 | return error; | ||
| 4358 | |||
| 4359 | if (!dir->i_op->mknod) | ||
| 4360 | return -EPERM; | ||
| 4361 | |||
| 4362 | return dir->i_op->mknod(dir, dentry, | ||
| 4363 | S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); | ||
| 4364 | } | ||
| 4365 | EXPORT_SYMBOL(vfs_whiteout); | ||
| 4366 | |||
| 4350 | int readlink_copy(char __user *buffer, int buflen, const char *link) | 4367 | int readlink_copy(char __user *buffer, int buflen, const char *link) |
| 4351 | { | 4368 | { |
| 4352 | int len = PTR_ERR(link); | 4369 | int len = PTR_ERR(link); |
diff --git a/fs/namespace.c b/fs/namespace.c index fbba8b17330d..5b66b2b3624d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
| @@ -1686,6 +1686,33 @@ void drop_collected_mounts(struct vfsmount *mnt) | |||
| 1686 | namespace_unlock(); | 1686 | namespace_unlock(); |
| 1687 | } | 1687 | } |
| 1688 | 1688 | ||
| 1689 | /** | ||
| 1690 | * clone_private_mount - create a private clone of a path | ||
| 1691 | * | ||
| 1692 | * This creates a new vfsmount, which will be the clone of @path. The new will | ||
| 1693 | * not be attached anywhere in the namespace and will be private (i.e. changes | ||
| 1694 | * to the originating mount won't be propagated into this). | ||
| 1695 | * | ||
| 1696 | * Release with mntput(). | ||
| 1697 | */ | ||
| 1698 | struct vfsmount *clone_private_mount(struct path *path) | ||
| 1699 | { | ||
| 1700 | struct mount *old_mnt = real_mount(path->mnt); | ||
| 1701 | struct mount *new_mnt; | ||
| 1702 | |||
| 1703 | if (IS_MNT_UNBINDABLE(old_mnt)) | ||
| 1704 | return ERR_PTR(-EINVAL); | ||
| 1705 | |||
| 1706 | down_read(&namespace_sem); | ||
| 1707 | new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); | ||
| 1708 | up_read(&namespace_sem); | ||
| 1709 | if (IS_ERR(new_mnt)) | ||
| 1710 | return ERR_CAST(new_mnt); | ||
| 1711 | |||
| 1712 | return &new_mnt->mnt; | ||
| 1713 | } | ||
| 1714 | EXPORT_SYMBOL_GPL(clone_private_mount); | ||
| 1715 | |||
| 1689 | int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, | 1716 | int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, |
| 1690 | struct vfsmount *root) | 1717 | struct vfsmount *root) |
| 1691 | { | 1718 | { |
| @@ -823,8 +823,7 @@ struct file *dentry_open(const struct path *path, int flags, | |||
| 823 | f = get_empty_filp(); | 823 | f = get_empty_filp(); |
| 824 | if (!IS_ERR(f)) { | 824 | if (!IS_ERR(f)) { |
| 825 | f->f_flags = flags; | 825 | f->f_flags = flags; |
| 826 | f->f_path = *path; | 826 | error = vfs_open(path, f, cred); |
| 827 | error = do_dentry_open(f, NULL, cred); | ||
| 828 | if (!error) { | 827 | if (!error) { |
| 829 | /* from now on we need fput() to dispose of f */ | 828 | /* from now on we need fput() to dispose of f */ |
| 830 | error = open_check_o_direct(f); | 829 | error = open_check_o_direct(f); |
| @@ -841,6 +840,26 @@ struct file *dentry_open(const struct path *path, int flags, | |||
| 841 | } | 840 | } |
| 842 | EXPORT_SYMBOL(dentry_open); | 841 | EXPORT_SYMBOL(dentry_open); |
| 843 | 842 | ||
| 843 | /** | ||
| 844 | * vfs_open - open the file at the given path | ||
| 845 | * @path: path to open | ||
| 846 | * @filp: newly allocated file with f_flag initialized | ||
| 847 | * @cred: credentials to use | ||
| 848 | */ | ||
| 849 | int vfs_open(const struct path *path, struct file *filp, | ||
| 850 | const struct cred *cred) | ||
| 851 | { | ||
| 852 | struct inode *inode = path->dentry->d_inode; | ||
| 853 | |||
| 854 | if (inode->i_op->dentry_open) | ||
| 855 | return inode->i_op->dentry_open(path->dentry, filp, cred); | ||
| 856 | else { | ||
| 857 | filp->f_path = *path; | ||
| 858 | return do_dentry_open(filp, NULL, cred); | ||
| 859 | } | ||
| 860 | } | ||
| 861 | EXPORT_SYMBOL(vfs_open); | ||
| 862 | |||
| 844 | static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) | 863 | static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) |
| 845 | { | 864 | { |
| 846 | int lookup_flags = 0; | 865 | int lookup_flags = 0; |
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig new file mode 100644 index 000000000000..e60125976873 --- /dev/null +++ b/fs/overlayfs/Kconfig | |||
| @@ -0,0 +1,10 @@ | |||
| 1 | config OVERLAYFS_FS | ||
| 2 | tristate "Overlay filesystem support" | ||
| 3 | help | ||
| 4 | An overlay filesystem combines two filesystems - an 'upper' filesystem | ||
| 5 | and a 'lower' filesystem. When a name exists in both filesystems, the | ||
| 6 | object in the 'upper' filesystem is visible while the object in the | ||
| 7 | 'lower' filesystem is either hidden or, in the case of directories, | ||
| 8 | merged with the 'upper' object. | ||
| 9 | |||
| 10 | For more information see Documentation/filesystems/overlayfs.txt | ||
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile new file mode 100644 index 000000000000..8f91889480d0 --- /dev/null +++ b/fs/overlayfs/Makefile | |||
| @@ -0,0 +1,7 @@ | |||
| 1 | # | ||
| 2 | # Makefile for the overlay filesystem. | ||
| 3 | # | ||
| 4 | |||
| 5 | obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o | ||
| 6 | |||
| 7 | overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o | ||
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c new file mode 100644 index 000000000000..ea10a8719107 --- /dev/null +++ b/fs/overlayfs/copy_up.c | |||
| @@ -0,0 +1,414 @@ | |||
| 1 | /* | ||
| 2 | * | ||
| 3 | * Copyright (C) 2011 Novell Inc. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms of the GNU General Public License version 2 as published by | ||
| 7 | * the Free Software Foundation. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/fs.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/file.h> | ||
| 13 | #include <linux/splice.h> | ||
| 14 | #include <linux/xattr.h> | ||
| 15 | #include <linux/security.h> | ||
| 16 | #include <linux/uaccess.h> | ||
| 17 | #include <linux/sched.h> | ||
| 18 | #include <linux/namei.h> | ||
| 19 | #include "overlayfs.h" | ||
| 20 | |||
| 21 | #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) | ||
| 22 | |||
| 23 | int ovl_copy_xattr(struct dentry *old, struct dentry *new) | ||
| 24 | { | ||
| 25 | ssize_t list_size, size; | ||
| 26 | char *buf, *name, *value; | ||
| 27 | int error; | ||
| 28 | |||
| 29 | if (!old->d_inode->i_op->getxattr || | ||
| 30 | !new->d_inode->i_op->getxattr) | ||
| 31 | return 0; | ||
| 32 | |||
| 33 | list_size = vfs_listxattr(old, NULL, 0); | ||
| 34 | if (list_size <= 0) { | ||
| 35 | if (list_size == -EOPNOTSUPP) | ||
| 36 | return 0; | ||
| 37 | return list_size; | ||
| 38 | } | ||
| 39 | |||
| 40 | buf = kzalloc(list_size, GFP_KERNEL); | ||
| 41 | if (!buf) | ||
| 42 | return -ENOMEM; | ||
| 43 | |||
| 44 | error = -ENOMEM; | ||
| 45 | value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); | ||
| 46 | if (!value) | ||
| 47 | goto out; | ||
| 48 | |||
| 49 | list_size = vfs_listxattr(old, buf, list_size); | ||
| 50 | if (list_size <= 0) { | ||
| 51 | error = list_size; | ||
| 52 | goto out_free_value; | ||
| 53 | } | ||
| 54 | |||
| 55 | for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { | ||
| 56 | size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); | ||
| 57 | if (size <= 0) { | ||
| 58 | error = size; | ||
| 59 | goto out_free_value; | ||
| 60 | } | ||
| 61 | error = vfs_setxattr(new, name, value, size, 0); | ||
| 62 | if (error) | ||
| 63 | goto out_free_value; | ||
| 64 | } | ||
| 65 | |||
| 66 | out_free_value: | ||
| 67 | kfree(value); | ||
| 68 | out: | ||
| 69 | kfree(buf); | ||
| 70 | return error; | ||
| 71 | } | ||
| 72 | |||
| 73 | static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) | ||
| 74 | { | ||
| 75 | struct file *old_file; | ||
| 76 | struct file *new_file; | ||
| 77 | loff_t old_pos = 0; | ||
| 78 | loff_t new_pos = 0; | ||
| 79 | int error = 0; | ||
| 80 | |||
| 81 | if (len == 0) | ||
| 82 | return 0; | ||
| 83 | |||
| 84 | old_file = ovl_path_open(old, O_RDONLY); | ||
| 85 | if (IS_ERR(old_file)) | ||
| 86 | return PTR_ERR(old_file); | ||
| 87 | |||
| 88 | new_file = ovl_path_open(new, O_WRONLY); | ||
| 89 | if (IS_ERR(new_file)) { | ||
| 90 | error = PTR_ERR(new_file); | ||
| 91 | goto out_fput; | ||
| 92 | } | ||
| 93 | |||
| 94 | /* FIXME: copy up sparse files efficiently */ | ||
| 95 | while (len) { | ||
| 96 | size_t this_len = OVL_COPY_UP_CHUNK_SIZE; | ||
| 97 | long bytes; | ||
| 98 | |||
| 99 | if (len < this_len) | ||
| 100 | this_len = len; | ||
| 101 | |||
| 102 | if (signal_pending_state(TASK_KILLABLE, current)) { | ||
| 103 | error = -EINTR; | ||
| 104 | break; | ||
| 105 | } | ||
| 106 | |||
| 107 | bytes = do_splice_direct(old_file, &old_pos, | ||
| 108 | new_file, &new_pos, | ||
| 109 | this_len, SPLICE_F_MOVE); | ||
| 110 | if (bytes <= 0) { | ||
| 111 | error = bytes; | ||
| 112 | break; | ||
| 113 | } | ||
| 114 | WARN_ON(old_pos != new_pos); | ||
| 115 | |||
| 116 | len -= bytes; | ||
| 117 | } | ||
| 118 | |||
| 119 | fput(new_file); | ||
| 120 | out_fput: | ||
| 121 | fput(old_file); | ||
| 122 | return error; | ||
| 123 | } | ||
| 124 | |||
| 125 | static char *ovl_read_symlink(struct dentry *realdentry) | ||
| 126 | { | ||
| 127 | int res; | ||
| 128 | char *buf; | ||
| 129 | struct inode *inode = realdentry->d_inode; | ||
| 130 | mm_segment_t old_fs; | ||
| 131 | |||
| 132 | res = -EINVAL; | ||
| 133 | if (!inode->i_op->readlink) | ||
| 134 | goto err; | ||
| 135 | |||
| 136 | res = -ENOMEM; | ||
| 137 | buf = (char *) __get_free_page(GFP_KERNEL); | ||
| 138 | if (!buf) | ||
| 139 | goto err; | ||
| 140 | |||
| 141 | old_fs = get_fs(); | ||
| 142 | set_fs(get_ds()); | ||
| 143 | /* The cast to a user pointer is valid due to the set_fs() */ | ||
| 144 | res = inode->i_op->readlink(realdentry, | ||
| 145 | (char __user *)buf, PAGE_SIZE - 1); | ||
| 146 | set_fs(old_fs); | ||
| 147 | if (res < 0) { | ||
| 148 | free_page((unsigned long) buf); | ||
| 149 | goto err; | ||
| 150 | } | ||
| 151 | buf[res] = '\0'; | ||
| 152 | |||
| 153 | return buf; | ||
| 154 | |||
| 155 | err: | ||
| 156 | return ERR_PTR(res); | ||
| 157 | } | ||
| 158 | |||
| 159 | static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) | ||
| 160 | { | ||
| 161 | struct iattr attr = { | ||
| 162 | .ia_valid = | ||
| 163 | ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET, | ||
| 164 | .ia_atime = stat->atime, | ||
| 165 | .ia_mtime = stat->mtime, | ||
| 166 | }; | ||
| 167 | |||
| 168 | return notify_change(upperdentry, &attr, NULL); | ||
| 169 | } | ||
| 170 | |||
| 171 | int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) | ||
| 172 | { | ||
| 173 | int err = 0; | ||
| 174 | |||
| 175 | if (!S_ISLNK(stat->mode)) { | ||
| 176 | struct iattr attr = { | ||
| 177 | .ia_valid = ATTR_MODE, | ||
| 178 | .ia_mode = stat->mode, | ||
| 179 | }; | ||
| 180 | err = notify_change(upperdentry, &attr, NULL); | ||
| 181 | } | ||
| 182 | if (!err) { | ||
| 183 | struct iattr attr = { | ||
| 184 | .ia_valid = ATTR_UID | ATTR_GID, | ||
| 185 | .ia_uid = stat->uid, | ||
| 186 | .ia_gid = stat->gid, | ||
| 187 | }; | ||
| 188 | err = notify_change(upperdentry, &attr, NULL); | ||
| 189 | } | ||
| 190 | if (!err) | ||
| 191 | ovl_set_timestamps(upperdentry, stat); | ||
| 192 | |||
| 193 | return err; | ||
| 194 | |||
| 195 | } | ||
| 196 | |||
| 197 | static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, | ||
| 198 | struct dentry *dentry, struct path *lowerpath, | ||
| 199 | struct kstat *stat, struct iattr *attr, | ||
| 200 | const char *link) | ||
| 201 | { | ||
| 202 | struct inode *wdir = workdir->d_inode; | ||
| 203 | struct inode *udir = upperdir->d_inode; | ||
| 204 | struct dentry *newdentry = NULL; | ||
| 205 | struct dentry *upper = NULL; | ||
| 206 | umode_t mode = stat->mode; | ||
| 207 | int err; | ||
| 208 | |||
| 209 | newdentry = ovl_lookup_temp(workdir, dentry); | ||
| 210 | err = PTR_ERR(newdentry); | ||
| 211 | if (IS_ERR(newdentry)) | ||
| 212 | goto out; | ||
| 213 | |||
| 214 | upper = lookup_one_len(dentry->d_name.name, upperdir, | ||
| 215 | dentry->d_name.len); | ||
| 216 | err = PTR_ERR(upper); | ||
| 217 | if (IS_ERR(upper)) | ||
| 218 | goto out1; | ||
| 219 | |||
| 220 | /* Can't properly set mode on creation because of the umask */ | ||
| 221 | stat->mode &= S_IFMT; | ||
| 222 | err = ovl_create_real(wdir, newdentry, stat, link, NULL, true); | ||
| 223 | stat->mode = mode; | ||
| 224 | if (err) | ||
| 225 | goto out2; | ||
| 226 | |||
| 227 | if (S_ISREG(stat->mode)) { | ||
| 228 | struct path upperpath; | ||
| 229 | ovl_path_upper(dentry, &upperpath); | ||
| 230 | BUG_ON(upperpath.dentry != NULL); | ||
| 231 | upperpath.dentry = newdentry; | ||
| 232 | |||
| 233 | err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); | ||
| 234 | if (err) | ||
| 235 | goto out_cleanup; | ||
| 236 | } | ||
| 237 | |||
| 238 | err = ovl_copy_xattr(lowerpath->dentry, newdentry); | ||
| 239 | if (err) | ||
| 240 | goto out_cleanup; | ||
| 241 | |||
| 242 | mutex_lock(&newdentry->d_inode->i_mutex); | ||
| 243 | err = ovl_set_attr(newdentry, stat); | ||
| 244 | if (!err && attr) | ||
| 245 | err = notify_change(newdentry, attr, NULL); | ||
| 246 | mutex_unlock(&newdentry->d_inode->i_mutex); | ||
| 247 | if (err) | ||
| 248 | goto out_cleanup; | ||
| 249 | |||
| 250 | err = ovl_do_rename(wdir, newdentry, udir, upper, 0); | ||
| 251 | if (err) | ||
| 252 | goto out_cleanup; | ||
| 253 | |||
| 254 | ovl_dentry_update(dentry, newdentry); | ||
| 255 | newdentry = NULL; | ||
| 256 | |||
| 257 | /* | ||
| 258 | * Non-directores become opaque when copied up. | ||
| 259 | */ | ||
| 260 | if (!S_ISDIR(stat->mode)) | ||
| 261 | ovl_dentry_set_opaque(dentry, true); | ||
| 262 | out2: | ||
| 263 | dput(upper); | ||
| 264 | out1: | ||
| 265 | dput(newdentry); | ||
| 266 | out: | ||
| 267 | return err; | ||
| 268 | |||
| 269 | out_cleanup: | ||
| 270 | ovl_cleanup(wdir, newdentry); | ||
| 271 | goto out; | ||
| 272 | } | ||
| 273 | |||
| 274 | /* | ||
| 275 | * Copy up a single dentry | ||
| 276 | * | ||
| 277 | * Directory renames only allowed on "pure upper" (already created on | ||
| 278 | * upper filesystem, never copied up). Directories which are on lower or | ||
| 279 | * are merged may not be renamed. For these -EXDEV is returned and | ||
| 280 | * userspace has to deal with it. This means, when copying up a | ||
| 281 | * directory we can rely on it and ancestors being stable. | ||
| 282 | * | ||
| 283 | * Non-directory renames start with copy up of source if necessary. The | ||
| 284 | * actual rename will only proceed once the copy up was successful. Copy | ||
| 285 | * up uses upper parent i_mutex for exclusion. Since rename can change | ||
| 286 | * d_parent it is possible that the copy up will lock the old parent. At | ||
| 287 | * that point the file will have already been copied up anyway. | ||
| 288 | */ | ||
| 289 | int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, | ||
| 290 | struct path *lowerpath, struct kstat *stat, | ||
| 291 | struct iattr *attr) | ||
| 292 | { | ||
| 293 | struct dentry *workdir = ovl_workdir(dentry); | ||
| 294 | int err; | ||
| 295 | struct kstat pstat; | ||
| 296 | struct path parentpath; | ||
| 297 | struct dentry *upperdir; | ||
| 298 | struct dentry *upperdentry; | ||
| 299 | const struct cred *old_cred; | ||
| 300 | struct cred *override_cred; | ||
| 301 | char *link = NULL; | ||
| 302 | |||
| 303 | ovl_path_upper(parent, &parentpath); | ||
| 304 | upperdir = parentpath.dentry; | ||
| 305 | |||
| 306 | err = vfs_getattr(&parentpath, &pstat); | ||
| 307 | if (err) | ||
| 308 | return err; | ||
| 309 | |||
| 310 | if (S_ISLNK(stat->mode)) { | ||
| 311 | link = ovl_read_symlink(lowerpath->dentry); | ||
| 312 | if (IS_ERR(link)) | ||
| 313 | return PTR_ERR(link); | ||
| 314 | } | ||
| 315 | |||
| 316 | err = -ENOMEM; | ||
| 317 | override_cred = prepare_creds(); | ||
| 318 | if (!override_cred) | ||
| 319 | goto out_free_link; | ||
| 320 | |||
| 321 | override_cred->fsuid = stat->uid; | ||
| 322 | override_cred->fsgid = stat->gid; | ||
| 323 | /* | ||
| 324 | * CAP_SYS_ADMIN for copying up extended attributes | ||
| 325 | * CAP_DAC_OVERRIDE for create | ||
| 326 | * CAP_FOWNER for chmod, timestamp update | ||
| 327 | * CAP_FSETID for chmod | ||
| 328 | * CAP_CHOWN for chown | ||
| 329 | * CAP_MKNOD for mknod | ||
| 330 | */ | ||
| 331 | cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); | ||
| 332 | cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); | ||
| 333 | cap_raise(override_cred->cap_effective, CAP_FOWNER); | ||
| 334 | cap_raise(override_cred->cap_effective, CAP_FSETID); | ||
| 335 | cap_raise(override_cred->cap_effective, CAP_CHOWN); | ||
| 336 | cap_raise(override_cred->cap_effective, CAP_MKNOD); | ||
| 337 | old_cred = override_creds(override_cred); | ||
| 338 | |||
| 339 | err = -EIO; | ||
| 340 | if (lock_rename(workdir, upperdir) != NULL) { | ||
| 341 | pr_err("overlayfs: failed to lock workdir+upperdir\n"); | ||
| 342 | goto out_unlock; | ||
| 343 | } | ||
| 344 | upperdentry = ovl_dentry_upper(dentry); | ||
| 345 | if (upperdentry) { | ||
| 346 | unlock_rename(workdir, upperdir); | ||
| 347 | err = 0; | ||
| 348 | /* Raced with another copy-up? Do the setattr here */ | ||
| 349 | if (attr) { | ||
| 350 | mutex_lock(&upperdentry->d_inode->i_mutex); | ||
| 351 | err = notify_change(upperdentry, attr, NULL); | ||
| 352 | mutex_unlock(&upperdentry->d_inode->i_mutex); | ||
| 353 | } | ||
| 354 | goto out_put_cred; | ||
| 355 | } | ||
| 356 | |||
| 357 | err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, | ||
| 358 | stat, attr, link); | ||
| 359 | if (!err) { | ||
| 360 | /* Restore timestamps on parent (best effort) */ | ||
| 361 | ovl_set_timestamps(upperdir, &pstat); | ||
| 362 | } | ||
| 363 | out_unlock: | ||
| 364 | unlock_rename(workdir, upperdir); | ||
| 365 | out_put_cred: | ||
| 366 | revert_creds(old_cred); | ||
| 367 | put_cred(override_cred); | ||
| 368 | |||
| 369 | out_free_link: | ||
| 370 | if (link) | ||
| 371 | free_page((unsigned long) link); | ||
| 372 | |||
| 373 | return err; | ||
| 374 | } | ||
| 375 | |||
| 376 | int ovl_copy_up(struct dentry *dentry) | ||
| 377 | { | ||
| 378 | int err; | ||
| 379 | |||
| 380 | err = 0; | ||
| 381 | while (!err) { | ||
| 382 | struct dentry *next; | ||
| 383 | struct dentry *parent; | ||
| 384 | struct path lowerpath; | ||
| 385 | struct kstat stat; | ||
| 386 | enum ovl_path_type type = ovl_path_type(dentry); | ||
| 387 | |||
| 388 | if (type != OVL_PATH_LOWER) | ||
| 389 | break; | ||
| 390 | |||
| 391 | next = dget(dentry); | ||
| 392 | /* find the topmost dentry not yet copied up */ | ||
| 393 | for (;;) { | ||
| 394 | parent = dget_parent(next); | ||
| 395 | |||
| 396 | type = ovl_path_type(parent); | ||
| 397 | if (type != OVL_PATH_LOWER) | ||
| 398 | break; | ||
| 399 | |||
| 400 | dput(next); | ||
| 401 | next = parent; | ||
| 402 | } | ||
| 403 | |||
| 404 | ovl_path_lower(next, &lowerpath); | ||
| 405 | err = vfs_getattr(&lowerpath, &stat); | ||
| 406 | if (!err) | ||
| 407 | err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL); | ||
| 408 | |||
| 409 | dput(parent); | ||
| 410 | dput(next); | ||
| 411 | } | ||
| 412 | |||
| 413 | return err; | ||
| 414 | } | ||
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c new file mode 100644 index 000000000000..15cd91ad9940 --- /dev/null +++ b/fs/overlayfs/dir.c | |||
| @@ -0,0 +1,921 @@ | |||
| 1 | /* | ||
| 2 | * | ||
| 3 | * Copyright (C) 2011 Novell Inc. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms of the GNU General Public License version 2 as published by | ||
| 7 | * the Free Software Foundation. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/fs.h> | ||
| 11 | #include <linux/namei.h> | ||
| 12 | #include <linux/xattr.h> | ||
| 13 | #include <linux/security.h> | ||
| 14 | #include <linux/cred.h> | ||
| 15 | #include "overlayfs.h" | ||
| 16 | |||
| 17 | void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) | ||
| 18 | { | ||
| 19 | int err; | ||
| 20 | |||
| 21 | dget(wdentry); | ||
| 22 | if (S_ISDIR(wdentry->d_inode->i_mode)) | ||
| 23 | err = ovl_do_rmdir(wdir, wdentry); | ||
| 24 | else | ||
| 25 | err = ovl_do_unlink(wdir, wdentry); | ||
| 26 | dput(wdentry); | ||
| 27 | |||
| 28 | if (err) { | ||
| 29 | pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n", | ||
| 30 | wdentry, err); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | |||
| 34 | struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry) | ||
| 35 | { | ||
| 36 | struct dentry *temp; | ||
| 37 | char name[20]; | ||
| 38 | |||
| 39 | snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry); | ||
| 40 | |||
| 41 | temp = lookup_one_len(name, workdir, strlen(name)); | ||
| 42 | if (!IS_ERR(temp) && temp->d_inode) { | ||
| 43 | pr_err("overlayfs: workdir/%s already exists\n", name); | ||
| 44 | dput(temp); | ||
| 45 | temp = ERR_PTR(-EIO); | ||
| 46 | } | ||
| 47 | |||
| 48 | return temp; | ||
| 49 | } | ||
| 50 | |||
| 51 | /* caller holds i_mutex on workdir */ | ||
| 52 | static struct dentry *ovl_whiteout(struct dentry *workdir, | ||
| 53 | struct dentry *dentry) | ||
| 54 | { | ||
| 55 | int err; | ||
| 56 | struct dentry *whiteout; | ||
| 57 | struct inode *wdir = workdir->d_inode; | ||
| 58 | |||
| 59 | whiteout = ovl_lookup_temp(workdir, dentry); | ||
| 60 | if (IS_ERR(whiteout)) | ||
| 61 | return whiteout; | ||
| 62 | |||
| 63 | err = ovl_do_whiteout(wdir, whiteout); | ||
| 64 | if (err) { | ||
| 65 | dput(whiteout); | ||
| 66 | whiteout = ERR_PTR(err); | ||
| 67 | } | ||
| 68 | |||
| 69 | return whiteout; | ||
| 70 | } | ||
| 71 | |||
| 72 | int ovl_create_real(struct inode *dir, struct dentry *newdentry, | ||
| 73 | struct kstat *stat, const char *link, | ||
| 74 | struct dentry *hardlink, bool debug) | ||
| 75 | { | ||
| 76 | int err; | ||
| 77 | |||
| 78 | if (newdentry->d_inode) | ||
| 79 | return -ESTALE; | ||
| 80 | |||
| 81 | if (hardlink) { | ||
| 82 | err = ovl_do_link(hardlink, dir, newdentry, debug); | ||
| 83 | } else { | ||
| 84 | switch (stat->mode & S_IFMT) { | ||
| 85 | case S_IFREG: | ||
| 86 | err = ovl_do_create(dir, newdentry, stat->mode, debug); | ||
| 87 | break; | ||
| 88 | |||
| 89 | case S_IFDIR: | ||
| 90 | err = ovl_do_mkdir(dir, newdentry, stat->mode, debug); | ||
| 91 | break; | ||
| 92 | |||
| 93 | case S_IFCHR: | ||
| 94 | case S_IFBLK: | ||
| 95 | case S_IFIFO: | ||
| 96 | case S_IFSOCK: | ||
| 97 | err = ovl_do_mknod(dir, newdentry, | ||
| 98 | stat->mode, stat->rdev, debug); | ||
| 99 | break; | ||
| 100 | |||
| 101 | case S_IFLNK: | ||
| 102 | err = ovl_do_symlink(dir, newdentry, link, debug); | ||
| 103 | break; | ||
| 104 | |||
| 105 | default: | ||
| 106 | err = -EPERM; | ||
| 107 | } | ||
| 108 | } | ||
| 109 | if (!err && WARN_ON(!newdentry->d_inode)) { | ||
| 110 | /* | ||
| 111 | * Not quite sure if non-instantiated dentry is legal or not. | ||
| 112 | * VFS doesn't seem to care so check and warn here. | ||
| 113 | */ | ||
| 114 | err = -ENOENT; | ||
| 115 | } | ||
| 116 | return err; | ||
| 117 | } | ||
| 118 | |||
| 119 | static int ovl_set_opaque(struct dentry *upperdentry) | ||
| 120 | { | ||
| 121 | return ovl_do_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0); | ||
| 122 | } | ||
| 123 | |||
| 124 | static void ovl_remove_opaque(struct dentry *upperdentry) | ||
| 125 | { | ||
| 126 | int err; | ||
| 127 | |||
| 128 | err = ovl_do_removexattr(upperdentry, ovl_opaque_xattr); | ||
| 129 | if (err) { | ||
| 130 | pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n", | ||
| 131 | upperdentry->d_name.name, err); | ||
| 132 | } | ||
| 133 | } | ||
| 134 | |||
| 135 | static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
| 136 | struct kstat *stat) | ||
| 137 | { | ||
| 138 | int err; | ||
| 139 | enum ovl_path_type type; | ||
| 140 | struct path realpath; | ||
| 141 | |||
| 142 | type = ovl_path_real(dentry, &realpath); | ||
| 143 | err = vfs_getattr(&realpath, stat); | ||
| 144 | if (err) | ||
| 145 | return err; | ||
| 146 | |||
| 147 | stat->dev = dentry->d_sb->s_dev; | ||
| 148 | stat->ino = dentry->d_inode->i_ino; | ||
| 149 | |||
| 150 | /* | ||
| 151 | * It's probably not worth it to count subdirs to get the | ||
| 152 | * correct link count. nlink=1 seems to pacify 'find' and | ||
| 153 | * other utilities. | ||
| 154 | */ | ||
| 155 | if (type == OVL_PATH_MERGE) | ||
| 156 | stat->nlink = 1; | ||
| 157 | |||
| 158 | return 0; | ||
| 159 | } | ||
| 160 | |||
| 161 | static int ovl_create_upper(struct dentry *dentry, struct inode *inode, | ||
| 162 | struct kstat *stat, const char *link, | ||
| 163 | struct dentry *hardlink) | ||
| 164 | { | ||
| 165 | struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); | ||
| 166 | struct inode *udir = upperdir->d_inode; | ||
| 167 | struct dentry *newdentry; | ||
| 168 | int err; | ||
| 169 | |||
| 170 | mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT); | ||
| 171 | newdentry = lookup_one_len(dentry->d_name.name, upperdir, | ||
| 172 | dentry->d_name.len); | ||
| 173 | err = PTR_ERR(newdentry); | ||
| 174 | if (IS_ERR(newdentry)) | ||
| 175 | goto out_unlock; | ||
| 176 | err = ovl_create_real(udir, newdentry, stat, link, hardlink, false); | ||
| 177 | if (err) | ||
| 178 | goto out_dput; | ||
| 179 | |||
| 180 | ovl_dentry_version_inc(dentry->d_parent); | ||
| 181 | ovl_dentry_update(dentry, newdentry); | ||
| 182 | ovl_copyattr(newdentry->d_inode, inode); | ||
| 183 | d_instantiate(dentry, inode); | ||
| 184 | newdentry = NULL; | ||
| 185 | out_dput: | ||
| 186 | dput(newdentry); | ||
| 187 | out_unlock: | ||
| 188 | mutex_unlock(&udir->i_mutex); | ||
| 189 | return err; | ||
| 190 | } | ||
| 191 | |||
| 192 | static int ovl_lock_rename_workdir(struct dentry *workdir, | ||
| 193 | struct dentry *upperdir) | ||
| 194 | { | ||
| 195 | /* Workdir should not be the same as upperdir */ | ||
| 196 | if (workdir == upperdir) | ||
| 197 | goto err; | ||
| 198 | |||
| 199 | /* Workdir should not be subdir of upperdir and vice versa */ | ||
| 200 | if (lock_rename(workdir, upperdir) != NULL) | ||
| 201 | goto err_unlock; | ||
| 202 | |||
| 203 | return 0; | ||
| 204 | |||
| 205 | err_unlock: | ||
| 206 | unlock_rename(workdir, upperdir); | ||
| 207 | err: | ||
| 208 | pr_err("overlayfs: failed to lock workdir+upperdir\n"); | ||
| 209 | return -EIO; | ||
| 210 | } | ||
| 211 | |||
| 212 | static struct dentry *ovl_clear_empty(struct dentry *dentry, | ||
| 213 | struct list_head *list) | ||
| 214 | { | ||
| 215 | struct dentry *workdir = ovl_workdir(dentry); | ||
| 216 | struct inode *wdir = workdir->d_inode; | ||
| 217 | struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); | ||
| 218 | struct inode *udir = upperdir->d_inode; | ||
| 219 | struct path upperpath; | ||
| 220 | struct dentry *upper; | ||
| 221 | struct dentry *opaquedir; | ||
| 222 | struct kstat stat; | ||
| 223 | int err; | ||
| 224 | |||
| 225 | err = ovl_lock_rename_workdir(workdir, upperdir); | ||
| 226 | if (err) | ||
| 227 | goto out; | ||
| 228 | |||
| 229 | ovl_path_upper(dentry, &upperpath); | ||
| 230 | err = vfs_getattr(&upperpath, &stat); | ||
| 231 | if (err) | ||
| 232 | goto out_unlock; | ||
| 233 | |||
| 234 | err = -ESTALE; | ||
| 235 | if (!S_ISDIR(stat.mode)) | ||
| 236 | goto out_unlock; | ||
| 237 | upper = upperpath.dentry; | ||
| 238 | if (upper->d_parent->d_inode != udir) | ||
| 239 | goto out_unlock; | ||
| 240 | |||
| 241 | opaquedir = ovl_lookup_temp(workdir, dentry); | ||
| 242 | err = PTR_ERR(opaquedir); | ||
| 243 | if (IS_ERR(opaquedir)) | ||
| 244 | goto out_unlock; | ||
| 245 | |||
| 246 | err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true); | ||
| 247 | if (err) | ||
| 248 | goto out_dput; | ||
| 249 | |||
| 250 | err = ovl_copy_xattr(upper, opaquedir); | ||
| 251 | if (err) | ||
| 252 | goto out_cleanup; | ||
| 253 | |||
| 254 | err = ovl_set_opaque(opaquedir); | ||
| 255 | if (err) | ||
| 256 | goto out_cleanup; | ||
| 257 | |||
| 258 | mutex_lock(&opaquedir->d_inode->i_mutex); | ||
| 259 | err = ovl_set_attr(opaquedir, &stat); | ||
| 260 | mutex_unlock(&opaquedir->d_inode->i_mutex); | ||
| 261 | if (err) | ||
| 262 | goto out_cleanup; | ||
| 263 | |||
| 264 | err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE); | ||
| 265 | if (err) | ||
| 266 | goto out_cleanup; | ||
| 267 | |||
| 268 | ovl_cleanup_whiteouts(upper, list); | ||
| 269 | ovl_cleanup(wdir, upper); | ||
| 270 | unlock_rename(workdir, upperdir); | ||
| 271 | |||
| 272 | /* dentry's upper doesn't match now, get rid of it */ | ||
| 273 | d_drop(dentry); | ||
| 274 | |||
| 275 | return opaquedir; | ||
| 276 | |||
| 277 | out_cleanup: | ||
| 278 | ovl_cleanup(wdir, opaquedir); | ||
| 279 | out_dput: | ||
| 280 | dput(opaquedir); | ||
| 281 | out_unlock: | ||
| 282 | unlock_rename(workdir, upperdir); | ||
| 283 | out: | ||
| 284 | return ERR_PTR(err); | ||
| 285 | } | ||
| 286 | |||
| 287 | static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, | ||
| 288 | enum ovl_path_type type) | ||
| 289 | { | ||
| 290 | int err; | ||
| 291 | struct dentry *ret = NULL; | ||
| 292 | LIST_HEAD(list); | ||
| 293 | |||
| 294 | err = ovl_check_empty_dir(dentry, &list); | ||
| 295 | if (err) | ||
| 296 | ret = ERR_PTR(err); | ||
| 297 | else if (type == OVL_PATH_MERGE) | ||
| 298 | ret = ovl_clear_empty(dentry, &list); | ||
| 299 | |||
| 300 | ovl_cache_free(&list); | ||
| 301 | |||
| 302 | return ret; | ||
| 303 | } | ||
| 304 | |||
| 305 | static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, | ||
| 306 | struct kstat *stat, const char *link, | ||
| 307 | struct dentry *hardlink) | ||
| 308 | { | ||
| 309 | struct dentry *workdir = ovl_workdir(dentry); | ||
| 310 | struct inode *wdir = workdir->d_inode; | ||
| 311 | struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); | ||
| 312 | struct inode *udir = upperdir->d_inode; | ||
| 313 | struct dentry *upper; | ||
| 314 | struct dentry *newdentry; | ||
| 315 | int err; | ||
| 316 | |||
| 317 | err = ovl_lock_rename_workdir(workdir, upperdir); | ||
| 318 | if (err) | ||
| 319 | goto out; | ||
| 320 | |||
| 321 | newdentry = ovl_lookup_temp(workdir, dentry); | ||
| 322 | err = PTR_ERR(newdentry); | ||
| 323 | if (IS_ERR(newdentry)) | ||
| 324 | goto out_unlock; | ||
| 325 | |||
| 326 | upper = lookup_one_len(dentry->d_name.name, upperdir, | ||
| 327 | dentry->d_name.len); | ||
| 328 | err = PTR_ERR(upper); | ||
| 329 | if (IS_ERR(upper)) | ||
| 330 | goto out_dput; | ||
| 331 | |||
| 332 | err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true); | ||
| 333 | if (err) | ||
| 334 | goto out_dput2; | ||
| 335 | |||
| 336 | if (S_ISDIR(stat->mode)) { | ||
| 337 | err = ovl_set_opaque(newdentry); | ||
| 338 | if (err) | ||
| 339 | goto out_cleanup; | ||
| 340 | |||
| 341 | err = ovl_do_rename(wdir, newdentry, udir, upper, | ||
| 342 | RENAME_EXCHANGE); | ||
| 343 | if (err) | ||
| 344 | goto out_cleanup; | ||
| 345 | |||
| 346 | ovl_cleanup(wdir, upper); | ||
| 347 | } else { | ||
| 348 | err = ovl_do_rename(wdir, newdentry, udir, upper, 0); | ||
| 349 | if (err) | ||
| 350 | goto out_cleanup; | ||
| 351 | } | ||
| 352 | ovl_dentry_version_inc(dentry->d_parent); | ||
| 353 | ovl_dentry_update(dentry, newdentry); | ||
| 354 | ovl_copyattr(newdentry->d_inode, inode); | ||
| 355 | d_instantiate(dentry, inode); | ||
| 356 | newdentry = NULL; | ||
| 357 | out_dput2: | ||
| 358 | dput(upper); | ||
| 359 | out_dput: | ||
| 360 | dput(newdentry); | ||
| 361 | out_unlock: | ||
| 362 | unlock_rename(workdir, upperdir); | ||
| 363 | out: | ||
| 364 | return err; | ||
| 365 | |||
| 366 | out_cleanup: | ||
| 367 | ovl_cleanup(wdir, newdentry); | ||
| 368 | goto out_dput2; | ||
| 369 | } | ||
| 370 | |||
| 371 | static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev, | ||
| 372 | const char *link, struct dentry *hardlink) | ||
| 373 | { | ||
| 374 | int err; | ||
| 375 | struct inode *inode; | ||
| 376 | struct kstat stat = { | ||
| 377 | .mode = mode, | ||
| 378 | .rdev = rdev, | ||
| 379 | }; | ||
| 380 | |||
| 381 | err = -ENOMEM; | ||
| 382 | inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata); | ||
| 383 | if (!inode) | ||
| 384 | goto out; | ||
| 385 | |||
| 386 | err = ovl_copy_up(dentry->d_parent); | ||
| 387 | if (err) | ||
| 388 | goto out_iput; | ||
| 389 | |||
| 390 | if (!ovl_dentry_is_opaque(dentry)) { | ||
| 391 | err = ovl_create_upper(dentry, inode, &stat, link, hardlink); | ||
| 392 | } else { | ||
| 393 | const struct cred *old_cred; | ||
| 394 | struct cred *override_cred; | ||
| 395 | |||
| 396 | err = -ENOMEM; | ||
| 397 | override_cred = prepare_creds(); | ||
| 398 | if (!override_cred) | ||
| 399 | goto out_iput; | ||
| 400 | |||
| 401 | /* | ||
| 402 | * CAP_SYS_ADMIN for setting opaque xattr | ||
| 403 | * CAP_DAC_OVERRIDE for create in workdir, rename | ||
| 404 | * CAP_FOWNER for removing whiteout from sticky dir | ||
| 405 | */ | ||
| 406 | cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); | ||
| 407 | cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); | ||
| 408 | cap_raise(override_cred->cap_effective, CAP_FOWNER); | ||
| 409 | old_cred = override_creds(override_cred); | ||
| 410 | |||
| 411 | err = ovl_create_over_whiteout(dentry, inode, &stat, link, | ||
| 412 | hardlink); | ||
| 413 | |||
| 414 | revert_creds(old_cred); | ||
| 415 | put_cred(override_cred); | ||
| 416 | } | ||
| 417 | |||
| 418 | if (!err) | ||
| 419 | inode = NULL; | ||
| 420 | out_iput: | ||
| 421 | iput(inode); | ||
| 422 | out: | ||
| 423 | return err; | ||
| 424 | } | ||
| 425 | |||
| 426 | static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, | ||
| 427 | const char *link) | ||
| 428 | { | ||
| 429 | int err; | ||
| 430 | |||
| 431 | err = ovl_want_write(dentry); | ||
| 432 | if (!err) { | ||
| 433 | err = ovl_create_or_link(dentry, mode, rdev, link, NULL); | ||
| 434 | ovl_drop_write(dentry); | ||
| 435 | } | ||
| 436 | |||
| 437 | return err; | ||
| 438 | } | ||
| 439 | |||
| 440 | static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode, | ||
| 441 | bool excl) | ||
| 442 | { | ||
| 443 | return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL); | ||
| 444 | } | ||
| 445 | |||
| 446 | static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
| 447 | { | ||
| 448 | return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL); | ||
| 449 | } | ||
| 450 | |||
| 451 | static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, | ||
| 452 | dev_t rdev) | ||
| 453 | { | ||
| 454 | /* Don't allow creation of "whiteout" on overlay */ | ||
| 455 | if (S_ISCHR(mode) && rdev == WHITEOUT_DEV) | ||
| 456 | return -EPERM; | ||
| 457 | |||
| 458 | return ovl_create_object(dentry, mode, rdev, NULL); | ||
| 459 | } | ||
| 460 | |||
| 461 | static int ovl_symlink(struct inode *dir, struct dentry *dentry, | ||
| 462 | const char *link) | ||
| 463 | { | ||
| 464 | return ovl_create_object(dentry, S_IFLNK, 0, link); | ||
| 465 | } | ||
| 466 | |||
| 467 | static int ovl_link(struct dentry *old, struct inode *newdir, | ||
| 468 | struct dentry *new) | ||
| 469 | { | ||
| 470 | int err; | ||
| 471 | struct dentry *upper; | ||
| 472 | |||
| 473 | err = ovl_want_write(old); | ||
| 474 | if (err) | ||
| 475 | goto out; | ||
| 476 | |||
| 477 | err = ovl_copy_up(old); | ||
| 478 | if (err) | ||
| 479 | goto out_drop_write; | ||
| 480 | |||
| 481 | upper = ovl_dentry_upper(old); | ||
| 482 | err = ovl_create_or_link(new, upper->d_inode->i_mode, 0, NULL, upper); | ||
| 483 | |||
| 484 | out_drop_write: | ||
| 485 | ovl_drop_write(old); | ||
| 486 | out: | ||
| 487 | return err; | ||
| 488 | } | ||
| 489 | |||
| 490 | static int ovl_remove_and_whiteout(struct dentry *dentry, | ||
| 491 | enum ovl_path_type type, bool is_dir) | ||
| 492 | { | ||
| 493 | struct dentry *workdir = ovl_workdir(dentry); | ||
| 494 | struct inode *wdir = workdir->d_inode; | ||
| 495 | struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); | ||
| 496 | struct inode *udir = upperdir->d_inode; | ||
| 497 | struct dentry *whiteout; | ||
| 498 | struct dentry *upper; | ||
| 499 | struct dentry *opaquedir = NULL; | ||
| 500 | int err; | ||
| 501 | |||
| 502 | if (is_dir) { | ||
| 503 | opaquedir = ovl_check_empty_and_clear(dentry, type); | ||
| 504 | err = PTR_ERR(opaquedir); | ||
| 505 | if (IS_ERR(opaquedir)) | ||
| 506 | goto out; | ||
| 507 | } | ||
| 508 | |||
| 509 | err = ovl_lock_rename_workdir(workdir, upperdir); | ||
| 510 | if (err) | ||
| 511 | goto out_dput; | ||
| 512 | |||
| 513 | whiteout = ovl_whiteout(workdir, dentry); | ||
| 514 | err = PTR_ERR(whiteout); | ||
| 515 | if (IS_ERR(whiteout)) | ||
| 516 | goto out_unlock; | ||
| 517 | |||
| 518 | if (type == OVL_PATH_LOWER) { | ||
| 519 | upper = lookup_one_len(dentry->d_name.name, upperdir, | ||
| 520 | dentry->d_name.len); | ||
| 521 | err = PTR_ERR(upper); | ||
| 522 | if (IS_ERR(upper)) | ||
| 523 | goto kill_whiteout; | ||
| 524 | |||
| 525 | err = ovl_do_rename(wdir, whiteout, udir, upper, 0); | ||
| 526 | dput(upper); | ||
| 527 | if (err) | ||
| 528 | goto kill_whiteout; | ||
| 529 | } else { | ||
| 530 | int flags = 0; | ||
| 531 | |||
| 532 | upper = ovl_dentry_upper(dentry); | ||
| 533 | if (opaquedir) | ||
| 534 | upper = opaquedir; | ||
| 535 | err = -ESTALE; | ||
| 536 | if (upper->d_parent != upperdir) | ||
| 537 | goto kill_whiteout; | ||
| 538 | |||
| 539 | if (is_dir) | ||
| 540 | flags |= RENAME_EXCHANGE; | ||
| 541 | |||
| 542 | err = ovl_do_rename(wdir, whiteout, udir, upper, flags); | ||
| 543 | if (err) | ||
| 544 | goto kill_whiteout; | ||
| 545 | |||
| 546 | if (is_dir) | ||
| 547 | ovl_cleanup(wdir, upper); | ||
| 548 | } | ||
| 549 | ovl_dentry_version_inc(dentry->d_parent); | ||
| 550 | out_d_drop: | ||
| 551 | d_drop(dentry); | ||
| 552 | dput(whiteout); | ||
| 553 | out_unlock: | ||
| 554 | unlock_rename(workdir, upperdir); | ||
| 555 | out_dput: | ||
| 556 | dput(opaquedir); | ||
| 557 | out: | ||
| 558 | return err; | ||
| 559 | |||
| 560 | kill_whiteout: | ||
| 561 | ovl_cleanup(wdir, whiteout); | ||
| 562 | goto out_d_drop; | ||
| 563 | } | ||
| 564 | |||
| 565 | static int ovl_remove_upper(struct dentry *dentry, bool is_dir) | ||
| 566 | { | ||
| 567 | struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); | ||
| 568 | struct inode *dir = upperdir->d_inode; | ||
| 569 | struct dentry *upper = ovl_dentry_upper(dentry); | ||
| 570 | int err; | ||
| 571 | |||
| 572 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | ||
| 573 | err = -ESTALE; | ||
| 574 | if (upper->d_parent == upperdir) { | ||
| 575 | /* Don't let d_delete() think it can reset d_inode */ | ||
| 576 | dget(upper); | ||
| 577 | if (is_dir) | ||
| 578 | err = vfs_rmdir(dir, upper); | ||
| 579 | else | ||
| 580 | err = vfs_unlink(dir, upper, NULL); | ||
| 581 | dput(upper); | ||
| 582 | ovl_dentry_version_inc(dentry->d_parent); | ||
| 583 | } | ||
| 584 | |||
| 585 | /* | ||
| 586 | * Keeping this dentry hashed would mean having to release | ||
| 587 | * upperpath/lowerpath, which could only be done if we are the | ||
| 588 | * sole user of this dentry. Too tricky... Just unhash for | ||
| 589 | * now. | ||
| 590 | */ | ||
| 591 | d_drop(dentry); | ||
| 592 | mutex_unlock(&dir->i_mutex); | ||
| 593 | |||
| 594 | return err; | ||
| 595 | } | ||
| 596 | |||
| 597 | static inline int ovl_check_sticky(struct dentry *dentry) | ||
| 598 | { | ||
| 599 | struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode; | ||
| 600 | struct inode *inode = ovl_dentry_real(dentry)->d_inode; | ||
| 601 | |||
| 602 | if (check_sticky(dir, inode)) | ||
| 603 | return -EPERM; | ||
| 604 | |||
| 605 | return 0; | ||
| 606 | } | ||
| 607 | |||
| 608 | static int ovl_do_remove(struct dentry *dentry, bool is_dir) | ||
| 609 | { | ||
| 610 | enum ovl_path_type type; | ||
| 611 | int err; | ||
| 612 | |||
| 613 | err = ovl_check_sticky(dentry); | ||
| 614 | if (err) | ||
| 615 | goto out; | ||
| 616 | |||
| 617 | err = ovl_want_write(dentry); | ||
| 618 | if (err) | ||
| 619 | goto out; | ||
| 620 | |||
| 621 | err = ovl_copy_up(dentry->d_parent); | ||
| 622 | if (err) | ||
| 623 | goto out_drop_write; | ||
| 624 | |||
| 625 | type = ovl_path_type(dentry); | ||
| 626 | if (type == OVL_PATH_PURE_UPPER) { | ||
| 627 | err = ovl_remove_upper(dentry, is_dir); | ||
| 628 | } else { | ||
| 629 | const struct cred *old_cred; | ||
| 630 | struct cred *override_cred; | ||
| 631 | |||
| 632 | err = -ENOMEM; | ||
| 633 | override_cred = prepare_creds(); | ||
| 634 | if (!override_cred) | ||
| 635 | goto out_drop_write; | ||
| 636 | |||
| 637 | /* | ||
| 638 | * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir | ||
| 639 | * CAP_DAC_OVERRIDE for create in workdir, rename | ||
| 640 | * CAP_FOWNER for removing whiteout from sticky dir | ||
| 641 | * CAP_FSETID for chmod of opaque dir | ||
| 642 | * CAP_CHOWN for chown of opaque dir | ||
| 643 | */ | ||
| 644 | cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); | ||
| 645 | cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); | ||
| 646 | cap_raise(override_cred->cap_effective, CAP_FOWNER); | ||
| 647 | cap_raise(override_cred->cap_effective, CAP_FSETID); | ||
| 648 | cap_raise(override_cred->cap_effective, CAP_CHOWN); | ||
| 649 | old_cred = override_creds(override_cred); | ||
| 650 | |||
| 651 | err = ovl_remove_and_whiteout(dentry, type, is_dir); | ||
| 652 | |||
| 653 | revert_creds(old_cred); | ||
| 654 | put_cred(override_cred); | ||
| 655 | } | ||
| 656 | out_drop_write: | ||
| 657 | ovl_drop_write(dentry); | ||
| 658 | out: | ||
| 659 | return err; | ||
| 660 | } | ||
| 661 | |||
| 662 | static int ovl_unlink(struct inode *dir, struct dentry *dentry) | ||
| 663 | { | ||
| 664 | return ovl_do_remove(dentry, false); | ||
| 665 | } | ||
| 666 | |||
| 667 | static int ovl_rmdir(struct inode *dir, struct dentry *dentry) | ||
| 668 | { | ||
| 669 | return ovl_do_remove(dentry, true); | ||
| 670 | } | ||
| 671 | |||
| 672 | static int ovl_rename2(struct inode *olddir, struct dentry *old, | ||
| 673 | struct inode *newdir, struct dentry *new, | ||
| 674 | unsigned int flags) | ||
| 675 | { | ||
| 676 | int err; | ||
| 677 | enum ovl_path_type old_type; | ||
| 678 | enum ovl_path_type new_type; | ||
| 679 | struct dentry *old_upperdir; | ||
| 680 | struct dentry *new_upperdir; | ||
| 681 | struct dentry *olddentry; | ||
| 682 | struct dentry *newdentry; | ||
| 683 | struct dentry *trap; | ||
| 684 | bool old_opaque; | ||
| 685 | bool new_opaque; | ||
| 686 | bool new_create = false; | ||
| 687 | bool cleanup_whiteout = false; | ||
| 688 | bool overwrite = !(flags & RENAME_EXCHANGE); | ||
| 689 | bool is_dir = S_ISDIR(old->d_inode->i_mode); | ||
| 690 | bool new_is_dir = false; | ||
| 691 | struct dentry *opaquedir = NULL; | ||
| 692 | const struct cred *old_cred = NULL; | ||
| 693 | struct cred *override_cred = NULL; | ||
| 694 | |||
| 695 | err = -EINVAL; | ||
| 696 | if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE)) | ||
| 697 | goto out; | ||
| 698 | |||
| 699 | flags &= ~RENAME_NOREPLACE; | ||
| 700 | |||
| 701 | err = ovl_check_sticky(old); | ||
| 702 | if (err) | ||
| 703 | goto out; | ||
| 704 | |||
| 705 | /* Don't copy up directory trees */ | ||
| 706 | old_type = ovl_path_type(old); | ||
| 707 | err = -EXDEV; | ||
| 708 | if ((old_type == OVL_PATH_LOWER || old_type == OVL_PATH_MERGE) && is_dir) | ||
| 709 | goto out; | ||
| 710 | |||
| 711 | if (new->d_inode) { | ||
| 712 | err = ovl_check_sticky(new); | ||
| 713 | if (err) | ||
| 714 | goto out; | ||
| 715 | |||
| 716 | if (S_ISDIR(new->d_inode->i_mode)) | ||
| 717 | new_is_dir = true; | ||
| 718 | |||
| 719 | new_type = ovl_path_type(new); | ||
| 720 | err = -EXDEV; | ||
| 721 | if (!overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) | ||
| 722 | goto out; | ||
| 723 | |||
| 724 | err = 0; | ||
| 725 | if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) { | ||
| 726 | if (ovl_dentry_lower(old)->d_inode == | ||
| 727 | ovl_dentry_lower(new)->d_inode) | ||
| 728 | goto out; | ||
| 729 | } | ||
| 730 | if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) { | ||
| 731 | if (ovl_dentry_upper(old)->d_inode == | ||
| 732 | ovl_dentry_upper(new)->d_inode) | ||
| 733 | goto out; | ||
| 734 | } | ||
| 735 | } else { | ||
| 736 | if (ovl_dentry_is_opaque(new)) | ||
| 737 | new_type = OVL_PATH_UPPER; | ||
| 738 | else | ||
| 739 | new_type = OVL_PATH_PURE_UPPER; | ||
| 740 | } | ||
| 741 | |||
| 742 | err = ovl_want_write(old); | ||
| 743 | if (err) | ||
| 744 | goto out; | ||
| 745 | |||
| 746 | err = ovl_copy_up(old); | ||
| 747 | if (err) | ||
| 748 | goto out_drop_write; | ||
| 749 | |||
| 750 | err = ovl_copy_up(new->d_parent); | ||
| 751 | if (err) | ||
| 752 | goto out_drop_write; | ||
| 753 | if (!overwrite) { | ||
| 754 | err = ovl_copy_up(new); | ||
| 755 | if (err) | ||
| 756 | goto out_drop_write; | ||
| 757 | } | ||
| 758 | |||
| 759 | old_opaque = old_type != OVL_PATH_PURE_UPPER; | ||
| 760 | new_opaque = new_type != OVL_PATH_PURE_UPPER; | ||
| 761 | |||
| 762 | if (old_opaque || new_opaque) { | ||
| 763 | err = -ENOMEM; | ||
| 764 | override_cred = prepare_creds(); | ||
| 765 | if (!override_cred) | ||
| 766 | goto out_drop_write; | ||
| 767 | |||
| 768 | /* | ||
| 769 | * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir | ||
| 770 | * CAP_DAC_OVERRIDE for create in workdir | ||
| 771 | * CAP_FOWNER for removing whiteout from sticky dir | ||
| 772 | * CAP_FSETID for chmod of opaque dir | ||
| 773 | * CAP_CHOWN for chown of opaque dir | ||
| 774 | */ | ||
| 775 | cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); | ||
| 776 | cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); | ||
| 777 | cap_raise(override_cred->cap_effective, CAP_FOWNER); | ||
| 778 | cap_raise(override_cred->cap_effective, CAP_FSETID); | ||
| 779 | cap_raise(override_cred->cap_effective, CAP_CHOWN); | ||
| 780 | old_cred = override_creds(override_cred); | ||
| 781 | } | ||
| 782 | |||
| 783 | if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) { | ||
| 784 | opaquedir = ovl_check_empty_and_clear(new, new_type); | ||
| 785 | err = PTR_ERR(opaquedir); | ||
| 786 | if (IS_ERR(opaquedir)) { | ||
| 787 | opaquedir = NULL; | ||
| 788 | goto out_revert_creds; | ||
| 789 | } | ||
| 790 | } | ||
| 791 | |||
| 792 | if (overwrite) { | ||
| 793 | if (old_opaque) { | ||
| 794 | if (new->d_inode || !new_opaque) { | ||
| 795 | /* Whiteout source */ | ||
| 796 | flags |= RENAME_WHITEOUT; | ||
| 797 | } else { | ||
| 798 | /* Switch whiteouts */ | ||
| 799 | flags |= RENAME_EXCHANGE; | ||
| 800 | } | ||
| 801 | } else if (is_dir && !new->d_inode && new_opaque) { | ||
| 802 | flags |= RENAME_EXCHANGE; | ||
| 803 | cleanup_whiteout = true; | ||
| 804 | } | ||
| 805 | } | ||
| 806 | |||
| 807 | old_upperdir = ovl_dentry_upper(old->d_parent); | ||
| 808 | new_upperdir = ovl_dentry_upper(new->d_parent); | ||
| 809 | |||
| 810 | trap = lock_rename(new_upperdir, old_upperdir); | ||
| 811 | |||
| 812 | olddentry = ovl_dentry_upper(old); | ||
| 813 | newdentry = ovl_dentry_upper(new); | ||
| 814 | if (newdentry) { | ||
| 815 | if (opaquedir) { | ||
| 816 | newdentry = opaquedir; | ||
| 817 | opaquedir = NULL; | ||
| 818 | } else { | ||
| 819 | dget(newdentry); | ||
| 820 | } | ||
| 821 | } else { | ||
| 822 | new_create = true; | ||
| 823 | newdentry = lookup_one_len(new->d_name.name, new_upperdir, | ||
| 824 | new->d_name.len); | ||
| 825 | err = PTR_ERR(newdentry); | ||
| 826 | if (IS_ERR(newdentry)) | ||
| 827 | goto out_unlock; | ||
| 828 | } | ||
| 829 | |||
| 830 | err = -ESTALE; | ||
| 831 | if (olddentry->d_parent != old_upperdir) | ||
| 832 | goto out_dput; | ||
| 833 | if (newdentry->d_parent != new_upperdir) | ||
| 834 | goto out_dput; | ||
| 835 | if (olddentry == trap) | ||
| 836 | goto out_dput; | ||
| 837 | if (newdentry == trap) | ||
| 838 | goto out_dput; | ||
| 839 | |||
| 840 | if (is_dir && !old_opaque && new_opaque) { | ||
| 841 | err = ovl_set_opaque(olddentry); | ||
| 842 | if (err) | ||
| 843 | goto out_dput; | ||
| 844 | } | ||
| 845 | if (!overwrite && new_is_dir && old_opaque && !new_opaque) { | ||
| 846 | err = ovl_set_opaque(newdentry); | ||
| 847 | if (err) | ||
| 848 | goto out_dput; | ||
| 849 | } | ||
| 850 | |||
| 851 | if (old_opaque || new_opaque) { | ||
| 852 | err = ovl_do_rename(old_upperdir->d_inode, olddentry, | ||
| 853 | new_upperdir->d_inode, newdentry, | ||
| 854 | flags); | ||
| 855 | } else { | ||
| 856 | /* No debug for the plain case */ | ||
| 857 | BUG_ON(flags & ~RENAME_EXCHANGE); | ||
| 858 | err = vfs_rename(old_upperdir->d_inode, olddentry, | ||
| 859 | new_upperdir->d_inode, newdentry, | ||
| 860 | NULL, flags); | ||
| 861 | } | ||
| 862 | |||
| 863 | if (err) { | ||
| 864 | if (is_dir && !old_opaque && new_opaque) | ||
| 865 | ovl_remove_opaque(olddentry); | ||
| 866 | if (!overwrite && new_is_dir && old_opaque && !new_opaque) | ||
| 867 | ovl_remove_opaque(newdentry); | ||
| 868 | goto out_dput; | ||
| 869 | } | ||
| 870 | |||
| 871 | if (is_dir && old_opaque && !new_opaque) | ||
| 872 | ovl_remove_opaque(olddentry); | ||
| 873 | if (!overwrite && new_is_dir && !old_opaque && new_opaque) | ||
| 874 | ovl_remove_opaque(newdentry); | ||
| 875 | |||
| 876 | if (old_opaque != new_opaque) { | ||
| 877 | ovl_dentry_set_opaque(old, new_opaque); | ||
| 878 | if (!overwrite) | ||
| 879 | ovl_dentry_set_opaque(new, old_opaque); | ||
| 880 | } | ||
| 881 | |||
| 882 | if (cleanup_whiteout) | ||
| 883 | ovl_cleanup(old_upperdir->d_inode, newdentry); | ||
| 884 | |||
| 885 | ovl_dentry_version_inc(old->d_parent); | ||
| 886 | ovl_dentry_version_inc(new->d_parent); | ||
| 887 | |||
| 888 | out_dput: | ||
| 889 | dput(newdentry); | ||
| 890 | out_unlock: | ||
| 891 | unlock_rename(new_upperdir, old_upperdir); | ||
| 892 | out_revert_creds: | ||
| 893 | if (old_opaque || new_opaque) { | ||
| 894 | revert_creds(old_cred); | ||
| 895 | put_cred(override_cred); | ||
| 896 | } | ||
| 897 | out_drop_write: | ||
| 898 | ovl_drop_write(old); | ||
| 899 | out: | ||
| 900 | dput(opaquedir); | ||
| 901 | return err; | ||
| 902 | } | ||
| 903 | |||
| 904 | const struct inode_operations ovl_dir_inode_operations = { | ||
| 905 | .lookup = ovl_lookup, | ||
| 906 | .mkdir = ovl_mkdir, | ||
| 907 | .symlink = ovl_symlink, | ||
| 908 | .unlink = ovl_unlink, | ||
| 909 | .rmdir = ovl_rmdir, | ||
| 910 | .rename2 = ovl_rename2, | ||
| 911 | .link = ovl_link, | ||
| 912 | .setattr = ovl_setattr, | ||
| 913 | .create = ovl_create, | ||
| 914 | .mknod = ovl_mknod, | ||
| 915 | .permission = ovl_permission, | ||
| 916 | .getattr = ovl_dir_getattr, | ||
| 917 | .setxattr = ovl_setxattr, | ||
| 918 | .getxattr = ovl_getxattr, | ||
| 919 | .listxattr = ovl_listxattr, | ||
| 920 | .removexattr = ovl_removexattr, | ||
| 921 | }; | ||
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c new file mode 100644 index 000000000000..af2d18c9fcee --- /dev/null +++ b/fs/overlayfs/inode.c | |||
| @@ -0,0 +1,425 @@ | |||
| 1 | /* | ||
| 2 | * | ||
| 3 | * Copyright (C) 2011 Novell Inc. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms of the GNU General Public License version 2 as published by | ||
| 7 | * the Free Software Foundation. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/fs.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/xattr.h> | ||
| 13 | #include "overlayfs.h" | ||
| 14 | |||
| 15 | static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr, | ||
| 16 | bool no_data) | ||
| 17 | { | ||
| 18 | int err; | ||
| 19 | struct dentry *parent; | ||
| 20 | struct kstat stat; | ||
| 21 | struct path lowerpath; | ||
| 22 | |||
| 23 | parent = dget_parent(dentry); | ||
| 24 | err = ovl_copy_up(parent); | ||
| 25 | if (err) | ||
| 26 | goto out_dput_parent; | ||
| 27 | |||
| 28 | ovl_path_lower(dentry, &lowerpath); | ||
| 29 | err = vfs_getattr(&lowerpath, &stat); | ||
| 30 | if (err) | ||
| 31 | goto out_dput_parent; | ||
| 32 | |||
| 33 | if (no_data) | ||
| 34 | stat.size = 0; | ||
| 35 | |||
| 36 | err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr); | ||
| 37 | |||
| 38 | out_dput_parent: | ||
| 39 | dput(parent); | ||
| 40 | return err; | ||
| 41 | } | ||
| 42 | |||
| 43 | int ovl_setattr(struct dentry *dentry, struct iattr *attr) | ||
| 44 | { | ||
| 45 | int err; | ||
| 46 | struct dentry *upperdentry; | ||
| 47 | |||
| 48 | err = ovl_want_write(dentry); | ||
| 49 | if (err) | ||
| 50 | goto out; | ||
| 51 | |||
| 52 | upperdentry = ovl_dentry_upper(dentry); | ||
| 53 | if (upperdentry) { | ||
| 54 | mutex_lock(&upperdentry->d_inode->i_mutex); | ||
| 55 | err = notify_change(upperdentry, attr, NULL); | ||
| 56 | mutex_unlock(&upperdentry->d_inode->i_mutex); | ||
| 57 | } else { | ||
| 58 | err = ovl_copy_up_last(dentry, attr, false); | ||
| 59 | } | ||
| 60 | ovl_drop_write(dentry); | ||
| 61 | out: | ||
| 62 | return err; | ||
| 63 | } | ||
| 64 | |||
| 65 | static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
| 66 | struct kstat *stat) | ||
| 67 | { | ||
| 68 | struct path realpath; | ||
| 69 | |||
| 70 | ovl_path_real(dentry, &realpath); | ||
| 71 | return vfs_getattr(&realpath, stat); | ||
| 72 | } | ||
| 73 | |||
| 74 | int ovl_permission(struct inode *inode, int mask) | ||
| 75 | { | ||
| 76 | struct ovl_entry *oe; | ||
| 77 | struct dentry *alias = NULL; | ||
| 78 | struct inode *realinode; | ||
| 79 | struct dentry *realdentry; | ||
| 80 | bool is_upper; | ||
| 81 | int err; | ||
| 82 | |||
| 83 | if (S_ISDIR(inode->i_mode)) { | ||
| 84 | oe = inode->i_private; | ||
| 85 | } else if (mask & MAY_NOT_BLOCK) { | ||
| 86 | return -ECHILD; | ||
| 87 | } else { | ||
| 88 | /* | ||
| 89 | * For non-directories find an alias and get the info | ||
| 90 | * from there. | ||
| 91 | */ | ||
| 92 | alias = d_find_any_alias(inode); | ||
| 93 | if (WARN_ON(!alias)) | ||
| 94 | return -ENOENT; | ||
| 95 | |||
| 96 | oe = alias->d_fsdata; | ||
| 97 | } | ||
| 98 | |||
| 99 | realdentry = ovl_entry_real(oe, &is_upper); | ||
| 100 | |||
| 101 | /* Careful in RCU walk mode */ | ||
| 102 | realinode = ACCESS_ONCE(realdentry->d_inode); | ||
| 103 | if (!realinode) { | ||
| 104 | WARN_ON(!(mask & MAY_NOT_BLOCK)); | ||
| 105 | err = -ENOENT; | ||
| 106 | goto out_dput; | ||
| 107 | } | ||
| 108 | |||
| 109 | if (mask & MAY_WRITE) { | ||
| 110 | umode_t mode = realinode->i_mode; | ||
| 111 | |||
| 112 | /* | ||
| 113 | * Writes will always be redirected to upper layer, so | ||
| 114 | * ignore lower layer being read-only. | ||
| 115 | * | ||
| 116 | * If the overlay itself is read-only then proceed | ||
| 117 | * with the permission check, don't return EROFS. | ||
| 118 | * This will only happen if this is the lower layer of | ||
| 119 | * another overlayfs. | ||
| 120 | * | ||
| 121 | * If upper fs becomes read-only after the overlay was | ||
| 122 | * constructed return EROFS to prevent modification of | ||
| 123 | * upper layer. | ||
| 124 | */ | ||
| 125 | err = -EROFS; | ||
| 126 | if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) && | ||
| 127 | (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) | ||
| 128 | goto out_dput; | ||
| 129 | } | ||
| 130 | |||
| 131 | err = __inode_permission(realinode, mask); | ||
| 132 | out_dput: | ||
| 133 | dput(alias); | ||
| 134 | return err; | ||
| 135 | } | ||
| 136 | |||
| 137 | |||
/*
 * Cookie handed from ovl_follow_link() to ovl_put_link() so that the
 * real filesystem's ->put_link() can be called with its own arguments.
 */
struct ovl_link_data {
	struct dentry *realdentry;	/* dentry the real ->follow_link() ran on */
	void *cookie;			/* value the real ->follow_link() returned */
};
| 142 | |||
| 143 | static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
| 144 | { | ||
| 145 | void *ret; | ||
| 146 | struct dentry *realdentry; | ||
| 147 | struct inode *realinode; | ||
| 148 | |||
| 149 | realdentry = ovl_dentry_real(dentry); | ||
| 150 | realinode = realdentry->d_inode; | ||
| 151 | |||
| 152 | if (WARN_ON(!realinode->i_op->follow_link)) | ||
| 153 | return ERR_PTR(-EPERM); | ||
| 154 | |||
| 155 | ret = realinode->i_op->follow_link(realdentry, nd); | ||
| 156 | if (IS_ERR(ret)) | ||
| 157 | return ret; | ||
| 158 | |||
| 159 | if (realinode->i_op->put_link) { | ||
| 160 | struct ovl_link_data *data; | ||
| 161 | |||
| 162 | data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL); | ||
| 163 | if (!data) { | ||
| 164 | realinode->i_op->put_link(realdentry, nd, ret); | ||
| 165 | return ERR_PTR(-ENOMEM); | ||
| 166 | } | ||
| 167 | data->realdentry = realdentry; | ||
| 168 | data->cookie = ret; | ||
| 169 | |||
| 170 | return data; | ||
| 171 | } else { | ||
| 172 | return NULL; | ||
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c) | ||
| 177 | { | ||
| 178 | struct inode *realinode; | ||
| 179 | struct ovl_link_data *data = c; | ||
| 180 | |||
| 181 | if (!data) | ||
| 182 | return; | ||
| 183 | |||
| 184 | realinode = data->realdentry->d_inode; | ||
| 185 | realinode->i_op->put_link(data->realdentry, nd, data->cookie); | ||
| 186 | kfree(data); | ||
| 187 | } | ||
| 188 | |||
| 189 | static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | ||
| 190 | { | ||
| 191 | struct path realpath; | ||
| 192 | struct inode *realinode; | ||
| 193 | |||
| 194 | ovl_path_real(dentry, &realpath); | ||
| 195 | realinode = realpath.dentry->d_inode; | ||
| 196 | |||
| 197 | if (!realinode->i_op->readlink) | ||
| 198 | return -EINVAL; | ||
| 199 | |||
| 200 | touch_atime(&realpath); | ||
| 201 | |||
| 202 | return realinode->i_op->readlink(realpath.dentry, buf, bufsiz); | ||
| 203 | } | ||
| 204 | |||
| 205 | |||
/*
 * Return true if @name starts with the "trusted.overlay." prefix.
 *
 * The comparison length is derived from the prefix literal so that the
 * whole prefix, trailing dot included, has to match; a name such as
 * "trusted.overlayfoo" is not private.
 */
static bool ovl_is_private_xattr(const char *name)
{
	static const char prefix[] = "trusted.overlay.";

	/* sizeof counts the terminating NUL, which must not be compared */
	return strncmp(name, prefix, sizeof(prefix) - 1) == 0;
}
| 210 | |||
| 211 | int ovl_setxattr(struct dentry *dentry, const char *name, | ||
| 212 | const void *value, size_t size, int flags) | ||
| 213 | { | ||
| 214 | int err; | ||
| 215 | struct dentry *upperdentry; | ||
| 216 | |||
| 217 | err = ovl_want_write(dentry); | ||
| 218 | if (err) | ||
| 219 | goto out; | ||
| 220 | |||
| 221 | err = -EPERM; | ||
| 222 | if (ovl_is_private_xattr(name)) | ||
| 223 | goto out_drop_write; | ||
| 224 | |||
| 225 | err = ovl_copy_up(dentry); | ||
| 226 | if (err) | ||
| 227 | goto out_drop_write; | ||
| 228 | |||
| 229 | upperdentry = ovl_dentry_upper(dentry); | ||
| 230 | err = vfs_setxattr(upperdentry, name, value, size, flags); | ||
| 231 | |||
| 232 | out_drop_write: | ||
| 233 | ovl_drop_write(dentry); | ||
| 234 | out: | ||
| 235 | return err; | ||
| 236 | } | ||
| 237 | |||
| 238 | ssize_t ovl_getxattr(struct dentry *dentry, const char *name, | ||
| 239 | void *value, size_t size) | ||
| 240 | { | ||
| 241 | if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && | ||
| 242 | ovl_is_private_xattr(name)) | ||
| 243 | return -ENODATA; | ||
| 244 | |||
| 245 | return vfs_getxattr(ovl_dentry_real(dentry), name, value, size); | ||
| 246 | } | ||
| 247 | |||
| 248 | ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) | ||
| 249 | { | ||
| 250 | ssize_t res; | ||
| 251 | int off; | ||
| 252 | |||
| 253 | res = vfs_listxattr(ovl_dentry_real(dentry), list, size); | ||
| 254 | if (res <= 0 || size == 0) | ||
| 255 | return res; | ||
| 256 | |||
| 257 | if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE) | ||
| 258 | return res; | ||
| 259 | |||
| 260 | /* filter out private xattrs */ | ||
| 261 | for (off = 0; off < res;) { | ||
| 262 | char *s = list + off; | ||
| 263 | size_t slen = strlen(s) + 1; | ||
| 264 | |||
| 265 | BUG_ON(off + slen > res); | ||
| 266 | |||
| 267 | if (ovl_is_private_xattr(s)) { | ||
| 268 | res -= slen; | ||
| 269 | memmove(s, s + slen, res - off); | ||
| 270 | } else { | ||
| 271 | off += slen; | ||
| 272 | } | ||
| 273 | } | ||
| 274 | |||
| 275 | return res; | ||
| 276 | } | ||
| 277 | |||
| 278 | int ovl_removexattr(struct dentry *dentry, const char *name) | ||
| 279 | { | ||
| 280 | int err; | ||
| 281 | struct path realpath; | ||
| 282 | enum ovl_path_type type; | ||
| 283 | |||
| 284 | err = ovl_want_write(dentry); | ||
| 285 | if (err) | ||
| 286 | goto out; | ||
| 287 | |||
| 288 | if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && | ||
| 289 | ovl_is_private_xattr(name)) | ||
| 290 | goto out_drop_write; | ||
| 291 | |||
| 292 | type = ovl_path_real(dentry, &realpath); | ||
| 293 | if (type == OVL_PATH_LOWER) { | ||
| 294 | err = vfs_getxattr(realpath.dentry, name, NULL, 0); | ||
| 295 | if (err < 0) | ||
| 296 | goto out_drop_write; | ||
| 297 | |||
| 298 | err = ovl_copy_up(dentry); | ||
| 299 | if (err) | ||
| 300 | goto out_drop_write; | ||
| 301 | |||
| 302 | ovl_path_upper(dentry, &realpath); | ||
| 303 | } | ||
| 304 | |||
| 305 | err = vfs_removexattr(realpath.dentry, name); | ||
| 306 | out_drop_write: | ||
| 307 | ovl_drop_write(dentry); | ||
| 308 | out: | ||
| 309 | return err; | ||
| 310 | } | ||
| 311 | |||
| 312 | static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, | ||
| 313 | struct dentry *realdentry) | ||
| 314 | { | ||
| 315 | if (type != OVL_PATH_LOWER) | ||
| 316 | return false; | ||
| 317 | |||
| 318 | if (special_file(realdentry->d_inode->i_mode)) | ||
| 319 | return false; | ||
| 320 | |||
| 321 | if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC)) | ||
| 322 | return false; | ||
| 323 | |||
| 324 | return true; | ||
| 325 | } | ||
| 326 | |||
| 327 | static int ovl_dentry_open(struct dentry *dentry, struct file *file, | ||
| 328 | const struct cred *cred) | ||
| 329 | { | ||
| 330 | int err; | ||
| 331 | struct path realpath; | ||
| 332 | enum ovl_path_type type; | ||
| 333 | bool want_write = false; | ||
| 334 | |||
| 335 | type = ovl_path_real(dentry, &realpath); | ||
| 336 | if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) { | ||
| 337 | want_write = true; | ||
| 338 | err = ovl_want_write(dentry); | ||
| 339 | if (err) | ||
| 340 | goto out; | ||
| 341 | |||
| 342 | if (file->f_flags & O_TRUNC) | ||
| 343 | err = ovl_copy_up_last(dentry, NULL, true); | ||
| 344 | else | ||
| 345 | err = ovl_copy_up(dentry); | ||
| 346 | if (err) | ||
| 347 | goto out_drop_write; | ||
| 348 | |||
| 349 | ovl_path_upper(dentry, &realpath); | ||
| 350 | } | ||
| 351 | |||
| 352 | err = vfs_open(&realpath, file, cred); | ||
| 353 | out_drop_write: | ||
| 354 | if (want_write) | ||
| 355 | ovl_drop_write(dentry); | ||
| 356 | out: | ||
| 357 | return err; | ||
| 358 | } | ||
| 359 | |||
| 360 | static const struct inode_operations ovl_file_inode_operations = { | ||
| 361 | .setattr = ovl_setattr, | ||
| 362 | .permission = ovl_permission, | ||
| 363 | .getattr = ovl_getattr, | ||
| 364 | .setxattr = ovl_setxattr, | ||
| 365 | .getxattr = ovl_getxattr, | ||
| 366 | .listxattr = ovl_listxattr, | ||
| 367 | .removexattr = ovl_removexattr, | ||
| 368 | .dentry_open = ovl_dentry_open, | ||
| 369 | }; | ||
| 370 | |||
| 371 | static const struct inode_operations ovl_symlink_inode_operations = { | ||
| 372 | .setattr = ovl_setattr, | ||
| 373 | .follow_link = ovl_follow_link, | ||
| 374 | .put_link = ovl_put_link, | ||
| 375 | .readlink = ovl_readlink, | ||
| 376 | .getattr = ovl_getattr, | ||
| 377 | .setxattr = ovl_setxattr, | ||
| 378 | .getxattr = ovl_getxattr, | ||
| 379 | .listxattr = ovl_listxattr, | ||
| 380 | .removexattr = ovl_removexattr, | ||
| 381 | }; | ||
| 382 | |||
| 383 | struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, | ||
| 384 | struct ovl_entry *oe) | ||
| 385 | { | ||
| 386 | struct inode *inode; | ||
| 387 | |||
| 388 | inode = new_inode(sb); | ||
| 389 | if (!inode) | ||
| 390 | return NULL; | ||
| 391 | |||
| 392 | mode &= S_IFMT; | ||
| 393 | |||
| 394 | inode->i_ino = get_next_ino(); | ||
| 395 | inode->i_mode = mode; | ||
| 396 | inode->i_flags |= S_NOATIME | S_NOCMTIME; | ||
| 397 | |||
| 398 | switch (mode) { | ||
| 399 | case S_IFDIR: | ||
| 400 | inode->i_private = oe; | ||
| 401 | inode->i_op = &ovl_dir_inode_operations; | ||
| 402 | inode->i_fop = &ovl_dir_operations; | ||
| 403 | break; | ||
| 404 | |||
| 405 | case S_IFLNK: | ||
| 406 | inode->i_op = &ovl_symlink_inode_operations; | ||
| 407 | break; | ||
| 408 | |||
| 409 | case S_IFREG: | ||
| 410 | case S_IFSOCK: | ||
| 411 | case S_IFBLK: | ||
| 412 | case S_IFCHR: | ||
| 413 | case S_IFIFO: | ||
| 414 | inode->i_op = &ovl_file_inode_operations; | ||
| 415 | break; | ||
| 416 | |||
| 417 | default: | ||
| 418 | WARN(1, "illegal file type: %i\n", mode); | ||
| 419 | iput(inode); | ||
| 420 | inode = NULL; | ||
| 421 | } | ||
| 422 | |||
| 423 | return inode; | ||
| 424 | |||
| 425 | } | ||
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h new file mode 100644 index 000000000000..814bed33dd07 --- /dev/null +++ b/fs/overlayfs/overlayfs.h | |||
| @@ -0,0 +1,191 @@ | |||
| 1 | /* | ||
| 2 | * | ||
| 3 | * Copyright (C) 2011 Novell Inc. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms of the GNU General Public License version 2 as published by | ||
| 7 | * the Free Software Foundation. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/kernel.h> | ||
| 11 | |||
| 12 | struct ovl_entry; | ||
| 13 | |||
/*
 * Classification of an overlay dentry, as returned by ovl_path_type() and
 * ovl_path_real(). The enumerator order (and so the numeric values) is
 * unchanged.
 *
 * NOTE(review): the per-value descriptions are inferred from the names and
 * from how callers test them; confirm against ovl_path_type().
 */
enum ovl_path_type {
	OVL_PATH_PURE_UPPER,	/* presumably: exists in upper only */
	OVL_PATH_UPPER,		/* presumably: real dentry is the upper one */
	OVL_PATH_MERGE,		/* presumably: directory merged from both layers */
	OVL_PATH_LOWER,		/* presumably: real dentry is the lower one */
};
| 20 | |||
| 21 | extern const char *ovl_opaque_xattr; | ||
| 22 | |||
/* vfs_rmdir() wrapper that logs the call and its result via pr_debug(). */
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
	int ret;

	ret = vfs_rmdir(dir, dentry);
	pr_debug("rmdir(%pd2) = %i\n", dentry, ret);

	return ret;
}
| 29 | |||
| 30 | static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry) | ||
| 31 | { | ||
| 32 | int err = vfs_unlink(dir, dentry, NULL); | ||
| 33 | pr_debug("unlink(%pd2) = %i\n", dentry, err); | ||
| 34 | return err; | ||
| 35 | } | ||
| 36 | |||
| 37 | static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir, | ||
| 38 | struct dentry *new_dentry, bool debug) | ||
| 39 | { | ||
| 40 | int err = vfs_link(old_dentry, dir, new_dentry, NULL); | ||
| 41 | if (debug) { | ||
| 42 | pr_debug("link(%pd2, %pd2) = %i\n", | ||
| 43 | old_dentry, new_dentry, err); | ||
| 44 | } | ||
| 45 | return err; | ||
| 46 | } | ||
| 47 | |||
| 48 | static inline int ovl_do_create(struct inode *dir, struct dentry *dentry, | ||
| 49 | umode_t mode, bool debug) | ||
| 50 | { | ||
| 51 | int err = vfs_create(dir, dentry, mode, true); | ||
| 52 | if (debug) | ||
| 53 | pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err); | ||
| 54 | return err; | ||
| 55 | } | ||
| 56 | |||
| 57 | static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry, | ||
| 58 | umode_t mode, bool debug) | ||
| 59 | { | ||
| 60 | int err = vfs_mkdir(dir, dentry, mode); | ||
| 61 | if (debug) | ||
| 62 | pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); | ||
| 63 | return err; | ||
| 64 | } | ||
| 65 | |||
| 66 | static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry, | ||
| 67 | umode_t mode, dev_t dev, bool debug) | ||
| 68 | { | ||
| 69 | int err = vfs_mknod(dir, dentry, mode, dev); | ||
| 70 | if (debug) { | ||
| 71 | pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", | ||
| 72 | dentry, mode, dev, err); | ||
| 73 | } | ||
| 74 | return err; | ||
| 75 | } | ||
| 76 | |||
| 77 | static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry, | ||
| 78 | const char *oldname, bool debug) | ||
| 79 | { | ||
| 80 | int err = vfs_symlink(dir, dentry, oldname); | ||
| 81 | if (debug) | ||
| 82 | pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err); | ||
| 83 | return err; | ||
| 84 | } | ||
| 85 | |||
| 86 | static inline int ovl_do_setxattr(struct dentry *dentry, const char *name, | ||
| 87 | const void *value, size_t size, int flags) | ||
| 88 | { | ||
| 89 | int err = vfs_setxattr(dentry, name, value, size, flags); | ||
| 90 | pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n", | ||
| 91 | dentry, name, (int) size, (char *) value, flags, err); | ||
| 92 | return err; | ||
| 93 | } | ||
| 94 | |||
/* vfs_removexattr() wrapper that logs the call and its result via pr_debug(). */
static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
{
	int ret;

	ret = vfs_removexattr(dentry, name);
	pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, ret);

	return ret;
}
| 101 | |||
| 102 | static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry, | ||
| 103 | struct inode *newdir, struct dentry *newdentry, | ||
| 104 | unsigned int flags) | ||
| 105 | { | ||
| 106 | int err; | ||
| 107 | |||
| 108 | pr_debug("rename2(%pd2, %pd2, 0x%x)\n", | ||
| 109 | olddentry, newdentry, flags); | ||
| 110 | |||
| 111 | err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags); | ||
| 112 | |||
| 113 | if (err) { | ||
| 114 | pr_debug("...rename2(%pd2, %pd2, ...) = %i\n", | ||
| 115 | olddentry, newdentry, err); | ||
| 116 | } | ||
| 117 | return err; | ||
| 118 | } | ||
| 119 | |||
/* vfs_whiteout() wrapper that logs the call and its result via pr_debug(). */
static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
{
	int ret;

	ret = vfs_whiteout(dir, dentry);
	pr_debug("whiteout(%pd2) = %i\n", dentry, ret);

	return ret;
}
| 126 | |||
| 127 | enum ovl_path_type ovl_path_type(struct dentry *dentry); | ||
| 128 | u64 ovl_dentry_version_get(struct dentry *dentry); | ||
| 129 | void ovl_dentry_version_inc(struct dentry *dentry); | ||
| 130 | void ovl_path_upper(struct dentry *dentry, struct path *path); | ||
| 131 | void ovl_path_lower(struct dentry *dentry, struct path *path); | ||
| 132 | enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); | ||
| 133 | struct dentry *ovl_dentry_upper(struct dentry *dentry); | ||
| 134 | struct dentry *ovl_dentry_lower(struct dentry *dentry); | ||
| 135 | struct dentry *ovl_dentry_real(struct dentry *dentry); | ||
| 136 | struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper); | ||
| 137 | struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry); | ||
| 138 | void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache); | ||
| 139 | struct dentry *ovl_workdir(struct dentry *dentry); | ||
| 140 | int ovl_want_write(struct dentry *dentry); | ||
| 141 | void ovl_drop_write(struct dentry *dentry); | ||
| 142 | bool ovl_dentry_is_opaque(struct dentry *dentry); | ||
| 143 | void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque); | ||
| 144 | bool ovl_is_whiteout(struct dentry *dentry); | ||
| 145 | void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); | ||
| 146 | struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, | ||
| 147 | unsigned int flags); | ||
| 148 | struct file *ovl_path_open(struct path *path, int flags); | ||
| 149 | |||
| 150 | struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry, | ||
| 151 | struct kstat *stat, const char *link); | ||
| 152 | |||
| 153 | /* readdir.c */ | ||
| 154 | extern const struct file_operations ovl_dir_operations; | ||
| 155 | int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); | ||
| 156 | void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list); | ||
| 157 | void ovl_cache_free(struct list_head *list); | ||
| 158 | |||
| 159 | /* inode.c */ | ||
| 160 | int ovl_setattr(struct dentry *dentry, struct iattr *attr); | ||
| 161 | int ovl_permission(struct inode *inode, int mask); | ||
| 162 | int ovl_setxattr(struct dentry *dentry, const char *name, | ||
| 163 | const void *value, size_t size, int flags); | ||
| 164 | ssize_t ovl_getxattr(struct dentry *dentry, const char *name, | ||
| 165 | void *value, size_t size); | ||
| 166 | ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); | ||
| 167 | int ovl_removexattr(struct dentry *dentry, const char *name); | ||
| 168 | |||
| 169 | struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, | ||
| 170 | struct ovl_entry *oe); | ||
| 171 | static inline void ovl_copyattr(struct inode *from, struct inode *to) | ||
| 172 | { | ||
| 173 | to->i_uid = from->i_uid; | ||
| 174 | to->i_gid = from->i_gid; | ||
| 175 | } | ||
| 176 | |||
| 177 | /* dir.c */ | ||
| 178 | extern const struct inode_operations ovl_dir_inode_operations; | ||
| 179 | struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry); | ||
| 180 | int ovl_create_real(struct inode *dir, struct dentry *newdentry, | ||
| 181 | struct kstat *stat, const char *link, | ||
| 182 | struct dentry *hardlink, bool debug); | ||
| 183 | void ovl_cleanup(struct inode *dir, struct dentry *dentry); | ||
| 184 | |||
| 185 | /* copy_up.c */ | ||
| 186 | int ovl_copy_up(struct dentry *dentry); | ||
| 187 | int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, | ||
| 188 | struct path *lowerpath, struct kstat *stat, | ||
| 189 | struct iattr *attr); | ||
| 190 | int ovl_copy_xattr(struct dentry *old, struct dentry *new); | ||
| 191 | int ovl_set_attr(struct dentry *upper, struct kstat *stat); | ||
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c new file mode 100644 index 000000000000..910553f37aca --- /dev/null +++ b/fs/overlayfs/readdir.c | |||
| @@ -0,0 +1,590 @@ | |||
| 1 | /* | ||
| 2 | * | ||
| 3 | * Copyright (C) 2011 Novell Inc. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms of the GNU General Public License version 2 as published by | ||
| 7 | * the Free Software Foundation. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/fs.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/namei.h> | ||
| 13 | #include <linux/file.h> | ||
| 14 | #include <linux/xattr.h> | ||
| 15 | #include <linux/rbtree.h> | ||
| 16 | #include <linux/security.h> | ||
| 17 | #include <linux/cred.h> | ||
| 18 | #include "overlayfs.h" | ||
| 19 | |||
| 20 | struct ovl_cache_entry { | ||
| 21 | unsigned int len; | ||
| 22 | unsigned int type; | ||
| 23 | u64 ino; | ||
| 24 | bool is_whiteout; | ||
| 25 | struct list_head l_node; | ||
| 26 | struct rb_node node; | ||
| 27 | char name[]; | ||
| 28 | }; | ||
| 29 | |||
| 30 | struct ovl_dir_cache { | ||
| 31 | long refcount; | ||
| 32 | u64 version; | ||
| 33 | struct list_head entries; | ||
| 34 | }; | ||
| 35 | |||
| 36 | struct ovl_readdir_data { | ||
| 37 | struct dir_context ctx; | ||
| 38 | bool is_merge; | ||
| 39 | struct rb_root root; | ||
| 40 | struct list_head *list; | ||
| 41 | struct list_head middle; | ||
| 42 | int count; | ||
| 43 | int err; | ||
| 44 | }; | ||
| 45 | |||
| 46 | struct ovl_dir_file { | ||
| 47 | bool is_real; | ||
| 48 | bool is_upper; | ||
| 49 | struct ovl_dir_cache *cache; | ||
| 50 | struct ovl_cache_entry cursor; | ||
| 51 | struct file *realfile; | ||
| 52 | struct file *upperfile; | ||
| 53 | }; | ||
| 54 | |||
| 55 | static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n) | ||
| 56 | { | ||
| 57 | return container_of(n, struct ovl_cache_entry, node); | ||
| 58 | } | ||
| 59 | |||
| 60 | static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, | ||
| 61 | const char *name, int len) | ||
| 62 | { | ||
| 63 | struct rb_node *node = root->rb_node; | ||
| 64 | int cmp; | ||
| 65 | |||
| 66 | while (node) { | ||
| 67 | struct ovl_cache_entry *p = ovl_cache_entry_from_node(node); | ||
| 68 | |||
| 69 | cmp = strncmp(name, p->name, len); | ||
| 70 | if (cmp > 0) | ||
| 71 | node = p->node.rb_right; | ||
| 72 | else if (cmp < 0 || len < p->len) | ||
| 73 | node = p->node.rb_left; | ||
| 74 | else | ||
| 75 | return p; | ||
| 76 | } | ||
| 77 | |||
| 78 | return NULL; | ||
| 79 | } | ||
| 80 | |||
| 81 | static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len, | ||
| 82 | u64 ino, unsigned int d_type) | ||
| 83 | { | ||
| 84 | struct ovl_cache_entry *p; | ||
| 85 | size_t size = offsetof(struct ovl_cache_entry, name[len + 1]); | ||
| 86 | |||
| 87 | p = kmalloc(size, GFP_KERNEL); | ||
| 88 | if (p) { | ||
| 89 | memcpy(p->name, name, len); | ||
| 90 | p->name[len] = '\0'; | ||
| 91 | p->len = len; | ||
| 92 | p->type = d_type; | ||
| 93 | p->ino = ino; | ||
| 94 | p->is_whiteout = false; | ||
| 95 | } | ||
| 96 | |||
| 97 | return p; | ||
| 98 | } | ||
| 99 | |||
| 100 | static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, | ||
| 101 | const char *name, int len, u64 ino, | ||
| 102 | unsigned int d_type) | ||
| 103 | { | ||
| 104 | struct rb_node **newp = &rdd->root.rb_node; | ||
| 105 | struct rb_node *parent = NULL; | ||
| 106 | struct ovl_cache_entry *p; | ||
| 107 | |||
| 108 | while (*newp) { | ||
| 109 | int cmp; | ||
| 110 | struct ovl_cache_entry *tmp; | ||
| 111 | |||
| 112 | parent = *newp; | ||
| 113 | tmp = ovl_cache_entry_from_node(*newp); | ||
| 114 | cmp = strncmp(name, tmp->name, len); | ||
| 115 | if (cmp > 0) | ||
| 116 | newp = &tmp->node.rb_right; | ||
| 117 | else if (cmp < 0 || len < tmp->len) | ||
| 118 | newp = &tmp->node.rb_left; | ||
| 119 | else | ||
| 120 | return 0; | ||
| 121 | } | ||
| 122 | |||
| 123 | p = ovl_cache_entry_new(name, len, ino, d_type); | ||
| 124 | if (p == NULL) | ||
| 125 | return -ENOMEM; | ||
| 126 | |||
| 127 | list_add_tail(&p->l_node, rdd->list); | ||
| 128 | rb_link_node(&p->node, parent, newp); | ||
| 129 | rb_insert_color(&p->node, &rdd->root); | ||
| 130 | |||
| 131 | return 0; | ||
| 132 | } | ||
| 133 | |||
| 134 | static int ovl_fill_lower(struct ovl_readdir_data *rdd, | ||
| 135 | const char *name, int namelen, | ||
| 136 | loff_t offset, u64 ino, unsigned int d_type) | ||
| 137 | { | ||
| 138 | struct ovl_cache_entry *p; | ||
| 139 | |||
| 140 | p = ovl_cache_entry_find(&rdd->root, name, namelen); | ||
| 141 | if (p) { | ||
| 142 | list_move_tail(&p->l_node, &rdd->middle); | ||
| 143 | } else { | ||
| 144 | p = ovl_cache_entry_new(name, namelen, ino, d_type); | ||
| 145 | if (p == NULL) | ||
| 146 | rdd->err = -ENOMEM; | ||
| 147 | else | ||
| 148 | list_add_tail(&p->l_node, &rdd->middle); | ||
| 149 | } | ||
| 150 | |||
| 151 | return rdd->err; | ||
| 152 | } | ||
| 153 | |||
| 154 | void ovl_cache_free(struct list_head *list) | ||
| 155 | { | ||
| 156 | struct ovl_cache_entry *p; | ||
| 157 | struct ovl_cache_entry *n; | ||
| 158 | |||
| 159 | list_for_each_entry_safe(p, n, list, l_node) | ||
| 160 | kfree(p); | ||
| 161 | |||
| 162 | INIT_LIST_HEAD(list); | ||
| 163 | } | ||
| 164 | |||
| 165 | static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) | ||
| 166 | { | ||
| 167 | struct ovl_dir_cache *cache = od->cache; | ||
| 168 | |||
| 169 | list_del(&od->cursor.l_node); | ||
| 170 | WARN_ON(cache->refcount <= 0); | ||
| 171 | cache->refcount--; | ||
| 172 | if (!cache->refcount) { | ||
| 173 | if (ovl_dir_cache(dentry) == cache) | ||
| 174 | ovl_set_dir_cache(dentry, NULL); | ||
| 175 | |||
| 176 | ovl_cache_free(&cache->entries); | ||
| 177 | kfree(cache); | ||
| 178 | } | ||
| 179 | } | ||
| 180 | |||
| 181 | static int ovl_fill_merge(void *buf, const char *name, int namelen, | ||
| 182 | loff_t offset, u64 ino, unsigned int d_type) | ||
| 183 | { | ||
| 184 | struct ovl_readdir_data *rdd = buf; | ||
| 185 | |||
| 186 | rdd->count++; | ||
| 187 | if (!rdd->is_merge) | ||
| 188 | return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type); | ||
| 189 | else | ||
| 190 | return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type); | ||
| 191 | } | ||
| 192 | |||
| 193 | static inline int ovl_dir_read(struct path *realpath, | ||
| 194 | struct ovl_readdir_data *rdd) | ||
| 195 | { | ||
| 196 | struct file *realfile; | ||
| 197 | int err; | ||
| 198 | |||
| 199 | realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY); | ||
| 200 | if (IS_ERR(realfile)) | ||
| 201 | return PTR_ERR(realfile); | ||
| 202 | |||
| 203 | rdd->ctx.pos = 0; | ||
| 204 | do { | ||
| 205 | rdd->count = 0; | ||
| 206 | rdd->err = 0; | ||
| 207 | err = iterate_dir(realfile, &rdd->ctx); | ||
| 208 | if (err >= 0) | ||
| 209 | err = rdd->err; | ||
| 210 | } while (!err && rdd->count); | ||
| 211 | fput(realfile); | ||
| 212 | |||
| 213 | return err; | ||
| 214 | } | ||
| 215 | |||
| 216 | static void ovl_dir_reset(struct file *file) | ||
| 217 | { | ||
| 218 | struct ovl_dir_file *od = file->private_data; | ||
| 219 | struct ovl_dir_cache *cache = od->cache; | ||
| 220 | struct dentry *dentry = file->f_path.dentry; | ||
| 221 | enum ovl_path_type type = ovl_path_type(dentry); | ||
| 222 | |||
| 223 | if (cache && ovl_dentry_version_get(dentry) != cache->version) { | ||
| 224 | ovl_cache_put(od, dentry); | ||
| 225 | od->cache = NULL; | ||
| 226 | } | ||
| 227 | WARN_ON(!od->is_real && type != OVL_PATH_MERGE); | ||
| 228 | if (od->is_real && type == OVL_PATH_MERGE) | ||
| 229 | od->is_real = false; | ||
| 230 | } | ||
| 231 | |||
| 232 | static int ovl_dir_mark_whiteouts(struct dentry *dir, | ||
| 233 | struct ovl_readdir_data *rdd) | ||
| 234 | { | ||
| 235 | struct ovl_cache_entry *p; | ||
| 236 | struct dentry *dentry; | ||
| 237 | const struct cred *old_cred; | ||
| 238 | struct cred *override_cred; | ||
| 239 | |||
| 240 | override_cred = prepare_creds(); | ||
| 241 | if (!override_cred) { | ||
| 242 | ovl_cache_free(rdd->list); | ||
| 243 | return -ENOMEM; | ||
| 244 | } | ||
| 245 | |||
| 246 | /* | ||
| 247 | * CAP_DAC_OVERRIDE for lookup | ||
| 248 | */ | ||
| 249 | cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); | ||
| 250 | old_cred = override_creds(override_cred); | ||
| 251 | |||
| 252 | mutex_lock(&dir->d_inode->i_mutex); | ||
| 253 | list_for_each_entry(p, rdd->list, l_node) { | ||
| 254 | if (!p->name) | ||
| 255 | continue; | ||
| 256 | |||
| 257 | if (p->type != DT_CHR) | ||
| 258 | continue; | ||
| 259 | |||
| 260 | dentry = lookup_one_len(p->name, dir, p->len); | ||
| 261 | if (IS_ERR(dentry)) | ||
| 262 | continue; | ||
| 263 | |||
| 264 | p->is_whiteout = ovl_is_whiteout(dentry); | ||
| 265 | dput(dentry); | ||
| 266 | } | ||
| 267 | mutex_unlock(&dir->d_inode->i_mutex); | ||
| 268 | |||
| 269 | revert_creds(old_cred); | ||
| 270 | put_cred(override_cred); | ||
| 271 | |||
| 272 | return 0; | ||
| 273 | } | ||
| 274 | |||
| 275 | static inline int ovl_dir_read_merged(struct path *upperpath, | ||
| 276 | struct path *lowerpath, | ||
| 277 | struct list_head *list) | ||
| 278 | { | ||
| 279 | int err; | ||
| 280 | struct ovl_readdir_data rdd = { | ||
| 281 | .ctx.actor = ovl_fill_merge, | ||
| 282 | .list = list, | ||
| 283 | .root = RB_ROOT, | ||
| 284 | .is_merge = false, | ||
| 285 | }; | ||
| 286 | |||
| 287 | if (upperpath->dentry) { | ||
| 288 | err = ovl_dir_read(upperpath, &rdd); | ||
| 289 | if (err) | ||
| 290 | goto out; | ||
| 291 | |||
| 292 | if (lowerpath->dentry) { | ||
| 293 | err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd); | ||
| 294 | if (err) | ||
| 295 | goto out; | ||
| 296 | } | ||
| 297 | } | ||
| 298 | if (lowerpath->dentry) { | ||
| 299 | /* | ||
| 300 | * Insert lowerpath entries before upperpath ones, this allows | ||
| 301 | * offsets to be reasonably constant | ||
| 302 | */ | ||
| 303 | list_add(&rdd.middle, rdd.list); | ||
| 304 | rdd.is_merge = true; | ||
| 305 | err = ovl_dir_read(lowerpath, &rdd); | ||
| 306 | list_del(&rdd.middle); | ||
| 307 | } | ||
| 308 | out: | ||
| 309 | return err; | ||
| 310 | |||
| 311 | } | ||
| 312 | |||
| 313 | static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) | ||
| 314 | { | ||
| 315 | struct ovl_cache_entry *p; | ||
| 316 | loff_t off = 0; | ||
| 317 | |||
| 318 | list_for_each_entry(p, &od->cache->entries, l_node) { | ||
| 319 | if (!p->name) | ||
| 320 | continue; | ||
| 321 | if (off >= pos) | ||
| 322 | break; | ||
| 323 | off++; | ||
| 324 | } | ||
| 325 | list_move_tail(&od->cursor.l_node, &p->l_node); | ||
| 326 | } | ||
| 327 | |||
| 328 | static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) | ||
| 329 | { | ||
| 330 | int res; | ||
| 331 | struct path lowerpath; | ||
| 332 | struct path upperpath; | ||
| 333 | struct ovl_dir_cache *cache; | ||
| 334 | |||
| 335 | cache = ovl_dir_cache(dentry); | ||
| 336 | if (cache && ovl_dentry_version_get(dentry) == cache->version) { | ||
| 337 | cache->refcount++; | ||
| 338 | return cache; | ||
| 339 | } | ||
| 340 | ovl_set_dir_cache(dentry, NULL); | ||
| 341 | |||
| 342 | cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL); | ||
| 343 | if (!cache) | ||
| 344 | return ERR_PTR(-ENOMEM); | ||
| 345 | |||
| 346 | cache->refcount = 1; | ||
| 347 | INIT_LIST_HEAD(&cache->entries); | ||
| 348 | |||
| 349 | ovl_path_lower(dentry, &lowerpath); | ||
| 350 | ovl_path_upper(dentry, &upperpath); | ||
| 351 | |||
| 352 | res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries); | ||
| 353 | if (res) { | ||
| 354 | ovl_cache_free(&cache->entries); | ||
| 355 | kfree(cache); | ||
| 356 | return ERR_PTR(res); | ||
| 357 | } | ||
| 358 | |||
| 359 | cache->version = ovl_dentry_version_get(dentry); | ||
| 360 | ovl_set_dir_cache(dentry, cache); | ||
| 361 | |||
| 362 | return cache; | ||
| 363 | } | ||
| 364 | |||
| 365 | static int ovl_iterate(struct file *file, struct dir_context *ctx) | ||
| 366 | { | ||
| 367 | struct ovl_dir_file *od = file->private_data; | ||
| 368 | struct dentry *dentry = file->f_path.dentry; | ||
| 369 | |||
| 370 | if (!ctx->pos) | ||
| 371 | ovl_dir_reset(file); | ||
| 372 | |||
| 373 | if (od->is_real) | ||
| 374 | return iterate_dir(od->realfile, ctx); | ||
| 375 | |||
| 376 | if (!od->cache) { | ||
| 377 | struct ovl_dir_cache *cache; | ||
| 378 | |||
| 379 | cache = ovl_cache_get(dentry); | ||
| 380 | if (IS_ERR(cache)) | ||
| 381 | return PTR_ERR(cache); | ||
| 382 | |||
| 383 | od->cache = cache; | ||
| 384 | ovl_seek_cursor(od, ctx->pos); | ||
| 385 | } | ||
| 386 | |||
| 387 | while (od->cursor.l_node.next != &od->cache->entries) { | ||
| 388 | struct ovl_cache_entry *p; | ||
| 389 | |||
| 390 | p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node); | ||
| 391 | /* Skip cursors */ | ||
| 392 | if (p->name) { | ||
| 393 | if (!p->is_whiteout) { | ||
| 394 | if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) | ||
| 395 | break; | ||
| 396 | } | ||
| 397 | ctx->pos++; | ||
| 398 | } | ||
| 399 | list_move(&od->cursor.l_node, &p->l_node); | ||
| 400 | } | ||
| 401 | return 0; | ||
| 402 | } | ||
| 403 | |||
| 404 | static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin) | ||
| 405 | { | ||
| 406 | loff_t res; | ||
| 407 | struct ovl_dir_file *od = file->private_data; | ||
| 408 | |||
| 409 | mutex_lock(&file_inode(file)->i_mutex); | ||
| 410 | if (!file->f_pos) | ||
| 411 | ovl_dir_reset(file); | ||
| 412 | |||
| 413 | if (od->is_real) { | ||
| 414 | res = vfs_llseek(od->realfile, offset, origin); | ||
| 415 | file->f_pos = od->realfile->f_pos; | ||
| 416 | } else { | ||
| 417 | res = -EINVAL; | ||
| 418 | |||
| 419 | switch (origin) { | ||
| 420 | case SEEK_CUR: | ||
| 421 | offset += file->f_pos; | ||
| 422 | break; | ||
| 423 | case SEEK_SET: | ||
| 424 | break; | ||
| 425 | default: | ||
| 426 | goto out_unlock; | ||
| 427 | } | ||
| 428 | if (offset < 0) | ||
| 429 | goto out_unlock; | ||
| 430 | |||
| 431 | if (offset != file->f_pos) { | ||
| 432 | file->f_pos = offset; | ||
| 433 | if (od->cache) | ||
| 434 | ovl_seek_cursor(od, offset); | ||
| 435 | } | ||
| 436 | res = offset; | ||
| 437 | } | ||
| 438 | out_unlock: | ||
| 439 | mutex_unlock(&file_inode(file)->i_mutex); | ||
| 440 | |||
| 441 | return res; | ||
| 442 | } | ||
| 443 | |||
| 444 | static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, | ||
| 445 | int datasync) | ||
| 446 | { | ||
| 447 | struct ovl_dir_file *od = file->private_data; | ||
| 448 | struct dentry *dentry = file->f_path.dentry; | ||
| 449 | struct file *realfile = od->realfile; | ||
| 450 | |||
| 451 | /* | ||
| 452 | * Need to check if we started out being a lower dir, but got copied up | ||
| 453 | */ | ||
| 454 | if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { | ||
| 455 | struct inode *inode = file_inode(file); | ||
| 456 | |||
| 457 | realfile = od->upperfile; | ||
| 458 | if (!realfile) { | ||
| 459 | struct path upperpath; | ||
| 460 | |||
| 461 | ovl_path_upper(dentry, &upperpath); | ||
| 462 | realfile = ovl_path_open(&upperpath, O_RDONLY); | ||
| 463 | mutex_lock(&inode->i_mutex); | ||
| 464 | if (!od->upperfile) { | ||
| 465 | if (IS_ERR(realfile)) { | ||
| 466 | mutex_unlock(&inode->i_mutex); | ||
| 467 | return PTR_ERR(realfile); | ||
| 468 | } | ||
| 469 | od->upperfile = realfile; | ||
| 470 | } else { | ||
| 471 | /* somebody has beaten us to it */ | ||
| 472 | if (!IS_ERR(realfile)) | ||
| 473 | fput(realfile); | ||
| 474 | realfile = od->upperfile; | ||
| 475 | } | ||
| 476 | mutex_unlock(&inode->i_mutex); | ||
| 477 | } | ||
| 478 | } | ||
| 479 | |||
| 480 | return vfs_fsync_range(realfile, start, end, datasync); | ||
| 481 | } | ||
| 482 | |||
| 483 | static int ovl_dir_release(struct inode *inode, struct file *file) | ||
| 484 | { | ||
| 485 | struct ovl_dir_file *od = file->private_data; | ||
| 486 | |||
| 487 | if (od->cache) { | ||
| 488 | mutex_lock(&inode->i_mutex); | ||
| 489 | ovl_cache_put(od, file->f_path.dentry); | ||
| 490 | mutex_unlock(&inode->i_mutex); | ||
| 491 | } | ||
| 492 | fput(od->realfile); | ||
| 493 | if (od->upperfile) | ||
| 494 | fput(od->upperfile); | ||
| 495 | kfree(od); | ||
| 496 | |||
| 497 | return 0; | ||
| 498 | } | ||
| 499 | |||
| 500 | static int ovl_dir_open(struct inode *inode, struct file *file) | ||
| 501 | { | ||
| 502 | struct path realpath; | ||
| 503 | struct file *realfile; | ||
| 504 | struct ovl_dir_file *od; | ||
| 505 | enum ovl_path_type type; | ||
| 506 | |||
| 507 | od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL); | ||
| 508 | if (!od) | ||
| 509 | return -ENOMEM; | ||
| 510 | |||
| 511 | type = ovl_path_real(file->f_path.dentry, &realpath); | ||
| 512 | realfile = ovl_path_open(&realpath, file->f_flags); | ||
| 513 | if (IS_ERR(realfile)) { | ||
| 514 | kfree(od); | ||
| 515 | return PTR_ERR(realfile); | ||
| 516 | } | ||
| 517 | INIT_LIST_HEAD(&od->cursor.l_node); | ||
| 518 | od->realfile = realfile; | ||
| 519 | od->is_real = (type != OVL_PATH_MERGE); | ||
| 520 | od->is_upper = (type != OVL_PATH_LOWER); | ||
| 521 | file->private_data = od; | ||
| 522 | |||
| 523 | return 0; | ||
| 524 | } | ||
| 525 | |||
| 526 | const struct file_operations ovl_dir_operations = { | ||
| 527 | .read = generic_read_dir, | ||
| 528 | .open = ovl_dir_open, | ||
| 529 | .iterate = ovl_iterate, | ||
| 530 | .llseek = ovl_dir_llseek, | ||
| 531 | .fsync = ovl_dir_fsync, | ||
| 532 | .release = ovl_dir_release, | ||
| 533 | }; | ||
| 534 | |||
| 535 | int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) | ||
| 536 | { | ||
| 537 | int err; | ||
| 538 | struct path lowerpath; | ||
| 539 | struct path upperpath; | ||
| 540 | struct ovl_cache_entry *p; | ||
| 541 | |||
| 542 | ovl_path_upper(dentry, &upperpath); | ||
| 543 | ovl_path_lower(dentry, &lowerpath); | ||
| 544 | |||
| 545 | err = ovl_dir_read_merged(&upperpath, &lowerpath, list); | ||
| 546 | if (err) | ||
| 547 | return err; | ||
| 548 | |||
| 549 | err = 0; | ||
| 550 | |||
| 551 | list_for_each_entry(p, list, l_node) { | ||
| 552 | if (p->is_whiteout) | ||
| 553 | continue; | ||
| 554 | |||
| 555 | if (p->name[0] == '.') { | ||
| 556 | if (p->len == 1) | ||
| 557 | continue; | ||
| 558 | if (p->len == 2 && p->name[1] == '.') | ||
| 559 | continue; | ||
| 560 | } | ||
| 561 | err = -ENOTEMPTY; | ||
| 562 | break; | ||
| 563 | } | ||
| 564 | |||
| 565 | return err; | ||
| 566 | } | ||
| 567 | |||
| 568 | void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) | ||
| 569 | { | ||
| 570 | struct ovl_cache_entry *p; | ||
| 571 | |||
| 572 | mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_PARENT); | ||
| 573 | list_for_each_entry(p, list, l_node) { | ||
| 574 | struct dentry *dentry; | ||
| 575 | |||
| 576 | if (!p->is_whiteout) | ||
| 577 | continue; | ||
| 578 | |||
| 579 | dentry = lookup_one_len(p->name, upper, p->len); | ||
| 580 | if (IS_ERR(dentry)) { | ||
| 581 | pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n", | ||
| 582 | upper->d_name.name, p->len, p->name, | ||
| 583 | (int) PTR_ERR(dentry)); | ||
| 584 | continue; | ||
| 585 | } | ||
| 586 | ovl_cleanup(upper->d_inode, dentry); | ||
| 587 | dput(dentry); | ||
| 588 | } | ||
| 589 | mutex_unlock(&upper->d_inode->i_mutex); | ||
| 590 | } | ||
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c new file mode 100644 index 000000000000..08b704cebfc4 --- /dev/null +++ b/fs/overlayfs/super.c | |||
| @@ -0,0 +1,796 @@ | |||
| 1 | /* | ||
| 2 | * | ||
| 3 | * Copyright (C) 2011 Novell Inc. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms of the GNU General Public License version 2 as published by | ||
| 7 | * the Free Software Foundation. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/fs.h> | ||
| 11 | #include <linux/namei.h> | ||
| 12 | #include <linux/xattr.h> | ||
| 13 | #include <linux/security.h> | ||
| 14 | #include <linux/mount.h> | ||
| 15 | #include <linux/slab.h> | ||
| 16 | #include <linux/parser.h> | ||
| 17 | #include <linux/module.h> | ||
| 18 | #include <linux/sched.h> | ||
| 19 | #include <linux/statfs.h> | ||
| 20 | #include <linux/seq_file.h> | ||
| 21 | #include "overlayfs.h" | ||
| 22 | |||
| 23 | MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); | ||
| 24 | MODULE_DESCRIPTION("Overlay filesystem"); | ||
| 25 | MODULE_LICENSE("GPL"); | ||
| 26 | |||
| 27 | #define OVERLAYFS_SUPER_MAGIC 0x794c764f | ||
| 28 | |||
| 29 | struct ovl_config { | ||
| 30 | char *lowerdir; | ||
| 31 | char *upperdir; | ||
| 32 | char *workdir; | ||
| 33 | }; | ||
| 34 | |||
| 35 | /* private information held for overlayfs's superblock */ | ||
| 36 | struct ovl_fs { | ||
| 37 | struct vfsmount *upper_mnt; | ||
| 38 | struct vfsmount *lower_mnt; | ||
| 39 | struct dentry *workdir; | ||
| 40 | long lower_namelen; | ||
| 41 | /* pathnames of lower, upper and work dirs, for show_options */ | ||
| 42 | struct ovl_config config; | ||
| 43 | }; | ||
| 44 | |||
| 45 | struct ovl_dir_cache; | ||
| 46 | |||
| 47 | /* private information held for every overlayfs dentry */ | ||
| 48 | struct ovl_entry { | ||
| 49 | struct dentry *__upperdentry; | ||
| 50 | struct dentry *lowerdentry; | ||
| 51 | struct ovl_dir_cache *cache; | ||
| 52 | union { | ||
| 53 | struct { | ||
| 54 | u64 version; | ||
| 55 | bool opaque; | ||
| 56 | }; | ||
| 57 | struct rcu_head rcu; | ||
| 58 | }; | ||
| 59 | }; | ||
| 60 | |||
| 61 | const char *ovl_opaque_xattr = "trusted.overlay.opaque"; | ||
| 62 | |||
| 63 | |||
| 64 | enum ovl_path_type ovl_path_type(struct dentry *dentry) | ||
| 65 | { | ||
| 66 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 67 | |||
| 68 | if (oe->__upperdentry) { | ||
| 69 | if (oe->lowerdentry) { | ||
| 70 | if (S_ISDIR(dentry->d_inode->i_mode)) | ||
| 71 | return OVL_PATH_MERGE; | ||
| 72 | else | ||
| 73 | return OVL_PATH_UPPER; | ||
| 74 | } else { | ||
| 75 | if (oe->opaque) | ||
| 76 | return OVL_PATH_UPPER; | ||
| 77 | else | ||
| 78 | return OVL_PATH_PURE_UPPER; | ||
| 79 | } | ||
| 80 | } else { | ||
| 81 | return OVL_PATH_LOWER; | ||
| 82 | } | ||
| 83 | } | ||
| 84 | |||
| 85 | static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) | ||
| 86 | { | ||
| 87 | struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry); | ||
| 88 | /* | ||
| 89 | * Make sure to order reads to upperdentry wrt ovl_dentry_update() | ||
| 90 | */ | ||
| 91 | smp_read_barrier_depends(); | ||
| 92 | return upperdentry; | ||
| 93 | } | ||
| 94 | |||
| 95 | void ovl_path_upper(struct dentry *dentry, struct path *path) | ||
| 96 | { | ||
| 97 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
| 98 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 99 | |||
| 100 | path->mnt = ofs->upper_mnt; | ||
| 101 | path->dentry = ovl_upperdentry_dereference(oe); | ||
| 102 | } | ||
| 103 | |||
| 104 | enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) | ||
| 105 | { | ||
| 106 | |||
| 107 | enum ovl_path_type type = ovl_path_type(dentry); | ||
| 108 | |||
| 109 | if (type == OVL_PATH_LOWER) | ||
| 110 | ovl_path_lower(dentry, path); | ||
| 111 | else | ||
| 112 | ovl_path_upper(dentry, path); | ||
| 113 | |||
| 114 | return type; | ||
| 115 | } | ||
| 116 | |||
| 117 | struct dentry *ovl_dentry_upper(struct dentry *dentry) | ||
| 118 | { | ||
| 119 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 120 | |||
| 121 | return ovl_upperdentry_dereference(oe); | ||
| 122 | } | ||
| 123 | |||
| 124 | struct dentry *ovl_dentry_lower(struct dentry *dentry) | ||
| 125 | { | ||
| 126 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 127 | |||
| 128 | return oe->lowerdentry; | ||
| 129 | } | ||
| 130 | |||
| 131 | struct dentry *ovl_dentry_real(struct dentry *dentry) | ||
| 132 | { | ||
| 133 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 134 | struct dentry *realdentry; | ||
| 135 | |||
| 136 | realdentry = ovl_upperdentry_dereference(oe); | ||
| 137 | if (!realdentry) | ||
| 138 | realdentry = oe->lowerdentry; | ||
| 139 | |||
| 140 | return realdentry; | ||
| 141 | } | ||
| 142 | |||
| 143 | struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper) | ||
| 144 | { | ||
| 145 | struct dentry *realdentry; | ||
| 146 | |||
| 147 | realdentry = ovl_upperdentry_dereference(oe); | ||
| 148 | if (realdentry) { | ||
| 149 | *is_upper = true; | ||
| 150 | } else { | ||
| 151 | realdentry = oe->lowerdentry; | ||
| 152 | *is_upper = false; | ||
| 153 | } | ||
| 154 | return realdentry; | ||
| 155 | } | ||
| 156 | |||
| 157 | struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry) | ||
| 158 | { | ||
| 159 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 160 | |||
| 161 | return oe->cache; | ||
| 162 | } | ||
| 163 | |||
| 164 | void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache) | ||
| 165 | { | ||
| 166 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 167 | |||
| 168 | oe->cache = cache; | ||
| 169 | } | ||
| 170 | |||
| 171 | void ovl_path_lower(struct dentry *dentry, struct path *path) | ||
| 172 | { | ||
| 173 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
| 174 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 175 | |||
| 176 | path->mnt = ofs->lower_mnt; | ||
| 177 | path->dentry = oe->lowerdentry; | ||
| 178 | } | ||
| 179 | |||
| 180 | int ovl_want_write(struct dentry *dentry) | ||
| 181 | { | ||
| 182 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
| 183 | return mnt_want_write(ofs->upper_mnt); | ||
| 184 | } | ||
| 185 | |||
| 186 | void ovl_drop_write(struct dentry *dentry) | ||
| 187 | { | ||
| 188 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
| 189 | mnt_drop_write(ofs->upper_mnt); | ||
| 190 | } | ||
| 191 | |||
| 192 | struct dentry *ovl_workdir(struct dentry *dentry) | ||
| 193 | { | ||
| 194 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
| 195 | return ofs->workdir; | ||
| 196 | } | ||
| 197 | |||
| 198 | bool ovl_dentry_is_opaque(struct dentry *dentry) | ||
| 199 | { | ||
| 200 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 201 | return oe->opaque; | ||
| 202 | } | ||
| 203 | |||
| 204 | void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque) | ||
| 205 | { | ||
| 206 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 207 | oe->opaque = opaque; | ||
| 208 | } | ||
| 209 | |||
| 210 | void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry) | ||
| 211 | { | ||
| 212 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 213 | |||
| 214 | WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex)); | ||
| 215 | WARN_ON(oe->__upperdentry); | ||
| 216 | BUG_ON(!upperdentry->d_inode); | ||
| 217 | /* | ||
| 218 | * Make sure upperdentry is consistent before making it visible to | ||
| 219 | * ovl_upperdentry_dereference(). | ||
| 220 | */ | ||
| 221 | smp_wmb(); | ||
| 222 | oe->__upperdentry = upperdentry; | ||
| 223 | } | ||
| 224 | |||
| 225 | void ovl_dentry_version_inc(struct dentry *dentry) | ||
| 226 | { | ||
| 227 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 228 | |||
| 229 | WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); | ||
| 230 | oe->version++; | ||
| 231 | } | ||
| 232 | |||
| 233 | u64 ovl_dentry_version_get(struct dentry *dentry) | ||
| 234 | { | ||
| 235 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 236 | |||
| 237 | WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); | ||
| 238 | return oe->version; | ||
| 239 | } | ||
| 240 | |||
| 241 | bool ovl_is_whiteout(struct dentry *dentry) | ||
| 242 | { | ||
| 243 | struct inode *inode = dentry->d_inode; | ||
| 244 | |||
| 245 | return inode && IS_WHITEOUT(inode); | ||
| 246 | } | ||
| 247 | |||
| 248 | static bool ovl_is_opaquedir(struct dentry *dentry) | ||
| 249 | { | ||
| 250 | int res; | ||
| 251 | char val; | ||
| 252 | struct inode *inode = dentry->d_inode; | ||
| 253 | |||
| 254 | if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr) | ||
| 255 | return false; | ||
| 256 | |||
| 257 | res = inode->i_op->getxattr(dentry, ovl_opaque_xattr, &val, 1); | ||
| 258 | if (res == 1 && val == 'y') | ||
| 259 | return true; | ||
| 260 | |||
| 261 | return false; | ||
| 262 | } | ||
| 263 | |||
| 264 | static void ovl_dentry_release(struct dentry *dentry) | ||
| 265 | { | ||
| 266 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 267 | |||
| 268 | if (oe) { | ||
| 269 | dput(oe->__upperdentry); | ||
| 270 | dput(oe->lowerdentry); | ||
| 271 | kfree_rcu(oe, rcu); | ||
| 272 | } | ||
| 273 | } | ||
| 274 | |||
| 275 | static const struct dentry_operations ovl_dentry_operations = { | ||
| 276 | .d_release = ovl_dentry_release, | ||
| 277 | }; | ||
| 278 | |||
| 279 | static struct ovl_entry *ovl_alloc_entry(void) | ||
| 280 | { | ||
| 281 | return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL); | ||
| 282 | } | ||
| 283 | |||
| 284 | static inline struct dentry *ovl_lookup_real(struct dentry *dir, | ||
| 285 | struct qstr *name) | ||
| 286 | { | ||
| 287 | struct dentry *dentry; | ||
| 288 | |||
| 289 | mutex_lock(&dir->d_inode->i_mutex); | ||
| 290 | dentry = lookup_one_len(name->name, dir, name->len); | ||
| 291 | mutex_unlock(&dir->d_inode->i_mutex); | ||
| 292 | |||
| 293 | if (IS_ERR(dentry)) { | ||
| 294 | if (PTR_ERR(dentry) == -ENOENT) | ||
| 295 | dentry = NULL; | ||
| 296 | } else if (!dentry->d_inode) { | ||
| 297 | dput(dentry); | ||
| 298 | dentry = NULL; | ||
| 299 | } | ||
| 300 | return dentry; | ||
| 301 | } | ||
| 302 | |||
| 303 | struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, | ||
| 304 | unsigned int flags) | ||
| 305 | { | ||
| 306 | struct ovl_entry *oe; | ||
| 307 | struct dentry *upperdir; | ||
| 308 | struct dentry *lowerdir; | ||
| 309 | struct dentry *upperdentry = NULL; | ||
| 310 | struct dentry *lowerdentry = NULL; | ||
| 311 | struct inode *inode = NULL; | ||
| 312 | int err; | ||
| 313 | |||
| 314 | err = -ENOMEM; | ||
| 315 | oe = ovl_alloc_entry(); | ||
| 316 | if (!oe) | ||
| 317 | goto out; | ||
| 318 | |||
| 319 | upperdir = ovl_dentry_upper(dentry->d_parent); | ||
| 320 | lowerdir = ovl_dentry_lower(dentry->d_parent); | ||
| 321 | |||
| 322 | if (upperdir) { | ||
| 323 | upperdentry = ovl_lookup_real(upperdir, &dentry->d_name); | ||
| 324 | err = PTR_ERR(upperdentry); | ||
| 325 | if (IS_ERR(upperdentry)) | ||
| 326 | goto out_put_dir; | ||
| 327 | |||
| 328 | if (lowerdir && upperdentry) { | ||
| 329 | if (ovl_is_whiteout(upperdentry)) { | ||
| 330 | dput(upperdentry); | ||
| 331 | upperdentry = NULL; | ||
| 332 | oe->opaque = true; | ||
| 333 | } else if (ovl_is_opaquedir(upperdentry)) { | ||
| 334 | oe->opaque = true; | ||
| 335 | } | ||
| 336 | } | ||
| 337 | } | ||
| 338 | if (lowerdir && !oe->opaque) { | ||
| 339 | lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name); | ||
| 340 | err = PTR_ERR(lowerdentry); | ||
| 341 | if (IS_ERR(lowerdentry)) | ||
| 342 | goto out_dput_upper; | ||
| 343 | } | ||
| 344 | |||
| 345 | if (lowerdentry && upperdentry && | ||
| 346 | (!S_ISDIR(upperdentry->d_inode->i_mode) || | ||
| 347 | !S_ISDIR(lowerdentry->d_inode->i_mode))) { | ||
| 348 | dput(lowerdentry); | ||
| 349 | lowerdentry = NULL; | ||
| 350 | oe->opaque = true; | ||
| 351 | } | ||
| 352 | |||
| 353 | if (lowerdentry || upperdentry) { | ||
| 354 | struct dentry *realdentry; | ||
| 355 | |||
| 356 | realdentry = upperdentry ? upperdentry : lowerdentry; | ||
| 357 | err = -ENOMEM; | ||
| 358 | inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode, | ||
| 359 | oe); | ||
| 360 | if (!inode) | ||
| 361 | goto out_dput; | ||
| 362 | ovl_copyattr(realdentry->d_inode, inode); | ||
| 363 | } | ||
| 364 | |||
| 365 | oe->__upperdentry = upperdentry; | ||
| 366 | oe->lowerdentry = lowerdentry; | ||
| 367 | |||
| 368 | dentry->d_fsdata = oe; | ||
| 369 | d_add(dentry, inode); | ||
| 370 | |||
| 371 | return NULL; | ||
| 372 | |||
| 373 | out_dput: | ||
| 374 | dput(lowerdentry); | ||
| 375 | out_dput_upper: | ||
| 376 | dput(upperdentry); | ||
| 377 | out_put_dir: | ||
| 378 | kfree(oe); | ||
| 379 | out: | ||
| 380 | return ERR_PTR(err); | ||
| 381 | } | ||
| 382 | |||
| 383 | struct file *ovl_path_open(struct path *path, int flags) | ||
| 384 | { | ||
| 385 | return dentry_open(path, flags, current_cred()); | ||
| 386 | } | ||
| 387 | |||
| 388 | static void ovl_put_super(struct super_block *sb) | ||
| 389 | { | ||
| 390 | struct ovl_fs *ufs = sb->s_fs_info; | ||
| 391 | |||
| 392 | dput(ufs->workdir); | ||
| 393 | mntput(ufs->upper_mnt); | ||
| 394 | mntput(ufs->lower_mnt); | ||
| 395 | |||
| 396 | kfree(ufs->config.lowerdir); | ||
| 397 | kfree(ufs->config.upperdir); | ||
| 398 | kfree(ufs->config.workdir); | ||
| 399 | kfree(ufs); | ||
| 400 | } | ||
| 401 | |||
| 402 | /** | ||
| 403 | * ovl_statfs | ||
| 404 | * @dentry: A dentry on the overlayfs super block | ||
| 405 | * @buf: The struct kstatfs to fill in with stats | ||
| 406 | * | ||
| 407 | * Get the filesystem statistics. As writes always target the upper layer | ||
| 408 | * filesystem, pass the statfs request on to that filesystem. | ||
| 409 | */ | ||
| 410 | static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
| 411 | { | ||
| 412 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
| 413 | struct dentry *root_dentry = dentry->d_sb->s_root; | ||
| 414 | struct path path; | ||
| 415 | int err; | ||
| 416 | |||
| 417 | ovl_path_upper(root_dentry, &path); | ||
| 418 | |||
| 419 | err = vfs_statfs(&path, buf); | ||
| 420 | if (!err) { | ||
| 421 | buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen); | ||
| 422 | buf->f_type = OVERLAYFS_SUPER_MAGIC; | ||
| 423 | } | ||
| 424 | |||
| 425 | return err; | ||
| 426 | } | ||
| 427 | |||
| 428 | /** | ||
| 429 | * ovl_show_options | ||
| 430 | * | ||
| 431 | * Prints the mount options for a given superblock. | ||
| 432 | * Returns zero; does not fail. | ||
| 433 | */ | ||
| 434 | static int ovl_show_options(struct seq_file *m, struct dentry *dentry) | ||
| 435 | { | ||
| 436 | struct super_block *sb = dentry->d_sb; | ||
| 437 | struct ovl_fs *ufs = sb->s_fs_info; | ||
| 438 | |||
| 439 | seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir); | ||
| 440 | seq_printf(m, ",upperdir=%s", ufs->config.upperdir); | ||
| 441 | seq_printf(m, ",workdir=%s", ufs->config.workdir); | ||
| 442 | return 0; | ||
| 443 | } | ||
| 444 | |||
| 445 | static const struct super_operations ovl_super_operations = { | ||
| 446 | .put_super = ovl_put_super, | ||
| 447 | .statfs = ovl_statfs, | ||
| 448 | .show_options = ovl_show_options, | ||
| 449 | }; | ||
| 450 | |||
| 451 | enum { | ||
| 452 | OPT_LOWERDIR, | ||
| 453 | OPT_UPPERDIR, | ||
| 454 | OPT_WORKDIR, | ||
| 455 | OPT_ERR, | ||
| 456 | }; | ||
| 457 | |||
| 458 | static const match_table_t ovl_tokens = { | ||
| 459 | {OPT_LOWERDIR, "lowerdir=%s"}, | ||
| 460 | {OPT_UPPERDIR, "upperdir=%s"}, | ||
| 461 | {OPT_WORKDIR, "workdir=%s"}, | ||
| 462 | {OPT_ERR, NULL} | ||
| 463 | }; | ||
| 464 | |||
| 465 | static int ovl_parse_opt(char *opt, struct ovl_config *config) | ||
| 466 | { | ||
| 467 | char *p; | ||
| 468 | |||
| 469 | while ((p = strsep(&opt, ",")) != NULL) { | ||
| 470 | int token; | ||
| 471 | substring_t args[MAX_OPT_ARGS]; | ||
| 472 | |||
| 473 | if (!*p) | ||
| 474 | continue; | ||
| 475 | |||
| 476 | token = match_token(p, ovl_tokens, args); | ||
| 477 | switch (token) { | ||
| 478 | case OPT_UPPERDIR: | ||
| 479 | kfree(config->upperdir); | ||
| 480 | config->upperdir = match_strdup(&args[0]); | ||
| 481 | if (!config->upperdir) | ||
| 482 | return -ENOMEM; | ||
| 483 | break; | ||
| 484 | |||
| 485 | case OPT_LOWERDIR: | ||
| 486 | kfree(config->lowerdir); | ||
| 487 | config->lowerdir = match_strdup(&args[0]); | ||
| 488 | if (!config->lowerdir) | ||
| 489 | return -ENOMEM; | ||
| 490 | break; | ||
| 491 | |||
| 492 | case OPT_WORKDIR: | ||
| 493 | kfree(config->workdir); | ||
| 494 | config->workdir = match_strdup(&args[0]); | ||
| 495 | if (!config->workdir) | ||
| 496 | return -ENOMEM; | ||
| 497 | break; | ||
| 498 | |||
| 499 | default: | ||
| 500 | return -EINVAL; | ||
| 501 | } | ||
| 502 | } | ||
| 503 | return 0; | ||
| 504 | } | ||
| 505 | |||
| 506 | #define OVL_WORKDIR_NAME "work" | ||
| 507 | |||
| 508 | static struct dentry *ovl_workdir_create(struct vfsmount *mnt, | ||
| 509 | struct dentry *dentry) | ||
| 510 | { | ||
| 511 | struct inode *dir = dentry->d_inode; | ||
| 512 | struct dentry *work; | ||
| 513 | int err; | ||
| 514 | bool retried = false; | ||
| 515 | |||
| 516 | err = mnt_want_write(mnt); | ||
| 517 | if (err) | ||
| 518 | return ERR_PTR(err); | ||
| 519 | |||
| 520 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | ||
| 521 | retry: | ||
| 522 | work = lookup_one_len(OVL_WORKDIR_NAME, dentry, | ||
| 523 | strlen(OVL_WORKDIR_NAME)); | ||
| 524 | |||
| 525 | if (!IS_ERR(work)) { | ||
| 526 | struct kstat stat = { | ||
| 527 | .mode = S_IFDIR | 0, | ||
| 528 | }; | ||
| 529 | |||
| 530 | if (work->d_inode) { | ||
| 531 | err = -EEXIST; | ||
| 532 | if (retried) | ||
| 533 | goto out_dput; | ||
| 534 | |||
| 535 | retried = true; | ||
| 536 | ovl_cleanup(dir, work); | ||
| 537 | dput(work); | ||
| 538 | goto retry; | ||
| 539 | } | ||
| 540 | |||
| 541 | err = ovl_create_real(dir, work, &stat, NULL, NULL, true); | ||
| 542 | if (err) | ||
| 543 | goto out_dput; | ||
| 544 | } | ||
| 545 | out_unlock: | ||
| 546 | mutex_unlock(&dir->i_mutex); | ||
| 547 | mnt_drop_write(mnt); | ||
| 548 | |||
| 549 | return work; | ||
| 550 | |||
| 551 | out_dput: | ||
| 552 | dput(work); | ||
| 553 | work = ERR_PTR(err); | ||
| 554 | goto out_unlock; | ||
| 555 | } | ||
| 556 | |||
| 557 | static int ovl_mount_dir(const char *name, struct path *path) | ||
| 558 | { | ||
| 559 | int err; | ||
| 560 | |||
| 561 | err = kern_path(name, LOOKUP_FOLLOW, path); | ||
| 562 | if (err) { | ||
| 563 | pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); | ||
| 564 | err = -EINVAL; | ||
| 565 | } | ||
| 566 | return err; | ||
| 567 | } | ||
| 568 | |||
| 569 | static bool ovl_is_allowed_fs_type(struct dentry *root) | ||
| 570 | { | ||
| 571 | const struct dentry_operations *dop = root->d_op; | ||
| 572 | |||
| 573 | /* | ||
| 574 | * We don't support: | ||
| 575 | * - automount filesystems | ||
| 576 | * - filesystems with revalidate (FIXME for lower layer) | ||
| 577 | * - filesystems with case insensitive names | ||
| 578 | */ | ||
| 579 | if (dop && | ||
| 580 | (dop->d_manage || dop->d_automount || | ||
| 581 | dop->d_revalidate || dop->d_weak_revalidate || | ||
| 582 | dop->d_compare || dop->d_hash)) { | ||
| 583 | return false; | ||
| 584 | } | ||
| 585 | return true; | ||
| 586 | } | ||
| 587 | |||
| 588 | /* Workdir should not be subdir of upperdir and vice versa */ | ||
| 589 | static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) | ||
| 590 | { | ||
| 591 | bool ok = false; | ||
| 592 | |||
| 593 | if (workdir != upperdir) { | ||
| 594 | ok = (lock_rename(workdir, upperdir) == NULL); | ||
| 595 | unlock_rename(workdir, upperdir); | ||
| 596 | } | ||
| 597 | return ok; | ||
| 598 | } | ||
| 599 | |||
| 600 | static int ovl_fill_super(struct super_block *sb, void *data, int silent) | ||
| 601 | { | ||
| 602 | struct path lowerpath; | ||
| 603 | struct path upperpath; | ||
| 604 | struct path workpath; | ||
| 605 | struct inode *root_inode; | ||
| 606 | struct dentry *root_dentry; | ||
| 607 | struct ovl_entry *oe; | ||
| 608 | struct ovl_fs *ufs; | ||
| 609 | struct kstatfs statfs; | ||
| 610 | int err; | ||
| 611 | |||
| 612 | err = -ENOMEM; | ||
| 613 | ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); | ||
| 614 | if (!ufs) | ||
| 615 | goto out; | ||
| 616 | |||
| 617 | err = ovl_parse_opt((char *) data, &ufs->config); | ||
| 618 | if (err) | ||
| 619 | goto out_free_config; | ||
| 620 | |||
| 621 | /* FIXME: workdir is not needed for a R/O mount */ | ||
| 622 | err = -EINVAL; | ||
| 623 | if (!ufs->config.upperdir || !ufs->config.lowerdir || | ||
| 624 | !ufs->config.workdir) { | ||
| 625 | pr_err("overlayfs: missing upperdir or lowerdir or workdir\n"); | ||
| 626 | goto out_free_config; | ||
| 627 | } | ||
| 628 | |||
| 629 | err = -ENOMEM; | ||
| 630 | oe = ovl_alloc_entry(); | ||
| 631 | if (oe == NULL) | ||
| 632 | goto out_free_config; | ||
| 633 | |||
| 634 | err = ovl_mount_dir(ufs->config.upperdir, &upperpath); | ||
| 635 | if (err) | ||
| 636 | goto out_free_oe; | ||
| 637 | |||
| 638 | err = ovl_mount_dir(ufs->config.lowerdir, &lowerpath); | ||
| 639 | if (err) | ||
| 640 | goto out_put_upperpath; | ||
| 641 | |||
| 642 | err = ovl_mount_dir(ufs->config.workdir, &workpath); | ||
| 643 | if (err) | ||
| 644 | goto out_put_lowerpath; | ||
| 645 | |||
| 646 | err = -EINVAL; | ||
| 647 | if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) || | ||
| 648 | !S_ISDIR(lowerpath.dentry->d_inode->i_mode) || | ||
| 649 | !S_ISDIR(workpath.dentry->d_inode->i_mode)) { | ||
| 650 | pr_err("overlayfs: upperdir or lowerdir or workdir not a directory\n"); | ||
| 651 | goto out_put_workpath; | ||
| 652 | } | ||
| 653 | |||
| 654 | if (upperpath.mnt != workpath.mnt) { | ||
| 655 | pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); | ||
| 656 | goto out_put_workpath; | ||
| 657 | } | ||
| 658 | if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { | ||
| 659 | pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); | ||
| 660 | goto out_put_workpath; | ||
| 661 | } | ||
| 662 | |||
| 663 | if (!ovl_is_allowed_fs_type(upperpath.dentry)) { | ||
| 664 | pr_err("overlayfs: filesystem of upperdir is not supported\n"); | ||
| 665 | goto out_put_workpath; | ||
| 666 | } | ||
| 667 | |||
| 668 | if (!ovl_is_allowed_fs_type(lowerpath.dentry)) { | ||
| 669 | pr_err("overlayfs: filesystem of lowerdir is not supported\n"); | ||
| 670 | goto out_put_workpath; | ||
| 671 | } | ||
| 672 | |||
| 673 | err = vfs_statfs(&lowerpath, &statfs); | ||
| 674 | if (err) { | ||
| 675 | pr_err("overlayfs: statfs failed on lowerpath\n"); | ||
| 676 | goto out_put_workpath; | ||
| 677 | } | ||
| 678 | ufs->lower_namelen = statfs.f_namelen; | ||
| 679 | |||
| 680 | sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth, | ||
| 681 | lowerpath.mnt->mnt_sb->s_stack_depth) + 1; | ||
| 682 | |||
| 683 | err = -EINVAL; | ||
| 684 | if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { | ||
| 685 | pr_err("overlayfs: maximum fs stacking depth exceeded\n"); | ||
| 686 | goto out_put_workpath; | ||
| 687 | } | ||
| 688 | |||
| 689 | ufs->upper_mnt = clone_private_mount(&upperpath); | ||
| 690 | err = PTR_ERR(ufs->upper_mnt); | ||
| 691 | if (IS_ERR(ufs->upper_mnt)) { | ||
| 692 | pr_err("overlayfs: failed to clone upperpath\n"); | ||
| 693 | goto out_put_workpath; | ||
| 694 | } | ||
| 695 | |||
| 696 | ufs->lower_mnt = clone_private_mount(&lowerpath); | ||
| 697 | err = PTR_ERR(ufs->lower_mnt); | ||
| 698 | if (IS_ERR(ufs->lower_mnt)) { | ||
| 699 | pr_err("overlayfs: failed to clone lowerpath\n"); | ||
| 700 | goto out_put_upper_mnt; | ||
| 701 | } | ||
| 702 | |||
| 703 | ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); | ||
| 704 | err = PTR_ERR(ufs->workdir); | ||
| 705 | if (IS_ERR(ufs->workdir)) { | ||
| 706 | pr_err("overlayfs: failed to create directory %s/%s\n", | ||
| 707 | ufs->config.workdir, OVL_WORKDIR_NAME); | ||
| 708 | goto out_put_lower_mnt; | ||
| 709 | } | ||
| 710 | |||
| 711 | /* | ||
| 712 | * Make lower_mnt R/O. That way fchmod/fchown on lower file | ||
| 713 | * will fail instead of modifying lower fs. | ||
| 714 | */ | ||
| 715 | ufs->lower_mnt->mnt_flags |= MNT_READONLY; | ||
| 716 | |||
| 717 | /* If the upper fs is r/o, we mark overlayfs r/o too */ | ||
| 718 | if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY) | ||
| 719 | sb->s_flags |= MS_RDONLY; | ||
| 720 | |||
| 721 | sb->s_d_op = &ovl_dentry_operations; | ||
| 722 | |||
| 723 | err = -ENOMEM; | ||
| 724 | root_inode = ovl_new_inode(sb, S_IFDIR, oe); | ||
| 725 | if (!root_inode) | ||
| 726 | goto out_put_workdir; | ||
| 727 | |||
| 728 | root_dentry = d_make_root(root_inode); | ||
| 729 | if (!root_dentry) | ||
| 730 | goto out_put_workdir; | ||
| 731 | |||
| 732 | mntput(upperpath.mnt); | ||
| 733 | mntput(lowerpath.mnt); | ||
| 734 | path_put(&workpath); | ||
| 735 | |||
| 736 | oe->__upperdentry = upperpath.dentry; | ||
| 737 | oe->lowerdentry = lowerpath.dentry; | ||
| 738 | |||
| 739 | root_dentry->d_fsdata = oe; | ||
| 740 | |||
| 741 | sb->s_magic = OVERLAYFS_SUPER_MAGIC; | ||
| 742 | sb->s_op = &ovl_super_operations; | ||
| 743 | sb->s_root = root_dentry; | ||
| 744 | sb->s_fs_info = ufs; | ||
| 745 | |||
| 746 | return 0; | ||
| 747 | |||
| 748 | out_put_workdir: | ||
| 749 | dput(ufs->workdir); | ||
| 750 | out_put_lower_mnt: | ||
| 751 | mntput(ufs->lower_mnt); | ||
| 752 | out_put_upper_mnt: | ||
| 753 | mntput(ufs->upper_mnt); | ||
| 754 | out_put_workpath: | ||
| 755 | path_put(&workpath); | ||
| 756 | out_put_lowerpath: | ||
| 757 | path_put(&lowerpath); | ||
| 758 | out_put_upperpath: | ||
| 759 | path_put(&upperpath); | ||
| 760 | out_free_oe: | ||
| 761 | kfree(oe); | ||
| 762 | out_free_config: | ||
| 763 | kfree(ufs->config.lowerdir); | ||
| 764 | kfree(ufs->config.upperdir); | ||
| 765 | kfree(ufs->config.workdir); | ||
| 766 | kfree(ufs); | ||
| 767 | out: | ||
| 768 | return err; | ||
| 769 | } | ||
| 770 | |||
| 771 | static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, | ||
| 772 | const char *dev_name, void *raw_data) | ||
| 773 | { | ||
| 774 | return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); | ||
| 775 | } | ||
| 776 | |||
| 777 | static struct file_system_type ovl_fs_type = { | ||
| 778 | .owner = THIS_MODULE, | ||
| 779 | .name = "overlayfs", | ||
| 780 | .mount = ovl_mount, | ||
| 781 | .kill_sb = kill_anon_super, | ||
| 782 | }; | ||
| 783 | MODULE_ALIAS_FS("overlayfs"); | ||
| 784 | |||
| 785 | static int __init ovl_init(void) | ||
| 786 | { | ||
| 787 | return register_filesystem(&ovl_fs_type); | ||
| 788 | } | ||
| 789 | |||
| 790 | static void __exit ovl_exit(void) | ||
| 791 | { | ||
| 792 | unregister_filesystem(&ovl_fs_type); | ||
| 793 | } | ||
| 794 | |||
| 795 | module_init(ovl_init); | ||
| 796 | module_exit(ovl_exit); | ||
diff --git a/fs/splice.c b/fs/splice.c index f5cb9ba84510..75c6058eabf2 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
| @@ -1330,6 +1330,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | |||
| 1330 | 1330 | ||
| 1331 | return ret; | 1331 | return ret; |
| 1332 | } | 1332 | } |
| 1333 | EXPORT_SYMBOL(do_splice_direct); | ||
| 1333 | 1334 | ||
| 1334 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, | 1335 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, |
| 1335 | struct pipe_inode_info *opipe, | 1336 | struct pipe_inode_info *opipe, |
diff --git a/include/linux/fs.h b/include/linux/fs.h index a957d4366c24..4e41a4a331bb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -223,6 +223,13 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, | |||
| 223 | #define ATTR_TIMES_SET (1 << 16) | 223 | #define ATTR_TIMES_SET (1 << 16) |
| 224 | 224 | ||
| 225 | /* | 225 | /* |
| 226 | * Whiteout is represented by a char device. The following constants define the | ||
| 227 | * mode and device number to use. | ||
| 228 | */ | ||
| 229 | #define WHITEOUT_MODE 0 | ||
| 230 | #define WHITEOUT_DEV 0 | ||
| 231 | |||
| 232 | /* | ||
| 226 | * This is the Inode Attributes structure, used for notify_change(). It | 233 | * This is the Inode Attributes structure, used for notify_change(). It |
| 227 | * uses the above definitions as flags, to know which values have changed. | 234 | * uses the above definitions as flags, to know which values have changed. |
| 228 | * Also, in this manner, a Filesystem can look at only the values it cares | 235 | * Also, in this manner, a Filesystem can look at only the values it cares |
| @@ -254,6 +261,12 @@ struct iattr { | |||
| 254 | */ | 261 | */ |
| 255 | #include <linux/quota.h> | 262 | #include <linux/quota.h> |
| 256 | 263 | ||
| 264 | /* | ||
| 265 | * Maximum number of layers of fs stack. Needs to be limited to | ||
| 266 | * prevent kernel stack overflow | ||
| 267 | */ | ||
| 268 | #define FILESYSTEM_MAX_STACK_DEPTH 2 | ||
| 269 | |||
| 257 | /** | 270 | /** |
| 258 | * enum positive_aop_returns - aop return codes with specific semantics | 271 | * enum positive_aop_returns - aop return codes with specific semantics |
| 259 | * | 272 | * |
| @@ -1266,6 +1279,11 @@ struct super_block { | |||
| 1266 | struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; | 1279 | struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; |
| 1267 | struct list_lru s_inode_lru ____cacheline_aligned_in_smp; | 1280 | struct list_lru s_inode_lru ____cacheline_aligned_in_smp; |
| 1268 | struct rcu_head rcu; | 1281 | struct rcu_head rcu; |
| 1282 | |||
| 1283 | /* | ||
| 1284 | * Indicates how deep in a filesystem stack this SB is | ||
| 1285 | */ | ||
| 1286 | int s_stack_depth; | ||
| 1269 | }; | 1287 | }; |
| 1270 | 1288 | ||
| 1271 | extern struct timespec current_fs_time(struct super_block *sb); | 1289 | extern struct timespec current_fs_time(struct super_block *sb); |
| @@ -1398,6 +1416,7 @@ extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct ino | |||
| 1398 | extern int vfs_rmdir(struct inode *, struct dentry *); | 1416 | extern int vfs_rmdir(struct inode *, struct dentry *); |
| 1399 | extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); | 1417 | extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); |
| 1400 | extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); | 1418 | extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); |
| 1419 | extern int vfs_whiteout(struct inode *, struct dentry *); | ||
| 1401 | 1420 | ||
| 1402 | /* | 1421 | /* |
| 1403 | * VFS dentry helper functions. | 1422 | * VFS dentry helper functions. |
| @@ -1528,6 +1547,9 @@ struct inode_operations { | |||
| 1528 | umode_t create_mode, int *opened); | 1547 | umode_t create_mode, int *opened); |
| 1529 | int (*tmpfile) (struct inode *, struct dentry *, umode_t); | 1548 | int (*tmpfile) (struct inode *, struct dentry *, umode_t); |
| 1530 | int (*set_acl)(struct inode *, struct posix_acl *, int); | 1549 | int (*set_acl)(struct inode *, struct posix_acl *, int); |
| 1550 | |||
| 1551 | /* WARNING: probably going away soon, do not use! */ | ||
| 1552 | int (*dentry_open)(struct dentry *, struct file *, const struct cred *); | ||
| 1531 | } ____cacheline_aligned; | 1553 | } ____cacheline_aligned; |
| 1532 | 1554 | ||
| 1533 | ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, | 1555 | ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, |
| @@ -1625,6 +1647,9 @@ struct super_operations { | |||
| 1625 | #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) | 1647 | #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) |
| 1626 | #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) | 1648 | #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) |
| 1627 | 1649 | ||
| 1650 | #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ | ||
| 1651 | (inode)->i_rdev == WHITEOUT_DEV) | ||
| 1652 | |||
| 1628 | /* | 1653 | /* |
| 1629 | * Inode state bits. Protected by inode->i_lock | 1654 | * Inode state bits. Protected by inode->i_lock |
| 1630 | * | 1655 | * |
| @@ -2040,6 +2065,7 @@ extern struct file *file_open_name(struct filename *, int, umode_t); | |||
| 2040 | extern struct file *filp_open(const char *, int, umode_t); | 2065 | extern struct file *filp_open(const char *, int, umode_t); |
| 2041 | extern struct file *file_open_root(struct dentry *, struct vfsmount *, | 2066 | extern struct file *file_open_root(struct dentry *, struct vfsmount *, |
| 2042 | const char *, int); | 2067 | const char *, int); |
| 2068 | extern int vfs_open(const struct path *, struct file *, const struct cred *); | ||
| 2043 | extern struct file * dentry_open(const struct path *, int, const struct cred *); | 2069 | extern struct file * dentry_open(const struct path *, int, const struct cred *); |
| 2044 | extern int filp_close(struct file *, fl_owner_t id); | 2070 | extern int filp_close(struct file *, fl_owner_t id); |
| 2045 | 2071 | ||
| @@ -2253,7 +2279,9 @@ extern sector_t bmap(struct inode *, sector_t); | |||
| 2253 | #endif | 2279 | #endif |
| 2254 | extern int notify_change(struct dentry *, struct iattr *, struct inode **); | 2280 | extern int notify_change(struct dentry *, struct iattr *, struct inode **); |
| 2255 | extern int inode_permission(struct inode *, int); | 2281 | extern int inode_permission(struct inode *, int); |
| 2282 | extern int __inode_permission(struct inode *, int); | ||
| 2256 | extern int generic_permission(struct inode *, int); | 2283 | extern int generic_permission(struct inode *, int); |
| 2284 | extern int __check_sticky(struct inode *dir, struct inode *inode); | ||
| 2257 | 2285 | ||
| 2258 | static inline bool execute_ok(struct inode *inode) | 2286 | static inline bool execute_ok(struct inode *inode) |
| 2259 | { | 2287 | { |
| @@ -2452,6 +2480,9 @@ extern ssize_t iter_file_splice_write(struct pipe_inode_info *, | |||
| 2452 | struct file *, loff_t *, size_t, unsigned int); | 2480 | struct file *, loff_t *, size_t, unsigned int); |
| 2453 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, | 2481 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, |
| 2454 | struct file *out, loff_t *, size_t len, unsigned int flags); | 2482 | struct file *out, loff_t *, size_t len, unsigned int flags); |
| 2483 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | ||
| 2484 | loff_t *opos, size_t len, unsigned int flags); | ||
| 2485 | |||
| 2455 | 2486 | ||
| 2456 | extern void | 2487 | extern void |
| 2457 | file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); | 2488 | file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); |
| @@ -2737,6 +2768,14 @@ static inline int is_sxid(umode_t mode) | |||
| 2737 | return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); | 2768 | return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); |
| 2738 | } | 2769 | } |
| 2739 | 2770 | ||
| 2771 | static inline int check_sticky(struct inode *dir, struct inode *inode) | ||
| 2772 | { | ||
| 2773 | if (!(dir->i_mode & S_ISVTX)) | ||
| 2774 | return 0; | ||
| 2775 | |||
| 2776 | return __check_sticky(dir, inode); | ||
| 2777 | } | ||
| 2778 | |||
| 2740 | static inline void inode_has_no_xattr(struct inode *inode) | 2779 | static inline void inode_has_no_xattr(struct inode *inode) |
| 2741 | { | 2780 | { |
| 2742 | if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) | 2781 | if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) |
diff --git a/include/linux/mount.h b/include/linux/mount.h index 9262e4bf0cc3..c2c561dc0114 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h | |||
| @@ -81,6 +81,9 @@ extern struct vfsmount *mntget(struct vfsmount *mnt); | |||
| 81 | extern struct vfsmount *mnt_clone_internal(struct path *path); | 81 | extern struct vfsmount *mnt_clone_internal(struct path *path); |
| 82 | extern int __mnt_is_readonly(struct vfsmount *mnt); | 82 | extern int __mnt_is_readonly(struct vfsmount *mnt); |
| 83 | 83 | ||
| 84 | struct path; | ||
| 85 | extern struct vfsmount *clone_private_mount(struct path *path); | ||
| 86 | |||
| 84 | struct file_system_type; | 87 | struct file_system_type; |
| 85 | extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, | 88 | extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, |
| 86 | int flags, const char *name, | 89 | int flags, const char *name, |
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index ca1a11bb4443..3735fa0a6784 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h | |||
| @@ -37,6 +37,7 @@ | |||
| 37 | 37 | ||
| 38 | #define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ | 38 | #define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ |
| 39 | #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ | 39 | #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ |
| 40 | #define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ | ||
| 40 | 41 | ||
| 41 | struct fstrim_range { | 42 | struct fstrim_range { |
| 42 | __u64 start; | 43 | __u64 start; |
diff --git a/mm/shmem.c b/mm/shmem.c index cd6fc7590e54..185836ba53ef 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
| @@ -2345,6 +2345,32 @@ static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, stru | |||
| 2345 | return 0; | 2345 | return 0; |
| 2346 | } | 2346 | } |
| 2347 | 2347 | ||
| 2348 | static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry) | ||
| 2349 | { | ||
| 2350 | struct dentry *whiteout; | ||
| 2351 | int error; | ||
| 2352 | |||
| 2353 | whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name); | ||
| 2354 | if (!whiteout) | ||
| 2355 | return -ENOMEM; | ||
| 2356 | |||
| 2357 | error = shmem_mknod(old_dir, whiteout, | ||
| 2358 | S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); | ||
| 2359 | dput(whiteout); | ||
| 2360 | if (error) | ||
| 2361 | return error; | ||
| 2362 | |||
| 2363 | /* | ||
| 2364 | * Cheat and hash the whiteout while the old dentry is still in | ||
| 2365 | * place, instead of playing games with FS_RENAME_DOES_D_MOVE. | ||
| 2366 | * | ||
| 2367 | * d_lookup() will consistently find one of them at this point, | ||
| 2368 | * not sure which one, but that isn't even important. | ||
| 2369 | */ | ||
| 2370 | d_rehash(whiteout); | ||
| 2371 | return 0; | ||
| 2372 | } | ||
| 2373 | |||
| 2348 | /* | 2374 | /* |
| 2349 | * The VFS layer already does all the dentry stuff for rename, | 2375 | * The VFS layer already does all the dentry stuff for rename, |
| 2350 | * we just have to decrement the usage count for the target if | 2376 | * we just have to decrement the usage count for the target if |
| @@ -2356,7 +2382,7 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc | |||
| 2356 | struct inode *inode = old_dentry->d_inode; | 2382 | struct inode *inode = old_dentry->d_inode; |
| 2357 | int they_are_dirs = S_ISDIR(inode->i_mode); | 2383 | int they_are_dirs = S_ISDIR(inode->i_mode); |
| 2358 | 2384 | ||
| 2359 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) | 2385 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) |
| 2360 | return -EINVAL; | 2386 | return -EINVAL; |
| 2361 | 2387 | ||
| 2362 | if (flags & RENAME_EXCHANGE) | 2388 | if (flags & RENAME_EXCHANGE) |
| @@ -2365,6 +2391,14 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc | |||
| 2365 | if (!simple_empty(new_dentry)) | 2391 | if (!simple_empty(new_dentry)) |
| 2366 | return -ENOTEMPTY; | 2392 | return -ENOTEMPTY; |
| 2367 | 2393 | ||
| 2394 | if (flags & RENAME_WHITEOUT) { | ||
| 2395 | int error; | ||
| 2396 | |||
| 2397 | error = shmem_whiteout(old_dir, old_dentry); | ||
| 2398 | if (error) | ||
| 2399 | return error; | ||
| 2400 | } | ||
| 2401 | |||
| 2368 | if (new_dentry->d_inode) { | 2402 | if (new_dentry->d_inode) { |
| 2369 | (void) shmem_unlink(new_dir, new_dentry); | 2403 | (void) shmem_unlink(new_dir, new_dentry); |
| 2370 | if (they_are_dirs) { | 2404 | if (they_are_dirs) { |
