diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-21 21:19:09 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-21 21:19:09 -0400 |
commit | d9a185f8b49678775ef56ecbdbc7b76970302897 (patch) | |
tree | 7ace1b26133e5d796af09e5d71d6531bcb69865c | |
parent | c22fc16d172fba4d19ffd8f2aa8fe67edba63895 (diff) | |
parent | 989974c804574d250ac92d44e220081959ac8ac1 (diff) |
Merge tag 'ovl-update-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs
Pull overlayfs updates from Miklos Szeredi:
"This contains two new features:
- Stack file operations: this allows removal of several hacks from
the VFS, proper interaction of read-only open files with copy-up,
possibility to implement fs modifying ioctls properly, and others.
- Metadata only copy-up: when file is on lower layer and only
metadata is modified (except size) then only copy up the metadata
and continue to use the data from the lower file"
* tag 'ovl-update-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs: (66 commits)
ovl: Enable metadata only feature
ovl: Do not do metacopy only for ioctl modifying file attr
ovl: Do not do metadata only copy-up for truncate operation
ovl: add helper to force data copy-up
ovl: Check redirect on index as well
ovl: Set redirect on upper inode when it is linked
ovl: Set redirect on metacopy files upon rename
ovl: Do not set dentry type ORIGIN for broken hardlinks
ovl: Add an inode flag OVL_CONST_INO
ovl: Treat metacopy dentries as type OVL_PATH_MERGE
ovl: Check redirects for metacopy files
ovl: Move some dir related ovl_lookup_single() code in else block
ovl: Do not expose metacopy only dentry from d_real()
ovl: Open file with data except for the case of fsync
ovl: Add helper ovl_inode_realdata()
ovl: Store lower data inode in ovl_inode
ovl: Fix ovl_getattr() to get number of blocks from lower
ovl: Add helper ovl_dentry_lowerdata() to get lower data dentry
ovl: Copy up meta inode data from lowest data inode
ovl: Modify ovl_lookup() and friends to lookup metacopy dentry
...
33 files changed, 1619 insertions, 595 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 9e6f19eaef89..efea228ccd8a 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -21,8 +21,7 @@ prototypes: | |||
21 | char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen); | 21 | char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen); |
22 | struct vfsmount *(*d_automount)(struct path *path); | 22 | struct vfsmount *(*d_automount)(struct path *path); |
23 | int (*d_manage)(const struct path *, bool); | 23 | int (*d_manage)(const struct path *, bool); |
24 | struct dentry *(*d_real)(struct dentry *, const struct inode *, | 24 | struct dentry *(*d_real)(struct dentry *, const struct inode *); |
25 | unsigned int, unsigned int); | ||
26 | 25 | ||
27 | locking rules: | 26 | locking rules: |
28 | rename_lock ->d_lock may block rcu-walk | 27 | rename_lock ->d_lock may block rcu-walk |
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index 72615a2c0752..51c136c821bf 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt | |||
@@ -10,10 +10,6 @@ union-filesystems). An overlay-filesystem tries to present a | |||
10 | filesystem which is the result of overlaying one filesystem on top | 10 | filesystem which is the result of overlaying one filesystem on top |
11 | of the other. | 11 | of the other. |
12 | 12 | ||
13 | The result will inevitably fail to look exactly like a normal | ||
14 | filesystem for various technical reasons. The expectation is that | ||
15 | many use cases will be able to ignore these differences. | ||
16 | |||
17 | 13 | ||
18 | Overlay objects | 14 | Overlay objects |
19 | --------------- | 15 | --------------- |
@@ -266,6 +262,30 @@ rightmost one and going left. In the above example lower1 will be the | |||
266 | top, lower2 the middle and lower3 the bottom layer. | 262 | top, lower2 the middle and lower3 the bottom layer. |
267 | 263 | ||
268 | 264 | ||
265 | Metadata only copy up | ||
266 | --------------------- | ||
267 | |||
268 | When metadata only copy up feature is enabled, overlayfs will only copy | ||
269 | up metadata (as opposed to whole file), when a metadata specific operation | ||
270 | like chown/chmod is performed. Full file will be copied up later when | ||
271 | file is opened for WRITE operation. | ||
272 | |||
273 | In other words, this is delayed data copy up operation and data is copied | ||
274 | up when there is a need to actually modify data. | ||
275 | |||
276 | There are multiple ways to enable/disable this feature. A config option | ||
277 | CONFIG_OVERLAY_FS_METACOPY can be set/unset to enable/disable this feature | ||
278 | by default. Or one can enable/disable it at module load time with module | ||
279 | parameter metacopy=on/off. Lastly, there is also a per mount option | ||
280 | metacopy=on/off to enable/disable this feature per mount. | ||
281 | |||
282 | Do not use metacopy=on with untrusted upper/lower directories. Otherwise | ||
283 | it is possible that an attacker can create a handcrafted file with | ||
284 | appropriate REDIRECT and METACOPY xattrs, and gain access to file on lower | ||
285 | pointed by REDIRECT. This should not be possible on local system as setting | ||
286 | "trusted." xattrs will require CAP_SYS_ADMIN. But it should be possible | ||
287 | for untrusted layers like from a pen drive. | ||
288 | |||
269 | Sharing and copying layers | 289 | Sharing and copying layers |
270 | -------------------------- | 290 | -------------------------- |
271 | 291 | ||
@@ -284,7 +304,7 @@ though it will not result in a crash or deadlock. | |||
284 | Mounting an overlay using an upper layer path, where the upper layer path | 304 | Mounting an overlay using an upper layer path, where the upper layer path |
285 | was previously used by another mounted overlay in combination with a | 305 | was previously used by another mounted overlay in combination with a |
286 | different lower layer path, is allowed, unless the "inodes index" feature | 306 | different lower layer path, is allowed, unless the "inodes index" feature |
287 | is enabled. | 307 | or "metadata only copy up" feature is enabled. |
288 | 308 | ||
289 | With the "inodes index" feature, on the first time mount, an NFS file | 309 | With the "inodes index" feature, on the first time mount, an NFS file |
290 | handle of the lower layer root directory, along with the UUID of the lower | 310 | handle of the lower layer root directory, along with the UUID of the lower |
@@ -297,6 +317,10 @@ lower root origin, mount will fail with ESTALE. An overlayfs mount with | |||
297 | does not support NFS export, lower filesystem does not have a valid UUID or | 317 | does not support NFS export, lower filesystem does not have a valid UUID or |
298 | if the upper filesystem does not support extended attributes. | 318 | if the upper filesystem does not support extended attributes. |
299 | 319 | ||
320 | For the "metadata only copy up" feature there is no verification mechanism at | ||
321 | mount time. So if the same upper is mounted with a different set of lowers, the | ||
322 | mount will probably succeed, but expect the unexpected later on. So don't do it. | ||
323 | |||
300 | It is quite a common practice to copy overlay layers to a different | 324 | It is quite a common practice to copy overlay layers to a different |
301 | directory tree on the same or different underlying filesystem, and even | 325 | directory tree on the same or different underlying filesystem, and even |
302 | to a different machine. With the "inodes index" feature, trying to mount | 326 | to a different machine. With the "inodes index" feature, trying to mount |
@@ -306,27 +330,40 @@ the copied layers will fail the verification of the lower root file handle. | |||
306 | Non-standard behavior | 330 | Non-standard behavior |
307 | --------------------- | 331 | --------------------- |
308 | 332 | ||
309 | The copy_up operation essentially creates a new, identical file and | 333 | Overlayfs can now act as a POSIX compliant filesystem with the following |
310 | moves it over to the old name. Any open files referring to this inode | 334 | features turned on: |
311 | will access the old data. | 335 | |
336 | 1) "redirect_dir" | ||
337 | |||
338 | Enabled with the mount option or module option: "redirect_dir=on" or with | ||
339 | the kernel config option CONFIG_OVERLAY_FS_REDIRECT_DIR=y. | ||
340 | |||
341 | If this feature is disabled, then rename(2) on a lower or merged directory | ||
342 | will fail with EXDEV ("Invalid cross-device link"). | ||
343 | |||
344 | 2) "inode index" | ||
345 | |||
346 | Enabled with the mount option or module option "index=on" or with the | ||
347 | kernel config option CONFIG_OVERLAY_FS_INDEX=y. | ||
312 | 348 | ||
313 | The new file may be on a different filesystem, so both st_dev and st_ino | 349 | If this feature is disabled and a file with multiple hard links is copied |
314 | of the real file may change. The values of st_dev and st_ino returned by | 350 | up, then this will "break" the link. Changes will not be propagated to |
315 | stat(2) on an overlay object are often not the same as the real file | 351 | other names referring to the same inode. |
316 | stat(2) values to prevent the values from changing on copy_up. | ||
317 | 352 | ||
318 | Unless "xino" feature is enabled, when overlay layers are not all on the | 353 | 3) "xino" |
319 | same underlying filesystem, the value of st_dev may be different for two | ||
320 | non-directory objects in the same overlay filesystem and the value of | ||
321 | st_ino for directory objects may be non persistent and could change even | ||
322 | while the overlay filesystem is still mounted. | ||
323 | 354 | ||
324 | Unless "inode index" feature is enabled, if a file with multiple hard | 355 | Enabled with the mount option "xino=auto" or "xino=on", with the module |
325 | links is copied up, then this will "break" the link. Changes will not be | 356 | option "xino_auto=on" or with the kernel config option |
326 | propagated to other names referring to the same inode. | 357 | CONFIG_OVERLAY_FS_XINO_AUTO=y. Also implicitly enabled by using the same |
358 | underlying filesystem for all layers making up the overlay. | ||
327 | 359 | ||
328 | Unless "redirect_dir" feature is enabled, rename(2) on a lower or merged | 360 | If this feature is disabled or the underlying filesystem doesn't have |
329 | directory will fail with EXDEV. | 361 | enough free bits in the inode number, then overlayfs will not be able to |
362 | guarantee that the values of st_ino and st_dev returned by stat(2) and the | ||
363 | value of d_ino returned by readdir(3) will act like on a normal filesystem. | ||
364 | E.g. the value of st_dev may be different for two objects in the same | ||
365 | overlay filesystem and the value of st_ino for directory objects may not be | ||
366 | persistent and could change even while the overlay filesystem is mounted. | ||
330 | 367 | ||
331 | 368 | ||
332 | Changes to underlying filesystems | 369 | Changes to underlying filesystems |
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 85907d5b9c2c..4b2084d0f1fb 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
@@ -989,8 +989,7 @@ struct dentry_operations { | |||
989 | char *(*d_dname)(struct dentry *, char *, int); | 989 | char *(*d_dname)(struct dentry *, char *, int); |
990 | struct vfsmount *(*d_automount)(struct path *); | 990 | struct vfsmount *(*d_automount)(struct path *); |
991 | int (*d_manage)(const struct path *, bool); | 991 | int (*d_manage)(const struct path *, bool); |
992 | struct dentry *(*d_real)(struct dentry *, const struct inode *, | 992 | struct dentry *(*d_real)(struct dentry *, const struct inode *); |
993 | unsigned int, unsigned int); | ||
994 | }; | 993 | }; |
995 | 994 | ||
996 | d_revalidate: called when the VFS needs to revalidate a dentry. This | 995 | d_revalidate: called when the VFS needs to revalidate a dentry. This |
@@ -1124,22 +1123,15 @@ struct dentry_operations { | |||
1124 | dentry being transited from. | 1123 | dentry being transited from. |
1125 | 1124 | ||
1126 | d_real: overlay/union type filesystems implement this method to return one of | 1125 | d_real: overlay/union type filesystems implement this method to return one of |
1127 | the underlying dentries hidden by the overlay. It is used in three | 1126 | the underlying dentries hidden by the overlay. It is used in two |
1128 | different modes: | 1127 | different modes: |
1129 | 1128 | ||
1130 | Called from open it may need to copy-up the file depending on the | ||
1131 | supplied open flags. This mode is selected with a non-zero flags | ||
1132 | argument. In this mode the d_real method can return an error. | ||
1133 | |||
1134 | Called from file_dentry() it returns the real dentry matching the inode | 1129 | Called from file_dentry() it returns the real dentry matching the inode |
1135 | argument. The real dentry may be from a lower layer already copied up, | 1130 | argument. The real dentry may be from a lower layer already copied up, |
1136 | but still referenced from the file. This mode is selected with a | 1131 | but still referenced from the file. This mode is selected with a |
1137 | non-NULL inode argument. This will always succeed. | 1132 | non-NULL inode argument. |
1138 | |||
1139 | With NULL inode and zero flags the topmost real underlying dentry is | ||
1140 | returned. This will always succeed. | ||
1141 | 1133 | ||
1142 | This method is never called with both non-NULL inode and non-zero flags. | 1134 | With NULL inode the topmost real underlying dentry is returned. |
1143 | 1135 | ||
1144 | Each dentry has a pointer to its parent dentry, as well as a hash list | 1136 | Each dentry has a pointer to its parent dentry, as well as a hash list |
1145 | of child dentries. Child dentries are basically like files in a | 1137 | of child dentries. Child dentries are basically like files in a |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 318be7864072..53af9f5253f4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -3217,8 +3217,9 @@ void btrfs_get_block_group_info(struct list_head *groups_list, | |||
3217 | struct btrfs_ioctl_space_info *space); | 3217 | struct btrfs_ioctl_space_info *space); |
3218 | void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, | 3218 | void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, |
3219 | struct btrfs_ioctl_balance_args *bargs); | 3219 | struct btrfs_ioctl_balance_args *bargs); |
3220 | ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, | 3220 | int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff, |
3221 | struct file *dst_file, u64 dst_loff); | 3221 | struct file *dst_file, loff_t dst_loff, |
3222 | u64 olen); | ||
3222 | 3223 | ||
3223 | /* file.c */ | 3224 | /* file.c */ |
3224 | int __init btrfs_auto_defrag_init(void); | 3225 | int __init btrfs_auto_defrag_init(void); |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d3a5d2a41e5f..63600dc2ac4c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -3592,13 +3592,13 @@ out_unlock: | |||
3592 | return ret; | 3592 | return ret; |
3593 | } | 3593 | } |
3594 | 3594 | ||
3595 | ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, | 3595 | int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff, |
3596 | struct file *dst_file, u64 dst_loff) | 3596 | struct file *dst_file, loff_t dst_loff, |
3597 | u64 olen) | ||
3597 | { | 3598 | { |
3598 | struct inode *src = file_inode(src_file); | 3599 | struct inode *src = file_inode(src_file); |
3599 | struct inode *dst = file_inode(dst_file); | 3600 | struct inode *dst = file_inode(dst_file); |
3600 | u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; | 3601 | u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; |
3601 | ssize_t res; | ||
3602 | 3602 | ||
3603 | if (WARN_ON_ONCE(bs < PAGE_SIZE)) { | 3603 | if (WARN_ON_ONCE(bs < PAGE_SIZE)) { |
3604 | /* | 3604 | /* |
@@ -3609,10 +3609,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, | |||
3609 | return -EINVAL; | 3609 | return -EINVAL; |
3610 | } | 3610 | } |
3611 | 3611 | ||
3612 | res = btrfs_extent_same(src, loff, olen, dst, dst_loff); | 3612 | return btrfs_extent_same(src, src_loff, olen, dst, dst_loff); |
3613 | if (res) | ||
3614 | return res; | ||
3615 | return olen; | ||
3616 | } | 3613 | } |
3617 | 3614 | ||
3618 | static int clone_finish_inode_update(struct btrfs_trans_handle *trans, | 3615 | static int clone_finish_inode_update(struct btrfs_trans_handle *trans, |
diff --git a/fs/file_table.c b/fs/file_table.c index d6eccd04d703..e49af4caf15d 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -52,7 +52,8 @@ static void file_free_rcu(struct rcu_head *head) | |||
52 | static inline void file_free(struct file *f) | 52 | static inline void file_free(struct file *f) |
53 | { | 53 | { |
54 | security_file_free(f); | 54 | security_file_free(f); |
55 | percpu_counter_dec(&nr_files); | 55 | if (!(f->f_mode & FMODE_NOACCOUNT)) |
56 | percpu_counter_dec(&nr_files); | ||
56 | call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); | 57 | call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); |
57 | } | 58 | } |
58 | 59 | ||
@@ -91,6 +92,34 @@ int proc_nr_files(struct ctl_table *table, int write, | |||
91 | } | 92 | } |
92 | #endif | 93 | #endif |
93 | 94 | ||
95 | static struct file *__alloc_file(int flags, const struct cred *cred) | ||
96 | { | ||
97 | struct file *f; | ||
98 | int error; | ||
99 | |||
100 | f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); | ||
101 | if (unlikely(!f)) | ||
102 | return ERR_PTR(-ENOMEM); | ||
103 | |||
104 | f->f_cred = get_cred(cred); | ||
105 | error = security_file_alloc(f); | ||
106 | if (unlikely(error)) { | ||
107 | file_free_rcu(&f->f_u.fu_rcuhead); | ||
108 | return ERR_PTR(error); | ||
109 | } | ||
110 | |||
111 | atomic_long_set(&f->f_count, 1); | ||
112 | rwlock_init(&f->f_owner.lock); | ||
113 | spin_lock_init(&f->f_lock); | ||
114 | mutex_init(&f->f_pos_lock); | ||
115 | eventpoll_init_file(f); | ||
116 | f->f_flags = flags; | ||
117 | f->f_mode = OPEN_FMODE(flags); | ||
118 | /* f->f_version: 0 */ | ||
119 | |||
120 | return f; | ||
121 | } | ||
122 | |||
94 | /* Find an unused file structure and return a pointer to it. | 123 | /* Find an unused file structure and return a pointer to it. |
95 | * Returns an error pointer if some error happened e.g. we over file | 124 | * Returns an error pointer if some error happened e.g. we over file |
96 | * structures limit, run out of memory or operation is not permitted. | 125 | * structures limit, run out of memory or operation is not permitted. |
@@ -105,7 +134,6 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) | |||
105 | { | 134 | { |
106 | static long old_max; | 135 | static long old_max; |
107 | struct file *f; | 136 | struct file *f; |
108 | int error; | ||
109 | 137 | ||
110 | /* | 138 | /* |
111 | * Privileged users can go above max_files | 139 | * Privileged users can go above max_files |
@@ -119,26 +147,10 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) | |||
119 | goto over; | 147 | goto over; |
120 | } | 148 | } |
121 | 149 | ||
122 | f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); | 150 | f = __alloc_file(flags, cred); |
123 | if (unlikely(!f)) | 151 | if (!IS_ERR(f)) |
124 | return ERR_PTR(-ENOMEM); | 152 | percpu_counter_inc(&nr_files); |
125 | |||
126 | f->f_cred = get_cred(cred); | ||
127 | error = security_file_alloc(f); | ||
128 | if (unlikely(error)) { | ||
129 | file_free_rcu(&f->f_u.fu_rcuhead); | ||
130 | return ERR_PTR(error); | ||
131 | } | ||
132 | 153 | ||
133 | atomic_long_set(&f->f_count, 1); | ||
134 | rwlock_init(&f->f_owner.lock); | ||
135 | spin_lock_init(&f->f_lock); | ||
136 | mutex_init(&f->f_pos_lock); | ||
137 | eventpoll_init_file(f); | ||
138 | f->f_flags = flags; | ||
139 | f->f_mode = OPEN_FMODE(flags); | ||
140 | /* f->f_version: 0 */ | ||
141 | percpu_counter_inc(&nr_files); | ||
142 | return f; | 154 | return f; |
143 | 155 | ||
144 | over: | 156 | over: |
@@ -150,6 +162,21 @@ over: | |||
150 | return ERR_PTR(-ENFILE); | 162 | return ERR_PTR(-ENFILE); |
151 | } | 163 | } |
152 | 164 | ||
165 | /* | ||
166 | * Variant of alloc_empty_file() that doesn't check and modify nr_files. | ||
167 | * | ||
168 | * Should not be used unless there's a very good reason to do so. | ||
169 | */ | ||
170 | struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) | ||
171 | { | ||
172 | struct file *f = __alloc_file(flags, cred); | ||
173 | |||
174 | if (!IS_ERR(f)) | ||
175 | f->f_mode |= FMODE_NOACCOUNT; | ||
176 | |||
177 | return f; | ||
178 | } | ||
179 | |||
153 | /** | 180 | /** |
154 | * alloc_file - allocate and initialize a 'struct file' | 181 | * alloc_file - allocate and initialize a 'struct file' |
155 | * | 182 | * |
diff --git a/fs/inode.c b/fs/inode.c index a06de4454232..42f6d25f32a5 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -1596,49 +1596,16 @@ sector_t bmap(struct inode *inode, sector_t block) | |||
1596 | EXPORT_SYMBOL(bmap); | 1596 | EXPORT_SYMBOL(bmap); |
1597 | 1597 | ||
1598 | /* | 1598 | /* |
1599 | * Update times in overlayed inode from underlying real inode | ||
1600 | */ | ||
1601 | static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode, | ||
1602 | bool rcu) | ||
1603 | { | ||
1604 | struct dentry *upperdentry; | ||
1605 | |||
1606 | /* | ||
1607 | * Nothing to do if in rcu or if non-overlayfs | ||
1608 | */ | ||
1609 | if (rcu || likely(!(dentry->d_flags & DCACHE_OP_REAL))) | ||
1610 | return; | ||
1611 | |||
1612 | upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER); | ||
1613 | |||
1614 | /* | ||
1615 | * If file is on lower then we can't update atime, so no worries about | ||
1616 | * stale mtime/ctime. | ||
1617 | */ | ||
1618 | if (upperdentry) { | ||
1619 | struct inode *realinode = d_inode(upperdentry); | ||
1620 | |||
1621 | if ((!timespec64_equal(&inode->i_mtime, &realinode->i_mtime) || | ||
1622 | !timespec64_equal(&inode->i_ctime, &realinode->i_ctime))) { | ||
1623 | inode->i_mtime = realinode->i_mtime; | ||
1624 | inode->i_ctime = realinode->i_ctime; | ||
1625 | } | ||
1626 | } | ||
1627 | } | ||
1628 | |||
1629 | /* | ||
1630 | * With relative atime, only update atime if the previous atime is | 1599 | * With relative atime, only update atime if the previous atime is |
1631 | * earlier than either the ctime or mtime or if at least a day has | 1600 | * earlier than either the ctime or mtime or if at least a day has |
1632 | * passed since the last atime update. | 1601 | * passed since the last atime update. |
1633 | */ | 1602 | */ |
1634 | static int relatime_need_update(const struct path *path, struct inode *inode, | 1603 | static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, |
1635 | struct timespec now, bool rcu) | 1604 | struct timespec now) |
1636 | { | 1605 | { |
1637 | 1606 | ||
1638 | if (!(path->mnt->mnt_flags & MNT_RELATIME)) | 1607 | if (!(mnt->mnt_flags & MNT_RELATIME)) |
1639 | return 1; | 1608 | return 1; |
1640 | |||
1641 | update_ovl_inode_times(path->dentry, inode, rcu); | ||
1642 | /* | 1609 | /* |
1643 | * Is mtime younger than atime? If yes, update atime: | 1610 | * Is mtime younger than atime? If yes, update atime: |
1644 | */ | 1611 | */ |
@@ -1709,8 +1676,7 @@ static int update_time(struct inode *inode, struct timespec64 *time, int flags) | |||
1709 | * This function automatically handles read only file systems and media, | 1676 | * This function automatically handles read only file systems and media, |
1710 | * as well as the "noatime" flag and inode specific "noatime" markers. | 1677 | * as well as the "noatime" flag and inode specific "noatime" markers. |
1711 | */ | 1678 | */ |
1712 | bool __atime_needs_update(const struct path *path, struct inode *inode, | 1679 | bool atime_needs_update(const struct path *path, struct inode *inode) |
1713 | bool rcu) | ||
1714 | { | 1680 | { |
1715 | struct vfsmount *mnt = path->mnt; | 1681 | struct vfsmount *mnt = path->mnt; |
1716 | struct timespec64 now; | 1682 | struct timespec64 now; |
@@ -1736,7 +1702,7 @@ bool __atime_needs_update(const struct path *path, struct inode *inode, | |||
1736 | 1702 | ||
1737 | now = current_time(inode); | 1703 | now = current_time(inode); |
1738 | 1704 | ||
1739 | if (!relatime_need_update(path, inode, timespec64_to_timespec(now), rcu)) | 1705 | if (!relatime_need_update(mnt, inode, timespec64_to_timespec(now))) |
1740 | return false; | 1706 | return false; |
1741 | 1707 | ||
1742 | if (timespec64_equal(&inode->i_atime, &now)) | 1708 | if (timespec64_equal(&inode->i_atime, &now)) |
@@ -1751,7 +1717,7 @@ void touch_atime(const struct path *path) | |||
1751 | struct inode *inode = d_inode(path->dentry); | 1717 | struct inode *inode = d_inode(path->dentry); |
1752 | struct timespec64 now; | 1718 | struct timespec64 now; |
1753 | 1719 | ||
1754 | if (!__atime_needs_update(path, inode, false)) | 1720 | if (!atime_needs_update(path, inode)) |
1755 | return; | 1721 | return; |
1756 | 1722 | ||
1757 | if (!sb_start_write_trylock(inode->i_sb)) | 1723 | if (!sb_start_write_trylock(inode->i_sb)) |
diff --git a/fs/internal.h b/fs/internal.h index 50a28fc71300..d410186bc369 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -82,10 +82,8 @@ extern void __init mnt_init(void); | |||
82 | 82 | ||
83 | extern int __mnt_want_write(struct vfsmount *); | 83 | extern int __mnt_want_write(struct vfsmount *); |
84 | extern int __mnt_want_write_file(struct file *); | 84 | extern int __mnt_want_write_file(struct file *); |
85 | extern int mnt_want_write_file_path(struct file *); | ||
86 | extern void __mnt_drop_write(struct vfsmount *); | 85 | extern void __mnt_drop_write(struct vfsmount *); |
87 | extern void __mnt_drop_write_file(struct file *); | 86 | extern void __mnt_drop_write_file(struct file *); |
88 | extern void mnt_drop_write_file_path(struct file *); | ||
89 | 87 | ||
90 | /* | 88 | /* |
91 | * fs_struct.c | 89 | * fs_struct.c |
@@ -96,6 +94,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *); | |||
96 | * file_table.c | 94 | * file_table.c |
97 | */ | 95 | */ |
98 | extern struct file *alloc_empty_file(int, const struct cred *); | 96 | extern struct file *alloc_empty_file(int, const struct cred *); |
97 | extern struct file *alloc_empty_file_noaccount(int, const struct cred *); | ||
99 | 98 | ||
100 | /* | 99 | /* |
101 | * super.c | 100 | * super.c |
@@ -136,13 +135,6 @@ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc); | |||
136 | extern void inode_add_lru(struct inode *inode); | 135 | extern void inode_add_lru(struct inode *inode); |
137 | extern int dentry_needs_remove_privs(struct dentry *dentry); | 136 | extern int dentry_needs_remove_privs(struct dentry *dentry); |
138 | 137 | ||
139 | extern bool __atime_needs_update(const struct path *, struct inode *, bool); | ||
140 | static inline bool atime_needs_update_rcu(const struct path *path, | ||
141 | struct inode *inode) | ||
142 | { | ||
143 | return __atime_needs_update(path, inode, true); | ||
144 | } | ||
145 | |||
146 | /* | 138 | /* |
147 | * fs-writeback.c | 139 | * fs-writeback.c |
148 | */ | 140 | */ |
@@ -185,7 +177,6 @@ extern const struct dentry_operations ns_dentry_operations; | |||
185 | */ | 177 | */ |
186 | extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd, | 178 | extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd, |
187 | unsigned long arg); | 179 | unsigned long arg); |
188 | extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | ||
189 | 180 | ||
190 | /* | 181 | /* |
191 | * iomap support: | 182 | * iomap support: |
diff --git a/fs/ioctl.c b/fs/ioctl.c index b445b13fc59b..3212c29235ce 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -49,6 +49,7 @@ long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
49 | out: | 49 | out: |
50 | return error; | 50 | return error; |
51 | } | 51 | } |
52 | EXPORT_SYMBOL(vfs_ioctl); | ||
52 | 53 | ||
53 | static int ioctl_fibmap(struct file *filp, int __user *p) | 54 | static int ioctl_fibmap(struct file *filp, int __user *p) |
54 | { | 55 | { |
diff --git a/fs/locks.c b/fs/locks.c index 5086bde5a18e..2ecb4db8c840 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -139,11 +139,6 @@ | |||
139 | #define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK) | 139 | #define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK) |
140 | #define IS_REMOTELCK(fl) (fl->fl_pid <= 0) | 140 | #define IS_REMOTELCK(fl) (fl->fl_pid <= 0) |
141 | 141 | ||
142 | static inline bool is_remote_lock(struct file *filp) | ||
143 | { | ||
144 | return likely(!(filp->f_path.dentry->d_sb->s_flags & SB_NOREMOTELOCK)); | ||
145 | } | ||
146 | |||
147 | static bool lease_breaking(struct file_lock *fl) | 142 | static bool lease_breaking(struct file_lock *fl) |
148 | { | 143 | { |
149 | return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING); | 144 | return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING); |
@@ -1651,8 +1646,7 @@ check_conflicting_open(const struct dentry *dentry, const long arg, int flags) | |||
1651 | if (flags & FL_LAYOUT) | 1646 | if (flags & FL_LAYOUT) |
1652 | return 0; | 1647 | return 0; |
1653 | 1648 | ||
1654 | if ((arg == F_RDLCK) && | 1649 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) |
1655 | (atomic_read(&d_real_inode(dentry)->i_writecount) > 0)) | ||
1656 | return -EAGAIN; | 1650 | return -EAGAIN; |
1657 | 1651 | ||
1658 | if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || | 1652 | if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || |
@@ -1873,7 +1867,7 @@ EXPORT_SYMBOL(generic_setlease); | |||
1873 | int | 1867 | int |
1874 | vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv) | 1868 | vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv) |
1875 | { | 1869 | { |
1876 | if (filp->f_op->setlease && is_remote_lock(filp)) | 1870 | if (filp->f_op->setlease) |
1877 | return filp->f_op->setlease(filp, arg, lease, priv); | 1871 | return filp->f_op->setlease(filp, arg, lease, priv); |
1878 | else | 1872 | else |
1879 | return generic_setlease(filp, arg, lease, priv); | 1873 | return generic_setlease(filp, arg, lease, priv); |
@@ -2020,7 +2014,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) | |||
2020 | if (error) | 2014 | if (error) |
2021 | goto out_free; | 2015 | goto out_free; |
2022 | 2016 | ||
2023 | if (f.file->f_op->flock && is_remote_lock(f.file)) | 2017 | if (f.file->f_op->flock) |
2024 | error = f.file->f_op->flock(f.file, | 2018 | error = f.file->f_op->flock(f.file, |
2025 | (can_sleep) ? F_SETLKW : F_SETLK, | 2019 | (can_sleep) ? F_SETLKW : F_SETLK, |
2026 | lock); | 2020 | lock); |
@@ -2046,7 +2040,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) | |||
2046 | */ | 2040 | */ |
2047 | int vfs_test_lock(struct file *filp, struct file_lock *fl) | 2041 | int vfs_test_lock(struct file *filp, struct file_lock *fl) |
2048 | { | 2042 | { |
2049 | if (filp->f_op->lock && is_remote_lock(filp)) | 2043 | if (filp->f_op->lock) |
2050 | return filp->f_op->lock(filp, F_GETLK, fl); | 2044 | return filp->f_op->lock(filp, F_GETLK, fl); |
2051 | posix_test_lock(filp, fl); | 2045 | posix_test_lock(filp, fl); |
2052 | return 0; | 2046 | return 0; |
@@ -2196,7 +2190,7 @@ out: | |||
2196 | */ | 2190 | */ |
2197 | int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) | 2191 | int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) |
2198 | { | 2192 | { |
2199 | if (filp->f_op->lock && is_remote_lock(filp)) | 2193 | if (filp->f_op->lock) |
2200 | return filp->f_op->lock(filp, cmd, fl); | 2194 | return filp->f_op->lock(filp, cmd, fl); |
2201 | else | 2195 | else |
2202 | return posix_lock_file(filp, fl, conf); | 2196 | return posix_lock_file(filp, fl, conf); |
@@ -2518,7 +2512,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx) | |||
2518 | if (list_empty(&flctx->flc_flock)) | 2512 | if (list_empty(&flctx->flc_flock)) |
2519 | return; | 2513 | return; |
2520 | 2514 | ||
2521 | if (filp->f_op->flock && is_remote_lock(filp)) | 2515 | if (filp->f_op->flock) |
2522 | filp->f_op->flock(filp, F_SETLKW, &fl); | 2516 | filp->f_op->flock(filp, F_SETLKW, &fl); |
2523 | else | 2517 | else |
2524 | flock_lock_inode(inode, &fl); | 2518 | flock_lock_inode(inode, &fl); |
@@ -2605,7 +2599,7 @@ EXPORT_SYMBOL(posix_unblock_lock); | |||
2605 | */ | 2599 | */ |
2606 | int vfs_cancel_lock(struct file *filp, struct file_lock *fl) | 2600 | int vfs_cancel_lock(struct file *filp, struct file_lock *fl) |
2607 | { | 2601 | { |
2608 | if (filp->f_op->lock && is_remote_lock(filp)) | 2602 | if (filp->f_op->lock) |
2609 | return filp->f_op->lock(filp, F_CANCELLK, fl); | 2603 | return filp->f_op->lock(filp, F_CANCELLK, fl); |
2610 | return 0; | 2604 | return 0; |
2611 | } | 2605 | } |
diff --git a/fs/namei.c b/fs/namei.c index 3cd396277cd3..ae6aa9ae757c 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1015,7 +1015,7 @@ const char *get_link(struct nameidata *nd) | |||
1015 | if (!(nd->flags & LOOKUP_RCU)) { | 1015 | if (!(nd->flags & LOOKUP_RCU)) { |
1016 | touch_atime(&last->link); | 1016 | touch_atime(&last->link); |
1017 | cond_resched(); | 1017 | cond_resched(); |
1018 | } else if (atime_needs_update_rcu(&last->link, inode)) { | 1018 | } else if (atime_needs_update(&last->link, inode)) { |
1019 | if (unlikely(unlazy_walk(nd))) | 1019 | if (unlikely(unlazy_walk(nd))) |
1020 | return ERR_PTR(-ECHILD); | 1020 | return ERR_PTR(-ECHILD); |
1021 | touch_atime(&last->link); | 1021 | touch_atime(&last->link); |
diff --git a/fs/namespace.c b/fs/namespace.c index bd2f4c68506a..725d6935fab9 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -431,74 +431,20 @@ int __mnt_want_write_file(struct file *file) | |||
431 | } | 431 | } |
432 | 432 | ||
433 | /** | 433 | /** |
434 | * mnt_want_write_file_path - get write access to a file's mount | ||
435 | * @file: the file whose mount on which to take a write | ||
436 | * | ||
437 | * This is like mnt_want_write, but it takes a file and can | ||
438 | * do some optimisations if the file is open for write already | ||
439 | * | ||
440 | * Called by the vfs for cases when we have an open file at hand, but will do an | ||
441 | * inode operation on it (important distinction for files opened on overlayfs, | ||
442 | * since the file operations will come from the real underlying file, while | ||
443 | * inode operations come from the overlay). | ||
444 | */ | ||
445 | int mnt_want_write_file_path(struct file *file) | ||
446 | { | ||
447 | int ret; | ||
448 | |||
449 | sb_start_write(file->f_path.mnt->mnt_sb); | ||
450 | ret = __mnt_want_write_file(file); | ||
451 | if (ret) | ||
452 | sb_end_write(file->f_path.mnt->mnt_sb); | ||
453 | return ret; | ||
454 | } | ||
455 | |||
456 | static inline int may_write_real(struct file *file) | ||
457 | { | ||
458 | struct dentry *dentry = file->f_path.dentry; | ||
459 | struct dentry *upperdentry; | ||
460 | |||
461 | /* Writable file? */ | ||
462 | if (file->f_mode & FMODE_WRITER) | ||
463 | return 0; | ||
464 | |||
465 | /* Not overlayfs? */ | ||
466 | if (likely(!(dentry->d_flags & DCACHE_OP_REAL))) | ||
467 | return 0; | ||
468 | |||
469 | /* File refers to upper, writable layer? */ | ||
470 | upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER); | ||
471 | if (upperdentry && | ||
472 | (file_inode(file) == d_inode(upperdentry) || | ||
473 | file_inode(file) == d_inode(dentry))) | ||
474 | return 0; | ||
475 | |||
476 | /* Lower layer: can't write to real file, sorry... */ | ||
477 | return -EPERM; | ||
478 | } | ||
479 | |||
480 | /** | ||
481 | * mnt_want_write_file - get write access to a file's mount | 434 | * mnt_want_write_file - get write access to a file's mount |
482 | * @file: the file whose mount on which to take a write | 435 | * @file: the file whose mount on which to take a write |
483 | * | 436 | * |
484 | * This is like mnt_want_write, but it takes a file and can | 437 | * This is like mnt_want_write, but it takes a file and can |
485 | * do some optimisations if the file is open for write already | 438 | * do some optimisations if the file is open for write already |
486 | * | ||
487 | * Mostly called by filesystems from their ioctl operation before performing | ||
488 | * modification. On overlayfs this needs to check if the file is on a read-only | ||
489 | * lower layer and deny access in that case. | ||
490 | */ | 439 | */ |
491 | int mnt_want_write_file(struct file *file) | 440 | int mnt_want_write_file(struct file *file) |
492 | { | 441 | { |
493 | int ret; | 442 | int ret; |
494 | 443 | ||
495 | ret = may_write_real(file); | 444 | sb_start_write(file_inode(file)->i_sb); |
496 | if (!ret) { | 445 | ret = __mnt_want_write_file(file); |
497 | sb_start_write(file_inode(file)->i_sb); | 446 | if (ret) |
498 | ret = __mnt_want_write_file(file); | 447 | sb_end_write(file_inode(file)->i_sb); |
499 | if (ret) | ||
500 | sb_end_write(file_inode(file)->i_sb); | ||
501 | } | ||
502 | return ret; | 448 | return ret; |
503 | } | 449 | } |
504 | EXPORT_SYMBOL_GPL(mnt_want_write_file); | 450 | EXPORT_SYMBOL_GPL(mnt_want_write_file); |
@@ -538,14 +484,9 @@ void __mnt_drop_write_file(struct file *file) | |||
538 | __mnt_drop_write(file->f_path.mnt); | 484 | __mnt_drop_write(file->f_path.mnt); |
539 | } | 485 | } |
540 | 486 | ||
541 | void mnt_drop_write_file_path(struct file *file) | ||
542 | { | ||
543 | mnt_drop_write(file->f_path.mnt); | ||
544 | } | ||
545 | |||
546 | void mnt_drop_write_file(struct file *file) | 487 | void mnt_drop_write_file(struct file *file) |
547 | { | 488 | { |
548 | __mnt_drop_write(file->f_path.mnt); | 489 | __mnt_drop_write_file(file); |
549 | sb_end_write(file_inode(file)->i_sb); | 490 | sb_end_write(file_inode(file)->i_sb); |
550 | } | 491 | } |
551 | EXPORT_SYMBOL(mnt_drop_write_file); | 492 | EXPORT_SYMBOL(mnt_drop_write_file); |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 255f758af03a..9fa35cb6f6e0 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -2537,19 +2537,14 @@ static int ocfs2_file_clone_range(struct file *file_in, | |||
2537 | len, false); | 2537 | len, false); |
2538 | } | 2538 | } |
2539 | 2539 | ||
2540 | static ssize_t ocfs2_file_dedupe_range(struct file *src_file, | 2540 | static int ocfs2_file_dedupe_range(struct file *file_in, |
2541 | u64 loff, | 2541 | loff_t pos_in, |
2542 | u64 len, | 2542 | struct file *file_out, |
2543 | struct file *dst_file, | 2543 | loff_t pos_out, |
2544 | u64 dst_loff) | 2544 | u64 len) |
2545 | { | 2545 | { |
2546 | int error; | 2546 | return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, |
2547 | |||
2548 | error = ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff, | ||
2549 | len, true); | 2547 | len, true); |
2550 | if (error) | ||
2551 | return error; | ||
2552 | return len; | ||
2553 | } | 2548 | } |
2554 | 2549 | ||
2555 | const struct inode_operations ocfs2_file_iops = { | 2550 | const struct inode_operations ocfs2_file_iops = { |
@@ -68,7 +68,6 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, | |||
68 | long vfs_truncate(const struct path *path, loff_t length) | 68 | long vfs_truncate(const struct path *path, loff_t length) |
69 | { | 69 | { |
70 | struct inode *inode; | 70 | struct inode *inode; |
71 | struct dentry *upperdentry; | ||
72 | long error; | 71 | long error; |
73 | 72 | ||
74 | inode = path->dentry->d_inode; | 73 | inode = path->dentry->d_inode; |
@@ -91,17 +90,7 @@ long vfs_truncate(const struct path *path, loff_t length) | |||
91 | if (IS_APPEND(inode)) | 90 | if (IS_APPEND(inode)) |
92 | goto mnt_drop_write_and_out; | 91 | goto mnt_drop_write_and_out; |
93 | 92 | ||
94 | /* | 93 | error = get_write_access(inode); |
95 | * If this is an overlayfs then do as if opening the file so we get | ||
96 | * write access on the upper inode, not on the overlay inode. For | ||
97 | * non-overlay filesystems d_real() is an identity function. | ||
98 | */ | ||
99 | upperdentry = d_real(path->dentry, NULL, O_WRONLY, 0); | ||
100 | error = PTR_ERR(upperdentry); | ||
101 | if (IS_ERR(upperdentry)) | ||
102 | goto mnt_drop_write_and_out; | ||
103 | |||
104 | error = get_write_access(upperdentry->d_inode); | ||
105 | if (error) | 94 | if (error) |
106 | goto mnt_drop_write_and_out; | 95 | goto mnt_drop_write_and_out; |
107 | 96 | ||
@@ -120,7 +109,7 @@ long vfs_truncate(const struct path *path, loff_t length) | |||
120 | error = do_truncate(path->dentry, length, 0, NULL); | 109 | error = do_truncate(path->dentry, length, 0, NULL); |
121 | 110 | ||
122 | put_write_and_out: | 111 | put_write_and_out: |
123 | put_write_access(upperdentry->d_inode); | 112 | put_write_access(inode); |
124 | mnt_drop_write_and_out: | 113 | mnt_drop_write_and_out: |
125 | mnt_drop_write(path->mnt); | 114 | mnt_drop_write(path->mnt); |
126 | out: | 115 | out: |
@@ -707,12 +696,12 @@ int ksys_fchown(unsigned int fd, uid_t user, gid_t group) | |||
707 | if (!f.file) | 696 | if (!f.file) |
708 | goto out; | 697 | goto out; |
709 | 698 | ||
710 | error = mnt_want_write_file_path(f.file); | 699 | error = mnt_want_write_file(f.file); |
711 | if (error) | 700 | if (error) |
712 | goto out_fput; | 701 | goto out_fput; |
713 | audit_file(f.file); | 702 | audit_file(f.file); |
714 | error = chown_common(&f.file->f_path, user, group); | 703 | error = chown_common(&f.file->f_path, user, group); |
715 | mnt_drop_write_file_path(f.file); | 704 | mnt_drop_write_file(f.file); |
716 | out_fput: | 705 | out_fput: |
717 | fdput(f); | 706 | fdput(f); |
718 | out: | 707 | out: |
@@ -887,13 +876,8 @@ EXPORT_SYMBOL(file_path); | |||
887 | */ | 876 | */ |
888 | int vfs_open(const struct path *path, struct file *file) | 877 | int vfs_open(const struct path *path, struct file *file) |
889 | { | 878 | { |
890 | struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags, 0); | ||
891 | |||
892 | if (IS_ERR(dentry)) | ||
893 | return PTR_ERR(dentry); | ||
894 | |||
895 | file->f_path = *path; | 879 | file->f_path = *path; |
896 | return do_dentry_open(file, d_backing_inode(dentry), NULL); | 880 | return do_dentry_open(file, d_backing_inode(path->dentry), NULL); |
897 | } | 881 | } |
898 | 882 | ||
899 | struct file *dentry_open(const struct path *path, int flags, | 883 | struct file *dentry_open(const struct path *path, int flags, |
@@ -919,6 +903,24 @@ struct file *dentry_open(const struct path *path, int flags, | |||
919 | } | 903 | } |
920 | EXPORT_SYMBOL(dentry_open); | 904 | EXPORT_SYMBOL(dentry_open); |
921 | 905 | ||
906 | struct file *open_with_fake_path(const struct path *path, int flags, | ||
907 | struct inode *inode, const struct cred *cred) | ||
908 | { | ||
909 | struct file *f = alloc_empty_file_noaccount(flags, cred); | ||
910 | if (!IS_ERR(f)) { | ||
911 | int error; | ||
912 | |||
913 | f->f_path = *path; | ||
914 | error = do_dentry_open(f, inode, NULL); | ||
915 | if (error) { | ||
916 | fput(f); | ||
917 | f = ERR_PTR(error); | ||
918 | } | ||
919 | } | ||
920 | return f; | ||
921 | } | ||
922 | EXPORT_SYMBOL(open_with_fake_path); | ||
923 | |||
922 | static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) | 924 | static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) |
923 | { | 925 | { |
924 | int lookup_flags = 0; | 926 | int lookup_flags = 0; |
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 9384164253ac..2ef91be2a04e 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig | |||
@@ -64,6 +64,7 @@ config OVERLAY_FS_NFS_EXPORT | |||
64 | bool "Overlayfs: turn on NFS export feature by default" | 64 | bool "Overlayfs: turn on NFS export feature by default" |
65 | depends on OVERLAY_FS | 65 | depends on OVERLAY_FS |
66 | depends on OVERLAY_FS_INDEX | 66 | depends on OVERLAY_FS_INDEX |
67 | depends on !OVERLAY_FS_METACOPY | ||
67 | help | 68 | help |
68 | If this config option is enabled then overlay filesystems will use | 69 | If this config option is enabled then overlay filesystems will use |
69 | the index directory to decode overlay NFS file handles by default. | 70 | the index directory to decode overlay NFS file handles by default. |
@@ -103,3 +104,21 @@ config OVERLAY_FS_XINO_AUTO | |||
103 | For more information, see Documentation/filesystems/overlayfs.txt | 104 | For more information, see Documentation/filesystems/overlayfs.txt |
104 | 105 | ||
105 | If unsure, say N. | 106 | If unsure, say N. |
107 | |||
108 | config OVERLAY_FS_METACOPY | ||
109 | bool "Overlayfs: turn on metadata only copy up feature by default" | ||
110 | depends on OVERLAY_FS | ||
111 | select OVERLAY_FS_REDIRECT_DIR | ||
112 | help | ||
113 | If this config option is enabled then overlay filesystems will | ||
114 | copy up only metadata where appropriate and data copy up will | ||
115 | happen when a file is opened for WRITE operation. It is still | ||
116 | possible to turn off this feature globally with the "metacopy=off" | ||
117 | module option or on a filesystem instance basis with the | ||
118 | "metacopy=off" mount option. | ||
119 | |||
120 | Note that this feature is not backward compatible. That is, | ||
121 | mounting an overlay which has metacopy only inodes on a kernel | ||
122 | that doesn't support this feature will have unexpected results. | ||
123 | |||
124 | If unsure, say N. | ||
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile index 30802347a020..46e1ff8ac056 100644 --- a/fs/overlayfs/Makefile +++ b/fs/overlayfs/Makefile | |||
@@ -4,5 +4,5 @@ | |||
4 | 4 | ||
5 | obj-$(CONFIG_OVERLAY_FS) += overlay.o | 5 | obj-$(CONFIG_OVERLAY_FS) += overlay.o |
6 | 6 | ||
7 | overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o \ | 7 | overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \ |
8 | export.o | 8 | copy_up.o export.o |
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index ddaddb4ce4c3..296037afecdb 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c | |||
@@ -25,35 +25,20 @@ | |||
25 | 25 | ||
26 | #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) | 26 | #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) |
27 | 27 | ||
28 | static bool __read_mostly ovl_check_copy_up; | 28 | static int ovl_ccup_set(const char *buf, const struct kernel_param *param) |
29 | module_param_named(check_copy_up, ovl_check_copy_up, bool, | ||
30 | S_IWUSR | S_IRUGO); | ||
31 | MODULE_PARM_DESC(ovl_check_copy_up, | ||
32 | "Warn on copy-up when causing process also has a R/O fd open"); | ||
33 | |||
34 | static int ovl_check_fd(const void *data, struct file *f, unsigned int fd) | ||
35 | { | 29 | { |
36 | const struct dentry *dentry = data; | 30 | pr_warn("overlayfs: \"check_copy_up\" module option is obsolete\n"); |
37 | |||
38 | if (file_inode(f) == d_inode(dentry)) | ||
39 | pr_warn_ratelimited("overlayfs: Warning: Copying up %pD, but open R/O on fd %u which will cease to be coherent [pid=%d %s]\n", | ||
40 | f, fd, current->pid, current->comm); | ||
41 | return 0; | 31 | return 0; |
42 | } | 32 | } |
43 | 33 | ||
44 | /* | 34 | static int ovl_ccup_get(char *buf, const struct kernel_param *param) |
45 | * Check the fds open by this process and warn if something like the following | ||
46 | * scenario is about to occur: | ||
47 | * | ||
48 | * fd1 = open("foo", O_RDONLY); | ||
49 | * fd2 = open("foo", O_RDWR); | ||
50 | */ | ||
51 | static void ovl_do_check_copy_up(struct dentry *dentry) | ||
52 | { | 35 | { |
53 | if (ovl_check_copy_up) | 36 | return sprintf(buf, "N\n"); |
54 | iterate_fd(current->files, 0, ovl_check_fd, dentry); | ||
55 | } | 37 | } |
56 | 38 | ||
39 | module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644); | ||
40 | MODULE_PARM_DESC(ovl_check_copy_up, "Obsolete; does nothing"); | ||
41 | |||
57 | int ovl_copy_xattr(struct dentry *old, struct dentry *new) | 42 | int ovl_copy_xattr(struct dentry *old, struct dentry *new) |
58 | { | 43 | { |
59 | ssize_t list_size, size, value_size = 0; | 44 | ssize_t list_size, size, value_size = 0; |
@@ -195,6 +180,16 @@ out_fput: | |||
195 | return error; | 180 | return error; |
196 | } | 181 | } |
197 | 182 | ||
183 | static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat) | ||
184 | { | ||
185 | struct iattr attr = { | ||
186 | .ia_valid = ATTR_SIZE, | ||
187 | .ia_size = stat->size, | ||
188 | }; | ||
189 | |||
190 | return notify_change(upperdentry, &attr, NULL); | ||
191 | } | ||
192 | |||
198 | static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) | 193 | static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) |
199 | { | 194 | { |
200 | struct iattr attr = { | 195 | struct iattr attr = { |
@@ -403,6 +398,7 @@ struct ovl_copy_up_ctx { | |||
403 | bool tmpfile; | 398 | bool tmpfile; |
404 | bool origin; | 399 | bool origin; |
405 | bool indexed; | 400 | bool indexed; |
401 | bool metacopy; | ||
406 | }; | 402 | }; |
407 | 403 | ||
408 | static int ovl_link_up(struct ovl_copy_up_ctx *c) | 404 | static int ovl_link_up(struct ovl_copy_up_ctx *c) |
@@ -505,28 +501,10 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) | |||
505 | { | 501 | { |
506 | int err; | 502 | int err; |
507 | 503 | ||
508 | if (S_ISREG(c->stat.mode)) { | ||
509 | struct path upperpath; | ||
510 | |||
511 | ovl_path_upper(c->dentry, &upperpath); | ||
512 | BUG_ON(upperpath.dentry != NULL); | ||
513 | upperpath.dentry = temp; | ||
514 | |||
515 | err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size); | ||
516 | if (err) | ||
517 | return err; | ||
518 | } | ||
519 | |||
520 | err = ovl_copy_xattr(c->lowerpath.dentry, temp); | 504 | err = ovl_copy_xattr(c->lowerpath.dentry, temp); |
521 | if (err) | 505 | if (err) |
522 | return err; | 506 | return err; |
523 | 507 | ||
524 | inode_lock(temp->d_inode); | ||
525 | err = ovl_set_attr(temp, &c->stat); | ||
526 | inode_unlock(temp->d_inode); | ||
527 | if (err) | ||
528 | return err; | ||
529 | |||
530 | /* | 508 | /* |
531 | * Store identifier of lower inode in upper inode xattr to | 509 | * Store identifier of lower inode in upper inode xattr to |
532 | * allow lookup of the copy up origin inode. | 510 | * allow lookup of the copy up origin inode. |
@@ -540,7 +518,34 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) | |||
540 | return err; | 518 | return err; |
541 | } | 519 | } |
542 | 520 | ||
543 | return 0; | 521 | if (S_ISREG(c->stat.mode) && !c->metacopy) { |
522 | struct path upperpath, datapath; | ||
523 | |||
524 | ovl_path_upper(c->dentry, &upperpath); | ||
525 | BUG_ON(upperpath.dentry != NULL); | ||
526 | upperpath.dentry = temp; | ||
527 | |||
528 | ovl_path_lowerdata(c->dentry, &datapath); | ||
529 | err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size); | ||
530 | if (err) | ||
531 | return err; | ||
532 | } | ||
533 | |||
534 | if (c->metacopy) { | ||
535 | err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY, | ||
536 | NULL, 0, -EOPNOTSUPP); | ||
537 | if (err) | ||
538 | return err; | ||
539 | } | ||
540 | |||
541 | inode_lock(temp->d_inode); | ||
542 | if (c->metacopy) | ||
543 | err = ovl_set_size(temp, &c->stat); | ||
544 | if (!err) | ||
545 | err = ovl_set_attr(temp, &c->stat); | ||
546 | inode_unlock(temp->d_inode); | ||
547 | |||
548 | return err; | ||
544 | } | 549 | } |
545 | 550 | ||
546 | static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c) | 551 | static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c) |
@@ -575,6 +580,8 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c) | |||
575 | if (err) | 580 | if (err) |
576 | goto out; | 581 | goto out; |
577 | 582 | ||
583 | if (!c->metacopy) | ||
584 | ovl_set_upperdata(d_inode(c->dentry)); | ||
578 | inode = d_inode(c->dentry); | 585 | inode = d_inode(c->dentry); |
579 | ovl_inode_update(inode, newdentry); | 586 | ovl_inode_update(inode, newdentry); |
580 | if (S_ISDIR(inode->i_mode)) | 587 | if (S_ISDIR(inode->i_mode)) |
@@ -677,6 +684,49 @@ out: | |||
677 | return err; | 684 | return err; |
678 | } | 685 | } |
679 | 686 | ||
687 | static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode, | ||
688 | int flags) | ||
689 | { | ||
690 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
691 | |||
692 | if (!ofs->config.metacopy) | ||
693 | return false; | ||
694 | |||
695 | if (!S_ISREG(mode)) | ||
696 | return false; | ||
697 | |||
698 | if (flags && ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC))) | ||
699 | return false; | ||
700 | |||
701 | return true; | ||
702 | } | ||
703 | |||
704 | /* Copy up data of an inode which was copied up metadata only in the past. */ | ||
705 | static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c) | ||
706 | { | ||
707 | struct path upperpath, datapath; | ||
708 | int err; | ||
709 | |||
710 | ovl_path_upper(c->dentry, &upperpath); | ||
711 | if (WARN_ON(upperpath.dentry == NULL)) | ||
712 | return -EIO; | ||
713 | |||
714 | ovl_path_lowerdata(c->dentry, &datapath); | ||
715 | if (WARN_ON(datapath.dentry == NULL)) | ||
716 | return -EIO; | ||
717 | |||
718 | err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size); | ||
719 | if (err) | ||
720 | return err; | ||
721 | |||
722 | err = vfs_removexattr(upperpath.dentry, OVL_XATTR_METACOPY); | ||
723 | if (err) | ||
724 | return err; | ||
725 | |||
726 | ovl_set_upperdata(d_inode(c->dentry)); | ||
727 | return err; | ||
728 | } | ||
729 | |||
680 | static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, | 730 | static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, |
681 | int flags) | 731 | int flags) |
682 | { | 732 | { |
@@ -698,6 +748,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, | |||
698 | if (err) | 748 | if (err) |
699 | return err; | 749 | return err; |
700 | 750 | ||
751 | ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags); | ||
752 | |||
701 | if (parent) { | 753 | if (parent) { |
702 | ovl_path_upper(parent, &parentpath); | 754 | ovl_path_upper(parent, &parentpath); |
703 | ctx.destdir = parentpath.dentry; | 755 | ctx.destdir = parentpath.dentry; |
@@ -719,9 +771,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, | |||
719 | if (IS_ERR(ctx.link)) | 771 | if (IS_ERR(ctx.link)) |
720 | return PTR_ERR(ctx.link); | 772 | return PTR_ERR(ctx.link); |
721 | } | 773 | } |
722 | ovl_do_check_copy_up(ctx.lowerpath.dentry); | ||
723 | 774 | ||
724 | err = ovl_copy_up_start(dentry); | 775 | err = ovl_copy_up_start(dentry, flags); |
725 | /* err < 0: interrupted, err > 0: raced with another copy-up */ | 776 | /* err < 0: interrupted, err > 0: raced with another copy-up */ |
726 | if (unlikely(err)) { | 777 | if (unlikely(err)) { |
727 | if (err > 0) | 778 | if (err > 0) |
@@ -731,6 +782,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, | |||
731 | err = ovl_do_copy_up(&ctx); | 782 | err = ovl_do_copy_up(&ctx); |
732 | if (!err && parent && !ovl_dentry_has_upper_alias(dentry)) | 783 | if (!err && parent && !ovl_dentry_has_upper_alias(dentry)) |
733 | err = ovl_link_up(&ctx); | 784 | err = ovl_link_up(&ctx); |
785 | if (!err && ovl_dentry_needs_data_copy_up_locked(dentry, flags)) | ||
786 | err = ovl_copy_up_meta_inode_data(&ctx); | ||
734 | ovl_copy_up_end(dentry); | 787 | ovl_copy_up_end(dentry); |
735 | } | 788 | } |
736 | do_delayed_call(&done); | 789 | do_delayed_call(&done); |
@@ -756,21 +809,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) | |||
756 | struct dentry *next; | 809 | struct dentry *next; |
757 | struct dentry *parent = NULL; | 810 | struct dentry *parent = NULL; |
758 | 811 | ||
759 | /* | 812 | if (ovl_already_copied_up(dentry, flags)) |
760 | * Check if copy-up has happened as well as for upper alias (in | ||
761 | * case of hard links) is there. | ||
762 | * | ||
763 | * Both checks are lockless: | ||
764 | * - false negatives: will recheck under oi->lock | ||
765 | * - false positives: | ||
766 | * + ovl_dentry_upper() uses memory barriers to ensure the | ||
767 | * upper dentry is up-to-date | ||
768 | * + ovl_dentry_has_upper_alias() relies on locking of | ||
769 | * upper parent i_rwsem to prevent reordering copy-up | ||
770 | * with rename. | ||
771 | */ | ||
772 | if (ovl_dentry_upper(dentry) && | ||
773 | (ovl_dentry_has_upper_alias(dentry) || disconnected)) | ||
774 | break; | 813 | break; |
775 | 814 | ||
776 | next = dget(dentry); | 815 | next = dget(dentry); |
@@ -795,6 +834,41 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) | |||
795 | return err; | 834 | return err; |
796 | } | 835 | } |
797 | 836 | ||
837 | static bool ovl_open_need_copy_up(struct dentry *dentry, int flags) | ||
838 | { | ||
839 | /* Copy up of disconnected dentry does not set upper alias */ | ||
840 | if (ovl_already_copied_up(dentry, flags)) | ||
841 | return false; | ||
842 | |||
843 | if (special_file(d_inode(dentry)->i_mode)) | ||
844 | return false; | ||
845 | |||
846 | if (!ovl_open_flags_need_copy_up(flags)) | ||
847 | return false; | ||
848 | |||
849 | return true; | ||
850 | } | ||
851 | |||
852 | int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags) | ||
853 | { | ||
854 | int err = 0; | ||
855 | |||
856 | if (ovl_open_need_copy_up(dentry, file_flags)) { | ||
857 | err = ovl_want_write(dentry); | ||
858 | if (!err) { | ||
859 | err = ovl_copy_up_flags(dentry, file_flags); | ||
860 | ovl_drop_write(dentry); | ||
861 | } | ||
862 | } | ||
863 | |||
864 | return err; | ||
865 | } | ||
866 | |||
867 | int ovl_copy_up_with_data(struct dentry *dentry) | ||
868 | { | ||
869 | return ovl_copy_up_flags(dentry, O_WRONLY); | ||
870 | } | ||
871 | |||
798 | int ovl_copy_up(struct dentry *dentry) | 872 | int ovl_copy_up(struct dentry *dentry) |
799 | { | 873 | { |
800 | return ovl_copy_up_flags(dentry, 0); | 874 | return ovl_copy_up_flags(dentry, 0); |
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index f480b1a2cd2e..ec350d4d921c 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c | |||
@@ -24,6 +24,8 @@ module_param_named(redirect_max, ovl_redirect_max, ushort, 0644); | |||
24 | MODULE_PARM_DESC(ovl_redirect_max, | 24 | MODULE_PARM_DESC(ovl_redirect_max, |
25 | "Maximum length of absolute redirect xattr value"); | 25 | "Maximum length of absolute redirect xattr value"); |
26 | 26 | ||
27 | static int ovl_set_redirect(struct dentry *dentry, bool samedir); | ||
28 | |||
27 | int ovl_cleanup(struct inode *wdir, struct dentry *wdentry) | 29 | int ovl_cleanup(struct inode *wdir, struct dentry *wdentry) |
28 | { | 30 | { |
29 | int err; | 31 | int err; |
@@ -242,7 +244,7 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode, | |||
242 | .newinode = inode, | 244 | .newinode = inode, |
243 | }; | 245 | }; |
244 | 246 | ||
245 | ovl_dentry_version_inc(dentry->d_parent, false); | 247 | ovl_dir_modified(dentry->d_parent, false); |
246 | ovl_dentry_set_upper_alias(dentry); | 248 | ovl_dentry_set_upper_alias(dentry); |
247 | if (!hardlink) { | 249 | if (!hardlink) { |
248 | /* | 250 | /* |
@@ -657,6 +659,12 @@ static int ovl_link(struct dentry *old, struct inode *newdir, | |||
657 | if (err) | 659 | if (err) |
658 | goto out_drop_write; | 660 | goto out_drop_write; |
659 | 661 | ||
662 | if (ovl_is_metacopy_dentry(old)) { | ||
663 | err = ovl_set_redirect(old, false); | ||
664 | if (err) | ||
665 | goto out_drop_write; | ||
666 | } | ||
667 | |||
660 | err = ovl_nlink_start(old, &locked); | 668 | err = ovl_nlink_start(old, &locked); |
661 | if (err) | 669 | if (err) |
662 | goto out_drop_write; | 670 | goto out_drop_write; |
@@ -722,7 +730,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, | |||
722 | if (err) | 730 | if (err) |
723 | goto out_d_drop; | 731 | goto out_d_drop; |
724 | 732 | ||
725 | ovl_dentry_version_inc(dentry->d_parent, true); | 733 | ovl_dir_modified(dentry->d_parent, true); |
726 | out_d_drop: | 734 | out_d_drop: |
727 | d_drop(dentry); | 735 | d_drop(dentry); |
728 | out_dput_upper: | 736 | out_dput_upper: |
@@ -767,7 +775,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir, | |||
767 | err = vfs_rmdir(dir, upper); | 775 | err = vfs_rmdir(dir, upper); |
768 | else | 776 | else |
769 | err = vfs_unlink(dir, upper, NULL); | 777 | err = vfs_unlink(dir, upper, NULL); |
770 | ovl_dentry_version_inc(dentry->d_parent, ovl_type_origin(dentry)); | 778 | ovl_dir_modified(dentry->d_parent, ovl_type_origin(dentry)); |
771 | 779 | ||
772 | /* | 780 | /* |
773 | * Keeping this dentry hashed would mean having to release | 781 | * Keeping this dentry hashed would mean having to release |
@@ -797,6 +805,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) | |||
797 | int err; | 805 | int err; |
798 | bool locked = false; | 806 | bool locked = false; |
799 | const struct cred *old_cred; | 807 | const struct cred *old_cred; |
808 | struct dentry *upperdentry; | ||
800 | bool lower_positive = ovl_lower_positive(dentry); | 809 | bool lower_positive = ovl_lower_positive(dentry); |
801 | LIST_HEAD(list); | 810 | LIST_HEAD(list); |
802 | 811 | ||
@@ -832,6 +841,17 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) | |||
832 | drop_nlink(dentry->d_inode); | 841 | drop_nlink(dentry->d_inode); |
833 | } | 842 | } |
834 | ovl_nlink_end(dentry, locked); | 843 | ovl_nlink_end(dentry, locked); |
844 | |||
845 | /* | ||
846 | * Copy ctime | ||
847 | * | ||
848 | * Note: we fail to update ctime if there was no copy-up, only a | ||
849 | * whiteout | ||
850 | */ | ||
851 | upperdentry = ovl_dentry_upper(dentry); | ||
852 | if (upperdentry) | ||
853 | ovl_copyattr(d_inode(upperdentry), d_inode(dentry)); | ||
854 | |||
835 | out_drop_write: | 855 | out_drop_write: |
836 | ovl_drop_write(dentry); | 856 | ovl_drop_write(dentry); |
837 | out: | 857 | out: |
@@ -862,13 +882,13 @@ static bool ovl_can_move(struct dentry *dentry) | |||
862 | !d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry); | 882 | !d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry); |
863 | } | 883 | } |
864 | 884 | ||
865 | static char *ovl_get_redirect(struct dentry *dentry, bool samedir) | 885 | static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect) |
866 | { | 886 | { |
867 | char *buf, *ret; | 887 | char *buf, *ret; |
868 | struct dentry *d, *tmp; | 888 | struct dentry *d, *tmp; |
869 | int buflen = ovl_redirect_max + 1; | 889 | int buflen = ovl_redirect_max + 1; |
870 | 890 | ||
871 | if (samedir) { | 891 | if (!abs_redirect) { |
872 | ret = kstrndup(dentry->d_name.name, dentry->d_name.len, | 892 | ret = kstrndup(dentry->d_name.name, dentry->d_name.len, |
873 | GFP_KERNEL); | 893 | GFP_KERNEL); |
874 | goto out; | 894 | goto out; |
@@ -922,15 +942,43 @@ out: | |||
922 | return ret ? ret : ERR_PTR(-ENOMEM); | 942 | return ret ? ret : ERR_PTR(-ENOMEM); |
923 | } | 943 | } |
924 | 944 | ||
945 | static bool ovl_need_absolute_redirect(struct dentry *dentry, bool samedir) | ||
946 | { | ||
947 | struct dentry *lowerdentry; | ||
948 | |||
949 | if (!samedir) | ||
950 | return true; | ||
951 | |||
952 | if (d_is_dir(dentry)) | ||
953 | return false; | ||
954 | |||
955 | /* | ||
956 | * For non-dir hardlinked files, we need absolute redirects | ||
957 | * in general as two upper hardlinks could be in different | ||
958 | * dirs. We could put a relative redirect now and convert | ||
959 | * it to absolute redirect later. But when nlink > 1 and | ||
960 | * indexing is on, that means relative redirect needs to be | ||
961 | * converted to absolute during copy up of another lower | ||
962 | * hardllink as well. | ||
963 | * | ||
964 | * So without optimizing too much, just check if lower is | ||
965 | * a hard link or not. If lower is hard link, put absolute | ||
966 | * redirect. | ||
967 | */ | ||
968 | lowerdentry = ovl_dentry_lower(dentry); | ||
969 | return (d_inode(lowerdentry)->i_nlink > 1); | ||
970 | } | ||
971 | |||
925 | static int ovl_set_redirect(struct dentry *dentry, bool samedir) | 972 | static int ovl_set_redirect(struct dentry *dentry, bool samedir) |
926 | { | 973 | { |
927 | int err; | 974 | int err; |
928 | const char *redirect = ovl_dentry_get_redirect(dentry); | 975 | const char *redirect = ovl_dentry_get_redirect(dentry); |
976 | bool absolute_redirect = ovl_need_absolute_redirect(dentry, samedir); | ||
929 | 977 | ||
930 | if (redirect && (samedir || redirect[0] == '/')) | 978 | if (redirect && (!absolute_redirect || redirect[0] == '/')) |
931 | return 0; | 979 | return 0; |
932 | 980 | ||
933 | redirect = ovl_get_redirect(dentry, samedir); | 981 | redirect = ovl_get_redirect(dentry, absolute_redirect); |
934 | if (IS_ERR(redirect)) | 982 | if (IS_ERR(redirect)) |
935 | return PTR_ERR(redirect); | 983 | return PTR_ERR(redirect); |
936 | 984 | ||
@@ -1106,22 +1154,20 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, | |||
1106 | goto out_dput; | 1154 | goto out_dput; |
1107 | 1155 | ||
1108 | err = 0; | 1156 | err = 0; |
1109 | if (is_dir) { | 1157 | if (ovl_type_merge_or_lower(old)) |
1110 | if (ovl_type_merge_or_lower(old)) | 1158 | err = ovl_set_redirect(old, samedir); |
1111 | err = ovl_set_redirect(old, samedir); | 1159 | else if (is_dir && !old_opaque && ovl_type_merge(new->d_parent)) |
1112 | else if (!old_opaque && ovl_type_merge(new->d_parent)) | 1160 | err = ovl_set_opaque_xerr(old, olddentry, -EXDEV); |
1113 | err = ovl_set_opaque_xerr(old, olddentry, -EXDEV); | 1161 | if (err) |
1114 | if (err) | 1162 | goto out_dput; |
1115 | goto out_dput; | 1163 | |
1116 | } | 1164 | if (!overwrite && ovl_type_merge_or_lower(new)) |
1117 | if (!overwrite && new_is_dir) { | 1165 | err = ovl_set_redirect(new, samedir); |
1118 | if (ovl_type_merge_or_lower(new)) | 1166 | else if (!overwrite && new_is_dir && !new_opaque && |
1119 | err = ovl_set_redirect(new, samedir); | 1167 | ovl_type_merge(old->d_parent)) |
1120 | else if (!new_opaque && ovl_type_merge(old->d_parent)) | 1168 | err = ovl_set_opaque_xerr(new, newdentry, -EXDEV); |
1121 | err = ovl_set_opaque_xerr(new, newdentry, -EXDEV); | 1169 | if (err) |
1122 | if (err) | 1170 | goto out_dput; |
1123 | goto out_dput; | ||
1124 | } | ||
1125 | 1171 | ||
1126 | err = ovl_do_rename(old_upperdir->d_inode, olddentry, | 1172 | err = ovl_do_rename(old_upperdir->d_inode, olddentry, |
1127 | new_upperdir->d_inode, newdentry, flags); | 1173 | new_upperdir->d_inode, newdentry, flags); |
@@ -1138,10 +1184,15 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, | |||
1138 | drop_nlink(d_inode(new)); | 1184 | drop_nlink(d_inode(new)); |
1139 | } | 1185 | } |
1140 | 1186 | ||
1141 | ovl_dentry_version_inc(old->d_parent, ovl_type_origin(old) || | 1187 | ovl_dir_modified(old->d_parent, ovl_type_origin(old) || |
1142 | (!overwrite && ovl_type_origin(new))); | 1188 | (!overwrite && ovl_type_origin(new))); |
1143 | ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old) || | 1189 | ovl_dir_modified(new->d_parent, ovl_type_origin(old) || |
1144 | (d_inode(new) && ovl_type_origin(new))); | 1190 | (d_inode(new) && ovl_type_origin(new))); |
1191 | |||
1192 | /* copy ctime: */ | ||
1193 | ovl_copyattr(d_inode(olddentry), d_inode(old)); | ||
1194 | if (d_inode(new) && ovl_dentry_upper(new)) | ||
1195 | ovl_copyattr(d_inode(newdentry), d_inode(new)); | ||
1145 | 1196 | ||
1146 | out_dput: | 1197 | out_dput: |
1147 | dput(newdentry); | 1198 | dput(newdentry); |
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 9941ece61a14..8fa37cd7818a 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c | |||
@@ -317,6 +317,9 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb, | |||
317 | return ERR_CAST(inode); | 317 | return ERR_CAST(inode); |
318 | } | 318 | } |
319 | 319 | ||
320 | if (upper) | ||
321 | ovl_set_flag(OVL_UPPERDATA, inode); | ||
322 | |||
320 | dentry = d_find_any_alias(inode); | 323 | dentry = d_find_any_alias(inode); |
321 | if (!dentry) { | 324 | if (!dentry) { |
322 | dentry = d_alloc_anon(inode->i_sb); | 325 | dentry = d_alloc_anon(inode->i_sb); |
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c new file mode 100644 index 000000000000..32e9282893c9 --- /dev/null +++ b/fs/overlayfs/file.c | |||
@@ -0,0 +1,511 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 Red Hat, Inc. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of the GNU General Public License version 2 as published by | ||
6 | * the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <linux/cred.h> | ||
10 | #include <linux/file.h> | ||
11 | #include <linux/mount.h> | ||
12 | #include <linux/xattr.h> | ||
13 | #include <linux/uio.h> | ||
14 | #include "overlayfs.h" | ||
15 | |||
/*
 * Classify @realinode relative to overlay @inode as a single character
 * (used by the debug print in ovl_open_realfile()):
 *   'l' - @realinode is not the overlay inode's upper inode
 *   'u' - @realinode is the upper inode and the overlay inode has upper data
 *   'm' - @realinode is the upper inode but the overlay inode has no upper data
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode != ovl_inode_upper(inode))
		return 'l';

	return ovl_has_upperdata(inode) ? 'u' : 'm';
}
25 | |||
26 | static struct file *ovl_open_realfile(const struct file *file, | ||
27 | struct inode *realinode) | ||
28 | { | ||
29 | struct inode *inode = file_inode(file); | ||
30 | struct file *realfile; | ||
31 | const struct cred *old_cred; | ||
32 | |||
33 | old_cred = ovl_override_creds(inode->i_sb); | ||
34 | realfile = open_with_fake_path(&file->f_path, file->f_flags | O_NOATIME, | ||
35 | realinode, current_cred()); | ||
36 | revert_creds(old_cred); | ||
37 | |||
38 | pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n", | ||
39 | file, file, ovl_whatisit(inode, realinode), file->f_flags, | ||
40 | realfile, IS_ERR(realfile) ? 0 : realfile->f_flags); | ||
41 | |||
42 | return realfile; | ||
43 | } | ||
44 | |||
45 | #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT) | ||
46 | |||
47 | static int ovl_change_flags(struct file *file, unsigned int flags) | ||
48 | { | ||
49 | struct inode *inode = file_inode(file); | ||
50 | int err; | ||
51 | |||
52 | /* No atime modificaton on underlying */ | ||
53 | flags |= O_NOATIME; | ||
54 | |||
55 | /* If some flag changed that cannot be changed then something's amiss */ | ||
56 | if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK)) | ||
57 | return -EIO; | ||
58 | |||
59 | flags &= OVL_SETFL_MASK; | ||
60 | |||
61 | if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode)) | ||
62 | return -EPERM; | ||
63 | |||
64 | if (flags & O_DIRECT) { | ||
65 | if (!file->f_mapping->a_ops || | ||
66 | !file->f_mapping->a_ops->direct_IO) | ||
67 | return -EINVAL; | ||
68 | } | ||
69 | |||
70 | if (file->f_op->check_flags) { | ||
71 | err = file->f_op->check_flags(flags); | ||
72 | if (err) | ||
73 | return err; | ||
74 | } | ||
75 | |||
76 | spin_lock(&file->f_lock); | ||
77 | file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags; | ||
78 | spin_unlock(&file->f_lock); | ||
79 | |||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | static int ovl_real_fdget_meta(const struct file *file, struct fd *real, | ||
84 | bool allow_meta) | ||
85 | { | ||
86 | struct inode *inode = file_inode(file); | ||
87 | struct inode *realinode; | ||
88 | |||
89 | real->flags = 0; | ||
90 | real->file = file->private_data; | ||
91 | |||
92 | if (allow_meta) | ||
93 | realinode = ovl_inode_real(inode); | ||
94 | else | ||
95 | realinode = ovl_inode_realdata(inode); | ||
96 | |||
97 | /* Has it been copied up since we'd opened it? */ | ||
98 | if (unlikely(file_inode(real->file) != realinode)) { | ||
99 | real->flags = FDPUT_FPUT; | ||
100 | real->file = ovl_open_realfile(file, realinode); | ||
101 | |||
102 | return PTR_ERR_OR_ZERO(real->file); | ||
103 | } | ||
104 | |||
105 | /* Did the flags change since open? */ | ||
106 | if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME)) | ||
107 | return ovl_change_flags(real->file, file->f_flags); | ||
108 | |||
109 | return 0; | ||
110 | } | ||
111 | |||
112 | static int ovl_real_fdget(const struct file *file, struct fd *real) | ||
113 | { | ||
114 | return ovl_real_fdget_meta(file, real, false); | ||
115 | } | ||
116 | |||
117 | static int ovl_open(struct inode *inode, struct file *file) | ||
118 | { | ||
119 | struct dentry *dentry = file_dentry(file); | ||
120 | struct file *realfile; | ||
121 | int err; | ||
122 | |||
123 | err = ovl_open_maybe_copy_up(dentry, file->f_flags); | ||
124 | if (err) | ||
125 | return err; | ||
126 | |||
127 | /* No longer need these flags, so don't pass them on to underlying fs */ | ||
128 | file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); | ||
129 | |||
130 | realfile = ovl_open_realfile(file, ovl_inode_realdata(inode)); | ||
131 | if (IS_ERR(realfile)) | ||
132 | return PTR_ERR(realfile); | ||
133 | |||
134 | /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */ | ||
135 | file->f_mapping = realfile->f_mapping; | ||
136 | |||
137 | file->private_data = realfile; | ||
138 | |||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | static int ovl_release(struct inode *inode, struct file *file) | ||
143 | { | ||
144 | fput(file->private_data); | ||
145 | |||
146 | return 0; | ||
147 | } | ||
148 | |||
149 | static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) | ||
150 | { | ||
151 | struct inode *realinode = ovl_inode_real(file_inode(file)); | ||
152 | |||
153 | return generic_file_llseek_size(file, offset, whence, | ||
154 | realinode->i_sb->s_maxbytes, | ||
155 | i_size_read(realinode)); | ||
156 | } | ||
157 | |||
158 | static void ovl_file_accessed(struct file *file) | ||
159 | { | ||
160 | struct inode *inode, *upperinode; | ||
161 | |||
162 | if (file->f_flags & O_NOATIME) | ||
163 | return; | ||
164 | |||
165 | inode = file_inode(file); | ||
166 | upperinode = ovl_inode_upper(inode); | ||
167 | |||
168 | if (!upperinode) | ||
169 | return; | ||
170 | |||
171 | if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) || | ||
172 | !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) { | ||
173 | inode->i_mtime = upperinode->i_mtime; | ||
174 | inode->i_ctime = upperinode->i_ctime; | ||
175 | } | ||
176 | |||
177 | touch_atime(&file->f_path); | ||
178 | } | ||
179 | |||
180 | static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb) | ||
181 | { | ||
182 | int ifl = iocb->ki_flags; | ||
183 | rwf_t flags = 0; | ||
184 | |||
185 | if (ifl & IOCB_NOWAIT) | ||
186 | flags |= RWF_NOWAIT; | ||
187 | if (ifl & IOCB_HIPRI) | ||
188 | flags |= RWF_HIPRI; | ||
189 | if (ifl & IOCB_DSYNC) | ||
190 | flags |= RWF_DSYNC; | ||
191 | if (ifl & IOCB_SYNC) | ||
192 | flags |= RWF_SYNC; | ||
193 | |||
194 | return flags; | ||
195 | } | ||
196 | |||
197 | static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) | ||
198 | { | ||
199 | struct file *file = iocb->ki_filp; | ||
200 | struct fd real; | ||
201 | const struct cred *old_cred; | ||
202 | ssize_t ret; | ||
203 | |||
204 | if (!iov_iter_count(iter)) | ||
205 | return 0; | ||
206 | |||
207 | ret = ovl_real_fdget(file, &real); | ||
208 | if (ret) | ||
209 | return ret; | ||
210 | |||
211 | old_cred = ovl_override_creds(file_inode(file)->i_sb); | ||
212 | ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, | ||
213 | ovl_iocb_to_rwf(iocb)); | ||
214 | revert_creds(old_cred); | ||
215 | |||
216 | ovl_file_accessed(file); | ||
217 | |||
218 | fdput(real); | ||
219 | |||
220 | return ret; | ||
221 | } | ||
222 | |||
223 | static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) | ||
224 | { | ||
225 | struct file *file = iocb->ki_filp; | ||
226 | struct inode *inode = file_inode(file); | ||
227 | struct fd real; | ||
228 | const struct cred *old_cred; | ||
229 | ssize_t ret; | ||
230 | |||
231 | if (!iov_iter_count(iter)) | ||
232 | return 0; | ||
233 | |||
234 | inode_lock(inode); | ||
235 | /* Update mode */ | ||
236 | ovl_copyattr(ovl_inode_real(inode), inode); | ||
237 | ret = file_remove_privs(file); | ||
238 | if (ret) | ||
239 | goto out_unlock; | ||
240 | |||
241 | ret = ovl_real_fdget(file, &real); | ||
242 | if (ret) | ||
243 | goto out_unlock; | ||
244 | |||
245 | old_cred = ovl_override_creds(file_inode(file)->i_sb); | ||
246 | ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, | ||
247 | ovl_iocb_to_rwf(iocb)); | ||
248 | revert_creds(old_cred); | ||
249 | |||
250 | /* Update size */ | ||
251 | ovl_copyattr(ovl_inode_real(inode), inode); | ||
252 | |||
253 | fdput(real); | ||
254 | |||
255 | out_unlock: | ||
256 | inode_unlock(inode); | ||
257 | |||
258 | return ret; | ||
259 | } | ||
260 | |||
261 | static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) | ||
262 | { | ||
263 | struct fd real; | ||
264 | const struct cred *old_cred; | ||
265 | int ret; | ||
266 | |||
267 | ret = ovl_real_fdget_meta(file, &real, !datasync); | ||
268 | if (ret) | ||
269 | return ret; | ||
270 | |||
271 | /* Don't sync lower file for fear of receiving EROFS error */ | ||
272 | if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) { | ||
273 | old_cred = ovl_override_creds(file_inode(file)->i_sb); | ||
274 | ret = vfs_fsync_range(real.file, start, end, datasync); | ||
275 | revert_creds(old_cred); | ||
276 | } | ||
277 | |||
278 | fdput(real); | ||
279 | |||
280 | return ret; | ||
281 | } | ||
282 | |||
283 | static int ovl_mmap(struct file *file, struct vm_area_struct *vma) | ||
284 | { | ||
285 | struct file *realfile = file->private_data; | ||
286 | const struct cred *old_cred; | ||
287 | int ret; | ||
288 | |||
289 | if (!realfile->f_op->mmap) | ||
290 | return -ENODEV; | ||
291 | |||
292 | if (WARN_ON(file != vma->vm_file)) | ||
293 | return -EIO; | ||
294 | |||
295 | vma->vm_file = get_file(realfile); | ||
296 | |||
297 | old_cred = ovl_override_creds(file_inode(file)->i_sb); | ||
298 | ret = call_mmap(vma->vm_file, vma); | ||
299 | revert_creds(old_cred); | ||
300 | |||
301 | if (ret) { | ||
302 | /* Drop reference count from new vm_file value */ | ||
303 | fput(realfile); | ||
304 | } else { | ||
305 | /* Drop reference count from previous vm_file value */ | ||
306 | fput(file); | ||
307 | } | ||
308 | |||
309 | ovl_file_accessed(file); | ||
310 | |||
311 | return ret; | ||
312 | } | ||
313 | |||
314 | static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | ||
315 | { | ||
316 | struct inode *inode = file_inode(file); | ||
317 | struct fd real; | ||
318 | const struct cred *old_cred; | ||
319 | int ret; | ||
320 | |||
321 | ret = ovl_real_fdget(file, &real); | ||
322 | if (ret) | ||
323 | return ret; | ||
324 | |||
325 | old_cred = ovl_override_creds(file_inode(file)->i_sb); | ||
326 | ret = vfs_fallocate(real.file, mode, offset, len); | ||
327 | revert_creds(old_cred); | ||
328 | |||
329 | /* Update size */ | ||
330 | ovl_copyattr(ovl_inode_real(inode), inode); | ||
331 | |||
332 | fdput(real); | ||
333 | |||
334 | return ret; | ||
335 | } | ||
336 | |||
337 | static long ovl_real_ioctl(struct file *file, unsigned int cmd, | ||
338 | unsigned long arg) | ||
339 | { | ||
340 | struct fd real; | ||
341 | const struct cred *old_cred; | ||
342 | long ret; | ||
343 | |||
344 | ret = ovl_real_fdget(file, &real); | ||
345 | if (ret) | ||
346 | return ret; | ||
347 | |||
348 | old_cred = ovl_override_creds(file_inode(file)->i_sb); | ||
349 | ret = vfs_ioctl(real.file, cmd, arg); | ||
350 | revert_creds(old_cred); | ||
351 | |||
352 | fdput(real); | ||
353 | |||
354 | return ret; | ||
355 | } | ||
356 | |||
357 | static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
358 | { | ||
359 | long ret; | ||
360 | struct inode *inode = file_inode(file); | ||
361 | |||
362 | switch (cmd) { | ||
363 | case FS_IOC_GETFLAGS: | ||
364 | ret = ovl_real_ioctl(file, cmd, arg); | ||
365 | break; | ||
366 | |||
367 | case FS_IOC_SETFLAGS: | ||
368 | if (!inode_owner_or_capable(inode)) | ||
369 | return -EACCES; | ||
370 | |||
371 | ret = mnt_want_write_file(file); | ||
372 | if (ret) | ||
373 | return ret; | ||
374 | |||
375 | ret = ovl_copy_up_with_data(file_dentry(file)); | ||
376 | if (!ret) { | ||
377 | ret = ovl_real_ioctl(file, cmd, arg); | ||
378 | |||
379 | inode_lock(inode); | ||
380 | ovl_copyflags(ovl_inode_real(inode), inode); | ||
381 | inode_unlock(inode); | ||
382 | } | ||
383 | |||
384 | mnt_drop_write_file(file); | ||
385 | break; | ||
386 | |||
387 | default: | ||
388 | ret = -ENOTTY; | ||
389 | } | ||
390 | |||
391 | return ret; | ||
392 | } | ||
393 | |||
394 | static long ovl_compat_ioctl(struct file *file, unsigned int cmd, | ||
395 | unsigned long arg) | ||
396 | { | ||
397 | switch (cmd) { | ||
398 | case FS_IOC32_GETFLAGS: | ||
399 | cmd = FS_IOC_GETFLAGS; | ||
400 | break; | ||
401 | |||
402 | case FS_IOC32_SETFLAGS: | ||
403 | cmd = FS_IOC_SETFLAGS; | ||
404 | break; | ||
405 | |||
406 | default: | ||
407 | return -ENOIOCTLCMD; | ||
408 | } | ||
409 | |||
410 | return ovl_ioctl(file, cmd, arg); | ||
411 | } | ||
412 | |||
413 | enum ovl_copyop { | ||
414 | OVL_COPY, | ||
415 | OVL_CLONE, | ||
416 | OVL_DEDUPE, | ||
417 | }; | ||
418 | |||
419 | static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in, | ||
420 | struct file *file_out, loff_t pos_out, | ||
421 | u64 len, unsigned int flags, enum ovl_copyop op) | ||
422 | { | ||
423 | struct inode *inode_out = file_inode(file_out); | ||
424 | struct fd real_in, real_out; | ||
425 | const struct cred *old_cred; | ||
426 | ssize_t ret; | ||
427 | |||
428 | ret = ovl_real_fdget(file_out, &real_out); | ||
429 | if (ret) | ||
430 | return ret; | ||
431 | |||
432 | ret = ovl_real_fdget(file_in, &real_in); | ||
433 | if (ret) { | ||
434 | fdput(real_out); | ||
435 | return ret; | ||
436 | } | ||
437 | |||
438 | old_cred = ovl_override_creds(file_inode(file_out)->i_sb); | ||
439 | switch (op) { | ||
440 | case OVL_COPY: | ||
441 | ret = vfs_copy_file_range(real_in.file, pos_in, | ||
442 | real_out.file, pos_out, len, flags); | ||
443 | break; | ||
444 | |||
445 | case OVL_CLONE: | ||
446 | ret = vfs_clone_file_range(real_in.file, pos_in, | ||
447 | real_out.file, pos_out, len); | ||
448 | break; | ||
449 | |||
450 | case OVL_DEDUPE: | ||
451 | ret = vfs_dedupe_file_range_one(real_in.file, pos_in, | ||
452 | real_out.file, pos_out, len); | ||
453 | break; | ||
454 | } | ||
455 | revert_creds(old_cred); | ||
456 | |||
457 | /* Update size */ | ||
458 | ovl_copyattr(ovl_inode_real(inode_out), inode_out); | ||
459 | |||
460 | fdput(real_in); | ||
461 | fdput(real_out); | ||
462 | |||
463 | return ret; | ||
464 | } | ||
465 | |||
466 | static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, | ||
467 | struct file *file_out, loff_t pos_out, | ||
468 | size_t len, unsigned int flags) | ||
469 | { | ||
470 | return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, | ||
471 | OVL_COPY); | ||
472 | } | ||
473 | |||
474 | static int ovl_clone_file_range(struct file *file_in, loff_t pos_in, | ||
475 | struct file *file_out, loff_t pos_out, u64 len) | ||
476 | { | ||
477 | return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0, | ||
478 | OVL_CLONE); | ||
479 | } | ||
480 | |||
481 | static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in, | ||
482 | struct file *file_out, loff_t pos_out, u64 len) | ||
483 | { | ||
484 | /* | ||
485 | * Don't copy up because of a dedupe request, this wouldn't make sense | ||
486 | * most of the time (data would be duplicated instead of deduplicated). | ||
487 | */ | ||
488 | if (!ovl_inode_upper(file_inode(file_in)) || | ||
489 | !ovl_inode_upper(file_inode(file_out))) | ||
490 | return -EPERM; | ||
491 | |||
492 | return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0, | ||
493 | OVL_DEDUPE); | ||
494 | } | ||
495 | |||
496 | const struct file_operations ovl_file_operations = { | ||
497 | .open = ovl_open, | ||
498 | .release = ovl_release, | ||
499 | .llseek = ovl_llseek, | ||
500 | .read_iter = ovl_read_iter, | ||
501 | .write_iter = ovl_write_iter, | ||
502 | .fsync = ovl_fsync, | ||
503 | .mmap = ovl_mmap, | ||
504 | .fallocate = ovl_fallocate, | ||
505 | .unlocked_ioctl = ovl_ioctl, | ||
506 | .compat_ioctl = ovl_compat_ioctl, | ||
507 | |||
508 | .copy_file_range = ovl_copy_file_range, | ||
509 | .clone_file_range = ovl_clone_file_range, | ||
510 | .dedupe_file_range = ovl_dedupe_file_range, | ||
511 | }; | ||
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index ed16a898caeb..e0bb217c01e2 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c | |||
@@ -19,18 +19,10 @@ | |||
19 | int ovl_setattr(struct dentry *dentry, struct iattr *attr) | 19 | int ovl_setattr(struct dentry *dentry, struct iattr *attr) |
20 | { | 20 | { |
21 | int err; | 21 | int err; |
22 | bool full_copy_up = false; | ||
22 | struct dentry *upperdentry; | 23 | struct dentry *upperdentry; |
23 | const struct cred *old_cred; | 24 | const struct cred *old_cred; |
24 | 25 | ||
25 | /* | ||
26 | * Check for permissions before trying to copy-up. This is redundant | ||
27 | * since it will be rechecked later by ->setattr() on upper dentry. But | ||
28 | * without this, copy-up can be triggered by just about anybody. | ||
29 | * | ||
30 | * We don't initialize inode->size, which just means that | ||
31 | * inode_newsize_ok() will always check against MAX_LFS_FILESIZE and not | ||
32 | * check for a swapfile (which this won't be anyway). | ||
33 | */ | ||
34 | err = setattr_prepare(dentry, attr); | 26 | err = setattr_prepare(dentry, attr); |
35 | if (err) | 27 | if (err) |
36 | return err; | 28 | return err; |
@@ -39,10 +31,33 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) | |||
39 | if (err) | 31 | if (err) |
40 | goto out; | 32 | goto out; |
41 | 33 | ||
42 | err = ovl_copy_up(dentry); | 34 | if (attr->ia_valid & ATTR_SIZE) { |
35 | struct inode *realinode = d_inode(ovl_dentry_real(dentry)); | ||
36 | |||
37 | err = -ETXTBSY; | ||
38 | if (atomic_read(&realinode->i_writecount) < 0) | ||
39 | goto out_drop_write; | ||
40 | |||
41 | /* Truncate should trigger data copy up as well */ | ||
42 | full_copy_up = true; | ||
43 | } | ||
44 | |||
45 | if (!full_copy_up) | ||
46 | err = ovl_copy_up(dentry); | ||
47 | else | ||
48 | err = ovl_copy_up_with_data(dentry); | ||
43 | if (!err) { | 49 | if (!err) { |
50 | struct inode *winode = NULL; | ||
51 | |||
44 | upperdentry = ovl_dentry_upper(dentry); | 52 | upperdentry = ovl_dentry_upper(dentry); |
45 | 53 | ||
54 | if (attr->ia_valid & ATTR_SIZE) { | ||
55 | winode = d_inode(upperdentry); | ||
56 | err = get_write_access(winode); | ||
57 | if (err) | ||
58 | goto out_drop_write; | ||
59 | } | ||
60 | |||
46 | if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) | 61 | if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) |
47 | attr->ia_valid &= ~ATTR_MODE; | 62 | attr->ia_valid &= ~ATTR_MODE; |
48 | 63 | ||
@@ -53,7 +68,11 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) | |||
53 | if (!err) | 68 | if (!err) |
54 | ovl_copyattr(upperdentry->d_inode, dentry->d_inode); | 69 | ovl_copyattr(upperdentry->d_inode, dentry->d_inode); |
55 | inode_unlock(upperdentry->d_inode); | 70 | inode_unlock(upperdentry->d_inode); |
71 | |||
72 | if (winode) | ||
73 | put_write_access(winode); | ||
56 | } | 74 | } |
75 | out_drop_write: | ||
57 | ovl_drop_write(dentry); | 76 | ovl_drop_write(dentry); |
58 | out: | 77 | out: |
59 | return err; | 78 | return err; |
@@ -133,6 +152,9 @@ int ovl_getattr(const struct path *path, struct kstat *stat, | |||
133 | bool samefs = ovl_same_sb(dentry->d_sb); | 152 | bool samefs = ovl_same_sb(dentry->d_sb); |
134 | struct ovl_layer *lower_layer = NULL; | 153 | struct ovl_layer *lower_layer = NULL; |
135 | int err; | 154 | int err; |
155 | bool metacopy_blocks = false; | ||
156 | |||
157 | metacopy_blocks = ovl_is_metacopy_dentry(dentry); | ||
136 | 158 | ||
137 | type = ovl_path_real(dentry, &realpath); | 159 | type = ovl_path_real(dentry, &realpath); |
138 | old_cred = ovl_override_creds(dentry->d_sb); | 160 | old_cred = ovl_override_creds(dentry->d_sb); |
@@ -154,7 +176,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat, | |||
154 | lower_layer = ovl_layer_lower(dentry); | 176 | lower_layer = ovl_layer_lower(dentry); |
155 | } else if (OVL_TYPE_ORIGIN(type)) { | 177 | } else if (OVL_TYPE_ORIGIN(type)) { |
156 | struct kstat lowerstat; | 178 | struct kstat lowerstat; |
157 | u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0); | 179 | u32 lowermask = STATX_INO | STATX_BLOCKS | |
180 | (!is_dir ? STATX_NLINK : 0); | ||
158 | 181 | ||
159 | ovl_path_lower(dentry, &realpath); | 182 | ovl_path_lower(dentry, &realpath); |
160 | err = vfs_getattr(&realpath, &lowerstat, | 183 | err = vfs_getattr(&realpath, &lowerstat, |
@@ -183,6 +206,35 @@ int ovl_getattr(const struct path *path, struct kstat *stat, | |||
183 | stat->ino = lowerstat.ino; | 206 | stat->ino = lowerstat.ino; |
184 | lower_layer = ovl_layer_lower(dentry); | 207 | lower_layer = ovl_layer_lower(dentry); |
185 | } | 208 | } |
209 | |||
210 | /* | ||
211 | * If we are querying a metacopy dentry and lower | ||
212 | * dentry is data dentry, then use the blocks we | ||
213 | * queried just now. We don't have to do additional | ||
214 | * vfs_getattr(). If lower itself is metacopy, then | ||
215 | * additional vfs_getattr() is unavoidable. | ||
216 | */ | ||
217 | if (metacopy_blocks && | ||
218 | realpath.dentry == ovl_dentry_lowerdata(dentry)) { | ||
219 | stat->blocks = lowerstat.blocks; | ||
220 | metacopy_blocks = false; | ||
221 | } | ||
222 | } | ||
223 | |||
224 | if (metacopy_blocks) { | ||
225 | /* | ||
226 | * If lower is not same as lowerdata or if there was | ||
227 | * no origin on upper, we can end up here. | ||
228 | */ | ||
229 | struct kstat lowerdatastat; | ||
230 | u32 lowermask = STATX_BLOCKS; | ||
231 | |||
232 | ovl_path_lowerdata(dentry, &realpath); | ||
233 | err = vfs_getattr(&realpath, &lowerdatastat, | ||
234 | lowermask, flags); | ||
235 | if (err) | ||
236 | goto out; | ||
237 | stat->blocks = lowerdatastat.blocks; | ||
186 | } | 238 | } |
187 | } | 239 | } |
188 | 240 | ||
@@ -304,6 +356,9 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, | |||
304 | } | 356 | } |
305 | revert_creds(old_cred); | 357 | revert_creds(old_cred); |
306 | 358 | ||
359 | /* copy c/mtime */ | ||
360 | ovl_copyattr(d_inode(realdentry), inode); | ||
361 | |||
307 | out_drop_write: | 362 | out_drop_write: |
308 | ovl_drop_write(dentry); | 363 | ovl_drop_write(dentry); |
309 | out: | 364 | out: |
@@ -384,38 +439,6 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type) | |||
384 | return acl; | 439 | return acl; |
385 | } | 440 | } |
386 | 441 | ||
387 | static bool ovl_open_need_copy_up(struct dentry *dentry, int flags) | ||
388 | { | ||
389 | /* Copy up of disconnected dentry does not set upper alias */ | ||
390 | if (ovl_dentry_upper(dentry) && | ||
391 | (ovl_dentry_has_upper_alias(dentry) || | ||
392 | (dentry->d_flags & DCACHE_DISCONNECTED))) | ||
393 | return false; | ||
394 | |||
395 | if (special_file(d_inode(dentry)->i_mode)) | ||
396 | return false; | ||
397 | |||
398 | if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC)) | ||
399 | return false; | ||
400 | |||
401 | return true; | ||
402 | } | ||
403 | |||
404 | int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags) | ||
405 | { | ||
406 | int err = 0; | ||
407 | |||
408 | if (ovl_open_need_copy_up(dentry, file_flags)) { | ||
409 | err = ovl_want_write(dentry); | ||
410 | if (!err) { | ||
411 | err = ovl_copy_up_flags(dentry, file_flags); | ||
412 | ovl_drop_write(dentry); | ||
413 | } | ||
414 | } | ||
415 | |||
416 | return err; | ||
417 | } | ||
418 | |||
419 | int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags) | 442 | int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags) |
420 | { | 443 | { |
421 | if (flags & S_ATIME) { | 444 | if (flags & S_ATIME) { |
@@ -433,6 +456,23 @@ int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags) | |||
433 | return 0; | 456 | return 0; |
434 | } | 457 | } |
435 | 458 | ||
459 | static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
460 | u64 start, u64 len) | ||
461 | { | ||
462 | int err; | ||
463 | struct inode *realinode = ovl_inode_real(inode); | ||
464 | const struct cred *old_cred; | ||
465 | |||
466 | if (!realinode->i_op->fiemap) | ||
467 | return -EOPNOTSUPP; | ||
468 | |||
469 | old_cred = ovl_override_creds(inode->i_sb); | ||
470 | err = realinode->i_op->fiemap(realinode, fieinfo, start, len); | ||
471 | revert_creds(old_cred); | ||
472 | |||
473 | return err; | ||
474 | } | ||
475 | |||
436 | static const struct inode_operations ovl_file_inode_operations = { | 476 | static const struct inode_operations ovl_file_inode_operations = { |
437 | .setattr = ovl_setattr, | 477 | .setattr = ovl_setattr, |
438 | .permission = ovl_permission, | 478 | .permission = ovl_permission, |
@@ -440,6 +480,7 @@ static const struct inode_operations ovl_file_inode_operations = { | |||
440 | .listxattr = ovl_listxattr, | 480 | .listxattr = ovl_listxattr, |
441 | .get_acl = ovl_get_acl, | 481 | .get_acl = ovl_get_acl, |
442 | .update_time = ovl_update_time, | 482 | .update_time = ovl_update_time, |
483 | .fiemap = ovl_fiemap, | ||
443 | }; | 484 | }; |
444 | 485 | ||
445 | static const struct inode_operations ovl_symlink_inode_operations = { | 486 | static const struct inode_operations ovl_symlink_inode_operations = { |
@@ -450,6 +491,15 @@ static const struct inode_operations ovl_symlink_inode_operations = { | |||
450 | .update_time = ovl_update_time, | 491 | .update_time = ovl_update_time, |
451 | }; | 492 | }; |
452 | 493 | ||
494 | static const struct inode_operations ovl_special_inode_operations = { | ||
495 | .setattr = ovl_setattr, | ||
496 | .permission = ovl_permission, | ||
497 | .getattr = ovl_getattr, | ||
498 | .listxattr = ovl_listxattr, | ||
499 | .get_acl = ovl_get_acl, | ||
500 | .update_time = ovl_update_time, | ||
501 | }; | ||
502 | |||
453 | /* | 503 | /* |
454 | * It is possible to stack overlayfs instance on top of another | 504 | * It is possible to stack overlayfs instance on top of another |
455 | * overlayfs instance as lower layer. We need to annotate the | 505 | * overlayfs instance as lower layer. We need to annotate the |
@@ -520,6 +570,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, | |||
520 | switch (mode & S_IFMT) { | 570 | switch (mode & S_IFMT) { |
521 | case S_IFREG: | 571 | case S_IFREG: |
522 | inode->i_op = &ovl_file_inode_operations; | 572 | inode->i_op = &ovl_file_inode_operations; |
573 | inode->i_fop = &ovl_file_operations; | ||
523 | break; | 574 | break; |
524 | 575 | ||
525 | case S_IFDIR: | 576 | case S_IFDIR: |
@@ -532,7 +583,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, | |||
532 | break; | 583 | break; |
533 | 584 | ||
534 | default: | 585 | default: |
535 | inode->i_op = &ovl_file_inode_operations; | 586 | inode->i_op = &ovl_special_inode_operations; |
536 | init_special_inode(inode, mode, rdev); | 587 | init_special_inode(inode, mode, rdev); |
537 | break; | 588 | break; |
538 | } | 589 | } |
@@ -769,8 +820,9 @@ struct inode *ovl_get_inode(struct super_block *sb, | |||
769 | bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, | 820 | bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, |
770 | oip->index); | 821 | oip->index); |
771 | int fsid = bylower ? oip->lowerpath->layer->fsid : 0; | 822 | int fsid = bylower ? oip->lowerpath->layer->fsid : 0; |
772 | bool is_dir; | 823 | bool is_dir, metacopy = false; |
773 | unsigned long ino = 0; | 824 | unsigned long ino = 0; |
825 | int err = -ENOMEM; | ||
774 | 826 | ||
775 | if (!realinode) | 827 | if (!realinode) |
776 | realinode = d_inode(lowerdentry); | 828 | realinode = d_inode(lowerdentry); |
@@ -787,7 +839,7 @@ struct inode *ovl_get_inode(struct super_block *sb, | |||
787 | 839 | ||
788 | inode = ovl_iget5(sb, oip->newinode, key); | 840 | inode = ovl_iget5(sb, oip->newinode, key); |
789 | if (!inode) | 841 | if (!inode) |
790 | goto out_nomem; | 842 | goto out_err; |
791 | if (!(inode->i_state & I_NEW)) { | 843 | if (!(inode->i_state & I_NEW)) { |
792 | /* | 844 | /* |
793 | * Verify that the underlying files stored in the inode | 845 | * Verify that the underlying files stored in the inode |
@@ -796,11 +848,12 @@ struct inode *ovl_get_inode(struct super_block *sb, | |||
796 | if (!ovl_verify_inode(inode, lowerdentry, upperdentry, | 848 | if (!ovl_verify_inode(inode, lowerdentry, upperdentry, |
797 | true)) { | 849 | true)) { |
798 | iput(inode); | 850 | iput(inode); |
799 | inode = ERR_PTR(-ESTALE); | 851 | err = -ESTALE; |
800 | goto out; | 852 | goto out_err; |
801 | } | 853 | } |
802 | 854 | ||
803 | dput(upperdentry); | 855 | dput(upperdentry); |
856 | kfree(oip->redirect); | ||
804 | goto out; | 857 | goto out; |
805 | } | 858 | } |
806 | 859 | ||
@@ -812,11 +865,13 @@ struct inode *ovl_get_inode(struct super_block *sb, | |||
812 | } else { | 865 | } else { |
813 | /* Lower hardlink that will be broken on copy up */ | 866 | /* Lower hardlink that will be broken on copy up */ |
814 | inode = new_inode(sb); | 867 | inode = new_inode(sb); |
815 | if (!inode) | 868 | if (!inode) { |
816 | goto out_nomem; | 869 | err = -ENOMEM; |
870 | goto out_err; | ||
871 | } | ||
817 | } | 872 | } |
818 | ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid); | 873 | ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid); |
819 | ovl_inode_init(inode, upperdentry, lowerdentry); | 874 | ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata); |
820 | 875 | ||
821 | if (upperdentry && ovl_is_impuredir(upperdentry)) | 876 | if (upperdentry && ovl_is_impuredir(upperdentry)) |
822 | ovl_set_flag(OVL_IMPURE, inode); | 877 | ovl_set_flag(OVL_IMPURE, inode); |
@@ -824,6 +879,20 @@ struct inode *ovl_get_inode(struct super_block *sb, | |||
824 | if (oip->index) | 879 | if (oip->index) |
825 | ovl_set_flag(OVL_INDEX, inode); | 880 | ovl_set_flag(OVL_INDEX, inode); |
826 | 881 | ||
882 | if (upperdentry) { | ||
883 | err = ovl_check_metacopy_xattr(upperdentry); | ||
884 | if (err < 0) | ||
885 | goto out_err; | ||
886 | metacopy = err; | ||
887 | if (!metacopy) | ||
888 | ovl_set_flag(OVL_UPPERDATA, inode); | ||
889 | } | ||
890 | |||
891 | OVL_I(inode)->redirect = oip->redirect; | ||
892 | |||
893 | if (bylower) | ||
894 | ovl_set_flag(OVL_CONST_INO, inode); | ||
895 | |||
827 | /* Check for non-merge dir that may have whiteouts */ | 896 | /* Check for non-merge dir that may have whiteouts */ |
828 | if (is_dir) { | 897 | if (is_dir) { |
829 | if (((upperdentry && lowerdentry) || oip->numlower > 1) || | 898 | if (((upperdentry && lowerdentry) || oip->numlower > 1) || |
@@ -837,7 +906,7 @@ struct inode *ovl_get_inode(struct super_block *sb, | |||
837 | out: | 906 | out: |
838 | return inode; | 907 | return inode; |
839 | 908 | ||
840 | out_nomem: | 909 | out_err: |
841 | inode = ERR_PTR(-ENOMEM); | 910 | inode = ERR_PTR(err); |
842 | goto out; | 911 | goto out; |
843 | } | 912 | } |
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index c993dd8db739..f28711846dd6 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c | |||
@@ -24,38 +24,20 @@ struct ovl_lookup_data { | |||
24 | bool stop; | 24 | bool stop; |
25 | bool last; | 25 | bool last; |
26 | char *redirect; | 26 | char *redirect; |
27 | bool metacopy; | ||
27 | }; | 28 | }; |
28 | 29 | ||
29 | static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, | 30 | static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, |
30 | size_t prelen, const char *post) | 31 | size_t prelen, const char *post) |
31 | { | 32 | { |
32 | int res; | 33 | int res; |
33 | char *s, *next, *buf = NULL; | 34 | char *buf; |
34 | 35 | ||
35 | res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0); | 36 | buf = ovl_get_redirect_xattr(dentry, prelen + strlen(post)); |
36 | if (res < 0) { | 37 | if (IS_ERR_OR_NULL(buf)) |
37 | if (res == -ENODATA || res == -EOPNOTSUPP) | 38 | return PTR_ERR(buf); |
38 | return 0; | ||
39 | goto fail; | ||
40 | } | ||
41 | buf = kzalloc(prelen + res + strlen(post) + 1, GFP_KERNEL); | ||
42 | if (!buf) | ||
43 | return -ENOMEM; | ||
44 | 39 | ||
45 | if (res == 0) | ||
46 | goto invalid; | ||
47 | |||
48 | res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res); | ||
49 | if (res < 0) | ||
50 | goto fail; | ||
51 | if (res == 0) | ||
52 | goto invalid; | ||
53 | if (buf[0] == '/') { | 40 | if (buf[0] == '/') { |
54 | for (s = buf; *s++ == '/'; s = next) { | ||
55 | next = strchrnul(s, '/'); | ||
56 | if (s == next) | ||
57 | goto invalid; | ||
58 | } | ||
59 | /* | 41 | /* |
60 | * One of the ancestor path elements in an absolute path | 42 | * One of the ancestor path elements in an absolute path |
61 | * lookup in ovl_lookup_layer() could have been opaque and | 43 | * lookup in ovl_lookup_layer() could have been opaque and |
@@ -66,9 +48,7 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, | |||
66 | */ | 48 | */ |
67 | d->stop = false; | 49 | d->stop = false; |
68 | } else { | 50 | } else { |
69 | if (strchr(buf, '/') != NULL) | 51 | res = strlen(buf) + 1; |
70 | goto invalid; | ||
71 | |||
72 | memmove(buf + prelen, buf, res); | 52 | memmove(buf + prelen, buf, res); |
73 | memcpy(buf, d->name.name, prelen); | 53 | memcpy(buf, d->name.name, prelen); |
74 | } | 54 | } |
@@ -80,16 +60,6 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, | |||
80 | d->name.len = strlen(d->redirect); | 60 | d->name.len = strlen(d->redirect); |
81 | 61 | ||
82 | return 0; | 62 | return 0; |
83 | |||
84 | err_free: | ||
85 | kfree(buf); | ||
86 | return 0; | ||
87 | fail: | ||
88 | pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res); | ||
89 | goto err_free; | ||
90 | invalid: | ||
91 | pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf); | ||
92 | goto err_free; | ||
93 | } | 63 | } |
94 | 64 | ||
95 | static int ovl_acceptable(void *ctx, struct dentry *dentry) | 65 | static int ovl_acceptable(void *ctx, struct dentry *dentry) |
@@ -252,28 +222,39 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, | |||
252 | d->stop = d->opaque = true; | 222 | d->stop = d->opaque = true; |
253 | goto put_and_out; | 223 | goto put_and_out; |
254 | } | 224 | } |
255 | if (!d_can_lookup(this)) { | 225 | /* |
226 | * This dentry should be a regular file if previous layer lookup | ||
227 | * found a metacopy dentry. | ||
228 | */ | ||
229 | if (last_element && d->metacopy && !d_is_reg(this)) { | ||
256 | d->stop = true; | 230 | d->stop = true; |
257 | if (d->is_dir) | 231 | goto put_and_out; |
258 | goto put_and_out; | ||
259 | |||
260 | /* | ||
261 | * NB: handle failure to lookup non-last element when non-dir | ||
262 | * redirects become possible | ||
263 | */ | ||
264 | WARN_ON(!last_element); | ||
265 | goto out; | ||
266 | } | 232 | } |
267 | if (last_element) | 233 | if (!d_can_lookup(this)) { |
268 | d->is_dir = true; | 234 | if (d->is_dir || !last_element) { |
269 | if (d->last) | 235 | d->stop = true; |
270 | goto out; | 236 | goto put_and_out; |
237 | } | ||
238 | err = ovl_check_metacopy_xattr(this); | ||
239 | if (err < 0) | ||
240 | goto out_err; | ||
271 | 241 | ||
272 | if (ovl_is_opaquedir(this)) { | 242 | d->metacopy = err; |
273 | d->stop = true; | 243 | d->stop = !d->metacopy; |
244 | if (!d->metacopy || d->last) | ||
245 | goto out; | ||
246 | } else { | ||
274 | if (last_element) | 247 | if (last_element) |
275 | d->opaque = true; | 248 | d->is_dir = true; |
276 | goto out; | 249 | if (d->last) |
250 | goto out; | ||
251 | |||
252 | if (ovl_is_opaquedir(this)) { | ||
253 | d->stop = true; | ||
254 | if (last_element) | ||
255 | d->opaque = true; | ||
256 | goto out; | ||
257 | } | ||
277 | } | 258 | } |
278 | err = ovl_check_redirect(this, d, prelen, post); | 259 | err = ovl_check_redirect(this, d, prelen, post); |
279 | if (err) | 260 | if (err) |
@@ -823,7 +804,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, | |||
823 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | 804 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; |
824 | struct ovl_entry *poe = dentry->d_parent->d_fsdata; | 805 | struct ovl_entry *poe = dentry->d_parent->d_fsdata; |
825 | struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; | 806 | struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; |
826 | struct ovl_path *stack = NULL; | 807 | struct ovl_path *stack = NULL, *origin_path = NULL; |
827 | struct dentry *upperdir, *upperdentry = NULL; | 808 | struct dentry *upperdir, *upperdentry = NULL; |
828 | struct dentry *origin = NULL; | 809 | struct dentry *origin = NULL; |
829 | struct dentry *index = NULL; | 810 | struct dentry *index = NULL; |
@@ -834,6 +815,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, | |||
834 | struct dentry *this; | 815 | struct dentry *this; |
835 | unsigned int i; | 816 | unsigned int i; |
836 | int err; | 817 | int err; |
818 | bool metacopy = false; | ||
837 | struct ovl_lookup_data d = { | 819 | struct ovl_lookup_data d = { |
838 | .name = dentry->d_name, | 820 | .name = dentry->d_name, |
839 | .is_dir = false, | 821 | .is_dir = false, |
@@ -841,6 +823,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, | |||
841 | .stop = false, | 823 | .stop = false, |
842 | .last = ofs->config.redirect_follow ? false : !poe->numlower, | 824 | .last = ofs->config.redirect_follow ? false : !poe->numlower, |
843 | .redirect = NULL, | 825 | .redirect = NULL, |
826 | .metacopy = false, | ||
844 | }; | 827 | }; |
845 | 828 | ||
846 | if (dentry->d_name.len > ofs->namelen) | 829 | if (dentry->d_name.len > ofs->namelen) |
@@ -859,7 +842,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, | |||
859 | goto out; | 842 | goto out; |
860 | } | 843 | } |
861 | if (upperdentry && !d.is_dir) { | 844 | if (upperdentry && !d.is_dir) { |
862 | BUG_ON(!d.stop || d.redirect); | 845 | unsigned int origin_ctr = 0; |
846 | |||
863 | /* | 847 | /* |
864 | * Lookup copy up origin by decoding origin file handle. | 848 | * Lookup copy up origin by decoding origin file handle. |
865 | * We may get a disconnected dentry, which is fine, | 849 | * We may get a disconnected dentry, which is fine, |
@@ -870,9 +854,13 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, | |||
870 | * number - it's the same as if we held a reference | 854 | * number - it's the same as if we held a reference |
871 | * to a dentry in lower layer that was moved under us. | 855 | * to a dentry in lower layer that was moved under us. |
872 | */ | 856 | */ |
873 | err = ovl_check_origin(ofs, upperdentry, &stack, &ctr); | 857 | err = ovl_check_origin(ofs, upperdentry, &origin_path, |
858 | &origin_ctr); | ||
874 | if (err) | 859 | if (err) |
875 | goto out_put_upper; | 860 | goto out_put_upper; |
861 | |||
862 | if (d.metacopy) | ||
863 | metacopy = true; | ||
876 | } | 864 | } |
877 | 865 | ||
878 | if (d.redirect) { | 866 | if (d.redirect) { |
@@ -913,7 +901,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, | |||
913 | * If no origin fh is stored in upper of a merge dir, store fh | 901 | * If no origin fh is stored in upper of a merge dir, store fh |
914 | * of lower dir and set upper parent "impure". | 902 | * of lower dir and set upper parent "impure". |
915 | */ | 903 | */ |
916 | if (upperdentry && !ctr && !ofs->noxattr) { | 904 | if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) { |
917 | err = ovl_fix_origin(dentry, this, upperdentry); | 905 | err = ovl_fix_origin(dentry, this, upperdentry); |
918 | if (err) { | 906 | if (err) { |
919 | dput(this); | 907 | dput(this); |
@@ -925,18 +913,35 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, | |||
925 | * When "verify_lower" feature is enabled, do not merge with a | 913 | * When "verify_lower" feature is enabled, do not merge with a |
926 | * lower dir that does not match a stored origin xattr. In any | 914 | * lower dir that does not match a stored origin xattr. In any |
927 | * case, only verified origin is used for index lookup. | 915 | * case, only verified origin is used for index lookup. |
916 | * | ||
917 | * For non-dir dentry, if index=on, then ensure origin | ||
918 | * matches the dentry found using path based lookup, | ||
919 | * otherwise error out. | ||
928 | */ | 920 | */ |
929 | if (upperdentry && !ctr && ovl_verify_lower(dentry->d_sb)) { | 921 | if (upperdentry && !ctr && |
922 | ((d.is_dir && ovl_verify_lower(dentry->d_sb)) || | ||
923 | (!d.is_dir && ofs->config.index && origin_path))) { | ||
930 | err = ovl_verify_origin(upperdentry, this, false); | 924 | err = ovl_verify_origin(upperdentry, this, false); |
931 | if (err) { | 925 | if (err) { |
932 | dput(this); | 926 | dput(this); |
933 | break; | 927 | if (d.is_dir) |
928 | break; | ||
929 | goto out_put; | ||
934 | } | 930 | } |
935 | |||
936 | /* Bless lower dir as verified origin */ | ||
937 | origin = this; | 931 | origin = this; |
938 | } | 932 | } |
939 | 933 | ||
934 | if (d.metacopy) | ||
935 | metacopy = true; | ||
936 | /* | ||
937 | * Do not store intermediate metacopy dentries in chain, | ||
938 | * except top most lower metacopy dentry | ||
939 | */ | ||
940 | if (d.metacopy && ctr) { | ||
941 | dput(this); | ||
942 | continue; | ||
943 | } | ||
944 | |||
940 | stack[ctr].dentry = this; | 945 | stack[ctr].dentry = this; |
941 | stack[ctr].layer = lower.layer; | 946 | stack[ctr].layer = lower.layer; |
942 | ctr++; | 947 | ctr++; |
@@ -968,13 +973,48 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, | |||
968 | } | 973 | } |
969 | } | 974 | } |
970 | 975 | ||
976 | if (metacopy) { | ||
977 | /* | ||
978 | * Found a metacopy dentry but did not find corresponding | ||
979 | * data dentry | ||
980 | */ | ||
981 | if (d.metacopy) { | ||
982 | err = -EIO; | ||
983 | goto out_put; | ||
984 | } | ||
985 | |||
986 | err = -EPERM; | ||
987 | if (!ofs->config.metacopy) { | ||
988 | pr_warn_ratelimited("overlay: refusing to follow metacopy origin for (%pd2)\n", | ||
989 | dentry); | ||
990 | goto out_put; | ||
991 | } | ||
992 | } else if (!d.is_dir && upperdentry && !ctr && origin_path) { | ||
993 | if (WARN_ON(stack != NULL)) { | ||
994 | err = -EIO; | ||
995 | goto out_put; | ||
996 | } | ||
997 | stack = origin_path; | ||
998 | ctr = 1; | ||
999 | origin_path = NULL; | ||
1000 | } | ||
1001 | |||
971 | /* | 1002 | /* |
972 | * Lookup index by lower inode and verify it matches upper inode. | 1003 | * Lookup index by lower inode and verify it matches upper inode. |
973 | * We only trust dir index if we verified that lower dir matches | 1004 | * We only trust dir index if we verified that lower dir matches |
974 | * origin, otherwise dir index entries may be inconsistent and we | 1005 | * origin, otherwise dir index entries may be inconsistent and we |
975 | * ignore them. Always lookup index of non-dir and non-upper. | 1006 | * ignore them. |
1007 | * | ||
1008 | * For non-dir upper metacopy dentry, we already set "origin" if we | ||
1009 | * verified that lower matched upper origin. If upper origin was | ||
1010 | * not present (because lower layer did not support fh encode/decode), | ||
1011 | * or indexing is not enabled, do not set "origin" and skip looking up | ||
1012 | * index. This case should be handled in same way as a non-dir upper | ||
1013 | * without ORIGIN is handled. | ||
1014 | * | ||
1015 | * Always lookup index of non-dir non-metacopy and non-upper. | ||
976 | */ | 1016 | */ |
977 | if (ctr && (!upperdentry || !d.is_dir)) | 1017 | if (ctr && (!upperdentry || (!d.is_dir && !metacopy))) |
978 | origin = stack[0].dentry; | 1018 | origin = stack[0].dentry; |
979 | 1019 | ||
980 | if (origin && ovl_indexdir(dentry->d_sb) && | 1020 | if (origin && ovl_indexdir(dentry->d_sb) && |
@@ -1000,8 +1040,15 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, | |||
1000 | 1040 | ||
1001 | if (upperdentry) | 1041 | if (upperdentry) |
1002 | ovl_dentry_set_upper_alias(dentry); | 1042 | ovl_dentry_set_upper_alias(dentry); |
1003 | else if (index) | 1043 | else if (index) { |
1004 | upperdentry = dget(index); | 1044 | upperdentry = dget(index); |
1045 | upperredirect = ovl_get_redirect_xattr(upperdentry, 0); | ||
1046 | if (IS_ERR(upperredirect)) { | ||
1047 | err = PTR_ERR(upperredirect); | ||
1048 | upperredirect = NULL; | ||
1049 | goto out_free_oe; | ||
1050 | } | ||
1051 | } | ||
1005 | 1052 | ||
1006 | if (upperdentry || ctr) { | 1053 | if (upperdentry || ctr) { |
1007 | struct ovl_inode_params oip = { | 1054 | struct ovl_inode_params oip = { |
@@ -1009,22 +1056,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, | |||
1009 | .lowerpath = stack, | 1056 | .lowerpath = stack, |
1010 | .index = index, | 1057 | .index = index, |
1011 | .numlower = ctr, | 1058 | .numlower = ctr, |
1059 | .redirect = upperredirect, | ||
1060 | .lowerdata = (ctr > 1 && !d.is_dir) ? | ||
1061 | stack[ctr - 1].dentry : NULL, | ||
1012 | }; | 1062 | }; |
1013 | 1063 | ||
1014 | inode = ovl_get_inode(dentry->d_sb, &oip); | 1064 | inode = ovl_get_inode(dentry->d_sb, &oip); |
1015 | err = PTR_ERR(inode); | 1065 | err = PTR_ERR(inode); |
1016 | if (IS_ERR(inode)) | 1066 | if (IS_ERR(inode)) |
1017 | goto out_free_oe; | 1067 | goto out_free_oe; |
1018 | |||
1019 | /* | ||
1020 | * NB: handle redirected hard links when non-dir redirects | ||
1021 | * become possible | ||
1022 | */ | ||
1023 | WARN_ON(OVL_I(inode)->redirect); | ||
1024 | OVL_I(inode)->redirect = upperredirect; | ||
1025 | } | 1068 | } |
1026 | 1069 | ||
1027 | revert_creds(old_cred); | 1070 | revert_creds(old_cred); |
1071 | if (origin_path) { | ||
1072 | dput(origin_path->dentry); | ||
1073 | kfree(origin_path); | ||
1074 | } | ||
1028 | dput(index); | 1075 | dput(index); |
1029 | kfree(stack); | 1076 | kfree(stack); |
1030 | kfree(d.redirect); | 1077 | kfree(d.redirect); |
@@ -1039,6 +1086,10 @@ out_put: | |||
1039 | dput(stack[i].dentry); | 1086 | dput(stack[i].dentry); |
1040 | kfree(stack); | 1087 | kfree(stack); |
1041 | out_put_upper: | 1088 | out_put_upper: |
1089 | if (origin_path) { | ||
1090 | dput(origin_path->dentry); | ||
1091 | kfree(origin_path); | ||
1092 | } | ||
1042 | dput(upperdentry); | 1093 | dput(upperdentry); |
1043 | kfree(upperredirect); | 1094 | kfree(upperredirect); |
1044 | out: | 1095 | out: |
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 7538b9b56237..f61839e1054c 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h | |||
@@ -9,6 +9,7 @@ | |||
9 | 9 | ||
10 | #include <linux/kernel.h> | 10 | #include <linux/kernel.h> |
11 | #include <linux/uuid.h> | 11 | #include <linux/uuid.h> |
12 | #include <linux/fs.h> | ||
12 | #include "ovl_entry.h" | 13 | #include "ovl_entry.h" |
13 | 14 | ||
14 | enum ovl_path_type { | 15 | enum ovl_path_type { |
@@ -28,6 +29,7 @@ enum ovl_path_type { | |||
28 | #define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure" | 29 | #define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure" |
29 | #define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink" | 30 | #define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink" |
30 | #define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper" | 31 | #define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper" |
32 | #define OVL_XATTR_METACOPY OVL_XATTR_PREFIX "metacopy" | ||
31 | 33 | ||
32 | enum ovl_inode_flag { | 34 | enum ovl_inode_flag { |
33 | /* Pure upper dir that may contain non pure upper entries */ | 35 | /* Pure upper dir that may contain non pure upper entries */ |
@@ -35,6 +37,9 @@ enum ovl_inode_flag { | |||
35 | /* Non-merge dir that may contain whiteout entries */ | 37 | /* Non-merge dir that may contain whiteout entries */ |
36 | OVL_WHITEOUTS, | 38 | OVL_WHITEOUTS, |
37 | OVL_INDEX, | 39 | OVL_INDEX, |
40 | OVL_UPPERDATA, | ||
41 | /* Inode number will remain constant over copy up. */ | ||
42 | OVL_CONST_INO, | ||
38 | }; | 43 | }; |
39 | 44 | ||
40 | enum ovl_entry_flag { | 45 | enum ovl_entry_flag { |
@@ -190,6 +195,14 @@ static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode) | |||
190 | return ret; | 195 | return ret; |
191 | } | 196 | } |
192 | 197 | ||
198 | static inline bool ovl_open_flags_need_copy_up(int flags) | ||
199 | { | ||
200 | if (!flags) | ||
201 | return false; | ||
202 | |||
203 | return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)); | ||
204 | } | ||
205 | |||
193 | /* util.c */ | 206 | /* util.c */ |
194 | int ovl_want_write(struct dentry *dentry); | 207 | int ovl_want_write(struct dentry *dentry); |
195 | void ovl_drop_write(struct dentry *dentry); | 208 | void ovl_drop_write(struct dentry *dentry); |
@@ -206,15 +219,19 @@ bool ovl_dentry_weird(struct dentry *dentry); | |||
206 | enum ovl_path_type ovl_path_type(struct dentry *dentry); | 219 | enum ovl_path_type ovl_path_type(struct dentry *dentry); |
207 | void ovl_path_upper(struct dentry *dentry, struct path *path); | 220 | void ovl_path_upper(struct dentry *dentry, struct path *path); |
208 | void ovl_path_lower(struct dentry *dentry, struct path *path); | 221 | void ovl_path_lower(struct dentry *dentry, struct path *path); |
222 | void ovl_path_lowerdata(struct dentry *dentry, struct path *path); | ||
209 | enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); | 223 | enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); |
210 | struct dentry *ovl_dentry_upper(struct dentry *dentry); | 224 | struct dentry *ovl_dentry_upper(struct dentry *dentry); |
211 | struct dentry *ovl_dentry_lower(struct dentry *dentry); | 225 | struct dentry *ovl_dentry_lower(struct dentry *dentry); |
226 | struct dentry *ovl_dentry_lowerdata(struct dentry *dentry); | ||
212 | struct ovl_layer *ovl_layer_lower(struct dentry *dentry); | 227 | struct ovl_layer *ovl_layer_lower(struct dentry *dentry); |
213 | struct dentry *ovl_dentry_real(struct dentry *dentry); | 228 | struct dentry *ovl_dentry_real(struct dentry *dentry); |
214 | struct dentry *ovl_i_dentry_upper(struct inode *inode); | 229 | struct dentry *ovl_i_dentry_upper(struct inode *inode); |
215 | struct inode *ovl_inode_upper(struct inode *inode); | 230 | struct inode *ovl_inode_upper(struct inode *inode); |
216 | struct inode *ovl_inode_lower(struct inode *inode); | 231 | struct inode *ovl_inode_lower(struct inode *inode); |
232 | struct inode *ovl_inode_lowerdata(struct inode *inode); | ||
217 | struct inode *ovl_inode_real(struct inode *inode); | 233 | struct inode *ovl_inode_real(struct inode *inode); |
234 | struct inode *ovl_inode_realdata(struct inode *inode); | ||
218 | struct ovl_dir_cache *ovl_dir_cache(struct inode *inode); | 235 | struct ovl_dir_cache *ovl_dir_cache(struct inode *inode); |
219 | void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache); | 236 | void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache); |
220 | void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry); | 237 | void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry); |
@@ -225,18 +242,23 @@ bool ovl_dentry_is_whiteout(struct dentry *dentry); | |||
225 | void ovl_dentry_set_opaque(struct dentry *dentry); | 242 | void ovl_dentry_set_opaque(struct dentry *dentry); |
226 | bool ovl_dentry_has_upper_alias(struct dentry *dentry); | 243 | bool ovl_dentry_has_upper_alias(struct dentry *dentry); |
227 | void ovl_dentry_set_upper_alias(struct dentry *dentry); | 244 | void ovl_dentry_set_upper_alias(struct dentry *dentry); |
245 | bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags); | ||
246 | bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags); | ||
247 | bool ovl_has_upperdata(struct inode *inode); | ||
248 | void ovl_set_upperdata(struct inode *inode); | ||
228 | bool ovl_redirect_dir(struct super_block *sb); | 249 | bool ovl_redirect_dir(struct super_block *sb); |
229 | const char *ovl_dentry_get_redirect(struct dentry *dentry); | 250 | const char *ovl_dentry_get_redirect(struct dentry *dentry); |
230 | void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); | 251 | void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); |
231 | void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, | 252 | void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, |
232 | struct dentry *lowerdentry); | 253 | struct dentry *lowerdentry, struct dentry *lowerdata); |
233 | void ovl_inode_update(struct inode *inode, struct dentry *upperdentry); | 254 | void ovl_inode_update(struct inode *inode, struct dentry *upperdentry); |
234 | void ovl_dentry_version_inc(struct dentry *dentry, bool impurity); | 255 | void ovl_dir_modified(struct dentry *dentry, bool impurity); |
235 | u64 ovl_dentry_version_get(struct dentry *dentry); | 256 | u64 ovl_dentry_version_get(struct dentry *dentry); |
236 | bool ovl_is_whiteout(struct dentry *dentry); | 257 | bool ovl_is_whiteout(struct dentry *dentry); |
237 | struct file *ovl_path_open(struct path *path, int flags); | 258 | struct file *ovl_path_open(struct path *path, int flags); |
238 | int ovl_copy_up_start(struct dentry *dentry); | 259 | int ovl_copy_up_start(struct dentry *dentry, int flags); |
239 | void ovl_copy_up_end(struct dentry *dentry); | 260 | void ovl_copy_up_end(struct dentry *dentry); |
261 | bool ovl_already_copied_up(struct dentry *dentry, int flags); | ||
240 | bool ovl_check_origin_xattr(struct dentry *dentry); | 262 | bool ovl_check_origin_xattr(struct dentry *dentry); |
241 | bool ovl_check_dir_xattr(struct dentry *dentry, const char *name); | 263 | bool ovl_check_dir_xattr(struct dentry *dentry, const char *name); |
242 | int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, | 264 | int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, |
@@ -252,6 +274,9 @@ bool ovl_need_index(struct dentry *dentry); | |||
252 | int ovl_nlink_start(struct dentry *dentry, bool *locked); | 274 | int ovl_nlink_start(struct dentry *dentry, bool *locked); |
253 | void ovl_nlink_end(struct dentry *dentry, bool locked); | 275 | void ovl_nlink_end(struct dentry *dentry, bool locked); |
254 | int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir); | 276 | int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir); |
277 | int ovl_check_metacopy_xattr(struct dentry *dentry); | ||
278 | bool ovl_is_metacopy_dentry(struct dentry *dentry); | ||
279 | char *ovl_get_redirect_xattr(struct dentry *dentry, int padding); | ||
255 | 280 | ||
256 | static inline bool ovl_is_impuredir(struct dentry *dentry) | 281 | static inline bool ovl_is_impuredir(struct dentry *dentry) |
257 | { | 282 | { |
@@ -324,7 +349,6 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, | |||
324 | void *value, size_t size); | 349 | void *value, size_t size); |
325 | ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); | 350 | ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); |
326 | struct posix_acl *ovl_get_acl(struct inode *inode, int type); | 351 | struct posix_acl *ovl_get_acl(struct inode *inode, int type); |
327 | int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags); | ||
328 | int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags); | 352 | int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags); |
329 | bool ovl_is_private_xattr(const char *name); | 353 | bool ovl_is_private_xattr(const char *name); |
330 | 354 | ||
@@ -334,6 +358,8 @@ struct ovl_inode_params { | |||
334 | struct ovl_path *lowerpath; | 358 | struct ovl_path *lowerpath; |
335 | struct dentry *index; | 359 | struct dentry *index; |
336 | unsigned int numlower; | 360 | unsigned int numlower; |
361 | char *redirect; | ||
362 | struct dentry *lowerdata; | ||
337 | }; | 363 | }; |
338 | struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); | 364 | struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); |
339 | struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, | 365 | struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, |
@@ -348,6 +374,14 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) | |||
348 | to->i_atime = from->i_atime; | 374 | to->i_atime = from->i_atime; |
349 | to->i_mtime = from->i_mtime; | 375 | to->i_mtime = from->i_mtime; |
350 | to->i_ctime = from->i_ctime; | 376 | to->i_ctime = from->i_ctime; |
377 | i_size_write(to, i_size_read(from)); | ||
378 | } | ||
379 | |||
380 | static inline void ovl_copyflags(struct inode *from, struct inode *to) | ||
381 | { | ||
382 | unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME; | ||
383 | |||
384 | inode_set_flags(to, from->i_flags & mask, mask); | ||
351 | } | 385 | } |
352 | 386 | ||
353 | /* dir.c */ | 387 | /* dir.c */ |
@@ -368,9 +402,14 @@ struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry, | |||
368 | int ovl_cleanup(struct inode *dir, struct dentry *dentry); | 402 | int ovl_cleanup(struct inode *dir, struct dentry *dentry); |
369 | struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr); | 403 | struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr); |
370 | 404 | ||
405 | /* file.c */ | ||
406 | extern const struct file_operations ovl_file_operations; | ||
407 | |||
371 | /* copy_up.c */ | 408 | /* copy_up.c */ |
372 | int ovl_copy_up(struct dentry *dentry); | 409 | int ovl_copy_up(struct dentry *dentry); |
410 | int ovl_copy_up_with_data(struct dentry *dentry); | ||
373 | int ovl_copy_up_flags(struct dentry *dentry, int flags); | 411 | int ovl_copy_up_flags(struct dentry *dentry, int flags); |
412 | int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags); | ||
374 | int ovl_copy_xattr(struct dentry *old, struct dentry *new); | 413 | int ovl_copy_xattr(struct dentry *old, struct dentry *new); |
375 | int ovl_set_attr(struct dentry *upper, struct kstat *stat); | 414 | int ovl_set_attr(struct dentry *upper, struct kstat *stat); |
376 | struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper); | 415 | struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper); |
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 41655a7d6894..ec237035333a 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h | |||
@@ -19,6 +19,7 @@ struct ovl_config { | |||
19 | bool index; | 19 | bool index; |
20 | bool nfs_export; | 20 | bool nfs_export; |
21 | int xino; | 21 | int xino; |
22 | bool metacopy; | ||
22 | }; | 23 | }; |
23 | 24 | ||
24 | struct ovl_sb { | 25 | struct ovl_sb { |
@@ -88,7 +89,10 @@ static inline struct ovl_entry *OVL_E(struct dentry *dentry) | |||
88 | } | 89 | } |
89 | 90 | ||
90 | struct ovl_inode { | 91 | struct ovl_inode { |
91 | struct ovl_dir_cache *cache; | 92 | union { |
93 | struct ovl_dir_cache *cache; /* directory */ | ||
94 | struct inode *lowerdata; /* regular file */ | ||
95 | }; | ||
92 | const char *redirect; | 96 | const char *redirect; |
93 | u64 version; | 97 | u64 version; |
94 | unsigned long flags; | 98 | unsigned long flags; |
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index ef1fe42ff7bb..cc8303a806b4 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c | |||
@@ -668,6 +668,21 @@ static int ovl_fill_real(struct dir_context *ctx, const char *name, | |||
668 | return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type); | 668 | return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type); |
669 | } | 669 | } |
670 | 670 | ||
671 | static bool ovl_is_impure_dir(struct file *file) | ||
672 | { | ||
673 | struct ovl_dir_file *od = file->private_data; | ||
674 | struct inode *dir = d_inode(file->f_path.dentry); | ||
675 | |||
676 | /* | ||
677 | * Only upper dir can be impure, but if we are in the middle of | ||
678 | * iterating a lower real dir, dir could be copied up and marked | ||
679 | * impure. We only want the impure cache if we started iterating | ||
680 | * a real upper dir to begin with. | ||
681 | */ | ||
682 | return od->is_upper && ovl_test_flag(OVL_IMPURE, dir); | ||
683 | |||
684 | } | ||
685 | |||
671 | static int ovl_iterate_real(struct file *file, struct dir_context *ctx) | 686 | static int ovl_iterate_real(struct file *file, struct dir_context *ctx) |
672 | { | 687 | { |
673 | int err; | 688 | int err; |
@@ -696,7 +711,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx) | |||
696 | rdt.parent_ino = stat.ino; | 711 | rdt.parent_ino = stat.ino; |
697 | } | 712 | } |
698 | 713 | ||
699 | if (ovl_test_flag(OVL_IMPURE, d_inode(dir))) { | 714 | if (ovl_is_impure_dir(file)) { |
700 | rdt.cache = ovl_cache_get_impure(&file->f_path); | 715 | rdt.cache = ovl_cache_get_impure(&file->f_path); |
701 | if (IS_ERR(rdt.cache)) | 716 | if (IS_ERR(rdt.cache)) |
702 | return PTR_ERR(rdt.cache); | 717 | return PTR_ERR(rdt.cache); |
@@ -727,7 +742,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) | |||
727 | */ | 742 | */ |
728 | if (ovl_xino_bits(dentry->d_sb) || | 743 | if (ovl_xino_bits(dentry->d_sb) || |
729 | (ovl_same_sb(dentry->d_sb) && | 744 | (ovl_same_sb(dentry->d_sb) && |
730 | (ovl_test_flag(OVL_IMPURE, d_inode(dentry)) || | 745 | (ovl_is_impure_dir(file) || |
731 | OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) { | 746 | OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) { |
732 | return ovl_iterate_real(file, ctx); | 747 | return ovl_iterate_real(file, ctx); |
733 | } | 748 | } |
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 704b37311467..2e0fc93c2c06 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c | |||
@@ -64,6 +64,11 @@ static void ovl_entry_stack_free(struct ovl_entry *oe) | |||
64 | dput(oe->lowerstack[i].dentry); | 64 | dput(oe->lowerstack[i].dentry); |
65 | } | 65 | } |
66 | 66 | ||
67 | static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY); | ||
68 | module_param_named(metacopy, ovl_metacopy_def, bool, 0644); | ||
69 | MODULE_PARM_DESC(ovl_metacopy_def, | ||
70 | "Default to on or off for the metadata only copy up feature"); | ||
71 | |||
67 | static void ovl_dentry_release(struct dentry *dentry) | 72 | static void ovl_dentry_release(struct dentry *dentry) |
68 | { | 73 | { |
69 | struct ovl_entry *oe = dentry->d_fsdata; | 74 | struct ovl_entry *oe = dentry->d_fsdata; |
@@ -74,31 +79,14 @@ static void ovl_dentry_release(struct dentry *dentry) | |||
74 | } | 79 | } |
75 | } | 80 | } |
76 | 81 | ||
77 | static int ovl_check_append_only(struct inode *inode, int flag) | ||
78 | { | ||
79 | /* | ||
80 | * This test was moot in vfs may_open() because overlay inode does | ||
81 | * not have the S_APPEND flag, so re-check on real upper inode | ||
82 | */ | ||
83 | if (IS_APPEND(inode)) { | ||
84 | if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) | ||
85 | return -EPERM; | ||
86 | if (flag & O_TRUNC) | ||
87 | return -EPERM; | ||
88 | } | ||
89 | |||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | static struct dentry *ovl_d_real(struct dentry *dentry, | 82 | static struct dentry *ovl_d_real(struct dentry *dentry, |
94 | const struct inode *inode, | 83 | const struct inode *inode) |
95 | unsigned int open_flags, unsigned int flags) | ||
96 | { | 84 | { |
97 | struct dentry *real; | 85 | struct dentry *real; |
98 | int err; | ||
99 | 86 | ||
100 | if (flags & D_REAL_UPPER) | 87 | /* It's an overlay file */ |
101 | return ovl_dentry_upper(dentry); | 88 | if (inode && d_inode(dentry) == inode) |
89 | return dentry; | ||
102 | 90 | ||
103 | if (!d_is_reg(dentry)) { | 91 | if (!d_is_reg(dentry)) { |
104 | if (!inode || inode == d_inode(dentry)) | 92 | if (!inode || inode == d_inode(dentry)) |
@@ -106,28 +94,19 @@ static struct dentry *ovl_d_real(struct dentry *dentry, | |||
106 | goto bug; | 94 | goto bug; |
107 | } | 95 | } |
108 | 96 | ||
109 | if (open_flags) { | ||
110 | err = ovl_open_maybe_copy_up(dentry, open_flags); | ||
111 | if (err) | ||
112 | return ERR_PTR(err); | ||
113 | } | ||
114 | |||
115 | real = ovl_dentry_upper(dentry); | 97 | real = ovl_dentry_upper(dentry); |
116 | if (real && (!inode || inode == d_inode(real))) { | 98 | if (real && (inode == d_inode(real))) |
117 | if (!inode) { | 99 | return real; |
118 | err = ovl_check_append_only(d_inode(real), open_flags); | 100 | |
119 | if (err) | 101 | if (real && !inode && ovl_has_upperdata(d_inode(dentry))) |
120 | return ERR_PTR(err); | ||
121 | } | ||
122 | return real; | 102 | return real; |
123 | } | ||
124 | 103 | ||
125 | real = ovl_dentry_lower(dentry); | 104 | real = ovl_dentry_lowerdata(dentry); |
126 | if (!real) | 105 | if (!real) |
127 | goto bug; | 106 | goto bug; |
128 | 107 | ||
129 | /* Handle recursion */ | 108 | /* Handle recursion */ |
130 | real = d_real(real, inode, open_flags, 0); | 109 | real = d_real(real, inode); |
131 | 110 | ||
132 | if (!inode || inode == d_inode(real)) | 111 | if (!inode || inode == d_inode(real)) |
133 | return real; | 112 | return real; |
@@ -205,6 +184,7 @@ static struct inode *ovl_alloc_inode(struct super_block *sb) | |||
205 | oi->flags = 0; | 184 | oi->flags = 0; |
206 | oi->__upperdentry = NULL; | 185 | oi->__upperdentry = NULL; |
207 | oi->lower = NULL; | 186 | oi->lower = NULL; |
187 | oi->lowerdata = NULL; | ||
208 | mutex_init(&oi->lock); | 188 | mutex_init(&oi->lock); |
209 | 189 | ||
210 | return &oi->vfs_inode; | 190 | return &oi->vfs_inode; |
@@ -223,8 +203,11 @@ static void ovl_destroy_inode(struct inode *inode) | |||
223 | 203 | ||
224 | dput(oi->__upperdentry); | 204 | dput(oi->__upperdentry); |
225 | iput(oi->lower); | 205 | iput(oi->lower); |
206 | if (S_ISDIR(inode->i_mode)) | ||
207 | ovl_dir_cache_free(inode); | ||
208 | else | ||
209 | iput(oi->lowerdata); | ||
226 | kfree(oi->redirect); | 210 | kfree(oi->redirect); |
227 | ovl_dir_cache_free(inode); | ||
228 | mutex_destroy(&oi->lock); | 211 | mutex_destroy(&oi->lock); |
229 | 212 | ||
230 | call_rcu(&inode->i_rcu, ovl_i_callback); | 213 | call_rcu(&inode->i_rcu, ovl_i_callback); |
@@ -376,6 +359,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) | |||
376 | "on" : "off"); | 359 | "on" : "off"); |
377 | if (ofs->config.xino != ovl_xino_def()) | 360 | if (ofs->config.xino != ovl_xino_def()) |
378 | seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]); | 361 | seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]); |
362 | if (ofs->config.metacopy != ovl_metacopy_def) | ||
363 | seq_printf(m, ",metacopy=%s", | ||
364 | ofs->config.metacopy ? "on" : "off"); | ||
379 | return 0; | 365 | return 0; |
380 | } | 366 | } |
381 | 367 | ||
@@ -413,6 +399,8 @@ enum { | |||
413 | OPT_XINO_ON, | 399 | OPT_XINO_ON, |
414 | OPT_XINO_OFF, | 400 | OPT_XINO_OFF, |
415 | OPT_XINO_AUTO, | 401 | OPT_XINO_AUTO, |
402 | OPT_METACOPY_ON, | ||
403 | OPT_METACOPY_OFF, | ||
416 | OPT_ERR, | 404 | OPT_ERR, |
417 | }; | 405 | }; |
418 | 406 | ||
@@ -429,6 +417,8 @@ static const match_table_t ovl_tokens = { | |||
429 | {OPT_XINO_ON, "xino=on"}, | 417 | {OPT_XINO_ON, "xino=on"}, |
430 | {OPT_XINO_OFF, "xino=off"}, | 418 | {OPT_XINO_OFF, "xino=off"}, |
431 | {OPT_XINO_AUTO, "xino=auto"}, | 419 | {OPT_XINO_AUTO, "xino=auto"}, |
420 | {OPT_METACOPY_ON, "metacopy=on"}, | ||
421 | {OPT_METACOPY_OFF, "metacopy=off"}, | ||
432 | {OPT_ERR, NULL} | 422 | {OPT_ERR, NULL} |
433 | }; | 423 | }; |
434 | 424 | ||
@@ -481,6 +471,7 @@ static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode) | |||
481 | static int ovl_parse_opt(char *opt, struct ovl_config *config) | 471 | static int ovl_parse_opt(char *opt, struct ovl_config *config) |
482 | { | 472 | { |
483 | char *p; | 473 | char *p; |
474 | int err; | ||
484 | 475 | ||
485 | config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL); | 476 | config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL); |
486 | if (!config->redirect_mode) | 477 | if (!config->redirect_mode) |
@@ -555,6 +546,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) | |||
555 | config->xino = OVL_XINO_AUTO; | 546 | config->xino = OVL_XINO_AUTO; |
556 | break; | 547 | break; |
557 | 548 | ||
549 | case OPT_METACOPY_ON: | ||
550 | config->metacopy = true; | ||
551 | break; | ||
552 | |||
553 | case OPT_METACOPY_OFF: | ||
554 | config->metacopy = false; | ||
555 | break; | ||
556 | |||
558 | default: | 557 | default: |
559 | pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); | 558 | pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); |
560 | return -EINVAL; | 559 | return -EINVAL; |
@@ -569,7 +568,20 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) | |||
569 | config->workdir = NULL; | 568 | config->workdir = NULL; |
570 | } | 569 | } |
571 | 570 | ||
572 | return ovl_parse_redirect_mode(config, config->redirect_mode); | 571 | err = ovl_parse_redirect_mode(config, config->redirect_mode); |
572 | if (err) | ||
573 | return err; | ||
574 | |||
575 | /* metacopy feature with upper requires redirect_dir=on */ | ||
576 | if (config->upperdir && config->metacopy && !config->redirect_dir) { | ||
577 | pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=on\", falling back to metacopy=off.\n"); | ||
578 | config->metacopy = false; | ||
579 | } else if (config->metacopy && !config->redirect_follow) { | ||
580 | pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=follow\" on non-upper mount, falling back to metacopy=off.\n"); | ||
581 | config->metacopy = false; | ||
582 | } | ||
583 | |||
584 | return 0; | ||
573 | } | 585 | } |
574 | 586 | ||
575 | #define OVL_WORKDIR_NAME "work" | 587 | #define OVL_WORKDIR_NAME "work" |
@@ -1042,7 +1054,8 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) | |||
1042 | if (err) { | 1054 | if (err) { |
1043 | ofs->noxattr = true; | 1055 | ofs->noxattr = true; |
1044 | ofs->config.index = false; | 1056 | ofs->config.index = false; |
1045 | pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off.\n"); | 1057 | ofs->config.metacopy = false; |
1058 | pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n"); | ||
1046 | err = 0; | 1059 | err = 0; |
1047 | } else { | 1060 | } else { |
1048 | vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); | 1061 | vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); |
@@ -1064,7 +1077,6 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) | |||
1064 | pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n"); | 1077 | pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n"); |
1065 | ofs->config.nfs_export = false; | 1078 | ofs->config.nfs_export = false; |
1066 | } | 1079 | } |
1067 | |||
1068 | out: | 1080 | out: |
1069 | mnt_drop_write(mnt); | 1081 | mnt_drop_write(mnt); |
1070 | return err; | 1082 | return err; |
@@ -1375,6 +1387,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) | |||
1375 | ofs->config.index = ovl_index_def; | 1387 | ofs->config.index = ovl_index_def; |
1376 | ofs->config.nfs_export = ovl_nfs_export_def; | 1388 | ofs->config.nfs_export = ovl_nfs_export_def; |
1377 | ofs->config.xino = ovl_xino_def(); | 1389 | ofs->config.xino = ovl_xino_def(); |
1390 | ofs->config.metacopy = ovl_metacopy_def; | ||
1378 | err = ovl_parse_opt((char *) data, &ofs->config); | 1391 | err = ovl_parse_opt((char *) data, &ofs->config); |
1379 | if (err) | 1392 | if (err) |
1380 | goto out_err; | 1393 | goto out_err; |
@@ -1445,6 +1458,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) | |||
1445 | } | 1458 | } |
1446 | } | 1459 | } |
1447 | 1460 | ||
1461 | if (ofs->config.metacopy && ofs->config.nfs_export) { | ||
1462 | pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n"); | ||
1463 | ofs->config.nfs_export = false; | ||
1464 | } | ||
1465 | |||
1448 | if (ofs->config.nfs_export) | 1466 | if (ofs->config.nfs_export) |
1449 | sb->s_export_op = &ovl_export_operations; | 1467 | sb->s_export_op = &ovl_export_operations; |
1450 | 1468 | ||
@@ -1455,7 +1473,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) | |||
1455 | sb->s_op = &ovl_super_operations; | 1473 | sb->s_op = &ovl_super_operations; |
1456 | sb->s_xattr = ovl_xattr_handlers; | 1474 | sb->s_xattr = ovl_xattr_handlers; |
1457 | sb->s_fs_info = ofs; | 1475 | sb->s_fs_info = ofs; |
1458 | sb->s_flags |= SB_POSIXACL | SB_NOREMOTELOCK; | 1476 | sb->s_flags |= SB_POSIXACL; |
1459 | 1477 | ||
1460 | err = -ENOMEM; | 1478 | err = -ENOMEM; |
1461 | root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); | 1479 | root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); |
@@ -1474,8 +1492,9 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) | |||
1474 | /* Root is always merge -> can have whiteouts */ | 1492 | /* Root is always merge -> can have whiteouts */ |
1475 | ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry)); | 1493 | ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry)); |
1476 | ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry); | 1494 | ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry); |
1495 | ovl_set_upperdata(d_inode(root_dentry)); | ||
1477 | ovl_inode_init(d_inode(root_dentry), upperpath.dentry, | 1496 | ovl_inode_init(d_inode(root_dentry), upperpath.dentry, |
1478 | ovl_dentry_lower(root_dentry)); | 1497 | ovl_dentry_lower(root_dentry), NULL); |
1479 | 1498 | ||
1480 | sb->s_root = root_dentry; | 1499 | sb->s_root = root_dentry; |
1481 | 1500 | ||
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 6f1078028c66..8cfb62cc8672 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c | |||
@@ -133,8 +133,10 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) | |||
133 | * Non-dir dentry can hold lower dentry of its copy up origin. | 133 | * Non-dir dentry can hold lower dentry of its copy up origin. |
134 | */ | 134 | */ |
135 | if (oe->numlower) { | 135 | if (oe->numlower) { |
136 | type |= __OVL_PATH_ORIGIN; | 136 | if (ovl_test_flag(OVL_CONST_INO, d_inode(dentry))) |
137 | if (d_is_dir(dentry)) | 137 | type |= __OVL_PATH_ORIGIN; |
138 | if (d_is_dir(dentry) || | ||
139 | !ovl_has_upperdata(d_inode(dentry))) | ||
138 | type |= __OVL_PATH_MERGE; | 140 | type |= __OVL_PATH_MERGE; |
139 | } | 141 | } |
140 | } else { | 142 | } else { |
@@ -164,6 +166,18 @@ void ovl_path_lower(struct dentry *dentry, struct path *path) | |||
164 | } | 166 | } |
165 | } | 167 | } |
166 | 168 | ||
169 | void ovl_path_lowerdata(struct dentry *dentry, struct path *path) | ||
170 | { | ||
171 | struct ovl_entry *oe = dentry->d_fsdata; | ||
172 | |||
173 | if (oe->numlower) { | ||
174 | path->mnt = oe->lowerstack[oe->numlower - 1].layer->mnt; | ||
175 | path->dentry = oe->lowerstack[oe->numlower - 1].dentry; | ||
176 | } else { | ||
177 | *path = (struct path) { }; | ||
178 | } | ||
179 | } | ||
180 | |||
167 | enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) | 181 | enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) |
168 | { | 182 | { |
169 | enum ovl_path_type type = ovl_path_type(dentry); | 183 | enum ovl_path_type type = ovl_path_type(dentry); |
@@ -195,6 +209,19 @@ struct ovl_layer *ovl_layer_lower(struct dentry *dentry) | |||
195 | return oe->numlower ? oe->lowerstack[0].layer : NULL; | 209 | return oe->numlower ? oe->lowerstack[0].layer : NULL; |
196 | } | 210 | } |
197 | 211 | ||
212 | /* | ||
213 | * ovl_dentry_lower() could return either a data dentry or metacopy dentry | ||
214 | * depending on what is stored in lowerstack[0]. At times we need to find | ||
215 | * lower dentry which has data (and not metacopy dentry). This helper | ||
216 | * returns the lower data dentry. | ||
217 | */ | ||
218 | struct dentry *ovl_dentry_lowerdata(struct dentry *dentry) | ||
219 | { | ||
220 | struct ovl_entry *oe = dentry->d_fsdata; | ||
221 | |||
222 | return oe->numlower ? oe->lowerstack[oe->numlower - 1].dentry : NULL; | ||
223 | } | ||
224 | |||
198 | struct dentry *ovl_dentry_real(struct dentry *dentry) | 225 | struct dentry *ovl_dentry_real(struct dentry *dentry) |
199 | { | 226 | { |
200 | return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry); | 227 | return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry); |
@@ -222,6 +249,26 @@ struct inode *ovl_inode_real(struct inode *inode) | |||
222 | return ovl_inode_upper(inode) ?: ovl_inode_lower(inode); | 249 | return ovl_inode_upper(inode) ?: ovl_inode_lower(inode); |
223 | } | 250 | } |
224 | 251 | ||
252 | /* Return inode which contains lower data. Do not return metacopy */ | ||
253 | struct inode *ovl_inode_lowerdata(struct inode *inode) | ||
254 | { | ||
255 | if (WARN_ON(!S_ISREG(inode->i_mode))) | ||
256 | return NULL; | ||
257 | |||
258 | return OVL_I(inode)->lowerdata ?: ovl_inode_lower(inode); | ||
259 | } | ||
260 | |||
261 | /* Return real inode which contains data. Does not return metacopy inode */ | ||
262 | struct inode *ovl_inode_realdata(struct inode *inode) | ||
263 | { | ||
264 | struct inode *upperinode; | ||
265 | |||
266 | upperinode = ovl_inode_upper(inode); | ||
267 | if (upperinode && ovl_has_upperdata(inode)) | ||
268 | return upperinode; | ||
269 | |||
270 | return ovl_inode_lowerdata(inode); | ||
271 | } | ||
225 | 272 | ||
226 | struct ovl_dir_cache *ovl_dir_cache(struct inode *inode) | 273 | struct ovl_dir_cache *ovl_dir_cache(struct inode *inode) |
227 | { | 274 | { |
@@ -279,6 +326,62 @@ void ovl_dentry_set_upper_alias(struct dentry *dentry) | |||
279 | ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry); | 326 | ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry); |
280 | } | 327 | } |
281 | 328 | ||
329 | static bool ovl_should_check_upperdata(struct inode *inode) | ||
330 | { | ||
331 | if (!S_ISREG(inode->i_mode)) | ||
332 | return false; | ||
333 | |||
334 | if (!ovl_inode_lower(inode)) | ||
335 | return false; | ||
336 | |||
337 | return true; | ||
338 | } | ||
339 | |||
340 | bool ovl_has_upperdata(struct inode *inode) | ||
341 | { | ||
342 | if (!ovl_should_check_upperdata(inode)) | ||
343 | return true; | ||
344 | |||
345 | if (!ovl_test_flag(OVL_UPPERDATA, inode)) | ||
346 | return false; | ||
347 | /* | ||
348 | * Pairs with smp_wmb() in ovl_set_upperdata(). Main user of | ||
349 | * ovl_has_upperdata() is ovl_copy_up_meta_inode_data(). Make sure | ||
350 | * if setting of OVL_UPPERDATA is visible, then effects of writes | ||
351 | * before that are visible too. | ||
352 | */ | ||
353 | smp_rmb(); | ||
354 | return true; | ||
355 | } | ||
356 | |||
357 | void ovl_set_upperdata(struct inode *inode) | ||
358 | { | ||
359 | /* | ||
360 | * Pairs with smp_rmb() in ovl_has_upperdata(). Make sure | ||
361 | * if OVL_UPPERDATA flag is visible, then effects of write operations | ||
362 | * before it are visible as well. | ||
363 | */ | ||
364 | smp_wmb(); | ||
365 | ovl_set_flag(OVL_UPPERDATA, inode); | ||
366 | } | ||
367 | |||
368 | /* Caller should hold ovl_inode->lock */ | ||
369 | bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags) | ||
370 | { | ||
371 | if (!ovl_open_flags_need_copy_up(flags)) | ||
372 | return false; | ||
373 | |||
374 | return !ovl_test_flag(OVL_UPPERDATA, d_inode(dentry)); | ||
375 | } | ||
376 | |||
377 | bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags) | ||
378 | { | ||
379 | if (!ovl_open_flags_need_copy_up(flags)) | ||
380 | return false; | ||
381 | |||
382 | return !ovl_has_upperdata(d_inode(dentry)); | ||
383 | } | ||
384 | |||
282 | bool ovl_redirect_dir(struct super_block *sb) | 385 | bool ovl_redirect_dir(struct super_block *sb) |
283 | { | 386 | { |
284 | struct ovl_fs *ofs = sb->s_fs_info; | 387 | struct ovl_fs *ofs = sb->s_fs_info; |
@@ -300,7 +403,7 @@ void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect) | |||
300 | } | 403 | } |
301 | 404 | ||
302 | void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, | 405 | void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, |
303 | struct dentry *lowerdentry) | 406 | struct dentry *lowerdentry, struct dentry *lowerdata) |
304 | { | 407 | { |
305 | struct inode *realinode = d_inode(upperdentry ?: lowerdentry); | 408 | struct inode *realinode = d_inode(upperdentry ?: lowerdentry); |
306 | 409 | ||
@@ -308,8 +411,11 @@ void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, | |||
308 | OVL_I(inode)->__upperdentry = upperdentry; | 411 | OVL_I(inode)->__upperdentry = upperdentry; |
309 | if (lowerdentry) | 412 | if (lowerdentry) |
310 | OVL_I(inode)->lower = igrab(d_inode(lowerdentry)); | 413 | OVL_I(inode)->lower = igrab(d_inode(lowerdentry)); |
414 | if (lowerdata) | ||
415 | OVL_I(inode)->lowerdata = igrab(d_inode(lowerdata)); | ||
311 | 416 | ||
312 | ovl_copyattr(realinode, inode); | 417 | ovl_copyattr(realinode, inode); |
418 | ovl_copyflags(realinode, inode); | ||
313 | if (!inode->i_ino) | 419 | if (!inode->i_ino) |
314 | inode->i_ino = realinode->i_ino; | 420 | inode->i_ino = realinode->i_ino; |
315 | } | 421 | } |
@@ -333,7 +439,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry) | |||
333 | } | 439 | } |
334 | } | 440 | } |
335 | 441 | ||
336 | void ovl_dentry_version_inc(struct dentry *dentry, bool impurity) | 442 | static void ovl_dentry_version_inc(struct dentry *dentry, bool impurity) |
337 | { | 443 | { |
338 | struct inode *inode = d_inode(dentry); | 444 | struct inode *inode = d_inode(dentry); |
339 | 445 | ||
@@ -348,6 +454,14 @@ void ovl_dentry_version_inc(struct dentry *dentry, bool impurity) | |||
348 | OVL_I(inode)->version++; | 454 | OVL_I(inode)->version++; |
349 | } | 455 | } |
350 | 456 | ||
457 | void ovl_dir_modified(struct dentry *dentry, bool impurity) | ||
458 | { | ||
459 | /* Copy mtime/ctime */ | ||
460 | ovl_copyattr(d_inode(ovl_dentry_upper(dentry)), d_inode(dentry)); | ||
461 | |||
462 | ovl_dentry_version_inc(dentry, impurity); | ||
463 | } | ||
464 | |||
351 | u64 ovl_dentry_version_get(struct dentry *dentry) | 465 | u64 ovl_dentry_version_get(struct dentry *dentry) |
352 | { | 466 | { |
353 | struct inode *inode = d_inode(dentry); | 467 | struct inode *inode = d_inode(dentry); |
@@ -368,13 +482,51 @@ struct file *ovl_path_open(struct path *path, int flags) | |||
368 | return dentry_open(path, flags | O_NOATIME, current_cred()); | 482 | return dentry_open(path, flags | O_NOATIME, current_cred()); |
369 | } | 483 | } |
370 | 484 | ||
371 | int ovl_copy_up_start(struct dentry *dentry) | 485 | /* Caller should hold ovl_inode->lock */ |
486 | static bool ovl_already_copied_up_locked(struct dentry *dentry, int flags) | ||
487 | { | ||
488 | bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED; | ||
489 | |||
490 | if (ovl_dentry_upper(dentry) && | ||
491 | (ovl_dentry_has_upper_alias(dentry) || disconnected) && | ||
492 | !ovl_dentry_needs_data_copy_up_locked(dentry, flags)) | ||
493 | return true; | ||
494 | |||
495 | return false; | ||
496 | } | ||
497 | |||
498 | bool ovl_already_copied_up(struct dentry *dentry, int flags) | ||
499 | { | ||
500 | bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED; | ||
501 | |||
502 | /* | ||
503 | * Check whether copy-up has happened and whether the upper alias (in | ||
504 | * case of hard links) is there. | ||
505 | * | ||
506 | * Both checks are lockless: | ||
507 | * - false negatives: will recheck under oi->lock | ||
508 | * - false positives: | ||
509 | * + ovl_dentry_upper() uses memory barriers to ensure the | ||
510 | * upper dentry is up-to-date | ||
511 | * + ovl_dentry_has_upper_alias() relies on locking of | ||
512 | * upper parent i_rwsem to prevent reordering copy-up | ||
513 | * with rename. | ||
514 | */ | ||
515 | if (ovl_dentry_upper(dentry) && | ||
516 | (ovl_dentry_has_upper_alias(dentry) || disconnected) && | ||
517 | !ovl_dentry_needs_data_copy_up(dentry, flags)) | ||
518 | return true; | ||
519 | |||
520 | return false; | ||
521 | } | ||
522 | |||
523 | int ovl_copy_up_start(struct dentry *dentry, int flags) | ||
372 | { | 524 | { |
373 | struct ovl_inode *oi = OVL_I(d_inode(dentry)); | 525 | struct ovl_inode *oi = OVL_I(d_inode(dentry)); |
374 | int err; | 526 | int err; |
375 | 527 | ||
376 | err = mutex_lock_interruptible(&oi->lock); | 528 | err = mutex_lock_interruptible(&oi->lock); |
377 | if (!err && ovl_dentry_has_upper_alias(dentry)) { | 529 | if (!err && ovl_already_copied_up_locked(dentry, flags)) { |
378 | err = 1; /* Already copied up */ | 530 | err = 1; /* Already copied up */ |
379 | mutex_unlock(&oi->lock); | 531 | mutex_unlock(&oi->lock); |
380 | } | 532 | } |
@@ -675,3 +827,91 @@ err: | |||
675 | pr_err("overlayfs: failed to lock workdir+upperdir\n"); | 827 | pr_err("overlayfs: failed to lock workdir+upperdir\n"); |
676 | return -EIO; | 828 | return -EIO; |
677 | } | 829 | } |
830 | |||
831 | /* err < 0, 0 if no metacopy xattr, 1 if metacopy xattr found */ | ||
832 | int ovl_check_metacopy_xattr(struct dentry *dentry) | ||
833 | { | ||
834 | int res; | ||
835 | |||
836 | /* Only regular files can have metacopy xattr */ | ||
837 | if (!S_ISREG(d_inode(dentry)->i_mode)) | ||
838 | return 0; | ||
839 | |||
840 | res = vfs_getxattr(dentry, OVL_XATTR_METACOPY, NULL, 0); | ||
841 | if (res < 0) { | ||
842 | if (res == -ENODATA || res == -EOPNOTSUPP) | ||
843 | return 0; | ||
844 | goto out; | ||
845 | } | ||
846 | |||
847 | return 1; | ||
848 | out: | ||
849 | pr_warn_ratelimited("overlayfs: failed to get metacopy (%i)\n", res); | ||
850 | return res; | ||
851 | } | ||
852 | |||
853 | bool ovl_is_metacopy_dentry(struct dentry *dentry) | ||
854 | { | ||
855 | struct ovl_entry *oe = dentry->d_fsdata; | ||
856 | |||
857 | if (!d_is_reg(dentry)) | ||
858 | return false; | ||
859 | |||
860 | if (ovl_dentry_upper(dentry)) { | ||
861 | if (!ovl_has_upperdata(d_inode(dentry))) | ||
862 | return true; | ||
863 | return false; | ||
864 | } | ||
865 | |||
866 | return (oe->numlower > 1); | ||
867 | } | ||
868 | |||
869 | char *ovl_get_redirect_xattr(struct dentry *dentry, int padding) | ||
870 | { | ||
871 | int res; | ||
872 | char *s, *next, *buf = NULL; | ||
873 | |||
874 | res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0); | ||
875 | if (res < 0) { | ||
876 | if (res == -ENODATA || res == -EOPNOTSUPP) | ||
877 | return NULL; | ||
878 | goto fail; | ||
879 | } | ||
880 | |||
881 | buf = kzalloc(res + padding + 1, GFP_KERNEL); | ||
882 | if (!buf) | ||
883 | return ERR_PTR(-ENOMEM); | ||
884 | |||
885 | if (res == 0) | ||
886 | goto invalid; | ||
887 | |||
888 | res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res); | ||
889 | if (res < 0) | ||
890 | goto fail; | ||
891 | if (res == 0) | ||
892 | goto invalid; | ||
893 | |||
894 | if (buf[0] == '/') { | ||
895 | for (s = buf; *s++ == '/'; s = next) { | ||
896 | next = strchrnul(s, '/'); | ||
897 | if (s == next) | ||
898 | goto invalid; | ||
899 | } | ||
900 | } else { | ||
901 | if (strchr(buf, '/') != NULL) | ||
902 | goto invalid; | ||
903 | } | ||
904 | |||
905 | return buf; | ||
906 | |||
907 | err_free: | ||
908 | kfree(buf); | ||
909 | return ERR_PTR(res); | ||
910 | fail: | ||
911 | pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res); | ||
912 | goto err_free; | ||
913 | invalid: | ||
914 | pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf); | ||
915 | res = -EINVAL; | ||
916 | goto err_free; | ||
917 | } | ||
diff --git a/fs/read_write.c b/fs/read_write.c index 153f8f690490..39b4a21dd933 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -1964,6 +1964,44 @@ out_error: | |||
1964 | } | 1964 | } |
1965 | EXPORT_SYMBOL(vfs_dedupe_file_range_compare); | 1965 | EXPORT_SYMBOL(vfs_dedupe_file_range_compare); |
1966 | 1966 | ||
1967 | int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, | ||
1968 | struct file *dst_file, loff_t dst_pos, u64 len) | ||
1969 | { | ||
1970 | s64 ret; | ||
1971 | |||
1972 | ret = mnt_want_write_file(dst_file); | ||
1973 | if (ret) | ||
1974 | return ret; | ||
1975 | |||
1976 | ret = clone_verify_area(dst_file, dst_pos, len, true); | ||
1977 | if (ret < 0) | ||
1978 | goto out_drop_write; | ||
1979 | |||
1980 | ret = -EINVAL; | ||
1981 | if (!(capable(CAP_SYS_ADMIN) || (dst_file->f_mode & FMODE_WRITE))) | ||
1982 | goto out_drop_write; | ||
1983 | |||
1984 | ret = -EXDEV; | ||
1985 | if (src_file->f_path.mnt != dst_file->f_path.mnt) | ||
1986 | goto out_drop_write; | ||
1987 | |||
1988 | ret = -EISDIR; | ||
1989 | if (S_ISDIR(file_inode(dst_file)->i_mode)) | ||
1990 | goto out_drop_write; | ||
1991 | |||
1992 | ret = -EINVAL; | ||
1993 | if (!dst_file->f_op->dedupe_file_range) | ||
1994 | goto out_drop_write; | ||
1995 | |||
1996 | ret = dst_file->f_op->dedupe_file_range(src_file, src_pos, | ||
1997 | dst_file, dst_pos, len); | ||
1998 | out_drop_write: | ||
1999 | mnt_drop_write_file(dst_file); | ||
2000 | |||
2001 | return ret; | ||
2002 | } | ||
2003 | EXPORT_SYMBOL(vfs_dedupe_file_range_one); | ||
2004 | |||
1967 | int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) | 2005 | int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) |
1968 | { | 2006 | { |
1969 | struct file_dedupe_range_info *info; | 2007 | struct file_dedupe_range_info *info; |
@@ -1972,11 +2010,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) | |||
1972 | u64 len; | 2010 | u64 len; |
1973 | int i; | 2011 | int i; |
1974 | int ret; | 2012 | int ret; |
1975 | bool is_admin = capable(CAP_SYS_ADMIN); | ||
1976 | u16 count = same->dest_count; | 2013 | u16 count = same->dest_count; |
1977 | struct file *dst_file; | 2014 | int deduped; |
1978 | loff_t dst_off; | ||
1979 | ssize_t deduped; | ||
1980 | 2015 | ||
1981 | if (!(file->f_mode & FMODE_READ)) | 2016 | if (!(file->f_mode & FMODE_READ)) |
1982 | return -EINVAL; | 2017 | return -EINVAL; |
@@ -2003,6 +2038,9 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) | |||
2003 | if (off + len > i_size_read(src)) | 2038 | if (off + len > i_size_read(src)) |
2004 | return -EINVAL; | 2039 | return -EINVAL; |
2005 | 2040 | ||
2041 | /* Arbitrary 1G limit on a single dedupe request, can be raised. */ | ||
2042 | len = min_t(u64, len, 1 << 30); | ||
2043 | |||
2006 | /* pre-format output fields to sane values */ | 2044 | /* pre-format output fields to sane values */ |
2007 | for (i = 0; i < count; i++) { | 2045 | for (i = 0; i < count; i++) { |
2008 | same->info[i].bytes_deduped = 0ULL; | 2046 | same->info[i].bytes_deduped = 0ULL; |
@@ -2010,54 +2048,28 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) | |||
2010 | } | 2048 | } |
2011 | 2049 | ||
2012 | for (i = 0, info = same->info; i < count; i++, info++) { | 2050 | for (i = 0, info = same->info; i < count; i++, info++) { |
2013 | struct inode *dst; | ||
2014 | struct fd dst_fd = fdget(info->dest_fd); | 2051 | struct fd dst_fd = fdget(info->dest_fd); |
2052 | struct file *dst_file = dst_fd.file; | ||
2015 | 2053 | ||
2016 | dst_file = dst_fd.file; | ||
2017 | if (!dst_file) { | 2054 | if (!dst_file) { |
2018 | info->status = -EBADF; | 2055 | info->status = -EBADF; |
2019 | goto next_loop; | 2056 | goto next_loop; |
2020 | } | 2057 | } |
2021 | dst = file_inode(dst_file); | ||
2022 | |||
2023 | ret = mnt_want_write_file(dst_file); | ||
2024 | if (ret) { | ||
2025 | info->status = ret; | ||
2026 | goto next_fdput; | ||
2027 | } | ||
2028 | |||
2029 | dst_off = info->dest_offset; | ||
2030 | ret = clone_verify_area(dst_file, dst_off, len, true); | ||
2031 | if (ret < 0) { | ||
2032 | info->status = ret; | ||
2033 | goto next_file; | ||
2034 | } | ||
2035 | ret = 0; | ||
2036 | 2058 | ||
2037 | if (info->reserved) { | 2059 | if (info->reserved) { |
2038 | info->status = -EINVAL; | 2060 | info->status = -EINVAL; |
2039 | } else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { | 2061 | goto next_fdput; |
2040 | info->status = -EINVAL; | ||
2041 | } else if (file->f_path.mnt != dst_file->f_path.mnt) { | ||
2042 | info->status = -EXDEV; | ||
2043 | } else if (S_ISDIR(dst->i_mode)) { | ||
2044 | info->status = -EISDIR; | ||
2045 | } else if (dst_file->f_op->dedupe_file_range == NULL) { | ||
2046 | info->status = -EINVAL; | ||
2047 | } else { | ||
2048 | deduped = dst_file->f_op->dedupe_file_range(file, off, | ||
2049 | len, dst_file, | ||
2050 | info->dest_offset); | ||
2051 | if (deduped == -EBADE) | ||
2052 | info->status = FILE_DEDUPE_RANGE_DIFFERS; | ||
2053 | else if (deduped < 0) | ||
2054 | info->status = deduped; | ||
2055 | else | ||
2056 | info->bytes_deduped += deduped; | ||
2057 | } | 2062 | } |
2058 | 2063 | ||
2059 | next_file: | 2064 | deduped = vfs_dedupe_file_range_one(file, off, dst_file, |
2060 | mnt_drop_write_file(dst_file); | 2065 | info->dest_offset, len); |
2066 | if (deduped == -EBADE) | ||
2067 | info->status = FILE_DEDUPE_RANGE_DIFFERS; | ||
2068 | else if (deduped < 0) | ||
2069 | info->status = deduped; | ||
2070 | else | ||
2071 | info->bytes_deduped = len; | ||
2072 | |||
2061 | next_fdput: | 2073 | next_fdput: |
2062 | fdput(dst_fd); | 2074 | fdput(dst_fd); |
2063 | next_loop: | 2075 | next_loop: |
diff --git a/fs/xattr.c b/fs/xattr.c index f9cb1db187b7..3a24027c062d 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/posix_acl_xattr.h> | 23 | #include <linux/posix_acl_xattr.h> |
24 | 24 | ||
25 | #include <linux/uaccess.h> | 25 | #include <linux/uaccess.h> |
26 | #include "internal.h" | ||
27 | 26 | ||
28 | static const char * | 27 | static const char * |
29 | strcmp_prefix(const char *a, const char *a_prefix) | 28 | strcmp_prefix(const char *a, const char *a_prefix) |
@@ -501,10 +500,10 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, | |||
501 | if (!f.file) | 500 | if (!f.file) |
502 | return error; | 501 | return error; |
503 | audit_file(f.file); | 502 | audit_file(f.file); |
504 | error = mnt_want_write_file_path(f.file); | 503 | error = mnt_want_write_file(f.file); |
505 | if (!error) { | 504 | if (!error) { |
506 | error = setxattr(f.file->f_path.dentry, name, value, size, flags); | 505 | error = setxattr(f.file->f_path.dentry, name, value, size, flags); |
507 | mnt_drop_write_file_path(f.file); | 506 | mnt_drop_write_file(f.file); |
508 | } | 507 | } |
509 | fdput(f); | 508 | fdput(f); |
510 | return error; | 509 | return error; |
@@ -733,10 +732,10 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) | |||
733 | if (!f.file) | 732 | if (!f.file) |
734 | return error; | 733 | return error; |
735 | audit_file(f.file); | 734 | audit_file(f.file); |
736 | error = mnt_want_write_file_path(f.file); | 735 | error = mnt_want_write_file(f.file); |
737 | if (!error) { | 736 | if (!error) { |
738 | error = removexattr(f.file->f_path.dentry, name); | 737 | error = removexattr(f.file->f_path.dentry, name); |
739 | mnt_drop_write_file_path(f.file); | 738 | mnt_drop_write_file(f.file); |
740 | } | 739 | } |
741 | fdput(f); | 740 | fdput(f); |
742 | return error; | 741 | return error; |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 5eaef2c17293..61a5ad2600e8 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -931,31 +931,16 @@ xfs_file_clone_range( | |||
931 | len, false); | 931 | len, false); |
932 | } | 932 | } |
933 | 933 | ||
934 | STATIC ssize_t | 934 | STATIC int |
935 | xfs_file_dedupe_range( | 935 | xfs_file_dedupe_range( |
936 | struct file *src_file, | 936 | struct file *file_in, |
937 | u64 loff, | 937 | loff_t pos_in, |
938 | u64 len, | 938 | struct file *file_out, |
939 | struct file *dst_file, | 939 | loff_t pos_out, |
940 | u64 dst_loff) | 940 | u64 len) |
941 | { | 941 | { |
942 | struct inode *srci = file_inode(src_file); | 942 | return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, |
943 | u64 max_dedupe; | ||
944 | int error; | ||
945 | |||
946 | /* | ||
947 | * Since we have to read all these pages in to compare them, cut | ||
948 | * it off at MAX_RW_COUNT/2 rounded down to the nearest block. | ||
949 | * That means we won't do more than MAX_RW_COUNT IO per request. | ||
950 | */ | ||
951 | max_dedupe = (MAX_RW_COUNT >> 1) & ~(i_blocksize(srci) - 1); | ||
952 | if (len > max_dedupe) | ||
953 | len = max_dedupe; | ||
954 | error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff, | ||
955 | len, true); | 943 | len, true); |
956 | if (error) | ||
957 | return error; | ||
958 | return len; | ||
959 | } | 944 | } |
960 | 945 | ||
961 | STATIC int | 946 | STATIC int |
diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d32957b423d5..ef4b70f64f33 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h | |||
@@ -145,8 +145,7 @@ struct dentry_operations { | |||
145 | char *(*d_dname)(struct dentry *, char *, int); | 145 | char *(*d_dname)(struct dentry *, char *, int); |
146 | struct vfsmount *(*d_automount)(struct path *); | 146 | struct vfsmount *(*d_automount)(struct path *); |
147 | int (*d_manage)(const struct path *, bool); | 147 | int (*d_manage)(const struct path *, bool); |
148 | struct dentry *(*d_real)(struct dentry *, const struct inode *, | 148 | struct dentry *(*d_real)(struct dentry *, const struct inode *); |
149 | unsigned int, unsigned int); | ||
150 | } ____cacheline_aligned; | 149 | } ____cacheline_aligned; |
151 | 150 | ||
152 | /* | 151 | /* |
@@ -561,15 +560,10 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper) | |||
561 | return upper; | 560 | return upper; |
562 | } | 561 | } |
563 | 562 | ||
564 | /* d_real() flags */ | ||
565 | #define D_REAL_UPPER 0x2 /* return upper dentry or NULL if non-upper */ | ||
566 | |||
567 | /** | 563 | /** |
568 | * d_real - Return the real dentry | 564 | * d_real - Return the real dentry |
569 | * @dentry: the dentry to query | 565 | * @dentry: the dentry to query |
570 | * @inode: inode to select the dentry from multiple layers (can be NULL) | 566 | * @inode: inode to select the dentry from multiple layers (can be NULL) |
571 | * @open_flags: open flags to control copy-up behavior | ||
572 | * @flags: flags to control what is returned by this function | ||
573 | * | 567 | * |
574 | * If dentry is on a union/overlay, then return the underlying, real dentry. | 568 | * If dentry is on a union/overlay, then return the underlying, real dentry. |
575 | * Otherwise return the dentry itself. | 569 | * Otherwise return the dentry itself. |
@@ -577,11 +571,10 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper) | |||
577 | * See also: Documentation/filesystems/vfs.txt | 571 | * See also: Documentation/filesystems/vfs.txt |
578 | */ | 572 | */ |
579 | static inline struct dentry *d_real(struct dentry *dentry, | 573 | static inline struct dentry *d_real(struct dentry *dentry, |
580 | const struct inode *inode, | 574 | const struct inode *inode) |
581 | unsigned int open_flags, unsigned int flags) | ||
582 | { | 575 | { |
583 | if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) | 576 | if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) |
584 | return dentry->d_op->d_real(dentry, inode, open_flags, flags); | 577 | return dentry->d_op->d_real(dentry, inode); |
585 | else | 578 | else |
586 | return dentry; | 579 | return dentry; |
587 | } | 580 | } |
@@ -596,7 +589,7 @@ static inline struct dentry *d_real(struct dentry *dentry, | |||
596 | static inline struct inode *d_real_inode(const struct dentry *dentry) | 589 | static inline struct inode *d_real_inode(const struct dentry *dentry) |
597 | { | 590 | { |
598 | /* This usage of d_real() results in const dentry */ | 591 | /* This usage of d_real() results in const dentry */ |
599 | return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0, 0)); | 592 | return d_backing_inode(d_real((struct dentry *) dentry, NULL)); |
600 | } | 593 | } |
601 | 594 | ||
602 | struct name_snapshot { | 595 | struct name_snapshot { |
diff --git a/include/linux/fs.h b/include/linux/fs.h index a9242f336f02..e5710541183b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -157,6 +157,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, | |||
157 | /* File is capable of returning -EAGAIN if I/O will block */ | 157 | /* File is capable of returning -EAGAIN if I/O will block */ |
158 | #define FMODE_NOWAIT ((__force fmode_t)0x8000000) | 158 | #define FMODE_NOWAIT ((__force fmode_t)0x8000000) |
159 | 159 | ||
160 | /* File does not contribute to nr_files count */ | ||
161 | #define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) | ||
162 | |||
160 | /* | 163 | /* |
161 | * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector | 164 | * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector |
162 | * that indicates that they should check the contents of the iovec are | 165 | * that indicates that they should check the contents of the iovec are |
@@ -1067,17 +1070,7 @@ struct file_lock_context { | |||
1067 | 1070 | ||
1068 | extern void send_sigio(struct fown_struct *fown, int fd, int band); | 1071 | extern void send_sigio(struct fown_struct *fown, int fd, int band); |
1069 | 1072 | ||
1070 | /* | 1073 | #define locks_inode(f) file_inode(f) |
1071 | * Return the inode to use for locking | ||
1072 | * | ||
1073 | * For overlayfs this should be the overlay inode, not the real inode returned | ||
1074 | * by file_inode(). For any other fs file_inode(filp) and locks_inode(filp) are | ||
1075 | * equal. | ||
1076 | */ | ||
1077 | static inline struct inode *locks_inode(const struct file *f) | ||
1078 | { | ||
1079 | return f->f_path.dentry->d_inode; | ||
1080 | } | ||
1081 | 1074 | ||
1082 | #ifdef CONFIG_FILE_LOCKING | 1075 | #ifdef CONFIG_FILE_LOCKING |
1083 | extern int fcntl_getlk(struct file *, unsigned int, struct flock *); | 1076 | extern int fcntl_getlk(struct file *, unsigned int, struct flock *); |
@@ -1262,7 +1255,7 @@ static inline struct inode *file_inode(const struct file *f) | |||
1262 | 1255 | ||
1263 | static inline struct dentry *file_dentry(const struct file *file) | 1256 | static inline struct dentry *file_dentry(const struct file *file) |
1264 | { | 1257 | { |
1265 | return d_real(file->f_path.dentry, file_inode(file), 0, 0); | 1258 | return d_real(file->f_path.dentry, file_inode(file)); |
1266 | } | 1259 | } |
1267 | 1260 | ||
1268 | static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) | 1261 | static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) |
@@ -1318,7 +1311,6 @@ extern int send_sigurg(struct fown_struct *fown); | |||
1318 | 1311 | ||
1319 | /* These sb flags are internal to the kernel */ | 1312 | /* These sb flags are internal to the kernel */ |
1320 | #define SB_SUBMOUNT (1<<26) | 1313 | #define SB_SUBMOUNT (1<<26) |
1321 | #define SB_NOREMOTELOCK (1<<27) | ||
1322 | #define SB_NOSEC (1<<28) | 1314 | #define SB_NOSEC (1<<28) |
1323 | #define SB_BORN (1<<29) | 1315 | #define SB_BORN (1<<29) |
1324 | #define SB_ACTIVE (1<<30) | 1316 | #define SB_ACTIVE (1<<30) |
@@ -1647,6 +1639,8 @@ int vfs_mkobj(struct dentry *, umode_t, | |||
1647 | int (*f)(struct dentry *, umode_t, void *), | 1639 | int (*f)(struct dentry *, umode_t, void *), |
1648 | void *); | 1640 | void *); |
1649 | 1641 | ||
1642 | extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | ||
1643 | |||
1650 | /* | 1644 | /* |
1651 | * VFS file helper functions. | 1645 | * VFS file helper functions. |
1652 | */ | 1646 | */ |
@@ -1765,7 +1759,7 @@ struct file_operations { | |||
1765 | loff_t, size_t, unsigned int); | 1759 | loff_t, size_t, unsigned int); |
1766 | int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, | 1760 | int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, |
1767 | u64); | 1761 | u64); |
1768 | ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *, | 1762 | int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, |
1769 | u64); | 1763 | u64); |
1770 | } __randomize_layout; | 1764 | } __randomize_layout; |
1771 | 1765 | ||
@@ -1838,6 +1832,10 @@ extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, | |||
1838 | loff_t len, bool *is_same); | 1832 | loff_t len, bool *is_same); |
1839 | extern int vfs_dedupe_file_range(struct file *file, | 1833 | extern int vfs_dedupe_file_range(struct file *file, |
1840 | struct file_dedupe_range *same); | 1834 | struct file_dedupe_range *same); |
1835 | extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, | ||
1836 | struct file *dst_file, loff_t dst_pos, | ||
1837 | u64 len); | ||
1838 | |||
1841 | 1839 | ||
1842 | struct super_operations { | 1840 | struct super_operations { |
1843 | struct inode *(*alloc_inode)(struct super_block *sb); | 1841 | struct inode *(*alloc_inode)(struct super_block *sb); |
@@ -2096,6 +2094,7 @@ enum file_time_flags { | |||
2096 | S_VERSION = 8, | 2094 | S_VERSION = 8, |
2097 | }; | 2095 | }; |
2098 | 2096 | ||
2097 | extern bool atime_needs_update(const struct path *, struct inode *); | ||
2099 | extern void touch_atime(const struct path *); | 2098 | extern void touch_atime(const struct path *); |
2100 | static inline void file_accessed(struct file *file) | 2099 | static inline void file_accessed(struct file *file) |
2101 | { | 2100 | { |
@@ -2441,6 +2440,8 @@ extern struct file *filp_open(const char *, int, umode_t); | |||
2441 | extern struct file *file_open_root(struct dentry *, struct vfsmount *, | 2440 | extern struct file *file_open_root(struct dentry *, struct vfsmount *, |
2442 | const char *, int, umode_t); | 2441 | const char *, int, umode_t); |
2443 | extern struct file * dentry_open(const struct path *, int, const struct cred *); | 2442 | extern struct file * dentry_open(const struct path *, int, const struct cred *); |
2443 | extern struct file * open_with_fake_path(const struct path *, int, | ||
2444 | struct inode*, const struct cred *); | ||
2444 | static inline struct file *file_clone_open(struct file *file) | 2445 | static inline struct file *file_clone_open(struct file *file) |
2445 | { | 2446 | { |
2446 | return dentry_open(&file->f_path, file->f_flags, file->f_cred); | 2447 | return dentry_open(&file->f_path, file->f_flags, file->f_cred); |
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index bdaf22582f6e..fd1ce10553bf 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h | |||
@@ -30,11 +30,7 @@ static inline int fsnotify_parent(const struct path *path, struct dentry *dentry | |||
30 | static inline int fsnotify_perm(struct file *file, int mask) | 30 | static inline int fsnotify_perm(struct file *file, int mask) |
31 | { | 31 | { |
32 | const struct path *path = &file->f_path; | 32 | const struct path *path = &file->f_path; |
33 | /* | 33 | struct inode *inode = file_inode(file); |
34 | * Do not use file_inode() here or anywhere in this file to get the | ||
35 | * inode. That would break *notity on overlayfs. | ||
36 | */ | ||
37 | struct inode *inode = path->dentry->d_inode; | ||
38 | __u32 fsnotify_mask = 0; | 34 | __u32 fsnotify_mask = 0; |
39 | int ret; | 35 | int ret; |
40 | 36 | ||
@@ -178,7 +174,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) | |||
178 | static inline void fsnotify_access(struct file *file) | 174 | static inline void fsnotify_access(struct file *file) |
179 | { | 175 | { |
180 | const struct path *path = &file->f_path; | 176 | const struct path *path = &file->f_path; |
181 | struct inode *inode = path->dentry->d_inode; | 177 | struct inode *inode = file_inode(file); |
182 | __u32 mask = FS_ACCESS; | 178 | __u32 mask = FS_ACCESS; |
183 | 179 | ||
184 | if (S_ISDIR(inode->i_mode)) | 180 | if (S_ISDIR(inode->i_mode)) |
@@ -196,7 +192,7 @@ static inline void fsnotify_access(struct file *file) | |||
196 | static inline void fsnotify_modify(struct file *file) | 192 | static inline void fsnotify_modify(struct file *file) |
197 | { | 193 | { |
198 | const struct path *path = &file->f_path; | 194 | const struct path *path = &file->f_path; |
199 | struct inode *inode = path->dentry->d_inode; | 195 | struct inode *inode = file_inode(file); |
200 | __u32 mask = FS_MODIFY; | 196 | __u32 mask = FS_MODIFY; |
201 | 197 | ||
202 | if (S_ISDIR(inode->i_mode)) | 198 | if (S_ISDIR(inode->i_mode)) |
@@ -214,7 +210,7 @@ static inline void fsnotify_modify(struct file *file) | |||
214 | static inline void fsnotify_open(struct file *file) | 210 | static inline void fsnotify_open(struct file *file) |
215 | { | 211 | { |
216 | const struct path *path = &file->f_path; | 212 | const struct path *path = &file->f_path; |
217 | struct inode *inode = path->dentry->d_inode; | 213 | struct inode *inode = file_inode(file); |
218 | __u32 mask = FS_OPEN; | 214 | __u32 mask = FS_OPEN; |
219 | 215 | ||
220 | if (S_ISDIR(inode->i_mode)) | 216 | if (S_ISDIR(inode->i_mode)) |
@@ -230,7 +226,7 @@ static inline void fsnotify_open(struct file *file) | |||
230 | static inline void fsnotify_close(struct file *file) | 226 | static inline void fsnotify_close(struct file *file) |
231 | { | 227 | { |
232 | const struct path *path = &file->f_path; | 228 | const struct path *path = &file->f_path; |
233 | struct inode *inode = path->dentry->d_inode; | 229 | struct inode *inode = file_inode(file); |
234 | fmode_t mode = file->f_mode; | 230 | fmode_t mode = file->f_mode; |
235 | __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; | 231 | __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; |
236 | 232 | ||