summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-08-21 21:19:09 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-21 21:19:09 -0400
commitd9a185f8b49678775ef56ecbdbc7b76970302897 (patch)
tree7ace1b26133e5d796af09e5d71d6531bcb69865c
parentc22fc16d172fba4d19ffd8f2aa8fe67edba63895 (diff)
parent989974c804574d250ac92d44e220081959ac8ac1 (diff)
Merge tag 'ovl-update-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs
Pull overlayfs updates from Miklos Szeredi: "This contains two new features: - Stack file operations: this allows removal of several hacks from the VFS, proper interaction of read-only open files with copy-up, possibility to implement fs modifying ioctls properly, and others. - Metadata only copy-up: when file is on lower layer and only metadata is modified (except size) then only copy up the metadata and continue to use the data from the lower file" * tag 'ovl-update-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs: (66 commits) ovl: Enable metadata only feature ovl: Do not do metacopy only for ioctl modifying file attr ovl: Do not do metadata only copy-up for truncate operation ovl: add helper to force data copy-up ovl: Check redirect on index as well ovl: Set redirect on upper inode when it is linked ovl: Set redirect on metacopy files upon rename ovl: Do not set dentry type ORIGIN for broken hardlinks ovl: Add an inode flag OVL_CONST_INO ovl: Treat metacopy dentries as type OVL_PATH_MERGE ovl: Check redirects for metacopy files ovl: Move some dir related ovl_lookup_single() code in else block ovl: Do not expose metacopy only dentry from d_real() ovl: Open file with data except for the case of fsync ovl: Add helper ovl_inode_realdata() ovl: Store lower data inode in ovl_inode ovl: Fix ovl_getattr() to get number of blocks from lower ovl: Add helper ovl_dentry_lowerdata() to get lower data dentry ovl: Copy up meta inode data from lowest data inode ovl: Modify ovl_lookup() and friends to lookup metacopy dentry ...
-rw-r--r--Documentation/filesystems/Locking3
-rw-r--r--Documentation/filesystems/overlayfs.txt81
-rw-r--r--Documentation/filesystems/vfs.txt16
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/ioctl.c11
-rw-r--r--fs/file_table.c69
-rw-r--r--fs/inode.c46
-rw-r--r--fs/internal.h11
-rw-r--r--fs/ioctl.c1
-rw-r--r--fs/locks.c20
-rw-r--r--fs/namei.c2
-rw-r--r--fs/namespace.c69
-rw-r--r--fs/ocfs2/file.c17
-rw-r--r--fs/open.c44
-rw-r--r--fs/overlayfs/Kconfig19
-rw-r--r--fs/overlayfs/Makefile4
-rw-r--r--fs/overlayfs/copy_up.c190
-rw-r--r--fs/overlayfs/dir.c105
-rw-r--r--fs/overlayfs/export.c3
-rw-r--r--fs/overlayfs/file.c511
-rw-r--r--fs/overlayfs/inode.c175
-rw-r--r--fs/overlayfs/namei.c195
-rw-r--r--fs/overlayfs/overlayfs.h47
-rw-r--r--fs/overlayfs/ovl_entry.h6
-rw-r--r--fs/overlayfs/readdir.c19
-rw-r--r--fs/overlayfs/super.c103
-rw-r--r--fs/overlayfs/util.c252
-rw-r--r--fs/read_write.c94
-rw-r--r--fs/xattr.c9
-rw-r--r--fs/xfs/xfs_file.c29
-rw-r--r--include/linux/dcache.h15
-rw-r--r--include/linux/fs.h29
-rw-r--r--include/linux/fsnotify.h14
33 files changed, 1619 insertions, 595 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 9e6f19eaef89..efea228ccd8a 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -21,8 +21,7 @@ prototypes:
21 char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); 21 char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
22 struct vfsmount *(*d_automount)(struct path *path); 22 struct vfsmount *(*d_automount)(struct path *path);
23 int (*d_manage)(const struct path *, bool); 23 int (*d_manage)(const struct path *, bool);
24 struct dentry *(*d_real)(struct dentry *, const struct inode *, 24 struct dentry *(*d_real)(struct dentry *, const struct inode *);
25 unsigned int, unsigned int);
26 25
27locking rules: 26locking rules:
28 rename_lock ->d_lock may block rcu-walk 27 rename_lock ->d_lock may block rcu-walk
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index 72615a2c0752..51c136c821bf 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -10,10 +10,6 @@ union-filesystems). An overlay-filesystem tries to present a
10filesystem which is the result of overlaying one filesystem on top 10filesystem which is the result of overlaying one filesystem on top
11of the other. 11of the other.
12 12
13The result will inevitably fail to look exactly like a normal
14filesystem for various technical reasons. The expectation is that
15many use cases will be able to ignore these differences.
16
17 13
18Overlay objects 14Overlay objects
19--------------- 15---------------
@@ -266,6 +262,30 @@ rightmost one and going left. In the above example lower1 will be the
266top, lower2 the middle and lower3 the bottom layer. 262top, lower2 the middle and lower3 the bottom layer.
267 263
268 264
265Metadata only copy up
266--------------------
267
268When the metadata only copy up feature is enabled, overlayfs will only copy
269up metadata (as opposed to the whole file) when a metadata specific operation
270like chown/chmod is performed. The full file will be copied up later when
271the file is opened for a WRITE operation.
272
273In other words, this is a delayed data copy up operation, and data is copied
274up only when there is a need to actually modify it.
275
276There are multiple ways to enable/disable this feature. A config option
277CONFIG_OVERLAY_FS_METACOPY can be set/unset to enable/disable this feature
278by default. Or one can enable/disable it at module load time with module
279parameter metacopy=on/off. Lastly, there is also a per mount option
280metacopy=on/off to enable/disable this feature per mount.
281
282Do not use metacopy=on with untrusted upper/lower directories. Otherwise
283it is possible that an attacker can create a handcrafted file with
284appropriate REDIRECT and METACOPY xattrs, and gain access to a file on the lower
285layer pointed to by REDIRECT. This should not be possible on a local system, as
286setting "trusted." xattrs requires CAP_SYS_ADMIN. But it may be possible
287for untrusted layers, such as those from a pen drive.
288
269Sharing and copying layers 289Sharing and copying layers
270-------------------------- 290--------------------------
271 291
@@ -284,7 +304,7 @@ though it will not result in a crash or deadlock.
284Mounting an overlay using an upper layer path, where the upper layer path 304Mounting an overlay using an upper layer path, where the upper layer path
285was previously used by another mounted overlay in combination with a 305was previously used by another mounted overlay in combination with a
286different lower layer path, is allowed, unless the "inodes index" feature 306different lower layer path, is allowed, unless the "inodes index" feature
287is enabled. 307or "metadata only copy up" feature is enabled.
288 308
289With the "inodes index" feature, on the first time mount, an NFS file 309With the "inodes index" feature, on the first time mount, an NFS file
290handle of the lower layer root directory, along with the UUID of the lower 310handle of the lower layer root directory, along with the UUID of the lower
@@ -297,6 +317,10 @@ lower root origin, mount will fail with ESTALE. An overlayfs mount with
297does not support NFS export, lower filesystem does not have a valid UUID or 317does not support NFS export, lower filesystem does not have a valid UUID or
298if the upper filesystem does not support extended attributes. 318if the upper filesystem does not support extended attributes.
299 319
320For the "metadata only copy up" feature there is no verification mechanism at
321mount time. So if the same upper is mounted with a different set of lowers, the
322mount will probably succeed, but expect the unexpected later on. So don't do it.
323
300It is quite a common practice to copy overlay layers to a different 324It is quite a common practice to copy overlay layers to a different
301directory tree on the same or different underlying filesystem, and even 325directory tree on the same or different underlying filesystem, and even
302to a different machine. With the "inodes index" feature, trying to mount 326to a different machine. With the "inodes index" feature, trying to mount
@@ -306,27 +330,40 @@ the copied layers will fail the verification of the lower root file handle.
306Non-standard behavior 330Non-standard behavior
307--------------------- 331---------------------
308 332
309The copy_up operation essentially creates a new, identical file and 333Overlayfs can now act as a POSIX compliant filesystem with the following
310moves it over to the old name. Any open files referring to this inode 334features turned on:
311will access the old data. 335
3361) "redirect_dir"
337
338Enabled with the mount option or module option: "redirect_dir=on" or with
339the kernel config option CONFIG_OVERLAY_FS_REDIRECT_DIR=y.
340
341If this feature is disabled, then rename(2) on a lower or merged directory
342will fail with EXDEV ("Invalid cross-device link").
343
3442) "inode index"
345
346Enabled with the mount option or module option "index=on" or with the
347kernel config option CONFIG_OVERLAY_FS_INDEX=y.
312 348
313The new file may be on a different filesystem, so both st_dev and st_ino 349If this feature is disabled and a file with multiple hard links is copied
314of the real file may change. The values of st_dev and st_ino returned by 350up, then this will "break" the link. Changes will not be propagated to
315stat(2) on an overlay object are often not the same as the real file 351other names referring to the same inode.
316stat(2) values to prevent the values from changing on copy_up.
317 352
318Unless "xino" feature is enabled, when overlay layers are not all on the 3533) "xino"
319same underlying filesystem, the value of st_dev may be different for two
320non-directory objects in the same overlay filesystem and the value of
321st_ino for directory objects may be non persistent and could change even
322while the overlay filesystem is still mounted.
323 354
324Unless "inode index" feature is enabled, if a file with multiple hard 355Enabled with the mount option "xino=auto" or "xino=on", with the module
325links is copied up, then this will "break" the link. Changes will not be 356option "xino_auto=on" or with the kernel config option
326propagated to other names referring to the same inode. 357CONFIG_OVERLAY_FS_XINO_AUTO=y. Also implicitly enabled by using the same
358underlying filesystem for all layers making up the overlay.
327 359
328Unless "redirect_dir" feature is enabled, rename(2) on a lower or merged 360If this feature is disabled or the underlying filesystem doesn't have
329directory will fail with EXDEV. 361enough free bits in the inode number, then overlayfs will not be able to
362guarantee that the values of st_ino and st_dev returned by stat(2) and the
363value of d_ino returned by readdir(3) will act like on a normal filesystem.
364E.g. the value of st_dev may be different for two objects in the same
365overlay filesystem and the value of st_ino for directory objects may not be
366persistent and could change even while the overlay filesystem is mounted.
330 367
331 368
332Changes to underlying filesystems 369Changes to underlying filesystems
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 85907d5b9c2c..4b2084d0f1fb 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -989,8 +989,7 @@ struct dentry_operations {
989 char *(*d_dname)(struct dentry *, char *, int); 989 char *(*d_dname)(struct dentry *, char *, int);
990 struct vfsmount *(*d_automount)(struct path *); 990 struct vfsmount *(*d_automount)(struct path *);
991 int (*d_manage)(const struct path *, bool); 991 int (*d_manage)(const struct path *, bool);
992 struct dentry *(*d_real)(struct dentry *, const struct inode *, 992 struct dentry *(*d_real)(struct dentry *, const struct inode *);
993 unsigned int, unsigned int);
994}; 993};
995 994
996 d_revalidate: called when the VFS needs to revalidate a dentry. This 995 d_revalidate: called when the VFS needs to revalidate a dentry. This
@@ -1124,22 +1123,15 @@ struct dentry_operations {
1124 dentry being transited from. 1123 dentry being transited from.
1125 1124
1126 d_real: overlay/union type filesystems implement this method to return one of 1125 d_real: overlay/union type filesystems implement this method to return one of
1127 the underlying dentries hidden by the overlay. It is used in three 1126 the underlying dentries hidden by the overlay. It is used in two
1128 different modes: 1127 different modes:
1129 1128
1130 Called from open it may need to copy-up the file depending on the
1131 supplied open flags. This mode is selected with a non-zero flags
1132 argument. In this mode the d_real method can return an error.
1133
1134 Called from file_dentry() it returns the real dentry matching the inode 1129 Called from file_dentry() it returns the real dentry matching the inode
1135 argument. The real dentry may be from a lower layer already copied up, 1130 argument. The real dentry may be from a lower layer already copied up,
1136 but still referenced from the file. This mode is selected with a 1131 but still referenced from the file. This mode is selected with a
1137 non-NULL inode argument. This will always succeed. 1132 non-NULL inode argument.
1138
1139 With NULL inode and zero flags the topmost real underlying dentry is
1140 returned. This will always succeed.
1141 1133
1142 This method is never called with both non-NULL inode and non-zero flags. 1134 With NULL inode the topmost real underlying dentry is returned.
1143 1135
1144Each dentry has a pointer to its parent dentry, as well as a hash list 1136Each dentry has a pointer to its parent dentry, as well as a hash list
1145of child dentries. Child dentries are basically like files in a 1137of child dentries. Child dentries are basically like files in a
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 318be7864072..53af9f5253f4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3217,8 +3217,9 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
3217 struct btrfs_ioctl_space_info *space); 3217 struct btrfs_ioctl_space_info *space);
3218void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, 3218void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
3219 struct btrfs_ioctl_balance_args *bargs); 3219 struct btrfs_ioctl_balance_args *bargs);
3220ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, 3220int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
3221 struct file *dst_file, u64 dst_loff); 3221 struct file *dst_file, loff_t dst_loff,
3222 u64 olen);
3222 3223
3223/* file.c */ 3224/* file.c */
3224int __init btrfs_auto_defrag_init(void); 3225int __init btrfs_auto_defrag_init(void);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d3a5d2a41e5f..63600dc2ac4c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3592,13 +3592,13 @@ out_unlock:
3592 return ret; 3592 return ret;
3593} 3593}
3594 3594
3595ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, 3595int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
3596 struct file *dst_file, u64 dst_loff) 3596 struct file *dst_file, loff_t dst_loff,
3597 u64 olen)
3597{ 3598{
3598 struct inode *src = file_inode(src_file); 3599 struct inode *src = file_inode(src_file);
3599 struct inode *dst = file_inode(dst_file); 3600 struct inode *dst = file_inode(dst_file);
3600 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; 3601 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
3601 ssize_t res;
3602 3602
3603 if (WARN_ON_ONCE(bs < PAGE_SIZE)) { 3603 if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
3604 /* 3604 /*
@@ -3609,10 +3609,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
3609 return -EINVAL; 3609 return -EINVAL;
3610 } 3610 }
3611 3611
3612 res = btrfs_extent_same(src, loff, olen, dst, dst_loff); 3612 return btrfs_extent_same(src, src_loff, olen, dst, dst_loff);
3613 if (res)
3614 return res;
3615 return olen;
3616} 3613}
3617 3614
3618static int clone_finish_inode_update(struct btrfs_trans_handle *trans, 3615static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
diff --git a/fs/file_table.c b/fs/file_table.c
index d6eccd04d703..e49af4caf15d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -52,7 +52,8 @@ static void file_free_rcu(struct rcu_head *head)
52static inline void file_free(struct file *f) 52static inline void file_free(struct file *f)
53{ 53{
54 security_file_free(f); 54 security_file_free(f);
55 percpu_counter_dec(&nr_files); 55 if (!(f->f_mode & FMODE_NOACCOUNT))
56 percpu_counter_dec(&nr_files);
56 call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); 57 call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
57} 58}
58 59
@@ -91,6 +92,34 @@ int proc_nr_files(struct ctl_table *table, int write,
91} 92}
92#endif 93#endif
93 94
95static struct file *__alloc_file(int flags, const struct cred *cred)
96{
97 struct file *f;
98 int error;
99
100 f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
101 if (unlikely(!f))
102 return ERR_PTR(-ENOMEM);
103
104 f->f_cred = get_cred(cred);
105 error = security_file_alloc(f);
106 if (unlikely(error)) {
107 file_free_rcu(&f->f_u.fu_rcuhead);
108 return ERR_PTR(error);
109 }
110
111 atomic_long_set(&f->f_count, 1);
112 rwlock_init(&f->f_owner.lock);
113 spin_lock_init(&f->f_lock);
114 mutex_init(&f->f_pos_lock);
115 eventpoll_init_file(f);
116 f->f_flags = flags;
117 f->f_mode = OPEN_FMODE(flags);
118 /* f->f_version: 0 */
119
120 return f;
121}
122
94/* Find an unused file structure and return a pointer to it. 123/* Find an unused file structure and return a pointer to it.
95 * Returns an error pointer if some error happened e.g. we are over file 124 * Returns an error pointer if some error happened e.g. we are over file
96 * structures limit, run out of memory or operation is not permitted. 125 * structures limit, run out of memory or operation is not permitted.
@@ -105,7 +134,6 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
105{ 134{
106 static long old_max; 135 static long old_max;
107 struct file *f; 136 struct file *f;
108 int error;
109 137
110 /* 138 /*
111 * Privileged users can go above max_files 139 * Privileged users can go above max_files
@@ -119,26 +147,10 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
119 goto over; 147 goto over;
120 } 148 }
121 149
122 f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); 150 f = __alloc_file(flags, cred);
123 if (unlikely(!f)) 151 if (!IS_ERR(f))
124 return ERR_PTR(-ENOMEM); 152 percpu_counter_inc(&nr_files);
125
126 f->f_cred = get_cred(cred);
127 error = security_file_alloc(f);
128 if (unlikely(error)) {
129 file_free_rcu(&f->f_u.fu_rcuhead);
130 return ERR_PTR(error);
131 }
132 153
133 atomic_long_set(&f->f_count, 1);
134 rwlock_init(&f->f_owner.lock);
135 spin_lock_init(&f->f_lock);
136 mutex_init(&f->f_pos_lock);
137 eventpoll_init_file(f);
138 f->f_flags = flags;
139 f->f_mode = OPEN_FMODE(flags);
140 /* f->f_version: 0 */
141 percpu_counter_inc(&nr_files);
142 return f; 154 return f;
143 155
144over: 156over:
@@ -150,6 +162,21 @@ over:
150 return ERR_PTR(-ENFILE); 162 return ERR_PTR(-ENFILE);
151} 163}
152 164
165/*
166 * Variant of alloc_empty_file() that doesn't check and modify nr_files.
167 *
168 * Should not be used unless there's a very good reason to do so.
169 */
170struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
171{
172 struct file *f = __alloc_file(flags, cred);
173
174 if (!IS_ERR(f))
175 f->f_mode |= FMODE_NOACCOUNT;
176
177 return f;
178}
179
153/** 180/**
154 * alloc_file - allocate and initialize a 'struct file' 181 * alloc_file - allocate and initialize a 'struct file'
155 * 182 *
diff --git a/fs/inode.c b/fs/inode.c
index a06de4454232..42f6d25f32a5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1596,49 +1596,16 @@ sector_t bmap(struct inode *inode, sector_t block)
1596EXPORT_SYMBOL(bmap); 1596EXPORT_SYMBOL(bmap);
1597 1597
1598/* 1598/*
1599 * Update times in overlayed inode from underlying real inode
1600 */
1601static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
1602 bool rcu)
1603{
1604 struct dentry *upperdentry;
1605
1606 /*
1607 * Nothing to do if in rcu or if non-overlayfs
1608 */
1609 if (rcu || likely(!(dentry->d_flags & DCACHE_OP_REAL)))
1610 return;
1611
1612 upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
1613
1614 /*
1615 * If file is on lower then we can't update atime, so no worries about
1616 * stale mtime/ctime.
1617 */
1618 if (upperdentry) {
1619 struct inode *realinode = d_inode(upperdentry);
1620
1621 if ((!timespec64_equal(&inode->i_mtime, &realinode->i_mtime) ||
1622 !timespec64_equal(&inode->i_ctime, &realinode->i_ctime))) {
1623 inode->i_mtime = realinode->i_mtime;
1624 inode->i_ctime = realinode->i_ctime;
1625 }
1626 }
1627}
1628
1629/*
1630 * With relative atime, only update atime if the previous atime is 1599 * With relative atime, only update atime if the previous atime is
1631 * earlier than either the ctime or mtime or if at least a day has 1600 * earlier than either the ctime or mtime or if at least a day has
1632 * passed since the last atime update. 1601 * passed since the last atime update.
1633 */ 1602 */
1634static int relatime_need_update(const struct path *path, struct inode *inode, 1603static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
1635 struct timespec now, bool rcu) 1604 struct timespec now)
1636{ 1605{
1637 1606
1638 if (!(path->mnt->mnt_flags & MNT_RELATIME)) 1607 if (!(mnt->mnt_flags & MNT_RELATIME))
1639 return 1; 1608 return 1;
1640
1641 update_ovl_inode_times(path->dentry, inode, rcu);
1642 /* 1609 /*
1643 * Is mtime younger than atime? If yes, update atime: 1610 * Is mtime younger than atime? If yes, update atime:
1644 */ 1611 */
@@ -1709,8 +1676,7 @@ static int update_time(struct inode *inode, struct timespec64 *time, int flags)
1709 * This function automatically handles read only file systems and media, 1676 * This function automatically handles read only file systems and media,
1710 * as well as the "noatime" flag and inode specific "noatime" markers. 1677 * as well as the "noatime" flag and inode specific "noatime" markers.
1711 */ 1678 */
1712bool __atime_needs_update(const struct path *path, struct inode *inode, 1679bool atime_needs_update(const struct path *path, struct inode *inode)
1713 bool rcu)
1714{ 1680{
1715 struct vfsmount *mnt = path->mnt; 1681 struct vfsmount *mnt = path->mnt;
1716 struct timespec64 now; 1682 struct timespec64 now;
@@ -1736,7 +1702,7 @@ bool __atime_needs_update(const struct path *path, struct inode *inode,
1736 1702
1737 now = current_time(inode); 1703 now = current_time(inode);
1738 1704
1739 if (!relatime_need_update(path, inode, timespec64_to_timespec(now), rcu)) 1705 if (!relatime_need_update(mnt, inode, timespec64_to_timespec(now)))
1740 return false; 1706 return false;
1741 1707
1742 if (timespec64_equal(&inode->i_atime, &now)) 1708 if (timespec64_equal(&inode->i_atime, &now))
@@ -1751,7 +1717,7 @@ void touch_atime(const struct path *path)
1751 struct inode *inode = d_inode(path->dentry); 1717 struct inode *inode = d_inode(path->dentry);
1752 struct timespec64 now; 1718 struct timespec64 now;
1753 1719
1754 if (!__atime_needs_update(path, inode, false)) 1720 if (!atime_needs_update(path, inode))
1755 return; 1721 return;
1756 1722
1757 if (!sb_start_write_trylock(inode->i_sb)) 1723 if (!sb_start_write_trylock(inode->i_sb))
diff --git a/fs/internal.h b/fs/internal.h
index 50a28fc71300..d410186bc369 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -82,10 +82,8 @@ extern void __init mnt_init(void);
82 82
83extern int __mnt_want_write(struct vfsmount *); 83extern int __mnt_want_write(struct vfsmount *);
84extern int __mnt_want_write_file(struct file *); 84extern int __mnt_want_write_file(struct file *);
85extern int mnt_want_write_file_path(struct file *);
86extern void __mnt_drop_write(struct vfsmount *); 85extern void __mnt_drop_write(struct vfsmount *);
87extern void __mnt_drop_write_file(struct file *); 86extern void __mnt_drop_write_file(struct file *);
88extern void mnt_drop_write_file_path(struct file *);
89 87
90/* 88/*
91 * fs_struct.c 89 * fs_struct.c
@@ -96,6 +94,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
96 * file_table.c 94 * file_table.c
97 */ 95 */
98extern struct file *alloc_empty_file(int, const struct cred *); 96extern struct file *alloc_empty_file(int, const struct cred *);
97extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
99 98
100/* 99/*
101 * super.c 100 * super.c
@@ -136,13 +135,6 @@ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
136extern void inode_add_lru(struct inode *inode); 135extern void inode_add_lru(struct inode *inode);
137extern int dentry_needs_remove_privs(struct dentry *dentry); 136extern int dentry_needs_remove_privs(struct dentry *dentry);
138 137
139extern bool __atime_needs_update(const struct path *, struct inode *, bool);
140static inline bool atime_needs_update_rcu(const struct path *path,
141 struct inode *inode)
142{
143 return __atime_needs_update(path, inode, true);
144}
145
146/* 138/*
147 * fs-writeback.c 139 * fs-writeback.c
148 */ 140 */
@@ -185,7 +177,6 @@ extern const struct dentry_operations ns_dentry_operations;
185 */ 177 */
186extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd, 178extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
187 unsigned long arg); 179 unsigned long arg);
188extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
189 180
190/* 181/*
191 * iomap support: 182 * iomap support:
diff --git a/fs/ioctl.c b/fs/ioctl.c
index b445b13fc59b..3212c29235ce 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -49,6 +49,7 @@ long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
49 out: 49 out:
50 return error; 50 return error;
51} 51}
52EXPORT_SYMBOL(vfs_ioctl);
52 53
53static int ioctl_fibmap(struct file *filp, int __user *p) 54static int ioctl_fibmap(struct file *filp, int __user *p)
54{ 55{
diff --git a/fs/locks.c b/fs/locks.c
index 5086bde5a18e..2ecb4db8c840 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -139,11 +139,6 @@
139#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK) 139#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK)
140#define IS_REMOTELCK(fl) (fl->fl_pid <= 0) 140#define IS_REMOTELCK(fl) (fl->fl_pid <= 0)
141 141
142static inline bool is_remote_lock(struct file *filp)
143{
144 return likely(!(filp->f_path.dentry->d_sb->s_flags & SB_NOREMOTELOCK));
145}
146
147static bool lease_breaking(struct file_lock *fl) 142static bool lease_breaking(struct file_lock *fl)
148{ 143{
149 return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING); 144 return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
@@ -1651,8 +1646,7 @@ check_conflicting_open(const struct dentry *dentry, const long arg, int flags)
1651 if (flags & FL_LAYOUT) 1646 if (flags & FL_LAYOUT)
1652 return 0; 1647 return 0;
1653 1648
1654 if ((arg == F_RDLCK) && 1649 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
1655 (atomic_read(&d_real_inode(dentry)->i_writecount) > 0))
1656 return -EAGAIN; 1650 return -EAGAIN;
1657 1651
1658 if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || 1652 if ((arg == F_WRLCK) && ((d_count(dentry) > 1) ||
@@ -1873,7 +1867,7 @@ EXPORT_SYMBOL(generic_setlease);
1873int 1867int
1874vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv) 1868vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
1875{ 1869{
1876 if (filp->f_op->setlease && is_remote_lock(filp)) 1870 if (filp->f_op->setlease)
1877 return filp->f_op->setlease(filp, arg, lease, priv); 1871 return filp->f_op->setlease(filp, arg, lease, priv);
1878 else 1872 else
1879 return generic_setlease(filp, arg, lease, priv); 1873 return generic_setlease(filp, arg, lease, priv);
@@ -2020,7 +2014,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
2020 if (error) 2014 if (error)
2021 goto out_free; 2015 goto out_free;
2022 2016
2023 if (f.file->f_op->flock && is_remote_lock(f.file)) 2017 if (f.file->f_op->flock)
2024 error = f.file->f_op->flock(f.file, 2018 error = f.file->f_op->flock(f.file,
2025 (can_sleep) ? F_SETLKW : F_SETLK, 2019 (can_sleep) ? F_SETLKW : F_SETLK,
2026 lock); 2020 lock);
@@ -2046,7 +2040,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
2046 */ 2040 */
2047int vfs_test_lock(struct file *filp, struct file_lock *fl) 2041int vfs_test_lock(struct file *filp, struct file_lock *fl)
2048{ 2042{
2049 if (filp->f_op->lock && is_remote_lock(filp)) 2043 if (filp->f_op->lock)
2050 return filp->f_op->lock(filp, F_GETLK, fl); 2044 return filp->f_op->lock(filp, F_GETLK, fl);
2051 posix_test_lock(filp, fl); 2045 posix_test_lock(filp, fl);
2052 return 0; 2046 return 0;
@@ -2196,7 +2190,7 @@ out:
2196 */ 2190 */
2197int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) 2191int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
2198{ 2192{
2199 if (filp->f_op->lock && is_remote_lock(filp)) 2193 if (filp->f_op->lock)
2200 return filp->f_op->lock(filp, cmd, fl); 2194 return filp->f_op->lock(filp, cmd, fl);
2201 else 2195 else
2202 return posix_lock_file(filp, fl, conf); 2196 return posix_lock_file(filp, fl, conf);
@@ -2518,7 +2512,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
2518 if (list_empty(&flctx->flc_flock)) 2512 if (list_empty(&flctx->flc_flock))
2519 return; 2513 return;
2520 2514
2521 if (filp->f_op->flock && is_remote_lock(filp)) 2515 if (filp->f_op->flock)
2522 filp->f_op->flock(filp, F_SETLKW, &fl); 2516 filp->f_op->flock(filp, F_SETLKW, &fl);
2523 else 2517 else
2524 flock_lock_inode(inode, &fl); 2518 flock_lock_inode(inode, &fl);
@@ -2605,7 +2599,7 @@ EXPORT_SYMBOL(posix_unblock_lock);
2605 */ 2599 */
2606int vfs_cancel_lock(struct file *filp, struct file_lock *fl) 2600int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
2607{ 2601{
2608 if (filp->f_op->lock && is_remote_lock(filp)) 2602 if (filp->f_op->lock)
2609 return filp->f_op->lock(filp, F_CANCELLK, fl); 2603 return filp->f_op->lock(filp, F_CANCELLK, fl);
2610 return 0; 2604 return 0;
2611} 2605}
diff --git a/fs/namei.c b/fs/namei.c
index 3cd396277cd3..ae6aa9ae757c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1015,7 +1015,7 @@ const char *get_link(struct nameidata *nd)
1015 if (!(nd->flags & LOOKUP_RCU)) { 1015 if (!(nd->flags & LOOKUP_RCU)) {
1016 touch_atime(&last->link); 1016 touch_atime(&last->link);
1017 cond_resched(); 1017 cond_resched();
1018 } else if (atime_needs_update_rcu(&last->link, inode)) { 1018 } else if (atime_needs_update(&last->link, inode)) {
1019 if (unlikely(unlazy_walk(nd))) 1019 if (unlikely(unlazy_walk(nd)))
1020 return ERR_PTR(-ECHILD); 1020 return ERR_PTR(-ECHILD);
1021 touch_atime(&last->link); 1021 touch_atime(&last->link);
diff --git a/fs/namespace.c b/fs/namespace.c
index bd2f4c68506a..725d6935fab9 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -431,74 +431,20 @@ int __mnt_want_write_file(struct file *file)
431} 431}
432 432
433/** 433/**
434 * mnt_want_write_file_path - get write access to a file's mount
435 * @file: the file who's mount on which to take a write
436 *
437 * This is like mnt_want_write, but it takes a file and can
438 * do some optimisations if the file is open for write already
439 *
440 * Called by the vfs for cases when we have an open file at hand, but will do an
441 * inode operation on it (important distinction for files opened on overlayfs,
442 * since the file operations will come from the real underlying file, while
443 * inode operations come from the overlay).
444 */
445int mnt_want_write_file_path(struct file *file)
446{
447 int ret;
448
449 sb_start_write(file->f_path.mnt->mnt_sb);
450 ret = __mnt_want_write_file(file);
451 if (ret)
452 sb_end_write(file->f_path.mnt->mnt_sb);
453 return ret;
454}
455
456static inline int may_write_real(struct file *file)
457{
458 struct dentry *dentry = file->f_path.dentry;
459 struct dentry *upperdentry;
460
461 /* Writable file? */
462 if (file->f_mode & FMODE_WRITER)
463 return 0;
464
465 /* Not overlayfs? */
466 if (likely(!(dentry->d_flags & DCACHE_OP_REAL)))
467 return 0;
468
469 /* File refers to upper, writable layer? */
470 upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
471 if (upperdentry &&
472 (file_inode(file) == d_inode(upperdentry) ||
473 file_inode(file) == d_inode(dentry)))
474 return 0;
475
476 /* Lower layer: can't write to real file, sorry... */
477 return -EPERM;
478}
479
480/**
481 * mnt_want_write_file - get write access to a file's mount 434 * mnt_want_write_file - get write access to a file's mount
482 * @file: the file who's mount on which to take a write 435 * @file: the file who's mount on which to take a write
483 * 436 *
484 * This is like mnt_want_write, but it takes a file and can 437 * This is like mnt_want_write, but it takes a file and can
485 * do some optimisations if the file is open for write already 438 * do some optimisations if the file is open for write already
486 *
487 * Mostly called by filesystems from their ioctl operation before performing
488 * modification. On overlayfs this needs to check if the file is on a read-only
489 * lower layer and deny access in that case.
490 */ 439 */
491int mnt_want_write_file(struct file *file) 440int mnt_want_write_file(struct file *file)
492{ 441{
493 int ret; 442 int ret;
494 443
495 ret = may_write_real(file); 444 sb_start_write(file_inode(file)->i_sb);
496 if (!ret) { 445 ret = __mnt_want_write_file(file);
497 sb_start_write(file_inode(file)->i_sb); 446 if (ret)
498 ret = __mnt_want_write_file(file); 447 sb_end_write(file_inode(file)->i_sb);
499 if (ret)
500 sb_end_write(file_inode(file)->i_sb);
501 }
502 return ret; 448 return ret;
503} 449}
504EXPORT_SYMBOL_GPL(mnt_want_write_file); 450EXPORT_SYMBOL_GPL(mnt_want_write_file);
@@ -538,14 +484,9 @@ void __mnt_drop_write_file(struct file *file)
538 __mnt_drop_write(file->f_path.mnt); 484 __mnt_drop_write(file->f_path.mnt);
539} 485}
540 486
541void mnt_drop_write_file_path(struct file *file)
542{
543 mnt_drop_write(file->f_path.mnt);
544}
545
546void mnt_drop_write_file(struct file *file) 487void mnt_drop_write_file(struct file *file)
547{ 488{
548 __mnt_drop_write(file->f_path.mnt); 489 __mnt_drop_write_file(file);
549 sb_end_write(file_inode(file)->i_sb); 490 sb_end_write(file_inode(file)->i_sb);
550} 491}
551EXPORT_SYMBOL(mnt_drop_write_file); 492EXPORT_SYMBOL(mnt_drop_write_file);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 255f758af03a..9fa35cb6f6e0 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2537,19 +2537,14 @@ static int ocfs2_file_clone_range(struct file *file_in,
2537 len, false); 2537 len, false);
2538} 2538}
2539 2539
2540static ssize_t ocfs2_file_dedupe_range(struct file *src_file, 2540static int ocfs2_file_dedupe_range(struct file *file_in,
2541 u64 loff, 2541 loff_t pos_in,
2542 u64 len, 2542 struct file *file_out,
2543 struct file *dst_file, 2543 loff_t pos_out,
2544 u64 dst_loff) 2544 u64 len)
2545{ 2545{
2546 int error; 2546 return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
2547
2548 error = ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff,
2549 len, true); 2547 len, true);
2550 if (error)
2551 return error;
2552 return len;
2553} 2548}
2554 2549
2555const struct inode_operations ocfs2_file_iops = { 2550const struct inode_operations ocfs2_file_iops = {
diff --git a/fs/open.c b/fs/open.c
index d98e19239bb7..0285ce7dbd51 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -68,7 +68,6 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
68long vfs_truncate(const struct path *path, loff_t length) 68long vfs_truncate(const struct path *path, loff_t length)
69{ 69{
70 struct inode *inode; 70 struct inode *inode;
71 struct dentry *upperdentry;
72 long error; 71 long error;
73 72
74 inode = path->dentry->d_inode; 73 inode = path->dentry->d_inode;
@@ -91,17 +90,7 @@ long vfs_truncate(const struct path *path, loff_t length)
91 if (IS_APPEND(inode)) 90 if (IS_APPEND(inode))
92 goto mnt_drop_write_and_out; 91 goto mnt_drop_write_and_out;
93 92
94 /* 93 error = get_write_access(inode);
95 * If this is an overlayfs then do as if opening the file so we get
96 * write access on the upper inode, not on the overlay inode. For
97 * non-overlay filesystems d_real() is an identity function.
98 */
99 upperdentry = d_real(path->dentry, NULL, O_WRONLY, 0);
100 error = PTR_ERR(upperdentry);
101 if (IS_ERR(upperdentry))
102 goto mnt_drop_write_and_out;
103
104 error = get_write_access(upperdentry->d_inode);
105 if (error) 94 if (error)
106 goto mnt_drop_write_and_out; 95 goto mnt_drop_write_and_out;
107 96
@@ -120,7 +109,7 @@ long vfs_truncate(const struct path *path, loff_t length)
120 error = do_truncate(path->dentry, length, 0, NULL); 109 error = do_truncate(path->dentry, length, 0, NULL);
121 110
122put_write_and_out: 111put_write_and_out:
123 put_write_access(upperdentry->d_inode); 112 put_write_access(inode);
124mnt_drop_write_and_out: 113mnt_drop_write_and_out:
125 mnt_drop_write(path->mnt); 114 mnt_drop_write(path->mnt);
126out: 115out:
@@ -707,12 +696,12 @@ int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
707 if (!f.file) 696 if (!f.file)
708 goto out; 697 goto out;
709 698
710 error = mnt_want_write_file_path(f.file); 699 error = mnt_want_write_file(f.file);
711 if (error) 700 if (error)
712 goto out_fput; 701 goto out_fput;
713 audit_file(f.file); 702 audit_file(f.file);
714 error = chown_common(&f.file->f_path, user, group); 703 error = chown_common(&f.file->f_path, user, group);
715 mnt_drop_write_file_path(f.file); 704 mnt_drop_write_file(f.file);
716out_fput: 705out_fput:
717 fdput(f); 706 fdput(f);
718out: 707out:
@@ -887,13 +876,8 @@ EXPORT_SYMBOL(file_path);
887 */ 876 */
888int vfs_open(const struct path *path, struct file *file) 877int vfs_open(const struct path *path, struct file *file)
889{ 878{
890 struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags, 0);
891
892 if (IS_ERR(dentry))
893 return PTR_ERR(dentry);
894
895 file->f_path = *path; 879 file->f_path = *path;
896 return do_dentry_open(file, d_backing_inode(dentry), NULL); 880 return do_dentry_open(file, d_backing_inode(path->dentry), NULL);
897} 881}
898 882
899struct file *dentry_open(const struct path *path, int flags, 883struct file *dentry_open(const struct path *path, int flags,
@@ -919,6 +903,24 @@ struct file *dentry_open(const struct path *path, int flags,
919} 903}
920EXPORT_SYMBOL(dentry_open); 904EXPORT_SYMBOL(dentry_open);
921 905
906struct file *open_with_fake_path(const struct path *path, int flags,
907 struct inode *inode, const struct cred *cred)
908{
909 struct file *f = alloc_empty_file_noaccount(flags, cred);
910 if (!IS_ERR(f)) {
911 int error;
912
913 f->f_path = *path;
914 error = do_dentry_open(f, inode, NULL);
915 if (error) {
916 fput(f);
917 f = ERR_PTR(error);
918 }
919 }
920 return f;
921}
922EXPORT_SYMBOL(open_with_fake_path);
923
922static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) 924static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
923{ 925{
924 int lookup_flags = 0; 926 int lookup_flags = 0;
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 9384164253ac..2ef91be2a04e 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -64,6 +64,7 @@ config OVERLAY_FS_NFS_EXPORT
64 bool "Overlayfs: turn on NFS export feature by default" 64 bool "Overlayfs: turn on NFS export feature by default"
65 depends on OVERLAY_FS 65 depends on OVERLAY_FS
66 depends on OVERLAY_FS_INDEX 66 depends on OVERLAY_FS_INDEX
67 depends on !OVERLAY_FS_METACOPY
67 help 68 help
68 If this config option is enabled then overlay filesystems will use 69 If this config option is enabled then overlay filesystems will use
69 the index directory to decode overlay NFS file handles by default. 70 the index directory to decode overlay NFS file handles by default.
@@ -103,3 +104,21 @@ config OVERLAY_FS_XINO_AUTO
103 For more information, see Documentation/filesystems/overlayfs.txt 104 For more information, see Documentation/filesystems/overlayfs.txt
104 105
105 If unsure, say N. 106 If unsure, say N.
107
108config OVERLAY_FS_METACOPY
109 bool "Overlayfs: turn on metadata only copy up feature by default"
110 depends on OVERLAY_FS
111 select OVERLAY_FS_REDIRECT_DIR
112 help
113 If this config option is enabled then overlay filesystems will
114 copy up only metadata where appropriate and data copy up will
115 happen when a file is opened for WRITE operation. It is still
116 possible to turn off this feature globally with the "metacopy=off"
117 module option or on a filesystem instance basis with the
118 "metacopy=off" mount option.
119
120 Note that this feature is not backward compatible. That is,
121 mounting an overlay which has metacopy only inodes on a kernel
122 that doesn't support this feature will have unexpected results.
123
124 If unsure, say N.
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
index 30802347a020..46e1ff8ac056 100644
--- a/fs/overlayfs/Makefile
+++ b/fs/overlayfs/Makefile
@@ -4,5 +4,5 @@
4 4
5obj-$(CONFIG_OVERLAY_FS) += overlay.o 5obj-$(CONFIG_OVERLAY_FS) += overlay.o
6 6
7overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o \ 7overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \
8 export.o 8 copy_up.o export.o
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index ddaddb4ce4c3..296037afecdb 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -25,35 +25,20 @@
25 25
26#define OVL_COPY_UP_CHUNK_SIZE (1 << 20) 26#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
27 27
28static bool __read_mostly ovl_check_copy_up; 28static int ovl_ccup_set(const char *buf, const struct kernel_param *param)
29module_param_named(check_copy_up, ovl_check_copy_up, bool,
30 S_IWUSR | S_IRUGO);
31MODULE_PARM_DESC(ovl_check_copy_up,
32 "Warn on copy-up when causing process also has a R/O fd open");
33
34static int ovl_check_fd(const void *data, struct file *f, unsigned int fd)
35{ 29{
36 const struct dentry *dentry = data; 30 pr_warn("overlayfs: \"check_copy_up\" module option is obsolete\n");
37
38 if (file_inode(f) == d_inode(dentry))
39 pr_warn_ratelimited("overlayfs: Warning: Copying up %pD, but open R/O on fd %u which will cease to be coherent [pid=%d %s]\n",
40 f, fd, current->pid, current->comm);
41 return 0; 31 return 0;
42} 32}
43 33
44/* 34static int ovl_ccup_get(char *buf, const struct kernel_param *param)
45 * Check the fds open by this process and warn if something like the following
46 * scenario is about to occur:
47 *
48 * fd1 = open("foo", O_RDONLY);
49 * fd2 = open("foo", O_RDWR);
50 */
51static void ovl_do_check_copy_up(struct dentry *dentry)
52{ 35{
53 if (ovl_check_copy_up) 36 return sprintf(buf, "N\n");
54 iterate_fd(current->files, 0, ovl_check_fd, dentry);
55} 37}
56 38
39module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644);
40MODULE_PARM_DESC(ovl_check_copy_up, "Obsolete; does nothing");
41
57int ovl_copy_xattr(struct dentry *old, struct dentry *new) 42int ovl_copy_xattr(struct dentry *old, struct dentry *new)
58{ 43{
59 ssize_t list_size, size, value_size = 0; 44 ssize_t list_size, size, value_size = 0;
@@ -195,6 +180,16 @@ out_fput:
195 return error; 180 return error;
196} 181}
197 182
183static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
184{
185 struct iattr attr = {
186 .ia_valid = ATTR_SIZE,
187 .ia_size = stat->size,
188 };
189
190 return notify_change(upperdentry, &attr, NULL);
191}
192
198static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) 193static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
199{ 194{
200 struct iattr attr = { 195 struct iattr attr = {
@@ -403,6 +398,7 @@ struct ovl_copy_up_ctx {
403 bool tmpfile; 398 bool tmpfile;
404 bool origin; 399 bool origin;
405 bool indexed; 400 bool indexed;
401 bool metacopy;
406}; 402};
407 403
408static int ovl_link_up(struct ovl_copy_up_ctx *c) 404static int ovl_link_up(struct ovl_copy_up_ctx *c)
@@ -505,28 +501,10 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
505{ 501{
506 int err; 502 int err;
507 503
508 if (S_ISREG(c->stat.mode)) {
509 struct path upperpath;
510
511 ovl_path_upper(c->dentry, &upperpath);
512 BUG_ON(upperpath.dentry != NULL);
513 upperpath.dentry = temp;
514
515 err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size);
516 if (err)
517 return err;
518 }
519
520 err = ovl_copy_xattr(c->lowerpath.dentry, temp); 504 err = ovl_copy_xattr(c->lowerpath.dentry, temp);
521 if (err) 505 if (err)
522 return err; 506 return err;
523 507
524 inode_lock(temp->d_inode);
525 err = ovl_set_attr(temp, &c->stat);
526 inode_unlock(temp->d_inode);
527 if (err)
528 return err;
529
530 /* 508 /*
531 * Store identifier of lower inode in upper inode xattr to 509 * Store identifier of lower inode in upper inode xattr to
532 * allow lookup of the copy up origin inode. 510 * allow lookup of the copy up origin inode.
@@ -540,7 +518,34 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
540 return err; 518 return err;
541 } 519 }
542 520
543 return 0; 521 if (S_ISREG(c->stat.mode) && !c->metacopy) {
522 struct path upperpath, datapath;
523
524 ovl_path_upper(c->dentry, &upperpath);
525 BUG_ON(upperpath.dentry != NULL);
526 upperpath.dentry = temp;
527
528 ovl_path_lowerdata(c->dentry, &datapath);
529 err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
530 if (err)
531 return err;
532 }
533
534 if (c->metacopy) {
535 err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY,
536 NULL, 0, -EOPNOTSUPP);
537 if (err)
538 return err;
539 }
540
541 inode_lock(temp->d_inode);
542 if (c->metacopy)
543 err = ovl_set_size(temp, &c->stat);
544 if (!err)
545 err = ovl_set_attr(temp, &c->stat);
546 inode_unlock(temp->d_inode);
547
548 return err;
544} 549}
545 550
546static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c) 551static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
@@ -575,6 +580,8 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
575 if (err) 580 if (err)
576 goto out; 581 goto out;
577 582
583 if (!c->metacopy)
584 ovl_set_upperdata(d_inode(c->dentry));
578 inode = d_inode(c->dentry); 585 inode = d_inode(c->dentry);
579 ovl_inode_update(inode, newdentry); 586 ovl_inode_update(inode, newdentry);
580 if (S_ISDIR(inode->i_mode)) 587 if (S_ISDIR(inode->i_mode))
@@ -677,6 +684,49 @@ out:
677 return err; 684 return err;
678} 685}
679 686
687static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
688 int flags)
689{
690 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
691
692 if (!ofs->config.metacopy)
693 return false;
694
695 if (!S_ISREG(mode))
696 return false;
697
698 if (flags && ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)))
699 return false;
700
701 return true;
702}
703
704/* Copy up data of an inode which was copied up metadata only in the past. */
705static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
706{
707 struct path upperpath, datapath;
708 int err;
709
710 ovl_path_upper(c->dentry, &upperpath);
711 if (WARN_ON(upperpath.dentry == NULL))
712 return -EIO;
713
714 ovl_path_lowerdata(c->dentry, &datapath);
715 if (WARN_ON(datapath.dentry == NULL))
716 return -EIO;
717
718 err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
719 if (err)
720 return err;
721
722 err = vfs_removexattr(upperpath.dentry, OVL_XATTR_METACOPY);
723 if (err)
724 return err;
725
726 ovl_set_upperdata(d_inode(c->dentry));
727 return err;
728}
729
680static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, 730static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
681 int flags) 731 int flags)
682{ 732{
@@ -698,6 +748,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
698 if (err) 748 if (err)
699 return err; 749 return err;
700 750
751 ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
752
701 if (parent) { 753 if (parent) {
702 ovl_path_upper(parent, &parentpath); 754 ovl_path_upper(parent, &parentpath);
703 ctx.destdir = parentpath.dentry; 755 ctx.destdir = parentpath.dentry;
@@ -719,9 +771,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
719 if (IS_ERR(ctx.link)) 771 if (IS_ERR(ctx.link))
720 return PTR_ERR(ctx.link); 772 return PTR_ERR(ctx.link);
721 } 773 }
722 ovl_do_check_copy_up(ctx.lowerpath.dentry);
723 774
724 err = ovl_copy_up_start(dentry); 775 err = ovl_copy_up_start(dentry, flags);
725 /* err < 0: interrupted, err > 0: raced with another copy-up */ 776 /* err < 0: interrupted, err > 0: raced with another copy-up */
726 if (unlikely(err)) { 777 if (unlikely(err)) {
727 if (err > 0) 778 if (err > 0)
@@ -731,6 +782,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
731 err = ovl_do_copy_up(&ctx); 782 err = ovl_do_copy_up(&ctx);
732 if (!err && parent && !ovl_dentry_has_upper_alias(dentry)) 783 if (!err && parent && !ovl_dentry_has_upper_alias(dentry))
733 err = ovl_link_up(&ctx); 784 err = ovl_link_up(&ctx);
785 if (!err && ovl_dentry_needs_data_copy_up_locked(dentry, flags))
786 err = ovl_copy_up_meta_inode_data(&ctx);
734 ovl_copy_up_end(dentry); 787 ovl_copy_up_end(dentry);
735 } 788 }
736 do_delayed_call(&done); 789 do_delayed_call(&done);
@@ -756,21 +809,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
756 struct dentry *next; 809 struct dentry *next;
757 struct dentry *parent = NULL; 810 struct dentry *parent = NULL;
758 811
759 /* 812 if (ovl_already_copied_up(dentry, flags))
760 * Check if copy-up has happened as well as for upper alias (in
761 * case of hard links) is there.
762 *
763 * Both checks are lockless:
764 * - false negatives: will recheck under oi->lock
765 * - false positives:
766 * + ovl_dentry_upper() uses memory barriers to ensure the
767 * upper dentry is up-to-date
768 * + ovl_dentry_has_upper_alias() relies on locking of
769 * upper parent i_rwsem to prevent reordering copy-up
770 * with rename.
771 */
772 if (ovl_dentry_upper(dentry) &&
773 (ovl_dentry_has_upper_alias(dentry) || disconnected))
774 break; 813 break;
775 814
776 next = dget(dentry); 815 next = dget(dentry);
@@ -795,6 +834,41 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
795 return err; 834 return err;
796} 835}
797 836
837static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
838{
839 /* Copy up of disconnected dentry does not set upper alias */
840 if (ovl_already_copied_up(dentry, flags))
841 return false;
842
843 if (special_file(d_inode(dentry)->i_mode))
844 return false;
845
846 if (!ovl_open_flags_need_copy_up(flags))
847 return false;
848
849 return true;
850}
851
852int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
853{
854 int err = 0;
855
856 if (ovl_open_need_copy_up(dentry, file_flags)) {
857 err = ovl_want_write(dentry);
858 if (!err) {
859 err = ovl_copy_up_flags(dentry, file_flags);
860 ovl_drop_write(dentry);
861 }
862 }
863
864 return err;
865}
866
867int ovl_copy_up_with_data(struct dentry *dentry)
868{
869 return ovl_copy_up_flags(dentry, O_WRONLY);
870}
871
798int ovl_copy_up(struct dentry *dentry) 872int ovl_copy_up(struct dentry *dentry)
799{ 873{
800 return ovl_copy_up_flags(dentry, 0); 874 return ovl_copy_up_flags(dentry, 0);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index f480b1a2cd2e..ec350d4d921c 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -24,6 +24,8 @@ module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
24MODULE_PARM_DESC(ovl_redirect_max, 24MODULE_PARM_DESC(ovl_redirect_max,
25 "Maximum length of absolute redirect xattr value"); 25 "Maximum length of absolute redirect xattr value");
26 26
27static int ovl_set_redirect(struct dentry *dentry, bool samedir);
28
27int ovl_cleanup(struct inode *wdir, struct dentry *wdentry) 29int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
28{ 30{
29 int err; 31 int err;
@@ -242,7 +244,7 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
242 .newinode = inode, 244 .newinode = inode,
243 }; 245 };
244 246
245 ovl_dentry_version_inc(dentry->d_parent, false); 247 ovl_dir_modified(dentry->d_parent, false);
246 ovl_dentry_set_upper_alias(dentry); 248 ovl_dentry_set_upper_alias(dentry);
247 if (!hardlink) { 249 if (!hardlink) {
248 /* 250 /*
@@ -657,6 +659,12 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
657 if (err) 659 if (err)
658 goto out_drop_write; 660 goto out_drop_write;
659 661
662 if (ovl_is_metacopy_dentry(old)) {
663 err = ovl_set_redirect(old, false);
664 if (err)
665 goto out_drop_write;
666 }
667
660 err = ovl_nlink_start(old, &locked); 668 err = ovl_nlink_start(old, &locked);
661 if (err) 669 if (err)
662 goto out_drop_write; 670 goto out_drop_write;
@@ -722,7 +730,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
722 if (err) 730 if (err)
723 goto out_d_drop; 731 goto out_d_drop;
724 732
725 ovl_dentry_version_inc(dentry->d_parent, true); 733 ovl_dir_modified(dentry->d_parent, true);
726out_d_drop: 734out_d_drop:
727 d_drop(dentry); 735 d_drop(dentry);
728out_dput_upper: 736out_dput_upper:
@@ -767,7 +775,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
767 err = vfs_rmdir(dir, upper); 775 err = vfs_rmdir(dir, upper);
768 else 776 else
769 err = vfs_unlink(dir, upper, NULL); 777 err = vfs_unlink(dir, upper, NULL);
770 ovl_dentry_version_inc(dentry->d_parent, ovl_type_origin(dentry)); 778 ovl_dir_modified(dentry->d_parent, ovl_type_origin(dentry));
771 779
772 /* 780 /*
773 * Keeping this dentry hashed would mean having to release 781 * Keeping this dentry hashed would mean having to release
@@ -797,6 +805,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
797 int err; 805 int err;
798 bool locked = false; 806 bool locked = false;
799 const struct cred *old_cred; 807 const struct cred *old_cred;
808 struct dentry *upperdentry;
800 bool lower_positive = ovl_lower_positive(dentry); 809 bool lower_positive = ovl_lower_positive(dentry);
801 LIST_HEAD(list); 810 LIST_HEAD(list);
802 811
@@ -832,6 +841,17 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
832 drop_nlink(dentry->d_inode); 841 drop_nlink(dentry->d_inode);
833 } 842 }
834 ovl_nlink_end(dentry, locked); 843 ovl_nlink_end(dentry, locked);
844
845 /*
846 * Copy ctime
847 *
848 * Note: we fail to update ctime if there was no copy-up, only a
849 * whiteout
850 */
851 upperdentry = ovl_dentry_upper(dentry);
852 if (upperdentry)
853 ovl_copyattr(d_inode(upperdentry), d_inode(dentry));
854
835out_drop_write: 855out_drop_write:
836 ovl_drop_write(dentry); 856 ovl_drop_write(dentry);
837out: 857out:
@@ -862,13 +882,13 @@ static bool ovl_can_move(struct dentry *dentry)
862 !d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry); 882 !d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry);
863} 883}
864 884
865static char *ovl_get_redirect(struct dentry *dentry, bool samedir) 885static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect)
866{ 886{
867 char *buf, *ret; 887 char *buf, *ret;
868 struct dentry *d, *tmp; 888 struct dentry *d, *tmp;
869 int buflen = ovl_redirect_max + 1; 889 int buflen = ovl_redirect_max + 1;
870 890
871 if (samedir) { 891 if (!abs_redirect) {
872 ret = kstrndup(dentry->d_name.name, dentry->d_name.len, 892 ret = kstrndup(dentry->d_name.name, dentry->d_name.len,
873 GFP_KERNEL); 893 GFP_KERNEL);
874 goto out; 894 goto out;
@@ -922,15 +942,43 @@ out:
922 return ret ? ret : ERR_PTR(-ENOMEM); 942 return ret ? ret : ERR_PTR(-ENOMEM);
923} 943}
924 944
945static bool ovl_need_absolute_redirect(struct dentry *dentry, bool samedir)
946{
947 struct dentry *lowerdentry;
948
949 if (!samedir)
950 return true;
951
952 if (d_is_dir(dentry))
953 return false;
954
955 /*
956 * For non-dir hardlinked files, we need absolute redirects
957 * in general as two upper hardlinks could be in different
958 * dirs. We could put a relative redirect now and convert
959 * it to absolute redirect later. But when nlink > 1 and
960 * indexing is on, that means relative redirect needs to be
961 * converted to absolute during copy up of another lower
962 * hardllink as well.
963 *
964 * So without optimizing too much, just check if lower is
965 * a hard link or not. If lower is hard link, put absolute
966 * redirect.
967 */
968 lowerdentry = ovl_dentry_lower(dentry);
969 return (d_inode(lowerdentry)->i_nlink > 1);
970}
971
925static int ovl_set_redirect(struct dentry *dentry, bool samedir) 972static int ovl_set_redirect(struct dentry *dentry, bool samedir)
926{ 973{
927 int err; 974 int err;
928 const char *redirect = ovl_dentry_get_redirect(dentry); 975 const char *redirect = ovl_dentry_get_redirect(dentry);
976 bool absolute_redirect = ovl_need_absolute_redirect(dentry, samedir);
929 977
930 if (redirect && (samedir || redirect[0] == '/')) 978 if (redirect && (!absolute_redirect || redirect[0] == '/'))
931 return 0; 979 return 0;
932 980
933 redirect = ovl_get_redirect(dentry, samedir); 981 redirect = ovl_get_redirect(dentry, absolute_redirect);
934 if (IS_ERR(redirect)) 982 if (IS_ERR(redirect))
935 return PTR_ERR(redirect); 983 return PTR_ERR(redirect);
936 984
@@ -1106,22 +1154,20 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
1106 goto out_dput; 1154 goto out_dput;
1107 1155
1108 err = 0; 1156 err = 0;
1109 if (is_dir) { 1157 if (ovl_type_merge_or_lower(old))
1110 if (ovl_type_merge_or_lower(old)) 1158 err = ovl_set_redirect(old, samedir);
1111 err = ovl_set_redirect(old, samedir); 1159 else if (is_dir && !old_opaque && ovl_type_merge(new->d_parent))
1112 else if (!old_opaque && ovl_type_merge(new->d_parent)) 1160 err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
1113 err = ovl_set_opaque_xerr(old, olddentry, -EXDEV); 1161 if (err)
1114 if (err) 1162 goto out_dput;
1115 goto out_dput; 1163
1116 } 1164 if (!overwrite && ovl_type_merge_or_lower(new))
1117 if (!overwrite && new_is_dir) { 1165 err = ovl_set_redirect(new, samedir);
1118 if (ovl_type_merge_or_lower(new)) 1166 else if (!overwrite && new_is_dir && !new_opaque &&
1119 err = ovl_set_redirect(new, samedir); 1167 ovl_type_merge(old->d_parent))
1120 else if (!new_opaque && ovl_type_merge(old->d_parent)) 1168 err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
1121 err = ovl_set_opaque_xerr(new, newdentry, -EXDEV); 1169 if (err)
1122 if (err) 1170 goto out_dput;
1123 goto out_dput;
1124 }
1125 1171
1126 err = ovl_do_rename(old_upperdir->d_inode, olddentry, 1172 err = ovl_do_rename(old_upperdir->d_inode, olddentry,
1127 new_upperdir->d_inode, newdentry, flags); 1173 new_upperdir->d_inode, newdentry, flags);
@@ -1138,10 +1184,15 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
1138 drop_nlink(d_inode(new)); 1184 drop_nlink(d_inode(new));
1139 } 1185 }
1140 1186
1141 ovl_dentry_version_inc(old->d_parent, ovl_type_origin(old) || 1187 ovl_dir_modified(old->d_parent, ovl_type_origin(old) ||
1142 (!overwrite && ovl_type_origin(new))); 1188 (!overwrite && ovl_type_origin(new)));
1143 ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old) || 1189 ovl_dir_modified(new->d_parent, ovl_type_origin(old) ||
1144 (d_inode(new) && ovl_type_origin(new))); 1190 (d_inode(new) && ovl_type_origin(new)));
1191
1192 /* copy ctime: */
1193 ovl_copyattr(d_inode(olddentry), d_inode(old));
1194 if (d_inode(new) && ovl_dentry_upper(new))
1195 ovl_copyattr(d_inode(newdentry), d_inode(new));
1145 1196
1146out_dput: 1197out_dput:
1147 dput(newdentry); 1198 dput(newdentry);
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 9941ece61a14..8fa37cd7818a 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -317,6 +317,9 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb,
317 return ERR_CAST(inode); 317 return ERR_CAST(inode);
318 } 318 }
319 319
320 if (upper)
321 ovl_set_flag(OVL_UPPERDATA, inode);
322
320 dentry = d_find_any_alias(inode); 323 dentry = d_find_any_alias(inode);
321 if (!dentry) { 324 if (!dentry) {
322 dentry = d_alloc_anon(inode->i_sb); 325 dentry = d_alloc_anon(inode->i_sb);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
new file mode 100644
index 000000000000..32e9282893c9
--- /dev/null
+++ b/fs/overlayfs/file.c
@@ -0,0 +1,511 @@
1/*
2 * Copyright (C) 2017 Red Hat, Inc.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 as published by
6 * the Free Software Foundation.
7 */
8
9#include <linux/cred.h>
10#include <linux/file.h>
11#include <linux/mount.h>
12#include <linux/xattr.h>
13#include <linux/uio.h>
14#include "overlayfs.h"
15
/*
 * Pick a one-character tag describing @realinode relative to overlay @inode,
 * used only in the debug message of ovl_open_realfile():
 *   'l' - @realinode is not the upper inode
 *   'u' - @realinode is the upper inode and ovl_has_upperdata() is true
 *   'm' - @realinode is the upper inode and ovl_has_upperdata() is false
 *         (presumably a metadata-only copy-up; confirm against util.c)
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode == ovl_inode_upper(inode))
		return ovl_has_upperdata(inode) ? 'u' : 'm';

	return 'l';
}
25
26static struct file *ovl_open_realfile(const struct file *file,
27 struct inode *realinode)
28{
29 struct inode *inode = file_inode(file);
30 struct file *realfile;
31 const struct cred *old_cred;
32
33 old_cred = ovl_override_creds(inode->i_sb);
34 realfile = open_with_fake_path(&file->f_path, file->f_flags | O_NOATIME,
35 realinode, current_cred());
36 revert_creds(old_cred);
37
38 pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
39 file, file, ovl_whatisit(inode, realinode), file->f_flags,
40 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
41
42 return realfile;
43}
44
45#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
46
47static int ovl_change_flags(struct file *file, unsigned int flags)
48{
49 struct inode *inode = file_inode(file);
50 int err;
51
52 /* No atime modificaton on underlying */
53 flags |= O_NOATIME;
54
55 /* If some flag changed that cannot be changed then something's amiss */
56 if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
57 return -EIO;
58
59 flags &= OVL_SETFL_MASK;
60
61 if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
62 return -EPERM;
63
64 if (flags & O_DIRECT) {
65 if (!file->f_mapping->a_ops ||
66 !file->f_mapping->a_ops->direct_IO)
67 return -EINVAL;
68 }
69
70 if (file->f_op->check_flags) {
71 err = file->f_op->check_flags(flags);
72 if (err)
73 return err;
74 }
75
76 spin_lock(&file->f_lock);
77 file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
78 spin_unlock(&file->f_lock);
79
80 return 0;
81}
82
83static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
84 bool allow_meta)
85{
86 struct inode *inode = file_inode(file);
87 struct inode *realinode;
88
89 real->flags = 0;
90 real->file = file->private_data;
91
92 if (allow_meta)
93 realinode = ovl_inode_real(inode);
94 else
95 realinode = ovl_inode_realdata(inode);
96
97 /* Has it been copied up since we'd opened it? */
98 if (unlikely(file_inode(real->file) != realinode)) {
99 real->flags = FDPUT_FPUT;
100 real->file = ovl_open_realfile(file, realinode);
101
102 return PTR_ERR_OR_ZERO(real->file);
103 }
104
105 /* Did the flags change since open? */
106 if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME))
107 return ovl_change_flags(real->file, file->f_flags);
108
109 return 0;
110}
111
112static int ovl_real_fdget(const struct file *file, struct fd *real)
113{
114 return ovl_real_fdget_meta(file, real, false);
115}
116
117static int ovl_open(struct inode *inode, struct file *file)
118{
119 struct dentry *dentry = file_dentry(file);
120 struct file *realfile;
121 int err;
122
123 err = ovl_open_maybe_copy_up(dentry, file->f_flags);
124 if (err)
125 return err;
126
127 /* No longer need these flags, so don't pass them on to underlying fs */
128 file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
129
130 realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
131 if (IS_ERR(realfile))
132 return PTR_ERR(realfile);
133
134 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
135 file->f_mapping = realfile->f_mapping;
136
137 file->private_data = realfile;
138
139 return 0;
140}
141
142static int ovl_release(struct inode *inode, struct file *file)
143{
144 fput(file->private_data);
145
146 return 0;
147}
148
149static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
150{
151 struct inode *realinode = ovl_inode_real(file_inode(file));
152
153 return generic_file_llseek_size(file, offset, whence,
154 realinode->i_sb->s_maxbytes,
155 i_size_read(realinode));
156}
157
158static void ovl_file_accessed(struct file *file)
159{
160 struct inode *inode, *upperinode;
161
162 if (file->f_flags & O_NOATIME)
163 return;
164
165 inode = file_inode(file);
166 upperinode = ovl_inode_upper(inode);
167
168 if (!upperinode)
169 return;
170
171 if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
172 !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
173 inode->i_mtime = upperinode->i_mtime;
174 inode->i_ctime = upperinode->i_ctime;
175 }
176
177 touch_atime(&file->f_path);
178}
179
180static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
181{
182 int ifl = iocb->ki_flags;
183 rwf_t flags = 0;
184
185 if (ifl & IOCB_NOWAIT)
186 flags |= RWF_NOWAIT;
187 if (ifl & IOCB_HIPRI)
188 flags |= RWF_HIPRI;
189 if (ifl & IOCB_DSYNC)
190 flags |= RWF_DSYNC;
191 if (ifl & IOCB_SYNC)
192 flags |= RWF_SYNC;
193
194 return flags;
195}
196
197static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
198{
199 struct file *file = iocb->ki_filp;
200 struct fd real;
201 const struct cred *old_cred;
202 ssize_t ret;
203
204 if (!iov_iter_count(iter))
205 return 0;
206
207 ret = ovl_real_fdget(file, &real);
208 if (ret)
209 return ret;
210
211 old_cred = ovl_override_creds(file_inode(file)->i_sb);
212 ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
213 ovl_iocb_to_rwf(iocb));
214 revert_creds(old_cred);
215
216 ovl_file_accessed(file);
217
218 fdput(real);
219
220 return ret;
221}
222
223static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
224{
225 struct file *file = iocb->ki_filp;
226 struct inode *inode = file_inode(file);
227 struct fd real;
228 const struct cred *old_cred;
229 ssize_t ret;
230
231 if (!iov_iter_count(iter))
232 return 0;
233
234 inode_lock(inode);
235 /* Update mode */
236 ovl_copyattr(ovl_inode_real(inode), inode);
237 ret = file_remove_privs(file);
238 if (ret)
239 goto out_unlock;
240
241 ret = ovl_real_fdget(file, &real);
242 if (ret)
243 goto out_unlock;
244
245 old_cred = ovl_override_creds(file_inode(file)->i_sb);
246 ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
247 ovl_iocb_to_rwf(iocb));
248 revert_creds(old_cred);
249
250 /* Update size */
251 ovl_copyattr(ovl_inode_real(inode), inode);
252
253 fdput(real);
254
255out_unlock:
256 inode_unlock(inode);
257
258 return ret;
259}
260
261static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
262{
263 struct fd real;
264 const struct cred *old_cred;
265 int ret;
266
267 ret = ovl_real_fdget_meta(file, &real, !datasync);
268 if (ret)
269 return ret;
270
271 /* Don't sync lower file for fear of receiving EROFS error */
272 if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
273 old_cred = ovl_override_creds(file_inode(file)->i_sb);
274 ret = vfs_fsync_range(real.file, start, end, datasync);
275 revert_creds(old_cred);
276 }
277
278 fdput(real);
279
280 return ret;
281}
282
283static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
284{
285 struct file *realfile = file->private_data;
286 const struct cred *old_cred;
287 int ret;
288
289 if (!realfile->f_op->mmap)
290 return -ENODEV;
291
292 if (WARN_ON(file != vma->vm_file))
293 return -EIO;
294
295 vma->vm_file = get_file(realfile);
296
297 old_cred = ovl_override_creds(file_inode(file)->i_sb);
298 ret = call_mmap(vma->vm_file, vma);
299 revert_creds(old_cred);
300
301 if (ret) {
302 /* Drop reference count from new vm_file value */
303 fput(realfile);
304 } else {
305 /* Drop reference count from previous vm_file value */
306 fput(file);
307 }
308
309 ovl_file_accessed(file);
310
311 return ret;
312}
313
314static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
315{
316 struct inode *inode = file_inode(file);
317 struct fd real;
318 const struct cred *old_cred;
319 int ret;
320
321 ret = ovl_real_fdget(file, &real);
322 if (ret)
323 return ret;
324
325 old_cred = ovl_override_creds(file_inode(file)->i_sb);
326 ret = vfs_fallocate(real.file, mode, offset, len);
327 revert_creds(old_cred);
328
329 /* Update size */
330 ovl_copyattr(ovl_inode_real(inode), inode);
331
332 fdput(real);
333
334 return ret;
335}
336
337static long ovl_real_ioctl(struct file *file, unsigned int cmd,
338 unsigned long arg)
339{
340 struct fd real;
341 const struct cred *old_cred;
342 long ret;
343
344 ret = ovl_real_fdget(file, &real);
345 if (ret)
346 return ret;
347
348 old_cred = ovl_override_creds(file_inode(file)->i_sb);
349 ret = vfs_ioctl(real.file, cmd, arg);
350 revert_creds(old_cred);
351
352 fdput(real);
353
354 return ret;
355}
356
357static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
358{
359 long ret;
360 struct inode *inode = file_inode(file);
361
362 switch (cmd) {
363 case FS_IOC_GETFLAGS:
364 ret = ovl_real_ioctl(file, cmd, arg);
365 break;
366
367 case FS_IOC_SETFLAGS:
368 if (!inode_owner_or_capable(inode))
369 return -EACCES;
370
371 ret = mnt_want_write_file(file);
372 if (ret)
373 return ret;
374
375 ret = ovl_copy_up_with_data(file_dentry(file));
376 if (!ret) {
377 ret = ovl_real_ioctl(file, cmd, arg);
378
379 inode_lock(inode);
380 ovl_copyflags(ovl_inode_real(inode), inode);
381 inode_unlock(inode);
382 }
383
384 mnt_drop_write_file(file);
385 break;
386
387 default:
388 ret = -ENOTTY;
389 }
390
391 return ret;
392}
393
394static long ovl_compat_ioctl(struct file *file, unsigned int cmd,
395 unsigned long arg)
396{
397 switch (cmd) {
398 case FS_IOC32_GETFLAGS:
399 cmd = FS_IOC_GETFLAGS;
400 break;
401
402 case FS_IOC32_SETFLAGS:
403 cmd = FS_IOC_SETFLAGS;
404 break;
405
406 default:
407 return -ENOIOCTLCMD;
408 }
409
410 return ovl_ioctl(file, cmd, arg);
411}
412
413enum ovl_copyop {
414 OVL_COPY,
415 OVL_CLONE,
416 OVL_DEDUPE,
417};
418
419static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in,
420 struct file *file_out, loff_t pos_out,
421 u64 len, unsigned int flags, enum ovl_copyop op)
422{
423 struct inode *inode_out = file_inode(file_out);
424 struct fd real_in, real_out;
425 const struct cred *old_cred;
426 ssize_t ret;
427
428 ret = ovl_real_fdget(file_out, &real_out);
429 if (ret)
430 return ret;
431
432 ret = ovl_real_fdget(file_in, &real_in);
433 if (ret) {
434 fdput(real_out);
435 return ret;
436 }
437
438 old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
439 switch (op) {
440 case OVL_COPY:
441 ret = vfs_copy_file_range(real_in.file, pos_in,
442 real_out.file, pos_out, len, flags);
443 break;
444
445 case OVL_CLONE:
446 ret = vfs_clone_file_range(real_in.file, pos_in,
447 real_out.file, pos_out, len);
448 break;
449
450 case OVL_DEDUPE:
451 ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
452 real_out.file, pos_out, len);
453 break;
454 }
455 revert_creds(old_cred);
456
457 /* Update size */
458 ovl_copyattr(ovl_inode_real(inode_out), inode_out);
459
460 fdput(real_in);
461 fdput(real_out);
462
463 return ret;
464}
465
466static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
467 struct file *file_out, loff_t pos_out,
468 size_t len, unsigned int flags)
469{
470 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
471 OVL_COPY);
472}
473
474static int ovl_clone_file_range(struct file *file_in, loff_t pos_in,
475 struct file *file_out, loff_t pos_out, u64 len)
476{
477 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
478 OVL_CLONE);
479}
480
481static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in,
482 struct file *file_out, loff_t pos_out, u64 len)
483{
484 /*
485 * Don't copy up because of a dedupe request, this wouldn't make sense
486 * most of the time (data would be duplicated instead of deduplicated).
487 */
488 if (!ovl_inode_upper(file_inode(file_in)) ||
489 !ovl_inode_upper(file_inode(file_out)))
490 return -EPERM;
491
492 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
493 OVL_DEDUPE);
494}
495
496const struct file_operations ovl_file_operations = {
497 .open = ovl_open,
498 .release = ovl_release,
499 .llseek = ovl_llseek,
500 .read_iter = ovl_read_iter,
501 .write_iter = ovl_write_iter,
502 .fsync = ovl_fsync,
503 .mmap = ovl_mmap,
504 .fallocate = ovl_fallocate,
505 .unlocked_ioctl = ovl_ioctl,
506 .compat_ioctl = ovl_compat_ioctl,
507
508 .copy_file_range = ovl_copy_file_range,
509 .clone_file_range = ovl_clone_file_range,
510 .dedupe_file_range = ovl_dedupe_file_range,
511};
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index ed16a898caeb..e0bb217c01e2 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -19,18 +19,10 @@
19int ovl_setattr(struct dentry *dentry, struct iattr *attr) 19int ovl_setattr(struct dentry *dentry, struct iattr *attr)
20{ 20{
21 int err; 21 int err;
22 bool full_copy_up = false;
22 struct dentry *upperdentry; 23 struct dentry *upperdentry;
23 const struct cred *old_cred; 24 const struct cred *old_cred;
24 25
25 /*
26 * Check for permissions before trying to copy-up. This is redundant
27 * since it will be rechecked later by ->setattr() on upper dentry. But
28 * without this, copy-up can be triggered by just about anybody.
29 *
30 * We don't initialize inode->size, which just means that
31 * inode_newsize_ok() will always check against MAX_LFS_FILESIZE and not
32 * check for a swapfile (which this won't be anyway).
33 */
34 err = setattr_prepare(dentry, attr); 26 err = setattr_prepare(dentry, attr);
35 if (err) 27 if (err)
36 return err; 28 return err;
@@ -39,10 +31,33 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
39 if (err) 31 if (err)
40 goto out; 32 goto out;
41 33
42 err = ovl_copy_up(dentry); 34 if (attr->ia_valid & ATTR_SIZE) {
35 struct inode *realinode = d_inode(ovl_dentry_real(dentry));
36
37 err = -ETXTBSY;
38 if (atomic_read(&realinode->i_writecount) < 0)
39 goto out_drop_write;
40
41 /* Truncate should trigger data copy up as well */
42 full_copy_up = true;
43 }
44
45 if (!full_copy_up)
46 err = ovl_copy_up(dentry);
47 else
48 err = ovl_copy_up_with_data(dentry);
43 if (!err) { 49 if (!err) {
50 struct inode *winode = NULL;
51
44 upperdentry = ovl_dentry_upper(dentry); 52 upperdentry = ovl_dentry_upper(dentry);
45 53
54 if (attr->ia_valid & ATTR_SIZE) {
55 winode = d_inode(upperdentry);
56 err = get_write_access(winode);
57 if (err)
58 goto out_drop_write;
59 }
60
46 if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) 61 if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
47 attr->ia_valid &= ~ATTR_MODE; 62 attr->ia_valid &= ~ATTR_MODE;
48 63
@@ -53,7 +68,11 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
53 if (!err) 68 if (!err)
54 ovl_copyattr(upperdentry->d_inode, dentry->d_inode); 69 ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
55 inode_unlock(upperdentry->d_inode); 70 inode_unlock(upperdentry->d_inode);
71
72 if (winode)
73 put_write_access(winode);
56 } 74 }
75out_drop_write:
57 ovl_drop_write(dentry); 76 ovl_drop_write(dentry);
58out: 77out:
59 return err; 78 return err;
@@ -133,6 +152,9 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
133 bool samefs = ovl_same_sb(dentry->d_sb); 152 bool samefs = ovl_same_sb(dentry->d_sb);
134 struct ovl_layer *lower_layer = NULL; 153 struct ovl_layer *lower_layer = NULL;
135 int err; 154 int err;
155 bool metacopy_blocks = false;
156
157 metacopy_blocks = ovl_is_metacopy_dentry(dentry);
136 158
137 type = ovl_path_real(dentry, &realpath); 159 type = ovl_path_real(dentry, &realpath);
138 old_cred = ovl_override_creds(dentry->d_sb); 160 old_cred = ovl_override_creds(dentry->d_sb);
@@ -154,7 +176,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
154 lower_layer = ovl_layer_lower(dentry); 176 lower_layer = ovl_layer_lower(dentry);
155 } else if (OVL_TYPE_ORIGIN(type)) { 177 } else if (OVL_TYPE_ORIGIN(type)) {
156 struct kstat lowerstat; 178 struct kstat lowerstat;
157 u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0); 179 u32 lowermask = STATX_INO | STATX_BLOCKS |
180 (!is_dir ? STATX_NLINK : 0);
158 181
159 ovl_path_lower(dentry, &realpath); 182 ovl_path_lower(dentry, &realpath);
160 err = vfs_getattr(&realpath, &lowerstat, 183 err = vfs_getattr(&realpath, &lowerstat,
@@ -183,6 +206,35 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
183 stat->ino = lowerstat.ino; 206 stat->ino = lowerstat.ino;
184 lower_layer = ovl_layer_lower(dentry); 207 lower_layer = ovl_layer_lower(dentry);
185 } 208 }
209
210 /*
211 * If we are querying a metacopy dentry and lower
212 * dentry is data dentry, then use the blocks we
213 * queried just now. We don't have to do additional
214 * vfs_getattr(). If lower itself is metacopy, then
215 * additional vfs_getattr() is unavoidable.
216 */
217 if (metacopy_blocks &&
218 realpath.dentry == ovl_dentry_lowerdata(dentry)) {
219 stat->blocks = lowerstat.blocks;
220 metacopy_blocks = false;
221 }
222 }
223
224 if (metacopy_blocks) {
225 /*
226 * If lower is not same as lowerdata or if there was
227 * no origin on upper, we can end up here.
228 */
229 struct kstat lowerdatastat;
230 u32 lowermask = STATX_BLOCKS;
231
232 ovl_path_lowerdata(dentry, &realpath);
233 err = vfs_getattr(&realpath, &lowerdatastat,
234 lowermask, flags);
235 if (err)
236 goto out;
237 stat->blocks = lowerdatastat.blocks;
186 } 238 }
187 } 239 }
188 240
@@ -304,6 +356,9 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
304 } 356 }
305 revert_creds(old_cred); 357 revert_creds(old_cred);
306 358
359 /* copy c/mtime */
360 ovl_copyattr(d_inode(realdentry), inode);
361
307out_drop_write: 362out_drop_write:
308 ovl_drop_write(dentry); 363 ovl_drop_write(dentry);
309out: 364out:
@@ -384,38 +439,6 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type)
384 return acl; 439 return acl;
385} 440}
386 441
387static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
388{
389 /* Copy up of disconnected dentry does not set upper alias */
390 if (ovl_dentry_upper(dentry) &&
391 (ovl_dentry_has_upper_alias(dentry) ||
392 (dentry->d_flags & DCACHE_DISCONNECTED)))
393 return false;
394
395 if (special_file(d_inode(dentry)->i_mode))
396 return false;
397
398 if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
399 return false;
400
401 return true;
402}
403
404int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
405{
406 int err = 0;
407
408 if (ovl_open_need_copy_up(dentry, file_flags)) {
409 err = ovl_want_write(dentry);
410 if (!err) {
411 err = ovl_copy_up_flags(dentry, file_flags);
412 ovl_drop_write(dentry);
413 }
414 }
415
416 return err;
417}
418
419int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags) 442int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
420{ 443{
421 if (flags & S_ATIME) { 444 if (flags & S_ATIME) {
@@ -433,6 +456,23 @@ int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
433 return 0; 456 return 0;
434} 457}
435 458
459static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
460 u64 start, u64 len)
461{
462 int err;
463 struct inode *realinode = ovl_inode_real(inode);
464 const struct cred *old_cred;
465
466 if (!realinode->i_op->fiemap)
467 return -EOPNOTSUPP;
468
469 old_cred = ovl_override_creds(inode->i_sb);
470 err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
471 revert_creds(old_cred);
472
473 return err;
474}
475
436static const struct inode_operations ovl_file_inode_operations = { 476static const struct inode_operations ovl_file_inode_operations = {
437 .setattr = ovl_setattr, 477 .setattr = ovl_setattr,
438 .permission = ovl_permission, 478 .permission = ovl_permission,
@@ -440,6 +480,7 @@ static const struct inode_operations ovl_file_inode_operations = {
440 .listxattr = ovl_listxattr, 480 .listxattr = ovl_listxattr,
441 .get_acl = ovl_get_acl, 481 .get_acl = ovl_get_acl,
442 .update_time = ovl_update_time, 482 .update_time = ovl_update_time,
483 .fiemap = ovl_fiemap,
443}; 484};
444 485
445static const struct inode_operations ovl_symlink_inode_operations = { 486static const struct inode_operations ovl_symlink_inode_operations = {
@@ -450,6 +491,15 @@ static const struct inode_operations ovl_symlink_inode_operations = {
450 .update_time = ovl_update_time, 491 .update_time = ovl_update_time,
451}; 492};
452 493
494static const struct inode_operations ovl_special_inode_operations = {
495 .setattr = ovl_setattr,
496 .permission = ovl_permission,
497 .getattr = ovl_getattr,
498 .listxattr = ovl_listxattr,
499 .get_acl = ovl_get_acl,
500 .update_time = ovl_update_time,
501};
502
453/* 503/*
454 * It is possible to stack overlayfs instance on top of another 504 * It is possible to stack overlayfs instance on top of another
455 * overlayfs instance as lower layer. We need to annonate the 505 * overlayfs instance as lower layer. We need to annonate the
@@ -520,6 +570,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
520 switch (mode & S_IFMT) { 570 switch (mode & S_IFMT) {
521 case S_IFREG: 571 case S_IFREG:
522 inode->i_op = &ovl_file_inode_operations; 572 inode->i_op = &ovl_file_inode_operations;
573 inode->i_fop = &ovl_file_operations;
523 break; 574 break;
524 575
525 case S_IFDIR: 576 case S_IFDIR:
@@ -532,7 +583,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
532 break; 583 break;
533 584
534 default: 585 default:
535 inode->i_op = &ovl_file_inode_operations; 586 inode->i_op = &ovl_special_inode_operations;
536 init_special_inode(inode, mode, rdev); 587 init_special_inode(inode, mode, rdev);
537 break; 588 break;
538 } 589 }
@@ -769,8 +820,9 @@ struct inode *ovl_get_inode(struct super_block *sb,
769 bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, 820 bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry,
770 oip->index); 821 oip->index);
771 int fsid = bylower ? oip->lowerpath->layer->fsid : 0; 822 int fsid = bylower ? oip->lowerpath->layer->fsid : 0;
772 bool is_dir; 823 bool is_dir, metacopy = false;
773 unsigned long ino = 0; 824 unsigned long ino = 0;
825 int err = -ENOMEM;
774 826
775 if (!realinode) 827 if (!realinode)
776 realinode = d_inode(lowerdentry); 828 realinode = d_inode(lowerdentry);
@@ -787,7 +839,7 @@ struct inode *ovl_get_inode(struct super_block *sb,
787 839
788 inode = ovl_iget5(sb, oip->newinode, key); 840 inode = ovl_iget5(sb, oip->newinode, key);
789 if (!inode) 841 if (!inode)
790 goto out_nomem; 842 goto out_err;
791 if (!(inode->i_state & I_NEW)) { 843 if (!(inode->i_state & I_NEW)) {
792 /* 844 /*
793 * Verify that the underlying files stored in the inode 845 * Verify that the underlying files stored in the inode
@@ -796,11 +848,12 @@ struct inode *ovl_get_inode(struct super_block *sb,
796 if (!ovl_verify_inode(inode, lowerdentry, upperdentry, 848 if (!ovl_verify_inode(inode, lowerdentry, upperdentry,
797 true)) { 849 true)) {
798 iput(inode); 850 iput(inode);
799 inode = ERR_PTR(-ESTALE); 851 err = -ESTALE;
800 goto out; 852 goto out_err;
801 } 853 }
802 854
803 dput(upperdentry); 855 dput(upperdentry);
856 kfree(oip->redirect);
804 goto out; 857 goto out;
805 } 858 }
806 859
@@ -812,11 +865,13 @@ struct inode *ovl_get_inode(struct super_block *sb,
812 } else { 865 } else {
813 /* Lower hardlink that will be broken on copy up */ 866 /* Lower hardlink that will be broken on copy up */
814 inode = new_inode(sb); 867 inode = new_inode(sb);
815 if (!inode) 868 if (!inode) {
816 goto out_nomem; 869 err = -ENOMEM;
870 goto out_err;
871 }
817 } 872 }
818 ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid); 873 ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid);
819 ovl_inode_init(inode, upperdentry, lowerdentry); 874 ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata);
820 875
821 if (upperdentry && ovl_is_impuredir(upperdentry)) 876 if (upperdentry && ovl_is_impuredir(upperdentry))
822 ovl_set_flag(OVL_IMPURE, inode); 877 ovl_set_flag(OVL_IMPURE, inode);
@@ -824,6 +879,20 @@ struct inode *ovl_get_inode(struct super_block *sb,
824 if (oip->index) 879 if (oip->index)
825 ovl_set_flag(OVL_INDEX, inode); 880 ovl_set_flag(OVL_INDEX, inode);
826 881
882 if (upperdentry) {
883 err = ovl_check_metacopy_xattr(upperdentry);
884 if (err < 0)
885 goto out_err;
886 metacopy = err;
887 if (!metacopy)
888 ovl_set_flag(OVL_UPPERDATA, inode);
889 }
890
891 OVL_I(inode)->redirect = oip->redirect;
892
893 if (bylower)
894 ovl_set_flag(OVL_CONST_INO, inode);
895
827 /* Check for non-merge dir that may have whiteouts */ 896 /* Check for non-merge dir that may have whiteouts */
828 if (is_dir) { 897 if (is_dir) {
829 if (((upperdentry && lowerdentry) || oip->numlower > 1) || 898 if (((upperdentry && lowerdentry) || oip->numlower > 1) ||
@@ -837,7 +906,7 @@ struct inode *ovl_get_inode(struct super_block *sb,
837out: 906out:
838 return inode; 907 return inode;
839 908
840out_nomem: 909out_err:
841 inode = ERR_PTR(-ENOMEM); 910 inode = ERR_PTR(err);
842 goto out; 911 goto out;
843} 912}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index c993dd8db739..f28711846dd6 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -24,38 +24,20 @@ struct ovl_lookup_data {
24 bool stop; 24 bool stop;
25 bool last; 25 bool last;
26 char *redirect; 26 char *redirect;
27 bool metacopy;
27}; 28};
28 29
29static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, 30static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
30 size_t prelen, const char *post) 31 size_t prelen, const char *post)
31{ 32{
32 int res; 33 int res;
33 char *s, *next, *buf = NULL; 34 char *buf;
34 35
35 res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0); 36 buf = ovl_get_redirect_xattr(dentry, prelen + strlen(post));
36 if (res < 0) { 37 if (IS_ERR_OR_NULL(buf))
37 if (res == -ENODATA || res == -EOPNOTSUPP) 38 return PTR_ERR(buf);
38 return 0;
39 goto fail;
40 }
41 buf = kzalloc(prelen + res + strlen(post) + 1, GFP_KERNEL);
42 if (!buf)
43 return -ENOMEM;
44 39
45 if (res == 0)
46 goto invalid;
47
48 res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res);
49 if (res < 0)
50 goto fail;
51 if (res == 0)
52 goto invalid;
53 if (buf[0] == '/') { 40 if (buf[0] == '/') {
54 for (s = buf; *s++ == '/'; s = next) {
55 next = strchrnul(s, '/');
56 if (s == next)
57 goto invalid;
58 }
59 /* 41 /*
60 * One of the ancestor path elements in an absolute path 42 * One of the ancestor path elements in an absolute path
61 * lookup in ovl_lookup_layer() could have been opaque and 43 * lookup in ovl_lookup_layer() could have been opaque and
@@ -66,9 +48,7 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
66 */ 48 */
67 d->stop = false; 49 d->stop = false;
68 } else { 50 } else {
69 if (strchr(buf, '/') != NULL) 51 res = strlen(buf) + 1;
70 goto invalid;
71
72 memmove(buf + prelen, buf, res); 52 memmove(buf + prelen, buf, res);
73 memcpy(buf, d->name.name, prelen); 53 memcpy(buf, d->name.name, prelen);
74 } 54 }
@@ -80,16 +60,6 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
80 d->name.len = strlen(d->redirect); 60 d->name.len = strlen(d->redirect);
81 61
82 return 0; 62 return 0;
83
84err_free:
85 kfree(buf);
86 return 0;
87fail:
88 pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res);
89 goto err_free;
90invalid:
91 pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf);
92 goto err_free;
93} 63}
94 64
95static int ovl_acceptable(void *ctx, struct dentry *dentry) 65static int ovl_acceptable(void *ctx, struct dentry *dentry)
@@ -252,28 +222,39 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
252 d->stop = d->opaque = true; 222 d->stop = d->opaque = true;
253 goto put_and_out; 223 goto put_and_out;
254 } 224 }
255 if (!d_can_lookup(this)) { 225 /*
226 * This dentry should be a regular file if previous layer lookup
227 * found a metacopy dentry.
228 */
229 if (last_element && d->metacopy && !d_is_reg(this)) {
256 d->stop = true; 230 d->stop = true;
257 if (d->is_dir) 231 goto put_and_out;
258 goto put_and_out;
259
260 /*
261 * NB: handle failure to lookup non-last element when non-dir
262 * redirects become possible
263 */
264 WARN_ON(!last_element);
265 goto out;
266 } 232 }
267 if (last_element) 233 if (!d_can_lookup(this)) {
268 d->is_dir = true; 234 if (d->is_dir || !last_element) {
269 if (d->last) 235 d->stop = true;
270 goto out; 236 goto put_and_out;
237 }
238 err = ovl_check_metacopy_xattr(this);
239 if (err < 0)
240 goto out_err;
271 241
272 if (ovl_is_opaquedir(this)) { 242 d->metacopy = err;
273 d->stop = true; 243 d->stop = !d->metacopy;
244 if (!d->metacopy || d->last)
245 goto out;
246 } else {
274 if (last_element) 247 if (last_element)
275 d->opaque = true; 248 d->is_dir = true;
276 goto out; 249 if (d->last)
250 goto out;
251
252 if (ovl_is_opaquedir(this)) {
253 d->stop = true;
254 if (last_element)
255 d->opaque = true;
256 goto out;
257 }
277 } 258 }
278 err = ovl_check_redirect(this, d, prelen, post); 259 err = ovl_check_redirect(this, d, prelen, post);
279 if (err) 260 if (err)
@@ -823,7 +804,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
823 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 804 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
824 struct ovl_entry *poe = dentry->d_parent->d_fsdata; 805 struct ovl_entry *poe = dentry->d_parent->d_fsdata;
825 struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; 806 struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
826 struct ovl_path *stack = NULL; 807 struct ovl_path *stack = NULL, *origin_path = NULL;
827 struct dentry *upperdir, *upperdentry = NULL; 808 struct dentry *upperdir, *upperdentry = NULL;
828 struct dentry *origin = NULL; 809 struct dentry *origin = NULL;
829 struct dentry *index = NULL; 810 struct dentry *index = NULL;
@@ -834,6 +815,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
834 struct dentry *this; 815 struct dentry *this;
835 unsigned int i; 816 unsigned int i;
836 int err; 817 int err;
818 bool metacopy = false;
837 struct ovl_lookup_data d = { 819 struct ovl_lookup_data d = {
838 .name = dentry->d_name, 820 .name = dentry->d_name,
839 .is_dir = false, 821 .is_dir = false,
@@ -841,6 +823,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
841 .stop = false, 823 .stop = false,
842 .last = ofs->config.redirect_follow ? false : !poe->numlower, 824 .last = ofs->config.redirect_follow ? false : !poe->numlower,
843 .redirect = NULL, 825 .redirect = NULL,
826 .metacopy = false,
844 }; 827 };
845 828
846 if (dentry->d_name.len > ofs->namelen) 829 if (dentry->d_name.len > ofs->namelen)
@@ -859,7 +842,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
859 goto out; 842 goto out;
860 } 843 }
861 if (upperdentry && !d.is_dir) { 844 if (upperdentry && !d.is_dir) {
862 BUG_ON(!d.stop || d.redirect); 845 unsigned int origin_ctr = 0;
846
863 /* 847 /*
864 * Lookup copy up origin by decoding origin file handle. 848 * Lookup copy up origin by decoding origin file handle.
865 * We may get a disconnected dentry, which is fine, 849 * We may get a disconnected dentry, which is fine,
@@ -870,9 +854,13 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
870 * number - it's the same as if we held a reference 854 * number - it's the same as if we held a reference
871 * to a dentry in lower layer that was moved under us. 855 * to a dentry in lower layer that was moved under us.
872 */ 856 */
873 err = ovl_check_origin(ofs, upperdentry, &stack, &ctr); 857 err = ovl_check_origin(ofs, upperdentry, &origin_path,
858 &origin_ctr);
874 if (err) 859 if (err)
875 goto out_put_upper; 860 goto out_put_upper;
861
862 if (d.metacopy)
863 metacopy = true;
876 } 864 }
877 865
878 if (d.redirect) { 866 if (d.redirect) {
@@ -913,7 +901,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
913 * If no origin fh is stored in upper of a merge dir, store fh 901 * If no origin fh is stored in upper of a merge dir, store fh
914 * of lower dir and set upper parent "impure". 902 * of lower dir and set upper parent "impure".
915 */ 903 */
916 if (upperdentry && !ctr && !ofs->noxattr) { 904 if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
917 err = ovl_fix_origin(dentry, this, upperdentry); 905 err = ovl_fix_origin(dentry, this, upperdentry);
918 if (err) { 906 if (err) {
919 dput(this); 907 dput(this);
@@ -925,18 +913,35 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
925 * When "verify_lower" feature is enabled, do not merge with a 913 * When "verify_lower" feature is enabled, do not merge with a
926 * lower dir that does not match a stored origin xattr. In any 914 * lower dir that does not match a stored origin xattr. In any
927 * case, only verified origin is used for index lookup. 915 * case, only verified origin is used for index lookup.
916 *
917 * For non-dir dentry, if index=on, then ensure origin
918 * matches the dentry found using path based lookup,
919 * otherwise error out.
928 */ 920 */
929 if (upperdentry && !ctr && ovl_verify_lower(dentry->d_sb)) { 921 if (upperdentry && !ctr &&
922 ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
923 (!d.is_dir && ofs->config.index && origin_path))) {
930 err = ovl_verify_origin(upperdentry, this, false); 924 err = ovl_verify_origin(upperdentry, this, false);
931 if (err) { 925 if (err) {
932 dput(this); 926 dput(this);
933 break; 927 if (d.is_dir)
928 break;
929 goto out_put;
934 } 930 }
935
936 /* Bless lower dir as verified origin */
937 origin = this; 931 origin = this;
938 } 932 }
939 933
934 if (d.metacopy)
935 metacopy = true;
936 /*
937 * Do not store intermediate metacopy dentries in chain,
938 * except top most lower metacopy dentry
939 */
940 if (d.metacopy && ctr) {
941 dput(this);
942 continue;
943 }
944
940 stack[ctr].dentry = this; 945 stack[ctr].dentry = this;
941 stack[ctr].layer = lower.layer; 946 stack[ctr].layer = lower.layer;
942 ctr++; 947 ctr++;
@@ -968,13 +973,48 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
968 } 973 }
969 } 974 }
970 975
976 if (metacopy) {
977 /*
978 * Found a metacopy dentry but did not find corresponding
979 * data dentry
980 */
981 if (d.metacopy) {
982 err = -EIO;
983 goto out_put;
984 }
985
986 err = -EPERM;
987 if (!ofs->config.metacopy) {
988 pr_warn_ratelimited("overlay: refusing to follow metacopy origin for (%pd2)\n",
989 dentry);
990 goto out_put;
991 }
992 } else if (!d.is_dir && upperdentry && !ctr && origin_path) {
993 if (WARN_ON(stack != NULL)) {
994 err = -EIO;
995 goto out_put;
996 }
997 stack = origin_path;
998 ctr = 1;
999 origin_path = NULL;
1000 }
1001
971 /* 1002 /*
972 * Lookup index by lower inode and verify it matches upper inode. 1003 * Lookup index by lower inode and verify it matches upper inode.
973 * We only trust dir index if we verified that lower dir matches 1004 * We only trust dir index if we verified that lower dir matches
974 * origin, otherwise dir index entries may be inconsistent and we 1005 * origin, otherwise dir index entries may be inconsistent and we
975 * ignore them. Always lookup index of non-dir and non-upper. 1006 * ignore them.
1007 *
1008 * For non-dir upper metacopy dentry, we already set "origin" if we
1009 * verified that lower matched upper origin. If upper origin was
1010 * not present (because lower layer did not support fh encode/decode),
1011 * or indexing is not enabled, do not set "origin" and skip looking up
1012 * index. This case should be handled in same way as a non-dir upper
1013 * without ORIGIN is handled.
1014 *
1015 * Always lookup index of non-dir non-metacopy and non-upper.
976 */ 1016 */
977 if (ctr && (!upperdentry || !d.is_dir)) 1017 if (ctr && (!upperdentry || (!d.is_dir && !metacopy)))
978 origin = stack[0].dentry; 1018 origin = stack[0].dentry;
979 1019
980 if (origin && ovl_indexdir(dentry->d_sb) && 1020 if (origin && ovl_indexdir(dentry->d_sb) &&
@@ -1000,8 +1040,15 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
1000 1040
1001 if (upperdentry) 1041 if (upperdentry)
1002 ovl_dentry_set_upper_alias(dentry); 1042 ovl_dentry_set_upper_alias(dentry);
1003 else if (index) 1043 else if (index) {
1004 upperdentry = dget(index); 1044 upperdentry = dget(index);
1045 upperredirect = ovl_get_redirect_xattr(upperdentry, 0);
1046 if (IS_ERR(upperredirect)) {
1047 err = PTR_ERR(upperredirect);
1048 upperredirect = NULL;
1049 goto out_free_oe;
1050 }
1051 }
1005 1052
1006 if (upperdentry || ctr) { 1053 if (upperdentry || ctr) {
1007 struct ovl_inode_params oip = { 1054 struct ovl_inode_params oip = {
@@ -1009,22 +1056,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
1009 .lowerpath = stack, 1056 .lowerpath = stack,
1010 .index = index, 1057 .index = index,
1011 .numlower = ctr, 1058 .numlower = ctr,
1059 .redirect = upperredirect,
1060 .lowerdata = (ctr > 1 && !d.is_dir) ?
1061 stack[ctr - 1].dentry : NULL,
1012 }; 1062 };
1013 1063
1014 inode = ovl_get_inode(dentry->d_sb, &oip); 1064 inode = ovl_get_inode(dentry->d_sb, &oip);
1015 err = PTR_ERR(inode); 1065 err = PTR_ERR(inode);
1016 if (IS_ERR(inode)) 1066 if (IS_ERR(inode))
1017 goto out_free_oe; 1067 goto out_free_oe;
1018
1019 /*
1020 * NB: handle redirected hard links when non-dir redirects
1021 * become possible
1022 */
1023 WARN_ON(OVL_I(inode)->redirect);
1024 OVL_I(inode)->redirect = upperredirect;
1025 } 1068 }
1026 1069
1027 revert_creds(old_cred); 1070 revert_creds(old_cred);
1071 if (origin_path) {
1072 dput(origin_path->dentry);
1073 kfree(origin_path);
1074 }
1028 dput(index); 1075 dput(index);
1029 kfree(stack); 1076 kfree(stack);
1030 kfree(d.redirect); 1077 kfree(d.redirect);
@@ -1039,6 +1086,10 @@ out_put:
1039 dput(stack[i].dentry); 1086 dput(stack[i].dentry);
1040 kfree(stack); 1087 kfree(stack);
1041out_put_upper: 1088out_put_upper:
1089 if (origin_path) {
1090 dput(origin_path->dentry);
1091 kfree(origin_path);
1092 }
1042 dput(upperdentry); 1093 dput(upperdentry);
1043 kfree(upperredirect); 1094 kfree(upperredirect);
1044out: 1095out:
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 7538b9b56237..f61839e1054c 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -9,6 +9,7 @@
9 9
10#include <linux/kernel.h> 10#include <linux/kernel.h>
11#include <linux/uuid.h> 11#include <linux/uuid.h>
12#include <linux/fs.h>
12#include "ovl_entry.h" 13#include "ovl_entry.h"
13 14
14enum ovl_path_type { 15enum ovl_path_type {
@@ -28,6 +29,7 @@ enum ovl_path_type {
28#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure" 29#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
29#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink" 30#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
30#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper" 31#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper"
32#define OVL_XATTR_METACOPY OVL_XATTR_PREFIX "metacopy"
31 33
32enum ovl_inode_flag { 34enum ovl_inode_flag {
33 /* Pure upper dir that may contain non pure upper entries */ 35 /* Pure upper dir that may contain non pure upper entries */
@@ -35,6 +37,9 @@ enum ovl_inode_flag {
35 /* Non-merge dir that may contain whiteout entries */ 37 /* Non-merge dir that may contain whiteout entries */
36 OVL_WHITEOUTS, 38 OVL_WHITEOUTS,
37 OVL_INDEX, 39 OVL_INDEX,
40 OVL_UPPERDATA,
41 /* Inode number will remain constant over copy up. */
42 OVL_CONST_INO,
38}; 43};
39 44
40enum ovl_entry_flag { 45enum ovl_entry_flag {
@@ -190,6 +195,14 @@ static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
190 return ret; 195 return ret;
191} 196}
192 197
198static inline bool ovl_open_flags_need_copy_up(int flags)
199{
200 if (!flags)
201 return false;
202
203 return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
204}
205
193/* util.c */ 206/* util.c */
194int ovl_want_write(struct dentry *dentry); 207int ovl_want_write(struct dentry *dentry);
195void ovl_drop_write(struct dentry *dentry); 208void ovl_drop_write(struct dentry *dentry);
@@ -206,15 +219,19 @@ bool ovl_dentry_weird(struct dentry *dentry);
206enum ovl_path_type ovl_path_type(struct dentry *dentry); 219enum ovl_path_type ovl_path_type(struct dentry *dentry);
207void ovl_path_upper(struct dentry *dentry, struct path *path); 220void ovl_path_upper(struct dentry *dentry, struct path *path);
208void ovl_path_lower(struct dentry *dentry, struct path *path); 221void ovl_path_lower(struct dentry *dentry, struct path *path);
222void ovl_path_lowerdata(struct dentry *dentry, struct path *path);
209enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); 223enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
210struct dentry *ovl_dentry_upper(struct dentry *dentry); 224struct dentry *ovl_dentry_upper(struct dentry *dentry);
211struct dentry *ovl_dentry_lower(struct dentry *dentry); 225struct dentry *ovl_dentry_lower(struct dentry *dentry);
226struct dentry *ovl_dentry_lowerdata(struct dentry *dentry);
212struct ovl_layer *ovl_layer_lower(struct dentry *dentry); 227struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
213struct dentry *ovl_dentry_real(struct dentry *dentry); 228struct dentry *ovl_dentry_real(struct dentry *dentry);
214struct dentry *ovl_i_dentry_upper(struct inode *inode); 229struct dentry *ovl_i_dentry_upper(struct inode *inode);
215struct inode *ovl_inode_upper(struct inode *inode); 230struct inode *ovl_inode_upper(struct inode *inode);
216struct inode *ovl_inode_lower(struct inode *inode); 231struct inode *ovl_inode_lower(struct inode *inode);
232struct inode *ovl_inode_lowerdata(struct inode *inode);
217struct inode *ovl_inode_real(struct inode *inode); 233struct inode *ovl_inode_real(struct inode *inode);
234struct inode *ovl_inode_realdata(struct inode *inode);
218struct ovl_dir_cache *ovl_dir_cache(struct inode *inode); 235struct ovl_dir_cache *ovl_dir_cache(struct inode *inode);
219void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache); 236void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache);
220void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry); 237void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry);
@@ -225,18 +242,23 @@ bool ovl_dentry_is_whiteout(struct dentry *dentry);
225void ovl_dentry_set_opaque(struct dentry *dentry); 242void ovl_dentry_set_opaque(struct dentry *dentry);
226bool ovl_dentry_has_upper_alias(struct dentry *dentry); 243bool ovl_dentry_has_upper_alias(struct dentry *dentry);
227void ovl_dentry_set_upper_alias(struct dentry *dentry); 244void ovl_dentry_set_upper_alias(struct dentry *dentry);
245bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags);
246bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags);
247bool ovl_has_upperdata(struct inode *inode);
248void ovl_set_upperdata(struct inode *inode);
228bool ovl_redirect_dir(struct super_block *sb); 249bool ovl_redirect_dir(struct super_block *sb);
229const char *ovl_dentry_get_redirect(struct dentry *dentry); 250const char *ovl_dentry_get_redirect(struct dentry *dentry);
230void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); 251void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
231void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, 252void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
232 struct dentry *lowerdentry); 253 struct dentry *lowerdentry, struct dentry *lowerdata);
233void ovl_inode_update(struct inode *inode, struct dentry *upperdentry); 254void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
234void ovl_dentry_version_inc(struct dentry *dentry, bool impurity); 255void ovl_dir_modified(struct dentry *dentry, bool impurity);
235u64 ovl_dentry_version_get(struct dentry *dentry); 256u64 ovl_dentry_version_get(struct dentry *dentry);
236bool ovl_is_whiteout(struct dentry *dentry); 257bool ovl_is_whiteout(struct dentry *dentry);
237struct file *ovl_path_open(struct path *path, int flags); 258struct file *ovl_path_open(struct path *path, int flags);
238int ovl_copy_up_start(struct dentry *dentry); 259int ovl_copy_up_start(struct dentry *dentry, int flags);
239void ovl_copy_up_end(struct dentry *dentry); 260void ovl_copy_up_end(struct dentry *dentry);
261bool ovl_already_copied_up(struct dentry *dentry, int flags);
240bool ovl_check_origin_xattr(struct dentry *dentry); 262bool ovl_check_origin_xattr(struct dentry *dentry);
241bool ovl_check_dir_xattr(struct dentry *dentry, const char *name); 263bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
242int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, 264int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
@@ -252,6 +274,9 @@ bool ovl_need_index(struct dentry *dentry);
252int ovl_nlink_start(struct dentry *dentry, bool *locked); 274int ovl_nlink_start(struct dentry *dentry, bool *locked);
253void ovl_nlink_end(struct dentry *dentry, bool locked); 275void ovl_nlink_end(struct dentry *dentry, bool locked);
254int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir); 276int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
277int ovl_check_metacopy_xattr(struct dentry *dentry);
278bool ovl_is_metacopy_dentry(struct dentry *dentry);
279char *ovl_get_redirect_xattr(struct dentry *dentry, int padding);
255 280
256static inline bool ovl_is_impuredir(struct dentry *dentry) 281static inline bool ovl_is_impuredir(struct dentry *dentry)
257{ 282{
@@ -324,7 +349,6 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
324 void *value, size_t size); 349 void *value, size_t size);
325ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); 350ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
326struct posix_acl *ovl_get_acl(struct inode *inode, int type); 351struct posix_acl *ovl_get_acl(struct inode *inode, int type);
327int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags);
328int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags); 352int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
329bool ovl_is_private_xattr(const char *name); 353bool ovl_is_private_xattr(const char *name);
330 354
@@ -334,6 +358,8 @@ struct ovl_inode_params {
334 struct ovl_path *lowerpath; 358 struct ovl_path *lowerpath;
335 struct dentry *index; 359 struct dentry *index;
336 unsigned int numlower; 360 unsigned int numlower;
361 char *redirect;
362 struct dentry *lowerdata;
337}; 363};
338struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); 364struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
339struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, 365struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
@@ -348,6 +374,14 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
348 to->i_atime = from->i_atime; 374 to->i_atime = from->i_atime;
349 to->i_mtime = from->i_mtime; 375 to->i_mtime = from->i_mtime;
350 to->i_ctime = from->i_ctime; 376 to->i_ctime = from->i_ctime;
377 i_size_write(to, i_size_read(from));
378}
379
380static inline void ovl_copyflags(struct inode *from, struct inode *to)
381{
382 unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
383
384 inode_set_flags(to, from->i_flags & mask, mask);
351} 385}
352 386
353/* dir.c */ 387/* dir.c */
@@ -368,9 +402,14 @@ struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
368int ovl_cleanup(struct inode *dir, struct dentry *dentry); 402int ovl_cleanup(struct inode *dir, struct dentry *dentry);
369struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr); 403struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr);
370 404
405/* file.c */
406extern const struct file_operations ovl_file_operations;
407
371/* copy_up.c */ 408/* copy_up.c */
372int ovl_copy_up(struct dentry *dentry); 409int ovl_copy_up(struct dentry *dentry);
410int ovl_copy_up_with_data(struct dentry *dentry);
373int ovl_copy_up_flags(struct dentry *dentry, int flags); 411int ovl_copy_up_flags(struct dentry *dentry, int flags);
412int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags);
374int ovl_copy_xattr(struct dentry *old, struct dentry *new); 413int ovl_copy_xattr(struct dentry *old, struct dentry *new);
375int ovl_set_attr(struct dentry *upper, struct kstat *stat); 414int ovl_set_attr(struct dentry *upper, struct kstat *stat);
376struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper); 415struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 41655a7d6894..ec237035333a 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -19,6 +19,7 @@ struct ovl_config {
19 bool index; 19 bool index;
20 bool nfs_export; 20 bool nfs_export;
21 int xino; 21 int xino;
22 bool metacopy;
22}; 23};
23 24
24struct ovl_sb { 25struct ovl_sb {
@@ -88,7 +89,10 @@ static inline struct ovl_entry *OVL_E(struct dentry *dentry)
88} 89}
89 90
90struct ovl_inode { 91struct ovl_inode {
91 struct ovl_dir_cache *cache; 92 union {
93 struct ovl_dir_cache *cache; /* directory */
94 struct inode *lowerdata; /* regular file */
95 };
92 const char *redirect; 96 const char *redirect;
93 u64 version; 97 u64 version;
94 unsigned long flags; 98 unsigned long flags;
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index ef1fe42ff7bb..cc8303a806b4 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -668,6 +668,21 @@ static int ovl_fill_real(struct dir_context *ctx, const char *name,
668 return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type); 668 return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
669} 669}
670 670
671static bool ovl_is_impure_dir(struct file *file)
672{
673 struct ovl_dir_file *od = file->private_data;
674 struct inode *dir = d_inode(file->f_path.dentry);
675
676 /*
677 * Only upper dir can be impure, but if we are in the middle of
678 * iterating a lower real dir, dir could be copied up and marked
679 * impure. We only want the impure cache if we started iterating
680 * a real upper dir to begin with.
681 */
682 return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);
683
684}
685
671static int ovl_iterate_real(struct file *file, struct dir_context *ctx) 686static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
672{ 687{
673 int err; 688 int err;
@@ -696,7 +711,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
696 rdt.parent_ino = stat.ino; 711 rdt.parent_ino = stat.ino;
697 } 712 }
698 713
699 if (ovl_test_flag(OVL_IMPURE, d_inode(dir))) { 714 if (ovl_is_impure_dir(file)) {
700 rdt.cache = ovl_cache_get_impure(&file->f_path); 715 rdt.cache = ovl_cache_get_impure(&file->f_path);
701 if (IS_ERR(rdt.cache)) 716 if (IS_ERR(rdt.cache))
702 return PTR_ERR(rdt.cache); 717 return PTR_ERR(rdt.cache);
@@ -727,7 +742,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
727 */ 742 */
728 if (ovl_xino_bits(dentry->d_sb) || 743 if (ovl_xino_bits(dentry->d_sb) ||
729 (ovl_same_sb(dentry->d_sb) && 744 (ovl_same_sb(dentry->d_sb) &&
730 (ovl_test_flag(OVL_IMPURE, d_inode(dentry)) || 745 (ovl_is_impure_dir(file) ||
731 OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) { 746 OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
732 return ovl_iterate_real(file, ctx); 747 return ovl_iterate_real(file, ctx);
733 } 748 }
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 704b37311467..2e0fc93c2c06 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -64,6 +64,11 @@ static void ovl_entry_stack_free(struct ovl_entry *oe)
64 dput(oe->lowerstack[i].dentry); 64 dput(oe->lowerstack[i].dentry);
65} 65}
66 66
67static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
68module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
69MODULE_PARM_DESC(ovl_metacopy_def,
70 "Default to on or off for the metadata only copy up feature");
71
67static void ovl_dentry_release(struct dentry *dentry) 72static void ovl_dentry_release(struct dentry *dentry)
68{ 73{
69 struct ovl_entry *oe = dentry->d_fsdata; 74 struct ovl_entry *oe = dentry->d_fsdata;
@@ -74,31 +79,14 @@ static void ovl_dentry_release(struct dentry *dentry)
74 } 79 }
75} 80}
76 81
77static int ovl_check_append_only(struct inode *inode, int flag)
78{
79 /*
80 * This test was moot in vfs may_open() because overlay inode does
81 * not have the S_APPEND flag, so re-check on real upper inode
82 */
83 if (IS_APPEND(inode)) {
84 if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
85 return -EPERM;
86 if (flag & O_TRUNC)
87 return -EPERM;
88 }
89
90 return 0;
91}
92
93static struct dentry *ovl_d_real(struct dentry *dentry, 82static struct dentry *ovl_d_real(struct dentry *dentry,
94 const struct inode *inode, 83 const struct inode *inode)
95 unsigned int open_flags, unsigned int flags)
96{ 84{
97 struct dentry *real; 85 struct dentry *real;
98 int err;
99 86
100 if (flags & D_REAL_UPPER) 87 /* It's an overlay file */
101 return ovl_dentry_upper(dentry); 88 if (inode && d_inode(dentry) == inode)
89 return dentry;
102 90
103 if (!d_is_reg(dentry)) { 91 if (!d_is_reg(dentry)) {
104 if (!inode || inode == d_inode(dentry)) 92 if (!inode || inode == d_inode(dentry))
@@ -106,28 +94,19 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
106 goto bug; 94 goto bug;
107 } 95 }
108 96
109 if (open_flags) {
110 err = ovl_open_maybe_copy_up(dentry, open_flags);
111 if (err)
112 return ERR_PTR(err);
113 }
114
115 real = ovl_dentry_upper(dentry); 97 real = ovl_dentry_upper(dentry);
116 if (real && (!inode || inode == d_inode(real))) { 98 if (real && (inode == d_inode(real)))
117 if (!inode) { 99 return real;
118 err = ovl_check_append_only(d_inode(real), open_flags); 100
119 if (err) 101 if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
120 return ERR_PTR(err);
121 }
122 return real; 102 return real;
123 }
124 103
125 real = ovl_dentry_lower(dentry); 104 real = ovl_dentry_lowerdata(dentry);
126 if (!real) 105 if (!real)
127 goto bug; 106 goto bug;
128 107
129 /* Handle recursion */ 108 /* Handle recursion */
130 real = d_real(real, inode, open_flags, 0); 109 real = d_real(real, inode);
131 110
132 if (!inode || inode == d_inode(real)) 111 if (!inode || inode == d_inode(real))
133 return real; 112 return real;
@@ -205,6 +184,7 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
205 oi->flags = 0; 184 oi->flags = 0;
206 oi->__upperdentry = NULL; 185 oi->__upperdentry = NULL;
207 oi->lower = NULL; 186 oi->lower = NULL;
187 oi->lowerdata = NULL;
208 mutex_init(&oi->lock); 188 mutex_init(&oi->lock);
209 189
210 return &oi->vfs_inode; 190 return &oi->vfs_inode;
@@ -223,8 +203,11 @@ static void ovl_destroy_inode(struct inode *inode)
223 203
224 dput(oi->__upperdentry); 204 dput(oi->__upperdentry);
225 iput(oi->lower); 205 iput(oi->lower);
206 if (S_ISDIR(inode->i_mode))
207 ovl_dir_cache_free(inode);
208 else
209 iput(oi->lowerdata);
226 kfree(oi->redirect); 210 kfree(oi->redirect);
227 ovl_dir_cache_free(inode);
228 mutex_destroy(&oi->lock); 211 mutex_destroy(&oi->lock);
229 212
230 call_rcu(&inode->i_rcu, ovl_i_callback); 213 call_rcu(&inode->i_rcu, ovl_i_callback);
@@ -376,6 +359,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
376 "on" : "off"); 359 "on" : "off");
377 if (ofs->config.xino != ovl_xino_def()) 360 if (ofs->config.xino != ovl_xino_def())
378 seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]); 361 seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
362 if (ofs->config.metacopy != ovl_metacopy_def)
363 seq_printf(m, ",metacopy=%s",
364 ofs->config.metacopy ? "on" : "off");
379 return 0; 365 return 0;
380} 366}
381 367
@@ -413,6 +399,8 @@ enum {
413 OPT_XINO_ON, 399 OPT_XINO_ON,
414 OPT_XINO_OFF, 400 OPT_XINO_OFF,
415 OPT_XINO_AUTO, 401 OPT_XINO_AUTO,
402 OPT_METACOPY_ON,
403 OPT_METACOPY_OFF,
416 OPT_ERR, 404 OPT_ERR,
417}; 405};
418 406
@@ -429,6 +417,8 @@ static const match_table_t ovl_tokens = {
429 {OPT_XINO_ON, "xino=on"}, 417 {OPT_XINO_ON, "xino=on"},
430 {OPT_XINO_OFF, "xino=off"}, 418 {OPT_XINO_OFF, "xino=off"},
431 {OPT_XINO_AUTO, "xino=auto"}, 419 {OPT_XINO_AUTO, "xino=auto"},
420 {OPT_METACOPY_ON, "metacopy=on"},
421 {OPT_METACOPY_OFF, "metacopy=off"},
432 {OPT_ERR, NULL} 422 {OPT_ERR, NULL}
433}; 423};
434 424
@@ -481,6 +471,7 @@ static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
481static int ovl_parse_opt(char *opt, struct ovl_config *config) 471static int ovl_parse_opt(char *opt, struct ovl_config *config)
482{ 472{
483 char *p; 473 char *p;
474 int err;
484 475
485 config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL); 476 config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
486 if (!config->redirect_mode) 477 if (!config->redirect_mode)
@@ -555,6 +546,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
555 config->xino = OVL_XINO_AUTO; 546 config->xino = OVL_XINO_AUTO;
556 break; 547 break;
557 548
549 case OPT_METACOPY_ON:
550 config->metacopy = true;
551 break;
552
553 case OPT_METACOPY_OFF:
554 config->metacopy = false;
555 break;
556
558 default: 557 default:
559 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); 558 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
560 return -EINVAL; 559 return -EINVAL;
@@ -569,7 +568,20 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
569 config->workdir = NULL; 568 config->workdir = NULL;
570 } 569 }
571 570
572 return ovl_parse_redirect_mode(config, config->redirect_mode); 571 err = ovl_parse_redirect_mode(config, config->redirect_mode);
572 if (err)
573 return err;
574
575 /* metacopy feature with upper requires redirect_dir=on */
576 if (config->upperdir && config->metacopy && !config->redirect_dir) {
577 pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=on\", falling back to metacopy=off.\n");
578 config->metacopy = false;
579 } else if (config->metacopy && !config->redirect_follow) {
580 pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=follow\" on non-upper mount, falling back to metacopy=off.\n");
581 config->metacopy = false;
582 }
583
584 return 0;
573} 585}
574 586
575#define OVL_WORKDIR_NAME "work" 587#define OVL_WORKDIR_NAME "work"
@@ -1042,7 +1054,8 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
1042 if (err) { 1054 if (err) {
1043 ofs->noxattr = true; 1055 ofs->noxattr = true;
1044 ofs->config.index = false; 1056 ofs->config.index = false;
1045 pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off.\n"); 1057 ofs->config.metacopy = false;
1058 pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
1046 err = 0; 1059 err = 0;
1047 } else { 1060 } else {
1048 vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); 1061 vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
@@ -1064,7 +1077,6 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
1064 pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n"); 1077 pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
1065 ofs->config.nfs_export = false; 1078 ofs->config.nfs_export = false;
1066 } 1079 }
1067
1068out: 1080out:
1069 mnt_drop_write(mnt); 1081 mnt_drop_write(mnt);
1070 return err; 1082 return err;
@@ -1375,6 +1387,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1375 ofs->config.index = ovl_index_def; 1387 ofs->config.index = ovl_index_def;
1376 ofs->config.nfs_export = ovl_nfs_export_def; 1388 ofs->config.nfs_export = ovl_nfs_export_def;
1377 ofs->config.xino = ovl_xino_def(); 1389 ofs->config.xino = ovl_xino_def();
1390 ofs->config.metacopy = ovl_metacopy_def;
1378 err = ovl_parse_opt((char *) data, &ofs->config); 1391 err = ovl_parse_opt((char *) data, &ofs->config);
1379 if (err) 1392 if (err)
1380 goto out_err; 1393 goto out_err;
@@ -1445,6 +1458,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1445 } 1458 }
1446 } 1459 }
1447 1460
1461 if (ofs->config.metacopy && ofs->config.nfs_export) {
1462 pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
1463 ofs->config.nfs_export = false;
1464 }
1465
1448 if (ofs->config.nfs_export) 1466 if (ofs->config.nfs_export)
1449 sb->s_export_op = &ovl_export_operations; 1467 sb->s_export_op = &ovl_export_operations;
1450 1468
@@ -1455,7 +1473,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1455 sb->s_op = &ovl_super_operations; 1473 sb->s_op = &ovl_super_operations;
1456 sb->s_xattr = ovl_xattr_handlers; 1474 sb->s_xattr = ovl_xattr_handlers;
1457 sb->s_fs_info = ofs; 1475 sb->s_fs_info = ofs;
1458 sb->s_flags |= SB_POSIXACL | SB_NOREMOTELOCK; 1476 sb->s_flags |= SB_POSIXACL;
1459 1477
1460 err = -ENOMEM; 1478 err = -ENOMEM;
1461 root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); 1479 root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
@@ -1474,8 +1492,9 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1474 /* Root is always merge -> can have whiteouts */ 1492 /* Root is always merge -> can have whiteouts */
1475 ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry)); 1493 ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
1476 ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry); 1494 ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
1495 ovl_set_upperdata(d_inode(root_dentry));
1477 ovl_inode_init(d_inode(root_dentry), upperpath.dentry, 1496 ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
1478 ovl_dentry_lower(root_dentry)); 1497 ovl_dentry_lower(root_dentry), NULL);
1479 1498
1480 sb->s_root = root_dentry; 1499 sb->s_root = root_dentry;
1481 1500
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 6f1078028c66..8cfb62cc8672 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -133,8 +133,10 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
133 * Non-dir dentry can hold lower dentry of its copy up origin. 133 * Non-dir dentry can hold lower dentry of its copy up origin.
134 */ 134 */
135 if (oe->numlower) { 135 if (oe->numlower) {
136 type |= __OVL_PATH_ORIGIN; 136 if (ovl_test_flag(OVL_CONST_INO, d_inode(dentry)))
137 if (d_is_dir(dentry)) 137 type |= __OVL_PATH_ORIGIN;
138 if (d_is_dir(dentry) ||
139 !ovl_has_upperdata(d_inode(dentry)))
138 type |= __OVL_PATH_MERGE; 140 type |= __OVL_PATH_MERGE;
139 } 141 }
140 } else { 142 } else {
@@ -164,6 +166,18 @@ void ovl_path_lower(struct dentry *dentry, struct path *path)
164 } 166 }
165} 167}
166 168
169void ovl_path_lowerdata(struct dentry *dentry, struct path *path)
170{
171 struct ovl_entry *oe = dentry->d_fsdata;
172
173 if (oe->numlower) {
174 path->mnt = oe->lowerstack[oe->numlower - 1].layer->mnt;
175 path->dentry = oe->lowerstack[oe->numlower - 1].dentry;
176 } else {
177 *path = (struct path) { };
178 }
179}
180
167enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) 181enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
168{ 182{
169 enum ovl_path_type type = ovl_path_type(dentry); 183 enum ovl_path_type type = ovl_path_type(dentry);
@@ -195,6 +209,19 @@ struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
195 return oe->numlower ? oe->lowerstack[0].layer : NULL; 209 return oe->numlower ? oe->lowerstack[0].layer : NULL;
196} 210}
197 211
212/*
213 * ovl_dentry_lower() could return either a data dentry or metacopy dentry
214 * depending on what is stored in lowerstack[0]. At times we need to find
215 * lower dentry which has data (and not metacopy dentry). This helper
216 * returns the lower data dentry.
217 */
218struct dentry *ovl_dentry_lowerdata(struct dentry *dentry)
219{
220 struct ovl_entry *oe = dentry->d_fsdata;
221
222 return oe->numlower ? oe->lowerstack[oe->numlower - 1].dentry : NULL;
223}
224
198struct dentry *ovl_dentry_real(struct dentry *dentry) 225struct dentry *ovl_dentry_real(struct dentry *dentry)
199{ 226{
200 return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry); 227 return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
@@ -222,6 +249,26 @@ struct inode *ovl_inode_real(struct inode *inode)
222 return ovl_inode_upper(inode) ?: ovl_inode_lower(inode); 249 return ovl_inode_upper(inode) ?: ovl_inode_lower(inode);
223} 250}
224 251
252/* Return inode which contains lower data. Do not return metacopy */
253struct inode *ovl_inode_lowerdata(struct inode *inode)
254{
255 if (WARN_ON(!S_ISREG(inode->i_mode)))
256 return NULL;
257
258 return OVL_I(inode)->lowerdata ?: ovl_inode_lower(inode);
259}
260
261/* Return real inode which contains data. Does not return metacopy inode */
262struct inode *ovl_inode_realdata(struct inode *inode)
263{
264 struct inode *upperinode;
265
266 upperinode = ovl_inode_upper(inode);
267 if (upperinode && ovl_has_upperdata(inode))
268 return upperinode;
269
270 return ovl_inode_lowerdata(inode);
271}
225 272
226struct ovl_dir_cache *ovl_dir_cache(struct inode *inode) 273struct ovl_dir_cache *ovl_dir_cache(struct inode *inode)
227{ 274{
@@ -279,6 +326,62 @@ void ovl_dentry_set_upper_alias(struct dentry *dentry)
279 ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry); 326 ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry);
280} 327}
281 328
329static bool ovl_should_check_upperdata(struct inode *inode)
330{
331 if (!S_ISREG(inode->i_mode))
332 return false;
333
334 if (!ovl_inode_lower(inode))
335 return false;
336
337 return true;
338}
339
340bool ovl_has_upperdata(struct inode *inode)
341{
342 if (!ovl_should_check_upperdata(inode))
343 return true;
344
345 if (!ovl_test_flag(OVL_UPPERDATA, inode))
346 return false;
347 /*
348 * Pairs with smp_wmb() in ovl_set_upperdata(). Main user of
349 * ovl_has_upperdata() is ovl_copy_up_meta_inode_data(). Make sure
350 * if setting of OVL_UPPERDATA is visible, then effects of writes
351 * before that are visible too.
352 */
353 smp_rmb();
354 return true;
355}
356
357void ovl_set_upperdata(struct inode *inode)
358{
359 /*
360 * Pairs with smp_rmb() in ovl_has_upperdata(). Make sure
361 * if OVL_UPPERDATA flag is visible, then effects of write operations
362 * before it are visible as well.
363 */
364 smp_wmb();
365 ovl_set_flag(OVL_UPPERDATA, inode);
366}
367
368/* Caller should hold ovl_inode->lock */
369bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags)
370{
371 if (!ovl_open_flags_need_copy_up(flags))
372 return false;
373
374 return !ovl_test_flag(OVL_UPPERDATA, d_inode(dentry));
375}
376
377bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags)
378{
379 if (!ovl_open_flags_need_copy_up(flags))
380 return false;
381
382 return !ovl_has_upperdata(d_inode(dentry));
383}
384
282bool ovl_redirect_dir(struct super_block *sb) 385bool ovl_redirect_dir(struct super_block *sb)
283{ 386{
284 struct ovl_fs *ofs = sb->s_fs_info; 387 struct ovl_fs *ofs = sb->s_fs_info;
@@ -300,7 +403,7 @@ void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
300} 403}
301 404
302void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, 405void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
303 struct dentry *lowerdentry) 406 struct dentry *lowerdentry, struct dentry *lowerdata)
304{ 407{
305 struct inode *realinode = d_inode(upperdentry ?: lowerdentry); 408 struct inode *realinode = d_inode(upperdentry ?: lowerdentry);
306 409
@@ -308,8 +411,11 @@ void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
308 OVL_I(inode)->__upperdentry = upperdentry; 411 OVL_I(inode)->__upperdentry = upperdentry;
309 if (lowerdentry) 412 if (lowerdentry)
310 OVL_I(inode)->lower = igrab(d_inode(lowerdentry)); 413 OVL_I(inode)->lower = igrab(d_inode(lowerdentry));
414 if (lowerdata)
415 OVL_I(inode)->lowerdata = igrab(d_inode(lowerdata));
311 416
312 ovl_copyattr(realinode, inode); 417 ovl_copyattr(realinode, inode);
418 ovl_copyflags(realinode, inode);
313 if (!inode->i_ino) 419 if (!inode->i_ino)
314 inode->i_ino = realinode->i_ino; 420 inode->i_ino = realinode->i_ino;
315} 421}
@@ -333,7 +439,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
333 } 439 }
334} 440}
335 441
336void ovl_dentry_version_inc(struct dentry *dentry, bool impurity) 442static void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
337{ 443{
338 struct inode *inode = d_inode(dentry); 444 struct inode *inode = d_inode(dentry);
339 445
@@ -348,6 +454,14 @@ void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
348 OVL_I(inode)->version++; 454 OVL_I(inode)->version++;
349} 455}
350 456
457void ovl_dir_modified(struct dentry *dentry, bool impurity)
458{
459 /* Copy mtime/ctime */
460 ovl_copyattr(d_inode(ovl_dentry_upper(dentry)), d_inode(dentry));
461
462 ovl_dentry_version_inc(dentry, impurity);
463}
464
351u64 ovl_dentry_version_get(struct dentry *dentry) 465u64 ovl_dentry_version_get(struct dentry *dentry)
352{ 466{
353 struct inode *inode = d_inode(dentry); 467 struct inode *inode = d_inode(dentry);
@@ -368,13 +482,51 @@ struct file *ovl_path_open(struct path *path, int flags)
368 return dentry_open(path, flags | O_NOATIME, current_cred()); 482 return dentry_open(path, flags | O_NOATIME, current_cred());
369} 483}
370 484
371int ovl_copy_up_start(struct dentry *dentry) 485/* Caller should hold ovl_inode->lock */
486static bool ovl_already_copied_up_locked(struct dentry *dentry, int flags)
487{
488 bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
489
490 if (ovl_dentry_upper(dentry) &&
491 (ovl_dentry_has_upper_alias(dentry) || disconnected) &&
492 !ovl_dentry_needs_data_copy_up_locked(dentry, flags))
493 return true;
494
495 return false;
496}
497
498bool ovl_already_copied_up(struct dentry *dentry, int flags)
499{
500 bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
501
502 /*
503 * Check if copy-up has happened as well as for upper alias (in
504 * case of hard links) is there.
505 *
506 * Both checks are lockless:
507 * - false negatives: will recheck under oi->lock
508 * - false positives:
509 * + ovl_dentry_upper() uses memory barriers to ensure the
510 * upper dentry is up-to-date
511 * + ovl_dentry_has_upper_alias() relies on locking of
512 * upper parent i_rwsem to prevent reordering copy-up
513 * with rename.
514 */
515 if (ovl_dentry_upper(dentry) &&
516 (ovl_dentry_has_upper_alias(dentry) || disconnected) &&
517 !ovl_dentry_needs_data_copy_up(dentry, flags))
518 return true;
519
520 return false;
521}
522
523int ovl_copy_up_start(struct dentry *dentry, int flags)
372{ 524{
373 struct ovl_inode *oi = OVL_I(d_inode(dentry)); 525 struct ovl_inode *oi = OVL_I(d_inode(dentry));
374 int err; 526 int err;
375 527
376 err = mutex_lock_interruptible(&oi->lock); 528 err = mutex_lock_interruptible(&oi->lock);
377 if (!err && ovl_dentry_has_upper_alias(dentry)) { 529 if (!err && ovl_already_copied_up_locked(dentry, flags)) {
378 err = 1; /* Already copied up */ 530 err = 1; /* Already copied up */
379 mutex_unlock(&oi->lock); 531 mutex_unlock(&oi->lock);
380 } 532 }
@@ -675,3 +827,91 @@ err:
675 pr_err("overlayfs: failed to lock workdir+upperdir\n"); 827 pr_err("overlayfs: failed to lock workdir+upperdir\n");
676 return -EIO; 828 return -EIO;
677} 829}
830
831/* err < 0, 0 if no metacopy xattr, 1 if metacopy xattr found */
832int ovl_check_metacopy_xattr(struct dentry *dentry)
833{
834 int res;
835
836 /* Only regular files can have metacopy xattr */
837 if (!S_ISREG(d_inode(dentry)->i_mode))
838 return 0;
839
840 res = vfs_getxattr(dentry, OVL_XATTR_METACOPY, NULL, 0);
841 if (res < 0) {
842 if (res == -ENODATA || res == -EOPNOTSUPP)
843 return 0;
844 goto out;
845 }
846
847 return 1;
848out:
849 pr_warn_ratelimited("overlayfs: failed to get metacopy (%i)\n", res);
850 return res;
851}
852
853bool ovl_is_metacopy_dentry(struct dentry *dentry)
854{
855 struct ovl_entry *oe = dentry->d_fsdata;
856
857 if (!d_is_reg(dentry))
858 return false;
859
860 if (ovl_dentry_upper(dentry)) {
861 if (!ovl_has_upperdata(d_inode(dentry)))
862 return true;
863 return false;
864 }
865
866 return (oe->numlower > 1);
867}
868
869char *ovl_get_redirect_xattr(struct dentry *dentry, int padding)
870{
871 int res;
872 char *s, *next, *buf = NULL;
873
874 res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0);
875 if (res < 0) {
876 if (res == -ENODATA || res == -EOPNOTSUPP)
877 return NULL;
878 goto fail;
879 }
880
881 buf = kzalloc(res + padding + 1, GFP_KERNEL);
882 if (!buf)
883 return ERR_PTR(-ENOMEM);
884
885 if (res == 0)
886 goto invalid;
887
888 res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res);
889 if (res < 0)
890 goto fail;
891 if (res == 0)
892 goto invalid;
893
894 if (buf[0] == '/') {
895 for (s = buf; *s++ == '/'; s = next) {
896 next = strchrnul(s, '/');
897 if (s == next)
898 goto invalid;
899 }
900 } else {
901 if (strchr(buf, '/') != NULL)
902 goto invalid;
903 }
904
905 return buf;
906
907err_free:
908 kfree(buf);
909 return ERR_PTR(res);
910fail:
911 pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res);
912 goto err_free;
913invalid:
914 pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf);
915 res = -EINVAL;
916 goto err_free;
917}
diff --git a/fs/read_write.c b/fs/read_write.c
index 153f8f690490..39b4a21dd933 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1964,6 +1964,44 @@ out_error:
1964} 1964}
1965EXPORT_SYMBOL(vfs_dedupe_file_range_compare); 1965EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
1966 1966
1967int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
1968 struct file *dst_file, loff_t dst_pos, u64 len)
1969{
1970 s64 ret;
1971
1972 ret = mnt_want_write_file(dst_file);
1973 if (ret)
1974 return ret;
1975
1976 ret = clone_verify_area(dst_file, dst_pos, len, true);
1977 if (ret < 0)
1978 goto out_drop_write;
1979
1980 ret = -EINVAL;
1981 if (!(capable(CAP_SYS_ADMIN) || (dst_file->f_mode & FMODE_WRITE)))
1982 goto out_drop_write;
1983
1984 ret = -EXDEV;
1985 if (src_file->f_path.mnt != dst_file->f_path.mnt)
1986 goto out_drop_write;
1987
1988 ret = -EISDIR;
1989 if (S_ISDIR(file_inode(dst_file)->i_mode))
1990 goto out_drop_write;
1991
1992 ret = -EINVAL;
1993 if (!dst_file->f_op->dedupe_file_range)
1994 goto out_drop_write;
1995
1996 ret = dst_file->f_op->dedupe_file_range(src_file, src_pos,
1997 dst_file, dst_pos, len);
1998out_drop_write:
1999 mnt_drop_write_file(dst_file);
2000
2001 return ret;
2002}
2003EXPORT_SYMBOL(vfs_dedupe_file_range_one);
2004
1967int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) 2005int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
1968{ 2006{
1969 struct file_dedupe_range_info *info; 2007 struct file_dedupe_range_info *info;
@@ -1972,11 +2010,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
1972 u64 len; 2010 u64 len;
1973 int i; 2011 int i;
1974 int ret; 2012 int ret;
1975 bool is_admin = capable(CAP_SYS_ADMIN);
1976 u16 count = same->dest_count; 2013 u16 count = same->dest_count;
1977 struct file *dst_file; 2014 int deduped;
1978 loff_t dst_off;
1979 ssize_t deduped;
1980 2015
1981 if (!(file->f_mode & FMODE_READ)) 2016 if (!(file->f_mode & FMODE_READ))
1982 return -EINVAL; 2017 return -EINVAL;
@@ -2003,6 +2038,9 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
2003 if (off + len > i_size_read(src)) 2038 if (off + len > i_size_read(src))
2004 return -EINVAL; 2039 return -EINVAL;
2005 2040
2041 /* Arbitrary 1G limit on a single dedupe request, can be raised. */
2042 len = min_t(u64, len, 1 << 30);
2043
2006 /* pre-format output fields to sane values */ 2044 /* pre-format output fields to sane values */
2007 for (i = 0; i < count; i++) { 2045 for (i = 0; i < count; i++) {
2008 same->info[i].bytes_deduped = 0ULL; 2046 same->info[i].bytes_deduped = 0ULL;
@@ -2010,54 +2048,28 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
2010 } 2048 }
2011 2049
2012 for (i = 0, info = same->info; i < count; i++, info++) { 2050 for (i = 0, info = same->info; i < count; i++, info++) {
2013 struct inode *dst;
2014 struct fd dst_fd = fdget(info->dest_fd); 2051 struct fd dst_fd = fdget(info->dest_fd);
2052 struct file *dst_file = dst_fd.file;
2015 2053
2016 dst_file = dst_fd.file;
2017 if (!dst_file) { 2054 if (!dst_file) {
2018 info->status = -EBADF; 2055 info->status = -EBADF;
2019 goto next_loop; 2056 goto next_loop;
2020 } 2057 }
2021 dst = file_inode(dst_file);
2022
2023 ret = mnt_want_write_file(dst_file);
2024 if (ret) {
2025 info->status = ret;
2026 goto next_fdput;
2027 }
2028
2029 dst_off = info->dest_offset;
2030 ret = clone_verify_area(dst_file, dst_off, len, true);
2031 if (ret < 0) {
2032 info->status = ret;
2033 goto next_file;
2034 }
2035 ret = 0;
2036 2058
2037 if (info->reserved) { 2059 if (info->reserved) {
2038 info->status = -EINVAL; 2060 info->status = -EINVAL;
2039 } else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { 2061 goto next_fdput;
2040 info->status = -EINVAL;
2041 } else if (file->f_path.mnt != dst_file->f_path.mnt) {
2042 info->status = -EXDEV;
2043 } else if (S_ISDIR(dst->i_mode)) {
2044 info->status = -EISDIR;
2045 } else if (dst_file->f_op->dedupe_file_range == NULL) {
2046 info->status = -EINVAL;
2047 } else {
2048 deduped = dst_file->f_op->dedupe_file_range(file, off,
2049 len, dst_file,
2050 info->dest_offset);
2051 if (deduped == -EBADE)
2052 info->status = FILE_DEDUPE_RANGE_DIFFERS;
2053 else if (deduped < 0)
2054 info->status = deduped;
2055 else
2056 info->bytes_deduped += deduped;
2057 } 2062 }
2058 2063
2059next_file: 2064 deduped = vfs_dedupe_file_range_one(file, off, dst_file,
2060 mnt_drop_write_file(dst_file); 2065 info->dest_offset, len);
2066 if (deduped == -EBADE)
2067 info->status = FILE_DEDUPE_RANGE_DIFFERS;
2068 else if (deduped < 0)
2069 info->status = deduped;
2070 else
2071 info->bytes_deduped = len;
2072
2061next_fdput: 2073next_fdput:
2062 fdput(dst_fd); 2074 fdput(dst_fd);
2063next_loop: 2075next_loop:
diff --git a/fs/xattr.c b/fs/xattr.c
index f9cb1db187b7..3a24027c062d 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -23,7 +23,6 @@
23#include <linux/posix_acl_xattr.h> 23#include <linux/posix_acl_xattr.h>
24 24
25#include <linux/uaccess.h> 25#include <linux/uaccess.h>
26#include "internal.h"
27 26
28static const char * 27static const char *
29strcmp_prefix(const char *a, const char *a_prefix) 28strcmp_prefix(const char *a, const char *a_prefix)
@@ -501,10 +500,10 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
501 if (!f.file) 500 if (!f.file)
502 return error; 501 return error;
503 audit_file(f.file); 502 audit_file(f.file);
504 error = mnt_want_write_file_path(f.file); 503 error = mnt_want_write_file(f.file);
505 if (!error) { 504 if (!error) {
506 error = setxattr(f.file->f_path.dentry, name, value, size, flags); 505 error = setxattr(f.file->f_path.dentry, name, value, size, flags);
507 mnt_drop_write_file_path(f.file); 506 mnt_drop_write_file(f.file);
508 } 507 }
509 fdput(f); 508 fdput(f);
510 return error; 509 return error;
@@ -733,10 +732,10 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
733 if (!f.file) 732 if (!f.file)
734 return error; 733 return error;
735 audit_file(f.file); 734 audit_file(f.file);
736 error = mnt_want_write_file_path(f.file); 735 error = mnt_want_write_file(f.file);
737 if (!error) { 736 if (!error) {
738 error = removexattr(f.file->f_path.dentry, name); 737 error = removexattr(f.file->f_path.dentry, name);
739 mnt_drop_write_file_path(f.file); 738 mnt_drop_write_file(f.file);
740 } 739 }
741 fdput(f); 740 fdput(f);
742 return error; 741 return error;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5eaef2c17293..61a5ad2600e8 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -931,31 +931,16 @@ xfs_file_clone_range(
931 len, false); 931 len, false);
932} 932}
933 933
934STATIC ssize_t 934STATIC int
935xfs_file_dedupe_range( 935xfs_file_dedupe_range(
936 struct file *src_file, 936 struct file *file_in,
937 u64 loff, 937 loff_t pos_in,
938 u64 len, 938 struct file *file_out,
939 struct file *dst_file, 939 loff_t pos_out,
940 u64 dst_loff) 940 u64 len)
941{ 941{
942 struct inode *srci = file_inode(src_file); 942 return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
943 u64 max_dedupe;
944 int error;
945
946 /*
947 * Since we have to read all these pages in to compare them, cut
948 * it off at MAX_RW_COUNT/2 rounded down to the nearest block.
949 * That means we won't do more than MAX_RW_COUNT IO per request.
950 */
951 max_dedupe = (MAX_RW_COUNT >> 1) & ~(i_blocksize(srci) - 1);
952 if (len > max_dedupe)
953 len = max_dedupe;
954 error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
955 len, true); 943 len, true);
956 if (error)
957 return error;
958 return len;
959} 944}
960 945
961STATIC int 946STATIC int
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index d32957b423d5..ef4b70f64f33 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -145,8 +145,7 @@ struct dentry_operations {
145 char *(*d_dname)(struct dentry *, char *, int); 145 char *(*d_dname)(struct dentry *, char *, int);
146 struct vfsmount *(*d_automount)(struct path *); 146 struct vfsmount *(*d_automount)(struct path *);
147 int (*d_manage)(const struct path *, bool); 147 int (*d_manage)(const struct path *, bool);
148 struct dentry *(*d_real)(struct dentry *, const struct inode *, 148 struct dentry *(*d_real)(struct dentry *, const struct inode *);
149 unsigned int, unsigned int);
150} ____cacheline_aligned; 149} ____cacheline_aligned;
151 150
152/* 151/*
@@ -561,15 +560,10 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
561 return upper; 560 return upper;
562} 561}
563 562
564/* d_real() flags */
565#define D_REAL_UPPER 0x2 /* return upper dentry or NULL if non-upper */
566
567/** 563/**
568 * d_real - Return the real dentry 564 * d_real - Return the real dentry
569 * @dentry: the dentry to query 565 * @dentry: the dentry to query
570 * @inode: inode to select the dentry from multiple layers (can be NULL) 566 * @inode: inode to select the dentry from multiple layers (can be NULL)
571 * @open_flags: open flags to control copy-up behavior
572 * @flags: flags to control what is returned by this function
573 * 567 *
574 * If dentry is on a union/overlay, then return the underlying, real dentry. 568 * If dentry is on a union/overlay, then return the underlying, real dentry.
575 * Otherwise return the dentry itself. 569 * Otherwise return the dentry itself.
@@ -577,11 +571,10 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
577 * See also: Documentation/filesystems/vfs.txt 571 * See also: Documentation/filesystems/vfs.txt
578 */ 572 */
579static inline struct dentry *d_real(struct dentry *dentry, 573static inline struct dentry *d_real(struct dentry *dentry,
580 const struct inode *inode, 574 const struct inode *inode)
581 unsigned int open_flags, unsigned int flags)
582{ 575{
583 if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) 576 if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
584 return dentry->d_op->d_real(dentry, inode, open_flags, flags); 577 return dentry->d_op->d_real(dentry, inode);
585 else 578 else
586 return dentry; 579 return dentry;
587} 580}
@@ -596,7 +589,7 @@ static inline struct dentry *d_real(struct dentry *dentry,
596static inline struct inode *d_real_inode(const struct dentry *dentry) 589static inline struct inode *d_real_inode(const struct dentry *dentry)
597{ 590{
598 /* This usage of d_real() results in const dentry */ 591 /* This usage of d_real() results in const dentry */
599 return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0, 0)); 592 return d_backing_inode(d_real((struct dentry *) dentry, NULL));
600} 593}
601 594
602struct name_snapshot { 595struct name_snapshot {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a9242f336f02..e5710541183b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -157,6 +157,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
157/* File is capable of returning -EAGAIN if I/O will block */ 157/* File is capable of returning -EAGAIN if I/O will block */
158#define FMODE_NOWAIT ((__force fmode_t)0x8000000) 158#define FMODE_NOWAIT ((__force fmode_t)0x8000000)
159 159
160/* File does not contribute to nr_files count */
161#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000)
162
160/* 163/*
161 * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector 164 * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
162 * that indicates that they should check the contents of the iovec are 165 * that indicates that they should check the contents of the iovec are
@@ -1067,17 +1070,7 @@ struct file_lock_context {
1067 1070
1068extern void send_sigio(struct fown_struct *fown, int fd, int band); 1071extern void send_sigio(struct fown_struct *fown, int fd, int band);
1069 1072
1070/* 1073#define locks_inode(f) file_inode(f)
1071 * Return the inode to use for locking
1072 *
1073 * For overlayfs this should be the overlay inode, not the real inode returned
1074 * by file_inode(). For any other fs file_inode(filp) and locks_inode(filp) are
1075 * equal.
1076 */
1077static inline struct inode *locks_inode(const struct file *f)
1078{
1079 return f->f_path.dentry->d_inode;
1080}
1081 1074
1082#ifdef CONFIG_FILE_LOCKING 1075#ifdef CONFIG_FILE_LOCKING
1083extern int fcntl_getlk(struct file *, unsigned int, struct flock *); 1076extern int fcntl_getlk(struct file *, unsigned int, struct flock *);
@@ -1262,7 +1255,7 @@ static inline struct inode *file_inode(const struct file *f)
1262 1255
1263static inline struct dentry *file_dentry(const struct file *file) 1256static inline struct dentry *file_dentry(const struct file *file)
1264{ 1257{
1265 return d_real(file->f_path.dentry, file_inode(file), 0, 0); 1258 return d_real(file->f_path.dentry, file_inode(file));
1266} 1259}
1267 1260
1268static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) 1261static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
@@ -1318,7 +1311,6 @@ extern int send_sigurg(struct fown_struct *fown);
1318 1311
1319/* These sb flags are internal to the kernel */ 1312/* These sb flags are internal to the kernel */
1320#define SB_SUBMOUNT (1<<26) 1313#define SB_SUBMOUNT (1<<26)
1321#define SB_NOREMOTELOCK (1<<27)
1322#define SB_NOSEC (1<<28) 1314#define SB_NOSEC (1<<28)
1323#define SB_BORN (1<<29) 1315#define SB_BORN (1<<29)
1324#define SB_ACTIVE (1<<30) 1316#define SB_ACTIVE (1<<30)
@@ -1647,6 +1639,8 @@ int vfs_mkobj(struct dentry *, umode_t,
1647 int (*f)(struct dentry *, umode_t, void *), 1639 int (*f)(struct dentry *, umode_t, void *),
1648 void *); 1640 void *);
1649 1641
1642extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1643
1650/* 1644/*
1651 * VFS file helper functions. 1645 * VFS file helper functions.
1652 */ 1646 */
@@ -1765,7 +1759,7 @@ struct file_operations {
1765 loff_t, size_t, unsigned int); 1759 loff_t, size_t, unsigned int);
1766 int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, 1760 int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
1767 u64); 1761 u64);
1768 ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *, 1762 int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
1769 u64); 1763 u64);
1770} __randomize_layout; 1764} __randomize_layout;
1771 1765
@@ -1838,6 +1832,10 @@ extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
1838 loff_t len, bool *is_same); 1832 loff_t len, bool *is_same);
1839extern int vfs_dedupe_file_range(struct file *file, 1833extern int vfs_dedupe_file_range(struct file *file,
1840 struct file_dedupe_range *same); 1834 struct file_dedupe_range *same);
1835extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
1836 struct file *dst_file, loff_t dst_pos,
1837 u64 len);
1838
1841 1839
1842struct super_operations { 1840struct super_operations {
1843 struct inode *(*alloc_inode)(struct super_block *sb); 1841 struct inode *(*alloc_inode)(struct super_block *sb);
@@ -2096,6 +2094,7 @@ enum file_time_flags {
2096 S_VERSION = 8, 2094 S_VERSION = 8,
2097}; 2095};
2098 2096
2097extern bool atime_needs_update(const struct path *, struct inode *);
2099extern void touch_atime(const struct path *); 2098extern void touch_atime(const struct path *);
2100static inline void file_accessed(struct file *file) 2099static inline void file_accessed(struct file *file)
2101{ 2100{
@@ -2441,6 +2440,8 @@ extern struct file *filp_open(const char *, int, umode_t);
2441extern struct file *file_open_root(struct dentry *, struct vfsmount *, 2440extern struct file *file_open_root(struct dentry *, struct vfsmount *,
2442 const char *, int, umode_t); 2441 const char *, int, umode_t);
2443extern struct file * dentry_open(const struct path *, int, const struct cred *); 2442extern struct file * dentry_open(const struct path *, int, const struct cred *);
2443extern struct file * open_with_fake_path(const struct path *, int,
2444 struct inode*, const struct cred *);
2444static inline struct file *file_clone_open(struct file *file) 2445static inline struct file *file_clone_open(struct file *file)
2445{ 2446{
2446 return dentry_open(&file->f_path, file->f_flags, file->f_cred); 2447 return dentry_open(&file->f_path, file->f_flags, file->f_cred);
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index bdaf22582f6e..fd1ce10553bf 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -30,11 +30,7 @@ static inline int fsnotify_parent(const struct path *path, struct dentry *dentry
30static inline int fsnotify_perm(struct file *file, int mask) 30static inline int fsnotify_perm(struct file *file, int mask)
31{ 31{
32 const struct path *path = &file->f_path; 32 const struct path *path = &file->f_path;
33 /* 33 struct inode *inode = file_inode(file);
34 * Do not use file_inode() here or anywhere in this file to get the
35 * inode. That would break *notify on overlayfs.
36 */
37 struct inode *inode = path->dentry->d_inode;
38 __u32 fsnotify_mask = 0; 34 __u32 fsnotify_mask = 0;
39 int ret; 35 int ret;
40 36
@@ -178,7 +174,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
178static inline void fsnotify_access(struct file *file) 174static inline void fsnotify_access(struct file *file)
179{ 175{
180 const struct path *path = &file->f_path; 176 const struct path *path = &file->f_path;
181 struct inode *inode = path->dentry->d_inode; 177 struct inode *inode = file_inode(file);
182 __u32 mask = FS_ACCESS; 178 __u32 mask = FS_ACCESS;
183 179
184 if (S_ISDIR(inode->i_mode)) 180 if (S_ISDIR(inode->i_mode))
@@ -196,7 +192,7 @@ static inline void fsnotify_access(struct file *file)
196static inline void fsnotify_modify(struct file *file) 192static inline void fsnotify_modify(struct file *file)
197{ 193{
198 const struct path *path = &file->f_path; 194 const struct path *path = &file->f_path;
199 struct inode *inode = path->dentry->d_inode; 195 struct inode *inode = file_inode(file);
200 __u32 mask = FS_MODIFY; 196 __u32 mask = FS_MODIFY;
201 197
202 if (S_ISDIR(inode->i_mode)) 198 if (S_ISDIR(inode->i_mode))
@@ -214,7 +210,7 @@ static inline void fsnotify_modify(struct file *file)
214static inline void fsnotify_open(struct file *file) 210static inline void fsnotify_open(struct file *file)
215{ 211{
216 const struct path *path = &file->f_path; 212 const struct path *path = &file->f_path;
217 struct inode *inode = path->dentry->d_inode; 213 struct inode *inode = file_inode(file);
218 __u32 mask = FS_OPEN; 214 __u32 mask = FS_OPEN;
219 215
220 if (S_ISDIR(inode->i_mode)) 216 if (S_ISDIR(inode->i_mode))
@@ -230,7 +226,7 @@ static inline void fsnotify_open(struct file *file)
230static inline void fsnotify_close(struct file *file) 226static inline void fsnotify_close(struct file *file)
231{ 227{
232 const struct path *path = &file->f_path; 228 const struct path *path = &file->f_path;
233 struct inode *inode = path->dentry->d_inode; 229 struct inode *inode = file_inode(file);
234 fmode_t mode = file->f_mode; 230 fmode_t mode = file->f_mode;
235 __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; 231 __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE;
236 232