diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-03-03 14:55:57 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-03-03 15:02:42 -0500 |
commit | e58bc927835a6f5ddbe4d2e069c9082b706810e7 (patch) | |
tree | a7d6a2d637e324c5dad98540d1d293aaf3924d28 | |
parent | 590dce2d4934fb909b112cd80c80486362337744 (diff) | |
parent | 51f8f3c4e22535933ef9aecc00e9a6069e051b57 (diff) |
Merge branch 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs
Pull overlayfs updates from Miklos Szeredi:
"Because copy up can take a long time, serialized copy ups could be a
big performance bottleneck. This update allows concurrent copy up of
regular files, eliminating this potential problem.
There are also minor fixes."
* 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs:
ovl: drop CAP_SYS_RESOURCE from saved mounter's credentials
ovl: properly implement sync_filesystem()
ovl: concurrent copy up of regular files
ovl: introduce copy up waitqueue
ovl: copy up regular file using O_TMPFILE
ovl: rearrange code in ovl_copy_up_locked()
ovl: check if upperdir fs supports O_TMPFILE
-rw-r--r-- | fs/overlayfs/copy_up.c | 91 | ||||
-rw-r--r-- | fs/overlayfs/overlayfs.h | 11 | ||||
-rw-r--r-- | fs/overlayfs/ovl_entry.h | 3 | ||||
-rw-r--r-- | fs/overlayfs/super.c | 39 | ||||
-rw-r--r-- | fs/overlayfs/util.c | 31 |
5 files changed, 148 insertions, 27 deletions
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 2d4985018fdb..906ea6c93260 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/fdtable.h> | 21 | #include <linux/fdtable.h> |
22 | #include <linux/ratelimit.h> | 22 | #include <linux/ratelimit.h> |
23 | #include "overlayfs.h" | 23 | #include "overlayfs.h" |
24 | #include "ovl_entry.h" | ||
24 | 25 | ||
25 | #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) | 26 | #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) |
26 | 27 | ||
@@ -233,12 +234,14 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) | |||
233 | 234 | ||
234 | static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, | 235 | static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, |
235 | struct dentry *dentry, struct path *lowerpath, | 236 | struct dentry *dentry, struct path *lowerpath, |
236 | struct kstat *stat, const char *link) | 237 | struct kstat *stat, const char *link, |
238 | struct kstat *pstat, bool tmpfile) | ||
237 | { | 239 | { |
238 | struct inode *wdir = workdir->d_inode; | 240 | struct inode *wdir = workdir->d_inode; |
239 | struct inode *udir = upperdir->d_inode; | 241 | struct inode *udir = upperdir->d_inode; |
240 | struct dentry *newdentry = NULL; | 242 | struct dentry *newdentry = NULL; |
241 | struct dentry *upper = NULL; | 243 | struct dentry *upper = NULL; |
244 | struct dentry *temp = NULL; | ||
242 | int err; | 245 | int err; |
243 | const struct cred *old_creds = NULL; | 246 | const struct cred *old_creds = NULL; |
244 | struct cred *new_creds = NULL; | 247 | struct cred *new_creds = NULL; |
@@ -249,25 +252,30 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, | |||
249 | .link = link | 252 | .link = link |
250 | }; | 253 | }; |
251 | 254 | ||
252 | newdentry = ovl_lookup_temp(workdir, dentry); | ||
253 | err = PTR_ERR(newdentry); | ||
254 | if (IS_ERR(newdentry)) | ||
255 | goto out; | ||
256 | |||
257 | upper = lookup_one_len(dentry->d_name.name, upperdir, | 255 | upper = lookup_one_len(dentry->d_name.name, upperdir, |
258 | dentry->d_name.len); | 256 | dentry->d_name.len); |
259 | err = PTR_ERR(upper); | 257 | err = PTR_ERR(upper); |
260 | if (IS_ERR(upper)) | 258 | if (IS_ERR(upper)) |
261 | goto out1; | 259 | goto out; |
262 | 260 | ||
263 | err = security_inode_copy_up(dentry, &new_creds); | 261 | err = security_inode_copy_up(dentry, &new_creds); |
264 | if (err < 0) | 262 | if (err < 0) |
265 | goto out2; | 263 | goto out1; |
266 | 264 | ||
267 | if (new_creds) | 265 | if (new_creds) |
268 | old_creds = override_creds(new_creds); | 266 | old_creds = override_creds(new_creds); |
269 | 267 | ||
270 | err = ovl_create_real(wdir, newdentry, &cattr, NULL, true); | 268 | if (tmpfile) |
269 | temp = ovl_do_tmpfile(upperdir, stat->mode); | ||
270 | else | ||
271 | temp = ovl_lookup_temp(workdir, dentry); | ||
272 | err = PTR_ERR(temp); | ||
273 | if (IS_ERR(temp)) | ||
274 | goto out1; | ||
275 | |||
276 | err = 0; | ||
277 | if (!tmpfile) | ||
278 | err = ovl_create_real(wdir, temp, &cattr, NULL, true); | ||
271 | 279 | ||
272 | if (new_creds) { | 280 | if (new_creds) { |
273 | revert_creds(old_creds); | 281 | revert_creds(old_creds); |
@@ -282,39 +290,55 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, | |||
282 | 290 | ||
283 | ovl_path_upper(dentry, &upperpath); | 291 | ovl_path_upper(dentry, &upperpath); |
284 | BUG_ON(upperpath.dentry != NULL); | 292 | BUG_ON(upperpath.dentry != NULL); |
285 | upperpath.dentry = newdentry; | 293 | upperpath.dentry = temp; |
294 | |||
295 | if (tmpfile) { | ||
296 | inode_unlock(udir); | ||
297 | err = ovl_copy_up_data(lowerpath, &upperpath, | ||
298 | stat->size); | ||
299 | inode_lock_nested(udir, I_MUTEX_PARENT); | ||
300 | } else { | ||
301 | err = ovl_copy_up_data(lowerpath, &upperpath, | ||
302 | stat->size); | ||
303 | } | ||
286 | 304 | ||
287 | err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); | ||
288 | if (err) | 305 | if (err) |
289 | goto out_cleanup; | 306 | goto out_cleanup; |
290 | } | 307 | } |
291 | 308 | ||
292 | err = ovl_copy_xattr(lowerpath->dentry, newdentry); | 309 | err = ovl_copy_xattr(lowerpath->dentry, temp); |
293 | if (err) | 310 | if (err) |
294 | goto out_cleanup; | 311 | goto out_cleanup; |
295 | 312 | ||
296 | inode_lock(newdentry->d_inode); | 313 | inode_lock(temp->d_inode); |
297 | err = ovl_set_attr(newdentry, stat); | 314 | err = ovl_set_attr(temp, stat); |
298 | inode_unlock(newdentry->d_inode); | 315 | inode_unlock(temp->d_inode); |
299 | if (err) | 316 | if (err) |
300 | goto out_cleanup; | 317 | goto out_cleanup; |
301 | 318 | ||
302 | err = ovl_do_rename(wdir, newdentry, udir, upper, 0); | 319 | if (tmpfile) |
320 | err = ovl_do_link(temp, udir, upper, true); | ||
321 | else | ||
322 | err = ovl_do_rename(wdir, temp, udir, upper, 0); | ||
303 | if (err) | 323 | if (err) |
304 | goto out_cleanup; | 324 | goto out_cleanup; |
305 | 325 | ||
326 | newdentry = dget(tmpfile ? upper : temp); | ||
306 | ovl_dentry_update(dentry, newdentry); | 327 | ovl_dentry_update(dentry, newdentry); |
307 | ovl_inode_update(d_inode(dentry), d_inode(newdentry)); | 328 | ovl_inode_update(d_inode(dentry), d_inode(newdentry)); |
308 | newdentry = NULL; | 329 | |
330 | /* Restore timestamps on parent (best effort) */ | ||
331 | ovl_set_timestamps(upperdir, pstat); | ||
309 | out2: | 332 | out2: |
310 | dput(upper); | 333 | dput(temp); |
311 | out1: | 334 | out1: |
312 | dput(newdentry); | 335 | dput(upper); |
313 | out: | 336 | out: |
314 | return err; | 337 | return err; |
315 | 338 | ||
316 | out_cleanup: | 339 | out_cleanup: |
317 | ovl_cleanup(wdir, newdentry); | 340 | if (!tmpfile) |
341 | ovl_cleanup(wdir, temp); | ||
318 | goto out2; | 342 | goto out2; |
319 | } | 343 | } |
320 | 344 | ||
@@ -338,6 +362,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, | |||
338 | struct dentry *lowerdentry = lowerpath->dentry; | 362 | struct dentry *lowerdentry = lowerpath->dentry; |
339 | struct dentry *upperdir; | 363 | struct dentry *upperdir; |
340 | const char *link = NULL; | 364 | const char *link = NULL; |
365 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
341 | 366 | ||
342 | if (WARN_ON(!workdir)) | 367 | if (WARN_ON(!workdir)) |
343 | return -EROFS; | 368 | return -EROFS; |
@@ -358,6 +383,25 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, | |||
358 | return PTR_ERR(link); | 383 | return PTR_ERR(link); |
359 | } | 384 | } |
360 | 385 | ||
386 | /* Should we copyup with O_TMPFILE or with workdir? */ | ||
387 | if (S_ISREG(stat->mode) && ofs->tmpfile) { | ||
388 | err = ovl_copy_up_start(dentry); | ||
389 | /* err < 0: interrupted, err > 0: raced with another copy-up */ | ||
390 | if (unlikely(err)) { | ||
391 | pr_debug("ovl_copy_up_start(%pd2) = %i\n", dentry, err); | ||
392 | if (err > 0) | ||
393 | err = 0; | ||
394 | goto out_done; | ||
395 | } | ||
396 | |||
397 | inode_lock_nested(upperdir->d_inode, I_MUTEX_PARENT); | ||
398 | err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, | ||
399 | stat, link, &pstat, true); | ||
400 | inode_unlock(upperdir->d_inode); | ||
401 | ovl_copy_up_end(dentry); | ||
402 | goto out_done; | ||
403 | } | ||
404 | |||
361 | err = -EIO; | 405 | err = -EIO; |
362 | if (lock_rename(workdir, upperdir) != NULL) { | 406 | if (lock_rename(workdir, upperdir) != NULL) { |
363 | pr_err("overlayfs: failed to lock workdir+upperdir\n"); | 407 | pr_err("overlayfs: failed to lock workdir+upperdir\n"); |
@@ -370,13 +414,10 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, | |||
370 | } | 414 | } |
371 | 415 | ||
372 | err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, | 416 | err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, |
373 | stat, link); | 417 | stat, link, &pstat, false); |
374 | if (!err) { | ||
375 | /* Restore timestamps on parent (best effort) */ | ||
376 | ovl_set_timestamps(upperdir, &pstat); | ||
377 | } | ||
378 | out_unlock: | 418 | out_unlock: |
379 | unlock_rename(workdir, upperdir); | 419 | unlock_rename(workdir, upperdir); |
420 | out_done: | ||
380 | do_delayed_call(&done); | 421 | do_delayed_call(&done); |
381 | 422 | ||
382 | return err; | 423 | return err; |
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 8af450b0e57a..741dc0b6931f 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h | |||
@@ -127,6 +127,15 @@ static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry) | |||
127 | return err; | 127 | return err; |
128 | } | 128 | } |
129 | 129 | ||
130 | static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode) | ||
131 | { | ||
132 | struct dentry *ret = vfs_tmpfile(dentry, mode, 0); | ||
133 | int err = IS_ERR(ret) ? PTR_ERR(ret) : 0; | ||
134 | |||
135 | pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err); | ||
136 | return ret; | ||
137 | } | ||
138 | |||
130 | static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper) | 139 | static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper) |
131 | { | 140 | { |
132 | unsigned long x = (unsigned long) READ_ONCE(inode->i_private); | 141 | unsigned long x = (unsigned long) READ_ONCE(inode->i_private); |
@@ -169,6 +178,8 @@ void ovl_dentry_version_inc(struct dentry *dentry); | |||
169 | u64 ovl_dentry_version_get(struct dentry *dentry); | 178 | u64 ovl_dentry_version_get(struct dentry *dentry); |
170 | bool ovl_is_whiteout(struct dentry *dentry); | 179 | bool ovl_is_whiteout(struct dentry *dentry); |
171 | struct file *ovl_path_open(struct path *path, int flags); | 180 | struct file *ovl_path_open(struct path *path, int flags); |
181 | int ovl_copy_up_start(struct dentry *dentry); | ||
182 | void ovl_copy_up_end(struct dentry *dentry); | ||
172 | 183 | ||
173 | /* namei.c */ | 184 | /* namei.c */ |
174 | int ovl_path_next(int idx, struct dentry *dentry, struct path *path); | 185 | int ovl_path_next(int idx, struct dentry *dentry, struct path *path); |
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index d14bca1850d9..59614faa14c3 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h | |||
@@ -27,6 +27,8 @@ struct ovl_fs { | |||
27 | struct ovl_config config; | 27 | struct ovl_config config; |
28 | /* creds of process who forced instantiation of super block */ | 28 | /* creds of process who forced instantiation of super block */ |
29 | const struct cred *creator_cred; | 29 | const struct cred *creator_cred; |
30 | bool tmpfile; | ||
31 | wait_queue_head_t copyup_wq; | ||
30 | }; | 32 | }; |
31 | 33 | ||
32 | /* private information held for every overlayfs dentry */ | 34 | /* private information held for every overlayfs dentry */ |
@@ -38,6 +40,7 @@ struct ovl_entry { | |||
38 | u64 version; | 40 | u64 version; |
39 | const char *redirect; | 41 | const char *redirect; |
40 | bool opaque; | 42 | bool opaque; |
43 | bool copying; | ||
41 | }; | 44 | }; |
42 | struct rcu_head rcu; | 45 | struct rcu_head rcu; |
43 | }; | 46 | }; |
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 9aa37c2f7f7d..c9e70d39c1ea 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c | |||
@@ -161,6 +161,25 @@ static void ovl_put_super(struct super_block *sb) | |||
161 | kfree(ufs); | 161 | kfree(ufs); |
162 | } | 162 | } |
163 | 163 | ||
164 | static int ovl_sync_fs(struct super_block *sb, int wait) | ||
165 | { | ||
166 | struct ovl_fs *ufs = sb->s_fs_info; | ||
167 | struct super_block *upper_sb; | ||
168 | int ret; | ||
169 | |||
170 | if (!ufs->upper_mnt) | ||
171 | return 0; | ||
172 | upper_sb = ufs->upper_mnt->mnt_sb; | ||
173 | if (!upper_sb->s_op->sync_fs) | ||
174 | return 0; | ||
175 | |||
176 | /* real inodes have already been synced by sync_filesystem(ovl_sb) */ | ||
177 | down_read(&upper_sb->s_umount); | ||
178 | ret = upper_sb->s_op->sync_fs(upper_sb, wait); | ||
179 | up_read(&upper_sb->s_umount); | ||
180 | return ret; | ||
181 | } | ||
182 | |||
164 | /** | 183 | /** |
165 | * ovl_statfs | 184 | * ovl_statfs |
166 | * @sb: The overlayfs super block | 185 | * @sb: The overlayfs super block |
@@ -223,6 +242,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) | |||
223 | 242 | ||
224 | static const struct super_operations ovl_super_operations = { | 243 | static const struct super_operations ovl_super_operations = { |
225 | .put_super = ovl_put_super, | 244 | .put_super = ovl_put_super, |
245 | .sync_fs = ovl_sync_fs, | ||
226 | .statfs = ovl_statfs, | 246 | .statfs = ovl_statfs, |
227 | .show_options = ovl_show_options, | 247 | .show_options = ovl_show_options, |
228 | .remount_fs = ovl_remount, | 248 | .remount_fs = ovl_remount, |
@@ -702,6 +722,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) | |||
702 | unsigned int stacklen = 0; | 722 | unsigned int stacklen = 0; |
703 | unsigned int i; | 723 | unsigned int i; |
704 | bool remote = false; | 724 | bool remote = false; |
725 | struct cred *cred; | ||
705 | int err; | 726 | int err; |
706 | 727 | ||
707 | err = -ENOMEM; | 728 | err = -ENOMEM; |
@@ -709,6 +730,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) | |||
709 | if (!ufs) | 730 | if (!ufs) |
710 | goto out; | 731 | goto out; |
711 | 732 | ||
733 | init_waitqueue_head(&ufs->copyup_wq); | ||
712 | ufs->config.redirect_dir = ovl_redirect_dir_def; | 734 | ufs->config.redirect_dir = ovl_redirect_dir_def; |
713 | err = ovl_parse_opt((char *) data, &ufs->config); | 735 | err = ovl_parse_opt((char *) data, &ufs->config); |
714 | if (err) | 736 | if (err) |
@@ -826,6 +848,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) | |||
826 | * creation of workdir in previous step. | 848 | * creation of workdir in previous step. |
827 | */ | 849 | */ |
828 | if (ufs->workdir) { | 850 | if (ufs->workdir) { |
851 | struct dentry *temp; | ||
852 | |||
829 | err = ovl_check_d_type_supported(&workpath); | 853 | err = ovl_check_d_type_supported(&workpath); |
830 | if (err < 0) | 854 | if (err < 0) |
831 | goto out_put_workdir; | 855 | goto out_put_workdir; |
@@ -837,6 +861,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) | |||
837 | */ | 861 | */ |
838 | if (!err) | 862 | if (!err) |
839 | pr_warn("overlayfs: upper fs needs to support d_type.\n"); | 863 | pr_warn("overlayfs: upper fs needs to support d_type.\n"); |
864 | |||
865 | /* Check if upper/work fs supports O_TMPFILE */ | ||
866 | temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0); | ||
867 | ufs->tmpfile = !IS_ERR(temp); | ||
868 | if (ufs->tmpfile) | ||
869 | dput(temp); | ||
870 | else | ||
871 | pr_warn("overlayfs: upper fs does not support tmpfile.\n"); | ||
840 | } | 872 | } |
841 | } | 873 | } |
842 | 874 | ||
@@ -871,10 +903,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) | |||
871 | else | 903 | else |
872 | sb->s_d_op = &ovl_dentry_operations; | 904 | sb->s_d_op = &ovl_dentry_operations; |
873 | 905 | ||
874 | ufs->creator_cred = prepare_creds(); | 906 | ufs->creator_cred = cred = prepare_creds(); |
875 | if (!ufs->creator_cred) | 907 | if (!cred) |
876 | goto out_put_lower_mnt; | 908 | goto out_put_lower_mnt; |
877 | 909 | ||
910 | /* Never override disk quota limits or use reserved space */ | ||
911 | cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); | ||
912 | |||
878 | err = -ENOMEM; | 913 | err = -ENOMEM; |
879 | oe = ovl_alloc_entry(numlower); | 914 | oe = ovl_alloc_entry(numlower); |
880 | if (!oe) | 915 | if (!oe) |
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 9dc1c0af586b..1953986ee6bc 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
13 | #include <linux/cred.h> | 13 | #include <linux/cred.h> |
14 | #include <linux/xattr.h> | 14 | #include <linux/xattr.h> |
15 | #include <linux/sched/signal.h> | ||
15 | #include "overlayfs.h" | 16 | #include "overlayfs.h" |
16 | #include "ovl_entry.h" | 17 | #include "ovl_entry.h" |
17 | 18 | ||
@@ -264,3 +265,33 @@ struct file *ovl_path_open(struct path *path, int flags) | |||
264 | { | 265 | { |
265 | return dentry_open(path, flags | O_NOATIME, current_cred()); | 266 | return dentry_open(path, flags | O_NOATIME, current_cred()); |
266 | } | 267 | } |
268 | |||
269 | int ovl_copy_up_start(struct dentry *dentry) | ||
270 | { | ||
271 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
272 | struct ovl_entry *oe = dentry->d_fsdata; | ||
273 | int err; | ||
274 | |||
275 | spin_lock(&ofs->copyup_wq.lock); | ||
276 | err = wait_event_interruptible_locked(ofs->copyup_wq, !oe->copying); | ||
277 | if (!err) { | ||
278 | if (oe->__upperdentry) | ||
279 | err = 1; /* Already copied up */ | ||
280 | else | ||
281 | oe->copying = true; | ||
282 | } | ||
283 | spin_unlock(&ofs->copyup_wq.lock); | ||
284 | |||
285 | return err; | ||
286 | } | ||
287 | |||
288 | void ovl_copy_up_end(struct dentry *dentry) | ||
289 | { | ||
290 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
291 | struct ovl_entry *oe = dentry->d_fsdata; | ||
292 | |||
293 | spin_lock(&ofs->copyup_wq.lock); | ||
294 | oe->copying = false; | ||
295 | wake_up_locked(&ofs->copyup_wq); | ||
296 | spin_unlock(&ofs->copyup_wq.lock); | ||
297 | } | ||