aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-02-05 16:05:20 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2018-02-05 16:05:20 -0500
commit139351f1f98546c312a1942215977ea703b383b8 (patch)
tree1b1d35d469f2461a2bd88950e5a7996a26aa6e9f
parent2deb41b245320f0eefb535a5c8ea19ed66b33c04 (diff)
parent9b6faee074702bbbc207e7027b9416c2d8fea9fe (diff)
Merge branch 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs
Pull overlayfs updates from Miklos Szeredi: "This work from Amir adds NFS export capability to overlayfs. NFS exporting an overlay filesystem is a challenge because we want to keep track of any copy-up of a file or directory between encoding the file handle and decoding it. This is achieved by indexing copied up objects by lower layer file handle. The index is already used for hard links, this patchset extends the use to NFS file handle decoding" * 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs: (51 commits) ovl: check ERR_PTR() return value from ovl_encode_fh() ovl: fix regression in fsnotify of overlay merge dir ovl: wire up NFS export operations ovl: lookup indexed ancestor of lower dir ovl: lookup connected ancestor of dir in inode cache ovl: hash non-indexed dir by upper inode for NFS export ovl: decode pure lower dir file handles ovl: decode indexed dir file handles ovl: decode lower file handles of unlinked but open files ovl: decode indexed non-dir file handles ovl: decode lower non-dir file handles ovl: encode lower file handles ovl: copy up before encoding non-connectable dir file handle ovl: encode non-indexed upper file handles ovl: decode connected upper dir file handles ovl: decode pure upper file handles ovl: encode pure upper file handles ovl: document NFS export vfs: factor out helpers d_instantiate_anon() and d_alloc_anon() ovl: store 'has_upper' and 'opaque' as bit flags ...
-rw-r--r--Documentation/filesystems/overlayfs.txt106
-rw-r--r--fs/dcache.c88
-rw-r--r--fs/overlayfs/Kconfig31
-rw-r--r--fs/overlayfs/Makefile3
-rw-r--r--fs/overlayfs/copy_up.c188
-rw-r--r--fs/overlayfs/dir.c175
-rw-r--r--fs/overlayfs/export.c715
-rw-r--r--fs/overlayfs/inode.c106
-rw-r--r--fs/overlayfs/namei.c533
-rw-r--r--fs/overlayfs/overlayfs.h66
-rw-r--r--fs/overlayfs/ovl_entry.h11
-rw-r--r--fs/overlayfs/readdir.c57
-rw-r--r--fs/overlayfs/super.c125
-rw-r--r--fs/overlayfs/util.c108
-rw-r--r--include/linux/dcache.h2
15 files changed, 1905 insertions, 409 deletions
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index e6a5f4912b6d..6ea1e64d1464 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -190,6 +190,20 @@ Mount options:
190 Redirects are not created and not followed (equivalent to "redirect_dir=off" 190 Redirects are not created and not followed (equivalent to "redirect_dir=off"
191 if "redirect_always_follow" feature is not enabled). 191 if "redirect_always_follow" feature is not enabled).
192 192
193When the NFS export feature is enabled, every copied up directory is
194indexed by the file handle of the lower inode and a file handle of the
195upper directory is stored in a "trusted.overlay.upper" extended attribute
196on the index entry. On lookup of a merged directory, if the upper
197directory does not match the file handle stored in the index, that is an
198indication that multiple upper directories may be redirected to the same
199lower directory. In that case, lookup returns an error and warns about
200a possible inconsistency.
201
202Because lower layer redirects cannot be verified with the index, enabling
203NFS export support on an overlay filesystem with no upper layer requires
204turning off redirect follow (e.g. "redirect_dir=nofollow").
205
206
193Non-directories 207Non-directories
194--------------- 208---------------
195 209
@@ -281,9 +295,9 @@ filesystem, so both st_dev and st_ino of the file may change.
281 295
282Any open files referring to this inode will access the old data. 296Any open files referring to this inode will access the old data.
283 297
284If a file with multiple hard links is copied up, then this will 298Unless "inode index" feature is enabled, if a file with multiple hard
285"break" the link. Changes will not be propagated to other names 299links is copied up, then this will "break" the link. Changes will not be
286referring to the same inode. 300propagated to other names referring to the same inode.
287 301
288Unless "redirect_dir" feature is enabled, rename(2) on a lower or merged 302Unless "redirect_dir" feature is enabled, rename(2) on a lower or merged
289directory will fail with EXDEV. 303directory will fail with EXDEV.
@@ -299,6 +313,92 @@ filesystem are not allowed. If the underlying filesystem is changed,
299the behavior of the overlay is undefined, though it will not result in 313the behavior of the overlay is undefined, though it will not result in
300a crash or deadlock. 314a crash or deadlock.
301 315
316When the overlay NFS export feature is enabled, overlay filesystems
317behavior on offline changes of the underlying lower layer is different
318than the behavior when NFS export is disabled.
319
320On every copy_up, an NFS file handle of the lower inode, along with the
321UUID of the lower filesystem, are encoded and stored in an extended
322attribute "trusted.overlay.origin" on the upper inode.
323
324When the NFS export feature is enabled, a lookup of a merged directory,
325that found a lower directory at the lookup path or at the path pointed
326to by the "trusted.overlay.redirect" extended attribute, will verify
327that the found lower directory file handle and lower filesystem UUID
328match the origin file handle that was stored at copy_up time. If a
329found lower directory does not match the stored origin, that directory
330will not be merged with the upper directory.
331
332
333
334NFS export
335----------
336
337When the underlying filesystems support NFS export and the "nfs_export"
338feature is enabled, an overlay filesystem may be exported to NFS.
339
340With the "nfs_export" feature, on copy_up of any lower object, an index
341entry is created under the index directory. The index entry name is the
342hexadecimal representation of the copy up origin file handle. For a
343non-directory object, the index entry is a hard link to the upper inode.
344For a directory object, the index entry has an extended attribute
345"trusted.overlay.upper" with an encoded file handle of the upper
346directory inode.
347
348When encoding a file handle from an overlay filesystem object, the
349following rules apply:
350
3511. For a non-upper object, encode a lower file handle from lower inode
3522. For an indexed object, encode a lower file handle from copy_up origin
3533. For a pure-upper object and for an existing non-indexed upper object,
354 encode an upper file handle from upper inode
355
356The encoded overlay file handle includes:
357 - Header including path type information (e.g. lower/upper)
358 - UUID of the underlying filesystem
359 - Underlying filesystem encoding of underlying inode
360
361This encoding format is identical to the encoding format of file handles that
362are stored in extended attribute "trusted.overlay.origin".
363
364When decoding an overlay file handle, the following steps are followed:
365
3661. Find underlying layer by UUID and path type information.
3672. Decode the underlying filesystem file handle to underlying dentry.
3683. For a lower file handle, lookup the handle in index directory by name.
3694. If a whiteout is found in index, return ESTALE. This represents an
370 overlay object that was deleted after its file handle was encoded.
3715. For a non-directory, instantiate a disconnected overlay dentry from the
372 decoded underlying dentry, the path type and index inode, if found.
3736. For a directory, use the connected underlying decoded dentry, path type
374 and index, to lookup a connected overlay dentry.
375
376Decoding a non-directory file handle may return a disconnected dentry.
377copy_up of that disconnected dentry will create an upper index entry with
378no upper alias.
379
380When overlay filesystem has multiple lower layers, a middle layer
381directory may have a "redirect" to lower directory. Because middle layer
382"redirects" are not indexed, a lower file handle that was encoded from the
383"redirect" origin directory, cannot be used to find the middle or upper
384layer directory. Similarly, a lower file handle that was encoded from a
385descendant of the "redirect" origin directory, cannot be used to
386reconstruct a connected overlay path. To mitigate the cases of
387directories that cannot be decoded from a lower file handle, these
388directories are copied up on encode and encoded as an upper file handle.
389On an overlay filesystem with no upper layer this mitigation cannot be
390used. NFS export in this setup requires turning off redirect follow (e.g.
391"redirect_dir=nofollow").
392
393The overlay filesystem does not support non-directory connectable file
394handles, so exporting with the 'subtree_check' exportfs configuration will
395cause failures to lookup files over NFS.
396
397When the NFS export feature is enabled, all directory index entries are
398verified on mount time to check that upper file handles are not stale.
399This verification may cause significant overhead in some cases.
400
401
302Testsuite 402Testsuite
303--------- 403---------
304 404
diff --git a/fs/dcache.c b/fs/dcache.c
index cca2b377ff0a..7c38f39958bc 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1698,9 +1698,15 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1698} 1698}
1699EXPORT_SYMBOL(d_alloc); 1699EXPORT_SYMBOL(d_alloc);
1700 1700
1701struct dentry *d_alloc_anon(struct super_block *sb)
1702{
1703 return __d_alloc(sb, NULL);
1704}
1705EXPORT_SYMBOL(d_alloc_anon);
1706
1701struct dentry *d_alloc_cursor(struct dentry * parent) 1707struct dentry *d_alloc_cursor(struct dentry * parent)
1702{ 1708{
1703 struct dentry *dentry = __d_alloc(parent->d_sb, NULL); 1709 struct dentry *dentry = d_alloc_anon(parent->d_sb);
1704 if (dentry) { 1710 if (dentry) {
1705 dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR; 1711 dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR;
1706 dentry->d_parent = dget(parent); 1712 dentry->d_parent = dget(parent);
@@ -1886,7 +1892,7 @@ struct dentry *d_make_root(struct inode *root_inode)
1886 struct dentry *res = NULL; 1892 struct dentry *res = NULL;
1887 1893
1888 if (root_inode) { 1894 if (root_inode) {
1889 res = __d_alloc(root_inode->i_sb, NULL); 1895 res = d_alloc_anon(root_inode->i_sb);
1890 if (res) 1896 if (res)
1891 d_instantiate(res, root_inode); 1897 d_instantiate(res, root_inode);
1892 else 1898 else
@@ -1925,33 +1931,19 @@ struct dentry *d_find_any_alias(struct inode *inode)
1925} 1931}
1926EXPORT_SYMBOL(d_find_any_alias); 1932EXPORT_SYMBOL(d_find_any_alias);
1927 1933
1928static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) 1934static struct dentry *__d_instantiate_anon(struct dentry *dentry,
1935 struct inode *inode,
1936 bool disconnected)
1929{ 1937{
1930 struct dentry *tmp;
1931 struct dentry *res; 1938 struct dentry *res;
1932 unsigned add_flags; 1939 unsigned add_flags;
1933 1940
1934 if (!inode) 1941 security_d_instantiate(dentry, inode);
1935 return ERR_PTR(-ESTALE);
1936 if (IS_ERR(inode))
1937 return ERR_CAST(inode);
1938
1939 res = d_find_any_alias(inode);
1940 if (res)
1941 goto out_iput;
1942
1943 tmp = __d_alloc(inode->i_sb, NULL);
1944 if (!tmp) {
1945 res = ERR_PTR(-ENOMEM);
1946 goto out_iput;
1947 }
1948
1949 security_d_instantiate(tmp, inode);
1950 spin_lock(&inode->i_lock); 1942 spin_lock(&inode->i_lock);
1951 res = __d_find_any_alias(inode); 1943 res = __d_find_any_alias(inode);
1952 if (res) { 1944 if (res) {
1953 spin_unlock(&inode->i_lock); 1945 spin_unlock(&inode->i_lock);
1954 dput(tmp); 1946 dput(dentry);
1955 goto out_iput; 1947 goto out_iput;
1956 } 1948 }
1957 1949
@@ -1961,24 +1953,57 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
1961 if (disconnected) 1953 if (disconnected)
1962 add_flags |= DCACHE_DISCONNECTED; 1954 add_flags |= DCACHE_DISCONNECTED;
1963 1955
1964 spin_lock(&tmp->d_lock); 1956 spin_lock(&dentry->d_lock);
1965 __d_set_inode_and_type(tmp, inode, add_flags); 1957 __d_set_inode_and_type(dentry, inode, add_flags);
1966 hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry); 1958 hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
1967 if (!disconnected) { 1959 if (!disconnected) {
1968 hlist_bl_lock(&tmp->d_sb->s_roots); 1960 hlist_bl_lock(&dentry->d_sb->s_roots);
1969 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_roots); 1961 hlist_bl_add_head(&dentry->d_hash, &dentry->d_sb->s_roots);
1970 hlist_bl_unlock(&tmp->d_sb->s_roots); 1962 hlist_bl_unlock(&dentry->d_sb->s_roots);
1971 } 1963 }
1972 spin_unlock(&tmp->d_lock); 1964 spin_unlock(&dentry->d_lock);
1973 spin_unlock(&inode->i_lock); 1965 spin_unlock(&inode->i_lock);
1974 1966
1975 return tmp; 1967 return dentry;
1976 1968
1977 out_iput: 1969 out_iput:
1978 iput(inode); 1970 iput(inode);
1979 return res; 1971 return res;
1980} 1972}
1981 1973
1974struct dentry *d_instantiate_anon(struct dentry *dentry, struct inode *inode)
1975{
1976 return __d_instantiate_anon(dentry, inode, true);
1977}
1978EXPORT_SYMBOL(d_instantiate_anon);
1979
1980static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected)
1981{
1982 struct dentry *tmp;
1983 struct dentry *res;
1984
1985 if (!inode)
1986 return ERR_PTR(-ESTALE);
1987 if (IS_ERR(inode))
1988 return ERR_CAST(inode);
1989
1990 res = d_find_any_alias(inode);
1991 if (res)
1992 goto out_iput;
1993
1994 tmp = d_alloc_anon(inode->i_sb);
1995 if (!tmp) {
1996 res = ERR_PTR(-ENOMEM);
1997 goto out_iput;
1998 }
1999
2000 return __d_instantiate_anon(tmp, inode, disconnected);
2001
2002out_iput:
2003 iput(inode);
2004 return res;
2005}
2006
1982/** 2007/**
1983 * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode 2008 * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode
1984 * @inode: inode to allocate the dentry for 2009 * @inode: inode to allocate the dentry for
@@ -1999,7 +2024,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
1999 */ 2024 */
2000struct dentry *d_obtain_alias(struct inode *inode) 2025struct dentry *d_obtain_alias(struct inode *inode)
2001{ 2026{
2002 return __d_obtain_alias(inode, 1); 2027 return __d_obtain_alias(inode, true);
2003} 2028}
2004EXPORT_SYMBOL(d_obtain_alias); 2029EXPORT_SYMBOL(d_obtain_alias);
2005 2030
@@ -2020,7 +2045,7 @@ EXPORT_SYMBOL(d_obtain_alias);
2020 */ 2045 */
2021struct dentry *d_obtain_root(struct inode *inode) 2046struct dentry *d_obtain_root(struct inode *inode)
2022{ 2047{
2023 return __d_obtain_alias(inode, 0); 2048 return __d_obtain_alias(inode, false);
2024} 2049}
2025EXPORT_SYMBOL(d_obtain_root); 2050EXPORT_SYMBOL(d_obtain_root);
2026 2051
@@ -3527,6 +3552,7 @@ bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
3527 3552
3528 return result; 3553 return result;
3529} 3554}
3555EXPORT_SYMBOL(is_subdir);
3530 3556
3531static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) 3557static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
3532{ 3558{
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 5ac415466861..406e72de88f6 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -47,9 +47,28 @@ config OVERLAY_FS_INDEX
47 The inodes index feature prevents breaking of lower hardlinks on copy 47 The inodes index feature prevents breaking of lower hardlinks on copy
48 up. 48 up.
49 49
50 Note, that the inodes index feature is read-only backward compatible. 50 Note, that the inodes index feature is not backward compatible.
51 That is, mounting an overlay which has an index dir on a kernel that 51 That is, mounting an overlay which has an inodes index on a kernel
52 doesn't support this feature read-only, will not have any negative 52 that doesn't support this feature will have unexpected results.
53 outcomes. However, mounting the same overlay with an old kernel 53
54 read-write and then mounting it again with a new kernel, will have 54config OVERLAY_FS_NFS_EXPORT
55 unexpected results. 55 bool "Overlayfs: turn on NFS export feature by default"
56 depends on OVERLAY_FS
57 depends on OVERLAY_FS_INDEX
58 help
59 If this config option is enabled then overlay filesystems will use
60 the inodes index dir to decode overlay NFS file handles by default.
61 In this case, it is still possible to turn off NFS export support
62 globally with the "nfs_export=off" module option or on a filesystem
63 instance basis with the "nfs_export=off" mount option.
64
65 The NFS export feature creates an index on copy up of every file and
66 directory. This full index is used to detect overlay filesystems
67 inconsistencies on lookup, like redirect from multiple upper dirs to
68 the same lower dir. The full index may incur some overhead on mount
69 time, especially when verifying that directory file handles are not
70 stale.
71
72 Note, that the NFS export feature is not backward compatible.
73 That is, mounting an overlay which has a full index on a kernel
74 that doesn't support this feature will have unexpected results.
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
index 99373bbc1478..30802347a020 100644
--- a/fs/overlayfs/Makefile
+++ b/fs/overlayfs/Makefile
@@ -4,4 +4,5 @@
4 4
5obj-$(CONFIG_OVERLAY_FS) += overlay.o 5obj-$(CONFIG_OVERLAY_FS) += overlay.o
6 6
7overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o 7overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o \
8 export.o
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index eb3b8d39fb61..d855f508fa20 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -232,13 +232,13 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
232 return err; 232 return err;
233} 233}
234 234
235struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper) 235struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper)
236{ 236{
237 struct ovl_fh *fh; 237 struct ovl_fh *fh;
238 int fh_type, fh_len, dwords; 238 int fh_type, fh_len, dwords;
239 void *buf; 239 void *buf;
240 int buflen = MAX_HANDLE_SZ; 240 int buflen = MAX_HANDLE_SZ;
241 uuid_t *uuid = &lower->d_sb->s_uuid; 241 uuid_t *uuid = &real->d_sb->s_uuid;
242 242
243 buf = kmalloc(buflen, GFP_KERNEL); 243 buf = kmalloc(buflen, GFP_KERNEL);
244 if (!buf) 244 if (!buf)
@@ -250,7 +250,7 @@ struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper)
250 * the price or reconnecting the dentry. 250 * the price or reconnecting the dentry.
251 */ 251 */
252 dwords = buflen >> 2; 252 dwords = buflen >> 2;
253 fh_type = exportfs_encode_fh(lower, buf, &dwords, 0); 253 fh_type = exportfs_encode_fh(real, buf, &dwords, 0);
254 buflen = (dwords << 2); 254 buflen = (dwords << 2);
255 255
256 fh = ERR_PTR(-EIO); 256 fh = ERR_PTR(-EIO);
@@ -288,8 +288,8 @@ out:
288 return fh; 288 return fh;
289} 289}
290 290
291static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, 291int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
292 struct dentry *upper) 292 struct dentry *upper)
293{ 293{
294 const struct ovl_fh *fh = NULL; 294 const struct ovl_fh *fh = NULL;
295 int err; 295 int err;
@@ -315,6 +315,94 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
315 return err; 315 return err;
316} 316}
317 317
318/* Store file handle of @upper dir in @index dir entry */
319static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
320{
321 const struct ovl_fh *fh;
322 int err;
323
324 fh = ovl_encode_fh(upper, true);
325 if (IS_ERR(fh))
326 return PTR_ERR(fh);
327
328 err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh, fh->len, 0);
329
330 kfree(fh);
331 return err;
332}
333
334/*
335 * Create and install index entry.
336 *
337 * Caller must hold i_mutex on indexdir.
338 */
339static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
340 struct dentry *upper)
341{
342 struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
343 struct inode *dir = d_inode(indexdir);
344 struct dentry *index = NULL;
345 struct dentry *temp = NULL;
346 struct qstr name = { };
347 int err;
348
349 /*
350 * For now this is only used for creating index entry for directories,
351 * because non-dir are copied up directly to index and then hardlinked
352 * to upper dir.
353 *
354 * TODO: implement create index for non-dir, so we can call it when
355 * encoding file handle for non-dir in case index does not exist.
356 */
357 if (WARN_ON(!d_is_dir(dentry)))
358 return -EIO;
359
360 /* Directory not expected to be indexed before copy up */
361 if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
362 return -EIO;
363
364 err = ovl_get_index_name(origin, &name);
365 if (err)
366 return err;
367
368 temp = ovl_lookup_temp(indexdir);
369 if (IS_ERR(temp))
370 goto temp_err;
371
372 err = ovl_do_mkdir(dir, temp, S_IFDIR, true);
373 if (err)
374 goto out;
375
376 err = ovl_set_upper_fh(upper, temp);
377 if (err)
378 goto out_cleanup;
379
380 index = lookup_one_len(name.name, indexdir, name.len);
381 if (IS_ERR(index)) {
382 err = PTR_ERR(index);
383 } else {
384 err = ovl_do_rename(dir, temp, dir, index, 0);
385 dput(index);
386 }
387
388 if (err)
389 goto out_cleanup;
390
391out:
392 dput(temp);
393 kfree(name.name);
394 return err;
395
396temp_err:
397 err = PTR_ERR(temp);
398 temp = NULL;
399 goto out;
400
401out_cleanup:
402 ovl_cleanup(dir, temp);
403 goto out;
404}
405
318struct ovl_copy_up_ctx { 406struct ovl_copy_up_ctx {
319 struct dentry *parent; 407 struct dentry *parent;
320 struct dentry *dentry; 408 struct dentry *dentry;
@@ -327,6 +415,7 @@ struct ovl_copy_up_ctx {
327 struct dentry *workdir; 415 struct dentry *workdir;
328 bool tmpfile; 416 bool tmpfile;
329 bool origin; 417 bool origin;
418 bool indexed;
330}; 419};
331 420
332static int ovl_link_up(struct ovl_copy_up_ctx *c) 421static int ovl_link_up(struct ovl_copy_up_ctx *c)
@@ -361,7 +450,10 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
361 } 450 }
362 } 451 }
363 inode_unlock(udir); 452 inode_unlock(udir);
364 ovl_set_nlink_upper(c->dentry); 453 if (err)
454 return err;
455
456 err = ovl_set_nlink_upper(c->dentry);
365 457
366 return err; 458 return err;
367} 459}
@@ -498,6 +590,12 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
498 if (err) 590 if (err)
499 goto out_cleanup; 591 goto out_cleanup;
500 592
593 if (S_ISDIR(c->stat.mode) && c->indexed) {
594 err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
595 if (err)
596 goto out_cleanup;
597 }
598
501 if (c->tmpfile) { 599 if (c->tmpfile) {
502 inode_lock_nested(udir, I_MUTEX_PARENT); 600 inode_lock_nested(udir, I_MUTEX_PARENT);
503 err = ovl_install_temp(c, temp, &newdentry); 601 err = ovl_install_temp(c, temp, &newdentry);
@@ -536,20 +634,33 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
536{ 634{
537 int err; 635 int err;
538 struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info; 636 struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
539 bool indexed = false; 637 bool to_index = false;
540 638
541 if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) && 639 /*
542 c->stat.nlink > 1) 640 * Indexed non-dir is copied up directly to the index entry and then
543 indexed = true; 641 * hardlinked to upper dir. Indexed dir is copied up to indexdir,
642 * then index entry is created and then copied up dir installed.
643 * Copying dir up to indexdir instead of workdir simplifies locking.
644 */
645 if (ovl_need_index(c->dentry)) {
646 c->indexed = true;
647 if (S_ISDIR(c->stat.mode))
648 c->workdir = ovl_indexdir(c->dentry->d_sb);
649 else
650 to_index = true;
651 }
544 652
545 if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed) 653 if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index)
546 c->origin = true; 654 c->origin = true;
547 655
548 if (indexed) { 656 if (to_index) {
549 c->destdir = ovl_indexdir(c->dentry->d_sb); 657 c->destdir = ovl_indexdir(c->dentry->d_sb);
550 err = ovl_get_index_name(c->lowerpath.dentry, &c->destname); 658 err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
551 if (err) 659 if (err)
552 return err; 660 return err;
661 } else if (WARN_ON(!c->parent)) {
662 /* Disconnected dentry must be copied up to index dir */
663 return -EIO;
553 } else { 664 } else {
554 /* 665 /*
555 * Mark parent "impure" because it may now contain non-pure 666 * Mark parent "impure" because it may now contain non-pure
@@ -572,11 +683,17 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
572 } 683 }
573 } 684 }
574 685
575 if (indexed) { 686
576 if (!err) 687 if (err)
577 ovl_set_flag(OVL_INDEX, d_inode(c->dentry)); 688 goto out;
578 kfree(c->destname.name); 689
579 } else if (!err) { 690 if (c->indexed)
691 ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
692
693 if (to_index) {
694 /* Initialize nlink for copy up of disconnected dentry */
695 err = ovl_set_nlink_upper(c->dentry);
696 } else {
580 struct inode *udir = d_inode(c->destdir); 697 struct inode *udir = d_inode(c->destdir);
581 698
582 /* Restore timestamps on parent (best effort) */ 699 /* Restore timestamps on parent (best effort) */
@@ -587,6 +704,9 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
587 ovl_dentry_set_upper_alias(c->dentry); 704 ovl_dentry_set_upper_alias(c->dentry);
588 } 705 }
589 706
707out:
708 if (to_index)
709 kfree(c->destname.name);
590 return err; 710 return err;
591} 711}
592 712
@@ -611,14 +731,17 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
611 if (err) 731 if (err)
612 return err; 732 return err;
613 733
614 ovl_path_upper(parent, &parentpath); 734 if (parent) {
615 ctx.destdir = parentpath.dentry; 735 ovl_path_upper(parent, &parentpath);
616 ctx.destname = dentry->d_name; 736 ctx.destdir = parentpath.dentry;
737 ctx.destname = dentry->d_name;
617 738
618 err = vfs_getattr(&parentpath, &ctx.pstat, 739 err = vfs_getattr(&parentpath, &ctx.pstat,
619 STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT); 740 STATX_ATIME | STATX_MTIME,
620 if (err) 741 AT_STATX_SYNC_AS_STAT);
621 return err; 742 if (err)
743 return err;
744 }
622 745
623 /* maybe truncate regular file. this has no effect on dirs */ 746 /* maybe truncate regular file. this has no effect on dirs */
624 if (flags & O_TRUNC) 747 if (flags & O_TRUNC)
@@ -639,7 +762,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
639 } else { 762 } else {
640 if (!ovl_dentry_upper(dentry)) 763 if (!ovl_dentry_upper(dentry))
641 err = ovl_do_copy_up(&ctx); 764 err = ovl_do_copy_up(&ctx);
642 if (!err && !ovl_dentry_has_upper_alias(dentry)) 765 if (!err && parent && !ovl_dentry_has_upper_alias(dentry))
643 err = ovl_link_up(&ctx); 766 err = ovl_link_up(&ctx);
644 ovl_copy_up_end(dentry); 767 ovl_copy_up_end(dentry);
645 } 768 }
@@ -652,10 +775,19 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
652{ 775{
653 int err = 0; 776 int err = 0;
654 const struct cred *old_cred = ovl_override_creds(dentry->d_sb); 777 const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
778 bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
779
780 /*
781 * With NFS export, copy up can get called for a disconnected non-dir.
782 * In this case, we will copy up lower inode to index dir without
783 * linking it to upper dir.
784 */
785 if (WARN_ON(disconnected && d_is_dir(dentry)))
786 return -EIO;
655 787
656 while (!err) { 788 while (!err) {
657 struct dentry *next; 789 struct dentry *next;
658 struct dentry *parent; 790 struct dentry *parent = NULL;
659 791
660 /* 792 /*
661 * Check if copy-up has happened as well as for upper alias (in 793 * Check if copy-up has happened as well as for upper alias (in
@@ -671,12 +803,12 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
671 * with rename. 803 * with rename.
672 */ 804 */
673 if (ovl_dentry_upper(dentry) && 805 if (ovl_dentry_upper(dentry) &&
674 ovl_dentry_has_upper_alias(dentry)) 806 (ovl_dentry_has_upper_alias(dentry) || disconnected))
675 break; 807 break;
676 808
677 next = dget(dentry); 809 next = dget(dentry);
678 /* find the topmost dentry not yet copied up */ 810 /* find the topmost dentry not yet copied up */
679 for (;;) { 811 for (; !disconnected;) {
680 parent = dget_parent(next); 812 parent = dget_parent(next);
681 813
682 if (ovl_dentry_upper(parent)) 814 if (ovl_dentry_upper(parent))
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index f9788bc116a8..839709c7803a 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -63,8 +63,7 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
63} 63}
64 64
65/* caller holds i_mutex on workdir */ 65/* caller holds i_mutex on workdir */
66static struct dentry *ovl_whiteout(struct dentry *workdir, 66static struct dentry *ovl_whiteout(struct dentry *workdir)
67 struct dentry *dentry)
68{ 67{
69 int err; 68 int err;
70 struct dentry *whiteout; 69 struct dentry *whiteout;
@@ -83,6 +82,38 @@ static struct dentry *ovl_whiteout(struct dentry *workdir,
83 return whiteout; 82 return whiteout;
84} 83}
85 84
85/* Caller must hold i_mutex on both workdir and dir */
86int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
87 struct dentry *dentry)
88{
89 struct inode *wdir = workdir->d_inode;
90 struct dentry *whiteout;
91 int err;
92 int flags = 0;
93
94 whiteout = ovl_whiteout(workdir);
95 err = PTR_ERR(whiteout);
96 if (IS_ERR(whiteout))
97 return err;
98
99 if (d_is_dir(dentry))
100 flags = RENAME_EXCHANGE;
101
102 err = ovl_do_rename(wdir, whiteout, dir, dentry, flags);
103 if (err)
104 goto kill_whiteout;
105 if (flags)
106 ovl_cleanup(wdir, dentry);
107
108out:
109 dput(whiteout);
110 return err;
111
112kill_whiteout:
113 ovl_cleanup(wdir, whiteout);
114 goto out;
115}
116
86int ovl_create_real(struct inode *dir, struct dentry *newdentry, 117int ovl_create_real(struct inode *dir, struct dentry *newdentry,
87 struct cattr *attr, struct dentry *hardlink, bool debug) 118 struct cattr *attr, struct dentry *hardlink, bool debug)
88{ 119{
@@ -181,11 +212,6 @@ static bool ovl_type_origin(struct dentry *dentry)
181 return OVL_TYPE_ORIGIN(ovl_path_type(dentry)); 212 return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
182} 213}
183 214
184static bool ovl_may_have_whiteouts(struct dentry *dentry)
185{
186 return ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry));
187}
188
189static int ovl_create_upper(struct dentry *dentry, struct inode *inode, 215static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
190 struct cattr *attr, struct dentry *hardlink) 216 struct cattr *attr, struct dentry *hardlink)
191{ 217{
@@ -301,37 +327,6 @@ out:
301 return ERR_PTR(err); 327 return ERR_PTR(err);
302} 328}
303 329
304static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
305{
306 int err;
307 struct dentry *ret = NULL;
308 LIST_HEAD(list);
309
310 err = ovl_check_empty_dir(dentry, &list);
311 if (err) {
312 ret = ERR_PTR(err);
313 goto out_free;
314 }
315
316 /*
317 * When removing an empty opaque directory, then it makes no sense to
318 * replace it with an exact replica of itself.
319 *
320 * If upperdentry has whiteouts, clear them.
321 *
322 * Can race with copy-up, since we don't hold the upperdir mutex.
323 * Doesn't matter, since copy-up can't create a non-empty directory
324 * from an empty one.
325 */
326 if (!list_empty(&list))
327 ret = ovl_clear_empty(dentry, &list);
328
329out_free:
330 ovl_cache_free(&list);
331
332 return ret;
333}
334
335static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name, 330static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
336 const struct posix_acl *acl) 331 const struct posix_acl *acl)
337{ 332{
@@ -623,23 +618,20 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
623 return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper); 618 return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper);
624} 619}
625 620
626static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) 621static int ovl_remove_and_whiteout(struct dentry *dentry,
622 struct list_head *list)
627{ 623{
628 struct dentry *workdir = ovl_workdir(dentry); 624 struct dentry *workdir = ovl_workdir(dentry);
629 struct inode *wdir = workdir->d_inode;
630 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); 625 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
631 struct inode *udir = upperdir->d_inode;
632 struct dentry *whiteout;
633 struct dentry *upper; 626 struct dentry *upper;
634 struct dentry *opaquedir = NULL; 627 struct dentry *opaquedir = NULL;
635 int err; 628 int err;
636 int flags = 0;
637 629
638 if (WARN_ON(!workdir)) 630 if (WARN_ON(!workdir))
639 return -EROFS; 631 return -EROFS;
640 632
641 if (is_dir) { 633 if (!list_empty(list)) {
642 opaquedir = ovl_check_empty_and_clear(dentry); 634 opaquedir = ovl_clear_empty(dentry, list);
643 err = PTR_ERR(opaquedir); 635 err = PTR_ERR(opaquedir);
644 if (IS_ERR(opaquedir)) 636 if (IS_ERR(opaquedir))
645 goto out; 637 goto out;
@@ -662,24 +654,13 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
662 goto out_dput_upper; 654 goto out_dput_upper;
663 } 655 }
664 656
665 whiteout = ovl_whiteout(workdir, dentry); 657 err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
666 err = PTR_ERR(whiteout);
667 if (IS_ERR(whiteout))
668 goto out_dput_upper;
669
670 if (d_is_dir(upper))
671 flags = RENAME_EXCHANGE;
672
673 err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
674 if (err) 658 if (err)
675 goto kill_whiteout; 659 goto out_d_drop;
676 if (flags)
677 ovl_cleanup(wdir, upper);
678 660
679 ovl_dentry_version_inc(dentry->d_parent, true); 661 ovl_dentry_version_inc(dentry->d_parent, true);
680out_d_drop: 662out_d_drop:
681 d_drop(dentry); 663 d_drop(dentry);
682 dput(whiteout);
683out_dput_upper: 664out_dput_upper:
684 dput(upper); 665 dput(upper);
685out_unlock: 666out_unlock:
@@ -688,13 +669,10 @@ out_dput:
688 dput(opaquedir); 669 dput(opaquedir);
689out: 670out:
690 return err; 671 return err;
691
692kill_whiteout:
693 ovl_cleanup(wdir, whiteout);
694 goto out_d_drop;
695} 672}
696 673
697static int ovl_remove_upper(struct dentry *dentry, bool is_dir) 674static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
675 struct list_head *list)
698{ 676{
699 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); 677 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
700 struct inode *dir = upperdir->d_inode; 678 struct inode *dir = upperdir->d_inode;
@@ -702,10 +680,8 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
702 struct dentry *opaquedir = NULL; 680 struct dentry *opaquedir = NULL;
703 int err; 681 int err;
704 682
705 /* Redirect/origin dir can be !ovl_lower_positive && not clean */ 683 if (!list_empty(list)) {
706 if (is_dir && (ovl_dentry_get_redirect(dentry) || 684 opaquedir = ovl_clear_empty(dentry, list);
707 ovl_may_have_whiteouts(dentry))) {
708 opaquedir = ovl_check_empty_and_clear(dentry);
709 err = PTR_ERR(opaquedir); 685 err = PTR_ERR(opaquedir);
710 if (IS_ERR(opaquedir)) 686 if (IS_ERR(opaquedir))
711 goto out; 687 goto out;
@@ -746,11 +722,26 @@ out:
746 return err; 722 return err;
747} 723}
748 724
725static bool ovl_pure_upper(struct dentry *dentry)
726{
727 return !ovl_dentry_lower(dentry) &&
728 !ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry));
729}
730
749static int ovl_do_remove(struct dentry *dentry, bool is_dir) 731static int ovl_do_remove(struct dentry *dentry, bool is_dir)
750{ 732{
751 int err; 733 int err;
752 bool locked = false; 734 bool locked = false;
753 const struct cred *old_cred; 735 const struct cred *old_cred;
736 bool lower_positive = ovl_lower_positive(dentry);
737 LIST_HEAD(list);
738
739 /* No need to clean pure upper removed by vfs_rmdir() */
740 if (is_dir && (lower_positive || !ovl_pure_upper(dentry))) {
741 err = ovl_check_empty_dir(dentry, &list);
742 if (err)
743 goto out;
744 }
754 745
755 err = ovl_want_write(dentry); 746 err = ovl_want_write(dentry);
756 if (err) 747 if (err)
@@ -765,10 +756,10 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
765 goto out_drop_write; 756 goto out_drop_write;
766 757
767 old_cred = ovl_override_creds(dentry->d_sb); 758 old_cred = ovl_override_creds(dentry->d_sb);
768 if (!ovl_lower_positive(dentry)) 759 if (!lower_positive)
769 err = ovl_remove_upper(dentry, is_dir); 760 err = ovl_remove_upper(dentry, is_dir, &list);
770 else 761 else
771 err = ovl_remove_and_whiteout(dentry, is_dir); 762 err = ovl_remove_and_whiteout(dentry, &list);
772 revert_creds(old_cred); 763 revert_creds(old_cred);
773 if (!err) { 764 if (!err) {
774 if (is_dir) 765 if (is_dir)
@@ -780,6 +771,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
780out_drop_write: 771out_drop_write:
781 ovl_drop_write(dentry); 772 ovl_drop_write(dentry);
782out: 773out:
774 ovl_cache_free(&list);
783 return err; 775 return err;
784} 776}
785 777
@@ -915,6 +907,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
915 bool samedir = olddir == newdir; 907 bool samedir = olddir == newdir;
916 struct dentry *opaquedir = NULL; 908 struct dentry *opaquedir = NULL;
917 const struct cred *old_cred = NULL; 909 const struct cred *old_cred = NULL;
910 LIST_HEAD(list);
918 911
919 err = -EINVAL; 912 err = -EINVAL;
920 if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE)) 913 if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
@@ -929,6 +922,27 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
929 if (!overwrite && !ovl_can_move(new)) 922 if (!overwrite && !ovl_can_move(new))
930 goto out; 923 goto out;
931 924
925 if (overwrite && new_is_dir && !ovl_pure_upper(new)) {
926 err = ovl_check_empty_dir(new, &list);
927 if (err)
928 goto out;
929 }
930
931 if (overwrite) {
932 if (ovl_lower_positive(old)) {
933 if (!ovl_dentry_is_whiteout(new)) {
934 /* Whiteout source */
935 flags |= RENAME_WHITEOUT;
936 } else {
937 /* Switch whiteouts */
938 flags |= RENAME_EXCHANGE;
939 }
940 } else if (is_dir && ovl_dentry_is_whiteout(new)) {
941 flags |= RENAME_EXCHANGE;
942 cleanup_whiteout = true;
943 }
944 }
945
932 err = ovl_want_write(old); 946 err = ovl_want_write(old);
933 if (err) 947 if (err)
934 goto out; 948 goto out;
@@ -952,9 +966,8 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
952 966
953 old_cred = ovl_override_creds(old->d_sb); 967 old_cred = ovl_override_creds(old->d_sb);
954 968
955 if (overwrite && new_is_dir && (ovl_type_merge_or_lower(new) || 969 if (!list_empty(&list)) {
956 ovl_may_have_whiteouts(new))) { 970 opaquedir = ovl_clear_empty(new, &list);
957 opaquedir = ovl_check_empty_and_clear(new);
958 err = PTR_ERR(opaquedir); 971 err = PTR_ERR(opaquedir);
959 if (IS_ERR(opaquedir)) { 972 if (IS_ERR(opaquedir)) {
960 opaquedir = NULL; 973 opaquedir = NULL;
@@ -962,21 +975,6 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
962 } 975 }
963 } 976 }
964 977
965 if (overwrite) {
966 if (ovl_lower_positive(old)) {
967 if (!ovl_dentry_is_whiteout(new)) {
968 /* Whiteout source */
969 flags |= RENAME_WHITEOUT;
970 } else {
971 /* Switch whiteouts */
972 flags |= RENAME_EXCHANGE;
973 }
974 } else if (is_dir && ovl_dentry_is_whiteout(new)) {
975 flags |= RENAME_EXCHANGE;
976 cleanup_whiteout = true;
977 }
978 }
979
980 old_upperdir = ovl_dentry_upper(old->d_parent); 978 old_upperdir = ovl_dentry_upper(old->d_parent);
981 new_upperdir = ovl_dentry_upper(new->d_parent); 979 new_upperdir = ovl_dentry_upper(new->d_parent);
982 980
@@ -1094,6 +1092,7 @@ out_drop_write:
1094 ovl_drop_write(old); 1092 ovl_drop_write(old);
1095out: 1093out:
1096 dput(opaquedir); 1094 dput(opaquedir);
1095 ovl_cache_free(&list);
1097 return err; 1096 return err;
1098} 1097}
1099 1098
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
new file mode 100644
index 000000000000..bb94ce9da5c8
--- /dev/null
+++ b/fs/overlayfs/export.c
@@ -0,0 +1,715 @@
1/*
2 * Overlayfs NFS export support.
3 *
4 * Amir Goldstein <amir73il@gmail.com>
5 *
6 * Copyright (C) 2017-2018 CTERA Networks. All Rights Reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License version 2 as published by
10 * the Free Software Foundation.
11 */
12
13#include <linux/fs.h>
14#include <linux/cred.h>
15#include <linux/mount.h>
16#include <linux/namei.h>
17#include <linux/xattr.h>
18#include <linux/exportfs.h>
19#include <linux/ratelimit.h>
20#include "overlayfs.h"
21
22/*
23 * We only need to encode origin if there is a chance that the same object was
24 * encoded pre copy up and then we need to stay consistent with the same
25 * encoding also after copy up. If non-pure upper is not indexed, then it was
26 * copied up before NFS export was enabled. In that case we don't need to worry
27 * about staying consistent with pre copy up encoding and we encode an upper
28 * file handle. Overlay root dentry is a private case of non-indexed upper.
29 *
30 * The following table summarizes the different file handle encodings used for
31 * different overlay object types:
32 *
33 * Object type | Encoding
34 * --------------------------------
35 * Pure upper | U
36 * Non-indexed upper | U
37 * Indexed upper | L (*)
38 * Non-upper | L (*)
39 *
40 * U = upper file handle
41 * L = lower file handle
42 *
43 * (*) Connecting an overlay dir from real lower dentry is not always
44 * possible when there are redirects in lower layers. To mitigate this case,
45 * we copy up the lower dir first and then encode an upper dir file handle.
46 */
47static bool ovl_should_encode_origin(struct dentry *dentry)
48{
49 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
50
51 if (!ovl_dentry_lower(dentry))
52 return false;
53
54 /*
55 * Decoding a merge dir, whose origin's parent is under a redirected
56 * lower dir is not always possible. As a simple aproximation, we do
57 * not encode lower dir file handles when overlay has multiple lower
58 * layers and origin is below the topmost lower layer.
59 *
60 * TODO: copy up only the parent that is under redirected lower.
61 */
62 if (d_is_dir(dentry) && ofs->upper_mnt &&
63 OVL_E(dentry)->lowerstack[0].layer->idx > 1)
64 return false;
65
66 /* Decoding a non-indexed upper from origin is not implemented */
67 if (ovl_dentry_upper(dentry) &&
68 !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
69 return false;
70
71 return true;
72}
73
/*
 * Copy up @dentry unless it already has an upper dentry.
 *
 * Returns 0 when nothing had to be done, otherwise the result of
 * ovl_want_write() or ovl_copy_up().
 */
static int ovl_encode_maybe_copy_up(struct dentry *dentry)
{
	int ret = 0;

	if (!ovl_dentry_upper(dentry)) {
		ret = ovl_want_write(dentry);
		if (!ret) {
			ret = ovl_copy_up(dentry);
			/* Write access is dropped whether or not copy up worked */
			ovl_drop_write(dentry);
		}
	}

	return ret;
}
90
91static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
92{
93 struct dentry *origin = ovl_dentry_lower(dentry);
94 struct ovl_fh *fh = NULL;
95 int err;
96
97 /*
98 * If we should not encode a lower dir file handle, copy up and encode
99 * an upper dir file handle.
100 */
101 if (!ovl_should_encode_origin(dentry)) {
102 err = ovl_encode_maybe_copy_up(dentry);
103 if (err)
104 goto fail;
105
106 origin = NULL;
107 }
108
109 /* Encode an upper or origin file handle */
110 fh = ovl_encode_fh(origin ?: ovl_dentry_upper(dentry), !origin);
111 err = PTR_ERR(fh);
112 if (IS_ERR(fh))
113 goto fail;
114
115 err = -EOVERFLOW;
116 if (fh->len > buflen)
117 goto fail;
118
119 memcpy(buf, (char *)fh, fh->len);
120 err = fh->len;
121
122out:
123 kfree(fh);
124 return err;
125
126fail:
127 pr_warn_ratelimited("overlayfs: failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n",
128 dentry, err, buflen, fh ? (int)fh->len : 0,
129 fh ? fh->type : 0);
130 goto out;
131}
132
133static int ovl_dentry_to_fh(struct dentry *dentry, u32 *fid, int *max_len)
134{
135 int res, len = *max_len << 2;
136
137 res = ovl_d_to_fh(dentry, (char *)fid, len);
138 if (res <= 0)
139 return FILEID_INVALID;
140
141 len = res;
142
143 /* Round up to dwords */
144 *max_len = (len + 3) >> 2;
145 return OVL_FILEID;
146}
147
148static int ovl_encode_inode_fh(struct inode *inode, u32 *fid, int *max_len,
149 struct inode *parent)
150{
151 struct dentry *dentry;
152 int type;
153
154 /* TODO: encode connectable file handles */
155 if (parent)
156 return FILEID_INVALID;
157
158 dentry = d_find_any_alias(inode);
159 if (WARN_ON(!dentry))
160 return FILEID_INVALID;
161
162 type = ovl_dentry_to_fh(dentry, fid, max_len);
163
164 dput(dentry);
165 return type;
166}
167
168/*
169 * Find or instantiate an overlay dentry from real dentries and index.
170 */
171static struct dentry *ovl_obtain_alias(struct super_block *sb,
172 struct dentry *upper_alias,
173 struct ovl_path *lowerpath,
174 struct dentry *index)
175{
176 struct dentry *lower = lowerpath ? lowerpath->dentry : NULL;
177 struct dentry *upper = upper_alias ?: index;
178 struct dentry *dentry;
179 struct inode *inode;
180 struct ovl_entry *oe;
181
182 /* We get overlay directory dentries with ovl_lookup_real() */
183 if (d_is_dir(upper ?: lower))
184 return ERR_PTR(-EIO);
185
186 inode = ovl_get_inode(sb, dget(upper), lower, index, !!lower);
187 if (IS_ERR(inode)) {
188 dput(upper);
189 return ERR_CAST(inode);
190 }
191
192 if (index)
193 ovl_set_flag(OVL_INDEX, inode);
194
195 dentry = d_find_any_alias(inode);
196 if (!dentry) {
197 dentry = d_alloc_anon(inode->i_sb);
198 if (!dentry)
199 goto nomem;
200 oe = ovl_alloc_entry(lower ? 1 : 0);
201 if (!oe)
202 goto nomem;
203
204 if (lower) {
205 oe->lowerstack->dentry = dget(lower);
206 oe->lowerstack->layer = lowerpath->layer;
207 }
208 dentry->d_fsdata = oe;
209 if (upper_alias)
210 ovl_dentry_set_upper_alias(dentry);
211 }
212
213 return d_instantiate_anon(dentry, inode);
214
215nomem:
216 iput(inode);
217 dput(dentry);
218 return ERR_PTR(-ENOMEM);
219}
220
221/* Get the upper or lower dentry in stach whose on layer @idx */
222static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx)
223{
224 struct ovl_entry *oe = dentry->d_fsdata;
225 int i;
226
227 if (!idx)
228 return ovl_dentry_upper(dentry);
229
230 for (i = 0; i < oe->numlower; i++) {
231 if (oe->lowerstack[i].layer->idx == idx)
232 return oe->lowerstack[i].dentry;
233 }
234
235 return NULL;
236}
237
238/*
239 * Lookup a child overlay dentry to get a connected overlay dentry whose real
240 * dentry is @real. If @real is on upper layer, we lookup a child overlay
241 * dentry with the same name as the real dentry. Otherwise, we need to consult
242 * index for lookup.
243 */
244static struct dentry *ovl_lookup_real_one(struct dentry *connected,
245 struct dentry *real,
246 struct ovl_layer *layer)
247{
248 struct inode *dir = d_inode(connected);
249 struct dentry *this, *parent = NULL;
250 struct name_snapshot name;
251 int err;
252
253 /*
254 * Lookup child overlay dentry by real name. The dir mutex protects us
255 * from racing with overlay rename. If the overlay dentry that is above
256 * real has already been moved to a parent that is not under the
257 * connected overlay dir, we return -ECHILD and restart the lookup of
258 * connected real path from the top.
259 */
260 inode_lock_nested(dir, I_MUTEX_PARENT);
261 err = -ECHILD;
262 parent = dget_parent(real);
263 if (ovl_dentry_real_at(connected, layer->idx) != parent)
264 goto fail;
265
266 /*
267 * We also need to take a snapshot of real dentry name to protect us
268 * from racing with underlying layer rename. In this case, we don't
269 * care about returning ESTALE, only from dereferencing a free name
270 * pointer because we hold no lock on the real dentry.
271 */
272 take_dentry_name_snapshot(&name, real);
273 this = lookup_one_len(name.name, connected, strlen(name.name));
274 err = PTR_ERR(this);
275 if (IS_ERR(this)) {
276 goto fail;
277 } else if (!this || !this->d_inode) {
278 dput(this);
279 err = -ENOENT;
280 goto fail;
281 } else if (ovl_dentry_real_at(this, layer->idx) != real) {
282 dput(this);
283 err = -ESTALE;
284 goto fail;
285 }
286
287out:
288 release_dentry_name_snapshot(&name);
289 dput(parent);
290 inode_unlock(dir);
291 return this;
292
293fail:
294 pr_warn_ratelimited("overlayfs: failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
295 real, layer->idx, connected, err);
296 this = ERR_PTR(err);
297 goto out;
298}
299
300static struct dentry *ovl_lookup_real(struct super_block *sb,
301 struct dentry *real,
302 struct ovl_layer *layer);
303
304/*
305 * Lookup an indexed or hashed overlay dentry by real inode.
306 */
307static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
308 struct dentry *real,
309 struct ovl_layer *layer)
310{
311 struct ovl_fs *ofs = sb->s_fs_info;
312 struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
313 struct dentry *index = NULL;
314 struct dentry *this = NULL;
315 struct inode *inode;
316
317 /*
318 * Decoding upper dir from index is expensive, so first try to lookup
319 * overlay dentry in inode/dcache.
320 */
321 inode = ovl_lookup_inode(sb, real, !layer->idx);
322 if (IS_ERR(inode))
323 return ERR_CAST(inode);
324 if (inode) {
325 this = d_find_any_alias(inode);
326 iput(inode);
327 }
328
329 /*
330 * For decoded lower dir file handle, lookup index by origin to check
331 * if lower dir was copied up and and/or removed.
332 */
333 if (!this && layer->idx && ofs->indexdir && !WARN_ON(!d_is_dir(real))) {
334 index = ovl_lookup_index(ofs, NULL, real, false);
335 if (IS_ERR(index))
336 return index;
337 }
338
339 /* Get connected upper overlay dir from index */
340 if (index) {
341 struct dentry *upper = ovl_index_upper(ofs, index);
342
343 dput(index);
344 if (IS_ERR_OR_NULL(upper))
345 return upper;
346
347 /*
348 * ovl_lookup_real() in lower layer may call recursively once to
349 * ovl_lookup_real() in upper layer. The first level call walks
350 * back lower parents to the topmost indexed parent. The second
351 * recursive call walks back from indexed upper to the topmost
352 * connected/hashed upper parent (or up to root).
353 */
354 this = ovl_lookup_real(sb, upper, &upper_layer);
355 dput(upper);
356 }
357
358 if (!this)
359 return NULL;
360
361 if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) {
362 dput(this);
363 this = ERR_PTR(-EIO);
364 }
365
366 return this;
367}
368
369/*
370 * Lookup an indexed or hashed overlay dentry, whose real dentry is an
371 * ancestor of @real.
372 */
373static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb,
374 struct dentry *real,
375 struct ovl_layer *layer)
376{
377 struct dentry *next, *parent = NULL;
378 struct dentry *ancestor = ERR_PTR(-EIO);
379
380 if (real == layer->mnt->mnt_root)
381 return dget(sb->s_root);
382
383 /* Find the topmost indexed or hashed ancestor */
384 next = dget(real);
385 for (;;) {
386 parent = dget_parent(next);
387
388 /*
389 * Lookup a matching overlay dentry in inode/dentry
390 * cache or in index by real inode.
391 */
392 ancestor = ovl_lookup_real_inode(sb, next, layer);
393 if (ancestor)
394 break;
395
396 if (parent == layer->mnt->mnt_root) {
397 ancestor = dget(sb->s_root);
398 break;
399 }
400
401 /*
402 * If @real has been moved out of the layer root directory,
403 * we will eventully hit the real fs root. This cannot happen
404 * by legit overlay rename, so we return error in that case.
405 */
406 if (parent == next) {
407 ancestor = ERR_PTR(-EXDEV);
408 break;
409 }
410
411 dput(next);
412 next = parent;
413 }
414
415 dput(parent);
416 dput(next);
417
418 return ancestor;
419}
420
421/*
422 * Lookup a connected overlay dentry whose real dentry is @real.
423 * If @real is on upper layer, we lookup a child overlay dentry with the same
424 * path the real dentry. Otherwise, we need to consult index for lookup.
425 */
426static struct dentry *ovl_lookup_real(struct super_block *sb,
427 struct dentry *real,
428 struct ovl_layer *layer)
429{
430 struct dentry *connected;
431 int err = 0;
432
433 connected = ovl_lookup_real_ancestor(sb, real, layer);
434 if (IS_ERR(connected))
435 return connected;
436
437 while (!err) {
438 struct dentry *next, *this;
439 struct dentry *parent = NULL;
440 struct dentry *real_connected = ovl_dentry_real_at(connected,
441 layer->idx);
442
443 if (real_connected == real)
444 break;
445
446 /* Find the topmost dentry not yet connected */
447 next = dget(real);
448 for (;;) {
449 parent = dget_parent(next);
450
451 if (parent == real_connected)
452 break;
453
454 /*
455 * If real has been moved out of 'real_connected',
456 * we will not find 'real_connected' and hit the layer
457 * root. In that case, we need to restart connecting.
458 * This game can go on forever in the worst case. We
459 * may want to consider taking s_vfs_rename_mutex if
460 * this happens more than once.
461 */
462 if (parent == layer->mnt->mnt_root) {
463 dput(connected);
464 connected = dget(sb->s_root);
465 break;
466 }
467
468 /*
469 * If real file has been moved out of the layer root
470 * directory, we will eventully hit the real fs root.
471 * This cannot happen by legit overlay rename, so we
472 * return error in that case.
473 */
474 if (parent == next) {
475 err = -EXDEV;
476 break;
477 }
478
479 dput(next);
480 next = parent;
481 }
482
483 if (!err) {
484 this = ovl_lookup_real_one(connected, next, layer);
485 if (IS_ERR(this))
486 err = PTR_ERR(this);
487
488 /*
489 * Lookup of child in overlay can fail when racing with
490 * overlay rename of child away from 'connected' parent.
491 * In this case, we need to restart the lookup from the
492 * top, because we cannot trust that 'real_connected' is
493 * still an ancestor of 'real'. There is a good chance
494 * that the renamed overlay ancestor is now in cache, so
495 * ovl_lookup_real_ancestor() will find it and we can
496 * continue to connect exactly from where lookup failed.
497 */
498 if (err == -ECHILD) {
499 this = ovl_lookup_real_ancestor(sb, real,
500 layer);
501 err = IS_ERR(this) ? PTR_ERR(this) : 0;
502 }
503 if (!err) {
504 dput(connected);
505 connected = this;
506 }
507 }
508
509 dput(parent);
510 dput(next);
511 }
512
513 if (err)
514 goto fail;
515
516 return connected;
517
518fail:
519 pr_warn_ratelimited("overlayfs: failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
520 real, layer->idx, connected, err);
521 dput(connected);
522 return ERR_PTR(err);
523}
524
525/*
526 * Get an overlay dentry from upper/lower real dentries and index.
527 */
528static struct dentry *ovl_get_dentry(struct super_block *sb,
529 struct dentry *upper,
530 struct ovl_path *lowerpath,
531 struct dentry *index)
532{
533 struct ovl_fs *ofs = sb->s_fs_info;
534 struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
535 struct ovl_layer *layer = upper ? &upper_layer : lowerpath->layer;
536 struct dentry *real = upper ?: (index ?: lowerpath->dentry);
537
538 /*
539 * Obtain a disconnected overlay dentry from a non-dir real dentry
540 * and index.
541 */
542 if (!d_is_dir(real))
543 return ovl_obtain_alias(sb, upper, lowerpath, index);
544
545 /* Removed empty directory? */
546 if ((real->d_flags & DCACHE_DISCONNECTED) || d_unhashed(real))
547 return ERR_PTR(-ENOENT);
548
549 /*
550 * If real dentry is connected and hashed, get a connected overlay
551 * dentry whose real dentry is @real.
552 */
553 return ovl_lookup_real(sb, real, layer);
554}
555
556static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
557 struct ovl_fh *fh)
558{
559 struct ovl_fs *ofs = sb->s_fs_info;
560 struct dentry *dentry;
561 struct dentry *upper;
562
563 if (!ofs->upper_mnt)
564 return ERR_PTR(-EACCES);
565
566 upper = ovl_decode_fh(fh, ofs->upper_mnt);
567 if (IS_ERR_OR_NULL(upper))
568 return upper;
569
570 dentry = ovl_get_dentry(sb, upper, NULL, NULL);
571 dput(upper);
572
573 return dentry;
574}
575
576static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
577 struct ovl_fh *fh)
578{
579 struct ovl_fs *ofs = sb->s_fs_info;
580 struct ovl_path origin = { };
581 struct ovl_path *stack = &origin;
582 struct dentry *dentry = NULL;
583 struct dentry *index = NULL;
584 struct inode *inode = NULL;
585 bool is_deleted = false;
586 int err;
587
588 /* First lookup indexed upper by fh */
589 if (ofs->indexdir) {
590 index = ovl_get_index_fh(ofs, fh);
591 err = PTR_ERR(index);
592 if (IS_ERR(index)) {
593 if (err != -ESTALE)
594 return ERR_PTR(err);
595
596 /* Found a whiteout index - treat as deleted inode */
597 is_deleted = true;
598 index = NULL;
599 }
600 }
601
602 /* Then try to get upper dir by index */
603 if (index && d_is_dir(index)) {
604 struct dentry *upper = ovl_index_upper(ofs, index);
605
606 err = PTR_ERR(upper);
607 if (IS_ERR_OR_NULL(upper))
608 goto out_err;
609
610 dentry = ovl_get_dentry(sb, upper, NULL, NULL);
611 dput(upper);
612 goto out;
613 }
614
615 /* Then lookup origin by fh */
616 err = ovl_check_origin_fh(ofs, fh, NULL, &stack);
617 if (err) {
618 goto out_err;
619 } else if (index) {
620 err = ovl_verify_origin(index, origin.dentry, false);
621 if (err)
622 goto out_err;
623 } else if (is_deleted) {
624 /* Lookup deleted non-dir by origin inode */
625 if (!d_is_dir(origin.dentry))
626 inode = ovl_lookup_inode(sb, origin.dentry, false);
627 err = -ESTALE;
628 if (!inode || atomic_read(&inode->i_count) == 1)
629 goto out_err;
630
631 /* Deleted but still open? */
632 index = dget(ovl_i_dentry_upper(inode));
633 }
634
635 dentry = ovl_get_dentry(sb, NULL, &origin, index);
636
637out:
638 dput(origin.dentry);
639 dput(index);
640 iput(inode);
641 return dentry;
642
643out_err:
644 dentry = ERR_PTR(err);
645 goto out;
646}
647
648static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
649 int fh_len, int fh_type)
650{
651 struct dentry *dentry = NULL;
652 struct ovl_fh *fh = (struct ovl_fh *) fid;
653 int len = fh_len << 2;
654 unsigned int flags = 0;
655 int err;
656
657 err = -EINVAL;
658 if (fh_type != OVL_FILEID)
659 goto out_err;
660
661 err = ovl_check_fh_len(fh, len);
662 if (err)
663 goto out_err;
664
665 flags = fh->flags;
666 dentry = (flags & OVL_FH_FLAG_PATH_UPPER) ?
667 ovl_upper_fh_to_d(sb, fh) :
668 ovl_lower_fh_to_d(sb, fh);
669 err = PTR_ERR(dentry);
670 if (IS_ERR(dentry) && err != -ESTALE)
671 goto out_err;
672
673 return dentry;
674
675out_err:
676 pr_warn_ratelimited("overlayfs: failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
677 len, fh_type, flags, err);
678 return ERR_PTR(err);
679}
680
681static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid,
682 int fh_len, int fh_type)
683{
684 pr_warn_ratelimited("overlayfs: connectable file handles not supported; use 'no_subtree_check' exportfs option.\n");
685 return ERR_PTR(-EACCES);
686}
687
688static int ovl_get_name(struct dentry *parent, char *name,
689 struct dentry *child)
690{
691 /*
692 * ovl_fh_to_dentry() returns connected dir overlay dentries and
693 * ovl_fh_to_parent() is not implemented, so we should not get here.
694 */
695 WARN_ON_ONCE(1);
696 return -EIO;
697}
698
699static struct dentry *ovl_get_parent(struct dentry *dentry)
700{
701 /*
702 * ovl_fh_to_dentry() returns connected dir overlay dentries, so we
703 * should not get here.
704 */
705 WARN_ON_ONCE(1);
706 return ERR_PTR(-EIO);
707}
708
709const struct export_operations ovl_export_operations = {
710 .encode_fh = ovl_encode_inode_fh,
711 .fh_to_dentry = ovl_fh_to_dentry,
712 .fh_to_parent = ovl_fh_to_parent,
713 .get_name = ovl_get_name,
714 .get_parent = ovl_get_parent,
715};
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 00b6b294272a..fcd97b783fa1 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -105,12 +105,20 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
105 * Lower hardlinks may be broken on copy up to different 105 * Lower hardlinks may be broken on copy up to different
106 * upper files, so we cannot use the lower origin st_ino 106 * upper files, so we cannot use the lower origin st_ino
107 * for those different files, even for the same fs case. 107 * for those different files, even for the same fs case.
108 *
109 * Similarly, several redirected dirs can point to the
110 * same dir on a lower layer. With the "verify_lower"
111 * feature, we do not use the lower origin st_ino, if
112 * we haven't verified that this redirect is unique.
113 *
108 * With inodes index enabled, it is safe to use st_ino 114 * With inodes index enabled, it is safe to use st_ino
109 * of an indexed hardlinked origin. The index validates 115 * of an indexed origin. The index validates that the
110 * that the upper hardlink is not broken. 116 * upper hardlink is not broken and that a redirected
117 * dir is the only redirect to that origin.
111 */ 118 */
112 if (is_dir || lowerstat.nlink == 1 || 119 if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
113 ovl_test_flag(OVL_INDEX, d_inode(dentry))) 120 (!ovl_verify_lower(dentry->d_sb) &&
121 (is_dir || lowerstat.nlink == 1)))
114 stat->ino = lowerstat.ino; 122 stat->ino = lowerstat.ino;
115 123
116 if (samefs) 124 if (samefs)
@@ -343,8 +351,10 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type)
343 351
344static bool ovl_open_need_copy_up(struct dentry *dentry, int flags) 352static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
345{ 353{
354 /* Copy up of disconnected dentry does not set upper alias */
346 if (ovl_dentry_upper(dentry) && 355 if (ovl_dentry_upper(dentry) &&
347 ovl_dentry_has_upper_alias(dentry)) 356 (ovl_dentry_has_upper_alias(dentry) ||
357 (dentry->d_flags & DCACHE_DISCONNECTED)))
348 return false; 358 return false;
349 359
350 if (special_file(d_inode(dentry)->i_mode)) 360 if (special_file(d_inode(dentry)->i_mode))
@@ -604,9 +614,25 @@ static int ovl_inode_set(struct inode *inode, void *data)
604} 614}
605 615
606static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, 616static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
607 struct dentry *upperdentry) 617 struct dentry *upperdentry, bool strict)
608{ 618{
609 /* 619 /*
620 * For directories, @strict verify from lookup path performs consistency
621 * checks, so NULL lower/upper in dentry must match NULL lower/upper in
622 * inode. Non @strict verify from NFS handle decode path passes NULL for
623 * 'unknown' lower/upper.
624 */
625 if (S_ISDIR(inode->i_mode) && strict) {
626 /* Real lower dir moved to upper layer under us? */
627 if (!lowerdentry && ovl_inode_lower(inode))
628 return false;
629
630 /* Lookup of an uncovered redirect origin? */
631 if (!upperdentry && ovl_inode_upper(inode))
632 return false;
633 }
634
635 /*
610 * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL. 636 * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL.
611 * This happens when finding a copied up overlay inode for a renamed 637 * This happens when finding a copied up overlay inode for a renamed
612 * or hardlinked overlay dentry and lower dentry cannot be followed 638 * or hardlinked overlay dentry and lower dentry cannot be followed
@@ -625,14 +651,35 @@ static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
625 return true; 651 return true;
626} 652}
627 653
628struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, 654struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
629 struct dentry *index) 655 bool is_upper)
630{ 656{
631 struct dentry *lowerdentry = ovl_dentry_lower(dentry); 657 struct inode *inode, *key = d_inode(real);
658
659 inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key);
660 if (!inode)
661 return NULL;
662
663 if (!ovl_verify_inode(inode, is_upper ? NULL : real,
664 is_upper ? real : NULL, false)) {
665 iput(inode);
666 return ERR_PTR(-ESTALE);
667 }
668
669 return inode;
670}
671
672struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
673 struct dentry *lowerdentry, struct dentry *index,
674 unsigned int numlower)
675{
676 struct ovl_fs *ofs = sb->s_fs_info;
632 struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; 677 struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
633 struct inode *inode; 678 struct inode *inode;
634 /* Already indexed or could be indexed on copy up? */ 679 /* Already indexed or could be indexed on copy up? */
635 bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry)); 680 bool indexed = (index || (ovl_indexdir(sb) && !upperdentry));
681 struct dentry *origin = indexed ? lowerdentry : NULL;
682 bool is_dir;
636 683
637 if (WARN_ON(upperdentry && indexed && !lowerdentry)) 684 if (WARN_ON(upperdentry && indexed && !lowerdentry))
638 return ERR_PTR(-EIO); 685 return ERR_PTR(-EIO);
@@ -641,17 +688,22 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
641 realinode = d_inode(lowerdentry); 688 realinode = d_inode(lowerdentry);
642 689
643 /* 690 /*
644 * Copy up origin (lower) may exist for non-indexed upper, but we must 691 * Copy up origin (lower) may exist for non-indexed non-dir upper, but
645 * not use lower as hash key in that case. 692 * we must not use lower as hash key in that case.
646 * Hash inodes that are or could be indexed by origin inode and 693 * Hash non-dir that is or could be indexed by origin inode.
647 * non-indexed upper inodes that could be hard linked by upper inode. 694 * Hash dir that is or could be merged by origin inode.
695 * Hash pure upper and non-indexed non-dir by upper inode.
696 * Hash non-indexed dir by upper inode for NFS export.
648 */ 697 */
649 if (!S_ISDIR(realinode->i_mode) && (upperdentry || indexed)) { 698 is_dir = S_ISDIR(realinode->i_mode);
650 struct inode *key = d_inode(indexed ? lowerdentry : 699 if (is_dir && (indexed || !sb->s_export_op || !ofs->upper_mnt))
651 upperdentry); 700 origin = lowerdentry;
652 unsigned int nlink; 701
702 if (upperdentry || origin) {
703 struct inode *key = d_inode(origin ?: upperdentry);
704 unsigned int nlink = is_dir ? 1 : realinode->i_nlink;
653 705
654 inode = iget5_locked(dentry->d_sb, (unsigned long) key, 706 inode = iget5_locked(sb, (unsigned long) key,
655 ovl_inode_test, ovl_inode_set, key); 707 ovl_inode_test, ovl_inode_set, key);
656 if (!inode) 708 if (!inode)
657 goto out_nomem; 709 goto out_nomem;
@@ -660,7 +712,8 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
660 * Verify that the underlying files stored in the inode 712 * Verify that the underlying files stored in the inode
661 * match those in the dentry. 713 * match those in the dentry.
662 */ 714 */
663 if (!ovl_verify_inode(inode, lowerdentry, upperdentry)) { 715 if (!ovl_verify_inode(inode, lowerdentry, upperdentry,
716 true)) {
664 iput(inode); 717 iput(inode);
665 inode = ERR_PTR(-ESTALE); 718 inode = ERR_PTR(-ESTALE);
666 goto out; 719 goto out;
@@ -670,11 +723,12 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
670 goto out; 723 goto out;
671 } 724 }
672 725
673 nlink = ovl_get_nlink(lowerdentry, upperdentry, 726 /* Recalculate nlink for non-dir due to indexing */
674 realinode->i_nlink); 727 if (!is_dir)
728 nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
675 set_nlink(inode, nlink); 729 set_nlink(inode, nlink);
676 } else { 730 } else {
677 inode = new_inode(dentry->d_sb); 731 inode = new_inode(sb);
678 if (!inode) 732 if (!inode)
679 goto out_nomem; 733 goto out_nomem;
680 } 734 }
@@ -685,10 +739,8 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
685 ovl_set_flag(OVL_IMPURE, inode); 739 ovl_set_flag(OVL_IMPURE, inode);
686 740
687 /* Check for non-merge dir that may have whiteouts */ 741 /* Check for non-merge dir that may have whiteouts */
688 if (S_ISDIR(realinode->i_mode)) { 742 if (is_dir) {
689 struct ovl_entry *oe = dentry->d_fsdata; 743 if (((upperdentry && lowerdentry) || numlower > 1) ||
690
691 if (((upperdentry && lowerdentry) || oe->numlower > 1) ||
692 ovl_check_origin_xattr(upperdentry ?: lowerdentry)) { 744 ovl_check_origin_xattr(upperdentry ?: lowerdentry)) {
693 ovl_set_flag(OVL_WHITEOUTS, inode); 745 ovl_set_flag(OVL_WHITEOUTS, inode);
694 } 746 }
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index beb945e1963c..de3e6da1d5a5 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -9,6 +9,7 @@
9 9
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/cred.h> 11#include <linux/cred.h>
12#include <linux/ctype.h>
12#include <linux/namei.h> 13#include <linux/namei.h>
13#include <linux/xattr.h> 14#include <linux/xattr.h>
14#include <linux/ratelimit.h> 15#include <linux/ratelimit.h>
@@ -84,15 +85,54 @@ invalid:
84 85
85static int ovl_acceptable(void *ctx, struct dentry *dentry) 86static int ovl_acceptable(void *ctx, struct dentry *dentry)
86{ 87{
87 return 1; 88 /*
89 * A non-dir origin may be disconnected, which is fine, because
90 * we only need it for its unique inode number.
91 */
92 if (!d_is_dir(dentry))
93 return 1;
94
95 /* Don't decode a deleted empty directory */
96 if (d_unhashed(dentry))
97 return 0;
98
99 /* Check if directory belongs to the layer we are decoding from */
100 return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
88} 101}
89 102
90static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) 103/*
104 * Check validity of an overlay file handle buffer.
105 *
106 * Return 0 for a valid file handle.
107 * Return -ENODATA for "origin unknown".
108 * Return <0 for an invalid file handle.
109 */
110int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
91{ 111{
92 int res; 112 if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len)
113 return -EINVAL;
114
115 if (fh->magic != OVL_FH_MAGIC)
116 return -EINVAL;
117
118 /* Treat larger version and unknown flags as "origin unknown" */
119 if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
120 return -ENODATA;
121
122 /* Treat endianness mismatch as "origin unknown" */
123 if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
124 (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
125 return -ENODATA;
126
127 return 0;
128}
129
130static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
131{
132 int res, err;
93 struct ovl_fh *fh = NULL; 133 struct ovl_fh *fh = NULL;
94 134
95 res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0); 135 res = vfs_getxattr(dentry, name, NULL, 0);
96 if (res < 0) { 136 if (res < 0) {
97 if (res == -ENODATA || res == -EOPNOTSUPP) 137 if (res == -ENODATA || res == -EOPNOTSUPP)
98 return NULL; 138 return NULL;
@@ -102,28 +142,20 @@ static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry)
102 if (res == 0) 142 if (res == 0)
103 return NULL; 143 return NULL;
104 144
105 fh = kzalloc(res, GFP_KERNEL); 145 fh = kzalloc(res, GFP_KERNEL);
106 if (!fh) 146 if (!fh)
107 return ERR_PTR(-ENOMEM); 147 return ERR_PTR(-ENOMEM);
108 148
109 res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res); 149 res = vfs_getxattr(dentry, name, fh, res);
110 if (res < 0) 150 if (res < 0)
111 goto fail; 151 goto fail;
112 152
113 if (res < sizeof(struct ovl_fh) || res < fh->len) 153 err = ovl_check_fh_len(fh, res);
114 goto invalid; 154 if (err < 0) {
115 155 if (err == -ENODATA)
116 if (fh->magic != OVL_FH_MAGIC) 156 goto out;
117 goto invalid; 157 goto invalid;
118 158 }
119 /* Treat larger version and unknown flags as "origin unknown" */
120 if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
121 goto out;
122
123 /* Treat endianness mismatch as "origin unknown" */
124 if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
125 (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
126 goto out;
127 159
128 return fh; 160 return fh;
129 161
@@ -139,47 +171,41 @@ invalid:
139 goto out; 171 goto out;
140} 172}
141 173
142static struct dentry *ovl_get_origin(struct dentry *dentry, 174struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt)
143 struct vfsmount *mnt)
144{ 175{
145 struct dentry *origin = NULL; 176 struct dentry *real;
146 struct ovl_fh *fh = ovl_get_origin_fh(dentry);
147 int bytes; 177 int bytes;
148 178
149 if (IS_ERR_OR_NULL(fh))
150 return (struct dentry *)fh;
151
152 /* 179 /*
153 * Make sure that the stored uuid matches the uuid of the lower 180 * Make sure that the stored uuid matches the uuid of the lower
154 * layer where file handle will be decoded. 181 * layer where file handle will be decoded.
155 */ 182 */
156 if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid)) 183 if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
157 goto out; 184 return NULL;
158 185
159 bytes = (fh->len - offsetof(struct ovl_fh, fid)); 186 bytes = (fh->len - offsetof(struct ovl_fh, fid));
160 origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid, 187 real = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
161 bytes >> 2, (int)fh->type, 188 bytes >> 2, (int)fh->type,
162 ovl_acceptable, NULL); 189 ovl_acceptable, mnt);
163 if (IS_ERR(origin)) { 190 if (IS_ERR(real)) {
164 /* Treat stale file handle as "origin unknown" */ 191 /*
165 if (origin == ERR_PTR(-ESTALE)) 192 * Treat stale file handle to lower file as "origin unknown".
166 origin = NULL; 193 * upper file handle could become stale when upper file is
167 goto out; 194 * unlinked and this information is needed to handle stale
195 * index entries correctly.
196 */
197 if (real == ERR_PTR(-ESTALE) &&
198 !(fh->flags & OVL_FH_FLAG_PATH_UPPER))
199 real = NULL;
200 return real;
168 } 201 }
169 202
170 if (ovl_dentry_weird(origin) || 203 if (ovl_dentry_weird(real)) {
171 ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) 204 dput(real);
172 goto invalid; 205 return NULL;
173 206 }
174out:
175 kfree(fh);
176 return origin;
177 207
178invalid: 208 return real;
179 pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin);
180 dput(origin);
181 origin = NULL;
182 goto out;
183} 209}
184 210
185static bool ovl_is_opaquedir(struct dentry *dentry) 211static bool ovl_is_opaquedir(struct dentry *dentry)
@@ -284,47 +310,81 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
284} 310}
285 311
286 312
287static int ovl_check_origin(struct dentry *upperdentry, 313int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
288 struct ovl_path *lower, unsigned int numlower, 314 struct dentry *upperdentry, struct ovl_path **stackp)
289 struct ovl_path **stackp, unsigned int *ctrp)
290{ 315{
291 struct vfsmount *mnt;
292 struct dentry *origin = NULL; 316 struct dentry *origin = NULL;
293 int i; 317 int i;
294 318
295 for (i = 0; i < numlower; i++) { 319 for (i = 0; i < ofs->numlower; i++) {
296 mnt = lower[i].layer->mnt; 320 origin = ovl_decode_fh(fh, ofs->lower_layers[i].mnt);
297 origin = ovl_get_origin(upperdentry, mnt);
298 if (IS_ERR(origin))
299 return PTR_ERR(origin);
300
301 if (origin) 321 if (origin)
302 break; 322 break;
303 } 323 }
304 324
305 if (!origin) 325 if (!origin)
306 return 0; 326 return -ESTALE;
327 else if (IS_ERR(origin))
328 return PTR_ERR(origin);
329
330 if (upperdentry && !ovl_is_whiteout(upperdentry) &&
331 ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT))
332 goto invalid;
307 333
308 BUG_ON(*ctrp);
309 if (!*stackp) 334 if (!*stackp)
310 *stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL); 335 *stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
311 if (!*stackp) { 336 if (!*stackp) {
312 dput(origin); 337 dput(origin);
313 return -ENOMEM; 338 return -ENOMEM;
314 } 339 }
315 **stackp = (struct ovl_path){.dentry = origin, .layer = lower[i].layer}; 340 **stackp = (struct ovl_path){
316 *ctrp = 1; 341 .dentry = origin,
342 .layer = &ofs->lower_layers[i]
343 };
344
345 return 0;
346
347invalid:
348 pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
349 upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
350 d_inode(origin)->i_mode & S_IFMT);
351 dput(origin);
352 return -EIO;
353}
354
355static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
356 struct ovl_path **stackp, unsigned int *ctrp)
357{
358 struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN);
359 int err;
360
361 if (IS_ERR_OR_NULL(fh))
362 return PTR_ERR(fh);
363
364 err = ovl_check_origin_fh(ofs, fh, upperdentry, stackp);
365 kfree(fh);
366
367 if (err) {
368 if (err == -ESTALE)
369 return 0;
370 return err;
371 }
372
373 if (WARN_ON(*ctrp))
374 return -EIO;
317 375
376 *ctrp = 1;
318 return 0; 377 return 0;
319} 378}
320 379
321/* 380/*
322 * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN. 381 * Verify that @fh matches the file handle stored in xattr @name.
323 * Return 0 on match, -ESTALE on mismatch, < 0 on error. 382 * Return 0 on match, -ESTALE on mismatch, < 0 on error.
324 */ 383 */
325static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh) 384static int ovl_verify_fh(struct dentry *dentry, const char *name,
385 const struct ovl_fh *fh)
326{ 386{
327 struct ovl_fh *ofh = ovl_get_origin_fh(dentry); 387 struct ovl_fh *ofh = ovl_get_fh(dentry, name);
328 int err = 0; 388 int err = 0;
329 389
330 if (!ofh) 390 if (!ofh)
@@ -341,28 +401,28 @@ static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh)
341} 401}
342 402
343/* 403/*
344 * Verify that an inode matches the origin file handle stored in upper inode. 404 * Verify that @real dentry matches the file handle stored in xattr @name.
345 * 405 *
346 * If @set is true and there is no stored file handle, encode and store origin 406 * If @set is true and there is no stored file handle, encode @real and store
347 * file handle in OVL_XATTR_ORIGIN. 407 * file handle in xattr @name.
348 * 408 *
349 * Return 0 on match, -ESTALE on mismatch, < 0 on error. 409 * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
350 */ 410 */
351int ovl_verify_origin(struct dentry *dentry, struct dentry *origin, 411int ovl_verify_set_fh(struct dentry *dentry, const char *name,
352 bool is_upper, bool set) 412 struct dentry *real, bool is_upper, bool set)
353{ 413{
354 struct inode *inode; 414 struct inode *inode;
355 struct ovl_fh *fh; 415 struct ovl_fh *fh;
356 int err; 416 int err;
357 417
358 fh = ovl_encode_fh(origin, is_upper); 418 fh = ovl_encode_fh(real, is_upper);
359 err = PTR_ERR(fh); 419 err = PTR_ERR(fh);
360 if (IS_ERR(fh)) 420 if (IS_ERR(fh))
361 goto fail; 421 goto fail;
362 422
363 err = ovl_verify_origin_fh(dentry, fh); 423 err = ovl_verify_fh(dentry, name, fh);
364 if (set && err == -ENODATA) 424 if (set && err == -ENODATA)
365 err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0); 425 err = ovl_do_setxattr(dentry, name, fh, fh->len, 0);
366 if (err) 426 if (err)
367 goto fail; 427 goto fail;
368 428
@@ -371,45 +431,71 @@ out:
371 return err; 431 return err;
372 432
373fail: 433fail:
374 inode = d_inode(origin); 434 inode = d_inode(real);
375 pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n", 435 pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n",
376 origin, inode ? inode->i_ino : 0, err); 436 is_upper ? "upper" : "origin", real,
437 inode ? inode->i_ino : 0, err);
377 goto out; 438 goto out;
378} 439}
379 440
441/* Get upper dentry from index */
442struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
443{
444 struct ovl_fh *fh;
445 struct dentry *upper;
446
447 if (!d_is_dir(index))
448 return dget(index);
449
450 fh = ovl_get_fh(index, OVL_XATTR_UPPER);
451 if (IS_ERR_OR_NULL(fh))
452 return ERR_CAST(fh);
453
454 upper = ovl_decode_fh(fh, ofs->upper_mnt);
455 kfree(fh);
456
457 if (IS_ERR_OR_NULL(upper))
458 return upper ?: ERR_PTR(-ESTALE);
459
460 if (!d_is_dir(upper)) {
461 pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n",
462 index, upper);
463 dput(upper);
464 return ERR_PTR(-EIO);
465 }
466
467 return upper;
468}
469
470/* Is this a leftover from create/whiteout of directory index entry? */
471static bool ovl_is_temp_index(struct dentry *index)
472{
473 return index->d_name.name[0] == '#';
474}
475
380/* 476/*
381 * Verify that an index entry name matches the origin file handle stored in 477 * Verify that an index entry name matches the origin file handle stored in
382 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path. 478 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
383 * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error. 479 * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
384 */ 480 */
385int ovl_verify_index(struct dentry *index, struct ovl_path *lower, 481int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
386 unsigned int numlower)
387{ 482{
388 struct ovl_fh *fh = NULL; 483 struct ovl_fh *fh = NULL;
389 size_t len; 484 size_t len;
390 struct ovl_path origin = { }; 485 struct ovl_path origin = { };
391 struct ovl_path *stack = &origin; 486 struct ovl_path *stack = &origin;
392 unsigned int ctr = 0; 487 struct dentry *upper = NULL;
393 int err; 488 int err;
394 489
395 if (!d_inode(index)) 490 if (!d_inode(index))
396 return 0; 491 return 0;
397 492
398 /* 493 /* Cleanup leftover from index create/cleanup attempt */
399 * Directory index entries are going to be used for looking up 494 err = -ESTALE;
400 * redirected upper dirs by lower dir fh when decoding an overlay 495 if (ovl_is_temp_index(index))
401 * file handle of a merge dir. Whiteout index entries are going to be
402 * used as an indication that an exported overlay file handle should
403 * be treated as stale (i.e. after unlink of the overlay inode).
404 * We don't know the verification rules for directory and whiteout
405 * index entries, because they have not been implemented yet, so return
406 * EINVAL if those entries are found to abort the mount to avoid
407 * corrupting an index that was created by a newer kernel.
408 */
409 err = -EINVAL;
410 if (d_is_dir(index) || ovl_is_whiteout(index))
411 goto fail; 496 goto fail;
412 497
498 err = -EINVAL;
413 if (index->d_name.len < sizeof(struct ovl_fh)*2) 499 if (index->d_name.len < sizeof(struct ovl_fh)*2)
414 goto fail; 500 goto fail;
415 501
@@ -420,26 +506,68 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
420 goto fail; 506 goto fail;
421 507
422 err = -EINVAL; 508 err = -EINVAL;
423 if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len) 509 if (hex2bin((u8 *)fh, index->d_name.name, len))
424 goto fail; 510 goto fail;
425 511
426 err = ovl_verify_origin_fh(index, fh); 512 err = ovl_check_fh_len(fh, len);
427 if (err) 513 if (err)
428 goto fail; 514 goto fail;
429 515
430 err = ovl_check_origin(index, lower, numlower, &stack, &ctr); 516 /*
431 if (!err && !ctr) 517 * Whiteout index entries are used as an indication that an exported
432 err = -ESTALE; 518 * overlay file handle should be treated as stale (i.e. after unlink
519 * of the overlay inode). These entries contain no origin xattr.
520 */
521 if (ovl_is_whiteout(index))
522 goto out;
523
524 /*
525 * Verifying directory index entries are not stale is expensive, so
526 * only verify stale dir index if NFS export is enabled.
527 */
528 if (d_is_dir(index) && !ofs->config.nfs_export)
529 goto out;
530
531 /*
532 * Directory index entries should have 'upper' xattr pointing to the
533 * real upper dir. Non-dir index entries are hardlinks to the upper
534 * real inode. For non-dir index, we can read the copy up origin xattr
535 * directly from the index dentry, but for dir index we first need to
536 * decode the upper directory.
537 */
538 upper = ovl_index_upper(ofs, index);
539 if (IS_ERR_OR_NULL(upper)) {
540 err = PTR_ERR(upper);
541 /*
542 * Directory index entries with no 'upper' xattr need to be
543 * removed. When dir index entry has a stale 'upper' xattr,
544 * we assume that upper dir was removed and we treat the dir
545 * index as orphan entry that needs to be whited out.
546 */
547 if (err == -ESTALE)
548 goto orphan;
549 else if (!err)
550 err = -ESTALE;
551 goto fail;
552 }
553
554 err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh);
555 dput(upper);
433 if (err) 556 if (err)
434 goto fail; 557 goto fail;
435 558
436 /* Check if index is orphan and don't warn before cleaning it */ 559 /* Check if non-dir index is orphan and don't warn before cleaning it */
437 if (d_inode(index)->i_nlink == 1 && 560 if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
438 ovl_get_nlink(origin.dentry, index, 0) == 0) 561 err = ovl_check_origin_fh(ofs, fh, index, &stack);
439 err = -ENOENT; 562 if (err)
563 goto fail;
564
565 if (ovl_get_nlink(origin.dentry, index, 0) == 0)
566 goto orphan;
567 }
440 568
441 dput(origin.dentry);
442out: 569out:
570 dput(origin.dentry);
443 kfree(fh); 571 kfree(fh);
444 return err; 572 return err;
445 573
@@ -447,6 +575,28 @@ fail:
447 pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n", 575 pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
448 index, d_inode(index)->i_mode & S_IFMT, err); 576 index, d_inode(index)->i_mode & S_IFMT, err);
449 goto out; 577 goto out;
578
579orphan:
580 pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
581 index, d_inode(index)->i_mode & S_IFMT,
582 d_inode(index)->i_nlink);
583 err = -ENOENT;
584 goto out;
585}
586
587static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
588{
589 char *n, *s;
590
591 n = kzalloc(fh->len * 2, GFP_KERNEL);
592 if (!n)
593 return -ENOMEM;
594
595 s = bin2hex(n, fh, fh->len);
596 *name = (struct qstr) QSTR_INIT(n, s - n);
597
598 return 0;
599
450} 600}
451 601
452/* 602/*
@@ -466,35 +616,58 @@ fail:
466 */ 616 */
467int ovl_get_index_name(struct dentry *origin, struct qstr *name) 617int ovl_get_index_name(struct dentry *origin, struct qstr *name)
468{ 618{
469 int err;
470 struct ovl_fh *fh; 619 struct ovl_fh *fh;
471 char *n, *s; 620 int err;
472 621
473 fh = ovl_encode_fh(origin, false); 622 fh = ovl_encode_fh(origin, false);
474 if (IS_ERR(fh)) 623 if (IS_ERR(fh))
475 return PTR_ERR(fh); 624 return PTR_ERR(fh);
476 625
477 err = -ENOMEM; 626 err = ovl_get_index_name_fh(fh, name);
478 n = kzalloc(fh->len * 2, GFP_KERNEL);
479 if (n) {
480 s = bin2hex(n, fh, fh->len);
481 *name = (struct qstr) QSTR_INIT(n, s - n);
482 err = 0;
483 }
484 kfree(fh);
485 627
628 kfree(fh);
486 return err; 629 return err;
630}
631
632/* Lookup index by file handle for NFS export */
633struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
634{
635 struct dentry *index;
636 struct qstr name;
637 int err;
638
639 err = ovl_get_index_name_fh(fh, &name);
640 if (err)
641 return ERR_PTR(err);
642
643 index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
644 kfree(name.name);
645 if (IS_ERR(index)) {
646 if (PTR_ERR(index) == -ENOENT)
647 index = NULL;
648 return index;
649 }
487 650
651 if (d_is_negative(index))
652 err = 0;
653 else if (ovl_is_whiteout(index))
654 err = -ESTALE;
655 else if (ovl_dentry_weird(index))
656 err = -EIO;
657 else
658 return index;
659
660 dput(index);
661 return ERR_PTR(err);
488} 662}
489 663
490static struct dentry *ovl_lookup_index(struct dentry *dentry, 664struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
491 struct dentry *upper, 665 struct dentry *origin, bool verify)
492 struct dentry *origin)
493{ 666{
494 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
495 struct dentry *index; 667 struct dentry *index;
496 struct inode *inode; 668 struct inode *inode;
497 struct qstr name; 669 struct qstr name;
670 bool is_dir = d_is_dir(origin);
498 int err; 671 int err;
499 672
500 err = ovl_get_index_name(origin, &name); 673 err = ovl_get_index_name(origin, &name);
@@ -518,8 +691,16 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
518 inode = d_inode(index); 691 inode = d_inode(index);
519 if (d_is_negative(index)) { 692 if (d_is_negative(index)) {
520 goto out_dput; 693 goto out_dput;
521 } else if (upper && d_inode(upper) != inode) { 694 } else if (ovl_is_whiteout(index) && !verify) {
522 goto out_dput; 695 /*
696 * When index lookup is called with !verify for decoding an
697 * overlay file handle, a whiteout index implies that decode
698 * should treat file handle as stale and no need to print a
699 * warning about it.
700 */
701 dput(index);
702 index = ERR_PTR(-ESTALE);
703 goto out;
523 } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) || 704 } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
524 ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) { 705 ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
525 /* 706 /*
@@ -533,8 +714,25 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
533 index, d_inode(index)->i_mode & S_IFMT, 714 index, d_inode(index)->i_mode & S_IFMT,
534 d_inode(origin)->i_mode & S_IFMT); 715 d_inode(origin)->i_mode & S_IFMT);
535 goto fail; 716 goto fail;
536 } 717 } else if (is_dir && verify) {
718 if (!upper) {
719 pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
720 origin, index);
721 goto fail;
722 }
537 723
724 /* Verify that dir index 'upper' xattr points to upper dir */
725 err = ovl_verify_upper(index, upper, false);
726 if (err) {
727 if (err == -ESTALE) {
728 pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
729 upper, origin, index);
730 }
731 goto fail;
732 }
733 } else if (upper && d_inode(upper) != inode) {
734 goto out_dput;
735 }
538out: 736out:
539 kfree(name.name); 737 kfree(name.name);
540 return index; 738 return index;
@@ -572,16 +770,25 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
572 return (idx < oe->numlower) ? idx + 1 : -1; 770 return (idx < oe->numlower) ? idx + 1 : -1;
573} 771}
574 772
575static int ovl_find_layer(struct ovl_fs *ofs, struct ovl_path *path) 773/* Fix missing 'origin' xattr */
774static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower,
775 struct dentry *upper)
576{ 776{
577 int i; 777 int err;
578 778
579 for (i = 0; i < ofs->numlower; i++) { 779 if (ovl_check_origin_xattr(upper))
580 if (ofs->lower_layers[i].mnt == path->layer->mnt) 780 return 0;
581 break; 781
582 } 782 err = ovl_want_write(dentry);
783 if (err)
784 return err;
785
786 err = ovl_set_origin(dentry, lower, upper);
787 if (!err)
788 err = ovl_set_impure(dentry->d_parent, upper->d_parent);
583 789
584 return i; 790 ovl_drop_write(dentry);
791 return err;
585} 792}
586 793
587struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, 794struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
@@ -594,6 +801,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
594 struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; 801 struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
595 struct ovl_path *stack = NULL; 802 struct ovl_path *stack = NULL;
596 struct dentry *upperdir, *upperdentry = NULL; 803 struct dentry *upperdir, *upperdentry = NULL;
804 struct dentry *origin = NULL;
597 struct dentry *index = NULL; 805 struct dentry *index = NULL;
598 unsigned int ctr = 0; 806 unsigned int ctr = 0;
599 struct inode *inode = NULL; 807 struct inode *inode = NULL;
@@ -638,8 +846,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
638 * number - it's the same as if we held a reference 846 * number - it's the same as if we held a reference
639 * to a dentry in lower layer that was moved under us. 847 * to a dentry in lower layer that was moved under us.
640 */ 848 */
641 err = ovl_check_origin(upperdentry, roe->lowerstack, 849 err = ovl_check_origin(ofs, upperdentry, &stack, &ctr);
642 roe->numlower, &stack, &ctr);
643 if (err) 850 if (err)
644 goto out_put_upper; 851 goto out_put_upper;
645 } 852 }
@@ -674,6 +881,34 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
674 if (!this) 881 if (!this)
675 continue; 882 continue;
676 883
884 /*
885 * If no origin fh is stored in upper of a merge dir, store fh
886 * of lower dir and set upper parent "impure".
887 */
888 if (upperdentry && !ctr && !ofs->noxattr) {
889 err = ovl_fix_origin(dentry, this, upperdentry);
890 if (err) {
891 dput(this);
892 goto out_put;
893 }
894 }
895
896 /*
897 * When "verify_lower" feature is enabled, do not merge with a
898 * lower dir that does not match a stored origin xattr. In any
899 * case, only verified origin is used for index lookup.
900 */
901 if (upperdentry && !ctr && ovl_verify_lower(dentry->d_sb)) {
902 err = ovl_verify_origin(upperdentry, this, false);
903 if (err) {
904 dput(this);
905 break;
906 }
907
908 /* Bless lower dir as verified origin */
909 origin = this;
910 }
911
677 stack[ctr].dentry = this; 912 stack[ctr].dentry = this;
678 stack[ctr].layer = lower.layer; 913 stack[ctr].layer = lower.layer;
679 ctr++; 914 ctr++;
@@ -693,25 +928,30 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
693 */ 928 */
694 err = -EPERM; 929 err = -EPERM;
695 if (d.redirect && !ofs->config.redirect_follow) { 930 if (d.redirect && !ofs->config.redirect_follow) {
696 pr_warn_ratelimited("overlay: refusing to follow redirect for (%pd2)\n", dentry); 931 pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n",
932 dentry);
697 goto out_put; 933 goto out_put;
698 } 934 }
699 935
700 if (d.redirect && d.redirect[0] == '/' && poe != roe) { 936 if (d.redirect && d.redirect[0] == '/' && poe != roe) {
701 poe = roe; 937 poe = roe;
702
703 /* Find the current layer on the root dentry */ 938 /* Find the current layer on the root dentry */
704 i = ovl_find_layer(ofs, &lower); 939 i = lower.layer->idx - 1;
705 if (WARN_ON(i == ofs->numlower))
706 break;
707 } 940 }
708 } 941 }
709 942
710 /* Lookup index by lower inode and verify it matches upper inode */ 943 /*
711 if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) { 944 * Lookup index by lower inode and verify it matches upper inode.
712 struct dentry *origin = stack[0].dentry; 945 * We only trust dir index if we verified that lower dir matches
946 * origin, otherwise dir index entries may be inconsistent and we
947 * ignore them. Always lookup index of non-dir and non-upper.
948 */
949 if (ctr && (!upperdentry || !d.is_dir))
950 origin = stack[0].dentry;
713 951
714 index = ovl_lookup_index(dentry, upperdentry, origin); 952 if (origin && ovl_indexdir(dentry->d_sb) &&
953 (!d.is_dir || ovl_index_all(dentry->d_sb))) {
954 index = ovl_lookup_index(ofs, upperdentry, origin, true);
715 if (IS_ERR(index)) { 955 if (IS_ERR(index)) {
716 err = PTR_ERR(index); 956 err = PTR_ERR(index);
717 index = NULL; 957 index = NULL;
@@ -724,17 +964,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
724 if (!oe) 964 if (!oe)
725 goto out_put; 965 goto out_put;
726 966
727 oe->opaque = upperopaque;
728 memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr); 967 memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
729 dentry->d_fsdata = oe; 968 dentry->d_fsdata = oe;
730 969
970 if (upperopaque)
971 ovl_dentry_set_opaque(dentry);
972
731 if (upperdentry) 973 if (upperdentry)
732 ovl_dentry_set_upper_alias(dentry); 974 ovl_dentry_set_upper_alias(dentry);
733 else if (index) 975 else if (index)
734 upperdentry = dget(index); 976 upperdentry = dget(index);
735 977
736 if (upperdentry || ctr) { 978 if (upperdentry || ctr) {
737 inode = ovl_get_inode(dentry, upperdentry, index); 979 if (ctr)
980 origin = stack[0].dentry;
981 inode = ovl_get_inode(dentry->d_sb, upperdentry, origin, index,
982 ctr);
738 err = PTR_ERR(inode); 983 err = PTR_ERR(inode);
739 if (IS_ERR(inode)) 984 if (IS_ERR(inode))
740 goto out_free_oe; 985 goto out_free_oe;
@@ -748,9 +993,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
748 dput(index); 993 dput(index);
749 kfree(stack); 994 kfree(stack);
750 kfree(d.redirect); 995 kfree(d.redirect);
751 d_add(dentry, inode); 996 return d_splice_alias(inode, dentry);
752
753 return NULL;
754 997
755out_free_oe: 998out_free_oe:
756 dentry->d_fsdata = NULL; 999 dentry->d_fsdata = NULL;
@@ -771,9 +1014,9 @@ out:
771 1014
772bool ovl_lower_positive(struct dentry *dentry) 1015bool ovl_lower_positive(struct dentry *dentry)
773{ 1016{
774 struct ovl_entry *oe = dentry->d_fsdata;
775 struct ovl_entry *poe = dentry->d_parent->d_fsdata; 1017 struct ovl_entry *poe = dentry->d_parent->d_fsdata;
776 const struct qstr *name = &dentry->d_name; 1018 const struct qstr *name = &dentry->d_name;
1019 const struct cred *old_cred;
777 unsigned int i; 1020 unsigned int i;
778 bool positive = false; 1021 bool positive = false;
779 bool done = false; 1022 bool done = false;
@@ -783,12 +1026,13 @@ bool ovl_lower_positive(struct dentry *dentry)
783 * whiteout. 1026 * whiteout.
784 */ 1027 */
785 if (!dentry->d_inode) 1028 if (!dentry->d_inode)
786 return oe->opaque; 1029 return ovl_dentry_is_opaque(dentry);
787 1030
788 /* Negative upper -> positive lower */ 1031 /* Negative upper -> positive lower */
789 if (!ovl_dentry_upper(dentry)) 1032 if (!ovl_dentry_upper(dentry))
790 return true; 1033 return true;
791 1034
1035 old_cred = ovl_override_creds(dentry->d_sb);
792 /* Positive upper -> have to look up lower to see whether it exists */ 1036 /* Positive upper -> have to look up lower to see whether it exists */
793 for (i = 0; !done && !positive && i < poe->numlower; i++) { 1037 for (i = 0; !done && !positive && i < poe->numlower; i++) {
794 struct dentry *this; 1038 struct dentry *this;
@@ -818,6 +1062,7 @@ bool ovl_lower_positive(struct dentry *dentry)
818 dput(this); 1062 dput(this);
819 } 1063 }
820 } 1064 }
1065 revert_creds(old_cred);
821 1066
822 return positive; 1067 return positive;
823} 1068}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index b489099ccd49..0df25a9c94bd 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -27,8 +27,9 @@ enum ovl_path_type {
27#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin" 27#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
28#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure" 28#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
29#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink" 29#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
30#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper"
30 31
31enum ovl_flag { 32enum ovl_inode_flag {
32 /* Pure upper dir that may contain non pure upper entries */ 33 /* Pure upper dir that may contain non pure upper entries */
33 OVL_IMPURE, 34 OVL_IMPURE,
34 /* Non-merge dir that may contain whiteout entries */ 35 /* Non-merge dir that may contain whiteout entries */
@@ -36,6 +37,11 @@ enum ovl_flag {
36 OVL_INDEX, 37 OVL_INDEX,
37}; 38};
38 39
40enum ovl_entry_flag {
41 OVL_E_UPPER_ALIAS,
42 OVL_E_OPAQUE,
43};
44
39/* 45/*
40 * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, 46 * The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
41 * where: 47 * where:
@@ -62,6 +68,9 @@ enum ovl_flag {
62#error Endianness not defined 68#error Endianness not defined
63#endif 69#endif
64 70
71/* The type returned by overlay exportfs ops when encoding an ovl_fh handle */
72#define OVL_FILEID 0xfb
73
65/* On-disk and in-memeory format for redirect by file handle */ 74/* On-disk and in-memeory format for redirect by file handle */
66struct ovl_fh { 75struct ovl_fh {
67 u8 version; /* 0 */ 76 u8 version; /* 0 */
@@ -194,6 +203,8 @@ const struct cred *ovl_override_creds(struct super_block *sb);
194struct super_block *ovl_same_sb(struct super_block *sb); 203struct super_block *ovl_same_sb(struct super_block *sb);
195bool ovl_can_decode_fh(struct super_block *sb); 204bool ovl_can_decode_fh(struct super_block *sb);
196struct dentry *ovl_indexdir(struct super_block *sb); 205struct dentry *ovl_indexdir(struct super_block *sb);
206bool ovl_index_all(struct super_block *sb);
207bool ovl_verify_lower(struct super_block *sb);
197struct ovl_entry *ovl_alloc_entry(unsigned int numlower); 208struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
198bool ovl_dentry_remote(struct dentry *dentry); 209bool ovl_dentry_remote(struct dentry *dentry);
199bool ovl_dentry_weird(struct dentry *dentry); 210bool ovl_dentry_weird(struct dentry *dentry);
@@ -210,6 +221,9 @@ struct inode *ovl_inode_lower(struct inode *inode);
210struct inode *ovl_inode_real(struct inode *inode); 221struct inode *ovl_inode_real(struct inode *inode);
211struct ovl_dir_cache *ovl_dir_cache(struct inode *inode); 222struct ovl_dir_cache *ovl_dir_cache(struct inode *inode);
212void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache); 223void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache);
224void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry);
225void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry);
226bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry);
213bool ovl_dentry_is_opaque(struct dentry *dentry); 227bool ovl_dentry_is_opaque(struct dentry *dentry);
214bool ovl_dentry_is_whiteout(struct dentry *dentry); 228bool ovl_dentry_is_whiteout(struct dentry *dentry);
215void ovl_dentry_set_opaque(struct dentry *dentry); 229void ovl_dentry_set_opaque(struct dentry *dentry);
@@ -238,6 +252,7 @@ void ovl_clear_flag(unsigned long flag, struct inode *inode);
238bool ovl_test_flag(unsigned long flag, struct inode *inode); 252bool ovl_test_flag(unsigned long flag, struct inode *inode);
239bool ovl_inuse_trylock(struct dentry *dentry); 253bool ovl_inuse_trylock(struct dentry *dentry);
240void ovl_inuse_unlock(struct dentry *dentry); 254void ovl_inuse_unlock(struct dentry *dentry);
255bool ovl_need_index(struct dentry *dentry);
241int ovl_nlink_start(struct dentry *dentry, bool *locked); 256int ovl_nlink_start(struct dentry *dentry, bool *locked);
242void ovl_nlink_end(struct dentry *dentry, bool locked); 257void ovl_nlink_end(struct dentry *dentry, bool locked);
243int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir); 258int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
@@ -249,15 +264,35 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
249 264
250 265
251/* namei.c */ 266/* namei.c */
252int ovl_verify_origin(struct dentry *dentry, struct dentry *origin, 267int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
253 bool is_upper, bool set); 268struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt);
254int ovl_verify_index(struct dentry *index, struct ovl_path *lower, 269int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
255 unsigned int numlower); 270 struct dentry *upperdentry, struct ovl_path **stackp);
271int ovl_verify_set_fh(struct dentry *dentry, const char *name,
272 struct dentry *real, bool is_upper, bool set);
273struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index);
274int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
256int ovl_get_index_name(struct dentry *origin, struct qstr *name); 275int ovl_get_index_name(struct dentry *origin, struct qstr *name);
276struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
277struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
278 struct dentry *origin, bool verify);
257int ovl_path_next(int idx, struct dentry *dentry, struct path *path); 279int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
258struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); 280struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
281 unsigned int flags);
259bool ovl_lower_positive(struct dentry *dentry); 282bool ovl_lower_positive(struct dentry *dentry);
260 283
284static inline int ovl_verify_origin(struct dentry *upper,
285 struct dentry *origin, bool set)
286{
287 return ovl_verify_set_fh(upper, OVL_XATTR_ORIGIN, origin, false, set);
288}
289
290static inline int ovl_verify_upper(struct dentry *index,
291 struct dentry *upper, bool set)
292{
293 return ovl_verify_set_fh(index, OVL_XATTR_UPPER, upper, true, set);
294}
295
261/* readdir.c */ 296/* readdir.c */
262extern const struct file_operations ovl_dir_operations; 297extern const struct file_operations ovl_dir_operations;
263int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); 298int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
@@ -267,8 +302,7 @@ void ovl_dir_cache_free(struct inode *inode);
267int ovl_check_d_type_supported(struct path *realpath); 302int ovl_check_d_type_supported(struct path *realpath);
268void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, 303void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
269 struct dentry *dentry, int level); 304 struct dentry *dentry, int level);
270int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, 305int ovl_indexdir_cleanup(struct ovl_fs *ofs);
271 struct ovl_path *lower, unsigned int numlower);
272 306
273/* inode.c */ 307/* inode.c */
274int ovl_set_nlink_upper(struct dentry *dentry); 308int ovl_set_nlink_upper(struct dentry *dentry);
@@ -291,8 +325,11 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags);
291bool ovl_is_private_xattr(const char *name); 325bool ovl_is_private_xattr(const char *name);
292 326
293struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); 327struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
294struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, 328struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
295 struct dentry *index); 329 bool is_upper);
330struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
331 struct dentry *lowerdentry, struct dentry *index,
332 unsigned int numlower);
296static inline void ovl_copyattr(struct inode *from, struct inode *to) 333static inline void ovl_copyattr(struct inode *from, struct inode *to)
297{ 334{
298 to->i_uid = from->i_uid; 335 to->i_uid = from->i_uid;
@@ -306,6 +343,8 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
306/* dir.c */ 343/* dir.c */
307extern const struct inode_operations ovl_dir_inode_operations; 344extern const struct inode_operations ovl_dir_inode_operations;
308struct dentry *ovl_lookup_temp(struct dentry *workdir); 345struct dentry *ovl_lookup_temp(struct dentry *workdir);
346int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
347 struct dentry *dentry);
309struct cattr { 348struct cattr {
310 dev_t rdev; 349 dev_t rdev;
311 umode_t mode; 350 umode_t mode;
@@ -321,4 +360,9 @@ int ovl_copy_up(struct dentry *dentry);
321int ovl_copy_up_flags(struct dentry *dentry, int flags); 360int ovl_copy_up_flags(struct dentry *dentry, int flags);
322int ovl_copy_xattr(struct dentry *old, struct dentry *new); 361int ovl_copy_xattr(struct dentry *old, struct dentry *new);
323int ovl_set_attr(struct dentry *upper, struct kstat *stat); 362int ovl_set_attr(struct dentry *upper, struct kstat *stat);
324struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper); 363struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper);
364int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
365 struct dentry *upper);
366
367/* export.c */
368extern const struct export_operations ovl_export_operations;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 9d0bc03bf6e4..bfef6edcc111 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -17,11 +17,14 @@ struct ovl_config {
17 bool redirect_follow; 17 bool redirect_follow;
18 const char *redirect_mode; 18 const char *redirect_mode;
19 bool index; 19 bool index;
20 bool nfs_export;
20}; 21};
21 22
22struct ovl_layer { 23struct ovl_layer {
23 struct vfsmount *mnt; 24 struct vfsmount *mnt;
24 dev_t pseudo_dev; 25 dev_t pseudo_dev;
26 /* Index of this layer in fs root (upper == 0) */
27 int idx;
25}; 28};
26 29
27struct ovl_path { 30struct ovl_path {
@@ -58,8 +61,7 @@ struct ovl_fs {
58struct ovl_entry { 61struct ovl_entry {
59 union { 62 union {
60 struct { 63 struct {
61 unsigned long has_upper; 64 unsigned long flags;
62 bool opaque;
63 }; 65 };
64 struct rcu_head rcu; 66 struct rcu_head rcu;
65 }; 67 };
@@ -69,6 +71,11 @@ struct ovl_entry {
69 71
70struct ovl_entry *ovl_alloc_entry(unsigned int numlower); 72struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
71 73
74static inline struct ovl_entry *OVL_E(struct dentry *dentry)
75{
76 return (struct ovl_entry *) dentry->d_fsdata;
77}
78
72struct ovl_inode { 79struct ovl_inode {
73 struct ovl_dir_cache *cache; 80 struct ovl_dir_cache *cache;
74 const char *redirect; 81 const char *redirect;
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 8c98578d27a1..c11f5c0906c3 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -593,8 +593,15 @@ static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path)
593 return ERR_PTR(res); 593 return ERR_PTR(res);
594 } 594 }
595 if (list_empty(&cache->entries)) { 595 if (list_empty(&cache->entries)) {
596 /* Good oportunity to get rid of an unnecessary "impure" flag */ 596 /*
597 ovl_do_removexattr(ovl_dentry_upper(dentry), OVL_XATTR_IMPURE); 597 * A good opportunity to get rid of an unneeded "impure" flag.
598 * Removing the "impure" xattr is best effort.
599 */
600 if (!ovl_want_write(dentry)) {
601 ovl_do_removexattr(ovl_dentry_upper(dentry),
602 OVL_XATTR_IMPURE);
603 ovl_drop_write(dentry);
604 }
598 ovl_clear_flag(OVL_IMPURE, d_inode(dentry)); 605 ovl_clear_flag(OVL_IMPURE, d_inode(dentry));
599 kfree(cache); 606 kfree(cache);
600 return NULL; 607 return NULL;
@@ -769,10 +776,14 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
769 struct dentry *dentry = file->f_path.dentry; 776 struct dentry *dentry = file->f_path.dentry;
770 struct file *realfile = od->realfile; 777 struct file *realfile = od->realfile;
771 778
779 /* Nothing to sync for lower */
780 if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
781 return 0;
782
772 /* 783 /*
773 * Need to check if we started out being a lower dir, but got copied up 784 * Need to check if we started out being a lower dir, but got copied up
774 */ 785 */
775 if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { 786 if (!od->is_upper) {
776 struct inode *inode = file_inode(file); 787 struct inode *inode = file_inode(file);
777 788
778 realfile = READ_ONCE(od->upperfile); 789 realfile = READ_ONCE(od->upperfile);
@@ -858,8 +869,11 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
858 int err; 869 int err;
859 struct ovl_cache_entry *p, *n; 870 struct ovl_cache_entry *p, *n;
860 struct rb_root root = RB_ROOT; 871 struct rb_root root = RB_ROOT;
872 const struct cred *old_cred;
861 873
874 old_cred = ovl_override_creds(dentry->d_sb);
862 err = ovl_dir_read_merged(dentry, list, &root); 875 err = ovl_dir_read_merged(dentry, list, &root);
876 revert_creds(old_cred);
863 if (err) 877 if (err)
864 return err; 878 return err;
865 879
@@ -1016,13 +1030,13 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
1016 } 1030 }
1017} 1031}
1018 1032
1019int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, 1033int ovl_indexdir_cleanup(struct ovl_fs *ofs)
1020 struct ovl_path *lower, unsigned int numlower)
1021{ 1034{
1022 int err; 1035 int err;
1036 struct dentry *indexdir = ofs->indexdir;
1023 struct dentry *index = NULL; 1037 struct dentry *index = NULL;
1024 struct inode *dir = dentry->d_inode; 1038 struct inode *dir = indexdir->d_inode;
1025 struct path path = { .mnt = mnt, .dentry = dentry }; 1039 struct path path = { .mnt = ofs->upper_mnt, .dentry = indexdir };
1026 LIST_HEAD(list); 1040 LIST_HEAD(list);
1027 struct rb_root root = RB_ROOT; 1041 struct rb_root root = RB_ROOT;
1028 struct ovl_cache_entry *p; 1042 struct ovl_cache_entry *p;
@@ -1046,19 +1060,40 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
1046 if (p->len == 2 && p->name[1] == '.') 1060 if (p->len == 2 && p->name[1] == '.')
1047 continue; 1061 continue;
1048 } 1062 }
1049 index = lookup_one_len(p->name, dentry, p->len); 1063 index = lookup_one_len(p->name, indexdir, p->len);
1050 if (IS_ERR(index)) { 1064 if (IS_ERR(index)) {
1051 err = PTR_ERR(index); 1065 err = PTR_ERR(index);
1052 index = NULL; 1066 index = NULL;
1053 break; 1067 break;
1054 } 1068 }
1055 err = ovl_verify_index(index, lower, numlower); 1069 err = ovl_verify_index(ofs, index);
1056 /* Cleanup stale and orphan index entries */ 1070 if (!err) {
1057 if (err && (err == -ESTALE || err == -ENOENT)) 1071 goto next;
1072 } else if (err == -ESTALE) {
1073 /* Cleanup stale index entries */
1074 err = ovl_cleanup(dir, index);
1075 } else if (err != -ENOENT) {
1076 /*
1077 * Abort mount to avoid corrupting the index if
1078 * an incompatible index entry was found or on out
1079 * of memory.
1080 */
1081 break;
1082 } else if (ofs->config.nfs_export) {
1083 /*
1084 * Whiteout orphan index to block future open by
1085 * handle after overlay nlink dropped to zero.
1086 */
1087 err = ovl_cleanup_and_whiteout(indexdir, dir, index);
1088 } else {
1089 /* Cleanup orphan index entries */
1058 err = ovl_cleanup(dir, index); 1090 err = ovl_cleanup(dir, index);
1091 }
1092
1059 if (err) 1093 if (err)
1060 break; 1094 break;
1061 1095
1096next:
1062 dput(index); 1097 dput(index);
1063 index = NULL; 1098 index = NULL;
1064 } 1099 }
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 76440feb79f6..9ee37c76091d 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -45,6 +45,11 @@ module_param_named(index, ovl_index_def, bool, 0644);
45MODULE_PARM_DESC(ovl_index_def, 45MODULE_PARM_DESC(ovl_index_def,
46 "Default to on or off for the inodes index feature"); 46 "Default to on or off for the inodes index feature");
47 47
48static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
49module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
50MODULE_PARM_DESC(ovl_nfs_export_def,
51 "Default to on or off for the NFS export feature");
52
48static void ovl_entry_stack_free(struct ovl_entry *oe) 53static void ovl_entry_stack_free(struct ovl_entry *oe)
49{ 54{
50 unsigned int i; 55 unsigned int i;
@@ -211,6 +216,7 @@ static void ovl_destroy_inode(struct inode *inode)
211 struct ovl_inode *oi = OVL_I(inode); 216 struct ovl_inode *oi = OVL_I(inode);
212 217
213 dput(oi->__upperdentry); 218 dput(oi->__upperdentry);
219 iput(oi->lower);
214 kfree(oi->redirect); 220 kfree(oi->redirect);
215 ovl_dir_cache_free(inode); 221 ovl_dir_cache_free(inode);
216 mutex_destroy(&oi->lock); 222 mutex_destroy(&oi->lock);
@@ -341,6 +347,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
341 seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode); 347 seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
342 if (ofs->config.index != ovl_index_def) 348 if (ofs->config.index != ovl_index_def)
343 seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off"); 349 seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
350 if (ofs->config.nfs_export != ovl_nfs_export_def)
351 seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
352 "on" : "off");
344 return 0; 353 return 0;
345} 354}
346 355
@@ -373,6 +382,8 @@ enum {
373 OPT_REDIRECT_DIR, 382 OPT_REDIRECT_DIR,
374 OPT_INDEX_ON, 383 OPT_INDEX_ON,
375 OPT_INDEX_OFF, 384 OPT_INDEX_OFF,
385 OPT_NFS_EXPORT_ON,
386 OPT_NFS_EXPORT_OFF,
376 OPT_ERR, 387 OPT_ERR,
377}; 388};
378 389
@@ -384,6 +395,8 @@ static const match_table_t ovl_tokens = {
384 {OPT_REDIRECT_DIR, "redirect_dir=%s"}, 395 {OPT_REDIRECT_DIR, "redirect_dir=%s"},
385 {OPT_INDEX_ON, "index=on"}, 396 {OPT_INDEX_ON, "index=on"},
386 {OPT_INDEX_OFF, "index=off"}, 397 {OPT_INDEX_OFF, "index=off"},
398 {OPT_NFS_EXPORT_ON, "nfs_export=on"},
399 {OPT_NFS_EXPORT_OFF, "nfs_export=off"},
387 {OPT_ERR, NULL} 400 {OPT_ERR, NULL}
388}; 401};
389 402
@@ -490,6 +503,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
490 config->index = false; 503 config->index = false;
491 break; 504 break;
492 505
506 case OPT_NFS_EXPORT_ON:
507 config->nfs_export = true;
508 break;
509
510 case OPT_NFS_EXPORT_OFF:
511 config->nfs_export = false;
512 break;
513
493 default: 514 default:
494 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); 515 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
495 return -EINVAL; 516 return -EINVAL;
@@ -520,10 +541,6 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
520 bool retried = false; 541 bool retried = false;
521 bool locked = false; 542 bool locked = false;
522 543
523 err = mnt_want_write(mnt);
524 if (err)
525 goto out_err;
526
527 inode_lock_nested(dir, I_MUTEX_PARENT); 544 inode_lock_nested(dir, I_MUTEX_PARENT);
528 locked = true; 545 locked = true;
529 546
@@ -588,7 +605,6 @@ retry:
588 goto out_err; 605 goto out_err;
589 } 606 }
590out_unlock: 607out_unlock:
591 mnt_drop_write(mnt);
592 if (locked) 608 if (locked)
593 inode_unlock(dir); 609 inode_unlock(dir);
594 610
@@ -700,12 +716,16 @@ static int ovl_lower_dir(const char *name, struct path *path,
700 *remote = true; 716 *remote = true;
701 717
702 /* 718 /*
703 * The inodes index feature needs to encode and decode file 719 * The inodes index feature and NFS export need to encode and decode
704 * handles, so it requires that all layers support them. 720 * file handles, so they require that all layers support them.
705 */ 721 */
706 if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) { 722 if ((ofs->config.nfs_export ||
723 (ofs->config.index && ofs->config.upperdir)) &&
724 !ovl_can_decode_fh(path->dentry->d_sb)) {
707 ofs->config.index = false; 725 ofs->config.index = false;
708 pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name); 726 ofs->config.nfs_export = false;
727 pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
728 name);
709 } 729 }
710 730
711 return 0; 731 return 0;
@@ -929,12 +949,17 @@ out:
929 949
930static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) 950static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
931{ 951{
952 struct vfsmount *mnt = ofs->upper_mnt;
932 struct dentry *temp; 953 struct dentry *temp;
933 int err; 954 int err;
934 955
956 err = mnt_want_write(mnt);
957 if (err)
958 return err;
959
935 ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false); 960 ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
936 if (!ofs->workdir) 961 if (!ofs->workdir)
937 return 0; 962 goto out;
938 963
939 /* 964 /*
940 * Upper should support d_type, else whiteouts are visible. Given 965 * Upper should support d_type, else whiteouts are visible. Given
@@ -944,7 +969,7 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
944 */ 969 */
945 err = ovl_check_d_type_supported(workpath); 970 err = ovl_check_d_type_supported(workpath);
946 if (err < 0) 971 if (err < 0)
947 return err; 972 goto out;
948 973
949 /* 974 /*
950 * We allowed this configuration and don't want to break users over 975 * We allowed this configuration and don't want to break users over
@@ -967,7 +992,9 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
967 err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0); 992 err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
968 if (err) { 993 if (err) {
969 ofs->noxattr = true; 994 ofs->noxattr = true;
970 pr_warn("overlayfs: upper fs does not support xattr.\n"); 995 ofs->config.index = false;
996 pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off.\n");
997 err = 0;
971 } else { 998 } else {
972 vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); 999 vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
973 } 1000 }
@@ -979,7 +1006,15 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
979 pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); 1006 pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
980 } 1007 }
981 1008
982 return 0; 1009 /* NFS export of r/w mount depends on index */
1010 if (ofs->config.nfs_export && !ofs->config.index) {
1011 pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
1012 ofs->config.nfs_export = false;
1013 }
1014
1015out:
1016 mnt_drop_write(mnt);
1017 return err;
983} 1018}
984 1019
985static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath) 1020static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
@@ -1026,11 +1061,16 @@ out:
1026static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe, 1061static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
1027 struct path *upperpath) 1062 struct path *upperpath)
1028{ 1063{
1064 struct vfsmount *mnt = ofs->upper_mnt;
1029 int err; 1065 int err;
1030 1066
1067 err = mnt_want_write(mnt);
1068 if (err)
1069 return err;
1070
1031 /* Verify lower root is upper root origin */ 1071 /* Verify lower root is upper root origin */
1032 err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry, 1072 err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
1033 false, true); 1073 true);
1034 if (err) { 1074 if (err) {
1035 pr_err("overlayfs: failed to verify upper root origin\n"); 1075 pr_err("overlayfs: failed to verify upper root origin\n");
1036 goto out; 1076 goto out;
@@ -1038,23 +1078,33 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
1038 1078
1039 ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); 1079 ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
1040 if (ofs->indexdir) { 1080 if (ofs->indexdir) {
1041 /* Verify upper root is index dir origin */ 1081 /*
1042 err = ovl_verify_origin(ofs->indexdir, upperpath->dentry, 1082 * Verify upper root is exclusively associated with index dir.
1043 true, true); 1083 * Older kernels stored upper fh in "trusted.overlay.origin"
1084 * xattr. If that xattr exists, verify that it is a match to
1085 * upper dir file handle. In any case, verify or set xattr
1086 * "trusted.overlay.upper" to indicate that index may have
1087 * directory entries.
1088 */
1089 if (ovl_check_origin_xattr(ofs->indexdir)) {
1090 err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
1091 upperpath->dentry, true, false);
1092 if (err)
1093 pr_err("overlayfs: failed to verify index dir 'origin' xattr\n");
1094 }
1095 err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
1044 if (err) 1096 if (err)
1045 pr_err("overlayfs: failed to verify index dir origin\n"); 1097 pr_err("overlayfs: failed to verify index dir 'upper' xattr\n");
1046 1098
1047 /* Cleanup bad/stale/orphan index entries */ 1099 /* Cleanup bad/stale/orphan index entries */
1048 if (!err) 1100 if (!err)
1049 err = ovl_indexdir_cleanup(ofs->indexdir, 1101 err = ovl_indexdir_cleanup(ofs);
1050 ofs->upper_mnt,
1051 oe->lowerstack,
1052 oe->numlower);
1053 } 1102 }
1054 if (err || !ofs->indexdir) 1103 if (err || !ofs->indexdir)
1055 pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); 1104 pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
1056 1105
1057out: 1106out:
1107 mnt_drop_write(mnt);
1058 return err; 1108 return err;
1059} 1109}
1060 1110
@@ -1094,6 +1144,7 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
1094 1144
1095 ofs->lower_layers[ofs->numlower].mnt = mnt; 1145 ofs->lower_layers[ofs->numlower].mnt = mnt;
1096 ofs->lower_layers[ofs->numlower].pseudo_dev = dev; 1146 ofs->lower_layers[ofs->numlower].pseudo_dev = dev;
1147 ofs->lower_layers[ofs->numlower].idx = i + 1;
1097 ofs->numlower++; 1148 ofs->numlower++;
1098 1149
1099 /* Check if all lower layers are on same sb */ 1150 /* Check if all lower layers are on same sb */
@@ -1131,6 +1182,10 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
1131 } else if (!ofs->config.upperdir && stacklen == 1) { 1182 } else if (!ofs->config.upperdir && stacklen == 1) {
1132 pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); 1183 pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
1133 goto out_err; 1184 goto out_err;
1185 } else if (!ofs->config.upperdir && ofs->config.nfs_export &&
1186 ofs->config.redirect_follow) {
1187 pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
1188 ofs->config.nfs_export = false;
1134 } 1189 }
1135 1190
1136 err = -ENOMEM; 1191 err = -ENOMEM;
@@ -1207,6 +1262,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1207 goto out_err; 1262 goto out_err;
1208 1263
1209 ofs->config.index = ovl_index_def; 1264 ofs->config.index = ovl_index_def;
1265 ofs->config.nfs_export = ovl_nfs_export_def;
1210 err = ovl_parse_opt((char *) data, &ofs->config); 1266 err = ovl_parse_opt((char *) data, &ofs->config);
1211 if (err) 1267 if (err)
1212 goto out_err; 1268 goto out_err;
@@ -1257,13 +1313,26 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1257 if (err) 1313 if (err)
1258 goto out_free_oe; 1314 goto out_free_oe;
1259 1315
1260 if (!ofs->indexdir) 1316 /* Force r/o mount with no index dir */
1317 if (!ofs->indexdir) {
1318 dput(ofs->workdir);
1319 ofs->workdir = NULL;
1261 sb->s_flags |= SB_RDONLY; 1320 sb->s_flags |= SB_RDONLY;
1321 }
1322
1262 } 1323 }
1263 1324
1264 /* Show index=off/on in /proc/mounts for any of the reasons above */ 1325 /* Show index=off in /proc/mounts for forced r/o mount */
1265 if (!ofs->indexdir) 1326 if (!ofs->indexdir) {
1266 ofs->config.index = false; 1327 ofs->config.index = false;
1328 if (ofs->upper_mnt && ofs->config.nfs_export) {
1329 pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n");
1330 ofs->config.nfs_export = false;
1331 }
1332 }
1333
1334 if (ofs->config.nfs_export)
1335 sb->s_export_op = &ovl_export_operations;
1267 1336
1268 /* Never override disk quota limits or use reserved space */ 1337 /* Never override disk quota limits or use reserved space */
1269 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); 1338 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
@@ -1279,15 +1348,15 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1279 if (!root_dentry) 1348 if (!root_dentry)
1280 goto out_free_oe; 1349 goto out_free_oe;
1281 1350
1351 root_dentry->d_fsdata = oe;
1352
1282 mntput(upperpath.mnt); 1353 mntput(upperpath.mnt);
1283 if (upperpath.dentry) { 1354 if (upperpath.dentry) {
1284 oe->has_upper = true; 1355 ovl_dentry_set_upper_alias(root_dentry);
1285 if (ovl_is_impuredir(upperpath.dentry)) 1356 if (ovl_is_impuredir(upperpath.dentry))
1286 ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); 1357 ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
1287 } 1358 }
1288 1359
1289 root_dentry->d_fsdata = oe;
1290
1291 /* Root is always merge -> can have whiteouts */ 1360 /* Root is always merge -> can have whiteouts */
1292 ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry)); 1361 ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
1293 ovl_inode_init(d_inode(root_dentry), upperpath.dentry, 1362 ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index d6bb1c9f5e7a..930784a26623 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -63,6 +63,22 @@ struct dentry *ovl_indexdir(struct super_block *sb)
63 return ofs->indexdir; 63 return ofs->indexdir;
64} 64}
65 65
66/* Index all files on copy up. For now only enabled for NFS export */
67bool ovl_index_all(struct super_block *sb)
68{
69 struct ovl_fs *ofs = sb->s_fs_info;
70
71 return ofs->config.nfs_export && ofs->config.index;
72}
73
74/* Verify lower origin on lookup. For now only enabled for NFS export */
75bool ovl_verify_lower(struct super_block *sb)
76{
77 struct ovl_fs *ofs = sb->s_fs_info;
78
79 return ofs->config.nfs_export && ofs->config.index;
80}
81
66struct ovl_entry *ovl_alloc_entry(unsigned int numlower) 82struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
67{ 83{
68 size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); 84 size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
@@ -194,10 +210,24 @@ void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache)
194 OVL_I(inode)->cache = cache; 210 OVL_I(inode)->cache = cache;
195} 211}
196 212
213void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry)
214{
215 set_bit(flag, &OVL_E(dentry)->flags);
216}
217
218void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry)
219{
220 clear_bit(flag, &OVL_E(dentry)->flags);
221}
222
223bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry)
224{
225 return test_bit(flag, &OVL_E(dentry)->flags);
226}
227
197bool ovl_dentry_is_opaque(struct dentry *dentry) 228bool ovl_dentry_is_opaque(struct dentry *dentry)
198{ 229{
199 struct ovl_entry *oe = dentry->d_fsdata; 230 return ovl_dentry_test_flag(OVL_E_OPAQUE, dentry);
200 return oe->opaque;
201} 231}
202 232
203bool ovl_dentry_is_whiteout(struct dentry *dentry) 233bool ovl_dentry_is_whiteout(struct dentry *dentry)
@@ -207,28 +237,23 @@ bool ovl_dentry_is_whiteout(struct dentry *dentry)
207 237
208void ovl_dentry_set_opaque(struct dentry *dentry) 238void ovl_dentry_set_opaque(struct dentry *dentry)
209{ 239{
210 struct ovl_entry *oe = dentry->d_fsdata; 240 ovl_dentry_set_flag(OVL_E_OPAQUE, dentry);
211
212 oe->opaque = true;
213} 241}
214 242
215/* 243/*
216 * For hard links it's possible for ovl_dentry_upper() to return positive, while 244 * For hard links and decoded file handles, it's possible for ovl_dentry_upper()
217 * there's no actual upper alias for the inode. Copy up code needs to know 245 * to return positive, while there's no actual upper alias for the inode.
218 * about the existence of the upper alias, so it can't use ovl_dentry_upper(). 246 * Copy up code needs to know about the existence of the upper alias, so it
247 * can't use ovl_dentry_upper().
219 */ 248 */
220bool ovl_dentry_has_upper_alias(struct dentry *dentry) 249bool ovl_dentry_has_upper_alias(struct dentry *dentry)
221{ 250{
222 struct ovl_entry *oe = dentry->d_fsdata; 251 return ovl_dentry_test_flag(OVL_E_UPPER_ALIAS, dentry);
223
224 return oe->has_upper;
225} 252}
226 253
227void ovl_dentry_set_upper_alias(struct dentry *dentry) 254void ovl_dentry_set_upper_alias(struct dentry *dentry)
228{ 255{
229 struct ovl_entry *oe = dentry->d_fsdata; 256 ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry);
230
231 oe->has_upper = true;
232} 257}
233 258
234bool ovl_redirect_dir(struct super_block *sb) 259bool ovl_redirect_dir(struct super_block *sb)
@@ -257,7 +282,7 @@ void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
257 if (upperdentry) 282 if (upperdentry)
258 OVL_I(inode)->__upperdentry = upperdentry; 283 OVL_I(inode)->__upperdentry = upperdentry;
259 if (lowerdentry) 284 if (lowerdentry)
260 OVL_I(inode)->lower = d_inode(lowerdentry); 285 OVL_I(inode)->lower = igrab(d_inode(lowerdentry));
261 286
262 ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode); 287 ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode);
263} 288}
@@ -273,7 +298,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
273 */ 298 */
274 smp_wmb(); 299 smp_wmb();
275 OVL_I(inode)->__upperdentry = upperdentry; 300 OVL_I(inode)->__upperdentry = upperdentry;
276 if (!S_ISDIR(upperinode->i_mode) && inode_unhashed(inode)) { 301 if (inode_unhashed(inode)) {
277 inode->i_private = upperinode; 302 inode->i_private = upperinode;
278 __insert_inode_hash(inode, (unsigned long) upperinode); 303 __insert_inode_hash(inode, (unsigned long) upperinode);
279 } 304 }
@@ -447,10 +472,32 @@ void ovl_inuse_unlock(struct dentry *dentry)
447 } 472 }
448} 473}
449 474
475/*
476 * Does this overlay dentry need to be indexed on copy up?
477 */
478bool ovl_need_index(struct dentry *dentry)
479{
480 struct dentry *lower = ovl_dentry_lower(dentry);
481
482 if (!lower || !ovl_indexdir(dentry->d_sb))
483 return false;
484
485 /* Index all files for NFS export and consistency verification */
486 if (ovl_index_all(dentry->d_sb))
487 return true;
488
489 /* Index only lower hardlinks on copy up */
490 if (!d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
491 return true;
492
493 return false;
494}
495
450/* Caller must hold OVL_I(inode)->lock */ 496/* Caller must hold OVL_I(inode)->lock */
451static void ovl_cleanup_index(struct dentry *dentry) 497static void ovl_cleanup_index(struct dentry *dentry)
452{ 498{
453 struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode; 499 struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
500 struct inode *dir = indexdir->d_inode;
454 struct dentry *lowerdentry = ovl_dentry_lower(dentry); 501 struct dentry *lowerdentry = ovl_dentry_lower(dentry);
455 struct dentry *upperdentry = ovl_dentry_upper(dentry); 502 struct dentry *upperdentry = ovl_dentry_upper(dentry);
456 struct dentry *index = NULL; 503 struct dentry *index = NULL;
@@ -463,7 +510,7 @@ static void ovl_cleanup_index(struct dentry *dentry)
463 goto fail; 510 goto fail;
464 511
465 inode = d_inode(upperdentry); 512 inode = d_inode(upperdentry);
466 if (inode->i_nlink != 1) { 513 if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) {
467 pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n", 514 pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
468 upperdentry, inode->i_ino, inode->i_nlink); 515 upperdentry, inode->i_ino, inode->i_nlink);
469 /* 516 /*
@@ -481,13 +528,17 @@ static void ovl_cleanup_index(struct dentry *dentry)
481 } 528 }
482 529
483 inode_lock_nested(dir, I_MUTEX_PARENT); 530 inode_lock_nested(dir, I_MUTEX_PARENT);
484 /* TODO: whiteout instead of cleanup to block future open by handle */ 531 index = lookup_one_len(name.name, indexdir, name.len);
485 index = lookup_one_len(name.name, ovl_indexdir(dentry->d_sb), name.len);
486 err = PTR_ERR(index); 532 err = PTR_ERR(index);
487 if (!IS_ERR(index)) 533 if (IS_ERR(index)) {
488 err = ovl_cleanup(dir, index);
489 else
490 index = NULL; 534 index = NULL;
535 } else if (ovl_index_all(dentry->d_sb)) {
536 /* Whiteout orphan index to block future open by handle */
537 err = ovl_cleanup_and_whiteout(indexdir, dir, index);
538 } else {
539 /* Cleanup orphan index entries */
540 err = ovl_cleanup(dir, index);
541 }
491 542
492 inode_unlock(dir); 543 inode_unlock(dir);
493 if (err) 544 if (err)
@@ -512,16 +563,16 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked)
512 const struct cred *old_cred; 563 const struct cred *old_cred;
513 int err; 564 int err;
514 565
515 if (!d_inode(dentry) || d_is_dir(dentry)) 566 if (!d_inode(dentry))
516 return 0; 567 return 0;
517 568
518 /* 569 /*
519 * With inodes index is enabled, we store the union overlay nlink 570 * With inodes index is enabled, we store the union overlay nlink
520 * in an xattr on the index inode. When whiting out lower hardlinks 571 * in an xattr on the index inode. When whiting out an indexed lower,
521 * we need to decrement the overlay persistent nlink, but before the 572 * we need to decrement the overlay persistent nlink, but before the
522 * first copy up, we have no upper index inode to store the xattr. 573 * first copy up, we have no upper index inode to store the xattr.
523 * 574 *
524 * As a workaround, before whiteout/rename over of a lower hardlink, 575 * As a workaround, before whiteout/rename over an indexed lower,
525 * copy up to create the upper index. Creating the upper index will 576 * copy up to create the upper index. Creating the upper index will
526 * initialize the overlay nlink, so it could be dropped if unlink 577 * initialize the overlay nlink, so it could be dropped if unlink
527 * or rename succeeds. 578 * or rename succeeds.
@@ -529,8 +580,7 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked)
529 * TODO: implement metadata only index copy up when called with 580 * TODO: implement metadata only index copy up when called with
530 * ovl_copy_up_flags(dentry, O_PATH). 581 * ovl_copy_up_flags(dentry, O_PATH).
531 */ 582 */
532 if (ovl_indexdir(dentry->d_sb) && !ovl_dentry_has_upper_alias(dentry) && 583 if (ovl_need_index(dentry) && !ovl_dentry_has_upper_alias(dentry)) {
533 d_inode(ovl_dentry_lower(dentry))->i_nlink > 1) {
534 err = ovl_copy_up(dentry); 584 err = ovl_copy_up(dentry);
535 if (err) 585 if (err)
536 return err; 586 return err;
@@ -540,7 +590,7 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked)
540 if (err) 590 if (err)
541 return err; 591 return err;
542 592
543 if (!ovl_test_flag(OVL_INDEX, d_inode(dentry))) 593 if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
544 goto out; 594 goto out;
545 595
546 old_cred = ovl_override_creds(dentry->d_sb); 596 old_cred = ovl_override_creds(dentry->d_sb);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 65cd8ab60b7a..82a99d366aec 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -227,6 +227,7 @@ extern seqlock_t rename_lock;
227 */ 227 */
228extern void d_instantiate(struct dentry *, struct inode *); 228extern void d_instantiate(struct dentry *, struct inode *);
229extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); 229extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
230extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *);
230extern int d_instantiate_no_diralias(struct dentry *, struct inode *); 231extern int d_instantiate_no_diralias(struct dentry *, struct inode *);
231extern void __d_drop(struct dentry *dentry); 232extern void __d_drop(struct dentry *dentry);
232extern void d_drop(struct dentry *dentry); 233extern void d_drop(struct dentry *dentry);
@@ -235,6 +236,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op
235 236
236/* allocate/de-allocate */ 237/* allocate/de-allocate */
237extern struct dentry * d_alloc(struct dentry *, const struct qstr *); 238extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
239extern struct dentry * d_alloc_anon(struct super_block *);
238extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); 240extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
239extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, 241extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
240 wait_queue_head_t *); 242 wait_queue_head_t *);