summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-09-18 19:59:14 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2019-09-18 19:59:14 -0400
commitf60c55a94e1d127186566f06294f2dadd966e9b4 (patch)
tree2d3dbd572c0096d24f87f581194563ff76e07a6e
parent734d1ed83e1f9b7bafb650033fb87c657858cf5b (diff)
parent95ae251fe82838b85c6d37e5a1775006e2a42ae0 (diff)
Merge tag 'fsverity-for-linus' of git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt
Pull fs-verity support from Eric Biggers: "fs-verity is a filesystem feature that provides Merkle tree based hashing (similar to dm-verity) for individual readonly files, mainly for the purpose of efficient authenticity verification. This pull request includes: (a) The fs/verity/ support layer and documentation. (b) fs-verity support for ext4 and f2fs. Compared to the original fs-verity patchset from last year, the UAPI to enable fs-verity on a file has been greatly simplified. Lots of other things were cleaned up too. fs-verity is planned to be used by two different projects on Android; most of the userspace code is in place already. Another userspace tool ("fsverity-utils"), and xfstests, are also available. e2fsprogs and f2fs-tools already have fs-verity support. Other people have shown interest in using fs-verity too. I've tested this on ext4 and f2fs with xfstests, both the existing tests and the new fs-verity tests. This has also been in linux-next since July 30 with no reported issues except a couple minor ones I found myself and folded in fixes for. Ted and I will be co-maintaining fs-verity" * tag 'fsverity-for-linus' of git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt: f2fs: add fs-verity support ext4: update on-disk format documentation for fs-verity ext4: add fs-verity read support ext4: add basic fs-verity support fs-verity: support builtin file signatures fs-verity: add SHA-512 support fs-verity: implement FS_IOC_MEASURE_VERITY ioctl fs-verity: implement FS_IOC_ENABLE_VERITY ioctl fs-verity: add data verification hooks for ->readpages() fs-verity: add the hook for file ->setattr() fs-verity: add the hook for file ->open() fs-verity: add inode and superblock fields fs-verity: add Kconfig and the helper functions for hashing fs: uapi: define verity bit for FS_IOC_GETFLAGS fs-verity: add UAPI header fs-verity: add MAINTAINERS file entry fs-verity: add a documentation file
-rw-r--r--Documentation/filesystems/ext4/inodes.rst6
-rw-r--r--Documentation/filesystems/ext4/overview.rst1
-rw-r--r--Documentation/filesystems/ext4/super.rst2
-rw-r--r--Documentation/filesystems/ext4/verity.rst41
-rw-r--r--Documentation/filesystems/fsverity.rst726
-rw-r--r--Documentation/filesystems/index.rst1
-rw-r--r--Documentation/ioctl/ioctl-number.rst1
-rw-r--r--MAINTAINERS12
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile1
-rw-r--r--fs/ext4/Makefile1
-rw-r--r--fs/ext4/ext4.h23
-rw-r--r--fs/ext4/file.c4
-rw-r--r--fs/ext4/inode.c55
-rw-r--r--fs/ext4/ioctl.c13
-rw-r--r--fs/ext4/readpage.c211
-rw-r--r--fs/ext4/super.c18
-rw-r--r--fs/ext4/sysfs.c6
-rw-r--r--fs/ext4/verity.c367
-rw-r--r--fs/f2fs/Makefile1
-rw-r--r--fs/f2fs/data.c75
-rw-r--r--fs/f2fs/f2fs.h20
-rw-r--r--fs/f2fs/file.c43
-rw-r--r--fs/f2fs/inode.c5
-rw-r--r--fs/f2fs/super.c3
-rw-r--r--fs/f2fs/sysfs.c11
-rw-r--r--fs/f2fs/verity.c247
-rw-r--r--fs/f2fs/xattr.h2
-rw-r--r--fs/verity/Kconfig55
-rw-r--r--fs/verity/Makefile10
-rw-r--r--fs/verity/enable.c377
-rw-r--r--fs/verity/fsverity_private.h185
-rw-r--r--fs/verity/hash_algs.c280
-rw-r--r--fs/verity/init.c61
-rw-r--r--fs/verity/measure.c57
-rw-r--r--fs/verity/open.c356
-rw-r--r--fs/verity/signature.c157
-rw-r--r--fs/verity/verify.c281
-rw-r--r--include/linux/fs.h11
-rw-r--r--include/linux/fsverity.h211
-rw-r--r--include/uapi/linux/fs.h1
-rw-r--r--include/uapi/linux/fsverity.h40
42 files changed, 3910 insertions, 70 deletions
diff --git a/Documentation/filesystems/ext4/inodes.rst b/Documentation/filesystems/ext4/inodes.rst
index 6bd35e506b6f..e851e6ca31fa 100644
--- a/Documentation/filesystems/ext4/inodes.rst
+++ b/Documentation/filesystems/ext4/inodes.rst
@@ -277,6 +277,8 @@ The ``i_flags`` field is a combination of these values:
277 - This is a huge file (EXT4\_HUGE\_FILE\_FL). 277 - This is a huge file (EXT4\_HUGE\_FILE\_FL).
278 * - 0x80000 278 * - 0x80000
279 - Inode uses extents (EXT4\_EXTENTS\_FL). 279 - Inode uses extents (EXT4\_EXTENTS\_FL).
280 * - 0x100000
281 - Verity protected file (EXT4\_VERITY\_FL).
280 * - 0x200000 282 * - 0x200000
281 - Inode stores a large extended attribute value in its data blocks 283 - Inode stores a large extended attribute value in its data blocks
282 (EXT4\_EA\_INODE\_FL). 284 (EXT4\_EA\_INODE\_FL).
@@ -299,9 +301,9 @@ The ``i_flags`` field is a combination of these values:
299 - Reserved for ext4 library (EXT4\_RESERVED\_FL). 301 - Reserved for ext4 library (EXT4\_RESERVED\_FL).
300 * - 302 * -
301 - Aggregate flags: 303 - Aggregate flags:
302 * - 0x4BDFFF 304 * - 0x705BDFFF
303 - User-visible flags. 305 - User-visible flags.
304 * - 0x4B80FF 306 * - 0x604BC0FF
305 - User-modifiable flags. Note that while EXT4\_JOURNAL\_DATA\_FL and 307 - User-modifiable flags. Note that while EXT4\_JOURNAL\_DATA\_FL and
306 EXT4\_EXTENTS\_FL can be set with setattr, they are not in the kernel's 308 EXT4\_EXTENTS\_FL can be set with setattr, they are not in the kernel's
307 EXT4\_FL\_USER\_MODIFIABLE mask, since it needs to handle the setting of 309 EXT4\_FL\_USER\_MODIFIABLE mask, since it needs to handle the setting of
diff --git a/Documentation/filesystems/ext4/overview.rst b/Documentation/filesystems/ext4/overview.rst
index cbab18baba12..123ebfde47ee 100644
--- a/Documentation/filesystems/ext4/overview.rst
+++ b/Documentation/filesystems/ext4/overview.rst
@@ -24,3 +24,4 @@ order.
24.. include:: bigalloc.rst 24.. include:: bigalloc.rst
25.. include:: inlinedata.rst 25.. include:: inlinedata.rst
26.. include:: eainode.rst 26.. include:: eainode.rst
27.. include:: verity.rst
diff --git a/Documentation/filesystems/ext4/super.rst b/Documentation/filesystems/ext4/super.rst
index 04ff079a2acf..6eae92054827 100644
--- a/Documentation/filesystems/ext4/super.rst
+++ b/Documentation/filesystems/ext4/super.rst
@@ -696,6 +696,8 @@ the following:
696 (RO\_COMPAT\_READONLY) 696 (RO\_COMPAT\_READONLY)
697 * - 0x2000 697 * - 0x2000
698 - Filesystem tracks project quotas. (RO\_COMPAT\_PROJECT) 698 - Filesystem tracks project quotas. (RO\_COMPAT\_PROJECT)
699 * - 0x8000
700 - Verity inodes may be present on the filesystem. (RO\_COMPAT\_VERITY)
699 701
700.. _super_def_hash: 702.. _super_def_hash:
701 703
diff --git a/Documentation/filesystems/ext4/verity.rst b/Documentation/filesystems/ext4/verity.rst
new file mode 100644
index 000000000000..3e4c0ee0e068
--- /dev/null
+++ b/Documentation/filesystems/ext4/verity.rst
@@ -0,0 +1,41 @@
1.. SPDX-License-Identifier: GPL-2.0
2
3Verity files
4------------
5
6ext4 supports fs-verity, which is a filesystem feature that provides
7Merkle tree based hashing for individual readonly files. Most of
8fs-verity is common to all filesystems that support it; see
9:ref:`Documentation/filesystems/fsverity.rst <fsverity>` for the
10fs-verity documentation. However, the on-disk layout of the verity
11metadata is filesystem-specific. On ext4, the verity metadata is
12stored after the end of the file data itself, in the following format:
13
14- Zero-padding to the next 65536-byte boundary. This padding need not
15 actually be allocated on-disk, i.e. it may be a hole.
16
17- The Merkle tree, as documented in
18 :ref:`Documentation/filesystems/fsverity.rst
19 <fsverity_merkle_tree>`, with the tree levels stored in order from
20 root to leaf, and the tree blocks within each level stored in their
21 natural order.
22
23- Zero-padding to the next filesystem block boundary.
24
25- The verity descriptor, as documented in
26 :ref:`Documentation/filesystems/fsverity.rst <fsverity_descriptor>`,
27 with optionally appended signature blob.
28
29- Zero-padding to the next offset that is 4 bytes before a filesystem
30 block boundary.
31
32- The size of the verity descriptor in bytes, as a 4-byte little
33 endian integer.
34
35Verity inodes have EXT4_VERITY_FL set, and they must use extents, i.e.
36EXT4_EXTENTS_FL must be set and EXT4_INLINE_DATA_FL must be clear.
37They can have EXT4_ENCRYPT_FL set, in which case the verity metadata
38is encrypted as well as the data itself.
39
40Verity files cannot have blocks allocated past the end of the verity
41metadata.
diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst
new file mode 100644
index 000000000000..42a0b6dd9e0b
--- /dev/null
+++ b/Documentation/filesystems/fsverity.rst
@@ -0,0 +1,726 @@
1.. SPDX-License-Identifier: GPL-2.0
2
3.. _fsverity:
4
5=======================================================
6fs-verity: read-only file-based authenticity protection
7=======================================================
8
9Introduction
10============
11
12fs-verity (``fs/verity/``) is a support layer that filesystems can
13hook into to support transparent integrity and authenticity protection
14of read-only files. Currently, it is supported by the ext4 and f2fs
15filesystems. Like fscrypt, not too much filesystem-specific code is
16needed to support fs-verity.
17
18fs-verity is similar to `dm-verity
19<https://www.kernel.org/doc/Documentation/device-mapper/verity.txt>`_
20but works on files rather than block devices. On regular files on
21filesystems supporting fs-verity, userspace can execute an ioctl that
22causes the filesystem to build a Merkle tree for the file and persist
23it to a filesystem-specific location associated with the file.
24
25After this, the file is made readonly, and all reads from the file are
26automatically verified against the file's Merkle tree. Reads of any
27corrupted data, including mmap reads, will fail.
28
29Userspace can use another ioctl to retrieve the root hash (actually
30the "file measurement", which is a hash that includes the root hash)
31that fs-verity is enforcing for the file. This ioctl executes in
32constant time, regardless of the file size.
33
34fs-verity is essentially a way to hash a file in constant time,
35subject to the caveat that reads which would violate the hash will
36fail at runtime.
37
38Use cases
39=========
40
41By itself, the base fs-verity feature only provides integrity
42protection, i.e. detection of accidental (non-malicious) corruption.
43
44However, because fs-verity makes retrieving the file hash extremely
45efficient, it's primarily meant to be used as a tool to support
46authentication (detection of malicious modifications) or auditing
47(logging file hashes before use).
48
49Trusted userspace code (e.g. operating system code running on a
50read-only partition that is itself authenticated by dm-verity) can
51authenticate the contents of an fs-verity file by using the
52`FS_IOC_MEASURE_VERITY`_ ioctl to retrieve its hash, then verifying a
53digital signature of it.
54
55A standard file hash could be used instead of fs-verity. However,
56this is inefficient if the file is large and only a small portion may
57be accessed. This is often the case for Android application package
58(APK) files, for example. These typically contain many translations,
59classes, and other resources that are infrequently or even never
60accessed on a particular device. It would be slow and wasteful to
61read and hash the entire file before starting the application.
62
63Unlike an ahead-of-time hash, fs-verity also re-verifies data each
64time it's paged in. This ensures that malicious disk firmware can't
65undetectably change the contents of the file at runtime.
66
67fs-verity does not replace or obsolete dm-verity. dm-verity should
68still be used on read-only filesystems. fs-verity is for files that
69must live on a read-write filesystem because they are independently
70updated and potentially user-installed, so dm-verity cannot be used.
71
72The base fs-verity feature is a hashing mechanism only; actually
73authenticating the files is up to userspace. However, to meet some
74users' needs, fs-verity optionally supports a simple signature
75verification mechanism where users can configure the kernel to require
76that all fs-verity files be signed by a key loaded into a keyring; see
77`Built-in signature verification`_. Support for fs-verity file hashes
78in IMA (Integrity Measurement Architecture) policies is also planned.
79
80User API
81========
82
83FS_IOC_ENABLE_VERITY
84--------------------
85
86The FS_IOC_ENABLE_VERITY ioctl enables fs-verity on a file. It takes
87in a pointer to a :c:type:`struct fsverity_enable_arg`, defined as
88follows::
89
90 struct fsverity_enable_arg {
91 __u32 version;
92 __u32 hash_algorithm;
93 __u32 block_size;
94 __u32 salt_size;
95 __u64 salt_ptr;
96 __u32 sig_size;
97 __u32 __reserved1;
98 __u64 sig_ptr;
99 __u64 __reserved2[11];
100 };
101
102This structure contains the parameters of the Merkle tree to build for
103the file, and optionally contains a signature. It must be initialized
104as follows:
105
106- ``version`` must be 1.
107- ``hash_algorithm`` must be the identifier for the hash algorithm to
108 use for the Merkle tree, such as FS_VERITY_HASH_ALG_SHA256. See
109 ``include/uapi/linux/fsverity.h`` for the list of possible values.
110- ``block_size`` must be the Merkle tree block size. Currently, this
111 must be equal to the system page size, which is usually 4096 bytes.
112 Other sizes may be supported in the future. This value is not
113 necessarily the same as the filesystem block size.
114- ``salt_size`` is the size of the salt in bytes, or 0 if no salt is
115 provided. The salt is a value that is prepended to every hashed
116 block; it can be used to personalize the hashing for a particular
117 file or device. Currently the maximum salt size is 32 bytes.
118- ``salt_ptr`` is the pointer to the salt, or NULL if no salt is
119 provided.
120- ``sig_size`` is the size of the signature in bytes, or 0 if no
121 signature is provided. Currently the signature is (somewhat
122 arbitrarily) limited to 16128 bytes. See `Built-in signature
123 verification`_ for more information.
124- ``sig_ptr`` is the pointer to the signature, or NULL if no
125 signature is provided.
126- All reserved fields must be zeroed.
127
128FS_IOC_ENABLE_VERITY causes the filesystem to build a Merkle tree for
129the file and persist it to a filesystem-specific location associated
130with the file, then mark the file as a verity file. This ioctl may
131take a long time to execute on large files, and it is interruptible by
132fatal signals.
133
134FS_IOC_ENABLE_VERITY checks for write access to the inode. However,
135it must be executed on an O_RDONLY file descriptor and no processes
136can have the file open for writing. Attempts to open the file for
137writing while this ioctl is executing will fail with ETXTBSY. (This
138is necessary to guarantee that no writable file descriptors will exist
139after verity is enabled, and to guarantee that the file's contents are
140stable while the Merkle tree is being built over it.)
141
142On success, FS_IOC_ENABLE_VERITY returns 0, and the file becomes a
143verity file. On failure (including the case of interruption by a
144fatal signal), no changes are made to the file.
145
146FS_IOC_ENABLE_VERITY can fail with the following errors:
147
148- ``EACCES``: the process does not have write access to the file
149- ``EBADMSG``: the signature is malformed
150- ``EBUSY``: this ioctl is already running on the file
151- ``EEXIST``: the file already has verity enabled
152- ``EFAULT``: the caller provided inaccessible memory
153- ``EINTR``: the operation was interrupted by a fatal signal
154- ``EINVAL``: unsupported version, hash algorithm, or block size; or
155 reserved bits are set; or the file descriptor refers to neither a
156 regular file nor a directory.
157- ``EISDIR``: the file descriptor refers to a directory
158- ``EKEYREJECTED``: the signature doesn't match the file
159- ``EMSGSIZE``: the salt or signature is too long
160- ``ENOKEY``: the fs-verity keyring doesn't contain the certificate
161 needed to verify the signature
162- ``ENOPKG``: fs-verity recognizes the hash algorithm, but it's not
163 available in the kernel's crypto API as currently configured (e.g.
164 for SHA-512, missing CONFIG_CRYPTO_SHA512).
165- ``ENOTTY``: this type of filesystem does not implement fs-verity
166- ``EOPNOTSUPP``: the kernel was not configured with fs-verity
167 support; or the filesystem superblock has not had the 'verity'
168 feature enabled on it; or the filesystem does not support fs-verity
169 on this file. (See `Filesystem support`_.)
170- ``EPERM``: the file is append-only; or, a signature is required and
171 one was not provided.
172- ``EROFS``: the filesystem is read-only
173- ``ETXTBSY``: someone has the file open for writing. This can be the
174 caller's file descriptor, another open file descriptor, or the file
175 reference held by a writable memory map.
176
177FS_IOC_MEASURE_VERITY
178---------------------
179
180The FS_IOC_MEASURE_VERITY ioctl retrieves the measurement of a verity
181file. The file measurement is a digest that cryptographically
182identifies the file contents that are being enforced on reads.
183
184This ioctl takes in a pointer to a variable-length structure::
185
186 struct fsverity_digest {
187 __u16 digest_algorithm;
188 __u16 digest_size; /* input/output */
189 __u8 digest[];
190 };
191
192``digest_size`` is an input/output field. On input, it must be
193initialized to the number of bytes allocated for the variable-length
194``digest`` field.
195
196On success, 0 is returned and the kernel fills in the structure as
197follows:
198
199- ``digest_algorithm`` will be the hash algorithm used for the file
200 measurement. It will match ``fsverity_enable_arg::hash_algorithm``.
201- ``digest_size`` will be the size of the digest in bytes, e.g. 32
202 for SHA-256. (This can be redundant with ``digest_algorithm``.)
203- ``digest`` will be the actual bytes of the digest.
204
205FS_IOC_MEASURE_VERITY is guaranteed to execute in constant time,
206regardless of the size of the file.
207
208FS_IOC_MEASURE_VERITY can fail with the following errors:
209
210- ``EFAULT``: the caller provided inaccessible memory
211- ``ENODATA``: the file is not a verity file
212- ``ENOTTY``: this type of filesystem does not implement fs-verity
213- ``EOPNOTSUPP``: the kernel was not configured with fs-verity
214 support, or the filesystem superblock has not had the 'verity'
215 feature enabled on it. (See `Filesystem support`_.)
216- ``EOVERFLOW``: the digest is longer than the specified
217 ``digest_size`` bytes. Try providing a larger buffer.
218
219FS_IOC_GETFLAGS
220---------------
221
222The existing ioctl FS_IOC_GETFLAGS (which isn't specific to fs-verity)
223can also be used to check whether a file has fs-verity enabled or not.
224To do so, check for FS_VERITY_FL (0x00100000) in the returned flags.
225
226The verity flag is not settable via FS_IOC_SETFLAGS. You must use
227FS_IOC_ENABLE_VERITY instead, since parameters must be provided.
228
229Accessing verity files
230======================
231
232Applications can transparently access a verity file just like a
233non-verity one, with the following exceptions:
234
235- Verity files are readonly. They cannot be opened for writing or
236 truncate()d, even if the file mode bits allow it. Attempts to do
237 one of these things will fail with EPERM. However, changes to
238 metadata such as owner, mode, timestamps, and xattrs are still
239 allowed, since these are not measured by fs-verity. Verity files
240 can also still be renamed, deleted, and linked to.
241
242- Direct I/O is not supported on verity files. Attempts to use direct
243 I/O on such files will fall back to buffered I/O.
244
245- DAX (Direct Access) is not supported on verity files, because this
246 would circumvent the data verification.
247
248- Reads of data that doesn't match the verity Merkle tree will fail
249 with EIO (for read()) or SIGBUS (for mmap() reads).
250
251- If the sysctl "fs.verity.require_signatures" is set to 1 and the
252 file's verity measurement is not signed by a key in the fs-verity
253 keyring, then opening the file will fail. See `Built-in signature
254 verification`_.
255
256Direct access to the Merkle tree is not supported. Therefore, if a
257verity file is copied, or is backed up and restored, then it will lose
258its "verity"-ness. fs-verity is primarily meant for files like
259executables that are managed by a package manager.
260
261File measurement computation
262============================
263
264This section describes how fs-verity hashes the file contents using a
265Merkle tree to produce the "file measurement" which cryptographically
266identifies the file contents. This algorithm is the same for all
267filesystems that support fs-verity.
268
269Userspace only needs to be aware of this algorithm if it needs to
270compute the file measurement itself, e.g. in order to sign the file.
271
272.. _fsverity_merkle_tree:
273
274Merkle tree
275-----------
276
277The file contents are divided into blocks, where the block size is
278configurable but is usually 4096 bytes. The end of the last block is
279zero-padded if needed. Each block is then hashed, producing the first
280level of hashes. Then, the hashes in this first level are grouped
281into 'blocksize'-byte blocks (zero-padding the ends as needed) and
282these blocks are hashed, producing the second level of hashes. This
283proceeds up the tree until only a single block remains. The hash of
284this block is the "Merkle tree root hash".
285
286If the file fits in one block and is nonempty, then the "Merkle tree
287root hash" is simply the hash of the single data block. If the file
288is empty, then the "Merkle tree root hash" is all zeroes.
289
290The "blocks" here are not necessarily the same as "filesystem blocks".
291
292If a salt was specified, then it's zero-padded to the closest multiple
293of the input size of the hash algorithm's compression function, e.g.
29464 bytes for SHA-256 or 128 bytes for SHA-512. The padded salt is
295prepended to every data or Merkle tree block that is hashed.
296
297The purpose of the block padding is to cause every hash to be taken
298over the same amount of data, which simplifies the implementation and
299keeps open more possibilities for hardware acceleration. The purpose
300of the salt padding is to make the salting "free" when the salted hash
301state is precomputed, then imported for each hash.
302
303Example: in the recommended configuration of SHA-256 and 4K blocks,
304128 hash values fit in each block. Thus, each level of the Merkle
305tree is approximately 128 times smaller than the previous, and for
306large files the Merkle tree's size converges to approximately 1/127 of
307the original file size. However, for small files, the padding is
308significant, making the space overhead proportionally more.
309
310.. _fsverity_descriptor:
311
312fs-verity descriptor
313--------------------
314
315By itself, the Merkle tree root hash is ambiguous. For example, it
316can't distinguish a large file from a small second file whose data
317is exactly the top-level hash block of the first file. Ambiguities
318also arise from the convention of padding to the next block boundary.
319
320To solve this problem, the verity file measurement is actually
321computed as a hash of the following structure, which contains the
322Merkle tree root hash as well as other fields such as the file size::
323
324 struct fsverity_descriptor {
325 __u8 version; /* must be 1 */
326 __u8 hash_algorithm; /* Merkle tree hash algorithm */
327 __u8 log_blocksize; /* log2 of size of data and tree blocks */
328 __u8 salt_size; /* size of salt in bytes; 0 if none */
329 __le32 sig_size; /* must be 0 */
330 __le64 data_size; /* size of file the Merkle tree is built over */
331 __u8 root_hash[64]; /* Merkle tree root hash */
332 __u8 salt[32]; /* salt prepended to each hashed block */
333 __u8 __reserved[144]; /* must be 0's */
334 };
335
336Note that the ``sig_size`` field must be set to 0 for the purpose of
337computing the file measurement, even if a signature was provided (or
338will be provided) to `FS_IOC_ENABLE_VERITY`_.
339
340Built-in signature verification
341===============================
342
343With CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y, fs-verity supports putting
344a portion of an authentication policy (see `Use cases`_) in the
345kernel. Specifically, it adds support for:
346
3471. At fs-verity module initialization time, a keyring ".fs-verity" is
348 created. The root user can add trusted X.509 certificates to this
349 keyring using the add_key() system call, then (when done)
350 optionally use keyctl_restrict_keyring() to prevent additional
351 certificates from being added.
352
3532. `FS_IOC_ENABLE_VERITY`_ accepts a pointer to a PKCS#7 formatted
354 detached signature in DER format of the file measurement. On
355 success, this signature is persisted alongside the Merkle tree.
356 Then, any time the file is opened, the kernel will verify the
357 file's actual measurement against this signature, using the
358 certificates in the ".fs-verity" keyring.
359
3603. A new sysctl "fs.verity.require_signatures" is made available.
361 When set to 1, the kernel requires that all verity files have a
362 correctly signed file measurement as described in (2).
363
364File measurements must be signed in the following format, which is
365similar to the structure used by `FS_IOC_MEASURE_VERITY`_::
366
367 struct fsverity_signed_digest {
368 char magic[8]; /* must be "FSVerity" */
369 __le16 digest_algorithm;
370 __le16 digest_size;
371 __u8 digest[];
372 };
373
374fs-verity's built-in signature verification support is meant as a
375relatively simple mechanism that can be used to provide some level of
376authenticity protection for verity files, as an alternative to doing
377the signature verification in userspace or using IMA-appraisal.
378However, with this mechanism, userspace programs still need to check
379that the verity bit is set, and there is no protection against verity
380files being swapped around.
381
382Filesystem support
383==================
384
385fs-verity is currently supported by the ext4 and f2fs filesystems.
386The CONFIG_FS_VERITY kconfig option must be enabled to use fs-verity
387on either filesystem.
388
389``include/linux/fsverity.h`` declares the interface between the
390``fs/verity/`` support layer and filesystems. Briefly, filesystems
391must provide an ``fsverity_operations`` structure that provides
392methods to read and write the verity metadata to a filesystem-specific
393location, including the Merkle tree blocks and
394``fsverity_descriptor``. Filesystems must also call functions in
395``fs/verity/`` at certain times, such as when a file is opened or when
396pages have been read into the pagecache. (See `Verifying data`_.)
397
398ext4
399----
400
401ext4 supports fs-verity since Linux v5.4 and e2fsprogs v1.45.2.
402
403To create verity files on an ext4 filesystem, the filesystem must have
404been formatted with ``-O verity`` or had ``tune2fs -O verity`` run on
405it. "verity" is an RO_COMPAT filesystem feature, so once set, old
406kernels will only be able to mount the filesystem readonly, and old
407versions of e2fsck will be unable to check the filesystem. Moreover,
408currently ext4 only supports mounting a filesystem with the "verity"
409feature when its block size is equal to PAGE_SIZE (often 4096 bytes).
410
411ext4 sets the EXT4_VERITY_FL on-disk inode flag on verity files. It
412can only be set by `FS_IOC_ENABLE_VERITY`_, and it cannot be cleared.
413
414ext4 also supports encryption, which can be used simultaneously with
415fs-verity. In this case, the plaintext data is verified rather than
416the ciphertext. This is necessary in order to make the file
417measurement meaningful, since every file is encrypted differently.
418
419ext4 stores the verity metadata (Merkle tree and fsverity_descriptor)
420past the end of the file, starting at the first 64K boundary beyond
421i_size. This approach works because (a) verity files are readonly,
422and (b) pages fully beyond i_size aren't visible to userspace but can
423be read/written internally by ext4 with only some relatively small
424changes to ext4. This approach avoids having to depend on the
425EA_INODE feature and on rearchitecturing ext4's xattr support to
426support paging multi-gigabyte xattrs into memory, and to support
427encrypting xattrs. Note that the verity metadata *must* be encrypted
428when the file is, since it contains hashes of the plaintext data.
429
430Currently, ext4 verity only supports the case where the Merkle tree
431block size, filesystem block size, and page size are all the same. It
432also only supports extent-based files.
433
434f2fs
435----
436
437f2fs supports fs-verity since Linux v5.4 and f2fs-tools v1.11.0.
438
439To create verity files on an f2fs filesystem, the filesystem must have
440been formatted with ``-O verity``.
441
442f2fs sets the FADVISE_VERITY_BIT on-disk inode flag on verity files.
443It can only be set by `FS_IOC_ENABLE_VERITY`_, and it cannot be
444cleared.
445
446Like ext4, f2fs stores the verity metadata (Merkle tree and
447fsverity_descriptor) past the end of the file, starting at the first
44864K boundary beyond i_size. See explanation for ext4 above.
449Moreover, f2fs supports at most 4096 bytes of xattr entries per inode
450which wouldn't be enough for even a single Merkle tree block.
451
452Currently, f2fs verity only supports a Merkle tree block size of 4096.
453Also, f2fs doesn't support enabling verity on files that currently
454have atomic or volatile writes pending.
455
456Implementation details
457======================
458
459Verifying data
460--------------
461
462fs-verity ensures that all reads of a verity file's data are verified,
463regardless of which syscall is used to do the read (e.g. mmap(),
464read(), pread()) and regardless of whether it's the first read or a
465later read (unless the later read can return cached data that was
466already verified). Below, we describe how filesystems implement this.
467
468Pagecache
469~~~~~~~~~
470
471For filesystems using Linux's pagecache, the ``->readpage()`` and
472``->readpages()`` methods must be modified to verify pages before they
473are marked Uptodate. Merely hooking ``->read_iter()`` would be
474insufficient, since ``->read_iter()`` is not used for memory maps.
475
476Therefore, fs/verity/ provides a function fsverity_verify_page() which
477verifies a page that has been read into the pagecache of a verity
478inode, but is still locked and not Uptodate, so it's not yet readable
479by userspace. As needed to do the verification,
480fsverity_verify_page() will call back into the filesystem to read
481Merkle tree pages via fsverity_operations::read_merkle_tree_page().
482
483fsverity_verify_page() returns false if verification failed; in this
484case, the filesystem must not set the page Uptodate. Following this,
485as per the usual Linux pagecache behavior, attempts by userspace to
486read() from the part of the file containing the page will fail with
487EIO, and accesses to the page within a memory map will raise SIGBUS.
488
489fsverity_verify_page() currently only supports the case where the
490Merkle tree block size is equal to PAGE_SIZE (often 4096 bytes).
491
492In principle, fsverity_verify_page() verifies the entire path in the
493Merkle tree from the data page to the root hash. However, for
494efficiency the filesystem may cache the hash pages. Therefore,
495fsverity_verify_page() only ascends the tree reading hash pages until
496an already-verified hash page is seen, as indicated by the PageChecked
497bit being set. It then verifies the path to that page.
498
499This optimization, which is also used by dm-verity, results in
500excellent sequential read performance. This is because usually (e.g.
501127 in 128 times for 4K blocks and SHA-256) the hash page from the
502bottom level of the tree will already be cached and checked from
503reading a previous data page. However, random reads perform worse.
504
505Block device based filesystems
506~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
507
508Block device based filesystems (e.g. ext4 and f2fs) in Linux also use
509the pagecache, so the above subsection applies too. However, they
510also usually read many pages from a file at once, grouped into a
511structure called a "bio". To make it easier for these types of
512filesystems to support fs-verity, fs/verity/ also provides a function
513fsverity_verify_bio() which verifies all pages in a bio.
514
515ext4 and f2fs also support encryption. If a verity file is also
516encrypted, the pages must be decrypted before being verified. To
517support this, these filesystems allocate a "post-read context" for
518each bio and store it in ``->bi_private``::
519
520 struct bio_post_read_ctx {
521 struct bio *bio;
522 struct work_struct work;
523 unsigned int cur_step;
524 unsigned int enabled_steps;
525 };
526
527``enabled_steps`` is a bitmask that specifies whether decryption,
528verity, or both are enabled. After the bio completes, for each needed
529postprocessing step the filesystem enqueues the bio_post_read_ctx on a
530workqueue, and then the workqueue work does the decryption or
531verification. Finally, pages where no decryption or verity error
532occurred are marked Uptodate, and the pages are unlocked.
533
534Files on ext4 and f2fs may contain holes. Normally, ``->readpages()``
535simply zeroes holes and sets the corresponding pages Uptodate; no bios
536are issued. To prevent this case from bypassing fs-verity, these
537filesystems use fsverity_verify_page() to verify hole pages.
538
539ext4 and f2fs disable direct I/O on verity files, since otherwise
540direct I/O would bypass fs-verity. (They also do the same for
541encrypted files.)
542
543Userspace utility
544=================
545
546This document focuses on the kernel, but a userspace utility for
547fs-verity can be found at:
548
549 https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/fsverity-utils.git
550
551See the README.md file in the fsverity-utils source tree for details,
552including examples of setting up fs-verity protected files.
553
554Tests
555=====
556
557To test fs-verity, use xfstests. For example, using `kvm-xfstests
558<https://github.com/tytso/xfstests-bld/blob/master/Documentation/kvm-quickstart.md>`_::
559
560 kvm-xfstests -c ext4,f2fs -g verity
561
562FAQ
563===
564
565This section answers frequently asked questions about fs-verity that
566weren't already directly answered in other parts of this document.
567
568:Q: Why isn't fs-verity part of IMA?
569:A: fs-verity and IMA (Integrity Measurement Architecture) have
570 different focuses. fs-verity is a filesystem-level mechanism for
571 hashing individual files using a Merkle tree. In contrast, IMA
572 defines a system-wide policy that specifies which files are
573 hashed and what to do with those hashes, such as log them,
574 authenticate them, or add them to a measurement list.
575
576 IMA is planned to support the fs-verity hashing mechanism as an
577 alternative to doing full file hashes, for people who want the
578 performance and security benefits of the Merkle tree based hash.
579 But it doesn't make sense to force all uses of fs-verity to be
580 through IMA. As a standalone filesystem feature, fs-verity
581 already meets many users' needs, and it's testable like other
582 filesystem features e.g. with xfstests.
583
584:Q: Isn't fs-verity useless because the attacker can just modify the
585 hashes in the Merkle tree, which is stored on-disk?
586:A: To verify the authenticity of an fs-verity file you must verify
587 the authenticity of the "file measurement", which is basically the
588 root hash of the Merkle tree. See `Use cases`_.
589
590:Q: Isn't fs-verity useless because the attacker can just replace a
591 verity file with a non-verity one?
592:A: See `Use cases`_. In the initial use case, it's really trusted
593 userspace code that authenticates the files; fs-verity is just a
594 tool to do this job efficiently and securely. The trusted
595 userspace code will consider non-verity files to be inauthentic.
596
597:Q: Why does the Merkle tree need to be stored on-disk? Couldn't you
598 store just the root hash?
599:A: If the Merkle tree wasn't stored on-disk, then you'd have to
600 compute the entire tree when the file is first accessed, even if
601 just one byte is being read. This is a fundamental consequence of
602 how Merkle tree hashing works. To verify a leaf node, you need to
603 verify the whole path to the root hash, including the root node
604 (the thing which the root hash is a hash of). But if the root
605 node isn't stored on-disk, you have to compute it by hashing its
606 children, and so on until you've actually hashed the entire file.
607
608 That defeats most of the point of doing a Merkle tree-based hash,
609 since if you have to hash the whole file ahead of time anyway,
610 then you could simply do sha256(file) instead. That would be much
611 simpler, and a bit faster too.
612
613 It's true that an in-memory Merkle tree could still provide the
614 advantage of verification on every read rather than just on the
615 first read. However, it would be inefficient because every time a
616 hash page gets evicted (you can't pin the entire Merkle tree into
617 memory, since it may be very large), in order to restore it you
618 again need to hash everything below it in the tree. This again
619 defeats most of the point of doing a Merkle tree-based hash, since
620 a single block read could trigger re-hashing gigabytes of data.
621
622:Q: But couldn't you store just the leaf nodes and compute the rest?
623:A: See previous answer; this really just moves up one level, since
624 one could alternatively interpret the data blocks as being the
625 leaf nodes of the Merkle tree. It's true that the tree can be
626 computed much faster if the leaf level is stored rather than just
627 the data, but that's only because each level is less than 1% the
628 size of the level below (assuming the recommended settings of
629 SHA-256 and 4K blocks). For the exact same reason, by storing
630 "just the leaf nodes" you'd already be storing over 99% of the
631 tree, so you might as well simply store the whole tree.
632
633:Q: Can the Merkle tree be built ahead of time, e.g. distributed as
634 part of a package that is installed to many computers?
635:A: This isn't currently supported. It was part of the original
636 design, but was removed to simplify the kernel UAPI and because it
637 wasn't a critical use case. Files are usually installed once and
638 used many times, and cryptographic hashing is somewhat fast on
639 most modern processors.
640
641:Q: Why doesn't fs-verity support writes?
642:A: Write support would be very difficult and would require a
643 completely different design, so it's well outside the scope of
644 fs-verity. Write support would require:
645
646 - A way to maintain consistency between the data and hashes,
647 including all levels of hashes, since corruption after a crash
648 (especially of potentially the entire file!) is unacceptable.
649 The main options for solving this are data journalling,
650 copy-on-write, and log-structured volume. But it's very hard to
651 retrofit existing filesystems with new consistency mechanisms.
652 Data journalling is available on ext4, but is very slow.
653
654 - Rebuilding the Merkle tree after every write, which would be
655 extremely inefficient. Alternatively, a different authenticated
656 dictionary structure such as an "authenticated skiplist" could
657 be used. However, this would be far more complex.
658
659 Compare it to dm-verity vs. dm-integrity. dm-verity is very
660 simple: the kernel just verifies read-only data against a
661 read-only Merkle tree. In contrast, dm-integrity supports writes
662 but is slow, is much more complex, and doesn't actually support
663 full-device authentication since it authenticates each sector
664 independently, i.e. there is no "root hash". It doesn't really
665 make sense for the same device-mapper target to support these two
666 very different cases; the same applies to fs-verity.
667
668:Q: Since verity files are immutable, why isn't the immutable bit set?
669:A: The existing "immutable" bit (FS_IMMUTABLE_FL) already has a
670 specific set of semantics which not only make the file contents
671 read-only, but also prevent the file from being deleted, renamed,
672 linked to, or having its owner or mode changed. These extra
673 properties are unwanted for fs-verity, so reusing the immutable
674 bit isn't appropriate.
675
676:Q: Why does the API use ioctls instead of setxattr() and getxattr()?
677:A: Abusing the xattr interface for basically arbitrary syscalls is
678 heavily frowned upon by most of the Linux filesystem developers.
679 An xattr should really just be an xattr on-disk, not an API to
680 e.g. magically trigger construction of a Merkle tree.
681
682:Q: Does fs-verity support remote filesystems?
683:A: Only ext4 and f2fs support is implemented currently, but in
684 principle any filesystem that can store per-file verity metadata
685 can support fs-verity, regardless of whether it's local or remote.
686 Some filesystems may have fewer options of where to store the
687 verity metadata; one possibility is to store it past the end of
688 the file and "hide" it from userspace by manipulating i_size. The
689 data verification functions provided by ``fs/verity/`` also assume
690 that the filesystem uses the Linux pagecache, but both local and
691 remote filesystems normally do so.
692
693:Q: Why is anything filesystem-specific at all? Shouldn't fs-verity
694 be implemented entirely at the VFS level?
695:A: There are many reasons why this is not possible or would be very
696 difficult, including the following:
697
698 - To prevent bypassing verification, pages must not be marked
699 Uptodate until they've been verified. Currently, each
700 filesystem is responsible for marking pages Uptodate via
701 ``->readpages()``. Therefore, currently it's not possible for
702 the VFS to do the verification on its own. Changing this would
703 require significant changes to the VFS and all filesystems.
704
705 - It would require defining a filesystem-independent way to store
706 the verity metadata. Extended attributes don't work for this
707 because (a) the Merkle tree may be gigabytes, but many
708 filesystems assume that all xattrs fit into a single 4K
709 filesystem block, and (b) ext4 and f2fs encryption doesn't
710 encrypt xattrs, yet the Merkle tree *must* be encrypted when the
711 file contents are, because it stores hashes of the plaintext
712 file contents.
713
714 So the verity metadata would have to be stored in an actual
715 file. Using a separate file would be very ugly, since the
716 metadata is fundamentally part of the file to be protected, and
717 it could cause problems where users could delete the real file
718 but not the metadata file or vice versa. On the other hand,
719 having it be in the same file would break applications unless
720 filesystems' notion of i_size were divorced from the VFS's,
721 which would be complex and require changes to all filesystems.
722
723 - It's desirable that FS_IOC_ENABLE_VERITY uses the filesystem's
724 transaction mechanism so that either the file ends up with
725 verity enabled, or no changes were made. Allowing intermediate
726 states to occur after a crash may cause problems.
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index 96653ebefd7e..fd2bcf99cda0 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -36,3 +36,4 @@ filesystem implementations.
36 36
37 journalling 37 journalling
38 fscrypt 38 fscrypt
39 fsverity
diff --git a/Documentation/ioctl/ioctl-number.rst b/Documentation/ioctl/ioctl-number.rst
index 7f8dcae7a230..bef79cd4c6b4 100644
--- a/Documentation/ioctl/ioctl-number.rst
+++ b/Documentation/ioctl/ioctl-number.rst
@@ -233,6 +233,7 @@ Code Seq# Include File Comments
233'f' 00-0F fs/ext4/ext4.h conflict! 233'f' 00-0F fs/ext4/ext4.h conflict!
234'f' 00-0F linux/fs.h conflict! 234'f' 00-0F linux/fs.h conflict!
235'f' 00-0F fs/ocfs2/ocfs2_fs.h conflict! 235'f' 00-0F fs/ocfs2/ocfs2_fs.h conflict!
236'f' 81-8F linux/fsverity.h
236'g' 00-0F linux/usb/gadgetfs.h 237'g' 00-0F linux/usb/gadgetfs.h
237'g' 20-2F linux/usb/g_printer.h 238'g' 20-2F linux/usb/g_printer.h
238'h' 00-7F conflict! Charon filesystem 239'h' 00-7F conflict! Charon filesystem
diff --git a/MAINTAINERS b/MAINTAINERS
index ce6113999cf8..b2326dece28e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6694,6 +6694,18 @@ S: Maintained
6694F: fs/notify/ 6694F: fs/notify/
6695F: include/linux/fsnotify*.h 6695F: include/linux/fsnotify*.h
6696 6696
6697FSVERITY: READ-ONLY FILE-BASED AUTHENTICITY PROTECTION
6698M: Eric Biggers <ebiggers@kernel.org>
6699M: Theodore Y. Ts'o <tytso@mit.edu>
6700L: linux-fscrypt@vger.kernel.org
6701Q: https://patchwork.kernel.org/project/linux-fscrypt/list/
6702T: git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git fsverity
6703S: Supported
6704F: fs/verity/
6705F: include/linux/fsverity.h
6706F: include/uapi/linux/fsverity.h
6707F: Documentation/filesystems/fsverity.rst
6708
6697FUJITSU LAPTOP EXTRAS 6709FUJITSU LAPTOP EXTRAS
6698M: Jonathan Woithe <jwoithe@just42.net> 6710M: Jonathan Woithe <jwoithe@just42.net>
6699L: platform-driver-x86@vger.kernel.org 6711L: platform-driver-x86@vger.kernel.org
diff --git a/fs/Kconfig b/fs/Kconfig
index 669d46550e6d..2501e6f1f965 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -112,6 +112,8 @@ config MANDATORY_FILE_LOCKING
112 112
113source "fs/crypto/Kconfig" 113source "fs/crypto/Kconfig"
114 114
115source "fs/verity/Kconfig"
116
115source "fs/notify/Kconfig" 117source "fs/notify/Kconfig"
116 118
117source "fs/quota/Kconfig" 119source "fs/quota/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index b2e4973a0bea..14231b4cf383 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_AIO) += aio.o
34obj-$(CONFIG_IO_URING) += io_uring.o 34obj-$(CONFIG_IO_URING) += io_uring.o
35obj-$(CONFIG_FS_DAX) += dax.o 35obj-$(CONFIG_FS_DAX) += dax.o
36obj-$(CONFIG_FS_ENCRYPTION) += crypto/ 36obj-$(CONFIG_FS_ENCRYPTION) += crypto/
37obj-$(CONFIG_FS_VERITY) += verity/
37obj-$(CONFIG_FILE_LOCKING) += locks.o 38obj-$(CONFIG_FILE_LOCKING) += locks.o
38obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o 39obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
39obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o 40obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 8fdfcd3c3e04..b17ddc229ac5 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -13,3 +13,4 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
13 13
14ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o 14ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
15ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o 15ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
16ext4-$(CONFIG_FS_VERITY) += verity.o
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index bf660aa7a9e0..9c7f4036021b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -41,6 +41,7 @@
41#endif 41#endif
42 42
43#include <linux/fscrypt.h> 43#include <linux/fscrypt.h>
44#include <linux/fsverity.h>
44 45
45#include <linux/compiler.h> 46#include <linux/compiler.h>
46 47
@@ -395,6 +396,7 @@ struct flex_groups {
395#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ 396#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
396#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ 397#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
397#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ 398#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
399#define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */
398#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ 400#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
399#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ 401#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
400#define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ 402#define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
@@ -402,7 +404,7 @@ struct flex_groups {
402#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */ 404#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */
403#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ 405#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
404 406
405#define EXT4_FL_USER_VISIBLE 0x704BDFFF /* User visible flags */ 407#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */
406#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */ 408#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */
407 409
408/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */ 410/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
@@ -467,6 +469,7 @@ enum {
467 EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/ 469 EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/
468 EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */ 470 EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */
469 EXT4_INODE_EXTENTS = 19, /* Inode uses extents */ 471 EXT4_INODE_EXTENTS = 19, /* Inode uses extents */
472 EXT4_INODE_VERITY = 20, /* Verity protected inode */
470 EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ 473 EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */
471 EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */ 474 EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */
472 EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */ 475 EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */
@@ -512,6 +515,7 @@ static inline void ext4_check_flag_values(void)
512 CHECK_FLAG_VALUE(TOPDIR); 515 CHECK_FLAG_VALUE(TOPDIR);
513 CHECK_FLAG_VALUE(HUGE_FILE); 516 CHECK_FLAG_VALUE(HUGE_FILE);
514 CHECK_FLAG_VALUE(EXTENTS); 517 CHECK_FLAG_VALUE(EXTENTS);
518 CHECK_FLAG_VALUE(VERITY);
515 CHECK_FLAG_VALUE(EA_INODE); 519 CHECK_FLAG_VALUE(EA_INODE);
516 CHECK_FLAG_VALUE(EOFBLOCKS); 520 CHECK_FLAG_VALUE(EOFBLOCKS);
517 CHECK_FLAG_VALUE(INLINE_DATA); 521 CHECK_FLAG_VALUE(INLINE_DATA);
@@ -1560,6 +1564,7 @@ enum {
1560 EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ 1564 EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
1561 EXT4_STATE_EXT_PRECACHED, /* extents have been precached */ 1565 EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
1562 EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */ 1566 EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */
1567 EXT4_STATE_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
1563}; 1568};
1564 1569
1565#define EXT4_INODE_BIT_FNS(name, field, offset) \ 1570#define EXT4_INODE_BIT_FNS(name, field, offset) \
@@ -1610,6 +1615,12 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1610#define EXT4_SB(sb) (sb) 1615#define EXT4_SB(sb) (sb)
1611#endif 1616#endif
1612 1617
1618static inline bool ext4_verity_in_progress(struct inode *inode)
1619{
1620 return IS_ENABLED(CONFIG_FS_VERITY) &&
1621 ext4_test_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
1622}
1623
1613#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime 1624#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
1614 1625
1615/* 1626/*
@@ -1662,6 +1673,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1662#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 1673#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
1663#define EXT4_FEATURE_RO_COMPAT_READONLY 0x1000 1674#define EXT4_FEATURE_RO_COMPAT_READONLY 0x1000
1664#define EXT4_FEATURE_RO_COMPAT_PROJECT 0x2000 1675#define EXT4_FEATURE_RO_COMPAT_PROJECT 0x2000
1676#define EXT4_FEATURE_RO_COMPAT_VERITY 0x8000
1665 1677
1666#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 1678#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
1667#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 1679#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -1756,6 +1768,7 @@ EXT4_FEATURE_RO_COMPAT_FUNCS(bigalloc, BIGALLOC)
1756EXT4_FEATURE_RO_COMPAT_FUNCS(metadata_csum, METADATA_CSUM) 1768EXT4_FEATURE_RO_COMPAT_FUNCS(metadata_csum, METADATA_CSUM)
1757EXT4_FEATURE_RO_COMPAT_FUNCS(readonly, READONLY) 1769EXT4_FEATURE_RO_COMPAT_FUNCS(readonly, READONLY)
1758EXT4_FEATURE_RO_COMPAT_FUNCS(project, PROJECT) 1770EXT4_FEATURE_RO_COMPAT_FUNCS(project, PROJECT)
1771EXT4_FEATURE_RO_COMPAT_FUNCS(verity, VERITY)
1759 1772
1760EXT4_FEATURE_INCOMPAT_FUNCS(compression, COMPRESSION) 1773EXT4_FEATURE_INCOMPAT_FUNCS(compression, COMPRESSION)
1761EXT4_FEATURE_INCOMPAT_FUNCS(filetype, FILETYPE) 1774EXT4_FEATURE_INCOMPAT_FUNCS(filetype, FILETYPE)
@@ -1813,7 +1826,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(casefold, CASEFOLD)
1813 EXT4_FEATURE_RO_COMPAT_BIGALLOC |\ 1826 EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
1814 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\ 1827 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
1815 EXT4_FEATURE_RO_COMPAT_QUOTA |\ 1828 EXT4_FEATURE_RO_COMPAT_QUOTA |\
1816 EXT4_FEATURE_RO_COMPAT_PROJECT) 1829 EXT4_FEATURE_RO_COMPAT_PROJECT |\
1830 EXT4_FEATURE_RO_COMPAT_VERITY)
1817 1831
1818#define EXTN_FEATURE_FUNCS(ver) \ 1832#define EXTN_FEATURE_FUNCS(ver) \
1819static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \ 1833static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -3177,6 +3191,8 @@ static inline void ext4_set_de_type(struct super_block *sb,
3177extern int ext4_mpage_readpages(struct address_space *mapping, 3191extern int ext4_mpage_readpages(struct address_space *mapping,
3178 struct list_head *pages, struct page *page, 3192 struct list_head *pages, struct page *page,
3179 unsigned nr_pages, bool is_readahead); 3193 unsigned nr_pages, bool is_readahead);
3194extern int __init ext4_init_post_read_processing(void);
3195extern void ext4_exit_post_read_processing(void);
3180 3196
3181/* symlink.c */ 3197/* symlink.c */
3182extern const struct inode_operations ext4_encrypted_symlink_inode_operations; 3198extern const struct inode_operations ext4_encrypted_symlink_inode_operations;
@@ -3283,6 +3299,9 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
3283/* mmp.c */ 3299/* mmp.c */
3284extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); 3300extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
3285 3301
3302/* verity.c */
3303extern const struct fsverity_operations ext4_verityops;
3304
3286/* 3305/*
3287 * Add new method to test whether block and inode bitmaps are properly 3306 * Add new method to test whether block and inode bitmaps are properly
3288 * initialized. With uninit_bg reading the block from disk is not enough 3307 * initialized. With uninit_bg reading the block from disk is not enough
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 70b0438dbc94..b8a20bb9a145 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -457,6 +457,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
457 if (ret) 457 if (ret)
458 return ret; 458 return ret;
459 459
460 ret = fsverity_file_open(inode, filp);
461 if (ret)
462 return ret;
463
460 /* 464 /*
461 * Set up the jbd2_inode if we are opening the inode for 465 * Set up the jbd2_inode if we are opening the inode for
462 * writing and the journal is present 466 * writing and the journal is present
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 006b7a2070bf..d0dc0e3463db 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1340,6 +1340,9 @@ retry_journal:
1340 } 1340 }
1341 1341
1342 if (ret) { 1342 if (ret) {
1343 bool extended = (pos + len > inode->i_size) &&
1344 !ext4_verity_in_progress(inode);
1345
1343 unlock_page(page); 1346 unlock_page(page);
1344 /* 1347 /*
1345 * __block_write_begin may have instantiated a few blocks 1348 * __block_write_begin may have instantiated a few blocks
@@ -1349,11 +1352,11 @@ retry_journal:
1349 * Add inode to orphan list in case we crash before 1352 * Add inode to orphan list in case we crash before
1350 * truncate finishes 1353 * truncate finishes
1351 */ 1354 */
1352 if (pos + len > inode->i_size && ext4_can_truncate(inode)) 1355 if (extended && ext4_can_truncate(inode))
1353 ext4_orphan_add(handle, inode); 1356 ext4_orphan_add(handle, inode);
1354 1357
1355 ext4_journal_stop(handle); 1358 ext4_journal_stop(handle);
1356 if (pos + len > inode->i_size) { 1359 if (extended) {
1357 ext4_truncate_failed_write(inode); 1360 ext4_truncate_failed_write(inode);
1358 /* 1361 /*
1359 * If truncate failed early the inode might 1362 * If truncate failed early the inode might
@@ -1406,6 +1409,7 @@ static int ext4_write_end(struct file *file,
1406 int ret = 0, ret2; 1409 int ret = 0, ret2;
1407 int i_size_changed = 0; 1410 int i_size_changed = 0;
1408 int inline_data = ext4_has_inline_data(inode); 1411 int inline_data = ext4_has_inline_data(inode);
1412 bool verity = ext4_verity_in_progress(inode);
1409 1413
1410 trace_ext4_write_end(inode, pos, len, copied); 1414 trace_ext4_write_end(inode, pos, len, copied);
1411 if (inline_data) { 1415 if (inline_data) {
@@ -1423,12 +1427,16 @@ static int ext4_write_end(struct file *file,
1423 /* 1427 /*
1424 * it's important to update i_size while still holding page lock: 1428 * it's important to update i_size while still holding page lock:
1425 * page writeout could otherwise come in and zero beyond i_size. 1429 * page writeout could otherwise come in and zero beyond i_size.
1430 *
1431 * If FS_IOC_ENABLE_VERITY is running on this inode, then Merkle tree
1432 * blocks are being written past EOF, so skip the i_size update.
1426 */ 1433 */
1427 i_size_changed = ext4_update_inode_size(inode, pos + copied); 1434 if (!verity)
1435 i_size_changed = ext4_update_inode_size(inode, pos + copied);
1428 unlock_page(page); 1436 unlock_page(page);
1429 put_page(page); 1437 put_page(page);
1430 1438
1431 if (old_size < pos) 1439 if (old_size < pos && !verity)
1432 pagecache_isize_extended(inode, old_size, pos); 1440 pagecache_isize_extended(inode, old_size, pos);
1433 /* 1441 /*
1434 * Don't mark the inode dirty under page lock. First, it unnecessarily 1442 * Don't mark the inode dirty under page lock. First, it unnecessarily
@@ -1439,7 +1447,7 @@ static int ext4_write_end(struct file *file,
1439 if (i_size_changed || inline_data) 1447 if (i_size_changed || inline_data)
1440 ext4_mark_inode_dirty(handle, inode); 1448 ext4_mark_inode_dirty(handle, inode);
1441 1449
1442 if (pos + len > inode->i_size && ext4_can_truncate(inode)) 1450 if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
1443 /* if we have allocated more blocks and copied 1451 /* if we have allocated more blocks and copied
1444 * less. We will have blocks allocated outside 1452 * less. We will have blocks allocated outside
1445 * inode->i_size. So truncate them 1453 * inode->i_size. So truncate them
@@ -1450,7 +1458,7 @@ errout:
1450 if (!ret) 1458 if (!ret)
1451 ret = ret2; 1459 ret = ret2;
1452 1460
1453 if (pos + len > inode->i_size) { 1461 if (pos + len > inode->i_size && !verity) {
1454 ext4_truncate_failed_write(inode); 1462 ext4_truncate_failed_write(inode);
1455 /* 1463 /*
1456 * If truncate failed early the inode might still be 1464 * If truncate failed early the inode might still be
@@ -1511,6 +1519,7 @@ static int ext4_journalled_write_end(struct file *file,
1511 unsigned from, to; 1519 unsigned from, to;
1512 int size_changed = 0; 1520 int size_changed = 0;
1513 int inline_data = ext4_has_inline_data(inode); 1521 int inline_data = ext4_has_inline_data(inode);
1522 bool verity = ext4_verity_in_progress(inode);
1514 1523
1515 trace_ext4_journalled_write_end(inode, pos, len, copied); 1524 trace_ext4_journalled_write_end(inode, pos, len, copied);
1516 from = pos & (PAGE_SIZE - 1); 1525 from = pos & (PAGE_SIZE - 1);
@@ -1540,13 +1549,14 @@ static int ext4_journalled_write_end(struct file *file,
1540 if (!partial) 1549 if (!partial)
1541 SetPageUptodate(page); 1550 SetPageUptodate(page);
1542 } 1551 }
1543 size_changed = ext4_update_inode_size(inode, pos + copied); 1552 if (!verity)
1553 size_changed = ext4_update_inode_size(inode, pos + copied);
1544 ext4_set_inode_state(inode, EXT4_STATE_JDATA); 1554 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
1545 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; 1555 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
1546 unlock_page(page); 1556 unlock_page(page);
1547 put_page(page); 1557 put_page(page);
1548 1558
1549 if (old_size < pos) 1559 if (old_size < pos && !verity)
1550 pagecache_isize_extended(inode, old_size, pos); 1560 pagecache_isize_extended(inode, old_size, pos);
1551 1561
1552 if (size_changed || inline_data) { 1562 if (size_changed || inline_data) {
@@ -1555,7 +1565,7 @@ static int ext4_journalled_write_end(struct file *file,
1555 ret = ret2; 1565 ret = ret2;
1556 } 1566 }
1557 1567
1558 if (pos + len > inode->i_size && ext4_can_truncate(inode)) 1568 if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
1559 /* if we have allocated more blocks and copied 1569 /* if we have allocated more blocks and copied
1560 * less. We will have blocks allocated outside 1570 * less. We will have blocks allocated outside
1561 * inode->i_size. So truncate them 1571 * inode->i_size. So truncate them
@@ -1566,7 +1576,7 @@ errout:
1566 ret2 = ext4_journal_stop(handle); 1576 ret2 = ext4_journal_stop(handle);
1567 if (!ret) 1577 if (!ret)
1568 ret = ret2; 1578 ret = ret2;
1569 if (pos + len > inode->i_size) { 1579 if (pos + len > inode->i_size && !verity) {
1570 ext4_truncate_failed_write(inode); 1580 ext4_truncate_failed_write(inode);
1571 /* 1581 /*
1572 * If truncate failed early the inode might still be 1582 * If truncate failed early the inode might still be
@@ -2162,7 +2172,8 @@ static int ext4_writepage(struct page *page,
2162 2172
2163 trace_ext4_writepage(page); 2173 trace_ext4_writepage(page);
2164 size = i_size_read(inode); 2174 size = i_size_read(inode);
2165 if (page->index == size >> PAGE_SHIFT) 2175 if (page->index == size >> PAGE_SHIFT &&
2176 !ext4_verity_in_progress(inode))
2166 len = size & ~PAGE_MASK; 2177 len = size & ~PAGE_MASK;
2167 else 2178 else
2168 len = PAGE_SIZE; 2179 len = PAGE_SIZE;
@@ -2246,7 +2257,8 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
2246 * after page tables are updated. 2257 * after page tables are updated.
2247 */ 2258 */
2248 size = i_size_read(mpd->inode); 2259 size = i_size_read(mpd->inode);
2249 if (page->index == size >> PAGE_SHIFT) 2260 if (page->index == size >> PAGE_SHIFT &&
2261 !ext4_verity_in_progress(mpd->inode))
2250 len = size & ~PAGE_MASK; 2262 len = size & ~PAGE_MASK;
2251 else 2263 else
2252 len = PAGE_SIZE; 2264 len = PAGE_SIZE;
@@ -2345,6 +2357,9 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
2345 ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1) 2357 ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1)
2346 >> inode->i_blkbits; 2358 >> inode->i_blkbits;
2347 2359
2360 if (ext4_verity_in_progress(inode))
2361 blocks = EXT_MAX_BLOCKS;
2362
2348 do { 2363 do {
2349 BUG_ON(buffer_locked(bh)); 2364 BUG_ON(buffer_locked(bh));
2350 2365
@@ -3061,8 +3076,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
3061 3076
3062 index = pos >> PAGE_SHIFT; 3077 index = pos >> PAGE_SHIFT;
3063 3078
3064 if (ext4_nonda_switch(inode->i_sb) || 3079 if (ext4_nonda_switch(inode->i_sb) || S_ISLNK(inode->i_mode) ||
3065 S_ISLNK(inode->i_mode)) { 3080 ext4_verity_in_progress(inode)) {
3066 *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; 3081 *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
3067 return ext4_write_begin(file, mapping, pos, 3082 return ext4_write_begin(file, mapping, pos,
3068 len, flags, pagep, fsdata); 3083 len, flags, pagep, fsdata);
@@ -3897,6 +3912,8 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
3897 if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) 3912 if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
3898 return 0; 3913 return 0;
3899#endif 3914#endif
3915 if (fsverity_active(inode))
3916 return 0;
3900 3917
3901 /* 3918 /*
3902 * If we are doing data journalling we don't support O_DIRECT 3919 * If we are doing data journalling we don't support O_DIRECT
@@ -4736,6 +4753,8 @@ static bool ext4_should_use_dax(struct inode *inode)
4736 return false; 4753 return false;
4737 if (ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT)) 4754 if (ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT))
4738 return false; 4755 return false;
4756 if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
4757 return false;
4739 return true; 4758 return true;
4740} 4759}
4741 4760
@@ -4760,9 +4779,11 @@ void ext4_set_inode_flags(struct inode *inode)
4760 new_fl |= S_ENCRYPTED; 4779 new_fl |= S_ENCRYPTED;
4761 if (flags & EXT4_CASEFOLD_FL) 4780 if (flags & EXT4_CASEFOLD_FL)
4762 new_fl |= S_CASEFOLD; 4781 new_fl |= S_CASEFOLD;
4782 if (flags & EXT4_VERITY_FL)
4783 new_fl |= S_VERITY;
4763 inode_set_flags(inode, new_fl, 4784 inode_set_flags(inode, new_fl,
4764 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX| 4785 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX|
4765 S_ENCRYPTED|S_CASEFOLD); 4786 S_ENCRYPTED|S_CASEFOLD|S_VERITY);
4766} 4787}
4767 4788
4768static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, 4789static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
@@ -5552,6 +5573,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5552 if (error) 5573 if (error)
5553 return error; 5574 return error;
5554 5575
5576 error = fsverity_prepare_setattr(dentry, attr);
5577 if (error)
5578 return error;
5579
5555 if (is_quota_modification(inode, attr)) { 5580 if (is_quota_modification(inode, attr)) {
5556 error = dquot_initialize(inode); 5581 error = dquot_initialize(inode);
5557 if (error) 5582 if (error)
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5703d607f5af..5444d49cbf09 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -1198,6 +1198,17 @@ out:
1198 } 1198 }
1199 case EXT4_IOC_SHUTDOWN: 1199 case EXT4_IOC_SHUTDOWN:
1200 return ext4_shutdown(sb, arg); 1200 return ext4_shutdown(sb, arg);
1201
1202 case FS_IOC_ENABLE_VERITY:
1203 if (!ext4_has_feature_verity(sb))
1204 return -EOPNOTSUPP;
1205 return fsverity_ioctl_enable(filp, (const void __user *)arg);
1206
1207 case FS_IOC_MEASURE_VERITY:
1208 if (!ext4_has_feature_verity(sb))
1209 return -EOPNOTSUPP;
1210 return fsverity_ioctl_measure(filp, (void __user *)arg);
1211
1201 default: 1212 default:
1202 return -ENOTTY; 1213 return -ENOTTY;
1203 } 1214 }
@@ -1265,6 +1276,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1265 case FS_IOC_GET_ENCRYPTION_KEY_STATUS: 1276 case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
1266 case EXT4_IOC_SHUTDOWN: 1277 case EXT4_IOC_SHUTDOWN:
1267 case FS_IOC_GETFSMAP: 1278 case FS_IOC_GETFSMAP:
1279 case FS_IOC_ENABLE_VERITY:
1280 case FS_IOC_MEASURE_VERITY:
1268 break; 1281 break;
1269 default: 1282 default:
1270 return -ENOIOCTLCMD; 1283 return -ENOIOCTLCMD;
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index c916017db334..a30b203fa461 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -47,13 +47,103 @@
47 47
48#include "ext4.h" 48#include "ext4.h"
49 49
50static inline bool ext4_bio_encrypted(struct bio *bio) 50#define NUM_PREALLOC_POST_READ_CTXS 128
51
52static struct kmem_cache *bio_post_read_ctx_cache;
53static mempool_t *bio_post_read_ctx_pool;
54
/*
 * Post-processing steps for read bios.  The enumerators are used both as the
 * running position stored in bio_post_read_ctx::cur_step and as bit positions
 * (1 << STEP_*) in bio_post_read_ctx::enabled_steps.
 */
enum bio_post_read_step {
	STEP_INITIAL = 0,	/* no step has run yet */
	STEP_DECRYPT = 1,	/* fscrypt decryption */
	STEP_VERITY = 2,	/* fs-verity verification */
};
61
62struct bio_post_read_ctx {
63 struct bio *bio;
64 struct work_struct work;
65 unsigned int cur_step;
66 unsigned int enabled_steps;
67};
68
69static void __read_end_io(struct bio *bio)
51{ 70{
52#ifdef CONFIG_FS_ENCRYPTION 71 struct page *page;
53 return unlikely(bio->bi_private != NULL); 72 struct bio_vec *bv;
54#else 73 struct bvec_iter_all iter_all;
55 return false; 74
56#endif 75 bio_for_each_segment_all(bv, bio, iter_all) {
76 page = bv->bv_page;
77
78 /* PG_error was set if any post_read step failed */
79 if (bio->bi_status || PageError(page)) {
80 ClearPageUptodate(page);
81 /* will re-read again later */
82 ClearPageError(page);
83 } else {
84 SetPageUptodate(page);
85 }
86 unlock_page(page);
87 }
88 if (bio->bi_private)
89 mempool_free(bio->bi_private, bio_post_read_ctx_pool);
90 bio_put(bio);
91}
92
93static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
94
95static void decrypt_work(struct work_struct *work)
96{
97 struct bio_post_read_ctx *ctx =
98 container_of(work, struct bio_post_read_ctx, work);
99
100 fscrypt_decrypt_bio(ctx->bio);
101
102 bio_post_read_processing(ctx);
103}
104
105static void verity_work(struct work_struct *work)
106{
107 struct bio_post_read_ctx *ctx =
108 container_of(work, struct bio_post_read_ctx, work);
109
110 fsverity_verify_bio(ctx->bio);
111
112 bio_post_read_processing(ctx);
113}
114
115static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
116{
117 /*
118 * We use different work queues for decryption and for verity because
119 * verity may require reading metadata pages that need decryption, and
120 * we shouldn't recurse to the same workqueue.
121 */
122 switch (++ctx->cur_step) {
123 case STEP_DECRYPT:
124 if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
125 INIT_WORK(&ctx->work, decrypt_work);
126 fscrypt_enqueue_decrypt_work(&ctx->work);
127 return;
128 }
129 ctx->cur_step++;
130 /* fall-through */
131 case STEP_VERITY:
132 if (ctx->enabled_steps & (1 << STEP_VERITY)) {
133 INIT_WORK(&ctx->work, verity_work);
134 fsverity_enqueue_verify_work(&ctx->work);
135 return;
136 }
137 ctx->cur_step++;
138 /* fall-through */
139 default:
140 __read_end_io(ctx->bio);
141 }
142}
143
144static bool bio_post_read_required(struct bio *bio)
145{
146 return bio->bi_private && !bio->bi_status;
57} 147}
58 148
59/* 149/*
@@ -70,30 +160,53 @@ static inline bool ext4_bio_encrypted(struct bio *bio)
70 */ 160 */
71static void mpage_end_io(struct bio *bio) 161static void mpage_end_io(struct bio *bio)
72{ 162{
73 struct bio_vec *bv; 163 if (bio_post_read_required(bio)) {
74 struct bvec_iter_all iter_all; 164 struct bio_post_read_ctx *ctx = bio->bi_private;
75 165
76 if (ext4_bio_encrypted(bio)) { 166 ctx->cur_step = STEP_INITIAL;
77 if (bio->bi_status) { 167 bio_post_read_processing(ctx);
78 fscrypt_release_ctx(bio->bi_private); 168 return;
79 } else {
80 fscrypt_enqueue_decrypt_bio(bio->bi_private, bio);
81 return;
82 }
83 } 169 }
84 bio_for_each_segment_all(bv, bio, iter_all) { 170 __read_end_io(bio);
85 struct page *page = bv->bv_page; 171}
86 172
87 if (!bio->bi_status) { 173static inline bool ext4_need_verity(const struct inode *inode, pgoff_t idx)
88 SetPageUptodate(page); 174{
89 } else { 175 return fsverity_active(inode) &&
90 ClearPageUptodate(page); 176 idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
91 SetPageError(page); 177}
92 } 178
93 unlock_page(page); 179static struct bio_post_read_ctx *get_bio_post_read_ctx(struct inode *inode,
180 struct bio *bio,
181 pgoff_t first_idx)
182{
183 unsigned int post_read_steps = 0;
184 struct bio_post_read_ctx *ctx = NULL;
185
186 if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
187 post_read_steps |= 1 << STEP_DECRYPT;
188
189 if (ext4_need_verity(inode, first_idx))
190 post_read_steps |= 1 << STEP_VERITY;
191
192 if (post_read_steps) {
193 ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
194 if (!ctx)
195 return ERR_PTR(-ENOMEM);
196 ctx->bio = bio;
197 ctx->enabled_steps = post_read_steps;
198 bio->bi_private = ctx;
94 } 199 }
200 return ctx;
201}
95 202
96 bio_put(bio); 203static inline loff_t ext4_readpage_limit(struct inode *inode)
204{
205 if (IS_ENABLED(CONFIG_FS_VERITY) &&
206 (IS_VERITY(inode) || ext4_verity_in_progress(inode)))
207 return inode->i_sb->s_maxbytes;
208
209 return i_size_read(inode);
97} 210}
98 211
99int ext4_mpage_readpages(struct address_space *mapping, 212int ext4_mpage_readpages(struct address_space *mapping,
@@ -141,7 +254,8 @@ int ext4_mpage_readpages(struct address_space *mapping,
141 254
142 block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); 255 block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
143 last_block = block_in_file + nr_pages * blocks_per_page; 256 last_block = block_in_file + nr_pages * blocks_per_page;
144 last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; 257 last_block_in_file = (ext4_readpage_limit(inode) +
258 blocksize - 1) >> blkbits;
145 if (last_block > last_block_in_file) 259 if (last_block > last_block_in_file)
146 last_block = last_block_in_file; 260 last_block = last_block_in_file;
147 page_block = 0; 261 page_block = 0;
@@ -218,6 +332,9 @@ int ext4_mpage_readpages(struct address_space *mapping,
218 zero_user_segment(page, first_hole << blkbits, 332 zero_user_segment(page, first_hole << blkbits,
219 PAGE_SIZE); 333 PAGE_SIZE);
220 if (first_hole == 0) { 334 if (first_hole == 0) {
335 if (ext4_need_verity(inode, page->index) &&
336 !fsverity_verify_page(page))
337 goto set_error_page;
221 SetPageUptodate(page); 338 SetPageUptodate(page);
222 unlock_page(page); 339 unlock_page(page);
223 goto next_page; 340 goto next_page;
@@ -241,18 +358,16 @@ int ext4_mpage_readpages(struct address_space *mapping,
241 bio = NULL; 358 bio = NULL;
242 } 359 }
243 if (bio == NULL) { 360 if (bio == NULL) {
244 struct fscrypt_ctx *ctx = NULL; 361 struct bio_post_read_ctx *ctx;
245 362
246 if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) {
247 ctx = fscrypt_get_ctx(GFP_NOFS);
248 if (IS_ERR(ctx))
249 goto set_error_page;
250 }
251 bio = bio_alloc(GFP_KERNEL, 363 bio = bio_alloc(GFP_KERNEL,
252 min_t(int, nr_pages, BIO_MAX_PAGES)); 364 min_t(int, nr_pages, BIO_MAX_PAGES));
253 if (!bio) { 365 if (!bio)
254 if (ctx) 366 goto set_error_page;
255 fscrypt_release_ctx(ctx); 367 ctx = get_bio_post_read_ctx(inode, bio, page->index);
368 if (IS_ERR(ctx)) {
369 bio_put(bio);
370 bio = NULL;
256 goto set_error_page; 371 goto set_error_page;
257 } 372 }
258 bio_set_dev(bio, bdev); 373 bio_set_dev(bio, bdev);
@@ -293,3 +408,29 @@ int ext4_mpage_readpages(struct address_space *mapping,
293 submit_bio(bio); 408 submit_bio(bio);
294 return 0; 409 return 0;
295} 410}
411
412int __init ext4_init_post_read_processing(void)
413{
414 bio_post_read_ctx_cache =
415 kmem_cache_create("ext4_bio_post_read_ctx",
416 sizeof(struct bio_post_read_ctx), 0, 0, NULL);
417 if (!bio_post_read_ctx_cache)
418 goto fail;
419 bio_post_read_ctx_pool =
420 mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
421 bio_post_read_ctx_cache);
422 if (!bio_post_read_ctx_pool)
423 goto fail_free_cache;
424 return 0;
425
426fail_free_cache:
427 kmem_cache_destroy(bio_post_read_ctx_cache);
428fail:
429 return -ENOMEM;
430}
431
432void ext4_exit_post_read_processing(void)
433{
434 mempool_destroy(bio_post_read_ctx_pool);
435 kmem_cache_destroy(bio_post_read_ctx_cache);
436}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 757819139b8f..27cd622676e7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1182,6 +1182,7 @@ void ext4_clear_inode(struct inode *inode)
1182 EXT4_I(inode)->jinode = NULL; 1182 EXT4_I(inode)->jinode = NULL;
1183 } 1183 }
1184 fscrypt_put_encryption_info(inode); 1184 fscrypt_put_encryption_info(inode);
1185 fsverity_cleanup_inode(inode);
1185} 1186}
1186 1187
1187static struct inode *ext4_nfs_get_inode(struct super_block *sb, 1188static struct inode *ext4_nfs_get_inode(struct super_block *sb,
@@ -4275,6 +4276,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
4275#ifdef CONFIG_FS_ENCRYPTION 4276#ifdef CONFIG_FS_ENCRYPTION
4276 sb->s_cop = &ext4_cryptops; 4277 sb->s_cop = &ext4_cryptops;
4277#endif 4278#endif
4279#ifdef CONFIG_FS_VERITY
4280 sb->s_vop = &ext4_verityops;
4281#endif
4278#ifdef CONFIG_QUOTA 4282#ifdef CONFIG_QUOTA
4279 sb->dq_op = &ext4_quota_operations; 4283 sb->dq_op = &ext4_quota_operations;
4280 if (ext4_has_feature_quota(sb)) 4284 if (ext4_has_feature_quota(sb))
@@ -4422,6 +4426,11 @@ no_journal:
4422 goto failed_mount_wq; 4426 goto failed_mount_wq;
4423 } 4427 }
4424 4428
4429 if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) {
4430 ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity");
4431 goto failed_mount_wq;
4432 }
4433
4425 if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) && 4434 if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
4426 !ext4_has_feature_encrypt(sb)) { 4435 !ext4_has_feature_encrypt(sb)) {
4427 ext4_set_feature_encrypt(sb); 4436 ext4_set_feature_encrypt(sb);
@@ -6098,6 +6107,10 @@ static int __init ext4_init_fs(void)
6098 6107
6099 err = ext4_init_pending(); 6108 err = ext4_init_pending();
6100 if (err) 6109 if (err)
6110 goto out7;
6111
6112 err = ext4_init_post_read_processing();
6113 if (err)
6101 goto out6; 6114 goto out6;
6102 6115
6103 err = ext4_init_pageio(); 6116 err = ext4_init_pageio();
@@ -6138,8 +6151,10 @@ out3:
6138out4: 6151out4:
6139 ext4_exit_pageio(); 6152 ext4_exit_pageio();
6140out5: 6153out5:
6141 ext4_exit_pending(); 6154 ext4_exit_post_read_processing();
6142out6: 6155out6:
6156 ext4_exit_pending();
6157out7:
6143 ext4_exit_es(); 6158 ext4_exit_es();
6144 6159
6145 return err; 6160 return err;
@@ -6156,6 +6171,7 @@ static void __exit ext4_exit_fs(void)
6156 ext4_exit_sysfs(); 6171 ext4_exit_sysfs();
6157 ext4_exit_system_zone(); 6172 ext4_exit_system_zone();
6158 ext4_exit_pageio(); 6173 ext4_exit_pageio();
6174 ext4_exit_post_read_processing();
6159 ext4_exit_es(); 6175 ext4_exit_es();
6160 ext4_exit_pending(); 6176 ext4_exit_pending();
6161} 6177}
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index b3cd7655a6ff..eb1efad0e20a 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -242,6 +242,9 @@ EXT4_ATTR_FEATURE(encryption);
242#ifdef CONFIG_UNICODE 242#ifdef CONFIG_UNICODE
243EXT4_ATTR_FEATURE(casefold); 243EXT4_ATTR_FEATURE(casefold);
244#endif 244#endif
245#ifdef CONFIG_FS_VERITY
246EXT4_ATTR_FEATURE(verity);
247#endif
245EXT4_ATTR_FEATURE(metadata_csum_seed); 248EXT4_ATTR_FEATURE(metadata_csum_seed);
246 249
247static struct attribute *ext4_feat_attrs[] = { 250static struct attribute *ext4_feat_attrs[] = {
@@ -254,6 +257,9 @@ static struct attribute *ext4_feat_attrs[] = {
254#ifdef CONFIG_UNICODE 257#ifdef CONFIG_UNICODE
255 ATTR_LIST(casefold), 258 ATTR_LIST(casefold),
256#endif 259#endif
260#ifdef CONFIG_FS_VERITY
261 ATTR_LIST(verity),
262#endif
257 ATTR_LIST(metadata_csum_seed), 263 ATTR_LIST(metadata_csum_seed),
258 NULL, 264 NULL,
259}; 265};
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
new file mode 100644
index 000000000000..d0d8a9795dd6
--- /dev/null
+++ b/fs/ext4/verity.c
@@ -0,0 +1,367 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * fs/ext4/verity.c: fs-verity support for ext4
4 *
5 * Copyright 2019 Google LLC
6 */
7
8/*
9 * Implementation of fsverity_operations for ext4.
10 *
11 * ext4 stores the verity metadata (Merkle tree and fsverity_descriptor) past
12 * the end of the file, starting at the first 64K boundary beyond i_size. This
13 * approach works because (a) verity files are readonly, and (b) pages fully
14 * beyond i_size aren't visible to userspace but can be read/written internally
15 * by ext4 with only some relatively small changes to ext4. This approach
16 * avoids having to depend on the EA_INODE feature and on rearchitecturing
17 * ext4's xattr support to support paging multi-gigabyte xattrs into memory, and
18 * to support encrypting xattrs. Note that the verity metadata *must* be
19 * encrypted when the file is, since it contains hashes of the plaintext data.
20 *
21 * Using a 64K boundary rather than a 4K one keeps things ready for
22 * architectures with 64K pages, and it doesn't necessarily waste space on-disk
23 * since there can be a hole between i_size and the start of the Merkle tree.
24 */
25
26#include <linux/quotaops.h>
27
28#include "ext4.h"
29#include "ext4_extents.h"
30#include "ext4_jbd2.h"
31
32static inline loff_t ext4_verity_metadata_pos(const struct inode *inode)
33{
34 return round_up(inode->i_size, 65536);
35}
36
37/*
38 * Read some verity metadata from the inode. __vfs_read() can't be used because
39 * we need to read beyond i_size.
40 */
41static int pagecache_read(struct inode *inode, void *buf, size_t count,
42 loff_t pos)
43{
44 while (count) {
45 size_t n = min_t(size_t, count,
46 PAGE_SIZE - offset_in_page(pos));
47 struct page *page;
48 void *addr;
49
50 page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT,
51 NULL);
52 if (IS_ERR(page))
53 return PTR_ERR(page);
54
55 addr = kmap_atomic(page);
56 memcpy(buf, addr + offset_in_page(pos), n);
57 kunmap_atomic(addr);
58
59 put_page(page);
60
61 buf += n;
62 pos += n;
63 count -= n;
64 }
65 return 0;
66}
67
68/*
69 * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY.
70 * kernel_write() can't be used because the file descriptor is readonly.
71 */
72static int pagecache_write(struct inode *inode, const void *buf, size_t count,
73 loff_t pos)
74{
75 if (pos + count > inode->i_sb->s_maxbytes)
76 return -EFBIG;
77
78 while (count) {
79 size_t n = min_t(size_t, count,
80 PAGE_SIZE - offset_in_page(pos));
81 struct page *page;
82 void *fsdata;
83 void *addr;
84 int res;
85
86 res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0,
87 &page, &fsdata);
88 if (res)
89 return res;
90
91 addr = kmap_atomic(page);
92 memcpy(addr + offset_in_page(pos), buf, n);
93 kunmap_atomic(addr);
94
95 res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n,
96 page, fsdata);
97 if (res < 0)
98 return res;
99 if (res != n)
100 return -EIO;
101
102 buf += n;
103 pos += n;
104 count -= n;
105 }
106 return 0;
107}
108
109static int ext4_begin_enable_verity(struct file *filp)
110{
111 struct inode *inode = file_inode(filp);
112 const int credits = 2; /* superblock and inode for ext4_orphan_add() */
113 handle_t *handle;
114 int err;
115
116 if (ext4_verity_in_progress(inode))
117 return -EBUSY;
118
119 /*
120 * Since the file was opened readonly, we have to initialize the jbd
121 * inode and quotas here and not rely on ->open() doing it. This must
122 * be done before evicting the inline data.
123 */
124
125 err = ext4_inode_attach_jinode(inode);
126 if (err)
127 return err;
128
129 err = dquot_initialize(inode);
130 if (err)
131 return err;
132
133 err = ext4_convert_inline_data(inode);
134 if (err)
135 return err;
136
137 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
138 ext4_warning_inode(inode,
139 "verity is only allowed on extent-based files");
140 return -EOPNOTSUPP;
141 }
142
143 /*
144 * ext4 uses the last allocated block to find the verity descriptor, so
145 * we must remove any other blocks past EOF which might confuse things.
146 */
147 err = ext4_truncate(inode);
148 if (err)
149 return err;
150
151 handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
152 if (IS_ERR(handle))
153 return PTR_ERR(handle);
154
155 err = ext4_orphan_add(handle, inode);
156 if (err == 0)
157 ext4_set_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
158
159 ext4_journal_stop(handle);
160 return err;
161}
162
163/*
164 * ext4 stores the verity descriptor beginning on the next filesystem block
165 * boundary after the Merkle tree. Then, the descriptor size is stored in the
166 * last 4 bytes of the last allocated filesystem block --- which is either the
167 * block in which the descriptor ends, or the next block after that if there
168 * weren't at least 4 bytes remaining.
169 *
170 * We can't simply store the descriptor in an xattr because it *must* be
171 * encrypted when ext4 encryption is used, but ext4 encryption doesn't encrypt
172 * xattrs. Also, if the descriptor includes a large signature blob it may be
173 * too large to store in an xattr without the EA_INODE feature.
174 */
175static int ext4_write_verity_descriptor(struct inode *inode, const void *desc,
176 size_t desc_size, u64 merkle_tree_size)
177{
178 const u64 desc_pos = round_up(ext4_verity_metadata_pos(inode) +
179 merkle_tree_size, i_blocksize(inode));
180 const u64 desc_end = desc_pos + desc_size;
181 const __le32 desc_size_disk = cpu_to_le32(desc_size);
182 const u64 desc_size_pos = round_up(desc_end + sizeof(desc_size_disk),
183 i_blocksize(inode)) -
184 sizeof(desc_size_disk);
185 int err;
186
187 err = pagecache_write(inode, desc, desc_size, desc_pos);
188 if (err)
189 return err;
190
191 return pagecache_write(inode, &desc_size_disk, sizeof(desc_size_disk),
192 desc_size_pos);
193}
194
195static int ext4_end_enable_verity(struct file *filp, const void *desc,
196 size_t desc_size, u64 merkle_tree_size)
197{
198 struct inode *inode = file_inode(filp);
199 const int credits = 2; /* superblock and inode for ext4_orphan_del() */
200 handle_t *handle;
201 int err = 0;
202 int err2;
203
204 if (desc != NULL) {
205 /* Succeeded; write the verity descriptor. */
206 err = ext4_write_verity_descriptor(inode, desc, desc_size,
207 merkle_tree_size);
208
209 /* Write all pages before clearing VERITY_IN_PROGRESS. */
210 if (!err)
211 err = filemap_write_and_wait(inode->i_mapping);
212 }
213
214 /* If we failed, truncate anything we wrote past i_size. */
215 if (desc == NULL || err)
216 ext4_truncate(inode);
217
218 /*
219 * We must always clean up by clearing EXT4_STATE_VERITY_IN_PROGRESS and
220 * deleting the inode from the orphan list, even if something failed.
221 * If everything succeeded, we'll also set the verity bit in the same
222 * transaction.
223 */
224
225 ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
226
227 handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
228 if (IS_ERR(handle)) {
229 ext4_orphan_del(NULL, inode);
230 return PTR_ERR(handle);
231 }
232
233 err2 = ext4_orphan_del(handle, inode);
234 if (err2)
235 goto out_stop;
236
237 if (desc != NULL && !err) {
238 struct ext4_iloc iloc;
239
240 err = ext4_reserve_inode_write(handle, inode, &iloc);
241 if (err)
242 goto out_stop;
243 ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
244 ext4_set_inode_flags(inode);
245 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
246 }
247out_stop:
248 ext4_journal_stop(handle);
249 return err ?: err2;
250}
251
252static int ext4_get_verity_descriptor_location(struct inode *inode,
253 size_t *desc_size_ret,
254 u64 *desc_pos_ret)
255{
256 struct ext4_ext_path *path;
257 struct ext4_extent *last_extent;
258 u32 end_lblk;
259 u64 desc_size_pos;
260 __le32 desc_size_disk;
261 u32 desc_size;
262 u64 desc_pos;
263 int err;
264
265 /*
266 * Descriptor size is in last 4 bytes of last allocated block.
267 * See ext4_write_verity_descriptor().
268 */
269
270 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
271 EXT4_ERROR_INODE(inode, "verity file doesn't use extents");
272 return -EFSCORRUPTED;
273 }
274
275 path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
276 if (IS_ERR(path))
277 return PTR_ERR(path);
278
279 last_extent = path[path->p_depth].p_ext;
280 if (!last_extent) {
281 EXT4_ERROR_INODE(inode, "verity file has no extents");
282 ext4_ext_drop_refs(path);
283 kfree(path);
284 return -EFSCORRUPTED;
285 }
286
287 end_lblk = le32_to_cpu(last_extent->ee_block) +
288 ext4_ext_get_actual_len(last_extent);
289 desc_size_pos = (u64)end_lblk << inode->i_blkbits;
290 ext4_ext_drop_refs(path);
291 kfree(path);
292
293 if (desc_size_pos < sizeof(desc_size_disk))
294 goto bad;
295 desc_size_pos -= sizeof(desc_size_disk);
296
297 err = pagecache_read(inode, &desc_size_disk, sizeof(desc_size_disk),
298 desc_size_pos);
299 if (err)
300 return err;
301 desc_size = le32_to_cpu(desc_size_disk);
302
303 /*
304 * The descriptor is stored just before the desc_size_disk, but starting
305 * on a filesystem block boundary.
306 */
307
308 if (desc_size > INT_MAX || desc_size > desc_size_pos)
309 goto bad;
310
311 desc_pos = round_down(desc_size_pos - desc_size, i_blocksize(inode));
312 if (desc_pos < ext4_verity_metadata_pos(inode))
313 goto bad;
314
315 *desc_size_ret = desc_size;
316 *desc_pos_ret = desc_pos;
317 return 0;
318
319bad:
320 EXT4_ERROR_INODE(inode, "verity file corrupted; can't find descriptor");
321 return -EFSCORRUPTED;
322}
323
324static int ext4_get_verity_descriptor(struct inode *inode, void *buf,
325 size_t buf_size)
326{
327 size_t desc_size = 0;
328 u64 desc_pos = 0;
329 int err;
330
331 err = ext4_get_verity_descriptor_location(inode, &desc_size, &desc_pos);
332 if (err)
333 return err;
334
335 if (buf_size) {
336 if (desc_size > buf_size)
337 return -ERANGE;
338 err = pagecache_read(inode, buf, desc_size, desc_pos);
339 if (err)
340 return err;
341 }
342 return desc_size;
343}
344
345static struct page *ext4_read_merkle_tree_page(struct inode *inode,
346 pgoff_t index)
347{
348 index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
349
350 return read_mapping_page(inode->i_mapping, index, NULL);
351}
352
353static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf,
354 u64 index, int log_blocksize)
355{
356 loff_t pos = ext4_verity_metadata_pos(inode) + (index << log_blocksize);
357
358 return pagecache_write(inode, buf, 1 << log_blocksize, pos);
359}
360
361const struct fsverity_operations ext4_verityops = {
362 .begin_enable_verity = ext4_begin_enable_verity,
363 .end_enable_verity = ext4_end_enable_verity,
364 .get_verity_descriptor = ext4_get_verity_descriptor,
365 .read_merkle_tree_page = ext4_read_merkle_tree_page,
366 .write_merkle_tree_block = ext4_write_merkle_tree_block,
367};
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index 776c4b936504..2aaecc63834f 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -8,3 +8,4 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
8f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o 8f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
9f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o 9f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
10f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o 10f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o
11f2fs-$(CONFIG_FS_VERITY) += verity.o
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index abbf14e9bd72..54cad80acb7d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -74,6 +74,7 @@ static enum count_type __read_io_type(struct page *page)
74enum bio_post_read_step { 74enum bio_post_read_step {
75 STEP_INITIAL = 0, 75 STEP_INITIAL = 0,
76 STEP_DECRYPT, 76 STEP_DECRYPT,
77 STEP_VERITY,
77}; 78};
78 79
79struct bio_post_read_ctx { 80struct bio_post_read_ctx {
@@ -120,8 +121,23 @@ static void decrypt_work(struct work_struct *work)
120 bio_post_read_processing(ctx); 121 bio_post_read_processing(ctx);
121} 122}
122 123
124static void verity_work(struct work_struct *work)
125{
126 struct bio_post_read_ctx *ctx =
127 container_of(work, struct bio_post_read_ctx, work);
128
129 fsverity_verify_bio(ctx->bio);
130
131 bio_post_read_processing(ctx);
132}
133
123static void bio_post_read_processing(struct bio_post_read_ctx *ctx) 134static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
124{ 135{
136 /*
137 * We use different work queues for decryption and for verity because
138 * verity may require reading metadata pages that need decryption, and
139 * we shouldn't recurse to the same workqueue.
140 */
125 switch (++ctx->cur_step) { 141 switch (++ctx->cur_step) {
126 case STEP_DECRYPT: 142 case STEP_DECRYPT:
127 if (ctx->enabled_steps & (1 << STEP_DECRYPT)) { 143 if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
@@ -131,6 +147,14 @@ static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
131 } 147 }
132 ctx->cur_step++; 148 ctx->cur_step++;
133 /* fall-through */ 149 /* fall-through */
150 case STEP_VERITY:
151 if (ctx->enabled_steps & (1 << STEP_VERITY)) {
152 INIT_WORK(&ctx->work, verity_work);
153 fsverity_enqueue_verify_work(&ctx->work);
154 return;
155 }
156 ctx->cur_step++;
157 /* fall-through */
134 default: 158 default:
135 __read_end_io(ctx->bio); 159 __read_end_io(ctx->bio);
136 } 160 }
@@ -608,8 +632,15 @@ out:
608 up_write(&io->io_rwsem); 632 up_write(&io->io_rwsem);
609} 633}
610 634
635static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
636{
637 return fsverity_active(inode) &&
638 idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
639}
640
611static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, 641static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
612 unsigned nr_pages, unsigned op_flag) 642 unsigned nr_pages, unsigned op_flag,
643 pgoff_t first_idx)
613{ 644{
614 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 645 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
615 struct bio *bio; 646 struct bio *bio;
@@ -625,6 +656,10 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
625 656
626 if (f2fs_encrypted_file(inode)) 657 if (f2fs_encrypted_file(inode))
627 post_read_steps |= 1 << STEP_DECRYPT; 658 post_read_steps |= 1 << STEP_DECRYPT;
659
660 if (f2fs_need_verity(inode, first_idx))
661 post_read_steps |= 1 << STEP_VERITY;
662
628 if (post_read_steps) { 663 if (post_read_steps) {
629 ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); 664 ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
630 if (!ctx) { 665 if (!ctx) {
@@ -646,7 +681,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
646 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 681 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
647 struct bio *bio; 682 struct bio *bio;
648 683
649 bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0); 684 bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index);
650 if (IS_ERR(bio)) 685 if (IS_ERR(bio))
651 return PTR_ERR(bio); 686 return PTR_ERR(bio);
652 687
@@ -1569,6 +1604,15 @@ out:
1569 return ret; 1604 return ret;
1570} 1605}
1571 1606
1607static inline loff_t f2fs_readpage_limit(struct inode *inode)
1608{
1609 if (IS_ENABLED(CONFIG_FS_VERITY) &&
1610 (IS_VERITY(inode) || f2fs_verity_in_progress(inode)))
1611 return inode->i_sb->s_maxbytes;
1612
1613 return i_size_read(inode);
1614}
1615
1572static int f2fs_read_single_page(struct inode *inode, struct page *page, 1616static int f2fs_read_single_page(struct inode *inode, struct page *page,
1573 unsigned nr_pages, 1617 unsigned nr_pages,
1574 struct f2fs_map_blocks *map, 1618 struct f2fs_map_blocks *map,
@@ -1587,7 +1631,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
1587 1631
1588 block_in_file = (sector_t)page_index(page); 1632 block_in_file = (sector_t)page_index(page);
1589 last_block = block_in_file + nr_pages; 1633 last_block = block_in_file + nr_pages;
1590 last_block_in_file = (i_size_read(inode) + blocksize - 1) >> 1634 last_block_in_file = (f2fs_readpage_limit(inode) + blocksize - 1) >>
1591 blkbits; 1635 blkbits;
1592 if (last_block > last_block_in_file) 1636 if (last_block > last_block_in_file)
1593 last_block = last_block_in_file; 1637 last_block = last_block_in_file;
@@ -1632,6 +1676,11 @@ got_it:
1632 } else { 1676 } else {
1633zero_out: 1677zero_out:
1634 zero_user_segment(page, 0, PAGE_SIZE); 1678 zero_user_segment(page, 0, PAGE_SIZE);
1679 if (f2fs_need_verity(inode, page->index) &&
1680 !fsverity_verify_page(page)) {
1681 ret = -EIO;
1682 goto out;
1683 }
1635 if (!PageUptodate(page)) 1684 if (!PageUptodate(page))
1636 SetPageUptodate(page); 1685 SetPageUptodate(page);
1637 unlock_page(page); 1686 unlock_page(page);
@@ -1650,7 +1699,7 @@ submit_and_realloc:
1650 } 1699 }
1651 if (bio == NULL) { 1700 if (bio == NULL) {
1652 bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, 1701 bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
1653 is_readahead ? REQ_RAHEAD : 0); 1702 is_readahead ? REQ_RAHEAD : 0, page->index);
1654 if (IS_ERR(bio)) { 1703 if (IS_ERR(bio)) {
1655 ret = PTR_ERR(bio); 1704 ret = PTR_ERR(bio);
1656 bio = NULL; 1705 bio = NULL;
@@ -2052,7 +2101,7 @@ static int __write_data_page(struct page *page, bool *submitted,
2052 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 2101 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
2053 goto redirty_out; 2102 goto redirty_out;
2054 2103
2055 if (page->index < end_index) 2104 if (page->index < end_index || f2fs_verity_in_progress(inode))
2056 goto write; 2105 goto write;
2057 2106
2058 /* 2107 /*
@@ -2427,7 +2476,8 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
2427 struct inode *inode = mapping->host; 2476 struct inode *inode = mapping->host;
2428 loff_t i_size = i_size_read(inode); 2477 loff_t i_size = i_size_read(inode);
2429 2478
2430 if (to > i_size) { 2479 /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
2480 if (to > i_size && !f2fs_verity_in_progress(inode)) {
2431 down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 2481 down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
2432 down_write(&F2FS_I(inode)->i_mmap_sem); 2482 down_write(&F2FS_I(inode)->i_mmap_sem);
2433 2483
@@ -2458,7 +2508,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
2458 * the block addresses when there is no need to fill the page. 2508 * the block addresses when there is no need to fill the page.
2459 */ 2509 */
2460 if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE && 2510 if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
2461 !is_inode_flag_set(inode, FI_NO_PREALLOC)) 2511 !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
2512 !f2fs_verity_in_progress(inode))
2462 return 0; 2513 return 0;
2463 2514
2464 /* f2fs_lock_op avoids race between write CP and convert_inline_page */ 2515 /* f2fs_lock_op avoids race between write CP and convert_inline_page */
@@ -2597,7 +2648,8 @@ repeat:
2597 if (len == PAGE_SIZE || PageUptodate(page)) 2648 if (len == PAGE_SIZE || PageUptodate(page))
2598 return 0; 2649 return 0;
2599 2650
2600 if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) { 2651 if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
2652 !f2fs_verity_in_progress(inode)) {
2601 zero_user_segment(page, len, PAGE_SIZE); 2653 zero_user_segment(page, len, PAGE_SIZE);
2602 return 0; 2654 return 0;
2603 } 2655 }
@@ -2660,7 +2712,8 @@ static int f2fs_write_end(struct file *file,
2660 2712
2661 set_page_dirty(page); 2713 set_page_dirty(page);
2662 2714
2663 if (pos + copied > i_size_read(inode)) 2715 if (pos + copied > i_size_read(inode) &&
2716 !f2fs_verity_in_progress(inode))
2664 f2fs_i_size_write(inode, pos + copied); 2717 f2fs_i_size_write(inode, pos + copied);
2665unlock_out: 2718unlock_out:
2666 f2fs_put_page(page, 1); 2719 f2fs_put_page(page, 1);
@@ -3104,7 +3157,9 @@ void f2fs_clear_page_cache_dirty_tag(struct page *page)
3104 3157
3105int __init f2fs_init_post_read_processing(void) 3158int __init f2fs_init_post_read_processing(void)
3106{ 3159{
3107 bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0); 3160 bio_post_read_ctx_cache =
3161 kmem_cache_create("f2fs_bio_post_read_ctx",
3162 sizeof(struct bio_post_read_ctx), 0, 0, NULL);
3108 if (!bio_post_read_ctx_cache) 3163 if (!bio_post_read_ctx_cache)
3109 goto fail; 3164 goto fail;
3110 bio_post_read_ctx_pool = 3165 bio_post_read_ctx_pool =
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 17382da7f0bd..7c5f121edac5 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -25,6 +25,7 @@
25#include <crypto/hash.h> 25#include <crypto/hash.h>
26 26
27#include <linux/fscrypt.h> 27#include <linux/fscrypt.h>
28#include <linux/fsverity.h>
28 29
29#ifdef CONFIG_F2FS_CHECK_FS 30#ifdef CONFIG_F2FS_CHECK_FS
30#define f2fs_bug_on(sbi, condition) BUG_ON(condition) 31#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
@@ -151,7 +152,7 @@ struct f2fs_mount_info {
151#define F2FS_FEATURE_QUOTA_INO 0x0080 152#define F2FS_FEATURE_QUOTA_INO 0x0080
152#define F2FS_FEATURE_INODE_CRTIME 0x0100 153#define F2FS_FEATURE_INODE_CRTIME 0x0100
153#define F2FS_FEATURE_LOST_FOUND 0x0200 154#define F2FS_FEATURE_LOST_FOUND 0x0200
154#define F2FS_FEATURE_VERITY 0x0400 /* reserved */ 155#define F2FS_FEATURE_VERITY 0x0400
155#define F2FS_FEATURE_SB_CHKSUM 0x0800 156#define F2FS_FEATURE_SB_CHKSUM 0x0800
156 157
157#define __F2FS_HAS_FEATURE(raw_super, mask) \ 158#define __F2FS_HAS_FEATURE(raw_super, mask) \
@@ -630,7 +631,7 @@ enum {
630#define FADVISE_ENC_NAME_BIT 0x08 631#define FADVISE_ENC_NAME_BIT 0x08
631#define FADVISE_KEEP_SIZE_BIT 0x10 632#define FADVISE_KEEP_SIZE_BIT 0x10
632#define FADVISE_HOT_BIT 0x20 633#define FADVISE_HOT_BIT 0x20
633#define FADVISE_VERITY_BIT 0x40 /* reserved */ 634#define FADVISE_VERITY_BIT 0x40
634 635
635#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT) 636#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
636 637
@@ -650,6 +651,8 @@ enum {
650#define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT) 651#define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT)
651#define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT) 652#define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT)
652#define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT) 653#define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT)
654#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
655#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
653 656
654#define DEF_DIR_LEVEL 0 657#define DEF_DIR_LEVEL 0
655 658
@@ -2412,6 +2415,7 @@ enum {
2412 FI_PROJ_INHERIT, /* indicate file inherits projectid */ 2415 FI_PROJ_INHERIT, /* indicate file inherits projectid */
2413 FI_PIN_FILE, /* indicate file should not be gced */ 2416 FI_PIN_FILE, /* indicate file should not be gced */
2414 FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ 2417 FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */
2418 FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
2415}; 2419};
2416 2420
2417static inline void __mark_inode_dirty_flag(struct inode *inode, 2421static inline void __mark_inode_dirty_flag(struct inode *inode,
@@ -2451,6 +2455,12 @@ static inline void clear_inode_flag(struct inode *inode, int flag)
2451 __mark_inode_dirty_flag(inode, flag, false); 2455 __mark_inode_dirty_flag(inode, flag, false);
2452} 2456}
2453 2457
2458static inline bool f2fs_verity_in_progress(struct inode *inode)
2459{
2460 return IS_ENABLED(CONFIG_FS_VERITY) &&
2461 is_inode_flag_set(inode, FI_VERITY_IN_PROGRESS);
2462}
2463
2454static inline void set_acl_inode(struct inode *inode, umode_t mode) 2464static inline void set_acl_inode(struct inode *inode, umode_t mode)
2455{ 2465{
2456 F2FS_I(inode)->i_acl_mode = mode; 2466 F2FS_I(inode)->i_acl_mode = mode;
@@ -3521,6 +3531,9 @@ void f2fs_exit_sysfs(void);
3521int f2fs_register_sysfs(struct f2fs_sb_info *sbi); 3531int f2fs_register_sysfs(struct f2fs_sb_info *sbi);
3522void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi); 3532void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi);
3523 3533
3534/* verity.c */
3535extern const struct fsverity_operations f2fs_verityops;
3536
3524/* 3537/*
3525 * crypto support 3538 * crypto support
3526 */ 3539 */
@@ -3543,7 +3556,7 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode)
3543 */ 3556 */
3544static inline bool f2fs_post_read_required(struct inode *inode) 3557static inline bool f2fs_post_read_required(struct inode *inode)
3545{ 3558{
3546 return f2fs_encrypted_file(inode); 3559 return f2fs_encrypted_file(inode) || fsverity_active(inode);
3547} 3560}
3548 3561
3549#define F2FS_FEATURE_FUNCS(name, flagname) \ 3562#define F2FS_FEATURE_FUNCS(name, flagname) \
@@ -3561,6 +3574,7 @@ F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR);
3561F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO); 3574F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO);
3562F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME); 3575F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME);
3563F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); 3576F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
3577F2FS_FEATURE_FUNCS(verity, VERITY);
3564F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); 3578F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
3565 3579
3566#ifdef CONFIG_BLK_DEV_ZONED 3580#ifdef CONFIG_BLK_DEV_ZONED
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 6a7349f9ac15..39fffc19e00c 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -496,6 +496,10 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
496 if (err) 496 if (err)
497 return err; 497 return err;
498 498
499 err = fsverity_file_open(inode, filp);
500 if (err)
501 return err;
502
499 filp->f_mode |= FMODE_NOWAIT; 503 filp->f_mode |= FMODE_NOWAIT;
500 504
501 return dquot_file_open(inode, filp); 505 return dquot_file_open(inode, filp);
@@ -778,6 +782,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
778 if (err) 782 if (err)
779 return err; 783 return err;
780 784
785 err = fsverity_prepare_setattr(dentry, attr);
786 if (err)
787 return err;
788
781 if (is_quota_modification(inode, attr)) { 789 if (is_quota_modification(inode, attr)) {
782 err = dquot_initialize(inode); 790 err = dquot_initialize(inode);
783 if (err) 791 if (err)
@@ -1705,7 +1713,8 @@ static const struct {
1705 FS_PROJINHERIT_FL | \ 1713 FS_PROJINHERIT_FL | \
1706 FS_ENCRYPT_FL | \ 1714 FS_ENCRYPT_FL | \
1707 FS_INLINE_DATA_FL | \ 1715 FS_INLINE_DATA_FL | \
1708 FS_NOCOW_FL) 1716 FS_NOCOW_FL | \
1717 FS_VERITY_FL)
1709 1718
1710#define F2FS_SETTABLE_FS_FL ( \ 1719#define F2FS_SETTABLE_FS_FL ( \
1711 FS_SYNC_FL | \ 1720 FS_SYNC_FL | \
@@ -1750,6 +1759,8 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
1750 1759
1751 if (IS_ENCRYPTED(inode)) 1760 if (IS_ENCRYPTED(inode))
1752 fsflags |= FS_ENCRYPT_FL; 1761 fsflags |= FS_ENCRYPT_FL;
1762 if (IS_VERITY(inode))
1763 fsflags |= FS_VERITY_FL;
1753 if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) 1764 if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
1754 fsflags |= FS_INLINE_DATA_FL; 1765 fsflags |= FS_INLINE_DATA_FL;
1755 if (is_inode_flag_set(inode, FI_PIN_FILE)) 1766 if (is_inode_flag_set(inode, FI_PIN_FILE))
@@ -3103,6 +3114,30 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
3103 return ret; 3114 return ret;
3104} 3115}
3105 3116
3117static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
3118{
3119 struct inode *inode = file_inode(filp);
3120
3121 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3122
3123 if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) {
3124 f2fs_warn(F2FS_I_SB(inode),
3125 "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem.\n",
3126 inode->i_ino);
3127 return -EOPNOTSUPP;
3128 }
3129
3130 return fsverity_ioctl_enable(filp, (const void __user *)arg);
3131}
3132
3133static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg)
3134{
3135 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
3136 return -EOPNOTSUPP;
3137
3138 return fsverity_ioctl_measure(filp, (void __user *)arg);
3139}
3140
3106long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 3141long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3107{ 3142{
3108 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) 3143 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
@@ -3171,6 +3206,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3171 return f2fs_ioc_precache_extents(filp, arg); 3206 return f2fs_ioc_precache_extents(filp, arg);
3172 case F2FS_IOC_RESIZE_FS: 3207 case F2FS_IOC_RESIZE_FS:
3173 return f2fs_ioc_resize_fs(filp, arg); 3208 return f2fs_ioc_resize_fs(filp, arg);
3209 case FS_IOC_ENABLE_VERITY:
3210 return f2fs_ioc_enable_verity(filp, arg);
3211 case FS_IOC_MEASURE_VERITY:
3212 return f2fs_ioc_measure_verity(filp, arg);
3174 default: 3213 default:
3175 return -ENOTTY; 3214 return -ENOTTY;
3176 } 3215 }
@@ -3290,6 +3329,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
3290 case F2FS_IOC_SET_PIN_FILE: 3329 case F2FS_IOC_SET_PIN_FILE:
3291 case F2FS_IOC_PRECACHE_EXTENTS: 3330 case F2FS_IOC_PRECACHE_EXTENTS:
3292 case F2FS_IOC_RESIZE_FS: 3331 case F2FS_IOC_RESIZE_FS:
3332 case FS_IOC_ENABLE_VERITY:
3333 case FS_IOC_MEASURE_VERITY:
3293 break; 3334 break;
3294 default: 3335 default:
3295 return -ENOIOCTLCMD; 3336 return -ENOIOCTLCMD;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index a33d7a849b2d..06da75d418e0 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -46,9 +46,11 @@ void f2fs_set_inode_flags(struct inode *inode)
46 new_fl |= S_DIRSYNC; 46 new_fl |= S_DIRSYNC;
47 if (file_is_encrypt(inode)) 47 if (file_is_encrypt(inode))
48 new_fl |= S_ENCRYPTED; 48 new_fl |= S_ENCRYPTED;
49 if (file_is_verity(inode))
50 new_fl |= S_VERITY;
49 inode_set_flags(inode, new_fl, 51 inode_set_flags(inode, new_fl,
50 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC| 52 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|
51 S_ENCRYPTED); 53 S_ENCRYPTED|S_VERITY);
52} 54}
53 55
54static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) 56static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -733,6 +735,7 @@ no_delete:
733 } 735 }
734out_clear: 736out_clear:
735 fscrypt_put_encryption_info(inode); 737 fscrypt_put_encryption_info(inode);
738 fsverity_cleanup_inode(inode);
736 clear_inode(inode); 739 clear_inode(inode);
737} 740}
738 741
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index e15bd29bd453..f43befda0e1a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3146,6 +3146,9 @@ try_onemore:
3146#ifdef CONFIG_FS_ENCRYPTION 3146#ifdef CONFIG_FS_ENCRYPTION
3147 sb->s_cop = &f2fs_cryptops; 3147 sb->s_cop = &f2fs_cryptops;
3148#endif 3148#endif
3149#ifdef CONFIG_FS_VERITY
3150 sb->s_vop = &f2fs_verityops;
3151#endif
3149 sb->s_xattr = f2fs_xattr_handlers; 3152 sb->s_xattr = f2fs_xattr_handlers;
3150 sb->s_export_op = &f2fs_export_ops; 3153 sb->s_export_op = &f2fs_export_ops;
3151 sb->s_magic = F2FS_SUPER_MAGIC; 3154 sb->s_magic = F2FS_SUPER_MAGIC;
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 3aeacd0aacfd..0cd64f994068 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -131,6 +131,9 @@ static ssize_t features_show(struct f2fs_attr *a,
131 if (f2fs_sb_has_lost_found(sbi)) 131 if (f2fs_sb_has_lost_found(sbi))
132 len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", 132 len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
133 len ? ", " : "", "lost_found"); 133 len ? ", " : "", "lost_found");
134 if (f2fs_sb_has_verity(sbi))
135 len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
136 len ? ", " : "", "verity");
134 if (f2fs_sb_has_sb_chksum(sbi)) 137 if (f2fs_sb_has_sb_chksum(sbi))
135 len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", 138 len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
136 len ? ", " : "", "sb_checksum"); 139 len ? ", " : "", "sb_checksum");
@@ -364,6 +367,7 @@ enum feat_id {
364 FEAT_QUOTA_INO, 367 FEAT_QUOTA_INO,
365 FEAT_INODE_CRTIME, 368 FEAT_INODE_CRTIME,
366 FEAT_LOST_FOUND, 369 FEAT_LOST_FOUND,
370 FEAT_VERITY,
367 FEAT_SB_CHECKSUM, 371 FEAT_SB_CHECKSUM,
368}; 372};
369 373
@@ -381,6 +385,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a,
381 case FEAT_QUOTA_INO: 385 case FEAT_QUOTA_INO:
382 case FEAT_INODE_CRTIME: 386 case FEAT_INODE_CRTIME:
383 case FEAT_LOST_FOUND: 387 case FEAT_LOST_FOUND:
388 case FEAT_VERITY:
384 case FEAT_SB_CHECKSUM: 389 case FEAT_SB_CHECKSUM:
385 return snprintf(buf, PAGE_SIZE, "supported\n"); 390 return snprintf(buf, PAGE_SIZE, "supported\n");
386 } 391 }
@@ -470,6 +475,9 @@ F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR);
470F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); 475F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO);
471F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME); 476F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME);
472F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND); 477F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND);
478#ifdef CONFIG_FS_VERITY
479F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY);
480#endif
473F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); 481F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM);
474 482
475#define ATTR_LIST(name) (&f2fs_attr_##name.attr) 483#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
@@ -534,6 +542,9 @@ static struct attribute *f2fs_feat_attrs[] = {
534 ATTR_LIST(quota_ino), 542 ATTR_LIST(quota_ino),
535 ATTR_LIST(inode_crtime), 543 ATTR_LIST(inode_crtime),
536 ATTR_LIST(lost_found), 544 ATTR_LIST(lost_found),
545#ifdef CONFIG_FS_VERITY
546 ATTR_LIST(verity),
547#endif
537 ATTR_LIST(sb_checksum), 548 ATTR_LIST(sb_checksum),
538 NULL, 549 NULL,
539}; 550};
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
new file mode 100644
index 000000000000..a401ef72bc82
--- /dev/null
+++ b/fs/f2fs/verity.c
@@ -0,0 +1,247 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * fs/f2fs/verity.c: fs-verity support for f2fs
4 *
5 * Copyright 2019 Google LLC
6 */
7
8/*
9 * Implementation of fsverity_operations for f2fs.
10 *
11 * Like ext4, f2fs stores the verity metadata (Merkle tree and
12 * fsverity_descriptor) past the end of the file, starting at the first 64K
13 * boundary beyond i_size. This approach works because (a) verity files are
14 * readonly, and (b) pages fully beyond i_size aren't visible to userspace but
15 * can be read/written internally by f2fs with only some relatively small
16 * changes to f2fs. Extended attributes cannot be used because (a) f2fs limits
17 * the total size of an inode's xattr entries to 4096 bytes, which wouldn't be
18 * enough for even a single Merkle tree block, and (b) f2fs encryption doesn't
19 * encrypt xattrs, yet the verity metadata *must* be encrypted when the file is
20 * because it contains hashes of the plaintext data.
21 *
22 * Using a 64K boundary rather than a 4K one keeps things ready for
23 * architectures with 64K pages, and it doesn't necessarily waste space on-disk
24 * since there can be a hole between i_size and the start of the Merkle tree.
25 */
26
27#include <linux/f2fs_fs.h>
28
29#include "f2fs.h"
30#include "xattr.h"
31
32static inline loff_t f2fs_verity_metadata_pos(const struct inode *inode)
33{
34 return round_up(inode->i_size, 65536);
35}
36
37/*
38 * Read some verity metadata from the inode. __vfs_read() can't be used because
39 * we need to read beyond i_size.
40 */
41static int pagecache_read(struct inode *inode, void *buf, size_t count,
42 loff_t pos)
43{
44 while (count) {
45 size_t n = min_t(size_t, count,
46 PAGE_SIZE - offset_in_page(pos));
47 struct page *page;
48 void *addr;
49
50 page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT,
51 NULL);
52 if (IS_ERR(page))
53 return PTR_ERR(page);
54
55 addr = kmap_atomic(page);
56 memcpy(buf, addr + offset_in_page(pos), n);
57 kunmap_atomic(addr);
58
59 put_page(page);
60
61 buf += n;
62 pos += n;
63 count -= n;
64 }
65 return 0;
66}
67
68/*
69 * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY.
70 * kernel_write() can't be used because the file descriptor is readonly.
71 */
72static int pagecache_write(struct inode *inode, const void *buf, size_t count,
73 loff_t pos)
74{
75 if (pos + count > inode->i_sb->s_maxbytes)
76 return -EFBIG;
77
78 while (count) {
79 size_t n = min_t(size_t, count,
80 PAGE_SIZE - offset_in_page(pos));
81 struct page *page;
82 void *fsdata;
83 void *addr;
84 int res;
85
86 res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0,
87 &page, &fsdata);
88 if (res)
89 return res;
90
91 addr = kmap_atomic(page);
92 memcpy(addr + offset_in_page(pos), buf, n);
93 kunmap_atomic(addr);
94
95 res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n,
96 page, fsdata);
97 if (res < 0)
98 return res;
99 if (res != n)
100 return -EIO;
101
102 buf += n;
103 pos += n;
104 count -= n;
105 }
106 return 0;
107}
108
109/*
110 * Format of f2fs verity xattr. This points to the location of the verity
111 * descriptor within the file data rather than containing it directly because
112 * the verity descriptor *must* be encrypted when f2fs encryption is used. But,
113 * f2fs encryption does not encrypt xattrs.
114 */
115struct fsverity_descriptor_location {
116 __le32 version;
117 __le32 size;
118 __le64 pos;
119};
120
121static int f2fs_begin_enable_verity(struct file *filp)
122{
123 struct inode *inode = file_inode(filp);
124 int err;
125
126 if (f2fs_verity_in_progress(inode))
127 return -EBUSY;
128
129 if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
130 return -EOPNOTSUPP;
131
132 /*
133 * Since the file was opened readonly, we have to initialize the quotas
134 * here and not rely on ->open() doing it. This must be done before
135 * evicting the inline data.
136 */
137 err = dquot_initialize(inode);
138 if (err)
139 return err;
140
141 err = f2fs_convert_inline_inode(inode);
142 if (err)
143 return err;
144
145 set_inode_flag(inode, FI_VERITY_IN_PROGRESS);
146 return 0;
147}
148
149static int f2fs_end_enable_verity(struct file *filp, const void *desc,
150 size_t desc_size, u64 merkle_tree_size)
151{
152 struct inode *inode = file_inode(filp);
153 u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size;
154 struct fsverity_descriptor_location dloc = {
155 .version = cpu_to_le32(1),
156 .size = cpu_to_le32(desc_size),
157 .pos = cpu_to_le64(desc_pos),
158 };
159 int err = 0;
160
161 if (desc != NULL) {
162 /* Succeeded; write the verity descriptor. */
163 err = pagecache_write(inode, desc, desc_size, desc_pos);
164
165 /* Write all pages before clearing FI_VERITY_IN_PROGRESS. */
166 if (!err)
167 err = filemap_write_and_wait(inode->i_mapping);
168 }
169
170 /* If we failed, truncate anything we wrote past i_size. */
171 if (desc == NULL || err)
172 f2fs_truncate(inode);
173
174 clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
175
176 if (desc != NULL && !err) {
177 err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY,
178 F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc),
179 NULL, XATTR_CREATE);
180 if (!err) {
181 file_set_verity(inode);
182 f2fs_set_inode_flags(inode);
183 f2fs_mark_inode_dirty_sync(inode, true);
184 }
185 }
186 return err;
187}
188
189static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
190 size_t buf_size)
191{
192 struct fsverity_descriptor_location dloc;
193 int res;
194 u32 size;
195 u64 pos;
196
197 /* Get the descriptor location */
198 res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_VERITY,
199 F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), NULL);
200 if (res < 0 && res != -ERANGE)
201 return res;
202 if (res != sizeof(dloc) || dloc.version != cpu_to_le32(1)) {
203 f2fs_warn(F2FS_I_SB(inode), "unknown verity xattr format");
204 return -EINVAL;
205 }
206 size = le32_to_cpu(dloc.size);
207 pos = le64_to_cpu(dloc.pos);
208
209 /* Get the descriptor */
210 if (pos + size < pos || pos + size > inode->i_sb->s_maxbytes ||
211 pos < f2fs_verity_metadata_pos(inode) || size > INT_MAX) {
212 f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr");
213 return -EFSCORRUPTED;
214 }
215 if (buf_size) {
216 if (size > buf_size)
217 return -ERANGE;
218 res = pagecache_read(inode, buf, size, pos);
219 if (res)
220 return res;
221 }
222 return size;
223}
224
225static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
226 pgoff_t index)
227{
228 index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
229
230 return read_mapping_page(inode->i_mapping, index, NULL);
231}
232
233static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf,
234 u64 index, int log_blocksize)
235{
236 loff_t pos = f2fs_verity_metadata_pos(inode) + (index << log_blocksize);
237
238 return pagecache_write(inode, buf, 1 << log_blocksize, pos);
239}
240
241const struct fsverity_operations f2fs_verityops = {
242 .begin_enable_verity = f2fs_begin_enable_verity,
243 .end_enable_verity = f2fs_end_enable_verity,
244 .get_verity_descriptor = f2fs_get_verity_descriptor,
245 .read_merkle_tree_page = f2fs_read_merkle_tree_page,
246 .write_merkle_tree_block = f2fs_write_merkle_tree_block,
247};
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index a90920e2f949..de0c600b9cab 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -34,8 +34,10 @@
34#define F2FS_XATTR_INDEX_ADVISE 7 34#define F2FS_XATTR_INDEX_ADVISE 7
35/* Should be same as EXT4_XATTR_INDEX_ENCRYPTION */ 35/* Should be same as EXT4_XATTR_INDEX_ENCRYPTION */
36#define F2FS_XATTR_INDEX_ENCRYPTION 9 36#define F2FS_XATTR_INDEX_ENCRYPTION 9
37#define F2FS_XATTR_INDEX_VERITY 11
37 38
38#define F2FS_XATTR_NAME_ENCRYPTION_CONTEXT "c" 39#define F2FS_XATTR_NAME_ENCRYPTION_CONTEXT "c"
40#define F2FS_XATTR_NAME_VERITY "v"
39 41
40struct f2fs_xattr_header { 42struct f2fs_xattr_header {
41 __le32 h_magic; /* magic number for identification */ 43 __le32 h_magic; /* magic number for identification */
diff --git a/fs/verity/Kconfig b/fs/verity/Kconfig
new file mode 100644
index 000000000000..88fb25119899
--- /dev/null
+++ b/fs/verity/Kconfig
@@ -0,0 +1,55 @@
1# SPDX-License-Identifier: GPL-2.0
2
3config FS_VERITY
4 bool "FS Verity (read-only file-based authenticity protection)"
5 select CRYPTO
6 # SHA-256 is selected as it's intended to be the default hash algorithm.
7 # To avoid bloat, other wanted algorithms must be selected explicitly.
8 select CRYPTO_SHA256
9 help
10 This option enables fs-verity. fs-verity is the dm-verity
11 mechanism implemented at the file level. On supported
12 filesystems (currently EXT4 and F2FS), userspace can use an
13 ioctl to enable verity for a file, which causes the filesystem
14 to build a Merkle tree for the file. The filesystem will then
15 transparently verify any data read from the file against the
16 Merkle tree. The file is also made read-only.
17
18 This serves as an integrity check, but the availability of the
19 Merkle tree root hash also allows efficiently supporting
20 various use cases where normally the whole file would need to
21 be hashed at once, such as: (a) auditing (logging the file's
22 hash), or (b) authenticity verification (comparing the hash
23 against a known good value, e.g. from a digital signature).
24
25 fs-verity is especially useful on large files where not all
26 the contents may actually be needed. Also, fs-verity verifies
27 data each time it is paged back in, which provides better
28 protection against malicious disks vs. an ahead-of-time hash.
29
30 If unsure, say N.
31
32config FS_VERITY_DEBUG
33 bool "FS Verity debugging"
34 depends on FS_VERITY
35 help
36 Enable debugging messages related to fs-verity by default.
37
38 Say N unless you are an fs-verity developer.
39
40config FS_VERITY_BUILTIN_SIGNATURES
41 bool "FS Verity builtin signature support"
42 depends on FS_VERITY
43 select SYSTEM_DATA_VERIFICATION
44 help
45 Support verifying signatures of verity files against the X.509
46 certificates that have been loaded into the ".fs-verity"
47 kernel keyring.
48
49 This is meant as a relatively simple mechanism that can be
50 used to provide an authenticity guarantee for verity files, as
51 an alternative to IMA appraisal. Userspace programs still
52 need to check that the verity bit is set in order to get an
53 authenticity guarantee.
54
55 If unsure, say N.
diff --git a/fs/verity/Makefile b/fs/verity/Makefile
new file mode 100644
index 000000000000..570e9136334d
--- /dev/null
+++ b/fs/verity/Makefile
@@ -0,0 +1,10 @@
1# SPDX-License-Identifier: GPL-2.0
2
3obj-$(CONFIG_FS_VERITY) += enable.o \
4 hash_algs.o \
5 init.o \
6 measure.o \
7 open.o \
8 verify.o
9
10obj-$(CONFIG_FS_VERITY_BUILTIN_SIGNATURES) += signature.o
diff --git a/fs/verity/enable.c b/fs/verity/enable.c
new file mode 100644
index 000000000000..eabc6ac19906
--- /dev/null
+++ b/fs/verity/enable.c
@@ -0,0 +1,377 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * fs/verity/enable.c: ioctl to enable verity on a file
4 *
5 * Copyright 2019 Google LLC
6 */
7
8#include "fsverity_private.h"
9
10#include <crypto/hash.h>
11#include <linux/mount.h>
12#include <linux/pagemap.h>
13#include <linux/sched/signal.h>
14#include <linux/uaccess.h>
15
16static int build_merkle_tree_level(struct inode *inode, unsigned int level,
17 u64 num_blocks_to_hash,
18 const struct merkle_tree_params *params,
19 u8 *pending_hashes,
20 struct ahash_request *req)
21{
22 const struct fsverity_operations *vops = inode->i_sb->s_vop;
23 unsigned int pending_size = 0;
24 u64 dst_block_num;
25 u64 i;
26 int err;
27
28 if (WARN_ON(params->block_size != PAGE_SIZE)) /* checked earlier too */
29 return -EINVAL;
30
31 if (level < params->num_levels) {
32 dst_block_num = params->level_start[level];
33 } else {
34 if (WARN_ON(num_blocks_to_hash != 1))
35 return -EINVAL;
36 dst_block_num = 0; /* unused */
37 }
38
39 for (i = 0; i < num_blocks_to_hash; i++) {
40 struct page *src_page;
41
42 if ((pgoff_t)i % 10000 == 0 || i + 1 == num_blocks_to_hash)
43 pr_debug("Hashing block %llu of %llu for level %u\n",
44 i + 1, num_blocks_to_hash, level);
45
46 if (level == 0) {
47 /* Leaf: hashing a data block */
48 src_page = read_mapping_page(inode->i_mapping, i, NULL);
49 if (IS_ERR(src_page)) {
50 err = PTR_ERR(src_page);
51 fsverity_err(inode,
52 "Error %d reading data page %llu",
53 err, i);
54 return err;
55 }
56 } else {
57 /* Non-leaf: hashing hash block from level below */
58 src_page = vops->read_merkle_tree_page(inode,
59 params->level_start[level - 1] + i);
60 if (IS_ERR(src_page)) {
61 err = PTR_ERR(src_page);
62 fsverity_err(inode,
63 "Error %d reading Merkle tree page %llu",
64 err, params->level_start[level - 1] + i);
65 return err;
66 }
67 }
68
69 err = fsverity_hash_page(params, inode, req, src_page,
70 &pending_hashes[pending_size]);
71 put_page(src_page);
72 if (err)
73 return err;
74 pending_size += params->digest_size;
75
76 if (level == params->num_levels) /* Root hash? */
77 return 0;
78
79 if (pending_size + params->digest_size > params->block_size ||
80 i + 1 == num_blocks_to_hash) {
81 /* Flush the pending hash block */
82 memset(&pending_hashes[pending_size], 0,
83 params->block_size - pending_size);
84 err = vops->write_merkle_tree_block(inode,
85 pending_hashes,
86 dst_block_num,
87 params->log_blocksize);
88 if (err) {
89 fsverity_err(inode,
90 "Error %d writing Merkle tree block %llu",
91 err, dst_block_num);
92 return err;
93 }
94 dst_block_num++;
95 pending_size = 0;
96 }
97
98 if (fatal_signal_pending(current))
99 return -EINTR;
100 cond_resched();
101 }
102 return 0;
103}
104
105/*
106 * Build the Merkle tree for the given inode using the given parameters, and
107 * return the root hash in @root_hash.
108 *
109 * The tree is written to a filesystem-specific location as determined by the
110 * ->write_merkle_tree_block() method. However, the blocks that comprise the
111 * tree are the same for all filesystems.
112 */
113static int build_merkle_tree(struct inode *inode,
114 const struct merkle_tree_params *params,
115 u8 *root_hash)
116{
117 u8 *pending_hashes;
118 struct ahash_request *req;
119 u64 blocks;
120 unsigned int level;
121 int err = -ENOMEM;
122
123 if (inode->i_size == 0) {
124 /* Empty file is a special case; root hash is all 0's */
125 memset(root_hash, 0, params->digest_size);
126 return 0;
127 }
128
129 pending_hashes = kmalloc(params->block_size, GFP_KERNEL);
130 req = ahash_request_alloc(params->hash_alg->tfm, GFP_KERNEL);
131 if (!pending_hashes || !req)
132 goto out;
133
134 /*
135 * Build each level of the Merkle tree, starting at the leaf level
136 * (level 0) and ascending to the root node (level 'num_levels - 1').
137 * Then at the end (level 'num_levels'), calculate the root hash.
138 */
139 blocks = (inode->i_size + params->block_size - 1) >>
140 params->log_blocksize;
141 for (level = 0; level <= params->num_levels; level++) {
142 err = build_merkle_tree_level(inode, level, blocks, params,
143 pending_hashes, req);
144 if (err)
145 goto out;
146 blocks = (blocks + params->hashes_per_block - 1) >>
147 params->log_arity;
148 }
149 memcpy(root_hash, pending_hashes, params->digest_size);
150 err = 0;
151out:
152 kfree(pending_hashes);
153 ahash_request_free(req);
154 return err;
155}
156
157static int enable_verity(struct file *filp,
158 const struct fsverity_enable_arg *arg)
159{
160 struct inode *inode = file_inode(filp);
161 const struct fsverity_operations *vops = inode->i_sb->s_vop;
162 struct merkle_tree_params params = { };
163 struct fsverity_descriptor *desc;
164 size_t desc_size = sizeof(*desc) + arg->sig_size;
165 struct fsverity_info *vi;
166 int err;
167
168 /* Start initializing the fsverity_descriptor */
169 desc = kzalloc(desc_size, GFP_KERNEL);
170 if (!desc)
171 return -ENOMEM;
172 desc->version = 1;
173 desc->hash_algorithm = arg->hash_algorithm;
174 desc->log_blocksize = ilog2(arg->block_size);
175
176 /* Get the salt if the user provided one */
177 if (arg->salt_size &&
178 copy_from_user(desc->salt,
179 (const u8 __user *)(uintptr_t)arg->salt_ptr,
180 arg->salt_size)) {
181 err = -EFAULT;
182 goto out;
183 }
184 desc->salt_size = arg->salt_size;
185
186 /* Get the signature if the user provided one */
187 if (arg->sig_size &&
188 copy_from_user(desc->signature,
189 (const u8 __user *)(uintptr_t)arg->sig_ptr,
190 arg->sig_size)) {
191 err = -EFAULT;
192 goto out;
193 }
194 desc->sig_size = cpu_to_le32(arg->sig_size);
195
196 desc->data_size = cpu_to_le64(inode->i_size);
197
198 /* Prepare the Merkle tree parameters */
199 err = fsverity_init_merkle_tree_params(&params, inode,
200 arg->hash_algorithm,
201 desc->log_blocksize,
202 desc->salt, desc->salt_size);
203 if (err)
204 goto out;
205
206 /*
207 * Start enabling verity on this file, serialized by the inode lock.
208 * Fail if verity is already enabled or is already being enabled.
209 */
210 inode_lock(inode);
211 if (IS_VERITY(inode))
212 err = -EEXIST;
213 else
214 err = vops->begin_enable_verity(filp);
215 inode_unlock(inode);
216 if (err)
217 goto out;
218
219 /*
220 * Build the Merkle tree. Don't hold the inode lock during this, since
221 * on huge files this may take a very long time and we don't want to
222 * force unrelated syscalls like chown() to block forever. We don't
223 * need the inode lock here because deny_write_access() already prevents
224 * the file from being written to or truncated, and we still serialize
225 * ->begin_enable_verity() and ->end_enable_verity() using the inode
226 * lock and only allow one process to be here at a time on a given file.
227 */
228 pr_debug("Building Merkle tree...\n");
229 BUILD_BUG_ON(sizeof(desc->root_hash) < FS_VERITY_MAX_DIGEST_SIZE);
230 err = build_merkle_tree(inode, &params, desc->root_hash);
231 if (err) {
232 fsverity_err(inode, "Error %d building Merkle tree", err);
233 goto rollback;
234 }
235 pr_debug("Done building Merkle tree. Root hash is %s:%*phN\n",
236 params.hash_alg->name, params.digest_size, desc->root_hash);
237
238 /*
239 * Create the fsverity_info. Don't bother trying to save work by
240 * reusing the merkle_tree_params from above. Instead, just create the
241 * fsverity_info from the fsverity_descriptor as if it were just loaded
242 * from disk. This is simpler, and it serves as an extra check that the
243 * metadata we're writing is valid before actually enabling verity.
244 */
245 vi = fsverity_create_info(inode, desc, desc_size);
246 if (IS_ERR(vi)) {
247 err = PTR_ERR(vi);
248 goto rollback;
249 }
250
251 if (arg->sig_size)
252 pr_debug("Storing a %u-byte PKCS#7 signature alongside the file\n",
253 arg->sig_size);
254
255 /*
256 * Tell the filesystem to finish enabling verity on the file.
257 * Serialized with ->begin_enable_verity() by the inode lock.
258 */
259 inode_lock(inode);
260 err = vops->end_enable_verity(filp, desc, desc_size, params.tree_size);
261 inode_unlock(inode);
262 if (err) {
263 fsverity_err(inode, "%ps() failed with err %d",
264 vops->end_enable_verity, err);
265 fsverity_free_info(vi);
266 } else if (WARN_ON(!IS_VERITY(inode))) {
267 err = -EINVAL;
268 fsverity_free_info(vi);
269 } else {
270 /* Successfully enabled verity */
271
272 /*
273 * Readers can start using ->i_verity_info immediately, so it
274 * can't be rolled back once set. So don't set it until just
275 * after the filesystem has successfully enabled verity.
276 */
277 fsverity_set_info(inode, vi);
278 }
279out:
280 kfree(params.hashstate);
281 kfree(desc);
282 return err;
283
284rollback:
285 inode_lock(inode);
286 (void)vops->end_enable_verity(filp, NULL, 0, params.tree_size);
287 inode_unlock(inode);
288 goto out;
289}
290
291/**
292 * fsverity_ioctl_enable() - enable verity on a file
293 *
294 * Enable fs-verity on a file. See the "FS_IOC_ENABLE_VERITY" section of
295 * Documentation/filesystems/fsverity.rst for the documentation.
296 *
297 * Return: 0 on success, -errno on failure
298 */
299int fsverity_ioctl_enable(struct file *filp, const void __user *uarg)
300{
301 struct inode *inode = file_inode(filp);
302 struct fsverity_enable_arg arg;
303 int err;
304
305 if (copy_from_user(&arg, uarg, sizeof(arg)))
306 return -EFAULT;
307
308 if (arg.version != 1)
309 return -EINVAL;
310
311 if (arg.__reserved1 ||
312 memchr_inv(arg.__reserved2, 0, sizeof(arg.__reserved2)))
313 return -EINVAL;
314
315 if (arg.block_size != PAGE_SIZE)
316 return -EINVAL;
317
318 if (arg.salt_size > FIELD_SIZEOF(struct fsverity_descriptor, salt))
319 return -EMSGSIZE;
320
321 if (arg.sig_size > FS_VERITY_MAX_SIGNATURE_SIZE)
322 return -EMSGSIZE;
323
324 /*
325 * Require a regular file with write access. But the actual fd must
326 * still be readonly so that we can lock out all writers. This is
327 * needed to guarantee that no writable fds exist to the file once it
328 * has verity enabled, and to stabilize the data being hashed.
329 */
330
331 err = inode_permission(inode, MAY_WRITE);
332 if (err)
333 return err;
334
335 if (IS_APPEND(inode))
336 return -EPERM;
337
338 if (S_ISDIR(inode->i_mode))
339 return -EISDIR;
340
341 if (!S_ISREG(inode->i_mode))
342 return -EINVAL;
343
344 err = mnt_want_write_file(filp);
345 if (err) /* -EROFS */
346 return err;
347
348 err = deny_write_access(filp);
349 if (err) /* -ETXTBSY */
350 goto out_drop_write;
351
352 err = enable_verity(filp, &arg);
353 if (err)
354 goto out_allow_write_access;
355
356 /*
357 * Some pages of the file may have been evicted from pagecache after
358 * being used in the Merkle tree construction, then read into pagecache
359 * again by another process reading from the file concurrently. Since
360 * these pages didn't undergo verification against the file measurement
361 * which fs-verity now claims to be enforcing, we have to wipe the
362 * pagecache to ensure that all future reads are verified.
363 */
364 filemap_write_and_wait(inode->i_mapping);
365 invalidate_inode_pages2(inode->i_mapping);
366
367 /*
368 * allow_write_access() is needed to pair with deny_write_access().
369 * Regardless, the filesystem won't allow writing to verity files.
370 */
371out_allow_write_access:
372 allow_write_access(filp);
373out_drop_write:
374 mnt_drop_write_file(filp);
375 return err;
376}
377EXPORT_SYMBOL_GPL(fsverity_ioctl_enable);
diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
new file mode 100644
index 000000000000..e74c79b64d88
--- /dev/null
+++ b/fs/verity/fsverity_private.h
@@ -0,0 +1,185 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * fs-verity: read-only file-based authenticity protection
4 *
5 * Copyright 2019 Google LLC
6 */
7
8#ifndef _FSVERITY_PRIVATE_H
9#define _FSVERITY_PRIVATE_H
10
11#ifdef CONFIG_FS_VERITY_DEBUG
12#define DEBUG
13#endif
14
15#define pr_fmt(fmt) "fs-verity: " fmt
16
17#include <crypto/sha.h>
18#include <linux/fsverity.h>
19
20struct ahash_request;
21
22/*
23 * Implementation limit: maximum depth of the Merkle tree. For now 8 is plenty;
24 * it's enough for over U64_MAX bytes of data using SHA-256 and 4K blocks.
25 */
26#define FS_VERITY_MAX_LEVELS 8
27
28/*
29 * Largest digest size among all hash algorithms supported by fs-verity.
30 * Currently assumed to be <= size of fsverity_descriptor::root_hash.
31 */
32#define FS_VERITY_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE
33
/*
 * A hash algorithm supported by fs-verity.
 *
 * One entry of the fsverity_hash_algs[] table.  ->tfm is not set by the
 * table's initializers; fsverity_get_hash_alg() allocates it on first use and
 * checks ->digest_size and ->block_size against what the crypto API reports.
 */
struct fsverity_hash_alg {
	struct crypto_ahash *tfm; /* hash tfm, allocated on demand */
	const char *name;	  /* crypto API name, e.g. sha256 */
	unsigned int digest_size; /* digest size in bytes, e.g. 32 for SHA-256 */
	unsigned int block_size;  /* block size in bytes, e.g. 64 for SHA-256 */
};
41
/*
 * Merkle tree parameters: hash algorithm, initial hash state, and topology.
 *
 * Filled in by fsverity_init_merkle_tree_params().  ->hashstate, when not
 * NULL, is the kmalloc()'ed buffer returned by fsverity_prepare_hash_state();
 * whoever owns the struct must kfree() it.
 */
struct merkle_tree_params {
	const struct fsverity_hash_alg *hash_alg; /* the hash algorithm */
	const u8 *hashstate;		/* initial hash state or NULL */
	unsigned int digest_size;	/* same as hash_alg->digest_size */
	unsigned int block_size;	/* size of data and tree blocks */
	unsigned int hashes_per_block;	/* number of hashes per tree block */
	unsigned int log_blocksize;	/* log2(block_size) */
	unsigned int log_arity;		/* log2(hashes_per_block) */
	unsigned int num_levels;	/* number of levels in Merkle tree */
	u64 tree_size;			/* Merkle tree size in bytes */

	/*
	 * Starting block index for each tree level, ordered from leaf level (0)
	 * to root level ('num_levels - 1')
	 */
	u64 level_start[FS_VERITY_MAX_LEVELS];
};
60
/**
 * fsverity_info - cached verity metadata for an inode
 * @tree_params: hash algorithm, hash state and topology of the Merkle tree
 * @root_hash: Merkle tree root hash; the buffer is sized for the largest
 *	       supported digest
 * @measurement: the file measurement; the first hash_alg->digest_size bytes
 *		 are what FS_IOC_MEASURE_VERITY copies to userspace
 * @inode: presumably the inode this info is attached to -- it is set in
 *	   code outside this header; TODO confirm
 *
 * When a verity file is first opened, an instance of this struct is allocated
 * and stored in ->i_verity_info; it remains until the inode is evicted.  It
 * caches information about the Merkle tree that's needed to efficiently verify
 * data read from the file.  It also caches the file measurement.  The Merkle
 * tree pages themselves are not cached here, but the filesystem may cache them.
 */
struct fsverity_info {
	struct merkle_tree_params tree_params;
	u8 root_hash[FS_VERITY_MAX_DIGEST_SIZE];
	u8 measurement[FS_VERITY_MAX_DIGEST_SIZE];
	const struct inode *inode;
};
76
/*
 * Merkle tree properties.  The file measurement is the hash of this structure
 * excluding the signature and with the sig_size field set to 0.
 *
 * The fields in front of signature[] add up to 256 bytes.  The multi-byte
 * fields are little-endian (__le32 / __le64), so code filling them in must
 * convert with cpu_to_le32() / cpu_to_le64().
 */
struct fsverity_descriptor {
	__u8 version;		/* must be 1 */
	__u8 hash_algorithm;	/* Merkle tree hash algorithm */
	__u8 log_blocksize;	/* log2 of size of data and tree blocks */
	__u8 salt_size;		/* size of salt in bytes; 0 if none */
	__le32 sig_size;	/* size of signature in bytes; 0 if none */
	__le64 data_size;	/* size of file the Merkle tree is built over */
	__u8 root_hash[64];	/* Merkle tree root hash */
	__u8 salt[32];		/* salt prepended to each hashed block */
	__u8 __reserved[144];	/* must be 0's */
	__u8 signature[];	/* optional PKCS#7 signature */
};
93
94/* Arbitrary limit to bound the kmalloc() size. Can be changed. */
95#define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384
96
97#define FS_VERITY_MAX_SIGNATURE_SIZE (FS_VERITY_MAX_DESCRIPTOR_SIZE - \
98 sizeof(struct fsverity_descriptor))
99
/*
 * Format in which verity file measurements are signed.  This is the same as
 * 'struct fsverity_digest', except here some magic bytes are prepended to
 * provide some context about what is being signed in case the same key is used
 * for non-fsverity purposes, and here the fields have fixed endianness.
 *
 * digest[] is a flexible array member, so the full size of an instance is
 * sizeof(struct fsverity_signed_digest) plus the digest length.
 */
struct fsverity_signed_digest {
	char magic[8];			/* must be "FSVerity" */
	__le16 digest_algorithm;	/* little-endian on every architecture */
	__le16 digest_size;		/* little-endian on every architecture */
	__u8 digest[];
};
112
113/* hash_algs.c */
114
115extern struct fsverity_hash_alg fsverity_hash_algs[];
116
117const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode,
118 unsigned int num);
119const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg,
120 const u8 *salt, size_t salt_size);
121int fsverity_hash_page(const struct merkle_tree_params *params,
122 const struct inode *inode,
123 struct ahash_request *req, struct page *page, u8 *out);
124int fsverity_hash_buffer(const struct fsverity_hash_alg *alg,
125 const void *data, size_t size, u8 *out);
126void __init fsverity_check_hash_algs(void);
127
128/* init.c */
129
130extern void __printf(3, 4) __cold
131fsverity_msg(const struct inode *inode, const char *level,
132 const char *fmt, ...);
133
134#define fsverity_warn(inode, fmt, ...) \
135 fsverity_msg((inode), KERN_WARNING, fmt, ##__VA_ARGS__)
136#define fsverity_err(inode, fmt, ...) \
137 fsverity_msg((inode), KERN_ERR, fmt, ##__VA_ARGS__)
138
139/* open.c */
140
141int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
142 const struct inode *inode,
143 unsigned int hash_algorithm,
144 unsigned int log_blocksize,
145 const u8 *salt, size_t salt_size);
146
147struct fsverity_info *fsverity_create_info(const struct inode *inode,
148 void *desc, size_t desc_size);
149
150void fsverity_set_info(struct inode *inode, struct fsverity_info *vi);
151
152void fsverity_free_info(struct fsverity_info *vi);
153
154int __init fsverity_init_info_cache(void);
155void __init fsverity_exit_info_cache(void);
156
157/* signature.c */
158
159#ifdef CONFIG_FS_VERITY_BUILTIN_SIGNATURES
160int fsverity_verify_signature(const struct fsverity_info *vi,
161 const struct fsverity_descriptor *desc,
162 size_t desc_size);
163
164int __init fsverity_init_signature(void);
165#else /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */
/*
 * Stub used when CONFIG_FS_VERITY_BUILTIN_SIGNATURES is disabled: nothing is
 * checked and success is always reported, so any signature stored in the
 * descriptor is not examined by this function.
 */
static inline int
fsverity_verify_signature(const struct fsverity_info *vi,
			  const struct fsverity_descriptor *desc,
			  size_t desc_size)
{
	return 0;
}

/*
 * Stub used when CONFIG_FS_VERITY_BUILTIN_SIGNATURES is disabled: there is
 * nothing to set up, so initialization trivially succeeds.
 */
static inline int fsverity_init_signature(void)
{
	return 0;
}
178#endif /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */
179
180/* verify.c */
181
182int __init fsverity_init_workqueue(void);
183void __init fsverity_exit_workqueue(void);
184
185#endif /* _FSVERITY_PRIVATE_H */
diff --git a/fs/verity/hash_algs.c b/fs/verity/hash_algs.c
new file mode 100644
index 000000000000..31e6d7d2389a
--- /dev/null
+++ b/fs/verity/hash_algs.c
@@ -0,0 +1,280 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * fs/verity/hash_algs.c: fs-verity hash algorithms
4 *
5 * Copyright 2019 Google LLC
6 */
7
8#include "fsverity_private.h"
9
10#include <crypto/hash.h>
11#include <linux/scatterlist.h>
12
/*
 * The hash algorithms supported by fs-verity, indexed by their
 * FS_VERITY_HASH_ALG_* number.  Slots without an initializer have a NULL
 * ->name, which is how users of the table recognize unsupported numbers.
 * The array is not const because ->tfm is filled in on demand by
 * fsverity_get_hash_alg().
 */
struct fsverity_hash_alg fsverity_hash_algs[] = {
	[FS_VERITY_HASH_ALG_SHA256] = {
		.name = "sha256",
		.digest_size = SHA256_DIGEST_SIZE,
		.block_size = SHA256_BLOCK_SIZE,
	},
	[FS_VERITY_HASH_ALG_SHA512] = {
		.name = "sha512",
		.digest_size = SHA512_DIGEST_SIZE,
		.block_size = SHA512_BLOCK_SIZE,
	},
};
26
27/**
28 * fsverity_get_hash_alg() - validate and prepare a hash algorithm
29 * @inode: optional inode for logging purposes
30 * @num: the hash algorithm number
31 *
32 * Get the struct fsverity_hash_alg for the given hash algorithm number, and
33 * ensure it has a hash transform ready to go. The hash transforms are
34 * allocated on-demand so that we don't waste resources unnecessarily, and
35 * because the crypto modules may be initialized later than fs/verity/.
36 *
37 * Return: pointer to the hash alg on success, else an ERR_PTR()
38 */
39const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode,
40 unsigned int num)
41{
42 struct fsverity_hash_alg *alg;
43 struct crypto_ahash *tfm;
44 int err;
45
46 if (num >= ARRAY_SIZE(fsverity_hash_algs) ||
47 !fsverity_hash_algs[num].name) {
48 fsverity_warn(inode, "Unknown hash algorithm number: %u", num);
49 return ERR_PTR(-EINVAL);
50 }
51 alg = &fsverity_hash_algs[num];
52
53 /* pairs with cmpxchg() below */
54 tfm = READ_ONCE(alg->tfm);
55 if (likely(tfm != NULL))
56 return alg;
57 /*
58 * Using the shash API would make things a bit simpler, but the ahash
59 * API is preferable as it allows the use of crypto accelerators.
60 */
61 tfm = crypto_alloc_ahash(alg->name, 0, 0);
62 if (IS_ERR(tfm)) {
63 if (PTR_ERR(tfm) == -ENOENT) {
64 fsverity_warn(inode,
65 "Missing crypto API support for hash algorithm \"%s\"",
66 alg->name);
67 return ERR_PTR(-ENOPKG);
68 }
69 fsverity_err(inode,
70 "Error allocating hash algorithm \"%s\": %ld",
71 alg->name, PTR_ERR(tfm));
72 return ERR_CAST(tfm);
73 }
74
75 err = -EINVAL;
76 if (WARN_ON(alg->digest_size != crypto_ahash_digestsize(tfm)))
77 goto err_free_tfm;
78 if (WARN_ON(alg->block_size != crypto_ahash_blocksize(tfm)))
79 goto err_free_tfm;
80
81 pr_info("%s using implementation \"%s\"\n",
82 alg->name, crypto_ahash_driver_name(tfm));
83
84 /* pairs with READ_ONCE() above */
85 if (cmpxchg(&alg->tfm, NULL, tfm) != NULL)
86 crypto_free_ahash(tfm);
87
88 return alg;
89
90err_free_tfm:
91 crypto_free_ahash(tfm);
92 return ERR_PTR(err);
93}
94
95/**
96 * fsverity_prepare_hash_state() - precompute the initial hash state
97 * @alg: hash algorithm
98 * @salt: a salt which is to be prepended to all data to be hashed
99 * @salt_size: salt size in bytes, possibly 0
100 *
101 * Return: NULL if the salt is empty, otherwise the kmalloc()'ed precomputed
102 * initial hash state on success or an ERR_PTR() on failure.
103 */
104const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg,
105 const u8 *salt, size_t salt_size)
106{
107 u8 *hashstate = NULL;
108 struct ahash_request *req = NULL;
109 u8 *padded_salt = NULL;
110 size_t padded_salt_size;
111 struct scatterlist sg;
112 DECLARE_CRYPTO_WAIT(wait);
113 int err;
114
115 if (salt_size == 0)
116 return NULL;
117
118 hashstate = kmalloc(crypto_ahash_statesize(alg->tfm), GFP_KERNEL);
119 if (!hashstate)
120 return ERR_PTR(-ENOMEM);
121
122 req = ahash_request_alloc(alg->tfm, GFP_KERNEL);
123 if (!req) {
124 err = -ENOMEM;
125 goto err_free;
126 }
127
128 /*
129 * Zero-pad the salt to the next multiple of the input size of the hash
130 * algorithm's compression function, e.g. 64 bytes for SHA-256 or 128
131 * bytes for SHA-512. This ensures that the hash algorithm won't have
132 * any bytes buffered internally after processing the salt, thus making
133 * salted hashing just as fast as unsalted hashing.
134 */
135 padded_salt_size = round_up(salt_size, alg->block_size);
136 padded_salt = kzalloc(padded_salt_size, GFP_KERNEL);
137 if (!padded_salt) {
138 err = -ENOMEM;
139 goto err_free;
140 }
141 memcpy(padded_salt, salt, salt_size);
142
143 sg_init_one(&sg, padded_salt, padded_salt_size);
144 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
145 CRYPTO_TFM_REQ_MAY_BACKLOG,
146 crypto_req_done, &wait);
147 ahash_request_set_crypt(req, &sg, NULL, padded_salt_size);
148
149 err = crypto_wait_req(crypto_ahash_init(req), &wait);
150 if (err)
151 goto err_free;
152
153 err = crypto_wait_req(crypto_ahash_update(req), &wait);
154 if (err)
155 goto err_free;
156
157 err = crypto_ahash_export(req, hashstate);
158 if (err)
159 goto err_free;
160out:
161 ahash_request_free(req);
162 kfree(padded_salt);
163 return hashstate;
164
165err_free:
166 kfree(hashstate);
167 hashstate = ERR_PTR(err);
168 goto out;
169}
170
171/**
172 * fsverity_hash_page() - hash a single data or hash page
173 * @params: the Merkle tree's parameters
174 * @inode: inode for which the hashing is being done
175 * @req: preallocated hash request
176 * @page: the page to hash
177 * @out: output digest, size 'params->digest_size' bytes
178 *
179 * Hash a single data or hash block, assuming block_size == PAGE_SIZE.
180 * The hash is salted if a salt is specified in the Merkle tree parameters.
181 *
182 * Return: 0 on success, -errno on failure
183 */
184int fsverity_hash_page(const struct merkle_tree_params *params,
185 const struct inode *inode,
186 struct ahash_request *req, struct page *page, u8 *out)
187{
188 struct scatterlist sg;
189 DECLARE_CRYPTO_WAIT(wait);
190 int err;
191
192 if (WARN_ON(params->block_size != PAGE_SIZE))
193 return -EINVAL;
194
195 sg_init_table(&sg, 1);
196 sg_set_page(&sg, page, PAGE_SIZE, 0);
197 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
198 CRYPTO_TFM_REQ_MAY_BACKLOG,
199 crypto_req_done, &wait);
200 ahash_request_set_crypt(req, &sg, out, PAGE_SIZE);
201
202 if (params->hashstate) {
203 err = crypto_ahash_import(req, params->hashstate);
204 if (err) {
205 fsverity_err(inode,
206 "Error %d importing hash state", err);
207 return err;
208 }
209 err = crypto_ahash_finup(req);
210 } else {
211 err = crypto_ahash_digest(req);
212 }
213
214 err = crypto_wait_req(err, &wait);
215 if (err)
216 fsverity_err(inode, "Error %d computing page hash", err);
217 return err;
218}
219
220/**
221 * fsverity_hash_buffer() - hash some data
222 * @alg: the hash algorithm to use
223 * @data: the data to hash
224 * @size: size of data to hash, in bytes
225 * @out: output digest, size 'alg->digest_size' bytes
226 *
227 * Hash some data which is located in physically contiguous memory (i.e. memory
228 * allocated by kmalloc(), not by vmalloc()). No salt is used.
229 *
230 * Return: 0 on success, -errno on failure
231 */
232int fsverity_hash_buffer(const struct fsverity_hash_alg *alg,
233 const void *data, size_t size, u8 *out)
234{
235 struct ahash_request *req;
236 struct scatterlist sg;
237 DECLARE_CRYPTO_WAIT(wait);
238 int err;
239
240 req = ahash_request_alloc(alg->tfm, GFP_KERNEL);
241 if (!req)
242 return -ENOMEM;
243
244 sg_init_one(&sg, data, size);
245 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
246 CRYPTO_TFM_REQ_MAY_BACKLOG,
247 crypto_req_done, &wait);
248 ahash_request_set_crypt(req, &sg, out, size);
249
250 err = crypto_wait_req(crypto_ahash_digest(req), &wait);
251
252 ahash_request_free(req);
253 return err;
254}
255
256void __init fsverity_check_hash_algs(void)
257{
258 size_t i;
259
260 /*
261 * Sanity check the hash algorithms (could be a build-time check, but
262 * they're in an array)
263 */
264 for (i = 0; i < ARRAY_SIZE(fsverity_hash_algs); i++) {
265 const struct fsverity_hash_alg *alg = &fsverity_hash_algs[i];
266
267 if (!alg->name)
268 continue;
269
270 BUG_ON(alg->digest_size > FS_VERITY_MAX_DIGEST_SIZE);
271
272 /*
273 * For efficiency, the implementation currently assumes the
274 * digest and block sizes are powers of 2. This limitation can
275 * be lifted if the code is updated to handle other values.
276 */
277 BUG_ON(!is_power_of_2(alg->digest_size));
278 BUG_ON(!is_power_of_2(alg->block_size));
279 }
280}
diff --git a/fs/verity/init.c b/fs/verity/init.c
new file mode 100644
index 000000000000..94c104e00861
--- /dev/null
+++ b/fs/verity/init.c
@@ -0,0 +1,61 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * fs/verity/init.c: fs-verity module initialization and logging
4 *
5 * Copyright 2019 Google LLC
6 */
7
8#include "fsverity_private.h"
9
10#include <linux/ratelimit.h>
11
12void fsverity_msg(const struct inode *inode, const char *level,
13 const char *fmt, ...)
14{
15 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
16 DEFAULT_RATELIMIT_BURST);
17 struct va_format vaf;
18 va_list args;
19
20 if (!__ratelimit(&rs))
21 return;
22
23 va_start(args, fmt);
24 vaf.fmt = fmt;
25 vaf.va = &args;
26 if (inode)
27 printk("%sfs-verity (%s, inode %lu): %pV\n",
28 level, inode->i_sb->s_id, inode->i_ino, &vaf);
29 else
30 printk("%sfs-verity: %pV\n", level, &vaf);
31 va_end(args);
32}
33
34static int __init fsverity_init(void)
35{
36 int err;
37
38 fsverity_check_hash_algs();
39
40 err = fsverity_init_info_cache();
41 if (err)
42 return err;
43
44 err = fsverity_init_workqueue();
45 if (err)
46 goto err_exit_info_cache;
47
48 err = fsverity_init_signature();
49 if (err)
50 goto err_exit_workqueue;
51
52 pr_debug("Initialized fs-verity\n");
53 return 0;
54
55err_exit_workqueue:
56 fsverity_exit_workqueue();
57err_exit_info_cache:
58 fsverity_exit_info_cache();
59 return err;
60}
61late_initcall(fsverity_init)
diff --git a/fs/verity/measure.c b/fs/verity/measure.c
new file mode 100644
index 000000000000..05049b68c745
--- /dev/null
+++ b/fs/verity/measure.c
@@ -0,0 +1,57 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * fs/verity/measure.c: ioctl to get a verity file's measurement
4 *
5 * Copyright 2019 Google LLC
6 */
7
8#include "fsverity_private.h"
9
10#include <linux/uaccess.h>
11
12/**
13 * fsverity_ioctl_measure() - get a verity file's measurement
14 *
15 * Retrieve the file measurement that the kernel is enforcing for reads from a
16 * verity file. See the "FS_IOC_MEASURE_VERITY" section of
17 * Documentation/filesystems/fsverity.rst for the documentation.
18 *
19 * Return: 0 on success, -errno on failure
20 */
21int fsverity_ioctl_measure(struct file *filp, void __user *_uarg)
22{
23 const struct inode *inode = file_inode(filp);
24 struct fsverity_digest __user *uarg = _uarg;
25 const struct fsverity_info *vi;
26 const struct fsverity_hash_alg *hash_alg;
27 struct fsverity_digest arg;
28
29 vi = fsverity_get_info(inode);
30 if (!vi)
31 return -ENODATA; /* not a verity file */
32 hash_alg = vi->tree_params.hash_alg;
33
34 /*
35 * The user specifies the digest_size their buffer has space for; we can
36 * return the digest if it fits in the available space. We write back
37 * the actual size, which may be shorter than the user-specified size.
38 */
39
40 if (get_user(arg.digest_size, &uarg->digest_size))
41 return -EFAULT;
42 if (arg.digest_size < hash_alg->digest_size)
43 return -EOVERFLOW;
44
45 memset(&arg, 0, sizeof(arg));
46 arg.digest_algorithm = hash_alg - fsverity_hash_algs;
47 arg.digest_size = hash_alg->digest_size;
48
49 if (copy_to_user(uarg, &arg, sizeof(arg)))
50 return -EFAULT;
51
52 if (copy_to_user(uarg->digest, vi->measurement, hash_alg->digest_size))
53 return -EFAULT;
54
55 return 0;
56}
57EXPORT_SYMBOL_GPL(fsverity_ioctl_measure);
diff --git a/fs/verity/open.c b/fs/verity/open.c
new file mode 100644
index 000000000000..63d1004b688c
--- /dev/null
+++ b/fs/verity/open.c
@@ -0,0 +1,356 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * fs/verity/open.c: opening fs-verity files
4 *
5 * Copyright 2019 Google LLC
6 */
7
8#include "fsverity_private.h"
9
10#include <linux/slab.h>
11
12static struct kmem_cache *fsverity_info_cachep;
13
14/**
15 * fsverity_init_merkle_tree_params() - initialize Merkle tree parameters
16 * @params: the parameters struct to initialize
17 * @inode: the inode for which the Merkle tree is being built
18 * @hash_algorithm: number of hash algorithm to use
19 * @log_blocksize: log base 2 of block size to use
20 * @salt: pointer to salt (optional)
21 * @salt_size: size of salt, possibly 0
22 *
23 * Validate the hash algorithm and block size, then compute the tree topology
24 * (num levels, num blocks in each level, etc.) and initialize @params.
25 *
26 * Return: 0 on success, -errno on failure
27 */
28int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
29 const struct inode *inode,
30 unsigned int hash_algorithm,
31 unsigned int log_blocksize,
32 const u8 *salt, size_t salt_size)
33{
34 const struct fsverity_hash_alg *hash_alg;
35 int err;
36 u64 blocks;
37 u64 offset;
38 int level;
39
40 memset(params, 0, sizeof(*params));
41
42 hash_alg = fsverity_get_hash_alg(inode, hash_algorithm);
43 if (IS_ERR(hash_alg))
44 return PTR_ERR(hash_alg);
45 params->hash_alg = hash_alg;
46 params->digest_size = hash_alg->digest_size;
47
48 params->hashstate = fsverity_prepare_hash_state(hash_alg, salt,
49 salt_size);
50 if (IS_ERR(params->hashstate)) {
51 err = PTR_ERR(params->hashstate);
52 params->hashstate = NULL;
53 fsverity_err(inode, "Error %d preparing hash state", err);
54 goto out_err;
55 }
56
57 if (log_blocksize != PAGE_SHIFT) {
58 fsverity_warn(inode, "Unsupported log_blocksize: %u",
59 log_blocksize);
60 err = -EINVAL;
61 goto out_err;
62 }
63 params->log_blocksize = log_blocksize;
64 params->block_size = 1 << log_blocksize;
65
66 if (WARN_ON(!is_power_of_2(params->digest_size))) {
67 err = -EINVAL;
68 goto out_err;
69 }
70 if (params->block_size < 2 * params->digest_size) {
71 fsverity_warn(inode,
72 "Merkle tree block size (%u) too small for hash algorithm \"%s\"",
73 params->block_size, hash_alg->name);
74 err = -EINVAL;
75 goto out_err;
76 }
77 params->log_arity = params->log_blocksize - ilog2(params->digest_size);
78 params->hashes_per_block = 1 << params->log_arity;
79
80 pr_debug("Merkle tree uses %s with %u-byte blocks (%u hashes/block), salt=%*phN\n",
81 hash_alg->name, params->block_size, params->hashes_per_block,
82 (int)salt_size, salt);
83
84 /*
85 * Compute the number of levels in the Merkle tree and create a map from
86 * level to the starting block of that level. Level 'num_levels - 1' is
87 * the root and is stored first. Level 0 is the level directly "above"
88 * the data blocks and is stored last.
89 */
90
91 /* Compute number of levels and the number of blocks in each level */
92 blocks = (inode->i_size + params->block_size - 1) >> log_blocksize;
93 pr_debug("Data is %lld bytes (%llu blocks)\n", inode->i_size, blocks);
94 while (blocks > 1) {
95 if (params->num_levels >= FS_VERITY_MAX_LEVELS) {
96 fsverity_err(inode, "Too many levels in Merkle tree");
97 err = -EINVAL;
98 goto out_err;
99 }
100 blocks = (blocks + params->hashes_per_block - 1) >>
101 params->log_arity;
102 /* temporarily using level_start[] to store blocks in level */
103 params->level_start[params->num_levels++] = blocks;
104 }
105
106 /* Compute the starting block of each level */
107 offset = 0;
108 for (level = (int)params->num_levels - 1; level >= 0; level--) {
109 blocks = params->level_start[level];
110 params->level_start[level] = offset;
111 pr_debug("Level %d is %llu blocks starting at index %llu\n",
112 level, blocks, offset);
113 offset += blocks;
114 }
115
116 params->tree_size = offset << log_blocksize;
117 return 0;
118
119out_err:
120 kfree(params->hashstate);
121 memset(params, 0, sizeof(*params));
122 return err;
123}
124
125/*
126 * Compute the file measurement by hashing the fsverity_descriptor excluding the
127 * signature and with the sig_size field set to 0.
128 */
129static int compute_file_measurement(const struct fsverity_hash_alg *hash_alg,
130 struct fsverity_descriptor *desc,
131 u8 *measurement)
132{
133 __le32 sig_size = desc->sig_size;
134 int err;
135
136 desc->sig_size = 0;
137 err = fsverity_hash_buffer(hash_alg, desc, sizeof(*desc), measurement);
138 desc->sig_size = sig_size;
139
140 return err;
141}
142
143/*
144 * Validate the given fsverity_descriptor and create a new fsverity_info from
145 * it. The signature (if present) is also checked.
146 */
147struct fsverity_info *fsverity_create_info(const struct inode *inode,
148 void *_desc, size_t desc_size)
149{
150 struct fsverity_descriptor *desc = _desc;
151 struct fsverity_info *vi;
152 int err;
153
154 if (desc_size < sizeof(*desc)) {
155 fsverity_err(inode, "Unrecognized descriptor size: %zu bytes",
156 desc_size);
157 return ERR_PTR(-EINVAL);
158 }
159
160 if (desc->version != 1) {
161 fsverity_err(inode, "Unrecognized descriptor version: %u",
162 desc->version);
163 return ERR_PTR(-EINVAL);
164 }
165
166 if (memchr_inv(desc->__reserved, 0, sizeof(desc->__reserved))) {
167 fsverity_err(inode, "Reserved bits set in descriptor");
168 return ERR_PTR(-EINVAL);
169 }
170
171 if (desc->salt_size > sizeof(desc->salt)) {
172 fsverity_err(inode, "Invalid salt_size: %u", desc->salt_size);
173 return ERR_PTR(-EINVAL);
174 }
175
176 if (le64_to_cpu(desc->data_size) != inode->i_size) {
177 fsverity_err(inode,
178 "Wrong data_size: %llu (desc) != %lld (inode)",
179 le64_to_cpu(desc->data_size), inode->i_size);
180 return ERR_PTR(-EINVAL);
181 }
182
183 vi = kmem_cache_zalloc(fsverity_info_cachep, GFP_KERNEL);
184 if (!vi)
185 return ERR_PTR(-ENOMEM);
186 vi->inode = inode;
187
188 err = fsverity_init_merkle_tree_params(&vi->tree_params, inode,
189 desc->hash_algorithm,
190 desc->log_blocksize,
191 desc->salt, desc->salt_size);
192 if (err) {
193 fsverity_err(inode,
194 "Error %d initializing Merkle tree parameters",
195 err);
196 goto out;
197 }
198
199 memcpy(vi->root_hash, desc->root_hash, vi->tree_params.digest_size);
200
201 err = compute_file_measurement(vi->tree_params.hash_alg, desc,
202 vi->measurement);
203 if (err) {
204 fsverity_err(inode, "Error %d computing file measurement", err);
205 goto out;
206 }
207 pr_debug("Computed file measurement: %s:%*phN\n",
208 vi->tree_params.hash_alg->name,
209 vi->tree_params.digest_size, vi->measurement);
210
211 err = fsverity_verify_signature(vi, desc, desc_size);
212out:
213 if (err) {
214 fsverity_free_info(vi);
215 vi = ERR_PTR(err);
216 }
217 return vi;
218}
219
220void fsverity_set_info(struct inode *inode, struct fsverity_info *vi)
221{
222 /*
223 * Multiple processes may race to set ->i_verity_info, so use cmpxchg.
224 * This pairs with the READ_ONCE() in fsverity_get_info().
225 */
226 if (cmpxchg(&inode->i_verity_info, NULL, vi) != NULL)
227 fsverity_free_info(vi);
228}
229
230void fsverity_free_info(struct fsverity_info *vi)
231{
232 if (!vi)
233 return;
234 kfree(vi->tree_params.hashstate);
235 kmem_cache_free(fsverity_info_cachep, vi);
236}
237
238/* Ensure the inode has an ->i_verity_info */
239static int ensure_verity_info(struct inode *inode)
240{
241 struct fsverity_info *vi = fsverity_get_info(inode);
242 struct fsverity_descriptor *desc;
243 int res;
244
245 if (vi)
246 return 0;
247
248 res = inode->i_sb->s_vop->get_verity_descriptor(inode, NULL, 0);
249 if (res < 0) {
250 fsverity_err(inode,
251 "Error %d getting verity descriptor size", res);
252 return res;
253 }
254 if (res > FS_VERITY_MAX_DESCRIPTOR_SIZE) {
255 fsverity_err(inode, "Verity descriptor is too large (%d bytes)",
256 res);
257 return -EMSGSIZE;
258 }
259 desc = kmalloc(res, GFP_KERNEL);
260 if (!desc)
261 return -ENOMEM;
262 res = inode->i_sb->s_vop->get_verity_descriptor(inode, desc, res);
263 if (res < 0) {
264 fsverity_err(inode, "Error %d reading verity descriptor", res);
265 goto out_free_desc;
266 }
267
268 vi = fsverity_create_info(inode, desc, res);
269 if (IS_ERR(vi)) {
270 res = PTR_ERR(vi);
271 goto out_free_desc;
272 }
273
274 fsverity_set_info(inode, vi);
275 res = 0;
276out_free_desc:
277 kfree(desc);
278 return res;
279}
280
281/**
282 * fsverity_file_open() - prepare to open a verity file
283 * @inode: the inode being opened
284 * @filp: the struct file being set up
285 *
286 * When opening a verity file, deny the open if it is for writing. Otherwise,
287 * set up the inode's ->i_verity_info if not already done.
288 *
289 * When combined with fscrypt, this must be called after fscrypt_file_open().
290 * Otherwise, we won't have the key set up to decrypt the verity metadata.
291 *
292 * Return: 0 on success, -errno on failure
293 */
294int fsverity_file_open(struct inode *inode, struct file *filp)
295{
296 if (!IS_VERITY(inode))
297 return 0;
298
299 if (filp->f_mode & FMODE_WRITE) {
300 pr_debug("Denying opening verity file (ino %lu) for write\n",
301 inode->i_ino);
302 return -EPERM;
303 }
304
305 return ensure_verity_info(inode);
306}
307EXPORT_SYMBOL_GPL(fsverity_file_open);
308
309/**
310 * fsverity_prepare_setattr() - prepare to change a verity inode's attributes
311 * @dentry: dentry through which the inode is being changed
312 * @attr: attributes to change
313 *
314 * Verity files are immutable, so deny truncates. This isn't covered by the
315 * open-time check because sys_truncate() takes a path, not a file descriptor.
316 *
317 * Return: 0 on success, -errno on failure
318 */
319int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr)
320{
321 if (IS_VERITY(d_inode(dentry)) && (attr->ia_valid & ATTR_SIZE)) {
322 pr_debug("Denying truncate of verity file (ino %lu)\n",
323 d_inode(dentry)->i_ino);
324 return -EPERM;
325 }
326 return 0;
327}
328EXPORT_SYMBOL_GPL(fsverity_prepare_setattr);
329
330/**
331 * fsverity_cleanup_inode() - free the inode's verity info, if present
332 *
333 * Filesystems must call this on inode eviction to free ->i_verity_info.
334 */
335void fsverity_cleanup_inode(struct inode *inode)
336{
337 fsverity_free_info(inode->i_verity_info);
338 inode->i_verity_info = NULL;
339}
340EXPORT_SYMBOL_GPL(fsverity_cleanup_inode);
341
342int __init fsverity_init_info_cache(void)
343{
344 fsverity_info_cachep = KMEM_CACHE_USERCOPY(fsverity_info,
345 SLAB_RECLAIM_ACCOUNT,
346 measurement);
347 if (!fsverity_info_cachep)
348 return -ENOMEM;
349 return 0;
350}
351
352void __init fsverity_exit_info_cache(void)
353{
354 kmem_cache_destroy(fsverity_info_cachep);
355 fsverity_info_cachep = NULL;
356}
diff --git a/fs/verity/signature.c b/fs/verity/signature.c
new file mode 100644
index 000000000000..c8b255232de5
--- /dev/null
+++ b/fs/verity/signature.c
@@ -0,0 +1,157 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * fs/verity/signature.c: verification of builtin signatures
4 *
5 * Copyright 2019 Google LLC
6 */
7
8#include "fsverity_private.h"
9
10#include <linux/cred.h>
11#include <linux/key.h>
12#include <linux/slab.h>
13#include <linux/verification.h>
14
15/*
16 * /proc/sys/fs/verity/require_signatures
17 * If 1, all verity files must have a valid builtin signature.
18 */
19static int fsverity_require_signatures;
20
21/*
22 * Keyring that contains the trusted X.509 certificates.
23 *
24 * Only root (kuid=0) can modify this. Also, root may use
25 * keyctl_restrict_keyring() to prevent any more additions.
26 */
27static struct key *fsverity_keyring;
28
29/**
30 * fsverity_verify_signature() - check a verity file's signature
31 *
32 * If the file's fs-verity descriptor includes a signature of the file
33 * measurement, verify it against the certificates in the fs-verity keyring.
34 *
35 * Return: 0 on success (signature valid or not required); -errno on failure
36 */
37int fsverity_verify_signature(const struct fsverity_info *vi,
38 const struct fsverity_descriptor *desc,
39 size_t desc_size)
40{
41 const struct inode *inode = vi->inode;
42 const struct fsverity_hash_alg *hash_alg = vi->tree_params.hash_alg;
43 const u32 sig_size = le32_to_cpu(desc->sig_size);
44 struct fsverity_signed_digest *d;
45 int err;
46
47 if (sig_size == 0) {
48 if (fsverity_require_signatures) {
49 fsverity_err(inode,
50 "require_signatures=1, rejecting unsigned file!");
51 return -EPERM;
52 }
53 return 0;
54 }
55
56 if (sig_size > desc_size - sizeof(*desc)) {
57 fsverity_err(inode, "Signature overflows verity descriptor");
58 return -EBADMSG;
59 }
60
61 d = kzalloc(sizeof(*d) + hash_alg->digest_size, GFP_KERNEL);
62 if (!d)
63 return -ENOMEM;
64 memcpy(d->magic, "FSVerity", 8);
65 d->digest_algorithm = cpu_to_le16(hash_alg - fsverity_hash_algs);
66 d->digest_size = cpu_to_le16(hash_alg->digest_size);
67 memcpy(d->digest, vi->measurement, hash_alg->digest_size);
68
69 err = verify_pkcs7_signature(d, sizeof(*d) + hash_alg->digest_size,
70 desc->signature, sig_size,
71 fsverity_keyring,
72 VERIFYING_UNSPECIFIED_SIGNATURE,
73 NULL, NULL);
74 kfree(d);
75
76 if (err) {
77 if (err == -ENOKEY)
78 fsverity_err(inode,
79 "File's signing cert isn't in the fs-verity keyring");
80 else if (err == -EKEYREJECTED)
81 fsverity_err(inode, "Incorrect file signature");
82 else if (err == -EBADMSG)
83 fsverity_err(inode, "Malformed file signature");
84 else
85 fsverity_err(inode, "Error %d verifying file signature",
86 err);
87 return err;
88 }
89
90 pr_debug("Valid signature for file measurement %s:%*phN\n",
91 hash_alg->name, hash_alg->digest_size, vi->measurement);
92 return 0;
93}
94
95#ifdef CONFIG_SYSCTL
96static struct ctl_table_header *fsverity_sysctl_header;
97
98static const struct ctl_path fsverity_sysctl_path[] = {
99 { .procname = "fs", },
100 { .procname = "verity", },
101 { }
102};
103
104static struct ctl_table fsverity_sysctl_table[] = {
105 {
106 .procname = "require_signatures",
107 .data = &fsverity_require_signatures,
108 .maxlen = sizeof(int),
109 .mode = 0644,
110 .proc_handler = proc_dointvec_minmax,
111 .extra1 = SYSCTL_ZERO,
112 .extra2 = SYSCTL_ONE,
113 },
114 { }
115};
116
117static int __init fsverity_sysctl_init(void)
118{
119 fsverity_sysctl_header = register_sysctl_paths(fsverity_sysctl_path,
120 fsverity_sysctl_table);
121 if (!fsverity_sysctl_header) {
122 pr_err("sysctl registration failed!\n");
123 return -ENOMEM;
124 }
125 return 0;
126}
127#else /* !CONFIG_SYSCTL */
128static inline int __init fsverity_sysctl_init(void)
129{
130 return 0;
131}
132#endif /* !CONFIG_SYSCTL */
133
134int __init fsverity_init_signature(void)
135{
136 struct key *ring;
137 int err;
138
139 ring = keyring_alloc(".fs-verity", KUIDT_INIT(0), KGIDT_INIT(0),
140 current_cred(), KEY_POS_SEARCH |
141 KEY_USR_VIEW | KEY_USR_READ | KEY_USR_WRITE |
142 KEY_USR_SEARCH | KEY_USR_SETATTR,
143 KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
144 if (IS_ERR(ring))
145 return PTR_ERR(ring);
146
147 err = fsverity_sysctl_init();
148 if (err)
149 goto err_put_ring;
150
151 fsverity_keyring = ring;
152 return 0;
153
154err_put_ring:
155 key_put(ring);
156 return err;
157}
diff --git a/fs/verity/verify.c b/fs/verity/verify.c
new file mode 100644
index 000000000000..3e8f2de44667
--- /dev/null
+++ b/fs/verity/verify.c
@@ -0,0 +1,281 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * fs/verity/verify.c: data verification functions, i.e. hooks for ->readpages()
4 *
5 * Copyright 2019 Google LLC
6 */
7
8#include "fsverity_private.h"
9
10#include <crypto/hash.h>
11#include <linux/bio.h>
12#include <linux/ratelimit.h>
13
14static struct workqueue_struct *fsverity_read_workqueue;
15
16/**
17 * hash_at_level() - compute the location of the block's hash at the given level
18 *
19 * @params: (in) the Merkle tree parameters
20 * @dindex: (in) the index of the data block being verified
21 * @level: (in) the level of hash we want (0 is leaf level)
22 * @hindex: (out) the index of the hash block containing the wanted hash
23 * @hoffset: (out) the byte offset to the wanted hash within the hash block
24 */
25static void hash_at_level(const struct merkle_tree_params *params,
26 pgoff_t dindex, unsigned int level, pgoff_t *hindex,
27 unsigned int *hoffset)
28{
29 pgoff_t position;
30
31 /* Offset of the hash within the level's region, in hashes */
32 position = dindex >> (level * params->log_arity);
33
34 /* Index of the hash block in the tree overall */
35 *hindex = params->level_start[level] + (position >> params->log_arity);
36
37 /* Offset of the wanted hash (in bytes) within the hash block */
38 *hoffset = (position & ((1 << params->log_arity) - 1)) <<
39 (params->log_blocksize - params->log_arity);
40}
41
42/* Extract a hash from a hash page */
43static void extract_hash(struct page *hpage, unsigned int hoffset,
44 unsigned int hsize, u8 *out)
45{
46 void *virt = kmap_atomic(hpage);
47
48 memcpy(out, virt + hoffset, hsize);
49 kunmap_atomic(virt);
50}
51
52static inline int cmp_hashes(const struct fsverity_info *vi,
53 const u8 *want_hash, const u8 *real_hash,
54 pgoff_t index, int level)
55{
56 const unsigned int hsize = vi->tree_params.digest_size;
57
58 if (memcmp(want_hash, real_hash, hsize) == 0)
59 return 0;
60
61 fsverity_err(vi->inode,
62 "FILE CORRUPTED! index=%lu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
63 index, level,
64 vi->tree_params.hash_alg->name, hsize, want_hash,
65 vi->tree_params.hash_alg->name, hsize, real_hash);
66 return -EBADMSG;
67}
68
69/*
70 * Verify a single data page against the file's Merkle tree.
71 *
72 * In principle, we need to verify the entire path to the root node. However,
73 * for efficiency the filesystem may cache the hash pages. Therefore we need
74 * only ascend the tree until an already-verified page is seen, as indicated by
75 * the PageChecked bit being set; then verify the path to that page.
76 *
77 * This code currently only supports the case where the verity block size is
78 * equal to PAGE_SIZE. Doing otherwise would be possible but tricky, since we
79 * wouldn't be able to use the PageChecked bit.
80 *
81 * Note that multiple processes may race to verify a hash page and mark it
82 * Checked, but it doesn't matter; the result will be the same either way.
83 *
84 * Return: true if the page is valid, else false.
85 */
86static bool verify_page(struct inode *inode, const struct fsverity_info *vi,
87 struct ahash_request *req, struct page *data_page)
88{
89 const struct merkle_tree_params *params = &vi->tree_params;
90 const unsigned int hsize = params->digest_size;
91 const pgoff_t index = data_page->index;
92 int level;
93 u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE];
94 const u8 *want_hash;
95 u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
96 struct page *hpages[FS_VERITY_MAX_LEVELS];
97 unsigned int hoffsets[FS_VERITY_MAX_LEVELS];
98 int err;
99
100 if (WARN_ON_ONCE(!PageLocked(data_page) || PageUptodate(data_page)))
101 return false;
102
103 pr_debug_ratelimited("Verifying data page %lu...\n", index);
104
105 /*
106 * Starting at the leaf level, ascend the tree saving hash pages along
107 * the way until we find a verified hash page, indicated by PageChecked;
108 * or until we reach the root.
109 */
110 for (level = 0; level < params->num_levels; level++) {
111 pgoff_t hindex;
112 unsigned int hoffset;
113 struct page *hpage;
114
115 hash_at_level(params, index, level, &hindex, &hoffset);
116
117 pr_debug_ratelimited("Level %d: hindex=%lu, hoffset=%u\n",
118 level, hindex, hoffset);
119
120 hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
121 hindex);
122 if (IS_ERR(hpage)) {
123 err = PTR_ERR(hpage);
124 fsverity_err(inode,
125 "Error %d reading Merkle tree page %lu",
126 err, hindex);
127 goto out;
128 }
129
130 if (PageChecked(hpage)) {
131 extract_hash(hpage, hoffset, hsize, _want_hash);
132 want_hash = _want_hash;
133 put_page(hpage);
134 pr_debug_ratelimited("Hash page already checked, want %s:%*phN\n",
135 params->hash_alg->name,
136 hsize, want_hash);
137 goto descend;
138 }
139 pr_debug_ratelimited("Hash page not yet checked\n");
140 hpages[level] = hpage;
141 hoffsets[level] = hoffset;
142 }
143
144 want_hash = vi->root_hash;
145 pr_debug("Want root hash: %s:%*phN\n",
146 params->hash_alg->name, hsize, want_hash);
147descend:
148 /* Descend the tree verifying hash pages */
149 for (; level > 0; level--) {
150 struct page *hpage = hpages[level - 1];
151 unsigned int hoffset = hoffsets[level - 1];
152
153 err = fsverity_hash_page(params, inode, req, hpage, real_hash);
154 if (err)
155 goto out;
156 err = cmp_hashes(vi, want_hash, real_hash, index, level - 1);
157 if (err)
158 goto out;
159 SetPageChecked(hpage);
160 extract_hash(hpage, hoffset, hsize, _want_hash);
161 want_hash = _want_hash;
162 put_page(hpage);
163 pr_debug("Verified hash page at level %d, now want %s:%*phN\n",
164 level - 1, params->hash_alg->name, hsize, want_hash);
165 }
166
167 /* Finally, verify the data page */
168 err = fsverity_hash_page(params, inode, req, data_page, real_hash);
169 if (err)
170 goto out;
171 err = cmp_hashes(vi, want_hash, real_hash, index, -1);
172out:
173 for (; level > 0; level--)
174 put_page(hpages[level - 1]);
175
176 return err == 0;
177}
178
179/**
180 * fsverity_verify_page() - verify a data page
181 *
182 * Verify a page that has just been read from a verity file. The page must be a
183 * pagecache page that is still locked and not yet uptodate.
184 *
185 * Return: true if the page is valid, else false.
186 */
187bool fsverity_verify_page(struct page *page)
188{
189 struct inode *inode = page->mapping->host;
190 const struct fsverity_info *vi = inode->i_verity_info;
191 struct ahash_request *req;
192 bool valid;
193
194 req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS);
195 if (unlikely(!req))
196 return false;
197
198 valid = verify_page(inode, vi, req, page);
199
200 ahash_request_free(req);
201
202 return valid;
203}
204EXPORT_SYMBOL_GPL(fsverity_verify_page);
205
#ifdef CONFIG_BLOCK
/**
 * fsverity_verify_bio() - verify a 'read' bio that has just completed
 * @bio: the bio whose pages are to be verified
 *
 * Verify a set of pages that have just been read from a verity file.  The pages
 * must be pagecache pages that are still locked and not yet uptodate.  Pages
 * that fail verification are set to the Error state.  Verification is skipped
 * for pages already in the Error state, e.g. due to fscrypt decryption failure.
 * If the hash request can't be allocated, every page is set to the Error state.
 *
 * This is a helper function for use by the ->readpages() method of filesystems
 * that issue bios to read data directly into the page cache.  Filesystems that
 * populate the page cache without issuing bios (e.g. non block-based
 * filesystems) must instead call fsverity_verify_page() directly on each page.
 * All filesystems must also call fsverity_verify_page() on holes.
 */
void fsverity_verify_bio(struct bio *bio)
{
	struct inode *inode = bio_first_page_all(bio)->mapping->host;
	const struct fsverity_info *vi = inode->i_verity_info;
	struct ahash_request *req;
	struct bio_vec *bv;
	struct bvec_iter_all iter_all;

	req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS);
	if (unlikely(!req)) {
		/* Can't hash anything, so fail every page */
		bio_for_each_segment_all(bv, bio, iter_all)
			SetPageError(bv->bv_page);
		return;
	}

	bio_for_each_segment_all(bv, bio, iter_all) {
		struct page *page = bv->bv_page;

		if (PageError(page))
			continue;	/* already failed earlier */
		if (!verify_page(inode, vi, req, page))
			SetPageError(page);
	}

	ahash_request_free(req);
}
EXPORT_SYMBOL_GPL(fsverity_verify_bio);
#endif /* CONFIG_BLOCK */
247
248/**
249 * fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue
250 *
251 * Enqueue verification work for asynchronous processing.
252 */
253void fsverity_enqueue_verify_work(struct work_struct *work)
254{
255 queue_work(fsverity_read_workqueue, work);
256}
257EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);
258
259int __init fsverity_init_workqueue(void)
260{
261 /*
262 * Use an unbound workqueue to allow bios to be verified in parallel
263 * even when they happen to complete on the same CPU. This sacrifices
264 * locality, but it's worthwhile since hashing is CPU-intensive.
265 *
266 * Also use a high-priority workqueue to prioritize verification work,
267 * which blocks reads from completing, over regular application tasks.
268 */
269 fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
270 WQ_UNBOUND | WQ_HIGHPRI,
271 num_online_cpus());
272 if (!fsverity_read_workqueue)
273 return -ENOMEM;
274 return 0;
275}
276
277void __init fsverity_exit_workqueue(void)
278{
279 destroy_workqueue(fsverity_read_workqueue);
280 fsverity_read_workqueue = NULL;
281}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5dff77326cec..104a727f8a67 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -64,6 +64,8 @@ struct workqueue_struct;
64struct iov_iter; 64struct iov_iter;
65struct fscrypt_info; 65struct fscrypt_info;
66struct fscrypt_operations; 66struct fscrypt_operations;
67struct fsverity_info;
68struct fsverity_operations;
67struct fs_context; 69struct fs_context;
68struct fs_parameter_description; 70struct fs_parameter_description;
69 71
@@ -723,6 +725,10 @@ struct inode {
723 struct fscrypt_info *i_crypt_info; 725 struct fscrypt_info *i_crypt_info;
724#endif 726#endif
725 727
728#ifdef CONFIG_FS_VERITY
729 struct fsverity_info *i_verity_info;
730#endif
731
726 void *i_private; /* fs or device private pointer */ 732 void *i_private; /* fs or device private pointer */
727} __randomize_layout; 733} __randomize_layout;
728 734
@@ -1429,6 +1435,9 @@ struct super_block {
1429 const struct fscrypt_operations *s_cop; 1435 const struct fscrypt_operations *s_cop;
1430 struct key *s_master_keys; /* master crypto keys in use */ 1436 struct key *s_master_keys; /* master crypto keys in use */
1431#endif 1437#endif
1438#ifdef CONFIG_FS_VERITY
1439 const struct fsverity_operations *s_vop;
1440#endif
1432 struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ 1441 struct hlist_bl_head s_roots; /* alternate root dentries for NFS */
1433 struct list_head s_mounts; /* list of mounts; _not_ for fs use */ 1442 struct list_head s_mounts; /* list of mounts; _not_ for fs use */
1434 struct block_device *s_bdev; 1443 struct block_device *s_bdev;
@@ -1966,6 +1975,7 @@ struct super_operations {
1966#endif 1975#endif
1967#define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */ 1976#define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */
1968#define S_CASEFOLD 32768 /* Casefolded file */ 1977#define S_CASEFOLD 32768 /* Casefolded file */
1978#define S_VERITY 65536 /* Verity file (using fs/verity/) */
1969 1979
1970/* 1980/*
1971 * Note that nosuid etc flags are inode-specific: setting some file-system 1981 * Note that nosuid etc flags are inode-specific: setting some file-system
@@ -2007,6 +2017,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
2007#define IS_DAX(inode) ((inode)->i_flags & S_DAX) 2017#define IS_DAX(inode) ((inode)->i_flags & S_DAX)
2008#define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED) 2018#define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED)
2009#define IS_CASEFOLDED(inode) ((inode)->i_flags & S_CASEFOLD) 2019#define IS_CASEFOLDED(inode) ((inode)->i_flags & S_CASEFOLD)
2020#define IS_VERITY(inode) ((inode)->i_flags & S_VERITY)
2010 2021
2011#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ 2022#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
2012 (inode)->i_rdev == WHITEOUT_DEV) 2023 (inode)->i_rdev == WHITEOUT_DEV)
diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
new file mode 100644
index 000000000000..3b6b8ccebe7d
--- /dev/null
+++ b/include/linux/fsverity.h
@@ -0,0 +1,211 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * fs-verity: read-only file-based authenticity protection
4 *
5 * This header declares the interface between the fs/verity/ support layer and
6 * filesystems that support fs-verity.
7 *
8 * Copyright 2019 Google LLC
9 */
10
11#ifndef _LINUX_FSVERITY_H
12#define _LINUX_FSVERITY_H
13
14#include <linux/fs.h>
15#include <uapi/linux/fsverity.h>
16
17/* Verity operations for filesystems */
18struct fsverity_operations {
19
20 /**
21 * Begin enabling verity on the given file.
22 *
23 * @filp: a readonly file descriptor for the file
24 *
25 * The filesystem must do any needed filesystem-specific preparations
26 * for enabling verity, e.g. evicting inline data. It also must return
27 * -EBUSY if verity is already being enabled on the given file.
28 *
29 * i_rwsem is held for write.
30 *
31 * Return: 0 on success, -errno on failure
32 */
33 int (*begin_enable_verity)(struct file *filp);
34
35 /**
36 * End enabling verity on the given file.
37 *
38 * @filp: a readonly file descriptor for the file
39 * @desc: the verity descriptor to write, or NULL on failure
40 * @desc_size: size of verity descriptor, or 0 on failure
41 * @merkle_tree_size: total bytes the Merkle tree took up
42 *
43 * If desc == NULL, then enabling verity failed and the filesystem only
44 * must do any necessary cleanups. Else, it must also store the given
45 * verity descriptor to a fs-specific location associated with the inode
46 * and do any fs-specific actions needed to mark the inode as a verity
47 * inode, e.g. setting a bit in the on-disk inode. The filesystem is
48 * also responsible for setting the S_VERITY flag in the VFS inode.
49 *
50 * i_rwsem is held for write, but it may have been dropped between
51 * ->begin_enable_verity() and ->end_enable_verity().
52 *
53 * Return: 0 on success, -errno on failure
54 */
55 int (*end_enable_verity)(struct file *filp, const void *desc,
56 size_t desc_size, u64 merkle_tree_size);
57
58 /**
59 * Get the verity descriptor of the given inode.
60 *
61 * @inode: an inode with the S_VERITY flag set
62 * @buf: buffer in which to place the verity descriptor
63 * @bufsize: size of @buf, or 0 to retrieve the size only
64 *
65 * If bufsize == 0, then the size of the verity descriptor is returned.
66 * Otherwise the verity descriptor is written to 'buf' and its actual
67 * size is returned; -ERANGE is returned if it's too large. This may be
68 * called by multiple processes concurrently on the same inode.
69 *
70 * Return: the size on success, -errno on failure
71 */
72 int (*get_verity_descriptor)(struct inode *inode, void *buf,
73 size_t bufsize);
74
75 /**
76 * Read a Merkle tree page of the given inode.
77 *
78 * @inode: the inode
79 * @index: 0-based index of the page within the Merkle tree
80 *
81 * This can be called at any time on an open verity file, as well as
82 * between ->begin_enable_verity() and ->end_enable_verity(). It may be
83 * called by multiple processes concurrently, even with the same page.
84 *
85 * Note that this must retrieve a *page*, not necessarily a *block*.
86 *
87 * Return: the page on success, ERR_PTR() on failure
88 */
89 struct page *(*read_merkle_tree_page)(struct inode *inode,
90 pgoff_t index);
91
92 /**
93 * Write a Merkle tree block to the given inode.
94 *
95 * @inode: the inode for which the Merkle tree is being built
96 * @buf: block to write
97 * @index: 0-based index of the block within the Merkle tree
98 * @log_blocksize: log base 2 of the Merkle tree block size
99 *
100 * This is only called between ->begin_enable_verity() and
101 * ->end_enable_verity().
102 *
103 * Return: 0 on success, -errno on failure
104 */
105 int (*write_merkle_tree_block)(struct inode *inode, const void *buf,
106 u64 index, int log_blocksize);
107};
108
109#ifdef CONFIG_FS_VERITY
110
111static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
112{
113 /* pairs with the cmpxchg() in fsverity_set_info() */
114 return READ_ONCE(inode->i_verity_info);
115}
116
117/* enable.c */
118
119extern int fsverity_ioctl_enable(struct file *filp, const void __user *arg);
120
121/* measure.c */
122
123extern int fsverity_ioctl_measure(struct file *filp, void __user *arg);
124
125/* open.c */
126
127extern int fsverity_file_open(struct inode *inode, struct file *filp);
128extern int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr);
129extern void fsverity_cleanup_inode(struct inode *inode);
130
131/* verify.c */
132
133extern bool fsverity_verify_page(struct page *page);
134extern void fsverity_verify_bio(struct bio *bio);
135extern void fsverity_enqueue_verify_work(struct work_struct *work);
136
137#else /* !CONFIG_FS_VERITY */
138
139static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
140{
141 return NULL;
142}
143
144/* enable.c */
145
146static inline int fsverity_ioctl_enable(struct file *filp,
147 const void __user *arg)
148{
149 return -EOPNOTSUPP;
150}
151
152/* measure.c */
153
154static inline int fsverity_ioctl_measure(struct file *filp, void __user *arg)
155{
156 return -EOPNOTSUPP;
157}
158
159/* open.c */
160
161static inline int fsverity_file_open(struct inode *inode, struct file *filp)
162{
163 return IS_VERITY(inode) ? -EOPNOTSUPP : 0;
164}
165
166static inline int fsverity_prepare_setattr(struct dentry *dentry,
167 struct iattr *attr)
168{
169 return IS_VERITY(d_inode(dentry)) ? -EOPNOTSUPP : 0;
170}
171
172static inline void fsverity_cleanup_inode(struct inode *inode)
173{
174}
175
176/* verify.c */
177
178static inline bool fsverity_verify_page(struct page *page)
179{
180 WARN_ON(1);
181 return false;
182}
183
184static inline void fsverity_verify_bio(struct bio *bio)
185{
186 WARN_ON(1);
187}
188
189static inline void fsverity_enqueue_verify_work(struct work_struct *work)
190{
191 WARN_ON(1);
192}
193
194#endif /* !CONFIG_FS_VERITY */
195
196/**
197 * fsverity_active() - do reads from the inode need to go through fs-verity?
198 *
199 * This checks whether ->i_verity_info has been set.
200 *
201 * Filesystems call this from ->readpages() to check whether the pages need to
202 * be verified or not. Don't use IS_VERITY() for this purpose; it's subject to
203 * a race condition where the file is being read concurrently with
204 * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before ->i_verity_info.)
205 */
206static inline bool fsverity_active(const struct inode *inode)
207{
208 return fsverity_get_info(inode) != NULL;
209}
210
211#endif /* _LINUX_FSVERITY_H */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 41bd84d25a98..aad225b05be7 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -258,6 +258,7 @@ struct fsxattr {
258#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ 258#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
259#define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */ 259#define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */
260#define FS_EXTENT_FL 0x00080000 /* Extents */ 260#define FS_EXTENT_FL 0x00080000 /* Extents */
261#define FS_VERITY_FL 0x00100000 /* Verity protected inode */
261#define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ 262#define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
262#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */ 263#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */
263#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ 264#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
diff --git a/include/uapi/linux/fsverity.h b/include/uapi/linux/fsverity.h
new file mode 100644
index 000000000000..da0daf6c193b
--- /dev/null
+++ b/include/uapi/linux/fsverity.h
@@ -0,0 +1,40 @@
1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2/*
3 * fs-verity user API
4 *
5 * These ioctls can be used on filesystems that support fs-verity. See the
6 * "User API" section of Documentation/filesystems/fsverity.rst.
7 *
8 * Copyright 2019 Google LLC
9 */
10#ifndef _UAPI_LINUX_FSVERITY_H
11#define _UAPI_LINUX_FSVERITY_H
12
13#include <linux/ioctl.h>
14#include <linux/types.h>
15
/*
 * Hash algorithm identifiers.  These numeric values are part of the user API
 * and must never change.
 */
#define FS_VERITY_HASH_ALG_SHA256	1
#define FS_VERITY_HASH_ALG_SHA512	2
18
19struct fsverity_enable_arg {
20 __u32 version;
21 __u32 hash_algorithm;
22 __u32 block_size;
23 __u32 salt_size;
24 __u64 salt_ptr;
25 __u32 sig_size;
26 __u32 __reserved1;
27 __u64 sig_ptr;
28 __u64 __reserved2[11];
29};
30
31struct fsverity_digest {
32 __u16 digest_algorithm;
33 __u16 digest_size; /* input/output */
34 __u8 digest[];
35};
36
/*
 * fs-verity ioctl numbers (type 'f', numbers 133 and 134).
 *
 * FS_IOC_ENABLE_VERITY is encoded as write-only: userspace passes a
 * struct fsverity_enable_arg to the kernel.  FS_IOC_MEASURE_VERITY is encoded
 * as read/write on a struct fsverity_digest, whose digest_size member is
 * marked input/output.
 */
#define FS_IOC_ENABLE_VERITY	_IOW('f', 133, struct fsverity_enable_arg)
#define FS_IOC_MEASURE_VERITY	_IOWR('f', 134, struct fsverity_digest)
39
40#endif /* _UAPI_LINUX_FSVERITY_H */