aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Nick Terrell <terrelln@fb.com> 2017-08-09 22:39:02 -0400
committer Chris Mason <clm@fb.com> 2017-08-15 12:02:09 -0400
commit 5c1aab1dd5445ed8bdcdbb575abc1b0d7ee5b2e7 (patch)
tree 3dbf4bef55ed320a36623f9fde983ee3ebb9f621
parent 73f3d1b48f5069d46ba48aa28c2898dc93185560 (diff)
btrfs: Add zstd support
Add zstd compression and decompression support to BtrFS. zstd at its fastest level compresses almost as well as zlib, while offering much faster compression and decompression, approaching lzo speeds.

I benchmarked btrfs with zstd compression against no compression, lzo compression, and zlib compression. I benchmarked two scenarios. Copying a set of files to btrfs, and then reading the files. Copying a tarball to btrfs, extracting it to btrfs, and then reading the extracted files. After every operation, I call `sync` and include the sync time. Between every pair of operations I unmount and remount the filesystem to avoid caching. The benchmark files can be found in the upstream zstd source repository under `contrib/linux-kernel/{btrfs-benchmark.sh,btrfs-extract-benchmark.sh}` [1] [2].

I ran the benchmarks on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM. The VM is running on a MacBook Pro with a 3.1 GHz Intel Core i7 processor, 16 GB of RAM, and a SSD.

The first compression benchmark is copying 10 copies of the unzipped Silesia corpus [3] into a BtrFS filesystem mounted with `-o compress-force=Method`. The decompression benchmark times how long it takes to `tar` all 10 copies into `/dev/null`. The compression ratio is measured by comparing the output of `df` and `du`. See the benchmark file [1] for details. I benchmarked multiple zstd compression levels, although the patch uses zstd level 1.

| Method  | Ratio | Compression MB/s | Decompression speed |
|---------|-------|------------------|---------------------|
| None    | 0.99  | 504              | 686                 |
| lzo     | 1.66  | 398              | 442                 |
| zlib    | 2.58  | 65               | 241                 |
| zstd 1  | 2.57  | 260              | 383                 |
| zstd 3  | 2.71  | 174              | 408                 |
| zstd 6  | 2.87  | 70               | 398                 |
| zstd 9  | 2.92  | 43               | 406                 |
| zstd 12 | 2.93  | 21               | 408                 |
| zstd 15 | 3.01  | 11               | 354                 |

The next benchmark first copies `linux-4.11.6.tar` [4] to btrfs. Then it measures the compression ratio, extracts the tar, and deletes the tar. Then it measures the compression ratio again, and `tar`s the extracted files into `/dev/null`. See the benchmark file [2] for details.

| Method | Tar Ratio | Extract Ratio | Copy (s) | Extract (s)| Read (s) |
|--------|-----------|---------------|----------|------------|----------|
| None   | 0.97      | 0.78          | 0.981    | 5.501      | 8.807    |
| lzo    | 2.06      | 1.38          | 1.631    | 8.458      | 8.585    |
| zlib   | 3.40      | 1.86          | 7.750    | 21.544     | 11.744   |
| zstd 1 | 3.57      | 1.85          | 2.579    | 11.479     | 9.389    |

[1] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/btrfs-benchmark.sh
[2] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/btrfs-extract-benchmark.sh
[3] http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
[4] https://cdn.kernel.org/pub/linux/kernel/v4.x/linux-4.11.6.tar.xz

zstd source repository: https://github.com/facebook/zstd

Signed-off-by: Nick Terrell <terrelln@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r-- fs/btrfs/Kconfig 2
-rw-r--r-- fs/btrfs/Makefile 2
-rw-r--r-- fs/btrfs/compression.c 1
-rw-r--r-- fs/btrfs/compression.h 6
-rw-r--r-- fs/btrfs/ctree.h 1
-rw-r--r-- fs/btrfs/disk-io.c 2
-rw-r--r-- fs/btrfs/ioctl.c 6
-rw-r--r-- fs/btrfs/props.c 6
-rw-r--r-- fs/btrfs/super.c 12
-rw-r--r-- fs/btrfs/sysfs.c 2
-rw-r--r-- fs/btrfs/zstd.c 432
-rw-r--r-- include/uapi/linux/btrfs.h 8
12 files changed, 468 insertions, 12 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 80e9c18ea64f..a26c63b4ad68 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -6,6 +6,8 @@ config BTRFS_FS
6 select ZLIB_DEFLATE 6 select ZLIB_DEFLATE
7 select LZO_COMPRESS 7 select LZO_COMPRESS
8 select LZO_DECOMPRESS 8 select LZO_DECOMPRESS
9 select ZSTD_COMPRESS
10 select ZSTD_DECOMPRESS
9 select RAID6_PQ 11 select RAID6_PQ
10 select XOR_BLOCKS 12 select XOR_BLOCKS
11 select SRCU 13 select SRCU
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 128ce17a80b0..962a95aefb81 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,7 +6,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 transaction.o inode.o file.o tree-defrag.o \ 6 transaction.o inode.o file.o tree-defrag.o \
7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
9 export.o tree-log.o free-space-cache.o zlib.o lzo.o \ 9 export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
10 compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ 10 compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
11 reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ 11 reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
12 uuid-tree.o props.o hash.o free-space-tree.o 12 uuid-tree.o props.o hash.o free-space-tree.o
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d2ef9ac2a630..4ff42d18a64d 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -704,6 +704,7 @@ static struct {
704static const struct btrfs_compress_op * const btrfs_compress_op[] = { 704static const struct btrfs_compress_op * const btrfs_compress_op[] = {
705 &btrfs_zlib_compress, 705 &btrfs_zlib_compress,
706 &btrfs_lzo_compress, 706 &btrfs_lzo_compress,
707 &btrfs_zstd_compress,
707}; 708};
708 709
709void __init btrfs_init_compress(void) 710void __init btrfs_init_compress(void)
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 87f6d3332163..2269e00854d8 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -99,8 +99,9 @@ enum btrfs_compression_type {
99 BTRFS_COMPRESS_NONE = 0, 99 BTRFS_COMPRESS_NONE = 0,
100 BTRFS_COMPRESS_ZLIB = 1, 100 BTRFS_COMPRESS_ZLIB = 1,
101 BTRFS_COMPRESS_LZO = 2, 101 BTRFS_COMPRESS_LZO = 2,
102 BTRFS_COMPRESS_TYPES = 2, 102 BTRFS_COMPRESS_ZSTD = 3,
103 BTRFS_COMPRESS_LAST = 3, 103 BTRFS_COMPRESS_TYPES = 3,
104 BTRFS_COMPRESS_LAST = 4,
104}; 105};
105 106
106struct btrfs_compress_op { 107struct btrfs_compress_op {
@@ -128,5 +129,6 @@ struct btrfs_compress_op {
128 129
129extern const struct btrfs_compress_op btrfs_zlib_compress; 130extern const struct btrfs_compress_op btrfs_zlib_compress;
130extern const struct btrfs_compress_op btrfs_lzo_compress; 131extern const struct btrfs_compress_op btrfs_lzo_compress;
132extern const struct btrfs_compress_op btrfs_zstd_compress;
131 133
132#endif 134#endif
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3f3eb7b17cac..845d77c097d6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -270,6 +270,7 @@ struct btrfs_super_block {
270 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ 270 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
271 BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ 271 BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
272 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ 272 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
273 BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD | \
273 BTRFS_FEATURE_INCOMPAT_RAID56 | \ 274 BTRFS_FEATURE_INCOMPAT_RAID56 | \
274 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ 275 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
275 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \ 276 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 080e2ebb8aa0..04632f4de933 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2828,6 +2828,8 @@ int open_ctree(struct super_block *sb,
2828 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; 2828 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
2829 if (fs_info->compress_type == BTRFS_COMPRESS_LZO) 2829 if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
2830 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 2830 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
2831 else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD)
2832 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD;
2831 2833
2832 if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) 2834 if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
2833 btrfs_info(fs_info, "has skinny extents"); 2835 btrfs_info(fs_info, "has skinny extents");
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fa1b78cf25f6..b9963d94d727 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -327,8 +327,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
327 327
328 if (fs_info->compress_type == BTRFS_COMPRESS_LZO) 328 if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
329 comp = "lzo"; 329 comp = "lzo";
330 else 330 else if (fs_info->compress_type == BTRFS_COMPRESS_ZLIB)
331 comp = "zlib"; 331 comp = "zlib";
332 else
333 comp = "zstd";
332 ret = btrfs_set_prop(inode, "btrfs.compression", 334 ret = btrfs_set_prop(inode, "btrfs.compression",
333 comp, strlen(comp), 0); 335 comp, strlen(comp), 0);
334 if (ret) 336 if (ret)
@@ -1466,6 +1468,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1466 1468
1467 if (range->compress_type == BTRFS_COMPRESS_LZO) { 1469 if (range->compress_type == BTRFS_COMPRESS_LZO) {
1468 btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); 1470 btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
1471 } else if (range->compress_type == BTRFS_COMPRESS_ZSTD) {
1472 btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
1469 } 1473 }
1470 1474
1471 ret = defrag_count; 1475 ret = defrag_count;
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 4b23ae5d0e5c..20631e9273a0 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -390,6 +390,8 @@ static int prop_compression_validate(const char *value, size_t len)
390 return 0; 390 return 0;
391 else if (!strncmp("zlib", value, len)) 391 else if (!strncmp("zlib", value, len))
392 return 0; 392 return 0;
393 else if (!strncmp("zstd", value, len))
394 return 0;
393 395
394 return -EINVAL; 396 return -EINVAL;
395} 397}
@@ -412,6 +414,8 @@ static int prop_compression_apply(struct inode *inode,
412 type = BTRFS_COMPRESS_LZO; 414 type = BTRFS_COMPRESS_LZO;
413 else if (!strncmp("zlib", value, len)) 415 else if (!strncmp("zlib", value, len))
414 type = BTRFS_COMPRESS_ZLIB; 416 type = BTRFS_COMPRESS_ZLIB;
417 else if (!strncmp("zstd", value, len))
418 type = BTRFS_COMPRESS_ZSTD;
415 else 419 else
416 return -EINVAL; 420 return -EINVAL;
417 421
@@ -429,6 +433,8 @@ static const char *prop_compression_extract(struct inode *inode)
429 return "zlib"; 433 return "zlib";
430 case BTRFS_COMPRESS_LZO: 434 case BTRFS_COMPRESS_LZO:
431 return "lzo"; 435 return "lzo";
436 case BTRFS_COMPRESS_ZSTD:
437 return "zstd";
432 } 438 }
433 439
434 return NULL; 440 return NULL;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 12540b6104b5..c370deadb790 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -513,6 +513,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
513 btrfs_clear_opt(info->mount_opt, NODATASUM); 513 btrfs_clear_opt(info->mount_opt, NODATASUM);
514 btrfs_set_fs_incompat(info, COMPRESS_LZO); 514 btrfs_set_fs_incompat(info, COMPRESS_LZO);
515 no_compress = 0; 515 no_compress = 0;
516 } else if (strcmp(args[0].from, "zstd") == 0) {
517 compress_type = "zstd";
518 info->compress_type = BTRFS_COMPRESS_ZSTD;
519 btrfs_set_opt(info->mount_opt, COMPRESS);
520 btrfs_clear_opt(info->mount_opt, NODATACOW);
521 btrfs_clear_opt(info->mount_opt, NODATASUM);
522 btrfs_set_fs_incompat(info, COMPRESS_ZSTD);
523 no_compress = 0;
516 } else if (strncmp(args[0].from, "no", 2) == 0) { 524 } else if (strncmp(args[0].from, "no", 2) == 0) {
517 compress_type = "no"; 525 compress_type = "no";
518 btrfs_clear_opt(info->mount_opt, COMPRESS); 526 btrfs_clear_opt(info->mount_opt, COMPRESS);
@@ -1227,8 +1235,10 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
1227 if (btrfs_test_opt(info, COMPRESS)) { 1235 if (btrfs_test_opt(info, COMPRESS)) {
1228 if (info->compress_type == BTRFS_COMPRESS_ZLIB) 1236 if (info->compress_type == BTRFS_COMPRESS_ZLIB)
1229 compress_type = "zlib"; 1237 compress_type = "zlib";
1230 else 1238 else if (info->compress_type == BTRFS_COMPRESS_LZO)
1231 compress_type = "lzo"; 1239 compress_type = "lzo";
1240 else
1241 compress_type = "zstd";
1232 if (btrfs_test_opt(info, FORCE_COMPRESS)) 1242 if (btrfs_test_opt(info, FORCE_COMPRESS))
1233 seq_printf(seq, ",compress-force=%s", compress_type); 1243 seq_printf(seq, ",compress-force=%s", compress_type);
1234 else 1244 else
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c2d5f3580b4c..2b6d37c09a81 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -200,6 +200,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(mixed_backref, MIXED_BACKREF);
200BTRFS_FEAT_ATTR_INCOMPAT(default_subvol, DEFAULT_SUBVOL); 200BTRFS_FEAT_ATTR_INCOMPAT(default_subvol, DEFAULT_SUBVOL);
201BTRFS_FEAT_ATTR_INCOMPAT(mixed_groups, MIXED_GROUPS); 201BTRFS_FEAT_ATTR_INCOMPAT(mixed_groups, MIXED_GROUPS);
202BTRFS_FEAT_ATTR_INCOMPAT(compress_lzo, COMPRESS_LZO); 202BTRFS_FEAT_ATTR_INCOMPAT(compress_lzo, COMPRESS_LZO);
203BTRFS_FEAT_ATTR_INCOMPAT(compress_zstd, COMPRESS_ZSTD);
203BTRFS_FEAT_ATTR_INCOMPAT(big_metadata, BIG_METADATA); 204BTRFS_FEAT_ATTR_INCOMPAT(big_metadata, BIG_METADATA);
204BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF); 205BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
205BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56); 206BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
@@ -212,6 +213,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
212 BTRFS_FEAT_ATTR_PTR(default_subvol), 213 BTRFS_FEAT_ATTR_PTR(default_subvol),
213 BTRFS_FEAT_ATTR_PTR(mixed_groups), 214 BTRFS_FEAT_ATTR_PTR(mixed_groups),
214 BTRFS_FEAT_ATTR_PTR(compress_lzo), 215 BTRFS_FEAT_ATTR_PTR(compress_lzo),
216 BTRFS_FEAT_ATTR_PTR(compress_zstd),
215 BTRFS_FEAT_ATTR_PTR(big_metadata), 217 BTRFS_FEAT_ATTR_PTR(big_metadata),
216 BTRFS_FEAT_ATTR_PTR(extended_iref), 218 BTRFS_FEAT_ATTR_PTR(extended_iref),
217 BTRFS_FEAT_ATTR_PTR(raid56), 219 BTRFS_FEAT_ATTR_PTR(raid56),
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
new file mode 100644
index 000000000000..607ce47b483a
--- /dev/null
+++ b/fs/btrfs/zstd.c
@@ -0,0 +1,432 @@
1/*
2 * Copyright (c) 2016-present, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 */
14#include <linux/bio.h>
15#include <linux/err.h>
16#include <linux/init.h>
17#include <linux/kernel.h>
18#include <linux/mm.h>
19#include <linux/pagemap.h>
20#include <linux/refcount.h>
21#include <linux/sched.h>
22#include <linux/slab.h>
23#include <linux/zstd.h>
24#include "compression.h"
25
/*
 * Upper bound on the zstd window log used by this file; the parameters
 * returned by ZSTD_getParams() are clamped to it in
 * zstd_get_btrfs_parameters().
 */
26#define ZSTD_BTRFS_MAX_WINDOWLOG 17
/* Largest input a single stream is sized for: 1 << 17 bytes (128 KiB). */
27#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
/*
 * Compression level passed to ZSTD_getParams().
 * NOTE(review): the commit message says the patch uses zstd level 1, but
 * this constant is 3 -- confirm which one is intended.
 */
28#define ZSTD_BTRFS_DEFAULT_LEVEL 3
29
/*
 * Build the zstd parameters used for an input of @src_len bytes.
 *
 * Starts from ZSTD_getParams() at ZSTD_BTRFS_DEFAULT_LEVEL, with 0 as the
 * third argument (the dictionary size in the zstd API), and clamps the
 * window log to ZSTD_BTRFS_MAX_WINDOWLOG.
 *
 * A @src_len larger than ZSTD_BTRFS_MAX_INPUT triggers a WARN_ON(), but the
 * parameters are still returned.
 */
30static ZSTD_parameters zstd_get_btrfs_parameters(size_t src_len)
31{
32 ZSTD_parameters params = ZSTD_getParams(ZSTD_BTRFS_DEFAULT_LEVEL,
33 src_len, 0);
34
/* Never let the window grow past what the workspace was sized for */
35 if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
36 params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
37 WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT);
38 return params;
39}
40
/*
 * Per-workspace state shared by the zstd compression and decompression
 * routines in this file.
 */
41struct workspace {
/* Memory handed to ZSTD_initCStream() / ZSTD_initDStream() */
42 void *mem;
/* Size of @mem in bytes */
43 size_t size;
/* PAGE_SIZE buffer that the decompression routines write into */
44 char *buf;
/* Link handed to callers; list_entry() on it recovers the workspace */
45 struct list_head list;
46};
47
/*
 * Release the workspace that embeds the list head @ws.
 *
 * The stream memory is freed with kvfree(); the page buffer and the
 * workspace itself are freed with kfree(). zstd_alloc_workspace() also calls
 * this on a partially allocated workspace, so @mem and/or @buf may be NULL
 * here.
 */
48static void zstd_free_workspace(struct list_head *ws)
49{
50 struct workspace *workspace = list_entry(ws, struct workspace, list);
51
52 kvfree(workspace->mem);
53 kfree(workspace->buf);
54 kfree(workspace);
55}
56
/*
 * Allocate a workspace usable for both compression and decompression.
 *
 * The stream memory is sized as the larger of
 *  - the compression-stream bound for the parameters of a
 *    ZSTD_BTRFS_MAX_INPUT-sized input, and
 *  - the decompression-stream bound for ZSTD_BTRFS_MAX_INPUT.
 * A separate PAGE_SIZE buffer is allocated as well.
 *
 * Returns the workspace's list head on success, or ERR_PTR(-ENOMEM) if any
 * allocation fails.
 */
57static struct list_head *zstd_alloc_workspace(void)
58{
59 ZSTD_parameters params =
60 zstd_get_btrfs_parameters(ZSTD_BTRFS_MAX_INPUT);
61 struct workspace *workspace;
62
/* Zeroed allocation, so mem/buf start out NULL for the fail path below */
63 workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
64 if (!workspace)
65 return ERR_PTR(-ENOMEM);
66
67 workspace->size = max_t(size_t,
68 ZSTD_CStreamWorkspaceBound(params.cParams),
69 ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT));
70 workspace->mem = kvmalloc(workspace->size, GFP_KERNEL);
71 workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
/* Both allocations are checked together; the fail path frees whichever succeeded */
72 if (!workspace->mem || !workspace->buf)
73 goto fail;
74
75 INIT_LIST_HEAD(&workspace->list);
76
77 return &workspace->list;
78fail:
79 zstd_free_workspace(&workspace->list);
80 return ERR_PTR(-ENOMEM);
81}
82
/*
 * Compress page-cache data from @mapping, starting at byte offset @start,
 * into freshly allocated pages stored in @pages.
 *
 * @ws:        list head of the workspace to use
 * @mapping:   address space the input pages are looked up in
 * @start:     byte offset of the first input byte
 * @pages:     array that receives the allocated output pages
 * @out_pages: in: maximum number of output pages; out: number of pages
 *             stored in @pages (set on every return path, including errors)
 * @total_in:  out: number of input bytes consumed (left at 0 on error)
 * @total_out: in: number of input bytes to compress; out: number of
 *             compressed bytes produced (left at 0 on error)
 *
 * Returns 0 on success, -E2BIG if the output is not smaller than the input
 * or does not fit in the allowed output pages, -ENOMEM if an output page
 * cannot be allocated, and -EIO if zstd reports an error.
 *
 * Output pages already stored in @pages are not freed here on error.
 * NOTE(review): presumably the caller releases them using *out_pages --
 * confirm against the caller.
 */
83static int zstd_compress_pages(struct list_head *ws,
84 struct address_space *mapping,
85 u64 start,
86 struct page **pages,
87 unsigned long *out_pages,
88 unsigned long *total_in,
89 unsigned long *total_out)
90{
91 struct workspace *workspace = list_entry(ws, struct workspace, list);
92 ZSTD_CStream *stream;
93 int ret = 0;
94 int nr_pages = 0;
95 struct page *in_page = NULL; /* The current page to read */
96 struct page *out_page = NULL; /* The current page to write to */
97 ZSTD_inBuffer in_buf = { NULL, 0, 0 };
98 ZSTD_outBuffer out_buf = { NULL, 0, 0 };
99 unsigned long tot_in = 0;
100 unsigned long tot_out = 0;
/* On entry *total_out carries the number of input bytes to compress */
101 unsigned long len = *total_out;
102 const unsigned long nr_dest_pages = *out_pages;
103 unsigned long max_out = nr_dest_pages * PAGE_SIZE;
104 ZSTD_parameters params = zstd_get_btrfs_parameters(len);
105
/* Outputs default to 0 so that error paths report nothing consumed/produced */
106 *out_pages = 0;
107 *total_out = 0;
108 *total_in = 0;
109
110 /* Initialize the stream */
111 stream = ZSTD_initCStream(params, len, workspace->mem,
112 workspace->size);
113 if (!stream) {
114 pr_warn("BTRFS: ZSTD_initCStream failed\n");
115 ret = -EIO;
116 goto out;
117 }
118
119 /* map in the first page of input data */
/*
 * NOTE(review): the find_get_page() result is passed to kmap() without a
 * NULL check -- presumably the caller guarantees the page is present;
 * confirm.
 */
120 in_page = find_get_page(mapping, start >> PAGE_SHIFT);
121 in_buf.src = kmap(in_page);
122 in_buf.pos = 0;
123 in_buf.size = min_t(size_t, len, PAGE_SIZE);
124
125
126 /* Allocate and map in the output buffer */
127 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
128 if (out_page == NULL) {
129 ret = -ENOMEM;
130 goto out;
131 }
132 pages[nr_pages++] = out_page;
133 out_buf.dst = kmap(out_page);
134 out_buf.pos = 0;
135 out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
136
/* Main loop: feed input one page at a time, collecting output page by page */
137 while (1) {
138 size_t ret2;
139
140 ret2 = ZSTD_compressStream(stream, &out_buf, &in_buf);
141 if (ZSTD_isError(ret2)) {
142 pr_debug("BTRFS: ZSTD_compressStream returned %d\n",
143 ZSTD_getErrorCode(ret2));
144 ret = -EIO;
145 goto out;
146 }
147
148 /* Check to see if we are making it bigger */
/* Only checked once more than 8192 input bytes have been consumed */
149 if (tot_in + in_buf.pos > 8192 &&
150 tot_in + in_buf.pos <
151 tot_out + out_buf.pos) {
152 ret = -E2BIG;
153 goto out;
154 }
155
156 /* We've reached the end of our output range */
157 if (out_buf.pos >= max_out) {
158 tot_out += out_buf.pos;
159 ret = -E2BIG;
160 goto out;
161 }
162
163 /* Check if we need more output space */
164 if (out_buf.pos == out_buf.size) {
/* Current output page is full: account for it and move to the next one */
165 tot_out += PAGE_SIZE;
166 max_out -= PAGE_SIZE;
167 kunmap(out_page);
168 if (nr_pages == nr_dest_pages) {
/* Already unmapped above; NULL keeps the cleanup path from kunmap()ing again */
169 out_page = NULL;
170 ret = -E2BIG;
171 goto out;
172 }
173 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
174 if (out_page == NULL) {
175 ret = -ENOMEM;
176 goto out;
177 }
178 pages[nr_pages++] = out_page;
179 out_buf.dst = kmap(out_page);
180 out_buf.pos = 0;
181 out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
182 }
183
184 /* We've reached the end of the input */
185 if (in_buf.pos >= len) {
186 tot_in += in_buf.pos;
187 break;
188 }
189
190 /* Check if we need more input */
191 if (in_buf.pos == in_buf.size) {
/* Drop the consumed input page and map the next one */
192 tot_in += PAGE_SIZE;
193 kunmap(in_page);
194 put_page(in_page);
195
196 start += PAGE_SIZE;
197 len -= PAGE_SIZE;
/* NOTE(review): as above, the lookup result is not checked for NULL */
198 in_page = find_get_page(mapping, start >> PAGE_SHIFT);
199 in_buf.src = kmap(in_page);
200 in_buf.pos = 0;
201 in_buf.size = min_t(size_t, len, PAGE_SIZE);
202 }
203 }
/*
 * All input has been consumed: keep calling ZSTD_endStream() to flush the
 * remaining output. A return value of 0 is treated as "fully flushed"; any
 * other non-error value moves on to a new output page.
 */
204 while (1) {
205 size_t ret2;
206
207 ret2 = ZSTD_endStream(stream, &out_buf);
208 if (ZSTD_isError(ret2)) {
209 pr_debug("BTRFS: ZSTD_endStream returned %d\n",
210 ZSTD_getErrorCode(ret2));
211 ret = -EIO;
212 goto out;
213 }
214 if (ret2 == 0) {
215 tot_out += out_buf.pos;
216 break;
217 }
/* Still data to flush but the allowed output range is used up */
218 if (out_buf.pos >= max_out) {
219 tot_out += out_buf.pos;
220 ret = -E2BIG;
221 goto out;
222 }
223
224 tot_out += PAGE_SIZE;
225 max_out -= PAGE_SIZE;
226 kunmap(out_page);
227 if (nr_pages == nr_dest_pages) {
/* Already unmapped above; NULL keeps the cleanup path from kunmap()ing again */
228 out_page = NULL;
229 ret = -E2BIG;
230 goto out;
231 }
232 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
233 if (out_page == NULL) {
234 ret = -ENOMEM;
235 goto out;
236 }
237 pages[nr_pages++] = out_page;
238 out_buf.dst = kmap(out_page);
239 out_buf.pos = 0;
240 out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
241 }
242
/* The compressed result is not smaller than the input: report -E2BIG */
243 if (tot_out >= tot_in) {
244 ret = -E2BIG;
245 goto out;
246 }
247
248 ret = 0;
249 *total_in = tot_in;
250 *total_out = tot_out;
251out:
/* Always report how many output pages were stored in @pages */
252 *out_pages = nr_pages;
253 /* Cleanup */
254 if (in_page) {
255 kunmap(in_page);
256 put_page(in_page);
257 }
258 if (out_page)
259 kunmap(out_page);
260 return ret;
261}
262
/*
 * Decompress the compressed pages of @cb into its original bio.
 *
 * Input is fed to zstd one compressed page at a time. Output is produced
 * into the workspace's PAGE_SIZE buffer and handed to
 * btrfs_decompress_buf2page() after every ZSTD_decompressStream() call.
 *
 * @ws: list head of the workspace to use
 * @cb: compressed bio; its compressed_pages, start, orig_bio and
 *      compressed_len fields are read
 *
 * Returns 0 on success, or -EIO if the stream cannot be initialised, zstd
 * reports an error, or the compressed pages run out while the loop still
 * expects more input.
 */
263static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
264{
265 struct workspace *workspace = list_entry(ws, struct workspace, list);
266 struct page **pages_in = cb->compressed_pages;
267 u64 disk_start = cb->start;
268 struct bio *orig_bio = cb->orig_bio;
269 size_t srclen = cb->compressed_len;
270 ZSTD_DStream *stream;
271 int ret = 0;
272 unsigned long page_in_index = 0;
273 unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
274 unsigned long buf_start;
275 unsigned long total_out = 0;
276 ZSTD_inBuffer in_buf = { NULL, 0, 0 };
277 ZSTD_outBuffer out_buf = { NULL, 0, 0 };
278
279 stream = ZSTD_initDStream(
280 ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
281 if (!stream) {
282 pr_debug("BTRFS: ZSTD_initDStream failed\n");
283 ret = -EIO;
284 goto done;
285 }
286
/* Map the first compressed page as input */
287 in_buf.src = kmap(pages_in[page_in_index]);
288 in_buf.pos = 0;
289 in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
290
/* Output always goes through the workspace's page-sized buffer */
291 out_buf.dst = workspace->buf;
292 out_buf.pos = 0;
293 out_buf.size = PAGE_SIZE;
294
295 while (1) {
296 size_t ret2;
297
298 ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf);
299 if (ZSTD_isError(ret2)) {
300 pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
301 ZSTD_getErrorCode(ret2));
302 ret = -EIO;
303 goto done;
304 }
/* [buf_start, total_out) is the decompressed range now held in the buffer */
305 buf_start = total_out;
306 total_out += out_buf.pos;
307 out_buf.pos = 0;
308
309 ret = btrfs_decompress_buf2page(out_buf.dst, buf_start,
310 total_out, disk_start, orig_bio);
/*
 * NOTE(review): a 0 return ends the loop -- presumably it means the bio
 * needs no more data; confirm against btrfs_decompress_buf2page().
 */
311 if (ret == 0)
312 break;
313
/* All compressed input has been consumed */
314 if (in_buf.pos >= srclen)
315 break;
316
317 /* Check if we've hit the end of a frame */
318 if (ret2 == 0)
319 break;
320
321 if (in_buf.pos == in_buf.size) {
/* Current input page is exhausted: unmap it and move to the next one */
322 kunmap(pages_in[page_in_index++]);
323 if (page_in_index >= total_pages_in) {
/* Nothing is mapped any more; NULL makes the done: path skip kunmap() */
324 in_buf.src = NULL;
325 ret = -EIO;
326 goto done;
327 }
328 srclen -= PAGE_SIZE;
329 in_buf.src = kmap(pages_in[page_in_index]);
330 in_buf.pos = 0;
331 in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
332 }
333 }
/*
 * Every break out of the loop counts as success.
 * NOTE(review): zero_fill_bio() presumably zeroes whatever part of the bio
 * was not filled -- confirm.
 */
334 ret = 0;
335 zero_fill_bio(orig_bio);
336done:
337 if (in_buf.src)
338 kunmap(pages_in[page_in_index]);
339 return ret;
340}
341
/*
 * Decompress the buffer @data_in and copy part of the result into
 * @dest_page.
 *
 * @ws:         list head of the workspace to use
 * @data_in:    compressed input
 * @dest_page:  page that receives the decompressed bytes
 * @start_byte: offset within the decompressed stream at which copying starts
 * @srclen:     length of @data_in in bytes
 * @destlen:    number of bytes wanted; clamped to PAGE_SIZE
 *
 * Whatever part of the first @destlen bytes of @dest_page was not filled is
 * zeroed before returning, on both the success and the error paths.
 *
 * Returns 0 on success, or -EIO if the stream cannot be initialised, zstd
 * reports an error, or the frame ends while the loop still wants more data.
 */
342static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
343 struct page *dest_page,
344 unsigned long start_byte,
345 size_t srclen, size_t destlen)
346{
347 struct workspace *workspace = list_entry(ws, struct workspace, list);
348 ZSTD_DStream *stream;
349 int ret = 0;
350 size_t ret2;
351 ZSTD_inBuffer in_buf = { NULL, 0, 0 };
352 ZSTD_outBuffer out_buf = { NULL, 0, 0 };
353 unsigned long total_out = 0;
354 unsigned long pg_offset = 0;
355 char *kaddr;
356
357 stream = ZSTD_initDStream(
358 ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
359 if (!stream) {
360 pr_warn("BTRFS: ZSTD_initDStream failed\n");
361 ret = -EIO;
/*
 * NOTE(review): this jump happens before destlen is clamped to PAGE_SIZE
 * below, so the zeroing at finish: uses the caller's unclamped value --
 * confirm callers never pass more than PAGE_SIZE.
 */
362 goto finish;
363 }
364
365 destlen = min_t(size_t, destlen, PAGE_SIZE);
366
/* The whole compressed buffer is presented as a single input chunk */
367 in_buf.src = data_in;
368 in_buf.pos = 0;
369 in_buf.size = srclen;
370
371 out_buf.dst = workspace->buf;
372 out_buf.pos = 0;
373 out_buf.size = PAGE_SIZE;
374
/* Non-zero so the "frame is over" check does not fire on the first pass */
375 ret2 = 1;
376 while (pg_offset < destlen && in_buf.pos < in_buf.size) {
377 unsigned long buf_start;
378 unsigned long buf_offset;
379 unsigned long bytes;
380
381 /* Check if the frame is over and we still need more input */
382 if (ret2 == 0) {
383 pr_debug("BTRFS: ZSTD_decompressStream ended early\n");
384 ret = -EIO;
385 goto finish;
386 }
387 ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf);
388 if (ZSTD_isError(ret2)) {
389 pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
390 ZSTD_getErrorCode(ret2));
391 ret = -EIO;
392 goto finish;
393 }
394
/* [buf_start, total_out) is the decompressed range now held in the buffer */
395 buf_start = total_out;
396 total_out += out_buf.pos;
397 out_buf.pos = 0;
398
/* Nothing past @start_byte has been produced yet: keep decompressing */
399 if (total_out <= start_byte)
400 continue;
401
/* Skip the leading part of the buffer that lies before @start_byte */
402 if (total_out > start_byte && buf_start < start_byte)
403 buf_offset = start_byte - buf_start;
404 else
405 buf_offset = 0;
406
/*
 * NOTE(review): the copy length is bounded by out_buf.size (the buffer
 * capacity), not by the number of bytes the last ZSTD_decompressStream()
 * call produced -- confirm stale buffer contents cannot be copied when
 * fewer than PAGE_SIZE bytes were written.
 */
407 bytes = min_t(unsigned long, destlen - pg_offset,
408 out_buf.size - buf_offset);
409
410 kaddr = kmap_atomic(dest_page);
411 memcpy(kaddr + pg_offset, out_buf.dst + buf_offset, bytes);
412 kunmap_atomic(kaddr);
413
414 pg_offset += bytes;
415 }
416 ret = 0;
417finish:
/* Zero whatever part of the requested range was not filled */
418 if (pg_offset < destlen) {
419 kaddr = kmap_atomic(dest_page);
420 memset(kaddr + pg_offset, 0, destlen - pg_offset);
421 kunmap_atomic(kaddr);
422 }
423 return ret;
424}
425
/*
 * zstd implementation of the btrfs compression operations; wires the
 * functions defined in this file into struct btrfs_compress_op.
 */
426const struct btrfs_compress_op btrfs_zstd_compress = {
427 .alloc_workspace = zstd_alloc_workspace,
428 .free_workspace = zstd_free_workspace,
429 .compress_pages = zstd_compress_pages,
430 .decompress_bio = zstd_decompress_bio,
431 .decompress = zstd_decompress,
432};
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 9aa74f317747..378230c163d5 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -255,13 +255,7 @@ struct btrfs_ioctl_fs_info_args {
255#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) 255#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
256#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) 256#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
257#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) 257#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
258/* 258#define BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD (1ULL << 4)
259 * some patches floated around with a second compression method
260 * lets save that incompat here for when they do get in
261 * Note we don't actually support it, we're just reserving the
262 * number
263 */
264#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 (1ULL << 4)
265 259
266/* 260/*
267 * older kernels tried to do bigger metadata blocks, but the 261 * older kernels tried to do bigger metadata blocks, but the