aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKyungsik Lee <kyungsik.lee@lge.com>2013-07-08 19:01:45 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-07-09 13:33:30 -0400
commitcffb78b0e0b3a30b059b27a1d97500cf6464efa9 (patch)
treee20e06f58e90d3a6f3c3ff91547a9e9d51a8f230
parent4df87bb7b6a22dfc6fdd5abb3dd362b3af2c164d (diff)
decompressor: add LZ4 decompressor module
Add support for LZ4 decompression in the Linux Kernel. LZ4 Decompression APIs for kernel are based on LZ4 implementation by Yann Collet. Benchmark Results(PATCH v3) Compiler: Linaro ARM gcc 4.6.2 1. ARMv7, 1.5GHz based board Kernel: linux 3.4 Uncompressed Kernel Size: 14MB Compressed Size Decompression Speed LZO 6.7MB 20.1MB/s, 25.2MB/s(UA) LZ4 7.3MB 29.1MB/s, 45.6MB/s(UA) 2. ARMv7, 1.7GHz based board Kernel: linux 3.7 Uncompressed Kernel Size: 14MB Compressed Size Decompression Speed LZO 6.0MB 34.1MB/s, 52.2MB/s(UA) LZ4 6.5MB 86.7MB/s - UA: Unaligned memory Access support - Latest patch set for LZO applied This patch set is for adding support for LZ4-compressed Kernel. LZ4 is a very fast lossless compression algorithm and it also features an extremely fast decoder [1]. But we have five of decompressors already and one question which does arise, however, is that of where do we stop adding new ones? This issue had been discussed and came to the conclusion [2]. Russell King said that we should have: - one decompressor which is the fastest - one decompressor for the highest compression ratio - one popular decompressor (eg conventional gzip) If we have a replacement one for one of these, then it should do exactly that: replace it. The benchmark shows that an 8% increase in image size vs a 66% increase in decompression speed compared to LZO(which has been known as the fastest decompressor in the Kernel). Therefore the "fast but may not be small" compression title has clearly been taken by LZ4 [3]. [1] http://code.google.com/p/lz4/ [2] http://thread.gmane.org/gmane.linux.kbuild.devel/9157 [3] http://thread.gmane.org/gmane.linux.kbuild.devel/9347 LZ4 homepage: http://fastcompression.blogspot.com/p/lz4.html LZ4 source repository: http://code.google.com/p/lz4/ Signed-off-by: Kyungsik Lee <kyungsik.lee@lge.com> Signed-off-by: Yann Collet <yann.collet.73@gmail.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Borislav Petkov <bp@alien8.de> Cc: Florian Fainelli <florian@openwrt.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/lz4.h51
-rw-r--r--lib/lz4/lz4_decompress.c326
-rw-r--r--lib/lz4/lz4defs.h94
3 files changed, 471 insertions, 0 deletions
diff --git a/include/linux/lz4.h b/include/linux/lz4.h
new file mode 100644
index 000000000000..7f6c75a093f8
--- /dev/null
+++ b/include/linux/lz4.h
@@ -0,0 +1,51 @@
1#ifndef __LZ4_H__
2#define __LZ4_H__
3/*
4 * LZ4 Kernel Interface
5 *
6 * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13/*
14 * lz4_compressbound()
15 * Provides the maximum size that LZ4 may output in a "worst case" scenario
16 * (input data not compressible)
17 */
18static inline size_t lz4_compressbound(size_t isize)
19{
20 return isize + (isize / 255) + 16;
21}
22
23/*
24 * lz4_decompress()
25 * src : source address of the compressed data
26 * src_len : is the input size, whcih is returned after decompress done
27 * dest : output buffer address of the decompressed data
28 * actual_dest_len: is the size of uncompressed data, supposing it's known
29 * return : Success if return 0
30 * Error if return (< 0)
31 * note : Destination buffer must be already allocated.
32 * slightly faster than lz4_decompress_unknownoutputsize()
33 */
34int lz4_decompress(const char *src, size_t *src_len, char *dest,
35 size_t actual_dest_len);
36
37/*
38 * lz4_decompress_unknownoutputsize()
39 * src : source address of the compressed data
40 * src_len : is the input size, therefore the compressed size
41 * dest : output buffer address of the decompressed data
42 * dest_len: is the max size of the destination buffer, which is
43 * returned with actual size of decompressed data after
44 * decompress done
45 * return : Success if return 0
46 * Error if return (< 0)
47 * note : Destination buffer must be already allocated.
48 */
49int lz4_decompress_unknownoutputsize(const char *src, size_t src_len,
50 char *dest, size_t *dest_len);
51#endif
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
new file mode 100644
index 000000000000..dcc89753af65
--- /dev/null
+++ b/lib/lz4/lz4_decompress.c
@@ -0,0 +1,326 @@
1/*
2 * LZ4 Decompressor for Linux kernel
3 *
4 * Copyright (C) 2013 LG Electronics Co., Ltd. (http://www.lge.com/)
5 *
6 * Based on LZ4 implementation by Yann Collet.
7 *
8 * LZ4 - Fast LZ compression algorithm
9 * Copyright (C) 2011-2012, Yann Collet.
10 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions are
14 * met:
15 *
16 * * Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * * Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following disclaimer
20 * in the documentation and/or other materials provided with the
21 * distribution.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 * You can contact the author at :
36 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
37 * - LZ4 source repository : http://code.google.com/p/lz4/
38 */
39
40#ifndef STATIC
41#include <linux/module.h>
42#include <linux/kernel.h>
43#endif
44#include <linux/lz4.h>
45
46#include <asm/unaligned.h>
47
48#include "lz4defs.h"
49
50static int lz4_uncompress(const char *source, char *dest, int osize)
51{
52 const BYTE *ip = (const BYTE *) source;
53 const BYTE *ref;
54 BYTE *op = (BYTE *) dest;
55 BYTE * const oend = op + osize;
56 BYTE *cpy;
57 unsigned token;
58 size_t length;
59 size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
60#if LZ4_ARCH64
61 size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
62#endif
63
64 while (1) {
65
66 /* get runlength */
67 token = *ip++;
68 length = (token >> ML_BITS);
69 if (length == RUN_MASK) {
70 size_t len;
71
72 len = *ip++;
73 for (; len == 255; length += 255)
74 len = *ip++;
75 length += len;
76 }
77
78 /* copy literals */
79 cpy = op + length;
80 if (unlikely(cpy > oend - COPYLENGTH)) {
81 /*
82 * Error: not enough place for another match
83 * (min 4) + 5 literals
84 */
85 if (cpy != oend)
86 goto _output_error;
87
88 memcpy(op, ip, length);
89 ip += length;
90 break; /* EOF */
91 }
92 LZ4_WILDCOPY(ip, op, cpy);
93 ip -= (op - cpy);
94 op = cpy;
95
96 /* get offset */
97 LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
98 ip += 2;
99
100 /* Error: offset create reference outside destination buffer */
101 if (unlikely(ref < (BYTE *const) dest))
102 goto _output_error;
103
104 /* get matchlength */
105 length = token & ML_MASK;
106 if (length == ML_MASK) {
107 for (; *ip == 255; length += 255)
108 ip++;
109 length += *ip++;
110 }
111
112 /* copy repeated sequence */
113 if (unlikely((op - ref) < STEPSIZE)) {
114#if LZ4_ARCH64
115 size_t dec64 = dec64table[op - ref];
116#else
117 const int dec64 = 0;
118#endif
119 op[0] = ref[0];
120 op[1] = ref[1];
121 op[2] = ref[2];
122 op[3] = ref[3];
123 op += 4;
124 ref += 4;
125 ref -= dec32table[op-ref];
126 PUT4(ref, op);
127 op += STEPSIZE - 4;
128 ref -= dec64;
129 } else {
130 LZ4_COPYSTEP(ref, op);
131 }
132 cpy = op + length - (STEPSIZE - 4);
133 if (cpy > (oend - COPYLENGTH)) {
134
135 /* Error: request to write beyond destination buffer */
136 if (cpy > oend)
137 goto _output_error;
138 LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
139 while (op < cpy)
140 *op++ = *ref++;
141 op = cpy;
142 /*
143 * Check EOF (should never happen, since last 5 bytes
144 * are supposed to be literals)
145 */
146 if (op == oend)
147 goto _output_error;
148 continue;
149 }
150 LZ4_SECURECOPY(ref, op, cpy);
151 op = cpy; /* correction */
152 }
153 /* end of decoding */
154 return (int) (((char *)ip) - source);
155
156 /* write overflow error detected */
157_output_error:
158 return (int) (-(((char *)ip) - source));
159}
160
161static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
162 int isize, size_t maxoutputsize)
163{
164 const BYTE *ip = (const BYTE *) source;
165 const BYTE *const iend = ip + isize;
166 const BYTE *ref;
167
168
169 BYTE *op = (BYTE *) dest;
170 BYTE * const oend = op + maxoutputsize;
171 BYTE *cpy;
172
173 size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
174#if LZ4_ARCH64
175 size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
176#endif
177
178 /* Main Loop */
179 while (ip < iend) {
180
181 unsigned token;
182 size_t length;
183
184 /* get runlength */
185 token = *ip++;
186 length = (token >> ML_BITS);
187 if (length == RUN_MASK) {
188 int s = 255;
189 while ((ip < iend) && (s == 255)) {
190 s = *ip++;
191 length += s;
192 }
193 }
194 /* copy literals */
195 cpy = op + length;
196 if ((cpy > oend - COPYLENGTH) ||
197 (ip + length > iend - COPYLENGTH)) {
198
199 if (cpy > oend)
200 goto _output_error;/* writes beyond buffer */
201
202 if (ip + length != iend)
203 goto _output_error;/*
204 * Error: LZ4 format requires
205 * to consume all input
206 * at this stage
207 */
208 memcpy(op, ip, length);
209 op += length;
210 break;/* Necessarily EOF, due to parsing restrictions */
211 }
212 LZ4_WILDCOPY(ip, op, cpy);
213 ip -= (op - cpy);
214 op = cpy;
215
216 /* get offset */
217 LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
218 ip += 2;
219 if (ref < (BYTE * const) dest)
220 goto _output_error;
221 /*
222 * Error : offset creates reference
223 * outside of destination buffer
224 */
225
226 /* get matchlength */
227 length = (token & ML_MASK);
228 if (length == ML_MASK) {
229 while (ip < iend) {
230 int s = *ip++;
231 length += s;
232 if (s == 255)
233 continue;
234 break;
235 }
236 }
237
238 /* copy repeated sequence */
239 if (unlikely((op - ref) < STEPSIZE)) {
240#if LZ4_ARCH64
241 size_t dec64 = dec64table[op - ref];
242#else
243 const int dec64 = 0;
244#endif
245 op[0] = ref[0];
246 op[1] = ref[1];
247 op[2] = ref[2];
248 op[3] = ref[3];
249 op += 4;
250 ref += 4;
251 ref -= dec32table[op - ref];
252 PUT4(ref, op);
253 op += STEPSIZE - 4;
254 ref -= dec64;
255 } else {
256 LZ4_COPYSTEP(ref, op);
257 }
258 cpy = op + length - (STEPSIZE-4);
259 if (cpy > oend - COPYLENGTH) {
260 if (cpy > oend)
261 goto _output_error; /* write outside of buf */
262
263 LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
264 while (op < cpy)
265 *op++ = *ref++;
266 op = cpy;
267 /*
268 * Check EOF (should never happen, since last 5 bytes
269 * are supposed to be literals)
270 */
271 if (op == oend)
272 goto _output_error;
273 continue;
274 }
275 LZ4_SECURECOPY(ref, op, cpy);
276 op = cpy; /* correction */
277 }
278 /* end of decoding */
279 return (int) (((char *) op) - dest);
280
281 /* write overflow error detected */
282_output_error:
283 return (int) (-(((char *) ip) - source));
284}
285
286int lz4_decompress(const char *src, size_t *src_len, char *dest,
287 size_t actual_dest_len)
288{
289 int ret = -1;
290 int input_len = 0;
291
292 input_len = lz4_uncompress(src, dest, actual_dest_len);
293 if (input_len < 0)
294 goto exit_0;
295 *src_len = input_len;
296
297 return 0;
298exit_0:
299 return ret;
300}
301#ifndef STATIC
302EXPORT_SYMBOL_GPL(lz4_decompress);
303#endif
304
305int lz4_decompress_unknownoutputsize(const char *src, size_t src_len,
306 char *dest, size_t *dest_len)
307{
308 int ret = -1;
309 int out_len = 0;
310
311 out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len,
312 *dest_len);
313 if (out_len < 0)
314 goto exit_0;
315 *dest_len = out_len;
316
317 return 0;
318exit_0:
319 return ret;
320}
321#ifndef STATIC
322EXPORT_SYMBOL_GPL(lz4_decompress_unknownoutputsize);
323
324MODULE_LICENSE("GPL");
325MODULE_DESCRIPTION("LZ4 Decompressor");
326#endif
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
new file mode 100644
index 000000000000..43ac31d63f36
--- /dev/null
+++ b/lib/lz4/lz4defs.h
@@ -0,0 +1,94 @@
1/*
2 * lz4defs.h -- architecture specific defines
3 *
4 * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11/*
12 * Detects 64 bits mode
13 */
14#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) \
15 || defined(__ppc64__) || defined(__LP64__))
16#define LZ4_ARCH64 1
17#else
18#define LZ4_ARCH64 0
19#endif
20
21/*
22 * Architecture-specific macros
23 */
24#define BYTE u8
25#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \
26 || defined(CONFIG_ARM) && __LINUX_ARM_ARCH__ >= 6 \
27 && defined(ARM_EFFICIENT_UNALIGNED_ACCESS)
28typedef struct _U32_S { u32 v; } U32_S;
29typedef struct _U64_S { u64 v; } U64_S;
30
31#define A32(x) (((U32_S *)(x))->v)
32#define A64(x) (((U64_S *)(x))->v)
33
34#define PUT4(s, d) (A32(d) = A32(s))
35#define PUT8(s, d) (A64(d) = A64(s))
36#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
37
38#define PUT4(s, d) \
39 put_unaligned(get_unaligned((const u32 *) s), (u32 *) d)
40#define PUT8(s, d) \
41 put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
42#endif
43
44#define COPYLENGTH 8
45#define ML_BITS 4
46#define ML_MASK ((1U << ML_BITS) - 1)
47#define RUN_BITS (8 - ML_BITS)
48#define RUN_MASK ((1U << RUN_BITS) - 1)
49
50#if LZ4_ARCH64/* 64-bit */
51#define STEPSIZE 8
52
53#define LZ4_COPYSTEP(s, d) \
54 do { \
55 PUT8(s, d); \
56 d += 8; \
57 s += 8; \
58 } while (0)
59
60#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d)
61
62#define LZ4_SECURECOPY(s, d, e) \
63 do { \
64 if (d < e) { \
65 LZ4_WILDCOPY(s, d, e); \
66 } \
67 } while (0)
68
69#else /* 32-bit */
70#define STEPSIZE 4
71
72#define LZ4_COPYSTEP(s, d) \
73 do { \
74 PUT4(s, d); \
75 d += 4; \
76 s += 4; \
77 } while (0)
78
79#define LZ4_COPYPACKET(s, d) \
80 do { \
81 LZ4_COPYSTEP(s, d); \
82 LZ4_COPYSTEP(s, d); \
83 } while (0)
84
85#define LZ4_SECURECOPY LZ4_WILDCOPY
86#endif
87
88#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
89 (d = s - get_unaligned_le16(p))
90
91#define LZ4_WILDCOPY(s, d, e) \
92 do { \
93 LZ4_COPYPACKET(s, d); \
94 } while (d < e)