aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChanho Min <chanho.min@lge.com>2013-07-08 19:01:49 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-07-09 13:33:30 -0400
commitc72ac7a1a926dbffb59daf0f275450e5eecce16f (patch)
tree11350b56ad27c7001bdd45ce35d95666c355dfa8
parentf9b493ac9b833fd9dd3bbd50460adb33f29e1238 (diff)
lib: add lz4 compressor module
This patchset is for supporting LZ4 compression and the crypto API using it. As shown below, the size of data is a little bit bigger but compressing speed is faster under the enabled unaligned memory access. We can use lz4 de/compression through crypto API as well. Also, It will be useful for another potential user of lz4 compression. lz4 Compression Benchmark: Compiler: ARM gcc 4.6.4 ARMv7, 1 GHz based board Kernel: linux 3.4 Uncompressed data Size: 101 MB Compressed Size compression Speed LZO 72.1MB 32.1MB/s, 33.0MB/s(UA) LZ4 75.1MB 30.4MB/s, 35.9MB/s(UA) LZ4HC 59.8MB 2.4MB/s, 2.5MB/s(UA) - UA: Unaligned memory Access support - Latest patch set for LZO applied This patch: Add support for LZ4 compression in the Linux Kernel. LZ4 Compression APIs for kernel are based on LZ4 implementation by Yann Collet and were changed for kernel coding style. LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html LZ4 source repository : http://code.google.com/p/lz4/ svn revision : r90 Two APIs are added: lz4_compress() support basic lz4 compression whereas lz4hc_compress() support high compression or CPU performance get lower but compression ratio get higher. Also, we require the pre-allocated working memory with the defined size and destination buffer must be allocated with the size of lz4_compressbound. [akpm@linux-foundation.org: make lz4_compresshcctx() static] Signed-off-by: Chanho Min <chanho.min@lge.com> Cc: "Darrick J. Wong" <djwong@us.ibm.com> Cc: Bob Pearson <rpearson@systemfabricworks.com> Cc: Richard Weinberger <richard@nod.at> Cc: Herbert Xu <herbert@gondor.hengli.com.au> Cc: Yann Collet <yann.collet.73@gmail.com> Cc: Kyungsik Lee <kyungsik.lee@lge.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/lz4.h36
-rw-r--r--lib/Kconfig6
-rw-r--r--lib/Makefile2
-rw-r--r--lib/lz4/Makefile2
-rw-r--r--lib/lz4/lz4_compress.c443
-rw-r--r--lib/lz4/lz4defs.h66
-rw-r--r--lib/lz4/lz4hc_compress.c539
7 files changed, 1092 insertions, 2 deletions
diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 7f6c75a093f8..d21c13f10a64 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -9,6 +9,8 @@
9 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation. 10 * published by the Free Software Foundation.
11 */ 11 */
12#define LZ4_MEM_COMPRESS (4096 * sizeof(unsigned char *))
13#define LZ4HC_MEM_COMPRESS (65538 * sizeof(unsigned char *))
12 14
13/* 15/*
14 * lz4_compressbound() 16 * lz4_compressbound()
@@ -21,6 +23,40 @@ static inline size_t lz4_compressbound(size_t isize)
21} 23}
22 24
23/* 25/*
26 * lz4_compress()
27 * src : source address of the original data
28 * src_len : size of the original data
29 * dst : output buffer address of the compressed data
30 * This requires 'dst' of size LZ4_COMPRESSBOUND.
31 * dst_len : is the output size, which is returned after compress done
32 * workmem : address of the working memory.
33 * This requires 'workmem' of size LZ4_MEM_COMPRESS.
34 * return : Success if return 0
35 * Error if return (< 0)
36 * note : Destination buffer and workmem must be already allocated with
37 * the defined size.
38 */
39int lz4_compress(const unsigned char *src, size_t src_len,
40 unsigned char *dst, size_t *dst_len, void *wrkmem);
41
42 /*
43 * lz4hc_compress()
44 * src : source address of the original data
45 * src_len : size of the original data
46 * dst : output buffer address of the compressed data
47 * This requires 'dst' of size LZ4_COMPRESSBOUND.
48 * dst_len : is the output size, which is returned after compress done
49 * workmem : address of the working memory.
50 * This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
51 * return : Success if return 0
52 * Error if return (< 0)
53 * note : Destination buffer and workmem must be already allocated with
54 * the defined size.
55 */
56int lz4hc_compress(const unsigned char *src, size_t src_len,
57 unsigned char *dst, size_t *dst_len, void *wrkmem);
58
59/*
24 * lz4_decompress() 60 * lz4_decompress()
25 * src : source address of the compressed data 61 * src : source address of the compressed data
26 * src_len : is the input size, whcih is returned after decompress done 62 * src_len : is the input size, whcih is returned after decompress done
diff --git a/lib/Kconfig b/lib/Kconfig
index e9cdc8ffdcee..35da51359d40 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -194,6 +194,12 @@ config LZO_COMPRESS
194config LZO_DECOMPRESS 194config LZO_DECOMPRESS
195 tristate 195 tristate
196 196
197config LZ4_COMPRESS
198 tristate
199
200config LZ4HC_COMPRESS
201 tristate
202
197config LZ4_DECOMPRESS 203config LZ4_DECOMPRESS
198 tristate 204 tristate
199 205
diff --git a/lib/Makefile b/lib/Makefile
index 326f6c7e6db1..7baccfd8a4e9 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -75,6 +75,8 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
75obj-$(CONFIG_BCH) += bch.o 75obj-$(CONFIG_BCH) += bch.o
76obj-$(CONFIG_LZO_COMPRESS) += lzo/ 76obj-$(CONFIG_LZO_COMPRESS) += lzo/
77obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ 77obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
78obj-$(CONFIG_LZ4_COMPRESS) += lz4/
79obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/
78obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/ 80obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/
79obj-$(CONFIG_XZ_DEC) += xz/ 81obj-$(CONFIG_XZ_DEC) += xz/
80obj-$(CONFIG_RAID6_PQ) += raid6/ 82obj-$(CONFIG_RAID6_PQ) += raid6/
diff --git a/lib/lz4/Makefile b/lib/lz4/Makefile
index 7f548c6d1c5c..8085d04e9309 100644
--- a/lib/lz4/Makefile
+++ b/lib/lz4/Makefile
@@ -1 +1,3 @@
1obj-$(CONFIG_LZ4_COMPRESS) += lz4_compress.o
2obj-$(CONFIG_LZ4HC_COMPRESS) += lz4hc_compress.o
1obj-$(CONFIG_LZ4_DECOMPRESS) += lz4_decompress.o 3obj-$(CONFIG_LZ4_DECOMPRESS) += lz4_decompress.o
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
new file mode 100644
index 000000000000..fd94058bd7f9
--- /dev/null
+++ b/lib/lz4/lz4_compress.c
@@ -0,0 +1,443 @@
1/*
2 * LZ4 - Fast LZ compression algorithm
3 * Copyright (C) 2011-2012, Yann Collet.
4 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
8 * met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above
13 * copyright notice, this list of conditions and the following disclaimer
14 * in the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * You can contact the author at :
30 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
31 * - LZ4 source repository : http://code.google.com/p/lz4/
32 *
33 * Changed for kernel use by:
34 * Chanho Min <chanho.min@lge.com>
35 */
36
37#include <linux/module.h>
38#include <linux/kernel.h>
39#include <linux/lz4.h>
40#include <asm/unaligned.h>
41#include "lz4defs.h"
42
43/*
44 * LZ4_compressCtx :
45 * -----------------
46 * Compress 'isize' bytes from 'source' into an output buffer 'dest' of
47 * maximum size 'maxOutputSize'. * If it cannot achieve it, compression
48 * will stop, and result of the function will be zero.
49 * return : the number of bytes written in buffer 'dest', or 0 if the
50 * compression fails
51 */
52static inline int lz4_compressctx(void *ctx,
53 const char *source,
54 char *dest,
55 int isize,
56 int maxoutputsize)
57{
58 HTYPE *hashtable = (HTYPE *)ctx;
59 const u8 *ip = (u8 *)source;
60#if LZ4_ARCH64
61 const BYTE * const base = ip;
62#else
63 const int base = 0;
64#endif
65 const u8 *anchor = ip;
66 const u8 *const iend = ip + isize;
67 const u8 *const mflimit = iend - MFLIMIT;
68 #define MATCHLIMIT (iend - LASTLITERALS)
69
70 u8 *op = (u8 *) dest;
71 u8 *const oend = op + maxoutputsize;
72 int length;
73 const int skipstrength = SKIPSTRENGTH;
74 u32 forwardh;
75 int lastrun;
76
77 /* Init */
78 if (isize < MINLENGTH)
79 goto _last_literals;
80
81 memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
82
83 /* First Byte */
84 hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
85 ip++;
86 forwardh = LZ4_HASH_VALUE(ip);
87
88 /* Main Loop */
89 for (;;) {
90 int findmatchattempts = (1U << skipstrength) + 3;
91 const u8 *forwardip = ip;
92 const u8 *ref;
93 u8 *token;
94
95 /* Find a match */
96 do {
97 u32 h = forwardh;
98 int step = findmatchattempts++ >> skipstrength;
99 ip = forwardip;
100 forwardip = ip + step;
101
102 if (unlikely(forwardip > mflimit))
103 goto _last_literals;
104
105 forwardh = LZ4_HASH_VALUE(forwardip);
106 ref = base + hashtable[h];
107 hashtable[h] = ip - base;
108 } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
109
110 /* Catch up */
111 while ((ip > anchor) && (ref > (u8 *)source) &&
112 unlikely(ip[-1] == ref[-1])) {
113 ip--;
114 ref--;
115 }
116
117 /* Encode Literal length */
118 length = (int)(ip - anchor);
119 token = op++;
120 /* check output limit */
121 if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
122 (length >> 8) > oend))
123 return 0;
124
125 if (length >= (int)RUN_MASK) {
126 int len;
127 *token = (RUN_MASK << ML_BITS);
128 len = length - RUN_MASK;
129 for (; len > 254 ; len -= 255)
130 *op++ = 255;
131 *op++ = (u8)len;
132 } else
133 *token = (length << ML_BITS);
134
135 /* Copy Literals */
136 LZ4_BLINDCOPY(anchor, op, length);
137_next_match:
138 /* Encode Offset */
139 LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
140
141 /* Start Counting */
142 ip += MINMATCH;
143 /* MinMatch verified */
144 ref += MINMATCH;
145 anchor = ip;
146 while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) {
147 #if LZ4_ARCH64
148 u64 diff = A64(ref) ^ A64(ip);
149 #else
150 u32 diff = A32(ref) ^ A32(ip);
151 #endif
152 if (!diff) {
153 ip += STEPSIZE;
154 ref += STEPSIZE;
155 continue;
156 }
157 ip += LZ4_NBCOMMONBYTES(diff);
158 goto _endcount;
159 }
160 #if LZ4_ARCH64
161 if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
162 ip += 4;
163 ref += 4;
164 }
165 #endif
166 if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
167 ip += 2;
168 ref += 2;
169 }
170 if ((ip < MATCHLIMIT) && (*ref == *ip))
171 ip++;
172_endcount:
173 /* Encode MatchLength */
174 length = (int)(ip - anchor);
175 /* Check output limit */
176 if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend))
177 return 0;
178 if (length >= (int)ML_MASK) {
179 *token += ML_MASK;
180 length -= ML_MASK;
181 for (; length > 509 ; length -= 510) {
182 *op++ = 255;
183 *op++ = 255;
184 }
185 if (length > 254) {
186 length -= 255;
187 *op++ = 255;
188 }
189 *op++ = (u8)length;
190 } else
191 *token += length;
192
193 /* Test end of chunk */
194 if (ip > mflimit) {
195 anchor = ip;
196 break;
197 }
198
199 /* Fill table */
200 hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base;
201
202 /* Test next position */
203 ref = base + hashtable[LZ4_HASH_VALUE(ip)];
204 hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
205 if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
206 token = op++;
207 *token = 0;
208 goto _next_match;
209 }
210
211 /* Prepare next loop */
212 anchor = ip++;
213 forwardh = LZ4_HASH_VALUE(ip);
214 }
215
216_last_literals:
217 /* Encode Last Literals */
218 lastrun = (int)(iend - anchor);
219 if (((char *)op - dest) + lastrun + 1
220 + ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize)
221 return 0;
222
223 if (lastrun >= (int)RUN_MASK) {
224 *op++ = (RUN_MASK << ML_BITS);
225 lastrun -= RUN_MASK;
226 for (; lastrun > 254 ; lastrun -= 255)
227 *op++ = 255;
228 *op++ = (u8)lastrun;
229 } else
230 *op++ = (lastrun << ML_BITS);
231 memcpy(op, anchor, iend - anchor);
232 op += iend - anchor;
233
234 /* End */
235 return (int)(((char *)op) - dest);
236}
237
238static inline int lz4_compress64kctx(void *ctx,
239 const char *source,
240 char *dest,
241 int isize,
242 int maxoutputsize)
243{
244 u16 *hashtable = (u16 *)ctx;
245 const u8 *ip = (u8 *) source;
246 const u8 *anchor = ip;
247 const u8 *const base = ip;
248 const u8 *const iend = ip + isize;
249 const u8 *const mflimit = iend - MFLIMIT;
250 #define MATCHLIMIT (iend - LASTLITERALS)
251
252 u8 *op = (u8 *) dest;
253 u8 *const oend = op + maxoutputsize;
254 int len, length;
255 const int skipstrength = SKIPSTRENGTH;
256 u32 forwardh;
257 int lastrun;
258
259 /* Init */
260 if (isize < MINLENGTH)
261 goto _last_literals;
262
263 memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
264
265 /* First Byte */
266 ip++;
267 forwardh = LZ4_HASH64K_VALUE(ip);
268
269 /* Main Loop */
270 for (;;) {
271 int findmatchattempts = (1U << skipstrength) + 3;
272 const u8 *forwardip = ip;
273 const u8 *ref;
274 u8 *token;
275
276 /* Find a match */
277 do {
278 u32 h = forwardh;
279 int step = findmatchattempts++ >> skipstrength;
280 ip = forwardip;
281 forwardip = ip + step;
282
283 if (forwardip > mflimit)
284 goto _last_literals;
285
286 forwardh = LZ4_HASH64K_VALUE(forwardip);
287 ref = base + hashtable[h];
288 hashtable[h] = (u16)(ip - base);
289 } while (A32(ref) != A32(ip));
290
291 /* Catch up */
292 while ((ip > anchor) && (ref > (u8 *)source)
293 && (ip[-1] == ref[-1])) {
294 ip--;
295 ref--;
296 }
297
298 /* Encode Literal length */
299 length = (int)(ip - anchor);
300 token = op++;
301 /* Check output limit */
302 if (unlikely(op + length + (2 + 1 + LASTLITERALS)
303 + (length >> 8) > oend))
304 return 0;
305 if (length >= (int)RUN_MASK) {
306 *token = (RUN_MASK << ML_BITS);
307 len = length - RUN_MASK;
308 for (; len > 254 ; len -= 255)
309 *op++ = 255;
310 *op++ = (u8)len;
311 } else
312 *token = (length << ML_BITS);
313
314 /* Copy Literals */
315 LZ4_BLINDCOPY(anchor, op, length);
316
317_next_match:
318 /* Encode Offset */
319 LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
320
321 /* Start Counting */
322 ip += MINMATCH;
323 /* MinMatch verified */
324 ref += MINMATCH;
325 anchor = ip;
326
327 while (ip < MATCHLIMIT - (STEPSIZE - 1)) {
328 #if LZ4_ARCH64
329 u64 diff = A64(ref) ^ A64(ip);
330 #else
331 u32 diff = A32(ref) ^ A32(ip);
332 #endif
333
334 if (!diff) {
335 ip += STEPSIZE;
336 ref += STEPSIZE;
337 continue;
338 }
339 ip += LZ4_NBCOMMONBYTES(diff);
340 goto _endcount;
341 }
342 #if LZ4_ARCH64
343 if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
344 ip += 4;
345 ref += 4;
346 }
347 #endif
348 if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
349 ip += 2;
350 ref += 2;
351 }
352 if ((ip < MATCHLIMIT) && (*ref == *ip))
353 ip++;
354_endcount:
355
356 /* Encode MatchLength */
357 len = (int)(ip - anchor);
358 /* Check output limit */
359 if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
360 return 0;
361 if (len >= (int)ML_MASK) {
362 *token += ML_MASK;
363 len -= ML_MASK;
364 for (; len > 509 ; len -= 510) {
365 *op++ = 255;
366 *op++ = 255;
367 }
368 if (len > 254) {
369 len -= 255;
370 *op++ = 255;
371 }
372 *op++ = (u8)len;
373 } else
374 *token += len;
375
376 /* Test end of chunk */
377 if (ip > mflimit) {
378 anchor = ip;
379 break;
380 }
381
382 /* Fill table */
383 hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base);
384
385 /* Test next position */
386 ref = base + hashtable[LZ4_HASH64K_VALUE(ip)];
387 hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base);
388 if (A32(ref) == A32(ip)) {
389 token = op++;
390 *token = 0;
391 goto _next_match;
392 }
393
394 /* Prepare next loop */
395 anchor = ip++;
396 forwardh = LZ4_HASH64K_VALUE(ip);
397 }
398
399_last_literals:
400 /* Encode Last Literals */
401 lastrun = (int)(iend - anchor);
402 if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend)
403 return 0;
404 if (lastrun >= (int)RUN_MASK) {
405 *op++ = (RUN_MASK << ML_BITS);
406 lastrun -= RUN_MASK;
407 for (; lastrun > 254 ; lastrun -= 255)
408 *op++ = 255;
409 *op++ = (u8)lastrun;
410 } else
411 *op++ = (lastrun << ML_BITS);
412 memcpy(op, anchor, iend - anchor);
413 op += iend - anchor;
414 /* End */
415 return (int)(((char *)op) - dest);
416}
417
418int lz4_compress(const unsigned char *src, size_t src_len,
419 unsigned char *dst, size_t *dst_len, void *wrkmem)
420{
421 int ret = -1;
422 int out_len = 0;
423
424 if (src_len < LZ4_64KLIMIT)
425 out_len = lz4_compress64kctx(wrkmem, src, dst, src_len,
426 lz4_compressbound(src_len));
427 else
428 out_len = lz4_compressctx(wrkmem, src, dst, src_len,
429 lz4_compressbound(src_len));
430
431 if (out_len < 0)
432 goto exit;
433
434 *dst_len = out_len;
435
436 return 0;
437exit:
438 return ret;
439}
440EXPORT_SYMBOL_GPL(lz4_compress);
441
442MODULE_LICENSE("GPL");
443MODULE_DESCRIPTION("LZ4 compressor");
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index 43ac31d63f36..abcecdc2d0f2 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -22,23 +22,40 @@
22 * Architecture-specific macros 22 * Architecture-specific macros
23 */ 23 */
24#define BYTE u8 24#define BYTE u8
25typedef struct _U16_S { u16 v; } U16_S;
26typedef struct _U32_S { u32 v; } U32_S;
27typedef struct _U64_S { u64 v; } U64_S;
25#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \ 28#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \
26 || defined(CONFIG_ARM) && __LINUX_ARM_ARCH__ >= 6 \ 29 || defined(CONFIG_ARM) && __LINUX_ARM_ARCH__ >= 6 \
27 && defined(ARM_EFFICIENT_UNALIGNED_ACCESS) 30 && defined(ARM_EFFICIENT_UNALIGNED_ACCESS)
28typedef struct _U32_S { u32 v; } U32_S;
29typedef struct _U64_S { u64 v; } U64_S;
30 31
32#define A16(x) (((U16_S *)(x))->v)
31#define A32(x) (((U32_S *)(x))->v) 33#define A32(x) (((U32_S *)(x))->v)
32#define A64(x) (((U64_S *)(x))->v) 34#define A64(x) (((U64_S *)(x))->v)
33 35
34#define PUT4(s, d) (A32(d) = A32(s)) 36#define PUT4(s, d) (A32(d) = A32(s))
35#define PUT8(s, d) (A64(d) = A64(s)) 37#define PUT8(s, d) (A64(d) = A64(s))
38#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \
39 do { \
40 A16(p) = v; \
41 p += 2; \
42 } while (0)
36#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ 43#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
37 44
45#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v))
46#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v))
47#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v))
48
38#define PUT4(s, d) \ 49#define PUT4(s, d) \
39 put_unaligned(get_unaligned((const u32 *) s), (u32 *) d) 50 put_unaligned(get_unaligned((const u32 *) s), (u32 *) d)
40#define PUT8(s, d) \ 51#define PUT8(s, d) \
41 put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) 52 put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
53
54#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \
55 do { \
56 put_unaligned(v, (u16 *)(p)); \
57 p += 2; \
58 } while (0)
42#endif 59#endif
43 60
44#define COPYLENGTH 8 61#define COPYLENGTH 8
@@ -46,6 +63,29 @@ typedef struct _U64_S { u64 v; } U64_S;
46#define ML_MASK ((1U << ML_BITS) - 1) 63#define ML_MASK ((1U << ML_BITS) - 1)
47#define RUN_BITS (8 - ML_BITS) 64#define RUN_BITS (8 - ML_BITS)
48#define RUN_MASK ((1U << RUN_BITS) - 1) 65#define RUN_MASK ((1U << RUN_BITS) - 1)
66#define MEMORY_USAGE 14
67#define MINMATCH 4
68#define SKIPSTRENGTH 6
69#define LASTLITERALS 5
70#define MFLIMIT (COPYLENGTH + MINMATCH)
71#define MINLENGTH (MFLIMIT + 1)
72#define MAXD_LOG 16
73#define MAXD (1 << MAXD_LOG)
74#define MAXD_MASK (u32)(MAXD - 1)
75#define MAX_DISTANCE (MAXD - 1)
76#define HASH_LOG (MAXD_LOG - 1)
77#define HASHTABLESIZE (1 << HASH_LOG)
78#define MAX_NB_ATTEMPTS 256
79#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
80#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT - 1))
81#define HASHLOG64K ((MEMORY_USAGE - 2) + 1)
82#define HASH64KTABLESIZE (1U << HASHLOG64K)
83#define LZ4_HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \
84 ((MINMATCH * 8) - (MEMORY_USAGE-2)))
85#define LZ4_HASH64K_VALUE(p) (((A32(p)) * 2654435761U) >> \
86 ((MINMATCH * 8) - HASHLOG64K))
87#define HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \
88 ((MINMATCH * 8) - HASH_LOG))
49 89
50#if LZ4_ARCH64/* 64-bit */ 90#if LZ4_ARCH64/* 64-bit */
51#define STEPSIZE 8 91#define STEPSIZE 8
@@ -65,6 +105,13 @@ typedef struct _U64_S { u64 v; } U64_S;
65 LZ4_WILDCOPY(s, d, e); \ 105 LZ4_WILDCOPY(s, d, e); \
66 } \ 106 } \
67 } while (0) 107 } while (0)
108#define HTYPE u32
109
110#ifdef __BIG_ENDIAN
111#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
112#else
113#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
114#endif
68 115
69#else /* 32-bit */ 116#else /* 32-bit */
70#define STEPSIZE 4 117#define STEPSIZE 4
@@ -83,6 +130,14 @@ typedef struct _U64_S { u64 v; } U64_S;
83 } while (0) 130 } while (0)
84 131
85#define LZ4_SECURECOPY LZ4_WILDCOPY 132#define LZ4_SECURECOPY LZ4_WILDCOPY
133#define HTYPE const u8*
134
135#ifdef __BIG_ENDIAN
136#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
137#else
138#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
139#endif
140
86#endif 141#endif
87 142
88#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ 143#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
@@ -92,3 +147,10 @@ typedef struct _U64_S { u64 v; } U64_S;
92 do { \ 147 do { \
93 LZ4_COPYPACKET(s, d); \ 148 LZ4_COPYPACKET(s, d); \
94 } while (d < e) 149 } while (d < e)
150
151#define LZ4_BLINDCOPY(s, d, l) \
152 do { \
153 u8 *e = (d) + l; \
154 LZ4_WILDCOPY(s, d, e); \
155 d = e; \
156 } while (0)
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
new file mode 100644
index 000000000000..eb1a74f5e368
--- /dev/null
+++ b/lib/lz4/lz4hc_compress.c
@@ -0,0 +1,539 @@
1/*
2 * LZ4 HC - High Compression Mode of LZ4
3 * Copyright (C) 2011-2012, Yann Collet.
4 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
8 * met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above
13 * copyright notice, this list of conditions and the following disclaimer
14 * in the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * You can contact the author at :
30 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
31 * - LZ4 source repository : http://code.google.com/p/lz4/
32 *
33 * Changed for kernel use by:
34 * Chanho Min <chanho.min@lge.com>
35 */
36
37#include <linux/module.h>
38#include <linux/kernel.h>
39#include <linux/lz4.h>
40#include <asm/unaligned.h>
41#include "lz4defs.h"
42
43struct lz4hc_data {
44 const u8 *base;
45 HTYPE hashtable[HASHTABLESIZE];
46 u16 chaintable[MAXD];
47 const u8 *nexttoupdate;
48} __attribute__((__packed__));
49
50static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
51{
52 memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
53 memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
54
55#if LZ4_ARCH64
56 hc4->nexttoupdate = base + 1;
57#else
58 hc4->nexttoupdate = base;
59#endif
60 hc4->base = base;
61 return 1;
62}
63
64/* Update chains up to ip (excluded) */
65static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
66{
67 u16 *chaintable = hc4->chaintable;
68 HTYPE *hashtable = hc4->hashtable;
69#if LZ4_ARCH64
70 const BYTE * const base = hc4->base;
71#else
72 const int base = 0;
73#endif
74
75 while (hc4->nexttoupdate < ip) {
76 const u8 *p = hc4->nexttoupdate;
77 size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
78 if (delta > MAX_DISTANCE)
79 delta = MAX_DISTANCE;
80 chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
81 hashtable[HASH_VALUE(p)] = (p) - base;
82 hc4->nexttoupdate++;
83 }
84}
85
86static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
87 const u8 *const matchlimit)
88{
89 const u8 *p1t = p1;
90
91 while (p1t < matchlimit - (STEPSIZE - 1)) {
92#if LZ4_ARCH64
93 u64 diff = A64(p2) ^ A64(p1t);
94#else
95 u32 diff = A32(p2) ^ A32(p1t);
96#endif
97 if (!diff) {
98 p1t += STEPSIZE;
99 p2 += STEPSIZE;
100 continue;
101 }
102 p1t += LZ4_NBCOMMONBYTES(diff);
103 return p1t - p1;
104 }
105#if LZ4_ARCH64
106 if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
107 p1t += 4;
108 p2 += 4;
109 }
110#endif
111
112 if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
113 p1t += 2;
114 p2 += 2;
115 }
116 if ((p1t < matchlimit) && (*p2 == *p1t))
117 p1t++;
118 return p1t - p1;
119}
120
121static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
122 const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
123{
124 u16 *const chaintable = hc4->chaintable;
125 HTYPE *const hashtable = hc4->hashtable;
126 const u8 *ref;
127#if LZ4_ARCH64
128 const BYTE * const base = hc4->base;
129#else
130 const int base = 0;
131#endif
132 int nbattempts = MAX_NB_ATTEMPTS;
133 size_t repl = 0, ml = 0;
134 u16 delta;
135
136 /* HC4 match finder */
137 lz4hc_insert(hc4, ip);
138 ref = hashtable[HASH_VALUE(ip)] + base;
139
140 /* potential repetition */
141 if (ref >= ip-4) {
142 /* confirmed */
143 if (A32(ref) == A32(ip)) {
144 delta = (u16)(ip-ref);
145 repl = ml = lz4hc_commonlength(ip + MINMATCH,
146 ref + MINMATCH, matchlimit) + MINMATCH;
147 *matchpos = ref;
148 }
149 ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
150 }
151
152 while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
153 nbattempts--;
154 if (*(ref + ml) == *(ip + ml)) {
155 if (A32(ref) == A32(ip)) {
156 size_t mlt =
157 lz4hc_commonlength(ip + MINMATCH,
158 ref + MINMATCH, matchlimit) + MINMATCH;
159 if (mlt > ml) {
160 ml = mlt;
161 *matchpos = ref;
162 }
163 }
164 }
165 ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
166 }
167
168 /* Complete table */
169 if (repl) {
170 const BYTE *ptr = ip;
171 const BYTE *end;
172 end = ip + repl - (MINMATCH-1);
173 /* Pre-Load */
174 while (ptr < end - delta) {
175 chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
176 ptr++;
177 }
178 do {
179 chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
180 /* Head of chain */
181 hashtable[HASH_VALUE(ptr)] = (ptr) - base;
182 ptr++;
183 } while (ptr < end);
184 hc4->nexttoupdate = end;
185 }
186
187 return (int)ml;
188}
189
190static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
191 const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
192 const u8 **matchpos, const u8 **startpos)
193{
194 u16 *const chaintable = hc4->chaintable;
195 HTYPE *const hashtable = hc4->hashtable;
196#if LZ4_ARCH64
197 const BYTE * const base = hc4->base;
198#else
199 const int base = 0;
200#endif
201 const u8 *ref;
202 int nbattempts = MAX_NB_ATTEMPTS;
203 int delta = (int)(ip - startlimit);
204
205 /* First Match */
206 lz4hc_insert(hc4, ip);
207 ref = hashtable[HASH_VALUE(ip)] + base;
208
209 while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
210 && (nbattempts)) {
211 nbattempts--;
212 if (*(startlimit + longest) == *(ref - delta + longest)) {
213 if (A32(ref) == A32(ip)) {
214 const u8 *reft = ref + MINMATCH;
215 const u8 *ipt = ip + MINMATCH;
216 const u8 *startt = ip;
217
218 while (ipt < matchlimit-(STEPSIZE - 1)) {
219 #if LZ4_ARCH64
220 u64 diff = A64(reft) ^ A64(ipt);
221 #else
222 u32 diff = A32(reft) ^ A32(ipt);
223 #endif
224
225 if (!diff) {
226 ipt += STEPSIZE;
227 reft += STEPSIZE;
228 continue;
229 }
230 ipt += LZ4_NBCOMMONBYTES(diff);
231 goto _endcount;
232 }
233 #if LZ4_ARCH64
234 if ((ipt < (matchlimit - 3))
235 && (A32(reft) == A32(ipt))) {
236 ipt += 4;
237 reft += 4;
238 }
239 ipt += 2;
240 #endif
241 if ((ipt < (matchlimit - 1))
242 && (A16(reft) == A16(ipt))) {
243 reft += 2;
244 }
245 if ((ipt < matchlimit) && (*reft == *ipt))
246 ipt++;
247_endcount:
248 reft = ref;
249
250 while ((startt > startlimit)
251 && (reft > hc4->base)
252 && (startt[-1] == reft[-1])) {
253 startt--;
254 reft--;
255 }
256
257 if ((ipt - startt) > longest) {
258 longest = (int)(ipt - startt);
259 *matchpos = reft;
260 *startpos = startt;
261 }
262 }
263 }
264 ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
265 }
266 return longest;
267}
268
269static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
270 int ml, const u8 *ref)
271{
272 int length, len;
273 u8 *token;
274
275 /* Encode Literal length */
276 length = (int)(*ip - *anchor);
277 token = (*op)++;
278 if (length >= (int)RUN_MASK) {
279 *token = (RUN_MASK << ML_BITS);
280 len = length - RUN_MASK;
281 for (; len > 254 ; len -= 255)
282 *(*op)++ = 255;
283 *(*op)++ = (u8)len;
284 } else
285 *token = (length << ML_BITS);
286
287 /* Copy Literals */
288 LZ4_BLINDCOPY(*anchor, *op, length);
289
290 /* Encode Offset */
291 LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
292
293 /* Encode MatchLength */
294 len = (int)(ml - MINMATCH);
295 if (len >= (int)ML_MASK) {
296 *token += ML_MASK;
297 len -= ML_MASK;
298 for (; len > 509 ; len -= 510) {
299 *(*op)++ = 255;
300 *(*op)++ = 255;
301 }
302 if (len > 254) {
303 len -= 255;
304 *(*op)++ = 255;
305 }
306 *(*op)++ = (u8)len;
307 } else
308 *token += len;
309
310 /* Prepare next loop */
311 *ip += ml;
312 *anchor = *ip;
313
314 return 0;
315}
316
317static int lz4_compresshcctx(struct lz4hc_data *ctx,
318 const char *source,
319 char *dest,
320 int isize)
321{
322 const u8 *ip = (const u8 *)source;
323 const u8 *anchor = ip;
324 const u8 *const iend = ip + isize;
325 const u8 *const mflimit = iend - MFLIMIT;
326 const u8 *const matchlimit = (iend - LASTLITERALS);
327
328 u8 *op = (u8 *)dest;
329
330 int ml, ml2, ml3, ml0;
331 const u8 *ref = NULL;
332 const u8 *start2 = NULL;
333 const u8 *ref2 = NULL;
334 const u8 *start3 = NULL;
335 const u8 *ref3 = NULL;
336 const u8 *start0;
337 const u8 *ref0;
338 int lastrun;
339
340 ip++;
341
342 /* Main Loop */
343 while (ip < mflimit) {
344 ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
345 if (!ml) {
346 ip++;
347 continue;
348 }
349
350 /* saved, in case we would skip too much */
351 start0 = ip;
352 ref0 = ref;
353 ml0 = ml;
354_search2:
355 if (ip+ml < mflimit)
356 ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
357 ip + 1, matchlimit, ml, &ref2, &start2);
358 else
359 ml2 = ml;
360 /* No better match */
361 if (ml2 == ml) {
362 lz4_encodesequence(&ip, &op, &anchor, ml, ref);
363 continue;
364 }
365
366 if (start0 < ip) {
367 /* empirical */
368 if (start2 < ip + ml0) {
369 ip = start0;
370 ref = ref0;
371 ml = ml0;
372 }
373 }
374 /*
375 * Here, start0==ip
376 * First Match too small : removed
377 */
378 if ((start2 - ip) < 3) {
379 ml = ml2;
380 ip = start2;
381 ref = ref2;
382 goto _search2;
383 }
384
385_search3:
386 /*
387 * Currently we have :
388 * ml2 > ml1, and
389 * ip1+3 <= ip2 (usually < ip1+ml1)
390 */
391 if ((start2 - ip) < OPTIMAL_ML) {
392 int correction;
393 int new_ml = ml;
394 if (new_ml > OPTIMAL_ML)
395 new_ml = OPTIMAL_ML;
396 if (ip + new_ml > start2 + ml2 - MINMATCH)
397 new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
398 correction = new_ml - (int)(start2 - ip);
399 if (correction > 0) {
400 start2 += correction;
401 ref2 += correction;
402 ml2 -= correction;
403 }
404 }
405 /*
406 * Now, we have start2 = ip+new_ml,
407 * with new_ml=min(ml, OPTIMAL_ML=18)
408 */
409 if (start2 + ml2 < mflimit)
410 ml3 = lz4hc_insertandgetwidermatch(ctx,
411 start2 + ml2 - 3, start2, matchlimit,
412 ml2, &ref3, &start3);
413 else
414 ml3 = ml2;
415
416 /* No better match : 2 sequences to encode */
417 if (ml3 == ml2) {
418 /* ip & ref are known; Now for ml */
419 if (start2 < ip+ml)
420 ml = (int)(start2 - ip);
421
422 /* Now, encode 2 sequences */
423 lz4_encodesequence(&ip, &op, &anchor, ml, ref);
424 ip = start2;
425 lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
426 continue;
427 }
428
429 /* Not enough space for match 2 : remove it */
430 if (start3 < ip + ml + 3) {
431 /*
432 * can write Seq1 immediately ==> Seq2 is removed,
433 * so Seq3 becomes Seq1
434 */
435 if (start3 >= (ip + ml)) {
436 if (start2 < ip + ml) {
437 int correction =
438 (int)(ip + ml - start2);
439 start2 += correction;
440 ref2 += correction;
441 ml2 -= correction;
442 if (ml2 < MINMATCH) {
443 start2 = start3;
444 ref2 = ref3;
445 ml2 = ml3;
446 }
447 }
448
449 lz4_encodesequence(&ip, &op, &anchor, ml, ref);
450 ip = start3;
451 ref = ref3;
452 ml = ml3;
453
454 start0 = start2;
455 ref0 = ref2;
456 ml0 = ml2;
457 goto _search2;
458 }
459
460 start2 = start3;
461 ref2 = ref3;
462 ml2 = ml3;
463 goto _search3;
464 }
465
466 /*
467 * OK, now we have 3 ascending matches; let's write at least
468 * the first one ip & ref are known; Now for ml
469 */
470 if (start2 < ip + ml) {
471 if ((start2 - ip) < (int)ML_MASK) {
472 int correction;
473 if (ml > OPTIMAL_ML)
474 ml = OPTIMAL_ML;
475 if (ip + ml > start2 + ml2 - MINMATCH)
476 ml = (int)(start2 - ip) + ml2
477 - MINMATCH;
478 correction = ml - (int)(start2 - ip);
479 if (correction > 0) {
480 start2 += correction;
481 ref2 += correction;
482 ml2 -= correction;
483 }
484 } else
485 ml = (int)(start2 - ip);
486 }
487 lz4_encodesequence(&ip, &op, &anchor, ml, ref);
488
489 ip = start2;
490 ref = ref2;
491 ml = ml2;
492
493 start2 = start3;
494 ref2 = ref3;
495 ml2 = ml3;
496
497 goto _search3;
498 }
499
500 /* Encode Last Literals */
501 lastrun = (int)(iend - anchor);
502 if (lastrun >= (int)RUN_MASK) {
503 *op++ = (RUN_MASK << ML_BITS);
504 lastrun -= RUN_MASK;
505 for (; lastrun > 254 ; lastrun -= 255)
506 *op++ = 255;
507 *op++ = (u8) lastrun;
508 } else
509 *op++ = (lastrun << ML_BITS);
510 memcpy(op, anchor, iend - anchor);
511 op += iend - anchor;
512 /* End */
513 return (int) (((char *)op) - dest);
514}
515
516int lz4hc_compress(const unsigned char *src, size_t src_len,
517 unsigned char *dst, size_t *dst_len, void *wrkmem)
518{
519 int ret = -1;
520 int out_len = 0;
521
522 struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
523 lz4hc_init(hc4, (const u8 *)src);
524 out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
525 (char *)dst, (int)src_len);
526
527 if (out_len < 0)
528 goto exit;
529
530 *dst_len = out_len;
531 return 0;
532
533exit:
534 return ret;
535}
536EXPORT_SYMBOL_GPL(lz4hc_compress);
537
538MODULE_LICENSE("GPL");
539MODULE_DESCRIPTION("LZ4HC compressor");