diff options
author | Markus F.X.J. Oberhumer <markus@oberhumer.com> | 2012-08-13 11:25:44 -0400 |
---|---|---|
committer | Markus F.X.J. Oberhumer <markus@oberhumer.com> | 2013-02-20 13:36:01 -0500 |
commit | 8b975bd3f9089f8ee5d7bbfd798537b992bbc7e7 (patch) | |
tree | 09078bdf5e805a5f921d8b7b592262ef3e877937 /lib/lzo | |
parent | b6bec26cea948148a9420e7a0ac337f925de49e7 (diff) |
lib/lzo: Update LZO compression to current upstream version
This commit updates the kernel LZO code to the current upsteam version
which features a significant speed improvement - benchmarking the Calgary
and Silesia test corpora typically shows a doubled performance in
both compression and decompression on modern i386/x86_64/powerpc machines.
Signed-off-by: Markus F.X.J. Oberhumer <markus@oberhumer.com>
Diffstat (limited to 'lib/lzo')
-rw-r--r-- | lib/lzo/lzo1x_compress.c | 335 | ||||
-rw-r--r-- | lib/lzo/lzo1x_decompress_safe.c | 350 | ||||
-rw-r--r-- | lib/lzo/lzodefs.h | 38 |
3 files changed, 387 insertions, 336 deletions
diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index a6040990a62e..236eb21167b5 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c | |||
@@ -1,194 +1,243 @@ | |||
1 | /* | 1 | /* |
2 | * LZO1X Compressor from MiniLZO | 2 | * LZO1X Compressor from LZO |
3 | * | 3 | * |
4 | * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com> | 4 | * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com> |
5 | * | 5 | * |
6 | * The full LZO package can be found at: | 6 | * The full LZO package can be found at: |
7 | * http://www.oberhumer.com/opensource/lzo/ | 7 | * http://www.oberhumer.com/opensource/lzo/ |
8 | * | 8 | * |
9 | * Changed for kernel use by: | 9 | * Changed for Linux kernel use by: |
10 | * Nitin Gupta <nitingupta910@gmail.com> | 10 | * Nitin Gupta <nitingupta910@gmail.com> |
11 | * Richard Purdie <rpurdie@openedhand.com> | 11 | * Richard Purdie <rpurdie@openedhand.com> |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
16 | #include <linux/lzo.h> | ||
17 | #include <asm/unaligned.h> | 16 | #include <asm/unaligned.h> |
17 | #include <linux/lzo.h> | ||
18 | #include "lzodefs.h" | 18 | #include "lzodefs.h" |
19 | 19 | ||
20 | static noinline size_t | 20 | static noinline size_t |
21 | _lzo1x_1_do_compress(const unsigned char *in, size_t in_len, | 21 | lzo1x_1_do_compress(const unsigned char *in, size_t in_len, |
22 | unsigned char *out, size_t *out_len, void *wrkmem) | 22 | unsigned char *out, size_t *out_len, |
23 | size_t ti, void *wrkmem) | ||
23 | { | 24 | { |
25 | const unsigned char *ip; | ||
26 | unsigned char *op; | ||
24 | const unsigned char * const in_end = in + in_len; | 27 | const unsigned char * const in_end = in + in_len; |
25 | const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5; | 28 | const unsigned char * const ip_end = in + in_len - 20; |
26 | const unsigned char ** const dict = wrkmem; | 29 | const unsigned char *ii; |
27 | const unsigned char *ip = in, *ii = ip; | 30 | lzo_dict_t * const dict = (lzo_dict_t *) wrkmem; |
28 | const unsigned char *end, *m, *m_pos; | ||
29 | size_t m_off, m_len, dindex; | ||
30 | unsigned char *op = out; | ||
31 | 31 | ||
32 | ip += 4; | 32 | op = out; |
33 | ip = in; | ||
34 | ii = ip; | ||
35 | ip += ti < 4 ? 4 - ti : 0; | ||
33 | 36 | ||
34 | for (;;) { | 37 | for (;;) { |
35 | dindex = ((size_t)(0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK; | 38 | const unsigned char *m_pos; |
36 | m_pos = dict[dindex]; | 39 | size_t t, m_len, m_off; |
37 | 40 | u32 dv; | |
38 | if (m_pos < in) | ||
39 | goto literal; | ||
40 | |||
41 | if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) | ||
42 | goto literal; | ||
43 | |||
44 | m_off = ip - m_pos; | ||
45 | if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) | ||
46 | goto try_match; | ||
47 | |||
48 | dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f); | ||
49 | m_pos = dict[dindex]; | ||
50 | |||
51 | if (m_pos < in) | ||
52 | goto literal; | ||
53 | |||
54 | if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) | ||
55 | goto literal; | ||
56 | |||
57 | m_off = ip - m_pos; | ||
58 | if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) | ||
59 | goto try_match; | ||
60 | |||
61 | goto literal; | ||
62 | |||
63 | try_match: | ||
64 | if (get_unaligned((const unsigned short *)m_pos) | ||
65 | == get_unaligned((const unsigned short *)ip)) { | ||
66 | if (likely(m_pos[2] == ip[2])) | ||
67 | goto match; | ||
68 | } | ||
69 | |||
70 | literal: | 41 | literal: |
71 | dict[dindex] = ip; | 42 | ip += 1 + ((ip - ii) >> 5); |
72 | ++ip; | 43 | next: |
73 | if (unlikely(ip >= ip_end)) | 44 | if (unlikely(ip >= ip_end)) |
74 | break; | 45 | break; |
75 | continue; | 46 | dv = get_unaligned_le32(ip); |
76 | 47 | t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; | |
77 | match: | 48 | m_pos = in + dict[t]; |
78 | dict[dindex] = ip; | 49 | dict[t] = (lzo_dict_t) (ip - in); |
79 | if (ip != ii) { | 50 | if (unlikely(dv != get_unaligned_le32(m_pos))) |
80 | size_t t = ip - ii; | 51 | goto literal; |
81 | 52 | ||
53 | ii -= ti; | ||
54 | ti = 0; | ||
55 | t = ip - ii; | ||
56 | if (t != 0) { | ||
82 | if (t <= 3) { | 57 | if (t <= 3) { |
83 | op[-2] |= t; | 58 | op[-2] |= t; |
84 | } else if (t <= 18) { | 59 | COPY4(op, ii); |
60 | op += t; | ||
61 | } else if (t <= 16) { | ||
85 | *op++ = (t - 3); | 62 | *op++ = (t - 3); |
63 | COPY8(op, ii); | ||
64 | COPY8(op + 8, ii + 8); | ||
65 | op += t; | ||
86 | } else { | 66 | } else { |
87 | size_t tt = t - 18; | 67 | if (t <= 18) { |
88 | 68 | *op++ = (t - 3); | |
89 | *op++ = 0; | 69 | } else { |
90 | while (tt > 255) { | 70 | size_t tt = t - 18; |
91 | tt -= 255; | ||
92 | *op++ = 0; | 71 | *op++ = 0; |
72 | while (unlikely(tt > 255)) { | ||
73 | tt -= 255; | ||
74 | *op++ = 0; | ||
75 | } | ||
76 | *op++ = tt; | ||
93 | } | 77 | } |
94 | *op++ = tt; | 78 | do { |
79 | COPY8(op, ii); | ||
80 | COPY8(op + 8, ii + 8); | ||
81 | op += 16; | ||
82 | ii += 16; | ||
83 | t -= 16; | ||
84 | } while (t >= 16); | ||
85 | if (t > 0) do { | ||
86 | *op++ = *ii++; | ||
87 | } while (--t > 0); | ||
95 | } | 88 | } |
96 | do { | ||
97 | *op++ = *ii++; | ||
98 | } while (--t > 0); | ||
99 | } | 89 | } |
100 | 90 | ||
101 | ip += 3; | 91 | m_len = 4; |
102 | if (m_pos[3] != *ip++ || m_pos[4] != *ip++ | 92 | { |
103 | || m_pos[5] != *ip++ || m_pos[6] != *ip++ | 93 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64) |
104 | || m_pos[7] != *ip++ || m_pos[8] != *ip++) { | 94 | u64 v; |
105 | --ip; | 95 | v = get_unaligned((const u64 *) (ip + m_len)) ^ |
106 | m_len = ip - ii; | 96 | get_unaligned((const u64 *) (m_pos + m_len)); |
97 | if (unlikely(v == 0)) { | ||
98 | do { | ||
99 | m_len += 8; | ||
100 | v = get_unaligned((const u64 *) (ip + m_len)) ^ | ||
101 | get_unaligned((const u64 *) (m_pos + m_len)); | ||
102 | if (unlikely(ip + m_len >= ip_end)) | ||
103 | goto m_len_done; | ||
104 | } while (v == 0); | ||
105 | } | ||
106 | # if defined(__LITTLE_ENDIAN) | ||
107 | m_len += (unsigned) __builtin_ctzll(v) / 8; | ||
108 | # elif defined(__BIG_ENDIAN) | ||
109 | m_len += (unsigned) __builtin_clzll(v) / 8; | ||
110 | # else | ||
111 | # error "missing endian definition" | ||
112 | # endif | ||
113 | #elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32) | ||
114 | u32 v; | ||
115 | v = get_unaligned((const u32 *) (ip + m_len)) ^ | ||
116 | get_unaligned((const u32 *) (m_pos + m_len)); | ||
117 | if (unlikely(v == 0)) { | ||
118 | do { | ||
119 | m_len += 4; | ||
120 | v = get_unaligned((const u32 *) (ip + m_len)) ^ | ||
121 | get_unaligned((const u32 *) (m_pos + m_len)); | ||
122 | if (v != 0) | ||
123 | break; | ||
124 | m_len += 4; | ||
125 | v = get_unaligned((const u32 *) (ip + m_len)) ^ | ||
126 | get_unaligned((const u32 *) (m_pos + m_len)); | ||
127 | if (unlikely(ip + m_len >= ip_end)) | ||
128 | goto m_len_done; | ||
129 | } while (v == 0); | ||
130 | } | ||
131 | # if defined(__LITTLE_ENDIAN) | ||
132 | m_len += (unsigned) __builtin_ctz(v) / 8; | ||
133 | # elif defined(__BIG_ENDIAN) | ||
134 | m_len += (unsigned) __builtin_clz(v) / 8; | ||
135 | # else | ||
136 | # error "missing endian definition" | ||
137 | # endif | ||
138 | #else | ||
139 | if (unlikely(ip[m_len] == m_pos[m_len])) { | ||
140 | do { | ||
141 | m_len += 1; | ||
142 | if (ip[m_len] != m_pos[m_len]) | ||
143 | break; | ||
144 | m_len += 1; | ||
145 | if (ip[m_len] != m_pos[m_len]) | ||
146 | break; | ||
147 | m_len += 1; | ||
148 | if (ip[m_len] != m_pos[m_len]) | ||
149 | break; | ||
150 | m_len += 1; | ||
151 | if (ip[m_len] != m_pos[m_len]) | ||
152 | break; | ||
153 | m_len += 1; | ||
154 | if (ip[m_len] != m_pos[m_len]) | ||
155 | break; | ||
156 | m_len += 1; | ||
157 | if (ip[m_len] != m_pos[m_len]) | ||
158 | break; | ||
159 | m_len += 1; | ||
160 | if (ip[m_len] != m_pos[m_len]) | ||
161 | break; | ||
162 | m_len += 1; | ||
163 | if (unlikely(ip + m_len >= ip_end)) | ||
164 | goto m_len_done; | ||
165 | } while (ip[m_len] == m_pos[m_len]); | ||
166 | } | ||
167 | #endif | ||
168 | } | ||
169 | m_len_done: | ||
107 | 170 | ||
108 | if (m_off <= M2_MAX_OFFSET) { | 171 | m_off = ip - m_pos; |
109 | m_off -= 1; | 172 | ip += m_len; |
110 | *op++ = (((m_len - 1) << 5) | 173 | ii = ip; |
111 | | ((m_off & 7) << 2)); | 174 | if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { |
112 | *op++ = (m_off >> 3); | 175 | m_off -= 1; |
113 | } else if (m_off <= M3_MAX_OFFSET) { | 176 | *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); |
114 | m_off -= 1; | 177 | *op++ = (m_off >> 3); |
178 | } else if (m_off <= M3_MAX_OFFSET) { | ||
179 | m_off -= 1; | ||
180 | if (m_len <= M3_MAX_LEN) | ||
115 | *op++ = (M3_MARKER | (m_len - 2)); | 181 | *op++ = (M3_MARKER | (m_len - 2)); |
116 | goto m3_m4_offset; | 182 | else { |
117 | } else { | 183 | m_len -= M3_MAX_LEN; |
118 | m_off -= 0x4000; | 184 | *op++ = M3_MARKER | 0; |
119 | 185 | while (unlikely(m_len > 255)) { | |
120 | *op++ = (M4_MARKER | ((m_off & 0x4000) >> 11) | 186 | m_len -= 255; |
121 | | (m_len - 2)); | 187 | *op++ = 0; |
122 | goto m3_m4_offset; | 188 | } |
189 | *op++ = (m_len); | ||
123 | } | 190 | } |
191 | *op++ = (m_off << 2); | ||
192 | *op++ = (m_off >> 6); | ||
124 | } else { | 193 | } else { |
125 | end = in_end; | 194 | m_off -= 0x4000; |
126 | m = m_pos + M2_MAX_LEN + 1; | 195 | if (m_len <= M4_MAX_LEN) |
127 | 196 | *op++ = (M4_MARKER | ((m_off >> 11) & 8) | |
128 | while (ip < end && *m == *ip) { | ||
129 | m++; | ||
130 | ip++; | ||
131 | } | ||
132 | m_len = ip - ii; | ||
133 | |||
134 | if (m_off <= M3_MAX_OFFSET) { | ||
135 | m_off -= 1; | ||
136 | if (m_len <= 33) { | ||
137 | *op++ = (M3_MARKER | (m_len - 2)); | ||
138 | } else { | ||
139 | m_len -= 33; | ||
140 | *op++ = M3_MARKER | 0; | ||
141 | goto m3_m4_len; | ||
142 | } | ||
143 | } else { | ||
144 | m_off -= 0x4000; | ||
145 | if (m_len <= M4_MAX_LEN) { | ||
146 | *op++ = (M4_MARKER | ||
147 | | ((m_off & 0x4000) >> 11) | ||
148 | | (m_len - 2)); | 197 | | (m_len - 2)); |
149 | } else { | 198 | else { |
150 | m_len -= M4_MAX_LEN; | 199 | m_len -= M4_MAX_LEN; |
151 | *op++ = (M4_MARKER | 200 | *op++ = (M4_MARKER | ((m_off >> 11) & 8)); |
152 | | ((m_off & 0x4000) >> 11)); | 201 | while (unlikely(m_len > 255)) { |
153 | m3_m4_len: | 202 | m_len -= 255; |
154 | while (m_len > 255) { | 203 | *op++ = 0; |
155 | m_len -= 255; | ||
156 | *op++ = 0; | ||
157 | } | ||
158 | |||
159 | *op++ = (m_len); | ||
160 | } | 204 | } |
205 | *op++ = (m_len); | ||
161 | } | 206 | } |
162 | m3_m4_offset: | 207 | *op++ = (m_off << 2); |
163 | *op++ = ((m_off & 63) << 2); | ||
164 | *op++ = (m_off >> 6); | 208 | *op++ = (m_off >> 6); |
165 | } | 209 | } |
166 | 210 | goto next; | |
167 | ii = ip; | ||
168 | if (unlikely(ip >= ip_end)) | ||
169 | break; | ||
170 | } | 211 | } |
171 | |||
172 | *out_len = op - out; | 212 | *out_len = op - out; |
173 | return in_end - ii; | 213 | return in_end - (ii - ti); |
174 | } | 214 | } |
175 | 215 | ||
176 | int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, | 216 | int lzo1x_1_compress(const unsigned char *in, size_t in_len, |
177 | size_t *out_len, void *wrkmem) | 217 | unsigned char *out, size_t *out_len, |
218 | void *wrkmem) | ||
178 | { | 219 | { |
179 | const unsigned char *ii; | 220 | const unsigned char *ip = in; |
180 | unsigned char *op = out; | 221 | unsigned char *op = out; |
181 | size_t t; | 222 | size_t l = in_len; |
223 | size_t t = 0; | ||
182 | 224 | ||
183 | if (unlikely(in_len <= M2_MAX_LEN + 5)) { | 225 | while (l > 20) { |
184 | t = in_len; | 226 | size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1); |
185 | } else { | 227 | uintptr_t ll_end = (uintptr_t) ip + ll; |
186 | t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem); | 228 | if ((ll_end + ((t + ll) >> 5)) <= ll_end) |
229 | break; | ||
230 | BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); | ||
231 | memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); | ||
232 | t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem); | ||
233 | ip += ll; | ||
187 | op += *out_len; | 234 | op += *out_len; |
235 | l -= ll; | ||
188 | } | 236 | } |
237 | t += l; | ||
189 | 238 | ||
190 | if (t > 0) { | 239 | if (t > 0) { |
191 | ii = in + in_len - t; | 240 | const unsigned char *ii = in + in_len - t; |
192 | 241 | ||
193 | if (op == out && t <= 238) { | 242 | if (op == out && t <= 238) { |
194 | *op++ = (17 + t); | 243 | *op++ = (17 + t); |
@@ -198,16 +247,21 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, | |||
198 | *op++ = (t - 3); | 247 | *op++ = (t - 3); |
199 | } else { | 248 | } else { |
200 | size_t tt = t - 18; | 249 | size_t tt = t - 18; |
201 | |||
202 | *op++ = 0; | 250 | *op++ = 0; |
203 | while (tt > 255) { | 251 | while (tt > 255) { |
204 | tt -= 255; | 252 | tt -= 255; |
205 | *op++ = 0; | 253 | *op++ = 0; |
206 | } | 254 | } |
207 | |||
208 | *op++ = tt; | 255 | *op++ = tt; |
209 | } | 256 | } |
210 | do { | 257 | if (t >= 16) do { |
258 | COPY8(op, ii); | ||
259 | COPY8(op + 8, ii + 8); | ||
260 | op += 16; | ||
261 | ii += 16; | ||
262 | t -= 16; | ||
263 | } while (t >= 16); | ||
264 | if (t > 0) do { | ||
211 | *op++ = *ii++; | 265 | *op++ = *ii++; |
212 | } while (--t > 0); | 266 | } while (--t > 0); |
213 | } | 267 | } |
@@ -223,4 +277,3 @@ EXPORT_SYMBOL_GPL(lzo1x_1_compress); | |||
223 | 277 | ||
224 | MODULE_LICENSE("GPL"); | 278 | MODULE_LICENSE("GPL"); |
225 | MODULE_DESCRIPTION("LZO1X-1 Compressor"); | 279 | MODULE_DESCRIPTION("LZO1X-1 Compressor"); |
226 | |||
diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index f2fd09850223..569985d522d5 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c | |||
@@ -1,12 +1,12 @@ | |||
1 | /* | 1 | /* |
2 | * LZO1X Decompressor from MiniLZO | 2 | * LZO1X Decompressor from LZO |
3 | * | 3 | * |
4 | * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com> | 4 | * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com> |
5 | * | 5 | * |
6 | * The full LZO package can be found at: | 6 | * The full LZO package can be found at: |
7 | * http://www.oberhumer.com/opensource/lzo/ | 7 | * http://www.oberhumer.com/opensource/lzo/ |
8 | * | 8 | * |
9 | * Changed for kernel use by: | 9 | * Changed for Linux kernel use by: |
10 | * Nitin Gupta <nitingupta910@gmail.com> | 10 | * Nitin Gupta <nitingupta910@gmail.com> |
11 | * Richard Purdie <rpurdie@openedhand.com> | 11 | * Richard Purdie <rpurdie@openedhand.com> |
12 | */ | 12 | */ |
@@ -15,225 +15,207 @@ | |||
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #endif | 17 | #endif |
18 | |||
19 | #include <asm/unaligned.h> | 18 | #include <asm/unaligned.h> |
20 | #include <linux/lzo.h> | 19 | #include <linux/lzo.h> |
21 | #include "lzodefs.h" | 20 | #include "lzodefs.h" |
22 | 21 | ||
23 | #define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) | 22 | #define HAVE_IP(x) ((size_t)(ip_end - ip) >= (size_t)(x)) |
24 | #define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x)) | 23 | #define HAVE_OP(x) ((size_t)(op_end - op) >= (size_t)(x)) |
25 | #define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op) | 24 | #define NEED_IP(x) if (!HAVE_IP(x)) goto input_overrun |
26 | 25 | #define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun | |
27 | #define COPY4(dst, src) \ | 26 | #define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun |
28 | put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) | ||
29 | 27 | ||
30 | int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, | 28 | int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, |
31 | unsigned char *out, size_t *out_len) | 29 | unsigned char *out, size_t *out_len) |
32 | { | 30 | { |
31 | unsigned char *op; | ||
32 | const unsigned char *ip; | ||
33 | size_t t, next; | ||
34 | size_t state = 0; | ||
35 | const unsigned char *m_pos; | ||
33 | const unsigned char * const ip_end = in + in_len; | 36 | const unsigned char * const ip_end = in + in_len; |
34 | unsigned char * const op_end = out + *out_len; | 37 | unsigned char * const op_end = out + *out_len; |
35 | const unsigned char *ip = in, *m_pos; | ||
36 | unsigned char *op = out; | ||
37 | size_t t; | ||
38 | 38 | ||
39 | *out_len = 0; | 39 | op = out; |
40 | ip = in; | ||
40 | 41 | ||
42 | if (unlikely(in_len < 3)) | ||
43 | goto input_overrun; | ||
41 | if (*ip > 17) { | 44 | if (*ip > 17) { |
42 | t = *ip++ - 17; | 45 | t = *ip++ - 17; |
43 | if (t < 4) | 46 | if (t < 4) { |
47 | next = t; | ||
44 | goto match_next; | 48 | goto match_next; |
45 | if (HAVE_OP(t, op_end, op)) | ||
46 | goto output_overrun; | ||
47 | if (HAVE_IP(t + 1, ip_end, ip)) | ||
48 | goto input_overrun; | ||
49 | do { | ||
50 | *op++ = *ip++; | ||
51 | } while (--t > 0); | ||
52 | goto first_literal_run; | ||
53 | } | ||
54 | |||
55 | while ((ip < ip_end)) { | ||
56 | t = *ip++; | ||
57 | if (t >= 16) | ||
58 | goto match; | ||
59 | if (t == 0) { | ||
60 | if (HAVE_IP(1, ip_end, ip)) | ||
61 | goto input_overrun; | ||
62 | while (*ip == 0) { | ||
63 | t += 255; | ||
64 | ip++; | ||
65 | if (HAVE_IP(1, ip_end, ip)) | ||
66 | goto input_overrun; | ||
67 | } | ||
68 | t += 15 + *ip++; | ||
69 | } | ||
70 | if (HAVE_OP(t + 3, op_end, op)) | ||
71 | goto output_overrun; | ||
72 | if (HAVE_IP(t + 4, ip_end, ip)) | ||
73 | goto input_overrun; | ||
74 | |||
75 | COPY4(op, ip); | ||
76 | op += 4; | ||
77 | ip += 4; | ||
78 | if (--t > 0) { | ||
79 | if (t >= 4) { | ||
80 | do { | ||
81 | COPY4(op, ip); | ||
82 | op += 4; | ||
83 | ip += 4; | ||
84 | t -= 4; | ||
85 | } while (t >= 4); | ||
86 | if (t > 0) { | ||
87 | do { | ||
88 | *op++ = *ip++; | ||
89 | } while (--t > 0); | ||
90 | } | ||
91 | } else { | ||
92 | do { | ||
93 | *op++ = *ip++; | ||
94 | } while (--t > 0); | ||
95 | } | ||
96 | } | 49 | } |
50 | goto copy_literal_run; | ||
51 | } | ||
97 | 52 | ||
98 | first_literal_run: | 53 | for (;;) { |
99 | t = *ip++; | 54 | t = *ip++; |
100 | if (t >= 16) | 55 | if (t < 16) { |
101 | goto match; | 56 | if (likely(state == 0)) { |
102 | m_pos = op - (1 + M2_MAX_OFFSET); | 57 | if (unlikely(t == 0)) { |
103 | m_pos -= t >> 2; | 58 | while (unlikely(*ip == 0)) { |
104 | m_pos -= *ip++ << 2; | ||
105 | |||
106 | if (HAVE_LB(m_pos, out, op)) | ||
107 | goto lookbehind_overrun; | ||
108 | |||
109 | if (HAVE_OP(3, op_end, op)) | ||
110 | goto output_overrun; | ||
111 | *op++ = *m_pos++; | ||
112 | *op++ = *m_pos++; | ||
113 | *op++ = *m_pos; | ||
114 | |||
115 | goto match_done; | ||
116 | |||
117 | do { | ||
118 | match: | ||
119 | if (t >= 64) { | ||
120 | m_pos = op - 1; | ||
121 | m_pos -= (t >> 2) & 7; | ||
122 | m_pos -= *ip++ << 3; | ||
123 | t = (t >> 5) - 1; | ||
124 | if (HAVE_LB(m_pos, out, op)) | ||
125 | goto lookbehind_overrun; | ||
126 | if (HAVE_OP(t + 3 - 1, op_end, op)) | ||
127 | goto output_overrun; | ||
128 | goto copy_match; | ||
129 | } else if (t >= 32) { | ||
130 | t &= 31; | ||
131 | if (t == 0) { | ||
132 | if (HAVE_IP(1, ip_end, ip)) | ||
133 | goto input_overrun; | ||
134 | while (*ip == 0) { | ||
135 | t += 255; | 59 | t += 255; |
136 | ip++; | 60 | ip++; |
137 | if (HAVE_IP(1, ip_end, ip)) | 61 | NEED_IP(1); |
138 | goto input_overrun; | ||
139 | } | 62 | } |
140 | t += 31 + *ip++; | 63 | t += 15 + *ip++; |
141 | } | 64 | } |
142 | m_pos = op - 1; | 65 | t += 3; |
143 | m_pos -= get_unaligned_le16(ip) >> 2; | 66 | copy_literal_run: |
144 | ip += 2; | 67 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) |
145 | } else if (t >= 16) { | 68 | if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) { |
146 | m_pos = op; | 69 | const unsigned char *ie = ip + t; |
147 | m_pos -= (t & 8) << 11; | 70 | unsigned char *oe = op + t; |
148 | 71 | do { | |
149 | t &= 7; | 72 | COPY8(op, ip); |
150 | if (t == 0) { | 73 | op += 8; |
151 | if (HAVE_IP(1, ip_end, ip)) | 74 | ip += 8; |
152 | goto input_overrun; | 75 | COPY8(op, ip); |
153 | while (*ip == 0) { | 76 | op += 8; |
154 | t += 255; | 77 | ip += 8; |
155 | ip++; | 78 | } while (ip < ie); |
156 | if (HAVE_IP(1, ip_end, ip)) | 79 | ip = ie; |
157 | goto input_overrun; | 80 | op = oe; |
158 | } | 81 | } else |
159 | t += 7 + *ip++; | 82 | #endif |
83 | { | ||
84 | NEED_OP(t); | ||
85 | NEED_IP(t + 3); | ||
86 | do { | ||
87 | *op++ = *ip++; | ||
88 | } while (--t > 0); | ||
160 | } | 89 | } |
161 | m_pos -= get_unaligned_le16(ip) >> 2; | 90 | state = 4; |
162 | ip += 2; | 91 | continue; |
163 | if (m_pos == op) | 92 | } else if (state != 4) { |
164 | goto eof_found; | 93 | next = t & 3; |
165 | m_pos -= 0x4000; | ||
166 | } else { | ||
167 | m_pos = op - 1; | 94 | m_pos = op - 1; |
168 | m_pos -= t >> 2; | 95 | m_pos -= t >> 2; |
169 | m_pos -= *ip++ << 2; | 96 | m_pos -= *ip++ << 2; |
170 | 97 | TEST_LB(m_pos); | |
171 | if (HAVE_LB(m_pos, out, op)) | 98 | NEED_OP(2); |
172 | goto lookbehind_overrun; | 99 | op[0] = m_pos[0]; |
173 | if (HAVE_OP(2, op_end, op)) | 100 | op[1] = m_pos[1]; |
174 | goto output_overrun; | 101 | op += 2; |
175 | 102 | goto match_next; | |
176 | *op++ = *m_pos++; | 103 | } else { |
177 | *op++ = *m_pos; | 104 | next = t & 3; |
178 | goto match_done; | 105 | m_pos = op - (1 + M2_MAX_OFFSET); |
106 | m_pos -= t >> 2; | ||
107 | m_pos -= *ip++ << 2; | ||
108 | t = 3; | ||
179 | } | 109 | } |
180 | 110 | } else if (t >= 64) { | |
181 | if (HAVE_LB(m_pos, out, op)) | 111 | next = t & 3; |
182 | goto lookbehind_overrun; | 112 | m_pos = op - 1; |
183 | if (HAVE_OP(t + 3 - 1, op_end, op)) | 113 | m_pos -= (t >> 2) & 7; |
184 | goto output_overrun; | 114 | m_pos -= *ip++ << 3; |
185 | 115 | t = (t >> 5) - 1 + (3 - 1); | |
186 | if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { | 116 | } else if (t >= 32) { |
187 | COPY4(op, m_pos); | 117 | t = (t & 31) + (3 - 1); |
188 | op += 4; | 118 | if (unlikely(t == 2)) { |
189 | m_pos += 4; | 119 | while (unlikely(*ip == 0)) { |
190 | t -= 4 - (3 - 1); | 120 | t += 255; |
121 | ip++; | ||
122 | NEED_IP(1); | ||
123 | } | ||
124 | t += 31 + *ip++; | ||
125 | NEED_IP(2); | ||
126 | } | ||
127 | m_pos = op - 1; | ||
128 | next = get_unaligned_le16(ip); | ||
129 | ip += 2; | ||
130 | m_pos -= next >> 2; | ||
131 | next &= 3; | ||
132 | } else { | ||
133 | m_pos = op; | ||
134 | m_pos -= (t & 8) << 11; | ||
135 | t = (t & 7) + (3 - 1); | ||
136 | if (unlikely(t == 2)) { | ||
137 | while (unlikely(*ip == 0)) { | ||
138 | t += 255; | ||
139 | ip++; | ||
140 | NEED_IP(1); | ||
141 | } | ||
142 | t += 7 + *ip++; | ||
143 | NEED_IP(2); | ||
144 | } | ||
145 | next = get_unaligned_le16(ip); | ||
146 | ip += 2; | ||
147 | m_pos -= next >> 2; | ||
148 | next &= 3; | ||
149 | if (m_pos == op) | ||
150 | goto eof_found; | ||
151 | m_pos -= 0x4000; | ||
152 | } | ||
153 | TEST_LB(m_pos); | ||
154 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) | ||
155 | if (op - m_pos >= 8) { | ||
156 | unsigned char *oe = op + t; | ||
157 | if (likely(HAVE_OP(t + 15))) { | ||
191 | do { | 158 | do { |
192 | COPY4(op, m_pos); | 159 | COPY8(op, m_pos); |
193 | op += 4; | 160 | op += 8; |
194 | m_pos += 4; | 161 | m_pos += 8; |
195 | t -= 4; | 162 | COPY8(op, m_pos); |
196 | } while (t >= 4); | 163 | op += 8; |
197 | if (t > 0) | 164 | m_pos += 8; |
198 | do { | 165 | } while (op < oe); |
199 | *op++ = *m_pos++; | 166 | op = oe; |
200 | } while (--t > 0); | 167 | if (HAVE_IP(6)) { |
168 | state = next; | ||
169 | COPY4(op, ip); | ||
170 | op += next; | ||
171 | ip += next; | ||
172 | continue; | ||
173 | } | ||
201 | } else { | 174 | } else { |
202 | copy_match: | 175 | NEED_OP(t); |
203 | *op++ = *m_pos++; | ||
204 | *op++ = *m_pos++; | ||
205 | do { | 176 | do { |
206 | *op++ = *m_pos++; | 177 | *op++ = *m_pos++; |
207 | } while (--t > 0); | 178 | } while (op < oe); |
208 | } | 179 | } |
209 | match_done: | 180 | } else |
210 | t = ip[-2] & 3; | 181 | #endif |
211 | if (t == 0) | 182 | { |
212 | break; | 183 | unsigned char *oe = op + t; |
184 | NEED_OP(t); | ||
185 | op[0] = m_pos[0]; | ||
186 | op[1] = m_pos[1]; | ||
187 | op += 2; | ||
188 | m_pos += 2; | ||
189 | do { | ||
190 | *op++ = *m_pos++; | ||
191 | } while (op < oe); | ||
192 | } | ||
213 | match_next: | 193 | match_next: |
214 | if (HAVE_OP(t, op_end, op)) | 194 | state = next; |
215 | goto output_overrun; | 195 | t = next; |
216 | if (HAVE_IP(t + 1, ip_end, ip)) | 196 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) |
217 | goto input_overrun; | 197 | if (likely(HAVE_IP(6) && HAVE_OP(4))) { |
218 | 198 | COPY4(op, ip); | |
219 | *op++ = *ip++; | 199 | op += t; |
220 | if (t > 1) { | 200 | ip += t; |
201 | } else | ||
202 | #endif | ||
203 | { | ||
204 | NEED_IP(t + 3); | ||
205 | NEED_OP(t); | ||
206 | while (t > 0) { | ||
221 | *op++ = *ip++; | 207 | *op++ = *ip++; |
222 | if (t > 2) | 208 | t--; |
223 | *op++ = *ip++; | ||
224 | } | 209 | } |
225 | 210 | } | |
226 | t = *ip++; | ||
227 | } while (ip < ip_end); | ||
228 | } | 211 | } |
229 | 212 | ||
230 | *out_len = op - out; | ||
231 | return LZO_E_EOF_NOT_FOUND; | ||
232 | |||
233 | eof_found: | 213 | eof_found: |
234 | *out_len = op - out; | 214 | *out_len = op - out; |
235 | return (ip == ip_end ? LZO_E_OK : | 215 | return (t != 3 ? LZO_E_ERROR : |
236 | (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); | 216 | ip == ip_end ? LZO_E_OK : |
217 | ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN); | ||
218 | |||
237 | input_overrun: | 219 | input_overrun: |
238 | *out_len = op - out; | 220 | *out_len = op - out; |
239 | return LZO_E_INPUT_OVERRUN; | 221 | return LZO_E_INPUT_OVERRUN; |
diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h index b6d482c492ef..6710b83ce72e 100644 --- a/lib/lzo/lzodefs.h +++ b/lib/lzo/lzodefs.h | |||
@@ -1,19 +1,37 @@ | |||
1 | /* | 1 | /* |
2 | * lzodefs.h -- architecture, OS and compiler specific defines | 2 | * lzodefs.h -- architecture, OS and compiler specific defines |
3 | * | 3 | * |
4 | * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com> | 4 | * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com> |
5 | * | 5 | * |
6 | * The full LZO package can be found at: | 6 | * The full LZO package can be found at: |
7 | * http://www.oberhumer.com/opensource/lzo/ | 7 | * http://www.oberhumer.com/opensource/lzo/ |
8 | * | 8 | * |
9 | * Changed for kernel use by: | 9 | * Changed for Linux kernel use by: |
10 | * Nitin Gupta <nitingupta910@gmail.com> | 10 | * Nitin Gupta <nitingupta910@gmail.com> |
11 | * Richard Purdie <rpurdie@openedhand.com> | 11 | * Richard Purdie <rpurdie@openedhand.com> |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #define LZO_VERSION 0x2020 | 14 | |
15 | #define LZO_VERSION_STRING "2.02" | 15 | #define COPY4(dst, src) \ |
16 | #define LZO_VERSION_DATE "Oct 17 2005" | 16 | put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) |
17 | #if defined(__x86_64__) | ||
18 | #define COPY8(dst, src) \ | ||
19 | put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst)) | ||
20 | #else | ||
21 | #define COPY8(dst, src) \ | ||
22 | COPY4(dst, src); COPY4((dst) + 4, (src) + 4) | ||
23 | #endif | ||
24 | |||
25 | #if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) | ||
26 | #error "conflicting endian definitions" | ||
27 | #elif defined(__x86_64__) | ||
28 | #define LZO_USE_CTZ64 1 | ||
29 | #define LZO_USE_CTZ32 1 | ||
30 | #elif defined(__i386__) || defined(__powerpc__) | ||
31 | #define LZO_USE_CTZ32 1 | ||
32 | #elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5) | ||
33 | #define LZO_USE_CTZ32 1 | ||
34 | #endif | ||
17 | 35 | ||
18 | #define M1_MAX_OFFSET 0x0400 | 36 | #define M1_MAX_OFFSET 0x0400 |
19 | #define M2_MAX_OFFSET 0x0800 | 37 | #define M2_MAX_OFFSET 0x0800 |
@@ -34,10 +52,8 @@ | |||
34 | #define M3_MARKER 32 | 52 | #define M3_MARKER 32 |
35 | #define M4_MARKER 16 | 53 | #define M4_MARKER 16 |
36 | 54 | ||
37 | #define D_BITS 14 | 55 | #define lzo_dict_t unsigned short |
38 | #define D_MASK ((1u << D_BITS) - 1) | 56 | #define D_BITS 13 |
57 | #define D_SIZE (1u << D_BITS) | ||
58 | #define D_MASK (D_SIZE - 1) | ||
39 | #define D_HIGH ((D_MASK >> 1) + 1) | 59 | #define D_HIGH ((D_MASK >> 1) + 1) |
40 | |||
41 | #define DX2(p, s1, s2) (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) \ | ||
42 | << (s1)) ^ (p)[0]) | ||
43 | #define DX3(p, s1, s2, s3) ((DX2((p)+1, s2, s3) << (s1)) ^ (p)[0]) | ||