diff options
author | Markus F.X.J. Oberhumer <markus@oberhumer.com> | 2012-08-13 11:25:44 -0400 |
---|---|---|
committer | Markus F.X.J. Oberhumer <markus@oberhumer.com> | 2013-02-20 13:36:01 -0500 |
commit | 8b975bd3f9089f8ee5d7bbfd798537b992bbc7e7 (patch) | |
tree | 09078bdf5e805a5f921d8b7b592262ef3e877937 /lib/lzo/lzo1x_compress.c | |
parent | b6bec26cea948148a9420e7a0ac337f925de49e7 (diff) |
lib/lzo: Update LZO compression to current upstream version
This commit updates the kernel LZO code to the current upsteam version
which features a significant speed improvement - benchmarking the Calgary
and Silesia test corpora typically shows a doubled performance in
both compression and decompression on modern i386/x86_64/powerpc machines.
Signed-off-by: Markus F.X.J. Oberhumer <markus@oberhumer.com>
Diffstat (limited to 'lib/lzo/lzo1x_compress.c')
-rw-r--r-- | lib/lzo/lzo1x_compress.c | 335 |
1 files changed, 194 insertions, 141 deletions
diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index a6040990a62e..236eb21167b5 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c | |||
@@ -1,194 +1,243 @@ | |||
1 | /* | 1 | /* |
2 | * LZO1X Compressor from MiniLZO | 2 | * LZO1X Compressor from LZO |
3 | * | 3 | * |
4 | * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com> | 4 | * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com> |
5 | * | 5 | * |
6 | * The full LZO package can be found at: | 6 | * The full LZO package can be found at: |
7 | * http://www.oberhumer.com/opensource/lzo/ | 7 | * http://www.oberhumer.com/opensource/lzo/ |
8 | * | 8 | * |
9 | * Changed for kernel use by: | 9 | * Changed for Linux kernel use by: |
10 | * Nitin Gupta <nitingupta910@gmail.com> | 10 | * Nitin Gupta <nitingupta910@gmail.com> |
11 | * Richard Purdie <rpurdie@openedhand.com> | 11 | * Richard Purdie <rpurdie@openedhand.com> |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
16 | #include <linux/lzo.h> | ||
17 | #include <asm/unaligned.h> | 16 | #include <asm/unaligned.h> |
17 | #include <linux/lzo.h> | ||
18 | #include "lzodefs.h" | 18 | #include "lzodefs.h" |
19 | 19 | ||
20 | static noinline size_t | 20 | static noinline size_t |
21 | _lzo1x_1_do_compress(const unsigned char *in, size_t in_len, | 21 | lzo1x_1_do_compress(const unsigned char *in, size_t in_len, |
22 | unsigned char *out, size_t *out_len, void *wrkmem) | 22 | unsigned char *out, size_t *out_len, |
23 | size_t ti, void *wrkmem) | ||
23 | { | 24 | { |
25 | const unsigned char *ip; | ||
26 | unsigned char *op; | ||
24 | const unsigned char * const in_end = in + in_len; | 27 | const unsigned char * const in_end = in + in_len; |
25 | const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5; | 28 | const unsigned char * const ip_end = in + in_len - 20; |
26 | const unsigned char ** const dict = wrkmem; | 29 | const unsigned char *ii; |
27 | const unsigned char *ip = in, *ii = ip; | 30 | lzo_dict_t * const dict = (lzo_dict_t *) wrkmem; |
28 | const unsigned char *end, *m, *m_pos; | ||
29 | size_t m_off, m_len, dindex; | ||
30 | unsigned char *op = out; | ||
31 | 31 | ||
32 | ip += 4; | 32 | op = out; |
33 | ip = in; | ||
34 | ii = ip; | ||
35 | ip += ti < 4 ? 4 - ti : 0; | ||
33 | 36 | ||
34 | for (;;) { | 37 | for (;;) { |
35 | dindex = ((size_t)(0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK; | 38 | const unsigned char *m_pos; |
36 | m_pos = dict[dindex]; | 39 | size_t t, m_len, m_off; |
37 | 40 | u32 dv; | |
38 | if (m_pos < in) | ||
39 | goto literal; | ||
40 | |||
41 | if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) | ||
42 | goto literal; | ||
43 | |||
44 | m_off = ip - m_pos; | ||
45 | if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) | ||
46 | goto try_match; | ||
47 | |||
48 | dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f); | ||
49 | m_pos = dict[dindex]; | ||
50 | |||
51 | if (m_pos < in) | ||
52 | goto literal; | ||
53 | |||
54 | if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) | ||
55 | goto literal; | ||
56 | |||
57 | m_off = ip - m_pos; | ||
58 | if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) | ||
59 | goto try_match; | ||
60 | |||
61 | goto literal; | ||
62 | |||
63 | try_match: | ||
64 | if (get_unaligned((const unsigned short *)m_pos) | ||
65 | == get_unaligned((const unsigned short *)ip)) { | ||
66 | if (likely(m_pos[2] == ip[2])) | ||
67 | goto match; | ||
68 | } | ||
69 | |||
70 | literal: | 41 | literal: |
71 | dict[dindex] = ip; | 42 | ip += 1 + ((ip - ii) >> 5); |
72 | ++ip; | 43 | next: |
73 | if (unlikely(ip >= ip_end)) | 44 | if (unlikely(ip >= ip_end)) |
74 | break; | 45 | break; |
75 | continue; | 46 | dv = get_unaligned_le32(ip); |
76 | 47 | t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; | |
77 | match: | 48 | m_pos = in + dict[t]; |
78 | dict[dindex] = ip; | 49 | dict[t] = (lzo_dict_t) (ip - in); |
79 | if (ip != ii) { | 50 | if (unlikely(dv != get_unaligned_le32(m_pos))) |
80 | size_t t = ip - ii; | 51 | goto literal; |
81 | 52 | ||
53 | ii -= ti; | ||
54 | ti = 0; | ||
55 | t = ip - ii; | ||
56 | if (t != 0) { | ||
82 | if (t <= 3) { | 57 | if (t <= 3) { |
83 | op[-2] |= t; | 58 | op[-2] |= t; |
84 | } else if (t <= 18) { | 59 | COPY4(op, ii); |
60 | op += t; | ||
61 | } else if (t <= 16) { | ||
85 | *op++ = (t - 3); | 62 | *op++ = (t - 3); |
63 | COPY8(op, ii); | ||
64 | COPY8(op + 8, ii + 8); | ||
65 | op += t; | ||
86 | } else { | 66 | } else { |
87 | size_t tt = t - 18; | 67 | if (t <= 18) { |
88 | 68 | *op++ = (t - 3); | |
89 | *op++ = 0; | 69 | } else { |
90 | while (tt > 255) { | 70 | size_t tt = t - 18; |
91 | tt -= 255; | ||
92 | *op++ = 0; | 71 | *op++ = 0; |
72 | while (unlikely(tt > 255)) { | ||
73 | tt -= 255; | ||
74 | *op++ = 0; | ||
75 | } | ||
76 | *op++ = tt; | ||
93 | } | 77 | } |
94 | *op++ = tt; | 78 | do { |
79 | COPY8(op, ii); | ||
80 | COPY8(op + 8, ii + 8); | ||
81 | op += 16; | ||
82 | ii += 16; | ||
83 | t -= 16; | ||
84 | } while (t >= 16); | ||
85 | if (t > 0) do { | ||
86 | *op++ = *ii++; | ||
87 | } while (--t > 0); | ||
95 | } | 88 | } |
96 | do { | ||
97 | *op++ = *ii++; | ||
98 | } while (--t > 0); | ||
99 | } | 89 | } |
100 | 90 | ||
101 | ip += 3; | 91 | m_len = 4; |
102 | if (m_pos[3] != *ip++ || m_pos[4] != *ip++ | 92 | { |
103 | || m_pos[5] != *ip++ || m_pos[6] != *ip++ | 93 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64) |
104 | || m_pos[7] != *ip++ || m_pos[8] != *ip++) { | 94 | u64 v; |
105 | --ip; | 95 | v = get_unaligned((const u64 *) (ip + m_len)) ^ |
106 | m_len = ip - ii; | 96 | get_unaligned((const u64 *) (m_pos + m_len)); |
97 | if (unlikely(v == 0)) { | ||
98 | do { | ||
99 | m_len += 8; | ||
100 | v = get_unaligned((const u64 *) (ip + m_len)) ^ | ||
101 | get_unaligned((const u64 *) (m_pos + m_len)); | ||
102 | if (unlikely(ip + m_len >= ip_end)) | ||
103 | goto m_len_done; | ||
104 | } while (v == 0); | ||
105 | } | ||
106 | # if defined(__LITTLE_ENDIAN) | ||
107 | m_len += (unsigned) __builtin_ctzll(v) / 8; | ||
108 | # elif defined(__BIG_ENDIAN) | ||
109 | m_len += (unsigned) __builtin_clzll(v) / 8; | ||
110 | # else | ||
111 | # error "missing endian definition" | ||
112 | # endif | ||
113 | #elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32) | ||
114 | u32 v; | ||
115 | v = get_unaligned((const u32 *) (ip + m_len)) ^ | ||
116 | get_unaligned((const u32 *) (m_pos + m_len)); | ||
117 | if (unlikely(v == 0)) { | ||
118 | do { | ||
119 | m_len += 4; | ||
120 | v = get_unaligned((const u32 *) (ip + m_len)) ^ | ||
121 | get_unaligned((const u32 *) (m_pos + m_len)); | ||
122 | if (v != 0) | ||
123 | break; | ||
124 | m_len += 4; | ||
125 | v = get_unaligned((const u32 *) (ip + m_len)) ^ | ||
126 | get_unaligned((const u32 *) (m_pos + m_len)); | ||
127 | if (unlikely(ip + m_len >= ip_end)) | ||
128 | goto m_len_done; | ||
129 | } while (v == 0); | ||
130 | } | ||
131 | # if defined(__LITTLE_ENDIAN) | ||
132 | m_len += (unsigned) __builtin_ctz(v) / 8; | ||
133 | # elif defined(__BIG_ENDIAN) | ||
134 | m_len += (unsigned) __builtin_clz(v) / 8; | ||
135 | # else | ||
136 | # error "missing endian definition" | ||
137 | # endif | ||
138 | #else | ||
139 | if (unlikely(ip[m_len] == m_pos[m_len])) { | ||
140 | do { | ||
141 | m_len += 1; | ||
142 | if (ip[m_len] != m_pos[m_len]) | ||
143 | break; | ||
144 | m_len += 1; | ||
145 | if (ip[m_len] != m_pos[m_len]) | ||
146 | break; | ||
147 | m_len += 1; | ||
148 | if (ip[m_len] != m_pos[m_len]) | ||
149 | break; | ||
150 | m_len += 1; | ||
151 | if (ip[m_len] != m_pos[m_len]) | ||
152 | break; | ||
153 | m_len += 1; | ||
154 | if (ip[m_len] != m_pos[m_len]) | ||
155 | break; | ||
156 | m_len += 1; | ||
157 | if (ip[m_len] != m_pos[m_len]) | ||
158 | break; | ||
159 | m_len += 1; | ||
160 | if (ip[m_len] != m_pos[m_len]) | ||
161 | break; | ||
162 | m_len += 1; | ||
163 | if (unlikely(ip + m_len >= ip_end)) | ||
164 | goto m_len_done; | ||
165 | } while (ip[m_len] == m_pos[m_len]); | ||
166 | } | ||
167 | #endif | ||
168 | } | ||
169 | m_len_done: | ||
107 | 170 | ||
108 | if (m_off <= M2_MAX_OFFSET) { | 171 | m_off = ip - m_pos; |
109 | m_off -= 1; | 172 | ip += m_len; |
110 | *op++ = (((m_len - 1) << 5) | 173 | ii = ip; |
111 | | ((m_off & 7) << 2)); | 174 | if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { |
112 | *op++ = (m_off >> 3); | 175 | m_off -= 1; |
113 | } else if (m_off <= M3_MAX_OFFSET) { | 176 | *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); |
114 | m_off -= 1; | 177 | *op++ = (m_off >> 3); |
178 | } else if (m_off <= M3_MAX_OFFSET) { | ||
179 | m_off -= 1; | ||
180 | if (m_len <= M3_MAX_LEN) | ||
115 | *op++ = (M3_MARKER | (m_len - 2)); | 181 | *op++ = (M3_MARKER | (m_len - 2)); |
116 | goto m3_m4_offset; | 182 | else { |
117 | } else { | 183 | m_len -= M3_MAX_LEN; |
118 | m_off -= 0x4000; | 184 | *op++ = M3_MARKER | 0; |
119 | 185 | while (unlikely(m_len > 255)) { | |
120 | *op++ = (M4_MARKER | ((m_off & 0x4000) >> 11) | 186 | m_len -= 255; |
121 | | (m_len - 2)); | 187 | *op++ = 0; |
122 | goto m3_m4_offset; | 188 | } |
189 | *op++ = (m_len); | ||
123 | } | 190 | } |
191 | *op++ = (m_off << 2); | ||
192 | *op++ = (m_off >> 6); | ||
124 | } else { | 193 | } else { |
125 | end = in_end; | 194 | m_off -= 0x4000; |
126 | m = m_pos + M2_MAX_LEN + 1; | 195 | if (m_len <= M4_MAX_LEN) |
127 | 196 | *op++ = (M4_MARKER | ((m_off >> 11) & 8) | |
128 | while (ip < end && *m == *ip) { | ||
129 | m++; | ||
130 | ip++; | ||
131 | } | ||
132 | m_len = ip - ii; | ||
133 | |||
134 | if (m_off <= M3_MAX_OFFSET) { | ||
135 | m_off -= 1; | ||
136 | if (m_len <= 33) { | ||
137 | *op++ = (M3_MARKER | (m_len - 2)); | ||
138 | } else { | ||
139 | m_len -= 33; | ||
140 | *op++ = M3_MARKER | 0; | ||
141 | goto m3_m4_len; | ||
142 | } | ||
143 | } else { | ||
144 | m_off -= 0x4000; | ||
145 | if (m_len <= M4_MAX_LEN) { | ||
146 | *op++ = (M4_MARKER | ||
147 | | ((m_off & 0x4000) >> 11) | ||
148 | | (m_len - 2)); | 197 | | (m_len - 2)); |
149 | } else { | 198 | else { |
150 | m_len -= M4_MAX_LEN; | 199 | m_len -= M4_MAX_LEN; |
151 | *op++ = (M4_MARKER | 200 | *op++ = (M4_MARKER | ((m_off >> 11) & 8)); |
152 | | ((m_off & 0x4000) >> 11)); | 201 | while (unlikely(m_len > 255)) { |
153 | m3_m4_len: | 202 | m_len -= 255; |
154 | while (m_len > 255) { | 203 | *op++ = 0; |
155 | m_len -= 255; | ||
156 | *op++ = 0; | ||
157 | } | ||
158 | |||
159 | *op++ = (m_len); | ||
160 | } | 204 | } |
205 | *op++ = (m_len); | ||
161 | } | 206 | } |
162 | m3_m4_offset: | 207 | *op++ = (m_off << 2); |
163 | *op++ = ((m_off & 63) << 2); | ||
164 | *op++ = (m_off >> 6); | 208 | *op++ = (m_off >> 6); |
165 | } | 209 | } |
166 | 210 | goto next; | |
167 | ii = ip; | ||
168 | if (unlikely(ip >= ip_end)) | ||
169 | break; | ||
170 | } | 211 | } |
171 | |||
172 | *out_len = op - out; | 212 | *out_len = op - out; |
173 | return in_end - ii; | 213 | return in_end - (ii - ti); |
174 | } | 214 | } |
175 | 215 | ||
176 | int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, | 216 | int lzo1x_1_compress(const unsigned char *in, size_t in_len, |
177 | size_t *out_len, void *wrkmem) | 217 | unsigned char *out, size_t *out_len, |
218 | void *wrkmem) | ||
178 | { | 219 | { |
179 | const unsigned char *ii; | 220 | const unsigned char *ip = in; |
180 | unsigned char *op = out; | 221 | unsigned char *op = out; |
181 | size_t t; | 222 | size_t l = in_len; |
223 | size_t t = 0; | ||
182 | 224 | ||
183 | if (unlikely(in_len <= M2_MAX_LEN + 5)) { | 225 | while (l > 20) { |
184 | t = in_len; | 226 | size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1); |
185 | } else { | 227 | uintptr_t ll_end = (uintptr_t) ip + ll; |
186 | t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem); | 228 | if ((ll_end + ((t + ll) >> 5)) <= ll_end) |
229 | break; | ||
230 | BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); | ||
231 | memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); | ||
232 | t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem); | ||
233 | ip += ll; | ||
187 | op += *out_len; | 234 | op += *out_len; |
235 | l -= ll; | ||
188 | } | 236 | } |
237 | t += l; | ||
189 | 238 | ||
190 | if (t > 0) { | 239 | if (t > 0) { |
191 | ii = in + in_len - t; | 240 | const unsigned char *ii = in + in_len - t; |
192 | 241 | ||
193 | if (op == out && t <= 238) { | 242 | if (op == out && t <= 238) { |
194 | *op++ = (17 + t); | 243 | *op++ = (17 + t); |
@@ -198,16 +247,21 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, | |||
198 | *op++ = (t - 3); | 247 | *op++ = (t - 3); |
199 | } else { | 248 | } else { |
200 | size_t tt = t - 18; | 249 | size_t tt = t - 18; |
201 | |||
202 | *op++ = 0; | 250 | *op++ = 0; |
203 | while (tt > 255) { | 251 | while (tt > 255) { |
204 | tt -= 255; | 252 | tt -= 255; |
205 | *op++ = 0; | 253 | *op++ = 0; |
206 | } | 254 | } |
207 | |||
208 | *op++ = tt; | 255 | *op++ = tt; |
209 | } | 256 | } |
210 | do { | 257 | if (t >= 16) do { |
258 | COPY8(op, ii); | ||
259 | COPY8(op + 8, ii + 8); | ||
260 | op += 16; | ||
261 | ii += 16; | ||
262 | t -= 16; | ||
263 | } while (t >= 16); | ||
264 | if (t > 0) do { | ||
211 | *op++ = *ii++; | 265 | *op++ = *ii++; |
212 | } while (--t > 0); | 266 | } while (--t > 0); |
213 | } | 267 | } |
@@ -223,4 +277,3 @@ EXPORT_SYMBOL_GPL(lzo1x_1_compress); | |||
223 | 277 | ||
224 | MODULE_LICENSE("GPL"); | 278 | MODULE_LICENSE("GPL"); |
225 | MODULE_DESCRIPTION("LZO1X-1 Compressor"); | 279 | MODULE_DESCRIPTION("LZO1X-1 Compressor"); |
226 | |||