aboutsummaryrefslogtreecommitdiffstats
path: root/lib/zlib_inflate/inffast.c
diff options
context:
space:
mode:
authorRichard Purdie <rpurdie@rpsys.net>2006-06-22 17:47:34 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-22 18:05:58 -0400
commit4f3865fb57a04db7cca068fed1c15badc064a302 (patch)
tree4c923c72b6ac9b633c87cc73b55a75c7cfd0f044 /lib/zlib_inflate/inffast.c
parent4f1bcaf094ccc512c23e10104c05a6f8e5b7a9e4 (diff)
[PATCH] zlib_inflate: Upgrade library code to a recent version
Upgrade the zlib_inflate implementation in the kernel from a patched version 1.1.3/4 to a patched 1.2.3. The code in the kernel is about seven years old and I noticed that the external zlib library's inflate performance was significantly faster (~50%) than the code in the kernel on ARM (and faster again on x86_32). For comparison the newer deflate code is 20% slower on ARM and 50% slower on x86_32 but gives an approx 1% compression ratio improvement. I don't consider this to be an improvement for kernel use so have no plans to change the zlib_deflate code. Various changes have been made to the zlib code in the kernel, the most significant being the extra functions/flush option used by ppp_deflate. This update reimplements the features PPP needs to ensure it continues to work. This code has been tested on ARM under both JFFS2 (with zlib compression enabled) and ppp_deflate and on x86_32. JFFS2 sees an approx. 10% real world file read speed improvement. This patch also removes ZLIB_VERSION as it no longer has a correct value. We don't need version checks anyway as the kernel's module handling will take care of that for us. This removal is also more in keeping with the zlib author's wishes (http://www.zlib.net/zlib_faq.html#faq24) and I've added something to the zlib.h header to note its a modified version. Signed-off-by: Richard Purdie <rpurdie@rpsys.net> Acked-by: Joern Engel <joern@wh.fh-wedel.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'lib/zlib_inflate/inffast.c')
-rw-r--r--lib/zlib_inflate/inffast.c462
1 files changed, 299 insertions, 163 deletions
diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c
index 0bd7623fc85..02a16eacb72 100644
--- a/lib/zlib_inflate/inffast.c
+++ b/lib/zlib_inflate/inffast.c
@@ -1,176 +1,312 @@
1/* inffast.c -- process literals and length/distance pairs fast 1/* inffast.c -- fast decoding
2 * Copyright (C) 1995-1998 Mark Adler 2 * Copyright (C) 1995-2004 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h 3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */ 4 */
5 5
6#include <linux/zutil.h> 6#include <linux/zutil.h>
7#include "inftrees.h" 7#include "inftrees.h"
8#include "infblock.h" 8#include "inflate.h"
9#include "infcodes.h"
10#include "infutil.h"
11#include "inffast.h" 9#include "inffast.h"
12 10
13struct inflate_codes_state; 11#ifndef ASMINF
14 12
15/* simplify the use of the inflate_huft type with some defines */ 13/* Allow machine dependent optimization for post-increment or pre-increment.
16#define exop word.what.Exop 14 Based on testing to date,
17#define bits word.what.Bits 15 Pre-increment preferred for:
18 16 - PowerPC G3 (Adler)
19/* macros for bit input with no checking and for returning unused bytes */ 17 - MIPS R5000 (Randers-Pehrson)
20#define GRABBITS(j) {while(k<(j)){b|=((uLong)NEXTBYTE)<<k;k+=8;}} 18 Post-increment preferred for:
21#define UNGRAB {c=z->avail_in-n;c=(k>>3)<c?k>>3:c;n+=c;p-=c;k-=c<<3;} 19 - none
22 20 No measurable difference:
23/* Called with number of bytes left to write in window at least 258 21 - Pentium III (Anderson)
24 (the maximum string length) and number of input bytes available 22 - M68060 (Nikl)
25 at least ten. The ten bytes are six bytes for the longest length/ 23 */
26 distance pair plus four bytes for overloading the bit buffer. */ 24#ifdef POSTINC
27 25# define OFF 0
28int zlib_inflate_fast( 26# define PUP(a) *(a)++
29 uInt bl, 27#else
30 uInt bd, 28# define OFF 1
31 inflate_huft *tl, 29# define PUP(a) *++(a)
32 inflate_huft *td, /* need separate declaration for Borland C++ */ 30#endif
33 inflate_blocks_statef *s, 31
34 z_streamp z 32/*
35) 33 Decode literal, length, and distance codes and write out the resulting
34 literal and match bytes until either not enough input or output is
35 available, an end-of-block is encountered, or a data error is encountered.
36 When large enough input and output buffers are supplied to inflate(), for
37 example, a 16K input buffer and a 64K output buffer, more than 95% of the
38 inflate execution time is spent in this routine.
39
40 Entry assumptions:
41
42 state->mode == LEN
43 strm->avail_in >= 6
44 strm->avail_out >= 258
45 start >= strm->avail_out
46 state->bits < 8
47
48 On return, state->mode is one of:
49
50 LEN -- ran out of enough output space or enough available input
51 TYPE -- reached end of block code, inflate() to interpret next block
52 BAD -- error in block data
53
54 Notes:
55
56 - The maximum input bits used by a length/distance pair is 15 bits for the
57 length code, 5 bits for the length extra, 15 bits for the distance code,
58 and 13 bits for the distance extra. This totals 48 bits, or six bytes.
59 Therefore if strm->avail_in >= 6, then there is enough input to avoid
60 checking for available input while decoding.
61
62 - The maximum bytes that a single length/distance pair can output is 258
63 bytes, which is the maximum length that can be coded. inflate_fast()
64 requires strm->avail_out >= 258 for each loop to avoid checking for
65 output space.
66 */
67void inflate_fast(strm, start)
68z_streamp strm;
69unsigned start; /* inflate()'s starting value for strm->avail_out */
36{ 70{
37 inflate_huft *t; /* temporary pointer */ 71 struct inflate_state *state;
38 uInt e; /* extra bits or operation */ 72 unsigned char *in; /* local strm->next_in */
39 uLong b; /* bit buffer */ 73 unsigned char *last; /* while in < last, enough input available */
40 uInt k; /* bits in bit buffer */ 74 unsigned char *out; /* local strm->next_out */
41 Byte *p; /* input data pointer */ 75 unsigned char *beg; /* inflate()'s initial strm->next_out */
42 uInt n; /* bytes available there */ 76 unsigned char *end; /* while out < end, enough space available */
43 Byte *q; /* output window write pointer */ 77#ifdef INFLATE_STRICT
44 uInt m; /* bytes to end of window or read pointer */ 78 unsigned dmax; /* maximum distance from zlib header */
45 uInt ml; /* mask for literal/length tree */ 79#endif
46 uInt md; /* mask for distance tree */ 80 unsigned wsize; /* window size or zero if not using window */
47 uInt c; /* bytes to copy */ 81 unsigned whave; /* valid bytes in the window */
48 uInt d; /* distance back to copy from */ 82 unsigned write; /* window write index */
49 Byte *r; /* copy source pointer */ 83 unsigned char *window; /* allocated sliding window, if wsize != 0 */
50 84 unsigned long hold; /* local strm->hold */
51 /* load input, output, bit values */ 85 unsigned bits; /* local strm->bits */
52 LOAD 86 code const *lcode; /* local strm->lencode */
53 87 code const *dcode; /* local strm->distcode */
54 /* initialize masks */ 88 unsigned lmask; /* mask for first level of length codes */
55 ml = zlib_inflate_mask[bl]; 89 unsigned dmask; /* mask for first level of distance codes */
56 md = zlib_inflate_mask[bd]; 90 code this; /* retrieved table entry */
57 91 unsigned op; /* code bits, operation, extra bits, or */
58 /* do until not enough input or output space for fast loop */ 92 /* window position, window bytes to copy */
59 do { /* assume called with m >= 258 && n >= 10 */ 93 unsigned len; /* match length, unused bytes */
60 /* get literal/length code */ 94 unsigned dist; /* match distance */
61 GRABBITS(20) /* max bits for literal/length code */ 95 unsigned char *from; /* where to copy match from */
62 if ((e = (t = tl + ((uInt)b & ml))->exop) == 0) 96
63 { 97 /* copy state to local variables */
64 DUMPBITS(t->bits) 98 state = (struct inflate_state *)strm->state;
65 *q++ = (Byte)t->base; 99 in = strm->next_in - OFF;
66 m--; 100 last = in + (strm->avail_in - 5);
67 continue; 101 out = strm->next_out - OFF;
68 } 102 beg = out - (start - strm->avail_out);
103 end = out + (strm->avail_out - 257);
104#ifdef INFLATE_STRICT
105 dmax = state->dmax;
106#endif
107 wsize = state->wsize;
108 whave = state->whave;
109 write = state->write;
110 window = state->window;
111 hold = state->hold;
112 bits = state->bits;
113 lcode = state->lencode;
114 dcode = state->distcode;
115 lmask = (1U << state->lenbits) - 1;
116 dmask = (1U << state->distbits) - 1;
117
118 /* decode literals and length/distances until end-of-block or not enough
119 input data or output space */
69 do { 120 do {
70 DUMPBITS(t->bits) 121 if (bits < 15) {
71 if (e & 16) 122 hold += (unsigned long)(PUP(in)) << bits;
72 { 123 bits += 8;
73 /* get extra bits for length */ 124 hold += (unsigned long)(PUP(in)) << bits;
74 e &= 15; 125 bits += 8;
75 c = t->base + ((uInt)b & zlib_inflate_mask[e]); 126 }
76 DUMPBITS(e) 127 this = lcode[hold & lmask];
77 128 dolen:
78 /* decode distance base of block to copy */ 129 op = (unsigned)(this.bits);
79 GRABBITS(15); /* max bits for distance code */ 130 hold >>= op;
80 e = (t = td + ((uInt)b & md))->exop; 131 bits -= op;
81 do { 132 op = (unsigned)(this.op);
82 DUMPBITS(t->bits) 133 if (op == 0) { /* literal */
83 if (e & 16) 134 PUP(out) = (unsigned char)(this.val);
84 { 135 }
85 /* get extra bits to add to distance base */ 136 else if (op & 16) { /* length base */
86 e &= 15; 137 len = (unsigned)(this.val);
87 GRABBITS(e) /* get extra bits (up to 13) */ 138 op &= 15; /* number of extra bits */
88 d = t->base + ((uInt)b & zlib_inflate_mask[e]); 139 if (op) {
89 DUMPBITS(e) 140 if (bits < op) {
90 141 hold += (unsigned long)(PUP(in)) << bits;
91 /* do the copy */ 142 bits += 8;
92 m -= c; 143 }
93 r = q - d; 144 len += (unsigned)hold & ((1U << op) - 1);
94 if (r < s->window) /* wrap if needed */ 145 hold >>= op;
95 { 146 bits -= op;
96 do { 147 }
97 r += s->end - s->window; /* force pointer in window */ 148 if (bits < 15) {
98 } while (r < s->window); /* covers invalid distances */ 149 hold += (unsigned long)(PUP(in)) << bits;
99 e = s->end - r; 150 bits += 8;
100 if (c > e) 151 hold += (unsigned long)(PUP(in)) << bits;
101 { 152 bits += 8;
102 c -= e; /* wrapped copy */ 153 }
103 do { 154 this = dcode[hold & dmask];
104 *q++ = *r++; 155 dodist:
105 } while (--e); 156 op = (unsigned)(this.bits);
106 r = s->window; 157 hold >>= op;
107 do { 158 bits -= op;
108 *q++ = *r++; 159 op = (unsigned)(this.op);
109 } while (--c); 160 if (op & 16) { /* distance base */
110 } 161 dist = (unsigned)(this.val);
111 else /* normal copy */ 162 op &= 15; /* number of extra bits */
112 { 163 if (bits < op) {
113 *q++ = *r++; c--; 164 hold += (unsigned long)(PUP(in)) << bits;
114 *q++ = *r++; c--; 165 bits += 8;
115 do { 166 if (bits < op) {
116 *q++ = *r++; 167 hold += (unsigned long)(PUP(in)) << bits;
117 } while (--c); 168 bits += 8;
118 } 169 }
170 }
171 dist += (unsigned)hold & ((1U << op) - 1);
172#ifdef INFLATE_STRICT
173 if (dist > dmax) {
174 strm->msg = (char *)"invalid distance too far back";
175 state->mode = BAD;
176 break;
177 }
178#endif
179 hold >>= op;
180 bits -= op;
181 op = (unsigned)(out - beg); /* max distance in output */
182 if (dist > op) { /* see if copy from window */
183 op = dist - op; /* distance back in window */
184 if (op > whave) {
185 strm->msg = (char *)"invalid distance too far back";
186 state->mode = BAD;
187 break;
188 }
189 from = window - OFF;
190 if (write == 0) { /* very common case */
191 from += wsize - op;
192 if (op < len) { /* some from window */
193 len -= op;
194 do {
195 PUP(out) = PUP(from);
196 } while (--op);
197 from = out - dist; /* rest from output */
198 }
199 }
200 else if (write < op) { /* wrap around window */
201 from += wsize + write - op;
202 op -= write;
203 if (op < len) { /* some from end of window */
204 len -= op;
205 do {
206 PUP(out) = PUP(from);
207 } while (--op);
208 from = window - OFF;
209 if (write < len) { /* some from start of window */
210 op = write;
211 len -= op;
212 do {
213 PUP(out) = PUP(from);
214 } while (--op);
215 from = out - dist; /* rest from output */
216 }
217 }
218 }
219 else { /* contiguous in window */
220 from += write - op;
221 if (op < len) { /* some from window */
222 len -= op;
223 do {
224 PUP(out) = PUP(from);
225 } while (--op);
226 from = out - dist; /* rest from output */
227 }
228 }
229 while (len > 2) {
230 PUP(out) = PUP(from);
231 PUP(out) = PUP(from);
232 PUP(out) = PUP(from);
233 len -= 3;
234 }
235 if (len) {
236 PUP(out) = PUP(from);
237 if (len > 1)
238 PUP(out) = PUP(from);
239 }
240 }
241 else {
242 from = out - dist; /* copy direct from output */
243 do { /* minimum length is three */
244 PUP(out) = PUP(from);
245 PUP(out) = PUP(from);
246 PUP(out) = PUP(from);
247 len -= 3;
248 } while (len > 2);
249 if (len) {
250 PUP(out) = PUP(from);
251 if (len > 1)
252 PUP(out) = PUP(from);
253 }
254 }
255 }
256 else if ((op & 64) == 0) { /* 2nd level distance code */
257 this = dcode[this.val + (hold & ((1U << op) - 1))];
258 goto dodist;
119 } 259 }
120 else /* normal copy */ 260 else {
121 { 261 strm->msg = (char *)"invalid distance code";
122 *q++ = *r++; c--; 262 state->mode = BAD;
123 *q++ = *r++; c--; 263 break;
124 do {
125 *q++ = *r++;
126 } while (--c);
127 } 264 }
265 }
266 else if ((op & 64) == 0) { /* 2nd level length code */
267 this = lcode[this.val + (hold & ((1U << op) - 1))];
268 goto dolen;
269 }
270 else if (op & 32) { /* end-of-block */
271 state->mode = TYPE;
128 break; 272 break;
129 }
130 else if ((e & 64) == 0)
131 {
132 t += t->base;
133 e = (t += ((uInt)b & zlib_inflate_mask[e]))->exop;
134 }
135 else
136 {
137 z->msg = (char*)"invalid distance code";
138 UNGRAB
139 UPDATE
140 return Z_DATA_ERROR;
141 }
142 } while (1);
143 break;
144 }
145 if ((e & 64) == 0)
146 {
147 t += t->base;
148 if ((e = (t += ((uInt)b & zlib_inflate_mask[e]))->exop) == 0)
149 {
150 DUMPBITS(t->bits)
151 *q++ = (Byte)t->base;
152 m--;
153 break;
154 } 273 }
155 } 274 else {
156 else if (e & 32) 275 strm->msg = (char *)"invalid literal/length code";
157 { 276 state->mode = BAD;
158 UNGRAB 277 break;
159 UPDATE 278 }
160 return Z_STREAM_END; 279 } while (in < last && out < end);
161 } 280
162 else 281 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
163 { 282 len = bits >> 3;
164 z->msg = (char*)"invalid literal/length code"; 283 in -= len;
165 UNGRAB 284 bits -= len << 3;
166 UPDATE 285 hold &= (1U << bits) - 1;
167 return Z_DATA_ERROR; 286
168 } 287 /* update state and return */
169 } while (1); 288 strm->next_in = in + OFF;
170 } while (m >= 258 && n >= 10); 289 strm->next_out = out + OFF;
171 290 strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
172 /* not enough input or output--restore pointers and return */ 291 strm->avail_out = (unsigned)(out < end ?
173 UNGRAB 292 257 + (end - out) : 257 - (out - end));
174 UPDATE 293 state->hold = hold;
175 return Z_OK; 294 state->bits = bits;
295 return;
176} 296}
297
298/*
299 inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
300 - Using bit fields for code structure
301 - Different op definition to avoid & for extra bits (do & for table bits)
302 - Three separate decoding do-loops for direct, window, and write == 0
303 - Special case for distance > 1 copies to do overlapped load and store copy
304 - Explicit branch predictions (based on measured branch probabilities)
305 - Deferring match copy and interspersed it with decoding subsequent codes
306 - Swapping literal/length else
307 - Swapping window/direct else
308 - Larger unrolled copy loops (three is about right)
309 - Moving len -= 3 statement into middle of loop
310 */
311
312#endif /* !ASMINF */