diff options
Diffstat (limited to 'drivers/char/ftape/compressor/lzrw3.c')
-rw-r--r-- | drivers/char/ftape/compressor/lzrw3.c | 743 |
1 files changed, 0 insertions, 743 deletions
diff --git a/drivers/char/ftape/compressor/lzrw3.c b/drivers/char/ftape/compressor/lzrw3.c deleted file mode 100644 index a032a0ee2a9..00000000000 --- a/drivers/char/ftape/compressor/lzrw3.c +++ /dev/null | |||
@@ -1,743 +0,0 @@ | |||
1 | /* | ||
2 | * $Source: /homes/cvs/ftape-stacked/ftape/compressor/lzrw3.c,v $ | ||
3 | * $Revision: 1.1 $ | ||
4 | * $Date: 1997/10/05 19:12:29 $ | ||
5 | * | ||
6 | * Implementation of Ross Williams lzrw3 algorithm. Adaption for zftape. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #include "../compressor/lzrw3.h" /* Defines single exported function "compress". */ | ||
11 | |||
12 | /******************************************************************************/ | ||
13 | /* */ | ||
14 | /* LZRW3.C */ | ||
15 | /* */ | ||
16 | /******************************************************************************/ | ||
17 | /* */ | ||
18 | /* Author : Ross Williams. */ | ||
19 | /* Date : 30-Jun-1991. */ | ||
20 | /* Release : 1. */ | ||
21 | /* */ | ||
22 | /******************************************************************************/ | ||
23 | /* */ | ||
24 | /* This file contains an implementation of the LZRW3 data compression */ | ||
25 | /* algorithm in C. */ | ||
26 | /* */ | ||
27 | /* The algorithm is a general purpose compression algorithm that runs fast */ | ||
28 | /* and gives reasonable compression. The algorithm is a member of the Lempel */ | ||
29 | /* Ziv family of algorithms and bases its compression on the presence in the */ | ||
30 | /* data of repeated substrings. */ | ||
31 | /* */ | ||
32 | /* This algorithm is unpatented and the code is public domain. As the */ | ||
33 | /* algorithm is based on the LZ77 class of algorithms, it is unlikely to be */ | ||
34 | /* the subject of a patent challenge. */ | ||
35 | /* */ | ||
36 | /* Unlike the LZRW1 and LZRW1-A algorithms, the LZRW3 algorithm is */ | ||
37 | /* deterministic and is guaranteed to yield the same compressed */ | ||
38 | /* representation for a given file each time it is run. */ | ||
39 | /* */ | ||
40 | /* The LZRW3 algorithm was originally designed and implemented */ | ||
41 | /* by Ross Williams on 31-Dec-1990. */ | ||
42 | /* */ | ||
43 | /* Here are the results of applying this code, compiled under THINK C 4.0 */ | ||
44 | /* and running on a Mac-SE (8MHz 68000), to the standard calgary corpus. */ | ||
45 | /* */ | ||
46 | /* +----------------------------------------------------------------+ */ | ||
47 | /* | DATA COMPRESSION TEST | */ | ||
48 | /* | ===================== | */ | ||
49 | /* | Time of run : Sun 30-Jun-1991 09:31PM | */ | ||
50 | /* | Timing accuracy : One part in 100 | */ | ||
51 | /* | Context length : 262144 bytes (= 256.0000K) | */ | ||
52 | /* | Test suite : Calgary Corpus Suite | */ | ||
53 | /* | Files in suite : 14 | */ | ||
54 | /* | Algorithm : LZRW3 | */ | ||
55 | /* | Note: All averages are calculated from the un-rounded values. | */ | ||
56 | /* +----------------------------------------------------------------+ */ | ||
57 | /* | File Name Length CxB ComLen %Remn Bits Com K/s Dec K/s | */ | ||
58 | /* | ---------- ------ --- ------ ----- ---- ------- ------- | */ | ||
59 | /* | rpus:Bib.D 111261 1 55033 49.5 3.96 19.46 32.27 | */ | ||
60 | /* | us:Book1.D 768771 3 467962 60.9 4.87 17.03 31.07 | */ | ||
61 | /* | us:Book2.D 610856 3 317102 51.9 4.15 19.39 34.15 | */ | ||
62 | /* | rpus:Geo.D 102400 1 82424 80.5 6.44 11.65 18.18 | */ | ||
63 | /* | pus:News.D 377109 2 205670 54.5 4.36 17.14 27.47 | */ | ||
64 | /* | pus:Obj1.D 21504 1 13027 60.6 4.85 13.40 18.95 | */ | ||
65 | /* | pus:Obj2.D 246814 1 116286 47.1 3.77 19.31 30.10 | */ | ||
66 | /* | s:Paper1.D 53161 1 27522 51.8 4.14 18.60 31.15 | */ | ||
67 | /* | s:Paper2.D 82199 1 45160 54.9 4.40 18.45 32.84 | */ | ||
68 | /* | rpus:Pic.D 513216 2 122388 23.8 1.91 35.29 51.05 | */ | ||
69 | /* | us:Progc.D 39611 1 19669 49.7 3.97 18.87 30.64 | */ | ||
70 | /* | us:Progl.D 71646 1 28247 39.4 3.15 24.34 40.66 | */ | ||
71 | /* | us:Progp.D 49379 1 19377 39.2 3.14 23.91 39.23 | */ | ||
72 | /* | us:Trans.D 93695 1 33481 35.7 2.86 25.48 40.37 | */ | ||
73 | /* +----------------------------------------------------------------+ */ | ||
74 | /* | Average 224401 1 110953 50.0 4.00 20.17 32.72 | */ | ||
75 | /* +----------------------------------------------------------------+ */ | ||
76 | /* */ | ||
77 | /******************************************************************************/ | ||
78 | |||
79 | /******************************************************************************/ | ||
80 | |||
81 | /* The following structure is returned by the "compress" function below when */ | ||
82 | /* the user asks the function to return identifying information. */ | ||
83 | /* The most important field in the record is the working memory field which */ | ||
84 | /* tells the calling program how much working memory should be passed to */ | ||
85 | /* "compress" when it is called to perform a compression or decompression. */ | ||
86 | /* LZRW3 uses the same amount of memory during compression and decompression. */ | ||
87 | /* For more information on this structure see "compress.h". */ | ||
88 | |||
89 | #define U(X) ((ULONG) X) | ||
90 | #define SIZE_P_BYTE (U(sizeof(UBYTE *))) | ||
91 | #define SIZE_WORD (U(sizeof(UWORD ))) | ||
92 | #define ALIGNMENT_FUDGE (U(16)) | ||
93 | #define MEM_REQ ( U(4096)*(SIZE_P_BYTE) + ALIGNMENT_FUDGE ) | ||
94 | |||
95 | static struct compress_identity identity = | ||
96 | { | ||
97 | U(0x032DDEA8), /* Algorithm identification number. */ | ||
98 | MEM_REQ, /* Working memory (bytes) required. */ | ||
99 | "LZRW3", /* Name of algorithm. */ | ||
100 | "1.0", /* Version number of algorithm. */ | ||
101 | "31-Dec-1990", /* Date of algorithm. */ | ||
102 | "Public Domain", /* Copyright notice. */ | ||
103 | "Ross N. Williams", /* Author of algorithm. */ | ||
104 | "Renaissance Software", /* Affiliation of author. */ | ||
105 | "Public Domain" /* Vendor of algorithm. */ | ||
106 | }; | ||
107 | |||
108 | LOCAL void compress_compress (UBYTE *,UBYTE *,ULONG,UBYTE *, LONG *); | ||
109 | LOCAL void compress_decompress(UBYTE *,UBYTE *,LONG, UBYTE *, ULONG *); | ||
110 | |||
111 | /******************************************************************************/ | ||
112 | |||
113 | /* This function is the only function exported by this module. */ | ||
114 | /* Depending on its first parameter, the function can be requested to */ | ||
115 | /* compress a block of memory, decompress a block of memory, or to identify */ | ||
116 | /* itself. For more information, see the specification file "compress.h". */ | ||
117 | |||
118 | EXPORT void lzrw3_compress( | ||
119 | UWORD action, /* Action to be performed. */ | ||
120 | UBYTE *wrk_mem, /* Address of working memory we can use.*/ | ||
121 | UBYTE *src_adr, /* Address of input data. */ | ||
122 | LONG src_len, /* Length of input data. */ | ||
123 | UBYTE *dst_adr, /* Address to put output data. */ | ||
124 | void *p_dst_len /* Address of longword for length of output data.*/ | ||
125 | ) | ||
126 | { | ||
127 | switch (action) | ||
128 | { | ||
129 | case COMPRESS_ACTION_IDENTITY: | ||
130 | *((struct compress_identity **)p_dst_len)= &identity; | ||
131 | break; | ||
132 | case COMPRESS_ACTION_COMPRESS: | ||
133 | compress_compress(wrk_mem,src_adr,src_len,dst_adr,(LONG *)p_dst_len); | ||
134 | break; | ||
135 | case COMPRESS_ACTION_DECOMPRESS: | ||
136 | compress_decompress(wrk_mem,src_adr,src_len,dst_adr,(LONG *)p_dst_len); | ||
137 | break; | ||
138 | } | ||
139 | } | ||
140 | |||
141 | /******************************************************************************/ | ||
142 | /* */ | ||
143 | /* BRIEF DESCRIPTION OF THE LZRW3 ALGORITHM */ | ||
144 | /* ======================================== */ | ||
145 | /* The LZRW3 algorithm is identical to the LZRW1-A algorithm except that */ | ||
146 | /* instead of transmitting history offsets, it transmits hash table indexes. */ | ||
147 | /* In order to decode the indexes, the decompressor must maintain an */ | ||
148 | /* identical hash table. Copy items are straightforward:when the decompressor */ | ||
149 | /* receives a copy item, it simply looks up the hash table to translate the */ | ||
150 | /* index into a pointer into the data already decompressed. To update the */ | ||
151 | /* hash table, it replaces the same table entry with a pointer to the start */ | ||
152 | /* of the newly decoded phrase. The tricky part is with literal items, for at */ | ||
153 | /* the time that the decompressor receives a literal item the decompressor */ | ||
154 | /* does not have the three bytes in the Ziv (that the compressor has) to */ | ||
155 | /* perform the three-byte hash. To solve this problem, in LZRW3, both the */ | ||
156 | /* compressor and decompressor are wired up so that they "buffer" these */ | ||
157 | /* literals and update their hash tables only when three bytes are available. */ | ||
158 | /* This makes the maximum buffering 2 bytes. */ | ||
159 | /* */ | ||
160 | /* Replacement of offsets by hash table indexes yields a few percent extra */ | ||
161 | /* compression at the cost of some speed. LZRW3 is slower than LZRW1, LZRW1-A */ | ||
162 | /* and LZRW2, but yields better compression. */ | ||
163 | /* */ | ||
164 | /* Extra compression could be obtained by using a hash table of depth two. */ | ||
165 | /* However, increasing the depth above one incurs a significant decrease in */ | ||
166 | /* compression speed which was not considered worthwhile. Another reason for */ | ||
167 | /* keeping the depth down to one was to allow easy comparison with the */ | ||
168 | /* LZRW1-A and LZRW2 algorithms so as to demonstrate the exact effect of the */ | ||
169 | /* use of direct hash indexes. */ | ||
170 | /* */ | ||
171 | /* +---+ */ | ||
172 | /* |___|4095 */ | ||
173 | /* |___| */ | ||
174 | /* +---------------------*_|<---+ /----+---\ */ | ||
175 | /* | |___| +---|Hash | */ | ||
176 | /* | |___| |Function| */ | ||
177 | /* | |___| \--------/ */ | ||
178 | /* | |___|0 ^ */ | ||
179 | /* | +---+ | */ | ||
180 | /* | Hash +-----+ */ | ||
181 | /* | Table | */ | ||
182 | /* | --- */ | ||
183 | /* v ^^^ */ | ||
184 | /* +-------------------------------------|----------------+ */ | ||
185 | /* |||||||||||||||||||||||||||||||||||||||||||||||||||||||| */ | ||
186 | /* +-------------------------------------|----------------+ */ | ||
187 | /* | |1......18| | */ | ||
188 | /* |<------- Lempel=History ------------>|<--Ziv-->| | */ | ||
189 | /* | (=bytes already processed) |<-Still to go-->| */ | ||
190 | /* |<-------------------- INPUT BLOCK ------------------->| */ | ||
191 | /* */ | ||
192 | /* The diagram above for LZRW3 looks almost identical to the diagram for */ | ||
193 | /* LZRW1. The difference is that in LZRW3, the compressor transmits hash */ | ||
194 | /* table indices instead of Lempel offsets. For this to work, the */ | ||
195 | /* decompressor must maintain a hash table as well as the compressor and both */ | ||
196 | /* compressor and decompressor must "buffer" literals, as the decompressor */ | ||
197 | /* cannot hash phrases commencing with a literal until another two bytes have */ | ||
198 | /* arrived. */ | ||
199 | /* */ | ||
200 | /* LZRW3 Algorithm Execution Summary */ | ||
201 | /* --------------------------------- */ | ||
202 | /* 1. Hash the first three bytes of the Ziv to yield a hash table index h. */ | ||
203 | /* 2. Look up the hash table yielding history pointer p. */ | ||
204 | /* 3. Match where p points with the Ziv. If there is a match of three or */ | ||
205 | /* more bytes, code those bytes (in the Ziv) as a copy item, otherwise */ | ||
206 | /* code the next byte in the Ziv as a literal item. */ | ||
207 | /* 4. Update the hash table as possible subject to the constraint that only */ | ||
208 | /* phrases commencing three bytes back from the Ziv can be hashed and */ | ||
209 | /* entered into the hash table. (This enables the decompressor to keep */ | ||
210 | /* pace). See the description and code for more details. */ | ||
211 | /* */ | ||
212 | /******************************************************************************/ | ||
213 | /* */ | ||
214 | /* DEFINITION OF COMPRESSED FILE FORMAT */ | ||
215 | /* ==================================== */ | ||
216 | /* * A compressed file consists of a COPY FLAG followed by a REMAINDER. */ | ||
217 | /* * The copy flag CF uses up four bytes with the first byte being the */ | ||
218 | /* least significant. */ | ||
219 | /* * If CF=1, then the compressed file represents the remainder of the file */ | ||
220 | /* exactly. Otherwise CF=0 and the remainder of the file consists of zero */ | ||
221 | /* or more GROUPS, each of which represents one or more bytes. */ | ||
222 | /* * Each group consists of two bytes of CONTROL information followed by */ | ||
223 | /* sixteen ITEMs except for the last group which can contain from one */ | ||
224 | /* to sixteen items. */ | ||
225 | /* * An item can be either a LITERAL item or a COPY item. */ | ||
226 | /* * Each item corresponds to a bit in the control bytes. */ | ||
227 | /* * The first control byte corresponds to the first 8 items in the group */ | ||
228 | /* with bit 0 corresponding to the first item in the group and bit 7 to */ | ||
229 | /* the eighth item in the group. */ | ||
230 | /* * The second control byte corresponds to the second 8 items in the group */ | ||
231 | /* with bit 0 corresponding to the ninth item in the group and bit 7 to */ | ||
232 | /* the sixteenth item in the group. */ | ||
233 | /* * A zero bit in a control word means that the corresponding item is a */ | ||
234 | /* literal item. A one bit corresponds to a copy item. */ | ||
235 | /* * A literal item consists of a single byte which represents itself. */ | ||
236 | /* * A copy item consists of two bytes that represent from 3 to 18 bytes. */ | ||
237 | /* * The first byte in a copy item will be denoted C1. */ | ||
238 | /* * The second byte in a copy item will be denoted C2. */ | ||
239 | /* * Bits will be selected using square brackets. */ | ||
240 | /* For example: C1[0..3] is the low nibble of the first control byte. */ | ||
241 | /* of copy item C1. */ | ||
242 | /* * The LENGTH of a copy item is defined to be C1[0..3]+3 which is a number */ | ||
243 | /* in the range [3,18]. */ | ||
244 | /* * The INDEX of a copy item is defined to be C1[4..7]*256+C2[0..8] which */ | ||
245 | /* is a number in the range [0,4095]. */ | ||
246 | /* * A copy item represents the sequence of bytes */ | ||
247 | /* text[POS-OFFSET..POS-OFFSET+LENGTH-1] where */ | ||
248 | /* text is the entire text of the uncompressed string. */ | ||
249 | /* POS is the index in the text of the character following the */ | ||
250 | /* string represented by all the items preceeding the item */ | ||
251 | /* being defined. */ | ||
252 | /* OFFSET is obtained from INDEX by looking up the hash table. */ | ||
253 | /* */ | ||
254 | /******************************************************************************/ | ||
255 | |||
256 | /* The following #define defines the length of the copy flag that appears at */ | ||
257 | /* the start of the compressed file. The value of four bytes was chosen */ | ||
258 | /* because the fast_copy routine on my Macintosh runs faster if the source */ | ||
259 | /* and destination blocks are relatively longword aligned. */ | ||
260 | /* The actual flag data appears in the first byte. The rest are zeroed so as */ | ||
261 | /* to normalize the compressed representation (i.e. not non-deterministic). */ | ||
262 | #define FLAG_BYTES 4 | ||
263 | |||
264 | /* The following #defines define the meaning of the values of the copy */ | ||
265 | /* flag at the start of the compressed file. */ | ||
266 | #define FLAG_COMPRESS 0 /* Signals that output was result of compression. */ | ||
267 | #define FLAG_COPY 1 /* Signals that output was simply copied over. */ | ||
268 | |||
269 | /* The 68000 microprocessor (on which this algorithm was originally developed */ | ||
270 | /* is fussy about non-aligned arrays of words. To avoid these problems the */ | ||
271 | /* following macro can be used to "waste" from 0 to 3 bytes so as to align */ | ||
272 | /* the argument pointer. */ | ||
273 | #define ULONG_ALIGN_UP(X) ((((ULONG)X)+sizeof(ULONG)-1)&~(sizeof(ULONG)-1)) | ||
274 | |||
275 | |||
276 | /* The following constant defines the maximum length of an uncompressed item. */ | ||
277 | /* This definition must not be changed; its value is hardwired into the code. */ | ||
278 | /* The longest number of bytes that can be spanned by a single item is 18 */ | ||
279 | /* for the longest copy item. */ | ||
280 | #define MAX_RAW_ITEM (18) | ||
281 | |||
282 | /* The following constant defines the maximum length of an uncompressed group.*/ | ||
283 | /* This definition must not be changed; its value is hardwired into the code. */ | ||
284 | /* A group contains at most 16 items which explains this definition. */ | ||
285 | #define MAX_RAW_GROUP (16*MAX_RAW_ITEM) | ||
286 | |||
287 | /* The following constant defines the maximum length of a compressed group. */ | ||
288 | /* This definition must not be changed; its value is hardwired into the code. */ | ||
289 | /* A compressed group consists of two control bytes followed by up to 16 */ | ||
290 | /* compressed items each of which can have a maximum length of two bytes. */ | ||
291 | #define MAX_CMP_GROUP (2+16*2) | ||
292 | |||
293 | /* The following constant defines the number of entries in the hash table. */ | ||
294 | /* This definition must not be changed; its value is hardwired into the code. */ | ||
295 | #define HASH_TABLE_LENGTH (4096) | ||
296 | |||
297 | /* LZRW3, unlike LZRW1(-A), must initialize its hash table so as to enable */ | ||
298 | /* the compressor and decompressor to stay in step maintaining identical hash */ | ||
299 | /* tables. In an early version of the algorithm, the tables were simply */ | ||
300 | /* initialized to zero and a check for zero was included just before the */ | ||
301 | /* matching code. However, this test costs time. A better solution is to */ | ||
302 | /* initialize all the entries in the hash table to point to a constant */ | ||
303 | /* string. The decompressor does the same. This solution requires no extra */ | ||
304 | /* test. The contents of the string do not matter so long as the string is */ | ||
305 | /* the same for the compressor and decompressor and contains at least */ | ||
306 | /* MAX_RAW_ITEM bytes. I chose consecutive decimal digits because they do not */ | ||
307 | /* have white space problems (e.g. there is no chance that the compiler will */ | ||
308 | /* replace more than one space by a TAB) and because they make the length of */ | ||
309 | /* the string obvious by inspection. */ | ||
310 | #define START_STRING_18 ((UBYTE *) "123456789012345678") | ||
311 | |||
312 | /* In this algorithm, hash values have to be calculated at more than one */ | ||
313 | /* point. The following macro neatens the code up for this. */ | ||
314 | #define HASH(PTR) \ | ||
315 | (((40543*(((*(PTR))<<8)^((*((PTR)+1))<<4)^(*((PTR)+2))))>>4) & 0xFFF) | ||
316 | |||
317 | /******************************************************************************/ | ||
318 | |||
319 | /* Input : Hand over the required amount of working memory in p_wrk_mem. */ | ||
320 | /* Input : Specify input block using p_src_first and src_len. */ | ||
321 | /* Input : Point p_dst_first to the start of the output zone (OZ). */ | ||
322 | /* Input : Point p_dst_len to a ULONG to receive the output length. */ | ||
323 | /* Input : Input block and output zone must not overlap. */ | ||
324 | /* Output : Length of output block written to *p_dst_len. */ | ||
325 | /* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. May */ | ||
326 | /* Output : write in OZ=Mem[p_dst_first..p_dst_first+src_len+MAX_CMP_GROUP-1].*/ | ||
327 | /* Output : Upon completion guaranteed *p_dst_len<=src_len+FLAG_BYTES. */ | ||
328 | LOCAL void compress_compress(UBYTE *p_wrk_mem, | ||
329 | UBYTE *p_src_first, ULONG src_len, | ||
330 | UBYTE *p_dst_first, LONG *p_dst_len) | ||
331 | { | ||
332 | /* p_src and p_dst step through the source and destination blocks. */ | ||
333 | register UBYTE *p_src = p_src_first; | ||
334 | register UBYTE *p_dst = p_dst_first; | ||
335 | |||
336 | /* The following variables are never modified and are used in the */ | ||
337 | /* calculations that determine when the main loop terminates. */ | ||
338 | UBYTE *p_src_post = p_src_first+src_len; | ||
339 | UBYTE *p_dst_post = p_dst_first+src_len; | ||
340 | UBYTE *p_src_max1 = p_src_first+src_len-MAX_RAW_ITEM; | ||
341 | UBYTE *p_src_max16 = p_src_first+src_len-MAX_RAW_ITEM*16; | ||
342 | |||
343 | /* The variables 'p_control' and 'control' are used to buffer control bits. */ | ||
344 | /* Before each group is processed, the next two bytes of the output block */ | ||
345 | /* are set aside for the control word for the group about to be processed. */ | ||
346 | /* 'p_control' is set to point to the first byte of that word. Meanwhile, */ | ||
347 | /* 'control' buffers the control bits being generated during the processing */ | ||
348 | /* of the group. Instead of having a counter to keep track of how many items */ | ||
349 | /* have been processed (=the number of bits in the control word), at the */ | ||
350 | /* start of each group, the top word of 'control' is filled with 1 bits. */ | ||
351 | /* As 'control' is shifted for each item, the 1 bits in the top word are */ | ||
352 | /* absorbed or destroyed. When they all run out (i.e. when the top word is */ | ||
353 | /* all zero bits, we know that we are at the end of a group. */ | ||
354 | # define TOPWORD 0xFFFF0000 | ||
355 | UBYTE *p_control; | ||
356 | register ULONG control=TOPWORD; | ||
357 | |||
358 | /* THe variable 'hash' always points to the first element of the hash table. */ | ||
359 | UBYTE **hash= (UBYTE **) ULONG_ALIGN_UP(p_wrk_mem); | ||
360 | |||
361 | /* The following two variables represent the literal buffer. p_h1 points to */ | ||
362 | /* the hash table entry corresponding to the youngest literal. p_h2 points */ | ||
363 | /* to the hash table entry corresponding to the second youngest literal. */ | ||
364 | /* Note: p_h1=0=>p_h2=0 because zero values denote absence of a pending */ | ||
365 | /* literal. The variables are initialized to zero meaning an empty "buffer". */ | ||
366 | UBYTE **p_h1=NULL; | ||
367 | UBYTE **p_h2=NULL; | ||
368 | |||
369 | /* To start, we write the flag bytes. Being optimistic, we set the flag to */ | ||
370 | /* FLAG_COMPRESS. The remaining flag bytes are zeroed so as to keep the */ | ||
371 | /* algorithm deterministic. */ | ||
372 | *p_dst++=FLAG_COMPRESS; | ||
373 | {UWORD i; for (i=2;i<=FLAG_BYTES;i++) *p_dst++=0;} | ||
374 | |||
375 | /* Reserve the first word of output as the control word for the first group. */ | ||
376 | /* Note: This is undone at the end if the input block is empty. */ | ||
377 | p_control=p_dst; p_dst+=2; | ||
378 | |||
379 | /* Initialize all elements of the hash table to point to a constant string. */ | ||
380 | /* Use of an unrolled loop speeds this up considerably. */ | ||
381 | {UWORD i; UBYTE **p_h=hash; | ||
382 | # define ZH *p_h++=START_STRING_18 | ||
383 | for (i=0;i<256;i++) /* 256=HASH_TABLE_LENGTH/16. */ | ||
384 | {ZH;ZH;ZH;ZH; | ||
385 | ZH;ZH;ZH;ZH; | ||
386 | ZH;ZH;ZH;ZH; | ||
387 | ZH;ZH;ZH;ZH;} | ||
388 | } | ||
389 | |||
390 | /* The main loop processes either 1 or 16 items per iteration. As its */ | ||
391 | /* termination logic is complicated, I have opted for an infinite loop */ | ||
392 | /* structure containing 'break' and 'goto' statements. */ | ||
393 | while (TRUE) | ||
394 | {/* Begin main processing loop. */ | ||
395 | |||
396 | /* Note: All the variables here except unroll should be defined within */ | ||
397 | /* the inner loop. Unfortunately the loop hasn't got a block. */ | ||
398 | register UBYTE *p; /* Scans through targ phrase during matching. */ | ||
399 | register UBYTE *p_ziv= NULL ; /* Points to first byte of current Ziv. */ | ||
400 | register UWORD unroll; /* Loop counter for unrolled inner loop. */ | ||
401 | register UWORD index; /* Index of current hash table entry. */ | ||
402 | register UBYTE **p_h0 = NULL ; /* Pointer to current hash table entry. */ | ||
403 | |||
404 | /* Test for overrun and jump to overrun code if necessary. */ | ||
405 | if (p_dst>p_dst_post) | ||
406 | goto overrun; | ||
407 | |||
408 | /* The following cascade of if statements efficiently catches and deals */ | ||
409 | /* with varying degrees of closeness to the end of the input block. */ | ||
410 | /* When we get very close to the end, we stop updating the table and */ | ||
411 | /* code the remaining bytes as literals. This makes the code simpler. */ | ||
412 | unroll=16; | ||
413 | if (p_src>p_src_max16) | ||
414 | { | ||
415 | unroll=1; | ||
416 | if (p_src>p_src_max1) | ||
417 | { | ||
418 | if (p_src==p_src_post) | ||
419 | break; | ||
420 | else | ||
421 | goto literal; | ||
422 | } | ||
423 | } | ||
424 | |||
425 | /* This inner unrolled loop processes 'unroll' (whose value is either 1 */ | ||
426 | /* or 16) items. I have chosen to implement this loop with labels and */ | ||
427 | /* gotos to heighten the ease with which the loop may be implemented with */ | ||
428 | /* a single decrement and branch instruction in assembly language and */ | ||
429 | /* also because the labels act as highly readable place markers. */ | ||
430 | /* (Also because we jump into the loop for endgame literals (see above)). */ | ||
431 | |||
432 | begin_unrolled_loop: | ||
433 | |||
434 | /* To process the next phrase, we hash the next three bytes and use */ | ||
435 | /* the resultant hash table index to look up the hash table. A pointer */ | ||
436 | /* to the entry is stored in p_h0 so as to avoid an array lookup. The */ | ||
437 | /* hash table entry *p_h0 is looked up yielding a pointer p to a */ | ||
438 | /* potential match of the Ziv in the history. */ | ||
439 | index=HASH(p_src); | ||
440 | p_h0=&hash[index]; | ||
441 | p=*p_h0; | ||
442 | |||
443 | /* Having looked up the candidate position, we are in a position to */ | ||
444 | /* attempt a match. The match loop has been unrolled using the PS */ | ||
445 | /* macro so that failure within the first three bytes automatically */ | ||
446 | /* results in the literal branch being taken. The coding is simple. */ | ||
447 | /* p_ziv saves p_src so we can let p_src wander. */ | ||
448 | # define PS *p++!=*p_src++ | ||
449 | p_ziv=p_src; | ||
450 | if (PS || PS || PS) | ||
451 | { | ||
452 | /* Literal. */ | ||
453 | |||
454 | /* Code the literal byte as itself and a zero control bit. */ | ||
455 | p_src=p_ziv; literal: *p_dst++=*p_src++; control&=0xFFFEFFFF; | ||
456 | |||
457 | /* We have just coded a literal. If we had two pending ones, that */ | ||
458 | /* makes three and we can update the hash table. */ | ||
459 | if (p_h2!=0) | ||
460 | {*p_h2=p_ziv-2;} | ||
461 | |||
462 | /* In any case, rotate the hash table pointers for next time. */ | ||
463 | p_h2=p_h1; p_h1=p_h0; | ||
464 | |||
465 | } | ||
466 | else | ||
467 | { | ||
468 | /* Copy */ | ||
469 | |||
470 | /* Match up to 15 remaining bytes using an unrolled loop and code. */ | ||
471 | #if 0 | ||
472 | PS || PS || PS || PS || PS || PS || PS || PS || | ||
473 | PS || PS || PS || PS || PS || PS || PS || p_src++; | ||
474 | #else | ||
475 | if ( | ||
476 | !( PS || PS || PS || PS || PS || PS || PS || PS || | ||
477 | PS || PS || PS || PS || PS || PS || PS ) | ||
478 | ) p_src++; | ||
479 | #endif | ||
480 | *p_dst++=((index&0xF00)>>4)|(--p_src-p_ziv-3); | ||
481 | *p_dst++=index&0xFF; | ||
482 | |||
483 | /* As we have just coded three bytes, we are now in a position to */ | ||
484 | /* update the hash table with the literal bytes that were pending */ | ||
485 | /* upon the arrival of extra context bytes. */ | ||
486 | if (p_h1!=0) | ||
487 | { | ||
488 | if (p_h2) | ||
489 | {*p_h2=p_ziv-2; p_h2=NULL;} | ||
490 | *p_h1=p_ziv-1; p_h1=NULL; | ||
491 | } | ||
492 | |||
493 | /* In any case, we can update the hash table based on the current */ | ||
494 | /* position as we just coded at least three bytes in a copy items. */ | ||
495 | *p_h0=p_ziv; | ||
496 | |||
497 | } | ||
498 | control>>=1; | ||
499 | |||
500 | /* This loop is all set up for a decrement and jump instruction! */ | ||
501 | #ifndef linux | ||
502 | ` end_unrolled_loop: if (--unroll) goto begin_unrolled_loop; | ||
503 | #else | ||
504 | /* end_unrolled_loop: */ if (--unroll) goto begin_unrolled_loop; | ||
505 | #endif | ||
506 | |||
507 | /* At this point it will nearly always be the end of a group in which */ | ||
508 | /* case, we have to do some control-word processing. However, near the */ | ||
509 | /* end of the input block, the inner unrolled loop is only executed once. */ | ||
510 | /* This necessitates the 'if' test. */ | ||
511 | if ((control&TOPWORD)==0) | ||
512 | { | ||
513 | /* Write the control word to the place we saved for it in the output. */ | ||
514 | *p_control++= control &0xFF; | ||
515 | *p_control = (control>>8) &0xFF; | ||
516 | |||
517 | /* Reserve the next word in the output block for the control word */ | ||
518 | /* for the group about to be processed. */ | ||
519 | p_control=p_dst; p_dst+=2; | ||
520 | |||
521 | /* Reset the control bits buffer. */ | ||
522 | control=TOPWORD; | ||
523 | } | ||
524 | |||
525 | } /* End main processing loop. */ | ||
526 | |||
527 | /* After the main processing loop has executed, all the input bytes have */ | ||
528 | /* been processed. However, the control word has still to be written to the */ | ||
529 | /* word reserved for it in the output at the start of the most recent group. */ | ||
530 | /* Before writing, the control word has to be shifted so that all the bits */ | ||
531 | /* are in the right place. The "empty" bit positions are filled with 1s */ | ||
532 | /* which partially fill the top word. */ | ||
533 | while(control&TOPWORD) control>>=1; | ||
534 | *p_control++= control &0xFF; | ||
535 | *p_control++=(control>>8) &0xFF; | ||
536 | |||
537 | /* If the last group contained no items, delete the control word too. */ | ||
538 | if (p_control==p_dst) p_dst-=2; | ||
539 | |||
540 | /* Write the length of the output block to the dst_len parameter and return. */ | ||
541 | *p_dst_len=p_dst-p_dst_first; | ||
542 | return; | ||
543 | |||
544 | /* Jump here as soon as an overrun is detected. An overrun is defined to */ | ||
545 | /* have occurred if p_dst>p_dst_first+src_len. That is, the moment the */ | ||
546 | /* length of the output written so far exceeds the length of the input block.*/ | ||
547 | /* The algorithm checks for overruns at least at the end of each group */ | ||
548 | /* which means that the maximum overrun is MAX_CMP_GROUP bytes. */ | ||
549 | /* Once an overrun occurs, the only thing to do is to set the copy flag and */ | ||
550 | /* copy the input over. */ | ||
551 | overrun: | ||
552 | #if 0 | ||
553 | *p_dst_first=FLAG_COPY; | ||
554 | fast_copy(p_src_first,p_dst_first+FLAG_BYTES,src_len); | ||
555 | *p_dst_len=src_len+FLAG_BYTES; | ||
556 | #else | ||
557 | fast_copy(p_src_first,p_dst_first,src_len); | ||
558 | *p_dst_len= -src_len; /* return a negative number to indicate uncompressed data */ | ||
559 | #endif | ||
560 | } | ||
561 | |||
562 | /******************************************************************************/ | ||
563 | |||
564 | /* Input : Hand over the required amount of working memory in p_wrk_mem. */ | ||
565 | /* Input : Specify input block using p_src_first and src_len. */ | ||
566 | /* Input : Point p_dst_first to the start of the output zone. */ | ||
567 | /* Input : Point p_dst_len to a ULONG to receive the output length. */ | ||
568 | /* Input : Input block and output zone must not overlap. User knows */ | ||
569 | /* Input : upperbound on output block length from earlier compression. */ | ||
570 | /* Input : In any case, maximum expansion possible is nine times. */ | ||
571 | /* Output : Length of output block written to *p_dst_len. */ | ||
572 | /* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. */ | ||
573 | /* Output : Writes only in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. */ | ||
574 | LOCAL void compress_decompress( UBYTE *p_wrk_mem, | ||
575 | UBYTE *p_src_first, LONG src_len, | ||
576 | UBYTE *p_dst_first, ULONG *p_dst_len) | ||
577 | { | ||
578 | /* Byte pointers p_src and p_dst scan through the input and output blocks. */ | ||
579 | register UBYTE *p_src = p_src_first+FLAG_BYTES; | ||
580 | register UBYTE *p_dst = p_dst_first; | ||
581 | /* we need to avoid a SEGV when trying to uncompress corrupt data */ | ||
582 | register UBYTE *p_dst_post = p_dst_first + *p_dst_len; | ||
583 | |||
584 | /* The following two variables are never modified and are used to control */ | ||
585 | /* the main loop. */ | ||
586 | UBYTE *p_src_post = p_src_first+src_len; | ||
587 | UBYTE *p_src_max16 = p_src_first+src_len-(MAX_CMP_GROUP-2); | ||
588 | |||
589 | /* The hash table is the only resident of the working memory. The hash table */ | ||
590 | /* contains HASH_TABLE_LENGTH=4096 pointers to positions in the history. To */ | ||
591 | /* keep Macintoshes happy, it is longword aligned. */ | ||
592 | UBYTE **hash = (UBYTE **) ULONG_ALIGN_UP(p_wrk_mem); | ||
593 | |||
594 | /* The variable 'control' is used to buffer the control bits which appear in */ | ||
595 | /* groups of 16 bits (control words) at the start of each compressed group. */ | ||
596 | /* When each group is read, bit 16 of the register is set to one. Whenever */ | ||
597 | /* a new bit is needed, the register is shifted right. When the value of the */ | ||
598 | /* register becomes 1, we know that we have reached the end of a group. */ | ||
599 | /* Initializing the register to 1 thus instructs the code to follow that it */ | ||
600 | /* should read a new control word immediately. */ | ||
601 | register ULONG control=1; | ||
602 | |||
603 | /* The value of 'literals' is always in the range 0..3. It is the number of */ | ||
604 | /* consecutive literal items just seen. We have to record this number so as */ | ||
605 | /* to know when to update the hash table. When literals gets to 3, there */ | ||
606 | /* have been three consecutive literals and we can update at the position of */ | ||
607 | /* the oldest of the three. */ | ||
608 | register UWORD literals=0; | ||
609 | |||
610 | /* Check the leading copy flag to see if the compressor chose to use a copy */ | ||
611 | /* operation instead of a compression operation. If a copy operation was */ | ||
612 | /* used, then all we need to do is copy the data over, set the output length */ | ||
613 | /* and return. */ | ||
614 | #if 0 | ||
615 | if (*p_src_first==FLAG_COPY) | ||
616 | { | ||
617 | fast_copy(p_src_first+FLAG_BYTES,p_dst_first,src_len-FLAG_BYTES); | ||
618 | *p_dst_len=src_len-FLAG_BYTES; | ||
619 | return; | ||
620 | } | ||
621 | #else | ||
622 | if ( src_len < 0 ) | ||
623 | { | ||
624 | fast_copy(p_src_first,p_dst_first,-src_len ); | ||
625 | *p_dst_len = (ULONG)-src_len; | ||
626 | return; | ||
627 | } | ||
628 | #endif | ||
629 | |||
630 | /* Initialize all elements of the hash table to point to a constant string. */ | ||
631 | /* Use of an unrolled loop speeds this up considerably. */ | ||
632 | {UWORD i; UBYTE **p_h=hash; | ||
633 | # define ZJ *p_h++=START_STRING_18 | ||
634 | for (i=0;i<256;i++) /* 256=HASH_TABLE_LENGTH/16. */ | ||
635 | {ZJ;ZJ;ZJ;ZJ; | ||
636 | ZJ;ZJ;ZJ;ZJ; | ||
637 | ZJ;ZJ;ZJ;ZJ; | ||
638 | ZJ;ZJ;ZJ;ZJ;} | ||
639 | } | ||
640 | |||
641 | /* The outer loop processes either 1 or 16 items per iteration depending on */ | ||
642 | /* how close p_src is to the end of the input block. */ | ||
643 | while (p_src!=p_src_post) | ||
644 | {/* Start of outer loop */ | ||
645 | |||
646 | register UWORD unroll; /* Counts unrolled loop executions. */ | ||
647 | |||
648 | /* When 'control' has the value 1, it means that the 16 buffered control */ | ||
649 | /* bits that were read in at the start of the current group have all been */ | ||
650 | /* shifted out and that all that is left is the 1 bit that was injected */ | ||
651 | /* into bit 16 at the start of the current group. When we reach the end */ | ||
652 | /* of a group, we have to load a new control word and inject a new 1 bit. */ | ||
653 | if (control==1) | ||
654 | { | ||
655 | control=0x10000|*p_src++; | ||
656 | control|=(*p_src++)<<8; | ||
657 | } | ||
658 | |||
659 | /* If it is possible that we are within 16 groups from the end of the */ | ||
660 | /* input, execute the unrolled loop only once, else process a whole group */ | ||
661 | /* of 16 items by looping 16 times. */ | ||
662 | unroll= p_src<=p_src_max16 ? 16 : 1; | ||
663 | |||
664 | /* This inner loop processes one phrase (item) per iteration. */ | ||
665 | while (unroll--) | ||
666 | { /* Begin unrolled inner loop. */ | ||
667 | |||
668 | /* Process a literal or copy item depending on the next control bit. */ | ||
669 | if (control&1) | ||
670 | { | ||
671 | /* Copy item. */ | ||
672 | |||
673 | register UBYTE *p; /* Points to place from which to copy. */ | ||
674 | register UWORD lenmt; /* Length of copy item minus three. */ | ||
675 | register UBYTE **p_hte; /* Pointer to current hash table entry.*/ | ||
676 | register UBYTE *p_ziv=p_dst; /* Pointer to start of current Ziv. */ | ||
677 | |||
678 | /* Read and dismantle the copy word. Work out from where to copy. */ | ||
679 | lenmt=*p_src++; | ||
680 | p_hte=&hash[((lenmt&0xF0)<<4)|*p_src++]; | ||
681 | p=*p_hte; | ||
682 | lenmt&=0xF; | ||
683 | |||
684 | /* Now perform the copy using a half unrolled loop. */ | ||
685 | *p_dst++=*p++; | ||
686 | *p_dst++=*p++; | ||
687 | *p_dst++=*p++; | ||
688 | while (lenmt--) | ||
689 | *p_dst++=*p++; | ||
690 | |||
691 | /* Because we have just received 3 or more bytes in a copy item */ | ||
692 | /* (whose bytes we have just installed in the output), we are now */ | ||
693 | /* in a position to flush all the pending literal hashings that had */ | ||
694 | /* been postponed for lack of bytes. */ | ||
695 | if (literals>0) | ||
696 | { | ||
697 | register UBYTE *r=p_ziv-literals; | ||
698 | hash[HASH(r)]=r; | ||
699 | if (literals==2) | ||
700 | {r++; hash[HASH(r)]=r;} | ||
701 | literals=0; | ||
702 | } | ||
703 | |||
704 | /* In any case, we can immediately update the hash table with the */ | ||
705 | /* current position. We don't need to do a HASH(...) to work out */ | ||
706 | /* where to put the pointer, as the compressor just told us!!! */ | ||
707 | *p_hte=p_ziv; | ||
708 | |||
709 | } | ||
710 | else | ||
711 | { | ||
712 | /* Literal item. */ | ||
713 | |||
714 | /* Copy over the literal byte. */ | ||
715 | *p_dst++=*p_src++; | ||
716 | |||
717 | /* If we now have three literals waiting to be hashed into the hash */ | ||
718 | /* table, we can do one of them now (because there are three). */ | ||
719 | if (++literals == 3) | ||
720 | {register UBYTE *p=p_dst-3; hash[HASH(p)]=p; literals=2;} | ||
721 | } | ||
722 | |||
723 | /* Shift the control buffer so the next control bit is in bit 0. */ | ||
724 | control>>=1; | ||
725 | #if 1 | ||
726 | if (p_dst > p_dst_post) | ||
727 | { | ||
728 | /* Shit: we tried to decompress corrupt data */ | ||
729 | *p_dst_len = 0; | ||
730 | return; | ||
731 | } | ||
732 | #endif | ||
733 | } /* End unrolled inner loop. */ | ||
734 | |||
735 | } /* End of outer loop */ | ||
736 | |||
737 | /* Write the length of the decompressed data before returning. */ | ||
738 | *p_dst_len=p_dst-p_dst_first; | ||
739 | } | ||
740 | |||
741 | /******************************************************************************/ | ||
742 | /* End of LZRW3.C */ | ||
743 | /******************************************************************************/ | ||