diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2011-01-12 20:01:23 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-13 11:03:25 -0500 |
commit | 3ebe12439ba7fc62e1d6ecb569b7287771716ca1 (patch) | |
tree | 65945a63ad1474489d80c8ca1fb1c1c8091fb7a2 /lib/decompress_unxz.c | |
parent | 24fa0402a9b6a537e87e38341e78b7da86486846 (diff) |
decompressors: add boot-time XZ support
This implements the API defined in <linux/decompress/generic.h> which is
used for kernel, initramfs, and initrd decompression. This patch together
with the first patch is enough for XZ-compressed initramfs and initrd;
XZ-compressed kernel will need arch-specific changes.
The buffering requirements described in decompress_unxz.c are stricter
than with gzip, so the relevant changes should be done to the
arch-specific code when adding support for XZ-compressed kernel.
Similarly, the heap size in arch-specific pre-boot code may need to be
increased (30 KiB is enough).
The XZ decompressor needs memmove(), memeq() (memcmp() == 0), and
memzero() (memset(ptr, 0, size)), which aren't available in all
arch-specific pre-boot environments. I'm including simple versions in
decompress_unxz.c, but a cleaner solution would naturally be nicer.
Signed-off-by: Lasse Collin <lasse.collin@tukaani.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Alain Knaff <alain@knaff.lu>
Cc: Albin Tonnerre <albin.tonnerre@free-electrons.com>
Cc: Phillip Lougher <phillip@lougher.demon.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'lib/decompress_unxz.c')
-rw-r--r-- | lib/decompress_unxz.c | 397 |
1 files changed, 397 insertions, 0 deletions
diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c new file mode 100644 index 000000000000..cecd23df2b9a --- /dev/null +++ b/lib/decompress_unxz.c | |||
@@ -0,0 +1,397 @@ | |||
1 | /* | ||
2 | * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd | ||
3 | * | ||
4 | * Author: Lasse Collin <lasse.collin@tukaani.org> | ||
5 | * | ||
6 | * This file has been put into the public domain. | ||
7 | * You can do whatever you want with this file. | ||
8 | */ | ||
9 | |||
10 | /* | ||
11 | * Important notes about in-place decompression | ||
12 | * | ||
13 | * At least on x86, the kernel is decompressed in place: the compressed data | ||
14 | * is placed to the end of the output buffer, and the decompressor overwrites | ||
15 | * most of the compressed data. There must be enough safety margin to | ||
16 | * guarantee that the write position is always behind the read position. | ||
17 | * | ||
18 | * The safety margin for XZ with LZMA2 or BCJ+LZMA2 is calculated below. | ||
19 | * Note that the margin with XZ is bigger than with Deflate (gzip)! | ||
20 | * | ||
21 | * The worst case for in-place decompression is that the beginning of | ||
22 | * the file is compressed extremely well, and the rest of the file is | ||
23 | * uncompressible. Thus, we must look for worst-case expansion when the | ||
24 | * compressor is encoding uncompressible data. | ||
25 | * | ||
26 | * The structure of the .xz file in case of a compresed kernel is as follows. | ||
27 | * Sizes (as bytes) of the fields are in parenthesis. | ||
28 | * | ||
29 | * Stream Header (12) | ||
30 | * Block Header: | ||
31 | * Block Header (8-12) | ||
32 | * Compressed Data (N) | ||
33 | * Block Padding (0-3) | ||
34 | * CRC32 (4) | ||
35 | * Index (8-20) | ||
36 | * Stream Footer (12) | ||
37 | * | ||
38 | * Normally there is exactly one Block, but let's assume that there are | ||
39 | * 2-4 Blocks just in case. Because Stream Header and also Block Header | ||
40 | * of the first Block don't make the decompressor produce any uncompressed | ||
41 | * data, we can ignore them from our calculations. Block Headers of possible | ||
42 | * additional Blocks have to be taken into account still. With these | ||
43 | * assumptions, it is safe to assume that the total header overhead is | ||
44 | * less than 128 bytes. | ||
45 | * | ||
46 | * Compressed Data contains LZMA2 or BCJ+LZMA2 encoded data. Since BCJ | ||
47 | * doesn't change the size of the data, it is enough to calculate the | ||
48 | * safety margin for LZMA2. | ||
49 | * | ||
50 | * LZMA2 stores the data in chunks. Each chunk has a header whose size is | ||
51 | * a maximum of 6 bytes, but to get round 2^n numbers, let's assume that | ||
52 | * the maximum chunk header size is 8 bytes. After the chunk header, there | ||
53 | * may be up to 64 KiB of actual payload in the chunk. Often the payload is | ||
54 | * quite a bit smaller though; to be safe, let's assume that an average | ||
55 | * chunk has only 32 KiB of payload. | ||
56 | * | ||
57 | * The maximum uncompressed size of the payload is 2 MiB. The minimum | ||
58 | * uncompressed size of the payload is in practice never less than the | ||
59 | * payload size itself. The LZMA2 format would allow uncompressed size | ||
60 | * to be less than the payload size, but no sane compressor creates such | ||
61 | * files. LZMA2 supports storing uncompressible data in uncompressed form, | ||
62 | * so there's never a need to create payloads whose uncompressed size is | ||
63 | * smaller than the compressed size. | ||
64 | * | ||
65 | * The assumption, that the uncompressed size of the payload is never | ||
66 | * smaller than the payload itself, is valid only when talking about | ||
67 | * the payload as a whole. It is possible that the payload has parts where | ||
68 | * the decompressor consumes more input than it produces output. Calculating | ||
69 | * the worst case for this would be tricky. Instead of trying to do that, | ||
70 | * let's simply make sure that the decompressor never overwrites any bytes | ||
71 | * of the payload which it is currently reading. | ||
72 | * | ||
73 | * Now we have enough information to calculate the safety margin. We need | ||
74 | * - 128 bytes for the .xz file format headers; | ||
75 | * - 8 bytes per every 32 KiB of uncompressed size (one LZMA2 chunk header | ||
76 | * per chunk, each chunk having average payload size of 32 KiB); and | ||
77 | * - 64 KiB (biggest possible LZMA2 chunk payload size) to make sure that | ||
78 | * the decompressor never overwrites anything from the LZMA2 chunk | ||
79 | * payload it is currently reading. | ||
80 | * | ||
81 | * We get the following formula: | ||
82 | * | ||
83 | * safety_margin = 128 + uncompressed_size * 8 / 32768 + 65536 | ||
84 | * = 128 + (uncompressed_size >> 12) + 65536 | ||
85 | * | ||
86 | * For comparision, according to arch/x86/boot/compressed/misc.c, the | ||
87 | * equivalent formula for Deflate is this: | ||
88 | * | ||
89 | * safety_margin = 18 + (uncompressed_size >> 12) + 32768 | ||
90 | * | ||
91 | * Thus, when updating Deflate-only in-place kernel decompressor to | ||
92 | * support XZ, the fixed overhead has to be increased from 18+32768 bytes | ||
93 | * to 128+65536 bytes. | ||
94 | */ | ||
95 | |||
96 | /* | ||
97 | * STATIC is defined to "static" if we are being built for kernel | ||
98 | * decompression (pre-boot code). <linux/decompress/mm.h> will define | ||
99 | * STATIC to empty if it wasn't already defined. Since we will need to | ||
100 | * know later if we are being used for kernel decompression, we define | ||
101 | * XZ_PREBOOT here. | ||
102 | */ | ||
103 | #ifdef STATIC | ||
104 | # define XZ_PREBOOT | ||
105 | #endif | ||
106 | #ifdef __KERNEL__ | ||
107 | # include <linux/decompress/mm.h> | ||
108 | #endif | ||
109 | #define XZ_EXTERN STATIC | ||
110 | |||
111 | #ifndef XZ_PREBOOT | ||
112 | # include <linux/slab.h> | ||
113 | # include <linux/xz.h> | ||
114 | #else | ||
115 | /* | ||
116 | * Use the internal CRC32 code instead of kernel's CRC32 module, which | ||
117 | * is not available in early phase of booting. | ||
118 | */ | ||
119 | #define XZ_INTERNAL_CRC32 1 | ||
120 | |||
121 | /* | ||
122 | * For boot time use, we enable only the BCJ filter of the current | ||
123 | * architecture or none if no BCJ filter is available for the architecture. | ||
124 | */ | ||
125 | #ifdef CONFIG_X86 | ||
126 | # define XZ_DEC_X86 | ||
127 | #endif | ||
128 | #ifdef CONFIG_PPC | ||
129 | # define XZ_DEC_POWERPC | ||
130 | #endif | ||
131 | #ifdef CONFIG_ARM | ||
132 | # define XZ_DEC_ARM | ||
133 | #endif | ||
134 | #ifdef CONFIG_IA64 | ||
135 | # define XZ_DEC_IA64 | ||
136 | #endif | ||
137 | #ifdef CONFIG_SPARC | ||
138 | # define XZ_DEC_SPARC | ||
139 | #endif | ||
140 | |||
141 | /* | ||
142 | * This will get the basic headers so that memeq() and others | ||
143 | * can be defined. | ||
144 | */ | ||
145 | #include "xz/xz_private.h" | ||
146 | |||
147 | /* | ||
148 | * Replace the normal allocation functions with the versions from | ||
149 | * <linux/decompress/mm.h>. vfree() needs to support vfree(NULL) | ||
150 | * when XZ_DYNALLOC is used, but the pre-boot free() doesn't support it. | ||
151 | * Workaround it here because the other decompressors don't need it. | ||
152 | */ | ||
153 | #undef kmalloc | ||
154 | #undef kfree | ||
155 | #undef vmalloc | ||
156 | #undef vfree | ||
157 | #define kmalloc(size, flags) malloc(size) | ||
158 | #define kfree(ptr) free(ptr) | ||
159 | #define vmalloc(size) malloc(size) | ||
160 | #define vfree(ptr) do { if (ptr != NULL) free(ptr); } while (0) | ||
161 | |||
162 | /* | ||
163 | * FIXME: Not all basic memory functions are provided in architecture-specific | ||
164 | * files (yet). We define our own versions here for now, but this should be | ||
165 | * only a temporary solution. | ||
166 | * | ||
167 | * memeq and memzero are not used much and any remotely sane implementation | ||
168 | * is fast enough. memcpy/memmove speed matters in multi-call mode, but | ||
169 | * the kernel image is decompressed in single-call mode, in which only | ||
170 | * memcpy speed can matter and only if there is a lot of uncompressible data | ||
171 | * (LZMA2 stores uncompressible chunks in uncompressed form). Thus, the | ||
172 | * functions below should just be kept small; it's probably not worth | ||
173 | * optimizing for speed. | ||
174 | */ | ||
175 | |||
176 | #ifndef memeq | ||
177 | static bool memeq(const void *a, const void *b, size_t size) | ||
178 | { | ||
179 | const uint8_t *x = a; | ||
180 | const uint8_t *y = b; | ||
181 | size_t i; | ||
182 | |||
183 | for (i = 0; i < size; ++i) | ||
184 | if (x[i] != y[i]) | ||
185 | return false; | ||
186 | |||
187 | return true; | ||
188 | } | ||
189 | #endif | ||
190 | |||
191 | #ifndef memzero | ||
192 | static void memzero(void *buf, size_t size) | ||
193 | { | ||
194 | uint8_t *b = buf; | ||
195 | uint8_t *e = b + size; | ||
196 | |||
197 | while (b != e) | ||
198 | *b++ = '\0'; | ||
199 | } | ||
200 | #endif | ||
201 | |||
202 | #ifndef memmove | ||
203 | /* Not static to avoid a conflict with the prototype in the Linux headers. */ | ||
204 | void *memmove(void *dest, const void *src, size_t size) | ||
205 | { | ||
206 | uint8_t *d = dest; | ||
207 | const uint8_t *s = src; | ||
208 | size_t i; | ||
209 | |||
210 | if (d < s) { | ||
211 | for (i = 0; i < size; ++i) | ||
212 | d[i] = s[i]; | ||
213 | } else if (d > s) { | ||
214 | i = size; | ||
215 | while (i-- > 0) | ||
216 | d[i] = s[i]; | ||
217 | } | ||
218 | |||
219 | return dest; | ||
220 | } | ||
221 | #endif | ||
222 | |||
223 | /* | ||
224 | * Since we need memmove anyway, would use it as memcpy too. | ||
225 | * Commented out for now to avoid breaking things. | ||
226 | */ | ||
227 | /* | ||
228 | #ifndef memcpy | ||
229 | # define memcpy memmove | ||
230 | #endif | ||
231 | */ | ||
232 | |||
233 | #include "xz/xz_crc32.c" | ||
234 | #include "xz/xz_dec_stream.c" | ||
235 | #include "xz/xz_dec_lzma2.c" | ||
236 | #include "xz/xz_dec_bcj.c" | ||
237 | |||
238 | #endif /* XZ_PREBOOT */ | ||
239 | |||
240 | /* Size of the input and output buffers in multi-call mode */ | ||
241 | #define XZ_IOBUF_SIZE 4096 | ||
242 | |||
243 | /* | ||
244 | * This function implements the API defined in <linux/decompress/generic.h>. | ||
245 | * | ||
246 | * This wrapper will automatically choose single-call or multi-call mode | ||
247 | * of the native XZ decoder API. The single-call mode can be used only when | ||
248 | * both input and output buffers are available as a single chunk, i.e. when | ||
249 | * fill() and flush() won't be used. | ||
250 | */ | ||
251 | STATIC int INIT unxz(unsigned char *in, int in_size, | ||
252 | int (*fill)(void *dest, unsigned int size), | ||
253 | int (*flush)(void *src, unsigned int size), | ||
254 | unsigned char *out, int *in_used, | ||
255 | void (*error)(char *x)) | ||
256 | { | ||
257 | struct xz_buf b; | ||
258 | struct xz_dec *s; | ||
259 | enum xz_ret ret; | ||
260 | bool must_free_in = false; | ||
261 | |||
262 | #if XZ_INTERNAL_CRC32 | ||
263 | xz_crc32_init(); | ||
264 | #endif | ||
265 | |||
266 | if (in_used != NULL) | ||
267 | *in_used = 0; | ||
268 | |||
269 | if (fill == NULL && flush == NULL) | ||
270 | s = xz_dec_init(XZ_SINGLE, 0); | ||
271 | else | ||
272 | s = xz_dec_init(XZ_DYNALLOC, (uint32_t)-1); | ||
273 | |||
274 | if (s == NULL) | ||
275 | goto error_alloc_state; | ||
276 | |||
277 | if (flush == NULL) { | ||
278 | b.out = out; | ||
279 | b.out_size = (size_t)-1; | ||
280 | } else { | ||
281 | b.out_size = XZ_IOBUF_SIZE; | ||
282 | b.out = malloc(XZ_IOBUF_SIZE); | ||
283 | if (b.out == NULL) | ||
284 | goto error_alloc_out; | ||
285 | } | ||
286 | |||
287 | if (in == NULL) { | ||
288 | must_free_in = true; | ||
289 | in = malloc(XZ_IOBUF_SIZE); | ||
290 | if (in == NULL) | ||
291 | goto error_alloc_in; | ||
292 | } | ||
293 | |||
294 | b.in = in; | ||
295 | b.in_pos = 0; | ||
296 | b.in_size = in_size; | ||
297 | b.out_pos = 0; | ||
298 | |||
299 | if (fill == NULL && flush == NULL) { | ||
300 | ret = xz_dec_run(s, &b); | ||
301 | } else { | ||
302 | do { | ||
303 | if (b.in_pos == b.in_size && fill != NULL) { | ||
304 | if (in_used != NULL) | ||
305 | *in_used += b.in_pos; | ||
306 | |||
307 | b.in_pos = 0; | ||
308 | |||
309 | in_size = fill(in, XZ_IOBUF_SIZE); | ||
310 | if (in_size < 0) { | ||
311 | /* | ||
312 | * This isn't an optimal error code | ||
313 | * but it probably isn't worth making | ||
314 | * a new one either. | ||
315 | */ | ||
316 | ret = XZ_BUF_ERROR; | ||
317 | break; | ||
318 | } | ||
319 | |||
320 | b.in_size = in_size; | ||
321 | } | ||
322 | |||
323 | ret = xz_dec_run(s, &b); | ||
324 | |||
325 | if (flush != NULL && (b.out_pos == b.out_size | ||
326 | || (ret != XZ_OK && b.out_pos > 0))) { | ||
327 | /* | ||
328 | * Setting ret here may hide an error | ||
329 | * returned by xz_dec_run(), but probably | ||
330 | * it's not too bad. | ||
331 | */ | ||
332 | if (flush(b.out, b.out_pos) != (int)b.out_pos) | ||
333 | ret = XZ_BUF_ERROR; | ||
334 | |||
335 | b.out_pos = 0; | ||
336 | } | ||
337 | } while (ret == XZ_OK); | ||
338 | |||
339 | if (must_free_in) | ||
340 | free(in); | ||
341 | |||
342 | if (flush != NULL) | ||
343 | free(b.out); | ||
344 | } | ||
345 | |||
346 | if (in_used != NULL) | ||
347 | *in_used += b.in_pos; | ||
348 | |||
349 | xz_dec_end(s); | ||
350 | |||
351 | switch (ret) { | ||
352 | case XZ_STREAM_END: | ||
353 | return 0; | ||
354 | |||
355 | case XZ_MEM_ERROR: | ||
356 | /* This can occur only in multi-call mode. */ | ||
357 | error("XZ decompressor ran out of memory"); | ||
358 | break; | ||
359 | |||
360 | case XZ_FORMAT_ERROR: | ||
361 | error("Input is not in the XZ format (wrong magic bytes)"); | ||
362 | break; | ||
363 | |||
364 | case XZ_OPTIONS_ERROR: | ||
365 | error("Input was encoded with settings that are not " | ||
366 | "supported by this XZ decoder"); | ||
367 | break; | ||
368 | |||
369 | case XZ_DATA_ERROR: | ||
370 | case XZ_BUF_ERROR: | ||
371 | error("XZ-compressed data is corrupt"); | ||
372 | break; | ||
373 | |||
374 | default: | ||
375 | error("Bug in the XZ decompressor"); | ||
376 | break; | ||
377 | } | ||
378 | |||
379 | return -1; | ||
380 | |||
381 | error_alloc_in: | ||
382 | if (flush != NULL) | ||
383 | free(b.out); | ||
384 | |||
385 | error_alloc_out: | ||
386 | xz_dec_end(s); | ||
387 | |||
388 | error_alloc_state: | ||
389 | error("XZ decompressor ran out of memory"); | ||
390 | return -1; | ||
391 | } | ||
392 | |||
393 | /* | ||
394 | * This macro is used by architecture-specific files to decompress | ||
395 | * the kernel image. | ||
396 | */ | ||
397 | #define decompress unxz | ||