diff options
author | Joakim Tjernlund <joakim.tjernlund@transmode.se> | 2010-03-10 18:23:55 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-03-12 18:52:44 -0500 |
commit | e69eae65523b457a3ac4262a66cfff57f2c924a9 (patch) | |
tree | 620315728b7ebfe7876b9314206de8339e0343bb | |
parent | 5ceaa2f39bfa73c4398cd01e78f1c3ebde3d3383 (diff) |
zlib: make new optimized inflate endian independent
Commit 6846ee5ca68d81e6baccf0d56221d7a00c1be18b ("zlib: Fix build of
powerpc boot wrapper") made the new optimized inflate available only on
architectures that define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
This patch again enables the optimization for all architectures by defining
our own endian-independent version of unaligned access. As an added
bonus, architectures that define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS do a
plain load instead.
Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Cc: Anton Blanchard <anton@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Kumar Gala <galak@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | lib/zlib_inflate/inffast.c | 70 |
1 files changed, 30 insertions, 40 deletions
diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c index 215447c55261..fa62fc7a94f9 100644 --- a/lib/zlib_inflate/inffast.c +++ b/lib/zlib_inflate/inffast.c | |||
@@ -8,21 +8,6 @@ | |||
8 | #include "inflate.h" | 8 | #include "inflate.h" |
9 | #include "inffast.h" | 9 | #include "inffast.h" |
10 | 10 | ||
11 | /* Only do the unaligned "Faster" variant when | ||
12 | * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is set | ||
13 | * | ||
14 | * On powerpc, it won't be as we don't include autoconf.h | ||
15 | * automatically for the boot wrapper, which is intended as | ||
16 | * we run in an environment where we may not be able to deal | ||
17 | * with (even rare) alignment faults. In addition, we do not | ||
18 | * define __KERNEL__ for arch/powerpc/boot unlike x86 | ||
19 | */ | ||
20 | |||
21 | #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS | ||
22 | #include <asm/unaligned.h> | ||
23 | #include <asm/byteorder.h> | ||
24 | #endif | ||
25 | |||
26 | #ifndef ASMINF | 11 | #ifndef ASMINF |
27 | 12 | ||
28 | /* Allow machine dependent optimization for post-increment or pre-increment. | 13 | /* Allow machine dependent optimization for post-increment or pre-increment. |
@@ -36,14 +21,31 @@ | |||
36 | - Pentium III (Anderson) | 21 | - Pentium III (Anderson) |
37 | - M68060 (Nikl) | 22 | - M68060 (Nikl) |
38 | */ | 23 | */ |
24 | union uu { | ||
25 | unsigned short us; | ||
26 | unsigned char b[2]; | ||
27 | }; | ||
28 | |||
29 | /* Endian independent version */ | ||
30 | static inline unsigned short | ||
31 | get_unaligned16(const unsigned short *p) | ||
32 | { | ||
33 | union uu mm; | ||
34 | unsigned char *b = (unsigned char *)p; | ||
35 | |||
36 | mm.b[0] = b[0]; | ||
37 | mm.b[1] = b[1]; | ||
38 | return mm.us; | ||
39 | } | ||
40 | |||
39 | #ifdef POSTINC | 41 | #ifdef POSTINC |
40 | # define OFF 0 | 42 | # define OFF 0 |
41 | # define PUP(a) *(a)++ | 43 | # define PUP(a) *(a)++ |
42 | # define UP_UNALIGNED(a) get_unaligned((a)++) | 44 | # define UP_UNALIGNED(a) get_unaligned16((a)++) |
43 | #else | 45 | #else |
44 | # define OFF 1 | 46 | # define OFF 1 |
45 | # define PUP(a) *++(a) | 47 | # define PUP(a) *++(a) |
46 | # define UP_UNALIGNED(a) get_unaligned(++(a)) | 48 | # define UP_UNALIGNED(a) get_unaligned16(++(a)) |
47 | #endif | 49 | #endif |
48 | 50 | ||
49 | /* | 51 | /* |
@@ -256,7 +258,6 @@ void inflate_fast(z_streamp strm, unsigned start) | |||
256 | } | 258 | } |
257 | } | 259 | } |
258 | else { | 260 | else { |
259 | #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS | ||
260 | unsigned short *sout; | 261 | unsigned short *sout; |
261 | unsigned long loops; | 262 | unsigned long loops; |
262 | 263 | ||
@@ -274,7 +275,11 @@ void inflate_fast(z_streamp strm, unsigned start) | |||
274 | sfrom = (unsigned short *)(from - OFF); | 275 | sfrom = (unsigned short *)(from - OFF); |
275 | loops = len >> 1; | 276 | loops = len >> 1; |
276 | do | 277 | do |
278 | #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS | ||
279 | PUP(sout) = PUP(sfrom); | ||
280 | #else | ||
277 | PUP(sout) = UP_UNALIGNED(sfrom); | 281 | PUP(sout) = UP_UNALIGNED(sfrom); |
282 | #endif | ||
278 | while (--loops); | 283 | while (--loops); |
279 | out = (unsigned char *)sout + OFF; | 284 | out = (unsigned char *)sout + OFF; |
280 | from = (unsigned char *)sfrom + OFF; | 285 | from = (unsigned char *)sfrom + OFF; |
@@ -282,14 +287,13 @@ void inflate_fast(z_streamp strm, unsigned start) | |||
282 | unsigned short pat16; | 287 | unsigned short pat16; |
283 | 288 | ||
284 | pat16 = *(sout-2+2*OFF); | 289 | pat16 = *(sout-2+2*OFF); |
285 | if (dist == 1) | 290 | if (dist == 1) { |
286 | #if defined(__BIG_ENDIAN) | 291 | union uu mm; |
287 | pat16 = (pat16 & 0xff) | ((pat16 & 0xff) << 8); | 292 | /* copy one char pattern to both bytes */ |
288 | #elif defined(__LITTLE_ENDIAN) | 293 | mm.us = pat16; |
289 | pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00) >> 8); | 294 | mm.b[0] = mm.b[1]; |
290 | #else | 295 | pat16 = mm.us; |
291 | #error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined | 296 | } |
292 | #endif | ||
293 | loops = len >> 1; | 297 | loops = len >> 1; |
294 | do | 298 | do |
295 | PUP(sout) = pat16; | 299 | PUP(sout) = pat16; |
@@ -298,20 +302,6 @@ void inflate_fast(z_streamp strm, unsigned start) | |||
298 | } | 302 | } |
299 | if (len & 1) | 303 | if (len & 1) |
300 | PUP(out) = PUP(from); | 304 | PUP(out) = PUP(from); |
301 | #else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ | ||
302 | from = out - dist; /* copy direct from output */ | ||
303 | do { /* minimum length is three */ | ||
304 | PUP(out) = PUP(from); | ||
305 | PUP(out) = PUP(from); | ||
306 | PUP(out) = PUP(from); | ||
307 | len -= 3; | ||
308 | } while (len > 2); | ||
309 | if (len) { | ||
310 | PUP(out) = PUP(from); | ||
311 | if (len > 1) | ||
312 | PUP(out) = PUP(from); | ||
313 | } | ||
314 | #endif /* !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ | ||
315 | } | 305 | } |
316 | } | 306 | } |
317 | else if ((op & 64) == 0) { /* 2nd level distance code */ | 307 | else if ((op & 64) == 0) { /* 2nd level distance code */ |