aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Joakim Tjernlund <Joakim.Tjernlund@transmode.se> 2010-01-08 17:42:40 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2010-01-11 12:34:04 -0500
commit ac4c2a3bbe5db5fc570b1d0ee1e474db7cb22585 (patch)
tree a7b8f2d618497cd4152ebe8e7390107a442bf0f6
parent 129182e5626972ac0df85d43a36dd46ad61c64e1 (diff)
zlib: optimize inffast when copying direct from output
JFFS2 uses a lower compression ratio, so inflate always ends up in the "copy direct from output" case. This patch tries to optimize the direct copy procedure. It uses get_unaligned(), but only in one place. The copy loop just above this one could also use this optimization, but I haven't done so, as I have not tested whether it is a win there too.

On my MPC8321 this is about 17% faster on my JFFS2 root FS than the original.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Cc: Roel Kluin <roel.kluin@gmail.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- arch/powerpc/boot/Makefile 4
-rw-r--r-- lib/zlib_inflate/inffast.c 55
2 files changed, 47 insertions, 12 deletions
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index bb2465bcb327..826a30a00f59 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -20,7 +20,7 @@
20all: $(obj)/zImage 20all: $(obj)/zImage
21 21
22BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ 22BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
23 -fno-strict-aliasing -Os -msoft-float -pipe \ 23 -fno-strict-aliasing -Os -msoft-float -pipe -D__KERNEL__\
24 -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ 24 -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
25 -isystem $(shell $(CROSS32CC) -print-file-name=include) 25 -isystem $(shell $(CROSS32CC) -print-file-name=include)
26BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc 26BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
@@ -34,6 +34,8 @@ BOOTCFLAGS += -fno-stack-protector
34endif 34endif
35 35
36BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) 36BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj)
37BOOTCFLAGS += -include include/linux/autoconf.h -Iarch/powerpc/include
38BOOTCFLAGS += -Iinclude
37 39
38DTS_FLAGS ?= -p 1024 40DTS_FLAGS ?= -p 1024
39 41
diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c
index 8550b0c05d00..05e1559fa156 100644
--- a/lib/zlib_inflate/inffast.c
+++ b/lib/zlib_inflate/inffast.c
@@ -4,6 +4,8 @@
4 */ 4 */
5 5
6#include <linux/zutil.h> 6#include <linux/zutil.h>
7#include <asm/unaligned.h>
8#include <asm/byteorder.h>
7#include "inftrees.h" 9#include "inftrees.h"
8#include "inflate.h" 10#include "inflate.h"
9#include "inffast.h" 11#include "inffast.h"
@@ -24,9 +26,11 @@
24#ifdef POSTINC 26#ifdef POSTINC
25# define OFF 0 27# define OFF 0
26# define PUP(a) *(a)++ 28# define PUP(a) *(a)++
29# define UP_UNALIGNED(a) get_unaligned((a)++)
27#else 30#else
28# define OFF 1 31# define OFF 1
29# define PUP(a) *++(a) 32# define PUP(a) *++(a)
33# define UP_UNALIGNED(a) get_unaligned(++(a))
30#endif 34#endif
31 35
32/* 36/*
@@ -239,18 +243,47 @@ void inflate_fast(z_streamp strm, unsigned start)
239 } 243 }
240 } 244 }
241 else { 245 else {
246 unsigned short *sout;
247 unsigned long loops;
248
242 from = out - dist; /* copy direct from output */ 249 from = out - dist; /* copy direct from output */
243 do { /* minimum length is three */ 250 /* minimum length is three */
244 PUP(out) = PUP(from); 251 /* Align out addr */
245 PUP(out) = PUP(from); 252 if (!((long)(out - 1 + OFF) & 1)) {
246 PUP(out) = PUP(from); 253 PUP(out) = PUP(from);
247 len -= 3; 254 len--;
248 } while (len > 2); 255 }
249 if (len) { 256 sout = (unsigned short *)(out - OFF);
250 PUP(out) = PUP(from); 257 if (dist > 2) {
251 if (len > 1) 258 unsigned short *sfrom;
252 PUP(out) = PUP(from); 259
253 } 260 sfrom = (unsigned short *)(from - OFF);
261 loops = len >> 1;
262 do
263 PUP(sout) = UP_UNALIGNED(sfrom);
264 while (--loops);
265 out = (unsigned char *)sout + OFF;
266 from = (unsigned char *)sfrom + OFF;
267 } else { /* dist == 1 or dist == 2 */
268 unsigned short pat16;
269
270 pat16 = *(sout-2+2*OFF);
271 if (dist == 1)
272#if defined(__BIG_ENDIAN)
273 pat16 = (pat16 & 0xff) | ((pat16 & 0xff) << 8);
274#elif defined(__LITTLE_ENDIAN)
275 pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00) >> 8);
276#else
277#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined
278#endif
279 loops = len >> 1;
280 do
281 PUP(sout) = pat16;
282 while (--loops);
283 out = (unsigned char *)sout + OFF;
284 }
285 if (len & 1)
286 PUP(out) = PUP(from);
254 } 287 }
255 } 288 }
256 else if ((op & 64) == 0) { /* 2nd level distance code */ 289 else if ((op & 64) == 0) { /* 2nd level distance code */