aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorChristian Borntraeger <borntraeger@de.ibm.com>2015-01-30 04:31:13 -0500
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2015-02-26 03:24:49 -0500
commitfb3d1c085c05e0e4b112d915dbd06b20b259e6c5 (patch)
tree650756a903449ccc32a9cc8c3c4cbadae347d929 /arch
parentf0483044c1c96089256cda4cf182eea1ead77fe4 (diff)
s390: let the compiler do page clearing
The hardware folks told me that for page clearing "when you exactly know what to do, hand written xc+pfd is usally faster then mvcl for page clearing, as it saves millicode overhead and parameter parsing and checking" as long as you dont need the cache bypassing. Turns out that gcc already does a proper xc,pfd loop. A small test on z196 that does buff = mmap(NULL, bufsize,PROT_EXEC|PROT_WRITE|PROT_READ,AP_PRIVATE| MAP_ANONYMOUS,0,0); for ( i = 0; i < bufsize; i+= 256) buff[i] = 0x5; gets 20% faster (touches every cache line of a page) and buff = mmap(NULL, bufsize,PROT_EXEC|PROT_WRITE|PROT_READ,AP_PRIVATE| MAP_ANONYMOUS,0,0); for ( i = 0; i < bufsize; i+= 4096) buff[i] = 0x5; is within noise ratio (touches one cache line of a page). As the clear_page is usually called for first memory accesses we can assume that at least one cache line is used afterwards, so this change should be always better. Another benchmark, a make -j 40 of my testsuite in tmpfs with hot caches on a 32cpu system: -- unpatched -- -- patched -- real 0m1.017s real 0m0.994s (~2% faster, but in noise) user 0m5.339s user 0m5.016s (~6% faster) sys 0m0.691s sys 0m0.632s (~8% faster) Let use the same define to memset as the asm-generic variant Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/s390/include/asm/page.h11
1 files changed, 1 insertions, 10 deletions
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 7b2ac6e44166..53eacbd4f09b 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -37,16 +37,7 @@ static inline void storage_key_init_range(unsigned long start, unsigned long end
37#endif 37#endif
38} 38}
39 39
40static inline void clear_page(void *page) 40#define clear_page(page) memset((page), 0, PAGE_SIZE)
41{
42 register unsigned long reg1 asm ("1") = 0;
43 register void *reg2 asm ("2") = page;
44 register unsigned long reg3 asm ("3") = 4096;
45 asm volatile(
46 " mvcl 2,0"
47 : "+d" (reg2), "+d" (reg3) : "d" (reg1)
48 : "memory", "cc");
49}
50 41
51/* 42/*
52 * copy_page uses the mvcl instruction with 0xb0 padding byte in order to 43 * copy_page uses the mvcl instruction with 0xb0 padding byte in order to