aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristophe Leroy <christophe.leroy@c-s.fr>2016-02-09 11:07:50 -0500
committerScott Wood <oss@buserror.net>2016-03-11 18:18:01 -0500
commita372acfac51e0d5858f8f6f84da52defcabf054b (patch)
tree5e7c1a109e7c1167f1638f9e6f7f0154e09d2eef
parent913a6b3d10d85a032eff2f31254c35e0976f5e32 (diff)
powerpc/8xx: Map linear kernel RAM with 8M pages
On a live running system (VoIP gateway for Air Traffic Control), over a 10-minute period (with 277s idle), we get 87 million DTLB misses and approximately 35 seconds are spent in the DTLB handler. This represents 5.8% of the overall time and even 10.8% of the non-idle time. Among those 87 million DTLB misses, 15% are on user addresses and 85% are on kernel addresses. And within the kernel addresses, 93% are on addresses from the linear address space and only 7% are on addresses from the virtual address space. MPC8xx has no BATs but it has an 8Mb page size. This patch implements mapping of kernel RAM using 8Mb pages, on the same model as what is done on the 40x. In 4k pages mode, each PGD entry maps a 4Mb area: we map every two entries to the same 8Mb physical page. In each second entry, we add 4Mb to the page physical address to ease the life of the FixupDAR routine. This is just ignored by HW. In 16k pages mode, each PGD entry maps a 64Mb area: each PGD entry will point to the first page of the area. The DTLB handler adds the 3 bits from EPN to map the correct page. With this patch applied, we now get only 13 million TLB misses during the 10-minute period. The idle time has increased to 313s and the overall time spent in the DTLB miss handler is 6.3s, which represents 1% of the overall time and 2.2% of non-idle time. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Scott Wood <oss@buserror.net>
-rw-r--r--arch/powerpc/kernel/head_8xx.S35
-rw-r--r--arch/powerpc/mm/8xx_mmu.c83
-rw-r--r--arch/powerpc/mm/Makefile1
-rw-r--r--arch/powerpc/mm/mmu_decl.h15
4 files changed, 120 insertions, 14 deletions
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index a89492eb6e46..87d1f5f0d808 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -398,11 +398,13 @@ DataStoreTLBMiss:
398 BRANCH_UNLESS_KERNEL(3f) 398 BRANCH_UNLESS_KERNEL(3f)
399 lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 399 lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
4003: 4003:
401 mtcr r3
402 401
403 /* Insert level 1 index */ 402 /* Insert level 1 index */
404 rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 403 rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
405 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ 404 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */
405 mtcr r11
406 bt- 28,DTLBMiss8M /* bit 28 = Large page (8M) */
407 mtcr r3
406 408
407 /* We have a pte table, so load fetch the pte from the table. 409 /* We have a pte table, so load fetch the pte from the table.
408 */ 410 */
@@ -455,6 +457,29 @@ DataStoreTLBMiss:
455 EXCEPTION_EPILOG_0 457 EXCEPTION_EPILOG_0
456 rfi 458 rfi
457 459
460DTLBMiss8M:
461 mtcr r3
462 ori r11, r11, MD_SVALID
463 MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
464#ifdef CONFIG_PPC_16K_PAGES
465 /*
466 * In 16k pages mode, each PGD entry defines a 64M block.
467 * Here we select the 8M page within the block.
468 */
469 rlwimi r11, r10, 0, 0x03800000
470#endif
471 rlwinm r10, r11, 0, 0xff800000
472 ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
473 _PAGE_PRESENT
474 MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */
475
476 li r11, RPN_PATTERN
477 mfspr r3, SPRN_SPRG_SCRATCH2
478 mtspr SPRN_DAR, r11 /* Tag DAR */
479 EXCEPTION_EPILOG_0
480 rfi
481
482
458/* This is an instruction TLB error on the MPC8xx. This could be due 483/* This is an instruction TLB error on the MPC8xx. This could be due
459 * to many reasons, such as executing guarded memory or illegal instruction 484 * to many reasons, such as executing guarded memory or illegal instruction
460 * addresses. There is nothing to do but handle a big time error fault. 485 * addresses. There is nothing to do but handle a big time error fault.
@@ -532,13 +557,15 @@ FixupDAR:/* Entry point for dcbx workaround. */
532 /* Insert level 1 index */ 557 /* Insert level 1 index */
5333: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 5583: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
534 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ 559 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */
560 mtcr r11
561 bt 28,200f /* bit 28 = Large page (8M) */
535 rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ 562 rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */
536 /* Insert level 2 index */ 563 /* Insert level 2 index */
537 rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 564 rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
538 lwz r11, 0(r11) /* Get the pte */ 565 lwz r11, 0(r11) /* Get the pte */
539 /* concat physical page address(r11) and page offset(r10) */ 566 /* concat physical page address(r11) and page offset(r10) */
540 rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 567 rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31
541 lwz r11,0(r11) 568201: lwz r11,0(r11)
542/* Check if it really is a dcbx instruction. */ 569/* Check if it really is a dcbx instruction. */
543/* dcbt and dcbtst does not generate DTLB Misses/Errors, 570/* dcbt and dcbtst does not generate DTLB Misses/Errors,
544 * no need to include them here */ 571 * no need to include them here */
@@ -557,6 +584,10 @@ FixupDAR:/* Entry point for dcbx workaround. */
557141: mfspr r10,SPRN_SPRG_SCRATCH2 584141: mfspr r10,SPRN_SPRG_SCRATCH2
558 b DARFixed /* Nope, go back to normal TLB processing */ 585 b DARFixed /* Nope, go back to normal TLB processing */
559 586
587 /* concat physical page address(r11) and page offset(r10) */
588200: rlwimi r11, r10, 0, 32 - (PAGE_SHIFT << 1), 31
589 b 201b
590
560144: mfspr r10, SPRN_DSISR 591144: mfspr r10, SPRN_DSISR
561 rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ 592 rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */
562 mtspr SPRN_DSISR, r10 593 mtspr SPRN_DSISR, r10
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
new file mode 100644
index 000000000000..2d42745f6cff
--- /dev/null
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -0,0 +1,83 @@
1/*
2 * This file contains the routines for initializing the MMU
3 * on the 8xx series of chips.
4 * -- christophe
5 *
6 * Derived from arch/powerpc/mm/40x_mmu.c:
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 *
13 */
14
15#include <linux/memblock.h>
16
17#include "mmu_decl.h"
18
19extern int __map_without_ltlbs;
20/*
21 * MMU_init_hw does the chip-specific initialization of the MMU hardware.
22 */
23void __init MMU_init_hw(void)
24{
25 /* Intentionally empty for now: kept so the 8xx provides MMU_init_hw() like other PPC */
26}
27
28#define LARGE_PAGE_SIZE_4M (1<<22)
29#define LARGE_PAGE_SIZE_8M (1<<23)
30#define LARGE_PAGE_SIZE_64M (1<<26)
31
32unsigned long __init mmu_mapin_ram(unsigned long top)
33{
34 unsigned long v, s, mapped;
35 phys_addr_t p;
36
37 v = KERNELBASE;
38 p = 0;
39 s = top;
40
41 if (__map_without_ltlbs)
42 return 0;
43
44#ifdef CONFIG_PPC_4K_PAGES
45 while (s >= LARGE_PAGE_SIZE_8M) {
46 pmd_t *pmdp;
47 unsigned long val = p | MD_PS8MEG;
48
49 pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
50 *pmdp++ = __pmd(val);
51 *pmdp++ = __pmd(val + LARGE_PAGE_SIZE_4M); /* 2nd entry of the pair: same 8M step, physical address + 4M */
52
53 v += LARGE_PAGE_SIZE_8M;
54 p += LARGE_PAGE_SIZE_8M;
55 s -= LARGE_PAGE_SIZE_8M;
56 }
57#else /* CONFIG_PPC_16K_PAGES */
58 while (s >= LARGE_PAGE_SIZE_64M) {
59 pmd_t *pmdp;
60 unsigned long val = p | MD_PS8MEG;
61
62 pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
63 *pmdp++ = __pmd(val);
64
65 v += LARGE_PAGE_SIZE_64M;
66 p += LARGE_PAGE_SIZE_64M;
67 s -= LARGE_PAGE_SIZE_64M;
68 }
69#endif
70
71 mapped = top - s;
72
73 /* The loop above maps only whole large-page steps (8 MiB with 4k
74 * pages, 64 MiB with 16k pages), so a tail of RAM is left uncovered
75 * when 'top' is not a multiple of that step. Consequently, restrict
76 * the top end of RAM currently allocable so that memblock allocations
77 * (e.g. PTEs for "tail" coverage with normal-sized pages) do not
78 * attempt to allocate outside the range mapped so far.
79 */
80 memblock_set_current_limit(mapped);
81
82 return mapped;
83}
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 1ffeda85c086..adfee3f1aeb9 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_PPC_ICSWX) += icswx.o
25obj-$(CONFIG_PPC_ICSWX_PID) += icswx_pid.o 25obj-$(CONFIG_PPC_ICSWX_PID) += icswx_pid.o
26obj-$(CONFIG_40x) += 40x_mmu.o 26obj-$(CONFIG_40x) += 40x_mmu.o
27obj-$(CONFIG_44x) += 44x_mmu.o 27obj-$(CONFIG_44x) += 44x_mmu.o
28obj-$(CONFIG_PPC_8xx) += 8xx_mmu.o
28obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o 29obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o
29obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o 30obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
30obj-$(CONFIG_PPC_SPLPAR) += vphn.o 31obj-$(CONFIG_PPC_SPLPAR) += vphn.o
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 898d63365cdd..718076ff0b8a 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -133,22 +133,17 @@ extern void wii_memory_fixups(void);
133/* ...and now those things that may be slightly different between processor 133/* ...and now those things that may be slightly different between processor
134 * architectures. -- Dan 134 * architectures. -- Dan
135 */ 135 */
136#if defined(CONFIG_8xx) 136#ifdef CONFIG_PPC32
137#define MMU_init_hw() do { } while(0)
138#define mmu_mapin_ram(top) (0UL)
139
140#elif defined(CONFIG_4xx)
141extern void MMU_init_hw(void); 137extern void MMU_init_hw(void);
142extern unsigned long mmu_mapin_ram(unsigned long top); 138extern unsigned long mmu_mapin_ram(unsigned long top);
139#endif
143 140
144#elif defined(CONFIG_PPC_FSL_BOOK3E) 141#ifdef CONFIG_PPC_FSL_BOOK3E
145extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, 142extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx,
146 bool dryrun); 143 bool dryrun);
147extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, 144extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
148 phys_addr_t phys); 145 phys_addr_t phys);
149#ifdef CONFIG_PPC32 146#ifdef CONFIG_PPC32
150extern void MMU_init_hw(void);
151extern unsigned long mmu_mapin_ram(unsigned long top);
152extern void adjust_total_lowmem(void); 147extern void adjust_total_lowmem(void);
153extern int switch_to_as1(void); 148extern int switch_to_as1(void);
154extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu); 149extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
@@ -163,8 +158,4 @@ struct tlbcam {
163 u32 MAS3; 158 u32 MAS3;
164 u32 MAS7; 159 u32 MAS7;
165}; 160};
166#elif defined(CONFIG_PPC32)
167/* anything 32-bit except 4xx or 8xx */
168extern void MMU_init_hw(void);
169extern unsigned long mmu_mapin_ram(unsigned long top);
170#endif 161#endif