aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm/hash_utils_64.c
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@samba.org>2006-06-14 20:45:18 -0400
committerPaul Mackerras <paulus@samba.org>2006-06-14 20:45:18 -0400
commitbf72aeba2ffef599d1d386425c9e46b82be657cd (patch)
treeead8e5111dbcfa22e156999d1bb8a96e50f06fef /arch/powerpc/mm/hash_utils_64.c
parent31925323b1b51bb65db729e029472a8b1f635b7d (diff)
powerpc: Use 64k pages without needing cache-inhibited large pages
Some POWER5+ machines can do 64k hardware pages for normal memory but not for cache-inhibited pages. This patch lets us use 64k hardware pages for most user processes on such machines (assuming the kernel has been configured with CONFIG_PPC_64K_PAGES=y). User processes start out using 64k pages and get switched to 4k pages if they use any non-cacheable mappings. With this, we use 64k pages for the vmalloc region and 4k pages for the imalloc region. If anything creates a non-cacheable mapping in the vmalloc region, the vmalloc region will get switched to 4k pages. I don't know of any driver other than the DRM that would do this, though, and these machines don't have AGP. When a region gets switched from 64k pages to 4k pages, we do not have to clear out all the 64k HPTEs from the hash table immediately. We use the _PAGE_COMBO bit in the Linux PTE to indicate whether the page was hashed in as a 64k page or a set of 4k pages. If hash_page is trying to insert a 4k page for a Linux PTE and it sees that it has already been inserted as a 64k page, it first invalidates the 64k HPTE before inserting the 4k HPTE. The hash invalidation routines also use the _PAGE_COMBO bit, to determine whether to look for a 64k HPTE or a set of 4k HPTEs to remove. With those two changes, we can tolerate a mix of 4k and 64k HPTEs in the hash table, and they will all get removed when the address space is torn down. Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc/mm/hash_utils_64.c')
-rw-r--r--arch/powerpc/mm/hash_utils_64.c84
1 files changed, 75 insertions, 9 deletions
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index b43ed92ef471..d03fd2b4445e 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -92,10 +92,15 @@ unsigned long htab_size_bytes;
92unsigned long htab_hash_mask; 92unsigned long htab_hash_mask;
93int mmu_linear_psize = MMU_PAGE_4K; 93int mmu_linear_psize = MMU_PAGE_4K;
94int mmu_virtual_psize = MMU_PAGE_4K; 94int mmu_virtual_psize = MMU_PAGE_4K;
95int mmu_vmalloc_psize = MMU_PAGE_4K;
96int mmu_io_psize = MMU_PAGE_4K;
95#ifdef CONFIG_HUGETLB_PAGE 97#ifdef CONFIG_HUGETLB_PAGE
96int mmu_huge_psize = MMU_PAGE_16M; 98int mmu_huge_psize = MMU_PAGE_16M;
97unsigned int HPAGE_SHIFT; 99unsigned int HPAGE_SHIFT;
98#endif 100#endif
101#ifdef CONFIG_PPC_64K_PAGES
102int mmu_ci_restrictions;
103#endif
99 104
100/* There are definitions of page sizes arrays to be used when none 105/* There are definitions of page sizes arrays to be used when none
101 * is provided by the firmware. 106 * is provided by the firmware.
@@ -308,20 +313,31 @@ static void __init htab_init_page_sizes(void)
308 else if (mmu_psize_defs[MMU_PAGE_1M].shift) 313 else if (mmu_psize_defs[MMU_PAGE_1M].shift)
309 mmu_linear_psize = MMU_PAGE_1M; 314 mmu_linear_psize = MMU_PAGE_1M;
310 315
316#ifdef CONFIG_PPC_64K_PAGES
311 /* 317 /*
312 * Pick a size for the ordinary pages. Default is 4K, we support 318 * Pick a size for the ordinary pages. Default is 4K, we support
313 * 64K if cache inhibited large pages are supported by the 319 * 64K for user mappings and vmalloc if supported by the processor.
314 * processor 320 * We only use 64k for ioremap if the processor
321 * (and firmware) support cache-inhibited large pages.
322 * If not, we use 4k and set mmu_ci_restrictions so that
323 * hash_page knows to switch processes that use cache-inhibited
324 * mappings to 4k pages.
315 */ 325 */
316#ifdef CONFIG_PPC_64K_PAGES 326 if (mmu_psize_defs[MMU_PAGE_64K].shift) {
317 if (mmu_psize_defs[MMU_PAGE_64K].shift &&
318 cpu_has_feature(CPU_FTR_CI_LARGE_PAGE))
319 mmu_virtual_psize = MMU_PAGE_64K; 327 mmu_virtual_psize = MMU_PAGE_64K;
328 mmu_vmalloc_psize = MMU_PAGE_64K;
329 if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE))
330 mmu_io_psize = MMU_PAGE_64K;
331 else
332 mmu_ci_restrictions = 1;
333 }
320#endif 334#endif
321 335
322 printk(KERN_DEBUG "Page orders: linear mapping = %d, others = %d\n", 336 printk(KERN_DEBUG "Page orders: linear mapping = %d, "
337 "virtual = %d, io = %d\n",
323 mmu_psize_defs[mmu_linear_psize].shift, 338 mmu_psize_defs[mmu_linear_psize].shift,
324 mmu_psize_defs[mmu_virtual_psize].shift); 339 mmu_psize_defs[mmu_virtual_psize].shift,
340 mmu_psize_defs[mmu_io_psize].shift);
325 341
326#ifdef CONFIG_HUGETLB_PAGE 342#ifdef CONFIG_HUGETLB_PAGE
327 /* Init large page size. Currently, we pick 16M or 1M depending 343 /* Init large page size. Currently, we pick 16M or 1M depending
@@ -556,6 +572,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
556 pte_t *ptep; 572 pte_t *ptep;
557 cpumask_t tmp; 573 cpumask_t tmp;
558 int rc, user_region = 0, local = 0; 574 int rc, user_region = 0, local = 0;
575 int psize;
559 576
560 DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", 577 DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
561 ea, access, trap); 578 ea, access, trap);
@@ -575,10 +592,15 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
575 return 1; 592 return 1;
576 } 593 }
577 vsid = get_vsid(mm->context.id, ea); 594 vsid = get_vsid(mm->context.id, ea);
595 psize = mm->context.user_psize;
578 break; 596 break;
579 case VMALLOC_REGION_ID: 597 case VMALLOC_REGION_ID:
580 mm = &init_mm; 598 mm = &init_mm;
581 vsid = get_kernel_vsid(ea); 599 vsid = get_kernel_vsid(ea);
600 if (ea < VMALLOC_END)
601 psize = mmu_vmalloc_psize;
602 else
603 psize = mmu_io_psize;
582 break; 604 break;
583 default: 605 default:
584 /* Not a valid range 606 /* Not a valid range
@@ -629,7 +651,40 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
629#ifndef CONFIG_PPC_64K_PAGES 651#ifndef CONFIG_PPC_64K_PAGES
630 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); 652 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
631#else 653#else
632 if (mmu_virtual_psize == MMU_PAGE_64K) 654 if (mmu_ci_restrictions) {
655 /* If this PTE is non-cacheable, switch to 4k */
656 if (psize == MMU_PAGE_64K &&
657 (pte_val(*ptep) & _PAGE_NO_CACHE)) {
658 if (user_region) {
659 psize = MMU_PAGE_4K;
660 mm->context.user_psize = MMU_PAGE_4K;
661 mm->context.sllp = SLB_VSID_USER |
662 mmu_psize_defs[MMU_PAGE_4K].sllp;
663 } else if (ea < VMALLOC_END) {
664 /*
665 * some driver did a non-cacheable mapping
666 * in vmalloc space, so switch vmalloc
667 * to 4k pages
668 */
669 printk(KERN_ALERT "Reducing vmalloc segment "
670 "to 4kB pages because of "
671 "non-cacheable mapping\n");
672 psize = mmu_vmalloc_psize = MMU_PAGE_4K;
673 }
674 }
675 if (user_region) {
676 if (psize != get_paca()->context.user_psize) {
677 get_paca()->context = mm->context;
678 slb_flush_and_rebolt();
679 }
680 } else if (get_paca()->vmalloc_sllp !=
681 mmu_psize_defs[mmu_vmalloc_psize].sllp) {
682 get_paca()->vmalloc_sllp =
683 mmu_psize_defs[mmu_vmalloc_psize].sllp;
684 slb_flush_and_rebolt();
685 }
686 }
687 if (psize == MMU_PAGE_64K)
633 rc = __hash_page_64K(ea, access, vsid, ptep, trap, local); 688 rc = __hash_page_64K(ea, access, vsid, ptep, trap, local);
634 else 689 else
635 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); 690 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
@@ -681,7 +736,18 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
681#ifndef CONFIG_PPC_64K_PAGES 736#ifndef CONFIG_PPC_64K_PAGES
682 __hash_page_4K(ea, access, vsid, ptep, trap, local); 737 __hash_page_4K(ea, access, vsid, ptep, trap, local);
683#else 738#else
684 if (mmu_virtual_psize == MMU_PAGE_64K) 739 if (mmu_ci_restrictions) {
740 /* If this PTE is non-cacheable, switch to 4k */
741 if (mm->context.user_psize == MMU_PAGE_64K &&
742 (pte_val(*ptep) & _PAGE_NO_CACHE)) {
743 mm->context.user_psize = MMU_PAGE_4K;
744 mm->context.sllp = SLB_VSID_USER |
745 mmu_psize_defs[MMU_PAGE_4K].sllp;
746 get_paca()->context = mm->context;
747 slb_flush_and_rebolt();
748 }
749 }
750 if (mm->context.user_psize == MMU_PAGE_64K)
685 __hash_page_64K(ea, access, vsid, ptep, trap, local); 751 __hash_page_64K(ea, access, vsid, ptep, trap, local);
686 else 752 else
687 __hash_page_4K(ea, access, vsid, ptep, trap, local); 753 __hash_page_4K(ea, access, vsid, ptep, trap, local);