diff options
author | Paul Mackerras <paulus@samba.org> | 2006-06-14 20:45:18 -0400 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2006-06-14 20:45:18 -0400 |
commit | bf72aeba2ffef599d1d386425c9e46b82be657cd (patch) | |
tree | ead8e5111dbcfa22e156999d1bb8a96e50f06fef /arch/powerpc/mm/hash_utils_64.c | |
parent | 31925323b1b51bb65db729e029472a8b1f635b7d (diff) |
powerpc: Use 64k pages without needing cache-inhibited large pages
Some POWER5+ machines can do 64k hardware pages for normal memory but
not for cache-inhibited pages. This patch lets us use 64k hardware
pages for most user processes on such machines (assuming the kernel
has been configured with CONFIG_PPC_64K_PAGES=y). User processes
start out using 64k pages and get switched to 4k pages if they use any
non-cacheable mappings.
With this, we use 64k pages for the vmalloc region and 4k pages for
the imalloc region. If anything creates a non-cacheable mapping in
the vmalloc region, the vmalloc region will get switched to 4k pages.
I don't know of any driver other than the DRM that would do this,
though, and these machines don't have AGP.
When a region gets switched from 64k pages to 4k pages, we do not have
to clear out all the 64k HPTEs from the hash table immediately. We
use the _PAGE_COMBO bit in the Linux PTE to indicate whether the page
was hashed in as a 64k page or a set of 4k pages. If hash_page is
trying to insert a 4k page for a Linux PTE and it sees that it has
already been inserted as a 64k page, it first invalidates the 64k HPTE
before inserting the 4k HPTE. The hash invalidation routines also use
the _PAGE_COMBO bit, to determine whether to look for a 64k HPTE or a
set of 4k HPTEs to remove. With those two changes, we can tolerate a
mix of 4k and 64k HPTEs in the hash table, and they will all get
removed when the address space is torn down.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc/mm/hash_utils_64.c')
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 84 |
1 file changed, 75 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b43ed92ef471..d03fd2b4445e 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c | |||
@@ -92,10 +92,15 @@ unsigned long htab_size_bytes; | |||
92 | unsigned long htab_hash_mask; | 92 | unsigned long htab_hash_mask; |
93 | int mmu_linear_psize = MMU_PAGE_4K; | 93 | int mmu_linear_psize = MMU_PAGE_4K; |
94 | int mmu_virtual_psize = MMU_PAGE_4K; | 94 | int mmu_virtual_psize = MMU_PAGE_4K; |
95 | int mmu_vmalloc_psize = MMU_PAGE_4K; | ||
96 | int mmu_io_psize = MMU_PAGE_4K; | ||
95 | #ifdef CONFIG_HUGETLB_PAGE | 97 | #ifdef CONFIG_HUGETLB_PAGE |
96 | int mmu_huge_psize = MMU_PAGE_16M; | 98 | int mmu_huge_psize = MMU_PAGE_16M; |
97 | unsigned int HPAGE_SHIFT; | 99 | unsigned int HPAGE_SHIFT; |
98 | #endif | 100 | #endif |
101 | #ifdef CONFIG_PPC_64K_PAGES | ||
102 | int mmu_ci_restrictions; | ||
103 | #endif | ||
99 | 104 | ||
100 | /* There are definitions of page sizes arrays to be used when none | 105 | /* There are definitions of page sizes arrays to be used when none |
101 | * is provided by the firmware. | 106 | * is provided by the firmware. |
@@ -308,20 +313,31 @@ static void __init htab_init_page_sizes(void) | |||
308 | else if (mmu_psize_defs[MMU_PAGE_1M].shift) | 313 | else if (mmu_psize_defs[MMU_PAGE_1M].shift) |
309 | mmu_linear_psize = MMU_PAGE_1M; | 314 | mmu_linear_psize = MMU_PAGE_1M; |
310 | 315 | ||
316 | #ifdef CONFIG_PPC_64K_PAGES | ||
311 | /* | 317 | /* |
312 | * Pick a size for the ordinary pages. Default is 4K, we support | 318 | * Pick a size for the ordinary pages. Default is 4K, we support |
313 | * 64K if cache inhibited large pages are supported by the | 319 | * 64K for user mappings and vmalloc if supported by the processor. |
314 | * processor | 320 | * We only use 64k for ioremap if the processor |
321 | * (and firmware) support cache-inhibited large pages. | ||
322 | * If not, we use 4k and set mmu_ci_restrictions so that | ||
323 | * hash_page knows to switch processes that use cache-inhibited | ||
324 | * mappings to 4k pages. | ||
315 | */ | 325 | */ |
316 | #ifdef CONFIG_PPC_64K_PAGES | 326 | if (mmu_psize_defs[MMU_PAGE_64K].shift) { |
317 | if (mmu_psize_defs[MMU_PAGE_64K].shift && | ||
318 | cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) | ||
319 | mmu_virtual_psize = MMU_PAGE_64K; | 327 | mmu_virtual_psize = MMU_PAGE_64K; |
328 | mmu_vmalloc_psize = MMU_PAGE_64K; | ||
329 | if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) | ||
330 | mmu_io_psize = MMU_PAGE_64K; | ||
331 | else | ||
332 | mmu_ci_restrictions = 1; | ||
333 | } | ||
320 | #endif | 334 | #endif |
321 | 335 | ||
322 | printk(KERN_DEBUG "Page orders: linear mapping = %d, others = %d\n", | 336 | printk(KERN_DEBUG "Page orders: linear mapping = %d, " |
337 | "virtual = %d, io = %d\n", | ||
323 | mmu_psize_defs[mmu_linear_psize].shift, | 338 | mmu_psize_defs[mmu_linear_psize].shift, |
324 | mmu_psize_defs[mmu_virtual_psize].shift); | 339 | mmu_psize_defs[mmu_virtual_psize].shift, |
340 | mmu_psize_defs[mmu_io_psize].shift); | ||
325 | 341 | ||
326 | #ifdef CONFIG_HUGETLB_PAGE | 342 | #ifdef CONFIG_HUGETLB_PAGE |
327 | /* Init large page size. Currently, we pick 16M or 1M depending | 343 | /* Init large page size. Currently, we pick 16M or 1M depending |
@@ -556,6 +572,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | |||
556 | pte_t *ptep; | 572 | pte_t *ptep; |
557 | cpumask_t tmp; | 573 | cpumask_t tmp; |
558 | int rc, user_region = 0, local = 0; | 574 | int rc, user_region = 0, local = 0; |
575 | int psize; | ||
559 | 576 | ||
560 | DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", | 577 | DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", |
561 | ea, access, trap); | 578 | ea, access, trap); |
@@ -575,10 +592,15 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | |||
575 | return 1; | 592 | return 1; |
576 | } | 593 | } |
577 | vsid = get_vsid(mm->context.id, ea); | 594 | vsid = get_vsid(mm->context.id, ea); |
595 | psize = mm->context.user_psize; | ||
578 | break; | 596 | break; |
579 | case VMALLOC_REGION_ID: | 597 | case VMALLOC_REGION_ID: |
580 | mm = &init_mm; | 598 | mm = &init_mm; |
581 | vsid = get_kernel_vsid(ea); | 599 | vsid = get_kernel_vsid(ea); |
600 | if (ea < VMALLOC_END) | ||
601 | psize = mmu_vmalloc_psize; | ||
602 | else | ||
603 | psize = mmu_io_psize; | ||
582 | break; | 604 | break; |
583 | default: | 605 | default: |
584 | /* Not a valid range | 606 | /* Not a valid range |
@@ -629,7 +651,40 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | |||
629 | #ifndef CONFIG_PPC_64K_PAGES | 651 | #ifndef CONFIG_PPC_64K_PAGES |
630 | rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); | 652 | rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); |
631 | #else | 653 | #else |
632 | if (mmu_virtual_psize == MMU_PAGE_64K) | 654 | if (mmu_ci_restrictions) { |
655 | /* If this PTE is non-cacheable, switch to 4k */ | ||
656 | if (psize == MMU_PAGE_64K && | ||
657 | (pte_val(*ptep) & _PAGE_NO_CACHE)) { | ||
658 | if (user_region) { | ||
659 | psize = MMU_PAGE_4K; | ||
660 | mm->context.user_psize = MMU_PAGE_4K; | ||
661 | mm->context.sllp = SLB_VSID_USER | | ||
662 | mmu_psize_defs[MMU_PAGE_4K].sllp; | ||
663 | } else if (ea < VMALLOC_END) { | ||
664 | /* | ||
665 | * some driver did a non-cacheable mapping | ||
666 | * in vmalloc space, so switch vmalloc | ||
667 | * to 4k pages | ||
668 | */ | ||
669 | printk(KERN_ALERT "Reducing vmalloc segment " | ||
670 | "to 4kB pages because of " | ||
671 | "non-cacheable mapping\n"); | ||
672 | psize = mmu_vmalloc_psize = MMU_PAGE_4K; | ||
673 | } | ||
674 | } | ||
675 | if (user_region) { | ||
676 | if (psize != get_paca()->context.user_psize) { | ||
677 | get_paca()->context = mm->context; | ||
678 | slb_flush_and_rebolt(); | ||
679 | } | ||
680 | } else if (get_paca()->vmalloc_sllp != | ||
681 | mmu_psize_defs[mmu_vmalloc_psize].sllp) { | ||
682 | get_paca()->vmalloc_sllp = | ||
683 | mmu_psize_defs[mmu_vmalloc_psize].sllp; | ||
684 | slb_flush_and_rebolt(); | ||
685 | } | ||
686 | } | ||
687 | if (psize == MMU_PAGE_64K) | ||
633 | rc = __hash_page_64K(ea, access, vsid, ptep, trap, local); | 688 | rc = __hash_page_64K(ea, access, vsid, ptep, trap, local); |
634 | else | 689 | else |
635 | rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); | 690 | rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); |
@@ -681,7 +736,18 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, | |||
681 | #ifndef CONFIG_PPC_64K_PAGES | 736 | #ifndef CONFIG_PPC_64K_PAGES |
682 | __hash_page_4K(ea, access, vsid, ptep, trap, local); | 737 | __hash_page_4K(ea, access, vsid, ptep, trap, local); |
683 | #else | 738 | #else |
684 | if (mmu_virtual_psize == MMU_PAGE_64K) | 739 | if (mmu_ci_restrictions) { |
740 | /* If this PTE is non-cacheable, switch to 4k */ | ||
741 | if (mm->context.user_psize == MMU_PAGE_64K && | ||
742 | (pte_val(*ptep) & _PAGE_NO_CACHE)) { | ||
743 | mm->context.user_psize = MMU_PAGE_4K; | ||
744 | mm->context.sllp = SLB_VSID_USER | | ||
745 | mmu_psize_defs[MMU_PAGE_4K].sllp; | ||
746 | get_paca()->context = mm->context; | ||
747 | slb_flush_and_rebolt(); | ||
748 | } | ||
749 | } | ||
750 | if (mm->context.user_psize == MMU_PAGE_64K) | ||
685 | __hash_page_64K(ea, access, vsid, ptep, trap, local); | 751 | __hash_page_64K(ea, access, vsid, ptep, trap, local); |
686 | else | 752 | else |
687 | __hash_page_4K(ea, access, vsid, ptep, trap, local); | 753 | __hash_page_4K(ea, access, vsid, ptep, trap, local); |