author     Linus Torvalds <torvalds@woody.linux-foundation.org>   2007-05-09 15:56:01 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>   2007-05-09 15:56:01 -0400
commit     aabded9c3aab5160ae2ca3dd1fa0fa37f3d510e4 (patch)
tree       8544d546735bcb975b8dec296eb9b6dc6531fb2a /arch/powerpc
parent     9a9136e270af14da506f66bcafcc506b86a86498 (diff)
parent     f1a1eb299a8422c3e8d41753095bec44b2493398 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
* 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc:
[POWERPC] Further fixes for the removal of 4level-fixup hack from ppc32
[POWERPC] EEH: log all PCI-X and PCI-E AER registers
[POWERPC] EEH: capture and log pci state on error
[POWERPC] EEH: Split up long error msg
[POWERPC] EEH: log error only after driver notification.
[POWERPC] fsl_soc: Make mac_addr const in fs_enet_of_init().
[POWERPC] Don't use SLAB/SLUB for PTE pages
[POWERPC] Spufs support for 64K LS mappings on 4K kernels
[POWERPC] Add ability to 4K kernel to hash in 64K pages
[POWERPC] Introduce address space "slices"
[POWERPC] Small fixes & cleanups in segment page size demotion
[POWERPC] iSeries: Make HVC_ISERIES the default
[POWERPC] iSeries: suppress build warning in lparmap.c
[POWERPC] Mark pages that don't exist as nosave
[POWERPC] swsusp: Introduce register_nosave_region_late
Diffstat (limited to 'arch/powerpc')
28 files changed, 1219 insertions, 714 deletions
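The centerpiece of this batch is the new "slices" code in arch/powerpc/mm/slice.c below. As a reading aid, here is an editorial sketch — not code from the patch — of how the per-slice page-size bookkeeping it introduces is meant to be read: each mm carries two 64-bit fields, context.low_slices_psize and context.high_slices_psize, holding a 4-bit MMU page-size index per slice; the low slices are the 256MB segments below 4GB, and addresses above 4GB fall into larger "high" slices whose size is configuration dependent. The EX_* constants and the helper name are illustrative stand-ins, not kernel definitions; the 4-bit unpack is the same one slice.c uses.

/* Editorial sketch only -- constants and helper name are stand-ins. */
#include <stdint.h>

#define EX_SLICE_LOW_SHIFT  28                  /* 256MB low slices, == SID_SHIFT */
#define EX_SLICE_LOW_TOP    (1ull << 32)        /* low slices cover 0..4GB */

static unsigned int ex_slice_psize(uint64_t low_slices_psize,
                                   uint64_t high_slices_psize,
                                   unsigned int high_shift, uint64_t addr)
{
        uint64_t idx, field;

        if (addr < EX_SLICE_LOW_TOP) {
                idx = addr >> EX_SLICE_LOW_SHIFT;       /* slice == 256MB segment */
                field = low_slices_psize;
        } else {
                idx = addr >> high_shift;               /* high-slice size is config dependent */
                field = high_slices_psize;
        }
        return (field >> (idx * 4)) & 0xf;              /* 4 bits of psize per slice */
}

This is roughly what the get_slice_psize() calls added in hash_utils_64.c and hugetlbpage.c below resolve to.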
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 808d2ef80e2f..ccc5410af996 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -120,19 +120,6 @@ config GENERIC_BUG | |||
120 | config SYS_SUPPORTS_APM_EMULATION | 120 | config SYS_SUPPORTS_APM_EMULATION |
121 | bool | 121 | bool |
122 | 122 | ||
123 | # | ||
124 | # Powerpc uses the slab allocator to manage its ptes and the | ||
125 | # page structs of ptes are used for splitting the page table | ||
126 | # lock for configurations supporting more than SPLIT_PTLOCK_CPUS. | ||
127 | # | ||
128 | # In that special configuration the page structs of slabs are modified. | ||
129 | # This setting disables the selection of SLUB as a slab allocator. | ||
130 | # | ||
131 | config ARCH_USES_SLAB_PAGE_STRUCT | ||
132 | bool | ||
133 | default y | ||
134 | depends on SPLIT_PTLOCK_CPUS <= NR_CPUS | ||
135 | |||
136 | config DEFAULT_UIMAGE | 123 | config DEFAULT_UIMAGE |
137 | bool | 124 | bool |
138 | help | 125 | help |
@@ -352,6 +339,11 @@ config PPC_STD_MMU_32 | |||
352 | def_bool y | 339 | def_bool y |
353 | depends on PPC_STD_MMU && PPC32 | 340 | depends on PPC_STD_MMU && PPC32 |
354 | 341 | ||
342 | config PPC_MM_SLICES | ||
343 | bool | ||
344 | default y if HUGETLB_PAGE | ||
345 | default n | ||
346 | |||
355 | config VIRT_CPU_ACCOUNTING | 347 | config VIRT_CPU_ACCOUNTING |
356 | bool "Deterministic task and CPU time accounting" | 348 | bool "Deterministic task and CPU time accounting" |
357 | depends on PPC64 | 349 | depends on PPC64 |
@@ -541,9 +533,15 @@ config NODES_SPAN_OTHER_NODES | |||
541 | def_bool y | 533 | def_bool y |
542 | depends on NEED_MULTIPLE_NODES | 534 | depends on NEED_MULTIPLE_NODES |
543 | 535 | ||
536 | config PPC_HAS_HASH_64K | ||
537 | bool | ||
538 | depends on PPC64 | ||
539 | default n | ||
540 | |||
544 | config PPC_64K_PAGES | 541 | config PPC_64K_PAGES |
545 | bool "64k page size" | 542 | bool "64k page size" |
546 | depends on PPC64 | 543 | depends on PPC64 |
544 | select PPC_HAS_HASH_64K | ||
547 | help | 545 | help |
548 | This option changes the kernel logical page size to 64k. On machines | 546 | This option changes the kernel logical page size to 64k. On machines |
549 | without processor support for 64k pages, the kernel will simulate | 547 | without processor support for 64k pages, the kernel will simulate |
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 37bc35e69dbe..2cb1d9487796 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -122,12 +122,18 @@ int main(void) | |||
122 | DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); | 122 | DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); |
123 | DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); | 123 | DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); |
124 | DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); | 124 | DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); |
125 | DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp)); | ||
126 | DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp)); | 125 | DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp)); |
127 | #ifdef CONFIG_HUGETLB_PAGE | 126 | #ifdef CONFIG_PPC_MM_SLICES |
128 | DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); | 127 | DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, |
129 | DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); | 128 | context.low_slices_psize)); |
130 | #endif /* CONFIG_HUGETLB_PAGE */ | 129 | DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct, |
130 | context.high_slices_psize)); | ||
131 | DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); | ||
132 | DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp)); | ||
133 | #else | ||
134 | DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp)); | ||
135 | |||
136 | #endif /* CONFIG_PPC_MM_SLICES */ | ||
131 | DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); | 137 | DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); |
132 | DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); | 138 | DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); |
133 | DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb)); | 139 | DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb)); |
diff --git a/arch/powerpc/kernel/lparmap.c b/arch/powerpc/kernel/lparmap.c
index 584d1e3c013d..af11285ffbd1 100644
--- a/arch/powerpc/kernel/lparmap.c
+++ b/arch/powerpc/kernel/lparmap.c
@@ -10,7 +10,8 @@ | |||
10 | #include <asm/pgtable.h> | 10 | #include <asm/pgtable.h> |
11 | #include <asm/iseries/lpar_map.h> | 11 | #include <asm/iseries/lpar_map.h> |
12 | 12 | ||
13 | const struct LparMap __attribute__((__section__(".text"))) xLparMap = { | 13 | /* The # is to stop gcc trying to make .text nonexecutable */ |
14 | const struct LparMap __attribute__((__section__(".text #"))) xLparMap = { | ||
14 | .xNumberEsids = HvEsidsToMap, | 15 | .xNumberEsids = HvEsidsToMap, |
15 | .xNumberRanges = HvRangesToMap, | 16 | .xNumberRanges = HvRangesToMap, |
16 | .xSegmentTableOffs = STAB0_PAGE, | 17 | .xSegmentTableOffs = STAB0_PAGE, |
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 38a81967ca07..4f839c6a9768 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -18,4 +18,5 @@ obj-$(CONFIG_40x) += 4xx_mmu.o | |||
18 | obj-$(CONFIG_44x) += 44x_mmu.o | 18 | obj-$(CONFIG_44x) += 44x_mmu.o |
19 | obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o | 19 | obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o |
20 | obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o | 20 | obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o |
21 | obj-$(CONFIG_PPC_MM_SLICES) += slice.o | ||
21 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o | 22 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o |
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index e64ce3eec36e..4762ff7c14df 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -615,6 +615,9 @@ htab_pte_insert_failure: | |||
615 | li r3,-1 | 615 | li r3,-1 |
616 | b htab_bail | 616 | b htab_bail |
617 | 617 | ||
618 | #endif /* CONFIG_PPC_64K_PAGES */ | ||
619 | |||
620 | #ifdef CONFIG_PPC_HAS_HASH_64K | ||
618 | 621 | ||
619 | /***************************************************************************** | 622 | /***************************************************************************** |
620 | * * | 623 | * * |
@@ -870,7 +873,7 @@ ht64_pte_insert_failure: | |||
870 | b ht64_bail | 873 | b ht64_bail |
871 | 874 | ||
872 | 875 | ||
873 | #endif /* CONFIG_PPC_64K_PAGES */ | 876 | #endif /* CONFIG_PPC_HAS_HASH_64K */ |
874 | 877 | ||
875 | 878 | ||
876 | /***************************************************************************** | 879 | /***************************************************************************** |
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 9b226fa7006f..028ba4ed03d2 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -51,6 +51,7 @@ | |||
51 | #include <asm/cputable.h> | 51 | #include <asm/cputable.h> |
52 | #include <asm/abs_addr.h> | 52 | #include <asm/abs_addr.h> |
53 | #include <asm/sections.h> | 53 | #include <asm/sections.h> |
54 | #include <asm/spu.h> | ||
54 | 55 | ||
55 | #ifdef DEBUG | 56 | #ifdef DEBUG |
56 | #define DBG(fmt...) udbg_printf(fmt) | 57 | #define DBG(fmt...) udbg_printf(fmt) |
@@ -419,7 +420,7 @@ static void __init htab_finish_init(void) | |||
419 | extern unsigned int *htab_call_hpte_remove; | 420 | extern unsigned int *htab_call_hpte_remove; |
420 | extern unsigned int *htab_call_hpte_updatepp; | 421 | extern unsigned int *htab_call_hpte_updatepp; |
421 | 422 | ||
422 | #ifdef CONFIG_PPC_64K_PAGES | 423 | #ifdef CONFIG_PPC_HAS_HASH_64K |
423 | extern unsigned int *ht64_call_hpte_insert1; | 424 | extern unsigned int *ht64_call_hpte_insert1; |
424 | extern unsigned int *ht64_call_hpte_insert2; | 425 | extern unsigned int *ht64_call_hpte_insert2; |
425 | extern unsigned int *ht64_call_hpte_remove; | 426 | extern unsigned int *ht64_call_hpte_remove; |
@@ -596,22 +597,23 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) | |||
596 | * Demote a segment to using 4k pages. | 597 | * Demote a segment to using 4k pages. |
597 | * For now this makes the whole process use 4k pages. | 598 | * For now this makes the whole process use 4k pages. |
598 | */ | 599 | */ |
599 | void demote_segment_4k(struct mm_struct *mm, unsigned long addr) | ||
600 | { | ||
601 | #ifdef CONFIG_PPC_64K_PAGES | 600 | #ifdef CONFIG_PPC_64K_PAGES |
601 | static void demote_segment_4k(struct mm_struct *mm, unsigned long addr) | ||
602 | { | ||
602 | if (mm->context.user_psize == MMU_PAGE_4K) | 603 | if (mm->context.user_psize == MMU_PAGE_4K) |
603 | return; | 604 | return; |
605 | #ifdef CONFIG_PPC_MM_SLICES | ||
606 | slice_set_user_psize(mm, MMU_PAGE_4K); | ||
607 | #else /* CONFIG_PPC_MM_SLICES */ | ||
604 | mm->context.user_psize = MMU_PAGE_4K; | 608 | mm->context.user_psize = MMU_PAGE_4K; |
605 | mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[MMU_PAGE_4K].sllp; | 609 | mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[MMU_PAGE_4K].sllp; |
606 | get_paca()->context = mm->context; | 610 | #endif /* CONFIG_PPC_MM_SLICES */ |
607 | slb_flush_and_rebolt(); | 611 | |
608 | #ifdef CONFIG_SPE_BASE | 612 | #ifdef CONFIG_SPE_BASE |
609 | spu_flush_all_slbs(mm); | 613 | spu_flush_all_slbs(mm); |
610 | #endif | 614 | #endif |
611 | #endif | ||
612 | } | 615 | } |
613 | 616 | #endif /* CONFIG_PPC_64K_PAGES */ | |
614 | EXPORT_SYMBOL_GPL(demote_segment_4k); | ||
615 | 617 | ||
616 | /* Result code is: | 618 | /* Result code is: |
617 | * 0 - handled | 619 | * 0 - handled |
@@ -646,7 +648,11 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | |||
646 | return 1; | 648 | return 1; |
647 | } | 649 | } |
648 | vsid = get_vsid(mm->context.id, ea); | 650 | vsid = get_vsid(mm->context.id, ea); |
651 | #ifdef CONFIG_PPC_MM_SLICES | ||
652 | psize = get_slice_psize(mm, ea); | ||
653 | #else | ||
649 | psize = mm->context.user_psize; | 654 | psize = mm->context.user_psize; |
655 | #endif | ||
650 | break; | 656 | break; |
651 | case VMALLOC_REGION_ID: | 657 | case VMALLOC_REGION_ID: |
652 | mm = &init_mm; | 658 | mm = &init_mm; |
@@ -674,11 +680,22 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | |||
674 | if (user_region && cpus_equal(mm->cpu_vm_mask, tmp)) | 680 | if (user_region && cpus_equal(mm->cpu_vm_mask, tmp)) |
675 | local = 1; | 681 | local = 1; |
676 | 682 | ||
683 | #ifdef CONFIG_HUGETLB_PAGE | ||
677 | /* Handle hugepage regions */ | 684 | /* Handle hugepage regions */ |
678 | if (unlikely(in_hugepage_area(mm->context, ea))) { | 685 | if (HPAGE_SHIFT && psize == mmu_huge_psize) { |
679 | DBG_LOW(" -> huge page !\n"); | 686 | DBG_LOW(" -> huge page !\n"); |
680 | return hash_huge_page(mm, access, ea, vsid, local, trap); | 687 | return hash_huge_page(mm, access, ea, vsid, local, trap); |
681 | } | 688 | } |
689 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
690 | |||
691 | #ifndef CONFIG_PPC_64K_PAGES | ||
692 | /* If we use 4K pages and our psize is not 4K, then we are hitting | ||
693 | * a special driver mapping, we need to align the address before | ||
694 | * we fetch the PTE | ||
695 | */ | ||
696 | if (psize != MMU_PAGE_4K) | ||
697 | ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1); | ||
698 | #endif /* CONFIG_PPC_64K_PAGES */ | ||
682 | 699 | ||
683 | /* Get PTE and page size from page tables */ | 700 | /* Get PTE and page size from page tables */ |
684 | ptep = find_linux_pte(pgdir, ea); | 701 | ptep = find_linux_pte(pgdir, ea); |
@@ -702,54 +719,56 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | |||
702 | } | 719 | } |
703 | 720 | ||
704 | /* Do actual hashing */ | 721 | /* Do actual hashing */ |
705 | #ifndef CONFIG_PPC_64K_PAGES | 722 | #ifdef CONFIG_PPC_64K_PAGES |
706 | rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); | ||
707 | #else | ||
708 | /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */ | 723 | /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */ |
709 | if (pte_val(*ptep) & _PAGE_4K_PFN) { | 724 | if (pte_val(*ptep) & _PAGE_4K_PFN) { |
710 | demote_segment_4k(mm, ea); | 725 | demote_segment_4k(mm, ea); |
711 | psize = MMU_PAGE_4K; | 726 | psize = MMU_PAGE_4K; |
712 | } | 727 | } |
713 | 728 | ||
714 | if (mmu_ci_restrictions) { | 729 | /* If this PTE is non-cacheable and we have restrictions on |
715 | /* If this PTE is non-cacheable, switch to 4k */ | 730 | * using non cacheable large pages, then we switch to 4k |
716 | if (psize == MMU_PAGE_64K && | 731 | */ |
717 | (pte_val(*ptep) & _PAGE_NO_CACHE)) { | 732 | if (mmu_ci_restrictions && psize == MMU_PAGE_64K && |
718 | if (user_region) { | 733 | (pte_val(*ptep) & _PAGE_NO_CACHE)) { |
719 | demote_segment_4k(mm, ea); | 734 | if (user_region) { |
720 | psize = MMU_PAGE_4K; | 735 | demote_segment_4k(mm, ea); |
721 | } else if (ea < VMALLOC_END) { | 736 | psize = MMU_PAGE_4K; |
722 | /* | 737 | } else if (ea < VMALLOC_END) { |
723 | * some driver did a non-cacheable mapping | 738 | /* |
724 | * in vmalloc space, so switch vmalloc | 739 | * some driver did a non-cacheable mapping |
725 | * to 4k pages | 740 | * in vmalloc space, so switch vmalloc |
726 | */ | 741 | * to 4k pages |
727 | printk(KERN_ALERT "Reducing vmalloc segment " | 742 | */ |
728 | "to 4kB pages because of " | 743 | printk(KERN_ALERT "Reducing vmalloc segment " |
729 | "non-cacheable mapping\n"); | 744 | "to 4kB pages because of " |
730 | psize = mmu_vmalloc_psize = MMU_PAGE_4K; | 745 | "non-cacheable mapping\n"); |
731 | } | 746 | psize = mmu_vmalloc_psize = MMU_PAGE_4K; |
732 | #ifdef CONFIG_SPE_BASE | 747 | #ifdef CONFIG_SPE_BASE |
733 | spu_flush_all_slbs(mm); | 748 | spu_flush_all_slbs(mm); |
734 | #endif | 749 | #endif |
735 | } | 750 | } |
736 | if (user_region) { | 751 | } |
737 | if (psize != get_paca()->context.user_psize) { | 752 | if (user_region) { |
738 | get_paca()->context = mm->context; | 753 | if (psize != get_paca()->context.user_psize) { |
739 | slb_flush_and_rebolt(); | 754 | get_paca()->context.user_psize = |
740 | } | 755 | mm->context.user_psize; |
741 | } else if (get_paca()->vmalloc_sllp != | ||
742 | mmu_psize_defs[mmu_vmalloc_psize].sllp) { | ||
743 | get_paca()->vmalloc_sllp = | ||
744 | mmu_psize_defs[mmu_vmalloc_psize].sllp; | ||
745 | slb_flush_and_rebolt(); | 756 | slb_flush_and_rebolt(); |
746 | } | 757 | } |
758 | } else if (get_paca()->vmalloc_sllp != | ||
759 | mmu_psize_defs[mmu_vmalloc_psize].sllp) { | ||
760 | get_paca()->vmalloc_sllp = | ||
761 | mmu_psize_defs[mmu_vmalloc_psize].sllp; | ||
762 | slb_flush_and_rebolt(); | ||
747 | } | 763 | } |
764 | #endif /* CONFIG_PPC_64K_PAGES */ | ||
765 | |||
766 | #ifdef CONFIG_PPC_HAS_HASH_64K | ||
748 | if (psize == MMU_PAGE_64K) | 767 | if (psize == MMU_PAGE_64K) |
749 | rc = __hash_page_64K(ea, access, vsid, ptep, trap, local); | 768 | rc = __hash_page_64K(ea, access, vsid, ptep, trap, local); |
750 | else | 769 | else |
770 | #endif /* CONFIG_PPC_HAS_HASH_64K */ | ||
751 | rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); | 771 | rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); |
752 | #endif /* CONFIG_PPC_64K_PAGES */ | ||
753 | 772 | ||
754 | #ifndef CONFIG_PPC_64K_PAGES | 773 | #ifndef CONFIG_PPC_64K_PAGES |
755 | DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); | 774 | DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); |
@@ -772,42 +791,55 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, | |||
772 | unsigned long flags; | 791 | unsigned long flags; |
773 | int local = 0; | 792 | int local = 0; |
774 | 793 | ||
775 | /* We don't want huge pages prefaulted for now | 794 | BUG_ON(REGION_ID(ea) != USER_REGION_ID); |
776 | */ | 795 | |
777 | if (unlikely(in_hugepage_area(mm->context, ea))) | 796 | #ifdef CONFIG_PPC_MM_SLICES |
797 | /* We only prefault standard pages for now */ | ||
798 | if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize)); | ||
778 | return; | 799 | return; |
800 | #endif | ||
779 | 801 | ||
780 | DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx," | 802 | DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx," |
781 | " trap=%lx\n", mm, mm->pgd, ea, access, trap); | 803 | " trap=%lx\n", mm, mm->pgd, ea, access, trap); |
782 | 804 | ||
783 | /* Get PTE, VSID, access mask */ | 805 | /* Get Linux PTE if available */ |
784 | pgdir = mm->pgd; | 806 | pgdir = mm->pgd; |
785 | if (pgdir == NULL) | 807 | if (pgdir == NULL) |
786 | return; | 808 | return; |
787 | ptep = find_linux_pte(pgdir, ea); | 809 | ptep = find_linux_pte(pgdir, ea); |
788 | if (!ptep) | 810 | if (!ptep) |
789 | return; | 811 | return; |
812 | |||
813 | #ifdef CONFIG_PPC_64K_PAGES | ||
814 | /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on | ||
815 | * a 64K kernel), then we don't preload, hash_page() will take | ||
816 | * care of it once we actually try to access the page. | ||
817 | * That way we don't have to duplicate all of the logic for segment | ||
818 | * page size demotion here | ||
819 | */ | ||
820 | if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE)) | ||
821 | return; | ||
822 | #endif /* CONFIG_PPC_64K_PAGES */ | ||
823 | |||
824 | /* Get VSID */ | ||
790 | vsid = get_vsid(mm->context.id, ea); | 825 | vsid = get_vsid(mm->context.id, ea); |
791 | 826 | ||
792 | /* Hash it in */ | 827 | /* Hash doesn't like irqs */ |
793 | local_irq_save(flags); | 828 | local_irq_save(flags); |
829 | |||
830 | /* Is that local to this CPU ? */ | ||
794 | mask = cpumask_of_cpu(smp_processor_id()); | 831 | mask = cpumask_of_cpu(smp_processor_id()); |
795 | if (cpus_equal(mm->cpu_vm_mask, mask)) | 832 | if (cpus_equal(mm->cpu_vm_mask, mask)) |
796 | local = 1; | 833 | local = 1; |
797 | #ifndef CONFIG_PPC_64K_PAGES | 834 | |
798 | __hash_page_4K(ea, access, vsid, ptep, trap, local); | 835 | /* Hash it in */ |
799 | #else | 836 | #ifdef CONFIG_PPC_HAS_HASH_64K |
800 | if (mmu_ci_restrictions) { | ||
801 | /* If this PTE is non-cacheable, switch to 4k */ | ||
802 | if (mm->context.user_psize == MMU_PAGE_64K && | ||
803 | (pte_val(*ptep) & _PAGE_NO_CACHE)) | ||
804 | demote_segment_4k(mm, ea); | ||
805 | } | ||
806 | if (mm->context.user_psize == MMU_PAGE_64K) | 837 | if (mm->context.user_psize == MMU_PAGE_64K) |
807 | __hash_page_64K(ea, access, vsid, ptep, trap, local); | 838 | __hash_page_64K(ea, access, vsid, ptep, trap, local); |
808 | else | 839 | else |
809 | __hash_page_4K(ea, access, vsid, ptep, trap, local); | ||
810 | #endif /* CONFIG_PPC_64K_PAGES */ | 840 | #endif /* CONFIG_PPC_64K_PAGES */ |
841 | __hash_page_4K(ea, access, vsid, ptep, trap, local); | ||
842 | |||
811 | local_irq_restore(flags); | 843 | local_irq_restore(flags); |
812 | } | 844 | } |
813 | 845 | ||
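The new alignment step for "special driver mappings" in hash_page() above is easy to get backwards, so a worked example may help (a sketch with a made-up address; the shift is just log2 of the mapping size). On a 4K-page kernel, a 64K mapping — the case the "Spufs support for 64K LS mappings on 4K kernels" item in the shortlog appears to rely on — has mmu_psize_defs[psize].shift == 16, and the faulting address must be pushed down to the 64K boundary before the PTE is fetched:

/* Worked example, hypothetical address: align a fault inside a 64K
 * special mapping down to the mapping's base before fetching the PTE. */
unsigned long ea = 0xf0003a1234ul;              /* made-up faulting address */
ea &= ~((1ul << 16) - 1);                       /* mmu_psize_defs[psize].shift == 16 */
/* ea is now 0xf0003a0000, the 64K-aligned base of the mapping */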
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index fb959264c104..92a1b16fb7e3 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -91,7 +91,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | |||
91 | pgd_t *pg; | 91 | pgd_t *pg; |
92 | pud_t *pu; | 92 | pud_t *pu; |
93 | 93 | ||
94 | BUG_ON(! in_hugepage_area(mm->context, addr)); | 94 | BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); |
95 | 95 | ||
96 | addr &= HPAGE_MASK; | 96 | addr &= HPAGE_MASK; |
97 | 97 | ||
@@ -119,7 +119,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | |||
119 | pud_t *pu; | 119 | pud_t *pu; |
120 | hugepd_t *hpdp = NULL; | 120 | hugepd_t *hpdp = NULL; |
121 | 121 | ||
122 | BUG_ON(! in_hugepage_area(mm->context, addr)); | 122 | BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); |
123 | 123 | ||
124 | addr &= HPAGE_MASK; | 124 | addr &= HPAGE_MASK; |
125 | 125 | ||
@@ -302,7 +302,7 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb, | |||
302 | start = addr; | 302 | start = addr; |
303 | pgd = pgd_offset((*tlb)->mm, addr); | 303 | pgd = pgd_offset((*tlb)->mm, addr); |
304 | do { | 304 | do { |
305 | BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr)); | 305 | BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize); |
306 | next = pgd_addr_end(addr, end); | 306 | next = pgd_addr_end(addr, end); |
307 | if (pgd_none_or_clear_bad(pgd)) | 307 | if (pgd_none_or_clear_bad(pgd)) |
308 | continue; | 308 | continue; |
@@ -331,203 +331,13 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, | |||
331 | return __pte(old); | 331 | return __pte(old); |
332 | } | 332 | } |
333 | 333 | ||
334 | struct slb_flush_info { | ||
335 | struct mm_struct *mm; | ||
336 | u16 newareas; | ||
337 | }; | ||
338 | |||
339 | static void flush_low_segments(void *parm) | ||
340 | { | ||
341 | struct slb_flush_info *fi = parm; | ||
342 | unsigned long i; | ||
343 | |||
344 | BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS); | ||
345 | |||
346 | if (current->active_mm != fi->mm) | ||
347 | return; | ||
348 | |||
349 | /* Only need to do anything if this CPU is working in the same | ||
350 | * mm as the one which has changed */ | ||
351 | |||
352 | /* update the paca copy of the context struct */ | ||
353 | get_paca()->context = current->active_mm->context; | ||
354 | |||
355 | asm volatile("isync" : : : "memory"); | ||
356 | for (i = 0; i < NUM_LOW_AREAS; i++) { | ||
357 | if (! (fi->newareas & (1U << i))) | ||
358 | continue; | ||
359 | asm volatile("slbie %0" | ||
360 | : : "r" ((i << SID_SHIFT) | SLBIE_C)); | ||
361 | } | ||
362 | asm volatile("isync" : : : "memory"); | ||
363 | } | ||
364 | |||
365 | static void flush_high_segments(void *parm) | ||
366 | { | ||
367 | struct slb_flush_info *fi = parm; | ||
368 | unsigned long i, j; | ||
369 | |||
370 | |||
371 | BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS); | ||
372 | |||
373 | if (current->active_mm != fi->mm) | ||
374 | return; | ||
375 | |||
376 | /* Only need to do anything if this CPU is working in the same | ||
377 | * mm as the one which has changed */ | ||
378 | |||
379 | /* update the paca copy of the context struct */ | ||
380 | get_paca()->context = current->active_mm->context; | ||
381 | |||
382 | asm volatile("isync" : : : "memory"); | ||
383 | for (i = 0; i < NUM_HIGH_AREAS; i++) { | ||
384 | if (! (fi->newareas & (1U << i))) | ||
385 | continue; | ||
386 | for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) | ||
387 | asm volatile("slbie %0" | ||
388 | :: "r" (((i << HTLB_AREA_SHIFT) | ||
389 | + (j << SID_SHIFT)) | SLBIE_C)); | ||
390 | } | ||
391 | asm volatile("isync" : : : "memory"); | ||
392 | } | ||
393 | |||
394 | static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) | ||
395 | { | ||
396 | unsigned long start = area << SID_SHIFT; | ||
397 | unsigned long end = (area+1) << SID_SHIFT; | ||
398 | struct vm_area_struct *vma; | ||
399 | |||
400 | BUG_ON(area >= NUM_LOW_AREAS); | ||
401 | |||
402 | /* Check no VMAs are in the region */ | ||
403 | vma = find_vma(mm, start); | ||
404 | if (vma && (vma->vm_start < end)) | ||
405 | return -EBUSY; | ||
406 | |||
407 | return 0; | ||
408 | } | ||
409 | |||
410 | static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) | ||
411 | { | ||
412 | unsigned long start = area << HTLB_AREA_SHIFT; | ||
413 | unsigned long end = (area+1) << HTLB_AREA_SHIFT; | ||
414 | struct vm_area_struct *vma; | ||
415 | |||
416 | BUG_ON(area >= NUM_HIGH_AREAS); | ||
417 | |||
418 | /* Hack, so that each addresses is controlled by exactly one | ||
419 | * of the high or low area bitmaps, the first high area starts | ||
420 | * at 4GB, not 0 */ | ||
421 | if (start == 0) | ||
422 | start = 0x100000000UL; | ||
423 | |||
424 | /* Check no VMAs are in the region */ | ||
425 | vma = find_vma(mm, start); | ||
426 | if (vma && (vma->vm_start < end)) | ||
427 | return -EBUSY; | ||
428 | |||
429 | return 0; | ||
430 | } | ||
431 | |||
432 | static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) | ||
433 | { | ||
434 | unsigned long i; | ||
435 | struct slb_flush_info fi; | ||
436 | |||
437 | BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); | ||
438 | BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); | ||
439 | |||
440 | newareas &= ~(mm->context.low_htlb_areas); | ||
441 | if (! newareas) | ||
442 | return 0; /* The segments we want are already open */ | ||
443 | |||
444 | for (i = 0; i < NUM_LOW_AREAS; i++) | ||
445 | if ((1 << i) & newareas) | ||
446 | if (prepare_low_area_for_htlb(mm, i) != 0) | ||
447 | return -EBUSY; | ||
448 | |||
449 | mm->context.low_htlb_areas |= newareas; | ||
450 | |||
451 | /* the context change must make it to memory before the flush, | ||
452 | * so that further SLB misses do the right thing. */ | ||
453 | mb(); | ||
454 | |||
455 | fi.mm = mm; | ||
456 | fi.newareas = newareas; | ||
457 | on_each_cpu(flush_low_segments, &fi, 0, 1); | ||
458 | |||
459 | return 0; | ||
460 | } | ||
461 | |||
462 | static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) | ||
463 | { | ||
464 | struct slb_flush_info fi; | ||
465 | unsigned long i; | ||
466 | |||
467 | BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); | ||
468 | BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) | ||
469 | != NUM_HIGH_AREAS); | ||
470 | |||
471 | newareas &= ~(mm->context.high_htlb_areas); | ||
472 | if (! newareas) | ||
473 | return 0; /* The areas we want are already open */ | ||
474 | |||
475 | for (i = 0; i < NUM_HIGH_AREAS; i++) | ||
476 | if ((1 << i) & newareas) | ||
477 | if (prepare_high_area_for_htlb(mm, i) != 0) | ||
478 | return -EBUSY; | ||
479 | |||
480 | mm->context.high_htlb_areas |= newareas; | ||
481 | |||
482 | /* the context change must make it to memory before the flush, | ||
483 | * so that further SLB misses do the right thing. */ | ||
484 | mb(); | ||
485 | |||
486 | fi.mm = mm; | ||
487 | fi.newareas = newareas; | ||
488 | on_each_cpu(flush_high_segments, &fi, 0, 1); | ||
489 | |||
490 | return 0; | ||
491 | } | ||
492 | |||
493 | int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff) | ||
494 | { | ||
495 | int err = 0; | ||
496 | |||
497 | if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT)) | ||
498 | return -EINVAL; | ||
499 | if (len & ~HPAGE_MASK) | ||
500 | return -EINVAL; | ||
501 | if (addr & ~HPAGE_MASK) | ||
502 | return -EINVAL; | ||
503 | |||
504 | if (addr < 0x100000000UL) | ||
505 | err = open_low_hpage_areas(current->mm, | ||
506 | LOW_ESID_MASK(addr, len)); | ||
507 | if ((addr + len) > 0x100000000UL) | ||
508 | err = open_high_hpage_areas(current->mm, | ||
509 | HTLB_AREA_MASK(addr, len)); | ||
510 | #ifdef CONFIG_SPE_BASE | ||
511 | spu_flush_all_slbs(current->mm); | ||
512 | #endif | ||
513 | if (err) { | ||
514 | printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" | ||
515 | " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", | ||
516 | addr, len, | ||
517 | LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len)); | ||
518 | return err; | ||
519 | } | ||
520 | |||
521 | return 0; | ||
522 | } | ||
523 | |||
524 | struct page * | 334 | struct page * |
525 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) | 335 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) |
526 | { | 336 | { |
527 | pte_t *ptep; | 337 | pte_t *ptep; |
528 | struct page *page; | 338 | struct page *page; |
529 | 339 | ||
530 | if (! in_hugepage_area(mm->context, address)) | 340 | if (get_slice_psize(mm, address) != mmu_huge_psize) |
531 | return ERR_PTR(-EINVAL); | 341 | return ERR_PTR(-EINVAL); |
532 | 342 | ||
533 | ptep = huge_pte_offset(mm, address); | 343 | ptep = huge_pte_offset(mm, address); |
@@ -551,359 +361,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |||
551 | return NULL; | 361 | return NULL; |
552 | } | 362 | } |
553 | 363 | ||
554 | /* Because we have an exclusive hugepage region which lies within the | ||
555 | * normal user address space, we have to take special measures to make | ||
556 | * non-huge mmap()s evade the hugepage reserved regions. */ | ||
557 | unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, | ||
558 | unsigned long len, unsigned long pgoff, | ||
559 | unsigned long flags) | ||
560 | { | ||
561 | struct mm_struct *mm = current->mm; | ||
562 | struct vm_area_struct *vma; | ||
563 | unsigned long start_addr; | ||
564 | |||
565 | if (len > TASK_SIZE) | ||
566 | return -ENOMEM; | ||
567 | |||
568 | /* handle fixed mapping: prevent overlap with huge pages */ | ||
569 | if (flags & MAP_FIXED) { | ||
570 | if (is_hugepage_only_range(mm, addr, len)) | ||
571 | return -EINVAL; | ||
572 | return addr; | ||
573 | } | ||
574 | |||
575 | if (addr) { | ||
576 | addr = PAGE_ALIGN(addr); | ||
577 | vma = find_vma(mm, addr); | ||
578 | if (((TASK_SIZE - len) >= addr) | ||
579 | && (!vma || (addr+len) <= vma->vm_start) | ||
580 | && !is_hugepage_only_range(mm, addr,len)) | ||
581 | return addr; | ||
582 | } | ||
583 | if (len > mm->cached_hole_size) { | ||
584 | start_addr = addr = mm->free_area_cache; | ||
585 | } else { | ||
586 | start_addr = addr = TASK_UNMAPPED_BASE; | ||
587 | mm->cached_hole_size = 0; | ||
588 | } | ||
589 | |||
590 | full_search: | ||
591 | vma = find_vma(mm, addr); | ||
592 | while (TASK_SIZE - len >= addr) { | ||
593 | BUG_ON(vma && (addr >= vma->vm_end)); | ||
594 | |||
595 | if (touches_hugepage_low_range(mm, addr, len)) { | ||
596 | addr = ALIGN(addr+1, 1<<SID_SHIFT); | ||
597 | vma = find_vma(mm, addr); | ||
598 | continue; | ||
599 | } | ||
600 | if (touches_hugepage_high_range(mm, addr, len)) { | ||
601 | addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); | ||
602 | vma = find_vma(mm, addr); | ||
603 | continue; | ||
604 | } | ||
605 | if (!vma || addr + len <= vma->vm_start) { | ||
606 | /* | ||
607 | * Remember the place where we stopped the search: | ||
608 | */ | ||
609 | mm->free_area_cache = addr + len; | ||
610 | return addr; | ||
611 | } | ||
612 | if (addr + mm->cached_hole_size < vma->vm_start) | ||
613 | mm->cached_hole_size = vma->vm_start - addr; | ||
614 | addr = vma->vm_end; | ||
615 | vma = vma->vm_next; | ||
616 | } | ||
617 | |||
618 | /* Make sure we didn't miss any holes */ | ||
619 | if (start_addr != TASK_UNMAPPED_BASE) { | ||
620 | start_addr = addr = TASK_UNMAPPED_BASE; | ||
621 | mm->cached_hole_size = 0; | ||
622 | goto full_search; | ||
623 | } | ||
624 | return -ENOMEM; | ||
625 | } | ||
626 | |||
627 | /* | ||
628 | * This mmap-allocator allocates new areas top-down from below the | ||
629 | * stack's low limit (the base): | ||
630 | * | ||
631 | * Because we have an exclusive hugepage region which lies within the | ||
632 | * normal user address space, we have to take special measures to make | ||
633 | * non-huge mmap()s evade the hugepage reserved regions. | ||
634 | */ | ||
635 | unsigned long | ||
636 | arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | ||
637 | const unsigned long len, const unsigned long pgoff, | ||
638 | const unsigned long flags) | ||
639 | { | ||
640 | struct vm_area_struct *vma, *prev_vma; | ||
641 | struct mm_struct *mm = current->mm; | ||
642 | unsigned long base = mm->mmap_base, addr = addr0; | ||
643 | unsigned long largest_hole = mm->cached_hole_size; | ||
644 | int first_time = 1; | ||
645 | |||
646 | /* requested length too big for entire address space */ | ||
647 | if (len > TASK_SIZE) | ||
648 | return -ENOMEM; | ||
649 | |||
650 | /* handle fixed mapping: prevent overlap with huge pages */ | ||
651 | if (flags & MAP_FIXED) { | ||
652 | if (is_hugepage_only_range(mm, addr, len)) | ||
653 | return -EINVAL; | ||
654 | return addr; | ||
655 | } | ||
656 | |||
657 | /* dont allow allocations above current base */ | ||
658 | if (mm->free_area_cache > base) | ||
659 | mm->free_area_cache = base; | ||
660 | |||
661 | /* requesting a specific address */ | ||
662 | if (addr) { | ||
663 | addr = PAGE_ALIGN(addr); | ||
664 | vma = find_vma(mm, addr); | ||
665 | if (TASK_SIZE - len >= addr && | ||
666 | (!vma || addr + len <= vma->vm_start) | ||
667 | && !is_hugepage_only_range(mm, addr,len)) | ||
668 | return addr; | ||
669 | } | ||
670 | |||
671 | if (len <= largest_hole) { | ||
672 | largest_hole = 0; | ||
673 | mm->free_area_cache = base; | ||
674 | } | ||
675 | try_again: | ||
676 | /* make sure it can fit in the remaining address space */ | ||
677 | if (mm->free_area_cache < len) | ||
678 | goto fail; | ||
679 | |||
680 | /* either no address requested or cant fit in requested address hole */ | ||
681 | addr = (mm->free_area_cache - len) & PAGE_MASK; | ||
682 | do { | ||
683 | hugepage_recheck: | ||
684 | if (touches_hugepage_low_range(mm, addr, len)) { | ||
685 | addr = (addr & ((~0) << SID_SHIFT)) - len; | ||
686 | goto hugepage_recheck; | ||
687 | } else if (touches_hugepage_high_range(mm, addr, len)) { | ||
688 | addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len; | ||
689 | goto hugepage_recheck; | ||
690 | } | ||
691 | |||
692 | /* | ||
693 | * Lookup failure means no vma is above this address, | ||
694 | * i.e. return with success: | ||
695 | */ | ||
696 | if (!(vma = find_vma_prev(mm, addr, &prev_vma))) | ||
697 | return addr; | ||
698 | |||
699 | /* | ||
700 | * new region fits between prev_vma->vm_end and | ||
701 | * vma->vm_start, use it: | ||
702 | */ | ||
703 | if (addr+len <= vma->vm_start && | ||
704 | (!prev_vma || (addr >= prev_vma->vm_end))) { | ||
705 | /* remember the address as a hint for next time */ | ||
706 | mm->cached_hole_size = largest_hole; | ||
707 | return (mm->free_area_cache = addr); | ||
708 | } else { | ||
709 | /* pull free_area_cache down to the first hole */ | ||
710 | if (mm->free_area_cache == vma->vm_end) { | ||
711 | mm->free_area_cache = vma->vm_start; | ||
712 | mm->cached_hole_size = largest_hole; | ||
713 | } | ||
714 | } | ||
715 | |||
716 | /* remember the largest hole we saw so far */ | ||
717 | if (addr + largest_hole < vma->vm_start) | ||
718 | largest_hole = vma->vm_start - addr; | ||
719 | |||
720 | /* try just below the current vma->vm_start */ | ||
721 | addr = vma->vm_start-len; | ||
722 | } while (len <= vma->vm_start); | ||
723 | |||
724 | fail: | ||
725 | /* | ||
726 | * if hint left us with no space for the requested | ||
727 | * mapping then try again: | ||
728 | */ | ||
729 | if (first_time) { | ||
730 | mm->free_area_cache = base; | ||
731 | largest_hole = 0; | ||
732 | first_time = 0; | ||
733 | goto try_again; | ||
734 | } | ||
735 | /* | ||
736 | * A failed mmap() very likely causes application failure, | ||
737 | * so fall back to the bottom-up function here. This scenario | ||
738 | * can happen with large stack limits and large mmap() | ||
739 | * allocations. | ||
740 | */ | ||
741 | mm->free_area_cache = TASK_UNMAPPED_BASE; | ||
742 | mm->cached_hole_size = ~0UL; | ||
743 | addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); | ||
744 | /* | ||
745 | * Restore the topdown base: | ||
746 | */ | ||
747 | mm->free_area_cache = base; | ||
748 | mm->cached_hole_size = ~0UL; | ||
749 | |||
750 | return addr; | ||
751 | } | ||
752 | |||
753 | static int htlb_check_hinted_area(unsigned long addr, unsigned long len) | ||
754 | { | ||
755 | struct vm_area_struct *vma; | ||
756 | |||
757 | vma = find_vma(current->mm, addr); | ||
758 | if (TASK_SIZE - len >= addr && | ||
759 | (!vma || ((addr + len) <= vma->vm_start))) | ||
760 | return 0; | ||
761 | |||
762 | return -ENOMEM; | ||
763 | } | ||
764 | |||
765 | static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) | ||
766 | { | ||
767 | unsigned long addr = 0; | ||
768 | struct vm_area_struct *vma; | ||
769 | |||
770 | vma = find_vma(current->mm, addr); | ||
771 | while (addr + len <= 0x100000000UL) { | ||
772 | BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ | ||
773 | |||
774 | if (! __within_hugepage_low_range(addr, len, segmask)) { | ||
775 | addr = ALIGN(addr+1, 1<<SID_SHIFT); | ||
776 | vma = find_vma(current->mm, addr); | ||
777 | continue; | ||
778 | } | ||
779 | |||
780 | if (!vma || (addr + len) <= vma->vm_start) | ||
781 | return addr; | ||
782 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); | ||
783 | /* Depending on segmask this might not be a confirmed | ||
784 | * hugepage region, so the ALIGN could have skipped | ||
785 | * some VMAs */ | ||
786 | vma = find_vma(current->mm, addr); | ||
787 | } | ||
788 | |||
789 | return -ENOMEM; | ||
790 | } | ||
791 | |||
792 | static unsigned long htlb_get_high_area(unsigned long len, u16 areamask) | ||
793 | { | ||
794 | unsigned long addr = 0x100000000UL; | ||
795 | struct vm_area_struct *vma; | ||
796 | |||
797 | vma = find_vma(current->mm, addr); | ||
798 | while (addr + len <= TASK_SIZE_USER64) { | ||
799 | BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ | ||
800 | |||
801 | if (! __within_hugepage_high_range(addr, len, areamask)) { | ||
802 | addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); | ||
803 | vma = find_vma(current->mm, addr); | ||
804 | continue; | ||
805 | } | ||
806 | |||
807 | if (!vma || (addr + len) <= vma->vm_start) | ||
808 | return addr; | ||
809 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); | ||
810 | /* Depending on segmask this might not be a confirmed | ||
811 | * hugepage region, so the ALIGN could have skipped | ||
812 | * some VMAs */ | ||
813 | vma = find_vma(current->mm, addr); | ||
814 | } | ||
815 | |||
816 | return -ENOMEM; | ||
817 | } | ||
818 | 364 | ||
819 | unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | 365 | unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, |
820 | unsigned long len, unsigned long pgoff, | 366 | unsigned long len, unsigned long pgoff, |
821 | unsigned long flags) | 367 | unsigned long flags) |
822 | { | 368 | { |
823 | int lastshift; | 369 | return slice_get_unmapped_area(addr, len, flags, |
824 | u16 areamask, curareas; | 370 | mmu_huge_psize, 1, 0); |
825 | |||
826 | if (HPAGE_SHIFT == 0) | ||
827 | return -EINVAL; | ||
828 | if (len & ~HPAGE_MASK) | ||
829 | return -EINVAL; | ||
830 | if (len > TASK_SIZE) | ||
831 | return -ENOMEM; | ||
832 | |||
833 | if (!cpu_has_feature(CPU_FTR_16M_PAGE)) | ||
834 | return -EINVAL; | ||
835 | |||
836 | /* Paranoia, caller should have dealt with this */ | ||
837 | BUG_ON((addr + len) < addr); | ||
838 | |||
839 | /* Handle MAP_FIXED */ | ||
840 | if (flags & MAP_FIXED) { | ||
841 | if (prepare_hugepage_range(addr, len, pgoff)) | ||
842 | return -EINVAL; | ||
843 | return addr; | ||
844 | } | ||
845 | |||
846 | if (test_thread_flag(TIF_32BIT)) { | ||
847 | curareas = current->mm->context.low_htlb_areas; | ||
848 | |||
849 | /* First see if we can use the hint address */ | ||
850 | if (addr && (htlb_check_hinted_area(addr, len) == 0)) { | ||
851 | areamask = LOW_ESID_MASK(addr, len); | ||
852 | if (open_low_hpage_areas(current->mm, areamask) == 0) | ||
853 | return addr; | ||
854 | } | ||
855 | |||
856 | /* Next see if we can map in the existing low areas */ | ||
857 | addr = htlb_get_low_area(len, curareas); | ||
858 | if (addr != -ENOMEM) | ||
859 | return addr; | ||
860 | |||
861 | /* Finally go looking for areas to open */ | ||
862 | lastshift = 0; | ||
863 | for (areamask = LOW_ESID_MASK(0x100000000UL-len, len); | ||
864 | ! lastshift; areamask >>=1) { | ||
865 | if (areamask & 1) | ||
866 | lastshift = 1; | ||
867 | |||
868 | addr = htlb_get_low_area(len, curareas | areamask); | ||
869 | if ((addr != -ENOMEM) | ||
870 | && open_low_hpage_areas(current->mm, areamask) == 0) | ||
871 | return addr; | ||
872 | } | ||
873 | } else { | ||
874 | curareas = current->mm->context.high_htlb_areas; | ||
875 | |||
876 | /* First see if we can use the hint address */ | ||
877 | /* We discourage 64-bit processes from doing hugepage | ||
878 | * mappings below 4GB (must use MAP_FIXED) */ | ||
879 | if ((addr >= 0x100000000UL) | ||
880 | && (htlb_check_hinted_area(addr, len) == 0)) { | ||
881 | areamask = HTLB_AREA_MASK(addr, len); | ||
882 | if (open_high_hpage_areas(current->mm, areamask) == 0) | ||
883 | return addr; | ||
884 | } | ||
885 | |||
886 | /* Next see if we can map in the existing high areas */ | ||
887 | addr = htlb_get_high_area(len, curareas); | ||
888 | if (addr != -ENOMEM) | ||
889 | return addr; | ||
890 | |||
891 | /* Finally go looking for areas to open */ | ||
892 | lastshift = 0; | ||
893 | for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len); | ||
894 | ! lastshift; areamask >>=1) { | ||
895 | if (areamask & 1) | ||
896 | lastshift = 1; | ||
897 | |||
898 | addr = htlb_get_high_area(len, curareas | areamask); | ||
899 | if ((addr != -ENOMEM) | ||
900 | && open_high_hpage_areas(current->mm, areamask) == 0) | ||
901 | return addr; | ||
902 | } | ||
903 | } | ||
904 | printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" | ||
905 | " enough areas\n"); | ||
906 | return -ENOMEM; | ||
907 | } | 371 | } |
908 | 372 | ||
909 | /* | 373 | /* |
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index fe1fe852181a..7312a265545f 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -146,21 +146,16 @@ static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags) | |||
146 | memset(addr, 0, kmem_cache_size(cache)); | 146 | memset(addr, 0, kmem_cache_size(cache)); |
147 | } | 147 | } |
148 | 148 | ||
149 | #ifdef CONFIG_PPC_64K_PAGES | ||
150 | static const unsigned int pgtable_cache_size[3] = { | ||
151 | PTE_TABLE_SIZE, PMD_TABLE_SIZE, PGD_TABLE_SIZE | ||
152 | }; | ||
153 | static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { | ||
154 | "pte_pmd_cache", "pmd_cache", "pgd_cache", | ||
155 | }; | ||
156 | #else | ||
157 | static const unsigned int pgtable_cache_size[2] = { | 149 | static const unsigned int pgtable_cache_size[2] = { |
158 | PTE_TABLE_SIZE, PMD_TABLE_SIZE | 150 | PGD_TABLE_SIZE, PMD_TABLE_SIZE |
159 | }; | 151 | }; |
160 | static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { | 152 | static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { |
161 | "pgd_pte_cache", "pud_pmd_cache", | 153 | #ifdef CONFIG_PPC_64K_PAGES |
162 | }; | 154 | "pgd_cache", "pmd_cache", |
155 | #else | ||
156 | "pgd_cache", "pud_pmd_cache", | ||
163 | #endif /* CONFIG_PPC_64K_PAGES */ | 157 | #endif /* CONFIG_PPC_64K_PAGES */ |
158 | }; | ||
164 | 159 | ||
165 | #ifdef CONFIG_HUGETLB_PAGE | 160 | #ifdef CONFIG_HUGETLB_PAGE |
166 | /* Hugepages need one extra cache, initialized in hugetlbpage.c. We | 161 | /* Hugepages need one extra cache, initialized in hugetlbpage.c. We |
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 1a6e08f3298f..246eeea40ece 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/highmem.h> | 31 | #include <linux/highmem.h> |
32 | #include <linux/initrd.h> | 32 | #include <linux/initrd.h> |
33 | #include <linux/pagemap.h> | 33 | #include <linux/pagemap.h> |
34 | #include <linux/suspend.h> | ||
34 | 35 | ||
35 | #include <asm/pgalloc.h> | 36 | #include <asm/pgalloc.h> |
36 | #include <asm/prom.h> | 37 | #include <asm/prom.h> |
@@ -276,6 +277,28 @@ void __init do_init_bootmem(void) | |||
276 | init_bootmem_done = 1; | 277 | init_bootmem_done = 1; |
277 | } | 278 | } |
278 | 279 | ||
280 | /* mark pages that don't exist as nosave */ | ||
281 | static int __init mark_nonram_nosave(void) | ||
282 | { | ||
283 | unsigned long lmb_next_region_start_pfn, | ||
284 | lmb_region_max_pfn; | ||
285 | int i; | ||
286 | |||
287 | for (i = 0; i < lmb.memory.cnt - 1; i++) { | ||
288 | lmb_region_max_pfn = | ||
289 | (lmb.memory.region[i].base >> PAGE_SHIFT) + | ||
290 | (lmb.memory.region[i].size >> PAGE_SHIFT); | ||
291 | lmb_next_region_start_pfn = | ||
292 | lmb.memory.region[i+1].base >> PAGE_SHIFT; | ||
293 | |||
294 | if (lmb_region_max_pfn < lmb_next_region_start_pfn) | ||
295 | register_nosave_region(lmb_region_max_pfn, | ||
296 | lmb_next_region_start_pfn); | ||
297 | } | ||
298 | |||
299 | return 0; | ||
300 | } | ||
301 | |||
279 | /* | 302 | /* |
280 | * paging_init() sets up the page tables - in fact we've already done this. | 303 | * paging_init() sets up the page tables - in fact we've already done this. |
281 | */ | 304 | */ |
@@ -307,6 +330,8 @@ void __init paging_init(void) | |||
307 | max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; | 330 | max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; |
308 | #endif | 331 | #endif |
309 | free_area_init_nodes(max_zone_pfns); | 332 | free_area_init_nodes(max_zone_pfns); |
333 | |||
334 | mark_nonram_nosave(); | ||
310 | } | 335 | } |
311 | #endif /* ! CONFIG_NEED_MULTIPLE_NODES */ | 336 | #endif /* ! CONFIG_NEED_MULTIPLE_NODES */ |
312 | 337 | ||
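mark_nonram_nosave() above walks adjacent LMB regions and registers any gap between them as a nosave range, so software suspend does not try to save pages that have no backing RAM. A quick worked example with a made-up region layout, assuming 4K pages (PAGE_SHIFT == 12; a 64K-page kernel would simply scale the pfns):

/* Hypothetical layout: RAM at [0, 1GB) and [2GB, ...), nothing between.
 *   region[0]: base 0x00000000, size 0x40000000 -> max pfn        0x40000
 *   region[1]: base 0x80000000                  -> next start pfn 0x80000
 * Since 0x40000 < 0x80000, the loop registers the hole: */
register_nosave_region(0x40000, 0x80000);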
diff --git a/arch/powerpc/mm/mmu_context_64.c b/arch/powerpc/mm/mmu_context_64.c
index 90a06ac02d5e..7a78cdc0515a 100644
--- a/arch/powerpc/mm/mmu_context_64.c
+++ b/arch/powerpc/mm/mmu_context_64.c
@@ -28,6 +28,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | |||
28 | { | 28 | { |
29 | int index; | 29 | int index; |
30 | int err; | 30 | int err; |
31 | int new_context = (mm->context.id == 0); | ||
31 | 32 | ||
32 | again: | 33 | again: |
33 | if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) | 34 | if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) |
@@ -50,9 +51,18 @@ again: | |||
50 | } | 51 | } |
51 | 52 | ||
52 | mm->context.id = index; | 53 | mm->context.id = index; |
54 | #ifdef CONFIG_PPC_MM_SLICES | ||
55 | /* The old code would re-promote on fork, we don't do that | ||
56 | * when using slices as it could cause problem promoting slices | ||
57 | * that have been forced down to 4K | ||
58 | */ | ||
59 | if (new_context) | ||
60 | slice_set_user_psize(mm, mmu_virtual_psize); | ||
61 | #else | ||
53 | mm->context.user_psize = mmu_virtual_psize; | 62 | mm->context.user_psize = mmu_virtual_psize; |
54 | mm->context.sllp = SLB_VSID_USER | | 63 | mm->context.sllp = SLB_VSID_USER | |
55 | mmu_psize_defs[mmu_virtual_psize].sllp; | 64 | mmu_psize_defs[mmu_virtual_psize].sllp; |
65 | #endif | ||
56 | 66 | ||
57 | return 0; | 67 | return 0; |
58 | } | 68 | } |
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 05066674a7a0..ec1421a20aaa 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -185,7 +185,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, | |||
185 | 185 | ||
186 | if (Hash == 0) | 186 | if (Hash == 0) |
187 | return; | 187 | return; |
188 | pmd = pmd_offset(pgd_offset(mm, ea), ea); | 188 | pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea); |
189 | if (!pmd_none(*pmd)) | 189 | if (!pmd_none(*pmd)) |
190 | add_hash_page(mm->context.id, ea, pmd_val(*pmd)); | 190 | add_hash_page(mm->context.id, ea, pmd_val(*pmd)); |
191 | } | 191 | } |
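The ppc_mmu_32.c hunk above belongs to the "removal of 4level-fixup hack from ppc32" item in the shortlog: with the generic folding headers, the page-table walk has to name every level explicitly, even though the extra level is folded away at compile time on 32-bit and costs nothing at run time. Spelled out as a C sketch:

/* The walk hash_preload() now performs, written out step by step: */
pgd_t *pgd = pgd_offset(mm, ea);        /* top level */
pud_t *pud = pud_offset(pgd, ea);       /* folded on ppc32, effectively a no-op */
pmd_t *pmd = pmd_offset(pud, ea);       /* what the old code reached directly */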
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 224e960650a0..304375a73574 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -198,12 +198,6 @@ void slb_initialize(void) | |||
198 | static int slb_encoding_inited; | 198 | static int slb_encoding_inited; |
199 | extern unsigned int *slb_miss_kernel_load_linear; | 199 | extern unsigned int *slb_miss_kernel_load_linear; |
200 | extern unsigned int *slb_miss_kernel_load_io; | 200 | extern unsigned int *slb_miss_kernel_load_io; |
201 | #ifdef CONFIG_HUGETLB_PAGE | ||
202 | extern unsigned int *slb_miss_user_load_huge; | ||
203 | unsigned long huge_llp; | ||
204 | |||
205 | huge_llp = mmu_psize_defs[mmu_huge_psize].sllp; | ||
206 | #endif | ||
207 | 201 | ||
208 | /* Prepare our SLB miss handler based on our page size */ | 202 | /* Prepare our SLB miss handler based on our page size */ |
209 | linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; | 203 | linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; |
@@ -220,11 +214,6 @@ void slb_initialize(void) | |||
220 | 214 | ||
221 | DBG("SLB: linear LLP = %04x\n", linear_llp); | 215 | DBG("SLB: linear LLP = %04x\n", linear_llp); |
222 | DBG("SLB: io LLP = %04x\n", io_llp); | 216 | DBG("SLB: io LLP = %04x\n", io_llp); |
223 | #ifdef CONFIG_HUGETLB_PAGE | ||
224 | patch_slb_encoding(slb_miss_user_load_huge, | ||
225 | SLB_VSID_USER | huge_llp); | ||
226 | DBG("SLB: huge LLP = %04x\n", huge_llp); | ||
227 | #endif | ||
228 | } | 217 | } |
229 | 218 | ||
230 | get_paca()->stab_rr = SLB_NUM_BOLTED; | 219 | get_paca()->stab_rr = SLB_NUM_BOLTED; |
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index b10e4707d7c1..cd1a93d4948c 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -82,31 +82,45 @@ _GLOBAL(slb_miss_kernel_load_io) | |||
82 | srdi. r9,r10,USER_ESID_BITS | 82 | srdi. r9,r10,USER_ESID_BITS |
83 | bne- 8f /* invalid ea bits set */ | 83 | bne- 8f /* invalid ea bits set */ |
84 | 84 | ||
85 | /* Figure out if the segment contains huge pages */ | 85 | |
86 | #ifdef CONFIG_HUGETLB_PAGE | 86 | /* when using slices, we extract the psize off the slice bitmaps |
87 | BEGIN_FTR_SECTION | 87 | * and then we need to get the sllp encoding off the mmu_psize_defs |
88 | b 1f | 88 | * array. |
89 | END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE) | 89 | * |
90 | * XXX This is a bit inefficient especially for the normal case, | ||
91 | * so we should try to implement a fast path for the standard page | ||
92 | * size using the old sllp value so we avoid the array. We cannot | ||
93 | * really do dynamic patching unfortunately as processes might flip | ||
94 | * between 4k and 64k standard page size | ||
95 | */ | ||
96 | #ifdef CONFIG_PPC_MM_SLICES | ||
90 | cmpldi r10,16 | 97 | cmpldi r10,16 |
91 | 98 | ||
92 | lhz r9,PACALOWHTLBAREAS(r13) | 99 | /* Get the slice index * 4 in r11 and matching slice size mask in r9 */ |
93 | mr r11,r10 | 100 | ld r9,PACALOWSLICESPSIZE(r13) |
101 | sldi r11,r10,2 | ||
94 | blt 5f | 102 | blt 5f |
103 | ld r9,PACAHIGHSLICEPSIZE(r13) | ||
104 | srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2) | ||
105 | andi. r11,r11,0x3c | ||
95 | 106 | ||
96 | lhz r9,PACAHIGHHTLBAREAS(r13) | 107 | 5: /* Extract the psize and multiply to get an array offset */ |
97 | srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT) | 108 | srd r9,r9,r11 |
98 | 109 | andi. r9,r9,0xf | |
99 | 5: srd r9,r9,r11 | 110 | mulli r9,r9,MMUPSIZEDEFSIZE |
100 | andi. r9,r9,1 | ||
101 | beq 1f | ||
102 | _GLOBAL(slb_miss_user_load_huge) | ||
103 | li r11,0 | ||
104 | b 2f | ||
105 | 1: | ||
106 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
107 | 111 | ||
112 | /* Now get to the array and obtain the sllp | ||
113 | */ | ||
114 | ld r11,PACATOC(r13) | ||
115 | ld r11,mmu_psize_defs@got(r11) | ||
116 | add r11,r11,r9 | ||
117 | ld r11,MMUPSIZESLLP(r11) | ||
118 | ori r11,r11,SLB_VSID_USER | ||
119 | #else | ||
120 | /* paca context sllp already contains the SLB_VSID_USER bits */ | ||
108 | lhz r11,PACACONTEXTSLLP(r13) | 121 | lhz r11,PACACONTEXTSLLP(r13) |
109 | 2: | 122 | #endif /* CONFIG_PPC_MM_SLICES */ |
123 | |||
110 | ld r9,PACACONTEXTID(r13) | 124 | ld r9,PACACONTEXTID(r13) |
111 | rldimi r10,r9,USER_ESID_BITS,0 | 125 | rldimi r10,r9,USER_ESID_BITS,0 |
112 | b slb_finish_load | 126 | b slb_finish_load |
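The comment added to slb_low.S above describes the new SLB-miss path in words: pull the 4-bit page-size index for the faulting segment out of the slice bitmaps in the PACA, then index mmu_psize_defs[] for that size's LLP encoding. The same computation in C, as a sketch that mirrors the assembly rather than code from the patch (ex_slice_psize is the hypothetical helper sketched after the diffstat):

/* C-level view of the new user-segment SLB miss path (sketch): psize from
 * the per-slice bitmaps, sllp from the page-size definition array. */
unsigned int psize = ex_slice_psize(get_paca()->context.low_slices_psize,
                                    get_paca()->context.high_slices_psize,
                                    SLICE_HIGH_SHIFT, ea);
unsigned long vsid_flags = SLB_VSID_USER | mmu_psize_defs[psize].sllp;

Without CONFIG_PPC_MM_SLICES the path stays as before: it reads the precomputed context.sllp from the PACA, which already carries the SLB_VSID_USER bits.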
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
new file mode 100644
index 000000000000..f833dba2a028
--- /dev/null
+++ b/arch/powerpc/mm/slice.c
@@ -0,0 +1,633 @@ | |||
1 | /* | ||
2 | * address space "slices" (meta-segments) support | ||
3 | * | ||
4 | * Copyright (C) 2007 Benjamin Herrenschmidt, IBM Corporation. | ||
5 | * | ||
6 | * Based on hugetlb implementation | ||
7 | * | ||
8 | * Copyright (C) 2003 David Gibson, IBM Corporation. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 | */ | ||
24 | |||
25 | #undef DEBUG | ||
26 | |||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/mm.h> | ||
29 | #include <linux/pagemap.h> | ||
30 | #include <linux/err.h> | ||
31 | #include <linux/spinlock.h> | ||
32 | #include <linux/module.h> | ||
33 | #include <asm/mman.h> | ||
34 | #include <asm/mmu.h> | ||
35 | #include <asm/spu.h> | ||
36 | |||
37 | static spinlock_t slice_convert_lock = SPIN_LOCK_UNLOCKED; | ||
38 | |||
39 | |||
40 | #ifdef DEBUG | ||
41 | int _slice_debug = 1; | ||
42 | |||
43 | static void slice_print_mask(const char *label, struct slice_mask mask) | ||
44 | { | ||
45 | char *p, buf[16 + 3 + 16 + 1]; | ||
46 | int i; | ||
47 | |||
48 | if (!_slice_debug) | ||
49 | return; | ||
50 | p = buf; | ||
51 | for (i = 0; i < SLICE_NUM_LOW; i++) | ||
52 | *(p++) = (mask.low_slices & (1 << i)) ? '1' : '0'; | ||
53 | *(p++) = ' '; | ||
54 | *(p++) = '-'; | ||
55 | *(p++) = ' '; | ||
56 | for (i = 0; i < SLICE_NUM_HIGH; i++) | ||
57 | *(p++) = (mask.high_slices & (1 << i)) ? '1' : '0'; | ||
58 | *(p++) = 0; | ||
59 | |||
60 | printk(KERN_DEBUG "%s:%s\n", label, buf); | ||
61 | } | ||
62 | |||
63 | #define slice_dbg(fmt...) do { if (_slice_debug) pr_debug(fmt); } while(0) | ||
64 | |||
65 | #else | ||
66 | |||
67 | static void slice_print_mask(const char *label, struct slice_mask mask) {} | ||
68 | #define slice_dbg(fmt...) | ||
69 | |||
70 | #endif | ||
71 | |||
72 | static struct slice_mask slice_range_to_mask(unsigned long start, | ||
73 | unsigned long len) | ||
74 | { | ||
75 | unsigned long end = start + len - 1; | ||
76 | struct slice_mask ret = { 0, 0 }; | ||
77 | |||
78 | if (start < SLICE_LOW_TOP) { | ||
79 | unsigned long mend = min(end, SLICE_LOW_TOP); | ||
80 | unsigned long mstart = min(start, SLICE_LOW_TOP); | ||
81 | |||
82 | ret.low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) | ||
83 | - (1u << GET_LOW_SLICE_INDEX(mstart)); | ||
84 | } | ||
85 | |||
86 | if ((start + len) > SLICE_LOW_TOP) | ||
87 | ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1)) | ||
88 | - (1u << GET_HIGH_SLICE_INDEX(start)); | ||
89 | |||
90 | return ret; | ||
91 | } | ||
92 | |||
93 | static int slice_area_is_free(struct mm_struct *mm, unsigned long addr, | ||
94 | unsigned long len) | ||
95 | { | ||
96 | struct vm_area_struct *vma; | ||
97 | |||
98 | if ((mm->task_size - len) < addr) | ||
99 | return 0; | ||
100 | vma = find_vma(mm, addr); | ||
101 | return (!vma || (addr + len) <= vma->vm_start); | ||
102 | } | ||
103 | |||
104 | static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice) | ||
105 | { | ||
106 | return !slice_area_is_free(mm, slice << SLICE_LOW_SHIFT, | ||
107 | 1ul << SLICE_LOW_SHIFT); | ||
108 | } | ||
109 | |||
110 | static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice) | ||
111 | { | ||
112 | unsigned long start = slice << SLICE_HIGH_SHIFT; | ||
113 | unsigned long end = start + (1ul << SLICE_HIGH_SHIFT); | ||
114 | |||
115 | /* Hack: so that each address is controlled by exactly one | ||
116 | * of the high or low area bitmaps, the first high area starts | ||
117 | * at 4GB, not at 0 */ | ||
118 | if (start == 0) | ||
119 | start = SLICE_LOW_TOP; | ||
120 | |||
121 | return !slice_area_is_free(mm, start, end - start); | ||
122 | } | ||
123 | |||
124 | static struct slice_mask slice_mask_for_free(struct mm_struct *mm) | ||
125 | { | ||
126 | struct slice_mask ret = { 0, 0 }; | ||
127 | unsigned long i; | ||
128 | |||
129 | for (i = 0; i < SLICE_NUM_LOW; i++) | ||
130 | if (!slice_low_has_vma(mm, i)) | ||
131 | ret.low_slices |= 1u << i; | ||
132 | |||
133 | if (mm->task_size <= SLICE_LOW_TOP) | ||
134 | return ret; | ||
135 | |||
136 | for (i = 0; i < SLICE_NUM_HIGH; i++) | ||
137 | if (!slice_high_has_vma(mm, i)) | ||
138 | ret.high_slices |= 1u << i; | ||
139 | |||
140 | return ret; | ||
141 | } | ||
142 | |||
143 | static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize) | ||
144 | { | ||
145 | struct slice_mask ret = { 0, 0 }; | ||
146 | unsigned long i; | ||
147 | u64 psizes; | ||
148 | |||
149 | psizes = mm->context.low_slices_psize; | ||
150 | for (i = 0; i < SLICE_NUM_LOW; i++) | ||
151 | if (((psizes >> (i * 4)) & 0xf) == psize) | ||
152 | ret.low_slices |= 1u << i; | ||
153 | |||
154 | psizes = mm->context.high_slices_psize; | ||
155 | for (i = 0; i < SLICE_NUM_HIGH; i++) | ||
156 | if (((psizes >> (i * 4)) & 0xf) == psize) | ||
157 | ret.high_slices |= 1u << i; | ||
158 | |||
159 | return ret; | ||
160 | } | ||
161 | |||
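Both the loop above and slice_convert() below rely on the same packing: each slice owns a 4-bit field in low_slices_psize/high_slices_psize holding its page-size index. A small sketch of that encoding (the helper names are mine, not part of the patch):

    /* Read slice i's page-size index out of a packed psize word. */
    static inline unsigned int slice_psize_of(u64 psizes, int i)
    {
            return (psizes >> (i * 4)) & 0xf;
    }

    /* Return a copy of the packed word with slice i set to psize. */
    static inline u64 slice_psize_set(u64 psizes, int i, unsigned int psize)
    {
            return (psizes & ~(0xful << (i * 4))) | ((u64)psize << (i * 4));
    }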
162 | static int slice_check_fit(struct slice_mask mask, struct slice_mask available) | ||
163 | { | ||
164 | return (mask.low_slices & available.low_slices) == mask.low_slices && | ||
165 | (mask.high_slices & available.high_slices) == mask.high_slices; | ||
166 | } | ||
167 | |||
168 | static void slice_flush_segments(void *parm) | ||
169 | { | ||
170 | struct mm_struct *mm = parm; | ||
171 | unsigned long flags; | ||
172 | |||
173 | if (mm != current->active_mm) | ||
174 | return; | ||
175 | |||
176 | /* update the paca copy of the context struct */ | ||
177 | get_paca()->context = current->active_mm->context; | ||
178 | |||
179 | local_irq_save(flags); | ||
180 | slb_flush_and_rebolt(); | ||
181 | local_irq_restore(flags); | ||
182 | } | ||
183 | |||
184 | static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize) | ||
185 | { | ||
186 | /* Write the new slice psize bits */ | ||
187 | u64 lpsizes, hpsizes; | ||
188 | unsigned long i, flags; | ||
189 | |||
190 | slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize); | ||
191 | slice_print_mask(" mask", mask); | ||
192 | |||
193 | /* We need to use a spinlock here to protect against | ||
194 | * concurrent 64k -> 4k demotion ... | ||
195 | */ | ||
196 | spin_lock_irqsave(&slice_convert_lock, flags); | ||
197 | |||
198 | lpsizes = mm->context.low_slices_psize; | ||
199 | for (i = 0; i < SLICE_NUM_LOW; i++) | ||
200 | if (mask.low_slices & (1u << i)) | ||
201 | lpsizes = (lpsizes & ~(0xful << (i * 4))) | | ||
202 | (((unsigned long)psize) << (i * 4)); | ||
203 | |||
204 | hpsizes = mm->context.high_slices_psize; | ||
205 | for (i = 0; i < SLICE_NUM_HIGH; i++) | ||
206 | if (mask.high_slices & (1u << i)) | ||
207 | hpsizes = (hpsizes & ~(0xful << (i * 4))) | | ||
208 | (((unsigned long)psize) << (i * 4)); | ||
209 | |||
210 | mm->context.low_slices_psize = lpsizes; | ||
211 | mm->context.high_slices_psize = hpsizes; | ||
212 | |||
213 | slice_dbg(" lsps=%lx, hsps=%lx\n", | ||
214 | mm->context.low_slices_psize, | ||
215 | mm->context.high_slices_psize); | ||
216 | |||
217 | spin_unlock_irqrestore(&slice_convert_lock, flags); | ||
218 | mb(); | ||
219 | |||
220 | /* XXX this is sub-optimal but will do for now */ | ||
221 | on_each_cpu(slice_flush_segments, mm, 0, 1); | ||
222 | #ifdef CONFIG_SPU_BASE | ||
223 | spu_flush_all_slbs(mm); | ||
224 | #endif | ||
225 | } | ||
226 | |||
227 | static unsigned long slice_find_area_bottomup(struct mm_struct *mm, | ||
228 | unsigned long len, | ||
229 | struct slice_mask available, | ||
230 | int psize, int use_cache) | ||
231 | { | ||
232 | struct vm_area_struct *vma; | ||
233 | unsigned long start_addr, addr; | ||
234 | struct slice_mask mask; | ||
235 | int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); | ||
236 | |||
237 | if (use_cache) { | ||
238 | if (len <= mm->cached_hole_size) { | ||
239 | start_addr = addr = TASK_UNMAPPED_BASE; | ||
240 | mm->cached_hole_size = 0; | ||
241 | } else | ||
242 | start_addr = addr = mm->free_area_cache; | ||
243 | } else | ||
244 | start_addr = addr = TASK_UNMAPPED_BASE; | ||
245 | |||
246 | full_search: | ||
247 | for (;;) { | ||
248 | addr = _ALIGN_UP(addr, 1ul << pshift); | ||
249 | if ((TASK_SIZE - len) < addr) | ||
250 | break; | ||
251 | vma = find_vma(mm, addr); | ||
252 | BUG_ON(vma && (addr >= vma->vm_end)); | ||
253 | |||
254 | mask = slice_range_to_mask(addr, len); | ||
255 | if (!slice_check_fit(mask, available)) { | ||
256 | if (addr < SLICE_LOW_TOP) | ||
257 | addr = _ALIGN_UP(addr + 1, 1ul << SLICE_LOW_SHIFT); | ||
258 | else | ||
259 | addr = _ALIGN_UP(addr + 1, 1ul << SLICE_HIGH_SHIFT); | ||
260 | continue; | ||
261 | } | ||
262 | if (!vma || addr + len <= vma->vm_start) { | ||
263 | /* | ||
264 | * Remember the place where we stopped the search: | ||
265 | */ | ||
266 | if (use_cache) | ||
267 | mm->free_area_cache = addr + len; | ||
268 | return addr; | ||
269 | } | ||
270 | if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start) | ||
271 | mm->cached_hole_size = vma->vm_start - addr; | ||
272 | addr = vma->vm_end; | ||
273 | } | ||
274 | |||
275 | /* Make sure we didn't miss any holes */ | ||
276 | if (use_cache && start_addr != TASK_UNMAPPED_BASE) { | ||
277 | start_addr = addr = TASK_UNMAPPED_BASE; | ||
278 | mm->cached_hole_size = 0; | ||
279 | goto full_search; | ||
280 | } | ||
281 | return -ENOMEM; | ||
282 | } | ||
283 | |||
284 | static unsigned long slice_find_area_topdown(struct mm_struct *mm, | ||
285 | unsigned long len, | ||
286 | struct slice_mask available, | ||
287 | int psize, int use_cache) | ||
288 | { | ||
289 | struct vm_area_struct *vma; | ||
290 | unsigned long addr; | ||
291 | struct slice_mask mask; | ||
292 | int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); | ||
293 | |||
294 | /* check if free_area_cache is useful for us */ | ||
295 | if (use_cache) { | ||
296 | if (len <= mm->cached_hole_size) { | ||
297 | mm->cached_hole_size = 0; | ||
298 | mm->free_area_cache = mm->mmap_base; | ||
299 | } | ||
300 | |||
301 | /* either no address requested or can't fit in requested | ||
302 | * address hole | ||
303 | */ | ||
304 | addr = mm->free_area_cache; | ||
305 | |||
306 | /* make sure it can fit in the remaining address space */ | ||
307 | if (addr > len) { | ||
308 | addr = _ALIGN_DOWN(addr - len, 1ul << pshift); | ||
309 | mask = slice_range_to_mask(addr, len); | ||
310 | if (slice_check_fit(mask, available) && | ||
311 | slice_area_is_free(mm, addr, len)) | ||
312 | /* remember the address as a hint for | ||
313 | * next time | ||
314 | */ | ||
315 | return (mm->free_area_cache = addr); | ||
316 | } | ||
317 | } | ||
318 | |||
319 | addr = mm->mmap_base; | ||
320 | while (addr > len) { | ||
321 | /* Go down by chunk size */ | ||
322 | addr = _ALIGN_DOWN(addr - len, 1ul << pshift); | ||
323 | |||
324 | /* Check for hit with different page size */ | ||
325 | mask = slice_range_to_mask(addr, len); | ||
326 | if (!slice_check_fit(mask, available)) { | ||
327 | if (addr < SLICE_LOW_TOP) | ||
328 | addr = _ALIGN_DOWN(addr, 1ul << SLICE_LOW_SHIFT); | ||
329 | else if (addr < (1ul << SLICE_HIGH_SHIFT)) | ||
330 | addr = SLICE_LOW_TOP; | ||
331 | else | ||
332 | addr = _ALIGN_DOWN(addr, 1ul << SLICE_HIGH_SHIFT); | ||
333 | continue; | ||
334 | } | ||
335 | |||
336 | /* | ||
337 | * Lookup failure means no vma is above this address, | ||
338 | * else if new region fits below vma->vm_start, | ||
339 | * return with success: | ||
340 | */ | ||
341 | vma = find_vma(mm, addr); | ||
342 | if (!vma || (addr + len) <= vma->vm_start) { | ||
343 | /* remember the address as a hint for next time */ | ||
344 | if (use_cache) | ||
345 | mm->free_area_cache = addr; | ||
346 | return addr; | ||
347 | } | ||
348 | |||
349 | /* remember the largest hole we saw so far */ | ||
350 | if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start) | ||
351 | mm->cached_hole_size = vma->vm_start - addr; | ||
352 | |||
353 | /* try just below the current vma->vm_start */ | ||
354 | addr = vma->vm_start; | ||
355 | } | ||
356 | |||
357 | /* | ||
358 | * A failed mmap() very likely causes application failure, | ||
359 | * so fall back to the bottom-up function here. This scenario | ||
360 | * can happen with large stack limits and large mmap() | ||
361 | * allocations. | ||
362 | */ | ||
363 | addr = slice_find_area_bottomup(mm, len, available, psize, 0); | ||
364 | |||
365 | /* | ||
366 | * Restore the topdown base: | ||
367 | */ | ||
368 | if (use_cache) { | ||
369 | mm->free_area_cache = mm->mmap_base; | ||
370 | mm->cached_hole_size = ~0UL; | ||
371 | } | ||
372 | |||
373 | return addr; | ||
374 | } | ||
375 | |||
376 | |||
377 | static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, | ||
378 | struct slice_mask mask, int psize, | ||
379 | int topdown, int use_cache) | ||
380 | { | ||
381 | if (topdown) | ||
382 | return slice_find_area_topdown(mm, len, mask, psize, use_cache); | ||
383 | else | ||
384 | return slice_find_area_bottomup(mm, len, mask, psize, use_cache); | ||
385 | } | ||
386 | |||
387 | unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, | ||
388 | unsigned long flags, unsigned int psize, | ||
389 | int topdown, int use_cache) | ||
390 | { | ||
391 | struct slice_mask mask; | ||
392 | struct slice_mask good_mask; | ||
393 | struct slice_mask potential_mask = {0,0} /* silence stupid warning */; | ||
394 | int pmask_set = 0; | ||
395 | int fixed = (flags & MAP_FIXED); | ||
396 | int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); | ||
397 | struct mm_struct *mm = current->mm; | ||
398 | |||
399 | /* Sanity checks */ | ||
400 | BUG_ON(mm->task_size == 0); | ||
401 | |||
402 | slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize); | ||
403 | slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d, use_cache=%d\n", | ||
404 | addr, len, flags, topdown, use_cache); | ||
405 | |||
406 | if (len > mm->task_size) | ||
407 | return -ENOMEM; | ||
408 | if (fixed && (addr & ((1ul << pshift) - 1))) | ||
409 | return -EINVAL; | ||
410 | if (fixed && addr > (mm->task_size - len)) | ||
411 | return -EINVAL; | ||
412 | |||
413 | /* If hint, make sure it matches our alignment restrictions */ | ||
414 | if (!fixed && addr) { | ||
415 | addr = _ALIGN_UP(addr, 1ul << pshift); | ||
416 | slice_dbg(" aligned addr=%lx\n", addr); | ||
417 | } | ||
418 | |||
419 | /* First make up a "good" mask of slices that already have the | ||
420 | * right size | ||
421 | */ | ||
422 | good_mask = slice_mask_for_size(mm, psize); | ||
423 | slice_print_mask(" good_mask", good_mask); | ||
424 | |||
425 | /* Check the hint address first, if one was given or MAP_FIXED is set */ | ||
426 | if ((addr != 0 || fixed) && (mm->task_size - len) >= addr) { | ||
427 | |||
428 | /* Don't bother with hint if it overlaps a VMA */ | ||
429 | if (!fixed && !slice_area_is_free(mm, addr, len)) | ||
430 | goto search; | ||
431 | |||
432 | /* Build a mask for the requested range */ | ||
433 | mask = slice_range_to_mask(addr, len); | ||
434 | slice_print_mask(" mask", mask); | ||
435 | |||
436 | /* Check if we fit in the good mask. If we do, we just return, | ||
437 | * nothing else to do | ||
438 | */ | ||
439 | if (slice_check_fit(mask, good_mask)) { | ||
440 | slice_dbg(" fits good !\n"); | ||
441 | return addr; | ||
442 | } | ||
443 | |||
444 | /* We don't fit in the good mask, check what other slices are | ||
445 | * empty and thus can be converted | ||
446 | */ | ||
447 | potential_mask = slice_mask_for_free(mm); | ||
448 | potential_mask.low_slices |= good_mask.low_slices; | ||
449 | potential_mask.high_slices |= good_mask.high_slices; | ||
450 | pmask_set = 1; | ||
451 | slice_print_mask(" potential", potential_mask); | ||
452 | if (slice_check_fit(mask, potential_mask)) { | ||
453 | slice_dbg(" fits potential !\n"); | ||
454 | goto convert; | ||
455 | } | ||
456 | } | ||
457 | |||
458 | /* If we have MAP_FIXED and failed the above step, then error out */ | ||
459 | if (fixed) | ||
460 | return -EBUSY; | ||
461 | |||
462 | search: | ||
463 | slice_dbg(" search...\n"); | ||
464 | |||
465 | /* Now let's see if we can find something in the existing slices | ||
466 | * for that size | ||
467 | */ | ||
468 | addr = slice_find_area(mm, len, good_mask, psize, topdown, use_cache); | ||
469 | if (addr != -ENOMEM) { | ||
470 | /* Found within the good mask; no conversion is needed, | ||
471 | * so we return directly | ||
472 | */ | ||
473 | slice_dbg(" found area at 0x%lx\n", addr); | ||
474 | return addr; | ||
475 | } | ||
476 | |||
477 | /* Won't fit, check what can be converted */ | ||
478 | if (!pmask_set) { | ||
479 | potential_mask = slice_mask_for_free(mm); | ||
480 | potential_mask.low_slices |= good_mask.low_slices; | ||
481 | potential_mask.high_slices |= good_mask.high_slices; | ||
482 | pmask_set = 1; | ||
483 | slice_print_mask(" potential", potential_mask); | ||
484 | } | ||
485 | |||
486 | /* Now let's see if we can find something in the existing slices | ||
487 | * for that size | ||
488 | */ | ||
489 | addr = slice_find_area(mm, len, potential_mask, psize, topdown, | ||
490 | use_cache); | ||
491 | if (addr == -ENOMEM) | ||
492 | return -ENOMEM; | ||
493 | |||
494 | mask = slice_range_to_mask(addr, len); | ||
495 | slice_dbg(" found potential area at 0x%lx\n", addr); | ||
496 | slice_print_mask(" mask", mask); | ||
497 | |||
498 | convert: | ||
499 | slice_convert(mm, mask, psize); | ||
500 | return addr; | ||
501 | |||
502 | } | ||
503 | EXPORT_SYMBOL_GPL(slice_get_unmapped_area); | ||
504 | |||
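Because slice_get_unmapped_area() is exported, other kernel code can ask for a range backed by a specific page size. A hedged caller sketch, modelled on the spufs hook further down in this diff (the function name is hypothetical):

    /* Top-down, uncached search for a range living in 64K-page slices. */
    static unsigned long example_64k_get_unmapped_area(unsigned long addr,
                                                       unsigned long len,
                                                       unsigned long flags)
    {
            return slice_get_unmapped_area(addr, len, flags,
                                           MMU_PAGE_64K, 1, 0);
    }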
505 | unsigned long arch_get_unmapped_area(struct file *filp, | ||
506 | unsigned long addr, | ||
507 | unsigned long len, | ||
508 | unsigned long pgoff, | ||
509 | unsigned long flags) | ||
510 | { | ||
511 | return slice_get_unmapped_area(addr, len, flags, | ||
512 | current->mm->context.user_psize, | ||
513 | 0, 1); | ||
514 | } | ||
515 | |||
516 | unsigned long arch_get_unmapped_area_topdown(struct file *filp, | ||
517 | const unsigned long addr0, | ||
518 | const unsigned long len, | ||
519 | const unsigned long pgoff, | ||
520 | const unsigned long flags) | ||
521 | { | ||
522 | return slice_get_unmapped_area(addr0, len, flags, | ||
523 | current->mm->context.user_psize, | ||
524 | 1, 1); | ||
525 | } | ||
526 | |||
527 | unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) | ||
528 | { | ||
529 | u64 psizes; | ||
530 | int index; | ||
531 | |||
532 | if (addr < SLICE_LOW_TOP) { | ||
533 | psizes = mm->context.low_slices_psize; | ||
534 | index = GET_LOW_SLICE_INDEX(addr); | ||
535 | } else { | ||
536 | psizes = mm->context.high_slices_psize; | ||
537 | index = GET_HIGH_SLICE_INDEX(addr); | ||
538 | } | ||
539 | |||
540 | return (psizes >> (index * 4)) & 0xf; | ||
541 | } | ||
542 | EXPORT_SYMBOL_GPL(get_slice_psize); | ||
543 | |||
544 | /* | ||
545 | * This is called by hash_page when it needs to do a lazy conversion of | ||
546 | * an address space from real 64K pages to combo 4K pages (typically | ||
547 | * when hitting a non-cacheable mapping on a processor or hypervisor | ||
548 | * that won't allow them for 64K pages). | ||
549 | * | ||
550 | * This is also called in init_new_context() to change back the user | ||
551 | * psize from whatever the parent context had it set to. | ||
552 | * | ||
553 | * This function will only change the content of the {low,high}_slice_psize | ||
554 | * masks; it will not flush SLBs, as that is handled lazily by the | ||
555 | * caller. | ||
556 | */ | ||
557 | void slice_set_user_psize(struct mm_struct *mm, unsigned int psize) | ||
558 | { | ||
559 | unsigned long flags, lpsizes, hpsizes; | ||
560 | unsigned int old_psize; | ||
561 | int i; | ||
562 | |||
563 | slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize); | ||
564 | |||
565 | spin_lock_irqsave(&slice_convert_lock, flags); | ||
566 | |||
567 | old_psize = mm->context.user_psize; | ||
568 | slice_dbg(" old_psize=%d\n", old_psize); | ||
569 | if (old_psize == psize) | ||
570 | goto bail; | ||
571 | |||
572 | mm->context.user_psize = psize; | ||
573 | wmb(); | ||
574 | |||
575 | lpsizes = mm->context.low_slices_psize; | ||
576 | for (i = 0; i < SLICE_NUM_LOW; i++) | ||
577 | if (((lpsizes >> (i * 4)) & 0xf) == old_psize) | ||
578 | lpsizes = (lpsizes & ~(0xful << (i * 4))) | | ||
579 | (((unsigned long)psize) << (i * 4)); | ||
580 | |||
581 | hpsizes = mm->context.high_slices_psize; | ||
582 | for (i = 0; i < SLICE_NUM_HIGH; i++) | ||
583 | if (((hpsizes >> (i * 4)) & 0xf) == old_psize) | ||
584 | hpsizes = (hpsizes & ~(0xful << (i * 4))) | | ||
585 | (((unsigned long)psize) << (i * 4)); | ||
586 | |||
587 | mm->context.low_slices_psize = lpsizes; | ||
588 | mm->context.high_slices_psize = hpsizes; | ||
589 | |||
590 | slice_dbg(" lsps=%lx, hsps=%lx\n", | ||
591 | mm->context.low_slices_psize, | ||
592 | mm->context.high_slices_psize); | ||
593 | |||
594 | bail: | ||
595 | spin_unlock_irqrestore(&slice_convert_lock, flags); | ||
596 | } | ||
597 | |||
598 | /* | ||
599 | * is_hugepage_only_range() is used by generic code to verify whether | ||
600 | * a normal mmap mapping (non-hugetlbfs) is valid on a given area. | ||
601 | * | ||
602 | * until the generic code provides a more generic hook and/or starts | ||
603 | * calling arch get_unmapped_area for MAP_FIXED (which our implementation | ||
604 | * here knows how to deal with), we hijack it to keep standard mappings | ||
605 | * away from us. | ||
606 | * | ||
607 | * Because of that generic code limitation, MAP_FIXED mappings cannot | ||
608 | * "convert" back a slice with no VMAs to the standard page size; only | ||
609 | * get_unmapped_area() can. It would be possible to fix it here but I | ||
610 | * prefer working on fixing the generic code instead. | ||
611 | * | ||
612 | * WARNING: This will not work if hugetlbfs isn't enabled since the | ||
613 | * generic code will redefine that function as 0 in that case. This is ok | ||
614 | * for now as we only use slices with hugetlbfs enabled. This should | ||
615 | * be fixed as the generic code gets fixed. | ||
616 | */ | ||
617 | int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, | ||
618 | unsigned long len) | ||
619 | { | ||
620 | struct slice_mask mask, available; | ||
621 | |||
622 | mask = slice_range_to_mask(addr, len); | ||
623 | available = slice_mask_for_size(mm, mm->context.user_psize); | ||
624 | |||
625 | #if 0 /* too verbose */ | ||
626 | slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n", | ||
627 | mm, addr, len); | ||
628 | slice_print_mask(" mask", mask); | ||
629 | slice_print_mask(" available", available); | ||
630 | #endif | ||
631 | return !slice_check_fit(mask, available); | ||
632 | } | ||
633 | |||
diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c index 925ff70be8ba..6a69417cbc0e 100644 --- a/arch/powerpc/mm/tlb_32.c +++ b/arch/powerpc/mm/tlb_32.c | |||
@@ -111,7 +111,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start, | |||
111 | if (start >= end) | 111 | if (start >= end) |
112 | return; | 112 | return; |
113 | end = (end - 1) | ~PAGE_MASK; | 113 | end = (end - 1) | ~PAGE_MASK; |
114 | pmd = pmd_offset(pgd_offset(mm, start), start); | 114 | pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start); |
115 | for (;;) { | 115 | for (;;) { |
116 | pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; | 116 | pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; |
117 | if (pmd_end > end) | 117 | if (pmd_end > end) |
@@ -169,7 +169,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) | |||
169 | return; | 169 | return; |
170 | } | 170 | } |
171 | mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; | 171 | mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; |
172 | pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr); | 172 | pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr); |
173 | if (!pmd_none(*pmd)) | 173 | if (!pmd_none(*pmd)) |
174 | flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); | 174 | flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); |
175 | FINISH_FLUSH; | 175 | FINISH_FLUSH; |
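Both hunks above replace the old two-level shortcut with the generic four-level walk (part of the 4level-fixup removal mentioned in the merge log). A minimal kernel-context sketch of the walk now being performed; on ppc32 the pud level is folded, so the result is unchanged:

    static inline pmd_t *walk_to_pmd(struct mm_struct *mm, unsigned long addr)
    {
            pgd_t *pgd = pgd_offset(mm, addr);      /* top-level entry */
            pud_t *pud = pud_offset(pgd, addr);     /* folded away on ppc32 */

            return pmd_offset(pud, addr);
    }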
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c index fd8d08c325eb..2bfc4d7e1aa2 100644 --- a/arch/powerpc/mm/tlb_64.c +++ b/arch/powerpc/mm/tlb_64.c | |||
@@ -143,16 +143,22 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, | |||
143 | */ | 143 | */ |
144 | addr &= PAGE_MASK; | 144 | addr &= PAGE_MASK; |
145 | 145 | ||
146 | /* Get page size (maybe move back to caller) */ | 146 | /* Get page size (maybe move back to caller). |
147 | * | ||
148 | * NOTE: when using special 64K mappings in 4K environment like | ||
149 | * for SPEs, we obtain the page size from the slice, which thus | ||
150 | * must still exist (and thus the VMA not reused) at the time | ||
151 | * of this call | ||
152 | */ | ||
147 | if (huge) { | 153 | if (huge) { |
148 | #ifdef CONFIG_HUGETLB_PAGE | 154 | #ifdef CONFIG_HUGETLB_PAGE |
149 | psize = mmu_huge_psize; | 155 | psize = mmu_huge_psize; |
150 | #else | 156 | #else |
151 | BUG(); | 157 | BUG(); |
152 | psize = pte_pagesize_index(pte); /* shutup gcc */ | 158 | psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ |
153 | #endif | 159 | #endif |
154 | } else | 160 | } else |
155 | psize = pte_pagesize_index(pte); | 161 | psize = pte_pagesize_index(mm, addr, pte); |
156 | 162 | ||
157 | /* Build full vaddr */ | 163 | /* Build full vaddr */ |
158 | if (!is_kernel_addr(addr)) { | 164 | if (!is_kernel_addr(addr)) { |
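The hunk above passes mm and addr to pte_pagesize_index() so that a 4K kernel with 64K hash support can take the page size from the slice map instead of from the PTE. The real definitions live in the pgtable headers changed earlier in this series (not shown here); as an assumption, the 4K-page variant is expected to reduce to roughly:

    #ifdef CONFIG_PPC_MM_SLICES
    #define pte_pagesize_index(mm, addr, pte)       get_slice_psize(mm, addr)
    #else
    #define pte_pagesize_index(mm, addr, pte)       MMU_PAGE_4K
    #endif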
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c index 7ef0c6854799..ba55b0ff0f74 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c | |||
@@ -15,8 +15,8 @@ | |||
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/delay.h> | 16 | #include <linux/delay.h> |
17 | 17 | ||
18 | #include <asm/pgtable.h> | ||
19 | #include <asm/page.h> | 18 | #include <asm/page.h> |
19 | #include <asm/pgtable.h> | ||
20 | #include <asm/pci-bridge.h> | 20 | #include <asm/pci-bridge.h> |
21 | #include <asm-powerpc/mpic.h> | 21 | #include <asm-powerpc/mpic.h> |
22 | #include <asm/mpc86xx.h> | 22 | #include <asm/mpc86xx.h> |
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 82551770917c..9b2b386ccf48 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig | |||
@@ -35,6 +35,21 @@ config SPU_FS | |||
35 | Units on machines implementing the Broadband Processor | 35 | Units on machines implementing the Broadband Processor |
36 | Architecture. | 36 | Architecture. |
37 | 37 | ||
38 | config SPU_FS_64K_LS | ||
39 | bool "Use 64K pages to map SPE local store" | ||
40 | # we depend on PPC_MM_SLICES for now rather than selecting | ||
41 | # it because we depend on hugetlbfs hooks being present. We | ||
42 | # will fix that when the generic code has been improved to | ||
43 | # not require hijacking hugetlbfs hooks. | ||
44 | depends on SPU_FS && PPC_MM_SLICES && !PPC_64K_PAGES | ||
45 | default y | ||
46 | select PPC_HAS_HASH_64K | ||
47 | help | ||
48 | This option causes SPE local stores to be mapped in process | ||
49 | address spaces using 64K pages while the rest of the kernel | ||
50 | uses 4K pages. This can improve the performance of applications | ||
51 | using multiple SPEs by lowering the TLB pressure on them. | ||
52 | |||
38 | config SPU_BASE | 53 | config SPU_BASE |
39 | bool | 54 | bool |
40 | default n | 55 | default n |
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index fec51525252e..a7f5a7653c62 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c | |||
@@ -144,12 +144,11 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) | |||
144 | 144 | ||
145 | switch(REGION_ID(ea)) { | 145 | switch(REGION_ID(ea)) { |
146 | case USER_REGION_ID: | 146 | case USER_REGION_ID: |
147 | #ifdef CONFIG_HUGETLB_PAGE | 147 | #ifdef CONFIG_PPC_MM_SLICES |
148 | if (in_hugepage_area(mm->context, ea)) | 148 | psize = get_slice_psize(mm, ea); |
149 | psize = mmu_huge_psize; | 149 | #else |
150 | else | 150 | psize = mm->context.user_psize; |
151 | #endif | 151 | #endif |
152 | psize = mm->context.user_psize; | ||
153 | vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) | | 152 | vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) | |
154 | SLB_VSID_USER; | 153 | SLB_VSID_USER; |
155 | break; | 154 | break; |
diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile index 2cd89c11af5a..328afcf89503 100644 --- a/arch/powerpc/platforms/cell/spufs/Makefile +++ b/arch/powerpc/platforms/cell/spufs/Makefile | |||
@@ -1,4 +1,4 @@ | |||
1 | obj-y += switch.o fault.o | 1 | obj-y += switch.o fault.o lscsa_alloc.o |
2 | 2 | ||
3 | obj-$(CONFIG_SPU_FS) += spufs.o | 3 | obj-$(CONFIG_SPU_FS) += spufs.o |
4 | spufs-y += inode.o file.o context.o syscalls.o coredump.o | 4 | spufs-y += inode.o file.o context.o syscalls.o coredump.o |
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index a87d9ca3dba2..8654749e317b 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c | |||
@@ -36,10 +36,8 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) | |||
36 | /* Binding to physical processor deferred | 36 | /* Binding to physical processor deferred |
37 | * until spu_activate(). | 37 | * until spu_activate(). |
38 | */ | 38 | */ |
39 | spu_init_csa(&ctx->csa); | 39 | if (spu_init_csa(&ctx->csa)) |
40 | if (!ctx->csa.lscsa) { | ||
41 | goto out_free; | 40 | goto out_free; |
42 | } | ||
43 | spin_lock_init(&ctx->mmio_lock); | 41 | spin_lock_init(&ctx->mmio_lock); |
44 | spin_lock_init(&ctx->mapping_lock); | 42 | spin_lock_init(&ctx->mapping_lock); |
45 | kref_init(&ctx->kref); | 43 | kref_init(&ctx->kref); |
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index d010b2464a98..45614c73c784 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c | |||
@@ -118,14 +118,32 @@ spufs_mem_write(struct file *file, const char __user *buffer, | |||
118 | static unsigned long spufs_mem_mmap_nopfn(struct vm_area_struct *vma, | 118 | static unsigned long spufs_mem_mmap_nopfn(struct vm_area_struct *vma, |
119 | unsigned long address) | 119 | unsigned long address) |
120 | { | 120 | { |
121 | struct spu_context *ctx = vma->vm_file->private_data; | 121 | struct spu_context *ctx = vma->vm_file->private_data; |
122 | unsigned long pfn, offset = address - vma->vm_start; | 122 | unsigned long pfn, offset, addr0 = address; |
123 | 123 | #ifdef CONFIG_SPU_FS_64K_LS | |
124 | offset += vma->vm_pgoff << PAGE_SHIFT; | 124 | struct spu_state *csa = &ctx->csa; |
125 | int psize; | ||
126 | |||
127 | /* Check what page size we are using */ | ||
128 | psize = get_slice_psize(vma->vm_mm, address); | ||
129 | |||
130 | /* Some sanity checking */ | ||
131 | BUG_ON(csa->use_big_pages != (psize == MMU_PAGE_64K)); | ||
132 | |||
133 | /* Wow, 64K, cool, we need to align the address though */ | ||
134 | if (csa->use_big_pages) { | ||
135 | BUG_ON(vma->vm_start & 0xffff); | ||
136 | address &= ~0xfffful; | ||
137 | } | ||
138 | #endif /* CONFIG_SPU_FS_64K_LS */ | ||
125 | 139 | ||
140 | offset = (address - vma->vm_start) + (vma->vm_pgoff << PAGE_SHIFT); | ||
126 | if (offset >= LS_SIZE) | 141 | if (offset >= LS_SIZE) |
127 | return NOPFN_SIGBUS; | 142 | return NOPFN_SIGBUS; |
128 | 143 | ||
144 | pr_debug("spufs_mem_mmap_nopfn address=0x%lx -> 0x%lx, offset=0x%lx\n", | ||
145 | addr0, address, offset); | ||
146 | |||
129 | spu_acquire(ctx); | 147 | spu_acquire(ctx); |
130 | 148 | ||
131 | if (ctx->state == SPU_STATE_SAVED) { | 149 | if (ctx->state == SPU_STATE_SAVED) { |
@@ -149,9 +167,24 @@ static struct vm_operations_struct spufs_mem_mmap_vmops = { | |||
149 | .nopfn = spufs_mem_mmap_nopfn, | 167 | .nopfn = spufs_mem_mmap_nopfn, |
150 | }; | 168 | }; |
151 | 169 | ||
152 | static int | 170 | static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) |
153 | spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) | 171 | { |
154 | { | 172 | #ifdef CONFIG_SPU_FS_64K_LS |
173 | struct spu_context *ctx = file->private_data; | ||
174 | struct spu_state *csa = &ctx->csa; | ||
175 | |||
176 | /* Sanity check VMA alignment */ | ||
177 | if (csa->use_big_pages) { | ||
178 | pr_debug("spufs_mem_mmap 64K, start=0x%lx, end=0x%lx," | ||
179 | " pgoff=0x%lx\n", vma->vm_start, vma->vm_end, | ||
180 | vma->vm_pgoff); | ||
181 | if (vma->vm_start & 0xffff) | ||
182 | return -EINVAL; | ||
183 | if (vma->vm_pgoff & 0xf) | ||
184 | return -EINVAL; | ||
185 | } | ||
186 | #endif /* CONFIG_SPU_FS_64K_LS */ | ||
187 | |||
155 | if (!(vma->vm_flags & VM_SHARED)) | 188 | if (!(vma->vm_flags & VM_SHARED)) |
156 | return -EINVAL; | 189 | return -EINVAL; |
157 | 190 | ||
@@ -163,13 +196,34 @@ spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) | |||
163 | return 0; | 196 | return 0; |
164 | } | 197 | } |
165 | 198 | ||
199 | #ifdef CONFIG_SPU_FS_64K_LS | ||
200 | unsigned long spufs_get_unmapped_area(struct file *file, unsigned long addr, | ||
201 | unsigned long len, unsigned long pgoff, | ||
202 | unsigned long flags) | ||
203 | { | ||
204 | struct spu_context *ctx = file->private_data; | ||
205 | struct spu_state *csa = &ctx->csa; | ||
206 | |||
207 | /* If not using big pages, fallback to normal MM g_u_a */ | ||
208 | if (!csa->use_big_pages) | ||
209 | return current->mm->get_unmapped_area(file, addr, len, | ||
210 | pgoff, flags); | ||
211 | |||
212 | /* Else, try to obtain a 64K pages slice */ | ||
213 | return slice_get_unmapped_area(addr, len, flags, | ||
214 | MMU_PAGE_64K, 1, 0); | ||
215 | } | ||
216 | #endif /* CONFIG_SPU_FS_64K_LS */ | ||
217 | |||
166 | static const struct file_operations spufs_mem_fops = { | 218 | static const struct file_operations spufs_mem_fops = { |
167 | .open = spufs_mem_open, | 219 | .open = spufs_mem_open, |
168 | .release = spufs_mem_release, | 220 | .read = spufs_mem_read, |
169 | .read = spufs_mem_read, | 221 | .write = spufs_mem_write, |
170 | .write = spufs_mem_write, | 222 | .llseek = generic_file_llseek, |
171 | .llseek = generic_file_llseek, | 223 | .mmap = spufs_mem_mmap, |
172 | .mmap = spufs_mem_mmap, | 224 | #ifdef CONFIG_SPU_FS_64K_LS |
225 | .get_unmapped_area = spufs_get_unmapped_area, | ||
226 | #endif | ||
173 | }; | 227 | }; |
174 | 228 | ||
175 | static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma, | 229 | static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma, |
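With CONFIG_SPU_FS_64K_LS enabled, a user mapping of the spufs mem file is routed through spufs_get_unmapped_area() above, so the local store lands in a 64K slice without any change to the application. A hedged userspace sketch (mem_fd is assumed to be an open spufs mem file; the SPE local store is 256K):

    #include <stddef.h>
    #include <sys/mman.h>

    #define LS_SIZE (256 * 1024)    /* SPE local store size */

    static void *map_local_store(int mem_fd)
    {
            return mmap(NULL, LS_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                        mem_fd, 0);
    }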
diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c new file mode 100644 index 000000000000..f4b3c052dabf --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c | |||
@@ -0,0 +1,181 @@ | |||
1 | /* | ||
2 | * SPU local store allocation routines | ||
3 | * | ||
4 | * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
19 | */ | ||
20 | |||
21 | #undef DEBUG | ||
22 | |||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/mm.h> | ||
25 | #include <linux/vmalloc.h> | ||
26 | |||
27 | #include <asm/spu.h> | ||
28 | #include <asm/spu_csa.h> | ||
29 | #include <asm/mmu.h> | ||
30 | |||
31 | static int spu_alloc_lscsa_std(struct spu_state *csa) | ||
32 | { | ||
33 | struct spu_lscsa *lscsa; | ||
34 | unsigned char *p; | ||
35 | |||
36 | lscsa = vmalloc(sizeof(struct spu_lscsa)); | ||
37 | if (!lscsa) | ||
38 | return -ENOMEM; | ||
39 | memset(lscsa, 0, sizeof(struct spu_lscsa)); | ||
40 | csa->lscsa = lscsa; | ||
41 | |||
42 | /* Set LS pages reserved to allow for user-space mapping. */ | ||
43 | for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE) | ||
44 | SetPageReserved(vmalloc_to_page(p)); | ||
45 | |||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | static void spu_free_lscsa_std(struct spu_state *csa) | ||
50 | { | ||
51 | /* Clear reserved bit before vfree. */ | ||
52 | unsigned char *p; | ||
53 | |||
54 | if (csa->lscsa == NULL) | ||
55 | return; | ||
56 | |||
57 | for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) | ||
58 | ClearPageReserved(vmalloc_to_page(p)); | ||
59 | |||
60 | vfree(csa->lscsa); | ||
61 | } | ||
62 | |||
63 | #ifdef CONFIG_SPU_FS_64K_LS | ||
64 | |||
65 | #define SPU_64K_PAGE_SHIFT 16 | ||
66 | #define SPU_64K_PAGE_ORDER (SPU_64K_PAGE_SHIFT - PAGE_SHIFT) | ||
67 | #define SPU_64K_PAGE_COUNT (1ul << SPU_64K_PAGE_ORDER) | ||
68 | |||
69 | int spu_alloc_lscsa(struct spu_state *csa) | ||
70 | { | ||
71 | struct page **pgarray; | ||
72 | unsigned char *p; | ||
73 | int i, j, n_4k; | ||
74 | |||
75 | /* Check availability of 64K pages */ | ||
76 | if (mmu_psize_defs[MMU_PAGE_64K].shift == 0) | ||
77 | goto fail; | ||
78 | |||
79 | csa->use_big_pages = 1; | ||
80 | |||
81 | pr_debug("spu_alloc_lscsa(csa=0x%p), trying to allocate 64K pages\n", | ||
82 | csa); | ||
83 | |||
84 | /* First try to allocate our 64K pages. We need 5 of them | ||
85 | * with the current implementation. In the future, we should try | ||
86 | * to separate the lscsa from the actual local store image, thus | ||
87 | * allowing us to require only four 64K pages per context. | ||
88 | */ | ||
89 | for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) { | ||
90 | /* XXX This is likely to fail, we should use a special pool | ||
91 | * similar to what hugetlbfs does. | ||
92 | */ | ||
93 | csa->lscsa_pages[i] = alloc_pages(GFP_KERNEL, | ||
94 | SPU_64K_PAGE_ORDER); | ||
95 | if (csa->lscsa_pages[i] == NULL) | ||
96 | goto fail; | ||
97 | } | ||
98 | |||
99 | pr_debug(" success ! creating vmap...\n"); | ||
100 | |||
101 | /* Now we need to create a vmalloc mapping of these for the kernel | ||
102 | * and SPU context switch code to use. Currently, we stick to a | ||
103 | * normal kernel vmalloc mapping, which in our case will be 4K | ||
104 | */ | ||
105 | n_4k = SPU_64K_PAGE_COUNT * SPU_LSCSA_NUM_BIG_PAGES; | ||
106 | pgarray = kmalloc(sizeof(struct page *) * n_4k, GFP_KERNEL); | ||
107 | if (pgarray == NULL) | ||
108 | goto fail; | ||
109 | for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) | ||
110 | for (j = 0; j < SPU_64K_PAGE_COUNT; j++) | ||
111 | /* We assume all the struct pages are contiguous, | ||
112 | * which should hopefully be the case for an order-4 | ||
113 | * allocation. | ||
114 | */ | ||
115 | pgarray[i * SPU_64K_PAGE_COUNT + j] = | ||
116 | csa->lscsa_pages[i] + j; | ||
117 | csa->lscsa = vmap(pgarray, n_4k, VM_USERMAP, PAGE_KERNEL); | ||
118 | kfree(pgarray); | ||
119 | if (csa->lscsa == NULL) | ||
120 | goto fail; | ||
121 | |||
122 | memset(csa->lscsa, 0, sizeof(struct spu_lscsa)); | ||
123 | |||
124 | /* Set LS pages reserved to allow for user-space mapping. | ||
125 | * | ||
126 | * XXX isn't that a bit obsolete? I think we should just | ||
127 | * make sure the page count is high enough. Anyway, won't harm | ||
128 | * for now. | ||
129 | */ | ||
130 | for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) | ||
131 | SetPageReserved(vmalloc_to_page(p)); | ||
132 | |||
133 | pr_debug(" all good !\n"); | ||
134 | |||
135 | return 0; | ||
136 | fail: | ||
137 | pr_debug("spufs: failed to allocate lscsa 64K pages, falling back\n"); | ||
138 | spu_free_lscsa(csa); | ||
139 | return spu_alloc_lscsa_std(csa); | ||
140 | } | ||
141 | |||
142 | void spu_free_lscsa(struct spu_state *csa) | ||
143 | { | ||
144 | unsigned char *p; | ||
145 | int i; | ||
146 | |||
147 | if (!csa->use_big_pages) { | ||
148 | spu_free_lscsa_std(csa); | ||
149 | return; | ||
150 | } | ||
151 | csa->use_big_pages = 0; | ||
152 | |||
153 | if (csa->lscsa == NULL) | ||
154 | goto free_pages; | ||
155 | |||
156 | for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) | ||
157 | ClearPageReserved(vmalloc_to_page(p)); | ||
158 | |||
159 | vunmap(csa->lscsa); | ||
160 | csa->lscsa = NULL; | ||
161 | |||
162 | free_pages: | ||
163 | |||
164 | for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) | ||
165 | if (csa->lscsa_pages[i]) | ||
166 | __free_pages(csa->lscsa_pages[i], SPU_64K_PAGE_ORDER); | ||
167 | } | ||
168 | |||
169 | #else /* CONFIG_SPU_FS_64K_LS */ | ||
170 | |||
171 | int spu_alloc_lscsa(struct spu_state *csa) | ||
172 | { | ||
173 | return spu_alloc_lscsa_std(csa); | ||
174 | } | ||
175 | |||
176 | void spu_free_lscsa(struct spu_state *csa) | ||
177 | { | ||
178 | spu_free_lscsa_std(csa); | ||
179 | } | ||
180 | |||
181 | #endif /* !defined(CONFIG_SPU_FS_64K_LS) */ | ||
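For concreteness, the sizing in spu_alloc_lscsa() works out as follows on a 4K base-page kernel (PAGE_SHIFT == 12), taking SPU_LSCSA_NUM_BIG_PAGES == 5 from the comment above:

    SPU_64K_PAGE_ORDER = 16 - 12 = 4     (each 64K chunk is an order-4 allocation)
    SPU_64K_PAGE_COUNT = 1 << 4  = 16    (16 base pages per 64K chunk)
    n_4k               = 16 * 5  = 80    (struct page pointers handed to vmap())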
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 29dc59cefc38..71a0b41adb8c 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c | |||
@@ -2188,40 +2188,30 @@ static void init_priv2(struct spu_state *csa) | |||
2188 | * as it is by far the largest of the context save regions, | 2188 | * as it is by far the largest of the context save regions, |
2189 | * and may need to be pinned or otherwise specially aligned. | 2189 | * and may need to be pinned or otherwise specially aligned. |
2190 | */ | 2190 | */ |
2191 | void spu_init_csa(struct spu_state *csa) | 2191 | int spu_init_csa(struct spu_state *csa) |
2192 | { | 2192 | { |
2193 | struct spu_lscsa *lscsa; | 2193 | int rc; |
2194 | unsigned char *p; | ||
2195 | 2194 | ||
2196 | if (!csa) | 2195 | if (!csa) |
2197 | return; | 2196 | return -EINVAL; |
2198 | memset(csa, 0, sizeof(struct spu_state)); | 2197 | memset(csa, 0, sizeof(struct spu_state)); |
2199 | 2198 | ||
2200 | lscsa = vmalloc(sizeof(struct spu_lscsa)); | 2199 | rc = spu_alloc_lscsa(csa); |
2201 | if (!lscsa) | 2200 | if (rc) |
2202 | return; | 2201 | return rc; |
2203 | 2202 | ||
2204 | memset(lscsa, 0, sizeof(struct spu_lscsa)); | ||
2205 | csa->lscsa = lscsa; | ||
2206 | spin_lock_init(&csa->register_lock); | 2203 | spin_lock_init(&csa->register_lock); |
2207 | 2204 | ||
2208 | /* Set LS pages reserved to allow for user-space mapping. */ | ||
2209 | for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE) | ||
2210 | SetPageReserved(vmalloc_to_page(p)); | ||
2211 | |||
2212 | init_prob(csa); | 2205 | init_prob(csa); |
2213 | init_priv1(csa); | 2206 | init_priv1(csa); |
2214 | init_priv2(csa); | 2207 | init_priv2(csa); |
2208 | |||
2209 | return 0; | ||
2215 | } | 2210 | } |
2216 | EXPORT_SYMBOL_GPL(spu_init_csa); | 2211 | EXPORT_SYMBOL_GPL(spu_init_csa); |
2217 | 2212 | ||
2218 | void spu_fini_csa(struct spu_state *csa) | 2213 | void spu_fini_csa(struct spu_state *csa) |
2219 | { | 2214 | { |
2220 | /* Clear reserved bit before vfree. */ | 2215 | spu_free_lscsa(csa); |
2221 | unsigned char *p; | ||
2222 | for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) | ||
2223 | ClearPageReserved(vmalloc_to_page(p)); | ||
2224 | |||
2225 | vfree(csa->lscsa); | ||
2226 | } | 2216 | } |
2227 | EXPORT_SYMBOL_GPL(spu_fini_csa); | 2217 | EXPORT_SYMBOL_GPL(spu_fini_csa); |
diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig index 46c3a8e7c3a8..761d9e971fc4 100644 --- a/arch/powerpc/platforms/iseries/Kconfig +++ b/arch/powerpc/platforms/iseries/Kconfig | |||
@@ -7,7 +7,9 @@ menu "iSeries device drivers" | |||
7 | depends on PPC_ISERIES | 7 | depends on PPC_ISERIES |
8 | 8 | ||
9 | config VIOCONS | 9 | config VIOCONS |
10 | tristate "iSeries Virtual Console Support (Obsolete)" | 10 | bool "iSeries Virtual Console Support (Obsolete)" |
11 | depends on !HVC_ISERIES | ||
12 | default n | ||
11 | help | 13 | help |
12 | This is the old virtual console driver for legacy iSeries. | 14 | This is the old virtual console driver for legacy iSeries. |
13 | You should use the iSeries Hypervisor Virtual Console | 15 | You should use the iSeries Hypervisor Virtual Console |
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 63e23062e982..093438b93bd9 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c | |||
@@ -100,6 +100,9 @@ static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX]; | |||
100 | static DEFINE_SPINLOCK(slot_errbuf_lock); | 100 | static DEFINE_SPINLOCK(slot_errbuf_lock); |
101 | static int eeh_error_buf_size; | 101 | static int eeh_error_buf_size; |
102 | 102 | ||
103 | #define EEH_PCI_REGS_LOG_LEN 4096 | ||
104 | static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN]; | ||
105 | |||
103 | /* System monitoring statistics */ | 106 | /* System monitoring statistics */ |
104 | static unsigned long no_device; | 107 | static unsigned long no_device; |
105 | static unsigned long no_dn; | 108 | static unsigned long no_dn; |
@@ -115,7 +118,8 @@ static unsigned long slot_resets; | |||
115 | /* --------------------------------------------------------------- */ | 118 | /* --------------------------------------------------------------- */ |
116 | /* Below lies the EEH event infrastructure */ | 119 | /* Below lies the EEH event infrastructure */ |
117 | 120 | ||
118 | void eeh_slot_error_detail (struct pci_dn *pdn, int severity) | 121 | static void rtas_slot_error_detail(struct pci_dn *pdn, int severity, |
122 | char *driver_log, size_t loglen) | ||
119 | { | 123 | { |
120 | int config_addr; | 124 | int config_addr; |
121 | unsigned long flags; | 125 | unsigned long flags; |
@@ -133,7 +137,8 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity) | |||
133 | rc = rtas_call(ibm_slot_error_detail, | 137 | rc = rtas_call(ibm_slot_error_detail, |
134 | 8, 1, NULL, config_addr, | 138 | 8, 1, NULL, config_addr, |
135 | BUID_HI(pdn->phb->buid), | 139 | BUID_HI(pdn->phb->buid), |
136 | BUID_LO(pdn->phb->buid), NULL, 0, | 140 | BUID_LO(pdn->phb->buid), |
141 | virt_to_phys(driver_log), loglen, | ||
137 | virt_to_phys(slot_errbuf), | 142 | virt_to_phys(slot_errbuf), |
138 | eeh_error_buf_size, | 143 | eeh_error_buf_size, |
139 | severity); | 144 | severity); |
@@ -144,6 +149,84 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity) | |||
144 | } | 149 | } |
145 | 150 | ||
146 | /** | 151 | /** |
152 | * gather_pci_data - copy assorted PCI config space registers to buff | ||
153 | * @pdn: device to report data for | ||
154 | * @buf: pointer to the buffer in which to log | ||
155 | * @len: amount of room in buffer | ||
156 | * | ||
157 | * This routine captures assorted PCI configuration space data, | ||
158 | * and puts it into a buffer for RTAS error logging. | ||
159 | */ | ||
160 | static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) | ||
161 | { | ||
162 | u32 cfg; | ||
163 | int cap, i; | ||
164 | int n = 0; | ||
165 | |||
166 | n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name); | ||
167 | printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name); | ||
168 | |||
169 | rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg); | ||
170 | n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); | ||
171 | printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg); | ||
172 | |||
173 | rtas_read_config(pdn, PCI_COMMAND, 4, &cfg); | ||
174 | n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); | ||
175 | printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg); | ||
176 | |||
177 | /* Dump out the PCI-X command and status regs */ | ||
178 | cap = pci_find_capability(pdn->pcidev, PCI_CAP_ID_PCIX); | ||
179 | if (cap) { | ||
180 | rtas_read_config(pdn, cap, 4, &cfg); | ||
181 | n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); | ||
182 | printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg); | ||
183 | |||
184 | rtas_read_config(pdn, cap+4, 4, &cfg); | ||
185 | n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); | ||
186 | printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg); | ||
187 | } | ||
188 | |||
189 | /* If PCI-E capable, dump PCI-E cap 10, and the AER */ | ||
190 | cap = pci_find_capability(pdn->pcidev, PCI_CAP_ID_EXP); | ||
191 | if (cap) { | ||
192 | n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); | ||
193 | printk(KERN_WARNING | ||
194 | "EEH: PCI-E capabilities and status follow:\n"); | ||
195 | |||
196 | for (i=0; i<=8; i++) { | ||
197 | rtas_read_config(pdn, cap+4*i, 4, &cfg); | ||
198 | n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); | ||
199 | printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg); | ||
200 | } | ||
201 | |||
202 | cap = pci_find_ext_capability(pdn->pcidev,PCI_EXT_CAP_ID_ERR); | ||
203 | if (cap) { | ||
204 | n += scnprintf(buf+n, len-n, "pci-e AER:\n"); | ||
205 | printk(KERN_WARNING | ||
206 | "EEH: PCI-E AER capability register set follows:\n"); | ||
207 | |||
208 | for (i=0; i<14; i++) { | ||
209 | rtas_read_config(pdn, cap+4*i, 4, &cfg); | ||
210 | n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); | ||
211 | printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg); | ||
212 | } | ||
213 | } | ||
214 | } | ||
215 | return n; | ||
216 | } | ||
217 | |||
218 | void eeh_slot_error_detail(struct pci_dn *pdn, int severity) | ||
219 | { | ||
220 | size_t loglen = 0; | ||
221 | memset(pci_regs_buf, 0, EEH_PCI_REGS_LOG_LEN); | ||
222 | |||
223 | rtas_pci_enable(pdn, EEH_THAW_MMIO); | ||
224 | loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); | ||
225 | |||
226 | rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen); | ||
227 | } | ||
228 | |||
229 | /** | ||
147 | * read_slot_reset_state - Read the reset state of a device node's slot | 230 | * read_slot_reset_state - Read the reset state of a device node's slot |
148 | * @dn: device node to read | 231 | * @dn: device node to read |
149 | * @rets: array to return results in | 232 | * @rets: array to return results in |
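The new gather_pci_data() relies on the standard bounded-append idiom: scnprintf() returns the number of characters actually written (excluding the terminating NUL), so chaining buf + n, len - n can never overrun the buffer. A minimal kernel-context sketch of the same pattern (the helper name is mine):

    static size_t append_reg(char *buf, size_t len, size_t n,
                             const char *tag, u32 val)
    {
            /* Safe even when n reaches len: with size 0 nothing is written. */
            return n + scnprintf(buf + n, len - n, "%s:%08x\n", tag, val);
    }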
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 3170e003f76a..f07d849cfc84 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c | |||
@@ -361,11 +361,12 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) | |||
361 | goto hard_fail; | 361 | goto hard_fail; |
362 | } | 362 | } |
363 | 363 | ||
364 | eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */); | ||
365 | printk(KERN_WARNING | 364 | printk(KERN_WARNING |
366 | "EEH: This PCI device has failed %d times since last reboot: " | 365 | "EEH: This PCI device has failed %d times in the last hour:\n", |
367 | "location=%s driver=%s pci addr=%s\n", | 366 | frozen_pdn->eeh_freeze_count); |
368 | frozen_pdn->eeh_freeze_count, location, drv_str, pci_str); | 367 | printk(KERN_WARNING |
368 | "EEH: location=%s driver=%s pci addr=%s\n", | ||
369 | location, drv_str, pci_str); | ||
369 | 370 | ||
370 | /* Walk the various device drivers attached to this slot through | 371 | /* Walk the various device drivers attached to this slot through |
371 | * a reset sequence, giving each an opportunity to do what it needs | 372 | * a reset sequence, giving each an opportunity to do what it needs |
@@ -375,6 +376,11 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) | |||
375 | */ | 376 | */ |
376 | pci_walk_bus(frozen_bus, eeh_report_error, &result); | 377 | pci_walk_bus(frozen_bus, eeh_report_error, &result); |
377 | 378 | ||
379 | /* Since rtas may enable MMIO when posting the error log, | ||
380 | * don't post the error log until after all dev drivers | ||
381 | * have been informed. */ | ||
382 | eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */); | ||
383 | |||
378 | /* If all device drivers were EEH-unaware, then shut | 384 | /* If all device drivers were EEH-unaware, then shut |
379 | * down all of the device drivers, and hope they | 385 | * down all of the device drivers, and hope they |
380 | * go down willingly, without panicking the system. | 386 | * go down willingly, without panicking the system. |
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 8a123c71449f..cad175724359 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c | |||
@@ -907,7 +907,7 @@ static int __init fs_enet_of_init(void) | |||
907 | struct fs_platform_info fs_enet_data; | 907 | struct fs_platform_info fs_enet_data; |
908 | const unsigned int *id; | 908 | const unsigned int *id; |
909 | const unsigned int *phy_addr; | 909 | const unsigned int *phy_addr; |
910 | void *mac_addr; | 910 | const void *mac_addr; |
911 | const phandle *ph; | 911 | const phandle *ph; |
912 | const char *model; | 912 | const char *model; |
913 | 913 | ||
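The mac_addr constification matches the device-tree accessors of this era, which hand back pointers into the flattened tree that callers must not modify. A hedged one-line sketch (both the accessor name and the property name are recollections of the 2.6.2x API, so treat them as assumptions):

    const void *mac_addr = get_property(np, "local-mac-address", NULL);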