aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-05-09 15:56:01 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-05-09 15:56:01 -0400
commit aabded9c3aab5160ae2ca3dd1fa0fa37f3d510e4 (patch)
tree 8544d546735bcb975b8dec296eb9b6dc6531fb2a
parent 9a9136e270af14da506f66bcafcc506b86a86498 (diff)
parent f1a1eb299a8422c3e8d41753095bec44b2493398 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
* 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc:
  [POWERPC] Further fixes for the removal of 4level-fixup hack from ppc32
  [POWERPC] EEH: log all PCI-X and PCI-E AER registers
  [POWERPC] EEH: capture and log pci state on error
  [POWERPC] EEH: Split up long error msg
  [POWERPC] EEH: log error only after driver notification.
  [POWERPC] fsl_soc: Make mac_addr const in fs_enet_of_init().
  [POWERPC] Don't use SLAB/SLUB for PTE pages
  [POWERPC] Spufs support for 64K LS mappings on 4K kernels
  [POWERPC] Add ability to 4K kernel to hash in 64K pages
  [POWERPC] Introduce address space "slices"
  [POWERPC] Small fixes & cleanups in segment page size demotion
  [POWERPC] iSeries: Make HVC_ISERIES the default
  [POWERPC] iSeries: suppress build warning in lparmap.c
  [POWERPC] Mark pages that don't exist as nosave
  [POWERPC] swsusp: Introduce register_nosave_region_late
-rw-r--r-- arch/powerpc/Kconfig 24
-rw-r--r-- arch/powerpc/kernel/asm-offsets.c 16
-rw-r--r-- arch/powerpc/kernel/lparmap.c 3
-rw-r--r-- arch/powerpc/mm/Makefile 1
-rw-r--r-- arch/powerpc/mm/hash_low_64.S 5
-rw-r--r-- arch/powerpc/mm/hash_utils_64.c 142
-rw-r--r-- arch/powerpc/mm/hugetlbpage.c 548
-rw-r--r-- arch/powerpc/mm/init_64.c 17
-rw-r--r-- arch/powerpc/mm/mem.c 25
-rw-r--r-- arch/powerpc/mm/mmu_context_64.c 10
-rw-r--r-- arch/powerpc/mm/ppc_mmu_32.c 2
-rw-r--r-- arch/powerpc/mm/slb.c 11
-rw-r--r-- arch/powerpc/mm/slb_low.S 52
-rw-r--r-- arch/powerpc/mm/slice.c 633
-rw-r--r-- arch/powerpc/mm/tlb_32.c 4
-rw-r--r-- arch/powerpc/mm/tlb_64.c 12
-rw-r--r-- arch/powerpc/platforms/86xx/mpc86xx_smp.c 2
-rw-r--r-- arch/powerpc/platforms/cell/Kconfig 15
-rw-r--r-- arch/powerpc/platforms/cell/spu_base.c 9
-rw-r--r-- arch/powerpc/platforms/cell/spufs/Makefile 2
-rw-r--r-- arch/powerpc/platforms/cell/spufs/context.c 4
-rw-r--r-- arch/powerpc/platforms/cell/spufs/file.c 80
-rw-r--r-- arch/powerpc/platforms/cell/spufs/lscsa_alloc.c 181
-rw-r--r-- arch/powerpc/platforms/cell/spufs/switch.c 28
-rw-r--r-- arch/powerpc/platforms/iseries/Kconfig 4
-rw-r--r-- arch/powerpc/platforms/pseries/eeh.c 87
-rw-r--r-- arch/powerpc/platforms/pseries/eeh_driver.c 14
-rw-r--r-- arch/powerpc/sysdev/fsl_soc.c 2
-rw-r--r-- drivers/char/Kconfig 3
-rw-r--r-- include/asm-powerpc/mmu-hash64.h 11
-rw-r--r-- include/asm-powerpc/paca.h 2
-rw-r--r-- include/asm-powerpc/page_64.h 86
-rw-r--r-- include/asm-powerpc/pgalloc-64.h 31
-rw-r--r-- include/asm-powerpc/pgtable-4k.h 6
-rw-r--r-- include/asm-powerpc/pgtable-64k.h 7
-rw-r--r-- include/asm-powerpc/spu_csa.h 10
-rw-r--r-- include/linux/suspend.h 11
-rw-r--r-- kernel/power/snapshot.c 12
38 files changed, 1323 insertions, 789 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 808d2ef80e2f..ccc5410af996 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -120,19 +120,6 @@ config GENERIC_BUG
120config SYS_SUPPORTS_APM_EMULATION 120config SYS_SUPPORTS_APM_EMULATION
121 bool 121 bool
122 122
123#
124# Powerpc uses the slab allocator to manage its ptes and the
125# page structs of ptes are used for splitting the page table
126# lock for configurations supporting more than SPLIT_PTLOCK_CPUS.
127#
128# In that special configuration the page structs of slabs are modified.
129# This setting disables the selection of SLUB as a slab allocator.
130#
131config ARCH_USES_SLAB_PAGE_STRUCT
132 bool
133 default y
134 depends on SPLIT_PTLOCK_CPUS <= NR_CPUS
135
136config DEFAULT_UIMAGE 123config DEFAULT_UIMAGE
137 bool 124 bool
138 help 125 help
@@ -352,6 +339,11 @@ config PPC_STD_MMU_32
352 def_bool y 339 def_bool y
353 depends on PPC_STD_MMU && PPC32 340 depends on PPC_STD_MMU && PPC32
354 341
342config PPC_MM_SLICES
343 bool
344 default y if HUGETLB_PAGE
345 default n
346
355config VIRT_CPU_ACCOUNTING 347config VIRT_CPU_ACCOUNTING
356 bool "Deterministic task and CPU time accounting" 348 bool "Deterministic task and CPU time accounting"
357 depends on PPC64 349 depends on PPC64
@@ -541,9 +533,15 @@ config NODES_SPAN_OTHER_NODES
541 def_bool y 533 def_bool y
542 depends on NEED_MULTIPLE_NODES 534 depends on NEED_MULTIPLE_NODES
543 535
536config PPC_HAS_HASH_64K
537 bool
538 depends on PPC64
539 default n
540
544config PPC_64K_PAGES 541config PPC_64K_PAGES
545 bool "64k page size" 542 bool "64k page size"
546 depends on PPC64 543 depends on PPC64
544 select PPC_HAS_HASH_64K
547 help 545 help
548 This option changes the kernel logical page size to 64k. On machines 546 This option changes the kernel logical page size to 64k. On machines
549 without processor support for 64k pages, the kernel will simulate 547 without processor support for 64k pages, the kernel will simulate
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 37bc35e69dbe..2cb1d9487796 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -122,12 +122,18 @@ int main(void)
122 DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); 122 DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
123 DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); 123 DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
124 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); 124 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
125 DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
126 DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp)); 125 DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp));
127#ifdef CONFIG_HUGETLB_PAGE 126#ifdef CONFIG_PPC_MM_SLICES
128 DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); 127 DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
129 DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); 128 context.low_slices_psize));
130#endif /* CONFIG_HUGETLB_PAGE */ 129 DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct,
130 context.high_slices_psize));
131 DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
132 DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp));
133#else
134 DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
135
136#endif /* CONFIG_PPC_MM_SLICES */
131 DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); 137 DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
132 DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); 138 DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
133 DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb)); 139 DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb));
diff --git a/arch/powerpc/kernel/lparmap.c b/arch/powerpc/kernel/lparmap.c
index 584d1e3c013d..af11285ffbd1 100644
--- a/arch/powerpc/kernel/lparmap.c
+++ b/arch/powerpc/kernel/lparmap.c
@@ -10,7 +10,8 @@
10#include <asm/pgtable.h> 10#include <asm/pgtable.h>
11#include <asm/iseries/lpar_map.h> 11#include <asm/iseries/lpar_map.h>
12 12
13const struct LparMap __attribute__((__section__(".text"))) xLparMap = { 13/* The # is to stop gcc trying to make .text nonexecutable */
14const struct LparMap __attribute__((__section__(".text #"))) xLparMap = {
14 .xNumberEsids = HvEsidsToMap, 15 .xNumberEsids = HvEsidsToMap,
15 .xNumberRanges = HvRangesToMap, 16 .xNumberRanges = HvRangesToMap,
16 .xSegmentTableOffs = STAB0_PAGE, 17 .xSegmentTableOffs = STAB0_PAGE,
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 38a81967ca07..4f839c6a9768 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -18,4 +18,5 @@ obj-$(CONFIG_40x) += 4xx_mmu.o
18obj-$(CONFIG_44x) += 44x_mmu.o 18obj-$(CONFIG_44x) += 44x_mmu.o
19obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o 19obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o
20obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o 20obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
21obj-$(CONFIG_PPC_MM_SLICES) += slice.o
21obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o 22obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index e64ce3eec36e..4762ff7c14df 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -615,6 +615,9 @@ htab_pte_insert_failure:
615 li r3,-1 615 li r3,-1
616 b htab_bail 616 b htab_bail
617 617
618#endif /* CONFIG_PPC_64K_PAGES */
619
620#ifdef CONFIG_PPC_HAS_HASH_64K
618 621
619/***************************************************************************** 622/*****************************************************************************
620 * * 623 * *
@@ -870,7 +873,7 @@ ht64_pte_insert_failure:
870 b ht64_bail 873 b ht64_bail
871 874
872 875
873#endif /* CONFIG_PPC_64K_PAGES */ 876#endif /* CONFIG_PPC_HAS_HASH_64K */
874 877
875 878
876/***************************************************************************** 879/*****************************************************************************
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 9b226fa7006f..028ba4ed03d2 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -51,6 +51,7 @@
51#include <asm/cputable.h> 51#include <asm/cputable.h>
52#include <asm/abs_addr.h> 52#include <asm/abs_addr.h>
53#include <asm/sections.h> 53#include <asm/sections.h>
54#include <asm/spu.h>
54 55
55#ifdef DEBUG 56#ifdef DEBUG
56#define DBG(fmt...) udbg_printf(fmt) 57#define DBG(fmt...) udbg_printf(fmt)
@@ -419,7 +420,7 @@ static void __init htab_finish_init(void)
419 extern unsigned int *htab_call_hpte_remove; 420 extern unsigned int *htab_call_hpte_remove;
420 extern unsigned int *htab_call_hpte_updatepp; 421 extern unsigned int *htab_call_hpte_updatepp;
421 422
422#ifdef CONFIG_PPC_64K_PAGES 423#ifdef CONFIG_PPC_HAS_HASH_64K
423 extern unsigned int *ht64_call_hpte_insert1; 424 extern unsigned int *ht64_call_hpte_insert1;
424 extern unsigned int *ht64_call_hpte_insert2; 425 extern unsigned int *ht64_call_hpte_insert2;
425 extern unsigned int *ht64_call_hpte_remove; 426 extern unsigned int *ht64_call_hpte_remove;
@@ -596,22 +597,23 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
596 * Demote a segment to using 4k pages. 597 * Demote a segment to using 4k pages.
597 * For now this makes the whole process use 4k pages. 598 * For now this makes the whole process use 4k pages.
598 */ 599 */
599void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
600{
601#ifdef CONFIG_PPC_64K_PAGES 600#ifdef CONFIG_PPC_64K_PAGES
601static void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
602{
602 if (mm->context.user_psize == MMU_PAGE_4K) 603 if (mm->context.user_psize == MMU_PAGE_4K)
603 return; 604 return;
605#ifdef CONFIG_PPC_MM_SLICES
606 slice_set_user_psize(mm, MMU_PAGE_4K);
607#else /* CONFIG_PPC_MM_SLICES */
604 mm->context.user_psize = MMU_PAGE_4K; 608 mm->context.user_psize = MMU_PAGE_4K;
605 mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[MMU_PAGE_4K].sllp; 609 mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[MMU_PAGE_4K].sllp;
606 get_paca()->context = mm->context; 610#endif /* CONFIG_PPC_MM_SLICES */
607 slb_flush_and_rebolt(); 611
608#ifdef CONFIG_SPE_BASE 612#ifdef CONFIG_SPE_BASE
609 spu_flush_all_slbs(mm); 613 spu_flush_all_slbs(mm);
610#endif 614#endif
611#endif
612} 615}
613 616#endif /* CONFIG_PPC_64K_PAGES */
614EXPORT_SYMBOL_GPL(demote_segment_4k);
615 617
616/* Result code is: 618/* Result code is:
617 * 0 - handled 619 * 0 - handled
@@ -646,7 +648,11 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
646 return 1; 648 return 1;
647 } 649 }
648 vsid = get_vsid(mm->context.id, ea); 650 vsid = get_vsid(mm->context.id, ea);
651#ifdef CONFIG_PPC_MM_SLICES
652 psize = get_slice_psize(mm, ea);
653#else
649 psize = mm->context.user_psize; 654 psize = mm->context.user_psize;
655#endif
650 break; 656 break;
651 case VMALLOC_REGION_ID: 657 case VMALLOC_REGION_ID:
652 mm = &init_mm; 658 mm = &init_mm;
@@ -674,11 +680,22 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
674 if (user_region && cpus_equal(mm->cpu_vm_mask, tmp)) 680 if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
675 local = 1; 681 local = 1;
676 682
683#ifdef CONFIG_HUGETLB_PAGE
677 /* Handle hugepage regions */ 684 /* Handle hugepage regions */
678 if (unlikely(in_hugepage_area(mm->context, ea))) { 685 if (HPAGE_SHIFT && psize == mmu_huge_psize) {
679 DBG_LOW(" -> huge page !\n"); 686 DBG_LOW(" -> huge page !\n");
680 return hash_huge_page(mm, access, ea, vsid, local, trap); 687 return hash_huge_page(mm, access, ea, vsid, local, trap);
681 } 688 }
689#endif /* CONFIG_HUGETLB_PAGE */
690
691#ifndef CONFIG_PPC_64K_PAGES
692 /* If we use 4K pages and our psize is not 4K, then we are hitting
693 * a special driver mapping, we need to align the address before
694 * we fetch the PTE
695 */
696 if (psize != MMU_PAGE_4K)
697 ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
698#endif /* CONFIG_PPC_64K_PAGES */
682 699
683 /* Get PTE and page size from page tables */ 700 /* Get PTE and page size from page tables */
684 ptep = find_linux_pte(pgdir, ea); 701 ptep = find_linux_pte(pgdir, ea);
@@ -702,54 +719,56 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
702 } 719 }
703 720
704 /* Do actual hashing */ 721 /* Do actual hashing */
705#ifndef CONFIG_PPC_64K_PAGES 722#ifdef CONFIG_PPC_64K_PAGES
706 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
707#else
708 /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */ 723 /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
709 if (pte_val(*ptep) & _PAGE_4K_PFN) { 724 if (pte_val(*ptep) & _PAGE_4K_PFN) {
710 demote_segment_4k(mm, ea); 725 demote_segment_4k(mm, ea);
711 psize = MMU_PAGE_4K; 726 psize = MMU_PAGE_4K;
712 } 727 }
713 728
714 if (mmu_ci_restrictions) { 729 /* If this PTE is non-cacheable and we have restrictions on
715 /* If this PTE is non-cacheable, switch to 4k */ 730 * using non cacheable large pages, then we switch to 4k
716 if (psize == MMU_PAGE_64K && 731 */
717 (pte_val(*ptep) & _PAGE_NO_CACHE)) { 732 if (mmu_ci_restrictions && psize == MMU_PAGE_64K &&
718 if (user_region) { 733 (pte_val(*ptep) & _PAGE_NO_CACHE)) {
719 demote_segment_4k(mm, ea); 734 if (user_region) {
720 psize = MMU_PAGE_4K; 735 demote_segment_4k(mm, ea);
721 } else if (ea < VMALLOC_END) { 736 psize = MMU_PAGE_4K;
722 /* 737 } else if (ea < VMALLOC_END) {
723 * some driver did a non-cacheable mapping 738 /*
724 * in vmalloc space, so switch vmalloc 739 * some driver did a non-cacheable mapping
725 * to 4k pages 740 * in vmalloc space, so switch vmalloc
726 */ 741 * to 4k pages
727 printk(KERN_ALERT "Reducing vmalloc segment " 742 */
728 "to 4kB pages because of " 743 printk(KERN_ALERT "Reducing vmalloc segment "
729 "non-cacheable mapping\n"); 744 "to 4kB pages because of "
730 psize = mmu_vmalloc_psize = MMU_PAGE_4K; 745 "non-cacheable mapping\n");
731 } 746 psize = mmu_vmalloc_psize = MMU_PAGE_4K;
732#ifdef CONFIG_SPE_BASE 747#ifdef CONFIG_SPE_BASE
733 spu_flush_all_slbs(mm); 748 spu_flush_all_slbs(mm);
734#endif 749#endif
735 } 750 }
736 if (user_region) { 751 }
737 if (psize != get_paca()->context.user_psize) { 752 if (user_region) {
738 get_paca()->context = mm->context; 753 if (psize != get_paca()->context.user_psize) {
739 slb_flush_and_rebolt(); 754 get_paca()->context.user_psize =
740 } 755 mm->context.user_psize;
741 } else if (get_paca()->vmalloc_sllp !=
742 mmu_psize_defs[mmu_vmalloc_psize].sllp) {
743 get_paca()->vmalloc_sllp =
744 mmu_psize_defs[mmu_vmalloc_psize].sllp;
745 slb_flush_and_rebolt(); 756 slb_flush_and_rebolt();
746 } 757 }
758 } else if (get_paca()->vmalloc_sllp !=
759 mmu_psize_defs[mmu_vmalloc_psize].sllp) {
760 get_paca()->vmalloc_sllp =
761 mmu_psize_defs[mmu_vmalloc_psize].sllp;
762 slb_flush_and_rebolt();
747 } 763 }
764#endif /* CONFIG_PPC_64K_PAGES */
765
766#ifdef CONFIG_PPC_HAS_HASH_64K
748 if (psize == MMU_PAGE_64K) 767 if (psize == MMU_PAGE_64K)
749 rc = __hash_page_64K(ea, access, vsid, ptep, trap, local); 768 rc = __hash_page_64K(ea, access, vsid, ptep, trap, local);
750 else 769 else
770#endif /* CONFIG_PPC_HAS_HASH_64K */
751 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); 771 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
752#endif /* CONFIG_PPC_64K_PAGES */
753 772
754#ifndef CONFIG_PPC_64K_PAGES 773#ifndef CONFIG_PPC_64K_PAGES
755 DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); 774 DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
@@ -772,42 +791,55 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
772 unsigned long flags; 791 unsigned long flags;
773 int local = 0; 792 int local = 0;
774 793
775 /* We don't want huge pages prefaulted for now 794 BUG_ON(REGION_ID(ea) != USER_REGION_ID);
776 */ 795
777 if (unlikely(in_hugepage_area(mm->context, ea))) 796#ifdef CONFIG_PPC_MM_SLICES
797 /* We only prefault standard pages for now */
798 if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize));
778 return; 799 return;
800#endif
779 801
780 DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx," 802 DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
781 " trap=%lx\n", mm, mm->pgd, ea, access, trap); 803 " trap=%lx\n", mm, mm->pgd, ea, access, trap);
782 804
783 /* Get PTE, VSID, access mask */ 805 /* Get Linux PTE if available */
784 pgdir = mm->pgd; 806 pgdir = mm->pgd;
785 if (pgdir == NULL) 807 if (pgdir == NULL)
786 return; 808 return;
787 ptep = find_linux_pte(pgdir, ea); 809 ptep = find_linux_pte(pgdir, ea);
788 if (!ptep) 810 if (!ptep)
789 return; 811 return;
812
813#ifdef CONFIG_PPC_64K_PAGES
814 /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on
815 * a 64K kernel), then we don't preload, hash_page() will take
816 * care of it once we actually try to access the page.
817 * That way we don't have to duplicate all of the logic for segment
818 * page size demotion here
819 */
820 if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE))
821 return;
822#endif /* CONFIG_PPC_64K_PAGES */
823
824 /* Get VSID */
790 vsid = get_vsid(mm->context.id, ea); 825 vsid = get_vsid(mm->context.id, ea);
791 826
792 /* Hash it in */ 827 /* Hash doesn't like irqs */
793 local_irq_save(flags); 828 local_irq_save(flags);
829
830 /* Is that local to this CPU ? */
794 mask = cpumask_of_cpu(smp_processor_id()); 831 mask = cpumask_of_cpu(smp_processor_id());
795 if (cpus_equal(mm->cpu_vm_mask, mask)) 832 if (cpus_equal(mm->cpu_vm_mask, mask))
796 local = 1; 833 local = 1;
797#ifndef CONFIG_PPC_64K_PAGES 834
798 __hash_page_4K(ea, access, vsid, ptep, trap, local); 835 /* Hash it in */
799#else 836#ifdef CONFIG_PPC_HAS_HASH_64K
800 if (mmu_ci_restrictions) {
801 /* If this PTE is non-cacheable, switch to 4k */
802 if (mm->context.user_psize == MMU_PAGE_64K &&
803 (pte_val(*ptep) & _PAGE_NO_CACHE))
804 demote_segment_4k(mm, ea);
805 }
806 if (mm->context.user_psize == MMU_PAGE_64K) 837 if (mm->context.user_psize == MMU_PAGE_64K)
807 __hash_page_64K(ea, access, vsid, ptep, trap, local); 838 __hash_page_64K(ea, access, vsid, ptep, trap, local);
808 else 839 else
809 __hash_page_4K(ea, access, vsid, ptep, trap, local);
810#endif /* CONFIG_PPC_64K_PAGES */ 840#endif /* CONFIG_PPC_64K_PAGES */
841 __hash_page_4K(ea, access, vsid, ptep, trap, local);
842
811 local_irq_restore(flags); 843 local_irq_restore(flags);
812} 844}
813 845
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index fb959264c104..92a1b16fb7e3 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -91,7 +91,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
91 pgd_t *pg; 91 pgd_t *pg;
92 pud_t *pu; 92 pud_t *pu;
93 93
94 BUG_ON(! in_hugepage_area(mm->context, addr)); 94 BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);
95 95
96 addr &= HPAGE_MASK; 96 addr &= HPAGE_MASK;
97 97
@@ -119,7 +119,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
119 pud_t *pu; 119 pud_t *pu;
120 hugepd_t *hpdp = NULL; 120 hugepd_t *hpdp = NULL;
121 121
122 BUG_ON(! in_hugepage_area(mm->context, addr)); 122 BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);
123 123
124 addr &= HPAGE_MASK; 124 addr &= HPAGE_MASK;
125 125
@@ -302,7 +302,7 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
302 start = addr; 302 start = addr;
303 pgd = pgd_offset((*tlb)->mm, addr); 303 pgd = pgd_offset((*tlb)->mm, addr);
304 do { 304 do {
305 BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr)); 305 BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize);
306 next = pgd_addr_end(addr, end); 306 next = pgd_addr_end(addr, end);
307 if (pgd_none_or_clear_bad(pgd)) 307 if (pgd_none_or_clear_bad(pgd))
308 continue; 308 continue;
@@ -331,203 +331,13 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
331 return __pte(old); 331 return __pte(old);
332} 332}
333 333
334struct slb_flush_info {
335 struct mm_struct *mm;
336 u16 newareas;
337};
338
339static void flush_low_segments(void *parm)
340{
341 struct slb_flush_info *fi = parm;
342 unsigned long i;
343
344 BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS);
345
346 if (current->active_mm != fi->mm)
347 return;
348
349 /* Only need to do anything if this CPU is working in the same
350 * mm as the one which has changed */
351
352 /* update the paca copy of the context struct */
353 get_paca()->context = current->active_mm->context;
354
355 asm volatile("isync" : : : "memory");
356 for (i = 0; i < NUM_LOW_AREAS; i++) {
357 if (! (fi->newareas & (1U << i)))
358 continue;
359 asm volatile("slbie %0"
360 : : "r" ((i << SID_SHIFT) | SLBIE_C));
361 }
362 asm volatile("isync" : : : "memory");
363}
364
365static void flush_high_segments(void *parm)
366{
367 struct slb_flush_info *fi = parm;
368 unsigned long i, j;
369
370
371 BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS);
372
373 if (current->active_mm != fi->mm)
374 return;
375
376 /* Only need to do anything if this CPU is working in the same
377 * mm as the one which has changed */
378
379 /* update the paca copy of the context struct */
380 get_paca()->context = current->active_mm->context;
381
382 asm volatile("isync" : : : "memory");
383 for (i = 0; i < NUM_HIGH_AREAS; i++) {
384 if (! (fi->newareas & (1U << i)))
385 continue;
386 for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
387 asm volatile("slbie %0"
388 :: "r" (((i << HTLB_AREA_SHIFT)
389 + (j << SID_SHIFT)) | SLBIE_C));
390 }
391 asm volatile("isync" : : : "memory");
392}
393
394static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
395{
396 unsigned long start = area << SID_SHIFT;
397 unsigned long end = (area+1) << SID_SHIFT;
398 struct vm_area_struct *vma;
399
400 BUG_ON(area >= NUM_LOW_AREAS);
401
402 /* Check no VMAs are in the region */
403 vma = find_vma(mm, start);
404 if (vma && (vma->vm_start < end))
405 return -EBUSY;
406
407 return 0;
408}
409
410static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
411{
412 unsigned long start = area << HTLB_AREA_SHIFT;
413 unsigned long end = (area+1) << HTLB_AREA_SHIFT;
414 struct vm_area_struct *vma;
415
416 BUG_ON(area >= NUM_HIGH_AREAS);
417
418 /* Hack, so that each addresses is controlled by exactly one
419 * of the high or low area bitmaps, the first high area starts
420 * at 4GB, not 0 */
421 if (start == 0)
422 start = 0x100000000UL;
423
424 /* Check no VMAs are in the region */
425 vma = find_vma(mm, start);
426 if (vma && (vma->vm_start < end))
427 return -EBUSY;
428
429 return 0;
430}
431
432static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
433{
434 unsigned long i;
435 struct slb_flush_info fi;
436
437 BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
438 BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);
439
440 newareas &= ~(mm->context.low_htlb_areas);
441 if (! newareas)
442 return 0; /* The segments we want are already open */
443
444 for (i = 0; i < NUM_LOW_AREAS; i++)
445 if ((1 << i) & newareas)
446 if (prepare_low_area_for_htlb(mm, i) != 0)
447 return -EBUSY;
448
449 mm->context.low_htlb_areas |= newareas;
450
451 /* the context change must make it to memory before the flush,
452 * so that further SLB misses do the right thing. */
453 mb();
454
455 fi.mm = mm;
456 fi.newareas = newareas;
457 on_each_cpu(flush_low_segments, &fi, 0, 1);
458
459 return 0;
460}
461
462static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
463{
464 struct slb_flush_info fi;
465 unsigned long i;
466
467 BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
468 BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
469 != NUM_HIGH_AREAS);
470
471 newareas &= ~(mm->context.high_htlb_areas);
472 if (! newareas)
473 return 0; /* The areas we want are already open */
474
475 for (i = 0; i < NUM_HIGH_AREAS; i++)
476 if ((1 << i) & newareas)
477 if (prepare_high_area_for_htlb(mm, i) != 0)
478 return -EBUSY;
479
480 mm->context.high_htlb_areas |= newareas;
481
482 /* the context change must make it to memory before the flush,
483 * so that further SLB misses do the right thing. */
484 mb();
485
486 fi.mm = mm;
487 fi.newareas = newareas;
488 on_each_cpu(flush_high_segments, &fi, 0, 1);
489
490 return 0;
491}
492
493int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff)
494{
495 int err = 0;
496
497 if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT))
498 return -EINVAL;
499 if (len & ~HPAGE_MASK)
500 return -EINVAL;
501 if (addr & ~HPAGE_MASK)
502 return -EINVAL;
503
504 if (addr < 0x100000000UL)
505 err = open_low_hpage_areas(current->mm,
506 LOW_ESID_MASK(addr, len));
507 if ((addr + len) > 0x100000000UL)
508 err = open_high_hpage_areas(current->mm,
509 HTLB_AREA_MASK(addr, len));
510#ifdef CONFIG_SPE_BASE
511 spu_flush_all_slbs(current->mm);
512#endif
513 if (err) {
514 printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
515 " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
516 addr, len,
517 LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
518 return err;
519 }
520
521 return 0;
522}
523
524struct page * 334struct page *
525follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) 335follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
526{ 336{
527 pte_t *ptep; 337 pte_t *ptep;
528 struct page *page; 338 struct page *page;
529 339
530 if (! in_hugepage_area(mm->context, address)) 340 if (get_slice_psize(mm, address) != mmu_huge_psize)
531 return ERR_PTR(-EINVAL); 341 return ERR_PTR(-EINVAL);
532 342
533 ptep = huge_pte_offset(mm, address); 343 ptep = huge_pte_offset(mm, address);
@@ -551,359 +361,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
551 return NULL; 361 return NULL;
552} 362}
553 363
554/* Because we have an exclusive hugepage region which lies within the
555 * normal user address space, we have to take special measures to make
556 * non-huge mmap()s evade the hugepage reserved regions. */
/*
 * Bottom-up search for a free range of len bytes in current->mm.
 *
 * Returns the chosen address, -ENOMEM if len exceeds TASK_SIZE or no
 * suitable hole exists, or -EINVAL for a MAP_FIXED request that
 * is_hugepage_only_range() rejects.  filp and pgoff are not used here.
 * Updates mm->free_area_cache and mm->cached_hole_size as a side effect.
 */
557unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
558 unsigned long len, unsigned long pgoff,
559 unsigned long flags)
560{
561 struct mm_struct *mm = current->mm;
562 struct vm_area_struct *vma;
563 unsigned long start_addr;
564
565 if (len > TASK_SIZE)
566 return -ENOMEM;
567
568 /* handle fixed mapping: prevent overlap with huge pages */
569 if (flags & MAP_FIXED) {
570 if (is_hugepage_only_range(mm, addr, len))
571 return -EINVAL;
572 return addr;
573 }
574
/* A non-zero addr is a hint: use it if, once page aligned, it fits below
 * TASK_SIZE, does not run into the VMA returned by find_vma() and does
 * not fall in a hugepage-only range. */
575 if (addr) {
576 addr = PAGE_ALIGN(addr);
577 vma = find_vma(mm, addr);
578 if (((TASK_SIZE - len) >= addr)
579 && (!vma || (addr+len) <= vma->vm_start)
580 && !is_hugepage_only_range(mm, addr,len))
581 return addr;
582 }
/* Start at the cached position unless a hole at least len bytes large
 * has been recorded, in which case restart from TASK_UNMAPPED_BASE. */
583 if (len > mm->cached_hole_size) {
584 start_addr = addr = mm->free_area_cache;
585 } else {
586 start_addr = addr = TASK_UNMAPPED_BASE;
587 mm->cached_hole_size = 0;
588 }
589
590full_search:
591 vma = find_vma(mm, addr);
592 while (TASK_SIZE - len >= addr) {
593 BUG_ON(vma && (addr >= vma->vm_end));
594
/* Candidate touches a hugepage range: skip forward to the next
 * 1<<SID_SHIFT (low) or 1UL<<HTLB_AREA_SHIFT (high) boundary and look
 * the VMA up again, since the jump may have passed some VMAs. */
595 if (touches_hugepage_low_range(mm, addr, len)) {
596 addr = ALIGN(addr+1, 1<<SID_SHIFT);
597 vma = find_vma(mm, addr);
598 continue;
599 }
600 if (touches_hugepage_high_range(mm, addr, len)) {
601 addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
602 vma = find_vma(mm, addr);
603 continue;
604 }
605 if (!vma || addr + len <= vma->vm_start) {
606 /*
607 * Remember the place where we stopped the search:
608 */
609 mm->free_area_cache = addr + len;
610 return addr;
611 }
/* Track the largest hole skipped so far, then step past this VMA. */
612 if (addr + mm->cached_hole_size < vma->vm_start)
613 mm->cached_hole_size = vma->vm_start - addr;
614 addr = vma->vm_end;
615 vma = vma->vm_next;
616 }
617
618 /* Make sure we didn't miss any holes */
619 if (start_addr != TASK_UNMAPPED_BASE) {
620 start_addr = addr = TASK_UNMAPPED_BASE;
621 mm->cached_hole_size = 0;
622 goto full_search;
623 }
624 return -ENOMEM;
625}
626
627/*
628 * This mmap-allocator allocates new areas top-down from below the
629 * stack's low limit (the base):
630 *
631 * Because we have an exclusive hugepage region which lies within the
632 * normal user address space, we have to take special measures to make
633 * non-huge mmap()s evade the hugepage reserved regions.
634 */
/*
 * Returns the chosen address, -ENOMEM if len exceeds TASK_SIZE, -EINVAL
 * for a MAP_FIXED request that is_hugepage_only_range() rejects, or
 * whatever the bottom-up arch_get_unmapped_area() fallback returns when
 * the top-down search finds nothing.  Updates mm->free_area_cache and
 * mm->cached_hole_size as a side effect.
 */
635unsigned long
636arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
637 const unsigned long len, const unsigned long pgoff,
638 const unsigned long flags)
639{
640 struct vm_area_struct *vma, *prev_vma;
641 struct mm_struct *mm = current->mm;
642 unsigned long base = mm->mmap_base, addr = addr0;
643 unsigned long largest_hole = mm->cached_hole_size;
644 int first_time = 1;
645
646 /* requested length too big for entire address space */
647 if (len > TASK_SIZE)
648 return -ENOMEM;
649
650 /* handle fixed mapping: prevent overlap with huge pages */
651 if (flags & MAP_FIXED) {
652 if (is_hugepage_only_range(mm, addr, len))
653 return -EINVAL;
654 return addr;
655 }
656
657 /* dont allow allocations above current base */
658 if (mm->free_area_cache > base)
659 mm->free_area_cache = base;
660
661 /* requesting a specific address */
662 if (addr) {
663 addr = PAGE_ALIGN(addr);
664 vma = find_vma(mm, addr);
665 if (TASK_SIZE - len >= addr &&
666 (!vma || addr + len <= vma->vm_start)
667 && !is_hugepage_only_range(mm, addr,len))
668 return addr;
669 }
670
/* A hole of at least len bytes has been recorded: restart the search
 * from the base instead of from the cached position. */
671 if (len <= largest_hole) {
672 largest_hole = 0;
673 mm->free_area_cache = base;
674 }
675try_again:
676 /* make sure it can fit in the remaining address space */
677 if (mm->free_area_cache < len)
678 goto fail;
679
680 /* either no address requested or cant fit in requested address hole */
681 addr = (mm->free_area_cache - len) & PAGE_MASK;
682 do {
683hugepage_recheck:
/* Candidate touches a hugepage range: drop to just below the start of
 * the enclosing segment/area and check again.
 * NOTE(review): the low-range mask is built from a plain int (~0) while
 * the high-range one uses ~0UL; the int form relies on sign extension
 * when combined with the unsigned long addr - confirm this is intended. */
684 if (touches_hugepage_low_range(mm, addr, len)) {
685 addr = (addr & ((~0) << SID_SHIFT)) - len;
686 goto hugepage_recheck;
687 } else if (touches_hugepage_high_range(mm, addr, len)) {
688 addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len;
689 goto hugepage_recheck;
690 }
691
692 /*
693 * Lookup failure means no vma is above this address,
694 * i.e. return with success:
695 */
696 if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
697 return addr;
698
699 /*
700 * new region fits between prev_vma->vm_end and
701 * vma->vm_start, use it:
702 */
703 if (addr+len <= vma->vm_start &&
704 (!prev_vma || (addr >= prev_vma->vm_end))) {
705 /* remember the address as a hint for next time */
706 mm->cached_hole_size = largest_hole;
707 return (mm->free_area_cache = addr);
708 } else {
709 /* pull free_area_cache down to the first hole */
710 if (mm->free_area_cache == vma->vm_end) {
711 mm->free_area_cache = vma->vm_start;
712 mm->cached_hole_size = largest_hole;
713 }
714 }
715
716 /* remember the largest hole we saw so far */
717 if (addr + largest_hole < vma->vm_start)
718 largest_hole = vma->vm_start - addr;
719
720 /* try just below the current vma->vm_start */
721 addr = vma->vm_start-len;
722 } while (len <= vma->vm_start);
723
724fail:
725 /*
726 * if hint left us with no space for the requested
727 * mapping then try again:
728 */
729 if (first_time) {
730 mm->free_area_cache = base;
731 largest_hole = 0;
732 first_time = 0;
733 goto try_again;
734 }
735 /*
736 * A failed mmap() very likely causes application failure,
737 * so fall back to the bottom-up function here. This scenario
738 * can happen with large stack limits and large mmap()
739 * allocations.
740 */
741 mm->free_area_cache = TASK_UNMAPPED_BASE;
742 mm->cached_hole_size = ~0UL;
743 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
744 /*
745 * Restore the topdown base:
746 */
747 mm->free_area_cache = base;
748 mm->cached_hole_size = ~0UL;
749
750 return addr;
751}
752
753static int htlb_check_hinted_area(unsigned long addr, unsigned long len)
754{
755 struct vm_area_struct *vma;
756
757 vma = find_vma(current->mm, addr);
758 if (TASK_SIZE - len >= addr &&
759 (!vma || ((addr + len) <= vma->vm_start)))
760 return 0;
761
762 return -ENOMEM;
763}
764
765static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
766{
767 unsigned long addr = 0;
768 struct vm_area_struct *vma;
769
770 vma = find_vma(current->mm, addr);
771 while (addr + len <= 0x100000000UL) {
772 BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
773
774 if (! __within_hugepage_low_range(addr, len, segmask)) {
775 addr = ALIGN(addr+1, 1<<SID_SHIFT);
776 vma = find_vma(current->mm, addr);
777 continue;
778 }
779
780 if (!vma || (addr + len) <= vma->vm_start)
781 return addr;
782 addr = ALIGN(vma->vm_end, HPAGE_SIZE);
783 /* Depending on segmask this might not be a confirmed
784 * hugepage region, so the ALIGN could have skipped
785 * some VMAs */
786 vma = find_vma(current->mm, addr);
787 }
788
789 return -ENOMEM;
790}
791
792static unsigned long htlb_get_high_area(unsigned long len, u16 areamask)
793{
794 unsigned long addr = 0x100000000UL;
795 struct vm_area_struct *vma;
796
797 vma = find_vma(current->mm, addr);
798 while (addr + len <= TASK_SIZE_USER64) {
799 BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
800
801 if (! __within_hugepage_high_range(addr, len, areamask)) {
802 addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
803 vma = find_vma(current->mm, addr);
804 continue;
805 }
806
807 if (!vma || (addr + len) <= vma->vm_start)
808 return addr;
809 addr = ALIGN(vma->vm_end, HPAGE_SIZE);
810 /* Depending on segmask this might not be a confirmed
811 * hugepage region, so the ALIGN could have skipped
812 * some VMAs */
813 vma = find_vma(current->mm, addr);
814 }
815
816 return -ENOMEM;
817}
818 364
819unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 365unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
820 unsigned long len, unsigned long pgoff, 366 unsigned long len, unsigned long pgoff,
821 unsigned long flags) 367 unsigned long flags)
822{ 368{
823 int lastshift; 369 return slice_get_unmapped_area(addr, len, flags,
824 u16 areamask, curareas; 370 mmu_huge_psize, 1, 0);
825
826 if (HPAGE_SHIFT == 0)
827 return -EINVAL;
828 if (len & ~HPAGE_MASK)
829 return -EINVAL;
830 if (len > TASK_SIZE)
831 return -ENOMEM;
832
833 if (!cpu_has_feature(CPU_FTR_16M_PAGE))
834 return -EINVAL;
835
836 /* Paranoia, caller should have dealt with this */
837 BUG_ON((addr + len) < addr);
838
839 /* Handle MAP_FIXED */
840 if (flags & MAP_FIXED) {
841 if (prepare_hugepage_range(addr, len, pgoff))
842 return -EINVAL;
843 return addr;
844 }
845
846 if (test_thread_flag(TIF_32BIT)) {
847 curareas = current->mm->context.low_htlb_areas;
848
849 /* First see if we can use the hint address */
850 if (addr && (htlb_check_hinted_area(addr, len) == 0)) {
851 areamask = LOW_ESID_MASK(addr, len);
852 if (open_low_hpage_areas(current->mm, areamask) == 0)
853 return addr;
854 }
855
856 /* Next see if we can map in the existing low areas */
857 addr = htlb_get_low_area(len, curareas);
858 if (addr != -ENOMEM)
859 return addr;
860
861 /* Finally go looking for areas to open */
862 lastshift = 0;
863 for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
864 ! lastshift; areamask >>=1) {
865 if (areamask & 1)
866 lastshift = 1;
867
868 addr = htlb_get_low_area(len, curareas | areamask);
869 if ((addr != -ENOMEM)
870 && open_low_hpage_areas(current->mm, areamask) == 0)
871 return addr;
872 }
873 } else {
874 curareas = current->mm->context.high_htlb_areas;
875
876 /* First see if we can use the hint address */
877 /* We discourage 64-bit processes from doing hugepage
878 * mappings below 4GB (must use MAP_FIXED) */
879 if ((addr >= 0x100000000UL)
880 && (htlb_check_hinted_area(addr, len) == 0)) {
881 areamask = HTLB_AREA_MASK(addr, len);
882 if (open_high_hpage_areas(current->mm, areamask) == 0)
883 return addr;
884 }
885
886 /* Next see if we can map in the existing high areas */
887 addr = htlb_get_high_area(len, curareas);
888 if (addr != -ENOMEM)
889 return addr;
890
891 /* Finally go looking for areas to open */
892 lastshift = 0;
893 for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
894 ! lastshift; areamask >>=1) {
895 if (areamask & 1)
896 lastshift = 1;
897
898 addr = htlb_get_high_area(len, curareas | areamask);
899 if ((addr != -ENOMEM)
900 && open_high_hpage_areas(current->mm, areamask) == 0)
901 return addr;
902 }
903 }
904 printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
905 " enough areas\n");
906 return -ENOMEM;
907} 371}
908 372
909/* 373/*
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index fe1fe852181a..7312a265545f 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -146,21 +146,16 @@ static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags)
146 memset(addr, 0, kmem_cache_size(cache)); 146 memset(addr, 0, kmem_cache_size(cache));
147} 147}
148 148
149#ifdef CONFIG_PPC_64K_PAGES
150static const unsigned int pgtable_cache_size[3] = {
151 PTE_TABLE_SIZE, PMD_TABLE_SIZE, PGD_TABLE_SIZE
152};
153static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
154 "pte_pmd_cache", "pmd_cache", "pgd_cache",
155};
156#else
157static const unsigned int pgtable_cache_size[2] = { 149static const unsigned int pgtable_cache_size[2] = {
158 PTE_TABLE_SIZE, PMD_TABLE_SIZE 150 PGD_TABLE_SIZE, PMD_TABLE_SIZE
159}; 151};
160static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { 152static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
161 "pgd_pte_cache", "pud_pmd_cache", 153#ifdef CONFIG_PPC_64K_PAGES
162}; 154 "pgd_cache", "pmd_cache",
155#else
156 "pgd_cache", "pud_pmd_cache",
163#endif /* CONFIG_PPC_64K_PAGES */ 157#endif /* CONFIG_PPC_64K_PAGES */
158};
164 159
165#ifdef CONFIG_HUGETLB_PAGE 160#ifdef CONFIG_HUGETLB_PAGE
166/* Hugepages need one extra cache, initialized in hugetlbpage.c. We 161/* Hugepages need one extra cache, initialized in hugetlbpage.c. We
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 1a6e08f3298f..246eeea40ece 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -31,6 +31,7 @@
31#include <linux/highmem.h> 31#include <linux/highmem.h>
32#include <linux/initrd.h> 32#include <linux/initrd.h>
33#include <linux/pagemap.h> 33#include <linux/pagemap.h>
34#include <linux/suspend.h>
34 35
35#include <asm/pgalloc.h> 36#include <asm/pgalloc.h>
36#include <asm/prom.h> 37#include <asm/prom.h>
@@ -276,6 +277,28 @@ void __init do_init_bootmem(void)
276 init_bootmem_done = 1; 277 init_bootmem_done = 1;
277} 278}
278 279
280/* mark pages that don't exist as nosave */
281static int __init mark_nonram_nosave(void)
282{
283 unsigned long lmb_next_region_start_pfn,
284 lmb_region_max_pfn;
285 int i;
286
287 for (i = 0; i < lmb.memory.cnt - 1; i++) {
288 lmb_region_max_pfn =
289 (lmb.memory.region[i].base >> PAGE_SHIFT) +
290 (lmb.memory.region[i].size >> PAGE_SHIFT);
291 lmb_next_region_start_pfn =
292 lmb.memory.region[i+1].base >> PAGE_SHIFT;
293
294 if (lmb_region_max_pfn < lmb_next_region_start_pfn)
295 register_nosave_region(lmb_region_max_pfn,
296 lmb_next_region_start_pfn);
297 }
298
299 return 0;
300}
301
279/* 302/*
280 * paging_init() sets up the page tables - in fact we've already done this. 303 * paging_init() sets up the page tables - in fact we've already done this.
281 */ 304 */
@@ -307,6 +330,8 @@ void __init paging_init(void)
307 max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; 330 max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
308#endif 331#endif
309 free_area_init_nodes(max_zone_pfns); 332 free_area_init_nodes(max_zone_pfns);
333
334 mark_nonram_nosave();
310} 335}
311#endif /* ! CONFIG_NEED_MULTIPLE_NODES */ 336#endif /* ! CONFIG_NEED_MULTIPLE_NODES */
312 337
diff --git a/arch/powerpc/mm/mmu_context_64.c b/arch/powerpc/mm/mmu_context_64.c
index 90a06ac02d5e..7a78cdc0515a 100644
--- a/arch/powerpc/mm/mmu_context_64.c
+++ b/arch/powerpc/mm/mmu_context_64.c
@@ -28,6 +28,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
28{ 28{
29 int index; 29 int index;
30 int err; 30 int err;
31 int new_context = (mm->context.id == 0);
31 32
32again: 33again:
33 if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) 34 if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
@@ -50,9 +51,18 @@ again:
50 } 51 }
51 52
52 mm->context.id = index; 53 mm->context.id = index;
54#ifdef CONFIG_PPC_MM_SLICES
55 /* The old code would re-promote on fork, we don't do that
56 * when using slices as it could cause problem promoting slices
57 * that have been forced down to 4K
58 */
59 if (new_context)
60 slice_set_user_psize(mm, mmu_virtual_psize);
61#else
53 mm->context.user_psize = mmu_virtual_psize; 62 mm->context.user_psize = mmu_virtual_psize;
54 mm->context.sllp = SLB_VSID_USER | 63 mm->context.sllp = SLB_VSID_USER |
55 mmu_psize_defs[mmu_virtual_psize].sllp; 64 mmu_psize_defs[mmu_virtual_psize].sllp;
65#endif
56 66
57 return 0; 67 return 0;
58} 68}
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 05066674a7a0..ec1421a20aaa 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -185,7 +185,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
185 185
186 if (Hash == 0) 186 if (Hash == 0)
187 return; 187 return;
188 pmd = pmd_offset(pgd_offset(mm, ea), ea); 188 pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea);
189 if (!pmd_none(*pmd)) 189 if (!pmd_none(*pmd))
190 add_hash_page(mm->context.id, ea, pmd_val(*pmd)); 190 add_hash_page(mm->context.id, ea, pmd_val(*pmd));
191} 191}
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 224e960650a0..304375a73574 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -198,12 +198,6 @@ void slb_initialize(void)
198 static int slb_encoding_inited; 198 static int slb_encoding_inited;
199 extern unsigned int *slb_miss_kernel_load_linear; 199 extern unsigned int *slb_miss_kernel_load_linear;
200 extern unsigned int *slb_miss_kernel_load_io; 200 extern unsigned int *slb_miss_kernel_load_io;
201#ifdef CONFIG_HUGETLB_PAGE
202 extern unsigned int *slb_miss_user_load_huge;
203 unsigned long huge_llp;
204
205 huge_llp = mmu_psize_defs[mmu_huge_psize].sllp;
206#endif
207 201
208 /* Prepare our SLB miss handler based on our page size */ 202 /* Prepare our SLB miss handler based on our page size */
209 linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; 203 linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
@@ -220,11 +214,6 @@ void slb_initialize(void)
220 214
221 DBG("SLB: linear LLP = %04x\n", linear_llp); 215 DBG("SLB: linear LLP = %04x\n", linear_llp);
222 DBG("SLB: io LLP = %04x\n", io_llp); 216 DBG("SLB: io LLP = %04x\n", io_llp);
223#ifdef CONFIG_HUGETLB_PAGE
224 patch_slb_encoding(slb_miss_user_load_huge,
225 SLB_VSID_USER | huge_llp);
226 DBG("SLB: huge LLP = %04x\n", huge_llp);
227#endif
228 } 217 }
229 218
230 get_paca()->stab_rr = SLB_NUM_BOLTED; 219 get_paca()->stab_rr = SLB_NUM_BOLTED;
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index b10e4707d7c1..cd1a93d4948c 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -82,31 +82,45 @@ _GLOBAL(slb_miss_kernel_load_io)
82 srdi. r9,r10,USER_ESID_BITS 82 srdi. r9,r10,USER_ESID_BITS
83 bne- 8f /* invalid ea bits set */ 83 bne- 8f /* invalid ea bits set */
84 84
85 /* Figure out if the segment contains huge pages */ 85
86#ifdef CONFIG_HUGETLB_PAGE 86 /* when using slices, we extract the psize off the slice bitmaps
87BEGIN_FTR_SECTION 87 * and then we need to get the sllp encoding off the mmu_psize_defs
88 b 1f 88 * array.
89END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE) 89 *
90 * XXX This is a bit inefficient especially for the normal case,
91 * so we should try to implement a fast path for the standard page
92 * size using the old sllp value so we avoid the array. We cannot
93 * really do dynamic patching unfortunately as processes might flip
94 * between 4k and 64k standard page size
95 */
96#ifdef CONFIG_PPC_MM_SLICES
90 cmpldi r10,16 97 cmpldi r10,16
91 98
92 lhz r9,PACALOWHTLBAREAS(r13) 99 /* Get the slice index * 4 in r11 and matching slice size mask in r9 */
93 mr r11,r10 100 ld r9,PACALOWSLICESPSIZE(r13)
101 sldi r11,r10,2
94 blt 5f 102 blt 5f
103 ld r9,PACAHIGHSLICEPSIZE(r13)
104 srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2)
105 andi. r11,r11,0x3c
95 106
96 lhz r9,PACAHIGHHTLBAREAS(r13) 1075: /* Extract the psize and multiply to get an array offset */
97 srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT) 108 srd r9,r9,r11
98 109 andi. r9,r9,0xf
995: srd r9,r9,r11 110 mulli r9,r9,MMUPSIZEDEFSIZE
100 andi. r9,r9,1
101 beq 1f
102_GLOBAL(slb_miss_user_load_huge)
103 li r11,0
104 b 2f
1051:
106#endif /* CONFIG_HUGETLB_PAGE */
107 111
112 /* Now get to the array and obtain the sllp
113 */
114 ld r11,PACATOC(r13)
115 ld r11,mmu_psize_defs@got(r11)
116 add r11,r11,r9
117 ld r11,MMUPSIZESLLP(r11)
118 ori r11,r11,SLB_VSID_USER
119#else
120 /* paca context sllp already contains the SLB_VSID_USER bits */
108 lhz r11,PACACONTEXTSLLP(r13) 121 lhz r11,PACACONTEXTSLLP(r13)
1092: 122#endif /* CONFIG_PPC_MM_SLICES */
123
110 ld r9,PACACONTEXTID(r13) 124 ld r9,PACACONTEXTID(r13)
111 rldimi r10,r9,USER_ESID_BITS,0 125 rldimi r10,r9,USER_ESID_BITS,0
112 b slb_finish_load 126 b slb_finish_load
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
new file mode 100644
index 000000000000..f833dba2a028
--- /dev/null
+++ b/arch/powerpc/mm/slice.c
@@ -0,0 +1,633 @@
1/*
2 * address space "slices" (meta-segments) support
3 *
4 * Copyright (C) 2007 Benjamin Herrenschmidt, IBM Corporation.
5 *
6 * Based on hugetlb implementation
7 *
8 * Copyright (C) 2003 David Gibson, IBM Corporation.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25#undef DEBUG
26
27#include <linux/kernel.h>
28#include <linux/mm.h>
29#include <linux/pagemap.h>
30#include <linux/err.h>
31#include <linux/spinlock.h>
32#include <linux/module.h>
33#include <asm/mman.h>
34#include <asm/mmu.h>
35#include <asm/spu.h>
36
/* Serializes updates of the per-mm slice page size fields in
 * slice_convert().  Defined with DEFINE_SPINLOCK() rather than the
 * deprecated SPIN_LOCK_UNLOCKED initializer. */
static DEFINE_SPINLOCK(slice_convert_lock);
38
39
40#ifdef DEBUG
41int _slice_debug = 1;
42
43static void slice_print_mask(const char *label, struct slice_mask mask)
44{
45 char *p, buf[16 + 3 + 16 + 1];
46 int i;
47
48 if (!_slice_debug)
49 return;
50 p = buf;
51 for (i = 0; i < SLICE_NUM_LOW; i++)
52 *(p++) = (mask.low_slices & (1 << i)) ? '1' : '0';
53 *(p++) = ' ';
54 *(p++) = '-';
55 *(p++) = ' ';
56 for (i = 0; i < SLICE_NUM_HIGH; i++)
57 *(p++) = (mask.high_slices & (1 << i)) ? '1' : '0';
58 *(p++) = 0;
59
60 printk(KERN_DEBUG "%s:%s\n", label, buf);
61}
62
63#define slice_dbg(fmt...) do { if (_slice_debug) pr_debug(fmt); } while(0)
64
65#else
66
67static void slice_print_mask(const char *label, struct slice_mask mask) {}
68#define slice_dbg(fmt...)
69
70#endif
71
72static struct slice_mask slice_range_to_mask(unsigned long start,
73 unsigned long len)
74{
75 unsigned long end = start + len - 1;
76 struct slice_mask ret = { 0, 0 };
77
78 if (start < SLICE_LOW_TOP) {
79 unsigned long mend = min(end, SLICE_LOW_TOP);
80 unsigned long mstart = min(start, SLICE_LOW_TOP);
81
82 ret.low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
83 - (1u << GET_LOW_SLICE_INDEX(mstart));
84 }
85
86 if ((start + len) > SLICE_LOW_TOP)
87 ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1))
88 - (1u << GET_HIGH_SLICE_INDEX(start));
89
90 return ret;
91}
92
93static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
94 unsigned long len)
95{
96 struct vm_area_struct *vma;
97
98 if ((mm->task_size - len) < addr)
99 return 0;
100 vma = find_vma(mm, addr);
101 return (!vma || (addr + len) <= vma->vm_start);
102}
103
104static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
105{
106 return !slice_area_is_free(mm, slice << SLICE_LOW_SHIFT,
107 1ul << SLICE_LOW_SHIFT);
108}
109
110static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
111{
112 unsigned long start = slice << SLICE_HIGH_SHIFT;
113 unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);
114
115 /* Hack, so that each addresses is controlled by exactly one
116 * of the high or low area bitmaps, the first high area starts
117 * at 4GB, not 0 */
118 if (start == 0)
119 start = SLICE_LOW_TOP;
120
121 return !slice_area_is_free(mm, start, end - start);
122}
123
124static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
125{
126 struct slice_mask ret = { 0, 0 };
127 unsigned long i;
128
129 for (i = 0; i < SLICE_NUM_LOW; i++)
130 if (!slice_low_has_vma(mm, i))
131 ret.low_slices |= 1u << i;
132
133 if (mm->task_size <= SLICE_LOW_TOP)
134 return ret;
135
136 for (i = 0; i < SLICE_NUM_HIGH; i++)
137 if (!slice_high_has_vma(mm, i))
138 ret.high_slices |= 1u << i;
139
140 return ret;
141}
142
143static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
144{
145 struct slice_mask ret = { 0, 0 };
146 unsigned long i;
147 u64 psizes;
148
149 psizes = mm->context.low_slices_psize;
150 for (i = 0; i < SLICE_NUM_LOW; i++)
151 if (((psizes >> (i * 4)) & 0xf) == psize)
152 ret.low_slices |= 1u << i;
153
154 psizes = mm->context.high_slices_psize;
155 for (i = 0; i < SLICE_NUM_HIGH; i++)
156 if (((psizes >> (i * 4)) & 0xf) == psize)
157 ret.high_slices |= 1u << i;
158
159 return ret;
160}
161
162static int slice_check_fit(struct slice_mask mask, struct slice_mask available)
163{
164 return (mask.low_slices & available.low_slices) == mask.low_slices &&
165 (mask.high_slices & available.high_slices) == mask.high_slices;
166}
167
168static void slice_flush_segments(void *parm)
169{
170 struct mm_struct *mm = parm;
171 unsigned long flags;
172
173 if (mm != current->active_mm)
174 return;
175
176 /* update the paca copy of the context struct */
177 get_paca()->context = current->active_mm->context;
178
179 local_irq_save(flags);
180 slb_flush_and_rebolt();
181 local_irq_restore(flags);
182}
183
/*
 * Set the page size of every slice selected by @mask to @psize in @mm,
 * then have every CPU refresh its view through slice_flush_segments().
 *
 * The per-slice page sizes are kept as 4-bit fields packed into
 * mm->context.low_slices_psize and mm->context.high_slices_psize; the
 * field for slice i sits at bit offset i * 4.
 */
184static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
185{
186 /* Write the new slice psize bits */
187 u64 lpsizes, hpsizes;
188 unsigned long i, flags;
189
190 slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
191 slice_print_mask(" mask", mask);
192
193 /* We need to use a spinlock here to protect against
194 * concurrent 64k -> 4k demotion ...
195 */
196 spin_lock_irqsave(&slice_convert_lock, flags);
197
/* Work on local copies: clear the nibble of each selected slice and
 * write psize into it. */
198 lpsizes = mm->context.low_slices_psize;
199 for (i = 0; i < SLICE_NUM_LOW; i++)
200 if (mask.low_slices & (1u << i))
201 lpsizes = (lpsizes & ~(0xful << (i * 4))) |
202 (((unsigned long)psize) << (i * 4));
203
204 hpsizes = mm->context.high_slices_psize;
205 for (i = 0; i < SLICE_NUM_HIGH; i++)
206 if (mask.high_slices & (1u << i))
207 hpsizes = (hpsizes & ~(0xful << (i * 4))) |
208 (((unsigned long)psize) << (i * 4));
209
/* Publish the updated values while still holding the lock. */
210 mm->context.low_slices_psize = lpsizes;
211 mm->context.high_slices_psize = hpsizes;
212
213 slice_dbg(" lsps=%lx, hsps=%lx\n",
214 mm->context.low_slices_psize,
215 mm->context.high_slices_psize);
216
217 spin_unlock_irqrestore(&slice_convert_lock, flags);
/* Barrier between the context update and the cross-CPU flush below
 * (presumably so the other CPUs observe the new values - confirm). */
218 mb();
219
220 /* XXX this is sub-optimal but will do for now */
221 on_each_cpu(slice_flush_segments, mm, 0, 1);
222#ifdef CONFIG_SPU_BASE
223 spu_flush_all_slbs(mm);
224#endif
225}
226
/*
 * Bottom-up search for a free range of @len bytes whose slices are all
 * set in @available.
 *
 * Candidate addresses are aligned up to the larger of the @psize page
 * shift and PAGE_SHIFT.  With @use_cache set, the search starts from
 * mm->free_area_cache (or from TASK_UNMAPPED_BASE when a hole of at
 * least @len bytes has been recorded) and the cache fields are updated;
 * without it the search always starts at TASK_UNMAPPED_BASE and the
 * cache is left alone.
 *
 * Returns the address found, or -ENOMEM.
 */
227static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
228 unsigned long len,
229 struct slice_mask available,
230 int psize, int use_cache)
231{
232 struct vm_area_struct *vma;
233 unsigned long start_addr, addr;
234 struct slice_mask mask;
235 int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
236
237 if (use_cache) {
238 if (len <= mm->cached_hole_size) {
239 start_addr = addr = TASK_UNMAPPED_BASE;
240 mm->cached_hole_size = 0;
241 } else
242 start_addr = addr = mm->free_area_cache;
243 } else
244 start_addr = addr = TASK_UNMAPPED_BASE;
245
246full_search:
247 for (;;) {
248 addr = _ALIGN_UP(addr, 1ul << pshift);
/* stop once the range no longer fits below TASK_SIZE */
249 if ((TASK_SIZE - len) < addr)
250 break;
251 vma = find_vma(mm, addr);
252 BUG_ON(vma && (addr >= vma->vm_end));
253
/* Range touches a slice that is not available: skip ahead to the
 * next low or high slice boundary. */
254 mask = slice_range_to_mask(addr, len);
255 if (!slice_check_fit(mask, available)) {
256 if (addr < SLICE_LOW_TOP)
257 addr = _ALIGN_UP(addr + 1, 1ul << SLICE_LOW_SHIFT);
258 else
259 addr = _ALIGN_UP(addr + 1, 1ul << SLICE_HIGH_SHIFT);
260 continue;
261 }
262 if (!vma || addr + len <= vma->vm_start) {
263 /*
264 * Remember the place where we stopped the search:
265 */
266 if (use_cache)
267 mm->free_area_cache = addr + len;
268 return addr;
269 }
/* Track the largest hole skipped so far, then step past this VMA. */
270 if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
271 mm->cached_hole_size = vma->vm_start - addr;
272 addr = vma->vm_end;
273 }
274
275 /* Make sure we didn't miss any holes */
276 if (use_cache && start_addr != TASK_UNMAPPED_BASE) {
277 start_addr = addr = TASK_UNMAPPED_BASE;
278 mm->cached_hole_size = 0;
279 goto full_search;
280 }
281 return -ENOMEM;
282}
283
/*
 * Top-down search, starting below mm->mmap_base, for a free range of
 * @len bytes whose slices are all set in @available.
 *
 * Candidate addresses are aligned down to the larger of the @psize page
 * shift and PAGE_SHIFT.  With @use_cache set, the spot just below
 * mm->free_area_cache is tried first and the cache fields are updated.
 * If the top-down walk finds nothing, slice_find_area_bottomup() is
 * tried with caching disabled and its result (possibly -ENOMEM) is
 * returned.
 */
284static unsigned long slice_find_area_topdown(struct mm_struct *mm,
285 unsigned long len,
286 struct slice_mask available,
287 int psize, int use_cache)
288{
289 struct vm_area_struct *vma;
290 unsigned long addr;
291 struct slice_mask mask;
292 int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
293
294 /* check if free_area_cache is useful for us */
295 if (use_cache) {
296 if (len <= mm->cached_hole_size) {
297 mm->cached_hole_size = 0;
298 mm->free_area_cache = mm->mmap_base;
299 }
300
301 /* either no address requested or can't fit in requested
302 * address hole
303 */
304 addr = mm->free_area_cache;
305
306 /* make sure it can fit in the remaining address space */
307 if (addr > len) {
308 addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
309 mask = slice_range_to_mask(addr, len);
310 if (slice_check_fit(mask, available) &&
311 slice_area_is_free(mm, addr, len))
312 /* remember the address as a hint for
313 * next time
314 */
315 return (mm->free_area_cache = addr);
316 }
317 }
318
/* Cached position was not usable (or caching is off): walk down from
 * the top of the mmap area. */
319 addr = mm->mmap_base;
320 while (addr > len) {
321 /* Go down by chunk size */
322 addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
323
324 /* Check for hit with different page size */
325 mask = slice_range_to_mask(addr, len);
326 if (!slice_check_fit(mask, available)) {
/* Drop to the start of the current slice; within the first high
 * slice the floor is SLICE_LOW_TOP rather than 0. */
327 if (addr < SLICE_LOW_TOP)
328 addr = _ALIGN_DOWN(addr, 1ul << SLICE_LOW_SHIFT);
329 else if (addr < (1ul << SLICE_HIGH_SHIFT))
330 addr = SLICE_LOW_TOP;
331 else
332 addr = _ALIGN_DOWN(addr, 1ul << SLICE_HIGH_SHIFT);
333 continue;
334 }
335
336 /*
337 * Lookup failure means no vma is above this address,
338 * else if new region fits below vma->vm_start,
339 * return with success:
340 */
341 vma = find_vma(mm, addr);
342 if (!vma || (addr + len) <= vma->vm_start) {
343 /* remember the address as a hint for next time */
344 if (use_cache)
345 mm->free_area_cache = addr;
346 return addr;
347 }
348
349 /* remember the largest hole we saw so far */
350 if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
351 mm->cached_hole_size = vma->vm_start - addr;
352
353 /* try just below the current vma->vm_start */
354 addr = vma->vm_start;
355 }
356
357 /*
358 * A failed mmap() very likely causes application failure,
359 * so fall back to the bottom-up function here. This scenario
360 * can happen with large stack limits and large mmap()
361 * allocations.
362 */
363 addr = slice_find_area_bottomup(mm, len, available, psize, 0);
364
365 /*
366 * Restore the topdown base:
367 */
368 if (use_cache) {
369 mm->free_area_cache = mm->mmap_base;
370 mm->cached_hole_size = ~0UL;
371 }
372
373 return addr;
374}
375
376
377static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
378 struct slice_mask mask, int psize,
379 int topdown, int use_cache)
380{
381 if (topdown)
382 return slice_find_area_topdown(mm, len, mask, psize, use_cache);
383 else
384 return slice_find_area_bottomup(mm, len, mask, psize, use_cache);
385}
386
/*
 * Pick an address for a mapping of @len bytes using page size @psize in
 * the current mm.
 *
 * A MAP_FIXED address or a non-zero hint in @addr is tried first: it is
 * returned directly when the range fits in slices that already have
 * @psize, or after converting the covered slices when they all are either
 * of that size or free. Failing that (and only without MAP_FIXED), an area
 * is searched first in the slices that already have @psize, then in those
 * plus the free slices; in the latter case the slices covered by the
 * result are converted to @psize.
 *
 * Returns the chosen address, or -ENOMEM (@len too large or nothing
 * found), -EINVAL (MAP_FIXED address misaligned or beyond task_size - len)
 * or -EBUSY (MAP_FIXED range cannot be satisfied).
 */
387unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
388 unsigned long flags, unsigned int psize,
389 int topdown, int use_cache)
390{
391 struct slice_mask mask;
392 struct slice_mask good_mask;
393 struct slice_mask potential_mask = {0,0} /* silence stupid warning */;
394 int pmask_set = 0;
395 int fixed = (flags & MAP_FIXED);
396 int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
397 struct mm_struct *mm = current->mm;
398
399 /* Sanity checks */
400 BUG_ON(mm->task_size == 0);
401
402 slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
403 slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d, use_cache=%d\n",
404 addr, len, flags, topdown, use_cache);
405
406 if (len > mm->task_size)
407 return -ENOMEM;
408 if (fixed && (addr & ((1ul << pshift) - 1)))
409 return -EINVAL;
410 if (fixed && addr > (mm->task_size - len))
411 return -EINVAL;
412
413 /* If hint, make sure it matches our alignment restrictions */
414 if (!fixed && addr) {
415 addr = _ALIGN_UP(addr, 1ul << pshift);
416 slice_dbg(" aligned addr=%lx\n", addr);
417 }
418
419 /* First make up a "good" mask of slices that have the right size
420 * already
421 */
422 good_mask = slice_mask_for_size(mm, psize);
423 slice_print_mask(" good_mask", good_mask);
424
425 /* First check hint if it's valid or if we have MAP_FIXED */
426 if ((addr != 0 || fixed) && (mm->task_size - len) >= addr) {
427
428 /* Don't bother with hint if it overlaps a VMA */
429 if (!fixed && !slice_area_is_free(mm, addr, len))
430 goto search;
431
432 /* Build a mask for the requested range */
433 mask = slice_range_to_mask(addr, len);
434 slice_print_mask(" mask", mask);
435
436 /* Check if we fit in the good mask. If we do, we just return,
437 * nothing else to do
438 */
439 if (slice_check_fit(mask, good_mask)) {
440 slice_dbg(" fits good !\n");
441 return addr;
442 }
443
444 /* We don't fit in the good mask, check what other slices are
445 * empty and thus can be converted
446 */
447 potential_mask = slice_mask_for_free(mm);
448 potential_mask.low_slices |= good_mask.low_slices;
449 potential_mask.high_slices |= good_mask.high_slices;
450 pmask_set = 1;
451 slice_print_mask(" potential", potential_mask);
452 if (slice_check_fit(mask, potential_mask)) {
453 slice_dbg(" fits potential !\n");
454 goto convert;
455 }
456 }
457
458 /* If we have MAP_FIXED and failed the above step, then error out */
459 if (fixed)
460 return -EBUSY;
461
462 search:
463 slice_dbg(" search...\n");
464
465 /* Now let's see if we can find something in the existing slices
466 * for that size
467 */
468 addr = slice_find_area(mm, len, good_mask, psize, topdown, use_cache);
469 if (addr != -ENOMEM) {
470 /* Found within the good mask, we don't have to setup,
471 * we thus return directly
472 */
473 slice_dbg(" found area at 0x%lx\n", addr);
474 return addr;
475 }
476
477 /* Won't fit, check what can be converted */
478 if (!pmask_set) {
479 potential_mask = slice_mask_for_free(mm);
480 potential_mask.low_slices |= good_mask.low_slices;
481 potential_mask.high_slices |= good_mask.high_slices;
482 pmask_set = 1;
483 slice_print_mask(" potential", potential_mask);
484 }
485
486 /* Now let's see if we can find something in the potential mask,
487 * i.e. slices of the right size plus the free ones
488 */
489 addr = slice_find_area(mm, len, potential_mask, psize, topdown,
490 use_cache);
491 if (addr == -ENOMEM)
492 return -ENOMEM;
493
494 mask = slice_range_to_mask(addr, len);
495 slice_dbg(" found potential area at 0x%lx\n", addr);
496 slice_print_mask(" mask", mask);
497
/* Reached with mask describing the range at addr, either from the
 * hint/MAP_FIXED path or from the search in the potential mask
 */
498 convert:
499 slice_convert(mm, mask, psize);
500 return addr;
501
502}
503EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
504
/*
 * Bottom-up variant: forwards to slice_get_unmapped_area() with the
 * current mm's user page size, topdown=0 and use_cache=1. @filp and
 * @pgoff are not used here.
 */
505unsigned long arch_get_unmapped_area(struct file *filp,
506 unsigned long addr,
507 unsigned long len,
508 unsigned long pgoff,
509 unsigned long flags)
510{
511 return slice_get_unmapped_area(addr, len, flags,
512 current->mm->context.user_psize,
513 0, 1);
514}
515
/*
 * Top-down variant: forwards to slice_get_unmapped_area() with the
 * current mm's user page size, topdown=1 and use_cache=1. @filp and
 * @pgoff are not used here.
 */
516unsigned long arch_get_unmapped_area_topdown(struct file *filp,
517 const unsigned long addr0,
518 const unsigned long len,
519 const unsigned long pgoff,
520 const unsigned long flags)
521{
522 return slice_get_unmapped_area(addr0, len, flags,
523 current->mm->context.user_psize,
524 1, 1);
525}
526
/*
 * Return the page size index stored for the slice containing @addr.
 *
 * Addresses below SLICE_LOW_TOP are looked up in low_slices_psize, all
 * others in high_slices_psize; each slice occupies 4 bits of the
 * corresponding word.
 */
527unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
528{
529 u64 psizes;
530 int index;
531
532 if (addr < SLICE_LOW_TOP) {
533 psizes = mm->context.low_slices_psize;
534 index = GET_LOW_SLICE_INDEX(addr);
535 } else {
536 psizes = mm->context.high_slices_psize;
537 index = GET_HIGH_SLICE_INDEX(addr);
538 }
539
/* Extract the 4-bit entry for that slice */
540 return (psizes >> (index * 4)) & 0xf;
541}
542EXPORT_SYMBOL_GPL(get_slice_psize);
543
544/*
545 * This is called by hash_page when it needs to do a lazy conversion of
546 * an address space from real 64K pages to combo 4K pages (typically
547 * when hitting a non cacheable mapping on a processor or hypervisor
548 * that won't allow them for 64K pages).
549 *
550 * This is also called in init_new_context() to change back the user
551 * psize from whatever the parent context had it set to
552 *
553 * This function will only change the content of the {low,high}_slices_psize
554 * masks, it will not flush SLBs as this shall be handled lazily by the
555 * caller.
 *
 * The whole update runs under slice_convert_lock with interrupts
 * disabled; nothing is changed when @psize already is the user psize.
556 */
557void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
558{
559 unsigned long flags, lpsizes, hpsizes;
560 unsigned int old_psize;
561 int i;
562
563 slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize);
564
565 spin_lock_irqsave(&slice_convert_lock, flags);
566
567 old_psize = mm->context.user_psize;
568 slice_dbg(" old_psize=%d\n", old_psize);
569 if (old_psize == psize)
570 goto bail;
571
572 mm->context.user_psize = psize;
573 wmb();
574
/* Rewrite every 4-bit low slice entry that still holds the old user
 * psize; entries with any other size are left alone
 */
575 lpsizes = mm->context.low_slices_psize;
576 for (i = 0; i < SLICE_NUM_LOW; i++)
577 if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
578 lpsizes = (lpsizes & ~(0xful << (i * 4))) |
579 (((unsigned long)psize) << (i * 4));
580
/* Same treatment for the high slices */
581 hpsizes = mm->context.high_slices_psize;
582 for (i = 0; i < SLICE_NUM_HIGH; i++)
583 if (((hpsizes >> (i * 4)) & 0xf) == old_psize)
584 hpsizes = (hpsizes & ~(0xful << (i * 4))) |
585 (((unsigned long)psize) << (i * 4));
586
587 mm->context.low_slices_psize = lpsizes;
588 mm->context.high_slices_psize = hpsizes;
589
590 slice_dbg(" lsps=%lx, hsps=%lx\n",
591 mm->context.low_slices_psize,
592 mm->context.high_slices_psize);
593
594 bail:
595 spin_unlock_irqrestore(&slice_convert_lock, flags);
596}
597
598/*
599 * is_hugepage_only_range() is used by generic code to verify whether
600 * a normal mmap mapping (non hugetlbfs) is valid on a given area.
601 *
602 * until the generic code provides a more generic hook and/or starts
603 * calling arch get_unmapped_area for MAP_FIXED (which our implementation
604 * here knows how to deal with), we hijack it to keep standard mappings
605 * away from us.
606 *
607 * because of that generic code limitation, MAP_FIXED mapping cannot
608 * "convert" back a slice with no VMAs to the standard page size, only
609 * get_unmapped_area() can. It would be possible to fix it here but I
610 * prefer working on fixing the generic code instead.
611 *
612 * WARNING: This will not work if hugetlbfs isn't enabled since the
613 * generic code will redefine that function as 0 in that case. This is ok
614 * for now as we only use slices with hugetlbfs enabled. This should
615 * be fixed as the generic code gets fixed.
 *
 * Returns non-zero when [addr, addr + len) is not entirely covered by
 * slices whose page size is the mm's user_psize, zero otherwise.
616 */
617int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
618 unsigned long len)
619{
620 struct slice_mask mask, available;
621
622 mask = slice_range_to_mask(addr, len);
623 available = slice_mask_for_size(mm, mm->context.user_psize);
624
625#if 0 /* too verbose */
626 slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n",
627 mm, addr, len);
628 slice_print_mask(" mask", mask);
629 slice_print_mask(" available", available);
630#endif
631 return !slice_check_fit(mask, available);
632}
633
diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c
index 925ff70be8ba..6a69417cbc0e 100644
--- a/arch/powerpc/mm/tlb_32.c
+++ b/arch/powerpc/mm/tlb_32.c
@@ -111,7 +111,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start,
111 if (start >= end) 111 if (start >= end)
112 return; 112 return;
113 end = (end - 1) | ~PAGE_MASK; 113 end = (end - 1) | ~PAGE_MASK;
114 pmd = pmd_offset(pgd_offset(mm, start), start); 114 pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start);
115 for (;;) { 115 for (;;) {
116 pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; 116 pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1;
117 if (pmd_end > end) 117 if (pmd_end > end)
@@ -169,7 +169,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
169 return; 169 return;
170 } 170 }
171 mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; 171 mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm;
172 pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr); 172 pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr);
173 if (!pmd_none(*pmd)) 173 if (!pmd_none(*pmd))
174 flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); 174 flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1);
175 FINISH_FLUSH; 175 FINISH_FLUSH;
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index fd8d08c325eb..2bfc4d7e1aa2 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -143,16 +143,22 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
143 */ 143 */
144 addr &= PAGE_MASK; 144 addr &= PAGE_MASK;
145 145
146 /* Get page size (maybe move back to caller) */ 146 /* Get page size (maybe move back to caller).
147 *
148 * NOTE: when using special 64K mappings in 4K environment like
149 * for SPEs, we obtain the page size from the slice, which thus
150 * must still exist (and thus the VMA not reused) at the time
151 * of this call
152 */
147 if (huge) { 153 if (huge) {
148#ifdef CONFIG_HUGETLB_PAGE 154#ifdef CONFIG_HUGETLB_PAGE
149 psize = mmu_huge_psize; 155 psize = mmu_huge_psize;
150#else 156#else
151 BUG(); 157 BUG();
152 psize = pte_pagesize_index(pte); /* shutup gcc */ 158 psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
153#endif 159#endif
154 } else 160 } else
155 psize = pte_pagesize_index(pte); 161 psize = pte_pagesize_index(mm, addr, pte);
156 162
157 /* Build full vaddr */ 163 /* Build full vaddr */
158 if (!is_kernel_addr(addr)) { 164 if (!is_kernel_addr(addr)) {
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
index 7ef0c6854799..ba55b0ff0f74 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
@@ -15,8 +15,8 @@
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/delay.h> 16#include <linux/delay.h>
17 17
18#include <asm/pgtable.h>
19#include <asm/page.h> 18#include <asm/page.h>
19#include <asm/pgtable.h>
20#include <asm/pci-bridge.h> 20#include <asm/pci-bridge.h>
21#include <asm-powerpc/mpic.h> 21#include <asm-powerpc/mpic.h>
22#include <asm/mpc86xx.h> 22#include <asm/mpc86xx.h>
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 82551770917c..9b2b386ccf48 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -35,6 +35,21 @@ config SPU_FS
35 Units on machines implementing the Broadband Processor 35 Units on machines implementing the Broadband Processor
36 Architecture. 36 Architecture.
37 37
38config SPU_FS_64K_LS
39 bool "Use 64K pages to map SPE local store"
40 # we depend on PPC_MM_SLICES for now rather than selecting
41 # it because we depend on hugetlbfs hooks being present. We
42 # will fix that when the generic code has been improved to
43 # not require hijacking hugetlbfs hooks.
44 depends on SPU_FS && PPC_MM_SLICES && !PPC_64K_PAGES
45 default y
46 select PPC_HAS_HASH_64K
47 help
48 This option causes SPE local stores to be mapped in process
49 address spaces using 64K pages while the rest of the kernel
50 uses 4K pages. This can improve the performance of applications
51 using multiple SPEs by lowering the TLB pressure on them.
52
38config SPU_BASE 53config SPU_BASE
39 bool 54 bool
40 default n 55 default n
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index fec51525252e..a7f5a7653c62 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -144,12 +144,11 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
144 144
145 switch(REGION_ID(ea)) { 145 switch(REGION_ID(ea)) {
146 case USER_REGION_ID: 146 case USER_REGION_ID:
147#ifdef CONFIG_HUGETLB_PAGE 147#ifdef CONFIG_PPC_MM_SLICES
148 if (in_hugepage_area(mm->context, ea)) 148 psize = get_slice_psize(mm, ea);
149 psize = mmu_huge_psize; 149#else
150 else 150 psize = mm->context.user_psize;
151#endif 151#endif
152 psize = mm->context.user_psize;
153 vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) | 152 vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) |
154 SLB_VSID_USER; 153 SLB_VSID_USER;
155 break; 154 break;
diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile
index 2cd89c11af5a..328afcf89503 100644
--- a/arch/powerpc/platforms/cell/spufs/Makefile
+++ b/arch/powerpc/platforms/cell/spufs/Makefile
@@ -1,4 +1,4 @@
1obj-y += switch.o fault.o 1obj-y += switch.o fault.o lscsa_alloc.o
2 2
3obj-$(CONFIG_SPU_FS) += spufs.o 3obj-$(CONFIG_SPU_FS) += spufs.o
4spufs-y += inode.o file.o context.o syscalls.o coredump.o 4spufs-y += inode.o file.o context.o syscalls.o coredump.o
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c
index a87d9ca3dba2..8654749e317b 100644
--- a/arch/powerpc/platforms/cell/spufs/context.c
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -36,10 +36,8 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang)
36 /* Binding to physical processor deferred 36 /* Binding to physical processor deferred
37 * until spu_activate(). 37 * until spu_activate().
38 */ 38 */
39 spu_init_csa(&ctx->csa); 39 if (spu_init_csa(&ctx->csa))
40 if (!ctx->csa.lscsa) {
41 goto out_free; 40 goto out_free;
42 }
43 spin_lock_init(&ctx->mmio_lock); 41 spin_lock_init(&ctx->mmio_lock);
44 spin_lock_init(&ctx->mapping_lock); 42 spin_lock_init(&ctx->mapping_lock);
45 kref_init(&ctx->kref); 43 kref_init(&ctx->kref);
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index d010b2464a98..45614c73c784 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -118,14 +118,32 @@ spufs_mem_write(struct file *file, const char __user *buffer,
118static unsigned long spufs_mem_mmap_nopfn(struct vm_area_struct *vma, 118static unsigned long spufs_mem_mmap_nopfn(struct vm_area_struct *vma,
119 unsigned long address) 119 unsigned long address)
120{ 120{
121 struct spu_context *ctx = vma->vm_file->private_data; 121 struct spu_context *ctx = vma->vm_file->private_data;
122 unsigned long pfn, offset = address - vma->vm_start; 122 unsigned long pfn, offset, addr0 = address;
123 123#ifdef CONFIG_SPU_FS_64K_LS
124 offset += vma->vm_pgoff << PAGE_SHIFT; 124 struct spu_state *csa = &ctx->csa;
125 int psize;
126
127 /* Check what page size we are using */
128 psize = get_slice_psize(vma->vm_mm, address);
129
130 /* Some sanity checking */
131 BUG_ON(csa->use_big_pages != (psize == MMU_PAGE_64K));
132
133 /* Wow, 64K, cool, we need to align the address though */
134 if (csa->use_big_pages) {
135 BUG_ON(vma->vm_start & 0xffff);
136 address &= ~0xfffful;
137 }
138#endif /* CONFIG_SPU_FS_64K_LS */
125 139
140 offset = (address - vma->vm_start) + (vma->vm_pgoff << PAGE_SHIFT);
126 if (offset >= LS_SIZE) 141 if (offset >= LS_SIZE)
127 return NOPFN_SIGBUS; 142 return NOPFN_SIGBUS;
128 143
144 pr_debug("spufs_mem_mmap_nopfn address=0x%lx -> 0x%lx, offset=0x%lx\n",
145 addr0, address, offset);
146
129 spu_acquire(ctx); 147 spu_acquire(ctx);
130 148
131 if (ctx->state == SPU_STATE_SAVED) { 149 if (ctx->state == SPU_STATE_SAVED) {
@@ -149,9 +167,24 @@ static struct vm_operations_struct spufs_mem_mmap_vmops = {
149 .nopfn = spufs_mem_mmap_nopfn, 167 .nopfn = spufs_mem_mmap_nopfn,
150}; 168};
151 169
152static int 170static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
153spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) 171{
154{ 172#ifdef CONFIG_SPU_FS_64K_LS
173 struct spu_context *ctx = file->private_data;
174 struct spu_state *csa = &ctx->csa;
175
176 /* Sanity check VMA alignment */
177 if (csa->use_big_pages) {
178 pr_debug("spufs_mem_mmap 64K, start=0x%lx, end=0x%lx,"
179 " pgoff=0x%lx\n", vma->vm_start, vma->vm_end,
180 vma->vm_pgoff);
181 if (vma->vm_start & 0xffff)
182 return -EINVAL;
183 if (vma->vm_pgoff & 0xf)
184 return -EINVAL;
185 }
186#endif /* CONFIG_SPU_FS_64K_LS */
187
155 if (!(vma->vm_flags & VM_SHARED)) 188 if (!(vma->vm_flags & VM_SHARED))
156 return -EINVAL; 189 return -EINVAL;
157 190
@@ -163,13 +196,34 @@ spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
163 return 0; 196 return 0;
164} 197}
165 198
199#ifdef CONFIG_SPU_FS_64K_LS
/*
 * get_unmapped_area hook used by spufs_mem_fops.
 *
 * When the context's save area does not use big pages, the request is
 * passed to the current mm's regular get_unmapped_area. Otherwise an
 * area is requested from slice_get_unmapped_area() with MMU_PAGE_64K,
 * topdown=1 and use_cache=0.
 */
200unsigned long spufs_get_unmapped_area(struct file *file, unsigned long addr,
201 unsigned long len, unsigned long pgoff,
202 unsigned long flags)
203{
204 struct spu_context *ctx = file->private_data;
205 struct spu_state *csa = &ctx->csa;
206
207 /* If not using big pages, fallback to normal MM g_u_a */
208 if (!csa->use_big_pages)
209 return current->mm->get_unmapped_area(file, addr, len,
210 pgoff, flags);
211
212 /* Else, try to obtain a 64K pages slice */
213 return slice_get_unmapped_area(addr, len, flags,
214 MMU_PAGE_64K, 1, 0);
215}
216#endif /* CONFIG_SPU_FS_64K_LS */
217
166static const struct file_operations spufs_mem_fops = { 218static const struct file_operations spufs_mem_fops = {
167 .open = spufs_mem_open, 219 .open = spufs_mem_open,
168 .release = spufs_mem_release, 220 .read = spufs_mem_read,
169 .read = spufs_mem_read, 221 .write = spufs_mem_write,
170 .write = spufs_mem_write, 222 .llseek = generic_file_llseek,
171 .llseek = generic_file_llseek, 223 .mmap = spufs_mem_mmap,
172 .mmap = spufs_mem_mmap, 224#ifdef CONFIG_SPU_FS_64K_LS
225 .get_unmapped_area = spufs_get_unmapped_area,
226#endif
173}; 227};
174 228
175static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma, 229static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
new file mode 100644
index 000000000000..f4b3c052dabf
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
@@ -0,0 +1,181 @@
1/*
2 * SPU local store allocation routines
3 *
4 * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2, or (at your option)
9 * any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#undef DEBUG
22
23#include <linux/kernel.h>
24#include <linux/mm.h>
25#include <linux/vmalloc.h>
26
27#include <asm/spu.h>
28#include <asm/spu_csa.h>
29#include <asm/mmu.h>
30
/*
 * Standard lscsa allocation: vmalloc a struct spu_lscsa, zero it, store it
 * in csa->lscsa and mark every page of its ls[] area reserved.
 *
 * Returns 0 on success or -ENOMEM when vmalloc fails.
 */
31static int spu_alloc_lscsa_std(struct spu_state *csa)
32{
33 struct spu_lscsa *lscsa;
34 unsigned char *p;
35
36 lscsa = vmalloc(sizeof(struct spu_lscsa));
37 if (!lscsa)
38 return -ENOMEM;
39 memset(lscsa, 0, sizeof(struct spu_lscsa));
40 csa->lscsa = lscsa;
41
42 /* Set LS pages reserved to allow for user-space mapping. */
43 for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE)
44 SetPageReserved(vmalloc_to_page(p));
45
46 return 0;
47}
48
/*
 * Counterpart of spu_alloc_lscsa_std(): clear the reserved bit on every
 * page of the ls[] area, then vfree the lscsa. Does nothing when
 * csa->lscsa is NULL.
 *
 * NOTE(review): csa->lscsa is not reset after vfree(), so it keeps
 * pointing at the freed area - confirm no caller relies on it afterwards.
 */
49static void spu_free_lscsa_std(struct spu_state *csa)
50{
51 /* Clear reserved bit before vfree. */
52 unsigned char *p;
53
54 if (csa->lscsa == NULL)
55 return;
56
57 for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
58 ClearPageReserved(vmalloc_to_page(p));
59
60 vfree(csa->lscsa);
61}
62
63#ifdef CONFIG_SPU_FS_64K_LS
64
65#define SPU_64K_PAGE_SHIFT 16
66#define SPU_64K_PAGE_ORDER (SPU_64K_PAGE_SHIFT - PAGE_SHIFT)
67#define SPU_64K_PAGE_COUNT (1ul << SPU_64K_PAGE_ORDER)
68
/*
 * Allocate the lscsa, preferably out of 64K pages.
 *
 * SPU_LSCSA_NUM_BIG_PAGES allocations of order SPU_64K_PAGE_ORDER are
 * made, their constituent pages are collected in a temporary array and
 * mapped into the kernel with vmap(); the result is zeroed and the pages
 * of its ls[] area are marked reserved.
 *
 * If 64K pages are not available, or any allocation or the vmap() fails,
 * whatever was obtained is released through spu_free_lscsa() and the
 * standard allocator is used instead.
 *
 * Returns 0 on success of the 64K path, otherwise the result of
 * spu_alloc_lscsa_std().
 */
69int spu_alloc_lscsa(struct spu_state *csa)
70{
71 struct page **pgarray;
72 unsigned char *p;
73 int i, j, n_4k;
74
75 /* Check availability of 64K pages */
76 if (mmu_psize_defs[MMU_PAGE_64K].shift == 0)
77 goto fail;
78
/* Set before allocating so that the fail path frees via the 64K branch
 * of spu_free_lscsa()
 */
79 csa->use_big_pages = 1;
80
81 pr_debug("spu_alloc_lscsa(csa=0x%p), trying to allocate 64K pages\n",
82 csa);
83
84 /* First try to allocate our 64K pages. We need 5 of them
85 * with the current implementation. In the future, we should try
86 * to separate the lscsa with the actual local store image, thus
87 * allowing us to require only 4 64K pages per context
88 */
89 for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) {
90 /* XXX This is likely to fail, we should use a special pool
91 * similar to what hugetlbfs does.
92 */
93 csa->lscsa_pages[i] = alloc_pages(GFP_KERNEL,
94 SPU_64K_PAGE_ORDER);
95 if (csa->lscsa_pages[i] == NULL)
96 goto fail;
97 }
98
99 pr_debug(" success ! creating vmap...\n");
100
101 /* Now we need to create a vmalloc mapping of these for the kernel
102 * and SPU context switch code to use. Currently, we stick to a
103 * normal kernel vmalloc mapping, which in our case will be 4K
104 */
105 n_4k = SPU_64K_PAGE_COUNT * SPU_LSCSA_NUM_BIG_PAGES;
106 pgarray = kmalloc(sizeof(struct page *) * n_4k, GFP_KERNEL);
107 if (pgarray == NULL)
108 goto fail;
109 for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++)
110 for (j = 0; j < SPU_64K_PAGE_COUNT; j++)
111 /* We assume all the struct page's are contiguous
112 * which should be hopefully the case for an order 4
113 * allocation..
114 */
115 pgarray[i * SPU_64K_PAGE_COUNT + j] =
116 csa->lscsa_pages[i] + j;
117 csa->lscsa = vmap(pgarray, n_4k, VM_USERMAP, PAGE_KERNEL);
/* The array is only needed for the vmap() call itself */
118 kfree(pgarray);
119 if (csa->lscsa == NULL)
120 goto fail;
121
122 memset(csa->lscsa, 0, sizeof(struct spu_lscsa));
123
124 /* Set LS pages reserved to allow for user-space mapping.
125 *
126 * XXX isn't that a bit obsolete ? I think we should just
127 * make sure the page count is high enough. Anyway, won't harm
128 * for now
129 */
130 for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
131 SetPageReserved(vmalloc_to_page(p));
132
133 pr_debug(" all good !\n");
134
135 return 0;
136fail:
137 pr_debug("spufs: failed to allocate lscsa 64K pages, falling back\n");
138 spu_free_lscsa(csa);
139 return spu_alloc_lscsa_std(csa);
140}
141
/*
 * Release an lscsa obtained from spu_alloc_lscsa().
 *
 * Without use_big_pages this defers to spu_free_lscsa_std(). Otherwise
 * the flag is cleared, the kernel mapping (if any) has its ls[] pages
 * unreserved and is vunmap'ed, and every non-NULL entry of lscsa_pages[]
 * is freed. This also serves as the cleanup for a partially completed
 * 64K allocation.
 *
 * NOTE(review): lscsa_pages[] entries are not reset to NULL after being
 * freed - confirm nothing reads them once use_big_pages is cleared.
 */
142void spu_free_lscsa(struct spu_state *csa)
143{
144 unsigned char *p;
145 int i;
146
147 if (!csa->use_big_pages) {
148 spu_free_lscsa_std(csa);
149 return;
150 }
151 csa->use_big_pages = 0;
152
/* No mapping was set up (or it failed): only pages may need freeing */
153 if (csa->lscsa == NULL)
154 goto free_pages;
155
156 for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
157 ClearPageReserved(vmalloc_to_page(p));
158
159 vunmap(csa->lscsa);
160 csa->lscsa = NULL;
161
162 free_pages:
163
164 for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++)
165 if (csa->lscsa_pages[i])
166 __free_pages(csa->lscsa_pages[i], SPU_64K_PAGE_ORDER);
167}
168
169#else /* CONFIG_SPU_FS_64K_LS */
170
/* Without CONFIG_SPU_FS_64K_LS only the standard allocator is available */
171int spu_alloc_lscsa(struct spu_state *csa)
172{
173 return spu_alloc_lscsa_std(csa);
174}
175
/* Without CONFIG_SPU_FS_64K_LS the lscsa always comes from the standard allocator */
176void spu_free_lscsa(struct spu_state *csa)
177{
178 spu_free_lscsa_std(csa);
179}
180
181#endif /* !defined(CONFIG_SPU_FS_64K_LS) */
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c
index 29dc59cefc38..71a0b41adb8c 100644
--- a/arch/powerpc/platforms/cell/spufs/switch.c
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -2188,40 +2188,30 @@ static void init_priv2(struct spu_state *csa)
2188 * as it is by far the largest of the context save regions, 2188 * as it is by far the largest of the context save regions,
2189 * and may need to be pinned or otherwise specially aligned. 2189 * and may need to be pinned or otherwise specially aligned.
2190 */ 2190 */
2191void spu_init_csa(struct spu_state *csa) 2191int spu_init_csa(struct spu_state *csa)
2192{ 2192{
2193 struct spu_lscsa *lscsa; 2193 int rc;
2194 unsigned char *p;
2195 2194
2196 if (!csa) 2195 if (!csa)
2197 return; 2196 return -EINVAL;
2198 memset(csa, 0, sizeof(struct spu_state)); 2197 memset(csa, 0, sizeof(struct spu_state));
2199 2198
2200 lscsa = vmalloc(sizeof(struct spu_lscsa)); 2199 rc = spu_alloc_lscsa(csa);
2201 if (!lscsa) 2200 if (rc)
2202 return; 2201 return rc;
2203 2202
2204 memset(lscsa, 0, sizeof(struct spu_lscsa));
2205 csa->lscsa = lscsa;
2206 spin_lock_init(&csa->register_lock); 2203 spin_lock_init(&csa->register_lock);
2207 2204
2208 /* Set LS pages reserved to allow for user-space mapping. */
2209 for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE)
2210 SetPageReserved(vmalloc_to_page(p));
2211
2212 init_prob(csa); 2205 init_prob(csa);
2213 init_priv1(csa); 2206 init_priv1(csa);
2214 init_priv2(csa); 2207 init_priv2(csa);
2208
2209 return 0;
2215} 2210}
2216EXPORT_SYMBOL_GPL(spu_init_csa); 2211EXPORT_SYMBOL_GPL(spu_init_csa);
2217 2212
2218void spu_fini_csa(struct spu_state *csa) 2213void spu_fini_csa(struct spu_state *csa)
2219{ 2214{
2220 /* Clear reserved bit before vfree. */ 2215 spu_free_lscsa(csa);
2221 unsigned char *p;
2222 for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
2223 ClearPageReserved(vmalloc_to_page(p));
2224
2225 vfree(csa->lscsa);
2226} 2216}
2227EXPORT_SYMBOL_GPL(spu_fini_csa); 2217EXPORT_SYMBOL_GPL(spu_fini_csa);
diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig
index 46c3a8e7c3a8..761d9e971fc4 100644
--- a/arch/powerpc/platforms/iseries/Kconfig
+++ b/arch/powerpc/platforms/iseries/Kconfig
@@ -7,7 +7,9 @@ menu "iSeries device drivers"
7 depends on PPC_ISERIES 7 depends on PPC_ISERIES
8 8
9config VIOCONS 9config VIOCONS
10 tristate "iSeries Virtual Console Support (Obsolete)" 10 bool "iSeries Virtual Console Support (Obsolete)"
11 depends on !HVC_ISERIES
12 default n
11 help 13 help
12 This is the old virtual console driver for legacy iSeries. 14 This is the old virtual console driver for legacy iSeries.
13 You should use the iSeries Hypervisor Virtual Console 15 You should use the iSeries Hypervisor Virtual Console
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 63e23062e982..093438b93bd9 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -100,6 +100,9 @@ static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
100static DEFINE_SPINLOCK(slot_errbuf_lock); 100static DEFINE_SPINLOCK(slot_errbuf_lock);
101static int eeh_error_buf_size; 101static int eeh_error_buf_size;
102 102
103#define EEH_PCI_REGS_LOG_LEN 4096
104static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
105
103/* System monitoring statistics */ 106/* System monitoring statistics */
104static unsigned long no_device; 107static unsigned long no_device;
105static unsigned long no_dn; 108static unsigned long no_dn;
@@ -115,7 +118,8 @@ static unsigned long slot_resets;
115/* --------------------------------------------------------------- */ 118/* --------------------------------------------------------------- */
116/* Below lies the EEH event infrastructure */ 119/* Below lies the EEH event infrastructure */
117 120
118void eeh_slot_error_detail (struct pci_dn *pdn, int severity) 121static void rtas_slot_error_detail(struct pci_dn *pdn, int severity,
122 char *driver_log, size_t loglen)
119{ 123{
120 int config_addr; 124 int config_addr;
121 unsigned long flags; 125 unsigned long flags;
@@ -133,7 +137,8 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
133 rc = rtas_call(ibm_slot_error_detail, 137 rc = rtas_call(ibm_slot_error_detail,
134 8, 1, NULL, config_addr, 138 8, 1, NULL, config_addr,
135 BUID_HI(pdn->phb->buid), 139 BUID_HI(pdn->phb->buid),
136 BUID_LO(pdn->phb->buid), NULL, 0, 140 BUID_LO(pdn->phb->buid),
141 virt_to_phys(driver_log), loglen,
137 virt_to_phys(slot_errbuf), 142 virt_to_phys(slot_errbuf),
138 eeh_error_buf_size, 143 eeh_error_buf_size,
139 severity); 144 severity);
@@ -144,6 +149,84 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
144} 149}
145 150
146/** 151/**
152 * gather_pci_data - copy assorted PCI config space registers to buff
153 * @pdn: device to report data for
154 * @buf: point to buffer in which to log
155 * @len: amount of room in buffer
156 *
157 * This routine captures assorted PCI configuration space data,
158 * and puts them into a buffer for RTAS error logging.
159 */
160static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
161{
162 u32 cfg;
163 int cap, i;
/* Number of bytes written to buf so far; also the return value */
164 int n = 0;
165
/* Every value is both appended to buf (bounded by scnprintf) and
 * printed to the kernel log.
 *
 * NOTE(review): the result of rtas_read_config() is never checked
 * below, so cfg is logged as-is even if a read did not succeed -
 * confirm this is acceptable.
 */
166 n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name);
167 printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name);
168
169 rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
170 n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
171 printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
172
173 rtas_read_config(pdn, PCI_COMMAND, 4, &cfg);
174 n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
175 printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
176
/* NOTE(review): pdn->pcidev is handed to the capability lookups
 * without a NULL check here - confirm it cannot be NULL on this path.
 */
177 /* Dump out the PCI-X command and status regs */
178 cap = pci_find_capability(pdn->pcidev, PCI_CAP_ID_PCIX);
179 if (cap) {
180 rtas_read_config(pdn, cap, 4, &cfg);
181 n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
182 printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
183
184 rtas_read_config(pdn, cap+4, 4, &cfg);
185 n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
186 printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
187 }
188
189 /* If PCI-E capable, dump PCI-E cap 10, and the AER */
190 cap = pci_find_capability(pdn->pcidev, PCI_CAP_ID_EXP);
191 if (cap) {
192 n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
193 printk(KERN_WARNING
194 "EEH: PCI-E capabilities and status follow:\n");
195
/* Nine 32-bit words starting at the capability offset.
 * NOTE(review): the buffer labels each word with its byte offset
 * (4*i) while the printk labels it with the word index (i).
 */
196 for (i=0; i<=8; i++) {
197 rtas_read_config(pdn, cap+4*i, 4, &cfg);
198 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
199 printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
200 }
201
202 cap = pci_find_ext_capability(pdn->pcidev,PCI_EXT_CAP_ID_ERR);
203 if (cap) {
204 n += scnprintf(buf+n, len-n, "pci-e AER:\n");
205 printk(KERN_WARNING
206 "EEH: PCI-E AER capability register set follows:\n");
207
/* Fourteen 32-bit words of the AER capability; same
 * offset-vs-index labelling difference as above.
 */
208 for (i=0; i<14; i++) {
209 rtas_read_config(pdn, cap+4*i, 4, &cfg);
210 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
211 printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
212 }
213 }
214 }
215 return n;
216}
217
/*
 * Collect PCI config space data for @pdn and pass it, together with
 * @severity, to rtas_slot_error_detail().
 *
 * pci_regs_buf is cleared, rtas_pci_enable() is called with
 * EEH_THAW_MMIO, and gather_pci_data() then fills the buffer; the number
 * of bytes it reports is used as the log length.
 *
 * NOTE(review): pci_regs_buf is a single file-scope buffer and no lock is
 * taken in this function - confirm callers are serialized.
 */
218void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
219{
220 size_t loglen = 0;
221 memset(pci_regs_buf, 0, EEH_PCI_REGS_LOG_LEN);
222
223 rtas_pci_enable(pdn, EEH_THAW_MMIO);
224 loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
225
226 rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
227}
228
229/**
147 * read_slot_reset_state - Read the reset state of a device node's slot 230 * read_slot_reset_state - Read the reset state of a device node's slot
148 * @dn: device node to read 231 * @dn: device node to read
149 * @rets: array to return results in 232 * @rets: array to return results in
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 3170e003f76a..f07d849cfc84 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -361,11 +361,12 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
361 goto hard_fail; 361 goto hard_fail;
362 } 362 }
363 363
364 eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */);
365 printk(KERN_WARNING 364 printk(KERN_WARNING
366 "EEH: This PCI device has failed %d times since last reboot: " 365 "EEH: This PCI device has failed %d times in the last hour:\n",
367 "location=%s driver=%s pci addr=%s\n", 366 frozen_pdn->eeh_freeze_count);
368 frozen_pdn->eeh_freeze_count, location, drv_str, pci_str); 367 printk(KERN_WARNING
368 "EEH: location=%s driver=%s pci addr=%s\n",
369 location, drv_str, pci_str);
369 370
370 /* Walk the various device drivers attached to this slot through 371 /* Walk the various device drivers attached to this slot through
371 * a reset sequence, giving each an opportunity to do what it needs 372 * a reset sequence, giving each an opportunity to do what it needs
@@ -375,6 +376,11 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
375 */ 376 */
376 pci_walk_bus(frozen_bus, eeh_report_error, &result); 377 pci_walk_bus(frozen_bus, eeh_report_error, &result);
377 378
379 /* Since rtas may enable MMIO when posting the error log,
380 * don't post the error log until after all dev drivers
381 * have been informed. */
382 eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */);
383
378 /* If all device drivers were EEH-unaware, then shut 384 /* If all device drivers were EEH-unaware, then shut
379 * down all of the device drivers, and hope they 385 * down all of the device drivers, and hope they
380 * go down willingly, without panicking the system. 386 * go down willingly, without panicking the system.
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index 8a123c71449f..cad175724359 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -907,7 +907,7 @@ static int __init fs_enet_of_init(void)
907 struct fs_platform_info fs_enet_data; 907 struct fs_platform_info fs_enet_data;
908 const unsigned int *id; 908 const unsigned int *id;
909 const unsigned int *phy_addr; 909 const unsigned int *phy_addr;
910 void *mac_addr; 910 const void *mac_addr;
911 const phandle *ph; 911 const phandle *ph;
912 const char *model; 912 const char *model;
913 913
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 1e32fb834eb8..2df42fdcdc91 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -631,7 +631,8 @@ config HVC_CONSOLE
631 631
632config HVC_ISERIES 632config HVC_ISERIES
633 bool "iSeries Hypervisor Virtual Console support" 633 bool "iSeries Hypervisor Virtual Console support"
634 depends on PPC_ISERIES && !VIOCONS 634 depends on PPC_ISERIES
635 default y
635 select HVC_DRIVER 636 select HVC_DRIVER
636 help 637 help
637 iSeries machines support a hypervisor virtual console. 638 iSeries machines support a hypervisor virtual console.
diff --git a/include/asm-powerpc/mmu-hash64.h b/include/asm-powerpc/mmu-hash64.h
index 6739457d8bc0..e2ca55bcfe0b 100644
--- a/include/asm-powerpc/mmu-hash64.h
+++ b/include/asm-powerpc/mmu-hash64.h
@@ -350,10 +350,13 @@ typedef unsigned long mm_context_id_t;
350 350
351typedef struct { 351typedef struct {
352 mm_context_id_t id; 352 mm_context_id_t id;
353 u16 user_psize; /* page size index */ 353 u16 user_psize; /* page size index */
354 u16 sllp; /* SLB entry page size encoding */ 354
355#ifdef CONFIG_HUGETLB_PAGE 355#ifdef CONFIG_PPC_MM_SLICES
356 u16 low_htlb_areas, high_htlb_areas; 356 u64 low_slices_psize; /* SLB page size encodings */
357 u64 high_slices_psize; /* 4 bits per slice for now */
358#else
359 u16 sllp; /* SLB page size encoding */
357#endif 360#endif
358 unsigned long vdso_base; 361 unsigned long vdso_base;
359} mm_context_t; 362} mm_context_t;
diff --git a/include/asm-powerpc/paca.h b/include/asm-powerpc/paca.h
index cf95274f735e..c6a5b1735666 100644
--- a/include/asm-powerpc/paca.h
+++ b/include/asm-powerpc/paca.h
@@ -83,8 +83,8 @@ struct paca_struct {
83 83
84 mm_context_t context; 84 mm_context_t context;
85 u16 vmalloc_sllp; 85 u16 vmalloc_sllp;
86 u16 slb_cache[SLB_CACHE_ENTRIES];
87 u16 slb_cache_ptr; 86 u16 slb_cache_ptr;
87 u16 slb_cache[SLB_CACHE_ENTRIES];
88 88
89 /* 89 /*
90 * then miscellaneous read-write fields 90 * then miscellaneous read-write fields
diff --git a/include/asm-powerpc/page_64.h b/include/asm-powerpc/page_64.h
index eab779c21995..3448a3d4bc64 100644
--- a/include/asm-powerpc/page_64.h
+++ b/include/asm-powerpc/page_64.h
@@ -88,57 +88,55 @@ extern unsigned int HPAGE_SHIFT;
88 88
89#endif /* __ASSEMBLY__ */ 89#endif /* __ASSEMBLY__ */
90 90
91#ifdef CONFIG_HUGETLB_PAGE 91#ifdef CONFIG_PPC_MM_SLICES
92 92
93#define HTLB_AREA_SHIFT 40 93#define SLICE_LOW_SHIFT 28
94#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT) 94#define SLICE_HIGH_SHIFT 40
95#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT)
96 95
97#define LOW_ESID_MASK(addr, len) \ 96#define SLICE_LOW_TOP (0x100000000ul)
98 (((1U << (GET_ESID(min((addr)+(len)-1, 0x100000000UL))+1)) \ 97#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
99 - (1U << GET_ESID(min((addr), 0x100000000UL)))) & 0xffff) 98#define SLICE_NUM_HIGH (PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
100#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \
101 - (1U << GET_HTLB_AREA(addr))) & 0xffff)
102 99
103#define ARCH_HAS_HUGEPAGE_ONLY_RANGE 100#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
104#define ARCH_HAS_HUGETLB_FREE_PGD_RANGE 101#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
105#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
106#define ARCH_HAS_SETCLEAR_HUGE_PTE
107 102
108#define touches_hugepage_low_range(mm, addr, len) \ 103#ifndef __ASSEMBLY__
109 (((addr) < 0x100000000UL) \ 104
110 && (LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas)) 105struct slice_mask {
111#define touches_hugepage_high_range(mm, addr, len) \ 106 u16 low_slices;
112 ((((addr) + (len)) > 0x100000000UL) \ 107 u16 high_slices;
113 && (HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas)) 108};
114 109
115#define __within_hugepage_low_range(addr, len, segmask) \ 110struct mm_struct;
116 ( (((addr)+(len)) <= 0x100000000UL) \
117 && ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask)))
118#define within_hugepage_low_range(addr, len) \
119 __within_hugepage_low_range((addr), (len), \
120 current->mm->context.low_htlb_areas)
121#define __within_hugepage_high_range(addr, len, zonemask) \
122 ( ((addr) >= 0x100000000UL) \
123 && ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask)))
124#define within_hugepage_high_range(addr, len) \
125 __within_hugepage_high_range((addr), (len), \
126 current->mm->context.high_htlb_areas)
127
128#define is_hugepage_only_range(mm, addr, len) \
129 (touches_hugepage_high_range((mm), (addr), (len)) || \
130 touches_hugepage_low_range((mm), (addr), (len)))
131#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
132 111
133#define in_hugepage_area(context, addr) \ 112extern unsigned long slice_get_unmapped_area(unsigned long addr,
134 (cpu_has_feature(CPU_FTR_16M_PAGE) && \ 113 unsigned long len,
135 ( ( (addr) >= 0x100000000UL) \ 114 unsigned long flags,
136 ? ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) \ 115 unsigned int psize,
137 : ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) ) 116 int topdown,
117 int use_cache);
138 118
139#else /* !CONFIG_HUGETLB_PAGE */ 119extern unsigned int get_slice_psize(struct mm_struct *mm,
120 unsigned long addr);
140 121
141#define in_hugepage_area(mm, addr) 0 122extern void slice_init_context(struct mm_struct *mm, unsigned int psize);
123extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
124
125#define ARCH_HAS_HUGEPAGE_ONLY_RANGE
126extern int is_hugepage_only_range(struct mm_struct *m,
127 unsigned long addr,
128 unsigned long len);
129
130#endif /* __ASSEMBLY__ */
131#else
132#define slice_init()
133#endif /* CONFIG_PPC_MM_SLICES */
134
135#ifdef CONFIG_HUGETLB_PAGE
136
137#define ARCH_HAS_HUGETLB_FREE_PGD_RANGE
138#define ARCH_HAS_SETCLEAR_HUGE_PTE
139#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
142 140
143#endif /* !CONFIG_HUGETLB_PAGE */ 141#endif /* !CONFIG_HUGETLB_PAGE */
144 142
diff --git a/include/asm-powerpc/pgalloc-64.h b/include/asm-powerpc/pgalloc-64.h
index 30b50cf56e2c..d9a3a8ca58a1 100644
--- a/include/asm-powerpc/pgalloc-64.h
+++ b/include/asm-powerpc/pgalloc-64.h
@@ -14,18 +14,11 @@
14 14
15extern struct kmem_cache *pgtable_cache[]; 15extern struct kmem_cache *pgtable_cache[];
16 16
17#ifdef CONFIG_PPC_64K_PAGES 17#define PGD_CACHE_NUM 0
18#define PTE_CACHE_NUM 0 18#define PUD_CACHE_NUM 1
19#define PMD_CACHE_NUM 1 19#define PMD_CACHE_NUM 1
20#define PGD_CACHE_NUM 2 20#define HUGEPTE_CACHE_NUM 2
21#define HUGEPTE_CACHE_NUM 3 21#define PTE_NONCACHE_NUM 3 /* from GFP rather than kmem_cache */
22#else
23#define PTE_CACHE_NUM 0
24#define PMD_CACHE_NUM 1
25#define PUD_CACHE_NUM 1
26#define PGD_CACHE_NUM 0
27#define HUGEPTE_CACHE_NUM 2
28#endif
29 22
30static inline pgd_t *pgd_alloc(struct mm_struct *mm) 23static inline pgd_t *pgd_alloc(struct mm_struct *mm)
31{ 24{
@@ -91,8 +84,7 @@ static inline void pmd_free(pmd_t *pmd)
91static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, 84static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
92 unsigned long address) 85 unsigned long address)
93{ 86{
94 return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], 87 return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
95 GFP_KERNEL|__GFP_REPEAT);
96} 88}
97 89
98static inline struct page *pte_alloc_one(struct mm_struct *mm, 90static inline struct page *pte_alloc_one(struct mm_struct *mm,
@@ -103,12 +95,12 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
103 95
104static inline void pte_free_kernel(pte_t *pte) 96static inline void pte_free_kernel(pte_t *pte)
105{ 97{
106 kmem_cache_free(pgtable_cache[PTE_CACHE_NUM], pte); 98 free_page((unsigned long)pte);
107} 99}
108 100
109static inline void pte_free(struct page *ptepage) 101static inline void pte_free(struct page *ptepage)
110{ 102{
111 pte_free_kernel(page_address(ptepage)); 103 __free_page(ptepage);
112} 104}
113 105
114#define PGF_CACHENUM_MASK 0x3 106#define PGF_CACHENUM_MASK 0x3
@@ -130,14 +122,17 @@ static inline void pgtable_free(pgtable_free_t pgf)
130 void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK); 122 void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK);
131 int cachenum = pgf.val & PGF_CACHENUM_MASK; 123 int cachenum = pgf.val & PGF_CACHENUM_MASK;
132 124
133 kmem_cache_free(pgtable_cache[cachenum], p); 125 if (cachenum == PTE_NONCACHE_NUM)
126 free_page((unsigned long)p);
127 else
128 kmem_cache_free(pgtable_cache[cachenum], p);
134} 129}
135 130
136extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); 131extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
137 132
138#define __pte_free_tlb(tlb, ptepage) \ 133#define __pte_free_tlb(tlb, ptepage) \
139 pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \ 134 pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
140 PTE_CACHE_NUM, PTE_TABLE_SIZE-1)) 135 PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1))
141#define __pmd_free_tlb(tlb, pmd) \ 136#define __pmd_free_tlb(tlb, pmd) \
142 pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \ 137 pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
143 PMD_CACHE_NUM, PMD_TABLE_SIZE-1)) 138 PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
diff --git a/include/asm-powerpc/pgtable-4k.h b/include/asm-powerpc/pgtable-4k.h
index 1744d6ac12a2..add5481fd7c7 100644
--- a/include/asm-powerpc/pgtable-4k.h
+++ b/include/asm-powerpc/pgtable-4k.h
@@ -80,7 +80,11 @@
80 80
81#define pte_iterate_hashed_end() } while(0) 81#define pte_iterate_hashed_end() } while(0)
82 82
83#define pte_pagesize_index(pte) MMU_PAGE_4K 83#ifdef CONFIG_PPC_HAS_HASH_64K
84#define pte_pagesize_index(mm, addr, pte) get_slice_psize(mm, addr)
85#else
86#define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K
87#endif
84 88
85/* 89/*
86 * 4-level page tables related bits 90 * 4-level page tables related bits
diff --git a/include/asm-powerpc/pgtable-64k.h b/include/asm-powerpc/pgtable-64k.h
index 16ef4978520d..31cbd3d7fce8 100644
--- a/include/asm-powerpc/pgtable-64k.h
+++ b/include/asm-powerpc/pgtable-64k.h
@@ -35,6 +35,11 @@
35#define _PAGE_HPTE_SUB0 0x08000000 /* combo only: first sub page */ 35#define _PAGE_HPTE_SUB0 0x08000000 /* combo only: first sub page */
36#define _PAGE_COMBO 0x10000000 /* this is a combo 4k page */ 36#define _PAGE_COMBO 0x10000000 /* this is a combo 4k page */
37#define _PAGE_4K_PFN 0x20000000 /* PFN is for a single 4k page */ 37#define _PAGE_4K_PFN 0x20000000 /* PFN is for a single 4k page */
38
39/* Note the full page bits must be in the same location as for normal
40 * 4k pages as the same assembly will be used to insert 64K pages
41 * whether the kernel has CONFIG_PPC_64K_PAGES or not
42 */
38#define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */ 43#define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */
39#define _PAGE_F_GIX 0x00007000 /* full page: hidx bits */ 44#define _PAGE_F_GIX 0x00007000 /* full page: hidx bits */
40 45
@@ -88,7 +93,7 @@
88 93
89#define pte_iterate_hashed_end() } while(0); } } while(0) 94#define pte_iterate_hashed_end() } while(0); } } while(0)
90 95
91#define pte_pagesize_index(pte) \ 96#define pte_pagesize_index(mm, addr, pte) \
92 (((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) 97 (((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
93 98
94#define remap_4k_pfn(vma, addr, pfn, prot) \ 99#define remap_4k_pfn(vma, addr, pfn, prot) \
diff --git a/include/asm-powerpc/spu_csa.h b/include/asm-powerpc/spu_csa.h
index 02e56a6685a2..c48ae185c874 100644
--- a/include/asm-powerpc/spu_csa.h
+++ b/include/asm-powerpc/spu_csa.h
@@ -235,6 +235,12 @@ struct spu_priv2_collapsed {
235 */ 235 */
236struct spu_state { 236struct spu_state {
237 struct spu_lscsa *lscsa; 237 struct spu_lscsa *lscsa;
238#ifdef CONFIG_SPU_FS_64K_LS
239 int use_big_pages;
240 /* One struct page per 64k page */
241#define SPU_LSCSA_NUM_BIG_PAGES (sizeof(struct spu_lscsa) / 0x10000)
242 struct page *lscsa_pages[SPU_LSCSA_NUM_BIG_PAGES];
243#endif
238 struct spu_problem_collapsed prob; 244 struct spu_problem_collapsed prob;
239 struct spu_priv1_collapsed priv1; 245 struct spu_priv1_collapsed priv1;
240 struct spu_priv2_collapsed priv2; 246 struct spu_priv2_collapsed priv2;
@@ -247,12 +253,14 @@ struct spu_state {
247 spinlock_t register_lock; 253 spinlock_t register_lock;
248}; 254};
249 255
250extern void spu_init_csa(struct spu_state *csa); 256extern int spu_init_csa(struct spu_state *csa);
251extern void spu_fini_csa(struct spu_state *csa); 257extern void spu_fini_csa(struct spu_state *csa);
252extern int spu_save(struct spu_state *prev, struct spu *spu); 258extern int spu_save(struct spu_state *prev, struct spu *spu);
253extern int spu_restore(struct spu_state *new, struct spu *spu); 259extern int spu_restore(struct spu_state *new, struct spu *spu);
254extern int spu_switch(struct spu_state *prev, struct spu_state *new, 260extern int spu_switch(struct spu_state *prev, struct spu_state *new,
255 struct spu *spu); 261 struct spu *spu);
262extern int spu_alloc_lscsa(struct spu_state *csa);
263extern void spu_free_lscsa(struct spu_state *csa);
256 264
257#endif /* !__SPU__ */ 265#endif /* !__SPU__ */
258#endif /* __KERNEL__ */ 266#endif /* __KERNEL__ */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index d74da9122b60..9c7cb6430666 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -52,7 +52,15 @@ struct hibernation_ops {
52 52
53#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) 53#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND)
54/* kernel/power/snapshot.c */ 54/* kernel/power/snapshot.c */
55extern void __init register_nosave_region(unsigned long, unsigned long); 55extern void __register_nosave_region(unsigned long b, unsigned long e, int km);
56static inline void register_nosave_region(unsigned long b, unsigned long e)
57{
58 __register_nosave_region(b, e, 0);
59}
60static inline void register_nosave_region_late(unsigned long b, unsigned long e)
61{
62 __register_nosave_region(b, e, 1);
63}
56extern int swsusp_page_is_forbidden(struct page *); 64extern int swsusp_page_is_forbidden(struct page *);
57extern void swsusp_set_page_free(struct page *); 65extern void swsusp_set_page_free(struct page *);
58extern void swsusp_unset_page_free(struct page *); 66extern void swsusp_unset_page_free(struct page *);
@@ -62,6 +70,7 @@ extern void hibernation_set_ops(struct hibernation_ops *ops);
62extern int hibernate(void); 70extern int hibernate(void);
63#else 71#else
64static inline void register_nosave_region(unsigned long b, unsigned long e) {} 72static inline void register_nosave_region(unsigned long b, unsigned long e) {}
73static inline void register_nosave_region_late(unsigned long b, unsigned long e) {}
65static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } 74static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
66static inline void swsusp_set_page_free(struct page *p) {} 75static inline void swsusp_set_page_free(struct page *p) {}
67static inline void swsusp_unset_page_free(struct page *p) {} 76static inline void swsusp_unset_page_free(struct page *p) {}
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 48383ea72290..a3b7854b8f7c 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -607,7 +607,8 @@ static LIST_HEAD(nosave_regions);
607 */ 607 */
608 608
609void __init 609void __init
610register_nosave_region(unsigned long start_pfn, unsigned long end_pfn) 610__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
611 int use_kmalloc)
611{ 612{
612 struct nosave_region *region; 613 struct nosave_region *region;
613 614
@@ -623,8 +624,13 @@ register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
623 goto Report; 624 goto Report;
624 } 625 }
625 } 626 }
626 /* This allocation cannot fail */ 627 if (use_kmalloc) {
627 region = alloc_bootmem_low(sizeof(struct nosave_region)); 628 /* during init, this shouldn't fail */
629 region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
630 BUG_ON(!region);
631 } else
632 /* This allocation cannot fail */
633 region = alloc_bootmem_low(sizeof(struct nosave_region));
628 region->start_pfn = start_pfn; 634 region->start_pfn = start_pfn;
629 region->end_pfn = end_pfn; 635 region->end_pfn = end_pfn;
630 list_add_tail(&region->list, &nosave_regions); 636 list_add_tail(&region->list, &nosave_regions);