author    Tejun Heo <tj@kernel.org>  2011-11-28 12:46:22 -0500
committer Tejun Heo <tj@kernel.org>  2011-11-28 12:46:22 -0500
commit    d4bbf7e7759afc172e2bfbc5c416324590049cdd (patch)
tree      7eab5ee5481cd3dcf1162329fec827177640018a /arch/powerpc/mm
parent    a150439c4a97db379f0ed6faa46fbbb6e7bf3cb2 (diff)
parent    401d0069cb344f401bc9d264c31db55876ff78c0 (diff)
Merge branch 'master' into x86/memblock
Conflicts & resolutions:

* arch/x86/xen/setup.c
	dc91c728fd "xen: allow extra memory to be in multiple regions"
	24aa07882b "memblock, x86: Replace memblock_x86_reserve/free..."
	conflicted on xen_add_extra_mem() updates.  The resolution is
	trivial as the latter just wants to replace
	memblock_x86_reserve_range() with memblock_reserve().

* drivers/pci/intel-iommu.c
	166e9278a3f "x86/ia64: intel-iommu: move to drivers/iommu/"
	5dfe8660a3d "bootmem: Replace work_with_active_regions() with..."
	conflicted as the former moved the file under drivers/iommu/.
	Resolved by applying the changes from the latter on the moved file.

* mm/Kconfig
	6661672053a "memblock: add NO_BOOTMEM config symbol"
	c378ddd53f9 "memblock, x86: Make ARCH_DISCARD_MEMBLOCK a config option"
	conflicted trivially.  Both added config options.  Just letting
	both add their own options resolves the conflict.

* mm/memblock.c
	d1f0ece6cdc "mm/memblock.c: small function definition fixes"
	ed7b56a799c "memblock: Remove memblock_memory_can_coalesce()"
	conflicted.  The former updates a function removed by the latter.
	Resolution is trivial.

Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/44x_mmu.c             |  13
-rw-r--r--  arch/powerpc/mm/Makefile              |   1
-rw-r--r--  arch/powerpc/mm/dma-noncoherent.c     |   1
-rw-r--r--  arch/powerpc/mm/fault.c               |   6
-rw-r--r--  arch/powerpc/mm/fsl_booke_mmu.c       |  43
-rw-r--r--  arch/powerpc/mm/gup.c                 |  12
-rw-r--r--  arch/powerpc/mm/hash_native_64.c      |   6
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c       |  10
-rw-r--r--  arch/powerpc/mm/hugetlbpage-book3e.c  | 121
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c         | 401
-rw-r--r--  arch/powerpc/mm/init_32.c             |  41
-rw-r--r--  arch/powerpc/mm/init_64.c             |  16
-rw-r--r--  arch/powerpc/mm/mem.c                 |  75
-rw-r--r--  arch/powerpc/mm/mmu_context_hash32.c  |   1
-rw-r--r--  arch/powerpc/mm/mmu_context_hash64.c  |  14
-rw-r--r--  arch/powerpc/mm/mmu_context_nohash.c  |   5
-rw-r--r--  arch/powerpc/mm/mmu_decl.h            |   2
-rw-r--r--  arch/powerpc/mm/numa.c                |  46
-rw-r--r--  arch/powerpc/mm/pgtable.c             |   3
-rw-r--r--  arch/powerpc/mm/pgtable_64.c          |   1
-rw-r--r--  arch/powerpc/mm/slice.c               |   2
-rw-r--r--  arch/powerpc/mm/tlb_hash32.c          |   5
-rw-r--r--  arch/powerpc/mm/tlb_low_64e.S         | 230
-rw-r--r--  arch/powerpc/mm/tlb_nohash.c          | 132
24 files changed, 984 insertions, 203 deletions
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index 024acab588fd..f60e006d90c3 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -186,10 +186,11 @@ void __init MMU_init_hw(void)
 unsigned long __init mmu_mapin_ram(unsigned long top)
 {
 	unsigned long addr;
+	unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1);
 
 	/* Pin in enough TLBs to cover any lowmem not covered by the
 	 * initial 256M mapping established in head_44x.S */
-	for (addr = PPC_PIN_SIZE; addr < lowmem_end_addr;
+	for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr;
 	     addr += PPC_PIN_SIZE) {
 		if (mmu_has_feature(MMU_FTR_TYPE_47x))
 			ppc47x_pin_tlb(addr + PAGE_OFFSET, addr);
@@ -218,19 +219,25 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
 void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 				phys_addr_t first_memblock_size)
 {
+	u64 size;
+
+#ifndef CONFIG_RELOCATABLE
 	/* We don't currently support the first MEMBLOCK not mapping 0
 	 * physical on those processors
 	 */
 	BUG_ON(first_memblock_base != 0);
+#endif
 
 	/* 44x has a 256M TLB entry pinned at boot */
-	memblock_set_current_limit(min_t(u64, first_memblock_size, PPC_PIN_SIZE));
+	size = (min_t(u64, first_memblock_size, PPC_PIN_SIZE));
+	memblock_set_current_limit(first_memblock_base + size);
 }
 
 #ifdef CONFIG_SMP
 void __cpuinit mmu_init_secondary(int cpu)
 {
 	unsigned long addr;
+	unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1);
 
 	/* Pin in enough TLBs to cover any lowmem not covered by the
 	 * initial 256M mapping established in head_44x.S
@@ -241,7 +248,7 @@ void __cpuinit mmu_init_secondary(int cpu)
 	 * stack. current (r2) isn't initialized, smp_processor_id()
 	 * will not work, current thread info isn't accessible, ...
 	 */
-	for (addr = PPC_PIN_SIZE; addr < lowmem_end_addr;
+	for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr;
 	     addr += PPC_PIN_SIZE) {
 		if (mmu_has_feature(MMU_FTR_TYPE_47x))
 			ppc47x_pin_tlb(addr + PAGE_OFFSET, addr);
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index bdca46e08382..991ee813d2a8 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_PPC_MM_SLICES) += slice.o
 ifeq ($(CONFIG_HUGETLB_PAGE),y)
 obj-y += hugetlbpage.o
 obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o
+obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o
 endif
 obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index b42f76c4948d..329be36c0a8d 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -30,6 +30,7 @@
 #include <linux/types.h>
 #include <linux/highmem.h>
 #include <linux/dma-mapping.h>
+#include <linux/export.h>
 
 #include <asm/tlbflush.h>
 
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index ad35f66c69e8..5efe8c96d37f 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -174,7 +174,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
174 die("Weird page fault", regs, SIGSEGV); 174 die("Weird page fault", regs, SIGSEGV);
175 } 175 }
176 176
177 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); 177 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
178 178
179 /* When running in the kernel we expect faults to occur only to 179 /* When running in the kernel we expect faults to occur only to
180 * addresses in user space. All other faults represent errors in the 180 * addresses in user space. All other faults represent errors in the
@@ -320,7 +320,7 @@ good_area:
320 } 320 }
321 if (ret & VM_FAULT_MAJOR) { 321 if (ret & VM_FAULT_MAJOR) {
322 current->maj_flt++; 322 current->maj_flt++;
323 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, 323 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
324 regs, address); 324 regs, address);
325#ifdef CONFIG_PPC_SMLPAR 325#ifdef CONFIG_PPC_SMLPAR
326 if (firmware_has_feature(FW_FEATURE_CMO)) { 326 if (firmware_has_feature(FW_FEATURE_CMO)) {
@@ -331,7 +331,7 @@ good_area:
331#endif 331#endif
332 } else { 332 } else {
333 current->min_flt++; 333 current->min_flt++;
334 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, 334 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
335 regs, address); 335 regs, address);
336 } 336 }
337 up_read(&mm->mmap_sem); 337 up_read(&mm->mmap_sem);
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index f7802c8bba0a..66a6fd38e9cd 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -101,17 +101,17 @@ unsigned long p_mapped_by_tlbcam(phys_addr_t pa)
 
 /*
  * Set up a variable-size TLB entry (tlbcam). The parameters are not checked;
- * in particular size must be a power of 4 between 4k and 256M (or 1G, for cpus
- * that support extended page sizes). Note that while some cpus support a
- * page size of 4G, we don't allow its use here.
+ * in particular size must be a power of 4 between 4k and the max supported by
+ * an implementation; max may further be limited by what can be represented in
+ * an unsigned long (for example, 32-bit implementations cannot support a 4GB
+ * size).
  */
 static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
 		unsigned long size, unsigned long flags, unsigned int pid)
 {
-	unsigned int tsize, lz;
+	unsigned int tsize;
 
-	asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (size));
-	tsize = 21 - lz;
+	tsize = __ilog2(size) - 10;
 
 #ifdef CONFIG_SMP
 	if ((flags & _PAGE_NO_CACHE) == 0)
@@ -146,29 +146,36 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
 	loadcam_entry(index);
 }
 
+unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
+			  phys_addr_t phys)
+{
+	unsigned int camsize = __ilog2(ram) & ~1U;
+	unsigned int align = __ffs(virt | phys) & ~1U;
+	unsigned long max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xf;
+
+	/* Convert (4^max) kB to (2^max) bytes */
+	max_cam = max_cam * 2 + 10;
+
+	if (camsize > align)
+		camsize = align;
+	if (camsize > max_cam)
+		camsize = max_cam;
+
+	return 1UL << camsize;
+}
+
 unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)
 {
 	int i;
 	unsigned long virt = PAGE_OFFSET;
 	phys_addr_t phys = memstart_addr;
 	unsigned long amount_mapped = 0;
-	unsigned long max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xf;
-
-	/* Convert (4^max) kB to (2^max) bytes */
-	max_cam = max_cam * 2 + 10;
 
 	/* Calculate CAM values */
 	for (i = 0; ram && i < max_cam_idx; i++) {
-		unsigned int camsize = __ilog2(ram) & ~1U;
-		unsigned int align = __ffs(virt | phys) & ~1U;
 		unsigned long cam_sz;
 
-		if (camsize > align)
-			camsize = align;
-		if (camsize > max_cam)
-			camsize = max_cam;
-
-		cam_sz = 1UL << camsize;
+		cam_sz = calc_cam_sz(ram, virt, phys);
 		settlbcam(i, virt, phys, cam_sz, PAGE_KERNEL_X, 0);
 
 		ram -= cam_sz;
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index fec13200868f..d7efdbf640c7 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -16,16 +16,6 @@
 
 #ifdef __HAVE_ARCH_PTE_SPECIAL
 
-static inline void get_huge_page_tail(struct page *page)
-{
-	/*
-	 * __split_huge_page_refcount() cannot run
-	 * from under us.
-	 */
-	VM_BUG_ON(atomic_read(&page->_count) < 0);
-	atomic_inc(&page->_count);
-}
-
 /*
  * The performance critical leaf functions are made noinline otherwise gcc
  * inlines everything into a single function which results in too much
@@ -57,8 +47,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 			put_page(page);
 			return 0;
 		}
-		if (PageTail(page))
-			get_huge_page_tail(page);
 		pages[*nr] = page;
 		(*nr)++;
 
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index dfd764896db0..90039bc64119 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -37,7 +37,7 @@
 
 #define HPTE_LOCK_BIT 3
 
-static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
+DEFINE_RAW_SPINLOCK(native_tlbie_lock);
 
 static inline void __tlbie(unsigned long va, int psize, int ssize)
 {
@@ -51,7 +51,7 @@ static inline void __tlbie(unsigned long va, int psize, int ssize)
 		va &= ~0xffful;
 		va |= ssize << 8;
 		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
-			     : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206)
+			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
 			     : "memory");
 		break;
 	default:
@@ -61,7 +61,7 @@ static inline void __tlbie(unsigned long va, int psize, int ssize)
 		va |= ssize << 8;
 		va |= 1; /* L */
 		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
-			     : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206)
+			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
 		break;
 	}
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 26b2872b3d00..2d282186cb45 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -27,6 +27,7 @@
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
 #include <linux/sysctl.h>
+#include <linux/export.h>
 #include <linux/ctype.h>
 #include <linux/cache.h>
 #include <linux/init.h>
@@ -105,9 +106,6 @@ int mmu_kernel_ssize = MMU_SEGSIZE_256M;
 int mmu_highuser_ssize = MMU_SEGSIZE_256M;
 u16 mmu_slb_size = 64;
 EXPORT_SYMBOL_GPL(mmu_slb_size);
-#ifdef CONFIG_HUGETLB_PAGE
-unsigned int HPAGE_SHIFT;
-#endif
 #ifdef CONFIG_PPC_64K_PAGES
 int mmu_ci_restrictions;
 #endif
@@ -534,11 +532,11 @@ static unsigned long __init htab_get_table_size(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-void create_section_mapping(unsigned long start, unsigned long end)
+int create_section_mapping(unsigned long start, unsigned long end)
 {
-	BUG_ON(htab_bolt_mapping(start, end, __pa(start),
+	return htab_bolt_mapping(start, end, __pa(start),
 				 pgprot_val(PAGE_KERNEL), mmu_linear_psize,
-				 mmu_kernel_ssize));
+				 mmu_kernel_ssize);
 }
 
 int remove_section_mapping(unsigned long start, unsigned long end)
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
new file mode 100644
index 000000000000..343ad0b87261
--- /dev/null
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -0,0 +1,121 @@
+/*
+ * PPC Huge TLB Page Support for Book3E MMU
+ *
+ * Copyright (C) 2009 David Gibson, IBM Corporation.
+ * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
+ *
+ */
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+
+static inline int mmu_get_tsize(int psize)
+{
+	return mmu_psize_defs[psize].enc;
+}
+
+static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid)
+{
+	int found = 0;
+
+	mtspr(SPRN_MAS6, pid << 16);
+	if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) {
+		asm volatile(
+			"li	%0,0\n"
+			"tlbsx.	0,%1\n"
+			"bne	1f\n"
+			"li	%0,1\n"
+			"1:\n"
+			: "=&r"(found) : "r"(ea));
+	} else {
+		asm volatile(
+			"tlbsx	0,%1\n"
+			"mfspr	%0,0x271\n"
+			"srwi	%0,%0,31\n"
+			: "=&r"(found) : "r"(ea));
+	}
+
+	return found;
+}
+
+void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte)
+{
+	unsigned long mas1, mas2;
+	u64 mas7_3;
+	unsigned long psize, tsize, shift;
+	unsigned long flags;
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
+	int index, lz, ncams;
+	struct vm_area_struct *vma;
+#endif
+
+	if (unlikely(is_kernel_addr(ea)))
+		return;
+
+#ifdef CONFIG_PPC_MM_SLICES
+	psize = mmu_get_tsize(get_slice_psize(mm, ea));
+	tsize = mmu_get_psize(psize);
+	shift = mmu_psize_defs[psize].shift;
+#else
+	vma = find_vma(mm, ea);
+	psize = vma_mmu_pagesize(vma);	/* returns actual size in bytes */
+	asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (psize));
+	shift = 31 - lz;
+	tsize = 21 - lz;
+#endif
+
+	/*
+	 * We can't be interrupted while we're setting up the MAS
+	 * regusters or after we've confirmed that no tlb exists.
+	 */
+	local_irq_save(flags);
+
+	if (unlikely(book3e_tlb_exists(ea, mm->context.id))) {
+		local_irq_restore(flags);
+		return;
+	}
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
+	ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
+
+	/* We have to use the CAM(TLB1) on FSL parts for hugepages */
+	index = __get_cpu_var(next_tlbcam_idx);
+	mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1));
+
+	/* Just round-robin the entries and wrap when we hit the end */
+	if (unlikely(index == ncams - 1))
+		__get_cpu_var(next_tlbcam_idx) = tlbcam_index;
+	else
+		__get_cpu_var(next_tlbcam_idx)++;
+#endif
+	mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize);
+	mas2 = ea & ~((1UL << shift) - 1);
+	mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
+	mas7_3 = (u64)pte_pfn(pte) << PAGE_SHIFT;
+	mas7_3 |= (pte_val(pte) >> PTE_BAP_SHIFT) & MAS3_BAP_MASK;
+	if (!pte_dirty(pte))
+		mas7_3 &= ~(MAS3_SW|MAS3_UW);
+
+	mtspr(SPRN_MAS1, mas1);
+	mtspr(SPRN_MAS2, mas2);
+
+	if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) {
+		mtspr(SPRN_MAS7_MAS3, mas7_3);
+	} else {
+		mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
+		mtspr(SPRN_MAS3, lower_32_bits(mas7_3));
+	}
+
+	asm volatile ("tlbwe");
+
+	local_irq_restore(flags);
+}
+
+void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	struct hstate *hstate = hstate_file(vma->vm_file);
+	unsigned long tsize = huge_page_shift(hstate) - 10;
+
+	__flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, tsize, 0);
+
+}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0b9a5c1901b9..8558b572e55d 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -1,7 +1,8 @@
 /*
- * PPC64 (POWER4) Huge TLB Page Support for Kernel.
+ * PPC Huge TLB Page Support for Kernel.
  *
  * Copyright (C) 2003 David Gibson, IBM Corporation.
+ * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
  *
  * Based on the IA-32 version:
  * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
@@ -11,24 +12,40 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/hugetlb.h>
+#include <linux/of_fdt.h>
+#include <linux/memblock.h>
+#include <linux/bootmem.h>
+#include <linux/moduleparam.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
+#include <asm/setup.h>
 
 #define PAGE_SHIFT_64K	16
 #define PAGE_SHIFT_16M	24
 #define PAGE_SHIFT_16G	34
 
-#define MAX_NUMBER_GPAGES	1024
+unsigned int HPAGE_SHIFT;
 
-/* Tracks the 16G pages after the device tree is scanned and before the
- * huge_boot_pages list is ready. */
-static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
+/*
+ * Tracks gpages after the device tree is scanned and before the
+ * huge_boot_pages list is ready.  On 64-bit implementations, this is
+ * just used to track 16G pages and so is a single array.  32-bit
+ * implementations may have more than one gpage size due to limitations
+ * of the memory allocators, so we need multiple arrays
+ */
+#ifdef CONFIG_PPC64
+#define MAX_NUMBER_GPAGES	1024
+static u64 gpage_freearray[MAX_NUMBER_GPAGES];
 static unsigned nr_gpages;
-
-/* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad()
- * will choke on pointers to hugepte tables, which is handy for
- * catching screwups early. */
+#else
+#define MAX_NUMBER_GPAGES	128
+struct psize_gpages {
+	u64 gpage_list[MAX_NUMBER_GPAGES];
+	unsigned int nr_gpages;
+};
+static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
+#endif
 
 static inline int shift_to_mmu_psize(unsigned int shift)
 {
@@ -49,25 +66,6 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
 
 #define hugepd_none(hpd)	((hpd).pd == 0)
 
-static inline pte_t *hugepd_page(hugepd_t hpd)
-{
-	BUG_ON(!hugepd_ok(hpd));
-	return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000);
-}
-
-static inline unsigned int hugepd_shift(hugepd_t hpd)
-{
-	return hpd.pd & HUGEPD_SHIFT_MASK;
-}
-
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, unsigned pdshift)
-{
-	unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
-	pte_t *dir = hugepd_page(*hpdp);
-
-	return dir + idx;
-}
-
 pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
 {
 	pgd_t *pg;
@@ -93,7 +91,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 			if (is_hugepd(pm))
 				hpdp = (hugepd_t *)pm;
 			else if (!pmd_none(*pm)) {
-				return pte_offset_map(pm, ea);
+				return pte_offset_kernel(pm, ea);
 			}
 		}
 	}
@@ -114,8 +112,18 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 			   unsigned long address, unsigned pdshift, unsigned pshift)
 {
-	pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift),
-				       GFP_KERNEL|__GFP_REPEAT);
+	struct kmem_cache *cachep;
+	pte_t *new;
+
+#ifdef CONFIG_PPC64
+	cachep = PGT_CACHE(pdshift - pshift);
+#else
+	int i;
+	int num_hugepd = 1 << (pshift - pdshift);
+	cachep = hugepte_cache;
+#endif
+
+	new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
 
 	BUG_ON(pshift > HUGEPD_SHIFT_MASK);
 	BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
@@ -124,10 +132,31 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 		return -ENOMEM;
 
 	spin_lock(&mm->page_table_lock);
+#ifdef CONFIG_PPC64
 	if (!hugepd_none(*hpdp))
-		kmem_cache_free(PGT_CACHE(pdshift - pshift), new);
+		kmem_cache_free(cachep, new);
 	else
-		hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift;
+		hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
+#else
+	/*
+	 * We have multiple higher-level entries that point to the same
+	 * actual pte location.  Fill in each as we go and backtrack on error.
+	 * We need all of these so the DTLB pgtable walk code can find the
+	 * right higher-level entry without knowing if it's a hugepage or not.
+	 */
+	for (i = 0; i < num_hugepd; i++, hpdp++) {
+		if (unlikely(!hugepd_none(*hpdp)))
+			break;
+		else
+			hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
+	}
+	/* If we bailed from the for loop early, an error occurred, clean up */
+	if (i < num_hugepd) {
+		for (i = i - 1 ; i >= 0; i--, hpdp--)
+			hpdp->pd = 0;
+		kmem_cache_free(cachep, new);
+	}
+#endif
 	spin_unlock(&mm->page_table_lock);
 	return 0;
 }
@@ -169,11 +198,132 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 	return hugepte_offset(hpdp, addr, pdshift);
 }
 
+#ifdef CONFIG_PPC32
+/* Build list of addresses of gigantic pages.  This function is used in early
+ * boot before the buddy or bootmem allocator is setup.
+ */
+void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
+{
+	unsigned int idx = shift_to_mmu_psize(__ffs(page_size));
+	int i;
+
+	if (addr == 0)
+		return;
+
+	gpage_freearray[idx].nr_gpages = number_of_pages;
+
+	for (i = 0; i < number_of_pages; i++) {
+		gpage_freearray[idx].gpage_list[i] = addr;
+		addr += page_size;
+	}
+}
+
+/*
+ * Moves the gigantic page addresses from the temporary list to the
+ * huge_boot_pages list.
+ */
+int alloc_bootmem_huge_page(struct hstate *hstate)
+{
+	struct huge_bootmem_page *m;
+	int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT);
+	int nr_gpages = gpage_freearray[idx].nr_gpages;
+
+	if (nr_gpages == 0)
+		return 0;
+
+#ifdef CONFIG_HIGHMEM
+	/*
+	 * If gpages can be in highmem we can't use the trick of storing the
+	 * data structure in the page; allocate space for this
+	 */
+	m = alloc_bootmem(sizeof(struct huge_bootmem_page));
+	m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
+#else
+	m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
+#endif
+
+	list_add(&m->list, &huge_boot_pages);
+	gpage_freearray[idx].nr_gpages = nr_gpages;
+	gpage_freearray[idx].gpage_list[nr_gpages] = 0;
+	m->hstate = hstate;
+
+	return 1;
+}
+/*
+ * Scan the command line hugepagesz= options for gigantic pages; store those in
+ * a list that we use to allocate the memory once all options are parsed.
+ */
+
+unsigned long gpage_npages[MMU_PAGE_COUNT];
+
+static int __init do_gpage_early_setup(char *param, char *val)
+{
+	static phys_addr_t size;
+	unsigned long npages;
+
+	/*
+	 * The hugepagesz and hugepages cmdline options are interleaved.  We
+	 * use the size variable to keep track of whether or not this was done
+	 * properly and skip over instances where it is incorrect.  Other
+	 * command-line parsing code will issue warnings, so we don't need to.
+	 *
+	 */
+	if ((strcmp(param, "default_hugepagesz") == 0) ||
+	    (strcmp(param, "hugepagesz") == 0)) {
+		size = memparse(val, NULL);
+	} else if (strcmp(param, "hugepages") == 0) {
+		if (size != 0) {
+			if (sscanf(val, "%lu", &npages) <= 0)
+				npages = 0;
+			gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
+			size = 0;
+		}
+	}
+	return 0;
+}
+
+
+/*
+ * This function allocates physical space for pages that are larger than the
+ * buddy allocator can handle.  We want to allocate these in highmem because
+ * the amount of lowmem is limited.  This means that this function MUST be
+ * called before lowmem_end_addr is set up in MMU_init() in order for the lmb
+ * allocate to grab highmem.
+ */
+void __init reserve_hugetlb_gpages(void)
+{
+	static __initdata char cmdline[COMMAND_LINE_SIZE];
+	phys_addr_t size, base;
+	int i;
+
+	strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
+	parse_args("hugetlb gpages", cmdline, NULL, 0, &do_gpage_early_setup);
+
+	/*
+	 * Walk gpage list in reverse, allocating larger page sizes first.
+	 * Skip over unsupported sizes, or sizes that have 0 gpages allocated.
+	 * When we reach the point in the list where pages are no longer
+	 * considered gpages, we're done.
+	 */
+	for (i = MMU_PAGE_COUNT-1; i >= 0; i--) {
+		if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0)
+			continue;
+		else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT))
+			break;
+
+		size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i));
+		base = memblock_alloc_base(size * gpage_npages[i], size,
+					   MEMBLOCK_ALLOC_ANYWHERE);
+		add_gpage(base, size, gpage_npages[i]);
+	}
+}
+
+#else /* PPC64 */
+
 /* Build list of addresses of gigantic pages.  This function is used in early
  * boot before the buddy or bootmem allocator is setup.
  */
-void add_gpage(unsigned long addr, unsigned long page_size,
-		unsigned long number_of_pages)
+void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
 {
 	if (!addr)
 		return;
@@ -199,19 +349,79 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
 	m->hstate = hstate;
 	return 1;
 }
+#endif
 
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 {
 	return 0;
 }
 
+#ifdef CONFIG_PPC32
+#define HUGEPD_FREELIST_SIZE \
+	((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
+
+struct hugepd_freelist {
+	struct rcu_head	rcu;
+	unsigned int index;
+	void *ptes[0];
+};
+
+static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);
+
+static void hugepd_free_rcu_callback(struct rcu_head *head)
+{
+	struct hugepd_freelist *batch =
+		container_of(head, struct hugepd_freelist, rcu);
+	unsigned int i;
+
+	for (i = 0; i < batch->index; i++)
+		kmem_cache_free(hugepte_cache, batch->ptes[i]);
+
+	free_page((unsigned long)batch);
+}
+
+static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
+{
+	struct hugepd_freelist **batchp;
+
+	batchp = &__get_cpu_var(hugepd_freelist_cur);
+
+	if (atomic_read(&tlb->mm->mm_users) < 2 ||
+	    cpumask_equal(mm_cpumask(tlb->mm),
+			  cpumask_of(smp_processor_id()))) {
+		kmem_cache_free(hugepte_cache, hugepte);
+		return;
+	}
+
+	if (*batchp == NULL) {
+		*batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
+		(*batchp)->index = 0;
+	}
+
+	(*batchp)->ptes[(*batchp)->index++] = hugepte;
+	if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
+		call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
+		*batchp = NULL;
+	}
+}
+#endif
+
 static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
 			      unsigned long start, unsigned long end,
 			      unsigned long floor, unsigned long ceiling)
 {
 	pte_t *hugepte = hugepd_page(*hpdp);
-	unsigned shift = hugepd_shift(*hpdp);
+	int i;
+
 	unsigned long pdmask = ~((1UL << pdshift) - 1);
+	unsigned int num_hugepd = 1;
+
+#ifdef CONFIG_PPC64
+	unsigned int shift = hugepd_shift(*hpdp);
+#else
+	/* Note: On 32-bit the hpdp may be the first of several */
+	num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
+#endif
 
 	start &= pdmask;
 	if (start < floor)
@@ -224,9 +434,15 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
 	if (end - 1 > ceiling - 1)
 		return;
 
-	hpdp->pd = 0;
+	for (i = 0; i < num_hugepd; i++, hpdp++)
+		hpdp->pd = 0;
+
 	tlb->need_flush = 1;
+#ifdef CONFIG_PPC64
 	pgtable_free_tlb(tlb, hugepte, pdshift - shift);
+#else
+	hugepd_free(tlb, hugepte);
+#endif
 }
 
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
@@ -331,18 +547,27 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 	 * too.
 	 */
 
-	pgd = pgd_offset(tlb->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
+		pgd = pgd_offset(tlb->mm, addr);
 		if (!is_hugepd(pgd)) {
 			if (pgd_none_or_clear_bad(pgd))
 				continue;
 			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
 		} else {
+#ifdef CONFIG_PPC32
+			/*
+			 * Increment next by the size of the huge mapping since
+			 * on 32-bit there may be more than one entry at the pgd
+			 * level for a single hugepage, but all of them point to
+			 * the same kmem cache that holds the hugepte.
+			 */
+			next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
+#endif
 			free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
 					  addr, next, floor, ceiling);
 		}
-	} while (pgd++, addr = next, addr != end);
+	} while (addr = next, addr != end);
 }
 
 struct page *
@@ -390,7 +615,7 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add
 {
 	unsigned long mask;
 	unsigned long pte_end;
-	struct page *head, *page;
+	struct page *head, *page, *tail;
 	pte_t pte;
 	int refs;
 
@@ -413,6 +638,7 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add
 	head = pte_page(pte);
 
 	page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
+	tail = page;
 	do {
 		VM_BUG_ON(compound_head(page) != head);
 		pages[*nr] = page;
@@ -428,10 +654,20 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add
 
 	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
 		/* Could be optimized better */
-		while (*nr) {
-			put_page(page);
-			(*nr)--;
-		}
+		*nr -= refs;
+		while (refs--)
+			put_page(head);
+		return 0;
+	}
+
+	/*
+	 * Any tail page need their mapcount reference taken before we
+	 * return.
+	 */
+	while (refs--) {
+		if (PageTail(tail))
+			get_huge_page_tail(tail);
+		tail++;
 	}
 
 	return 1;
@@ -466,17 +702,35 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 					unsigned long len, unsigned long pgoff,
 					unsigned long flags)
 {
+#ifdef CONFIG_PPC_MM_SLICES
 	struct hstate *hstate = hstate_file(file);
 	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
 
 	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
+#else
+	return get_unmapped_area(file, addr, len, pgoff, flags);
+#endif
 }
 
 unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
 {
+#ifdef CONFIG_PPC_MM_SLICES
 	unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
 
 	return 1UL << mmu_psize_to_shift(psize);
+#else
+	if (!is_vm_hugetlb_page(vma))
+		return PAGE_SIZE;
+
+	return huge_page_size(hstate_vma(vma));
+#endif
+}
+
+static inline bool is_power_of_4(unsigned long x)
+{
+	if (is_power_of_2(x))
+		return (__ilog2(x) % 2) ? false : true;
+	return false;
 }
 
 static int __init add_huge_page_size(unsigned long long size)
@@ -486,9 +740,14 @@ static int __init add_huge_page_size(unsigned long long size)
 
 	/* Check that it is a page size supported by the hardware and
 	 * that it fits within pagetable and slice limits. */
+#ifdef CONFIG_PPC_FSL_BOOK3E
+	if ((size < PAGE_SIZE) || !is_power_of_4(size))
+		return -EINVAL;
+#else
 	if (!is_power_of_2(size)
 	    || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
 		return -EINVAL;
+#endif
 
 	if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
 		return -EINVAL;
@@ -525,6 +784,46 @@ static int __init hugepage_setup_sz(char *str)
 }
 __setup("hugepagesz=", hugepage_setup_sz);
 
+#ifdef CONFIG_FSL_BOOKE
+struct kmem_cache *hugepte_cache;
+static int __init hugetlbpage_init(void)
+{
+	int psize;
+
+	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+		unsigned shift;
+
+		if (!mmu_psize_defs[psize].shift)
+			continue;
+
+		shift = mmu_psize_to_shift(psize);
+
+		/* Don't treat normal page sizes as huge... */
+		if (shift != PAGE_SHIFT)
+			if (add_huge_page_size(1ULL << shift) < 0)
+				continue;
+	}
+
+	/*
+	 * Create a kmem cache for hugeptes.  The bottom bits in the pte have
+	 * size information encoded in them, so align them to allow this
+	 */
+	hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t),
+					  HUGEPD_SHIFT_MASK + 1, 0, NULL);
+	if (hugepte_cache == NULL)
+		panic("%s: Unable to create kmem cache for hugeptes\n",
+		      __func__);
+
+	/* Default hpage size = 4M */
+	if (mmu_psize_defs[MMU_PAGE_4M].shift)
+		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
+	else
+		panic("%s: Unable to set default huge page size\n", __func__);
+
+
+	return 0;
+}
+#else
 static int __init hugetlbpage_init(void)
 {
 	int psize;
@@ -567,15 +866,23 @@ static int __init hugetlbpage_init(void)
 
 	return 0;
 }
-
+#endif
 module_init(hugetlbpage_init);
 
 void flush_dcache_icache_hugepage(struct page *page)
 {
 	int i;
+	void *start;
 
 	BUG_ON(!PageCompound(page));
 
-	for (i = 0; i < (1UL << compound_order(page)); i++)
-		__flush_dcache_icache(page_address(page+i));
+	for (i = 0; i < (1UL << compound_order(page)); i++) {
+		if (!PageHighMem(page)) {
+			__flush_dcache_icache(page_address(page+i));
+		} else {
+			start = kmap_atomic(page+i, KM_PPC_SYNC_ICACHE);
+			__flush_dcache_icache(start);
+			kunmap_atomic(start, KM_PPC_SYNC_ICACHE);
+		}
+	}
 }
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 5de0f254dbb5..161cefde5c15 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -32,6 +32,8 @@
 #include <linux/pagemap.h>
 #include <linux/memblock.h>
 #include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
 
 #include <asm/pgalloc.h>
 #include <asm/prom.h>
@@ -44,6 +46,7 @@
 #include <asm/tlb.h>
 #include <asm/sections.h>
 #include <asm/system.h>
+#include <asm/hugetlb.h>
 
 #include "mmu_decl.h"
 
@@ -123,6 +126,12 @@ void __init MMU_init(void)
 	/* parse args from command line */
 	MMU_setup();
 
+	/*
+	 * Reserve gigantic pages for hugetlb.  This MUST occur before
+	 * lowmem_end_addr is initialized below.
+	 */
+	reserve_hugetlb_gpages();
+
 	if (memblock.memory.cnt > 1) {
 #ifndef CONFIG_WII
 		memblock.memory.cnt = 1;
@@ -191,38 +200,6 @@ void __init *early_get_page(void)
 	return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
 }
 
-/* Free up now-unused memory */
-static void free_sec(unsigned long start, unsigned long end, const char *name)
-{
-	unsigned long cnt = 0;
-
-	while (start < end) {
-		ClearPageReserved(virt_to_page(start));
-		init_page_count(virt_to_page(start));
-		free_page(start);
-		cnt++;
-		start += PAGE_SIZE;
-	}
-	if (cnt) {
-		printk(" %ldk %s", cnt << (PAGE_SHIFT - 10), name);
-		totalram_pages += cnt;
-	}
-}
-
-void free_initmem(void)
-{
-#define FREESEC(TYPE) \
-	free_sec((unsigned long)(&__ ## TYPE ## _begin), \
-		 (unsigned long)(&__ ## TYPE ## _end), \
-		 #TYPE);
-
-	printk ("Freeing unused kernel memory:");
-	FREESEC(init);
-	printk("\n");
-	ppc_md.progress = NULL;
-#undef FREESEC
-}
-
 #ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */
 void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 				phys_addr_t first_memblock_size)
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index f6dbb4c20e64..e94b57fb79a0 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -83,22 +83,6 @@ EXPORT_SYMBOL_GPL(memstart_addr);
 phys_addr_t kernstart_addr;
 EXPORT_SYMBOL_GPL(kernstart_addr);
 
-void free_initmem(void)
-{
-	unsigned long addr;
-
-	addr = (unsigned long)__init_begin;
-	for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
-		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
-		ClearPageReserved(virt_to_page(addr));
-		init_page_count(virt_to_page(addr));
-		free_page(addr);
-		totalram_pages++;
-	}
-	printk ("Freeing unused kernel memory: %luk freed\n",
-		((unsigned long)__init_end - (unsigned long)__init_begin) >> 10);
-}
-
 static void pgd_ctor(void *addr)
 {
 	memset(addr, 0, PGD_TABLE_SIZE);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 29d4dde65c45..2dd6bdd31fe1 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -17,7 +17,7 @@
  *
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
@@ -34,6 +34,7 @@
 #include <linux/suspend.h>
 #include <linux/memblock.h>
 #include <linux/hugetlb.h>
+#include <linux/slab.h>
 
 #include <asm/pgalloc.h>
 #include <asm/prom.h>
@@ -123,7 +124,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	pgdata = NODE_DATA(nid);
 
 	start = (unsigned long)__va(start);
-	create_section_mapping(start, start + size);
+	if (create_section_mapping(start, start + size))
+		return -EINVAL;
 
 	/* this should work for most non-highmem platforms */
 	zone = pgdata->node_zones;
@@ -249,7 +251,7 @@ static int __init mark_nonram_nosave(void)
  */
 void __init paging_init(void)
 {
-	unsigned long total_ram = memblock_phys_mem_size();
+	unsigned long long total_ram = memblock_phys_mem_size();
 	phys_addr_t top_of_ram = memblock_end_of_DRAM();
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 
@@ -269,7 +271,7 @@ void __init paging_init(void)
 	kmap_prot = PAGE_KERNEL;
 #endif /* CONFIG_HIGHMEM */
 
-	printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%lx\n",
+	printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%llx\n",
 	       (unsigned long long)top_of_ram, total_ram);
 	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
 	       (long int)((top_of_ram - total_ram) >> 20));
@@ -337,8 +339,9 @@ void __init mem_init(void)
 
 		highmem_mapnr = lowmem_end_addr >> PAGE_SHIFT;
 		for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) {
+			phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT;
 			struct page *page = pfn_to_page(pfn);
-			if (memblock_is_reserved(pfn << PAGE_SHIFT))
+			if (memblock_is_reserved(paddr))
 				continue;
 			ClearPageReserved(page);
 			init_page_count(page);
@@ -352,6 +355,15 @@ void __init mem_init(void)
 		}
 #endif /* CONFIG_HIGHMEM */
 
+#if defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_SMP)
+	/*
+	 * If smp is enabled, next_tlbcam_idx is initialized in the cpu up
+	 * functions.... do it here for the non-smp case.
+	 */
+	per_cpu(next_tlbcam_idx, smp_processor_id()) =
+		(mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
+#endif
+
 	printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, "
 	       "%luk reserved, %luk data, %luk bss, %luk init)\n",
 		nr_free_pages() << (PAGE_SHIFT-10),
@@ -382,6 +394,25 @@ void __init mem_init(void)
 	mem_init_done = 1;
 }
 
+void free_initmem(void)
+{
+	unsigned long addr;
+
+	ppc_md.progress = ppc_printk_progress;
+
+	addr = (unsigned long)__init_begin;
+	for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
+		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
+		ClearPageReserved(virt_to_page(addr));
+		init_page_count(virt_to_page(addr));
+		free_page(addr);
+		totalram_pages++;
+	}
+	pr_info("Freeing unused kernel memory: %luk freed\n",
+		((unsigned long)__init_end -
+		(unsigned long)__init_begin) >> 10);
+}
+
 #ifdef CONFIG_BLK_DEV_INITRD
 void __init free_initrd_mem(unsigned long start, unsigned long end)
 {
@@ -519,4 +550,38 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 		return;
 	hash_preload(vma->vm_mm, address, access, trap);
 #endif /* CONFIG_PPC_STD_MMU */
+#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
+	&& defined(CONFIG_HUGETLB_PAGE)
+	if (is_vm_hugetlb_page(vma))
+		book3e_hugetlb_preload(vma->vm_mm, address, *ptep);
+#endif
+}
+
+/*
+ * System memory should not be in /proc/iomem but various tools expect it
+ * (eg kdump).
+ */
+static int add_system_ram_resources(void)
+{
+	struct memblock_region *reg;
+
+	for_each_memblock(memory, reg) {
+		struct resource *res;
+		unsigned long base = reg->base;
+		unsigned long size = reg->size;
+
+		res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+		WARN_ON(!res);
+
+		if (res) {
+			res->name = "System RAM";
+			res->start = base;
+			res->end = base + size - 1;
+			res->flags = IORESOURCE_MEM;
+			WARN_ON(request_resource(&iomem_resource, res) < 0);
+		}
+	}
+
+	return 0;
 }
+subsys_initcall(add_system_ram_resources);
diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c
index d0ee554e86e4..78fef6726e10 100644
--- a/arch/powerpc/mm/mmu_context_hash32.c
+++ b/arch/powerpc/mm/mmu_context_hash32.c
@@ -24,6 +24,7 @@
 
 #include <linux/mm.h>
 #include <linux/init.h>
+#include <linux/export.h>
 
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 3bafc3deca6d..ca988a3d5fb2 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -18,7 +18,7 @@
 #include <linux/mm.h>
 #include <linux/spinlock.h>
 #include <linux/idr.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
 
@@ -136,8 +136,8 @@ int use_cop(unsigned long acop, struct mm_struct *mm)
 	if (!mm || !acop)
 		return -EINVAL;
 
-	/* We need to make sure mm_users doesn't change */
-	down_read(&mm->mmap_sem);
+	/* The page_table_lock ensures mm_users won't change under us */
+	spin_lock(&mm->page_table_lock);
 	spin_lock(mm->context.cop_lockp);
 
 	if (mm->context.cop_pid == COP_PID_NONE) {
@@ -164,7 +164,7 @@ int use_cop(unsigned long acop, struct mm_struct *mm)
 
 out:
 	spin_unlock(mm->context.cop_lockp);
-	up_read(&mm->mmap_sem);
+	spin_unlock(&mm->page_table_lock);
 
 	return ret;
 }
@@ -185,8 +185,8 @@ void drop_cop(unsigned long acop, struct mm_struct *mm)
 	if (WARN_ON_ONCE(!mm))
 		return;
 
-	/* We need to make sure mm_users doesn't change */
-	down_read(&mm->mmap_sem);
+	/* The page_table_lock ensures mm_users won't change under us */
+	spin_lock(&mm->page_table_lock);
 	spin_lock(mm->context.cop_lockp);
 
 	mm->context.acop &= ~acop;
@@ -213,7 +213,7 @@ void drop_cop(unsigned long acop, struct mm_struct *mm)
 	}
 
 	spin_unlock(mm->context.cop_lockp);
-	up_read(&mm->mmap_sem);
+	spin_unlock(&mm->page_table_lock);
 }
 EXPORT_SYMBOL_GPL(drop_cop);
 
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 336807de550e..5b63bd3da4a9 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -292,6 +292,11 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
 	mm->context.id = MMU_NO_CONTEXT;
 	mm->context.active = 0;
 
+#ifdef CONFIG_PPC_MM_SLICES
+	if (slice_mm_new_context(mm))
+		slice_set_user_psize(mm, mmu_virtual_psize);
+#endif
+
 	return 0;
 }
 
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index dd0a2589591d..83eb5d5f53d5 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -142,6 +142,8 @@ extern unsigned long mmu_mapin_ram(unsigned long top);
 
 #elif defined(CONFIG_PPC_FSL_BOOK3E)
 extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx);
+extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
+				 phys_addr_t phys);
 #ifdef CONFIG_PPC32
 extern void MMU_init_hw(void);
 extern unsigned long mmu_mapin_ram(unsigned long top);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 6f06ea53bca2..261adbd3b55a 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/nodemask.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
@@ -295,7 +295,10 @@ static int __init find_min_common_depth(void)
 	struct device_node *root;
 	const char *vec5;
 
-	root = of_find_node_by_path("/rtas");
+	if (firmware_has_feature(FW_FEATURE_OPAL))
+		root = of_find_node_by_path("/ibm,opal");
+	else
+		root = of_find_node_by_path("/rtas");
 	if (!root)
 		root = of_find_node_by_path("/");
 
@@ -324,12 +327,19 @@ static int __init find_min_common_depth(void)
 
 #define VEC5_AFFINITY_BYTE	5
 #define VEC5_AFFINITY		0x80
-	chosen = of_find_node_by_path("/chosen");
-	if (chosen) {
-		vec5 = of_get_property(chosen, "ibm,architecture-vec-5", NULL);
-		if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & VEC5_AFFINITY)) {
-			dbg("Using form 1 affinity\n");
-			form1_affinity = 1;
+
+	if (firmware_has_feature(FW_FEATURE_OPAL))
+		form1_affinity = 1;
+	else {
+		chosen = of_find_node_by_path("/chosen");
+		if (chosen) {
+			vec5 = of_get_property(chosen,
+					"ibm,architecture-vec-5", NULL);
+			if (vec5 && (vec5[VEC5_AFFINITY_BYTE] &
+							VEC5_AFFINITY)) {
+				dbg("Using form 1 affinity\n");
+				form1_affinity = 1;
+			}
 		}
 	}
 
@@ -689,8 +699,7 @@ static void __init parse_drconf_memory(struct device_node *memory)
 
 static int __init parse_numa_properties(void)
 {
-	struct device_node *cpu = NULL;
-	struct device_node *memory = NULL;
+	struct device_node *memory;
 	int default_nid = 0;
 	unsigned long i;
 
@@ -712,6 +721,7 @@ static int __init parse_numa_properties(void)
 	 * each node to be onlined must have NODE_DATA etc backing it.
 	 */
 	for_each_present_cpu(i) {
+		struct device_node *cpu;
 		int nid;
 
 		cpu = of_get_cpu_node(i, NULL);
@@ -730,8 +740,8 @@ static int __init parse_numa_properties(void)
 	}
 
 	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
-	memory = NULL;
-	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
+
+	for_each_node_by_type(memory, "memory") {
 		unsigned long start;
 		unsigned long size;
 		int nid;
@@ -780,8 +790,9 @@ new_range:
 	}
 
 	/*
-	 * Now do the same thing for each MEMBLOCK listed in the ibm,dynamic-memory
-	 * property in the ibm,dynamic-reconfiguration-memory node.
+	 * Now do the same thing for each MEMBLOCK listed in the
+	 * ibm,dynamic-memory property in the
+	 * ibm,dynamic-reconfiguration-memory node.
 	 */
 	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
 	if (memory)
@@ -1167,10 +1178,10 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory,
  */
 int hot_add_node_scn_to_nid(unsigned long scn_addr)
 {
-	struct device_node *memory = NULL;
+	struct device_node *memory;
 	int nid = -1;
 
-	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
+	for_each_node_by_type(memory, "memory") {
 		unsigned long start, size;
 		int ranges;
 		const unsigned int *memcell_buf;
@@ -1194,11 +1205,12 @@ int hot_add_node_scn_to_nid(unsigned long scn_addr)
 			break;
 		}
 
-		of_node_put(memory);
 		if (nid >= 0)
 			break;
 	}
 
+	of_node_put(memory);
+
 	return nid;
 }
 
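for_each_node_by_type() releases the reference on the node it is leaving as it advances, so after the conversion only a node that the loop stopped on early still holds a reference, and a single of_node_put() after the loop (a no-op on NULL) releases it. A small userspace sketch of that refcounting pattern, with toy stand-ins for the OF iterator and the get/put helpers rather than the real device-tree API:

#include <stdio.h>

/* Toy refcounted "device node"; find_next()/put() are local stand-ins. */
struct node { const char *name; int refs; };

static struct node nodes[] = { { "memory@0", 0 }, { "memory@1", 0 } };
#define NNODES 2

static struct node *get(struct node *n) { if (n) n->refs++; return n; }
static void put(struct node *n) { if (n) n->refs--; }

/* Like of_find_node_by_type(): drops the reference on the node it is
 * leaving and takes one on the node it returns (or NULL at the end). */
static struct node *find_next(struct node *prev)
{
	int idx = prev ? (int)(prev - nodes) + 1 : 0;

	put(prev);
	return idx < NNODES ? get(&nodes[idx]) : NULL;
}

int main(void)
{
	struct node *n;

	/* Early exit: the node we stop on still holds a reference... */
	for (n = find_next(NULL); n; n = find_next(n))
		if (n == &nodes[0])
			break;
	/* ...so it is dropped exactly once, after the loop, as in the hunk. */
	put(n);

	printf("refs after loop: %d %d\n", nodes[0].refs, nodes[1].refs);
	return 0;
}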
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index af40c8768a78..214130a4edc6 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -27,6 +27,7 @@
 #include <linux/init.h>
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
+#include <linux/hugetlb.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
@@ -212,7 +213,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 	entry = set_access_flags_filter(entry, vma, dirty);
 	changed = !pte_same(*(ptep), entry);
 	if (changed) {
-		if (!(vma->vm_flags & VM_HUGETLB))
+		if (!is_vm_hugetlb_page(vma))
 			assert_pte_locked(vma->vm_mm, address);
 		__ptep_set_access_flags(ptep, entry);
 		flush_tlb_page_nohash(vma, address);
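is_vm_hugetlb_page() wraps the same VM_HUGETLB flag test that the removed line open-coded, but the helper is defined to return 0 when CONFIG_HUGETLB_PAGE is not set, so the hugetlb special case compiles away on non-hugetlb configurations. A stand-alone sketch of that shape, with local stand-ins for the vma type and the flag value (prints 0 unless built with -DCONFIG_HUGETLB_PAGE):

#include <stdio.h>

/* Local stand-ins mimicking the shape of the real helper in
 * <linux/hugetlb_inline.h>; values and types are illustrative only. */
#define VM_HUGETLB 0x00400000UL
struct vm_area { unsigned long vm_flags; };

#ifdef CONFIG_HUGETLB_PAGE
static inline int is_vm_hugetlb_page(struct vm_area *vma)
{
	return !!(vma->vm_flags & VM_HUGETLB);
}
#else
/* With hugetlb compiled out the test is constant 0, so callers like
 * ptep_set_access_flags() lose the special case at compile time. */
static inline int is_vm_hugetlb_page(struct vm_area *vma)
{
	(void)vma;
	return 0;
}
#endif

int main(void)
{
	struct vm_area vma = { .vm_flags = VM_HUGETLB };

	printf("hugetlb vma: %d\n", is_vm_hugetlb_page(&vma));
	return 0;
}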
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 6e595f6496d4..ad36ede469cc 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -26,6 +26,7 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/string.h>
+#include <linux/export.h>
 #include <linux/types.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index ba5194817f8a..73709f7ce92c 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -29,7 +29,7 @@
 #include <linux/pagemap.h>
 #include <linux/err.h>
 #include <linux/spinlock.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <asm/mman.h>
 #include <asm/mmu.h>
 #include <asm/spu.h>
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c
index 27b863c14941..558e30cce33e 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/tlb_hash32.c
@@ -27,6 +27,7 @@
 #include <linux/init.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
+#include <linux/export.h>
 
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
@@ -177,3 +178,7 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 	flush_range(vma->vm_mm, start, end);
 }
 EXPORT_SYMBOL(flush_tlb_range);
+
+void __init early_init_mmu(void)
+{
+}
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index af0892209417..dc4a5f385e41 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -30,6 +30,212 @@
 #define VPTE_PGD_SHIFT	(VPTE_PUD_SHIFT + PUD_INDEX_SIZE)
 #define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE)
 
+/**********************************************************************
+ *                                                                    *
+ * TLB miss handling for Book3E with a bolted linear mapping          *
+ * No virtual page table, no nested TLB misses                        *
+ *                                                                    *
+ **********************************************************************/
+
+.macro tlb_prolog_bolted addr
+	mtspr	SPRN_SPRG_TLB_SCRATCH,r13
+	mfspr	r13,SPRN_SPRG_PACA
+	std	r10,PACA_EXTLB+EX_TLB_R10(r13)
+	mfcr	r10
+	std	r11,PACA_EXTLB+EX_TLB_R11(r13)
+	std	r16,PACA_EXTLB+EX_TLB_R16(r13)
+	mfspr	r16,\addr		/* get faulting address */
+	std	r14,PACA_EXTLB+EX_TLB_R14(r13)
+	ld	r14,PACAPGD(r13)
+	std	r15,PACA_EXTLB+EX_TLB_R15(r13)
+	std	r10,PACA_EXTLB+EX_TLB_CR(r13)
+	TLB_MISS_PROLOG_STATS_BOLTED
+.endm
+
+.macro tlb_epilog_bolted
+	ld	r14,PACA_EXTLB+EX_TLB_CR(r13)
+	ld	r10,PACA_EXTLB+EX_TLB_R10(r13)
+	ld	r11,PACA_EXTLB+EX_TLB_R11(r13)
+	mtcr	r14
+	ld	r14,PACA_EXTLB+EX_TLB_R14(r13)
+	ld	r15,PACA_EXTLB+EX_TLB_R15(r13)
+	TLB_MISS_RESTORE_STATS_BOLTED
+	ld	r16,PACA_EXTLB+EX_TLB_R16(r13)
+	mfspr	r13,SPRN_SPRG_TLB_SCRATCH
+.endm
+
+/* Data TLB miss */
+	START_EXCEPTION(data_tlb_miss_bolted)
+	tlb_prolog_bolted SPRN_DEAR
+
+	/* We need _PAGE_PRESENT and _PAGE_ACCESSED set */
+
+	/* We do the user/kernel test for the PID here along with the RW test
+	 */
+	/* We pre-test some combination of permissions to avoid double
+	 * faults:
+	 *
+	 * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE
+	 * ESR_ST   is 0x00800000
+	 * _PAGE_BAP_SW is 0x00000010
+	 * So the shift is >> 19. This tests for supervisor writeability.
+	 * If the page happens to be supervisor writeable and not user
+	 * writeable, we will take a new fault later, but that should be
+	 * a rare enough case.
+	 *
+	 * We also move ESR_ST in _PAGE_DIRTY position
+	 * _PAGE_DIRTY is 0x00001000 so the shift is >> 11
+	 *
+	 * MAS1 is preset for all we need except for TID that needs to
+	 * be cleared for kernel translations
+	 */
+
+	mfspr	r11,SPRN_ESR
+
+	srdi	r15,r16,60		/* get region */
+	rldicl.	r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+	bne-	dtlb_miss_fault_bolted
+
+	rlwinm	r10,r11,32-19,27,27
+	rlwimi	r10,r11,32-16,19,19
+	cmpwi	r15,0
+	ori	r10,r10,_PAGE_PRESENT
+	oris	r11,r10,_PAGE_ACCESSED@h
+
+	TLB_MISS_STATS_SAVE_INFO_BOLTED
+	bne	tlb_miss_kernel_bolted
+
+tlb_miss_common_bolted:
+/*
+ * This is the guts of the TLB miss handler for bolted-linear.
+ * We are entered with:
+ *
+ * r16 = faulting address
+ * r15 = crap (free to use)
+ * r14 = page table base
+ * r13 = PACA
+ * r11 = PTE permission mask
+ * r10 = crap (free to use)
+ */
+	rldicl	r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3
+	cmpldi	cr0,r14,0
+	clrrdi	r15,r15,3
+	beq	tlb_miss_fault_bolted
+
+BEGIN_MMU_FTR_SECTION
+	/* Set the TLB reservation and search for existing entry. Then load
+	 * the entry.
+	 */
+	PPC_TLBSRX_DOT(0,r16)
+	ldx	r14,r14,r15
+	beq	normal_tlb_miss_done
+MMU_FTR_SECTION_ELSE
+	ldx	r14,r14,r15
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
+
+#ifndef CONFIG_PPC_64K_PAGES
+	rldicl	r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
+	clrrdi	r15,r15,3
+
+	cmpldi	cr0,r14,0
+	beq	tlb_miss_fault_bolted
+
+	ldx	r14,r14,r15
+#endif /* CONFIG_PPC_64K_PAGES */
+
+	rldicl	r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
+	clrrdi	r15,r15,3
+
+	cmpldi	cr0,r14,0
+	beq	tlb_miss_fault_bolted
+
+	ldx	r14,r14,r15
+
+	rldicl	r15,r16,64-PAGE_SHIFT+3,64-PTE_INDEX_SIZE-3
+	clrrdi	r15,r15,3
+
+	cmpldi	cr0,r14,0
+	beq	tlb_miss_fault_bolted
+
+	ldx	r14,r14,r15
+
+	/* Check if required permissions are met */
+	andc.	r15,r11,r14
+	rldicr	r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
+	bne-	tlb_miss_fault_bolted
+
+	/* Now we build the MAS:
+	 *
+	 * MAS 0   :	Fully setup with defaults in MAS4 and TLBnCFG
+	 * MAS 1   :	Almost fully setup
+	 *               - PID already updated by caller if necessary
+	 *               - TSIZE need change if !base page size, not
+	 *                 yet implemented for now
+	 * MAS 2   :	Defaults not useful, need to be redone
+	 * MAS 3+7 :	Needs to be done
+	 */
+	clrrdi	r11,r16,12		/* Clear low crap in EA */
+	clrldi	r15,r15,12		/* Clear crap at the top */
+	rlwimi	r11,r14,32-19,27,31	/* Insert WIMGE */
+	rlwimi	r15,r14,32-8,22,25	/* Move in U bits */
+	mtspr	SPRN_MAS2,r11
+	andi.	r11,r14,_PAGE_DIRTY
+	rlwimi	r15,r14,32-2,26,31	/* Move in BAP bits */
+
+	/* Mask out SW and UW if !DIRTY (XXX optimize this !) */
+	bne	1f
+	li	r11,MAS3_SW|MAS3_UW
+	andc	r15,r15,r11
+1:
+	mtspr	SPRN_MAS7_MAS3,r15
+	tlbwe
+
+	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
+	tlb_epilog_bolted
+	rfi
+
+itlb_miss_kernel_bolted:
+	li	r11,_PAGE_PRESENT|_PAGE_BAP_SX	/* Base perm */
+	oris	r11,r11,_PAGE_ACCESSED@h
+tlb_miss_kernel_bolted:
+	mfspr	r10,SPRN_MAS1
+	ld	r14,PACA_KERNELPGD(r13)
+	cmpldi	cr0,r15,8		/* Check for vmalloc region */
+	rlwinm	r10,r10,0,16,1		/* Clear TID */
+	mtspr	SPRN_MAS1,r10
+	beq+	tlb_miss_common_bolted
+
+tlb_miss_fault_bolted:
+	/* We need to check if it was an instruction miss */
+	andi.	r10,r11,_PAGE_EXEC|_PAGE_BAP_SX
+	bne	itlb_miss_fault_bolted
+dtlb_miss_fault_bolted:
+	TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
+	tlb_epilog_bolted
+	b	exc_data_storage_book3e
+itlb_miss_fault_bolted:
+	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
+	tlb_epilog_bolted
+	b	exc_instruction_storage_book3e
+
+/* Instruction TLB miss */
+	START_EXCEPTION(instruction_tlb_miss_bolted)
+	tlb_prolog_bolted SPRN_SRR0
+
+	rldicl.	r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+	srdi	r15,r16,60		/* get region */
+	TLB_MISS_STATS_SAVE_INFO_BOLTED
+	bne-	itlb_miss_fault_bolted
+
+	li	r11,_PAGE_PRESENT|_PAGE_EXEC	/* Base perm */
+
+	/* We do the user/kernel test for the PID here along with the RW test
+	 */
+
+	cmpldi	cr0,r15,0			/* Check for user region */
+	oris	r11,r11,_PAGE_ACCESSED@h
+	beq	tlb_miss_common_bolted
+	b	itlb_miss_kernel_bolted
 
 /**********************************************************************
  *                                                                    *
@@ -347,24 +553,24 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
 	rldicl	r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3
 	clrrdi	r10,r11,3
 	ldx	r15,r10,r15
-	cmpldi	cr0,r15,0
-	beq	virt_page_table_tlb_miss_fault
+	cmpdi	cr0,r15,0
+	bge	virt_page_table_tlb_miss_fault
 
 #ifndef CONFIG_PPC_64K_PAGES
 	/* Get to PUD entry */
 	rldicl	r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3
 	clrrdi	r10,r11,3
 	ldx	r15,r10,r15
-	cmpldi	cr0,r15,0
-	beq	virt_page_table_tlb_miss_fault
+	cmpdi	cr0,r15,0
+	bge	virt_page_table_tlb_miss_fault
 #endif /* CONFIG_PPC_64K_PAGES */
 
 	/* Get to PMD entry */
 	rldicl	r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3
 	clrrdi	r10,r11,3
 	ldx	r15,r10,r15
-	cmpldi	cr0,r15,0
-	beq	virt_page_table_tlb_miss_fault
+	cmpdi	cr0,r15,0
+	bge	virt_page_table_tlb_miss_fault
 
 	/* Ok, we're all right, we can now create a kernel translation for
 	 * a 4K or 64K page from r16 -> r15.
@@ -596,24 +802,24 @@ htw_tlb_miss:
 	rldicl	r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3
 	clrrdi	r10,r11,3
 	ldx	r15,r10,r15
-	cmpldi	cr0,r15,0
-	beq	htw_tlb_miss_fault
+	cmpdi	cr0,r15,0
+	bge	htw_tlb_miss_fault
 
 #ifndef CONFIG_PPC_64K_PAGES
 	/* Get to PUD entry */
 	rldicl	r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3
 	clrrdi	r10,r11,3
 	ldx	r15,r10,r15
-	cmpldi	cr0,r15,0
-	beq	htw_tlb_miss_fault
+	cmpdi	cr0,r15,0
+	bge	htw_tlb_miss_fault
 #endif /* CONFIG_PPC_64K_PAGES */
 
 	/* Get to PMD entry */
 	rldicl	r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3
 	clrrdi	r10,r11,3
 	ldx	r15,r10,r15
-	cmpldi	cr0,r15,0
-	beq	htw_tlb_miss_fault
+	cmpdi	cr0,r15,0
+	bge	htw_tlb_miss_fault
 
 	/* Ok, we're all right, we can now create an indirect entry for
 	 * a 1M or 256M page.
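The cmpldi/beq pairs above become signed compares at every level of the software walk: a usable next-level table pointer is a kernel virtual address with the sign bit set, so "branch if >= 0" now sends both empty entries and entries that do not look like plain kernel pointers (such as the specially encoded huge-page directory entries added elsewhere in this merge, assuming those keep the sign bit clear) to the fault path for the generic code to handle. A small userspace illustration of that test only, not the real walker:

#include <stdio.h>
#include <stdint.h>

/* Mimics the "cmpdi ; bge fault" check: reject zero and anything whose
 * sign bit is clear, i.e. anything that is not a kernel-virtual pointer. */
static int needs_slow_path(uint64_t entry)
{
	return (int64_t)entry >= 0;
}

int main(void)
{
	printf("%d\n", needs_slow_path(0));			/* empty entry: 1 */
	printf("%d\n", needs_slow_path(0xc000000001234000ULL));	/* kernel ptr: 0 */
	printf("%d\n", needs_slow_path(0x0000000112340007ULL));	/* other encoding: 1 */
	return 0;
}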
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 0bdad3aecc67..4e13d6f9023e 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -28,6 +28,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/export.h>
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
@@ -35,14 +36,50 @@
 #include <linux/preempt.h>
 #include <linux/spinlock.h>
 #include <linux/memblock.h>
+#include <linux/of_fdt.h>
+#include <linux/hugetlb.h>
 
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 #include <asm/code-patching.h>
+#include <asm/hugetlb.h>
 
 #include "mmu_decl.h"
 
-#ifdef CONFIG_PPC_BOOK3E
+/*
+ * This struct lists the sw-supported page sizes. The hardawre MMU may support
+ * other sizes not listed here. The .ind field is only used on MMUs that have
+ * indirect page table entries.
+ */
+#ifdef CONFIG_PPC_BOOK3E_MMU
+#ifdef CONFIG_FSL_BOOKE
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
+	[MMU_PAGE_4K] = {
+		.shift	= 12,
+		.enc	= BOOK3E_PAGESZ_4K,
+	},
+	[MMU_PAGE_4M] = {
+		.shift	= 22,
+		.enc	= BOOK3E_PAGESZ_4M,
+	},
+	[MMU_PAGE_16M] = {
+		.shift	= 24,
+		.enc	= BOOK3E_PAGESZ_16M,
+	},
+	[MMU_PAGE_64M] = {
+		.shift	= 26,
+		.enc	= BOOK3E_PAGESZ_64M,
+	},
+	[MMU_PAGE_256M] = {
+		.shift	= 28,
+		.enc	= BOOK3E_PAGESZ_256M,
+	},
+	[MMU_PAGE_1G] = {
+		.shift	= 30,
+		.enc	= BOOK3E_PAGESZ_1GB,
+	},
+};
+#else
 struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
 	[MMU_PAGE_4K] = {
 		.shift	= 12,
@@ -76,6 +113,8 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
 		.enc	= BOOK3E_PAGESZ_1GB,
 	},
 };
+#endif /* CONFIG_FSL_BOOKE */
+
 static inline int mmu_get_tsize(int psize)
 {
 	return mmu_psize_defs[psize].enc;
@@ -86,7 +125,7 @@ static inline int mmu_get_tsize(int psize)
 	/* This isn't used on !Book3E for now */
 	return 0;
 }
-#endif
+#endif /* CONFIG_PPC_BOOK3E_MMU */
 
 /* The variables below are currently only used on 64-bit Book3E
  * though this will probably be made common with other nohash
@@ -102,6 +141,12 @@ unsigned long linear_map_top; /* Top of linear mapping */
 
 #endif /* CONFIG_PPC64 */
 
+#ifdef CONFIG_PPC_FSL_BOOK3E
+/* next_tlbcam_idx is used to round-robin tlbcam entry assignment */
+DEFINE_PER_CPU(int, next_tlbcam_idx);
+EXPORT_PER_CPU_SYMBOL(next_tlbcam_idx);
+#endif
+
 /*
  * Base TLB flushing operations:
  *
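Each entry in the new FSL table ties a software page-size index to its shift and to its Book3E TSIZE encoding, and mmu_get_tsize() is just the table lookup; the byte size follows from the shift. A cut-down, stand-alone version of that idea; the shifts are taken from the hunk, but the BOOK3E_PAGESZ_* values here are illustrative stand-ins rather than the real header definitions:

#include <stdio.h>

enum { MMU_PAGE_4K, MMU_PAGE_4M, MMU_PAGE_16M, MMU_PAGE_COUNT };
/* Stand-in encodings, not the values from asm/mmu-book3e.h. */
enum { BOOK3E_PAGESZ_4K = 2, BOOK3E_PAGESZ_4M = 12, BOOK3E_PAGESZ_16M = 14 };

struct mmu_psize_def { unsigned int shift; unsigned int enc; };

static const struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
	[MMU_PAGE_4K]  = { .shift = 12, .enc = BOOK3E_PAGESZ_4K  },
	[MMU_PAGE_4M]  = { .shift = 22, .enc = BOOK3E_PAGESZ_4M  },
	[MMU_PAGE_16M] = { .shift = 24, .enc = BOOK3E_PAGESZ_16M },
};

static int mmu_get_tsize(int psize)
{
	return mmu_psize_defs[psize].enc;	/* same one-liner as the file */
}

int main(void)
{
	int p = MMU_PAGE_16M;

	printf("shift=%u size=%luK tsize=%d\n",
	       mmu_psize_defs[p].shift,
	       (1UL << mmu_psize_defs[p].shift) / 1024,
	       mmu_get_tsize(p));
	return 0;
}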
@@ -259,6 +304,11 @@ void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 {
+#ifdef CONFIG_HUGETLB_PAGE
+	if (is_vm_hugetlb_page(vma))
+		flush_hugetlb_page(vma, vmaddr);
+#endif
+
 	__flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
 			 mmu_get_tsize(mmu_virtual_psize), 0);
 }
@@ -266,6 +316,17 @@ EXPORT_SYMBOL(flush_tlb_page);
 
 #endif /* CONFIG_SMP */
 
+#ifdef CONFIG_PPC_47x
+void __init early_init_mmu_47x(void)
+{
+#ifdef CONFIG_SMP
+	unsigned long root = of_get_flat_dt_root();
+	if (of_get_flat_dt_prop(root, "cooperative-partition", NULL))
+		mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST);
+#endif /* CONFIG_SMP */
+}
+#endif /* CONFIG_PPC_47x */
+
 /*
  * Flush kernel TLB entries in the given range
 */
@@ -443,14 +504,27 @@ static void setup_page_sizes(void)
 	}
 }
 
-static void setup_mmu_htw(void)
+static void __patch_exception(int exc, unsigned long addr)
 {
 	extern unsigned int interrupt_base_book3e;
-	extern unsigned int exc_data_tlb_miss_htw_book3e;
-	extern unsigned int exc_instruction_tlb_miss_htw_book3e;
+	unsigned int *ibase = &interrupt_base_book3e;
+
+	/* Our exceptions vectors start with a NOP and -then- a branch
+	 * to deal with single stepping from userspace which stops on
+	 * the second instruction. Thus we need to patch the second
+	 * instruction of the exception, not the first one
+	 */
+
+	patch_branch(ibase + (exc / 4) + 1, addr, 0);
+}
 
-	unsigned int *ibase = &interrupt_base_book3e;
+#define patch_exception(exc, name) do { \
+	extern unsigned int name; \
+	__patch_exception((exc), (unsigned long)&name); \
+} while (0)
 
+static void setup_mmu_htw(void)
+{
 	/* Check if HW tablewalk is present, and if yes, enable it by:
 	 *
 	 * - patching the TLB miss handlers to branch to the
@@ -462,19 +536,12 @@ static void setup_mmu_htw(void)
 
 	if ((tlb0cfg & TLBnCFG_IND) &&
 	    (tlb0cfg & TLBnCFG_PT)) {
-		/* Our exceptions vectors start with a NOP and -then- a branch
-		 * to deal with single stepping from userspace which stops on
-		 * the second instruction. Thus we need to patch the second
-		 * instruction of the exception, not the first one
-		 */
-		patch_branch(ibase + (0x1c0 / 4) + 1,
-			(unsigned long)&exc_data_tlb_miss_htw_book3e, 0);
-		patch_branch(ibase + (0x1e0 / 4) + 1,
-			(unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0);
+		patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e);
+		patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e);
 		book3e_htw_enabled = 1;
 	}
-	pr_info("MMU: Book3E Page Tables %s\n",
-		book3e_htw_enabled ? "Enabled" : "Disabled");
+	pr_info("MMU: Book3E HW tablewalk %s\n",
+		book3e_htw_enabled ? "enabled" : "not supported");
 }
 
 /*
@@ -549,6 +616,9 @@ static void __early_init_mmu(int boot_cpu)
 		/* limit memory so we dont have linear faults */
 		memblock_enforce_memory_limit(linear_map_top);
 		memblock_analyze();
+
+		patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
+		patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e);
 	}
 #endif
 
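__patch_exception() turns the vector's byte offset into a word index into the vector table (instructions are 4 bytes) and adds one to step over the leading NOP, so the branch in the second instruction slot is what gets rewritten. A small arithmetic check of the two offsets patched above, outside the kernel:

#include <stdio.h>

int main(void)
{
	/* Same arithmetic as __patch_exception(): "exc / 4" is the word
	 * index of the vector's first instruction, "+ 1" skips the NOP. */
	unsigned int exc_offsets[] = { 0x1c0, 0x1e0 };	/* data / instruction TLB miss */

	for (unsigned int i = 0; i < 2; i++) {
		unsigned int exc = exc_offsets[i];

		printf("exception 0x%x -> patch word index %u (byte offset 0x%x)\n",
		       exc, exc / 4 + 1, (exc / 4 + 1) * 4);
	}
	return 0;
}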
@@ -573,15 +643,37 @@ void __cpuinit early_init_mmu_secondary(void)
 void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 				phys_addr_t first_memblock_size)
 {
-	/* On Embedded 64-bit, we adjust the RMA size to match
+	/* On non-FSL Embedded 64-bit, we adjust the RMA size to match
 	 * the bolted TLB entry. We know for now that only 1G
 	 * entries are supported though that may eventually
-	 * change. We crop it to the size of the first MEMBLOCK to
+	 * change.
+	 *
+	 * on FSL Embedded 64-bit, we adjust the RMA size to match the
+	 * first bolted TLB entry size.  We still limit max to 1G even if
+	 * the TLB could cover more.  This is due to what the early init
+	 * code is setup to do.
+	 *
+	 * We crop it to the size of the first MEMBLOCK to
 	 * avoid going over total available memory just in case...
 	 */
-	ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
+#ifdef CONFIG_PPC_FSL_BOOK3E
+	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+		unsigned long linear_sz;
+		linear_sz = calc_cam_sz(first_memblock_size, PAGE_OFFSET,
+					first_memblock_base);
+		ppc64_rma_size = min_t(u64, linear_sz, 0x40000000);
+	} else
+#endif
+		ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
 
 	/* Finally limit subsequent allocations */
 	memblock_set_current_limit(first_memblock_base + ppc64_rma_size);
 }
+#else /* ! CONFIG_PPC64 */
+void __init early_init_mmu(void)
+{
+#ifdef CONFIG_PPC_47x
+	early_init_mmu_47x();
+#endif
+}
 #endif /* CONFIG_PPC64 */
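On FSL Book3E the real-mode area is now bounded by what the first bolted CAM entry can cover, as reported by calc_cam_sz(), and still by 1 GB in any case; other 64-bit embedded keeps the old min(first memblock, 1 GB). A stand-alone sketch of that clamping, where the CAM size is a made-up stand-in value rather than a real calc_cam_sz() result:

#include <stdio.h>
#include <stdint.h>

static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

int main(void)
{
	uint64_t first_memblock_size = 2ULL << 30;	/* 2 GB of RAM (example) */
	uint64_t linear_sz = 1ULL << 30;		/* stand-in for calc_cam_sz() */
	uint64_t rma;

	/* FSL Book3E path from the hunk: first CAM size, still capped at 1 GB. */
	rma = min_u64(linear_sz, 0x40000000);
	printf("FSL Book3E rma: %llu MB\n", (unsigned long long)(rma >> 20));

	/* Other embedded 64-bit: first memblock size, capped at 1 GB. */
	rma = min_u64(first_memblock_size, 0x40000000);
	printf("non-FSL rma:    %llu MB\n", (unsigned long long)(rma >> 20));
	return 0;
}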