author      Jiri Kosina <jkosina@suse.cz>   2014-11-20 08:42:02 -0500
committer   Jiri Kosina <jkosina@suse.cz>   2014-11-20 08:42:02 -0500
commit      a02001086bbfb4da35d1228bebc2f1b442db455f
tree        62ab47936cef06fd08657ca5b6cd1df98c19be57 /arch/powerpc/mm
parent      eff264efeeb0898408e8c9df72d8a32621035bed
parent      fc14f9c1272f62c3e8d01300f52467c0d9af50f9
Merge Linus' tree to be able to apply submitted patches to newer code than
current trivial.git base
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/Makefile                 5
-rw-r--r--  arch/powerpc/mm/copro_fault.c          148
-rw-r--r--  arch/powerpc/mm/dma-noncoherent.c        1
-rw-r--r--  arch/powerpc/mm/fault.c                 48
-rw-r--r--  arch/powerpc/mm/hash_native_64.c        46
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c        186
-rw-r--r--  arch/powerpc/mm/hugepage-hash64.c       88
-rw-r--r--  arch/powerpc/mm/init_32.c                6
-rw-r--r--  arch/powerpc/mm/init_64.c              129
-rw-r--r--  arch/powerpc/mm/mem.c                   71
-rw-r--r--  arch/powerpc/mm/mmu_context_hash32.c     2
-rw-r--r--  arch/powerpc/mm/numa.c                  84
-rw-r--r--  arch/powerpc/mm/pgtable.c                2
-rw-r--r--  arch/powerpc/mm/pgtable_32.c             2
-rw-r--r--  arch/powerpc/mm/pgtable_64.c            46
-rw-r--r--  arch/powerpc/mm/ppc_mmu_32.c             2
-rw-r--r--  arch/powerpc/mm/slb.c                    3
-rw-r--r--  arch/powerpc/mm/slice.c                 15
-rw-r--r--  arch/powerpc/mm/stab.c                 286
-rw-r--r--  arch/powerpc/mm/tlb_hash64.c             6
-rw-r--r--  arch/powerpc/mm/tlb_low_64e.S           69
-rw-r--r--  arch/powerpc/mm/tlb_nohash.c           111
22 files changed, 780 insertions, 576 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 51230ee6a407..325e861616a1 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -13,9 +13,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
13 tlb_nohash_low.o 13 tlb_nohash_low.o
14obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o 14obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o
15hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o 15hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
16obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o \ 16obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o slb_low.o slb.o $(hash64-y)
17 slb_low.o slb.o stab.o \
18 $(hash64-y)
19obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o 17obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o
20obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ 18obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \
21 tlb_hash$(CONFIG_WORD_SIZE).o \ 19 tlb_hash$(CONFIG_WORD_SIZE).o \
@@ -36,3 +34,4 @@ obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o
36obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o 34obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
37obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o 35obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
38obj-$(CONFIG_HIGHMEM) += highmem.o 36obj-$(CONFIG_HIGHMEM) += highmem.o
37obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
new file mode 100644
index 000000000000..5a236f082c78
--- /dev/null
+++ b/arch/powerpc/mm/copro_fault.c
@@ -0,0 +1,148 @@
1/*
2 * CoProcessor (SPU/AFU) mm fault handler
3 *
4 * (C) Copyright IBM Deutschland Entwicklung GmbH 2007
5 *
6 * Author: Arnd Bergmann <arndb@de.ibm.com>
7 * Author: Jeremy Kerr <jk@ozlabs.org>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 */
23#include <linux/sched.h>
24#include <linux/mm.h>
25#include <linux/export.h>
26#include <asm/reg.h>
27#include <asm/copro.h>
28#include <asm/spu.h>
29#include <misc/cxl.h>
30
31/*
32 * This ought to be kept in sync with the powerpc specific do_page_fault
33 * function. Currently, there are a few corner cases that we haven't had
34 * to handle fortunately.
35 */
36int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
37 unsigned long dsisr, unsigned *flt)
38{
39 struct vm_area_struct *vma;
40 unsigned long is_write;
41 int ret;
42
43 if (mm == NULL)
44 return -EFAULT;
45
46 if (mm->pgd == NULL)
47 return -EFAULT;
48
49 down_read(&mm->mmap_sem);
50 ret = -EFAULT;
51 vma = find_vma(mm, ea);
52 if (!vma)
53 goto out_unlock;
54
55 if (ea < vma->vm_start) {
56 if (!(vma->vm_flags & VM_GROWSDOWN))
57 goto out_unlock;
58 if (expand_stack(vma, ea))
59 goto out_unlock;
60 }
61
62 is_write = dsisr & DSISR_ISSTORE;
63 if (is_write) {
64 if (!(vma->vm_flags & VM_WRITE))
65 goto out_unlock;
66 } else {
67 if (dsisr & DSISR_PROTFAULT)
68 goto out_unlock;
69 if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
70 goto out_unlock;
71 }
72
73 ret = 0;
74 *flt = handle_mm_fault(mm, vma, ea, is_write ? FAULT_FLAG_WRITE : 0);
75 if (unlikely(*flt & VM_FAULT_ERROR)) {
76 if (*flt & VM_FAULT_OOM) {
77 ret = -ENOMEM;
78 goto out_unlock;
79 } else if (*flt & VM_FAULT_SIGBUS) {
80 ret = -EFAULT;
81 goto out_unlock;
82 }
83 BUG();
84 }
85
86 if (*flt & VM_FAULT_MAJOR)
87 current->maj_flt++;
88 else
89 current->min_flt++;
90
91out_unlock:
92 up_read(&mm->mmap_sem);
93 return ret;
94}
95EXPORT_SYMBOL_GPL(copro_handle_mm_fault);
96
97int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
98{
99 u64 vsid;
100 int psize, ssize;
101
102 switch (REGION_ID(ea)) {
103 case USER_REGION_ID:
104 pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
105 psize = get_slice_psize(mm, ea);
106 ssize = user_segment_size(ea);
107 vsid = get_vsid(mm->context.id, ea, ssize);
108 break;
109 case VMALLOC_REGION_ID:
110 pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea);
111 if (ea < VMALLOC_END)
112 psize = mmu_vmalloc_psize;
113 else
114 psize = mmu_io_psize;
115 ssize = mmu_kernel_ssize;
116 vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
117 break;
118 case KERNEL_REGION_ID:
119 pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea);
120 psize = mmu_linear_psize;
121 ssize = mmu_kernel_ssize;
122 vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
123 break;
124 default:
125 pr_debug("%s: invalid region access at %016llx\n", __func__, ea);
126 return 1;
127 }
128
129 vsid = (vsid << slb_vsid_shift(ssize)) | SLB_VSID_USER;
130
131 vsid |= mmu_psize_defs[psize].sllp |
132 ((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0);
133
134 slb->esid = (ea & (ssize == MMU_SEGSIZE_1T ? ESID_MASK_1T : ESID_MASK)) | SLB_ESID_V;
135 slb->vsid = vsid;
136
137 return 0;
138}
139EXPORT_SYMBOL_GPL(copro_calculate_slb);
140
141void copro_flush_all_slbs(struct mm_struct *mm)
142{
143#ifdef CONFIG_SPU_BASE
144 spu_flush_all_slbs(mm);
145#endif
146 cxl_slbia(mm);
147}
148EXPORT_SYMBOL_GPL(copro_flush_all_slbs);
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 7b6c10750179..d85e86aac7fb 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -33,6 +33,7 @@
33#include <linux/export.h> 33#include <linux/export.h>
34 34
35#include <asm/tlbflush.h> 35#include <asm/tlbflush.h>
36#include <asm/dma.h>
36 37
37#include "mmu_decl.h" 38#include "mmu_decl.h"
38 39
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 51ab9e7e6c39..08d659a9fcdb 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -30,9 +30,9 @@
30#include <linux/kprobes.h> 30#include <linux/kprobes.h>
31#include <linux/kdebug.h> 31#include <linux/kdebug.h>
32#include <linux/perf_event.h> 32#include <linux/perf_event.h>
33#include <linux/magic.h>
34#include <linux/ratelimit.h> 33#include <linux/ratelimit.h>
35#include <linux/context_tracking.h> 34#include <linux/context_tracking.h>
35#include <linux/hugetlb.h>
36 36
37#include <asm/firmware.h> 37#include <asm/firmware.h>
38#include <asm/page.h> 38#include <asm/page.h>
@@ -114,22 +114,37 @@ static int store_updates_sp(struct pt_regs *regs)
114#define MM_FAULT_CONTINUE -1 114#define MM_FAULT_CONTINUE -1
115#define MM_FAULT_ERR(sig) (sig) 115#define MM_FAULT_ERR(sig) (sig)
116 116
117static int do_sigbus(struct pt_regs *regs, unsigned long address) 117static int do_sigbus(struct pt_regs *regs, unsigned long address,
118 unsigned int fault)
118{ 119{
119 siginfo_t info; 120 siginfo_t info;
121 unsigned int lsb = 0;
120 122
121 up_read(&current->mm->mmap_sem); 123 up_read(&current->mm->mmap_sem);
122 124
123 if (user_mode(regs)) { 125 if (!user_mode(regs))
124 current->thread.trap_nr = BUS_ADRERR; 126 return MM_FAULT_ERR(SIGBUS);
125 info.si_signo = SIGBUS; 127
126 info.si_errno = 0; 128 current->thread.trap_nr = BUS_ADRERR;
127 info.si_code = BUS_ADRERR; 129 info.si_signo = SIGBUS;
128 info.si_addr = (void __user *)address; 130 info.si_errno = 0;
129 force_sig_info(SIGBUS, &info, current); 131 info.si_code = BUS_ADRERR;
130 return MM_FAULT_RETURN; 132 info.si_addr = (void __user *)address;
133#ifdef CONFIG_MEMORY_FAILURE
134 if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
135 pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
136 current->comm, current->pid, address);
137 info.si_code = BUS_MCEERR_AR;
131 } 138 }
132 return MM_FAULT_ERR(SIGBUS); 139
140 if (fault & VM_FAULT_HWPOISON_LARGE)
141 lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
142 if (fault & VM_FAULT_HWPOISON)
143 lsb = PAGE_SHIFT;
144#endif
145 info.si_addr_lsb = lsb;
146 force_sig_info(SIGBUS, &info, current);
147 return MM_FAULT_RETURN;
133} 148}
134 149
135static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) 150static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
@@ -170,11 +185,8 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
170 return MM_FAULT_RETURN; 185 return MM_FAULT_RETURN;
171 } 186 }
172 187
173 /* Bus error. x86 handles HWPOISON here, we'll add this if/when 188 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE))
174 * we support the feature in HW 189 return do_sigbus(regs, addr, fault);
175 */
176 if (fault & VM_FAULT_SIGBUS)
177 return do_sigbus(regs, addr);
178 190
179 /* We don't understand the fault code, this is fatal */ 191 /* We don't understand the fault code, this is fatal */
180 BUG(); 192 BUG();
@@ -508,7 +520,6 @@ bail:
508void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) 520void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
509{ 521{
510 const struct exception_table_entry *entry; 522 const struct exception_table_entry *entry;
511 unsigned long *stackend;
512 523
513 /* Are we prepared to handle this fault? */ 524 /* Are we prepared to handle this fault? */
514 if ((entry = search_exception_tables(regs->nip)) != NULL) { 525 if ((entry = search_exception_tables(regs->nip)) != NULL) {
@@ -537,8 +548,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
537 printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n", 548 printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
538 regs->nip); 549 regs->nip);
539 550
540 stackend = end_of_stack(current); 551 if (task_stack_end_corrupted(current))
541 if (current != &init_task && *stackend != STACK_END_MAGIC)
542 printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); 552 printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
543 553
544 die("Kernel access of bad area", regs, sig); 554 die("Kernel access of bad area", regs, sig);
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index cf1d325eae8b..ae4962a06476 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -29,6 +29,8 @@
29#include <asm/kexec.h> 29#include <asm/kexec.h>
30#include <asm/ppc-opcode.h> 30#include <asm/ppc-opcode.h>
31 31
32#include <misc/cxl.h>
33
32#ifdef DEBUG_LOW 34#ifdef DEBUG_LOW
33#define DBG_LOW(fmt...) udbg_printf(fmt) 35#define DBG_LOW(fmt...) udbg_printf(fmt)
34#else 36#else
@@ -149,9 +151,11 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
149static inline void tlbie(unsigned long vpn, int psize, int apsize, 151static inline void tlbie(unsigned long vpn, int psize, int apsize,
150 int ssize, int local) 152 int ssize, int local)
151{ 153{
152 unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL); 154 unsigned int use_local;
153 int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); 155 int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
154 156
157 use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use();
158
155 if (use_local) 159 if (use_local)
156 use_local = mmu_psize_defs[psize].tlbiel; 160 use_local = mmu_psize_defs[psize].tlbiel;
157 if (lock_tlbie && !use_local) 161 if (lock_tlbie && !use_local)
@@ -412,18 +416,18 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
412 local_irq_restore(flags); 416 local_irq_restore(flags);
413} 417}
414 418
415static void native_hugepage_invalidate(struct mm_struct *mm, 419static void native_hugepage_invalidate(unsigned long vsid,
420 unsigned long addr,
416 unsigned char *hpte_slot_array, 421 unsigned char *hpte_slot_array,
417 unsigned long addr, int psize) 422 int psize, int ssize)
418{ 423{
419 int ssize = 0, i; 424 int i;
420 int lock_tlbie;
421 struct hash_pte *hptep; 425 struct hash_pte *hptep;
422 int actual_psize = MMU_PAGE_16M; 426 int actual_psize = MMU_PAGE_16M;
423 unsigned int max_hpte_count, valid; 427 unsigned int max_hpte_count, valid;
424 unsigned long flags, s_addr = addr; 428 unsigned long flags, s_addr = addr;
425 unsigned long hpte_v, want_v, shift; 429 unsigned long hpte_v, want_v, shift;
426 unsigned long hidx, vpn = 0, vsid, hash, slot; 430 unsigned long hidx, vpn = 0, hash, slot;
427 431
428 shift = mmu_psize_defs[psize].shift; 432 shift = mmu_psize_defs[psize].shift;
429 max_hpte_count = 1U << (PMD_SHIFT - shift); 433 max_hpte_count = 1U << (PMD_SHIFT - shift);
@@ -437,15 +441,6 @@ static void native_hugepage_invalidate(struct mm_struct *mm,
437 441
438 /* get the vpn */ 442 /* get the vpn */
439 addr = s_addr + (i * (1ul << shift)); 443 addr = s_addr + (i * (1ul << shift));
440 if (!is_kernel_addr(addr)) {
441 ssize = user_segment_size(addr);
442 vsid = get_vsid(mm->context.id, addr, ssize);
443 WARN_ON(vsid == 0);
444 } else {
445 vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
446 ssize = mmu_kernel_ssize;
447 }
448
449 vpn = hpt_vpn(addr, vsid, ssize); 444 vpn = hpt_vpn(addr, vsid, ssize);
450 hash = hpt_hash(vpn, shift, ssize); 445 hash = hpt_hash(vpn, shift, ssize);
451 if (hidx & _PTEIDX_SECONDARY) 446 if (hidx & _PTEIDX_SECONDARY)
@@ -465,22 +460,13 @@ static void native_hugepage_invalidate(struct mm_struct *mm,
465 else 460 else
466 /* Invalidate the hpte. NOTE: this also unlocks it */ 461 /* Invalidate the hpte. NOTE: this also unlocks it */
467 hptep->v = 0; 462 hptep->v = 0;
463 /*
464 * We need to do tlb invalidate for all the address, tlbie
465 * instruction compares entry_VA in tlb with the VA specified
466 * here
467 */
468 tlbie(vpn, psize, actual_psize, ssize, 0);
468 } 469 }
469 /*
470 * Since this is a hugepage, we just need a single tlbie.
471 * use the last vpn.
472 */
473 lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
474 if (lock_tlbie)
475 raw_spin_lock(&native_tlbie_lock);
476
477 asm volatile("ptesync":::"memory");
478 __tlbie(vpn, psize, actual_psize, ssize);
479 asm volatile("eieio; tlbsync; ptesync":::"memory");
480
481 if (lock_tlbie)
482 raw_spin_unlock(&native_tlbie_lock);
483
484 local_irq_restore(flags); 470 local_irq_restore(flags);
485} 471}
486 472
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 88fdd9d25077..d5339a3b9945 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -51,7 +51,7 @@
51#include <asm/cacheflush.h> 51#include <asm/cacheflush.h>
52#include <asm/cputable.h> 52#include <asm/cputable.h>
53#include <asm/sections.h> 53#include <asm/sections.h>
54#include <asm/spu.h> 54#include <asm/copro.h>
55#include <asm/udbg.h> 55#include <asm/udbg.h>
56#include <asm/code-patching.h> 56#include <asm/code-patching.h>
57#include <asm/fadump.h> 57#include <asm/fadump.h>
@@ -92,12 +92,14 @@ extern unsigned long dart_tablebase;
92 92
93static unsigned long _SDR1; 93static unsigned long _SDR1;
94struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; 94struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
95EXPORT_SYMBOL_GPL(mmu_psize_defs);
95 96
96struct hash_pte *htab_address; 97struct hash_pte *htab_address;
97unsigned long htab_size_bytes; 98unsigned long htab_size_bytes;
98unsigned long htab_hash_mask; 99unsigned long htab_hash_mask;
99EXPORT_SYMBOL_GPL(htab_hash_mask); 100EXPORT_SYMBOL_GPL(htab_hash_mask);
100int mmu_linear_psize = MMU_PAGE_4K; 101int mmu_linear_psize = MMU_PAGE_4K;
102EXPORT_SYMBOL_GPL(mmu_linear_psize);
101int mmu_virtual_psize = MMU_PAGE_4K; 103int mmu_virtual_psize = MMU_PAGE_4K;
102int mmu_vmalloc_psize = MMU_PAGE_4K; 104int mmu_vmalloc_psize = MMU_PAGE_4K;
103#ifdef CONFIG_SPARSEMEM_VMEMMAP 105#ifdef CONFIG_SPARSEMEM_VMEMMAP
@@ -105,6 +107,7 @@ int mmu_vmemmap_psize = MMU_PAGE_4K;
105#endif 107#endif
106int mmu_io_psize = MMU_PAGE_4K; 108int mmu_io_psize = MMU_PAGE_4K;
107int mmu_kernel_ssize = MMU_SEGSIZE_256M; 109int mmu_kernel_ssize = MMU_SEGSIZE_256M;
110EXPORT_SYMBOL_GPL(mmu_kernel_ssize);
108int mmu_highuser_ssize = MMU_SEGSIZE_256M; 111int mmu_highuser_ssize = MMU_SEGSIZE_256M;
109u16 mmu_slb_size = 64; 112u16 mmu_slb_size = 64;
110EXPORT_SYMBOL_GPL(mmu_slb_size); 113EXPORT_SYMBOL_GPL(mmu_slb_size);
@@ -243,7 +246,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
243} 246}
244 247
245#ifdef CONFIG_MEMORY_HOTPLUG 248#ifdef CONFIG_MEMORY_HOTPLUG
246static int htab_remove_mapping(unsigned long vstart, unsigned long vend, 249int htab_remove_mapping(unsigned long vstart, unsigned long vend,
247 int psize, int ssize) 250 int psize, int ssize)
248{ 251{
249 unsigned long vaddr; 252 unsigned long vaddr;
@@ -333,70 +336,69 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
333 return 0; 336 return 0;
334 337
335 prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size); 338 prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size);
336 if (prop != NULL) { 339 if (!prop)
337 pr_info("Page sizes from device-tree:\n"); 340 return 0;
338 size /= 4; 341
339 cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); 342 pr_info("Page sizes from device-tree:\n");
340 while(size > 0) { 343 size /= 4;
341 unsigned int base_shift = be32_to_cpu(prop[0]); 344 cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
342 unsigned int slbenc = be32_to_cpu(prop[1]); 345 while(size > 0) {
343 unsigned int lpnum = be32_to_cpu(prop[2]); 346 unsigned int base_shift = be32_to_cpu(prop[0]);
344 struct mmu_psize_def *def; 347 unsigned int slbenc = be32_to_cpu(prop[1]);
345 int idx, base_idx; 348 unsigned int lpnum = be32_to_cpu(prop[2]);
346 349 struct mmu_psize_def *def;
347 size -= 3; prop += 3; 350 int idx, base_idx;
348 base_idx = get_idx_from_shift(base_shift); 351
349 if (base_idx < 0) { 352 size -= 3; prop += 3;
350 /* 353 base_idx = get_idx_from_shift(base_shift);
351 * skip the pte encoding also 354 if (base_idx < 0) {
352 */ 355 /* skip the pte encoding also */
353 prop += lpnum * 2; size -= lpnum * 2; 356 prop += lpnum * 2; size -= lpnum * 2;
357 continue;
358 }
359 def = &mmu_psize_defs[base_idx];
360 if (base_idx == MMU_PAGE_16M)
361 cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
362
363 def->shift = base_shift;
364 if (base_shift <= 23)
365 def->avpnm = 0;
366 else
367 def->avpnm = (1 << (base_shift - 23)) - 1;
368 def->sllp = slbenc;
369 /*
370 * We don't know for sure what's up with tlbiel, so
371 * for now we only set it for 4K and 64K pages
372 */
373 if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K)
374 def->tlbiel = 1;
375 else
376 def->tlbiel = 0;
377
378 while (size > 0 && lpnum) {
379 unsigned int shift = be32_to_cpu(prop[0]);
380 int penc = be32_to_cpu(prop[1]);
381
382 prop += 2; size -= 2;
383 lpnum--;
384
385 idx = get_idx_from_shift(shift);
386 if (idx < 0)
354 continue; 387 continue;
355 } 388
356 def = &mmu_psize_defs[base_idx]; 389 if (penc == -1)
357 if (base_idx == MMU_PAGE_16M) 390 pr_err("Invalid penc for base_shift=%d "
358 cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; 391 "shift=%d\n", base_shift, shift);
359 392
360 def->shift = base_shift; 393 def->penc[idx] = penc;
361 if (base_shift <= 23) 394 pr_info("base_shift=%d: shift=%d, sllp=0x%04lx,"
362 def->avpnm = 0; 395 " avpnm=0x%08lx, tlbiel=%d, penc=%d\n",
363 else 396 base_shift, shift, def->sllp,
364 def->avpnm = (1 << (base_shift - 23)) - 1; 397 def->avpnm, def->tlbiel, def->penc[idx]);
365 def->sllp = slbenc;
366 /*
367 * We don't know for sure what's up with tlbiel, so
368 * for now we only set it for 4K and 64K pages
369 */
370 if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K)
371 def->tlbiel = 1;
372 else
373 def->tlbiel = 0;
374
375 while (size > 0 && lpnum) {
376 unsigned int shift = be32_to_cpu(prop[0]);
377 int penc = be32_to_cpu(prop[1]);
378
379 prop += 2; size -= 2;
380 lpnum--;
381
382 idx = get_idx_from_shift(shift);
383 if (idx < 0)
384 continue;
385
386 if (penc == -1)
387 pr_err("Invalid penc for base_shift=%d "
388 "shift=%d\n", base_shift, shift);
389
390 def->penc[idx] = penc;
391 pr_info("base_shift=%d: shift=%d, sllp=0x%04lx,"
392 " avpnm=0x%08lx, tlbiel=%d, penc=%d\n",
393 base_shift, shift, def->sllp,
394 def->avpnm, def->tlbiel, def->penc[idx]);
395 }
396 } 398 }
397 return 1;
398 } 399 }
399 return 0; 400
401 return 1;
400} 402}
401 403
402#ifdef CONFIG_HUGETLB_PAGE 404#ifdef CONFIG_HUGETLB_PAGE
@@ -821,21 +823,14 @@ static void __init htab_initialize(void)
821 823
822void __init early_init_mmu(void) 824void __init early_init_mmu(void)
823{ 825{
824 /* Setup initial STAB address in the PACA */
825 get_paca()->stab_real = __pa((u64)&initial_stab);
826 get_paca()->stab_addr = (u64)&initial_stab;
827
828 /* Initialize the MMU Hash table and create the linear mapping 826 /* Initialize the MMU Hash table and create the linear mapping
829 * of memory. Has to be done before stab/slb initialization as 827 * of memory. Has to be done before SLB initialization as this is
830 * this is currently where the page size encoding is obtained 828 * currently where the page size encoding is obtained.
831 */ 829 */
832 htab_initialize(); 830 htab_initialize();
833 831
834 /* Initialize stab / SLB management */ 832 /* Initialize SLB management */
835 if (mmu_has_feature(MMU_FTR_SLB)) 833 slb_initialize();
836 slb_initialize();
837 else
838 stab_initialize(get_paca()->stab_real);
839} 834}
840 835
841#ifdef CONFIG_SMP 836#ifdef CONFIG_SMP
@@ -845,13 +840,8 @@ void early_init_mmu_secondary(void)
845 if (!firmware_has_feature(FW_FEATURE_LPAR)) 840 if (!firmware_has_feature(FW_FEATURE_LPAR))
846 mtspr(SPRN_SDR1, _SDR1); 841 mtspr(SPRN_SDR1, _SDR1);
847 842
848 /* Initialize STAB/SLB. We use a virtual address as it works 843 /* Initialize SLB */
849 * in real mode on pSeries. 844 slb_initialize();
850 */
851 if (mmu_has_feature(MMU_FTR_SLB))
852 slb_initialize();
853 else
854 stab_initialize(get_paca()->stab_addr);
855} 845}
856#endif /* CONFIG_SMP */ 846#endif /* CONFIG_SMP */
857 847
@@ -879,7 +869,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
879} 869}
880 870
881#ifdef CONFIG_PPC_MM_SLICES 871#ifdef CONFIG_PPC_MM_SLICES
882unsigned int get_paca_psize(unsigned long addr) 872static unsigned int get_paca_psize(unsigned long addr)
883{ 873{
884 u64 lpsizes; 874 u64 lpsizes;
885 unsigned char *hpsizes; 875 unsigned char *hpsizes;
@@ -913,10 +903,8 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
913 if (get_slice_psize(mm, addr) == MMU_PAGE_4K) 903 if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
914 return; 904 return;
915 slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); 905 slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
916#ifdef CONFIG_SPU_BASE 906 copro_flush_all_slbs(mm);
917 spu_flush_all_slbs(mm); 907 if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) {
918#endif
919 if (get_paca_psize(addr) != MMU_PAGE_4K) {
920 get_paca()->context = mm->context; 908 get_paca()->context = mm->context;
921 slb_flush_and_rebolt(); 909 slb_flush_and_rebolt();
922 } 910 }
@@ -1001,12 +989,11 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
1001 * -1 - critical hash insertion error 989 * -1 - critical hash insertion error
1002 * -2 - access not permitted by subpage protection mechanism 990 * -2 - access not permitted by subpage protection mechanism
1003 */ 991 */
1004int hash_page(unsigned long ea, unsigned long access, unsigned long trap) 992int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap)
1005{ 993{
1006 enum ctx_state prev_state = exception_enter(); 994 enum ctx_state prev_state = exception_enter();
1007 pgd_t *pgdir; 995 pgd_t *pgdir;
1008 unsigned long vsid; 996 unsigned long vsid;
1009 struct mm_struct *mm;
1010 pte_t *ptep; 997 pte_t *ptep;
1011 unsigned hugeshift; 998 unsigned hugeshift;
1012 const struct cpumask *tmp; 999 const struct cpumask *tmp;
@@ -1020,7 +1007,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
1020 switch (REGION_ID(ea)) { 1007 switch (REGION_ID(ea)) {
1021 case USER_REGION_ID: 1008 case USER_REGION_ID:
1022 user_region = 1; 1009 user_region = 1;
1023 mm = current->mm;
1024 if (! mm) { 1010 if (! mm) {
1025 DBG_LOW(" user region with no mm !\n"); 1011 DBG_LOW(" user region with no mm !\n");
1026 rc = 1; 1012 rc = 1;
@@ -1031,7 +1017,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
1031 vsid = get_vsid(mm->context.id, ea, ssize); 1017 vsid = get_vsid(mm->context.id, ea, ssize);
1032 break; 1018 break;
1033 case VMALLOC_REGION_ID: 1019 case VMALLOC_REGION_ID:
1034 mm = &init_mm;
1035 vsid = get_kernel_vsid(ea, mmu_kernel_ssize); 1020 vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
1036 if (ea < VMALLOC_END) 1021 if (ea < VMALLOC_END)
1037 psize = mmu_vmalloc_psize; 1022 psize = mmu_vmalloc_psize;
@@ -1116,7 +1101,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
1116 WARN_ON(1); 1101 WARN_ON(1);
1117 } 1102 }
1118#endif 1103#endif
1119 check_paca_psize(ea, mm, psize, user_region); 1104 if (current->mm == mm)
1105 check_paca_psize(ea, mm, psize, user_region);
1120 1106
1121 goto bail; 1107 goto bail;
1122 } 1108 }
@@ -1153,13 +1139,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
1153 "to 4kB pages because of " 1139 "to 4kB pages because of "
1154 "non-cacheable mapping\n"); 1140 "non-cacheable mapping\n");
1155 psize = mmu_vmalloc_psize = MMU_PAGE_4K; 1141 psize = mmu_vmalloc_psize = MMU_PAGE_4K;
1156#ifdef CONFIG_SPU_BASE 1142 copro_flush_all_slbs(mm);
1157 spu_flush_all_slbs(mm);
1158#endif
1159 } 1143 }
1160 } 1144 }
1161 1145
1162 check_paca_psize(ea, mm, psize, user_region); 1146 if (current->mm == mm)
1147 check_paca_psize(ea, mm, psize, user_region);
1163#endif /* CONFIG_PPC_64K_PAGES */ 1148#endif /* CONFIG_PPC_64K_PAGES */
1164 1149
1165#ifdef CONFIG_PPC_HAS_HASH_64K 1150#ifdef CONFIG_PPC_HAS_HASH_64K
@@ -1194,6 +1179,17 @@ bail:
1194 exception_exit(prev_state); 1179 exception_exit(prev_state);
1195 return rc; 1180 return rc;
1196} 1181}
1182EXPORT_SYMBOL_GPL(hash_page_mm);
1183
1184int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
1185{
1186 struct mm_struct *mm = current->mm;
1187
1188 if (REGION_ID(ea) == VMALLOC_REGION_ID)
1189 mm = &init_mm;
1190
1191 return hash_page_mm(mm, ea, access, trap);
1192}
1197EXPORT_SYMBOL_GPL(hash_page); 1193EXPORT_SYMBOL_GPL(hash_page);
1198 1194
1199void hash_preload(struct mm_struct *mm, unsigned long ea, 1195void hash_preload(struct mm_struct *mm, unsigned long ea,
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 826893fcb3a7..5f5e6328c21c 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -18,6 +18,57 @@
18#include <linux/mm.h> 18#include <linux/mm.h>
19#include <asm/machdep.h> 19#include <asm/machdep.h>
20 20
21static void invalidate_old_hpte(unsigned long vsid, unsigned long addr,
22 pmd_t *pmdp, unsigned int psize, int ssize)
23{
24 int i, max_hpte_count, valid;
25 unsigned long s_addr;
26 unsigned char *hpte_slot_array;
27 unsigned long hidx, shift, vpn, hash, slot;
28
29 s_addr = addr & HPAGE_PMD_MASK;
30 hpte_slot_array = get_hpte_slot_array(pmdp);
31 /*
32 * IF we try to do a HUGE PTE update after a withdraw is done.
33 * we will find the below NULL. This happens when we do
34 * split_huge_page_pmd
35 */
36 if (!hpte_slot_array)
37 return;
38
39 if (ppc_md.hugepage_invalidate)
40 return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
41 psize, ssize);
42 /*
43 * No bluk hpte removal support, invalidate each entry
44 */
45 shift = mmu_psize_defs[psize].shift;
46 max_hpte_count = HPAGE_PMD_SIZE >> shift;
47 for (i = 0; i < max_hpte_count; i++) {
48 /*
49 * 8 bits per each hpte entries
50 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
51 */
52 valid = hpte_valid(hpte_slot_array, i);
53 if (!valid)
54 continue;
55 hidx = hpte_hash_index(hpte_slot_array, i);
56
57 /* get the vpn */
58 addr = s_addr + (i * (1ul << shift));
59 vpn = hpt_vpn(addr, vsid, ssize);
60 hash = hpt_hash(vpn, shift, ssize);
61 if (hidx & _PTEIDX_SECONDARY)
62 hash = ~hash;
63
64 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
65 slot += hidx & _PTEIDX_GROUP_IX;
66 ppc_md.hpte_invalidate(slot, vpn, psize,
67 MMU_PAGE_16M, ssize, 0);
68 }
69}
70
71
21int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, 72int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
22 pmd_t *pmdp, unsigned long trap, int local, int ssize, 73 pmd_t *pmdp, unsigned long trap, int local, int ssize,
23 unsigned int psize) 74 unsigned int psize)
@@ -33,7 +84,9 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
33 * atomically mark the linux large page PMD busy and dirty 84 * atomically mark the linux large page PMD busy and dirty
34 */ 85 */
35 do { 86 do {
36 old_pmd = pmd_val(*pmdp); 87 pmd_t pmd = ACCESS_ONCE(*pmdp);
88
89 old_pmd = pmd_val(pmd);
37 /* If PMD busy, retry the access */ 90 /* If PMD busy, retry the access */
38 if (unlikely(old_pmd & _PAGE_BUSY)) 91 if (unlikely(old_pmd & _PAGE_BUSY))
39 return 0; 92 return 0;
@@ -85,6 +138,15 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
85 vpn = hpt_vpn(ea, vsid, ssize); 138 vpn = hpt_vpn(ea, vsid, ssize);
86 hash = hpt_hash(vpn, shift, ssize); 139 hash = hpt_hash(vpn, shift, ssize);
87 hpte_slot_array = get_hpte_slot_array(pmdp); 140 hpte_slot_array = get_hpte_slot_array(pmdp);
141 if (psize == MMU_PAGE_4K) {
142 /*
143 * invalidate the old hpte entry if we have that mapped via 64K
144 * base page size. This is because demote_segment won't flush
145 * hash page table entries.
146 */
147 if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO))
148 invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize);
149 }
88 150
89 valid = hpte_valid(hpte_slot_array, index); 151 valid = hpte_valid(hpte_slot_array, index);
90 if (valid) { 152 if (valid) {
@@ -107,11 +169,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
107 * safely update this here. 169 * safely update this here.
108 */ 170 */
109 valid = 0; 171 valid = 0;
110 new_pmd &= ~_PAGE_HPTEFLAGS;
111 hpte_slot_array[index] = 0; 172 hpte_slot_array[index] = 0;
112 } else 173 }
113 /* clear the busy bits and set the hash pte bits */
114 new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
115 } 174 }
116 175
117 if (!valid) { 176 if (!valid) {
@@ -119,11 +178,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
119 178
120 /* insert new entry */ 179 /* insert new entry */
121 pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; 180 pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
122repeat: 181 new_pmd |= _PAGE_HASHPTE;
123 hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
124
125 /* clear the busy bits and set the hash pte bits */
126 new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
127 182
128 /* Add in WIMG bits */ 183 /* Add in WIMG bits */
129 rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | 184 rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
@@ -132,6 +187,8 @@ repeat:
132 * enable the memory coherence always 187 * enable the memory coherence always
133 */ 188 */
134 rflags |= HPTE_R_M; 189 rflags |= HPTE_R_M;
190repeat:
191 hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
135 192
136 /* Insert into the hash table, primary slot */ 193 /* Insert into the hash table, primary slot */
137 slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, 194 slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
@@ -172,8 +229,17 @@ repeat:
172 mark_hpte_slot_valid(hpte_slot_array, index, slot); 229 mark_hpte_slot_valid(hpte_slot_array, index, slot);
173 } 230 }
174 /* 231 /*
175 * No need to use ldarx/stdcx here 232 * Mark the pte with _PAGE_COMBO, if we are trying to hash it with
233 * base page size 4k.
234 */
235 if (psize == MMU_PAGE_4K)
236 new_pmd |= _PAGE_COMBO;
237 /*
238 * The hpte valid is stored in the pgtable whose address is in the
239 * second half of the PMD. Order this against clearing of the busy bit in
240 * huge pmd.
176 */ 241 */
242 smp_wmb();
177 *pmdp = __pmd(new_pmd & ~_PAGE_BUSY); 243 *pmdp = __pmd(new_pmd & ~_PAGE_BUSY);
178 return 0; 244 return 0;
179} 245}
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index cff59f1bec23..415a51b028b9 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -103,14 +103,14 @@ unsigned long __max_low_memory = MAX_LOW_MEM;
103/* 103/*
104 * Check for command-line options that affect what MMU_init will do. 104 * Check for command-line options that affect what MMU_init will do.
105 */ 105 */
106void MMU_setup(void) 106void __init MMU_setup(void)
107{ 107{
108 /* Check for nobats option (used in mapin_ram). */ 108 /* Check for nobats option (used in mapin_ram). */
109 if (strstr(cmd_line, "nobats")) { 109 if (strstr(boot_command_line, "nobats")) {
110 __map_without_bats = 1; 110 __map_without_bats = 1;
111 } 111 }
112 112
113 if (strstr(cmd_line, "noltlbs")) { 113 if (strstr(boot_command_line, "noltlbs")) {
114 __map_without_ltlbs = 1; 114 __map_without_ltlbs = 1;
115 } 115 }
116#ifdef CONFIG_DEBUG_PAGEALLOC 116#ifdef CONFIG_DEBUG_PAGEALLOC
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index e3734edffa69..3481556a1880 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -175,9 +175,10 @@ static unsigned long __meminit vmemmap_section_start(unsigned long page)
175static int __meminit vmemmap_populated(unsigned long start, int page_size) 175static int __meminit vmemmap_populated(unsigned long start, int page_size)
176{ 176{
177 unsigned long end = start + page_size; 177 unsigned long end = start + page_size;
178 start = (unsigned long)(pfn_to_page(vmemmap_section_start(start)));
178 179
179 for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page))) 180 for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page)))
180 if (pfn_valid(vmemmap_section_start(start))) 181 if (pfn_valid(page_to_pfn((struct page *)start)))
181 return 1; 182 return 1;
182 183
183 return 0; 184 return 0;
@@ -212,6 +213,13 @@ static void __meminit vmemmap_create_mapping(unsigned long start,
212 for (i = 0; i < page_size; i += PAGE_SIZE) 213 for (i = 0; i < page_size; i += PAGE_SIZE)
213 BUG_ON(map_kernel_page(start + i, phys, flags)); 214 BUG_ON(map_kernel_page(start + i, phys, flags));
214} 215}
216
217#ifdef CONFIG_MEMORY_HOTPLUG
218static void vmemmap_remove_mapping(unsigned long start,
219 unsigned long page_size)
220{
221}
222#endif
215#else /* CONFIG_PPC_BOOK3E */ 223#else /* CONFIG_PPC_BOOK3E */
216static void __meminit vmemmap_create_mapping(unsigned long start, 224static void __meminit vmemmap_create_mapping(unsigned long start,
217 unsigned long page_size, 225 unsigned long page_size,
@@ -223,17 +231,39 @@ static void __meminit vmemmap_create_mapping(unsigned long start,
223 mmu_kernel_ssize); 231 mmu_kernel_ssize);
224 BUG_ON(mapped < 0); 232 BUG_ON(mapped < 0);
225} 233}
234
235#ifdef CONFIG_MEMORY_HOTPLUG
236static void vmemmap_remove_mapping(unsigned long start,
237 unsigned long page_size)
238{
239 int mapped = htab_remove_mapping(start, start + page_size,
240 mmu_vmemmap_psize,
241 mmu_kernel_ssize);
242 BUG_ON(mapped < 0);
243}
244#endif
245
226#endif /* CONFIG_PPC_BOOK3E */ 246#endif /* CONFIG_PPC_BOOK3E */
227 247
228struct vmemmap_backing *vmemmap_list; 248struct vmemmap_backing *vmemmap_list;
249static struct vmemmap_backing *next;
250static int num_left;
251static int num_freed;
229 252
230static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) 253static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node)
231{ 254{
232 static struct vmemmap_backing *next; 255 struct vmemmap_backing *vmem_back;
233 static int num_left; 256 /* get from freed entries first */
257 if (num_freed) {
258 num_freed--;
259 vmem_back = next;
260 next = next->list;
261
262 return vmem_back;
263 }
234 264
235 /* allocate a page when required and hand out chunks */ 265 /* allocate a page when required and hand out chunks */
236 if (!next || !num_left) { 266 if (!num_left) {
237 next = vmemmap_alloc_block(PAGE_SIZE, node); 267 next = vmemmap_alloc_block(PAGE_SIZE, node);
238 if (unlikely(!next)) { 268 if (unlikely(!next)) {
239 WARN_ON(1); 269 WARN_ON(1);
@@ -296,10 +326,85 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
296 return 0; 326 return 0;
297} 327}
298 328
299void vmemmap_free(unsigned long start, unsigned long end) 329#ifdef CONFIG_MEMORY_HOTPLUG
330static unsigned long vmemmap_list_free(unsigned long start)
300{ 331{
332 struct vmemmap_backing *vmem_back, *vmem_back_prev;
333
334 vmem_back_prev = vmem_back = vmemmap_list;
335
336 /* look for it with prev pointer recorded */
337 for (; vmem_back; vmem_back = vmem_back->list) {
338 if (vmem_back->virt_addr == start)
339 break;
340 vmem_back_prev = vmem_back;
341 }
342
343 if (unlikely(!vmem_back)) {
344 WARN_ON(1);
345 return 0;
346 }
347
348 /* remove it from vmemmap_list */
349 if (vmem_back == vmemmap_list) /* remove head */
350 vmemmap_list = vmem_back->list;
351 else
352 vmem_back_prev->list = vmem_back->list;
353
354 /* next point to this freed entry */
355 vmem_back->list = next;
356 next = vmem_back;
357 num_freed++;
358
359 return vmem_back->phys;
301} 360}
302 361
362void __ref vmemmap_free(unsigned long start, unsigned long end)
363{
364 unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
365
366 start = _ALIGN_DOWN(start, page_size);
367
368 pr_debug("vmemmap_free %lx...%lx\n", start, end);
369
370 for (; start < end; start += page_size) {
371 unsigned long addr;
372
373 /*
374 * the section has already be marked as invalid, so
375 * vmemmap_populated() true means some other sections still
376 * in this page, so skip it.
377 */
378 if (vmemmap_populated(start, page_size))
379 continue;
380
381 addr = vmemmap_list_free(start);
382 if (addr) {
383 struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
384
385 if (PageReserved(page)) {
386 /* allocated from bootmem */
387 if (page_size < PAGE_SIZE) {
388 /*
389 * this shouldn't happen, but if it is
390 * the case, leave the memory there
391 */
392 WARN_ON_ONCE(1);
393 } else {
394 unsigned int nr_pages =
395 1 << get_order(page_size);
396 while (nr_pages--)
397 free_reserved_page(page++);
398 }
399 } else
400 free_pages((unsigned long)(__va(addr)),
401 get_order(page_size));
402
403 vmemmap_remove_mapping(start, page_size);
404 }
405 }
406}
407#endif
303void register_page_bootmem_memmap(unsigned long section_nr, 408void register_page_bootmem_memmap(unsigned long section_nr,
304 struct page *start_page, unsigned long size) 409 struct page *start_page, unsigned long size)
305{ 410{
@@ -331,16 +436,16 @@ struct page *realmode_pfn_to_page(unsigned long pfn)
331 if (pg_va < vmem_back->virt_addr) 436 if (pg_va < vmem_back->virt_addr)
332 continue; 437 continue;
333 438
334 /* Check that page struct is not split between real pages */ 439 /* After vmemmap_list entry free is possible, need check all */
335 if ((pg_va + sizeof(struct page)) > 440 if ((pg_va + sizeof(struct page)) <=
336 (vmem_back->virt_addr + page_size)) 441 (vmem_back->virt_addr + page_size)) {
337 return NULL; 442 page = (struct page *) (vmem_back->phys + pg_va -
338
339 page = (struct page *) (vmem_back->phys + pg_va -
340 vmem_back->virt_addr); 443 vmem_back->virt_addr);
341 return page; 444 return page;
445 }
342 } 446 }
343 447
448 /* Probably that page struct is split between real pages */
344 return NULL; 449 return NULL;
345} 450}
346EXPORT_SYMBOL_GPL(realmode_pfn_to_page); 451EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 2c8e90f5789e..8ebaac75c940 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -128,7 +128,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
128 return -EINVAL; 128 return -EINVAL;
129 129
130 /* this should work for most non-highmem platforms */ 130 /* this should work for most non-highmem platforms */
131 zone = pgdata->node_zones; 131 zone = pgdata->node_zones +
132 zone_for_memory(nid, start, size, 0);
132 133
133 return __add_pages(nid, zone, start_pfn, nr_pages); 134 return __add_pages(nid, zone, start_pfn, nr_pages);
134} 135}
@@ -259,6 +260,60 @@ static int __init mark_nonram_nosave(void)
259 } 260 }
260 return 0; 261 return 0;
261} 262}
263#else /* CONFIG_NEED_MULTIPLE_NODES */
264static int __init mark_nonram_nosave(void)
265{
266 return 0;
267}
268#endif
269
270static bool zone_limits_final;
271
272static unsigned long max_zone_pfns[MAX_NR_ZONES] = {
273 [0 ... MAX_NR_ZONES - 1] = ~0UL
274};
275
276/*
277 * Restrict the specified zone and all more restrictive zones
278 * to be below the specified pfn. May not be called after
279 * paging_init().
280 */
281void __init limit_zone_pfn(enum zone_type zone, unsigned long pfn_limit)
282{
283 int i;
284
285 if (WARN_ON(zone_limits_final))
286 return;
287
288 for (i = zone; i >= 0; i--) {
289 if (max_zone_pfns[i] > pfn_limit)
290 max_zone_pfns[i] = pfn_limit;
291 }
292}
293
294/*
295 * Find the least restrictive zone that is entirely below the
296 * specified pfn limit. Returns < 0 if no suitable zone is found.
297 *
298 * pfn_limit must be u64 because it can exceed 32 bits even on 32-bit
299 * systems -- the DMA limit can be higher than any possible real pfn.
300 */
301int dma_pfn_limit_to_zone(u64 pfn_limit)
302{
303 enum zone_type top_zone = ZONE_NORMAL;
304 int i;
305
306#ifdef CONFIG_HIGHMEM
307 top_zone = ZONE_HIGHMEM;
308#endif
309
310 for (i = top_zone; i >= 0; i--) {
311 if (max_zone_pfns[i] <= pfn_limit)
312 return i;
313 }
314
315 return -EPERM;
316}
262 317
263/* 318/*
264 * paging_init() sets up the page tables - in fact we've already done this. 319 * paging_init() sets up the page tables - in fact we've already done this.
@@ -267,7 +322,7 @@ void __init paging_init(void)
267{ 322{
268 unsigned long long total_ram = memblock_phys_mem_size(); 323 unsigned long long total_ram = memblock_phys_mem_size();
269 phys_addr_t top_of_ram = memblock_end_of_DRAM(); 324 phys_addr_t top_of_ram = memblock_end_of_DRAM();
270 unsigned long max_zone_pfns[MAX_NR_ZONES]; 325 enum zone_type top_zone;
271 326
272#ifdef CONFIG_PPC32 327#ifdef CONFIG_PPC32
273 unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1); 328 unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1);
@@ -289,18 +344,20 @@ void __init paging_init(void)
289 (unsigned long long)top_of_ram, total_ram); 344 (unsigned long long)top_of_ram, total_ram);
290 printk(KERN_DEBUG "Memory hole size: %ldMB\n", 345 printk(KERN_DEBUG "Memory hole size: %ldMB\n",
291 (long int)((top_of_ram - total_ram) >> 20)); 346 (long int)((top_of_ram - total_ram) >> 20));
292 memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); 347
293#ifdef CONFIG_HIGHMEM 348#ifdef CONFIG_HIGHMEM
294 max_zone_pfns[ZONE_DMA] = lowmem_end_addr >> PAGE_SHIFT; 349 top_zone = ZONE_HIGHMEM;
295 max_zone_pfns[ZONE_HIGHMEM] = top_of_ram >> PAGE_SHIFT; 350 limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT);
296#else 351#else
297 max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; 352 top_zone = ZONE_NORMAL;
298#endif 353#endif
354
355 limit_zone_pfn(top_zone, top_of_ram >> PAGE_SHIFT);
356 zone_limits_final = true;
299 free_area_init_nodes(max_zone_pfns); 357 free_area_init_nodes(max_zone_pfns);
300 358
301 mark_nonram_nosave(); 359 mark_nonram_nosave();
302} 360}
303#endif /* ! CONFIG_NEED_MULTIPLE_NODES */
304 361
305static void __init register_page_bootmem_info(void) 362static void __init register_page_bootmem_info(void)
306{ 363{
diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c
index 78fef6726e10..aa5a7fd89461 100644
--- a/arch/powerpc/mm/mmu_context_hash32.c
+++ b/arch/powerpc/mm/mmu_context_hash32.c
@@ -2,7 +2,7 @@
2 * This file contains the routines for handling the MMU on those 2 * This file contains the routines for handling the MMU on those
3 * PowerPC implementations where the MMU substantially follows the 3 * PowerPC implementations where the MMU substantially follows the
4 * architecture specification. This includes the 6xx, 7xx, 7xxx, 4 * architecture specification. This includes the 6xx, 7xx, 7xxx,
5 * 8260, and POWER3 implementations but excludes the 8xx and 4xx. 5 * and 8260 implementations but excludes the 8xx and 4xx.
6 * -- paulus 6 * -- paulus
7 * 7 *
8 * Derived from arch/ppc/mm/init.c: 8 * Derived from arch/ppc/mm/init.c:
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 3b181b22cd46..b9d1dfdbe5bb 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -8,6 +8,8 @@
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11#define pr_fmt(fmt) "numa: " fmt
12
11#include <linux/threads.h> 13#include <linux/threads.h>
12#include <linux/bootmem.h> 14#include <linux/bootmem.h>
13#include <linux/init.h> 15#include <linux/init.h>
@@ -538,7 +540,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
538 */ 540 */
539static int numa_setup_cpu(unsigned long lcpu) 541static int numa_setup_cpu(unsigned long lcpu)
540{ 542{
541 int nid; 543 int nid = -1;
542 struct device_node *cpu; 544 struct device_node *cpu;
543 545
544 /* 546 /*
@@ -555,19 +557,21 @@ static int numa_setup_cpu(unsigned long lcpu)
555 557
556 if (!cpu) { 558 if (!cpu) {
557 WARN_ON(1); 559 WARN_ON(1);
558 nid = 0; 560 if (cpu_present(lcpu))
559 goto out; 561 goto out_present;
562 else
563 goto out;
560 } 564 }
561 565
562 nid = of_node_to_nid_single(cpu); 566 nid = of_node_to_nid_single(cpu);
563 567
568out_present:
564 if (nid < 0 || !node_online(nid)) 569 if (nid < 0 || !node_online(nid))
565 nid = first_online_node; 570 nid = first_online_node;
566out:
567 map_cpu_to_node(lcpu, nid);
568 571
572 map_cpu_to_node(lcpu, nid);
569 of_node_put(cpu); 573 of_node_put(cpu);
570 574out:
571 return nid; 575 return nid;
572} 576}
573 577
@@ -611,8 +615,8 @@ static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action,
611 case CPU_UP_CANCELED: 615 case CPU_UP_CANCELED:
612 case CPU_UP_CANCELED_FROZEN: 616 case CPU_UP_CANCELED_FROZEN:
613 unmap_cpu_from_node(lcpu); 617 unmap_cpu_from_node(lcpu);
614 break;
615 ret = NOTIFY_OK; 618 ret = NOTIFY_OK;
619 break;
616#endif 620#endif
617 } 621 }
618 return ret; 622 return ret;
@@ -1049,7 +1053,7 @@ static void __init mark_reserved_regions_for_nid(int nid)
1049 1053
1050void __init do_init_bootmem(void) 1054void __init do_init_bootmem(void)
1051{ 1055{
1052 int nid; 1056 int nid, cpu;
1053 1057
1054 min_low_pfn = 0; 1058 min_low_pfn = 0;
1055 max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; 1059 max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
@@ -1122,16 +1126,14 @@ void __init do_init_bootmem(void)
1122 1126
1123 reset_numa_cpu_lookup_table(); 1127 reset_numa_cpu_lookup_table();
1124 register_cpu_notifier(&ppc64_numa_nb); 1128 register_cpu_notifier(&ppc64_numa_nb);
1125 cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, 1129 /*
1126 (void *)(unsigned long)boot_cpuid); 1130 * We need the numa_cpu_lookup_table to be accurate for all CPUs,
1127} 1131 * even before we online them, so that we can use cpu_to_{node,mem}
1128 1132 * early in boot, cf. smp_prepare_cpus().
1129void __init paging_init(void) 1133 */
1130{ 1134 for_each_present_cpu(cpu) {
1131 unsigned long max_zone_pfns[MAX_NR_ZONES]; 1135 numa_setup_cpu((unsigned long)cpu);
1132 memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); 1136 }
1133 max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT;
1134 free_area_init_nodes(max_zone_pfns);
1135} 1137}
1136 1138
1137static int __init early_numa(char *p) 1139static int __init early_numa(char *p)
@@ -1153,6 +1155,22 @@ static int __init early_numa(char *p)
1153} 1155}
1154early_param("numa", early_numa); 1156early_param("numa", early_numa);
1155 1157
1158static bool topology_updates_enabled = true;
1159
1160static int __init early_topology_updates(char *p)
1161{
1162 if (!p)
1163 return 0;
1164
1165 if (!strcmp(p, "off")) {
1166 pr_info("Disabling topology updates\n");
1167 topology_updates_enabled = false;
1168 }
1169
1170 return 0;
1171}
1172early_param("topology_updates", early_topology_updates);
1173
1156#ifdef CONFIG_MEMORY_HOTPLUG 1174#ifdef CONFIG_MEMORY_HOTPLUG
1157/* 1175/*
1158 * Find the node associated with a hot added memory section for 1176 * Find the node associated with a hot added memory section for
@@ -1442,8 +1460,11 @@ static long hcall_vphn(unsigned long cpu, __be32 *associativity)
1442 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; 1460 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
1443 u64 flags = 1; 1461 u64 flags = 1;
1444 int hwcpu = get_hard_smp_processor_id(cpu); 1462 int hwcpu = get_hard_smp_processor_id(cpu);
1463 int i;
1445 1464
1446 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu); 1465 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
1466 for (i = 0; i < 6; i++)
1467 retbuf[i] = cpu_to_be64(retbuf[i]);
1447 vphn_unpack_associativity(retbuf, associativity); 1468 vphn_unpack_associativity(retbuf, associativity);
1448 1469
1449 return rc; 1470 return rc;
@@ -1488,11 +1509,14 @@ static int update_cpu_topology(void *data)
1488 cpu = smp_processor_id(); 1509 cpu = smp_processor_id();
1489 1510
1490 for (update = data; update; update = update->next) { 1511 for (update = data; update; update = update->next) {
1512 int new_nid = update->new_nid;
1491 if (cpu != update->cpu) 1513 if (cpu != update->cpu)
1492 continue; 1514 continue;
1493 1515
1494 unmap_cpu_from_node(update->cpu); 1516 unmap_cpu_from_node(cpu);
1495 map_cpu_to_node(update->cpu, update->new_nid); 1517 map_cpu_to_node(cpu, new_nid);
1518 set_cpu_numa_node(cpu, new_nid);
1519 set_cpu_numa_mem(cpu, local_memory_node(new_nid));
1496 vdso_getcpu_init(); 1520 vdso_getcpu_init();
1497 } 1521 }
1498 1522
@@ -1539,6 +1563,9 @@ int arch_update_cpu_topology(void)
1539 struct device *dev; 1563 struct device *dev;
1540 int weight, new_nid, i = 0; 1564 int weight, new_nid, i = 0;
1541 1565
1566 if (!prrn_enabled && !vphn_enabled)
1567 return 0;
1568
1542 weight = cpumask_weight(&cpu_associativity_changes_mask); 1569 weight = cpumask_weight(&cpu_associativity_changes_mask);
1543 if (!weight) 1570 if (!weight)
1544 return 0; 1571 return 0;
@@ -1592,6 +1619,15 @@ int arch_update_cpu_topology(void)
1592 cpu = cpu_last_thread_sibling(cpu); 1619 cpu = cpu_last_thread_sibling(cpu);
1593 } 1620 }
1594 1621
1622 pr_debug("Topology update for the following CPUs:\n");
1623 if (cpumask_weight(&updated_cpus)) {
1624 for (ud = &updates[0]; ud; ud = ud->next) {
1625 pr_debug("cpu %d moving from node %d "
1626 "to %d\n", ud->cpu,
1627 ud->old_nid, ud->new_nid);
1628 }
1629 }
1630
1595 /* 1631 /*
1596 * In cases where we have nothing to update (because the updates list 1632 * In cases where we have nothing to update (because the updates list
1597 * is too short or because the new topology is same as the old one), 1633 * is too short or because the new topology is same as the old one),
@@ -1800,8 +1836,12 @@ static const struct file_operations topology_ops = {
1800 1836
1801static int topology_update_init(void) 1837static int topology_update_init(void)
1802{ 1838{
1803 start_topology_update(); 1839 /* Do not poll for changes if disabled at boot */
1804 proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops); 1840 if (topology_updates_enabled)
1841 start_topology_update();
1842
1843 if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
1844 return -ENOMEM;
1805 1845
1806 return 0; 1846 return 0;
1807} 1847}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index c695943a513c..c90e602677c9 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -48,7 +48,7 @@ static inline int pte_looks_normal(pte_t pte)
48 (_PAGE_PRESENT | _PAGE_USER); 48 (_PAGE_PRESENT | _PAGE_USER);
49} 49}
50 50
51struct page * maybe_pte_to_page(pte_t pte) 51static struct page *maybe_pte_to_page(pte_t pte)
52{ 52{
53 unsigned long pfn = pte_pfn(pte); 53 unsigned long pfn = pte_pfn(pte);
54 struct page *page; 54 struct page *page;
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 343a87fa78b5..cf11342bf519 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -41,7 +41,7 @@ unsigned long ioremap_base;
41unsigned long ioremap_bot; 41unsigned long ioremap_bot;
42EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ 42EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
43 43
44#if defined(CONFIG_6xx) || defined(CONFIG_POWER3) 44#ifdef CONFIG_6xx
45#define HAVE_BATS 1 45#define HAVE_BATS 1
46#endif 46#endif
47 47
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index f6ce1f111f5b..c8d709ab489d 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -54,6 +54,9 @@
54 54
55#include "mmu_decl.h" 55#include "mmu_decl.h"
56 56
57#define CREATE_TRACE_POINTS
58#include <trace/events/thp.h>
59
57/* Some sanity checking */ 60/* Some sanity checking */
58#if TASK_SIZE_USER64 > PGTABLE_RANGE 61#if TASK_SIZE_USER64 > PGTABLE_RANGE
59#error TASK_SIZE_USER64 exceeds pagetable range 62#error TASK_SIZE_USER64 exceeds pagetable range
@@ -68,7 +71,7 @@
68unsigned long ioremap_bot = IOREMAP_BASE; 71unsigned long ioremap_bot = IOREMAP_BASE;
69 72
70#ifdef CONFIG_PPC_MMU_NOHASH 73#ifdef CONFIG_PPC_MMU_NOHASH
71static void *early_alloc_pgtable(unsigned long size) 74static __ref void *early_alloc_pgtable(unsigned long size)
72{ 75{
73 void *pt; 76 void *pt;
74 77
@@ -537,8 +540,9 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
537 old = pmd_val(*pmdp); 540 old = pmd_val(*pmdp);
538 *pmdp = __pmd((old & ~clr) | set); 541 *pmdp = __pmd((old & ~clr) | set);
539#endif 542#endif
543 trace_hugepage_update(addr, old, clr, set);
540 if (old & _PAGE_HASHPTE) 544 if (old & _PAGE_HASHPTE)
541 hpte_do_hugepage_flush(mm, addr, pmdp); 545 hpte_do_hugepage_flush(mm, addr, pmdp, old);
542 return old; 546 return old;
543} 547}
544 548
@@ -642,10 +646,11 @@ void pmdp_splitting_flush(struct vm_area_struct *vma,
642 * If we didn't had the splitting flag set, go and flush the 646 * If we didn't had the splitting flag set, go and flush the
643 * HPTE entries. 647 * HPTE entries.
644 */ 648 */
649 trace_hugepage_splitting(address, old);
645 if (!(old & _PAGE_SPLITTING)) { 650 if (!(old & _PAGE_SPLITTING)) {
646 /* We need to flush the hpte */ 651 /* We need to flush the hpte */
647 if (old & _PAGE_HASHPTE) 652 if (old & _PAGE_HASHPTE)
648 hpte_do_hugepage_flush(vma->vm_mm, address, pmdp); 653 hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old);
649 } 654 }
650 /* 655 /*
651 * This ensures that generic code that rely on IRQ disabling 656 * This ensures that generic code that rely on IRQ disabling
@@ -709,6 +714,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
709 assert_spin_locked(&mm->page_table_lock); 714 assert_spin_locked(&mm->page_table_lock);
710 WARN_ON(!pmd_trans_huge(pmd)); 715 WARN_ON(!pmd_trans_huge(pmd));
711#endif 716#endif
717 trace_hugepage_set_pmd(addr, pmd);
712 return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); 718 return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
713} 719}
714 720
@@ -723,7 +729,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
723 * needs to be flushed. 729 * needs to be flushed.
724 */ 730 */
725void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, 731void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
726 pmd_t *pmdp) 732 pmd_t *pmdp, unsigned long old_pmd)
727{ 733{
728 int ssize, i; 734 int ssize, i;
729 unsigned long s_addr; 735 unsigned long s_addr;
@@ -745,12 +751,29 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
745 if (!hpte_slot_array) 751 if (!hpte_slot_array)
746 return; 752 return;
747 753
748 /* get the base page size */ 754 /* get the base page size, vsid and segment size */
755#ifdef CONFIG_DEBUG_VM
749 psize = get_slice_psize(mm, s_addr); 756 psize = get_slice_psize(mm, s_addr);
757 BUG_ON(psize == MMU_PAGE_16M);
758#endif
759 if (old_pmd & _PAGE_COMBO)
760 psize = MMU_PAGE_4K;
761 else
762 psize = MMU_PAGE_64K;
763
764 if (!is_kernel_addr(s_addr)) {
765 ssize = user_segment_size(s_addr);
766 vsid = get_vsid(mm->context.id, s_addr, ssize);
767 WARN_ON(vsid == 0);
768 } else {
769 vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize);
770 ssize = mmu_kernel_ssize;
771 }
750 772
751 if (ppc_md.hugepage_invalidate) 773 if (ppc_md.hugepage_invalidate)
752 return ppc_md.hugepage_invalidate(mm, hpte_slot_array, 774 return ppc_md.hugepage_invalidate(vsid, s_addr,
753 s_addr, psize); 775 hpte_slot_array,
776 psize, ssize);
754 /* 777 /*
755 * No bulk hpte removal support, invalidate each entry 778 * No bulk hpte removal support, invalidate each entry
756 */ 779 */
@@ -768,15 +791,6 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
768 791
769 /* get the vpn */ 792 /* get the vpn */
770 addr = s_addr + (i * (1ul << shift)); 793 addr = s_addr + (i * (1ul << shift));
771 if (!is_kernel_addr(addr)) {
772 ssize = user_segment_size(addr);
773 vsid = get_vsid(mm->context.id, addr, ssize);
774 WARN_ON(vsid == 0);
775 } else {
776 vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
777 ssize = mmu_kernel_ssize;
778 }
779
780 vpn = hpt_vpn(addr, vsid, ssize); 794 vpn = hpt_vpn(addr, vsid, ssize);
781 hash = hpt_hash(vpn, shift, ssize); 795 hash = hpt_hash(vpn, shift, ssize);
782 if (hidx & _PTEIDX_SECONDARY) 796 if (hidx & _PTEIDX_SECONDARY)
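The hpte_do_hugepage_flush() hunks above are heavily interleaved; gathered in one place, the '+' side of those hunks (no new code, just the added lines extracted for readability) is:

	/* get the base page size, vsid and segment size */
#ifdef CONFIG_DEBUG_VM
	psize = get_slice_psize(mm, s_addr);
	BUG_ON(psize == MMU_PAGE_16M);
#endif
	if (old_pmd & _PAGE_COMBO)
		psize = MMU_PAGE_4K;
	else
		psize = MMU_PAGE_64K;

	if (!is_kernel_addr(s_addr)) {
		ssize = user_segment_size(s_addr);
		vsid = get_vsid(mm->context.id, s_addr, ssize);
		WARN_ON(vsid == 0);
	} else {
		vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize);
		ssize = mmu_kernel_ssize;
	}

	if (ppc_md.hugepage_invalidate)
		return ppc_md.hugepage_invalidate(vsid, s_addr,
						  hpte_slot_array,
						  psize, ssize);

The base page size is now derived from the old PMD value (_PAGE_COMBO means the hugepage is backed by 4K HPTEs, otherwise 64K), and vsid/ssize are computed once per hugepage instead of once per subpage inside the invalidation loop, which is why ppc_md.hugepage_invalidate() and hpte_do_hugepage_flush() grew the extra parameters.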
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 11571e118831..5029dc19b517 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -2,7 +2,7 @@
2 * This file contains the routines for handling the MMU on those 2 * This file contains the routines for handling the MMU on those
3 * PowerPC implementations where the MMU substantially follows the 3 * PowerPC implementations where the MMU substantially follows the
4 * architecture specification. This includes the 6xx, 7xx, 7xxx, 4 * architecture specification. This includes the 6xx, 7xx, 7xxx,
5 * 8260, and POWER3 implementations but excludes the 8xx and 4xx. 5 * and 8260 implementations but excludes the 8xx and 4xx.
6 * -- paulus 6 * -- paulus
7 * 7 *
8 * Derived from arch/ppc/mm/init.c: 8 * Derived from arch/ppc/mm/init.c:
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 0399a6702958..6e450ca66526 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -46,9 +46,6 @@ static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
46 return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot; 46 return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot;
47} 47}
48 48
49#define slb_vsid_shift(ssize) \
50 ((ssize) == MMU_SEGSIZE_256M? SLB_VSID_SHIFT: SLB_VSID_SHIFT_1T)
51
52static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, 49static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
53 unsigned long flags) 50 unsigned long flags)
54{ 51{
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index b0c75cc15efc..ded0ea1afde4 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -30,9 +30,11 @@
30#include <linux/err.h> 30#include <linux/err.h>
31#include <linux/spinlock.h> 31#include <linux/spinlock.h>
32#include <linux/export.h> 32#include <linux/export.h>
33#include <linux/hugetlb.h>
33#include <asm/mman.h> 34#include <asm/mman.h>
34#include <asm/mmu.h> 35#include <asm/mmu.h>
35#include <asm/spu.h> 36#include <asm/copro.h>
37#include <asm/hugetlb.h>
36 38
37/* some sanity checks */ 39/* some sanity checks */
38#if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE 40#if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE
@@ -232,9 +234,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
232 234
233 spin_unlock_irqrestore(&slice_convert_lock, flags); 235 spin_unlock_irqrestore(&slice_convert_lock, flags);
234 236
235#ifdef CONFIG_SPU_BASE 237 copro_flush_all_slbs(mm);
236 spu_flush_all_slbs(mm);
237#endif
238} 238}
239 239
240/* 240/*
@@ -671,9 +671,7 @@ void slice_set_psize(struct mm_struct *mm, unsigned long address,
671 671
672 spin_unlock_irqrestore(&slice_convert_lock, flags); 672 spin_unlock_irqrestore(&slice_convert_lock, flags);
673 673
674#ifdef CONFIG_SPU_BASE 674 copro_flush_all_slbs(mm);
675 spu_flush_all_slbs(mm);
676#endif
677} 675}
678 676
679void slice_set_range_psize(struct mm_struct *mm, unsigned long start, 677void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
@@ -684,6 +682,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
684 slice_convert(mm, mask, psize); 682 slice_convert(mm, mask, psize);
685} 683}
686 684
685#ifdef CONFIG_HUGETLB_PAGE
687/* 686/*
688 * is_hugepage_only_range() is used by generic code to verify whether 687 * is_hugepage_only_range() is used by generic code to verify whether
689 * a normal mmap mapping (non hugetlbfs) is valid on a given area. 688 * a normal mmap mapping (non hugetlbfs) is valid on a given area.
@@ -728,4 +727,4 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
728#endif 727#endif
729 return !slice_check_fit(mask, available); 728 return !slice_check_fit(mask, available);
730} 729}
731 730#endif
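The two slice.c hunks drop the #ifdef CONFIG_SPU_BASE guards because copro_flush_all_slbs() is expected to compile away when coprocessor support is not built in. A minimal sketch of that usual header idiom, assuming asm/copro.h follows the common optional-subsystem pattern (the actual header is not part of this diff):

/* sketch only -- modelled on the usual optional-subsystem pattern */
struct mm_struct;

#ifdef CONFIG_PPC_COPRO_BASE
void copro_flush_all_slbs(struct mm_struct *mm);
#else
static inline void copro_flush_all_slbs(struct mm_struct *mm) { }
#endif

With a static inline stub in the !CONFIG_PPC_COPRO_BASE case, callers such as slice_convert() and slice_set_psize() stay free of preprocessor clutter and the compiler removes the call entirely.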
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
deleted file mode 100644
index 3f8efa6f2997..000000000000
--- a/arch/powerpc/mm/stab.c
+++ /dev/null
@@ -1,286 +0,0 @@
1/*
2 * PowerPC64 Segment Translation Support.
3 *
4 * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
5 * Copyright (c) 2001 Dave Engebretsen
6 *
7 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#include <linux/memblock.h>
16
17#include <asm/pgtable.h>
18#include <asm/mmu.h>
19#include <asm/mmu_context.h>
20#include <asm/paca.h>
21#include <asm/cputable.h>
22#include <asm/prom.h>
23
24struct stab_entry {
25 unsigned long esid_data;
26 unsigned long vsid_data;
27};
28
29#define NR_STAB_CACHE_ENTRIES 8
30static DEFINE_PER_CPU(long, stab_cache_ptr);
31static DEFINE_PER_CPU(long [NR_STAB_CACHE_ENTRIES], stab_cache);
32
33/*
34 * Create a segment table entry for the given esid/vsid pair.
35 */
36static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid)
37{
38 unsigned long esid_data, vsid_data;
39 unsigned long entry, group, old_esid, castout_entry, i;
40 unsigned int global_entry;
41 struct stab_entry *ste, *castout_ste;
42 unsigned long kernel_segment = (esid << SID_SHIFT) >= PAGE_OFFSET;
43
44 vsid_data = vsid << STE_VSID_SHIFT;
45 esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V;
46 if (! kernel_segment)
47 esid_data |= STE_ESID_KS;
48
49 /* Search the primary group first. */
50 global_entry = (esid & 0x1f) << 3;
51 ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7));
52
53 /* Find an empty entry, if one exists. */
54 for (group = 0; group < 2; group++) {
55 for (entry = 0; entry < 8; entry++, ste++) {
56 if (!(ste->esid_data & STE_ESID_V)) {
57 ste->vsid_data = vsid_data;
58 eieio();
59 ste->esid_data = esid_data;
60 return (global_entry | entry);
61 }
62 }
63 /* Now search the secondary group. */
64 global_entry = ((~esid) & 0x1f) << 3;
65 ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7));
66 }
67
68 /*
69 * Could not find empty entry, pick one with a round robin selection.
70 * Search all entries in the two groups.
71 */
72 castout_entry = get_paca()->stab_rr;
73 for (i = 0; i < 16; i++) {
74 if (castout_entry < 8) {
75 global_entry = (esid & 0x1f) << 3;
76 ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7));
77 castout_ste = ste + castout_entry;
78 } else {
79 global_entry = ((~esid) & 0x1f) << 3;
80 ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7));
81 castout_ste = ste + (castout_entry - 8);
82 }
83
84 /* Don't cast out the first kernel segment */
85 if ((castout_ste->esid_data & ESID_MASK) != PAGE_OFFSET)
86 break;
87
88 castout_entry = (castout_entry + 1) & 0xf;
89 }
90
91 get_paca()->stab_rr = (castout_entry + 1) & 0xf;
92
93 /* Modify the old entry to the new value. */
94
95 /* Force previous translations to complete. DRENG */
96 asm volatile("isync" : : : "memory");
97
98 old_esid = castout_ste->esid_data >> SID_SHIFT;
99 castout_ste->esid_data = 0; /* Invalidate old entry */
100
101 asm volatile("sync" : : : "memory"); /* Order update */
102
103 castout_ste->vsid_data = vsid_data;
104 eieio(); /* Order update */
105 castout_ste->esid_data = esid_data;
106
107 asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT));
108 /* Ensure completion of slbie */
109 asm volatile("sync" : : : "memory");
110
111 return (global_entry | (castout_entry & 0x7));
112}
113
114/*
115 * Allocate a segment table entry for the given ea and mm
116 */
117static int __ste_allocate(unsigned long ea, struct mm_struct *mm)
118{
119 unsigned long vsid;
120 unsigned char stab_entry;
121 unsigned long offset;
122
123 /* Kernel or user address? */
124 if (is_kernel_addr(ea)) {
125 vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
126 } else {
127 if ((ea >= TASK_SIZE_USER64) || (! mm))
128 return 1;
129
130 vsid = get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M);
131 }
132
133 stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid);
134
135 if (!is_kernel_addr(ea)) {
136 offset = __get_cpu_var(stab_cache_ptr);
137 if (offset < NR_STAB_CACHE_ENTRIES)
138 __get_cpu_var(stab_cache[offset++]) = stab_entry;
139 else
140 offset = NR_STAB_CACHE_ENTRIES+1;
141 __get_cpu_var(stab_cache_ptr) = offset;
142
143 /* Order update */
144 asm volatile("sync":::"memory");
145 }
146
147 return 0;
148}
149
150int ste_allocate(unsigned long ea)
151{
152 return __ste_allocate(ea, current->mm);
153}
154
155/*
156 * Do the segment table work for a context switch: flush all user
157 * entries from the table, then preload some probably useful entries
158 * for the new task
159 */
160void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
161{
162 struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr;
163 struct stab_entry *ste;
164 unsigned long offset;
165 unsigned long pc = KSTK_EIP(tsk);
166 unsigned long stack = KSTK_ESP(tsk);
167 unsigned long unmapped_base;
168
169 /* Force previous translations to complete. DRENG */
170 asm volatile("isync" : : : "memory");
171
172 /*
173 * We need interrupts hard-disabled here, not just soft-disabled,
174 * so that a PMU interrupt can't occur, which might try to access
175 * user memory (to get a stack trace) and possibly cause an STAB miss
176 * which would update the stab_cache/stab_cache_ptr per-cpu variables.
177 */
178 hard_irq_disable();
179
180 offset = __get_cpu_var(stab_cache_ptr);
181 if (offset <= NR_STAB_CACHE_ENTRIES) {
182 int i;
183
184 for (i = 0; i < offset; i++) {
185 ste = stab + __get_cpu_var(stab_cache[i]);
186 ste->esid_data = 0; /* invalidate entry */
187 }
188 } else {
189 unsigned long entry;
190
191 /* Invalidate all entries. */
192 ste = stab;
193
194 /* Never flush the first entry. */
195 ste += 1;
196 for (entry = 1;
197 entry < (HW_PAGE_SIZE / sizeof(struct stab_entry));
198 entry++, ste++) {
199 unsigned long ea;
200 ea = ste->esid_data & ESID_MASK;
201 if (!is_kernel_addr(ea)) {
202 ste->esid_data = 0;
203 }
204 }
205 }
206
207 asm volatile("sync; slbia; sync":::"memory");
208
209 __get_cpu_var(stab_cache_ptr) = 0;
210
211 /* Now preload some entries for the new task */
212 if (test_tsk_thread_flag(tsk, TIF_32BIT))
213 unmapped_base = TASK_UNMAPPED_BASE_USER32;
214 else
215 unmapped_base = TASK_UNMAPPED_BASE_USER64;
216
217 __ste_allocate(pc, mm);
218
219 if (GET_ESID(pc) == GET_ESID(stack))
220 return;
221
222 __ste_allocate(stack, mm);
223
224 if ((GET_ESID(pc) == GET_ESID(unmapped_base))
225 || (GET_ESID(stack) == GET_ESID(unmapped_base)))
226 return;
227
228 __ste_allocate(unmapped_base, mm);
229
230 /* Order update */
231 asm volatile("sync" : : : "memory");
232}
233
234/*
235 * Allocate segment tables for secondary CPUs. These must all go in
236 * the first (bolted) segment, so that do_stab_bolted won't get a
237 * recursive segment miss on the segment table itself.
238 */
239void __init stabs_alloc(void)
240{
241 int cpu;
242
243 if (mmu_has_feature(MMU_FTR_SLB))
244 return;
245
246 for_each_possible_cpu(cpu) {
247 unsigned long newstab;
248
249 if (cpu == 0)
250 continue; /* stab for CPU 0 is statically allocated */
251
252 newstab = memblock_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
253 1<<SID_SHIFT);
254 newstab = (unsigned long)__va(newstab);
255
256 memset((void *)newstab, 0, HW_PAGE_SIZE);
257
258 paca[cpu].stab_addr = newstab;
259 paca[cpu].stab_real = __pa(newstab);
260 printk(KERN_INFO "Segment table for CPU %d at 0x%llx "
261 "virtual, 0x%llx absolute\n",
262 cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
263 }
264}
265
266/*
267 * Build an entry for the base kernel segment and put it into
268 * the segment table or SLB. All other segment table or SLB
269 * entries are faulted in.
270 */
271void stab_initialize(unsigned long stab)
272{
273 unsigned long vsid = get_kernel_vsid(PAGE_OFFSET, MMU_SEGSIZE_256M);
274 unsigned long stabreal;
275
276 asm volatile("isync; slbia; isync":::"memory");
277 make_ste(stab, GET_ESID(PAGE_OFFSET), vsid);
278
279 /* Order update */
280 asm volatile("sync":::"memory");
281
282 /* Set ASR */
283 stabreal = get_paca()->stab_real | 0x1ul;
284
285 mtspr(SPRN_ASR, stabreal);
286}
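For readers unfamiliar with the segment-table code being deleted above, the group selection in make_ste() is simple arithmetic: a 4 KB segment table holds 32 groups of eight 16-byte entries, the primary group is indexed by the low five ESID bits and the secondary group by their complement. A small standalone model of that arithmetic (plain userspace C, illustrative values only):

#include <stdio.h>

#define STE_SIZE       16UL    /* sizeof(struct stab_entry): two longs */
#define STE_PER_GROUP  8UL

/* byte offset of the primary group for an ESID, as in make_ste() */
static unsigned long primary_group_offset(unsigned long esid)
{
	return (esid & 0x1f) * STE_PER_GROUP * STE_SIZE;
}

/* byte offset of the secondary group: same hash on the complemented ESID */
static unsigned long secondary_group_offset(unsigned long esid)
{
	return (~esid & 0x1f) * STE_PER_GROUP * STE_SIZE;
}

int main(void)
{
	unsigned long esid = 0x123;    /* arbitrary example ESID */

	printf("primary   group at stab+0x%lx, first global entry %lu\n",
	       primary_group_offset(esid), (esid & 0x1f) << 3);
	printf("secondary group at stab+0x%lx, first global entry %lu\n",
	       secondary_group_offset(esid), (~esid & 0x1f) << 3);
	return 0;
}

make_ste() ORs these offsets onto the page-aligned stab base, scans both groups for a free slot, and only falls back to the round-robin castout tracked in get_paca()->stab_rr when all sixteen candidates are valid.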
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index c99f6510a0b2..d2a94b85dbc2 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -30,6 +30,8 @@
30#include <asm/tlb.h> 30#include <asm/tlb.h>
31#include <asm/bug.h> 31#include <asm/bug.h>
32 32
33#include <trace/events/thp.h>
34
33DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); 35DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
34 36
35/* 37/*
@@ -213,10 +215,12 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
213 if (ptep == NULL) 215 if (ptep == NULL)
214 continue; 216 continue;
215 pte = pte_val(*ptep); 217 pte = pte_val(*ptep);
218 if (hugepage_shift)
219 trace_hugepage_invalidate(start, pte_val(pte));
216 if (!(pte & _PAGE_HASHPTE)) 220 if (!(pte & _PAGE_HASHPTE))
217 continue; 221 continue;
218 if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte))) 222 if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte)))
219 hpte_do_hugepage_flush(mm, start, (pmd_t *)pte); 223 hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte);
220 else 224 else
221 hpte_need_flush(mm, start, ptep, pte, 0); 225 hpte_need_flush(mm, start, ptep, pte, 0);
222 } 226 }
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index 356e8b41fb09..89bf95bd63b1 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -296,9 +296,12 @@ itlb_miss_fault_bolted:
296 * r14 = page table base 296 * r14 = page table base
297 * r13 = PACA 297 * r13 = PACA
298 * r11 = tlb_per_core ptr 298 * r11 = tlb_per_core ptr
299 * r10 = cpu number 299 * r10 = crap (free to use)
300 */ 300 */
301tlb_miss_common_e6500: 301tlb_miss_common_e6500:
302 crmove cr2*4+2,cr0*4+2 /* cr2.eq != 0 if kernel address */
303
304BEGIN_FTR_SECTION /* CPU_FTR_SMT */
302 /* 305 /*
303 * Search if we already have an indirect entry for that virtual 306 * Search if we already have an indirect entry for that virtual
304 * address, and if we do, bail out. 307 * address, and if we do, bail out.
@@ -309,6 +312,7 @@ tlb_miss_common_e6500:
309 lhz r10,PACAPACAINDEX(r13) 312 lhz r10,PACAPACAINDEX(r13)
310 cmpdi r15,0 313 cmpdi r15,0
311 cmpdi cr1,r15,1 /* set cr1.eq = 0 for non-recursive */ 314 cmpdi cr1,r15,1 /* set cr1.eq = 0 for non-recursive */
315 addi r10,r10,1
312 bne 2f 316 bne 2f
313 stbcx. r10,0,r11 317 stbcx. r10,0,r11
314 bne 1b 318 bne 1b
@@ -322,18 +326,62 @@ tlb_miss_common_e6500:
322 b 1b 326 b 1b
323 .previous 327 .previous
324 328
329 /*
330 * Erratum A-008139 says that we can't use tlbwe to change
331 * an indirect entry in any way (including replacing or
332 * invalidating) if the other thread could be in the process
333 * of a lookup. The workaround is to invalidate the entry
334 * with tlbilx before overwriting.
335 */
336
337 lbz r15,TCD_ESEL_NEXT(r11)
338 rlwinm r10,r15,16,0xff0000
339 oris r10,r10,MAS0_TLBSEL(1)@h
340 mtspr SPRN_MAS0,r10
341 isync
342 tlbre
343 mfspr r15,SPRN_MAS1
344 andis. r15,r15,MAS1_VALID@h
345 beq 5f
346
347BEGIN_FTR_SECTION_NESTED(532)
348 mfspr r10,SPRN_MAS8
349 rlwinm r10,r10,0,0x80000fff /* tgs,tlpid -> sgs,slpid */
350 mtspr SPRN_MAS5,r10
351END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532)
352
353 mfspr r10,SPRN_MAS1
354 rlwinm r15,r10,0,0x3fff0000 /* tid -> spid */
355 rlwimi r15,r10,20,0x00000003 /* ind,ts -> sind,sas */
356 mfspr r10,SPRN_MAS6
357 mtspr SPRN_MAS6,r15
358
325 mfspr r15,SPRN_MAS2 359 mfspr r15,SPRN_MAS2
360 isync
361 tlbilxva 0,r15
362 isync
363
364 mtspr SPRN_MAS6,r10
365
3665:
367BEGIN_FTR_SECTION_NESTED(532)
368 li r10,0
369 mtspr SPRN_MAS8,r10
370 mtspr SPRN_MAS5,r10
371END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532)
326 372
327 tlbsx 0,r16 373 tlbsx 0,r16
328 mfspr r10,SPRN_MAS1 374 mfspr r10,SPRN_MAS1
329 andis. r10,r10,MAS1_VALID@h 375 andis. r15,r10,MAS1_VALID@h
330 bne tlb_miss_done_e6500 376 bne tlb_miss_done_e6500
331 377FTR_SECTION_ELSE
332 /* Undo MAS-damage from the tlbsx */
333 mfspr r10,SPRN_MAS1 378 mfspr r10,SPRN_MAS1
379ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
380
334 oris r10,r10,MAS1_VALID@h 381 oris r10,r10,MAS1_VALID@h
335 mtspr SPRN_MAS1,r10 382 beq cr2,4f
336 mtspr SPRN_MAS2,r15 383 rlwinm r10,r10,0,16,1 /* Clear TID */
3844: mtspr SPRN_MAS1,r10
337 385
338 /* Now, we need to walk the page tables. First check if we are in 386 /* Now, we need to walk the page tables. First check if we are in
339 * range. 387 * range.
@@ -394,11 +442,13 @@ tlb_miss_common_e6500:
394 442
395tlb_miss_done_e6500: 443tlb_miss_done_e6500:
396 .macro tlb_unlock_e6500 444 .macro tlb_unlock_e6500
445BEGIN_FTR_SECTION
397 beq cr1,1f /* no unlock if lock was recursively grabbed */ 446 beq cr1,1f /* no unlock if lock was recursively grabbed */
398 li r15,0 447 li r15,0
399 isync 448 isync
400 stb r15,0(r11) 449 stb r15,0(r11)
4011: 4501:
451END_FTR_SECTION_IFSET(CPU_FTR_SMT)
402 .endm 452 .endm
403 453
404 tlb_unlock_e6500 454 tlb_unlock_e6500
@@ -407,12 +457,9 @@ tlb_miss_done_e6500:
407 rfi 457 rfi
408 458
409tlb_miss_kernel_e6500: 459tlb_miss_kernel_e6500:
410 mfspr r10,SPRN_MAS1
411 ld r14,PACA_KERNELPGD(r13) 460 ld r14,PACA_KERNELPGD(r13)
412 cmpldi cr0,r15,8 /* Check for vmalloc region */ 461 cmpldi cr1,r15,8 /* Check for vmalloc region */
413 rlwinm r10,r10,0,16,1 /* Clear TID */ 462 beq+ cr1,tlb_miss_common_e6500
414 mtspr SPRN_MAS1,r10
415 beq+ tlb_miss_common_e6500
416 463
417tlb_miss_fault_e6500: 464tlb_miss_fault_e6500:
418 tlb_unlock_e6500 465 tlb_unlock_e6500
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 92cb18d52ea8..f38ea4df6a85 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -581,42 +581,10 @@ static void setup_mmu_htw(void)
581/* 581/*
582 * Early initialization of the MMU TLB code 582 * Early initialization of the MMU TLB code
583 */ 583 */
584static void __early_init_mmu(int boot_cpu) 584static void early_init_this_mmu(void)
585{ 585{
586 unsigned int mas4; 586 unsigned int mas4;
587 587
588 /* XXX This will have to be decided at runtime, but right
589 * now our boot and TLB miss code hard wires it. Ideally
590 * we should find out a suitable page size and patch the
591 * TLB miss code (either that or use the PACA to store
592 * the value we want)
593 */
594 mmu_linear_psize = MMU_PAGE_1G;
595
596 /* XXX This should be decided at runtime based on supported
597 * page sizes in the TLB, but for now let's assume 16M is
598 * always there and a good fit (which it probably is)
599 *
600 * Freescale booke only supports 4K pages in TLB0, so use that.
601 */
602 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
603 mmu_vmemmap_psize = MMU_PAGE_4K;
604 else
605 mmu_vmemmap_psize = MMU_PAGE_16M;
606
607 /* XXX This code only checks for TLB 0 capabilities and doesn't
608 * check what page size combos are supported by the HW. It
609 * also doesn't handle the case where a separate array holds
610 * the IND entries from the array loaded by the PT.
611 */
612 if (boot_cpu) {
613 /* Look for supported page sizes */
614 setup_page_sizes();
615
616 /* Look for HW tablewalk support */
617 setup_mmu_htw();
618 }
619
620 /* Set MAS4 based on page table setting */ 588 /* Set MAS4 based on page table setting */
621 589
622 mas4 = 0x4 << MAS4_WIMGED_SHIFT; 590 mas4 = 0x4 << MAS4_WIMGED_SHIFT;
@@ -650,11 +618,6 @@ static void __early_init_mmu(int boot_cpu)
650 } 618 }
651 mtspr(SPRN_MAS4, mas4); 619 mtspr(SPRN_MAS4, mas4);
652 620
653 /* Set the global containing the top of the linear mapping
654 * for use by the TLB miss code
655 */
656 linear_map_top = memblock_end_of_DRAM();
657
658#ifdef CONFIG_PPC_FSL_BOOK3E 621#ifdef CONFIG_PPC_FSL_BOOK3E
659 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { 622 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
660 unsigned int num_cams; 623 unsigned int num_cams;
@@ -662,10 +625,49 @@ static void __early_init_mmu(int boot_cpu)
662 /* use a quarter of the TLBCAM for bolted linear map */ 625 /* use a quarter of the TLBCAM for bolted linear map */
663 num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; 626 num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
664 linear_map_top = map_mem_in_cams(linear_map_top, num_cams); 627 linear_map_top = map_mem_in_cams(linear_map_top, num_cams);
628 }
629#endif
665 630
666 /* limit memory so we dont have linear faults */ 631 /* A sync won't hurt us after mucking around with
667 memblock_enforce_memory_limit(linear_map_top); 632 * the MMU configuration
633 */
634 mb();
635}
668 636
637static void __init early_init_mmu_global(void)
638{
639 /* XXX This will have to be decided at runtime, but right
640 * now our boot and TLB miss code hard wires it. Ideally
641 * we should find out a suitable page size and patch the
642 * TLB miss code (either that or use the PACA to store
643 * the value we want)
644 */
645 mmu_linear_psize = MMU_PAGE_1G;
646
647 /* XXX This should be decided at runtime based on supported
648 * page sizes in the TLB, but for now let's assume 16M is
649 * always there and a good fit (which it probably is)
650 *
651 * Freescale booke only supports 4K pages in TLB0, so use that.
652 */
653 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
654 mmu_vmemmap_psize = MMU_PAGE_4K;
655 else
656 mmu_vmemmap_psize = MMU_PAGE_16M;
657
658 /* XXX This code only checks for TLB 0 capabilities and doesn't
659 * check what page size combos are supported by the HW. It
660 * also doesn't handle the case where a separate array holds
661 * the IND entries from the array loaded by the PT.
662 */
663 /* Look for supported page sizes */
664 setup_page_sizes();
665
666 /* Look for HW tablewalk support */
667 setup_mmu_htw();
668
669#ifdef CONFIG_PPC_FSL_BOOK3E
670 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
669 if (book3e_htw_mode == PPC_HTW_NONE) { 671 if (book3e_htw_mode == PPC_HTW_NONE) {
670 extlb_level_exc = EX_TLB_SIZE; 672 extlb_level_exc = EX_TLB_SIZE;
671 patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); 673 patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
@@ -675,22 +677,41 @@ static void __early_init_mmu(int boot_cpu)
675 } 677 }
676#endif 678#endif
677 679
678 /* A sync won't hurt us after mucking around with 680 /* Set the global containing the top of the linear mapping
679 * the MMU configuration 681 * for use by the TLB miss code
680 */ 682 */
681 mb(); 683 linear_map_top = memblock_end_of_DRAM();
684}
685
686static void __init early_mmu_set_memory_limit(void)
687{
688#ifdef CONFIG_PPC_FSL_BOOK3E
689 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
690 /*
691 * Limit memory so we don't have linear faults.
692 * Unlike memblock_set_current_limit, which limits
693 * memory available during early boot, this permanently
694 * reduces the memory available to Linux. We need to
695 * do this because highmem is not supported on 64-bit.
696 */
697 memblock_enforce_memory_limit(linear_map_top);
698 }
699#endif
682 700
683 memblock_set_current_limit(linear_map_top); 701 memblock_set_current_limit(linear_map_top);
684} 702}
685 703
704/* boot cpu only */
686void __init early_init_mmu(void) 705void __init early_init_mmu(void)
687{ 706{
688 __early_init_mmu(1); 707 early_init_mmu_global();
708 early_init_this_mmu();
709 early_mmu_set_memory_limit();
689} 710}
690 711
691void early_init_mmu_secondary(void) 712void early_init_mmu_secondary(void)
692{ 713{
693 __early_init_mmu(0); 714 early_init_this_mmu();
694} 715}
695 716
696void setup_initial_memory_limit(phys_addr_t first_memblock_base, 717void setup_initial_memory_limit(phys_addr_t first_memblock_base,
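The tlb_nohash.c refactoring is spread across several interleaved hunks; the resulting shape of the code, reduced to a skeleton with the bodies summarized as comments, is roughly:

static void early_init_this_mmu(void)
{
	/* per-CPU setup: program MAS4 defaults, map the bolted TLBCAM
	 * linear mapping on FSL Book3E, then sync */
}

static void __init early_init_mmu_global(void)
{
	/* one-time decisions: linear/vmemmap page sizes, supported page
	 * sizes, HW tablewalk mode, exception patching, linear_map_top */
}

static void __init early_mmu_set_memory_limit(void)
{
	/* on FSL Book3E, permanently clamp memblock to linear_map_top so
	 * the bolted mapping covers all memory (no highmem on 64-bit) */
}

/* boot cpu only */
void __init early_init_mmu(void)
{
	early_init_mmu_global();
	early_init_this_mmu();
	early_mmu_set_memory_limit();
}

void early_init_mmu_secondary(void)
{
	early_init_this_mmu();
}

Secondary CPUs now run only the per-CPU half, replacing the old __early_init_mmu(boot_cpu) flag juggling.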