author    | Jiri Kosina <jkosina@suse.cz> | 2014-11-20 08:42:02 -0500
committer | Jiri Kosina <jkosina@suse.cz> | 2014-11-20 08:42:02 -0500
commit    | a02001086bbfb4da35d1228bebc2f1b442db455f (patch)
tree      | 62ab47936cef06fd08657ca5b6cd1df98c19be57 /arch/powerpc/mm
parent    | eff264efeeb0898408e8c9df72d8a32621035bed (diff)
parent    | fc14f9c1272f62c3e8d01300f52467c0d9af50f9 (diff)
Merge Linus' tree to be able to apply submitted patches to newer code than
current trivial.git base
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/Makefile | 5
-rw-r--r-- | arch/powerpc/mm/copro_fault.c | 148
-rw-r--r-- | arch/powerpc/mm/dma-noncoherent.c | 1
-rw-r--r-- | arch/powerpc/mm/fault.c | 48
-rw-r--r-- | arch/powerpc/mm/hash_native_64.c | 46
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 186
-rw-r--r-- | arch/powerpc/mm/hugepage-hash64.c | 88
-rw-r--r-- | arch/powerpc/mm/init_32.c | 6
-rw-r--r-- | arch/powerpc/mm/init_64.c | 129
-rw-r--r-- | arch/powerpc/mm/mem.c | 71
-rw-r--r-- | arch/powerpc/mm/mmu_context_hash32.c | 2
-rw-r--r-- | arch/powerpc/mm/numa.c | 84
-rw-r--r-- | arch/powerpc/mm/pgtable.c | 2
-rw-r--r-- | arch/powerpc/mm/pgtable_32.c | 2
-rw-r--r-- | arch/powerpc/mm/pgtable_64.c | 46
-rw-r--r-- | arch/powerpc/mm/ppc_mmu_32.c | 2
-rw-r--r-- | arch/powerpc/mm/slb.c | 3
-rw-r--r-- | arch/powerpc/mm/slice.c | 15
-rw-r--r-- | arch/powerpc/mm/stab.c | 286
-rw-r--r-- | arch/powerpc/mm/tlb_hash64.c | 6
-rw-r--r-- | arch/powerpc/mm/tlb_low_64e.S | 69
-rw-r--r-- | arch/powerpc/mm/tlb_nohash.c | 111
22 files changed, 780 insertions, 576 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 51230ee6a407..325e861616a1 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -13,9 +13,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ | |||
13 | tlb_nohash_low.o | 13 | tlb_nohash_low.o |
14 | obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o | 14 | obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o |
15 | hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o | 15 | hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o |
16 | obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o \ | 16 | obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o slb_low.o slb.o $(hash64-y) |
17 | slb_low.o slb.o stab.o \ | ||
18 | $(hash64-y) | ||
19 | obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o | 17 | obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o |
20 | obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ | 18 | obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ |
21 | tlb_hash$(CONFIG_WORD_SIZE).o \ | 19 | tlb_hash$(CONFIG_WORD_SIZE).o \ |
@@ -36,3 +34,4 @@ obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o | |||
36 | obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o | 34 | obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o |
37 | obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o | 35 | obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o |
38 | obj-$(CONFIG_HIGHMEM) += highmem.o | 36 | obj-$(CONFIG_HIGHMEM) += highmem.o |
37 | obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o | ||
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
new file mode 100644
index 000000000000..5a236f082c78
--- /dev/null
+++ b/arch/powerpc/mm/copro_fault.c
@@ -0,0 +1,148 @@ | |||
1 | /* | ||
2 | * CoProcessor (SPU/AFU) mm fault handler | ||
3 | * | ||
4 | * (C) Copyright IBM Deutschland Entwicklung GmbH 2007 | ||
5 | * | ||
6 | * Author: Arnd Bergmann <arndb@de.ibm.com> | ||
7 | * Author: Jeremy Kerr <jk@ozlabs.org> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2, or (at your option) | ||
12 | * any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to the Free Software | ||
21 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
22 | */ | ||
23 | #include <linux/sched.h> | ||
24 | #include <linux/mm.h> | ||
25 | #include <linux/export.h> | ||
26 | #include <asm/reg.h> | ||
27 | #include <asm/copro.h> | ||
28 | #include <asm/spu.h> | ||
29 | #include <misc/cxl.h> | ||
30 | |||
31 | /* | ||
32 | * This ought to be kept in sync with the powerpc specific do_page_fault | ||
33 | * function. Currently, there are a few corner cases that we haven't had | ||
34 | * to handle fortunately. | ||
35 | */ | ||
36 | int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, | ||
37 | unsigned long dsisr, unsigned *flt) | ||
38 | { | ||
39 | struct vm_area_struct *vma; | ||
40 | unsigned long is_write; | ||
41 | int ret; | ||
42 | |||
43 | if (mm == NULL) | ||
44 | return -EFAULT; | ||
45 | |||
46 | if (mm->pgd == NULL) | ||
47 | return -EFAULT; | ||
48 | |||
49 | down_read(&mm->mmap_sem); | ||
50 | ret = -EFAULT; | ||
51 | vma = find_vma(mm, ea); | ||
52 | if (!vma) | ||
53 | goto out_unlock; | ||
54 | |||
55 | if (ea < vma->vm_start) { | ||
56 | if (!(vma->vm_flags & VM_GROWSDOWN)) | ||
57 | goto out_unlock; | ||
58 | if (expand_stack(vma, ea)) | ||
59 | goto out_unlock; | ||
60 | } | ||
61 | |||
62 | is_write = dsisr & DSISR_ISSTORE; | ||
63 | if (is_write) { | ||
64 | if (!(vma->vm_flags & VM_WRITE)) | ||
65 | goto out_unlock; | ||
66 | } else { | ||
67 | if (dsisr & DSISR_PROTFAULT) | ||
68 | goto out_unlock; | ||
69 | if (!(vma->vm_flags & (VM_READ | VM_EXEC))) | ||
70 | goto out_unlock; | ||
71 | } | ||
72 | |||
73 | ret = 0; | ||
74 | *flt = handle_mm_fault(mm, vma, ea, is_write ? FAULT_FLAG_WRITE : 0); | ||
75 | if (unlikely(*flt & VM_FAULT_ERROR)) { | ||
76 | if (*flt & VM_FAULT_OOM) { | ||
77 | ret = -ENOMEM; | ||
78 | goto out_unlock; | ||
79 | } else if (*flt & VM_FAULT_SIGBUS) { | ||
80 | ret = -EFAULT; | ||
81 | goto out_unlock; | ||
82 | } | ||
83 | BUG(); | ||
84 | } | ||
85 | |||
86 | if (*flt & VM_FAULT_MAJOR) | ||
87 | current->maj_flt++; | ||
88 | else | ||
89 | current->min_flt++; | ||
90 | |||
91 | out_unlock: | ||
92 | up_read(&mm->mmap_sem); | ||
93 | return ret; | ||
94 | } | ||
95 | EXPORT_SYMBOL_GPL(copro_handle_mm_fault); | ||
96 | |||
97 | int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) | ||
98 | { | ||
99 | u64 vsid; | ||
100 | int psize, ssize; | ||
101 | |||
102 | switch (REGION_ID(ea)) { | ||
103 | case USER_REGION_ID: | ||
104 | pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea); | ||
105 | psize = get_slice_psize(mm, ea); | ||
106 | ssize = user_segment_size(ea); | ||
107 | vsid = get_vsid(mm->context.id, ea, ssize); | ||
108 | break; | ||
109 | case VMALLOC_REGION_ID: | ||
110 | pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea); | ||
111 | if (ea < VMALLOC_END) | ||
112 | psize = mmu_vmalloc_psize; | ||
113 | else | ||
114 | psize = mmu_io_psize; | ||
115 | ssize = mmu_kernel_ssize; | ||
116 | vsid = get_kernel_vsid(ea, mmu_kernel_ssize); | ||
117 | break; | ||
118 | case KERNEL_REGION_ID: | ||
119 | pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea); | ||
120 | psize = mmu_linear_psize; | ||
121 | ssize = mmu_kernel_ssize; | ||
122 | vsid = get_kernel_vsid(ea, mmu_kernel_ssize); | ||
123 | break; | ||
124 | default: | ||
125 | pr_debug("%s: invalid region access at %016llx\n", __func__, ea); | ||
126 | return 1; | ||
127 | } | ||
128 | |||
129 | vsid = (vsid << slb_vsid_shift(ssize)) | SLB_VSID_USER; | ||
130 | |||
131 | vsid |= mmu_psize_defs[psize].sllp | | ||
132 | ((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0); | ||
133 | |||
134 | slb->esid = (ea & (ssize == MMU_SEGSIZE_1T ? ESID_MASK_1T : ESID_MASK)) | SLB_ESID_V; | ||
135 | slb->vsid = vsid; | ||
136 | |||
137 | return 0; | ||
138 | } | ||
139 | EXPORT_SYMBOL_GPL(copro_calculate_slb); | ||
140 | |||
141 | void copro_flush_all_slbs(struct mm_struct *mm) | ||
142 | { | ||
143 | #ifdef CONFIG_SPU_BASE | ||
144 | spu_flush_all_slbs(mm); | ||
145 | #endif | ||
146 | cxl_slbia(mm); | ||
147 | } | ||
148 | EXPORT_SYMBOL_GPL(copro_flush_all_slbs); | ||
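Both entry points above are exported (`EXPORT_SYMBOL_GPL`) so coprocessor drivers can resolve faults on behalf of an attached process. A minimal sketch of how such a driver might use them; `afu_service_fault` and its surrounding context are hypothetical, only the two `copro_*` calls come from this file:

```c
/* Hypothetical fault-service path in a coprocessor driver. */
static int afu_service_fault(struct mm_struct *mm, u64 ea, u64 dsisr)
{
	struct copro_slb slb;
	unsigned int flt;
	int ret;

	/* Fault the page in, mirroring the CPU's do_page_fault() path. */
	ret = copro_handle_mm_fault(mm, ea, dsisr, &flt);
	if (ret)
		return ret;

	/* Build an SLB entry the device can install for this EA. */
	ret = copro_calculate_slb(mm, ea, &slb);
	if (ret)
		return -EFAULT;

	/* ...write slb.esid / slb.vsid into the device's SLB here... */
	return 0;
}
```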
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 7b6c10750179..d85e86aac7fb 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/export.h> | 33 | #include <linux/export.h> |
34 | 34 | ||
35 | #include <asm/tlbflush.h> | 35 | #include <asm/tlbflush.h> |
36 | #include <asm/dma.h> | ||
36 | 37 | ||
37 | #include "mmu_decl.h" | 38 | #include "mmu_decl.h" |
38 | 39 | ||
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 51ab9e7e6c39..08d659a9fcdb 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -30,9 +30,9 @@ | |||
30 | #include <linux/kprobes.h> | 30 | #include <linux/kprobes.h> |
31 | #include <linux/kdebug.h> | 31 | #include <linux/kdebug.h> |
32 | #include <linux/perf_event.h> | 32 | #include <linux/perf_event.h> |
33 | #include <linux/magic.h> | ||
34 | #include <linux/ratelimit.h> | 33 | #include <linux/ratelimit.h> |
35 | #include <linux/context_tracking.h> | 34 | #include <linux/context_tracking.h> |
35 | #include <linux/hugetlb.h> | ||
36 | 36 | ||
37 | #include <asm/firmware.h> | 37 | #include <asm/firmware.h> |
38 | #include <asm/page.h> | 38 | #include <asm/page.h> |
@@ -114,22 +114,37 @@ static int store_updates_sp(struct pt_regs *regs) | |||
114 | #define MM_FAULT_CONTINUE -1 | 114 | #define MM_FAULT_CONTINUE -1 |
115 | #define MM_FAULT_ERR(sig) (sig) | 115 | #define MM_FAULT_ERR(sig) (sig) |
116 | 116 | ||
117 | static int do_sigbus(struct pt_regs *regs, unsigned long address) | 117 | static int do_sigbus(struct pt_regs *regs, unsigned long address, |
118 | unsigned int fault) | ||
118 | { | 119 | { |
119 | siginfo_t info; | 120 | siginfo_t info; |
121 | unsigned int lsb = 0; | ||
120 | 122 | ||
121 | up_read(¤t->mm->mmap_sem); | 123 | up_read(¤t->mm->mmap_sem); |
122 | 124 | ||
123 | if (user_mode(regs)) { | 125 | if (!user_mode(regs)) |
124 | current->thread.trap_nr = BUS_ADRERR; | 126 | return MM_FAULT_ERR(SIGBUS); |
125 | info.si_signo = SIGBUS; | 127 | |
126 | info.si_errno = 0; | 128 | current->thread.trap_nr = BUS_ADRERR; |
127 | info.si_code = BUS_ADRERR; | 129 | info.si_signo = SIGBUS; |
128 | info.si_addr = (void __user *)address; | 130 | info.si_errno = 0; |
129 | force_sig_info(SIGBUS, &info, current); | 131 | info.si_code = BUS_ADRERR; |
130 | return MM_FAULT_RETURN; | 132 | info.si_addr = (void __user *)address; |
133 | #ifdef CONFIG_MEMORY_FAILURE | ||
134 | if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { | ||
135 | pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", | ||
136 | current->comm, current->pid, address); | ||
137 | info.si_code = BUS_MCEERR_AR; | ||
131 | } | 138 | } |
132 | return MM_FAULT_ERR(SIGBUS); | 139 | |
140 | if (fault & VM_FAULT_HWPOISON_LARGE) | ||
141 | lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); | ||
142 | if (fault & VM_FAULT_HWPOISON) | ||
143 | lsb = PAGE_SHIFT; | ||
144 | #endif | ||
145 | info.si_addr_lsb = lsb; | ||
146 | force_sig_info(SIGBUS, &info, current); | ||
147 | return MM_FAULT_RETURN; | ||
133 | } | 148 | } |
134 | 149 | ||
135 | static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) | 150 | static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) |
@@ -170,11 +185,8 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) | |||
170 | return MM_FAULT_RETURN; | 185 | return MM_FAULT_RETURN; |
171 | } | 186 | } |
172 | 187 | ||
173 | /* Bus error. x86 handles HWPOISON here, we'll add this if/when | 188 | if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) |
174 | * we support the feature in HW | 189 | return do_sigbus(regs, addr, fault); |
175 | */ | ||
176 | if (fault & VM_FAULT_SIGBUS) | ||
177 | return do_sigbus(regs, addr); | ||
178 | 190 | ||
179 | /* We don't understand the fault code, this is fatal */ | 191 | /* We don't understand the fault code, this is fatal */ |
180 | BUG(); | 192 | BUG(); |
@@ -508,7 +520,6 @@ bail: | |||
508 | void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) | 520 | void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) |
509 | { | 521 | { |
510 | const struct exception_table_entry *entry; | 522 | const struct exception_table_entry *entry; |
511 | unsigned long *stackend; | ||
512 | 523 | ||
513 | /* Are we prepared to handle this fault? */ | 524 | /* Are we prepared to handle this fault? */ |
514 | if ((entry = search_exception_tables(regs->nip)) != NULL) { | 525 | if ((entry = search_exception_tables(regs->nip)) != NULL) { |
@@ -537,8 +548,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) | |||
537 | printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n", | 548 | printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n", |
538 | regs->nip); | 549 | regs->nip); |
539 | 550 | ||
540 | stackend = end_of_stack(current); | 551 | if (task_stack_end_corrupted(current)) |
541 | if (current != &init_task && *stackend != STACK_END_MAGIC) | ||
542 | printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); | 552 | printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); |
543 | 553 | ||
544 | die("Kernel access of bad area", regs, sig); | 554 | die("Kernel access of bad area", regs, sig); |
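With this change a poisoned page is reported to userspace as `SIGBUS` with `si_code = BUS_MCEERR_AR` and `si_addr_lsb` giving the granularity of the damage (`PAGE_SHIFT` for a normal page, the huge-page shift for `VM_FAULT_HWPOISON_LARGE`). A sketch of a userspace handler that consumes this, assuming a libc that exposes `si_addr_lsb`:

```c
#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

/* fprintf is not async-signal-safe; acceptable for a demonstration. */
static void sigbus_handler(int sig, siginfo_t *info, void *uctx)
{
	if (info->si_code == BUS_MCEERR_AR)
		fprintf(stderr, "hw poison: %lu bytes at %p\n",
			1UL << info->si_addr_lsb, info->si_addr);
	_exit(1);
}

int main(void)
{
	struct sigaction sa = { .sa_sigaction = sigbus_handler,
				.sa_flags = SA_SIGINFO };

	sigaction(SIGBUS, &sa, NULL);
	pause();	/* ...touching poisoned memory would raise SIGBUS... */
	return 0;
}
```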
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index cf1d325eae8b..ae4962a06476 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -29,6 +29,8 @@ | |||
29 | #include <asm/kexec.h> | 29 | #include <asm/kexec.h> |
30 | #include <asm/ppc-opcode.h> | 30 | #include <asm/ppc-opcode.h> |
31 | 31 | ||
32 | #include <misc/cxl.h> | ||
33 | |||
32 | #ifdef DEBUG_LOW | 34 | #ifdef DEBUG_LOW |
33 | #define DBG_LOW(fmt...) udbg_printf(fmt) | 35 | #define DBG_LOW(fmt...) udbg_printf(fmt) |
34 | #else | 36 | #else |
@@ -149,9 +151,11 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) | |||
149 | static inline void tlbie(unsigned long vpn, int psize, int apsize, | 151 | static inline void tlbie(unsigned long vpn, int psize, int apsize, |
150 | int ssize, int local) | 152 | int ssize, int local) |
151 | { | 153 | { |
152 | unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL); | 154 | unsigned int use_local; |
153 | int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); | 155 | int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); |
154 | 156 | ||
157 | use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use(); | ||
158 | |||
155 | if (use_local) | 159 | if (use_local) |
156 | use_local = mmu_psize_defs[psize].tlbiel; | 160 | use_local = mmu_psize_defs[psize].tlbiel; |
157 | if (lock_tlbie && !use_local) | 161 | if (lock_tlbie && !use_local) |
@@ -412,18 +416,18 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, | |||
412 | local_irq_restore(flags); | 416 | local_irq_restore(flags); |
413 | } | 417 | } |
414 | 418 | ||
415 | static void native_hugepage_invalidate(struct mm_struct *mm, | 419 | static void native_hugepage_invalidate(unsigned long vsid, |
420 | unsigned long addr, | ||
416 | unsigned char *hpte_slot_array, | 421 | unsigned char *hpte_slot_array, |
417 | unsigned long addr, int psize) | 422 | int psize, int ssize) |
418 | { | 423 | { |
419 | int ssize = 0, i; | 424 | int i; |
420 | int lock_tlbie; | ||
421 | struct hash_pte *hptep; | 425 | struct hash_pte *hptep; |
422 | int actual_psize = MMU_PAGE_16M; | 426 | int actual_psize = MMU_PAGE_16M; |
423 | unsigned int max_hpte_count, valid; | 427 | unsigned int max_hpte_count, valid; |
424 | unsigned long flags, s_addr = addr; | 428 | unsigned long flags, s_addr = addr; |
425 | unsigned long hpte_v, want_v, shift; | 429 | unsigned long hpte_v, want_v, shift; |
426 | unsigned long hidx, vpn = 0, vsid, hash, slot; | 430 | unsigned long hidx, vpn = 0, hash, slot; |
427 | 431 | ||
428 | shift = mmu_psize_defs[psize].shift; | 432 | shift = mmu_psize_defs[psize].shift; |
429 | max_hpte_count = 1U << (PMD_SHIFT - shift); | 433 | max_hpte_count = 1U << (PMD_SHIFT - shift); |
@@ -437,15 +441,6 @@ static void native_hugepage_invalidate(struct mm_struct *mm, | |||
437 | 441 | ||
438 | /* get the vpn */ | 442 | /* get the vpn */ |
439 | addr = s_addr + (i * (1ul << shift)); | 443 | addr = s_addr + (i * (1ul << shift)); |
440 | if (!is_kernel_addr(addr)) { | ||
441 | ssize = user_segment_size(addr); | ||
442 | vsid = get_vsid(mm->context.id, addr, ssize); | ||
443 | WARN_ON(vsid == 0); | ||
444 | } else { | ||
445 | vsid = get_kernel_vsid(addr, mmu_kernel_ssize); | ||
446 | ssize = mmu_kernel_ssize; | ||
447 | } | ||
448 | |||
449 | vpn = hpt_vpn(addr, vsid, ssize); | 444 | vpn = hpt_vpn(addr, vsid, ssize); |
450 | hash = hpt_hash(vpn, shift, ssize); | 445 | hash = hpt_hash(vpn, shift, ssize); |
451 | if (hidx & _PTEIDX_SECONDARY) | 446 | if (hidx & _PTEIDX_SECONDARY) |
@@ -465,22 +460,13 @@ static void native_hugepage_invalidate(struct mm_struct *mm, | |||
465 | else | 460 | else |
466 | /* Invalidate the hpte. NOTE: this also unlocks it */ | 461 | /* Invalidate the hpte. NOTE: this also unlocks it */ |
467 | hptep->v = 0; | 462 | hptep->v = 0; |
463 | /* | ||
464 | * We need to do tlb invalidate for all the address, tlbie | ||
465 | * instruction compares entry_VA in tlb with the VA specified | ||
466 | * here | ||
467 | */ | ||
468 | tlbie(vpn, psize, actual_psize, ssize, 0); | ||
468 | } | 469 | } |
469 | /* | ||
470 | * Since this is a hugepage, we just need a single tlbie. | ||
471 | * use the last vpn. | ||
472 | */ | ||
473 | lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); | ||
474 | if (lock_tlbie) | ||
475 | raw_spin_lock(&native_tlbie_lock); | ||
476 | |||
477 | asm volatile("ptesync":::"memory"); | ||
478 | __tlbie(vpn, psize, actual_psize, ssize); | ||
479 | asm volatile("eieio; tlbsync; ptesync":::"memory"); | ||
480 | |||
481 | if (lock_tlbie) | ||
482 | raw_spin_unlock(&native_tlbie_lock); | ||
483 | |||
484 | local_irq_restore(flags); | 470 | local_irq_restore(flags); |
485 | } | 471 | } |
486 | 472 | ||
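The `cxl_ctx_in_use()` test added to `tlbie()` matters because `tlbiel` only invalidates the issuing CPU's TLB; a cxl coprocessor keeps its own cached translations, so a global `tlbie` is required whenever a cxl context may be live. A restatement of the decision as a standalone predicate, factored out for clarity (not an actual kernel helper):

```c
/* Sketch of the local-vs-global invalidate decision in tlbie() above. */
static inline bool can_use_tlbiel(int local, int psize)
{
	return local &&					/* mapping is CPU-local */
	       mmu_has_feature(MMU_FTR_TLBIEL) &&	/* CPU implements tlbiel */
	       !cxl_ctx_in_use() &&			/* no coprocessor context */
	       mmu_psize_defs[psize].tlbiel;		/* page size supports it */
}
```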
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 88fdd9d25077..d5339a3b9945 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -51,7 +51,7 @@ | |||
51 | #include <asm/cacheflush.h> | 51 | #include <asm/cacheflush.h> |
52 | #include <asm/cputable.h> | 52 | #include <asm/cputable.h> |
53 | #include <asm/sections.h> | 53 | #include <asm/sections.h> |
54 | #include <asm/spu.h> | 54 | #include <asm/copro.h> |
55 | #include <asm/udbg.h> | 55 | #include <asm/udbg.h> |
56 | #include <asm/code-patching.h> | 56 | #include <asm/code-patching.h> |
57 | #include <asm/fadump.h> | 57 | #include <asm/fadump.h> |
@@ -92,12 +92,14 @@ extern unsigned long dart_tablebase; | |||
92 | 92 | ||
93 | static unsigned long _SDR1; | 93 | static unsigned long _SDR1; |
94 | struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; | 94 | struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; |
95 | EXPORT_SYMBOL_GPL(mmu_psize_defs); | ||
95 | 96 | ||
96 | struct hash_pte *htab_address; | 97 | struct hash_pte *htab_address; |
97 | unsigned long htab_size_bytes; | 98 | unsigned long htab_size_bytes; |
98 | unsigned long htab_hash_mask; | 99 | unsigned long htab_hash_mask; |
99 | EXPORT_SYMBOL_GPL(htab_hash_mask); | 100 | EXPORT_SYMBOL_GPL(htab_hash_mask); |
100 | int mmu_linear_psize = MMU_PAGE_4K; | 101 | int mmu_linear_psize = MMU_PAGE_4K; |
102 | EXPORT_SYMBOL_GPL(mmu_linear_psize); | ||
101 | int mmu_virtual_psize = MMU_PAGE_4K; | 103 | int mmu_virtual_psize = MMU_PAGE_4K; |
102 | int mmu_vmalloc_psize = MMU_PAGE_4K; | 104 | int mmu_vmalloc_psize = MMU_PAGE_4K; |
103 | #ifdef CONFIG_SPARSEMEM_VMEMMAP | 105 | #ifdef CONFIG_SPARSEMEM_VMEMMAP |
@@ -105,6 +107,7 @@ int mmu_vmemmap_psize = MMU_PAGE_4K; | |||
105 | #endif | 107 | #endif |
106 | int mmu_io_psize = MMU_PAGE_4K; | 108 | int mmu_io_psize = MMU_PAGE_4K; |
107 | int mmu_kernel_ssize = MMU_SEGSIZE_256M; | 109 | int mmu_kernel_ssize = MMU_SEGSIZE_256M; |
110 | EXPORT_SYMBOL_GPL(mmu_kernel_ssize); | ||
108 | int mmu_highuser_ssize = MMU_SEGSIZE_256M; | 111 | int mmu_highuser_ssize = MMU_SEGSIZE_256M; |
109 | u16 mmu_slb_size = 64; | 112 | u16 mmu_slb_size = 64; |
110 | EXPORT_SYMBOL_GPL(mmu_slb_size); | 113 | EXPORT_SYMBOL_GPL(mmu_slb_size); |
@@ -243,7 +246,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, | |||
243 | } | 246 | } |
244 | 247 | ||
245 | #ifdef CONFIG_MEMORY_HOTPLUG | 248 | #ifdef CONFIG_MEMORY_HOTPLUG |
246 | static int htab_remove_mapping(unsigned long vstart, unsigned long vend, | 249 | int htab_remove_mapping(unsigned long vstart, unsigned long vend, |
247 | int psize, int ssize) | 250 | int psize, int ssize) |
248 | { | 251 | { |
249 | unsigned long vaddr; | 252 | unsigned long vaddr; |
@@ -333,70 +336,69 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, | |||
333 | return 0; | 336 | return 0; |
334 | 337 | ||
335 | prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size); | 338 | prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size); |
336 | if (prop != NULL) { | 339 | if (!prop) |
337 | pr_info("Page sizes from device-tree:\n"); | 340 | return 0; |
338 | size /= 4; | 341 | |
339 | cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); | 342 | pr_info("Page sizes from device-tree:\n"); |
340 | while(size > 0) { | 343 | size /= 4; |
341 | unsigned int base_shift = be32_to_cpu(prop[0]); | 344 | cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); |
342 | unsigned int slbenc = be32_to_cpu(prop[1]); | 345 | while(size > 0) { |
343 | unsigned int lpnum = be32_to_cpu(prop[2]); | 346 | unsigned int base_shift = be32_to_cpu(prop[0]); |
344 | struct mmu_psize_def *def; | 347 | unsigned int slbenc = be32_to_cpu(prop[1]); |
345 | int idx, base_idx; | 348 | unsigned int lpnum = be32_to_cpu(prop[2]); |
346 | 349 | struct mmu_psize_def *def; | |
347 | size -= 3; prop += 3; | 350 | int idx, base_idx; |
348 | base_idx = get_idx_from_shift(base_shift); | 351 | |
349 | if (base_idx < 0) { | 352 | size -= 3; prop += 3; |
350 | /* | 353 | base_idx = get_idx_from_shift(base_shift); |
351 | * skip the pte encoding also | 354 | if (base_idx < 0) { |
352 | */ | 355 | /* skip the pte encoding also */ |
353 | prop += lpnum * 2; size -= lpnum * 2; | 356 | prop += lpnum * 2; size -= lpnum * 2; |
357 | continue; | ||
358 | } | ||
359 | def = &mmu_psize_defs[base_idx]; | ||
360 | if (base_idx == MMU_PAGE_16M) | ||
361 | cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; | ||
362 | |||
363 | def->shift = base_shift; | ||
364 | if (base_shift <= 23) | ||
365 | def->avpnm = 0; | ||
366 | else | ||
367 | def->avpnm = (1 << (base_shift - 23)) - 1; | ||
368 | def->sllp = slbenc; | ||
369 | /* | ||
370 | * We don't know for sure what's up with tlbiel, so | ||
371 | * for now we only set it for 4K and 64K pages | ||
372 | */ | ||
373 | if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K) | ||
374 | def->tlbiel = 1; | ||
375 | else | ||
376 | def->tlbiel = 0; | ||
377 | |||
378 | while (size > 0 && lpnum) { | ||
379 | unsigned int shift = be32_to_cpu(prop[0]); | ||
380 | int penc = be32_to_cpu(prop[1]); | ||
381 | |||
382 | prop += 2; size -= 2; | ||
383 | lpnum--; | ||
384 | |||
385 | idx = get_idx_from_shift(shift); | ||
386 | if (idx < 0) | ||
354 | continue; | 387 | continue; |
355 | } | 388 | |
356 | def = &mmu_psize_defs[base_idx]; | 389 | if (penc == -1) |
357 | if (base_idx == MMU_PAGE_16M) | 390 | pr_err("Invalid penc for base_shift=%d " |
358 | cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; | 391 | "shift=%d\n", base_shift, shift); |
359 | 392 | ||
360 | def->shift = base_shift; | 393 | def->penc[idx] = penc; |
361 | if (base_shift <= 23) | 394 | pr_info("base_shift=%d: shift=%d, sllp=0x%04lx," |
362 | def->avpnm = 0; | 395 | " avpnm=0x%08lx, tlbiel=%d, penc=%d\n", |
363 | else | 396 | base_shift, shift, def->sllp, |
364 | def->avpnm = (1 << (base_shift - 23)) - 1; | 397 | def->avpnm, def->tlbiel, def->penc[idx]); |
365 | def->sllp = slbenc; | ||
366 | /* | ||
367 | * We don't know for sure what's up with tlbiel, so | ||
368 | * for now we only set it for 4K and 64K pages | ||
369 | */ | ||
370 | if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K) | ||
371 | def->tlbiel = 1; | ||
372 | else | ||
373 | def->tlbiel = 0; | ||
374 | |||
375 | while (size > 0 && lpnum) { | ||
376 | unsigned int shift = be32_to_cpu(prop[0]); | ||
377 | int penc = be32_to_cpu(prop[1]); | ||
378 | |||
379 | prop += 2; size -= 2; | ||
380 | lpnum--; | ||
381 | |||
382 | idx = get_idx_from_shift(shift); | ||
383 | if (idx < 0) | ||
384 | continue; | ||
385 | |||
386 | if (penc == -1) | ||
387 | pr_err("Invalid penc for base_shift=%d " | ||
388 | "shift=%d\n", base_shift, shift); | ||
389 | |||
390 | def->penc[idx] = penc; | ||
391 | pr_info("base_shift=%d: shift=%d, sllp=0x%04lx," | ||
392 | " avpnm=0x%08lx, tlbiel=%d, penc=%d\n", | ||
393 | base_shift, shift, def->sllp, | ||
394 | def->avpnm, def->tlbiel, def->penc[idx]); | ||
395 | } | ||
396 | } | 398 | } |
397 | return 1; | ||
398 | } | 399 | } |
399 | return 0; | 400 | |
401 | return 1; | ||
400 | } | 402 | } |
401 | 403 | ||
402 | #ifdef CONFIG_HUGETLB_PAGE | 404 | #ifdef CONFIG_HUGETLB_PAGE |
@@ -821,21 +823,14 @@ static void __init htab_initialize(void) | |||
821 | 823 | ||
822 | void __init early_init_mmu(void) | 824 | void __init early_init_mmu(void) |
823 | { | 825 | { |
824 | /* Setup initial STAB address in the PACA */ | ||
825 | get_paca()->stab_real = __pa((u64)&initial_stab); | ||
826 | get_paca()->stab_addr = (u64)&initial_stab; | ||
827 | |||
828 | /* Initialize the MMU Hash table and create the linear mapping | 826 | /* Initialize the MMU Hash table and create the linear mapping |
829 | * of memory. Has to be done before stab/slb initialization as | 827 | * of memory. Has to be done before SLB initialization as this is |
830 | * this is currently where the page size encoding is obtained | 828 | * currently where the page size encoding is obtained. |
831 | */ | 829 | */ |
832 | htab_initialize(); | 830 | htab_initialize(); |
833 | 831 | ||
834 | /* Initialize stab / SLB management */ | 832 | /* Initialize SLB management */ |
835 | if (mmu_has_feature(MMU_FTR_SLB)) | 833 | slb_initialize(); |
836 | slb_initialize(); | ||
837 | else | ||
838 | stab_initialize(get_paca()->stab_real); | ||
839 | } | 834 | } |
840 | 835 | ||
841 | #ifdef CONFIG_SMP | 836 | #ifdef CONFIG_SMP |
@@ -845,13 +840,8 @@ void early_init_mmu_secondary(void) | |||
845 | if (!firmware_has_feature(FW_FEATURE_LPAR)) | 840 | if (!firmware_has_feature(FW_FEATURE_LPAR)) |
846 | mtspr(SPRN_SDR1, _SDR1); | 841 | mtspr(SPRN_SDR1, _SDR1); |
847 | 842 | ||
848 | /* Initialize STAB/SLB. We use a virtual address as it works | 843 | /* Initialize SLB */ |
849 | * in real mode on pSeries. | 844 | slb_initialize(); |
850 | */ | ||
851 | if (mmu_has_feature(MMU_FTR_SLB)) | ||
852 | slb_initialize(); | ||
853 | else | ||
854 | stab_initialize(get_paca()->stab_addr); | ||
855 | } | 845 | } |
856 | #endif /* CONFIG_SMP */ | 846 | #endif /* CONFIG_SMP */ |
857 | 847 | ||
@@ -879,7 +869,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) | |||
879 | } | 869 | } |
880 | 870 | ||
881 | #ifdef CONFIG_PPC_MM_SLICES | 871 | #ifdef CONFIG_PPC_MM_SLICES |
882 | unsigned int get_paca_psize(unsigned long addr) | 872 | static unsigned int get_paca_psize(unsigned long addr) |
883 | { | 873 | { |
884 | u64 lpsizes; | 874 | u64 lpsizes; |
885 | unsigned char *hpsizes; | 875 | unsigned char *hpsizes; |
@@ -913,10 +903,8 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) | |||
913 | if (get_slice_psize(mm, addr) == MMU_PAGE_4K) | 903 | if (get_slice_psize(mm, addr) == MMU_PAGE_4K) |
914 | return; | 904 | return; |
915 | slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); | 905 | slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); |
916 | #ifdef CONFIG_SPU_BASE | 906 | copro_flush_all_slbs(mm); |
917 | spu_flush_all_slbs(mm); | 907 | if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { |
918 | #endif | ||
919 | if (get_paca_psize(addr) != MMU_PAGE_4K) { | ||
920 | get_paca()->context = mm->context; | 908 | get_paca()->context = mm->context; |
921 | slb_flush_and_rebolt(); | 909 | slb_flush_and_rebolt(); |
922 | } | 910 | } |
@@ -1001,12 +989,11 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, | |||
1001 | * -1 - critical hash insertion error | 989 | * -1 - critical hash insertion error |
1002 | * -2 - access not permitted by subpage protection mechanism | 990 | * -2 - access not permitted by subpage protection mechanism |
1003 | */ | 991 | */ |
1004 | int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | 992 | int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) |
1005 | { | 993 | { |
1006 | enum ctx_state prev_state = exception_enter(); | 994 | enum ctx_state prev_state = exception_enter(); |
1007 | pgd_t *pgdir; | 995 | pgd_t *pgdir; |
1008 | unsigned long vsid; | 996 | unsigned long vsid; |
1009 | struct mm_struct *mm; | ||
1010 | pte_t *ptep; | 997 | pte_t *ptep; |
1011 | unsigned hugeshift; | 998 | unsigned hugeshift; |
1012 | const struct cpumask *tmp; | 999 | const struct cpumask *tmp; |
@@ -1020,7 +1007,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | |||
1020 | switch (REGION_ID(ea)) { | 1007 | switch (REGION_ID(ea)) { |
1021 | case USER_REGION_ID: | 1008 | case USER_REGION_ID: |
1022 | user_region = 1; | 1009 | user_region = 1; |
1023 | mm = current->mm; | ||
1024 | if (! mm) { | 1010 | if (! mm) { |
1025 | DBG_LOW(" user region with no mm !\n"); | 1011 | DBG_LOW(" user region with no mm !\n"); |
1026 | rc = 1; | 1012 | rc = 1; |
@@ -1031,7 +1017,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | |||
1031 | vsid = get_vsid(mm->context.id, ea, ssize); | 1017 | vsid = get_vsid(mm->context.id, ea, ssize); |
1032 | break; | 1018 | break; |
1033 | case VMALLOC_REGION_ID: | 1019 | case VMALLOC_REGION_ID: |
1034 | mm = &init_mm; | ||
1035 | vsid = get_kernel_vsid(ea, mmu_kernel_ssize); | 1020 | vsid = get_kernel_vsid(ea, mmu_kernel_ssize); |
1036 | if (ea < VMALLOC_END) | 1021 | if (ea < VMALLOC_END) |
1037 | psize = mmu_vmalloc_psize; | 1022 | psize = mmu_vmalloc_psize; |
@@ -1116,7 +1101,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | |||
1116 | WARN_ON(1); | 1101 | WARN_ON(1); |
1117 | } | 1102 | } |
1118 | #endif | 1103 | #endif |
1119 | check_paca_psize(ea, mm, psize, user_region); | 1104 | if (current->mm == mm) |
1105 | check_paca_psize(ea, mm, psize, user_region); | ||
1120 | 1106 | ||
1121 | goto bail; | 1107 | goto bail; |
1122 | } | 1108 | } |
@@ -1153,13 +1139,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | |||
1153 | "to 4kB pages because of " | 1139 | "to 4kB pages because of " |
1154 | "non-cacheable mapping\n"); | 1140 | "non-cacheable mapping\n"); |
1155 | psize = mmu_vmalloc_psize = MMU_PAGE_4K; | 1141 | psize = mmu_vmalloc_psize = MMU_PAGE_4K; |
1156 | #ifdef CONFIG_SPU_BASE | 1142 | copro_flush_all_slbs(mm); |
1157 | spu_flush_all_slbs(mm); | ||
1158 | #endif | ||
1159 | } | 1143 | } |
1160 | } | 1144 | } |
1161 | 1145 | ||
1162 | check_paca_psize(ea, mm, psize, user_region); | 1146 | if (current->mm == mm) |
1147 | check_paca_psize(ea, mm, psize, user_region); | ||
1163 | #endif /* CONFIG_PPC_64K_PAGES */ | 1148 | #endif /* CONFIG_PPC_64K_PAGES */ |
1164 | 1149 | ||
1165 | #ifdef CONFIG_PPC_HAS_HASH_64K | 1150 | #ifdef CONFIG_PPC_HAS_HASH_64K |
@@ -1194,6 +1179,17 @@ bail: | |||
1194 | exception_exit(prev_state); | 1179 | exception_exit(prev_state); |
1195 | return rc; | 1180 | return rc; |
1196 | } | 1181 | } |
1182 | EXPORT_SYMBOL_GPL(hash_page_mm); | ||
1183 | |||
1184 | int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | ||
1185 | { | ||
1186 | struct mm_struct *mm = current->mm; | ||
1187 | |||
1188 | if (REGION_ID(ea) == VMALLOC_REGION_ID) | ||
1189 | mm = &init_mm; | ||
1190 | |||
1191 | return hash_page_mm(mm, ea, access, trap); | ||
1192 | } | ||
1197 | EXPORT_SYMBOL_GPL(hash_page); | 1193 | EXPORT_SYMBOL_GPL(hash_page); |
1198 | 1194 | ||
1199 | void hash_preload(struct mm_struct *mm, unsigned long ea, | 1195 | void hash_preload(struct mm_struct *mm, unsigned long ea, |
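`hash_page()` is now a thin wrapper that selects the mm (`init_mm` for vmalloc addresses, `current->mm` otherwise) and defers to the new `hash_page_mm()`. Exporting the latter lets a driver pre-hash pages against an mm other than `current->mm`, e.g. for an attached coprocessor context. A hedged sketch with hypothetical names:

```c
/* Hypothetical: hash a user EA on behalf of another process's mm. */
static int afu_hash_ea(struct mm_struct *mm, unsigned long ea, bool store)
{
	unsigned long access = _PAGE_PRESENT;

	if (store)
		access |= _PAGE_RW;

	/* 0x300 is the data storage interrupt vector, matching a DSI. */
	return hash_page_mm(mm, ea, access, 0x300);
}
```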
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 826893fcb3a7..5f5e6328c21c 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -18,6 +18,57 @@ | |||
18 | #include <linux/mm.h> | 18 | #include <linux/mm.h> |
19 | #include <asm/machdep.h> | 19 | #include <asm/machdep.h> |
20 | 20 | ||
21 | static void invalidate_old_hpte(unsigned long vsid, unsigned long addr, | ||
22 | pmd_t *pmdp, unsigned int psize, int ssize) | ||
23 | { | ||
24 | int i, max_hpte_count, valid; | ||
25 | unsigned long s_addr; | ||
26 | unsigned char *hpte_slot_array; | ||
27 | unsigned long hidx, shift, vpn, hash, slot; | ||
28 | |||
29 | s_addr = addr & HPAGE_PMD_MASK; | ||
30 | hpte_slot_array = get_hpte_slot_array(pmdp); | ||
31 | /* | ||
32 | * IF we try to do a HUGE PTE update after a withdraw is done. | ||
33 | * we will find the below NULL. This happens when we do | ||
34 | * split_huge_page_pmd | ||
35 | */ | ||
36 | if (!hpte_slot_array) | ||
37 | return; | ||
38 | |||
39 | if (ppc_md.hugepage_invalidate) | ||
40 | return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, | ||
41 | psize, ssize); | ||
42 | /* | ||
43 | * No bluk hpte removal support, invalidate each entry | ||
44 | */ | ||
45 | shift = mmu_psize_defs[psize].shift; | ||
46 | max_hpte_count = HPAGE_PMD_SIZE >> shift; | ||
47 | for (i = 0; i < max_hpte_count; i++) { | ||
48 | /* | ||
49 | * 8 bits per each hpte entries | ||
50 | * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] | ||
51 | */ | ||
52 | valid = hpte_valid(hpte_slot_array, i); | ||
53 | if (!valid) | ||
54 | continue; | ||
55 | hidx = hpte_hash_index(hpte_slot_array, i); | ||
56 | |||
57 | /* get the vpn */ | ||
58 | addr = s_addr + (i * (1ul << shift)); | ||
59 | vpn = hpt_vpn(addr, vsid, ssize); | ||
60 | hash = hpt_hash(vpn, shift, ssize); | ||
61 | if (hidx & _PTEIDX_SECONDARY) | ||
62 | hash = ~hash; | ||
63 | |||
64 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | ||
65 | slot += hidx & _PTEIDX_GROUP_IX; | ||
66 | ppc_md.hpte_invalidate(slot, vpn, psize, | ||
67 | MMU_PAGE_16M, ssize, 0); | ||
68 | } | ||
69 | } | ||
70 | |||
71 | |||
21 | int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, | 72 | int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, |
22 | pmd_t *pmdp, unsigned long trap, int local, int ssize, | 73 | pmd_t *pmdp, unsigned long trap, int local, int ssize, |
23 | unsigned int psize) | 74 | unsigned int psize) |
@@ -33,7 +84,9 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, | |||
33 | * atomically mark the linux large page PMD busy and dirty | 84 | * atomically mark the linux large page PMD busy and dirty |
34 | */ | 85 | */ |
35 | do { | 86 | do { |
36 | old_pmd = pmd_val(*pmdp); | 87 | pmd_t pmd = ACCESS_ONCE(*pmdp); |
88 | |||
89 | old_pmd = pmd_val(pmd); | ||
37 | /* If PMD busy, retry the access */ | 90 | /* If PMD busy, retry the access */ |
38 | if (unlikely(old_pmd & _PAGE_BUSY)) | 91 | if (unlikely(old_pmd & _PAGE_BUSY)) |
39 | return 0; | 92 | return 0; |
@@ -85,6 +138,15 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, | |||
85 | vpn = hpt_vpn(ea, vsid, ssize); | 138 | vpn = hpt_vpn(ea, vsid, ssize); |
86 | hash = hpt_hash(vpn, shift, ssize); | 139 | hash = hpt_hash(vpn, shift, ssize); |
87 | hpte_slot_array = get_hpte_slot_array(pmdp); | 140 | hpte_slot_array = get_hpte_slot_array(pmdp); |
141 | if (psize == MMU_PAGE_4K) { | ||
142 | /* | ||
143 | * invalidate the old hpte entry if we have that mapped via 64K | ||
144 | * base page size. This is because demote_segment won't flush | ||
145 | * hash page table entries. | ||
146 | */ | ||
147 | if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) | ||
148 | invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize); | ||
149 | } | ||
88 | 150 | ||
89 | valid = hpte_valid(hpte_slot_array, index); | 151 | valid = hpte_valid(hpte_slot_array, index); |
90 | if (valid) { | 152 | if (valid) { |
@@ -107,11 +169,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, | |||
107 | * safely update this here. | 169 | * safely update this here. |
108 | */ | 170 | */ |
109 | valid = 0; | 171 | valid = 0; |
110 | new_pmd &= ~_PAGE_HPTEFLAGS; | ||
111 | hpte_slot_array[index] = 0; | 172 | hpte_slot_array[index] = 0; |
112 | } else | 173 | } |
113 | /* clear the busy bits and set the hash pte bits */ | ||
114 | new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; | ||
115 | } | 174 | } |
116 | 175 | ||
117 | if (!valid) { | 176 | if (!valid) { |
@@ -119,11 +178,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, | |||
119 | 178 | ||
120 | /* insert new entry */ | 179 | /* insert new entry */ |
121 | pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; | 180 | pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; |
122 | repeat: | 181 | new_pmd |= _PAGE_HASHPTE; |
123 | hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; | ||
124 | |||
125 | /* clear the busy bits and set the hash pte bits */ | ||
126 | new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; | ||
127 | 182 | ||
128 | /* Add in WIMG bits */ | 183 | /* Add in WIMG bits */ |
129 | rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | | 184 | rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | |
@@ -132,6 +187,8 @@ repeat: | |||
132 | * enable the memory coherence always | 187 | * enable the memory coherence always |
133 | */ | 188 | */ |
134 | rflags |= HPTE_R_M; | 189 | rflags |= HPTE_R_M; |
190 | repeat: | ||
191 | hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; | ||
135 | 192 | ||
136 | /* Insert into the hash table, primary slot */ | 193 | /* Insert into the hash table, primary slot */ |
137 | slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, | 194 | slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, |
@@ -172,8 +229,17 @@ repeat: | |||
172 | mark_hpte_slot_valid(hpte_slot_array, index, slot); | 229 | mark_hpte_slot_valid(hpte_slot_array, index, slot); |
173 | } | 230 | } |
174 | /* | 231 | /* |
175 | * No need to use ldarx/stdcx here | 232 | * Mark the pte with _PAGE_COMBO, if we are trying to hash it with |
233 | * base page size 4k. | ||
234 | */ | ||
235 | if (psize == MMU_PAGE_4K) | ||
236 | new_pmd |= _PAGE_COMBO; | ||
237 | /* | ||
238 | * The hpte valid is stored in the pgtable whose address is in the | ||
239 | * second half of the PMD. Order this against clearing of the busy bit in | ||
240 | * huge pmd. | ||
176 | */ | 241 | */ |
242 | smp_wmb(); | ||
177 | *pmdp = __pmd(new_pmd & ~_PAGE_BUSY); | 243 | *pmdp = __pmd(new_pmd & ~_PAGE_BUSY); |
178 | return 0; | 244 | return 0; |
179 | } | 245 | } |
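`invalidate_old_hpte()` relies on the per-sub-page slot encoding described in the comment above: one byte per potential HPTE, laid out as `000 | secondary (1 bit) | hidx (3 bits) | valid (1 bit)`. A sketch of the two accessors the loop depends on, mirroring the kernel's `hpte_valid()`/`hpte_hash_index()` helpers:

```c
/* Sketch of the hpte_slot_array byte accessors used above. */
static inline int sketch_hpte_valid(const unsigned char *slot_array, int i)
{
	return slot_array[i] & 0x1;		/* low bit: entry is valid */
}

static inline unsigned long sketch_hpte_hash_index(const unsigned char *slot_array,
						   int i)
{
	/* Remaining 4 bits: _PTEIDX_SECONDARY (0x8) plus 3-bit group index. */
	return slot_array[i] >> 1;
}
```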
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index cff59f1bec23..415a51b028b9 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -103,14 +103,14 @@ unsigned long __max_low_memory = MAX_LOW_MEM; | |||
103 | /* | 103 | /* |
104 | * Check for command-line options that affect what MMU_init will do. | 104 | * Check for command-line options that affect what MMU_init will do. |
105 | */ | 105 | */ |
106 | void MMU_setup(void) | 106 | void __init MMU_setup(void) |
107 | { | 107 | { |
108 | /* Check for nobats option (used in mapin_ram). */ | 108 | /* Check for nobats option (used in mapin_ram). */ |
109 | if (strstr(cmd_line, "nobats")) { | 109 | if (strstr(boot_command_line, "nobats")) { |
110 | __map_without_bats = 1; | 110 | __map_without_bats = 1; |
111 | } | 111 | } |
112 | 112 | ||
113 | if (strstr(cmd_line, "noltlbs")) { | 113 | if (strstr(boot_command_line, "noltlbs")) { |
114 | __map_without_ltlbs = 1; | 114 | __map_without_ltlbs = 1; |
115 | } | 115 | } |
116 | #ifdef CONFIG_DEBUG_PAGEALLOC | 116 | #ifdef CONFIG_DEBUG_PAGEALLOC |
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index e3734edffa69..3481556a1880 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -175,9 +175,10 @@ static unsigned long __meminit vmemmap_section_start(unsigned long page) | |||
175 | static int __meminit vmemmap_populated(unsigned long start, int page_size) | 175 | static int __meminit vmemmap_populated(unsigned long start, int page_size) |
176 | { | 176 | { |
177 | unsigned long end = start + page_size; | 177 | unsigned long end = start + page_size; |
178 | start = (unsigned long)(pfn_to_page(vmemmap_section_start(start))); | ||
178 | 179 | ||
179 | for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page))) | 180 | for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page))) |
180 | if (pfn_valid(vmemmap_section_start(start))) | 181 | if (pfn_valid(page_to_pfn((struct page *)start))) |
181 | return 1; | 182 | return 1; |
182 | 183 | ||
183 | return 0; | 184 | return 0; |
@@ -212,6 +213,13 @@ static void __meminit vmemmap_create_mapping(unsigned long start, | |||
212 | for (i = 0; i < page_size; i += PAGE_SIZE) | 213 | for (i = 0; i < page_size; i += PAGE_SIZE) |
213 | BUG_ON(map_kernel_page(start + i, phys, flags)); | 214 | BUG_ON(map_kernel_page(start + i, phys, flags)); |
214 | } | 215 | } |
216 | |||
217 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
218 | static void vmemmap_remove_mapping(unsigned long start, | ||
219 | unsigned long page_size) | ||
220 | { | ||
221 | } | ||
222 | #endif | ||
215 | #else /* CONFIG_PPC_BOOK3E */ | 223 | #else /* CONFIG_PPC_BOOK3E */ |
216 | static void __meminit vmemmap_create_mapping(unsigned long start, | 224 | static void __meminit vmemmap_create_mapping(unsigned long start, |
217 | unsigned long page_size, | 225 | unsigned long page_size, |
@@ -223,17 +231,39 @@ static void __meminit vmemmap_create_mapping(unsigned long start, | |||
223 | mmu_kernel_ssize); | 231 | mmu_kernel_ssize); |
224 | BUG_ON(mapped < 0); | 232 | BUG_ON(mapped < 0); |
225 | } | 233 | } |
234 | |||
235 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
236 | static void vmemmap_remove_mapping(unsigned long start, | ||
237 | unsigned long page_size) | ||
238 | { | ||
239 | int mapped = htab_remove_mapping(start, start + page_size, | ||
240 | mmu_vmemmap_psize, | ||
241 | mmu_kernel_ssize); | ||
242 | BUG_ON(mapped < 0); | ||
243 | } | ||
244 | #endif | ||
245 | |||
226 | #endif /* CONFIG_PPC_BOOK3E */ | 246 | #endif /* CONFIG_PPC_BOOK3E */ |
227 | 247 | ||
228 | struct vmemmap_backing *vmemmap_list; | 248 | struct vmemmap_backing *vmemmap_list; |
249 | static struct vmemmap_backing *next; | ||
250 | static int num_left; | ||
251 | static int num_freed; | ||
229 | 252 | ||
230 | static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) | 253 | static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) |
231 | { | 254 | { |
232 | static struct vmemmap_backing *next; | 255 | struct vmemmap_backing *vmem_back; |
233 | static int num_left; | 256 | /* get from freed entries first */ |
257 | if (num_freed) { | ||
258 | num_freed--; | ||
259 | vmem_back = next; | ||
260 | next = next->list; | ||
261 | |||
262 | return vmem_back; | ||
263 | } | ||
234 | 264 | ||
235 | /* allocate a page when required and hand out chunks */ | 265 | /* allocate a page when required and hand out chunks */ |
236 | if (!next || !num_left) { | 266 | if (!num_left) { |
237 | next = vmemmap_alloc_block(PAGE_SIZE, node); | 267 | next = vmemmap_alloc_block(PAGE_SIZE, node); |
238 | if (unlikely(!next)) { | 268 | if (unlikely(!next)) { |
239 | WARN_ON(1); | 269 | WARN_ON(1); |
@@ -296,10 +326,85 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) | |||
296 | return 0; | 326 | return 0; |
297 | } | 327 | } |
298 | 328 | ||
299 | void vmemmap_free(unsigned long start, unsigned long end) | 329 | #ifdef CONFIG_MEMORY_HOTPLUG |
330 | static unsigned long vmemmap_list_free(unsigned long start) | ||
300 | { | 331 | { |
332 | struct vmemmap_backing *vmem_back, *vmem_back_prev; | ||
333 | |||
334 | vmem_back_prev = vmem_back = vmemmap_list; | ||
335 | |||
336 | /* look for it with prev pointer recorded */ | ||
337 | for (; vmem_back; vmem_back = vmem_back->list) { | ||
338 | if (vmem_back->virt_addr == start) | ||
339 | break; | ||
340 | vmem_back_prev = vmem_back; | ||
341 | } | ||
342 | |||
343 | if (unlikely(!vmem_back)) { | ||
344 | WARN_ON(1); | ||
345 | return 0; | ||
346 | } | ||
347 | |||
348 | /* remove it from vmemmap_list */ | ||
349 | if (vmem_back == vmemmap_list) /* remove head */ | ||
350 | vmemmap_list = vmem_back->list; | ||
351 | else | ||
352 | vmem_back_prev->list = vmem_back->list; | ||
353 | |||
354 | /* next point to this freed entry */ | ||
355 | vmem_back->list = next; | ||
356 | next = vmem_back; | ||
357 | num_freed++; | ||
358 | |||
359 | return vmem_back->phys; | ||
301 | } | 360 | } |
302 | 361 | ||
362 | void __ref vmemmap_free(unsigned long start, unsigned long end) | ||
363 | { | ||
364 | unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; | ||
365 | |||
366 | start = _ALIGN_DOWN(start, page_size); | ||
367 | |||
368 | pr_debug("vmemmap_free %lx...%lx\n", start, end); | ||
369 | |||
370 | for (; start < end; start += page_size) { | ||
371 | unsigned long addr; | ||
372 | |||
373 | /* | ||
374 | * the section has already be marked as invalid, so | ||
375 | * vmemmap_populated() true means some other sections still | ||
376 | * in this page, so skip it. | ||
377 | */ | ||
378 | if (vmemmap_populated(start, page_size)) | ||
379 | continue; | ||
380 | |||
381 | addr = vmemmap_list_free(start); | ||
382 | if (addr) { | ||
383 | struct page *page = pfn_to_page(addr >> PAGE_SHIFT); | ||
384 | |||
385 | if (PageReserved(page)) { | ||
386 | /* allocated from bootmem */ | ||
387 | if (page_size < PAGE_SIZE) { | ||
388 | /* | ||
389 | * this shouldn't happen, but if it is | ||
390 | * the case, leave the memory there | ||
391 | */ | ||
392 | WARN_ON_ONCE(1); | ||
393 | } else { | ||
394 | unsigned int nr_pages = | ||
395 | 1 << get_order(page_size); | ||
396 | while (nr_pages--) | ||
397 | free_reserved_page(page++); | ||
398 | } | ||
399 | } else | ||
400 | free_pages((unsigned long)(__va(addr)), | ||
401 | get_order(page_size)); | ||
402 | |||
403 | vmemmap_remove_mapping(start, page_size); | ||
404 | } | ||
405 | } | ||
406 | } | ||
407 | #endif | ||
303 | void register_page_bootmem_memmap(unsigned long section_nr, | 408 | void register_page_bootmem_memmap(unsigned long section_nr, |
304 | struct page *start_page, unsigned long size) | 409 | struct page *start_page, unsigned long size) |
305 | { | 410 | { |
@@ -331,16 +436,16 @@ struct page *realmode_pfn_to_page(unsigned long pfn) | |||
331 | if (pg_va < vmem_back->virt_addr) | 436 | if (pg_va < vmem_back->virt_addr) |
332 | continue; | 437 | continue; |
333 | 438 | ||
334 | /* Check that page struct is not split between real pages */ | 439 | /* After vmemmap_list entry free is possible, need check all */ |
335 | if ((pg_va + sizeof(struct page)) > | 440 | if ((pg_va + sizeof(struct page)) <= |
336 | (vmem_back->virt_addr + page_size)) | 441 | (vmem_back->virt_addr + page_size)) { |
337 | return NULL; | 442 | page = (struct page *) (vmem_back->phys + pg_va - |
338 | |||
339 | page = (struct page *) (vmem_back->phys + pg_va - | ||
340 | vmem_back->virt_addr); | 443 | vmem_back->virt_addr); |
341 | return page; | 444 | return page; |
445 | } | ||
342 | } | 446 | } |
343 | 447 | ||
448 | /* Probably that page struct is split between real pages */ | ||
344 | return NULL; | 449 | return NULL; |
345 | } | 450 | } |
346 | EXPORT_SYMBOL_GPL(realmode_pfn_to_page); | 451 | EXPORT_SYMBOL_GPL(realmode_pfn_to_page); |
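The hot-remove support turns `vmemmap_list` allocation into a LIFO free list: `vmemmap_list_free()` pushes an entry onto `next` and `vmemmap_list_alloc()` pops from it before carving out a fresh chunk. The pattern in isolation, as a minimal self-contained sketch:

```c
/* Minimal sketch of the LIFO free-list reuse pattern used above. */
struct backing {
	struct backing *next;	/* threads the free list */
	unsigned long phys;
	unsigned long virt_addr;
};

static struct backing *free_head;

static void sketch_free(struct backing *b)
{
	b->next = free_head;		/* push freed entry */
	free_head = b;
}

static struct backing *sketch_alloc(void)
{
	struct backing *b = free_head;

	if (b)
		free_head = b->next;	/* pop: reuse a freed entry first */
	return b;			/* NULL: fall back to a fresh chunk */
}
```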
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 2c8e90f5789e..8ebaac75c940 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -128,7 +128,8 @@ int arch_add_memory(int nid, u64 start, u64 size) | |||
128 | return -EINVAL; | 128 | return -EINVAL; |
129 | 129 | ||
130 | /* this should work for most non-highmem platforms */ | 130 | /* this should work for most non-highmem platforms */ |
131 | zone = pgdata->node_zones; | 131 | zone = pgdata->node_zones + |
132 | zone_for_memory(nid, start, size, 0); | ||
132 | 133 | ||
133 | return __add_pages(nid, zone, start_pfn, nr_pages); | 134 | return __add_pages(nid, zone, start_pfn, nr_pages); |
134 | } | 135 | } |
@@ -259,6 +260,60 @@ static int __init mark_nonram_nosave(void) | |||
259 | } | 260 | } |
260 | return 0; | 261 | return 0; |
261 | } | 262 | } |
263 | #else /* CONFIG_NEED_MULTIPLE_NODES */ | ||
264 | static int __init mark_nonram_nosave(void) | ||
265 | { | ||
266 | return 0; | ||
267 | } | ||
268 | #endif | ||
269 | |||
270 | static bool zone_limits_final; | ||
271 | |||
272 | static unsigned long max_zone_pfns[MAX_NR_ZONES] = { | ||
273 | [0 ... MAX_NR_ZONES - 1] = ~0UL | ||
274 | }; | ||
275 | |||
276 | /* | ||
277 | * Restrict the specified zone and all more restrictive zones | ||
278 | * to be below the specified pfn. May not be called after | ||
279 | * paging_init(). | ||
280 | */ | ||
281 | void __init limit_zone_pfn(enum zone_type zone, unsigned long pfn_limit) | ||
282 | { | ||
283 | int i; | ||
284 | |||
285 | if (WARN_ON(zone_limits_final)) | ||
286 | return; | ||
287 | |||
288 | for (i = zone; i >= 0; i--) { | ||
289 | if (max_zone_pfns[i] > pfn_limit) | ||
290 | max_zone_pfns[i] = pfn_limit; | ||
291 | } | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | * Find the least restrictive zone that is entirely below the | ||
296 | * specified pfn limit. Returns < 0 if no suitable zone is found. | ||
297 | * | ||
298 | * pfn_limit must be u64 because it can exceed 32 bits even on 32-bit | ||
299 | * systems -- the DMA limit can be higher than any possible real pfn. | ||
300 | */ | ||
301 | int dma_pfn_limit_to_zone(u64 pfn_limit) | ||
302 | { | ||
303 | enum zone_type top_zone = ZONE_NORMAL; | ||
304 | int i; | ||
305 | |||
306 | #ifdef CONFIG_HIGHMEM | ||
307 | top_zone = ZONE_HIGHMEM; | ||
308 | #endif | ||
309 | |||
310 | for (i = top_zone; i >= 0; i--) { | ||
311 | if (max_zone_pfns[i] <= pfn_limit) | ||
312 | return i; | ||
313 | } | ||
314 | |||
315 | return -EPERM; | ||
316 | } | ||
262 | 317 | ||
263 | /* | 318 | /* |
264 | * paging_init() sets up the page tables - in fact we've already done this. | 319 | * paging_init() sets up the page tables - in fact we've already done this. |
@@ -267,7 +322,7 @@ void __init paging_init(void) | |||
267 | { | 322 | { |
268 | unsigned long long total_ram = memblock_phys_mem_size(); | 323 | unsigned long long total_ram = memblock_phys_mem_size(); |
269 | phys_addr_t top_of_ram = memblock_end_of_DRAM(); | 324 | phys_addr_t top_of_ram = memblock_end_of_DRAM(); |
270 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | 325 | enum zone_type top_zone; |
271 | 326 | ||
272 | #ifdef CONFIG_PPC32 | 327 | #ifdef CONFIG_PPC32 |
273 | unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1); | 328 | unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1); |
@@ -289,18 +344,20 @@ void __init paging_init(void) | |||
289 | (unsigned long long)top_of_ram, total_ram); | 344 | (unsigned long long)top_of_ram, total_ram); |
290 | printk(KERN_DEBUG "Memory hole size: %ldMB\n", | 345 | printk(KERN_DEBUG "Memory hole size: %ldMB\n", |
291 | (long int)((top_of_ram - total_ram) >> 20)); | 346 | (long int)((top_of_ram - total_ram) >> 20)); |
292 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | 347 | |
293 | #ifdef CONFIG_HIGHMEM | 348 | #ifdef CONFIG_HIGHMEM |
294 | max_zone_pfns[ZONE_DMA] = lowmem_end_addr >> PAGE_SHIFT; | 349 | top_zone = ZONE_HIGHMEM; |
295 | max_zone_pfns[ZONE_HIGHMEM] = top_of_ram >> PAGE_SHIFT; | 350 | limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT); |
296 | #else | 351 | #else |
297 | max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; | 352 | top_zone = ZONE_NORMAL; |
298 | #endif | 353 | #endif |
354 | |||
355 | limit_zone_pfn(top_zone, top_of_ram >> PAGE_SHIFT); | ||
356 | zone_limits_final = true; | ||
299 | free_area_init_nodes(max_zone_pfns); | 357 | free_area_init_nodes(max_zone_pfns); |
300 | 358 | ||
301 | mark_nonram_nosave(); | 359 | mark_nonram_nosave(); |
302 | } | 360 | } |
303 | #endif /* ! CONFIG_NEED_MULTIPLE_NODES */ | ||
304 | 361 | ||
305 | static void __init register_page_bootmem_info(void) | 362 | static void __init register_page_bootmem_info(void) |
306 | { | 363 | { |
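`limit_zone_pfn()` lets platform code clamp a zone (and every more restrictive zone) before `paging_init()` marks the limits final, and `dma_pfn_limit_to_zone()` later maps a device's addressing reach back to a zone index. A hedged sketch of the intended usage; the 31-bit limit and both function names here are illustrative, not taken from this patch:

```c
/* Hypothetical platform hook: clamp ZONE_DMA for a 31-bit DMA engine.
 * Must run before paging_init() sets zone_limits_final. */
void __init example_setup_dma_zone(void)
{
	limit_zone_pfn(ZONE_DMA, 1UL << (31 - PAGE_SHIFT));
}

/* Later: pick an allocation zone for a device's DMA mask. */
static int example_zone_for_mask(u64 dma_mask)
{
	int zone = dma_pfn_limit_to_zone((dma_mask >> PAGE_SHIFT) + 1);

	return (zone < 0) ? ZONE_NORMAL : zone;	/* nothing fits: best effort */
}
```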
diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c
index 78fef6726e10..aa5a7fd89461 100644
--- a/arch/powerpc/mm/mmu_context_hash32.c
+++ b/arch/powerpc/mm/mmu_context_hash32.c
@@ -2,7 +2,7 @@ | |||
2 | * This file contains the routines for handling the MMU on those | 2 | * This file contains the routines for handling the MMU on those |
3 | * PowerPC implementations where the MMU substantially follows the | 3 | * PowerPC implementations where the MMU substantially follows the |
4 | * architecture specification. This includes the 6xx, 7xx, 7xxx, | 4 | * architecture specification. This includes the 6xx, 7xx, 7xxx, |
5 | * 8260, and POWER3 implementations but excludes the 8xx and 4xx. | 5 | * and 8260 implementations but excludes the 8xx and 4xx. |
6 | * -- paulus | 6 | * -- paulus |
7 | * | 7 | * |
8 | * Derived from arch/ppc/mm/init.c: | 8 | * Derived from arch/ppc/mm/init.c: |
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 3b181b22cd46..b9d1dfdbe5bb 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -8,6 +8,8 @@ | |||
8 | * as published by the Free Software Foundation; either version | 8 | * as published by the Free Software Foundation; either version |
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | #define pr_fmt(fmt) "numa: " fmt | ||
12 | |||
11 | #include <linux/threads.h> | 13 | #include <linux/threads.h> |
12 | #include <linux/bootmem.h> | 14 | #include <linux/bootmem.h> |
13 | #include <linux/init.h> | 15 | #include <linux/init.h> |
@@ -538,7 +540,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, | |||
538 | */ | 540 | */ |
539 | static int numa_setup_cpu(unsigned long lcpu) | 541 | static int numa_setup_cpu(unsigned long lcpu) |
540 | { | 542 | { |
541 | int nid; | 543 | int nid = -1; |
542 | struct device_node *cpu; | 544 | struct device_node *cpu; |
543 | 545 | ||
544 | /* | 546 | /* |
@@ -555,19 +557,21 @@ static int numa_setup_cpu(unsigned long lcpu) | |||
555 | 557 | ||
556 | if (!cpu) { | 558 | if (!cpu) { |
557 | WARN_ON(1); | 559 | WARN_ON(1); |
558 | nid = 0; | 560 | if (cpu_present(lcpu)) |
559 | goto out; | 561 | goto out_present; |
562 | else | ||
563 | goto out; | ||
560 | } | 564 | } |
561 | 565 | ||
562 | nid = of_node_to_nid_single(cpu); | 566 | nid = of_node_to_nid_single(cpu); |
563 | 567 | ||
568 | out_present: | ||
564 | if (nid < 0 || !node_online(nid)) | 569 | if (nid < 0 || !node_online(nid)) |
565 | nid = first_online_node; | 570 | nid = first_online_node; |
566 | out: | ||
567 | map_cpu_to_node(lcpu, nid); | ||
568 | 571 | ||
572 | map_cpu_to_node(lcpu, nid); | ||
569 | of_node_put(cpu); | 573 | of_node_put(cpu); |
570 | 574 | out: | |
571 | return nid; | 575 | return nid; |
572 | } | 576 | } |
573 | 577 | ||
@@ -611,8 +615,8 @@ static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action, | |||
611 | case CPU_UP_CANCELED: | 615 | case CPU_UP_CANCELED: |
612 | case CPU_UP_CANCELED_FROZEN: | 616 | case CPU_UP_CANCELED_FROZEN: |
613 | unmap_cpu_from_node(lcpu); | 617 | unmap_cpu_from_node(lcpu); |
614 | break; | ||
615 | ret = NOTIFY_OK; | 618 | ret = NOTIFY_OK; |
619 | break; | ||
616 | #endif | 620 | #endif |
617 | } | 621 | } |
618 | return ret; | 622 | return ret; |
@@ -1049,7 +1053,7 @@ static void __init mark_reserved_regions_for_nid(int nid) | |||
1049 | 1053 | ||
1050 | void __init do_init_bootmem(void) | 1054 | void __init do_init_bootmem(void) |
1051 | { | 1055 | { |
1052 | int nid; | 1056 | int nid, cpu; |
1053 | 1057 | ||
1054 | min_low_pfn = 0; | 1058 | min_low_pfn = 0; |
1055 | max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; | 1059 | max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; |
@@ -1122,16 +1126,14 @@ void __init do_init_bootmem(void) | |||
1122 | 1126 | ||
1123 | reset_numa_cpu_lookup_table(); | 1127 | reset_numa_cpu_lookup_table(); |
1124 | register_cpu_notifier(&ppc64_numa_nb); | 1128 | register_cpu_notifier(&ppc64_numa_nb); |
1125 | cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, | 1129 | /* |
1126 | (void *)(unsigned long)boot_cpuid); | 1130 | * We need the numa_cpu_lookup_table to be accurate for all CPUs, |
1127 | } | 1131 | * even before we online them, so that we can use cpu_to_{node,mem} |
1128 | 1132 | * early in boot, cf. smp_prepare_cpus(). | |
1129 | void __init paging_init(void) | 1133 | */ |
1130 | { | 1134 | for_each_present_cpu(cpu) { |
1131 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | 1135 | numa_setup_cpu((unsigned long)cpu); |
1132 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | 1136 | } |
1133 | max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT; | ||
1134 | free_area_init_nodes(max_zone_pfns); | ||
1135 | } | 1137 | } |
1136 | 1138 | ||
1137 | static int __init early_numa(char *p) | 1139 | static int __init early_numa(char *p) |
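do_init_bootmem() now seeds the NUMA lookup table for every present CPU instead of faking a single CPU_UP_PREPARE notification for the boot CPU, so cpu_to_node()/cpu_to_mem() are already valid in smp_prepare_cpus(), before secondaries come online. A minimal model of that seeding loop, with for_each_present_cpu() and numa_setup_cpu() replaced by stubs:

```c
#include <stdio.h>

#define NR_PRESENT_CPUS 4	/* pretend four CPUs are present */

/* stub standing in for the kernel's numa_setup_cpu() */
static int numa_setup_cpu(unsigned long lcpu)
{
	printf("cpu %lu -> node mapped at boot\n", lcpu);
	return 0;
}

int main(void)
{
	/* models: for_each_present_cpu(cpu) numa_setup_cpu(cpu); */
	for (unsigned long cpu = 0; cpu < NR_PRESENT_CPUS; cpu++)
		numa_setup_cpu(cpu);
	return 0;
}
```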
@@ -1153,6 +1155,22 @@ static int __init early_numa(char *p) | |||
1153 | } | 1155 | } |
1154 | early_param("numa", early_numa); | 1156 | early_param("numa", early_numa); |
1155 | 1157 | ||
1158 | static bool topology_updates_enabled = true; | ||
1159 | |||
1160 | static int __init early_topology_updates(char *p) | ||
1161 | { | ||
1162 | if (!p) | ||
1163 | return 0; | ||
1164 | |||
1165 | if (!strcmp(p, "off")) { | ||
1166 | pr_info("Disabling topology updates\n"); | ||
1167 | topology_updates_enabled = false; | ||
1168 | } | ||
1169 | |||
1170 | return 0; | ||
1171 | } | ||
1172 | early_param("topology_updates", early_topology_updates); | ||
1173 | |||
1156 | #ifdef CONFIG_MEMORY_HOTPLUG | 1174 | #ifdef CONFIG_MEMORY_HOTPLUG |
1157 | /* | 1175 | /* |
1158 | * Find the node associated with a hot added memory section for | 1176 | * Find the node associated with a hot added memory section for |
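The new early_param gives administrators a boot-time switch: passing topology_updates=off on the kernel command line clears topology_updates_enabled before topology_update_init() runs (see the later hunk), and the message below is printed with the "numa: " prefix added by the new pr_fmt. A user-space model of the parser, assuming the literal value "off" is the only one recognized:

```c
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool topology_updates_enabled = true;

static int early_topology_updates(const char *p)
{
	if (!p)				/* "topology_updates" with no value */
		return 0;
	if (!strcmp(p, "off")) {	/* only the literal "off" disables */
		printf("numa: Disabling topology updates\n");
		topology_updates_enabled = false;
	}
	return 0;
}

int main(void)
{
	early_topology_updates("off");	/* as if booted with topology_updates=off */
	printf("enabled: %d\n", topology_updates_enabled);
	return 0;
}
```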
@@ -1442,8 +1460,11 @@ static long hcall_vphn(unsigned long cpu, __be32 *associativity) | |||
1442 | long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; | 1460 | long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; |
1443 | u64 flags = 1; | 1461 | u64 flags = 1; |
1444 | int hwcpu = get_hard_smp_processor_id(cpu); | 1462 | int hwcpu = get_hard_smp_processor_id(cpu); |
1463 | int i; | ||
1445 | 1464 | ||
1446 | rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu); | 1465 | rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu); |
1466 | for (i = 0; i < 6; i++) | ||
1467 | retbuf[i] = cpu_to_be64(retbuf[i]); | ||
1447 | vphn_unpack_associativity(retbuf, associativity); | 1468 | vphn_unpack_associativity(retbuf, associativity); |
1448 | 1469 | ||
1449 | return rc; | 1470 | return rc; |
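The added loop converts the hypervisor's return registers before unpacking: the VPHN associativity data is a packed big-endian byte stream, so on a little-endian kernel each 64-bit value from plpar_hcall9() must pass through cpu_to_be64() before the byte-wise parser reads it. A small illustration; on a big-endian host cpu_to_be64() is a no-op, and __builtin_bswap64 models the little-endian case here:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* fake VPHN return data; real buffers come from plpar_hcall9() */
	uint64_t retbuf[6] = { 0x0001000200030004ULL };

	for (int i = 0; i < 6; i++)	/* mirrors the new kernel loop */
		retbuf[i] = __builtin_bswap64(retbuf[i]);

	printf("first value as the parser now sees it: %#018llx\n",
	       (unsigned long long)retbuf[0]);
	return 0;
}
```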
@@ -1488,11 +1509,14 @@ static int update_cpu_topology(void *data) | |||
1488 | cpu = smp_processor_id(); | 1509 | cpu = smp_processor_id(); |
1489 | 1510 | ||
1490 | for (update = data; update; update = update->next) { | 1511 | for (update = data; update; update = update->next) { |
1512 | int new_nid = update->new_nid; | ||
1491 | if (cpu != update->cpu) | 1513 | if (cpu != update->cpu) |
1492 | continue; | 1514 | continue; |
1493 | 1515 | ||
1494 | unmap_cpu_from_node(update->cpu); | 1516 | unmap_cpu_from_node(cpu); |
1495 | map_cpu_to_node(update->cpu, update->new_nid); | 1517 | map_cpu_to_node(cpu, new_nid); |
1518 | set_cpu_numa_node(cpu, new_nid); | ||
1519 | set_cpu_numa_mem(cpu, local_memory_node(new_nid)); | ||
1496 | vdso_getcpu_init(); | 1520 | vdso_getcpu_init(); |
1497 | } | 1521 | } |
1498 | 1522 | ||
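Beyond remapping the CPU in the lookup table, update_cpu_topology() now also refreshes the per-CPU hints that the scheduler and allocator consult, via set_cpu_numa_node() and set_cpu_numa_mem(). A compact model of the three pieces of state that must move together:

```c
#include <stdio.h>

struct cpu_numa_state {
	int node;	/* numa_cpu_lookup_table entry (map_cpu_to_node) */
	int numa_node;	/* per-CPU node hint (set_cpu_numa_node) */
	int numa_mem;	/* per-CPU memory node hint (set_cpu_numa_mem) */
};

static int local_memory_node(int nid) { return nid; }	/* stub */

static void move_cpu(struct cpu_numa_state *c, int new_nid)
{
	c->node = new_nid;
	c->numa_node = new_nid;
	c->numa_mem = local_memory_node(new_nid);
}

int main(void)
{
	struct cpu_numa_state c = { 0, 0, 0 };
	move_cpu(&c, 1);
	printf("node=%d numa_node=%d numa_mem=%d\n",
	       c.node, c.numa_node, c.numa_mem);
	return 0;
}
```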
@@ -1539,6 +1563,9 @@ int arch_update_cpu_topology(void) | |||
1539 | struct device *dev; | 1563 | struct device *dev; |
1540 | int weight, new_nid, i = 0; | 1564 | int weight, new_nid, i = 0; |
1541 | 1565 | ||
1566 | if (!prrn_enabled && !vphn_enabled) | ||
1567 | return 0; | ||
1568 | |||
1542 | weight = cpumask_weight(&cpu_associativity_changes_mask); | 1569 | weight = cpumask_weight(&cpu_associativity_changes_mask); |
1543 | if (!weight) | 1570 | if (!weight) |
1544 | return 0; | 1571 | return 0; |
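The new guard makes arch_update_cpu_topology() cheap when neither update mechanism is active: with PRRN and VPHN both disabled the change mask can never be populated, so the function can return before touching it. A sketch of the two-level bail-out:

```c
#include <stdbool.h>
#include <stdio.h>

static bool prrn_enabled;	/* both default to false, e.g. after */
static bool vphn_enabled;	/* booting with topology_updates=off  */

static int arch_update_cpu_topology_model(int weight)
{
	if (!prrn_enabled && !vphn_enabled)
		return 0;	/* nothing can have changed: bail early */
	if (!weight)
		return 0;	/* change mask empty */
	return 1;		/* would remap the flagged CPUs */
}

int main(void)
{
	printf("updated: %d\n", arch_update_cpu_topology_model(4));
	return 0;
}
```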
@@ -1592,6 +1619,15 @@ int arch_update_cpu_topology(void) | |||
1592 | cpu = cpu_last_thread_sibling(cpu); | 1619 | cpu = cpu_last_thread_sibling(cpu); |
1593 | } | 1620 | } |
1594 | 1621 | ||
1622 | pr_debug("Topology update for the following CPUs:\n"); | ||
1623 | if (cpumask_weight(&updated_cpus)) { | ||
1624 | for (ud = &updates[0]; ud; ud = ud->next) { | ||
1625 | pr_debug("cpu %d moving from node %d " | ||
1626 | "to %d\n", ud->cpu, | ||
1627 | ud->old_nid, ud->new_nid); | ||
1628 | } | ||
1629 | } | ||
1630 | |||
1595 | /* | 1631 | /* |
1596 | * In cases where we have nothing to update (because the updates list | 1632 | * In cases where we have nothing to update (because the updates list |
1597 | * is too short or because the new topology is same as the old one), | 1633 | * is too short or because the new topology is same as the old one), |
@@ -1800,8 +1836,12 @@ static const struct file_operations topology_ops = { | |||
1800 | 1836 | ||
1801 | static int topology_update_init(void) | 1837 | static int topology_update_init(void) |
1802 | { | 1838 | { |
1803 | start_topology_update(); | 1839 | /* Do not poll for changes if disabled at boot */ |
1804 | proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops); | 1840 | if (topology_updates_enabled) |
1841 | start_topology_update(); | ||
1842 | |||
1843 | if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops)) | ||
1844 | return -ENOMEM; | ||
1805 | 1845 | ||
1806 | return 0; | 1846 | return 0; |
1807 | } | 1847 | } |
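topology_update_init() now honors the boot-time switch and checks proc_create() for failure, returning -ENOMEM instead of silently continuing without the procfs control file. A user-space model with both kernel calls replaced by stubs (the real proc_create has a different signature and returns NULL on failure):

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define ENOMEM 12

static bool topology_updates_enabled = true;	/* cleared by topology_updates=off */

static void start_topology_update(void) { puts("polling started"); }	/* stub */
static void *proc_create_stub(const char *name) { (void)name; return NULL; }

static int topology_update_init(void)
{
	if (topology_updates_enabled)	/* do not poll if disabled at boot */
		start_topology_update();

	if (!proc_create_stub("powerpc/topology_updates"))
		return -ENOMEM;
	return 0;
}

int main(void)
{
	printf("init -> %d\n", topology_update_init());
	return 0;
}
```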
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index c695943a513c..c90e602677c9 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c | |||
@@ -48,7 +48,7 @@ static inline int pte_looks_normal(pte_t pte) | |||
48 | (_PAGE_PRESENT | _PAGE_USER); | 48 | (_PAGE_PRESENT | _PAGE_USER); |
49 | } | 49 | } |
50 | 50 | ||
51 | struct page * maybe_pte_to_page(pte_t pte) | 51 | static struct page *maybe_pte_to_page(pte_t pte) |
52 | { | 52 | { |
53 | unsigned long pfn = pte_pfn(pte); | 53 | unsigned long pfn = pte_pfn(pte); |
54 | struct page *page; | 54 | struct page *page; |
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 343a87fa78b5..cf11342bf519 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c | |||
@@ -41,7 +41,7 @@ unsigned long ioremap_base; | |||
41 | unsigned long ioremap_bot; | 41 | unsigned long ioremap_bot; |
42 | EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ | 42 | EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ |
43 | 43 | ||
44 | #if defined(CONFIG_6xx) || defined(CONFIG_POWER3) | 44 | #ifdef CONFIG_6xx |
45 | #define HAVE_BATS 1 | 45 | #define HAVE_BATS 1 |
46 | #endif | 46 | #endif |
47 | 47 | ||
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index f6ce1f111f5b..c8d709ab489d 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c | |||
@@ -54,6 +54,9 @@ | |||
54 | 54 | ||
55 | #include "mmu_decl.h" | 55 | #include "mmu_decl.h" |
56 | 56 | ||
57 | #define CREATE_TRACE_POINTS | ||
58 | #include <trace/events/thp.h> | ||
59 | |||
57 | /* Some sanity checking */ | 60 | /* Some sanity checking */ |
58 | #if TASK_SIZE_USER64 > PGTABLE_RANGE | 61 | #if TASK_SIZE_USER64 > PGTABLE_RANGE |
59 | #error TASK_SIZE_USER64 exceeds pagetable range | 62 | #error TASK_SIZE_USER64 exceeds pagetable range |
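Defining CREATE_TRACE_POINTS before including trace/events/thp.h is the standard way to instantiate tracepoints: every other includer only sees declarations of the trace_*() hooks, while this one translation unit also emits their definitions. A user-space caricature of the idiom; the kernel's real TRACE_EVENT machinery is far more involved, and the macro and hook signature below are invented for illustration:

```c
#include <stdio.h>

#define CREATE_TRACE_POINTS	/* what pgtable_64.c now does */

/* --- stand-in for trace/events/thp.h --- */
#ifdef CREATE_TRACE_POINTS
#define TRACE_EVENT_MODEL(name) \
	void trace_##name(unsigned long addr) { printf(#name ": %#lx\n", addr); }
#else
#define TRACE_EVENT_MODEL(name) void trace_##name(unsigned long addr);
#endif
TRACE_EVENT_MODEL(hugepage_set_pmd)
/* --- end of modeled header --- */

int main(void)
{
	trace_hugepage_set_pmd(0x3fff80000000UL);
	return 0;
}
```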
@@ -68,7 +71,7 @@ | |||
68 | unsigned long ioremap_bot = IOREMAP_BASE; | 71 | unsigned long ioremap_bot = IOREMAP_BASE; |
69 | 72 | ||
70 | #ifdef CONFIG_PPC_MMU_NOHASH | 73 | #ifdef CONFIG_PPC_MMU_NOHASH |
71 | static void *early_alloc_pgtable(unsigned long size) | 74 | static __ref void *early_alloc_pgtable(unsigned long size) |
72 | { | 75 | { |
73 | void *pt; | 76 | void *pt; |
74 | 77 | ||
@@ -537,8 +540,9 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, | |||
537 | old = pmd_val(*pmdp); | 540 | old = pmd_val(*pmdp); |
538 | *pmdp = __pmd((old & ~clr) | set); | 541 | *pmdp = __pmd((old & ~clr) | set); |
539 | #endif | 542 | #endif |
543 | trace_hugepage_update(addr, old, clr, set); | ||
540 | if (old & _PAGE_HASHPTE) | 544 | if (old & _PAGE_HASHPTE) |
541 | hpte_do_hugepage_flush(mm, addr, pmdp); | 545 | hpte_do_hugepage_flush(mm, addr, pmdp, old); |
542 | return old; | 546 | return old; |
543 | } | 547 | } |
544 | 548 | ||
@@ -642,10 +646,11 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, | |||
642 | * If we didn't have the splitting flag set, go and flush the | 646 | * If we didn't have the splitting flag set, go and flush the |
643 | * HPTE entries. | 647 | * HPTE entries. |
644 | */ | 648 | */ |
649 | trace_hugepage_splitting(address, old); | ||
645 | if (!(old & _PAGE_SPLITTING)) { | 650 | if (!(old & _PAGE_SPLITTING)) { |
646 | /* We need to flush the hpte */ | 651 | /* We need to flush the hpte */ |
647 | if (old & _PAGE_HASHPTE) | 652 | if (old & _PAGE_HASHPTE) |
648 | hpte_do_hugepage_flush(vma->vm_mm, address, pmdp); | 653 | hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old); |
649 | } | 654 | } |
650 | /* | 655 | /* |
651 | * This ensures that generic code that relies on IRQ disabling | 656 | * This ensures that generic code that relies on IRQ disabling |
@@ -709,6 +714,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, | |||
709 | assert_spin_locked(&mm->page_table_lock); | 714 | assert_spin_locked(&mm->page_table_lock); |
710 | WARN_ON(!pmd_trans_huge(pmd)); | 715 | WARN_ON(!pmd_trans_huge(pmd)); |
711 | #endif | 716 | #endif |
717 | trace_hugepage_set_pmd(addr, pmd); | ||
712 | return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); | 718 | return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); |
713 | } | 719 | } |
714 | 720 | ||
@@ -723,7 +729,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, | |||
723 | * needs to be flushed. | 729 | * needs to be flushed. |
724 | */ | 730 | */ |
725 | void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, | 731 | void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, |
726 | pmd_t *pmdp) | 732 | pmd_t *pmdp, unsigned long old_pmd) |
727 | { | 733 | { |
728 | int ssize, i; | 734 | int ssize, i; |
729 | unsigned long s_addr; | 735 | unsigned long s_addr; |
@@ -745,12 +751,29 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, | |||
745 | if (!hpte_slot_array) | 751 | if (!hpte_slot_array) |
746 | return; | 752 | return; |
747 | 753 | ||
748 | /* get the base page size */ | 754 | /* get the base page size, vsid and segment size */ |
755 | #ifdef CONFIG_DEBUG_VM | ||
749 | psize = get_slice_psize(mm, s_addr); | 756 | psize = get_slice_psize(mm, s_addr); |
757 | BUG_ON(psize == MMU_PAGE_16M); | ||
758 | #endif | ||
759 | if (old_pmd & _PAGE_COMBO) | ||
760 | psize = MMU_PAGE_4K; | ||
761 | else | ||
762 | psize = MMU_PAGE_64K; | ||
763 | |||
764 | if (!is_kernel_addr(s_addr)) { | ||
765 | ssize = user_segment_size(s_addr); | ||
766 | vsid = get_vsid(mm->context.id, s_addr, ssize); | ||
767 | WARN_ON(vsid == 0); | ||
768 | } else { | ||
769 | vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize); | ||
770 | ssize = mmu_kernel_ssize; | ||
771 | } | ||
750 | 772 | ||
751 | if (ppc_md.hugepage_invalidate) | 773 | if (ppc_md.hugepage_invalidate) |
752 | return ppc_md.hugepage_invalidate(mm, hpte_slot_array, | 774 | return ppc_md.hugepage_invalidate(vsid, s_addr, |
753 | s_addr, psize); | 775 | hpte_slot_array, |
776 | psize, ssize); | ||
754 | /* | 777 | /* |
755 | * No bulk hpte removal support, invalidate each entry | 778 | * No bulk hpte removal support, invalidate each entry |
756 | */ | 779 | */ |
@@ -768,15 +791,6 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, | |||
768 | 791 | ||
769 | /* get the vpn */ | 792 | /* get the vpn */ |
770 | addr = s_addr + (i * (1ul << shift)); | 793 | addr = s_addr + (i * (1ul << shift)); |
771 | if (!is_kernel_addr(addr)) { | ||
772 | ssize = user_segment_size(addr); | ||
773 | vsid = get_vsid(mm->context.id, addr, ssize); | ||
774 | WARN_ON(vsid == 0); | ||
775 | } else { | ||
776 | vsid = get_kernel_vsid(addr, mmu_kernel_ssize); | ||
777 | ssize = mmu_kernel_ssize; | ||
778 | } | ||
779 | |||
780 | vpn = hpt_vpn(addr, vsid, ssize); | 794 | vpn = hpt_vpn(addr, vsid, ssize); |
781 | hash = hpt_hash(vpn, shift, ssize); | 795 | hash = hpt_hash(vpn, shift, ssize); |
782 | if (hidx & _PTEIDX_SECONDARY) | 796 | if (hidx & _PTEIDX_SECONDARY) |
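With the old PMD value passed in, hpte_do_hugepage_flush() no longer probes the slice map to learn the base page size: a _PAGE_COMBO hugepage is backed by 4K HPTEs, otherwise 64K. The VSID and segment size are likewise computed once for the whole hugepage rather than per subpage, since all of its slots fall inside a single segment. An illustrative sketch with stand-in constants and a fake VSID helper:

```c
#include <stdbool.h>
#include <stdio.h>

#define PAGE_COMBO 0x1UL	/* stand-in for _PAGE_COMBO */
enum { PSIZE_4K, PSIZE_64K };

static bool is_kernel_addr(unsigned long a) { return a >= 0xc000000000000000UL; }
static unsigned long fake_vsid(unsigned long addr) { return addr >> 28; }

static void flush_hugepage_model(unsigned long s_addr, unsigned long old_pmd)
{
	int psize = (old_pmd & PAGE_COMBO) ? PSIZE_4K : PSIZE_64K;
	unsigned long vsid = fake_vsid(s_addr);	/* get_vsid()/get_kernel_vsid() */
	const char *seg = is_kernel_addr(s_addr) ? "kernel" : "user";

	/* the per-slot loop now reuses psize/vsid instead of recomputing them */
	printf("%s segment, vsid %#lx, base page %s\n",
	       seg, vsid, psize == PSIZE_4K ? "4K" : "64K");
}

int main(void)
{
	flush_hugepage_model(0x10000000UL, PAGE_COMBO);	/* user, combo -> 4K */
	return 0;
}
```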
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 11571e118831..5029dc19b517 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * This file contains the routines for handling the MMU on those | 2 | * This file contains the routines for handling the MMU on those |
3 | * PowerPC implementations where the MMU substantially follows the | 3 | * PowerPC implementations where the MMU substantially follows the |
4 | * architecture specification. This includes the 6xx, 7xx, 7xxx, | 4 | * architecture specification. This includes the 6xx, 7xx, 7xxx, |
5 | * 8260, and POWER3 implementations but excludes the 8xx and 4xx. | 5 | * and 8260 implementations but excludes the 8xx and 4xx. |
6 | * -- paulus | 6 | * -- paulus |
7 | * | 7 | * |
8 | * Derived from arch/ppc/mm/init.c: | 8 | * Derived from arch/ppc/mm/init.c: |
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 0399a6702958..6e450ca66526 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c | |||
@@ -46,9 +46,6 @@ static inline unsigned long mk_esid_data(unsigned long ea, int ssize, | |||
46 | return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot; | 46 | return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot; |
47 | } | 47 | } |
48 | 48 | ||
49 | #define slb_vsid_shift(ssize) \ | ||
50 | ((ssize) == MMU_SEGSIZE_256M? SLB_VSID_SHIFT: SLB_VSID_SHIFT_1T) | ||
51 | |||
52 | static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, | 49 | static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, |
53 | unsigned long flags) | 50 | unsigned long flags) |
54 | { | 51 | { |
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index b0c75cc15efc..ded0ea1afde4 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c | |||
@@ -30,9 +30,11 @@ | |||
30 | #include <linux/err.h> | 30 | #include <linux/err.h> |
31 | #include <linux/spinlock.h> | 31 | #include <linux/spinlock.h> |
32 | #include <linux/export.h> | 32 | #include <linux/export.h> |
33 | #include <linux/hugetlb.h> | ||
33 | #include <asm/mman.h> | 34 | #include <asm/mman.h> |
34 | #include <asm/mmu.h> | 35 | #include <asm/mmu.h> |
35 | #include <asm/spu.h> | 36 | #include <asm/copro.h> |
37 | #include <asm/hugetlb.h> | ||
36 | 38 | ||
37 | /* some sanity checks */ | 39 | /* some sanity checks */ |
38 | #if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE | 40 | #if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE |
@@ -232,9 +234,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz | |||
232 | 234 | ||
233 | spin_unlock_irqrestore(&slice_convert_lock, flags); | 235 | spin_unlock_irqrestore(&slice_convert_lock, flags); |
234 | 236 | ||
235 | #ifdef CONFIG_SPU_BASE | 237 | copro_flush_all_slbs(mm); |
236 | spu_flush_all_slbs(mm); | ||
237 | #endif | ||
238 | } | 238 | } |
239 | 239 | ||
240 | /* | 240 | /* |
@@ -671,9 +671,7 @@ void slice_set_psize(struct mm_struct *mm, unsigned long address, | |||
671 | 671 | ||
672 | spin_unlock_irqrestore(&slice_convert_lock, flags); | 672 | spin_unlock_irqrestore(&slice_convert_lock, flags); |
673 | 673 | ||
674 | #ifdef CONFIG_SPU_BASE | 674 | copro_flush_all_slbs(mm); |
675 | spu_flush_all_slbs(mm); | ||
676 | #endif | ||
677 | } | 675 | } |
678 | 676 | ||
679 | void slice_set_range_psize(struct mm_struct *mm, unsigned long start, | 677 | void slice_set_range_psize(struct mm_struct *mm, unsigned long start, |
@@ -684,6 +682,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start, | |||
684 | slice_convert(mm, mask, psize); | 682 | slice_convert(mm, mask, psize); |
685 | } | 683 | } |
686 | 684 | ||
685 | #ifdef CONFIG_HUGETLB_PAGE | ||
687 | /* | 686 | /* |
688 | * is_hugepage_only_range() is used by generic code to verify whether | 687 | * is_hugepage_only_range() is used by generic code to verify whether |
689 | * a normal mmap mapping (non hugetlbfs) is valid on a given area. | 688 | * a normal mmap mapping (non hugetlbfs) is valid on a given area. |
@@ -728,4 +727,4 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, | |||
728 | #endif | 727 | #endif |
729 | return !slice_check_fit(mask, available); | 728 | return !slice_check_fit(mask, available); |
730 | } | 729 | } |
731 | 730 | #endif | |
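slice.c can now call copro_flush_all_slbs() unconditionally because the #ifdef moved into the header: when coprocessor support is compiled out, the call resolves to an empty inline stub and disappears. A sketch of the pattern, assuming a header shaped like the kernel's asm/copro.h:

```c
#include <stddef.h>

struct mm_struct;	/* opaque for this sketch */

/* --- header pattern --- */
#ifdef CONFIG_PPC_COPRO_BASE
void copro_flush_all_slbs(struct mm_struct *mm);
#else
static inline void copro_flush_all_slbs(struct mm_struct *mm) { (void)mm; }
#endif
/* --- end header pattern --- */

int main(void)
{
	copro_flush_all_slbs(NULL);	/* compiles to nothing when support is off */
	return 0;
}
```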
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c deleted file mode 100644 index 3f8efa6f2997..000000000000 --- a/arch/powerpc/mm/stab.c +++ /dev/null | |||
@@ -1,286 +0,0 @@ | |||
1 | /* | ||
2 | * PowerPC64 Segment Translation Support. | ||
3 | * | ||
4 | * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com | ||
5 | * Copyright (c) 2001 Dave Engebretsen | ||
6 | * | ||
7 | * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | */ | ||
14 | |||
15 | #include <linux/memblock.h> | ||
16 | |||
17 | #include <asm/pgtable.h> | ||
18 | #include <asm/mmu.h> | ||
19 | #include <asm/mmu_context.h> | ||
20 | #include <asm/paca.h> | ||
21 | #include <asm/cputable.h> | ||
22 | #include <asm/prom.h> | ||
23 | |||
24 | struct stab_entry { | ||
25 | unsigned long esid_data; | ||
26 | unsigned long vsid_data; | ||
27 | }; | ||
28 | |||
29 | #define NR_STAB_CACHE_ENTRIES 8 | ||
30 | static DEFINE_PER_CPU(long, stab_cache_ptr); | ||
31 | static DEFINE_PER_CPU(long [NR_STAB_CACHE_ENTRIES], stab_cache); | ||
32 | |||
33 | /* | ||
34 | * Create a segment table entry for the given esid/vsid pair. | ||
35 | */ | ||
36 | static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) | ||
37 | { | ||
38 | unsigned long esid_data, vsid_data; | ||
39 | unsigned long entry, group, old_esid, castout_entry, i; | ||
40 | unsigned int global_entry; | ||
41 | struct stab_entry *ste, *castout_ste; | ||
42 | unsigned long kernel_segment = (esid << SID_SHIFT) >= PAGE_OFFSET; | ||
43 | |||
44 | vsid_data = vsid << STE_VSID_SHIFT; | ||
45 | esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V; | ||
46 | if (! kernel_segment) | ||
47 | esid_data |= STE_ESID_KS; | ||
48 | |||
49 | /* Search the primary group first. */ | ||
50 | global_entry = (esid & 0x1f) << 3; | ||
51 | ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); | ||
52 | |||
53 | /* Find an empty entry, if one exists. */ | ||
54 | for (group = 0; group < 2; group++) { | ||
55 | for (entry = 0; entry < 8; entry++, ste++) { | ||
56 | if (!(ste->esid_data & STE_ESID_V)) { | ||
57 | ste->vsid_data = vsid_data; | ||
58 | eieio(); | ||
59 | ste->esid_data = esid_data; | ||
60 | return (global_entry | entry); | ||
61 | } | ||
62 | } | ||
63 | /* Now search the secondary group. */ | ||
64 | global_entry = ((~esid) & 0x1f) << 3; | ||
65 | ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * Could not find empty entry, pick one with a round robin selection. | ||
70 | * Search all entries in the two groups. | ||
71 | */ | ||
72 | castout_entry = get_paca()->stab_rr; | ||
73 | for (i = 0; i < 16; i++) { | ||
74 | if (castout_entry < 8) { | ||
75 | global_entry = (esid & 0x1f) << 3; | ||
76 | ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); | ||
77 | castout_ste = ste + castout_entry; | ||
78 | } else { | ||
79 | global_entry = ((~esid) & 0x1f) << 3; | ||
80 | ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); | ||
81 | castout_ste = ste + (castout_entry - 8); | ||
82 | } | ||
83 | |||
84 | /* Dont cast out the first kernel segment */ | ||
85 | if ((castout_ste->esid_data & ESID_MASK) != PAGE_OFFSET) | ||
86 | break; | ||
87 | |||
88 | castout_entry = (castout_entry + 1) & 0xf; | ||
89 | } | ||
90 | |||
91 | get_paca()->stab_rr = (castout_entry + 1) & 0xf; | ||
92 | |||
93 | /* Modify the old entry to the new value. */ | ||
94 | |||
95 | /* Force previous translations to complete. DRENG */ | ||
96 | asm volatile("isync" : : : "memory"); | ||
97 | |||
98 | old_esid = castout_ste->esid_data >> SID_SHIFT; | ||
99 | castout_ste->esid_data = 0; /* Invalidate old entry */ | ||
100 | |||
101 | asm volatile("sync" : : : "memory"); /* Order update */ | ||
102 | |||
103 | castout_ste->vsid_data = vsid_data; | ||
104 | eieio(); /* Order update */ | ||
105 | castout_ste->esid_data = esid_data; | ||
106 | |||
107 | asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); | ||
108 | /* Ensure completion of slbie */ | ||
109 | asm volatile("sync" : : : "memory"); | ||
110 | |||
111 | return (global_entry | (castout_entry & 0x7)); | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | * Allocate a segment table entry for the given ea and mm | ||
116 | */ | ||
117 | static int __ste_allocate(unsigned long ea, struct mm_struct *mm) | ||
118 | { | ||
119 | unsigned long vsid; | ||
120 | unsigned char stab_entry; | ||
121 | unsigned long offset; | ||
122 | |||
123 | /* Kernel or user address? */ | ||
124 | if (is_kernel_addr(ea)) { | ||
125 | vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M); | ||
126 | } else { | ||
127 | if ((ea >= TASK_SIZE_USER64) || (! mm)) | ||
128 | return 1; | ||
129 | |||
130 | vsid = get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M); | ||
131 | } | ||
132 | |||
133 | stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid); | ||
134 | |||
135 | if (!is_kernel_addr(ea)) { | ||
136 | offset = __get_cpu_var(stab_cache_ptr); | ||
137 | if (offset < NR_STAB_CACHE_ENTRIES) | ||
138 | __get_cpu_var(stab_cache[offset++]) = stab_entry; | ||
139 | else | ||
140 | offset = NR_STAB_CACHE_ENTRIES+1; | ||
141 | __get_cpu_var(stab_cache_ptr) = offset; | ||
142 | |||
143 | /* Order update */ | ||
144 | asm volatile("sync":::"memory"); | ||
145 | } | ||
146 | |||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | int ste_allocate(unsigned long ea) | ||
151 | { | ||
152 | return __ste_allocate(ea, current->mm); | ||
153 | } | ||
154 | |||
155 | /* | ||
156 | * Do the segment table work for a context switch: flush all user | ||
157 | * entries from the table, then preload some probably useful entries | ||
158 | * for the new task | ||
159 | */ | ||
160 | void switch_stab(struct task_struct *tsk, struct mm_struct *mm) | ||
161 | { | ||
162 | struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; | ||
163 | struct stab_entry *ste; | ||
164 | unsigned long offset; | ||
165 | unsigned long pc = KSTK_EIP(tsk); | ||
166 | unsigned long stack = KSTK_ESP(tsk); | ||
167 | unsigned long unmapped_base; | ||
168 | |||
169 | /* Force previous translations to complete. DRENG */ | ||
170 | asm volatile("isync" : : : "memory"); | ||
171 | |||
172 | /* | ||
173 | * We need interrupts hard-disabled here, not just soft-disabled, | ||
174 | * so that a PMU interrupt can't occur, which might try to access | ||
175 | * user memory (to get a stack trace) and possible cause an STAB miss | ||
176 | * which would update the stab_cache/stab_cache_ptr per-cpu variables. | ||
177 | */ | ||
178 | hard_irq_disable(); | ||
179 | |||
180 | offset = __get_cpu_var(stab_cache_ptr); | ||
181 | if (offset <= NR_STAB_CACHE_ENTRIES) { | ||
182 | int i; | ||
183 | |||
184 | for (i = 0; i < offset; i++) { | ||
185 | ste = stab + __get_cpu_var(stab_cache[i]); | ||
186 | ste->esid_data = 0; /* invalidate entry */ | ||
187 | } | ||
188 | } else { | ||
189 | unsigned long entry; | ||
190 | |||
191 | /* Invalidate all entries. */ | ||
192 | ste = stab; | ||
193 | |||
194 | /* Never flush the first entry. */ | ||
195 | ste += 1; | ||
196 | for (entry = 1; | ||
197 | entry < (HW_PAGE_SIZE / sizeof(struct stab_entry)); | ||
198 | entry++, ste++) { | ||
199 | unsigned long ea; | ||
200 | ea = ste->esid_data & ESID_MASK; | ||
201 | if (!is_kernel_addr(ea)) { | ||
202 | ste->esid_data = 0; | ||
203 | } | ||
204 | } | ||
205 | } | ||
206 | |||
207 | asm volatile("sync; slbia; sync":::"memory"); | ||
208 | |||
209 | __get_cpu_var(stab_cache_ptr) = 0; | ||
210 | |||
211 | /* Now preload some entries for the new task */ | ||
212 | if (test_tsk_thread_flag(tsk, TIF_32BIT)) | ||
213 | unmapped_base = TASK_UNMAPPED_BASE_USER32; | ||
214 | else | ||
215 | unmapped_base = TASK_UNMAPPED_BASE_USER64; | ||
216 | |||
217 | __ste_allocate(pc, mm); | ||
218 | |||
219 | if (GET_ESID(pc) == GET_ESID(stack)) | ||
220 | return; | ||
221 | |||
222 | __ste_allocate(stack, mm); | ||
223 | |||
224 | if ((GET_ESID(pc) == GET_ESID(unmapped_base)) | ||
225 | || (GET_ESID(stack) == GET_ESID(unmapped_base))) | ||
226 | return; | ||
227 | |||
228 | __ste_allocate(unmapped_base, mm); | ||
229 | |||
230 | /* Order update */ | ||
231 | asm volatile("sync" : : : "memory"); | ||
232 | } | ||
233 | |||
234 | /* | ||
235 | * Allocate segment tables for secondary CPUs. These must all go in | ||
236 | * the first (bolted) segment, so that do_stab_bolted won't get a | ||
237 | * recursive segment miss on the segment table itself. | ||
238 | */ | ||
239 | void __init stabs_alloc(void) | ||
240 | { | ||
241 | int cpu; | ||
242 | |||
243 | if (mmu_has_feature(MMU_FTR_SLB)) | ||
244 | return; | ||
245 | |||
246 | for_each_possible_cpu(cpu) { | ||
247 | unsigned long newstab; | ||
248 | |||
249 | if (cpu == 0) | ||
250 | continue; /* stab for CPU 0 is statically allocated */ | ||
251 | |||
252 | newstab = memblock_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE, | ||
253 | 1<<SID_SHIFT); | ||
254 | newstab = (unsigned long)__va(newstab); | ||
255 | |||
256 | memset((void *)newstab, 0, HW_PAGE_SIZE); | ||
257 | |||
258 | paca[cpu].stab_addr = newstab; | ||
259 | paca[cpu].stab_real = __pa(newstab); | ||
260 | printk(KERN_INFO "Segment table for CPU %d at 0x%llx " | ||
261 | "virtual, 0x%llx absolute\n", | ||
262 | cpu, paca[cpu].stab_addr, paca[cpu].stab_real); | ||
263 | } | ||
264 | } | ||
265 | |||
266 | /* | ||
267 | * Build an entry for the base kernel segment and put it into | ||
268 | * the segment table or SLB. All other segment table or SLB | ||
269 | * entries are faulted in. | ||
270 | */ | ||
271 | void stab_initialize(unsigned long stab) | ||
272 | { | ||
273 | unsigned long vsid = get_kernel_vsid(PAGE_OFFSET, MMU_SEGSIZE_256M); | ||
274 | unsigned long stabreal; | ||
275 | |||
276 | asm volatile("isync; slbia; isync":::"memory"); | ||
277 | make_ste(stab, GET_ESID(PAGE_OFFSET), vsid); | ||
278 | |||
279 | /* Order update */ | ||
280 | asm volatile("sync":::"memory"); | ||
281 | |||
282 | /* Set ASR */ | ||
283 | stabreal = get_paca()->stab_real | 0x1ul; | ||
284 | |||
285 | mtspr(SPRN_ASR, stabreal); | ||
286 | } | ||
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index c99f6510a0b2..d2a94b85dbc2 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c | |||
@@ -30,6 +30,8 @@ | |||
30 | #include <asm/tlb.h> | 30 | #include <asm/tlb.h> |
31 | #include <asm/bug.h> | 31 | #include <asm/bug.h> |
32 | 32 | ||
33 | #include <trace/events/thp.h> | ||
34 | |||
33 | DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); | 35 | DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); |
34 | 36 | ||
35 | /* | 37 | /* |
@@ -213,10 +215,12 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, | |||
213 | if (ptep == NULL) | 215 | if (ptep == NULL) |
214 | continue; | 216 | continue; |
215 | pte = pte_val(*ptep); | 217 | pte = pte_val(*ptep); |
218 | if (hugepage_shift) | ||
219 | trace_hugepage_invalidate(start, pte_val(pte)); | ||
216 | if (!(pte & _PAGE_HASHPTE)) | 220 | if (!(pte & _PAGE_HASHPTE)) |
217 | continue; | 221 | continue; |
218 | if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte))) | 222 | if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte))) |
219 | hpte_do_hugepage_flush(mm, start, (pmd_t *)pte); | 223 | hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte); |
220 | else | 224 | else |
221 | hpte_need_flush(mm, start, ptep, pte, 0); | 225 | hpte_need_flush(mm, start, ptep, pte, 0); |
222 | } | 226 | } |
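Note the subtle fix folded into this hunk: the old call cast the saved PTE value to a pmd_t pointer, handing hpte_do_hugepage_flush() garbage as its table pointer, while the new call passes the real ptep and supplies the snapshot through the added fourth parameter. A toy demonstration of why the pointer and the value must stay separate:

```c
#include <stdio.h>

typedef unsigned long pte_t;

/* models the fixed signature: pointer to the entry plus its old value */
static void flush_model(pte_t *ptep, pte_t old)
{
	printf("entry at %p, old value %#lx\n", (void *)ptep, old);
}

int main(void)
{
	pte_t entry = 0x8000000000000196UL;
	pte_t old = entry;		/* snapshot taken before modification */

	flush_model(&entry, old);	/* after the fix: ptep and value apart */
	/* before the fix, the call amounted to flush_model((pte_t *)old, ...);
	 * dereferencing that bogus "pointer" reads an unrelated address */
	return 0;
}
```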
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 356e8b41fb09..89bf95bd63b1 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S | |||
@@ -296,9 +296,12 @@ itlb_miss_fault_bolted: | |||
296 | * r14 = page table base | 296 | * r14 = page table base |
297 | * r13 = PACA | 297 | * r13 = PACA |
298 | * r11 = tlb_per_core ptr | 298 | * r11 = tlb_per_core ptr |
299 | * r10 = cpu number | 299 | * r10 = crap (free to use) |
300 | */ | 300 | */ |
301 | tlb_miss_common_e6500: | 301 | tlb_miss_common_e6500: |
302 | crmove cr2*4+2,cr0*4+2 /* cr2.eq != 0 if kernel address */ | ||
303 | |||
304 | BEGIN_FTR_SECTION /* CPU_FTR_SMT */ | ||
302 | /* | 305 | /* |
303 | * Search if we already have an indirect entry for that virtual | 306 | * Search if we already have an indirect entry for that virtual |
304 | * address, and if we do, bail out. | 307 | * address, and if we do, bail out. |
@@ -309,6 +312,7 @@ tlb_miss_common_e6500: | |||
309 | lhz r10,PACAPACAINDEX(r13) | 312 | lhz r10,PACAPACAINDEX(r13) |
310 | cmpdi r15,0 | 313 | cmpdi r15,0 |
311 | cmpdi cr1,r15,1 /* set cr1.eq = 0 for non-recursive */ | 314 | cmpdi cr1,r15,1 /* set cr1.eq = 0 for non-recursive */ |
315 | addi r10,r10,1 | ||
312 | bne 2f | 316 | bne 2f |
313 | stbcx. r10,0,r11 | 317 | stbcx. r10,0,r11 |
314 | bne 1b | 318 | bne 1b |
@@ -322,18 +326,62 @@ tlb_miss_common_e6500: | |||
322 | b 1b | 326 | b 1b |
323 | .previous | 327 | .previous |
324 | 328 | ||
329 | /* | ||
330 | * Erratum A-008139 says that we can't use tlbwe to change | ||
331 | * an indirect entry in any way (including replacing or | ||
332 | * invalidating) if the other thread could be in the process | ||
333 | * of a lookup. The workaround is to invalidate the entry | ||
334 | * with tlbilx before overwriting. | ||
335 | */ | ||
336 | |||
337 | lbz r15,TCD_ESEL_NEXT(r11) | ||
338 | rlwinm r10,r15,16,0xff0000 | ||
339 | oris r10,r10,MAS0_TLBSEL(1)@h | ||
340 | mtspr SPRN_MAS0,r10 | ||
341 | isync | ||
342 | tlbre | ||
343 | mfspr r15,SPRN_MAS1 | ||
344 | andis. r15,r15,MAS1_VALID@h | ||
345 | beq 5f | ||
346 | |||
347 | BEGIN_FTR_SECTION_NESTED(532) | ||
348 | mfspr r10,SPRN_MAS8 | ||
349 | rlwinm r10,r10,0,0x80000fff /* tgs,tlpid -> sgs,slpid */ | ||
350 | mtspr SPRN_MAS5,r10 | ||
351 | END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532) | ||
352 | |||
353 | mfspr r10,SPRN_MAS1 | ||
354 | rlwinm r15,r10,0,0x3fff0000 /* tid -> spid */ | ||
355 | rlwimi r15,r10,20,0x00000003 /* ind,ts -> sind,sas */ | ||
356 | mfspr r10,SPRN_MAS6 | ||
357 | mtspr SPRN_MAS6,r15 | ||
358 | |||
325 | mfspr r15,SPRN_MAS2 | 359 | mfspr r15,SPRN_MAS2 |
360 | isync | ||
361 | tlbilxva 0,r15 | ||
362 | isync | ||
363 | |||
364 | mtspr SPRN_MAS6,r10 | ||
365 | |||
366 | 5: | ||
367 | BEGIN_FTR_SECTION_NESTED(532) | ||
368 | li r10,0 | ||
369 | mtspr SPRN_MAS8,r10 | ||
370 | mtspr SPRN_MAS5,r10 | ||
371 | END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532) | ||
326 | 372 | ||
327 | tlbsx 0,r16 | 373 | tlbsx 0,r16 |
328 | mfspr r10,SPRN_MAS1 | 374 | mfspr r10,SPRN_MAS1 |
329 | andis. r10,r10,MAS1_VALID@h | 375 | andis. r15,r10,MAS1_VALID@h |
330 | bne tlb_miss_done_e6500 | 376 | bne tlb_miss_done_e6500 |
331 | 377 | FTR_SECTION_ELSE | |
332 | /* Undo MAS-damage from the tlbsx */ | ||
333 | mfspr r10,SPRN_MAS1 | 378 | mfspr r10,SPRN_MAS1 |
379 | ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) | ||
380 | |||
334 | oris r10,r10,MAS1_VALID@h | 381 | oris r10,r10,MAS1_VALID@h |
335 | mtspr SPRN_MAS1,r10 | 382 | beq cr2,4f |
336 | mtspr SPRN_MAS2,r15 | 383 | rlwinm r10,r10,0,16,1 /* Clear TID */ |
384 | 4: mtspr SPRN_MAS1,r10 | ||
337 | 385 | ||
338 | /* Now, we need to walk the page tables. First check if we are in | 386 | /* Now, we need to walk the page tables. First check if we are in |
339 | * range. | 387 | * range. |
@@ -394,11 +442,13 @@ tlb_miss_common_e6500: | |||
394 | 442 | ||
395 | tlb_miss_done_e6500: | 443 | tlb_miss_done_e6500: |
396 | .macro tlb_unlock_e6500 | 444 | .macro tlb_unlock_e6500 |
445 | BEGIN_FTR_SECTION | ||
397 | beq cr1,1f /* no unlock if lock was recursively grabbed */ | 446 | beq cr1,1f /* no unlock if lock was recursively grabbed */ |
398 | li r15,0 | 447 | li r15,0 |
399 | isync | 448 | isync |
400 | stb r15,0(r11) | 449 | stb r15,0(r11) |
401 | 1: | 450 | 1: |
451 | END_FTR_SECTION_IFSET(CPU_FTR_SMT) | ||
402 | .endm | 452 | .endm |
403 | 453 | ||
404 | tlb_unlock_e6500 | 454 | tlb_unlock_e6500 |
@@ -407,12 +457,9 @@ tlb_miss_done_e6500: | |||
407 | rfi | 457 | rfi |
408 | 458 | ||
409 | tlb_miss_kernel_e6500: | 459 | tlb_miss_kernel_e6500: |
410 | mfspr r10,SPRN_MAS1 | ||
411 | ld r14,PACA_KERNELPGD(r13) | 460 | ld r14,PACA_KERNELPGD(r13) |
412 | cmpldi cr0,r15,8 /* Check for vmalloc region */ | 461 | cmpldi cr1,r15,8 /* Check for vmalloc region */ |
413 | rlwinm r10,r10,0,16,1 /* Clear TID */ | 462 | beq+ cr1,tlb_miss_common_e6500 |
414 | mtspr SPRN_MAS1,r10 | ||
415 | beq+ tlb_miss_common_e6500 | ||
416 | 463 | ||
417 | tlb_miss_fault_e6500: | 464 | tlb_miss_fault_e6500: |
418 | tlb_unlock_e6500 | 465 | tlb_unlock_e6500 |
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 92cb18d52ea8..f38ea4df6a85 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c | |||
@@ -581,42 +581,10 @@ static void setup_mmu_htw(void) | |||
581 | /* | 581 | /* |
582 | * Early initialization of the MMU TLB code | 582 | * Early initialization of the MMU TLB code |
583 | */ | 583 | */ |
584 | static void __early_init_mmu(int boot_cpu) | 584 | static void early_init_this_mmu(void) |
585 | { | 585 | { |
586 | unsigned int mas4; | 586 | unsigned int mas4; |
587 | 587 | ||
588 | /* XXX This will have to be decided at runtime, but right | ||
589 | * now our boot and TLB miss code hard wires it. Ideally | ||
590 | * we should find out a suitable page size and patch the | ||
591 | * TLB miss code (either that or use the PACA to store | ||
592 | * the value we want) | ||
593 | */ | ||
594 | mmu_linear_psize = MMU_PAGE_1G; | ||
595 | |||
596 | /* XXX This should be decided at runtime based on supported | ||
597 | * page sizes in the TLB, but for now let's assume 16M is | ||
598 | * always there and a good fit (which it probably is) | ||
599 | * | ||
600 | * Freescale booke only supports 4K pages in TLB0, so use that. | ||
601 | */ | ||
602 | if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) | ||
603 | mmu_vmemmap_psize = MMU_PAGE_4K; | ||
604 | else | ||
605 | mmu_vmemmap_psize = MMU_PAGE_16M; | ||
606 | |||
607 | /* XXX This code only checks for TLB 0 capabilities and doesn't | ||
608 | * check what page size combos are supported by the HW. It | ||
609 | * also doesn't handle the case where a separate array holds | ||
610 | * the IND entries from the array loaded by the PT. | ||
611 | */ | ||
612 | if (boot_cpu) { | ||
613 | /* Look for supported page sizes */ | ||
614 | setup_page_sizes(); | ||
615 | |||
616 | /* Look for HW tablewalk support */ | ||
617 | setup_mmu_htw(); | ||
618 | } | ||
619 | |||
620 | /* Set MAS4 based on page table setting */ | 588 | /* Set MAS4 based on page table setting */ |
621 | 589 | ||
622 | mas4 = 0x4 << MAS4_WIMGED_SHIFT; | 590 | mas4 = 0x4 << MAS4_WIMGED_SHIFT; |
@@ -650,11 +618,6 @@ static void __early_init_mmu(int boot_cpu) | |||
650 | } | 618 | } |
651 | mtspr(SPRN_MAS4, mas4); | 619 | mtspr(SPRN_MAS4, mas4); |
652 | 620 | ||
653 | /* Set the global containing the top of the linear mapping | ||
654 | * for use by the TLB miss code | ||
655 | */ | ||
656 | linear_map_top = memblock_end_of_DRAM(); | ||
657 | |||
658 | #ifdef CONFIG_PPC_FSL_BOOK3E | 621 | #ifdef CONFIG_PPC_FSL_BOOK3E |
659 | if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { | 622 | if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { |
660 | unsigned int num_cams; | 623 | unsigned int num_cams; |
@@ -662,10 +625,49 @@ static void __early_init_mmu(int boot_cpu) | |||
662 | /* use a quarter of the TLBCAM for bolted linear map */ | 625 | /* use a quarter of the TLBCAM for bolted linear map */ |
663 | num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; | 626 | num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; |
664 | linear_map_top = map_mem_in_cams(linear_map_top, num_cams); | 627 | linear_map_top = map_mem_in_cams(linear_map_top, num_cams); |
628 | } | ||
629 | #endif | ||
665 | 630 | ||
666 | /* limit memory so we dont have linear faults */ | 631 | /* A sync won't hurt us after mucking around with |
667 | memblock_enforce_memory_limit(linear_map_top); | 632 | * the MMU configuration |
633 | */ | ||
634 | mb(); | ||
635 | } | ||
668 | 636 | ||
637 | static void __init early_init_mmu_global(void) | ||
638 | { | ||
639 | /* XXX This will have to be decided at runtime, but right | ||
640 | * now our boot and TLB miss code hard wires it. Ideally | ||
641 | * we should find out a suitable page size and patch the | ||
642 | * TLB miss code (either that or use the PACA to store | ||
643 | * the value we want) | ||
644 | */ | ||
645 | mmu_linear_psize = MMU_PAGE_1G; | ||
646 | |||
647 | /* XXX This should be decided at runtime based on supported | ||
648 | * page sizes in the TLB, but for now let's assume 16M is | ||
649 | * always there and a good fit (which it probably is) | ||
650 | * | ||
651 | * Freescale booke only supports 4K pages in TLB0, so use that. | ||
652 | */ | ||
653 | if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) | ||
654 | mmu_vmemmap_psize = MMU_PAGE_4K; | ||
655 | else | ||
656 | mmu_vmemmap_psize = MMU_PAGE_16M; | ||
657 | |||
658 | /* XXX This code only checks for TLB 0 capabilities and doesn't | ||
659 | * check what page size combos are supported by the HW. It | ||
660 | * also doesn't handle the case where a separate array holds | ||
661 | * the IND entries from the array loaded by the PT. | ||
662 | */ | ||
663 | /* Look for supported page sizes */ | ||
664 | setup_page_sizes(); | ||
665 | |||
666 | /* Look for HW tablewalk support */ | ||
667 | setup_mmu_htw(); | ||
668 | |||
669 | #ifdef CONFIG_PPC_FSL_BOOK3E | ||
670 | if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { | ||
669 | if (book3e_htw_mode == PPC_HTW_NONE) { | 671 | if (book3e_htw_mode == PPC_HTW_NONE) { |
670 | extlb_level_exc = EX_TLB_SIZE; | 672 | extlb_level_exc = EX_TLB_SIZE; |
671 | patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); | 673 | patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); |
@@ -675,22 +677,41 @@ static void __early_init_mmu(int boot_cpu) | |||
675 | } | 677 | } |
676 | #endif | 678 | #endif |
677 | 679 | ||
678 | /* A sync won't hurt us after mucking around with | 680 | /* Set the global containing the top of the linear mapping |
679 | * the MMU configuration | 681 | * for use by the TLB miss code |
680 | */ | 682 | */ |
681 | mb(); | 683 | linear_map_top = memblock_end_of_DRAM(); |
684 | } | ||
685 | |||
686 | static void __init early_mmu_set_memory_limit(void) | ||
687 | { | ||
688 | #ifdef CONFIG_PPC_FSL_BOOK3E | ||
689 | if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { | ||
690 | /* | ||
691 | * Limit memory so we don't have linear faults. | ||
692 | * Unlike memblock_set_current_limit, which limits | ||
693 | * memory available during early boot, this permanently | ||
694 | * reduces the memory available to Linux. We need to | ||
695 | * do this because highmem is not supported on 64-bit. | ||
696 | */ | ||
697 | memblock_enforce_memory_limit(linear_map_top); | ||
698 | } | ||
699 | #endif | ||
682 | 700 | ||
683 | memblock_set_current_limit(linear_map_top); | 701 | memblock_set_current_limit(linear_map_top); |
684 | } | 702 | } |
685 | 703 | ||
704 | /* boot cpu only */ | ||
686 | void __init early_init_mmu(void) | 705 | void __init early_init_mmu(void) |
687 | { | 706 | { |
688 | __early_init_mmu(1); | 707 | early_init_mmu_global(); |
708 | early_init_this_mmu(); | ||
709 | early_mmu_set_memory_limit(); | ||
689 | } | 710 | } |
690 | 711 | ||
691 | void early_init_mmu_secondary(void) | 712 | void early_init_mmu_secondary(void) |
692 | { | 713 | { |
693 | __early_init_mmu(0); | 714 | early_init_this_mmu(); |
694 | } | 715 | } |
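The refactor separates what must happen once from what every CPU must do: early_init_mmu_global() picks page sizes and hardware-tablewalk mode and records linear_map_top, early_init_this_mmu() programs the current CPU (MAS4, bolted CAM entries), and early_mmu_set_memory_limit() caps memblock. The boot CPU runs all three; secondaries run only the per-CPU step. A stub model of the resulting call structure:

```c
#include <stdio.h>

static void early_init_mmu_global(void)	/* once, boot CPU only */
{
	puts("global: page sizes, HW tablewalk, linear_map_top");
}

static void early_init_this_mmu(void)		/* every CPU */
{
	puts("this cpu: MAS4, bolted CAM entries, sync");
}

static void early_mmu_set_memory_limit(void)	/* once, after boot-CPU setup */
{
	puts("boot: memblock limit at linear_map_top");
}

static void early_init_mmu(void)		/* boot CPU path */
{
	early_init_mmu_global();
	early_init_this_mmu();
	early_mmu_set_memory_limit();
}

static void early_init_mmu_secondary(void)	/* secondary CPU path */
{
	early_init_this_mmu();
}

int main(void)
{
	early_init_mmu();		/* boot CPU */
	early_init_mmu_secondary();	/* each secondary */
	return 0;
}
```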
695 | 716 | ||
696 | void setup_initial_memory_limit(phys_addr_t first_memblock_base, | 717 | void setup_initial_memory_limit(phys_addr_t first_memblock_base, |