Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/40x_mmu.c            |    4
-rw-r--r--  arch/powerpc/mm/Makefile             |    1
-rw-r--r--  arch/powerpc/mm/fsl_booke_mmu.c      |    2
-rw-r--r--  arch/powerpc/mm/hash_low_32.S        |    4
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c        |    8
-rw-r--r--  arch/powerpc/mm/init_32.c            |    2
-rw-r--r--  arch/powerpc/mm/init_64.c            |   55
-rw-r--r--  arch/powerpc/mm/mmu_context_nohash.c |   96
-rw-r--r--  arch/powerpc/mm/mmu_decl.h           |   37
-rw-r--r--  arch/powerpc/mm/pgtable.c            |  179
-rw-r--r--  arch/powerpc/mm/pgtable_32.c         |    2
-rw-r--r--  arch/powerpc/mm/pgtable_64.c         |   59
-rw-r--r--  arch/powerpc/mm/slb.c                |   83
-rw-r--r--  arch/powerpc/mm/stab.c               |   13
-rw-r--r--  arch/powerpc/mm/tlb_hash32.c         |    3
-rw-r--r--  arch/powerpc/mm/tlb_hash64.c         |   20
-rw-r--r--  arch/powerpc/mm/tlb_low_64e.S        |  770
-rw-r--r--  arch/powerpc/mm/tlb_nohash.c         |  268
-rw-r--r--  arch/powerpc/mm/tlb_nohash_low.S     |   87
19 files changed, 1523 insertions, 170 deletions
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c
index 29954dc28942..f5e7b9ce63dd 100644
--- a/arch/powerpc/mm/40x_mmu.c
+++ b/arch/powerpc/mm/40x_mmu.c
@@ -105,7 +105,7 @@ unsigned long __init mmu_mapin_ram(void) | |||
105 | 105 | ||
106 | while (s >= LARGE_PAGE_SIZE_16M) { | 106 | while (s >= LARGE_PAGE_SIZE_16M) { |
107 | pmd_t *pmdp; | 107 | pmd_t *pmdp; |
108 | unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE; | 108 | unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE; |
109 | 109 | ||
110 | pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); | 110 | pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); |
111 | pmd_val(*pmdp++) = val; | 111 | pmd_val(*pmdp++) = val; |
@@ -120,7 +120,7 @@ unsigned long __init mmu_mapin_ram(void) | |||
120 | 120 | ||
121 | while (s >= LARGE_PAGE_SIZE_4M) { | 121 | while (s >= LARGE_PAGE_SIZE_4M) { |
122 | pmd_t *pmdp; | 122 | pmd_t *pmdp; |
123 | unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE; | 123 | unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE; |
124 | 124 | ||
125 | pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); | 125 | pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); |
126 | pmd_val(*pmdp) = val; | 126 | pmd_val(*pmdp) = val; |
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 3e68363405b7..6fb8fc8d2fea 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -13,6 +13,7 @@ obj-y := fault.o mem.o pgtable.o gup.o \ | |||
13 | pgtable_$(CONFIG_WORD_SIZE).o | 13 | pgtable_$(CONFIG_WORD_SIZE).o |
14 | obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ | 14 | obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ |
15 | tlb_nohash_low.o | 15 | tlb_nohash_low.o |
16 | obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o | ||
16 | obj-$(CONFIG_PPC64) += mmap_64.o | 17 | obj-$(CONFIG_PPC64) += mmap_64.o |
17 | hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o | 18 | hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o |
18 | obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o \ | 19 | obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o \ |
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index bb3d65998e6b..dc93e95b256e 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -161,7 +161,7 @@ unsigned long __init mmu_mapin_ram(void) | |||
161 | unsigned long virt = PAGE_OFFSET; | 161 | unsigned long virt = PAGE_OFFSET; |
162 | phys_addr_t phys = memstart_addr; | 162 | phys_addr_t phys = memstart_addr; |
163 | 163 | ||
164 | while (cam[tlbcam_index] && tlbcam_index < ARRAY_SIZE(cam)) { | 164 | while (tlbcam_index < ARRAY_SIZE(cam) && cam[tlbcam_index]) { |
165 | settlbcam(tlbcam_index, virt, phys, cam[tlbcam_index], PAGE_KERNEL_X, 0); | 165 | settlbcam(tlbcam_index, virt, phys, cam[tlbcam_index], PAGE_KERNEL_X, 0); |
166 | virt += cam[tlbcam_index]; | 166 | virt += cam[tlbcam_index]; |
167 | phys += cam[tlbcam_index]; | 167 | phys += cam[tlbcam_index]; |
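
The only functional change above is the order of the two tests in the while condition: the bounds check on tlbcam_index now comes first, so short-circuit evaluation stops cam[] from being read one slot past its end on the final iteration. A stand-alone sketch of the same pattern (the array name and size here are illustrative, not taken from the kernel):

#include <stddef.h>

#define NUM_TLBCAMS 4                      /* illustrative size */

size_t count_used(const unsigned long cam[NUM_TLBCAMS])
{
	size_t i = 0;

	/* Bounds check first: once i == NUM_TLBCAMS the right-hand
	 * operand is never evaluated, so cam[NUM_TLBCAMS] is never read.
	 */
	while (i < NUM_TLBCAMS && cam[i])
		i++;
	return i;
}
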
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index 14af8cedab70..b13d58932bf6 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -40,7 +40,7 @@ mmu_hash_lock: | |||
40 | * The address is in r4, and r3 contains an access flag: | 40 | * The address is in r4, and r3 contains an access flag: |
41 | * _PAGE_RW (0x400) if a write. | 41 | * _PAGE_RW (0x400) if a write. |
42 | * r9 contains the SRR1 value, from which we use the MSR_PR bit. | 42 | * r9 contains the SRR1 value, from which we use the MSR_PR bit. |
43 | * SPRG3 contains the physical address of the current task's thread. | 43 | * SPRG_THREAD contains the physical address of the current task's thread. |
44 | * | 44 | * |
45 | * Returns to the caller if the access is illegal or there is no | 45 | * Returns to the caller if the access is illegal or there is no |
46 | * mapping for the address. Otherwise it places an appropriate PTE | 46 | * mapping for the address. Otherwise it places an appropriate PTE |
@@ -68,7 +68,7 @@ _GLOBAL(hash_page) | |||
68 | /* Get PTE (linux-style) and check access */ | 68 | /* Get PTE (linux-style) and check access */ |
69 | lis r0,KERNELBASE@h /* check if kernel address */ | 69 | lis r0,KERNELBASE@h /* check if kernel address */ |
70 | cmplw 0,r4,r0 | 70 | cmplw 0,r4,r0 |
71 | mfspr r8,SPRN_SPRG3 /* current task's THREAD (phys) */ | 71 | mfspr r8,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ |
72 | ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ | 72 | ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ |
73 | lwz r5,PGDIR(r8) /* virt page-table root */ | 73 | lwz r5,PGDIR(r8) /* virt page-table root */ |
74 | blt+ 112f /* assume user more likely */ | 74 | blt+ 112f /* assume user more likely */ |
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index c46ef2ffa3d9..90df6ffe3a43 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -57,8 +57,10 @@ unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */ | |||
57 | #define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize]) | 57 | #define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize]) |
58 | 58 | ||
59 | static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = { | 59 | static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = { |
60 | "unused_4K", "hugepte_cache_64K", "unused_64K_AP", | 60 | [MMU_PAGE_64K] = "hugepte_cache_64K", |
61 | "hugepte_cache_1M", "hugepte_cache_16M", "hugepte_cache_16G" | 61 | [MMU_PAGE_1M] = "hugepte_cache_1M", |
62 | [MMU_PAGE_16M] = "hugepte_cache_16M", | ||
63 | [MMU_PAGE_16G] = "hugepte_cache_16G", | ||
62 | }; | 64 | }; |
63 | 65 | ||
64 | /* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() | 66 | /* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() |
@@ -700,6 +702,8 @@ static void __init set_huge_psize(int psize) | |||
700 | if (mmu_huge_psizes[psize] || | 702 | if (mmu_huge_psizes[psize] || |
701 | mmu_psize_defs[psize].shift == PAGE_SHIFT) | 703 | mmu_psize_defs[psize].shift == PAGE_SHIFT) |
702 | return; | 704 | return; |
705 | if (WARN_ON(HUGEPTE_CACHE_NAME(psize) == NULL)) | ||
706 | return; | ||
703 | hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT); | 707 | hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT); |
704 | 708 | ||
705 | switch (mmu_psize_defs[psize].shift) { | 709 | switch (mmu_psize_defs[psize].shift) { |
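
The name table above now uses designated initializers keyed by the MMU_PAGE_* indices, so unnamed sizes are NULL and set_huge_psize() can refuse them via the new WARN_ON(). A small user-space sketch of that idiom, with an invented enum standing in for the kernel's page-size constants:

#include <stdio.h>

enum page_size { PSZ_4K, PSZ_64K, PSZ_1M, PSZ_16M, PSZ_COUNT };

/* Unnamed slots are implicitly NULL, so a lookup can detect
 * "no cache for this size" instead of picking up a wrong string. */
static const char *cache_name[PSZ_COUNT] = {
	[PSZ_64K] = "hugepte_cache_64K",
	[PSZ_16M] = "hugepte_cache_16M",
};

int main(void)
{
	for (int i = 0; i < PSZ_COUNT; i++)
		if (!cache_name[i])
			printf("size %d has no hugepte cache\n", i);
	return 0;
}
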
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 3de6a0d93824..3ef5084b90ca 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -54,8 +54,6 @@ | |||
54 | #endif | 54 | #endif |
55 | #define MAX_LOW_MEM CONFIG_LOWMEM_SIZE | 55 | #define MAX_LOW_MEM CONFIG_LOWMEM_SIZE |
56 | 56 | ||
57 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
58 | |||
59 | phys_addr_t total_memory; | 57 | phys_addr_t total_memory; |
60 | phys_addr_t total_lowmem; | 58 | phys_addr_t total_lowmem; |
61 | 59 | ||
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 68a821add28d..31582329cd67 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -205,6 +205,47 @@ static int __meminit vmemmap_populated(unsigned long start, int page_size) | |||
205 | return 0; | 205 | return 0; |
206 | } | 206 | } |
207 | 207 | ||
208 | /* On hash-based CPUs, the vmemmap is bolted in the hash table. | ||
209 | * | ||
210 | * On Book3E CPUs, the vmemmap is currently mapped in the top half of | ||
211 | * the vmalloc space using normal page tables, though the size of | ||
212 | * pages encoded in the PTEs can be different | ||
213 | */ | ||
214 | |||
215 | #ifdef CONFIG_PPC_BOOK3E | ||
216 | static void __meminit vmemmap_create_mapping(unsigned long start, | ||
217 | unsigned long page_size, | ||
218 | unsigned long phys) | ||
219 | { | ||
220 | /* Create a PTE encoding without page size */ | ||
221 | unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED | | ||
222 | _PAGE_KERNEL_RW; | ||
223 | |||
224 | /* PTEs only contain page size encodings up to 32M */ | ||
225 | BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].enc > 0xf); | ||
226 | |||
227 | /* Encode the size in the PTE */ | ||
228 | flags |= mmu_psize_defs[mmu_vmemmap_psize].enc << 8; | ||
229 | |||
230 | /* For each PTE for that area, map things. Note that we don't | ||
231 | * increment phys because all PTEs are of the large size and | ||
232 | * thus must have the low bits clear | ||
233 | */ | ||
234 | for (i = 0; i < page_size; i += PAGE_SIZE) | ||
235 | BUG_ON(map_kernel_page(start + i, phys, flags)); | ||
236 | } | ||
237 | #else /* CONFIG_PPC_BOOK3E */ | ||
238 | static void __meminit vmemmap_create_mapping(unsigned long start, | ||
239 | unsigned long page_size, | ||
240 | unsigned long phys) | ||
241 | { | ||
242 | int mapped = htab_bolt_mapping(start, start + page_size, phys, | ||
243 | PAGE_KERNEL, mmu_vmemmap_psize, | ||
244 | mmu_kernel_ssize); | ||
245 | BUG_ON(mapped < 0); | ||
246 | } | ||
247 | #endif /* CONFIG_PPC_BOOK3E */ | ||
248 | |||
208 | int __meminit vmemmap_populate(struct page *start_page, | 249 | int __meminit vmemmap_populate(struct page *start_page, |
209 | unsigned long nr_pages, int node) | 250 | unsigned long nr_pages, int node) |
210 | { | 251 | { |
@@ -215,8 +256,11 @@ int __meminit vmemmap_populate(struct page *start_page, | |||
215 | /* Align to the page size of the linear mapping. */ | 256 | /* Align to the page size of the linear mapping. */ |
216 | start = _ALIGN_DOWN(start, page_size); | 257 | start = _ALIGN_DOWN(start, page_size); |
217 | 258 | ||
259 | pr_debug("vmemmap_populate page %p, %ld pages, node %d\n", | ||
260 | start_page, nr_pages, node); | ||
261 | pr_debug(" -> map %lx..%lx\n", start, end); | ||
262 | |||
218 | for (; start < end; start += page_size) { | 263 | for (; start < end; start += page_size) { |
219 | int mapped; | ||
220 | void *p; | 264 | void *p; |
221 | 265 | ||
222 | if (vmemmap_populated(start, page_size)) | 266 | if (vmemmap_populated(start, page_size)) |
@@ -226,13 +270,10 @@ int __meminit vmemmap_populate(struct page *start_page, | |||
226 | if (!p) | 270 | if (!p) |
227 | return -ENOMEM; | 271 | return -ENOMEM; |
228 | 272 | ||
229 | pr_debug("vmemmap %08lx allocated at %p, physical %08lx.\n", | 273 | pr_debug(" * %016lx..%016lx allocated at %p\n", |
230 | start, p, __pa(p)); | 274 | start, start + page_size, p); |
231 | 275 | ||
232 | mapped = htab_bolt_mapping(start, start + page_size, __pa(p), | 276 | vmemmap_create_mapping(start, page_size, __pa(p)); |
233 | pgprot_val(PAGE_KERNEL), | ||
234 | mmu_vmemmap_psize, mmu_kernel_ssize); | ||
235 | BUG_ON(mapped < 0); | ||
236 | } | 277 | } |
237 | 278 | ||
238 | return 0; | 279 | return 0; |
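
On Book3E the new vmemmap_create_mapping() carries the page size inside the PTE: the 4-bit mmu_psize_defs[].enc code is shifted into the flags at bit 8, and every PAGE_SIZE step of the area receives a PTE pointing at the same large-page-aligned physical address. A stand-alone sketch of just the flag packing, using made-up flag bits rather than the real _PAGE_* values:

#include <assert.h>
#include <stdint.h>

#define F_PRESENT    0x001u   /* illustrative bit positions, not the */
#define F_ACCESSED   0x002u   /* kernel's real _PAGE_* values        */
#define F_SIZE_SHIFT 8

static uint64_t pte_flags_for(unsigned int size_enc)
{
	/* Only 4 bits are available for the size code in this scheme. */
	assert(size_enc <= 0xf);
	return F_PRESENT | F_ACCESSED | ((uint64_t)size_enc << F_SIZE_SHIFT);
}

int main(void)
{
	return pte_flags_for(0x7) == (0x001u | 0x002u | (0x7u << 8)) ? 0 : 1;
}
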
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index b1a727def15b..c2f93dc470e6 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -25,10 +25,20 @@ | |||
25 | * also clear mm->cpu_vm_mask bits when processes are migrated | 25 | * also clear mm->cpu_vm_mask bits when processes are migrated |
26 | */ | 26 | */ |
27 | 27 | ||
28 | #undef DEBUG | 28 | #define DEBUG_MAP_CONSISTENCY |
29 | #define DEBUG_STEAL_ONLY | 29 | #define DEBUG_CLAMP_LAST_CONTEXT 31 |
30 | #undef DEBUG_MAP_CONSISTENCY | 30 | //#define DEBUG_HARDER |
31 | /*#define DEBUG_CLAMP_LAST_CONTEXT 15 */ | 31 | |
32 | /* We don't use DEBUG because it tends to be compiled in always nowadays | ||
33 | * and this would generate way too much output | ||
34 | */ | ||
35 | #ifdef DEBUG_HARDER | ||
36 | #define pr_hard(args...) printk(KERN_DEBUG args) | ||
37 | #define pr_hardcont(args...) printk(KERN_CONT args) | ||
38 | #else | ||
39 | #define pr_hard(args...) do { } while(0) | ||
40 | #define pr_hardcont(args...) do { } while(0) | ||
41 | #endif | ||
32 | 42 | ||
33 | #include <linux/kernel.h> | 43 | #include <linux/kernel.h> |
34 | #include <linux/mm.h> | 44 | #include <linux/mm.h> |
@@ -71,7 +81,7 @@ static DEFINE_SPINLOCK(context_lock); | |||
71 | static unsigned int steal_context_smp(unsigned int id) | 81 | static unsigned int steal_context_smp(unsigned int id) |
72 | { | 82 | { |
73 | struct mm_struct *mm; | 83 | struct mm_struct *mm; |
74 | unsigned int cpu, max; | 84 | unsigned int cpu, max, i; |
75 | 85 | ||
76 | max = last_context - first_context; | 86 | max = last_context - first_context; |
77 | 87 | ||
@@ -89,15 +99,22 @@ static unsigned int steal_context_smp(unsigned int id) | |||
89 | id = first_context; | 99 | id = first_context; |
90 | continue; | 100 | continue; |
91 | } | 101 | } |
92 | pr_devel("[%d] steal context %d from mm @%p\n", | 102 | pr_hardcont(" | steal %d from 0x%p", id, mm); |
93 | smp_processor_id(), id, mm); | ||
94 | 103 | ||
95 | /* Mark this mm has having no context anymore */ | 104 | /* Mark this mm has having no context anymore */ |
96 | mm->context.id = MMU_NO_CONTEXT; | 105 | mm->context.id = MMU_NO_CONTEXT; |
97 | 106 | ||
98 | /* Mark it stale on all CPUs that used this mm */ | 107 | /* Mark it stale on all CPUs that used this mm. For threaded |
99 | for_each_cpu(cpu, mm_cpumask(mm)) | 108 | * implementations, we set it on all threads on each core |
100 | __set_bit(id, stale_map[cpu]); | 109 | * represented in the mask. A future implementation will use |
110 | * a core map instead but this will do for now. | ||
111 | */ | ||
112 | for_each_cpu(cpu, mm_cpumask(mm)) { | ||
113 | for (i = cpu_first_thread_in_core(cpu); | ||
114 | i <= cpu_last_thread_in_core(cpu); i++) | ||
115 | __set_bit(id, stale_map[i]); | ||
116 | cpu = i - 1; | ||
117 | } | ||
101 | return id; | 118 | return id; |
102 | } | 119 | } |
103 | 120 | ||
@@ -126,7 +143,7 @@ static unsigned int steal_context_up(unsigned int id) | |||
126 | /* Pick up the victim mm */ | 143 | /* Pick up the victim mm */ |
127 | mm = context_mm[id]; | 144 | mm = context_mm[id]; |
128 | 145 | ||
129 | pr_devel("[%d] steal context %d from mm @%p\n", cpu, id, mm); | 146 | pr_hardcont(" | steal %d from 0x%p", id, mm); |
130 | 147 | ||
131 | /* Flush the TLB for that context */ | 148 | /* Flush the TLB for that context */ |
132 | local_flush_tlb_mm(mm); | 149 | local_flush_tlb_mm(mm); |
@@ -173,25 +190,20 @@ static void context_check_map(void) { } | |||
173 | 190 | ||
174 | void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) | 191 | void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) |
175 | { | 192 | { |
176 | unsigned int id, cpu = smp_processor_id(); | 193 | unsigned int i, id, cpu = smp_processor_id(); |
177 | unsigned long *map; | 194 | unsigned long *map; |
178 | 195 | ||
179 | /* No lockless fast path .. yet */ | 196 | /* No lockless fast path .. yet */ |
180 | spin_lock(&context_lock); | 197 | spin_lock(&context_lock); |
181 | 198 | ||
182 | #ifndef DEBUG_STEAL_ONLY | 199 | pr_hard("[%d] activating context for mm @%p, active=%d, id=%d", |
183 | pr_devel("[%d] activating context for mm @%p, active=%d, id=%d\n", | 200 | cpu, next, next->context.active, next->context.id); |
184 | cpu, next, next->context.active, next->context.id); | ||
185 | #endif | ||
186 | 201 | ||
187 | #ifdef CONFIG_SMP | 202 | #ifdef CONFIG_SMP |
188 | /* Mark us active and the previous one not anymore */ | 203 | /* Mark us active and the previous one not anymore */ |
189 | next->context.active++; | 204 | next->context.active++; |
190 | if (prev) { | 205 | if (prev) { |
191 | #ifndef DEBUG_STEAL_ONLY | 206 | pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active); |
192 | pr_devel(" old context %p active was: %d\n", | ||
193 | prev, prev->context.active); | ||
194 | #endif | ||
195 | WARN_ON(prev->context.active < 1); | 207 | WARN_ON(prev->context.active < 1); |
196 | prev->context.active--; | 208 | prev->context.active--; |
197 | } | 209 | } |
@@ -201,8 +213,14 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) | |||
201 | 213 | ||
202 | /* If we already have a valid assigned context, skip all that */ | 214 | /* If we already have a valid assigned context, skip all that */ |
203 | id = next->context.id; | 215 | id = next->context.id; |
204 | if (likely(id != MMU_NO_CONTEXT)) | 216 | if (likely(id != MMU_NO_CONTEXT)) { |
217 | #ifdef DEBUG_MAP_CONSISTENCY | ||
218 | if (context_mm[id] != next) | ||
219 | pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n", | ||
220 | next, id, id, context_mm[id]); | ||
221 | #endif | ||
205 | goto ctxt_ok; | 222 | goto ctxt_ok; |
223 | } | ||
206 | 224 | ||
207 | /* We really don't have a context, let's try to acquire one */ | 225 | /* We really don't have a context, let's try to acquire one */ |
208 | id = next_context; | 226 | id = next_context; |
@@ -235,11 +253,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) | |||
235 | next_context = id + 1; | 253 | next_context = id + 1; |
236 | context_mm[id] = next; | 254 | context_mm[id] = next; |
237 | next->context.id = id; | 255 | next->context.id = id; |
238 | 256 | pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts); | |
239 | #ifndef DEBUG_STEAL_ONLY | ||
240 | pr_devel("[%d] picked up new id %d, nrf is now %d\n", | ||
241 | cpu, id, nr_free_contexts); | ||
242 | #endif | ||
243 | 257 | ||
244 | context_check_map(); | 258 | context_check_map(); |
245 | ctxt_ok: | 259 | ctxt_ok: |
@@ -248,15 +262,21 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) | |||
248 | * local TLB for it and unmark it before we use it | 262 | * local TLB for it and unmark it before we use it |
249 | */ | 263 | */ |
250 | if (test_bit(id, stale_map[cpu])) { | 264 | if (test_bit(id, stale_map[cpu])) { |
251 | pr_devel("[%d] flushing stale context %d for mm @%p !\n", | 265 | pr_hardcont(" | stale flush %d [%d..%d]", |
252 | cpu, id, next); | 266 | id, cpu_first_thread_in_core(cpu), |
267 | cpu_last_thread_in_core(cpu)); | ||
268 | |||
253 | local_flush_tlb_mm(next); | 269 | local_flush_tlb_mm(next); |
254 | 270 | ||
255 | /* XXX This clear should ultimately be part of local_flush_tlb_mm */ | 271 | /* XXX This clear should ultimately be part of local_flush_tlb_mm */ |
256 | __clear_bit(id, stale_map[cpu]); | 272 | for (i = cpu_first_thread_in_core(cpu); |
273 | i <= cpu_last_thread_in_core(cpu); i++) { | ||
274 | __clear_bit(id, stale_map[i]); | ||
275 | } | ||
257 | } | 276 | } |
258 | 277 | ||
259 | /* Flick the MMU and release lock */ | 278 | /* Flick the MMU and release lock */ |
279 | pr_hardcont(" -> %d\n", id); | ||
260 | set_context(id, next->pgd); | 280 | set_context(id, next->pgd); |
261 | spin_unlock(&context_lock); | 281 | spin_unlock(&context_lock); |
262 | } | 282 | } |
@@ -266,6 +286,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) | |||
266 | */ | 286 | */ |
267 | int init_new_context(struct task_struct *t, struct mm_struct *mm) | 287 | int init_new_context(struct task_struct *t, struct mm_struct *mm) |
268 | { | 288 | { |
289 | pr_hard("initing context for mm @%p\n", mm); | ||
290 | |||
269 | mm->context.id = MMU_NO_CONTEXT; | 291 | mm->context.id = MMU_NO_CONTEXT; |
270 | mm->context.active = 0; | 292 | mm->context.active = 0; |
271 | 293 | ||
@@ -305,7 +327,9 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, | |||
305 | unsigned long action, void *hcpu) | 327 | unsigned long action, void *hcpu) |
306 | { | 328 | { |
307 | unsigned int cpu = (unsigned int)(long)hcpu; | 329 | unsigned int cpu = (unsigned int)(long)hcpu; |
308 | 330 | #ifdef CONFIG_HOTPLUG_CPU | |
331 | struct task_struct *p; | ||
332 | #endif | ||
309 | /* We don't touch CPU 0 map, it's allocated at aboot and kept | 333 | /* We don't touch CPU 0 map, it's allocated at aboot and kept |
310 | * around forever | 334 | * around forever |
311 | */ | 335 | */ |
@@ -324,8 +348,16 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, | |||
324 | pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu); | 348 | pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu); |
325 | kfree(stale_map[cpu]); | 349 | kfree(stale_map[cpu]); |
326 | stale_map[cpu] = NULL; | 350 | stale_map[cpu] = NULL; |
327 | break; | 351 | |
328 | #endif | 352 | /* We also clear the cpu_vm_mask bits of CPUs going away */ |
353 | read_lock(&tasklist_lock); | ||
354 | for_each_process(p) { | ||
355 | if (p->mm) | ||
356 | cpu_mask_clear_cpu(cpu, mm_cpumask(p->mm)); | ||
357 | } | ||
358 | read_unlock(&tasklist_lock); | ||
359 | break; | ||
360 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
329 | } | 361 | } |
330 | return NOTIFY_OK; | 362 | return NOTIFY_OK; |
331 | } | 363 | } |
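
steal_context_smp() and the stale-flush path now mark or clear the stale bit for every hardware thread of each core present in the mm's CPU mask, not just the one CPU that appears in the mask. A simplified user-space sketch of that per-core loop; THREADS_PER_CORE and the single word per CPU are stand-ins for the kernel's topology helpers and per-CPU bitmaps:

#define NR_CPUS          8
#define THREADS_PER_CORE 2

static unsigned long stale_map[NR_CPUS];   /* one word per CPU stands in
					      for the real stale bitmaps */

void mark_context_stale(unsigned int id, const int *cpus, int ncpus)
{
	for (int n = 0; n < ncpus; n++) {
		int first = (cpus[n] / THREADS_PER_CORE) * THREADS_PER_CORE;

		/* Set the bit on every thread sibling of this core, so a
		 * later switch on any of them flushes the stolen context. */
		for (int i = first; i < first + THREADS_PER_CORE; i++)
			stale_map[i] |= 1UL << (id % 64);
	}
}
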
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index d1f9c62dc177..d2e5321d5ea6 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -36,21 +36,37 @@ static inline void _tlbil_pid(unsigned int pid) | |||
36 | { | 36 | { |
37 | asm volatile ("sync; tlbia; isync" : : : "memory"); | 37 | asm volatile ("sync; tlbia; isync" : : : "memory"); |
38 | } | 38 | } |
39 | #define _tlbil_pid_noind(pid) _tlbil_pid(pid) | ||
40 | |||
39 | #else /* CONFIG_40x || CONFIG_8xx */ | 41 | #else /* CONFIG_40x || CONFIG_8xx */ |
40 | extern void _tlbil_all(void); | 42 | extern void _tlbil_all(void); |
41 | extern void _tlbil_pid(unsigned int pid); | 43 | extern void _tlbil_pid(unsigned int pid); |
44 | #ifdef CONFIG_PPC_BOOK3E | ||
45 | extern void _tlbil_pid_noind(unsigned int pid); | ||
46 | #else | ||
47 | #define _tlbil_pid_noind(pid) _tlbil_pid(pid) | ||
48 | #endif | ||
42 | #endif /* !(CONFIG_40x || CONFIG_8xx) */ | 49 | #endif /* !(CONFIG_40x || CONFIG_8xx) */ |
43 | 50 | ||
44 | /* | 51 | /* |
45 | * On 8xx, we directly inline tlbie, on others, it's extern | 52 | * On 8xx, we directly inline tlbie, on others, it's extern |
46 | */ | 53 | */ |
47 | #ifdef CONFIG_8xx | 54 | #ifdef CONFIG_8xx |
48 | static inline void _tlbil_va(unsigned long address, unsigned int pid) | 55 | static inline void _tlbil_va(unsigned long address, unsigned int pid, |
56 | unsigned int tsize, unsigned int ind) | ||
49 | { | 57 | { |
50 | asm volatile ("tlbie %0; sync" : : "r" (address) : "memory"); | 58 | asm volatile ("tlbie %0; sync" : : "r" (address) : "memory"); |
51 | } | 59 | } |
52 | #else /* CONFIG_8xx */ | 60 | #elif defined(CONFIG_PPC_BOOK3E) |
53 | extern void _tlbil_va(unsigned long address, unsigned int pid); | 61 | extern void _tlbil_va(unsigned long address, unsigned int pid, |
62 | unsigned int tsize, unsigned int ind); | ||
63 | #else | ||
64 | extern void __tlbil_va(unsigned long address, unsigned int pid); | ||
65 | static inline void _tlbil_va(unsigned long address, unsigned int pid, | ||
66 | unsigned int tsize, unsigned int ind) | ||
67 | { | ||
68 | __tlbil_va(address, pid); | ||
69 | } | ||
54 | #endif /* CONIFG_8xx */ | 70 | #endif /* CONIFG_8xx */ |
55 | 71 | ||
56 | /* | 72 | /* |
@@ -58,10 +74,16 @@ extern void _tlbil_va(unsigned long address, unsigned int pid); | |||
58 | * implementation. When that becomes the case, this will be | 74 | * implementation. When that becomes the case, this will be |
59 | * an extern. | 75 | * an extern. |
60 | */ | 76 | */ |
61 | static inline void _tlbivax_bcast(unsigned long address, unsigned int pid) | 77 | #ifdef CONFIG_PPC_BOOK3E |
78 | extern void _tlbivax_bcast(unsigned long address, unsigned int pid, | ||
79 | unsigned int tsize, unsigned int ind); | ||
80 | #else | ||
81 | static inline void _tlbivax_bcast(unsigned long address, unsigned int pid, | ||
82 | unsigned int tsize, unsigned int ind) | ||
62 | { | 83 | { |
63 | BUG(); | 84 | BUG(); |
64 | } | 85 | } |
86 | #endif | ||
65 | 87 | ||
66 | #else /* CONFIG_PPC_MMU_NOHASH */ | 88 | #else /* CONFIG_PPC_MMU_NOHASH */ |
67 | 89 | ||
@@ -99,7 +121,12 @@ extern unsigned int rtas_data, rtas_size; | |||
99 | struct hash_pte; | 121 | struct hash_pte; |
100 | extern struct hash_pte *Hash, *Hash_end; | 122 | extern struct hash_pte *Hash, *Hash_end; |
101 | extern unsigned long Hash_size, Hash_mask; | 123 | extern unsigned long Hash_size, Hash_mask; |
102 | #endif | 124 | |
125 | #endif /* CONFIG_PPC32 */ | ||
126 | |||
127 | #ifdef CONFIG_PPC64 | ||
128 | extern int map_kernel_page(unsigned long ea, unsigned long pa, int flags); | ||
129 | #endif /* CONFIG_PPC64 */ | ||
103 | 130 | ||
104 | extern unsigned long ioremap_bot; | 131 | extern unsigned long ioremap_bot; |
105 | extern unsigned long __max_low_memory; | 132 | extern unsigned long __max_low_memory; |
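
The TLB-invalidate entry points grow tsize and ind arguments here, while MMUs that cannot use them keep their narrow low-level routine behind a static inline shim that drops the extra parameters. A generic sketch of that compatibility-wrapper pattern, with hypothetical function names:

/* Narrow legacy primitive (assembly in the kernel); name is invented. */
extern void legacy_invalidate(unsigned long addr, unsigned int pid);

/* Widened common interface: the extra arguments exist everywhere but
 * are simply ignored by back-ends that cannot act on them. */
static inline void invalidate_va(unsigned long addr, unsigned int pid,
				 unsigned int tsize, unsigned int ind)
{
	(void)tsize;
	(void)ind;
	legacy_invalidate(addr, pid);
}
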
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 627767d6169b..83f1551ec2c9 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -30,6 +30,16 @@ | |||
30 | #include <asm/tlbflush.h> | 30 | #include <asm/tlbflush.h> |
31 | #include <asm/tlb.h> | 31 | #include <asm/tlb.h> |
32 | 32 | ||
33 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
34 | |||
35 | #ifdef CONFIG_SMP | ||
36 | |||
37 | /* | ||
38 | * Handle batching of page table freeing on SMP. Page tables are | ||
39 | * queued up and send to be freed later by RCU in order to avoid | ||
40 | * freeing a page table page that is being walked without locks | ||
41 | */ | ||
42 | |||
33 | static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); | 43 | static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); |
34 | static unsigned long pte_freelist_forced_free; | 44 | static unsigned long pte_freelist_forced_free; |
35 | 45 | ||
@@ -116,27 +126,7 @@ void pte_free_finish(void) | |||
116 | *batchp = NULL; | 126 | *batchp = NULL; |
117 | } | 127 | } |
118 | 128 | ||
119 | /* | 129 | #endif /* CONFIG_SMP */ |
120 | * Handle i/d cache flushing, called from set_pte_at() or ptep_set_access_flags() | ||
121 | */ | ||
122 | static pte_t do_dcache_icache_coherency(pte_t pte) | ||
123 | { | ||
124 | unsigned long pfn = pte_pfn(pte); | ||
125 | struct page *page; | ||
126 | |||
127 | if (unlikely(!pfn_valid(pfn))) | ||
128 | return pte; | ||
129 | page = pfn_to_page(pfn); | ||
130 | |||
131 | if (!PageReserved(page) && !test_bit(PG_arch_1, &page->flags)) { | ||
132 | pr_devel("do_dcache_icache_coherency... flushing\n"); | ||
133 | flush_dcache_icache_page(page); | ||
134 | set_bit(PG_arch_1, &page->flags); | ||
135 | } | ||
136 | else | ||
137 | pr_devel("do_dcache_icache_coherency... already clean\n"); | ||
138 | return __pte(pte_val(pte) | _PAGE_HWEXEC); | ||
139 | } | ||
140 | 130 | ||
141 | static inline int is_exec_fault(void) | 131 | static inline int is_exec_fault(void) |
142 | { | 132 | { |
@@ -145,49 +135,139 @@ static inline int is_exec_fault(void) | |||
145 | 135 | ||
146 | /* We only try to do i/d cache coherency on stuff that looks like | 136 | /* We only try to do i/d cache coherency on stuff that looks like |
147 | * reasonably "normal" PTEs. We currently require a PTE to be present | 137 | * reasonably "normal" PTEs. We currently require a PTE to be present |
148 | * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE | 138 | * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE. We also only do that |
139 | * on userspace PTEs | ||
149 | */ | 140 | */ |
150 | static inline int pte_looks_normal(pte_t pte) | 141 | static inline int pte_looks_normal(pte_t pte) |
151 | { | 142 | { |
152 | return (pte_val(pte) & | 143 | return (pte_val(pte) & |
153 | (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) == | 144 | (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) == |
154 | (_PAGE_PRESENT); | 145 | (_PAGE_PRESENT | _PAGE_USER); |
155 | } | 146 | } |
156 | 147 | ||
157 | #if defined(CONFIG_PPC_STD_MMU) | 148 | struct page * maybe_pte_to_page(pte_t pte) |
149 | { | ||
150 | unsigned long pfn = pte_pfn(pte); | ||
151 | struct page *page; | ||
152 | |||
153 | if (unlikely(!pfn_valid(pfn))) | ||
154 | return NULL; | ||
155 | page = pfn_to_page(pfn); | ||
156 | if (PageReserved(page)) | ||
157 | return NULL; | ||
158 | return page; | ||
159 | } | ||
160 | |||
161 | #if defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 | ||
162 | |||
158 | /* Server-style MMU handles coherency when hashing if HW exec permission | 163 | /* Server-style MMU handles coherency when hashing if HW exec permission |
159 | * is supposed per page (currently 64-bit only). Else, we always flush | 164 | * is supposed per page (currently 64-bit only). If not, then, we always |
160 | * valid PTEs in set_pte. | 165 | * flush the cache for valid PTEs in set_pte. Embedded CPU without HW exec |
166 | * support falls into the same category. | ||
161 | */ | 167 | */ |
162 | static inline int pte_need_exec_flush(pte_t pte, int set_pte) | 168 | |
169 | static pte_t set_pte_filter(pte_t pte) | ||
163 | { | 170 | { |
164 | return set_pte && pte_looks_normal(pte) && | 171 | pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); |
165 | !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || | 172 | if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || |
166 | cpu_has_feature(CPU_FTR_NOEXECUTE)); | 173 | cpu_has_feature(CPU_FTR_NOEXECUTE))) { |
174 | struct page *pg = maybe_pte_to_page(pte); | ||
175 | if (!pg) | ||
176 | return pte; | ||
177 | if (!test_bit(PG_arch_1, &pg->flags)) { | ||
178 | flush_dcache_icache_page(pg); | ||
179 | set_bit(PG_arch_1, &pg->flags); | ||
180 | } | ||
181 | } | ||
182 | return pte; | ||
167 | } | 183 | } |
168 | #elif _PAGE_HWEXEC == 0 | 184 | |
169 | /* Embedded type MMU without HW exec support (8xx only so far), we flush | 185 | static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, |
170 | * the cache for any present PTE | 186 | int dirty) |
171 | */ | ||
172 | static inline int pte_need_exec_flush(pte_t pte, int set_pte) | ||
173 | { | 187 | { |
174 | return set_pte && pte_looks_normal(pte); | 188 | return pte; |
175 | } | 189 | } |
176 | #else | 190 | |
177 | /* Other embedded CPUs with HW exec support per-page, we flush on exec | 191 | #else /* defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 */ |
178 | * fault if HWEXEC is not set | 192 | |
193 | /* Embedded type MMU with HW exec support. This is a bit more complicated | ||
194 | * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so | ||
195 | * instead we "filter out" the exec permission for non clean pages. | ||
179 | */ | 196 | */ |
180 | static inline int pte_need_exec_flush(pte_t pte, int set_pte) | 197 | static pte_t set_pte_filter(pte_t pte) |
181 | { | 198 | { |
182 | return pte_looks_normal(pte) && is_exec_fault() && | 199 | struct page *pg; |
183 | !(pte_val(pte) & _PAGE_HWEXEC); | 200 | |
201 | /* No exec permission in the first place, move on */ | ||
202 | if (!(pte_val(pte) & _PAGE_EXEC) || !pte_looks_normal(pte)) | ||
203 | return pte; | ||
204 | |||
205 | /* If you set _PAGE_EXEC on weird pages you're on your own */ | ||
206 | pg = maybe_pte_to_page(pte); | ||
207 | if (unlikely(!pg)) | ||
208 | return pte; | ||
209 | |||
210 | /* If the page clean, we move on */ | ||
211 | if (test_bit(PG_arch_1, &pg->flags)) | ||
212 | return pte; | ||
213 | |||
214 | /* If it's an exec fault, we flush the cache and make it clean */ | ||
215 | if (is_exec_fault()) { | ||
216 | flush_dcache_icache_page(pg); | ||
217 | set_bit(PG_arch_1, &pg->flags); | ||
218 | return pte; | ||
219 | } | ||
220 | |||
221 | /* Else, we filter out _PAGE_EXEC */ | ||
222 | return __pte(pte_val(pte) & ~_PAGE_EXEC); | ||
184 | } | 223 | } |
185 | #endif | 224 | |
225 | static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, | ||
226 | int dirty) | ||
227 | { | ||
228 | struct page *pg; | ||
229 | |||
230 | /* So here, we only care about exec faults, as we use them | ||
231 | * to recover lost _PAGE_EXEC and perform I$/D$ coherency | ||
232 | * if necessary. Also if _PAGE_EXEC is already set, same deal, | ||
233 | * we just bail out | ||
234 | */ | ||
235 | if (dirty || (pte_val(pte) & _PAGE_EXEC) || !is_exec_fault()) | ||
236 | return pte; | ||
237 | |||
238 | #ifdef CONFIG_DEBUG_VM | ||
239 | /* So this is an exec fault, _PAGE_EXEC is not set. If it was | ||
240 | * an error we would have bailed out earlier in do_page_fault() | ||
241 | * but let's make sure of it | ||
242 | */ | ||
243 | if (WARN_ON(!(vma->vm_flags & VM_EXEC))) | ||
244 | return pte; | ||
245 | #endif /* CONFIG_DEBUG_VM */ | ||
246 | |||
247 | /* If you set _PAGE_EXEC on weird pages you're on your own */ | ||
248 | pg = maybe_pte_to_page(pte); | ||
249 | if (unlikely(!pg)) | ||
250 | goto bail; | ||
251 | |||
252 | /* If the page is already clean, we move on */ | ||
253 | if (test_bit(PG_arch_1, &pg->flags)) | ||
254 | goto bail; | ||
255 | |||
256 | /* Clean the page and set PG_arch_1 */ | ||
257 | flush_dcache_icache_page(pg); | ||
258 | set_bit(PG_arch_1, &pg->flags); | ||
259 | |||
260 | bail: | ||
261 | return __pte(pte_val(pte) | _PAGE_EXEC); | ||
262 | } | ||
263 | |||
264 | #endif /* !(defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0) */ | ||
186 | 265 | ||
187 | /* | 266 | /* |
188 | * set_pte stores a linux PTE into the linux page table. | 267 | * set_pte stores a linux PTE into the linux page table. |
189 | */ | 268 | */ |
190 | void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | 269 | void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, |
270 | pte_t pte) | ||
191 | { | 271 | { |
192 | #ifdef CONFIG_DEBUG_VM | 272 | #ifdef CONFIG_DEBUG_VM |
193 | WARN_ON(pte_present(*ptep)); | 273 | WARN_ON(pte_present(*ptep)); |
@@ -196,9 +276,7 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte | |||
196 | * this context might not have been activated yet when this | 276 | * this context might not have been activated yet when this |
197 | * is called. | 277 | * is called. |
198 | */ | 278 | */ |
199 | pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); | 279 | pte = set_pte_filter(pte); |
200 | if (pte_need_exec_flush(pte, 1)) | ||
201 | pte = do_dcache_icache_coherency(pte); | ||
202 | 280 | ||
203 | /* Perform the setting of the PTE */ | 281 | /* Perform the setting of the PTE */ |
204 | __set_pte_at(mm, addr, ptep, pte, 0); | 282 | __set_pte_at(mm, addr, ptep, pte, 0); |
@@ -215,8 +293,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, | |||
215 | pte_t *ptep, pte_t entry, int dirty) | 293 | pte_t *ptep, pte_t entry, int dirty) |
216 | { | 294 | { |
217 | int changed; | 295 | int changed; |
218 | if (!dirty && pte_need_exec_flush(entry, 0)) | 296 | entry = set_access_flags_filter(entry, vma, dirty); |
219 | entry = do_dcache_icache_coherency(entry); | ||
220 | changed = !pte_same(*(ptep), entry); | 297 | changed = !pte_same(*(ptep), entry); |
221 | if (changed) { | 298 | if (changed) { |
222 | if (!(vma->vm_flags & VM_HUGETLB)) | 299 | if (!(vma->vm_flags & VM_HUGETLB)) |
@@ -242,7 +319,7 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr) | |||
242 | BUG_ON(pud_none(*pud)); | 319 | BUG_ON(pud_none(*pud)); |
243 | pmd = pmd_offset(pud, addr); | 320 | pmd = pmd_offset(pud, addr); |
244 | BUG_ON(!pmd_present(*pmd)); | 321 | BUG_ON(!pmd_present(*pmd)); |
245 | BUG_ON(!spin_is_locked(pte_lockptr(mm, pmd))); | 322 | assert_spin_locked(pte_lockptr(mm, pmd)); |
246 | } | 323 | } |
247 | #endif /* CONFIG_DEBUG_VM */ | 324 | #endif /* CONFIG_DEBUG_VM */ |
248 | 325 | ||
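
The new set_pte_filter()/set_access_flags_filter() pair replaces the old "flush on every set_pte" logic with a small state machine around the per-page PG_arch_1 (icache-clean) bit: dirty pages lose _PAGE_EXEC at set_pte time and only regain it, after a flush, from the exec-fault path. A compact user-space model of that decision logic, with invented PTE bit values and a stubbed-out cache flush:

#include <stdbool.h>
#include <stdio.h>

#define PTE_PRESENT 0x1u
#define PTE_EXEC    0x2u   /* invented values, for illustration only */

struct fake_page { bool icache_clean; };

static void flush_dcache_icache(struct fake_page *pg)
{
	pg->icache_clean = true;   /* stands in for the real cache flush */
}

/* Model of set_pte_filter() for MMUs with per-page HW exec support. */
static unsigned int filter_exec(unsigned int pte, struct fake_page *pg,
				bool exec_fault)
{
	if (!(pte & PTE_EXEC) || !(pte & PTE_PRESENT))
		return pte;                    /* nothing to police */
	if (pg->icache_clean)
		return pte;                    /* already coherent */
	if (exec_fault) {
		flush_dcache_icache(pg);       /* make it coherent now */
		return pte;
	}
	return pte & ~PTE_EXEC;                /* defer until an exec fault */
}

int main(void)
{
	struct fake_page pg = { .icache_clean = false };
	unsigned int pte = PTE_PRESENT | PTE_EXEC;

	printf("non-exec install keeps EXEC? %d\n",
	       !!(filter_exec(pte, &pg, false) & PTE_EXEC));   /* 0 */
	printf("exec fault restores EXEC?  %d\n",
	       !!(filter_exec(pte, &pg, true) & PTE_EXEC));    /* 1 */
	return 0;
}
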
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 5422169626ba..cb96cb2e17cc 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -142,7 +142,7 @@ ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags) | |||
142 | flags |= _PAGE_DIRTY | _PAGE_HWWRITE; | 142 | flags |= _PAGE_DIRTY | _PAGE_HWWRITE; |
143 | 143 | ||
144 | /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ | 144 | /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ |
145 | flags &= ~(_PAGE_USER | _PAGE_EXEC | _PAGE_HWEXEC); | 145 | flags &= ~(_PAGE_USER | _PAGE_EXEC); |
146 | 146 | ||
147 | return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); | 147 | return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); |
148 | } | 148 | } |
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index bfa7db6b2fd5..853d5565eed5 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -33,6 +33,8 @@ | |||
33 | #include <linux/stddef.h> | 33 | #include <linux/stddef.h> |
34 | #include <linux/vmalloc.h> | 34 | #include <linux/vmalloc.h> |
35 | #include <linux/init.h> | 35 | #include <linux/init.h> |
36 | #include <linux/bootmem.h> | ||
37 | #include <linux/lmb.h> | ||
36 | 38 | ||
37 | #include <asm/pgalloc.h> | 39 | #include <asm/pgalloc.h> |
38 | #include <asm/page.h> | 40 | #include <asm/page.h> |
@@ -55,19 +57,36 @@ | |||
55 | 57 | ||
56 | unsigned long ioremap_bot = IOREMAP_BASE; | 58 | unsigned long ioremap_bot = IOREMAP_BASE; |
57 | 59 | ||
60 | |||
61 | #ifdef CONFIG_PPC_MMU_NOHASH | ||
62 | static void *early_alloc_pgtable(unsigned long size) | ||
63 | { | ||
64 | void *pt; | ||
65 | |||
66 | if (init_bootmem_done) | ||
67 | pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS)); | ||
68 | else | ||
69 | pt = __va(lmb_alloc_base(size, size, | ||
70 | __pa(MAX_DMA_ADDRESS))); | ||
71 | memset(pt, 0, size); | ||
72 | |||
73 | return pt; | ||
74 | } | ||
75 | #endif /* CONFIG_PPC_MMU_NOHASH */ | ||
76 | |||
58 | /* | 77 | /* |
59 | * map_io_page currently only called by __ioremap | 78 | * map_kernel_page currently only called by __ioremap |
60 | * map_io_page adds an entry to the ioremap page table | 79 | * map_kernel_page adds an entry to the ioremap page table |
61 | * and adds an entry to the HPT, possibly bolting it | 80 | * and adds an entry to the HPT, possibly bolting it |
62 | */ | 81 | */ |
63 | static int map_io_page(unsigned long ea, unsigned long pa, int flags) | 82 | int map_kernel_page(unsigned long ea, unsigned long pa, int flags) |
64 | { | 83 | { |
65 | pgd_t *pgdp; | 84 | pgd_t *pgdp; |
66 | pud_t *pudp; | 85 | pud_t *pudp; |
67 | pmd_t *pmdp; | 86 | pmd_t *pmdp; |
68 | pte_t *ptep; | 87 | pte_t *ptep; |
69 | 88 | ||
70 | if (mem_init_done) { | 89 | if (slab_is_available()) { |
71 | pgdp = pgd_offset_k(ea); | 90 | pgdp = pgd_offset_k(ea); |
72 | pudp = pud_alloc(&init_mm, pgdp, ea); | 91 | pudp = pud_alloc(&init_mm, pgdp, ea); |
73 | if (!pudp) | 92 | if (!pudp) |
@@ -81,6 +100,35 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags) | |||
81 | set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, | 100 | set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, |
82 | __pgprot(flags))); | 101 | __pgprot(flags))); |
83 | } else { | 102 | } else { |
103 | #ifdef CONFIG_PPC_MMU_NOHASH | ||
104 | /* Warning ! This will blow up if bootmem is not initialized | ||
105 | * which our ppc64 code is keen to do that, we'll need to | ||
106 | * fix it and/or be more careful | ||
107 | */ | ||
108 | pgdp = pgd_offset_k(ea); | ||
109 | #ifdef PUD_TABLE_SIZE | ||
110 | if (pgd_none(*pgdp)) { | ||
111 | pudp = early_alloc_pgtable(PUD_TABLE_SIZE); | ||
112 | BUG_ON(pudp == NULL); | ||
113 | pgd_populate(&init_mm, pgdp, pudp); | ||
114 | } | ||
115 | #endif /* PUD_TABLE_SIZE */ | ||
116 | pudp = pud_offset(pgdp, ea); | ||
117 | if (pud_none(*pudp)) { | ||
118 | pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); | ||
119 | BUG_ON(pmdp == NULL); | ||
120 | pud_populate(&init_mm, pudp, pmdp); | ||
121 | } | ||
122 | pmdp = pmd_offset(pudp, ea); | ||
123 | if (!pmd_present(*pmdp)) { | ||
124 | ptep = early_alloc_pgtable(PAGE_SIZE); | ||
125 | BUG_ON(ptep == NULL); | ||
126 | pmd_populate_kernel(&init_mm, pmdp, ptep); | ||
127 | } | ||
128 | ptep = pte_offset_kernel(pmdp, ea); | ||
129 | set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, | ||
130 | __pgprot(flags))); | ||
131 | #else /* CONFIG_PPC_MMU_NOHASH */ | ||
84 | /* | 132 | /* |
85 | * If the mm subsystem is not fully up, we cannot create a | 133 | * If the mm subsystem is not fully up, we cannot create a |
86 | * linux page table entry for this mapping. Simply bolt an | 134 | * linux page table entry for this mapping. Simply bolt an |
@@ -93,6 +141,7 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags) | |||
93 | "memory at %016lx !\n", pa); | 141 | "memory at %016lx !\n", pa); |
94 | return -ENOMEM; | 142 | return -ENOMEM; |
95 | } | 143 | } |
144 | #endif /* !CONFIG_PPC_MMU_NOHASH */ | ||
96 | } | 145 | } |
97 | return 0; | 146 | return 0; |
98 | } | 147 | } |
@@ -124,7 +173,7 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size, | |||
124 | WARN_ON(size & ~PAGE_MASK); | 173 | WARN_ON(size & ~PAGE_MASK); |
125 | 174 | ||
126 | for (i = 0; i < size; i += PAGE_SIZE) | 175 | for (i = 0; i < size; i += PAGE_SIZE) |
127 | if (map_io_page((unsigned long)ea+i, pa+i, flags)) | 176 | if (map_kernel_page((unsigned long)ea+i, pa+i, flags)) |
128 | return NULL; | 177 | return NULL; |
129 | 178 | ||
130 | return (void __iomem *)ea; | 179 | return (void __iomem *)ea; |
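
When the slab allocator is not yet available, map_kernel_page() now walks the kernel page tables by hand and populates any missing level from early_alloc_pgtable(), which pulls zeroed, size-aligned memory from bootmem or the LMB allocator. The shape of that walk, reduced to a toy two-level table in user space (the level names, index split and "present" bit are illustrative):

#include <stdlib.h>
#include <string.h>

#define ENTRIES 512

struct level1 { unsigned long pte[ENTRIES]; };
struct level0 { struct level1 *dir[ENTRIES]; };

/* Stand-in for early_alloc_pgtable(): zeroed, size-aligned memory. */
static void *early_alloc(size_t size)
{
	void *p = aligned_alloc(size, size);
	if (p)
		memset(p, 0, size);
	return p;
}

int map_page(struct level0 *top, unsigned long ea, unsigned long pa)
{
	unsigned long i0 = (ea >> 21) % ENTRIES;  /* toy index split */
	unsigned long i1 = (ea >> 12) % ENTRIES;

	if (!top->dir[i0]) {                      /* populate missing level */
		top->dir[i0] = early_alloc(sizeof(struct level1));
		if (!top->dir[i0])
			return -1;
	}
	top->dir[i0]->pte[i1] = pa | 1;           /* toy "present" bit */
	return 0;
}
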
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 5b7038f248b6..1d98ecc8eecd 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -92,15 +92,13 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize, | |||
92 | : "memory" ); | 92 | : "memory" ); |
93 | } | 93 | } |
94 | 94 | ||
95 | void slb_flush_and_rebolt(void) | 95 | static void __slb_flush_and_rebolt(void) |
96 | { | 96 | { |
97 | /* If you change this make sure you change SLB_NUM_BOLTED | 97 | /* If you change this make sure you change SLB_NUM_BOLTED |
98 | * appropriately too. */ | 98 | * appropriately too. */ |
99 | unsigned long linear_llp, vmalloc_llp, lflags, vflags; | 99 | unsigned long linear_llp, vmalloc_llp, lflags, vflags; |
100 | unsigned long ksp_esid_data, ksp_vsid_data; | 100 | unsigned long ksp_esid_data, ksp_vsid_data; |
101 | 101 | ||
102 | WARN_ON(!irqs_disabled()); | ||
103 | |||
104 | linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; | 102 | linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; |
105 | vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp; | 103 | vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp; |
106 | lflags = SLB_VSID_KERNEL | linear_llp; | 104 | lflags = SLB_VSID_KERNEL | linear_llp; |
@@ -117,12 +115,6 @@ void slb_flush_and_rebolt(void) | |||
117 | ksp_vsid_data = get_slb_shadow()->save_area[2].vsid; | 115 | ksp_vsid_data = get_slb_shadow()->save_area[2].vsid; |
118 | } | 116 | } |
119 | 117 | ||
120 | /* | ||
121 | * We can't take a PMU exception in the following code, so hard | ||
122 | * disable interrupts. | ||
123 | */ | ||
124 | hard_irq_disable(); | ||
125 | |||
126 | /* We need to do this all in asm, so we're sure we don't touch | 118 | /* We need to do this all in asm, so we're sure we don't touch |
127 | * the stack between the slbia and rebolting it. */ | 119 | * the stack between the slbia and rebolting it. */ |
128 | asm volatile("isync\n" | 120 | asm volatile("isync\n" |
@@ -139,6 +131,21 @@ void slb_flush_and_rebolt(void) | |||
139 | : "memory"); | 131 | : "memory"); |
140 | } | 132 | } |
141 | 133 | ||
134 | void slb_flush_and_rebolt(void) | ||
135 | { | ||
136 | |||
137 | WARN_ON(!irqs_disabled()); | ||
138 | |||
139 | /* | ||
140 | * We can't take a PMU exception in the following code, so hard | ||
141 | * disable interrupts. | ||
142 | */ | ||
143 | hard_irq_disable(); | ||
144 | |||
145 | __slb_flush_and_rebolt(); | ||
146 | get_paca()->slb_cache_ptr = 0; | ||
147 | } | ||
148 | |||
142 | void slb_vmalloc_update(void) | 149 | void slb_vmalloc_update(void) |
143 | { | 150 | { |
144 | unsigned long vflags; | 151 | unsigned long vflags; |
@@ -180,12 +187,20 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2) | |||
180 | /* Flush all user entries from the segment table of the current processor. */ | 187 | /* Flush all user entries from the segment table of the current processor. */ |
181 | void switch_slb(struct task_struct *tsk, struct mm_struct *mm) | 188 | void switch_slb(struct task_struct *tsk, struct mm_struct *mm) |
182 | { | 189 | { |
183 | unsigned long offset = get_paca()->slb_cache_ptr; | 190 | unsigned long offset; |
184 | unsigned long slbie_data = 0; | 191 | unsigned long slbie_data = 0; |
185 | unsigned long pc = KSTK_EIP(tsk); | 192 | unsigned long pc = KSTK_EIP(tsk); |
186 | unsigned long stack = KSTK_ESP(tsk); | 193 | unsigned long stack = KSTK_ESP(tsk); |
187 | unsigned long unmapped_base; | 194 | unsigned long exec_base; |
188 | 195 | ||
196 | /* | ||
197 | * We need interrupts hard-disabled here, not just soft-disabled, | ||
198 | * so that a PMU interrupt can't occur, which might try to access | ||
199 | * user memory (to get a stack trace) and possible cause an SLB miss | ||
200 | * which would update the slb_cache/slb_cache_ptr fields in the PACA. | ||
201 | */ | ||
202 | hard_irq_disable(); | ||
203 | offset = get_paca()->slb_cache_ptr; | ||
189 | if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) && | 204 | if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) && |
190 | offset <= SLB_CACHE_ENTRIES) { | 205 | offset <= SLB_CACHE_ENTRIES) { |
191 | int i; | 206 | int i; |
@@ -200,7 +215,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) | |||
200 | } | 215 | } |
201 | asm volatile("isync" : : : "memory"); | 216 | asm volatile("isync" : : : "memory"); |
202 | } else { | 217 | } else { |
203 | slb_flush_and_rebolt(); | 218 | __slb_flush_and_rebolt(); |
204 | } | 219 | } |
205 | 220 | ||
206 | /* Workaround POWER5 < DD2.1 issue */ | 221 | /* Workaround POWER5 < DD2.1 issue */ |
@@ -212,42 +227,44 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) | |||
212 | 227 | ||
213 | /* | 228 | /* |
214 | * preload some userspace segments into the SLB. | 229 | * preload some userspace segments into the SLB. |
230 | * Almost all 32 and 64bit PowerPC executables are linked at | ||
231 | * 0x10000000 so it makes sense to preload this segment. | ||
215 | */ | 232 | */ |
216 | if (test_tsk_thread_flag(tsk, TIF_32BIT)) | 233 | exec_base = 0x10000000; |
217 | unmapped_base = TASK_UNMAPPED_BASE_USER32; | ||
218 | else | ||
219 | unmapped_base = TASK_UNMAPPED_BASE_USER64; | ||
220 | 234 | ||
221 | if (is_kernel_addr(pc)) | 235 | if (is_kernel_addr(pc) || is_kernel_addr(stack) || |
222 | return; | 236 | is_kernel_addr(exec_base)) |
223 | slb_allocate(pc); | ||
224 | |||
225 | if (esids_match(pc,stack)) | ||
226 | return; | 237 | return; |
227 | 238 | ||
228 | if (is_kernel_addr(stack)) | 239 | slb_allocate(pc); |
229 | return; | ||
230 | slb_allocate(stack); | ||
231 | 240 | ||
232 | if (esids_match(pc,unmapped_base) || esids_match(stack,unmapped_base)) | 241 | if (!esids_match(pc, stack)) |
233 | return; | 242 | slb_allocate(stack); |
234 | 243 | ||
235 | if (is_kernel_addr(unmapped_base)) | 244 | if (!esids_match(pc, exec_base) && |
236 | return; | 245 | !esids_match(stack, exec_base)) |
237 | slb_allocate(unmapped_base); | 246 | slb_allocate(exec_base); |
238 | } | 247 | } |
239 | 248 | ||
240 | static inline void patch_slb_encoding(unsigned int *insn_addr, | 249 | static inline void patch_slb_encoding(unsigned int *insn_addr, |
241 | unsigned int immed) | 250 | unsigned int immed) |
242 | { | 251 | { |
243 | /* Assume the instruction had a "0" immediate value, just | 252 | *insn_addr = (*insn_addr & 0xffff0000) | immed; |
244 | * "or" in the new value | ||
245 | */ | ||
246 | *insn_addr |= immed; | ||
247 | flush_icache_range((unsigned long)insn_addr, 4+ | 253 | flush_icache_range((unsigned long)insn_addr, 4+ |
248 | (unsigned long)insn_addr); | 254 | (unsigned long)insn_addr); |
249 | } | 255 | } |
250 | 256 | ||
257 | void slb_set_size(u16 size) | ||
258 | { | ||
259 | extern unsigned int *slb_compare_rr_to_size; | ||
260 | |||
261 | if (mmu_slb_size == size) | ||
262 | return; | ||
263 | |||
264 | mmu_slb_size = size; | ||
265 | patch_slb_encoding(slb_compare_rr_to_size, mmu_slb_size); | ||
266 | } | ||
267 | |||
251 | void slb_initialize(void) | 268 | void slb_initialize(void) |
252 | { | 269 | { |
253 | unsigned long linear_llp, vmalloc_llp, io_llp; | 270 | unsigned long linear_llp, vmalloc_llp, io_llp; |
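
patch_slb_encoding() previously assumed the target instruction still carried a zero immediate and OR-ed the new value in, which goes wrong the second time the same instruction is patched; now that slb_set_size() can repatch it at runtime, the low 16 bits are replaced outright. A small demonstration of the difference, applied to a plain word rather than live instruction memory:

#include <assert.h>
#include <stdint.h>

static void patch_or(uint32_t *insn, uint16_t immed)
{
	*insn |= immed;                              /* old scheme: one-shot */
}

static void patch_replace(uint32_t *insn, uint16_t immed)
{
	*insn = (*insn & 0xffff0000u) | immed;       /* new scheme: repeatable */
}

int main(void)
{
	uint32_t a = 0x38600000u, b = 0x38600000u;   /* "li r3,0"-shaped word */

	patch_or(&a, 64);      patch_or(&a, 32);       /* 64|32 = 96: wrong  */
	patch_replace(&b, 64); patch_replace(&b, 32);  /* low half = 32: right */

	assert((a & 0xffffu) == 96);
	assert((b & 0xffffu) == 32);
	return 0;
}
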
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 98cd1dc2ae75..687fddaa24c5 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -31,7 +31,7 @@ struct stab_entry { | |||
31 | 31 | ||
32 | #define NR_STAB_CACHE_ENTRIES 8 | 32 | #define NR_STAB_CACHE_ENTRIES 8 |
33 | static DEFINE_PER_CPU(long, stab_cache_ptr); | 33 | static DEFINE_PER_CPU(long, stab_cache_ptr); |
34 | static DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); | 34 | static DEFINE_PER_CPU(long [NR_STAB_CACHE_ENTRIES], stab_cache); |
35 | 35 | ||
36 | /* | 36 | /* |
37 | * Create a segment table entry for the given esid/vsid pair. | 37 | * Create a segment table entry for the given esid/vsid pair. |
@@ -164,7 +164,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) | |||
164 | { | 164 | { |
165 | struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; | 165 | struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; |
166 | struct stab_entry *ste; | 166 | struct stab_entry *ste; |
167 | unsigned long offset = __get_cpu_var(stab_cache_ptr); | 167 | unsigned long offset; |
168 | unsigned long pc = KSTK_EIP(tsk); | 168 | unsigned long pc = KSTK_EIP(tsk); |
169 | unsigned long stack = KSTK_ESP(tsk); | 169 | unsigned long stack = KSTK_ESP(tsk); |
170 | unsigned long unmapped_base; | 170 | unsigned long unmapped_base; |
@@ -172,6 +172,15 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) | |||
172 | /* Force previous translations to complete. DRENG */ | 172 | /* Force previous translations to complete. DRENG */ |
173 | asm volatile("isync" : : : "memory"); | 173 | asm volatile("isync" : : : "memory"); |
174 | 174 | ||
175 | /* | ||
176 | * We need interrupts hard-disabled here, not just soft-disabled, | ||
177 | * so that a PMU interrupt can't occur, which might try to access | ||
178 | * user memory (to get a stack trace) and possible cause an STAB miss | ||
179 | * which would update the stab_cache/stab_cache_ptr per-cpu variables. | ||
180 | */ | ||
181 | hard_irq_disable(); | ||
182 | |||
183 | offset = __get_cpu_var(stab_cache_ptr); | ||
175 | if (offset <= NR_STAB_CACHE_ENTRIES) { | 184 | if (offset <= NR_STAB_CACHE_ENTRIES) { |
176 | int i; | 185 | int i; |
177 | 186 | ||
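
The stab_cache declaration changes because DEFINE_PER_CPU-style macros want the declared identifier to stay a plain name: array-ness travels in the type argument rather than being pasted onto the name. A minimal sketch of the mechanism, using an invented DEFINE_BOXED macro in place of the real per-CPU machinery:

/* Simplified model: __typeof__() lets an array type be passed as the
 * "type" argument, so the declared identifier remains a plain name
 * that other macros can paste onto and take the address of. */
#define DEFINE_BOXED(type, name) __typeof__(type) boxed_##name

DEFINE_BOXED(long [8], stab_cache);   /* boxed_stab_cache is long[8] */
DEFINE_BOXED(long, stab_cache_ptr);   /* boxed_stab_cache_ptr is long */
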
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c
index 65190587a365..8aaa8b7eb324 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/tlb_hash32.c
@@ -71,6 +71,9 @@ void tlb_flush(struct mmu_gather *tlb) | |||
71 | */ | 71 | */ |
72 | _tlbia(); | 72 | _tlbia(); |
73 | } | 73 | } |
74 | |||
75 | /* Push out batch of freed page tables */ | ||
76 | pte_free_finish(); | ||
74 | } | 77 | } |
75 | 78 | ||
76 | /* | 79 | /* |
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 937eb90677d9..2b2f35f6985e 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -33,11 +33,6 @@ | |||
33 | 33 | ||
34 | DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); | 34 | DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); |
35 | 35 | ||
36 | /* This is declared as we are using the more or less generic | ||
37 | * arch/powerpc/include/asm/tlb.h file -- tgall | ||
38 | */ | ||
39 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
40 | |||
41 | /* | 36 | /* |
42 | * A linux PTE was changed and the corresponding hash table entry | 37 | * A linux PTE was changed and the corresponding hash table entry |
43 | * neesd to be flushed. This function will either perform the flush | 38 | * neesd to be flushed. This function will either perform the flush |
@@ -154,6 +149,21 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch) | |||
154 | batch->index = 0; | 149 | batch->index = 0; |
155 | } | 150 | } |
156 | 151 | ||
152 | void tlb_flush(struct mmu_gather *tlb) | ||
153 | { | ||
154 | struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch); | ||
155 | |||
156 | /* If there's a TLB batch pending, then we must flush it because the | ||
157 | * pages are going to be freed and we really don't want to have a CPU | ||
158 | * access a freed page because it has a stale TLB | ||
159 | */ | ||
160 | if (tlbbatch->index) | ||
161 | __flush_tlb_pending(tlbbatch); | ||
162 | |||
163 | /* Push out batch of freed page tables */ | ||
164 | pte_free_finish(); | ||
165 | } | ||
166 | |||
157 | /** | 167 | /** |
158 | * __flush_hash_table_range - Flush all HPTEs for a given address range | 168 | * __flush_hash_table_range - Flush all HPTEs for a given address range |
159 | * from the hash table (and the TLB). But keeps | 169 | * from the hash table (and the TLB). But keeps |
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
new file mode 100644
index 000000000000..ef1cccf71173
--- /dev/null
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -0,0 +1,770 @@ | |||
1 | /* | ||
2 | * Low leve TLB miss handlers for Book3E | ||
3 | * | ||
4 | * Copyright (C) 2008-2009 | ||
5 | * Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <asm/processor.h> | ||
14 | #include <asm/reg.h> | ||
15 | #include <asm/page.h> | ||
16 | #include <asm/mmu.h> | ||
17 | #include <asm/ppc_asm.h> | ||
18 | #include <asm/asm-offsets.h> | ||
19 | #include <asm/cputable.h> | ||
20 | #include <asm/pgtable.h> | ||
21 | #include <asm/reg.h> | ||
22 | #include <asm/exception-64e.h> | ||
23 | #include <asm/ppc-opcode.h> | ||
24 | |||
25 | #ifdef CONFIG_PPC_64K_PAGES | ||
26 | #define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1) | ||
27 | #else | ||
28 | #define VPTE_PMD_SHIFT (PTE_INDEX_SIZE) | ||
29 | #endif | ||
30 | #define VPTE_PUD_SHIFT (VPTE_PMD_SHIFT + PMD_INDEX_SIZE) | ||
31 | #define VPTE_PGD_SHIFT (VPTE_PUD_SHIFT + PUD_INDEX_SIZE) | ||
32 | #define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE) | ||
33 | |||
34 | |||
35 | /********************************************************************** | ||
36 | * * | ||
37 | * TLB miss handling for Book3E with TLB reservation and HES support * | ||
38 | * * | ||
39 | **********************************************************************/ | ||
40 | |||
41 | |||
42 | /* Data TLB miss */ | ||
43 | START_EXCEPTION(data_tlb_miss) | ||
44 | TLB_MISS_PROLOG | ||
45 | |||
46 | /* Now we handle the fault proper. We only save DEAR in normal | ||
47 | * fault case since that's the only interesting values here. | ||
48 | * We could probably also optimize by not saving SRR0/1 in the | ||
49 | * linear mapping case but I'll leave that for later | ||
50 | */ | ||
51 | mfspr r14,SPRN_ESR | ||
52 | mfspr r16,SPRN_DEAR /* get faulting address */ | ||
53 | srdi r15,r16,60 /* get region */ | ||
54 | cmpldi cr0,r15,0xc /* linear mapping ? */ | ||
55 | TLB_MISS_STATS_SAVE_INFO | ||
56 | beq tlb_load_linear /* yes -> go to linear map load */ | ||
57 | |||
58 | /* The page tables are mapped virtually linear. At this point, though, | ||
59 | * we don't know whether we are trying to fault in a first level | ||
60 | * virtual address or a virtual page table address. We can get that | ||
61 | * from bit 0x1 of the region ID which we have set for a page table | ||
62 | */ | ||
63 | andi. r10,r15,0x1 | ||
64 | bne- virt_page_table_tlb_miss | ||
65 | |||
66 | std r14,EX_TLB_ESR(r12); /* save ESR */ | ||
67 | std r16,EX_TLB_DEAR(r12); /* save DEAR */ | ||
68 | |||
69 | /* We need _PAGE_PRESENT and _PAGE_ACCESSED set */ | ||
70 | li r11,_PAGE_PRESENT | ||
71 | oris r11,r11,_PAGE_ACCESSED@h | ||
72 | |||
73 | /* We do the user/kernel test for the PID here along with the RW test | ||
74 | */ | ||
75 | cmpldi cr0,r15,0 /* Check for user region */ | ||
76 | |||
77 | /* We pre-test some combination of permissions to avoid double | ||
78 | * faults: | ||
79 | * | ||
80 | * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE | ||
81 | * ESR_ST is 0x00800000 | ||
82 | * _PAGE_BAP_SW is 0x00000010 | ||
83 | * So the shift is >> 19. This tests for supervisor writeability. | ||
84 | * If the page happens to be supervisor writeable and not user | ||
85 | * writeable, we will take a new fault later, but that should be | ||
86 | * a rare enough case. | ||
87 | * | ||
88 | * We also move ESR_ST in _PAGE_DIRTY position | ||
89 | * _PAGE_DIRTY is 0x00001000 so the shift is >> 11 | ||
90 | * | ||
91 | * MAS1 is preset for all we need except for TID that needs to | ||
92 | * be cleared for kernel translations | ||
93 | */ | ||
94 | rlwimi r11,r14,32-19,27,27 | ||
95 | rlwimi r11,r14,32-16,19,19 | ||
96 | beq normal_tlb_miss | ||
97 | /* XXX replace the RMW cycles with immediate loads + writes */ | ||
98 | 1: mfspr r10,SPRN_MAS1 | ||
99 | cmpldi cr0,r15,8 /* Check for vmalloc region */ | ||
100 | rlwinm r10,r10,0,16,1 /* Clear TID */ | ||
101 | mtspr SPRN_MAS1,r10 | ||
102 | beq+ normal_tlb_miss | ||
103 | |||
104 | /* We got a crappy address, just fault with whatever DEAR and ESR | ||
105 | * are here | ||
106 | */ | ||
107 | TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) | ||
108 | TLB_MISS_EPILOG_ERROR | ||
109 | b exc_data_storage_book3e | ||
110 | |||
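The shift derivation in the comment above is easy to sanity-check on its own. A stand-alone C check of that arithmetic, using the constants exactly as the comment quotes them (not pulled from the kernel headers):

    #include <assert.h>

    #define ESR_ST       0x00800000UL  /* store-type fault, as quoted above */
    #define _PAGE_BAP_SW 0x00000010UL  /* supervisor write,  as quoted above */
    #define _PAGE_DIRTY  0x00001000UL  /* as quoted above */

    int main(void)
    {
            /* ESR:ST shifted right by 19 lands on BAP_SW,
             * shifted right by 11 it lands on DIRTY. */
            assert((ESR_ST >> 19) == _PAGE_BAP_SW);
            assert((ESR_ST >> 11) == _PAGE_DIRTY);
            return 0;
    }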
111 | /* Instruction TLB miss */ | ||
112 | START_EXCEPTION(instruction_tlb_miss) | ||
113 | TLB_MISS_PROLOG | ||
114 | |||
115 | /* If we take a recursive fault, the second level handler may need | ||
116 | * to know whether we are handling a data or instruction fault in | ||
117 | * order to get to the right storage fault handler. We provide that | ||
118 | * info by writing a crazy value in ESR in our exception frame | ||
119 | */ | ||
120 | li r14,-1 /* store to exception frame is done later */ | ||
121 | |||
122 | /* Now we handle the fault proper. We only save DEAR in the non | ||
123 | * linear mapping case since we know the linear mapping case will | ||
124 | * not re-enter. We could indeed optimize and also not save SRR0/1 | ||
125 | * in the linear mapping case but I'll leave that for later | ||
126 | * | ||
127 | * Faulting address is SRR0 which is already in r16 | ||
128 | */ | ||
129 | srdi r15,r16,60 /* get region */ | ||
130 | cmpldi cr0,r15,0xc /* linear mapping ? */ | ||
131 | TLB_MISS_STATS_SAVE_INFO | ||
132 | beq tlb_load_linear /* yes -> go to linear map load */ | ||
133 | |||
134 | /* We do the user/kernel test for the PID here along with the RW test | ||
135 | */ | ||
136 | li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */ | ||
137 | oris r11,r11,_PAGE_ACCESSED@h | ||
138 | |||
139 | cmpldi cr0,r15,0 /* Check for user region */ | ||
140 | std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */ | ||
141 | beq normal_tlb_miss | ||
142 | /* XXX replace the RMW cycles with immediate loads + writes */ | ||
143 | 1: mfspr r10,SPRN_MAS1 | ||
144 | cmpldi cr0,r15,8 /* Check for vmalloc region */ | ||
145 | rlwinm r10,r10,0,16,1 /* Clear TID */ | ||
146 | mtspr SPRN_MAS1,r10 | ||
147 | beq+ normal_tlb_miss | ||
148 | |||
149 | /* We got a crappy address, just fault */ | ||
150 | TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) | ||
151 | TLB_MISS_EPILOG_ERROR | ||
152 | b exc_instruction_storage_book3e | ||
153 | |||
154 | /* | ||
155 | * This is the guts of the first-level TLB miss handler for direct | ||
156 | * misses. We are entered with: | ||
157 | * | ||
158 | * r16 = faulting address | ||
159 | * r15 = region ID | ||
160 | * r14 = crap (free to use) | ||
161 | * r13 = PACA | ||
162 | * r12 = TLB exception frame in PACA | ||
163 | * r11 = PTE permission mask | ||
164 | * r10 = crap (free to use) | ||
165 | */ | ||
166 | normal_tlb_miss: | ||
167 | /* So we first construct the page table address. We do that by | ||
168 | * shifting the bottom of the address (not the region ID) by | ||
169 | * PAGE_SHIFT-3, clearing the bottom 3 bits (get a PTE ptr) and | ||
170 | * or'ing the fourth high bit. | ||
171 | * | ||
172 | * NOTE: For 64K pages, we do things slightly differently in | ||
173 | * order to handle the weird page table format used by linux | ||
174 | */ | ||
175 | ori r10,r15,0x1 | ||
176 | #ifdef CONFIG_PPC_64K_PAGES | ||
177 | /* For the top bits, 16 bytes per PTE */ | ||
178 | rldicl r14,r16,64-(PAGE_SHIFT-4),PAGE_SHIFT-4+4 | ||
179 | /* Now create the bottom bits as 0 in position 0x8000 and | ||
180 | * the rest calculated for 8 bytes per PTE | ||
181 | */ | ||
182 | rldicl r15,r16,64-(PAGE_SHIFT-3),64-15 | ||
183 | /* Insert the bottom bits in */ | ||
184 | rlwimi r14,r15,0,16,31 | ||
185 | #else | ||
186 | rldicl r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4 | ||
187 | #endif | ||
188 | sldi r15,r10,60 | ||
189 | clrrdi r14,r14,3 | ||
190 | or r10,r15,r14 | ||
191 | |||
192 | BEGIN_MMU_FTR_SECTION | ||
193 | /* Set the TLB reservation and search for an existing entry. Then load | ||
194 | * the entry. | ||
195 | */ | ||
196 | PPC_TLBSRX_DOT(0,r16) | ||
197 | ld r14,0(r10) | ||
198 | beq normal_tlb_miss_done | ||
199 | MMU_FTR_SECTION_ELSE | ||
200 | ld r14,0(r10) | ||
201 | ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) | ||
202 | |||
203 | finish_normal_tlb_miss: | ||
204 | /* Check if required permissions are met */ | ||
205 | andc. r15,r11,r14 | ||
206 | bne- normal_tlb_miss_access_fault | ||
207 | |||
208 | /* Now we build the MAS: | ||
209 | * | ||
210 | * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG | ||
211 | * MAS 1 : Almost fully setup | ||
212 | * - PID already updated by caller if necessary | ||
213 | * - TSIZE needs changing if !base page size, not | ||
214 | * yet implemented for now | ||
215 | * MAS 2 : Defaults not useful, need to be redone | ||
216 | * MAS 3+7 : Needs to be done | ||
217 | * | ||
218 | * TODO: mix up code below for better scheduling | ||
219 | */ | ||
220 | clrrdi r11,r16,12 /* Clear low crap in EA */ | ||
221 | rlwimi r11,r14,32-19,27,31 /* Insert WIMGE */ | ||
222 | mtspr SPRN_MAS2,r11 | ||
223 | |||
224 | /* Check page size, if not standard, update MAS1 */ | ||
225 | rldicl r11,r14,64-8,64-8 | ||
226 | #ifdef CONFIG_PPC_64K_PAGES | ||
227 | cmpldi cr0,r11,BOOK3E_PAGESZ_64K | ||
228 | #else | ||
229 | cmpldi cr0,r11,BOOK3E_PAGESZ_4K | ||
230 | #endif | ||
231 | beq- 1f | ||
232 | mfspr r11,SPRN_MAS1 | ||
233 | rlwimi r11,r14,31,21,24 | ||
234 | rlwinm r11,r11,0,21,19 | ||
235 | mtspr SPRN_MAS1,r11 | ||
236 | 1: | ||
237 | /* Move RPN in position */ | ||
238 | rldicr r11,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT | ||
239 | clrldi r15,r11,12 /* Clear crap at the top */ | ||
240 | rlwimi r15,r14,32-8,22,25 /* Move in U bits */ | ||
241 | rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */ | ||
242 | |||
243 | /* Mask out SW and UW if !DIRTY (XXX optimize this !) */ | ||
244 | andi. r11,r14,_PAGE_DIRTY | ||
245 | bne 1f | ||
246 | li r11,MAS3_SW|MAS3_UW | ||
247 | andc r15,r15,r11 | ||
248 | 1: | ||
249 | BEGIN_MMU_FTR_SECTION | ||
250 | srdi r16,r15,32 | ||
251 | mtspr SPRN_MAS3,r15 | ||
252 | mtspr SPRN_MAS7,r16 | ||
253 | MMU_FTR_SECTION_ELSE | ||
254 | mtspr SPRN_MAS7_MAS3,r15 | ||
255 | ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) | ||
256 | |||
257 | tlbwe | ||
258 | |||
259 | normal_tlb_miss_done: | ||
260 | /* We don't bother with restoring DEAR or ESR since we know we are | ||
261 | * level 0 and just going back to userland. They are only needed | ||
262 | * if you are going to take an access fault | ||
263 | */ | ||
264 | TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) | ||
265 | TLB_MISS_EPILOG_SUCCESS | ||
266 | rfi | ||
267 | |||
268 | normal_tlb_miss_access_fault: | ||
269 | /* We need to check if it was an instruction miss */ | ||
270 | andi. r10,r11,_PAGE_EXEC | ||
271 | bne 1f | ||
272 | ld r14,EX_TLB_DEAR(r12) | ||
273 | ld r15,EX_TLB_ESR(r12) | ||
274 | mtspr SPRN_DEAR,r14 | ||
275 | mtspr SPRN_ESR,r15 | ||
276 | TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) | ||
277 | TLB_MISS_EPILOG_ERROR | ||
278 | b exc_data_storage_book3e | ||
279 | 1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) | ||
280 | TLB_MISS_EPILOG_ERROR | ||
281 | b exc_instruction_storage_book3e | ||
282 | |||
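The address construction at the top of normal_tlb_miss (shift the EA down, clear the low three bits, put the region ID with its low bit set back in the top nibble) boils down to the following sketch, assuming 4K pages (PAGE_SHIFT = 12); the 64K case packs the index differently for 16-byte PTEs, as the #ifdef above shows:

    /* Sketch: virtual address of the PTE mapping 'ea', inside the
     * virtually-linear page table region (region ID | 1). */
    static unsigned long vpte_ptr(unsigned long ea)
    {
            unsigned long region = ea >> 60;                       /* top nibble */
            unsigned long idx    = (ea & 0x0fffffffffffffffUL) >> 12;

            return ((region | 1UL) << 60) | (idx << 3);            /* 8 bytes/PTE */
    }

A miss on that computed address is what ends up in virt_page_table_tlb_miss below.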
283 | |||
284 | /* | ||
285 | * This is the guts of the second-level TLB miss handler for virtual | ||
286 | * page table misses. We are entered with: | ||
287 | * | ||
288 | * r16 = virtual page table faulting address | ||
289 | * r15 = region (top 4 bits of address) | ||
290 | * r14 = crap (free to use) | ||
291 | * r13 = PACA | ||
292 | * r12 = TLB exception frame in PACA | ||
293 | * r11 = crap (free to use) | ||
294 | * r10 = crap (free to use) | ||
295 | * | ||
296 | * Note that this should only ever be called as a second level handler | ||
297 | * with the current scheme when using SW load. | ||
298 | * That means we can always get the original fault DEAR at | ||
299 | * EX_TLB_DEAR-EX_TLB_SIZE(r12) | ||
300 | * | ||
301 | * It can be re-entered by the linear mapping miss handler. However, to | ||
302 | * avoid too much complication, it will restart the whole fault at level | ||
303 | * 0 so we don't care too much about clobbers | ||
304 | * | ||
305 | * XXX That code was written back when we couldn't clobber r14. We can now, | ||
306 | * so we could probably optimize things a bit | ||
307 | */ | ||
308 | virt_page_table_tlb_miss: | ||
309 | /* Are we hitting a kernel page table ? */ | ||
310 | andi. r10,r15,0x8 | ||
311 | |||
312 | /* The cool thing now is that r10 contains 0 for user and 8 for kernel, | ||
313 | * and we happen to have the swapper_pg_dir at offset 8 from the user | ||
314 | * pgdir in the PACA :-). | ||
315 | */ | ||
316 | add r11,r10,r13 | ||
317 | |||
318 | /* If kernel, we need to clear MAS1 TID */ | ||
319 | beq 1f | ||
320 | /* XXX replace the RMW cycles with immediate loads + writes */ | ||
321 | mfspr r10,SPRN_MAS1 | ||
322 | rlwinm r10,r10,0,16,1 /* Clear TID */ | ||
323 | mtspr SPRN_MAS1,r10 | ||
324 | 1: | ||
325 | BEGIN_MMU_FTR_SECTION | ||
326 | /* Search if we already have a TLB entry for that virtual address, and | ||
327 | * if we do, bail out. | ||
328 | */ | ||
329 | PPC_TLBSRX_DOT(0,r16) | ||
330 | beq virt_page_table_tlb_miss_done | ||
331 | END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) | ||
332 | |||
333 | /* Now, we need to walk the page tables. First check if we are in | ||
334 | * range. | ||
335 | */ | ||
336 | rldicl. r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4 | ||
337 | bne- virt_page_table_tlb_miss_fault | ||
338 | |||
339 | /* Get the PGD pointer */ | ||
340 | ld r15,PACAPGD(r11) | ||
341 | cmpldi cr0,r15,0 | ||
342 | beq- virt_page_table_tlb_miss_fault | ||
343 | |||
344 | /* Get to PGD entry */ | ||
345 | rldicl r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3 | ||
346 | clrrdi r10,r11,3 | ||
347 | ldx r15,r10,r15 | ||
348 | cmpldi cr0,r15,0 | ||
349 | beq virt_page_table_tlb_miss_fault | ||
350 | |||
351 | #ifndef CONFIG_PPC_64K_PAGES | ||
352 | /* Get to PUD entry */ | ||
353 | rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3 | ||
354 | clrrdi r10,r11,3 | ||
355 | ldx r15,r10,r15 | ||
356 | cmpldi cr0,r15,0 | ||
357 | beq virt_page_table_tlb_miss_fault | ||
358 | #endif /* CONFIG_PPC_64K_PAGES */ | ||
359 | |||
360 | /* Get to PMD entry */ | ||
361 | rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3 | ||
362 | clrrdi r10,r11,3 | ||
363 | ldx r15,r10,r15 | ||
364 | cmpldi cr0,r15,0 | ||
365 | beq virt_page_table_tlb_miss_fault | ||
366 | |||
367 | /* Ok, we're all right, we can now create a kernel translation for | ||
368 | * a 4K or 64K page from r16 -> r15. | ||
369 | */ | ||
370 | /* Now we build the MAS: | ||
371 | * | ||
372 | * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG | ||
373 | * MAS 1 : Almost fully setup | ||
374 | * - PID already updated by caller if necessary | ||
375 | * - TSIZE for now is base page size always | ||
376 | * MAS 2 : Use defaults | ||
377 | * MAS 3+7 : Needs to be done | ||
378 | * | ||
379 | * So we only do MAS 3 (and 7) for now... | ||
380 | */ | ||
381 | clrldi r11,r15,4 /* remove region ID from RPN */ | ||
382 | ori r10,r11,1 /* Or-in SR */ | ||
383 | |||
384 | BEGIN_MMU_FTR_SECTION | ||
385 | srdi r16,r10,32 | ||
386 | mtspr SPRN_MAS3,r10 | ||
387 | mtspr SPRN_MAS7,r16 | ||
388 | MMU_FTR_SECTION_ELSE | ||
389 | mtspr SPRN_MAS7_MAS3,r10 | ||
390 | ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) | ||
391 | |||
392 | tlbwe | ||
393 | |||
394 | BEGIN_MMU_FTR_SECTION | ||
395 | virt_page_table_tlb_miss_done: | ||
396 | |||
397 | /* We have overridden MAS2:EPN but currently our primary TLB miss | ||
398 | * handler will always restore it, so that should not be an issue. | ||
399 | * If we ever optimize the primary handler to not write MAS2 in | ||
400 | * some cases, we'll have to restore MAS2:EPN here based on the | ||
401 | * original fault's DEAR. If we do that we have to modify the | ||
402 | * ITLB miss handler to also store SRR0 in the exception frame | ||
403 | * as DEAR. | ||
404 | * | ||
405 | * However, one nasty thing we did is we cleared the reservation | ||
406 | * (well, potentially we did). The trick here is that if we | ||
407 | * are not a level 0 exception (we interrupted the TLB miss), we | ||
408 | * offset the return address by -4 in order to replay the tlbsrx | ||
409 | * instruction there. | ||
410 | */ | ||
411 | subf r10,r13,r12 | ||
412 | cmpldi cr0,r10,PACA_EXTLB+EX_TLB_SIZE | ||
413 | bne- 1f | ||
414 | ld r11,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13) | ||
415 | addi r10,r11,-4 | ||
416 | std r10,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13) | ||
417 | 1: | ||
418 | END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) | ||
419 | /* Return to caller, normal case */ | ||
420 | TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK); | ||
421 | TLB_MISS_EPILOG_SUCCESS | ||
422 | rfi | ||
423 | |||
424 | virt_page_table_tlb_miss_fault: | ||
425 | /* If we fault here, things are a little bit tricky. We need to call | ||
426 | * either the data or instruction storage fault handler, and we need | ||
427 | * to retrieve the original fault address and ESR (for data). | ||
428 | * | ||
429 | * The thing is, we know that in normal circumstances, this is | ||
430 | * always called as a second level tlb miss for SW load or as a first | ||
431 | * level TLB miss for HW load, so we should be able to peek at the | ||
432 | * relevant information in the first exception frame in the PACA. | ||
433 | * | ||
434 | * However, we do need to double check that, because we may just hit | ||
435 | * a stray kernel pointer or a userland attack trying to hit those | ||
436 | * areas. If that is the case, we do a data fault. (We can't get here | ||
437 | * from an instruction tlb miss anyway). | ||
438 | * | ||
439 | * Note also that when going to a fault, we must unwind the previous | ||
440 | * level as well. Since we are doing that, we don't need to clear or | ||
441 | * restore the TLB reservation either. | ||
442 | */ | ||
443 | subf r10,r13,r12 | ||
444 | cmpldi cr0,r10,PACA_EXTLB+EX_TLB_SIZE | ||
445 | bne- virt_page_table_tlb_miss_whacko_fault | ||
446 | |||
447 | /* We dig the original DEAR and ESR from slot 0 */ | ||
448 | ld r15,EX_TLB_DEAR+PACA_EXTLB(r13) | ||
449 | ld r16,EX_TLB_ESR+PACA_EXTLB(r13) | ||
450 | |||
451 | /* We check for the "special" ESR value for instruction faults */ | ||
452 | cmpdi cr0,r16,-1 | ||
453 | beq 1f | ||
454 | mtspr SPRN_DEAR,r15 | ||
455 | mtspr SPRN_ESR,r16 | ||
456 | TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT); | ||
457 | TLB_MISS_EPILOG_ERROR | ||
458 | b exc_data_storage_book3e | ||
459 | 1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT); | ||
460 | TLB_MISS_EPILOG_ERROR | ||
461 | b exc_instruction_storage_book3e | ||
462 | |||
463 | virt_page_table_tlb_miss_whacko_fault: | ||
464 | /* The linear fault will restart everything so ESR and DEAR will | ||
465 | * not have been clobbered, let's just fault with what we have | ||
466 | */ | ||
467 | TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_FAULT); | ||
468 | TLB_MISS_EPILOG_ERROR | ||
469 | b exc_data_storage_book3e | ||
470 | |||
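The branch-free pgdir selection in virt_page_table_tlb_miss relies on one layout property only: the kernel pgdir pointer sits 8 bytes after the user one in the PACA, so the 0-or-8 result of the andi. can simply be added to the base. A sketch of that assumption (not the real paca_struct definition):

    struct paca_pgdirs_sketch {
            unsigned long *pgd;        /* user page directory,   offset 0 */
            unsigned long *kernel_pgd; /* kernel page directory, offset 8 */
    };

    /* 'sel' is 0 or 8, exactly what "andi. r10,r15,0x8" produces */
    static unsigned long *pick_pgd(struct paca_pgdirs_sketch *paca,
                                   unsigned long sel)
    {
            return *(unsigned long **)((char *)paca + sel);
    }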
471 | |||
472 | /************************************************************** | ||
473 | * * | ||
474 | * TLB miss handling for Book3E with hw page table support * | ||
475 | * * | ||
476 | **************************************************************/ | ||
477 | |||
478 | |||
479 | /* Data TLB miss */ | ||
480 | START_EXCEPTION(data_tlb_miss_htw) | ||
481 | TLB_MISS_PROLOG | ||
482 | |||
483 | /* Now we handle the fault proper. We only save DEAR in the normal | ||
484 | * fault case since that's the only interesting value here. | ||
485 | * We could probably also optimize by not saving SRR0/1 in the | ||
486 | * linear mapping case but I'll leave that for later | ||
487 | */ | ||
488 | mfspr r14,SPRN_ESR | ||
489 | mfspr r16,SPRN_DEAR /* get faulting address */ | ||
490 | srdi r11,r16,60 /* get region */ | ||
491 | cmpldi cr0,r11,0xc /* linear mapping ? */ | ||
492 | TLB_MISS_STATS_SAVE_INFO | ||
493 | beq tlb_load_linear /* yes -> go to linear map load */ | ||
494 | |||
495 | /* We do the user/kernel test for the PID here along with the RW test | ||
496 | */ | ||
497 | cmpldi cr0,r11,0 /* Check for user region */ | ||
498 | ld r15,PACAPGD(r13) /* Load user pgdir */ | ||
499 | beq htw_tlb_miss | ||
500 | |||
501 | /* XXX replace the RMW cycles with immediate loads + writes */ | ||
502 | 1: mfspr r10,SPRN_MAS1 | ||
503 | cmpldi cr0,r11,8 /* Check for vmalloc region */ | ||
504 | rlwinm r10,r10,0,16,1 /* Clear TID */ | ||
505 | mtspr SPRN_MAS1,r10 | ||
506 | ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */ | ||
507 | beq+ htw_tlb_miss | ||
508 | |||
509 | /* We got a crappy address, just fault with whatever DEAR and ESR | ||
510 | * are here | ||
511 | */ | ||
512 | TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) | ||
513 | TLB_MISS_EPILOG_ERROR | ||
514 | b exc_data_storage_book3e | ||
515 | |||
516 | /* Instruction TLB miss */ | ||
517 | START_EXCEPTION(instruction_tlb_miss_htw) | ||
518 | TLB_MISS_PROLOG | ||
519 | |||
520 | /* If we take a recursive fault, the second level handler may need | ||
521 | * to know whether we are handling a data or instruction fault in | ||
522 | * order to get to the right storage fault handler. We provide that | ||
523 | * info by keeping a crazy value for ESR in r14 | ||
524 | */ | ||
525 | li r14,-1 /* store to exception frame is done later */ | ||
526 | |||
527 | /* Now we handle the fault proper. We only save DEAR in the non | ||
528 | * linear mapping case since we know the linear mapping case will | ||
529 | * not re-enter. We could indeed optimize and also not save SRR0/1 | ||
530 | * in the linear mapping case but I'll leave that for later | ||
531 | * | ||
532 | * Faulting address is SRR0 which is already in r16 | ||
533 | */ | ||
534 | srdi r11,r16,60 /* get region */ | ||
535 | cmpldi cr0,r11,0xc /* linear mapping ? */ | ||
536 | TLB_MISS_STATS_SAVE_INFO | ||
537 | beq tlb_load_linear /* yes -> go to linear map load */ | ||
538 | |||
539 | /* We do the user/kernel test for the PID here along with the RW test | ||
540 | */ | ||
541 | cmpldi cr0,r11,0 /* Check for user region */ | ||
542 | ld r15,PACAPGD(r13) /* Load user pgdir */ | ||
543 | beq htw_tlb_miss | ||
544 | |||
545 | /* XXX replace the RMW cycles with immediate loads + writes */ | ||
546 | 1: mfspr r10,SPRN_MAS1 | ||
547 | cmpldi cr0,r11,8 /* Check for vmalloc region */ | ||
548 | rlwinm r10,r10,0,16,1 /* Clear TID */ | ||
549 | mtspr SPRN_MAS1,r10 | ||
550 | ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */ | ||
551 | beq+ htw_tlb_miss | ||
552 | |||
553 | /* We got a crappy address, just fault */ | ||
554 | TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) | ||
555 | TLB_MISS_EPILOG_ERROR | ||
556 | b exc_instruction_storage_book3e | ||
557 | |||
558 | |||
559 | /* | ||
560 | * This is the guts of the first-level TLB miss handler for hardware | ||
561 | * page table walk misses. We are entered with: | ||
562 | * | ||
563 | * r16 = faulting address | ||
564 | * r15 = PGD pointer | ||
565 | * r14 = ESR | ||
566 | * r13 = PACA | ||
567 | * r12 = TLB exception frame in PACA | ||
568 | * r11 = crap (free to use) | ||
569 | * r10 = crap (free to use) | ||
570 | * | ||
571 | * It can be re-entered by the linear mapping miss handler. However, to | ||
572 | * avoid too much complication, it will save/restore things for us | ||
573 | */ | ||
574 | htw_tlb_miss: | ||
575 | /* Search if we already have a TLB entry for that virtual address, and | ||
576 | * if we do, bail out. | ||
577 | * | ||
578 | * MAS1:IND should be already set based on MAS4 | ||
579 | */ | ||
580 | PPC_TLBSRX_DOT(0,r16) | ||
581 | beq htw_tlb_miss_done | ||
582 | |||
583 | /* Now, we need to walk the page tables. First check if we are in | ||
584 | * range. | ||
585 | */ | ||
586 | rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 | ||
587 | bne- htw_tlb_miss_fault | ||
588 | |||
589 | /* Get the PGD pointer */ | ||
590 | cmpldi cr0,r15,0 | ||
591 | beq- htw_tlb_miss_fault | ||
592 | |||
593 | /* Get to PGD entry */ | ||
594 | rldicl r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3 | ||
595 | clrrdi r10,r11,3 | ||
596 | ldx r15,r10,r15 | ||
597 | cmpldi cr0,r15,0 | ||
598 | beq htw_tlb_miss_fault | ||
599 | |||
600 | #ifndef CONFIG_PPC_64K_PAGES | ||
601 | /* Get to PUD entry */ | ||
602 | rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3 | ||
603 | clrrdi r10,r11,3 | ||
604 | ldx r15,r10,r15 | ||
605 | cmpldi cr0,r15,0 | ||
606 | beq htw_tlb_miss_fault | ||
607 | #endif /* CONFIG_PPC_64K_PAGES */ | ||
608 | |||
609 | /* Get to PMD entry */ | ||
610 | rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3 | ||
611 | clrrdi r10,r11,3 | ||
612 | ldx r15,r10,r15 | ||
613 | cmpldi cr0,r15,0 | ||
614 | beq htw_tlb_miss_fault | ||
615 | |||
616 | /* Ok, we're all right, we can now create an indirect entry for | ||
617 | * a 1M or 256M page. | ||
618 | * | ||
619 | * One last trick: because we use "half" pages for the HTW | ||
620 | * (a 1M IND is a 2K array and a 256M IND is a 32K array), we need | ||
621 | * to account for an added LSB in the RPN. For 64K pages, there is | ||
622 | * no problem as we already use 32K arrays (half PTE pages), but for | ||
623 | * 4K pages we need to extract a bit from the virtual address and | ||
624 | * insert it into the "PA52" bit of the RPN. | ||
625 | */ | ||
626 | #ifndef CONFIG_PPC_64K_PAGES | ||
627 | rlwimi r15,r16,32-9,20,20 | ||
628 | #endif | ||
629 | /* Now we build the MAS: | ||
630 | * | ||
631 | * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG | ||
632 | * MAS 1 : Almost fully setup | ||
633 | * - PID already updated by caller if necessary | ||
634 | * - TSIZE for now is base ind page size always | ||
635 | * MAS 2 : Use defaults | ||
636 | * MAS 3+7 : Needs to be done | ||
637 | */ | ||
638 | #ifdef CONFIG_PPC_64K_PAGES | ||
639 | ori r10,r15,(BOOK3E_PAGESZ_64K << MAS3_SPSIZE_SHIFT) | ||
640 | #else | ||
641 | ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) | ||
642 | #endif | ||
643 | |||
644 | BEGIN_MMU_FTR_SECTION | ||
645 | srdi r16,r10,32 | ||
646 | mtspr SPRN_MAS3,r10 | ||
647 | mtspr SPRN_MAS7,r16 | ||
648 | MMU_FTR_SECTION_ELSE | ||
649 | mtspr SPRN_MAS7_MAS3,r10 | ||
650 | ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) | ||
651 | |||
652 | tlbwe | ||
653 | |||
654 | htw_tlb_miss_done: | ||
655 | /* We don't bother with restoring DEAR or ESR since we know we are | ||
656 | * level 0 and just going back to userland. They are only needed | ||
657 | * if you are going to take an access fault | ||
658 | */ | ||
659 | TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK) | ||
660 | TLB_MISS_EPILOG_SUCCESS | ||
661 | rfi | ||
662 | |||
663 | htw_tlb_miss_fault: | ||
664 | /* We need to check if it was an instruction miss. We can tell | ||
665 | * because r14 would then contain -1 | ||
666 | */ | ||
667 | cmpdi cr0,r14,-1 | ||
668 | beq 1f | ||
669 | mtspr SPRN_DEAR,r16 | ||
670 | mtspr SPRN_ESR,r14 | ||
671 | TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT) | ||
672 | TLB_MISS_EPILOG_ERROR | ||
673 | b exc_data_storage_book3e | ||
674 | 1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT) | ||
675 | TLB_MISS_EPILOG_ERROR | ||
676 | b exc_instruction_storage_book3e | ||
677 | |||
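The "half page" sizing used by htw_tlb_miss checks out as plain arithmetic: with 4K base pages a 1M indirect entry covers 1M/4K = 256 PTEs of 8 bytes = 2KB, i.e. half a PTE page, so one extra physical-address bit ("PA52", physical bit 11) is needed to select the half; with 64K base pages a 256M indirect entry covers 256M/64K = 4096 PTEs = 32KB, a whole array. A small C rendering of the 4K-page fixup the rlwimi performs:

    /* Sketch, 4K base pages: fold the "which 1MB half of the 2MB covered
     * by this PTE page" bit of the EA into the RPN as the 2KB-select bit. */
    static unsigned long htw_ind_rpn_4k(unsigned long pte_page_pa,
                                        unsigned long ea)
    {
            return pte_page_pa | ((ea >> 9) & 0x800); /* EA bit 20 -> PA bit 11 */
    }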
678 | /* | ||
679 | * This is the guts of "any" level TLB miss handler for kernel linear | ||
680 | * mapping misses. We are entered with: | ||
681 | * | ||
682 | * | ||
683 | * r16 = faulting address | ||
684 | * r15 = crap (free to use) | ||
685 | * r14 = ESR (data) or -1 (instruction) | ||
686 | * r13 = PACA | ||
687 | * r12 = TLB exception frame in PACA | ||
688 | * r11 = crap (free to use) | ||
689 | * r10 = crap (free to use) | ||
690 | * | ||
691 | * In addition we know that we will not re-enter, so in theory, we could | ||
692 | * use a simpler epilog not restoring SRR0/1 etc.. but we'll do that later. | ||
693 | * | ||
694 | * We also need to be careful about MAS registers here & TLB reservation, | ||
695 | * as we know we'll have clobbered them if we interrupt the main TLB miss | ||
696 | * handlers in which case we probably want to do a full restart at level | ||
697 | * 0 rather than saving / restoring the MAS. | ||
698 | * | ||
699 | * Note: If we care about performance of that core, we can easily shuffle | ||
700 | * a few things around | ||
701 | */ | ||
702 | tlb_load_linear: | ||
703 | /* For now, we assume the linear mapping is contiguous and stops at | ||
704 | * linear_map_top. We also assume the size is a multiple of 1G, thus | ||
705 | * we only use 1G pages for now. That might have to be changed in a | ||
706 | * final implementation, especially when dealing with hypervisors | ||
707 | */ | ||
708 | ld r11,PACATOC(r13) | ||
709 | ld r11,linear_map_top@got(r11) | ||
710 | ld r10,0(r11) | ||
711 | cmpld cr0,r10,r16 | ||
712 | bge tlb_load_linear_fault | ||
713 | |||
714 | /* MAS1 needs a whole new setup. */ | ||
715 | li r15,(BOOK3E_PAGESZ_1GB<<MAS1_TSIZE_SHIFT) | ||
716 | oris r15,r15,MAS1_VALID@h /* MAS1 needs V and TSIZE */ | ||
717 | mtspr SPRN_MAS1,r15 | ||
718 | |||
719 | /* Already somebody there? */ | ||
720 | PPC_TLBSRX_DOT(0,r16) | ||
721 | beq tlb_load_linear_done | ||
722 | |||
723 | /* Now we build the remaining MAS. MAS0 and 2 should be fine | ||
724 | * with their defaults, which leaves us with MAS 3 and 7. The | ||
725 | * mapping is linear, so we just take the address, clear the | ||
726 | * region bits, and or in the permission bits which are currently | ||
727 | * hard wired | ||
728 | */ | ||
729 | clrrdi r10,r16,30 /* 1G page index */ | ||
730 | clrldi r10,r10,4 /* clear region bits */ | ||
731 | ori r10,r10,MAS3_SR|MAS3_SW|MAS3_SX | ||
732 | |||
733 | BEGIN_MMU_FTR_SECTION | ||
734 | srdi r16,r10,32 | ||
735 | mtspr SPRN_MAS3,r10 | ||
736 | mtspr SPRN_MAS7,r16 | ||
737 | MMU_FTR_SECTION_ELSE | ||
738 | mtspr SPRN_MAS7_MAS3,r10 | ||
739 | ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) | ||
740 | |||
741 | tlbwe | ||
742 | |||
743 | tlb_load_linear_done: | ||
744 | /* We use the "error" epilog for success as we do want to | ||
745 | * restore to the initial faulting context, whatever it was. | ||
746 | * We do that because we can't resume a fault within a TLB | ||
747 | * miss handler, due to MAS and TLB reservation being clobbered. | ||
748 | */ | ||
749 | TLB_MISS_STATS_X(MMSTAT_TLB_MISS_LINEAR) | ||
750 | TLB_MISS_EPILOG_ERROR | ||
751 | rfi | ||
752 | |||
753 | tlb_load_linear_fault: | ||
754 | /* We keep the DEAR and ESR around, this shouldn't have happened */ | ||
755 | cmpdi cr0,r14,-1 | ||
756 | beq 1f | ||
757 | TLB_MISS_EPILOG_ERROR_SPECIAL | ||
758 | b exc_data_storage_book3e | ||
759 | 1: TLB_MISS_EPILOG_ERROR_SPECIAL | ||
760 | b exc_instruction_storage_book3e | ||
761 | |||
762 | |||
763 | #ifdef CONFIG_BOOK3E_MMU_TLB_STATS | ||
764 | .tlb_stat_inc: | ||
765 | 1: ldarx r8,0,r9 | ||
766 | addi r8,r8,1 | ||
767 | stdcx. r8,0,r9 | ||
768 | bne- 1b | ||
769 | blr | ||
770 | #endif | ||
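tlb_load_linear above maps the faulting address with a hard-wired 1GB supervisor-RWX entry. The MAS3/MAS7 payload it builds is just the 1GB-aligned physical address with the region nibble stripped, plus fixed permission bits; a sketch with placeholder MAS3_S* values (the real ones come from the Book3E MMU header):

    #define MAS3_SR 0x01    /* placeholder values for the sketch */
    #define MAS3_SW 0x04
    #define MAS3_SX 0x10

    static unsigned long linear_mas3(unsigned long ea)
    {
            unsigned long pa = ea & 0x0fffffffffffffffUL;  /* strip region  */

            pa &= ~((1UL << 30) - 1);                      /* 1GB align     */
            return pa | MAS3_SR | MAS3_SW | MAS3_SX;       /* supervisor RWX */
    }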
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index ad2eb4d34dd4..2fbc680c2c71 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c | |||
@@ -7,8 +7,8 @@ | |||
7 | * | 7 | * |
8 | * -- BenH | 8 | * -- BenH |
9 | * | 9 | * |
10 | * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org> | 10 | * Copyright 2008,2009 Ben Herrenschmidt <benh@kernel.crashing.org> |
11 | * IBM Corp. | 11 | * IBM Corp. |
12 | * | 12 | * |
13 | * Derived from arch/ppc/mm/init.c: | 13 | * Derived from arch/ppc/mm/init.c: |
14 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | 14 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) |
@@ -34,12 +34,71 @@ | |||
34 | #include <linux/pagemap.h> | 34 | #include <linux/pagemap.h> |
35 | #include <linux/preempt.h> | 35 | #include <linux/preempt.h> |
36 | #include <linux/spinlock.h> | 36 | #include <linux/spinlock.h> |
37 | #include <linux/lmb.h> | ||
37 | 38 | ||
38 | #include <asm/tlbflush.h> | 39 | #include <asm/tlbflush.h> |
39 | #include <asm/tlb.h> | 40 | #include <asm/tlb.h> |
41 | #include <asm/code-patching.h> | ||
40 | 42 | ||
41 | #include "mmu_decl.h" | 43 | #include "mmu_decl.h" |
42 | 44 | ||
45 | #ifdef CONFIG_PPC_BOOK3E | ||
46 | struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { | ||
47 | [MMU_PAGE_4K] = { | ||
48 | .shift = 12, | ||
49 | .enc = BOOK3E_PAGESZ_4K, | ||
50 | }, | ||
51 | [MMU_PAGE_16K] = { | ||
52 | .shift = 14, | ||
53 | .enc = BOOK3E_PAGESZ_16K, | ||
54 | }, | ||
55 | [MMU_PAGE_64K] = { | ||
56 | .shift = 16, | ||
57 | .enc = BOOK3E_PAGESZ_64K, | ||
58 | }, | ||
59 | [MMU_PAGE_1M] = { | ||
60 | .shift = 20, | ||
61 | .enc = BOOK3E_PAGESZ_1M, | ||
62 | }, | ||
63 | [MMU_PAGE_16M] = { | ||
64 | .shift = 24, | ||
65 | .enc = BOOK3E_PAGESZ_16M, | ||
66 | }, | ||
67 | [MMU_PAGE_256M] = { | ||
68 | .shift = 28, | ||
69 | .enc = BOOK3E_PAGESZ_256M, | ||
70 | }, | ||
71 | [MMU_PAGE_1G] = { | ||
72 | .shift = 30, | ||
73 | .enc = BOOK3E_PAGESZ_1GB, | ||
74 | }, | ||
75 | }; | ||
76 | static inline int mmu_get_tsize(int psize) | ||
77 | { | ||
78 | return mmu_psize_defs[psize].enc; | ||
79 | } | ||
80 | #else | ||
81 | static inline int mmu_get_tsize(int psize) | ||
82 | { | ||
83 | /* This isn't used on !Book3E for now */ | ||
84 | return 0; | ||
85 | } | ||
86 | #endif | ||
87 | |||
88 | /* The variables below are currently only used on 64-bit Book3E | ||
89 | * though this will probably be made common with other nohash | ||
90 | * implementations at some point | ||
91 | */ | ||
92 | #ifdef CONFIG_PPC64 | ||
93 | |||
94 | int mmu_linear_psize; /* Page size used for the linear mapping */ | ||
95 | int mmu_pte_psize; /* Page size used for PTE pages */ | ||
96 | int mmu_vmemmap_psize; /* Page size used for the virtual mem map */ | ||
97 | int book3e_htw_enabled; /* Is HW tablewalk enabled ? */ | ||
98 | unsigned long linear_map_top; /* Top of linear mapping */ | ||
99 | |||
100 | #endif /* CONFIG_PPC64 */ | ||
101 | |||
43 | /* | 102 | /* |
44 | * Base TLB flushing operations: | 103 | * Base TLB flushing operations: |
45 | * | 104 | * |
@@ -67,18 +126,24 @@ void local_flush_tlb_mm(struct mm_struct *mm) | |||
67 | } | 126 | } |
68 | EXPORT_SYMBOL(local_flush_tlb_mm); | 127 | EXPORT_SYMBOL(local_flush_tlb_mm); |
69 | 128 | ||
70 | void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) | 129 | void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, |
130 | int tsize, int ind) | ||
71 | { | 131 | { |
72 | unsigned int pid; | 132 | unsigned int pid; |
73 | 133 | ||
74 | preempt_disable(); | 134 | preempt_disable(); |
75 | pid = vma ? vma->vm_mm->context.id : 0; | 135 | pid = mm ? mm->context.id : 0; |
76 | if (pid != MMU_NO_CONTEXT) | 136 | if (pid != MMU_NO_CONTEXT) |
77 | _tlbil_va(vmaddr, pid); | 137 | _tlbil_va(vmaddr, pid, tsize, ind); |
78 | preempt_enable(); | 138 | preempt_enable(); |
79 | } | 139 | } |
80 | EXPORT_SYMBOL(local_flush_tlb_page); | ||
81 | 140 | ||
141 | void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) | ||
142 | { | ||
143 | __local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, | ||
144 | mmu_get_tsize(mmu_virtual_psize), 0); | ||
145 | } | ||
146 | EXPORT_SYMBOL(local_flush_tlb_page); | ||
82 | 147 | ||
83 | /* | 148 | /* |
84 | * And here are the SMP non-local implementations | 149 | * And here are the SMP non-local implementations |
@@ -87,9 +152,17 @@ EXPORT_SYMBOL(local_flush_tlb_page); | |||
87 | 152 | ||
88 | static DEFINE_SPINLOCK(tlbivax_lock); | 153 | static DEFINE_SPINLOCK(tlbivax_lock); |
89 | 154 | ||
155 | static int mm_is_core_local(struct mm_struct *mm) | ||
156 | { | ||
157 | return cpumask_subset(mm_cpumask(mm), | ||
158 | topology_thread_cpumask(smp_processor_id())); | ||
159 | } | ||
160 | |||
90 | struct tlb_flush_param { | 161 | struct tlb_flush_param { |
91 | unsigned long addr; | 162 | unsigned long addr; |
92 | unsigned int pid; | 163 | unsigned int pid; |
164 | unsigned int tsize; | ||
165 | unsigned int ind; | ||
93 | }; | 166 | }; |
94 | 167 | ||
95 | static void do_flush_tlb_mm_ipi(void *param) | 168 | static void do_flush_tlb_mm_ipi(void *param) |
@@ -103,7 +176,7 @@ static void do_flush_tlb_page_ipi(void *param) | |||
103 | { | 176 | { |
104 | struct tlb_flush_param *p = param; | 177 | struct tlb_flush_param *p = param; |
105 | 178 | ||
106 | _tlbil_va(p->addr, p->pid); | 179 | _tlbil_va(p->addr, p->pid, p->tsize, p->ind); |
107 | } | 180 | } |
108 | 181 | ||
109 | 182 | ||
@@ -131,7 +204,7 @@ void flush_tlb_mm(struct mm_struct *mm) | |||
131 | pid = mm->context.id; | 204 | pid = mm->context.id; |
132 | if (unlikely(pid == MMU_NO_CONTEXT)) | 205 | if (unlikely(pid == MMU_NO_CONTEXT)) |
133 | goto no_context; | 206 | goto no_context; |
134 | if (!cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { | 207 | if (!mm_is_core_local(mm)) { |
135 | struct tlb_flush_param p = { .pid = pid }; | 208 | struct tlb_flush_param p = { .pid = pid }; |
136 | /* Ignores smp_processor_id() even if set. */ | 209 | /* Ignores smp_processor_id() even if set. */ |
137 | smp_call_function_many(mm_cpumask(mm), | 210 | smp_call_function_many(mm_cpumask(mm), |
@@ -143,37 +216,49 @@ void flush_tlb_mm(struct mm_struct *mm) | |||
143 | } | 216 | } |
144 | EXPORT_SYMBOL(flush_tlb_mm); | 217 | EXPORT_SYMBOL(flush_tlb_mm); |
145 | 218 | ||
146 | void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) | 219 | void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, |
220 | int tsize, int ind) | ||
147 | { | 221 | { |
148 | struct cpumask *cpu_mask; | 222 | struct cpumask *cpu_mask; |
149 | unsigned int pid; | 223 | unsigned int pid; |
150 | 224 | ||
151 | preempt_disable(); | 225 | preempt_disable(); |
152 | pid = vma ? vma->vm_mm->context.id : 0; | 226 | pid = mm ? mm->context.id : 0; |
153 | if (unlikely(pid == MMU_NO_CONTEXT)) | 227 | if (unlikely(pid == MMU_NO_CONTEXT)) |
154 | goto bail; | 228 | goto bail; |
155 | cpu_mask = mm_cpumask(vma->vm_mm); | 229 | cpu_mask = mm_cpumask(mm); |
156 | if (!cpumask_equal(cpu_mask, cpumask_of(smp_processor_id()))) { | 230 | if (!mm_is_core_local(mm)) { |
157 | /* If broadcast tlbivax is supported, use it */ | 231 | /* If broadcast tlbivax is supported, use it */ |
158 | if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) { | 232 | if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) { |
159 | int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL); | 233 | int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL); |
160 | if (lock) | 234 | if (lock) |
161 | spin_lock(&tlbivax_lock); | 235 | spin_lock(&tlbivax_lock); |
162 | _tlbivax_bcast(vmaddr, pid); | 236 | _tlbivax_bcast(vmaddr, pid, tsize, ind); |
163 | if (lock) | 237 | if (lock) |
164 | spin_unlock(&tlbivax_lock); | 238 | spin_unlock(&tlbivax_lock); |
165 | goto bail; | 239 | goto bail; |
166 | } else { | 240 | } else { |
167 | struct tlb_flush_param p = { .pid = pid, .addr = vmaddr }; | 241 | struct tlb_flush_param p = { |
242 | .pid = pid, | ||
243 | .addr = vmaddr, | ||
244 | .tsize = tsize, | ||
245 | .ind = ind, | ||
246 | }; | ||
168 | /* Ignores smp_processor_id() even if set in cpu_mask */ | 247 | /* Ignores smp_processor_id() even if set in cpu_mask */ |
169 | smp_call_function_many(cpu_mask, | 248 | smp_call_function_many(cpu_mask, |
170 | do_flush_tlb_page_ipi, &p, 1); | 249 | do_flush_tlb_page_ipi, &p, 1); |
171 | } | 250 | } |
172 | } | 251 | } |
173 | _tlbil_va(vmaddr, pid); | 252 | _tlbil_va(vmaddr, pid, tsize, ind); |
174 | bail: | 253 | bail: |
175 | preempt_enable(); | 254 | preempt_enable(); |
176 | } | 255 | } |
256 | |||
257 | void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) | ||
258 | { | ||
259 | __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, | ||
260 | mmu_get_tsize(mmu_virtual_psize), 0); | ||
261 | } | ||
177 | EXPORT_SYMBOL(flush_tlb_page); | 262 | EXPORT_SYMBOL(flush_tlb_page); |
178 | 263 | ||
179 | #endif /* CONFIG_SMP */ | 264 | #endif /* CONFIG_SMP */ |
@@ -207,3 +292,156 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, | |||
207 | flush_tlb_mm(vma->vm_mm); | 292 | flush_tlb_mm(vma->vm_mm); |
208 | } | 293 | } |
209 | EXPORT_SYMBOL(flush_tlb_range); | 294 | EXPORT_SYMBOL(flush_tlb_range); |
295 | |||
296 | void tlb_flush(struct mmu_gather *tlb) | ||
297 | { | ||
298 | flush_tlb_mm(tlb->mm); | ||
299 | |||
300 | /* Push out batch of freed page tables */ | ||
301 | pte_free_finish(); | ||
302 | } | ||
303 | |||
304 | /* | ||
305 | * Below are functions specific to the 64-bit variant of Book3E though that | ||
306 | * may change in the future | ||
307 | */ | ||
308 | |||
309 | #ifdef CONFIG_PPC64 | ||
310 | |||
311 | /* | ||
312 | * Handling of virtual linear page tables or indirect TLB entries | ||
313 | * flushing when PTE pages are freed | ||
314 | */ | ||
315 | void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) | ||
316 | { | ||
317 | int tsize = mmu_psize_defs[mmu_pte_psize].enc; | ||
318 | |||
319 | if (book3e_htw_enabled) { | ||
320 | unsigned long start = address & PMD_MASK; | ||
321 | unsigned long end = address + PMD_SIZE; | ||
322 | unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift; | ||
323 | |||
324 | /* This isn't the most optimal; ideally we would factor the | ||
325 | * preempt & CPU mask mucking around, or even the IPI, out of | ||
326 | * the loop, but it will do for now | ||
327 | */ | ||
328 | while (start < end) { | ||
329 | __flush_tlb_page(tlb->mm, start, tsize, 1); | ||
330 | start += size; | ||
331 | } | ||
332 | } else { | ||
333 | unsigned long rmask = 0xf000000000000000ul; | ||
334 | unsigned long rid = (address & rmask) | 0x1000000000000000ul; | ||
335 | unsigned long vpte = address & ~rmask; | ||
336 | |||
337 | #ifdef CONFIG_PPC_64K_PAGES | ||
338 | vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful; | ||
339 | #else | ||
340 | vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful; | ||
341 | #endif | ||
342 | vpte |= rid; | ||
343 | __flush_tlb_page(tlb->mm, vpte, tsize, 0); | ||
344 | } | ||
345 | } | ||
346 | |||
347 | /* | ||
348 | * Early initialization of the MMU TLB code | ||
349 | */ | ||
350 | static void __early_init_mmu(int boot_cpu) | ||
351 | { | ||
352 | extern unsigned int interrupt_base_book3e; | ||
353 | extern unsigned int exc_data_tlb_miss_htw_book3e; | ||
354 | extern unsigned int exc_instruction_tlb_miss_htw_book3e; | ||
355 | |||
356 | unsigned int *ibase = &interrupt_base_book3e; | ||
357 | unsigned int mas4; | ||
358 | |||
359 | /* XXX This will have to be decided at runtime, but right | ||
360 | * now our boot and TLB miss code hard wires it. Ideally | ||
361 | * we should find out a suitable page size and patch the | ||
362 | * TLB miss code (either that or use the PACA to store | ||
363 | * the value we want) | ||
364 | */ | ||
365 | mmu_linear_psize = MMU_PAGE_1G; | ||
366 | |||
367 | /* XXX This should be decided at runtime based on supported | ||
368 | * page sizes in the TLB, but for now let's assume 16M is | ||
369 | * always there and a good fit (which it probably is) | ||
370 | */ | ||
371 | mmu_vmemmap_psize = MMU_PAGE_16M; | ||
372 | |||
373 | /* Check if HW tablewalk is present, and if yes, enable it by: | ||
374 | * | ||
375 | * - patching the TLB miss handlers to branch to the | ||
376 | * one dedicated to it | ||
377 | * | ||
378 | * - setting the global book3e_htw_enabled | ||
379 | * | ||
380 | * - setting MAS4:INDD and the default page size | ||
381 | */ | ||
382 | |||
383 | /* XXX This code only checks for TLB 0 capabilities and doesn't | ||
384 | * check what page size combos are supported by the HW. It | ||
385 | * also doesn't handle the case where a separate array holds | ||
386 | * the IND entries from the array loaded by the PT. | ||
387 | */ | ||
388 | if (boot_cpu) { | ||
389 | unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG); | ||
390 | |||
391 | /* Check if HW loader is supported */ | ||
392 | if ((tlb0cfg & TLBnCFG_IND) && | ||
393 | (tlb0cfg & TLBnCFG_PT)) { | ||
394 | patch_branch(ibase + (0x1c0 / 4), | ||
395 | (unsigned long)&exc_data_tlb_miss_htw_book3e, 0); | ||
396 | patch_branch(ibase + (0x1e0 / 4), | ||
397 | (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0); | ||
398 | book3e_htw_enabled = 1; | ||
399 | } | ||
400 | pr_info("MMU: Book3E Page Tables %s\n", | ||
401 | book3e_htw_enabled ? "Enabled" : "Disabled"); | ||
402 | } | ||
403 | |||
404 | /* Set MAS4 based on page table setting */ | ||
405 | |||
406 | mas4 = 0x4 << MAS4_WIMGED_SHIFT; | ||
407 | if (book3e_htw_enabled) { | ||
408 | mas4 |= MAS4_INDD; | ||
409 | #ifdef CONFIG_PPC_64K_PAGES | ||
410 | mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT; | ||
411 | mmu_pte_psize = MMU_PAGE_256M; | ||
412 | #else | ||
413 | mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT; | ||
414 | mmu_pte_psize = MMU_PAGE_1M; | ||
415 | #endif | ||
416 | } else { | ||
417 | #ifdef CONFIG_PPC_64K_PAGES | ||
418 | mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT; | ||
419 | #else | ||
420 | mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT; | ||
421 | #endif | ||
422 | mmu_pte_psize = mmu_virtual_psize; | ||
423 | } | ||
424 | mtspr(SPRN_MAS4, mas4); | ||
425 | |||
426 | /* Set the global containing the top of the linear mapping | ||
427 | * for use by the TLB miss code | ||
428 | */ | ||
429 | linear_map_top = lmb_end_of_DRAM(); | ||
430 | |||
431 | /* A sync won't hurt us after mucking around with | ||
432 | * the MMU configuration | ||
433 | */ | ||
434 | mb(); | ||
435 | } | ||
436 | |||
437 | void __init early_init_mmu(void) | ||
438 | { | ||
439 | __early_init_mmu(1); | ||
440 | } | ||
441 | |||
442 | void __cpuinit early_init_mmu_secondary(void) | ||
443 | { | ||
444 | __early_init_mmu(0); | ||
445 | } | ||
446 | |||
447 | #endif /* CONFIG_PPC64 */ | ||
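On the C side, the flush interface now carries an explicit page-size encoding and an "indirect" flag down to the low-level invalidate. A usage sketch of the new __flush_tlb_page(), mirroring the hardware-tablewalk branch of tlb_flush_pgtable() above (all names are the ones introduced by this patch):

    /* Sketch: drop every indirect (IND=1) entry covering one PTE page. */
    static void flush_ind_entries(struct mm_struct *mm, unsigned long addr)
    {
            int tsize = mmu_psize_defs[mmu_pte_psize].enc;
            unsigned long step = 1UL << mmu_psize_defs[mmu_pte_psize].shift;
            unsigned long va   = addr & PMD_MASK;
            unsigned long end  = va + PMD_SIZE;

            for (; va < end; va += step)
                    __flush_tlb_page(mm, va, tsize, 1 /* ind */);
    }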
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index 3037911279b1..bbdc5b577b85 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S | |||
@@ -39,7 +39,7 @@ | |||
39 | /* | 39 | /* |
40 | * 40x implementation needs only tlbil_va | 40 | * 40x implementation needs only tlbil_va |
41 | */ | 41 | */ |
42 | _GLOBAL(_tlbil_va) | 42 | _GLOBAL(__tlbil_va) |
43 | /* We run the search with interrupts disabled because we have to change | 43 | /* We run the search with interrupts disabled because we have to change |
44 | * the PID and I don't want to preempt when that happens. | 44 | * the PID and I don't want to preempt when that happens. |
45 | */ | 45 | */ |
@@ -71,7 +71,7 @@ _GLOBAL(_tlbil_va) | |||
71 | * 440 implementation uses tlbsx/we for tlbil_va and a full sweep | 71 | * 440 implementation uses tlbsx/we for tlbil_va and a full sweep |
72 | * of the TLB for everything else. | 72 | * of the TLB for everything else. |
73 | */ | 73 | */ |
74 | _GLOBAL(_tlbil_va) | 74 | _GLOBAL(__tlbil_va) |
75 | mfspr r5,SPRN_MMUCR | 75 | mfspr r5,SPRN_MMUCR |
76 | rlwimi r5,r4,0,24,31 /* Set TID */ | 76 | rlwimi r5,r4,0,24,31 /* Set TID */ |
77 | 77 | ||
@@ -124,8 +124,6 @@ _GLOBAL(_tlbil_pid) | |||
124 | * to have the larger code path before the _SECTION_ELSE | 124 | * to have the larger code path before the _SECTION_ELSE |
125 | */ | 125 | */ |
126 | 126 | ||
127 | #define MMUCSR0_TLBFI (MMUCSR0_TLB0FI | MMUCSR0_TLB1FI | \ | ||
128 | MMUCSR0_TLB2FI | MMUCSR0_TLB3FI) | ||
129 | /* | 127 | /* |
130 | * Flush MMU TLB on the local processor | 128 | * Flush MMU TLB on the local processor |
131 | */ | 129 | */ |
@@ -170,7 +168,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBILX) | |||
170 | * Flush MMU TLB for a particular address, but only on the local processor | 168 | * Flush MMU TLB for a particular address, but only on the local processor |
171 | * (no broadcast) | 169 | * (no broadcast) |
172 | */ | 170 | */ |
173 | _GLOBAL(_tlbil_va) | 171 | _GLOBAL(__tlbil_va) |
174 | mfmsr r10 | 172 | mfmsr r10 |
175 | wrteei 0 | 173 | wrteei 0 |
176 | slwi r4,r4,16 | 174 | slwi r4,r4,16 |
@@ -191,6 +189,85 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX) | |||
191 | isync | 189 | isync |
192 | 1: wrtee r10 | 190 | 1: wrtee r10 |
193 | blr | 191 | blr |
192 | #elif defined(CONFIG_PPC_BOOK3E) | ||
193 | /* | ||
194 | * New Book3E (>= 2.06) implementation | ||
195 | * | ||
196 | * Note: We may be able to get away without the interrupt masking stuff | ||
197 | * if we save/restore MAS6 on exceptions that might modify it | ||
198 | */ | ||
199 | _GLOBAL(_tlbil_pid) | ||
200 | slwi r4,r3,MAS6_SPID_SHIFT | ||
201 | mfmsr r10 | ||
202 | wrteei 0 | ||
203 | mtspr SPRN_MAS6,r4 | ||
204 | PPC_TLBILX_PID(0,0) | ||
205 | wrtee r10 | ||
206 | msync | ||
207 | isync | ||
208 | blr | ||
209 | |||
210 | _GLOBAL(_tlbil_pid_noind) | ||
211 | slwi r4,r3,MAS6_SPID_SHIFT | ||
212 | mfmsr r10 | ||
213 | ori r4,r4,MAS6_SIND | ||
214 | wrteei 0 | ||
215 | mtspr SPRN_MAS6,r4 | ||
216 | PPC_TLBILX_PID(0,0) | ||
217 | wrtee r10 | ||
218 | msync | ||
219 | isync | ||
220 | blr | ||
221 | |||
222 | _GLOBAL(_tlbil_all) | ||
223 | PPC_TLBILX_ALL(0,0) | ||
224 | msync | ||
225 | isync | ||
226 | blr | ||
227 | |||
228 | _GLOBAL(_tlbil_va) | ||
229 | mfmsr r10 | ||
230 | wrteei 0 | ||
231 | cmpwi cr0,r6,0 | ||
232 | slwi r4,r4,MAS6_SPID_SHIFT | ||
233 | rlwimi r4,r5,MAS6_ISIZE_SHIFT,MAS6_ISIZE_MASK | ||
234 | beq 1f | ||
235 | rlwimi r4,r6,MAS6_SIND_SHIFT,MAS6_SIND | ||
236 | 1: mtspr SPRN_MAS6,r4 /* assume AS=0 for now */ | ||
237 | PPC_TLBILX_VA(0,r3) | ||
238 | msync | ||
239 | isync | ||
240 | wrtee r10 | ||
241 | blr | ||
242 | |||
243 | _GLOBAL(_tlbivax_bcast) | ||
244 | mfmsr r10 | ||
245 | wrteei 0 | ||
246 | cmpwi cr0,r6,0 | ||
247 | slwi r4,r4,MAS6_SPID_SHIFT | ||
248 | rlwimi r4,r5,MAS6_ISIZE_SHIFT,MAS6_ISIZE_MASK | ||
249 | beq 1f | ||
250 | rlwimi r4,r6,MAS6_SIND_SHIFT,MAS6_SIND | ||
251 | 1: mtspr SPRN_MAS6,r4 /* assume AS=0 for now */ | ||
252 | PPC_TLBIVAX(0,r3) | ||
253 | eieio | ||
254 | tlbsync | ||
255 | sync | ||
256 | wrtee r10 | ||
257 | blr | ||
258 | |||
259 | _GLOBAL(set_context) | ||
260 | #ifdef CONFIG_BDI_SWITCH | ||
261 | /* Context switch the PTE pointer for the Abatron BDI2000. | ||
262 | * The PGDIR is the second parameter. | ||
263 | */ | ||
264 | lis r5, abatron_pteptrs@h | ||
265 | ori r5, r5, abatron_pteptrs@l | ||
266 | stw r4, 0x4(r5) | ||
267 | #endif | ||
268 | mtspr SPRN_PID,r3 | ||
269 | isync /* Force context change */ | ||
270 | blr | ||
194 | #else | 271 | #else |
195 | #error Unsupported processor type ! | 272 | #error Unsupported processor type ! |
196 | #endif | 273 | #endif |
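The widened _tlbil_va/_tlbivax_bcast entry points take (address, pid, tsize, ind) and fold the last three arguments into MAS6 before issuing tlbilx/tlbivax. In C terms the value they compose is roughly the following (MAS6_* names as used in the asm above; SAS is left at 0, i.e. address space 0):

    /* Sketch of the MAS6 value _tlbil_va builds from its arguments. */
    static unsigned long mas6_for(unsigned int pid, unsigned int tsize,
                                  unsigned int ind)
    {
            unsigned long mas6 = (unsigned long)pid << MAS6_SPID_SHIFT;

            mas6 |= ((unsigned long)tsize << MAS6_ISIZE_SHIFT) & MAS6_ISIZE_MASK;
            if (ind)
                    mas6 |= MAS6_SIND;  /* also target indirect entries */
            return mas6;
    }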