path: root/arch/powerpc/mm
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/Makefile                                        |   4
-rw-r--r--  arch/powerpc/mm/fault.c                                         |  46
-rw-r--r--  arch/powerpc/mm/fsl_booke_mmu.c                                 |  95
-rw-r--r--  arch/powerpc/mm/gup.c                                           |  16
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c                                 |  51
-rw-r--r--  arch/powerpc/mm/mem.c                                           |  33
-rw-r--r--  arch/powerpc/mm/mmap_64.c (renamed from arch/powerpc/mm/mmap.c) |  64
-rw-r--r--  arch/powerpc/mm/mmu_context_nohash.c                            |   4
-rw-r--r--  arch/powerpc/mm/numa.c                                          | 174
-rw-r--r--  arch/powerpc/mm/pgtable.c                                       | 134
-rw-r--r--  arch/powerpc/mm/pgtable_32.c                                    |  18
-rw-r--r--  arch/powerpc/mm/pgtable_64.c                                    |  25
-rw-r--r--  arch/powerpc/mm/ppc_mmu_32.c                                    |  10
-rw-r--r--  arch/powerpc/mm/tlb_hash64.c                                    |   6
-rw-r--r--  arch/powerpc/mm/tlb_nohash.c                                    |  18
-rw-r--r--  arch/powerpc/mm/tlb_nohash_low.S                                |  44
16 files changed, 457 insertions(+), 285 deletions(-)
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 953cc4a1cde5..17290bcedc5e 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -6,7 +6,7 @@ ifeq ($(CONFIG_PPC64),y)
 EXTRA_CFLAGS	+= -mno-minimal-toc
 endif
 
-obj-y				:= fault.o mem.o pgtable.o \
+obj-y				:= fault.o mem.o pgtable.o gup.o \
 				   init_$(CONFIG_WORD_SIZE).o \
 				   pgtable_$(CONFIG_WORD_SIZE).o
 obj-$(CONFIG_PPC_MMU_NOHASH)	+= mmu_context_nohash.o tlb_nohash.o \
@@ -14,7 +14,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
 hash-$(CONFIG_PPC_NATIVE)	:= hash_native_64.o
 obj-$(CONFIG_PPC64)		+= hash_utils_64.o \
 				   slb_low.o slb.o stab.o \
-				   gup.o mmap.o $(hash-y)
+				   mmap_64.o $(hash-y)
 obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o
 obj-$(CONFIG_PPC_STD_MMU)	+= hash_low_$(CONFIG_WORD_SIZE).o \
 				   tlb_hash$(CONFIG_WORD_SIZE).o \
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 91c7b8636b8a..76993941cac9 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -253,45 +253,33 @@ good_area:
 #endif /* CONFIG_8xx */
 
 	if (is_exec) {
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-		/* protection fault */
+#ifdef CONFIG_PPC_STD_MMU
+		/* Protection fault on exec go straight to failure on
+		 * Hash based MMUs as they either don't support per-page
+		 * execute permission, or if they do, it's handled already
+		 * at the hash level. This test would probably have to
+		 * be removed if we change the way this works to make hash
+		 * processors use the same I/D cache coherency mechanism
+		 * as embedded.
+		 */
 		if (error_code & DSISR_PROTFAULT)
 			goto bad_area;
+#endif /* CONFIG_PPC_STD_MMU */
+
 		/*
 		 * Allow execution from readable areas if the MMU does not
 		 * provide separate controls over reading and executing.
+		 *
+		 * Note: That code used to not be enabled for 4xx/BookE.
+		 * It is now as I/D cache coherency for these is done at
+		 * set_pte_at() time and I see no reason why the test
+		 * below wouldn't be valid on those processors. This -may-
+		 * break programs compiled with a really old ABI though.
 		 */
 		if (!(vma->vm_flags & VM_EXEC) &&
 		    (cpu_has_feature(CPU_FTR_NOEXECUTE) ||
 		     !(vma->vm_flags & (VM_READ | VM_WRITE))))
 			goto bad_area;
-#else
-		pte_t *ptep;
-		pmd_t *pmdp;
-
-		/* Since 4xx/Book-E supports per-page execute permission,
-		 * we lazily flush dcache to icache. */
-		ptep = NULL;
-		if (get_pteptr(mm, address, &ptep, &pmdp)) {
-			spinlock_t *ptl = pte_lockptr(mm, pmdp);
-			spin_lock(ptl);
-			if (pte_present(*ptep)) {
-				struct page *page = pte_page(*ptep);
-
-				if (!test_bit(PG_arch_1, &page->flags)) {
-					flush_dcache_icache_page(page);
-					set_bit(PG_arch_1, &page->flags);
-				}
-				pte_update(ptep, 0, _PAGE_HWEXEC |
-					_PAGE_ACCESSED);
-				local_flush_tlb_page(vma, address);
-				pte_unmap_unlock(ptep, ptl);
-				up_read(&mm->mmap_sem);
-				return 0;
-			}
-			pte_unmap_unlock(ptep, ptl);
-		}
-#endif
 		/* a write */
 	} else if (is_write) {
 		if (!(vma->vm_flags & VM_WRITE))
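The rewritten do_page_fault() path above makes one execute-permission policy common to all MMU families: hash MMUs fail a protection fault on exec outright, and executing from a mapping without VM_EXEC is only tolerated when the CPU has no per-page execute enforcement and the mapping is at least readable or writable. A minimal standalone sketch of that decision follows; the flag values and the NX boolean are illustrative stand-ins for the kernel's VM_* flags and cpu_has_feature(CPU_FTR_NOEXECUTE), not the real API.

/* Sketch only: models the "goto bad_area" decision for exec faults. */
#include <stdbool.h>
#include <stdio.h>

#define VM_READ  0x1UL
#define VM_WRITE 0x2UL
#define VM_EXEC  0x4UL

/* Returns true when an instruction fetch from a VMA with these flags
 * should be refused. */
static bool exec_fault_is_bad(unsigned long vm_flags, bool has_noexec)
{
	if (vm_flags & VM_EXEC)
		return false;		/* mapping is executable: fine */
	if (has_noexec)
		return true;		/* hardware enforces NX, honour it */
	/* No per-page execute control: tolerate exec from readable areas */
	return !(vm_flags & (VM_READ | VM_WRITE));
}

int main(void)
{
	printf("%d\n", exec_fault_is_bad(VM_READ, false));           /* 0 */
	printf("%d\n", exec_fault_is_bad(VM_READ, true));            /* 1 */
	printf("%d\n", exec_fault_is_bad(VM_READ | VM_EXEC, true));  /* 0 */
	return 0;
}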
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index ea6e41e39d9f..bb3d65998e6b 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -56,10 +56,14 @@
 
 extern void loadcam_entry(unsigned int index);
 unsigned int tlbcam_index;
-static unsigned long __cam0, __cam1, __cam2;
+static unsigned long cam[CONFIG_LOWMEM_CAM_NUM];
 
 #define NUM_TLBCAMS	(16)
 
+#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS)
+#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS"
+#endif
+
 struct tlbcam TLBCAM[NUM_TLBCAMS];
 
 struct tlbcamrange {
@@ -107,7 +111,7 @@ void settlbcam(int index, unsigned long virt, phys_addr_t phys,
 	unsigned int tsize, lz;
 
 	asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size));
-	tsize = (21 - lz) / 2;
+	tsize = 21 - lz;
 
 #ifdef CONFIG_SMP
 	if ((flags & _PAGE_NO_CACHE) == 0)
@@ -152,19 +156,19 @@ void invalidate_tlbcam_entry(int index)
 	loadcam_entry(index);
 }
 
-void __init cam_mapin_ram(unsigned long cam0, unsigned long cam1,
-		unsigned long cam2)
+unsigned long __init mmu_mapin_ram(void)
 {
-	settlbcam(0, PAGE_OFFSET, memstart_addr, cam0, _PAGE_KERNEL, 0);
-	tlbcam_index++;
-	if (cam1) {
-		tlbcam_index++;
-		settlbcam(1, PAGE_OFFSET+cam0, memstart_addr+cam0, cam1, _PAGE_KERNEL, 0);
-	}
-	if (cam2) {
+	unsigned long virt = PAGE_OFFSET;
+	phys_addr_t phys = memstart_addr;
+
+	while (cam[tlbcam_index] && tlbcam_index < ARRAY_SIZE(cam)) {
+		settlbcam(tlbcam_index, virt, phys, cam[tlbcam_index], PAGE_KERNEL_X, 0);
+		virt += cam[tlbcam_index];
+		phys += cam[tlbcam_index];
 		tlbcam_index++;
-		settlbcam(2, PAGE_OFFSET+cam0+cam1, memstart_addr+cam0+cam1, cam2, _PAGE_KERNEL, 0);
 	}
+
+	return virt - PAGE_OFFSET;
 }
 
 /*
@@ -175,51 +179,46 @@ void __init MMU_init_hw(void)
 	flush_instruction_cache();
 }
 
-unsigned long __init mmu_mapin_ram(void)
-{
-	cam_mapin_ram(__cam0, __cam1, __cam2);
-
-	return __cam0 + __cam1 + __cam2;
-}
-
-
 void __init
 adjust_total_lowmem(void)
 {
-	phys_addr_t max_lowmem_size = __max_low_memory;
-	phys_addr_t cam_max_size = 0x10000000;
 	phys_addr_t ram;
+	unsigned int max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xff;
+	char buf[ARRAY_SIZE(cam) * 5 + 1], *p = buf;
+	int i;
+	unsigned long virt = PAGE_OFFSET & 0xffffffffUL;
+	unsigned long phys = memstart_addr & 0xffffffffUL;
 
-	/* adjust CAM size to max_lowmem_size */
-	if (max_lowmem_size < cam_max_size)
-		cam_max_size = max_lowmem_size;
+	/* Convert (4^max) kB to (2^max) bytes */
+	max_cam = max_cam * 2 + 10;
 
-	/* adjust lowmem size to max_lowmem_size */
-	ram = min(max_lowmem_size, total_lowmem);
+	/* adjust lowmem size to __max_low_memory */
+	ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem);
 
 	/* Calculate CAM values */
-	__cam0 = 1UL << 2 * (__ilog2(ram) / 2);
-	if (__cam0 > cam_max_size)
-		__cam0 = cam_max_size;
-	ram -= __cam0;
-	if (ram) {
-		__cam1 = 1UL << 2 * (__ilog2(ram) / 2);
-		if (__cam1 > cam_max_size)
-			__cam1 = cam_max_size;
-		ram -= __cam1;
-	}
-	if (ram) {
-		__cam2 = 1UL << 2 * (__ilog2(ram) / 2);
-		if (__cam2 > cam_max_size)
-			__cam2 = cam_max_size;
-		ram -= __cam2;
+	__max_low_memory = 0;
+	for (i = 0; ram && i < ARRAY_SIZE(cam); i++) {
+		unsigned int camsize = __ilog2(ram) & ~1U;
+		unsigned int align = __ffs(virt | phys) & ~1U;
+
+		if (camsize > align)
+			camsize = align;
+		if (camsize > max_cam)
+			camsize = max_cam;
+
+		cam[i] = 1UL << camsize;
+		ram -= cam[i];
+		__max_low_memory += cam[i];
+		virt += cam[i];
+		phys += cam[i];
+
+		p += sprintf(p, "%lu/", cam[i] >> 20);
 	}
+	for (; i < ARRAY_SIZE(cam); i++)
+		p += sprintf(p, "0/");
+	p[-1] = '\0';
 
-	printk(KERN_INFO "Memory CAM mapping: CAM0=%ldMb, CAM1=%ldMb,"
-			" CAM2=%ldMb residual: %ldMb\n",
-			__cam0 >> 20, __cam1 >> 20, __cam2 >> 20,
-			(long int)((total_lowmem - __cam0 - __cam1 - __cam2)
-					>> 20));
-	__max_low_memory = __cam0 + __cam1 + __cam2;
+	pr_info("Memory CAM mapping: %s Mb, residual: %dMb\n", buf,
+		(unsigned int)((total_lowmem - __max_low_memory) >> 20));
 	__initial_memory_limit_addr = memstart_addr + __max_low_memory;
 }
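The new adjust_total_lowmem() above sizes each CAM entry as the largest power of four that fits the remaining lowmem, does not exceed the alignment of the current virtual/physical address, and does not exceed the maximum CAM size read from TLB1CFG. A hedged userspace sketch of that loop, using assumed values (three CAM entries, a 256 MB hardware maximum, the usual 0xc0000000 kernel base) rather than real board parameters:

/* Sketch only: reproduces the CAM sizing arithmetic, not kernel code. */
#include <stdio.h>

#define NCAM 3

static unsigned int ilog2(unsigned long x)		/* floor(log2(x)) */
{
	unsigned int r = 0;
	while (x >>= 1)
		r++;
	return r;
}

static unsigned int ffs_bit(unsigned long x)	/* index of lowest set bit */
{
	unsigned int r = 0;
	while (!(x & 1)) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	unsigned long ram = 192UL << 20;	/* 192 MB of lowmem */
	unsigned long virt = 0xc0000000UL, phys = 0;
	unsigned int max_cam = 28;		/* assume 2^28 = 256 MB per entry */
	unsigned long cam[NCAM] = { 0 };

	for (int i = 0; ram && i < NCAM; i++) {
		unsigned int camsize = ilog2(ram) & ~1U;	/* power of 4 */
		unsigned int align = ffs_bit(virt | phys) & ~1U;

		if (camsize > align)
			camsize = align;
		if (camsize > max_cam)
			camsize = max_cam;

		cam[i] = 1UL << camsize;
		ram -= cam[i];
		virt += cam[i];
		phys += cam[i];
		printf("CAM%d = %lu MB\n", i, cam[i] >> 20);
	}
	return 0;
}

With 192 MB of lowmem the loop settles on three 64 MB entries, because 64 MB is the largest power of four not exceeding the remaining memory at each step.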
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index 28a114db3ba0..bc400c78c97f 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -14,6 +14,8 @@
 #include <linux/rwsem.h>
 #include <asm/pgtable.h>
 
+#ifdef __HAVE_ARCH_PTE_SPECIAL
+
 /*
  * The performance critical leaf functions are made noinline otherwise gcc
  * inlines everything into a single function which results in too much
@@ -151,8 +153,11 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	unsigned long addr, len, end;
 	unsigned long next;
 	pgd_t *pgdp;
-	int psize, nr = 0;
+	int nr = 0;
+#ifdef CONFIG_PPC64
 	unsigned int shift;
+	int psize;
+#endif
 
 	pr_debug("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");
 
@@ -205,8 +210,13 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	 */
 	local_irq_disable();
 
+#ifdef CONFIG_PPC64
+	/* Those bits are related to hugetlbfs implementation and only exist
+	 * on 64-bit for now
+	 */
 	psize = get_slice_psize(mm, addr);
 	shift = mmu_psize_defs[psize].shift;
+#endif /* CONFIG_PPC64 */
 
 #ifdef CONFIG_HUGETLB_PAGE
 	if (unlikely(mmu_huge_psizes[psize])) {
@@ -236,7 +246,9 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	do {
 		pgd_t pgd = *pgdp;
 
+#ifdef CONFIG_PPC64
 		VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, addr)].shift);
+#endif
 		pr_debug("  %016lx: normal pgd %p\n", addr,
 			 (void *)pgd_val(pgd));
 		next = pgd_addr_end(addr, end);
@@ -279,3 +291,5 @@ slow_irqon:
 		return ret;
 	}
 }
+
+#endif /* __HAVE_ARCH_PTE_SPECIAL */
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 8d5b4758c13a..db556d25c3a7 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -516,7 +516,7 @@ static int __init htab_dt_scan_pftsize(unsigned long node,
 
 static unsigned long __init htab_get_table_size(void)
 {
-	unsigned long mem_size, rnd_mem_size, pteg_count;
+	unsigned long mem_size, rnd_mem_size, pteg_count, psize;
 
 	/* If hash size isn't already provided by the platform, we try to
 	 * retrieve it from the device-tree. If it's not there neither, we
@@ -534,7 +534,8 @@ static unsigned long __init htab_get_table_size(void)
 		rnd_mem_size <<= 1;
 
 	/* # pages / 2 */
-	pteg_count = max(rnd_mem_size >> (12 + 1), 1UL << 11);
+	psize = mmu_psize_defs[mmu_virtual_psize].shift;
+	pteg_count = max(rnd_mem_size >> (psize + 1), 1UL << 11);
 
 	return pteg_count << 7;
 }
@@ -589,7 +590,7 @@ static void __init htab_finish_init(void)
 	make_bl(htab_call_hpte_updatepp, ppc_md.hpte_updatepp);
 }
 
-void __init htab_initialize(void)
+static void __init htab_initialize(void)
 {
 	unsigned long table;
 	unsigned long pteg_count;
@@ -731,11 +732,43 @@ void __init htab_initialize(void)
 #undef KB
 #undef MB
 
-void htab_initialize_secondary(void)
+void __init early_init_mmu(void)
 {
+	/* Setup initial STAB address in the PACA */
+	get_paca()->stab_real = __pa((u64)&initial_stab);
+	get_paca()->stab_addr = (u64)&initial_stab;
+
+	/* Initialize the MMU Hash table and create the linear mapping
+	 * of memory. Has to be done before stab/slb initialization as
+	 * this is currently where the page size encoding is obtained
+	 */
+	htab_initialize();
+
+	/* Initialize stab / SLB management except on iSeries
+	 */
+	if (cpu_has_feature(CPU_FTR_SLB))
+		slb_initialize();
+	else if (!firmware_has_feature(FW_FEATURE_ISERIES))
+		stab_initialize(get_paca()->stab_real);
+}
+
+#ifdef CONFIG_SMP
+void __init early_init_mmu_secondary(void)
+{
+	/* Initialize hash table for that CPU */
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		mtspr(SPRN_SDR1, _SDR1);
+
+	/* Initialize STAB/SLB. We use a virtual address as it works
+	 * in real mode on pSeries and we want a virutal address on
+	 * iSeries anyway
+	 */
+	if (cpu_has_feature(CPU_FTR_SLB))
+		slb_initialize();
+	else
+		stab_initialize(get_paca()->stab_addr);
 }
+#endif /* CONFIG_SMP */
 
 /*
  * Called by asm hashtable.S for doing lazy icache flush
@@ -858,7 +891,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 	unsigned long vsid;
 	struct mm_struct *mm;
 	pte_t *ptep;
-	cpumask_t tmp;
+	const struct cpumask *tmp;
 	int rc, user_region = 0, local = 0;
 	int psize, ssize;
 
@@ -906,8 +939,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 		return 1;
 
 	/* Check CPU locality */
-	tmp = cpumask_of_cpu(smp_processor_id());
-	if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
+	tmp = cpumask_of(smp_processor_id());
+	if (user_region && cpumask_equal(mm_cpumask(mm), tmp))
 		local = 1;
 
 #ifdef CONFIG_HUGETLB_PAGE
@@ -1023,7 +1056,6 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 	unsigned long vsid;
 	void *pgdir;
 	pte_t *ptep;
-	cpumask_t mask;
 	unsigned long flags;
 	int local = 0;
 	int ssize;
@@ -1066,8 +1098,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 	local_irq_save(flags);
 
 	/* Is that local to this CPU ? */
-	mask = cpumask_of_cpu(smp_processor_id());
-	if (cpus_equal(mm->cpu_vm_mask, mask))
+	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
 		local = 1;
 
 	/* Hash it in */
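htab_get_table_size() above now derives the page shift from mmu_psize_defs[mmu_virtual_psize] instead of hard-coding 4K pages; the hash table still gets one 128-byte PTEG per two pages of power-of-two rounded memory, with a floor of 2^11 PTEGs. A rough userspace model of that arithmetic; the 64K case is included only to show the effect of the page-size change, real values come from the device tree and mmu_psize_defs[].

/* Sketch only: models the sizing formula, not the kernel function. */
#include <stdio.h>

static unsigned long htab_size(unsigned long mem_size, unsigned int page_shift)
{
	unsigned long rnd = 1;
	unsigned long pteg_count;

	while (rnd < mem_size)			/* round up to a power of two */
		rnd <<= 1;

	pteg_count = rnd >> (page_shift + 1);	/* # pages / 2 */
	if (pteg_count < (1UL << 11))		/* never below 2^11 PTEGs */
		pteg_count = 1UL << 11;

	return pteg_count << 7;			/* 128 bytes per PTEG */
}

int main(void)
{
	/* 2 GB of RAM: 32 MB hash table with 4K pages, 2 MB with 64K pages */
	printf("%lu MB\n", htab_size(2UL << 30, 12) >> 20);
	printf("%lu MB\n", htab_size(2UL << 30, 16) >> 20);
	return 0;
}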
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index f00f09a77f12..f668fa9ba804 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -472,40 +472,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 {
 #ifdef CONFIG_PPC_STD_MMU
 	unsigned long access = 0, trap;
-#endif
-	unsigned long pfn = pte_pfn(pte);
-
-	/* handle i-cache coherency */
-	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
-	    !cpu_has_feature(CPU_FTR_NOEXECUTE) &&
-	    pfn_valid(pfn)) {
-		struct page *page = pfn_to_page(pfn);
-#ifdef CONFIG_8xx
-		/* On 8xx, cache control instructions (particularly
-		 * "dcbst" from flush_dcache_icache) fault as write
-		 * operation if there is an unpopulated TLB entry
-		 * for the address in question. To workaround that,
-		 * we invalidate the TLB here, thus avoiding dcbst
-		 * misbehaviour.
-		 */
-		_tlbil_va(address, 0 /* 8xx doesn't care about PID */);
-#endif
-		/* The _PAGE_USER test should really be _PAGE_EXEC, but
-		 * older glibc versions execute some code from no-exec
-		 * pages, which for now we are supporting. If exec-only
-		 * pages are ever implemented, this will have to change.
-		 */
-		if (!PageReserved(page) && (pte_val(pte) & _PAGE_USER)
-		    && !test_bit(PG_arch_1, &page->flags)) {
-			if (vma->vm_mm == current->active_mm) {
-				__flush_dcache_icache((void *) address);
-			} else
-				flush_dcache_icache_page(page);
-			set_bit(PG_arch_1, &page->flags);
-		}
-	}
 
-#ifdef CONFIG_PPC_STD_MMU
 	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
 	if (!pte_young(pte) || address >= TASK_SIZE)
 		return;
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap_64.c
index 86010fc7d3b1..0d957a4c70fe 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap_64.c
@@ -24,36 +24,26 @@
 
 #include <linux/personality.h>
 #include <linux/mm.h>
+#include <linux/random.h>
 #include <linux/sched.h>
 
 /*
  * Top of mmap area (just below the process stack).
  *
- * Leave an at least ~128 MB hole.
+ * Leave at least a ~128 MB hole on 32bit applications.
+ *
+ * On 64bit applications we randomise the stack by 1GB so we need to
+ * space our mmap start address by a further 1GB, otherwise there is a
+ * chance the mmap area will end up closer to the stack than our ulimit
+ * requires.
  */
-#define MIN_GAP (128*1024*1024)
+#define MIN_GAP32 (128*1024*1024)
+#define MIN_GAP64 ((128 + 1024)*1024*1024UL)
+#define MIN_GAP ((is_32bit_task()) ? MIN_GAP32 : MIN_GAP64)
 #define MAX_GAP (TASK_SIZE/6*5)
 
-static inline unsigned long mmap_base(void)
-{
-	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
-
-	if (gap < MIN_GAP)
-		gap = MIN_GAP;
-	else if (gap > MAX_GAP)
-		gap = MAX_GAP;
-
-	return TASK_SIZE - (gap & PAGE_MASK);
-}
-
 static inline int mmap_is_legacy(void)
 {
-	/*
-	 * Force standard allocation for 64 bit programs.
-	 */
-	if (!test_thread_flag(TIF_32BIT))
-		return 1;
-
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
 
@@ -64,6 +54,40 @@ static inline int mmap_is_legacy(void)
 }
 
 /*
+ * Since get_random_int() returns the same value within a 1 jiffy window,
+ * we will almost always get the same randomisation for the stack and mmap
+ * region. This will mean the relative distance between stack and mmap will
+ * be the same.
+ *
+ * To avoid this we can shift the randomness by 1 bit.
+ */
+static unsigned long mmap_rnd(void)
+{
+	unsigned long rnd = 0;
+
+	if (current->flags & PF_RANDOMIZE) {
+		/* 8MB for 32bit, 1GB for 64bit */
+		if (is_32bit_task())
+			rnd = (long)(get_random_int() % (1<<(22-PAGE_SHIFT)));
+		else
+			rnd = (long)(get_random_int() % (1<<(29-PAGE_SHIFT)));
+	}
+	return (rnd << PAGE_SHIFT) * 2;
+}
+
+static inline unsigned long mmap_base(void)
+{
+	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+
+	if (gap < MIN_GAP)
+		gap = MIN_GAP;
+	else if (gap > MAX_GAP)
+		gap = MAX_GAP;
+
+	return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd());
+}
+
+/*
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
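mmap_base() above now leaves room for both the stack rlimit gap and a random offset: the randomisation covers 8 MB for 32-bit tasks and 1 GB for 64-bit tasks, shifted by one extra bit so it does not simply track the stack randomisation produced from get_random_int() in the same jiffy. A sketch of the arithmetic under assumed values (4K pages, rand() standing in for get_random_int(), a made-up 64-bit task size; assumes an LP64 host):

/* Sketch only: the randomisation range and gap clamping, not kernel code. */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & PAGE_MASK)

static unsigned long mmap_rnd(int is_32bit)
{
	unsigned long rnd;

	if (is_32bit)
		rnd = rand() % (1UL << (22 - PAGE_SHIFT));
	else
		rnd = rand() % (1UL << (29 - PAGE_SHIFT));

	/* 8 MB (32-bit) or 1 GB (64-bit) of jitter after shift and doubling */
	return (rnd << PAGE_SHIFT) * 2;
}

static unsigned long mmap_base(unsigned long task_size,
			       unsigned long stack_rlimit, int is_32bit)
{
	unsigned long min_gap = is_32bit ? 128UL << 20 : (128UL + 1024) << 20;
	unsigned long max_gap = task_size / 6 * 5;
	unsigned long gap = stack_rlimit;

	if (gap < min_gap)
		gap = min_gap;
	else if (gap > max_gap)
		gap = max_gap;

	return PAGE_ALIGN(task_size - gap - mmap_rnd(is_32bit));
}

int main(void)
{
	/* 64-bit task with a 16 MB stack rlimit: the mmap top sits at
	 * least 1152 MB below the assumed task size, minus up to ~1 GB
	 * of randomisation. */
	printf("%#lx\n", mmap_base(64UL << 40, 16UL << 20, 0));
	return 0;
}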
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 52a0cfc38b64..a70e311bd457 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -97,7 +97,7 @@ static unsigned int steal_context_smp(unsigned int id)
 		mm->context.id = MMU_NO_CONTEXT;
 
 		/* Mark it stale on all CPUs that used this mm */
-		for_each_cpu_mask_nr(cpu, mm->cpu_vm_mask)
+		for_each_cpu(cpu, mm_cpumask(mm))
 			__set_bit(id, stale_map[cpu]);
 		return id;
 	}
@@ -380,7 +380,7 @@ void __init mmu_context_init(void)
 #endif
 
 	printk(KERN_INFO
-	       "MMU: Allocated %d bytes of context maps for %d contexts\n",
+	       "MMU: Allocated %zu bytes of context maps for %d contexts\n",
 	       2 * CTX_MAP_SIZE + (sizeof(void *) * (last_context + 1)),
 	       last_context - first_context + 1);
 
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 5ac08b8ab654..9047145095aa 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -158,35 +158,6 @@ static void unmap_cpu_from_node(unsigned long cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
-{
-	unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
-	struct device_node *cpu_node = NULL;
-	const unsigned int *interrupt_server, *reg;
-	int len;
-
-	while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
-		/* Try interrupt server first */
-		interrupt_server = of_get_property(cpu_node,
-					"ibm,ppc-interrupt-server#s", &len);
-
-		len = len / sizeof(u32);
-
-		if (interrupt_server && (len > 0)) {
-			while (len--) {
-				if (interrupt_server[len] == hw_cpuid)
-					return cpu_node;
-			}
-		} else {
-			reg = of_get_property(cpu_node, "reg", &len);
-			if (reg && (len > 0) && (reg[0] == hw_cpuid))
-				return cpu_node;
-		}
-	}
-
-	return NULL;
-}
-
 /* must hold reference to node during call */
 static const int *of_get_associativity(struct device_node *dev)
 {
@@ -290,7 +261,7 @@ static int __init find_min_common_depth(void)
 	ref_points = of_get_property(rtas_root,
 			"ibm,associativity-reference-points", &len);
 
-	if ((len >= 1) && ref_points) {
+	if ((len >= 2 * sizeof(unsigned int)) && ref_points) {
 		depth = ref_points[1];
 	} else {
 		dbg("NUMA: ibm,associativity-reference-points not found.\n");
@@ -470,7 +441,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
 static int __cpuinit numa_setup_cpu(unsigned long lcpu)
 {
 	int nid = 0;
-	struct device_node *cpu = find_cpu_node(lcpu);
+	struct device_node *cpu = of_get_cpu_node(lcpu, NULL);
 
 	if (!cpu) {
 		WARN_ON(1);
@@ -652,7 +623,7 @@ static int __init parse_numa_properties(void)
 	for_each_present_cpu(i) {
 		int nid;
 
-		cpu = find_cpu_node(i);
+		cpu = of_get_cpu_node(i, NULL);
 		BUG_ON(!cpu);
 		nid = of_node_to_nid_single(cpu);
 		of_node_put(cpu);
@@ -1041,57 +1012,32 @@ early_param("numa", early_numa);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
- * Validate the node associated with the memory section we are
- * trying to add.
- */
-int valid_hot_add_scn(int *nid, unsigned long start, u32 lmb_size,
-		      unsigned long scn_addr)
-{
-	nodemask_t nodes;
-
-	if (*nid < 0 || !node_online(*nid))
-		*nid = any_online_node(NODE_MASK_ALL);
-
-	if ((scn_addr >= start) && (scn_addr < (start + lmb_size))) {
-		nodes_setall(nodes);
-		while (NODE_DATA(*nid)->node_spanned_pages == 0) {
-			node_clear(*nid, nodes);
-			*nid = any_online_node(nodes);
-		}
-
-		return 1;
-	}
-
-	return 0;
-}
-
-/*
- * Find the node associated with a hot added memory section represented
- * by the ibm,dynamic-reconfiguration-memory node.
+ * Find the node associated with a hot added memory section for
+ * memory represented in the device tree by the property
+ * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory.
  */
 static int hot_add_drconf_scn_to_nid(struct device_node *memory,
 				     unsigned long scn_addr)
 {
 	const u32 *dm;
-	unsigned int n, rc;
+	unsigned int drconf_cell_cnt, rc;
 	unsigned long lmb_size;
-	int default_nid = any_online_node(NODE_MASK_ALL);
-	int nid;
 	struct assoc_arrays aa;
+	int nid = -1;
 
-	n = of_get_drconf_memory(memory, &dm);
-	if (!n)
-		return default_nid;;
+	drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
+	if (!drconf_cell_cnt)
+		return -1;
 
 	lmb_size = of_get_lmb_size(memory);
 	if (!lmb_size)
-		return default_nid;
+		return -1;
 
 	rc = of_get_assoc_arrays(memory, &aa);
 	if (rc)
-		return default_nid;
+		return -1;
 
-	for (; n != 0; --n) {
+	for (; drconf_cell_cnt != 0; --drconf_cell_cnt) {
 		struct of_drconf_cell drmem;
 
 		read_drconf_cell(&drmem, &dm);
@@ -1102,15 +1048,57 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory,
 		    || !(drmem.flags & DRCONF_MEM_ASSIGNED))
 			continue;
 
+		if ((scn_addr < drmem.base_addr)
+		    || (scn_addr >= (drmem.base_addr + lmb_size)))
+			continue;
+
 		nid = of_drconf_to_nid_single(&drmem, &aa);
+		break;
+	}
+
+	return nid;
+}
 
-		if (valid_hot_add_scn(&nid, drmem.base_addr, lmb_size,
-				      scn_addr))
-			return nid;
+/*
+ * Find the node associated with a hot added memory section for memory
+ * represented in the device tree as a node (i.e. memory@XXXX) for
+ * each lmb.
+ */
+int hot_add_node_scn_to_nid(unsigned long scn_addr)
+{
+	struct device_node *memory = NULL;
+	int nid = -1;
+
+	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
+		unsigned long start, size;
+		int ranges;
+		const unsigned int *memcell_buf;
+		unsigned int len;
+
+		memcell_buf = of_get_property(memory, "reg", &len);
+		if (!memcell_buf || len <= 0)
+			continue;
+
+		/* ranges in cell */
+		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
+
+		while (ranges--) {
+			start = read_n_cells(n_mem_addr_cells, &memcell_buf);
+			size = read_n_cells(n_mem_size_cells, &memcell_buf);
+
+			if ((scn_addr < start) || (scn_addr >= (start + size)))
+				continue;
+
+			nid = of_node_to_nid_single(memory);
+			break;
+		}
+
+		of_node_put(memory);
+		if (nid >= 0)
+			break;
 	}
 
-	BUG();	/* section address should be found above */
-	return 0;
+	return nid;
 }
 
 /*
@@ -1121,7 +1109,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory,
 int hot_add_scn_to_nid(unsigned long scn_addr)
 {
 	struct device_node *memory = NULL;
-	int nid;
+	int nid, found = 0;
 
 	if (!numa_enabled || (min_common_depth < 0))
 		return any_online_node(NODE_MASK_ALL);
@@ -1130,35 +1118,25 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
 	if (memory) {
 		nid = hot_add_drconf_scn_to_nid(memory, scn_addr);
 		of_node_put(memory);
-		return nid;
+	} else {
+		nid = hot_add_node_scn_to_nid(scn_addr);
 	}
 
-	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
-		unsigned long start, size;
-		int ranges;
-		const unsigned int *memcell_buf;
-		unsigned int len;
-
-		memcell_buf = of_get_property(memory, "reg", &len);
-		if (!memcell_buf || len <= 0)
-			continue;
+	if (nid < 0 || !node_online(nid))
+		nid = any_online_node(NODE_MASK_ALL);
 
-		/* ranges in cell */
-		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
-ha_new_range:
-		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
-		size = read_n_cells(n_mem_size_cells, &memcell_buf);
-		nid = of_node_to_nid_single(memory);
+	if (NODE_DATA(nid)->node_spanned_pages)
+		return nid;
 
-		if (valid_hot_add_scn(&nid, start, size, scn_addr)) {
-			of_node_put(memory);
-			return nid;
+	for_each_online_node(nid) {
+		if (NODE_DATA(nid)->node_spanned_pages) {
+			found = 1;
+			break;
 		}
-
-		if (--ranges)		/* process all ranges in cell */
-			goto ha_new_range;
 	}
-	BUG();	/* section address should be found above */
-	return 0;
+
+	BUG_ON(!found);
+	return nid;
 }
+
 #endif /* CONFIG_MEMORY_HOTPLUG */
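Taken together, the numa.c changes above make hot_add_scn_to_nid() resolve a hot-added section in stages: the ibm,dynamic-reconfiguration-memory property if that node exists, otherwise the memory@ nodes, then any online node, and finally the first online node that actually spans pages. A simplified sketch of that fallback order; the lookup helpers and node table here are placeholders, not the kernel's device-tree or NODE_DATA() API.

/* Sketch only: ordering of the fallbacks, with toy data. */
#include <stdio.h>

#define MAX_NODES 4

struct node { int online; unsigned long spanned_pages; };
static struct node nodes[MAX_NODES] = {
	{ 1, 0 }, { 1, 4096 }, { 0, 0 }, { 1, 0 },
};

/* Pretend device-tree lookups: -1 means "not found". */
static int lookup_drconf(unsigned long addr) { (void)addr; return -1; }
static int lookup_memory_nodes(unsigned long addr) { (void)addr; return -1; }

static int any_online_node(void)
{
	for (int n = 0; n < MAX_NODES; n++)
		if (nodes[n].online)
			return n;
	return -1;
}

static int hot_add_scn_to_nid(unsigned long scn_addr)
{
	int nid = lookup_drconf(scn_addr);

	if (nid < 0)
		nid = lookup_memory_nodes(scn_addr);
	if (nid < 0 || !nodes[nid].online)
		nid = any_online_node();
	if (nodes[nid].spanned_pages)
		return nid;

	/* Last resort: first online node that actually spans memory */
	for (int n = 0; n < MAX_NODES; n++)
		if (nodes[n].online && nodes[n].spanned_pages)
			return n;
	return nid;
}

int main(void)
{
	/* Both lookups fail, node 0 spans nothing, so node 1 is chosen. */
	printf("section 0x10000000 -> node %d\n", hot_add_scn_to_nid(0x10000000));
	return 0;
}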
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 6d94116fdea1..f5c6fd42265c 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -1,5 +1,6 @@
 /*
  * This file contains common routines for dealing with free of page tables
+ * Along with common page table handling code
  *
  * Derived from arch/powerpc/mm/tlb_64.c:
  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -81,11 +82,10 @@ static void pte_free_submit(struct pte_freelist_batch *batch)
 void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
 {
 	/* This is safe since tlb_gather_mmu has disabled preemption */
-	cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
 	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
 
 	if (atomic_read(&tlb->mm->mm_users) < 2 ||
-	    cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
+	    cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){
 		pgtable_free(pgf);
 		return;
 	}
@@ -115,3 +115,133 @@ void pte_free_finish(void)
 		pte_free_submit(*batchp);
 	*batchp = NULL;
 }
+
+/*
+ * Handle i/d cache flushing, called from set_pte_at() or ptep_set_access_flags()
+ */
+static pte_t do_dcache_icache_coherency(pte_t pte)
+{
+	unsigned long pfn = pte_pfn(pte);
+	struct page *page;
+
+	if (unlikely(!pfn_valid(pfn)))
+		return pte;
+	page = pfn_to_page(pfn);
+
+	if (!PageReserved(page) && !test_bit(PG_arch_1, &page->flags)) {
+		pr_debug("do_dcache_icache_coherency... flushing\n");
+		flush_dcache_icache_page(page);
+		set_bit(PG_arch_1, &page->flags);
+	}
+	else
+		pr_debug("do_dcache_icache_coherency... already clean\n");
+	return __pte(pte_val(pte) | _PAGE_HWEXEC);
+}
+
+static inline int is_exec_fault(void)
+{
+	return current->thread.regs && TRAP(current->thread.regs) == 0x400;
+}
+
+/* We only try to do i/d cache coherency on stuff that looks like
+ * reasonably "normal" PTEs. We currently require a PTE to be present
+ * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE
+ */
+static inline int pte_looks_normal(pte_t pte)
+{
+	return (pte_val(pte) &
+		(_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) ==
+		(_PAGE_PRESENT);
+}
+
+#if defined(CONFIG_PPC_STD_MMU)
+/* Server-style MMU handles coherency when hashing if HW exec permission
+ * is supposed per page (currently 64-bit only). Else, we always flush
+ * valid PTEs in set_pte.
+ */
+static inline int pte_need_exec_flush(pte_t pte, int set_pte)
+{
+	return set_pte && pte_looks_normal(pte) &&
+		!(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
+		  cpu_has_feature(CPU_FTR_NOEXECUTE));
+}
+#elif _PAGE_HWEXEC == 0
+/* Embedded type MMU without HW exec support (8xx only so far), we flush
+ * the cache for any present PTE
+ */
+static inline int pte_need_exec_flush(pte_t pte, int set_pte)
+{
+	return set_pte && pte_looks_normal(pte);
+}
+#else
+/* Other embedded CPUs with HW exec support per-page, we flush on exec
+ * fault if HWEXEC is not set
+ */
+static inline int pte_need_exec_flush(pte_t pte, int set_pte)
+{
+	return pte_looks_normal(pte) && is_exec_fault() &&
+		!(pte_val(pte) & _PAGE_HWEXEC);
+}
+#endif
+
+/*
+ * set_pte stores a linux PTE into the linux page table.
+ */
+void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
+{
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(pte_present(*ptep));
+#endif
+	/* Note: mm->context.id might not yet have been assigned as
+	 * this context might not have been activated yet when this
+	 * is called.
+	 */
+	pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
+	if (pte_need_exec_flush(pte, 1))
+		pte = do_dcache_icache_coherency(pte);
+
+	/* Perform the setting of the PTE */
+	__set_pte_at(mm, addr, ptep, pte, 0);
+}
+
+/*
+ * This is called when relaxing access to a PTE. It's also called in the page
+ * fault path when we don't hit any of the major fault cases, ie, a minor
+ * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
+ * handled those two for us, we additionally deal with missing execute
+ * permission here on some processors
+ */
+int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+			  pte_t *ptep, pte_t entry, int dirty)
+{
+	int changed;
+	if (!dirty && pte_need_exec_flush(entry, 0))
+		entry = do_dcache_icache_coherency(entry);
+	changed = !pte_same(*(ptep), entry);
+	if (changed) {
+		assert_pte_locked(vma->vm_mm, address);
+		__ptep_set_access_flags(ptep, entry);
+		flush_tlb_page_nohash(vma, address);
+	}
+	return changed;
+}
+
+#ifdef CONFIG_DEBUG_VM
+void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	if (mm == &init_mm)
+		return;
+	pgd = mm->pgd + pgd_index(addr);
+	BUG_ON(pgd_none(*pgd));
+	pud = pud_offset(pgd, addr);
+	BUG_ON(pud_none(*pud));
+	pmd = pmd_offset(pud, addr);
+	BUG_ON(!pmd_present(*pmd));
+	BUG_ON(!spin_is_locked(pte_lockptr(mm, pmd)));
+}
+#endif /* CONFIG_DEBUG_VM */
+
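The new pgtable.c above decides in one place when a lazy dcache/icache flush is needed: hash MMUs flush every normal PTE at set_pte time unless the hardware copes on its own (coherent icache or per-page NX), 8xx, which has no per-page execute bit, flushes any normal PTE it sets, and other embedded cores flush only on an exec fault when _PAGE_HWEXEC is clear. A userspace sketch of those three policies; the enum and booleans are illustrative stand-ins for what the kernel selects at compile time with CONFIG_PPC_STD_MMU and _PAGE_HWEXEC.

/* Sketch only: the flush decision table, not the kernel implementation. */
#include <stdbool.h>
#include <stdio.h>

enum mmu_family { HASH_MMU, EMBEDDED_NO_HWEXEC /* 8xx */, EMBEDDED_HWEXEC };

struct pte { bool normal; bool hwexec; };

static bool need_exec_flush(enum mmu_family mmu, struct pte pte, bool set_pte,
			    bool hw_handles_coherency, bool exec_fault)
{
	switch (mmu) {
	case HASH_MMU:
		/* Flush at set_pte time unless the hardware copes by itself */
		return set_pte && pte.normal && !hw_handles_coherency;
	case EMBEDDED_NO_HWEXEC:
		/* No per-page execute bit: flush every normal PTE we set */
		return set_pte && pte.normal;
	case EMBEDDED_HWEXEC:
		/* Per-page execute bit: flush lazily, on the first exec fault */
		return pte.normal && exec_fault && !pte.hwexec;
	}
	return false;
}

int main(void)
{
	struct pte pte = { .normal = true, .hwexec = false };

	printf("hash, coherent icache: %d\n",
	       need_exec_flush(HASH_MMU, pte, true, true, false));      /* 0 */
	printf("hash, no coherency:    %d\n",
	       need_exec_flush(HASH_MMU, pte, true, false, false));     /* 1 */
	printf("booke, exec fault:     %d\n",
	       need_exec_flush(EMBEDDED_HWEXEC, pte, false, false, true)); /* 1 */
	return 0;
}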
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 58bcaeba728d..430d0908fa50 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -129,7 +129,8 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 void __iomem *
 ioremap(phys_addr_t addr, unsigned long size)
 {
-	return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED);
+	return __ioremap_caller(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED,
+				__builtin_return_address(0));
 }
 EXPORT_SYMBOL(ioremap);
 
@@ -143,20 +144,27 @@ ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
 	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
 	flags &= ~(_PAGE_USER | _PAGE_EXEC | _PAGE_HWEXEC);
 
-	return __ioremap(addr, size, flags);
+	return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(ioremap_flags);
 
 void __iomem *
 __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
 {
+	return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
+}
+
+void __iomem *
+__ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
+		 void *caller)
+{
 	unsigned long v, i;
 	phys_addr_t p;
 	int err;
 
 	/* Make sure we have the base flags */
 	if ((flags & _PAGE_PRESENT) == 0)
-		flags |= _PAGE_KERNEL;
+		flags |= PAGE_KERNEL;
 
 	/* Non-cacheable page cannot be coherent */
 	if (flags & _PAGE_NO_CACHE)
@@ -212,7 +220,7 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
 
 	if (mem_init_done) {
 		struct vm_struct *area;
-		area = get_vm_area(size, VM_IOREMAP);
+		area = get_vm_area_caller(size, VM_IOREMAP, caller);
 		if (area == 0)
 			return NULL;
 		v = (unsigned long) area->addr;
@@ -288,7 +296,7 @@ void __init mapin_ram(void)
 	p = memstart_addr + s;
 	for (; s < total_lowmem; s += PAGE_SIZE) {
 		ktext = ((char *) v >= _stext && (char *) v < etext);
-		f = ktext ?_PAGE_RAM_TEXT : _PAGE_RAM;
+		f = ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL;
 		map_page(v, p, f);
 #ifdef CONFIG_PPC_STD_MMU_32
 		if (ktext)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 365e61ae5dbc..bfa7db6b2fd5 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -144,8 +144,8 @@ void __iounmap_at(void *ea, unsigned long size)
 	unmap_kernel_range((unsigned long)ea, size);
 }
 
-void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
-			 unsigned long flags)
+void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
+				unsigned long flags, void *caller)
 {
 	phys_addr_t paligned;
 	void __iomem *ret;
@@ -168,8 +168,9 @@ void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
 	if (mem_init_done) {
 		struct vm_struct *area;
 
-		area = __get_vm_area(size, VM_IOREMAP,
-				     ioremap_bot, IOREMAP_END);
+		area = __get_vm_area_caller(size, VM_IOREMAP,
+					    ioremap_bot, IOREMAP_END,
+					    caller);
 		if (area == NULL)
 			return NULL;
 		ret = __ioremap_at(paligned, area->addr, size, flags);
@@ -186,19 +187,27 @@ void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
 	return ret;
 }
 
+void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
+			 unsigned long flags)
+{
+	return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
+}
 
 void __iomem * ioremap(phys_addr_t addr, unsigned long size)
 {
 	unsigned long flags = _PAGE_NO_CACHE | _PAGE_GUARDED;
+	void *caller = __builtin_return_address(0);
 
 	if (ppc_md.ioremap)
-		return ppc_md.ioremap(addr, size, flags);
-	return __ioremap(addr, size, flags);
+		return ppc_md.ioremap(addr, size, flags, caller);
+	return __ioremap_caller(addr, size, flags, caller);
 }
 
 void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size,
 			     unsigned long flags)
 {
+	void *caller = __builtin_return_address(0);
+
 	/* writeable implies dirty for kernel addresses */
 	if (flags & _PAGE_RW)
 		flags |= _PAGE_DIRTY;
@@ -207,8 +216,8 @@ void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size,
 	flags &= ~(_PAGE_USER | _PAGE_EXEC);
 
 	if (ppc_md.ioremap)
-		return ppc_md.ioremap(addr, size, flags);
-	return __ioremap(addr, size, flags);
+		return ppc_md.ioremap(addr, size, flags, caller);
+	return __ioremap_caller(addr, size, flags, caller);
 }
 
 
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index fe65c405412c..2d2a87e10154 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -74,9 +74,6 @@ unsigned long p_mapped_by_bats(phys_addr_t pa)
 
 unsigned long __init mmu_mapin_ram(void)
 {
-#ifdef CONFIG_POWER4
-	return 0;
-#else
 	unsigned long tot, bl, done;
 	unsigned long max_size = (256<<20);
 
@@ -95,7 +92,7 @@ unsigned long __init mmu_mapin_ram(void)
 			break;
 	}
 
-	setbat(2, PAGE_OFFSET, 0, bl, _PAGE_RAM);
+	setbat(2, PAGE_OFFSET, 0, bl, PAGE_KERNEL_X);
 	done = (unsigned long)bat_addrs[2].limit - PAGE_OFFSET + 1;
 	if ((done < tot) && !bat_addrs[3].limit) {
 		/* use BAT3 to cover a bit more */
@@ -103,12 +100,11 @@ unsigned long __init mmu_mapin_ram(void)
 		for (bl = 128<<10; bl < max_size; bl <<= 1)
 			if (bl * 2 > tot)
 				break;
-		setbat(3, PAGE_OFFSET+done, done, bl, _PAGE_RAM);
+		setbat(3, PAGE_OFFSET+done, done, bl, PAGE_KERNEL_X);
 		done = (unsigned long)bat_addrs[3].limit - PAGE_OFFSET + 1;
 	}
 
 	return done;
-#endif
 }
 
 /*
@@ -136,9 +132,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
 		wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
 		bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
 		bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
-#ifndef CONFIG_KGDB /* want user access for breakpoints */
 		if (flags & _PAGE_USER)
-#endif
 			bat[1].batu |= 1; 	/* Vp = 1 */
 		if (flags & _PAGE_GUARDED) {
 			/* G bit must be zero in IBATs */
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index c931bc7d1079..1be1b5e59796 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -139,12 +139,12 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
  */
 void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
 {
-	cpumask_t tmp;
+	const struct cpumask *tmp;
 	int i, local = 0;
 
 	i = batch->index;
-	tmp = cpumask_of_cpu(smp_processor_id());
-	if (cpus_equal(batch->mm->cpu_vm_mask, tmp))
+	tmp = cpumask_of(smp_processor_id());
+	if (cpumask_equal(mm_cpumask(batch->mm), tmp))
 		local = 1;
 	if (i == 1)
 		flush_hash_page(batch->vaddr[0], batch->pte[0],
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 39ac22b13c73..7af72970faed 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -132,11 +132,11 @@ void flush_tlb_mm(struct mm_struct *mm)
 	pid = mm->context.id;
 	if (unlikely(pid == MMU_NO_CONTEXT))
 		goto no_context;
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
-	if (!cpus_empty(cpu_mask)) {
+	if (!cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
 		struct tlb_flush_param p = { .pid = pid };
-		smp_call_function_mask(cpu_mask, do_flush_tlb_mm_ipi, &p, 1);
+		/* Ignores smp_processor_id() even if set. */
+		smp_call_function_many(mm_cpumask(mm),
+				       do_flush_tlb_mm_ipi, &p, 1);
 	}
 	_tlbil_pid(pid);
  no_context:
@@ -146,16 +146,15 @@ EXPORT_SYMBOL(flush_tlb_mm);
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 {
-	cpumask_t cpu_mask;
+	struct cpumask *cpu_mask;
 	unsigned int pid;
 
 	preempt_disable();
 	pid = vma ? vma->vm_mm->context.id : 0;
 	if (unlikely(pid == MMU_NO_CONTEXT))
 		goto bail;
-	cpu_mask = vma->vm_mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
-	if (!cpus_empty(cpu_mask)) {
+	cpu_mask = mm_cpumask(vma->vm_mm);
+	if (!cpumask_equal(cpu_mask, cpumask_of(smp_processor_id()))) {
 		/* If broadcast tlbivax is supported, use it */
 		if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) {
 			int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL);
@@ -167,7 +166,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 			goto bail;
 		} else {
 			struct tlb_flush_param p = { .pid = pid, .addr = vmaddr };
-			smp_call_function_mask(cpu_mask,
+			/* Ignores smp_processor_id() even if set in cpu_mask */
+			smp_call_function_many(cpu_mask,
 					       do_flush_tlb_page_ipi, &p, 1);
 		}
 	}
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S
index f900a39e6ec4..788b87c36f77 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/tlb_nohash_low.S
@@ -118,25 +118,50 @@ _GLOBAL(_tlbil_pid)
 
 #elif defined(CONFIG_FSL_BOOKE)
 /*
- * FSL BookE implementations. Currently _pid and _all are the
- * same. This will change when tlbilx is actually supported and
- * performs invalidate-by-PID. This change will be driven by
- * mmu_features conditional
+ * FSL BookE implementations.
+ *
+ * Since feature sections are using _SECTION_ELSE we need
+ * to have the larger code path before the _SECTION_ELSE
  */
 
+#define MMUCSR0_TLBFI	(MMUCSR0_TLB0FI | MMUCSR0_TLB1FI | \
+			 MMUCSR0_TLB2FI | MMUCSR0_TLB3FI)
 /*
  * Flush MMU TLB on the local processor
  */
-_GLOBAL(_tlbil_pid)
 _GLOBAL(_tlbil_all)
-#define MMUCSR0_TLBFI	(MMUCSR0_TLB0FI | MMUCSR0_TLB1FI | \
-			 MMUCSR0_TLB2FI | MMUCSR0_TLB3FI)
+BEGIN_MMU_FTR_SECTION
+	li	r3,(MMUCSR0_TLBFI)@l
+	mtspr	SPRN_MMUCSR0, r3
+1:
+	mfspr	r3,SPRN_MMUCSR0
+	andi.	r3,r3,MMUCSR0_TLBFI@l
+	bne	1b
+MMU_FTR_SECTION_ELSE
+	PPC_TLBILX_ALL(0,0)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX)
+	msync
+	isync
+	blr
+
+_GLOBAL(_tlbil_pid)
+BEGIN_MMU_FTR_SECTION
+	slwi	r3,r3,16
+	mfmsr	r10
+	wrteei	0
+	mfspr	r4,SPRN_MAS6	/* save MAS6 */
+	mtspr	SPRN_MAS6,r3
+	PPC_TLBILX_PID(0,0)
+	mtspr	SPRN_MAS6,r4	/* restore MAS6 */
+	wrtee	r10
+MMU_FTR_SECTION_ELSE
 	li	r3,(MMUCSR0_TLBFI)@l
 	mtspr	SPRN_MMUCSR0, r3
 1:
 	mfspr	r3,SPRN_MMUCSR0
 	andi.	r3,r3,MMUCSR0_TLBFI@l
 	bne	1b
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBILX)
 	msync
 	isync
 	blr
@@ -149,7 +174,9 @@ _GLOBAL(_tlbil_va)
 	mfmsr	r10
 	wrteei	0
 	slwi	r4,r4,16
+	ori	r4,r4,(MAS6_ISIZE(BOOK3E_PAGESZ_4K))@l
 	mtspr	SPRN_MAS6,r4		/* assume AS=0 for now */
+BEGIN_MMU_FTR_SECTION
 	tlbsx	0,r3
 	mfspr	r4,SPRN_MAS1		/* check valid */
 	andis.	r3,r4,MAS1_VALID@h
@@ -157,6 +184,9 @@ _GLOBAL(_tlbil_va)
 	rlwinm	r4,r4,0,1,31
 	mtspr	SPRN_MAS1,r4
 	tlbwe
+MMU_FTR_SECTION_ELSE
+	PPC_TLBILX_VA(0,r3)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX)
 	msync
 	isync
 1:	wrtee	r10