author		Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
commit		c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree		ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /arch/powerpc/mm
parent		ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent		6a00f206debf8a5c8899055726ad127dbeeed098 (diff)

Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
	litmus/sched_cedf.c
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/40x_mmu.c            |  17
-rw-r--r--  arch/powerpc/mm/44x_mmu.c            |  14
-rw-r--r--  arch/powerpc/mm/Makefile             |   6
-rw-r--r--  arch/powerpc/mm/dma-noncoherent.c    |  20
-rw-r--r--  arch/powerpc/mm/fault.c              |  16
-rw-r--r--  arch/powerpc/mm/fsl_booke_mmu.c      |  27
-rw-r--r--  arch/powerpc/mm/gup.c                |  12
-rw-r--r--  arch/powerpc/mm/hash_low_64.S        |  32
-rw-r--r--  arch/powerpc/mm/hash_native_64.c     |  18
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c      | 103
-rw-r--r--  arch/powerpc/mm/highmem.c            |  37
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c        |   2
-rw-r--r--  arch/powerpc/mm/init_32.c            |  54
-rw-r--r--  arch/powerpc/mm/init_64.c            |  15
-rw-r--r--  arch/powerpc/mm/mem.c                | 115
-rw-r--r--  arch/powerpc/mm/mmu_context_hash64.c | 214
-rw-r--r--  arch/powerpc/mm/mmu_context_nohash.c |  36
-rw-r--r--  arch/powerpc/mm/mmu_decl.h           |   5
-rw-r--r--  arch/powerpc/mm/numa.c               | 361
-rw-r--r--  arch/powerpc/mm/pgtable.c            | 104
-rw-r--r--  arch/powerpc/mm/pgtable_32.c         |  15
-rw-r--r--  arch/powerpc/mm/pgtable_64.c         |  17
-rw-r--r--  arch/powerpc/mm/ppc_mmu_32.c         |  18
-rw-r--r--  arch/powerpc/mm/slb.c                |  10
-rw-r--r--  arch/powerpc/mm/slb_low.S            |   8
-rw-r--r--  arch/powerpc/mm/stab.c               |   2
-rw-r--r--  arch/powerpc/mm/tlb_hash32.c         |   3
-rw-r--r--  arch/powerpc/mm/tlb_hash64.c         |  11
-rw-r--r--  arch/powerpc/mm/tlb_low_64e.S        |  11
-rw-r--r--  arch/powerpc/mm/tlb_nohash.c         |  75
-rw-r--r--  arch/powerpc/mm/tlb_nohash_low.S     |  37
31 files changed, 1020 insertions, 395 deletions
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c
index 1dc2fa5ce1bd..5810967511d4 100644
--- a/arch/powerpc/mm/40x_mmu.c
+++ b/arch/powerpc/mm/40x_mmu.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/highmem.h>
+#include <linux/memblock.h>
 
 #include <asm/pgalloc.h>
 #include <asm/prom.h>
@@ -47,6 +48,7 @@
 #include <asm/bootx.h>
 #include <asm/machdep.h>
 #include <asm/setup.h>
+
 #include "mmu_decl.h"
 
 extern int __map_without_ltlbs;
@@ -139,8 +141,19 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
 	 * coverage with normal-sized pages (or other reasons) do not
 	 * attempt to allocate outside the allowed range.
 	 */
-
-	__initial_memory_limit_addr = memstart_addr + mapped;
+	memblock_set_current_limit(mapped);
 
 	return mapped;
 }
+
+void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+				phys_addr_t first_memblock_size)
+{
+	/* We don't currently support the first MEMBLOCK not mapping 0
+	 * physical on those processors
+	 */
+	BUG_ON(first_memblock_base != 0);
+
+	/* 40x can only access 16MB at the moment (see head_40x.S) */
+	memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000));
+}
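
Note on the pattern introduced above (and repeated for 44x, FSL BookE, 8xx and the 64-bit hash MMU later in this series): the per-platform global __initial_memory_limit_addr is replaced by memblock's own allocation limit. A minimal sketch of the intended lifecycle, using only the memblock calls visible in this diff; the function name and the 16MB window are illustrative, not from this commit:

#include <linux/memblock.h>

/* Illustrative only: rough lifecycle of the early allocation limit. */
void __init early_limit_lifecycle_example(phys_addr_t first_memblock_size)
{
	void *p;

	/* 1. Very early: only a small window is mapped by the boot-time MMU
	 *    setup, so clamp where memblock may place allocations
	 *    (the 16MB window here is made up).
	 */
	memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000));

	/* 2. Early allocations now implicitly stay below that limit, so
	 *    their physical addresses are reachable through __va().
	 */
	p = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
	(void)p;

	/* 3. Once the full linear mapping is up, lift the restriction. */
	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
}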
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index d8c6efb32bc6..024acab588fd 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -24,6 +24,8 @@
  */
 
 #include <linux/init.h>
+#include <linux/memblock.h>
+
 #include <asm/mmu.h>
 #include <asm/system.h>
 #include <asm/page.h>
@@ -213,6 +215,18 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
 	return total_lowmem;
 }
 
+void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+				phys_addr_t first_memblock_size)
+{
+	/* We don't currently support the first MEMBLOCK not mapping 0
+	 * physical on those processors
+	 */
+	BUG_ON(first_memblock_base != 0);
+
+	/* 44x has a 256M TLB entry pinned at boot */
+	memblock_set_current_limit(min_t(u64, first_memblock_size, PPC_PIN_SIZE));
+}
+
 #ifdef CONFIG_SMP
 void __cpuinit mmu_init_secondary(int cpu)
 {
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index ce68708bbad5..bdca46e08382 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -4,9 +4,7 @@
 
 subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
-ifeq ($(CONFIG_PPC64),y)
-EXTRA_CFLAGS	+= -mno-minimal-toc
-endif
+ccflags-$(CONFIG_PPC64)	:= -mno-minimal-toc
 
 obj-y				:= fault.o mem.o pgtable.o gup.o \
 				   init_$(CONFIG_WORD_SIZE).o \
@@ -25,7 +23,7 @@ obj-$(CONFIG_PPC_STD_MMU)	+= hash_low_$(CONFIG_WORD_SIZE).o \
 				   mmu_context_hash$(CONFIG_WORD_SIZE).o
 obj-$(CONFIG_40x)		+= 40x_mmu.o
 obj-$(CONFIG_44x)		+= 44x_mmu.o
-obj-$(CONFIG_FSL_BOOKE)		+= fsl_booke_mmu.o
+obj-$(CONFIG_PPC_FSL_BOOK3E)	+= fsl_booke_mmu.o
 obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
 obj-$(CONFIG_PPC_MM_SLICES)	+= slice.o
 ifeq ($(CONFIG_HUGETLB_PAGE),y)
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 757c0bed9a91..b42f76c4948d 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -399,3 +399,23 @@ void __dma_sync_page(struct page *page, unsigned long offset,
 #endif
 }
 EXPORT_SYMBOL(__dma_sync_page);
+
+/*
+ * Return the PFN for a given cpu virtual address returned by
+ * __dma_alloc_coherent. This is used by dma_mmap_coherent()
+ */
+unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr)
+{
+	/* This should always be populated, so we don't test every
+	 * level. If that fails, we'll have a nice crash which
+	 * will be as good as a BUG_ON()
+	 */
+	pgd_t *pgd = pgd_offset_k(cpu_addr);
+	pud_t *pud = pud_offset(pgd, cpu_addr);
+	pmd_t *pmd = pmd_offset(pud, cpu_addr);
+	pte_t *ptep = pte_offset_kernel(pmd, cpu_addr);
+
+	if (pte_none(*ptep) || !pte_present(*ptep))
+		return 0;
+	return pte_pfn(*ptep);
+}
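
The helper added above walks the kernel page tables because buffers from __dma_alloc_coherent() on non-coherent CPUs live in a dedicated non-cacheable remap area, so virt_to_page() cannot be used on them. A hedged sketch of how such a PFN might feed a user-space mapping follows; dma_mmap_example() and its argument handling are hypothetical and are not this commit's dma_mmap_coherent() implementation:

/* Illustrative sketch only; not from this commit. */
static int dma_mmap_example(struct vm_area_struct *vma, void *cpu_addr,
			    size_t size)
{
	unsigned long pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr);
	unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;

	if (!pfn || vma->vm_pgoff >= count || user_count > count - vma->vm_pgoff)
		return -ENXIO;

	/* the buffer was remapped non-cacheable, so map it that way for user space too */
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	return remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
			       vma->vm_end - vma->vm_start, vma->vm_page_prot);
}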
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 1bd712c33ce2..ad35f66c69e8 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -30,6 +30,8 @@
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
 #include <linux/perf_event.h>
+#include <linux/magic.h>
+#include <linux/ratelimit.h>
 
 #include <asm/firmware.h>
 #include <asm/page.h>
@@ -345,11 +347,10 @@ bad_area_nosemaphore:
 		return 0;
 	}
 
-	if (is_exec && (error_code & DSISR_PROTFAULT)
-	    && printk_ratelimit())
-		printk(KERN_CRIT "kernel tried to execute NX-protected"
-		       " page (%lx) - exploit attempt? (uid: %d)\n",
-		       address, current_uid());
+	if (is_exec && (error_code & DSISR_PROTFAULT))
+		printk_ratelimited(KERN_CRIT "kernel tried to execute NX-protected"
+				   " page (%lx) - exploit attempt? (uid: %d)\n",
+				   address, current_uid());
 
 	return SIGSEGV;
 
@@ -385,6 +386,7 @@ do_sigbus:
 void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 {
 	const struct exception_table_entry *entry;
+	unsigned long *stackend;
 
 	/* Are we prepared to handle this fault? */
 	if ((entry = search_exception_tables(regs->nip)) != NULL) {
@@ -413,5 +415,9 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 	printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
 		regs->nip);
 
+	stackend = end_of_stack(current);
+	if (current != &init_task && *stackend != STACK_END_MAGIC)
+		printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
+
 	die("Kernel access of bad area", regs, sig);
 }
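
Background for the check added above, as a sketch: the kernel writes a sentinel into the lowest usable word of each task's kernel stack when the task is created; end_of_stack() returns that word's address, so finding anything other than STACK_END_MAGIC there during a bad kernel fault is a strong hint the stack overflowed into the thread_info below it. Roughly (simplified; the function name is made up and the real poisoning happens in the fork path):

#include <linux/magic.h>	/* STACK_END_MAGIC, hence the new include above */
#include <linux/sched.h>

static void poison_stack_end_example(struct task_struct *tsk)
{
	unsigned long *stackend = end_of_stack(tsk);	/* lowest usable stack word */

	*stackend = STACK_END_MAGIC;	/* if this gets overwritten, the stack overran */
}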
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index 4b66a1ece6d8..f7802c8bba0a 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -40,6 +40,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/highmem.h>
+#include <linux/memblock.h>
 
 #include <asm/pgalloc.h>
 #include <asm/prom.h>
@@ -56,11 +57,6 @@
 
 unsigned int tlbcam_index;
 
-
-#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS)
-#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS"
-#endif
-
 #define NUM_TLBCAMS	(64)
 struct tlbcam TLBCAM[NUM_TLBCAMS];
 
@@ -137,7 +133,8 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
 	if (mmu_has_feature(MMU_FTR_BIG_PHYS))
 		TLBCAM[index].MAS7 = (u64)phys >> 32;
 
-	if (flags & _PAGE_USER) {
+	/* Below is unlikely -- only for large user pages or similar */
+	if (pte_user(flags)) {
 		TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR;
 		TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0);
 	}
@@ -184,6 +181,12 @@ unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)
 	return amount_mapped;
 }
 
+#ifdef CONFIG_PPC32
+
+#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS)
+#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS"
+#endif
+
 unsigned long __init mmu_mapin_ram(unsigned long top)
 {
 	return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1;
@@ -213,5 +216,15 @@ void __init adjust_total_lowmem(void)
 	pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20,
 		(unsigned int)((total_lowmem - __max_low_memory) >> 20));
 
-	__initial_memory_limit_addr = memstart_addr + __max_low_memory;
+	memblock_set_current_limit(memstart_addr + __max_low_memory);
 }
+
+void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+				phys_addr_t first_memblock_size)
+{
+	phys_addr_t limit = first_memblock_base + first_memblock_size;
+
+	/* 64M mapped initially according to head_fsl_booke.S */
+	memblock_set_current_limit(min_t(u64, limit, 0x04000000));
+}
+#endif
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index d7efdbf640c7..fec13200868f 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -16,6 +16,16 @@
 
 #ifdef __HAVE_ARCH_PTE_SPECIAL
 
+static inline void get_huge_page_tail(struct page *page)
+{
+	/*
+	 * __split_huge_page_refcount() cannot run
+	 * from under us.
+	 */
+	VM_BUG_ON(atomic_read(&page->_count) < 0);
+	atomic_inc(&page->_count);
+}
+
 /*
  * The performance critical leaf functions are made noinline otherwise gcc
  * inlines everything into a single function which results in too much
@@ -47,6 +57,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 			put_page(page);
 			return 0;
 		}
+		if (PageTail(page))
+			get_huge_page_tail(page);
 		pages[*nr] = page;
 		(*nr)++;
 
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 3079f6b44cf5..a242b5d7cbe4 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -118,7 +118,7 @@ _GLOBAL(__hash_page_4K)
118BEGIN_FTR_SECTION 118BEGIN_FTR_SECTION
119 cmpdi r9,0 /* check segment size */ 119 cmpdi r9,0 /* check segment size */
120 bne 3f 120 bne 3f
121END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) 121END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
122 /* Calc va and put it in r29 */ 122 /* Calc va and put it in r29 */
123 rldicr r29,r5,28,63-28 123 rldicr r29,r5,28,63-28
124 rldicl r3,r3,0,36 124 rldicl r3,r3,0,36
@@ -192,8 +192,8 @@ htab_insert_pte:
192 rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */ 192 rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
193 193
194 /* Call ppc_md.hpte_insert */ 194 /* Call ppc_md.hpte_insert */
195 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */ 195 ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
196 mr r4,r29 /* Retreive va */ 196 mr r4,r29 /* Retrieve va */
197 li r7,0 /* !bolted, !secondary */ 197 li r7,0 /* !bolted, !secondary */
198 li r8,MMU_PAGE_4K /* page size */ 198 li r8,MMU_PAGE_4K /* page size */
199 ld r9,STK_PARM(r9)(r1) /* segment size */ 199 ld r9,STK_PARM(r9)(r1) /* segment size */
@@ -215,8 +215,8 @@ _GLOBAL(htab_call_hpte_insert1)
215 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ 215 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
216 216
217 /* Call ppc_md.hpte_insert */ 217 /* Call ppc_md.hpte_insert */
218 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */ 218 ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
219 mr r4,r29 /* Retreive va */ 219 mr r4,r29 /* Retrieve va */
220 li r7,HPTE_V_SECONDARY /* !bolted, secondary */ 220 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
221 li r8,MMU_PAGE_4K /* page size */ 221 li r8,MMU_PAGE_4K /* page size */
222 ld r9,STK_PARM(r9)(r1) /* segment size */ 222 ld r9,STK_PARM(r9)(r1) /* segment size */
@@ -401,7 +401,7 @@ _GLOBAL(__hash_page_4K)
401BEGIN_FTR_SECTION 401BEGIN_FTR_SECTION
402 cmpdi r9,0 /* check segment size */ 402 cmpdi r9,0 /* check segment size */
403 bne 3f 403 bne 3f
404END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) 404END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
405 /* Calc va and put it in r29 */ 405 /* Calc va and put it in r29 */
406 rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */ 406 rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */
407 rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */ 407 rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */
@@ -495,8 +495,8 @@ htab_special_pfn:
495 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ 495 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
496 496
497 /* Call ppc_md.hpte_insert */ 497 /* Call ppc_md.hpte_insert */
498 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */ 498 ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
499 mr r4,r29 /* Retreive va */ 499 mr r4,r29 /* Retrieve va */
500 li r7,0 /* !bolted, !secondary */ 500 li r7,0 /* !bolted, !secondary */
501 li r8,MMU_PAGE_4K /* page size */ 501 li r8,MMU_PAGE_4K /* page size */
502 ld r9,STK_PARM(r9)(r1) /* segment size */ 502 ld r9,STK_PARM(r9)(r1) /* segment size */
@@ -522,8 +522,8 @@ _GLOBAL(htab_call_hpte_insert1)
522 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ 522 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
523 523
524 /* Call ppc_md.hpte_insert */ 524 /* Call ppc_md.hpte_insert */
525 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */ 525 ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
526 mr r4,r29 /* Retreive va */ 526 mr r4,r29 /* Retrieve va */
527 li r7,HPTE_V_SECONDARY /* !bolted, secondary */ 527 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
528 li r8,MMU_PAGE_4K /* page size */ 528 li r8,MMU_PAGE_4K /* page size */
529 ld r9,STK_PARM(r9)(r1) /* segment size */ 529 ld r9,STK_PARM(r9)(r1) /* segment size */
@@ -715,7 +715,7 @@ BEGIN_FTR_SECTION
715 andi. r0,r31,_PAGE_NO_CACHE 715 andi. r0,r31,_PAGE_NO_CACHE
716 /* If so, bail out and refault as a 4k page */ 716 /* If so, bail out and refault as a 4k page */
717 bne- ht64_bail_ok 717 bne- ht64_bail_ok
718END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE) 718END_MMU_FTR_SECTION_IFCLR(MMU_FTR_CI_LARGE_PAGE)
719 /* Prepare new PTE value (turn access RW into DIRTY, then 719 /* Prepare new PTE value (turn access RW into DIRTY, then
720 * add BUSY and ACCESSED) 720 * add BUSY and ACCESSED)
721 */ 721 */
@@ -736,7 +736,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE)
736BEGIN_FTR_SECTION 736BEGIN_FTR_SECTION
737 cmpdi r9,0 /* check segment size */ 737 cmpdi r9,0 /* check segment size */
738 bne 3f 738 bne 3f
739END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) 739END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
740 /* Calc va and put it in r29 */ 740 /* Calc va and put it in r29 */
741 rldicr r29,r5,28,63-28 741 rldicr r29,r5,28,63-28
742 rldicl r3,r3,0,36 742 rldicl r3,r3,0,36
@@ -813,8 +813,8 @@ ht64_insert_pte:
813 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ 813 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
814 814
815 /* Call ppc_md.hpte_insert */ 815 /* Call ppc_md.hpte_insert */
816 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */ 816 ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
817 mr r4,r29 /* Retreive va */ 817 mr r4,r29 /* Retrieve va */
818 li r7,0 /* !bolted, !secondary */ 818 li r7,0 /* !bolted, !secondary */
819 li r8,MMU_PAGE_64K 819 li r8,MMU_PAGE_64K
820 ld r9,STK_PARM(r9)(r1) /* segment size */ 820 ld r9,STK_PARM(r9)(r1) /* segment size */
@@ -836,8 +836,8 @@ _GLOBAL(ht64_call_hpte_insert1)
836 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ 836 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
837 837
838 /* Call ppc_md.hpte_insert */ 838 /* Call ppc_md.hpte_insert */
839 ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */ 839 ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
840 mr r4,r29 /* Retreive va */ 840 mr r4,r29 /* Retrieve va */
841 li r7,HPTE_V_SECONDARY /* !bolted, secondary */ 841 li r7,HPTE_V_SECONDARY /* !bolted, secondary */
842 li r8,MMU_PAGE_64K 842 li r8,MMU_PAGE_64K
843 ld r9,STK_PARM(r9)(r1) /* segment size */ 843 ld r9,STK_PARM(r9)(r1) /* segment size */
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 784a400e0781..dfd764896db0 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -50,9 +50,8 @@ static inline void __tlbie(unsigned long va, int psize, int ssize)
 	case MMU_PAGE_4K:
 		va &= ~0xffful;
 		va |= ssize << 8;
-		asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0),
-			     %2)
-			     : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206)
+		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
+			     : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206)
 			     : "memory");
 		break;
 	default:
@@ -61,9 +60,8 @@ static inline void __tlbie(unsigned long va, int psize, int ssize)
 		va |= penc << 12;
 		va |= ssize << 8;
 		va |= 1; /* L */
-		asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0),
-			     %2)
-			     : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206)
+		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
+			     : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206)
 			     : "memory");
 		break;
 	}
@@ -98,8 +96,8 @@ static inline void __tlbiel(unsigned long va, int psize, int ssize)
 
 static inline void tlbie(unsigned long va, int psize, int ssize, int local)
 {
-	unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
-	int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+	unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
+	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 
 	if (use_local)
 		use_local = mmu_psize_defs[psize].tlbiel;
@@ -503,7 +501,7 @@ static void native_flush_hash_range(unsigned long number, int local)
 		} pte_iterate_hashed_end();
 	}
 
-	if (cpu_has_feature(CPU_FTR_TLBIEL) &&
+	if (mmu_has_feature(MMU_FTR_TLBIEL) &&
 	    mmu_psize_defs[psize].tlbiel && local) {
 		asm volatile("ptesync":::"memory");
 		for (i = 0; i < number; i++) {
@@ -517,7 +515,7 @@ static void native_flush_hash_range(unsigned long number, int local)
 		}
 		asm volatile("ptesync":::"memory");
 	} else {
-		int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 
 		if (lock_tlbie)
 			raw_spin_lock(&native_tlbie_lock);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 09dffe6efa46..26b2872b3d00 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -53,6 +53,7 @@
53#include <asm/sections.h> 53#include <asm/sections.h>
54#include <asm/spu.h> 54#include <asm/spu.h>
55#include <asm/udbg.h> 55#include <asm/udbg.h>
56#include <asm/code-patching.h>
56 57
57#ifdef DEBUG 58#ifdef DEBUG
58#define DBG(fmt...) udbg_printf(fmt) 59#define DBG(fmt...) udbg_printf(fmt)
@@ -258,11 +259,11 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,
258 for (; size >= 4; size -= 4, ++prop) { 259 for (; size >= 4; size -= 4, ++prop) {
259 if (prop[0] == 40) { 260 if (prop[0] == 40) {
260 DBG("1T segment support detected\n"); 261 DBG("1T segment support detected\n");
261 cur_cpu_spec->cpu_features |= CPU_FTR_1T_SEGMENT; 262 cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
262 return 1; 263 return 1;
263 } 264 }
264 } 265 }
265 cur_cpu_spec->cpu_features &= ~CPU_FTR_NO_SLBIE_B; 266 cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
266 return 0; 267 return 0;
267} 268}
268 269
@@ -288,7 +289,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
288 if (prop != NULL) { 289 if (prop != NULL) {
289 DBG("Page sizes from device-tree:\n"); 290 DBG("Page sizes from device-tree:\n");
290 size /= 4; 291 size /= 4;
291 cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE); 292 cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
292 while(size > 0) { 293 while(size > 0) {
293 unsigned int shift = prop[0]; 294 unsigned int shift = prop[0];
294 unsigned int slbenc = prop[1]; 295 unsigned int slbenc = prop[1];
@@ -316,7 +317,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
316 break; 317 break;
317 case 0x18: 318 case 0x18:
318 idx = MMU_PAGE_16M; 319 idx = MMU_PAGE_16M;
319 cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE; 320 cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
320 break; 321 break;
321 case 0x22: 322 case 0x22:
322 idx = MMU_PAGE_16G; 323 idx = MMU_PAGE_16G;
@@ -411,7 +412,7 @@ static void __init htab_init_page_sizes(void)
411 * Not in the device-tree, let's fallback on known size 412 * Not in the device-tree, let's fallback on known size
412 * list for 16M capable GP & GR 413 * list for 16M capable GP & GR
413 */ 414 */
414 if (cpu_has_feature(CPU_FTR_16M_PAGE)) 415 if (mmu_has_feature(MMU_FTR_16M_PAGE))
415 memcpy(mmu_psize_defs, mmu_psize_defaults_gp, 416 memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
416 sizeof(mmu_psize_defaults_gp)); 417 sizeof(mmu_psize_defaults_gp));
417 found: 418 found:
@@ -441,7 +442,7 @@ static void __init htab_init_page_sizes(void)
441 mmu_vmalloc_psize = MMU_PAGE_64K; 442 mmu_vmalloc_psize = MMU_PAGE_64K;
442 if (mmu_linear_psize == MMU_PAGE_4K) 443 if (mmu_linear_psize == MMU_PAGE_4K)
443 mmu_linear_psize = MMU_PAGE_64K; 444 mmu_linear_psize = MMU_PAGE_64K;
444 if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) { 445 if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
445 /* 446 /*
446 * Don't use 64k pages for ioremap on pSeries, since 447 * Don't use 64k pages for ioremap on pSeries, since
447 * that would stop us accessing the HEA ethernet. 448 * that would stop us accessing the HEA ethernet.
@@ -547,15 +548,7 @@ int remove_section_mapping(unsigned long start, unsigned long end)
547} 548}
548#endif /* CONFIG_MEMORY_HOTPLUG */ 549#endif /* CONFIG_MEMORY_HOTPLUG */
549 550
550static inline void make_bl(unsigned int *insn_addr, void *func) 551#define FUNCTION_TEXT(A) ((*(unsigned long *)(A)))
551{
552 unsigned long funcp = *((unsigned long *)func);
553 int offset = funcp - (unsigned long)insn_addr;
554
555 *insn_addr = (unsigned int)(0x48000001 | (offset & 0x03fffffc));
556 flush_icache_range((unsigned long)insn_addr, 4+
557 (unsigned long)insn_addr);
558}
559 552
560static void __init htab_finish_init(void) 553static void __init htab_finish_init(void)
561{ 554{
@@ -570,16 +563,33 @@ static void __init htab_finish_init(void)
570 extern unsigned int *ht64_call_hpte_remove; 563 extern unsigned int *ht64_call_hpte_remove;
571 extern unsigned int *ht64_call_hpte_updatepp; 564 extern unsigned int *ht64_call_hpte_updatepp;
572 565
573 make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert); 566 patch_branch(ht64_call_hpte_insert1,
574 make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert); 567 FUNCTION_TEXT(ppc_md.hpte_insert),
575 make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove); 568 BRANCH_SET_LINK);
576 make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp); 569 patch_branch(ht64_call_hpte_insert2,
570 FUNCTION_TEXT(ppc_md.hpte_insert),
571 BRANCH_SET_LINK);
572 patch_branch(ht64_call_hpte_remove,
573 FUNCTION_TEXT(ppc_md.hpte_remove),
574 BRANCH_SET_LINK);
575 patch_branch(ht64_call_hpte_updatepp,
576 FUNCTION_TEXT(ppc_md.hpte_updatepp),
577 BRANCH_SET_LINK);
578
577#endif /* CONFIG_PPC_HAS_HASH_64K */ 579#endif /* CONFIG_PPC_HAS_HASH_64K */
578 580
579 make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert); 581 patch_branch(htab_call_hpte_insert1,
580 make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert); 582 FUNCTION_TEXT(ppc_md.hpte_insert),
581 make_bl(htab_call_hpte_remove, ppc_md.hpte_remove); 583 BRANCH_SET_LINK);
582 make_bl(htab_call_hpte_updatepp, ppc_md.hpte_updatepp); 584 patch_branch(htab_call_hpte_insert2,
585 FUNCTION_TEXT(ppc_md.hpte_insert),
586 BRANCH_SET_LINK);
587 patch_branch(htab_call_hpte_remove,
588 FUNCTION_TEXT(ppc_md.hpte_remove),
589 BRANCH_SET_LINK);
590 patch_branch(htab_call_hpte_updatepp,
591 FUNCTION_TEXT(ppc_md.hpte_updatepp),
592 BRANCH_SET_LINK);
583} 593}
584 594
585static void __init htab_initialize(void) 595static void __init htab_initialize(void)
@@ -588,7 +598,7 @@ static void __init htab_initialize(void)
588 unsigned long pteg_count; 598 unsigned long pteg_count;
589 unsigned long prot; 599 unsigned long prot;
590 unsigned long base = 0, size = 0, limit; 600 unsigned long base = 0, size = 0, limit;
591 int i; 601 struct memblock_region *reg;
592 602
593 DBG(" -> htab_initialize()\n"); 603 DBG(" -> htab_initialize()\n");
594 604
@@ -598,7 +608,7 @@ static void __init htab_initialize(void)
598 /* Initialize page sizes */ 608 /* Initialize page sizes */
599 htab_init_page_sizes(); 609 htab_init_page_sizes();
600 610
601 if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) { 611 if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
602 mmu_kernel_ssize = MMU_SEGSIZE_1T; 612 mmu_kernel_ssize = MMU_SEGSIZE_1T;
603 mmu_highuser_ssize = MMU_SEGSIZE_1T; 613 mmu_highuser_ssize = MMU_SEGSIZE_1T;
604 printk(KERN_INFO "Using 1TB segments\n"); 614 printk(KERN_INFO "Using 1TB segments\n");
@@ -625,7 +635,7 @@ static void __init htab_initialize(void)
625 if (machine_is(cell)) 635 if (machine_is(cell))
626 limit = 0x80000000; 636 limit = 0x80000000;
627 else 637 else
628 limit = 0; 638 limit = MEMBLOCK_ALLOC_ANYWHERE;
629 639
630 table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit); 640 table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit);
631 641
@@ -649,7 +659,7 @@ static void __init htab_initialize(void)
649#ifdef CONFIG_DEBUG_PAGEALLOC 659#ifdef CONFIG_DEBUG_PAGEALLOC
650 linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; 660 linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
651 linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count, 661 linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count,
652 1, memblock.rmo_size)); 662 1, ppc64_rma_size));
653 memset(linear_map_hash_slots, 0, linear_map_hash_count); 663 memset(linear_map_hash_slots, 0, linear_map_hash_count);
654#endif /* CONFIG_DEBUG_PAGEALLOC */ 664#endif /* CONFIG_DEBUG_PAGEALLOC */
655 665
@@ -659,9 +669,9 @@ static void __init htab_initialize(void)
659 */ 669 */
660 670
661 /* create bolted the linear mapping in the hash table */ 671 /* create bolted the linear mapping in the hash table */
662 for (i=0; i < memblock.memory.cnt; i++) { 672 for_each_memblock(memory, reg) {
663 base = (unsigned long)__va(memblock.memory.region[i].base); 673 base = (unsigned long)__va(reg->base);
664 size = memblock.memory.region[i].size; 674 size = reg->size;
665 675
666 DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", 676 DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
667 base, size, prot); 677 base, size, prot);
@@ -696,7 +706,8 @@ static void __init htab_initialize(void)
696#endif /* CONFIG_U3_DART */ 706#endif /* CONFIG_U3_DART */
697 BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), 707 BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
698 prot, mmu_linear_psize, mmu_kernel_ssize)); 708 prot, mmu_linear_psize, mmu_kernel_ssize));
699 } 709 }
710 memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
700 711
701 /* 712 /*
702 * If we have a memory_limit and we've allocated TCEs then we need to 713 * If we have a memory_limit and we've allocated TCEs then we need to
@@ -738,7 +749,7 @@ void __init early_init_mmu(void)
738 749
739 /* Initialize stab / SLB management except on iSeries 750 /* Initialize stab / SLB management except on iSeries
740 */ 751 */
741 if (cpu_has_feature(CPU_FTR_SLB)) 752 if (mmu_has_feature(MMU_FTR_SLB))
742 slb_initialize(); 753 slb_initialize();
743 else if (!firmware_has_feature(FW_FEATURE_ISERIES)) 754 else if (!firmware_has_feature(FW_FEATURE_ISERIES))
744 stab_initialize(get_paca()->stab_real); 755 stab_initialize(get_paca()->stab_real);
@@ -752,10 +763,10 @@ void __cpuinit early_init_mmu_secondary(void)
752 mtspr(SPRN_SDR1, _SDR1); 763 mtspr(SPRN_SDR1, _SDR1);
753 764
754 /* Initialize STAB/SLB. We use a virtual address as it works 765 /* Initialize STAB/SLB. We use a virtual address as it works
755 * in real mode on pSeries and we want a virutal address on 766 * in real mode on pSeries and we want a virtual address on
756 * iSeries anyway 767 * iSeries anyway
757 */ 768 */
758 if (cpu_has_feature(CPU_FTR_SLB)) 769 if (mmu_has_feature(MMU_FTR_SLB))
759 slb_initialize(); 770 slb_initialize();
760 else 771 else
761 stab_initialize(get_paca()->stab_addr); 772 stab_initialize(get_paca()->stab_addr);
@@ -1069,7 +1080,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
1069 unsigned long access, unsigned long trap) 1080 unsigned long access, unsigned long trap)
1070{ 1081{
1071 unsigned long vsid; 1082 unsigned long vsid;
1072 void *pgdir; 1083 pgd_t *pgdir;
1073 pte_t *ptep; 1084 pte_t *ptep;
1074 unsigned long flags; 1085 unsigned long flags;
1075 int rc, ssize, local = 0; 1086 int rc, ssize, local = 0;
@@ -1122,7 +1133,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
1122 else 1133 else
1123#endif /* CONFIG_PPC_HAS_HASH_64K */ 1134#endif /* CONFIG_PPC_HAS_HASH_64K */
1124 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, 1135 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize,
1125 subpage_protection(pgdir, ea)); 1136 subpage_protection(mm, ea));
1126 1137
1127 /* Dump some info in case of hash insertion failure, they should 1138 /* Dump some info in case of hash insertion failure, they should
1128 * never happen so it is really useful to know if/when they do 1139 * never happen so it is really useful to know if/when they do
@@ -1247,3 +1258,23 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
1247 local_irq_restore(flags); 1258 local_irq_restore(flags);
1248} 1259}
1249#endif /* CONFIG_DEBUG_PAGEALLOC */ 1260#endif /* CONFIG_DEBUG_PAGEALLOC */
1261
1262void setup_initial_memory_limit(phys_addr_t first_memblock_base,
1263 phys_addr_t first_memblock_size)
1264{
1265 /* We don't currently support the first MEMBLOCK not mapping 0
1266 * physical on those processors
1267 */
1268 BUG_ON(first_memblock_base != 0);
1269
1270 /* On LPAR systems, the first entry is our RMA region,
1271 * non-LPAR 64-bit hash MMU systems don't have a limitation
1272 * on real mode access, but using the first entry works well
1273 * enough. We also clamp it to 1G to avoid some funky things
1274 * such as RTAS bugs etc...
1275 */
1276 ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
1277
1278 /* Finally limit subsequent allocations */
1279 memblock_set_current_limit(ppc64_rma_size);
1280}
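
The make_bl() helper removed above hand-encoded a PowerPC branch-and-link instruction; the series replaces it with the generic patch_branch() from asm/code-patching.h (hence the new include), called with BRANCH_SET_LINK so the patched instruction is still a bl. For reference, a sketch of the encoding the old helper produced; the wrapper function here is illustrative:

/* Illustrative only: an I-form PowerPC branch has opcode 18 in the top six
 * bits (0x48000000), an absolute-address bit, and a link bit.  0x48000001 is
 * therefore "bl" with a zero displacement; the signed 26-bit displacement is
 * OR-ed in with its low two bits kept clear.
 */
static unsigned int make_bl_insn_example(unsigned long target, unsigned long pc)
{
	long offset = target - pc;

	return 0x48000001 | (offset & 0x03fffffc);
}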
diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c
index 857d4173f9c6..e7450bdbe83a 100644
--- a/arch/powerpc/mm/highmem.c
+++ b/arch/powerpc/mm/highmem.c
@@ -29,17 +29,17 @@
  * be used in IRQ contexts, so in some (very limited) cases we need
  * it.
  */
-void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
+void *kmap_atomic_prot(struct page *page, pgprot_t prot)
 {
-	unsigned int idx;
 	unsigned long vaddr;
+	int idx, type;
 
 	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
 	pagefault_disable();
 	if (!PageHighMem(page))
 		return page_address(page);
 
-	debug_kmap_atomic(type);
+	type = kmap_atomic_idx_push();
 	idx = type + KM_TYPE_NR*smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 #ifdef CONFIG_DEBUG_HIGHMEM
@@ -52,26 +52,35 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
 }
 EXPORT_SYMBOL(kmap_atomic_prot);
 
-void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type)
+void __kunmap_atomic(void *kvaddr)
 {
-#ifdef CONFIG_DEBUG_HIGHMEM
 	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
-	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
+	int type;
 
 	if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
 		pagefault_enable();
 		return;
 	}
 
-	BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+	type = kmap_atomic_idx();
 
-	/*
-	 * force other mappings to Oops if they'll try to access
-	 * this pte without first remap it
-	 */
-	pte_clear(&init_mm, vaddr, kmap_pte-idx);
-	local_flush_tlb_page(NULL, vaddr);
+#ifdef CONFIG_DEBUG_HIGHMEM
+	{
+		unsigned int idx;
+
+		idx = type + KM_TYPE_NR * smp_processor_id();
+		BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+
+		/*
+		 * force other mappings to Oops if they'll try to access
+		 * this pte without first remap it
+		 */
+		pte_clear(&init_mm, vaddr, kmap_pte-idx);
+		local_flush_tlb_page(NULL, vaddr);
+	}
 #endif
+
+	kmap_atomic_idx_pop();
 	pagefault_enable();
 }
-EXPORT_SYMBOL(kunmap_atomic_notypecheck);
+EXPORT_SYMBOL(__kunmap_atomic);
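
The change above tracks the generic kmap_atomic rework: instead of the caller choosing a km_type slot, the slot index is kept on a small per-CPU stack managed by kmap_atomic_idx_push()/kmap_atomic_idx()/kmap_atomic_idx_pop(), which is why atomic mappings must now be released in LIFO order. A simplified sketch of that bookkeeping (the real helpers live in the generic highmem code and carry extra debug checks; the names below are made up):

static DEFINE_PER_CPU(int, example_kmap_idx);

static inline int example_idx_push(void)
{
	return __get_cpu_var(example_kmap_idx)++;	/* slot for this nesting level */
}

static inline int example_idx(void)
{
	return __get_cpu_var(example_kmap_idx) - 1;	/* innermost slot in use */
}

static inline void example_idx_pop(void)
{
	__get_cpu_var(example_kmap_idx)--;		/* hence LIFO kunmap_atomic */
}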
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 9bb249c3046e..0b9a5c1901b9 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -529,7 +529,7 @@ static int __init hugetlbpage_init(void)
 {
 	int psize;
 
-	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
+	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
 		return -ENODEV;
 
 	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 6a6975dc2654..5de0f254dbb5 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -92,12 +92,6 @@ int __allow_ioremap_reserved;
92unsigned long __max_low_memory = MAX_LOW_MEM; 92unsigned long __max_low_memory = MAX_LOW_MEM;
93 93
94/* 94/*
95 * address of the limit of what is accessible with initial MMU setup -
96 * 256MB usually, but only 16MB on 601.
97 */
98phys_addr_t __initial_memory_limit_addr = (phys_addr_t)0x10000000;
99
100/*
101 * Check for command-line options that affect what MMU_init will do. 95 * Check for command-line options that affect what MMU_init will do.
102 */ 96 */
103void MMU_setup(void) 97void MMU_setup(void)
@@ -126,13 +120,6 @@ void __init MMU_init(void)
126 if (ppc_md.progress) 120 if (ppc_md.progress)
127 ppc_md.progress("MMU:enter", 0x111); 121 ppc_md.progress("MMU:enter", 0x111);
128 122
129 /* 601 can only access 16MB at the moment */
130 if (PVR_VER(mfspr(SPRN_PVR)) == 1)
131 __initial_memory_limit_addr = 0x01000000;
132 /* 8xx can only access 8MB at the moment */
133 if (PVR_VER(mfspr(SPRN_PVR)) == 0x50)
134 __initial_memory_limit_addr = 0x00800000;
135
136 /* parse args from command line */ 123 /* parse args from command line */
137 MMU_setup(); 124 MMU_setup();
138 125
@@ -161,7 +148,7 @@ void __init MMU_init(void)
161 lowmem_end_addr = memstart_addr + total_lowmem; 148 lowmem_end_addr = memstart_addr + total_lowmem;
162#ifndef CONFIG_HIGHMEM 149#ifndef CONFIG_HIGHMEM
163 total_memory = total_lowmem; 150 total_memory = total_lowmem;
164 memblock_enforce_memory_limit(lowmem_end_addr); 151 memblock_enforce_memory_limit(total_lowmem);
165 memblock_analyze(); 152 memblock_analyze();
166#endif /* CONFIG_HIGHMEM */ 153#endif /* CONFIG_HIGHMEM */
167 } 154 }
@@ -190,20 +177,18 @@ void __init MMU_init(void)
190#ifdef CONFIG_BOOTX_TEXT 177#ifdef CONFIG_BOOTX_TEXT
191 btext_unmap(); 178 btext_unmap();
192#endif 179#endif
180
181 /* Shortly after that, the entire linear mapping will be available */
182 memblock_set_current_limit(lowmem_end_addr);
193} 183}
194 184
195/* This is only called until mem_init is done. */ 185/* This is only called until mem_init is done. */
196void __init *early_get_page(void) 186void __init *early_get_page(void)
197{ 187{
198 void *p; 188 if (init_bootmem_done)
199 189 return alloc_bootmem_pages(PAGE_SIZE);
200 if (init_bootmem_done) { 190 else
201 p = alloc_bootmem_pages(PAGE_SIZE); 191 return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
202 } else {
203 p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE,
204 __initial_memory_limit_addr));
205 }
206 return p;
207} 192}
208 193
209/* Free up now-unused memory */ 194/* Free up now-unused memory */
@@ -238,17 +223,16 @@ void free_initmem(void)
238#undef FREESEC 223#undef FREESEC
239} 224}
240 225
241#ifdef CONFIG_BLK_DEV_INITRD 226#ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */
242void free_initrd_mem(unsigned long start, unsigned long end) 227void setup_initial_memory_limit(phys_addr_t first_memblock_base,
228 phys_addr_t first_memblock_size)
243{ 229{
244 if (start < end) 230 /* We don't currently support the first MEMBLOCK not mapping 0
245 printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); 231 * physical on those processors
246 for (; start < end; start += PAGE_SIZE) { 232 */
247 ClearPageReserved(virt_to_page(start)); 233 BUG_ON(first_memblock_base != 0);
248 init_page_count(virt_to_page(start));
249 free_page(start);
250 totalram_pages++;
251 }
252}
253#endif
254 234
235 /* 8xx can only access 8MB at the moment */
236 memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000));
237}
238#endif /* CONFIG_8xx */
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index ace85fa74b29..f6dbb4c20e64 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -99,20 +99,6 @@ void free_initmem(void)
 	       ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10);
 }
 
-#ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
-{
-	if (start < end)
-		printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
-	for (; start < end; start += PAGE_SIZE) {
-		ClearPageReserved(virt_to_page(start));
-		init_page_count(virt_to_page(start));
-		free_page(start);
-		totalram_pages++;
-	}
-}
-#endif
-
 static void pgd_ctor(void *addr)
 {
 	memset(addr, 0, PGD_TABLE_SIZE);
@@ -330,3 +316,4 @@ int __meminit vmemmap_populate(struct page *start_page,
 	return 0;
 }
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 1a84a8d00005..29d4dde65c45 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -82,18 +82,11 @@ int page_is_ram(unsigned long pfn)
82 return pfn < max_pfn; 82 return pfn < max_pfn;
83#else 83#else
84 unsigned long paddr = (pfn << PAGE_SHIFT); 84 unsigned long paddr = (pfn << PAGE_SHIFT);
85 int i; 85 struct memblock_region *reg;
86 for (i=0; i < memblock.memory.cnt; i++) {
87 unsigned long base;
88 86
89 base = memblock.memory.region[i].base; 87 for_each_memblock(memory, reg)
90 88 if (paddr >= reg->base && paddr < (reg->base + reg->size))
91 if ((paddr >= base) &&
92 (paddr < (base + memblock.memory.region[i].size))) {
93 return 1; 89 return 1;
94 }
95 }
96
97 return 0; 90 return 0;
98#endif 91#endif
99} 92}
@@ -149,23 +142,19 @@ int
149walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, 142walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
150 void *arg, int (*func)(unsigned long, unsigned long, void *)) 143 void *arg, int (*func)(unsigned long, unsigned long, void *))
151{ 144{
152 struct memblock_property res; 145 struct memblock_region *reg;
153 unsigned long pfn, len; 146 unsigned long end_pfn = start_pfn + nr_pages;
154 u64 end; 147 unsigned long tstart, tend;
155 int ret = -1; 148 int ret = -1;
156 149
157 res.base = (u64) start_pfn << PAGE_SHIFT; 150 for_each_memblock(memory, reg) {
158 res.size = (u64) nr_pages << PAGE_SHIFT; 151 tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
159 152 tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
160 end = res.base + res.size - 1; 153 if (tstart >= tend)
161 while ((res.base < end) && (memblock_find(&res) >= 0)) { 154 continue;
162 pfn = (unsigned long)(res.base >> PAGE_SHIFT); 155 ret = (*func)(tstart, tend - tstart, arg);
163 len = (unsigned long)(res.size >> PAGE_SHIFT);
164 ret = (*func)(pfn, len, arg);
165 if (ret) 156 if (ret)
166 break; 157 break;
167 res.base += (res.size + 1);
168 res.size = (end - res.base + 1);
169 } 158 }
170 return ret; 159 return ret;
171} 160}
@@ -179,9 +168,9 @@ EXPORT_SYMBOL_GPL(walk_system_ram_range);
179#ifndef CONFIG_NEED_MULTIPLE_NODES 168#ifndef CONFIG_NEED_MULTIPLE_NODES
180void __init do_init_bootmem(void) 169void __init do_init_bootmem(void)
181{ 170{
182 unsigned long i;
183 unsigned long start, bootmap_pages; 171 unsigned long start, bootmap_pages;
184 unsigned long total_pages; 172 unsigned long total_pages;
173 struct memblock_region *reg;
185 int boot_mapsize; 174 int boot_mapsize;
186 175
187 max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; 176 max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
@@ -204,10 +193,10 @@ void __init do_init_bootmem(void)
204 boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn); 193 boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn);
205 194
206 /* Add active regions with valid PFNs */ 195 /* Add active regions with valid PFNs */
207 for (i = 0; i < memblock.memory.cnt; i++) { 196 for_each_memblock(memory, reg) {
208 unsigned long start_pfn, end_pfn; 197 unsigned long start_pfn, end_pfn;
209 start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; 198 start_pfn = memblock_region_memory_base_pfn(reg);
210 end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); 199 end_pfn = memblock_region_memory_end_pfn(reg);
211 add_active_range(0, start_pfn, end_pfn); 200 add_active_range(0, start_pfn, end_pfn);
212 } 201 }
213 202
@@ -218,29 +207,21 @@ void __init do_init_bootmem(void)
218 free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT); 207 free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT);
219 208
220 /* reserve the sections we're already using */ 209 /* reserve the sections we're already using */
221 for (i = 0; i < memblock.reserved.cnt; i++) { 210 for_each_memblock(reserved, reg) {
222 unsigned long addr = memblock.reserved.region[i].base + 211 unsigned long top = reg->base + reg->size - 1;
223 memblock_size_bytes(&memblock.reserved, i) - 1; 212 if (top < lowmem_end_addr)
224 if (addr < lowmem_end_addr) 213 reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
225 reserve_bootmem(memblock.reserved.region[i].base, 214 else if (reg->base < lowmem_end_addr) {
226 memblock_size_bytes(&memblock.reserved, i), 215 unsigned long trunc_size = lowmem_end_addr - reg->base;
227 BOOTMEM_DEFAULT); 216 reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT);
228 else if (memblock.reserved.region[i].base < lowmem_end_addr) {
229 unsigned long adjusted_size = lowmem_end_addr -
230 memblock.reserved.region[i].base;
231 reserve_bootmem(memblock.reserved.region[i].base,
232 adjusted_size, BOOTMEM_DEFAULT);
233 } 217 }
234 } 218 }
235#else 219#else
236 free_bootmem_with_active_regions(0, max_pfn); 220 free_bootmem_with_active_regions(0, max_pfn);
237 221
238 /* reserve the sections we're already using */ 222 /* reserve the sections we're already using */
239 for (i = 0; i < memblock.reserved.cnt; i++) 223 for_each_memblock(reserved, reg)
240 reserve_bootmem(memblock.reserved.region[i].base, 224 reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
241 memblock_size_bytes(&memblock.reserved, i),
242 BOOTMEM_DEFAULT);
243
244#endif 225#endif
245 /* XXX need to clip this if using highmem? */ 226 /* XXX need to clip this if using highmem? */
246 sparse_memory_present_with_active_regions(0); 227 sparse_memory_present_with_active_regions(0);
@@ -251,22 +232,15 @@ void __init do_init_bootmem(void)
251/* mark pages that don't exist as nosave */ 232/* mark pages that don't exist as nosave */
252static int __init mark_nonram_nosave(void) 233static int __init mark_nonram_nosave(void)
253{ 234{
254 unsigned long memblock_next_region_start_pfn, 235 struct memblock_region *reg, *prev = NULL;
255 memblock_region_max_pfn; 236
256 int i; 237 for_each_memblock(memory, reg) {
257 238 if (prev &&
258 for (i = 0; i < memblock.memory.cnt - 1; i++) { 239 memblock_region_memory_end_pfn(prev) < memblock_region_memory_base_pfn(reg))
259 memblock_region_max_pfn = 240 register_nosave_region(memblock_region_memory_end_pfn(prev),
260 (memblock.memory.region[i].base >> PAGE_SHIFT) + 241 memblock_region_memory_base_pfn(reg));
261 (memblock.memory.region[i].size >> PAGE_SHIFT); 242 prev = reg;
262 memblock_next_region_start_pfn =
263 memblock.memory.region[i+1].base >> PAGE_SHIFT;
264
265 if (memblock_region_max_pfn < memblock_next_region_start_pfn)
266 register_nosave_region(memblock_region_max_pfn,
267 memblock_next_region_start_pfn);
268 } 243 }
269
270 return 0; 244 return 0;
271} 245}
272 246
@@ -327,7 +301,7 @@ void __init mem_init(void)
327 swiotlb_init(1); 301 swiotlb_init(1);
328#endif 302#endif
329 303
330 num_physpages = memblock.memory.size >> PAGE_SHIFT; 304 num_physpages = memblock_phys_mem_size() >> PAGE_SHIFT;
331 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); 305 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
332 306
333#ifdef CONFIG_NEED_MULTIPLE_NODES 307#ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -408,6 +382,25 @@ void __init mem_init(void)
408 mem_init_done = 1; 382 mem_init_done = 1;
409} 383}
410 384
385#ifdef CONFIG_BLK_DEV_INITRD
386void __init free_initrd_mem(unsigned long start, unsigned long end)
387{
388 if (start >= end)
389 return;
390
391 start = _ALIGN_DOWN(start, PAGE_SIZE);
392 end = _ALIGN_UP(end, PAGE_SIZE);
393 pr_info("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
394
395 for (; start < end; start += PAGE_SIZE) {
396 ClearPageReserved(virt_to_page(start));
397 init_page_count(virt_to_page(start));
398 free_page(start);
399 totalram_pages++;
400 }
401}
402#endif
403
411/* 404/*
412 * This is called when a page has been modified by the kernel. 405 * This is called when a page has been modified by the kernel.
413 * It just marks the page as not i-cache clean. We do the i-cache 406 * It just marks the page as not i-cache clean. We do the i-cache
@@ -450,7 +443,7 @@ void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
450 clear_page(page); 443 clear_page(page);
451 444
452 /* 445 /*
453 * We shouldnt have to do this, but some versions of glibc 446 * We shouldn't have to do this, but some versions of glibc
454 * require it (ld.so assumes zero filled pages are icache clean) 447 * require it (ld.so assumes zero filled pages are icache clean)
455 * - Anton 448 * - Anton
456 */ 449 */
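
The mem.c hunks above all follow one conversion: open-coded loops over memblock.memory.region[i] and memblock.reserved.region[i] become the for_each_memblock() iterator, with memblock_region_memory_base_pfn() and memblock_region_memory_end_pfn() giving page-frame bounds. A minimal sketch of the pattern, using only helpers that appear in this diff; the counting function itself is illustrative:

#include <linux/memblock.h>

static unsigned long __init count_ram_pages_example(void)
{
	struct memblock_region *reg;
	unsigned long pages = 0;

	for_each_memblock(memory, reg)		/* walk every RAM region */
		pages += memblock_region_memory_end_pfn(reg) -
			 memblock_region_memory_base_pfn(reg);

	return pages;
}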
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 2535828aa84b..3bafc3deca6d 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -20,9 +20,205 @@
20#include <linux/idr.h> 20#include <linux/idr.h>
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/gfp.h> 22#include <linux/gfp.h>
23#include <linux/slab.h>
23 24
24#include <asm/mmu_context.h> 25#include <asm/mmu_context.h>
25 26
27#ifdef CONFIG_PPC_ICSWX
28/*
29 * The processor and its L2 cache cause the icswx instruction to
30 * generate a COP_REQ transaction on PowerBus. The transaction has
31 * no address, and the processor does not perform an MMU access
32 * to authenticate the transaction. The command portion of the
33 * PowerBus COP_REQ transaction includes the LPAR_ID (LPID) and
34 * the coprocessor Process ID (PID), which the coprocessor compares
35 * to the authorized LPID and PID held in the coprocessor, to determine
36 * if the process is authorized to generate the transaction.
37 * The data of the COP_REQ transaction is 128-byte or less and is
38 * placed in cacheable memory on a 128-byte cache line boundary.
39 *
40 * The task to use a coprocessor should use use_cop() to allocate
41 * a coprocessor PID before executing icswx instruction. use_cop()
42 * also enables the coprocessor context switching. Drop_cop() is
43 * used to free the coprocessor PID.
44 *
45 * Example:
46 * Host Fabric Interface (HFI) is a PowerPC network coprocessor.
47 * Each HFI have multiple windows. Each HFI window serves as a
48 * network device sending to and receiving from HFI network.
49 * HFI immediate send function uses icswx instruction. The immediate
50 * send function allows small (single cache-line) packets be sent
51 * without using the regular HFI send FIFO and doorbell, which are
52 * much slower than immediate send.
53 *
54 * For each task intending to use HFI immediate send, the HFI driver
55 * calls use_cop() to obtain a coprocessor PID for the task.
56 * The HFI driver then allocate a free HFI window and save the
57 * coprocessor PID to the HFI window to allow the task to use the
58 * HFI window.
59 *
60 * The HFI driver repeatedly creates immediate send packets and
61 * issues icswx instruction to send data through the HFI window.
62 * The HFI compares the coprocessor PID in the CPU PID register
63 * to the PID held in the HFI window to determine if the transaction
64 * is allowed.
65 *
66 * When the task to release the HFI window, the HFI driver calls
67 * drop_cop() to release the coprocessor PID.
68 */
69
70#define COP_PID_NONE 0
71#define COP_PID_MIN (COP_PID_NONE + 1)
72#define COP_PID_MAX (0xFFFF)
73
74static DEFINE_SPINLOCK(mmu_context_acop_lock);
75static DEFINE_IDA(cop_ida);
76
77void switch_cop(struct mm_struct *next)
78{
79 mtspr(SPRN_PID, next->context.cop_pid);
80 mtspr(SPRN_ACOP, next->context.acop);
81}
82
83static int new_cop_pid(struct ida *ida, int min_id, int max_id,
84 spinlock_t *lock)
85{
86 int index;
87 int err;
88
89again:
90 if (!ida_pre_get(ida, GFP_KERNEL))
91 return -ENOMEM;
92
93 spin_lock(lock);
94 err = ida_get_new_above(ida, min_id, &index);
95 spin_unlock(lock);
96
97 if (err == -EAGAIN)
98 goto again;
99 else if (err)
100 return err;
101
102 if (index > max_id) {
103 spin_lock(lock);
104 ida_remove(ida, index);
105 spin_unlock(lock);
106 return -ENOMEM;
107 }
108
109 return index;
110}
111
112static void sync_cop(void *arg)
113{
114 struct mm_struct *mm = arg;
115
116 if (mm == current->active_mm)
117 switch_cop(current->active_mm);
118}
119
120/**
121 * Start using a coprocessor.
122 * @acop: mask of coprocessor to be used.
123 * @mm: The mm the coprocessor to associate with. Most likely current mm.
124 *
125 * Return a positive PID if successful. Negative errno otherwise.
126 * The returned PID will be fed to the coprocessor to determine if an
127 * icswx transaction is authenticated.
128 */
129int use_cop(unsigned long acop, struct mm_struct *mm)
130{
131 int ret;
132
133 if (!cpu_has_feature(CPU_FTR_ICSWX))
134 return -ENODEV;
135
136 if (!mm || !acop)
137 return -EINVAL;
138
139 /* We need to make sure mm_users doesn't change */
140 down_read(&mm->mmap_sem);
141 spin_lock(mm->context.cop_lockp);
142
143 if (mm->context.cop_pid == COP_PID_NONE) {
144 ret = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX,
145 &mmu_context_acop_lock);
146 if (ret < 0)
147 goto out;
148
149 mm->context.cop_pid = ret;
150 }
151 mm->context.acop |= acop;
152
153 sync_cop(mm);
154
155 /*
156 * If this is a threaded process then there might be other threads
157 * running. We need to send an IPI to force them to pick up any
158 * change in PID and ACOP.
159 */
160 if (atomic_read(&mm->mm_users) > 1)
161 smp_call_function(sync_cop, mm, 1);
162
163 ret = mm->context.cop_pid;
164
165out:
166 spin_unlock(mm->context.cop_lockp);
167 up_read(&mm->mmap_sem);
168
169 return ret;
170}
171EXPORT_SYMBOL_GPL(use_cop);
172
173/**
174 * Stop using a coprocessor.
175 * @acop: mask of coprocessor to be stopped.
176 * @mm: The mm the coprocessor associated with.
177 */
178void drop_cop(unsigned long acop, struct mm_struct *mm)
179{
180 int free_pid = COP_PID_NONE;
181
182 if (!cpu_has_feature(CPU_FTR_ICSWX))
183 return;
184
185 if (WARN_ON_ONCE(!mm))
186 return;
187
188 /* We need to make sure mm_users doesn't change */
189 down_read(&mm->mmap_sem);
190 spin_lock(mm->context.cop_lockp);
191
192 mm->context.acop &= ~acop;
193
194 if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) {
195 free_pid = mm->context.cop_pid;
196 mm->context.cop_pid = COP_PID_NONE;
197 }
198
199 sync_cop(mm);
200
201 /*
202 * If this is a threaded process then there might be other threads
203 * running. We need to send an IPI to force them to pick up any
204 * change in PID and ACOP.
205 */
206 if (atomic_read(&mm->mm_users) > 1)
207 smp_call_function(sync_cop, mm, 1);
208
209 if (free_pid != COP_PID_NONE) {
210 spin_lock(&mmu_context_acop_lock);
211 ida_remove(&cop_ida, free_pid);
212 spin_unlock(&mmu_context_acop_lock);
213 }
214
215 spin_unlock(mm->context.cop_lockp);
216 up_read(&mm->mmap_sem);
217}
218EXPORT_SYMBOL_GPL(drop_cop);
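The matching release path, again only a sketch using the hypothetical ACOP_EXAMPLE_TYPE bit from the example above; passing mm->context.acop instead, as destroy_context() does further down, drops every type at once.

	static void example_detach_cop(struct mm_struct *mm)
	{
		/* Clear our type; the coprocessor PID is freed once context.acop hits 0 */
		drop_cop(ACOP_EXAMPLE_TYPE, mm);
	}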
219
220#endif /* CONFIG_PPC_ICSWX */
221
26static DEFINE_SPINLOCK(mmu_context_lock); 222static DEFINE_SPINLOCK(mmu_context_lock);
27static DEFINE_IDA(mmu_context_ida); 223static DEFINE_IDA(mmu_context_ida);
28 224
@@ -31,7 +227,6 @@ static DEFINE_IDA(mmu_context_ida);
31 * Each segment contains 2^28 bytes. Each context maps 2^44 bytes, 227 * Each segment contains 2^28 bytes. Each context maps 2^44 bytes,
32 * so we can support 2^19-1 contexts (19 == 35 + 28 - 44). 228 * so we can support 2^19-1 contexts (19 == 35 + 28 - 44).
33 */ 229 */
34#define NO_CONTEXT 0
35#define MAX_CONTEXT ((1UL << 19) - 1) 230#define MAX_CONTEXT ((1UL << 19) - 1)
36 231
37int __init_new_context(void) 232int __init_new_context(void)
@@ -79,6 +274,16 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
79 slice_set_user_psize(mm, mmu_virtual_psize); 274 slice_set_user_psize(mm, mmu_virtual_psize);
80 subpage_prot_init_new_context(mm); 275 subpage_prot_init_new_context(mm);
81 mm->context.id = index; 276 mm->context.id = index;
277#ifdef CONFIG_PPC_ICSWX
278 mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
279 if (!mm->context.cop_lockp) {
280 __destroy_context(index);
281 subpage_prot_free(mm);
282 mm->context.id = MMU_NO_CONTEXT;
283 return -ENOMEM;
284 }
285 spin_lock_init(mm->context.cop_lockp);
286#endif /* CONFIG_PPC_ICSWX */
82 287
83 return 0; 288 return 0;
84} 289}
@@ -93,7 +298,12 @@ EXPORT_SYMBOL_GPL(__destroy_context);
93 298
94void destroy_context(struct mm_struct *mm) 299void destroy_context(struct mm_struct *mm)
95{ 300{
301#ifdef CONFIG_PPC_ICSWX
302 drop_cop(mm->context.acop, mm);
303 kfree(mm->context.cop_lockp);
304 mm->context.cop_lockp = NULL;
305#endif /* CONFIG_PPC_ICSWX */
96 __destroy_context(mm->context.id); 306 __destroy_context(mm->context.id);
97 subpage_prot_free(mm); 307 subpage_prot_free(mm);
98 mm->context.id = NO_CONTEXT; 308 mm->context.id = MMU_NO_CONTEXT;
99} 309}
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index ddfd7ad4e1d6..336807de550e 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -111,8 +111,8 @@ static unsigned int steal_context_smp(unsigned int id)
111 * a core map instead but this will do for now. 111 * a core map instead but this will do for now.
112 */ 112 */
113 for_each_cpu(cpu, mm_cpumask(mm)) { 113 for_each_cpu(cpu, mm_cpumask(mm)) {
114 for (i = cpu_first_thread_in_core(cpu); 114 for (i = cpu_first_thread_sibling(cpu);
115 i <= cpu_last_thread_in_core(cpu); i++) 115 i <= cpu_last_thread_sibling(cpu); i++)
116 __set_bit(id, stale_map[i]); 116 __set_bit(id, stale_map[i]);
117 cpu = i - 1; 117 cpu = i - 1;
118 } 118 }
@@ -264,14 +264,14 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
264 */ 264 */
265 if (test_bit(id, stale_map[cpu])) { 265 if (test_bit(id, stale_map[cpu])) {
266 pr_hardcont(" | stale flush %d [%d..%d]", 266 pr_hardcont(" | stale flush %d [%d..%d]",
267 id, cpu_first_thread_in_core(cpu), 267 id, cpu_first_thread_sibling(cpu),
268 cpu_last_thread_in_core(cpu)); 268 cpu_last_thread_sibling(cpu));
269 269
270 local_flush_tlb_mm(next); 270 local_flush_tlb_mm(next);
271 271
272 /* XXX This clear should ultimately be part of local_flush_tlb_mm */ 272 /* XXX This clear should ultimately be part of local_flush_tlb_mm */
273 for (i = cpu_first_thread_in_core(cpu); 273 for (i = cpu_first_thread_sibling(cpu);
274 i <= cpu_last_thread_in_core(cpu); i++) { 274 i <= cpu_last_thread_sibling(cpu); i++) {
275 __clear_bit(id, stale_map[i]); 275 __clear_bit(id, stale_map[i]);
276 } 276 }
277 } 277 }
@@ -334,16 +334,18 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
 334 /* We don't touch CPU 0 map, it's allocated at boot and kept 334 /* We don't touch CPU 0 map, it's allocated at boot and kept
335 * around forever 335 * around forever
336 */ 336 */
337 if (cpu == 0) 337 if (cpu == boot_cpuid)
338 return NOTIFY_OK; 338 return NOTIFY_OK;
339 339
340 switch (action) { 340 switch (action) {
341 case CPU_ONLINE: 341 case CPU_UP_PREPARE:
342 case CPU_ONLINE_FROZEN: 342 case CPU_UP_PREPARE_FROZEN:
343 pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu); 343 pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
344 stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL); 344 stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
345 break; 345 break;
346#ifdef CONFIG_HOTPLUG_CPU 346#ifdef CONFIG_HOTPLUG_CPU
347 case CPU_UP_CANCELED:
348 case CPU_UP_CANCELED_FROZEN:
347 case CPU_DEAD: 349 case CPU_DEAD:
348 case CPU_DEAD_FROZEN: 350 case CPU_DEAD_FROZEN:
349 pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu); 351 pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
@@ -407,7 +409,17 @@ void __init mmu_context_init(void)
407 } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) { 409 } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
408 first_context = 1; 410 first_context = 1;
409 last_context = 65535; 411 last_context = 65535;
410 } else { 412 } else
413#ifdef CONFIG_PPC_BOOK3E_MMU
414 if (mmu_has_feature(MMU_FTR_TYPE_3E)) {
415 u32 mmucfg = mfspr(SPRN_MMUCFG);
416 u32 pid_bits = (mmucfg & MMUCFG_PIDSIZE_MASK)
417 >> MMUCFG_PIDSIZE_SHIFT;
418 first_context = 1;
419 last_context = (1UL << (pid_bits + 1)) - 1;
420 } else
421#endif
422 {
411 first_context = 1; 423 first_context = 1;
412 last_context = 255; 424 last_context = 255;
413 } 425 }
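A quick worked example for the Book3E branch above: the +1 in (1UL << (pid_bits + 1)) - 1 reflects that the MMUCFG PIDSIZE field encodes the PID register width minus one, so a core whose PIDSIZE field reads 13 (a hypothetical value) gets pid_bits = 13 and last_context = (1 << 14) - 1 = 16383, instead of the fixed 255 of the fallback branch.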
@@ -420,9 +432,11 @@ void __init mmu_context_init(void)
420 */ 432 */
421 context_map = alloc_bootmem(CTX_MAP_SIZE); 433 context_map = alloc_bootmem(CTX_MAP_SIZE);
422 context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1)); 434 context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1));
435#ifndef CONFIG_SMP
423 stale_map[0] = alloc_bootmem(CTX_MAP_SIZE); 436 stale_map[0] = alloc_bootmem(CTX_MAP_SIZE);
437#else
438 stale_map[boot_cpuid] = alloc_bootmem(CTX_MAP_SIZE);
424 439
425#ifdef CONFIG_SMP
426 register_cpu_notifier(&mmu_context_cpu_nb); 440 register_cpu_notifier(&mmu_context_cpu_nb);
427#endif 441#endif
428 442
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 63b84a0d3b10..dd0a2589591d 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -140,10 +140,13 @@ extern void wii_memory_fixups(void);
140extern void MMU_init_hw(void); 140extern void MMU_init_hw(void);
141extern unsigned long mmu_mapin_ram(unsigned long top); 141extern unsigned long mmu_mapin_ram(unsigned long top);
142 142
143#elif defined(CONFIG_FSL_BOOKE) 143#elif defined(CONFIG_PPC_FSL_BOOK3E)
144extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx);
145#ifdef CONFIG_PPC32
144extern void MMU_init_hw(void); 146extern void MMU_init_hw(void);
145extern unsigned long mmu_mapin_ram(unsigned long top); 147extern unsigned long mmu_mapin_ram(unsigned long top);
146extern void adjust_total_lowmem(void); 148extern void adjust_total_lowmem(void);
149#endif
147extern void loadcam_entry(unsigned int index); 150extern void loadcam_entry(unsigned int index);
148 151
149struct tlbcam { 152struct tlbcam {
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 002878ccf90b..2164006fe170 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -20,10 +20,15 @@
20#include <linux/memblock.h> 20#include <linux/memblock.h>
21#include <linux/of.h> 21#include <linux/of.h>
22#include <linux/pfn.h> 22#include <linux/pfn.h>
23#include <linux/cpuset.h>
24#include <linux/node.h>
23#include <asm/sparsemem.h> 25#include <asm/sparsemem.h>
24#include <asm/prom.h> 26#include <asm/prom.h>
25#include <asm/system.h> 27#include <asm/system.h>
26#include <asm/smp.h> 28#include <asm/smp.h>
29#include <asm/firmware.h>
30#include <asm/paca.h>
31#include <asm/hvcall.h>
27 32
28static int numa_enabled = 1; 33static int numa_enabled = 1;
29 34
@@ -163,7 +168,7 @@ static void __init get_node_active_region(unsigned long start_pfn,
163 work_with_active_regions(nid, get_active_region_work_fn, node_ar); 168 work_with_active_regions(nid, get_active_region_work_fn, node_ar);
164} 169}
165 170
166static void __cpuinit map_cpu_to_node(int cpu, int node) 171static void map_cpu_to_node(int cpu, int node)
167{ 172{
168 numa_cpu_lookup_table[cpu] = node; 173 numa_cpu_lookup_table[cpu] = node;
169 174
@@ -173,7 +178,7 @@ static void __cpuinit map_cpu_to_node(int cpu, int node)
173 cpumask_set_cpu(cpu, node_to_cpumask_map[node]); 178 cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
174} 179}
175 180
176#ifdef CONFIG_HOTPLUG_CPU 181#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
177static void unmap_cpu_from_node(unsigned long cpu) 182static void unmap_cpu_from_node(unsigned long cpu)
178{ 183{
179 int node = numa_cpu_lookup_table[cpu]; 184 int node = numa_cpu_lookup_table[cpu];
@@ -181,13 +186,13 @@ static void unmap_cpu_from_node(unsigned long cpu)
181 dbg("removing cpu %lu from node %d\n", cpu, node); 186 dbg("removing cpu %lu from node %d\n", cpu, node);
182 187
183 if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) { 188 if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
184 cpumask_set_cpu(cpu, node_to_cpumask_map[node]); 189 cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
185 } else { 190 } else {
186 printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", 191 printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
187 cpu, node); 192 cpu, node);
188 } 193 }
189} 194}
190#endif /* CONFIG_HOTPLUG_CPU */ 195#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
191 196
192/* must hold reference to node during call */ 197/* must hold reference to node during call */
193static const int *of_get_associativity(struct device_node *dev) 198static const int *of_get_associativity(struct device_node *dev)
@@ -246,32 +251,41 @@ static void initialize_distance_lookup_table(int nid,
246/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa 251/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
247 * info is found. 252 * info is found.
248 */ 253 */
249static int of_node_to_nid_single(struct device_node *device) 254static int associativity_to_nid(const unsigned int *associativity)
250{ 255{
251 int nid = -1; 256 int nid = -1;
252 const unsigned int *tmp;
253 257
254 if (min_common_depth == -1) 258 if (min_common_depth == -1)
255 goto out; 259 goto out;
256 260
257 tmp = of_get_associativity(device); 261 if (associativity[0] >= min_common_depth)
258 if (!tmp) 262 nid = associativity[min_common_depth];
259 goto out;
260
261 if (tmp[0] >= min_common_depth)
262 nid = tmp[min_common_depth];
263 263
264 /* POWER4 LPAR uses 0xffff as invalid node */ 264 /* POWER4 LPAR uses 0xffff as invalid node */
265 if (nid == 0xffff || nid >= MAX_NUMNODES) 265 if (nid == 0xffff || nid >= MAX_NUMNODES)
266 nid = -1; 266 nid = -1;
267 267
268 if (nid > 0 && tmp[0] >= distance_ref_points_depth) 268 if (nid > 0 && associativity[0] >= distance_ref_points_depth)
269 initialize_distance_lookup_table(nid, tmp); 269 initialize_distance_lookup_table(nid, associativity);
270 270
271out: 271out:
272 return nid; 272 return nid;
273} 273}
274 274
275/* Returns the nid associated with the given device tree node,
276 * or -1 if not found.
277 */
278static int of_node_to_nid_single(struct device_node *device)
279{
280 int nid = -1;
281 const unsigned int *tmp;
282
283 tmp = of_get_associativity(device);
284 if (tmp)
285 nid = associativity_to_nid(tmp);
286 return nid;
287}
288
275/* Walk the device tree upwards, looking for an associativity id */ 289/* Walk the device tree upwards, looking for an associativity id */
276int of_node_to_nid(struct device_node *device) 290int of_node_to_nid(struct device_node *device)
277{ 291{
@@ -297,14 +311,13 @@ EXPORT_SYMBOL_GPL(of_node_to_nid);
297static int __init find_min_common_depth(void) 311static int __init find_min_common_depth(void)
298{ 312{
299 int depth; 313 int depth;
300 struct device_node *rtas_root;
301 struct device_node *chosen; 314 struct device_node *chosen;
315 struct device_node *root;
302 const char *vec5; 316 const char *vec5;
303 317
304 rtas_root = of_find_node_by_path("/rtas"); 318 root = of_find_node_by_path("/rtas");
305 319 if (!root)
306 if (!rtas_root) 320 root = of_find_node_by_path("/");
307 return -1;
308 321
309 /* 322 /*
310 * This property is a set of 32-bit integers, each representing 323 * This property is a set of 32-bit integers, each representing
@@ -318,7 +331,7 @@ static int __init find_min_common_depth(void)
318 * NUMA boundary and the following are progressively less significant 331 * NUMA boundary and the following are progressively less significant
319 * boundaries. There can be more than one level of NUMA. 332 * boundaries. There can be more than one level of NUMA.
320 */ 333 */
321 distance_ref_points = of_get_property(rtas_root, 334 distance_ref_points = of_get_property(root,
322 "ibm,associativity-reference-points", 335 "ibm,associativity-reference-points",
323 &distance_ref_points_depth); 336 &distance_ref_points_depth);
324 337
@@ -362,11 +375,11 @@ static int __init find_min_common_depth(void)
362 distance_ref_points_depth = MAX_DISTANCE_REF_POINTS; 375 distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
363 } 376 }
364 377
365 of_node_put(rtas_root); 378 of_node_put(root);
366 return depth; 379 return depth;
367 380
368err: 381err:
369 of_node_put(rtas_root); 382 of_node_put(root);
370 return -1; 383 return -1;
371} 384}
372 385
@@ -426,11 +439,11 @@ static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp)
426} 439}
427 440
428/* 441/*
429 * Retreive and validate the ibm,dynamic-memory property of the device tree. 442 * Retrieve and validate the ibm,dynamic-memory property of the device tree.
430 * 443 *
431 * The layout of the ibm,dynamic-memory property is a number N of memblock 444 * The layout of the ibm,dynamic-memory property is a number N of memblock
432 * list entries followed by N memblock list entries. Each memblock list entry 445 * list entries followed by N memblock list entries. Each memblock list entry
433 * contains information as layed out in the of_drconf_cell struct above. 446 * contains information as laid out in the of_drconf_cell struct above.
434 */ 447 */
435static int of_get_drconf_memory(struct device_node *memory, const u32 **dm) 448static int of_get_drconf_memory(struct device_node *memory, const u32 **dm)
436{ 449{
@@ -454,7 +467,7 @@ static int of_get_drconf_memory(struct device_node *memory, const u32 **dm)
454} 467}
455 468
456/* 469/*
457 * Retreive and validate the ibm,lmb-size property for drconf memory 470 * Retrieve and validate the ibm,lmb-size property for drconf memory
458 * from the device tree. 471 * from the device tree.
459 */ 472 */
460static u64 of_get_lmb_size(struct device_node *memory) 473static u64 of_get_lmb_size(struct device_node *memory)
@@ -476,7 +489,7 @@ struct assoc_arrays {
476}; 489};
477 490
478/* 491/*
479 * Retreive and validate the list of associativity arrays for drconf 492 * Retrieve and validate the list of associativity arrays for drconf
480 * memory from the ibm,associativity-lookup-arrays property of the 493 * memory from the ibm,associativity-lookup-arrays property of the
481 * device tree.. 494 * device tree..
482 * 495 *
@@ -590,7 +603,7 @@ static int __cpuinit cpu_numa_callback(struct notifier_block *nfb,
590 * Returns the size the region should have to enforce the memory limit. 603 * Returns the size the region should have to enforce the memory limit.
591 * This will either be the original value of size, a truncated value, 604 * This will either be the original value of size, a truncated value,
592 * or zero. If the returned value of size is 0 the region should be 605 * or zero. If the returned value of size is 0 the region should be
593 * discarded as it lies wholy above the memory limit. 606 * discarded as it lies wholly above the memory limit.
594 */ 607 */
595static unsigned long __init numa_enforce_memory_limit(unsigned long start, 608static unsigned long __init numa_enforce_memory_limit(unsigned long start,
596 unsigned long size) 609 unsigned long size)
@@ -802,16 +815,17 @@ static void __init setup_nonnuma(void)
802 unsigned long top_of_ram = memblock_end_of_DRAM(); 815 unsigned long top_of_ram = memblock_end_of_DRAM();
803 unsigned long total_ram = memblock_phys_mem_size(); 816 unsigned long total_ram = memblock_phys_mem_size();
804 unsigned long start_pfn, end_pfn; 817 unsigned long start_pfn, end_pfn;
805 unsigned int i, nid = 0; 818 unsigned int nid = 0;
819 struct memblock_region *reg;
806 820
807 printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", 821 printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
808 top_of_ram, total_ram); 822 top_of_ram, total_ram);
809 printk(KERN_DEBUG "Memory hole size: %ldMB\n", 823 printk(KERN_DEBUG "Memory hole size: %ldMB\n",
810 (top_of_ram - total_ram) >> 20); 824 (top_of_ram - total_ram) >> 20);
811 825
812 for (i = 0; i < memblock.memory.cnt; ++i) { 826 for_each_memblock(memory, reg) {
813 start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; 827 start_pfn = memblock_region_memory_base_pfn(reg);
814 end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); 828 end_pfn = memblock_region_memory_end_pfn(reg);
815 829
816 fake_numa_create_new_node(end_pfn, &nid); 830 fake_numa_create_new_node(end_pfn, &nid);
817 add_active_range(nid, start_pfn, end_pfn); 831 add_active_range(nid, start_pfn, end_pfn);
@@ -947,11 +961,11 @@ static struct notifier_block __cpuinitdata ppc64_numa_nb = {
947static void mark_reserved_regions_for_nid(int nid) 961static void mark_reserved_regions_for_nid(int nid)
948{ 962{
949 struct pglist_data *node = NODE_DATA(nid); 963 struct pglist_data *node = NODE_DATA(nid);
950 int i; 964 struct memblock_region *reg;
951 965
952 for (i = 0; i < memblock.reserved.cnt; i++) { 966 for_each_memblock(reserved, reg) {
953 unsigned long physbase = memblock.reserved.region[i].base; 967 unsigned long physbase = reg->base;
954 unsigned long size = memblock.reserved.region[i].size; 968 unsigned long size = reg->size;
955 unsigned long start_pfn = physbase >> PAGE_SHIFT; 969 unsigned long start_pfn = physbase >> PAGE_SHIFT;
956 unsigned long end_pfn = PFN_UP(physbase + size); 970 unsigned long end_pfn = PFN_UP(physbase + size);
957 struct node_active_region node_ar; 971 struct node_active_region node_ar;
@@ -1246,4 +1260,281 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
1246 return nid; 1260 return nid;
1247} 1261}
1248 1262
1263static u64 hot_add_drconf_memory_max(void)
1264{
1265 struct device_node *memory = NULL;
1266 unsigned int drconf_cell_cnt = 0;
1267 u64 lmb_size = 0;
1268 const u32 *dm = 0;
1269
1270 memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
1271 if (memory) {
1272 drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
1273 lmb_size = of_get_lmb_size(memory);
1274 of_node_put(memory);
1275 }
1276 return lmb_size * drconf_cell_cnt;
1277}
1278
1279/*
1280 * memory_hotplug_max - return max address of memory that may be added
1281 *
1282 * This is currently only used on systems that support drconfig memory
1283 * hotplug.
1284 */
1285u64 memory_hotplug_max(void)
1286{
1287 return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM());
1288}
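A worked example for the two helpers above, with hypothetical sizes: a 256 MB LMB size and 64 entries in ibm,dynamic-memory give hot_add_drconf_memory_max() = 64 * 256 MB = 16 GB, and memory_hotplug_max() then returns the larger of that value and the current end of DRAM.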
1249#endif /* CONFIG_MEMORY_HOTPLUG */ 1289#endif /* CONFIG_MEMORY_HOTPLUG */
1290
1291/* Virtual Processor Home Node (VPHN) support */
1292#ifdef CONFIG_PPC_SPLPAR
1293static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
1294static cpumask_t cpu_associativity_changes_mask;
1295static int vphn_enabled;
1296static void set_topology_timer(void);
1297
1298/*
1299 * Take a snapshot of the current values of the associativity change
1300 * counters maintained by the hypervisor.
1301 */
1302static void setup_cpu_associativity_change_counters(void)
1303{
1304 int cpu;
1305
1306 /* The VPHN feature supports a maximum of 8 reference points */
1307 BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
1308
1309 for_each_possible_cpu(cpu) {
1310 int i;
1311 u8 *counts = vphn_cpu_change_counts[cpu];
1312 volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
1313
1314 for (i = 0; i < distance_ref_points_depth; i++)
1315 counts[i] = hypervisor_counts[i];
1316 }
1317}
1318
1319/*
1320 * The hypervisor maintains a set of 8 associativity change counters in
1321 * the VPA of each cpu that correspond to the associativity levels in the
1322 * ibm,associativity-reference-points property. When an associativity
1323 * level changes, the corresponding counter is incremented.
1324 *
1325 * Set a bit in cpu_associativity_changes_mask for each cpu whose home
1326 * node associativity levels have changed.
1327 *
1328 * Returns the number of cpus with unhandled associativity changes.
1329 */
1330static int update_cpu_associativity_changes_mask(void)
1331{
1332 int cpu, nr_cpus = 0;
1333 cpumask_t *changes = &cpu_associativity_changes_mask;
1334
1335 cpumask_clear(changes);
1336
1337 for_each_possible_cpu(cpu) {
1338 int i, changed = 0;
1339 u8 *counts = vphn_cpu_change_counts[cpu];
1340 volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
1341
1342 for (i = 0; i < distance_ref_points_depth; i++) {
1343 if (hypervisor_counts[i] != counts[i]) {
1344 counts[i] = hypervisor_counts[i];
1345 changed = 1;
1346 }
1347 }
1348 if (changed) {
1349 cpumask_set_cpu(cpu, changes);
1350 nr_cpus++;
1351 }
1352 }
1353
1354 return nr_cpus;
1355}
1356
1357/*
1358 * 6 64-bit registers unpacked into 12 32-bit associativity values. To form
1359 * the complete property we have to add the length in the first cell.
1360 */
1361#define VPHN_ASSOC_BUFSIZE (6*sizeof(u64)/sizeof(u32) + 1)
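(The arithmetic behind the define: 6 * sizeof(u64) / sizeof(u32) + 1 = 6 * 8 / 4 + 1 = 13 cells, i.e. up to twelve 32-bit associativity values preceded by the length cell.)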
1362
1363/*
1364 * Convert the associativity domain numbers returned from the hypervisor
1365 * to the sequence they would appear in the ibm,associativity property.
1366 */
1367static int vphn_unpack_associativity(const long *packed, unsigned int *unpacked)
1368{
1369 int i, nr_assoc_doms = 0;
1370 const u16 *field = (const u16*) packed;
1371
1372#define VPHN_FIELD_UNUSED (0xffff)
1373#define VPHN_FIELD_MSB (0x8000)
1374#define VPHN_FIELD_MASK (~VPHN_FIELD_MSB)
1375
1376 for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) {
1377 if (*field == VPHN_FIELD_UNUSED) {
1378 /* All significant fields processed, and remaining
1379 * fields contain the reserved value of all 1's.
1380 * Just store them.
1381 */
1382 unpacked[i] = *((u32*)field);
1383 field += 2;
1384 } else if (*field & VPHN_FIELD_MSB) {
1385 /* Data is in the lower 15 bits of this field */
1386 unpacked[i] = *field & VPHN_FIELD_MASK;
1387 field++;
1388 nr_assoc_doms++;
1389 } else {
1390 /* Data is in the lower 15 bits of this field
1391 * concatenated with the next 16 bit field
1392 */
1393 unpacked[i] = *((u32*)field);
1394 field += 2;
1395 nr_assoc_doms++;
1396 }
1397 }
1398
1399 /* The first cell contains the length of the property */
1400 unpacked[0] = nr_assoc_doms;
1401
1402 return nr_assoc_doms;
1403}
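To make the packed layout concrete (the values are invented for illustration): a returned halfword stream starting 0x8002, 0x8001, 0xffff, 0xffff, ... decodes as follows. 0x8002 has the MSB set, so it contributes the single 15-bit value 2; 0x8001 likewise contributes 1; the 0xffff fields are the reserved all-ones padding and are stored verbatim as 32-bit values. The unpacked buffer therefore begins { 2, 2, 1, 0xffffffff, ... }, the leading 2 being the number of associativity domains found.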
1404
1405/*
1406 * Retrieve the new associativity information for a virtual processor's
1407 * home node.
1408 */
1409static long hcall_vphn(unsigned long cpu, unsigned int *associativity)
1410{
1411 long rc;
1412 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
1413 u64 flags = 1;
1414 int hwcpu = get_hard_smp_processor_id(cpu);
1415
1416 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
1417 vphn_unpack_associativity(retbuf, associativity);
1418
1419 return rc;
1420}
1421
1422static long vphn_get_associativity(unsigned long cpu,
1423 unsigned int *associativity)
1424{
1425 long rc;
1426
1427 rc = hcall_vphn(cpu, associativity);
1428
1429 switch (rc) {
1430 case H_FUNCTION:
1431 printk(KERN_INFO
1432 "VPHN is not supported. Disabling polling...\n");
1433 stop_topology_update();
1434 break;
1435 case H_HARDWARE:
1436 printk(KERN_ERR
1437 "hcall_vphn() experienced a hardware fault "
1438 "preventing VPHN. Disabling polling...\n");
1439 stop_topology_update();
1440 }
1441
1442 return rc;
1443}
1444
1445/*
1446 * Update the node maps and sysfs entries for each cpu whose home node
1447 * has changed.
1448 */
1449int arch_update_cpu_topology(void)
1450{
1451 int cpu, nid, old_nid;
1452 unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
1453 struct sys_device *sysdev;
1454
1455 for_each_cpu(cpu,&cpu_associativity_changes_mask) {
1456 vphn_get_associativity(cpu, associativity);
1457 nid = associativity_to_nid(associativity);
1458
1459 if (nid < 0 || !node_online(nid))
1460 nid = first_online_node;
1461
1462 old_nid = numa_cpu_lookup_table[cpu];
1463
1464 /* Disable hotplug while we update the cpu
1465 * masks and sysfs.
1466 */
1467 get_online_cpus();
1468 unregister_cpu_under_node(cpu, old_nid);
1469 unmap_cpu_from_node(cpu);
1470 map_cpu_to_node(cpu, nid);
1471 register_cpu_under_node(cpu, nid);
1472 put_online_cpus();
1473
1474 sysdev = get_cpu_sysdev(cpu);
1475 if (sysdev)
1476 kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
1477 }
1478
1479 return 1;
1480}
1481
1482static void topology_work_fn(struct work_struct *work)
1483{
1484 rebuild_sched_domains();
1485}
1486static DECLARE_WORK(topology_work, topology_work_fn);
1487
1488void topology_schedule_update(void)
1489{
1490 schedule_work(&topology_work);
1491}
1492
1493static void topology_timer_fn(unsigned long ignored)
1494{
1495 if (!vphn_enabled)
1496 return;
1497 if (update_cpu_associativity_changes_mask() > 0)
1498 topology_schedule_update();
1499 set_topology_timer();
1500}
1501static struct timer_list topology_timer =
1502 TIMER_INITIALIZER(topology_timer_fn, 0, 0);
1503
1504static void set_topology_timer(void)
1505{
1506 topology_timer.data = 0;
1507 topology_timer.expires = jiffies + 60 * HZ;
1508 add_timer(&topology_timer);
1509}
1510
1511/*
1512 * Start polling for VPHN associativity changes.
1513 */
1514int start_topology_update(void)
1515{
1516 int rc = 0;
1517
1518 /* Disabled until races with load balancing are fixed */
1519 if (0 && firmware_has_feature(FW_FEATURE_VPHN) &&
1520 get_lppaca()->shared_proc) {
1521 vphn_enabled = 1;
1522 setup_cpu_associativity_change_counters();
1523 init_timer_deferrable(&topology_timer);
1524 set_topology_timer();
1525 rc = 1;
1526 }
1527
1528 return rc;
1529}
1530__initcall(start_topology_update);
1531
1532/*
1533 * Disable polling for VPHN associativity changes.
1534 */
1535int stop_topology_update(void)
1536{
1537 vphn_enabled = 0;
1538 return del_timer_sync(&topology_timer);
1539}
1540#endif /* CONFIG_PPC_SPLPAR */
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 2c7e801ab20b..af40c8768a78 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -33,110 +33,6 @@
33 33
34#include "mmu_decl.h" 34#include "mmu_decl.h"
35 35
36DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
37
38#ifdef CONFIG_SMP
39
40/*
41 * Handle batching of page table freeing on SMP. Page tables are
42 * queued up and send to be freed later by RCU in order to avoid
43 * freeing a page table page that is being walked without locks
44 */
45
46static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
47static unsigned long pte_freelist_forced_free;
48
49struct pte_freelist_batch
50{
51 struct rcu_head rcu;
52 unsigned int index;
53 unsigned long tables[0];
54};
55
56#define PTE_FREELIST_SIZE \
57 ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
58 / sizeof(unsigned long))
59
60static void pte_free_smp_sync(void *arg)
61{
62 /* Do nothing, just ensure we sync with all CPUs */
63}
64
65/* This is only called when we are critically out of memory
66 * (and fail to get a page in pte_free_tlb).
67 */
68static void pgtable_free_now(void *table, unsigned shift)
69{
70 pte_freelist_forced_free++;
71
72 smp_call_function(pte_free_smp_sync, NULL, 1);
73
74 pgtable_free(table, shift);
75}
76
77static void pte_free_rcu_callback(struct rcu_head *head)
78{
79 struct pte_freelist_batch *batch =
80 container_of(head, struct pte_freelist_batch, rcu);
81 unsigned int i;
82
83 for (i = 0; i < batch->index; i++) {
84 void *table = (void *)(batch->tables[i] & ~MAX_PGTABLE_INDEX_SIZE);
85 unsigned shift = batch->tables[i] & MAX_PGTABLE_INDEX_SIZE;
86
87 pgtable_free(table, shift);
88 }
89
90 free_page((unsigned long)batch);
91}
92
93static void pte_free_submit(struct pte_freelist_batch *batch)
94{
95 call_rcu(&batch->rcu, pte_free_rcu_callback);
96}
97
98void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
99{
100 /* This is safe since tlb_gather_mmu has disabled preemption */
101 struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
102 unsigned long pgf;
103
104 if (atomic_read(&tlb->mm->mm_users) < 2 ||
105 cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){
106 pgtable_free(table, shift);
107 return;
108 }
109
110 if (*batchp == NULL) {
111 *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
112 if (*batchp == NULL) {
113 pgtable_free_now(table, shift);
114 return;
115 }
116 (*batchp)->index = 0;
117 }
118 BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
119 pgf = (unsigned long)table | shift;
120 (*batchp)->tables[(*batchp)->index++] = pgf;
121 if ((*batchp)->index == PTE_FREELIST_SIZE) {
122 pte_free_submit(*batchp);
123 *batchp = NULL;
124 }
125}
126
127void pte_free_finish(void)
128{
129 /* This is safe since tlb_gather_mmu has disabled preemption */
130 struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
131
132 if (*batchp == NULL)
133 return;
134 pte_free_submit(*batchp);
135 *batchp = NULL;
136}
137
138#endif /* CONFIG_SMP */
139
140static inline int is_exec_fault(void) 36static inline int is_exec_fault(void)
141{ 37{
142 return current->thread.regs && TRAP(current->thread.regs) == 0x400; 38 return current->thread.regs && TRAP(current->thread.regs) == 0x400;
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index a87ead0138b4..51f87956f8f8 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -78,7 +78,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
78 78
79 /* pgdir take page or two with 4K pages and a page fraction otherwise */ 79 /* pgdir take page or two with 4K pages and a page fraction otherwise */
80#ifndef CONFIG_PPC_4K_PAGES 80#ifndef CONFIG_PPC_4K_PAGES
81 ret = (pgd_t *)kzalloc(1 << PGDIR_ORDER, GFP_KERNEL); 81 ret = kzalloc(1 << PGDIR_ORDER, GFP_KERNEL);
82#else 82#else
83 ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 83 ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
84 PGDIR_ORDER - PAGE_SHIFT); 84 PGDIR_ORDER - PAGE_SHIFT);
@@ -133,7 +133,15 @@ ioremap(phys_addr_t addr, unsigned long size)
133EXPORT_SYMBOL(ioremap); 133EXPORT_SYMBOL(ioremap);
134 134
135void __iomem * 135void __iomem *
136ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags) 136ioremap_wc(phys_addr_t addr, unsigned long size)
137{
138 return __ioremap_caller(addr, size, _PAGE_NO_CACHE,
139 __builtin_return_address(0));
140}
141EXPORT_SYMBOL(ioremap_wc);
142
143void __iomem *
144ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
137{ 145{
138 /* writeable implies dirty for kernel addresses */ 146 /* writeable implies dirty for kernel addresses */
139 if (flags & _PAGE_RW) 147 if (flags & _PAGE_RW)
@@ -152,7 +160,7 @@ ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
152 160
153 return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); 161 return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
154} 162}
155EXPORT_SYMBOL(ioremap_flags); 163EXPORT_SYMBOL(ioremap_prot);
156 164
157void __iomem * 165void __iomem *
158__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) 166__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
@@ -230,6 +238,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
230 area = get_vm_area_caller(size, VM_IOREMAP, caller); 238 area = get_vm_area_caller(size, VM_IOREMAP, caller);
231 if (area == 0) 239 if (area == 0)
232 return NULL; 240 return NULL;
241 area->phys_addr = p;
233 v = (unsigned long) area->addr; 242 v = (unsigned long) area->addr;
234 } else { 243 } else {
235 v = (ioremap_bot -= size); 244 v = (ioremap_bot -= size);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 21d6dfab7942..6e595f6496d4 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -223,6 +223,8 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
223 caller); 223 caller);
224 if (area == NULL) 224 if (area == NULL)
225 return NULL; 225 return NULL;
226
227 area->phys_addr = paligned;
226 ret = __ioremap_at(paligned, area->addr, size, flags); 228 ret = __ioremap_at(paligned, area->addr, size, flags);
227 if (!ret) 229 if (!ret)
228 vunmap(area->addr); 230 vunmap(area->addr);
@@ -253,7 +255,17 @@ void __iomem * ioremap(phys_addr_t addr, unsigned long size)
253 return __ioremap_caller(addr, size, flags, caller); 255 return __ioremap_caller(addr, size, flags, caller);
254} 256}
255 257
256void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size, 258void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
259{
260 unsigned long flags = _PAGE_NO_CACHE;
261 void *caller = __builtin_return_address(0);
262
263 if (ppc_md.ioremap)
264 return ppc_md.ioremap(addr, size, flags, caller);
265 return __ioremap_caller(addr, size, flags, caller);
266}
267
268void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
257 unsigned long flags) 269 unsigned long flags)
258{ 270{
259 void *caller = __builtin_return_address(0); 271 void *caller = __builtin_return_address(0);
@@ -309,7 +321,8 @@ void iounmap(volatile void __iomem *token)
309} 321}
310 322
311EXPORT_SYMBOL(ioremap); 323EXPORT_SYMBOL(ioremap);
312EXPORT_SYMBOL(ioremap_flags); 324EXPORT_SYMBOL(ioremap_wc);
325EXPORT_SYMBOL(ioremap_prot);
313EXPORT_SYMBOL(__ioremap); 326EXPORT_SYMBOL(__ioremap);
314EXPORT_SYMBOL(__ioremap_at); 327EXPORT_SYMBOL(__ioremap_at);
315EXPORT_SYMBOL(iounmap); 328EXPORT_SYMBOL(iounmap);
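As a hedged usage sketch for the ioremap_wc()/ioremap_prot() entry points introduced in the two pgtable files above (the physical address and size are hypothetical, and the prototypes are assumed to be reachable via <linux/io.h>):

	#include <linux/errno.h>
	#include <linux/io.h>

	static void __iomem *fb;

	static int example_map_framebuffer(void)
	{
		/* Map 1 MB of a (hypothetical) framebuffer aperture write-combined */
		fb = ioremap_wc(0xc0000000, 0x100000);
		if (!fb)
			return -ENOMEM;
		return 0;
	}

	static void example_unmap_framebuffer(void)
	{
		iounmap(fb);
	}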
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index f8a01829d64f..11571e118831 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -223,8 +223,7 @@ void __init MMU_init_hw(void)
223 * Find some memory for the hash table. 223 * Find some memory for the hash table.
224 */ 224 */
225 if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); 225 if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
226 Hash = __va(memblock_alloc_base(Hash_size, Hash_size, 226 Hash = __va(memblock_alloc(Hash_size, Hash_size));
227 __initial_memory_limit_addr));
228 cacheable_memzero(Hash, Hash_size); 227 cacheable_memzero(Hash, Hash_size);
229 _SDR1 = __pa(Hash) | SDR1_LOW_BITS; 228 _SDR1 = __pa(Hash) | SDR1_LOW_BITS;
230 229
@@ -272,3 +271,18 @@ void __init MMU_init_hw(void)
272 271
273 if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); 272 if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205);
274} 273}
274
275void setup_initial_memory_limit(phys_addr_t first_memblock_base,
276 phys_addr_t first_memblock_size)
277{
278 /* We don't currently support the first MEMBLOCK not mapping 0
279 * physical on those processors
280 */
281 BUG_ON(first_memblock_base != 0);
282
283 /* 601 can only access 16MB at the moment */
284 if (PVR_VER(mfspr(SPRN_PVR)) == 1)
285 memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000));
286 else /* Anything else has 256M mapped */
287 memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000));
288}
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 1d98ecc8eecd..e22276cb67a4 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -24,6 +24,7 @@
24#include <asm/firmware.h> 24#include <asm/firmware.h>
25#include <linux/compiler.h> 25#include <linux/compiler.h>
26#include <asm/udbg.h> 26#include <asm/udbg.h>
27#include <asm/code-patching.h>
27 28
28 29
29extern void slb_allocate_realmode(unsigned long ea); 30extern void slb_allocate_realmode(unsigned long ea);
@@ -166,7 +167,7 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2)
166 int esid_1t_count; 167 int esid_1t_count;
167 168
168 /* System is not 1T segment size capable. */ 169 /* System is not 1T segment size capable. */
169 if (!cpu_has_feature(CPU_FTR_1T_SEGMENT)) 170 if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
170 return (GET_ESID(addr1) == GET_ESID(addr2)); 171 return (GET_ESID(addr1) == GET_ESID(addr2));
171 172
172 esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) + 173 esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) +
@@ -201,7 +202,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
201 */ 202 */
202 hard_irq_disable(); 203 hard_irq_disable();
203 offset = get_paca()->slb_cache_ptr; 204 offset = get_paca()->slb_cache_ptr;
204 if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) && 205 if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
205 offset <= SLB_CACHE_ENTRIES) { 206 offset <= SLB_CACHE_ENTRIES) {
206 int i; 207 int i;
207 asm volatile("isync" : : : "memory"); 208 asm volatile("isync" : : : "memory");
@@ -249,9 +250,8 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
249static inline void patch_slb_encoding(unsigned int *insn_addr, 250static inline void patch_slb_encoding(unsigned int *insn_addr,
250 unsigned int immed) 251 unsigned int immed)
251{ 252{
252 *insn_addr = (*insn_addr & 0xffff0000) | immed; 253 int insn = (*insn_addr & 0xffff0000) | immed;
253 flush_icache_range((unsigned long)insn_addr, 4+ 254 patch_instruction(insn_addr, insn);
254 (unsigned long)insn_addr);
255} 255}
256 256
257void slb_set_size(u16 size) 257void slb_set_size(u16 size)
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 95ce35581696..ef653dc95b65 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -58,7 +58,7 @@ _GLOBAL(slb_miss_kernel_load_linear)
58 li r11,0 58 li r11,0
59BEGIN_FTR_SECTION 59BEGIN_FTR_SECTION
60 b slb_finish_load 60 b slb_finish_load
61END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT) 61END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
62 b slb_finish_load_1T 62 b slb_finish_load_1T
63 63
641: 641:
@@ -87,7 +87,7 @@ _GLOBAL(slb_miss_kernel_load_vmemmap)
876: 876:
88BEGIN_FTR_SECTION 88BEGIN_FTR_SECTION
89 b slb_finish_load 89 b slb_finish_load
90END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT) 90END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
91 b slb_finish_load_1T 91 b slb_finish_load_1T
92 92
930: /* user address: proto-VSID = context << 15 | ESID. First check 930: /* user address: proto-VSID = context << 15 | ESID. First check
@@ -138,11 +138,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT)
138 ld r9,PACACONTEXTID(r13) 138 ld r9,PACACONTEXTID(r13)
139BEGIN_FTR_SECTION 139BEGIN_FTR_SECTION
140 cmpldi r10,0x1000 140 cmpldi r10,0x1000
141END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) 141END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
142 rldimi r10,r9,USER_ESID_BITS,0 142 rldimi r10,r9,USER_ESID_BITS,0
143BEGIN_FTR_SECTION 143BEGIN_FTR_SECTION
144 bge slb_finish_load_1T 144 bge slb_finish_load_1T
145END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) 145END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
146 b slb_finish_load 146 b slb_finish_load
147 147
1488: /* invalid EA */ 1488: /* invalid EA */
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 446a01842a73..41e31642a86a 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -243,7 +243,7 @@ void __init stabs_alloc(void)
243{ 243{
244 int cpu; 244 int cpu;
245 245
246 if (cpu_has_feature(CPU_FTR_SLB)) 246 if (mmu_has_feature(MMU_FTR_SLB))
247 return; 247 return;
248 248
249 for_each_possible_cpu(cpu) { 249 for_each_possible_cpu(cpu) {
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c
index 690566b66e8e..27b863c14941 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/tlb_hash32.c
@@ -71,9 +71,6 @@ void tlb_flush(struct mmu_gather *tlb)
71 */ 71 */
72 _tlbia(); 72 _tlbia();
73 } 73 }
74
75 /* Push out batch of freed page tables */
76 pte_free_finish();
77} 74}
78 75
79/* 76/*
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 1ec06576f619..31f18207970b 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -38,13 +38,11 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
 38 * needs to be flushed. This function will either perform the flush 38 * needs to be flushed. This function will either perform the flush
39 * immediately or will batch it up if the current CPU has an active 39 * immediately or will batch it up if the current CPU has an active
40 * batch on it. 40 * batch on it.
41 *
42 * Must be called from within some kind of spinlock/non-preempt region...
43 */ 41 */
44void hpte_need_flush(struct mm_struct *mm, unsigned long addr, 42void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
45 pte_t *ptep, unsigned long pte, int huge) 43 pte_t *ptep, unsigned long pte, int huge)
46{ 44{
47 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); 45 struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
48 unsigned long vsid, vaddr; 46 unsigned long vsid, vaddr;
49 unsigned int psize; 47 unsigned int psize;
50 int ssize; 48 int ssize;
@@ -99,6 +97,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
99 */ 97 */
100 if (!batch->active) { 98 if (!batch->active) {
101 flush_hash_page(vaddr, rpte, psize, ssize, 0); 99 flush_hash_page(vaddr, rpte, psize, ssize, 0);
100 put_cpu_var(ppc64_tlb_batch);
102 return; 101 return;
103 } 102 }
104 103
@@ -127,6 +126,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
127 batch->index = ++i; 126 batch->index = ++i;
128 if (i >= PPC64_TLB_BATCH_NR) 127 if (i >= PPC64_TLB_BATCH_NR)
129 __flush_tlb_pending(batch); 128 __flush_tlb_pending(batch);
129 put_cpu_var(ppc64_tlb_batch);
130} 130}
131 131
132/* 132/*
@@ -155,7 +155,7 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
155 155
156void tlb_flush(struct mmu_gather *tlb) 156void tlb_flush(struct mmu_gather *tlb)
157{ 157{
158 struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch); 158 struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
159 159
160 /* If there's a TLB batch pending, then we must flush it because the 160 /* If there's a TLB batch pending, then we must flush it because the
161 * pages are going to be freed and we really don't want to have a CPU 161 * pages are going to be freed and we really don't want to have a CPU
@@ -164,8 +164,7 @@ void tlb_flush(struct mmu_gather *tlb)
164 if (tlbbatch->index) 164 if (tlbbatch->index)
165 __flush_tlb_pending(tlbbatch); 165 __flush_tlb_pending(tlbbatch);
166 166
167 /* Push out batch of freed page tables */ 167 put_cpu_var(ppc64_tlb_batch);
168 pte_free_finish();
169} 168}
170 169
171/** 170/**
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index 8b04c54e596f..af0892209417 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -138,8 +138,11 @@
138 cmpldi cr0,r15,0 /* Check for user region */ 138 cmpldi cr0,r15,0 /* Check for user region */
139 std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */ 139 std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */
140 beq normal_tlb_miss 140 beq normal_tlb_miss
141
142 li r11,_PAGE_PRESENT|_PAGE_BAP_SX /* Base perm */
143 oris r11,r11,_PAGE_ACCESSED@h
141 /* XXX replace the RMW cycles with immediate loads + writes */ 144 /* XXX replace the RMW cycles with immediate loads + writes */
1421: mfspr r10,SPRN_MAS1 145 mfspr r10,SPRN_MAS1
143 cmpldi cr0,r15,8 /* Check for vmalloc region */ 146 cmpldi cr0,r15,8 /* Check for vmalloc region */
144 rlwinm r10,r10,0,16,1 /* Clear TID */ 147 rlwinm r10,r10,0,16,1 /* Clear TID */
145 mtspr SPRN_MAS1,r10 148 mtspr SPRN_MAS1,r10
@@ -189,7 +192,7 @@ normal_tlb_miss:
189 or r10,r15,r14 192 or r10,r15,r14
190 193
191BEGIN_MMU_FTR_SECTION 194BEGIN_MMU_FTR_SECTION
192 /* Set the TLB reservation and seach for existing entry. Then load 195 /* Set the TLB reservation and search for existing entry. Then load
193 * the entry. 196 * the entry.
194 */ 197 */
195 PPC_TLBSRX_DOT(0,r16) 198 PPC_TLBSRX_DOT(0,r16)
@@ -422,13 +425,13 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
422 425
423virt_page_table_tlb_miss_fault: 426virt_page_table_tlb_miss_fault:
424 /* If we fault here, things are a little bit tricky. We need to call 427 /* If we fault here, things are a little bit tricky. We need to call
425 * either data or instruction store fault, and we need to retreive 428 * either data or instruction store fault, and we need to retrieve
426 * the original fault address and ESR (for data). 429 * the original fault address and ESR (for data).
427 * 430 *
428 * The thing is, we know that in normal circumstances, this is 431 * The thing is, we know that in normal circumstances, this is
429 * always called as a second level tlb miss for SW load or as a first 432 * always called as a second level tlb miss for SW load or as a first
430 * level TLB miss for HW load, so we should be able to peek at the 433 * level TLB miss for HW load, so we should be able to peek at the
431 * relevant informations in the first exception frame in the PACA. 434 * relevant information in the first exception frame in the PACA.
432 * 435 *
433 * However, we do need to double check that, because we may just hit 436 * However, we do need to double check that, because we may just hit
434 * a stray kernel pointer or a userland attack trying to hit those 437 * a stray kernel pointer or a userland attack trying to hit those
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index fe391e942521..0bdad3aecc67 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -299,9 +299,6 @@ EXPORT_SYMBOL(flush_tlb_range);
299void tlb_flush(struct mmu_gather *tlb) 299void tlb_flush(struct mmu_gather *tlb)
300{ 300{
301 flush_tlb_mm(tlb->mm); 301 flush_tlb_mm(tlb->mm);
302
303 /* Push out batch of freed page tables */
304 pte_free_finish();
305} 302}
306 303
307/* 304/*
@@ -349,11 +346,47 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
349 346
350static void setup_page_sizes(void) 347static void setup_page_sizes(void)
351{ 348{
352 unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG); 349 unsigned int tlb0cfg;
353 unsigned int tlb0ps = mfspr(SPRN_TLB0PS); 350 unsigned int tlb0ps;
354 unsigned int eptcfg = mfspr(SPRN_EPTCFG); 351 unsigned int eptcfg;
355 int i, psize; 352 int i, psize;
356 353
354#ifdef CONFIG_PPC_FSL_BOOK3E
355 unsigned int mmucfg = mfspr(SPRN_MMUCFG);
356
357 if (((mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) &&
358 (mmu_has_feature(MMU_FTR_TYPE_FSL_E))) {
359 unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG);
360 unsigned int min_pg, max_pg;
361
362 min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT;
363 max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT;
364
365 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
366 struct mmu_psize_def *def;
367 unsigned int shift;
368
369 def = &mmu_psize_defs[psize];
370 shift = def->shift;
371
372 if (shift == 0)
373 continue;
374
375 /* adjust to be in terms of 4^shift Kb */
376 shift = (shift - 10) >> 1;
377
378 if ((shift >= min_pg) && (shift <= max_pg))
379 def->flags |= MMU_PAGE_SIZE_DIRECT;
380 }
381
382 goto no_indirect;
383 }
384#endif
385
386 tlb0cfg = mfspr(SPRN_TLB0CFG);
387 tlb0ps = mfspr(SPRN_TLB0PS);
388 eptcfg = mfspr(SPRN_EPTCFG);
389
357 /* Look for supported direct sizes */ 390 /* Look for supported direct sizes */
358 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { 391 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
359 struct mmu_psize_def *def = &mmu_psize_defs[psize]; 392 struct mmu_psize_def *def = &mmu_psize_defs[psize];
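A worked example for the 4^shift KB conversion in the FSL Book3E branch of setup_page_sizes() above: a 4 KB page has def->shift = 12, so (12 - 10) >> 1 = 1, i.e. 4^1 KB; a 16 MB page has shift 24, giving (24 - 10) >> 1 = 7, i.e. 4^7 KB = 16384 KB. A size is marked MMU_PAGE_SIZE_DIRECT only when that value lies between the MINSIZE and MAXSIZE fields read from TLB1CFG.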
@@ -505,10 +538,26 @@ static void __early_init_mmu(int boot_cpu)
505 */ 538 */
506 linear_map_top = memblock_end_of_DRAM(); 539 linear_map_top = memblock_end_of_DRAM();
507 540
541#ifdef CONFIG_PPC_FSL_BOOK3E
542 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
543 unsigned int num_cams;
544
545 /* use a quarter of the TLBCAM for bolted linear map */
546 num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
547 linear_map_top = map_mem_in_cams(linear_map_top, num_cams);
548
549 /* limit memory so we dont have linear faults */
550 memblock_enforce_memory_limit(linear_map_top);
551 memblock_analyze();
552 }
553#endif
554
508 /* A sync won't hurt us after mucking around with 555 /* A sync won't hurt us after mucking around with
509 * the MMU configuration 556 * the MMU configuration
510 */ 557 */
511 mb(); 558 mb();
559
560 memblock_set_current_limit(linear_map_top);
512} 561}
513 562
514void __init early_init_mmu(void) 563void __init early_init_mmu(void)
@@ -521,4 +570,18 @@ void __cpuinit early_init_mmu_secondary(void)
521 __early_init_mmu(0); 570 __early_init_mmu(0);
522} 571}
523 572
573void setup_initial_memory_limit(phys_addr_t first_memblock_base,
574 phys_addr_t first_memblock_size)
575{
576 /* On Embedded 64-bit, we adjust the RMA size to match
577 * the bolted TLB entry. We know for now that only 1G
578 * entries are supported though that may eventually
579 * change. We crop it to the size of the first MEMBLOCK to
580 * avoid going over total available memory just in case...
581 */
582 ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
583
584 /* Finally limit subsequent allocations */
585 memblock_set_current_limit(first_memblock_base + ppc64_rma_size);
586}
524#endif /* CONFIG_PPC64 */ 587#endif /* CONFIG_PPC64 */
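To make the RMA cropping in setup_initial_memory_limit() above concrete (sizes hypothetical): a board whose first MEMBLOCK covers 512 MB ends up with ppc64_rma_size = 512 MB, and early memblock allocations are limited to below first_memblock_base + 512 MB; a machine whose first block spans 4 GB is capped at the 1 GB bolted-entry limit instead.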
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S
index b9d9fed8f36e..7c63c0ed4f1b 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/tlb_nohash_low.S
@@ -189,6 +189,13 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
189 blr 189 blr
190 190
191#ifdef CONFIG_PPC_47x 191#ifdef CONFIG_PPC_47x
192
193/*
194 * 47x variant of icbt
195 */
196# define ICBT(CT,RA,RB) \
197 .long 0x7c00002c | ((CT) << 21) | ((RA) << 16) | ((RB) << 11)
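For reference, the encoding works out as 0x7c00002c | (0 << 21) | (6 << 16) | (7 << 11) = 0x7c06382c for the ICBT(0,r6,r7) uses below, i.e. an icbt with CT=0, RA=r6, RB=r7.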
198
192/* 199/*
193 * _tlbivax_bcast is only on 47x. We don't bother doing a runtime 200 * _tlbivax_bcast is only on 47x. We don't bother doing a runtime
194 * check though, it will blow up soon enough if we mistakenly try 201 * check though, it will blow up soon enough if we mistakenly try
@@ -206,7 +213,35 @@ _GLOBAL(_tlbivax_bcast)
206 isync 213 isync
207 eieio 214 eieio
208 tlbsync 215 tlbsync
216BEGIN_FTR_SECTION
217 b 1f
218END_FTR_SECTION_IFSET(CPU_FTR_476_DD2)
219 sync
220 wrtee r10
221 blr
222/*
 223 * DD2 HW could hang if an instruction fetch happens before msync completes.
224 * Touch enough instruction cache lines to ensure cache hits
225 */
2261: mflr r9
227 bl 2f
2282: mflr r6
229 li r7,32
230 ICBT(0,r6,r7) /* touch next cache line */
231 add r6,r6,r7
232 ICBT(0,r6,r7) /* touch next cache line */
233 add r6,r6,r7
234 ICBT(0,r6,r7) /* touch next cache line */
209 sync 235 sync
236 nop
237 nop
238 nop
239 nop
240 nop
241 nop
242 nop
243 nop
244 mtlr r9
210 wrtee r10 245 wrtee r10
211 blr 246 blr
212#endif /* CONFIG_PPC_47x */ 247#endif /* CONFIG_PPC_47x */
@@ -367,7 +402,7 @@ _GLOBAL(set_context)
367#error Unsupported processor type ! 402#error Unsupported processor type !
368#endif 403#endif
369 404
370#if defined(CONFIG_FSL_BOOKE) 405#if defined(CONFIG_PPC_FSL_BOOK3E)
371/* 406/*
372 * extern void loadcam_entry(unsigned int index) 407 * extern void loadcam_entry(unsigned int index)
373 * 408 *