aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-12-28 19:54:33 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2008-12-28 19:54:33 -0500
commit3c92ec8ae91ecf59d88c798301833d7cf83f2179 (patch)
tree08a38cd3523c42bd49882f17cd501fd879e7ca1c /arch/powerpc/mm
parentc4c9f0183b7c4e97836e8fecbb67898b06c47e78 (diff)
parentca9153a3a2a7556d091dfe080e42b0e67881fff6 (diff)
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc: (144 commits) powerpc/44x: Support 16K/64K base page sizes on 44x powerpc: Force memory size to be a multiple of PAGE_SIZE powerpc/32: Wire up the trampoline code for kdump powerpc/32: Add the ability for a classic ppc kernel to be loaded at 32M powerpc/32: Allow __ioremap on RAM addresses for kdump kernel powerpc/32: Setup OF properties for kdump powerpc/32/kdump: Implement crash_setup_regs() using ppc_save_regs() powerpc: Prepare xmon_save_regs for use with kdump powerpc: Remove default kexec/crash_kernel ops assignments powerpc: Make default kexec/crash_kernel ops implicit powerpc: Setup OF properties for ppc32 kexec powerpc/pseries: Fix cpu hotplug powerpc: Fix KVM build on ppc440 powerpc/cell: add QPACE as a separate Cell platform powerpc/cell: fix build breakage with CONFIG_SPUFS disabled powerpc/mpc5200: fix error paths in PSC UART probe function powerpc/mpc5200: add rts/cts handling in PSC UART driver powerpc/mpc5200: Make PSC UART driver update serial errors counters powerpc/mpc5200: Remove obsolete code from mpc5200 MDIO driver powerpc/mpc5200: Add MDMA/UDMA support to MPC5200 ATA driver ... Fix trivial conflict in drivers/char/Makefile as per Paul's directions
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/Makefile10
-rw-r--r--arch/powerpc/mm/fault.c14
-rw-r--r--arch/powerpc/mm/hash_low_32.S111
-rw-r--r--arch/powerpc/mm/hugetlbpage.c22
-rw-r--r--arch/powerpc/mm/init_32.c6
-rw-r--r--arch/powerpc/mm/mem.c6
-rw-r--r--arch/powerpc/mm/mmu_context_32.c84
-rw-r--r--arch/powerpc/mm/mmu_context_hash32.c103
-rw-r--r--arch/powerpc/mm/mmu_context_hash64.c (renamed from arch/powerpc/mm/mmu_context_64.c)8
-rw-r--r--arch/powerpc/mm/mmu_context_nohash.c397
-rw-r--r--arch/powerpc/mm/mmu_decl.h65
-rw-r--r--arch/powerpc/mm/pgtable.c117
-rw-r--r--arch/powerpc/mm/pgtable_32.c56
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c10
-rw-r--r--arch/powerpc/mm/tlb_hash32.c (renamed from arch/powerpc/mm/tlb_32.c)4
-rw-r--r--arch/powerpc/mm/tlb_hash64.c (renamed from arch/powerpc/mm/tlb_64.c)86
-rw-r--r--arch/powerpc/mm/tlb_nohash.c209
-rw-r--r--arch/powerpc/mm/tlb_nohash_low.S166
18 files changed, 1191 insertions, 283 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index e7392b45a5ef..953cc4a1cde5 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -6,17 +6,19 @@ ifeq ($(CONFIG_PPC64),y)
6EXTRA_CFLAGS += -mno-minimal-toc 6EXTRA_CFLAGS += -mno-minimal-toc
7endif 7endif
8 8
9obj-y := fault.o mem.o \ 9obj-y := fault.o mem.o pgtable.o \
10 init_$(CONFIG_WORD_SIZE).o \ 10 init_$(CONFIG_WORD_SIZE).o \
11 pgtable_$(CONFIG_WORD_SIZE).o \ 11 pgtable_$(CONFIG_WORD_SIZE).o
12 mmu_context_$(CONFIG_WORD_SIZE).o 12obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
13 tlb_nohash_low.o
13hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o 14hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o
14obj-$(CONFIG_PPC64) += hash_utils_64.o \ 15obj-$(CONFIG_PPC64) += hash_utils_64.o \
15 slb_low.o slb.o stab.o \ 16 slb_low.o slb.o stab.o \
16 gup.o mmap.o $(hash-y) 17 gup.o mmap.o $(hash-y)
17obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o 18obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o
18obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ 19obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \
19 tlb_$(CONFIG_WORD_SIZE).o 20 tlb_hash$(CONFIG_WORD_SIZE).o \
21 mmu_context_hash$(CONFIG_WORD_SIZE).o
20obj-$(CONFIG_40x) += 40x_mmu.o 22obj-$(CONFIG_40x) += 40x_mmu.o
21obj-$(CONFIG_44x) += 44x_mmu.o 23obj-$(CONFIG_44x) += 44x_mmu.o
22obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o 24obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 866098686da8..91c7b8636b8a 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -30,6 +30,7 @@
30#include <linux/kprobes.h> 30#include <linux/kprobes.h>
31#include <linux/kdebug.h> 31#include <linux/kdebug.h>
32 32
33#include <asm/firmware.h>
33#include <asm/page.h> 34#include <asm/page.h>
34#include <asm/pgtable.h> 35#include <asm/pgtable.h>
35#include <asm/mmu.h> 36#include <asm/mmu.h>
@@ -283,7 +284,7 @@ good_area:
283 } 284 }
284 pte_update(ptep, 0, _PAGE_HWEXEC | 285 pte_update(ptep, 0, _PAGE_HWEXEC |
285 _PAGE_ACCESSED); 286 _PAGE_ACCESSED);
286 _tlbie(address, mm->context.id); 287 local_flush_tlb_page(vma, address);
287 pte_unmap_unlock(ptep, ptl); 288 pte_unmap_unlock(ptep, ptl);
288 up_read(&mm->mmap_sem); 289 up_read(&mm->mmap_sem);
289 return 0; 290 return 0;
@@ -318,9 +319,16 @@ good_area:
318 goto do_sigbus; 319 goto do_sigbus;
319 BUG(); 320 BUG();
320 } 321 }
321 if (ret & VM_FAULT_MAJOR) 322 if (ret & VM_FAULT_MAJOR) {
322 current->maj_flt++; 323 current->maj_flt++;
323 else 324#ifdef CONFIG_PPC_SMLPAR
325 if (firmware_has_feature(FW_FEATURE_CMO)) {
326 preempt_disable();
327 get_lppaca()->page_ins += (1 << PAGE_FACTOR);
328 preempt_enable();
329 }
330#endif
331 } else
324 current->min_flt++; 332 current->min_flt++;
325 up_read(&mm->mmap_sem); 333 up_read(&mm->mmap_sem);
326 return 0; 334 return 0;
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index 7bffb70b9fe2..67850ec9feb3 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -36,36 +36,6 @@ mmu_hash_lock:
36#endif /* CONFIG_SMP */ 36#endif /* CONFIG_SMP */
37 37
38/* 38/*
39 * Sync CPUs with hash_page taking & releasing the hash
40 * table lock
41 */
42#ifdef CONFIG_SMP
43 .text
44_GLOBAL(hash_page_sync)
45 mfmsr r10
46 rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
47 mtmsr r0
48 lis r8,mmu_hash_lock@h
49 ori r8,r8,mmu_hash_lock@l
50 lis r0,0x0fff
51 b 10f
5211: lwz r6,0(r8)
53 cmpwi 0,r6,0
54 bne 11b
5510: lwarx r6,0,r8
56 cmpwi 0,r6,0
57 bne- 11b
58 stwcx. r0,0,r8
59 bne- 10b
60 isync
61 eieio
62 li r0,0
63 stw r0,0(r8)
64 mtmsr r10
65 blr
66#endif /* CONFIG_SMP */
67
68/*
69 * Load a PTE into the hash table, if possible. 39 * Load a PTE into the hash table, if possible.
70 * The address is in r4, and r3 contains an access flag: 40 * The address is in r4, and r3 contains an access flag:
71 * _PAGE_RW (0x400) if a write. 41 * _PAGE_RW (0x400) if a write.
@@ -353,8 +323,8 @@ _GLOBAL(create_hpte)
353 ori r8,r8,0xe14 /* clear out reserved bits and M */ 323 ori r8,r8,0xe14 /* clear out reserved bits and M */
354 andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */ 324 andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */
355BEGIN_FTR_SECTION 325BEGIN_FTR_SECTION
356 ori r8,r8,_PAGE_COHERENT /* set M (coherence required) */ 326 rlwinm r8,r8,0,~_PAGE_COHERENT /* clear M (coherence not required) */
357END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT) 327END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
358#ifdef CONFIG_PTE_64BIT 328#ifdef CONFIG_PTE_64BIT
359 /* Put the XPN bits into the PTE */ 329 /* Put the XPN bits into the PTE */
360 rlwimi r8,r10,8,20,22 330 rlwimi r8,r10,8,20,22
@@ -663,3 +633,80 @@ _GLOBAL(flush_hash_patch_B)
663 SYNC_601 633 SYNC_601
664 isync 634 isync
665 blr 635 blr
636
637/*
638 * Flush an entry from the TLB
639 */
640_GLOBAL(_tlbie)
641#ifdef CONFIG_SMP
642 rlwinm r8,r1,0,0,(31-THREAD_SHIFT)
643 lwz r8,TI_CPU(r8)
644 oris r8,r8,11
645 mfmsr r10
646 SYNC
647 rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
648 rlwinm r0,r0,0,28,26 /* clear DR */
649 mtmsr r0
650 SYNC_601
651 isync
652 lis r9,mmu_hash_lock@h
653 ori r9,r9,mmu_hash_lock@l
654 tophys(r9,r9)
65510: lwarx r7,0,r9
656 cmpwi 0,r7,0
657 bne- 10b
658 stwcx. r8,0,r9
659 bne- 10b
660 eieio
661 tlbie r3
662 sync
663 TLBSYNC
664 li r0,0
665 stw r0,0(r9) /* clear mmu_hash_lock */
666 mtmsr r10
667 SYNC_601
668 isync
669#else /* CONFIG_SMP */
670 tlbie r3
671 sync
672#endif /* CONFIG_SMP */
673 blr
674
675/*
676 * Flush the entire TLB. 603/603e only
677 */
678_GLOBAL(_tlbia)
679#if defined(CONFIG_SMP)
680 rlwinm r8,r1,0,0,(31-THREAD_SHIFT)
681 lwz r8,TI_CPU(r8)
682 oris r8,r8,10
683 mfmsr r10
684 SYNC
685 rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
686 rlwinm r0,r0,0,28,26 /* clear DR */
687 mtmsr r0
688 SYNC_601
689 isync
690 lis r9,mmu_hash_lock@h
691 ori r9,r9,mmu_hash_lock@l
692 tophys(r9,r9)
69310: lwarx r7,0,r9
694 cmpwi 0,r7,0
695 bne- 10b
696 stwcx. r8,0,r9
697 bne- 10b
698 sync
699 tlbia
700 sync
701 TLBSYNC
702 li r0,0
703 stw r0,0(r9) /* clear mmu_hash_lock */
704 mtmsr r10
705 SYNC_601
706 isync
707#else /* CONFIG_SMP */
708 sync
709 tlbia
710 sync
711#endif /* CONFIG_SMP */
712 blr
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index f0c3b88d50fa..201c7a5486cb 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -53,8 +53,7 @@ unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */
53 53
54/* Subtract one from array size because we don't need a cache for 4K since 54/* Subtract one from array size because we don't need a cache for 4K since
55 * is not a huge page size */ 55 * is not a huge page size */
56#define huge_pgtable_cache(psize) (pgtable_cache[HUGEPTE_CACHE_NUM \ 56#define HUGE_PGTABLE_INDEX(psize) (HUGEPTE_CACHE_NUM + psize - 1)
57 + psize-1])
58#define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize]) 57#define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize])
59 58
60static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = { 59static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = {
@@ -113,7 +112,7 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
113static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, 112static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
114 unsigned long address, unsigned int psize) 113 unsigned long address, unsigned int psize)
115{ 114{
116 pte_t *new = kmem_cache_zalloc(huge_pgtable_cache(psize), 115 pte_t *new = kmem_cache_zalloc(pgtable_cache[HUGE_PGTABLE_INDEX(psize)],
117 GFP_KERNEL|__GFP_REPEAT); 116 GFP_KERNEL|__GFP_REPEAT);
118 117
119 if (! new) 118 if (! new)
@@ -121,7 +120,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
121 120
122 spin_lock(&mm->page_table_lock); 121 spin_lock(&mm->page_table_lock);
123 if (!hugepd_none(*hpdp)) 122 if (!hugepd_none(*hpdp))
124 kmem_cache_free(huge_pgtable_cache(psize), new); 123 kmem_cache_free(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], new);
125 else 124 else
126 hpdp->pd = (unsigned long)new | HUGEPD_OK; 125 hpdp->pd = (unsigned long)new | HUGEPD_OK;
127 spin_unlock(&mm->page_table_lock); 126 spin_unlock(&mm->page_table_lock);
@@ -763,13 +762,14 @@ static int __init hugetlbpage_init(void)
763 762
764 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { 763 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
765 if (mmu_huge_psizes[psize]) { 764 if (mmu_huge_psizes[psize]) {
766 huge_pgtable_cache(psize) = kmem_cache_create( 765 pgtable_cache[HUGE_PGTABLE_INDEX(psize)] =
767 HUGEPTE_CACHE_NAME(psize), 766 kmem_cache_create(
768 HUGEPTE_TABLE_SIZE(psize), 767 HUGEPTE_CACHE_NAME(psize),
769 HUGEPTE_TABLE_SIZE(psize), 768 HUGEPTE_TABLE_SIZE(psize),
770 0, 769 HUGEPTE_TABLE_SIZE(psize),
771 NULL); 770 0,
772 if (!huge_pgtable_cache(psize)) 771 NULL);
772 if (!pgtable_cache[HUGE_PGTABLE_INDEX(psize)])
773 panic("hugetlbpage_init(): could not create %s"\ 773 panic("hugetlbpage_init(): could not create %s"\
774 "\n", HUGEPTE_CACHE_NAME(psize)); 774 "\n", HUGEPTE_CACHE_NAME(psize));
775 } 775 }
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 388ceda632f3..666a5e8a5be1 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -35,7 +35,6 @@
35#include <asm/pgalloc.h> 35#include <asm/pgalloc.h>
36#include <asm/prom.h> 36#include <asm/prom.h>
37#include <asm/io.h> 37#include <asm/io.h>
38#include <asm/mmu_context.h>
39#include <asm/pgtable.h> 38#include <asm/pgtable.h>
40#include <asm/mmu.h> 39#include <asm/mmu.h>
41#include <asm/smp.h> 40#include <asm/smp.h>
@@ -49,7 +48,7 @@
49 48
50#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) 49#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL)
51/* The ammount of lowmem must be within 0xF0000000 - KERNELBASE. */ 50/* The ammount of lowmem must be within 0xF0000000 - KERNELBASE. */
52#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - KERNELBASE)) 51#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - PAGE_OFFSET))
53#error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL" 52#error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL"
54#endif 53#endif
55#endif 54#endif
@@ -180,9 +179,6 @@ void __init MMU_init(void)
180 if (ppc_md.progress) 179 if (ppc_md.progress)
181 ppc_md.progress("MMU:setio", 0x302); 180 ppc_md.progress("MMU:setio", 0x302);
182 181
183 /* Initialize the context management stuff */
184 mmu_context_init();
185
186 if (ppc_md.progress) 182 if (ppc_md.progress)
187 ppc_md.progress("MMU:exit", 0x211); 183 ppc_md.progress("MMU:exit", 0x211);
188 184
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index b9e1a1da6e52..53b06ebb3f2f 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -102,8 +102,8 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
102 return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot); 102 return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot);
103 103
104 if (!page_is_ram(pfn)) 104 if (!page_is_ram(pfn))
105 vma_prot = __pgprot(pgprot_val(vma_prot) 105 vma_prot = pgprot_noncached(vma_prot);
106 | _PAGE_GUARDED | _PAGE_NO_CACHE); 106
107 return vma_prot; 107 return vma_prot;
108} 108}
109EXPORT_SYMBOL(phys_mem_access_prot); 109EXPORT_SYMBOL(phys_mem_access_prot);
@@ -488,7 +488,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
488 * we invalidate the TLB here, thus avoiding dcbst 488 * we invalidate the TLB here, thus avoiding dcbst
489 * misbehaviour. 489 * misbehaviour.
490 */ 490 */
491 _tlbie(address, 0 /* 8xx doesn't care about PID */); 491 _tlbil_va(address, 0 /* 8xx doesn't care about PID */);
492#endif 492#endif
493 /* The _PAGE_USER test should really be _PAGE_EXEC, but 493 /* The _PAGE_USER test should really be _PAGE_EXEC, but
494 * older glibc versions execute some code from no-exec 494 * older glibc versions execute some code from no-exec
diff --git a/arch/powerpc/mm/mmu_context_32.c b/arch/powerpc/mm/mmu_context_32.c
deleted file mode 100644
index cc32ba41d900..000000000000
--- a/arch/powerpc/mm/mmu_context_32.c
+++ /dev/null
@@ -1,84 +0,0 @@
1/*
2 * This file contains the routines for handling the MMU on those
3 * PowerPC implementations where the MMU substantially follows the
4 * architecture specification. This includes the 6xx, 7xx, 7xxx,
5 * 8260, and POWER3 implementations but excludes the 8xx and 4xx.
6 * -- paulus
7 *
8 * Derived from arch/ppc/mm/init.c:
9 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
10 *
11 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
12 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
13 * Copyright (C) 1996 Paul Mackerras
14 *
15 * Derived from "arch/i386/mm/init.c"
16 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version.
22 *
23 */
24
25#include <linux/mm.h>
26#include <linux/init.h>
27
28#include <asm/mmu_context.h>
29#include <asm/tlbflush.h>
30
31unsigned long next_mmu_context;
32unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
33#ifdef FEW_CONTEXTS
34atomic_t nr_free_contexts;
35struct mm_struct *context_mm[LAST_CONTEXT+1];
36void steal_context(void);
37#endif /* FEW_CONTEXTS */
38
39/*
40 * Initialize the context management stuff.
41 */
42void __init
43mmu_context_init(void)
44{
45 /*
46 * Some processors have too few contexts to reserve one for
47 * init_mm, and require using context 0 for a normal task.
48 * Other processors reserve the use of context zero for the kernel.
49 * This code assumes FIRST_CONTEXT < 32.
50 */
51 context_map[0] = (1 << FIRST_CONTEXT) - 1;
52 next_mmu_context = FIRST_CONTEXT;
53#ifdef FEW_CONTEXTS
54 atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1);
55#endif /* FEW_CONTEXTS */
56}
57
58#ifdef FEW_CONTEXTS
59/*
60 * Steal a context from a task that has one at the moment.
61 * This is only used on 8xx and 4xx and we presently assume that
62 * they don't do SMP. If they do then this will have to check
63 * whether the MM we steal is in use.
64 * We also assume that this is only used on systems that don't
65 * use an MMU hash table - this is true for 8xx and 4xx.
66 * This isn't an LRU system, it just frees up each context in
67 * turn (sort-of pseudo-random replacement :). This would be the
68 * place to implement an LRU scheme if anyone was motivated to do it.
69 * -- paulus
70 */
71void
72steal_context(void)
73{
74 struct mm_struct *mm;
75
76 /* free up context `next_mmu_context' */
77 /* if we shouldn't free context 0, don't... */
78 if (next_mmu_context < FIRST_CONTEXT)
79 next_mmu_context = FIRST_CONTEXT;
80 mm = context_mm[next_mmu_context];
81 flush_tlb_mm(mm);
82 destroy_context(mm);
83}
84#endif /* FEW_CONTEXTS */
diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c
new file mode 100644
index 000000000000..0dfba2bf7f31
--- /dev/null
+++ b/arch/powerpc/mm/mmu_context_hash32.c
@@ -0,0 +1,103 @@
1/*
2 * This file contains the routines for handling the MMU on those
3 * PowerPC implementations where the MMU substantially follows the
4 * architecture specification. This includes the 6xx, 7xx, 7xxx,
5 * 8260, and POWER3 implementations but excludes the 8xx and 4xx.
6 * -- paulus
7 *
8 * Derived from arch/ppc/mm/init.c:
9 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
10 *
11 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
12 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
13 * Copyright (C) 1996 Paul Mackerras
14 *
15 * Derived from "arch/i386/mm/init.c"
16 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version.
22 *
23 */
24
25#include <linux/mm.h>
26#include <linux/init.h>
27
28#include <asm/mmu_context.h>
29#include <asm/tlbflush.h>
30
31/*
32 * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs
33 * (virtual segment identifiers) for each context. Although the
34 * hardware supports 24-bit VSIDs, and thus >1 million contexts,
35 * we only use 32,768 of them. That is ample, since there can be
36 * at most around 30,000 tasks in the system anyway, and it means
37 * that we can use a bitmap to indicate which contexts are in use.
38 * Using a bitmap means that we entirely avoid all of the problems
39 * that we used to have when the context number overflowed,
40 * particularly on SMP systems.
41 * -- paulus.
42 */
43#define NO_CONTEXT ((unsigned long) -1)
44#define LAST_CONTEXT 32767
45#define FIRST_CONTEXT 1
46
47/*
48 * This function defines the mapping from contexts to VSIDs (virtual
49 * segment IDs). We use a skew on both the context and the high 4 bits
50 * of the 32-bit virtual address (the "effective segment ID") in order
51 * to spread out the entries in the MMU hash table. Note, if this
52 * function is changed then arch/ppc/mm/hashtable.S will have to be
53 * changed to correspond.
54 *
55 *
56 * CTX_TO_VSID(ctx, va) (((ctx) * (897 * 16) + ((va) >> 28) * 0x111) \
57 * & 0xffffff)
58 */
59
60static unsigned long next_mmu_context;
61static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
62
63
64/*
65 * Set up the context for a new address space.
66 */
67int init_new_context(struct task_struct *t, struct mm_struct *mm)
68{
69 unsigned long ctx = next_mmu_context;
70
71 while (test_and_set_bit(ctx, context_map)) {
72 ctx = find_next_zero_bit(context_map, LAST_CONTEXT+1, ctx);
73 if (ctx > LAST_CONTEXT)
74 ctx = 0;
75 }
76 next_mmu_context = (ctx + 1) & LAST_CONTEXT;
77 mm->context.id = ctx;
78
79 return 0;
80}
81
82/*
83 * We're finished using the context for an address space.
84 */
85void destroy_context(struct mm_struct *mm)
86{
87 preempt_disable();
88 if (mm->context.id != NO_CONTEXT) {
89 clear_bit(mm->context.id, context_map);
90 mm->context.id = NO_CONTEXT;
91 }
92 preempt_enable();
93}
94
95/*
96 * Initialize the context management stuff.
97 */
98void __init mmu_context_init(void)
99{
100 /* Reserve context 0 for kernel use */
101 context_map[0] = (1 << FIRST_CONTEXT) - 1;
102 next_mmu_context = FIRST_CONTEXT;
103}
diff --git a/arch/powerpc/mm/mmu_context_64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 1db38ba1f544..dbeb86ac90cd 100644
--- a/arch/powerpc/mm/mmu_context_64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -24,6 +24,14 @@
24static DEFINE_SPINLOCK(mmu_context_lock); 24static DEFINE_SPINLOCK(mmu_context_lock);
25static DEFINE_IDR(mmu_context_idr); 25static DEFINE_IDR(mmu_context_idr);
26 26
27/*
28 * The proto-VSID space has 2^35 - 1 segments available for user mappings.
29 * Each segment contains 2^28 bytes. Each context maps 2^44 bytes,
30 * so we can support 2^19-1 contexts (19 == 35 + 28 - 44).
31 */
32#define NO_CONTEXT 0
33#define MAX_CONTEXT ((1UL << 19) - 1)
34
27int init_new_context(struct task_struct *tsk, struct mm_struct *mm) 35int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
28{ 36{
29 int index; 37 int index;
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
new file mode 100644
index 000000000000..52a0cfc38b64
--- /dev/null
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -0,0 +1,397 @@
1/*
2 * This file contains the routines for handling the MMU on those
3 * PowerPC implementations where the MMU is not using the hash
4 * table, such as 8xx, 4xx, BookE's etc...
5 *
6 * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
7 * IBM Corp.
8 *
9 * Derived from previous arch/powerpc/mm/mmu_context.c
10 * and arch/powerpc/include/asm/mmu_context.h
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * TODO:
18 *
19 * - The global context lock will not scale very well
20 * - The maps should be dynamically allocated to allow for processors
21 * that support more PID bits at runtime
22 * - Implement flush_tlb_mm() by making the context stale and picking
23 * a new one
24 * - More aggressively clear stale map bits and maybe find some way to
25 * also clear mm->cpu_vm_mask bits when processes are migrated
26 */
27
28#undef DEBUG
29#define DEBUG_STEAL_ONLY
30#undef DEBUG_MAP_CONSISTENCY
31/*#define DEBUG_CLAMP_LAST_CONTEXT 15 */
32
33#include <linux/kernel.h>
34#include <linux/mm.h>
35#include <linux/init.h>
36#include <linux/spinlock.h>
37#include <linux/bootmem.h>
38#include <linux/notifier.h>
39#include <linux/cpu.h>
40
41#include <asm/mmu_context.h>
42#include <asm/tlbflush.h>
43
44static unsigned int first_context, last_context;
45static unsigned int next_context, nr_free_contexts;
46static unsigned long *context_map;
47static unsigned long *stale_map[NR_CPUS];
48static struct mm_struct **context_mm;
49static spinlock_t context_lock = SPIN_LOCK_UNLOCKED;
50
51#define CTX_MAP_SIZE \
52 (sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1))
53
54
55/* Steal a context from a task that has one at the moment.
56 *
57 * This is used when we are running out of available PID numbers
58 * on the processors.
59 *
60 * This isn't an LRU system, it just frees up each context in
61 * turn (sort-of pseudo-random replacement :). This would be the
62 * place to implement an LRU scheme if anyone was motivated to do it.
63 * -- paulus
64 *
65 * For context stealing, we use a slightly different approach for
66 * SMP and UP. Basically, the UP one is simpler and doesn't use
67 * the stale map as we can just flush the local CPU
68 * -- benh
69 */
70#ifdef CONFIG_SMP
71static unsigned int steal_context_smp(unsigned int id)
72{
73 struct mm_struct *mm;
74 unsigned int cpu, max;
75
76 again:
77 max = last_context - first_context;
78
79 /* Attempt to free next_context first and then loop until we manage */
80 while (max--) {
81 /* Pick up the victim mm */
82 mm = context_mm[id];
83
84 /* We have a candidate victim, check if it's active, on SMP
85 * we cannot steal active contexts
86 */
87 if (mm->context.active) {
88 id++;
89 if (id > last_context)
90 id = first_context;
91 continue;
92 }
93 pr_debug("[%d] steal context %d from mm @%p\n",
94 smp_processor_id(), id, mm);
95
96 /* Mark this mm as having no context anymore */
97 mm->context.id = MMU_NO_CONTEXT;
98
99 /* Mark it stale on all CPUs that used this mm */
100 for_each_cpu_mask_nr(cpu, mm->cpu_vm_mask)
101 __set_bit(id, stale_map[cpu]);
102 return id;
103 }
104
105 /* This will happen if you have more CPUs than available contexts,
106 * all we can do here is wait a bit and try again
107 */
108 spin_unlock(&context_lock);
109 cpu_relax();
110 spin_lock(&context_lock);
111 goto again;
112}
113#endif /* CONFIG_SMP */
114
115/* Note that this will also be called on SMP if all other CPUs are
116 * offlined, which means that it may be called for cpu != 0. For
117 * this to work, we somewhat assume that CPUs that are onlined
118 * come up with a fully clean TLB (or are cleaned when offlined)
119 */
120static unsigned int steal_context_up(unsigned int id)
121{
122 struct mm_struct *mm;
123 int cpu = smp_processor_id();
124
125 /* Pick up the victim mm */
126 mm = context_mm[id];
127
128 pr_debug("[%d] steal context %d from mm @%p\n", cpu, id, mm);
129
130 /* Mark this mm as having no context anymore */
131 mm->context.id = MMU_NO_CONTEXT;
132
133 /* Flush the TLB for that context */
134 local_flush_tlb_mm(mm);
135
136 /* XXX This clear should ultimately be part of local_flush_tlb_mm */
137 __clear_bit(id, stale_map[cpu]);
138
139 return id;
140}
141
142#ifdef DEBUG_MAP_CONSISTENCY
143static void context_check_map(void)
144{
145 unsigned int id, nrf, nact;
146
147 nrf = nact = 0;
148 for (id = first_context; id <= last_context; id++) {
149 int used = test_bit(id, context_map);
150 if (!used)
151 nrf++;
152 if (used != (context_mm[id] != NULL))
153 pr_err("MMU: Context %d is %s and MM is %p !\n",
154 id, used ? "used" : "free", context_mm[id]);
155 if (context_mm[id] != NULL)
156 nact += context_mm[id]->context.active;
157 }
158 if (nrf != nr_free_contexts) {
159 pr_err("MMU: Free context count out of sync ! (%d vs %d)\n",
160 nr_free_contexts, nrf);
161 nr_free_contexts = nrf;
162 }
163 if (nact > num_online_cpus())
164 pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
165 nact, num_online_cpus());
166 if (first_context > 0 && !test_bit(0, context_map))
167 pr_err("MMU: Context 0 has been freed !!!\n");
168}
169#else
170static void context_check_map(void) { }
171#endif
172
173void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
174{
175 unsigned int id, cpu = smp_processor_id();
176 unsigned long *map;
177
178 /* No lockless fast path .. yet */
179 spin_lock(&context_lock);
180
181#ifndef DEBUG_STEAL_ONLY
182 pr_debug("[%d] activating context for mm @%p, active=%d, id=%d\n",
183 cpu, next, next->context.active, next->context.id);
184#endif
185
186#ifdef CONFIG_SMP
187 /* Mark us active and the previous one not anymore */
188 next->context.active++;
189 if (prev) {
190#ifndef DEBUG_STEAL_ONLY
191 pr_debug(" old context %p active was: %d\n",
192 prev, prev->context.active);
193#endif
194 WARN_ON(prev->context.active < 1);
195 prev->context.active--;
196 }
197#endif /* CONFIG_SMP */
198
199 /* If we already have a valid assigned context, skip all that */
200 id = next->context.id;
201 if (likely(id != MMU_NO_CONTEXT))
202 goto ctxt_ok;
203
204 /* We really don't have a context, let's try to acquire one */
205 id = next_context;
206 if (id > last_context)
207 id = first_context;
208 map = context_map;
209
210 /* No more free contexts, let's try to steal one */
211 if (nr_free_contexts == 0) {
212#ifdef CONFIG_SMP
213 if (num_online_cpus() > 1) {
214 id = steal_context_smp(id);
215 goto stolen;
216 }
217#endif /* CONFIG_SMP */
218 id = steal_context_up(id);
219 goto stolen;
220 }
221 nr_free_contexts--;
222
223 /* We know there's at least one free context, try to find it */
224 while (__test_and_set_bit(id, map)) {
225 id = find_next_zero_bit(map, last_context+1, id);
226 if (id > last_context)
227 id = first_context;
228 }
229 stolen:
230 next_context = id + 1;
231 context_mm[id] = next;
232 next->context.id = id;
233
234#ifndef DEBUG_STEAL_ONLY
235 pr_debug("[%d] picked up new id %d, nrf is now %d\n",
236 cpu, id, nr_free_contexts);
237#endif
238
239 context_check_map();
240 ctxt_ok:
241
242 /* If that context got marked stale on this CPU, then flush the
243 * local TLB for it and unmark it before we use it
244 */
245 if (test_bit(id, stale_map[cpu])) {
246 pr_debug("[%d] flushing stale context %d for mm @%p !\n",
247 cpu, id, next);
248 local_flush_tlb_mm(next);
249
250 /* XXX This clear should ultimately be part of local_flush_tlb_mm */
251 __clear_bit(id, stale_map[cpu]);
252 }
253
254 /* Flick the MMU and release lock */
255 set_context(id, next->pgd);
256 spin_unlock(&context_lock);
257}
258
259/*
260 * Set up the context for a new address space.
261 */
262int init_new_context(struct task_struct *t, struct mm_struct *mm)
263{
264 mm->context.id = MMU_NO_CONTEXT;
265 mm->context.active = 0;
266
267 return 0;
268}
269
270/*
271 * We're finished using the context for an address space.
272 */
273void destroy_context(struct mm_struct *mm)
274{
275 unsigned int id;
276
277 if (mm->context.id == MMU_NO_CONTEXT)
278 return;
279
280 WARN_ON(mm->context.active != 0);
281
282 spin_lock(&context_lock);
283 id = mm->context.id;
284 if (id != MMU_NO_CONTEXT) {
285 __clear_bit(id, context_map);
286 mm->context.id = MMU_NO_CONTEXT;
287#ifdef DEBUG_MAP_CONSISTENCY
288 mm->context.active = 0;
289 context_mm[id] = NULL;
290#endif
291 nr_free_contexts++;
292 }
293 spin_unlock(&context_lock);
294}
295
296#ifdef CONFIG_SMP
297
298static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
299 unsigned long action, void *hcpu)
300{
301 unsigned int cpu = (unsigned int)(long)hcpu;
302
303 /* We don't touch CPU 0 map, it's allocated at boot and kept
304 * around forever
305 */
306 if (cpu == 0)
307 return NOTIFY_OK;
308
309 switch (action) {
310 case CPU_ONLINE:
311 case CPU_ONLINE_FROZEN:
312 pr_debug("MMU: Allocating stale context map for CPU %d\n", cpu);
313 stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
314 break;
315#ifdef CONFIG_HOTPLUG_CPU
316 case CPU_DEAD:
317 case CPU_DEAD_FROZEN:
318 pr_debug("MMU: Freeing stale context map for CPU %d\n", cpu);
319 kfree(stale_map[cpu]);
320 stale_map[cpu] = NULL;
321 break;
322#endif
323 }
324 return NOTIFY_OK;
325}
326
327static struct notifier_block __cpuinitdata mmu_context_cpu_nb = {
328 .notifier_call = mmu_context_cpu_notify,
329};
330
331#endif /* CONFIG_SMP */
332
333/*
334 * Initialize the context management stuff.
335 */
336void __init mmu_context_init(void)
337{
338 /* Mark init_mm as being active on all possible CPUs since
339 * we'll get called with prev == init_mm the first time
340 * we schedule on a given CPU
341 */
342 init_mm.context.active = NR_CPUS;
343
344 /*
345 * The MPC8xx has only 16 contexts. We rotate through them on each
346 * task switch. A better way would be to keep track of tasks that
347 * own contexts, and implement an LRU usage. That way very active
348 * tasks don't always have to pay the TLB reload overhead. The
349 * kernel pages are mapped shared, so the kernel can run on behalf
350 * of any task that makes a kernel entry. Shared does not mean they
351 * are not protected, just that the ASID comparison is not performed.
352 * -- Dan
353 *
354 * The IBM4xx has 256 contexts, so we can just rotate through these
355 * as a way of "switching" contexts. If the TID of the TLB is zero,
356 * the PID/TID comparison is disabled, so we can use a TID of zero
357 * to represent all kernel pages as shared among all contexts.
358 * -- Dan
359 */
360 if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
361 first_context = 0;
362 last_context = 15;
363 } else {
364 first_context = 1;
365 last_context = 255;
366 }
367
368#ifdef DEBUG_CLAMP_LAST_CONTEXT
369 last_context = DEBUG_CLAMP_LAST_CONTEXT;
370#endif
371 /*
372 * Allocate the maps used by context management
373 */
374 context_map = alloc_bootmem(CTX_MAP_SIZE);
375 context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1));
376 stale_map[0] = alloc_bootmem(CTX_MAP_SIZE);
377
378#ifdef CONFIG_SMP
379 register_cpu_notifier(&mmu_context_cpu_nb);
380#endif
381
382 printk(KERN_INFO
383 "MMU: Allocated %zu bytes of context maps for %d contexts\n",
384 2 * CTX_MAP_SIZE + (sizeof(void *) * (last_context + 1)),
385 last_context - first_context + 1); /* byte count is a size_t (built from sizeof), hence %zu */
386
387 /*
388 * Some processors have too few contexts to reserve one for
389 * init_mm, and require using context 0 for a normal task.
390 * Other processors reserve the use of context zero for the kernel.
391 * This code assumes first_context < 32.
392 */
393 context_map[0] = (1 << first_context) - 1;
394 next_context = first_context;
395 nr_free_contexts = last_context - first_context + 1;
396}
397
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index fab3cfad4099..4314b39b6faf 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -22,10 +22,58 @@
22#include <asm/tlbflush.h> 22#include <asm/tlbflush.h>
23#include <asm/mmu.h> 23#include <asm/mmu.h>
24 24
25#ifdef CONFIG_PPC_MMU_NOHASH
26
27/*
28 * On 40x and 8xx, we directly inline tlbia and tlbivax
29 */
30#if defined(CONFIG_40x) || defined(CONFIG_8xx)
31static inline void _tlbil_all(void)
32{
33 asm volatile ("sync; tlbia; isync" : : : "memory")
34}
35static inline void _tlbil_pid(unsigned int pid)
36{
37 asm volatile ("sync; tlbia; isync" : : : "memory")
38}
39#else /* CONFIG_40x || CONFIG_8xx */
40extern void _tlbil_all(void);
41extern void _tlbil_pid(unsigned int pid);
42#endif /* !(CONFIG_40x || CONFIG_8xx) */
43
44/*
45 * On 8xx, we directly inline tlbie, on others, it's extern
46 */
47#ifdef CONFIG_8xx
48static inline void _tlbil_va(unsigned long address, unsigned int pid)
49{
50 asm volatile ("tlbie %0; sync" : : "r" (address) : "memory")
51}
52#else /* CONFIG_8xx */
53extern void _tlbil_va(unsigned long address, unsigned int pid);
54#endif /* CONFIG_8xx */
55
/*
 * No implementation supports tlbivax broadcast today, so this stub
 * simply triggers BUG() if it is ever reached.  Once an implementation
 * does support it, this turns into an extern declaration.
 */
static inline void _tlbivax_bcast(unsigned long address, unsigned int pid)
{
	BUG();
}
65
66#else /* CONFIG_PPC_MMU_NOHASH */
67
25extern void hash_preload(struct mm_struct *mm, unsigned long ea, 68extern void hash_preload(struct mm_struct *mm, unsigned long ea,
26 unsigned long access, unsigned long trap); 69 unsigned long access, unsigned long trap);
27 70
28 71
72extern void _tlbie(unsigned long address);
73extern void _tlbia(void);
74
75#endif /* CONFIG_PPC_MMU_NOHASH */
76
29#ifdef CONFIG_PPC32 77#ifdef CONFIG_PPC32
30extern void mapin_ram(void); 78extern void mapin_ram(void);
31extern int map_page(unsigned long va, phys_addr_t pa, int flags); 79extern int map_page(unsigned long va, phys_addr_t pa, int flags);
@@ -58,17 +106,14 @@ extern phys_addr_t lowmem_end_addr;
58 * architectures. -- Dan 106 * architectures. -- Dan
59 */ 107 */
60#if defined(CONFIG_8xx) 108#if defined(CONFIG_8xx)
61#define flush_HPTE(X, va, pg) _tlbie(va, 0 /* 8xx doesn't care about PID */)
62#define MMU_init_hw() do { } while(0) 109#define MMU_init_hw() do { } while(0)
63#define mmu_mapin_ram() (0UL) 110#define mmu_mapin_ram() (0UL)
64 111
65#elif defined(CONFIG_4xx) 112#elif defined(CONFIG_4xx)
66#define flush_HPTE(pid, va, pg) _tlbie(va, pid)
67extern void MMU_init_hw(void); 113extern void MMU_init_hw(void);
68extern unsigned long mmu_mapin_ram(void); 114extern unsigned long mmu_mapin_ram(void);
69 115
70#elif defined(CONFIG_FSL_BOOKE) 116#elif defined(CONFIG_FSL_BOOKE)
71#define flush_HPTE(pid, va, pg) _tlbie(va, pid)
72extern void MMU_init_hw(void); 117extern void MMU_init_hw(void);
73extern unsigned long mmu_mapin_ram(void); 118extern unsigned long mmu_mapin_ram(void);
74extern void adjust_total_lowmem(void); 119extern void adjust_total_lowmem(void);
@@ -77,18 +122,4 @@ extern void adjust_total_lowmem(void);
77/* anything 32-bit except 4xx or 8xx */ 122/* anything 32-bit except 4xx or 8xx */
78extern void MMU_init_hw(void); 123extern void MMU_init_hw(void);
79extern unsigned long mmu_mapin_ram(void); 124extern unsigned long mmu_mapin_ram(void);
80
81/* Be careful....this needs to be updated if we ever encounter 603 SMPs,
82 * which includes all new 82xx processors. We need tlbie/tlbsync here
83 * in that case (I think). -- Dan.
84 */
85static inline void flush_HPTE(unsigned context, unsigned long va,
86 unsigned long pdval)
87{
88 if ((Hash != 0) &&
89 cpu_has_feature(CPU_FTR_HPTE_TABLE))
90 flush_hash_pages(0, va, pdval, 1);
91 else
92 _tlbie(va);
93}
94#endif 125#endif
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
new file mode 100644
index 000000000000..6d94116fdea1
--- /dev/null
+++ b/arch/powerpc/mm/pgtable.c
@@ -0,0 +1,117 @@
1/*
2 * This file contains common routines for dealing with free of page tables
3 *
4 * Derived from arch/powerpc/mm/tlb_64.c:
5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
6 *
7 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
8 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
9 * Copyright (C) 1996 Paul Mackerras
10 *
11 * Derived from "arch/i386/mm/init.c"
12 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
13 *
14 * Dave Engebretsen <engebret@us.ibm.com>
15 * Rework for PPC64 port.
16 *
17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public License
19 * as published by the Free Software Foundation; either version
20 * 2 of the License, or (at your option) any later version.
21 */
22
23#include <linux/kernel.h>
24#include <linux/mm.h>
25#include <linux/init.h>
26#include <linux/percpu.h>
27#include <linux/hardirq.h>
28#include <asm/pgalloc.h>
29#include <asm/tlbflush.h>
30#include <asm/tlb.h>
31
32static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
33static unsigned long pte_freelist_forced_free;
34
35struct pte_freelist_batch
36{
37 struct rcu_head rcu;
38 unsigned int index;
39 pgtable_free_t tables[0];
40};
41
42#define PTE_FREELIST_SIZE \
43 ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
44 / sizeof(pgtable_free_t))
45
/*
 * Intentionally empty cross-call handler: it exists only so that calling
 * it on the other CPUs synchronises with them.
 */
static void pte_free_smp_sync(void *arg)
{
}
50
51/* This is only called when we are critically out of memory
52 * (and fail to get a page in pte_free_tlb).
53 */
54static void pgtable_free_now(pgtable_free_t pgf)
55{
56 pte_freelist_forced_free++;
57
58 smp_call_function(pte_free_smp_sync, NULL, 1);
59
60 pgtable_free(pgf);
61}
62
63static void pte_free_rcu_callback(struct rcu_head *head)
64{
65 struct pte_freelist_batch *batch =
66 container_of(head, struct pte_freelist_batch, rcu);
67 unsigned int i;
68
69 for (i = 0; i < batch->index; i++)
70 pgtable_free(batch->tables[i]);
71
72 free_page((unsigned long)batch);
73}
74
75static void pte_free_submit(struct pte_freelist_batch *batch)
76{
77 INIT_RCU_HEAD(&batch->rcu);
78 call_rcu(&batch->rcu, pte_free_rcu_callback);
79}
80
81void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
82{
83 /* This is safe since tlb_gather_mmu has disabled preemption */
84 cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
85 struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
86
87 if (atomic_read(&tlb->mm->mm_users) < 2 ||
88 cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
89 pgtable_free(pgf);
90 return;
91 }
92
93 if (*batchp == NULL) {
94 *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
95 if (*batchp == NULL) {
96 pgtable_free_now(pgf);
97 return;
98 }
99 (*batchp)->index = 0;
100 }
101 (*batchp)->tables[(*batchp)->index++] = pgf;
102 if ((*batchp)->index == PTE_FREELIST_SIZE) {
103 pte_free_submit(*batchp);
104 *batchp = NULL;
105 }
106}
107
108void pte_free_finish(void)
109{
110 /* This is safe since tlb_gather_mmu has disabled preemption */
111 struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
112
113 if (*batchp == NULL)
114 return;
115 pte_free_submit(*batchp);
116 *batchp = NULL;
117}
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index c31d6d26f0b5..38ff35f2142a 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -48,10 +48,6 @@ EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
48 48
49extern char etext[], _stext[]; 49extern char etext[], _stext[];
50 50
51#ifdef CONFIG_SMP
52extern void hash_page_sync(void);
53#endif
54
55#ifdef HAVE_BATS 51#ifdef HAVE_BATS
56extern phys_addr_t v_mapped_by_bats(unsigned long va); 52extern phys_addr_t v_mapped_by_bats(unsigned long va);
57extern unsigned long p_mapped_by_bats(phys_addr_t pa); 53extern unsigned long p_mapped_by_bats(phys_addr_t pa);
@@ -72,24 +68,29 @@ extern unsigned long p_mapped_by_tlbcam(unsigned long pa);
72#define p_mapped_by_tlbcam(x) (0UL) 68#define p_mapped_by_tlbcam(x) (0UL)
73#endif /* HAVE_TLBCAM */ 69#endif /* HAVE_TLBCAM */
74 70
75#ifdef CONFIG_PTE_64BIT 71#define PGDIR_ORDER (32 + PGD_T_LOG2 - PGDIR_SHIFT)
76/* Some processors use an 8kB pgdir because they have 8-byte Linux PTEs. */
77#define PGDIR_ORDER 1
78#else
79#define PGDIR_ORDER 0
80#endif
81 72
82pgd_t *pgd_alloc(struct mm_struct *mm) 73pgd_t *pgd_alloc(struct mm_struct *mm)
83{ 74{
84 pgd_t *ret; 75 pgd_t *ret;
85 76
86 ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER); 77 /* pgdir take page or two with 4K pages and a page fraction otherwise */
78#ifndef CONFIG_PPC_4K_PAGES
79 ret = (pgd_t *)kzalloc(1 << PGDIR_ORDER, GFP_KERNEL);
80#else
81 ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
82 PGDIR_ORDER - PAGE_SHIFT);
83#endif
87 return ret; 84 return ret;
88} 85}
89 86
90void pgd_free(struct mm_struct *mm, pgd_t *pgd) 87void pgd_free(struct mm_struct *mm, pgd_t *pgd)
91{ 88{
92 free_pages((unsigned long)pgd, PGDIR_ORDER); 89#ifndef CONFIG_PPC_4K_PAGES
90 kfree((void *)pgd);
91#else
92 free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT);
93#endif
93} 94}
94 95
95__init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) 96__init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
@@ -125,23 +126,6 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
125 return ptepage; 126 return ptepage;
126} 127}
127 128
128void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
129{
130#ifdef CONFIG_SMP
131 hash_page_sync();
132#endif
133 free_page((unsigned long)pte);
134}
135
136void pte_free(struct mm_struct *mm, pgtable_t ptepage)
137{
138#ifdef CONFIG_SMP
139 hash_page_sync();
140#endif
141 pgtable_page_dtor(ptepage);
142 __free_page(ptepage);
143}
144
145void __iomem * 129void __iomem *
146ioremap(phys_addr_t addr, unsigned long size) 130ioremap(phys_addr_t addr, unsigned long size)
147{ 131{
@@ -194,6 +178,7 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
194 if (p < 16*1024*1024) 178 if (p < 16*1024*1024)
195 p += _ISA_MEM_BASE; 179 p += _ISA_MEM_BASE;
196 180
181#ifndef CONFIG_CRASH_DUMP
197 /* 182 /*
198 * Don't allow anybody to remap normal RAM that we're using. 183 * Don't allow anybody to remap normal RAM that we're using.
199 * mem_init() sets high_memory so only do the check after that. 184 * mem_init() sets high_memory so only do the check after that.
@@ -203,6 +188,7 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
203 (unsigned long long)p, __builtin_return_address(0)); 188 (unsigned long long)p, __builtin_return_address(0));
204 return NULL; 189 return NULL;
205 } 190 }
191#endif
206 192
207 if (size == 0) 193 if (size == 0)
208 return NULL; 194 return NULL;
@@ -288,7 +274,7 @@ int map_page(unsigned long va, phys_addr_t pa, int flags)
288} 274}
289 275
290/* 276/*
291 * Map in a big chunk of physical memory starting at KERNELBASE. 277 * Map in a big chunk of physical memory starting at PAGE_OFFSET.
292 */ 278 */
293void __init mapin_ram(void) 279void __init mapin_ram(void)
294{ 280{
@@ -297,7 +283,7 @@ void __init mapin_ram(void)
297 int ktext; 283 int ktext;
298 284
299 s = mmu_mapin_ram(); 285 s = mmu_mapin_ram();
300 v = KERNELBASE + s; 286 v = PAGE_OFFSET + s;
301 p = memstart_addr + s; 287 p = memstart_addr + s;
302 for (; s < total_lowmem; s += PAGE_SIZE) { 288 for (; s < total_lowmem; s += PAGE_SIZE) {
303 ktext = ((char *) v >= _stext && (char *) v < etext); 289 ktext = ((char *) v >= _stext && (char *) v < etext);
@@ -363,7 +349,11 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
363 return -EINVAL; 349 return -EINVAL;
364 set_pte_at(&init_mm, address, kpte, mk_pte(page, prot)); 350 set_pte_at(&init_mm, address, kpte, mk_pte(page, prot));
365 wmb(); 351 wmb();
366 flush_HPTE(0, address, pmd_val(*kpmd)); 352#ifdef CONFIG_PPC_STD_MMU
353 flush_hash_pages(0, address, pmd_val(*kpmd), 1);
354#else
355 flush_tlb_page(NULL, address);
356#endif
367 pte_unmap(kpte); 357 pte_unmap(kpte);
368 358
369 return 0; 359 return 0;
@@ -400,7 +390,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
400#endif /* CONFIG_DEBUG_PAGEALLOC */ 390#endif /* CONFIG_DEBUG_PAGEALLOC */
401 391
402static int fixmaps; 392static int fixmaps;
403unsigned long FIXADDR_TOP = 0xfffff000; 393unsigned long FIXADDR_TOP = (-PAGE_SIZE);
404EXPORT_SYMBOL(FIXADDR_TOP); 394EXPORT_SYMBOL(FIXADDR_TOP);
405 395
406void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) 396void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 6aa120813775..45d925360b89 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -95,16 +95,16 @@ unsigned long __init mmu_mapin_ram(void)
95 break; 95 break;
96 } 96 }
97 97
98 setbat(2, KERNELBASE, 0, bl, _PAGE_RAM); 98 setbat(2, PAGE_OFFSET, 0, bl, _PAGE_RAM);
99 done = (unsigned long)bat_addrs[2].limit - KERNELBASE + 1; 99 done = (unsigned long)bat_addrs[2].limit - PAGE_OFFSET + 1;
100 if ((done < tot) && !bat_addrs[3].limit) { 100 if ((done < tot) && !bat_addrs[3].limit) {
101 /* use BAT3 to cover a bit more */ 101 /* use BAT3 to cover a bit more */
102 tot -= done; 102 tot -= done;
103 for (bl = 128<<10; bl < max_size; bl <<= 1) 103 for (bl = 128<<10; bl < max_size; bl <<= 1)
104 if (bl * 2 > tot) 104 if (bl * 2 > tot)
105 break; 105 break;
106 setbat(3, KERNELBASE+done, done, bl, _PAGE_RAM); 106 setbat(3, PAGE_OFFSET+done, done, bl, _PAGE_RAM);
107 done = (unsigned long)bat_addrs[3].limit - KERNELBASE + 1; 107 done = (unsigned long)bat_addrs[3].limit - PAGE_OFFSET + 1;
108 } 108 }
109 109
110 return done; 110 return done;
@@ -192,7 +192,7 @@ void __init MMU_init_hw(void)
192 extern unsigned int hash_page[]; 192 extern unsigned int hash_page[];
193 extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[]; 193 extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[];
194 194
195 if (!cpu_has_feature(CPU_FTR_HPTE_TABLE)) { 195 if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
196 /* 196 /*
197 * Put a blr (procedure return) instruction at the 197 * Put a blr (procedure return) instruction at the
198 * start of hash_page, since we can still get DSI 198 * start of hash_page, since we can still get DSI
diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_hash32.c
index f9a47fee3927..65190587a365 100644
--- a/arch/powerpc/mm/tlb_32.c
+++ b/arch/powerpc/mm/tlb_hash32.c
@@ -137,6 +137,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
137 flush_range(&init_mm, start, end); 137 flush_range(&init_mm, start, end);
138 FINISH_FLUSH; 138 FINISH_FLUSH;
139} 139}
140EXPORT_SYMBOL(flush_tlb_kernel_range);
140 141
141/* 142/*
142 * Flush all the (user) entries for the address space described by mm. 143 * Flush all the (user) entries for the address space described by mm.
@@ -160,6 +161,7 @@ void flush_tlb_mm(struct mm_struct *mm)
160 flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); 161 flush_range(mp->vm_mm, mp->vm_start, mp->vm_end);
161 FINISH_FLUSH; 162 FINISH_FLUSH;
162} 163}
164EXPORT_SYMBOL(flush_tlb_mm);
163 165
164void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 166void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
165{ 167{
@@ -176,6 +178,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
176 flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); 178 flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1);
177 FINISH_FLUSH; 179 FINISH_FLUSH;
178} 180}
181EXPORT_SYMBOL(flush_tlb_page);
179 182
180/* 183/*
181 * For each address in the range, find the pte for the address 184 * For each address in the range, find the pte for the address
@@ -188,3 +191,4 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
188 flush_range(vma->vm_mm, start, end); 191 flush_range(vma->vm_mm, start, end);
189 FINISH_FLUSH; 192 FINISH_FLUSH;
190} 193}
194EXPORT_SYMBOL(flush_tlb_range);
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_hash64.c
index be7dd422c0fa..c931bc7d1079 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -37,81 +37,6 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
37 * arch/powerpc/include/asm/tlb.h file -- tgall 37 * arch/powerpc/include/asm/tlb.h file -- tgall
38 */ 38 */
39DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 39DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
40static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
41static unsigned long pte_freelist_forced_free;
42
43struct pte_freelist_batch
44{
45 struct rcu_head rcu;
46 unsigned int index;
47 pgtable_free_t tables[0];
48};
49
50#define PTE_FREELIST_SIZE \
51 ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
52 / sizeof(pgtable_free_t))
53
54static void pte_free_smp_sync(void *arg)
55{
56 /* Do nothing, just ensure we sync with all CPUs */
57}
58
59/* This is only called when we are critically out of memory
60 * (and fail to get a page in pte_free_tlb).
61 */
62static void pgtable_free_now(pgtable_free_t pgf)
63{
64 pte_freelist_forced_free++;
65
66 smp_call_function(pte_free_smp_sync, NULL, 1);
67
68 pgtable_free(pgf);
69}
70
71static void pte_free_rcu_callback(struct rcu_head *head)
72{
73 struct pte_freelist_batch *batch =
74 container_of(head, struct pte_freelist_batch, rcu);
75 unsigned int i;
76
77 for (i = 0; i < batch->index; i++)
78 pgtable_free(batch->tables[i]);
79
80 free_page((unsigned long)batch);
81}
82
83static void pte_free_submit(struct pte_freelist_batch *batch)
84{
85 INIT_RCU_HEAD(&batch->rcu);
86 call_rcu(&batch->rcu, pte_free_rcu_callback);
87}
88
89void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
90{
91 /* This is safe since tlb_gather_mmu has disabled preemption */
92 cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
93 struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
94
95 if (atomic_read(&tlb->mm->mm_users) < 2 ||
96 cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
97 pgtable_free(pgf);
98 return;
99 }
100
101 if (*batchp == NULL) {
102 *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
103 if (*batchp == NULL) {
104 pgtable_free_now(pgf);
105 return;
106 }
107 (*batchp)->index = 0;
108 }
109 (*batchp)->tables[(*batchp)->index++] = pgf;
110 if ((*batchp)->index == PTE_FREELIST_SIZE) {
111 pte_free_submit(*batchp);
112 *batchp = NULL;
113 }
114}
115 40
116/* 41/*
117 * A linux PTE was changed and the corresponding hash table entry 42 * A linux PTE was changed and the corresponding hash table entry
@@ -229,17 +154,6 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
229 batch->index = 0; 154 batch->index = 0;
230} 155}
231 156
232void pte_free_finish(void)
233{
234 /* This is safe since tlb_gather_mmu has disabled preemption */
235 struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
236
237 if (*batchp == NULL)
238 return;
239 pte_free_submit(*batchp);
240 *batchp = NULL;
241}
242
243/** 157/**
244 * __flush_hash_table_range - Flush all HPTEs for a given address range 158 * __flush_hash_table_range - Flush all HPTEs for a given address range
245 * from the hash table (and the TLB). But keeps 159 * from the hash table (and the TLB). But keeps
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
new file mode 100644
index 000000000000..803a64c02b06
--- /dev/null
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -0,0 +1,209 @@
1/*
2 * This file contains the routines for TLB flushing.
3 * On machines where the MMU does not use a hash table to store virtual to
4 * physical translations (ie, SW loaded TLBs or Book3E compliant processors,
5 * this does -not- include 603 however which shares the implementation with
6 * hash based processors)
7 *
8 * -- BenH
9 *
10 * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
11 * IBM Corp.
12 *
13 * Derived from arch/ppc/mm/init.c:
14 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
15 *
16 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
17 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
18 * Copyright (C) 1996 Paul Mackerras
19 *
20 * Derived from "arch/i386/mm/init.c"
21 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
22 *
23 * This program is free software; you can redistribute it and/or
24 * modify it under the terms of the GNU General Public License
25 * as published by the Free Software Foundation; either version
26 * 2 of the License, or (at your option) any later version.
27 *
28 */
29
30#include <linux/kernel.h>
31#include <linux/mm.h>
32#include <linux/init.h>
33#include <linux/highmem.h>
34#include <linux/pagemap.h>
35#include <linux/preempt.h>
36#include <linux/spinlock.h>
37
38#include <asm/tlbflush.h>
39#include <asm/tlb.h>
40
41#include "mmu_decl.h"
42
43/*
44 * Base TLB flushing operations:
45 *
46 * - flush_tlb_mm(mm) flushes the specified mm context TLB's
47 * - flush_tlb_page(vma, vmaddr) flushes one page
48 * - flush_tlb_range(vma, start, end) flushes a range of pages
49 * - flush_tlb_kernel_range(start, end) flushes kernel pages
50 *
51 * - local_* variants of page and mm only apply to the current
52 * processor
53 */
54
55/*
56 * These are the base non-SMP variants of page and mm flushing
57 */
58void local_flush_tlb_mm(struct mm_struct *mm)
59{
60 unsigned int pid;
61
62 preempt_disable();
63 pid = mm->context.id;
64 if (pid != MMU_NO_CONTEXT)
65 _tlbil_pid(pid);
66 preempt_enable();
67}
68EXPORT_SYMBOL(local_flush_tlb_mm);
69
70void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
71{
72 unsigned int pid;
73
74 preempt_disable();
75 pid = vma ? vma->vm_mm->context.id : 0;
76 if (pid != MMU_NO_CONTEXT)
77 _tlbil_va(vmaddr, pid);
78 preempt_enable();
79}
80EXPORT_SYMBOL(local_flush_tlb_page);
81
82
83/*
84 * And here are the SMP non-local implementations
85 */
86#ifdef CONFIG_SMP
87
88static DEFINE_SPINLOCK(tlbivax_lock);
89
/* Argument block passed to the cross-call flush handlers */
struct tlb_flush_param {
	unsigned long addr;	/* address, used by the page flush handler */
	unsigned int pid;	/* context id to flush */
};
94
95static void do_flush_tlb_mm_ipi(void *param)
96{
97 struct tlb_flush_param *p = param;
98
99 _tlbil_pid(p ? p->pid : 0);
100}
101
102static void do_flush_tlb_page_ipi(void *param)
103{
104 struct tlb_flush_param *p = param;
105
106 _tlbil_va(p->addr, p->pid);
107}
108
109
110/* Note on invalidations and PID:
111 *
112 * We snapshot the PID with preempt disabled. At this point, it can still
113 * change either because:
114 * - our context is being stolen (PID -> NO_CONTEXT) on another CPU
115 * - we are invalidating some target that isn't currently running here
116 * and is concurrently acquiring a new PID on another CPU
117 * - some other CPU is re-acquiring a lost PID for this mm
118 * etc...
119 *
120 * However, this shouldn't be a problem as we only guarantee
121 * invalidation of TLB entries present prior to this call, so we
122 * don't care about the PID changing, and invalidating a stale PID
123 * is generally harmless.
124 */
125
126void flush_tlb_mm(struct mm_struct *mm)
127{
128 cpumask_t cpu_mask;
129 unsigned int pid;
130
131 preempt_disable();
132 pid = mm->context.id;
133 if (unlikely(pid == MMU_NO_CONTEXT))
134 goto no_context;
135 cpu_mask = mm->cpu_vm_mask;
136 cpu_clear(smp_processor_id(), cpu_mask);
137 if (!cpus_empty(cpu_mask)) {
138 struct tlb_flush_param p = { .pid = pid };
139 smp_call_function_mask(cpu_mask, do_flush_tlb_mm_ipi, &p, 1);
140 }
141 _tlbil_pid(pid);
142 no_context:
143 preempt_enable();
144}
145EXPORT_SYMBOL(flush_tlb_mm);
146
147void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
148{
149 cpumask_t cpu_mask;
150 unsigned int pid;
151
152 preempt_disable();
153 pid = vma ? vma->vm_mm->context.id : 0;
154 if (unlikely(pid == MMU_NO_CONTEXT))
155 goto bail;
156 cpu_mask = vma->vm_mm->cpu_vm_mask;
157 cpu_clear(smp_processor_id(), cpu_mask);
158 if (!cpus_empty(cpu_mask)) {
159 /* If broadcast tlbivax is supported, use it */
160 if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) {
161 int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL);
162 if (lock)
163 spin_lock(&tlbivax_lock);
164 _tlbivax_bcast(vmaddr, pid);
165 if (lock)
166 spin_unlock(&tlbivax_lock);
167 goto bail;
168 } else {
169 struct tlb_flush_param p = { .pid = pid, .addr = vmaddr };
170 smp_call_function_mask(cpu_mask,
171 do_flush_tlb_page_ipi, &p, 1);
172 }
173 }
174 _tlbil_va(vmaddr, pid);
175 bail:
176 preempt_enable();
177}
178EXPORT_SYMBOL(flush_tlb_page);
179
180#endif /* CONFIG_SMP */
181
/*
 * Flush kernel TLB entries in the given range.
 *
 * @start and @end are not used: context 0 is flushed as a whole.  With
 * CONFIG_SMP the other CPUs are reached through smp_call_function() and
 * the current CPU is flushed while preemption is still disabled; without
 * it only the single _tlbil_pid(0) call is made.
 */
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
#ifdef CONFIG_SMP
	preempt_disable();
	smp_call_function(do_flush_tlb_mm_ipi, NULL, 1);
	_tlbil_pid(0);
	preempt_enable();
#else
	_tlbil_pid(0);
#endif
}
195EXPORT_SYMBOL(flush_tlb_kernel_range);
196
197/*
198 * Currently, for range flushing, we just do a full mm flush. This should
199 * be optimized based on a threshold on the size of the range, since
200 * some implementation can stack multiple tlbivax before a tlbsync but
201 * for now, we keep it that way
202 */
203void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
204 unsigned long end)
205
206{
207 flush_tlb_mm(vma->vm_mm);
208}
209EXPORT_SYMBOL(flush_tlb_range);
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S
new file mode 100644
index 000000000000..f900a39e6ec4
--- /dev/null
+++ b/arch/powerpc/mm/tlb_nohash_low.S
@@ -0,0 +1,166 @@
1/*
2 * This file contains low-level functions for performing various
3 * types of TLB invalidations on various processors with no hash
4 * table.
5 *
6 * This file implements the following functions for all no-hash
7 * processors. Some aren't implemented for some variants. Some
8 * are inline in tlbflush.h
9 *
10 * - tlbil_va
11 * - tlbil_pid
12 * - tlbil_all
13 * - tlbivax_bcast (not yet)
14 *
15 * Code mostly moved over from misc_32.S
16 *
17 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
18 *
19 * Partially rewritten by Cort Dougan (cort@cs.nmt.edu)
20 * Paul Mackerras, Kumar Gala and Benjamin Herrenschmidt.
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 *
27 */
28
29#include <asm/reg.h>
30#include <asm/page.h>
31#include <asm/cputable.h>
32#include <asm/mmu.h>
33#include <asm/ppc_asm.h>
34#include <asm/asm-offsets.h>
35#include <asm/processor.h>
36
37#if defined(CONFIG_40x)
38
39/*
40 * 40x implementation needs only tlbil_va
41 */
42_GLOBAL(_tlbil_va)
43 /* We run the search with interrupts disabled because we have to change
44 * the PID and I don't want to preempt when that happens.
45 */
46 mfmsr r5 /* r5 = MSR on entry, written back by the wrtee below */
47 mfspr r6,SPRN_PID /* r6 = PID on entry */
48 wrteei 0
49 mtspr SPRN_PID,r4 /* search under the PID passed in r4 */
50 tlbsx. r3, 0, r3 /* search for the address in r3; result tested by bne */
51 mtspr SPRN_PID,r6 /* put the original PID back */
52 wrtee r5
53 bne 1f /* nothing found: skip the invalidation */
54 sync
55 /* There are only 64 TLB entries, so r3 < 64, which means bit 25 is
56 * clear. Since 25 is the V bit in the TLB_TAG, loading this value
57 * will invalidate the TLB entry. */
58 tlbwe r3, r3, TLB_TAG
59 isync
601: blr
61
62#elif defined(CONFIG_8xx)
63
64/*
65 * Nothing to do for 8xx, everything is inline
66 */
67
68#elif defined(CONFIG_44x)
69
70/*
71 * 440 implementation uses tlbsx/we for tlbil_va and a full sweep
72 * of the TLB for everything else.
73 */
74_GLOBAL(_tlbil_va)
75 mfspr r5,SPRN_MMUCR
76 rlwimi r5,r4,0,24,31 /* Set TID */
77
78 /* We have to run the search with interrupts disabled, otherwise
79 * an interrupt which causes a TLB miss can clobber the MMUCR
80 * between the mtspr and the tlbsx.
81 *
82 * Critical and Machine Check interrupts take care of saving
83 * and restoring MMUCR, so only normal interrupts have to be
84 * taken care of.
85 */
86 mfmsr r4 /* r4 = MSR on entry (the PID in r4 was consumed above) */
87 wrteei 0
88 mtspr SPRN_MMUCR,r5
89 tlbsx. r3, 0, r3 /* search for the address in r3; result tested by bne */
90 wrtee r4 /* write back the MSR saved in r4 */
91 bne 1f /* nothing found: skip the invalidation */
92 sync
93 /* There are only 64 TLB entries, so r3 < 64,
94 * which means bit 22, is clear. Since 22 is
95 * the V bit in the TLB_PAGEID, loading this
96 * value will invalidate the TLB entry.
97 */
98 tlbwe r3, r3, PPC44x_TLB_PAGEID
99 isync
1001: blr
101
102_GLOBAL(_tlbil_all)
103_GLOBAL(_tlbil_pid)
104 li r3,0 /* r3 = TLB index, starting at 0; both entry points share this sweep */
105 sync
106
107 /* Load high watermark */
108 lis r4,tlb_44x_hwater@ha
109 lwz r5,tlb_44x_hwater@l(r4)
110
1111: tlbwe r3,r3,PPC44x_TLB_PAGEID /* write the index itself as the PAGEID word of entry r3 */
112 addi r3,r3,1
113 cmpw 0,r3,r5
114 ble 1b /* repeat while index <= tlb_44x_hwater */
115
116 isync
117 blr
118
119#elif defined(CONFIG_FSL_BOOKE)
120/*
121 * FSL BookE implementations. Currently _pid and _all are the
122 * same. This will change when tlbilx is actually supported and
123 * performs invalidate-by-PID. This change will be driven by
124 * mmu_features conditional
125 */
126
127/*
128 * Flush MMU TLB on the local processor
129 */
130_GLOBAL(_tlbil_pid)
131_GLOBAL(_tlbil_all)
132#define MMUCSR0_TLBFI (MMUCSR0_TLB0FI | MMUCSR0_TLB1FI | \
133 MMUCSR0_TLB2FI | MMUCSR0_TLB3FI)
134 li r3,(MMUCSR0_TLBFI)@l
135 mtspr SPRN_MMUCSR0, r3 /* set the TLB0..TLB3 FI bits in MMUCSR0 */
1361:
137 mfspr r3,SPRN_MMUCSR0
138 andi. r3,r3,MMUCSR0_TLBFI@l
139 bne 1b /* spin until all of those bits read back as zero */
140 msync
141 isync
142 blr
143
144/*
145 * Flush MMU TLB for a particular address, but only on the local processor
146 * (no broadcast)
147 */
148_GLOBAL(_tlbil_va)
149 mfmsr r10 /* r10 = MSR on entry, written back at 1: below */
150 wrteei 0
151 slwi r4,r4,16 /* shift the PID in r4 left by 16 for MAS6 */
152 mtspr SPRN_MAS6,r4 /* assume AS=0 for now */
153 tlbsx 0,r3
154 mfspr r4,SPRN_MAS1 /* check valid */
155 andis. r3,r4,MAS1_VALID@h
156 beq 1f /* valid bit not set: nothing to invalidate */
157 rlwinm r4,r4,0,1,31 /* clear the most significant bit (presumably MAS1_VALID - confirm) */
158 mtspr SPRN_MAS1,r4
159 tlbwe
160 msync
161 isync
1621: wrtee r10
163 blr
#else
/* None of CONFIG_40x, CONFIG_8xx, CONFIG_44x or CONFIG_FSL_BOOKE is set */
#error Unsupported processor type !
#endif