aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@samba.org>2005-09-26 02:04:21 -0400
committerPaul Mackerras <paulus@samba.org>2005-09-26 02:04:21 -0400
commit14cf11af6cf608eb8c23e989ddb17a715ddce109 (patch)
tree271a97ce73e265f39c569cb159c195c5b4bb3f8c /arch/powerpc/mm
parente5baa396af7560382d2cf3f0871d616b61fc284c (diff)
powerpc: Merge enough to start building in arch/powerpc.
This creates the directory structure under arch/powerpc and a bunch of Kconfig files. It does a first-cut merge of arch/powerpc/mm, arch/powerpc/lib and arch/powerpc/platforms/powermac. This is enough to build a 32-bit powermac kernel with ARCH=powerpc. For now we are getting some unmerged files from arch/ppc/kernel and arch/ppc/syslib, or arch/ppc64/kernel. This makes some minor changes to files in those directories and files outside arch/powerpc. The boot directory is still not merged. That's going to be interesting. Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/44x_mmu.c120
-rw-r--r--arch/powerpc/mm/4xx_mmu.c141
-rw-r--r--arch/powerpc/mm/Makefile12
-rw-r--r--arch/powerpc/mm/fault.c391
-rw-r--r--arch/powerpc/mm/fsl_booke_mmu.c237
-rw-r--r--arch/powerpc/mm/hash_32.S618
-rw-r--r--arch/powerpc/mm/init.c581
-rw-r--r--arch/powerpc/mm/init64.c385
-rw-r--r--arch/powerpc/mm/mem.c299
-rw-r--r--arch/powerpc/mm/mem64.c259
-rw-r--r--arch/powerpc/mm/mem_pieces.c163
-rw-r--r--arch/powerpc/mm/mem_pieces.h48
-rw-r--r--arch/powerpc/mm/mmu_context.c86
-rw-r--r--arch/powerpc/mm/mmu_context64.c63
-rw-r--r--arch/powerpc/mm/mmu_decl.h85
-rw-r--r--arch/powerpc/mm/pgtable.c470
-rw-r--r--arch/powerpc/mm/pgtable64.c357
-rw-r--r--arch/powerpc/mm/ppc_mmu.c296
-rw-r--r--arch/powerpc/mm/tlb.c183
19 files changed, 4794 insertions, 0 deletions
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
new file mode 100644
index 000000000000..3d79ce281b67
--- /dev/null
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -0,0 +1,120 @@
1/*
2 * Modifications by Matt Porter (mporter@mvista.com) to support
3 * PPC44x Book E processors.
4 *
5 * This file contains the routines for initializing the MMU
6 * on the 4xx series of chips.
7 * -- paulus
8 *
9 * Derived from arch/ppc/mm/init.c:
10 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
11 *
12 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
13 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
14 * Copyright (C) 1996 Paul Mackerras
15 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
16 *
17 * Derived from "arch/i386/mm/init.c"
18 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
19 *
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
24 *
25 */
26
27#include <linux/config.h>
28#include <linux/signal.h>
29#include <linux/sched.h>
30#include <linux/kernel.h>
31#include <linux/errno.h>
32#include <linux/string.h>
33#include <linux/types.h>
34#include <linux/ptrace.h>
35#include <linux/mman.h>
36#include <linux/mm.h>
37#include <linux/swap.h>
38#include <linux/stddef.h>
39#include <linux/vmalloc.h>
40#include <linux/init.h>
41#include <linux/delay.h>
42#include <linux/highmem.h>
43
44#include <asm/pgalloc.h>
45#include <asm/prom.h>
46#include <asm/io.h>
47#include <asm/mmu_context.h>
48#include <asm/pgtable.h>
49#include <asm/mmu.h>
50#include <asm/uaccess.h>
51#include <asm/smp.h>
52#include <asm/bootx.h>
53#include <asm/machdep.h>
54#include <asm/setup.h>
55
56#include "mmu_decl.h"
57
58extern char etext[], _stext[];
59
60/* Used by the 44x TLB replacement exception handler.
61 * Just needed it declared someplace.
62 */
63unsigned int tlb_44x_index = 0;
64unsigned int tlb_44x_hwater = 62; /* overwritten by mmu_mapin_ram() with PPC44x_LOW_SLOT - pinned_tlbs */
65
66/*
67 * "Pins" a 256MB TLB entry in AS0 for kernel lowmem
68 */
69static void __init
70ppc44x_pin_tlb(int slot, unsigned int virt, unsigned int phys)
71{
72 unsigned long attrib = 0;
73
74 __asm__ __volatile__("\
75 clrrwi %2,%2,10\n\
76 ori %2,%2,%4\n\
77 clrrwi %1,%1,10\n\
78 li %0,0\n\
79 ori %0,%0,%5\n\
80 tlbwe %2,%3,%6\n\
81 tlbwe %1,%3,%7\n\
82 tlbwe %0,%3,%8"
83 :
84 : "r" (attrib), "r" (phys), "r" (virt), "r" (slot),
85 "i" (PPC44x_TLB_VALID | PPC44x_TLB_256M),
86 "i" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),
87 "i" (PPC44x_TLB_PAGEID),
88 "i" (PPC44x_TLB_XLAT),
89 "i" (PPC44x_TLB_ATTRIB));
90}
91
92/*
93 * MMU_init_hw does the chip-specific initialization of the MMU hardware.
94 */
95void __init MMU_init_hw(void)
96{
97 flush_instruction_cache();
98}
99
100unsigned long __init mmu_mapin_ram(void)
101{
102 unsigned int pinned_tlbs = 1;
103 int i;
104
105 /* Determine number of entries necessary to cover lowmem */
106 pinned_tlbs = (unsigned int)
107 (_ALIGN(total_lowmem, PPC44x_PIN_SIZE) >> PPC44x_PIN_SHIFT);
108
109 /* Write upper watermark to save location */
110 tlb_44x_hwater = PPC44x_LOW_SLOT - pinned_tlbs;
111
112 /* If necessary, set additional pinned TLBs */
113 if (pinned_tlbs > 1)
114 for (i = (PPC44x_LOW_SLOT-(pinned_tlbs-1)); i < PPC44x_LOW_SLOT; i++) {
115 unsigned int phys_addr = (PPC44x_LOW_SLOT-i) * PPC44x_PIN_SIZE;
116 ppc44x_pin_tlb(i, phys_addr+PAGE_OFFSET, phys_addr);
117 }
118
119 return total_lowmem;
120}
diff --git a/arch/powerpc/mm/4xx_mmu.c b/arch/powerpc/mm/4xx_mmu.c
new file mode 100644
index 000000000000..b7bcbc232f39
--- /dev/null
+++ b/arch/powerpc/mm/4xx_mmu.c
@@ -0,0 +1,141 @@
1/*
2 * This file contains the routines for initializing the MMU
3 * on the 4xx series of chips.
4 * -- paulus
5 *
6 * Derived from arch/ppc/mm/init.c:
7 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
8 *
9 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
10 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
11 * Copyright (C) 1996 Paul Mackerras
12 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
13 *
14 * Derived from "arch/i386/mm/init.c"
15 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
16 *
17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public License
19 * as published by the Free Software Foundation; either version
20 * 2 of the License, or (at your option) any later version.
21 *
22 */
23
24#include <linux/config.h>
25#include <linux/signal.h>
26#include <linux/sched.h>
27#include <linux/kernel.h>
28#include <linux/errno.h>
29#include <linux/string.h>
30#include <linux/types.h>
31#include <linux/ptrace.h>
32#include <linux/mman.h>
33#include <linux/mm.h>
34#include <linux/swap.h>
35#include <linux/stddef.h>
36#include <linux/vmalloc.h>
37#include <linux/init.h>
38#include <linux/delay.h>
39#include <linux/highmem.h>
40
41#include <asm/pgalloc.h>
42#include <asm/prom.h>
43#include <asm/io.h>
44#include <asm/mmu_context.h>
45#include <asm/pgtable.h>
46#include <asm/mmu.h>
47#include <asm/uaccess.h>
48#include <asm/smp.h>
49#include <asm/bootx.h>
50#include <asm/machdep.h>
51#include <asm/setup.h>
52#include "mmu_decl.h"
53
54extern int __map_without_ltlbs;
55/*
56 * MMU_init_hw does the chip-specific initialization of the MMU hardware.
57 */
58void __init MMU_init_hw(void)
59{
60 /*
61 * The Zone Protection Register (ZPR) defines how protection will
62 * be applied to every page which is a member of a given zone. At
63 * present, we utilize only two of the 4xx's zones.
64 * The zone index bits (of ZSEL) in the PTE are used for software
65 * indicators, except the LSB. For user access, zone 1 is used,
66 * for kernel access, zone 0 is used. We set all but zone 1
67 * to zero, allowing only kernel access as indicated in the PTE.
68 * For zone 1, we set a 01 binary (a value of 10 will not work)
69 * to allow user access as indicated in the PTE. This also allows
70 * kernel access as indicated in the PTE.
71 */
72
73 mtspr(SPRN_ZPR, 0x10000000);
74
75 flush_instruction_cache();
76
77 /*
78 * Set up the real-mode cache parameters for the exception vector
79 * handlers (which are run in real-mode).
80 */
81
82 mtspr(SPRN_DCWR, 0x00000000); /* All caching is write-back */
83
84 /*
85 * Cache instruction and data space where the exception
86 * vectors and the kernel live in real-mode.
87 */
88
89 mtspr(SPRN_DCCR, 0xF0000000); /* 512 MB of data space at 0x0. */
90 mtspr(SPRN_ICCR, 0xF0000000); /* 512 MB of instr. space at 0x0. */
91}
92
93#define LARGE_PAGE_SIZE_16M (1<<24)
94#define LARGE_PAGE_SIZE_4M (1<<22)
95
96unsigned long __init mmu_mapin_ram(void)
97{
98 unsigned long v, s;
99 phys_addr_t p;
100
101 v = KERNELBASE;
102 p = PPC_MEMSTART;
103 s = 0;
104
105 if (__map_without_ltlbs) {
106 return s;
107 }
108
109 while (s <= (total_lowmem - LARGE_PAGE_SIZE_16M)) {
110 pmd_t *pmdp;
111 unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE;
112
113 spin_lock(&init_mm.page_table_lock);
114 pmdp = pmd_offset(pgd_offset_k(v), v);
115 pmd_val(*pmdp++) = val;
116 pmd_val(*pmdp++) = val;
117 pmd_val(*pmdp++) = val;
118 pmd_val(*pmdp++) = val;
119 spin_unlock(&init_mm.page_table_lock);
120
121 v += LARGE_PAGE_SIZE_16M;
122 p += LARGE_PAGE_SIZE_16M;
123 s += LARGE_PAGE_SIZE_16M;
124 }
125
126 while (s <= (total_lowmem - LARGE_PAGE_SIZE_4M)) {
127 pmd_t *pmdp;
128 unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE;
129
130 spin_lock(&init_mm.page_table_lock);
131 pmdp = pmd_offset(pgd_offset_k(v), v);
132 pmd_val(*pmdp) = val;
133 spin_unlock(&init_mm.page_table_lock);
134
135 v += LARGE_PAGE_SIZE_4M;
136 p += LARGE_PAGE_SIZE_4M;
137 s += LARGE_PAGE_SIZE_4M;
138 }
139
140 return s;
141}
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
new file mode 100644
index 000000000000..9f52c26acd86
--- /dev/null
+++ b/arch/powerpc/mm/Makefile
@@ -0,0 +1,12 @@
1#
2# Makefile for the linux ppc-specific parts of the memory manager.
3# fault.o and mem.o are always built; every other object is selected by a CONFIG_* symbol.
4
5obj-y := fault.o mem.o
6obj-$(CONFIG_PPC32) += init.o pgtable.o mmu_context.o \
7 mem_pieces.o tlb.o
8obj-$(CONFIG_PPC64) += init64.o pgtable64.o mmu_context64.o
9obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu.o hash_32.o
10obj-$(CONFIG_40x) += 4xx_mmu.o
11obj-$(CONFIG_44x) += 44x_mmu.o
12obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
new file mode 100644
index 000000000000..3df641fa789d
--- /dev/null
+++ b/arch/powerpc/mm/fault.c
@@ -0,0 +1,391 @@
1/*
2 * arch/ppc/mm/fault.c
3 *
4 * PowerPC version
5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
6 *
7 * Derived from "arch/i386/mm/fault.c"
8 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
9 *
10 * Modified by Cort Dougan and Paul Mackerras.
11 *
12 * Modified for PPC64 by Dave Engebretsen (engebret@ibm.com)
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 */
19
20#include <linux/config.h>
21#include <linux/signal.h>
22#include <linux/sched.h>
23#include <linux/kernel.h>
24#include <linux/errno.h>
25#include <linux/string.h>
26#include <linux/types.h>
27#include <linux/ptrace.h>
28#include <linux/mman.h>
29#include <linux/mm.h>
30#include <linux/interrupt.h>
31#include <linux/highmem.h>
32#include <linux/module.h>
33#include <linux/kprobes.h>
34
35#include <asm/page.h>
36#include <asm/pgtable.h>
37#include <asm/mmu.h>
38#include <asm/mmu_context.h>
39#include <asm/system.h>
40#include <asm/uaccess.h>
41#include <asm/tlbflush.h>
42#include <asm/kdebug.h>
43#include <asm/siginfo.h>
44
45/*
46 * Check whether the instruction at regs->nip is a store using
47 * an update addressing form which will update r1.
48 */
49static int store_updates_sp(struct pt_regs *regs)
50{
51 unsigned int inst;
52
53 if (get_user(inst, (unsigned int __user *)regs->nip))
54 return 0;
55 /* check for 1 in the rA field */
56 if (((inst >> 16) & 0x1f) != 1)
57 return 0;
58 /* check major opcode */
59 switch (inst >> 26) {
60 case 37: /* stwu */
61 case 39: /* stbu */
62 case 45: /* sthu */
63 case 53: /* stfsu */
64 case 55: /* stfdu */
65 return 1;
66 case 62: /* std or stdu */
67 return (inst & 3) == 1;
68 case 31:
69 /* check minor opcode */
70 switch ((inst >> 1) & 0x3ff) {
71 case 181: /* stdux */
72 case 183: /* stwux */
73 case 247: /* stbux */
74 case 439: /* sthux */
75 case 695: /* stfsux */
76 case 759: /* stfdux */
77 return 1;
78 }
79 }
80 return 0;
81}
82
83static void do_dabr(struct pt_regs *regs, unsigned long error_code)
84{
85 siginfo_t info;
86
87 if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
88 11, SIGSEGV) == NOTIFY_STOP)
89 return;
90
91 if (debugger_dabr_match(regs))
92 return;
93
94 /* Clear the DABR */
95 set_dabr(0);
96
97 /* Deliver the signal to userspace */
98 info.si_signo = SIGTRAP;
99 info.si_errno = 0;
100 info.si_code = TRAP_HWBKPT;
101 info.si_addr = (void __user *)regs->nip;
102 force_sig_info(SIGTRAP, &info, current);
103}
104
105/*
106 * For 600- and 800-family processors, the error_code parameter is DSISR
107 * for a data fault, SRR1 for an instruction fault. For 400-family processors
108 * the error_code parameter is ESR for a data fault, 0 for an instruction
109 * fault.
110 * For 64-bit processors, the error_code parameter is
111 * - DSISR for a non-SLB data access fault,
112 * - SRR1 & 0x08000000 for a non-SLB instruction access fault
113 * - 0 any SLB fault.
114 *
115 * The return value is 0 if the fault was handled, or the signal
116 * number if this is a kernel fault that can't be handled here.
117 */
118int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
119 unsigned long error_code)
120{
121 struct vm_area_struct * vma;
122 struct mm_struct *mm = current->mm;
123 siginfo_t info;
124 int code = SEGV_MAPERR;
125 int is_write = 0;
126 int trap = TRAP(regs);
127 int is_exec = trap == 0x400;
128
129#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
130 /*
131 * Fortunately the bit assignments in SRR1 for an instruction
132 * fault and DSISR for a data fault are mostly the same for the
133 * bits we are interested in. But there are some bits which
134 * indicate errors in DSISR but can validly be set in SRR1.
135 */
136 if (trap == 0x400)
137 error_code &= 0x48200000;
138 else
139 is_write = error_code & DSISR_ISSTORE;
140#else
141 is_write = error_code & ESR_DST;
142#endif /* CONFIG_4xx || CONFIG_BOOKE */
143
144 if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code,
145 11, SIGSEGV) == NOTIFY_STOP)
146 return 0;
147
148 if (trap == 0x300) {
149 if (debugger_fault_handler(regs))
150 return 0;
151 }
152
153 /* On a kernel SLB miss we can only check for a valid exception entry */
154 if (!user_mode(regs) && (address >= TASK_SIZE))
155 return SIGSEGV;
156
157#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
158 if (error_code & DSISR_DABRMATCH) {
159 /* DABR match */
160 do_dabr(regs, error_code);
161 return 0;
162 }
163#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
164
165 if (in_atomic() || mm == NULL) {
166 if (!user_mode(regs))
167 return SIGSEGV;
168 /* in_atomic() in user mode is really bad,
169 as is current->mm == NULL. */
170 printk(KERN_EMERG "Page fault in user mode with"
171 "in_atomic() = %d mm = %p\n", in_atomic(), mm);
172 printk(KERN_EMERG "NIP = %lx MSR = %lx\n",
173 regs->nip, regs->msr);
174 die("Weird page fault", regs, SIGSEGV);
175 }
176
177 /* When running in the kernel we expect faults to occur only to
178 * addresses in user space. All other faults represent errors in the
179 * kernel and should generate an OOPS. Unfortunatly, in the case of an
180 * erroneous fault occuring in a code path which already holds mmap_sem
181 * we will deadlock attempting to validate the fault against the
182 * address space. Luckily the kernel only validly references user
183 * space from well defined areas of code, which are listed in the
184 * exceptions table.
185 *
186 * As the vast majority of faults will be valid we will only perform
187 * the source reference check when there is a possibilty of a deadlock.
188 * Attempt to lock the address space, if we cannot we then validate the
189 * source. If this is invalid we can skip the address space check,
190 * thus avoiding the deadlock.
191 */
192 if (!down_read_trylock(&mm->mmap_sem)) {
193 if (!user_mode(regs) && !search_exception_tables(regs->nip))
194 goto bad_area_nosemaphore;
195
196 down_read(&mm->mmap_sem);
197 }
198
199 vma = find_vma(mm, address);
200 if (!vma)
201 goto bad_area;
202 if (vma->vm_start <= address)
203 goto good_area;
204 if (!(vma->vm_flags & VM_GROWSDOWN))
205 goto bad_area;
206
207 /*
208 * N.B. The POWER/Open ABI allows programs to access up to
209 * 288 bytes below the stack pointer.
210 * The kernel signal delivery code writes up to about 1.5kB
211 * below the stack pointer (r1) before decrementing it.
212 * The exec code can write slightly over 640kB to the stack
213 * before setting the user r1. Thus we allow the stack to
214 * expand to 1MB without further checks.
215 */
216 if (address + 0x100000 < vma->vm_end) {
217 /* get user regs even if this fault is in kernel mode */
218 struct pt_regs *uregs = current->thread.regs;
219 if (uregs == NULL)
220 goto bad_area;
221
222 /*
223 * A user-mode access to an address a long way below
224 * the stack pointer is only valid if the instruction
225 * is one which would update the stack pointer to the
226 * address accessed if the instruction completed,
227 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
228 * (or the byte, halfword, float or double forms).
229 *
230 * If we don't check this then any write to the area
231 * between the last mapped region and the stack will
232 * expand the stack rather than segfaulting.
233 */
234 if (address + 2048 < uregs->gpr[1]
235 && (!user_mode(regs) || !store_updates_sp(regs)))
236 goto bad_area;
237 }
238 if (expand_stack(vma, address))
239 goto bad_area;
240
241good_area:
242 code = SEGV_ACCERR;
243#if defined(CONFIG_6xx)
244 if (error_code & 0x95700000)
245 /* an error such as lwarx to I/O controller space,
246 address matching DABR, eciwx, etc. */
247 goto bad_area;
248#endif /* CONFIG_6xx */
249#if defined(CONFIG_8xx)
250 /* The MPC8xx seems to always set 0x80000000, which is
251 * "undefined". Of those that can be set, this is the only
252 * one which seems bad.
253 */
254 if (error_code & 0x10000000)
255 /* Guarded storage error. */
256 goto bad_area;
257#endif /* CONFIG_8xx */
258
259 if (is_exec) {
260#ifdef CONFIG_PPC64
261 /* protection fault */
262 if (error_code & DSISR_PROTFAULT)
263 goto bad_area;
264 if (!(vma->vm_flags & VM_EXEC))
265 goto bad_area;
266#endif
267#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
268 pte_t *ptep;
269
270 /* Since 4xx/Book-E supports per-page execute permission,
271 * we lazily flush dcache to icache. */
272 ptep = NULL;
273 if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) {
274 struct page *page = pte_page(*ptep);
275
276 if (! test_bit(PG_arch_1, &page->flags)) {
277 flush_dcache_icache_page(page);
278 set_bit(PG_arch_1, &page->flags);
279 }
280 pte_update(ptep, 0, _PAGE_HWEXEC);
281 _tlbie(address);
282 pte_unmap(ptep);
283 up_read(&mm->mmap_sem);
284 return 0;
285 }
286 if (ptep != NULL)
287 pte_unmap(ptep);
288#endif
289 /* a write */
290 } else if (is_write) {
291 if (!(vma->vm_flags & VM_WRITE))
292 goto bad_area;
293 /* a read */
294 } else {
295 /* protection fault */
296 if (error_code & 0x08000000)
297 goto bad_area;
298 if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
299 goto bad_area;
300 }
301
302 /*
303 * If for any reason at all we couldn't handle the fault,
304 * make sure we exit gracefully rather than endlessly redo
305 * the fault.
306 */
307 survive:
308 switch (handle_mm_fault(mm, vma, address, is_write)) {
309
310 case VM_FAULT_MINOR:
311 current->min_flt++;
312 break;
313 case VM_FAULT_MAJOR:
314 current->maj_flt++;
315 break;
316 case VM_FAULT_SIGBUS:
317 goto do_sigbus;
318 case VM_FAULT_OOM:
319 goto out_of_memory;
320 default:
321 BUG();
322 }
323
324 up_read(&mm->mmap_sem);
325 return 0;
326
327bad_area:
328 up_read(&mm->mmap_sem);
329
330bad_area_nosemaphore:
331 /* User mode accesses cause a SIGSEGV */
332 if (user_mode(regs)) {
333 _exception(SIGSEGV, regs, code, address);
334 return 0;
335 }
336
337 if (is_exec && (error_code & DSISR_PROTFAULT)
338 && printk_ratelimit())
339 printk(KERN_CRIT "kernel tried to execute NX-protected"
340 " page (%lx) - exploit attempt? (uid: %d)\n",
341 address, current->uid);
342
343 return SIGSEGV;
344
345/*
346 * We ran out of memory, or some other thing happened to us that made
347 * us unable to handle the page fault gracefully.
348 */
349out_of_memory:
350 up_read(&mm->mmap_sem);
351 if (current->pid == 1) {
352 yield();
353 down_read(&mm->mmap_sem);
354 goto survive;
355 }
356 printk("VM: killing process %s\n", current->comm);
357 if (user_mode(regs))
358 do_exit(SIGKILL);
359 return SIGKILL;
360
361do_sigbus:
362 up_read(&mm->mmap_sem);
363 if (user_mode(regs)) {
364 info.si_signo = SIGBUS;
365 info.si_errno = 0;
366 info.si_code = BUS_ADRERR;
367 info.si_addr = (void __user *)address;
368 force_sig_info(SIGBUS, &info, current);
369 return 0;
370 }
371 return SIGBUS;
372}
373
374/*
375 * bad_page_fault is called when we have a bad access from the kernel.
376 * It is called from the DSI and ISI handlers in head.S and from some
377 * of the procedures in traps.c.
378 */
379void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
380{
381 const struct exception_table_entry *entry;
382
383 /* Are we prepared to handle this fault? */
384 if ((entry = search_exception_tables(regs->nip)) != NULL) {
385 regs->nip = entry->fixup;
386 return;
387 }
388
389 /* kernel has accessed a bad area */
390 die("Kernel access of bad area", regs, sig);
391}
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
new file mode 100644
index 000000000000..af9ca0eb6d55
--- /dev/null
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -0,0 +1,237 @@
1/*
2 * Modifications by Kumar Gala (kumar.gala@freescale.com) to support
3 * E500 Book E processors.
4 *
5 * Copyright 2004 Freescale Semiconductor, Inc
6 *
7 * This file contains the routines for initializing the MMU
8 * on the 4xx series of chips.
9 * -- paulus
10 *
11 * Derived from arch/ppc/mm/init.c:
12 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
13 *
14 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
15 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
16 * Copyright (C) 1996 Paul Mackerras
17 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
18 *
19 * Derived from "arch/i386/mm/init.c"
20 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 *
27 */
28
29#include <linux/config.h>
30#include <linux/signal.h>
31#include <linux/sched.h>
32#include <linux/kernel.h>
33#include <linux/errno.h>
34#include <linux/string.h>
35#include <linux/types.h>
36#include <linux/ptrace.h>
37#include <linux/mman.h>
38#include <linux/mm.h>
39#include <linux/swap.h>
40#include <linux/stddef.h>
41#include <linux/vmalloc.h>
42#include <linux/init.h>
43#include <linux/delay.h>
44#include <linux/highmem.h>
45
46#include <asm/pgalloc.h>
47#include <asm/prom.h>
48#include <asm/io.h>
49#include <asm/mmu_context.h>
50#include <asm/pgtable.h>
51#include <asm/mmu.h>
52#include <asm/uaccess.h>
53#include <asm/smp.h>
54#include <asm/bootx.h>
55#include <asm/machdep.h>
56#include <asm/setup.h>
57
58extern void loadcam_entry(unsigned int index);
59unsigned int tlbcam_index;
60unsigned int num_tlbcam_entries;
61static unsigned long __cam0, __cam1, __cam2;
62extern unsigned long total_lowmem;
63extern unsigned long __max_low_memory;
64#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE
65
66#define NUM_TLBCAMS (16)
67
68struct tlbcam { /* software copy of the MAS register values for one CAM entry */
69 u32 MAS0; /* settlbcam(): MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1) */
70 u32 MAS1; /* settlbcam(): MAS1_VALID | MAS1_IPROT | TSIZE | TID */
71 u32 MAS2; /* settlbcam(): page-aligned virtual address plus W/I/M/G/E bits */
72 u32 MAS3; /* settlbcam(): page-aligned physical address plus permission bits */
73 u32 MAS7; /* not written by any code in this file */
74} TLBCAM[NUM_TLBCAMS];
75
76struct tlbcamrange { /* address range covered by one CAM entry, filled in by settlbcam() */
77 unsigned long start; /* first virtual address covered */
78 unsigned long limit; /* stored as virt + size - 1, i.e. the last virtual address covered */
79 phys_addr_t phys; /* physical address that start maps to */
80} tlbcam_addrs[NUM_TLBCAMS];
81
82extern unsigned int tlbcam_index;
83
84/*
85 * Return PA for this VA if it is mapped by a CAM, or 0
86 */
87unsigned long v_mapped_by_tlbcam(unsigned long va)
88{
89 int b;
90 for (b = 0; b < tlbcam_index; ++b)
91 if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit)
92 return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start);
93 return 0;
94}
95
96/*
97 * Return VA for a given PA or 0 if not mapped
98 */
99unsigned long p_mapped_by_tlbcam(unsigned long pa)
100{
101 int b;
102 for (b = 0; b < tlbcam_index; ++b)
103 if (pa >= tlbcam_addrs[b].phys
104 && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start)
105 +tlbcam_addrs[b].phys)
106 return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys);
107 return 0;
108}
109
110/*
111 * Set up one TLB CAM entry mapping virt to phys and load it with loadcam_entry().
112 * The parameters are not checked; in particular size must be a power
113 * of 4 between 4k and 256M.
114 */
115void settlbcam(int index, unsigned long virt, phys_addr_t phys,
116 unsigned int size, int flags, unsigned int pid)
117{
118 unsigned int tsize, lz;
119
120 asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size));
121 tsize = (21 - lz) / 2;
122
123#ifdef CONFIG_SMP
124 if ((flags & _PAGE_NO_CACHE) == 0)
125 flags |= _PAGE_COHERENT;
126#endif
127
128 TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1);
129 TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid);
130 TLBCAM[index].MAS2 = virt & PAGE_MASK;
131
132 TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0;
133 TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0;
134 TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0;
135 TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0;
136 TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0;
137
138 TLBCAM[index].MAS3 = (phys & PAGE_MASK) | MAS3_SX | MAS3_SR;
139 TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0);
140
141#ifndef CONFIG_KGDB /* want user access for breakpoints */
142 if (flags & _PAGE_USER) {
143 TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR;
144 TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0);
145 }
146#else
147 TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR;
148 TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0);
149#endif
150
151 tlbcam_addrs[index].start = virt;
152 tlbcam_addrs[index].limit = virt + size - 1;
153 tlbcam_addrs[index].phys = phys;
154
155 loadcam_entry(index);
156}
157
158void invalidate_tlbcam_entry(int index)
159{
160 TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index);
161 TLBCAM[index].MAS1 = ~MAS1_VALID;
162
163 loadcam_entry(index);
164}
165
166void __init cam_mapin_ram(unsigned long cam0, unsigned long cam1,
167 unsigned long cam2)
168{
169 settlbcam(0, KERNELBASE, PPC_MEMSTART, cam0, _PAGE_KERNEL, 0);
170 tlbcam_index++;
171 if (cam1) {
172 tlbcam_index++;
173 settlbcam(1, KERNELBASE+cam0, PPC_MEMSTART+cam0, cam1, _PAGE_KERNEL, 0);
174 }
175 if (cam2) {
176 tlbcam_index++;
177 settlbcam(2, KERNELBASE+cam0+cam1, PPC_MEMSTART+cam0+cam1, cam2, _PAGE_KERNEL, 0);
178 }
179}
180
181/*
182 * MMU_init_hw does the chip-specific initialization of the MMU hardware.
183 */
184void __init MMU_init_hw(void)
185{
186 flush_instruction_cache();
187}
188
189unsigned long __init mmu_mapin_ram(void)
190{
191 cam_mapin_ram(__cam0, __cam1, __cam2);
192
193 return __cam0 + __cam1 + __cam2;
194}
195
196
197void __init
198adjust_total_lowmem(void)
199{
200 unsigned long max_low_mem = MAX_LOW_MEM;
201 unsigned long cam_max = 0x10000000;
202 unsigned long ram;
203
204 /* adjust CAM size to max_low_mem */
205 if (max_low_mem < cam_max)
206 cam_max = max_low_mem;
207
208 /* adjust lowmem size to max_low_mem */
209 if (max_low_mem < total_lowmem)
210 ram = max_low_mem;
211 else
212 ram = total_lowmem;
213
214 /* Calculate CAM values */
215 __cam0 = 1UL << 2 * (__ilog2(ram) / 2);
216 if (__cam0 > cam_max)
217 __cam0 = cam_max;
218 ram -= __cam0;
219 if (ram) {
220 __cam1 = 1UL << 2 * (__ilog2(ram) / 2);
221 if (__cam1 > cam_max)
222 __cam1 = cam_max;
223 ram -= __cam1;
224 }
225 if (ram) {
226 __cam2 = 1UL << 2 * (__ilog2(ram) / 2);
227 if (__cam2 > cam_max)
228 __cam2 = cam_max;
229 ram -= __cam2;
230 }
231
232 printk(KERN_INFO "Memory CAM mapping: CAM0=%ldMb, CAM1=%ldMb,"
233 " CAM2=%ldMb residual: %ldMb\n",
234 __cam0 >> 20, __cam1 >> 20, __cam2 >> 20,
235 (total_lowmem - __cam0 - __cam1 - __cam2) >> 20);
236 __max_low_memory = max_low_mem = __cam0 + __cam1 + __cam2;
237}
diff --git a/arch/powerpc/mm/hash_32.S b/arch/powerpc/mm/hash_32.S
new file mode 100644
index 000000000000..57278a8dd132
--- /dev/null
+++ b/arch/powerpc/mm/hash_32.S
@@ -0,0 +1,618 @@
1/*
2 * arch/ppc/kernel/hashtable.S
3 *
4 * $Id: hashtable.S,v 1.6 1999/10/08 01:56:15 paulus Exp $
5 *
6 * PowerPC version
7 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
8 * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
9 * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
10 * Adapted for Power Macintosh by Paul Mackerras.
11 * Low-level exception handlers and MMU support
12 * rewritten by Paul Mackerras.
13 * Copyright (C) 1996 Paul Mackerras.
14 *
15 * This file contains low-level assembler routines for managing
16 * the PowerPC MMU hash table. (PPC 8xx processors don't use a
17 * hash table, so this file is not used on them.)
18 *
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
23 *
24 */
25
26#include <linux/config.h>
27#include <asm/processor.h>
28#include <asm/page.h>
29#include <asm/pgtable.h>
30#include <asm/cputable.h>
31#include <asm/ppc_asm.h>
32#include <asm/thread_info.h>
33#include <asm/asm-offsets.h>
34
35#ifdef CONFIG_SMP
36 .comm mmu_hash_lock,4 /* 4-byte lock word: zero when free, non-zero while held */
37#endif /* CONFIG_SMP */
38
39/*
40 * Sync CPUs with hash_page taking & releasing the hash
41 * table lock
 *
 * The routine spins until mmu_hash_lock reads as zero, claims it with
 * a lwarx/stwcx. pair and then immediately stores zero again, so it
 * returns only after any holder at the time of the call has released
 * the lock.  Modifies r0, r6, r8 and cr0.
42 */
43#ifdef CONFIG_SMP
44 .text
45_GLOBAL(hash_page_sync)
46 lis r8,mmu_hash_lock@h
47 ori r8,r8,mmu_hash_lock@l /* r8 = address of mmu_hash_lock */
48 lis r0,0x0fff /* value stored while we hold the lock */
49 b 10f
5011: lwz r6,0(r8) /* lock busy: poll with plain loads */
51 cmpwi 0,r6,0
52 bne 11b
5310: lwarx r6,0,r8 /* load lock word and set reservation */
54 cmpwi 0,r6,0
55 bne- 11b /* held by someone else: go back to polling */
56 stwcx. r0,0,r8 /* try to claim the lock */
57 bne- 10b /* reservation lost: try again */
58 isync
59 eieio
60 li r0,0
61 stw r0,0(r8) /* release the lock straight away */
62 blr
63#endif
64
65/*
66 * Load a PTE into the hash table, if possible.
67 * The address is in r4, and r3 contains an access flag:
68 * _PAGE_RW (0x400) if a write.
69 * r9 contains the SRR1 value, from which we use the MSR_PR bit.
70 * SPRG3 contains the physical address of the current task's thread.
71 *
72 * Returns to the caller if the access is illegal or there is no
73 * mapping for the address. Otherwise it places an appropriate PTE
74 * in the hash table and returns from the exception.
75 * Uses r0, r3 - r8, ctr, lr.
 *
 * On SMP, mmu_hash_lock is taken on entry and released on every exit
 * path: inline before fast_exception_return, or in hash_page_out
 * before returning to the caller.
76 */
77 .text
78_GLOBAL(hash_page)
79#ifdef CONFIG_PPC64BRIDGE
80 mfmsr r0
81 clrldi r0,r0,1 /* make sure it's in 32-bit mode */
82 MTMSRD(r0)
83 isync
84#endif
85 tophys(r7,0) /* gets -KERNELBASE into r7 */
86#ifdef CONFIG_SMP
87 addis r8,r7,mmu_hash_lock@h
88 ori r8,r8,mmu_hash_lock@l /* r8 = address of mmu_hash_lock, offset by r7 */
89 lis r0,0x0fff /* value stored while we hold the lock */
90 b 10f
9111: lwz r6,0(r8) /* lock busy: poll with plain loads */
92 cmpwi 0,r6,0
93 bne 11b
9410: lwarx r6,0,r8 /* load lock word and set reservation */
95 cmpwi 0,r6,0
96 bne- 11b /* held by someone else: go back to polling */
97 stwcx. r0,0,r8 /* try to claim the lock */
98 bne- 10b /* reservation lost: try again */
99 isync
100#endif
101 /* Get PTE (linux-style) and check access */
102 lis r0,KERNELBASE@h /* check if kernel address */
103 cmplw 0,r4,r0
104 mfspr r8,SPRN_SPRG3 /* current task's THREAD (phys) */
105 ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */
106 lwz r5,PGDIR(r8) /* virt page-table root */
107 blt+ 112f /* assume user more likely */
108 lis r5,swapper_pg_dir@ha /* if kernel address, use */
109 addi r5,r5,swapper_pg_dir@l /* kernel page table */
110 rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */
111112: add r5,r5,r7 /* convert to phys addr */
112 rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */
113 lwz r8,0(r5) /* get pmd entry */
114 rlwinm. r8,r8,0,0,19 /* extract address of pte page */
115#ifdef CONFIG_SMP
116 beq- hash_page_out /* return if no mapping */
117#else
118 /* XXX it seems like the 601 will give a machine fault on the
119 rfi if its alignment is wrong (bottom 4 bits of address are
120 8 or 0xc) and we have had a not-taken conditional branch
121 to the address following the rfi. */
122 beqlr-
123#endif
124 rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */
125 rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */
126 ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE
127
128 /*
129 * Update the linux PTE atomically. We do the lwarx up-front
130 * because almost always, there won't be a permission violation
131 * and there won't already be an HPTE, and thus we will have
132 * to update the PTE to set _PAGE_HASHPTE. -- paulus.
133 */
134retry:
135 lwarx r6,0,r8 /* get linux-style pte */
136 andc. r5,r3,r6 /* check access & ~permission */
137#ifdef CONFIG_SMP
138 bne- hash_page_out /* return if access not permitted */
139#else
140 bnelr-
141#endif
142 or r5,r0,r6 /* set accessed/dirty bits */
143 stwcx. r5,0,r8 /* attempt to update PTE */
144 bne- retry /* retry if someone got there first */
145
146 mfsrin r3,r4 /* get segment reg for segment */
147 mfctr r0
148 stw r0,_CTR(r11) /* save CTR: create_hpte uses ctr for its search loops */
149 bl create_hpte /* add the hash table entry */
150
151#ifdef CONFIG_SMP
152 eieio
153 addis r8,r7,mmu_hash_lock@ha
154 li r0,0
155 stw r0,mmu_hash_lock@l(r8) /* release mmu_hash_lock */
156#endif
157
158 /* Return from the exception */
159 lwz r5,_CTR(r11)
160 mtctr r5 /* restore the CTR saved above */
161 lwz r0,GPR0(r11)
162 lwz r7,GPR7(r11)
163 lwz r8,GPR8(r11)
164 b fast_exception_return
165
166#ifdef CONFIG_SMP
 /* Failure exit on SMP: drop mmu_hash_lock, then return to the caller */
167hash_page_out:
168 eieio
169 addis r8,r7,mmu_hash_lock@ha
170 li r0,0
171 stw r0,mmu_hash_lock@l(r8)
172 blr
173#endif /* CONFIG_SMP */
174
175/*
176 * Add an entry for a particular page to the hash table.
177 *
178 * add_hash_page(unsigned context, unsigned long va, unsigned long pmdval)
179 *
180 * We assume any necessary modifications to the pte (e.g. setting
181 * the accessed bit) have already been done and that there is actually
182 * a hash table in use (i.e. we're not on a 603).
 *
 * Register use on entry: r3 = context, r4 = va, r5 = pmdval.
 * MSR_EE and MSR_DR are cleared for the duration and the original MSR
 * (kept in r10) is restored before returning.
183 */
184_GLOBAL(add_hash_page)
185 mflr r0
186 stw r0,4(r1) /* save LR at 4(r1); the bl to create_hpte below overwrites LR */
187
188 /* Convert context and va to VSID */
189 mulli r3,r3,897*16 /* multiply context by context skew */
190 rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */
191 mulli r0,r0,0x111 /* multiply by ESID skew */
192 add r3,r3,r0 /* note create_hpte trims to 24 bits */
193
194#ifdef CONFIG_SMP
195 rlwinm r8,r1,0,0,18 /* use cpu number to make tag */
196 lwz r8,TI_CPU(r8) /* to go in mmu_hash_lock */
197 oris r8,r8,12
198#endif /* CONFIG_SMP */
199
200 /*
201 * We disable interrupts here, even on UP, because we don't
202 * want to race with hash_page, and because we want the
203 * _PAGE_HASHPTE bit to be a reliable indication of whether
204 * the HPTE exists (or at least whether one did once).
205 * We also turn off the MMU for data accesses so that we
206 * can't take a hash table miss (assuming the code is
207 * covered by a BAT). -- paulus
208 */
209 mfmsr r10
210 SYNC
211 rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
212 rlwinm r0,r0,0,28,26 /* clear MSR_DR */
213 mtmsr r0
214 SYNC_601
215 isync
216
217 tophys(r7,0)
218
219#ifdef CONFIG_SMP
220 addis r9,r7,mmu_hash_lock@ha
221 addi r9,r9,mmu_hash_lock@l
22210: lwarx r0,0,r9 /* take the mmu_hash_lock */
223 cmpi 0,r0,0
224 bne- 11f /* already held: go and poll */
225 stwcx. r8,0,r9 /* store our tag as the lock value */
226 beq+ 12f /* got it */
22711: lwz r0,0(r9) /* poll with plain loads until it reads zero */
228 cmpi 0,r0,0
229 beq 10b
230 b 11b
23112: isync
232#endif
233
234 /*
235 * Fetch the linux pte and test and set _PAGE_HASHPTE atomically.
236 * If _PAGE_HASHPTE was already set, we don't replace the existing
237 * HPTE, so we just unlock and return.
238 */
239 mr r8,r5
240 rlwimi r8,r4,22,20,29 /* r8 = pmdval with bits of va inserted (same insert as in hash_page) */
2411: lwarx r6,0,r8 /* r6 = old linux pte, as create_hpte expects */
242 andi. r0,r6,_PAGE_HASHPTE
243 bne 9f /* if HASHPTE already set, done */
244 ori r5,r6,_PAGE_HASHPTE
245 stwcx. r5,0,r8
246 bne- 1b /* lost the reservation: retry */
247
248 bl create_hpte
249
2509:
251#ifdef CONFIG_SMP
252 eieio
253 li r0,0
254 stw r0,0(r9) /* clear mmu_hash_lock */
255#endif
256
257 /* reenable interrupts and DR */
258 mtmsr r10
259 SYNC_601
260 isync
261
262 lwz r0,4(r1)
263 mtlr r0 /* restore the LR saved on entry */
264 blr
265
266/*
267 * This routine adds a hardware PTE to the hash table.
268 * It is designed to be called with the MMU either on or off.
269 * r3 contains the VSID, r4 contains the virtual address,
270 * r5 contains the linux PTE, r6 contains the old value of the
271 * linux PTE (before setting _PAGE_HASHPTE) and r7 contains the
272 * offset to be added to addresses (0 if the MMU is on,
273 * -KERNELBASE if it is off).
274 * On SMP, the caller should have the mmu_hash_lock held.
275 * We assume that the caller has (or will) set the _PAGE_HASHPTE
276 * bit in the linux PTE in memory. The value passed in r6 should
277 * be the old linux PTE value; if it doesn't have _PAGE_HASHPTE set
278 * this routine will skip the search for an existing HPTE.
279 * This procedure modifies r0, r3 - r6, r8, cr0.
 * The search loops also load ctr (mtctr), so callers that need ctr
 * must save it themselves, as hash_page does.
280 * -- paulus.
281 *
282 * For speed, 4 of the instructions get patched once the size and
283 * physical address of the hash table are known. These definitions
284 * of Hash_base and Hash_bits below are just an example.
285 */
286Hash_base = 0xc0180000
287Hash_bits = 12 /* e.g. 256kB hash table */
288Hash_msk = (((1 << Hash_bits) - 1) * 64)
289
290#ifndef CONFIG_PPC64BRIDGE
291/* defines for the PTE format for 32-bit PPCs */
292#define PTE_SIZE 8
293#define PTEG_SIZE 64
294#define LG_PTEG_SIZE 6
295#define LDPTEu lwzu
296#define STPTE stw
297#define CMPPTE cmpw
298#define PTE_H 0x40
299#define PTE_V 0x80000000
300#define TST_V(r) rlwinm. r,r,0,0,0
301#define SET_V(r) oris r,r,PTE_V@h
302#define CLR_V(r,t) rlwinm r,r,0,1,31
303
304#else
305/* defines for the PTE format for 64-bit PPCs */
306#define PTE_SIZE 16
307#define PTEG_SIZE 128
308#define LG_PTEG_SIZE 7
309#define LDPTEu ldu
310#define STPTE std
311#define CMPPTE cmpd
312#define PTE_H 2
313#define PTE_V 1
314#define TST_V(r) andi. r,r,PTE_V
315#define SET_V(r) ori r,r,PTE_V
316#define CLR_V(r,t) li t,PTE_V; andc r,r,t
317#endif /* CONFIG_PPC64BRIDGE */
318
 /* bit positions of the hash field, as used by the rlwimi/rlwinm in the patched instructions */
319#define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1)
320#define HASH_RIGHT 31-LG_PTEG_SIZE
321
322_GLOBAL(create_hpte)
323 /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */
324 rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */
325 rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
326 and r8,r8,r0 /* writable if _RW & _DIRTY */
327 rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */
328 rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */
329 ori r8,r8,0xe14 /* clear out reserved bits and M */
330 andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */
331BEGIN_FTR_SECTION
332 ori r8,r8,_PAGE_COHERENT /* set M (coherence required) */
333END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT)
334
335 /* Construct the high word of the PPC-style PTE (r5) */
336#ifndef CONFIG_PPC64BRIDGE
337 rlwinm r5,r3,7,1,24 /* put VSID in 0x7fffff80 bits */
338 rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */
339#else /* CONFIG_PPC64BRIDGE */
340 clrlwi r3,r3,8 /* reduce vsid to 24 bits */
341 sldi r5,r3,12 /* shift vsid into position */
342 rlwimi r5,r4,16,20,24 /* put in API (abbrev page index) */
343#endif /* CONFIG_PPC64BRIDGE */
344 SET_V(r5) /* set V (valid) bit */
345
346 /* Get the address of the primary PTE group in the hash table (r3) */
347_GLOBAL(hash_page_patch_A)
348 addis r0,r7,Hash_base@h /* base address of hash table */
349 rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
350 rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
351 xor r3,r3,r0 /* make primary hash */
352 li r0,8 /* PTEs/group */
353
354 /*
355 * Test the _PAGE_HASHPTE bit in the old linux PTE, and skip the search
356 * if it is clear, meaning that the HPTE isn't there already...
357 */
358 andi. r6,r6,_PAGE_HASHPTE
359 beq+ 10f /* no PTE: go look for an empty slot */
360 tlbie r4
361
362 addis r4,r7,htab_hash_searches@ha
363 lwz r6,htab_hash_searches@l(r4)
364 addi r6,r6,1 /* count how many searches we do */
365 stw r6,htab_hash_searches@l(r4)
366
367 /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
368 mtctr r0
369 addi r4,r3,-PTE_SIZE
3701: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */
371 CMPPTE 0,r6,r5
372 bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
373 beq+ found_slot
374
375 /* Search the secondary PTEG for a matching PTE */
376 ori r5,r5,PTE_H /* set H (secondary hash) bit */
377_GLOBAL(hash_page_patch_B)
378 xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
379 xori r4,r4,(-PTEG_SIZE & 0xffff)
380 addi r4,r4,-PTE_SIZE
381 mtctr r0
3822: LDPTEu r6,PTE_SIZE(r4)
383 CMPPTE 0,r6,r5
384 bdnzf 2,2b
385 beq+ found_slot
386 xori r5,r5,PTE_H /* clear H bit again */
387
388 /* Search the primary PTEG for an empty slot */
38910: mtctr r0
390 addi r4,r3,-PTE_SIZE /* search primary PTEG */
3911: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */
392 TST_V(r6) /* test valid bit */
393 bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
394 beq+ found_empty
395
396 /* update counter of times that the primary PTEG is full */
397 addis r4,r7,primary_pteg_full@ha
398 lwz r6,primary_pteg_full@l(r4)
399 addi r6,r6,1
400 stw r6,primary_pteg_full@l(r4)
401
402 /* Search the secondary PTEG for an empty slot */
403 ori r5,r5,PTE_H /* set H (secondary hash) bit */
404_GLOBAL(hash_page_patch_C)
405 xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
406 xori r4,r4,(-PTEG_SIZE & 0xffff)
407 addi r4,r4,-PTE_SIZE
408 mtctr r0
4092: LDPTEu r6,PTE_SIZE(r4)
410 TST_V(r6)
411 bdnzf 2,2b
412 beq+ found_empty
413 xori r5,r5,PTE_H /* clear H bit again */
414
415 /*
416 * Choose an arbitrary slot in the primary PTEG to overwrite.
417 * Since both the primary and secondary PTEGs are full, and we
418 * have no information that the PTEs in the primary PTEG are
419 * more important or useful than those in the secondary PTEG,
420 * and we know there is a definite (although small) speed
421 * advantage to putting the PTE in the primary PTEG, we always
422 * put the PTE in the primary PTEG.
423 */
424 addis r4,r7,next_slot@ha
425 lwz r6,next_slot@l(r4)
426 addi r6,r6,PTE_SIZE /* step on by one PTE from the last victim */
427 andi. r6,r6,7*PTE_SIZE /* wrap within the 8 slots of the group */
428 stw r6,next_slot@l(r4)
429 add r4,r3,r6 /* r4 = address of the slot to overwrite */
430
431#ifndef CONFIG_SMP
432 /* Store PTE in PTEG */
 /* found_empty writes both words; found_slot (first word already matches) rewrites only the second */
433found_empty:
434 STPTE r5,0(r4)
435found_slot:
436 STPTE r8,PTE_SIZE/2(r4)
437
438#else /* CONFIG_SMP */
439/*
440 * Between the tlbie above and updating the hash table entry below,
441 * another CPU could read the hash table entry and put it in its TLB.
442 * There are 3 cases:
443 * 1. using an empty slot
444 * 2. updating an earlier entry to change permissions (i.e. enable write)
445 * 3. taking over the PTE for an unrelated address
446 *
447 * In each case it doesn't really matter if the other CPUs have the old
448 * PTE in their TLB. So we don't need to bother with another tlbie here,
449 * which is convenient as we've overwritten the register that had the
450 * address. :-) The tlbie above is mainly to make sure that this CPU comes
451 * and gets the new PTE from the hash table.
452 *
453 * We do however have to make sure that the PTE is never in an invalid
454 * state with the V bit set.
455 */
456found_empty:
457found_slot:
458 CLR_V(r5,r0) /* clear V (valid) bit in PTE */
459 STPTE r5,0(r4)
460 sync
461 TLBSYNC
462 STPTE r8,PTE_SIZE/2(r4) /* put in correct RPN, WIMG, PP bits */
463 sync
464 SET_V(r5)
465 STPTE r5,0(r4) /* finally set V bit in PTE */
466#endif /* CONFIG_SMP */
467
468 sync /* make sure pte updates get to memory */
469 blr
470
471 .comm next_slot,4 /* byte offset within a PTEG of the last slot chosen for overwriting */
472 .comm primary_pteg_full,4 /* number of times the primary PTEG had no empty slot */
473 .comm htab_hash_searches,4 /* number of searches made for an existing HPTE */
474
475/*
476 * Flush the entry for a particular page from the hash table.
477 *
478 * flush_hash_pages(unsigned context, unsigned long va, unsigned long pmdval,
479 * int count)
480 *
481 * We assume that there is a hash table in use (Hash != 0).
 *
 * Register use on entry: r3 = context, r4 = va, r5 = pmdval, r6 = count.
 * Up to `count' consecutive linux PTEs are examined, stepping va by
 * 0x1000 and the PTE pointer by 4 each time.  Every PTE found with
 * _PAGE_HASHPTE set has that bit cleared, its HPTE (if found) zeroed
 * and a tlbie issued for its address.  If no PTE in the range has
 * _PAGE_HASHPTE set, the routine returns without taking mmu_hash_lock.
482 */
483_GLOBAL(flush_hash_pages)
484 tophys(r7,0)
485
486 /*
487 * We disable interrupts here, even on UP, because we want
488 * the _PAGE_HASHPTE bit to be a reliable indication of
489 * whether the HPTE exists (or at least whether one did once).
490 * We also turn off the MMU for data accesses so that we
491 * can't take a hash table miss (assuming the code is
492 * covered by a BAT). -- paulus
493 */
494 mfmsr r10
495 SYNC
496 rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
497 rlwinm r0,r0,0,28,26 /* clear MSR_DR */
498 mtmsr r0
499 SYNC_601
500 isync
501
502 /* First find a PTE in the range that has _PAGE_HASHPTE set */
503 rlwimi r5,r4,22,20,29 /* r5 = pmdval with bits of va inserted: pointer to the first linux pte */
5041: lwz r0,0(r5)
505 cmpwi cr1,r6,1 /* cr1: is this the last pte to check? */
506 andi. r0,r0,_PAGE_HASHPTE
507 bne 2f /* found one: go and flush it */
508 ble cr1,19f /* none left: nothing to flush, just restore MSR */
509 addi r4,r4,0x1000
510 addi r5,r5,4
511 addi r6,r6,-1
512 b 1b
513
514 /* Convert context and va to VSID */
5152: mulli r3,r3,897*16 /* multiply context by context skew */
516 rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */
517 mulli r0,r0,0x111 /* multiply by ESID skew */
518 add r3,r3,r0 /* note code below trims to 24 bits */
519
520 /* Construct the high word of the PPC-style PTE (r11) */
521#ifndef CONFIG_PPC64BRIDGE
522 rlwinm r11,r3,7,1,24 /* put VSID in 0x7fffff80 bits */
523 rlwimi r11,r4,10,26,31 /* put in API (abbrev page index) */
524#else /* CONFIG_PPC64BRIDGE */
525 clrlwi r3,r3,8 /* reduce vsid to 24 bits */
526 sldi r11,r3,12 /* shift vsid into position */
527 rlwimi r11,r4,16,20,24 /* put in API (abbrev page index) */
528#endif /* CONFIG_PPC64BRIDGE */
529 SET_V(r11) /* set V (valid) bit */
530
531#ifdef CONFIG_SMP
532 addis r9,r7,mmu_hash_lock@ha
533 addi r9,r9,mmu_hash_lock@l
534 rlwinm r8,r1,0,0,18
535 add r8,r8,r7 /* apply the tophys offset: MSR_DR is off here */
536 lwz r8,TI_CPU(r8) /* cpu number, used as part of the lock value */
537 oris r8,r8,9
53810: lwarx r0,0,r9 /* take the mmu_hash_lock */
539 cmpi 0,r0,0
540 bne- 11f /* already held: go and poll */
541 stwcx. r8,0,r9
542 beq+ 12f /* got it */
54311: lwz r0,0(r9) /* poll with plain loads until it reads zero */
544 cmpi 0,r0,0
545 beq 10b
546 b 11b
54712: isync
548#endif
549
550 /*
551 * Check the _PAGE_HASHPTE bit in the linux PTE. If it is
552 * already clear, we're done (for this pte). If not,
553 * clear it (atomically) and proceed. -- paulus.
554 */
55533: lwarx r8,0,r5 /* fetch the pte */
556 andi. r0,r8,_PAGE_HASHPTE
557 beq 8f /* done if HASHPTE is already clear */
558 rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */
559 stwcx. r8,0,r5 /* update the pte */
560 bne- 33b
561
562 /* Get the address of the primary PTE group in the hash table (r3) */
563_GLOBAL(flush_hash_patch_A)
564 addis r8,r7,Hash_base@h /* base address of hash table */
565 rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
566 rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
567 xor r8,r0,r8 /* make primary hash */
568
569 /* Search the primary PTEG for a PTE whose 1st (d)word matches r11 */
570 li r0,8 /* PTEs/group */
571 mtctr r0
572 addi r12,r8,-PTE_SIZE
5731: LDPTEu r0,PTE_SIZE(r12) /* get next PTE */
574 CMPPTE 0,r0,r11
575 bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
576 beq+ 3f
577
578 /* Search the secondary PTEG for a matching PTE */
579 ori r11,r11,PTE_H /* set H (secondary hash) bit */
580 li r0,8 /* PTEs/group */
581_GLOBAL(flush_hash_patch_B)
582 xoris r12,r8,Hash_msk>>16 /* compute secondary hash */
583 xori r12,r12,(-PTEG_SIZE & 0xffff)
584 addi r12,r12,-PTE_SIZE
585 mtctr r0
5862: LDPTEu r0,PTE_SIZE(r12)
587 CMPPTE 0,r0,r11
588 bdnzf 2,2b
589 xori r11,r11,PTE_H /* clear H again */
590 bne- 4f /* should rarely fail to find it */
591
5923: li r0,0
593 STPTE r0,0(r12) /* invalidate entry */
5944: sync
595 tlbie r4 /* in hw tlb too */
596 sync
597
5988: ble cr1,9f /* if all ptes checked */
59981: addi r6,r6,-1
600 addi r5,r5,4 /* advance to next pte */
601 addi r4,r4,0x1000
602 lwz r0,0(r5) /* check next pte */
603 cmpwi cr1,r6,1
604 andi. r0,r0,_PAGE_HASHPTE
605 bne 33b /* HASHPTE set: flush this one too */
606 bgt cr1,81b /* clear, but more ptes remain: keep scanning */
607
6089:
609#ifdef CONFIG_SMP
610 TLBSYNC
611 li r0,0
612 stw r0,0(r9) /* clear mmu_hash_lock */
613#endif
614
61519: mtmsr r10 /* restore the MSR saved on entry (EE and DR) */
616 SYNC_601
617 isync
618 blr
diff --git a/arch/powerpc/mm/init.c b/arch/powerpc/mm/init.c
new file mode 100644
index 000000000000..f4d983a6e521
--- /dev/null
+++ b/arch/powerpc/mm/init.c
@@ -0,0 +1,581 @@
1/*
2 * PowerPC version
3 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
4 *
5 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
6 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
7 * Copyright (C) 1996 Paul Mackerras
8 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
9 * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
10 *
11 * Derived from "arch/i386/mm/init.c"
12 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 *
19 */
20
21#include <linux/config.h>
22#include <linux/module.h>
23#include <linux/sched.h>
24#include <linux/kernel.h>
25#include <linux/errno.h>
26#include <linux/string.h>
27#include <linux/types.h>
28#include <linux/mm.h>
29#include <linux/stddef.h>
30#include <linux/init.h>
31#include <linux/bootmem.h>
32#include <linux/highmem.h>
33#include <linux/initrd.h>
34#include <linux/pagemap.h>
35
36#include <asm/pgalloc.h>
37#include <asm/prom.h>
38#include <asm/io.h>
39#include <asm/mmu_context.h>
40#include <asm/pgtable.h>
41#include <asm/mmu.h>
42#include <asm/smp.h>
43#include <asm/machdep.h>
44#include <asm/btext.h>
45#include <asm/tlb.h>
46#include <asm/bootinfo.h>
47#include <asm/prom.h>
48
49#include "mem_pieces.h"
50#include "mmu_decl.h"
51
52#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL)
53/* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */
/*
 * NOTE(review): the #error text below names CONFIG_START_KERNEL while the
 * guard above tests CONFIG_KERNEL_START_BOOL; the option name in the
 * message may be transposed - confirm against Kconfig.
 */
54#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - KERNELBASE))
55#error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL"
56#endif
57#endif
58#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE
59
60DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
61
/* RAM sizes in bytes; both are computed in MMU_init() */
62unsigned long total_memory;
63unsigned long total_lowmem;
64
65unsigned long ppc_memstart;
66unsigned long ppc_memoffset = PAGE_OFFSET;
67
68int mem_init_done; /* set to 1 at the end of mem_init() */
69int init_bootmem_done; /* set to 1 at the end of do_init_bootmem() */
70int boot_mapsize; /* value returned by init_bootmem_node() in do_init_bootmem() */
71#ifdef CONFIG_PPC_PMAC
72unsigned long agp_special_page; /* virtual address of the page set aside in set_phys_avail() */
73#endif
74
75extern char _end[];
76extern char etext[], _stext[];
77extern char __init_begin, __init_end;
78
79#ifdef CONFIG_HIGHMEM
/* both are initialised in paging_init() */
80pte_t *kmap_pte;
81pgprot_t kmap_prot;
82
83EXPORT_SYMBOL(kmap_prot);
84EXPORT_SYMBOL(kmap_pte);
85#endif
86
87void MMU_init(void);
88void set_phys_avail(unsigned long total_ram);
89
90/* XXX should be in current.h -- paulus */
91extern struct task_struct *current_set[NR_CPUS];
92
93char *klimit = _end;
94struct mem_pieces phys_avail; /* filled in by set_phys_avail() */
95struct device_node *memory_node; /* set by find_end_of_memory(), read by get_mem_prop() */
96
97/*
98 * this tells the system to map all of ram with the segregs
99 * (i.e. page tables) instead of the bats.
100 * -- Cort
101 */
102int __map_without_bats; /* set by the "nobats" option in MMU_setup() */
103int __map_without_ltlbs; /* set by the "noltlbs" option in MMU_setup() */
104
105/* max amount of RAM to use */
106unsigned long __max_memory;
107/* max amount of low RAM to map in */
108unsigned long __max_low_memory = MAX_LOW_MEM;
109
110/*
111 * Read in a property describing some pieces of memory.
112 */
113static int __init get_mem_prop(char *name, struct mem_pieces *mp)
114{
115 struct reg_property *rp;
116 int i, s;
117 unsigned int *ip;
118 int nac = prom_n_addr_cells(memory_node);
119 int nsc = prom_n_size_cells(memory_node);
120
121 ip = (unsigned int *) get_property(memory_node, name, &s);
122 if (ip == NULL) {
123 printk(KERN_ERR "error: couldn't get %s property on /memory\n",
124 name);
125 return 0;
126 }
127 s /= (nsc + nac) * 4;
128 rp = mp->regions;
129 for (i = 0; i < s; ++i, ip += nac+nsc) {
130 if (nac >= 2 && ip[nac-2] != 0)
131 continue;
132 rp->address = ip[nac-1];
133 if (nsc >= 2 && ip[nac+nsc-2] != 0)
134 rp->size = ~0U;
135 else
136 rp->size = ip[nac+nsc-1];
137 ++rp;
138 }
139 mp->n_regions = rp - mp->regions;
140
141 /* Make sure the pieces are sorted. */
142 mem_pieces_sort(mp);
143 mem_pieces_coalesce(mp);
144 return 1;
145}
146
147/*
148 * Collect information about physical RAM and which pieces are
149 * already in use from the device tree.
150 */
151unsigned long __init find_end_of_memory(void)
152{
153 unsigned long a, total;
154 struct mem_pieces phys_mem;
155
156 /*
157 * Find out where physical memory is, and check that it
158 * starts at 0 and is contiguous. It seems that RAM is
159 * always physically contiguous on Power Macintoshes.
160 *
161 * Supporting discontiguous physical memory isn't hard,
162 * it just makes the virtual <-> physical mapping functions
163 * more complicated (or else you end up wasting space
164 * in mem_map).
165 */
166 memory_node = find_devices("memory");
167 if (memory_node == NULL || !get_mem_prop("reg", &phys_mem)
168 || phys_mem.n_regions == 0)
169 panic("No RAM??");
170 a = phys_mem.regions[0].address;
171 if (a != 0)
172 panic("RAM doesn't start at physical address 0");
173 total = phys_mem.regions[0].size;
174
175 if (phys_mem.n_regions > 1) {
176 printk("RAM starting at 0x%x is not contiguous\n",
177 phys_mem.regions[1].address);
178 printk("Using RAM from 0 to 0x%lx\n", total-1);
179 }
180
181 return total;
182}
183
184/*
185 * Check for command-line options that affect what MMU_init will do.
186 */
187void MMU_setup(void)
188{
189 /* Check for nobats option (used in mapin_ram). */
190 if (strstr(cmd_line, "nobats")) {
191 __map_without_bats = 1;
192 }
193
194 if (strstr(cmd_line, "noltlbs")) {
195 __map_without_ltlbs = 1;
196 }
197
198 /* Look for mem= option on command line */
199 if (strstr(cmd_line, "mem=")) {
200 char *p, *q;
201 unsigned long maxmem = 0;
202
203 for (q = cmd_line; (p = strstr(q, "mem=")) != 0; ) {
204 q = p + 4;
205 if (p > cmd_line && p[-1] != ' ')
206 continue;
207 maxmem = simple_strtoul(q, &q, 0);
208 if (*q == 'k' || *q == 'K') {
209 maxmem <<= 10;
210 ++q;
211 } else if (*q == 'm' || *q == 'M') {
212 maxmem <<= 20;
213 ++q;
214 }
215 }
216 __max_memory = maxmem;
217 }
218}
219
220/*
221 * MMU_init sets up the basic memory mappings for the kernel,
222 * including both RAM and possibly some I/O regions,
223 * and sets up the page tables and the MMU hardware ready to go.
 *
 * Steps, in order: parse command-line options, determine total_memory
 * and total_lowmem, build phys_avail, initialise the MMU hardware, map
 * RAM, choose the ioremap base, let the platform map its I/O, and
 * initialise MMU context management.  ppc_md.progress, when set, is
 * called before each major step.
224 */
225void __init MMU_init(void)
226{
227 if (ppc_md.progress)
228 ppc_md.progress("MMU:enter", 0x111);
229
230 /* parse args from command line */
231 MMU_setup();
232
233 /*
234 * Figure out how much memory we have, how much
235 * is lowmem, and how much is highmem. If we were
236 * passed the total memory size from the bootloader,
237 * just use it.
238 */
239 if (boot_mem_size)
240 total_memory = boot_mem_size;
241 else
242 total_memory = ppc_md.find_end_of_memory();
243
 /* apply the mem= limit; __max_memory == 0 means no limit was given */
244 if (__max_memory && total_memory > __max_memory)
245 total_memory = __max_memory;
246 total_lowmem = total_memory; /* start with all RAM as lowmem; clamped below */
247#ifdef CONFIG_FSL_BOOKE
248 /* Freescale Book-E parts expect lowmem to be mapped by fixed TLB
249 * entries, so we need to adjust lowmem to match the amount we can map
250 * in the fixed entries */
251 adjust_total_lowmem();
252#endif /* CONFIG_FSL_BOOKE */
253 if (total_lowmem > __max_low_memory) {
254 total_lowmem = __max_low_memory;
255#ifndef CONFIG_HIGHMEM
 /* without highmem, RAM beyond lowmem is not used at all */
256 total_memory = total_lowmem;
257#endif /* CONFIG_HIGHMEM */
258 }
259 set_phys_avail(total_lowmem);
260
261 /* Initialize the MMU hardware */
262 if (ppc_md.progress)
263 ppc_md.progress("MMU:hw init", 0x300);
264 MMU_init_hw();
265
266 /* Map in all of RAM starting at KERNELBASE */
267 if (ppc_md.progress)
268 ppc_md.progress("MMU:mapin", 0x301);
269 mapin_ram();
270
271#ifdef CONFIG_HIGHMEM
272 ioremap_base = PKMAP_BASE;
273#else
274 ioremap_base = 0xfe000000UL; /* for now, could be 0xfffff000 */
275#endif /* CONFIG_HIGHMEM */
276 ioremap_bot = ioremap_base;
277
278 /* Map in I/O resources */
279 if (ppc_md.progress)
280 ppc_md.progress("MMU:setio", 0x302);
281 if (ppc_md.setup_io_mappings)
282 ppc_md.setup_io_mappings();
283
284 /* Initialize the context management stuff */
285 mmu_context_init();
286
287 if (ppc_md.progress)
288 ppc_md.progress("MMU:exit", 0x211);
289
290#ifdef CONFIG_BOOTX_TEXT
291 /* By default, we are no longer mapped */
292 boot_text_mapped = 0;
293 /* Must be done last, or ppc_md.progress will die. */
294 map_boot_text();
295#endif
296}
297
298/* This is only called until mem_init is done. */
299void __init *early_get_page(void)
300{
301 void *p;
302
303 if (init_bootmem_done) {
304 p = alloc_bootmem_pages(PAGE_SIZE);
305 } else {
306 p = mem_pieces_find(PAGE_SIZE, PAGE_SIZE);
307 }
308 return p;
309}
310
311/* Free up now-unused memory */
312static void free_sec(unsigned long start, unsigned long end, const char *name)
313{
314 unsigned long cnt = 0;
315
316 while (start < end) {
317 ClearPageReserved(virt_to_page(start));
318 set_page_count(virt_to_page(start), 1);
319 free_page(start);
320 cnt++;
321 start += PAGE_SIZE;
322 }
323 if (cnt) {
324 printk(" %ldk %s", cnt << (PAGE_SHIFT - 10), name);
325 totalram_pages += cnt;
326 }
327}
328
329void free_initmem(void)
330{
331#define FREESEC(TYPE) \
332 free_sec((unsigned long)(&__ ## TYPE ## _begin), \
333 (unsigned long)(&__ ## TYPE ## _end), \
334 #TYPE);
335
336 printk ("Freeing unused kernel memory:");
337 FREESEC(init);
338 printk("\n");
339 ppc_md.progress = NULL;
340#undef FREESEC
341}
342
#ifdef CONFIG_BLK_DEV_INITRD
/*
 * Return the initrd pages in [start, end) to the page allocator,
 * bumping totalram_pages once per page.  A message giving the size in
 * kB is printed first when the range is non-empty.
 */
void free_initrd_mem(unsigned long start, unsigned long end)
{
	unsigned long addr;

	if (start < end)
		printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);

	addr = start;
	while (addr < end) {
		struct page *pg = virt_to_page(addr);

		ClearPageReserved(pg);
		set_page_count(pg, 1);
		free_page(addr);
		totalram_pages++;
		addr += PAGE_SIZE;
	}
}
#endif
356
357/*
358 * Initialize the bootmem system and give it all the memory we
359 * have available.
 *
 * Sets min_low_pfn, max_low_pfn, max_pfn and boot_mapsize, removes the
 * bitmap's own storage from phys_avail, frees every remaining
 * phys_avail region into bootmem and finally sets init_bootmem_done.
360 */
361void __init do_init_bootmem(void)
362{
363 unsigned long start, size;
364 int i;
365
366 /*
367 * Find an area to use for the bootmem bitmap.
368 * We look for the first area which is at least
369 * 128kB in length (128kB is enough for a bitmap
370 * for 4GB of memory, using 4kB pages), plus 1 page
371 * (in case the address isn't page-aligned).
372 */
373 start = 0;
374 size = 0;
 /*
  * Track the largest region seen so far and stop at the first one of
  * at least 33 pages; if none is that big, the largest region is used.
  */
375 for (i = 0; i < phys_avail.n_regions; ++i) {
376 unsigned long a = phys_avail.regions[i].address;
377 unsigned long s = phys_avail.regions[i].size;
378 if (s <= size)
379 continue;
380 start = a;
381 size = s;
382 if (s >= 33 * PAGE_SIZE)
383 break;
384 }
385 start = PAGE_ALIGN(start);
386
387 min_low_pfn = start >> PAGE_SHIFT;
388 max_low_pfn = (PPC_MEMSTART + total_lowmem) >> PAGE_SHIFT;
389 max_pfn = (PPC_MEMSTART + total_memory) >> PAGE_SHIFT;
390 boot_mapsize = init_bootmem_node(&contig_page_data, min_low_pfn,
391 PPC_MEMSTART >> PAGE_SHIFT,
392 max_low_pfn);
393
394 /* remove the bootmem bitmap from the available memory */
395 mem_pieces_remove(&phys_avail, start, boot_mapsize, 1);
396
397 /* add everything in phys_avail into the bootmem map */
398 for (i = 0; i < phys_avail.n_regions; ++i)
399 free_bootmem(phys_avail.regions[i].address,
400 phys_avail.regions[i].size);
401
402 init_bootmem_done = 1;
403}
404
405/*
406 * paging_init() sets up the page tables - in fact we've already done this.
407 */
408void __init paging_init(void)
409{
410 unsigned long zones_size[MAX_NR_ZONES], i;
411
412#ifdef CONFIG_HIGHMEM
413 map_page(PKMAP_BASE, 0, 0); /* XXX gross */
414 pkmap_page_table = pte_offset_kernel(pmd_offset(pgd_offset_k
415 (PKMAP_BASE), PKMAP_BASE), PKMAP_BASE);
416 map_page(KMAP_FIX_BEGIN, 0, 0); /* XXX gross */
417 kmap_pte = pte_offset_kernel(pmd_offset(pgd_offset_k
418 (KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), KMAP_FIX_BEGIN);
419 kmap_prot = PAGE_KERNEL;
420#endif /* CONFIG_HIGHMEM */
421
422 /*
423 * All pages are DMA-able so we put them all in the DMA zone.
424 */
425 zones_size[ZONE_DMA] = total_lowmem >> PAGE_SHIFT;
426 for (i = 1; i < MAX_NR_ZONES; i++)
427 zones_size[i] = 0;
428
429#ifdef CONFIG_HIGHMEM
430 zones_size[ZONE_HIGHMEM] = (total_memory - total_lowmem) >> PAGE_SHIFT;
431#endif /* CONFIG_HIGHMEM */
432
433 free_area_init(zones_size);
434}
435
/*
 * Finish memory initialisation: set max_mapnr, high_memory and
 * num_physpages, release bootmem pages via free_all_bootmem(), adjust
 * the reserved flag on initrd, RTAS and AGP pages, count the reserved
 * lowmem pages by category, free highmem pages (CONFIG_HIGHMEM), print
 * a summary and set mem_init_done.
 */
436void __init mem_init(void)
437{
438 unsigned long addr;
439 int codepages = 0;
440 int datapages = 0;
441 int initpages = 0;
442#ifdef CONFIG_HIGHMEM
443 unsigned long highmem_mapnr;
444
445 highmem_mapnr = total_lowmem >> PAGE_SHIFT;
446#endif /* CONFIG_HIGHMEM */
447 max_mapnr = total_memory >> PAGE_SHIFT;
448
449 high_memory = (void *) __va(PPC_MEMSTART + total_lowmem);
450 num_physpages = max_mapnr; /* RAM is assumed contiguous */
451
452 totalram_pages += free_all_bootmem();
453
454#ifdef CONFIG_BLK_DEV_INITRD
455 /* if we are booted from BootX with an initial ramdisk,
456 make sure the ramdisk pages aren't reserved. */
457 if (initrd_start) {
458 for (addr = initrd_start; addr < initrd_end; addr += PAGE_SIZE)
459 ClearPageReserved(virt_to_page(addr));
460 }
461#endif /* CONFIG_BLK_DEV_INITRD */
462
463#ifdef CONFIG_PPC_OF
464 /* mark the RTAS pages as reserved */
465 if ( rtas_data )
466 for (addr = (ulong)__va(rtas_data);
467 addr < PAGE_ALIGN((ulong)__va(rtas_data)+rtas_size) ;
468 addr += PAGE_SIZE)
469 SetPageReserved(virt_to_page(addr));
470#endif
471#ifdef CONFIG_PPC_PMAC
472 if (agp_special_page)
473 SetPageReserved(virt_to_page(agp_special_page));
474#endif
 /*
  * Classify each reserved lowmem page for the summary below: below
  * etext counts as code, inside [__init_begin, __init_end) as init,
  * otherwise below klimit as data.  Other reserved pages are not counted.
  */
475 for (addr = PAGE_OFFSET; addr < (unsigned long)high_memory;
476 addr += PAGE_SIZE) {
477 if (!PageReserved(virt_to_page(addr)))
478 continue;
479 if (addr < (ulong) etext)
480 codepages++;
481 else if (addr >= (unsigned long)&__init_begin
482 && addr < (unsigned long)&__init_end)
483 initpages++;
484 else if (addr < (ulong) klimit)
485 datapages++;
486 }
487
488#ifdef CONFIG_HIGHMEM
489 {
490 unsigned long pfn;
491
 /* free every page frame from the end of lowmem up to max_mapnr */
492 for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) {
493 struct page *page = mem_map + pfn;
494
495 ClearPageReserved(page);
496 set_page_count(page, 1);
497 __free_page(page);
498 totalhigh_pages++;
499 }
500 totalram_pages += totalhigh_pages;
501 }
502#endif /* CONFIG_HIGHMEM */
503
504 printk("Memory: %luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n",
505 (unsigned long)nr_free_pages()<< (PAGE_SHIFT-10),
506 codepages<< (PAGE_SHIFT-10), datapages<< (PAGE_SHIFT-10),
507 initpages<< (PAGE_SHIFT-10),
508 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
509
510#ifdef CONFIG_PPC_PMAC
511 if (agp_special_page)
512 printk(KERN_INFO "AGP special page: 0x%08lx\n", agp_special_page);
513#endif
514
515 mem_init_done = 1;
516}
517
518/*
519 * Set phys_avail to the amount of physical memory,
520 * less the kernel text/data/bss.
 *
 * Note that the parameter is named total_memory and therefore hides
 * the file-scope variable of the same name inside this function;
 * MMU_init() passes total_lowmem.
521 */
522void __init
523set_phys_avail(unsigned long total_memory)
524{
525 unsigned long kstart, ksize;
526
527 /*
528 * Initially, available physical memory is equivalent to all
529 * physical memory.
530 */
531
532 phys_avail.regions[0].address = PPC_MEMSTART;
533 phys_avail.regions[0].size = total_memory;
534 phys_avail.n_regions = 1;
535
536 /*
537 * Map out the kernel text/data/bss from the available physical
538 * memory.
539 */
540
541 kstart = __pa(_stext); /* should be 0 */
542 ksize = PAGE_ALIGN(klimit - _stext);
543
544 mem_pieces_remove(&phys_avail, kstart, ksize, 0);
545 mem_pieces_remove(&phys_avail, 0, 0x4000, 0); /* also drop the first 0x4000 bytes of physical memory */
546
547#if defined(CONFIG_BLK_DEV_INITRD)
548 /* Remove the init RAM disk from the available memory. */
549 if (initrd_start) {
550 mem_pieces_remove(&phys_avail, __pa(initrd_start),
551 initrd_end - initrd_start, 1);
552 }
553#endif /* CONFIG_BLK_DEV_INITRD */
554#ifdef CONFIG_PPC_OF
555 /* remove the RTAS pages from the available memory */
556 if (rtas_data)
557 mem_pieces_remove(&phys_avail, rtas_data, rtas_size, 1);
558#endif
559#ifdef CONFIG_PPC_PMAC
560 /* Because of some uninorth weirdness, we need a page of
561 * memory as high as possible (it must be outside of the
562 * bus address seen as the AGP aperture). It will be used
563 * by the r128 DRM driver
564 *
565 * FIXME: We need to make sure that page doesn't overlap any of the
566 * above. This could be done by improving mem_pieces_find to be able
567 * to do a backward search from the end of the list.
568 */
569 if (_machine == _MACH_Pmac && find_devices("uni-north-agp")) {
 /* take the last page below total_memory, then keep its virtual address */
570 agp_special_page = (total_memory - PAGE_SIZE);
571 mem_pieces_remove(&phys_avail, agp_special_page, PAGE_SIZE, 0);
572 agp_special_page = (unsigned long)__va(agp_special_page);
573 }
574#endif /* CONFIG_PPC_PMAC */
575}
576
577/* Mark some memory as reserved by removing it from phys_avail. */
/*
 * start and size are handed unchanged to mem_pieces_remove(), with a
 * final argument of 1.
 */
578void __init reserve_phys_mem(unsigned long start, unsigned long size)
579{
580 mem_pieces_remove(&phys_avail, start, size, 1);
581}
diff --git a/arch/powerpc/mm/init64.c b/arch/powerpc/mm/init64.c
new file mode 100644
index 000000000000..81f6745b31ef
--- /dev/null
+++ b/arch/powerpc/mm/init64.c
@@ -0,0 +1,385 @@
1/*
2 * PowerPC version
3 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
4 *
5 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
6 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
7 * Copyright (C) 1996 Paul Mackerras
8 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
9 *
10 * Derived from "arch/i386/mm/init.c"
11 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
12 *
13 * Dave Engebretsen <engebret@us.ibm.com>
14 * Rework for PPC64 port.
15 *
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version
19 * 2 of the License, or (at your option) any later version.
20 *
21 */
22
23#include <linux/config.h>
24#include <linux/signal.h>
25#include <linux/sched.h>
26#include <linux/kernel.h>
27#include <linux/errno.h>
28#include <linux/string.h>
29#include <linux/types.h>
30#include <linux/mman.h>
31#include <linux/mm.h>
32#include <linux/swap.h>
33#include <linux/stddef.h>
34#include <linux/vmalloc.h>
35#include <linux/init.h>
36#include <linux/delay.h>
37#include <linux/bootmem.h>
38#include <linux/highmem.h>
39#include <linux/idr.h>
40#include <linux/nodemask.h>
41#include <linux/module.h>
42
43#include <asm/pgalloc.h>
44#include <asm/page.h>
45#include <asm/prom.h>
46#include <asm/lmb.h>
47#include <asm/rtas.h>
48#include <asm/io.h>
49#include <asm/mmu_context.h>
50#include <asm/pgtable.h>
51#include <asm/mmu.h>
52#include <asm/uaccess.h>
53#include <asm/smp.h>
54#include <asm/machdep.h>
55#include <asm/tlb.h>
56#include <asm/eeh.h>
57#include <asm/processor.h>
58#include <asm/mmzone.h>
59#include <asm/cputable.h>
60#include <asm/ppcdebug.h>
61#include <asm/sections.h>
62#include <asm/system.h>
63#include <asm/iommu.h>
64#include <asm/abs_addr.h>
65#include <asm/vdso.h>
66#include <asm/imalloc.h>
67
68#if PGTABLE_RANGE > USER_VSID_RANGE
69#warning Limited user VSID range means pagetable space is wasted
70#endif
71
72#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
73#warning TASK_SIZE is smaller than it needs to be.
74#endif
75
76int mem_init_done;
77unsigned long ioremap_bot = IMALLOC_BASE;
78static unsigned long phbs_io_bot = PHBS_IO_BASE;
79
80extern pgd_t swapper_pg_dir[];
81extern struct task_struct *current_set[NR_CPUS];
82
83unsigned long klimit = (unsigned long)_end;
84
85unsigned long _SDR1=0;
86unsigned long _ASR=0;
87
88/* max amount of RAM to use */
89unsigned long __max_memory;
90
91/* info on what we think the IO hole is */
92unsigned long io_hole_start;
93unsigned long io_hole_size;
94
95/*
96 * Do very early mm setup.
97 */
98void __init mm_init_ppc64(void)
99{
100#ifndef CONFIG_PPC_ISERIES
101 unsigned long i;
102#endif
103
104 ppc64_boot_msg(0x100, "MM Init");
105
106 /* This is the story of the IO hole... please, keep seated,
107 * unfortunately, we are out of oxygen masks at the moment.
108 * So we need some rough way to tell where your big IO hole
109 * is. On pmac, it's between 2G and 4G, on POWER3, it's around
110 * that area as well, on POWER4 we don't have one, etc...
111 * We need that as a "hint" when sizing the TCE table on POWER3
112 * So far, the simplest way that seem work well enough for us it
113 * to just assume that the first discontinuity in our physical
114 * RAM layout is the IO hole. That may not be correct in the future
115 * (and isn't on iSeries but then we don't care ;)
116 */
117
118#ifndef CONFIG_PPC_ISERIES
119 for (i = 1; i < lmb.memory.cnt; i++) {
120 unsigned long base, prevbase, prevsize;
121
122 prevbase = lmb.memory.region[i-1].base;
123 prevsize = lmb.memory.region[i-1].size;
124 base = lmb.memory.region[i].base;
125 if (base > (prevbase + prevsize)) {
126 io_hole_start = prevbase + prevsize;
127 io_hole_size = base - (prevbase + prevsize);
128 break;
129 }
130 }
131#endif /* CONFIG_PPC_ISERIES */
132 if (io_hole_start)
133 printk("IO Hole assumed to be %lx -> %lx\n",
134 io_hole_start, io_hole_start + io_hole_size - 1);
135
136 ppc64_boot_msg(0x100, "MM Init Done");
137}
138
139void free_initmem(void)
140{
141 unsigned long addr;
142
143 addr = (unsigned long)__init_begin;
144 for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
145 memset((void *)addr, 0xcc, PAGE_SIZE);
146 ClearPageReserved(virt_to_page(addr));
147 set_page_count(virt_to_page(addr), 1);
148 free_page(addr);
149 totalram_pages++;
150 }
151 printk ("Freeing unused kernel memory: %luk freed\n",
152 ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10);
153}
154
155#ifdef CONFIG_BLK_DEV_INITRD
156void free_initrd_mem(unsigned long start, unsigned long end)
157{
158 if (start < end)
159 printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
160 for (; start < end; start += PAGE_SIZE) {
161 ClearPageReserved(virt_to_page(start));
162 set_page_count(virt_to_page(start), 1);
163 free_page(start);
164 totalram_pages++;
165 }
166}
167#endif
168
169/*
170 * Initialize the bootmem system and give it all the memory we
171 * have available.
172 */
173#ifndef CONFIG_NEED_MULTIPLE_NODES
174void __init do_init_bootmem(void)
175{
176 unsigned long i;
177 unsigned long start, bootmap_pages;
178 unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT;
179 int boot_mapsize;
180
181 /*
182 * Find an area to use for the bootmem bitmap. Calculate the size of
183 * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE.
184 * Add 1 additional page in case the address isn't page-aligned.
185 */
186 bootmap_pages = bootmem_bootmap_pages(total_pages);
187
188 start = lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
189 BUG_ON(!start);
190
191 boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);
192
193 max_pfn = max_low_pfn;
194
195 /* Add all physical memory to the bootmem map, mark each area
196 * present.
197 */
198 for (i=0; i < lmb.memory.cnt; i++)
199 free_bootmem(lmb.memory.region[i].base,
200 lmb_size_bytes(&lmb.memory, i));
201
202 /* reserve the sections we're already using */
203 for (i=0; i < lmb.reserved.cnt; i++)
204 reserve_bootmem(lmb.reserved.region[i].base,
205 lmb_size_bytes(&lmb.reserved, i));
206
207 for (i=0; i < lmb.memory.cnt; i++)
208 memory_present(0, lmb_start_pfn(&lmb.memory, i),
209 lmb_end_pfn(&lmb.memory, i));
210}
211
212/*
213 * paging_init() sets up the page tables - in fact we've already done this.
214 */
215void __init paging_init(void)
216{
217 unsigned long zones_size[MAX_NR_ZONES];
218 unsigned long zholes_size[MAX_NR_ZONES];
219 unsigned long total_ram = lmb_phys_mem_size();
220 unsigned long top_of_ram = lmb_end_of_DRAM();
221
222 printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
223 top_of_ram, total_ram);
224 printk(KERN_INFO "Memory hole size: %ldMB\n",
225 (top_of_ram - total_ram) >> 20);
226 /*
227 * All pages are DMA-able so we put them all in the DMA zone.
228 */
229 memset(zones_size, 0, sizeof(zones_size));
230 memset(zholes_size, 0, sizeof(zholes_size));
231
232 zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
233 zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT;
234
235 free_area_init_node(0, NODE_DATA(0), zones_size,
236 __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
237}
238#endif /* ! CONFIG_NEED_MULTIPLE_NODES */
239
240static struct kcore_list kcore_vmem;
241
242static int __init setup_kcore(void)
243{
244 int i;
245
246 for (i=0; i < lmb.memory.cnt; i++) {
247 unsigned long base, size;
248 struct kcore_list *kcore_mem;
249
250 base = lmb.memory.region[i].base;
251 size = lmb.memory.region[i].size;
252
253 /* GFP_ATOMIC to avoid might_sleep warnings during boot */
254 kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC);
255 if (!kcore_mem)
256 panic("mem_init: kmalloc failed\n");
257
258 kclist_add(kcore_mem, __va(base), size);
259 }
260
261 kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START);
262
263 return 0;
264}
265module_init(setup_kcore);
266
267void __init mem_init(void)
268{
269#ifdef CONFIG_NEED_MULTIPLE_NODES
270 int nid;
271#endif
272 pg_data_t *pgdat;
273 unsigned long i;
274 struct page *page;
275 unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize;
276
277 num_physpages = max_low_pfn; /* RAM is assumed contiguous */
278 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
279
280#ifdef CONFIG_NEED_MULTIPLE_NODES
281 for_each_online_node(nid) {
282 if (NODE_DATA(nid)->node_spanned_pages != 0) {
283 printk("freeing bootmem node %x\n", nid);
284 totalram_pages +=
285 free_all_bootmem_node(NODE_DATA(nid));
286 }
287 }
288#else
289 max_mapnr = num_physpages;
290 totalram_pages += free_all_bootmem();
291#endif
292
293 for_each_pgdat(pgdat) {
294 for (i = 0; i < pgdat->node_spanned_pages; i++) {
295 page = pgdat_page_nr(pgdat, i);
296 if (PageReserved(page))
297 reservedpages++;
298 }
299 }
300
301 codesize = (unsigned long)&_etext - (unsigned long)&_stext;
302 initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin;
303 datasize = (unsigned long)&_edata - (unsigned long)&__init_end;
304 bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start;
305
306 printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, "
307 "%luk reserved, %luk data, %luk bss, %luk init)\n",
308 (unsigned long)nr_free_pages() << (PAGE_SHIFT-10),
309 num_physpages << (PAGE_SHIFT-10),
310 codesize >> 10,
311 reservedpages << (PAGE_SHIFT-10),
312 datasize >> 10,
313 bsssize >> 10,
314 initsize >> 10);
315
316 mem_init_done = 1;
317
318 /* Initialize the vDSO */
319 vdso_init();
320}
321
322void __iomem * reserve_phb_iospace(unsigned long size)
323{
324 void __iomem *virt_addr;
325
326 if (phbs_io_bot >= IMALLOC_BASE)
327 panic("reserve_phb_iospace(): phb io space overflow\n");
328
329 virt_addr = (void __iomem *) phbs_io_bot;
330 phbs_io_bot += size;
331
332 return virt_addr;
333}
334
335static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
336{
337 memset(addr, 0, kmem_cache_size(cache));
338}
339
340static const int pgtable_cache_size[2] = {
341 PTE_TABLE_SIZE, PMD_TABLE_SIZE
342};
343static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
344 "pgd_pte_cache", "pud_pmd_cache",
345};
346
347kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
348
349void pgtable_cache_init(void)
350{
351 int i;
352
353 BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
354 BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
355 BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
356 BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
357
358 for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
359 int size = pgtable_cache_size[i];
360 const char *name = pgtable_cache_name[i];
361
362 pgtable_cache[i] = kmem_cache_create(name,
363 size, size,
364 SLAB_HWCACHE_ALIGN
365 | SLAB_MUST_HWCACHE_ALIGN,
366 zero_ctor,
367 NULL);
368 if (! pgtable_cache[i])
369 panic("pgtable_cache_init(): could not create %s!\n",
370 name);
371 }
372}
373
374pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
375 unsigned long size, pgprot_t vma_prot)
376{
377 if (ppc_md.phys_mem_access_prot)
378 return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot);
379
380 if (!page_is_ram(addr >> PAGE_SHIFT))
381 vma_prot = __pgprot(pgprot_val(vma_prot)
382 | _PAGE_GUARDED | _PAGE_NO_CACHE);
383 return vma_prot;
384}
385EXPORT_SYMBOL(phys_mem_access_prot);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
new file mode 100644
index 000000000000..345db08e5d20
--- /dev/null
+++ b/arch/powerpc/mm/mem.c
@@ -0,0 +1,299 @@
1/*
2 * PowerPC version
3 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
4 *
5 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
6 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
7 * Copyright (C) 1996 Paul Mackerras
8 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
9 * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
10 *
11 * Derived from "arch/i386/mm/init.c"
12 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 *
19 */
20
21#include <linux/config.h>
22#include <linux/module.h>
23#include <linux/sched.h>
24#include <linux/kernel.h>
25#include <linux/errno.h>
26#include <linux/string.h>
27#include <linux/types.h>
28#include <linux/mm.h>
29#include <linux/stddef.h>
30#include <linux/init.h>
31#include <linux/bootmem.h>
32#include <linux/highmem.h>
33#include <linux/initrd.h>
34#include <linux/pagemap.h>
35
36#include <asm/pgalloc.h>
37#include <asm/prom.h>
38#include <asm/io.h>
39#include <asm/mmu_context.h>
40#include <asm/pgtable.h>
41#include <asm/mmu.h>
42#include <asm/smp.h>
43#include <asm/machdep.h>
44#include <asm/btext.h>
45#include <asm/tlb.h>
46#include <asm/bootinfo.h>
47#include <asm/prom.h>
48
49#include "mem_pieces.h"
50#include "mmu_decl.h"
51
52#ifndef CPU_FTR_COHERENT_ICACHE
53#define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */
54#define CPU_FTR_NOEXECUTE 0
55#endif
56
57/*
58 * This is called by /dev/mem to know if a given address has to
59 * be mapped non-cacheable or not
60 */
61int page_is_ram(unsigned long pfn)
62{
63 unsigned long paddr = (pfn << PAGE_SHIFT);
64
65#ifndef CONFIG_PPC64 /* XXX for now */
66 return paddr < __pa(high_memory);
67#else
68 int i;
69 for (i=0; i < lmb.memory.cnt; i++) {
70 unsigned long base;
71
72 base = lmb.memory.region[i].base;
73
74 if ((paddr >= base) &&
75 (paddr < (base + lmb.memory.region[i].size))) {
76 return 1;
77 }
78 }
79
80 return 0;
81#endif
82}
83EXPORT_SYMBOL(page_is_ram);
84
85pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
86 unsigned long size, pgprot_t vma_prot)
87{
88 if (ppc_md.phys_mem_access_prot)
89 return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot);
90
91 if (!page_is_ram(addr >> PAGE_SHIFT))
92 vma_prot = __pgprot(pgprot_val(vma_prot)
93 | _PAGE_GUARDED | _PAGE_NO_CACHE);
94 return vma_prot;
95}
96EXPORT_SYMBOL(phys_mem_access_prot);
97
98void show_mem(void)
99{
100 unsigned long total = 0, reserved = 0;
101 unsigned long shared = 0, cached = 0;
102 unsigned long highmem = 0;
103 struct page *page;
104 pg_data_t *pgdat;
105 unsigned long i;
106
107 printk("Mem-info:\n");
108 show_free_areas();
109 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
110 for_each_pgdat(pgdat) {
111 for (i = 0; i < pgdat->node_spanned_pages; i++) {
112 page = pgdat_page_nr(pgdat, i);
113 total++;
114 if (PageHighMem(page))
115 highmem++;
116 if (PageReserved(page))
117 reserved++;
118 else if (PageSwapCache(page))
119 cached++;
120 else if (page_count(page))
121 shared += page_count(page) - 1;
122 }
123 }
124 printk("%ld pages of RAM\n", total);
125#ifdef CONFIG_HIGHMEM
126 printk("%ld pages of HIGHMEM\n", highmem);
127#endif
128 printk("%ld reserved pages\n", reserved);
129 printk("%ld pages shared\n", shared);
130 printk("%ld pages swap cached\n", cached);
131}
132
133/*
134 * This is called when a page has been modified by the kernel.
135 * It just marks the page as not i-cache clean. We do the i-cache
136 * flush later when the page is given to a user process, if necessary.
137 */
138void flush_dcache_page(struct page *page)
139{
140 if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
141 return;
142 /* avoid an atomic op if possible */
143 if (test_bit(PG_arch_1, &page->flags))
144 clear_bit(PG_arch_1, &page->flags);
145}
146EXPORT_SYMBOL(flush_dcache_page);
147
148void flush_dcache_icache_page(struct page *page)
149{
150#ifdef CONFIG_BOOKE
151 void *start = kmap_atomic(page, KM_PPC_SYNC_ICACHE);
152 __flush_dcache_icache(start);
153 kunmap_atomic(start, KM_PPC_SYNC_ICACHE);
154#elif defined(CONFIG_8xx)
155 /* On 8xx there is no need to kmap since highmem is not supported */
156 __flush_dcache_icache(page_address(page));
157#else
158 __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT);
159#endif
160
161}
162void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
163{
164 clear_page(page);
165
166 if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
167 return;
168 /*
169 * We shouldnt have to do this, but some versions of glibc
170 * require it (ld.so assumes zero filled pages are icache clean)
171 * - Anton
172 */
173
174 /* avoid an atomic op if possible */
175 if (test_bit(PG_arch_1, &pg->flags))
176 clear_bit(PG_arch_1, &pg->flags);
177}
178EXPORT_SYMBOL(clear_user_page);
179
180void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
181 struct page *pg)
182{
183 copy_page(vto, vfrom);
184
185 /*
186 * We should be able to use the following optimisation, however
187 * there are two problems.
188 * Firstly a bug in some versions of binutils meant PLT sections
189 * were not marked executable.
190 * Secondly the first word in the GOT section is blrl, used
191 * to establish the GOT address. Until recently the GOT was
192 * not marked executable.
193 * - Anton
194 */
195#if 0
196 if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
197 return;
198#endif
199
200 if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
201 return;
202
203 /* avoid an atomic op if possible */
204 if (test_bit(PG_arch_1, &pg->flags))
205 clear_bit(PG_arch_1, &pg->flags);
206}
207
208void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
209 unsigned long addr, int len)
210{
211 unsigned long maddr;
212
213 maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK);
214 flush_icache_range(maddr, maddr + len);
215 kunmap(page);
216}
217EXPORT_SYMBOL(flush_icache_user_range);
218
219/*
220 * This is called at the end of handling a user page fault, when the
221 * fault has been handled by updating a PTE in the linux page tables.
222 * We use it to preload an HPTE into the hash table corresponding to
223 * the updated linux PTE.
224 *
225 * This must always be called with the mm->page_table_lock held
226 */
227void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
228 pte_t pte)
229{
230 /* handle i-cache coherency */
231 unsigned long pfn = pte_pfn(pte);
232#ifdef CONFIG_PPC32
233 pmd_t *pmd;
234#else
235 unsigned long vsid;
236 void *pgdir;
237 pte_t *ptep;
238 int local = 0;
239 cpumask_t tmp;
240 unsigned long flags;
241#endif
242
243 /* handle i-cache coherency */
244 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
245 !cpu_has_feature(CPU_FTR_NOEXECUTE) &&
246 pfn_valid(pfn)) {
247 struct page *page = pfn_to_page(pfn);
248 if (!PageReserved(page)
249 && !test_bit(PG_arch_1, &page->flags)) {
250 if (vma->vm_mm == current->active_mm) {
251#ifdef CONFIG_8xx
252 /* On 8xx, cache control instructions (particularly
253 * "dcbst" from flush_dcache_icache) fault as write
254 * operation if there is an unpopulated TLB entry
255 * for the address in question. To workaround that,
256 * we invalidate the TLB here, thus avoiding dcbst
257 * misbehaviour.
258 */
259 _tlbie(address);
260#endif
261 __flush_dcache_icache((void *) address);
262 } else
263 flush_dcache_icache_page(page);
264 set_bit(PG_arch_1, &page->flags);
265 }
266 }
267
268#ifdef CONFIG_PPC_STD_MMU
269 /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
270 if (!pte_young(pte) || address >= TASK_SIZE)
271 return;
272#ifdef CONFIG_PPC32
273 if (Hash == 0)
274 return;
275 pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
276 if (!pmd_none(*pmd))
277 add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
278#else
279 pgdir = vma->vm_mm->pgd;
280 if (pgdir == NULL)
281 return;
282
283 ptep = find_linux_pte(pgdir, ea);
284 if (!ptep)
285 return;
286
287 vsid = get_vsid(vma->vm_mm->context.id, ea);
288
289 local_irq_save(flags);
290 tmp = cpumask_of_cpu(smp_processor_id());
291 if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
292 local = 1;
293
294 __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep,
295 0x300, local);
296 local_irq_restore(flags);
297#endif
298#endif
299}
diff --git a/arch/powerpc/mm/mem64.c b/arch/powerpc/mm/mem64.c
new file mode 100644
index 000000000000..ef765a84433f
--- /dev/null
+++ b/arch/powerpc/mm/mem64.c
@@ -0,0 +1,259 @@
1/*
2 * PowerPC version
3 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
4 *
5 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
6 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
7 * Copyright (C) 1996 Paul Mackerras
8 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
9 *
10 * Derived from "arch/i386/mm/init.c"
11 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
12 *
13 * Dave Engebretsen <engebret@us.ibm.com>
14 * Rework for PPC64 port.
15 *
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version
19 * 2 of the License, or (at your option) any later version.
20 *
21 */
22
23#include <linux/config.h>
24#include <linux/signal.h>
25#include <linux/sched.h>
26#include <linux/kernel.h>
27#include <linux/errno.h>
28#include <linux/string.h>
29#include <linux/types.h>
30#include <linux/mman.h>
31#include <linux/mm.h>
32#include <linux/swap.h>
33#include <linux/stddef.h>
34#include <linux/vmalloc.h>
35#include <linux/init.h>
36#include <linux/delay.h>
37#include <linux/bootmem.h>
38#include <linux/highmem.h>
39#include <linux/idr.h>
40#include <linux/nodemask.h>
41#include <linux/module.h>
42
43#include <asm/pgalloc.h>
44#include <asm/page.h>
45#include <asm/prom.h>
46#include <asm/lmb.h>
47#include <asm/rtas.h>
48#include <asm/io.h>
49#include <asm/mmu_context.h>
50#include <asm/pgtable.h>
51#include <asm/mmu.h>
52#include <asm/uaccess.h>
53#include <asm/smp.h>
54#include <asm/machdep.h>
55#include <asm/tlb.h>
56#include <asm/eeh.h>
57#include <asm/processor.h>
58#include <asm/mmzone.h>
59#include <asm/cputable.h>
60#include <asm/ppcdebug.h>
61#include <asm/sections.h>
62#include <asm/system.h>
63#include <asm/iommu.h>
64#include <asm/abs_addr.h>
65#include <asm/vdso.h>
66#include <asm/imalloc.h>
67
68/*
69 * This is called by /dev/mem to know if a given address has to
70 * be mapped non-cacheable or not
71 */
72int page_is_ram(unsigned long pfn)
73{
74 int i;
75 unsigned long paddr = (pfn << PAGE_SHIFT);
76
77 for (i=0; i < lmb.memory.cnt; i++) {
78 unsigned long base;
79
80 base = lmb.memory.region[i].base;
81
82 if ((paddr >= base) &&
83 (paddr < (base + lmb.memory.region[i].size))) {
84 return 1;
85 }
86 }
87
88 return 0;
89}
90EXPORT_SYMBOL(page_is_ram);
91
92pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
93 unsigned long size, pgprot_t vma_prot)
94{
95 if (ppc_md.phys_mem_access_prot)
96 return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot);
97
98 if (!page_is_ram(addr >> PAGE_SHIFT))
99 vma_prot = __pgprot(pgprot_val(vma_prot)
100 | _PAGE_GUARDED | _PAGE_NO_CACHE);
101 return vma_prot;
102}
103EXPORT_SYMBOL(phys_mem_access_prot);
104
105void show_mem(void)
106{
107 unsigned long total = 0, reserved = 0;
108 unsigned long shared = 0, cached = 0;
109 struct page *page;
110 pg_data_t *pgdat;
111 unsigned long i;
112
113 printk("Mem-info:\n");
114 show_free_areas();
115 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
116 for_each_pgdat(pgdat) {
117 for (i = 0; i < pgdat->node_spanned_pages; i++) {
118 page = pgdat_page_nr(pgdat, i);
119 total++;
120 if (PageReserved(page))
121 reserved++;
122 else if (PageSwapCache(page))
123 cached++;
124 else if (page_count(page))
125 shared += page_count(page) - 1;
126 }
127 }
128 printk("%ld pages of RAM\n", total);
129 printk("%ld reserved pages\n", reserved);
130 printk("%ld pages shared\n", shared);
131 printk("%ld pages swap cached\n", cached);
132}
133
134/*
135 * This is called when a page has been modified by the kernel.
136 * It just marks the page as not i-cache clean. We do the i-cache
137 * flush later when the page is given to a user process, if necessary.
138 */
139void flush_dcache_page(struct page *page)
140{
141 if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
142 return;
143 /* avoid an atomic op if possible */
144 if (test_bit(PG_arch_1, &page->flags))
145 clear_bit(PG_arch_1, &page->flags);
146}
147EXPORT_SYMBOL(flush_dcache_page);
148
149void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
150{
151 clear_page(page);
152
153 if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
154 return;
155 /*
156 * We shouldnt have to do this, but some versions of glibc
157 * require it (ld.so assumes zero filled pages are icache clean)
158 * - Anton
159 */
160
161 /* avoid an atomic op if possible */
162 if (test_bit(PG_arch_1, &pg->flags))
163 clear_bit(PG_arch_1, &pg->flags);
164}
165EXPORT_SYMBOL(clear_user_page);
166
167void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
168 struct page *pg)
169{
170 copy_page(vto, vfrom);
171
172 /*
173 * We should be able to use the following optimisation, however
174 * there are two problems.
175 * Firstly a bug in some versions of binutils meant PLT sections
176 * were not marked executable.
177 * Secondly the first word in the GOT section is blrl, used
178 * to establish the GOT address. Until recently the GOT was
179 * not marked executable.
180 * - Anton
181 */
182#if 0
183 if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
184 return;
185#endif
186
187 if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
188 return;
189
190 /* avoid an atomic op if possible */
191 if (test_bit(PG_arch_1, &pg->flags))
192 clear_bit(PG_arch_1, &pg->flags);
193}
194
195void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
196 unsigned long addr, int len)
197{
198 unsigned long maddr;
199
200 maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK);
201 flush_icache_range(maddr, maddr + len);
202}
203EXPORT_SYMBOL(flush_icache_user_range);
204
205/*
206 * This is called at the end of handling a user page fault, when the
207 * fault has been handled by updating a PTE in the linux page tables.
208 * We use it to preload an HPTE into the hash table corresponding to
209 * the updated linux PTE.
210 *
211 * This must always be called with the mm->page_table_lock held
212 */
213void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea,
214 pte_t pte)
215{
216 unsigned long vsid;
217 void *pgdir;
218 pte_t *ptep;
219 int local = 0;
220 cpumask_t tmp;
221 unsigned long flags;
222
223 /* handle i-cache coherency */
224 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
225 !cpu_has_feature(CPU_FTR_NOEXECUTE)) {
226 unsigned long pfn = pte_pfn(pte);
227 if (pfn_valid(pfn)) {
228 struct page *page = pfn_to_page(pfn);
229 if (!PageReserved(page)
230 && !test_bit(PG_arch_1, &page->flags)) {
231 __flush_dcache_icache(page_address(page));
232 set_bit(PG_arch_1, &page->flags);
233 }
234 }
235 }
236
237 /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
238 if (!pte_young(pte))
239 return;
240
241 pgdir = vma->vm_mm->pgd;
242 if (pgdir == NULL)
243 return;
244
245 ptep = find_linux_pte(pgdir, ea);
246 if (!ptep)
247 return;
248
249 vsid = get_vsid(vma->vm_mm->context.id, ea);
250
251 local_irq_save(flags);
252 tmp = cpumask_of_cpu(smp_processor_id());
253 if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
254 local = 1;
255
256 __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep,
257 0x300, local);
258 local_irq_restore(flags);
259}
diff --git a/arch/powerpc/mm/mem_pieces.c b/arch/powerpc/mm/mem_pieces.c
new file mode 100644
index 000000000000..3d639052017e
--- /dev/null
+++ b/arch/powerpc/mm/mem_pieces.c
@@ -0,0 +1,163 @@
1/*
2 * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
3 * Changes to accommodate Power Macintoshes.
4 * Cort Dougan <cort@cs.nmt.edu>
5 * Rewrites.
6 * Grant Erickson <grant@lcse.umn.edu>
7 * General rework and split from mm/init.c.
8 *
9 * Module name: mem_pieces.c
10 *
11 * Description:
12 * Routines and data structures for manipulating and representing
13 * physical memory extents (i.e. address/length pairs).
14 *
15 */
16
17#include <linux/config.h>
18#include <linux/kernel.h>
19#include <linux/stddef.h>
20#include <linux/init.h>
21#include <asm/page.h>
22
23#include "mem_pieces.h"
24
25extern struct mem_pieces phys_avail;
26
27static void mem_pieces_print(struct mem_pieces *);
28
29/*
30 * Scan a region for a piece of a given size with the required alignment.
31 */
32void __init *
33mem_pieces_find(unsigned int size, unsigned int align)
34{
35 int i;
36 unsigned a, e;
37 struct mem_pieces *mp = &phys_avail;
38
39 for (i = 0; i < mp->n_regions; ++i) {
40 a = mp->regions[i].address;
41 e = a + mp->regions[i].size;
42 a = (a + align - 1) & -align;
43 if (a + size <= e) {
44 mem_pieces_remove(mp, a, size, 1);
45 return (void *) __va(a);
46 }
47 }
48 panic("Couldn't find %u bytes at %u alignment\n", size, align);
49
50 return NULL;
51}
52
53/*
54 * Remove some memory from an array of pieces
55 */
56void __init
57mem_pieces_remove(struct mem_pieces *mp, unsigned int start, unsigned int size,
58 int must_exist)
59{
60 int i, j;
61 unsigned int end, rs, re;
62 struct reg_property *rp;
63
64 end = start + size;
65 for (i = 0, rp = mp->regions; i < mp->n_regions; ++i, ++rp) {
66 if (end > rp->address && start < rp->address + rp->size)
67 break;
68 }
69 if (i >= mp->n_regions) {
70 if (must_exist)
71 printk("mem_pieces_remove: [%x,%x) not in any region\n",
72 start, end);
73 return;
74 }
75 for (; i < mp->n_regions && end > rp->address; ++i, ++rp) {
76 rs = rp->address;
77 re = rs + rp->size;
78 if (must_exist && (start < rs || end > re)) {
79 printk("mem_pieces_remove: bad overlap [%x,%x) with",
80 start, end);
81 mem_pieces_print(mp);
82 must_exist = 0;
83 }
84 if (start > rs) {
85 rp->size = start - rs;
86 if (end < re) {
87 /* need to split this entry */
88 if (mp->n_regions >= MEM_PIECES_MAX)
89 panic("eek... mem_pieces overflow");
90 for (j = mp->n_regions; j > i + 1; --j)
91 mp->regions[j] = mp->regions[j-1];
92 ++mp->n_regions;
93 rp[1].address = end;
94 rp[1].size = re - end;
95 }
96 } else {
97 if (end < re) {
98 rp->address = end;
99 rp->size = re - end;
100 } else {
101 /* need to delete this entry */
102 for (j = i; j < mp->n_regions - 1; ++j)
103 mp->regions[j] = mp->regions[j+1];
104 --mp->n_regions;
105 --i;
106 --rp;
107 }
108 }
109 }
110}
111
112static void __init
113mem_pieces_print(struct mem_pieces *mp)
114{
115 int i;
116
117 for (i = 0; i < mp->n_regions; ++i)
118 printk(" [%x, %x)", mp->regions[i].address,
119 mp->regions[i].address + mp->regions[i].size);
120 printk("\n");
121}
122
123void __init
124mem_pieces_sort(struct mem_pieces *mp)
125{
126 unsigned long a, s;
127 int i, j;
128
129 for (i = 1; i < mp->n_regions; ++i) {
130 a = mp->regions[i].address;
131 s = mp->regions[i].size;
132 for (j = i - 1; j >= 0; --j) {
133 if (a >= mp->regions[j].address)
134 break;
135 mp->regions[j+1] = mp->regions[j];
136 }
137 mp->regions[j+1].address = a;
138 mp->regions[j+1].size = s;
139 }
140}
141
142void __init
143mem_pieces_coalesce(struct mem_pieces *mp)
144{
145 unsigned long a, s, ns;
146 int i, j, d;
147
148 d = 0;
149 for (i = 0; i < mp->n_regions; i = j) {
150 a = mp->regions[i].address;
151 s = mp->regions[i].size;
152 for (j = i + 1; j < mp->n_regions
153 && mp->regions[j].address - a <= s; ++j) {
154 ns = mp->regions[j].address + mp->regions[j].size - a;
155 if (ns > s)
156 s = ns;
157 }
158 mp->regions[d].address = a;
159 mp->regions[d].size = s;
160 ++d;
161 }
162 mp->n_regions = d;
163}
diff --git a/arch/powerpc/mm/mem_pieces.h b/arch/powerpc/mm/mem_pieces.h
new file mode 100644
index 000000000000..e2b700dc7f18
--- /dev/null
+++ b/arch/powerpc/mm/mem_pieces.h
@@ -0,0 +1,48 @@
1/*
2 * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
3 * Changes to accommodate Power Macintoshes.
4 * Cort Dougan <cort@cs.nmt.edu>
5 * Rewrites.
6 * Grant Erickson <grant@lcse.umn.edu>
7 * General rework and split from mm/init.c.
8 *
9 * Module name: mem_pieces.h
10 *
11 * Description:
12 * Routines and data structures for manipulating and representing
13 * physical memory extents (i.e. address/length pairs).
14 *
15 */
16
17#ifndef __MEM_PIECES_H__
18#define __MEM_PIECES_H__
19
20#include <asm/prom.h>
21
22#ifdef __cplusplus
23extern "C" {
24#endif
25
26
27/* Type Definitions */
28
29#define MEM_PIECES_MAX 32
30
31struct mem_pieces {
32 int n_regions;
33 struct reg_property regions[MEM_PIECES_MAX];
34};
35
36/* Function Prototypes */
37
38extern void *mem_pieces_find(unsigned int size, unsigned int align);
39extern void mem_pieces_remove(struct mem_pieces *mp, unsigned int start,
40 unsigned int size, int must_exist);
41extern void mem_pieces_coalesce(struct mem_pieces *mp);
42extern void mem_pieces_sort(struct mem_pieces *mp);
43
44#ifdef __cplusplus
45}
46#endif
47
48#endif /* __MEM_PIECES_H__ */
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
new file mode 100644
index 000000000000..a8816e0f6a86
--- /dev/null
+++ b/arch/powerpc/mm/mmu_context.c
@@ -0,0 +1,86 @@
1/*
2 * This file contains the routines for handling the MMU on those
3 * PowerPC implementations where the MMU substantially follows the
4 * architecture specification. This includes the 6xx, 7xx, 7xxx,
5 * 8260, and POWER3 implementations but excludes the 8xx and 4xx.
6 * -- paulus
7 *
8 * Derived from arch/ppc/mm/init.c:
9 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
10 *
11 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
12 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
13 * Copyright (C) 1996 Paul Mackerras
14 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
15 *
16 * Derived from "arch/i386/mm/init.c"
17 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
18 *
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
23 *
24 */
25
26#include <linux/config.h>
27#include <linux/mm.h>
28#include <linux/init.h>
29
30#include <asm/mmu_context.h>
31#include <asm/tlbflush.h>
32
33mm_context_t next_mmu_context;
34unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
35#ifdef FEW_CONTEXTS
36atomic_t nr_free_contexts;
37struct mm_struct *context_mm[LAST_CONTEXT+1];
38void steal_context(void);
39#endif /* FEW_CONTEXTS */
40
41/*
42 * Initialize the context management stuff.
43 */
44void __init
45mmu_context_init(void)
46{
47 /*
48 * Some processors have too few contexts to reserve one for
49 * init_mm, and require using context 0 for a normal task.
50 * Other processors reserve the use of context zero for the kernel.
51 * This code assumes FIRST_CONTEXT < 32.
52 */
53 context_map[0] = (1 << FIRST_CONTEXT) - 1;
54 next_mmu_context = FIRST_CONTEXT;
55#ifdef FEW_CONTEXTS
56 atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1);
57#endif /* FEW_CONTEXTS */
58}
59
60#ifdef FEW_CONTEXTS
61/*
62 * Steal a context from a task that has one at the moment.
63 * This is only used on 8xx and 4xx and we presently assume that
64 * they don't do SMP. If they do then this will have to check
65 * whether the MM we steal is in use.
66 * We also assume that this is only used on systems that don't
67 * use an MMU hash table - this is true for 8xx and 4xx.
68 * This isn't an LRU system, it just frees up each context in
69 * turn (sort-of pseudo-random replacement :). This would be the
70 * place to implement an LRU scheme if anyone was motivated to do it.
71 * -- paulus
72 */
73void
74steal_context(void)
75{
76 struct mm_struct *mm;
77
78 /* free up context `next_mmu_context' */
79 /* if we shouldn't free context 0, don't... */
80 if (next_mmu_context < FIRST_CONTEXT)
81 next_mmu_context = FIRST_CONTEXT;
82 mm = context_mm[next_mmu_context];
83 flush_tlb_mm(mm);
84 destroy_context(mm);
85}
86#endif /* FEW_CONTEXTS */
diff --git a/arch/powerpc/mm/mmu_context64.c b/arch/powerpc/mm/mmu_context64.c
new file mode 100644
index 000000000000..714a84dd8d5d
--- /dev/null
+++ b/arch/powerpc/mm/mmu_context64.c
@@ -0,0 +1,63 @@
1/*
2 * MMU context allocation for 64-bit kernels.
3 *
4 * Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <linux/config.h>
14#include <linux/sched.h>
15#include <linux/kernel.h>
16#include <linux/errno.h>
17#include <linux/string.h>
18#include <linux/types.h>
19#include <linux/mm.h>
20#include <linux/spinlock.h>
21#include <linux/idr.h>
22
23#include <asm/mmu_context.h>
24
25static DEFINE_SPINLOCK(mmu_context_lock);
26static DEFINE_IDR(mmu_context_idr);
27
28int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
29{
30 int index;
31 int err;
32
33again:
34 if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
35 return -ENOMEM;
36
37 spin_lock(&mmu_context_lock);
38 err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index);
39 spin_unlock(&mmu_context_lock);
40
41 if (err == -EAGAIN)
42 goto again;
43 else if (err)
44 return err;
45
46 if (index > MAX_CONTEXT) {
47 idr_remove(&mmu_context_idr, index);
48 return -ENOMEM;
49 }
50
51 mm->context.id = index;
52
53 return 0;
54}
55
56void destroy_context(struct mm_struct *mm)
57{
58 spin_lock(&mmu_context_lock);
59 idr_remove(&mmu_context_idr, mm->context.id);
60 spin_unlock(&mmu_context_lock);
61
62 mm->context.id = NO_CONTEXT;
63}
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
new file mode 100644
index 000000000000..540f3292b229
--- /dev/null
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -0,0 +1,85 @@
1/*
2 * Declarations of procedures and variables shared between files
3 * in arch/powerpc/mm/.
4 *
5 * Derived from arch/ppc/mm/init.c:
6 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
7 *
8 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
9 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
10 * Copyright (C) 1996 Paul Mackerras
11 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
12 *
13 * Derived from "arch/i386/mm/init.c"
14 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
15 *
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version
19 * 2 of the License, or (at your option) any later version.
20 *
21 */
22#include <asm/tlbflush.h>
23#include <asm/mmu.h>
24
25extern void mapin_ram(void);
26extern int map_page(unsigned long va, phys_addr_t pa, int flags);
27extern void setbat(int index, unsigned long virt, unsigned long phys,
28 unsigned int size, int flags);
29extern void reserve_phys_mem(unsigned long start, unsigned long size);
30extern void settlbcam(int index, unsigned long virt, phys_addr_t phys,
31 unsigned int size, int flags, unsigned int pid);
32extern void invalidate_tlbcam_entry(int index);
33
34extern int __map_without_bats;
35extern unsigned long ioremap_base;
36extern unsigned long ioremap_bot;
37extern unsigned int rtas_data, rtas_size;
38
39extern unsigned long total_memory;
40extern unsigned long total_lowmem;
41extern int mem_init_done;
42
43extern PTE *Hash, *Hash_end;
44extern unsigned long Hash_size, Hash_mask;
45
46extern unsigned int num_tlbcam_entries;
47
48/* ...and now those things that may be slightly different between processor
49 * architectures. -- Dan
50 */
51#if defined(CONFIG_8xx)
52#define flush_HPTE(X, va, pg) _tlbie(va)
53#define MMU_init_hw() do { } while(0)
54#define mmu_mapin_ram() (0UL)
55
56#elif defined(CONFIG_4xx)
57#define flush_HPTE(X, va, pg) _tlbie(va)
58extern void MMU_init_hw(void);
59extern unsigned long mmu_mapin_ram(void);
60
61#elif defined(CONFIG_FSL_BOOKE)
62#define flush_HPTE(X, va, pg) _tlbie(va)
63extern void MMU_init_hw(void);
64extern unsigned long mmu_mapin_ram(void);
65extern void adjust_total_lowmem(void);
66
67#else
68/* anything except 8xx, 4xx or FSL Book-E */
69extern void MMU_init_hw(void);
70extern unsigned long mmu_mapin_ram(void);
71
72/* Be careful....this needs to be updated if we ever encounter 603 SMPs,
73 * which includes all new 82xx processors. We need tlbie/tlbsync here
74 * in that case (I think). -- Dan.
75 */
76static inline void flush_HPTE(unsigned context, unsigned long va,
77 unsigned long pdval)
78{
79 if ((Hash != 0) &&
80 cpu_has_feature(CPU_FTR_HPTE_TABLE))
81 flush_hash_pages(0, va, pdval, 1);
82 else
83 _tlbie(va);
84}
85#endif
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
new file mode 100644
index 000000000000..81a3d7446d37
--- /dev/null
+++ b/arch/powerpc/mm/pgtable.c
@@ -0,0 +1,470 @@
1/*
2 * This file contains the routines setting up the linux page tables.
3 * -- paulus
4 *
5 * Derived from arch/ppc/mm/init.c:
6 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
7 *
8 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
9 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
10 * Copyright (C) 1996 Paul Mackerras
11 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
12 *
13 * Derived from "arch/i386/mm/init.c"
14 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
15 *
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version
19 * 2 of the License, or (at your option) any later version.
20 *
21 */
22
23#include <linux/config.h>
24#include <linux/kernel.h>
25#include <linux/module.h>
26#include <linux/types.h>
27#include <linux/mm.h>
28#include <linux/vmalloc.h>
29#include <linux/init.h>
30#include <linux/highmem.h>
31
32#include <asm/pgtable.h>
33#include <asm/pgalloc.h>
34#include <asm/io.h>
35
36#include "mmu_decl.h"
37
38unsigned long ioremap_base;
39unsigned long ioremap_bot;
40int io_bat_index;
41
42#if defined(CONFIG_6xx) || defined(CONFIG_POWER3)
43#define HAVE_BATS 1
44#endif
45
46#if defined(CONFIG_FSL_BOOKE)
47#define HAVE_TLBCAM 1
48#endif
49
50extern char etext[], _stext[];
51
52#ifdef CONFIG_SMP
53extern void hash_page_sync(void);
54#endif
55
56#ifdef HAVE_BATS
57extern unsigned long v_mapped_by_bats(unsigned long va);
58extern unsigned long p_mapped_by_bats(unsigned long pa);
59void setbat(int index, unsigned long virt, unsigned long phys,
60 unsigned int size, int flags);
61
62#else /* !HAVE_BATS */
63#define v_mapped_by_bats(x) (0UL)
64#define p_mapped_by_bats(x) (0UL)
65#endif /* HAVE_BATS */
66
67#ifdef HAVE_TLBCAM
68extern unsigned int tlbcam_index;
69extern unsigned long v_mapped_by_tlbcam(unsigned long va);
70extern unsigned long p_mapped_by_tlbcam(unsigned long pa);
71#else /* !HAVE_TLBCAM */
72#define v_mapped_by_tlbcam(x) (0UL)
73#define p_mapped_by_tlbcam(x) (0UL)
74#endif /* HAVE_TLBCAM */
75
76#ifdef CONFIG_PTE_64BIT
77/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */
78#define PGDIR_ORDER 1
79#else
80#define PGDIR_ORDER 0
81#endif
82
83pgd_t *pgd_alloc(struct mm_struct *mm)
84{
85 pgd_t *ret;
86
87 ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
88 return ret;
89}
90
91void pgd_free(pgd_t *pgd)
92{
93 free_pages((unsigned long)pgd, PGDIR_ORDER);
94}
95
96pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
97{
98 pte_t *pte;
99 extern int mem_init_done;
100 extern void *early_get_page(void);
101
102 if (mem_init_done) {
103 pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
104 } else {
105 pte = (pte_t *)early_get_page();
106 if (pte)
107 clear_page(pte);
108 }
109 return pte;
110}
111
112struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
113{
114 struct page *ptepage;
115
116#ifdef CONFIG_HIGHPTE
117 int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
118#else
119 int flags = GFP_KERNEL | __GFP_REPEAT;
120#endif
121
122 ptepage = alloc_pages(flags, 0);
123 if (ptepage)
124 clear_highpage(ptepage);
125 return ptepage;
126}
127
128void pte_free_kernel(pte_t *pte)
129{
130#ifdef CONFIG_SMP
131 hash_page_sync();
132#endif
133 free_page((unsigned long)pte);
134}
135
/*
 * Free a PTE page given its struct page.  As in pte_free_kernel(),
 * SMP kernels call hash_page_sync() before the page is released.
 */
void pte_free(struct page *ptepage)
{
#ifdef CONFIG_SMP
	hash_page_sync();
#endif
	__free_page(ptepage);
}
143
144#ifndef CONFIG_PHYS_64BIT
145void __iomem *
146ioremap(phys_addr_t addr, unsigned long size)
147{
148 return __ioremap(addr, size, _PAGE_NO_CACHE);
149}
150#else /* CONFIG_PHYS_64BIT */
151void __iomem *
152ioremap64(unsigned long long addr, unsigned long size)
153{
154 return __ioremap(addr, size, _PAGE_NO_CACHE);
155}
156
157void __iomem *
158ioremap(phys_addr_t addr, unsigned long size)
159{
160 phys_addr_t addr64 = fixup_bigphys_addr(addr, size);
161
162 return ioremap64(addr64, size);
163}
164#endif /* CONFIG_PHYS_64BIT */
165
166void __iomem *
167__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
168{
169 unsigned long v, i;
170 phys_addr_t p;
171 int err;
172
173 /*
174 * Choose an address to map it to.
175 * Once the vmalloc system is running, we use it.
176 * Before then, we use space going down from ioremap_base
177 * (ioremap_bot records where we're up to).
178 */
179 p = addr & PAGE_MASK;
180 size = PAGE_ALIGN(addr + size) - p;
181
182 /*
183 * If the address lies within the first 16 MB, assume it's in ISA
184 * memory space
185 */
186 if (p < 16*1024*1024)
187 p += _ISA_MEM_BASE;
188
189 /*
190 * Don't allow anybody to remap normal RAM that we're using.
191 * mem_init() sets high_memory so only do the check after that.
192 */
193 if ( mem_init_done && (p < virt_to_phys(high_memory)) )
194 {
195 printk("__ioremap(): phys addr "PHYS_FMT" is RAM lr %p\n", p,
196 __builtin_return_address(0));
197 return NULL;
198 }
199
200 if (size == 0)
201 return NULL;
202
203 /*
204 * Is it already mapped? Perhaps overlapped by a previous
205 * BAT mapping. If the whole area is mapped then we're done,
206 * otherwise remap it since we want to keep the virt addrs for
207 * each request contiguous.
208 *
209 * We make the assumption here that if the bottom and top
210 * of the range we want are mapped then it's mapped to the
211 * same virt address (and this is contiguous).
212 * -- Cort
213 */
214 if ((v = p_mapped_by_bats(p)) /*&& p_mapped_by_bats(p+size-1)*/ )
215 goto out;
216
217 if ((v = p_mapped_by_tlbcam(p)))
218 goto out;
219
220 if (mem_init_done) {
221 struct vm_struct *area;
222 area = get_vm_area(size, VM_IOREMAP);
223 if (area == 0)
224 return NULL;
225 v = (unsigned long) area->addr;
226 } else {
227 v = (ioremap_bot -= size);
228 }
229
230 if ((flags & _PAGE_PRESENT) == 0)
231 flags |= _PAGE_KERNEL;
232 if (flags & _PAGE_NO_CACHE)
233 flags |= _PAGE_GUARDED;
234
235 /*
236 * Should check if it is a candidate for a BAT mapping
237 */
238
239 err = 0;
240 for (i = 0; i < size && err == 0; i += PAGE_SIZE)
241 err = map_page(v+i, p+i, flags);
242 if (err) {
243 if (mem_init_done)
244 vunmap((void *)v);
245 return NULL;
246 }
247
248out:
249 return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK));
250}
251
252void iounmap(volatile void __iomem *addr)
253{
254 /*
255 * If mapped by BATs then there is nothing to do.
256 * Calling vfree() generates a benign warning.
257 */
258 if (v_mapped_by_bats((unsigned long)addr)) return;
259
260 if (addr > high_memory && (unsigned long) addr < ioremap_bot)
261 vunmap((void *) (PAGE_MASK & (unsigned long)addr));
262}
263
264void __iomem *ioport_map(unsigned long port, unsigned int len)
265{
266 return (void __iomem *) (port + _IO_BASE);
267}
268
269void ioport_unmap(void __iomem *addr)
270{
271 /* Nothing to do */
272}
273EXPORT_SYMBOL(ioport_map);
274EXPORT_SYMBOL(ioport_unmap);
275
276int
277map_page(unsigned long va, phys_addr_t pa, int flags)
278{
279 pmd_t *pd;
280 pte_t *pg;
281 int err = -ENOMEM;
282
283 spin_lock(&init_mm.page_table_lock);
284 /* Use upper 10 bits of VA to index the first level map */
285 pd = pmd_offset(pgd_offset_k(va), va);
286 /* Use middle 10 bits of VA to index the second-level map */
287 pg = pte_alloc_kernel(&init_mm, pd, va);
288 if (pg != 0) {
289 err = 0;
290 set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
291 if (mem_init_done)
292 flush_HPTE(0, va, pmd_val(*pd));
293 }
294 spin_unlock(&init_mm.page_table_lock);
295 return err;
296}
297
298/*
299 * Map in all of physical memory starting at KERNELBASE.
300 */
301void __init mapin_ram(void)
302{
303 unsigned long v, p, s, f;
304
305 s = mmu_mapin_ram();
306 v = KERNELBASE + s;
307 p = PPC_MEMSTART + s;
308 for (; s < total_lowmem; s += PAGE_SIZE) {
309 if ((char *) v >= _stext && (char *) v < etext)
310 f = _PAGE_RAM_TEXT;
311 else
312 f = _PAGE_RAM;
313 map_page(v, p, f);
314 v += PAGE_SIZE;
315 p += PAGE_SIZE;
316 }
317}
318
/* is x a power of 2?  (exactly one bit set) */
#define is_power_of_2(x)	((x) != 0 && (((x) & ((x) - 1)) == 0))

/*
 * is x a power of 4?  That is a power of 2 whose single set bit sits at
 * an even position; ffs() numbers bits from 1, hence the odd test.
 * The argument is fully parenthesized so that expressions such as
 * (a | b) are handled correctly.
 */
#define is_power_of_4(x)	(is_power_of_2(x) && (ffs(x) & 1))
324
325/*
326 * Set up a mapping for a block of I/O.
327 * virt, phys, size must all be page-aligned.
328 * This should only be called before ioremap is called.
329 */
330void __init io_block_mapping(unsigned long virt, phys_addr_t phys,
331 unsigned int size, int flags)
332{
333 int i;
334
335 if (virt > KERNELBASE && virt < ioremap_bot)
336 ioremap_bot = ioremap_base = virt;
337
338#ifdef HAVE_BATS
339 /*
340 * Use a BAT for this if possible...
341 */
342 if (io_bat_index < 2 && is_power_of_2(size)
343 && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) {
344 setbat(io_bat_index, virt, phys, size, flags);
345 ++io_bat_index;
346 return;
347 }
348#endif /* HAVE_BATS */
349
350#ifdef HAVE_TLBCAM
351 /*
352 * Use a CAM for this if possible...
353 */
354 if (tlbcam_index < num_tlbcam_entries && is_power_of_4(size)
355 && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) {
356 settlbcam(tlbcam_index, virt, phys, size, flags, 0);
357 ++tlbcam_index;
358 return;
359 }
360#endif /* HAVE_TLBCAM */
361
362 /* No BATs available, put it in the page tables. */
363 for (i = 0; i < size; i += PAGE_SIZE)
364 map_page(virt + i, phys + i, flags);
365}
366
367/* Scan the real Linux page tables and return a PTE pointer for
368 * a virtual address in a context.
369 * Returns true (1) if PTE was found, zero otherwise. The pointer to
370 * the PTE pointer is unmodified if PTE is not found.
371 */
372int
373get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep)
374{
375 pgd_t *pgd;
376 pmd_t *pmd;
377 pte_t *pte;
378 int retval = 0;
379
380 pgd = pgd_offset(mm, addr & PAGE_MASK);
381 if (pgd) {
382 pmd = pmd_offset(pgd, addr & PAGE_MASK);
383 if (pmd_present(*pmd)) {
384 pte = pte_offset_map(pmd, addr & PAGE_MASK);
385 if (pte) {
386 retval = 1;
387 *ptep = pte;
388 /* XXX caller needs to do pte_unmap, yuck */
389 }
390 }
391 }
392 return(retval);
393}
394
395/* Find physical address for this virtual address. Normally used by
396 * I/O functions, but anyone can call it.
397 */
398unsigned long iopa(unsigned long addr)
399{
400 unsigned long pa;
401
402 /* I don't know why this won't work on PMacs or CHRP. It
403 * appears there is some bug, or there is some implicit
404 * mapping done not properly represented by BATs or in page
405 * tables.......I am actively working on resolving this, but
406 * can't hold up other stuff. -- Dan
407 */
408 pte_t *pte;
409 struct mm_struct *mm;
410
411 /* Check the BATs */
412 pa = v_mapped_by_bats(addr);
413 if (pa)
414 return pa;
415
416 /* Allow mapping of user addresses (within the thread)
417 * for DMA if necessary.
418 */
419 if (addr < TASK_SIZE)
420 mm = current->mm;
421 else
422 mm = &init_mm;
423
424 pa = 0;
425 if (get_pteptr(mm, addr, &pte)) {
426 pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK);
427 pte_unmap(pte);
428 }
429
430 return(pa);
431}
432
433/* This will find the virtual address for a physical one....
434 * Swiped from APUS, could be dangerous :-).
435 * This is only a placeholder until I really find a way to make this
436 * work. -- Dan
437 */
438unsigned long
439mm_ptov (unsigned long paddr)
440{
441 unsigned long ret;
442#if 0
443 if (paddr < 16*1024*1024)
444 ret = ZTWO_VADDR(paddr);
445 else {
446 int i;
447
448 for (i = 0; i < kmap_chunk_count;){
449 unsigned long phys = kmap_chunks[i++];
450 unsigned long size = kmap_chunks[i++];
451 unsigned long virt = kmap_chunks[i++];
452 if (paddr >= phys
453 && paddr < (phys + size)){
454 ret = virt + paddr - phys;
455 goto exit;
456 }
457 }
458
459 ret = (unsigned long) __va(paddr);
460 }
461exit:
462#ifdef DEBUGPV
463 printk ("PTOV(%lx)=%lx\n", paddr, ret);
464#endif
465#else
466 ret = (unsigned long)paddr + KERNELBASE;
467#endif
468 return ret;
469}
470
diff --git a/arch/powerpc/mm/pgtable64.c b/arch/powerpc/mm/pgtable64.c
new file mode 100644
index 000000000000..724f97e5dee5
--- /dev/null
+++ b/arch/powerpc/mm/pgtable64.c
@@ -0,0 +1,357 @@
1/*
2 * This file contains ioremap and related functions for 64-bit machines.
3 *
4 * Derived from arch/ppc64/mm/init.c
5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
6 *
7 * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
8 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
9 * Copyright (C) 1996 Paul Mackerras
10 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
11 *
12 * Derived from "arch/i386/mm/init.c"
13 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
14 *
15 * Dave Engebretsen <engebret@us.ibm.com>
16 * Rework for PPC64 port.
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version.
22 *
23 */
24
25#include <linux/config.h>
26#include <linux/signal.h>
27#include <linux/sched.h>
28#include <linux/kernel.h>
29#include <linux/errno.h>
30#include <linux/string.h>
31#include <linux/types.h>
32#include <linux/mman.h>
33#include <linux/mm.h>
34#include <linux/swap.h>
35#include <linux/stddef.h>
36#include <linux/vmalloc.h>
37#include <linux/init.h>
38#include <linux/delay.h>
39#include <linux/bootmem.h>
40#include <linux/highmem.h>
41#include <linux/idr.h>
42#include <linux/nodemask.h>
43#include <linux/module.h>
44
45#include <asm/pgalloc.h>
46#include <asm/page.h>
47#include <asm/prom.h>
48#include <asm/lmb.h>
49#include <asm/rtas.h>
50#include <asm/io.h>
51#include <asm/mmu_context.h>
52#include <asm/pgtable.h>
53#include <asm/mmu.h>
54#include <asm/uaccess.h>
55#include <asm/smp.h>
56#include <asm/machdep.h>
57#include <asm/tlb.h>
58#include <asm/eeh.h>
59#include <asm/processor.h>
60#include <asm/mmzone.h>
61#include <asm/cputable.h>
62#include <asm/ppcdebug.h>
63#include <asm/sections.h>
64#include <asm/system.h>
65#include <asm/iommu.h>
66#include <asm/abs_addr.h>
67#include <asm/vdso.h>
68#include <asm/imalloc.h>
69
70#if PGTABLE_RANGE > USER_VSID_RANGE
71#warning Limited user VSID range means pagetable space is wasted
72#endif
73
74#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
75#warning TASK_SIZE is smaller than it needs to be.
76#endif
77
78int mem_init_done;
79unsigned long ioremap_bot = IMALLOC_BASE;
80static unsigned long phbs_io_bot = PHBS_IO_BASE;
81
82extern pgd_t swapper_pg_dir[];
83extern struct task_struct *current_set[NR_CPUS];
84
85unsigned long klimit = (unsigned long)_end;
86
87/* max amount of RAM to use */
88unsigned long __max_memory;
89
90/* info on what we think the IO hole is */
91unsigned long io_hole_start;
92unsigned long io_hole_size;
93
94#ifdef CONFIG_PPC_ISERIES
95
96void __iomem *ioremap(unsigned long addr, unsigned long size)
97{
98 return (void __iomem *)addr;
99}
100
101extern void __iomem *__ioremap(unsigned long addr, unsigned long size,
102 unsigned long flags)
103{
104 return (void __iomem *)addr;
105}
106
107void iounmap(volatile void __iomem *addr)
108{
109 return;
110}
111
112#else
113
114/*
115 * map_io_page currently only called by __ioremap
116 * map_io_page adds an entry to the ioremap page table
117 * and adds an entry to the HPT, possibly bolting it
118 */
119static int map_io_page(unsigned long ea, unsigned long pa, int flags)
120{
121 pgd_t *pgdp;
122 pud_t *pudp;
123 pmd_t *pmdp;
124 pte_t *ptep;
125 unsigned long vsid;
126
127 if (mem_init_done) {
128 spin_lock(&init_mm.page_table_lock);
129 pgdp = pgd_offset_k(ea);
130 pudp = pud_alloc(&init_mm, pgdp, ea);
131 if (!pudp)
132 return -ENOMEM;
133 pmdp = pmd_alloc(&init_mm, pudp, ea);
134 if (!pmdp)
135 return -ENOMEM;
136 ptep = pte_alloc_kernel(&init_mm, pmdp, ea);
137 if (!ptep)
138 return -ENOMEM;
139 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
140 __pgprot(flags)));
141 spin_unlock(&init_mm.page_table_lock);
142 } else {
143 unsigned long va, vpn, hash, hpteg;
144
145 /*
146 * If the mm subsystem is not fully up, we cannot create a
147 * linux page table entry for this mapping. Simply bolt an
148 * entry in the hardware page table.
149 */
150 vsid = get_kernel_vsid(ea);
151 va = (vsid << 28) | (ea & 0xFFFFFFF);
152 vpn = va >> PAGE_SHIFT;
153
154 hash = hpt_hash(vpn, 0);
155
156 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
157
158 /* Panic if a pte grpup is full */
159 if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT,
160 HPTE_V_BOLTED,
161 _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX)
162 == -1) {
163 panic("map_io_page: could not insert mapping");
164 }
165 }
166 return 0;
167}
168
169
170static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa,
171 unsigned long ea, unsigned long size,
172 unsigned long flags)
173{
174 unsigned long i;
175
176 if ((flags & _PAGE_PRESENT) == 0)
177 flags |= pgprot_val(PAGE_KERNEL);
178
179 for (i = 0; i < size; i += PAGE_SIZE)
180 if (map_io_page(ea+i, pa+i, flags))
181 return NULL;
182
183 return (void __iomem *) (ea + (addr & ~PAGE_MASK));
184}
185
186
187void __iomem *
188ioremap(unsigned long addr, unsigned long size)
189{
190 return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED);
191}
192
193void __iomem * __ioremap(unsigned long addr, unsigned long size,
194 unsigned long flags)
195{
196 unsigned long pa, ea;
197 void __iomem *ret;
198
199 /*
200 * Choose an address to map it to.
201 * Once the imalloc system is running, we use it.
202 * Before that, we map using addresses going
203 * up from ioremap_bot. imalloc will use
204 * the addresses from ioremap_bot through
205 * IMALLOC_END
206 *
207 */
208 pa = addr & PAGE_MASK;
209 size = PAGE_ALIGN(addr + size) - pa;
210
211 if (size == 0)
212 return NULL;
213
214 if (mem_init_done) {
215 struct vm_struct *area;
216 area = im_get_free_area(size);
217 if (area == NULL)
218 return NULL;
219 ea = (unsigned long)(area->addr);
220 ret = __ioremap_com(addr, pa, ea, size, flags);
221 if (!ret)
222 im_free(area->addr);
223 } else {
224 ea = ioremap_bot;
225 ret = __ioremap_com(addr, pa, ea, size, flags);
226 if (ret)
227 ioremap_bot += size;
228 }
229 return ret;
230}
231
232#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK))
233
234int __ioremap_explicit(unsigned long pa, unsigned long ea,
235 unsigned long size, unsigned long flags)
236{
237 struct vm_struct *area;
238 void __iomem *ret;
239
240 /* For now, require page-aligned values for pa, ea, and size */
241 if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) ||
242 !IS_PAGE_ALIGNED(size)) {
243 printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__);
244 return 1;
245 }
246
247 if (!mem_init_done) {
248 /* Two things to consider in this case:
249 * 1) No records will be kept (imalloc, etc) that the region
250 * has been remapped
251 * 2) It won't be easy to iounmap() the region later (because
252 * of 1)
253 */
254 ;
255 } else {
256 area = im_get_area(ea, size,
257 IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS);
258 if (area == NULL) {
259 /* Expected when PHB-dlpar is in play */
260 return 1;
261 }
262 if (ea != (unsigned long) area->addr) {
263 printk(KERN_ERR "unexpected addr return from "
264 "im_get_area\n");
265 return 1;
266 }
267 }
268
269 ret = __ioremap_com(pa, pa, ea, size, flags);
270 if (ret == NULL) {
271 printk(KERN_ERR "ioremap_explicit() allocation failure !\n");
272 return 1;
273 }
274 if (ret != (void *) ea) {
275 printk(KERN_ERR "__ioremap_com() returned unexpected addr\n");
276 return 1;
277 }
278
279 return 0;
280}
281
282/*
283 * Unmap an IO region and remove it from imalloc'd list.
284 * Access to IO memory should be serialized by driver.
285 * This code is modeled after vmalloc code - unmap_vm_area()
286 *
287 * XXX what about calls before mem_init_done (ie python_countermeasures())
288 */
289void iounmap(volatile void __iomem *token)
290{
291 void *addr;
292
293 if (!mem_init_done)
294 return;
295
296 addr = (void *) ((unsigned long __force) token & PAGE_MASK);
297
298 im_free(addr);
299}
300
301static int iounmap_subset_regions(unsigned long addr, unsigned long size)
302{
303 struct vm_struct *area;
304
305 /* Check whether subsets of this region exist */
306 area = im_get_area(addr, size, IM_REGION_SUPERSET);
307 if (area == NULL)
308 return 1;
309
310 while (area) {
311 iounmap((void __iomem *) area->addr);
312 area = im_get_area(addr, size,
313 IM_REGION_SUPERSET);
314 }
315
316 return 0;
317}
318
319int iounmap_explicit(volatile void __iomem *start, unsigned long size)
320{
321 struct vm_struct *area;
322 unsigned long addr;
323 int rc;
324
325 addr = (unsigned long __force) start & PAGE_MASK;
326
327 /* Verify that the region either exists or is a subset of an existing
328 * region. In the latter case, split the parent region to create
329 * the exact region
330 */
331 area = im_get_area(addr, size,
332 IM_REGION_EXISTS | IM_REGION_SUBSET);
333 if (area == NULL) {
334 /* Determine whether subset regions exist. If so, unmap */
335 rc = iounmap_subset_regions(addr, size);
336 if (rc) {
337 printk(KERN_ERR
338 "%s() cannot unmap nonexistent range 0x%lx\n",
339 __FUNCTION__, addr);
340 return 1;
341 }
342 } else {
343 iounmap((void __iomem *) area->addr);
344 }
345 /*
346 * FIXME! This can't be right:
347 iounmap(area->addr);
348 * Maybe it should be "iounmap(area);"
349 */
350 return 0;
351}
352
353#endif
354
355EXPORT_SYMBOL(ioremap);
356EXPORT_SYMBOL(__ioremap);
357EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/ppc_mmu.c b/arch/powerpc/mm/ppc_mmu.c
new file mode 100644
index 000000000000..9a381ed5eb21
--- /dev/null
+++ b/arch/powerpc/mm/ppc_mmu.c
@@ -0,0 +1,296 @@
1/*
2 * This file contains the routines for handling the MMU on those
3 * PowerPC implementations where the MMU substantially follows the
4 * architecture specification. This includes the 6xx, 7xx, 7xxx,
5 * 8260, and POWER3 implementations but excludes the 8xx and 4xx.
6 * -- paulus
7 *
8 * Derived from arch/ppc/mm/init.c:
9 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
10 *
11 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
12 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
13 * Copyright (C) 1996 Paul Mackerras
14 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
15 *
16 * Derived from "arch/i386/mm/init.c"
17 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
18 *
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
23 *
24 */
25
26#include <linux/config.h>
27#include <linux/kernel.h>
28#include <linux/mm.h>
29#include <linux/init.h>
30#include <linux/highmem.h>
31
32#include <asm/prom.h>
33#include <asm/mmu.h>
34#include <asm/machdep.h>
35
36#include "mmu_decl.h"
37#include "mem_pieces.h"
38
39PTE *Hash, *Hash_end;
40unsigned long Hash_size, Hash_mask;
41unsigned long _SDR1;
42
43union ubat { /* BAT register values to be loaded */
44 BAT bat;
45#ifdef CONFIG_PPC64BRIDGE
46 u64 word[2];
47#else
48 u32 word[2];
49#endif
50} BATS[4][2]; /* 4 pairs of IBAT, DBAT */
51
/*
 * Address ranges mapped by BATs, one slot per BAT pair.
 * setbat() records 'limit' as the LAST virtual address covered
 * (virt + size - 1), so it is an inclusive bound.  A slot whose limit
 * is still 0 has never been programmed.
 */
struct batrange {
	unsigned long start;	/* first virtual address covered */
	unsigned long limit;	/* last virtual address covered (inclusive) */
	unsigned long phys;	/* physical address that 'start' maps to */
} bat_addrs[4];

/*
 * Return PA for this VA if it is mapped by a BAT, or 0
 */
unsigned long v_mapped_by_bats(unsigned long va)
{
	int b;

	for (b = 0; b < 4; ++b) {
		const struct batrange *r = &bat_addrs[b];

		/* never set up: must not shadow a real mapping */
		if (r->limit == 0)
			continue;
		/* limit is inclusive, so the last byte belongs to the BAT */
		if (va >= r->start && va <= r->limit)
			return r->phys + (va - r->start);
	}
	return 0;
}

/*
 * Return VA for a given PA or 0 if not mapped
 */
unsigned long p_mapped_by_bats(unsigned long pa)
{
	int b;

	for (b = 0; b < 4; ++b) {
		const struct batrange *r = &bat_addrs[b];

		/* never set up: must not shadow a real mapping */
		if (r->limit == 0)
			continue;
		/*
		 * Compare offsets rather than end addresses: (limit - start)
		 * is the offset of the last byte covered, and this form
		 * cannot overflow for ranges ending at the top of memory.
		 */
		if (pa >= r->phys && pa - r->phys <= r->limit - r->start)
			return r->start + (pa - r->phys);
	}
	return 0;
}
83
84unsigned long __init mmu_mapin_ram(void)
85{
86#ifdef CONFIG_POWER4
87 return 0;
88#else
89 unsigned long tot, bl, done;
90 unsigned long max_size = (256<<20);
91 unsigned long align;
92
93 if (__map_without_bats)
94 return 0;
95
96 /* Set up BAT2 and if necessary BAT3 to cover RAM. */
97
98 /* Make sure we don't map a block larger than the
99 smallest alignment of the physical address. */
100 /* alignment of PPC_MEMSTART */
101 align = ~(PPC_MEMSTART-1) & PPC_MEMSTART;
102 /* set BAT block size to MIN(max_size, align) */
103 if (align && align < max_size)
104 max_size = align;
105
106 tot = total_lowmem;
107 for (bl = 128<<10; bl < max_size; bl <<= 1) {
108 if (bl * 2 > tot)
109 break;
110 }
111
112 setbat(2, KERNELBASE, PPC_MEMSTART, bl, _PAGE_RAM);
113 done = (unsigned long)bat_addrs[2].limit - KERNELBASE + 1;
114 if ((done < tot) && !bat_addrs[3].limit) {
115 /* use BAT3 to cover a bit more */
116 tot -= done;
117 for (bl = 128<<10; bl < max_size; bl <<= 1)
118 if (bl * 2 > tot)
119 break;
120 setbat(3, KERNELBASE+done, PPC_MEMSTART+done, bl, _PAGE_RAM);
121 done = (unsigned long)bat_addrs[3].limit - KERNELBASE + 1;
122 }
123
124 return done;
125#endif
126}
127
128/*
129 * Set up one of the I/D BAT (block address translation) register pairs.
130 * The parameters are not checked; in particular size must be a power
131 * of 2 between 128k and 256M.
132 */
133void __init setbat(int index, unsigned long virt, unsigned long phys,
134 unsigned int size, int flags)
135{
136 unsigned int bl;
137 int wimgxpp;
138 union ubat *bat = BATS[index];
139
140 if (((flags & _PAGE_NO_CACHE) == 0) &&
141 cpu_has_feature(CPU_FTR_NEED_COHERENT))
142 flags |= _PAGE_COHERENT;
143
144 bl = (size >> 17) - 1;
145 if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
146 /* 603, 604, etc. */
147 /* Do DBAT first */
148 wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
149 | _PAGE_COHERENT | _PAGE_GUARDED);
150 wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
151 bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
152 bat[1].word[1] = phys | wimgxpp;
153#ifndef CONFIG_KGDB /* want user access for breakpoints */
154 if (flags & _PAGE_USER)
155#endif
156 bat[1].bat.batu.vp = 1;
157 if (flags & _PAGE_GUARDED) {
158 /* G bit must be zero in IBATs */
159 bat[0].word[0] = bat[0].word[1] = 0;
160 } else {
161 /* make IBAT same as DBAT */
162 bat[0] = bat[1];
163 }
164 } else {
165 /* 601 cpu */
166 if (bl > BL_8M)
167 bl = BL_8M;
168 wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
169 | _PAGE_COHERENT);
170 wimgxpp |= (flags & _PAGE_RW)?
171 ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX;
172 bat->word[0] = virt | wimgxpp | 4; /* Ks=0, Ku=1 */
173 bat->word[1] = phys | bl | 0x40; /* V=1 */
174 }
175
176 bat_addrs[index].start = virt;
177 bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1;
178 bat_addrs[index].phys = phys;
179}
180
181/*
182 * Initialize the hash table and patch the instructions in hashtable.S.
183 */
184void __init MMU_init_hw(void)
185{
186 unsigned int hmask, mb, mb2;
187 unsigned int n_hpteg, lg_n_hpteg;
188
189 extern unsigned int hash_page_patch_A[];
190 extern unsigned int hash_page_patch_B[], hash_page_patch_C[];
191 extern unsigned int hash_page[];
192 extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[];
193
194 if (!cpu_has_feature(CPU_FTR_HPTE_TABLE)) {
195 /*
196 * Put a blr (procedure return) instruction at the
197 * start of hash_page, since we can still get DSI
198 * exceptions on a 603.
199 */
200 hash_page[0] = 0x4e800020;
201 flush_icache_range((unsigned long) &hash_page[0],
202 (unsigned long) &hash_page[1]);
203 return;
204 }
205
206 if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105);
207
208#ifdef CONFIG_PPC64BRIDGE
209#define LG_HPTEG_SIZE 7 /* 128 bytes per HPTEG */
210#define SDR1_LOW_BITS (lg_n_hpteg - 11)
211#define MIN_N_HPTEG 2048 /* min 256kB hash table */
212#else
213#define LG_HPTEG_SIZE 6 /* 64 bytes per HPTEG */
214#define SDR1_LOW_BITS ((n_hpteg - 1) >> 10)
215#define MIN_N_HPTEG 1024 /* min 64kB hash table */
216#endif
217
218#ifdef CONFIG_POWER4
219 /* The hash table has already been allocated and initialized
220 in prom.c */
221 n_hpteg = Hash_size >> LG_HPTEG_SIZE;
222 lg_n_hpteg = __ilog2(n_hpteg);
223
224 /* Remove the hash table from the available memory */
225 if (Hash)
226 reserve_phys_mem(__pa(Hash), Hash_size);
227
228#else /* CONFIG_POWER4 */
229 /*
230 * Allow 1 HPTE (1/8 HPTEG) for each page of memory.
231 * This is less than the recommended amount, but then
232 * Linux ain't AIX.
233 */
234 n_hpteg = total_memory / (PAGE_SIZE * 8);
235 if (n_hpteg < MIN_N_HPTEG)
236 n_hpteg = MIN_N_HPTEG;
237 lg_n_hpteg = __ilog2(n_hpteg);
238 if (n_hpteg & (n_hpteg - 1)) {
239 ++lg_n_hpteg; /* round up if not power of 2 */
240 n_hpteg = 1 << lg_n_hpteg;
241 }
242 Hash_size = n_hpteg << LG_HPTEG_SIZE;
243
244 /*
245 * Find some memory for the hash table.
246 */
247 if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
248 Hash = mem_pieces_find(Hash_size, Hash_size);
249 cacheable_memzero(Hash, Hash_size);
250 _SDR1 = __pa(Hash) | SDR1_LOW_BITS;
251#endif /* CONFIG_POWER4 */
252
253 Hash_end = (PTE *) ((unsigned long)Hash + Hash_size);
254
255 printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n",
256 total_memory >> 20, Hash_size >> 10, Hash);
257
258
259 /*
260 * Patch up the instructions in hashtable.S:create_hpte
261 */
262 if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345);
263 Hash_mask = n_hpteg - 1;
264 hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
265 mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
266 if (lg_n_hpteg > 16)
267 mb2 = 16 - LG_HPTEG_SIZE;
268
269 hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff)
270 | ((unsigned int)(Hash) >> 16);
271 hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6);
272 hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6);
273 hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask;
274 hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask;
275
276 /*
277 * Ensure that the locations we've patched have been written
278 * out from the data cache and invalidated in the instruction
279 * cache, on those machines with split caches.
280 */
281 flush_icache_range((unsigned long) &hash_page_patch_A[0],
282 (unsigned long) &hash_page_patch_C[1]);
283
284 /*
285 * Patch up the instructions in hashtable.S:flush_hash_page
286 */
287 flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff)
288 | ((unsigned int)(Hash) >> 16);
289 flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6);
290 flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6);
291 flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask;
292 flush_icache_range((unsigned long) &flush_hash_patch_A[0],
293 (unsigned long) &flush_hash_patch_B[1]);
294
295 if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205);
296}
diff --git a/arch/powerpc/mm/tlb.c b/arch/powerpc/mm/tlb.c
new file mode 100644
index 000000000000..6c3dc3c44c86
--- /dev/null
+++ b/arch/powerpc/mm/tlb.c
@@ -0,0 +1,183 @@
1/*
2 * This file contains the routines for TLB flushing.
3 * On machines where the MMU uses a hash table to store virtual to
4 * physical translations, these routines flush entries from the
5 * hash table also.
6 * -- paulus
7 *
8 * Derived from arch/ppc/mm/init.c:
9 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
10 *
11 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
12 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
13 * Copyright (C) 1996 Paul Mackerras
14 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
15 *
16 * Derived from "arch/i386/mm/init.c"
17 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
18 *
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
23 *
24 */
25
26#include <linux/config.h>
27#include <linux/kernel.h>
28#include <linux/mm.h>
29#include <linux/init.h>
30#include <linux/highmem.h>
31#include <asm/tlbflush.h>
32#include <asm/tlb.h>
33
34#include "mmu_decl.h"
35
36/*
37 * Called when unmapping pages to flush entries from the TLB/hash table.
38 */
39void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr)
40{
41 unsigned long ptephys;
42
43 if (Hash != 0) {
44 ptephys = __pa(ptep) & PAGE_MASK;
45 flush_hash_pages(mm->context, addr, ptephys, 1);
46 }
47}
48
49/*
50 * Called by ptep_set_access_flags, must flush on CPUs for which the
51 * DSI handler can't just "fixup" the TLB on a write fault
52 */
53void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr)
54{
55 if (Hash != 0)
56 return;
57 _tlbie(addr);
58}
59
60/*
61 * Called at the end of a mmu_gather operation to make sure the
62 * TLB flush is completely done.
63 */
64void tlb_flush(struct mmu_gather *tlb)
65{
66 if (Hash == 0) {
67 /*
68 * 603 needs to flush the whole TLB here since
69 * it doesn't use a hash table.
70 */
71 _tlbia();
72 }
73}
74
75/*
76 * TLB flushing:
77 *
78 * - flush_tlb_mm(mm) flushes the specified mm context TLB's
79 * - flush_tlb_page(vma, vmaddr) flushes one page
80 * - flush_tlb_range(vma, start, end) flushes a range of pages
81 * - flush_tlb_kernel_range(start, end) flushes kernel pages
82 *
83 * since the hardware hash table functions as an extension of the
84 * tlb as far as the linux tables are concerned, flush it too.
85 * -- Cort
86 */
87
/*
 * FINISH_FLUSH is placed at the end of the flush routines below.
 *
 * 750 SMP is a Bad Idea because the 750 doesn't broadcast all
 * the cache operations on the bus.  Hence we need to use an IPI
 * to get the other CPU(s) to invalidate their TLBs.  Everywhere
 * else the macro expands to an empty statement.
 */
#ifndef CONFIG_SMP_750
#define FINISH_FLUSH	do { } while (0)
#else
#define FINISH_FLUSH	smp_send_tlb_invalidate(0)
#endif
98
99static void flush_range(struct mm_struct *mm, unsigned long start,
100 unsigned long end)
101{
102 pmd_t *pmd;
103 unsigned long pmd_end;
104 int count;
105 unsigned int ctx = mm->context;
106
107 if (Hash == 0) {
108 _tlbia();
109 return;
110 }
111 start &= PAGE_MASK;
112 if (start >= end)
113 return;
114 end = (end - 1) | ~PAGE_MASK;
115 pmd = pmd_offset(pgd_offset(mm, start), start);
116 for (;;) {
117 pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1;
118 if (pmd_end > end)
119 pmd_end = end;
120 if (!pmd_none(*pmd)) {
121 count = ((pmd_end - start) >> PAGE_SHIFT) + 1;
122 flush_hash_pages(ctx, start, pmd_val(*pmd), count);
123 }
124 if (pmd_end == end)
125 break;
126 start = pmd_end + 1;
127 ++pmd;
128 }
129}
130
131/*
132 * Flush kernel TLB entries in the given range
133 */
134void flush_tlb_kernel_range(unsigned long start, unsigned long end)
135{
136 flush_range(&init_mm, start, end);
137 FINISH_FLUSH;
138}
139
140/*
141 * Flush all the (user) entries for the address space described by mm.
142 */
143void flush_tlb_mm(struct mm_struct *mm)
144{
145 struct vm_area_struct *mp;
146
147 if (Hash == 0) {
148 _tlbia();
149 return;
150 }
151
152 for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
153 flush_range(mp->vm_mm, mp->vm_start, mp->vm_end);
154 FINISH_FLUSH;
155}
156
157void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
158{
159 struct mm_struct *mm;
160 pmd_t *pmd;
161
162 if (Hash == 0) {
163 _tlbie(vmaddr);
164 return;
165 }
166 mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm;
167 pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr);
168 if (!pmd_none(*pmd))
169 flush_hash_pages(mm->context, vmaddr, pmd_val(*pmd), 1);
170 FINISH_FLUSH;
171}
172
173/*
174 * For each address in the range, find the pte for the address
175 * and check _PAGE_HASHPTE bit; if it is set, find and destroy
176 * the corresponding HPTE.
177 */
178void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
179 unsigned long end)
180{
181 flush_range(vma->vm_mm, start, end);
182 FINISH_FLUSH;
183}