author     Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
commit     1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree       0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/ppc/mm
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'arch/ppc/mm')
-rw-r--r--  arch/ppc/mm/44x_mmu.c          121
-rw-r--r--  arch/ppc/mm/4xx_mmu.c          142
-rw-r--r--  arch/ppc/mm/Makefile            11
-rw-r--r--  arch/ppc/mm/fault.c            440
-rw-r--r--  arch/ppc/mm/fsl_booke_mmu.c    236
-rw-r--r--  arch/ppc/mm/hashtable.S        642
-rw-r--r--  arch/ppc/mm/init.c             667
-rw-r--r--  arch/ppc/mm/mem_pieces.c       163
-rw-r--r--  arch/ppc/mm/mem_pieces.h        48
-rw-r--r--  arch/ppc/mm/mmu_context.c       86
-rw-r--r--  arch/ppc/mm/mmu_decl.h          83
-rw-r--r--  arch/ppc/mm/pgtable.c          471
-rw-r--r--  arch/ppc/mm/ppc_mmu.c          296
-rw-r--r--  arch/ppc/mm/tlb.c              183
14 files changed, 3589 insertions(+), 0 deletions(-)
diff --git a/arch/ppc/mm/44x_mmu.c b/arch/ppc/mm/44x_mmu.c
new file mode 100644
index 000000000000..72f7c0d1c0ed
--- /dev/null
+++ b/arch/ppc/mm/44x_mmu.c
@@ -0,0 +1,121 @@
1/*
2 * Modifications by Matt Porter (mporter@mvista.com) to support
3 * PPC44x Book E processors.
4 *
5 * This file contains the routines for initializing the MMU
6 * on the 4xx series of chips.
7 * -- paulus
8 *
9 * Derived from arch/ppc/mm/init.c:
10 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
11 *
12 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
13 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
14 * Copyright (C) 1996 Paul Mackerras
15 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
16 *
17 * Derived from "arch/i386/mm/init.c"
18 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
19 *
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
24 *
25 */
26
27#include <linux/config.h>
28#include <linux/signal.h>
29#include <linux/sched.h>
30#include <linux/kernel.h>
31#include <linux/errno.h>
32#include <linux/string.h>
33#include <linux/types.h>
34#include <linux/ptrace.h>
35#include <linux/mman.h>
36#include <linux/mm.h>
37#include <linux/swap.h>
38#include <linux/stddef.h>
39#include <linux/vmalloc.h>
40#include <linux/init.h>
41#include <linux/delay.h>
42#include <linux/bootmem.h>
43#include <linux/highmem.h>
44
45#include <asm/pgalloc.h>
46#include <asm/prom.h>
47#include <asm/io.h>
48#include <asm/mmu_context.h>
49#include <asm/pgtable.h>
50#include <asm/mmu.h>
51#include <asm/uaccess.h>
52#include <asm/smp.h>
53#include <asm/bootx.h>
54#include <asm/machdep.h>
55#include <asm/setup.h>
56
57#include "mmu_decl.h"
58
59extern char etext[], _stext[];
60
61/* Used by the 44x TLB replacement exception handler.
62 * Just needed it declared someplace.
63 */
64unsigned int tlb_44x_index = 0;
65unsigned int tlb_44x_hwater = 62;
66
67/*
68 * "Pins" a 256MB TLB entry in AS0 for kernel lowmem
69 */
70static void __init
71ppc44x_pin_tlb(int slot, unsigned int virt, unsigned int phys)
72{
73 unsigned long attrib = 0;
74
75 __asm__ __volatile__("\
76 clrrwi %2,%2,10\n\
77 ori %2,%2,%4\n\
78 clrrwi %1,%1,10\n\
79 li %0,0\n\
80 ori %0,%0,%5\n\
81 tlbwe %2,%3,%6\n\
82 tlbwe %1,%3,%7\n\
83 tlbwe %0,%3,%8"
84 :
85 : "r" (attrib), "r" (phys), "r" (virt), "r" (slot),
86 "i" (PPC44x_TLB_VALID | PPC44x_TLB_256M),
87 "i" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),
88 "i" (PPC44x_TLB_PAGEID),
89 "i" (PPC44x_TLB_XLAT),
90 "i" (PPC44x_TLB_ATTRIB));
91}
92
93/*
94 * MMU_init_hw does the chip-specific initialization of the MMU hardware.
95 */
96void __init MMU_init_hw(void)
97{
98 flush_instruction_cache();
99}
100
101unsigned long __init mmu_mapin_ram(void)
102{
103 unsigned int pinned_tlbs = 1;
104 int i;
105
106 /* Determine number of entries necessary to cover lowmem */
107 pinned_tlbs = (unsigned int)
108 (_ALIGN(total_lowmem, PPC44x_PIN_SIZE) >> PPC44x_PIN_SHIFT);
109
110 /* Write upper watermark to save location */
111 tlb_44x_hwater = PPC44x_LOW_SLOT - pinned_tlbs;
112
113 /* If necessary, set additional pinned TLBs */
114 if (pinned_tlbs > 1)
115 for (i = (PPC44x_LOW_SLOT-(pinned_tlbs-1)); i < PPC44x_LOW_SLOT; i++) {
116 unsigned int phys_addr = (PPC44x_LOW_SLOT-i) * PPC44x_PIN_SIZE;
117 ppc44x_pin_tlb(i, phys_addr+PAGE_OFFSET, phys_addr);
118 }
119
120 return total_lowmem;
121}
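
[Editor's note] For readers following the arithmetic in mmu_mapin_ram() above: the lowmem size is rounded up to a multiple of the 256 MB pin size, and any extra pinned entries are written downward from just below the top slot. A minimal standalone sketch of that calculation follows; the constants (256 MB pin size, slot 63 as PPC44x_LOW_SLOT, inferred from the default hwater of 62) and ALIGN_UP() are restated here purely for illustration and are not the kernel's definitions.

#include <stdio.h>

#define PIN_SHIFT   28                          /* 256 MB pins (assumed) */
#define PIN_SIZE    (1UL << PIN_SHIFT)
#define LOW_SLOT    63                          /* assumed top "low" TLB slot */
#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        unsigned long total_lowmem = 768UL << 20;       /* example: 768 MB */
        unsigned int pinned = ALIGN_UP(total_lowmem, PIN_SIZE) >> PIN_SHIFT;
        unsigned int hwater = LOW_SLOT - pinned;
        unsigned int slot;

        printf("%u pinned entries, TLB high-water mark %u\n", pinned, hwater);
        /* extra pins are written from just below the top slot downward,
         * mirroring the loop in mmu_mapin_ram() */
        for (slot = LOW_SLOT - (pinned - 1); slot < LOW_SLOT; slot++)
                printf("slot %u maps phys 0x%08lx\n",
                       slot, (LOW_SLOT - slot) * PIN_SIZE);
        return 0;
}

For 768 MB of lowmem this prints three pinned entries and a high-water mark of 60, with slots 61 and 62 covering physical 512 MB and 256 MB (slot 63 being the boot-time pin at 0).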
diff --git a/arch/ppc/mm/4xx_mmu.c b/arch/ppc/mm/4xx_mmu.c
new file mode 100644
index 000000000000..a7f616140381
--- /dev/null
+++ b/arch/ppc/mm/4xx_mmu.c
@@ -0,0 +1,142 @@
1/*
2 * This file contains the routines for initializing the MMU
3 * on the 4xx series of chips.
4 * -- paulus
5 *
6 * Derived from arch/ppc/mm/init.c:
7 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
8 *
9 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
10 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
11 * Copyright (C) 1996 Paul Mackerras
12 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
13 *
14 * Derived from "arch/i386/mm/init.c"
15 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
16 *
17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public License
19 * as published by the Free Software Foundation; either version
20 * 2 of the License, or (at your option) any later version.
21 *
22 */
23
24#include <linux/config.h>
25#include <linux/signal.h>
26#include <linux/sched.h>
27#include <linux/kernel.h>
28#include <linux/errno.h>
29#include <linux/string.h>
30#include <linux/types.h>
31#include <linux/ptrace.h>
32#include <linux/mman.h>
33#include <linux/mm.h>
34#include <linux/swap.h>
35#include <linux/stddef.h>
36#include <linux/vmalloc.h>
37#include <linux/init.h>
38#include <linux/delay.h>
39#include <linux/bootmem.h>
40#include <linux/highmem.h>
41
42#include <asm/pgalloc.h>
43#include <asm/prom.h>
44#include <asm/io.h>
45#include <asm/mmu_context.h>
46#include <asm/pgtable.h>
47#include <asm/mmu.h>
48#include <asm/uaccess.h>
49#include <asm/smp.h>
50#include <asm/bootx.h>
51#include <asm/machdep.h>
52#include <asm/setup.h>
53#include "mmu_decl.h"
54
55extern int __map_without_ltlbs;
56/*
57 * MMU_init_hw does the chip-specific initialization of the MMU hardware.
58 */
59void __init MMU_init_hw(void)
60{
61 /*
62 * The Zone Protection Register (ZPR) defines how protection will
63 * be applied to every page which is a member of a given zone. At
64 * present, we utilize only two of the 4xx's zones.
65 * The zone index bits (of ZSEL) in the PTE are used for software
66 * indicators, except the LSB. For user access, zone 1 is used,
67 * for kernel access, zone 0 is used. We set all but zone 1
68 * to zero, allowing only kernel access as indicated in the PTE.
69 * For zone 1, we set a 01 binary (a value of 10 will not work)
70 * to allow user access as indicated in the PTE. This also allows
71 * kernel access as indicated in the PTE.
72 */
73
74 mtspr(SPRN_ZPR, 0x10000000);
75
76 flush_instruction_cache();
77
78 /*
79 * Set up the real-mode cache parameters for the exception vector
80 * handlers (which are run in real-mode).
81 */
82
83 mtspr(SPRN_DCWR, 0x00000000); /* All caching is write-back */
84
85 /*
86 * Cache instruction and data space where the exception
87 * vectors and the kernel live in real-mode.
88 */
89
90 mtspr(SPRN_DCCR, 0xF0000000); /* 512 MB of data space at 0x0. */
91 mtspr(SPRN_ICCR, 0xF0000000); /* 512 MB of instr. space at 0x0. */
92}
93
94#define LARGE_PAGE_SIZE_16M (1<<24)
95#define LARGE_PAGE_SIZE_4M (1<<22)
96
97unsigned long __init mmu_mapin_ram(void)
98{
99 unsigned long v, s;
100 phys_addr_t p;
101
102 v = KERNELBASE;
103 p = PPC_MEMSTART;
104 s = 0;
105
106 if (__map_without_ltlbs) {
107 return s;
108 }
109
110 while (s <= (total_lowmem - LARGE_PAGE_SIZE_16M)) {
111 pmd_t *pmdp;
112 unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE;
113
114 spin_lock(&init_mm.page_table_lock);
115 pmdp = pmd_offset(pgd_offset_k(v), v);
116 pmd_val(*pmdp++) = val;
117 pmd_val(*pmdp++) = val;
118 pmd_val(*pmdp++) = val;
119 pmd_val(*pmdp++) = val;
120 spin_unlock(&init_mm.page_table_lock);
121
122 v += LARGE_PAGE_SIZE_16M;
123 p += LARGE_PAGE_SIZE_16M;
124 s += LARGE_PAGE_SIZE_16M;
125 }
126
127 while (s <= (total_lowmem - LARGE_PAGE_SIZE_4M)) {
128 pmd_t *pmdp;
129 unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE;
130
131 spin_lock(&init_mm.page_table_lock);
132 pmdp = pmd_offset(pgd_offset_k(v), v);
133 pmd_val(*pmdp) = val;
134 spin_unlock(&init_mm.page_table_lock);
135
136 v += LARGE_PAGE_SIZE_4M;
137 p += LARGE_PAGE_SIZE_4M;
138 s += LARGE_PAGE_SIZE_4M;
139 }
140
141 return s;
142}
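
[Editor's note] The two loops in mmu_mapin_ram() above greedily cover lowmem with pinned 16 MB pages and then 4 MB pages, leaving any remainder to ordinary 4 KB mappings. A small standalone sketch of just that split, using the same loop conditions and an example size of 44 MB (it assumes lowmem is at least 16 MB, as the kernel loop does):

#include <stdio.h>

#define SZ_16M  (1UL << 24)
#define SZ_4M   (1UL << 22)

int main(void)
{
        unsigned long total_lowmem = 44UL << 20;        /* example: 44 MB */
        unsigned long s = 0;
        unsigned int n16 = 0, n4 = 0;

        /* same conditions as the loops in mmu_mapin_ram() */
        while (s <= total_lowmem - SZ_16M) { s += SZ_16M; n16++; }
        while (s <= total_lowmem - SZ_4M)  { s += SZ_4M;  n4++;  }

        printf("%u x 16MB + %u x 4MB large pages, %lu MB left for 4K pages\n",
               n16, n4, (total_lowmem - s) >> 20);
        return 0;
}

For 44 MB this yields two 16 MB pages and three 4 MB pages, covering lowmem exactly.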
diff --git a/arch/ppc/mm/Makefile b/arch/ppc/mm/Makefile
new file mode 100644
index 000000000000..cd3eae147cf8
--- /dev/null
+++ b/arch/ppc/mm/Makefile
@@ -0,0 +1,11 @@
1#
2# Makefile for the linux ppc-specific parts of the memory manager.
3#
4
5obj-y := fault.o init.o mem_pieces.o \
6 mmu_context.o pgtable.o
7
8obj-$(CONFIG_PPC_STD_MMU) += hashtable.o ppc_mmu.o tlb.o
9obj-$(CONFIG_40x) += 4xx_mmu.o
10obj-$(CONFIG_44x) += 44x_mmu.o
11obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o
diff --git a/arch/ppc/mm/fault.c b/arch/ppc/mm/fault.c
new file mode 100644
index 000000000000..57d9930843ac
--- /dev/null
+++ b/arch/ppc/mm/fault.c
@@ -0,0 +1,440 @@
1/*
2 * arch/ppc/mm/fault.c
3 *
4 * PowerPC version
5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
6 *
7 * Derived from "arch/i386/mm/fault.c"
8 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
9 *
10 * Modified by Cort Dougan and Paul Mackerras.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/config.h>
19#include <linux/signal.h>
20#include <linux/sched.h>
21#include <linux/kernel.h>
22#include <linux/errno.h>
23#include <linux/string.h>
24#include <linux/types.h>
25#include <linux/ptrace.h>
26#include <linux/mman.h>
27#include <linux/mm.h>
28#include <linux/interrupt.h>
29#include <linux/highmem.h>
30#include <linux/module.h>
31
32#include <asm/page.h>
33#include <asm/pgtable.h>
34#include <asm/mmu.h>
35#include <asm/mmu_context.h>
36#include <asm/system.h>
37#include <asm/uaccess.h>
38#include <asm/tlbflush.h>
39
40#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
41extern void (*debugger)(struct pt_regs *);
42extern void (*debugger_fault_handler)(struct pt_regs *);
43extern int (*debugger_dabr_match)(struct pt_regs *);
44int debugger_kernel_faults = 1;
45#endif
46
47unsigned long htab_reloads; /* updated by hashtable.S:hash_page() */
48unsigned long htab_evicts; /* updated by hashtable.S:hash_page() */
49unsigned long htab_preloads; /* updated by hashtable.S:add_hash_page() */
50unsigned long pte_misses; /* updated by do_page_fault() */
51unsigned long pte_errors; /* updated by do_page_fault() */
52unsigned int probingmem;
53
54/*
55 * Check whether the instruction at regs->nip is a store using
56 * an update addressing form which will update r1.
57 */
58static int store_updates_sp(struct pt_regs *regs)
59{
60 unsigned int inst;
61
62 if (get_user(inst, (unsigned int __user *)regs->nip))
63 return 0;
64 /* check for 1 in the rA field */
65 if (((inst >> 16) & 0x1f) != 1)
66 return 0;
67 /* check major opcode */
68 switch (inst >> 26) {
69 case 37: /* stwu */
70 case 39: /* stbu */
71 case 45: /* sthu */
72 case 53: /* stfsu */
73 case 55: /* stfdu */
74 return 1;
75 case 31:
76 /* check minor opcode */
77 switch ((inst >> 1) & 0x3ff) {
78 case 183: /* stwux */
79 case 247: /* stbux */
80 case 439: /* sthux */
81 case 695: /* stfsux */
82 case 759: /* stfdux */
83 return 1;
84 }
85 }
86 return 0;
87}
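
[Editor's note] store_updates_sp() above recognizes the update-form stores (stwu, stbu, sthu, stfsu, stfdu and their indexed st*ux variants) whose rA field is r1. A standalone copy of that decode, fed hand-assembled example words rather than the word at regs->nip, purely for illustration:

#include <stdio.h>

static int updates_r1(unsigned int inst)
{
        if (((inst >> 16) & 0x1f) != 1)         /* rA field must be r1 */
                return 0;
        switch (inst >> 26) {                   /* primary opcode */
        case 37: case 39: case 45: case 53: case 55:
                return 1;                       /* stwu/stbu/sthu/stfsu/stfdu */
        case 31:
                switch ((inst >> 1) & 0x3ff) {  /* extended opcode */
                case 183: case 247: case 439: case 695: case 759:
                        return 1;               /* st*ux forms */
                }
        }
        return 0;
}

int main(void)
{
        printf("stwu r1,-16(r1): %d\n", updates_r1(0x9421fff0));   /* 1 */
        printf("stw  r3,0(r1)  : %d\n", updates_r1(0x90610000));   /* 0 */
        return 0;
}

The first word is the classic prologue "stwu r1,-16(r1)" and is accepted; the second is a plain stw through r1 and is rejected on its primary opcode.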
88
89/*
90 * For 600- and 800-family processors, the error_code parameter is DSISR
91 * for a data fault, SRR1 for an instruction fault. For 400-family processors
92 * the error_code parameter is ESR for a data fault, 0 for an instruction
93 * fault.
94 */
95int do_page_fault(struct pt_regs *regs, unsigned long address,
96 unsigned long error_code)
97{
98 struct vm_area_struct * vma;
99 struct mm_struct *mm = current->mm;
100 siginfo_t info;
101 int code = SEGV_MAPERR;
102#if defined(CONFIG_4xx) || defined (CONFIG_BOOKE)
103 int is_write = error_code & ESR_DST;
104#else
105 int is_write = 0;
106
107 /*
108 * Fortunately the bit assignments in SRR1 for an instruction
109 * fault and DSISR for a data fault are mostly the same for the
110 * bits we are interested in. But there are some bits which
111 * indicate errors in DSISR but can validly be set in SRR1.
112 */
113 if (TRAP(regs) == 0x400)
114 error_code &= 0x48200000;
115 else
116 is_write = error_code & 0x02000000;
117#endif /* CONFIG_4xx || CONFIG_BOOKE */
118
119#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
120 if (debugger_fault_handler && TRAP(regs) == 0x300) {
121 debugger_fault_handler(regs);
122 return 0;
123 }
124#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
125 if (error_code & 0x00400000) {
126 /* DABR match */
127 if (debugger_dabr_match(regs))
128 return 0;
129 }
130#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
131#endif /* CONFIG_XMON || CONFIG_KGDB */
132
133 if (in_atomic() || mm == NULL)
134 return SIGSEGV;
135
136 down_read(&mm->mmap_sem);
137 vma = find_vma(mm, address);
138 if (!vma)
139 goto bad_area;
140 if (vma->vm_start <= address)
141 goto good_area;
142 if (!(vma->vm_flags & VM_GROWSDOWN))
143 goto bad_area;
144 if (!is_write)
145 goto bad_area;
146
147 /*
148 * N.B. The rs6000/xcoff ABI allows programs to access up to
149 * a few hundred bytes below the stack pointer.
150 * The kernel signal delivery code writes up to about 1.5kB
151 * below the stack pointer (r1) before decrementing it.
152 * The exec code can write slightly over 640kB to the stack
153 * before setting the user r1. Thus we allow the stack to
154 * expand to 1MB without further checks.
155 */
156 if (address + 0x100000 < vma->vm_end) {
157 /* get user regs even if this fault is in kernel mode */
158 struct pt_regs *uregs = current->thread.regs;
159 if (uregs == NULL)
160 goto bad_area;
161
162 /*
163 * A user-mode access to an address a long way below
164 * the stack pointer is only valid if the instruction
165 * is one which would update the stack pointer to the
166 * address accessed if the instruction completed,
167 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
168 * (or the byte, halfword, float or double forms).
169 *
170 * If we don't check this then any write to the area
171 * between the last mapped region and the stack will
172 * expand the stack rather than segfaulting.
173 */
174 if (address + 2048 < uregs->gpr[1]
175 && (!user_mode(regs) || !store_updates_sp(regs)))
176 goto bad_area;
177 }
178 if (expand_stack(vma, address))
179 goto bad_area;
180
181good_area:
182 code = SEGV_ACCERR;
183#if defined(CONFIG_6xx)
184 if (error_code & 0x95700000)
185 /* an error such as lwarx to I/O controller space,
186 address matching DABR, eciwx, etc. */
187 goto bad_area;
188#endif /* CONFIG_6xx */
189#if defined(CONFIG_8xx)
190 /* The MPC8xx seems to always set 0x80000000, which is
191 * "undefined". Of those that can be set, this is the only
192 * one which seems bad.
193 */
194 if (error_code & 0x10000000)
195 /* Guarded storage error. */
196 goto bad_area;
197#endif /* CONFIG_8xx */
198
199 /* a write */
200 if (is_write) {
201 if (!(vma->vm_flags & VM_WRITE))
202 goto bad_area;
203#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
204 /* an exec - 4xx/Book-E allows for per-page execute permission */
205 } else if (TRAP(regs) == 0x400) {
206 pte_t *ptep;
207
208#if 0
209 /* It would be nice to actually enforce the VM execute
210 permission on CPUs which can do so, but far too
211 much stuff in userspace doesn't get the permissions
212 right, so we let any page be executed for now. */
213 if (! (vma->vm_flags & VM_EXEC))
214 goto bad_area;
215#endif
216
217 /* Since 4xx/Book-E supports per-page execute permission,
218 * we lazily flush dcache to icache. */
219 ptep = NULL;
220 if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) {
221 struct page *page = pte_page(*ptep);
222
223 if (! test_bit(PG_arch_1, &page->flags)) {
224 flush_dcache_icache_page(page);
225 set_bit(PG_arch_1, &page->flags);
226 }
227 pte_update(ptep, 0, _PAGE_HWEXEC);
228 _tlbie(address);
229 pte_unmap(ptep);
230 up_read(&mm->mmap_sem);
231 return 0;
232 }
233 if (ptep != NULL)
234 pte_unmap(ptep);
235#endif
236 /* a read */
237 } else {
238 /* protection fault */
239 if (error_code & 0x08000000)
240 goto bad_area;
241 if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
242 goto bad_area;
243 }
244
245 /*
246 * If for any reason at all we couldn't handle the fault,
247 * make sure we exit gracefully rather than endlessly redo
248 * the fault.
249 */
250 survive:
251 switch (handle_mm_fault(mm, vma, address, is_write)) {
252 case VM_FAULT_MINOR:
253 current->min_flt++;
254 break;
255 case VM_FAULT_MAJOR:
256 current->maj_flt++;
257 break;
258 case VM_FAULT_SIGBUS:
259 goto do_sigbus;
260 case VM_FAULT_OOM:
261 goto out_of_memory;
262 default:
263 BUG();
264 }
265
266 up_read(&mm->mmap_sem);
267 /*
268 * keep track of tlb+htab misses that are good addrs but
269 * just need pte's created via handle_mm_fault()
270 * -- Cort
271 */
272 pte_misses++;
273 return 0;
274
275bad_area:
276 up_read(&mm->mmap_sem);
277 pte_errors++;
278
279 /* User mode accesses cause a SIGSEGV */
280 if (user_mode(regs)) {
281 info.si_signo = SIGSEGV;
282 info.si_errno = 0;
283 info.si_code = code;
284 info.si_addr = (void __user *) address;
285 force_sig_info(SIGSEGV, &info, current);
286 return 0;
287 }
288
289 return SIGSEGV;
290
291/*
292 * We ran out of memory, or some other thing happened to us that made
293 * us unable to handle the page fault gracefully.
294 */
295out_of_memory:
296 up_read(&mm->mmap_sem);
297 if (current->pid == 1) {
298 yield();
299 down_read(&mm->mmap_sem);
300 goto survive;
301 }
302 printk("VM: killing process %s\n", current->comm);
303 if (user_mode(regs))
304 do_exit(SIGKILL);
305 return SIGKILL;
306
307do_sigbus:
308 up_read(&mm->mmap_sem);
309 info.si_signo = SIGBUS;
310 info.si_errno = 0;
311 info.si_code = BUS_ADRERR;
312 info.si_addr = (void __user *)address;
313 force_sig_info (SIGBUS, &info, current);
314 if (!user_mode(regs))
315 return SIGBUS;
316 return 0;
317}
318
319/*
320 * bad_page_fault is called when we have a bad access from the kernel.
321 * It is called from the DSI and ISI handlers in head.S and from some
322 * of the procedures in traps.c.
323 */
324void
325bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
326{
327 const struct exception_table_entry *entry;
328
329 /* Are we prepared to handle this fault? */
330 if ((entry = search_exception_tables(regs->nip)) != NULL) {
331 regs->nip = entry->fixup;
332 return;
333 }
334
335 /* kernel has accessed a bad area */
336#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
337 if (debugger_kernel_faults)
338 debugger(regs);
339#endif
340 die("kernel access of bad area", regs, sig);
341}
342
343#ifdef CONFIG_8xx
344
345/* The pgtable.h claims some functions generically exist, but I
346 * can't find them......
347 */
348pte_t *va_to_pte(unsigned long address)
349{
350 pgd_t *dir;
351 pmd_t *pmd;
352 pte_t *pte;
353
354 if (address < TASK_SIZE)
355 return NULL;
356
357 dir = pgd_offset(&init_mm, address);
358 if (dir) {
359 pmd = pmd_offset(dir, address & PAGE_MASK);
360 if (pmd && pmd_present(*pmd)) {
361 pte = pte_offset_kernel(pmd, address & PAGE_MASK);
362 if (pte && pte_present(*pte))
363 return(pte);
364 }
365 }
366 return NULL;
367}
368
369unsigned long va_to_phys(unsigned long address)
370{
371 pte_t *pte;
372
373 pte = va_to_pte(address);
374 if (pte)
375 return(((unsigned long)(pte_val(*pte)) & PAGE_MASK) | (address & ~(PAGE_MASK)));
376 return (0);
377}
378
379void
380print_8xx_pte(struct mm_struct *mm, unsigned long addr)
381{
382 pgd_t * pgd;
383 pmd_t * pmd;
384 pte_t * pte;
385
386 printk(" pte @ 0x%8lx: ", addr);
387 pgd = pgd_offset(mm, addr & PAGE_MASK);
388 if (pgd) {
389 pmd = pmd_offset(pgd, addr & PAGE_MASK);
390 if (pmd && pmd_present(*pmd)) {
391 pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
392 if (pte) {
393 printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n",
394 (long)pgd, (long)pte, (long)pte_val(*pte));
395#define pp ((long)pte_val(*pte))
396 printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx "
397 "CI: %lx v: %lx\n",
398 pp>>12, /* rpn */
399 (pp>>10)&3, /* pp */
400 (pp>>3)&1, /* small */
401 (pp>>2)&1, /* shared */
402 (pp>>1)&1, /* cache inhibit */
403 pp&1 /* valid */
404 );
405#undef pp
406 }
407 else {
408 printk("no pte\n");
409 }
410 }
411 else {
412 printk("no pmd\n");
413 }
414 }
415 else {
416 printk("no pgd\n");
417 }
418}
419
420int
421get_8xx_pte(struct mm_struct *mm, unsigned long addr)
422{
423 pgd_t * pgd;
424 pmd_t * pmd;
425 pte_t * pte;
426 int retval = 0;
427
428 pgd = pgd_offset(mm, addr & PAGE_MASK);
429 if (pgd) {
430 pmd = pmd_offset(pgd, addr & PAGE_MASK);
431 if (pmd && pmd_present(*pmd)) {
432 pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
433 if (pte) {
434 retval = (int)pte_val(*pte);
435 }
436 }
437 }
438 return(retval);
439}
440#endif /* CONFIG_8xx */
diff --git a/arch/ppc/mm/fsl_booke_mmu.c b/arch/ppc/mm/fsl_booke_mmu.c
new file mode 100644
index 000000000000..36233bdcdf8f
--- /dev/null
+++ b/arch/ppc/mm/fsl_booke_mmu.c
@@ -0,0 +1,236 @@
1/*
2 * Modifications by Kumar Gala (kumar.gala@freescale.com) to support
3 * E500 Book E processors.
4 *
5 * Copyright 2004 Freescale Semiconductor, Inc
6 *
7 * This file contains the routines for initializing the MMU
8 * on the 4xx series of chips.
9 * -- paulus
10 *
11 * Derived from arch/ppc/mm/init.c:
12 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
13 *
14 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
15 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
16 * Copyright (C) 1996 Paul Mackerras
17 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
18 *
19 * Derived from "arch/i386/mm/init.c"
20 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 *
27 */
28
29#include <linux/config.h>
30#include <linux/signal.h>
31#include <linux/sched.h>
32#include <linux/kernel.h>
33#include <linux/errno.h>
34#include <linux/string.h>
35#include <linux/types.h>
36#include <linux/ptrace.h>
37#include <linux/mman.h>
38#include <linux/mm.h>
39#include <linux/swap.h>
40#include <linux/stddef.h>
41#include <linux/vmalloc.h>
42#include <linux/init.h>
43#include <linux/delay.h>
44#include <linux/bootmem.h>
45#include <linux/highmem.h>
46
47#include <asm/pgalloc.h>
48#include <asm/prom.h>
49#include <asm/io.h>
50#include <asm/mmu_context.h>
51#include <asm/pgtable.h>
52#include <asm/mmu.h>
53#include <asm/uaccess.h>
54#include <asm/smp.h>
55#include <asm/bootx.h>
56#include <asm/machdep.h>
57#include <asm/setup.h>
58
59extern void loadcam_entry(unsigned int index);
60unsigned int tlbcam_index;
61unsigned int num_tlbcam_entries;
62static unsigned long __cam0, __cam1, __cam2;
63extern unsigned long total_lowmem;
64extern unsigned long __max_low_memory;
65#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE
66
67struct tlbcam {
68 u32 MAS0;
69 u32 MAS1;
70 u32 MAS2;
71 u32 MAS3;
72 u32 MAS7;
73} TLBCAM[NUM_TLBCAMS];
74
75struct tlbcamrange {
76 unsigned long start;
77 unsigned long limit;
78 phys_addr_t phys;
79} tlbcam_addrs[NUM_TLBCAMS];
80
81extern unsigned int tlbcam_index;
82
83/*
84 * Return PA for this VA if it is mapped by a CAM, or 0
85 */
86unsigned long v_mapped_by_tlbcam(unsigned long va)
87{
88 int b;
89 for (b = 0; b < tlbcam_index; ++b)
90 if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit)
91 return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start);
92 return 0;
93}
94
95/*
96 * Return VA for a given PA or 0 if not mapped
97 */
98unsigned long p_mapped_by_tlbcam(unsigned long pa)
99{
100 int b;
101 for (b = 0; b < tlbcam_index; ++b)
102 if (pa >= tlbcam_addrs[b].phys
103 && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start)
104 +tlbcam_addrs[b].phys)
105 return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys);
106 return 0;
107}
108
109/*
110 * Set up one of the fixed TLB (CAM) entries used to map lowmem.
111 * The parameters are not checked; in particular size must be a power
112 * of 4 between 4k and 256M.
113 */
114void settlbcam(int index, unsigned long virt, phys_addr_t phys,
115 unsigned int size, int flags, unsigned int pid)
116{
117 unsigned int tsize, lz;
118
119 asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size));
120 tsize = (21 - lz) / 2;
121
122#ifdef CONFIG_SMP
123 if ((flags & _PAGE_NO_CACHE) == 0)
124 flags |= _PAGE_COHERENT;
125#endif
126
127 TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index);
128 TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid);
129 TLBCAM[index].MAS2 = virt & PAGE_MASK;
130
131 TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0;
132 TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0;
133 TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0;
134 TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0;
135 TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0;
136
137 TLBCAM[index].MAS3 = (phys & PAGE_MASK) | MAS3_SX | MAS3_SR;
138 TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0);
139
140#ifndef CONFIG_KGDB /* want user access for breakpoints */
141 if (flags & _PAGE_USER) {
142 TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR;
143 TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0);
144 }
145#else
146 TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR;
147 TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0);
148#endif
149
150 tlbcam_addrs[index].start = virt;
151 tlbcam_addrs[index].limit = virt + size - 1;
152 tlbcam_addrs[index].phys = phys;
153
154 loadcam_entry(index);
155}
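
[Editor's note] The cntlzw sequence in settlbcam() above turns a power-of-4 size into a Book-E TSIZE code via tsize = (21 - clz(size)) / 2. A small sketch of the same arithmetic, with the GCC builtin standing in for the cntlzw instruction (illustration only):

#include <stdio.h>

static unsigned int cam_tsize(unsigned int size)
{
        return (21 - __builtin_clz(size)) / 2;
}

int main(void)
{
        /* power-of-4 sizes map onto Book-E TSIZE codes */
        printf("1MB   -> %u\n", cam_tsize(1U << 20));   /* 5 */
        printf("16MB  -> %u\n", cam_tsize(1U << 24));   /* 7 */
        printf("256MB -> %u\n", cam_tsize(1U << 28));   /* 9 */
        return 0;
}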
156
157void invalidate_tlbcam_entry(int index)
158{
159 TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index);
160 TLBCAM[index].MAS1 = ~MAS1_VALID;
161
162 loadcam_entry(index);
163}
164
165void __init cam_mapin_ram(unsigned long cam0, unsigned long cam1,
166 unsigned long cam2)
167{
168 settlbcam(0, KERNELBASE, PPC_MEMSTART, cam0, _PAGE_KERNEL, 0);
169 tlbcam_index++;
170 if (cam1) {
171 tlbcam_index++;
172 settlbcam(1, KERNELBASE+cam0, PPC_MEMSTART+cam0, cam1, _PAGE_KERNEL, 0);
173 }
174 if (cam2) {
175 tlbcam_index++;
176 settlbcam(2, KERNELBASE+cam0+cam1, PPC_MEMSTART+cam0+cam1, cam2, _PAGE_KERNEL, 0);
177 }
178}
179
180/*
181 * MMU_init_hw does the chip-specific initialization of the MMU hardware.
182 */
183void __init MMU_init_hw(void)
184{
185 flush_instruction_cache();
186}
187
188unsigned long __init mmu_mapin_ram(void)
189{
190 cam_mapin_ram(__cam0, __cam1, __cam2);
191
192 return __cam0 + __cam1 + __cam2;
193}
194
195
196void __init
197adjust_total_lowmem(void)
198{
199 unsigned long max_low_mem = MAX_LOW_MEM;
200 unsigned long cam_max = 0x10000000;
201 unsigned long ram;
202
203 /* adjust CAM size to max_low_mem */
204 if (max_low_mem < cam_max)
205 cam_max = max_low_mem;
206
207 /* adjust lowmem size to max_low_mem */
208 if (max_low_mem < total_lowmem)
209 ram = max_low_mem;
210 else
211 ram = total_lowmem;
212
213 /* Calculate CAM values */
214 __cam0 = 1UL << 2 * (__ilog2(ram) / 2);
215 if (__cam0 > cam_max)
216 __cam0 = cam_max;
217 ram -= __cam0;
218 if (ram) {
219 __cam1 = 1UL << 2 * (__ilog2(ram) / 2);
220 if (__cam1 > cam_max)
221 __cam1 = cam_max;
222 ram -= __cam1;
223 }
224 if (ram) {
225 __cam2 = 1UL << 2 * (__ilog2(ram) / 2);
226 if (__cam2 > cam_max)
227 __cam2 = cam_max;
228 ram -= __cam2;
229 }
230
231 printk(KERN_INFO "Memory CAM mapping: CAM0=%ldMb, CAM1=%ldMb,"
232 " CAM2=%ldMb residual: %ldMb\n",
233 __cam0 >> 20, __cam1 >> 20, __cam2 >> 20,
234 (total_lowmem - __cam0 - __cam1 - __cam2) >> 20);
235 __max_low_memory = max_low_mem = __cam0 + __cam1 + __cam2;
236}
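
[Editor's note] adjust_total_lowmem() above gives each CAM the largest power-of-4 chunk of the remaining lowmem, capped at 256 MB. A standalone sketch of that split for an example 192 MB of lowmem; __ilog2() is approximated with a GCC builtin here and the constants are restated for illustration:

#include <stdio.h>

#define CAM_MAX (256UL << 20)

static unsigned long ilog2_ul(unsigned long x)
{
        return 8 * sizeof(unsigned long) - 1 - __builtin_clzl(x);
}

int main(void)
{
        unsigned long ram = 192UL << 20;        /* example: 192 MB lowmem */
        unsigned long cam[3] = { 0, 0, 0 };
        int i;

        for (i = 0; i < 3 && ram; i++) {
                cam[i] = 1UL << (2 * (ilog2_ul(ram) / 2));
                if (cam[i] > CAM_MAX)
                        cam[i] = CAM_MAX;
                ram -= cam[i];
        }
        printf("CAM0=%luMb CAM1=%luMb CAM2=%luMb residual=%luMb\n",
               cam[0] >> 20, cam[1] >> 20, cam[2] >> 20, ram >> 20);
        return 0;
}

For 192 MB this settles on three 64 MB CAMs with no residual, matching the printk format used by the kernel function.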
diff --git a/arch/ppc/mm/hashtable.S b/arch/ppc/mm/hashtable.S
new file mode 100644
index 000000000000..ab83132a7ed0
--- /dev/null
+++ b/arch/ppc/mm/hashtable.S
@@ -0,0 +1,642 @@
1/*
2 * arch/ppc/kernel/hashtable.S
3 *
4 * $Id: hashtable.S,v 1.6 1999/10/08 01:56:15 paulus Exp $
5 *
6 * PowerPC version
7 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
8 * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
9 * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
10 * Adapted for Power Macintosh by Paul Mackerras.
11 * Low-level exception handlers and MMU support
12 * rewritten by Paul Mackerras.
13 * Copyright (C) 1996 Paul Mackerras.
14 *
15 * This file contains low-level assembler routines for managing
16 * the PowerPC MMU hash table. (PPC 8xx processors don't use a
17 * hash table, so this file is not used on them.)
18 *
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
23 *
24 */
25
26#include <linux/config.h>
27#include <asm/processor.h>
28#include <asm/page.h>
29#include <asm/pgtable.h>
30#include <asm/cputable.h>
31#include <asm/ppc_asm.h>
32#include <asm/thread_info.h>
33#include <asm/offsets.h>
34
35#ifdef CONFIG_SMP
36 .comm mmu_hash_lock,4
37#endif /* CONFIG_SMP */
38
39/*
40 * Sync CPUs with hash_page taking & releasing the hash
41 * table lock
42 */
43#ifdef CONFIG_SMP
44 .text
45_GLOBAL(hash_page_sync)
46 lis r8,mmu_hash_lock@h
47 ori r8,r8,mmu_hash_lock@l
48 lis r0,0x0fff
49 b 10f
5011: lwz r6,0(r8)
51 cmpwi 0,r6,0
52 bne 11b
5310: lwarx r6,0,r8
54 cmpwi 0,r6,0
55 bne- 11b
56 stwcx. r0,0,r8
57 bne- 10b
58 isync
59 eieio
60 li r0,0
61 stw r0,0(r8)
62 blr
63#endif
64
65/*
66 * Load a PTE into the hash table, if possible.
67 * The address is in r4, and r3 contains an access flag:
68 * _PAGE_RW (0x400) if a write.
69 * r9 contains the SRR1 value, from which we use the MSR_PR bit.
70 * SPRG3 contains the physical address of the current task's thread.
71 *
72 * Returns to the caller if the access is illegal or there is no
73 * mapping for the address. Otherwise it places an appropriate PTE
74 * in the hash table and returns from the exception.
75 * Uses r0, r3 - r8, ctr, lr.
76 */
77 .text
78_GLOBAL(hash_page)
79#ifdef CONFIG_PPC64BRIDGE
80 mfmsr r0
81 clrldi r0,r0,1 /* make sure it's in 32-bit mode */
82 MTMSRD(r0)
83 isync
84#endif
85 tophys(r7,0) /* gets -KERNELBASE into r7 */
86#ifdef CONFIG_SMP
87 addis r8,r7,mmu_hash_lock@h
88 ori r8,r8,mmu_hash_lock@l
89 lis r0,0x0fff
90 b 10f
9111: lwz r6,0(r8)
92 cmpwi 0,r6,0
93 bne 11b
9410: lwarx r6,0,r8
95 cmpwi 0,r6,0
96 bne- 11b
97 stwcx. r0,0,r8
98 bne- 10b
99 isync
100#endif
101 /* Get PTE (linux-style) and check access */
102 lis r0,KERNELBASE@h /* check if kernel address */
103 cmplw 0,r4,r0
104 mfspr r8,SPRN_SPRG3 /* current task's THREAD (phys) */
105 ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */
106 lwz r5,PGDIR(r8) /* virt page-table root */
107 blt+ 112f /* assume user more likely */
108 lis r5,swapper_pg_dir@ha /* if kernel address, use */
109 addi r5,r5,swapper_pg_dir@l /* kernel page table */
110 rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */
111112: add r5,r5,r7 /* convert to phys addr */
112 rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */
113 lwz r8,0(r5) /* get pmd entry */
114 rlwinm. r8,r8,0,0,19 /* extract address of pte page */
115#ifdef CONFIG_SMP
116 beq- hash_page_out /* return if no mapping */
117#else
118 /* XXX it seems like the 601 will give a machine fault on the
119 rfi if its alignment is wrong (bottom 4 bits of address are
120 8 or 0xc) and we have had a not-taken conditional branch
121 to the address following the rfi. */
122 beqlr-
123#endif
124 rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */
125 rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */
126 ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE
127
128 /*
129 * Update the linux PTE atomically. We do the lwarx up-front
130 * because almost always, there won't be a permission violation
131 * and there won't already be an HPTE, and thus we will have
132 * to update the PTE to set _PAGE_HASHPTE. -- paulus.
133 */
134retry:
135 lwarx r6,0,r8 /* get linux-style pte */
136 andc. r5,r3,r6 /* check access & ~permission */
137#ifdef CONFIG_SMP
138 bne- hash_page_out /* return if access not permitted */
139#else
140 bnelr-
141#endif
142 or r5,r0,r6 /* set accessed/dirty bits */
143 stwcx. r5,0,r8 /* attempt to update PTE */
144 bne- retry /* retry if someone got there first */
145
146 mfsrin r3,r4 /* get segment reg for segment */
147 mfctr r0
148 stw r0,_CTR(r11)
149 bl create_hpte /* add the hash table entry */
150
151/*
152 * htab_reloads counts the number of times we have to fault an
153 * HPTE into the hash table. This should only happen after a
154 * fork (because fork does a flush_tlb_mm) or a vmalloc or ioremap.
155 * Where a page is faulted into a process's address space,
156 * update_mmu_cache gets called to put the HPTE into the hash table
157 * and those are counted as preloads rather than reloads.
158 */
159 addis r8,r7,htab_reloads@ha
160 lwz r3,htab_reloads@l(r8)
161 addi r3,r3,1
162 stw r3,htab_reloads@l(r8)
163
164#ifdef CONFIG_SMP
165 eieio
166 addis r8,r7,mmu_hash_lock@ha
167 li r0,0
168 stw r0,mmu_hash_lock@l(r8)
169#endif
170
171 /* Return from the exception */
172 lwz r5,_CTR(r11)
173 mtctr r5
174 lwz r0,GPR0(r11)
175 lwz r7,GPR7(r11)
176 lwz r8,GPR8(r11)
177 b fast_exception_return
178
179#ifdef CONFIG_SMP
180hash_page_out:
181 eieio
182 addis r8,r7,mmu_hash_lock@ha
183 li r0,0
184 stw r0,mmu_hash_lock@l(r8)
185 blr
186#endif /* CONFIG_SMP */
187
188/*
189 * Add an entry for a particular page to the hash table.
190 *
191 * add_hash_page(unsigned context, unsigned long va, unsigned long pmdval)
192 *
193 * We assume any necessary modifications to the pte (e.g. setting
194 * the accessed bit) have already been done and that there is actually
195 * a hash table in use (i.e. we're not on a 603).
196 */
197_GLOBAL(add_hash_page)
198 mflr r0
199 stw r0,4(r1)
200
201 /* Convert context and va to VSID */
202 mulli r3,r3,897*16 /* multiply context by context skew */
203 rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */
204 mulli r0,r0,0x111 /* multiply by ESID skew */
205 add r3,r3,r0 /* note create_hpte trims to 24 bits */
206
207#ifdef CONFIG_SMP
208 rlwinm r8,r1,0,0,18 /* use cpu number to make tag */
209 lwz r8,TI_CPU(r8) /* to go in mmu_hash_lock */
210 oris r8,r8,12
211#endif /* CONFIG_SMP */
212
213 /*
214 * We disable interrupts here, even on UP, because we don't
215 * want to race with hash_page, and because we want the
216 * _PAGE_HASHPTE bit to be a reliable indication of whether
217 * the HPTE exists (or at least whether one did once).
218 * We also turn off the MMU for data accesses so that we
219 * can't take a hash table miss (assuming the code is
220 * covered by a BAT). -- paulus
221 */
222 mfmsr r10
223 SYNC
224 rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
225 rlwinm r0,r0,0,28,26 /* clear MSR_DR */
226 mtmsr r0
227 SYNC_601
228 isync
229
230 tophys(r7,0)
231
232#ifdef CONFIG_SMP
233 addis r9,r7,mmu_hash_lock@ha
234 addi r9,r9,mmu_hash_lock@l
23510: lwarx r0,0,r9 /* take the mmu_hash_lock */
236 cmpi 0,r0,0
237 bne- 11f
238 stwcx. r8,0,r9
239 beq+ 12f
24011: lwz r0,0(r9)
241 cmpi 0,r0,0
242 beq 10b
243 b 11b
24412: isync
245#endif
246
247 /*
248 * Fetch the linux pte and test and set _PAGE_HASHPTE atomically.
249 * If _PAGE_HASHPTE was already set, we don't replace the existing
250 * HPTE, so we just unlock and return.
251 */
252 mr r8,r5
253 rlwimi r8,r4,22,20,29
2541: lwarx r6,0,r8
255 andi. r0,r6,_PAGE_HASHPTE
256 bne 9f /* if HASHPTE already set, done */
257 ori r5,r6,_PAGE_HASHPTE
258 stwcx. r5,0,r8
259 bne- 1b
260
261 bl create_hpte
262
263 addis r8,r7,htab_preloads@ha
264 lwz r3,htab_preloads@l(r8)
265 addi r3,r3,1
266 stw r3,htab_preloads@l(r8)
267
2689:
269#ifdef CONFIG_SMP
270 eieio
271 li r0,0
272 stw r0,0(r9) /* clear mmu_hash_lock */
273#endif
274
275 /* reenable interrupts and DR */
276 mtmsr r10
277 SYNC_601
278 isync
279
280 lwz r0,4(r1)
281 mtlr r0
282 blr
283
284/*
285 * This routine adds a hardware PTE to the hash table.
286 * It is designed to be called with the MMU either on or off.
287 * r3 contains the VSID, r4 contains the virtual address,
288 * r5 contains the linux PTE, r6 contains the old value of the
289 * linux PTE (before setting _PAGE_HASHPTE) and r7 contains the
290 * offset to be added to addresses (0 if the MMU is on,
291 * -KERNELBASE if it is off).
292 * On SMP, the caller should have the mmu_hash_lock held.
293 * We assume that the caller has (or will) set the _PAGE_HASHPTE
294 * bit in the linux PTE in memory. The value passed in r6 should
295 * be the old linux PTE value; if it doesn't have _PAGE_HASHPTE set
296 * this routine will skip the search for an existing HPTE.
297 * This procedure modifies r0, r3 - r6, r8, cr0.
298 * -- paulus.
299 *
300 * For speed, 4 of the instructions get patched once the size and
301 * physical address of the hash table are known. These definitions
302 * of Hash_base and Hash_bits below are just an example.
303 */
304Hash_base = 0xc0180000
305Hash_bits = 12 /* e.g. 256kB hash table */
306Hash_msk = (((1 << Hash_bits) - 1) * 64)
307
308#ifndef CONFIG_PPC64BRIDGE
309/* defines for the PTE format for 32-bit PPCs */
310#define PTE_SIZE 8
311#define PTEG_SIZE 64
312#define LG_PTEG_SIZE 6
313#define LDPTEu lwzu
314#define STPTE stw
315#define CMPPTE cmpw
316#define PTE_H 0x40
317#define PTE_V 0x80000000
318#define TST_V(r) rlwinm. r,r,0,0,0
319#define SET_V(r) oris r,r,PTE_V@h
320#define CLR_V(r,t) rlwinm r,r,0,1,31
321
322#else
323/* defines for the PTE format for 64-bit PPCs */
324#define PTE_SIZE 16
325#define PTEG_SIZE 128
326#define LG_PTEG_SIZE 7
327#define LDPTEu ldu
328#define STPTE std
329#define CMPPTE cmpd
330#define PTE_H 2
331#define PTE_V 1
332#define TST_V(r) andi. r,r,PTE_V
333#define SET_V(r) ori r,r,PTE_V
334#define CLR_V(r,t) li t,PTE_V; andc r,r,t
335#endif /* CONFIG_PPC64BRIDGE */
336
337#define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1)
338#define HASH_RIGHT 31-LG_PTEG_SIZE
339
340_GLOBAL(create_hpte)
341 /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */
342 rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */
343 rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
344 and r8,r8,r0 /* writable if _RW & _DIRTY */
345 rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */
346 rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */
347 ori r8,r8,0xe14 /* clear out reserved bits and M */
348 andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */
349BEGIN_FTR_SECTION
350 ori r8,r8,_PAGE_COHERENT /* set M (coherence required) */
351END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT)
352
353 /* Construct the high word of the PPC-style PTE (r5) */
354#ifndef CONFIG_PPC64BRIDGE
355 rlwinm r5,r3,7,1,24 /* put VSID in 0x7fffff80 bits */
356 rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */
357#else /* CONFIG_PPC64BRIDGE */
358 clrlwi r3,r3,8 /* reduce vsid to 24 bits */
359 sldi r5,r3,12 /* shift vsid into position */
360 rlwimi r5,r4,16,20,24 /* put in API (abbrev page index) */
361#endif /* CONFIG_PPC64BRIDGE */
362 SET_V(r5) /* set V (valid) bit */
363
364 /* Get the address of the primary PTE group in the hash table (r3) */
365_GLOBAL(hash_page_patch_A)
366 addis r0,r7,Hash_base@h /* base address of hash table */
367 rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
368 rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
369 xor r3,r3,r0 /* make primary hash */
370 li r0,8 /* PTEs/group */
371
372 /*
373 * Test the _PAGE_HASHPTE bit in the old linux PTE, and skip the search
374 * if it is clear, meaning that the HPTE isn't there already...
375 */
376 andi. r6,r6,_PAGE_HASHPTE
377 beq+ 10f /* no PTE: go look for an empty slot */
378 tlbie r4
379
380 addis r4,r7,htab_hash_searches@ha
381 lwz r6,htab_hash_searches@l(r4)
382 addi r6,r6,1 /* count how many searches we do */
383 stw r6,htab_hash_searches@l(r4)
384
385 /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
386 mtctr r0
387 addi r4,r3,-PTE_SIZE
3881: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */
389 CMPPTE 0,r6,r5
390 bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
391 beq+ found_slot
392
393 /* Search the secondary PTEG for a matching PTE */
394 ori r5,r5,PTE_H /* set H (secondary hash) bit */
395_GLOBAL(hash_page_patch_B)
396 xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
397 xori r4,r4,(-PTEG_SIZE & 0xffff)
398 addi r4,r4,-PTE_SIZE
399 mtctr r0
4002: LDPTEu r6,PTE_SIZE(r4)
401 CMPPTE 0,r6,r5
402 bdnzf 2,2b
403 beq+ found_slot
404 xori r5,r5,PTE_H /* clear H bit again */
405
406 /* Search the primary PTEG for an empty slot */
40710: mtctr r0
408 addi r4,r3,-PTE_SIZE /* search primary PTEG */
4091: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */
410 TST_V(r6) /* test valid bit */
411 bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
412 beq+ found_empty
413
414 /* update counter of times that the primary PTEG is full */
415 addis r4,r7,primary_pteg_full@ha
416 lwz r6,primary_pteg_full@l(r4)
417 addi r6,r6,1
418 stw r6,primary_pteg_full@l(r4)
419
420 /* Search the secondary PTEG for an empty slot */
421 ori r5,r5,PTE_H /* set H (secondary hash) bit */
422_GLOBAL(hash_page_patch_C)
423 xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
424 xori r4,r4,(-PTEG_SIZE & 0xffff)
425 addi r4,r4,-PTE_SIZE
426 mtctr r0
4272: LDPTEu r6,PTE_SIZE(r4)
428 TST_V(r6)
429 bdnzf 2,2b
430 beq+ found_empty
431 xori r5,r5,PTE_H /* clear H bit again */
432
433 /*
434 * Choose an arbitrary slot in the primary PTEG to overwrite.
435 * Since both the primary and secondary PTEGs are full, and we
436 * have no information that the PTEs in the primary PTEG are
437 * more important or useful than those in the secondary PTEG,
438 * and we know there is a definite (although small) speed
439 * advantage to putting the PTE in the primary PTEG, we always
440 * put the PTE in the primary PTEG.
441 */
442 addis r4,r7,next_slot@ha
443 lwz r6,next_slot@l(r4)
444 addi r6,r6,PTE_SIZE
445 andi. r6,r6,7*PTE_SIZE
446 stw r6,next_slot@l(r4)
447 add r4,r3,r6
448
449 /* update counter of evicted pages */
450 addis r6,r7,htab_evicts@ha
451 lwz r3,htab_evicts@l(r6)
452 addi r3,r3,1
453 stw r3,htab_evicts@l(r6)
454
455#ifndef CONFIG_SMP
456 /* Store PTE in PTEG */
457found_empty:
458 STPTE r5,0(r4)
459found_slot:
460 STPTE r8,PTE_SIZE/2(r4)
461
462#else /* CONFIG_SMP */
463/*
464 * Between the tlbie above and updating the hash table entry below,
465 * another CPU could read the hash table entry and put it in its TLB.
466 * There are 3 cases:
467 * 1. using an empty slot
468 * 2. updating an earlier entry to change permissions (i.e. enable write)
469 * 3. taking over the PTE for an unrelated address
470 *
471 * In each case it doesn't really matter if the other CPUs have the old
472 * PTE in their TLB. So we don't need to bother with another tlbie here,
473 * which is convenient as we've overwritten the register that had the
474 * address. :-) The tlbie above is mainly to make sure that this CPU comes
475 * and gets the new PTE from the hash table.
476 *
477 * We do however have to make sure that the PTE is never in an invalid
478 * state with the V bit set.
479 */
480found_empty:
481found_slot:
482 CLR_V(r5,r0) /* clear V (valid) bit in PTE */
483 STPTE r5,0(r4)
484 sync
485 TLBSYNC
486 STPTE r8,PTE_SIZE/2(r4) /* put in correct RPN, WIMG, PP bits */
487 sync
488 SET_V(r5)
489 STPTE r5,0(r4) /* finally set V bit in PTE */
490#endif /* CONFIG_SMP */
491
492 sync /* make sure pte updates get to memory */
493 blr
494
495 .comm next_slot,4
496 .comm primary_pteg_full,4
497 .comm htab_hash_searches,4
498
499/*
500 * Flush the entry for a particular page from the hash table.
501 *
502 * flush_hash_pages(unsigned context, unsigned long va, unsigned long pmdval,
503 * int count)
504 *
505 * We assume that there is a hash table in use (Hash != 0).
506 */
507_GLOBAL(flush_hash_pages)
508 tophys(r7,0)
509
510 /*
511 * We disable interrupts here, even on UP, because we want
512 * the _PAGE_HASHPTE bit to be a reliable indication of
513 * whether the HPTE exists (or at least whether one did once).
514 * We also turn off the MMU for data accesses so that we
515 * can't take a hash table miss (assuming the code is
516 * covered by a BAT). -- paulus
517 */
518 mfmsr r10
519 SYNC
520 rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
521 rlwinm r0,r0,0,28,26 /* clear MSR_DR */
522 mtmsr r0
523 SYNC_601
524 isync
525
526 /* First find a PTE in the range that has _PAGE_HASHPTE set */
527 rlwimi r5,r4,22,20,29
5281: lwz r0,0(r5)
529 cmpwi cr1,r6,1
530 andi. r0,r0,_PAGE_HASHPTE
531 bne 2f
532 ble cr1,19f
533 addi r4,r4,0x1000
534 addi r5,r5,4
535 addi r6,r6,-1
536 b 1b
537
538 /* Convert context and va to VSID */
5392: mulli r3,r3,897*16 /* multiply context by context skew */
540 rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */
541 mulli r0,r0,0x111 /* multiply by ESID skew */
542 add r3,r3,r0 /* note code below trims to 24 bits */
543
544 /* Construct the high word of the PPC-style PTE (r11) */
545#ifndef CONFIG_PPC64BRIDGE
546 rlwinm r11,r3,7,1,24 /* put VSID in 0x7fffff80 bits */
547 rlwimi r11,r4,10,26,31 /* put in API (abbrev page index) */
548#else /* CONFIG_PPC64BRIDGE */
549 clrlwi r3,r3,8 /* reduce vsid to 24 bits */
550 sldi r11,r3,12 /* shift vsid into position */
551 rlwimi r11,r4,16,20,24 /* put in API (abbrev page index) */
552#endif /* CONFIG_PPC64BRIDGE */
553 SET_V(r11) /* set V (valid) bit */
554
555#ifdef CONFIG_SMP
556 addis r9,r7,mmu_hash_lock@ha
557 addi r9,r9,mmu_hash_lock@l
558 rlwinm r8,r1,0,0,18
559 add r8,r8,r7
560 lwz r8,TI_CPU(r8)
561 oris r8,r8,9
56210: lwarx r0,0,r9
563 cmpi 0,r0,0
564 bne- 11f
565 stwcx. r8,0,r9
566 beq+ 12f
56711: lwz r0,0(r9)
568 cmpi 0,r0,0
569 beq 10b
570 b 11b
57112: isync
572#endif
573
574 /*
575 * Check the _PAGE_HASHPTE bit in the linux PTE. If it is
576 * already clear, we're done (for this pte). If not,
577 * clear it (atomically) and proceed. -- paulus.
578 */
57933: lwarx r8,0,r5 /* fetch the pte */
580 andi. r0,r8,_PAGE_HASHPTE
581 beq 8f /* done if HASHPTE is already clear */
582 rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */
583 stwcx. r8,0,r5 /* update the pte */
584 bne- 33b
585
586 /* Get the address of the primary PTE group in the hash table (r3) */
587_GLOBAL(flush_hash_patch_A)
588 addis r8,r7,Hash_base@h /* base address of hash table */
589 rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
590 rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
591 xor r8,r0,r8 /* make primary hash */
592
593 /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
594 li r0,8 /* PTEs/group */
595 mtctr r0
596 addi r12,r8,-PTE_SIZE
5971: LDPTEu r0,PTE_SIZE(r12) /* get next PTE */
598 CMPPTE 0,r0,r11
599 bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
600 beq+ 3f
601
602 /* Search the secondary PTEG for a matching PTE */
603 ori r11,r11,PTE_H /* set H (secondary hash) bit */
604 li r0,8 /* PTEs/group */
605_GLOBAL(flush_hash_patch_B)
606 xoris r12,r8,Hash_msk>>16 /* compute secondary hash */
607 xori r12,r12,(-PTEG_SIZE & 0xffff)
608 addi r12,r12,-PTE_SIZE
609 mtctr r0
6102: LDPTEu r0,PTE_SIZE(r12)
611 CMPPTE 0,r0,r11
612 bdnzf 2,2b
613 xori r11,r11,PTE_H /* clear H again */
614 bne- 4f /* should rarely fail to find it */
615
6163: li r0,0
617 STPTE r0,0(r12) /* invalidate entry */
6184: sync
619 tlbie r4 /* in hw tlb too */
620 sync
621
6228: ble cr1,9f /* if all ptes checked */
62381: addi r6,r6,-1
624 addi r5,r5,4 /* advance to next pte */
625 addi r4,r4,0x1000
626 lwz r0,0(r5) /* check next pte */
627 cmpwi cr1,r6,1
628 andi. r0,r0,_PAGE_HASHPTE
629 bne 33b
630 bgt cr1,81b
631
6329:
633#ifdef CONFIG_SMP
634 TLBSYNC
635 li r0,0
636 stw r0,0(r9) /* clear mmu_hash_lock */
637#endif
638
63919: mtmsr r10
640 SYNC_601
641 isync
642 blr
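
[Editor's note] The eviction path in create_hpte above (see the "Choose an arbitrary slot" comment) simply rotates next_slot through the eight slots of the primary PTEG. A C restatement of just that policy, assuming the 32-bit 8-byte PTE layout; it is illustrative only and is not how the kernel invokes it:

#include <stdio.h>

static unsigned int next_slot;                  /* byte offset within the PTEG */

static unsigned int pick_victim_offset(void)
{
        const unsigned int PTE_SIZE = 8;        /* 32-bit hash PTE size */

        next_slot = (next_slot + PTE_SIZE) & (7 * PTE_SIZE);
        return next_slot;
}

int main(void)
{
        int i;

        for (i = 0; i < 10; i++)                /* 8, 16, ..., 56, 0, 8, ... */
                printf("victim offset %u\n", pick_victim_offset());
        return 0;
}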
diff --git a/arch/ppc/mm/init.c b/arch/ppc/mm/init.c
new file mode 100644
index 000000000000..be02a7fec2b7
--- /dev/null
+++ b/arch/ppc/mm/init.c
@@ -0,0 +1,667 @@
1/*
2 * PowerPC version
3 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
4 *
5 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
6 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
7 * Copyright (C) 1996 Paul Mackerras
8 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
9 * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
10 *
11 * Derived from "arch/i386/mm/init.c"
12 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 *
19 */
20
21#include <linux/config.h>
22#include <linux/module.h>
23#include <linux/sched.h>
24#include <linux/kernel.h>
25#include <linux/errno.h>
26#include <linux/string.h>
27#include <linux/types.h>
28#include <linux/mm.h>
29#include <linux/stddef.h>
30#include <linux/init.h>
31#include <linux/bootmem.h>
32#include <linux/highmem.h>
33#include <linux/initrd.h>
34#include <linux/pagemap.h>
35
36#include <asm/pgalloc.h>
37#include <asm/prom.h>
38#include <asm/io.h>
39#include <asm/mmu_context.h>
40#include <asm/pgtable.h>
41#include <asm/mmu.h>
42#include <asm/smp.h>
43#include <asm/machdep.h>
44#include <asm/btext.h>
45#include <asm/tlb.h>
46#include <asm/bootinfo.h>
47
48#include "mem_pieces.h"
49#include "mmu_decl.h"
50
51#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL)
52/* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */
53#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - KERNELBASE))
54#error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL"
55#endif
56#endif
57#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE
58
59DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
60
61unsigned long total_memory;
62unsigned long total_lowmem;
63
64unsigned long ppc_memstart;
65unsigned long ppc_memoffset = PAGE_OFFSET;
66
67int mem_init_done;
68int init_bootmem_done;
69int boot_mapsize;
70#ifdef CONFIG_PPC_PMAC
71unsigned long agp_special_page;
72#endif
73
74extern char _end[];
75extern char etext[], _stext[];
76extern char __init_begin, __init_end;
77extern char __prep_begin, __prep_end;
78extern char __chrp_begin, __chrp_end;
79extern char __pmac_begin, __pmac_end;
80extern char __openfirmware_begin, __openfirmware_end;
81
82#ifdef CONFIG_HIGHMEM
83pte_t *kmap_pte;
84pgprot_t kmap_prot;
85
86EXPORT_SYMBOL(kmap_prot);
87EXPORT_SYMBOL(kmap_pte);
88#endif
89
90void MMU_init(void);
91void set_phys_avail(unsigned long total_ram);
92
93/* XXX should be in current.h -- paulus */
94extern struct task_struct *current_set[NR_CPUS];
95
96char *klimit = _end;
97struct mem_pieces phys_avail;
98
99extern char *sysmap;
100extern unsigned long sysmap_size;
101
102/*
103 * this tells the system to map all of ram with the segregs
104 * (i.e. page tables) instead of the bats.
105 * -- Cort
106 */
107int __map_without_bats;
108int __map_without_ltlbs;
109
110/* max amount of RAM to use */
111unsigned long __max_memory;
112/* max amount of low RAM to map in */
113unsigned long __max_low_memory = MAX_LOW_MEM;
114
115void show_mem(void)
116{
117 int i,free = 0,total = 0,reserved = 0;
118 int shared = 0, cached = 0;
119 int highmem = 0;
120
121 printk("Mem-info:\n");
122 show_free_areas();
123 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
124 i = max_mapnr;
125 while (i-- > 0) {
126 total++;
127 if (PageHighMem(mem_map+i))
128 highmem++;
129 if (PageReserved(mem_map+i))
130 reserved++;
131 else if (PageSwapCache(mem_map+i))
132 cached++;
133 else if (!page_count(mem_map+i))
134 free++;
135 else
136 shared += page_count(mem_map+i) - 1;
137 }
138 printk("%d pages of RAM\n",total);
139 printk("%d pages of HIGHMEM\n", highmem);
140 printk("%d free pages\n",free);
141 printk("%d reserved pages\n",reserved);
142 printk("%d pages shared\n",shared);
143 printk("%d pages swap cached\n",cached);
144}
145
146/* Free up now-unused memory */
147static void free_sec(unsigned long start, unsigned long end, const char *name)
148{
149 unsigned long cnt = 0;
150
151 while (start < end) {
152 ClearPageReserved(virt_to_page(start));
153 set_page_count(virt_to_page(start), 1);
154 free_page(start);
155 cnt++;
156 start += PAGE_SIZE;
157 }
158 if (cnt) {
159 printk(" %ldk %s", cnt << (PAGE_SHIFT - 10), name);
160 totalram_pages += cnt;
161 }
162}
163
164void free_initmem(void)
165{
166#define FREESEC(TYPE) \
167 free_sec((unsigned long)(&__ ## TYPE ## _begin), \
168 (unsigned long)(&__ ## TYPE ## _end), \
169 #TYPE);
170
171 printk ("Freeing unused kernel memory:");
172 FREESEC(init);
173 if (_machine != _MACH_Pmac)
174 FREESEC(pmac);
175 if (_machine != _MACH_chrp)
176 FREESEC(chrp);
177 if (_machine != _MACH_prep)
178 FREESEC(prep);
179 if (!have_of)
180 FREESEC(openfirmware);
181 printk("\n");
182#undef FREESEC
183}
184
185#ifdef CONFIG_BLK_DEV_INITRD
186void free_initrd_mem(unsigned long start, unsigned long end)
187{
188 printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
189
190 for (; start < end; start += PAGE_SIZE) {
191 ClearPageReserved(virt_to_page(start));
192 set_page_count(virt_to_page(start), 1);
193 free_page(start);
194 totalram_pages++;
195 }
196}
197#endif
198
199/*
200 * Check for command-line options that affect what MMU_init will do.
201 */
202void MMU_setup(void)
203{
204 /* Check for nobats option (used in mapin_ram). */
205 if (strstr(cmd_line, "nobats")) {
206 __map_without_bats = 1;
207 }
208
209 if (strstr(cmd_line, "noltlbs")) {
210 __map_without_ltlbs = 1;
211 }
212
213 /* Look for mem= option on command line */
214 if (strstr(cmd_line, "mem=")) {
215 char *p, *q;
216 unsigned long maxmem = 0;
217
218 for (q = cmd_line; (p = strstr(q, "mem=")) != 0; ) {
219 q = p + 4;
220 if (p > cmd_line && p[-1] != ' ')
221 continue;
222 maxmem = simple_strtoul(q, &q, 0);
223 if (*q == 'k' || *q == 'K') {
224 maxmem <<= 10;
225 ++q;
226 } else if (*q == 'm' || *q == 'M') {
227 maxmem <<= 20;
228 ++q;
229 }
230 }
231 __max_memory = maxmem;
232 }
233}
234
235/*
236 * MMU_init sets up the basic memory mappings for the kernel,
237 * including both RAM and possibly some I/O regions,
238 * and sets up the page tables and the MMU hardware ready to go.
239 */
240void __init MMU_init(void)
241{
242 if (ppc_md.progress)
243 ppc_md.progress("MMU:enter", 0x111);
244
245 /* parse args from command line */
246 MMU_setup();
247
248 /*
249 * Figure out how much memory we have, how much
250 * is lowmem, and how much is highmem. If we were
251 * passed the total memory size from the bootloader,
252 * just use it.
253 */
254 if (boot_mem_size)
255 total_memory = boot_mem_size;
256 else
257 total_memory = ppc_md.find_end_of_memory();
258
259 if (__max_memory && total_memory > __max_memory)
260 total_memory = __max_memory;
261 total_lowmem = total_memory;
262#ifdef CONFIG_FSL_BOOKE
263 /* Freescale Book-E parts expect lowmem to be mapped by fixed TLB
264 * entries, so we need to adjust lowmem to match the amount we can map
265 * in the fixed entries */
266 adjust_total_lowmem();
267#endif /* CONFIG_FSL_BOOKE */
268 if (total_lowmem > __max_low_memory) {
269 total_lowmem = __max_low_memory;
270#ifndef CONFIG_HIGHMEM
271 total_memory = total_lowmem;
272#endif /* CONFIG_HIGHMEM */
273 }
274 set_phys_avail(total_lowmem);
275
276 /* Initialize the MMU hardware */
277 if (ppc_md.progress)
278 ppc_md.progress("MMU:hw init", 0x300);
279 MMU_init_hw();
280
281 /* Map in all of RAM starting at KERNELBASE */
282 if (ppc_md.progress)
283 ppc_md.progress("MMU:mapin", 0x301);
284 mapin_ram();
285
286#ifdef CONFIG_HIGHMEM
287 ioremap_base = PKMAP_BASE;
288#else
289 ioremap_base = 0xfe000000UL; /* for now, could be 0xfffff000 */
290#endif /* CONFIG_HIGHMEM */
291 ioremap_bot = ioremap_base;
292
293 /* Map in I/O resources */
294 if (ppc_md.progress)
295 ppc_md.progress("MMU:setio", 0x302);
296 if (ppc_md.setup_io_mappings)
297 ppc_md.setup_io_mappings();
298
299 /* Initialize the context management stuff */
300 mmu_context_init();
301
302 if (ppc_md.progress)
303 ppc_md.progress("MMU:exit", 0x211);
304
305#ifdef CONFIG_BOOTX_TEXT
306 /* By default, we are no longer mapped */
307 boot_text_mapped = 0;
308 /* Must be done last, or ppc_md.progress will die. */
309 map_boot_text();
310#endif
311}
312
313/* This is only called until mem_init is done. */
314void __init *early_get_page(void)
315{
316 void *p;
317
318 if (init_bootmem_done) {
319 p = alloc_bootmem_pages(PAGE_SIZE);
320 } else {
321 p = mem_pieces_find(PAGE_SIZE, PAGE_SIZE);
322 }
323 return p;
324}
325
326/*
327 * Initialize the bootmem system and give it all the memory we
328 * have available.
329 */
330void __init do_init_bootmem(void)
331{
332 unsigned long start, size;
333 int i;
334
335 /*
336 * Find an area to use for the bootmem bitmap.
337 * We look for the first area which is at least
338 * 128kB in length (128kB is enough for a bitmap
339 * for 4GB of memory, using 4kB pages), plus 1 page
340 * (in case the address isn't page-aligned).
341 */
342 start = 0;
343 size = 0;
344 for (i = 0; i < phys_avail.n_regions; ++i) {
345 unsigned long a = phys_avail.regions[i].address;
346 unsigned long s = phys_avail.regions[i].size;
347 if (s <= size)
348 continue;
349 start = a;
350 size = s;
351 if (s >= 33 * PAGE_SIZE)
352 break;
353 }
354 start = PAGE_ALIGN(start);
355
356 min_low_pfn = start >> PAGE_SHIFT;
357 max_low_pfn = (PPC_MEMSTART + total_lowmem) >> PAGE_SHIFT;
358 max_pfn = (PPC_MEMSTART + total_memory) >> PAGE_SHIFT;
359 boot_mapsize = init_bootmem_node(&contig_page_data, min_low_pfn,
360 PPC_MEMSTART >> PAGE_SHIFT,
361 max_low_pfn);
362
363 /* remove the bootmem bitmap from the available memory */
364 mem_pieces_remove(&phys_avail, start, boot_mapsize, 1);
365
366 /* add everything in phys_avail into the bootmem map */
367 for (i = 0; i < phys_avail.n_regions; ++i)
368 free_bootmem(phys_avail.regions[i].address,
369 phys_avail.regions[i].size);
370
371 init_bootmem_done = 1;
372}
373
374/*
375 * paging_init() sets up the page tables - in fact we've already done this.
376 */
377void __init paging_init(void)
378{
379 unsigned long zones_size[MAX_NR_ZONES], i;
380
381#ifdef CONFIG_HIGHMEM
382 map_page(PKMAP_BASE, 0, 0); /* XXX gross */
383 pkmap_page_table = pte_offset_kernel(pmd_offset(pgd_offset_k
384 (PKMAP_BASE), PKMAP_BASE), PKMAP_BASE);
385 map_page(KMAP_FIX_BEGIN, 0, 0); /* XXX gross */
386 kmap_pte = pte_offset_kernel(pmd_offset(pgd_offset_k
387 (KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), KMAP_FIX_BEGIN);
388 kmap_prot = PAGE_KERNEL;
389#endif /* CONFIG_HIGHMEM */
390
391 /*
392 * All pages are DMA-able so we put them all in the DMA zone.
393 */
394 zones_size[ZONE_DMA] = total_lowmem >> PAGE_SHIFT;
395 for (i = 1; i < MAX_NR_ZONES; i++)
396 zones_size[i] = 0;
397
398#ifdef CONFIG_HIGHMEM
399 zones_size[ZONE_HIGHMEM] = (total_memory - total_lowmem) >> PAGE_SHIFT;
400#endif /* CONFIG_HIGHMEM */
401
402 free_area_init(zones_size);
403}
404
405void __init mem_init(void)
406{
407 unsigned long addr;
408 int codepages = 0;
409 int datapages = 0;
410 int initpages = 0;
411#ifdef CONFIG_HIGHMEM
412 unsigned long highmem_mapnr;
413
414 highmem_mapnr = total_lowmem >> PAGE_SHIFT;
415#endif /* CONFIG_HIGHMEM */
416 max_mapnr = total_memory >> PAGE_SHIFT;
417
418 high_memory = (void *) __va(PPC_MEMSTART + total_lowmem);
419 num_physpages = max_mapnr; /* RAM is assumed contiguous */
420
421 totalram_pages += free_all_bootmem();
422
423#ifdef CONFIG_BLK_DEV_INITRD
424 /* if we are booted from BootX with an initial ramdisk,
425 make sure the ramdisk pages aren't reserved. */
426 if (initrd_start) {
427 for (addr = initrd_start; addr < initrd_end; addr += PAGE_SIZE)
428 ClearPageReserved(virt_to_page(addr));
429 }
430#endif /* CONFIG_BLK_DEV_INITRD */
431
432#ifdef CONFIG_PPC_OF
433 /* mark the RTAS pages as reserved */
434 if ( rtas_data )
435 for (addr = (ulong)__va(rtas_data);
436 addr < PAGE_ALIGN((ulong)__va(rtas_data)+rtas_size) ;
437 addr += PAGE_SIZE)
438 SetPageReserved(virt_to_page(addr));
439#endif
440#ifdef CONFIG_PPC_PMAC
441 if (agp_special_page)
442 SetPageReserved(virt_to_page(agp_special_page));
443#endif
444 if ( sysmap )
445 for (addr = (unsigned long)sysmap;
446 addr < PAGE_ALIGN((unsigned long)sysmap+sysmap_size) ;
447 addr += PAGE_SIZE)
448 SetPageReserved(virt_to_page(addr));
449
450 for (addr = PAGE_OFFSET; addr < (unsigned long)high_memory;
451 addr += PAGE_SIZE) {
452 if (!PageReserved(virt_to_page(addr)))
453 continue;
454 if (addr < (ulong) etext)
455 codepages++;
456 else if (addr >= (unsigned long)&__init_begin
457 && addr < (unsigned long)&__init_end)
458 initpages++;
459 else if (addr < (ulong) klimit)
460 datapages++;
461 }
462
463#ifdef CONFIG_HIGHMEM
464 {
465 unsigned long pfn;
466
467 for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) {
468 struct page *page = mem_map + pfn;
469
470 ClearPageReserved(page);
471 set_bit(PG_highmem, &page->flags);
472 set_page_count(page, 1);
473 __free_page(page);
474 totalhigh_pages++;
475 }
476 totalram_pages += totalhigh_pages;
477 }
478#endif /* CONFIG_HIGHMEM */
479
480 printk("Memory: %luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n",
481 (unsigned long)nr_free_pages()<< (PAGE_SHIFT-10),
482 codepages<< (PAGE_SHIFT-10), datapages<< (PAGE_SHIFT-10),
483 initpages<< (PAGE_SHIFT-10),
484 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
485 if (sysmap)
486 printk("System.map loaded at 0x%08x for debugger, size: %ld bytes\n",
487 (unsigned int)sysmap, sysmap_size);
488#ifdef CONFIG_PPC_PMAC
489 if (agp_special_page)
490 printk(KERN_INFO "AGP special page: 0x%08lx\n", agp_special_page);
491#endif
492
493 mem_init_done = 1;
494}
495
496/*
497 * Set phys_avail to the amount of physical memory,
498 * less the kernel text/data/bss.
499 */
500void __init
501set_phys_avail(unsigned long total_memory)
502{
503 unsigned long kstart, ksize;
504
505 /*
506 * Initially, available physical memory is equivalent to all
507 * physical memory.
508 */
509
510 phys_avail.regions[0].address = PPC_MEMSTART;
511 phys_avail.regions[0].size = total_memory;
512 phys_avail.n_regions = 1;
513
514 /*
515 * Map out the kernel text/data/bss from the available physical
516 * memory.
517 */
518
519 kstart = __pa(_stext); /* should be 0 */
520 ksize = PAGE_ALIGN(klimit - _stext);
521
522 mem_pieces_remove(&phys_avail, kstart, ksize, 0);
523 mem_pieces_remove(&phys_avail, 0, 0x4000, 0);
524
525#if defined(CONFIG_BLK_DEV_INITRD)
526 /* Remove the init RAM disk from the available memory. */
527 if (initrd_start) {
528 mem_pieces_remove(&phys_avail, __pa(initrd_start),
529 initrd_end - initrd_start, 1);
530 }
531#endif /* CONFIG_BLK_DEV_INITRD */
532#ifdef CONFIG_PPC_OF
533 /* remove the RTAS pages from the available memory */
534 if (rtas_data)
535 mem_pieces_remove(&phys_avail, rtas_data, rtas_size, 1);
536#endif
537 /* remove the sysmap pages from the available memory */
538 if (sysmap)
539 mem_pieces_remove(&phys_avail, __pa(sysmap), sysmap_size, 1);
540#ifdef CONFIG_PPC_PMAC
541 /* Because of some uninorth weirdness, we need a page of
542 * memory as high as possible (it must be outside of the
543 * bus address seen as the AGP aperture). It will be used
544 * by the r128 DRM driver
545 *
546	 * FIXME: We need to make sure that page doesn't overlap any of the
547 * above. This could be done by improving mem_pieces_find to be able
548 * to do a backward search from the end of the list.
549 */
550 if (_machine == _MACH_Pmac && find_devices("uni-north-agp")) {
551 agp_special_page = (total_memory - PAGE_SIZE);
552 mem_pieces_remove(&phys_avail, agp_special_page, PAGE_SIZE, 0);
553 agp_special_page = (unsigned long)__va(agp_special_page);
554 }
555#endif /* CONFIG_PPC_PMAC */
556}
557
558/* Mark some memory as reserved by removing it from phys_avail. */
559void __init reserve_phys_mem(unsigned long start, unsigned long size)
560{
561 mem_pieces_remove(&phys_avail, start, size, 1);
562}
563
564/*
565 * This is called when a page has been modified by the kernel.
566 * It just marks the page as not i-cache clean. We do the i-cache
567 * flush later when the page is given to a user process, if necessary.
568 */
569void flush_dcache_page(struct page *page)
570{
571 clear_bit(PG_arch_1, &page->flags);
572}
573
574void flush_dcache_icache_page(struct page *page)
575{
576#ifdef CONFIG_BOOKE
577 __flush_dcache_icache(kmap(page));
578 kunmap(page);
579#else
580 __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT);
581#endif
582
583}
584void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
585{
586 clear_page(page);
587 clear_bit(PG_arch_1, &pg->flags);
588}
589
590void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
591 struct page *pg)
592{
593 copy_page(vto, vfrom);
594 clear_bit(PG_arch_1, &pg->flags);
595}
596
597void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
598 unsigned long addr, int len)
599{
600 unsigned long maddr;
601
602 maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK);
603 flush_icache_range(maddr, maddr + len);
604 kunmap(page);
605}
606
607/*
608 * This is called at the end of handling a user page fault, when the
609 * fault has been handled by updating a PTE in the linux page tables.
610 * We use it to preload an HPTE into the hash table corresponding to
611 * the updated linux PTE.
612 */
613void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
614 pte_t pte)
615{
616 /* handle i-cache coherency */
617 unsigned long pfn = pte_pfn(pte);
618
619 if (pfn_valid(pfn)) {
620 struct page *page = pfn_to_page(pfn);
621 if (!PageReserved(page)
622 && !test_bit(PG_arch_1, &page->flags)) {
623 if (vma->vm_mm == current->active_mm)
624 __flush_dcache_icache((void *) address);
625 else
626 flush_dcache_icache_page(page);
627 set_bit(PG_arch_1, &page->flags);
628 }
629 }
630
631#ifdef CONFIG_PPC_STD_MMU
632 /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
633 if (Hash != 0 && pte_young(pte)) {
634 struct mm_struct *mm;
635 pmd_t *pmd;
636
637 mm = (address < TASK_SIZE)? vma->vm_mm: &init_mm;
638 pmd = pmd_offset(pgd_offset(mm, address), address);
639 if (!pmd_none(*pmd))
640 add_hash_page(mm->context, address, pmd_val(*pmd));
641 }
642#endif
643}
644
645/*
646 * This is called by /dev/mem to know if a given address has to
647 * be mapped non-cacheable or not
648 */
649int page_is_ram(unsigned long pfn)
650{
651 unsigned long paddr = (pfn << PAGE_SHIFT);
652
653 return paddr < __pa(high_memory);
654}
655
656pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
657 unsigned long size, pgprot_t vma_prot)
658{
659 if (ppc_md.phys_mem_access_prot)
660 return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot);
661
662 if (!page_is_ram(addr >> PAGE_SHIFT))
663 vma_prot = __pgprot(pgprot_val(vma_prot)
664 | _PAGE_GUARDED | _PAGE_NO_CACHE);
665 return vma_prot;
666}
667EXPORT_SYMBOL(phys_mem_access_prot);
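
A user-space sketch (not from this commit; parse_maxmem is an illustrative name, and strtoul stands in for the kernel's simple_strtoul) of the mem= handling in MMU_setup() above: the last well-formed mem= token wins, and a trailing k/K or m/M suffix scales the value.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parse every "mem=" token the way MMU_setup() does: the last one seen
 * wins, and a trailing k/K or m/M suffix scales the value. */
static unsigned long parse_maxmem(const char *cmd_line)
{
	const char *p, *q;
	unsigned long maxmem = 0;
	char *end;

	for (q = cmd_line; (p = strstr(q, "mem=")) != NULL; ) {
		q = p + 4;
		if (p > cmd_line && p[-1] != ' ')
			continue;	/* "somemem=..." is not the option */
		maxmem = strtoul(q, &end, 0);
		if (*end == 'k' || *end == 'K')
			maxmem <<= 10;
		else if (*end == 'm' || *end == 'M')
			maxmem <<= 20;
		q = end;
	}
	return maxmem;
}

int main(void)
{
	/* prints 67108864 (64MB) */
	printf("%lu\n", parse_maxmem("root=/dev/hda1 mem=64M console=ttyS0"));
	return 0;
}
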
diff --git a/arch/ppc/mm/mem_pieces.c b/arch/ppc/mm/mem_pieces.c
new file mode 100644
index 000000000000..3d639052017e
--- /dev/null
+++ b/arch/ppc/mm/mem_pieces.c
@@ -0,0 +1,163 @@
1/*
2 * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
3 * Changes to accommodate Power Macintoshes.
4 * Cort Dougan <cort@cs.nmt.edu>
5 * Rewrites.
6 * Grant Erickson <grant@lcse.umn.edu>
7 * General rework and split from mm/init.c.
8 *
9 * Module name: mem_pieces.c
10 *
11 * Description:
12 * Routines and data structures for manipulating and representing
13 *	physical memory extents (i.e. address/length pairs).
14 *
15 */
16
17#include <linux/config.h>
18#include <linux/kernel.h>
19#include <linux/stddef.h>
20#include <linux/init.h>
21#include <asm/page.h>
22
23#include "mem_pieces.h"
24
25extern struct mem_pieces phys_avail;
26
27static void mem_pieces_print(struct mem_pieces *);
28
29/*
30 * Scan a region for a piece of a given size with the required alignment.
31 */
32void __init *
33mem_pieces_find(unsigned int size, unsigned int align)
34{
35 int i;
36 unsigned a, e;
37 struct mem_pieces *mp = &phys_avail;
38
39 for (i = 0; i < mp->n_regions; ++i) {
40 a = mp->regions[i].address;
41 e = a + mp->regions[i].size;
42 a = (a + align - 1) & -align;
43 if (a + size <= e) {
44 mem_pieces_remove(mp, a, size, 1);
45 return (void *) __va(a);
46 }
47 }
48 panic("Couldn't find %u bytes at %u alignment\n", size, align);
49
50 return NULL;
51}
52
53/*
54 * Remove some memory from an array of pieces
55 */
56void __init
57mem_pieces_remove(struct mem_pieces *mp, unsigned int start, unsigned int size,
58 int must_exist)
59{
60 int i, j;
61 unsigned int end, rs, re;
62 struct reg_property *rp;
63
64 end = start + size;
65 for (i = 0, rp = mp->regions; i < mp->n_regions; ++i, ++rp) {
66 if (end > rp->address && start < rp->address + rp->size)
67 break;
68 }
69 if (i >= mp->n_regions) {
70 if (must_exist)
71 printk("mem_pieces_remove: [%x,%x) not in any region\n",
72 start, end);
73 return;
74 }
75 for (; i < mp->n_regions && end > rp->address; ++i, ++rp) {
76 rs = rp->address;
77 re = rs + rp->size;
78 if (must_exist && (start < rs || end > re)) {
79 printk("mem_pieces_remove: bad overlap [%x,%x) with",
80 start, end);
81 mem_pieces_print(mp);
82 must_exist = 0;
83 }
84 if (start > rs) {
85 rp->size = start - rs;
86 if (end < re) {
87 /* need to split this entry */
88 if (mp->n_regions >= MEM_PIECES_MAX)
89 panic("eek... mem_pieces overflow");
90 for (j = mp->n_regions; j > i + 1; --j)
91 mp->regions[j] = mp->regions[j-1];
92 ++mp->n_regions;
93 rp[1].address = end;
94 rp[1].size = re - end;
95 }
96 } else {
97 if (end < re) {
98 rp->address = end;
99 rp->size = re - end;
100 } else {
101 /* need to delete this entry */
102 for (j = i; j < mp->n_regions - 1; ++j)
103 mp->regions[j] = mp->regions[j+1];
104 --mp->n_regions;
105 --i;
106 --rp;
107 }
108 }
109 }
110}
111
112static void __init
113mem_pieces_print(struct mem_pieces *mp)
114{
115 int i;
116
117 for (i = 0; i < mp->n_regions; ++i)
118 printk(" [%x, %x)", mp->regions[i].address,
119 mp->regions[i].address + mp->regions[i].size);
120 printk("\n");
121}
122
123void __init
124mem_pieces_sort(struct mem_pieces *mp)
125{
126 unsigned long a, s;
127 int i, j;
128
129 for (i = 1; i < mp->n_regions; ++i) {
130 a = mp->regions[i].address;
131 s = mp->regions[i].size;
132 for (j = i - 1; j >= 0; --j) {
133 if (a >= mp->regions[j].address)
134 break;
135 mp->regions[j+1] = mp->regions[j];
136 }
137 mp->regions[j+1].address = a;
138 mp->regions[j+1].size = s;
139 }
140}
141
142void __init
143mem_pieces_coalesce(struct mem_pieces *mp)
144{
145 unsigned long a, s, ns;
146 int i, j, d;
147
148 d = 0;
149 for (i = 0; i < mp->n_regions; i = j) {
150 a = mp->regions[i].address;
151 s = mp->regions[i].size;
152 for (j = i + 1; j < mp->n_regions
153 && mp->regions[j].address - a <= s; ++j) {
154 ns = mp->regions[j].address + mp->regions[j].size - a;
155 if (ns > s)
156 s = ns;
157 }
158 mp->regions[d].address = a;
159 mp->regions[d].size = s;
160 ++d;
161 }
162 mp->n_regions = d;
163}
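
A simplified user-space re-implementation (illustrative only; the reduced struct names are not from this commit) of the trim/split/delete cases handled by mem_pieces_remove() above, shown carving a 1MB hole out of a single 256MB region.

#include <stdio.h>

#define MAX_REGIONS 8

struct region { unsigned int address, size; };
struct pieces { int n; struct region r[MAX_REGIONS]; };

/* Remove [start, start+size) from the region list: overlapping regions
 * are trimmed, split in two, or deleted outright. */
static void pieces_remove(struct pieces *mp, unsigned int start, unsigned int size)
{
	unsigned int end = start + size;
	int i, j;

	for (i = 0; i < mp->n; ++i) {
		unsigned int rs = mp->r[i].address;
		unsigned int re = rs + mp->r[i].size;

		if (end <= rs || start >= re)
			continue;			/* no overlap */
		if (start > rs && end < re) {		/* split this entry */
			for (j = mp->n; j > i + 1; --j)
				mp->r[j] = mp->r[j - 1];
			++mp->n;
			mp->r[i].size = start - rs;
			mp->r[i + 1].address = end;
			mp->r[i + 1].size = re - end;
		} else if (start > rs) {		/* keep the head */
			mp->r[i].size = start - rs;
		} else if (end < re) {			/* keep the tail */
			mp->r[i].address = end;
			mp->r[i].size = re - end;
		} else {				/* delete this entry */
			for (j = i; j < mp->n - 1; ++j)
				mp->r[j] = mp->r[j + 1];
			--mp->n;
			--i;
		}
	}
}

int main(void)
{
	struct pieces p = { 1, { { 0x0, 0x10000000 } } };	/* one 256MB region */
	int i;

	pieces_remove(&p, 0x00400000, 0x00100000);		/* remove 1MB at 4MB */
	for (i = 0; i < p.n; ++i)
		printf("[%#x, %#x)\n", p.r[i].address,
		       p.r[i].address + p.r[i].size);
	return 0;
}
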
diff --git a/arch/ppc/mm/mem_pieces.h b/arch/ppc/mm/mem_pieces.h
new file mode 100644
index 000000000000..e2b700dc7f18
--- /dev/null
+++ b/arch/ppc/mm/mem_pieces.h
@@ -0,0 +1,48 @@
1/*
2 * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
3 * Changes to accommodate Power Macintoshes.
4 * Cort Dougan <cort@cs.nmt.edu>
5 * Rewrites.
6 * Grant Erickson <grant@lcse.umn.edu>
7 * General rework and split from mm/init.c.
8 *
9 * Module name: mem_pieces.h
10 *
11 * Description:
12 * Routines and data structures for manipulating and representing
13 *	physical memory extents (i.e. address/length pairs).
14 *
15 */
16
17#ifndef __MEM_PIECES_H__
18#define __MEM_PIECES_H__
19
20#include <asm/prom.h>
21
22#ifdef __cplusplus
23extern "C" {
24#endif
25
26
27/* Type Definitions */
28
29#define MEM_PIECES_MAX 32
30
31struct mem_pieces {
32 int n_regions;
33 struct reg_property regions[MEM_PIECES_MAX];
34};
35
36/* Function Prototypes */
37
38extern void *mem_pieces_find(unsigned int size, unsigned int align);
39extern void mem_pieces_remove(struct mem_pieces *mp, unsigned int start,
40 unsigned int size, int must_exist);
41extern void mem_pieces_coalesce(struct mem_pieces *mp);
42extern void mem_pieces_sort(struct mem_pieces *mp);
43
44#ifdef __cplusplus
45}
46#endif
47
48#endif /* __MEM_PIECES_H__ */
diff --git a/arch/ppc/mm/mmu_context.c b/arch/ppc/mm/mmu_context.c
new file mode 100644
index 000000000000..a8816e0f6a86
--- /dev/null
+++ b/arch/ppc/mm/mmu_context.c
@@ -0,0 +1,86 @@
1/*
2 * This file contains the routines for handling the MMU on those
3 * PowerPC implementations where the MMU substantially follows the
4 * architecture specification. This includes the 6xx, 7xx, 7xxx,
5 * 8260, and POWER3 implementations but excludes the 8xx and 4xx.
6 * -- paulus
7 *
8 * Derived from arch/ppc/mm/init.c:
9 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
10 *
11 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
12 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
13 * Copyright (C) 1996 Paul Mackerras
14 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
15 *
16 * Derived from "arch/i386/mm/init.c"
17 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
18 *
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
23 *
24 */
25
26#include <linux/config.h>
27#include <linux/mm.h>
28#include <linux/init.h>
29
30#include <asm/mmu_context.h>
31#include <asm/tlbflush.h>
32
33mm_context_t next_mmu_context;
34unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
35#ifdef FEW_CONTEXTS
36atomic_t nr_free_contexts;
37struct mm_struct *context_mm[LAST_CONTEXT+1];
38void steal_context(void);
39#endif /* FEW_CONTEXTS */
40
41/*
42 * Initialize the context management stuff.
43 */
44void __init
45mmu_context_init(void)
46{
47 /*
48 * Some processors have too few contexts to reserve one for
49 * init_mm, and require using context 0 for a normal task.
50 * Other processors reserve the use of context zero for the kernel.
51 * This code assumes FIRST_CONTEXT < 32.
52 */
53 context_map[0] = (1 << FIRST_CONTEXT) - 1;
54 next_mmu_context = FIRST_CONTEXT;
55#ifdef FEW_CONTEXTS
56 atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1);
57#endif /* FEW_CONTEXTS */
58}
59
60#ifdef FEW_CONTEXTS
61/*
62 * Steal a context from a task that has one at the moment.
63 * This is only used on 8xx and 4xx and we presently assume that
64 * they don't do SMP. If they do then this will have to check
65 * whether the MM we steal is in use.
66 * We also assume that this is only used on systems that don't
67 * use an MMU hash table - this is true for 8xx and 4xx.
68 * This isn't an LRU system, it just frees up each context in
69 * turn (sort-of pseudo-random replacement :). This would be the
70 * place to implement an LRU scheme if anyone was motivated to do it.
71 * -- paulus
72 */
73void
74steal_context(void)
75{
76 struct mm_struct *mm;
77
78 /* free up context `next_mmu_context' */
79 /* if we shouldn't free context 0, don't... */
80 if (next_mmu_context < FIRST_CONTEXT)
81 next_mmu_context = FIRST_CONTEXT;
82 mm = context_mm[next_mmu_context];
83 flush_tlb_mm(mm);
84 destroy_context(mm);
85}
86#endif /* FEW_CONTEXTS */
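
A toy sketch (not from this commit; the real allocator lives in the mmu_context.h headers and is not shown here) of the context_map convention that mmu_context_init() sets up: contexts below FIRST_CONTEXT are pre-marked busy, and allocation is then a find-first-zero scan over the bitmap.

#include <stdio.h>

#define FIRST_CONTEXT 1
#define LAST_CONTEXT  15	/* deliberately small; real parts have many more */

static unsigned long context_map;

/* Hand out the lowest free context, or -1 if a caller would have to
 * steal one (cf. steal_context() above). */
static int alloc_context(void)
{
	int ctx;

	for (ctx = 0; ctx <= LAST_CONTEXT; ++ctx) {
		if (!(context_map & (1UL << ctx))) {
			context_map |= 1UL << ctx;
			return ctx;
		}
	}
	return -1;
}

int main(void)
{
	context_map = (1UL << FIRST_CONTEXT) - 1;	/* reserve contexts below FIRST_CONTEXT */
	printf("first allocated context: %d\n", alloc_context());	/* prints 1 */
	return 0;
}
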
diff --git a/arch/ppc/mm/mmu_decl.h b/arch/ppc/mm/mmu_decl.h
new file mode 100644
index 000000000000..ffcdb46997dc
--- /dev/null
+++ b/arch/ppc/mm/mmu_decl.h
@@ -0,0 +1,83 @@
1/*
2 * Declarations of procedures and variables shared between files
3 * in arch/ppc/mm/.
4 *
5 * Derived from arch/ppc/mm/init.c:
6 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
7 *
8 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
9 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
10 * Copyright (C) 1996 Paul Mackerras
11 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
12 *
13 * Derived from "arch/i386/mm/init.c"
14 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
15 *
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version
19 * 2 of the License, or (at your option) any later version.
20 *
21 */
22#include <asm/tlbflush.h>
23#include <asm/mmu.h>
24
25extern void mapin_ram(void);
26extern int map_page(unsigned long va, phys_addr_t pa, int flags);
27extern void setbat(int index, unsigned long virt, unsigned long phys,
28 unsigned int size, int flags);
29extern void reserve_phys_mem(unsigned long start, unsigned long size);
30extern void settlbcam(int index, unsigned long virt, phys_addr_t phys,
31 unsigned int size, int flags, unsigned int pid);
32extern void invalidate_tlbcam_entry(int index);
33
34extern int __map_without_bats;
35extern unsigned long ioremap_base;
36extern unsigned long ioremap_bot;
37extern unsigned int rtas_data, rtas_size;
38
39extern unsigned long total_memory;
40extern unsigned long total_lowmem;
41extern int mem_init_done;
42
43extern PTE *Hash, *Hash_end;
44extern unsigned long Hash_size, Hash_mask;
45
46/* ...and now those things that may be slightly different between processor
47 * architectures. -- Dan
48 */
49#if defined(CONFIG_8xx)
50#define flush_HPTE(X, va, pg) _tlbie(va)
51#define MMU_init_hw() do { } while(0)
52#define mmu_mapin_ram() (0UL)
53
54#elif defined(CONFIG_4xx)
55#define flush_HPTE(X, va, pg) _tlbie(va)
56extern void MMU_init_hw(void);
57extern unsigned long mmu_mapin_ram(void);
58
59#elif defined(CONFIG_FSL_BOOKE)
60#define flush_HPTE(X, va, pg) _tlbie(va)
61extern void MMU_init_hw(void);
62extern unsigned long mmu_mapin_ram(void);
63extern void adjust_total_lowmem(void);
64
65#else
66/* anything except 4xx or 8xx */
67extern void MMU_init_hw(void);
68extern unsigned long mmu_mapin_ram(void);
69
70/* Be careful....this needs to be updated if we ever encounter 603 SMPs,
71 * which includes all new 82xx processors. We need tlbie/tlbsync here
72 * in that case (I think). -- Dan.
73 */
74static inline void flush_HPTE(unsigned context, unsigned long va,
75 unsigned long pdval)
76{
77 if ((Hash != 0) &&
78 cpu_has_feature(CPU_FTR_HPTE_TABLE))
79 flush_hash_pages(0, va, pdval, 1);
80 else
81 _tlbie(va);
82}
83#endif
diff --git a/arch/ppc/mm/pgtable.c b/arch/ppc/mm/pgtable.c
new file mode 100644
index 000000000000..0a5cd20275c4
--- /dev/null
+++ b/arch/ppc/mm/pgtable.c
@@ -0,0 +1,471 @@
1/*
2 * This file contains the routines setting up the linux page tables.
3 * -- paulus
4 *
5 * Derived from arch/ppc/mm/init.c:
6 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
7 *
8 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
9 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
10 * Copyright (C) 1996 Paul Mackerras
11 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
12 *
13 * Derived from "arch/i386/mm/init.c"
14 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
15 *
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version
19 * 2 of the License, or (at your option) any later version.
20 *
21 */
22
23#include <linux/config.h>
24#include <linux/kernel.h>
25#include <linux/module.h>
26#include <linux/types.h>
27#include <linux/mm.h>
28#include <linux/vmalloc.h>
29#include <linux/init.h>
30#include <linux/highmem.h>
31
32#include <asm/pgtable.h>
33#include <asm/pgalloc.h>
34#include <asm/io.h>
35
36#include "mmu_decl.h"
37
38unsigned long ioremap_base;
39unsigned long ioremap_bot;
40int io_bat_index;
41
42#if defined(CONFIG_6xx) || defined(CONFIG_POWER3)
43#define HAVE_BATS 1
44#endif
45
46#if defined(CONFIG_FSL_BOOKE)
47#define HAVE_TLBCAM 1
48#endif
49
50extern char etext[], _stext[];
51
52#ifdef CONFIG_SMP
53extern void hash_page_sync(void);
54#endif
55
56#ifdef HAVE_BATS
57extern unsigned long v_mapped_by_bats(unsigned long va);
58extern unsigned long p_mapped_by_bats(unsigned long pa);
59void setbat(int index, unsigned long virt, unsigned long phys,
60 unsigned int size, int flags);
61
62#else /* !HAVE_BATS */
63#define v_mapped_by_bats(x) (0UL)
64#define p_mapped_by_bats(x) (0UL)
65#endif /* HAVE_BATS */
66
67#ifdef HAVE_TLBCAM
68extern unsigned int tlbcam_index;
69extern unsigned int num_tlbcam_entries;
70extern unsigned long v_mapped_by_tlbcam(unsigned long va);
71extern unsigned long p_mapped_by_tlbcam(unsigned long pa);
72#else /* !HAVE_TLBCAM */
73#define v_mapped_by_tlbcam(x) (0UL)
74#define p_mapped_by_tlbcam(x) (0UL)
75#endif /* HAVE_TLBCAM */
76
77#ifdef CONFIG_44x
78/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */
79#define PGDIR_ORDER 1
80#else
81#define PGDIR_ORDER 0
82#endif
83
84pgd_t *pgd_alloc(struct mm_struct *mm)
85{
86 pgd_t *ret;
87
88 ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
89 return ret;
90}
91
92void pgd_free(pgd_t *pgd)
93{
94 free_pages((unsigned long)pgd, PGDIR_ORDER);
95}
96
97pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
98{
99 pte_t *pte;
100 extern int mem_init_done;
101 extern void *early_get_page(void);
102
103 if (mem_init_done) {
104 pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
105 } else {
106 pte = (pte_t *)early_get_page();
107 if (pte)
108 clear_page(pte);
109 }
110 return pte;
111}
112
113struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
114{
115 struct page *ptepage;
116
117#ifdef CONFIG_HIGHPTE
118 int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
119#else
120 int flags = GFP_KERNEL | __GFP_REPEAT;
121#endif
122
123 ptepage = alloc_pages(flags, 0);
124 if (ptepage)
125 clear_highpage(ptepage);
126 return ptepage;
127}
128
129void pte_free_kernel(pte_t *pte)
130{
131#ifdef CONFIG_SMP
132 hash_page_sync();
133#endif
134 free_page((unsigned long)pte);
135}
136
137void pte_free(struct page *ptepage)
138{
139#ifdef CONFIG_SMP
140 hash_page_sync();
141#endif
142 __free_page(ptepage);
143}
144
145#ifndef CONFIG_44x
146void __iomem *
147ioremap(phys_addr_t addr, unsigned long size)
148{
149 return __ioremap(addr, size, _PAGE_NO_CACHE);
150}
151#else /* CONFIG_44x */
152void __iomem *
153ioremap64(unsigned long long addr, unsigned long size)
154{
155 return __ioremap(addr, size, _PAGE_NO_CACHE);
156}
157
158void __iomem *
159ioremap(phys_addr_t addr, unsigned long size)
160{
161 phys_addr_t addr64 = fixup_bigphys_addr(addr, size);
162
163 return ioremap64(addr64, size);
164}
165#endif /* CONFIG_44x */
166
167void __iomem *
168__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
169{
170 unsigned long v, i;
171 phys_addr_t p;
172 int err;
173
174 /*
175 * Choose an address to map it to.
176 * Once the vmalloc system is running, we use it.
177 * Before then, we use space going down from ioremap_base
178 * (ioremap_bot records where we're up to).
179 */
180 p = addr & PAGE_MASK;
181 size = PAGE_ALIGN(addr + size) - p;
182
183 /*
184 * If the address lies within the first 16 MB, assume it's in ISA
185 * memory space
186 */
187 if (p < 16*1024*1024)
188 p += _ISA_MEM_BASE;
189
190 /*
191 * Don't allow anybody to remap normal RAM that we're using.
192 * mem_init() sets high_memory so only do the check after that.
193 */
194 if ( mem_init_done && (p < virt_to_phys(high_memory)) )
195 {
196 printk("__ioremap(): phys addr "PTE_FMT" is RAM lr %p\n", p,
197 __builtin_return_address(0));
198 return NULL;
199 }
200
201 if (size == 0)
202 return NULL;
203
204 /*
205 * Is it already mapped? Perhaps overlapped by a previous
206 * BAT mapping. If the whole area is mapped then we're done,
207 * otherwise remap it since we want to keep the virt addrs for
208 * each request contiguous.
209 *
210 * We make the assumption here that if the bottom and top
211 * of the range we want are mapped then it's mapped to the
212 * same virt address (and this is contiguous).
213 * -- Cort
214 */
215 if ((v = p_mapped_by_bats(p)) /*&& p_mapped_by_bats(p+size-1)*/ )
216 goto out;
217
218 if ((v = p_mapped_by_tlbcam(p)))
219 goto out;
220
221 if (mem_init_done) {
222 struct vm_struct *area;
223 area = get_vm_area(size, VM_IOREMAP);
224 if (area == 0)
225 return NULL;
226 v = (unsigned long) area->addr;
227 } else {
228 v = (ioremap_bot -= size);
229 }
230
231 if ((flags & _PAGE_PRESENT) == 0)
232 flags |= _PAGE_KERNEL;
233 if (flags & _PAGE_NO_CACHE)
234 flags |= _PAGE_GUARDED;
235
236 /*
237 * Should check if it is a candidate for a BAT mapping
238 */
239
240 err = 0;
241 for (i = 0; i < size && err == 0; i += PAGE_SIZE)
242 err = map_page(v+i, p+i, flags);
243 if (err) {
244 if (mem_init_done)
245 vunmap((void *)v);
246 return NULL;
247 }
248
249out:
250 return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK));
251}
252
253void iounmap(volatile void __iomem *addr)
254{
255 /*
256 * If mapped by BATs then there is nothing to do.
257 * Calling vfree() generates a benign warning.
258 */
259 if (v_mapped_by_bats((unsigned long)addr)) return;
260
261 if (addr > high_memory && (unsigned long) addr < ioremap_bot)
262 vunmap((void *) (PAGE_MASK & (unsigned long)addr));
263}
264
265void __iomem *ioport_map(unsigned long port, unsigned int len)
266{
267 return (void __iomem *) (port + _IO_BASE);
268}
269
270void ioport_unmap(void __iomem *addr)
271{
272 /* Nothing to do */
273}
274EXPORT_SYMBOL(ioport_map);
275EXPORT_SYMBOL(ioport_unmap);
276
277int
278map_page(unsigned long va, phys_addr_t pa, int flags)
279{
280 pmd_t *pd;
281 pte_t *pg;
282 int err = -ENOMEM;
283
284 spin_lock(&init_mm.page_table_lock);
285 /* Use upper 10 bits of VA to index the first level map */
286 pd = pmd_offset(pgd_offset_k(va), va);
287 /* Use middle 10 bits of VA to index the second-level map */
288 pg = pte_alloc_kernel(&init_mm, pd, va);
289 if (pg != 0) {
290 err = 0;
291 set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
292 if (mem_init_done)
293 flush_HPTE(0, va, pmd_val(*pd));
294 }
295 spin_unlock(&init_mm.page_table_lock);
296 return err;
297}
298
299/*
300 * Map in all of physical memory starting at KERNELBASE.
301 */
302void __init mapin_ram(void)
303{
304 unsigned long v, p, s, f;
305
306 s = mmu_mapin_ram();
307 v = KERNELBASE + s;
308 p = PPC_MEMSTART + s;
309 for (; s < total_lowmem; s += PAGE_SIZE) {
310 if ((char *) v >= _stext && (char *) v < etext)
311 f = _PAGE_RAM_TEXT;
312 else
313 f = _PAGE_RAM;
314 map_page(v, p, f);
315 v += PAGE_SIZE;
316 p += PAGE_SIZE;
317 }
318}
319
320/* is x a power of 2? */
321#define is_power_of_2(x) ((x) != 0 && (((x) & ((x) - 1)) == 0))
322
323/* is x a power of 4? */
324#define is_power_of_4(x) ((x) != 0 && (((x) & (x-1)) == 0) && (ffs(x) & 1))
325
326/*
327 * Set up a mapping for a block of I/O.
328 * virt, phys, size must all be page-aligned.
329 * This should only be called before ioremap is called.
330 */
331void __init io_block_mapping(unsigned long virt, phys_addr_t phys,
332 unsigned int size, int flags)
333{
334 int i;
335
336 if (virt > KERNELBASE && virt < ioremap_bot)
337 ioremap_bot = ioremap_base = virt;
338
339#ifdef HAVE_BATS
340 /*
341 * Use a BAT for this if possible...
342 */
343 if (io_bat_index < 2 && is_power_of_2(size)
344 && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) {
345 setbat(io_bat_index, virt, phys, size, flags);
346 ++io_bat_index;
347 return;
348 }
349#endif /* HAVE_BATS */
350
351#ifdef HAVE_TLBCAM
352 /*
353 * Use a CAM for this if possible...
354 */
355 if (tlbcam_index < num_tlbcam_entries && is_power_of_4(size)
356 && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) {
357 settlbcam(tlbcam_index, virt, phys, size, flags, 0);
358 ++tlbcam_index;
359 return;
360 }
361#endif /* HAVE_TLBCAM */
362
363 /* No BATs available, put it in the page tables. */
364 for (i = 0; i < size; i += PAGE_SIZE)
365 map_page(virt + i, phys + i, flags);
366}
367
368/* Scan the real Linux page tables and return a PTE pointer for
369 * a virtual address in a context.
370 * Returns true (1) if PTE was found, zero otherwise. The pointer to
371 * the PTE pointer is unmodified if PTE is not found.
372 */
373int
374get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep)
375{
376 pgd_t *pgd;
377 pmd_t *pmd;
378 pte_t *pte;
379 int retval = 0;
380
381 pgd = pgd_offset(mm, addr & PAGE_MASK);
382 if (pgd) {
383 pmd = pmd_offset(pgd, addr & PAGE_MASK);
384 if (pmd_present(*pmd)) {
385 pte = pte_offset_map(pmd, addr & PAGE_MASK);
386 if (pte) {
387 retval = 1;
388 *ptep = pte;
389 /* XXX caller needs to do pte_unmap, yuck */
390 }
391 }
392 }
393 return(retval);
394}
395
396/* Find physical address for this virtual address. Normally used by
397 * I/O functions, but anyone can call it.
398 */
399unsigned long iopa(unsigned long addr)
400{
401 unsigned long pa;
402
403 /* I don't know why this won't work on PMacs or CHRP. It
404 * appears there is some bug, or there is some implicit
405 * mapping done not properly represented by BATs or in page
406 * tables.......I am actively working on resolving this, but
407 * can't hold up other stuff. -- Dan
408 */
409 pte_t *pte;
410 struct mm_struct *mm;
411
412 /* Check the BATs */
413 pa = v_mapped_by_bats(addr);
414 if (pa)
415 return pa;
416
417 /* Allow mapping of user addresses (within the thread)
418 * for DMA if necessary.
419 */
420 if (addr < TASK_SIZE)
421 mm = current->mm;
422 else
423 mm = &init_mm;
424
425 pa = 0;
426 if (get_pteptr(mm, addr, &pte)) {
427 pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK);
428 pte_unmap(pte);
429 }
430
431 return(pa);
432}
433
434/* This will find the virtual address for a physical one....
435 * Swiped from APUS, could be dangerous :-).
436 * This is only a placeholder until I really find a way to make this
437 * work. -- Dan
438 */
439unsigned long
440mm_ptov (unsigned long paddr)
441{
442 unsigned long ret;
443#if 0
444 if (paddr < 16*1024*1024)
445 ret = ZTWO_VADDR(paddr);
446 else {
447 int i;
448
449 for (i = 0; i < kmap_chunk_count;){
450 unsigned long phys = kmap_chunks[i++];
451 unsigned long size = kmap_chunks[i++];
452 unsigned long virt = kmap_chunks[i++];
453 if (paddr >= phys
454 && paddr < (phys + size)){
455 ret = virt + paddr - phys;
456 goto exit;
457 }
458 }
459
460 ret = (unsigned long) __va(paddr);
461 }
462exit:
463#ifdef DEBUGPV
464 printk ("PTOV(%lx)=%lx\n", paddr, ret);
465#endif
466#else
467 ret = (unsigned long)paddr + KERNELBASE;
468#endif
469 return ret;
470}
471
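
A standalone sketch (illustrative constants; not from this commit) of the virtual-address split that map_page() relies on for the classic 4kB-page PPC32 layout: the upper 10 bits index the first-level map, the middle 10 bits index the second-level map, and the low 12 bits are the page offset. (The 44x case with 8-byte PTEs and an 8kB pgdir differs.)

#include <stdio.h>

#define PAGE_SHIFT	12
#define PTRS_PER_PTE	1024

static void split_va(unsigned long va)
{
	unsigned long pgd_idx = va >> (PAGE_SHIFT + 10);		 /* upper 10 bits */
	unsigned long pte_idx = (va >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); /* middle 10 bits */
	unsigned long offset  = va & ((1UL << PAGE_SHIFT) - 1);		 /* low 12 bits */

	printf("va %#lx -> pgd[%lu] pte[%lu] offset %#lx\n",
	       va, pgd_idx, pte_idx, offset);
}

int main(void)
{
	split_va(0xc0001234UL);	/* a typical lowmem kernel address */
	return 0;
}
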
diff --git a/arch/ppc/mm/ppc_mmu.c b/arch/ppc/mm/ppc_mmu.c
new file mode 100644
index 000000000000..9a381ed5eb21
--- /dev/null
+++ b/arch/ppc/mm/ppc_mmu.c
@@ -0,0 +1,296 @@
1/*
2 * This file contains the routines for handling the MMU on those
3 * PowerPC implementations where the MMU substantially follows the
4 * architecture specification. This includes the 6xx, 7xx, 7xxx,
5 * 8260, and POWER3 implementations but excludes the 8xx and 4xx.
6 * -- paulus
7 *
8 * Derived from arch/ppc/mm/init.c:
9 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
10 *
11 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
12 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
13 * Copyright (C) 1996 Paul Mackerras
14 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
15 *
16 * Derived from "arch/i386/mm/init.c"
17 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
18 *
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
23 *
24 */
25
26#include <linux/config.h>
27#include <linux/kernel.h>
28#include <linux/mm.h>
29#include <linux/init.h>
30#include <linux/highmem.h>
31
32#include <asm/prom.h>
33#include <asm/mmu.h>
34#include <asm/machdep.h>
35
36#include "mmu_decl.h"
37#include "mem_pieces.h"
38
39PTE *Hash, *Hash_end;
40unsigned long Hash_size, Hash_mask;
41unsigned long _SDR1;
42
43union ubat { /* BAT register values to be loaded */
44 BAT bat;
45#ifdef CONFIG_PPC64BRIDGE
46 u64 word[2];
47#else
48 u32 word[2];
49#endif
50} BATS[4][2]; /* 4 pairs of IBAT, DBAT */
51
52struct batrange { /* stores address ranges mapped by BATs */
53 unsigned long start;
54 unsigned long limit;
55 unsigned long phys;
56} bat_addrs[4];
57
58/*
59 * Return PA for this VA if it is mapped by a BAT, or 0
60 */
61unsigned long v_mapped_by_bats(unsigned long va)
62{
63 int b;
64 for (b = 0; b < 4; ++b)
65 if (va >= bat_addrs[b].start && va < bat_addrs[b].limit)
66 return bat_addrs[b].phys + (va - bat_addrs[b].start);
67 return 0;
68}
69
70/*
71 * Return VA for a given PA or 0 if not mapped
72 */
73unsigned long p_mapped_by_bats(unsigned long pa)
74{
75 int b;
76 for (b = 0; b < 4; ++b)
77 if (pa >= bat_addrs[b].phys
78 && pa < (bat_addrs[b].limit-bat_addrs[b].start)
79 +bat_addrs[b].phys)
80 return bat_addrs[b].start+(pa-bat_addrs[b].phys);
81 return 0;
82}
83
84unsigned long __init mmu_mapin_ram(void)
85{
86#ifdef CONFIG_POWER4
87 return 0;
88#else
89 unsigned long tot, bl, done;
90 unsigned long max_size = (256<<20);
91 unsigned long align;
92
93 if (__map_without_bats)
94 return 0;
95
96 /* Set up BAT2 and if necessary BAT3 to cover RAM. */
97
98 /* Make sure we don't map a block larger than the
99 smallest alignment of the physical address. */
100 /* alignment of PPC_MEMSTART */
101 align = ~(PPC_MEMSTART-1) & PPC_MEMSTART;
102 /* set BAT block size to MIN(max_size, align) */
103 if (align && align < max_size)
104 max_size = align;
105
106 tot = total_lowmem;
107 for (bl = 128<<10; bl < max_size; bl <<= 1) {
108 if (bl * 2 > tot)
109 break;
110 }
111
112 setbat(2, KERNELBASE, PPC_MEMSTART, bl, _PAGE_RAM);
113 done = (unsigned long)bat_addrs[2].limit - KERNELBASE + 1;
114 if ((done < tot) && !bat_addrs[3].limit) {
115 /* use BAT3 to cover a bit more */
116 tot -= done;
117 for (bl = 128<<10; bl < max_size; bl <<= 1)
118 if (bl * 2 > tot)
119 break;
120 setbat(3, KERNELBASE+done, PPC_MEMSTART+done, bl, _PAGE_RAM);
121 done = (unsigned long)bat_addrs[3].limit - KERNELBASE + 1;
122 }
123
124 return done;
125#endif
126}
127
128/*
129 * Set up one of the I/D BAT (block address translation) register pairs.
130 * The parameters are not checked; in particular size must be a power
131 * of 2 between 128k and 256M.
132 */
133void __init setbat(int index, unsigned long virt, unsigned long phys,
134 unsigned int size, int flags)
135{
136 unsigned int bl;
137 int wimgxpp;
138 union ubat *bat = BATS[index];
139
140 if (((flags & _PAGE_NO_CACHE) == 0) &&
141 cpu_has_feature(CPU_FTR_NEED_COHERENT))
142 flags |= _PAGE_COHERENT;
143
144 bl = (size >> 17) - 1;
145 if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
146 /* 603, 604, etc. */
147 /* Do DBAT first */
148 wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
149 | _PAGE_COHERENT | _PAGE_GUARDED);
150 wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
151 bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
152 bat[1].word[1] = phys | wimgxpp;
153#ifndef CONFIG_KGDB /* want user access for breakpoints */
154 if (flags & _PAGE_USER)
155#endif
156 bat[1].bat.batu.vp = 1;
157 if (flags & _PAGE_GUARDED) {
158 /* G bit must be zero in IBATs */
159 bat[0].word[0] = bat[0].word[1] = 0;
160 } else {
161 /* make IBAT same as DBAT */
162 bat[0] = bat[1];
163 }
164 } else {
165 /* 601 cpu */
166 if (bl > BL_8M)
167 bl = BL_8M;
168 wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
169 | _PAGE_COHERENT);
170 wimgxpp |= (flags & _PAGE_RW)?
171 ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX;
172 bat->word[0] = virt | wimgxpp | 4; /* Ks=0, Ku=1 */
173 bat->word[1] = phys | bl | 0x40; /* V=1 */
174 }
175
176 bat_addrs[index].start = virt;
177 bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1;
178 bat_addrs[index].phys = phys;
179}
180
181/*
182 * Initialize the hash table and patch the instructions in hashtable.S.
183 */
184void __init MMU_init_hw(void)
185{
186 unsigned int hmask, mb, mb2;
187 unsigned int n_hpteg, lg_n_hpteg;
188
189 extern unsigned int hash_page_patch_A[];
190 extern unsigned int hash_page_patch_B[], hash_page_patch_C[];
191 extern unsigned int hash_page[];
192 extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[];
193
194 if (!cpu_has_feature(CPU_FTR_HPTE_TABLE)) {
195 /*
196 * Put a blr (procedure return) instruction at the
197 * start of hash_page, since we can still get DSI
198 * exceptions on a 603.
199 */
200 hash_page[0] = 0x4e800020;
201 flush_icache_range((unsigned long) &hash_page[0],
202 (unsigned long) &hash_page[1]);
203 return;
204 }
205
206 if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105);
207
208#ifdef CONFIG_PPC64BRIDGE
209#define LG_HPTEG_SIZE 7 /* 128 bytes per HPTEG */
210#define SDR1_LOW_BITS (lg_n_hpteg - 11)
211#define MIN_N_HPTEG 2048 /* min 256kB hash table */
212#else
213#define LG_HPTEG_SIZE 6 /* 64 bytes per HPTEG */
214#define SDR1_LOW_BITS ((n_hpteg - 1) >> 10)
215#define MIN_N_HPTEG 1024 /* min 64kB hash table */
216#endif
217
218#ifdef CONFIG_POWER4
219 /* The hash table has already been allocated and initialized
220 in prom.c */
221 n_hpteg = Hash_size >> LG_HPTEG_SIZE;
222 lg_n_hpteg = __ilog2(n_hpteg);
223
224 /* Remove the hash table from the available memory */
225 if (Hash)
226 reserve_phys_mem(__pa(Hash), Hash_size);
227
228#else /* CONFIG_POWER4 */
229 /*
230 * Allow 1 HPTE (1/8 HPTEG) for each page of memory.
231 * This is less than the recommended amount, but then
232 * Linux ain't AIX.
233 */
234 n_hpteg = total_memory / (PAGE_SIZE * 8);
235 if (n_hpteg < MIN_N_HPTEG)
236 n_hpteg = MIN_N_HPTEG;
237 lg_n_hpteg = __ilog2(n_hpteg);
238 if (n_hpteg & (n_hpteg - 1)) {
239 ++lg_n_hpteg; /* round up if not power of 2 */
240 n_hpteg = 1 << lg_n_hpteg;
241 }
242 Hash_size = n_hpteg << LG_HPTEG_SIZE;
243
244 /*
245 * Find some memory for the hash table.
246 */
247 if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
248 Hash = mem_pieces_find(Hash_size, Hash_size);
249 cacheable_memzero(Hash, Hash_size);
250 _SDR1 = __pa(Hash) | SDR1_LOW_BITS;
251#endif /* CONFIG_POWER4 */
252
253 Hash_end = (PTE *) ((unsigned long)Hash + Hash_size);
254
255 printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n",
256 total_memory >> 20, Hash_size >> 10, Hash);
257
258
259 /*
260 * Patch up the instructions in hashtable.S:create_hpte
261 */
262 if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345);
263 Hash_mask = n_hpteg - 1;
264 hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
265 mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
266 if (lg_n_hpteg > 16)
267 mb2 = 16 - LG_HPTEG_SIZE;
268
269 hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff)
270 | ((unsigned int)(Hash) >> 16);
271 hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6);
272 hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6);
273 hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask;
274 hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask;
275
276 /*
277 * Ensure that the locations we've patched have been written
278 * out from the data cache and invalidated in the instruction
279 * cache, on those machines with split caches.
280 */
281 flush_icache_range((unsigned long) &hash_page_patch_A[0],
282 (unsigned long) &hash_page_patch_C[1]);
283
284 /*
285 * Patch up the instructions in hashtable.S:flush_hash_page
286 */
287 flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff)
288 | ((unsigned int)(Hash) >> 16);
289 flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6);
290 flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6);
291 flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask;
292 flush_icache_range((unsigned long) &flush_hash_patch_A[0],
293 (unsigned long) &flush_hash_patch_B[1]);
294
295 if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205);
296}
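
A user-space sketch (hypothetical helper name; not from this commit) of the block-size selection loop in mmu_mapin_ram() above: starting from the 128kB minimum BAT size, the block is doubled while twice the current size still fits within lowmem, capped at the 256MB BAT maximum (or the alignment of PPC_MEMSTART, whichever is smaller).

#include <stdio.h>

static unsigned long pick_bat_size(unsigned long tot, unsigned long max_size)
{
	unsigned long bl;

	for (bl = 128 << 10; bl < max_size; bl <<= 1)
		if (bl * 2 > tot)
			break;
	return bl;
}

int main(void)
{
	/* 96MB of lowmem -> a 64MB BAT2, leaving the rest for BAT3 */
	printf("%lu MB\n", pick_bat_size(96UL << 20, 256UL << 20) >> 20);
	return 0;
}
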
diff --git a/arch/ppc/mm/tlb.c b/arch/ppc/mm/tlb.c
new file mode 100644
index 000000000000..6c3dc3c44c86
--- /dev/null
+++ b/arch/ppc/mm/tlb.c
@@ -0,0 +1,183 @@
1/*
2 * This file contains the routines for TLB flushing.
3 * On machines where the MMU uses a hash table to store virtual to
4 * physical translations, these routines flush entries from the
5 * hash table also.
6 * -- paulus
7 *
8 * Derived from arch/ppc/mm/init.c:
9 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
10 *
11 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
12 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
13 * Copyright (C) 1996 Paul Mackerras
14 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
15 *
16 * Derived from "arch/i386/mm/init.c"
17 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
18 *
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
23 *
24 */
25
26#include <linux/config.h>
27#include <linux/kernel.h>
28#include <linux/mm.h>
29#include <linux/init.h>
30#include <linux/highmem.h>
31#include <asm/tlbflush.h>
32#include <asm/tlb.h>
33
34#include "mmu_decl.h"
35
36/*
37 * Called when unmapping pages to flush entries from the TLB/hash table.
38 */
39void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr)
40{
41 unsigned long ptephys;
42
43 if (Hash != 0) {
44 ptephys = __pa(ptep) & PAGE_MASK;
45 flush_hash_pages(mm->context, addr, ptephys, 1);
46 }
47}
48
49/*
50 * Called by ptep_set_access_flags, must flush on CPUs for which the
51 * DSI handler can't just "fixup" the TLB on a write fault
52 */
53void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr)
54{
55 if (Hash != 0)
56 return;
57 _tlbie(addr);
58}
59
60/*
61 * Called at the end of a mmu_gather operation to make sure the
62 * TLB flush is completely done.
63 */
64void tlb_flush(struct mmu_gather *tlb)
65{
66 if (Hash == 0) {
67 /*
68 * 603 needs to flush the whole TLB here since
69 * it doesn't use a hash table.
70 */
71 _tlbia();
72 }
73}
74
75/*
76 * TLB flushing:
77 *
78 * - flush_tlb_mm(mm) flushes the specified mm context TLB's
79 * - flush_tlb_page(vma, vmaddr) flushes one page
80 * - flush_tlb_range(vma, start, end) flushes a range of pages
81 * - flush_tlb_kernel_range(start, end) flushes kernel pages
82 *
83 * since the hardware hash table functions as an extension of the
84 * tlb as far as the linux tables are concerned, flush it too.
85 * -- Cort
86 */
87
88/*
89 * 750 SMP is a Bad Idea because the 750 doesn't broadcast all
90 * the cache operations on the bus. Hence we need to use an IPI
91 * to get the other CPU(s) to invalidate their TLBs.
92 */
93#ifdef CONFIG_SMP_750
94#define FINISH_FLUSH smp_send_tlb_invalidate(0)
95#else
96#define FINISH_FLUSH do { } while (0)
97#endif
98
99static void flush_range(struct mm_struct *mm, unsigned long start,
100 unsigned long end)
101{
102 pmd_t *pmd;
103 unsigned long pmd_end;
104 int count;
105 unsigned int ctx = mm->context;
106
107 if (Hash == 0) {
108 _tlbia();
109 return;
110 }
111 start &= PAGE_MASK;
112 if (start >= end)
113 return;
114 end = (end - 1) | ~PAGE_MASK;
115 pmd = pmd_offset(pgd_offset(mm, start), start);
116 for (;;) {
117 pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1;
118 if (pmd_end > end)
119 pmd_end = end;
120 if (!pmd_none(*pmd)) {
121 count = ((pmd_end - start) >> PAGE_SHIFT) + 1;
122 flush_hash_pages(ctx, start, pmd_val(*pmd), count);
123 }
124 if (pmd_end == end)
125 break;
126 start = pmd_end + 1;
127 ++pmd;
128 }
129}
130
131/*
132 * Flush kernel TLB entries in the given range
133 */
134void flush_tlb_kernel_range(unsigned long start, unsigned long end)
135{
136 flush_range(&init_mm, start, end);
137 FINISH_FLUSH;
138}
139
140/*
141 * Flush all the (user) entries for the address space described by mm.
142 */
143void flush_tlb_mm(struct mm_struct *mm)
144{
145 struct vm_area_struct *mp;
146
147 if (Hash == 0) {
148 _tlbia();
149 return;
150 }
151
152 for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
153 flush_range(mp->vm_mm, mp->vm_start, mp->vm_end);
154 FINISH_FLUSH;
155}
156
157void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
158{
159 struct mm_struct *mm;
160 pmd_t *pmd;
161
162 if (Hash == 0) {
163 _tlbie(vmaddr);
164 return;
165 }
166 mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm;
167 pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr);
168 if (!pmd_none(*pmd))
169 flush_hash_pages(mm->context, vmaddr, pmd_val(*pmd), 1);
170 FINISH_FLUSH;
171}
172
173/*
174 * For each address in the range, find the pte for the address
175 * and check _PAGE_HASHPTE bit; if it is set, find and destroy
176 * the corresponding HPTE.
177 */
178void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
179 unsigned long end)
180{
181 flush_range(vma->vm_mm, start, end);
182 FINISH_FLUSH;
183}
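
A user-space sketch (illustrative constants for classic PPC32; not from this commit) of the chunking arithmetic in flush_range() above: the range is walked one pmd (4MB) at a time, and the page count of each chunk is what would be handed to flush_hash_pages().

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))
#define PGDIR_SIZE	(1UL << 22)
#define PGDIR_MASK	(~(PGDIR_SIZE - 1))

static void walk_range(unsigned long start, unsigned long end)
{
	unsigned long pmd_end;
	int count;

	start &= PAGE_MASK;
	if (start >= end)
		return;
	end = (end - 1) | ~PAGE_MASK;
	for (;;) {
		pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1;
		if (pmd_end > end)
			pmd_end = end;
		count = ((pmd_end - start) >> PAGE_SHIFT) + 1;
		printf("chunk [%#lx, %#lx]: %d pages\n", start, pmd_end, count);
		if (pmd_end == end)
			break;
		start = pmd_end + 1;
	}
}

int main(void)
{
	walk_range(0x003ff000UL, 0x00802000UL);	/* spans three pmds */
	return 0;
}
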