diff options
author | Paul Mackerras <paulus@samba.org> | 2005-09-26 02:04:21 -0400 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2005-09-26 02:04:21 -0400 |
commit | 14cf11af6cf608eb8c23e989ddb17a715ddce109 (patch) | |
tree | 271a97ce73e265f39c569cb159c195c5b4bb3f8c /arch/powerpc/mm | |
parent | e5baa396af7560382d2cf3f0871d616b61fc284c (diff) |
powerpc: Merge enough to start building in arch/powerpc.
This creates the directory structure under arch/powerpc and a bunch
of Kconfig files. It does a first-cut merge of arch/powerpc/mm,
arch/powerpc/lib and arch/powerpc/platforms/powermac. This is enough
to build a 32-bit powermac kernel with ARCH=powerpc.
For now we are getting some unmerged files from arch/ppc/kernel and
arch/ppc/syslib, or arch/ppc64/kernel. This makes some minor changes
to files in those directories and files outside arch/powerpc.
The boot directory is still not merged. That's going to be interesting.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/44x_mmu.c | 120 | ||||
-rw-r--r-- | arch/powerpc/mm/4xx_mmu.c | 141 | ||||
-rw-r--r-- | arch/powerpc/mm/Makefile | 12 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 391 | ||||
-rw-r--r-- | arch/powerpc/mm/fsl_booke_mmu.c | 237 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_32.S | 618 | ||||
-rw-r--r-- | arch/powerpc/mm/init.c | 581 | ||||
-rw-r--r-- | arch/powerpc/mm/init64.c | 385 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 299 | ||||
-rw-r--r-- | arch/powerpc/mm/mem64.c | 259 | ||||
-rw-r--r-- | arch/powerpc/mm/mem_pieces.c | 163 | ||||
-rw-r--r-- | arch/powerpc/mm/mem_pieces.h | 48 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context.c | 86 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context64.c | 63 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_decl.h | 85 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable.c | 470 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable64.c | 357 | ||||
-rw-r--r-- | arch/powerpc/mm/ppc_mmu.c | 296 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb.c | 183 |
19 files changed, 4794 insertions, 0 deletions
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c new file mode 100644 index 000000000000..3d79ce281b67 --- /dev/null +++ b/arch/powerpc/mm/44x_mmu.c | |||
@@ -0,0 +1,120 @@ | |||
1 | /* | ||
2 | * Modifications by Matt Porter (mporter@mvista.com) to support | ||
3 | * PPC44x Book E processors. | ||
4 | * | ||
5 | * This file contains the routines for initializing the MMU | ||
6 | * on the 4xx series of chips. | ||
7 | * -- paulus | ||
8 | * | ||
9 | * Derived from arch/ppc/mm/init.c: | ||
10 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
11 | * | ||
12 | * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) | ||
13 | * and Cort Dougan (PReP) (cort@cs.nmt.edu) | ||
14 | * Copyright (C) 1996 Paul Mackerras | ||
15 | * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). | ||
16 | * | ||
17 | * Derived from "arch/i386/mm/init.c" | ||
18 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
19 | * | ||
20 | * This program is free software; you can redistribute it and/or | ||
21 | * modify it under the terms of the GNU General Public License | ||
22 | * as published by the Free Software Foundation; either version | ||
23 | * 2 of the License, or (at your option) any later version. | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/config.h> | ||
28 | #include <linux/signal.h> | ||
29 | #include <linux/sched.h> | ||
30 | #include <linux/kernel.h> | ||
31 | #include <linux/errno.h> | ||
32 | #include <linux/string.h> | ||
33 | #include <linux/types.h> | ||
34 | #include <linux/ptrace.h> | ||
35 | #include <linux/mman.h> | ||
36 | #include <linux/mm.h> | ||
37 | #include <linux/swap.h> | ||
38 | #include <linux/stddef.h> | ||
39 | #include <linux/vmalloc.h> | ||
40 | #include <linux/init.h> | ||
41 | #include <linux/delay.h> | ||
42 | #include <linux/highmem.h> | ||
43 | |||
44 | #include <asm/pgalloc.h> | ||
45 | #include <asm/prom.h> | ||
46 | #include <asm/io.h> | ||
47 | #include <asm/mmu_context.h> | ||
48 | #include <asm/pgtable.h> | ||
49 | #include <asm/mmu.h> | ||
50 | #include <asm/uaccess.h> | ||
51 | #include <asm/smp.h> | ||
52 | #include <asm/bootx.h> | ||
53 | #include <asm/machdep.h> | ||
54 | #include <asm/setup.h> | ||
55 | |||
56 | #include "mmu_decl.h" | ||
57 | |||
58 | extern char etext[], _stext[]; | ||
59 | |||
60 | /* Used by the 44x TLB replacement exception handler. | ||
61 | * Just needed it declared someplace. | ||
62 | */ | ||
63 | unsigned int tlb_44x_index = 0; | ||
64 | unsigned int tlb_44x_hwater = 62; | ||
65 | |||
66 | /* | ||
67 | * "Pins" a 256MB TLB entry in AS0 for kernel lowmem | ||
68 | */ | ||
69 | static void __init | ||
70 | ppc44x_pin_tlb(int slot, unsigned int virt, unsigned int phys) | ||
71 | { | ||
72 | unsigned long attrib = 0; | ||
73 | |||
74 | __asm__ __volatile__("\ | ||
75 | clrrwi %2,%2,10\n\ | ||
76 | ori %2,%2,%4\n\ | ||
77 | clrrwi %1,%1,10\n\ | ||
78 | li %0,0\n\ | ||
79 | ori %0,%0,%5\n\ | ||
80 | tlbwe %2,%3,%6\n\ | ||
81 | tlbwe %1,%3,%7\n\ | ||
82 | tlbwe %0,%3,%8" | ||
83 | : | ||
84 | : "r" (attrib), "r" (phys), "r" (virt), "r" (slot), | ||
85 | "i" (PPC44x_TLB_VALID | PPC44x_TLB_256M), | ||
86 | "i" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G), | ||
87 | "i" (PPC44x_TLB_PAGEID), | ||
88 | "i" (PPC44x_TLB_XLAT), | ||
89 | "i" (PPC44x_TLB_ATTRIB)); | ||
90 | } | ||
91 | |||
92 | /* | ||
93 | * MMU_init_hw does the chip-specific initialization of the MMU hardware. | ||
94 | */ | ||
95 | void __init MMU_init_hw(void) | ||
96 | { | ||
97 | flush_instruction_cache(); | ||
98 | } | ||
99 | |||
100 | unsigned long __init mmu_mapin_ram(void) | ||
101 | { | ||
102 | unsigned int pinned_tlbs = 1; | ||
103 | int i; | ||
104 | |||
105 | /* Determine number of entries necessary to cover lowmem */ | ||
106 | pinned_tlbs = (unsigned int) | ||
107 | (_ALIGN(total_lowmem, PPC44x_PIN_SIZE) >> PPC44x_PIN_SHIFT); | ||
108 | |||
109 | /* Write upper watermark to save location */ | ||
110 | tlb_44x_hwater = PPC44x_LOW_SLOT - pinned_tlbs; | ||
111 | |||
112 | /* If necessary, set additional pinned TLBs */ | ||
113 | if (pinned_tlbs > 1) | ||
114 | for (i = (PPC44x_LOW_SLOT-(pinned_tlbs-1)); i < PPC44x_LOW_SLOT; i++) { | ||
115 | unsigned int phys_addr = (PPC44x_LOW_SLOT-i) * PPC44x_PIN_SIZE; | ||
116 | ppc44x_pin_tlb(i, phys_addr+PAGE_OFFSET, phys_addr); | ||
117 | } | ||
118 | |||
119 | return total_lowmem; | ||
120 | } | ||
diff --git a/arch/powerpc/mm/4xx_mmu.c b/arch/powerpc/mm/4xx_mmu.c new file mode 100644 index 000000000000..b7bcbc232f39 --- /dev/null +++ b/arch/powerpc/mm/4xx_mmu.c | |||
@@ -0,0 +1,141 @@ | |||
1 | /* | ||
2 | * This file contains the routines for initializing the MMU | ||
3 | * on the 4xx series of chips. | ||
4 | * -- paulus | ||
5 | * | ||
6 | * Derived from arch/ppc/mm/init.c: | ||
7 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
8 | * | ||
9 | * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) | ||
10 | * and Cort Dougan (PReP) (cort@cs.nmt.edu) | ||
11 | * Copyright (C) 1996 Paul Mackerras | ||
12 | * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). | ||
13 | * | ||
14 | * Derived from "arch/i386/mm/init.c" | ||
15 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
16 | * | ||
17 | * This program is free software; you can redistribute it and/or | ||
18 | * modify it under the terms of the GNU General Public License | ||
19 | * as published by the Free Software Foundation; either version | ||
20 | * 2 of the License, or (at your option) any later version. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/config.h> | ||
25 | #include <linux/signal.h> | ||
26 | #include <linux/sched.h> | ||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/errno.h> | ||
29 | #include <linux/string.h> | ||
30 | #include <linux/types.h> | ||
31 | #include <linux/ptrace.h> | ||
32 | #include <linux/mman.h> | ||
33 | #include <linux/mm.h> | ||
34 | #include <linux/swap.h> | ||
35 | #include <linux/stddef.h> | ||
36 | #include <linux/vmalloc.h> | ||
37 | #include <linux/init.h> | ||
38 | #include <linux/delay.h> | ||
39 | #include <linux/highmem.h> | ||
40 | |||
41 | #include <asm/pgalloc.h> | ||
42 | #include <asm/prom.h> | ||
43 | #include <asm/io.h> | ||
44 | #include <asm/mmu_context.h> | ||
45 | #include <asm/pgtable.h> | ||
46 | #include <asm/mmu.h> | ||
47 | #include <asm/uaccess.h> | ||
48 | #include <asm/smp.h> | ||
49 | #include <asm/bootx.h> | ||
50 | #include <asm/machdep.h> | ||
51 | #include <asm/setup.h> | ||
52 | #include "mmu_decl.h" | ||
53 | |||
54 | extern int __map_without_ltlbs; | ||
55 | /* | ||
56 | * MMU_init_hw does the chip-specific initialization of the MMU hardware. | ||
57 | */ | ||
58 | void __init MMU_init_hw(void) | ||
59 | { | ||
60 | /* | ||
61 | * The Zone Protection Register (ZPR) defines how protection will | ||
62 | * be applied to every page which is a member of a given zone. At | ||
63 | * present, we utilize only two of the 4xx's zones. | ||
64 | * The zone index bits (of ZSEL) in the PTE are used for software | ||
65 | * indicators, except the LSB. For user access, zone 1 is used, | ||
66 | * for kernel access, zone 0 is used. We set all but zone 1 | ||
67 | * to zero, allowing only kernel access as indicated in the PTE. | ||
68 | * For zone 1, we set a 01 binary (a value of 10 will not work) | ||
69 | * to allow user access as indicated in the PTE. This also allows | ||
70 | * kernel access as indicated in the PTE. | ||
71 | */ | ||
72 | |||
73 | mtspr(SPRN_ZPR, 0x10000000); | ||
74 | |||
75 | flush_instruction_cache(); | ||
76 | |||
77 | /* | ||
78 | * Set up the real-mode cache parameters for the exception vector | ||
79 | * handlers (which are run in real-mode). | ||
80 | */ | ||
81 | |||
82 | mtspr(SPRN_DCWR, 0x00000000); /* All caching is write-back */ | ||
83 | |||
84 | /* | ||
85 | * Cache instruction and data space where the exception | ||
86 | * vectors and the kernel live in real-mode. | ||
87 | */ | ||
88 | |||
89 | mtspr(SPRN_DCCR, 0xF0000000); /* 512 MB of data space at 0x0. */ | ||
90 | mtspr(SPRN_ICCR, 0xF0000000); /* 512 MB of instr. space at 0x0. */ | ||
91 | } | ||
92 | |||
93 | #define LARGE_PAGE_SIZE_16M (1<<24) | ||
94 | #define LARGE_PAGE_SIZE_4M (1<<22) | ||
95 | |||
96 | unsigned long __init mmu_mapin_ram(void) | ||
97 | { | ||
98 | unsigned long v, s; | ||
99 | phys_addr_t p; | ||
100 | |||
101 | v = KERNELBASE; | ||
102 | p = PPC_MEMSTART; | ||
103 | s = 0; | ||
104 | |||
105 | if (__map_without_ltlbs) { | ||
106 | return s; | ||
107 | } | ||
108 | |||
109 | while (s <= (total_lowmem - LARGE_PAGE_SIZE_16M)) { | ||
110 | pmd_t *pmdp; | ||
111 | unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE; | ||
112 | |||
113 | spin_lock(&init_mm.page_table_lock); | ||
114 | pmdp = pmd_offset(pgd_offset_k(v), v); | ||
115 | pmd_val(*pmdp++) = val; | ||
116 | pmd_val(*pmdp++) = val; | ||
117 | pmd_val(*pmdp++) = val; | ||
118 | pmd_val(*pmdp++) = val; | ||
119 | spin_unlock(&init_mm.page_table_lock); | ||
120 | |||
121 | v += LARGE_PAGE_SIZE_16M; | ||
122 | p += LARGE_PAGE_SIZE_16M; | ||
123 | s += LARGE_PAGE_SIZE_16M; | ||
124 | } | ||
125 | |||
126 | while (s <= (total_lowmem - LARGE_PAGE_SIZE_4M)) { | ||
127 | pmd_t *pmdp; | ||
128 | unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE; | ||
129 | |||
130 | spin_lock(&init_mm.page_table_lock); | ||
131 | pmdp = pmd_offset(pgd_offset_k(v), v); | ||
132 | pmd_val(*pmdp) = val; | ||
133 | spin_unlock(&init_mm.page_table_lock); | ||
134 | |||
135 | v += LARGE_PAGE_SIZE_4M; | ||
136 | p += LARGE_PAGE_SIZE_4M; | ||
137 | s += LARGE_PAGE_SIZE_4M; | ||
138 | } | ||
139 | |||
140 | return s; | ||
141 | } | ||
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile new file mode 100644 index 000000000000..9f52c26acd86 --- /dev/null +++ b/arch/powerpc/mm/Makefile | |||
@@ -0,0 +1,12 @@ | |||
#
# Makefile for the linux ppc-specific parts of the memory manager.
#

# Built for every configuration
obj-y				:= fault.o mem.o

# Word-size specific objects
obj-$(CONFIG_PPC32)		+= init.o pgtable.o mmu_context.o mem_pieces.o tlb.o
obj-$(CONFIG_PPC64)		+= init64.o pgtable64.o mmu_context64.o

# MMU-family specific objects
obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu.o hash_32.o
obj-$(CONFIG_40x)		+= 4xx_mmu.o
obj-$(CONFIG_44x)		+= 44x_mmu.o
obj-$(CONFIG_FSL_BOOKE)		+= fsl_booke_mmu.o
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c new file mode 100644 index 000000000000..3df641fa789d --- /dev/null +++ b/arch/powerpc/mm/fault.c | |||
@@ -0,0 +1,391 @@ | |||
1 | /* | ||
2 | * arch/ppc/mm/fault.c | ||
3 | * | ||
4 | * PowerPC version | ||
5 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
6 | * | ||
7 | * Derived from "arch/i386/mm/fault.c" | ||
8 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
9 | * | ||
10 | * Modified by Cort Dougan and Paul Mackerras. | ||
11 | * | ||
12 | * Modified for PPC64 by Dave Engebretsen (engebret@ibm.com) | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or | ||
15 | * modify it under the terms of the GNU General Public License | ||
16 | * as published by the Free Software Foundation; either version | ||
17 | * 2 of the License, or (at your option) any later version. | ||
18 | */ | ||
19 | |||
20 | #include <linux/config.h> | ||
21 | #include <linux/signal.h> | ||
22 | #include <linux/sched.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/errno.h> | ||
25 | #include <linux/string.h> | ||
26 | #include <linux/types.h> | ||
27 | #include <linux/ptrace.h> | ||
28 | #include <linux/mman.h> | ||
29 | #include <linux/mm.h> | ||
30 | #include <linux/interrupt.h> | ||
31 | #include <linux/highmem.h> | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/kprobes.h> | ||
34 | |||
35 | #include <asm/page.h> | ||
36 | #include <asm/pgtable.h> | ||
37 | #include <asm/mmu.h> | ||
38 | #include <asm/mmu_context.h> | ||
39 | #include <asm/system.h> | ||
40 | #include <asm/uaccess.h> | ||
41 | #include <asm/tlbflush.h> | ||
42 | #include <asm/kdebug.h> | ||
43 | #include <asm/siginfo.h> | ||
44 | |||
45 | /* | ||
46 | * Check whether the instruction at regs->nip is a store using | ||
47 | * an update addressing form which will update r1. | ||
48 | */ | ||
49 | static int store_updates_sp(struct pt_regs *regs) | ||
50 | { | ||
51 | unsigned int inst; | ||
52 | |||
53 | if (get_user(inst, (unsigned int __user *)regs->nip)) | ||
54 | return 0; | ||
55 | /* check for 1 in the rA field */ | ||
56 | if (((inst >> 16) & 0x1f) != 1) | ||
57 | return 0; | ||
58 | /* check major opcode */ | ||
59 | switch (inst >> 26) { | ||
60 | case 37: /* stwu */ | ||
61 | case 39: /* stbu */ | ||
62 | case 45: /* sthu */ | ||
63 | case 53: /* stfsu */ | ||
64 | case 55: /* stfdu */ | ||
65 | return 1; | ||
66 | case 62: /* std or stdu */ | ||
67 | return (inst & 3) == 1; | ||
68 | case 31: | ||
69 | /* check minor opcode */ | ||
70 | switch ((inst >> 1) & 0x3ff) { | ||
71 | case 181: /* stdux */ | ||
72 | case 183: /* stwux */ | ||
73 | case 247: /* stbux */ | ||
74 | case 439: /* sthux */ | ||
75 | case 695: /* stfsux */ | ||
76 | case 759: /* stfdux */ | ||
77 | return 1; | ||
78 | } | ||
79 | } | ||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | static void do_dabr(struct pt_regs *regs, unsigned long error_code) | ||
84 | { | ||
85 | siginfo_t info; | ||
86 | |||
87 | if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, | ||
88 | 11, SIGSEGV) == NOTIFY_STOP) | ||
89 | return; | ||
90 | |||
91 | if (debugger_dabr_match(regs)) | ||
92 | return; | ||
93 | |||
94 | /* Clear the DABR */ | ||
95 | set_dabr(0); | ||
96 | |||
97 | /* Deliver the signal to userspace */ | ||
98 | info.si_signo = SIGTRAP; | ||
99 | info.si_errno = 0; | ||
100 | info.si_code = TRAP_HWBKPT; | ||
101 | info.si_addr = (void __user *)regs->nip; | ||
102 | force_sig_info(SIGTRAP, &info, current); | ||
103 | } | ||
104 | |||
105 | /* | ||
106 | * For 600- and 800-family processors, the error_code parameter is DSISR | ||
107 | * for a data fault, SRR1 for an instruction fault. For 400-family processors | ||
108 | * the error_code parameter is ESR for a data fault, 0 for an instruction | ||
109 | * fault. | ||
110 | * For 64-bit processors, the error_code parameter is | ||
111 | * - DSISR for a non-SLB data access fault, | ||
112 | * - SRR1 & 0x08000000 for a non-SLB instruction access fault | ||
113 | * - 0 any SLB fault. | ||
114 | * | ||
115 | * The return value is 0 if the fault was handled, or the signal | ||
116 | * number if this is a kernel fault that can't be handled here. | ||
117 | */ | ||
118 | int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, | ||
119 | unsigned long error_code) | ||
120 | { | ||
121 | struct vm_area_struct * vma; | ||
122 | struct mm_struct *mm = current->mm; | ||
123 | siginfo_t info; | ||
124 | int code = SEGV_MAPERR; | ||
125 | int is_write = 0; | ||
126 | int trap = TRAP(regs); | ||
127 | int is_exec = trap == 0x400; | ||
128 | |||
129 | #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) | ||
130 | /* | ||
131 | * Fortunately the bit assignments in SRR1 for an instruction | ||
132 | * fault and DSISR for a data fault are mostly the same for the | ||
133 | * bits we are interested in. But there are some bits which | ||
134 | * indicate errors in DSISR but can validly be set in SRR1. | ||
135 | */ | ||
136 | if (trap == 0x400) | ||
137 | error_code &= 0x48200000; | ||
138 | else | ||
139 | is_write = error_code & DSISR_ISSTORE; | ||
140 | #else | ||
141 | is_write = error_code & ESR_DST; | ||
142 | #endif /* CONFIG_4xx || CONFIG_BOOKE */ | ||
143 | |||
144 | if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code, | ||
145 | 11, SIGSEGV) == NOTIFY_STOP) | ||
146 | return 0; | ||
147 | |||
148 | if (trap == 0x300) { | ||
149 | if (debugger_fault_handler(regs)) | ||
150 | return 0; | ||
151 | } | ||
152 | |||
153 | /* On a kernel SLB miss we can only check for a valid exception entry */ | ||
154 | if (!user_mode(regs) && (address >= TASK_SIZE)) | ||
155 | return SIGSEGV; | ||
156 | |||
157 | #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) | ||
158 | if (error_code & DSISR_DABRMATCH) { | ||
159 | /* DABR match */ | ||
160 | do_dabr(regs, error_code); | ||
161 | return 0; | ||
162 | } | ||
163 | #endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ | ||
164 | |||
165 | if (in_atomic() || mm == NULL) { | ||
166 | if (!user_mode(regs)) | ||
167 | return SIGSEGV; | ||
168 | /* in_atomic() in user mode is really bad, | ||
169 | as is current->mm == NULL. */ | ||
170 | printk(KERN_EMERG "Page fault in user mode with" | ||
171 | "in_atomic() = %d mm = %p\n", in_atomic(), mm); | ||
172 | printk(KERN_EMERG "NIP = %lx MSR = %lx\n", | ||
173 | regs->nip, regs->msr); | ||
174 | die("Weird page fault", regs, SIGSEGV); | ||
175 | } | ||
176 | |||
177 | /* When running in the kernel we expect faults to occur only to | ||
178 | * addresses in user space. All other faults represent errors in the | ||
179 | * kernel and should generate an OOPS. Unfortunatly, in the case of an | ||
180 | * erroneous fault occuring in a code path which already holds mmap_sem | ||
181 | * we will deadlock attempting to validate the fault against the | ||
182 | * address space. Luckily the kernel only validly references user | ||
183 | * space from well defined areas of code, which are listed in the | ||
184 | * exceptions table. | ||
185 | * | ||
186 | * As the vast majority of faults will be valid we will only perform | ||
187 | * the source reference check when there is a possibilty of a deadlock. | ||
188 | * Attempt to lock the address space, if we cannot we then validate the | ||
189 | * source. If this is invalid we can skip the address space check, | ||
190 | * thus avoiding the deadlock. | ||
191 | */ | ||
192 | if (!down_read_trylock(&mm->mmap_sem)) { | ||
193 | if (!user_mode(regs) && !search_exception_tables(regs->nip)) | ||
194 | goto bad_area_nosemaphore; | ||
195 | |||
196 | down_read(&mm->mmap_sem); | ||
197 | } | ||
198 | |||
199 | vma = find_vma(mm, address); | ||
200 | if (!vma) | ||
201 | goto bad_area; | ||
202 | if (vma->vm_start <= address) | ||
203 | goto good_area; | ||
204 | if (!(vma->vm_flags & VM_GROWSDOWN)) | ||
205 | goto bad_area; | ||
206 | |||
207 | /* | ||
208 | * N.B. The POWER/Open ABI allows programs to access up to | ||
209 | * 288 bytes below the stack pointer. | ||
210 | * The kernel signal delivery code writes up to about 1.5kB | ||
211 | * below the stack pointer (r1) before decrementing it. | ||
212 | * The exec code can write slightly over 640kB to the stack | ||
213 | * before setting the user r1. Thus we allow the stack to | ||
214 | * expand to 1MB without further checks. | ||
215 | */ | ||
216 | if (address + 0x100000 < vma->vm_end) { | ||
217 | /* get user regs even if this fault is in kernel mode */ | ||
218 | struct pt_regs *uregs = current->thread.regs; | ||
219 | if (uregs == NULL) | ||
220 | goto bad_area; | ||
221 | |||
222 | /* | ||
223 | * A user-mode access to an address a long way below | ||
224 | * the stack pointer is only valid if the instruction | ||
225 | * is one which would update the stack pointer to the | ||
226 | * address accessed if the instruction completed, | ||
227 | * i.e. either stwu rs,n(r1) or stwux rs,r1,rb | ||
228 | * (or the byte, halfword, float or double forms). | ||
229 | * | ||
230 | * If we don't check this then any write to the area | ||
231 | * between the last mapped region and the stack will | ||
232 | * expand the stack rather than segfaulting. | ||
233 | */ | ||
234 | if (address + 2048 < uregs->gpr[1] | ||
235 | && (!user_mode(regs) || !store_updates_sp(regs))) | ||
236 | goto bad_area; | ||
237 | } | ||
238 | if (expand_stack(vma, address)) | ||
239 | goto bad_area; | ||
240 | |||
241 | good_area: | ||
242 | code = SEGV_ACCERR; | ||
243 | #if defined(CONFIG_6xx) | ||
244 | if (error_code & 0x95700000) | ||
245 | /* an error such as lwarx to I/O controller space, | ||
246 | address matching DABR, eciwx, etc. */ | ||
247 | goto bad_area; | ||
248 | #endif /* CONFIG_6xx */ | ||
249 | #if defined(CONFIG_8xx) | ||
250 | /* The MPC8xx seems to always set 0x80000000, which is | ||
251 | * "undefined". Of those that can be set, this is the only | ||
252 | * one which seems bad. | ||
253 | */ | ||
254 | if (error_code & 0x10000000) | ||
255 | /* Guarded storage error. */ | ||
256 | goto bad_area; | ||
257 | #endif /* CONFIG_8xx */ | ||
258 | |||
259 | if (is_exec) { | ||
260 | #ifdef CONFIG_PPC64 | ||
261 | /* protection fault */ | ||
262 | if (error_code & DSISR_PROTFAULT) | ||
263 | goto bad_area; | ||
264 | if (!(vma->vm_flags & VM_EXEC)) | ||
265 | goto bad_area; | ||
266 | #endif | ||
267 | #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) | ||
268 | pte_t *ptep; | ||
269 | |||
270 | /* Since 4xx/Book-E supports per-page execute permission, | ||
271 | * we lazily flush dcache to icache. */ | ||
272 | ptep = NULL; | ||
273 | if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) { | ||
274 | struct page *page = pte_page(*ptep); | ||
275 | |||
276 | if (! test_bit(PG_arch_1, &page->flags)) { | ||
277 | flush_dcache_icache_page(page); | ||
278 | set_bit(PG_arch_1, &page->flags); | ||
279 | } | ||
280 | pte_update(ptep, 0, _PAGE_HWEXEC); | ||
281 | _tlbie(address); | ||
282 | pte_unmap(ptep); | ||
283 | up_read(&mm->mmap_sem); | ||
284 | return 0; | ||
285 | } | ||
286 | if (ptep != NULL) | ||
287 | pte_unmap(ptep); | ||
288 | #endif | ||
289 | /* a write */ | ||
290 | } else if (is_write) { | ||
291 | if (!(vma->vm_flags & VM_WRITE)) | ||
292 | goto bad_area; | ||
293 | /* a read */ | ||
294 | } else { | ||
295 | /* protection fault */ | ||
296 | if (error_code & 0x08000000) | ||
297 | goto bad_area; | ||
298 | if (!(vma->vm_flags & (VM_READ | VM_EXEC))) | ||
299 | goto bad_area; | ||
300 | } | ||
301 | |||
302 | /* | ||
303 | * If for any reason at all we couldn't handle the fault, | ||
304 | * make sure we exit gracefully rather than endlessly redo | ||
305 | * the fault. | ||
306 | */ | ||
307 | survive: | ||
308 | switch (handle_mm_fault(mm, vma, address, is_write)) { | ||
309 | |||
310 | case VM_FAULT_MINOR: | ||
311 | current->min_flt++; | ||
312 | break; | ||
313 | case VM_FAULT_MAJOR: | ||
314 | current->maj_flt++; | ||
315 | break; | ||
316 | case VM_FAULT_SIGBUS: | ||
317 | goto do_sigbus; | ||
318 | case VM_FAULT_OOM: | ||
319 | goto out_of_memory; | ||
320 | default: | ||
321 | BUG(); | ||
322 | } | ||
323 | |||
324 | up_read(&mm->mmap_sem); | ||
325 | return 0; | ||
326 | |||
327 | bad_area: | ||
328 | up_read(&mm->mmap_sem); | ||
329 | |||
330 | bad_area_nosemaphore: | ||
331 | /* User mode accesses cause a SIGSEGV */ | ||
332 | if (user_mode(regs)) { | ||
333 | _exception(SIGSEGV, regs, code, address); | ||
334 | return 0; | ||
335 | } | ||
336 | |||
337 | if (is_exec && (error_code & DSISR_PROTFAULT) | ||
338 | && printk_ratelimit()) | ||
339 | printk(KERN_CRIT "kernel tried to execute NX-protected" | ||
340 | " page (%lx) - exploit attempt? (uid: %d)\n", | ||
341 | address, current->uid); | ||
342 | |||
343 | return SIGSEGV; | ||
344 | |||
345 | /* | ||
346 | * We ran out of memory, or some other thing happened to us that made | ||
347 | * us unable to handle the page fault gracefully. | ||
348 | */ | ||
349 | out_of_memory: | ||
350 | up_read(&mm->mmap_sem); | ||
351 | if (current->pid == 1) { | ||
352 | yield(); | ||
353 | down_read(&mm->mmap_sem); | ||
354 | goto survive; | ||
355 | } | ||
356 | printk("VM: killing process %s\n", current->comm); | ||
357 | if (user_mode(regs)) | ||
358 | do_exit(SIGKILL); | ||
359 | return SIGKILL; | ||
360 | |||
361 | do_sigbus: | ||
362 | up_read(&mm->mmap_sem); | ||
363 | if (user_mode(regs)) { | ||
364 | info.si_signo = SIGBUS; | ||
365 | info.si_errno = 0; | ||
366 | info.si_code = BUS_ADRERR; | ||
367 | info.si_addr = (void __user *)address; | ||
368 | force_sig_info(SIGBUS, &info, current); | ||
369 | return 0; | ||
370 | } | ||
371 | return SIGBUS; | ||
372 | } | ||
373 | |||
374 | /* | ||
375 | * bad_page_fault is called when we have a bad access from the kernel. | ||
376 | * It is called from the DSI and ISI handlers in head.S and from some | ||
377 | * of the procedures in traps.c. | ||
378 | */ | ||
379 | void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) | ||
380 | { | ||
381 | const struct exception_table_entry *entry; | ||
382 | |||
383 | /* Are we prepared to handle this fault? */ | ||
384 | if ((entry = search_exception_tables(regs->nip)) != NULL) { | ||
385 | regs->nip = entry->fixup; | ||
386 | return; | ||
387 | } | ||
388 | |||
389 | /* kernel has accessed a bad area */ | ||
390 | die("Kernel access of bad area", regs, sig); | ||
391 | } | ||
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c new file mode 100644 index 000000000000..af9ca0eb6d55 --- /dev/null +++ b/arch/powerpc/mm/fsl_booke_mmu.c | |||
@@ -0,0 +1,237 @@ | |||
1 | /* | ||
2 | * Modifications by Kumar Gala (kumar.gala@freescale.com) to support | ||
3 | * E500 Book E processors. | ||
4 | * | ||
5 | * Copyright 2004 Freescale Semiconductor, Inc | ||
6 | * | ||
7 | * This file contains the routines for initializing the MMU | ||
8 | * on Freescale Book E (e500) chips. | ||
9 | * -- paulus | ||
10 | * | ||
11 | * Derived from arch/ppc/mm/init.c: | ||
12 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
13 | * | ||
14 | * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) | ||
15 | * and Cort Dougan (PReP) (cort@cs.nmt.edu) | ||
16 | * Copyright (C) 1996 Paul Mackerras | ||
17 | * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). | ||
18 | * | ||
19 | * Derived from "arch/i386/mm/init.c" | ||
20 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
21 | * | ||
22 | * This program is free software; you can redistribute it and/or | ||
23 | * modify it under the terms of the GNU General Public License | ||
24 | * as published by the Free Software Foundation; either version | ||
25 | * 2 of the License, or (at your option) any later version. | ||
26 | * | ||
27 | */ | ||
28 | |||
29 | #include <linux/config.h> | ||
30 | #include <linux/signal.h> | ||
31 | #include <linux/sched.h> | ||
32 | #include <linux/kernel.h> | ||
33 | #include <linux/errno.h> | ||
34 | #include <linux/string.h> | ||
35 | #include <linux/types.h> | ||
36 | #include <linux/ptrace.h> | ||
37 | #include <linux/mman.h> | ||
38 | #include <linux/mm.h> | ||
39 | #include <linux/swap.h> | ||
40 | #include <linux/stddef.h> | ||
41 | #include <linux/vmalloc.h> | ||
42 | #include <linux/init.h> | ||
43 | #include <linux/delay.h> | ||
44 | #include <linux/highmem.h> | ||
45 | |||
46 | #include <asm/pgalloc.h> | ||
47 | #include <asm/prom.h> | ||
48 | #include <asm/io.h> | ||
49 | #include <asm/mmu_context.h> | ||
50 | #include <asm/pgtable.h> | ||
51 | #include <asm/mmu.h> | ||
52 | #include <asm/uaccess.h> | ||
53 | #include <asm/smp.h> | ||
54 | #include <asm/bootx.h> | ||
55 | #include <asm/machdep.h> | ||
56 | #include <asm/setup.h> | ||
57 | |||
58 | extern void loadcam_entry(unsigned int index); | ||
59 | unsigned int tlbcam_index; | ||
60 | unsigned int num_tlbcam_entries; | ||
61 | static unsigned long __cam0, __cam1, __cam2; | ||
62 | extern unsigned long total_lowmem; | ||
63 | extern unsigned long __max_low_memory; | ||
64 | #define MAX_LOW_MEM CONFIG_LOWMEM_SIZE | ||
65 | |||
66 | #define NUM_TLBCAMS (16) | ||
67 | |||
68 | struct tlbcam { | ||
69 | u32 MAS0; | ||
70 | u32 MAS1; | ||
71 | u32 MAS2; | ||
72 | u32 MAS3; | ||
73 | u32 MAS7; | ||
74 | } TLBCAM[NUM_TLBCAMS]; | ||
75 | |||
76 | struct tlbcamrange { | ||
77 | unsigned long start; | ||
78 | unsigned long limit; | ||
79 | phys_addr_t phys; | ||
80 | } tlbcam_addrs[NUM_TLBCAMS]; | ||
81 | |||
82 | extern unsigned int tlbcam_index; | ||
83 | |||
84 | /* | ||
85 | * Return PA for this VA if it is mapped by a CAM, or 0 | ||
86 | */ | ||
87 | unsigned long v_mapped_by_tlbcam(unsigned long va) | ||
88 | { | ||
89 | int b; | ||
90 | for (b = 0; b < tlbcam_index; ++b) | ||
91 | if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit) | ||
92 | return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start); | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * Return VA for a given PA or 0 if not mapped | ||
98 | */ | ||
99 | unsigned long p_mapped_by_tlbcam(unsigned long pa) | ||
100 | { | ||
101 | int b; | ||
102 | for (b = 0; b < tlbcam_index; ++b) | ||
103 | if (pa >= tlbcam_addrs[b].phys | ||
104 | && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start) | ||
105 | +tlbcam_addrs[b].phys) | ||
106 | return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys); | ||
107 | return 0; | ||
108 | } | ||
109 | |||
110 | /* | ||
111 | * Set up one of the I/D BAT (block address translation) register pairs. | ||
112 | * The parameters are not checked; in particular size must be a power | ||
113 | * of 4 between 4k and 256M. | ||
114 | */ | ||
115 | void settlbcam(int index, unsigned long virt, phys_addr_t phys, | ||
116 | unsigned int size, int flags, unsigned int pid) | ||
117 | { | ||
118 | unsigned int tsize, lz; | ||
119 | |||
120 | asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size)); | ||
121 | tsize = (21 - lz) / 2; | ||
122 | |||
123 | #ifdef CONFIG_SMP | ||
124 | if ((flags & _PAGE_NO_CACHE) == 0) | ||
125 | flags |= _PAGE_COHERENT; | ||
126 | #endif | ||
127 | |||
128 | TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1); | ||
129 | TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid); | ||
130 | TLBCAM[index].MAS2 = virt & PAGE_MASK; | ||
131 | |||
132 | TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0; | ||
133 | TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0; | ||
134 | TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0; | ||
135 | TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0; | ||
136 | TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0; | ||
137 | |||
138 | TLBCAM[index].MAS3 = (phys & PAGE_MASK) | MAS3_SX | MAS3_SR; | ||
139 | TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0); | ||
140 | |||
141 | #ifndef CONFIG_KGDB /* want user access for breakpoints */ | ||
142 | if (flags & _PAGE_USER) { | ||
143 | TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; | ||
144 | TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); | ||
145 | } | ||
146 | #else | ||
147 | TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; | ||
148 | TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); | ||
149 | #endif | ||
150 | |||
151 | tlbcam_addrs[index].start = virt; | ||
152 | tlbcam_addrs[index].limit = virt + size - 1; | ||
153 | tlbcam_addrs[index].phys = phys; | ||
154 | |||
155 | loadcam_entry(index); | ||
156 | } | ||
157 | |||
158 | void invalidate_tlbcam_entry(int index) | ||
159 | { | ||
160 | TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index); | ||
161 | TLBCAM[index].MAS1 = ~MAS1_VALID; | ||
162 | |||
163 | loadcam_entry(index); | ||
164 | } | ||
165 | |||
166 | void __init cam_mapin_ram(unsigned long cam0, unsigned long cam1, | ||
167 | unsigned long cam2) | ||
168 | { | ||
169 | settlbcam(0, KERNELBASE, PPC_MEMSTART, cam0, _PAGE_KERNEL, 0); | ||
170 | tlbcam_index++; | ||
171 | if (cam1) { | ||
172 | tlbcam_index++; | ||
173 | settlbcam(1, KERNELBASE+cam0, PPC_MEMSTART+cam0, cam1, _PAGE_KERNEL, 0); | ||
174 | } | ||
175 | if (cam2) { | ||
176 | tlbcam_index++; | ||
177 | settlbcam(2, KERNELBASE+cam0+cam1, PPC_MEMSTART+cam0+cam1, cam2, _PAGE_KERNEL, 0); | ||
178 | } | ||
179 | } | ||
180 | |||
181 | /* | ||
182 | * MMU_init_hw does the chip-specific initialization of the MMU hardware. | ||
183 | */ | ||
184 | void __init MMU_init_hw(void) | ||
185 | { | ||
186 | flush_instruction_cache(); | ||
187 | } | ||
188 | |||
189 | unsigned long __init mmu_mapin_ram(void) | ||
190 | { | ||
191 | cam_mapin_ram(__cam0, __cam1, __cam2); | ||
192 | |||
193 | return __cam0 + __cam1 + __cam2; | ||
194 | } | ||
195 | |||
196 | |||
197 | void __init | ||
198 | adjust_total_lowmem(void) | ||
199 | { | ||
200 | unsigned long max_low_mem = MAX_LOW_MEM; | ||
201 | unsigned long cam_max = 0x10000000; | ||
202 | unsigned long ram; | ||
203 | |||
204 | /* adjust CAM size to max_low_mem */ | ||
205 | if (max_low_mem < cam_max) | ||
206 | cam_max = max_low_mem; | ||
207 | |||
208 | /* adjust lowmem size to max_low_mem */ | ||
209 | if (max_low_mem < total_lowmem) | ||
210 | ram = max_low_mem; | ||
211 | else | ||
212 | ram = total_lowmem; | ||
213 | |||
214 | /* Calculate CAM values */ | ||
215 | __cam0 = 1UL << 2 * (__ilog2(ram) / 2); | ||
216 | if (__cam0 > cam_max) | ||
217 | __cam0 = cam_max; | ||
218 | ram -= __cam0; | ||
219 | if (ram) { | ||
220 | __cam1 = 1UL << 2 * (__ilog2(ram) / 2); | ||
221 | if (__cam1 > cam_max) | ||
222 | __cam1 = cam_max; | ||
223 | ram -= __cam1; | ||
224 | } | ||
225 | if (ram) { | ||
226 | __cam2 = 1UL << 2 * (__ilog2(ram) / 2); | ||
227 | if (__cam2 > cam_max) | ||
228 | __cam2 = cam_max; | ||
229 | ram -= __cam2; | ||
230 | } | ||
231 | |||
232 | printk(KERN_INFO "Memory CAM mapping: CAM0=%ldMb, CAM1=%ldMb," | ||
233 | " CAM2=%ldMb residual: %ldMb\n", | ||
234 | __cam0 >> 20, __cam1 >> 20, __cam2 >> 20, | ||
235 | (total_lowmem - __cam0 - __cam1 - __cam2) >> 20); | ||
236 | __max_low_memory = max_low_mem = __cam0 + __cam1 + __cam2; | ||
237 | } | ||
diff --git a/arch/powerpc/mm/hash_32.S b/arch/powerpc/mm/hash_32.S new file mode 100644 index 000000000000..57278a8dd132 --- /dev/null +++ b/arch/powerpc/mm/hash_32.S | |||
@@ -0,0 +1,618 @@ | |||
1 | /* | ||
2 | * arch/powerpc/mm/hash_32.S (formerly arch/ppc/kernel/hashtable.S) | ||
3 | * | ||
4 | * $Id: hashtable.S,v 1.6 1999/10/08 01:56:15 paulus Exp $ | ||
5 | * | ||
6 | * PowerPC version | ||
7 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
8 | * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP | ||
9 | * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> | ||
10 | * Adapted for Power Macintosh by Paul Mackerras. | ||
11 | * Low-level exception handlers and MMU support | ||
12 | * rewritten by Paul Mackerras. | ||
13 | * Copyright (C) 1996 Paul Mackerras. | ||
14 | * | ||
15 | * This file contains low-level assembler routines for managing | ||
16 | * the PowerPC MMU hash table. (PPC 8xx processors don't use a | ||
17 | * hash table, so this file is not used on them.) | ||
18 | * | ||
19 | * This program is free software; you can redistribute it and/or | ||
20 | * modify it under the terms of the GNU General Public License | ||
21 | * as published by the Free Software Foundation; either version | ||
22 | * 2 of the License, or (at your option) any later version. | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include <linux/config.h> | ||
27 | #include <asm/processor.h> | ||
28 | #include <asm/page.h> | ||
29 | #include <asm/pgtable.h> | ||
30 | #include <asm/cputable.h> | ||
31 | #include <asm/ppc_asm.h> | ||
32 | #include <asm/thread_info.h> | ||
33 | #include <asm/asm-offsets.h> | ||
34 | |||
35 | #ifdef CONFIG_SMP | ||
/* 4-byte lock word taken by the hash table routines in this file; 0 means free */
36 | .comm mmu_hash_lock,4 | ||
37 | #endif /* CONFIG_SMP */ | ||
38 | |||
39 | /* | ||
40 | * Sync CPUs with hash_page taking & releasing the hash | ||
41 | * table lock | ||
 *
 * The routine waits until mmu_hash_lock reads 0, claims it with a
 * lwarx/stwcx. pair, and then immediately stores 0 again to release it.
 * The lock address is built directly from the symbol (no tophys offset).
 * Modifies r0, r6, r8 and cr0.  Only built when CONFIG_SMP is defined.
42 | */ | ||
43 | #ifdef CONFIG_SMP | ||
44 | .text | ||
45 | _GLOBAL(hash_page_sync) | ||
46 | lis r8,mmu_hash_lock@h | ||
47 | ori r8,r8,mmu_hash_lock@l | ||
48 | lis r0,0x0fff /* r0 = 0x0fff0000, the value stored while the lock is held */ | ||
49 | b 10f | ||
/* 11: spin with plain loads until the lock word reads 0 */
50 | 11: lwz r6,0(r8) | ||
51 | cmpwi 0,r6,0 | ||
52 | bne 11b | ||
/* 10: try to claim the lock atomically; go back to spinning if it is taken */
53 | 10: lwarx r6,0,r8 | ||
54 | cmpwi 0,r6,0 | ||
55 | bne- 11b | ||
56 | stwcx. r0,0,r8 | ||
57 | bne- 10b /* reservation lost: retry the lwarx */ | ||
58 | isync | ||
59 | eieio | ||
/* release the lock straight away */
60 | li r0,0 | ||
61 | stw r0,0(r8) | ||
62 | blr | ||
63 | #endif | ||
64 | |||
65 | /* | ||
66 | * Load a PTE into the hash table, if possible. | ||
67 | * The address is in r4, and r3 contains an access flag: | ||
68 | * _PAGE_RW (0x400) if a write. | ||
69 | * r9 contains the SRR1 value, from which we use the MSR_PR bit. | ||
70 | * SPRG3 contains the physical address of the current task's thread. | ||
71 | * | ||
72 | * Returns to the caller if the access is illegal or there is no | ||
73 | * mapping for the address. Otherwise it places an appropriate PTE | ||
74 | * in the hash table and returns from the exception. | ||
75 | * Uses r0, r3 - r8, ctr, lr. | ||
 *
 * r11 is used as the base for the _CTR, GPR0, GPR7 and GPR8 slots
 * (presumably the exception frame set up by the caller -- confirm).
 * On the success path ctr, r0, r7 and r8 are reloaded from those slots
 * before branching to fast_exception_return.
 * On SMP, mmu_hash_lock is held from entry until just before either exit.
76 | */ | ||
77 | .text | ||
78 | _GLOBAL(hash_page) | ||
79 | #ifdef CONFIG_PPC64BRIDGE | ||
80 | mfmsr r0 | ||
81 | clrldi r0,r0,1 /* make sure it's in 32-bit mode */ | ||
82 | MTMSRD(r0) | ||
83 | isync | ||
84 | #endif | ||
85 | tophys(r7,0) /* gets -KERNELBASE into r7 */ | ||
86 | #ifdef CONFIG_SMP | ||
/* r8 = address of mmu_hash_lock offset by r7; spin until it reads 0, then store 0x0fff0000 */
87 | addis r8,r7,mmu_hash_lock@h | ||
88 | ori r8,r8,mmu_hash_lock@l | ||
89 | lis r0,0x0fff | ||
90 | b 10f | ||
91 | 11: lwz r6,0(r8) | ||
92 | cmpwi 0,r6,0 | ||
93 | bne 11b | ||
94 | 10: lwarx r6,0,r8 | ||
95 | cmpwi 0,r6,0 | ||
96 | bne- 11b | ||
97 | stwcx. r0,0,r8 | ||
98 | bne- 10b | ||
99 | isync | ||
100 | #endif | ||
101 | /* Get PTE (linux-style) and check access */ | ||
102 | lis r0,KERNELBASE@h /* check if kernel address */ | ||
103 | cmplw 0,r4,r0 | ||
104 | mfspr r8,SPRN_SPRG3 /* current task's THREAD (phys) */ | ||
105 | ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ | ||
106 | lwz r5,PGDIR(r8) /* virt page-table root */ | ||
107 | blt+ 112f /* assume user more likely */ | ||
108 | lis r5,swapper_pg_dir@ha /* if kernel address, use */ | ||
109 | addi r5,r5,swapper_pg_dir@l /* kernel page table */ | ||
110 | rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ | ||
111 | 112: add r5,r5,r7 /* convert to phys addr */ | ||
112 | rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ | ||
113 | lwz r8,0(r5) /* get pmd entry */ | ||
114 | rlwinm. r8,r8,0,0,19 /* extract address of pte page */ | ||
115 | #ifdef CONFIG_SMP | ||
116 | beq- hash_page_out /* return if no mapping */ | ||
117 | #else | ||
118 | /* XXX it seems like the 601 will give a machine fault on the | ||
119 | rfi if its alignment is wrong (bottom 4 bits of address are | ||
120 | 8 or 0xc) and we have had a not-taken conditional branch | ||
121 | to the address following the rfi. */ | ||
122 | beqlr- /* UP: no lock was taken, so return to the caller directly */ | ||
123 | #endif | ||
124 | rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */ | ||
125 | rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ | ||
126 | ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE | ||
127 | |||
128 | /* | ||
129 | * Update the linux PTE atomically. We do the lwarx up-front | ||
130 | * because almost always, there won't be a permission violation | ||
131 | * and there won't already be an HPTE, and thus we will have | ||
132 | * to update the PTE to set _PAGE_HASHPTE. -- paulus. | ||
133 | */ | ||
134 | retry: | ||
135 | lwarx r6,0,r8 /* get linux-style pte */ | ||
136 | andc. r5,r3,r6 /* check access & ~permission */ | ||
137 | #ifdef CONFIG_SMP | ||
138 | bne- hash_page_out /* return if access not permitted */ | ||
139 | #else | ||
140 | bnelr- | ||
141 | #endif | ||
142 | or r5,r0,r6 /* set accessed/dirty bits */ | ||
143 | stwcx. r5,0,r8 /* attempt to update PTE */ | ||
144 | bne- retry /* retry if someone got there first */ | ||
145 | |||
/* r5 = new linux PTE, r6 = old linux PTE, r3 = segment register value: the inputs create_hpte documents */
146 | mfsrin r3,r4 /* get segment reg for segment */ | ||
/* ctr is saved first because create_hpte loads ctr for its PTEG search loops */
147 | mfctr r0 | ||
148 | stw r0,_CTR(r11) | ||
149 | bl create_hpte /* add the hash table entry */ | ||
150 | |||
151 | #ifdef CONFIG_SMP | ||
/* release mmu_hash_lock by storing 0 */
152 | eieio | ||
153 | addis r8,r7,mmu_hash_lock@ha | ||
154 | li r0,0 | ||
155 | stw r0,mmu_hash_lock@l(r8) | ||
156 | #endif | ||
157 | |||
158 | /* Return from the exception */ | ||
159 | lwz r5,_CTR(r11) | ||
160 | mtctr r5 | ||
161 | lwz r0,GPR0(r11) | ||
162 | lwz r7,GPR7(r11) | ||
163 | lwz r8,GPR8(r11) | ||
164 | b fast_exception_return | ||
165 | |||
166 | #ifdef CONFIG_SMP | ||
/* failure exit on SMP: release mmu_hash_lock, then return to the caller */
167 | hash_page_out: | ||
168 | eieio | ||
169 | addis r8,r7,mmu_hash_lock@ha | ||
170 | li r0,0 | ||
171 | stw r0,mmu_hash_lock@l(r8) | ||
172 | blr | ||
173 | #endif /* CONFIG_SMP */ | ||
174 | |||
175 | /* | ||
176 | * Add an entry for a particular page to the hash table. | ||
177 | * | ||
178 | * add_hash_page(unsigned context, unsigned long va, unsigned long pmdval) | ||
179 | * | ||
180 | * We assume any necessary modifications to the pte (e.g. setting | ||
181 | * the accessed bit) have already been done and that there is actually | ||
182 | * a hash table in use (i.e. we're not on a 603). | ||
 *
 * Register use: r3 = context (turned into the VSID), r4 = va,
 * r5 = pmdval.  pmdval serves as the base address of the PTE page:
 * the PTE index taken from va is inserted into it and the result is
 * dereferenced after MSR_DR has been cleared.
 * r10 holds the original MSR, which is restored before returning.
 * The link register is saved at 4(r1) around the call to create_hpte.
183 | */ | ||
184 | _GLOBAL(add_hash_page) | ||
185 | mflr r0 | ||
186 | stw r0,4(r1) | ||
187 | |||
188 | /* Convert context and va to VSID */ | ||
189 | mulli r3,r3,897*16 /* multiply context by context skew */ | ||
190 | rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ | ||
191 | mulli r0,r0,0x111 /* multiply by ESID skew */ | ||
192 | add r3,r3,r0 /* note create_hpte trims to 24 bits */ | ||
193 | |||
194 | #ifdef CONFIG_SMP | ||
/* r1 with its low 13 bits cleared is used as the base for TI_CPU (presumably the thread_info at the bottom of the stack -- confirm) */
195 | rlwinm r8,r1,0,0,18 /* use cpu number to make tag */ | ||
196 | lwz r8,TI_CPU(r8) /* to go in mmu_hash_lock */ | ||
197 | oris r8,r8,12 | ||
198 | #endif /* CONFIG_SMP */ | ||
199 | |||
200 | /* | ||
201 | * We disable interrupts here, even on UP, because we don't | ||
202 | * want to race with hash_page, and because we want the | ||
203 | * _PAGE_HASHPTE bit to be a reliable indication of whether | ||
204 | * the HPTE exists (or at least whether one did once). | ||
205 | * We also turn off the MMU for data accesses so that we | ||
206 | * can't take a hash table miss (assuming the code is | ||
207 | * covered by a BAT). -- paulus | ||
208 | */ | ||
209 | mfmsr r10 | ||
210 | SYNC | ||
211 | rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ | ||
212 | rlwinm r0,r0,0,28,26 /* clear MSR_DR */ | ||
213 | mtmsr r0 | ||
214 | SYNC_601 | ||
215 | isync | ||
216 | |||
217 | tophys(r7,0) | ||
218 | |||
219 | #ifdef CONFIG_SMP | ||
/* r9 = address of mmu_hash_lock offset by r7; it stays in r9 until the unlock below */
220 | addis r9,r7,mmu_hash_lock@ha | ||
221 | addi r9,r9,mmu_hash_lock@l | ||
222 | 10: lwarx r0,0,r9 /* take the mmu_hash_lock */ | ||
223 | cmpi 0,r0,0 | ||
224 | bne- 11f | ||
225 | stwcx. r8,0,r9 | ||
226 | beq+ 12f | ||
/* 11: spin with plain loads until the lock reads 0, then retry the lwarx */
227 | 11: lwz r0,0(r9) | ||
228 | cmpi 0,r0,0 | ||
229 | beq 10b | ||
230 | b 11b | ||
231 | 12: isync | ||
232 | #endif | ||
233 | |||
234 | /* | ||
235 | * Fetch the linux pte and test and set _PAGE_HASHPTE atomically. | ||
236 | * If _PAGE_HASHPTE was already set, we don't replace the existing | ||
237 | * HPTE, so we just unlock and return. | ||
238 | */ | ||
239 | mr r8,r5 | ||
240 | rlwimi r8,r4,22,20,29 | ||
241 | 1: lwarx r6,0,r8 | ||
242 | andi. r0,r6,_PAGE_HASHPTE | ||
243 | bne 9f /* if HASHPTE already set, done */ | ||
244 | ori r5,r6,_PAGE_HASHPTE | ||
245 | stwcx. r5,0,r8 | ||
246 | bne- 1b | ||
247 | |||
/* here r5 = new linux PTE and r6 = old linux PTE, as create_hpte expects */
248 | bl create_hpte | ||
249 | |||
250 | 9: | ||
251 | #ifdef CONFIG_SMP | ||
252 | eieio | ||
253 | li r0,0 | ||
254 | stw r0,0(r9) /* clear mmu_hash_lock */ | ||
255 | #endif | ||
256 | |||
257 | /* reenable interrupts and DR */ | ||
258 | mtmsr r10 | ||
259 | SYNC_601 | ||
260 | isync | ||
261 | |||
262 | lwz r0,4(r1) | ||
263 | mtlr r0 | ||
264 | blr | ||
265 | |||
266 | /* | ||
267 | * This routine adds a hardware PTE to the hash table. | ||
268 | * It is designed to be called with the MMU either on or off. | ||
269 | * r3 contains the VSID, r4 contains the virtual address, | ||
270 | * r5 contains the linux PTE, r6 contains the old value of the | ||
271 | * linux PTE (before setting _PAGE_HASHPTE) and r7 contains the | ||
272 | * offset to be added to addresses (0 if the MMU is on, | ||
273 | * -KERNELBASE if it is off). | ||
274 | * On SMP, the caller should have the mmu_hash_lock held. | ||
275 | * We assume that the caller has (or will) set the _PAGE_HASHPTE | ||
276 | * bit in the linux PTE in memory. The value passed in r6 should | ||
277 | * be the old linux PTE value; if it doesn't have _PAGE_HASHPTE set | ||
278 | * this routine will skip the search for an existing HPTE. | ||
279 | * This procedure modifies r0, r3 - r6, r8, cr0. | ||
 * It also loads ctr (mtctr) for the PTEG search loops, so callers that
 * need ctr must save it first.
280 | * -- paulus. | ||
281 | * | ||
282 | * For speed, 4 of the instructions get patched once the size and | ||
283 | * physical address of the hash table are known. These definitions | ||
284 | * of Hash_base and Hash_bits below are just an example. | ||
285 | */ | ||
286 | Hash_base = 0xc0180000 | ||
287 | Hash_bits = 12 /* e.g. 256kB hash table */ | ||
288 | Hash_msk = (((1 << Hash_bits) - 1) * 64) | ||
289 | |||
290 | #ifndef CONFIG_PPC64BRIDGE | ||
291 | /* defines for the PTE format for 32-bit PPCs */ | ||
292 | #define PTE_SIZE 8 | ||
293 | #define PTEG_SIZE 64 | ||
294 | #define LG_PTEG_SIZE 6 | ||
295 | #define LDPTEu lwzu | ||
296 | #define STPTE stw | ||
297 | #define CMPPTE cmpw | ||
298 | #define PTE_H 0x40 | ||
299 | #define PTE_V 0x80000000 | ||
300 | #define TST_V(r) rlwinm. r,r,0,0,0 | ||
301 | #define SET_V(r) oris r,r,PTE_V@h | ||
/* the scratch argument t is not used by this variant of CLR_V */
302 | #define CLR_V(r,t) rlwinm r,r,0,1,31 | ||
303 | |||
304 | #else | ||
305 | /* defines for the PTE format for 64-bit PPCs */ | ||
306 | #define PTE_SIZE 16 | ||
307 | #define PTEG_SIZE 128 | ||
308 | #define LG_PTEG_SIZE 7 | ||
309 | #define LDPTEu ldu | ||
310 | #define STPTE std | ||
311 | #define CMPPTE cmpd | ||
312 | #define PTE_H 2 | ||
313 | #define PTE_V 1 | ||
314 | #define TST_V(r) andi. r,r,PTE_V | ||
315 | #define SET_V(r) ori r,r,PTE_V | ||
316 | #define CLR_V(r,t) li t,PTE_V; andc r,r,t | ||
317 | #endif /* CONFIG_PPC64BRIDGE */ | ||
318 | |||
/*
 * Mask-begin and mask-end operands for the rotate instructions below:
 * they select the Hash_bits-wide field that sits just above the low
 * LG_PTEG_SIZE bits of a hash table byte offset.
 */
319 | #define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1) | ||
320 | #define HASH_RIGHT 31-LG_PTEG_SIZE | ||
321 | |||
322 | _GLOBAL(create_hpte) | ||
323 | /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ | ||
324 | rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */ | ||
325 | rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ | ||
326 | and r8,r8,r0 /* writable if _RW & _DIRTY */ | ||
327 | rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ | ||
328 | rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ | ||
329 | ori r8,r8,0xe14 /* clear out reserved bits and M */ | ||
330 | andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */ | ||
331 | BEGIN_FTR_SECTION | ||
332 | ori r8,r8,_PAGE_COHERENT /* set M (coherence required) */ | ||
333 | END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT) | ||
334 | |||
335 | /* Construct the high word of the PPC-style PTE (r5) */ | ||
336 | #ifndef CONFIG_PPC64BRIDGE | ||
337 | rlwinm r5,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ | ||
338 | rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */ | ||
339 | #else /* CONFIG_PPC64BRIDGE */ | ||
340 | clrlwi r3,r3,8 /* reduce vsid to 24 bits */ | ||
341 | sldi r5,r3,12 /* shift vsid into position */ | ||
342 | rlwimi r5,r4,16,20,24 /* put in API (abbrev page index) */ | ||
343 | #endif /* CONFIG_PPC64BRIDGE */ | ||
344 | SET_V(r5) /* set V (valid) bit */ | ||
345 | |||
346 | /* Get the address of the primary PTE group in the hash table (r3) */ | ||
347 | _GLOBAL(hash_page_patch_A) | ||
348 | addis r0,r7,Hash_base@h /* base address of hash table */ | ||
349 | rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ | ||
350 | rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ | ||
351 | xor r3,r3,r0 /* make primary hash */ | ||
352 | li r0,8 /* PTEs/group */ | ||
353 | |||
354 | /* | ||
355 | * Test the _PAGE_HASHPTE bit in the old linux PTE, and skip the search | ||
356 | * if it is clear, meaning that the HPTE isn't there already... | ||
357 | */ | ||
358 | andi. r6,r6,_PAGE_HASHPTE | ||
359 | beq+ 10f /* no PTE: go look for an empty slot */ | ||
360 | tlbie r4 | ||
361 | |||
/* r4 no longer holds the virtual address from here on; it is reused as a scratch pointer */
362 | addis r4,r7,htab_hash_searches@ha | ||
363 | lwz r6,htab_hash_searches@l(r4) | ||
364 | addi r6,r6,1 /* count how many searches we do */ | ||
365 | stw r6,htab_hash_searches@l(r4) | ||
366 | |||
367 | /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ | ||
368 | mtctr r0 | ||
369 | addi r4,r3,-PTE_SIZE | ||
370 | 1: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */ | ||
371 | CMPPTE 0,r6,r5 | ||
372 | bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ | ||
373 | beq+ found_slot | ||
374 | |||
375 | /* Search the secondary PTEG for a matching PTE */ | ||
376 | ori r5,r5,PTE_H /* set H (secondary hash) bit */ | ||
377 | _GLOBAL(hash_page_patch_B) | ||
378 | xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ | ||
379 | xori r4,r4,(-PTEG_SIZE & 0xffff) | ||
380 | addi r4,r4,-PTE_SIZE | ||
381 | mtctr r0 | ||
382 | 2: LDPTEu r6,PTE_SIZE(r4) | ||
383 | CMPPTE 0,r6,r5 | ||
384 | bdnzf 2,2b | ||
385 | beq+ found_slot | ||
386 | xori r5,r5,PTE_H /* clear H bit again */ | ||
387 | |||
388 | /* Search the primary PTEG for an empty slot */ | ||
389 | 10: mtctr r0 | ||
390 | addi r4,r3,-PTE_SIZE /* search primary PTEG */ | ||
391 | 1: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */ | ||
392 | TST_V(r6) /* test valid bit */ | ||
393 | bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ | ||
394 | beq+ found_empty | ||
395 | |||
396 | /* update counter of times that the primary PTEG is full */ | ||
397 | addis r4,r7,primary_pteg_full@ha | ||
398 | lwz r6,primary_pteg_full@l(r4) | ||
399 | addi r6,r6,1 | ||
400 | stw r6,primary_pteg_full@l(r4) | ||
401 | |||
402 | /* Search the secondary PTEG for an empty slot */ | ||
403 | ori r5,r5,PTE_H /* set H (secondary hash) bit */ | ||
404 | _GLOBAL(hash_page_patch_C) | ||
405 | xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ | ||
406 | xori r4,r4,(-PTEG_SIZE & 0xffff) | ||
407 | addi r4,r4,-PTE_SIZE | ||
408 | mtctr r0 | ||
409 | 2: LDPTEu r6,PTE_SIZE(r4) | ||
410 | TST_V(r6) | ||
411 | bdnzf 2,2b | ||
412 | beq+ found_empty | ||
413 | xori r5,r5,PTE_H /* clear H bit again */ | ||
414 | |||
415 | /* | ||
416 | * Choose an arbitrary slot in the primary PTEG to overwrite. | ||
417 | * Since both the primary and secondary PTEGs are full, and we | ||
418 | * have no information that the PTEs in the primary PTEG are | ||
419 | * more important or useful than those in the secondary PTEG, | ||
420 | * and we know there is a definite (although small) speed | ||
421 | * advantage to putting the PTE in the primary PTEG, we always | ||
422 | * put the PTE in the primary PTEG. | ||
423 | */ | ||
/* next_slot is a byte offset that advances by PTE_SIZE per eviction and is masked with 7*PTE_SIZE, so it cycles through the 8 slots of the group */
424 | addis r4,r7,next_slot@ha | ||
425 | lwz r6,next_slot@l(r4) | ||
426 | addi r6,r6,PTE_SIZE | ||
427 | andi. r6,r6,7*PTE_SIZE | ||
428 | stw r6,next_slot@l(r4) | ||
429 | add r4,r3,r6 | ||
430 | |||
431 | #ifndef CONFIG_SMP | ||
432 | /* Store PTE in PTEG */ | ||
/* found_empty writes both words; found_slot rewrites only the second, since the first word already compared equal to r5 */
433 | found_empty: | ||
434 | STPTE r5,0(r4) | ||
435 | found_slot: | ||
436 | STPTE r8,PTE_SIZE/2(r4) | ||
437 | |||
438 | #else /* CONFIG_SMP */ | ||
439 | /* | ||
440 | * Between the tlbie above and updating the hash table entry below, | ||
441 | * another CPU could read the hash table entry and put it in its TLB. | ||
442 | * There are 3 cases: | ||
443 | * 1. using an empty slot | ||
444 | * 2. updating an earlier entry to change permissions (i.e. enable write) | ||
445 | * 3. taking over the PTE for an unrelated address | ||
446 | * | ||
447 | * In each case it doesn't really matter if the other CPUs have the old | ||
448 | * PTE in their TLB. So we don't need to bother with another tlbie here, | ||
449 | * which is convenient as we've overwritten the register that had the | ||
450 | * address. :-) The tlbie above is mainly to make sure that this CPU comes | ||
451 | * and gets the new PTE from the hash table. | ||
452 | * | ||
453 | * We do however have to make sure that the PTE is never in an invalid | ||
454 | * state with the V bit set. | ||
455 | */ | ||
456 | found_empty: | ||
457 | found_slot: | ||
458 | CLR_V(r5,r0) /* clear V (valid) bit in PTE */ | ||
459 | STPTE r5,0(r4) | ||
460 | sync | ||
461 | TLBSYNC | ||
462 | STPTE r8,PTE_SIZE/2(r4) /* put in correct RPN, WIMG, PP bits */ | ||
463 | sync | ||
464 | SET_V(r5) | ||
465 | STPTE r5,0(r4) /* finally set V bit in PTE */ | ||
466 | #endif /* CONFIG_SMP */ | ||
467 | |||
468 | sync /* make sure pte updates get to memory */ | ||
469 | blr | ||
470 | |||
/* 4-byte words used by create_hpte: the eviction offset and two event counters */
471 | .comm next_slot,4 | ||
472 | .comm primary_pteg_full,4 | ||
473 | .comm htab_hash_searches,4 | ||
474 | |||
475 | /* | ||
476 | * Flush the entry for a particular page from the hash table. | ||
477 | * | ||
478 | * flush_hash_pages(unsigned context, unsigned long va, unsigned long pmdval, | ||
479 | * int count) | ||
480 | * | ||
481 | * We assume that there is a hash table in use (Hash != 0). | ||
 *
 * Register use: r3 = context, r4 = va, r5 = pmdval, r6 = count.
 * The loop walks up to count consecutive linux PTEs (4 bytes each,
 * va advancing by 0x1000 per step) and flushes every one that has
 * _PAGE_HASHPTE set.  r10 holds the original MSR, restored on exit.
 * NOTE(review): the VSID (r3) and the PTE high word (r11) are computed
 * once, from the first PTE found; this appears to rely on the whole
 * range lying within a single PTE page -- confirm with the callers.
482 | */ | ||
483 | _GLOBAL(flush_hash_pages) | ||
484 | tophys(r7,0) | ||
485 | |||
486 | /* | ||
487 | * We disable interrupts here, even on UP, because we want | ||
488 | * the _PAGE_HASHPTE bit to be a reliable indication of | ||
489 | * whether the HPTE exists (or at least whether one did once). | ||
490 | * We also turn off the MMU for data accesses so that we | ||
491 | * can't take a hash table miss (assuming the code is | ||
492 | * covered by a BAT). -- paulus | ||
493 | */ | ||
494 | mfmsr r10 | ||
495 | SYNC | ||
496 | rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ | ||
497 | rlwinm r0,r0,0,28,26 /* clear MSR_DR */ | ||
498 | mtmsr r0 | ||
499 | SYNC_601 | ||
500 | isync | ||
501 | |||
502 | /* First find a PTE in the range that has _PAGE_HASHPTE set */ | ||
503 | rlwimi r5,r4,22,20,29 | ||
504 | 1: lwz r0,0(r5) | ||
505 | cmpwi cr1,r6,1 /* cr1 records whether this is the last PTE of the range */ | ||
506 | andi. r0,r0,_PAGE_HASHPTE | ||
507 | bne 2f | ||
508 | ble cr1,19f /* nothing to flush: exit without ever taking the lock */ | ||
509 | addi r4,r4,0x1000 | ||
510 | addi r5,r5,4 | ||
511 | addi r6,r6,-1 | ||
512 | b 1b | ||
513 | |||
514 | /* Convert context and va to VSID */ | ||
515 | 2: mulli r3,r3,897*16 /* multiply context by context skew */ | ||
516 | rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ | ||
517 | mulli r0,r0,0x111 /* multiply by ESID skew */ | ||
518 | add r3,r3,r0 /* note code below trims to 24 bits */ | ||
519 | |||
520 | /* Construct the high word of the PPC-style PTE (r11) */ | ||
521 | #ifndef CONFIG_PPC64BRIDGE | ||
522 | rlwinm r11,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ | ||
523 | rlwimi r11,r4,10,26,31 /* put in API (abbrev page index) */ | ||
524 | #else /* CONFIG_PPC64BRIDGE */ | ||
525 | clrlwi r3,r3,8 /* reduce vsid to 24 bits */ | ||
526 | sldi r11,r3,12 /* shift vsid into position */ | ||
527 | rlwimi r11,r4,16,20,24 /* put in API (abbrev page index) */ | ||
528 | #endif /* CONFIG_PPC64BRIDGE */ | ||
529 | SET_V(r11) /* set V (valid) bit */ | ||
530 | |||
531 | #ifdef CONFIG_SMP | ||
/* take mmu_hash_lock (address kept in r9); the value stored is the cpu number with 9 in the upper halfword */
532 | addis r9,r7,mmu_hash_lock@ha | ||
533 | addi r9,r9,mmu_hash_lock@l | ||
534 | rlwinm r8,r1,0,0,18 | ||
535 | add r8,r8,r7 /* MSR_DR is off here, so apply the r7 offset before the load */ | ||
536 | lwz r8,TI_CPU(r8) | ||
537 | oris r8,r8,9 | ||
538 | 10: lwarx r0,0,r9 | ||
539 | cmpi 0,r0,0 | ||
540 | bne- 11f | ||
541 | stwcx. r8,0,r9 | ||
542 | beq+ 12f | ||
543 | 11: lwz r0,0(r9) | ||
544 | cmpi 0,r0,0 | ||
545 | beq 10b | ||
546 | b 11b | ||
547 | 12: isync | ||
548 | #endif | ||
549 | |||
550 | /* | ||
551 | * Check the _PAGE_HASHPTE bit in the linux PTE. If it is | ||
552 | * already clear, we're done (for this pte). If not, | ||
553 | * clear it (atomically) and proceed. -- paulus. | ||
554 | */ | ||
555 | 33: lwarx r8,0,r5 /* fetch the pte */ | ||
556 | andi. r0,r8,_PAGE_HASHPTE | ||
557 | beq 8f /* done if HASHPTE is already clear */ | ||
558 | rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */ | ||
559 | stwcx. r8,0,r5 /* update the pte */ | ||
560 | bne- 33b | ||
561 | |||
562 | /* Get the address of the primary PTE group in the hash table (r8) */ | ||
563 | _GLOBAL(flush_hash_patch_A) | ||
564 | addis r8,r7,Hash_base@h /* base address of hash table */ | ||
565 | rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ | ||
566 | rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ | ||
567 | xor r8,r0,r8 /* make primary hash */ | ||
568 | |||
569 | /* Search the primary PTEG for a PTE whose 1st (d)word matches r11 */ | ||
570 | li r0,8 /* PTEs/group */ | ||
571 | mtctr r0 | ||
572 | addi r12,r8,-PTE_SIZE | ||
573 | 1: LDPTEu r0,PTE_SIZE(r12) /* get next PTE */ | ||
574 | CMPPTE 0,r0,r11 | ||
575 | bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ | ||
576 | beq+ 3f | ||
577 | |||
578 | /* Search the secondary PTEG for a matching PTE */ | ||
579 | ori r11,r11,PTE_H /* set H (secondary hash) bit */ | ||
580 | li r0,8 /* PTEs/group */ | ||
581 | _GLOBAL(flush_hash_patch_B) | ||
582 | xoris r12,r8,Hash_msk>>16 /* compute secondary hash */ | ||
583 | xori r12,r12,(-PTEG_SIZE & 0xffff) | ||
584 | addi r12,r12,-PTE_SIZE | ||
585 | mtctr r0 | ||
586 | 2: LDPTEu r0,PTE_SIZE(r12) | ||
587 | CMPPTE 0,r0,r11 | ||
588 | bdnzf 2,2b | ||
589 | xori r11,r11,PTE_H /* clear H again */ | ||
590 | bne- 4f /* should rarely fail to find it */ | ||
591 | |||
/* 3: r12 points at the matching HPTE; zeroing its first (d)word clears the V bit */
592 | 3: li r0,0 | ||
593 | STPTE r0,0(r12) /* invalidate entry */ | ||
594 | 4: sync | ||
595 | tlbie r4 /* in hw tlb too */ | ||
596 | sync | ||
597 | |||
/* 8/81: step to the next linux PTE; go back to 33 for each one that has _PAGE_HASHPTE set, stop once the count is used up */
598 | 8: ble cr1,9f /* if all ptes checked */ | ||
599 | 81: addi r6,r6,-1 | ||
600 | addi r5,r5,4 /* advance to next pte */ | ||
601 | addi r4,r4,0x1000 | ||
602 | lwz r0,0(r5) /* check next pte */ | ||
603 | cmpwi cr1,r6,1 | ||
604 | andi. r0,r0,_PAGE_HASHPTE | ||
605 | bne 33b | ||
606 | bgt cr1,81b | ||
607 | |||
608 | 9: | ||
609 | #ifdef CONFIG_SMP | ||
610 | TLBSYNC | ||
611 | li r0,0 | ||
612 | stw r0,0(r9) /* clear mmu_hash_lock */ | ||
613 | #endif | ||
614 | |||
/* 19: common exit; restore the MSR saved in r10 */
615 | 19: mtmsr r10 | ||
616 | SYNC_601 | ||
617 | isync | ||
618 | blr | ||
diff --git a/arch/powerpc/mm/init.c b/arch/powerpc/mm/init.c new file mode 100644 index 000000000000..f4d983a6e521 --- /dev/null +++ b/arch/powerpc/mm/init.c | |||
@@ -0,0 +1,581 @@ | |||
1 | /* | ||
2 | * PowerPC version | ||
3 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
4 | * | ||
5 | * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) | ||
6 | * and Cort Dougan (PReP) (cort@cs.nmt.edu) | ||
7 | * Copyright (C) 1996 Paul Mackerras | ||
8 | * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). | ||
9 | * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com) | ||
10 | * | ||
11 | * Derived from "arch/i386/mm/init.c" | ||
12 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or | ||
15 | * modify it under the terms of the GNU General Public License | ||
16 | * as published by the Free Software Foundation; either version | ||
17 | * 2 of the License, or (at your option) any later version. | ||
18 | * | ||
19 | */ | ||
20 | |||
21 | #include <linux/config.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/sched.h> | ||
24 | #include <linux/kernel.h> | ||
25 | #include <linux/errno.h> | ||
26 | #include <linux/string.h> | ||
27 | #include <linux/types.h> | ||
28 | #include <linux/mm.h> | ||
29 | #include <linux/stddef.h> | ||
30 | #include <linux/init.h> | ||
31 | #include <linux/bootmem.h> | ||
32 | #include <linux/highmem.h> | ||
33 | #include <linux/initrd.h> | ||
34 | #include <linux/pagemap.h> | ||
35 | |||
36 | #include <asm/pgalloc.h> | ||
37 | #include <asm/prom.h> | ||
38 | #include <asm/io.h> | ||
39 | #include <asm/mmu_context.h> | ||
40 | #include <asm/pgtable.h> | ||
41 | #include <asm/mmu.h> | ||
42 | #include <asm/smp.h> | ||
43 | #include <asm/machdep.h> | ||
44 | #include <asm/btext.h> | ||
45 | #include <asm/tlb.h> | ||
46 | #include <asm/bootinfo.h> | ||
47 | #include <asm/prom.h> | ||
48 | |||
49 | #include "mem_pieces.h" | ||
50 | #include "mmu_decl.h" | ||
51 | |||
52 | #if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) | ||
53 | /* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */ | ||
/* NOTE(review): the #error text below names CONFIG_START_KERNEL while the guard above tests CONFIG_KERNEL_START_BOOL -- presumably CONFIG_KERNEL_START is meant; confirm. */
54 | #if (CONFIG_LOWMEM_SIZE > (0xF0000000 - KERNELBASE)) | ||
55 | #error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL" | ||
56 | #endif | ||
57 | #endif | ||
/* configured lowmem limit; used as the initial value of __max_low_memory */
58 | #define MAX_LOW_MEM CONFIG_LOWMEM_SIZE | ||
59 | |||
60 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
61 | |||
/* both are assigned in MMU_init: total_memory is the RAM size after the mem= clamp, total_lowmem additionally never exceeds __max_low_memory */
62 | unsigned long total_memory; | ||
63 | unsigned long total_lowmem; | ||
64 | |||
65 | unsigned long ppc_memstart; | ||
66 | unsigned long ppc_memoffset = PAGE_OFFSET; | ||
67 | |||
68 | int mem_init_done; | ||
69 | int init_bootmem_done; | ||
70 | int boot_mapsize; | ||
71 | #ifdef CONFIG_PPC_PMAC | ||
72 | unsigned long agp_special_page; | ||
73 | #endif | ||
74 | |||
75 | extern char _end[]; | ||
76 | extern char etext[], _stext[]; | ||
77 | extern char __init_begin, __init_end; | ||
78 | |||
79 | #ifdef CONFIG_HIGHMEM | ||
80 | pte_t *kmap_pte; | ||
81 | pgprot_t kmap_prot; | ||
82 | |||
83 | EXPORT_SYMBOL(kmap_prot); | ||
84 | EXPORT_SYMBOL(kmap_pte); | ||
85 | #endif | ||
86 | |||
87 | void MMU_init(void); | ||
88 | void set_phys_avail(unsigned long total_ram); | ||
89 | |||
90 | /* XXX should be in current.h -- paulus */ | ||
91 | extern struct task_struct *current_set[NR_CPUS]; | ||
92 | |||
93 | char *klimit = _end; | ||
94 | struct mem_pieces phys_avail; | ||
/* the device-tree node named memory; looked up by find_end_of_memory and read by get_mem_prop */
95 | struct device_node *memory_node; | ||
96 | |||
97 | /* | ||
98 | * this tells the system to map all of ram with the segregs | ||
99 | * (i.e. page tables) instead of the bats. | ||
100 | * -- Cort | ||
101 | */ | ||
/* set to 1 by MMU_setup when nobats / noltlbs appears on the command line */
102 | int __map_without_bats; | ||
103 | int __map_without_ltlbs; | ||
104 | |||
/* filled in by MMU_setup from the mem= option; MMU_init only applies it when it is non-zero */
105 | /* max amount of RAM to use */ | ||
106 | unsigned long __max_memory; | ||
107 | /* max amount of low RAM to map in */ | ||
108 | unsigned long __max_low_memory = MAX_LOW_MEM; | ||
109 | |||
110 | /* | ||
111 | * Read in a property describing some pieces of memory. | ||
112 | */ | ||
113 | static int __init get_mem_prop(char *name, struct mem_pieces *mp) | ||
114 | { | ||
115 | struct reg_property *rp; | ||
116 | int i, s; | ||
117 | unsigned int *ip; | ||
118 | int nac = prom_n_addr_cells(memory_node); | ||
119 | int nsc = prom_n_size_cells(memory_node); | ||
120 | |||
121 | ip = (unsigned int *) get_property(memory_node, name, &s); | ||
122 | if (ip == NULL) { | ||
123 | printk(KERN_ERR "error: couldn't get %s property on /memory\n", | ||
124 | name); | ||
125 | return 0; | ||
126 | } | ||
127 | s /= (nsc + nac) * 4; | ||
128 | rp = mp->regions; | ||
129 | for (i = 0; i < s; ++i, ip += nac+nsc) { | ||
130 | if (nac >= 2 && ip[nac-2] != 0) | ||
131 | continue; | ||
132 | rp->address = ip[nac-1]; | ||
133 | if (nsc >= 2 && ip[nac+nsc-2] != 0) | ||
134 | rp->size = ~0U; | ||
135 | else | ||
136 | rp->size = ip[nac+nsc-1]; | ||
137 | ++rp; | ||
138 | } | ||
139 | mp->n_regions = rp - mp->regions; | ||
140 | |||
141 | /* Make sure the pieces are sorted. */ | ||
142 | mem_pieces_sort(mp); | ||
143 | mem_pieces_coalesce(mp); | ||
144 | return 1; | ||
145 | } | ||
146 | |||
147 | /* | ||
148 | * Collect information about physical RAM and which pieces are | ||
149 | * already in use from the device tree. | ||
150 | */ | ||
151 | unsigned long __init find_end_of_memory(void) | ||
152 | { | ||
153 | unsigned long a, total; | ||
154 | struct mem_pieces phys_mem; | ||
155 | |||
156 | /* | ||
157 | * Find out where physical memory is, and check that it | ||
158 | * starts at 0 and is contiguous. It seems that RAM is | ||
159 | * always physically contiguous on Power Macintoshes. | ||
160 | * | ||
161 | * Supporting discontiguous physical memory isn't hard, | ||
162 | * it just makes the virtual <-> physical mapping functions | ||
163 | * more complicated (or else you end up wasting space | ||
164 | * in mem_map). | ||
165 | */ | ||
166 | memory_node = find_devices("memory"); | ||
167 | if (memory_node == NULL || !get_mem_prop("reg", &phys_mem) | ||
168 | || phys_mem.n_regions == 0) | ||
169 | panic("No RAM??"); | ||
170 | a = phys_mem.regions[0].address; | ||
171 | if (a != 0) | ||
172 | panic("RAM doesn't start at physical address 0"); | ||
173 | total = phys_mem.regions[0].size; | ||
174 | |||
175 | if (phys_mem.n_regions > 1) { | ||
176 | printk("RAM starting at 0x%x is not contiguous\n", | ||
177 | phys_mem.regions[1].address); | ||
178 | printk("Using RAM from 0 to 0x%lx\n", total-1); | ||
179 | } | ||
180 | |||
181 | return total; | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * Check for command-line options that affect what MMU_init will do. | ||
186 | */ | ||
187 | void MMU_setup(void) | ||
188 | { | ||
189 | /* Check for nobats option (used in mapin_ram). */ | ||
190 | if (strstr(cmd_line, "nobats")) { | ||
191 | __map_without_bats = 1; | ||
192 | } | ||
193 | |||
194 | if (strstr(cmd_line, "noltlbs")) { | ||
195 | __map_without_ltlbs = 1; | ||
196 | } | ||
197 | |||
198 | /* Look for mem= option on command line */ | ||
199 | if (strstr(cmd_line, "mem=")) { | ||
200 | char *p, *q; | ||
201 | unsigned long maxmem = 0; | ||
202 | |||
203 | for (q = cmd_line; (p = strstr(q, "mem=")) != 0; ) { | ||
204 | q = p + 4; | ||
205 | if (p > cmd_line && p[-1] != ' ') | ||
206 | continue; | ||
207 | maxmem = simple_strtoul(q, &q, 0); | ||
208 | if (*q == 'k' || *q == 'K') { | ||
209 | maxmem <<= 10; | ||
210 | ++q; | ||
211 | } else if (*q == 'm' || *q == 'M') { | ||
212 | maxmem <<= 20; | ||
213 | ++q; | ||
214 | } | ||
215 | } | ||
216 | __max_memory = maxmem; | ||
217 | } | ||
218 | } | ||
219 | |||
220 | /* | ||
221 | * MMU_init sets up the basic memory mappings for the kernel, | ||
222 | * including both RAM and possibly some I/O regions, | ||
223 | * and sets up the page tables and the MMU hardware ready to go. | ||
224 | */ | ||
225 | void __init MMU_init(void) | ||
226 | { | ||
227 | if (ppc_md.progress) | ||
228 | ppc_md.progress("MMU:enter", 0x111); | ||
229 | |||
230 | /* parse args from command line */ | ||
231 | MMU_setup(); | ||
232 | |||
233 | /* | ||
234 | * Figure out how much memory we have, how much | ||
235 | * is lowmem, and how much is highmem. If we were | ||
236 | * passed the total memory size from the bootloader, | ||
237 | * just use it. | ||
238 | */ | ||
239 | if (boot_mem_size) | ||
240 | total_memory = boot_mem_size; | ||
241 | else | ||
242 | total_memory = ppc_md.find_end_of_memory(); | ||
243 | |||
244 | if (__max_memory && total_memory > __max_memory) | ||
245 | total_memory = __max_memory; | ||
246 | total_lowmem = total_memory; | ||
247 | #ifdef CONFIG_FSL_BOOKE | ||
248 | /* Freescale Book-E parts expect lowmem to be mapped by fixed TLB | ||
249 | * entries, so we need to adjust lowmem to match the amount we can map | ||
250 | * in the fixed entries */ | ||
251 | adjust_total_lowmem(); | ||
252 | #endif /* CONFIG_FSL_BOOKE */ | ||
253 | if (total_lowmem > __max_low_memory) { | ||
254 | total_lowmem = __max_low_memory; | ||
255 | #ifndef CONFIG_HIGHMEM | ||
256 | total_memory = total_lowmem; | ||
257 | #endif /* CONFIG_HIGHMEM */ | ||
258 | } | ||
259 | set_phys_avail(total_lowmem); | ||
260 | |||
261 | /* Initialize the MMU hardware */ | ||
262 | if (ppc_md.progress) | ||
263 | ppc_md.progress("MMU:hw init", 0x300); | ||
264 | MMU_init_hw(); | ||
265 | |||
266 | /* Map in all of RAM starting at KERNELBASE */ | ||
267 | if (ppc_md.progress) | ||
268 | ppc_md.progress("MMU:mapin", 0x301); | ||
269 | mapin_ram(); | ||
270 | |||
271 | #ifdef CONFIG_HIGHMEM | ||
272 | ioremap_base = PKMAP_BASE; | ||
273 | #else | ||
274 | ioremap_base = 0xfe000000UL; /* for now, could be 0xfffff000 */ | ||
275 | #endif /* CONFIG_HIGHMEM */ | ||
276 | ioremap_bot = ioremap_base; | ||
277 | |||
278 | /* Map in I/O resources */ | ||
279 | if (ppc_md.progress) | ||
280 | ppc_md.progress("MMU:setio", 0x302); | ||
281 | if (ppc_md.setup_io_mappings) | ||
282 | ppc_md.setup_io_mappings(); | ||
283 | |||
284 | /* Initialize the context management stuff */ | ||
285 | mmu_context_init(); | ||
286 | |||
287 | if (ppc_md.progress) | ||
288 | ppc_md.progress("MMU:exit", 0x211); | ||
289 | |||
290 | #ifdef CONFIG_BOOTX_TEXT | ||
291 | /* By default, we are no longer mapped */ | ||
292 | boot_text_mapped = 0; | ||
293 | /* Must be done last, or ppc_md.progress will die. */ | ||
294 | map_boot_text(); | ||
295 | #endif | ||
296 | } | ||
297 | |||
298 | /* This is only called until mem_init is done. */ | ||
299 | void __init *early_get_page(void) | ||
300 | { | ||
301 | void *p; | ||
302 | |||
303 | if (init_bootmem_done) { | ||
304 | p = alloc_bootmem_pages(PAGE_SIZE); | ||
305 | } else { | ||
306 | p = mem_pieces_find(PAGE_SIZE, PAGE_SIZE); | ||
307 | } | ||
308 | return p; | ||
309 | } | ||
310 | |||
311 | /* Free up now-unused memory */ | ||
312 | static void free_sec(unsigned long start, unsigned long end, const char *name) | ||
313 | { | ||
314 | unsigned long cnt = 0; | ||
315 | |||
316 | while (start < end) { | ||
317 | ClearPageReserved(virt_to_page(start)); | ||
318 | set_page_count(virt_to_page(start), 1); | ||
319 | free_page(start); | ||
320 | cnt++; | ||
321 | start += PAGE_SIZE; | ||
322 | } | ||
323 | if (cnt) { | ||
324 | printk(" %ldk %s", cnt << (PAGE_SHIFT - 10), name); | ||
325 | totalram_pages += cnt; | ||
326 | } | ||
327 | } | ||
328 | |||
329 | void free_initmem(void) | ||
330 | { | ||
331 | #define FREESEC(TYPE) \ | ||
332 | free_sec((unsigned long)(&__ ## TYPE ## _begin), \ | ||
333 | (unsigned long)(&__ ## TYPE ## _end), \ | ||
334 | #TYPE); | ||
335 | |||
336 | printk ("Freeing unused kernel memory:"); | ||
337 | FREESEC(init); | ||
338 | printk("\n"); | ||
339 | ppc_md.progress = NULL; | ||
340 | #undef FREESEC | ||
341 | } | ||
342 | |||
#ifdef CONFIG_BLK_DEV_INITRD
/*
 * Release the pages in [start, end) that held the initial ramdisk,
 * counting each one in totalram_pages.  Nothing is printed or freed
 * when the range is empty.
 */
void free_initrd_mem(unsigned long start, unsigned long end)
{
	unsigned long addr;

	if (start >= end)
		return;

	printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
	for (addr = start; addr < end; addr += PAGE_SIZE) {
		struct page *page = virt_to_page(addr);

		ClearPageReserved(page);
		set_page_count(page, 1);
		free_page(addr);
		totalram_pages++;
	}
}
#endif
356 | |||
357 | /* | ||
358 | * Initialize the bootmem system and give it all the memory we | ||
359 | * have available. | ||
360 | */ | ||
361 | void __init do_init_bootmem(void) | ||
362 | { | ||
363 | unsigned long start, size; | ||
364 | int i; | ||
365 | |||
366 | /* | ||
367 | * Find an area to use for the bootmem bitmap. | ||
368 | * We look for the first area which is at least | ||
369 | * 128kB in length (128kB is enough for a bitmap | ||
370 | * for 4GB of memory, using 4kB pages), plus 1 page | ||
371 | * (in case the address isn't page-aligned). | ||
372 | */ | ||
373 | start = 0; | ||
374 | size = 0; | ||
375 | for (i = 0; i < phys_avail.n_regions; ++i) { | ||
376 | unsigned long a = phys_avail.regions[i].address; | ||
377 | unsigned long s = phys_avail.regions[i].size; | ||
378 | if (s <= size) | ||
379 | continue; | ||
380 | start = a; | ||
381 | size = s; | ||
382 | if (s >= 33 * PAGE_SIZE) | ||
383 | break; | ||
384 | } | ||
385 | start = PAGE_ALIGN(start); | ||
386 | |||
387 | min_low_pfn = start >> PAGE_SHIFT; | ||
388 | max_low_pfn = (PPC_MEMSTART + total_lowmem) >> PAGE_SHIFT; | ||
389 | max_pfn = (PPC_MEMSTART + total_memory) >> PAGE_SHIFT; | ||
390 | boot_mapsize = init_bootmem_node(&contig_page_data, min_low_pfn, | ||
391 | PPC_MEMSTART >> PAGE_SHIFT, | ||
392 | max_low_pfn); | ||
393 | |||
394 | /* remove the bootmem bitmap from the available memory */ | ||
395 | mem_pieces_remove(&phys_avail, start, boot_mapsize, 1); | ||
396 | |||
397 | /* add everything in phys_avail into the bootmem map */ | ||
398 | for (i = 0; i < phys_avail.n_regions; ++i) | ||
399 | free_bootmem(phys_avail.regions[i].address, | ||
400 | phys_avail.regions[i].size); | ||
401 | |||
402 | init_bootmem_done = 1; | ||
403 | } | ||
404 | |||
405 | /* | ||
406 | * paging_init() sets up the page tables - in fact we've already done this. | ||
407 | */ | ||
408 | void __init paging_init(void) | ||
409 | { | ||
410 | unsigned long zones_size[MAX_NR_ZONES], i; | ||
411 | |||
412 | #ifdef CONFIG_HIGHMEM | ||
413 | map_page(PKMAP_BASE, 0, 0); /* XXX gross */ | ||
414 | pkmap_page_table = pte_offset_kernel(pmd_offset(pgd_offset_k | ||
415 | (PKMAP_BASE), PKMAP_BASE), PKMAP_BASE); | ||
416 | map_page(KMAP_FIX_BEGIN, 0, 0); /* XXX gross */ | ||
417 | kmap_pte = pte_offset_kernel(pmd_offset(pgd_offset_k | ||
418 | (KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), KMAP_FIX_BEGIN); | ||
419 | kmap_prot = PAGE_KERNEL; | ||
420 | #endif /* CONFIG_HIGHMEM */ | ||
421 | |||
422 | /* | ||
423 | * All pages are DMA-able so we put them all in the DMA zone. | ||
424 | */ | ||
425 | zones_size[ZONE_DMA] = total_lowmem >> PAGE_SHIFT; | ||
426 | for (i = 1; i < MAX_NR_ZONES; i++) | ||
427 | zones_size[i] = 0; | ||
428 | |||
429 | #ifdef CONFIG_HIGHMEM | ||
430 | zones_size[ZONE_HIGHMEM] = (total_memory - total_lowmem) >> PAGE_SHIFT; | ||
431 | #endif /* CONFIG_HIGHMEM */ | ||
432 | |||
433 | free_area_init(zones_size); | ||
434 | } | ||
435 | |||
436 | void __init mem_init(void) | ||
437 | { | ||
438 | unsigned long addr; | ||
439 | int codepages = 0; | ||
440 | int datapages = 0; | ||
441 | int initpages = 0; | ||
442 | #ifdef CONFIG_HIGHMEM | ||
443 | unsigned long highmem_mapnr; | ||
444 | |||
445 | highmem_mapnr = total_lowmem >> PAGE_SHIFT; | ||
446 | #endif /* CONFIG_HIGHMEM */ | ||
447 | max_mapnr = total_memory >> PAGE_SHIFT; | ||
448 | |||
449 | high_memory = (void *) __va(PPC_MEMSTART + total_lowmem); | ||
450 | num_physpages = max_mapnr; /* RAM is assumed contiguous */ | ||
451 | |||
452 | totalram_pages += free_all_bootmem(); | ||
453 | |||
454 | #ifdef CONFIG_BLK_DEV_INITRD | ||
455 | /* if we are booted from BootX with an initial ramdisk, | ||
456 | make sure the ramdisk pages aren't reserved. */ | ||
457 | if (initrd_start) { | ||
458 | for (addr = initrd_start; addr < initrd_end; addr += PAGE_SIZE) | ||
459 | ClearPageReserved(virt_to_page(addr)); | ||
460 | } | ||
461 | #endif /* CONFIG_BLK_DEV_INITRD */ | ||
462 | |||
463 | #ifdef CONFIG_PPC_OF | ||
464 | /* mark the RTAS pages as reserved */ | ||
465 | if ( rtas_data ) | ||
466 | for (addr = (ulong)__va(rtas_data); | ||
467 | addr < PAGE_ALIGN((ulong)__va(rtas_data)+rtas_size) ; | ||
468 | addr += PAGE_SIZE) | ||
469 | SetPageReserved(virt_to_page(addr)); | ||
470 | #endif | ||
471 | #ifdef CONFIG_PPC_PMAC | ||
472 | if (agp_special_page) | ||
473 | SetPageReserved(virt_to_page(agp_special_page)); | ||
474 | #endif | ||
475 | for (addr = PAGE_OFFSET; addr < (unsigned long)high_memory; | ||
476 | addr += PAGE_SIZE) { | ||
477 | if (!PageReserved(virt_to_page(addr))) | ||
478 | continue; | ||
479 | if (addr < (ulong) etext) | ||
480 | codepages++; | ||
481 | else if (addr >= (unsigned long)&__init_begin | ||
482 | && addr < (unsigned long)&__init_end) | ||
483 | initpages++; | ||
484 | else if (addr < (ulong) klimit) | ||
485 | datapages++; | ||
486 | } | ||
487 | |||
488 | #ifdef CONFIG_HIGHMEM | ||
489 | { | ||
490 | unsigned long pfn; | ||
491 | |||
492 | for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { | ||
493 | struct page *page = mem_map + pfn; | ||
494 | |||
495 | ClearPageReserved(page); | ||
496 | set_page_count(page, 1); | ||
497 | __free_page(page); | ||
498 | totalhigh_pages++; | ||
499 | } | ||
500 | totalram_pages += totalhigh_pages; | ||
501 | } | ||
502 | #endif /* CONFIG_HIGHMEM */ | ||
503 | |||
504 | printk("Memory: %luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n", | ||
505 | (unsigned long)nr_free_pages()<< (PAGE_SHIFT-10), | ||
506 | codepages<< (PAGE_SHIFT-10), datapages<< (PAGE_SHIFT-10), | ||
507 | initpages<< (PAGE_SHIFT-10), | ||
508 | (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); | ||
509 | |||
510 | #ifdef CONFIG_PPC_PMAC | ||
511 | if (agp_special_page) | ||
512 | printk(KERN_INFO "AGP special page: 0x%08lx\n", agp_special_page); | ||
513 | #endif | ||
514 | |||
515 | mem_init_done = 1; | ||
516 | } | ||
517 | |||
518 | /* | ||
519 | * Set phys_avail to the amount of physical memory, | ||
520 | * less the kernel text/data/bss. | ||
521 | */ | ||
522 | void __init | ||
523 | set_phys_avail(unsigned long total_memory) | ||
524 | { | ||
525 | unsigned long kstart, ksize; | ||
526 | |||
527 | /* | ||
528 | * Initially, available physical memory is equivalent to all | ||
529 | * physical memory. | ||
530 | */ | ||
531 | |||
532 | phys_avail.regions[0].address = PPC_MEMSTART; | ||
533 | phys_avail.regions[0].size = total_memory; | ||
534 | phys_avail.n_regions = 1; | ||
535 | |||
536 | /* | ||
537 | * Map out the kernel text/data/bss from the available physical | ||
538 | * memory. | ||
539 | */ | ||
540 | |||
541 | kstart = __pa(_stext); /* should be 0 */ | ||
542 | ksize = PAGE_ALIGN(klimit - _stext); | ||
543 | |||
544 | mem_pieces_remove(&phys_avail, kstart, ksize, 0); | ||
545 | mem_pieces_remove(&phys_avail, 0, 0x4000, 0); | ||
546 | |||
547 | #if defined(CONFIG_BLK_DEV_INITRD) | ||
548 | /* Remove the init RAM disk from the available memory. */ | ||
549 | if (initrd_start) { | ||
550 | mem_pieces_remove(&phys_avail, __pa(initrd_start), | ||
551 | initrd_end - initrd_start, 1); | ||
552 | } | ||
553 | #endif /* CONFIG_BLK_DEV_INITRD */ | ||
554 | #ifdef CONFIG_PPC_OF | ||
555 | /* remove the RTAS pages from the available memory */ | ||
556 | if (rtas_data) | ||
557 | mem_pieces_remove(&phys_avail, rtas_data, rtas_size, 1); | ||
558 | #endif | ||
559 | #ifdef CONFIG_PPC_PMAC | ||
560 | /* Because of some uninorth weirdness, we need a page of | ||
561 | * memory as high as possible (it must be outside of the | ||
562 | * bus address seen as the AGP aperture). It will be used | ||
563 | * by the r128 DRM driver | ||
564 | * | ||
565 | * FIXME: We need to make sure that page doesn't overlap any of the\ | ||
566 | * above. This could be done by improving mem_pieces_find to be able | ||
567 | * to do a backward search from the end of the list. | ||
568 | */ | ||
569 | if (_machine == _MACH_Pmac && find_devices("uni-north-agp")) { | ||
570 | agp_special_page = (total_memory - PAGE_SIZE); | ||
571 | mem_pieces_remove(&phys_avail, agp_special_page, PAGE_SIZE, 0); | ||
572 | agp_special_page = (unsigned long)__va(agp_special_page); | ||
573 | } | ||
574 | #endif /* CONFIG_PPC_PMAC */ | ||
575 | } | ||
576 | |||
577 | /* Mark some memory as reserved by removing it from phys_avail. */ | ||
578 | void __init reserve_phys_mem(unsigned long start, unsigned long size) | ||
579 | { | ||
580 | mem_pieces_remove(&phys_avail, start, size, 1); | ||
581 | } | ||
diff --git a/arch/powerpc/mm/init64.c b/arch/powerpc/mm/init64.c new file mode 100644 index 000000000000..81f6745b31ef --- /dev/null +++ b/arch/powerpc/mm/init64.c | |||
@@ -0,0 +1,385 @@ | |||
1 | /* | ||
2 | * PowerPC version | ||
3 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
4 | * | ||
5 | * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) | ||
6 | * and Cort Dougan (PReP) (cort@cs.nmt.edu) | ||
7 | * Copyright (C) 1996 Paul Mackerras | ||
8 | * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). | ||
9 | * | ||
10 | * Derived from "arch/i386/mm/init.c" | ||
11 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
12 | * | ||
13 | * Dave Engebretsen <engebret@us.ibm.com> | ||
14 | * Rework for PPC64 port. | ||
15 | * | ||
16 | * This program is free software; you can redistribute it and/or | ||
17 | * modify it under the terms of the GNU General Public License | ||
18 | * as published by the Free Software Foundation; either version | ||
19 | * 2 of the License, or (at your option) any later version. | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <linux/config.h> | ||
24 | #include <linux/signal.h> | ||
25 | #include <linux/sched.h> | ||
26 | #include <linux/kernel.h> | ||
27 | #include <linux/errno.h> | ||
28 | #include <linux/string.h> | ||
29 | #include <linux/types.h> | ||
30 | #include <linux/mman.h> | ||
31 | #include <linux/mm.h> | ||
32 | #include <linux/swap.h> | ||
33 | #include <linux/stddef.h> | ||
34 | #include <linux/vmalloc.h> | ||
35 | #include <linux/init.h> | ||
36 | #include <linux/delay.h> | ||
37 | #include <linux/bootmem.h> | ||
38 | #include <linux/highmem.h> | ||
39 | #include <linux/idr.h> | ||
40 | #include <linux/nodemask.h> | ||
41 | #include <linux/module.h> | ||
42 | |||
43 | #include <asm/pgalloc.h> | ||
44 | #include <asm/page.h> | ||
45 | #include <asm/prom.h> | ||
46 | #include <asm/lmb.h> | ||
47 | #include <asm/rtas.h> | ||
48 | #include <asm/io.h> | ||
49 | #include <asm/mmu_context.h> | ||
50 | #include <asm/pgtable.h> | ||
51 | #include <asm/mmu.h> | ||
52 | #include <asm/uaccess.h> | ||
53 | #include <asm/smp.h> | ||
54 | #include <asm/machdep.h> | ||
55 | #include <asm/tlb.h> | ||
56 | #include <asm/eeh.h> | ||
57 | #include <asm/processor.h> | ||
58 | #include <asm/mmzone.h> | ||
59 | #include <asm/cputable.h> | ||
60 | #include <asm/ppcdebug.h> | ||
61 | #include <asm/sections.h> | ||
62 | #include <asm/system.h> | ||
63 | #include <asm/iommu.h> | ||
64 | #include <asm/abs_addr.h> | ||
65 | #include <asm/vdso.h> | ||
66 | #include <asm/imalloc.h> | ||
67 | |||
68 | #if PGTABLE_RANGE > USER_VSID_RANGE | ||
69 | #warning Limited user VSID range means pagetable space is wasted | ||
70 | #endif | ||
71 | |||
72 | #if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) | ||
73 | #warning TASK_SIZE is smaller than it needs to be. | ||
74 | #endif | ||
75 | |||
76 | int mem_init_done; | ||
77 | unsigned long ioremap_bot = IMALLOC_BASE; | ||
78 | static unsigned long phbs_io_bot = PHBS_IO_BASE; | ||
79 | |||
80 | extern pgd_t swapper_pg_dir[]; | ||
81 | extern struct task_struct *current_set[NR_CPUS]; | ||
82 | |||
83 | unsigned long klimit = (unsigned long)_end; | ||
84 | |||
85 | unsigned long _SDR1=0; | ||
86 | unsigned long _ASR=0; | ||
87 | |||
88 | /* max amount of RAM to use */ | ||
89 | unsigned long __max_memory; | ||
90 | |||
91 | /* info on what we think the IO hole is */ | ||
92 | unsigned long io_hole_start; | ||
93 | unsigned long io_hole_size; | ||
94 | |||
95 | /* | ||
96 | * Do very early mm setup. | ||
97 | */ | ||
98 | void __init mm_init_ppc64(void) | ||
99 | { | ||
100 | #ifndef CONFIG_PPC_ISERIES | ||
101 | unsigned long i; | ||
102 | #endif | ||
103 | |||
104 | ppc64_boot_msg(0x100, "MM Init"); | ||
105 | |||
106 | /* This is the story of the IO hole... please, keep seated, | ||
107 | * unfortunately, we are out of oxygen masks at the moment. | ||
108 | * So we need some rough way to tell where your big IO hole | ||
109 | * is. On pmac, it's between 2G and 4G, on POWER3, it's around | ||
110 | * that area as well, on POWER4 we don't have one, etc... | ||
111 | * We need that as a "hint" when sizing the TCE table on POWER3 | ||
112 | * So far, the simplest way that seem work well enough for us it | ||
113 | * to just assume that the first discontinuity in our physical | ||
114 | * RAM layout is the IO hole. That may not be correct in the future | ||
115 | * (and isn't on iSeries but then we don't care ;) | ||
116 | */ | ||
117 | |||
118 | #ifndef CONFIG_PPC_ISERIES | ||
119 | for (i = 1; i < lmb.memory.cnt; i++) { | ||
120 | unsigned long base, prevbase, prevsize; | ||
121 | |||
122 | prevbase = lmb.memory.region[i-1].base; | ||
123 | prevsize = lmb.memory.region[i-1].size; | ||
124 | base = lmb.memory.region[i].base; | ||
125 | if (base > (prevbase + prevsize)) { | ||
126 | io_hole_start = prevbase + prevsize; | ||
127 | io_hole_size = base - (prevbase + prevsize); | ||
128 | break; | ||
129 | } | ||
130 | } | ||
131 | #endif /* CONFIG_PPC_ISERIES */ | ||
132 | if (io_hole_start) | ||
133 | printk("IO Hole assumed to be %lx -> %lx\n", | ||
134 | io_hole_start, io_hole_start + io_hole_size - 1); | ||
135 | |||
136 | ppc64_boot_msg(0x100, "MM Init Done"); | ||
137 | } | ||
138 | |||
139 | void free_initmem(void) | ||
140 | { | ||
141 | unsigned long addr; | ||
142 | |||
143 | addr = (unsigned long)__init_begin; | ||
144 | for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { | ||
145 | memset((void *)addr, 0xcc, PAGE_SIZE); | ||
146 | ClearPageReserved(virt_to_page(addr)); | ||
147 | set_page_count(virt_to_page(addr), 1); | ||
148 | free_page(addr); | ||
149 | totalram_pages++; | ||
150 | } | ||
151 | printk ("Freeing unused kernel memory: %luk freed\n", | ||
152 | ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10); | ||
153 | } | ||
154 | |||
#ifdef CONFIG_BLK_DEV_INITRD
/*
 * Give the pages in [start, end), which held the initial ramdisk, back
 * to the page allocator and count them in totalram_pages.  An empty
 * range prints nothing and frees nothing.
 */
void free_initrd_mem(unsigned long start, unsigned long end)
{
	unsigned long addr = start;

	if (start < end)
		printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);

	while (addr < end) {
		struct page *page = virt_to_page(addr);

		ClearPageReserved(page);
		set_page_count(page, 1);
		free_page(addr);
		totalram_pages++;
		addr += PAGE_SIZE;
	}
}
#endif
168 | |||
169 | /* | ||
170 | * Initialize the bootmem system and give it all the memory we | ||
171 | * have available. | ||
172 | */ | ||
173 | #ifndef CONFIG_NEED_MULTIPLE_NODES | ||
174 | void __init do_init_bootmem(void) | ||
175 | { | ||
176 | unsigned long i; | ||
177 | unsigned long start, bootmap_pages; | ||
178 | unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT; | ||
179 | int boot_mapsize; | ||
180 | |||
181 | /* | ||
182 | * Find an area to use for the bootmem bitmap. Calculate the size of | ||
183 | * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE. | ||
184 | * Add 1 additional page in case the address isn't page-aligned. | ||
185 | */ | ||
186 | bootmap_pages = bootmem_bootmap_pages(total_pages); | ||
187 | |||
188 | start = lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); | ||
189 | BUG_ON(!start); | ||
190 | |||
191 | boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages); | ||
192 | |||
193 | max_pfn = max_low_pfn; | ||
194 | |||
195 | /* Add all physical memory to the bootmem map, mark each area | ||
196 | * present. | ||
197 | */ | ||
198 | for (i=0; i < lmb.memory.cnt; i++) | ||
199 | free_bootmem(lmb.memory.region[i].base, | ||
200 | lmb_size_bytes(&lmb.memory, i)); | ||
201 | |||
202 | /* reserve the sections we're already using */ | ||
203 | for (i=0; i < lmb.reserved.cnt; i++) | ||
204 | reserve_bootmem(lmb.reserved.region[i].base, | ||
205 | lmb_size_bytes(&lmb.reserved, i)); | ||
206 | |||
207 | for (i=0; i < lmb.memory.cnt; i++) | ||
208 | memory_present(0, lmb_start_pfn(&lmb.memory, i), | ||
209 | lmb_end_pfn(&lmb.memory, i)); | ||
210 | } | ||
211 | |||
212 | /* | ||
213 | * paging_init() sets up the page tables - in fact we've already done this. | ||
214 | */ | ||
215 | void __init paging_init(void) | ||
216 | { | ||
217 | unsigned long zones_size[MAX_NR_ZONES]; | ||
218 | unsigned long zholes_size[MAX_NR_ZONES]; | ||
219 | unsigned long total_ram = lmb_phys_mem_size(); | ||
220 | unsigned long top_of_ram = lmb_end_of_DRAM(); | ||
221 | |||
222 | printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", | ||
223 | top_of_ram, total_ram); | ||
224 | printk(KERN_INFO "Memory hole size: %ldMB\n", | ||
225 | (top_of_ram - total_ram) >> 20); | ||
226 | /* | ||
227 | * All pages are DMA-able so we put them all in the DMA zone. | ||
228 | */ | ||
229 | memset(zones_size, 0, sizeof(zones_size)); | ||
230 | memset(zholes_size, 0, sizeof(zholes_size)); | ||
231 | |||
232 | zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; | ||
233 | zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT; | ||
234 | |||
235 | free_area_init_node(0, NODE_DATA(0), zones_size, | ||
236 | __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); | ||
237 | } | ||
238 | #endif /* ! CONFIG_NEED_MULTIPLE_NODES */ | ||
239 | |||
240 | static struct kcore_list kcore_vmem; | ||
241 | |||
242 | static int __init setup_kcore(void) | ||
243 | { | ||
244 | int i; | ||
245 | |||
246 | for (i=0; i < lmb.memory.cnt; i++) { | ||
247 | unsigned long base, size; | ||
248 | struct kcore_list *kcore_mem; | ||
249 | |||
250 | base = lmb.memory.region[i].base; | ||
251 | size = lmb.memory.region[i].size; | ||
252 | |||
253 | /* GFP_ATOMIC to avoid might_sleep warnings during boot */ | ||
254 | kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC); | ||
255 | if (!kcore_mem) | ||
256 | panic("mem_init: kmalloc failed\n"); | ||
257 | |||
258 | kclist_add(kcore_mem, __va(base), size); | ||
259 | } | ||
260 | |||
261 | kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); | ||
262 | |||
263 | return 0; | ||
264 | } | ||
265 | module_init(setup_kcore); | ||
266 | |||
267 | void __init mem_init(void) | ||
268 | { | ||
269 | #ifdef CONFIG_NEED_MULTIPLE_NODES | ||
270 | int nid; | ||
271 | #endif | ||
272 | pg_data_t *pgdat; | ||
273 | unsigned long i; | ||
274 | struct page *page; | ||
275 | unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; | ||
276 | |||
277 | num_physpages = max_low_pfn; /* RAM is assumed contiguous */ | ||
278 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); | ||
279 | |||
280 | #ifdef CONFIG_NEED_MULTIPLE_NODES | ||
281 | for_each_online_node(nid) { | ||
282 | if (NODE_DATA(nid)->node_spanned_pages != 0) { | ||
283 | printk("freeing bootmem node %x\n", nid); | ||
284 | totalram_pages += | ||
285 | free_all_bootmem_node(NODE_DATA(nid)); | ||
286 | } | ||
287 | } | ||
288 | #else | ||
289 | max_mapnr = num_physpages; | ||
290 | totalram_pages += free_all_bootmem(); | ||
291 | #endif | ||
292 | |||
293 | for_each_pgdat(pgdat) { | ||
294 | for (i = 0; i < pgdat->node_spanned_pages; i++) { | ||
295 | page = pgdat_page_nr(pgdat, i); | ||
296 | if (PageReserved(page)) | ||
297 | reservedpages++; | ||
298 | } | ||
299 | } | ||
300 | |||
301 | codesize = (unsigned long)&_etext - (unsigned long)&_stext; | ||
302 | initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; | ||
303 | datasize = (unsigned long)&_edata - (unsigned long)&__init_end; | ||
304 | bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start; | ||
305 | |||
306 | printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " | ||
307 | "%luk reserved, %luk data, %luk bss, %luk init)\n", | ||
308 | (unsigned long)nr_free_pages() << (PAGE_SHIFT-10), | ||
309 | num_physpages << (PAGE_SHIFT-10), | ||
310 | codesize >> 10, | ||
311 | reservedpages << (PAGE_SHIFT-10), | ||
312 | datasize >> 10, | ||
313 | bsssize >> 10, | ||
314 | initsize >> 10); | ||
315 | |||
316 | mem_init_done = 1; | ||
317 | |||
318 | /* Initialize the vDSO */ | ||
319 | vdso_init(); | ||
320 | } | ||
321 | |||
322 | void __iomem * reserve_phb_iospace(unsigned long size) | ||
323 | { | ||
324 | void __iomem *virt_addr; | ||
325 | |||
326 | if (phbs_io_bot >= IMALLOC_BASE) | ||
327 | panic("reserve_phb_iospace(): phb io space overflow\n"); | ||
328 | |||
329 | virt_addr = (void __iomem *) phbs_io_bot; | ||
330 | phbs_io_bot += size; | ||
331 | |||
332 | return virt_addr; | ||
333 | } | ||
334 | |||
335 | static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) | ||
336 | { | ||
337 | memset(addr, 0, kmem_cache_size(cache)); | ||
338 | } | ||
339 | |||
340 | static const int pgtable_cache_size[2] = { | ||
341 | PTE_TABLE_SIZE, PMD_TABLE_SIZE | ||
342 | }; | ||
343 | static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { | ||
344 | "pgd_pte_cache", "pud_pmd_cache", | ||
345 | }; | ||
346 | |||
347 | kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; | ||
348 | |||
349 | void pgtable_cache_init(void) | ||
350 | { | ||
351 | int i; | ||
352 | |||
353 | BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]); | ||
354 | BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]); | ||
355 | BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]); | ||
356 | BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]); | ||
357 | |||
358 | for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { | ||
359 | int size = pgtable_cache_size[i]; | ||
360 | const char *name = pgtable_cache_name[i]; | ||
361 | |||
362 | pgtable_cache[i] = kmem_cache_create(name, | ||
363 | size, size, | ||
364 | SLAB_HWCACHE_ALIGN | ||
365 | | SLAB_MUST_HWCACHE_ALIGN, | ||
366 | zero_ctor, | ||
367 | NULL); | ||
368 | if (! pgtable_cache[i]) | ||
369 | panic("pgtable_cache_init(): could not create %s!\n", | ||
370 | name); | ||
371 | } | ||
372 | } | ||
373 | |||
374 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, | ||
375 | unsigned long size, pgprot_t vma_prot) | ||
376 | { | ||
377 | if (ppc_md.phys_mem_access_prot) | ||
378 | return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); | ||
379 | |||
380 | if (!page_is_ram(addr >> PAGE_SHIFT)) | ||
381 | vma_prot = __pgprot(pgprot_val(vma_prot) | ||
382 | | _PAGE_GUARDED | _PAGE_NO_CACHE); | ||
383 | return vma_prot; | ||
384 | } | ||
385 | EXPORT_SYMBOL(phys_mem_access_prot); | ||
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c new file mode 100644 index 000000000000..345db08e5d20 --- /dev/null +++ b/arch/powerpc/mm/mem.c | |||
@@ -0,0 +1,299 @@ | |||
1 | /* | ||
2 | * PowerPC version | ||
3 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
4 | * | ||
5 | * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) | ||
6 | * and Cort Dougan (PReP) (cort@cs.nmt.edu) | ||
7 | * Copyright (C) 1996 Paul Mackerras | ||
8 | * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). | ||
9 | * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com) | ||
10 | * | ||
11 | * Derived from "arch/i386/mm/init.c" | ||
12 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or | ||
15 | * modify it under the terms of the GNU General Public License | ||
16 | * as published by the Free Software Foundation; either version | ||
17 | * 2 of the License, or (at your option) any later version. | ||
18 | * | ||
19 | */ | ||
20 | |||
21 | #include <linux/config.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/sched.h> | ||
24 | #include <linux/kernel.h> | ||
25 | #include <linux/errno.h> | ||
26 | #include <linux/string.h> | ||
27 | #include <linux/types.h> | ||
28 | #include <linux/mm.h> | ||
29 | #include <linux/stddef.h> | ||
30 | #include <linux/init.h> | ||
31 | #include <linux/bootmem.h> | ||
32 | #include <linux/highmem.h> | ||
33 | #include <linux/initrd.h> | ||
34 | #include <linux/pagemap.h> | ||
35 | |||
36 | #include <asm/pgalloc.h> | ||
37 | #include <asm/prom.h> | ||
38 | #include <asm/io.h> | ||
39 | #include <asm/mmu_context.h> | ||
40 | #include <asm/pgtable.h> | ||
41 | #include <asm/mmu.h> | ||
42 | #include <asm/smp.h> | ||
43 | #include <asm/machdep.h> | ||
44 | #include <asm/btext.h> | ||
45 | #include <asm/tlb.h> | ||
46 | #include <asm/bootinfo.h> | ||
47 | #include <asm/prom.h> | ||
48 | |||
49 | #include "mem_pieces.h" | ||
50 | #include "mmu_decl.h" | ||
51 | |||
52 | #ifndef CPU_FTR_COHERENT_ICACHE | ||
53 | #define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */ | ||
54 | #define CPU_FTR_NOEXECUTE 0 | ||
55 | #endif | ||
56 | |||
57 | /* | ||
58 | * This is called by /dev/mem to know if a given address has to | ||
59 | * be mapped non-cacheable or not | ||
60 | */ | ||
61 | int page_is_ram(unsigned long pfn) | ||
62 | { | ||
63 | unsigned long paddr = (pfn << PAGE_SHIFT); | ||
64 | |||
65 | #ifndef CONFIG_PPC64 /* XXX for now */ | ||
66 | return paddr < __pa(high_memory); | ||
67 | #else | ||
68 | int i; | ||
69 | for (i=0; i < lmb.memory.cnt; i++) { | ||
70 | unsigned long base; | ||
71 | |||
72 | base = lmb.memory.region[i].base; | ||
73 | |||
74 | if ((paddr >= base) && | ||
75 | (paddr < (base + lmb.memory.region[i].size))) { | ||
76 | return 1; | ||
77 | } | ||
78 | } | ||
79 | |||
80 | return 0; | ||
81 | #endif | ||
82 | } | ||
83 | EXPORT_SYMBOL(page_is_ram); | ||
84 | |||
85 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, | ||
86 | unsigned long size, pgprot_t vma_prot) | ||
87 | { | ||
88 | if (ppc_md.phys_mem_access_prot) | ||
89 | return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); | ||
90 | |||
91 | if (!page_is_ram(addr >> PAGE_SHIFT)) | ||
92 | vma_prot = __pgprot(pgprot_val(vma_prot) | ||
93 | | _PAGE_GUARDED | _PAGE_NO_CACHE); | ||
94 | return vma_prot; | ||
95 | } | ||
96 | EXPORT_SYMBOL(phys_mem_access_prot); | ||
97 | |||
98 | void show_mem(void) | ||
99 | { | ||
100 | unsigned long total = 0, reserved = 0; | ||
101 | unsigned long shared = 0, cached = 0; | ||
102 | unsigned long highmem = 0; | ||
103 | struct page *page; | ||
104 | pg_data_t *pgdat; | ||
105 | unsigned long i; | ||
106 | |||
107 | printk("Mem-info:\n"); | ||
108 | show_free_areas(); | ||
109 | printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); | ||
110 | for_each_pgdat(pgdat) { | ||
111 | for (i = 0; i < pgdat->node_spanned_pages; i++) { | ||
112 | page = pgdat_page_nr(pgdat, i); | ||
113 | total++; | ||
114 | if (PageHighMem(page)) | ||
115 | highmem++; | ||
116 | if (PageReserved(page)) | ||
117 | reserved++; | ||
118 | else if (PageSwapCache(page)) | ||
119 | cached++; | ||
120 | else if (page_count(page)) | ||
121 | shared += page_count(page) - 1; | ||
122 | } | ||
123 | } | ||
124 | printk("%ld pages of RAM\n", total); | ||
125 | #ifdef CONFIG_HIGHMEM | ||
126 | printk("%ld pages of HIGHMEM\n", highmem); | ||
127 | #endif | ||
128 | printk("%ld reserved pages\n", reserved); | ||
129 | printk("%ld pages shared\n", shared); | ||
130 | printk("%ld pages swap cached\n", cached); | ||
131 | } | ||
132 | |||
133 | /* | ||
134 | * This is called when a page has been modified by the kernel. | ||
135 | * It just marks the page as not i-cache clean. We do the i-cache | ||
136 | * flush later when the page is given to a user process, if necessary. | ||
137 | */ | ||
138 | void flush_dcache_page(struct page *page) | ||
139 | { | ||
140 | if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | ||
141 | return; | ||
142 | /* avoid an atomic op if possible */ | ||
143 | if (test_bit(PG_arch_1, &page->flags)) | ||
144 | clear_bit(PG_arch_1, &page->flags); | ||
145 | } | ||
146 | EXPORT_SYMBOL(flush_dcache_page); | ||
147 | |||
148 | void flush_dcache_icache_page(struct page *page) | ||
149 | { | ||
150 | #ifdef CONFIG_BOOKE | ||
151 | void *start = kmap_atomic(page, KM_PPC_SYNC_ICACHE); | ||
152 | __flush_dcache_icache(start); | ||
153 | kunmap_atomic(start, KM_PPC_SYNC_ICACHE); | ||
154 | #elif defined(CONFIG_8xx) | ||
155 | /* On 8xx there is no need to kmap since highmem is not supported */ | ||
156 | __flush_dcache_icache(page_address(page)); | ||
157 | #else | ||
158 | __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT); | ||
159 | #endif | ||
160 | |||
161 | } | ||
162 | void clear_user_page(void *page, unsigned long vaddr, struct page *pg) | ||
163 | { | ||
164 | clear_page(page); | ||
165 | |||
166 | if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | ||
167 | return; | ||
168 | /* | ||
169 | * We shouldnt have to do this, but some versions of glibc | ||
170 | * require it (ld.so assumes zero filled pages are icache clean) | ||
171 | * - Anton | ||
172 | */ | ||
173 | |||
174 | /* avoid an atomic op if possible */ | ||
175 | if (test_bit(PG_arch_1, &pg->flags)) | ||
176 | clear_bit(PG_arch_1, &pg->flags); | ||
177 | } | ||
178 | EXPORT_SYMBOL(clear_user_page); | ||
179 | |||
180 | void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, | ||
181 | struct page *pg) | ||
182 | { | ||
183 | copy_page(vto, vfrom); | ||
184 | |||
185 | /* | ||
186 | * We should be able to use the following optimisation, however | ||
187 | * there are two problems. | ||
188 | * Firstly a bug in some versions of binutils meant PLT sections | ||
189 | * were not marked executable. | ||
190 | * Secondly the first word in the GOT section is blrl, used | ||
191 | * to establish the GOT address. Until recently the GOT was | ||
192 | * not marked executable. | ||
193 | * - Anton | ||
194 | */ | ||
195 | #if 0 | ||
196 | if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) | ||
197 | return; | ||
198 | #endif | ||
199 | |||
200 | if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | ||
201 | return; | ||
202 | |||
203 | /* avoid an atomic op if possible */ | ||
204 | if (test_bit(PG_arch_1, &pg->flags)) | ||
205 | clear_bit(PG_arch_1, &pg->flags); | ||
206 | } | ||
207 | |||
208 | void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, | ||
209 | unsigned long addr, int len) | ||
210 | { | ||
211 | unsigned long maddr; | ||
212 | |||
213 | maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); | ||
214 | flush_icache_range(maddr, maddr + len); | ||
215 | kunmap(page); | ||
216 | } | ||
217 | EXPORT_SYMBOL(flush_icache_user_range); | ||
218 | |||
219 | /* | ||
220 | * This is called at the end of handling a user page fault, when the | ||
221 | * fault has been handled by updating a PTE in the linux page tables. | ||
222 | * We use it to preload an HPTE into the hash table corresponding to | ||
223 | * the updated linux PTE. | ||
224 | * | ||
225 | * This must always be called with the mm->page_table_lock held | ||
226 | */ | ||
227 | void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, | ||
228 | pte_t pte) | ||
229 | { | ||
230 | /* handle i-cache coherency */ | ||
231 | unsigned long pfn = pte_pfn(pte); | ||
232 | #ifdef CONFIG_PPC32 | ||
233 | pmd_t *pmd; | ||
234 | #else | ||
235 | unsigned long vsid; | ||
236 | void *pgdir; | ||
237 | pte_t *ptep; | ||
238 | int local = 0; | ||
239 | cpumask_t tmp; | ||
240 | unsigned long flags; | ||
241 | #endif | ||
242 | |||
243 | /* handle i-cache coherency */ | ||
244 | if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && | ||
245 | !cpu_has_feature(CPU_FTR_NOEXECUTE) && | ||
246 | pfn_valid(pfn)) { | ||
247 | struct page *page = pfn_to_page(pfn); | ||
248 | if (!PageReserved(page) | ||
249 | && !test_bit(PG_arch_1, &page->flags)) { | ||
250 | if (vma->vm_mm == current->active_mm) { | ||
251 | #ifdef CONFIG_8xx | ||
252 | /* On 8xx, cache control instructions (particularly | ||
253 | * "dcbst" from flush_dcache_icache) fault as write | ||
254 | * operation if there is an unpopulated TLB entry | ||
255 | * for the address in question. To workaround that, | ||
256 | * we invalidate the TLB here, thus avoiding dcbst | ||
257 | * misbehaviour. | ||
258 | */ | ||
259 | _tlbie(address); | ||
260 | #endif | ||
261 | __flush_dcache_icache((void *) address); | ||
262 | } else | ||
263 | flush_dcache_icache_page(page); | ||
264 | set_bit(PG_arch_1, &page->flags); | ||
265 | } | ||
266 | } | ||
267 | |||
268 | #ifdef CONFIG_PPC_STD_MMU | ||
269 | /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ | ||
270 | if (!pte_young(pte) || address >= TASK_SIZE) | ||
271 | return; | ||
272 | #ifdef CONFIG_PPC32 | ||
273 | if (Hash == 0) | ||
274 | return; | ||
275 | pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address); | ||
276 | if (!pmd_none(*pmd)) | ||
277 | add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd)); | ||
278 | #else | ||
279 | pgdir = vma->vm_mm->pgd; | ||
280 | if (pgdir == NULL) | ||
281 | return; | ||
282 | |||
283 | ptep = find_linux_pte(pgdir, ea); | ||
284 | if (!ptep) | ||
285 | return; | ||
286 | |||
287 | vsid = get_vsid(vma->vm_mm->context.id, ea); | ||
288 | |||
289 | local_irq_save(flags); | ||
290 | tmp = cpumask_of_cpu(smp_processor_id()); | ||
291 | if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) | ||
292 | local = 1; | ||
293 | |||
294 | __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep, | ||
295 | 0x300, local); | ||
296 | local_irq_restore(flags); | ||
297 | #endif | ||
298 | #endif | ||
299 | } | ||
diff --git a/arch/powerpc/mm/mem64.c b/arch/powerpc/mm/mem64.c new file mode 100644 index 000000000000..ef765a84433f --- /dev/null +++ b/arch/powerpc/mm/mem64.c | |||
@@ -0,0 +1,259 @@ | |||
1 | /* | ||
2 | * PowerPC version | ||
3 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
4 | * | ||
5 | * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) | ||
6 | * and Cort Dougan (PReP) (cort@cs.nmt.edu) | ||
7 | * Copyright (C) 1996 Paul Mackerras | ||
8 | * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). | ||
9 | * | ||
10 | * Derived from "arch/i386/mm/init.c" | ||
11 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
12 | * | ||
13 | * Dave Engebretsen <engebret@us.ibm.com> | ||
14 | * Rework for PPC64 port. | ||
15 | * | ||
16 | * This program is free software; you can redistribute it and/or | ||
17 | * modify it under the terms of the GNU General Public License | ||
18 | * as published by the Free Software Foundation; either version | ||
19 | * 2 of the License, or (at your option) any later version. | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <linux/config.h> | ||
24 | #include <linux/signal.h> | ||
25 | #include <linux/sched.h> | ||
26 | #include <linux/kernel.h> | ||
27 | #include <linux/errno.h> | ||
28 | #include <linux/string.h> | ||
29 | #include <linux/types.h> | ||
30 | #include <linux/mman.h> | ||
31 | #include <linux/mm.h> | ||
32 | #include <linux/swap.h> | ||
33 | #include <linux/stddef.h> | ||
34 | #include <linux/vmalloc.h> | ||
35 | #include <linux/init.h> | ||
36 | #include <linux/delay.h> | ||
37 | #include <linux/bootmem.h> | ||
38 | #include <linux/highmem.h> | ||
39 | #include <linux/idr.h> | ||
40 | #include <linux/nodemask.h> | ||
41 | #include <linux/module.h> | ||
42 | |||
43 | #include <asm/pgalloc.h> | ||
44 | #include <asm/page.h> | ||
45 | #include <asm/prom.h> | ||
46 | #include <asm/lmb.h> | ||
47 | #include <asm/rtas.h> | ||
48 | #include <asm/io.h> | ||
49 | #include <asm/mmu_context.h> | ||
50 | #include <asm/pgtable.h> | ||
51 | #include <asm/mmu.h> | ||
52 | #include <asm/uaccess.h> | ||
53 | #include <asm/smp.h> | ||
54 | #include <asm/machdep.h> | ||
55 | #include <asm/tlb.h> | ||
56 | #include <asm/eeh.h> | ||
57 | #include <asm/processor.h> | ||
58 | #include <asm/mmzone.h> | ||
59 | #include <asm/cputable.h> | ||
60 | #include <asm/ppcdebug.h> | ||
61 | #include <asm/sections.h> | ||
62 | #include <asm/system.h> | ||
63 | #include <asm/iommu.h> | ||
64 | #include <asm/abs_addr.h> | ||
65 | #include <asm/vdso.h> | ||
66 | #include <asm/imalloc.h> | ||
67 | |||
68 | /* | ||
69 | * This is called by /dev/mem to know if a given address has to | ||
70 | * be mapped non-cacheable or not | ||
71 | */ | ||
72 | int page_is_ram(unsigned long pfn) | ||
73 | { | ||
74 | int i; | ||
75 | unsigned long paddr = (pfn << PAGE_SHIFT); | ||
76 | |||
77 | for (i=0; i < lmb.memory.cnt; i++) { | ||
78 | unsigned long base; | ||
79 | |||
80 | base = lmb.memory.region[i].base; | ||
81 | |||
82 | if ((paddr >= base) && | ||
83 | (paddr < (base + lmb.memory.region[i].size))) { | ||
84 | return 1; | ||
85 | } | ||
86 | } | ||
87 | |||
88 | return 0; | ||
89 | } | ||
90 | EXPORT_SYMBOL(page_is_ram); | ||
91 | |||
92 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, | ||
93 | unsigned long size, pgprot_t vma_prot) | ||
94 | { | ||
95 | if (ppc_md.phys_mem_access_prot) | ||
96 | return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); | ||
97 | |||
98 | if (!page_is_ram(addr >> PAGE_SHIFT)) | ||
99 | vma_prot = __pgprot(pgprot_val(vma_prot) | ||
100 | | _PAGE_GUARDED | _PAGE_NO_CACHE); | ||
101 | return vma_prot; | ||
102 | } | ||
103 | EXPORT_SYMBOL(phys_mem_access_prot); | ||
104 | |||
105 | void show_mem(void) | ||
106 | { | ||
107 | unsigned long total = 0, reserved = 0; | ||
108 | unsigned long shared = 0, cached = 0; | ||
109 | struct page *page; | ||
110 | pg_data_t *pgdat; | ||
111 | unsigned long i; | ||
112 | |||
113 | printk("Mem-info:\n"); | ||
114 | show_free_areas(); | ||
115 | printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); | ||
116 | for_each_pgdat(pgdat) { | ||
117 | for (i = 0; i < pgdat->node_spanned_pages; i++) { | ||
118 | page = pgdat_page_nr(pgdat, i); | ||
119 | total++; | ||
120 | if (PageReserved(page)) | ||
121 | reserved++; | ||
122 | else if (PageSwapCache(page)) | ||
123 | cached++; | ||
124 | else if (page_count(page)) | ||
125 | shared += page_count(page) - 1; | ||
126 | } | ||
127 | } | ||
128 | printk("%ld pages of RAM\n", total); | ||
129 | printk("%ld reserved pages\n", reserved); | ||
130 | printk("%ld pages shared\n", shared); | ||
131 | printk("%ld pages swap cached\n", cached); | ||
132 | } | ||
133 | |||
134 | /* | ||
135 | * This is called when a page has been modified by the kernel. | ||
136 | * It just marks the page as not i-cache clean. We do the i-cache | ||
137 | * flush later when the page is given to a user process, if necessary. | ||
138 | */ | ||
139 | void flush_dcache_page(struct page *page) | ||
140 | { | ||
141 | if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | ||
142 | return; | ||
143 | /* avoid an atomic op if possible */ | ||
144 | if (test_bit(PG_arch_1, &page->flags)) | ||
145 | clear_bit(PG_arch_1, &page->flags); | ||
146 | } | ||
147 | EXPORT_SYMBOL(flush_dcache_page); | ||
148 | |||
149 | void clear_user_page(void *page, unsigned long vaddr, struct page *pg) | ||
150 | { | ||
151 | clear_page(page); | ||
152 | |||
153 | if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | ||
154 | return; | ||
155 | /* | ||
156 | * We shouldnt have to do this, but some versions of glibc | ||
157 | * require it (ld.so assumes zero filled pages are icache clean) | ||
158 | * - Anton | ||
159 | */ | ||
160 | |||
161 | /* avoid an atomic op if possible */ | ||
162 | if (test_bit(PG_arch_1, &pg->flags)) | ||
163 | clear_bit(PG_arch_1, &pg->flags); | ||
164 | } | ||
165 | EXPORT_SYMBOL(clear_user_page); | ||
166 | |||
167 | void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, | ||
168 | struct page *pg) | ||
169 | { | ||
170 | copy_page(vto, vfrom); | ||
171 | |||
172 | /* | ||
173 | * We should be able to use the following optimisation, however | ||
174 | * there are two problems. | ||
175 | * Firstly a bug in some versions of binutils meant PLT sections | ||
176 | * were not marked executable. | ||
177 | * Secondly the first word in the GOT section is blrl, used | ||
178 | * to establish the GOT address. Until recently the GOT was | ||
179 | * not marked executable. | ||
180 | * - Anton | ||
181 | */ | ||
182 | #if 0 | ||
183 | if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) | ||
184 | return; | ||
185 | #endif | ||
186 | |||
187 | if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | ||
188 | return; | ||
189 | |||
190 | /* avoid an atomic op if possible */ | ||
191 | if (test_bit(PG_arch_1, &pg->flags)) | ||
192 | clear_bit(PG_arch_1, &pg->flags); | ||
193 | } | ||
194 | |||
195 | void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, | ||
196 | unsigned long addr, int len) | ||
197 | { | ||
198 | unsigned long maddr; | ||
199 | |||
200 | maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK); | ||
201 | flush_icache_range(maddr, maddr + len); | ||
202 | } | ||
203 | EXPORT_SYMBOL(flush_icache_user_range); | ||
204 | |||
205 | /* | ||
206 | * This is called at the end of handling a user page fault, when the | ||
207 | * fault has been handled by updating a PTE in the linux page tables. | ||
208 | * We use it to preload an HPTE into the hash table corresponding to | ||
209 | * the updated linux PTE. | ||
210 | * | ||
211 | * This must always be called with the mm->page_table_lock held | ||
212 | */ | ||
213 | void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea, | ||
214 | pte_t pte) | ||
215 | { | ||
216 | unsigned long vsid; | ||
217 | void *pgdir; | ||
218 | pte_t *ptep; | ||
219 | int local = 0; | ||
220 | cpumask_t tmp; | ||
221 | unsigned long flags; | ||
222 | |||
223 | /* handle i-cache coherency */ | ||
224 | if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && | ||
225 | !cpu_has_feature(CPU_FTR_NOEXECUTE)) { | ||
226 | unsigned long pfn = pte_pfn(pte); | ||
227 | if (pfn_valid(pfn)) { | ||
228 | struct page *page = pfn_to_page(pfn); | ||
229 | if (!PageReserved(page) | ||
230 | && !test_bit(PG_arch_1, &page->flags)) { | ||
231 | __flush_dcache_icache(page_address(page)); | ||
232 | set_bit(PG_arch_1, &page->flags); | ||
233 | } | ||
234 | } | ||
235 | } | ||
236 | |||
237 | /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ | ||
238 | if (!pte_young(pte)) | ||
239 | return; | ||
240 | |||
241 | pgdir = vma->vm_mm->pgd; | ||
242 | if (pgdir == NULL) | ||
243 | return; | ||
244 | |||
245 | ptep = find_linux_pte(pgdir, ea); | ||
246 | if (!ptep) | ||
247 | return; | ||
248 | |||
249 | vsid = get_vsid(vma->vm_mm->context.id, ea); | ||
250 | |||
251 | local_irq_save(flags); | ||
252 | tmp = cpumask_of_cpu(smp_processor_id()); | ||
253 | if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) | ||
254 | local = 1; | ||
255 | |||
256 | __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep, | ||
257 | 0x300, local); | ||
258 | local_irq_restore(flags); | ||
259 | } | ||
diff --git a/arch/powerpc/mm/mem_pieces.c b/arch/powerpc/mm/mem_pieces.c new file mode 100644 index 000000000000..3d639052017e --- /dev/null +++ b/arch/powerpc/mm/mem_pieces.c | |||
@@ -0,0 +1,163 @@ | |||
1 | /* | ||
2 | * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> | ||
3 | * Changes to accommodate Power Macintoshes. | ||
4 | * Cort Dougan <cort@cs.nmt.edu> | ||
5 | * Rewrites. | ||
6 | * Grant Erickson <grant@lcse.umn.edu> | ||
7 | * General rework and split from mm/init.c. | ||
8 | * | ||
9 | * Module name: mem_pieces.c | ||
10 | * | ||
11 | * Description: | ||
12 | * Routines and data structures for manipulating and representing | ||
13 | * physical memory extents (i.e. address/length pairs). | ||
14 | * | ||
15 | */ | ||
16 | |||
17 | #include <linux/config.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/stddef.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <asm/page.h> | ||
22 | |||
23 | #include "mem_pieces.h" | ||
24 | |||
25 | extern struct mem_pieces phys_avail; | ||
26 | |||
27 | static void mem_pieces_print(struct mem_pieces *); | ||
28 | |||
29 | /* | ||
30 | * Scan a region for a piece of a given size with the required alignment. | ||
31 | */ | ||
32 | void __init * | ||
33 | mem_pieces_find(unsigned int size, unsigned int align) | ||
34 | { | ||
35 | int i; | ||
36 | unsigned a, e; | ||
37 | struct mem_pieces *mp = &phys_avail; | ||
38 | |||
39 | for (i = 0; i < mp->n_regions; ++i) { | ||
40 | a = mp->regions[i].address; | ||
41 | e = a + mp->regions[i].size; | ||
42 | a = (a + align - 1) & -align; | ||
43 | if (a + size <= e) { | ||
44 | mem_pieces_remove(mp, a, size, 1); | ||
45 | return (void *) __va(a); | ||
46 | } | ||
47 | } | ||
48 | panic("Couldn't find %u bytes at %u alignment\n", size, align); | ||
49 | |||
50 | return NULL; | ||
51 | } | ||
52 | |||
53 | /* | ||
54 | * Remove some memory from an array of pieces | ||
55 | */ | ||
56 | void __init | ||
57 | mem_pieces_remove(struct mem_pieces *mp, unsigned int start, unsigned int size, | ||
58 | int must_exist) | ||
59 | { | ||
60 | int i, j; | ||
61 | unsigned int end, rs, re; | ||
62 | struct reg_property *rp; | ||
63 | |||
64 | end = start + size; | ||
65 | for (i = 0, rp = mp->regions; i < mp->n_regions; ++i, ++rp) { | ||
66 | if (end > rp->address && start < rp->address + rp->size) | ||
67 | break; | ||
68 | } | ||
69 | if (i >= mp->n_regions) { | ||
70 | if (must_exist) | ||
71 | printk("mem_pieces_remove: [%x,%x) not in any region\n", | ||
72 | start, end); | ||
73 | return; | ||
74 | } | ||
75 | for (; i < mp->n_regions && end > rp->address; ++i, ++rp) { | ||
76 | rs = rp->address; | ||
77 | re = rs + rp->size; | ||
78 | if (must_exist && (start < rs || end > re)) { | ||
79 | printk("mem_pieces_remove: bad overlap [%x,%x) with", | ||
80 | start, end); | ||
81 | mem_pieces_print(mp); | ||
82 | must_exist = 0; | ||
83 | } | ||
84 | if (start > rs) { | ||
85 | rp->size = start - rs; | ||
86 | if (end < re) { | ||
87 | /* need to split this entry */ | ||
88 | if (mp->n_regions >= MEM_PIECES_MAX) | ||
89 | panic("eek... mem_pieces overflow"); | ||
90 | for (j = mp->n_regions; j > i + 1; --j) | ||
91 | mp->regions[j] = mp->regions[j-1]; | ||
92 | ++mp->n_regions; | ||
93 | rp[1].address = end; | ||
94 | rp[1].size = re - end; | ||
95 | } | ||
96 | } else { | ||
97 | if (end < re) { | ||
98 | rp->address = end; | ||
99 | rp->size = re - end; | ||
100 | } else { | ||
101 | /* need to delete this entry */ | ||
102 | for (j = i; j < mp->n_regions - 1; ++j) | ||
103 | mp->regions[j] = mp->regions[j+1]; | ||
104 | --mp->n_regions; | ||
105 | --i; | ||
106 | --rp; | ||
107 | } | ||
108 | } | ||
109 | } | ||
110 | } | ||
111 | |||
112 | static void __init | ||
113 | mem_pieces_print(struct mem_pieces *mp) | ||
114 | { | ||
115 | int i; | ||
116 | |||
117 | for (i = 0; i < mp->n_regions; ++i) | ||
118 | printk(" [%x, %x)", mp->regions[i].address, | ||
119 | mp->regions[i].address + mp->regions[i].size); | ||
120 | printk("\n"); | ||
121 | } | ||
122 | |||
123 | void __init | ||
124 | mem_pieces_sort(struct mem_pieces *mp) | ||
125 | { | ||
126 | unsigned long a, s; | ||
127 | int i, j; | ||
128 | |||
129 | for (i = 1; i < mp->n_regions; ++i) { | ||
130 | a = mp->regions[i].address; | ||
131 | s = mp->regions[i].size; | ||
132 | for (j = i - 1; j >= 0; --j) { | ||
133 | if (a >= mp->regions[j].address) | ||
134 | break; | ||
135 | mp->regions[j+1] = mp->regions[j]; | ||
136 | } | ||
137 | mp->regions[j+1].address = a; | ||
138 | mp->regions[j+1].size = s; | ||
139 | } | ||
140 | } | ||
141 | |||
142 | void __init | ||
143 | mem_pieces_coalesce(struct mem_pieces *mp) | ||
144 | { | ||
145 | unsigned long a, s, ns; | ||
146 | int i, j, d; | ||
147 | |||
148 | d = 0; | ||
149 | for (i = 0; i < mp->n_regions; i = j) { | ||
150 | a = mp->regions[i].address; | ||
151 | s = mp->regions[i].size; | ||
152 | for (j = i + 1; j < mp->n_regions | ||
153 | && mp->regions[j].address - a <= s; ++j) { | ||
154 | ns = mp->regions[j].address + mp->regions[j].size - a; | ||
155 | if (ns > s) | ||
156 | s = ns; | ||
157 | } | ||
158 | mp->regions[d].address = a; | ||
159 | mp->regions[d].size = s; | ||
160 | ++d; | ||
161 | } | ||
162 | mp->n_regions = d; | ||
163 | } | ||
diff --git a/arch/powerpc/mm/mem_pieces.h b/arch/powerpc/mm/mem_pieces.h new file mode 100644 index 000000000000..e2b700dc7f18 --- /dev/null +++ b/arch/powerpc/mm/mem_pieces.h | |||
@@ -0,0 +1,48 @@ | |||
1 | /* | ||
2 | * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> | ||
3 | * Changes to accommodate Power Macintoshes. | ||
4 | * Cort Dougan <cort@cs.nmt.edu> | ||
5 | * Rewrites. | ||
6 | * Grant Erickson <grant@lcse.umn.edu> | ||
7 | * General rework and split from mm/init.c. | ||
8 | * | ||
9 | * Module name: mem_pieces.h | ||
10 | * | ||
11 | * Description: | ||
12 | * Routines and data structures for manipulating and representing | ||
13 | * physical memory extents (i.e. address/length pairs). | ||
14 | * | ||
15 | */ | ||
16 | |||
17 | #ifndef __MEM_PIECES_H__ | ||
18 | #define __MEM_PIECES_H__ | ||
19 | |||
20 | #include <asm/prom.h> | ||
21 | |||
22 | #ifdef __cplusplus | ||
23 | extern "C" { | ||
24 | #endif | ||
25 | |||
26 | |||
27 | /* Type Definitions */ | ||
28 | |||
29 | #define MEM_PIECES_MAX 32 | ||
30 | |||
31 | struct mem_pieces { | ||
32 | int n_regions; | ||
33 | struct reg_property regions[MEM_PIECES_MAX]; | ||
34 | }; | ||
35 | |||
36 | /* Function Prototypes */ | ||
37 | |||
38 | extern void *mem_pieces_find(unsigned int size, unsigned int align); | ||
39 | extern void mem_pieces_remove(struct mem_pieces *mp, unsigned int start, | ||
40 | unsigned int size, int must_exist); | ||
41 | extern void mem_pieces_coalesce(struct mem_pieces *mp); | ||
42 | extern void mem_pieces_sort(struct mem_pieces *mp); | ||
43 | |||
44 | #ifdef __cplusplus | ||
45 | } | ||
46 | #endif | ||
47 | |||
48 | #endif /* __MEM_PIECES_H__ */ | ||
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c new file mode 100644 index 000000000000..a8816e0f6a86 --- /dev/null +++ b/arch/powerpc/mm/mmu_context.c | |||
@@ -0,0 +1,86 @@ | |||
1 | /* | ||
2 | * This file contains the routines for handling the MMU on those | ||
3 | * PowerPC implementations where the MMU substantially follows the | ||
4 | * architecture specification. This includes the 6xx, 7xx, 7xxx, | ||
5 | * 8260, and POWER3 implementations but excludes the 8xx and 4xx. | ||
6 | * -- paulus | ||
7 | * | ||
8 | * Derived from arch/ppc/mm/init.c: | ||
9 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
10 | * | ||
11 | * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) | ||
12 | * and Cort Dougan (PReP) (cort@cs.nmt.edu) | ||
13 | * Copyright (C) 1996 Paul Mackerras | ||
14 | * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). | ||
15 | * | ||
16 | * Derived from "arch/i386/mm/init.c" | ||
17 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
18 | * | ||
19 | * This program is free software; you can redistribute it and/or | ||
20 | * modify it under the terms of the GNU General Public License | ||
21 | * as published by the Free Software Foundation; either version | ||
22 | * 2 of the License, or (at your option) any later version. | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include <linux/config.h> | ||
27 | #include <linux/mm.h> | ||
28 | #include <linux/init.h> | ||
29 | |||
30 | #include <asm/mmu_context.h> | ||
31 | #include <asm/tlbflush.h> | ||
32 | |||
33 | mm_context_t next_mmu_context; | ||
34 | unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; | ||
35 | #ifdef FEW_CONTEXTS | ||
36 | atomic_t nr_free_contexts; | ||
37 | struct mm_struct *context_mm[LAST_CONTEXT+1]; | ||
38 | void steal_context(void); | ||
39 | #endif /* FEW_CONTEXTS */ | ||
40 | |||
41 | /* | ||
42 | * Initialize the context management stuff. | ||
43 | */ | ||
44 | void __init | ||
45 | mmu_context_init(void) | ||
46 | { | ||
47 | /* | ||
48 | * Some processors have too few contexts to reserve one for | ||
49 | * init_mm, and require using context 0 for a normal task. | ||
50 | * Other processors reserve the use of context zero for the kernel. | ||
51 | * This code assumes FIRST_CONTEXT < 32. | ||
52 | */ | ||
53 | context_map[0] = (1 << FIRST_CONTEXT) - 1; | ||
54 | next_mmu_context = FIRST_CONTEXT; | ||
55 | #ifdef FEW_CONTEXTS | ||
56 | atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1); | ||
57 | #endif /* FEW_CONTEXTS */ | ||
58 | } | ||
59 | |||
60 | #ifdef FEW_CONTEXTS | ||
61 | /* | ||
62 | * Steal a context from a task that has one at the moment. | ||
63 | * This is only used on 8xx and 4xx and we presently assume that | ||
64 | * they don't do SMP. If they do then this will have to check | ||
65 | * whether the MM we steal is in use. | ||
66 | * We also assume that this is only used on systems that don't | ||
67 | * use an MMU hash table - this is true for 8xx and 4xx. | ||
68 | * This isn't an LRU system, it just frees up each context in | ||
69 | * turn (sort-of pseudo-random replacement :). This would be the | ||
70 | * place to implement an LRU scheme if anyone was motivated to do it. | ||
71 | * -- paulus | ||
72 | */ | ||
73 | void | ||
74 | steal_context(void) | ||
75 | { | ||
76 | struct mm_struct *mm; | ||
77 | |||
78 | /* free up context `next_mmu_context' */ | ||
79 | /* if we shouldn't free context 0, don't... */ | ||
80 | if (next_mmu_context < FIRST_CONTEXT) | ||
81 | next_mmu_context = FIRST_CONTEXT; | ||
82 | mm = context_mm[next_mmu_context]; | ||
83 | flush_tlb_mm(mm); | ||
84 | destroy_context(mm); | ||
85 | } | ||
86 | #endif /* FEW_CONTEXTS */ | ||
diff --git a/arch/powerpc/mm/mmu_context64.c b/arch/powerpc/mm/mmu_context64.c new file mode 100644 index 000000000000..714a84dd8d5d --- /dev/null +++ b/arch/powerpc/mm/mmu_context64.c | |||
@@ -0,0 +1,63 @@ | |||
1 | /* | ||
2 | * MMU context allocation for 64-bit kernels. | ||
3 | * | ||
4 | * Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/errno.h> | ||
17 | #include <linux/string.h> | ||
18 | #include <linux/types.h> | ||
19 | #include <linux/mm.h> | ||
20 | #include <linux/spinlock.h> | ||
21 | #include <linux/idr.h> | ||
22 | |||
23 | #include <asm/mmu_context.h> | ||
24 | |||
/* Serialises allocation and removal of ids in mmu_context_idr. */
static DEFINE_SPINLOCK(mmu_context_lock);
/* Allocator for MMU context ids. */
static DEFINE_IDR(mmu_context_idr);
27 | |||
28 | int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | ||
29 | { | ||
30 | int index; | ||
31 | int err; | ||
32 | |||
33 | again: | ||
34 | if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) | ||
35 | return -ENOMEM; | ||
36 | |||
37 | spin_lock(&mmu_context_lock); | ||
38 | err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); | ||
39 | spin_unlock(&mmu_context_lock); | ||
40 | |||
41 | if (err == -EAGAIN) | ||
42 | goto again; | ||
43 | else if (err) | ||
44 | return err; | ||
45 | |||
46 | if (index > MAX_CONTEXT) { | ||
47 | idr_remove(&mmu_context_idr, index); | ||
48 | return -ENOMEM; | ||
49 | } | ||
50 | |||
51 | mm->context.id = index; | ||
52 | |||
53 | return 0; | ||
54 | } | ||
55 | |||
56 | void destroy_context(struct mm_struct *mm) | ||
57 | { | ||
58 | spin_lock(&mmu_context_lock); | ||
59 | idr_remove(&mmu_context_idr, mm->context.id); | ||
60 | spin_unlock(&mmu_context_lock); | ||
61 | |||
62 | mm->context.id = NO_CONTEXT; | ||
63 | } | ||
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h new file mode 100644 index 000000000000..540f3292b229 --- /dev/null +++ b/arch/powerpc/mm/mmu_decl.h | |||
@@ -0,0 +1,85 @@ | |||
1 | /* | ||
2 | * Declarations of procedures and variables shared between files | ||
3 | * in arch/powerpc/mm/. | ||
4 | * | ||
5 | * Derived from arch/ppc/mm/init.c: | ||
6 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
7 | * | ||
8 | * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) | ||
9 | * and Cort Dougan (PReP) (cort@cs.nmt.edu) | ||
10 | * Copyright (C) 1996 Paul Mackerras | ||
11 | * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). | ||
12 | * | ||
13 | * Derived from "arch/i386/mm/init.c" | ||
14 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
15 | * | ||
16 | * This program is free software; you can redistribute it and/or | ||
17 | * modify it under the terms of the GNU General Public License | ||
18 | * as published by the Free Software Foundation; either version | ||
19 | * 2 of the License, or (at your option) any later version. | ||
20 | * | ||
21 | */ | ||
22 | #include <asm/tlbflush.h> | ||
23 | #include <asm/mmu.h> | ||
24 | |||
25 | extern void mapin_ram(void); | ||
26 | extern int map_page(unsigned long va, phys_addr_t pa, int flags); | ||
27 | extern void setbat(int index, unsigned long virt, unsigned long phys, | ||
28 | unsigned int size, int flags); | ||
29 | extern void reserve_phys_mem(unsigned long start, unsigned long size); | ||
30 | extern void settlbcam(int index, unsigned long virt, phys_addr_t phys, | ||
31 | unsigned int size, int flags, unsigned int pid); | ||
32 | extern void invalidate_tlbcam_entry(int index); | ||
33 | |||
34 | extern int __map_without_bats; | ||
35 | extern unsigned long ioremap_base; | ||
36 | extern unsigned long ioremap_bot; | ||
37 | extern unsigned int rtas_data, rtas_size; | ||
38 | |||
39 | extern unsigned long total_memory; | ||
40 | extern unsigned long total_lowmem; | ||
41 | extern int mem_init_done; | ||
42 | |||
43 | extern PTE *Hash, *Hash_end; | ||
44 | extern unsigned long Hash_size, Hash_mask; | ||
45 | |||
46 | extern unsigned int num_tlbcam_entries; | ||
47 | |||
48 | /* ...and now those things that may be slightly different between processor | ||
49 | * architectures. -- Dan | ||
50 | */ | ||
51 | #if defined(CONFIG_8xx) | ||
52 | #define flush_HPTE(X, va, pg) _tlbie(va) | ||
53 | #define MMU_init_hw() do { } while(0) | ||
54 | #define mmu_mapin_ram() (0UL) | ||
55 | |||
56 | #elif defined(CONFIG_4xx) | ||
57 | #define flush_HPTE(X, va, pg) _tlbie(va) | ||
58 | extern void MMU_init_hw(void); | ||
59 | extern unsigned long mmu_mapin_ram(void); | ||
60 | |||
61 | #elif defined(CONFIG_FSL_BOOKE) | ||
62 | #define flush_HPTE(X, va, pg) _tlbie(va) | ||
63 | extern void MMU_init_hw(void); | ||
64 | extern unsigned long mmu_mapin_ram(void); | ||
65 | extern void adjust_total_lowmem(void); | ||
66 | |||
67 | #else | ||
68 | /* anything except 8xx, 4xx or FSL Book-E */ | ||
69 | extern void MMU_init_hw(void); | ||
70 | extern unsigned long mmu_mapin_ram(void); | ||
71 | |||
72 | /* Be careful....this needs to be updated if we ever encounter 603 SMPs, | ||
73 | * which includes all new 82xx processors. We need tlbie/tlbsync here | ||
74 | * in that case (I think). -- Dan. | ||
75 | */ | ||
76 | static inline void flush_HPTE(unsigned context, unsigned long va, | ||
77 | unsigned long pdval) | ||
78 | { | ||
79 | if ((Hash != 0) && | ||
80 | cpu_has_feature(CPU_FTR_HPTE_TABLE)) | ||
81 | flush_hash_pages(0, va, pdval, 1); | ||
82 | else | ||
83 | _tlbie(va); | ||
84 | } | ||
85 | #endif | ||
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c new file mode 100644 index 000000000000..81a3d7446d37 --- /dev/null +++ b/arch/powerpc/mm/pgtable.c | |||
@@ -0,0 +1,470 @@ | |||
1 | /* | ||
2 | * This file contains the routines setting up the linux page tables. | ||
3 | * -- paulus | ||
4 | * | ||
5 | * Derived from arch/ppc/mm/init.c: | ||
6 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
7 | * | ||
8 | * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) | ||
9 | * and Cort Dougan (PReP) (cort@cs.nmt.edu) | ||
10 | * Copyright (C) 1996 Paul Mackerras | ||
11 | * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). | ||
12 | * | ||
13 | * Derived from "arch/i386/mm/init.c" | ||
14 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
15 | * | ||
16 | * This program is free software; you can redistribute it and/or | ||
17 | * modify it under the terms of the GNU General Public License | ||
18 | * as published by the Free Software Foundation; either version | ||
19 | * 2 of the License, or (at your option) any later version. | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <linux/config.h> | ||
24 | #include <linux/kernel.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/types.h> | ||
27 | #include <linux/mm.h> | ||
28 | #include <linux/vmalloc.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/highmem.h> | ||
31 | |||
32 | #include <asm/pgtable.h> | ||
33 | #include <asm/pgalloc.h> | ||
34 | #include <asm/io.h> | ||
35 | |||
36 | #include "mmu_decl.h" | ||
37 | |||
38 | unsigned long ioremap_base; | ||
39 | unsigned long ioremap_bot; | ||
40 | int io_bat_index; | ||
41 | |||
42 | #if defined(CONFIG_6xx) || defined(CONFIG_POWER3) | ||
43 | #define HAVE_BATS 1 | ||
44 | #endif | ||
45 | |||
46 | #if defined(CONFIG_FSL_BOOKE) | ||
47 | #define HAVE_TLBCAM 1 | ||
48 | #endif | ||
49 | |||
50 | extern char etext[], _stext[]; | ||
51 | |||
52 | #ifdef CONFIG_SMP | ||
53 | extern void hash_page_sync(void); | ||
54 | #endif | ||
55 | |||
56 | #ifdef HAVE_BATS | ||
57 | extern unsigned long v_mapped_by_bats(unsigned long va); | ||
58 | extern unsigned long p_mapped_by_bats(unsigned long pa); | ||
59 | void setbat(int index, unsigned long virt, unsigned long phys, | ||
60 | unsigned int size, int flags); | ||
61 | |||
62 | #else /* !HAVE_BATS */ | ||
63 | #define v_mapped_by_bats(x) (0UL) | ||
64 | #define p_mapped_by_bats(x) (0UL) | ||
65 | #endif /* HAVE_BATS */ | ||
66 | |||
67 | #ifdef HAVE_TLBCAM | ||
68 | extern unsigned int tlbcam_index; | ||
69 | extern unsigned long v_mapped_by_tlbcam(unsigned long va); | ||
70 | extern unsigned long p_mapped_by_tlbcam(unsigned long pa); | ||
71 | #else /* !HAVE_TLBCAM */ | ||
72 | #define v_mapped_by_tlbcam(x) (0UL) | ||
73 | #define p_mapped_by_tlbcam(x) (0UL) | ||
74 | #endif /* HAVE_TLBCAM */ | ||
75 | |||
76 | #ifdef CONFIG_PTE_64BIT | ||
77 | /* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */ | ||
78 | #define PGDIR_ORDER 1 | ||
79 | #else | ||
80 | #define PGDIR_ORDER 0 | ||
81 | #endif | ||
82 | |||
83 | pgd_t *pgd_alloc(struct mm_struct *mm) | ||
84 | { | ||
85 | /* Zeroed block of 2^PGDIR_ORDER pages; the allocator's result is returned as-is. */ | ||
86 | unsigned long block = __get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER); | ||
87 | |||
88 | return (pgd_t *)block; | ||
89 | } | ||
90 | |||
91 | void pgd_free(pgd_t *pgd) | ||
92 | { | ||
93 | free_pages((unsigned long)pgd, PGDIR_ORDER); | ||
94 | } | ||
95 | |||
96 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | ||
97 | { | ||
98 | extern int mem_init_done; | ||
99 | extern void *early_get_page(void); | ||
100 | pte_t *table; | ||
101 | |||
102 | /* Once mem_init_done is set, take a zeroed page from the page allocator. */ | ||
103 | if (mem_init_done) | ||
104 | return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); | ||
105 | |||
106 | table = (pte_t *)early_get_page(); | ||
107 | if (table) | ||
108 | clear_page(table); | ||
109 | return table; | ||
110 | } | ||
111 | |||
112 | struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) | ||
113 | { | ||
114 | struct page *pg; | ||
115 | int flags = GFP_KERNEL | __GFP_REPEAT; | ||
116 | |||
117 | #ifdef CONFIG_HIGHPTE | ||
118 | /* With HIGHPTE the PTE page may also come from highmem. */ | ||
119 | flags |= __GFP_HIGHMEM; | ||
120 | #endif | ||
121 | |||
122 | pg = alloc_pages(flags, 0); | ||
123 | if (pg != NULL) | ||
124 | clear_highpage(pg); | ||
125 | return pg; | ||
126 | } | ||
127 | |||
128 | void pte_free_kernel(pte_t *pte) | ||
129 | { | ||
130 | #ifdef CONFIG_SMP | ||
131 | hash_page_sync(); | ||
132 | #endif | ||
133 | free_page((unsigned long)pte); | ||
134 | } | ||
135 | |||
136 | void pte_free(struct page *ptepage) | ||
137 | { | ||
138 | #ifdef CONFIG_SMP | ||
139 | hash_page_sync(); | ||
140 | #endif | ||
141 | __free_page(ptepage); | ||
142 | } | ||
143 | |||
144 | #ifndef CONFIG_PHYS_64BIT | ||
145 | void __iomem * | ||
146 | ioremap(phys_addr_t addr, unsigned long size) | ||
147 | { | ||
148 | return __ioremap(addr, size, _PAGE_NO_CACHE); | ||
149 | } | ||
150 | #else /* CONFIG_PHYS_64BIT */ | ||
151 | void __iomem * | ||
152 | ioremap64(unsigned long long addr, unsigned long size) | ||
153 | { | ||
154 | return __ioremap(addr, size, _PAGE_NO_CACHE); | ||
155 | } | ||
156 | |||
157 | void __iomem * | ||
158 | ioremap(phys_addr_t addr, unsigned long size) | ||
159 | { | ||
160 | phys_addr_t addr64 = fixup_bigphys_addr(addr, size); | ||
161 | |||
162 | return ioremap64(addr64, size); | ||
163 | } | ||
164 | #endif /* CONFIG_PHYS_64BIT */ | ||
165 | |||
166 | void __iomem * | ||
167 | __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) | ||
168 | { | ||
169 | unsigned long v, i; | ||
170 | phys_addr_t p; | ||
171 | int err; | ||
172 | |||
173 | /* | ||
174 | * Choose an address to map it to. | ||
175 | * Once the vmalloc system is running, we use it. | ||
176 | * Before then, we use space going down from ioremap_base | ||
177 | * (ioremap_bot records where we're up to). | ||
178 | */ | ||
179 | p = addr & PAGE_MASK; | ||
180 | size = PAGE_ALIGN(addr + size) - p; | ||
181 | |||
182 | /* | ||
183 | * If the address lies within the first 16 MB, assume it's in ISA | ||
184 | * memory space | ||
185 | */ | ||
186 | if (p < 16*1024*1024) | ||
187 | p += _ISA_MEM_BASE; | ||
188 | |||
189 | /* | ||
190 | * Don't allow anybody to remap normal RAM that we're using. | ||
191 | * mem_init() sets high_memory so only do the check after that. | ||
192 | */ | ||
193 | if ( mem_init_done && (p < virt_to_phys(high_memory)) ) | ||
194 | { | ||
195 | printk("__ioremap(): phys addr "PHYS_FMT" is RAM lr %p\n", p, | ||
196 | __builtin_return_address(0)); | ||
197 | return NULL; | ||
198 | } | ||
199 | |||
200 | if (size == 0) | ||
201 | return NULL; | ||
202 | |||
203 | /* | ||
204 | * Is it already mapped? Perhaps overlapped by a previous | ||
205 | * BAT mapping. If the whole area is mapped then we're done, | ||
206 | * otherwise remap it since we want to keep the virt addrs for | ||
207 | * each request contiguous. | ||
208 | * | ||
209 | * We make the assumption here that if the bottom and top | ||
210 | * of the range we want are mapped then it's mapped to the | ||
211 | * same virt address (and this is contiguous). | ||
212 | * -- Cort | ||
213 | */ | ||
214 | if ((v = p_mapped_by_bats(p)) /*&& p_mapped_by_bats(p+size-1)*/ ) | ||
215 | goto out; | ||
216 | |||
217 | if ((v = p_mapped_by_tlbcam(p))) | ||
218 | goto out; | ||
219 | |||
220 | if (mem_init_done) { | ||
221 | struct vm_struct *area; | ||
222 | area = get_vm_area(size, VM_IOREMAP); | ||
223 | if (area == 0) | ||
224 | return NULL; | ||
225 | v = (unsigned long) area->addr; | ||
226 | } else { | ||
227 | v = (ioremap_bot -= size); | ||
228 | } | ||
229 | |||
230 | if ((flags & _PAGE_PRESENT) == 0) | ||
231 | flags |= _PAGE_KERNEL; | ||
232 | if (flags & _PAGE_NO_CACHE) | ||
233 | flags |= _PAGE_GUARDED; | ||
234 | |||
235 | /* | ||
236 | * Should check if it is a candidate for a BAT mapping | ||
237 | */ | ||
238 | |||
239 | err = 0; | ||
240 | for (i = 0; i < size && err == 0; i += PAGE_SIZE) | ||
241 | err = map_page(v+i, p+i, flags); | ||
242 | if (err) { | ||
243 | if (mem_init_done) | ||
244 | vunmap((void *)v); | ||
245 | return NULL; | ||
246 | } | ||
247 | |||
248 | out: | ||
249 | return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK)); | ||
250 | } | ||
251 | |||
252 | void iounmap(volatile void __iomem *addr) | ||
253 | { | ||
254 | /* | ||
255 | * If mapped by BATs then there is nothing to do. | ||
256 | * Calling vfree() generates a benign warning. | ||
257 | */ | ||
258 | if (v_mapped_by_bats((unsigned long)addr)) return; | ||
259 | |||
260 | if (addr > high_memory && (unsigned long) addr < ioremap_bot) | ||
261 | vunmap((void *) (PAGE_MASK & (unsigned long)addr)); | ||
262 | } | ||
263 | |||
264 | void __iomem *ioport_map(unsigned long port, unsigned int len) | ||
265 | { | ||
266 | return (void __iomem *) (port + _IO_BASE); | ||
267 | } | ||
268 | |||
269 | void ioport_unmap(void __iomem *addr) | ||
270 | { | ||
271 | /* Nothing to do */ | ||
272 | } | ||
273 | EXPORT_SYMBOL(ioport_map); | ||
274 | EXPORT_SYMBOL(ioport_unmap); | ||
275 | |||
276 | int | ||
277 | map_page(unsigned long va, phys_addr_t pa, int flags) | ||
278 | { | ||
279 | pmd_t *pd; | ||
280 | pte_t *pg; | ||
281 | int err = -ENOMEM; | ||
282 | |||
283 | spin_lock(&init_mm.page_table_lock); | ||
284 | /* Use upper 10 bits of VA to index the first level map */ | ||
285 | pd = pmd_offset(pgd_offset_k(va), va); | ||
286 | /* Use middle 10 bits of VA to index the second-level map */ | ||
287 | pg = pte_alloc_kernel(&init_mm, pd, va); | ||
288 | if (pg != 0) { | ||
289 | err = 0; | ||
290 | set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); | ||
291 | if (mem_init_done) | ||
292 | flush_HPTE(0, va, pmd_val(*pd)); | ||
293 | } | ||
294 | spin_unlock(&init_mm.page_table_lock); | ||
295 | return err; | ||
296 | } | ||
297 | |||
298 | /* | ||
299 | * Map in all of physical memory starting at KERNELBASE. | ||
300 | */ | ||
301 | void __init mapin_ram(void) | ||
302 | { | ||
303 | unsigned long v, p, s, f; | ||
304 | |||
305 | s = mmu_mapin_ram(); | ||
306 | v = KERNELBASE + s; | ||
307 | p = PPC_MEMSTART + s; | ||
308 | for (; s < total_lowmem; s += PAGE_SIZE) { | ||
309 | if ((char *) v >= _stext && (char *) v < etext) | ||
310 | f = _PAGE_RAM_TEXT; | ||
311 | else | ||
312 | f = _PAGE_RAM; | ||
313 | map_page(v, p, f); | ||
314 | v += PAGE_SIZE; | ||
315 | p += PAGE_SIZE; | ||
316 | } | ||
317 | } | ||
318 | |||
319 | /* is x a power of 2? */ | ||
320 | #define is_power_of_2(x) ((x) != 0 && (((x) & ((x) - 1)) == 0)) | ||
321 | |||
322 | /* is x a power of 4? (a single bit set, at an even bit position) */ | ||
323 | #define is_power_of_4(x) ((x) != 0 && (((x) & ((x) - 1)) == 0) && (ffs(x) & 1)) | ||
324 | |||
325 | /* | ||
326 | * Set up a mapping for a block of I/O. | ||
327 | * virt, phys, size must all be page-aligned. | ||
328 | * This should only be called before ioremap is called. | ||
329 | */ | ||
330 | void __init io_block_mapping(unsigned long virt, phys_addr_t phys, | ||
331 | unsigned int size, int flags) | ||
332 | { | ||
333 | unsigned int i; /* unsigned like size, so phys + i never sign-extends */ | ||
334 | |||
335 | if (virt > KERNELBASE && virt < ioremap_bot) | ||
336 | ioremap_bot = ioremap_base = virt; | ||
337 | |||
338 | #ifdef HAVE_BATS | ||
339 | /* | ||
340 | * Use a BAT for this if possible... | ||
341 | */ | ||
342 | if (io_bat_index < 2 && is_power_of_2(size) | ||
343 | && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { | ||
344 | setbat(io_bat_index, virt, phys, size, flags); | ||
345 | ++io_bat_index; | ||
346 | return; | ||
347 | } | ||
348 | #endif /* HAVE_BATS */ | ||
349 | |||
350 | #ifdef HAVE_TLBCAM | ||
351 | /* | ||
352 | * Use a CAM for this if possible... | ||
353 | */ | ||
354 | if (tlbcam_index < num_tlbcam_entries && is_power_of_4(size) | ||
355 | && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { | ||
356 | settlbcam(tlbcam_index, virt, phys, size, flags, 0); | ||
357 | ++tlbcam_index; | ||
358 | return; | ||
359 | } | ||
360 | #endif /* HAVE_TLBCAM */ | ||
361 | |||
362 | /* No BAT or CAM entry was used, put it in the page tables. */ | ||
363 | for (i = 0; i < size; i += PAGE_SIZE) | ||
364 | map_page(virt + i, phys + i, flags); | ||
365 | } | ||
366 | |||
367 | /* Scan the real Linux page tables and return a PTE pointer for | ||
368 | * a virtual address in a context. | ||
369 | * Returns true (1) if PTE was found, zero otherwise. The pointer to | ||
370 | * the PTE pointer is unmodified if PTE is not found. | ||
371 | */ | ||
372 | int | ||
373 | get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep) | ||
374 | { | ||
375 | unsigned long page = addr & PAGE_MASK; | ||
376 | pgd_t *pgd; | ||
377 | pmd_t *pmd; | ||
378 | pte_t *pte; | ||
379 | |||
380 | pgd = pgd_offset(mm, page); | ||
381 | if (!pgd) | ||
382 | return 0; | ||
383 | pmd = pmd_offset(pgd, page); | ||
384 | if (!pmd_present(*pmd)) | ||
385 | return 0; | ||
386 | |||
387 | /* XXX caller needs to do pte_unmap, yuck */ | ||
388 | pte = pte_offset_map(pmd, page); | ||
389 | if (!pte) | ||
390 | return 0; | ||
391 | *ptep = pte; | ||
392 | return 1; | ||
393 | } | ||
394 | |||
395 | /* Find physical address for this virtual address. Normally used by | ||
396 | * I/O functions, but anyone can call it. | ||
397 | */ | ||
398 | unsigned long iopa(unsigned long addr) | ||
399 | { | ||
400 | unsigned long pa; | ||
401 | |||
402 | /* I don't know why this won't work on PMacs or CHRP. It | ||
403 | * appears there is some bug, or there is some implicit | ||
404 | * mapping done not properly represented by BATs or in page | ||
405 | * tables.......I am actively working on resolving this, but | ||
406 | * can't hold up other stuff. -- Dan | ||
407 | */ | ||
408 | pte_t *pte; | ||
409 | struct mm_struct *mm; | ||
410 | |||
411 | /* Check the BATs */ | ||
412 | pa = v_mapped_by_bats(addr); | ||
413 | if (pa) | ||
414 | return pa; | ||
415 | |||
416 | /* Allow mapping of user addresses (within the thread) | ||
417 | * for DMA if necessary. | ||
418 | */ | ||
419 | if (addr < TASK_SIZE) | ||
420 | mm = current->mm; | ||
421 | else | ||
422 | mm = &init_mm; | ||
423 | |||
424 | pa = 0; | ||
425 | if (get_pteptr(mm, addr, &pte)) { | ||
426 | pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK); | ||
427 | pte_unmap(pte); | ||
428 | } | ||
429 | |||
430 | return(pa); | ||
431 | } | ||
432 | |||
433 | /* This will find the virtual address for a physical one.... | ||
434 | * Swiped from APUS, could be dangerous :-). | ||
435 | * This is only a placeholder until I really find a way to make this | ||
436 | * work. -- Dan | ||
437 | */ | ||
438 | unsigned long | ||
439 | mm_ptov (unsigned long paddr) | ||
440 | { | ||
441 | unsigned long ret; | ||
442 | #if 0 | ||
443 | if (paddr < 16*1024*1024) | ||
444 | ret = ZTWO_VADDR(paddr); | ||
445 | else { | ||
446 | int i; | ||
447 | |||
448 | for (i = 0; i < kmap_chunk_count;){ | ||
449 | unsigned long phys = kmap_chunks[i++]; | ||
450 | unsigned long size = kmap_chunks[i++]; | ||
451 | unsigned long virt = kmap_chunks[i++]; | ||
452 | if (paddr >= phys | ||
453 | && paddr < (phys + size)){ | ||
454 | ret = virt + paddr - phys; | ||
455 | goto exit; | ||
456 | } | ||
457 | } | ||
458 | |||
459 | ret = (unsigned long) __va(paddr); | ||
460 | } | ||
461 | exit: | ||
462 | #ifdef DEBUGPV | ||
463 | printk ("PTOV(%lx)=%lx\n", paddr, ret); | ||
464 | #endif | ||
465 | #else | ||
466 | ret = (unsigned long)paddr + KERNELBASE; | ||
467 | #endif | ||
468 | return ret; | ||
469 | } | ||
470 | |||
diff --git a/arch/powerpc/mm/pgtable64.c b/arch/powerpc/mm/pgtable64.c new file mode 100644 index 000000000000..724f97e5dee5 --- /dev/null +++ b/arch/powerpc/mm/pgtable64.c | |||
@@ -0,0 +1,357 @@ | |||
1 | /* | ||
2 | * This file contains ioremap and related functions for 64-bit machines. | ||
3 | * | ||
4 | * Derived from arch/ppc64/mm/init.c | ||
5 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
6 | * | ||
7 | * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org) | ||
8 | * and Cort Dougan (PReP) (cort@cs.nmt.edu) | ||
9 | * Copyright (C) 1996 Paul Mackerras | ||
10 | * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). | ||
11 | * | ||
12 | * Derived from "arch/i386/mm/init.c" | ||
13 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
14 | * | ||
15 | * Dave Engebretsen <engebret@us.ibm.com> | ||
16 | * Rework for PPC64 port. | ||
17 | * | ||
18 | * This program is free software; you can redistribute it and/or | ||
19 | * modify it under the terms of the GNU General Public License | ||
20 | * as published by the Free Software Foundation; either version | ||
21 | * 2 of the License, or (at your option) any later version. | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/config.h> | ||
26 | #include <linux/signal.h> | ||
27 | #include <linux/sched.h> | ||
28 | #include <linux/kernel.h> | ||
29 | #include <linux/errno.h> | ||
30 | #include <linux/string.h> | ||
31 | #include <linux/types.h> | ||
32 | #include <linux/mman.h> | ||
33 | #include <linux/mm.h> | ||
34 | #include <linux/swap.h> | ||
35 | #include <linux/stddef.h> | ||
36 | #include <linux/vmalloc.h> | ||
37 | #include <linux/init.h> | ||
38 | #include <linux/delay.h> | ||
39 | #include <linux/bootmem.h> | ||
40 | #include <linux/highmem.h> | ||
41 | #include <linux/idr.h> | ||
42 | #include <linux/nodemask.h> | ||
43 | #include <linux/module.h> | ||
44 | |||
45 | #include <asm/pgalloc.h> | ||
46 | #include <asm/page.h> | ||
47 | #include <asm/prom.h> | ||
48 | #include <asm/lmb.h> | ||
49 | #include <asm/rtas.h> | ||
50 | #include <asm/io.h> | ||
51 | #include <asm/mmu_context.h> | ||
52 | #include <asm/pgtable.h> | ||
53 | #include <asm/mmu.h> | ||
54 | #include <asm/uaccess.h> | ||
55 | #include <asm/smp.h> | ||
56 | #include <asm/machdep.h> | ||
57 | #include <asm/tlb.h> | ||
58 | #include <asm/eeh.h> | ||
59 | #include <asm/processor.h> | ||
60 | #include <asm/mmzone.h> | ||
61 | #include <asm/cputable.h> | ||
62 | #include <asm/ppcdebug.h> | ||
63 | #include <asm/sections.h> | ||
64 | #include <asm/system.h> | ||
65 | #include <asm/iommu.h> | ||
66 | #include <asm/abs_addr.h> | ||
67 | #include <asm/vdso.h> | ||
68 | #include <asm/imalloc.h> | ||
69 | |||
70 | #if PGTABLE_RANGE > USER_VSID_RANGE | ||
71 | #warning Limited user VSID range means pagetable space is wasted | ||
72 | #endif | ||
73 | |||
74 | #if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) | ||
75 | #warning TASK_SIZE is smaller than it needs to be. | ||
76 | #endif | ||
77 | |||
78 | int mem_init_done; | ||
79 | unsigned long ioremap_bot = IMALLOC_BASE; | ||
80 | static unsigned long phbs_io_bot = PHBS_IO_BASE; | ||
81 | |||
82 | extern pgd_t swapper_pg_dir[]; | ||
83 | extern struct task_struct *current_set[NR_CPUS]; | ||
84 | |||
85 | unsigned long klimit = (unsigned long)_end; | ||
86 | |||
87 | /* max amount of RAM to use */ | ||
88 | unsigned long __max_memory; | ||
89 | |||
90 | /* info on what we think the IO hole is */ | ||
91 | unsigned long io_hole_start; | ||
92 | unsigned long io_hole_size; | ||
93 | |||
94 | #ifdef CONFIG_PPC_ISERIES | ||
95 | |||
96 | void __iomem *ioremap(unsigned long addr, unsigned long size) | ||
97 | { | ||
98 | return (void __iomem *)addr; | ||
99 | } | ||
100 | |||
101 | extern void __iomem *__ioremap(unsigned long addr, unsigned long size, | ||
102 | unsigned long flags) | ||
103 | { | ||
104 | return (void __iomem *)addr; | ||
105 | } | ||
106 | |||
107 | void iounmap(volatile void __iomem *addr) | ||
108 | { | ||
109 | return; | ||
110 | } | ||
111 | |||
112 | #else | ||
113 | |||
114 | /* | ||
115 | * map_io_page is currently only called by __ioremap_com(). | ||
116 | * Once mem_init_done is set it adds an entry to the init_mm page | ||
117 | * table; before that it bolts an entry into the hardware page table. | ||
118 | * Returns 0 on success or -ENOMEM if a page-table level cannot be | ||
119 | * allocated; panics if the early hardware page table insert fails. | ||
120 | */ | ||
121 | static int map_io_page(unsigned long ea, unsigned long pa, int flags) | ||
122 | { | ||
123 | pgd_t *pgdp; | ||
124 | pud_t *pudp; | ||
125 | pmd_t *pmdp; | ||
126 | pte_t *ptep; | ||
127 | int err = 0; | ||
128 | |||
129 | if (mem_init_done) { | ||
130 | spin_lock(&init_mm.page_table_lock); | ||
131 | pgdp = pgd_offset_k(ea); | ||
132 | pudp = pud_alloc(&init_mm, pgdp, ea); | ||
133 | pmdp = pudp ? pmd_alloc(&init_mm, pudp, ea) : NULL; | ||
134 | ptep = pmdp ? pte_alloc_kernel(&init_mm, pmdp, ea) : NULL; | ||
135 | if (ptep) | ||
136 | set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, | ||
137 | __pgprot(flags))); | ||
138 | else | ||
139 | err = -ENOMEM; | ||
140 | /* Release the lock on the allocation-failure paths as well. */ | ||
141 | spin_unlock(&init_mm.page_table_lock); | ||
142 | } else { | ||
143 | unsigned long vsid, va, vpn, hash, hpteg; | ||
144 | |||
145 | /* | ||
146 | * If the mm subsystem is not fully up, we cannot create a | ||
147 | * linux page table entry for this mapping. Simply bolt an | ||
148 | * entry in the hardware page table. | ||
149 | */ | ||
150 | vsid = get_kernel_vsid(ea); | ||
151 | va = (vsid << 28) | (ea & 0xFFFFFFF); | ||
152 | vpn = va >> PAGE_SHIFT; | ||
153 | |||
154 | hash = hpt_hash(vpn, 0); | ||
155 | |||
156 | hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); | ||
157 | |||
158 | /* Panic if a pte group is full */ | ||
159 | if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, | ||
160 | HPTE_V_BOLTED, | ||
161 | _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX) | ||
162 | == -1) { | ||
163 | panic("map_io_page: could not insert mapping"); | ||
164 | } | ||
165 | } | ||
166 | return err; | ||
167 | } | ||
168 | |||
169 | |||
170 | static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa, | ||
171 | unsigned long ea, unsigned long size, | ||
172 | unsigned long flags) | ||
173 | { | ||
174 | unsigned long off = 0; | ||
175 | |||
176 | if (!(flags & _PAGE_PRESENT)) | ||
177 | flags |= pgprot_val(PAGE_KERNEL); | ||
178 | /* Map page by page; give up with NULL on the first page that fails. */ | ||
179 | while (off < size) { | ||
180 | if (map_io_page(ea + off, pa + off, flags) != 0) | ||
181 | return NULL; | ||
182 | off += PAGE_SIZE; | ||
183 | } | ||
184 | return (void __iomem *) (ea + (addr & ~PAGE_MASK)); | ||
185 | |||
186 | |||
187 | void __iomem * | ||
188 | ioremap(unsigned long addr, unsigned long size) | ||
189 | { | ||
190 | return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED); | ||
191 | } | ||
192 | |||
193 | void __iomem * __ioremap(unsigned long addr, unsigned long size, | ||
194 | unsigned long flags) | ||
195 | { | ||
196 | unsigned long pa, ea; | ||
197 | void __iomem *ret; | ||
198 | |||
199 | /* | ||
200 | * Choose an address to map it to. | ||
201 | * Once the imalloc system is running, we use it. | ||
202 | * Before that, we map using addresses going | ||
203 | * up from ioremap_bot. imalloc will use | ||
204 | * the addresses from ioremap_bot through | ||
205 | * IMALLOC_END | ||
206 | * | ||
207 | */ | ||
208 | pa = addr & PAGE_MASK; | ||
209 | size = PAGE_ALIGN(addr + size) - pa; | ||
210 | |||
211 | if (size == 0) | ||
212 | return NULL; | ||
213 | |||
214 | if (mem_init_done) { | ||
215 | struct vm_struct *area; | ||
216 | area = im_get_free_area(size); | ||
217 | if (area == NULL) | ||
218 | return NULL; | ||
219 | ea = (unsigned long)(area->addr); | ||
220 | ret = __ioremap_com(addr, pa, ea, size, flags); | ||
221 | if (!ret) | ||
222 | im_free(area->addr); | ||
223 | } else { | ||
224 | ea = ioremap_bot; | ||
225 | ret = __ioremap_com(addr, pa, ea, size, flags); | ||
226 | if (ret) | ||
227 | ioremap_bot += size; | ||
228 | } | ||
229 | return ret; | ||
230 | } | ||
231 | |||
232 | #define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK)) | ||
233 | |||
234 | int __ioremap_explicit(unsigned long pa, unsigned long ea, | ||
235 | unsigned long size, unsigned long flags) | ||
236 | { | ||
237 | struct vm_struct *area; | ||
238 | void __iomem *ret; | ||
239 | |||
240 | /* For now, require page-aligned values for pa, ea, and size */ | ||
241 | if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) || | ||
242 | !IS_PAGE_ALIGNED(size)) { | ||
243 | printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__); | ||
244 | return 1; | ||
245 | } | ||
246 | |||
247 | if (!mem_init_done) { | ||
248 | /* Two things to consider in this case: | ||
249 | * 1) No records will be kept (imalloc, etc) that the region | ||
250 | * has been remapped | ||
251 | * 2) It won't be easy to iounmap() the region later (because | ||
252 | * of 1) | ||
253 | */ | ||
254 | ; | ||
255 | } else { | ||
256 | area = im_get_area(ea, size, | ||
257 | IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS); | ||
258 | if (area == NULL) { | ||
259 | /* Expected when PHB-dlpar is in play */ | ||
260 | return 1; | ||
261 | } | ||
262 | if (ea != (unsigned long) area->addr) { | ||
263 | printk(KERN_ERR "unexpected addr return from " | ||
264 | "im_get_area\n"); | ||
265 | return 1; | ||
266 | } | ||
267 | } | ||
268 | |||
269 | ret = __ioremap_com(pa, pa, ea, size, flags); | ||
270 | if (ret == NULL) { | ||
271 | printk(KERN_ERR "ioremap_explicit() allocation failure !\n"); | ||
272 | return 1; | ||
273 | } | ||
274 | if (ret != (void *) ea) { | ||
275 | printk(KERN_ERR "__ioremap_com() returned unexpected addr\n"); | ||
276 | return 1; | ||
277 | } | ||
278 | |||
279 | return 0; | ||
280 | } | ||
281 | |||
282 | /* | ||
283 | * Unmap an IO region and remove it from imalloc'd list. | ||
284 | * Access to IO memory should be serialized by driver. | ||
285 | * This code is modeled after vmalloc code - unmap_vm_area() | ||
286 | * | ||
287 | * XXX what about calls before mem_init_done (ie python_countermeasures()) | ||
288 | */ | ||
289 | void iounmap(volatile void __iomem *token) | ||
290 | { | ||
291 | void *addr; | ||
292 | |||
293 | if (!mem_init_done) | ||
294 | return; | ||
295 | |||
296 | addr = (void *) ((unsigned long __force) token & PAGE_MASK); | ||
297 | |||
298 | im_free(addr); | ||
299 | } | ||
300 | |||
301 | static int iounmap_subset_regions(unsigned long addr, unsigned long size) | ||
302 | { | ||
303 | struct vm_struct *area; | ||
304 | |||
305 | /* Check whether subsets of this region exist */ | ||
306 | area = im_get_area(addr, size, IM_REGION_SUPERSET); | ||
307 | if (area == NULL) | ||
308 | return 1; | ||
309 | |||
310 | while (area) { | ||
311 | iounmap((void __iomem *) area->addr); | ||
312 | area = im_get_area(addr, size, | ||
313 | IM_REGION_SUPERSET); | ||
314 | } | ||
315 | |||
316 | return 0; | ||
317 | } | ||
318 | |||
319 | int iounmap_explicit(volatile void __iomem *start, unsigned long size) | ||
320 | { | ||
321 | struct vm_struct *area; | ||
322 | unsigned long addr; | ||
323 | int rc; | ||
324 | |||
325 | addr = (unsigned long __force) start & PAGE_MASK; | ||
326 | |||
327 | /* Verify that the region either exists or is a subset of an existing | ||
328 | * region. In the latter case, split the parent region to create | ||
329 | * the exact region | ||
330 | */ | ||
331 | area = im_get_area(addr, size, | ||
332 | IM_REGION_EXISTS | IM_REGION_SUBSET); | ||
333 | if (area == NULL) { | ||
334 | /* Determine whether subset regions exist. If so, unmap */ | ||
335 | rc = iounmap_subset_regions(addr, size); | ||
336 | if (rc) { | ||
337 | printk(KERN_ERR | ||
338 | "%s() cannot unmap nonexistent range 0x%lx\n", | ||
339 | __FUNCTION__, addr); | ||
340 | return 1; | ||
341 | } | ||
342 | } else { | ||
343 | iounmap((void __iomem *) area->addr); | ||
344 | } | ||
345 | /* | ||
346 | * FIXME! This can't be right: | ||
347 | iounmap(area->addr); | ||
348 | * Maybe it should be "iounmap(area);" | ||
349 | */ | ||
350 | return 0; | ||
351 | } | ||
352 | |||
353 | #endif | ||
354 | |||
355 | EXPORT_SYMBOL(ioremap); | ||
356 | EXPORT_SYMBOL(__ioremap); | ||
357 | EXPORT_SYMBOL(iounmap); | ||
diff --git a/arch/powerpc/mm/ppc_mmu.c b/arch/powerpc/mm/ppc_mmu.c new file mode 100644 index 000000000000..9a381ed5eb21 --- /dev/null +++ b/arch/powerpc/mm/ppc_mmu.c | |||
@@ -0,0 +1,296 @@ | |||
1 | /* | ||
2 | * This file contains the routines for handling the MMU on those | ||
3 | * PowerPC implementations where the MMU substantially follows the | ||
4 | * architecture specification. This includes the 6xx, 7xx, 7xxx, | ||
5 | * 8260, and POWER3 implementations but excludes the 8xx and 4xx. | ||
6 | * -- paulus | ||
7 | * | ||
8 | * Derived from arch/ppc/mm/init.c: | ||
9 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
10 | * | ||
11 | * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) | ||
12 | * and Cort Dougan (PReP) (cort@cs.nmt.edu) | ||
13 | * Copyright (C) 1996 Paul Mackerras | ||
14 | * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). | ||
15 | * | ||
16 | * Derived from "arch/i386/mm/init.c" | ||
17 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
18 | * | ||
19 | * This program is free software; you can redistribute it and/or | ||
20 | * modify it under the terms of the GNU General Public License | ||
21 | * as published by the Free Software Foundation; either version | ||
22 | * 2 of the License, or (at your option) any later version. | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include <linux/config.h> | ||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/mm.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/highmem.h> | ||
31 | |||
32 | #include <asm/prom.h> | ||
33 | #include <asm/mmu.h> | ||
34 | #include <asm/machdep.h> | ||
35 | |||
36 | #include "mmu_decl.h" | ||
37 | #include "mem_pieces.h" | ||
38 | |||
39 | PTE *Hash, *Hash_end; | ||
40 | unsigned long Hash_size, Hash_mask; | ||
41 | unsigned long _SDR1; | ||
42 | |||
43 | union ubat { /* BAT register values to be loaded */ | ||
44 | BAT bat; | ||
45 | #ifdef CONFIG_PPC64BRIDGE | ||
46 | u64 word[2]; | ||
47 | #else | ||
48 | u32 word[2]; | ||
49 | #endif | ||
50 | } BATS[4][2]; /* 4 pairs of IBAT, DBAT */ | ||
51 | |||
/*
 * Address range mapped by one BAT pair, as recorded by setbat().
 * An all-zero entry means the BAT has not been set up.
 */
struct batrange {
	unsigned long start;	/* first virtual address covered */
	unsigned long limit;	/* last virtual address covered (inclusive) */
	unsigned long phys;	/* physical address that 'start' maps to */
};

/* One entry per BAT pair, indexed like BATS[] */
struct batrange bat_addrs[4];
57 | |||
58 | /* | ||
59 | * Return PA for this VA if it is mapped by a BAT, or 0 | ||
60 | */ | ||
61 | unsigned long v_mapped_by_bats(unsigned long va) | ||
62 | { | ||
63 | int b; | ||
64 | for (b = 0; b < 4; ++b) | ||
65 | if (va >= bat_addrs[b].start && va < bat_addrs[b].limit) | ||
66 | return bat_addrs[b].phys + (va - bat_addrs[b].start); | ||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * Return VA for a given PA or 0 if not mapped | ||
72 | */ | ||
73 | unsigned long p_mapped_by_bats(unsigned long pa) | ||
74 | { | ||
75 | int b; | ||
76 | for (b = 0; b < 4; ++b) | ||
77 | if (pa >= bat_addrs[b].phys | ||
78 | && pa < (bat_addrs[b].limit-bat_addrs[b].start) | ||
79 | +bat_addrs[b].phys) | ||
80 | return bat_addrs[b].start+(pa-bat_addrs[b].phys); | ||
81 | return 0; | ||
82 | } | ||
83 | |||
84 | unsigned long __init mmu_mapin_ram(void) | ||
85 | { | ||
86 | #ifdef CONFIG_POWER4 | ||
87 | return 0; | ||
88 | #else | ||
89 | unsigned long tot, bl, done; | ||
90 | unsigned long max_size = (256<<20); | ||
91 | unsigned long align; | ||
92 | |||
93 | if (__map_without_bats) | ||
94 | return 0; | ||
95 | |||
96 | /* Set up BAT2 and if necessary BAT3 to cover RAM. */ | ||
97 | |||
98 | /* Make sure we don't map a block larger than the | ||
99 | smallest alignment of the physical address. */ | ||
100 | /* alignment of PPC_MEMSTART */ | ||
101 | align = ~(PPC_MEMSTART-1) & PPC_MEMSTART; | ||
102 | /* set BAT block size to MIN(max_size, align) */ | ||
103 | if (align && align < max_size) | ||
104 | max_size = align; | ||
105 | |||
106 | tot = total_lowmem; | ||
107 | for (bl = 128<<10; bl < max_size; bl <<= 1) { | ||
108 | if (bl * 2 > tot) | ||
109 | break; | ||
110 | } | ||
111 | |||
112 | setbat(2, KERNELBASE, PPC_MEMSTART, bl, _PAGE_RAM); | ||
113 | done = (unsigned long)bat_addrs[2].limit - KERNELBASE + 1; | ||
114 | if ((done < tot) && !bat_addrs[3].limit) { | ||
115 | /* use BAT3 to cover a bit more */ | ||
116 | tot -= done; | ||
117 | for (bl = 128<<10; bl < max_size; bl <<= 1) | ||
118 | if (bl * 2 > tot) | ||
119 | break; | ||
120 | setbat(3, KERNELBASE+done, PPC_MEMSTART+done, bl, _PAGE_RAM); | ||
121 | done = (unsigned long)bat_addrs[3].limit - KERNELBASE + 1; | ||
122 | } | ||
123 | |||
124 | return done; | ||
125 | #endif | ||
126 | } | ||
127 | |||
128 | /* | ||
129 | * Set up one of the I/D BAT (block address translation) register pairs. | ||
130 | * The parameters are not checked; in particular size must be a power | ||
131 | * of 2 between 128k and 256M. | ||
132 | */ | ||
133 | void __init setbat(int index, unsigned long virt, unsigned long phys, | ||
134 | unsigned int size, int flags) | ||
135 | { | ||
136 | unsigned int bl; | ||
137 | int wimgxpp; | ||
138 | union ubat *bat = BATS[index]; | ||
139 | |||
140 | if (((flags & _PAGE_NO_CACHE) == 0) && | ||
141 | cpu_has_feature(CPU_FTR_NEED_COHERENT)) | ||
142 | flags |= _PAGE_COHERENT; | ||
143 | |||
144 | bl = (size >> 17) - 1; | ||
145 | if (PVR_VER(mfspr(SPRN_PVR)) != 1) { | ||
146 | /* 603, 604, etc. */ | ||
147 | /* Do DBAT first */ | ||
148 | wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | ||
149 | | _PAGE_COHERENT | _PAGE_GUARDED); | ||
150 | wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; | ||
151 | bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ | ||
152 | bat[1].word[1] = phys | wimgxpp; | ||
153 | #ifndef CONFIG_KGDB /* want user access for breakpoints */ | ||
154 | if (flags & _PAGE_USER) | ||
155 | #endif | ||
156 | bat[1].bat.batu.vp = 1; | ||
157 | if (flags & _PAGE_GUARDED) { | ||
158 | /* G bit must be zero in IBATs */ | ||
159 | bat[0].word[0] = bat[0].word[1] = 0; | ||
160 | } else { | ||
161 | /* make IBAT same as DBAT */ | ||
162 | bat[0] = bat[1]; | ||
163 | } | ||
164 | } else { | ||
165 | /* 601 cpu */ | ||
166 | if (bl > BL_8M) | ||
167 | bl = BL_8M; | ||
168 | wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | ||
169 | | _PAGE_COHERENT); | ||
170 | wimgxpp |= (flags & _PAGE_RW)? | ||
171 | ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX; | ||
172 | bat->word[0] = virt | wimgxpp | 4; /* Ks=0, Ku=1 */ | ||
173 | bat->word[1] = phys | bl | 0x40; /* V=1 */ | ||
174 | } | ||
175 | |||
176 | bat_addrs[index].start = virt; | ||
177 | bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1; | ||
178 | bat_addrs[index].phys = phys; | ||
179 | } | ||
180 | |||
181 | /* | ||
182 | * Initialize the hash table and patch the instructions in hashtable.S. | ||
183 | */ | ||
184 | void __init MMU_init_hw(void) | ||
185 | { | ||
186 | unsigned int hmask, mb, mb2; | ||
187 | unsigned int n_hpteg, lg_n_hpteg; | ||
188 | |||
189 | extern unsigned int hash_page_patch_A[]; | ||
190 | extern unsigned int hash_page_patch_B[], hash_page_patch_C[]; | ||
191 | extern unsigned int hash_page[]; | ||
192 | extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[]; | ||
193 | |||
194 | if (!cpu_has_feature(CPU_FTR_HPTE_TABLE)) { | ||
195 | /* | ||
196 | * Put a blr (procedure return) instruction at the | ||
197 | * start of hash_page, since we can still get DSI | ||
198 | * exceptions on a 603. | ||
199 | */ | ||
200 | hash_page[0] = 0x4e800020; | ||
201 | flush_icache_range((unsigned long) &hash_page[0], | ||
202 | (unsigned long) &hash_page[1]); | ||
203 | return; | ||
204 | } | ||
205 | |||
206 | if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105); | ||
207 | |||
208 | #ifdef CONFIG_PPC64BRIDGE | ||
209 | #define LG_HPTEG_SIZE 7 /* 128 bytes per HPTEG */ | ||
210 | #define SDR1_LOW_BITS (lg_n_hpteg - 11) | ||
211 | #define MIN_N_HPTEG 2048 /* min 256kB hash table */ | ||
212 | #else | ||
213 | #define LG_HPTEG_SIZE 6 /* 64 bytes per HPTEG */ | ||
214 | #define SDR1_LOW_BITS ((n_hpteg - 1) >> 10) | ||
215 | #define MIN_N_HPTEG 1024 /* min 64kB hash table */ | ||
216 | #endif | ||
217 | |||
218 | #ifdef CONFIG_POWER4 | ||
219 | /* The hash table has already been allocated and initialized | ||
220 | in prom.c */ | ||
221 | n_hpteg = Hash_size >> LG_HPTEG_SIZE; | ||
222 | lg_n_hpteg = __ilog2(n_hpteg); | ||
223 | |||
224 | /* Remove the hash table from the available memory */ | ||
225 | if (Hash) | ||
226 | reserve_phys_mem(__pa(Hash), Hash_size); | ||
227 | |||
228 | #else /* CONFIG_POWER4 */ | ||
229 | /* | ||
230 | * Allow 1 HPTE (1/8 HPTEG) for each page of memory. | ||
231 | * This is less than the recommended amount, but then | ||
232 | * Linux ain't AIX. | ||
233 | */ | ||
234 | n_hpteg = total_memory / (PAGE_SIZE * 8); | ||
235 | if (n_hpteg < MIN_N_HPTEG) | ||
236 | n_hpteg = MIN_N_HPTEG; | ||
237 | lg_n_hpteg = __ilog2(n_hpteg); | ||
238 | if (n_hpteg & (n_hpteg - 1)) { | ||
239 | ++lg_n_hpteg; /* round up if not power of 2 */ | ||
240 | n_hpteg = 1 << lg_n_hpteg; | ||
241 | } | ||
242 | Hash_size = n_hpteg << LG_HPTEG_SIZE; | ||
243 | |||
244 | /* | ||
245 | * Find some memory for the hash table. | ||
246 | */ | ||
247 | if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); | ||
248 | Hash = mem_pieces_find(Hash_size, Hash_size); | ||
249 | cacheable_memzero(Hash, Hash_size); | ||
250 | _SDR1 = __pa(Hash) | SDR1_LOW_BITS; | ||
251 | #endif /* CONFIG_POWER4 */ | ||
252 | |||
253 | Hash_end = (PTE *) ((unsigned long)Hash + Hash_size); | ||
254 | |||
255 | printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n", | ||
256 | total_memory >> 20, Hash_size >> 10, Hash); | ||
257 | |||
258 | |||
259 | /* | ||
260 | * Patch up the instructions in hashtable.S:create_hpte | ||
261 | */ | ||
262 | if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345); | ||
263 | Hash_mask = n_hpteg - 1; | ||
264 | hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); | ||
265 | mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; | ||
266 | if (lg_n_hpteg > 16) | ||
267 | mb2 = 16 - LG_HPTEG_SIZE; | ||
268 | |||
269 | hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff) | ||
270 | | ((unsigned int)(Hash) >> 16); | ||
271 | hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6); | ||
272 | hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6); | ||
273 | hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask; | ||
274 | hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask; | ||
275 | |||
276 | /* | ||
277 | * Ensure that the locations we've patched have been written | ||
278 | * out from the data cache and invalidated in the instruction | ||
279 | * cache, on those machines with split caches. | ||
280 | */ | ||
281 | flush_icache_range((unsigned long) &hash_page_patch_A[0], | ||
282 | (unsigned long) &hash_page_patch_C[1]); | ||
283 | |||
284 | /* | ||
285 | * Patch up the instructions in hashtable.S:flush_hash_page | ||
286 | */ | ||
287 | flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff) | ||
288 | | ((unsigned int)(Hash) >> 16); | ||
289 | flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6); | ||
290 | flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6); | ||
291 | flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask; | ||
292 | flush_icache_range((unsigned long) &flush_hash_patch_A[0], | ||
293 | (unsigned long) &flush_hash_patch_B[1]); | ||
294 | |||
295 | if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); | ||
296 | } | ||
diff --git a/arch/powerpc/mm/tlb.c b/arch/powerpc/mm/tlb.c new file mode 100644 index 000000000000..6c3dc3c44c86 --- /dev/null +++ b/arch/powerpc/mm/tlb.c | |||
@@ -0,0 +1,183 @@ | |||
1 | /* | ||
2 | * This file contains the routines for TLB flushing. | ||
3 | * On machines where the MMU uses a hash table to store virtual to | ||
4 | * physical translations, these routines flush entries from the | ||
5 | * hash table also. | ||
6 | * -- paulus | ||
7 | * | ||
8 | * Derived from arch/ppc/mm/init.c: | ||
9 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
10 | * | ||
11 | * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) | ||
12 | * and Cort Dougan (PReP) (cort@cs.nmt.edu) | ||
13 | * Copyright (C) 1996 Paul Mackerras | ||
14 | * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). | ||
15 | * | ||
16 | * Derived from "arch/i386/mm/init.c" | ||
17 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
18 | * | ||
19 | * This program is free software; you can redistribute it and/or | ||
20 | * modify it under the terms of the GNU General Public License | ||
21 | * as published by the Free Software Foundation; either version | ||
22 | * 2 of the License, or (at your option) any later version. | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include <linux/config.h> | ||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/mm.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/highmem.h> | ||
31 | #include <asm/tlbflush.h> | ||
32 | #include <asm/tlb.h> | ||
33 | |||
34 | #include "mmu_decl.h" | ||
35 | |||
36 | /* | ||
37 | * Called when unmapping pages to flush entries from the TLB/hash table. | ||
38 | */ | ||
39 | void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) | ||
40 | { | ||
41 | unsigned long ptephys; | ||
42 | |||
43 | if (Hash != 0) { | ||
44 | ptephys = __pa(ptep) & PAGE_MASK; | ||
45 | flush_hash_pages(mm->context, addr, ptephys, 1); | ||
46 | } | ||
47 | } | ||
48 | |||
49 | /* | ||
50 | * Called by ptep_set_access_flags, must flush on CPUs for which the | ||
51 | * DSI handler can't just "fixup" the TLB on a write fault | ||
52 | */ | ||
53 | void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr) | ||
54 | { | ||
55 | if (Hash != 0) | ||
56 | return; | ||
57 | _tlbie(addr); | ||
58 | } | ||
59 | |||
60 | /* | ||
61 | * Called at the end of a mmu_gather operation to make sure the | ||
62 | * TLB flush is completely done. | ||
63 | */ | ||
64 | void tlb_flush(struct mmu_gather *tlb) | ||
65 | { | ||
66 | if (Hash == 0) { | ||
67 | /* | ||
68 | * 603 needs to flush the whole TLB here since | ||
69 | * it doesn't use a hash table. | ||
70 | */ | ||
71 | _tlbia(); | ||
72 | } | ||
73 | } | ||
74 | |||
75 | /* | ||
76 | * TLB flushing: | ||
77 | * | ||
78 | * - flush_tlb_mm(mm) flushes the specified mm context TLB's | ||
79 | * - flush_tlb_page(vma, vmaddr) flushes one page | ||
80 | * - flush_tlb_range(vma, start, end) flushes a range of pages | ||
81 | * - flush_tlb_kernel_range(start, end) flushes kernel pages | ||
82 | * | ||
83 | * since the hardware hash table functions as an extension of the | ||
84 | * tlb as far as the linux tables are concerned, flush it too. | ||
85 | * -- Cort | ||
86 | */ | ||
87 | |||
/*
 * FINISH_FLUSH is placed at the end of each flush_tlb_* entry point.
 *
 * 750 SMP is a Bad Idea because the 750 doesn't broadcast all
 * the cache operations on the bus.  Hence on CONFIG_SMP_750 an IPI
 * is used to get the other CPU(s) to invalidate their TLBs; on every
 * other configuration the macro expands to an empty statement.
 */
#ifndef CONFIG_SMP_750
#define FINISH_FLUSH	do { } while (0)
#else
#define FINISH_FLUSH	smp_send_tlb_invalidate(0)
#endif
98 | |||
99 | static void flush_range(struct mm_struct *mm, unsigned long start, | ||
100 | unsigned long end) | ||
101 | { | ||
102 | pmd_t *pmd; | ||
103 | unsigned long pmd_end; | ||
104 | int count; | ||
105 | unsigned int ctx = mm->context; | ||
106 | |||
107 | if (Hash == 0) { | ||
108 | _tlbia(); | ||
109 | return; | ||
110 | } | ||
111 | start &= PAGE_MASK; | ||
112 | if (start >= end) | ||
113 | return; | ||
114 | end = (end - 1) | ~PAGE_MASK; | ||
115 | pmd = pmd_offset(pgd_offset(mm, start), start); | ||
116 | for (;;) { | ||
117 | pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; | ||
118 | if (pmd_end > end) | ||
119 | pmd_end = end; | ||
120 | if (!pmd_none(*pmd)) { | ||
121 | count = ((pmd_end - start) >> PAGE_SHIFT) + 1; | ||
122 | flush_hash_pages(ctx, start, pmd_val(*pmd), count); | ||
123 | } | ||
124 | if (pmd_end == end) | ||
125 | break; | ||
126 | start = pmd_end + 1; | ||
127 | ++pmd; | ||
128 | } | ||
129 | } | ||
130 | |||
131 | /* | ||
132 | * Flush kernel TLB entries in the given range | ||
133 | */ | ||
134 | void flush_tlb_kernel_range(unsigned long start, unsigned long end) | ||
135 | { | ||
136 | flush_range(&init_mm, start, end); | ||
137 | FINISH_FLUSH; | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * Flush all the (user) entries for the address space described by mm. | ||
142 | */ | ||
143 | void flush_tlb_mm(struct mm_struct *mm) | ||
144 | { | ||
145 | struct vm_area_struct *mp; | ||
146 | |||
147 | if (Hash == 0) { | ||
148 | _tlbia(); | ||
149 | return; | ||
150 | } | ||
151 | |||
152 | for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) | ||
153 | flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); | ||
154 | FINISH_FLUSH; | ||
155 | } | ||
156 | |||
157 | void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) | ||
158 | { | ||
159 | struct mm_struct *mm; | ||
160 | pmd_t *pmd; | ||
161 | |||
162 | if (Hash == 0) { | ||
163 | _tlbie(vmaddr); | ||
164 | return; | ||
165 | } | ||
166 | mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; | ||
167 | pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr); | ||
168 | if (!pmd_none(*pmd)) | ||
169 | flush_hash_pages(mm->context, vmaddr, pmd_val(*pmd), 1); | ||
170 | FINISH_FLUSH; | ||
171 | } | ||
172 | |||
173 | /* | ||
174 | * For each address in the range, find the pte for the address | ||
175 | * and check _PAGE_HASHPTE bit; if it is set, find and destroy | ||
176 | * the corresponding HPTE. | ||
177 | */ | ||
178 | void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, | ||
179 | unsigned long end) | ||
180 | { | ||
181 | flush_range(vma->vm_mm, start, end); | ||
182 | FINISH_FLUSH; | ||
183 | } | ||