Diffstat (limited to 'arch/tile/mm')
-rw-r--r--  arch/tile/mm/Makefile          9
-rw-r--r--  arch/tile/mm/elf.c           164
-rw-r--r--  arch/tile/mm/extable.c        30
-rw-r--r--  arch/tile/mm/fault.c         905
-rw-r--r--  arch/tile/mm/highmem.c       328
-rw-r--r--  arch/tile/mm/homecache.c     445
-rw-r--r--  arch/tile/mm/hugetlbpage.c   343
-rw-r--r--  arch/tile/mm/init.c         1082
-rw-r--r--  arch/tile/mm/migrate.h        50
-rw-r--r--  arch/tile/mm/migrate_32.S    211
-rw-r--r--  arch/tile/mm/mmap.c           75
-rw-r--r--  arch/tile/mm/pgtable.c       566
12 files changed, 4208 insertions, 0 deletions
diff --git a/arch/tile/mm/Makefile b/arch/tile/mm/Makefile
new file mode 100644
index 000000000000..e252aeddc17d
--- /dev/null
+++ b/arch/tile/mm/Makefile
@@ -0,0 +1,9 @@
1 | # | ||
2 | # Makefile for the linux tile-specific parts of the memory manager. | ||
3 | # | ||
4 | |||
5 | obj-y := init.o pgtable.o fault.o extable.o elf.o \ | ||
6 | mmap.o homecache.o migrate_$(BITS).o | ||
7 | |||
8 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o | ||
9 | obj-$(CONFIG_HIGHMEM) += highmem.o | ||
diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c
new file mode 100644
index 000000000000..818c9bef060c
--- /dev/null
+++ b/arch/tile/mm/elf.c
@@ -0,0 +1,164 @@
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #include <linux/mm.h> | ||
16 | #include <linux/pagemap.h> | ||
17 | #include <linux/binfmts.h> | ||
18 | #include <linux/compat.h> | ||
19 | #include <linux/mman.h> | ||
20 | #include <linux/elf.h> | ||
21 | #include <asm/pgtable.h> | ||
22 | #include <asm/pgalloc.h> | ||
23 | |||
24 | /* Notify a running simulator, if any, that an exec just occurred. */ | ||
25 | static void sim_notify_exec(const char *binary_name) | ||
26 | { | ||
27 | unsigned char c; | ||
28 | do { | ||
29 | c = *binary_name++; | ||
30 | __insn_mtspr(SPR_SIM_CONTROL, | ||
31 | (SIM_CONTROL_OS_EXEC | ||
32 | | (c << _SIM_CONTROL_OPERATOR_BITS))); | ||
33 | |||
34 | } while (c); | ||
35 | } | ||
36 | |||
37 | static int notify_exec(void) | ||
38 | { | ||
39 | int retval = 0; /* failure */ | ||
40 | struct vm_area_struct *vma = current->mm->mmap; | ||
41 | while (vma) { | ||
42 | if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) | ||
43 | break; | ||
44 | vma = vma->vm_next; | ||
45 | } | ||
46 | if (vma) { | ||
47 | char *buf = (char *) __get_free_page(GFP_KERNEL); | ||
48 | if (buf) { | ||
49 | char *path = d_path(&vma->vm_file->f_path, | ||
50 | buf, PAGE_SIZE); | ||
51 | if (!IS_ERR(path)) { | ||
52 | sim_notify_exec(path); | ||
53 | retval = 1; | ||
54 | } | ||
55 | free_page((unsigned long)buf); | ||
56 | } | ||
57 | } | ||
58 | return retval; | ||
59 | } | ||
60 | |||
61 | /* Notify a running simulator, if any, that we loaded an interpreter. */ | ||
62 | static void sim_notify_interp(unsigned long load_addr) | ||
63 | { | ||
64 | size_t i; | ||
65 | for (i = 0; i < sizeof(load_addr); i++) { | ||
66 | unsigned char c = load_addr >> (i * 8); | ||
67 | __insn_mtspr(SPR_SIM_CONTROL, | ||
68 | (SIM_CONTROL_OS_INTERP | ||
69 | | (c << _SIM_CONTROL_OPERATOR_BITS))); | ||
70 | } | ||
71 | } | ||
72 | |||
73 | |||
74 | /* Kernel address of page used to map read-only kernel data into userspace. */ | ||
75 | static void *vdso_page; | ||
76 | |||
77 | /* One-entry array used for install_special_mapping. */ | ||
78 | static struct page *vdso_pages[1]; | ||
79 | |||
80 | int __init vdso_setup(void) | ||
81 | { | ||
82 | extern char __rt_sigreturn[], __rt_sigreturn_end[]; | ||
83 | vdso_page = (void *)get_zeroed_page(GFP_ATOMIC); | ||
84 | memcpy(vdso_page, __rt_sigreturn, __rt_sigreturn_end - __rt_sigreturn); | ||
85 | vdso_pages[0] = virt_to_page(vdso_page); | ||
86 | return 0; | ||
87 | } | ||
88 | device_initcall(vdso_setup); | ||
89 | |||
90 | const char *arch_vma_name(struct vm_area_struct *vma) | ||
91 | { | ||
92 | if (vma->vm_private_data == vdso_pages) | ||
93 | return "[vdso]"; | ||
94 | #ifndef __tilegx__ | ||
95 | if (vma->vm_start == MEM_USER_INTRPT) | ||
96 | return "[intrpt]"; | ||
97 | #endif | ||
98 | return NULL; | ||
99 | } | ||
100 | |||
101 | int arch_setup_additional_pages(struct linux_binprm *bprm, | ||
102 | int executable_stack) | ||
103 | { | ||
104 | struct mm_struct *mm = current->mm; | ||
105 | unsigned long vdso_base; | ||
106 | int retval = 0; | ||
107 | |||
108 | /* | ||
109 | * Notify the simulator that an exec just occurred. | ||
110 | * If we can't find the filename of the mapping, just use | ||
111 | * whatever was passed as the linux_binprm filename. | ||
112 | */ | ||
113 | if (!notify_exec()) | ||
114 | sim_notify_exec(bprm->filename); | ||
115 | |||
116 | down_write(&mm->mmap_sem); | ||
117 | |||
118 | /* | ||
119 | * MAYWRITE to allow gdb to COW and set breakpoints | ||
120 | * | ||
121 | * Make sure the vDSO gets into every core dump. Dumping its | ||
122 | * contents makes post-mortem fully interpretable later | ||
123 | * without matching up the same kernel and hardware config to | ||
124 | * see what PC values meant. | ||
125 | */ | ||
126 | vdso_base = VDSO_BASE; | ||
127 | retval = install_special_mapping(mm, vdso_base, PAGE_SIZE, | ||
128 | VM_READ|VM_EXEC| | ||
129 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | ||
130 | VM_ALWAYSDUMP, | ||
131 | vdso_pages); | ||
132 | |||
133 | #ifndef __tilegx__ | ||
134 | /* | ||
135 | * Set up a user-interrupt mapping here; the user can't | ||
136 | * create one themselves since it is above TASK_SIZE. | ||
137 | * We make it unwritable by default, so the model for adding | ||
138 | * interrupt vectors always involves an mprotect. | ||
139 | */ | ||
140 | if (!retval) { | ||
141 | unsigned long addr = MEM_USER_INTRPT; | ||
142 | addr = mmap_region(NULL, addr, INTRPT_SIZE, | ||
143 | MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE, | ||
144 | VM_READ|VM_EXEC| | ||
145 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0); | ||
146 | if (addr > (unsigned long) -PAGE_SIZE) | ||
147 | retval = (int) addr; | ||
148 | } | ||
149 | #endif | ||
150 | |||
151 | up_write(&mm->mmap_sem); | ||
152 | |||
153 | return retval; | ||
154 | } | ||
155 | |||
156 | |||
157 | void elf_plat_init(struct pt_regs *regs, unsigned long load_addr) | ||
158 | { | ||
159 | /* Zero all registers. */ | ||
160 | memset(regs, 0, sizeof(*regs)); | ||
161 | |||
162 | /* Report the interpreter's load address. */ | ||
163 | sim_notify_interp(load_addr); | ||
164 | } | ||
diff --git a/arch/tile/mm/extable.c b/arch/tile/mm/extable.c
new file mode 100644
index 000000000000..4fb0acb9d154
--- /dev/null
+++ b/arch/tile/mm/extable.c
@@ -0,0 +1,30 @@
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #include <linux/module.h> | ||
16 | #include <linux/spinlock.h> | ||
17 | #include <linux/uaccess.h> | ||
18 | |||
19 | int fixup_exception(struct pt_regs *regs) | ||
20 | { | ||
21 | const struct exception_table_entry *fixup; | ||
22 | |||
23 | fixup = search_exception_tables(regs->pc); | ||
24 | if (fixup) { | ||
25 | regs->pc = fixup->fixup; | ||
26 | return 1; | ||
27 | } | ||
28 | |||
29 | return 0; | ||
30 | } | ||
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
new file mode 100644
index 000000000000..9b6b92f07def
--- /dev/null
+++ b/arch/tile/mm/fault.c
@@ -0,0 +1,905 @@
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * From i386 code copyright (C) 1995 Linus Torvalds | ||
15 | */ | ||
16 | |||
17 | #include <linux/signal.h> | ||
18 | #include <linux/sched.h> | ||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/errno.h> | ||
21 | #include <linux/string.h> | ||
22 | #include <linux/types.h> | ||
23 | #include <linux/ptrace.h> | ||
24 | #include <linux/mman.h> | ||
25 | #include <linux/mm.h> | ||
26 | #include <linux/smp.h> | ||
27 | #include <linux/smp_lock.h> | ||
28 | #include <linux/interrupt.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/tty.h> | ||
31 | #include <linux/vt_kern.h> /* For unblank_screen() */ | ||
32 | #include <linux/highmem.h> | ||
33 | #include <linux/module.h> | ||
34 | #include <linux/kprobes.h> | ||
35 | #include <linux/hugetlb.h> | ||
36 | #include <linux/syscalls.h> | ||
37 | #include <linux/uaccess.h> | ||
38 | |||
39 | #include <asm/system.h> | ||
40 | #include <asm/pgalloc.h> | ||
41 | #include <asm/sections.h> | ||
42 | |||
43 | #include <arch/interrupts.h> | ||
44 | |||
45 | /* | ||
46 | * Unlock any spinlocks which will prevent us from getting the | ||
47 | * message out | ||
48 | */ | ||
49 | void bust_spinlocks(int yes) | ||
50 | { | ||
51 | int loglevel_save = console_loglevel; | ||
52 | |||
53 | if (yes) { | ||
54 | oops_in_progress = 1; | ||
55 | return; | ||
56 | } | ||
57 | oops_in_progress = 0; | ||
58 | /* | ||
59 | * OK, the message is on the console. Now we call printk() | ||
60 | * without oops_in_progress set so that printk will give klogd | ||
61 | * a poke. Hold onto your hats... | ||
62 | */ | ||
63 | console_loglevel = 15; /* NMI oopser may have shut the console up */ | ||
64 | printk(" "); | ||
65 | console_loglevel = loglevel_save; | ||
66 | } | ||
67 | |||
68 | static noinline void force_sig_info_fault(int si_signo, int si_code, | ||
69 | unsigned long address, int fault_num, struct task_struct *tsk) | ||
70 | { | ||
71 | siginfo_t info; | ||
72 | |||
73 | if (unlikely(tsk->pid < 2)) { | ||
74 | panic("Signal %d (code %d) at %#lx sent to %s!", | ||
75 | si_signo, si_code & 0xffff, address, | ||
76 | tsk->pid ? "init" : "the idle task"); | ||
77 | } | ||
78 | |||
79 | info.si_signo = si_signo; | ||
80 | info.si_errno = 0; | ||
81 | info.si_code = si_code; | ||
82 | info.si_addr = (void __user *)address; | ||
83 | info.si_trapno = fault_num; | ||
84 | force_sig_info(si_signo, &info, tsk); | ||
85 | } | ||
86 | |||
87 | #ifndef __tilegx__ | ||
88 | /* | ||
89 | * Synthesize the fault a PL0 process would get by doing a word-load of | ||
90 | * an unaligned address or a high kernel address. Called indirectly | ||
91 | * from sys_cmpxchg() in kernel/intvec.S. | ||
92 | */ | ||
93 | int _sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *regs) | ||
94 | { | ||
95 | if (address >= PAGE_OFFSET) | ||
96 | force_sig_info_fault(SIGSEGV, SEGV_MAPERR, address, | ||
97 | INT_DTLB_MISS, current); | ||
98 | else | ||
99 | force_sig_info_fault(SIGBUS, BUS_ADRALN, address, | ||
100 | INT_UNALIGN_DATA, current); | ||
101 | |||
102 | /* | ||
103 | * Adjust pc to point at the actual instruction, which is unusual | ||
104 | * for syscalls normally, but is appropriate when we are claiming | ||
105 | * that a syscall swint1 caused a page fault or bus error. | ||
106 | */ | ||
107 | regs->pc -= 8; | ||
108 | |||
109 | /* | ||
110 | * Mark this as a caller-save interrupt, like a normal page fault, | ||
111 | * so that when we go through the signal handler path we will | ||
112 | * properly restore r0, r1, and r2 for the signal handler arguments. | ||
113 | */ | ||
114 | regs->flags |= PT_FLAGS_CALLER_SAVES; | ||
115 | |||
116 | return 0; | ||
117 | } | ||
118 | #endif | ||
119 | |||
120 | static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) | ||
121 | { | ||
122 | unsigned index = pgd_index(address); | ||
123 | pgd_t *pgd_k; | ||
124 | pud_t *pud, *pud_k; | ||
125 | pmd_t *pmd, *pmd_k; | ||
126 | |||
127 | pgd += index; | ||
128 | pgd_k = init_mm.pgd + index; | ||
129 | |||
130 | if (!pgd_present(*pgd_k)) | ||
131 | return NULL; | ||
132 | |||
133 | pud = pud_offset(pgd, address); | ||
134 | pud_k = pud_offset(pgd_k, address); | ||
135 | if (!pud_present(*pud_k)) | ||
136 | return NULL; | ||
137 | |||
138 | pmd = pmd_offset(pud, address); | ||
139 | pmd_k = pmd_offset(pud_k, address); | ||
140 | if (!pmd_present(*pmd_k)) | ||
141 | return NULL; | ||
142 | if (!pmd_present(*pmd)) { | ||
143 | set_pmd(pmd, *pmd_k); | ||
144 | arch_flush_lazy_mmu_mode(); | ||
145 | } else | ||
146 | BUG_ON(pmd_ptfn(*pmd) != pmd_ptfn(*pmd_k)); | ||
147 | return pmd_k; | ||
148 | } | ||
149 | |||
150 | /* | ||
151 | * Handle a fault on the vmalloc or module mapping area | ||
152 | */ | ||
153 | static inline int vmalloc_fault(pgd_t *pgd, unsigned long address) | ||
154 | { | ||
155 | pmd_t *pmd_k; | ||
156 | pte_t *pte_k; | ||
157 | |||
158 | /* Make sure we are in vmalloc area */ | ||
159 | if (!(address >= VMALLOC_START && address < VMALLOC_END)) | ||
160 | return -1; | ||
161 | |||
162 | /* | ||
163 | * Synchronize this task's top level page-table | ||
164 | * with the 'reference' page table. | ||
165 | */ | ||
166 | pmd_k = vmalloc_sync_one(pgd, address); | ||
167 | if (!pmd_k) | ||
168 | return -1; | ||
169 | if (pmd_huge(*pmd_k)) | ||
170 | return 0; /* support TILE huge_vmap() API */ | ||
171 | pte_k = pte_offset_kernel(pmd_k, address); | ||
172 | if (!pte_present(*pte_k)) | ||
173 | return -1; | ||
174 | return 0; | ||
175 | } | ||
176 | |||
177 | /* Wait until this PTE has completed migration. */ | ||
178 | static void wait_for_migration(pte_t *pte) | ||
179 | { | ||
180 | if (pte_migrating(*pte)) { | ||
181 | /* | ||
182 | * Wait until the migrater fixes up this pte. | ||
183 | * We scale the loop count by the clock rate so we'll wait for | ||
184 | * a few seconds here. | ||
185 | */ | ||
186 | int retries = 0; | ||
187 | int bound = get_clock_rate(); | ||
188 | while (pte_migrating(*pte)) { | ||
189 | barrier(); | ||
190 | if (++retries > bound) | ||
191 | panic("Hit migrating PTE (%#llx) and" | ||
192 | " page PFN %#lx still migrating", | ||
193 | pte->val, pte_pfn(*pte)); | ||
194 | } | ||
195 | } | ||
196 | } | ||
197 | |||
198 | /* | ||
199 | * It's not generally safe to use "current" to get the page table pointer, | ||
200 | * since we might be running an oprofile interrupt in the middle of a | ||
201 | * task switch. | ||
202 | */ | ||
203 | static pgd_t *get_current_pgd(void) | ||
204 | { | ||
205 | HV_Context ctx = hv_inquire_context(); | ||
206 | unsigned long pgd_pfn = ctx.page_table >> PAGE_SHIFT; | ||
207 | struct page *pgd_page = pfn_to_page(pgd_pfn); | ||
208 | BUG_ON(PageHighMem(pgd_page)); /* oops, HIGHPTE? */ | ||
209 | return (pgd_t *) __va(ctx.page_table); | ||
210 | } | ||
211 | |||
212 | /* | ||
213 | * We can receive a page fault from a migrating PTE at any time. | ||
214 | * Handle it by just waiting until the fault resolves. | ||
215 | * | ||
216 | * It's also possible to get a migrating kernel PTE that resolves | ||
217 | * itself during the downcall from hypervisor to Linux. We just check | ||
218 | * here to see if the PTE seems valid, and if so we retry it. | ||
219 | * | ||
220 | * NOTE! We MUST NOT take any locks for this case. We may be in an | ||
221 | * interrupt or a critical region, and must do as little as possible. | ||
222 | * Similarly, we can't use atomic ops here, since we may be handling a | ||
223 | * fault caused by an atomic op access. | ||
224 | */ | ||
225 | static int handle_migrating_pte(pgd_t *pgd, int fault_num, | ||
226 | unsigned long address, | ||
227 | int is_kernel_mode, int write) | ||
228 | { | ||
229 | pud_t *pud; | ||
230 | pmd_t *pmd; | ||
231 | pte_t *pte; | ||
232 | pte_t pteval; | ||
233 | |||
234 | if (pgd_addr_invalid(address)) | ||
235 | return 0; | ||
236 | |||
237 | pgd += pgd_index(address); | ||
238 | pud = pud_offset(pgd, address); | ||
239 | if (!pud || !pud_present(*pud)) | ||
240 | return 0; | ||
241 | pmd = pmd_offset(pud, address); | ||
242 | if (!pmd || !pmd_present(*pmd)) | ||
243 | return 0; | ||
244 | pte = pmd_huge_page(*pmd) ? ((pte_t *)pmd) : | ||
245 | pte_offset_kernel(pmd, address); | ||
246 | pteval = *pte; | ||
247 | if (pte_migrating(pteval)) { | ||
248 | wait_for_migration(pte); | ||
249 | return 1; | ||
250 | } | ||
251 | |||
252 | if (!is_kernel_mode || !pte_present(pteval)) | ||
253 | return 0; | ||
254 | if (fault_num == INT_ITLB_MISS) { | ||
255 | if (pte_exec(pteval)) | ||
256 | return 1; | ||
257 | } else if (write) { | ||
258 | if (pte_write(pteval)) | ||
259 | return 1; | ||
260 | } else { | ||
261 | if (pte_read(pteval)) | ||
262 | return 1; | ||
263 | } | ||
264 | |||
265 | return 0; | ||
266 | } | ||
267 | |||
268 | /* | ||
269 | * This routine is responsible for faulting in user pages. | ||
270 | * It passes the work off to one of the appropriate routines. | ||
271 | * It returns true if the fault was successfully handled. | ||
272 | */ | ||
273 | static int handle_page_fault(struct pt_regs *regs, | ||
274 | int fault_num, | ||
275 | int is_page_fault, | ||
276 | unsigned long address, | ||
277 | int write) | ||
278 | { | ||
279 | struct task_struct *tsk; | ||
280 | struct mm_struct *mm; | ||
281 | struct vm_area_struct *vma; | ||
282 | unsigned long stack_offset; | ||
283 | int fault; | ||
284 | int si_code; | ||
285 | int is_kernel_mode; | ||
286 | pgd_t *pgd; | ||
287 | |||
288 | /* on TILE, protection faults are always writes */ | ||
289 | if (!is_page_fault) | ||
290 | write = 1; | ||
291 | |||
292 | is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL); | ||
293 | |||
294 | tsk = validate_current(); | ||
295 | |||
296 | /* | ||
297 | * Check to see if we might be overwriting the stack, and bail | ||
298 | * out if so. The page fault code is a relatively likely | ||
299 | * place to get trapped in an infinite regress, and once we | ||
300 | * overwrite the whole stack, it becomes very hard to recover. | ||
301 | */ | ||
302 | stack_offset = stack_pointer & (THREAD_SIZE-1); | ||
303 | if (stack_offset < THREAD_SIZE / 8) { | ||
304 | printk(KERN_ALERT "Potential stack overrun: sp %#lx\n", | ||
305 | stack_pointer); | ||
306 | show_regs(regs); | ||
307 | printk(KERN_ALERT "Killing current process %d/%s\n", | ||
308 | tsk->pid, tsk->comm); | ||
309 | do_group_exit(SIGKILL); | ||
310 | } | ||
311 | |||
312 | /* | ||
313 | * Early on, we need to check for migrating PTE entries; | ||
314 | * see homecache.c. If we find a migrating PTE, we wait until | ||
315 | * the backing page claims to be done migrating, then we proceed. | ||
316 | * For kernel PTEs, we rewrite the PTE and return and retry. | ||
317 | * Otherwise, we treat the fault like a normal "no PTE" fault, | ||
318 | * rather than trying to patch up the existing PTE. | ||
319 | */ | ||
320 | pgd = get_current_pgd(); | ||
321 | if (handle_migrating_pte(pgd, fault_num, address, | ||
322 | is_kernel_mode, write)) | ||
323 | return 1; | ||
324 | |||
325 | si_code = SEGV_MAPERR; | ||
326 | |||
327 | /* | ||
328 | * We fault-in kernel-space virtual memory on-demand. The | ||
329 | * 'reference' page table is init_mm.pgd. | ||
330 | * | ||
331 | * NOTE! We MUST NOT take any locks for this case. We may | ||
332 | * be in an interrupt or a critical region, and should | ||
333 | * only copy the information from the master page table, | ||
334 | * nothing more. | ||
335 | * | ||
336 | * This verifies that the fault happens in kernel space | ||
337 | * and that the fault was not a protection fault. | ||
338 | */ | ||
339 | if (unlikely(address >= TASK_SIZE && | ||
340 | !is_arch_mappable_range(address, 0))) { | ||
341 | if (is_kernel_mode && is_page_fault && | ||
342 | vmalloc_fault(pgd, address) >= 0) | ||
343 | return 1; | ||
344 | /* | ||
345 | * Don't take the mm semaphore here. If we fixup a prefetch | ||
346 | * fault we could otherwise deadlock. | ||
347 | */ | ||
348 | mm = NULL; /* happy compiler */ | ||
349 | vma = NULL; | ||
350 | goto bad_area_nosemaphore; | ||
351 | } | ||
352 | |||
353 | /* | ||
354 | * If we're trying to touch user-space addresses, we must | ||
355 | * be either at PL0, or else with interrupts enabled in the | ||
356 | * kernel, so either way we can re-enable interrupts here. | ||
357 | */ | ||
358 | local_irq_enable(); | ||
359 | |||
360 | mm = tsk->mm; | ||
361 | |||
362 | /* | ||
363 | * If we're in an interrupt, have no user context or are running in an | ||
364 | * atomic region then we must not take the fault. | ||
365 | */ | ||
366 | if (in_atomic() || !mm) { | ||
367 | vma = NULL; /* happy compiler */ | ||
368 | goto bad_area_nosemaphore; | ||
369 | } | ||
370 | |||
371 | /* | ||
372 | * When running in the kernel we expect faults to occur only to | ||
373 | * addresses in user space. All other faults represent errors in the | ||
374 | * kernel and should generate an OOPS. Unfortunately, in the case of an | ||
375 | * erroneous fault occurring in a code path which already holds mmap_sem | ||
376 | * we will deadlock attempting to validate the fault against the | ||
377 | * address space. Luckily the kernel only validly references user | ||
378 | * space from well defined areas of code, which are listed in the | ||
379 | * exceptions table. | ||
380 | * | ||
381 | * As the vast majority of faults will be valid we will only perform | ||
382 | * the source reference check when there is a possibility of a deadlock. | ||
383 | * Attempt to lock the address space, if we cannot we then validate the | ||
384 | * source. If this is invalid we can skip the address space check, | ||
385 | * thus avoiding the deadlock. | ||
386 | */ | ||
387 | if (!down_read_trylock(&mm->mmap_sem)) { | ||
388 | if (is_kernel_mode && | ||
389 | !search_exception_tables(regs->pc)) { | ||
390 | vma = NULL; /* happy compiler */ | ||
391 | goto bad_area_nosemaphore; | ||
392 | } | ||
393 | down_read(&mm->mmap_sem); | ||
394 | } | ||
395 | |||
396 | vma = find_vma(mm, address); | ||
397 | if (!vma) | ||
398 | goto bad_area; | ||
399 | if (vma->vm_start <= address) | ||
400 | goto good_area; | ||
401 | if (!(vma->vm_flags & VM_GROWSDOWN)) | ||
402 | goto bad_area; | ||
403 | if (regs->sp < PAGE_OFFSET) { | ||
404 | /* | ||
405 | * accessing the stack below sp is always a bug. | ||
406 | */ | ||
407 | if (address < regs->sp) | ||
408 | goto bad_area; | ||
409 | } | ||
410 | if (expand_stack(vma, address)) | ||
411 | goto bad_area; | ||
412 | |||
413 | /* | ||
414 | * Ok, we have a good vm_area for this memory access, so | ||
415 | * we can handle it.. | ||
416 | */ | ||
417 | good_area: | ||
418 | si_code = SEGV_ACCERR; | ||
419 | if (fault_num == INT_ITLB_MISS) { | ||
420 | if (!(vma->vm_flags & VM_EXEC)) | ||
421 | goto bad_area; | ||
422 | } else if (write) { | ||
423 | #ifdef TEST_VERIFY_AREA | ||
424 | if (!is_page_fault && regs->cs == KERNEL_CS) | ||
425 | printk("WP fault at "REGFMT"\n", regs->eip); | ||
426 | #endif | ||
427 | if (!(vma->vm_flags & VM_WRITE)) | ||
428 | goto bad_area; | ||
429 | } else { | ||
430 | if (!is_page_fault || !(vma->vm_flags & VM_READ)) | ||
431 | goto bad_area; | ||
432 | } | ||
433 | |||
434 | survive: | ||
435 | /* | ||
436 | * If for any reason at all we couldn't handle the fault, | ||
437 | * make sure we exit gracefully rather than endlessly redo | ||
438 | * the fault. | ||
439 | */ | ||
440 | fault = handle_mm_fault(mm, vma, address, write); | ||
441 | if (unlikely(fault & VM_FAULT_ERROR)) { | ||
442 | if (fault & VM_FAULT_OOM) | ||
443 | goto out_of_memory; | ||
444 | else if (fault & VM_FAULT_SIGBUS) | ||
445 | goto do_sigbus; | ||
446 | BUG(); | ||
447 | } | ||
448 | if (fault & VM_FAULT_MAJOR) | ||
449 | tsk->maj_flt++; | ||
450 | else | ||
451 | tsk->min_flt++; | ||
452 | |||
453 | /* | ||
454 | * If this was an asynchronous fault, | ||
455 | * restart the appropriate engine. | ||
456 | */ | ||
457 | switch (fault_num) { | ||
458 | #if CHIP_HAS_TILE_DMA() | ||
459 | case INT_DMATLB_MISS: | ||
460 | case INT_DMATLB_MISS_DWNCL: | ||
461 | case INT_DMATLB_ACCESS: | ||
462 | case INT_DMATLB_ACCESS_DWNCL: | ||
463 | __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK); | ||
464 | break; | ||
465 | #endif | ||
466 | #if CHIP_HAS_SN_PROC() | ||
467 | case INT_SNITLB_MISS: | ||
468 | case INT_SNITLB_MISS_DWNCL: | ||
469 | __insn_mtspr(SPR_SNCTL, | ||
470 | __insn_mfspr(SPR_SNCTL) & | ||
471 | ~SPR_SNCTL__FRZPROC_MASK); | ||
472 | break; | ||
473 | #endif | ||
474 | } | ||
475 | |||
476 | up_read(&mm->mmap_sem); | ||
477 | return 1; | ||
478 | |||
479 | /* | ||
480 | * Something tried to access memory that isn't in our memory map.. | ||
481 | * Fix it, but check if it's kernel or user first.. | ||
482 | */ | ||
483 | bad_area: | ||
484 | up_read(&mm->mmap_sem); | ||
485 | |||
486 | bad_area_nosemaphore: | ||
487 | /* User mode accesses just cause a SIGSEGV */ | ||
488 | if (!is_kernel_mode) { | ||
489 | /* | ||
490 | * It's possible to have interrupts off here. | ||
491 | */ | ||
492 | local_irq_enable(); | ||
493 | |||
494 | force_sig_info_fault(SIGSEGV, si_code, address, | ||
495 | fault_num, tsk); | ||
496 | return 0; | ||
497 | } | ||
498 | |||
499 | no_context: | ||
500 | /* Are we prepared to handle this kernel fault? */ | ||
501 | if (fixup_exception(regs)) | ||
502 | return 0; | ||
503 | |||
504 | /* | ||
505 | * Oops. The kernel tried to access some bad page. We'll have to | ||
506 | * terminate things with extreme prejudice. | ||
507 | */ | ||
508 | |||
509 | bust_spinlocks(1); | ||
510 | |||
511 | /* FIXME: no lookup_address() yet */ | ||
512 | #ifdef SUPPORT_LOOKUP_ADDRESS | ||
513 | if (fault_num == INT_ITLB_MISS) { | ||
514 | pte_t *pte = lookup_address(address); | ||
515 | |||
516 | if (pte && pte_present(*pte) && !pte_exec_kernel(*pte)) | ||
517 | printk(KERN_CRIT "kernel tried to execute" | ||
518 | " non-executable page - exploit attempt?" | ||
519 | " (uid: %d)\n", current->uid); | ||
520 | } | ||
521 | #endif | ||
522 | if (address < PAGE_SIZE) | ||
523 | printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference\n"); | ||
524 | else | ||
525 | printk(KERN_ALERT "Unable to handle kernel paging request\n"); | ||
526 | printk(" at virtual address "REGFMT", pc "REGFMT"\n", | ||
527 | address, regs->pc); | ||
528 | |||
529 | show_regs(regs); | ||
530 | |||
531 | if (unlikely(tsk->pid < 2)) { | ||
532 | panic("Kernel page fault running %s!", | ||
533 | tsk->pid ? "init" : "the idle task"); | ||
534 | } | ||
535 | |||
536 | /* | ||
537 | * More FIXME: we should probably copy the i386 here and | ||
538 | * implement a generic die() routine. Not today. | ||
539 | */ | ||
540 | #ifdef SUPPORT_DIE | ||
541 | die("Oops", regs); | ||
542 | #endif | ||
543 | bust_spinlocks(1); | ||
544 | |||
545 | do_group_exit(SIGKILL); | ||
546 | |||
547 | /* | ||
548 | * We ran out of memory, or some other thing happened to us that made | ||
549 | * us unable to handle the page fault gracefully. | ||
550 | */ | ||
551 | out_of_memory: | ||
552 | up_read(&mm->mmap_sem); | ||
553 | if (is_global_init(tsk)) { | ||
554 | yield(); | ||
555 | down_read(&mm->mmap_sem); | ||
556 | goto survive; | ||
557 | } | ||
558 | printk("VM: killing process %s\n", tsk->comm); | ||
559 | if (!is_kernel_mode) | ||
560 | do_group_exit(SIGKILL); | ||
561 | goto no_context; | ||
562 | |||
563 | do_sigbus: | ||
564 | up_read(&mm->mmap_sem); | ||
565 | |||
566 | /* Kernel mode? Handle exceptions or die */ | ||
567 | if (is_kernel_mode) | ||
568 | goto no_context; | ||
569 | |||
570 | force_sig_info_fault(SIGBUS, BUS_ADRERR, address, fault_num, tsk); | ||
571 | return 0; | ||
572 | } | ||
573 | |||
574 | #ifndef __tilegx__ | ||
575 | |||
576 | extern char sys_cmpxchg[], __sys_cmpxchg_end[]; | ||
577 | extern char __sys_cmpxchg_grab_lock[]; | ||
578 | extern char __start_atomic_asm_code[], __end_atomic_asm_code[]; | ||
579 | |||
580 | /* | ||
581 | * We return this structure in registers to avoid having to write | ||
582 | * additional save/restore code in the intvec.S caller. | ||
583 | */ | ||
584 | struct intvec_state { | ||
585 | void *handler; | ||
586 | unsigned long vecnum; | ||
587 | unsigned long fault_num; | ||
588 | unsigned long info; | ||
589 | unsigned long retval; | ||
590 | }; | ||
591 | |||
592 | /* We must release ICS before panicking or we won't get anywhere. */ | ||
593 | #define ics_panic(fmt, ...) do { \ | ||
594 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); \ | ||
595 | panic(fmt, __VA_ARGS__); \ | ||
596 | } while (0) | ||
597 | |||
598 | void do_page_fault(struct pt_regs *regs, int fault_num, | ||
599 | unsigned long address, unsigned long write); | ||
600 | |||
601 | /* | ||
602 | * When we take an ITLB or DTLB fault or access violation in the | ||
603 | * supervisor while the critical section bit is set, the hypervisor is | ||
604 | * reluctant to write new values into the EX_CONTEXT_1_x registers, | ||
605 | * since that might indicate we have not yet squirreled the SPR | ||
606 | * contents away and thus cannot safely take a recursive interrupt. | ||
607 | * Accordingly, the hypervisor passes us the PC via SYSTEM_SAVE_1_2. | ||
608 | */ | ||
609 | struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, | ||
610 | unsigned long address, | ||
611 | unsigned long info) | ||
612 | { | ||
613 | unsigned long pc = info & ~1; | ||
614 | int write = info & 1; | ||
615 | pgd_t *pgd = get_current_pgd(); | ||
616 | |||
617 | /* Retval is 1 at first since we will handle the fault fully. */ | ||
618 | struct intvec_state state = { | ||
619 | do_page_fault, fault_num, address, write, 1 | ||
620 | }; | ||
621 | |||
622 | /* Validate that we are plausibly in the right routine. */ | ||
623 | if ((pc & 0x7) != 0 || pc < PAGE_OFFSET || | ||
624 | (fault_num != INT_DTLB_MISS && | ||
625 | fault_num != INT_DTLB_ACCESS)) { | ||
626 | unsigned long old_pc = regs->pc; | ||
627 | regs->pc = pc; | ||
628 | ics_panic("Bad ICS page fault args:" | ||
629 | " old PC %#lx, fault %d/%d at %#lx\n", | ||
630 | old_pc, fault_num, write, address); | ||
631 | } | ||
632 | |||
633 | /* We might be faulting on a vmalloc page, so check that first. */ | ||
634 | if (fault_num != INT_DTLB_ACCESS && vmalloc_fault(pgd, address) >= 0) | ||
635 | return state; | ||
636 | |||
637 | /* | ||
638 | * If we faulted with ICS set in sys_cmpxchg, we are providing | ||
639 | * a user syscall service that should generate a signal on | ||
640 | * fault. We didn't set up a kernel stack on initial entry to | ||
641 | * sys_cmpxchg, but instead had one set up by the fault, which | ||
642 | * (because sys_cmpxchg never releases ICS) came to us via the | ||
643 | * SYSTEM_SAVE_1_2 mechanism, and thus EX_CONTEXT_1_[01] are | ||
644 | * still referencing the original user code. We release the | ||
645 | * atomic lock and rewrite pt_regs so that it appears that we | ||
646 | * came from user-space directly, and after we finish the | ||
647 | * fault we'll go back to user space and re-issue the swint. | ||
648 | * This way the backtrace information is correct if we need to | ||
649 | * emit a stack dump at any point while handling this. | ||
650 | * | ||
651 | * Must match register use in sys_cmpxchg(). | ||
652 | */ | ||
653 | if (pc >= (unsigned long) sys_cmpxchg && | ||
654 | pc < (unsigned long) __sys_cmpxchg_end) { | ||
655 | #ifdef CONFIG_SMP | ||
656 | /* Don't unlock before we could have locked. */ | ||
657 | if (pc >= (unsigned long)__sys_cmpxchg_grab_lock) { | ||
658 | int *lock_ptr = (int *)(regs->regs[ATOMIC_LOCK_REG]); | ||
659 | __atomic_fault_unlock(lock_ptr); | ||
660 | } | ||
661 | #endif | ||
662 | regs->sp = regs->regs[27]; | ||
663 | } | ||
664 | |||
665 | /* | ||
666 | * We can also fault in the atomic assembly, in which | ||
667 | * case we use the exception table to do the first-level fixup. | ||
668 | * We may re-fixup again in the real fault handler if it | ||
669 | * turns out the faulting address is just bad, and not, | ||
670 | * for example, migrating. | ||
671 | */ | ||
672 | else if (pc >= (unsigned long) __start_atomic_asm_code && | ||
673 | pc < (unsigned long) __end_atomic_asm_code) { | ||
674 | const struct exception_table_entry *fixup; | ||
675 | #ifdef CONFIG_SMP | ||
676 | /* Unlock the atomic lock. */ | ||
677 | int *lock_ptr = (int *)(regs->regs[ATOMIC_LOCK_REG]); | ||
678 | __atomic_fault_unlock(lock_ptr); | ||
679 | #endif | ||
680 | fixup = search_exception_tables(pc); | ||
681 | if (!fixup) | ||
682 | ics_panic("ICS atomic fault not in table:" | ||
683 | " PC %#lx, fault %d", pc, fault_num); | ||
684 | regs->pc = fixup->fixup; | ||
685 | regs->ex1 = PL_ICS_EX1(KERNEL_PL, 0); | ||
686 | } | ||
687 | |||
688 | /* | ||
689 | * NOTE: the one other type of access that might bring us here | ||
690 | * are the memory ops in __tns_atomic_acquire/__tns_atomic_release, | ||
691 | * but we don't have to check specially for them since we can | ||
692 | * always safely return to the address of the fault and retry, | ||
693 | * since no separate atomic locks are involved. | ||
694 | */ | ||
695 | |||
696 | /* | ||
697 | * Now that we have released the atomic lock (if necessary), | ||
698 | * it's safe to spin if the PTE that caused the fault was migrating. | ||
699 | */ | ||
700 | if (fault_num == INT_DTLB_ACCESS) | ||
701 | write = 1; | ||
702 | if (handle_migrating_pte(pgd, fault_num, address, 1, write)) | ||
703 | return state; | ||
704 | |||
705 | /* Return zero so that we continue on with normal fault handling. */ | ||
706 | state.retval = 0; | ||
707 | return state; | ||
708 | } | ||
709 | |||
710 | #endif /* !__tilegx__ */ | ||
711 | |||
712 | /* | ||
713 | * This routine handles page faults. It determines the address, and the | ||
714 | * problem, and then passes it to handle_page_fault() for normal DTLB and | ||
715 | * ITLB issues, and for DMA or SN processor faults when we are in user | ||
716 | * space. For the latter, if we're in kernel mode, we just save the | ||
717 | * interrupt away appropriately and return immediately. We can't do | ||
718 | * page faults for user code while in kernel mode. | ||
719 | */ | ||
720 | void do_page_fault(struct pt_regs *regs, int fault_num, | ||
721 | unsigned long address, unsigned long write) | ||
722 | { | ||
723 | int is_page_fault; | ||
724 | |||
725 | /* This case should have been handled by do_page_fault_ics(). */ | ||
726 | BUG_ON(write & ~1); | ||
727 | |||
728 | #if CHIP_HAS_TILE_DMA() | ||
729 | /* | ||
730 | * If it's a DMA fault, suspend the transfer while we're | ||
731 | * handling the miss; we'll restart after it's handled. If we | ||
732 | * don't suspend, it's possible that this process could swap | ||
733 | * out and back in, and restart the engine since the DMA is | ||
734 | * still 'running'. | ||
735 | */ | ||
736 | if (fault_num == INT_DMATLB_MISS || | ||
737 | fault_num == INT_DMATLB_ACCESS || | ||
738 | fault_num == INT_DMATLB_MISS_DWNCL || | ||
739 | fault_num == INT_DMATLB_ACCESS_DWNCL) { | ||
740 | __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK); | ||
741 | while (__insn_mfspr(SPR_DMA_USER_STATUS) & | ||
742 | SPR_DMA_STATUS__BUSY_MASK) | ||
743 | ; | ||
744 | } | ||
745 | #endif | ||
746 | |||
747 | /* Validate fault num and decide if this is a first-time page fault. */ | ||
748 | switch (fault_num) { | ||
749 | case INT_ITLB_MISS: | ||
750 | case INT_DTLB_MISS: | ||
751 | #if CHIP_HAS_TILE_DMA() | ||
752 | case INT_DMATLB_MISS: | ||
753 | case INT_DMATLB_MISS_DWNCL: | ||
754 | #endif | ||
755 | #if CHIP_HAS_SN_PROC() | ||
756 | case INT_SNITLB_MISS: | ||
757 | case INT_SNITLB_MISS_DWNCL: | ||
758 | #endif | ||
759 | is_page_fault = 1; | ||
760 | break; | ||
761 | |||
762 | case INT_DTLB_ACCESS: | ||
763 | #if CHIP_HAS_TILE_DMA() | ||
764 | case INT_DMATLB_ACCESS: | ||
765 | case INT_DMATLB_ACCESS_DWNCL: | ||
766 | #endif | ||
767 | is_page_fault = 0; | ||
768 | break; | ||
769 | |||
770 | default: | ||
771 | panic("Bad fault number %d in do_page_fault", fault_num); | ||
772 | } | ||
773 | |||
774 | if (EX1_PL(regs->ex1) != USER_PL) { | ||
775 | struct async_tlb *async; | ||
776 | switch (fault_num) { | ||
777 | #if CHIP_HAS_TILE_DMA() | ||
778 | case INT_DMATLB_MISS: | ||
779 | case INT_DMATLB_ACCESS: | ||
780 | case INT_DMATLB_MISS_DWNCL: | ||
781 | case INT_DMATLB_ACCESS_DWNCL: | ||
782 | async = &current->thread.dma_async_tlb; | ||
783 | break; | ||
784 | #endif | ||
785 | #if CHIP_HAS_SN_PROC() | ||
786 | case INT_SNITLB_MISS: | ||
787 | case INT_SNITLB_MISS_DWNCL: | ||
788 | async = &current->thread.sn_async_tlb; | ||
789 | break; | ||
790 | #endif | ||
791 | default: | ||
792 | async = NULL; | ||
793 | } | ||
794 | if (async) { | ||
795 | |||
796 | /* | ||
797 | * No vmalloc check required, so we can allow | ||
798 | * interrupts immediately at this point. | ||
799 | */ | ||
800 | local_irq_enable(); | ||
801 | |||
802 | set_thread_flag(TIF_ASYNC_TLB); | ||
803 | if (async->fault_num != 0) { | ||
804 | panic("Second async fault %d;" | ||
805 | " old fault was %d (%#lx/%ld)", | ||
806 | fault_num, async->fault_num, | ||
807 | address, write); | ||
808 | } | ||
809 | BUG_ON(fault_num == 0); | ||
810 | async->fault_num = fault_num; | ||
811 | async->is_fault = is_page_fault; | ||
812 | async->is_write = write; | ||
813 | async->address = address; | ||
814 | return; | ||
815 | } | ||
816 | } | ||
817 | |||
818 | handle_page_fault(regs, fault_num, is_page_fault, address, write); | ||
819 | } | ||
820 | |||
821 | |||
822 | #if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() | ||
823 | /* | ||
824 | * Check an async_tlb structure to see if a deferred fault is waiting, | ||
825 | * and if so pass it to the page-fault code. | ||
826 | */ | ||
827 | static void handle_async_page_fault(struct pt_regs *regs, | ||
828 | struct async_tlb *async) | ||
829 | { | ||
830 | if (async->fault_num) { | ||
831 | /* | ||
832 | * Clear async->fault_num before calling the page-fault | ||
833 | * handler so that if we re-interrupt before returning | ||
834 | * from the function we have somewhere to put the | ||
835 | * information from the new interrupt. | ||
836 | */ | ||
837 | int fault_num = async->fault_num; | ||
838 | async->fault_num = 0; | ||
839 | handle_page_fault(regs, fault_num, async->is_fault, | ||
840 | async->address, async->is_write); | ||
841 | } | ||
842 | } | ||
843 | #endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */ | ||
844 | |||
845 | |||
846 | /* | ||
847 | * This routine effectively re-issues asynchronous page faults | ||
848 | * when we are returning to user space. | ||
849 | */ | ||
850 | void do_async_page_fault(struct pt_regs *regs) | ||
851 | { | ||
852 | /* | ||
853 | * Clear thread flag early. If we re-interrupt while processing | ||
854 | * code here, we will reset it and recall this routine before | ||
855 | * returning to user space. | ||
856 | */ | ||
857 | clear_thread_flag(TIF_ASYNC_TLB); | ||
858 | |||
859 | #if CHIP_HAS_TILE_DMA() | ||
860 | handle_async_page_fault(regs, &current->thread.dma_async_tlb); | ||
861 | #endif | ||
862 | #if CHIP_HAS_SN_PROC() | ||
863 | handle_async_page_fault(regs, &current->thread.sn_async_tlb); | ||
864 | #endif | ||
865 | } | ||
866 | |||
867 | void vmalloc_sync_all(void) | ||
868 | { | ||
869 | #ifdef __tilegx__ | ||
870 | /* Currently all L1 kernel pmd's are static and shared. */ | ||
871 | BUG_ON(pgd_index(VMALLOC_END) != pgd_index(VMALLOC_START)); | ||
872 | #else | ||
873 | /* | ||
874 | * Note that races in the updates of insync and start aren't | ||
875 | * problematic: insync can only get set bits added, and updates to | ||
876 | * start are only improving performance (without affecting correctness | ||
877 | * if undone). | ||
878 | */ | ||
879 | static DECLARE_BITMAP(insync, PTRS_PER_PGD); | ||
880 | static unsigned long start = PAGE_OFFSET; | ||
881 | unsigned long address; | ||
882 | |||
883 | BUILD_BUG_ON(PAGE_OFFSET & ~PGDIR_MASK); | ||
884 | for (address = start; address >= PAGE_OFFSET; address += PGDIR_SIZE) { | ||
885 | if (!test_bit(pgd_index(address), insync)) { | ||
886 | unsigned long flags; | ||
887 | struct list_head *pos; | ||
888 | |||
889 | spin_lock_irqsave(&pgd_lock, flags); | ||
890 | list_for_each(pos, &pgd_list) | ||
891 | if (!vmalloc_sync_one(list_to_pgd(pos), | ||
892 | address)) { | ||
893 | /* Must be at first entry in list. */ | ||
894 | BUG_ON(pos != pgd_list.next); | ||
895 | break; | ||
896 | } | ||
897 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
898 | if (pos != pgd_list.next) | ||
899 | set_bit(pgd_index(address), insync); | ||
900 | } | ||
901 | if (address == start && test_bit(pgd_index(address), insync)) | ||
902 | start = address + PGDIR_SIZE; | ||
903 | } | ||
904 | #endif | ||
905 | } | ||
diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c
new file mode 100644
index 000000000000..1fcecc5b9e03
--- /dev/null
+++ b/arch/tile/mm/highmem.c
@@ -0,0 +1,328 @@
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #include <linux/highmem.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/pagemap.h> | ||
18 | #include <asm/homecache.h> | ||
19 | |||
20 | #define kmap_get_pte(vaddr) \ | ||
21 | pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)),\ | ||
22 | (vaddr)), (vaddr)) | ||
23 | |||
24 | |||
25 | void *kmap(struct page *page) | ||
26 | { | ||
27 | void *kva; | ||
28 | unsigned long flags; | ||
29 | pte_t *ptep; | ||
30 | |||
31 | might_sleep(); | ||
32 | if (!PageHighMem(page)) | ||
33 | return page_address(page); | ||
34 | kva = kmap_high(page); | ||
35 | |||
36 | /* | ||
37 | * Rewrite the PTE under the lock. This ensures that the page | ||
38 | * is not currently migrating. | ||
39 | */ | ||
40 | ptep = kmap_get_pte((unsigned long)kva); | ||
41 | flags = homecache_kpte_lock(); | ||
42 | set_pte_at(&init_mm, kva, ptep, mk_pte(page, page_to_kpgprot(page))); | ||
43 | homecache_kpte_unlock(flags); | ||
44 | |||
45 | return kva; | ||
46 | } | ||
47 | EXPORT_SYMBOL(kmap); | ||
48 | |||
49 | void kunmap(struct page *page) | ||
50 | { | ||
51 | if (in_interrupt()) | ||
52 | BUG(); | ||
53 | if (!PageHighMem(page)) | ||
54 | return; | ||
55 | kunmap_high(page); | ||
56 | } | ||
57 | EXPORT_SYMBOL(kunmap); | ||
58 | |||
59 | static void debug_kmap_atomic_prot(enum km_type type) | ||
60 | { | ||
61 | #ifdef CONFIG_DEBUG_HIGHMEM | ||
62 | static unsigned warn_count = 10; | ||
63 | |||
64 | if (unlikely(warn_count == 0)) | ||
65 | return; | ||
66 | |||
67 | if (unlikely(in_interrupt())) { | ||
68 | if (in_irq()) { | ||
69 | if (type != KM_IRQ0 && type != KM_IRQ1 && | ||
70 | type != KM_BIO_SRC_IRQ && | ||
71 | /* type != KM_BIO_DST_IRQ && */ | ||
72 | type != KM_BOUNCE_READ) { | ||
73 | WARN_ON(1); | ||
74 | warn_count--; | ||
75 | } | ||
76 | } else if (!irqs_disabled()) { /* softirq */ | ||
77 | if (type != KM_IRQ0 && type != KM_IRQ1 && | ||
78 | type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 && | ||
79 | type != KM_SKB_SUNRPC_DATA && | ||
80 | type != KM_SKB_DATA_SOFTIRQ && | ||
81 | type != KM_BOUNCE_READ) { | ||
82 | WARN_ON(1); | ||
83 | warn_count--; | ||
84 | } | ||
85 | } | ||
86 | } | ||
87 | |||
88 | if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ || | ||
89 | type == KM_BIO_SRC_IRQ /* || type == KM_BIO_DST_IRQ */) { | ||
90 | if (!irqs_disabled()) { | ||
91 | WARN_ON(1); | ||
92 | warn_count--; | ||
93 | } | ||
94 | } else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) { | ||
95 | if (irq_count() == 0 && !irqs_disabled()) { | ||
96 | WARN_ON(1); | ||
97 | warn_count--; | ||
98 | } | ||
99 | } | ||
100 | #endif | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * Describe a single atomic mapping of a page on a given cpu at a | ||
105 | * given address, and allow it to be linked into a list. | ||
106 | */ | ||
107 | struct atomic_mapped_page { | ||
108 | struct list_head list; | ||
109 | struct page *page; | ||
110 | int cpu; | ||
111 | unsigned long va; | ||
112 | }; | ||
113 | |||
114 | static spinlock_t amp_lock = __SPIN_LOCK_UNLOCKED(&amp_lock); | ||
115 | static struct list_head amp_list = LIST_HEAD_INIT(amp_list); | ||
116 | |||
117 | /* | ||
118 | * Combining this structure with a per-cpu declaration lets us give | ||
119 | * each cpu an atomic_mapped_page structure per type. | ||
120 | */ | ||
121 | struct kmap_amps { | ||
122 | struct atomic_mapped_page per_type[KM_TYPE_NR]; | ||
123 | }; | ||
124 | DEFINE_PER_CPU(struct kmap_amps, amps); | ||
125 | |||
126 | /* | ||
127 | * Add a page and va, on this cpu, to the list of kmap_atomic pages, | ||
128 | * and write the new pte to memory. Writing the new PTE under the | ||
129 | * lock guarantees that it is either on the list before migration starts | ||
130 | * (if we won the race), or set_pte() sets the migrating bit in the PTE | ||
131 | * (if we lost the race). And doing it under the lock guarantees | ||
132 | * that when kmap_atomic_fix_one_pte() comes along, it finds a valid | ||
133 | * PTE in memory, iff the mapping is still on the amp_list. | ||
134 | * | ||
135 | * Finally, doing it under the lock lets us safely examine the page | ||
136 | * to see if it is immutable or not, for the generic kmap_atomic() case. | ||
137 | * If we examine it earlier we are exposed to a race where it looks | ||
138 | * writable earlier, but becomes immutable before we write the PTE. | ||
139 | */ | ||
140 | static void kmap_atomic_register(struct page *page, enum km_type type, | ||
141 | unsigned long va, pte_t *ptep, pte_t pteval) | ||
142 | { | ||
143 | unsigned long flags; | ||
144 | struct atomic_mapped_page *amp; | ||
145 | |||
146 | flags = homecache_kpte_lock(); | ||
147 | spin_lock(&amp_lock); | ||
148 | |||
149 | /* With interrupts disabled, now fill in the per-cpu info. */ | ||
150 | amp = &__get_cpu_var(amps).per_type[type]; | ||
151 | amp->page = page; | ||
152 | amp->cpu = smp_processor_id(); | ||
153 | amp->va = va; | ||
154 | |||
155 | /* For generic kmap_atomic(), choose the PTE writability now. */ | ||
156 | if (!pte_read(pteval)) | ||
157 | pteval = mk_pte(page, page_to_kpgprot(page)); | ||
158 | |||
159 | list_add(&amp->list, &amp_list); | ||
160 | set_pte(ptep, pteval); | ||
161 | arch_flush_lazy_mmu_mode(); | ||
162 | |||
163 | spin_unlock(&amp_lock); | ||
164 | homecache_kpte_unlock(flags); | ||
165 | } | ||
166 | |||
167 | /* | ||
168 | * Remove a page and va, on this cpu, from the list of kmap_atomic pages. | ||
169 | * Linear-time search, but we count on the lists being short. | ||
170 | * We don't need to adjust the PTE under the lock (as opposed to the | ||
171 | * kmap_atomic_register() case), since we're just unconditionally | ||
172 | * zeroing the PTE after it's off the list. | ||
173 | */ | ||
174 | static void kmap_atomic_unregister(struct page *page, unsigned long va) | ||
175 | { | ||
176 | unsigned long flags; | ||
177 | struct atomic_mapped_page *amp; | ||
178 | int cpu = smp_processor_id(); | ||
179 | spin_lock_irqsave(&amp_lock, flags); | ||
180 | list_for_each_entry(amp, &amp_list, list) { | ||
181 | if (amp->page == page && amp->cpu == cpu && amp->va == va) | ||
182 | break; | ||
183 | } | ||
184 | BUG_ON(&amp->list == &amp_list); | ||
185 | list_del(&amp->list); | ||
186 | spin_unlock_irqrestore(&amp_lock, flags); | ||
187 | } | ||
188 | |||
189 | /* Helper routine for kmap_atomic_fix_kpte(), below. */ | ||
190 | static void kmap_atomic_fix_one_kpte(struct atomic_mapped_page *amp, | ||
191 | int finished) | ||
192 | { | ||
193 | pte_t *ptep = kmap_get_pte(amp->va); | ||
194 | if (!finished) { | ||
195 | set_pte(ptep, pte_mkmigrate(*ptep)); | ||
196 | flush_remote(0, 0, NULL, amp->va, PAGE_SIZE, PAGE_SIZE, | ||
197 | cpumask_of(amp->cpu), NULL, 0); | ||
198 | } else { | ||
199 | /* | ||
200 | * Rewrite a default kernel PTE for this page. | ||
201 | * We rely on the fact that set_pte() writes the | ||
202 | * present+migrating bits last. | ||
203 | */ | ||
204 | pte_t pte = mk_pte(amp->page, page_to_kpgprot(amp->page)); | ||
205 | set_pte(ptep, pte); | ||
206 | } | ||
207 | } | ||
208 | |||
209 | /* | ||
210 | * This routine is a helper function for homecache_fix_kpte(); see | ||
211 | * its comments for more information on the "finished" argument here. | ||
212 | * | ||
213 | * Note that we hold the lock while doing the remote flushes, which | ||
214 | * will stall any unrelated cpus trying to do kmap_atomic operations. | ||
215 | * We could just update the PTEs under the lock, and save away copies | ||
216 | * of the structs (or just the va+cpu), then flush them after we | ||
217 | * release the lock, but it seems easier just to do it all under the lock. | ||
218 | */ | ||
219 | void kmap_atomic_fix_kpte(struct page *page, int finished) | ||
220 | { | ||
221 | struct atomic_mapped_page *amp; | ||
222 | unsigned long flags; | ||
223 | spin_lock_irqsave(&amp_lock, flags); | ||
224 | list_for_each_entry(amp, &amp_list, list) { | ||
225 | if (amp->page == page) | ||
226 | kmap_atomic_fix_one_kpte(amp, finished); | ||
227 | } | ||
228 | spin_unlock_irqrestore(&amp_lock, flags); | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap | ||
233 | * because the kmap code must perform a global TLB invalidation when | ||
234 | * the kmap pool wraps. | ||
235 | * | ||
236 | * Note that they may be slower than on x86 (etc.) because unlike on | ||
237 | * those platforms, we do have to take a global lock to map and unmap | ||
238 | * pages on Tile (see above). | ||
239 | * | ||
240 | * When holding an atomic kmap it is not legal to sleep, so atomic | ||
241 | * kmaps are appropriate for short, tight code paths only. | ||
242 | */ | ||
243 | void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) | ||
244 | { | ||
245 | enum fixed_addresses idx; | ||
246 | unsigned long vaddr; | ||
247 | pte_t *pte; | ||
248 | |||
249 | /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ | ||
250 | pagefault_disable(); | ||
251 | |||
252 | /* Avoid icache flushes by disallowing atomic executable mappings. */ | ||
253 | BUG_ON(pte_exec(prot)); | ||
254 | |||
255 | if (!PageHighMem(page)) | ||
256 | return page_address(page); | ||
257 | |||
258 | debug_kmap_atomic_prot(type); | ||
259 | |||
260 | idx = type + KM_TYPE_NR*smp_processor_id(); | ||
261 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | ||
262 | pte = kmap_get_pte(vaddr); | ||
263 | BUG_ON(!pte_none(*pte)); | ||
264 | |||
265 | /* Register that this page is mapped atomically on this cpu. */ | ||
266 | kmap_atomic_register(page, type, vaddr, pte, mk_pte(page, prot)); | ||
267 | |||
268 | return (void *)vaddr; | ||
269 | } | ||
270 | EXPORT_SYMBOL(kmap_atomic_prot); | ||
271 | |||
272 | void *kmap_atomic(struct page *page, enum km_type type) | ||
273 | { | ||
274 | /* PAGE_NONE is a magic value that tells us to check immutability. */ | ||
275 | return kmap_atomic_prot(page, type, PAGE_NONE); | ||
276 | } | ||
277 | EXPORT_SYMBOL(kmap_atomic); | ||
278 | |||
279 | void kunmap_atomic(void *kvaddr, enum km_type type) | ||
280 | { | ||
281 | unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; | ||
282 | enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); | ||
283 | |||
284 | /* | ||
285 | * Force other mappings to Oops if they try to access this pte without | ||
286 | * first remapping it. Keeping stale mappings around is a bad idea. | ||
287 | */ | ||
288 | if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) { | ||
289 | pte_t *pte = kmap_get_pte(vaddr); | ||
290 | pte_t pteval = *pte; | ||
291 | BUG_ON(!pte_present(pteval) && !pte_migrating(pteval)); | ||
292 | kmap_atomic_unregister(pte_page(pteval), vaddr); | ||
293 | kpte_clear_flush(pte, vaddr); | ||
294 | } else { | ||
295 | /* Must be a lowmem page */ | ||
296 | BUG_ON(vaddr < PAGE_OFFSET); | ||
297 | BUG_ON(vaddr >= (unsigned long)high_memory); | ||
298 | } | ||
299 | |||
300 | arch_flush_lazy_mmu_mode(); | ||
301 | pagefault_enable(); | ||
302 | } | ||
303 | EXPORT_SYMBOL(kunmap_atomic); | ||
304 | |||
305 | /* | ||
306 | * This API is supposed to allow us to map memory without a "struct page". | ||
307 | * Currently we don't support this, though this may change in the future. | ||
308 | */ | ||
309 | void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) | ||
310 | { | ||
311 | return kmap_atomic(pfn_to_page(pfn), type); | ||
312 | } | ||
313 | void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) | ||
314 | { | ||
315 | return kmap_atomic_prot(pfn_to_page(pfn), type, prot); | ||
316 | } | ||
317 | |||
318 | struct page *kmap_atomic_to_page(void *ptr) | ||
319 | { | ||
320 | pte_t *pte; | ||
321 | unsigned long vaddr = (unsigned long)ptr; | ||
322 | |||
323 | if (vaddr < FIXADDR_START) | ||
324 | return virt_to_page(ptr); | ||
325 | |||
326 | pte = kmap_get_pte(vaddr); | ||
327 | return pte_page(*pte); | ||
328 | } | ||
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
new file mode 100644
index 000000000000..52feb77133ce
--- /dev/null
+++ b/arch/tile/mm/homecache.c
@@ -0,0 +1,445 @@
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * This code maintains the "home" for each page in the system. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/mm.h> | ||
19 | #include <linux/spinlock.h> | ||
20 | #include <linux/list.h> | ||
21 | #include <linux/bootmem.h> | ||
22 | #include <linux/rmap.h> | ||
23 | #include <linux/pagemap.h> | ||
24 | #include <linux/mutex.h> | ||
25 | #include <linux/interrupt.h> | ||
26 | #include <linux/sysctl.h> | ||
27 | #include <linux/pagevec.h> | ||
28 | #include <linux/ptrace.h> | ||
29 | #include <linux/timex.h> | ||
30 | #include <linux/cache.h> | ||
31 | #include <linux/smp.h> | ||
32 | |||
33 | #include <asm/page.h> | ||
34 | #include <asm/sections.h> | ||
35 | #include <asm/tlbflush.h> | ||
36 | #include <asm/pgalloc.h> | ||
37 | #include <asm/homecache.h> | ||
38 | |||
39 | #include "migrate.h" | ||
40 | |||
41 | |||
42 | #if CHIP_HAS_COHERENT_LOCAL_CACHE() | ||
43 | |||
44 | /* | ||
45 | * The noallocl2 option suppresses all use of the L2 cache to cache | ||
46 | * locally from a remote home. There's no point in using it if we | ||
47 | * don't have coherent local caching, though. | ||
48 | */ | ||
49 | int __write_once noallocl2; | ||
50 | static int __init set_noallocl2(char *str) | ||
51 | { | ||
52 | noallocl2 = 1; | ||
53 | return 0; | ||
54 | } | ||
55 | early_param("noallocl2", set_noallocl2); | ||
56 | |||
57 | #else | ||
58 | |||
59 | #define noallocl2 0 | ||
60 | |||
61 | #endif | ||
62 | |||
63 | |||
64 | |||
65 | /* Provide no-op versions of these routines to keep flush_remote() cleaner. */ | ||
66 | #define mark_caches_evicted_start() 0 | ||
67 | #define mark_caches_evicted_finish(mask, timestamp) do {} while (0) | ||
68 | |||
69 | |||
70 | |||
71 | |||
72 | /* | ||
73 | * Update the irq_stat for cpus that we are going to interrupt | ||
74 | * with TLB or cache flushes. Also handle removing dataplane cpus | ||
75 | * from the TLB flush set, and setting dataplane_tlb_state instead. | ||
76 | */ | ||
77 | static void hv_flush_update(const struct cpumask *cache_cpumask, | ||
78 | struct cpumask *tlb_cpumask, | ||
79 | unsigned long tlb_va, unsigned long tlb_length, | ||
80 | HV_Remote_ASID *asids, int asidcount) | ||
81 | { | ||
82 | struct cpumask mask; | ||
83 | int i, cpu; | ||
84 | |||
85 | cpumask_clear(&mask); | ||
86 | if (cache_cpumask) | ||
87 | cpumask_or(&mask, &mask, cache_cpumask); | ||
88 | if (tlb_cpumask && tlb_length) { | ||
89 | cpumask_or(&mask, &mask, tlb_cpumask); | ||
90 | } | ||
91 | |||
92 | for (i = 0; i < asidcount; ++i) | ||
93 | cpumask_set_cpu(asids[i].y * smp_width + asids[i].x, &mask); | ||
94 | |||
95 | /* | ||
96 | * Don't bother to update atomically; losing a count | ||
97 | * here is not that critical. | ||
98 | */ | ||
99 | for_each_cpu(cpu, &mask) | ||
100 | ++per_cpu(irq_stat, cpu).irq_hv_flush_count; | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * This wrapper function around hv_flush_remote() does several things: | ||
105 | * | ||
106 | * - Provides a return value error-checking panic path, since | ||
107 | * there's never any good reason for hv_flush_remote() to fail. | ||
108 | * - Accepts a 32-bit PFN rather than a 64-bit PA, which generally | ||
109 | * is the type that Linux wants to pass around anyway. | ||
110 | * - Centralizes the mark_caches_evicted() handling. | ||
111 | * - Canonicalizes the arguments so that a zero length yields a NULL cpumask. | ||
112 | * - Handles deferring TLB flushes for dataplane tiles. | ||
113 | * - Tracks remote interrupts in the per-cpu irq_cpustat_t. | ||
114 | * | ||
115 | * Note that we have to wait until the cache flush completes before | ||
116 | * updating the per-cpu last_cache_flush word, since otherwise another | ||
117 | * concurrent flush can race, conclude the flush has already | ||
118 | * completed, and start to use the page while it's still dirty | ||
119 | * remotely (running concurrently with the actual evict, presumably). | ||
120 | */ | ||
121 | void flush_remote(unsigned long cache_pfn, unsigned long cache_control, | ||
122 | const struct cpumask *cache_cpumask_orig, | ||
123 | HV_VirtAddr tlb_va, unsigned long tlb_length, | ||
124 | unsigned long tlb_pgsize, | ||
125 | const struct cpumask *tlb_cpumask_orig, | ||
126 | HV_Remote_ASID *asids, int asidcount) | ||
127 | { | ||
128 | int rc; | ||
129 | int timestamp = 0; /* happy compiler */ | ||
130 | struct cpumask cache_cpumask_copy, tlb_cpumask_copy; | ||
131 | struct cpumask *cache_cpumask, *tlb_cpumask; | ||
132 | HV_PhysAddr cache_pa; | ||
133 | char cache_buf[NR_CPUS*5], tlb_buf[NR_CPUS*5]; | ||
134 | |||
135 | mb(); /* provided just to simplify "magic hypervisor" mode */ | ||
136 | |||
137 | /* | ||
138 | * Canonicalize and copy the cpumasks. | ||
139 | */ | ||
140 | if (cache_cpumask_orig && cache_control) { | ||
141 | cpumask_copy(&cache_cpumask_copy, cache_cpumask_orig); | ||
142 | cache_cpumask = &cache_cpumask_copy; | ||
143 | } else { | ||
144 | cpumask_clear(&cache_cpumask_copy); | ||
145 | cache_cpumask = NULL; | ||
146 | } | ||
147 | if (cache_cpumask == NULL) | ||
148 | cache_control = 0; | ||
149 | if (tlb_cpumask_orig && tlb_length) { | ||
150 | cpumask_copy(&tlb_cpumask_copy, tlb_cpumask_orig); | ||
151 | tlb_cpumask = &tlb_cpumask_copy; | ||
152 | } else { | ||
153 | cpumask_clear(&tlb_cpumask_copy); | ||
154 | tlb_cpumask = NULL; | ||
155 | } | ||
156 | |||
157 | hv_flush_update(cache_cpumask, tlb_cpumask, tlb_va, tlb_length, | ||
158 | asids, asidcount); | ||
159 | cache_pa = (HV_PhysAddr)cache_pfn << PAGE_SHIFT; | ||
160 | if (cache_control & HV_FLUSH_EVICT_L2) | ||
161 | timestamp = mark_caches_evicted_start(); | ||
162 | rc = hv_flush_remote(cache_pa, cache_control, | ||
163 | cpumask_bits(cache_cpumask), | ||
164 | tlb_va, tlb_length, tlb_pgsize, | ||
165 | cpumask_bits(tlb_cpumask), | ||
166 | asids, asidcount); | ||
167 | if (cache_control & HV_FLUSH_EVICT_L2) | ||
168 | mark_caches_evicted_finish(cache_cpumask, timestamp); | ||
169 | if (rc == 0) | ||
170 | return; | ||
171 | cpumask_scnprintf(cache_buf, sizeof(cache_buf), &cache_cpumask_copy); | ||
172 | cpumask_scnprintf(tlb_buf, sizeof(tlb_buf), &tlb_cpumask_copy); | ||
173 | |||
174 | printk("hv_flush_remote(%#llx, %#lx, %p [%s]," | ||
175 | " %#lx, %#lx, %#lx, %p [%s], %p, %d) = %d\n", | ||
176 | cache_pa, cache_control, cache_cpumask, cache_buf, | ||
177 | (unsigned long)tlb_va, tlb_length, tlb_pgsize, | ||
178 | tlb_cpumask, tlb_buf, | ||
179 | asids, asidcount, rc); | ||
180 | if (asidcount > 0) { | ||
181 | int i; | ||
182 | printk(" asids:"); | ||
183 | for (i = 0; i < asidcount; ++i) | ||
184 | printk(" %d,%d,%d", | ||
185 | asids[i].x, asids[i].y, asids[i].asid); | ||
186 | printk("\n"); | ||
187 | } | ||
188 | panic("Unsafe to continue."); | ||
189 | } | ||
190 | |||
191 | void homecache_evict(const struct cpumask *mask) | ||
192 | { | ||
193 | flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0); | ||
194 | } | ||
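/*
 * Usage sketch (illustrative): to flush the entire L2 of the current
 * cpu, e.g. before it is taken offline, a caller could do:
 *
 *	homecache_evict(cpumask_of(smp_processor_id()));
 */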
195 | |||
196 | /* Return a mask of the cpus whose caches currently own these pages. */ | ||
197 | static void homecache_mask(struct page *page, int pages, | ||
198 | struct cpumask *home_mask) | ||
199 | { | ||
200 | int i; | ||
201 | cpumask_clear(home_mask); | ||
202 | for (i = 0; i < pages; ++i) { | ||
203 | int home = page_home(&page[i]); | ||
204 | if (home == PAGE_HOME_IMMUTABLE || | ||
205 | home == PAGE_HOME_INCOHERENT) { | ||
206 | cpumask_copy(home_mask, cpu_possible_mask); | ||
207 | return; | ||
208 | } | ||
209 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
210 | if (home == PAGE_HOME_HASH) { | ||
211 | cpumask_or(home_mask, home_mask, &hash_for_home_map); | ||
212 | continue; | ||
213 | } | ||
214 | #endif | ||
215 | if (home == PAGE_HOME_UNCACHED) | ||
216 | continue; | ||
217 | BUG_ON(home < 0 || home >= NR_CPUS); | ||
218 | cpumask_set_cpu(home, home_mask); | ||
219 | } | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * Return the passed length, or zero if it's long enough that we | ||
224 | * believe we should evict the whole L2 cache. | ||
225 | */ | ||
226 | static unsigned long cache_flush_length(unsigned long length) | ||
227 | { | ||
228 | return (length >= CHIP_L2_CACHE_SIZE()) ? HV_FLUSH_EVICT_L2 : length; | ||
229 | } | ||
230 | |||
231 | /* On the simulator, confirm lines have been evicted everywhere. */ | ||
232 | static void validate_lines_evicted(unsigned long pfn, size_t length) | ||
233 | { | ||
234 | sim_syscall(SIM_SYSCALL_VALIDATE_LINES_EVICTED, | ||
235 | (HV_PhysAddr)pfn << PAGE_SHIFT, length); | ||
236 | } | ||
237 | |||
238 | /* Flush a page out of whatever cache(s) it is in. */ | ||
239 | void homecache_flush_cache(struct page *page, int order) | ||
240 | { | ||
241 | int pages = 1 << order; | ||
242 | int length = cache_flush_length(pages * PAGE_SIZE); | ||
243 | unsigned long pfn = page_to_pfn(page); | ||
244 | struct cpumask home_mask; | ||
245 | |||
246 | homecache_mask(page, pages, &home_mask); | ||
247 | flush_remote(pfn, length, &home_mask, 0, 0, 0, NULL, NULL, 0); | ||
248 | validate_lines_evicted(pfn, pages * PAGE_SIZE); | ||
249 | } | ||
250 | |||
251 | |||
252 | /* Report the home corresponding to a given PTE. */ | ||
253 | static int pte_to_home(pte_t pte) | ||
254 | { | ||
255 | if (hv_pte_get_nc(pte)) | ||
256 | return PAGE_HOME_IMMUTABLE; | ||
257 | switch (hv_pte_get_mode(pte)) { | ||
258 | case HV_PTE_MODE_CACHE_TILE_L3: | ||
259 | return get_remote_cache_cpu(pte); | ||
260 | case HV_PTE_MODE_CACHE_NO_L3: | ||
261 | return PAGE_HOME_INCOHERENT; | ||
262 | case HV_PTE_MODE_UNCACHED: | ||
263 | return PAGE_HOME_UNCACHED; | ||
264 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
265 | case HV_PTE_MODE_CACHE_HASH_L3: | ||
266 | return PAGE_HOME_HASH; | ||
267 | #endif | ||
268 | } | ||
269 | panic("Bad PTE %#llx\n", pte.val); | ||
270 | } | ||
271 | |||
272 | /* Update the home of a PTE if necessary (can also be used for a pgprot_t). */ | ||
273 | pte_t pte_set_home(pte_t pte, int home) | ||
274 | { | ||
275 | /* Check for non-linear file mapping "PTEs" and pass them through. */ | ||
276 | if (pte_file(pte)) | ||
277 | return pte; | ||
278 | |||
279 | #if CHIP_HAS_MMIO() | ||
280 | /* Check for MMIO mappings and pass them through. */ | ||
281 | if (hv_pte_get_mode(pte) == HV_PTE_MODE_MMIO) | ||
282 | return pte; | ||
283 | #endif | ||
284 | |||
285 | |||
286 | /* | ||
287 | * Only immutable pages get NC mappings. If we have a | ||
288 | * non-coherent PTE, but the underlying page is not | ||
289 | * immutable, it's likely the result of a forced | ||
290 | * caching setting running up against ptrace setting | ||
291 | * the page to be writable underneath. In this case, | ||
292 | * just keep the PTE coherent. | ||
293 | */ | ||
294 | if (hv_pte_get_nc(pte) && home != PAGE_HOME_IMMUTABLE) { | ||
295 | pte = hv_pte_clear_nc(pte); | ||
296 | printk("non-immutable page incoherently referenced: %#llx\n", | ||
297 | pte.val); | ||
298 | } | ||
299 | |||
300 | switch (home) { | ||
301 | |||
302 | case PAGE_HOME_UNCACHED: | ||
303 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED); | ||
304 | break; | ||
305 | |||
306 | case PAGE_HOME_INCOHERENT: | ||
307 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); | ||
308 | break; | ||
309 | |||
310 | case PAGE_HOME_IMMUTABLE: | ||
311 | /* | ||
312 | * We could home this page anywhere, since it's immutable, | ||
313 | * but by default just home it to follow "hash_default". | ||
314 | */ | ||
315 | BUG_ON(hv_pte_get_writable(pte)); | ||
316 | if (pte_get_forcecache(pte)) { | ||
317 | /* Upgrade "force any cpu" to "No L3" for immutable. */ | ||
318 | if (hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_TILE_L3 | ||
319 | && pte_get_anyhome(pte)) { | ||
320 | pte = hv_pte_set_mode(pte, | ||
321 | HV_PTE_MODE_CACHE_NO_L3); | ||
322 | } | ||
323 | } else | ||
324 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
325 | if (hash_default) | ||
326 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); | ||
327 | else | ||
328 | #endif | ||
329 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); | ||
330 | pte = hv_pte_set_nc(pte); | ||
331 | break; | ||
332 | |||
333 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
334 | case PAGE_HOME_HASH: | ||
335 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); | ||
336 | break; | ||
337 | #endif | ||
338 | |||
339 | default: | ||
340 | BUG_ON(home < 0 || home >= NR_CPUS || | ||
341 | !cpu_is_valid_lotar(home)); | ||
342 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3); | ||
343 | pte = set_remote_cache_cpu(pte, home); | ||
344 | break; | ||
345 | } | ||
346 | |||
347 | #if CHIP_HAS_NC_AND_NOALLOC_BITS() | ||
348 | if (noallocl2) | ||
349 | pte = hv_pte_set_no_alloc_l2(pte); | ||
350 | |||
351 | /* Simplify "no local and no l3" to "uncached" */ | ||
352 | if (hv_pte_get_no_alloc_l2(pte) && hv_pte_get_no_alloc_l1(pte) && | ||
353 | hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) { | ||
354 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED); | ||
355 | } | ||
356 | #endif | ||
357 | |||
358 | /* Checking this case here gives a better panic than from the hv. */ | ||
359 | BUG_ON(hv_pte_get_mode(pte) == 0); | ||
360 | |||
361 | return pte; | ||
362 | } | ||
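/*
 * Usage sketch (illustrative): as noted above, this can also be applied
 * to a pgprot_t, so callers can build a cache-aware protection value
 * directly, e.g.
 *
 *	pgprot_t prot = pte_set_home(PAGE_KERNEL, smp_processor_id());
 *
 * which is how construct_pgprot() in init.c assigns homes to kernel
 * data pages.
 */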
363 | |||
364 | /* | ||
365 | * The routines in this section are the "static" versions of the normal | ||
366 | * dynamic homecaching routines; they just set the home cache | ||
367 | * of a kernel page once, and require a full-chip cache/TLB flush, | ||
368 | * so they're not suitable for anything but infrequent use. | ||
369 | */ | ||
370 | |||
371 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
372 | static inline int initial_page_home(void) { return PAGE_HOME_HASH; } | ||
373 | #else | ||
374 | static inline int initial_page_home(void) { return 0; } | ||
375 | #endif | ||
376 | |||
377 | int page_home(struct page *page) | ||
378 | { | ||
379 | if (PageHighMem(page)) { | ||
380 | return initial_page_home(); | ||
381 | } else { | ||
382 | unsigned long kva = (unsigned long)page_address(page); | ||
383 | return pte_to_home(*virt_to_pte(NULL, kva)); | ||
384 | } | ||
385 | } | ||
386 | |||
387 | void homecache_change_page_home(struct page *page, int order, int home) | ||
388 | { | ||
389 | int i, pages = (1 << order); | ||
390 | unsigned long kva; | ||
391 | |||
392 | BUG_ON(PageHighMem(page)); | ||
393 | BUG_ON(page_count(page) > 1); | ||
394 | BUG_ON(page_mapcount(page) != 0); | ||
395 | kva = (unsigned long) page_address(page); | ||
396 | flush_remote(0, HV_FLUSH_EVICT_L2, &cpu_cacheable_map, | ||
397 | kva, pages * PAGE_SIZE, PAGE_SIZE, cpu_online_mask, | ||
398 | NULL, 0); | ||
399 | |||
400 | for (i = 0; i < pages; ++i, kva += PAGE_SIZE) { | ||
401 | pte_t *ptep = virt_to_pte(NULL, kva); | ||
402 | pte_t pteval = *ptep; | ||
403 | BUG_ON(!pte_present(pteval) || pte_huge(pteval)); | ||
404 | *ptep = pte_set_home(pteval, home); | ||
405 | } | ||
406 | } | ||
407 | |||
408 | struct page *homecache_alloc_pages(gfp_t gfp_mask, | ||
409 | unsigned int order, int home) | ||
410 | { | ||
411 | struct page *page; | ||
412 | BUG_ON(gfp_mask & __GFP_HIGHMEM); /* must be lowmem */ | ||
413 | page = alloc_pages(gfp_mask, order); | ||
414 | if (page) | ||
415 | homecache_change_page_home(page, order, home); | ||
416 | return page; | ||
417 | } | ||
418 | |||
419 | struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask, | ||
420 | unsigned int order, int home) | ||
421 | { | ||
422 | struct page *page; | ||
423 | BUG_ON(gfp_mask & __GFP_HIGHMEM); /* must be lowmem */ | ||
424 | page = alloc_pages_node(nid, gfp_mask, order); | ||
425 | if (page) | ||
426 | homecache_change_page_home(page, order, home); | ||
427 | return page; | ||
428 | } | ||
429 | |||
430 | void homecache_free_pages(unsigned long addr, unsigned int order) | ||
431 | { | ||
432 | struct page *page; | ||
433 | |||
434 | if (addr == 0) | ||
435 | return; | ||
436 | |||
437 | VM_BUG_ON(!virt_addr_valid((void *)addr)); | ||
438 | page = virt_to_page((void *)addr); | ||
439 | if (put_page_testzero(page)) { | ||
440 | int pages = (1 << order); | ||
441 | homecache_change_page_home(page, order, initial_page_home()); | ||
442 | while (pages--) | ||
443 | __free_page(page++); | ||
444 | } | ||
445 | } | ||
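/*
 * Usage sketch (illustrative): allocate a lowmem page homed on the
 * current cpu, then release it; homecache_free_pages() restores the
 * default home before the page goes back to the allocator.
 *
 *	struct page *page = homecache_alloc_pages(GFP_KERNEL, 0,
 *						   smp_processor_id());
 *	if (page)
 *		homecache_free_pages((unsigned long)page_address(page), 0);
 */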
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c new file mode 100644 index 000000000000..c38570f8f0d0 --- /dev/null +++ b/arch/tile/mm/hugetlbpage.c | |||
@@ -0,0 +1,343 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * TILE Huge TLB Page Support for Kernel. | ||
15 | * Taken from i386 hugetlb implementation: | ||
16 | * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> | ||
17 | */ | ||
18 | |||
19 | #include <linux/init.h> | ||
20 | #include <linux/fs.h> | ||
21 | #include <linux/mm.h> | ||
22 | #include <linux/hugetlb.h> | ||
23 | #include <linux/pagemap.h> | ||
24 | #include <linux/smp_lock.h> | ||
25 | #include <linux/slab.h> | ||
26 | #include <linux/err.h> | ||
27 | #include <linux/sysctl.h> | ||
28 | #include <linux/mman.h> | ||
29 | #include <asm/tlb.h> | ||
30 | #include <asm/tlbflush.h> | ||
31 | |||
32 | pte_t *huge_pte_alloc(struct mm_struct *mm, | ||
33 | unsigned long addr, unsigned long sz) | ||
34 | { | ||
35 | pgd_t *pgd; | ||
36 | pud_t *pud; | ||
37 | pte_t *pte = NULL; | ||
38 | |||
39 | /* We do not yet support multiple huge page sizes. */ | ||
40 | BUG_ON(sz != PMD_SIZE); | ||
41 | |||
42 | pgd = pgd_offset(mm, addr); | ||
43 | pud = pud_alloc(mm, pgd, addr); | ||
44 | if (pud) | ||
45 | pte = (pte_t *) pmd_alloc(mm, pud, addr); | ||
46 | BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); | ||
47 | |||
48 | return pte; | ||
49 | } | ||
50 | |||
51 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | ||
52 | { | ||
53 | pgd_t *pgd; | ||
54 | pud_t *pud; | ||
55 | pmd_t *pmd = NULL; | ||
56 | |||
57 | pgd = pgd_offset(mm, addr); | ||
58 | if (pgd_present(*pgd)) { | ||
59 | pud = pud_offset(pgd, addr); | ||
60 | if (pud_present(*pud)) | ||
61 | pmd = pmd_offset(pud, addr); | ||
62 | } | ||
63 | return (pte_t *) pmd; | ||
64 | } | ||
65 | |||
66 | #ifdef HUGETLB_TEST | ||
67 | struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, | ||
68 | int write) | ||
69 | { | ||
70 | unsigned long vpfn = address >> PAGE_SHIFT; | ||
71 | pte_t *pte; | ||
73 | struct page *page; | ||
74 | struct vm_area_struct *vma; | ||
75 | |||
76 | vma = find_vma(mm, address); | ||
77 | if (!vma || !is_vm_hugetlb_page(vma)) | ||
78 | return ERR_PTR(-EINVAL); | ||
79 | |||
80 | pte = huge_pte_offset(mm, address); | ||
81 | |||
82 | /* hugetlb should be locked, and hence, prefaulted */ | ||
83 | WARN_ON(!pte || pte_none(*pte)); | ||
84 | |||
85 | page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; | ||
86 | |||
87 | WARN_ON(!PageHead(page)); | ||
88 | |||
89 | return page; | ||
90 | } | ||
91 | |||
92 | int pmd_huge(pmd_t pmd) | ||
93 | { | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | int pud_huge(pud_t pud) | ||
98 | { | ||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, | ||
103 | pmd_t *pmd, int write) | ||
104 | { | ||
105 | return NULL; | ||
106 | } | ||
107 | |||
108 | #else | ||
109 | |||
110 | struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, | ||
111 | int write) | ||
112 | { | ||
113 | return ERR_PTR(-EINVAL); | ||
114 | } | ||
115 | |||
116 | int pmd_huge(pmd_t pmd) | ||
117 | { | ||
118 | return !!(pmd_val(pmd) & _PAGE_HUGE_PAGE); | ||
119 | } | ||
120 | |||
121 | int pud_huge(pud_t pud) | ||
122 | { | ||
123 | return !!(pud_val(pud) & _PAGE_HUGE_PAGE); | ||
124 | } | ||
125 | |||
126 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, | ||
127 | pmd_t *pmd, int write) | ||
128 | { | ||
129 | struct page *page; | ||
130 | |||
131 | page = pte_page(*(pte_t *)pmd); | ||
132 | if (page) | ||
133 | page += ((address & ~PMD_MASK) >> PAGE_SHIFT); | ||
134 | return page; | ||
135 | } | ||
136 | |||
137 | struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, | ||
138 | pud_t *pud, int write) | ||
139 | { | ||
140 | struct page *page; | ||
141 | |||
142 | page = pte_page(*(pte_t *)pud); | ||
143 | if (page) | ||
144 | page += ((address & ~PUD_MASK) >> PAGE_SHIFT); | ||
145 | return page; | ||
146 | } | ||
147 | |||
148 | int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) | ||
149 | { | ||
150 | return 0; | ||
151 | } | ||
152 | |||
153 | #endif | ||
154 | |||
155 | #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA | ||
156 | static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, | ||
157 | unsigned long addr, unsigned long len, | ||
158 | unsigned long pgoff, unsigned long flags) | ||
159 | { | ||
160 | struct hstate *h = hstate_file(file); | ||
161 | struct mm_struct *mm = current->mm; | ||
162 | struct vm_area_struct *vma; | ||
163 | unsigned long start_addr; | ||
164 | |||
165 | if (len > mm->cached_hole_size) { | ||
166 | start_addr = mm->free_area_cache; | ||
167 | } else { | ||
168 | start_addr = TASK_UNMAPPED_BASE; | ||
169 | mm->cached_hole_size = 0; | ||
170 | } | ||
171 | |||
172 | full_search: | ||
173 | addr = ALIGN(start_addr, huge_page_size(h)); | ||
174 | |||
175 | for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { | ||
176 | /* At this point: (!vma || addr < vma->vm_end). */ | ||
177 | if (TASK_SIZE - len < addr) { | ||
178 | /* | ||
179 | * Start a new search - just in case we missed | ||
180 | * some holes. | ||
181 | */ | ||
182 | if (start_addr != TASK_UNMAPPED_BASE) { | ||
183 | start_addr = TASK_UNMAPPED_BASE; | ||
184 | mm->cached_hole_size = 0; | ||
185 | goto full_search; | ||
186 | } | ||
187 | return -ENOMEM; | ||
188 | } | ||
189 | if (!vma || addr + len <= vma->vm_start) { | ||
190 | mm->free_area_cache = addr + len; | ||
191 | return addr; | ||
192 | } | ||
193 | if (addr + mm->cached_hole_size < vma->vm_start) | ||
194 | mm->cached_hole_size = vma->vm_start - addr; | ||
195 | addr = ALIGN(vma->vm_end, huge_page_size(h)); | ||
196 | } | ||
197 | } | ||
198 | |||
199 | static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, | ||
200 | unsigned long addr0, unsigned long len, | ||
201 | unsigned long pgoff, unsigned long flags) | ||
202 | { | ||
203 | struct hstate *h = hstate_file(file); | ||
204 | struct mm_struct *mm = current->mm; | ||
205 | struct vm_area_struct *vma, *prev_vma; | ||
206 | unsigned long base = mm->mmap_base, addr = addr0; | ||
207 | unsigned long largest_hole = mm->cached_hole_size; | ||
208 | int first_time = 1; | ||
209 | |||
210 | /* don't allow allocations above current base */ | ||
211 | if (mm->free_area_cache > base) | ||
212 | mm->free_area_cache = base; | ||
213 | |||
214 | if (len <= largest_hole) { | ||
215 | largest_hole = 0; | ||
216 | mm->free_area_cache = base; | ||
217 | } | ||
218 | try_again: | ||
219 | /* make sure it can fit in the remaining address space */ | ||
220 | if (mm->free_area_cache < len) | ||
221 | goto fail; | ||
222 | |||
223 | /* either no address requested or can't fit in requested address hole */ | ||
224 | addr = (mm->free_area_cache - len) & huge_page_mask(h); | ||
225 | do { | ||
226 | /* | ||
227 | * Lookup failure means no vma is above this address, | ||
228 | * i.e. return with success: | ||
229 | */ | ||
230 | vma = find_vma_prev(mm, addr, &prev_vma); | ||
231 | if (!vma) | ||
232 | return addr; | ||
235 | |||
236 | /* | ||
237 | * new region fits between prev_vma->vm_end and | ||
238 | * vma->vm_start, use it: | ||
239 | */ | ||
240 | if (addr + len <= vma->vm_start && | ||
241 | (!prev_vma || (addr >= prev_vma->vm_end))) { | ||
242 | /* remember the address as a hint for next time */ | ||
243 | mm->cached_hole_size = largest_hole; | ||
244 | mm->free_area_cache = addr; | ||
245 | return addr; | ||
246 | } else { | ||
247 | /* pull free_area_cache down to the first hole */ | ||
248 | if (mm->free_area_cache == vma->vm_end) { | ||
249 | mm->free_area_cache = vma->vm_start; | ||
250 | mm->cached_hole_size = largest_hole; | ||
251 | } | ||
252 | } | ||
253 | |||
254 | /* remember the largest hole we saw so far */ | ||
255 | if (addr + largest_hole < vma->vm_start) | ||
256 | largest_hole = vma->vm_start - addr; | ||
257 | |||
258 | /* try just below the current vma->vm_start */ | ||
259 | addr = (vma->vm_start - len) & huge_page_mask(h); | ||
260 | |||
261 | } while (len <= vma->vm_start); | ||
262 | |||
263 | fail: | ||
264 | /* | ||
265 | * if hint left us with no space for the requested | ||
266 | * mapping then try again: | ||
267 | */ | ||
268 | if (first_time) { | ||
269 | mm->free_area_cache = base; | ||
270 | largest_hole = 0; | ||
271 | first_time = 0; | ||
272 | goto try_again; | ||
273 | } | ||
274 | /* | ||
275 | * A failed mmap() very likely causes application failure, | ||
276 | * so fall back to the bottom-up function here. This scenario | ||
277 | * can happen with large stack limits and large mmap() | ||
278 | * allocations. | ||
279 | */ | ||
280 | mm->free_area_cache = TASK_UNMAPPED_BASE; | ||
281 | mm->cached_hole_size = ~0UL; | ||
282 | addr = hugetlb_get_unmapped_area_bottomup(file, addr0, | ||
283 | len, pgoff, flags); | ||
284 | |||
285 | /* | ||
286 | * Restore the topdown base: | ||
287 | */ | ||
288 | mm->free_area_cache = base; | ||
289 | mm->cached_hole_size = ~0UL; | ||
290 | |||
291 | return addr; | ||
292 | } | ||
293 | |||
294 | unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | ||
295 | unsigned long len, unsigned long pgoff, unsigned long flags) | ||
296 | { | ||
297 | struct hstate *h = hstate_file(file); | ||
298 | struct mm_struct *mm = current->mm; | ||
299 | struct vm_area_struct *vma; | ||
300 | |||
301 | if (len & ~huge_page_mask(h)) | ||
302 | return -EINVAL; | ||
303 | if (len > TASK_SIZE) | ||
304 | return -ENOMEM; | ||
305 | |||
306 | if (flags & MAP_FIXED) { | ||
307 | if (prepare_hugepage_range(file, addr, len)) | ||
308 | return -EINVAL; | ||
309 | return addr; | ||
310 | } | ||
311 | |||
312 | if (addr) { | ||
313 | addr = ALIGN(addr, huge_page_size(h)); | ||
314 | vma = find_vma(mm, addr); | ||
315 | if (TASK_SIZE - len >= addr && | ||
316 | (!vma || addr + len <= vma->vm_start)) | ||
317 | return addr; | ||
318 | } | ||
319 | if (current->mm->get_unmapped_area == arch_get_unmapped_area) | ||
320 | return hugetlb_get_unmapped_area_bottomup(file, addr, len, | ||
321 | pgoff, flags); | ||
322 | else | ||
323 | return hugetlb_get_unmapped_area_topdown(file, addr, len, | ||
324 | pgoff, flags); | ||
325 | } | ||
326 | |||
327 | static __init int setup_hugepagesz(char *opt) | ||
328 | { | ||
329 | unsigned long ps = memparse(opt, &opt); | ||
330 | if (ps == PMD_SIZE) { | ||
331 | hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); | ||
332 | } else if (ps == PUD_SIZE) { | ||
333 | hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); | ||
334 | } else { | ||
335 | printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n", | ||
336 | ps >> 20); | ||
337 | return 0; | ||
338 | } | ||
339 | return 1; | ||
340 | } | ||
341 | __setup("hugepagesz=", setup_hugepagesz); | ||
342 | |||
343 | #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */ | ||
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c new file mode 100644 index 000000000000..125ac53b60fc --- /dev/null +++ b/arch/tile/mm/init.c | |||
@@ -0,0 +1,1082 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1995 Linus Torvalds | ||
3 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation, version 2. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
12 | * NON INFRINGEMENT. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/module.h> | ||
17 | #include <linux/signal.h> | ||
18 | #include <linux/sched.h> | ||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/errno.h> | ||
21 | #include <linux/string.h> | ||
22 | #include <linux/types.h> | ||
23 | #include <linux/ptrace.h> | ||
24 | #include <linux/mman.h> | ||
25 | #include <linux/mm.h> | ||
26 | #include <linux/hugetlb.h> | ||
27 | #include <linux/swap.h> | ||
28 | #include <linux/smp.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/highmem.h> | ||
31 | #include <linux/pagemap.h> | ||
32 | #include <linux/poison.h> | ||
33 | #include <linux/bootmem.h> | ||
34 | #include <linux/slab.h> | ||
35 | #include <linux/proc_fs.h> | ||
36 | #include <linux/efi.h> | ||
37 | #include <linux/memory_hotplug.h> | ||
38 | #include <linux/uaccess.h> | ||
39 | #include <asm/mmu_context.h> | ||
40 | #include <asm/processor.h> | ||
41 | #include <asm/system.h> | ||
42 | #include <asm/pgtable.h> | ||
43 | #include <asm/pgalloc.h> | ||
44 | #include <asm/dma.h> | ||
45 | #include <asm/fixmap.h> | ||
46 | #include <asm/tlb.h> | ||
47 | #include <asm/tlbflush.h> | ||
48 | #include <asm/sections.h> | ||
49 | #include <asm/setup.h> | ||
50 | #include <asm/homecache.h> | ||
51 | #include <hv/hypervisor.h> | ||
52 | #include <arch/chip.h> | ||
53 | |||
54 | #include "migrate.h" | ||
55 | |||
56 | /* | ||
57 | * We could set FORCE_MAX_ZONEORDER to "(HPAGE_SHIFT - PAGE_SHIFT + 1)" | ||
58 | * in the Tile Kconfig, but this generates configure warnings. | ||
59 | * Do it here and force people to get it right to compile this file. | ||
60 | * The problem is that with 4KB small pages and 16MB huge pages, | ||
61 | * the default value doesn't allow us to group enough small pages | ||
62 | * together to make up a huge page. | ||
63 | */ | ||
64 | #if CONFIG_FORCE_MAX_ZONEORDER < HPAGE_SHIFT - PAGE_SHIFT + 1 | ||
65 | # error "Change FORCE_MAX_ZONEORDER in arch/tile/Kconfig to match page size" | ||
66 | #endif | ||
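/*
 * Worked example (illustrative): with 4KB small pages (PAGE_SHIFT = 12)
 * and 16MB huge pages (HPAGE_SHIFT = 24), the check above requires
 * CONFIG_FORCE_MAX_ZONEORDER >= 24 - 12 + 1 = 13, so that the buddy
 * allocator can hand out order-12 blocks (4096 small pages = 16MB) and
 * a huge page can always be assembled from small pages.
 */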
67 | |||
68 | #define clear_pgd(pmdptr) (*(pmdptr) = hv_pte(0)) | ||
69 | |||
70 | unsigned long VMALLOC_RESERVE = CONFIG_VMALLOC_RESERVE; | ||
71 | |||
72 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
73 | |||
74 | /* Create an L2 page table */ | ||
75 | static pte_t * __init alloc_pte(void) | ||
76 | { | ||
77 | return __alloc_bootmem(L2_KERNEL_PGTABLE_SIZE, HV_PAGE_TABLE_ALIGN, 0); | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * L2 page tables per controller. We allocate these all at once from | ||
82 | * the bootmem allocator and store them here. This saves on kernel L2 | ||
83 | * page table memory, compared to allocating a full 64K page per L2 | ||
84 | * page table, and also means that in cases where we use huge pages, | ||
85 | * we are guaranteed to later be able to shatter those huge pages and | ||
86 | * switch to using these page tables instead, without requiring | ||
87 | * further allocation. Each l2_ptes[] entry points to the first page | ||
88 | * table for the first hugepage-size piece of memory on the | ||
89 | * controller; other page tables are just indexed directly, i.e. the | ||
90 | * L2 page tables are contiguous in memory for each controller. | ||
91 | */ | ||
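/*
 * Sizing sketch (illustrative, assuming the 4KB/16MB page sizes noted
 * elsewhere in this file): each L2 page table maps one 16MB huge page,
 * i.e. holds 16MB / 4KB = 4096 PTEs, so a controller with N bytes of
 * (low)memory gets N / 16MB contiguous L2 tables in its l2_ptes[]
 * allocation.
 */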
92 | static pte_t *l2_ptes[MAX_NUMNODES]; | ||
93 | static int num_l2_ptes[MAX_NUMNODES]; | ||
94 | |||
95 | static void init_prealloc_ptes(int node, int pages) | ||
96 | { | ||
97 | BUG_ON(pages & (HV_L2_ENTRIES-1)); | ||
98 | if (pages) { | ||
99 | num_l2_ptes[node] = pages; | ||
100 | l2_ptes[node] = __alloc_bootmem(pages * sizeof(pte_t), | ||
101 | HV_PAGE_TABLE_ALIGN, 0); | ||
102 | } | ||
103 | } | ||
104 | |||
105 | pte_t *get_prealloc_pte(unsigned long pfn) | ||
106 | { | ||
107 | int node = pfn_to_nid(pfn); | ||
108 | pfn &= ~(-1UL << (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT)); | ||
109 | BUG_ON(node >= MAX_NUMNODES); | ||
110 | BUG_ON(pfn >= num_l2_ptes[node]); | ||
111 | return &l2_ptes[node][pfn]; | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | * What caching do we expect pages from the heap to have when | ||
116 | * they are allocated during bootup? (Once we've installed the | ||
117 | * "real" swapper_pg_dir.) | ||
118 | */ | ||
119 | static int initial_heap_home(void) | ||
120 | { | ||
121 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
122 | if (hash_default) | ||
123 | return PAGE_HOME_HASH; | ||
124 | #endif | ||
125 | return smp_processor_id(); | ||
126 | } | ||
127 | |||
128 | /* | ||
129 | * Place a pointer to an L2 page table in a middle page | ||
130 | * directory entry. | ||
131 | */ | ||
132 | static void __init assign_pte(pmd_t *pmd, pte_t *page_table) | ||
133 | { | ||
134 | phys_addr_t pa = __pa(page_table); | ||
135 | unsigned long l2_ptfn = pa >> HV_LOG2_PAGE_TABLE_ALIGN; | ||
136 | pte_t pteval = hv_pte_set_ptfn(__pgprot(_PAGE_TABLE), l2_ptfn); | ||
137 | BUG_ON((pa & (HV_PAGE_TABLE_ALIGN-1)) != 0); | ||
138 | pteval = pte_set_home(pteval, initial_heap_home()); | ||
139 | *(pte_t *)pmd = pteval; | ||
140 | if (page_table != (pte_t *)pmd_page_vaddr(*pmd)) | ||
141 | BUG(); | ||
142 | } | ||
143 | |||
144 | #ifdef __tilegx__ | ||
145 | |||
146 | #if HV_L1_SIZE != HV_L2_SIZE | ||
147 | # error Rework assumption that L1 and L2 page tables are same size. | ||
148 | #endif | ||
149 | |||
150 | /* Since pmd_t arrays and pte_t arrays are the same size, just use casts. */ | ||
151 | static inline pmd_t *alloc_pmd(void) | ||
152 | { | ||
153 | return (pmd_t *)alloc_pte(); | ||
154 | } | ||
155 | |||
156 | static inline void assign_pmd(pud_t *pud, pmd_t *pmd) | ||
157 | { | ||
158 | assign_pte((pmd_t *)pud, (pte_t *)pmd); | ||
159 | } | ||
160 | |||
161 | #endif /* __tilegx__ */ | ||
162 | |||
163 | /* Replace the given pmd with a full PTE table. */ | ||
164 | void __init shatter_pmd(pmd_t *pmd) | ||
165 | { | ||
166 | pte_t *pte = get_prealloc_pte(pte_pfn(*(pte_t *)pmd)); | ||
167 | assign_pte(pmd, pte); | ||
168 | } | ||
169 | |||
170 | #ifdef CONFIG_HIGHMEM | ||
171 | /* | ||
172 | * This function initializes a certain range of kernel virtual memory | ||
173 | * with new bootmem page tables, everywhere page tables are missing in | ||
174 | * the given range. | ||
175 | */ | ||
176 | |||
177 | /* | ||
178 | * NOTE: The pagetables are allocated contiguous on the physical space | ||
179 | * so we can cache the place of the first one and move around without | ||
180 | * checking the pgd every time. | ||
181 | */ | ||
182 | static void __init page_table_range_init(unsigned long start, | ||
183 | unsigned long end, pgd_t *pgd_base) | ||
184 | { | ||
185 | pgd_t *pgd; | ||
186 | int pgd_idx; | ||
187 | unsigned long vaddr; | ||
188 | |||
189 | vaddr = start; | ||
190 | pgd_idx = pgd_index(vaddr); | ||
191 | pgd = pgd_base + pgd_idx; | ||
192 | |||
193 | for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { | ||
194 | pmd_t *pmd = pmd_offset(pud_offset(pgd, vaddr), vaddr); | ||
195 | if (pmd_none(*pmd)) | ||
196 | assign_pte(pmd, alloc_pte()); | ||
197 | vaddr += PMD_SIZE; | ||
198 | } | ||
199 | } | ||
200 | #endif /* CONFIG_HIGHMEM */ | ||
201 | |||
202 | |||
203 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
204 | |||
205 | static int __initdata ktext_hash = 1; /* .text pages */ | ||
206 | static int __initdata kdata_hash = 1; /* .data and .bss pages */ | ||
207 | int __write_once hash_default = 1; /* kernel allocator pages */ | ||
208 | EXPORT_SYMBOL(hash_default); | ||
209 | int __write_once kstack_hash = 1; /* if no homecaching, use hash-for-home */ | ||
210 | #endif /* CHIP_HAS_CBOX_HOME_MAP */ | ||
211 | |||
212 | /* | ||
213 | * CPUs to use for striping the pages of kernel data. If hash-for-home | ||
214 | * is available, this is only relevant if kcache_hash sets up the | ||
215 | * .data and .bss to be page-homed, and we don't want the default mode | ||
216 | * of using the full set of kernel cpus for the striping. | ||
217 | */ | ||
218 | static __initdata struct cpumask kdata_mask; | ||
219 | static __initdata int kdata_arg_seen; | ||
220 | |||
221 | int __write_once kdata_huge; /* if no homecaching, small pages */ | ||
222 | |||
223 | |||
224 | /* Combine a generic pgprot_t with cache home to get a cache-aware pgprot. */ | ||
225 | static pgprot_t __init construct_pgprot(pgprot_t prot, int home) | ||
226 | { | ||
227 | prot = pte_set_home(prot, home); | ||
228 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
229 | if (home == PAGE_HOME_IMMUTABLE) { | ||
230 | if (ktext_hash) | ||
231 | prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3); | ||
232 | else | ||
233 | prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3); | ||
234 | } | ||
235 | #endif | ||
236 | return prot; | ||
237 | } | ||
238 | |||
239 | /* | ||
240 | * For a given kernel data VA, how should it be cached? | ||
241 | * We return the complete pgprot_t with caching bits set. | ||
242 | */ | ||
243 | static pgprot_t __init init_pgprot(ulong address) | ||
244 | { | ||
245 | int cpu; | ||
246 | unsigned long page; | ||
247 | enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET }; | ||
248 | |||
249 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
250 | /* For kdata=huge, everything is just hash-for-home. */ | ||
251 | if (kdata_huge) | ||
252 | return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); | ||
253 | #endif | ||
254 | |||
255 | /* We map the aliased pages of permanent text inaccessible. */ | ||
256 | if (address < (ulong) _sinittext - CODE_DELTA) | ||
257 | return PAGE_NONE; | ||
258 | |||
259 | /* | ||
260 | * We map read-only data non-coherent for performance. We could | ||
261 | * use neighborhood caching on TILE64, but it's not clear it's a win. | ||
262 | */ | ||
263 | if ((address >= (ulong) __start_rodata && | ||
264 | address < (ulong) __end_rodata) || | ||
265 | address == (ulong) empty_zero_page) { | ||
266 | return construct_pgprot(PAGE_KERNEL_RO, PAGE_HOME_IMMUTABLE); | ||
267 | } | ||
268 | |||
269 | /* As a performance optimization, keep the boot init stack here. */ | ||
270 | if (address >= (ulong)&init_thread_union && | ||
271 | address < (ulong)&init_thread_union + THREAD_SIZE) | ||
272 | return construct_pgprot(PAGE_KERNEL, smp_processor_id()); | ||
273 | |||
274 | #ifndef __tilegx__ | ||
275 | #if !ATOMIC_LOCKS_FOUND_VIA_TABLE() | ||
276 | /* Force the atomic_locks[] array page to be hash-for-home. */ | ||
277 | if (address == (ulong) atomic_locks) | ||
278 | return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); | ||
279 | #endif | ||
280 | #endif | ||
281 | |||
282 | /* | ||
283 | * Everything else that isn't data or bss is heap, so mark it | ||
284 | * with the initial heap home (hash-for-home, or this cpu). This | ||
285 | * includes any addresses after the loaded image; any address before | ||
286 | * _einittext (since we already captured the case of text before | ||
287 | * _sinittext); and any init-data pages. | ||
288 | * | ||
289 | * All the LOWMEM pages that we mark this way will get their | ||
290 | * struct page homecache properly marked later, in set_page_homes(). | ||
291 | * The HIGHMEM pages we leave with a default zero for their | ||
292 | * homes, but with a zero free_time we don't have to actually | ||
293 | * do a flush action the first time we use them, either. | ||
294 | */ | ||
295 | if (address >= (ulong) _end || address < (ulong) _sdata || | ||
296 | (address >= (ulong) _sinitdata && | ||
297 | address < (ulong) _einitdata)) | ||
298 | return construct_pgprot(PAGE_KERNEL, initial_heap_home()); | ||
299 | |||
300 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
301 | /* Use hash-for-home if requested for data/bss. */ | ||
302 | if (kdata_hash) | ||
303 | return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); | ||
304 | #endif | ||
305 | |||
306 | /* | ||
307 | * Otherwise we just hand out consecutive cpus. To avoid | ||
308 | * requiring this function to hold state, we just walk forward from | ||
309 | * _sdata by PAGE_SIZE, skipping the readonly and init data, to reach | ||
310 | * the requested address, while walking cpu home around kdata_mask. | ||
311 | * This is typically no more than a dozen or so iterations. | ||
312 | */ | ||
313 | BUG_ON(_einitdata != __bss_start); | ||
314 | for (page = (ulong)_sdata, cpu = NR_CPUS; ; ) { | ||
315 | cpu = cpumask_next(cpu, &kdata_mask); | ||
316 | if (cpu == NR_CPUS) | ||
317 | cpu = cpumask_first(&kdata_mask); | ||
318 | if (page >= address) | ||
319 | break; | ||
320 | page += PAGE_SIZE; | ||
321 | if (page == (ulong)__start_rodata) | ||
322 | page = (ulong)__end_rodata; | ||
323 | if (page == (ulong)&init_thread_union) | ||
324 | page += THREAD_SIZE; | ||
325 | if (page == (ulong)_sinitdata) | ||
326 | page = (ulong)_einitdata; | ||
327 | if (page == (ulong)empty_zero_page) | ||
328 | page += PAGE_SIZE; | ||
329 | #ifndef __tilegx__ | ||
330 | #if !ATOMIC_LOCKS_FOUND_VIA_TABLE() | ||
331 | if (page == (ulong)atomic_locks) | ||
332 | page += PAGE_SIZE; | ||
333 | #endif | ||
334 | #endif | ||
335 | |||
336 | } | ||
337 | return construct_pgprot(PAGE_KERNEL, cpu); | ||
338 | } | ||
339 | |||
340 | /* | ||
341 | * This function sets up how we cache the kernel text. If we have | ||
342 | * hash-for-home support, normally that is used instead (see the | ||
343 | * kcache_hash boot flag for more information). But if we end up | ||
344 | * using a page-based caching technique, this option sets up the | ||
345 | * details of that. In addition, the "ktext=nocache" option may | ||
346 | * always be used to disable local caching of text pages, if desired. | ||
347 | */ | ||
348 | |||
349 | static int __initdata ktext_arg_seen; | ||
350 | static int __initdata ktext_small; | ||
351 | static int __initdata ktext_local; | ||
352 | static int __initdata ktext_all; | ||
353 | static int __initdata ktext_nondataplane; | ||
354 | static int __initdata ktext_nocache; | ||
355 | static struct cpumask __initdata ktext_mask; | ||
356 | |||
357 | static int __init setup_ktext(char *str) | ||
358 | { | ||
359 | if (str == NULL) | ||
360 | return -EINVAL; | ||
361 | |||
362 | /* If you have a leading "nocache", turn off ktext caching */ | ||
363 | if (strncmp(str, "nocache", 7) == 0) { | ||
364 | ktext_nocache = 1; | ||
365 | printk("ktext: disabling local caching of kernel text\n"); | ||
366 | str += 7; | ||
367 | if (*str == ',') | ||
368 | ++str; | ||
369 | if (*str == '\0') | ||
370 | return 0; | ||
371 | } | ||
372 | |||
373 | ktext_arg_seen = 1; | ||
374 | |||
375 | /* Default setting on Tile64: use a huge page */ | ||
376 | if (strcmp(str, "huge") == 0) | ||
377 | printk("ktext: using one huge locally cached page\n"); | ||
378 | |||
379 | /* Pay TLB cost but get no cache benefit: cache small pages locally */ | ||
380 | else if (strcmp(str, "local") == 0) { | ||
381 | ktext_small = 1; | ||
382 | ktext_local = 1; | ||
383 | printk("ktext: using small pages with local caching\n"); | ||
384 | } | ||
385 | |||
386 | /* Neighborhood cache ktext pages on all cpus. */ | ||
387 | else if (strcmp(str, "all") == 0) { | ||
388 | ktext_small = 1; | ||
389 | ktext_all = 1; | ||
390 | printk("ktext: using maximal caching neighborhood\n"); | ||
391 | } | ||
392 | |||
393 | |||
394 | /* Neighborhood ktext pages on specified mask */ | ||
395 | else if (cpulist_parse(str, &ktext_mask) == 0) { | ||
396 | char buf[NR_CPUS * 5]; | ||
397 | cpulist_scnprintf(buf, sizeof(buf), &ktext_mask); | ||
398 | if (cpumask_weight(&ktext_mask) > 1) { | ||
399 | ktext_small = 1; | ||
400 | printk("ktext: using caching neighborhood %s " | ||
401 | "with small pages\n", buf); | ||
402 | } else { | ||
403 | printk("ktext: caching on cpu %s with one huge page\n", | ||
404 | buf); | ||
405 | } | ||
406 | } | ||
407 | |||
408 | else if (*str) | ||
409 | return -EINVAL; | ||
410 | |||
411 | return 0; | ||
412 | } | ||
413 | |||
414 | early_param("ktext", setup_ktext); | ||
415 | |||
416 | |||
417 | static inline pgprot_t ktext_set_nocache(pgprot_t prot) | ||
418 | { | ||
419 | if (!ktext_nocache) | ||
420 | prot = hv_pte_set_nc(prot); | ||
421 | #if CHIP_HAS_NC_AND_NOALLOC_BITS() | ||
422 | else | ||
423 | prot = hv_pte_set_no_alloc_l2(prot); | ||
424 | #endif | ||
425 | return prot; | ||
426 | } | ||
427 | |||
428 | #ifndef __tilegx__ | ||
429 | static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) | ||
430 | { | ||
431 | return pmd_offset(pud_offset(&pgtables[pgd_index(va)], va), va); | ||
432 | } | ||
433 | #else | ||
434 | static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) | ||
435 | { | ||
436 | pud_t *pud = pud_offset(&pgtables[pgd_index(va)], va); | ||
437 | if (pud_none(*pud)) | ||
438 | assign_pmd(pud, alloc_pmd()); | ||
439 | return pmd_offset(pud, va); | ||
440 | } | ||
441 | #endif | ||
442 | |||
443 | /* Temporary page table we use for staging. */ | ||
444 | static pgd_t pgtables[PTRS_PER_PGD] | ||
445 | __attribute__((section(".init.page"))); | ||
446 | |||
447 | /* | ||
448 | * This maps the physical memory to kernel virtual address space, a total | ||
449 | * of max_low_pfn pages, by creating page tables starting from address | ||
450 | * PAGE_OFFSET. | ||
451 | * | ||
452 | * This routine transitions us from using a set of compiled-in large | ||
453 | * pages to using some more precise caching, including removing access | ||
454 | * to code pages mapped at PAGE_OFFSET (executed only at MEM_SV_START) | ||
455 | * marking read-only data as locally cacheable, striping the remaining | ||
456 | * .data and .bss across all the available tiles, and removing access | ||
457 | * to pages above the top of RAM (thus ensuring a page fault from a bad | ||
458 | * virtual address rather than a hypervisor shoot down for accessing | ||
459 | * memory outside the assigned limits). | ||
460 | */ | ||
461 | static void __init kernel_physical_mapping_init(pgd_t *pgd_base) | ||
462 | { | ||
463 | unsigned long address, pfn; | ||
464 | pmd_t *pmd; | ||
465 | pte_t *pte; | ||
466 | int pte_ofs; | ||
467 | const struct cpumask *my_cpu_mask = cpumask_of(smp_processor_id()); | ||
468 | struct cpumask kstripe_mask; | ||
469 | int rc, i; | ||
470 | |||
471 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
472 | if (ktext_arg_seen && ktext_hash) { | ||
473 | printk("warning: \"ktext\" boot argument ignored" | ||
474 | " if \"kcache_hash\" sets up text hash-for-home\n"); | ||
475 | ktext_small = 0; | ||
476 | } | ||
477 | |||
478 | if (kdata_arg_seen && kdata_hash) { | ||
479 | printk("warning: \"kdata\" boot argument ignored" | ||
480 | " if \"kcache_hash\" sets up data hash-for-home\n"); | ||
481 | } | ||
482 | |||
483 | if (kdata_huge && !hash_default) { | ||
484 | printk("warning: disabling \"kdata=huge\"; requires" | ||
485 | " kcache_hash=all or =allbutstack\n"); | ||
486 | kdata_huge = 0; | ||
487 | } | ||
488 | #endif | ||
489 | |||
490 | /* | ||
491 | * Set up a mask for cpus to use for kernel striping. | ||
492 | * This is normally all cpus, but minus dataplane cpus if any. | ||
493 | * If the dataplane covers the whole chip, we stripe over | ||
494 | * the whole chip too. | ||
495 | */ | ||
496 | cpumask_copy(&kstripe_mask, cpu_possible_mask); | ||
497 | if (!kdata_arg_seen) | ||
498 | kdata_mask = kstripe_mask; | ||
499 | |||
500 | /* Allocate and fill in L2 page tables */ | ||
501 | for (i = 0; i < MAX_NUMNODES; ++i) { | ||
502 | #ifdef CONFIG_HIGHMEM | ||
503 | unsigned long end_pfn = node_lowmem_end_pfn[i]; | ||
504 | #else | ||
505 | unsigned long end_pfn = node_end_pfn[i]; | ||
506 | #endif | ||
507 | unsigned long end_huge_pfn = 0; | ||
508 | |||
509 | /* Pre-shatter the last huge page to allow per-cpu pages. */ | ||
510 | if (kdata_huge) | ||
511 | end_huge_pfn = end_pfn - (HPAGE_SIZE >> PAGE_SHIFT); | ||
512 | |||
513 | pfn = node_start_pfn[i]; | ||
514 | |||
515 | /* Allocate enough memory to hold L2 page tables for node. */ | ||
516 | init_prealloc_ptes(i, end_pfn - pfn); | ||
517 | |||
518 | address = (unsigned long) pfn_to_kaddr(pfn); | ||
519 | while (pfn < end_pfn) { | ||
520 | BUG_ON(address & (HPAGE_SIZE-1)); | ||
521 | pmd = get_pmd(pgtables, address); | ||
522 | pte = get_prealloc_pte(pfn); | ||
523 | if (pfn < end_huge_pfn) { | ||
524 | pgprot_t prot = init_pgprot(address); | ||
525 | *(pte_t *)pmd = pte_mkhuge(pfn_pte(pfn, prot)); | ||
526 | for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE; | ||
527 | pfn++, pte_ofs++, address += PAGE_SIZE) | ||
528 | pte[pte_ofs] = pfn_pte(pfn, prot); | ||
529 | } else { | ||
530 | if (kdata_huge) | ||
531 | printk(KERN_DEBUG "pre-shattered huge" | ||
532 | " page at %#lx\n", address); | ||
533 | for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE; | ||
534 | pfn++, pte_ofs++, address += PAGE_SIZE) { | ||
535 | pgprot_t prot = init_pgprot(address); | ||
536 | pte[pte_ofs] = pfn_pte(pfn, prot); | ||
537 | } | ||
538 | assign_pte(pmd, pte); | ||
539 | } | ||
540 | } | ||
541 | } | ||
542 | |||
543 | /* | ||
544 | * Set or check ktext_map now that we have cpu_possible_mask | ||
545 | * and kstripe_mask to work with. | ||
546 | */ | ||
547 | if (ktext_all) | ||
548 | cpumask_copy(&ktext_mask, cpu_possible_mask); | ||
549 | else if (ktext_nondataplane) | ||
550 | ktext_mask = kstripe_mask; | ||
551 | else if (!cpumask_empty(&ktext_mask)) { | ||
552 | /* Sanity-check any mask that was requested */ | ||
553 | struct cpumask bad; | ||
554 | cpumask_andnot(&bad, &ktext_mask, cpu_possible_mask); | ||
555 | cpumask_and(&ktext_mask, &ktext_mask, cpu_possible_mask); | ||
556 | if (!cpumask_empty(&bad)) { | ||
557 | char buf[NR_CPUS * 5]; | ||
558 | cpulist_scnprintf(buf, sizeof(buf), &bad); | ||
559 | printk("ktext: not using unavailable cpus %s\n", buf); | ||
560 | } | ||
561 | if (cpumask_empty(&ktext_mask)) { | ||
562 | printk("ktext: no valid cpus; caching on %d.\n", | ||
563 | smp_processor_id()); | ||
564 | cpumask_copy(&ktext_mask, | ||
565 | cpumask_of(smp_processor_id())); | ||
566 | } | ||
567 | } | ||
568 | |||
569 | address = MEM_SV_INTRPT; | ||
570 | pmd = get_pmd(pgtables, address); | ||
571 | if (ktext_small) { | ||
572 | /* Allocate an L2 PTE for the kernel text */ | ||
573 | int cpu = 0; | ||
574 | pgprot_t prot = construct_pgprot(PAGE_KERNEL_EXEC, | ||
575 | PAGE_HOME_IMMUTABLE); | ||
576 | |||
577 | if (ktext_local) { | ||
578 | if (ktext_nocache) | ||
579 | prot = hv_pte_set_mode(prot, | ||
580 | HV_PTE_MODE_UNCACHED); | ||
581 | else | ||
582 | prot = hv_pte_set_mode(prot, | ||
583 | HV_PTE_MODE_CACHE_NO_L3); | ||
584 | } else { | ||
585 | prot = hv_pte_set_mode(prot, | ||
586 | HV_PTE_MODE_CACHE_TILE_L3); | ||
587 | cpu = cpumask_first(&ktext_mask); | ||
588 | |||
589 | prot = ktext_set_nocache(prot); | ||
590 | } | ||
591 | |||
592 | BUG_ON(address != (unsigned long)_stext); | ||
593 | pfn = 0; /* code starts at PA 0 */ | ||
594 | pte = alloc_pte(); | ||
595 | for (pte_ofs = 0; address < (unsigned long)_einittext; | ||
596 | pfn++, pte_ofs++, address += PAGE_SIZE) { | ||
597 | if (!ktext_local) { | ||
598 | prot = set_remote_cache_cpu(prot, cpu); | ||
599 | cpu = cpumask_next(cpu, &ktext_mask); | ||
600 | if (cpu == NR_CPUS) | ||
601 | cpu = cpumask_first(&ktext_mask); | ||
602 | } | ||
603 | pte[pte_ofs] = pfn_pte(pfn, prot); | ||
604 | } | ||
605 | assign_pte(pmd, pte); | ||
606 | } else { | ||
607 | pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC); | ||
608 | pteval = pte_mkhuge(pteval); | ||
609 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
610 | if (ktext_hash) { | ||
611 | pteval = hv_pte_set_mode(pteval, | ||
612 | HV_PTE_MODE_CACHE_HASH_L3); | ||
613 | pteval = ktext_set_nocache(pteval); | ||
614 | } else | ||
615 | #endif /* CHIP_HAS_CBOX_HOME_MAP() */ | ||
616 | if (cpumask_weight(&ktext_mask) == 1) { | ||
617 | pteval = set_remote_cache_cpu(pteval, | ||
618 | cpumask_first(&ktext_mask)); | ||
619 | pteval = hv_pte_set_mode(pteval, | ||
620 | HV_PTE_MODE_CACHE_TILE_L3); | ||
621 | pteval = ktext_set_nocache(pteval); | ||
622 | } else if (ktext_nocache) | ||
623 | pteval = hv_pte_set_mode(pteval, | ||
624 | HV_PTE_MODE_UNCACHED); | ||
625 | else | ||
626 | pteval = hv_pte_set_mode(pteval, | ||
627 | HV_PTE_MODE_CACHE_NO_L3); | ||
628 | *(pte_t *)pmd = pteval; | ||
629 | } | ||
630 | |||
631 | /* Set swapper_pgprot here so it is flushed to memory right away. */ | ||
632 | swapper_pgprot = init_pgprot((unsigned long)swapper_pg_dir); | ||
633 | |||
634 | /* | ||
635 | * Since we may be changing the caching of the stack and page | ||
636 | * table itself, we invoke an assembly helper to do the | ||
637 | * following steps: | ||
638 | * | ||
639 | * - flush the cache so we start with an empty slate | ||
640 | * - install pgtables[] as the real page table | ||
641 | * - flush the TLB so the new page table takes effect | ||
642 | */ | ||
643 | rc = flush_and_install_context(__pa(pgtables), | ||
644 | init_pgprot((unsigned long)pgtables), | ||
645 | __get_cpu_var(current_asid), | ||
646 | cpumask_bits(my_cpu_mask)); | ||
647 | BUG_ON(rc != 0); | ||
648 | |||
649 | /* Copy the page table back to the normal swapper_pg_dir. */ | ||
650 | memcpy(pgd_base, pgtables, sizeof(pgtables)); | ||
651 | __install_page_table(pgd_base, __get_cpu_var(current_asid), | ||
652 | swapper_pgprot); | ||
653 | } | ||
654 | |||
655 | /* | ||
656 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address | ||
657 | * is valid. The argument is a physical page number. | ||
658 | * | ||
659 | * On Tile, the only valid things for which we can just hand out unchecked | ||
660 | * PTEs are the kernel code and data. Anything else might change its | ||
661 | * homing with time, and we wouldn't know to adjust the /dev/mem PTEs. | ||
662 | * Note that init_thread_union is released to heap soon after boot, | ||
663 | * so we include it in the init data. | ||
664 | * | ||
665 | * For TILE-Gx, we might want to consider allowing access to PA | ||
666 | * regions corresponding to PCI space, etc. | ||
667 | */ | ||
668 | int devmem_is_allowed(unsigned long pagenr) | ||
669 | { | ||
670 | return pagenr < kaddr_to_pfn(_end) && | ||
671 | !(pagenr >= kaddr_to_pfn(&init_thread_union) || | ||
672 | pagenr < kaddr_to_pfn(_einitdata)) && | ||
673 | !(pagenr >= kaddr_to_pfn(_sinittext) || | ||
674 | pagenr <= kaddr_to_pfn(_einittext-1)); | ||
675 | } | ||
676 | |||
677 | #ifdef CONFIG_HIGHMEM | ||
678 | static void __init permanent_kmaps_init(pgd_t *pgd_base) | ||
679 | { | ||
680 | pgd_t *pgd; | ||
681 | pud_t *pud; | ||
682 | pmd_t *pmd; | ||
683 | pte_t *pte; | ||
684 | unsigned long vaddr; | ||
685 | |||
686 | vaddr = PKMAP_BASE; | ||
687 | page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); | ||
688 | |||
689 | pgd = swapper_pg_dir + pgd_index(vaddr); | ||
690 | pud = pud_offset(pgd, vaddr); | ||
691 | pmd = pmd_offset(pud, vaddr); | ||
692 | pte = pte_offset_kernel(pmd, vaddr); | ||
693 | pkmap_page_table = pte; | ||
694 | } | ||
695 | #endif /* CONFIG_HIGHMEM */ | ||
696 | |||
697 | |||
698 | static void __init init_free_pfn_range(unsigned long start, unsigned long end) | ||
699 | { | ||
700 | unsigned long pfn; | ||
701 | struct page *page = pfn_to_page(start); | ||
702 | |||
703 | for (pfn = start; pfn < end; ) { | ||
704 | /* Optimize by freeing pages in large batches */ | ||
705 | int order = __ffs(pfn); | ||
706 | int count, i; | ||
707 | struct page *p; | ||
708 | |||
709 | if (order >= MAX_ORDER) | ||
710 | order = MAX_ORDER-1; | ||
711 | count = 1 << order; | ||
712 | while (pfn + count > end) { | ||
713 | count >>= 1; | ||
714 | --order; | ||
715 | } | ||
716 | for (p = page, i = 0; i < count; ++i, ++p) { | ||
717 | __ClearPageReserved(p); | ||
718 | /* | ||
719 | * Hacky direct set to avoid unnecessary | ||
720 | * lock take/release for EVERY page here. | ||
721 | */ | ||
722 | p->_count.counter = 0; | ||
723 | p->_mapcount.counter = -1; | ||
724 | } | ||
725 | init_page_count(page); | ||
726 | __free_pages(page, order); | ||
727 | totalram_pages += count; | ||
728 | |||
729 | page += count; | ||
730 | pfn += count; | ||
731 | } | ||
732 | } | ||
733 | |||
734 | static void __init set_non_bootmem_pages_init(void) | ||
735 | { | ||
736 | struct zone *z; | ||
737 | for_each_zone(z) { | ||
738 | unsigned long start, end; | ||
739 | int nid = z->zone_pgdat->node_id; | ||
740 | |||
741 | start = z->zone_start_pfn; | ||
742 | if (start == 0) | ||
743 | continue; /* bootmem */ | ||
744 | end = start + z->spanned_pages; | ||
745 | if (zone_idx(z) == ZONE_NORMAL) { | ||
746 | BUG_ON(start != node_start_pfn[nid]); | ||
747 | start = node_free_pfn[nid]; | ||
748 | } | ||
749 | #ifdef CONFIG_HIGHMEM | ||
750 | if (zone_idx(z) == ZONE_HIGHMEM) | ||
751 | totalhigh_pages += z->spanned_pages; | ||
752 | #endif | ||
753 | if (kdata_huge) { | ||
754 | unsigned long percpu_pfn = node_percpu_pfn[nid]; | ||
755 | if (start < percpu_pfn && end > percpu_pfn) | ||
756 | end = percpu_pfn; | ||
757 | } | ||
758 | #ifdef CONFIG_PCI | ||
759 | if (start <= pci_reserve_start_pfn && | ||
760 | end > pci_reserve_start_pfn) { | ||
761 | if (end > pci_reserve_end_pfn) | ||
762 | init_free_pfn_range(pci_reserve_end_pfn, end); | ||
763 | end = pci_reserve_start_pfn; | ||
764 | } | ||
765 | #endif | ||
766 | init_free_pfn_range(start, end); | ||
767 | } | ||
768 | } | ||
769 | |||
770 | /* | ||
771 | * paging_init() sets up the page tables - note that all of lowmem is | ||
772 | * already mapped by head.S. | ||
773 | */ | ||
774 | void __init paging_init(void) | ||
775 | { | ||
776 | #ifdef CONFIG_HIGHMEM | ||
777 | unsigned long vaddr, end; | ||
778 | #endif | ||
779 | #ifdef __tilegx__ | ||
780 | pud_t *pud; | ||
781 | #endif | ||
782 | pgd_t *pgd_base = swapper_pg_dir; | ||
783 | |||
784 | kernel_physical_mapping_init(pgd_base); | ||
785 | |||
786 | #ifdef CONFIG_HIGHMEM | ||
787 | /* | ||
788 | * Fixed mappings, only the page table structure has to be | ||
789 | * created - mappings will be set by set_fixmap(): | ||
790 | */ | ||
791 | vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; | ||
792 | end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; | ||
793 | page_table_range_init(vaddr, end, pgd_base); | ||
794 | permanent_kmaps_init(pgd_base); | ||
795 | #endif | ||
796 | |||
797 | #ifdef __tilegx__ | ||
798 | /* | ||
799 | * Since GX allocates just one pmd_t array worth of vmalloc space, | ||
800 | * we go ahead and allocate it statically here, then share it | ||
801 | * globally. As a result we don't have to worry about any task | ||
802 | * changing init_mm once we get up and running, and there's no | ||
803 | * need for e.g. vmalloc_sync_all(). | ||
804 | */ | ||
805 | BUILD_BUG_ON(pgd_index(VMALLOC_START) != pgd_index(VMALLOC_END)); | ||
806 | pud = pud_offset(pgd_base + pgd_index(VMALLOC_START), VMALLOC_START); | ||
807 | assign_pmd(pud, alloc_pmd()); | ||
808 | #endif | ||
809 | } | ||
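The BUILD_BUG_ON relies on the entire vmalloc region falling under a single top-level page-table entry, so the one statically allocated pmd can serve every mm. A user-space sketch of that check; PGDIR_SHIFT and the vmalloc bounds below are invented for illustration, not the real tilegx values:

        #include <assert.h>
        #include <stdio.h>

        /* Hypothetical layout constants; the real ones come from pgtable.h. */
        #define PGDIR_SHIFT     30
        #define VMALLOC_START   0xfffffff500000000ULL
        #define VMALLOC_END     0xfffffff53fffffffULL

        #define pgd_index(va)   ((unsigned long long)(va) >> PGDIR_SHIFT)

        int main(void)
        {
                /* The same condition the BUILD_BUG_ON in paging_init() enforces. */
                assert(pgd_index(VMALLOC_START) == pgd_index(VMALLOC_END));
                printf("vmalloc range lives under pgd index %#llx\n",
                       pgd_index(VMALLOC_START));
                return 0;
        }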
810 | |||
811 | |||
812 | /* | ||
813 | * Walk the kernel page tables and derive the page_home() from | ||
814 | * the PTEs, so that set_pte() can properly validate the caching | ||
815 | * of all PTEs it sees. | ||
816 | */ | ||
817 | void __init set_page_homes(void) | ||
818 | { | ||
819 | } | ||
820 | |||
821 | static void __init set_max_mapnr_init(void) | ||
822 | { | ||
823 | #ifdef CONFIG_FLATMEM | ||
824 | max_mapnr = max_low_pfn; | ||
825 | #endif | ||
826 | } | ||
827 | |||
828 | void __init mem_init(void) | ||
829 | { | ||
830 | int codesize, datasize, initsize; | ||
831 | int i; | ||
832 | #ifndef __tilegx__ | ||
833 | void *last; | ||
834 | #endif | ||
835 | |||
836 | #ifdef CONFIG_FLATMEM | ||
837 | if (!mem_map) | ||
838 | BUG(); | ||
839 | #endif | ||
840 | |||
841 | #ifdef CONFIG_HIGHMEM | ||
842 | /* check that fixmap and pkmap do not overlap */ | ||
843 | if (PKMAP_ADDR(LAST_PKMAP-1) >= FIXADDR_START) { | ||
844 | printk(KERN_ERR "fixmap and kmap areas overlap" | ||
845 | " - this will crash\n"); | ||
846 | printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", | ||
847 | PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP-1), | ||
848 | FIXADDR_START); | ||
849 | BUG(); | ||
850 | } | ||
851 | #endif | ||
852 | |||
853 | set_max_mapnr_init(); | ||
854 | |||
855 | /* this will put all bootmem onto the freelists */ | ||
856 | totalram_pages += free_all_bootmem(); | ||
857 | |||
858 | /* count all remaining LOWMEM and give all HIGHMEM to page allocator */ | ||
859 | set_non_bootmem_pages_init(); | ||
860 | |||
861 | codesize = (unsigned long)&_etext - (unsigned long)&_text; | ||
862 | datasize = (unsigned long)&_end - (unsigned long)&_sdata; | ||
863 | initsize = (unsigned long)&_einittext - (unsigned long)&_sinittext; | ||
864 | initsize += (unsigned long)&_einitdata - (unsigned long)&_sinitdata; | ||
865 | |||
866 | printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n", | ||
867 | (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), | ||
868 | num_physpages << (PAGE_SHIFT-10), | ||
869 | codesize >> 10, | ||
870 | datasize >> 10, | ||
871 | initsize >> 10, | ||
872 | (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) | ||
873 | ); | ||
874 | |||
875 | /* | ||
876 | * In debug mode, dump some interesting memory mappings. | ||
877 | */ | ||
878 | #ifdef CONFIG_HIGHMEM | ||
879 | printk(KERN_DEBUG " KMAP %#lx - %#lx\n", | ||
880 | FIXADDR_START, FIXADDR_TOP + PAGE_SIZE - 1); | ||
881 | printk(KERN_DEBUG " PKMAP %#lx - %#lx\n", | ||
882 | PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP) - 1); | ||
883 | #endif | ||
884 | #ifdef CONFIG_HUGEVMAP | ||
885 | printk(KERN_DEBUG " HUGEMAP %#lx - %#lx\n", | ||
886 | HUGE_VMAP_BASE, HUGE_VMAP_END - 1); | ||
887 | #endif | ||
888 | printk(KERN_DEBUG " VMALLOC %#lx - %#lx\n", | ||
889 | _VMALLOC_START, _VMALLOC_END - 1); | ||
890 | #ifdef __tilegx__ | ||
891 | for (i = MAX_NUMNODES-1; i >= 0; --i) { | ||
892 | struct pglist_data *node = &node_data[i]; | ||
893 | if (node->node_present_pages) { | ||
894 | unsigned long start = (unsigned long) | ||
895 | pfn_to_kaddr(node->node_start_pfn); | ||
896 | unsigned long end = start + | ||
897 | (node->node_present_pages << PAGE_SHIFT); | ||
898 | printk(KERN_DEBUG " MEM%d %#lx - %#lx\n", | ||
899 | i, start, end - 1); | ||
900 | } | ||
901 | } | ||
902 | #else | ||
903 | last = high_memory; | ||
904 | for (i = MAX_NUMNODES-1; i >= 0; --i) { | ||
905 | if ((unsigned long)vbase_map[i] != -1UL) { | ||
906 | printk(KERN_DEBUG " LOWMEM%d %#lx - %#lx\n", | ||
907 | i, (unsigned long) (vbase_map[i]), | ||
908 | (unsigned long) (last-1)); | ||
909 | last = vbase_map[i]; | ||
910 | } | ||
911 | } | ||
912 | #endif | ||
913 | |||
914 | #ifndef __tilegx__ | ||
915 | /* | ||
916 | * Convert from using one lock for all atomic operations to | ||
917 | * one per cpu. | ||
918 | */ | ||
919 | __init_atomic_per_cpu(); | ||
920 | #endif | ||
921 | } | ||
922 | |||
923 | /* | ||
924 | * This is for the non-NUMA, single-node SMP system case. | ||
925 | * As on x86, we always add hot-plugged memory | ||
926 | * to the highmem zone for now. | ||
927 | */ | ||
928 | #ifndef CONFIG_NEED_MULTIPLE_NODES | ||
929 | int arch_add_memory(u64 start, u64 size) | ||
930 | { | ||
931 | struct pglist_data *pgdata = &contig_page_data; | ||
932 | struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1; | ||
933 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
934 | unsigned long nr_pages = size >> PAGE_SHIFT; | ||
935 | |||
936 | return __add_pages(zone, start_pfn, nr_pages); | ||
937 | } | ||
938 | |||
939 | int remove_memory(u64 start, u64 size) | ||
940 | { | ||
941 | return -EINVAL; | ||
942 | } | ||
943 | #endif | ||
944 | |||
945 | struct kmem_cache *pgd_cache; | ||
946 | |||
947 | void __init pgtable_cache_init(void) | ||
948 | { | ||
949 | pgd_cache = kmem_cache_create("pgd", | ||
950 | PTRS_PER_PGD*sizeof(pgd_t), | ||
951 | PTRS_PER_PGD*sizeof(pgd_t), | ||
952 | 0, | ||
953 | NULL); | ||
954 | if (!pgd_cache) | ||
955 | panic("pgtable_cache_init(): Cannot create pgd cache"); | ||
956 | } | ||
957 | |||
958 | #if !CHIP_HAS_COHERENT_LOCAL_CACHE() | ||
959 | /* | ||
960 | * The __w1data area holds data that is only written during initialization, | ||
961 | * and is read-only and thus freely cacheable thereafter. Fix the page | ||
962 | * table entries that cover that region accordingly. | ||
963 | */ | ||
964 | static void mark_w1data_ro(void) | ||
965 | { | ||
966 | /* Loop over page table entries */ | ||
967 | unsigned long addr = (unsigned long)__w1data_begin; | ||
968 | BUG_ON((addr & (PAGE_SIZE-1)) != 0); | ||
969 | for (; addr <= (unsigned long)__w1data_end - 1; addr += PAGE_SIZE) { | ||
970 | unsigned long pfn = kaddr_to_pfn((void *)addr); | ||
971 | struct page *page = pfn_to_page(pfn); | ||
972 | pte_t *ptep = virt_to_pte(NULL, addr); | ||
973 | BUG_ON(pte_huge(*ptep)); /* not relevant for kdata_huge */ | ||
974 | set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL_RO)); | ||
975 | } | ||
976 | } | ||
977 | #endif | ||
978 | |||
979 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
980 | static long __write_once initfree; | ||
981 | #else | ||
982 | static long __write_once initfree = 1; | ||
983 | #endif | ||
984 | |||
985 | /* Select whether to free (1) or mark unusable (0) the __init pages. */ | ||
986 | static int __init set_initfree(char *str) | ||
987 | { | ||
988 | strict_strtol(str, 0, &initfree); | ||
989 | printk("initfree: %s free init pages\n", initfree ? "will" : "won't"); | ||
990 | return 1; | ||
991 | } | ||
992 | __setup("initfree=", set_initfree); | ||
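For illustration, the knob registered above is passed on the kernel command line. Given the default above and the handling in free_init_pages():

        initfree=1      free the __init pages after boot (the default unless
                        CONFIG_DEBUG_PAGEALLOC is enabled)
        initfree=0      keep the pages but clear their PTEs, so any stray
                        access to init code or data takes a kernel page fault

Note that free_init_pages() below overrides initfree=0 when kdata=huge is in use.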
993 | |||
994 | static void free_init_pages(char *what, unsigned long begin, unsigned long end) | ||
995 | { | ||
996 | unsigned long addr = (unsigned long) begin; | ||
997 | |||
998 | if (kdata_huge && !initfree) { | ||
999 | printk("Warning: ignoring initfree=0:" | ||
1000 | " incompatible with kdata=huge\n"); | ||
1001 | initfree = 1; | ||
1002 | } | ||
1003 | end = (end + PAGE_SIZE - 1) & PAGE_MASK; | ||
1004 | local_flush_tlb_pages(NULL, begin, PAGE_SIZE, end - begin); | ||
1005 | for (addr = begin; addr < end; addr += PAGE_SIZE) { | ||
1006 | /* | ||
1007 | * Note we just reset the home here directly in the | ||
1008 | * page table. We know this is safe because our caller | ||
1009 | * just flushed the caches on all the other cpus, | ||
1010 | * and they won't be touching any of these pages. | ||
1011 | */ | ||
1012 | int pfn = kaddr_to_pfn((void *)addr); | ||
1013 | struct page *page = pfn_to_page(pfn); | ||
1014 | pte_t *ptep = virt_to_pte(NULL, addr); | ||
1015 | if (!initfree) { | ||
1016 | /* | ||
1017 | * If debugging page accesses then do not free | ||
1018 | * this memory but mark them not present - any | ||
1019 | * buggy init-section access will create a | ||
1020 | * kernel page fault: | ||
1021 | */ | ||
1022 | pte_clear(&init_mm, addr, ptep); | ||
1023 | continue; | ||
1024 | } | ||
1025 | __ClearPageReserved(page); | ||
1026 | init_page_count(page); | ||
1027 | if (pte_huge(*ptep)) | ||
1028 | BUG_ON(!kdata_huge); | ||
1029 | else | ||
1030 | set_pte_at(&init_mm, addr, ptep, | ||
1031 | pfn_pte(pfn, PAGE_KERNEL)); | ||
1032 | memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); | ||
1033 | free_page(addr); | ||
1034 | totalram_pages++; | ||
1035 | } | ||
1036 | printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); | ||
1037 | } | ||
1038 | |||
1039 | void free_initmem(void) | ||
1040 | { | ||
1041 | const unsigned long text_delta = MEM_SV_INTRPT - PAGE_OFFSET; | ||
1042 | |||
1043 | /* | ||
1044 | * Evict the dirty initdata on the boot cpu, evict the w1data | ||
1045 | * wherever it's homed, and evict all the init code everywhere. | ||
1046 | * We are guaranteed that no one will touch the init pages any | ||
1047 | * more, and although other cpus may be touching the w1data, | ||
1048 | * we only actually change the caching on tile64, which won't | ||
1049 | * be keeping local copies in the other tiles' caches anyway. | ||
1050 | */ | ||
1051 | homecache_evict(&cpu_cacheable_map); | ||
1052 | |||
1053 | /* Free the data pages that we won't use again after init. */ | ||
1054 | free_init_pages("unused kernel data", | ||
1055 | (unsigned long)_sinitdata, | ||
1056 | (unsigned long)_einitdata); | ||
1057 | |||
1058 | /* | ||
1059 | * Free the pages mapped from 0xc0000000 that correspond to code | ||
1060 | * pages from 0xfd000000 that we won't use again after init. | ||
1061 | */ | ||
1062 | free_init_pages("unused kernel text", | ||
1063 | (unsigned long)_sinittext - text_delta, | ||
1064 | (unsigned long)_einittext - text_delta); | ||
1065 | |||
1066 | #if !CHIP_HAS_COHERENT_LOCAL_CACHE() | ||
1067 | /* | ||
1068 | * Upgrade the .w1data section to globally cached. | ||
1069 | * We don't do this on tilepro, since the cache architecture | ||
1070 | * pretty much makes it irrelevant, and in any case we end | ||
1071 | * up having racing issues with other tiles that may touch | ||
1072 | * the data after we flush the cache but before we update | ||
1073 | * the PTEs and flush the TLBs, causing sharer shootdowns | ||
1074 | * later. Even though this is to clean data, it seems like | ||
1075 | * an unnecessary complication. | ||
1076 | */ | ||
1077 | mark_w1data_ro(); | ||
1078 | #endif | ||
1079 | |||
1080 | /* Do a global TLB flush so everyone sees the changes. */ | ||
1081 | flush_tlb_all(); | ||
1082 | } | ||
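As a worked example of the text_delta arithmetic, using the two addresses named in the comments above (PAGE_OFFSET at 0xc0000000, MEM_SV_INTRPT at 0xfd000000) and a hypothetical _sinittext:

        #include <assert.h>
        #include <stdio.h>

        /* Addresses taken from the comments above; other configs differ. */
        #define PAGE_OFFSET     0xc0000000UL
        #define MEM_SV_INTRPT   0xfd000000UL

        int main(void)
        {
                unsigned long text_delta = MEM_SV_INTRPT - PAGE_OFFSET; /* 0x3d000000 */
                unsigned long sinittext = 0xfd200000UL; /* hypothetical _sinittext */
                unsigned long alias = sinittext - text_delta;

                assert(alias == 0xc0200000UL);  /* same pages, lowmem mapping */
                printf("init text at %#lx is freed via its %#lx alias\n",
                       sinittext, alias);
                return 0;
        }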
diff --git a/arch/tile/mm/migrate.h b/arch/tile/mm/migrate.h new file mode 100644 index 000000000000..cd45a0837fa6 --- /dev/null +++ b/arch/tile/mm/migrate.h | |||
@@ -0,0 +1,50 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * Structure definitions for migration, exposed here for use by | ||
15 | * arch/tile/kernel/asm-offsets.c. | ||
16 | */ | ||
17 | |||
18 | #ifndef MM_MIGRATE_H | ||
19 | #define MM_MIGRATE_H | ||
20 | |||
21 | #include <linux/cpumask.h> | ||
22 | #include <hv/hypervisor.h> | ||
23 | |||
24 | /* | ||
25 | * This function is used as a helper when setting up the initial | ||
26 | * page table (swapper_pg_dir). | ||
27 | */ | ||
28 | extern int flush_and_install_context(HV_PhysAddr page_table, HV_PTE access, | ||
29 | HV_ASID asid, | ||
30 | const unsigned long *cpumask); | ||
31 | |||
32 | /* | ||
33 | * This function supports migration as a "helper" as follows: | ||
34 | * | ||
35 | * - Set the stack PTE itself to "migrating". | ||
36 | * - Do a global TLB flush for (va,length) and the specified ASIDs. | ||
37 | * - Do a cache-evict on all necessary cpus. | ||
38 | * - Write the new stack PTE. | ||
39 | * | ||
40 | * Note that any non-NULL pointers must not point to the page that | ||
41 | * is handled by the stack_pte itself. | ||
42 | */ | ||
43 | extern int homecache_migrate_stack_and_flush(pte_t stack_pte, unsigned long va, | ||
44 | size_t length, pte_t *stack_ptep, | ||
45 | const struct cpumask *cache_cpumask, | ||
46 | const struct cpumask *tlb_cpumask, | ||
47 | HV_Remote_ASID *asids, | ||
48 | int asidcount); | ||
49 | |||
50 | #endif /* MM_MIGRATE_H */ | ||

diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S new file mode 100644 index 000000000000..f738765cd1e6 --- /dev/null +++ b/arch/tile/mm/migrate_32.S | |||
@@ -0,0 +1,211 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * This routine is a helper for migrating the home of a set of pages to | ||
15 | * a new cpu. See the documentation in homecache.c for more information. | ||
16 | */ | ||
17 | |||
18 | #include <linux/linkage.h> | ||
19 | #include <linux/threads.h> | ||
20 | #include <asm/page.h> | ||
21 | #include <asm/types.h> | ||
22 | #include <asm/asm-offsets.h> | ||
23 | #include <hv/hypervisor.h> | ||
24 | |||
25 | .text | ||
26 | |||
27 | /* | ||
28 | * First, some definitions that apply to all the code in the file. | ||
29 | */ | ||
30 | |||
31 | /* Locals (caller-save) */ | ||
32 | #define r_tmp r10 | ||
33 | #define r_save_sp r11 | ||
34 | |||
35 | /* What we save where in the stack frame; must include all callee-saves. */ | ||
36 | #define FRAME_SP 4 | ||
37 | #define FRAME_R30 8 | ||
38 | #define FRAME_R31 12 | ||
39 | #define FRAME_R32 16 | ||
40 | #define FRAME_R33 20 | ||
41 | #define FRAME_R34 24 | ||
42 | #define FRAME_R35 28 | ||
43 | #define FRAME_SIZE 32 | ||
44 | |||
45 | |||
46 | |||
47 | |||
48 | /* | ||
49 | * On entry: | ||
50 | * | ||
51 | * r0 low word of the new context PA to install (moved to r_context_lo) | ||
52 | * r1 high word of the new context PA to install (moved to r_context_hi) | ||
53 | * r2 low word of PTE to use for context access (moved to r_access_lo) | ||
54 | * r3 high word of PTE to use for context access (moved to r_access_hi) | ||
55 | * r4 ASID to use for new context (moved to r_asid) | ||
56 | * r5 pointer to cpumask with just this cpu set in it (r_my_cpumask) | ||
57 | */ | ||
58 | |||
59 | /* Arguments (caller-save) */ | ||
60 | #define r_context_lo_in r0 | ||
61 | #define r_context_hi_in r1 | ||
62 | #define r_access_lo_in r2 | ||
63 | #define r_access_hi_in r3 | ||
64 | #define r_asid_in r4 | ||
65 | #define r_my_cpumask r5 | ||
66 | |||
67 | /* Locals (callee-save); must not be more than FRAME_xxx above. */ | ||
68 | #define r_save_ics r30 | ||
69 | #define r_context_lo r31 | ||
70 | #define r_context_hi r32 | ||
71 | #define r_access_lo r33 | ||
72 | #define r_access_hi r34 | ||
73 | #define r_asid r35 | ||
74 | |||
75 | STD_ENTRY(flush_and_install_context) | ||
76 | /* | ||
77 | * Create a stack frame; we can't touch it once we flush the | ||
78 | * cache until we install the new page table and flush the TLB. | ||
79 | */ | ||
80 | { | ||
81 | move r_save_sp, sp | ||
82 | sw sp, lr | ||
83 | addi sp, sp, -FRAME_SIZE | ||
84 | } | ||
85 | addi r_tmp, sp, FRAME_SP | ||
86 | { | ||
87 | sw r_tmp, r_save_sp | ||
88 | addi r_tmp, sp, FRAME_R30 | ||
89 | } | ||
90 | { | ||
91 | sw r_tmp, r30 | ||
92 | addi r_tmp, sp, FRAME_R31 | ||
93 | } | ||
94 | { | ||
95 | sw r_tmp, r31 | ||
96 | addi r_tmp, sp, FRAME_R32 | ||
97 | } | ||
98 | { | ||
99 | sw r_tmp, r32 | ||
100 | addi r_tmp, sp, FRAME_R33 | ||
101 | } | ||
102 | { | ||
103 | sw r_tmp, r33 | ||
104 | addi r_tmp, sp, FRAME_R34 | ||
105 | } | ||
106 | { | ||
107 | sw r_tmp, r34 | ||
108 | addi r_tmp, sp, FRAME_R35 | ||
109 | } | ||
110 | sw r_tmp, r35 | ||
111 | |||
112 | /* Move some arguments to callee-save registers. */ | ||
113 | { | ||
114 | move r_context_lo, r_context_lo_in | ||
115 | move r_context_hi, r_context_hi_in | ||
116 | } | ||
117 | { | ||
118 | move r_access_lo, r_access_lo_in | ||
119 | move r_access_hi, r_access_hi_in | ||
120 | } | ||
121 | move r_asid, r_asid_in | ||
122 | |||
123 | /* Disable interrupts, since we can't use our stack. */ | ||
124 | { | ||
125 | mfspr r_save_ics, INTERRUPT_CRITICAL_SECTION | ||
126 | movei r_tmp, 1 | ||
127 | } | ||
128 | mtspr INTERRUPT_CRITICAL_SECTION, r_tmp | ||
129 | |||
130 | /* First, flush our L2 cache. */ | ||
131 | { | ||
132 | move r0, zero /* cache_pa */ | ||
133 | move r1, zero | ||
134 | } | ||
135 | { | ||
136 | auli r2, zero, ha16(HV_FLUSH_EVICT_L2) /* cache_control */ | ||
137 | move r3, r_my_cpumask /* cache_cpumask */ | ||
138 | } | ||
139 | { | ||
140 | move r4, zero /* tlb_va */ | ||
141 | move r5, zero /* tlb_length */ | ||
142 | } | ||
143 | { | ||
144 | move r6, zero /* tlb_pgsize */ | ||
145 | move r7, zero /* tlb_cpumask */ | ||
146 | } | ||
147 | { | ||
148 | move r8, zero /* asids */ | ||
149 | move r9, zero /* asidcount */ | ||
150 | } | ||
151 | jal hv_flush_remote | ||
152 | bnz r0, .Ldone | ||
153 | |||
154 | /* Now install the new page table. */ | ||
155 | { | ||
156 | move r0, r_context_lo | ||
157 | move r1, r_context_hi | ||
158 | } | ||
159 | { | ||
160 | move r2, r_access_lo | ||
161 | move r3, r_access_hi | ||
162 | } | ||
163 | { | ||
164 | move r4, r_asid | ||
165 | movei r5, HV_CTX_DIRECTIO | ||
166 | } | ||
167 | jal hv_install_context | ||
168 | bnz r0, .Ldone | ||
169 | |||
170 | /* Finally, flush the TLB. */ | ||
171 | { | ||
172 | movei r0, 0 /* preserve_global */ | ||
173 | jal hv_flush_all | ||
174 | } | ||
175 | |||
176 | .Ldone: | ||
177 | /* Reset interrupts back how they were before. */ | ||
178 | mtspr INTERRUPT_CRITICAL_SECTION, r_save_ics | ||
179 | |||
180 | /* Restore the callee-saved registers and return. */ | ||
181 | addli lr, sp, FRAME_SIZE | ||
182 | { | ||
183 | lw lr, lr | ||
184 | addli r_tmp, sp, FRAME_R30 | ||
185 | } | ||
186 | { | ||
187 | lw r30, r_tmp | ||
188 | addli r_tmp, sp, FRAME_R31 | ||
189 | } | ||
190 | { | ||
191 | lw r31, r_tmp | ||
192 | addli r_tmp, sp, FRAME_R32 | ||
193 | } | ||
194 | { | ||
195 | lw r32, r_tmp | ||
196 | addli r_tmp, sp, FRAME_R33 | ||
197 | } | ||
198 | { | ||
199 | lw r33, r_tmp | ||
200 | addli r_tmp, sp, FRAME_R34 | ||
201 | } | ||
202 | { | ||
203 | lw r34, r_tmp | ||
204 | addli r_tmp, sp, FRAME_R35 | ||
205 | } | ||
206 | { | ||
207 | lw r35, r_tmp | ||
208 | addi sp, sp, FRAME_SIZE | ||
209 | } | ||
210 | jrp lr | ||
211 | STD_ENDPROC(flush_and_install_context) | ||
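The entry comment above shows the 64-bit HV_PhysAddr and HV_PTE arguments of flush_and_install_context() arriving as low/high word pairs in r0/r1 and r2/r3 on the 32-bit ABI. A small sketch of that split, treating the physical address as a plain integer (the value is made up):

        #include <assert.h>
        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                uint64_t page_table_pa = 0x123456789abcULL;   /* hypothetical PA */

                /* Low word goes in r0, high word in r1 (r2/r3 for the PTE). */
                uint32_t lo = (uint32_t)page_table_pa;
                uint32_t hi = (uint32_t)(page_table_pa >> 32);

                assert((((uint64_t)hi << 32) | lo) == page_table_pa);
                printf("r0 = %#x, r1 = %#x\n", lo, hi);
                return 0;
        }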
diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c new file mode 100644 index 000000000000..f96f4cec602a --- /dev/null +++ b/arch/tile/mm/mmap.c | |||
@@ -0,0 +1,75 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * Taken from the i386 architecture and simplified. | ||
15 | */ | ||
16 | |||
17 | #include <linux/mm.h> | ||
18 | #include <linux/random.h> | ||
19 | #include <linux/limits.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/mman.h> | ||
22 | #include <linux/compat.h> | ||
23 | |||
24 | /* | ||
25 | * Top of mmap area (just below the process stack). | ||
26 | * | ||
27 | * Leave at least a ~128 MB hole. | ||
28 | */ | ||
29 | #define MIN_GAP (128*1024*1024) | ||
30 | #define MAX_GAP (TASK_SIZE/6*5) | ||
31 | |||
32 | static inline unsigned long mmap_base(struct mm_struct *mm) | ||
33 | { | ||
34 | unsigned long gap = rlimit(RLIMIT_STACK); | ||
35 | unsigned long random_factor = 0; | ||
36 | |||
37 | if (current->flags & PF_RANDOMIZE) | ||
38 | random_factor = get_random_int() % (1024*1024); | ||
39 | |||
40 | if (gap < MIN_GAP) | ||
41 | gap = MIN_GAP; | ||
42 | else if (gap > MAX_GAP) | ||
43 | gap = MAX_GAP; | ||
44 | |||
45 | return PAGE_ALIGN(TASK_SIZE - gap - random_factor); | ||
46 | } | ||
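To see how the gap clamping behaves, here is a user-space sketch of mmap_base() with randomization omitted; TASK_SIZE and the 64 KB page size below are assumptions for the example, not values taken from the tile headers:

        #include <stdio.h>

        /* Assumed values for illustration only. */
        #define TASK_SIZE       0xc0000000UL
        #define PAGE_SIZE       0x10000UL               /* assumed 64 KB pages */
        #define PAGE_MASK       (~(PAGE_SIZE - 1))
        #define PAGE_ALIGN(x)   (((x) + PAGE_SIZE - 1) & PAGE_MASK)

        #define MIN_GAP (128*1024*1024)
        #define MAX_GAP (TASK_SIZE/6*5)

        static unsigned long mmap_base(unsigned long stack_rlimit)
        {
                unsigned long gap = stack_rlimit;

                if (gap < MIN_GAP)
                        gap = MIN_GAP;
                else if (gap > MAX_GAP)
                        gap = MAX_GAP;
                return PAGE_ALIGN(TASK_SIZE - gap);     /* no randomization here */
        }

        int main(void)
        {
                printf("%#lx\n", mmap_base(8UL << 20));   /* 8 MB: clamped to MIN_GAP */
                printf("%#lx\n", mmap_base(512UL << 20)); /* 512 MB: used as given */
                printf("%#lx\n", mmap_base(3UL << 30));   /* 3 GB: clamped to MAX_GAP */
                return 0;
        }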
47 | |||
48 | /* | ||
49 | * This function, called very early during the creation of a new | ||
50 | * process VM image, sets up which VM layout function to use: | ||
51 | */ | ||
52 | void arch_pick_mmap_layout(struct mm_struct *mm) | ||
53 | { | ||
54 | #if !defined(__tilegx__) | ||
55 | int is_32bit = 1; | ||
56 | #elif defined(CONFIG_COMPAT) | ||
57 | int is_32bit = is_compat_task(); | ||
58 | #else | ||
59 | int is_32bit = 0; | ||
60 | #endif | ||
61 | |||
62 | /* | ||
63 | * Use standard layout if the expected stack growth is unlimited | ||
64 | * or we are running native 64 bits. | ||
65 | */ | ||
66 | if (!is_32bit || rlimit(RLIMIT_STACK) == RLIM_INFINITY) { | ||
67 | mm->mmap_base = TASK_UNMAPPED_BASE; | ||
68 | mm->get_unmapped_area = arch_get_unmapped_area; | ||
69 | mm->unmap_area = arch_unmap_area; | ||
70 | } else { | ||
71 | mm->mmap_base = mmap_base(mm); | ||
72 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; | ||
73 | mm->unmap_area = arch_unmap_area_topdown; | ||
74 | } | ||
75 | } | ||
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c new file mode 100644 index 000000000000..289e729bbd76 --- /dev/null +++ b/arch/tile/mm/pgtable.c | |||
@@ -0,0 +1,566 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #include <linux/sched.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/errno.h> | ||
18 | #include <linux/mm.h> | ||
19 | #include <linux/swap.h> | ||
20 | #include <linux/smp.h> | ||
21 | #include <linux/highmem.h> | ||
22 | #include <linux/slab.h> | ||
23 | #include <linux/pagemap.h> | ||
24 | #include <linux/spinlock.h> | ||
25 | #include <linux/cpumask.h> | ||
26 | #include <linux/module.h> | ||
27 | #include <linux/io.h> | ||
28 | #include <linux/vmalloc.h> | ||
29 | #include <linux/smp.h> | ||
30 | |||
31 | #include <asm/system.h> | ||
32 | #include <asm/pgtable.h> | ||
33 | #include <asm/pgalloc.h> | ||
34 | #include <asm/fixmap.h> | ||
35 | #include <asm/tlb.h> | ||
36 | #include <asm/tlbflush.h> | ||
37 | #include <asm/homecache.h> | ||
38 | |||
39 | #define K(x) ((x) << (PAGE_SHIFT-10)) | ||
40 | |||
41 | /* | ||
42 | * The normal show_free_areas() is too verbose on Tile, with dozens | ||
43 | * of processors and often four NUMA zones each with high and lowmem. | ||
44 | */ | ||
45 | void show_mem(void) | ||
46 | { | ||
47 | struct zone *zone; | ||
48 | |||
49 | printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu" | ||
50 | " free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu" | ||
51 | " pagecache:%lu swap:%lu\n", | ||
52 | (global_page_state(NR_ACTIVE_ANON) + | ||
53 | global_page_state(NR_ACTIVE_FILE)), | ||
54 | (global_page_state(NR_INACTIVE_ANON) + | ||
55 | global_page_state(NR_INACTIVE_FILE)), | ||
56 | global_page_state(NR_FILE_DIRTY), | ||
57 | global_page_state(NR_WRITEBACK), | ||
58 | global_page_state(NR_UNSTABLE_NFS), | ||
59 | global_page_state(NR_FREE_PAGES), | ||
60 | (global_page_state(NR_SLAB_RECLAIMABLE) + | ||
61 | global_page_state(NR_SLAB_UNRECLAIMABLE)), | ||
62 | global_page_state(NR_FILE_MAPPED), | ||
63 | global_page_state(NR_PAGETABLE), | ||
64 | global_page_state(NR_BOUNCE), | ||
65 | global_page_state(NR_FILE_PAGES), | ||
66 | nr_swap_pages); | ||
67 | |||
68 | for_each_zone(zone) { | ||
69 | unsigned long flags, order, total = 0, largest_order = -1; | ||
70 | |||
71 | if (!populated_zone(zone)) | ||
72 | continue; | ||
73 | |||
74 | printk("Node %d %7s: ", zone_to_nid(zone), zone->name); | ||
75 | spin_lock_irqsave(&zone->lock, flags); | ||
76 | for (order = 0; order < MAX_ORDER; order++) { | ||
77 | int nr = zone->free_area[order].nr_free; | ||
78 | total += nr << order; | ||
79 | if (nr) | ||
80 | largest_order = order; | ||
81 | } | ||
82 | spin_unlock_irqrestore(&zone->lock, flags); | ||
83 | printk("%lukB (largest %lukB)\n", | ||
84 | K(total), largest_order ? K(1UL) << largest_order : 0); | ||
85 | } | ||
86 | } | ||
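The per-zone loop simply sums the buddy free lists, nr_free blocks of 2^order pages each, and reports the result in kB via K(). A tiny standalone version of that accounting, with invented nr_free counts and an assumed 64 KB page size:

        #include <stdio.h>

        #define MAX_ORDER       11
        #define PAGE_SHIFT      16                      /* assumed 64 KB pages */
        #define K(x)            ((x) << (PAGE_SHIFT - 10))

        int main(void)
        {
                /* Hypothetical nr_free per order, index 0..MAX_ORDER-1. */
                unsigned long nr_free[MAX_ORDER] = { 3, 1, 0, 2, 0, 0, 1, 0, 0, 0, 0 };
                unsigned long order, total = 0, largest_order = 0;

                for (order = 0; order < MAX_ORDER; order++) {
                        total += nr_free[order] << order;
                        if (nr_free[order])
                                largest_order = order;
                }
                /* 3*1 + 1*2 + 2*8 + 1*64 = 85 pages = 5440 kB; largest 4096 kB */
                printf("%lukB (largest %lukB)\n",
                       K(total), K(1UL) << largest_order);
                return 0;
        }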
87 | |||
88 | /* | ||
89 | * Associate a virtual page frame with a given physical page frame | ||
90 | * and protection flags for that frame. | ||
91 | */ | ||
92 | static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) | ||
93 | { | ||
94 | pgd_t *pgd; | ||
95 | pud_t *pud; | ||
96 | pmd_t *pmd; | ||
97 | pte_t *pte; | ||
98 | |||
99 | pgd = swapper_pg_dir + pgd_index(vaddr); | ||
100 | if (pgd_none(*pgd)) { | ||
101 | BUG(); | ||
102 | return; | ||
103 | } | ||
104 | pud = pud_offset(pgd, vaddr); | ||
105 | if (pud_none(*pud)) { | ||
106 | BUG(); | ||
107 | return; | ||
108 | } | ||
109 | pmd = pmd_offset(pud, vaddr); | ||
110 | if (pmd_none(*pmd)) { | ||
111 | BUG(); | ||
112 | return; | ||
113 | } | ||
114 | pte = pte_offset_kernel(pmd, vaddr); | ||
115 | /* <pfn,flags> stored as-is, to permit clearing entries */ | ||
116 | set_pte(pte, pfn_pte(pfn, flags)); | ||
117 | |||
118 | /* | ||
119 | * It's enough to flush this one mapping. | ||
120 | * This appears conservative since it is only called | ||
121 | * from __set_fixmap. | ||
122 | */ | ||
123 | local_flush_tlb_page(NULL, vaddr, PAGE_SIZE); | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * Associate a huge virtual page frame with a given physical page frame | ||
128 | * and protection flags for that frame. pfn is for the base of the page, | ||
129 | * vaddr is what the page gets mapped to - both must be properly aligned. | ||
130 | * The pmd must already be instantiated. | ||
131 | */ | ||
132 | void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) | ||
133 | { | ||
134 | pgd_t *pgd; | ||
135 | pud_t *pud; | ||
136 | pmd_t *pmd; | ||
137 | |||
138 | if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */ | ||
139 | printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n"); | ||
140 | return; /* BUG(); */ | ||
141 | } | ||
142 | if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */ | ||
143 | printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n"); | ||
144 | return; /* BUG(); */ | ||
145 | } | ||
146 | pgd = swapper_pg_dir + pgd_index(vaddr); | ||
147 | if (pgd_none(*pgd)) { | ||
148 | printk(KERN_WARNING "set_pmd_pfn: pgd_none\n"); | ||
149 | return; /* BUG(); */ | ||
150 | } | ||
151 | pud = pud_offset(pgd, vaddr); | ||
152 | pmd = pmd_offset(pud, vaddr); | ||
153 | set_pmd(pmd, ptfn_pmd(HV_PFN_TO_PTFN(pfn), flags)); | ||
154 | /* | ||
155 | * It's enough to flush this one mapping. | ||
156 | * We flush both small and huge TLBs to be sure. | ||
157 | */ | ||
158 | local_flush_tlb_page(NULL, vaddr, HPAGE_SIZE); | ||
159 | local_flush_tlb_pages(NULL, vaddr, PAGE_SIZE, HPAGE_SIZE); | ||
160 | } | ||
161 | |||
162 | void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags) | ||
163 | { | ||
164 | unsigned long address = __fix_to_virt(idx); | ||
165 | |||
166 | if (idx >= __end_of_fixed_addresses) { | ||
167 | BUG(); | ||
168 | return; | ||
169 | } | ||
170 | set_pte_pfn(address, phys >> PAGE_SHIFT, flags); | ||
171 | } | ||
172 | |||
173 | #if defined(CONFIG_HIGHPTE) | ||
174 | pte_t *_pte_offset_map(pmd_t *dir, unsigned long address, enum km_type type) | ||
175 | { | ||
176 | pte_t *pte = kmap_atomic(pmd_page(*dir), type) + | ||
177 | ((pmd_ptfn(*dir) << HV_LOG2_PAGE_TABLE_ALIGN) & ~PAGE_MASK); | ||
178 | return &pte[pte_index(address)]; | ||
179 | } | ||
180 | #endif | ||
181 | |||
182 | /* | ||
183 | * List of all pgd's needed so it can invalidate entries in both cached | ||
184 | * and uncached pgd's. This is essentially codepath-based locking | ||
185 | * against pageattr.c; it is the unique case in which a valid change | ||
186 | * of kernel pagetables can't be lazily synchronized by vmalloc faults. | ||
187 | * vmalloc faults work because attached pagetables are never freed. | ||
188 | * The locking scheme was chosen on the basis of manfred's | ||
189 | * recommendations and having no core impact whatsoever. | ||
190 | * -- wli | ||
191 | */ | ||
192 | DEFINE_SPINLOCK(pgd_lock); | ||
193 | LIST_HEAD(pgd_list); | ||
194 | |||
195 | static inline void pgd_list_add(pgd_t *pgd) | ||
196 | { | ||
197 | list_add(pgd_to_list(pgd), &pgd_list); | ||
198 | } | ||
199 | |||
200 | static inline void pgd_list_del(pgd_t *pgd) | ||
201 | { | ||
202 | list_del(pgd_to_list(pgd)); | ||
203 | } | ||
204 | |||
205 | #define KERNEL_PGD_INDEX_START pgd_index(PAGE_OFFSET) | ||
206 | #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_INDEX_START) | ||
207 | |||
208 | static void pgd_ctor(pgd_t *pgd) | ||
209 | { | ||
210 | unsigned long flags; | ||
211 | |||
212 | memset(pgd, 0, KERNEL_PGD_INDEX_START*sizeof(pgd_t)); | ||
213 | spin_lock_irqsave(&pgd_lock, flags); | ||
214 | |||
215 | #ifndef __tilegx__ | ||
216 | /* | ||
217 | * Check that the user interrupt vector has no L2. | ||
218 | * It never should for the swapper, and new page tables | ||
219 | * should always start with an empty user interrupt vector. | ||
220 | */ | ||
221 | BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0); | ||
222 | #endif | ||
223 | |||
224 | clone_pgd_range(pgd + KERNEL_PGD_INDEX_START, | ||
225 | swapper_pg_dir + KERNEL_PGD_INDEX_START, | ||
226 | KERNEL_PGD_PTRS); | ||
227 | |||
228 | pgd_list_add(pgd); | ||
229 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
230 | } | ||
231 | |||
232 | static void pgd_dtor(pgd_t *pgd) | ||
233 | { | ||
234 | unsigned long flags; /* can be called from interrupt context */ | ||
235 | |||
236 | spin_lock_irqsave(&pgd_lock, flags); | ||
237 | pgd_list_del(pgd); | ||
238 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
239 | } | ||
240 | |||
241 | pgd_t *pgd_alloc(struct mm_struct *mm) | ||
242 | { | ||
243 | pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL); | ||
244 | if (pgd) | ||
245 | pgd_ctor(pgd); | ||
246 | return pgd; | ||
247 | } | ||
248 | |||
249 | void pgd_free(struct mm_struct *mm, pgd_t *pgd) | ||
250 | { | ||
251 | pgd_dtor(pgd); | ||
252 | kmem_cache_free(pgd_cache, pgd); | ||
253 | } | ||
254 | |||
255 | |||
256 | #define L2_USER_PGTABLE_PAGES (1 << L2_USER_PGTABLE_ORDER) | ||
257 | |||
258 | struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) | ||
259 | { | ||
260 | int flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO|__GFP_COMP; | ||
261 | struct page *p; | ||
262 | |||
263 | #ifdef CONFIG_HIGHPTE | ||
264 | flags |= __GFP_HIGHMEM; | ||
265 | #endif | ||
266 | |||
267 | p = alloc_pages(flags, L2_USER_PGTABLE_ORDER); | ||
268 | if (p == NULL) | ||
269 | return NULL; | ||
270 | |||
271 | pgtable_page_ctor(p); | ||
272 | return p; | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * Free page immediately (used in __pte_alloc if we raced with another | ||
277 | * process). We have to correct whatever pte_alloc_one() did before | ||
278 | * returning the pages to the allocator. | ||
279 | */ | ||
280 | void pte_free(struct mm_struct *mm, struct page *p) | ||
281 | { | ||
282 | pgtable_page_dtor(p); | ||
283 | __free_pages(p, L2_USER_PGTABLE_ORDER); | ||
284 | } | ||
285 | |||
286 | void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, | ||
287 | unsigned long address) | ||
288 | { | ||
289 | int i; | ||
290 | |||
291 | pgtable_page_dtor(pte); | ||
292 | tlb->need_flush = 1; | ||
293 | if (tlb_fast_mode(tlb)) { | ||
294 | struct page *pte_pages[L2_USER_PGTABLE_PAGES]; | ||
295 | for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i) | ||
296 | pte_pages[i] = pte + i; | ||
297 | free_pages_and_swap_cache(pte_pages, L2_USER_PGTABLE_PAGES); | ||
298 | return; | ||
299 | } | ||
300 | for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i) { | ||
301 | tlb->pages[tlb->nr++] = pte + i; | ||
302 | if (tlb->nr >= FREE_PTE_NR) | ||
303 | tlb_flush_mmu(tlb, 0, 0); | ||
304 | } | ||
305 | } | ||
306 | |||
307 | #ifndef __tilegx__ | ||
308 | |||
309 | /* | ||
310 | * FIXME: needs to be atomic vs hypervisor writes. For now we make the | ||
311 | * window of vulnerability a bit smaller by doing an unlocked 8-bit update. | ||
312 | */ | ||
313 | int ptep_test_and_clear_young(struct vm_area_struct *vma, | ||
314 | unsigned long addr, pte_t *ptep) | ||
315 | { | ||
316 | #if HV_PTE_INDEX_ACCESSED < 8 || HV_PTE_INDEX_ACCESSED >= 16 | ||
317 | # error Code assumes HV_PTE "accessed" bit in second byte | ||
318 | #endif | ||
319 | u8 *tmp = (u8 *)ptep; | ||
320 | u8 second_byte = tmp[1]; | ||
321 | if (!(second_byte & (1 << (HV_PTE_INDEX_ACCESSED - 8)))) | ||
322 | return 0; | ||
323 | tmp[1] = second_byte & ~(1 << (HV_PTE_INDEX_ACCESSED - 8)); | ||
324 | return 1; | ||
325 | } | ||
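The unlocked update works because, with 8 <= HV_PTE_INDEX_ACCESSED < 16, the accessed bit lives entirely in byte 1 of the PTE (low-order byte first, as the tmp[1] indexing above implies), so a byte-wide store cannot disturb neighbouring bits. A self-checking sketch of that byte/bit arithmetic, with an arbitrary stand-in bit index:

        #include <assert.h>
        #include <stdint.h>
        #include <stdio.h>

        #define ACCESSED_BIT    11  /* stand-in for HV_PTE_INDEX_ACCESSED */

        int main(void)
        {
                uint64_t pte = 0xdeadbeefcafef00dULL | (1ULL << ACCESSED_BIT);
                uint8_t bytes[8];

                /* Lay the PTE out low-order byte first. */
                for (int i = 0; i < 8; i++)
                        bytes[i] = (uint8_t)(pte >> (8 * i));

                /* Bits 8..15 live in byte 1; clear with one byte-wide store. */
                assert(ACCESSED_BIT >= 8 && ACCESSED_BIT < 16);
                bytes[1] &= ~(uint8_t)(1 << (ACCESSED_BIT - 8));

                /* Reassemble and check that only the accessed bit changed. */
                uint64_t newpte = 0;
                for (int i = 0; i < 8; i++)
                        newpte |= (uint64_t)bytes[i] << (8 * i);
                assert(newpte == (pte & ~(1ULL << ACCESSED_BIT)));
                printf("old %#llx -> new %#llx\n",
                       (unsigned long long)pte, (unsigned long long)newpte);
                return 0;
        }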
326 | |||
327 | /* | ||
328 | * This implementation is atomic vs hypervisor writes, since the hypervisor | ||
329 | * always writes the low word (where "accessed" and "dirty" are) and this | ||
330 | * routine only writes the high word. | ||
331 | */ | ||
332 | void ptep_set_wrprotect(struct mm_struct *mm, | ||
333 | unsigned long addr, pte_t *ptep) | ||
334 | { | ||
335 | #if HV_PTE_INDEX_WRITABLE < 32 | ||
336 | # error Code assumes HV_PTE "writable" bit in high word | ||
337 | #endif | ||
338 | u32 *tmp = (u32 *)ptep; | ||
339 | tmp[1] = tmp[1] & ~(1 << (HV_PTE_INDEX_WRITABLE - 32)); | ||
340 | } | ||
341 | |||
342 | #endif | ||
343 | |||
344 | pte_t *virt_to_pte(struct mm_struct* mm, unsigned long addr) | ||
345 | { | ||
346 | pgd_t *pgd; | ||
347 | pud_t *pud; | ||
348 | pmd_t *pmd; | ||
349 | |||
350 | if (pgd_addr_invalid(addr)) | ||
351 | return NULL; | ||
352 | |||
353 | pgd = mm ? pgd_offset(mm, addr) : swapper_pg_dir + pgd_index(addr); | ||
354 | pud = pud_offset(pgd, addr); | ||
355 | if (!pud_present(*pud)) | ||
356 | return NULL; | ||
357 | pmd = pmd_offset(pud, addr); | ||
358 | if (pmd_huge_page(*pmd)) | ||
359 | return (pte_t *)pmd; | ||
360 | if (!pmd_present(*pmd)) | ||
361 | return NULL; | ||
362 | return pte_offset_kernel(pmd, addr); | ||
363 | } | ||
364 | |||
365 | pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu) | ||
366 | { | ||
367 | unsigned int width = smp_width; | ||
368 | int x = cpu % width; | ||
369 | int y = cpu / width; | ||
370 | BUG_ON(y >= smp_height); | ||
371 | BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3); | ||
372 | BUG_ON(cpu < 0 || cpu >= NR_CPUS); | ||
373 | BUG_ON(!cpu_is_valid_lotar(cpu)); | ||
374 | return hv_pte_set_lotar(prot, HV_XY_TO_LOTAR(x, y)); | ||
375 | } | ||
376 | |||
377 | int get_remote_cache_cpu(pgprot_t prot) | ||
378 | { | ||
379 | HV_LOTAR lotar = hv_pte_get_lotar(prot); | ||
380 | int x = HV_LOTAR_X(lotar); | ||
381 | int y = HV_LOTAR_Y(lotar); | ||
382 | BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3); | ||
383 | return x + y * smp_width; | ||
384 | } | ||
385 | |||
386 | void set_pte_order(pte_t *ptep, pte_t pte, int order) | ||
387 | { | ||
388 | unsigned long pfn = pte_pfn(pte); | ||
389 | struct page *page = pfn_to_page(pfn); | ||
390 | |||
391 | /* Update the home of a PTE if necessary */ | ||
392 | pte = pte_set_home(pte, page_home(page)); | ||
393 | |||
394 | #ifdef __tilegx__ | ||
395 | *ptep = pte; | ||
396 | #else | ||
397 | /* | ||
398 | * When setting a PTE, write the high bits first, then write | ||
399 | * the low bits. This sets the "present" bit only after the | ||
400 | * other bits are in place. If a particular PTE update | ||
401 | * involves transitioning from one valid PTE to another, it | ||
402 | * may be necessary to call set_pte_order() more than once, | ||
403 | * transitioning via a suitable intermediate state. | ||
404 | * Note that this sequence also means that if we are transitioning | ||
405 | * from any migrating PTE to a non-migrating one, we will not | ||
406 | * see a half-updated PTE with the migrating bit off. | ||
407 | */ | ||
408 | #if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32 | ||
409 | # error Must write the present and migrating bits last | ||
410 | #endif | ||
411 | ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); | ||
412 | barrier(); | ||
413 | ((u32 *)ptep)[0] = (u32)(pte_val(pte)); | ||
414 | #endif | ||
415 | } | ||
416 | |||
417 | /* Can this mm load a PTE with cached_priority set? */ | ||
418 | static inline int mm_is_priority_cached(struct mm_struct *mm) | ||
419 | { | ||
420 | return mm->context.priority_cached; | ||
421 | } | ||
422 | |||
423 | /* | ||
424 | * Add a priority mapping to an mm_context and | ||
425 | * notify the hypervisor if this is the first one. | ||
426 | */ | ||
427 | void start_mm_caching(struct mm_struct *mm) | ||
428 | { | ||
429 | if (!mm_is_priority_cached(mm)) { | ||
430 | mm->context.priority_cached = -1U; | ||
431 | hv_set_caching(-1U); | ||
432 | } | ||
433 | } | ||
434 | |||
435 | /* | ||
436 | * Validate and return the priority_cached flag. We know if it's zero | ||
437 | * that we don't need to scan, since we immediately set it non-zero | ||
438 | * when we first consider a MAP_CACHE_PRIORITY mapping. | ||
439 | * | ||
440 | * We only _try_ to acquire the mmap_sem semaphore; if we can't, then | ||
441 | * (since we're in an interrupt context, servicing switch_mm) we simply | ||
442 | * leave the "priority_cached" field set rather than clearing it. | ||
443 | * Presumably we'll come back later with better luck and clear the | ||
444 | * value then; until then the cache just stays marked for priority. | ||
445 | */ | ||
446 | static unsigned int update_priority_cached(struct mm_struct *mm) | ||
447 | { | ||
448 | if (mm->context.priority_cached && down_write_trylock(&mm->mmap_sem)) { | ||
449 | struct vm_area_struct *vm; | ||
450 | for (vm = mm->mmap; vm; vm = vm->vm_next) { | ||
451 | if (hv_pte_get_cached_priority(vm->vm_page_prot)) | ||
452 | break; | ||
453 | } | ||
454 | if (vm == NULL) | ||
455 | mm->context.priority_cached = 0; | ||
456 | up_write(&mm->mmap_sem); | ||
457 | } | ||
458 | return mm->context.priority_cached; | ||
459 | } | ||
460 | |||
461 | /* Set caching correctly for an mm that we are switching to. */ | ||
462 | void check_mm_caching(struct mm_struct *prev, struct mm_struct *next) | ||
463 | { | ||
464 | if (!mm_is_priority_cached(next)) { | ||
465 | /* | ||
466 | * If the new mm doesn't use priority caching, just see if we | ||
467 | * need the hv_set_caching(), or can assume it's already zero. | ||
468 | */ | ||
469 | if (mm_is_priority_cached(prev)) | ||
470 | hv_set_caching(0); | ||
471 | } else { | ||
472 | hv_set_caching(update_priority_cached(next)); | ||
473 | } | ||
474 | } | ||
475 | |||
476 | #if CHIP_HAS_MMIO() | ||
477 | |||
478 | /* Map an arbitrary MMIO address, homed according to pgprot, into VA space. */ | ||
479 | void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, | ||
480 | pgprot_t home) | ||
481 | { | ||
482 | void *addr; | ||
483 | struct vm_struct *area; | ||
484 | unsigned long offset, last_addr; | ||
485 | pgprot_t pgprot; | ||
486 | |||
487 | /* Don't allow wraparound or zero size */ | ||
488 | last_addr = phys_addr + size - 1; | ||
489 | if (!size || last_addr < phys_addr) | ||
490 | return NULL; | ||
491 | |||
492 | /* Create a read/write, MMIO VA mapping homed at the requested shim. */ | ||
493 | pgprot = PAGE_KERNEL; | ||
494 | pgprot = hv_pte_set_mode(pgprot, HV_PTE_MODE_MMIO); | ||
495 | pgprot = hv_pte_set_lotar(pgprot, hv_pte_get_lotar(home)); | ||
496 | |||
497 | /* | ||
498 | * Mappings have to be page-aligned | ||
499 | */ | ||
500 | offset = phys_addr & ~PAGE_MASK; | ||
501 | phys_addr &= PAGE_MASK; | ||
502 | size = PAGE_ALIGN(last_addr+1) - phys_addr; | ||
503 | |||
504 | /* | ||
505 | * Ok, go for it.. | ||
506 | */ | ||
507 | area = get_vm_area(size, VM_IOREMAP /* | other flags? */); | ||
508 | if (!area) | ||
509 | return NULL; | ||
510 | area->phys_addr = phys_addr; | ||
511 | addr = area->addr; | ||
512 | if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size, | ||
513 | phys_addr, pgprot)) { | ||
514 | remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr)); | ||
515 | return NULL; | ||
516 | } | ||
517 | return (__force void __iomem *) (offset + (char *)addr); | ||
518 | } | ||
519 | EXPORT_SYMBOL(ioremap_prot); | ||
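The alignment bookkeeping in ioremap_prot() (split phys_addr into a page-aligned base plus in-page offset, then round the length up to whole pages) can be checked in isolation. A sketch with an assumed 64 KB page size and an arbitrary unaligned request:

        #include <assert.h>
        #include <stdio.h>

        #define PAGE_SIZE       0x10000UL               /* assumed 64 KB pages */
        #define PAGE_MASK       (~(PAGE_SIZE - 1))
        #define PAGE_ALIGN(x)   (((x) + PAGE_SIZE - 1) & PAGE_MASK)

        int main(void)
        {
                unsigned long phys_addr = 0x12345678UL; /* unaligned MMIO address */
                unsigned long size = 0x300UL;
                unsigned long last_addr = phys_addr + size - 1;

                unsigned long offset = phys_addr & ~PAGE_MASK;  /* 0x5678 */
                phys_addr &= PAGE_MASK;                         /* 0x12340000 */
                size = PAGE_ALIGN(last_addr + 1) - phys_addr;   /* 0x10000 */

                assert(offset == 0x5678 && phys_addr == 0x12340000UL &&
                       size == 0x10000UL);
                printf("map %#lx bytes at %#lx, return base + %#lx\n",
                       size, phys_addr, offset);
                return 0;
        }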
520 | |||
521 | /* Map a PCI MMIO bus address into VA space. */ | ||
522 | void __iomem *ioremap(resource_size_t phys_addr, unsigned long size) | ||
523 | { | ||
524 | panic("ioremap for PCI MMIO is not supported"); | ||
525 | } | ||
526 | EXPORT_SYMBOL(ioremap); | ||
527 | |||
528 | /* Unmap an MMIO VA mapping. */ | ||
529 | void iounmap(volatile void __iomem *addr_in) | ||
530 | { | ||
531 | volatile void __iomem *addr = (volatile void __iomem *) | ||
532 | (PAGE_MASK & (unsigned long __force)addr_in); | ||
533 | #if 1 | ||
534 | vunmap((void * __force)addr); | ||
535 | #else | ||
536 | /* x86 uses this complicated flow instead of vunmap(). Is | ||
537 | * there any particular reason we should do the same? */ | ||
538 | struct vm_struct *p, *o; | ||
539 | |||
540 | /* Use the vm area unlocked, assuming the caller | ||
541 | ensures there isn't another iounmap for the same address | ||
542 | in parallel. Reuse of the virtual address is prevented by | ||
543 | leaving it in the global lists until we're done with it. | ||
544 | cpa takes care of the direct mappings. */ | ||
545 | read_lock(&vmlist_lock); | ||
546 | for (p = vmlist; p; p = p->next) { | ||
547 | if (p->addr == addr) | ||
548 | break; | ||
549 | } | ||
550 | read_unlock(&vmlist_lock); | ||
551 | |||
552 | if (!p) { | ||
553 | printk("iounmap: bad address %p\n", addr); | ||
554 | dump_stack(); | ||
555 | return; | ||
556 | } | ||
557 | |||
558 | /* Finally remove it */ | ||
559 | o = remove_vm_area((void *)addr); | ||
560 | BUG_ON(p != o || o == NULL); | ||
561 | kfree(p); | ||
562 | #endif | ||
563 | } | ||
564 | EXPORT_SYMBOL(iounmap); | ||
565 | |||
566 | #endif /* CHIP_HAS_MMIO() */ | ||