diff options
Diffstat (limited to 'arch/ppc64/mm/init.c')
-rw-r--r-- | arch/ppc64/mm/init.c | 927 |
1 files changed, 927 insertions, 0 deletions
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c new file mode 100644 index 000000000000..23813d03e1c4 --- /dev/null +++ b/arch/ppc64/mm/init.c | |||
@@ -0,0 +1,927 @@ | |||
1 | /* | ||
2 | * PowerPC version | ||
3 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
4 | * | ||
5 | * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) | ||
6 | * and Cort Dougan (PReP) (cort@cs.nmt.edu) | ||
7 | * Copyright (C) 1996 Paul Mackerras | ||
8 | * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). | ||
9 | * | ||
10 | * Derived from "arch/i386/mm/init.c" | ||
11 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
12 | * | ||
13 | * Dave Engebretsen <engebret@us.ibm.com> | ||
14 | * Rework for PPC64 port. | ||
15 | * | ||
16 | * This program is free software; you can redistribute it and/or | ||
17 | * modify it under the terms of the GNU General Public License | ||
18 | * as published by the Free Software Foundation; either version | ||
19 | * 2 of the License, or (at your option) any later version. | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <linux/config.h> | ||
24 | #include <linux/signal.h> | ||
25 | #include <linux/sched.h> | ||
26 | #include <linux/kernel.h> | ||
27 | #include <linux/errno.h> | ||
28 | #include <linux/string.h> | ||
29 | #include <linux/types.h> | ||
30 | #include <linux/mman.h> | ||
31 | #include <linux/mm.h> | ||
32 | #include <linux/swap.h> | ||
33 | #include <linux/stddef.h> | ||
34 | #include <linux/vmalloc.h> | ||
35 | #include <linux/init.h> | ||
36 | #include <linux/delay.h> | ||
37 | #include <linux/bootmem.h> | ||
38 | #include <linux/highmem.h> | ||
39 | #include <linux/idr.h> | ||
40 | #include <linux/nodemask.h> | ||
41 | #include <linux/module.h> | ||
42 | |||
43 | #include <asm/pgalloc.h> | ||
44 | #include <asm/page.h> | ||
45 | #include <asm/abs_addr.h> | ||
46 | #include <asm/prom.h> | ||
47 | #include <asm/lmb.h> | ||
48 | #include <asm/rtas.h> | ||
49 | #include <asm/io.h> | ||
50 | #include <asm/mmu_context.h> | ||
51 | #include <asm/pgtable.h> | ||
52 | #include <asm/mmu.h> | ||
53 | #include <asm/uaccess.h> | ||
54 | #include <asm/smp.h> | ||
55 | #include <asm/machdep.h> | ||
56 | #include <asm/tlb.h> | ||
57 | #include <asm/eeh.h> | ||
58 | #include <asm/processor.h> | ||
59 | #include <asm/mmzone.h> | ||
60 | #include <asm/cputable.h> | ||
61 | #include <asm/ppcdebug.h> | ||
62 | #include <asm/sections.h> | ||
63 | #include <asm/system.h> | ||
64 | #include <asm/iommu.h> | ||
65 | #include <asm/abs_addr.h> | ||
66 | #include <asm/vdso.h> | ||
67 | |||
68 | int mem_init_done; | ||
69 | unsigned long ioremap_bot = IMALLOC_BASE; | ||
70 | static unsigned long phbs_io_bot = PHBS_IO_BASE; | ||
71 | |||
72 | extern pgd_t swapper_pg_dir[]; | ||
73 | extern struct task_struct *current_set[NR_CPUS]; | ||
74 | |||
75 | extern pgd_t ioremap_dir[]; | ||
76 | pgd_t * ioremap_pgd = (pgd_t *)&ioremap_dir; | ||
77 | |||
78 | unsigned long klimit = (unsigned long)_end; | ||
79 | |||
80 | unsigned long _SDR1=0; | ||
81 | unsigned long _ASR=0; | ||
82 | |||
83 | /* max amount of RAM to use */ | ||
84 | unsigned long __max_memory; | ||
85 | |||
86 | /* info on what we think the IO hole is */ | ||
87 | unsigned long io_hole_start; | ||
88 | unsigned long io_hole_size; | ||
89 | |||
90 | void show_mem(void) | ||
91 | { | ||
92 | unsigned long total = 0, reserved = 0; | ||
93 | unsigned long shared = 0, cached = 0; | ||
94 | struct page *page; | ||
95 | pg_data_t *pgdat; | ||
96 | unsigned long i; | ||
97 | |||
98 | printk("Mem-info:\n"); | ||
99 | show_free_areas(); | ||
100 | printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); | ||
101 | for_each_pgdat(pgdat) { | ||
102 | for (i = 0; i < pgdat->node_spanned_pages; i++) { | ||
103 | page = pgdat->node_mem_map + i; | ||
104 | total++; | ||
105 | if (PageReserved(page)) | ||
106 | reserved++; | ||
107 | else if (PageSwapCache(page)) | ||
108 | cached++; | ||
109 | else if (page_count(page)) | ||
110 | shared += page_count(page) - 1; | ||
111 | } | ||
112 | } | ||
113 | printk("%ld pages of RAM\n", total); | ||
114 | printk("%ld reserved pages\n", reserved); | ||
115 | printk("%ld pages shared\n", shared); | ||
116 | printk("%ld pages swap cached\n", cached); | ||
117 | } | ||
118 | |||
119 | #ifdef CONFIG_PPC_ISERIES | ||
120 | |||
121 | void __iomem *ioremap(unsigned long addr, unsigned long size) | ||
122 | { | ||
123 | return (void __iomem *)addr; | ||
124 | } | ||
125 | |||
126 | extern void __iomem *__ioremap(unsigned long addr, unsigned long size, | ||
127 | unsigned long flags) | ||
128 | { | ||
129 | return (void __iomem *)addr; | ||
130 | } | ||
131 | |||
132 | void iounmap(volatile void __iomem *addr) | ||
133 | { | ||
134 | return; | ||
135 | } | ||
136 | |||
137 | #else | ||
138 | |||
139 | /* | ||
140 | * map_io_page currently only called by __ioremap | ||
141 | * map_io_page adds an entry to the ioremap page table | ||
142 | * and adds an entry to the HPT, possibly bolting it | ||
143 | */ | ||
144 | static void map_io_page(unsigned long ea, unsigned long pa, int flags) | ||
145 | { | ||
146 | pgd_t *pgdp; | ||
147 | pmd_t *pmdp; | ||
148 | pte_t *ptep; | ||
149 | unsigned long vsid; | ||
150 | |||
151 | if (mem_init_done) { | ||
152 | spin_lock(&ioremap_mm.page_table_lock); | ||
153 | pgdp = pgd_offset_i(ea); | ||
154 | pmdp = pmd_alloc(&ioremap_mm, pgdp, ea); | ||
155 | ptep = pte_alloc_kernel(&ioremap_mm, pmdp, ea); | ||
156 | |||
157 | pa = abs_to_phys(pa); | ||
158 | set_pte_at(&ioremap_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); | ||
159 | spin_unlock(&ioremap_mm.page_table_lock); | ||
160 | } else { | ||
161 | unsigned long va, vpn, hash, hpteg; | ||
162 | |||
163 | /* | ||
164 | * If the mm subsystem is not fully up, we cannot create a | ||
165 | * linux page table entry for this mapping. Simply bolt an | ||
166 | * entry in the hardware page table. | ||
167 | */ | ||
168 | vsid = get_kernel_vsid(ea); | ||
169 | va = (vsid << 28) | (ea & 0xFFFFFFF); | ||
170 | vpn = va >> PAGE_SHIFT; | ||
171 | |||
172 | hash = hpt_hash(vpn, 0); | ||
173 | |||
174 | hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); | ||
175 | |||
176 | /* Panic if a pte grpup is full */ | ||
177 | if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, 0, | ||
178 | _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX, | ||
179 | 1, 0) == -1) { | ||
180 | panic("map_io_page: could not insert mapping"); | ||
181 | } | ||
182 | } | ||
183 | } | ||
184 | |||
185 | |||
186 | static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa, | ||
187 | unsigned long ea, unsigned long size, | ||
188 | unsigned long flags) | ||
189 | { | ||
190 | unsigned long i; | ||
191 | |||
192 | if ((flags & _PAGE_PRESENT) == 0) | ||
193 | flags |= pgprot_val(PAGE_KERNEL); | ||
194 | if (flags & (_PAGE_NO_CACHE | _PAGE_WRITETHRU)) | ||
195 | flags |= _PAGE_GUARDED; | ||
196 | |||
197 | for (i = 0; i < size; i += PAGE_SIZE) { | ||
198 | map_io_page(ea+i, pa+i, flags); | ||
199 | } | ||
200 | |||
201 | return (void __iomem *) (ea + (addr & ~PAGE_MASK)); | ||
202 | } | ||
203 | |||
204 | |||
205 | void __iomem * | ||
206 | ioremap(unsigned long addr, unsigned long size) | ||
207 | { | ||
208 | return __ioremap(addr, size, _PAGE_NO_CACHE); | ||
209 | } | ||
210 | |||
211 | void __iomem * | ||
212 | __ioremap(unsigned long addr, unsigned long size, unsigned long flags) | ||
213 | { | ||
214 | unsigned long pa, ea; | ||
215 | |||
216 | /* | ||
217 | * Choose an address to map it to. | ||
218 | * Once the imalloc system is running, we use it. | ||
219 | * Before that, we map using addresses going | ||
220 | * up from ioremap_bot. imalloc will use | ||
221 | * the addresses from ioremap_bot through | ||
222 | * IMALLOC_END (0xE000001fffffffff) | ||
223 | * | ||
224 | */ | ||
225 | pa = addr & PAGE_MASK; | ||
226 | size = PAGE_ALIGN(addr + size) - pa; | ||
227 | |||
228 | if (size == 0) | ||
229 | return NULL; | ||
230 | |||
231 | if (mem_init_done) { | ||
232 | struct vm_struct *area; | ||
233 | area = im_get_free_area(size); | ||
234 | if (area == NULL) | ||
235 | return NULL; | ||
236 | ea = (unsigned long)(area->addr); | ||
237 | } else { | ||
238 | ea = ioremap_bot; | ||
239 | ioremap_bot += size; | ||
240 | } | ||
241 | |||
242 | return __ioremap_com(addr, pa, ea, size, flags); | ||
243 | } | ||
244 | |||
245 | #define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK)) | ||
246 | |||
247 | int __ioremap_explicit(unsigned long pa, unsigned long ea, | ||
248 | unsigned long size, unsigned long flags) | ||
249 | { | ||
250 | struct vm_struct *area; | ||
251 | |||
252 | /* For now, require page-aligned values for pa, ea, and size */ | ||
253 | if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) || | ||
254 | !IS_PAGE_ALIGNED(size)) { | ||
255 | printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__); | ||
256 | return 1; | ||
257 | } | ||
258 | |||
259 | if (!mem_init_done) { | ||
260 | /* Two things to consider in this case: | ||
261 | * 1) No records will be kept (imalloc, etc) that the region | ||
262 | * has been remapped | ||
263 | * 2) It won't be easy to iounmap() the region later (because | ||
264 | * of 1) | ||
265 | */ | ||
266 | ; | ||
267 | } else { | ||
268 | area = im_get_area(ea, size, | ||
269 | IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS); | ||
270 | if (area == NULL) { | ||
271 | /* Expected when PHB-dlpar is in play */ | ||
272 | return 1; | ||
273 | } | ||
274 | if (ea != (unsigned long) area->addr) { | ||
275 | printk(KERN_ERR "unexpected addr return from im_get_area\n"); | ||
276 | return 1; | ||
277 | } | ||
278 | } | ||
279 | |||
280 | if (__ioremap_com(pa, pa, ea, size, flags) != (void *) ea) { | ||
281 | printk(KERN_ERR "__ioremap_com() returned unexpected addr\n"); | ||
282 | return 1; | ||
283 | } | ||
284 | |||
285 | return 0; | ||
286 | } | ||
287 | |||
288 | static void unmap_im_area_pte(pmd_t *pmd, unsigned long address, | ||
289 | unsigned long size) | ||
290 | { | ||
291 | unsigned long base, end; | ||
292 | pte_t *pte; | ||
293 | |||
294 | if (pmd_none(*pmd)) | ||
295 | return; | ||
296 | if (pmd_bad(*pmd)) { | ||
297 | pmd_ERROR(*pmd); | ||
298 | pmd_clear(pmd); | ||
299 | return; | ||
300 | } | ||
301 | |||
302 | pte = pte_offset_kernel(pmd, address); | ||
303 | base = address & PMD_MASK; | ||
304 | address &= ~PMD_MASK; | ||
305 | end = address + size; | ||
306 | if (end > PMD_SIZE) | ||
307 | end = PMD_SIZE; | ||
308 | |||
309 | do { | ||
310 | pte_t page; | ||
311 | page = ptep_get_and_clear(&ioremap_mm, base + address, pte); | ||
312 | address += PAGE_SIZE; | ||
313 | pte++; | ||
314 | if (pte_none(page)) | ||
315 | continue; | ||
316 | if (pte_present(page)) | ||
317 | continue; | ||
318 | printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n"); | ||
319 | } while (address < end); | ||
320 | } | ||
321 | |||
322 | static void unmap_im_area_pmd(pgd_t *dir, unsigned long address, | ||
323 | unsigned long size) | ||
324 | { | ||
325 | unsigned long base, end; | ||
326 | pmd_t *pmd; | ||
327 | |||
328 | if (pgd_none(*dir)) | ||
329 | return; | ||
330 | if (pgd_bad(*dir)) { | ||
331 | pgd_ERROR(*dir); | ||
332 | pgd_clear(dir); | ||
333 | return; | ||
334 | } | ||
335 | |||
336 | pmd = pmd_offset(dir, address); | ||
337 | base = address & PGDIR_MASK; | ||
338 | address &= ~PGDIR_MASK; | ||
339 | end = address + size; | ||
340 | if (end > PGDIR_SIZE) | ||
341 | end = PGDIR_SIZE; | ||
342 | |||
343 | do { | ||
344 | unmap_im_area_pte(pmd, base + address, end - address); | ||
345 | address = (address + PMD_SIZE) & PMD_MASK; | ||
346 | pmd++; | ||
347 | } while (address < end); | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * Unmap an IO region and remove it from imalloc'd list. | ||
352 | * Access to IO memory should be serialized by driver. | ||
353 | * This code is modeled after vmalloc code - unmap_vm_area() | ||
354 | * | ||
355 | * XXX what about calls before mem_init_done (ie python_countermeasures()) | ||
356 | */ | ||
357 | void iounmap(volatile void __iomem *token) | ||
358 | { | ||
359 | unsigned long address, start, end, size; | ||
360 | struct mm_struct *mm; | ||
361 | pgd_t *dir; | ||
362 | void *addr; | ||
363 | |||
364 | if (!mem_init_done) { | ||
365 | return; | ||
366 | } | ||
367 | |||
368 | addr = (void *) ((unsigned long __force) token & PAGE_MASK); | ||
369 | |||
370 | if ((size = im_free(addr)) == 0) { | ||
371 | return; | ||
372 | } | ||
373 | |||
374 | address = (unsigned long)addr; | ||
375 | start = address; | ||
376 | end = address + size; | ||
377 | |||
378 | mm = &ioremap_mm; | ||
379 | spin_lock(&mm->page_table_lock); | ||
380 | |||
381 | dir = pgd_offset_i(address); | ||
382 | flush_cache_vunmap(address, end); | ||
383 | do { | ||
384 | unmap_im_area_pmd(dir, address, end - address); | ||
385 | address = (address + PGDIR_SIZE) & PGDIR_MASK; | ||
386 | dir++; | ||
387 | } while (address && (address < end)); | ||
388 | flush_tlb_kernel_range(start, end); | ||
389 | |||
390 | spin_unlock(&mm->page_table_lock); | ||
391 | return; | ||
392 | } | ||
393 | |||
394 | static int iounmap_subset_regions(unsigned long addr, unsigned long size) | ||
395 | { | ||
396 | struct vm_struct *area; | ||
397 | |||
398 | /* Check whether subsets of this region exist */ | ||
399 | area = im_get_area(addr, size, IM_REGION_SUPERSET); | ||
400 | if (area == NULL) | ||
401 | return 1; | ||
402 | |||
403 | while (area) { | ||
404 | iounmap((void __iomem *) area->addr); | ||
405 | area = im_get_area(addr, size, | ||
406 | IM_REGION_SUPERSET); | ||
407 | } | ||
408 | |||
409 | return 0; | ||
410 | } | ||
411 | |||
412 | int iounmap_explicit(volatile void __iomem *start, unsigned long size) | ||
413 | { | ||
414 | struct vm_struct *area; | ||
415 | unsigned long addr; | ||
416 | int rc; | ||
417 | |||
418 | addr = (unsigned long __force) start & PAGE_MASK; | ||
419 | |||
420 | /* Verify that the region either exists or is a subset of an existing | ||
421 | * region. In the latter case, split the parent region to create | ||
422 | * the exact region | ||
423 | */ | ||
424 | area = im_get_area(addr, size, | ||
425 | IM_REGION_EXISTS | IM_REGION_SUBSET); | ||
426 | if (area == NULL) { | ||
427 | /* Determine whether subset regions exist. If so, unmap */ | ||
428 | rc = iounmap_subset_regions(addr, size); | ||
429 | if (rc) { | ||
430 | printk(KERN_ERR | ||
431 | "%s() cannot unmap nonexistent range 0x%lx\n", | ||
432 | __FUNCTION__, addr); | ||
433 | return 1; | ||
434 | } | ||
435 | } else { | ||
436 | iounmap((void __iomem *) area->addr); | ||
437 | } | ||
438 | /* | ||
439 | * FIXME! This can't be right: | ||
440 | iounmap(area->addr); | ||
441 | * Maybe it should be "iounmap(area);" | ||
442 | */ | ||
443 | return 0; | ||
444 | } | ||
445 | |||
446 | #endif | ||
447 | |||
448 | EXPORT_SYMBOL(ioremap); | ||
449 | EXPORT_SYMBOL(__ioremap); | ||
450 | EXPORT_SYMBOL(iounmap); | ||
451 | |||
452 | void free_initmem(void) | ||
453 | { | ||
454 | unsigned long addr; | ||
455 | |||
456 | addr = (unsigned long)__init_begin; | ||
457 | for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { | ||
458 | ClearPageReserved(virt_to_page(addr)); | ||
459 | set_page_count(virt_to_page(addr), 1); | ||
460 | free_page(addr); | ||
461 | totalram_pages++; | ||
462 | } | ||
463 | printk ("Freeing unused kernel memory: %luk freed\n", | ||
464 | ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10); | ||
465 | } | ||
466 | |||
467 | #ifdef CONFIG_BLK_DEV_INITRD | ||
468 | void free_initrd_mem(unsigned long start, unsigned long end) | ||
469 | { | ||
470 | if (start < end) | ||
471 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); | ||
472 | for (; start < end; start += PAGE_SIZE) { | ||
473 | ClearPageReserved(virt_to_page(start)); | ||
474 | set_page_count(virt_to_page(start), 1); | ||
475 | free_page(start); | ||
476 | totalram_pages++; | ||
477 | } | ||
478 | } | ||
479 | #endif | ||
480 | |||
481 | static DEFINE_SPINLOCK(mmu_context_lock); | ||
482 | static DEFINE_IDR(mmu_context_idr); | ||
483 | |||
484 | int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | ||
485 | { | ||
486 | int index; | ||
487 | int err; | ||
488 | |||
489 | #ifdef CONFIG_HUGETLB_PAGE | ||
490 | /* We leave htlb_segs as it was, but for a fork, we need to | ||
491 | * clear the huge_pgdir. */ | ||
492 | mm->context.huge_pgdir = NULL; | ||
493 | #endif | ||
494 | |||
495 | again: | ||
496 | if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) | ||
497 | return -ENOMEM; | ||
498 | |||
499 | spin_lock(&mmu_context_lock); | ||
500 | err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); | ||
501 | spin_unlock(&mmu_context_lock); | ||
502 | |||
503 | if (err == -EAGAIN) | ||
504 | goto again; | ||
505 | else if (err) | ||
506 | return err; | ||
507 | |||
508 | if (index > MAX_CONTEXT) { | ||
509 | idr_remove(&mmu_context_idr, index); | ||
510 | return -ENOMEM; | ||
511 | } | ||
512 | |||
513 | mm->context.id = index; | ||
514 | |||
515 | return 0; | ||
516 | } | ||
517 | |||
518 | void destroy_context(struct mm_struct *mm) | ||
519 | { | ||
520 | spin_lock(&mmu_context_lock); | ||
521 | idr_remove(&mmu_context_idr, mm->context.id); | ||
522 | spin_unlock(&mmu_context_lock); | ||
523 | |||
524 | mm->context.id = NO_CONTEXT; | ||
525 | |||
526 | hugetlb_mm_free_pgd(mm); | ||
527 | } | ||
528 | |||
529 | /* | ||
530 | * Do very early mm setup. | ||
531 | */ | ||
532 | void __init mm_init_ppc64(void) | ||
533 | { | ||
534 | #ifndef CONFIG_PPC_ISERIES | ||
535 | unsigned long i; | ||
536 | #endif | ||
537 | |||
538 | ppc64_boot_msg(0x100, "MM Init"); | ||
539 | |||
540 | /* This is the story of the IO hole... please, keep seated, | ||
541 | * unfortunately, we are out of oxygen masks at the moment. | ||
542 | * So we need some rough way to tell where your big IO hole | ||
543 | * is. On pmac, it's between 2G and 4G, on POWER3, it's around | ||
544 | * that area as well, on POWER4 we don't have one, etc... | ||
545 | * We need that as a "hint" when sizing the TCE table on POWER3 | ||
546 | * So far, the simplest way that seem work well enough for us it | ||
547 | * to just assume that the first discontinuity in our physical | ||
548 | * RAM layout is the IO hole. That may not be correct in the future | ||
549 | * (and isn't on iSeries but then we don't care ;) | ||
550 | */ | ||
551 | |||
552 | #ifndef CONFIG_PPC_ISERIES | ||
553 | for (i = 1; i < lmb.memory.cnt; i++) { | ||
554 | unsigned long base, prevbase, prevsize; | ||
555 | |||
556 | prevbase = lmb.memory.region[i-1].physbase; | ||
557 | prevsize = lmb.memory.region[i-1].size; | ||
558 | base = lmb.memory.region[i].physbase; | ||
559 | if (base > (prevbase + prevsize)) { | ||
560 | io_hole_start = prevbase + prevsize; | ||
561 | io_hole_size = base - (prevbase + prevsize); | ||
562 | break; | ||
563 | } | ||
564 | } | ||
565 | #endif /* CONFIG_PPC_ISERIES */ | ||
566 | if (io_hole_start) | ||
567 | printk("IO Hole assumed to be %lx -> %lx\n", | ||
568 | io_hole_start, io_hole_start + io_hole_size - 1); | ||
569 | |||
570 | ppc64_boot_msg(0x100, "MM Init Done"); | ||
571 | } | ||
572 | |||
573 | /* | ||
574 | * This is called by /dev/mem to know if a given address has to | ||
575 | * be mapped non-cacheable or not | ||
576 | */ | ||
577 | int page_is_ram(unsigned long pfn) | ||
578 | { | ||
579 | int i; | ||
580 | unsigned long paddr = (pfn << PAGE_SHIFT); | ||
581 | |||
582 | for (i=0; i < lmb.memory.cnt; i++) { | ||
583 | unsigned long base; | ||
584 | |||
585 | #ifdef CONFIG_MSCHUNKS | ||
586 | base = lmb.memory.region[i].physbase; | ||
587 | #else | ||
588 | base = lmb.memory.region[i].base; | ||
589 | #endif | ||
590 | if ((paddr >= base) && | ||
591 | (paddr < (base + lmb.memory.region[i].size))) { | ||
592 | return 1; | ||
593 | } | ||
594 | } | ||
595 | |||
596 | return 0; | ||
597 | } | ||
598 | EXPORT_SYMBOL(page_is_ram); | ||
599 | |||
600 | /* | ||
601 | * Initialize the bootmem system and give it all the memory we | ||
602 | * have available. | ||
603 | */ | ||
604 | #ifndef CONFIG_DISCONTIGMEM | ||
605 | void __init do_init_bootmem(void) | ||
606 | { | ||
607 | unsigned long i; | ||
608 | unsigned long start, bootmap_pages; | ||
609 | unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT; | ||
610 | int boot_mapsize; | ||
611 | |||
612 | /* | ||
613 | * Find an area to use for the bootmem bitmap. Calculate the size of | ||
614 | * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE. | ||
615 | * Add 1 additional page in case the address isn't page-aligned. | ||
616 | */ | ||
617 | bootmap_pages = bootmem_bootmap_pages(total_pages); | ||
618 | |||
619 | start = abs_to_phys(lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE)); | ||
620 | BUG_ON(!start); | ||
621 | |||
622 | boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages); | ||
623 | |||
624 | max_pfn = max_low_pfn; | ||
625 | |||
626 | /* add all physical memory to the bootmem map. Also find the first */ | ||
627 | for (i=0; i < lmb.memory.cnt; i++) { | ||
628 | unsigned long physbase, size; | ||
629 | |||
630 | physbase = lmb.memory.region[i].physbase; | ||
631 | size = lmb.memory.region[i].size; | ||
632 | free_bootmem(physbase, size); | ||
633 | } | ||
634 | |||
635 | /* reserve the sections we're already using */ | ||
636 | for (i=0; i < lmb.reserved.cnt; i++) { | ||
637 | unsigned long physbase = lmb.reserved.region[i].physbase; | ||
638 | unsigned long size = lmb.reserved.region[i].size; | ||
639 | |||
640 | reserve_bootmem(physbase, size); | ||
641 | } | ||
642 | } | ||
643 | |||
644 | /* | ||
645 | * paging_init() sets up the page tables - in fact we've already done this. | ||
646 | */ | ||
647 | void __init paging_init(void) | ||
648 | { | ||
649 | unsigned long zones_size[MAX_NR_ZONES]; | ||
650 | unsigned long zholes_size[MAX_NR_ZONES]; | ||
651 | unsigned long total_ram = lmb_phys_mem_size(); | ||
652 | unsigned long top_of_ram = lmb_end_of_DRAM(); | ||
653 | |||
654 | printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", | ||
655 | top_of_ram, total_ram); | ||
656 | printk(KERN_INFO "Memory hole size: %ldMB\n", | ||
657 | (top_of_ram - total_ram) >> 20); | ||
658 | /* | ||
659 | * All pages are DMA-able so we put them all in the DMA zone. | ||
660 | */ | ||
661 | memset(zones_size, 0, sizeof(zones_size)); | ||
662 | memset(zholes_size, 0, sizeof(zholes_size)); | ||
663 | |||
664 | zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; | ||
665 | zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT; | ||
666 | |||
667 | free_area_init_node(0, &contig_page_data, zones_size, | ||
668 | __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); | ||
669 | } | ||
670 | #endif /* CONFIG_DISCONTIGMEM */ | ||
671 | |||
672 | static struct kcore_list kcore_vmem; | ||
673 | |||
674 | static int __init setup_kcore(void) | ||
675 | { | ||
676 | int i; | ||
677 | |||
678 | for (i=0; i < lmb.memory.cnt; i++) { | ||
679 | unsigned long physbase, size; | ||
680 | struct kcore_list *kcore_mem; | ||
681 | |||
682 | physbase = lmb.memory.region[i].physbase; | ||
683 | size = lmb.memory.region[i].size; | ||
684 | |||
685 | /* GFP_ATOMIC to avoid might_sleep warnings during boot */ | ||
686 | kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC); | ||
687 | if (!kcore_mem) | ||
688 | panic("mem_init: kmalloc failed\n"); | ||
689 | |||
690 | kclist_add(kcore_mem, __va(physbase), size); | ||
691 | } | ||
692 | |||
693 | kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); | ||
694 | |||
695 | return 0; | ||
696 | } | ||
697 | module_init(setup_kcore); | ||
698 | |||
699 | void __init mem_init(void) | ||
700 | { | ||
701 | #ifdef CONFIG_DISCONTIGMEM | ||
702 | int nid; | ||
703 | #endif | ||
704 | pg_data_t *pgdat; | ||
705 | unsigned long i; | ||
706 | struct page *page; | ||
707 | unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; | ||
708 | |||
709 | num_physpages = max_low_pfn; /* RAM is assumed contiguous */ | ||
710 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); | ||
711 | |||
712 | #ifdef CONFIG_DISCONTIGMEM | ||
713 | for_each_online_node(nid) { | ||
714 | if (NODE_DATA(nid)->node_spanned_pages != 0) { | ||
715 | printk("freeing bootmem node %x\n", nid); | ||
716 | totalram_pages += | ||
717 | free_all_bootmem_node(NODE_DATA(nid)); | ||
718 | } | ||
719 | } | ||
720 | #else | ||
721 | max_mapnr = num_physpages; | ||
722 | totalram_pages += free_all_bootmem(); | ||
723 | #endif | ||
724 | |||
725 | for_each_pgdat(pgdat) { | ||
726 | for (i = 0; i < pgdat->node_spanned_pages; i++) { | ||
727 | page = pgdat->node_mem_map + i; | ||
728 | if (PageReserved(page)) | ||
729 | reservedpages++; | ||
730 | } | ||
731 | } | ||
732 | |||
733 | codesize = (unsigned long)&_etext - (unsigned long)&_stext; | ||
734 | initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; | ||
735 | datasize = (unsigned long)&_edata - (unsigned long)&__init_end; | ||
736 | bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start; | ||
737 | |||
738 | printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " | ||
739 | "%luk reserved, %luk data, %luk bss, %luk init)\n", | ||
740 | (unsigned long)nr_free_pages() << (PAGE_SHIFT-10), | ||
741 | num_physpages << (PAGE_SHIFT-10), | ||
742 | codesize >> 10, | ||
743 | reservedpages << (PAGE_SHIFT-10), | ||
744 | datasize >> 10, | ||
745 | bsssize >> 10, | ||
746 | initsize >> 10); | ||
747 | |||
748 | mem_init_done = 1; | ||
749 | |||
750 | #ifdef CONFIG_PPC_ISERIES | ||
751 | iommu_vio_init(); | ||
752 | #endif | ||
753 | /* Initialize the vDSO */ | ||
754 | vdso_init(); | ||
755 | } | ||
756 | |||
757 | /* | ||
758 | * This is called when a page has been modified by the kernel. | ||
759 | * It just marks the page as not i-cache clean. We do the i-cache | ||
760 | * flush later when the page is given to a user process, if necessary. | ||
761 | */ | ||
762 | void flush_dcache_page(struct page *page) | ||
763 | { | ||
764 | if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | ||
765 | return; | ||
766 | /* avoid an atomic op if possible */ | ||
767 | if (test_bit(PG_arch_1, &page->flags)) | ||
768 | clear_bit(PG_arch_1, &page->flags); | ||
769 | } | ||
770 | EXPORT_SYMBOL(flush_dcache_page); | ||
771 | |||
772 | void clear_user_page(void *page, unsigned long vaddr, struct page *pg) | ||
773 | { | ||
774 | clear_page(page); | ||
775 | |||
776 | if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | ||
777 | return; | ||
778 | /* | ||
779 | * We shouldnt have to do this, but some versions of glibc | ||
780 | * require it (ld.so assumes zero filled pages are icache clean) | ||
781 | * - Anton | ||
782 | */ | ||
783 | |||
784 | /* avoid an atomic op if possible */ | ||
785 | if (test_bit(PG_arch_1, &pg->flags)) | ||
786 | clear_bit(PG_arch_1, &pg->flags); | ||
787 | } | ||
788 | EXPORT_SYMBOL(clear_user_page); | ||
789 | |||
790 | void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, | ||
791 | struct page *pg) | ||
792 | { | ||
793 | copy_page(vto, vfrom); | ||
794 | |||
795 | /* | ||
796 | * We should be able to use the following optimisation, however | ||
797 | * there are two problems. | ||
798 | * Firstly a bug in some versions of binutils meant PLT sections | ||
799 | * were not marked executable. | ||
800 | * Secondly the first word in the GOT section is blrl, used | ||
801 | * to establish the GOT address. Until recently the GOT was | ||
802 | * not marked executable. | ||
803 | * - Anton | ||
804 | */ | ||
805 | #if 0 | ||
806 | if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) | ||
807 | return; | ||
808 | #endif | ||
809 | |||
810 | if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | ||
811 | return; | ||
812 | |||
813 | /* avoid an atomic op if possible */ | ||
814 | if (test_bit(PG_arch_1, &pg->flags)) | ||
815 | clear_bit(PG_arch_1, &pg->flags); | ||
816 | } | ||
817 | |||
818 | void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, | ||
819 | unsigned long addr, int len) | ||
820 | { | ||
821 | unsigned long maddr; | ||
822 | |||
823 | maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK); | ||
824 | flush_icache_range(maddr, maddr + len); | ||
825 | } | ||
826 | EXPORT_SYMBOL(flush_icache_user_range); | ||
827 | |||
828 | /* | ||
829 | * This is called at the end of handling a user page fault, when the | ||
830 | * fault has been handled by updating a PTE in the linux page tables. | ||
831 | * We use it to preload an HPTE into the hash table corresponding to | ||
832 | * the updated linux PTE. | ||
833 | * | ||
834 | * This must always be called with the mm->page_table_lock held | ||
835 | */ | ||
836 | void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea, | ||
837 | pte_t pte) | ||
838 | { | ||
839 | unsigned long vsid; | ||
840 | void *pgdir; | ||
841 | pte_t *ptep; | ||
842 | int local = 0; | ||
843 | cpumask_t tmp; | ||
844 | unsigned long flags; | ||
845 | |||
846 | /* handle i-cache coherency */ | ||
847 | if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && | ||
848 | !cpu_has_feature(CPU_FTR_NOEXECUTE)) { | ||
849 | unsigned long pfn = pte_pfn(pte); | ||
850 | if (pfn_valid(pfn)) { | ||
851 | struct page *page = pfn_to_page(pfn); | ||
852 | if (!PageReserved(page) | ||
853 | && !test_bit(PG_arch_1, &page->flags)) { | ||
854 | __flush_dcache_icache(page_address(page)); | ||
855 | set_bit(PG_arch_1, &page->flags); | ||
856 | } | ||
857 | } | ||
858 | } | ||
859 | |||
860 | /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ | ||
861 | if (!pte_young(pte)) | ||
862 | return; | ||
863 | |||
864 | pgdir = vma->vm_mm->pgd; | ||
865 | if (pgdir == NULL) | ||
866 | return; | ||
867 | |||
868 | ptep = find_linux_pte(pgdir, ea); | ||
869 | if (!ptep) | ||
870 | return; | ||
871 | |||
872 | vsid = get_vsid(vma->vm_mm->context.id, ea); | ||
873 | |||
874 | local_irq_save(flags); | ||
875 | tmp = cpumask_of_cpu(smp_processor_id()); | ||
876 | if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) | ||
877 | local = 1; | ||
878 | |||
879 | __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep, | ||
880 | 0x300, local); | ||
881 | local_irq_restore(flags); | ||
882 | } | ||
883 | |||
884 | void __iomem * reserve_phb_iospace(unsigned long size) | ||
885 | { | ||
886 | void __iomem *virt_addr; | ||
887 | |||
888 | if (phbs_io_bot >= IMALLOC_BASE) | ||
889 | panic("reserve_phb_iospace(): phb io space overflow\n"); | ||
890 | |||
891 | virt_addr = (void __iomem *) phbs_io_bot; | ||
892 | phbs_io_bot += size; | ||
893 | |||
894 | return virt_addr; | ||
895 | } | ||
896 | |||
897 | kmem_cache_t *zero_cache; | ||
898 | |||
899 | static void zero_ctor(void *pte, kmem_cache_t *cache, unsigned long flags) | ||
900 | { | ||
901 | memset(pte, 0, PAGE_SIZE); | ||
902 | } | ||
903 | |||
904 | void pgtable_cache_init(void) | ||
905 | { | ||
906 | zero_cache = kmem_cache_create("zero", | ||
907 | PAGE_SIZE, | ||
908 | 0, | ||
909 | SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, | ||
910 | zero_ctor, | ||
911 | NULL); | ||
912 | if (!zero_cache) | ||
913 | panic("pgtable_cache_init(): could not create zero_cache!\n"); | ||
914 | } | ||
915 | |||
916 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, | ||
917 | unsigned long size, pgprot_t vma_prot) | ||
918 | { | ||
919 | if (ppc_md.phys_mem_access_prot) | ||
920 | return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); | ||
921 | |||
922 | if (!page_is_ram(addr >> PAGE_SHIFT)) | ||
923 | vma_prot = __pgprot(pgprot_val(vma_prot) | ||
924 | | _PAGE_GUARDED | _PAGE_NO_CACHE); | ||
925 | return vma_prot; | ||
926 | } | ||
927 | EXPORT_SYMBOL(phys_mem_access_prot); | ||