Diffstat (limited to 'arch/ppc64/mm/init.c')
-rw-r--r--  arch/ppc64/mm/init.c  950
1 files changed, 0 insertions, 950 deletions
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
deleted file mode 100644
index e2bd7776622f..000000000000
--- a/arch/ppc64/mm/init.c
+++ /dev/null
@@ -1,950 +0,0 @@
1 | /* | ||
2 | * PowerPC version | ||
3 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
4 | * | ||
5 | * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) | ||
6 | * and Cort Dougan (PReP) (cort@cs.nmt.edu) | ||
7 | * Copyright (C) 1996 Paul Mackerras | ||
8 | * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). | ||
9 | * | ||
10 | * Derived from "arch/i386/mm/init.c" | ||
11 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
12 | * | ||
13 | * Dave Engebretsen <engebret@us.ibm.com> | ||
14 | * Rework for PPC64 port. | ||
15 | * | ||
16 | * This program is free software; you can redistribute it and/or | ||
17 | * modify it under the terms of the GNU General Public License | ||
18 | * as published by the Free Software Foundation; either version | ||
19 | * 2 of the License, or (at your option) any later version. | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <linux/config.h> | ||
24 | #include <linux/signal.h> | ||
25 | #include <linux/sched.h> | ||
26 | #include <linux/kernel.h> | ||
27 | #include <linux/errno.h> | ||
28 | #include <linux/string.h> | ||
29 | #include <linux/types.h> | ||
30 | #include <linux/mman.h> | ||
31 | #include <linux/mm.h> | ||
32 | #include <linux/swap.h> | ||
33 | #include <linux/stddef.h> | ||
34 | #include <linux/vmalloc.h> | ||
35 | #include <linux/init.h> | ||
36 | #include <linux/delay.h> | ||
37 | #include <linux/bootmem.h> | ||
38 | #include <linux/highmem.h> | ||
39 | #include <linux/idr.h> | ||
40 | #include <linux/nodemask.h> | ||
41 | #include <linux/module.h> | ||
42 | |||
43 | #include <asm/pgalloc.h> | ||
44 | #include <asm/page.h> | ||
45 | #include <asm/prom.h> | ||
46 | #include <asm/lmb.h> | ||
47 | #include <asm/rtas.h> | ||
48 | #include <asm/io.h> | ||
49 | #include <asm/mmu_context.h> | ||
50 | #include <asm/pgtable.h> | ||
51 | #include <asm/mmu.h> | ||
52 | #include <asm/uaccess.h> | ||
53 | #include <asm/smp.h> | ||
54 | #include <asm/machdep.h> | ||
55 | #include <asm/tlb.h> | ||
56 | #include <asm/eeh.h> | ||
57 | #include <asm/processor.h> | ||
58 | #include <asm/mmzone.h> | ||
59 | #include <asm/cputable.h> | ||
60 | #include <asm/ppcdebug.h> | ||
61 | #include <asm/sections.h> | ||
62 | #include <asm/system.h> | ||
63 | #include <asm/iommu.h> | ||
64 | #include <asm/abs_addr.h> | ||
65 | #include <asm/vdso.h> | ||
66 | #include <asm/imalloc.h> | ||
67 | |||
68 | #if PGTABLE_RANGE > USER_VSID_RANGE | ||
69 | #warning Limited user VSID range means pagetable space is wasted | ||
70 | #endif | ||
71 | |||
72 | #if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) | ||
73 | #warning TASK_SIZE is smaller than it needs to be. | ||
74 | #endif | ||
75 | |||
76 | int mem_init_done; | ||
77 | unsigned long ioremap_bot = IMALLOC_BASE; | ||
78 | static unsigned long phbs_io_bot = PHBS_IO_BASE; | ||
79 | |||
80 | extern pgd_t swapper_pg_dir[]; | ||
81 | extern struct task_struct *current_set[NR_CPUS]; | ||
82 | |||
83 | unsigned long klimit = (unsigned long)_end; | ||
84 | |||
85 | unsigned long _SDR1=0; | ||
86 | unsigned long _ASR=0; | ||
87 | |||
88 | /* max amount of RAM to use */ | ||
89 | unsigned long __max_memory; | ||
90 | |||
91 | /* info on what we think the IO hole is */ | ||
92 | unsigned long io_hole_start; | ||
93 | unsigned long io_hole_size; | ||
94 | |||
95 | void show_mem(void) | ||
96 | { | ||
97 | unsigned long total = 0, reserved = 0; | ||
98 | unsigned long shared = 0, cached = 0; | ||
99 | struct page *page; | ||
100 | pg_data_t *pgdat; | ||
101 | unsigned long i; | ||
102 | |||
103 | printk("Mem-info:\n"); | ||
104 | show_free_areas(); | ||
105 | printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); | ||
106 | for_each_pgdat(pgdat) { | ||
107 | unsigned long flags; | ||
108 | pgdat_resize_lock(pgdat, &flags); | ||
109 | for (i = 0; i < pgdat->node_spanned_pages; i++) { | ||
110 | page = pgdat_page_nr(pgdat, i); | ||
111 | total++; | ||
112 | if (PageReserved(page)) | ||
113 | reserved++; | ||
114 | else if (PageSwapCache(page)) | ||
115 | cached++; | ||
116 | else if (page_count(page)) | ||
117 | shared += page_count(page) - 1; | ||
118 | } | ||
119 | pgdat_resize_unlock(pgdat, &flags); | ||
120 | } | ||
121 | printk("%ld pages of RAM\n", total); | ||
122 | printk("%ld reserved pages\n", reserved); | ||
123 | printk("%ld pages shared\n", shared); | ||
124 | printk("%ld pages swap cached\n", cached); | ||
125 | } | ||
126 | |||
127 | #ifdef CONFIG_PPC_ISERIES | ||
128 | |||
129 | void __iomem *ioremap(unsigned long addr, unsigned long size) | ||
130 | { | ||
131 | return (void __iomem *)addr; | ||
132 | } | ||
133 | |||
134 | extern void __iomem *__ioremap(unsigned long addr, unsigned long size, | ||
135 | unsigned long flags) | ||
136 | { | ||
137 | return (void __iomem *)addr; | ||
138 | } | ||
139 | |||
140 | void iounmap(volatile void __iomem *addr) | ||
141 | { | ||
142 | return; | ||
143 | } | ||
144 | |||
145 | #else | ||
146 | |||
147 | /* | ||
148 | * map_io_page is currently only called by __ioremap
149 | * map_io_page adds an entry to the ioremap page table | ||
150 | * and adds an entry to the HPT, possibly bolting it | ||
151 | */ | ||
152 | static int map_io_page(unsigned long ea, unsigned long pa, int flags) | ||
153 | { | ||
154 | pgd_t *pgdp; | ||
155 | pud_t *pudp; | ||
156 | pmd_t *pmdp; | ||
157 | pte_t *ptep; | ||
158 | unsigned long vsid; | ||
159 | |||
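/*
 * Two paths below: once mem_init_done is set, a normal Linux page
 * table entry is created for the ioremap space; before that, the
 * translation is bolted directly into the hash page table, as the
 * comment in the else branch explains.
 */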
160 | if (mem_init_done) { | ||
161 | pgdp = pgd_offset_k(ea); | ||
162 | pudp = pud_alloc(&init_mm, pgdp, ea); | ||
163 | if (!pudp) | ||
164 | return -ENOMEM; | ||
165 | pmdp = pmd_alloc(&init_mm, pudp, ea); | ||
166 | if (!pmdp) | ||
167 | return -ENOMEM; | ||
168 | ptep = pte_alloc_kernel(pmdp, ea); | ||
169 | if (!ptep) | ||
170 | return -ENOMEM; | ||
171 | set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, | ||
172 | __pgprot(flags))); | ||
173 | } else { | ||
174 | unsigned long va, vpn, hash, hpteg; | ||
175 | |||
176 | /* | ||
177 | * If the mm subsystem is not fully up, we cannot create a | ||
178 | * linux page table entry for this mapping. Simply bolt an | ||
179 | * entry in the hardware page table. | ||
180 | */ | ||
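/*
 * The kernel VSID selects the 256MB segment (2^28 bytes, hence the
 * shift by 28 and the 0xFFFFFFF offset mask), the low 28 bits of ea
 * are the offset within that segment, and the resulting virtual
 * address taken in page-sized units (vpn) is what gets hashed into
 * an HPTE group index below.
 */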
181 | vsid = get_kernel_vsid(ea); | ||
182 | va = (vsid << 28) | (ea & 0xFFFFFFF); | ||
183 | vpn = va >> PAGE_SHIFT; | ||
184 | |||
185 | hash = hpt_hash(vpn, 0); | ||
186 | |||
187 | hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); | ||
188 | |||
189 | /* Panic if a pte group is full */
190 | if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, | ||
191 | HPTE_V_BOLTED, | ||
192 | _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX) | ||
193 | == -1) { | ||
194 | panic("map_io_page: could not insert mapping"); | ||
195 | } | ||
196 | } | ||
197 | return 0; | ||
198 | } | ||
199 | |||
200 | |||
201 | static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa, | ||
202 | unsigned long ea, unsigned long size, | ||
203 | unsigned long flags) | ||
204 | { | ||
205 | unsigned long i; | ||
206 | |||
207 | if ((flags & _PAGE_PRESENT) == 0) | ||
208 | flags |= pgprot_val(PAGE_KERNEL); | ||
209 | |||
210 | for (i = 0; i < size; i += PAGE_SIZE) | ||
211 | if (map_io_page(ea+i, pa+i, flags)) | ||
212 | return NULL; | ||
213 | |||
214 | return (void __iomem *) (ea + (addr & ~PAGE_MASK)); | ||
215 | } | ||
216 | |||
217 | |||
218 | void __iomem * | ||
219 | ioremap(unsigned long addr, unsigned long size) | ||
220 | { | ||
221 | return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED); | ||
222 | } | ||
223 | |||
224 | void __iomem * __ioremap(unsigned long addr, unsigned long size, | ||
225 | unsigned long flags) | ||
226 | { | ||
227 | unsigned long pa, ea; | ||
228 | void __iomem *ret; | ||
229 | |||
230 | /* | ||
231 | * Choose an address to map it to. | ||
232 | * Once the imalloc system is running, we use it. | ||
233 | * Before that, we map using addresses going | ||
234 | * up from ioremap_bot. imalloc will use | ||
235 | * the addresses from ioremap_bot through | ||
236 | * IMALLOC_END | ||
237 | * | ||
238 | */ | ||
239 | pa = addr & PAGE_MASK; | ||
240 | size = PAGE_ALIGN(addr + size) - pa; | ||
241 | |||
242 | if (size == 0) | ||
243 | return NULL; | ||
244 | |||
245 | if (mem_init_done) { | ||
246 | struct vm_struct *area; | ||
247 | area = im_get_free_area(size); | ||
248 | if (area == NULL) | ||
249 | return NULL; | ||
250 | ea = (unsigned long)(area->addr); | ||
251 | ret = __ioremap_com(addr, pa, ea, size, flags); | ||
252 | if (!ret) | ||
253 | im_free(area->addr); | ||
254 | } else { | ||
255 | ea = ioremap_bot; | ||
256 | ret = __ioremap_com(addr, pa, ea, size, flags); | ||
257 | if (ret) | ||
258 | ioremap_bot += size; | ||
259 | } | ||
260 | return ret; | ||
261 | } | ||
262 | |||
263 | #define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK)) | ||
264 | |||
265 | int __ioremap_explicit(unsigned long pa, unsigned long ea, | ||
266 | unsigned long size, unsigned long flags) | ||
267 | { | ||
268 | struct vm_struct *area; | ||
269 | void __iomem *ret; | ||
270 | |||
271 | /* For now, require page-aligned values for pa, ea, and size */ | ||
272 | if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) || | ||
273 | !IS_PAGE_ALIGNED(size)) { | ||
274 | printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__); | ||
275 | return 1; | ||
276 | } | ||
277 | |||
278 | if (!mem_init_done) { | ||
279 | /* Two things to consider in this case: | ||
280 | * 1) No records will be kept (imalloc, etc) that the region | ||
281 | * has been remapped | ||
282 | * 2) It won't be easy to iounmap() the region later (because | ||
283 | * of 1) | ||
284 | */ | ||
285 | ; | ||
286 | } else { | ||
287 | area = im_get_area(ea, size, | ||
288 | IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS); | ||
289 | if (area == NULL) { | ||
290 | /* Expected when PHB-dlpar is in play */ | ||
291 | return 1; | ||
292 | } | ||
293 | if (ea != (unsigned long) area->addr) { | ||
294 | printk(KERN_ERR "unexpected addr return from " | ||
295 | "im_get_area\n"); | ||
296 | return 1; | ||
297 | } | ||
298 | } | ||
299 | |||
300 | ret = __ioremap_com(pa, pa, ea, size, flags); | ||
301 | if (ret == NULL) { | ||
302 | printk(KERN_ERR "ioremap_explicit() allocation failure !\n"); | ||
303 | return 1; | ||
304 | } | ||
305 | if (ret != (void *) ea) { | ||
306 | printk(KERN_ERR "__ioremap_com() returned unexpected addr\n"); | ||
307 | return 1; | ||
308 | } | ||
309 | |||
310 | return 0; | ||
311 | } | ||
312 | |||
313 | /* | ||
314 | * Unmap an IO region and remove it from imalloc'd list. | ||
315 | * Access to IO memory should be serialized by driver. | ||
316 | * This code is modeled after vmalloc code - unmap_vm_area() | ||
317 | * | ||
318 | * XXX what about calls before mem_init_done (ie python_countermeasures()) | ||
319 | */ | ||
320 | void iounmap(volatile void __iomem *token) | ||
321 | { | ||
322 | void *addr; | ||
323 | |||
324 | if (!mem_init_done) | ||
325 | return; | ||
326 | |||
327 | addr = (void *) ((unsigned long __force) token & PAGE_MASK); | ||
328 | |||
329 | im_free(addr); | ||
330 | } | ||
331 | |||
332 | static int iounmap_subset_regions(unsigned long addr, unsigned long size) | ||
333 | { | ||
334 | struct vm_struct *area; | ||
335 | |||
336 | /* Check whether subsets of this region exist */ | ||
337 | area = im_get_area(addr, size, IM_REGION_SUPERSET); | ||
338 | if (area == NULL) | ||
339 | return 1; | ||
340 | |||
341 | while (area) { | ||
342 | iounmap((void __iomem *) area->addr); | ||
343 | area = im_get_area(addr, size, | ||
344 | IM_REGION_SUPERSET); | ||
345 | } | ||
346 | |||
347 | return 0; | ||
348 | } | ||
349 | |||
350 | int iounmap_explicit(volatile void __iomem *start, unsigned long size) | ||
351 | { | ||
352 | struct vm_struct *area; | ||
353 | unsigned long addr; | ||
354 | int rc; | ||
355 | |||
356 | addr = (unsigned long __force) start & PAGE_MASK; | ||
357 | |||
358 | /* Verify that the region either exists or is a subset of an existing | ||
359 | * region. In the latter case, split the parent region to create | ||
360 | * the exact region | ||
361 | */ | ||
362 | area = im_get_area(addr, size, | ||
363 | IM_REGION_EXISTS | IM_REGION_SUBSET); | ||
364 | if (area == NULL) { | ||
365 | /* Determine whether subset regions exist. If so, unmap */ | ||
366 | rc = iounmap_subset_regions(addr, size); | ||
367 | if (rc) { | ||
368 | printk(KERN_ERR | ||
369 | "%s() cannot unmap nonexistent range 0x%lx\n", | ||
370 | __FUNCTION__, addr); | ||
371 | return 1; | ||
372 | } | ||
373 | } else { | ||
374 | iounmap((void __iomem *) area->addr); | ||
375 | } | ||
376 | /* | ||
377 | * FIXME! This can't be right: | ||
378 | iounmap(area->addr); | ||
379 | * Maybe it should be "iounmap(area);" | ||
380 | */ | ||
381 | return 0; | ||
382 | } | ||
383 | |||
384 | #endif | ||
385 | |||
386 | EXPORT_SYMBOL(ioremap); | ||
387 | EXPORT_SYMBOL(__ioremap); | ||
388 | EXPORT_SYMBOL(iounmap); | ||
389 | |||
390 | void free_initmem(void) | ||
391 | { | ||
392 | unsigned long addr; | ||
393 | |||
394 | addr = (unsigned long)__init_begin; | ||
395 | for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { | ||
396 | memset((void *)addr, 0xcc, PAGE_SIZE); | ||
397 | ClearPageReserved(virt_to_page(addr)); | ||
398 | set_page_count(virt_to_page(addr), 1); | ||
399 | free_page(addr); | ||
400 | totalram_pages++; | ||
401 | } | ||
402 | printk ("Freeing unused kernel memory: %luk freed\n", | ||
403 | ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10); | ||
404 | } | ||
405 | |||
406 | #ifdef CONFIG_BLK_DEV_INITRD | ||
407 | void free_initrd_mem(unsigned long start, unsigned long end) | ||
408 | { | ||
409 | if (start < end) | ||
410 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); | ||
411 | for (; start < end; start += PAGE_SIZE) { | ||
412 | ClearPageReserved(virt_to_page(start)); | ||
413 | set_page_count(virt_to_page(start), 1); | ||
414 | free_page(start); | ||
415 | totalram_pages++; | ||
416 | } | ||
417 | } | ||
418 | #endif | ||
419 | |||
420 | static DEFINE_SPINLOCK(mmu_context_lock); | ||
421 | static DEFINE_IDR(mmu_context_idr); | ||
422 | |||
423 | int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | ||
424 | { | ||
425 | int index; | ||
426 | int err; | ||
427 | |||
428 | again: | ||
429 | if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) | ||
430 | return -ENOMEM; | ||
431 | |||
432 | spin_lock(&mmu_context_lock); | ||
433 | err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); | ||
434 | spin_unlock(&mmu_context_lock); | ||
435 | |||
436 | if (err == -EAGAIN) | ||
437 | goto again; | ||
438 | else if (err) | ||
439 | return err; | ||
440 | |||
441 | if (index > MAX_CONTEXT) { | ||
442 | idr_remove(&mmu_context_idr, index); | ||
443 | return -ENOMEM; | ||
444 | } | ||
445 | |||
446 | mm->context.id = index; | ||
447 | |||
448 | return 0; | ||
449 | } | ||
450 | |||
451 | void destroy_context(struct mm_struct *mm) | ||
452 | { | ||
453 | spin_lock(&mmu_context_lock); | ||
454 | idr_remove(&mmu_context_idr, mm->context.id); | ||
455 | spin_unlock(&mmu_context_lock); | ||
456 | |||
457 | mm->context.id = NO_CONTEXT; | ||
458 | } | ||
459 | |||
460 | /* | ||
461 | * Do very early mm setup. | ||
462 | */ | ||
463 | void __init mm_init_ppc64(void) | ||
464 | { | ||
465 | #ifndef CONFIG_PPC_ISERIES | ||
466 | unsigned long i; | ||
467 | #endif | ||
468 | |||
469 | ppc64_boot_msg(0x100, "MM Init"); | ||
470 | |||
471 | /* This is the story of the IO hole... please, keep seated, | ||
472 | * unfortunately, we are out of oxygen masks at the moment. | ||
473 | * So we need some rough way to tell where your big IO hole | ||
474 | * is. On pmac, it's between 2G and 4G, on POWER3, it's around | ||
475 | * that area as well, on POWER4 we don't have one, etc... | ||
476 | * We need that as a "hint" when sizing the TCE table on POWER3 | ||
477 | * So far, the simplest way that seems to work well enough for us is
478 | * to just assume that the first discontinuity in our physical | ||
479 | * RAM layout is the IO hole. That may not be correct in the future | ||
480 | * (and isn't on iSeries but then we don't care ;) | ||
481 | */ | ||
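/*
 * As an illustration: if lmb reported memory regions [0, 2G) and
 * [4G, 6G), the loop below would record io_hole_start = 2G and
 * io_hole_size = 2G.
 */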
482 | |||
483 | #ifndef CONFIG_PPC_ISERIES | ||
484 | for (i = 1; i < lmb.memory.cnt; i++) { | ||
485 | unsigned long base, prevbase, prevsize; | ||
486 | |||
487 | prevbase = lmb.memory.region[i-1].base; | ||
488 | prevsize = lmb.memory.region[i-1].size; | ||
489 | base = lmb.memory.region[i].base; | ||
490 | if (base > (prevbase + prevsize)) { | ||
491 | io_hole_start = prevbase + prevsize; | ||
492 | io_hole_size = base - (prevbase + prevsize); | ||
493 | break; | ||
494 | } | ||
495 | } | ||
496 | #endif /* CONFIG_PPC_ISERIES */ | ||
497 | if (io_hole_start) | ||
498 | printk("IO Hole assumed to be %lx -> %lx\n", | ||
499 | io_hole_start, io_hole_start + io_hole_size - 1); | ||
500 | |||
501 | ppc64_boot_msg(0x100, "MM Init Done"); | ||
502 | } | ||
503 | |||
504 | /* | ||
505 | * This is called by /dev/mem to know if a given address has to | ||
506 | * be mapped non-cacheable or not | ||
507 | */ | ||
508 | int page_is_ram(unsigned long pfn) | ||
509 | { | ||
510 | int i; | ||
511 | unsigned long paddr = (pfn << PAGE_SHIFT); | ||
512 | |||
513 | for (i=0; i < lmb.memory.cnt; i++) { | ||
514 | unsigned long base; | ||
515 | |||
516 | base = lmb.memory.region[i].base; | ||
517 | |||
518 | if ((paddr >= base) && | ||
519 | (paddr < (base + lmb.memory.region[i].size))) { | ||
520 | return 1; | ||
521 | } | ||
522 | } | ||
523 | |||
524 | return 0; | ||
525 | } | ||
526 | EXPORT_SYMBOL(page_is_ram); | ||
527 | |||
528 | /* | ||
529 | * Initialize the bootmem system and give it all the memory we | ||
530 | * have available. | ||
531 | */ | ||
532 | #ifndef CONFIG_NEED_MULTIPLE_NODES | ||
533 | void __init do_init_bootmem(void) | ||
534 | { | ||
535 | unsigned long i; | ||
536 | unsigned long start, bootmap_pages; | ||
537 | unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT; | ||
538 | int boot_mapsize; | ||
539 | |||
540 | /* | ||
541 | * Find an area to use for the bootmem bitmap. Calculate the size of | ||
542 | * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE. | ||
543 | * Add 1 additional page in case the address isn't page-aligned. | ||
544 | */ | ||
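/*
 * For example, with 4K pages and 2GB of RAM that is 524288 page
 * frames, needing 524288 / 8 = 64KB of bitmap, which
 * bootmem_bootmap_pages() rounds up to 16 whole pages.
 */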
545 | bootmap_pages = bootmem_bootmap_pages(total_pages); | ||
546 | |||
547 | start = lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); | ||
548 | BUG_ON(!start); | ||
549 | |||
550 | boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages); | ||
551 | |||
552 | max_pfn = max_low_pfn; | ||
553 | |||
554 | /* Add all physical memory to the bootmem map, mark each area | ||
555 | * present. | ||
556 | */ | ||
557 | for (i=0; i < lmb.memory.cnt; i++) | ||
558 | free_bootmem(lmb.memory.region[i].base, | ||
559 | lmb_size_bytes(&lmb.memory, i)); | ||
560 | |||
561 | /* reserve the sections we're already using */ | ||
562 | for (i=0; i < lmb.reserved.cnt; i++) | ||
563 | reserve_bootmem(lmb.reserved.region[i].base, | ||
564 | lmb_size_bytes(&lmb.reserved, i)); | ||
565 | |||
566 | for (i=0; i < lmb.memory.cnt; i++) | ||
567 | memory_present(0, lmb_start_pfn(&lmb.memory, i), | ||
568 | lmb_end_pfn(&lmb.memory, i)); | ||
569 | } | ||
570 | |||
571 | /* | ||
572 | * paging_init() sets up the page tables - in fact we've already done this. | ||
573 | */ | ||
574 | void __init paging_init(void) | ||
575 | { | ||
576 | unsigned long zones_size[MAX_NR_ZONES]; | ||
577 | unsigned long zholes_size[MAX_NR_ZONES]; | ||
578 | unsigned long total_ram = lmb_phys_mem_size(); | ||
579 | unsigned long top_of_ram = lmb_end_of_DRAM(); | ||
580 | |||
581 | printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", | ||
582 | top_of_ram, total_ram); | ||
583 | printk(KERN_INFO "Memory hole size: %ldMB\n", | ||
584 | (top_of_ram - total_ram) >> 20); | ||
585 | /* | ||
586 | * All pages are DMA-able so we put them all in the DMA zone. | ||
587 | */ | ||
588 | memset(zones_size, 0, sizeof(zones_size)); | ||
589 | memset(zholes_size, 0, sizeof(zholes_size)); | ||
590 | |||
591 | zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; | ||
592 | zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT; | ||
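/*
 * For example, 3GB of RAM with a 1GB IO hole below a 4GB top_of_ram
 * gives ZONE_DMA a span of 1048576 pages, 262144 of them counted as
 * holes.
 */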
593 | |||
594 | free_area_init_node(0, NODE_DATA(0), zones_size, | ||
595 | __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); | ||
596 | } | ||
597 | #endif /* ! CONFIG_NEED_MULTIPLE_NODES */ | ||
598 | |||
599 | static struct kcore_list kcore_vmem; | ||
600 | |||
601 | static int __init setup_kcore(void) | ||
602 | { | ||
603 | int i; | ||
604 | |||
605 | for (i=0; i < lmb.memory.cnt; i++) { | ||
606 | unsigned long base, size; | ||
607 | struct kcore_list *kcore_mem; | ||
608 | |||
609 | base = lmb.memory.region[i].base; | ||
610 | size = lmb.memory.region[i].size; | ||
611 | |||
612 | /* GFP_ATOMIC to avoid might_sleep warnings during boot */ | ||
613 | kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC); | ||
614 | if (!kcore_mem) | ||
615 | panic("mem_init: kmalloc failed\n"); | ||
616 | |||
617 | kclist_add(kcore_mem, __va(base), size); | ||
618 | } | ||
619 | |||
620 | kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); | ||
621 | |||
622 | return 0; | ||
623 | } | ||
624 | module_init(setup_kcore); | ||
625 | |||
626 | void __init mem_init(void) | ||
627 | { | ||
628 | #ifdef CONFIG_NEED_MULTIPLE_NODES | ||
629 | int nid; | ||
630 | #endif | ||
631 | pg_data_t *pgdat; | ||
632 | unsigned long i; | ||
633 | struct page *page; | ||
634 | unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; | ||
635 | |||
636 | num_physpages = max_low_pfn; /* RAM is assumed contiguous */ | ||
637 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); | ||
638 | |||
639 | #ifdef CONFIG_NEED_MULTIPLE_NODES | ||
640 | for_each_online_node(nid) { | ||
641 | if (NODE_DATA(nid)->node_spanned_pages != 0) { | ||
642 | printk("freeing bootmem node %x\n", nid); | ||
643 | totalram_pages += | ||
644 | free_all_bootmem_node(NODE_DATA(nid)); | ||
645 | } | ||
646 | } | ||
647 | #else | ||
648 | max_mapnr = num_physpages; | ||
649 | totalram_pages += free_all_bootmem(); | ||
650 | #endif | ||
651 | |||
652 | for_each_pgdat(pgdat) { | ||
653 | unsigned long flags; | ||
654 | pgdat_resize_lock(pgdat, &flags); | ||
655 | for (i = 0; i < pgdat->node_spanned_pages; i++) { | ||
656 | page = pgdat_page_nr(pgdat, i); | ||
657 | if (PageReserved(page)) | ||
658 | reservedpages++; | ||
659 | } | ||
660 | pgdat_resize_unlock(pgdat, &flags); | ||
661 | } | ||
662 | |||
663 | codesize = (unsigned long)&_etext - (unsigned long)&_stext; | ||
664 | initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; | ||
665 | datasize = (unsigned long)&_edata - (unsigned long)&__init_end; | ||
666 | bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start; | ||
667 | |||
668 | printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " | ||
669 | "%luk reserved, %luk data, %luk bss, %luk init)\n", | ||
670 | (unsigned long)nr_free_pages() << (PAGE_SHIFT-10), | ||
671 | num_physpages << (PAGE_SHIFT-10), | ||
672 | codesize >> 10, | ||
673 | reservedpages << (PAGE_SHIFT-10), | ||
674 | datasize >> 10, | ||
675 | bsssize >> 10, | ||
676 | initsize >> 10); | ||
677 | |||
678 | mem_init_done = 1; | ||
679 | |||
680 | /* Initialize the vDSO */ | ||
681 | vdso_init(); | ||
682 | } | ||
683 | |||
684 | /* | ||
685 | * This is called when a page has been modified by the kernel. | ||
686 | * It just marks the page as not i-cache clean. We do the i-cache | ||
687 | * flush later when the page is given to a user process, if necessary. | ||
688 | */ | ||
689 | void flush_dcache_page(struct page *page) | ||
690 | { | ||
691 | if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | ||
692 | return; | ||
693 | /* avoid an atomic op if possible */ | ||
694 | if (test_bit(PG_arch_1, &page->flags)) | ||
695 | clear_bit(PG_arch_1, &page->flags); | ||
696 | } | ||
697 | EXPORT_SYMBOL(flush_dcache_page); | ||
698 | |||
699 | void clear_user_page(void *page, unsigned long vaddr, struct page *pg) | ||
700 | { | ||
701 | clear_page(page); | ||
702 | |||
703 | if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | ||
704 | return; | ||
705 | /* | ||
706 | * We shouldn't have to do this, but some versions of glibc
707 | * require it (ld.so assumes zero filled pages are icache clean) | ||
708 | * - Anton | ||
709 | */ | ||
710 | |||
711 | /* avoid an atomic op if possible */ | ||
712 | if (test_bit(PG_arch_1, &pg->flags)) | ||
713 | clear_bit(PG_arch_1, &pg->flags); | ||
714 | } | ||
715 | EXPORT_SYMBOL(clear_user_page); | ||
716 | |||
717 | void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, | ||
718 | struct page *pg) | ||
719 | { | ||
720 | copy_page(vto, vfrom); | ||
721 | |||
722 | /* | ||
723 | * We should be able to use the following optimisation; however,
724 | * there are two problems. | ||
725 | * Firstly a bug in some versions of binutils meant PLT sections | ||
726 | * were not marked executable. | ||
727 | * Secondly the first word in the GOT section is blrl, used | ||
728 | * to establish the GOT address. Until recently the GOT was | ||
729 | * not marked executable. | ||
730 | * - Anton | ||
731 | */ | ||
732 | #if 0 | ||
733 | if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) | ||
734 | return; | ||
735 | #endif | ||
736 | |||
737 | if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | ||
738 | return; | ||
739 | |||
740 | /* avoid an atomic op if possible */ | ||
741 | if (test_bit(PG_arch_1, &pg->flags)) | ||
742 | clear_bit(PG_arch_1, &pg->flags); | ||
743 | } | ||
744 | |||
745 | void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, | ||
746 | unsigned long addr, int len) | ||
747 | { | ||
748 | unsigned long maddr; | ||
749 | |||
750 | maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK); | ||
751 | flush_icache_range(maddr, maddr + len); | ||
752 | } | ||
753 | EXPORT_SYMBOL(flush_icache_user_range); | ||
754 | |||
755 | /* | ||
756 | * This is called at the end of handling a user page fault, when the | ||
757 | * fault has been handled by updating a PTE in the linux page tables. | ||
758 | * We use it to preload an HPTE into the hash table corresponding to | ||
759 | * the updated linux PTE. | ||
760 | * | ||
761 | * This must always be called with the mm->page_table_lock held | ||
762 | */ | ||
763 | void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea, | ||
764 | pte_t pte) | ||
765 | { | ||
766 | unsigned long vsid; | ||
767 | void *pgdir; | ||
768 | pte_t *ptep; | ||
769 | int local = 0; | ||
770 | cpumask_t tmp; | ||
771 | unsigned long flags; | ||
772 | |||
773 | /* handle i-cache coherency */ | ||
774 | if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && | ||
775 | !cpu_has_feature(CPU_FTR_NOEXECUTE)) { | ||
776 | unsigned long pfn = pte_pfn(pte); | ||
777 | if (pfn_valid(pfn)) { | ||
778 | struct page *page = pfn_to_page(pfn); | ||
779 | if (!PageReserved(page) | ||
780 | && !test_bit(PG_arch_1, &page->flags)) { | ||
781 | __flush_dcache_icache(page_address(page)); | ||
782 | set_bit(PG_arch_1, &page->flags); | ||
783 | } | ||
784 | } | ||
785 | } | ||
786 | |||
787 | /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ | ||
788 | if (!pte_young(pte)) | ||
789 | return; | ||
790 | |||
791 | pgdir = vma->vm_mm->pgd; | ||
792 | if (pgdir == NULL) | ||
793 | return; | ||
794 | |||
795 | ptep = find_linux_pte(pgdir, ea); | ||
796 | if (!ptep) | ||
797 | return; | ||
798 | |||
799 | vsid = get_vsid(vma->vm_mm->context.id, ea); | ||
800 | |||
801 | local_irq_save(flags); | ||
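/*
 * If this mm has only ever been active on the current CPU, the hash
 * insertion may be done in "local" mode, avoiding a broadcast TLB
 * invalidation if an existing HPTE has to be replaced.
 */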
802 | tmp = cpumask_of_cpu(smp_processor_id()); | ||
803 | if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) | ||
804 | local = 1; | ||
805 | |||
806 | __hash_page(ea, 0, vsid, ptep, 0x300, local); | ||
807 | local_irq_restore(flags); | ||
808 | } | ||
809 | |||
810 | void __iomem * reserve_phb_iospace(unsigned long size) | ||
811 | { | ||
812 | void __iomem *virt_addr; | ||
813 | |||
814 | if (phbs_io_bot >= IMALLOC_BASE) | ||
815 | panic("reserve_phb_iospace(): phb io space overflow\n"); | ||
816 | |||
817 | virt_addr = (void __iomem *) phbs_io_bot; | ||
818 | phbs_io_bot += size; | ||
819 | |||
820 | return virt_addr; | ||
821 | } | ||
822 | |||
823 | static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) | ||
824 | { | ||
825 | memset(addr, 0, kmem_cache_size(cache)); | ||
826 | } | ||
827 | |||
828 | static const int pgtable_cache_size[2] = { | ||
829 | PTE_TABLE_SIZE, PMD_TABLE_SIZE | ||
830 | }; | ||
831 | static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { | ||
832 | "pgd_pte_cache", "pud_pmd_cache", | ||
833 | }; | ||
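/*
 * Only two slab caches back the four page table levels: judging by
 * the cache names, PGDs share the PTE-sized cache and PUDs share the
 * PMD-sized cache; the BUILD_BUG_ON()s in pgtable_cache_init() check
 * that the corresponding table sizes really do match up.
 */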
834 | |||
835 | kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; | ||
836 | |||
837 | void pgtable_cache_init(void) | ||
838 | { | ||
839 | int i; | ||
840 | |||
841 | BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]); | ||
842 | BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]); | ||
843 | BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]); | ||
844 | BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]); | ||
845 | |||
846 | for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { | ||
847 | int size = pgtable_cache_size[i]; | ||
848 | const char *name = pgtable_cache_name[i]; | ||
849 | |||
850 | pgtable_cache[i] = kmem_cache_create(name, | ||
851 | size, size, | ||
852 | SLAB_HWCACHE_ALIGN | ||
853 | | SLAB_MUST_HWCACHE_ALIGN, | ||
854 | zero_ctor, | ||
855 | NULL); | ||
856 | if (! pgtable_cache[i]) | ||
857 | panic("pgtable_cache_init(): could not create %s!\n", | ||
858 | name); | ||
859 | } | ||
860 | } | ||
861 | |||
862 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, | ||
863 | unsigned long size, pgprot_t vma_prot) | ||
864 | { | ||
865 | if (ppc_md.phys_mem_access_prot) | ||
866 | return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); | ||
867 | |||
868 | if (!page_is_ram(addr >> PAGE_SHIFT)) | ||
869 | vma_prot = __pgprot(pgprot_val(vma_prot) | ||
870 | | _PAGE_GUARDED | _PAGE_NO_CACHE); | ||
871 | return vma_prot; | ||
872 | } | ||
873 | EXPORT_SYMBOL(phys_mem_access_prot); | ||
874 | |||
875 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
876 | |||
877 | void online_page(struct page *page) | ||
878 | { | ||
879 | ClearPageReserved(page); | ||
880 | free_cold_page(page); | ||
881 | totalram_pages++; | ||
882 | num_physpages++; | ||
883 | } | ||
884 | |||
885 | /* | ||
886 | * This works only for the non-NUMA case. Later, we'll need a lookup | ||
887 | * to convert from real physical addresses to nid, that doesn't use | ||
888 | * pfn_to_nid(). | ||
889 | */ | ||
890 | int __devinit add_memory(u64 start, u64 size) | ||
891 | { | ||
892 | struct pglist_data *pgdata = NODE_DATA(0); | ||
893 | struct zone *zone; | ||
894 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
895 | unsigned long nr_pages = size >> PAGE_SHIFT; | ||
896 | |||
897 | /* this should work for most non-highmem platforms */ | ||
898 | zone = pgdata->node_zones; | ||
899 | |||
900 | return __add_pages(zone, start_pfn, nr_pages); | ||
901 | |||
902 | return 0; | ||
903 | } | ||
904 | |||
905 | /* | ||
906 | * First pass at this code will check to determine if the remove | ||
907 | * request is within the RMO. Do not allow removal within the RMO. | ||
908 | */ | ||
909 | int __devinit remove_memory(u64 start, u64 size) | ||
910 | { | ||
911 | struct zone *zone; | ||
912 | unsigned long start_pfn, end_pfn, nr_pages; | ||
913 | |||
914 | start_pfn = start >> PAGE_SHIFT; | ||
915 | nr_pages = size >> PAGE_SHIFT; | ||
916 | end_pfn = start_pfn + nr_pages; | ||
917 | |||
918 | printk("%s(): Attempting to remove memory in range "
919 | "%lx to %lx\n", __func__, start, start+size); | ||
920 | /* | ||
921 | * check for range within RMO | ||
922 | */ | ||
923 | zone = page_zone(pfn_to_page(start_pfn)); | ||
924 | |||
925 | printk("%s(): memory will be removed from " | ||
926 | "the %s zone\n", __func__, zone->name); | ||
927 | |||
928 | /* | ||
929 | * not handling removing memory ranges that | ||
930 | * overlap multiple zones yet | ||
931 | */ | ||
932 | if (end_pfn > (zone->zone_start_pfn + zone->spanned_pages)) | ||
933 | goto overlap; | ||
934 | |||
935 | /* make sure it is NOT in RMO */ | ||
936 | if ((start < lmb.rmo_size) || ((start+size) < lmb.rmo_size)) { | ||
937 | printk("%s(): range to be removed must NOT be in RMO!\n", | ||
938 | __func__); | ||
939 | goto in_rmo; | ||
940 | } | ||
941 | |||
942 | return __remove_pages(zone, start_pfn, nr_pages); | ||
943 | |||
944 | overlap: | ||
945 | printk("%s(): memory range to be removed overlaps " | ||
946 | "multiple zones!!!\n", __func__); | ||
947 | in_rmo: | ||
948 | return -1; | ||
949 | } | ||
950 | #endif /* CONFIG_MEMORY_HOTPLUG */ | ||