diff options
Diffstat (limited to 'arch/x86_64/mm/init.c')
-rw-r--r-- | arch/x86_64/mm/init.c | 180 |
1 files changed, 134 insertions, 46 deletions
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index eca60125efc3..7af1742aa958 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c | |||
@@ -24,6 +24,8 @@ | |||
24 | #include <linux/proc_fs.h> | 24 | #include <linux/proc_fs.h> |
25 | #include <linux/pci.h> | 25 | #include <linux/pci.h> |
26 | #include <linux/dma-mapping.h> | 26 | #include <linux/dma-mapping.h> |
27 | #include <linux/module.h> | ||
28 | #include <linux/memory_hotplug.h> | ||
27 | 29 | ||
28 | #include <asm/processor.h> | 30 | #include <asm/processor.h> |
29 | #include <asm/system.h> | 31 | #include <asm/system.h> |
@@ -180,13 +182,19 @@ static struct temp_map { | |||
180 | {} | 182 | {} |
181 | }; | 183 | }; |
182 | 184 | ||
183 | static __init void *alloc_low_page(int *index, unsigned long *phys) | 185 | static __meminit void *alloc_low_page(int *index, unsigned long *phys) |
184 | { | 186 | { |
185 | struct temp_map *ti; | 187 | struct temp_map *ti; |
186 | int i; | 188 | int i; |
187 | unsigned long pfn = table_end++, paddr; | 189 | unsigned long pfn = table_end++, paddr; |
188 | void *adr; | 190 | void *adr; |
189 | 191 | ||
192 | if (after_bootmem) { | ||
193 | adr = (void *)get_zeroed_page(GFP_ATOMIC); | ||
194 | *phys = __pa(adr); | ||
195 | return adr; | ||
196 | } | ||
197 | |||
190 | if (pfn >= end_pfn) | 198 | if (pfn >= end_pfn) |
191 | panic("alloc_low_page: ran out of memory"); | 199 | panic("alloc_low_page: ran out of memory"); |
192 | for (i = 0; temp_mappings[i].allocated; i++) { | 200 | for (i = 0; temp_mappings[i].allocated; i++) { |
@@ -199,55 +207,86 @@ static __init void *alloc_low_page(int *index, unsigned long *phys) | |||
199 | ti->allocated = 1; | 207 | ti->allocated = 1; |
200 | __flush_tlb(); | 208 | __flush_tlb(); |
201 | adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK); | 209 | adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK); |
210 | memset(adr, 0, PAGE_SIZE); | ||
202 | *index = i; | 211 | *index = i; |
203 | *phys = pfn * PAGE_SIZE; | 212 | *phys = pfn * PAGE_SIZE; |
204 | return adr; | 213 | return adr; |
205 | } | 214 | } |
206 | 215 | ||
207 | static __init void unmap_low_page(int i) | 216 | static __meminit void unmap_low_page(int i) |
208 | { | 217 | { |
209 | struct temp_map *ti = &temp_mappings[i]; | 218 | struct temp_map *ti; |
219 | |||
220 | if (after_bootmem) | ||
221 | return; | ||
222 | |||
223 | ti = &temp_mappings[i]; | ||
210 | set_pmd(ti->pmd, __pmd(0)); | 224 | set_pmd(ti->pmd, __pmd(0)); |
211 | ti->allocated = 0; | 225 | ti->allocated = 0; |
212 | } | 226 | } |
213 | 227 | ||
214 | static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) | 228 | static void __meminit |
229 | phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end) | ||
230 | { | ||
231 | int i; | ||
232 | |||
233 | for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) { | ||
234 | unsigned long entry; | ||
235 | |||
236 | if (address > end) { | ||
237 | for (; i < PTRS_PER_PMD; i++, pmd++) | ||
238 | set_pmd(pmd, __pmd(0)); | ||
239 | break; | ||
240 | } | ||
241 | entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address; | ||
242 | entry &= __supported_pte_mask; | ||
243 | set_pmd(pmd, __pmd(entry)); | ||
244 | } | ||
245 | } | ||
246 | |||
247 | static void __meminit | ||
248 | phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) | ||
249 | { | ||
250 | pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address)); | ||
251 | |||
252 | if (pmd_none(*pmd)) { | ||
253 | spin_lock(&init_mm.page_table_lock); | ||
254 | phys_pmd_init(pmd, address, end); | ||
255 | spin_unlock(&init_mm.page_table_lock); | ||
256 | __flush_tlb_all(); | ||
257 | } | ||
258 | } | ||
259 | |||
260 | static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) | ||
215 | { | 261 | { |
216 | long i, j; | 262 | long i = pud_index(address); |
217 | 263 | ||
218 | i = pud_index(address); | ||
219 | pud = pud + i; | 264 | pud = pud + i; |
265 | |||
266 | if (after_bootmem && pud_val(*pud)) { | ||
267 | phys_pmd_update(pud, address, end); | ||
268 | return; | ||
269 | } | ||
270 | |||
220 | for (; i < PTRS_PER_PUD; pud++, i++) { | 271 | for (; i < PTRS_PER_PUD; pud++, i++) { |
221 | int map; | 272 | int map; |
222 | unsigned long paddr, pmd_phys; | 273 | unsigned long paddr, pmd_phys; |
223 | pmd_t *pmd; | 274 | pmd_t *pmd; |
224 | 275 | ||
225 | paddr = address + i*PUD_SIZE; | 276 | paddr = (address & PGDIR_MASK) + i*PUD_SIZE; |
226 | if (paddr >= end) { | 277 | if (paddr >= end) |
227 | for (; i < PTRS_PER_PUD; i++, pud++) | ||
228 | set_pud(pud, __pud(0)); | ||
229 | break; | 278 | break; |
230 | } | ||
231 | 279 | ||
232 | if (!e820_mapped(paddr, paddr+PUD_SIZE, 0)) { | 280 | if (!after_bootmem && !e820_mapped(paddr, paddr+PUD_SIZE, 0)) { |
233 | set_pud(pud, __pud(0)); | 281 | set_pud(pud, __pud(0)); |
234 | continue; | 282 | continue; |
235 | } | 283 | } |
236 | 284 | ||
237 | pmd = alloc_low_page(&map, &pmd_phys); | 285 | pmd = alloc_low_page(&map, &pmd_phys); |
286 | spin_lock(&init_mm.page_table_lock); | ||
238 | set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); | 287 | set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); |
239 | for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { | 288 | phys_pmd_init(pmd, paddr, end); |
240 | unsigned long pe; | 289 | spin_unlock(&init_mm.page_table_lock); |
241 | |||
242 | if (paddr >= end) { | ||
243 | for (; j < PTRS_PER_PMD; j++, pmd++) | ||
244 | set_pmd(pmd, __pmd(0)); | ||
245 | break; | ||
246 | } | ||
247 | pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr; | ||
248 | pe &= __supported_pte_mask; | ||
249 | set_pmd(pmd, __pmd(pe)); | ||
250 | } | ||
251 | unmap_low_page(map); | 290 | unmap_low_page(map); |
252 | } | 291 | } |
253 | __flush_tlb(); | 292 | __flush_tlb(); |
@@ -262,30 +301,25 @@ static void __init find_early_table_space(unsigned long end) | |||
262 | tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) + | 301 | tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) + |
263 | round_up(pmds * sizeof(pmd_t), PAGE_SIZE); | 302 | round_up(pmds * sizeof(pmd_t), PAGE_SIZE); |
264 | 303 | ||
265 | /* Put page tables beyond the DMA zones if possible. | 304 | /* RED-PEN putting page tables only on node 0 could |
266 | RED-PEN might be better to spread them out more over | 305 | cause a hotspot and fill up ZONE_DMA. The page tables |
267 | memory to avoid hotspots */ | 306 | need roughly 0.5KB per GB. */ |
268 | if (end > MAX_DMA32_PFN<<PAGE_SHIFT) | 307 | start = 0x8000; |
269 | start = MAX_DMA32_PFN << PAGE_SHIFT; | 308 | table_start = find_e820_area(start, end, tables); |
270 | else if (end > MAX_DMA_PFN << PAGE_SHIFT) | ||
271 | start = MAX_DMA_PFN << PAGE_SHIFT; | ||
272 | else | ||
273 | start = 0x8000; | ||
274 | |||
275 | table_start = find_e820_area(start, end, tables); | ||
276 | if (table_start == -1) | ||
277 | table_start = find_e820_area(0x8000, end, tables); | ||
278 | if (table_start == -1UL) | 309 | if (table_start == -1UL) |
279 | panic("Cannot find space for the kernel page tables"); | 310 | panic("Cannot find space for the kernel page tables"); |
280 | 311 | ||
281 | table_start >>= PAGE_SHIFT; | 312 | table_start >>= PAGE_SHIFT; |
282 | table_end = table_start; | 313 | table_end = table_start; |
314 | |||
315 | early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", | ||
316 | end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT); | ||
283 | } | 317 | } |
284 | 318 | ||
285 | /* Setup the direct mapping of the physical memory at PAGE_OFFSET. | 319 | /* Setup the direct mapping of the physical memory at PAGE_OFFSET. |
286 | This runs before bootmem is initialized and gets pages directly from the | 320 | This runs before bootmem is initialized and gets pages directly from the |
287 | physical memory. To access them they are temporarily mapped. */ | 321 | physical memory. To access them they are temporarily mapped. */ |
288 | void __init init_memory_mapping(unsigned long start, unsigned long end) | 322 | void __meminit init_memory_mapping(unsigned long start, unsigned long end) |
289 | { | 323 | { |
290 | unsigned long next; | 324 | unsigned long next; |
291 | 325 | ||
@@ -297,7 +331,8 @@ void __init init_memory_mapping(unsigned long start, unsigned long end) | |||
297 | * mapped. Unfortunately this is done currently before the nodes are | 331 | * mapped. Unfortunately this is done currently before the nodes are |
298 | * discovered. | 332 | * discovered. |
299 | */ | 333 | */ |
300 | find_early_table_space(end); | 334 | if (!after_bootmem) |
335 | find_early_table_space(end); | ||
301 | 336 | ||
302 | start = (unsigned long)__va(start); | 337 | start = (unsigned long)__va(start); |
303 | end = (unsigned long)__va(end); | 338 | end = (unsigned long)__va(end); |
@@ -305,20 +340,26 @@ void __init init_memory_mapping(unsigned long start, unsigned long end) | |||
305 | for (; start < end; start = next) { | 340 | for (; start < end; start = next) { |
306 | int map; | 341 | int map; |
307 | unsigned long pud_phys; | 342 | unsigned long pud_phys; |
308 | pud_t *pud = alloc_low_page(&map, &pud_phys); | 343 | pgd_t *pgd = pgd_offset_k(start); |
344 | pud_t *pud; | ||
345 | |||
346 | if (after_bootmem) | ||
347 | pud = pud_offset_k(pgd, __PAGE_OFFSET); | ||
348 | else | ||
349 | pud = alloc_low_page(&map, &pud_phys); | ||
350 | |||
309 | next = start + PGDIR_SIZE; | 351 | next = start + PGDIR_SIZE; |
310 | if (next > end) | 352 | if (next > end) |
311 | next = end; | 353 | next = end; |
312 | phys_pud_init(pud, __pa(start), __pa(next)); | 354 | phys_pud_init(pud, __pa(start), __pa(next)); |
313 | set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); | 355 | if (!after_bootmem) |
356 | set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); | ||
314 | unmap_low_page(map); | 357 | unmap_low_page(map); |
315 | } | 358 | } |
316 | 359 | ||
317 | asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features)); | 360 | if (!after_bootmem) |
361 | asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features)); | ||
318 | __flush_tlb_all(); | 362 | __flush_tlb_all(); |
319 | early_printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end, | ||
320 | table_start<<PAGE_SHIFT, | ||
321 | table_end<<PAGE_SHIFT); | ||
322 | } | 363 | } |
323 | 364 | ||
324 | void __cpuinit zap_low_mappings(int cpu) | 365 | void __cpuinit zap_low_mappings(int cpu) |
@@ -393,6 +434,9 @@ size_zones(unsigned long *z, unsigned long *h, | |||
393 | void __init paging_init(void) | 434 | void __init paging_init(void) |
394 | { | 435 | { |
395 | unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES]; | 436 | unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES]; |
437 | |||
438 | memory_present(0, 0, end_pfn); | ||
439 | sparse_init(); | ||
396 | size_zones(zones, holes, 0, end_pfn); | 440 | size_zones(zones, holes, 0, end_pfn); |
397 | free_area_init_node(0, NODE_DATA(0), zones, | 441 | free_area_init_node(0, NODE_DATA(0), zones, |
398 | __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); | 442 | __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); |
@@ -433,6 +477,50 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size) | |||
433 | __flush_tlb_all(); | 477 | __flush_tlb_all(); |
434 | } | 478 | } |
435 | 479 | ||
480 | /* | ||
481 | * Memory hotplug specific functions | ||
482 | * These are only for non-NUMA machines right now. | ||
483 | */ | ||
484 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
485 | |||
486 | void online_page(struct page *page) | ||
487 | { | ||
488 | ClearPageReserved(page); | ||
489 | set_page_count(page, 1); | ||
490 | __free_page(page); | ||
491 | totalram_pages++; | ||
492 | num_physpages++; | ||
493 | } | ||
494 | |||
495 | int add_memory(u64 start, u64 size) | ||
496 | { | ||
497 | struct pglist_data *pgdat = NODE_DATA(0); | ||
498 | struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2; | ||
499 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
500 | unsigned long nr_pages = size >> PAGE_SHIFT; | ||
501 | int ret; | ||
502 | |||
503 | ret = __add_pages(zone, start_pfn, nr_pages); | ||
504 | if (ret) | ||
505 | goto error; | ||
506 | |||
507 | init_memory_mapping(start, (start + size -1)); | ||
508 | |||
509 | return ret; | ||
510 | error: | ||
511 | printk("%s: Problem encountered in __add_pages!\n", __func__); | ||
512 | return ret; | ||
513 | } | ||
514 | EXPORT_SYMBOL_GPL(add_memory); | ||
515 | |||
516 | int remove_memory(u64 start, u64 size) | ||
517 | { | ||
518 | return -EINVAL; | ||
519 | } | ||
520 | EXPORT_SYMBOL_GPL(remove_memory); | ||
521 | |||
522 | #endif | ||
523 | |||
436 | static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, | 524 | static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, |
437 | kcore_vsyscall; | 525 | kcore_vsyscall; |
438 | 526 | ||
@@ -539,7 +627,7 @@ void mark_rodata_ro(void) | |||
539 | #ifdef CONFIG_BLK_DEV_INITRD | 627 | #ifdef CONFIG_BLK_DEV_INITRD |
540 | void free_initrd_mem(unsigned long start, unsigned long end) | 628 | void free_initrd_mem(unsigned long start, unsigned long end) |
541 | { | 629 | { |
542 | if (start < (unsigned long)&_end) | 630 | if (start >= end) |
543 | return; | 631 | return; |
544 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); | 632 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); |
545 | for (; start < end; start += PAGE_SIZE) { | 633 | for (; start < end; start += PAGE_SIZE) { |