diff options
author | Matt Tolentino <metolent@cs.vt.edu> | 2006-01-17 01:03:41 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-01-17 02:18:35 -0500 |
commit | 44df75e629106efcada087cead6c3f33ed6bcc60 (patch) | |
tree | 34f7c85748bf1b77c3568d4d1cc0d3d0195abcc7 /arch | |
parent | 8817210d4d968e58f7d93db2eecd17b20dd03d40 (diff) |
[PATCH] x86_64: add x86-64 support for memory hot-add
Add x86-64-specific memory hot-add functions, Kconfig options,
and runtime kernel page table update functions to make
hot-add usable on x86-64 machines. Also, fix up the nefarious
conditional locking and exports pointed out by Andi.
Tested on Intel and IBM x86-64 memory hot-add capable systems.
Signed-off-by: Matt Tolentino <matthew.e.tolentino@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86_64/Kconfig | 7 | ||||
-rw-r--r-- | arch/x86_64/mm/init.c | 160 |
2 files changed, 134 insertions, 33 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 787feff0e8a5..2f9deca31cc9 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig | |||
@@ -305,7 +305,11 @@ config ARCH_DISCONTIGMEM_DEFAULT | |||
305 | 305 | ||
306 | config ARCH_SPARSEMEM_ENABLE | 306 | config ARCH_SPARSEMEM_ENABLE |
307 | def_bool y | 307 | def_bool y |
308 | depends on NUMA | 308 | depends on (NUMA || EXPERIMENTAL) |
309 | |||
310 | config ARCH_MEMORY_PROBE | ||
311 | def_bool y | ||
312 | depends on MEMORY_HOTPLUG | ||
309 | 313 | ||
310 | config ARCH_FLATMEM_ENABLE | 314 | config ARCH_FLATMEM_ENABLE |
311 | def_bool y | 315 | def_bool y |
@@ -315,6 +319,7 @@ source "mm/Kconfig" | |||
315 | 319 | ||
316 | config HAVE_ARCH_EARLY_PFN_TO_NID | 320 | config HAVE_ARCH_EARLY_PFN_TO_NID |
317 | def_bool y | 321 | def_bool y |
322 | depends on NUMA | ||
318 | 323 | ||
319 | config NR_CPUS | 324 | config NR_CPUS |
320 | int "Maximum number of CPUs (2-256)" | 325 | int "Maximum number of CPUs (2-256)" |
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 307d1ac0ef33..7af1742aa958 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c | |||
@@ -24,6 +24,8 @@ | |||
24 | #include <linux/proc_fs.h> | 24 | #include <linux/proc_fs.h> |
25 | #include <linux/pci.h> | 25 | #include <linux/pci.h> |
26 | #include <linux/dma-mapping.h> | 26 | #include <linux/dma-mapping.h> |
27 | #include <linux/module.h> | ||
28 | #include <linux/memory_hotplug.h> | ||
27 | 29 | ||
28 | #include <asm/processor.h> | 30 | #include <asm/processor.h> |
29 | #include <asm/system.h> | 31 | #include <asm/system.h> |
@@ -180,13 +182,19 @@ static struct temp_map { | |||
180 | {} | 182 | {} |
181 | }; | 183 | }; |
182 | 184 | ||
183 | static __init void *alloc_low_page(int *index, unsigned long *phys) | 185 | static __meminit void *alloc_low_page(int *index, unsigned long *phys) |
184 | { | 186 | { |
185 | struct temp_map *ti; | 187 | struct temp_map *ti; |
186 | int i; | 188 | int i; |
187 | unsigned long pfn = table_end++, paddr; | 189 | unsigned long pfn = table_end++, paddr; |
188 | void *adr; | 190 | void *adr; |
189 | 191 | ||
192 | if (after_bootmem) { | ||
193 | adr = (void *)get_zeroed_page(GFP_ATOMIC); | ||
194 | *phys = __pa(adr); | ||
195 | return adr; | ||
196 | } | ||
197 | |||
190 | if (pfn >= end_pfn) | 198 | if (pfn >= end_pfn) |
191 | panic("alloc_low_page: ran out of memory"); | 199 | panic("alloc_low_page: ran out of memory"); |
192 | for (i = 0; temp_mappings[i].allocated; i++) { | 200 | for (i = 0; temp_mappings[i].allocated; i++) { |
@@ -199,55 +207,86 @@ static __init void *alloc_low_page(int *index, unsigned long *phys) | |||
199 | ti->allocated = 1; | 207 | ti->allocated = 1; |
200 | __flush_tlb(); | 208 | __flush_tlb(); |
201 | adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK); | 209 | adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK); |
210 | memset(adr, 0, PAGE_SIZE); | ||
202 | *index = i; | 211 | *index = i; |
203 | *phys = pfn * PAGE_SIZE; | 212 | *phys = pfn * PAGE_SIZE; |
204 | return adr; | 213 | return adr; |
205 | } | 214 | } |
206 | 215 | ||
207 | static __init void unmap_low_page(int i) | 216 | static __meminit void unmap_low_page(int i) |
208 | { | 217 | { |
209 | struct temp_map *ti = &temp_mappings[i]; | 218 | struct temp_map *ti; |
219 | |||
220 | if (after_bootmem) | ||
221 | return; | ||
222 | |||
223 | ti = &temp_mappings[i]; | ||
210 | set_pmd(ti->pmd, __pmd(0)); | 224 | set_pmd(ti->pmd, __pmd(0)); |
211 | ti->allocated = 0; | 225 | ti->allocated = 0; |
212 | } | 226 | } |
213 | 227 | ||
214 | static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) | 228 | static void __meminit |
229 | phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end) | ||
230 | { | ||
231 | int i; | ||
232 | |||
233 | for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) { | ||
234 | unsigned long entry; | ||
235 | |||
236 | if (address > end) { | ||
237 | for (; i < PTRS_PER_PMD; i++, pmd++) | ||
238 | set_pmd(pmd, __pmd(0)); | ||
239 | break; | ||
240 | } | ||
241 | entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address; | ||
242 | entry &= __supported_pte_mask; | ||
243 | set_pmd(pmd, __pmd(entry)); | ||
244 | } | ||
245 | } | ||
246 | |||
247 | static void __meminit | ||
248 | phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) | ||
249 | { | ||
250 | pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address)); | ||
251 | |||
252 | if (pmd_none(*pmd)) { | ||
253 | spin_lock(&init_mm.page_table_lock); | ||
254 | phys_pmd_init(pmd, address, end); | ||
255 | spin_unlock(&init_mm.page_table_lock); | ||
256 | __flush_tlb_all(); | ||
257 | } | ||
258 | } | ||
259 | |||
260 | static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) | ||
215 | { | 261 | { |
216 | long i, j; | 262 | long i = pud_index(address); |
217 | 263 | ||
218 | i = pud_index(address); | ||
219 | pud = pud + i; | 264 | pud = pud + i; |
265 | |||
266 | if (after_bootmem && pud_val(*pud)) { | ||
267 | phys_pmd_update(pud, address, end); | ||
268 | return; | ||
269 | } | ||
270 | |||
220 | for (; i < PTRS_PER_PUD; pud++, i++) { | 271 | for (; i < PTRS_PER_PUD; pud++, i++) { |
221 | int map; | 272 | int map; |
222 | unsigned long paddr, pmd_phys; | 273 | unsigned long paddr, pmd_phys; |
223 | pmd_t *pmd; | 274 | pmd_t *pmd; |
224 | 275 | ||
225 | paddr = address + i*PUD_SIZE; | 276 | paddr = (address & PGDIR_MASK) + i*PUD_SIZE; |
226 | if (paddr >= end) { | 277 | if (paddr >= end) |
227 | for (; i < PTRS_PER_PUD; i++, pud++) | ||
228 | set_pud(pud, __pud(0)); | ||
229 | break; | 278 | break; |
230 | } | ||
231 | 279 | ||
232 | if (!e820_mapped(paddr, paddr+PUD_SIZE, 0)) { | 280 | if (!after_bootmem && !e820_mapped(paddr, paddr+PUD_SIZE, 0)) { |
233 | set_pud(pud, __pud(0)); | 281 | set_pud(pud, __pud(0)); |
234 | continue; | 282 | continue; |
235 | } | 283 | } |
236 | 284 | ||
237 | pmd = alloc_low_page(&map, &pmd_phys); | 285 | pmd = alloc_low_page(&map, &pmd_phys); |
286 | spin_lock(&init_mm.page_table_lock); | ||
238 | set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); | 287 | set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); |
239 | for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { | 288 | phys_pmd_init(pmd, paddr, end); |
240 | unsigned long pe; | 289 | spin_unlock(&init_mm.page_table_lock); |
241 | |||
242 | if (paddr >= end) { | ||
243 | for (; j < PTRS_PER_PMD; j++, pmd++) | ||
244 | set_pmd(pmd, __pmd(0)); | ||
245 | break; | ||
246 | } | ||
247 | pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr; | ||
248 | pe &= __supported_pte_mask; | ||
249 | set_pmd(pmd, __pmd(pe)); | ||
250 | } | ||
251 | unmap_low_page(map); | 290 | unmap_low_page(map); |
252 | } | 291 | } |
253 | __flush_tlb(); | 292 | __flush_tlb(); |
@@ -272,12 +311,15 @@ static void __init find_early_table_space(unsigned long end) | |||
272 | 311 | ||
273 | table_start >>= PAGE_SHIFT; | 312 | table_start >>= PAGE_SHIFT; |
274 | table_end = table_start; | 313 | table_end = table_start; |
314 | |||
315 | early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", | ||
316 | end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT); | ||
275 | } | 317 | } |
276 | 318 | ||
277 | /* Setup the direct mapping of the physical memory at PAGE_OFFSET. | 319 | /* Setup the direct mapping of the physical memory at PAGE_OFFSET. |
278 | This runs before bootmem is initialized and gets pages directly from the | 320 | This runs before bootmem is initialized and gets pages directly from the |
279 | physical memory. To access them they are temporarily mapped. */ | 321 | physical memory. To access them they are temporarily mapped. */ |
280 | void __init init_memory_mapping(unsigned long start, unsigned long end) | 322 | void __meminit init_memory_mapping(unsigned long start, unsigned long end) |
281 | { | 323 | { |
282 | unsigned long next; | 324 | unsigned long next; |
283 | 325 | ||
@@ -289,7 +331,8 @@ void __init init_memory_mapping(unsigned long start, unsigned long end) | |||
289 | * mapped. Unfortunately this is done currently before the nodes are | 331 | * mapped. Unfortunately this is done currently before the nodes are |
290 | * discovered. | 332 | * discovered. |
291 | */ | 333 | */ |
292 | find_early_table_space(end); | 334 | if (!after_bootmem) |
335 | find_early_table_space(end); | ||
293 | 336 | ||
294 | start = (unsigned long)__va(start); | 337 | start = (unsigned long)__va(start); |
295 | end = (unsigned long)__va(end); | 338 | end = (unsigned long)__va(end); |
@@ -297,20 +340,26 @@ void __init init_memory_mapping(unsigned long start, unsigned long end) | |||
297 | for (; start < end; start = next) { | 340 | for (; start < end; start = next) { |
298 | int map; | 341 | int map; |
299 | unsigned long pud_phys; | 342 | unsigned long pud_phys; |
300 | pud_t *pud = alloc_low_page(&map, &pud_phys); | 343 | pgd_t *pgd = pgd_offset_k(start); |
344 | pud_t *pud; | ||
345 | |||
346 | if (after_bootmem) | ||
347 | pud = pud_offset_k(pgd, __PAGE_OFFSET); | ||
348 | else | ||
349 | pud = alloc_low_page(&map, &pud_phys); | ||
350 | |||
301 | next = start + PGDIR_SIZE; | 351 | next = start + PGDIR_SIZE; |
302 | if (next > end) | 352 | if (next > end) |
303 | next = end; | 353 | next = end; |
304 | phys_pud_init(pud, __pa(start), __pa(next)); | 354 | phys_pud_init(pud, __pa(start), __pa(next)); |
305 | set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); | 355 | if (!after_bootmem) |
356 | set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); | ||
306 | unmap_low_page(map); | 357 | unmap_low_page(map); |
307 | } | 358 | } |
308 | 359 | ||
309 | asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features)); | 360 | if (!after_bootmem) |
361 | asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features)); | ||
310 | __flush_tlb_all(); | 362 | __flush_tlb_all(); |
311 | early_printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end, | ||
312 | table_start<<PAGE_SHIFT, | ||
313 | table_end<<PAGE_SHIFT); | ||
314 | } | 363 | } |
315 | 364 | ||
316 | void __cpuinit zap_low_mappings(int cpu) | 365 | void __cpuinit zap_low_mappings(int cpu) |
@@ -385,6 +434,9 @@ size_zones(unsigned long *z, unsigned long *h, | |||
385 | void __init paging_init(void) | 434 | void __init paging_init(void) |
386 | { | 435 | { |
387 | unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES]; | 436 | unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES]; |
437 | |||
438 | memory_present(0, 0, end_pfn); | ||
439 | sparse_init(); | ||
388 | size_zones(zones, holes, 0, end_pfn); | 440 | size_zones(zones, holes, 0, end_pfn); |
389 | free_area_init_node(0, NODE_DATA(0), zones, | 441 | free_area_init_node(0, NODE_DATA(0), zones, |
390 | __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); | 442 | __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); |
@@ -425,6 +477,50 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size) | |||
425 | __flush_tlb_all(); | 477 | __flush_tlb_all(); |
426 | } | 478 | } |
427 | 479 | ||
480 | /* | ||
481 | * Memory hotplug specific functions | ||
482 | * These are only for non-NUMA machines right now. | ||
483 | */ | ||
484 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
485 | |||
486 | void online_page(struct page *page) | ||
487 | { | ||
488 | ClearPageReserved(page); | ||
489 | set_page_count(page, 1); | ||
490 | __free_page(page); | ||
491 | totalram_pages++; | ||
492 | num_physpages++; | ||
493 | } | ||
494 | |||
495 | int add_memory(u64 start, u64 size) | ||
496 | { | ||
497 | struct pglist_data *pgdat = NODE_DATA(0); | ||
498 | struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2; | ||
499 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
500 | unsigned long nr_pages = size >> PAGE_SHIFT; | ||
501 | int ret; | ||
502 | |||
503 | ret = __add_pages(zone, start_pfn, nr_pages); | ||
504 | if (ret) | ||
505 | goto error; | ||
506 | |||
507 | init_memory_mapping(start, (start + size -1)); | ||
508 | |||
509 | return ret; | ||
510 | error: | ||
511 | printk("%s: Problem encountered in __add_pages!\n", __func__); | ||
512 | return ret; | ||
513 | } | ||
514 | EXPORT_SYMBOL_GPL(add_memory); | ||
515 | |||
516 | int remove_memory(u64 start, u64 size) | ||
517 | { | ||
518 | return -EINVAL; | ||
519 | } | ||
520 | EXPORT_SYMBOL_GPL(remove_memory); | ||
521 | |||
522 | #endif | ||
523 | |||
428 | static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, | 524 | static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, |
429 | kcore_vsyscall; | 525 | kcore_vsyscall; |
430 | 526 | ||