author:    Matt Tolentino <metolent@cs.vt.edu>  2006-01-17 01:03:41 -0500
committer: Linus Torvalds <torvalds@g5.osdl.org>  2006-01-17 02:18:35 -0500
commit:    44df75e629106efcada087cead6c3f33ed6bcc60
tree:      34f7c85748bf1b77c3568d4d1cc0d3d0195abcc7
parent:    8817210d4d968e58f7d93db2eecd17b20dd03d40
[PATCH] x86_64: add x86-64 support for memory hot-add
Add x86-64 specific memory hot-add functions, Kconfig options, and runtime
kernel page table update functions to make hot-add usable on x86-64 machines.
Also, fix up the nefarious conditional locking and exports pointed out by Andi.
Tested on Intel and IBM x86-64 memory hot-add capable systems.

Signed-off-by: Matt Tolentino <matthew.e.tolentino@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/x86_64')
-rw-r--r--  arch/x86_64/Kconfig      7
-rw-r--r--  arch/x86_64/mm/init.c  160
2 files changed, 134 insertions, 33 deletions
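For orientation before reading the diff: the user-visible interface this patch adds is the exported add_memory()/online_page() pair at the end of mm/init.c. The following is a minimal, hypothetical caller sketch, not part of the patch; the function name, error message, and header choice are illustrative assumptions. It shows how a platform hot-add driver would hand a newly discovered physical range to the new x86-64 implementation.

/*
 * Hypothetical caller -- not part of this patch.  A platform driver that
 * discovers new memory (e.g. via an ACPI notification) would pass the
 * physical range to the arch code through add_memory(), which this patch
 * implements for x86-64.  The pages only become usable once the new memory
 * sections are onlined, which ends up calling the online_page() added below.
 */
#include <linux/kernel.h>
#include <linux/memory_hotplug.h>	/* add_memory(); header also pulled into init.c by this patch */

static int example_hot_add_range(u64 start, u64 size)
{
	/* Creates struct pages for the range and extends the direct mapping. */
	int ret = add_memory(start, size);

	if (ret)
		printk(KERN_ERR "hot-add of %llu bytes at 0x%llx failed: %d\n",
		       (unsigned long long)size, (unsigned long long)start, ret);
	return ret;
}

Note that in this patch remove_memory() is a stub returning -EINVAL, so only the add path is functional.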
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 787feff0e8a5..2f9deca31cc9 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -305,7 +305,11 @@ config ARCH_DISCONTIGMEM_DEFAULT
 
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
-	depends on NUMA
+	depends on (NUMA || EXPERIMENTAL)
+
+config ARCH_MEMORY_PROBE
+	def_bool y
+	depends on MEMORY_HOTPLUG
 
 config ARCH_FLATMEM_ENABLE
 	def_bool y
@@ -315,6 +319,7 @@ source "mm/Kconfig"
 
 config HAVE_ARCH_EARLY_PFN_TO_NID
 	def_bool y
+	depends on NUMA
 
 config NR_CPUS
 	int "Maximum number of CPUs (2-256)"
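The new ARCH_MEMORY_PROBE option lets the generic memory-hotplug "probe" sysfs interface be built on x86-64; conceptually, that interface takes a physical address supplied by the administrator and feeds one memory section to add_memory(). A rough sketch of such a handler follows, assuming sparsemem's PAGES_PER_SECTION; it is an illustration only, not the actual drivers/base/memory.c code, and the function name is made up.

/*
 * Illustrative sketch only -- not the real sysfs probe handler.  It shows
 * roughly what the interface gated by ARCH_MEMORY_PROBE does with a
 * physical address written by the administrator.
 */
#include <linux/kernel.h>		/* simple_strtoull() */
#include <linux/memory_hotplug.h>	/* add_memory() */
#include <linux/mmzone.h>		/* PAGES_PER_SECTION */

static int example_probe_section(const char *buf)
{
	u64 phys_addr = simple_strtoull(buf, NULL, 0);
	/* Hot-add exactly one sparsemem section starting at phys_addr. */
	u64 bytes = (u64)PAGES_PER_SECTION << PAGE_SHIFT;

	return add_memory(phys_addr, bytes);
}

The (NUMA || EXPERIMENTAL) relaxation on ARCH_SPARSEMEM_ENABLE above is what makes SPARSEMEM, and therefore memory hotplug, selectable on non-NUMA x86-64 configurations.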
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 307d1ac0ef33..7af1742aa958 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -24,6 +24,8 @@
 #include <linux/proc_fs.h>
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/memory_hotplug.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -180,13 +182,19 @@ static struct temp_map {
 	{}
 };
 
-static __init void *alloc_low_page(int *index, unsigned long *phys)
+static __meminit void *alloc_low_page(int *index, unsigned long *phys)
 {
 	struct temp_map *ti;
 	int i;
 	unsigned long pfn = table_end++, paddr;
 	void *adr;
 
+	if (after_bootmem) {
+		adr = (void *)get_zeroed_page(GFP_ATOMIC);
+		*phys = __pa(adr);
+		return adr;
+	}
+
 	if (pfn >= end_pfn)
 		panic("alloc_low_page: ran out of memory");
 	for (i = 0; temp_mappings[i].allocated; i++) {
@@ -199,55 +207,86 @@ static __init void *alloc_low_page(int *index, unsigned long *phys)
 	ti->allocated = 1;
 	__flush_tlb();
 	adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
+	memset(adr, 0, PAGE_SIZE);
 	*index = i;
 	*phys = pfn * PAGE_SIZE;
 	return adr;
 }
 
-static __init void unmap_low_page(int i)
+static __meminit void unmap_low_page(int i)
 {
-	struct temp_map *ti = &temp_mappings[i];
+	struct temp_map *ti;
+
+	if (after_bootmem)
+		return;
+
+	ti = &temp_mappings[i];
 	set_pmd(ti->pmd, __pmd(0));
 	ti->allocated = 0;
 }
 
-static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+static void __meminit
+phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
+{
+	int i;
+
+	for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
+		unsigned long entry;
+
+		if (address > end) {
+			for (; i < PTRS_PER_PMD; i++, pmd++)
+				set_pmd(pmd, __pmd(0));
+			break;
+		}
+		entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
+		entry &= __supported_pte_mask;
+		set_pmd(pmd, __pmd(entry));
+	}
+}
+
+static void __meminit
+phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
+{
+	pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
+
+	if (pmd_none(*pmd)) {
+		spin_lock(&init_mm.page_table_lock);
+		phys_pmd_init(pmd, address, end);
+		spin_unlock(&init_mm.page_table_lock);
+		__flush_tlb_all();
+	}
+}
+
+static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
 {
-	long i, j;
+	long i = pud_index(address);
 
-	i = pud_index(address);
 	pud = pud + i;
+
+	if (after_bootmem && pud_val(*pud)) {
+		phys_pmd_update(pud, address, end);
+		return;
+	}
+
 	for (; i < PTRS_PER_PUD; pud++, i++) {
 		int map;
 		unsigned long paddr, pmd_phys;
 		pmd_t *pmd;
 
-		paddr = address + i*PUD_SIZE;
-		if (paddr >= end) {
-			for (; i < PTRS_PER_PUD; i++, pud++)
-				set_pud(pud, __pud(0));
+		paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
+		if (paddr >= end)
 			break;
-		}
 
-		if (!e820_mapped(paddr, paddr+PUD_SIZE, 0)) {
+		if (!after_bootmem && !e820_mapped(paddr, paddr+PUD_SIZE, 0)) {
 			set_pud(pud, __pud(0));
 			continue;
 		}
 
 		pmd = alloc_low_page(&map, &pmd_phys);
+		spin_lock(&init_mm.page_table_lock);
 		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
-		for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
-			unsigned long pe;
-
-			if (paddr >= end) {
-				for (; j < PTRS_PER_PMD; j++, pmd++)
-					set_pmd(pmd, __pmd(0));
-				break;
-			}
-			pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr;
-			pe &= __supported_pte_mask;
-			set_pmd(pmd, __pmd(pe));
-		}
+		phys_pmd_init(pmd, paddr, end);
+		spin_unlock(&init_mm.page_table_lock);
 		unmap_low_page(map);
 	}
 	__flush_tlb();
@@ -272,12 +311,15 @@ static void __init find_early_table_space(unsigned long end)
 
 	table_start >>= PAGE_SHIFT;
 	table_end = table_start;
+
+	early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
+		end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
 }
 
 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
    This runs before bootmem is initialized and gets pages directly from the
   physical memory. To access them they are temporarily mapped. */
-void __init init_memory_mapping(unsigned long start, unsigned long end)
+void __meminit init_memory_mapping(unsigned long start, unsigned long end)
 {
 	unsigned long next;
 
@@ -289,7 +331,8 @@ void __init init_memory_mapping(unsigned long start, unsigned long end)
 	 * mapped. Unfortunately this is done currently before the nodes are
 	 * discovered.
 	 */
-	find_early_table_space(end);
+	if (!after_bootmem)
+		find_early_table_space(end);
 
 	start = (unsigned long)__va(start);
 	end = (unsigned long)__va(end);
@@ -297,20 +340,26 @@ void __init init_memory_mapping(unsigned long start, unsigned long end)
 	for (; start < end; start = next) {
 		int map;
 		unsigned long pud_phys;
-		pud_t *pud = alloc_low_page(&map, &pud_phys);
+		pgd_t *pgd = pgd_offset_k(start);
+		pud_t *pud;
+
+		if (after_bootmem)
+			pud = pud_offset_k(pgd, __PAGE_OFFSET);
+		else
+			pud = alloc_low_page(&map, &pud_phys);
+
 		next = start + PGDIR_SIZE;
 		if (next > end)
 			next = end;
 		phys_pud_init(pud, __pa(start), __pa(next));
-		set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
+		if (!after_bootmem)
+			set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
 		unmap_low_page(map);
 	}
 
-	asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
+	if (!after_bootmem)
+		asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
 	__flush_tlb_all();
-	early_printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end,
-	       table_start<<PAGE_SHIFT,
-	       table_end<<PAGE_SHIFT);
 }
 
 void __cpuinit zap_low_mappings(int cpu)
@@ -385,6 +434,9 @@ size_zones(unsigned long *z, unsigned long *h,
 void __init paging_init(void)
 {
 	unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
+
+	memory_present(0, 0, end_pfn);
+	sparse_init();
 	size_zones(zones, holes, 0, end_pfn);
 	free_area_init_node(0, NODE_DATA(0), zones,
 			__pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
@@ -425,6 +477,50 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size)
 	__flush_tlb_all();
 }
 
+/*
+ * Memory hotplug specific functions
+ * These are only for non-NUMA machines right now.
+ */
+#ifdef CONFIG_MEMORY_HOTPLUG
+
+void online_page(struct page *page)
+{
+	ClearPageReserved(page);
+	set_page_count(page, 1);
+	__free_page(page);
+	totalram_pages++;
+	num_physpages++;
+}
+
+int add_memory(u64 start, u64 size)
+{
+	struct pglist_data *pgdat = NODE_DATA(0);
+	struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	int ret;
+
+	ret = __add_pages(zone, start_pfn, nr_pages);
+	if (ret)
+		goto error;
+
+	init_memory_mapping(start, (start + size -1));
+
+	return ret;
+error:
+	printk("%s: Problem encountered in __add_pages!\n", __func__);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(add_memory);
+
+int remove_memory(u64 start, u64 size)
+{
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(remove_memory);
+
+#endif
+
 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
 			kcore_vsyscall;
 