diff options
author | Bob Picco <bob.picco@hp.com> | 2005-09-03 18:54:26 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@evo.osdl.org> | 2005-09-05 03:05:38 -0400 |
commit | 802f192e4a600f7ef84ca25c8b818c8830acef5a (patch) | |
tree | 51e9a6ed164e6a2d8741af510c3954ad79bf19af | |
parent | 0216f86dafb389c0ad97529fd45e64e883298cfd (diff) |
[PATCH] SPARSEMEM EXTREME
A new option for SPARSEMEM is ARCH_SPARSEMEM_EXTREME. Architecture
platforms with a very sparse physical address space would likely want to
select this option. For those architecture platforms that don't select the
option, the code generated is equivalent to SPARSEMEM currently in -mm.
I'll be posting a patch on ia64 ml which uses this new SPARSEMEM feature.
ARCH_SPARSEMEM_EXTREME makes mem_section a one dimensional array of
pointers to mem_sections. This two level layout scheme is able to achieve
smaller memory requirements for SPARSEMEM with the tradeoff of an
additional shift and load when fetching the memory section. The current
SPARSEMEM -mm implementation is a one dimensional array of mem_sections
which is the default SPARSEMEM configuration. The patch attempts to isolate
the implementation details of the physical layout of the sparsemem section
array.
ARCH_SPARSEMEM_EXTREME depends on 64BIT and is by default boolean false.
I've boot tested under aim load ia64 configured for ARCH_SPARSEMEM_EXTREME.
I've also boot tested a 4 way Opteron machine with !ARCH_SPARSEMEM_EXTREME
and tested with aim.
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Bob Picco <bob.picco@hp.com>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | arch/ppc64/mm/init.c | 27 | ||||
-rw-r--r-- | arch/ppc64/mm/numa.c | 43 | ||||
-rw-r--r-- | include/asm-ppc64/lmb.h | 22 | ||||
-rw-r--r-- | include/linux/mmzone.h | 30 | ||||
-rw-r--r-- | mm/Kconfig | 9 | ||||
-rw-r--r-- | mm/sparse.c | 38 |
6 files changed, 140 insertions, 29 deletions
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index c02dc9809ca5..b3b1e9c1770a 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c | |||
@@ -552,27 +552,18 @@ void __init do_init_bootmem(void) | |||
552 | /* Add all physical memory to the bootmem map, mark each area | 552 | /* Add all physical memory to the bootmem map, mark each area |
553 | * present. | 553 | * present. |
554 | */ | 554 | */ |
555 | for (i=0; i < lmb.memory.cnt; i++) { | 555 | for (i=0; i < lmb.memory.cnt; i++) |
556 | unsigned long base, size; | 556 | free_bootmem(lmb_start_pfn(&lmb.memory, i), |
557 | unsigned long start_pfn, end_pfn; | 557 | lmb_size_bytes(&lmb.memory, i)); |
558 | |||
559 | base = lmb.memory.region[i].base; | ||
560 | size = lmb.memory.region[i].size; | ||
561 | |||
562 | start_pfn = base >> PAGE_SHIFT; | ||
563 | end_pfn = start_pfn + (size >> PAGE_SHIFT); | ||
564 | memory_present(0, start_pfn, end_pfn); | ||
565 | |||
566 | free_bootmem(base, size); | ||
567 | } | ||
568 | 558 | ||
569 | /* reserve the sections we're already using */ | 559 | /* reserve the sections we're already using */ |
570 | for (i=0; i < lmb.reserved.cnt; i++) { | 560 | for (i=0; i < lmb.reserved.cnt; i++) |
571 | unsigned long base = lmb.reserved.region[i].base; | 561 | reserve_bootmem(lmb_start_pfn(&lmb.reserved, i), |
572 | unsigned long size = lmb.reserved.region[i].size; | 562 | lmb_size_bytes(&lmb.reserved, i)); |
573 | 563 | ||
574 | reserve_bootmem(base, size); | 564 | for (i=0; i < lmb.memory.cnt; i++) |
575 | } | 565 | memory_present(0, lmb_start_pfn(&lmb.memory, i), |
566 | lmb_end_pfn(&lmb.memory, i)); | ||
576 | } | 567 | } |
577 | 568 | ||
578 | /* | 569 | /* |
diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c index c3116f0d788c..cb864b8f2750 100644 --- a/arch/ppc64/mm/numa.c +++ b/arch/ppc64/mm/numa.c | |||
@@ -440,8 +440,6 @@ new_range: | |||
440 | for (i = start ; i < (start+size); i += MEMORY_INCREMENT) | 440 | for (i = start ; i < (start+size); i += MEMORY_INCREMENT) |
441 | numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = | 441 | numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = |
442 | numa_domain; | 442 | numa_domain; |
443 | memory_present(numa_domain, start >> PAGE_SHIFT, | ||
444 | (start + size) >> PAGE_SHIFT); | ||
445 | 443 | ||
446 | if (--ranges) | 444 | if (--ranges) |
447 | goto new_range; | 445 | goto new_range; |
@@ -483,7 +481,6 @@ static void __init setup_nonnuma(void) | |||
483 | 481 | ||
484 | for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT) | 482 | for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT) |
485 | numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; | 483 | numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; |
486 | memory_present(0, 0, init_node_data[0].node_end_pfn); | ||
487 | } | 484 | } |
488 | 485 | ||
489 | static void __init dump_numa_topology(void) | 486 | static void __init dump_numa_topology(void) |
@@ -695,6 +692,46 @@ new_range: | |||
695 | size); | 692 | size); |
696 | } | 693 | } |
697 | } | 694 | } |
695 | /* | ||
696 | * This loop may look famaliar, but we have to do it again | ||
697 | * after marking our reserved memory to mark memory present | ||
698 | * for sparsemem. | ||
699 | */ | ||
700 | addr_cells = get_mem_addr_cells(); | ||
701 | size_cells = get_mem_size_cells(); | ||
702 | memory = NULL; | ||
703 | while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { | ||
704 | unsigned long mem_start, mem_size; | ||
705 | int numa_domain, ranges; | ||
706 | unsigned int *memcell_buf; | ||
707 | unsigned int len; | ||
708 | |||
709 | memcell_buf = (unsigned int *)get_property(memory, "reg", &len); | ||
710 | if (!memcell_buf || len <= 0) | ||
711 | continue; | ||
712 | |||
713 | ranges = memory->n_addrs; /* ranges in cell */ | ||
714 | new_range2: | ||
715 | mem_start = read_n_cells(addr_cells, &memcell_buf); | ||
716 | mem_size = read_n_cells(size_cells, &memcell_buf); | ||
717 | if (numa_enabled) { | ||
718 | numa_domain = of_node_numa_domain(memory); | ||
719 | if (numa_domain >= MAX_NUMNODES) | ||
720 | numa_domain = 0; | ||
721 | } else | ||
722 | numa_domain = 0; | ||
723 | |||
724 | if (numa_domain != nid) | ||
725 | continue; | ||
726 | |||
727 | mem_size = numa_enforce_memory_limit(mem_start, mem_size); | ||
728 | memory_present(numa_domain, mem_start >> PAGE_SHIFT, | ||
729 | (mem_start + mem_size) >> PAGE_SHIFT); | ||
730 | |||
731 | if (--ranges) /* process all ranges in cell */ | ||
732 | goto new_range2; | ||
733 | } | ||
734 | |||
698 | } | 735 | } |
699 | } | 736 | } |
700 | 737 | ||
diff --git a/include/asm-ppc64/lmb.h b/include/asm-ppc64/lmb.h index cb368bf0f264..de91e034bd98 100644 --- a/include/asm-ppc64/lmb.h +++ b/include/asm-ppc64/lmb.h | |||
@@ -56,4 +56,26 @@ extern void lmb_dump_all(void); | |||
56 | 56 | ||
57 | extern unsigned long io_hole_start; | 57 | extern unsigned long io_hole_start; |
58 | 58 | ||
59 | static inline unsigned long | ||
60 | lmb_size_bytes(struct lmb_region *type, unsigned long region_nr) | ||
61 | { | ||
62 | return type->region[region_nr].size; | ||
63 | } | ||
64 | static inline unsigned long | ||
65 | lmb_size_pages(struct lmb_region *type, unsigned long region_nr) | ||
66 | { | ||
67 | return lmb_size_bytes(type, region_nr) >> PAGE_SHIFT; | ||
68 | } | ||
69 | static inline unsigned long | ||
70 | lmb_start_pfn(struct lmb_region *type, unsigned long region_nr) | ||
71 | { | ||
72 | return type->region[region_nr].base >> PAGE_SHIFT; | ||
73 | } | ||
74 | static inline unsigned long | ||
75 | lmb_end_pfn(struct lmb_region *type, unsigned long region_nr) | ||
76 | { | ||
77 | return lmb_start_pfn(type, region_nr) + | ||
78 | lmb_size_pages(type, region_nr); | ||
79 | } | ||
80 | |||
59 | #endif /* _PPC64_LMB_H */ | 81 | #endif /* _PPC64_LMB_H */ |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6c90461ed99f..b97054bbc394 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -487,6 +487,28 @@ struct mem_section { | |||
487 | unsigned long section_mem_map; | 487 | unsigned long section_mem_map; |
488 | }; | 488 | }; |
489 | 489 | ||
490 | #ifdef CONFIG_ARCH_SPARSEMEM_EXTREME | ||
491 | /* | ||
492 | * Should we ever require GCC 4 or later then the flat array scheme | ||
493 | * can be eliminated and a uniform solution for EXTREME and !EXTREME can | ||
494 | * be arrived at. | ||
495 | */ | ||
496 | #define SECTION_ROOT_SHIFT (PAGE_SHIFT-3) | ||
497 | #define SECTION_ROOT_MASK ((1UL<<SECTION_ROOT_SHIFT) - 1) | ||
498 | #define SECTION_TO_ROOT(_sec) ((_sec) >> SECTION_ROOT_SHIFT) | ||
499 | #define NR_SECTION_ROOTS (NR_MEM_SECTIONS >> SECTION_ROOT_SHIFT) | ||
500 | |||
501 | extern struct mem_section *mem_section[NR_SECTION_ROOTS]; | ||
502 | |||
503 | static inline struct mem_section *__nr_to_section(unsigned long nr) | ||
504 | { | ||
505 | if (!mem_section[SECTION_TO_ROOT(nr)]) | ||
506 | return NULL; | ||
507 | return &mem_section[SECTION_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; | ||
508 | } | ||
509 | |||
510 | #else | ||
511 | |||
490 | extern struct mem_section mem_section[NR_MEM_SECTIONS]; | 512 | extern struct mem_section mem_section[NR_MEM_SECTIONS]; |
491 | 513 | ||
492 | static inline struct mem_section *__nr_to_section(unsigned long nr) | 514 | static inline struct mem_section *__nr_to_section(unsigned long nr) |
@@ -494,6 +516,10 @@ static inline struct mem_section *__nr_to_section(unsigned long nr) | |||
494 | return &mem_section[nr]; | 516 | return &mem_section[nr]; |
495 | } | 517 | } |
496 | 518 | ||
519 | #define sparse_index_init(_sec, _nid) do {} while (0) | ||
520 | |||
521 | #endif | ||
522 | |||
497 | /* | 523 | /* |
498 | * We use the lower bits of the mem_map pointer to store | 524 | * We use the lower bits of the mem_map pointer to store |
499 | * a little bit of information. There should be at least | 525 | * a little bit of information. There should be at least |
@@ -513,12 +539,12 @@ static inline struct page *__section_mem_map_addr(struct mem_section *section) | |||
513 | 539 | ||
514 | static inline int valid_section(struct mem_section *section) | 540 | static inline int valid_section(struct mem_section *section) |
515 | { | 541 | { |
516 | return (section->section_mem_map & SECTION_MARKED_PRESENT); | 542 | return (section && (section->section_mem_map & SECTION_MARKED_PRESENT)); |
517 | } | 543 | } |
518 | 544 | ||
519 | static inline int section_has_mem_map(struct mem_section *section) | 545 | static inline int section_has_mem_map(struct mem_section *section) |
520 | { | 546 | { |
521 | return (section->section_mem_map & SECTION_HAS_MEM_MAP); | 547 | return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP)); |
522 | } | 548 | } |
523 | 549 | ||
524 | static inline int valid_section_nr(unsigned long nr) | 550 | static inline int valid_section_nr(unsigned long nr) |
diff --git a/mm/Kconfig b/mm/Kconfig index cd379936cac6..fc644c5c065d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -89,3 +89,12 @@ config NEED_MULTIPLE_NODES | |||
89 | config HAVE_MEMORY_PRESENT | 89 | config HAVE_MEMORY_PRESENT |
90 | def_bool y | 90 | def_bool y |
91 | depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM | 91 | depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM |
92 | |||
93 | # | ||
94 | # Architectecture platforms which require a two level mem_section in SPARSEMEM | ||
95 | # must select this option. This is usually for architecture platforms with | ||
96 | # an extremely sparse physical address space. | ||
97 | # | ||
98 | config ARCH_SPARSEMEM_EXTREME | ||
99 | def_bool n | ||
100 | depends on SPARSEMEM && 64BIT | ||
diff --git a/mm/sparse.c b/mm/sparse.c index b54e304df4a7..b2b456bf0a5d 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
@@ -13,7 +13,26 @@ | |||
13 | * | 13 | * |
14 | * 1) mem_section - memory sections, mem_map's for valid memory | 14 | * 1) mem_section - memory sections, mem_map's for valid memory |
15 | */ | 15 | */ |
16 | struct mem_section mem_section[NR_MEM_SECTIONS]; | 16 | #ifdef CONFIG_ARCH_SPARSEMEM_EXTREME |
17 | struct mem_section *mem_section[NR_SECTION_ROOTS] | ||
18 | ____cacheline_maxaligned_in_smp; | ||
19 | |||
20 | static void sparse_index_init(unsigned long section, int nid) | ||
21 | { | ||
22 | unsigned long root = SECTION_TO_ROOT(section); | ||
23 | |||
24 | if (mem_section[root]) | ||
25 | return; | ||
26 | mem_section[root] = alloc_bootmem_node(NODE_DATA(nid), PAGE_SIZE); | ||
27 | if (mem_section[root]) | ||
28 | memset(mem_section[root], 0, PAGE_SIZE); | ||
29 | else | ||
30 | panic("memory_present: NO MEMORY\n"); | ||
31 | } | ||
32 | #else | ||
33 | struct mem_section mem_section[NR_MEM_SECTIONS] | ||
34 | ____cacheline_maxaligned_in_smp; | ||
35 | #endif | ||
17 | EXPORT_SYMBOL(mem_section); | 36 | EXPORT_SYMBOL(mem_section); |
18 | 37 | ||
19 | /* Record a memory area against a node. */ | 38 | /* Record a memory area against a node. */ |
@@ -24,8 +43,13 @@ void memory_present(int nid, unsigned long start, unsigned long end) | |||
24 | start &= PAGE_SECTION_MASK; | 43 | start &= PAGE_SECTION_MASK; |
25 | for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { | 44 | for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { |
26 | unsigned long section = pfn_to_section_nr(pfn); | 45 | unsigned long section = pfn_to_section_nr(pfn); |
27 | if (!mem_section[section].section_mem_map) | 46 | struct mem_section *ms; |
28 | mem_section[section].section_mem_map = SECTION_MARKED_PRESENT; | 47 | |
48 | sparse_index_init(section, nid); | ||
49 | |||
50 | ms = __nr_to_section(section); | ||
51 | if (!ms->section_mem_map) | ||
52 | ms->section_mem_map = SECTION_MARKED_PRESENT; | ||
29 | } | 53 | } |
30 | } | 54 | } |
31 | 55 | ||
@@ -85,6 +109,7 @@ static struct page *sparse_early_mem_map_alloc(unsigned long pnum) | |||
85 | { | 109 | { |
86 | struct page *map; | 110 | struct page *map; |
87 | int nid = early_pfn_to_nid(section_nr_to_pfn(pnum)); | 111 | int nid = early_pfn_to_nid(section_nr_to_pfn(pnum)); |
112 | struct mem_section *ms = __nr_to_section(pnum); | ||
88 | 113 | ||
89 | map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); | 114 | map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); |
90 | if (map) | 115 | if (map) |
@@ -96,7 +121,7 @@ static struct page *sparse_early_mem_map_alloc(unsigned long pnum) | |||
96 | return map; | 121 | return map; |
97 | 122 | ||
98 | printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__); | 123 | printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__); |
99 | mem_section[pnum].section_mem_map = 0; | 124 | ms->section_mem_map = 0; |
100 | return NULL; | 125 | return NULL; |
101 | } | 126 | } |
102 | 127 | ||
@@ -114,8 +139,9 @@ void sparse_init(void) | |||
114 | continue; | 139 | continue; |
115 | 140 | ||
116 | map = sparse_early_mem_map_alloc(pnum); | 141 | map = sparse_early_mem_map_alloc(pnum); |
117 | if (map) | 142 | if (!map) |
118 | sparse_init_one_section(&mem_section[pnum], pnum, map); | 143 | continue; |
144 | sparse_init_one_section(__nr_to_section(pnum), pnum, map); | ||
119 | } | 145 | } |
120 | } | 146 | } |
121 | 147 | ||