author	Bob Picco <bob.picco@hp.com>	2005-09-03 18:54:26 -0400
committer	Linus Torvalds <torvalds@evo.osdl.org>	2005-09-05 03:05:38 -0400
commit	802f192e4a600f7ef84ca25c8b818c8830acef5a
tree	51e9a6ed164e6a2d8741af510c3954ad79bf19af
parent	0216f86dafb389c0ad97529fd45e64e883298cfd
[PATCH] SPARSEMEM EXTREME
A new option for SPARSEMEM is ARCH_SPARSEMEM_EXTREME. Architecture platforms with a very sparse physical address space would likely want to select this option. For those architecture platforms that don't select the option, the code generated is equivalent to SPARSEMEM currently in -mm. I'll be posting a patch to the ia64 mailing list which uses this new SPARSEMEM feature.

ARCH_SPARSEMEM_EXTREME makes mem_section a one-dimensional array of pointers to mem_sections. This two-level layout scheme achieves smaller memory requirements for SPARSEMEM at the cost of an additional shift and load when fetching the memory section. The current SPARSEMEM -mm implementation, a one-dimensional array of mem_sections, remains the default configuration. The patch isolates the implementation details of the physical layout of the sparsemem section array.

ARCH_SPARSEMEM_EXTREME depends on 64BIT and defaults to boolean false.

I've boot tested ia64 configured for ARCH_SPARSEMEM_EXTREME under aim load. I've also boot tested a 4-way Opteron machine with !ARCH_SPARSEMEM_EXTREME under aim.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Bob Picco <bob.picco@hp.com>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
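To make the tradeoff concrete, here is a minimal userspace sketch of the two lookup schemes, flat versus two-level. This is illustration only, not the kernel code: the constants below are example stand-ins for the real values, which derive from PAGE_SHIFT and NR_MEM_SECTIONS in the mmzone.h hunk further down.

struct mem_section { unsigned long section_mem_map; };

#define NR_MEM_SECTIONS		(1UL << 24)	/* example: a very wide space */
#define SECTION_ROOT_SHIFT	9		/* PAGE_SHIFT(12) - 3, see below */
#define SECTION_ROOT_MASK	((1UL << SECTION_ROOT_SHIFT) - 1)
#define NR_SECTION_ROOTS	(NR_MEM_SECTIONS >> SECTION_ROOT_SHIFT)

/* Flat (default SPARSEMEM): one statically sized array (128 MiB with
 * these example numbers), and a single load to reach a section. */
struct mem_section flat_section[NR_MEM_SECTIONS];

static struct mem_section *flat_lookup(unsigned long nr)
{
	return &flat_section[nr];
}

/* EXTREME: a small root array of pointers; leaf blocks are allocated
 * on demand, so a hole in the physical address space costs one NULL
 * pointer per root rather than a full run of mem_sections. */
struct mem_section *root_section[NR_SECTION_ROOTS];

static struct mem_section *extreme_lookup(unsigned long nr)
{
	struct mem_section *leaf = root_section[nr >> SECTION_ROOT_SHIFT];

	if (!leaf)	/* unpopulated: a hole in the address space */
		return NULL;
	return &leaf[nr & SECTION_ROOT_MASK];	/* the extra shift and load */
}

With these example numbers the flat array reserves 2^24 * 8 bytes = 128 MiB up front, while the two-level scheme reserves a 32768-entry root table (256 KiB) plus one 4 KiB leaf per populated group of 512 sections.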
-rw-r--r--arch/ppc64/mm/init.c27
-rw-r--r--arch/ppc64/mm/numa.c43
-rw-r--r--include/asm-ppc64/lmb.h22
-rw-r--r--include/linux/mmzone.h30
-rw-r--r--mm/Kconfig9
-rw-r--r--mm/sparse.c38
6 files changed, 140 insertions, 29 deletions
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
index c02dc9809ca5..b3b1e9c1770a 100644
--- a/arch/ppc64/mm/init.c
+++ b/arch/ppc64/mm/init.c
@@ -552,27 +552,18 @@ void __init do_init_bootmem(void)
 	/* Add all physical memory to the bootmem map, mark each area
 	 * present.
 	 */
-	for (i=0; i < lmb.memory.cnt; i++) {
-		unsigned long base, size;
-		unsigned long start_pfn, end_pfn;
-
-		base = lmb.memory.region[i].base;
-		size = lmb.memory.region[i].size;
-
-		start_pfn = base >> PAGE_SHIFT;
-		end_pfn = start_pfn + (size >> PAGE_SHIFT);
-		memory_present(0, start_pfn, end_pfn);
-
-		free_bootmem(base, size);
-	}
+	for (i=0; i < lmb.memory.cnt; i++)
+		free_bootmem(lmb_start_pfn(&lmb.memory, i),
+			     lmb_size_bytes(&lmb.memory, i));
 
 	/* reserve the sections we're already using */
-	for (i=0; i < lmb.reserved.cnt; i++) {
-		unsigned long base = lmb.reserved.region[i].base;
-		unsigned long size = lmb.reserved.region[i].size;
-
-		reserve_bootmem(base, size);
-	}
+	for (i=0; i < lmb.reserved.cnt; i++)
+		reserve_bootmem(lmb_start_pfn(&lmb.reserved, i),
+				lmb_size_bytes(&lmb.reserved, i));
+
+	for (i=0; i < lmb.memory.cnt; i++)
+		memory_present(0, lmb_start_pfn(&lmb.memory, i),
+			       lmb_end_pfn(&lmb.memory, i));
 }
 
 /*
diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c
index c3116f0d788c..cb864b8f2750 100644
--- a/arch/ppc64/mm/numa.c
+++ b/arch/ppc64/mm/numa.c
@@ -440,8 +440,6 @@ new_range:
 	for (i = start ; i < (start+size); i += MEMORY_INCREMENT)
 		numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] =
 			numa_domain;
-	memory_present(numa_domain, start >> PAGE_SHIFT,
-		       (start + size) >> PAGE_SHIFT);
 
 	if (--ranges)
 		goto new_range;
@@ -483,7 +481,6 @@ static void __init setup_nonnuma(void)
 
 	for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT)
 		numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0;
-	memory_present(0, 0, init_node_data[0].node_end_pfn);
 }
 
 static void __init dump_numa_topology(void)
@@ -695,6 +692,46 @@ new_range:
 					size);
 			}
 		}
+		/*
+		 * This loop may look familiar, but we have to do it again
+		 * after marking our reserved memory to mark memory present
+		 * for sparsemem.
+		 */
+		addr_cells = get_mem_addr_cells();
+		size_cells = get_mem_size_cells();
+		memory = NULL;
+		while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
+			unsigned long mem_start, mem_size;
+			int numa_domain, ranges;
+			unsigned int *memcell_buf;
+			unsigned int len;
+
+			memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
+			if (!memcell_buf || len <= 0)
+				continue;
+
+			ranges = memory->n_addrs;	/* ranges in cell */
+new_range2:
+			mem_start = read_n_cells(addr_cells, &memcell_buf);
+			mem_size = read_n_cells(size_cells, &memcell_buf);
+			if (numa_enabled) {
+				numa_domain = of_node_numa_domain(memory);
+				if (numa_domain >= MAX_NUMNODES)
+					numa_domain = 0;
+			} else
+				numa_domain = 0;
+
+			if (numa_domain != nid)
+				continue;
+
+			mem_size = numa_enforce_memory_limit(mem_start, mem_size);
+			memory_present(numa_domain, mem_start >> PAGE_SHIFT,
+				       (mem_start + mem_size) >> PAGE_SHIFT);
+
+			if (--ranges)		/* process all ranges in cell */
+				goto new_range2;
+		}
+
 	}
 }
 
diff --git a/include/asm-ppc64/lmb.h b/include/asm-ppc64/lmb.h
index cb368bf0f264..de91e034bd98 100644
--- a/include/asm-ppc64/lmb.h
+++ b/include/asm-ppc64/lmb.h
@@ -56,4 +56,26 @@ extern void lmb_dump_all(void);
 
 extern unsigned long io_hole_start;
 
+static inline unsigned long
+lmb_size_bytes(struct lmb_region *type, unsigned long region_nr)
+{
+	return type->region[region_nr].size;
+}
+static inline unsigned long
+lmb_size_pages(struct lmb_region *type, unsigned long region_nr)
+{
+	return lmb_size_bytes(type, region_nr) >> PAGE_SHIFT;
+}
+static inline unsigned long
+lmb_start_pfn(struct lmb_region *type, unsigned long region_nr)
+{
+	return type->region[region_nr].base >> PAGE_SHIFT;
+}
+static inline unsigned long
+lmb_end_pfn(struct lmb_region *type, unsigned long region_nr)
+{
+	return lmb_start_pfn(type, region_nr) +
+	       lmb_size_pages(type, region_nr);
+}
+
 #endif /* _PPC64_LMB_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6c90461ed99f..b97054bbc394 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -487,6 +487,28 @@ struct mem_section {
 	unsigned long section_mem_map;
 };
 
+#ifdef CONFIG_ARCH_SPARSEMEM_EXTREME
+/*
+ * Should we ever require GCC 4 or later then the flat array scheme
+ * can be eliminated and a uniform solution for EXTREME and !EXTREME can
+ * be arrived at.
+ */
+#define SECTION_ROOT_SHIFT	(PAGE_SHIFT-3)
+#define SECTION_ROOT_MASK	((1UL<<SECTION_ROOT_SHIFT) - 1)
+#define SECTION_TO_ROOT(_sec)	((_sec) >> SECTION_ROOT_SHIFT)
+#define NR_SECTION_ROOTS	(NR_MEM_SECTIONS >> SECTION_ROOT_SHIFT)
+
+extern struct mem_section *mem_section[NR_SECTION_ROOTS];
+
+static inline struct mem_section *__nr_to_section(unsigned long nr)
+{
+	if (!mem_section[SECTION_TO_ROOT(nr)])
+		return NULL;
+	return &mem_section[SECTION_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
+}
+
+#else
+
 extern struct mem_section mem_section[NR_MEM_SECTIONS];
 
 static inline struct mem_section *__nr_to_section(unsigned long nr)
@@ -494,6 +516,10 @@ static inline struct mem_section *__nr_to_section(unsigned long nr)
 	return &mem_section[nr];
 }
 
+#define sparse_index_init(_sec, _nid)	do {} while (0)
+
+#endif
+
 /*
  * We use the lower bits of the mem_map pointer to store
  * a little bit of information. There should be at least
@@ -513,12 +539,12 @@ static inline struct page *__section_mem_map_addr(struct mem_section *section)
 
 static inline int valid_section(struct mem_section *section)
 {
-	return (section->section_mem_map & SECTION_MARKED_PRESENT);
+	return (section && (section->section_mem_map & SECTION_MARKED_PRESENT));
 }
 
 static inline int section_has_mem_map(struct mem_section *section)
 {
-	return (section->section_mem_map & SECTION_HAS_MEM_MAP);
+	return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP));
 }
 
 static inline int valid_section_nr(unsigned long nr)
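A note on the arithmetic in the hunk above: SECTION_ROOT_SHIFT is PAGE_SHIFT-3 because struct mem_section is a single unsigned long (2^3 bytes on 64BIT) at this point, so 2^(PAGE_SHIFT-3) of them fill exactly one page. That is what lets sparse_index_init() in the mm/sparse.c hunk below allocate each leaf as one PAGE_SIZE block. A self-contained check of that arithmetic, assuming 4 KiB pages for illustration:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned long page_shift = 12;			/* 4 KiB pages, assumed */
	unsigned long section_size = sizeof(unsigned long);	/* 8 on 64BIT */
	unsigned long root_shift = page_shift - 3;	/* SECTION_ROOT_SHIFT */

	/* 2^(PAGE_SHIFT-3) sections of 8 bytes each fill one PAGE_SIZE leaf */
	assert((1UL << root_shift) * section_size == (1UL << page_shift));
	printf("%lu sections per leaf page\n", 1UL << root_shift);
	return 0;
}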
diff --git a/mm/Kconfig b/mm/Kconfig
index cd379936cac6..fc644c5c065d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -89,3 +89,12 @@ config NEED_MULTIPLE_NODES
 config HAVE_MEMORY_PRESENT
 	def_bool y
 	depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM
+
+#
+# Architecture platforms which require a two level mem_section in SPARSEMEM
+# must select this option. This is usually for architecture platforms with
+# an extremely sparse physical address space.
+#
+config ARCH_SPARSEMEM_EXTREME
+	def_bool n
+	depends on SPARSEMEM && 64BIT
diff --git a/mm/sparse.c b/mm/sparse.c
index b54e304df4a7..b2b456bf0a5d 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -13,7 +13,26 @@
  *
  * 1) mem_section	- memory sections, mem_map's for valid memory
  */
-struct mem_section mem_section[NR_MEM_SECTIONS];
+#ifdef CONFIG_ARCH_SPARSEMEM_EXTREME
+struct mem_section *mem_section[NR_SECTION_ROOTS]
+	____cacheline_maxaligned_in_smp;
+
+static void sparse_index_init(unsigned long section, int nid)
+{
+	unsigned long root = SECTION_TO_ROOT(section);
+
+	if (mem_section[root])
+		return;
+	mem_section[root] = alloc_bootmem_node(NODE_DATA(nid), PAGE_SIZE);
+	if (mem_section[root])
+		memset(mem_section[root], 0, PAGE_SIZE);
+	else
+		panic("memory_present: NO MEMORY\n");
+}
+#else
+struct mem_section mem_section[NR_MEM_SECTIONS]
+	____cacheline_maxaligned_in_smp;
+#endif
 EXPORT_SYMBOL(mem_section);
 
 /* Record a memory area against a node. */
@@ -24,8 +43,13 @@ void memory_present(int nid, unsigned long start, unsigned long end)
 	start &= PAGE_SECTION_MASK;
 	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
 		unsigned long section = pfn_to_section_nr(pfn);
-		if (!mem_section[section].section_mem_map)
-			mem_section[section].section_mem_map = SECTION_MARKED_PRESENT;
+		struct mem_section *ms;
+
+		sparse_index_init(section, nid);
+
+		ms = __nr_to_section(section);
+		if (!ms->section_mem_map)
+			ms->section_mem_map = SECTION_MARKED_PRESENT;
 	}
 }
 
@@ -85,6 +109,7 @@ static struct page *sparse_early_mem_map_alloc(unsigned long pnum)
 {
 	struct page *map;
 	int nid = early_pfn_to_nid(section_nr_to_pfn(pnum));
+	struct mem_section *ms = __nr_to_section(pnum);
 
 	map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
 	if (map)
@@ -96,7 +121,7 @@ static struct page *sparse_early_mem_map_alloc(unsigned long pnum)
 		return map;
 
 	printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__);
-	mem_section[pnum].section_mem_map = 0;
+	ms->section_mem_map = 0;
 	return NULL;
 }
 
@@ -114,8 +139,9 @@ void sparse_init(void)
 			continue;
 
 		map = sparse_early_mem_map_alloc(pnum);
-		if (map)
-			sparse_init_one_section(&mem_section[pnum], pnum, map);
+		if (!map)
+			continue;
+		sparse_init_one_section(__nr_to_section(pnum), pnum, map);
 	}
 }
 
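To see the allocation side end to end, here is a hedged userspace analogue of the sparse_index_init()/memory_present() pair above, with calloc() standing in for alloc_bootmem_node() and the same illustrative constants as the earlier sketch standing in for the kernel's. It marks two widely separated regions present and reports how much table memory the two-level layout actually commits compared with the flat array's up-front reservation:

#include <stdio.h>
#include <stdlib.h>

#define SECTION_ROOT_SHIFT	9		/* PAGE_SHIFT(12) - 3, assumed */
#define NR_MEM_SECTIONS		(1UL << 24)	/* example: a very wide space */
#define NR_SECTION_ROOTS	(NR_MEM_SECTIONS >> SECTION_ROOT_SHIFT)

struct mem_section { unsigned long section_mem_map; };

static struct mem_section *roots[NR_SECTION_ROOTS];

/* Userspace stand-in for sparse_index_init(): allocate one zeroed leaf
 * block the first time any section under this root is marked present. */
static void index_init(unsigned long section)
{
	unsigned long root = section >> SECTION_ROOT_SHIFT;

	if (roots[root])
		return;
	roots[root] = calloc(1UL << SECTION_ROOT_SHIFT,
			     sizeof(struct mem_section));
	if (!roots[root]) {
		fprintf(stderr, "no memory\n");
		exit(1);
	}
}

int main(void)
{
	unsigned long leaves = 0, i;

	/* Two widely separated present regions, as a sparse machine might have. */
	for (i = 0; i < 1024; i++)
		index_init(i);
	for (i = NR_MEM_SECTIONS - 1024; i < NR_MEM_SECTIONS; i++)
		index_init(i);

	for (i = 0; i < NR_SECTION_ROOTS; i++)
		leaves += roots[i] ? 1 : 0;

	printf("flat array would reserve %lu KiB\n",
	       NR_MEM_SECTIONS * sizeof(struct mem_section) / 1024);
	printf("two-level: %lu KiB of roots + %lu leaf pages\n",
	       NR_SECTION_ROOTS * sizeof(void *) / 1024, leaves);
	return 0;
}

With these numbers the flat array would reserve 128 MiB regardless of population, while the two-level table commits 256 KiB of root pointers plus four 4 KiB leaves, at the cost of the extra shift and load shown earlier.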