diff options
author | Amul Shah <amul.shah@unisys.com> | 2007-02-13 07:26:19 -0500 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2007-02-13 07:26:19 -0500 |
commit | 076422d2af7e3d8e72c6e70843f6ea377714b082 (patch) | |
tree | 942ec9d2e7f3f74d6694af99a745ee74ef851268 /arch/x86_64 | |
parent | 0812a579c92fefa57506821fa08e90f47cb6dbdd (diff) |
[PATCH] x86-64: Allocate the NUMA hash function nodemap dynamically
Remove the statically allocated memory-to-NUMA-node hash map in favor of a
dynamically allocated, cache-aligned memory-to-node hash map.
This patch has the nice side effect that it allows the hash map to grow
on systems that have large amounts of memory (256GB - 1TB) but suffer from
having a small PCI space tacked onto the boot node (which is somewhere
between 192MB and 512MB on the ES7000).
Signed-off-by: Amul Shah <amul.shah@unisys.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@suse.de>
Cc: Rohit Seth <rohitseth@google.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/kernel/e820.c | 7 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup.c | 5 | ||||
-rw-r--r-- | arch/x86_64/mm/numa.c | 74 |
3 files changed, 77 insertions, 9 deletions
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 6fe191c58084..9d67955bbc31 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c | |||
@@ -83,6 +83,13 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) | |||
83 | return 1; | 83 | return 1; |
84 | } | 84 | } |
85 | 85 | ||
86 | #ifdef CONFIG_NUMA | ||
87 | /* NUMA memory to node map */ | ||
88 | if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) { | ||
89 | *addrp = nodemap_addr + nodemap_size; | ||
90 | return 1; | ||
91 | } | ||
92 | #endif | ||
86 | /* XXX ramdisk image here? */ | 93 | /* XXX ramdisk image here? */ |
87 | return 0; | 94 | return 0; |
88 | } | 95 | } |
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 60477244d1a3..f330f8285499 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c | |||
@@ -444,6 +444,11 @@ void __init setup_arch(char **cmdline_p) | |||
444 | /* reserve ebda region */ | 444 | /* reserve ebda region */ |
445 | if (ebda_addr) | 445 | if (ebda_addr) |
446 | reserve_bootmem_generic(ebda_addr, ebda_size); | 446 | reserve_bootmem_generic(ebda_addr, ebda_size); |
447 | #ifdef CONFIG_NUMA | ||
448 | /* reserve nodemap region */ | ||
449 | if (nodemap_addr) | ||
450 | reserve_bootmem_generic(nodemap_addr, nodemap_size); | ||
451 | #endif | ||
447 | 452 | ||
448 | #ifdef CONFIG_SMP | 453 | #ifdef CONFIG_SMP |
449 | /* | 454 | /* |
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 2ee2e003606c..7d9c428f4094 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c | |||
@@ -36,6 +36,8 @@ unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | |||
36 | cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; | 36 | cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; |
37 | 37 | ||
38 | int numa_off __initdata; | 38 | int numa_off __initdata; |
39 | unsigned long __initdata nodemap_addr; | ||
40 | unsigned long __initdata nodemap_size; | ||
39 | 41 | ||
40 | 42 | ||
41 | /* | 43 | /* |
@@ -52,34 +54,87 @@ populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift) | |||
52 | int res = -1; | 54 | int res = -1; |
53 | unsigned long addr, end; | 55 | unsigned long addr, end; |
54 | 56 | ||
55 | if (shift >= 64) | 57 | memset(memnodemap, 0xff, memnodemapsize); |
56 | return -1; | ||
57 | memset(memnodemap, 0xff, sizeof(memnodemap)); | ||
58 | for (i = 0; i < numnodes; i++) { | 58 | for (i = 0; i < numnodes; i++) { |
59 | addr = nodes[i].start; | 59 | addr = nodes[i].start; |
60 | end = nodes[i].end; | 60 | end = nodes[i].end; |
61 | if (addr >= end) | 61 | if (addr >= end) |
62 | continue; | 62 | continue; |
63 | if ((end >> shift) >= NODEMAPSIZE) | 63 | if ((end >> shift) >= memnodemapsize) |
64 | return 0; | 64 | return 0; |
65 | do { | 65 | do { |
66 | if (memnodemap[addr >> shift] != 0xff) | 66 | if (memnodemap[addr >> shift] != 0xff) |
67 | return -1; | 67 | return -1; |
68 | memnodemap[addr >> shift] = i; | 68 | memnodemap[addr >> shift] = i; |
69 | addr += (1UL << shift); | 69 | addr += (1UL << shift); |
70 | } while (addr < end); | 70 | } while (addr < end); |
71 | res = 1; | 71 | res = 1; |
72 | } | 72 | } |
73 | return res; | 73 | return res; |
74 | } | 74 | } |
75 | 75 | ||
76 | int __init compute_hash_shift(struct bootnode *nodes, int numnodes) | 76 | static int __init allocate_cachealigned_memnodemap(void) |
77 | { | ||
78 | unsigned long pad, pad_addr; | ||
79 | |||
80 | memnodemap = memnode.embedded_map; | ||
81 | if (memnodemapsize <= 48) { | ||
82 | printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n", | ||
83 | nodemap_addr, nodemap_addr + nodemap_size); | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | pad = L1_CACHE_BYTES - 1; | ||
88 | pad_addr = 0x8000; | ||
89 | nodemap_size = pad + memnodemapsize; | ||
90 | nodemap_addr = find_e820_area(pad_addr, end_pfn<<PAGE_SHIFT, | ||
91 | nodemap_size); | ||
92 | if (nodemap_addr == -1UL) { | ||
93 | printk(KERN_ERR | ||
94 | "NUMA: Unable to allocate Memory to Node hash map\n"); | ||
95 | nodemap_addr = nodemap_size = 0; | ||
96 | return -1; | ||
97 | } | ||
98 | pad_addr = (nodemap_addr + pad) & ~pad; | ||
99 | memnodemap = phys_to_virt(pad_addr); | ||
100 | |||
101 | printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n", | ||
102 | nodemap_addr, nodemap_addr + nodemap_size); | ||
103 | return 0; | ||
104 | } | ||
105 | |||
106 | /* | ||
107 | * The LSB of all start and end addresses in the node map is the value of the | ||
108 | * maximum possible shift. | ||
109 | */ | ||
110 | static int __init | ||
111 | extract_lsb_from_nodes (const struct bootnode *nodes, int numnodes) | ||
77 | { | 112 | { |
78 | int shift = 20; | 113 | int i; |
114 | unsigned long start, end; | ||
115 | unsigned long bitfield = 0, memtop = 0; | ||
79 | 116 | ||
80 | while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0) | 117 | for (i = 0; i < numnodes; i++) { |
81 | shift++; | 118 | start = nodes[i].start; |
119 | end = nodes[i].end; | ||
120 | if (start >= end) | ||
121 | continue; | ||
122 | bitfield |= start | end; | ||
123 | if (end > memtop) | ||
124 | memtop = end; | ||
125 | } | ||
126 | i = find_first_bit(&bitfield, sizeof(unsigned long)*8); | ||
127 | memnodemapsize = (memtop >> i)+1; | ||
128 | return i; | ||
129 | } | ||
130 | |||
131 | int __init compute_hash_shift(struct bootnode *nodes, int numnodes) | ||
132 | { | ||
133 | int shift; | ||
82 | 134 | ||
135 | shift = extract_lsb_from_nodes(nodes, numnodes); | ||
136 | if (allocate_cachealigned_memnodemap()) | ||
137 | return -1; | ||
83 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", | 138 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", |
84 | shift); | 139 | shift); |
85 | 140 | ||
@@ -290,6 +345,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
290 | end_pfn << PAGE_SHIFT); | 345 | end_pfn << PAGE_SHIFT); |
291 | /* setup dummy node covering all memory */ | 346 | /* setup dummy node covering all memory */ |
292 | memnode_shift = 63; | 347 | memnode_shift = 63; |
348 | memnodemap = memnode.embedded_map; | ||
293 | memnodemap[0] = 0; | 349 | memnodemap[0] = 0; |
294 | nodes_clear(node_online_map); | 350 | nodes_clear(node_online_map); |
295 | node_set_online(0); | 351 | node_set_online(0); |