diff options
author | Tejun Heo <tj@kernel.org> | 2011-02-16 11:11:08 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2011-02-16 11:11:08 -0500 |
commit | 97e7b78d0674882a0aae043fda428c583dbb225d (patch) | |
tree | f7393c83623b2adfacc1675779901a4af2a45d78 /arch/x86/mm | |
parent | 8968dab8ad90ea16ef92f2406868354ea3ab6bb9 (diff) |
x86-64, NUMA: Introduce struct numa_meminfo
Arrays for memblks and nodeids and their length lived in separate
variables, making things unnecessarily cumbersome. Introduce struct
numa_meminfo, which contains all memory configuration info. This patch
doesn't cause any behavior change.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/numa_64.c | 145 |
1 file changed, 75 insertions(+), 70 deletions(-)
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 243d18d4cfde..c3496e2b5a71 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -22,6 +22,17 @@ | |||
22 | #include <asm/acpi.h> | 22 | #include <asm/acpi.h> |
23 | #include <asm/amd_nb.h> | 23 | #include <asm/amd_nb.h> |
24 | 24 | ||
25 | struct numa_memblk { | ||
26 | u64 start; | ||
27 | u64 end; | ||
28 | int nid; | ||
29 | }; | ||
30 | |||
31 | struct numa_meminfo { | ||
32 | int nr_blks; | ||
33 | struct numa_memblk blk[NR_NODE_MEMBLKS]; | ||
34 | }; | ||
35 | |||
25 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | 36 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; |
26 | EXPORT_SYMBOL(node_data); | 37 | EXPORT_SYMBOL(node_data); |
27 | 38 | ||
@@ -33,9 +44,7 @@ struct memnode memnode; | |||
33 | static unsigned long __initdata nodemap_addr; | 44 | static unsigned long __initdata nodemap_addr; |
34 | static unsigned long __initdata nodemap_size; | 45 | static unsigned long __initdata nodemap_size; |
35 | 46 | ||
36 | static int num_node_memblks __initdata; | 47 | static struct numa_meminfo numa_meminfo __initdata; |
37 | static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; | ||
38 | static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; | ||
39 | 48 | ||
40 | struct bootnode numa_nodes[MAX_NUMNODES] __initdata; | 49 | struct bootnode numa_nodes[MAX_NUMNODES] __initdata; |
41 | 50 | ||
@@ -46,16 +55,15 @@ struct bootnode numa_nodes[MAX_NUMNODES] __initdata; | |||
46 | * 0 if memnodmap[] too small (of shift too small) | 55 | * 0 if memnodmap[] too small (of shift too small) |
47 | * -1 if node overlap or lost ram (shift too big) | 56 | * -1 if node overlap or lost ram (shift too big) |
48 | */ | 57 | */ |
49 | static int __init populate_memnodemap(const struct bootnode *nodes, | 58 | static int __init populate_memnodemap(const struct numa_meminfo *mi, int shift) |
50 | int numnodes, int shift, int *nodeids) | ||
51 | { | 59 | { |
52 | unsigned long addr, end; | 60 | unsigned long addr, end; |
53 | int i, res = -1; | 61 | int i, res = -1; |
54 | 62 | ||
55 | memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize); | 63 | memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize); |
56 | for (i = 0; i < numnodes; i++) { | 64 | for (i = 0; i < mi->nr_blks; i++) { |
57 | addr = nodes[i].start; | 65 | addr = mi->blk[i].start; |
58 | end = nodes[i].end; | 66 | end = mi->blk[i].end; |
59 | if (addr >= end) | 67 | if (addr >= end) |
60 | continue; | 68 | continue; |
61 | if ((end >> shift) >= memnodemapsize) | 69 | if ((end >> shift) >= memnodemapsize) |
@@ -63,7 +71,7 @@ static int __init populate_memnodemap(const struct bootnode *nodes, | |||
63 | do { | 71 | do { |
64 | if (memnodemap[addr >> shift] != NUMA_NO_NODE) | 72 | if (memnodemap[addr >> shift] != NUMA_NO_NODE) |
65 | return -1; | 73 | return -1; |
66 | memnodemap[addr >> shift] = nodeids[i]; | 74 | memnodemap[addr >> shift] = mi->blk[i].nid; |
67 | addr += (1UL << shift); | 75 | addr += (1UL << shift); |
68 | } while (addr < end); | 76 | } while (addr < end); |
69 | res = 1; | 77 | res = 1; |
@@ -101,16 +109,15 @@ static int __init allocate_cachealigned_memnodemap(void) | |||
101 | * The LSB of all start and end addresses in the node map is the value of the | 109 | * The LSB of all start and end addresses in the node map is the value of the |
102 | * maximum possible shift. | 110 | * maximum possible shift. |
103 | */ | 111 | */ |
104 | static int __init extract_lsb_from_nodes(const struct bootnode *nodes, | 112 | static int __init extract_lsb_from_nodes(const struct numa_meminfo *mi) |
105 | int numnodes) | ||
106 | { | 113 | { |
107 | int i, nodes_used = 0; | 114 | int i, nodes_used = 0; |
108 | unsigned long start, end; | 115 | unsigned long start, end; |
109 | unsigned long bitfield = 0, memtop = 0; | 116 | unsigned long bitfield = 0, memtop = 0; |
110 | 117 | ||
111 | for (i = 0; i < numnodes; i++) { | 118 | for (i = 0; i < mi->nr_blks; i++) { |
112 | start = nodes[i].start; | 119 | start = mi->blk[i].start; |
113 | end = nodes[i].end; | 120 | end = mi->blk[i].end; |
114 | if (start >= end) | 121 | if (start >= end) |
115 | continue; | 122 | continue; |
116 | bitfield |= start; | 123 | bitfield |= start; |
@@ -126,18 +133,17 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes, | |||
126 | return i; | 133 | return i; |
127 | } | 134 | } |
128 | 135 | ||
129 | static int __init compute_hash_shift(struct bootnode *nodes, int numnodes, | 136 | static int __init compute_hash_shift(const struct numa_meminfo *mi) |
130 | int *nodeids) | ||
131 | { | 137 | { |
132 | int shift; | 138 | int shift; |
133 | 139 | ||
134 | shift = extract_lsb_from_nodes(nodes, numnodes); | 140 | shift = extract_lsb_from_nodes(mi); |
135 | if (allocate_cachealigned_memnodemap()) | 141 | if (allocate_cachealigned_memnodemap()) |
136 | return -1; | 142 | return -1; |
137 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", | 143 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", |
138 | shift); | 144 | shift); |
139 | 145 | ||
140 | if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) { | 146 | if (populate_memnodemap(mi, shift) != 1) { |
141 | printk(KERN_INFO "Your memory is not aligned you need to " | 147 | printk(KERN_INFO "Your memory is not aligned you need to " |
142 | "rebuild your kernel with a bigger NODEMAPSIZE " | 148 | "rebuild your kernel with a bigger NODEMAPSIZE " |
143 | "shift=%d\n", shift); | 149 | "shift=%d\n", shift); |
@@ -185,21 +191,25 @@ static void * __init early_node_mem(int nodeid, unsigned long start, | |||
185 | 191 | ||
186 | static __init int conflicting_memblks(unsigned long start, unsigned long end) | 192 | static __init int conflicting_memblks(unsigned long start, unsigned long end) |
187 | { | 193 | { |
194 | struct numa_meminfo *mi = &numa_meminfo; | ||
188 | int i; | 195 | int i; |
189 | for (i = 0; i < num_node_memblks; i++) { | 196 | |
190 | struct bootnode *nd = &node_memblk_range[i]; | 197 | for (i = 0; i < mi->nr_blks; i++) { |
191 | if (nd->start == nd->end) | 198 | struct numa_memblk *blk = &mi->blk[i]; |
199 | |||
200 | if (blk->start == blk->end) | ||
192 | continue; | 201 | continue; |
193 | if (nd->end > start && nd->start < end) | 202 | if (blk->end > start && blk->start < end) |
194 | return memblk_nodeid[i]; | 203 | return blk->nid; |
195 | if (nd->end == end && nd->start == start) | 204 | if (blk->end == end && blk->start == start) |
196 | return memblk_nodeid[i]; | 205 | return blk->nid; |
197 | } | 206 | } |
198 | return -1; | 207 | return -1; |
199 | } | 208 | } |
200 | 209 | ||
201 | int __init numa_add_memblk(int nid, u64 start, u64 end) | 210 | int __init numa_add_memblk(int nid, u64 start, u64 end) |
202 | { | 211 | { |
212 | struct numa_meminfo *mi = &numa_meminfo; | ||
203 | int i; | 213 | int i; |
204 | 214 | ||
205 | i = conflicting_memblks(start, end); | 215 | i = conflicting_memblks(start, end); |
@@ -213,10 +223,10 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) | |||
213 | return -EINVAL; | 223 | return -EINVAL; |
214 | } | 224 | } |
215 | 225 | ||
216 | node_memblk_range[num_node_memblks].start = start; | 226 | mi->blk[mi->nr_blks].start = start; |
217 | node_memblk_range[num_node_memblks].end = end; | 227 | mi->blk[mi->nr_blks].end = end; |
218 | memblk_nodeid[num_node_memblks] = nid; | 228 | mi->blk[mi->nr_blks].nid = nid; |
219 | num_node_memblks++; | 229 | mi->nr_blks++; |
220 | return 0; | 230 | return 0; |
221 | } | 231 | } |
222 | 232 | ||
@@ -315,66 +325,59 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
315 | 325 | ||
316 | static int __init numa_register_memblks(void) | 326 | static int __init numa_register_memblks(void) |
317 | { | 327 | { |
328 | struct numa_meminfo *mi = &numa_meminfo; | ||
318 | int i; | 329 | int i; |
319 | 330 | ||
320 | /* | 331 | /* |
321 | * Join together blocks on the same node, holes between | 332 | * Join together blocks on the same node, holes between |
322 | * which don't overlap with memory on other nodes. | 333 | * which don't overlap with memory on other nodes. |
323 | */ | 334 | */ |
324 | for (i = 0; i < num_node_memblks; ++i) { | 335 | for (i = 0; i < mi->nr_blks; ++i) { |
336 | struct numa_memblk *bi = &mi->blk[i]; | ||
325 | int j, k; | 337 | int j, k; |
326 | 338 | ||
327 | for (j = i + 1; j < num_node_memblks; ++j) { | 339 | for (j = i + 1; j < mi->nr_blks; ++j) { |
340 | struct numa_memblk *bj = &mi->blk[j]; | ||
328 | unsigned long start, end; | 341 | unsigned long start, end; |
329 | 342 | ||
330 | if (memblk_nodeid[i] != memblk_nodeid[j]) | 343 | if (bi->nid != bj->nid) |
331 | continue; | 344 | continue; |
332 | start = min(node_memblk_range[i].end, | 345 | start = min(bi->end, bj->end); |
333 | node_memblk_range[j].end); | 346 | end = max(bi->start, bj->start); |
334 | end = max(node_memblk_range[i].start, | 347 | for (k = 0; k < mi->nr_blks; ++k) { |
335 | node_memblk_range[j].start); | 348 | struct numa_memblk *bk = &mi->blk[k]; |
336 | for (k = 0; k < num_node_memblks; ++k) { | 349 | |
337 | if (memblk_nodeid[i] == memblk_nodeid[k]) | 350 | if (bi->nid == bk->nid) |
338 | continue; | 351 | continue; |
339 | if (start < node_memblk_range[k].end && | 352 | if (start < bk->end && end > bk->start) |
340 | end > node_memblk_range[k].start) | ||
341 | break; | 353 | break; |
342 | } | 354 | } |
343 | if (k < num_node_memblks) | 355 | if (k < mi->nr_blks) |
344 | continue; | 356 | continue; |
345 | start = min(node_memblk_range[i].start, | 357 | start = min(bi->start, bj->start); |
346 | node_memblk_range[j].start); | 358 | end = max(bi->end, bj->end); |
347 | end = max(node_memblk_range[i].end, | ||
348 | node_memblk_range[j].end); | ||
349 | printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n", | 359 | printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n", |
350 | memblk_nodeid[i], | 360 | bi->nid, bi->start, bi->end, bj->start, bj->end, |
351 | node_memblk_range[i].start, | ||
352 | node_memblk_range[i].end, | ||
353 | node_memblk_range[j].start, | ||
354 | node_memblk_range[j].end, | ||
355 | start, end); | 361 | start, end); |
356 | node_memblk_range[i].start = start; | 362 | bi->start = start; |
357 | node_memblk_range[i].end = end; | 363 | bi->end = end; |
358 | k = --num_node_memblks - j; | 364 | k = --mi->nr_blks - j; |
359 | memmove(memblk_nodeid + j, memblk_nodeid + j+1, | 365 | memmove(mi->blk + j, mi->blk + j + 1, |
360 | k * sizeof(*memblk_nodeid)); | 366 | k * sizeof(mi->blk[0])); |
361 | memmove(node_memblk_range + j, node_memblk_range + j+1, | ||
362 | k * sizeof(*node_memblk_range)); | ||
363 | --j; | 367 | --j; |
364 | } | 368 | } |
365 | } | 369 | } |
366 | 370 | ||
367 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, | 371 | memnode_shift = compute_hash_shift(mi); |
368 | memblk_nodeid); | ||
369 | if (memnode_shift < 0) { | 372 | if (memnode_shift < 0) { |
370 | printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n"); | 373 | printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n"); |
371 | return -EINVAL; | 374 | return -EINVAL; |
372 | } | 375 | } |
373 | 376 | ||
374 | for (i = 0; i < num_node_memblks; i++) | 377 | for (i = 0; i < mi->nr_blks; i++) |
375 | memblock_x86_register_active_regions(memblk_nodeid[i], | 378 | memblock_x86_register_active_regions(mi->blk[i].nid, |
376 | node_memblk_range[i].start >> PAGE_SHIFT, | 379 | mi->blk[i].start >> PAGE_SHIFT, |
377 | node_memblk_range[i].end >> PAGE_SHIFT); | 380 | mi->blk[i].end >> PAGE_SHIFT); |
378 | 381 | ||
379 | /* for out of order entries */ | 382 | /* for out of order entries */ |
380 | sort_node_map(); | 383 | sort_node_map(); |
@@ -701,7 +704,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) | |||
701 | static int __init numa_emulation(unsigned long start_pfn, | 704 | static int __init numa_emulation(unsigned long start_pfn, |
702 | unsigned long last_pfn, int acpi, int amd) | 705 | unsigned long last_pfn, int acpi, int amd) |
703 | { | 706 | { |
704 | static int nodeid[NR_NODE_MEMBLKS] __initdata; | 707 | static struct numa_meminfo ei __initdata; |
705 | u64 addr = start_pfn << PAGE_SHIFT; | 708 | u64 addr = start_pfn << PAGE_SHIFT; |
706 | u64 max_addr = last_pfn << PAGE_SHIFT; | 709 | u64 max_addr = last_pfn << PAGE_SHIFT; |
707 | int num_nodes; | 710 | int num_nodes; |
@@ -727,10 +730,14 @@ static int __init numa_emulation(unsigned long start_pfn, | |||
727 | if (num_nodes < 0) | 730 | if (num_nodes < 0) |
728 | return num_nodes; | 731 | return num_nodes; |
729 | 732 | ||
730 | for (i = 0; i < ARRAY_SIZE(nodeid); i++) | 733 | ei.nr_blks = num_nodes; |
731 | nodeid[i] = i; | 734 | for (i = 0; i < ei.nr_blks; i++) { |
735 | ei.blk[i].start = nodes[i].start; | ||
736 | ei.blk[i].end = nodes[i].end; | ||
737 | ei.blk[i].nid = i; | ||
738 | } | ||
732 | 739 | ||
733 | memnode_shift = compute_hash_shift(nodes, num_nodes, nodeid); | 740 | memnode_shift = compute_hash_shift(&ei); |
734 | if (memnode_shift < 0) { | 741 | if (memnode_shift < 0) { |
735 | memnode_shift = 0; | 742 | memnode_shift = 0; |
736 | printk(KERN_ERR "No NUMA hash function found. NUMA emulation " | 743 | printk(KERN_ERR "No NUMA hash function found. NUMA emulation " |
@@ -797,9 +804,7 @@ void __init initmem_init(void) | |||
797 | nodes_clear(mem_nodes_parsed); | 804 | nodes_clear(mem_nodes_parsed); |
798 | nodes_clear(node_possible_map); | 805 | nodes_clear(node_possible_map); |
799 | nodes_clear(node_online_map); | 806 | nodes_clear(node_online_map); |
800 | num_node_memblks = 0; | 807 | memset(&numa_meminfo, 0, sizeof(numa_meminfo)); |
801 | memset(node_memblk_range, 0, sizeof(node_memblk_range)); | ||
802 | memset(memblk_nodeid, 0, sizeof(memblk_nodeid)); | ||
803 | memset(numa_nodes, 0, sizeof(numa_nodes)); | 808 | memset(numa_nodes, 0, sizeof(numa_nodes)); |
804 | remove_all_active_ranges(); | 809 | remove_all_active_ranges(); |
805 | 810 | ||