diff options
-rw-r--r-- | arch/x86_64/kernel/e820.c | 7 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup.c | 5 | ||||
-rw-r--r-- | arch/x86_64/mm/numa.c | 74 | ||||
-rw-r--r-- | include/asm-x86_64/e820.h | 1 | ||||
-rw-r--r-- | include/asm-x86_64/mmzone.h | 13 |
5 files changed, 85 insertions, 15 deletions
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 6fe191c58084..9d67955bbc31 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c | |||
@@ -83,6 +83,13 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) | |||
83 | return 1; | 83 | return 1; |
84 | } | 84 | } |
85 | 85 | ||
86 | #ifdef CONFIG_NUMA | ||
87 | /* NUMA memory to node map */ | ||
88 | if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) { | ||
89 | *addrp = nodemap_addr + nodemap_size; | ||
90 | return 1; | ||
91 | } | ||
92 | #endif | ||
86 | /* XXX ramdisk image here? */ | 93 | /* XXX ramdisk image here? */ |
87 | return 0; | 94 | return 0; |
88 | } | 95 | } |
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 60477244d1a3..f330f8285499 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c | |||
@@ -444,6 +444,11 @@ void __init setup_arch(char **cmdline_p) | |||
444 | /* reserve ebda region */ | 444 | /* reserve ebda region */ |
445 | if (ebda_addr) | 445 | if (ebda_addr) |
446 | reserve_bootmem_generic(ebda_addr, ebda_size); | 446 | reserve_bootmem_generic(ebda_addr, ebda_size); |
447 | #ifdef CONFIG_NUMA | ||
448 | /* reserve nodemap region */ | ||
449 | if (nodemap_addr) | ||
450 | reserve_bootmem_generic(nodemap_addr, nodemap_size); | ||
451 | #endif | ||
447 | 452 | ||
448 | #ifdef CONFIG_SMP | 453 | #ifdef CONFIG_SMP |
449 | /* | 454 | /* |
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 2ee2e003606c..7d9c428f4094 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c | |||
@@ -36,6 +36,8 @@ unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | |||
36 | cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; | 36 | cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; |
37 | 37 | ||
38 | int numa_off __initdata; | 38 | int numa_off __initdata; |
39 | unsigned long __initdata nodemap_addr; | ||
40 | unsigned long __initdata nodemap_size; | ||
39 | 41 | ||
40 | 42 | ||
41 | /* | 43 | /* |
@@ -52,34 +54,87 @@ populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift) | |||
52 | int res = -1; | 54 | int res = -1; |
53 | unsigned long addr, end; | 55 | unsigned long addr, end; |
54 | 56 | ||
55 | if (shift >= 64) | 57 | memset(memnodemap, 0xff, memnodemapsize); |
56 | return -1; | ||
57 | memset(memnodemap, 0xff, sizeof(memnodemap)); | ||
58 | for (i = 0; i < numnodes; i++) { | 58 | for (i = 0; i < numnodes; i++) { |
59 | addr = nodes[i].start; | 59 | addr = nodes[i].start; |
60 | end = nodes[i].end; | 60 | end = nodes[i].end; |
61 | if (addr >= end) | 61 | if (addr >= end) |
62 | continue; | 62 | continue; |
63 | if ((end >> shift) >= NODEMAPSIZE) | 63 | if ((end >> shift) >= memnodemapsize) |
64 | return 0; | 64 | return 0; |
65 | do { | 65 | do { |
66 | if (memnodemap[addr >> shift] != 0xff) | 66 | if (memnodemap[addr >> shift] != 0xff) |
67 | return -1; | 67 | return -1; |
68 | memnodemap[addr >> shift] = i; | 68 | memnodemap[addr >> shift] = i; |
69 | addr += (1UL << shift); | 69 | addr += (1UL << shift); |
70 | } while (addr < end); | 70 | } while (addr < end); |
71 | res = 1; | 71 | res = 1; |
72 | } | 72 | } |
73 | return res; | 73 | return res; |
74 | } | 74 | } |
75 | 75 | ||
76 | int __init compute_hash_shift(struct bootnode *nodes, int numnodes) | 76 | static int __init allocate_cachealigned_memnodemap(void) |
77 | { | ||
78 | unsigned long pad, pad_addr; | ||
79 | |||
80 | memnodemap = memnode.embedded_map; | ||
81 | if (memnodemapsize <= 48) { | ||
82 | printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n", | ||
83 | nodemap_addr, nodemap_addr + nodemap_size); | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | pad = L1_CACHE_BYTES - 1; | ||
88 | pad_addr = 0x8000; | ||
89 | nodemap_size = pad + memnodemapsize; | ||
90 | nodemap_addr = find_e820_area(pad_addr, end_pfn<<PAGE_SHIFT, | ||
91 | nodemap_size); | ||
92 | if (nodemap_addr == -1UL) { | ||
93 | printk(KERN_ERR | ||
94 | "NUMA: Unable to allocate Memory to Node hash map\n"); | ||
95 | nodemap_addr = nodemap_size = 0; | ||
96 | return -1; | ||
97 | } | ||
98 | pad_addr = (nodemap_addr + pad) & ~pad; | ||
99 | memnodemap = phys_to_virt(pad_addr); | ||
100 | |||
101 | printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n", | ||
102 | nodemap_addr, nodemap_addr + nodemap_size); | ||
103 | return 0; | ||
104 | } | ||
105 | |||
106 | /* | ||
107 | * The LSB of all start and end addresses in the node map is the value of the | ||
108 | * maximum possible shift. | ||
109 | */ | ||
110 | static int __init | ||
111 | extract_lsb_from_nodes (const struct bootnode *nodes, int numnodes) | ||
77 | { | 112 | { |
78 | int shift = 20; | 113 | int i; |
114 | unsigned long start, end; | ||
115 | unsigned long bitfield = 0, memtop = 0; | ||
79 | 116 | ||
80 | while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0) | 117 | for (i = 0; i < numnodes; i++) { |
81 | shift++; | 118 | start = nodes[i].start; |
119 | end = nodes[i].end; | ||
120 | if (start >= end) | ||
121 | continue; | ||
122 | bitfield |= start | end; | ||
123 | if (end > memtop) | ||
124 | memtop = end; | ||
125 | } | ||
126 | i = find_first_bit(&bitfield, sizeof(unsigned long)*8); | ||
127 | memnodemapsize = (memtop >> i)+1; | ||
128 | return i; | ||
129 | } | ||
130 | |||
131 | int __init compute_hash_shift(struct bootnode *nodes, int numnodes) | ||
132 | { | ||
133 | int shift; | ||
82 | 134 | ||
135 | shift = extract_lsb_from_nodes(nodes, numnodes); | ||
136 | if (allocate_cachealigned_memnodemap()) | ||
137 | return -1; | ||
83 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", | 138 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", |
84 | shift); | 139 | shift); |
85 | 140 | ||
@@ -290,6 +345,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
290 | end_pfn << PAGE_SHIFT); | 345 | end_pfn << PAGE_SHIFT); |
291 | /* setup dummy node covering all memory */ | 346 | /* setup dummy node covering all memory */ |
292 | memnode_shift = 63; | 347 | memnode_shift = 63; |
348 | memnodemap = memnode.embedded_map; | ||
293 | memnodemap[0] = 0; | 349 | memnodemap[0] = 0; |
294 | nodes_clear(node_online_map); | 350 | nodes_clear(node_online_map); |
295 | node_set_online(0); | 351 | node_set_online(0); |
diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h index fa2086774105..855fb4a454b6 100644 --- a/include/asm-x86_64/e820.h +++ b/include/asm-x86_64/e820.h | |||
@@ -56,6 +56,7 @@ extern void finish_e820_parsing(void); | |||
56 | extern struct e820map e820; | 56 | extern struct e820map e820; |
57 | 57 | ||
58 | extern unsigned ebda_addr, ebda_size; | 58 | extern unsigned ebda_addr, ebda_size; |
59 | extern unsigned long nodemap_addr, nodemap_size; | ||
59 | #endif/*!__ASSEMBLY__*/ | 60 | #endif/*!__ASSEMBLY__*/ |
60 | 61 | ||
61 | #endif/*__E820_HEADER*/ | 62 | #endif/*__E820_HEADER*/ |
diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h index c38ebdf6f426..39ef106986eb 100644 --- a/include/asm-x86_64/mmzone.h +++ b/include/asm-x86_64/mmzone.h | |||
@@ -11,24 +11,25 @@ | |||
11 | 11 | ||
12 | #include <asm/smp.h> | 12 | #include <asm/smp.h> |
13 | 13 | ||
14 | /* Should really switch to dynamic allocation at some point */ | ||
15 | #define NODEMAPSIZE 0x4fff | ||
16 | |||
17 | /* Simple perfect hash to map physical addresses to node numbers */ | 14 | /* Simple perfect hash to map physical addresses to node numbers */ |
18 | struct memnode { | 15 | struct memnode { |
19 | int shift; | 16 | int shift; |
20 | u8 map[NODEMAPSIZE]; | 17 | unsigned int mapsize; |
21 | } ____cacheline_aligned; | 18 | u8 *map; |
19 | u8 embedded_map[64-16]; | ||
20 | } ____cacheline_aligned; /* total size = 64 bytes */ | ||
22 | extern struct memnode memnode; | 21 | extern struct memnode memnode; |
23 | #define memnode_shift memnode.shift | 22 | #define memnode_shift memnode.shift |
24 | #define memnodemap memnode.map | 23 | #define memnodemap memnode.map |
24 | #define memnodemapsize memnode.mapsize | ||
25 | 25 | ||
26 | extern struct pglist_data *node_data[]; | 26 | extern struct pglist_data *node_data[]; |
27 | 27 | ||
28 | static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) | 28 | static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) |
29 | { | 29 | { |
30 | unsigned nid; | 30 | unsigned nid; |
31 | VIRTUAL_BUG_ON((addr >> memnode_shift) >= NODEMAPSIZE); | 31 | VIRTUAL_BUG_ON(!memnodemap); |
32 | VIRTUAL_BUG_ON((addr >> memnode_shift) >= memnodemapsize); | ||
32 | nid = memnodemap[addr >> memnode_shift]; | 33 | nid = memnodemap[addr >> memnode_shift]; |
33 | VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); | 34 | VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); |
34 | return nid; | 35 | return nid; |