aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86_64/kernel/e820.c7
-rw-r--r--arch/x86_64/kernel/setup.c5
-rw-r--r--arch/x86_64/mm/numa.c74
-rw-r--r--include/asm-x86_64/e820.h1
-rw-r--r--include/asm-x86_64/mmzone.h13
5 files changed, 85 insertions, 15 deletions
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index 6fe191c58084..9d67955bbc31 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -83,6 +83,13 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
83 return 1; 83 return 1;
84 } 84 }
85 85
86#ifdef CONFIG_NUMA
87 /* NUMA memory to node map */
88 if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) {
89 *addrp = nodemap_addr + nodemap_size;
90 return 1;
91 }
92#endif
86 /* XXX ramdisk image here? */ 93 /* XXX ramdisk image here? */
87 return 0; 94 return 0;
88} 95}
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 60477244d1a3..f330f8285499 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -444,6 +444,11 @@ void __init setup_arch(char **cmdline_p)
444 /* reserve ebda region */ 444 /* reserve ebda region */
445 if (ebda_addr) 445 if (ebda_addr)
446 reserve_bootmem_generic(ebda_addr, ebda_size); 446 reserve_bootmem_generic(ebda_addr, ebda_size);
447#ifdef CONFIG_NUMA
448 /* reserve nodemap region */
449 if (nodemap_addr)
450 reserve_bootmem_generic(nodemap_addr, nodemap_size);
451#endif
447 452
448#ifdef CONFIG_SMP 453#ifdef CONFIG_SMP
449 /* 454 /*
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 2ee2e003606c..7d9c428f4094 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -36,6 +36,8 @@ unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
36cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; 36cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
37 37
38int numa_off __initdata; 38int numa_off __initdata;
39unsigned long __initdata nodemap_addr;
40unsigned long __initdata nodemap_size;
39 41
40 42
41/* 43/*
@@ -52,34 +54,87 @@ populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift)
52 int res = -1; 54 int res = -1;
53 unsigned long addr, end; 55 unsigned long addr, end;
54 56
55 if (shift >= 64) 57 memset(memnodemap, 0xff, memnodemapsize);
56 return -1;
57 memset(memnodemap, 0xff, sizeof(memnodemap));
58 for (i = 0; i < numnodes; i++) { 58 for (i = 0; i < numnodes; i++) {
59 addr = nodes[i].start; 59 addr = nodes[i].start;
60 end = nodes[i].end; 60 end = nodes[i].end;
61 if (addr >= end) 61 if (addr >= end)
62 continue; 62 continue;
63 if ((end >> shift) >= NODEMAPSIZE) 63 if ((end >> shift) >= memnodemapsize)
64 return 0; 64 return 0;
65 do { 65 do {
66 if (memnodemap[addr >> shift] != 0xff) 66 if (memnodemap[addr >> shift] != 0xff)
67 return -1; 67 return -1;
68 memnodemap[addr >> shift] = i; 68 memnodemap[addr >> shift] = i;
69 addr += (1UL << shift); 69 addr += (1UL << shift);
70 } while (addr < end); 70 } while (addr < end);
71 res = 1; 71 res = 1;
72 } 72 }
73 return res; 73 return res;
74} 74}
75 75
76int __init compute_hash_shift(struct bootnode *nodes, int numnodes) 76static int __init allocate_cachealigned_memnodemap(void)
77{
78 unsigned long pad, pad_addr;
79
80 memnodemap = memnode.embedded_map;
81 if (memnodemapsize <= 48) {
82 printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
83 nodemap_addr, nodemap_addr + nodemap_size);
84 return 0;
85 }
86
87 pad = L1_CACHE_BYTES - 1;
88 pad_addr = 0x8000;
89 nodemap_size = pad + memnodemapsize;
90 nodemap_addr = find_e820_area(pad_addr, end_pfn<<PAGE_SHIFT,
91 nodemap_size);
92 if (nodemap_addr == -1UL) {
93 printk(KERN_ERR
94 "NUMA: Unable to allocate Memory to Node hash map\n");
95 nodemap_addr = nodemap_size = 0;
96 return -1;
97 }
98 pad_addr = (nodemap_addr + pad) & ~pad;
99 memnodemap = phys_to_virt(pad_addr);
100
101 printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
102 nodemap_addr, nodemap_addr + nodemap_size);
103 return 0;
104}
105
106/*
107 * The LSB of all start and end addresses in the node map is the value of the
108 * maximum possible shift.
109 */
110static int __init
111extract_lsb_from_nodes (const struct bootnode *nodes, int numnodes)
77{ 112{
78 int shift = 20; 113 int i;
114 unsigned long start, end;
115 unsigned long bitfield = 0, memtop = 0;
79 116
80 while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0) 117 for (i = 0; i < numnodes; i++) {
81 shift++; 118 start = nodes[i].start;
119 end = nodes[i].end;
120 if (start >= end)
121 continue;
122 bitfield |= start | end;
123 if (end > memtop)
124 memtop = end;
125 }
126 i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
127 memnodemapsize = (memtop >> i)+1;
128 return i;
129}
130
131int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
132{
133 int shift;
82 134
135 shift = extract_lsb_from_nodes(nodes, numnodes);
136 if (allocate_cachealigned_memnodemap())
137 return -1;
83 printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", 138 printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
84 shift); 139 shift);
85 140
@@ -290,6 +345,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
290 end_pfn << PAGE_SHIFT); 345 end_pfn << PAGE_SHIFT);
291 /* setup dummy node covering all memory */ 346 /* setup dummy node covering all memory */
292 memnode_shift = 63; 347 memnode_shift = 63;
348 memnodemap = memnode.embedded_map;
293 memnodemap[0] = 0; 349 memnodemap[0] = 0;
294 nodes_clear(node_online_map); 350 nodes_clear(node_online_map);
295 node_set_online(0); 351 node_set_online(0);
diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h
index fa2086774105..855fb4a454b6 100644
--- a/include/asm-x86_64/e820.h
+++ b/include/asm-x86_64/e820.h
@@ -56,6 +56,7 @@ extern void finish_e820_parsing(void);
56extern struct e820map e820; 56extern struct e820map e820;
57 57
58extern unsigned ebda_addr, ebda_size; 58extern unsigned ebda_addr, ebda_size;
59extern unsigned long nodemap_addr, nodemap_size;
59#endif/*!__ASSEMBLY__*/ 60#endif/*!__ASSEMBLY__*/
60 61
61#endif/*__E820_HEADER*/ 62#endif/*__E820_HEADER*/
diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h
index c38ebdf6f426..39ef106986eb 100644
--- a/include/asm-x86_64/mmzone.h
+++ b/include/asm-x86_64/mmzone.h
@@ -11,24 +11,25 @@
11 11
12#include <asm/smp.h> 12#include <asm/smp.h>
13 13
14/* Should really switch to dynamic allocation at some point */
15#define NODEMAPSIZE 0x4fff
16
17/* Simple perfect hash to map physical addresses to node numbers */ 14/* Simple perfect hash to map physical addresses to node numbers */
18struct memnode { 15struct memnode {
19 int shift; 16 int shift;
20 u8 map[NODEMAPSIZE]; 17 unsigned int mapsize;
21} ____cacheline_aligned; 18 u8 *map;
19 u8 embedded_map[64-16];
20} ____cacheline_aligned; /* total size = 64 bytes */
22extern struct memnode memnode; 21extern struct memnode memnode;
23#define memnode_shift memnode.shift 22#define memnode_shift memnode.shift
24#define memnodemap memnode.map 23#define memnodemap memnode.map
24#define memnodemapsize memnode.mapsize
25 25
26extern struct pglist_data *node_data[]; 26extern struct pglist_data *node_data[];
27 27
28static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) 28static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
29{ 29{
30 unsigned nid; 30 unsigned nid;
31 VIRTUAL_BUG_ON((addr >> memnode_shift) >= NODEMAPSIZE); 31 VIRTUAL_BUG_ON(!memnodemap);
32 VIRTUAL_BUG_ON((addr >> memnode_shift) >= memnodemapsize);
32 nid = memnodemap[addr >> memnode_shift]; 33 nid = memnodemap[addr >> memnode_shift];
33 VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); 34 VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
34 return nid; 35 return nid;