aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2011-04-01 05:15:12 -0400
committerTejun Heo <tj@kernel.org>2011-04-01 05:15:12 -0400
commit052936080c8fb2f791103995b21bd4018c8df886 (patch)
treea195ff3279760f867b3b9120ed22f441f9e49f61
parent9d42a53e0f46505b39494041d514372235817e15 (diff)
x86-64, NUMA: Remove custom phys_to_nid() implementation
phys_to_nid() maps physical address to NUMA node id. This is implemented by building perfect hash in compute_hash_shift() during initialization. However, with SPARSE memory model, the nid is encoded in page flags. The perfect hash implementation was for DISCONTIG memory model which got removed years ago by b263295dbf (x86: 64-bit, make sparsemem vmemmap the only memory model). So, the perfect hash ends up being used only during initialization when the core SPARSE code already provides perfectly acceptable generic early_pfn_to_nid() implementation. Drop phys_to_nid() and use the generic ealry_pfn_to_nid() instead. Signed-off-by: Tejun Heo <tj@kernel.org> Reviewed-by: Christoph Lameter <cl@linux-foundation.org> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/include/asm/mmzone_64.h23
-rw-r--r--arch/x86/mm/numa_64.c123
3 files changed, 1 insertions, 149 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc6c53a95bf..b034814bf97 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1703,10 +1703,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE
1703 def_bool y 1703 def_bool y
1704 depends on MEMORY_HOTPLUG 1704 depends on MEMORY_HOTPLUG
1705 1705
1706config HAVE_ARCH_EARLY_PFN_TO_NID
1707 def_bool X86_64
1708 depends on NUMA
1709
1710config USE_PERCPU_NUMA_NODE_ID 1706config USE_PERCPU_NUMA_NODE_ID
1711 def_bool y 1707 def_bool y
1712 depends on NUMA 1708 depends on NUMA
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
index 288b96f815a..b3f88d7867c 100644
--- a/arch/x86/include/asm/mmzone_64.h
+++ b/arch/x86/include/asm/mmzone_64.h
@@ -4,36 +4,13 @@
4#ifndef _ASM_X86_MMZONE_64_H 4#ifndef _ASM_X86_MMZONE_64_H
5#define _ASM_X86_MMZONE_64_H 5#define _ASM_X86_MMZONE_64_H
6 6
7
8#ifdef CONFIG_NUMA 7#ifdef CONFIG_NUMA
9 8
10#include <linux/mmdebug.h> 9#include <linux/mmdebug.h>
11
12#include <asm/smp.h> 10#include <asm/smp.h>
13 11
14/* Simple perfect hash to map physical addresses to node numbers */
15struct memnode {
16 int shift;
17 unsigned int mapsize;
18 s16 *map;
19 s16 embedded_map[64 - 8];
20} ____cacheline_aligned; /* total size = 128 bytes */
21extern struct memnode memnode;
22#define memnode_shift memnode.shift
23#define memnodemap memnode.map
24#define memnodemapsize memnode.mapsize
25
26extern struct pglist_data *node_data[]; 12extern struct pglist_data *node_data[];
27 13
28static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
29{
30 unsigned nid;
31 VIRTUAL_BUG_ON(!memnodemap);
32 nid = memnodemap[addr >> memnode_shift];
33 VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
34 return nid;
35}
36
37#define NODE_DATA(nid) (node_data[nid]) 14#define NODE_DATA(nid) (node_data[nid])
38 15
39#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) 16#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index e8c00cc7203..3951ee6eade 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -28,125 +28,10 @@ EXPORT_SYMBOL(node_data);
28 28
29nodemask_t numa_nodes_parsed __initdata; 29nodemask_t numa_nodes_parsed __initdata;
30 30
31struct memnode memnode;
32
33static unsigned long __initdata nodemap_addr;
34static unsigned long __initdata nodemap_size;
35
36static struct numa_meminfo numa_meminfo __initdata; 31static struct numa_meminfo numa_meminfo __initdata;
37
38static int numa_distance_cnt; 32static int numa_distance_cnt;
39static u8 *numa_distance; 33static u8 *numa_distance;
40 34
41/*
42 * Given a shift value, try to populate memnodemap[]
43 * Returns :
44 * 1 if OK
45 * 0 if memnodmap[] too small (of shift too small)
46 * -1 if node overlap or lost ram (shift too big)
47 */
48static int __init populate_memnodemap(const struct numa_meminfo *mi, int shift)
49{
50 unsigned long addr, end;
51 int i, res = -1;
52
53 memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize);
54 for (i = 0; i < mi->nr_blks; i++) {
55 addr = mi->blk[i].start;
56 end = mi->blk[i].end;
57 if (addr >= end)
58 continue;
59 if ((end >> shift) >= memnodemapsize)
60 return 0;
61 do {
62 if (memnodemap[addr >> shift] != NUMA_NO_NODE)
63 return -1;
64 memnodemap[addr >> shift] = mi->blk[i].nid;
65 addr += (1UL << shift);
66 } while (addr < end);
67 res = 1;
68 }
69 return res;
70}
71
72static int __init allocate_cachealigned_memnodemap(void)
73{
74 unsigned long addr;
75
76 memnodemap = memnode.embedded_map;
77 if (memnodemapsize <= ARRAY_SIZE(memnode.embedded_map))
78 return 0;
79
80 addr = 0x8000;
81 nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
82 nodemap_addr = memblock_find_in_range(addr, get_max_mapped(),
83 nodemap_size, L1_CACHE_BYTES);
84 if (nodemap_addr == MEMBLOCK_ERROR) {
85 printk(KERN_ERR
86 "NUMA: Unable to allocate Memory to Node hash map\n");
87 nodemap_addr = nodemap_size = 0;
88 return -1;
89 }
90 memnodemap = phys_to_virt(nodemap_addr);
91 memblock_x86_reserve_range(nodemap_addr, nodemap_addr + nodemap_size, "MEMNODEMAP");
92
93 printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
94 nodemap_addr, nodemap_addr + nodemap_size);
95 return 0;
96}
97
98/*
99 * The LSB of all start and end addresses in the node map is the value of the
100 * maximum possible shift.
101 */
102static int __init extract_lsb_from_nodes(const struct numa_meminfo *mi)
103{
104 int i, nodes_used = 0;
105 unsigned long start, end;
106 unsigned long bitfield = 0, memtop = 0;
107
108 for (i = 0; i < mi->nr_blks; i++) {
109 start = mi->blk[i].start;
110 end = mi->blk[i].end;
111 if (start >= end)
112 continue;
113 bitfield |= start;
114 nodes_used++;
115 if (end > memtop)
116 memtop = end;
117 }
118 if (nodes_used <= 1)
119 i = 63;
120 else
121 i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
122 memnodemapsize = (memtop >> i)+1;
123 return i;
124}
125
126static int __init compute_hash_shift(const struct numa_meminfo *mi)
127{
128 int shift;
129
130 shift = extract_lsb_from_nodes(mi);
131 if (allocate_cachealigned_memnodemap())
132 return -1;
133 printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
134 shift);
135
136 if (populate_memnodemap(mi, shift) != 1) {
137 printk(KERN_INFO "Your memory is not aligned you need to "
138 "rebuild your kernel with a bigger NODEMAPSIZE "
139 "shift=%d\n", shift);
140 return -1;
141 }
142 return shift;
143}
144
145int __meminit __early_pfn_to_nid(unsigned long pfn)
146{
147 return phys_to_nid(pfn << PAGE_SHIFT);
148}
149
150static void * __init early_node_mem(int nodeid, unsigned long start, 35static void * __init early_node_mem(int nodeid, unsigned long start,
151 unsigned long end, unsigned long size, 36 unsigned long end, unsigned long size,
152 unsigned long align) 37 unsigned long align)
@@ -270,7 +155,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
270 memblock_x86_reserve_range(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA"); 155 memblock_x86_reserve_range(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA");
271 printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, 156 printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
272 nodedata_phys + pgdat_size - 1); 157 nodedata_phys + pgdat_size - 1);
273 nid = phys_to_nid(nodedata_phys); 158 nid = early_pfn_to_nid(nodedata_phys >> PAGE_SHIFT);
274 if (nid != nodeid) 159 if (nid != nodeid)
275 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); 160 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
276 161
@@ -527,12 +412,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
527 if (WARN_ON(nodes_empty(node_possible_map))) 412 if (WARN_ON(nodes_empty(node_possible_map)))
528 return -EINVAL; 413 return -EINVAL;
529 414
530 memnode_shift = compute_hash_shift(mi);
531 if (memnode_shift < 0) {
532 printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n");
533 return -EINVAL;
534 }
535
536 for (i = 0; i < mi->nr_blks; i++) 415 for (i = 0; i < mi->nr_blks; i++)
537 memblock_x86_register_active_regions(mi->blk[i].nid, 416 memblock_x86_register_active_regions(mi->blk[i].nid,
538 mi->blk[i].start >> PAGE_SHIFT, 417 mi->blk[i].start >> PAGE_SHIFT,