aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Lee Schermerhorn <lee.schermerhorn@hp.com> 2010-05-26 17:45:00 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-05-27 12:12:57 -0400
commit: 7aac789885512388a66d47280d7e7777ffba1e59 (patch)
tree: af4ac98260268889a422dd264102d2f15d5c1983
parent: 3bccd996276b108c138e8176793a26ecef54d573 (diff)
numa: introduce numa_mem_id() - effective local memory node id
Introduce numa_mem_id(), based on generic percpu variable infrastructure to track "nearest node with memory" for archs that support memoryless nodes.

Define API in <linux/topology.h> when CONFIG_HAVE_MEMORYLESS_NODES defined, else stubs. Architectures will define HAVE_MEMORYLESS_NODES if/when they support them.

Archs can override definitions of:

  numa_mem_id()  - returns node number of "local memory" node
  set_numa_mem() - initialize [this cpu's] per cpu variable 'numa_mem'
  cpu_to_mem()   - return numa_mem for specified cpu; may be used as lvalue

Generic initialization of 'numa_mem' occurs in __build_all_zonelists(). This will initialize the boot cpu at boot time, and all cpus on change of numa_zonelist_order, or when node or memory hot-plug requires zonelist rebuild. Archs that support memoryless nodes will need to initialize 'numa_mem' for secondary cpus as they're brought on-line.

[akpm@linux-foundation.org: fix build]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Nick Piggin <npiggin@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/asm-generic/topology.h 3
-rw-r--r-- include/linux/mmzone.h 6
-rw-r--r-- include/linux/topology.h 61
-rw-r--r-- mm/page_alloc.c 45
4 files changed, 114 insertions, 1 deletions
diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h
index 510df36dd5d4..fd60700503c8 100644
--- a/include/asm-generic/topology.h
+++ b/include/asm-generic/topology.h
@@ -34,6 +34,9 @@
34#ifndef cpu_to_node 34#ifndef cpu_to_node
35#define cpu_to_node(cpu) ((void)(cpu),0) 35#define cpu_to_node(cpu) ((void)(cpu),0)
36#endif 36#endif
37#ifndef cpu_to_mem
38#define cpu_to_mem(cpu) ((void)(cpu),0)
39#endif
37#ifndef parent_node 40#ifndef parent_node
38#define parent_node(node) ((void)(node),0) 41#define parent_node(node) ((void)(node),0)
39#endif 42#endif
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0fa491326c4a..b4d109e389b8 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -671,6 +671,12 @@ void memory_present(int nid, unsigned long start, unsigned long end);
671static inline void memory_present(int nid, unsigned long start, unsigned long end) {} 671static inline void memory_present(int nid, unsigned long start, unsigned long end) {}
672#endif 672#endif
673 673
674#ifdef CONFIG_HAVE_MEMORYLESS_NODES
675int local_memory_node(int node_id);
676#else
677static inline int local_memory_node(int node_id) { return node_id; };
678#endif
679
674#ifdef CONFIG_NEED_NODE_MEMMAP_SIZE 680#ifdef CONFIG_NEED_NODE_MEMMAP_SIZE
675unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); 681unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
676#endif 682#endif
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 2e5518f46571..c44df50a05ab 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -251,6 +251,67 @@ static inline int numa_node_id(void)
251 251
252#endif /* [!]CONFIG_USE_PERCPU_NUMA_NODE_ID */ 252#endif /* [!]CONFIG_USE_PERCPU_NUMA_NODE_ID */
253 253
254#ifdef CONFIG_HAVE_MEMORYLESS_NODES
255
256/*
257 * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly.
258 * It will not be defined when CONFIG_HAVE_MEMORYLESS_NODES is not defined.
259 * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem().
260 */
261DECLARE_PER_CPU(int, _numa_mem_);
262
263#ifndef set_numa_mem
264static inline void set_numa_mem(int node)
265{
266 percpu_write(_numa_mem_, node);
267}
268#endif
269
270#ifndef numa_mem_id
271/* Returns the number of the nearest Node with memory */
272static inline int numa_mem_id(void)
273{
274 return __this_cpu_read(_numa_mem_);
275}
276#endif
277
278#ifndef cpu_to_mem
279static inline int cpu_to_mem(int cpu)
280{
281 return per_cpu(_numa_mem_, cpu);
282}
283#endif
284
285#ifndef set_cpu_numa_mem
286static inline void set_cpu_numa_mem(int cpu, int node)
287{
288 per_cpu(_numa_mem_, cpu) = node;
289}
290#endif
291
292#else /* !CONFIG_HAVE_MEMORYLESS_NODES */
293
294static inline void set_numa_mem(int node) {}
295
296static inline void set_cpu_numa_mem(int cpu, int node) {}
297
298#ifndef numa_mem_id
299/* Returns the number of the nearest Node with memory */
300static inline int numa_mem_id(void)
301{
302 return numa_node_id();
303}
304#endif
305
306#ifndef cpu_to_mem
307static inline int cpu_to_mem(int cpu)
308{
309 return cpu_to_node(cpu);
310}
311#endif
312
313#endif /* [!]CONFIG_HAVE_MEMORYLESS_NODES */
314
254#ifndef topology_physical_package_id 315#ifndef topology_physical_package_id
255#define topology_physical_package_id(cpu) ((void)(cpu), -1) 316#define topology_physical_package_id(cpu) ((void)(cpu), -1)
256#endif 317#endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6fe1b65ee1a8..431214b941ac 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -62,6 +62,17 @@ DEFINE_PER_CPU(int, numa_node);
62EXPORT_PER_CPU_SYMBOL(numa_node); 62EXPORT_PER_CPU_SYMBOL(numa_node);
63#endif 63#endif
64 64
65#ifdef CONFIG_HAVE_MEMORYLESS_NODES
66/*
67 * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly.
68 * It will not be defined when CONFIG_HAVE_MEMORYLESS_NODES is not defined.
69 * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem()
70 * defined in <linux/topology.h>.
71 */
72DEFINE_PER_CPU(int, _numa_mem_); /* Kernel "local memory" node */
73EXPORT_PER_CPU_SYMBOL(_numa_mem_);
74#endif
75
65/* 76/*
66 * Array of node states. 77 * Array of node states.
67 */ 78 */
@@ -2861,6 +2872,24 @@ static void build_zonelist_cache(pg_data_t *pgdat)
2861 zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z); 2872 zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z);
2862} 2873}
2863 2874
2875#ifdef CONFIG_HAVE_MEMORYLESS_NODES
2876/*
2877 * Return node id of node used for "local" allocations.
2878 * I.e., first node id of first zone in arg node's generic zonelist.
2879 * Used for initializing percpu 'numa_mem', which is used primarily
2880 * for kernel allocations, so use GFP_KERNEL flags to locate zonelist.
2881 */
2882int local_memory_node(int node)
2883{
2884 struct zone *zone;
2885
2886 (void)first_zones_zonelist(node_zonelist(node, GFP_KERNEL),
2887 gfp_zone(GFP_KERNEL),
2888 NULL,
2889 &zone);
2890 return zone->node;
2891}
2892#endif
2864 2893
2865#else /* CONFIG_NUMA */ 2894#else /* CONFIG_NUMA */
2866 2895
@@ -2975,9 +3004,23 @@ static __init_refok int __build_all_zonelists(void *data)
2975 * needs the percpu allocator in order to allocate its pagesets 3004 * needs the percpu allocator in order to allocate its pagesets
2976 * (a chicken-egg dilemma). 3005 * (a chicken-egg dilemma).
2977 */ 3006 */
2978 for_each_possible_cpu(cpu) 3007 for_each_possible_cpu(cpu) {
2979 setup_pageset(&per_cpu(boot_pageset, cpu), 0); 3008 setup_pageset(&per_cpu(boot_pageset, cpu), 0);
2980 3009
3010#ifdef CONFIG_HAVE_MEMORYLESS_NODES
3011 /*
3012 * We now know the "local memory node" for each node--
3013 * i.e., the node of the first zone in the generic zonelist.
3014 * Set up numa_mem percpu variable for on-line cpus. During
3015 * boot, only the boot cpu should be on-line; we'll init the
3016 * secondary cpus' numa_mem as they come on-line. During
3017 * node/memory hotplug, we'll fixup all on-line cpus.
3018 */
3019 if (cpu_online(cpu))
3020 set_cpu_numa_mem(cpu, local_memory_node(cpu_to_node(cpu)));
3021#endif
3022 }
3023
2981 return 0; 3024 return 0;
2982} 3025}
2983 3026