diff options
| author | Lee Schermerhorn <lee.schermerhorn@hp.com> | 2010-05-26 17:45:00 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-27 12:12:57 -0400 |
| commit | 7aac789885512388a66d47280d7e7777ffba1e59 (patch) | |
| tree | af4ac98260268889a422dd264102d2f15d5c1983 | |
| parent | 3bccd996276b108c138e8176793a26ecef54d573 (diff) | |
numa: introduce numa_mem_id() - effective local memory node id
Introduce numa_mem_id(), based on generic percpu variable infrastructure
to track "nearest node with memory" for archs that support memoryless
nodes.
Define the API in <linux/topology.h> when CONFIG_HAVE_MEMORYLESS_NODES is
defined; otherwise provide stubs. Architectures will define
CONFIG_HAVE_MEMORYLESS_NODES if/when they support memoryless nodes.
Archs can override definitions of:
numa_mem_id() - returns node number of "local memory" node
set_numa_mem() - initialize [this cpu's] per cpu variable 'numa_mem'
cpu_to_mem() - return numa_mem for specified cpu (not an lvalue in this version; use set_cpu_numa_mem() to set it)
Generic initialization of 'numa_mem' occurs in __build_all_zonelists().
This will initialize the boot cpu at boot time, and all cpus on change of
numa_zonelist_order, or when node or memory hot-plug requires zonelist
rebuild. Archs that support memoryless nodes will need to initialize
'numa_mem' for secondary cpus as they're brought on-line.
[akpm@linux-foundation.org: fix build]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Nick Piggin <npiggin@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| -rw-r--r-- | include/asm-generic/topology.h | 3 | ||||
| -rw-r--r-- | include/linux/mmzone.h | 6 | ||||
| -rw-r--r-- | include/linux/topology.h | 61 | ||||
| -rw-r--r-- | mm/page_alloc.c | 45 |
4 files changed, 114 insertions, 1 deletions
diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h index 510df36dd5d4..fd60700503c8 100644 --- a/include/asm-generic/topology.h +++ b/include/asm-generic/topology.h | |||
| @@ -34,6 +34,9 @@ | |||
| 34 | #ifndef cpu_to_node | 34 | #ifndef cpu_to_node |
| 35 | #define cpu_to_node(cpu) ((void)(cpu),0) | 35 | #define cpu_to_node(cpu) ((void)(cpu),0) |
| 36 | #endif | 36 | #endif |
| 37 | #ifndef cpu_to_mem | ||
| 38 | #define cpu_to_mem(cpu) ((void)(cpu),0) | ||
| 39 | #endif | ||
| 37 | #ifndef parent_node | 40 | #ifndef parent_node |
| 38 | #define parent_node(node) ((void)(node),0) | 41 | #define parent_node(node) ((void)(node),0) |
| 39 | #endif | 42 | #endif |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0fa491326c4a..b4d109e389b8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
| @@ -671,6 +671,12 @@ void memory_present(int nid, unsigned long start, unsigned long end); | |||
| 671 | static inline void memory_present(int nid, unsigned long start, unsigned long end) {} | 671 | static inline void memory_present(int nid, unsigned long start, unsigned long end) {} |
| 672 | #endif | 672 | #endif |
| 673 | 673 | ||
| 674 | #ifdef CONFIG_HAVE_MEMORYLESS_NODES | ||
| 675 | int local_memory_node(int node_id); | ||
| 676 | #else | ||
| 677 | static inline int local_memory_node(int node_id) { return node_id; }; | ||
| 678 | #endif | ||
| 679 | |||
| 674 | #ifdef CONFIG_NEED_NODE_MEMMAP_SIZE | 680 | #ifdef CONFIG_NEED_NODE_MEMMAP_SIZE |
| 675 | unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); | 681 | unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); |
| 676 | #endif | 682 | #endif |
diff --git a/include/linux/topology.h b/include/linux/topology.h index 2e5518f46571..c44df50a05ab 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h | |||
| @@ -251,6 +251,67 @@ static inline int numa_node_id(void) | |||
| 251 | 251 | ||
| 252 | #endif /* [!]CONFIG_USE_PERCPU_NUMA_NODE_ID */ | 252 | #endif /* [!]CONFIG_USE_PERCPU_NUMA_NODE_ID */ |
| 253 | 253 | ||
| 254 | #ifdef CONFIG_HAVE_MEMORYLESS_NODES | ||
| 255 | |||
| 256 | /* | ||
| 257 | * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly. | ||
| 258 | * It will not be defined when CONFIG_HAVE_MEMORYLESS_NODES is not defined. | ||
| 259 | * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem(). | ||
| 260 | */ | ||
| 261 | DECLARE_PER_CPU(int, _numa_mem_); | ||
| 262 | |||
| 263 | #ifndef set_numa_mem | ||
| 264 | static inline void set_numa_mem(int node) | ||
| 265 | { | ||
| 266 | percpu_write(_numa_mem_, node); | ||
| 267 | } | ||
| 268 | #endif | ||
| 269 | |||
| 270 | #ifndef numa_mem_id | ||
| 271 | /* Returns the number of the nearest Node with memory */ | ||
| 272 | static inline int numa_mem_id(void) | ||
| 273 | { | ||
| 274 | return __this_cpu_read(_numa_mem_); | ||
| 275 | } | ||
| 276 | #endif | ||
| 277 | |||
| 278 | #ifndef cpu_to_mem | ||
| 279 | static inline int cpu_to_mem(int cpu) | ||
| 280 | { | ||
| 281 | return per_cpu(_numa_mem_, cpu); | ||
| 282 | } | ||
| 283 | #endif | ||
| 284 | |||
| 285 | #ifndef set_cpu_numa_mem | ||
| 286 | static inline void set_cpu_numa_mem(int cpu, int node) | ||
| 287 | { | ||
| 288 | per_cpu(_numa_mem_, cpu) = node; | ||
| 289 | } | ||
| 290 | #endif | ||
| 291 | |||
| 292 | #else /* !CONFIG_HAVE_MEMORYLESS_NODES */ | ||
| 293 | |||
| 294 | static inline void set_numa_mem(int node) {} | ||
| 295 | |||
| 296 | static inline void set_cpu_numa_mem(int cpu, int node) {} | ||
| 297 | |||
| 298 | #ifndef numa_mem_id | ||
| 299 | /* Returns the number of the nearest Node with memory */ | ||
| 300 | static inline int numa_mem_id(void) | ||
| 301 | { | ||
| 302 | return numa_node_id(); | ||
| 303 | } | ||
| 304 | #endif | ||
| 305 | |||
| 306 | #ifndef cpu_to_mem | ||
| 307 | static inline int cpu_to_mem(int cpu) | ||
| 308 | { | ||
| 309 | return cpu_to_node(cpu); | ||
| 310 | } | ||
| 311 | #endif | ||
| 312 | |||
| 313 | #endif /* [!]CONFIG_HAVE_MEMORYLESS_NODES */ | ||
| 314 | |||
| 254 | #ifndef topology_physical_package_id | 315 | #ifndef topology_physical_package_id |
| 255 | #define topology_physical_package_id(cpu) ((void)(cpu), -1) | 316 | #define topology_physical_package_id(cpu) ((void)(cpu), -1) |
| 256 | #endif | 317 | #endif |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6fe1b65ee1a8..431214b941ac 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -62,6 +62,17 @@ DEFINE_PER_CPU(int, numa_node); | |||
| 62 | EXPORT_PER_CPU_SYMBOL(numa_node); | 62 | EXPORT_PER_CPU_SYMBOL(numa_node); |
| 63 | #endif | 63 | #endif |
| 64 | 64 | ||
| 65 | #ifdef CONFIG_HAVE_MEMORYLESS_NODES | ||
| 66 | /* | ||
| 67 | * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly. | ||
| 68 | * It will not be defined when CONFIG_HAVE_MEMORYLESS_NODES is not defined. | ||
| 69 | * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem() | ||
| 70 | * defined in <linux/topology.h>. | ||
| 71 | */ | ||
| 72 | DEFINE_PER_CPU(int, _numa_mem_); /* Kernel "local memory" node */ | ||
| 73 | EXPORT_PER_CPU_SYMBOL(_numa_mem_); | ||
| 74 | #endif | ||
| 75 | |||
| 65 | /* | 76 | /* |
| 66 | * Array of node states. | 77 | * Array of node states. |
| 67 | */ | 78 | */ |
| @@ -2861,6 +2872,24 @@ static void build_zonelist_cache(pg_data_t *pgdat) | |||
| 2861 | zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z); | 2872 | zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z); |
| 2862 | } | 2873 | } |
| 2863 | 2874 | ||
| 2875 | #ifdef CONFIG_HAVE_MEMORYLESS_NODES | ||
| 2876 | /* | ||
| 2877 | * Return node id of node used for "local" allocations. | ||
| 2878 | * I.e., first node id of first zone in arg node's generic zonelist. | ||
| 2879 | * Used for initializing percpu 'numa_mem', which is used primarily | ||
| 2880 | * for kernel allocations, so use GFP_KERNEL flags to locate zonelist. | ||
| 2881 | */ | ||
| 2882 | int local_memory_node(int node) | ||
| 2883 | { | ||
| 2884 | struct zone *zone; | ||
| 2885 | |||
| 2886 | (void)first_zones_zonelist(node_zonelist(node, GFP_KERNEL), | ||
| 2887 | gfp_zone(GFP_KERNEL), | ||
| 2888 | NULL, | ||
| 2889 | &zone); | ||
| 2890 | return zone->node; | ||
| 2891 | } | ||
| 2892 | #endif | ||
| 2864 | 2893 | ||
| 2865 | #else /* CONFIG_NUMA */ | 2894 | #else /* CONFIG_NUMA */ |
| 2866 | 2895 | ||
| @@ -2975,9 +3004,23 @@ static __init_refok int __build_all_zonelists(void *data) | |||
| 2975 | * needs the percpu allocator in order to allocate its pagesets | 3004 | * needs the percpu allocator in order to allocate its pagesets |
| 2976 | * (a chicken-egg dilemma). | 3005 | * (a chicken-egg dilemma). |
| 2977 | */ | 3006 | */ |
| 2978 | for_each_possible_cpu(cpu) | 3007 | for_each_possible_cpu(cpu) { |
| 2979 | setup_pageset(&per_cpu(boot_pageset, cpu), 0); | 3008 | setup_pageset(&per_cpu(boot_pageset, cpu), 0); |
| 2980 | 3009 | ||
| 3010 | #ifdef CONFIG_HAVE_MEMORYLESS_NODES | ||
| 3011 | /* | ||
| 3012 | * We now know the "local memory node" for each node-- | ||
| 3013 | * i.e., the node of the first zone in the generic zonelist. | ||
| 3014 | * Set up numa_mem percpu variable for on-line cpus. During | ||
| 3015 | * boot, only the boot cpu should be on-line; we'll init the | ||
| 3016 | * secondary cpus' numa_mem as they come on-line. During | ||
| 3017 | * node/memory hotplug, we'll fixup all on-line cpus. | ||
| 3018 | */ | ||
| 3019 | if (cpu_online(cpu)) | ||
| 3020 | set_cpu_numa_mem(cpu, local_memory_node(cpu_to_node(cpu))); | ||
| 3021 | #endif | ||
| 3022 | } | ||
| 3023 | |||
| 2981 | return 0; | 3024 | return 0; |
| 2982 | } | 3025 | } |
| 2983 | 3026 | ||
