aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Paul Jackson <pj@sgi.com> 2006-09-29 05:01:16 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-09-29 12:18:21 -0400
commit 38837fc75acb7fa9b0e111b0241fe4fe76c5d4b3 (patch)
tree 51508cbc49527e35921efb4ba31bca7da9795ad2
parent af3ffa6758dbd2ab7ebe62dddf66b3aa94d64eeb (diff)
[PATCH] cpuset: top_cpuset tracks hotplug changes to node_online_map
Change the list of memory nodes allowed to tasks in the top (root) cpuset to dynamically track which memory nodes are online, using a call to a cpuset hook from the memory hotplug code. Make the top cpuset's 'mems' file read-only. On systems that have cpusets configured in their kernel, but that aren't actively using cpusets (for some distros, this covers the majority of systems), all tasks end up in the top cpuset. If that system does support memory hotplug, then these tasks cannot make use of memory nodes that are added after system boot, because the memory nodes are not allowed in the top cpuset. This is a surprising regression over earlier kernels that didn't have cpusets enabled. One key motivation for this change is to remain consistent with the behaviour for the top_cpuset's 'cpus', which is also read-only, and which automatically tracks the cpu_online_map. This change also has the minor benefit that it fixes a long-standing, little-noticed, minor bug in cpusets. The cpuset performance tweak to short circuit the cpuset_zone_allowed() check on systems with just a single cpuset (see 'number_of_cpusets', in linux/cpuset.h) meant that simply changing the 'mems' of the top_cpuset had no effect, even though the change (the write system call) appeared to succeed. With the following change, that write to the 'mems' file fails with -EACCES, and the 'mems' file stubbornly refuses to be changed via user space writes. Thus no one should be misled into thinking they've changed the top_cpuset's 'mems' when in effect they haven't. In order to keep the behaviour of cpusets consistent between systems actively making use of them and systems not using them, this patch changes the behaviour of the 'mems' file in the top (root) cpuset, making it read-only, and making it automatically track the value of node_online_map. Thus tasks in the top cpuset will have automatic use of hot-plugged memory nodes allowed by their cpuset.
[akpm@osdl.org: build fix] [bunk@stusta.de: build fix] Signed-off-by: Paul Jackson <pj@sgi.com> Signed-off-by: Adrian Bunk <bunk@stusta.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--Documentation/cpusets.txt10
-rw-r--r--include/linux/cpuset.h4
-rw-r--r--kernel/cpuset.c28
-rw-r--r--mm/memory_hotplug.c3
4 files changed, 37 insertions, 8 deletions
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index 76b44290c154..842f0d1ab216 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -217,11 +217,11 @@ exclusive cpuset. Also, the use of a Linux virtual file system (vfs)
217to represent the cpuset hierarchy provides for a familiar permission 217to represent the cpuset hierarchy provides for a familiar permission
218and name space for cpusets, with a minimum of additional kernel code. 218and name space for cpusets, with a minimum of additional kernel code.
219 219
220The cpus file in the root (top_cpuset) cpuset is read-only. 220The cpus and mems files in the root (top_cpuset) cpuset are
221It automatically tracks the value of cpu_online_map, using a CPU 221read-only. The cpus file automatically tracks the value of
222hotplug notifier. If and when memory nodes can be hotplugged, 222cpu_online_map using a CPU hotplug notifier, and the mems file
223we expect to make the mems file in the root cpuset read-only 223automatically tracks the value of node_online_map using the
224as well, and have it track the value of node_online_map. 224cpuset_track_online_nodes() hook.
225 225
226 226
2271.4 What are exclusive cpusets ? 2271.4 What are exclusive cpusets ?
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 9354722a9217..4d8adf663681 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -63,6 +63,8 @@ static inline int cpuset_do_slab_mem_spread(void)
63 return current->flags & PF_SPREAD_SLAB; 63 return current->flags & PF_SPREAD_SLAB;
64} 64}
65 65
66extern void cpuset_track_online_nodes(void);
67
66#else /* !CONFIG_CPUSETS */ 68#else /* !CONFIG_CPUSETS */
67 69
68static inline int cpuset_init_early(void) { return 0; } 70static inline int cpuset_init_early(void) { return 0; }
@@ -126,6 +128,8 @@ static inline int cpuset_do_slab_mem_spread(void)
126 return 0; 128 return 0;
127} 129}
128 130
131static inline void cpuset_track_online_nodes(void) {}
132
129#endif /* !CONFIG_CPUSETS */ 133#endif /* !CONFIG_CPUSETS */
130 134
131#endif /* _LINUX_CPUSET_H */ 135#endif /* _LINUX_CPUSET_H */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 584bb4e6c042..794af5024c2f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -912,6 +912,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
912 int fudge; 912 int fudge;
913 int retval; 913 int retval;
914 914
915 /* top_cpuset.mems_allowed tracks node_online_map; it's read-only */
916 if (cs == &top_cpuset)
917 return -EACCES;
918
915 trialcs = *cs; 919 trialcs = *cs;
916 retval = nodelist_parse(buf, trialcs.mems_allowed); 920 retval = nodelist_parse(buf, trialcs.mems_allowed);
917 if (retval < 0) 921 if (retval < 0)
@@ -2042,9 +2046,8 @@ out:
2042 * (of no affect) on systems that are actively using CPU hotplug 2046 * (of no affect) on systems that are actively using CPU hotplug
2043 * but making no active use of cpusets. 2047 * but making no active use of cpusets.
2044 * 2048 *
2045 * This handles CPU hotplug (cpuhp) events. If someday Memory 2049 * This routine ensures that top_cpuset.cpus_allowed tracks
2046 * Nodes can be hotplugged (dynamically changing node_online_map) 2050 * cpu_online_map on each CPU hotplug (cpuhp) event.
2047 * then we should handle that too, perhaps in a similar way.
2048 */ 2051 */
2049 2052
2050#ifdef CONFIG_HOTPLUG_CPU 2053#ifdef CONFIG_HOTPLUG_CPU
@@ -2063,6 +2066,25 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb,
2063} 2066}
2064#endif 2067#endif
2065 2068
2069/*
2070 * Keep top_cpuset.mems_allowed tracking node_online_map.
2071 * Call this routine anytime after you change node_online_map.
2072 * See also the previous routine cpuset_handle_cpuhp().
2073 */
2074
2075#ifdef CONFIG_MEMORY_HOTPLUG
2076void cpuset_track_online_nodes()
2077{
2078 mutex_lock(&manage_mutex);
2079 mutex_lock(&callback_mutex);
2080
2081 top_cpuset.mems_allowed = node_online_map;
2082
2083 mutex_unlock(&callback_mutex);
2084 mutex_unlock(&manage_mutex);
2085}
2086#endif
2087
2066/** 2088/**
2067 * cpuset_init_smp - initialize cpus_allowed 2089 * cpuset_init_smp - initialize cpus_allowed
2068 * 2090 *
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c37319542b70..9576ed920c0a 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -21,6 +21,7 @@
21#include <linux/highmem.h> 21#include <linux/highmem.h>
22#include <linux/vmalloc.h> 22#include <linux/vmalloc.h>
23#include <linux/ioport.h> 23#include <linux/ioport.h>
24#include <linux/cpuset.h>
24 25
25#include <asm/tlbflush.h> 26#include <asm/tlbflush.h>
26 27
@@ -283,6 +284,8 @@ int add_memory(int nid, u64 start, u64 size)
283 /* we online node here. we can't roll back from here. */ 284 /* we online node here. we can't roll back from here. */
284 node_set_online(nid); 285 node_set_online(nid);
285 286
287 cpuset_track_online_nodes();
288
286 if (new_pgdat) { 289 if (new_pgdat) {
287 ret = register_one_node(nid); 290 ret = register_one_node(nid);
288 /* 291 /*