From 58568d2a8215cb6f55caf2332017d7bdff954e1c Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 16 Jun 2009 15:31:49 -0700 Subject: cpuset,mm: update tasks' mems_allowed in time Fix allocating page cache/slab object on the unallowed node when memory spread is set by updating tasks' mems_allowed after its cpuset's mems is changed. In order to update tasks' mems_allowed in time, we must modify the code of memory policy. Because the memory policy is applied in the process's context originally. After applying this patch, one task directly manipulates anothers mems_allowed, and we use alloc_lock in the task_struct to protect mems_allowed and memory policy of the task. But in the fast path, we didn't use lock to protect them, because adding a lock may lead to performance regression. But if we don't add a lock,the task might see no nodes when changing cpuset's mems_allowed to some non-overlapping set. In order to avoid it, we set all new allowed nodes, then clear newly disallowed ones. [lee.schermerhorn@hp.com: The rework of mpol_new() to extract the adjusting of the node mask to apply cpuset and mpol flags "context" breaks set_mempolicy() and mbind() with MPOL_PREFERRED and a NULL nodemask--i.e., explicit local allocation. Fix this by adding the check for MPOL_PREFERRED and empty node mask to mpol_new_mpolicy(). Remove the now unneeded 'nodes = NULL' from mpol_new(). Note that mpol_new_mempolicy() is always called with a non-NULL 'nodes' parameter now that it has been removed from mpol_new(). Therefore, we don't need to test nodes for NULL before testing it for 'empty'. However, just to be extra paranoid, add a VM_BUG_ON() to verify this assumption.] [lee.schermerhorn@hp.com: I don't think the function name 'mpol_new_mempolicy' is descriptive enough to differentiate it from mpol_new(). This function applies cpuset set context, usually constraining nodes to those allowed by the cpuset. However, when the 'RELATIVE_NODES flag is set, it also translates the nodes. So I settled on 'mpol_set_nodemask()', because the comment block for mpol_new() mentions that we need to call this function to "set nodes". Some additional minor line length, whitespace and typo cleanup.] Signed-off-by: Miao Xie Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Christoph Lameter Cc: Paul Menage Cc: Nick Piggin Cc: Yasunori Goto Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 13 +++++++++---- include/linux/sched.h | 8 ++++---- 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 05ea1dd7d681..a5740fc4d04b 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -18,7 +18,6 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */ -extern int cpuset_init_early(void); extern int cpuset_init(void); extern void cpuset_init_smp(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); @@ -27,7 +26,6 @@ extern void cpuset_cpus_allowed_locked(struct task_struct *p, extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); -void cpuset_update_task_memory_state(void); int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask); extern int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask); @@ -92,9 +90,13 @@ extern void rebuild_sched_domains(void); extern void cpuset_print_task_mems_allowed(struct task_struct *p); +static inline void set_mems_allowed(nodemask_t nodemask) +{ + current->mems_allowed = nodemask; +} + #else /* !CONFIG_CPUSETS */ -static inline int cpuset_init_early(void) { return 0; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} @@ -116,7 +118,6 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) #define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY]) static inline void cpuset_init_current_mems_allowed(void) {} -static inline void cpuset_update_task_memory_state(void) {} static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) { @@ -188,6 +189,10 @@ static inline void cpuset_print_task_mems_allowed(struct task_struct *p) { } +static inline void set_mems_allowed(nodemask_t nodemask) +{ +} + #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index c900aa530070..1048bf50540a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1318,7 +1318,8 @@ struct task_struct { /* Thread group tracking */ u32 parent_exec_id; u32 self_exec_id; -/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */ +/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, + * mempolicy */ spinlock_t alloc_lock; #ifdef CONFIG_GENERIC_HARDIRQS @@ -1386,8 +1387,7 @@ struct task_struct { cputime_t acct_timexpd; /* stime + utime since last update */ #endif #ifdef CONFIG_CPUSETS - nodemask_t mems_allowed; - int cpuset_mems_generation; + nodemask_t mems_allowed; /* Protected by alloc_lock */ int cpuset_mem_spread_rotor; #endif #ifdef CONFIG_CGROUPS @@ -1410,7 +1410,7 @@ struct task_struct { struct list_head perf_counter_list; #endif #ifdef CONFIG_NUMA - struct mempolicy *mempolicy; + struct mempolicy *mempolicy; /* Protected by alloc_lock */ short il_next; #endif atomic_t fs_excl; /* holding fs exclusive resources */ -- cgit v1.2.2