aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/cpuset.h
diff options
context:
space:
mode:
author: Miao Xie <miaox@cn.fujitsu.com> 2010-05-24 17:32:08 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-05-25 11:06:57 -0400
commit: c0ff7453bb5c7c98e0885fb94279f2571946f280 (patch)
tree: 8bb2b169a5145f0496575dbd2f48bb4b1c83f819 /include/linux/cpuset.h
parent: 708c1bbc9d0c3e57f40501794d9b0eed29d10fce (diff)
cpuset,mm: fix no node to alloc memory when changing cpuset's mems
Before applying this patch, cpuset updates task->mems_allowed and mempolicy by setting all new bits in the nodemask first, and clearing all old unallowed bits later. But in the way, the allocator may find that there is no node to alloc memory.

The reason is that cpuset rebinds the task's mempolicy, it cleans the nodes which the allocater can alloc pages on, for example:

(mpol: mempolicy)
    task1                   task1's mpol    task2
    alloc page              1
      alloc on node0? NO    1
                            1               change mems from 1 to 0
                            1               rebind task1's mpol
                            0-1               set new bits
                            0                 clear disallowed bits
      alloc on node1? NO    0
      ...
    can't alloc page
      goto oom

This patch fixes this problem by expanding the nodes range first(set newly allowed bits) and shrink it lazily(clear newly disallowed bits). So we use a variable to tell the write-side task that read-side task is reading nodemask, and the write-side task clears newly disallowed nodes after read-side task ends the current memory allocation.

[akpm@linux-foundation.org: fix spello]
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Paul Menage <menage@google.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Ravikiran Thirumalai <kiran@scalex86.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux/cpuset.h')
-rw-r--r-- include/linux/cpuset.h 43
1 files changed, 43 insertions, 0 deletions
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index a73454aec333..20b51cab6593 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -86,9 +86,44 @@ extern void rebuild_sched_domains(void);
86 86
87extern void cpuset_print_task_mems_allowed(struct task_struct *p); 87extern void cpuset_print_task_mems_allowed(struct task_struct *p);
88 88
89/*
90 * reading current mems_allowed and mempolicy in the fastpath must protected
91 * by get_mems_allowed()
92 */
93static inline void get_mems_allowed(void)
94{
95 current->mems_allowed_change_disable++;
96
97 /*
98 * ensure that reading mems_allowed and mempolicy happens after the
99 * update of ->mems_allowed_change_disable.
100 *
101 * the write-side task finds ->mems_allowed_change_disable is not 0,
102 * and knows the read-side task is reading mems_allowed or mempolicy,
103 * so it will clear old bits lazily.
104 */
105 smp_mb();
106}
107
108static inline void put_mems_allowed(void)
109{
110 /*
111 * ensure that reading mems_allowed and mempolicy before reducing
112 * mems_allowed_change_disable.
113 *
114 * the write-side task will know that the read-side task is still
115 * reading mems_allowed or mempolicy, don't clears old bits in the
116 * nodemask.
117 */
118 smp_mb();
119 --ACCESS_ONCE(current->mems_allowed_change_disable);
120}
121
89static inline void set_mems_allowed(nodemask_t nodemask) 122static inline void set_mems_allowed(nodemask_t nodemask)
90{ 123{
124 task_lock(current);
91 current->mems_allowed = nodemask; 125 current->mems_allowed = nodemask;
126 task_unlock(current);
92} 127}
93 128
94#else /* !CONFIG_CPUSETS */ 129#else /* !CONFIG_CPUSETS */
@@ -187,6 +222,14 @@ static inline void set_mems_allowed(nodemask_t nodemask)
187{ 222{
188} 223}
189 224
225static inline void get_mems_allowed(void)
226{
227}
228
229static inline void put_mems_allowed(void)
230{
231}
232
190#endif /* !CONFIG_CPUSETS */ 233#endif /* !CONFIG_CPUSETS */
191 234
192#endif /* _LINUX_CPUSET_H */ 235#endif /* _LINUX_CPUSET_H */