aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Paul Jackson <pj@sgi.com> 2006-01-14 16:21:06 -0500
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-01-14 21:27:10 -0500
commit: 505970b96e3b7d22177c38e03435a68376628e7a (patch)
tree: 5508317e391961355bf3d946a6aac05bb21569eb
parent: ed68cb3676bb179768529aeb808403d57295af56 (diff)
[PATCH] cpuset oom lock fix
The problem, reported in: http://bugzilla.kernel.org/show_bug.cgi?id=5859 and by various other email messages and lkml posts, is that the cpuset hook in the oom (out of memory) code can try to take a cpuset semaphore while holding the tasklist_lock (a spinlock). One must not sleep while holding a spinlock.

The fix seems easy enough - move the cpuset semaphore region outside the tasklist_lock region.

This required a few lines of mechanism to implement. The oom code where the locking needs to be changed does not have access to the cpuset locks, which are internal to kernel/cpuset.c only. So I provided a couple more cpuset interface routines, available to the rest of the kernel, which simply take and drop the lock needed here (the cpuset's callback_sem).

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- include/linux/cpuset.h 6
-rw-r--r-- kernel/cpuset.c 33
-rw-r--r-- mm/oom_kill.c 3
3 files changed, 37 insertions, 5 deletions
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index c472f972bd6d..3bc606927116 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -48,6 +48,9 @@ extern void __cpuset_memory_pressure_bump(void);
48extern struct file_operations proc_cpuset_operations; 48extern struct file_operations proc_cpuset_operations;
49extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); 49extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer);
50 50
51extern void cpuset_lock(void);
52extern void cpuset_unlock(void);
53
51#else /* !CONFIG_CPUSETS */ 54#else /* !CONFIG_CPUSETS */
52 55
53static inline int cpuset_init_early(void) { return 0; } 56static inline int cpuset_init_early(void) { return 0; }
@@ -93,6 +96,9 @@ static inline char *cpuset_task_status_allowed(struct task_struct *task,
93 return buffer; 96 return buffer;
94} 97}
95 98
99static inline void cpuset_lock(void) {}
100static inline void cpuset_unlock(void) {}
101
96#endif /* !CONFIG_CPUSETS */ 102#endif /* !CONFIG_CPUSETS */
97 103
98#endif /* _LINUX_CPUSET_H */ 104#endif /* _LINUX_CPUSET_H */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index d4b6bd7d74e5..fe2f71f92ae0 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2150,6 +2150,33 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
2150} 2150}
2151 2151
2152/** 2152/**
2153 * cpuset_lock - lock out any changes to cpuset structures
2154 *
2155 * The out of memory (oom) code needs to lock down cpusets
2156 * from being changed while it scans the tasklist looking for a
2157 * task in an overlapping cpuset. Expose callback_sem via this
2158 * cpuset_lock() routine, so the oom code can lock it, before
2159 * locking the task list. The tasklist_lock is a spinlock, so
2160 * must be taken inside callback_sem.
2161 */
2162
2163void cpuset_lock(void)
2164{
2165 down(&callback_sem);
2166}
2167
2168/**
2169 * cpuset_unlock - release lock on cpuset changes
2170 *
2171 * Undo the lock taken in a previous cpuset_lock() call.
2172 */
2173
2174void cpuset_unlock(void)
2175{
2176 up(&callback_sem);
2177}
2178
2179/**
2153 * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors? 2180 * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
2154 * @p: pointer to task_struct of some other task. 2181 * @p: pointer to task_struct of some other task.
2155 * 2182 *
@@ -2158,7 +2185,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
2158 * determine if task @p's memory usage might impact the memory 2185 * determine if task @p's memory usage might impact the memory
2159 * available to the current task. 2186 * available to the current task.
2160 * 2187 *
2161 * Acquires callback_sem - not suitable for calling from a fast path. 2188 * Call while holding callback_sem.
2162 **/ 2189 **/
2163 2190
2164int cpuset_excl_nodes_overlap(const struct task_struct *p) 2191int cpuset_excl_nodes_overlap(const struct task_struct *p)
@@ -2166,8 +2193,6 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
2166 const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */ 2193 const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */
2167 int overlap = 0; /* do cpusets overlap? */ 2194 int overlap = 0; /* do cpusets overlap? */
2168 2195
2169 down(&callback_sem);
2170
2171 task_lock(current); 2196 task_lock(current);
2172 if (current->flags & PF_EXITING) { 2197 if (current->flags & PF_EXITING) {
2173 task_unlock(current); 2198 task_unlock(current);
@@ -2186,8 +2211,6 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
2186 2211
2187 overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed); 2212 overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
2188done: 2213done:
2189 up(&callback_sem);
2190
2191 return overlap; 2214 return overlap;
2192} 2215}
2193 2216
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 4748b906aff2..14bd4ec79597 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -274,6 +274,7 @@ void out_of_memory(gfp_t gfp_mask, int order)
274 show_mem(); 274 show_mem();
275 } 275 }
276 276
277 cpuset_lock();
277 read_lock(&tasklist_lock); 278 read_lock(&tasklist_lock);
278retry: 279retry:
279 p = select_bad_process(); 280 p = select_bad_process();
@@ -284,6 +285,7 @@ retry:
284 /* Found nothing?!?! Either we hang forever, or we panic. */ 285 /* Found nothing?!?! Either we hang forever, or we panic. */
285 if (!p) { 286 if (!p) {
286 read_unlock(&tasklist_lock); 287 read_unlock(&tasklist_lock);
288 cpuset_unlock();
287 panic("Out of memory and no killable processes...\n"); 289 panic("Out of memory and no killable processes...\n");
288 } 290 }
289 291
@@ -293,6 +295,7 @@ retry:
293 295
294 out: 296 out:
295 read_unlock(&tasklist_lock); 297 read_unlock(&tasklist_lock);
298 cpuset_unlock();
296 if (mm) 299 if (mm)
297 mmput(mm); 300 mmput(mm);
298 301