diff options
author | Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> | 2013-11-12 18:06:45 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-12 22:08:59 -0500 |
commit | 786235eeba0e1e85e5cbbb9f97d1087ad03dfa21 (patch) | |
tree | e529b2b8f6a64889ec8c385cdca64f5c1d8e1c19 /kernel | |
parent | 10d0c9705e80bbd3d587c5fad24599aabaca6688 (diff) |
kthread: make kthread_create() killable
Any user process that calls wait_for_completion(), except the global init
process, might be chosen by the OOM killer while it is waiting for a
complete() call from some other process that is allocating memory. This is
how CVE-2012-4398 "kernel: request_module() OOM local DoS" can happen.
When such a caller is chosen by the OOM killer while it is waiting for
complete() in TASK_UNINTERRUPTIBLE state, the system will be kept stressed
due to memory starvation, because the OOM killer cannot kill that caller.
kthread_create() is one such caller, and this patch fixes the problem
for kthreadd by making kthread_create() killable - the same approach
that was used to fix CVE-2012-4398.
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Oleg Nesterov <oleg@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/kthread.c | 73 |
1 files changed, 55 insertions, 18 deletions
diff --git a/kernel/kthread.c b/kernel/kthread.c index 760e86df8c20..b5ae3ee860a9 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -33,7 +33,7 @@ struct kthread_create_info | |||
33 | 33 | ||
34 | /* Result passed back to kthread_create() from kthreadd. */ | 34 | /* Result passed back to kthread_create() from kthreadd. */ |
35 | struct task_struct *result; | 35 | struct task_struct *result; |
36 | struct completion done; | 36 | struct completion *done; |
37 | 37 | ||
38 | struct list_head list; | 38 | struct list_head list; |
39 | }; | 39 | }; |
@@ -178,6 +178,7 @@ static int kthread(void *_create) | |||
178 | struct kthread_create_info *create = _create; | 178 | struct kthread_create_info *create = _create; |
179 | int (*threadfn)(void *data) = create->threadfn; | 179 | int (*threadfn)(void *data) = create->threadfn; |
180 | void *data = create->data; | 180 | void *data = create->data; |
181 | struct completion *done; | ||
181 | struct kthread self; | 182 | struct kthread self; |
182 | int ret; | 183 | int ret; |
183 | 184 | ||
@@ -187,10 +188,16 @@ static int kthread(void *_create) | |||
187 | init_completion(&self.parked); | 188 | init_completion(&self.parked); |
188 | current->vfork_done = &self.exited; | 189 | current->vfork_done = &self.exited; |
189 | 190 | ||
191 | /* If user was SIGKILLed, I release the structure. */ | ||
192 | done = xchg(&create->done, NULL); | ||
193 | if (!done) { | ||
194 | kfree(create); | ||
195 | do_exit(-EINTR); | ||
196 | } | ||
190 | /* OK, tell user we're spawned, wait for stop or wakeup */ | 197 | /* OK, tell user we're spawned, wait for stop or wakeup */ |
191 | __set_current_state(TASK_UNINTERRUPTIBLE); | 198 | __set_current_state(TASK_UNINTERRUPTIBLE); |
192 | create->result = current; | 199 | create->result = current; |
193 | complete(&create->done); | 200 | complete(done); |
194 | schedule(); | 201 | schedule(); |
195 | 202 | ||
196 | ret = -EINTR; | 203 | ret = -EINTR; |
@@ -223,8 +230,15 @@ static void create_kthread(struct kthread_create_info *create) | |||
223 | /* We want our own signal handler (we take no signals by default). */ | 230 | /* We want our own signal handler (we take no signals by default). */ |
224 | pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); | 231 | pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); |
225 | if (pid < 0) { | 232 | if (pid < 0) { |
233 | /* If user was SIGKILLed, I release the structure. */ | ||
234 | struct completion *done = xchg(&create->done, NULL); | ||
235 | |||
236 | if (!done) { | ||
237 | kfree(create); | ||
238 | return; | ||
239 | } | ||
226 | create->result = ERR_PTR(pid); | 240 | create->result = ERR_PTR(pid); |
227 | complete(&create->done); | 241 | complete(done); |
228 | } | 242 | } |
229 | } | 243 | } |
230 | 244 | ||
@@ -255,36 +269,59 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), | |||
255 | const char namefmt[], | 269 | const char namefmt[], |
256 | ...) | 270 | ...) |
257 | { | 271 | { |
258 | struct kthread_create_info create; | 272 | DECLARE_COMPLETION_ONSTACK(done); |
259 | 273 | struct task_struct *task; | |
260 | create.threadfn = threadfn; | 274 | struct kthread_create_info *create = kmalloc(sizeof(*create), |
261 | create.data = data; | 275 | GFP_KERNEL); |
262 | create.node = node; | 276 | |
263 | init_completion(&create.done); | 277 | if (!create) |
278 | return ERR_PTR(-ENOMEM); | ||
279 | create->threadfn = threadfn; | ||
280 | create->data = data; | ||
281 | create->node = node; | ||
282 | create->done = &done; | ||
264 | 283 | ||
265 | spin_lock(&kthread_create_lock); | 284 | spin_lock(&kthread_create_lock); |
266 | list_add_tail(&create.list, &kthread_create_list); | 285 | list_add_tail(&create->list, &kthread_create_list); |
267 | spin_unlock(&kthread_create_lock); | 286 | spin_unlock(&kthread_create_lock); |
268 | 287 | ||
269 | wake_up_process(kthreadd_task); | 288 | wake_up_process(kthreadd_task); |
270 | wait_for_completion(&create.done); | 289 | /* |
271 | 290 | * Wait for completion in killable state, for I might be chosen by | |
272 | if (!IS_ERR(create.result)) { | 291 | * the OOM killer while kthreadd is trying to allocate memory for |
292 | * new kernel thread. | ||
293 | */ | ||
294 | if (unlikely(wait_for_completion_killable(&done))) { | ||
295 | /* | ||
296 | * If I was SIGKILLed before kthreadd (or new kernel thread) | ||
297 | * calls complete(), leave the cleanup of this structure to | ||
298 | * that thread. | ||
299 | */ | ||
300 | if (xchg(&create->done, NULL)) | ||
301 | return ERR_PTR(-ENOMEM); | ||
302 | /* | ||
303 | * kthreadd (or new kernel thread) will call complete() | ||
304 | * shortly. | ||
305 | */ | ||
306 | wait_for_completion(&done); | ||
307 | } | ||
308 | task = create->result; | ||
309 | if (!IS_ERR(task)) { | ||
273 | static const struct sched_param param = { .sched_priority = 0 }; | 310 | static const struct sched_param param = { .sched_priority = 0 }; |
274 | va_list args; | 311 | va_list args; |
275 | 312 | ||
276 | va_start(args, namefmt); | 313 | va_start(args, namefmt); |
277 | vsnprintf(create.result->comm, sizeof(create.result->comm), | 314 | vsnprintf(task->comm, sizeof(task->comm), namefmt, args); |
278 | namefmt, args); | ||
279 | va_end(args); | 315 | va_end(args); |
280 | /* | 316 | /* |
281 | * root may have changed our (kthreadd's) priority or CPU mask. | 317 | * root may have changed our (kthreadd's) priority or CPU mask. |
282 | * The kernel thread should not inherit these properties. | 318 | * The kernel thread should not inherit these properties. |
283 | */ | 319 | */ |
284 | sched_setscheduler_nocheck(create.result, SCHED_NORMAL, &param); | 320 | sched_setscheduler_nocheck(task, SCHED_NORMAL, &param); |
285 | set_cpus_allowed_ptr(create.result, cpu_all_mask); | 321 | set_cpus_allowed_ptr(task, cpu_all_mask); |
286 | } | 322 | } |
287 | return create.result; | 323 | kfree(create); |
324 | return task; | ||
288 | } | 325 | } |
289 | EXPORT_SYMBOL(kthread_create_on_node); | 326 | EXPORT_SYMBOL(kthread_create_on_node); |
290 | 327 | ||