author		Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>	2014-06-04 19:11:02 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-06-04 19:54:13 -0400
commit		3ba08129e38437561df44c36b7ea9081185d5333 (patch)
tree		01835605410c1a55e47e44304e075e01a28ddca2 /mm/memory-failure.c
parent		74614de17db6fb472370c426d4f934d8d616edf2 (diff)
mm/memory-failure.c: support use of a dedicated thread to handle SIGBUS(BUS_MCEERR_AO)
Currently the memory error handler handles action optional errors in a
deferred manner by default, and a recovery-aware application that wants
to handle them immediately can do so by setting the PF_MCE_EARLY flag.
However, such a signal can be sent only to the main thread, which is
problematic if the application wants a dedicated thread to handle such
signals.
So this patch adds dedicated thread support to the memory error handler.
We have a PF_MCE_EARLY flag for each thread separately, so with this patch
the AO signal is sent to the thread with PF_MCE_EARLY set, not to the main
thread. To implement a dedicated thread, call prctl() to set PF_MCE_EARLY
on that thread, as in the sketch below.
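For illustration, a minimal, untested userspace sketch of such a dedicated
thread (the thread and handler names are made up for the example; it assumes
the existing prctl(2) PR_MCE_KILL interface, which sets the policy for the
calling thread only):

#define _GNU_SOURCE
#include <pthread.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <sys/prctl.h>
#include <unistd.h>

/* Process-wide SIGBUS handler; with this patch the kernel targets the
 * thread that set PF_MCE_EARLY, so it runs on the dedicated thread. */
static void sigbus_handler(int sig, siginfo_t *si, void *ctx)
{
	(void)sig; (void)ctx;
	if (si->si_code == BUS_MCEERR_AO) {
		/* Page at si->si_addr is hwpoisoned but not yet
		 * consumed: isolate or recover the data here. */
	}
}

static void *mce_handler_thread(void *arg)
{
	struct sigaction sa;

	(void)arg;
	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = sigbus_handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGBUS, &sa, NULL);

	/* Set the early-kill policy on this thread only; this is what
	 * sets PF_MCE_PROCESS and PF_MCE_EARLY on the calling thread. */
	if (prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0))
		exit(1);

	for (;;)
		pause();	/* wait for BUS_MCEERR_AO signals */
	return NULL;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, mce_handler_thread, NULL);
	/* ... rest of the application ... */
	pthread_join(tid, NULL);
	return 0;
}

Build with -pthread. Note that sigaction() installs a process-wide handler;
what this patch changes is which thread the kernel picks as the target of
the BUS_MCEERR_AO signal.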
The memory error handler collects the processes to be killed, so this patch
makes it check the PF_MCE_EARLY flag on each thread in the collecting
routines. There is no behavioral change for non-early-kill cases.
Tony said:
: The old behavior was crazy - someone with a multithreaded process might
: well expect that if they call prctl(PF_MCE_EARLY) in just one thread, then
: that thread would see the SIGBUS with si_code = BUS_MCEERR_AO - even if
: that thread wasn't the main thread for the process.
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Cc: Kamil Iskra <iskra@mcs.anl.gov>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Chen Gong <gong.chen@linux.jf.intel.com>
Cc: <stable@vger.kernel.org> [3.2+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--	mm/memory-failure.c	56
1 file changed, 43 insertions(+), 13 deletions(-)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index ed339c505d55..cd8989c1027e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -380,15 +380,44 @@ static void kill_procs(struct list_head *to_kill, int forcekill, int trapno,
 	}
 }
 
-static int task_early_kill(struct task_struct *tsk, int force_early)
+/*
+ * Find a dedicated thread which is supposed to handle SIGBUS(BUS_MCEERR_AO)
+ * on behalf of the thread group. Return task_struct of the (first found)
+ * dedicated thread if found, and return NULL otherwise.
+ *
+ * We already hold read_lock(&tasklist_lock) in the caller, so we don't
+ * have to call rcu_read_lock/unlock() in this function.
+ */
+static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
+{
+	struct task_struct *t;
+
+	for_each_thread(tsk, t)
+		if ((t->flags & PF_MCE_PROCESS) && (t->flags & PF_MCE_EARLY))
+			return t;
+	return NULL;
+}
+
+/*
+ * Determine whether a given process is "early kill" process which expects
+ * to be signaled when some page under the process is hwpoisoned.
+ * Return task_struct of the dedicated thread (main thread unless explicitly
+ * specified) if the process is "early kill," and otherwise returns NULL.
+ */
+static struct task_struct *task_early_kill(struct task_struct *tsk,
+					   int force_early)
 {
+	struct task_struct *t;
 	if (!tsk->mm)
-		return 0;
+		return NULL;
 	if (force_early)
-		return 1;
-	if (tsk->flags & PF_MCE_PROCESS)
-		return !!(tsk->flags & PF_MCE_EARLY);
-	return sysctl_memory_failure_early_kill;
+		return tsk;
+	t = find_early_kill_thread(tsk);
+	if (t)
+		return t;
+	if (sysctl_memory_failure_early_kill)
+		return tsk;
+	return NULL;
 }
 
 /*
@@ -410,16 +439,17 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 	read_lock(&tasklist_lock);
 	for_each_process (tsk) {
 		struct anon_vma_chain *vmac;
+		struct task_struct *t = task_early_kill(tsk, force_early);
 
-		if (!task_early_kill(tsk, force_early))
+		if (!t)
 			continue;
 		anon_vma_interval_tree_foreach(vmac, &av->rb_root,
 					       pgoff, pgoff) {
 			vma = vmac->vma;
 			if (!page_mapped_in_vma(page, vma))
 				continue;
-			if (vma->vm_mm == tsk->mm)
-				add_to_kill(tsk, page, vma, to_kill, tkc);
+			if (vma->vm_mm == t->mm)
+				add_to_kill(t, page, vma, to_kill, tkc);
 		}
 	}
 	read_unlock(&tasklist_lock);
@@ -440,10 +470,10 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 	read_lock(&tasklist_lock);
 	for_each_process(tsk) {
 		pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+		struct task_struct *t = task_early_kill(tsk, force_early);
 
-		if (!task_early_kill(tsk, force_early))
+		if (!t)
 			continue;
-
 		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
 				      pgoff) {
 			/*
@@ -453,8 +483,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 			 * Assume applications who requested early kill want
 			 * to be informed of all such data corruptions.
 			 */
-			if (vma->vm_mm == tsk->mm)
-				add_to_kill(tsk, page, vma, to_kill, tkc);
+			if (vma->vm_mm == t->mm)
+				add_to_kill(t, page, vma, to_kill, tkc);
 		}
 	}
 	read_unlock(&tasklist_lock);