aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andi Kleen <andi@firstfloor.org> 2009-09-16 05:50:14 -0400
committer Andi Kleen <ak@linux.intel.com> 2009-09-16 05:50:14 -0400
commit 4db96cf077aa938b11fe7ac79ecc9b29ec00fbab (patch)
tree ec196e85769159f29952e34305795b47513639a0
parent 6746aff74da293b5fd24e5c68b870b721e86cd5f (diff)
HWPOISON: Add PR_MCE_KILL prctl to control early kill behaviour per process
This allows processes to override their early/late kill behaviour on hardware memory errors. Typically, applications that are memory-error aware are better off with early kill (see the error as soon as possible), and all others with late kill (only see the error when it is really impacting execution). There's a global sysctl, but this way an application can set its own specific policy. We're using two bits: one to signify that the process stated its intention (PF_MCE_PROCESS), and one to record whether that intention is early kill (PF_MCE_EARLY). I also made the prctl future-proof by enforcing that the unused arguments are 0. The state is inherited by children. Note this makes us officially run out of process flags on 32bit, but the next patch can easily add another field. Manpage patch will be supplied separately. Signed-off-by: Andi Kleen <ak@linux.intel.com>
-rw-r--r--include/linux/prctl.h2
-rw-r--r--include/linux/sched.h2
-rw-r--r--kernel/sys.c22
3 files changed, 26 insertions, 0 deletions
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index b00df4c79c63..3dc303197e67 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -88,4 +88,6 @@
88#define PR_TASK_PERF_COUNTERS_DISABLE 31 88#define PR_TASK_PERF_COUNTERS_DISABLE 31
89#define PR_TASK_PERF_COUNTERS_ENABLE 32 89#define PR_TASK_PERF_COUNTERS_ENABLE 32
90 90
91#define PR_MCE_KILL 33
92
91#endif /* _LINUX_PRCTL_H */ 93#endif /* _LINUX_PRCTL_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f3d74bd04d18..29eae73c951d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1687,6 +1687,7 @@ extern cputime_t task_gtime(struct task_struct *p);
1687#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ 1687#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
1688#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ 1688#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
1689#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ 1689#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
1690#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
1690#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ 1691#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
1691#define PF_DUMPCORE 0x00000200 /* dumped core */ 1692#define PF_DUMPCORE 0x00000200 /* dumped core */
1692#define PF_SIGNALED 0x00000400 /* killed by a signal */ 1693#define PF_SIGNALED 0x00000400 /* killed by a signal */
@@ -1706,6 +1707,7 @@ extern cputime_t task_gtime(struct task_struct *p);
1706#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ 1707#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
1707#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ 1708#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
1708#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ 1709#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */
1710#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
1709#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ 1711#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
1710#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ 1712#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
1711#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ 1713#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */
diff --git a/kernel/sys.c b/kernel/sys.c
index b3f1097c76fa..41e02eff3398 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1528,6 +1528,28 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
1528 current->timer_slack_ns = arg2; 1528 current->timer_slack_ns = arg2;
1529 error = 0; 1529 error = 0;
1530 break; 1530 break;
1531 case PR_MCE_KILL:
1532 if (arg4 | arg5)
1533 return -EINVAL;
1534 switch (arg2) {
1535 case 0:
1536 if (arg3 != 0)
1537 return -EINVAL;
1538 current->flags &= ~PF_MCE_PROCESS;
1539 break;
1540 case 1:
1541 current->flags |= PF_MCE_PROCESS;
1542 if (arg3 != 0)
1543 current->flags |= PF_MCE_EARLY;
1544 else
1545 current->flags &= ~PF_MCE_EARLY;
1546 break;
1547 default:
1548 return -EINVAL;
1549 }
1550 error = 0;
1551 break;
1552
1531 default: 1553 default:
1532 error = -EINVAL; 1554 error = -EINVAL;
1533 break; 1555 break;