rlimits: Print more information when CPU/RT limits are exceeded

When a process is sent a SIGKILL because it exceeded CPU or RT limits, the cause may not be obvious in userspace -- daemonised processes just get killed, and even foreground processes just see a 'Killed' message. The lack of any information in the logs on why this might be happening can be confusing to users who are not aware of this mechanism.

Add messages which dump the process name and tid in dmesg when a process exceeds its CPU or RT limits (soft and hard) in order to make it clearer to people debugging such issues.

Signed-off-by: Arun Raghavan <arun@arunraghavan.net>
Link: http://lkml.kernel.org/r/20170301145309.27214-1-arun@arunraghavan.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
author: Arun Raghavan <arun@arunraghavan.net> 2017-03-01 09:53:09 -0500
committer: Thomas Gleixner <tglx@linutronix.de> 2017-03-13 16:32:15 -0400
commit: e7ea7c9806a2681807257ea89085339d33f7fa0b (patch)
tree: 10c31f31e93c29475e2a443db27109238f89aa6b /kernel/time
parent: 4495c08e84729385774601b5146d51d9e5849f81 (diff)
1 files changed, 7 insertions, 2 deletions
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 4513ad16a253..76bea3a47d4b 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -825,6 +825,8 @@ static void check_thread_timers(struct task_struct *tsk,
                         * At the hard limit, we just die.
                         * No need to calculate anything else now.
                         */
+                        pr_info("CPU Watchdog Timeout (hard): %s[%d]\n",
+                                tsk->comm, task_pid_nr(tsk));
                        __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
                        return;
                }
@@ -836,8 +838,7 @@ static void check_thread_timers(struct task_struct *tsk,
                                soft += USEC_PER_SEC;
                                sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
                        }
-                        printk(KERN_INFO
+                        pr_info("RT Watchdog Timeout (soft): %s[%d]\n",
-                                "RT Watchdog Timeout: %s[%d]\n",
                                tsk->comm, task_pid_nr(tsk));
                        __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
                }
@@ -935,6 +936,8 @@ static void check_process_timers(struct task_struct *tsk,
                         * At the hard limit, we just die.
                         * No need to calculate anything else now.
                         */
+                        pr_info("RT Watchdog Timeout (hard): %s[%d]\n",
+                                tsk->comm, task_pid_nr(tsk));
                        __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
                        return;
                }
@@ -942,6 +945,8 @@ static void check_process_timers(struct task_struct *tsk,
                        /*
                         * At the soft limit, send a SIGXCPU every second.
                         */
+                        pr_info("CPU Watchdog Timeout (soft): %s[%d]\n",
+                                tsk->comm, task_pid_nr(tsk));
                        __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
                        if (soft < hard) {
                                soft++;
author	Arun Raghavan <arun@arunraghavan.net>	2017-03-01 09:53:09 -0500
committer	Thomas Gleixner <tglx@linutronix.de>	2017-03-13 16:32:15 -0400
commit	e7ea7c9806a2681807257ea89085339d33f7fa0b (patch)
tree	10c31f31e93c29475e2a443db27109238f89aa6b /kernel/time
parent	4495c08e84729385774601b5146d51d9e5849f81 (diff)