aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKeerthy <j-keerthy@ti.com>2017-04-18 00:29:59 -0400
committerZhang Rui <rui.zhang@intel.com>2017-05-05 04:01:45 -0400
commitef1d87e06ab4d3f9a95f02517ecc50902dc233a7 (patch)
tree6dd3327394c0cb14fa964a3a8baf1c51b6d0e337
parente441fd68663e298e99a99e215e0144a0eda6250d (diff)
thermal: core: Add a back up thermal shutdown mechanism
orderly_poweroff is triggered when a graceful shutdown of the system is desired. This may be used in many critical states of the kernel, such as when subsystems detect conditions such as critical temperature conditions. However, in certain conditions in system boot up sequences, like those in the middle of driver probes being initiated, userspace will be unable to power off the system in a clean manner and leave the system in a critical state. In cases like these, /sbin/poweroff will return success (having forked off to attempt powering off the system). However, the system overall will fail to completely power off (since other modules will be probed) and the system is still functional with no userspace (since that would have shut itself off). However, there is no clean way of detecting such a failure of userspace powering off the system. In such scenarios, it is necessary for a backup workqueue to be able to force a shutdown of the system when orderly shutdown is not successful after a configurable time period. Reported-by: Nishanth Menon <nm@ti.com> Signed-off-by: Keerthy <j-keerthy@ti.com> Acked-by: Eduardo Valentin <edubezval@gmail.com> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
-rw-r--r--Documentation/thermal/sysfs-api.txt21
-rw-r--r--drivers/thermal/Kconfig17
-rw-r--r--drivers/thermal/thermal_core.c53
3 files changed, 91 insertions, 0 deletions
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index ef473dc7f55e..bb9a0a53e76b 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -582,3 +582,24 @@ platform data is provided, this uses the step_wise throttling policy.
582This function serves as an arbitrator to set the state of a cooling 582This function serves as an arbitrator to set the state of a cooling
583device. It sets the cooling device to the deepest cooling state if 583device. It sets the cooling device to the deepest cooling state if
584possible. 584possible.
585
5866. thermal_emergency_poweroff:
587
588On an event of critical trip temperature crossing, the thermal framework
589allows the system to shut down gracefully by calling orderly_poweroff().
590In the event of a failure of orderly_poweroff() to shut down the system
591we are in danger of keeping the system alive at undesirably high
592temperatures. To mitigate this high risk scenario we program a work
593queue to fire after a pre-determined number of seconds to start
594an emergency shutdown of the device using the kernel_power_off()
595function. In case kernel_power_off() fails then finally
596emergency_restart() is called in the worst case.
597
598The delay should be carefully profiled so as to give adequate time for
599orderly_poweroff(). In case of failure of an orderly_poweroff() the
600emergency poweroff kicks in after the delay has elapsed and shuts down
601the system.
602
603If set to 0 emergency poweroff will not be supported. So a carefully
604profiled non-zero positive value is a must for emergency poweroff to be
605triggered.
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 776b34396144..74ef51dfb816 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -15,6 +15,23 @@ menuconfig THERMAL
15 15
16if THERMAL 16if THERMAL
17 17
18config THERMAL_EMERGENCY_POWEROFF_DELAY_MS
19 int "Emergency poweroff delay in milli-seconds"
20 depends on THERMAL
21 default 0
22 help
23 Thermal subsystem will issue a graceful shutdown when
24 critical temperatures are reached using orderly_poweroff(). In
25 case of failure of an orderly_poweroff(), the thermal emergency
26 poweroff kicks in after a delay has elapsed and shuts down the system.
27 This config is the number of milliseconds to delay before emergency
28 poweroff kicks in. Similarly to the critical trip point,
29 the delay should be carefully profiled so as to give adequate
30 time for orderly_poweroff() to finish on regular execution.
31 If set to 0 emergency poweroff will not be supported.
32
33 In doubt, leave as 0.
34
18config THERMAL_HWMON 35config THERMAL_HWMON
19 bool 36 bool
20 prompt "Expose thermal sensors as hwmon device" 37 prompt "Expose thermal sensors as hwmon device"
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 8337c272d3ec..b21b9cc2c8d6 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -324,6 +324,54 @@ static void handle_non_critical_trips(struct thermal_zone_device *tz,
324 def_governor->throttle(tz, trip); 324 def_governor->throttle(tz, trip);
325} 325}
326 326
327/**
328 * thermal_emergency_poweroff_func - emergency poweroff work after a known delay
329 * @work: work_struct associated with the emergency poweroff work (unused here)
330 *
331 * Work handler for thermal_emergency_poweroff_work: it forces a kernel
332 * poweroff and, if that call returns, falls back to emergency_restart().
333 */
334static void thermal_emergency_poweroff_func(struct work_struct *work)
335{
336 /*
337 * We get here only after the emergency thermal shutdown
338 * waiting period has expired, which means orderly_poweroff() has
339 * not been able to shut off the system for some reason.
340 * Try to shut down the system immediately using kernel_power_off()
341 * if populated.
342 */
343 WARN(1, "Attempting kernel_power_off: Temperature too high\n");
344 kernel_power_off();
345
346 /*
347 * Worst case: kernel_power_off() returned, so trigger an emergency restart.
348 */
349 WARN(1, "Attempting emergency_restart: Temperature too high\n");
350 emergency_restart();
351}
352
353static DECLARE_DELAYED_WORK(thermal_emergency_poweroff_work,
354 thermal_emergency_poweroff_func);
355
356/**
357 * thermal_emergency_poweroff - Schedule the backup emergency system poweroff
358 *
359 * Queues thermal_emergency_poweroff_work to run after
360 * CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS ms; a delay <= 0 schedules nothing.
361 * NOTE(review): non-static but no prototype is visible here - confirm linkage.
 */
362void thermal_emergency_poweroff(void)
363{
364 int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS;
365 /*
366 * poweroff_delay_ms must be a carefully profiled positive value;
367 * thermal_emergency_poweroff_work is scheduled only when it is.
368 */
369 if (poweroff_delay_ms <= 0)
370 return;
371 schedule_delayed_work(&thermal_emergency_poweroff_work,
372 msecs_to_jiffies(poweroff_delay_ms));
373}
374
327static void handle_critical_trips(struct thermal_zone_device *tz, 375static void handle_critical_trips(struct thermal_zone_device *tz,
328 int trip, enum thermal_trip_type trip_type) 376 int trip, enum thermal_trip_type trip_type)
329{ 377{
@@ -346,6 +394,11 @@ static void handle_critical_trips(struct thermal_zone_device *tz,
346 tz->temperature / 1000); 394 tz->temperature / 1000);
347 mutex_lock(&poweroff_lock); 395 mutex_lock(&poweroff_lock);
348 if (!power_off_triggered) { 396 if (!power_off_triggered) {
397 /*
398 * Queue a backup emergency shutdown in the event of
399 * orderly_poweroff failure
400 */
401 thermal_emergency_poweroff();
349 orderly_poweroff(true); 402 orderly_poweroff(true);
350 power_off_triggered = true; 403 power_off_triggered = true;
351 } 404 }