aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorViresh Kumar <viresh.kumar@linaro.org>2018-04-02 06:56:25 -0400
committerZhang Rui <rui.zhang@intel.com>2018-04-02 09:49:01 -0400
commit8ea229511e06f9635ecc338dcbe0db41a73623f0 (patch)
treea963ef526322117373498ff22950fffc942a5b14
parent0c8efd610b58cb23cefdfa12015799079aef94ae (diff)
thermal: Add cooling device's statistics in sysfs
This extends the sysfs interface for thermal cooling devices and exposes some pretty useful statistics. These statistics have proven to be quite useful, especially while doing benchmarks related to the task scheduler, where we want to make sure that nothing has disrupted the test, especially the cooling device which may have put constraints on the CPUs. The information exposed here tells us to what extent the CPUs were constrained by the thermal framework. The write-only "reset" file is used to reset the statistics. The read-only "time_in_state_ms" file shows the time (in msec) spent by the device in the respective cooling states, and it prints one line per cooling state. The read-only "total_trans" file shows a single positive integer value showing the total number of cooling state transitions the device has gone through since the time the cooling device is registered or the time when statistics were reset last. The read-only "trans_table" file shows a two dimensional matrix, where an entry <i,j> (row i, column j) represents the number of transitions from State_i to State_j. This is what the directory structure looks like for a single cooling device: $ ls -R /sys/class/thermal/cooling_device0/ /sys/class/thermal/cooling_device0/: cur_state max_state power stats subsystem type uevent /sys/class/thermal/cooling_device0/power: autosuspend_delay_ms runtime_active_time runtime_suspended_time control runtime_status /sys/class/thermal/cooling_device0/stats: reset time_in_state_ms total_trans trans_table This is tested on ARM 64-bit Hisilicon hikey620 board running Ubuntu and ARM 64-bit Hisilicon hikey960 board running Android. Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
-rw-r--r--Documentation/thermal/sysfs-api.txt31
-rw-r--r--drivers/thermal/Kconfig7
-rw-r--r--drivers/thermal/thermal_core.c3
-rw-r--r--drivers/thermal/thermal_core.h10
-rw-r--r--drivers/thermal/thermal_helpers.c5
-rw-r--r--drivers/thermal/thermal_sysfs.c225
-rw-r--r--include/linux/thermal.h1
7 files changed, 280 insertions, 2 deletions
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index bb9a0a53e76b..911399730c1c 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -255,6 +255,7 @@ temperature) and throttle appropriate devices.
2552. sysfs attributes structure 2552. sysfs attributes structure
256 256
257RO read only value 257RO read only value
258WO write only value
258RW read/write value 259RW read/write value
259 260
260Thermal sysfs attributes will be represented under /sys/class/thermal. 261Thermal sysfs attributes will be represented under /sys/class/thermal.
@@ -286,6 +287,11 @@ Thermal cooling device sys I/F, created once it's registered:
286 |---type: Type of the cooling device(processor/fan/...) 287 |---type: Type of the cooling device(processor/fan/...)
287 |---max_state: Maximum cooling state of the cooling device 288 |---max_state: Maximum cooling state of the cooling device
288 |---cur_state: Current cooling state of the cooling device 289 |---cur_state: Current cooling state of the cooling device
290 |---stats: Directory containing cooling device's statistics
291 |---stats/reset: Writing any value resets the statistics
292 |---stats/time_in_state_ms: Time (msec) spent in various cooling states
293 |---stats/total_trans: Total number of times cooling state is changed
294 |---stats/trans_table: Cooling state transition table
289 295
290 296
291The next two dynamic attributes are created/removed in pairs. They represent 297The next two dynamic attributes are created/removed in pairs. They represent
@@ -490,6 +496,31 @@ cur_state
490 - cur_state == max_state means the maximum cooling. 496 - cur_state == max_state means the maximum cooling.
491 RW, Required 497 RW, Required
492 498
499stats/reset
500 Writing any value resets the cooling device's statistics.
501 WO, Required
502
503stats/time_in_state_ms:
504 The amount of time spent by the cooling device in various cooling
505 states. The output will have "<state> <time>" pair in each line, which
506 will mean this cooling device spent <time> msec of time at <state>.
507 Output will have one line for each of the supported states. The time
508 is reported in milliseconds, as the "_ms" suffix of the file name indicates.
509 RO, Required
510
511stats/total_trans:
512 A single positive value showing the total number of times the state of a
513 cooling device is changed.
514 RO, Required
515
516stats/trans_table:
517 This gives fine grained information about all the cooling state
518 transitions. The cat output here is a two dimensional matrix, where an
519 entry <i,j> (row i, column j) represents the number of transitions from
520 State_i to State_j. If the transition table is bigger than PAGE_SIZE,
521 reading this will return an -EFBIG error.
522 RO, Required
523
4933. A simple implementation 5243. A simple implementation
494 525
495ACPI thermal zone may support multiple trip points like critical, hot, 526ACPI thermal zone may support multiple trip points like critical, hot,
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index b6adc54b96f1..82979880f985 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -15,6 +15,13 @@ menuconfig THERMAL
15 15
16if THERMAL 16if THERMAL
17 17
18config THERMAL_STATISTICS
19 bool "Thermal state transition statistics"
20 help
21 Export thermal state transition statistics information through sysfs.
22
23 If in doubt, say N.
24
18config THERMAL_EMERGENCY_POWEROFF_DELAY_MS 25config THERMAL_EMERGENCY_POWEROFF_DELAY_MS
19 int "Emergency poweroff delay in milli-seconds" 26 int "Emergency poweroff delay in milli-seconds"
20 depends on THERMAL 27 depends on THERMAL
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 2b1b0ba393a4..d64325e078db 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -972,8 +972,8 @@ __thermal_cooling_device_register(struct device_node *np,
972 cdev->ops = ops; 972 cdev->ops = ops;
973 cdev->updated = false; 973 cdev->updated = false;
974 cdev->device.class = &thermal_class; 974 cdev->device.class = &thermal_class;
975 thermal_cooling_device_setup_sysfs(cdev);
976 cdev->devdata = devdata; 975 cdev->devdata = devdata;
976 thermal_cooling_device_setup_sysfs(cdev);
977 dev_set_name(&cdev->device, "cooling_device%d", cdev->id); 977 dev_set_name(&cdev->device, "cooling_device%d", cdev->id);
978 result = device_register(&cdev->device); 978 result = device_register(&cdev->device);
979 if (result) { 979 if (result) {
@@ -1106,6 +1106,7 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
1106 1106
1107 ida_simple_remove(&thermal_cdev_ida, cdev->id); 1107 ida_simple_remove(&thermal_cdev_ida, cdev->id);
1108 device_unregister(&cdev->device); 1108 device_unregister(&cdev->device);
1109 thermal_cooling_device_destroy_sysfs(cdev);
1109} 1110}
1110EXPORT_SYMBOL_GPL(thermal_cooling_device_unregister); 1111EXPORT_SYMBOL_GPL(thermal_cooling_device_unregister);
1111 1112
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index 27e3b1df7360..5e4150261500 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -73,6 +73,7 @@ int thermal_build_list_of_policies(char *buf);
73int thermal_zone_create_device_groups(struct thermal_zone_device *, int); 73int thermal_zone_create_device_groups(struct thermal_zone_device *, int);
74void thermal_zone_destroy_device_groups(struct thermal_zone_device *); 74void thermal_zone_destroy_device_groups(struct thermal_zone_device *);
75void thermal_cooling_device_setup_sysfs(struct thermal_cooling_device *); 75void thermal_cooling_device_setup_sysfs(struct thermal_cooling_device *);
76void thermal_cooling_device_destroy_sysfs(struct thermal_cooling_device *cdev);
76/* used only at binding time */ 77/* used only at binding time */
77ssize_t 78ssize_t
78thermal_cooling_device_trip_point_show(struct device *, 79thermal_cooling_device_trip_point_show(struct device *,
@@ -84,6 +85,15 @@ ssize_t thermal_cooling_device_weight_store(struct device *,
84 struct device_attribute *, 85 struct device_attribute *,
85 const char *, size_t); 86 const char *, size_t);
86 87
#ifdef CONFIG_THERMAL_STATISTICS
/* Record that @cdev has been switched to @new_state. */
void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
					 unsigned long new_state);
#else
/* Statistics are compiled out: recording a state change does nothing. */
static inline void
thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
				    unsigned long new_state)
{
}
#endif /* CONFIG_THERMAL_STATISTICS */
96
87#ifdef CONFIG_THERMAL_GOV_STEP_WISE 97#ifdef CONFIG_THERMAL_GOV_STEP_WISE
88int thermal_gov_step_wise_register(void); 98int thermal_gov_step_wise_register(void);
89void thermal_gov_step_wise_unregister(void); 99void thermal_gov_step_wise_unregister(void);
diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c
index 8cdf75adcce1..eb03d7e099bb 100644
--- a/drivers/thermal/thermal_helpers.c
+++ b/drivers/thermal/thermal_helpers.c
@@ -187,7 +187,10 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev)
187 if (instance->target > target) 187 if (instance->target > target)
188 target = instance->target; 188 target = instance->target;
189 } 189 }
190 cdev->ops->set_cur_state(cdev, target); 190
191 if (!cdev->ops->set_cur_state(cdev, target))
192 thermal_cooling_device_stats_update(cdev, target);
193
191 cdev->updated = true; 194 cdev->updated = true;
192 mutex_unlock(&cdev->lock); 195 mutex_unlock(&cdev->lock);
193 trace_cdev_update(cdev, target); 196 trace_cdev_update(cdev, target);
diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index ba81c9080f6e..23b5e0a709b0 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -20,6 +20,7 @@
20#include <linux/err.h> 20#include <linux/err.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/string.h> 22#include <linux/string.h>
23#include <linux/jiffies.h>
23 24
24#include "thermal_core.h" 25#include "thermal_core.h"
25 26
@@ -721,6 +722,7 @@ thermal_cooling_device_cur_state_store(struct device *dev,
721 result = cdev->ops->set_cur_state(cdev, state); 722 result = cdev->ops->set_cur_state(cdev, state);
722 if (result) 723 if (result)
723 return result; 724 return result;
725 thermal_cooling_device_stats_update(cdev, state);
724 return count; 726 return count;
725} 727}
726 728
@@ -745,14 +747,237 @@ static const struct attribute_group cooling_device_attr_group = {
745 747
746static const struct attribute_group *cooling_device_attr_groups[] = { 748static const struct attribute_group *cooling_device_attr_groups[] = {
747 &cooling_device_attr_group, 749 &cooling_device_attr_group,
750 NULL, /* Space allocated for cooling_device_stats_attr_group */
748 NULL, 751 NULL,
749}; 752};
750 753
754#ifdef CONFIG_THERMAL_STATISTICS
755struct cooling_dev_stats {
756 spinlock_t lock;
757 unsigned int total_trans;
758 unsigned long state;
759 unsigned long max_states;
760 ktime_t last_time;
761 ktime_t *time_in_state;
762 unsigned int *trans_table;
763};
764
765static void update_time_in_state(struct cooling_dev_stats *stats)
766{
767 ktime_t now = ktime_get(), delta;
768
769 delta = ktime_sub(now, stats->last_time);
770 stats->time_in_state[stats->state] =
771 ktime_add(stats->time_in_state[stats->state], delta);
772 stats->last_time = now;
773}
774
775void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
776 unsigned long new_state)
777{
778 struct cooling_dev_stats *stats = cdev->stats;
779
780 spin_lock(&stats->lock);
781
782 if (stats->state == new_state)
783 goto unlock;
784
785 update_time_in_state(stats);
786 stats->trans_table[stats->state * stats->max_states + new_state]++;
787 stats->state = new_state;
788 stats->total_trans++;
789
790unlock:
791 spin_unlock(&stats->lock);
792}
793
794static ssize_t
795thermal_cooling_device_total_trans_show(struct device *dev,
796 struct device_attribute *attr,
797 char *buf)
798{
799 struct thermal_cooling_device *cdev = to_cooling_device(dev);
800 struct cooling_dev_stats *stats = cdev->stats;
801 int ret;
802
803 spin_lock(&stats->lock);
804 ret = sprintf(buf, "%u\n", stats->total_trans);
805 spin_unlock(&stats->lock);
806
807 return ret;
808}
809
810static ssize_t
811thermal_cooling_device_time_in_state_show(struct device *dev,
812 struct device_attribute *attr,
813 char *buf)
814{
815 struct thermal_cooling_device *cdev = to_cooling_device(dev);
816 struct cooling_dev_stats *stats = cdev->stats;
817 ssize_t len = 0;
818 int i;
819
820 spin_lock(&stats->lock);
821 update_time_in_state(stats);
822
823 for (i = 0; i < stats->max_states; i++) {
824 len += sprintf(buf + len, "state%u\t%llu\n", i,
825 ktime_to_ms(stats->time_in_state[i]));
826 }
827 spin_unlock(&stats->lock);
828
829 return len;
830}
831
832static ssize_t
833thermal_cooling_device_reset_store(struct device *dev,
834 struct device_attribute *attr,
835 const char *buf, size_t count)
836{
837 struct thermal_cooling_device *cdev = to_cooling_device(dev);
838 struct cooling_dev_stats *stats = cdev->stats;
839 int i, states = stats->max_states;
840
841 spin_lock(&stats->lock);
842
843 stats->total_trans = 0;
844 stats->last_time = ktime_get();
845 memset(stats->trans_table, 0,
846 states * states * sizeof(*stats->trans_table));
847
848 for (i = 0; i < stats->max_states; i++)
849 stats->time_in_state[i] = ktime_set(0, 0);
850
851 spin_unlock(&stats->lock);
852
853 return count;
854}
855
856static ssize_t
857thermal_cooling_device_trans_table_show(struct device *dev,
858 struct device_attribute *attr,
859 char *buf)
860{
861 struct thermal_cooling_device *cdev = to_cooling_device(dev);
862 struct cooling_dev_stats *stats = cdev->stats;
863 ssize_t len = 0;
864 int i, j;
865
866 len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n");
867 len += snprintf(buf + len, PAGE_SIZE - len, " : ");
868 for (i = 0; i < stats->max_states; i++) {
869 if (len >= PAGE_SIZE)
870 break;
871 len += snprintf(buf + len, PAGE_SIZE - len, "state%2u ", i);
872 }
873 if (len >= PAGE_SIZE)
874 return PAGE_SIZE;
875
876 len += snprintf(buf + len, PAGE_SIZE - len, "\n");
877
878 for (i = 0; i < stats->max_states; i++) {
879 if (len >= PAGE_SIZE)
880 break;
881
882 len += snprintf(buf + len, PAGE_SIZE - len, "state%2u:", i);
883
884 for (j = 0; j < stats->max_states; j++) {
885 if (len >= PAGE_SIZE)
886 break;
887 len += snprintf(buf + len, PAGE_SIZE - len, "%8u ",
888 stats->trans_table[i * stats->max_states + j]);
889 }
890 if (len >= PAGE_SIZE)
891 break;
892 len += snprintf(buf + len, PAGE_SIZE - len, "\n");
893 }
894
895 if (len >= PAGE_SIZE) {
896 pr_warn_once("Thermal transition table exceeds PAGE_SIZE. Disabling\n");
897 return -EFBIG;
898 }
899 return len;
900}
901
902static DEVICE_ATTR(total_trans, 0444, thermal_cooling_device_total_trans_show,
903 NULL);
904static DEVICE_ATTR(time_in_state_ms, 0444,
905 thermal_cooling_device_time_in_state_show, NULL);
906static DEVICE_ATTR(reset, 0200, NULL, thermal_cooling_device_reset_store);
907static DEVICE_ATTR(trans_table, 0444,
908 thermal_cooling_device_trans_table_show, NULL);
909
910static struct attribute *cooling_device_stats_attrs[] = {
911 &dev_attr_total_trans.attr,
912 &dev_attr_time_in_state_ms.attr,
913 &dev_attr_reset.attr,
914 &dev_attr_trans_table.attr,
915 NULL
916};
917
918static const struct attribute_group cooling_device_stats_attr_group = {
919 .attrs = cooling_device_stats_attrs,
920 .name = "stats"
921};
922
923static void cooling_device_stats_setup(struct thermal_cooling_device *cdev)
924{
925 struct cooling_dev_stats *stats;
926 unsigned long states;
927 int var;
928
929 if (cdev->ops->get_max_state(cdev, &states))
930 return;
931
932 states++; /* Total number of states is highest state + 1 */
933
934 var = sizeof(*stats);
935 var += sizeof(*stats->time_in_state) * states;
936 var += sizeof(*stats->trans_table) * states * states;
937
938 stats = kzalloc(var, GFP_KERNEL);
939 if (!stats)
940 return;
941
942 stats->time_in_state = (ktime_t *)(stats + 1);
943 stats->trans_table = (unsigned int *)(stats->time_in_state + states);
944 cdev->stats = stats;
945 stats->last_time = ktime_get();
946 stats->max_states = states;
947
948 spin_lock_init(&stats->lock);
949
950 /* Fill the empty slot left in cooling_device_attr_groups */
951 var = ARRAY_SIZE(cooling_device_attr_groups) - 2;
952 cooling_device_attr_groups[var] = &cooling_device_stats_attr_group;
953}
954
955static void cooling_device_stats_destroy(struct thermal_cooling_device *cdev)
956{
957 kfree(cdev->stats);
958 cdev->stats = NULL;
959}
960
961#else
962
/* Statistics are compiled out: setting them up is a no-op. */
static inline void
cooling_device_stats_setup(struct thermal_cooling_device *cdev)
{
}

/* Statistics are compiled out: there is nothing to free. */
static inline void
cooling_device_stats_destroy(struct thermal_cooling_device *cdev)
{
}
967
968#endif /* CONFIG_THERMAL_STATISTICS */
969
751void thermal_cooling_device_setup_sysfs(struct thermal_cooling_device *cdev) 970void thermal_cooling_device_setup_sysfs(struct thermal_cooling_device *cdev)
752{ 971{
972 cooling_device_stats_setup(cdev);
753 cdev->device.groups = cooling_device_attr_groups; 973 cdev->device.groups = cooling_device_attr_groups;
754} 974}
755 975
/* Release the statistics that were set up for @cdev's sysfs interface. */
void thermal_cooling_device_destroy_sysfs(struct thermal_cooling_device *cdev)
{
	cooling_device_stats_destroy(cdev);
}
980
756/* these helpers will be used only at the time of binding */ 981/* these helpers will be used only at the time of binding */
757ssize_t 982ssize_t
758thermal_cooling_device_trip_point_show(struct device *dev, 983thermal_cooling_device_trip_point_show(struct device *dev,
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 8c5302374eaa..7834be668d80 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -148,6 +148,7 @@ struct thermal_cooling_device {
148 struct device device; 148 struct device device;
149 struct device_node *np; 149 struct device_node *np;
150 void *devdata; 150 void *devdata;
151 void *stats;
151 const struct thermal_cooling_device_ops *ops; 152 const struct thermal_cooling_device_ops *ops;
152 bool updated; /* true if the cooling device does not need update */ 153 bool updated; /* true if the cooling device does not need update */
153 struct mutex lock; /* protect thermal_instances list */ 154 struct mutex lock; /* protect thermal_instances list */