summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
author: Prashant Malani <pmalani@nvidia.com> 2014-01-02 15:47:14 -0500
committer: Dan Willemsen <dwillemsen@nvidia.com> 2015-03-18 15:09:45 -0400
commit: 6157db5d51f634fb44a286098d2764f6787f097c (patch)
tree: d4ad97c5d5667246b0462290700b4c9b88e33f0d /drivers
parent: 4c7065b558dac5968610415770d957f0e3b1ba69 (diff)
gpu: nvgpu: gk20a: Update perfmon init
Make the perfmon sampling configurable, by adding an 'enabled' flag. This is set according to the CONFIG initially. Modify the perfmon event handler to not touch clock rates. Add a counter to count the number of perfmon events. Also add debugfs entries for the above.

Bug 1410515

Change-Id: Ic8197eef0e46e35af1179a5b06140393541cfd43
Signed-off-by: Prashant Malani <pmalani@nvidia.com>
Reviewed-on: http://git-master/r/351564
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 127
-rw-r--r-- drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | 4
2 files changed, 107 insertions, 24 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 234255ce..bd7546b3 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -25,6 +25,7 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/debugfs.h> 26#include <linux/debugfs.h>
27#include <linux/dma-mapping.h> 27#include <linux/dma-mapping.h>
28#include <linux/uaccess.h>
28 29
29#include "gk20a.h" 30#include "gk20a.h"
30#include "hw_mc_gk20a.h" 31#include "hw_mc_gk20a.h"
@@ -1507,6 +1508,8 @@ static void gk20a_save_pmu_sw_state(struct pmu_gk20a *pmu,
1507 save->pg_buf = pmu->pg_buf; 1508 save->pg_buf = pmu->pg_buf;
1508 save->sw_ready = pmu->sw_ready; 1509 save->sw_ready = pmu->sw_ready;
1509 save->pg_init = pmu->pg_init; 1510 save->pg_init = pmu->pg_init;
1511 save->perfmon_events_cnt = pmu->perfmon_events_cnt;
1512 save->perfmon_sampling_enabled = pmu->perfmon_sampling_enabled;
1510} 1513}
1511 1514
1512static void gk20a_restore_pmu_sw_state(struct pmu_gk20a *pmu, 1515static void gk20a_restore_pmu_sw_state(struct pmu_gk20a *pmu,
@@ -1524,6 +1527,8 @@ static void gk20a_restore_pmu_sw_state(struct pmu_gk20a *pmu,
1524 pmu->pg_buf = save->pg_buf; 1527 pmu->pg_buf = save->pg_buf;
1525 pmu->sw_ready = save->sw_ready; 1528 pmu->sw_ready = save->sw_ready;
1526 pmu->pg_init = save->pg_init; 1529 pmu->pg_init = save->pg_init;
1530 pmu->perfmon_events_cnt = save->perfmon_events_cnt;
1531 pmu->perfmon_sampling_enabled = save->perfmon_sampling_enabled;
1527} 1532}
1528 1533
1529void gk20a_remove_pmu_support(struct pmu_gk20a *pmu) 1534void gk20a_remove_pmu_support(struct pmu_gk20a *pmu)
@@ -1589,6 +1594,9 @@ int gk20a_init_pmu_setup_sw(struct gk20a *g)
1589 1594
1590 /* TBD: sysmon subtask */ 1595 /* TBD: sysmon subtask */
1591 1596
1597 if (IS_ENABLED(CONFIG_TEGRA_GK20A_PERFMON))
1598 pmu->perfmon_sampling_enabled = true;
1599
1592 pmu->mutex_cnt = pwr_pmu_mutex__size_1_v(); 1600 pmu->mutex_cnt = pwr_pmu_mutex__size_1_v();
1593 pmu->mutex = kzalloc(pmu->mutex_cnt * 1601 pmu->mutex = kzalloc(pmu->mutex_cnt *
1594 sizeof(struct pmu_mutex), GFP_KERNEL); 1602 sizeof(struct pmu_mutex), GFP_KERNEL);
@@ -2556,7 +2564,6 @@ static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
2556 struct pmu_v *pv = &g->ops.pmu_ver; 2564 struct pmu_v *pv = &g->ops.pmu_ver;
2557 struct pmu_cmd cmd; 2565 struct pmu_cmd cmd;
2558 struct pmu_payload payload; 2566 struct pmu_payload payload;
2559 u32 current_rate = 0;
2560 u32 seq; 2567 u32 seq;
2561 2568
2562 /* PERFMON Start */ 2569 /* PERFMON Start */
@@ -2570,20 +2577,9 @@ static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
2570 pv->perfmon_start_set_state_id(&cmd.cmd.perfmon, 2577 pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
2571 pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]); 2578 pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
2572 2579
2573 current_rate = rate_gpu_to_gpc2clk(gk20a_clk_get_rate(g));
2574 if (current_rate >= gpc_pll_params.max_freq)
2575 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2576 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2577 else if (current_rate <= gpc_pll_params.min_freq)
2578 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2579 PMU_PERFMON_FLAG_ENABLE_INCREASE);
2580 else
2581 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2582 PMU_PERFMON_FLAG_ENABLE_INCREASE |
2583 PMU_PERFMON_FLAG_ENABLE_DECREASE);
2584
2585 pv->perfmon_start_set_flags(&cmd.cmd.perfmon, 2580 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
2586 pv->perfmon_start_get_flags(&cmd.cmd.perfmon) | 2581 PMU_PERFMON_FLAG_ENABLE_INCREASE |
2582 PMU_PERFMON_FLAG_ENABLE_DECREASE |
2587 PMU_PERFMON_FLAG_CLEAR_PREV); 2583 PMU_PERFMON_FLAG_CLEAR_PREV);
2588 2584
2589 memset(&payload, 0, sizeof(struct pmu_payload)); 2585 memset(&payload, 0, sizeof(struct pmu_payload));
@@ -2625,9 +2621,6 @@ static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu)
2625static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu, 2621static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
2626 struct pmu_perfmon_msg *msg) 2622 struct pmu_perfmon_msg *msg)
2627{ 2623{
2628 struct gk20a *g = pmu->g;
2629 u32 rate;
2630
2631 gk20a_dbg_fn(""); 2624 gk20a_dbg_fn("");
2632 2625
2633 switch (msg->msg_type) { 2626 switch (msg->msg_type) {
@@ -2635,17 +2628,13 @@ static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
2635 gk20a_dbg_pmu("perfmon increase event: " 2628 gk20a_dbg_pmu("perfmon increase event: "
2636 "state_id %d, ground_id %d, pct %d", 2629 "state_id %d, ground_id %d, pct %d",
2637 msg->gen.state_id, msg->gen.group_id, msg->gen.data); 2630 msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2638 /* increase gk20a clock freq by 20% */ 2631 (pmu->perfmon_events_cnt)++;
2639 rate = gk20a_clk_get_rate(g);
2640 gk20a_clk_set_rate(g, rate * 6 / 5);
2641 break; 2632 break;
2642 case PMU_PERFMON_MSG_ID_DECREASE_EVENT: 2633 case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
2643 gk20a_dbg_pmu("perfmon decrease event: " 2634 gk20a_dbg_pmu("perfmon decrease event: "
2644 "state_id %d, ground_id %d, pct %d", 2635 "state_id %d, ground_id %d, pct %d",
2645 msg->gen.state_id, msg->gen.group_id, msg->gen.data); 2636 msg->gen.state_id, msg->gen.group_id, msg->gen.data);
2646 /* decrease gk20a clock freq by 10% */ 2637 (pmu->perfmon_events_cnt)++;
2647 rate = gk20a_clk_get_rate(g);
2648 gk20a_clk_set_rate(g, (rate / 10) * 7);
2649 break; 2638 break;
2650 case PMU_PERFMON_MSG_ID_INIT_EVENT: 2639 case PMU_PERFMON_MSG_ID_INIT_EVENT:
2651 pmu->perfmon_ready = 1; 2640 pmu->perfmon_ready = 1;
@@ -2656,7 +2645,7 @@ static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
2656 } 2645 }
2657 2646
2658 /* restart sampling */ 2647 /* restart sampling */
2659 if (IS_ENABLED(CONFIG_GK20A_PERFMON)) 2648 if (pmu->perfmon_sampling_enabled)
2660 return pmu_perfmon_start_sampling(pmu); 2649 return pmu_perfmon_start_sampling(pmu);
2661 return 0; 2650 return 0;
2662} 2651}
@@ -3753,6 +3742,85 @@ static const struct file_operations elpg_transitions_fops = {
3753 .release = single_release, 3742 .release = single_release,
3754}; 3743};
3755 3744
3745static int perfmon_events_enable_show(struct seq_file *s, void *data)
3746{
3747 struct gk20a *g = s->private;
3748
3749 seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0);
3750 return 0;
3751
3752}
3753
3754static int perfmon_events_enable_open(struct inode *inode, struct file *file)
3755{
3756 return single_open(file, perfmon_events_enable_show, inode->i_private);
3757}
3758
3759static ssize_t perfmon_events_enable_write(struct file *file,
3760 const char __user *userbuf, size_t count, loff_t *ppos)
3761{
3762 struct seq_file *s = file->private_data;
3763 struct gk20a *g = s->private;
3764 unsigned long val = 0;
3765 char buf[40];
3766 int buf_size;
3767
3768 memset(buf, 0, sizeof(buf));
3769 buf_size = min(count, (sizeof(buf)-1));
3770
3771 if (copy_from_user(buf, userbuf, buf_size))
3772 return -EFAULT;
3773
3774 if (kstrtoul(buf, 10, &val) < 0)
3775 return -EINVAL;
3776
3777 /* Don't turn on gk20a unnecessarily */
3778 if (g->power_on) {
3779 gk20a_busy(g->dev);
3780 if (val && !g->pmu.perfmon_sampling_enabled) {
3781 g->pmu.perfmon_sampling_enabled = true;
3782 pmu_perfmon_start_sampling(&(g->pmu));
3783 } else if (!val && g->pmu.perfmon_sampling_enabled) {
3784 g->pmu.perfmon_sampling_enabled = false;
3785 pmu_perfmon_stop_sampling(&(g->pmu));
3786 }
3787 gk20a_idle(g->dev);
3788 } else {
3789 g->pmu.perfmon_sampling_enabled = val ? true : false;
3790 }
3791
3792 return count;
3793}
3794
3795static const struct file_operations perfmon_events_enable_fops = {
3796 .open = perfmon_events_enable_open,
3797 .read = seq_read,
3798 .write = perfmon_events_enable_write,
3799 .llseek = seq_lseek,
3800 .release = single_release,
3801};
3802
3803static int perfmon_events_count_show(struct seq_file *s, void *data)
3804{
3805 struct gk20a *g = s->private;
3806
3807 seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt);
3808 return 0;
3809
3810}
3811
3812static int perfmon_events_count_open(struct inode *inode, struct file *file)
3813{
3814 return single_open(file, perfmon_events_count_show, inode->i_private);
3815}
3816
3817static const struct file_operations perfmon_events_count_fops = {
3818 .open = perfmon_events_count_open,
3819 .read = seq_read,
3820 .llseek = seq_lseek,
3821 .release = single_release,
3822};
3823
3756int gk20a_pmu_debugfs_init(struct platform_device *dev) 3824int gk20a_pmu_debugfs_init(struct platform_device *dev)
3757{ 3825{
3758 struct dentry *d; 3826 struct dentry *d;
@@ -3771,6 +3839,17 @@ int gk20a_pmu_debugfs_init(struct platform_device *dev)
3771 if (!d) 3839 if (!d)
3772 goto err_out; 3840 goto err_out;
3773 3841
3842 d = debugfs_create_file(
3843 "perfmon_events_enable", S_IRUGO, platform->debugfs, g,
3844 &perfmon_events_enable_fops);
3845 if (!d)
3846 goto err_out;
3847
3848 d = debugfs_create_file(
3849 "perfmon_events_count", S_IRUGO, platform->debugfs, g,
3850 &perfmon_events_count_fops);
3851 if (!d)
3852 goto err_out;
3774 return 0; 3853 return 0;
3775 3854
3776err_out: 3855err_out:
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index de519bf6..488558fe 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -1049,6 +1049,8 @@ struct pmu_gk20a {
1049 struct pmu_cmdline_args_v0 args_v0; 1049 struct pmu_cmdline_args_v0 args_v0;
1050 struct pmu_cmdline_args_v1 args_v1; 1050 struct pmu_cmdline_args_v1 args_v1;
1051 }; 1051 };
1052 unsigned long perfmon_events_cnt;
1053 bool perfmon_sampling_enabled;
1052}; 1054};
1053 1055
1054struct gk20a_pmu_save_state { 1056struct gk20a_pmu_save_state {
@@ -1064,6 +1066,8 @@ struct gk20a_pmu_save_state {
1064 wait_queue_head_t pg_wq; 1066 wait_queue_head_t pg_wq;
1065 bool sw_ready; 1067 bool sw_ready;
1066 struct work_struct pg_init; 1068 struct work_struct pg_init;
1069 unsigned long perfmon_events_cnt;
1070 bool perfmon_sampling_enabled;
1067}; 1071};
1068 1072
1069int gk20a_init_pmu_support(struct gk20a *g); 1073int gk20a_init_pmu_support(struct gk20a *g);