summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
author: Arto Merilainen <amerilainen@nvidia.com> 2014-08-06 02:30:11 -0400
committer: Dan Willemsen <dwillemsen@nvidia.com> 2015-03-18 15:10:45 -0400
commit: b33020008b727d75827d670ca7a6c969769ca1a0 (patch)
tree: 387fd7b91d0bad4e287d939e75babba2b3abdb80 /drivers/gpu
parent: 273f754cb518c8133c1c19d23d58fab533b1cf0a (diff)
gpu: nvgpu: Add sw shadow for load value
Reading the load value may increase CPU power consumption temporarily. In most cases we are ok with a value that was read a moment earlier. This patch introduces a software shadow for gpu load. The shadow is updated before starting scaling and all scaling code paths use the sw shadow. Change-Id: I53d2ccb8e7f83147f411a14d3104d890dd9af9a3 Signed-off-by: Arto Merilainen <amerilainen@nvidia.com> Reviewed-on: http://git-master/r/453347 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a_scale.c | 3
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c | 1
-rw-r--r-- drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 10
-rw-r--r-- drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | 2
4 files changed, 14 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_scale.c b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c
index 8a92828f..d09a18f8 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_scale.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_scale.c
@@ -192,6 +192,9 @@ static void gk20a_scale_notify(struct platform_device *pdev, bool busy)
192 struct gk20a_scale_profile *profile = g->scale_profile; 192 struct gk20a_scale_profile *profile = g->scale_profile;
193 struct devfreq *devfreq = g->devfreq; 193 struct devfreq *devfreq = g->devfreq;
194 194
195 /* update the software shadow */
196 gk20a_pmu_load_update(g);
197
195 /* inform edp about new constraint */ 198 /* inform edp about new constraint */
196 if (platform->prescale) 199 if (platform->prescale)
197 platform->prescale(pdev); 200 platform->prescale(pdev);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
index 994c9cd2..97a0452c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
@@ -278,6 +278,7 @@ static ssize_t gk20a_load_show(struct device *dev,
278 busy_time = 0; 278 busy_time = 0;
279 } else { 279 } else {
280 gk20a_busy(g->dev); 280 gk20a_busy(g->dev);
281 gk20a_pmu_load_update(g);
281 gk20a_pmu_load_norm(g, &busy_time); 282 gk20a_pmu_load_norm(g, &busy_time);
282 gk20a_idle(g->dev); 283 gk20a_idle(g->dev);
283 } 284 }
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 7c441f53..06e7a4e6 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -3677,16 +3677,22 @@ int gk20a_pmu_destroy(struct gk20a *g)
3677 3677
3678int gk20a_pmu_load_norm(struct gk20a *g, u32 *load) 3678int gk20a_pmu_load_norm(struct gk20a *g, u32 *load)
3679{ 3679{
3680 *load = g->pmu.load_shadow;
3681 return 0;
3682}
3683
3684int gk20a_pmu_load_update(struct gk20a *g)
3685{
3680 struct pmu_gk20a *pmu = &g->pmu; 3686 struct pmu_gk20a *pmu = &g->pmu;
3681 u16 _load = 0; 3687 u16 _load = 0;
3682 3688
3683 if (!pmu->perfmon_ready) { 3689 if (!pmu->perfmon_ready) {
3684 *load = 0; 3690 pmu->load_shadow = 0;
3685 return 0; 3691 return 0;
3686 } 3692 }
3687 3693
3688 pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0); 3694 pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
3689 *load = _load / 10; 3695 pmu->load_shadow = _load / 10;
3690 3696
3691 return 0; 3697 return 0;
3692} 3698}
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index 292aabb0..694e0288 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -1079,6 +1079,7 @@ struct pmu_gk20a {
1079 bool perfmon_ready; 1079 bool perfmon_ready;
1080 1080
1081 u32 sample_buffer; 1081 u32 sample_buffer;
1082 u32 load_shadow;
1082 1083
1083 struct mutex isr_mutex; 1084 struct mutex isr_mutex;
1084 struct mutex isr_enable_lock; 1085 struct mutex isr_enable_lock;
@@ -1119,6 +1120,7 @@ int pmu_mutex_acquire(struct pmu_gk20a *pmu, u32 id, u32 *token);
1119int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token); 1120int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token);
1120int gk20a_pmu_destroy(struct gk20a *g); 1121int gk20a_pmu_destroy(struct gk20a *g);
1121int gk20a_pmu_load_norm(struct gk20a *g, u32 *load); 1122int gk20a_pmu_load_norm(struct gk20a *g, u32 *load);
1123int gk20a_pmu_load_update(struct gk20a *g);
1122int gk20a_pmu_debugfs_init(struct platform_device *dev); 1124int gk20a_pmu_debugfs_init(struct platform_device *dev);
1123void gk20a_pmu_reset_load_counters(struct gk20a *g); 1125void gk20a_pmu_reset_load_counters(struct gk20a *g);
1124void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, 1126void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,