diff options
author | Peng Liu <pengliu@nvidia.com> | 2018-10-30 16:45:43 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2019-04-01 18:27:17 -0400 |
commit | 3a11883f7f4399ae8dffbea00c1842e3c2095937 (patch) | |
tree | 82d36197046e73c13432250ec4ebce0da21791d5 /drivers/gpu/nvgpu/os/linux | |
parent | f1be222687a853b0218a5700a213f3d34d8ccc4f (diff) |
gpu: nvgpu: using pmu counters for load estimate
PMU counters #0 and #4 are used to count total cycles and busy cycles.
These counts are used by podgov to estimate GPU load.
The PMU idle intr status register is used to monitor counter overflow. Overflow
rarely occurs because the frequency governor reads and resets the counters
at a high cadence. When overflow does occur, a 100% workload is reported to
the frequency governor.
Bug 1963732
Change-Id: I046480ebde162e6eda24577932b96cfd91b77c69
Signed-off-by: Peng Liu <pengliu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1939547
(cherry picked from commit 34df0035194e0203f68f679acdd84e5533a48149)
Reviewed-on: https://git-master.nvidia.com/r/1979495
Reviewed-by: Aaron Tian <atian@nvidia.com>
Tested-by: Aaron Tian <atian@nvidia.com>
Reviewed-by: Rajkumar Kasirajan <rkasirajan@nvidia.com>
Tested-by: Rajkumar Kasirajan <rkasirajan@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/os/linux')
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/scale.c | 18 |
1 files changed, 8 insertions, 10 deletions
diff --git a/drivers/gpu/nvgpu/os/linux/scale.c b/drivers/gpu/nvgpu/os/linux/scale.c index ee5b6861..ef7996e6 100644 --- a/drivers/gpu/nvgpu/os/linux/scale.c +++ b/drivers/gpu/nvgpu/os/linux/scale.c | |||
@@ -211,18 +211,18 @@ static int gk20a_scale_target(struct device *dev, unsigned long *freq, | |||
211 | } | 211 | } |
212 | 212 | ||
213 | /* | 213 | /* |
214 | * update_load_estimate_gpmu(profile) | 214 | * update_load_estimate_busy_cycles(dev) |
215 | * | 215 | * |
216 | * Update load estimate using gpmu. The gpmu value is normalised | 216 | * Update load estimate using pmu idle counters. Result is normalised |
217 | * based on the time it was asked last time. | 217 | * based on the time it was asked last time. |
218 | */ | 218 | */ |
219 | 219 | ||
220 | static void update_load_estimate_gpmu(struct device *dev) | 220 | static void update_load_estimate_busy_cycles(struct device *dev) |
221 | { | 221 | { |
222 | struct gk20a *g = get_gk20a(dev); | 222 | struct gk20a *g = get_gk20a(dev); |
223 | struct gk20a_scale_profile *profile = g->scale_profile; | 223 | struct gk20a_scale_profile *profile = g->scale_profile; |
224 | unsigned long dt; | 224 | unsigned long dt; |
225 | u32 busy_time; | 225 | u32 busy_cycles_norm; |
226 | ktime_t t; | 226 | ktime_t t; |
227 | 227 | ||
228 | t = ktime_get(); | 228 | t = ktime_get(); |
@@ -230,8 +230,9 @@ static void update_load_estimate_gpmu(struct device *dev) | |||
230 | 230 | ||
231 | profile->dev_stat.total_time = dt; | 231 | profile->dev_stat.total_time = dt; |
232 | profile->last_event_time = t; | 232 | profile->last_event_time = t; |
233 | nvgpu_pmu_load_norm(g, &busy_time); | 233 | nvgpu_pmu_busy_cycles_norm(g, &busy_cycles_norm); |
234 | profile->dev_stat.busy_time = (busy_time * dt) / 1000; | 234 | profile->dev_stat.busy_time = |
235 | (busy_cycles_norm * dt) / PMU_BUSY_CYCLES_NORM_MAX; | ||
235 | } | 236 | } |
236 | 237 | ||
237 | /* | 238 | /* |
@@ -284,9 +285,6 @@ static int gk20a_scale_get_dev_status(struct device *dev, | |||
284 | struct gk20a_scale_profile *profile = g->scale_profile; | 285 | struct gk20a_scale_profile *profile = g->scale_profile; |
285 | struct gk20a_platform *platform = dev_get_drvdata(dev); | 286 | struct gk20a_platform *platform = dev_get_drvdata(dev); |
286 | 287 | ||
287 | /* update the software shadow */ | ||
288 | nvgpu_pmu_load_update(g); | ||
289 | |||
290 | /* inform edp about new constraint */ | 288 | /* inform edp about new constraint */ |
291 | if (platform->prescale) | 289 | if (platform->prescale) |
292 | platform->prescale(dev); | 290 | platform->prescale(dev); |
@@ -296,7 +294,7 @@ static int gk20a_scale_get_dev_status(struct device *dev, | |||
296 | g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); | 294 | g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); |
297 | 295 | ||
298 | /* Update load estimate */ | 296 | /* Update load estimate */ |
299 | update_load_estimate_gpmu(dev); | 297 | update_load_estimate_busy_cycles(dev); |
300 | 298 | ||
301 | /* Copy the contents of the current device status */ | 299 | /* Copy the contents of the current device status */ |
302 | *stat = profile->dev_stat; | 300 | *stat = profile->dev_stat; |