From 3a11883f7f4399ae8dffbea00c1842e3c2095937 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Tue, 30 Oct 2018 13:45:43 -0700 Subject: gpu: nvgpu: using pmu counters for load estimate PMU counters #0 and #4 are used to count total cycles and busy cycles. These counts are used by podgov to estimate GPU load. PMU idle intr status register is used to monitor overflow. Overflow rarely occurs because frequency governor reads and resets the counters at a high cadence. When overflow occurs, 100% work load is reported to frequency governor. Bug 1963732 Change-Id: I046480ebde162e6eda24577932b96cfd91b77c69 Signed-off-by: Peng Liu Reviewed-on: https://git-master.nvidia.com/r/1939547 (cherry picked from commit 34df0035194e0203f68f679acdd84e5533a48149) Reviewed-on: https://git-master.nvidia.com/r/1979495 Reviewed-by: Aaron Tian Tested-by: Aaron Tian Reviewed-by: Rajkumar Kasirajan Tested-by: Rajkumar Kasirajan Reviewed-by: Bibek Basu GVS: Gerrit_Virtual_Submit Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c | 42 +++++++++++++++++++ drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 45 ++++++++++++++++++++ drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | 3 ++ drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 2 + drivers/gpu/nvgpu/gp106/hal_gp106.c | 2 + drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 2 + drivers/gpu/nvgpu/gv100/hal_gv100.c | 2 + drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 2 + drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 2 + .../nvgpu/include/nvgpu/hw/gk20a/hw_pwr_gk20a.h | 40 ++++++++++++++++++ .../nvgpu/include/nvgpu/hw/gm20b/hw_pwr_gm20b.h | 48 ++++++++++++++++++++++ .../nvgpu/include/nvgpu/hw/gp106/hw_pwr_gp106.h | 48 ++++++++++++++++++++++ .../nvgpu/include/nvgpu/hw/gp10b/hw_pwr_gp10b.h | 48 ++++++++++++++++++++++ .../nvgpu/include/nvgpu/hw/gv100/hw_pwr_gv100.h | 48 ++++++++++++++++++++++ .../nvgpu/include/nvgpu/hw/gv11b/hw_pwr_gv11b.h | 48 ++++++++++++++++++++++ drivers/gpu/nvgpu/include/nvgpu/pmu.h | 4 ++ drivers/gpu/nvgpu/os/linux/scale.c | 18 ++++---- drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c | 2 + drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c | 2 + 19 files changed, 398 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c index 57a4ea40..bf07bd79 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c @@ -236,6 +236,48 @@ int nvgpu_pmu_load_update(struct gk20a *g) return 0; } +int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm) +{ + u64 busy_cycles, total_cycles; + u32 intr_status; + + gk20a_busy_noresume(g); + if (!g->power_on) { + *norm = 0; + goto exit; + } + + if (g->ops.pmu.pmu_read_idle_counter == NULL || + g->ops.pmu.pmu_reset_idle_counter == NULL || + g->ops.pmu.pmu_read_idle_intr_status == NULL || + g->ops.pmu.pmu_clear_idle_intr_status == NULL) { + *norm = PMU_BUSY_CYCLES_NORM_MAX; + goto exit; + } + + busy_cycles = g->ops.pmu.pmu_read_idle_counter(g, 4); + total_cycles = g->ops.pmu.pmu_read_idle_counter(g, 0); + intr_status = g->ops.pmu.pmu_read_idle_intr_status(g); + + g->ops.pmu.pmu_reset_idle_counter(g, 4); + g->ops.pmu.pmu_reset_idle_counter(g, 0); + + if (intr_status != 0UL) { + *norm = PMU_BUSY_CYCLES_NORM_MAX; + g->ops.pmu.pmu_clear_idle_intr_status(g); + } else if (total_cycles == 0ULL || busy_cycles > total_cycles) { + *norm = PMU_BUSY_CYCLES_NORM_MAX; + } else { + *norm = (u32)(busy_cycles * PMU_BUSY_CYCLES_NORM_MAX + / total_cycles); + } + +exit: + gk20a_idle_nosuspend(g); + + return 0; +} + void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, u32 *total_cycles) { diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 6eecc4fa..050423b0 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -798,6 +798,39 @@ void gk20a_pmu_init_perfmon_counter(struct gk20a *g) pwr_pmu_idle_ctrl_value_always_f() | pwr_pmu_idle_ctrl_filter_disabled_f()); gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); + + /* + * use counters 4 and 0 for perfmon to log busy cycles and total cycles + * counter #0 overflow sets pmu idle intr status bit + */ + gk20a_writel(g, pwr_pmu_idle_intr_r(), + pwr_pmu_idle_intr_en_f(0)); + + gk20a_writel(g, pwr_pmu_idle_threshold_r(0), + pwr_pmu_idle_threshold_value_f(0x7FFFFFFF)); + + data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(0)); + data = set_field(data, pwr_pmu_idle_ctrl_value_m() | + pwr_pmu_idle_ctrl_filter_m(), + pwr_pmu_idle_ctrl_value_always_f() | + pwr_pmu_idle_ctrl_filter_disabled_f()); + gk20a_writel(g, pwr_pmu_idle_ctrl_r(0), data); + + gk20a_writel(g, pwr_pmu_idle_mask_r(4), + pwr_pmu_idle_mask_gr_enabled_f() | + pwr_pmu_idle_mask_ce_2_enabled_f()); + + data = gk20a_readl(g, pwr_pmu_idle_ctrl_r(4)); + data = set_field(data, pwr_pmu_idle_ctrl_value_m() | + pwr_pmu_idle_ctrl_filter_m(), + pwr_pmu_idle_ctrl_value_busy_f() | + pwr_pmu_idle_ctrl_filter_disabled_f()); + gk20a_writel(g, pwr_pmu_idle_ctrl_r(4), data); + + gk20a_writel(g, pwr_pmu_idle_count_r(0), pwr_pmu_idle_count_reset_f(1)); + gk20a_writel(g, pwr_pmu_idle_count_r(4), pwr_pmu_idle_count_reset_f(1)); + gk20a_writel(g, pwr_pmu_idle_intr_status_r(), + pwr_pmu_idle_intr_status_intr_f(1)); } u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id) @@ -812,6 +845,18 @@ void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id) pwr_pmu_idle_count_reset_f(1)); } +u32 gk20a_pmu_read_idle_intr_status(struct gk20a *g) +{ + return pwr_pmu_idle_intr_status_intr_v( + gk20a_readl(g, pwr_pmu_idle_intr_status_r())); +} + +void gk20a_pmu_clear_idle_intr_status(struct gk20a *g) +{ + gk20a_writel(g, pwr_pmu_idle_intr_status_r(), + pwr_pmu_idle_intr_status_intr_f(1)); +} + void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id, struct pmu_pg_stats_data *pg_stat_data) { diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h index 35b80eaf..65ffd636 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h @@ -58,6 +58,9 @@ void gk20a_pmu_msgq_tail(struct nvgpu_pmu *pmu, u32 *tail, bool set); u32 gk20a_pmu_read_idle_counter(struct gk20a *g, u32 counter_id); void gk20a_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id); +u32 gk20a_pmu_read_idle_intr_status(struct gk20a *g); +void gk20a_pmu_clear_idle_intr_status(struct gk20a *g); + void gk20a_write_dmatrfbase(struct gk20a *g, u32 addr); bool gk20a_is_pmu_supported(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 7eaf6bff..dbfbc3d7 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -530,6 +530,8 @@ static const struct gpu_ops gm20b_ops = { .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config, .pmu_read_idle_counter = gk20a_pmu_read_idle_counter, .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter, + .pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status, + .pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status, .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats, .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats, .pmu_enable_irq = gk20a_pmu_enable_irq, diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 86892d23..fea2ffa0 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -631,6 +631,8 @@ static const struct gpu_ops gp106_ops = { .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config, .pmu_read_idle_counter = gk20a_pmu_read_idle_counter, .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter, + .pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status, + .pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status, .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats, .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats, .pmu_enable_irq = gk20a_pmu_enable_irq, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 85051c11..b3379253 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -595,6 +595,8 @@ static const struct gpu_ops gp10b_ops = { .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config, .pmu_read_idle_counter = gk20a_pmu_read_idle_counter, .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter, + .pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status, + .pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status, .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats, .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats, .pmu_enable_irq = gk20a_pmu_enable_irq, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 5f1a18a0..9621aaa4 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -756,6 +756,8 @@ static const struct gpu_ops gv100_ops = { .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config, .pmu_read_idle_counter = gk20a_pmu_read_idle_counter, .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter, + .pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status, + .pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status, .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats, .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats, .pmu_enable_irq = gk20a_pmu_enable_irq, diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index cf6a7e2c..d52d1c7e 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -713,6 +713,8 @@ static const struct gpu_ops gv11b_ops = { .pmu_pg_idle_counter_config = gk20a_pmu_pg_idle_counter_config, .pmu_read_idle_counter = gk20a_pmu_read_idle_counter, .pmu_reset_idle_counter = gk20a_pmu_reset_idle_counter, + .pmu_read_idle_intr_status = gk20a_pmu_read_idle_intr_status, + .pmu_clear_idle_intr_status = gk20a_pmu_clear_idle_intr_status, .pmu_dump_elpg_stats = gk20a_pmu_dump_elpg_stats, .pmu_dump_falcon_stats = gk20a_pmu_dump_falcon_stats, .pmu_enable_irq = gk20a_pmu_enable_irq, diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index dfa4aaf2..aa435638 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -1050,6 +1050,8 @@ struct gpu_ops { void (*pmu_init_perfmon_counter)(struct gk20a *g); void (*pmu_pg_idle_counter_config)(struct gk20a *g, u32 pg_engine_id); u32 (*pmu_read_idle_counter)(struct gk20a *g, u32 counter_id); + u32 (*pmu_read_idle_intr_status)(struct gk20a *g); + void (*pmu_clear_idle_intr_status)(struct gk20a *g); void (*pmu_reset_idle_counter)(struct gk20a *g, u32 counter_id); void (*pmu_dump_elpg_stats)(struct nvgpu_pmu *pmu); void (*pmu_dump_falcon_stats)(struct nvgpu_pmu *pmu); diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_pwr_gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_pwr_gk20a.h index 71b73d2a..28457634 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_pwr_gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_pwr_gk20a.h @@ -672,6 +672,46 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void) { return 0x0U; } +static inline u32 pwr_pmu_idle_threshold_r(u32 i) +{ + return 0x0010a8a0U + i*4U; +} +static inline u32 pwr_pmu_idle_threshold_value_f(u32 v) +{ + return (v & 0x7fffffffU) << 0U; +} +static inline u32 pwr_pmu_idle_intr_r(void) +{ + return 0x0010a9e8U; +} +static inline u32 pwr_pmu_idle_intr_en_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_en_disabled_v(void) +{ + return 0x00000000U; +} +static inline u32 pwr_pmu_idle_intr_en_enabled_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_r(void) +{ + return 0x0010a9ecU; +} +static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_m(void) +{ + return U32(0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r) +{ + return (r >> 0U) & 0x1U; +} static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) { return 0x0010a9f0U + i*8U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_pwr_gm20b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_pwr_gm20b.h index fa232644..2ca1f02b 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_pwr_gm20b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_pwr_gm20b.h @@ -716,6 +716,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void) { return 0x0U; } +static inline u32 pwr_pmu_idle_threshold_r(u32 i) +{ + return 0x0010a8a0U + i*4U; +} +static inline u32 pwr_pmu_idle_threshold_value_f(u32 v) +{ + return (v & 0x7fffffffU) << 0U; +} +static inline u32 pwr_pmu_idle_intr_r(void) +{ + return 0x0010a9e8U; +} +static inline u32 pwr_pmu_idle_intr_en_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_en_disabled_v(void) +{ + return 0x00000000U; +} +static inline u32 pwr_pmu_idle_intr_en_enabled_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_r(void) +{ + return 0x0010a9ecU; +} +static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_m(void) +{ + return U32(0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r) +{ + return (r >> 0U) & 0x1U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void) +{ + return 0x00000001U; +} static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) { return 0x0010a9f0U + i*8U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_pwr_gp106.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_pwr_gp106.h index a9fbbd10..2e75fa6e 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_pwr_gp106.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_pwr_gp106.h @@ -724,6 +724,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void) { return 0x0U; } +static inline u32 pwr_pmu_idle_threshold_r(u32 i) +{ + return 0x0010a8a0U + i*4U; +} +static inline u32 pwr_pmu_idle_threshold_value_f(u32 v) +{ + return (v & 0x7fffffffU) << 0U; +} +static inline u32 pwr_pmu_idle_intr_r(void) +{ + return 0x0010a9e8U; +} +static inline u32 pwr_pmu_idle_intr_en_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_en_disabled_v(void) +{ + return 0x00000000U; +} +static inline u32 pwr_pmu_idle_intr_en_enabled_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_r(void) +{ + return 0x0010a9ecU; +} +static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_m(void) +{ + return U32(0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r) +{ + return (r >> 0U) & 0x1U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void) +{ + return 0x00000001U; +} static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) { return 0x0010a9f0U + i*8U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pwr_gp10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pwr_gp10b.h index 73a5c45c..c160e897 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pwr_gp10b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_pwr_gp10b.h @@ -720,6 +720,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void) { return 0x0U; } +static inline u32 pwr_pmu_idle_threshold_r(u32 i) +{ + return 0x0010a8a0U + i*4U; +} +static inline u32 pwr_pmu_idle_threshold_value_f(u32 v) +{ + return (v & 0x7fffffffU) << 0U; +} +static inline u32 pwr_pmu_idle_intr_r(void) +{ + return 0x0010a9e8U; +} +static inline u32 pwr_pmu_idle_intr_en_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_en_disabled_v(void) +{ + return 0x00000000U; +} +static inline u32 pwr_pmu_idle_intr_en_enabled_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_r(void) +{ + return 0x0010a9ecU; +} +static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_m(void) +{ + return U32(0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r) +{ + return (r >> 0U) & 0x1U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void) +{ + return 0x00000001U; +} static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) { return 0x0010a9f0U + i*8U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_pwr_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_pwr_gv100.h index 4b0b0326..c719226c 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_pwr_gv100.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_pwr_gv100.h @@ -824,6 +824,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void) { return 0x0U; } +static inline u32 pwr_pmu_idle_threshold_r(u32 i) +{ + return 0x0010a8a0U + i*4U; +} +static inline u32 pwr_pmu_idle_threshold_value_f(u32 v) +{ + return (v & 0x7fffffffU) << 0U; +} +static inline u32 pwr_pmu_idle_intr_r(void) +{ + return 0x0010a9e8U; +} +static inline u32 pwr_pmu_idle_intr_en_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_en_disabled_v(void) +{ + return 0x00000000U; +} +static inline u32 pwr_pmu_idle_intr_en_enabled_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_r(void) +{ + return 0x0010a9ecU; +} +static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_m(void) +{ + return U32(0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r) +{ + return (r >> 0U) & 0x1U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void) +{ + return 0x00000001U; +} static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) { return 0x0010a9f0U + i*8U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_pwr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_pwr_gv11b.h index c16d44f1..295c6e95 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_pwr_gv11b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_pwr_gv11b.h @@ -880,6 +880,54 @@ static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void) { return 0x0U; } +static inline u32 pwr_pmu_idle_threshold_r(u32 i) +{ + return 0x0010a8a0U + i*4U; +} +static inline u32 pwr_pmu_idle_threshold_value_f(u32 v) +{ + return (v & 0x7fffffffU) << 0U; +} +static inline u32 pwr_pmu_idle_intr_r(void) +{ + return 0x0010a9e8U; +} +static inline u32 pwr_pmu_idle_intr_en_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_en_disabled_v(void) +{ + return 0x00000000U; +} +static inline u32 pwr_pmu_idle_intr_en_enabled_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_r(void) +{ + return 0x0010a9ecU; +} +static inline u32 pwr_pmu_idle_intr_status_intr_f(u32 v) +{ + return (v & 0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_m(void) +{ + return U32(0x1U) << 0U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_v(u32 r) +{ + return (r >> 0U) & 0x1U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_pending_v(void) +{ + return 0x00000001U; +} +static inline u32 pwr_pmu_idle_intr_status_intr_clear_v(void) +{ + return 0x00000001U; +} static inline u32 pwr_pmu_idle_mask_supp_r(u32 i) { return 0x0010a9f0U + i*8U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/pmu.h index 7283755a..00194ec0 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmu.h @@ -144,6 +144,9 @@ enum { #define APCTRL_POWER_BREAKEVEN_DEFAULT_US (2000) #define APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT (200) +/* pmu load const defines */ +#define PMU_BUSY_CYCLES_NORM_MAX (1000U) + /* RPC */ #define PMU_RPC_EXECUTE(_stat, _pmu, _unit, _func, _prpc, _size)\ do { \ @@ -449,6 +452,7 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu, int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu); int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load); int nvgpu_pmu_load_update(struct gk20a *g); +int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm); void nvgpu_pmu_reset_load_counters(struct gk20a *g); void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, u32 *total_cycles); diff --git a/drivers/gpu/nvgpu/os/linux/scale.c b/drivers/gpu/nvgpu/os/linux/scale.c index ee5b6861..ef7996e6 100644 --- a/drivers/gpu/nvgpu/os/linux/scale.c +++ b/drivers/gpu/nvgpu/os/linux/scale.c @@ -211,18 +211,18 @@ static int gk20a_scale_target(struct device *dev, unsigned long *freq, } /* - * update_load_estimate_gpmu(profile) + * update_load_estimate_busy_cycles(dev) * - * Update load estimate using gpmu. The gpmu value is normalised + * Update load estimate using pmu idle counters. Result is normalised * based on the time it was asked last time. */ -static void update_load_estimate_gpmu(struct device *dev) +static void update_load_estimate_busy_cycles(struct device *dev) { struct gk20a *g = get_gk20a(dev); struct gk20a_scale_profile *profile = g->scale_profile; unsigned long dt; - u32 busy_time; + u32 busy_cycles_norm; ktime_t t; t = ktime_get(); @@ -230,8 +230,9 @@ static void update_load_estimate_gpmu(struct device *dev) profile->dev_stat.total_time = dt; profile->last_event_time = t; - nvgpu_pmu_load_norm(g, &busy_time); - profile->dev_stat.busy_time = (busy_time * dt) / 1000; + nvgpu_pmu_busy_cycles_norm(g, &busy_cycles_norm); + profile->dev_stat.busy_time = + (busy_cycles_norm * dt) / PMU_BUSY_CYCLES_NORM_MAX; } /* @@ -284,9 +285,6 @@ static int gk20a_scale_get_dev_status(struct device *dev, struct gk20a_scale_profile *profile = g->scale_profile; struct gk20a_platform *platform = dev_get_drvdata(dev); - /* update the software shadow */ - nvgpu_pmu_load_update(g); - /* inform edp about new constraint */ if (platform->prescale) platform->prescale(dev); @@ -296,7 +294,7 @@ static int gk20a_scale_get_dev_status(struct device *dev, g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); /* Update load estimate */ - update_load_estimate_gpmu(dev); + update_load_estimate_busy_cycles(dev); /* Copy the contents of the current device status */ *stat = profile->dev_stat; diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c index eae0ba9e..7c800d5f 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c @@ -436,6 +436,8 @@ static const struct gpu_ops vgpu_gp10b_ops = { .pmu_pg_idle_counter_config = NULL, .pmu_read_idle_counter = NULL, .pmu_reset_idle_counter = NULL, + .pmu_read_idle_intr_status = NULL, + .pmu_clear_idle_intr_status = NULL, .pmu_dump_elpg_stats = NULL, .pmu_dump_falcon_stats = NULL, .pmu_enable_irq = NULL, diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c index de006b1e..78ea5643 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c @@ -504,6 +504,8 @@ static const struct gpu_ops vgpu_gv11b_ops = { .pmu_pg_idle_counter_config = NULL, .pmu_read_idle_counter = NULL, .pmu_reset_idle_counter = NULL, + .pmu_read_idle_intr_status = NULL, + .pmu_clear_idle_intr_status = NULL, .pmu_dump_elpg_stats = NULL, .pmu_dump_falcon_stats = NULL, .pmu_enable_irq = NULL, -- cgit v1.2.2