summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDeepak Goyal <dgoyal@nvidia.com>2018-07-16 01:40:23 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-07-24 02:52:39 -0400
commitd3b8415948de8c9ffe2f2fa66340dd7e71a894e6 (patch)
tree328970819ace31fae3bf3bc27376121330064db9
parent2df33e32e40eb2c8e025f8d27396d9b5cdb3ac11 (diff)
gpu: nvgpu: tpc powergating through sysfs
- Adds static TPC power-gating through sysfs. - The active TPC count persists until the GPU/system is booted again. - tpc_pg_mask can be written only after GPU probe finishes and GPU boot is triggered. Note: To be able to use this feature, we need to change the boot/init scripts of the OS (used with the nvgpu driver) to write to the sysfs nodes before posting the discover image size query to FECS. Bug 200406784 Change-Id: Id749c7a617422c625f77d0c1a9aada2eb960c4d0 Signed-off-by: Deepak Goyal <dgoyal@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1742422 Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.c11
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h8
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.c40
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.h1
-rw-r--r--drivers/gpu/nvgpu/gv11b/hal_gv11b.c1
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h36
-rw-r--r--drivers/gpu/nvgpu/os/linux/driver_common.c3
-rw-r--r--drivers/gpu/nvgpu/os/linux/platform_gk20a.h5
-rw-r--r--drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c3
-rw-r--r--drivers/gpu/nvgpu/os/linux/sysfs.c59
10 files changed, 167 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 08e8b79f..ed48253f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -274,12 +274,23 @@ int gk20a_finalize_poweron(struct gk20a *g)
274 } 274 }
275 } 275 }
276 276
277 nvgpu_mutex_acquire(&g->tpc_pg_lock);
278
279 if (g->can_tpc_powergate) {
280 if (g->ops.gr.powergate_tpc != NULL) {
281 g->ops.gr.powergate_tpc(g);
282 }
283 }
284
277 err = gk20a_init_gr_support(g); 285 err = gk20a_init_gr_support(g);
278 if (err) { 286 if (err) {
279 nvgpu_err(g, "failed to init gk20a gr"); 287 nvgpu_err(g, "failed to init gk20a gr");
288 nvgpu_mutex_release(&g->tpc_pg_lock);
280 goto done; 289 goto done;
281 } 290 }
282 291
292 nvgpu_mutex_release(&g->tpc_pg_lock);
293
283 if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) { 294 if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
284 err = gk20a_init_pstate_pmu_support(g); 295 err = gk20a_init_pstate_pmu_support(g);
285 if (err) { 296 if (err) {
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 948d8e60..4934958c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -307,6 +307,7 @@ struct gpu_ops {
307 u32 class, u32 padding); 307 u32 class, u32 padding);
308 void (*free_gr_ctx)(struct gk20a *g, 308 void (*free_gr_ctx)(struct gk20a *g,
309 struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx); 309 struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
310 void (*powergate_tpc)(struct gk20a *g);
310 void (*update_ctxsw_preemption_mode)(struct gk20a *g, 311 void (*update_ctxsw_preemption_mode)(struct gk20a *g,
311 struct channel_gk20a *c, 312 struct channel_gk20a *c,
312 struct nvgpu_mem *mem); 313 struct nvgpu_mem *mem);
@@ -1361,6 +1362,8 @@ struct gk20a {
1361 u64 log_mask; 1362 u64 log_mask;
1362 u32 log_trace; 1363 u32 log_trace;
1363 1364
1365 struct nvgpu_mutex tpc_pg_lock;
1366
1364 struct nvgpu_gpu_params params; 1367 struct nvgpu_gpu_params params;
1365 1368
1366 /* 1369 /*
@@ -1532,6 +1535,11 @@ struct gk20a {
1532 1535
1533 u32 tpc_fs_mask_user; 1536 u32 tpc_fs_mask_user;
1534 1537
1538 u32 tpc_pg_mask;
1539 bool can_tpc_powergate;
1540
1541 u32 valid_tpc_mask;
1542
1535 struct nvgpu_bios bios; 1543 struct nvgpu_bios bios;
1536 bool bios_is_init; 1544 bool bios_is_init;
1537 1545
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index c2f47a20..51588f1f 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -58,6 +58,7 @@
58#include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h> 58#include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h>
59#include <nvgpu/hw/gv11b/hw_therm_gv11b.h> 59#include <nvgpu/hw/gv11b/hw_therm_gv11b.h>
60#include <nvgpu/hw/gv11b/hw_perf_gv11b.h> 60#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
61#include <nvgpu/hw/gv11b/hw_fuse_gv11b.h>
61 62
62#define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100 63#define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100
63 64
@@ -71,6 +72,16 @@
71 */ 72 */
72#define GR_TPCS_INFO_FOR_MAPREGISTER 6 73#define GR_TPCS_INFO_FOR_MAPREGISTER 6
73 74
75/*
76 * There are 4 TPCs in GV11b ranging from TPC0 to TPC3
77 * There are two PES in GV11b each controlling two TPCs
78 * PES0 is linked to TPC0 & TPC2
79 * PES1 is linked to TPC1 & TPC3
80 */
/* Replacement lists are fully parenthesized so they expand as one operand. */
#define TPC_MASK_FOR_PESID_0	((u32)0x5)	/* TPC0 | TPC2 */
#define TPC_MASK_FOR_PESID_1	((u32)0xa)	/* TPC1 | TPC3 */
83
84
74bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) 85bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num)
75{ 86{
76 bool valid = false; 87 bool valid = false;
@@ -117,6 +128,35 @@ bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
117 return valid; 128 return valid;
118} 129}
119 130
131void gr_gv11b_powergate_tpc(struct gk20a *g)
132{
133 u32 tpc_pg_status = gk20a_readl(g, fuse_status_opt_tpc_gpc_r(0));
134
135 if (tpc_pg_status == g->tpc_pg_mask) {
136 nvgpu_info(g, "TPC-PG mask and TPC-PG status is same");
137 return;
138 }
139
140 gk20a_writel(g, fuse_ctrl_opt_tpc_gpc_r(0), (g->tpc_pg_mask));
141
142 do {
143 tpc_pg_status = gk20a_readl(g, fuse_status_opt_tpc_gpc_r(0));
144 } while (tpc_pg_status != g->tpc_pg_mask);
145
146 gk20a_writel(g, gr_fe_tpc_pesmask_r(), gr_fe_tpc_pesmask_req_send_f() |
147 gr_fe_tpc_pesmask_action_write_f() |
148 gr_fe_tpc_pesmask_pesid_f(0) |
149 gr_fe_tpc_pesmask_gpcid_f(0) |
150 ((~g->tpc_pg_mask & (u32) 0xf) & TPC_MASK_FOR_PESID_0));
151 gk20a_writel(g, gr_fe_tpc_pesmask_r(), gr_fe_tpc_pesmask_req_send_f() |
152 gr_fe_tpc_pesmask_action_write_f() |
153 gr_fe_tpc_pesmask_pesid_f(1) |
154 gr_fe_tpc_pesmask_gpcid_f(0) |
155 ((~g->tpc_pg_mask & (u32) 0xf) & TPC_MASK_FOR_PESID_1));
156
157 return;
158}
159
120bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num) 160bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num)
121{ 161{
122 bool valid = false; 162 bool valid = false;
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index a8dbd3a8..f799ccfe 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -250,4 +250,5 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
250 u32 *priv_addr_table, 250 u32 *priv_addr_table,
251 u32 *num_registers); 251 u32 *num_registers);
252u32 gr_gv11b_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc); 252u32 gr_gv11b_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc);
253void gr_gv11b_powergate_tpc(struct gk20a *g);
253#endif 254#endif
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index d479fef8..08c3097e 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -294,6 +294,7 @@ static const struct gpu_ops gv11b_ops = {
294 .init_ctx_state = gr_gp10b_init_ctx_state, 294 .init_ctx_state = gr_gp10b_init_ctx_state,
295 .alloc_gr_ctx = gr_gp10b_alloc_gr_ctx, 295 .alloc_gr_ctx = gr_gp10b_alloc_gr_ctx,
296 .free_gr_ctx = gr_gk20a_free_gr_ctx, 296 .free_gr_ctx = gr_gk20a_free_gr_ctx,
297 .powergate_tpc = gr_gv11b_powergate_tpc,
297 .update_ctxsw_preemption_mode = 298 .update_ctxsw_preemption_mode =
298 gr_gv11b_update_ctxsw_preemption_mode, 299 gr_gv11b_update_ctxsw_preemption_mode,
299 .dump_gr_regs = gr_gv11b_dump_gr_status_regs, 300 .dump_gr_regs = gr_gv11b_dump_gr_status_regs,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
index 90994a53..473eaff4 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
@@ -1572,6 +1572,42 @@ static inline u32 gr_fe_tpc_fs_r(u32 i)
1572{ 1572{
1573 return 0x0040a200U + i*4U; 1573 return 0x0040a200U + i*4U;
1574} 1574}
/*
 * Accessors for the FE TPC PES-mask register. Layout as encoded below:
 *   bit  31     req    (send = 1)
 *   bit  30     action (write = 1, read = 0)
 *   bits 29:24  pesid
 *   bits 23:16  gpcid
 *   bits 15:0   mask
 */
/* Register offset. */
1575static inline u32 gr_fe_tpc_pesmask_r(void)
1576{
1577 return 0x0040a260U;
1578}
/* Place a PES id into the 6-bit field at bits 29:24. */
1579static inline u32 gr_fe_tpc_pesmask_pesid_f(u32 v)
1580{
1581 return (v & 0x3fU) << 24U;
1582}
/* Place a GPC id into the 8-bit field at bits 23:16. */
1583static inline u32 gr_fe_tpc_pesmask_gpcid_f(u32 v)
1584{
1585 return (v & 0xffU) << 16U;
1586}
/* Mask covering the single-bit action field (bit 30). */
1587static inline u32 gr_fe_tpc_pesmask_action_m(void)
1588{
1589 return 0x1U << 30U;
1590}
/* Action field value selecting a write (bit 30 set). */
1591static inline u32 gr_fe_tpc_pesmask_action_write_f(void)
1592{
1593 return 0x40000000U;
1594}
/* Action field value selecting a read (bit 30 clear). */
1595static inline u32 gr_fe_tpc_pesmask_action_read_f(void)
1596{
1597 return 0x0U;
1598}
/* Mask covering the single-bit req field (bit 31). */
1599static inline u32 gr_fe_tpc_pesmask_req_m(void)
1600{
1601 return 0x1U << 31U;
1602}
/* Req field value that sends the request (bit 31 set). */
1603static inline u32 gr_fe_tpc_pesmask_req_send_f(void)
1604{
1605 return 0x80000000U;
1606}
/* Mask covering the 16-bit mask field (bits 15:0). */
1607static inline u32 gr_fe_tpc_pesmask_mask_m(void)
1608{
1609 return 0xffffU << 0U;
1610}
1575static inline u32 gr_pri_mme_shadow_raw_index_r(void) 1611static inline u32 gr_pri_mme_shadow_raw_index_r(void)
1576{ 1612{
1577 return 0x00404488U; 1613 return 0x00404488U;
diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c
index 8a7cf552..c651e394 100644
--- a/drivers/gpu/nvgpu/os/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/os/linux/driver_common.c
@@ -64,6 +64,7 @@ static void nvgpu_init_vars(struct gk20a *g)
64 nvgpu_mutex_init(&g->poweron_lock); 64 nvgpu_mutex_init(&g->poweron_lock);
65 nvgpu_mutex_init(&g->poweroff_lock); 65 nvgpu_mutex_init(&g->poweroff_lock);
66 nvgpu_mutex_init(&g->ctxsw_disable_lock); 66 nvgpu_mutex_init(&g->ctxsw_disable_lock);
67 nvgpu_mutex_init(&g->tpc_pg_lock);
67 68
68 l->regs_saved = l->regs; 69 l->regs_saved = l->regs;
69 l->bar1_saved = l->bar1; 70 l->bar1_saved = l->bar1;
@@ -168,6 +169,8 @@ static void nvgpu_init_pm_vars(struct gk20a *g)
168 g->ptimer_src_freq = platform->ptimer_src_freq; 169 g->ptimer_src_freq = platform->ptimer_src_freq;
169 g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g)); 170 g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g));
170 __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init); 171 __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init);
172 g->can_tpc_powergate = platform->can_tpc_powergate;
173 g->valid_tpc_mask = platform->valid_tpc_mask;
171 g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init; 174 g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init;
172 /* if default delay is not set, set default delay to 500msec */ 175 /* if default delay is not set, set default delay to 500msec */
173 if (platform->railgate_delay_init) 176 if (platform->railgate_delay_init)
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
index d9725e4c..a4c3eca3 100644
--- a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
@@ -44,6 +44,9 @@ struct gk20a_platform {
44 bool can_railgate_init; 44 bool can_railgate_init;
45 45
46 /* Should be populated at probe. */ 46 /* Should be populated at probe. */
47 bool can_tpc_powergate;
48
49 /* Should be populated at probe. */
47 bool can_elpg_init; 50 bool can_elpg_init;
48 51
49 /* Should be populated at probe. */ 52 /* Should be populated at probe. */
@@ -71,6 +74,8 @@ struct gk20a_platform {
71 /* Reset control for device */ 74 /* Reset control for device */
72 struct reset_control *reset_control; 75 struct reset_control *reset_control;
73#endif 76#endif
77 /* valid TPC-MASK */
78 u32 valid_tpc_mask;
74 79
75 /* Delay before rail gated */ 80 /* Delay before rail gated */
76 int railgate_delay_init; 81 int railgate_delay_init;
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
index c9c13197..edb3263c 100644
--- a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
+++ b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
@@ -209,6 +209,9 @@ struct gk20a_platform gv11b_tegra_platform = {
209 .railgate_delay_init = 500, 209 .railgate_delay_init = 500,
210 .can_railgate_init = true, 210 .can_railgate_init = true,
211 211
212 .can_tpc_powergate = true,
213 .valid_tpc_mask = 0xc,
214
212 .can_slcg = true, 215 .can_slcg = true,
213 .can_blcg = true, 216 .can_blcg = true,
214 .can_elcg = true, 217 .can_elcg = true,
diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c
index 1b84e2e2..b607768a 100644
--- a/drivers/gpu/nvgpu/os/linux/sysfs.c
+++ b/drivers/gpu/nvgpu/os/linux/sysfs.c
@@ -31,6 +31,8 @@
31 31
32#define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH) 32#define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH)
33 33
/*
 * tpc_pg_mask value with no bit set; going by the name, every TPC stays
 * active. The replacement list is parenthesized so it expands as one operand.
 */
#define TPC_MASK_FOR_ALL_ACTIVE_TPCs	((u32)0x0)
35
34static ssize_t elcg_enable_store(struct device *dev, 36static ssize_t elcg_enable_store(struct device *dev,
35 struct device_attribute *attr, const char *buf, size_t count) 37 struct device_attribute *attr, const char *buf, size_t count)
36{ 38{
@@ -843,6 +845,61 @@ static ssize_t force_idle_read(struct device *dev,
843static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store); 845static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store);
844#endif 846#endif
845 847
848static ssize_t tpc_pg_mask_read(struct device *dev,
849 struct device_attribute *attr, char *buf)
850{
851 struct gk20a *g = get_gk20a(dev);
852
853 return snprintf(buf, PAGE_SIZE, "%d\n", g->tpc_pg_mask);
854}
855
856static ssize_t tpc_pg_mask_store(struct device *dev,
857 struct device_attribute *attr, const char *buf, size_t count)
858{
859 struct gk20a *g = get_gk20a(dev);
860 struct gr_gk20a *gr = &g->gr;
861 unsigned long val = 0;
862
863 nvgpu_mutex_acquire(&g->tpc_pg_lock);
864
865 if (!g->can_tpc_powergate) {
866 nvgpu_info(g, "TPC-PG not enabled for the platform");
867 goto exit;
868 }
869
870 if (kstrtoul(buf, 10, &val) < 0) {
871 nvgpu_err(g, "invalid value");
872 nvgpu_mutex_release(&g->tpc_pg_lock);
873 return -EINVAL;
874 }
875
876 if (val == g->tpc_pg_mask) {
877 nvgpu_info(g, "no value change, same mask already set");
878 goto exit;
879 }
880
881 if (gr->ctx_vars.golden_image_size) {
882 nvgpu_err(g, "golden image size already initialized");
883 nvgpu_mutex_release(&g->tpc_pg_lock);
884 return -ENODEV;
885 }
886
887 if (val == TPC_MASK_FOR_ALL_ACTIVE_TPCs || val == g->valid_tpc_mask) {
888 g->tpc_pg_mask = val;
889 } else {
890
891 nvgpu_err(g, "TPC-PG mask is invalid");
892 nvgpu_mutex_release(&g->tpc_pg_lock);
893 return -EINVAL;
894 }
895exit:
896 nvgpu_mutex_release(&g->tpc_pg_lock);
897
898 return count;
899}
900
/*
 * Device attribute "tpc_pg_mask" with ROOTRW permissions; reads are served by
 * tpc_pg_mask_read() and writes by tpc_pg_mask_store().
 */
901static DEVICE_ATTR(tpc_pg_mask, ROOTRW, tpc_pg_mask_read, tpc_pg_mask_store);
902
846static ssize_t tpc_fs_mask_store(struct device *dev, 903static ssize_t tpc_fs_mask_store(struct device *dev,
847 struct device_attribute *attr, const char *buf, size_t count) 904 struct device_attribute *attr, const char *buf, size_t count)
848{ 905{
@@ -1130,6 +1187,7 @@ void nvgpu_remove_sysfs(struct device *dev)
1130 device_remove_file(dev, &dev_attr_aelpg_enable); 1187 device_remove_file(dev, &dev_attr_aelpg_enable);
1131 device_remove_file(dev, &dev_attr_allow_all); 1188 device_remove_file(dev, &dev_attr_allow_all);
1132 device_remove_file(dev, &dev_attr_tpc_fs_mask); 1189 device_remove_file(dev, &dev_attr_tpc_fs_mask);
1190 device_remove_file(dev, &dev_attr_tpc_pg_mask);
1133 device_remove_file(dev, &dev_attr_min_timeslice_us); 1191 device_remove_file(dev, &dev_attr_min_timeslice_us);
1134 device_remove_file(dev, &dev_attr_max_timeslice_us); 1192 device_remove_file(dev, &dev_attr_max_timeslice_us);
1135 1193
@@ -1181,6 +1239,7 @@ int nvgpu_create_sysfs(struct device *dev)
1181 error |= device_create_file(dev, &dev_attr_aelpg_enable); 1239 error |= device_create_file(dev, &dev_attr_aelpg_enable);
1182 error |= device_create_file(dev, &dev_attr_allow_all); 1240 error |= device_create_file(dev, &dev_attr_allow_all);
1183 error |= device_create_file(dev, &dev_attr_tpc_fs_mask); 1241 error |= device_create_file(dev, &dev_attr_tpc_fs_mask);
1242 error |= device_create_file(dev, &dev_attr_tpc_pg_mask);
1184 error |= device_create_file(dev, &dev_attr_min_timeslice_us); 1243 error |= device_create_file(dev, &dev_attr_min_timeslice_us);
1185 error |= device_create_file(dev, &dev_attr_max_timeslice_us); 1244 error |= device_create_file(dev, &dev_attr_max_timeslice_us);
1186 1245