diff options
author | Richard Zhao <rizhao@nvidia.com> | 2016-04-25 19:16:54 -0400 |
---|---|---|
committer | Adeel Raza <araza@nvidia.com> | 2016-05-10 15:29:49 -0400 |
commit | bc72480f8dc09737fc87782a69c71785de08f2c0 (patch) | |
tree | 385457db6cc71dc8f844742f3b4bb6ddaa556baa /drivers/gpu | |
parent | 9357086cee7f11de57f37447ce068c59eebcd411 (diff) |
gpu: nvgpu: add fuse overrides for tpc disabling
- add fuse_override in gops. Implement it starting from gm20b.
- set cwd fs register, so cuda won't use disabled TPCs
Bug 1757262
Bug 200169697
Change-Id: If7bac58bd3a6bcf2925197ea5b7c2d10a77e0933
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
(cherry picked from commit 66cde7724815e9e5e85ab9b07fc985a78530222f)
Reviewed-on: http://git-master/r/1132177
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: Adeel Raza <araza@nvidia.com>
Tested-by: Adeel Raza <araza@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 76 |
3 files changed, 74 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 09198fa5..f228cce4 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -259,6 +259,7 @@ struct gpu_ops { | |||
259 | u32 compute_preempt_mode); | 259 | u32 compute_preempt_mode); |
260 | int (*get_preemption_mode_flags)(struct gk20a *g, | 260 | int (*get_preemption_mode_flags)(struct gk20a *g, |
261 | struct nvgpu_preemption_modes_rec *preemption_modes_rec); | 261 | struct nvgpu_preemption_modes_rec *preemption_modes_rec); |
262 | int (*fuse_override)(struct gk20a *g); | ||
262 | } gr; | 263 | } gr; |
263 | const char *name; | 264 | const char *name; |
264 | struct { | 265 | struct { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 8b645cc2..c61bb235 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -4740,6 +4740,9 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g) | |||
4740 | return 0; | 4740 | return 0; |
4741 | } | 4741 | } |
4742 | 4742 | ||
4743 | if (g->ops.gr.fuse_override) | ||
4744 | g->ops.gr.fuse_override(g); | ||
4745 | |||
4743 | gr->g = g; | 4746 | gr->g = g; |
4744 | 4747 | ||
4745 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 4748 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 2a982f87..fd4065dc 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -19,6 +19,8 @@ | |||
19 | #include <linux/tegra-fuse.h> | 19 | #include <linux/tegra-fuse.h> |
20 | #include <linux/vmalloc.h> | 20 | #include <linux/vmalloc.h> |
21 | 21 | ||
22 | #include <dt-bindings/soc/gm20b-fuse.h> | ||
23 | |||
22 | #include "gk20a/gk20a.h" | 24 | #include "gk20a/gk20a.h" |
23 | #include "gk20a/gr_gk20a.h" | 25 | #include "gk20a/gr_gk20a.h" |
24 | 26 | ||
@@ -527,7 +529,7 @@ static void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) | |||
527 | 529 | ||
528 | static void gr_gm20b_load_tpc_mask(struct gk20a *g) | 530 | static void gr_gm20b_load_tpc_mask(struct gk20a *g) |
529 | { | 531 | { |
530 | u32 pes_tpc_mask = 0; | 532 | u32 pes_tpc_mask = 0, fuse_tpc_mask; |
531 | u32 gpc, pes; | 533 | u32 gpc, pes; |
532 | u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); | 534 | u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); |
533 | 535 | ||
@@ -537,10 +539,13 @@ static void gr_gm20b_load_tpc_mask(struct gk20a *g) | |||
537 | num_tpc_per_gpc * gpc; | 539 | num_tpc_per_gpc * gpc; |
538 | } | 540 | } |
539 | 541 | ||
540 | if (g->tpc_fs_mask_user && g->ops.gr.get_gpc_tpc_mask(g, 0) == | 542 | fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0); |
541 | (0x1 << g->gr.max_tpc_count) - 1) { | 543 | if (g->tpc_fs_mask_user && g->tpc_fs_mask_user != fuse_tpc_mask && |
544 | fuse_tpc_mask == (0x1 << g->gr.max_tpc_count) - 1) { | ||
542 | u32 val = g->tpc_fs_mask_user; | 545 | u32 val = g->tpc_fs_mask_user; |
543 | val &= (0x1 << g->gr.max_tpc_count) - 1; | 546 | val &= (0x1 << g->gr.max_tpc_count) - 1; |
547 | /* skip tpc to disable the other tpc cause channel timeout */ | ||
548 | val = (0x1 << hweight32(val)) - 1; | ||
544 | gk20a_writel(g, gr_fe_tpc_fs_r(), val); | 549 | gk20a_writel(g, gr_fe_tpc_fs_r(), val); |
545 | } else { | 550 | } else { |
546 | gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask); | 551 | gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask); |
@@ -557,6 +562,7 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | |||
557 | u32 tpc_sm_id = 0, gpc_tpc_id = 0; | 562 | u32 tpc_sm_id = 0, gpc_tpc_id = 0; |
558 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 563 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
559 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | 564 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
565 | u32 fuse_tpc_mask; | ||
560 | 566 | ||
561 | gk20a_dbg_fn(""); | 567 | gk20a_dbg_fn(""); |
562 | 568 | ||
@@ -605,9 +611,19 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | |||
605 | gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3])); | 611 | gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3])); |
606 | } | 612 | } |
607 | 613 | ||
608 | gk20a_writel(g, gr_cwd_fs_r(), | 614 | fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0); |
609 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | | 615 | if (g->tpc_fs_mask_user && |
610 | gr_cwd_fs_num_tpcs_f(gr->tpc_count)); | 616 | fuse_tpc_mask == (0x1 << gr->max_tpc_count) - 1) { |
617 | u32 val = g->tpc_fs_mask_user; | ||
618 | val &= (0x1 << gr->max_tpc_count) - 1; | ||
619 | gk20a_writel(g, gr_cwd_fs_r(), | ||
620 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | | ||
621 | gr_cwd_fs_num_tpcs_f(hweight32(val))); | ||
622 | } else { | ||
623 | gk20a_writel(g, gr_cwd_fs_r(), | ||
624 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | | ||
625 | gr_cwd_fs_num_tpcs_f(gr->tpc_count)); | ||
626 | } | ||
611 | 627 | ||
612 | gr_gm20b_load_tpc_mask(g); | 628 | gr_gm20b_load_tpc_mask(g); |
613 | 629 | ||
@@ -1362,6 +1378,53 @@ static int gr_gm20b_get_preemption_mode_flags(struct gk20a *g, | |||
1362 | return 0; | 1378 | return 0; |
1363 | } | 1379 | } |
1364 | 1380 | ||
1381 | static int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask) | ||
1382 | { | ||
1383 | if (!mask) | ||
1384 | return 0; | ||
1385 | |||
1386 | g->tpc_fs_mask_user = ~mask; | ||
1387 | |||
1388 | return 0; | ||
1389 | } | ||
1390 | |||
1391 | static int gm20b_gr_fuse_override(struct gk20a *g) | ||
1392 | { | ||
1393 | struct device_node *np = g->dev->of_node; | ||
1394 | u32 *fuses; | ||
1395 | int count, i; | ||
1396 | |||
1397 | if (!np) /* may be pcie device */ | ||
1398 | return 0; | ||
1399 | |||
1400 | count = of_property_count_elems_of_size(np, "fuse-overrides", 8); | ||
1401 | if (count <= 0) | ||
1402 | return count; | ||
1403 | |||
1404 | fuses = kmalloc(sizeof(u32) * count * 2, GFP_KERNEL); | ||
1405 | if (!fuses) | ||
1406 | return -ENOMEM; | ||
1407 | of_property_read_u32_array(np, "fuse-overrides", fuses, count * 2); | ||
1408 | for (i = 0; i < count; i++) { | ||
1409 | u32 fuse, value; | ||
1410 | |||
1411 | fuse = fuses[2 * i]; | ||
1412 | value = fuses[2 * i + 1]; | ||
1413 | switch (fuse) { | ||
1414 | case GM20B_FUSE_OPT_TPC_DISABLE: | ||
1415 | gm20b_gr_tpc_disable_override(g, value); | ||
1416 | break; | ||
1417 | default: | ||
1418 | gk20a_err(dev_from_gk20a(g), | ||
1419 | "ignore unknown fuse override %08x", fuse); | ||
1420 | break; | ||
1421 | } | ||
1422 | } | ||
1423 | |||
1424 | kfree(fuses); | ||
1425 | return 0; | ||
1426 | } | ||
1427 | |||
1365 | void gm20b_init_gr(struct gpu_ops *gops) | 1428 | void gm20b_init_gr(struct gpu_ops *gops) |
1366 | { | 1429 | { |
1367 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; | 1430 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; |
@@ -1435,4 +1498,5 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1435 | gops->gr.clear_sm_error_state = gm20b_gr_clear_sm_error_state; | 1498 | gops->gr.clear_sm_error_state = gm20b_gr_clear_sm_error_state; |
1436 | gops->gr.suspend_contexts = gr_gk20a_suspend_contexts; | 1499 | gops->gr.suspend_contexts = gr_gk20a_suspend_contexts; |
1437 | gops->gr.get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags; | 1500 | gops->gr.get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags; |
1501 | gops->gr.fuse_override = gm20b_gr_fuse_override; | ||
1438 | } | 1502 | } |