summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
diff options
context:
space:
mode:
author: Richard Zhao <rizhao@nvidia.com> 2016-04-25 19:16:54 -0400
committer: Adeel Raza <araza@nvidia.com> 2016-05-10 15:29:49 -0400
commit: bc72480f8dc09737fc87782a69c71785de08f2c0 (patch)
tree: 385457db6cc71dc8f844742f3b4bb6ddaa556baa /drivers/gpu/nvgpu/gm20b/gr_gm20b.c
parent: 9357086cee7f11de57f37447ce068c59eebcd411 (diff)
gpu: nvgpu: add fuse overrides for tpc disabling

- add fuse_override in gops. Implement it starting from gm20b.
- set cwd fs register, so cuda won't use disabled TPCs

Bug 1757262
Bug 200169697

Change-Id: If7bac58bd3a6bcf2925197ea5b7c2d10a77e0933
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
(cherry picked from commit 66cde7724815e9e5e85ab9b07fc985a78530222f)
Reviewed-on: http://git-master/r/1132177
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: Adeel Raza <araza@nvidia.com>
Tested-by: Adeel Raza <araza@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/gr_gm20b.c')
-rw-r--r-- drivers/gpu/nvgpu/gm20b/gr_gm20b.c 76
1 files changed, 70 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 2a982f87..fd4065dc 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -19,6 +19,8 @@
19#include <linux/tegra-fuse.h> 19#include <linux/tegra-fuse.h>
20#include <linux/vmalloc.h> 20#include <linux/vmalloc.h>
21 21
22#include <dt-bindings/soc/gm20b-fuse.h>
23
22#include "gk20a/gk20a.h" 24#include "gk20a/gk20a.h"
23#include "gk20a/gr_gk20a.h" 25#include "gk20a/gr_gk20a.h"
24 26
@@ -527,7 +529,7 @@ static void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
527 529
528static void gr_gm20b_load_tpc_mask(struct gk20a *g) 530static void gr_gm20b_load_tpc_mask(struct gk20a *g)
529{ 531{
530 u32 pes_tpc_mask = 0; 532 u32 pes_tpc_mask = 0, fuse_tpc_mask;
531 u32 gpc, pes; 533 u32 gpc, pes;
532 u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); 534 u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
533 535
@@ -537,10 +539,13 @@ static void gr_gm20b_load_tpc_mask(struct gk20a *g)
537 num_tpc_per_gpc * gpc; 539 num_tpc_per_gpc * gpc;
538 } 540 }
539 541
540 if (g->tpc_fs_mask_user && g->ops.gr.get_gpc_tpc_mask(g, 0) == 542 fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0);
541 (0x1 << g->gr.max_tpc_count) - 1) { 543 if (g->tpc_fs_mask_user && g->tpc_fs_mask_user != fuse_tpc_mask &&
544 fuse_tpc_mask == (0x1 << g->gr.max_tpc_count) - 1) {
542 u32 val = g->tpc_fs_mask_user; 545 u32 val = g->tpc_fs_mask_user;
543 val &= (0x1 << g->gr.max_tpc_count) - 1; 546 val &= (0x1 << g->gr.max_tpc_count) - 1;
547 /* skip tpc to disable the other tpc cause channel timeout */
548 val = (0x1 << hweight32(val)) - 1;
544 gk20a_writel(g, gr_fe_tpc_fs_r(), val); 549 gk20a_writel(g, gr_fe_tpc_fs_r(), val);
545 } else { 550 } else {
546 gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask); 551 gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask);
@@ -557,6 +562,7 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
557 u32 tpc_sm_id = 0, gpc_tpc_id = 0; 562 u32 tpc_sm_id = 0, gpc_tpc_id = 0;
558 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); 563 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
559 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); 564 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
565 u32 fuse_tpc_mask;
560 566
561 gk20a_dbg_fn(""); 567 gk20a_dbg_fn("");
562 568
@@ -605,9 +611,19 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
605 gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3])); 611 gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3]));
606 } 612 }
607 613
608 gk20a_writel(g, gr_cwd_fs_r(), 614 fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0);
609 gr_cwd_fs_num_gpcs_f(gr->gpc_count) | 615 if (g->tpc_fs_mask_user &&
610 gr_cwd_fs_num_tpcs_f(gr->tpc_count)); 616 fuse_tpc_mask == (0x1 << gr->max_tpc_count) - 1) {
617 u32 val = g->tpc_fs_mask_user;
618 val &= (0x1 << gr->max_tpc_count) - 1;
619 gk20a_writel(g, gr_cwd_fs_r(),
620 gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
621 gr_cwd_fs_num_tpcs_f(hweight32(val)));
622 } else {
623 gk20a_writel(g, gr_cwd_fs_r(),
624 gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
625 gr_cwd_fs_num_tpcs_f(gr->tpc_count));
626 }
611 627
612 gr_gm20b_load_tpc_mask(g); 628 gr_gm20b_load_tpc_mask(g);
613 629
@@ -1362,6 +1378,53 @@ static int gr_gm20b_get_preemption_mode_flags(struct gk20a *g,
1362 return 0; 1378 return 0;
1363} 1379}
1364 1380
1381static int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask)
1382{
1383 if (!mask)
1384 return 0;
1385
1386 g->tpc_fs_mask_user = ~mask;
1387
1388 return 0;
1389}
1390
1391static int gm20b_gr_fuse_override(struct gk20a *g)
1392{
1393 struct device_node *np = g->dev->of_node;
1394 u32 *fuses;
1395 int count, i;
1396
1397 if (!np) /* may be pcie device */
1398 return 0;
1399
1400 count = of_property_count_elems_of_size(np, "fuse-overrides", 8);
1401 if (count <= 0)
1402 return count;
1403
1404 fuses = kmalloc(sizeof(u32) * count * 2, GFP_KERNEL);
1405 if (!fuses)
1406 return -ENOMEM;
1407 of_property_read_u32_array(np, "fuse-overrides", fuses, count * 2);
1408 for (i = 0; i < count; i++) {
1409 u32 fuse, value;
1410
1411 fuse = fuses[2 * i];
1412 value = fuses[2 * i + 1];
1413 switch (fuse) {
1414 case GM20B_FUSE_OPT_TPC_DISABLE:
1415 gm20b_gr_tpc_disable_override(g, value);
1416 break;
1417 default:
1418 gk20a_err(dev_from_gk20a(g),
1419 "ignore unknown fuse override %08x", fuse);
1420 break;
1421 }
1422 }
1423
1424 kfree(fuses);
1425 return 0;
1426}
1427
1365void gm20b_init_gr(struct gpu_ops *gops) 1428void gm20b_init_gr(struct gpu_ops *gops)
1366{ 1429{
1367 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; 1430 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -1435,4 +1498,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
1435 gops->gr.clear_sm_error_state = gm20b_gr_clear_sm_error_state; 1498 gops->gr.clear_sm_error_state = gm20b_gr_clear_sm_error_state;
1436 gops->gr.suspend_contexts = gr_gk20a_suspend_contexts; 1499 gops->gr.suspend_contexts = gr_gk20a_suspend_contexts;
1437 gops->gr.get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags; 1500 gops->gr.get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags;
1501 gops->gr.fuse_override = gm20b_gr_fuse_override;
1438} 1502}