diff options
author | Kirill Artamonov <kartamonov@nvidia.com> | 2015-01-25 11:42:18 -0500 |
---|---|---|
committer | Deepak Nibade <dnibade@nvidia.com> | 2016-12-27 04:52:07 -0500 |
commit | 3b08d73568ddaf0dec2c2abe8e813672da2463ae (patch) | |
tree | 8e46bb2806ede3eca9a75bd24737834063f75965 /drivers | |
parent | 6434195dc75df5d574a9ae4f5535924bf4704aec (diff) |
gpu: nvgpu: gp10b: add debug features for gfxp and cilp
Add debugfs switches to force CILP and GFXP preemption.
Add a debugfs switch to dump context switch stats on channel
destruction.
bug 1525327
bug 1581799
Signed-off-by: Kirill Artamonov <kartamonov@nvidia.com>
Change-Id: I7d0558cc325ce655411388ea66ad982101f2fe66
Reviewed-on: http://git-master/r/794976
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/677231
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 47 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/gr_gp10b.h | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 24 |
3 files changed, 76 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index c9b870c7..e727ee99 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include "hw_proj_gp10b.h" | 25 | #include "hw_proj_gp10b.h" |
26 | #include "hw_ctxsw_prog_gp10b.h" | 26 | #include "hw_ctxsw_prog_gp10b.h" |
27 | #include "hw_mc_gp10b.h" | 27 | #include "hw_mc_gp10b.h" |
28 | #include <linux/vmalloc.h> | ||
28 | 29 | ||
29 | static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num) | 30 | static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num) |
30 | { | 31 | { |
@@ -482,6 +483,13 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g, | |||
482 | if (err) | 483 | if (err) |
483 | return err; | 484 | return err; |
484 | 485 | ||
486 | if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp) | ||
487 | flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP; | ||
488 | |||
489 | if (class == PASCAL_COMPUTE_A && | ||
490 | g->gr.t18x.ctx_vars.force_preemption_cilp) | ||
491 | flags |= NVGPU_ALLOC_OBJ_FLAGS_CILP; | ||
492 | |||
485 | if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) { | 493 | if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) { |
486 | u32 spill_size = | 494 | u32 spill_size = |
487 | gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v(); | 495 | gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v(); |
@@ -531,7 +539,7 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g, | |||
531 | goto fail_free_betacb; | 539 | goto fail_free_betacb; |
532 | } | 540 | } |
533 | 541 | ||
534 | (*gr_ctx)->preempt_mode = flags; | 542 | (*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_GFXP; |
535 | } | 543 | } |
536 | 544 | ||
537 | if (class == PASCAL_COMPUTE_A) { | 545 | if (class == PASCAL_COMPUTE_A) { |
@@ -558,6 +566,38 @@ fail_free_gk20a_ctx: | |||
558 | return err; | 566 | return err; |
559 | } | 567 | } |
560 | 568 | ||
569 | static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm, | ||
570 | struct gr_ctx_desc *gr_ctx) { | ||
571 | void *ctx_ptr = vmap(gr_ctx->mem.pages, | ||
572 | PAGE_ALIGN(gr_ctx->mem.size) >> PAGE_SHIFT, | ||
573 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
574 | if (!ctx_ptr) { | ||
575 | WARN_ON("Cannot map context"); | ||
576 | return; | ||
577 | } | ||
578 | gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n", | ||
579 | gk20a_mem_rd32(ctx_ptr + | ||
580 | ctxsw_prog_main_image_num_save_ops_o(), 0)); | ||
581 | gk20a_err(dev_from_gk20a(g), "WFI_SAVE_OPERATIONS : %d\n", | ||
582 | gk20a_mem_rd32(ctx_ptr + | ||
583 | ctxsw_prog_main_image_num_wfi_save_ops_o(), 0)); | ||
584 | gk20a_err(dev_from_gk20a(g), "CTA_SAVE_OPERATIONS : %d\n", | ||
585 | gk20a_mem_rd32(ctx_ptr + | ||
586 | ctxsw_prog_main_image_num_cta_save_ops_o(), 0)); | ||
587 | gk20a_err(dev_from_gk20a(g), "GFXP_SAVE_OPERATIONS : %d\n", | ||
588 | gk20a_mem_rd32(ctx_ptr + | ||
589 | ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0)); | ||
590 | gk20a_err(dev_from_gk20a(g), "CILP_SAVE_OPERATIONS : %d\n", | ||
591 | gk20a_mem_rd32(ctx_ptr + | ||
592 | ctxsw_prog_main_image_num_cilp_save_ops_o(), 0)); | ||
593 | gk20a_err(dev_from_gk20a(g), | ||
594 | "image gfx preemption option (GFXP is 1) %x\n", | ||
595 | gk20a_mem_rd32(ctx_ptr + | ||
596 | ctxsw_prog_main_image_graphics_preemption_options_o(), | ||
597 | 0)); | ||
598 | vunmap(ctx_ptr); | ||
599 | } | ||
600 | |||
561 | static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | 601 | static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, |
562 | struct gr_ctx_desc *gr_ctx) | 602 | struct gr_ctx_desc *gr_ctx) |
563 | { | 603 | { |
@@ -566,15 +606,18 @@ static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | |||
566 | if (!gr_ctx) | 606 | if (!gr_ctx) |
567 | return; | 607 | return; |
568 | 608 | ||
609 | if (g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close) | ||
610 | dump_ctx_switch_stats(g, vm, gr_ctx); | ||
611 | |||
569 | gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer); | 612 | gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer); |
570 | gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer); | 613 | gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer); |
571 | gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer); | 614 | gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer); |
572 | gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer); | 615 | gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer); |
573 | gr_gk20a_free_gr_ctx(g, vm, gr_ctx); | 616 | gr_gk20a_free_gr_ctx(g, vm, gr_ctx); |
574 | |||
575 | gk20a_dbg_fn("done"); | 617 | gk20a_dbg_fn("done"); |
576 | } | 618 | } |
577 | 619 | ||
620 | |||
578 | static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, | 621 | static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, |
579 | struct channel_ctx_gk20a *ch_ctx, | 622 | struct channel_ctx_gk20a *ch_ctx, |
580 | void *ctx_ptr) | 623 | void *ctx_ptr) |
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h index 7c3ddf27..370e0ea3 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GM20B GPU GR | 2 | * GM20B GPU GR |
3 | * | 3 | * |
4 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -36,6 +36,12 @@ void gp10b_init_gr(struct gpu_ops *ops); | |||
36 | struct gr_t18x { | 36 | struct gr_t18x { |
37 | struct { | 37 | struct { |
38 | u32 preempt_image_size; | 38 | u32 preempt_image_size; |
39 | u32 force_preemption_gfxp; | ||
40 | u32 force_preemption_cilp; | ||
41 | u32 dump_ctxsw_stats_on_channel_close; | ||
42 | struct dentry *debugfs_force_preemption_cilp; | ||
43 | struct dentry *debugfs_force_preemption_gfxp; | ||
44 | struct dentry *debugfs_dump_ctxsw_stats; | ||
39 | } ctx_vars; | 45 | } ctx_vars; |
40 | }; | 46 | }; |
41 | 47 | ||
diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c index 44b27fe7..2f81378d 100644 --- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c +++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | |||
@@ -53,6 +53,30 @@ static int gp10b_tegra_probe(struct platform_device *pdev) | |||
53 | platform->bypass_smmu = !device_is_iommuable(&pdev->dev); | 53 | platform->bypass_smmu = !device_is_iommuable(&pdev->dev); |
54 | platform->disable_bigpage = platform->bypass_smmu; | 54 | platform->disable_bigpage = platform->bypass_smmu; |
55 | 55 | ||
56 | platform->g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close | ||
57 | = false; | ||
58 | platform->g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close | ||
59 | = false; | ||
60 | |||
61 | platform->g->gr.t18x.ctx_vars.force_preemption_gfxp = false; | ||
62 | platform->g->gr.t18x.ctx_vars.force_preemption_cilp = false; | ||
63 | |||
64 | platform->g->gr.t18x.ctx_vars.debugfs_force_preemption_gfxp = | ||
65 | debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR, | ||
66 | platform->debugfs, | ||
67 | &platform->g->gr.t18x.ctx_vars.force_preemption_gfxp); | ||
68 | |||
69 | platform->g->gr.t18x.ctx_vars.debugfs_force_preemption_cilp = | ||
70 | debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR, | ||
71 | platform->debugfs, | ||
72 | &platform->g->gr.t18x.ctx_vars.force_preemption_cilp); | ||
73 | |||
74 | platform->g->gr.t18x.ctx_vars.debugfs_dump_ctxsw_stats = | ||
75 | debugfs_create_bool("dump_ctxsw_stats_on_channel_close", | ||
76 | S_IRUGO|S_IWUSR, | ||
77 | platform->debugfs, | ||
78 | &platform->g->gr.t18x. | ||
79 | ctx_vars.dump_ctxsw_stats_on_channel_close); | ||
56 | return 0; | 80 | return 0; |
57 | } | 81 | } |
58 | 82 | ||