diff options
author | seshendra Gadagottu <sgadagottu@nvidia.com> | 2018-01-18 14:02:08 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-01-22 18:29:54 -0500 |
commit | 193a2ed38ca51d898ac811820ab86237c84e18eb (patch) | |
tree | da672582c2322fc91c7b5c600deb5a60f3068a8d /drivers/gpu/nvgpu/gp10b | |
parent | f6d898656a6d7c197aa27ee53f5f0151fb6dfcf5 (diff) |
gpu: nvgpu: add sw method for SET_BES_CROP_DEBUG4
Added sw method support for SET_BES_CROP_DEBUG4.
In this sw method, the data selects the CLAMP_FP_BLEND mode:
CLAMP_FP_BLEND_TO_MAXVAL forces overflowing blend results to clamp to
FP maxval, while CLAMP_FP_BLEND_TO_INF selects the to-INF clamp setting instead.
Added support for this sw method in gp10b/gp106/gv11b
and gv100.
Bug 2046636
Change-Id: I3a9e97587aca76718f7f504ea3b853f87409092a
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1641529
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b')
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 27 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/gr_gp10b.h | 5 |
2 files changed, 32 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 549a4da4..17685f59 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c | |||
@@ -695,6 +695,30 @@ void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data) | |||
695 | gk20a_writel(g, gr_bes_crop_debug3_r(), val); | 695 | gk20a_writel(g, gr_bes_crop_debug3_r(), val); |
696 | } | 696 | } |
697 | 697 | ||
698 | void gr_gp10b_set_bes_crop_debug4(struct gk20a *g, u32 data) | ||
699 | { | ||
700 | u32 val; | ||
701 | |||
702 | nvgpu_log_fn(g, " "); | ||
703 | |||
704 | val = gk20a_readl(g, gr_bes_crop_debug4_r()); | ||
705 | if (data & NVC097_BES_CROP_DEBUG4_CLAMP_FP_BLEND_TO_MAXVAL) { | ||
706 | val = set_field(val, | ||
707 | gr_bes_crop_debug4_clamp_fp_blend_m(), | ||
708 | gr_bes_crop_debug4_clamp_fp_blend_to_maxval_f()); | ||
709 | } else if (data & NVC097_BES_CROP_DEBUG4_CLAMP_FP_BLEND_TO_INF) { | ||
710 | val = set_field(val, | ||
711 | gr_bes_crop_debug4_clamp_fp_blend_m(), | ||
712 | gr_bes_crop_debug4_clamp_fp_blend_to_inf_f()); | ||
713 | } else { | ||
714 | nvgpu_warn(g, | ||
715 | "gr_gp10b_set_bes_crop_debug4: wrong data sent!"); | ||
716 | return; | ||
717 | } | ||
718 | gk20a_writel(g, gr_bes_crop_debug4_r(), val); | ||
719 | } | ||
720 | |||
721 | |||
698 | int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr, | 722 | int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr, |
699 | u32 class_num, u32 offset, u32 data) | 723 | u32 class_num, u32 offset, u32 data) |
700 | { | 724 | { |
@@ -736,6 +760,9 @@ int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr, | |||
736 | case NVC097_SET_BES_CROP_DEBUG3: | 760 | case NVC097_SET_BES_CROP_DEBUG3: |
737 | g->ops.gr.set_bes_crop_debug3(g, data); | 761 | g->ops.gr.set_bes_crop_debug3(g, data); |
738 | break; | 762 | break; |
763 | case NVC097_SET_BES_CROP_DEBUG4: | ||
764 | g->ops.gr.set_bes_crop_debug4(g, data); | ||
765 | break; | ||
739 | default: | 766 | default: |
740 | goto fail; | 767 | goto fail; |
741 | } | 768 | } |
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h index 8d553d37..8f1ebb16 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h | |||
@@ -49,9 +49,13 @@ enum { | |||
49 | #define NVC097_SET_CIRCULAR_BUFFER_SIZE 0x1280 | 49 | #define NVC097_SET_CIRCULAR_BUFFER_SIZE 0x1280 |
50 | #define NVC097_SET_SHADER_EXCEPTIONS 0x1528 | 50 | #define NVC097_SET_SHADER_EXCEPTIONS 0x1528 |
51 | #define NVC097_SET_BES_CROP_DEBUG3 0x10c4 | 51 | #define NVC097_SET_BES_CROP_DEBUG3 0x10c4 |
52 | #define NVC097_SET_BES_CROP_DEBUG4 0x10b0 | ||
52 | #define NVC0C0_SET_SHADER_EXCEPTIONS 0x1528 | 53 | #define NVC0C0_SET_SHADER_EXCEPTIONS 0x1528 |
53 | #define NVC0C0_SET_RD_COALESCE 0x0228 | 54 | #define NVC0C0_SET_RD_COALESCE 0x0228 |
54 | 55 | ||
56 | #define NVC097_BES_CROP_DEBUG4_CLAMP_FP_BLEND_TO_INF 0x0 | ||
57 | #define NVC097_BES_CROP_DEBUG4_CLAMP_FP_BLEND_TO_MAXVAL 0x1 | ||
58 | |||
55 | int gr_gp10b_init_fs_state(struct gk20a *g); | 59 | int gr_gp10b_init_fs_state(struct gk20a *g); |
56 | int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size, | 60 | int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size, |
57 | struct nvgpu_mem *mem); | 61 | struct nvgpu_mem *mem); |
@@ -85,6 +89,7 @@ int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, | |||
85 | u32 gr_gp10b_pagepool_default_size(struct gk20a *g); | 89 | u32 gr_gp10b_pagepool_default_size(struct gk20a *g); |
86 | int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g); | 90 | int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g); |
87 | void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data); | 91 | void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data); |
92 | void gr_gp10b_set_bes_crop_debug4(struct gk20a *g, u32 data); | ||
88 | int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr, | 93 | int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr, |
89 | u32 class_num, u32 offset, u32 data); | 94 | u32 class_num, u32 offset, u32 data); |
90 | void gr_gp10b_cb_size_default(struct gk20a *g); | 95 | void gr_gp10b_cb_size_default(struct gk20a *g); |