From 193a2ed38ca51d898ac811820ab86237c84e18eb Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Thu, 18 Jan 2018 11:02:08 -0800 Subject: gpu: nvgpu: add sw method for SET_BES_CROP_DEBUG4 Added sw method support for SET_BES_CROP_DEBUG4. In this sw method: CLAMP_FP_BLEND_TO_MAXVAL forces overflow and CLAMP_FP_BLEND_TO_INF blend results to clamp to FP maxval. Added support for this sw method in gp10b/gp106/gv11b and gv100. Bug 2046636 Change-Id: I3a9e97587aca76718f7f504ea3b853f87409092a Signed-off-by: seshendra Gadagottu Reviewed-on: https://git-master.nvidia.com/r/1641529 Reviewed-by: mobile promotions Tested-by: mobile promotions --- .../nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c | 1 + .../nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c | 1 + drivers/gpu/nvgpu/gk20a/gk20a.h | 1 + drivers/gpu/nvgpu/gp106/gr_gp106.c | 3 +++ drivers/gpu/nvgpu/gp106/hal_gp106.c | 1 + drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 27 ++++++++++++++++++++++ drivers/gpu/nvgpu/gp10b/gr_gp10b.h | 5 ++++ drivers/gpu/nvgpu/gv100/hal_gv100.c | 1 + drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 3 +++ drivers/gpu/nvgpu/gv11b/gr_gv11b.h | 1 + drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 1 + .../gpu/nvgpu/include/nvgpu/hw/gp106/hw_gr_gp106.h | 18 ++++++++++++++- .../gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h | 18 ++++++++++++++- .../gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h | 16 +++++++++++++ .../gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h | 16 +++++++++++++ 15 files changed, 111 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c index 5b8307e5..164ac3d2 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c @@ -204,6 +204,7 @@ static const struct gpu_ops vgpu_gp10b_ops = { .init_preemption_state = gr_gp10b_init_preemption_state, .update_boosted_ctx = NULL, .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, + 
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4, .create_gr_sysfs = gr_gp10b_create_sysfs, .set_ctxsw_preemption_mode = vgpu_gr_gp10b_set_ctxsw_preemption_mode, diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c index 0813ad75..d205f039 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c @@ -222,6 +222,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { .init_preemption_state = NULL, .update_boosted_ctx = NULL, .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, + .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4, .create_gr_sysfs = gr_gv11b_create_sysfs, .set_ctxsw_preemption_mode = vgpu_gr_gp10b_set_ctxsw_preemption_mode, .is_etpc_addr = gv11b_gr_pri_is_etpc_addr, diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 31855250..50f827a9 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -205,6 +205,7 @@ struct gpu_ops { u32 data); void (*set_circular_buffer_size)(struct gk20a *g, u32 data); void (*set_bes_crop_debug3)(struct gk20a *g, u32 data); + void (*set_bes_crop_debug4)(struct gk20a *g, u32 data); void (*enable_hww_exceptions)(struct gk20a *g); bool (*is_valid_class)(struct gk20a *g, u32 class_num); bool (*is_valid_gfx_class)(struct gk20a *g, u32 class_num); diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c index 02cecf53..1bd24b45 100644 --- a/drivers/gpu/nvgpu/gp106/gr_gp106.c +++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c @@ -110,6 +110,9 @@ int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr, case NVC097_SET_BES_CROP_DEBUG3: g->ops.gr.set_bes_crop_debug3(g, data); break; + case NVC097_SET_BES_CROP_DEBUG4: + g->ops.gr.set_bes_crop_debug4(g, data); + break; default: goto fail; } diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index bd02f914..68562955 
100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -368,6 +368,7 @@ static const struct gpu_ops gp106_ops = {
 		.init_preemption_state = NULL,
 		.update_boosted_ctx = NULL,
 		.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
+		.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
 		.create_gr_sysfs = NULL,
 		.set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode,
 		.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 549a4da4..17685f59 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -695,6 +695,30 @@ void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data)
 	gk20a_writel(g, gr_bes_crop_debug3_r(), val);
 }
 
+/* Handle SET_BES_CROP_DEBUG4: program the FP blend clamp mode from data. */
+void gr_gp10b_set_bes_crop_debug4(struct gk20a *g, u32 data)
+{
+	u32 val;
+
+	nvgpu_log_fn(g, " ");
+	val = gk20a_readl(g, gr_bes_crop_debug4_r());
+	if (data & NVC097_BES_CROP_DEBUG4_CLAMP_FP_BLEND_TO_MAXVAL) {
+		val = set_field(val,
+			gr_bes_crop_debug4_clamp_fp_blend_m(),
+			gr_bes_crop_debug4_clamp_fp_blend_to_maxval_f());
+	} else if (data == NVC097_BES_CROP_DEBUG4_CLAMP_FP_BLEND_TO_INF) {
+		/* TO_INF is 0x0: a bit-test can never match, so compare. */
+		val = set_field(val,
+			gr_bes_crop_debug4_clamp_fp_blend_m(),
+			gr_bes_crop_debug4_clamp_fp_blend_to_inf_f());
+	} else {
+		nvgpu_warn(g,
+			"gr_gp10b_set_bes_crop_debug4: wrong data sent!");
+		return;
+	}
+	gk20a_writel(g, gr_bes_crop_debug4_r(), val);
+}
+
 int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
 				     u32 class_num, u32 offset, u32 data)
 {
@@ -736,6 +760,9 @@ int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
 		case NVC097_SET_BES_CROP_DEBUG3:
 			g->ops.gr.set_bes_crop_debug3(g, data);
 			break;
+		case NVC097_SET_BES_CROP_DEBUG4:
+			g->ops.gr.set_bes_crop_debug4(g, data);
+			break;
 		default:
 			goto fail;
 		}
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index 8d553d37..8f1ebb16 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ 
b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h @@ -49,9 +49,13 @@ enum { #define NVC097_SET_CIRCULAR_BUFFER_SIZE 0x1280 #define NVC097_SET_SHADER_EXCEPTIONS 0x1528 #define NVC097_SET_BES_CROP_DEBUG3 0x10c4 +#define NVC097_SET_BES_CROP_DEBUG4 0x10b0 #define NVC0C0_SET_SHADER_EXCEPTIONS 0x1528 #define NVC0C0_SET_RD_COALESCE 0x0228 +#define NVC097_BES_CROP_DEBUG4_CLAMP_FP_BLEND_TO_INF 0x0 +#define NVC097_BES_CROP_DEBUG4_CLAMP_FP_BLEND_TO_MAXVAL 0x1 + int gr_gp10b_init_fs_state(struct gk20a *g); int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size, struct nvgpu_mem *mem); @@ -85,6 +89,7 @@ int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, u32 gr_gp10b_pagepool_default_size(struct gk20a *g); int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g); void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data); +void gr_gp10b_set_bes_crop_debug4(struct gk20a *g, u32 data); int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr, u32 class_num, u32 offset, u32 data); void gr_gp10b_cb_size_default(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 0768ed0c..ca41a90b 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -400,6 +400,7 @@ static const struct gpu_ops gv100_ops = { .init_preemption_state = NULL, .update_boosted_ctx = gr_gp10b_update_boosted_ctx, .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, + .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4, .create_gr_sysfs = gr_gv11b_create_sysfs, .set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode, .is_etpc_addr = gv11b_gr_pri_is_etpc_addr, diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 3030def8..9c79b29b 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1193,6 +1193,9 @@ int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr, case NVC397_SET_BES_CROP_DEBUG3: g->ops.gr.set_bes_crop_debug3(g, data); break; + 
case NVC397_SET_BES_CROP_DEBUG4: + g->ops.gr.set_bes_crop_debug4(g, data); + break; default: goto fail; } diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index 022a7698..f8f80df3 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h @@ -65,6 +65,7 @@ enum { #define NVC397_SET_TEX_IN_DBG 0x10bc #define NVC397_SET_SKEDCHECK 0x10c0 #define NVC397_SET_BES_CROP_DEBUG3 0x10c4 +#define NVC397_SET_BES_CROP_DEBUG4 0x10b0 #define NVC397_SET_TEX_IN_DBG_TSL1_RVCH_INVALIDATE 0x1 #define NVC397_SET_TEX_IN_DBG_SM_L1TAG_CTRL_CACHE_SURFACE_LD 0x2 diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 482c1eec..93f819e4 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -367,6 +367,7 @@ static const struct gpu_ops gv11b_ops = { .init_preemption_state = gr_gv11b_init_preemption_state, .update_boosted_ctx = gr_gp10b_update_boosted_ctx, .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, + .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4, .create_gr_sysfs = gr_gv11b_create_sysfs, .set_ctxsw_preemption_mode = gr_gv11b_set_ctxsw_preemption_mode, .is_etpc_addr = gv11b_gr_pri_is_etpc_addr, diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_gr_gp106.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_gr_gp106.h index 2da3ec41..7630bda2 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_gr_gp106.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_gr_gp106.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -3792,6 +3792,22 @@ static inline u32 gr_bes_crop_debug3_blendopt_fill_override_enabled_f(void) { return 0x4U; } +static inline u32 gr_bes_crop_debug4_r(void) +{ + return 0x0040894cU; +} +static inline u32 gr_bes_crop_debug4_clamp_fp_blend_m(void) +{ + return 0x1U << 18U; +} +static inline u32 gr_bes_crop_debug4_clamp_fp_blend_to_inf_f(void) +{ + return 0x0U; +} +static inline u32 gr_bes_crop_debug4_clamp_fp_blend_to_maxval_f(void) +{ + return 0x40000U; +} static inline u32 gr_bes_crop_settings_r(void) { return 0x00408958U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h index 51809112..dca75004 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -3996,6 +3996,22 @@ static inline u32 gr_bes_crop_debug3_blendopt_fill_override_enabled_f(void) { return 0x4U; } +static inline u32 gr_bes_crop_debug4_r(void) +{ + return 0x0040894cU; +} +static inline u32 gr_bes_crop_debug4_clamp_fp_blend_m(void) +{ + return 0x1U << 18U; +} +static inline u32 gr_bes_crop_debug4_clamp_fp_blend_to_inf_f(void) +{ + return 0x0U; +} +static inline u32 gr_bes_crop_debug4_clamp_fp_blend_to_maxval_f(void) +{ + return 0x40000U; +} static inline u32 gr_bes_crop_settings_r(void) { return 0x00408958U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h index e669c0f0..2e28bdb8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h @@ -3732,6 +3732,22 @@ static inline u32 gr_bes_crop_debug3_blendopt_fill_override_enabled_f(void) { return 0x4U; } +static inline u32 gr_bes_crop_debug4_r(void) +{ + return 0x0040894cU; +} +static inline u32 gr_bes_crop_debug4_clamp_fp_blend_m(void) +{ + return 0x1U << 18U; +} +static inline u32 gr_bes_crop_debug4_clamp_fp_blend_to_inf_f(void) +{ + return 0x0U; +} +static inline u32 gr_bes_crop_debug4_clamp_fp_blend_to_maxval_f(void) +{ + return 0x40000U; +} static inline u32 gr_bes_crop_settings_r(void) { return 0x00408958U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h index 2b9bffdd..72cbc7dd 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h @@ -4416,6 +4416,22 @@ static inline u32 gr_bes_crop_debug3_blendopt_fill_override_enabled_f(void) { return 0x4U; } +static inline u32 gr_bes_crop_debug4_r(void) +{ + return 0x0040894cU; +} +static inline u32 
gr_bes_crop_debug4_clamp_fp_blend_m(void) +{ + return 0x1U << 18U; +} +static inline u32 gr_bes_crop_debug4_clamp_fp_blend_to_inf_f(void) +{ + return 0x0U; +} +static inline u32 gr_bes_crop_debug4_clamp_fp_blend_to_maxval_f(void) +{ + return 0x40000U; +} static inline u32 gr_bes_crop_settings_r(void) { return 0x00408958U; -- cgit v1.2.2