summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Terje Bergstrom <tbergstrom@nvidia.com>, 2014-11-06 05:58:00 -0500
committer: Dan Willemsen <dwillemsen@nvidia.com>, 2015-04-04 21:01:44 -0400
commit: c3892ff917ad63ae63ad20d70f89583fb8213ce3 (patch)
tree: 91d4b5755f2f6a5365303cb9adbc8aed1dc63fb0
parent: 5df3d09e16c9d2f413cea53d16bc8ca42ae42d6e (diff)
gpu: nvgpu: gk20a: Set lockboost size for compute
For compute channel on gk20a, set lockboost size to zero.

Bug 1573856

Change-Id: I369cebf72241e4017e7d380c82caff6014e42984
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/594843
GVS: Gerrit_Virtual_Submit
-rw-r--r--  drivers/gpu/nvgpu/gk20a/cde_gk20a.c    2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c     27
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h  12
-rw-r--r--  include/uapi/linux/nvgpu.h             4
4 files changed, 39 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 6f629e81..4acfa955 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -515,7 +515,7 @@ static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
515 int err; 515 int err;
516 516
517 alloc_obj_ctx.class_num = required_class; 517 alloc_obj_ctx.class_num = required_class;
518 alloc_obj_ctx.padding = 0; 518 alloc_obj_ctx.flags = 0;
519 519
520 err = gk20a_alloc_obj_ctx(cde_ctx->ch, &alloc_obj_ctx); 520 err = gk20a_alloc_obj_ctx(cde_ctx->ch, &alloc_obj_ctx);
521 if (err) { 521 if (err) {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 37cccba3..8acc5b45 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -2771,7 +2771,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
2771 if (!ch_ctx->gr_ctx) { 2771 if (!ch_ctx->gr_ctx) {
2772 err = gr_gk20a_alloc_channel_gr_ctx(g, c, 2772 err = gr_gk20a_alloc_channel_gr_ctx(g, c,
2773 args->class_num, 2773 args->class_num,
2774 args->padding); 2774 args->flags);
2775 if (err) { 2775 if (err) {
2776 gk20a_err(dev_from_gk20a(g), 2776 gk20a_err(dev_from_gk20a(g),
2777 "fail to allocate gr ctx buffer"); 2777 "fail to allocate gr ctx buffer");
@@ -2792,7 +2792,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
2792 gk20a_vm_get(tsg->vm); 2792 gk20a_vm_get(tsg->vm);
2793 err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, 2793 err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg,
2794 args->class_num, 2794 args->class_num,
2795 args->padding); 2795 args->flags);
2796 if (err) { 2796 if (err) {
2797 gk20a_err(dev_from_gk20a(g), 2797 gk20a_err(dev_from_gk20a(g),
2798 "fail to allocate TSG gr ctx buffer"); 2798 "fail to allocate TSG gr ctx buffer");
@@ -2837,7 +2837,12 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
2837 /* tweak any perf parameters per-context here */ 2837 /* tweak any perf parameters per-context here */
2838 if (args->class_num == KEPLER_COMPUTE_A) { 2838 if (args->class_num == KEPLER_COMPUTE_A) {
2839 int begin_err; 2839 int begin_err;
2840 u32 tex_lock_disable_mask = 2840 u32 tex_lock_disable_mask;
2841 u32 texlock;
2842 u32 lockboost_mask;
2843 u32 lockboost;
2844
2845 tex_lock_disable_mask =
2841 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_m() | 2846 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_m() |
2842 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tile_m() | 2847 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tile_m() |
2843 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_phase_m() | 2848 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_phase_m() |
@@ -2845,7 +2850,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
2845 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_timeout_m() | 2850 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_timeout_m() |
2846 gr_gpcs_tpcs_sm_sch_texlock_dot_t_unlock_m(); 2851 gr_gpcs_tpcs_sm_sch_texlock_dot_t_unlock_m();
2847 2852
2848 u32 texlock = gk20a_readl(g, gr_gpcs_tpcs_sm_sch_texlock_r()); 2853 texlock = gk20a_readl(g, gr_gpcs_tpcs_sm_sch_texlock_r());
2849 2854
2850 texlock = (texlock & ~tex_lock_disable_mask) | 2855 texlock = (texlock & ~tex_lock_disable_mask) |
2851 (gr_gpcs_tpcs_sm_sch_texlock_tex_hash_disable_f() | 2856 (gr_gpcs_tpcs_sm_sch_texlock_tex_hash_disable_f() |
@@ -2855,12 +2860,24 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
2855 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_timeout_disable_f() | 2860 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_timeout_disable_f() |
2856 gr_gpcs_tpcs_sm_sch_texlock_dot_t_unlock_disable_f()); 2861 gr_gpcs_tpcs_sm_sch_texlock_dot_t_unlock_disable_f());
2857 2862
2863 lockboost_mask =
2864 gr_gpcs_tpcs_sm_sch_macro_sched_lockboost_size_m();
2865
2866 lockboost = gk20a_readl(g, gr_gpcs_tpcs_sm_sch_macro_sched_r());
2867 lockboost = (lockboost & ~lockboost_mask) |
2868 gr_gpcs_tpcs_sm_sch_macro_sched_lockboost_size_f(0);
2869
2858 begin_err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); 2870 begin_err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
2859 2871
2860 if (!begin_err) { 2872 if (!begin_err) {
2861 err = gr_gk20a_ctx_patch_write(g, ch_ctx, 2873 err = gr_gk20a_ctx_patch_write(g, ch_ctx,
2862 gr_gpcs_tpcs_sm_sch_texlock_r(), 2874 gr_gpcs_tpcs_sm_sch_texlock_r(),
2863 texlock, true); 2875 texlock, true);
2876
2877 if (!err)
2878 err = gr_gk20a_ctx_patch_write(g, ch_ctx,
2879 gr_gpcs_tpcs_sm_sch_macro_sched_r(),
2880 lockboost, true);
2864 } 2881 }
2865 if ((begin_err || err)) { 2882 if ((begin_err || err)) {
2866 gk20a_err(dev_from_gk20a(g), 2883 gk20a_err(dev_from_gk20a(g),
@@ -2868,6 +2885,8 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
2868 } 2885 }
2869 if (!begin_err) 2886 if (!begin_err)
2870 gr_gk20a_ctx_patch_write_end(g, ch_ctx); 2887 gr_gk20a_ctx_patch_write_end(g, ch_ctx);
2888
2889 args->flags |= NVGPU_ALLOC_OBJ_FLAGS_LOCKBOOST_ZERO;
2871 } 2890 }
2872 2891
2873 /* init golden image, ELPG enabled after this is done */ 2892 /* init golden image, ELPG enabled after this is done */
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
index 4e15af5f..1de9e794 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -3250,6 +3250,18 @@ static inline u32 gr_gpcs_tpcs_sm_sch_texlock_dot_t_unlock_disable_f(void)
3250{ 3250{
3251 return 0x0; 3251 return 0x0;
3252} 3252}
3253static inline u32 gr_gpcs_tpcs_sm_sch_macro_sched_r(void)
3254{
3255 return 0x00419eac;
3256}
3257static inline u32 gr_gpcs_tpcs_sm_sch_macro_sched_lockboost_size_f(u32 v)
3258{
3259 return (v & 0x1) << 2;
3260}
3261static inline u32 gr_gpcs_tpcs_sm_sch_macro_sched_lockboost_size_m(void)
3262{
3263 return 0x1 << 2;
3264}
3253static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_r(void) 3265static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_r(void)
3254{ 3266{
3255 return 0x00419e10; 3267 return 0x00419e10;
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 97e791df..20acc66a 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -459,9 +459,11 @@ struct nvgpu_set_nvmap_fd_args {
459 __u32 fd; 459 __u32 fd;
460} __packed; 460} __packed;
461 461
462#define NVGPU_ALLOC_OBJ_FLAGS_LOCKBOOST_ZERO (1 << 0)
463
462struct nvgpu_alloc_obj_ctx_args { 464struct nvgpu_alloc_obj_ctx_args {
463 __u32 class_num; /* kepler3d, 2d, compute, etc */ 465 __u32 class_num; /* kepler3d, 2d, compute, etc */
464 __u32 padding; 466 __u32 flags; /* input, output */
465 __u64 obj_id; /* output, used to free later */ 467 __u64 obj_id; /* output, used to free later */
466}; 468};
467 469