diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2014-12-09 03:04:05 -0500 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-04-04 18:06:45 -0400 |
commit | 5df3d09e16c9d2f413cea53d16bc8ca42ae42d6e (patch) | |
tree | 0ed55cf8bdf0d265742d396c160f8dd0f1ac9d85 /drivers | |
parent | 4ccb162da7a2414c344aecc9cdf85bee9c284caf (diff) |
gpu: nvgpu: gm20b: Enable CTA preemption
CTA preemption needs to be enabled by setting a value in context. Set
it for gm20b.
Bug 200063473
Bug 1517461
Change-Id: I080cd71b348d08f834fd23ebbe7443dba79224db
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/661299
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 16 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 44 |
5 files changed, 60 insertions, 8 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 9bb890ca..184ef168 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -151,7 +151,7 @@ struct gpu_ops { | |||
151 | int (*init_ctx_state)(struct gk20a *g); | 151 | int (*init_ctx_state)(struct gk20a *g); |
152 | int (*alloc_gr_ctx)(struct gk20a *g, | 152 | int (*alloc_gr_ctx)(struct gk20a *g, |
153 | struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, | 153 | struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, |
154 | u32 padding); | 154 | u32 class, u32 padding); |
155 | void (*free_gr_ctx)(struct gk20a *g, | 155 | void (*free_gr_ctx)(struct gk20a *g, |
156 | struct vm_gk20a *vm, | 156 | struct vm_gk20a *vm, |
157 | struct gr_ctx_desc *gr_ctx); | 157 | struct gr_ctx_desc *gr_ctx); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 4f6c885c..37cccba3 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -67,7 +67,8 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c); | |||
67 | 67 | ||
68 | /* channel gr ctx buffer */ | 68 | /* channel gr ctx buffer */ |
69 | static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, | 69 | static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, |
70 | struct channel_gk20a *c, u32 padding); | 70 | struct channel_gk20a *c, |
71 | u32 class, u32 padding); | ||
71 | static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c); | 72 | static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c); |
72 | 73 | ||
73 | /* channel patch ctx buffer */ | 74 | /* channel patch ctx buffer */ |
@@ -2486,6 +2487,7 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c) | |||
2486 | 2487 | ||
2487 | int gr_gk20a_alloc_gr_ctx(struct gk20a *g, | 2488 | int gr_gk20a_alloc_gr_ctx(struct gk20a *g, |
2488 | struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, | 2489 | struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, |
2490 | u32 class, | ||
2489 | u32 padding) | 2491 | u32 padding) |
2490 | { | 2492 | { |
2491 | struct gr_ctx_desc *gr_ctx = NULL; | 2493 | struct gr_ctx_desc *gr_ctx = NULL; |
@@ -2551,7 +2553,7 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g, | |||
2551 | } | 2553 | } |
2552 | 2554 | ||
2553 | static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, | 2555 | static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, |
2554 | struct tsg_gk20a *tsg, u32 padding) | 2556 | struct tsg_gk20a *tsg, u32 class, u32 padding) |
2555 | { | 2557 | { |
2556 | struct gr_ctx_desc **gr_ctx = &tsg->tsg_gr_ctx; | 2558 | struct gr_ctx_desc **gr_ctx = &tsg->tsg_gr_ctx; |
2557 | int err; | 2559 | int err; |
@@ -2561,7 +2563,7 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, | |||
2561 | return -ENOMEM; | 2563 | return -ENOMEM; |
2562 | } | 2564 | } |
2563 | 2565 | ||
2564 | err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, tsg->vm, padding); | 2566 | err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, tsg->vm, class, padding); |
2565 | if (err) | 2567 | if (err) |
2566 | return err; | 2568 | return err; |
2567 | 2569 | ||
@@ -2570,10 +2572,11 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, | |||
2570 | 2572 | ||
2571 | static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, | 2573 | static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, |
2572 | struct channel_gk20a *c, | 2574 | struct channel_gk20a *c, |
2575 | u32 class, | ||
2573 | u32 padding) | 2576 | u32 padding) |
2574 | { | 2577 | { |
2575 | struct gr_ctx_desc **gr_ctx = &c->ch_ctx.gr_ctx; | 2578 | struct gr_ctx_desc **gr_ctx = &c->ch_ctx.gr_ctx; |
2576 | int err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, c->vm, padding); | 2579 | int err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, c->vm, class, padding); |
2577 | if (err) | 2580 | if (err) |
2578 | return err; | 2581 | return err; |
2579 | 2582 | ||
@@ -2767,6 +2770,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, | |||
2767 | if (!tsg) { | 2770 | if (!tsg) { |
2768 | if (!ch_ctx->gr_ctx) { | 2771 | if (!ch_ctx->gr_ctx) { |
2769 | err = gr_gk20a_alloc_channel_gr_ctx(g, c, | 2772 | err = gr_gk20a_alloc_channel_gr_ctx(g, c, |
2773 | args->class_num, | ||
2770 | args->padding); | 2774 | args->padding); |
2771 | if (err) { | 2775 | if (err) { |
2772 | gk20a_err(dev_from_gk20a(g), | 2776 | gk20a_err(dev_from_gk20a(g), |
@@ -2786,7 +2790,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, | |||
2786 | if (!tsg->tsg_gr_ctx) { | 2790 | if (!tsg->tsg_gr_ctx) { |
2787 | tsg->vm = c->vm; | 2791 | tsg->vm = c->vm; |
2788 | gk20a_vm_get(tsg->vm); | 2792 | gk20a_vm_get(tsg->vm); |
2789 | err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, args->padding); | 2793 | err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, |
2794 | args->class_num, | ||
2795 | args->padding); | ||
2790 | if (err) { | 2796 | if (err) { |
2791 | gk20a_err(dev_from_gk20a(g), | 2797 | gk20a_err(dev_from_gk20a(g), |
2792 | "fail to allocate TSG gr ctx buffer"); | 2798 | "fail to allocate TSG gr ctx buffer"); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 309faf3b..f130b830 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -496,7 +496,7 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g, | |||
496 | struct fecs_method_op_gk20a op); | 496 | struct fecs_method_op_gk20a op); |
497 | int gr_gk20a_alloc_gr_ctx(struct gk20a *g, | 497 | int gr_gk20a_alloc_gr_ctx(struct gk20a *g, |
498 | struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, | 498 | struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, |
499 | u32 padding); | 499 | u32 class, u32 padding); |
500 | void gr_gk20a_free_gr_ctx(struct gk20a *g, | 500 | void gr_gk20a_free_gr_ctx(struct gk20a *g, |
501 | struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx); | 501 | struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx); |
502 | #endif /*__GR_GK20A_H__*/ | 502 | #endif /*__GR_GK20A_H__*/ |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index d3ee8670..04f9446b 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -150,11 +150,15 @@ struct gr_ctx_desc { | |||
150 | u64 iova; | 150 | u64 iova; |
151 | size_t size; | 151 | size_t size; |
152 | u64 gpu_va; | 152 | u64 gpu_va; |
153 | int preempt_mode; | ||
153 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | 154 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC |
154 | struct gr_ctx_desc_t18x t18x; | 155 | struct gr_ctx_desc_t18x t18x; |
155 | #endif | 156 | #endif |
156 | }; | 157 | }; |
157 | 158 | ||
159 | #define NVGPU_GR_PREEMPTION_MODE_WFI 0 | ||
160 | #define NVGPU_GR_PREEMPTION_MODE_CTA 2 | ||
161 | |||
158 | struct compbit_store_desc { | 162 | struct compbit_store_desc { |
159 | struct page **pages; | 163 | struct page **pages; |
160 | struct sg_table *sgt; | 164 | struct sg_table *sgt; |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index cba51cd6..5f544819 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -775,6 +775,46 @@ static u32 gr_gm20b_pagepool_default_size(struct gk20a *g) | |||
775 | return gr_scc_pagepool_total_pages_hwmax_value_v(); | 775 | return gr_scc_pagepool_total_pages_hwmax_value_v(); |
776 | } | 776 | } |
777 | 777 | ||
778 | int gr_gm20b_alloc_gr_ctx(struct gk20a *g, | ||
779 | struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, | ||
780 | u32 class, | ||
781 | u32 flags) | ||
782 | { | ||
783 | int err; | ||
784 | |||
785 | gk20a_dbg_fn(""); | ||
786 | |||
787 | err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags); | ||
788 | if (err) | ||
789 | return err; | ||
790 | |||
791 | if (class == MAXWELL_COMPUTE_B) | ||
792 | (*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CTA; | ||
793 | |||
794 | gk20a_dbg_fn("done"); | ||
795 | |||
796 | return 0; | ||
797 | } | ||
798 | |||
799 | static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, | ||
800 | struct channel_ctx_gk20a *ch_ctx, | ||
801 | void *ctx_ptr) | ||
802 | { | ||
803 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | ||
804 | u32 cta_preempt_option = | ||
805 | ctxsw_prog_main_image_preemption_options_control_cta_enabled_f(); | ||
806 | |||
807 | gk20a_dbg_fn(""); | ||
808 | |||
809 | if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_CTA) { | ||
810 | gk20a_dbg_info("CTA: %x", cta_preempt_option); | ||
811 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_preemption_options_o(), 0, | ||
812 | cta_preempt_option); | ||
813 | } | ||
814 | |||
815 | gk20a_dbg_fn("done"); | ||
816 | } | ||
817 | |||
778 | void gm20b_init_gr(struct gpu_ops *gops) | 818 | void gm20b_init_gr(struct gpu_ops *gops) |
779 | { | 819 | { |
780 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; | 820 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; |
@@ -814,6 +854,8 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
814 | gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth; | 854 | gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth; |
815 | gops->gr.pagepool_default_size = gr_gm20b_pagepool_default_size; | 855 | gops->gr.pagepool_default_size = gr_gm20b_pagepool_default_size; |
816 | gops->gr.init_ctx_state = gr_gk20a_init_ctx_state; | 856 | gops->gr.init_ctx_state = gr_gk20a_init_ctx_state; |
817 | gops->gr.alloc_gr_ctx = gr_gk20a_alloc_gr_ctx; | 857 | gops->gr.alloc_gr_ctx = gr_gm20b_alloc_gr_ctx; |
818 | gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx; | 858 | gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx; |
859 | gops->gr.update_ctxsw_preemption_mode = | ||
860 | gr_gm20b_update_ctxsw_preemption_mode; | ||
819 | } | 861 | } |