diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2014-12-03 09:13:39 -0500 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:12:27 -0400 |
commit | 0d9bb7f82e99a014d56d2662a67fc8efa86d398a (patch) | |
tree | e0ff8d4663dfc32fbdf061a6b04db5ec05459c72 /drivers/gpu | |
parent | 5477d0f4c226847fe030ad00425e00206118b0d6 (diff) |
gpu: nvgpu: Per-chip context creation
Add HAL for context creation, and expose functions that T18x context
creation needs.
Bug 1517461
Bug 1521790
Bug 200063473
Change-Id: I63d1c52594e851570b677184a4585d402125a86d
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/660237
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 10 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 65 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 37 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 3 |
5 files changed, 83 insertions, 39 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index ef43d29a..75c6ef89 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -144,6 +144,16 @@ struct gpu_ops { | |||
144 | int (*add_zbc_depth)(struct gk20a *g, struct gr_gk20a *gr, | 144 | int (*add_zbc_depth)(struct gk20a *g, struct gr_gk20a *gr, |
145 | struct zbc_entry *depth_val, u32 index); | 145 | struct zbc_entry *depth_val, u32 index); |
146 | u32 (*pagepool_default_size)(struct gk20a *g); | 146 | u32 (*pagepool_default_size)(struct gk20a *g); |
147 | int (*init_ctx_state)(struct gk20a *g); | ||
148 | int (*alloc_gr_ctx)(struct gk20a *g, | ||
149 | struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, | ||
150 | u32 padding); | ||
151 | void (*free_gr_ctx)(struct gk20a *g, | ||
152 | struct vm_gk20a *vm, | ||
153 | struct gr_ctx_desc *gr_ctx); | ||
154 | void (*update_ctxsw_preemption_mode)(struct gk20a *g, | ||
155 | struct channel_ctx_gk20a *ch_ctx, | ||
156 | void *ctx_ptr); | ||
147 | } gr; | 157 | } gr; |
148 | const char *name; | 158 | const char *name; |
149 | struct { | 159 | struct { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 867e775a..e9b39487 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -67,7 +67,7 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c); | |||
67 | 67 | ||
68 | /* channel gr ctx buffer */ | 68 | /* channel gr ctx buffer */ |
69 | static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, | 69 | static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, |
70 | struct channel_gk20a *c); | 70 | struct channel_gk20a *c, u32 padding); |
71 | static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c); | 71 | static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c); |
72 | 72 | ||
73 | /* channel patch ctx buffer */ | 73 | /* channel patch ctx buffer */ |
@@ -469,29 +469,7 @@ static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id, | |||
469 | 469 | ||
470 | /* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...) | 470 | /* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...) |
471 | * We should replace most, if not all, fecs method calls to this instead. */ | 471 | * We should replace most, if not all, fecs method calls to this instead. */ |
472 | struct fecs_method_op_gk20a { | 472 | int gr_gk20a_submit_fecs_method_op(struct gk20a *g, |
473 | struct { | ||
474 | u32 addr; | ||
475 | u32 data; | ||
476 | } method; | ||
477 | |||
478 | struct { | ||
479 | u32 id; | ||
480 | u32 data; | ||
481 | u32 clr; | ||
482 | u32 *ret; | ||
483 | u32 ok; | ||
484 | u32 fail; | ||
485 | } mailbox; | ||
486 | |||
487 | struct { | ||
488 | u32 ok; | ||
489 | u32 fail; | ||
490 | } cond; | ||
491 | |||
492 | }; | ||
493 | |||
494 | static int gr_gk20a_submit_fecs_method_op(struct gk20a *g, | ||
495 | struct fecs_method_op_gk20a op) | 473 | struct fecs_method_op_gk20a op) |
496 | { | 474 | { |
497 | struct gr_gk20a *gr = &g->gr; | 475 | struct gr_gk20a *gr = &g->gr; |
@@ -1649,6 +1627,8 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1649 | v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); | 1627 | v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); |
1650 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0, v); | 1628 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0, v); |
1651 | 1629 | ||
1630 | if (g->ops.gr.update_ctxsw_preemption_mode) | ||
1631 | g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, ctx_ptr); | ||
1652 | 1632 | ||
1653 | vunmap(ctx_ptr); | 1633 | vunmap(ctx_ptr); |
1654 | 1634 | ||
@@ -2198,7 +2178,7 @@ static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g) | |||
2198 | return 0; | 2178 | return 0; |
2199 | } | 2179 | } |
2200 | 2180 | ||
2201 | static int gr_gk20a_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr) | 2181 | int gr_gk20a_init_ctx_state(struct gk20a *g) |
2202 | { | 2182 | { |
2203 | u32 pm_ctx_image_size; | 2183 | u32 pm_ctx_image_size; |
2204 | u32 ret; | 2184 | u32 ret; |
@@ -2515,8 +2495,9 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c) | |||
2515 | c->ch_ctx.global_ctx_buffer_mapped = false; | 2495 | c->ch_ctx.global_ctx_buffer_mapped = false; |
2516 | } | 2496 | } |
2517 | 2497 | ||
2518 | static int __gr_gk20a_alloc_gr_ctx(struct gk20a *g, | 2498 | int gr_gk20a_alloc_gr_ctx(struct gk20a *g, |
2519 | struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm) | 2499 | struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, |
2500 | u32 padding) | ||
2520 | { | 2501 | { |
2521 | struct gr_ctx_desc *gr_ctx = NULL; | 2502 | struct gr_ctx_desc *gr_ctx = NULL; |
2522 | struct gr_gk20a *gr = &g->gr; | 2503 | struct gr_gk20a *gr = &g->gr; |
@@ -2581,7 +2562,7 @@ static int __gr_gk20a_alloc_gr_ctx(struct gk20a *g, | |||
2581 | } | 2562 | } |
2582 | 2563 | ||
2583 | static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, | 2564 | static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, |
2584 | struct tsg_gk20a *tsg) | 2565 | struct tsg_gk20a *tsg, u32 padding) |
2585 | { | 2566 | { |
2586 | struct gr_ctx_desc **gr_ctx = &tsg->tsg_gr_ctx; | 2567 | struct gr_ctx_desc **gr_ctx = &tsg->tsg_gr_ctx; |
2587 | int err; | 2568 | int err; |
@@ -2591,7 +2572,7 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, | |||
2591 | return -ENOMEM; | 2572 | return -ENOMEM; |
2592 | } | 2573 | } |
2593 | 2574 | ||
2594 | err = __gr_gk20a_alloc_gr_ctx(g, gr_ctx, tsg->vm); | 2575 | err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, tsg->vm, padding); |
2595 | if (err) | 2576 | if (err) |
2596 | return err; | 2577 | return err; |
2597 | 2578 | ||
@@ -2599,18 +2580,19 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, | |||
2599 | } | 2580 | } |
2600 | 2581 | ||
2601 | static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, | 2582 | static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, |
2602 | struct channel_gk20a *c) | 2583 | struct channel_gk20a *c, |
2584 | u32 padding) | ||
2603 | { | 2585 | { |
2604 | struct gr_ctx_desc **gr_ctx = &c->ch_ctx.gr_ctx; | 2586 | struct gr_ctx_desc **gr_ctx = &c->ch_ctx.gr_ctx; |
2605 | int err = __gr_gk20a_alloc_gr_ctx(g, gr_ctx, c->vm); | 2587 | int err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, c->vm, padding); |
2606 | if (err) | 2588 | if (err) |
2607 | return err; | 2589 | return err; |
2608 | 2590 | ||
2609 | return 0; | 2591 | return 0; |
2610 | } | 2592 | } |
2611 | 2593 | ||
2612 | static void __gr_gk20a_free_gr_ctx(struct gk20a *g, | 2594 | void gr_gk20a_free_gr_ctx(struct gk20a *g, |
2613 | struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx) | 2595 | struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx) |
2614 | { | 2596 | { |
2615 | struct device *d = dev_from_gk20a(g); | 2597 | struct device *d = dev_from_gk20a(g); |
2616 | DEFINE_DMA_ATTRS(attrs); | 2598 | DEFINE_DMA_ATTRS(attrs); |
@@ -2636,12 +2618,14 @@ void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg) | |||
2636 | gk20a_err(dev_from_gk20a(tsg->g), "No address space bound\n"); | 2618 | gk20a_err(dev_from_gk20a(tsg->g), "No address space bound\n"); |
2637 | return; | 2619 | return; |
2638 | } | 2620 | } |
2639 | __gr_gk20a_free_gr_ctx(tsg->g, tsg->vm, tsg->tsg_gr_ctx); | 2621 | tsg->g->ops.gr.free_gr_ctx(tsg->g, tsg->vm, tsg->tsg_gr_ctx); |
2622 | tsg->tsg_gr_ctx = NULL; | ||
2640 | } | 2623 | } |
2641 | 2624 | ||
2642 | static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c) | 2625 | static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c) |
2643 | { | 2626 | { |
2644 | __gr_gk20a_free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx); | 2627 | c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx); |
2628 | c->ch_ctx.gr_ctx = NULL; | ||
2645 | } | 2629 | } |
2646 | 2630 | ||
2647 | static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, | 2631 | static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, |
@@ -2793,7 +2777,8 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, | |||
2793 | /* allocate gr ctx buffer */ | 2777 | /* allocate gr ctx buffer */ |
2794 | if (!tsg) { | 2778 | if (!tsg) { |
2795 | if (!ch_ctx->gr_ctx) { | 2779 | if (!ch_ctx->gr_ctx) { |
2796 | err = gr_gk20a_alloc_channel_gr_ctx(g, c); | 2780 | err = gr_gk20a_alloc_channel_gr_ctx(g, c, |
2781 | args->padding); | ||
2797 | if (err) { | 2782 | if (err) { |
2798 | gk20a_err(dev_from_gk20a(g), | 2783 | gk20a_err(dev_from_gk20a(g), |
2799 | "fail to allocate gr ctx buffer"); | 2784 | "fail to allocate gr ctx buffer"); |
@@ -2812,7 +2797,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, | |||
2812 | if (!tsg->tsg_gr_ctx) { | 2797 | if (!tsg->tsg_gr_ctx) { |
2813 | tsg->vm = c->vm; | 2798 | tsg->vm = c->vm; |
2814 | gk20a_vm_get(tsg->vm); | 2799 | gk20a_vm_get(tsg->vm); |
2815 | err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg); | 2800 | err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, args->padding); |
2816 | if (err) { | 2801 | if (err) { |
2817 | gk20a_err(dev_from_gk20a(g), | 2802 | gk20a_err(dev_from_gk20a(g), |
2818 | "fail to allocate TSG gr ctx buffer"); | 2803 | "fail to allocate TSG gr ctx buffer"); |
@@ -4480,7 +4465,6 @@ static int gr_gk20a_wait_mem_scrubbing(struct gk20a *g) | |||
4480 | 4465 | ||
4481 | static int gr_gk20a_init_ctxsw(struct gk20a *g) | 4466 | static int gr_gk20a_init_ctxsw(struct gk20a *g) |
4482 | { | 4467 | { |
4483 | struct gr_gk20a *gr = &g->gr; | ||
4484 | u32 err = 0; | 4468 | u32 err = 0; |
4485 | 4469 | ||
4486 | err = g->ops.gr.load_ctxsw_ucode(g); | 4470 | err = g->ops.gr.load_ctxsw_ucode(g); |
@@ -4493,7 +4477,7 @@ static int gr_gk20a_init_ctxsw(struct gk20a *g) | |||
4493 | 4477 | ||
4494 | /* this appears query for sw states but fecs actually init | 4478 | /* this appears query for sw states but fecs actually init |
4495 | ramchain, etc so this is hw init */ | 4479 | ramchain, etc so this is hw init */ |
4496 | err = gr_gk20a_init_ctx_state(g, gr); | 4480 | err = g->ops.gr.init_ctx_state(g); |
4497 | if (err) | 4481 | if (err) |
4498 | goto out; | 4482 | goto out; |
4499 | 4483 | ||
@@ -7357,5 +7341,8 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
7357 | gops->gr.add_zbc_color = gr_gk20a_add_zbc_color; | 7341 | gops->gr.add_zbc_color = gr_gk20a_add_zbc_color; |
7358 | gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth; | 7342 | gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth; |
7359 | gops->gr.pagepool_default_size = gr_gk20a_pagepool_default_size; | 7343 | gops->gr.pagepool_default_size = gr_gk20a_pagepool_default_size; |
7344 | gops->gr.init_ctx_state = gr_gk20a_init_ctx_state; | ||
7345 | gops->gr.alloc_gr_ctx = gr_gk20a_alloc_gr_ctx; | ||
7346 | gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx; | ||
7360 | } | 7347 | } |
7361 | 7348 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index e5d315e5..309faf3b 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -19,6 +19,9 @@ | |||
19 | #define GR_GK20A_H | 19 | #define GR_GK20A_H |
20 | 20 | ||
21 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
22 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | ||
23 | #include "gr_t18x.h" | ||
24 | #endif | ||
22 | 25 | ||
23 | #include "tsg_gk20a.h" | 26 | #include "tsg_gk20a.h" |
24 | #include "gr_ctx_gk20a.h" | 27 | #include "gr_ctx_gk20a.h" |
@@ -284,6 +287,10 @@ struct gr_gk20a { | |||
284 | void (*remove_support)(struct gr_gk20a *gr); | 287 | void (*remove_support)(struct gr_gk20a *gr); |
285 | bool sw_ready; | 288 | bool sw_ready; |
286 | bool skip_ucode_init; | 289 | bool skip_ucode_init; |
290 | |||
291 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | ||
292 | struct gr_t18x t18x; | ||
293 | #endif | ||
287 | }; | 294 | }; |
288 | 295 | ||
289 | void gk20a_fecs_dump_falcon_stats(struct gk20a *g); | 296 | void gk20a_fecs_dump_falcon_stats(struct gk20a *g); |
@@ -336,6 +343,28 @@ struct gk20a_ctxsw_bootloader_desc { | |||
336 | u32 entry_point; | 343 | u32 entry_point; |
337 | }; | 344 | }; |
338 | 345 | ||
346 | struct fecs_method_op_gk20a { | ||
347 | struct { | ||
348 | u32 addr; | ||
349 | u32 data; | ||
350 | } method; | ||
351 | |||
352 | struct { | ||
353 | u32 id; | ||
354 | u32 data; | ||
355 | u32 clr; | ||
356 | u32 *ret; | ||
357 | u32 ok; | ||
358 | u32 fail; | ||
359 | } mailbox; | ||
360 | |||
361 | struct { | ||
362 | u32 ok; | ||
363 | u32 fail; | ||
364 | } cond; | ||
365 | |||
366 | }; | ||
367 | |||
339 | struct gpu_ops; | 368 | struct gpu_ops; |
340 | int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | 369 | int gr_gk20a_load_golden_ctx_image(struct gk20a *g, |
341 | struct channel_gk20a *c); | 370 | struct channel_gk20a *c); |
@@ -462,4 +491,12 @@ int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, | |||
462 | struct zbc_entry *depth_val, u32 index); | 491 | struct zbc_entry *depth_val, u32 index); |
463 | int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, | 492 | int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, |
464 | u32 expect_delay); | 493 | u32 expect_delay); |
494 | int gr_gk20a_init_ctx_state(struct gk20a *g); | ||
495 | int gr_gk20a_submit_fecs_method_op(struct gk20a *g, | ||
496 | struct fecs_method_op_gk20a op); | ||
497 | int gr_gk20a_alloc_gr_ctx(struct gk20a *g, | ||
498 | struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, | ||
499 | u32 padding); | ||
500 | void gr_gk20a_free_gr_ctx(struct gk20a *g, | ||
501 | struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx); | ||
465 | #endif /*__GR_GK20A_H__*/ | 502 | #endif /*__GR_GK20A_H__*/ |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 041c7edf..4dbde580 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -141,11 +141,18 @@ struct gr_ctx_buffer_desc { | |||
141 | void *priv; | 141 | void *priv; |
142 | }; | 142 | }; |
143 | 143 | ||
144 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | ||
145 | #include "gr_t18x.h" | ||
146 | #endif | ||
147 | |||
144 | struct gr_ctx_desc { | 148 | struct gr_ctx_desc { |
145 | struct page **pages; | 149 | struct page **pages; |
146 | u64 iova; | 150 | u64 iova; |
147 | size_t size; | 151 | size_t size; |
148 | u64 gpu_va; | 152 | u64 gpu_va; |
153 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | ||
154 | struct gr_ctx_desc_t18x t18x; | ||
155 | #endif | ||
149 | }; | 156 | }; |
150 | 157 | ||
151 | struct compbit_store_desc { | 158 | struct compbit_store_desc { |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 66b94943..cba51cd6 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -813,4 +813,7 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
813 | gops->gr.add_zbc_color = gr_gk20a_add_zbc_color; | 813 | gops->gr.add_zbc_color = gr_gk20a_add_zbc_color; |
814 | gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth; | 814 | gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth; |
815 | gops->gr.pagepool_default_size = gr_gm20b_pagepool_default_size; | 815 | gops->gr.pagepool_default_size = gr_gm20b_pagepool_default_size; |
816 | gops->gr.init_ctx_state = gr_gk20a_init_ctx_state; | ||
817 | gops->gr.alloc_gr_ctx = gr_gk20a_alloc_gr_ctx; | ||
818 | gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx; | ||
816 | } | 819 | } |