summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Deepak Nibade <dnibade@nvidia.com> 2018-04-23 07:18:33 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-05-16 06:10:37 -0400
commit: 0301cc01f6cbfb752290bc63a2ed4eb19129c7c1 (patch)
tree: fafe6562b8251c88f130f6368b9a41eb622669b9
parent: 4ff87c7d35f34e01e138cbedb143a37ff32a8926 (diff)
gpu: nvgpu: add HAL to insert semaphore commands
Add the following new HALs:
- gops.fifo.add_sema_cmd() to insert HOST semaphore acquire/release methods
- gops.fifo.get_sema_wait_cmd_size() to get the size of the acquire command buffer
- gops.fifo.get_sema_incr_cmd_size() to get the size of the release command buffer

Separate out a new API, gk20a_fifo_add_sema_cmd(), to implement the semaphore acquire/release sequence, and set it as gops.fifo.add_sema_cmd().

Add gk20a_fifo_get_sema_wait_cmd_size() and gk20a_fifo_get_sema_incr_cmd_size() to return the respective command buffer sizes.

Jira NVGPUT-16
Change-Id: Ia81a50921a6a56ebc237f2f90b137268aaa2d749
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1704490
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c 4
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c 38
-rw-r--r-- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c 54
-rw-r--r-- drivers/gpu/nvgpu/gk20a/fifo_gk20a.h 6
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.h 6
-rw-r--r-- drivers/gpu/nvgpu/gm20b/hal_gm20b.c 3
-rw-r--r-- drivers/gpu/nvgpu/gp106/hal_gp106.c 3
-rw-r--r-- drivers/gpu/nvgpu/gp10b/hal_gp10b.c 3
-rw-r--r-- drivers/gpu/nvgpu/gv100/hal_gv100.c 3
-rw-r--r-- drivers/gpu/nvgpu/gv11b/hal_gv11b.c 3
-rw-r--r-- drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c 3
-rw-r--r-- drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c 3
12 files changed, 92 insertions, 37 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c
index 2bb71c99..25832417 100644
--- a/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c
+++ b/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c
@@ -34,12 +34,14 @@ int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s,
34 int max_wait_cmds) 34 int max_wait_cmds)
35{ 35{
36 int err; 36 int err;
37 const int wait_cmd_size = 8; 37 int wait_cmd_size;
38 int num_wait_cmds; 38 int num_wait_cmds;
39 int i; 39 int i;
40 struct nvgpu_semaphore *sema; 40 struct nvgpu_semaphore *sema;
41 struct sync_fence *sync_fence = nvgpu_get_sync_fence(s); 41 struct sync_fence *sync_fence = nvgpu_get_sync_fence(s);
42 42
43 wait_cmd_size = c->g->ops.fifo.get_sema_wait_cmd_size();
44
43 num_wait_cmds = sync_fence->num_fences; 45 num_wait_cmds = sync_fence->num_fences;
44 if (num_wait_cmds == 0) 46 if (num_wait_cmds == 0)
45 return 0; 47 return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 68fbb738..7a664bf8 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -372,41 +372,7 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
372 if (!acquire) 372 if (!acquire)
373 nvgpu_semaphore_prepare(s, c->hw_sema); 373 nvgpu_semaphore_prepare(s, c->hw_sema);
374 374
375 /* semaphore_a */ 375 g->ops.fifo.add_sema_cmd(g, s, va, cmd, off, acquire, wfi);
376 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004);
377 /* offset_upper */
378 nvgpu_mem_wr32(g, cmd->mem, off++, (va >> 32) & 0xff);
379 /* semaphore_b */
380 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010005);
381 /* offset */
382 nvgpu_mem_wr32(g, cmd->mem, off++, va & 0xffffffff);
383
384 if (acquire) {
385 /* semaphore_c */
386 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
387 /* payload */
388 nvgpu_mem_wr32(g, cmd->mem, off++,
389 nvgpu_semaphore_get_value(s));
390 /* semaphore_d */
391 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
392 /* operation: acq_geq, switch_en */
393 nvgpu_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12));
394 } else {
395 /* semaphore_c */
396 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
397 /* payload */
398 nvgpu_mem_wr32(g, cmd->mem, off++,
399 nvgpu_semaphore_get_value(s));
400 /* semaphore_d */
401 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
402 /* operation: release, wfi */
403 nvgpu_mem_wr32(g, cmd->mem, off++,
404 0x2 | ((wfi ? 0x0 : 0x1) << 20));
405 /* non_stall_int */
406 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008);
407 /* ignored */
408 nvgpu_mem_wr32(g, cmd->mem, off++, 0);
409 }
410 376
411 if (acquire) 377 if (acquire)
412 gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3d" 378 gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3d"
@@ -495,7 +461,7 @@ static int __gk20a_channel_semaphore_incr(
495 return -ENOMEM; 461 return -ENOMEM;
496 } 462 }
497 463
498 incr_cmd_size = 10; 464 incr_cmd_size = c->g->ops.fifo.get_sema_incr_cmd_size();
499 err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd); 465 err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd);
500 if (err) { 466 if (err) {
501 nvgpu_err(c->g, 467 nvgpu_err(c->g,
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 0c3e8039..aada3065 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -4077,6 +4077,60 @@ const char *gk20a_fifo_interleave_level_name(u32 interleave_level)
4077 } 4077 }
4078} 4078}
4079 4079
4080u32 gk20a_fifo_get_sema_wait_cmd_size(void)
4081{
4082 return 8;
4083}
4084
4085u32 gk20a_fifo_get_sema_incr_cmd_size(void)
4086{
4087 return 10;
4088}
4089
4090void gk20a_fifo_add_sema_cmd(struct gk20a *g,
4091 struct nvgpu_semaphore *s, u64 sema_va,
4092 struct priv_cmd_entry *cmd,
4093 u32 off, bool acquire, bool wfi)
4094{
4095 nvgpu_log_fn(g, " ");
4096
4097 /* semaphore_a */
4098 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004);
4099 /* offset_upper */
4100 nvgpu_mem_wr32(g, cmd->mem, off++, (sema_va >> 32) & 0xff);
4101 /* semaphore_b */
4102 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010005);
4103 /* offset */
4104 nvgpu_mem_wr32(g, cmd->mem, off++, sema_va & 0xffffffff);
4105
4106 if (acquire) {
4107 /* semaphore_c */
4108 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
4109 /* payload */
4110 nvgpu_mem_wr32(g, cmd->mem, off++,
4111 nvgpu_semaphore_get_value(s));
4112 /* semaphore_d */
4113 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
4114 /* operation: acq_geq, switch_en */
4115 nvgpu_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12));
4116 } else {
4117 /* semaphore_c */
4118 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
4119 /* payload */
4120 nvgpu_mem_wr32(g, cmd->mem, off++,
4121 nvgpu_semaphore_get_value(s));
4122 /* semaphore_d */
4123 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
4124 /* operation: release, wfi */
4125 nvgpu_mem_wr32(g, cmd->mem, off++,
4126 0x2 | ((wfi ? 0x0 : 0x1) << 20));
4127 /* non_stall_int */
4128 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008);
4129 /* ignored */
4130 nvgpu_mem_wr32(g, cmd->mem, off++, 0);
4131 }
4132}
4133
4080#ifdef CONFIG_TEGRA_GK20A_NVHOST 4134#ifdef CONFIG_TEGRA_GK20A_NVHOST
4081void gk20a_fifo_add_syncpt_wait_cmd(struct gk20a *g, 4135void gk20a_fifo_add_syncpt_wait_cmd(struct gk20a *g,
4082 struct priv_cmd_entry *cmd, u32 off, 4136 struct priv_cmd_entry *cmd, u32 off,
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 20533f5d..7216302c 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -444,4 +444,10 @@ void gk20a_fifo_get_mmu_fault_info(struct gk20a *g, u32 mmu_fault_id,
444void gk20a_fifo_get_mmu_fault_desc(struct mmu_fault_info *mmfault); 444void gk20a_fifo_get_mmu_fault_desc(struct mmu_fault_info *mmfault);
445void gk20a_fifo_get_mmu_fault_client_desc(struct mmu_fault_info *mmfault); 445void gk20a_fifo_get_mmu_fault_client_desc(struct mmu_fault_info *mmfault);
446void gk20a_fifo_get_mmu_fault_gpc_desc(struct mmu_fault_info *mmfault); 446void gk20a_fifo_get_mmu_fault_gpc_desc(struct mmu_fault_info *mmfault);
447u32 gk20a_fifo_get_sema_wait_cmd_size(void);
448u32 gk20a_fifo_get_sema_incr_cmd_size(void);
449void gk20a_fifo_add_sema_cmd(struct gk20a *g,
450 struct nvgpu_semaphore *s, u64 sema_va,
451 struct priv_cmd_entry *cmd,
452 u32 off, bool acquire, bool wfi);
447#endif /*__GR_GK20A_H__*/ 453#endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 23e85ee9..17f662df 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -678,6 +678,12 @@ struct gpu_ops {
678 u32 count, u32 buffer_index); 678 u32 count, u32 buffer_index);
679 int (*runlist_wait_pending)(struct gk20a *g, u32 runlist_id); 679 int (*runlist_wait_pending)(struct gk20a *g, u32 runlist_id);
680 void (*ring_channel_doorbell)(struct channel_gk20a *c); 680 void (*ring_channel_doorbell)(struct channel_gk20a *c);
681 u32 (*get_sema_wait_cmd_size)(void);
682 u32 (*get_sema_incr_cmd_size)(void);
683 void (*add_sema_cmd)(struct gk20a *g,
684 struct nvgpu_semaphore *s, u64 sema_va,
685 struct priv_cmd_entry *cmd,
686 u32 off, bool acquire, bool wfi);
681 } fifo; 687 } fifo;
682 struct pmu_v { 688 struct pmu_v {
683 u32 (*get_pmu_cmdline_args_size)(struct nvgpu_pmu *pmu); 689 u32 (*get_pmu_cmdline_args_size)(struct nvgpu_pmu *pmu);
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 26b3f61a..bbeaa40f 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -467,6 +467,9 @@ static const struct gpu_ops gm20b_ops = {
467#endif 467#endif
468 .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, 468 .runlist_hw_submit = gk20a_fifo_runlist_hw_submit,
469 .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, 469 .runlist_wait_pending = gk20a_fifo_runlist_wait_pending,
470 .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size,
471 .get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size,
472 .add_sema_cmd = gk20a_fifo_add_sema_cmd,
470 }, 473 },
471 .gr_ctx = { 474 .gr_ctx = {
472 .get_netlist_name = gr_gm20b_get_netlist_name, 475 .get_netlist_name = gr_gm20b_get_netlist_name,
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 6d3154e3..4111ac7d 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -531,6 +531,9 @@ static const struct gpu_ops gp106_ops = {
531 .device_info_fault_id = top_device_info_data_fault_id_enum_v, 531 .device_info_fault_id = top_device_info_data_fault_id_enum_v,
532 .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, 532 .runlist_hw_submit = gk20a_fifo_runlist_hw_submit,
533 .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, 533 .runlist_wait_pending = gk20a_fifo_runlist_wait_pending,
534 .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size,
535 .get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size,
536 .add_sema_cmd = gk20a_fifo_add_sema_cmd,
534 }, 537 },
535 .gr_ctx = { 538 .gr_ctx = {
536 .get_netlist_name = gr_gp106_get_netlist_name, 539 .get_netlist_name = gr_gp106_get_netlist_name,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 79eeb25a..e4bf0fd7 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -504,6 +504,9 @@ static const struct gpu_ops gp10b_ops = {
504 .device_info_fault_id = top_device_info_data_fault_id_enum_v, 504 .device_info_fault_id = top_device_info_data_fault_id_enum_v,
505 .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, 505 .runlist_hw_submit = gk20a_fifo_runlist_hw_submit,
506 .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, 506 .runlist_wait_pending = gk20a_fifo_runlist_wait_pending,
507 .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size,
508 .get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size,
509 .add_sema_cmd = gk20a_fifo_add_sema_cmd,
507 }, 510 },
508 .gr_ctx = { 511 .gr_ctx = {
509 .get_netlist_name = gr_gp10b_get_netlist_name, 512 .get_netlist_name = gr_gp10b_get_netlist_name,
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index dc404b7a..c64a06ca 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -597,6 +597,9 @@ static const struct gpu_ops gv100_ops = {
597 .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, 597 .runlist_hw_submit = gk20a_fifo_runlist_hw_submit,
598 .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, 598 .runlist_wait_pending = gk20a_fifo_runlist_wait_pending,
599 .ring_channel_doorbell = gv11b_ring_channel_doorbell, 599 .ring_channel_doorbell = gv11b_ring_channel_doorbell,
600 .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size,
601 .get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size,
602 .add_sema_cmd = gk20a_fifo_add_sema_cmd,
600 }, 603 },
601 .gr_ctx = { 604 .gr_ctx = {
602 .get_netlist_name = gr_gv100_get_netlist_name, 605 .get_netlist_name = gr_gv100_get_netlist_name,
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index c62dee26..7bfcc1dd 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -566,6 +566,9 @@ static const struct gpu_ops gv11b_ops = {
566 .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, 566 .runlist_hw_submit = gk20a_fifo_runlist_hw_submit,
567 .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, 567 .runlist_wait_pending = gk20a_fifo_runlist_wait_pending,
568 .ring_channel_doorbell = gv11b_ring_channel_doorbell, 568 .ring_channel_doorbell = gv11b_ring_channel_doorbell,
569 .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size,
570 .get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size,
571 .add_sema_cmd = gk20a_fifo_add_sema_cmd,
569 }, 572 },
570 .gr_ctx = { 573 .gr_ctx = {
571 .get_netlist_name = gr_gv11b_get_netlist_name, 574 .get_netlist_name = gr_gv11b_get_netlist_name,
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
index 5630e406..708c52a1 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -377,6 +377,9 @@ static const struct gpu_ops vgpu_gp10b_ops = {
377 .device_info_fault_id = top_device_info_data_fault_id_enum_v, 377 .device_info_fault_id = top_device_info_data_fault_id_enum_v,
378 .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, 378 .runlist_hw_submit = gk20a_fifo_runlist_hw_submit,
379 .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, 379 .runlist_wait_pending = gk20a_fifo_runlist_wait_pending,
380 .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size,
381 .get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size,
382 .add_sema_cmd = gk20a_fifo_add_sema_cmd,
380 }, 383 },
381 .gr_ctx = { 384 .gr_ctx = {
382 .get_netlist_name = gr_gp10b_get_netlist_name, 385 .get_netlist_name = gr_gp10b_get_netlist_name,
diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
index 7b536329..7bc053e8 100644
--- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -425,6 +425,9 @@ static const struct gpu_ops vgpu_gv11b_ops = {
425 .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, 425 .runlist_hw_submit = gk20a_fifo_runlist_hw_submit,
426 .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, 426 .runlist_wait_pending = gk20a_fifo_runlist_wait_pending,
427 .ring_channel_doorbell = gv11b_ring_channel_doorbell, 427 .ring_channel_doorbell = gv11b_ring_channel_doorbell,
428 .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size,
429 .get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size,
430 .add_sema_cmd = gk20a_fifo_add_sema_cmd,
428 }, 431 },
429 .gr_ctx = { 432 .gr_ctx = {
430 .get_netlist_name = gr_gv11b_get_netlist_name, 433 .get_netlist_name = gr_gv11b_get_netlist_name,