From 70e69e2686527990865b221a60e0ec1e9a53d316 Mon Sep 17 00:00:00 2001 From: Debarshi Dutta Date: Mon, 23 Apr 2018 17:26:51 +0530 Subject: gpu: nvgpu: adapt gk20a_channel_syncpt to use os_fence This patch adapts gk20a_channel_syncpt to use os_fence for post fence as well as pre-fence(wait) use cases. Jira NVGPU-66 Change-Id: I49627d1f88d52a53511a02f5de60fed6df8350de Signed-off-by: Debarshi Dutta Reviewed-on: https://git-master.nvidia.com/r/1676631 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile | 4 + drivers/gpu/nvgpu/common/linux/os_fence_android.c | 14 +- .../nvgpu/common/linux/os_fence_android_syncpt.c | 121 +++++++++++++++++ drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 148 ++++++++------------- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 4 + .../nvgpu/include/nvgpu/linux/os_fence_android.h | 6 +- drivers/gpu/nvgpu/include/nvgpu/os_fence.h | 19 +++ 7 files changed, 213 insertions(+), 103 deletions(-) create mode 100644 drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 9a46f12c..5af7de35 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -164,6 +164,10 @@ nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o \ common/linux/os_fence_android.o \ common/linux/os_fence_android_sema.o +ifeq ($(CONFIG_TEGRA_GK20A_NVHOST), y) +nvgpu-$(CONFIG_SYNC) += common/linux/os_fence_android_syncpt.o +endif + nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o \ common/linux/pci_usermode.o \ diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android.c b/drivers/gpu/nvgpu/common/linux/os_fence_android.c index d689a2a8..d8f70639 100644 --- a/drivers/gpu/nvgpu/common/linux/os_fence_android.c +++ b/drivers/gpu/nvgpu/common/linux/os_fence_android.c @@ -55,15 +55,15 @@ void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s) int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out, struct channel_gk20a *c, int fd) { - int err; + int err = 
-ENOSYS; - err = nvgpu_os_fence_sema_fdget(fence_out, c, fd); +#ifdef CONFIG_TEGRA_GK20A_NVHOST + err = nvgpu_os_fence_syncpt_fdget(fence_out, c, fd); +#endif + + if (err) + err = nvgpu_os_fence_sema_fdget(fence_out, c, fd); - /* TO-DO - * check if fence is empty and if CONFIG_TEGRA_GK20A_NVHOST - * is enabled, try to get a sync_fence using - * corresponding nvhost method. - */ if (err) nvgpu_err(c->g, "error obtaining fence from fd %d", fd); diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c new file mode 100644 index 00000000..76def831 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#include + +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/channel_gk20a.h" +#include "gk20a/sync_gk20a.h" +#include "gk20a/channel_sync_gk20a.h" +#include "gk20a/mm_gk20a.h" + +#include "../drivers/staging/android/sync.h" + +int nvgpu_os_fence_syncpt_wait_gen_cmd(struct nvgpu_os_fence *s, + struct priv_cmd_entry *wait_cmd, + struct channel_gk20a *c, + int max_wait_cmds) +{ + int err; + int wait_cmd_size; + int num_wait_cmds; + int i; + u32 wait_id; + struct sync_pt *pt; + + struct sync_fence *sync_fence = (struct sync_fence *)s->priv; + + if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds) + return -EINVAL; + + /* validate syncpt ids */ + for (i = 0; i < sync_fence->num_fences; i++) { + pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt); + wait_id = nvgpu_nvhost_sync_pt_id(pt); + if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext( + c->g->nvhost_dev, wait_id)) { + return -EINVAL; + } + } + + num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence); + if (num_wait_cmds == 0) + return 0; + + wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size(); + err = gk20a_channel_alloc_priv_cmdbuf(c, + wait_cmd_size * num_wait_cmds, wait_cmd); + if (err) { + nvgpu_err(c->g, + "not enough priv cmd buffer space"); + return err; + } + + for (i = 0; i < sync_fence->num_fences; i++) { + struct fence *f = sync_fence->cbs[i].sync_pt; + struct sync_pt *pt = sync_pt_from_fence(f); + u32 wait_id = nvgpu_nvhost_sync_pt_id(pt); + u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt); + + err = gk20a_channel_gen_syncpt_wait_cmd(c, wait_id, wait_value, + wait_cmd, wait_cmd_size, i, true); + } + + WARN_ON(i != num_wait_cmds); + + return 0; +} + +static const struct nvgpu_os_fence_ops syncpt_ops = { + .program_waits = nvgpu_os_fence_syncpt_wait_gen_cmd, + .drop_ref = nvgpu_os_fence_android_drop_ref, +}; + +int nvgpu_os_fence_syncpt_create( + struct nvgpu_os_fence *fence_out, struct channel_gk20a *c, + struct nvgpu_nvhost_dev 
*nvhost_dev, u32 id, u32 thresh) +{ + struct sync_fence *fence = nvgpu_nvhost_sync_create_fence( + nvhost_dev, id, thresh, "fence"); + + if (!fence) { + nvgpu_err(c->g, "error constructing fence %s", "fence"); + return -ENOMEM; + } + + nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence); + + return 0; +} + +int nvgpu_os_fence_syncpt_fdget(struct nvgpu_os_fence *fence_out, + struct channel_gk20a *c, int fd) +{ + struct sync_fence *fence = nvgpu_nvhost_sync_fdget(fd); + + if (!fence) + return -ENOMEM; + + nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence); + + return 0; +} \ No newline at end of file diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index a0b0ac1e..16898593 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -51,6 +51,39 @@ struct gk20a_channel_syncpt { struct nvgpu_mem syncpt_buf; }; +int gk20a_channel_gen_syncpt_wait_cmd(struct channel_gk20a *c, + u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd, + u32 wait_cmd_size, int pos, bool preallocated) +{ + int err = 0; + bool is_expired = nvgpu_nvhost_syncpt_is_expired_ext( + c->g->nvhost_dev, id, thresh); + + if (is_expired) { + if (preallocated) { + nvgpu_memset(c->g, wait_cmd->mem, + (wait_cmd->off + pos * wait_cmd_size) * sizeof(u32), + 0, wait_cmd_size * sizeof(u32)); + } + } else { + if (!preallocated) { + err = gk20a_channel_alloc_priv_cmdbuf(c, + c->g->ops.fifo.get_syncpt_wait_cmd_size(), wait_cmd); + if (err) { + nvgpu_err(c->g, "not enough priv cmd buffer space"); + return err; + } + } + nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx", + id, c->vm->syncpt_ro_map_gpu_va); + c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd, + pos * wait_cmd_size, id, thresh, + c->vm->syncpt_ro_map_gpu_va); + } + + return 0; +} + static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd) { @@ -58,108 +91,36 @@ 
static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, container_of(s, struct gk20a_channel_syncpt, ops); struct channel_gk20a *c = sp->c; int err = 0; + u32 wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size(); if (!nvgpu_nvhost_syncpt_is_valid_pt_ext(sp->nvhost_dev, id)) return -EINVAL; - if (nvgpu_nvhost_syncpt_is_expired_ext(sp->nvhost_dev, id, thresh)) - return 0; + err = gk20a_channel_gen_syncpt_wait_cmd(c, id, thresh, + wait_cmd, wait_cmd_size, 0, false); - err = gk20a_channel_alloc_priv_cmdbuf(c, - c->g->ops.fifo.get_syncpt_wait_cmd_size(), wait_cmd); - if (err) { - nvgpu_err(c->g, - "not enough priv cmd buffer space"); - return err; - } - - nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx", - id, sp->c->vm->syncpt_ro_map_gpu_va); - c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd, 0, id, - thresh, c->vm->syncpt_ro_map_gpu_va); - - return 0; + return err; } static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, - struct priv_cmd_entry *wait_cmd, int max_wait_cmds) + struct priv_cmd_entry *wait_cmd, int max_wait_cmds) { -#ifdef CONFIG_SYNC - int i; - int num_wait_cmds; - struct sync_fence *sync_fence; - struct sync_pt *pt; + struct nvgpu_os_fence os_fence = {0}; struct gk20a_channel_syncpt *sp = container_of(s, struct gk20a_channel_syncpt, ops); struct channel_gk20a *c = sp->c; - u32 wait_id; int err = 0; - u32 wait_cmd_size = 0; - - sync_fence = nvgpu_nvhost_sync_fdget(fd); - if (!sync_fence) - return -EINVAL; - if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds) { - sync_fence_put(sync_fence); + err = nvgpu_os_fence_fdget(&os_fence, c, fd); + if (err) return -EINVAL; - } - - /* validate syncpt ids */ - for (i = 0; i < sync_fence->num_fences; i++) { - pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt); - wait_id = nvgpu_nvhost_sync_pt_id(pt); - if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext( - sp->nvhost_dev, wait_id)) { - sync_fence_put(sync_fence); - return -EINVAL; - } - } - 
num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence); - if (num_wait_cmds == 0) { - sync_fence_put(sync_fence); - return 0; - } - wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size(); - err = gk20a_channel_alloc_priv_cmdbuf(c, - wait_cmd_size * num_wait_cmds, - wait_cmd); - if (err) { - nvgpu_err(c->g, - "not enough priv cmd buffer space"); - sync_fence_put(sync_fence); - return err; - } - - i = 0; - for (i = 0; i < sync_fence->num_fences; i++) { - struct fence *f = sync_fence->cbs[i].sync_pt; - struct sync_pt *pt = sync_pt_from_fence(f); - u32 wait_id = nvgpu_nvhost_sync_pt_id(pt); - u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt); - - if (nvgpu_nvhost_syncpt_is_expired_ext(sp->nvhost_dev, - wait_id, wait_value)) { - nvgpu_memset(c->g, wait_cmd->mem, - (wait_cmd->off + i * wait_cmd_size) * sizeof(u32), - 0, wait_cmd_size * sizeof(u32)); - } else { - nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx", - wait_id, sp->syncpt_buf.gpu_va); - c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd, - i * wait_cmd_size, wait_id, wait_value, - c->vm->syncpt_ro_map_gpu_va); - } - } + err = os_fence.ops->program_waits(&os_fence, + wait_cmd, c, max_wait_cmds); - WARN_ON(i != num_wait_cmds); - sync_fence_put(sync_fence); + os_fence.ops->drop_ref(&os_fence); - return 0; -#else - return -ENODEV; -#endif + return err; } static void gk20a_channel_syncpt_update(void *priv, int nr_completed) @@ -185,6 +146,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, container_of(s, struct gk20a_channel_syncpt, ops); struct channel_gk20a *c = sp->c; struct sync_fence *sync_fence = NULL; + struct nvgpu_os_fence os_fence = {0}; err = gk20a_channel_alloc_priv_cmdbuf(c, c->g->ops.fifo.get_syncpt_incr_cmd_size(wfi_cmd), @@ -226,26 +188,22 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, } } -#ifdef CONFIG_SYNC if (need_sync_fence) { - sync_fence = nvgpu_nvhost_sync_create_fence(sp->nvhost_dev, - sp->id, thresh, "fence"); + err = 
nvgpu_os_fence_syncpt_create(&os_fence, c, sp->nvhost_dev, + sp->id, thresh); - if (IS_ERR(sync_fence)) { - err = PTR_ERR(sync_fence); + if (err) goto clean_up_priv_cmd; - } + + sync_fence = (struct sync_fence *)os_fence.priv; } -#endif err = gk20a_fence_from_syncpt(fence, sp->nvhost_dev, sp->id, thresh, sync_fence); if (err) { -#ifdef CONFIG_SYNC - if (sync_fence) - sync_fence_put(sync_fence); -#endif + if (nvgpu_os_fence_is_initialized(&os_fence)) + os_fence.ops->drop_ref(&os_fence); goto clean_up_priv_cmd; } diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index 565b4f86..5c6e91c9 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h @@ -108,6 +108,10 @@ void gk20a_channel_gen_sema_wait_cmd(struct channel_gk20a *c, struct nvgpu_semaphore *sema, struct priv_cmd_entry *wait_cmd, u32 wait_cmd_size, int pos); +int gk20a_channel_gen_syncpt_wait_cmd(struct channel_gk20a *c, + u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd, + u32 wait_cmd_size, int pos, bool preallocated); + void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync, bool set_safe_state); struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c, diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/os_fence_android.h b/drivers/gpu/nvgpu/include/nvgpu/linux/os_fence_android.h index 79cc51ea..39d08339 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/linux/os_fence_android.h +++ b/drivers/gpu/nvgpu/include/nvgpu/linux/os_fence_android.h @@ -39,4 +39,8 @@ void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out, struct gk20a *g, const struct nvgpu_os_fence_ops *fops, struct sync_fence *fence); -#endif \ No newline at end of file +int nvgpu_os_fence_syncpt_fdget( + struct nvgpu_os_fence *fence_out, + struct channel_gk20a *c, int fd); + +#endif /* __NVGPU_OS_FENCE_ANDROID_H__ */ \ No newline at end of file diff --git a/drivers/gpu/nvgpu/include/nvgpu/os_fence.h 
b/drivers/gpu/nvgpu/include/nvgpu/os_fence.h index c8d24fc2..a22140da 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/os_fence.h +++ b/drivers/gpu/nvgpu/include/nvgpu/os_fence.h @@ -28,6 +28,7 @@ struct nvgpu_semaphore; struct channel_gk20a; struct priv_cmd_entry; +struct nvgpu_nvhost_dev; /* * struct nvgpu_os_fence adds an abstraction to the earlier Android Sync @@ -108,4 +109,22 @@ static inline int nvgpu_os_fence_fdget( #endif /* CONFIG_SYNC */ +#if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_SYNC) + +int nvgpu_os_fence_syncpt_create(struct nvgpu_os_fence *fence_out, + struct channel_gk20a *c, struct nvgpu_nvhost_dev *nvhost_dev, + u32 id, u32 thresh); + +#else + +static inline int nvgpu_os_fence_syncpt_create( + struct nvgpu_os_fence *fence_out, struct channel_gk20a *c, + struct nvgpu_nvhost_dev *nvhost_dev, + u32 id, u32 thresh) +{ + return -ENOSYS; +} + +#endif /* CONFIG_TEGRA_GK20A_NVHOST && CONFIG_SYNC */ + #endif /* __NVGPU_OS_FENCE__ */ -- cgit v1.2.2