From d0e4dfd6efd651abc431aba9cfae5907638f8172 Mon Sep 17 00:00:00 2001
From: Debarshi Dutta
Date: Fri, 23 Mar 2018 15:32:27 +0530
Subject: gpu: nvgpu: sync_framework cleanups

This patch contains cleanups meant to simplify the upcoming OS
abstraction patches for the sync framework. The substantial changes are
listed below.

1) sync_timeline is moved out of gk20a_fence into struct
nvgpu_channel_linux. New function pointers, collectively named
os_fence_framework, provide OS-independent methods for
enabling/disabling the timeline. These function pointers live in struct
os_channel under struct gk20a.

2) Construction of the channel_sync requires nvgpu_finalize_poweron_linux()
to be invoked before nvgpu_init_mm_ce_context(). Hence, these calls are
moved out of gk20a_finalize_poweron() and invoked after
nvgpu_finalize_poweron_linux().

3) sync_fence creation is delinked from fence construction and moved
into channel_sync_gk20a's channel_incr methods. These sync_fences are
mainly associated with post_fences.

4) When userspace requires sync_fences to be constructed, an fd is
obtained before gk20a_channel_submit_gpfifo() instead of afterwards.
This avoids the potential after-effects of duplicate work submission
due to a failure to obtain an unused fd.

JIRA NVGPU-66

Change-Id: I42a3e4e2e692a113b1b36d2b48ab107ae4444dfa
Signed-off-by: Debarshi Dutta
Reviewed-on: https://git-master.nvidia.com/r/1678400
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/linux/channel.c       |  79 +++++++++++++++++++
 drivers/gpu/nvgpu/common/linux/channel.h       |  15 +++-
 drivers/gpu/nvgpu/common/linux/ioctl_channel.c |  21 ++++-
 drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c    |  21 ++++-
 drivers/gpu/nvgpu/common/linux/module.c        |  16 +++-
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c        |   4 +-
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h        |   2 +
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c   | 102 ++++++++++++++++---------
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h   |   7 +-
 drivers/gpu/nvgpu/gk20a/fence_gk20a.c          | 101 ++++++++----------------
 drivers/gpu/nvgpu/gk20a/fence_gk20a.h          |  16 ++--
 drivers/gpu/nvgpu/gk20a/gk20a.c                |   4 -
 drivers/gpu/nvgpu/gk20a/gk20a.h                |   5 ++
 drivers/gpu/nvgpu/gk20a/sync_gk20a.c           |  22 +++---
 drivers/gpu/nvgpu/gk20a/sync_gk20a.h           |   9 ++-
 15 files changed, 275 insertions(+), 149 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
index a360d0df..8f2adc3a 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -40,6 +40,8 @@
 #include 
 #include 
 
+#include "gk20a/sync_gk20a.h"
+
 u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags)
 {
 	u32 flags = 0;
@@ -292,6 +294,10 @@ static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)
 	ch->os_priv = priv;
 	priv->ch = ch;
 
+#ifdef CONFIG_SYNC
+	ch->has_os_fence_framework_support = true;
+#endif
+
 	err = nvgpu_mutex_init(&priv->error_notifier.mutex);
 	if (err) {
 		nvgpu_kfree(g, priv);
@@ -309,6 +315,64 @@ static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch)
 
 	nvgpu_mutex_destroy(&priv->error_notifier.mutex);
 	nvgpu_kfree(g, priv);
+
+	ch->os_priv = NULL;
+
+#ifdef CONFIG_SYNC
+	ch->has_os_fence_framework_support = false;
+#endif
+}
+
+static int nvgpu_channel_init_os_fence_framework(struct channel_gk20a *ch,
+		const char *fmt, ...)
+{ + struct nvgpu_channel_linux *priv = ch->os_priv; + struct nvgpu_os_fence_framework *fence_framework; + char name[30]; + va_list args; + + fence_framework = &priv->fence_framework; + + va_start(args, fmt); + vsnprintf(name, sizeof(name), fmt, args); + va_end(args); + + fence_framework->timeline = gk20a_sync_timeline_create(name); + + if (!fence_framework->timeline) + return -EINVAL; + + return 0; +} +static void nvgpu_channel_signal_os_fence_framework(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + struct nvgpu_os_fence_framework *fence_framework; + + fence_framework = &priv->fence_framework; + + gk20a_sync_timeline_signal(fence_framework->timeline); +} + +static void nvgpu_channel_destroy_os_fence_framework(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + struct nvgpu_os_fence_framework *fence_framework; + + fence_framework = &priv->fence_framework; + + gk20a_sync_timeline_destroy(fence_framework->timeline); + fence_framework->timeline = NULL; +} + +static bool nvgpu_channel_fence_framework_exists(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + struct nvgpu_os_fence_framework *fence_framework; + + fence_framework = &priv->fence_framework; + + return (fence_framework->timeline != NULL); } int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) @@ -332,6 +396,16 @@ int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) nvgpu_channel_work_completion_signal; g->os_channel.work_completion_cancel_sync = nvgpu_channel_work_completion_cancel_sync; + + g->os_channel.os_fence_framework_inst_exists = + nvgpu_channel_fence_framework_exists; + g->os_channel.init_os_fence_framework = + nvgpu_channel_init_os_fence_framework; + g->os_channel.signal_os_fence_framework = + nvgpu_channel_signal_os_fence_framework; + g->os_channel.destroy_os_fence_framework = + nvgpu_channel_destroy_os_fence_framework; + return 0; err_clean: @@ -354,6 +428,11 @@ void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l) nvgpu_channel_free_linux(g, ch); } + + g->os_channel.os_fence_framework_inst_exists = NULL; + g->os_channel.init_os_fence_framework = NULL; + g->os_channel.signal_os_fence_framework = NULL; + g->os_channel.destroy_os_fence_framework = NULL; } u32 nvgpu_get_gpfifo_entry_size(void) diff --git a/drivers/gpu/nvgpu/common/linux/channel.h b/drivers/gpu/nvgpu/common/linux/channel.h index d4cb6d55..805de55a 100644 --- a/drivers/gpu/nvgpu/common/linux/channel.h +++ b/drivers/gpu/nvgpu/common/linux/channel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -29,6 +29,9 @@ struct gk20a_fence; struct fifo_profile_gk20a; struct nvgpu_os_linux; +struct sync_fence; +struct sync_timeline; + struct nvgpu_channel_completion_cb { /* * Signal channel owner via a callback, if set, in job cleanup with @@ -52,9 +55,19 @@ struct nvgpu_error_notifier { struct nvgpu_mutex mutex; }; +/* + * This struct contains fence_related data. + * e.g. sync_timeline for sync_fences. 
+ */ +struct nvgpu_os_fence_framework { + struct sync_timeline *timeline; +}; + struct nvgpu_channel_linux { struct channel_gk20a *ch; + struct nvgpu_os_fence_framework fence_framework; + struct nvgpu_channel_completion_cb completion_cb; struct nvgpu_error_notifier error_notifier; diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c index b4d7d501..06dfb180 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c @@ -774,6 +774,7 @@ static int gk20a_ioctl_channel_submit_gpfifo( struct gk20a_fence *fence_out; struct fifo_profile_gk20a *profile = NULL; u32 submit_flags = 0; + int fd = -1; int ret = 0; gk20a_dbg_fn(""); @@ -794,19 +795,31 @@ static int gk20a_ioctl_channel_submit_gpfifo( nvgpu_get_fence_args(&args->fence, &fence); submit_flags = nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags); + + /* Try and allocate an fd here*/ + if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) + && (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) { + fd = get_unused_fd_flags(O_RDWR); + if (fd < 0) + return fd; + } + ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, submit_flags, &fence, &fence_out, false, profile); - if (ret) + if (ret) { + if (fd != -1) + put_unused_fd(fd); goto clean_up; + } /* Convert fence_out to something we can pass back to user space. */ if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) { if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { - int fd = gk20a_fence_install_fd(fence_out); - if (fd < 0) - ret = fd; + ret = gk20a_fence_install_fd(fence_out, fd); + if (ret) + put_unused_fd(fd); else args->fence.id = fd; } else { diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c index e4b66460..70707a5c 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c @@ -344,10 +344,19 @@ static int gk20a_ctrl_prepare_compressible_read( struct gk20a_fence *fence_out = NULL; int submit_flags = nvgpu_submit_gpfifo_user_flags_to_common_flags( args->submit_flags); + int fd = -1; fence.id = args->fence.syncpt_id; fence.value = args->fence.syncpt_value; + /* Try and allocate an fd here*/ + if ((submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) + && (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) { + fd = get_unused_fd_flags(O_RDWR); + if (fd < 0) + return fd; + } + ret = gk20a_prepare_compressible_read(l, args->handle, args->request_compbits, args->offset, args->compbits_hoffset, args->compbits_voffset, @@ -356,20 +365,24 @@ static int gk20a_ctrl_prepare_compressible_read( submit_flags, &fence, &args->valid_compbits, &args->zbc_color, &fence_out); - if (ret) + if (ret) { + if (fd != -1) + put_unused_fd(fd); return ret; + } /* Convert fence_out to something we can pass back to user space. 
*/ if (submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) { if (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { if (fence_out) { - int fd = gk20a_fence_install_fd(fence_out); - if (fd < 0) - ret = fd; + ret = gk20a_fence_install_fd(fence_out, fd); + if (ret) + put_unused_fd(fd); else args->fence.fd = fd; } else { args->fence.fd = -1; + put_unused_fd(fd); } } else { if (fence_out) { diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index b9c9554b..81b3db82 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "platform_gk20a.h" #include "sysfs.h" @@ -252,13 +253,22 @@ int gk20a_pm_finalize_poweron(struct device *dev) return err; err = gk20a_finalize_poweron(g); - set_user_nice(current, nice_value); - if (err) + if (err) { + set_user_nice(current, nice_value); goto done; + } err = nvgpu_finalize_poweron_linux(l); - if (err) + if (err) { + set_user_nice(current, nice_value); goto done; + } + + nvgpu_init_mm_ce_context(g); + + nvgpu_vidmem_thread_unpause(&g->mm); + + set_user_nice(current, nice_value); /* Initialise scaling: it will initialize scaling drive only once */ if (IS_ENABLED(CONFIG_GK20A_DEVFREQ) && diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 2f5514a8..48677529 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -1894,7 +1894,9 @@ void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, WARN_ON(!c->sync); if (c->sync) { - c->sync->signal_timeline(c->sync); + if (c->has_os_fence_framework_support && + g->os_channel.os_fence_framework_inst_exists(c)) + g->os_channel.signal_os_fence_framework(c); if (g->aggressive_sync_destroy_thresh) { nvgpu_mutex_acquire(&c->sync_lock); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 2b8be069..5e8cab0d 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -280,6 +280,8 @@ struct channel_gk20a { struct gk20a_channel_sync *sync; struct gk20a_channel_sync *user_sync; + bool has_os_fence_framework_support; + #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION u64 virt_ctx; #endif diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index bf467210..c0e035ea 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -183,6 +183,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, struct gk20a_channel_syncpt *sp = container_of(s, struct gk20a_channel_syncpt, ops); struct channel_gk20a *c = sp->c; + struct sync_fence *sync_fence = NULL; err = gk20a_channel_alloc_priv_cmdbuf(c, c->g->ops.fifo.get_syncpt_incr_cmd_size(wfi_cmd), @@ -224,10 +225,28 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, } } - err = gk20a_fence_from_syncpt(fence, sp->nvhost_dev, sp->id, thresh, - need_sync_fence); - if (err) +#ifdef CONFIG_SYNC + if (need_sync_fence) { + sync_fence = nvgpu_nvhost_sync_create_fence(sp->nvhost_dev, + sp->id, thresh, "fence"); + + if (IS_ERR(sync_fence)) { + err = PTR_ERR(sync_fence); + goto clean_up_priv_cmd; + } + } +#endif + + err = gk20a_fence_from_syncpt(fence, sp->nvhost_dev, + sp->id, thresh, sync_fence); + + if (err) { +#ifdef CONFIG_SYNC + if (sync_fence) + sync_fence_put(sync_fence); +#endif goto clean_up_priv_cmd; + } return 0; @@ -290,12 +309,6 @@ static void 
gk20a_channel_syncpt_set_safe_state(struct gk20a_channel_sync *s) nvgpu_nvhost_syncpt_set_safe_state(sp->nvhost_dev, sp->id); } -static void gk20a_channel_syncpt_signal_timeline( - struct gk20a_channel_sync *s) -{ - /* Nothing to do. */ -} - static int gk20a_channel_syncpt_id(struct gk20a_channel_sync *s) { struct gk20a_channel_syncpt *sp = @@ -368,7 +381,6 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c, bool user_managed) sp->ops.incr_user = gk20a_channel_syncpt_incr_user; sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max; sp->ops.set_safe_state = gk20a_channel_syncpt_set_safe_state; - sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline; sp->ops.syncpt_id = gk20a_channel_syncpt_id; sp->ops.syncpt_address = gk20a_channel_syncpt_address; sp->ops.destroy = gk20a_channel_syncpt_destroy; @@ -383,9 +395,6 @@ struct gk20a_channel_semaphore { /* A semaphore pool owned by this channel. */ struct nvgpu_semaphore_pool *pool; - - /* A sync timeline that advances when gpu completes work. */ - struct sync_timeline *timeline; }; static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c, @@ -560,6 +569,7 @@ static int __gk20a_channel_semaphore_incr( struct channel_gk20a *c = sp->c; struct nvgpu_semaphore *semaphore; int err = 0; + struct sync_fence *sync_fence = NULL; semaphore = nvgpu_semaphore_alloc(c); if (!semaphore) { @@ -579,12 +589,31 @@ static int __gk20a_channel_semaphore_incr( /* Release the completion semaphore. */ add_sema_cmd(c->g, c, semaphore, incr_cmd, 0, false, wfi_cmd); - err = gk20a_fence_from_semaphore(c->g, fence, - sp->timeline, semaphore, - &c->semaphore_wq, - need_sync_fence); - if (err) +#ifdef CONFIG_SYNC + if (need_sync_fence) { + sync_fence = gk20a_sync_fence_create(c, + semaphore, "f-gk20a-0x%04x", + nvgpu_semaphore_gpu_ro_va(semaphore)); + + if (!sync_fence) { + err = -ENOMEM; + goto clean_up_sema; + } + } +#endif + + err = gk20a_fence_from_semaphore(fence, + semaphore, + &c->semaphore_wq, + sync_fence); + + if (err) { +#ifdef CONFIG_SYNC + if (sync_fence) + sync_fence_put(sync_fence); +#endif goto clean_up_sema; + } return 0; @@ -665,14 +694,6 @@ static void gk20a_channel_semaphore_set_safe_state(struct gk20a_channel_sync *s) /* Nothing to do. */ } -static void gk20a_channel_semaphore_signal_timeline( - struct gk20a_channel_sync *s) -{ - struct gk20a_channel_semaphore *sp = - container_of(s, struct gk20a_channel_semaphore, ops); - gk20a_sync_timeline_signal(sp->timeline); -} - static int gk20a_channel_semaphore_syncpt_id(struct gk20a_channel_sync *s) { return -EINVAL; @@ -687,8 +708,13 @@ static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s) { struct gk20a_channel_semaphore *sema = container_of(s, struct gk20a_channel_semaphore, ops); - if (sema->timeline) - gk20a_sync_timeline_destroy(sema->timeline); + + struct channel_gk20a *c = sema->c; + struct gk20a *g = c->g; + + if (c->has_os_fence_framework_support && + g->os_channel.os_fence_framework_inst_exists(c)) + g->os_channel.destroy_os_fence_framework(c); /* The sema pool is cleaned up by the VM destroy. 
*/ sema->pool = NULL; @@ -700,10 +726,10 @@ static struct gk20a_channel_sync * gk20a_channel_semaphore_create(struct channel_gk20a *c, bool user_managed) { struct gk20a_channel_semaphore *sema; + struct gk20a *g = c->g; char pool_name[20]; -#ifdef CONFIG_SYNC int asid = -1; -#endif + int err; if (WARN_ON(!c->vm)) return NULL; @@ -716,17 +742,20 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c, bool user_managed) sprintf(pool_name, "semaphore_pool-%d", c->chid); sema->pool = c->vm->sema_pool; -#ifdef CONFIG_SYNC if (c->vm->as_share) asid = c->vm->as_share->id; - sema->timeline = gk20a_sync_timeline_create( + if (c->has_os_fence_framework_support) { + /*Init the sync_timeline for this channel */ + err = g->os_channel.init_os_fence_framework(c, "gk20a_ch%d_as%d", c->chid, asid); - if (!sema->timeline) { - gk20a_channel_semaphore_destroy(&sema->ops); - return NULL; + + if (err) { + nvgpu_kfree(g, sema); + return NULL; + } } -#endif + nvgpu_atomic_set(&sema->ops.refcount, 0); sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt; sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd; @@ -735,7 +764,6 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c, bool user_managed) sema->ops.incr_user = gk20a_channel_semaphore_incr_user; sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max; sema->ops.set_safe_state = gk20a_channel_semaphore_set_safe_state; - sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline; sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id; sema->ops.syncpt_address = gk20a_channel_semaphore_syncpt_address; sema->ops.destroy = gk20a_channel_semaphore_destroy; diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index adbecbe1..d63b358f 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h @@ -3,7 +3,7 @@ * * GK20A Channel Synchronization Abstraction * - * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -93,11 +93,6 @@ struct gk20a_channel_sync { */ void (*set_safe_state)(struct gk20a_channel_sync *s); - /* Signals the sync timeline (if owned by the gk20a_channel_sync layer). - * This should be called when we notice that a gk20a_fence is - * expired. 
*/ - void (*signal_timeline)(struct gk20a_channel_sync *s); - /* Returns the sync point id or negative number if no syncpt*/ int (*syncpt_id)(struct gk20a_channel_sync *s); diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c index f74afd6e..f0ad773f 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c @@ -54,9 +54,10 @@ static void gk20a_fence_free(struct nvgpu_ref *ref) struct gk20a *g = f->g; #ifdef CONFIG_SYNC - if (f->sync_fence) - sync_fence_put(f->sync_fence); + if (f->os_fence) + sync_fence_put(f->os_fence); #endif + if (f->semaphore) nvgpu_semaphore_put(f->semaphore); @@ -80,7 +81,7 @@ struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f) return f; } -static inline bool gk20a_fence_is_valid(struct gk20a_fence *f) +inline bool gk20a_fence_is_valid(struct gk20a_fence *f) { bool valid = f->valid; @@ -88,6 +89,21 @@ static inline bool gk20a_fence_is_valid(struct gk20a_fence *f) return valid; } +int gk20a_fence_install_fd(struct gk20a_fence *f, int fd) +{ +#ifdef CONFIG_SYNC + if (!f || !gk20a_fence_is_valid(f) || !f->os_fence) + return -EINVAL; + + sync_fence_get(f->os_fence); + sync_fence_install(f->os_fence, fd); + + return 0; +#else + return -ENODEV; +#endif +} + int gk20a_fence_wait(struct gk20a *g, struct gk20a_fence *f, unsigned long timeout) { @@ -107,26 +123,6 @@ bool gk20a_fence_is_expired(struct gk20a_fence *f) return true; } -int gk20a_fence_install_fd(struct gk20a_fence *f) -{ -#ifdef CONFIG_SYNC - int fd; - - if (!f || !gk20a_fence_is_valid(f) || !f->sync_fence) - return -EINVAL; - - fd = get_unused_fd_flags(O_RDWR); - if (fd < 0) - return fd; - - sync_fence_get(f->sync_fence); - sync_fence_install(f->sync_fence, fd); - return fd; -#else - return -ENODEV; -#endif -} - int gk20a_alloc_fence_pool(struct channel_gk20a *c, unsigned int count) { int err; @@ -195,13 +191,14 @@ struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c) void gk20a_init_fence(struct gk20a_fence *f, const struct gk20a_fence_ops *ops, - struct sync_fence *sync_fence) + struct sync_fence *os_fence) { if (!f) return; f->ops = ops; - f->sync_fence = sync_fence; f->syncpt_id = -1; + f->semaphore = NULL; + f->os_fence = os_fence; } /* Fences that are backed by GPU semaphores: */ @@ -227,36 +224,19 @@ static const struct gk20a_fence_ops nvgpu_semaphore_fence_ops = { .is_expired = &nvgpu_semaphore_fence_is_expired, }; -/* This function takes ownership of the semaphore */ +/* This function takes ownership of the semaphore as well as the os_fence */ int gk20a_fence_from_semaphore( - struct gk20a *g, struct gk20a_fence *fence_out, - struct sync_timeline *timeline, struct nvgpu_semaphore *semaphore, struct nvgpu_cond *semaphore_wq, - bool need_sync_fence) + struct sync_fence *os_fence) { struct gk20a_fence *f = fence_out; - struct sync_fence *sync_fence = NULL; - -#ifdef CONFIG_SYNC - if (need_sync_fence) { - sync_fence = gk20a_sync_fence_create(g, timeline, semaphore, - "f-gk20a-0x%04x", - nvgpu_semaphore_gpu_ro_va(semaphore)); - if (!sync_fence) - return -ENOMEM; - } -#endif - gk20a_init_fence(f, &nvgpu_semaphore_fence_ops, sync_fence); - if (!f) { -#ifdef CONFIG_SYNC - if (sync_fence) - sync_fence_put(sync_fence); -#endif + gk20a_init_fence(f, &nvgpu_semaphore_fence_ops, os_fence); + if (!f) return -EINVAL; - } + f->semaphore = semaphore; f->semaphore_wq = semaphore_wq; @@ -306,32 +286,18 @@ static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = { .is_expired = &gk20a_syncpt_fence_is_expired, }; +/* This function 
takes the ownership of the os_fence */ int gk20a_fence_from_syncpt( struct gk20a_fence *fence_out, struct nvgpu_nvhost_dev *nvhost_dev, - u32 id, u32 value, - bool need_sync_fence) + u32 id, u32 value, struct sync_fence *os_fence) { struct gk20a_fence *f = fence_out; - struct sync_fence *sync_fence = NULL; - -#ifdef CONFIG_SYNC - if (need_sync_fence) { - sync_fence = nvgpu_nvhost_sync_create_fence(nvhost_dev, - id, value, "fence"); - if (IS_ERR(sync_fence)) - return PTR_ERR(sync_fence); - } -#endif - gk20a_init_fence(f, &gk20a_syncpt_fence_ops, sync_fence); - if (!f) { -#ifdef CONFIG_SYNC - if (sync_fence) - sync_fence_put(sync_fence); -#endif + gk20a_init_fence(f, &gk20a_syncpt_fence_ops, os_fence); + if (!f) return -EINVAL; - } + f->nvhost_dev = nvhost_dev; f->syncpt_id = id; f->syncpt_value = value; @@ -346,8 +312,7 @@ int gk20a_fence_from_syncpt( int gk20a_fence_from_syncpt( struct gk20a_fence *fence_out, struct nvgpu_nvhost_dev *nvhost_dev, - u32 id, u32 value, - bool need_sync_fence) + u32 id, u32 value, struct sync_fence *os_fence) { return -EINVAL; } diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h index 277603d1..6a28e657 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h @@ -3,7 +3,7 @@ * * GK20A Fences * - * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -44,9 +44,10 @@ struct gk20a_fence { /* Valid for all fence types: */ bool valid; struct nvgpu_ref ref; - struct sync_fence *sync_fence; const struct gk20a_fence_ops *ops; + struct sync_fence *os_fence; + /* Valid for fences created from semaphores: */ struct nvgpu_semaphore *semaphore; struct nvgpu_cond *semaphore_wq; @@ -62,18 +63,16 @@ struct gk20a_fence { /* Fences can be created from semaphores or syncpoint (id, value) pairs */ int gk20a_fence_from_semaphore( - struct gk20a *g, struct gk20a_fence *fence_out, - struct sync_timeline *timeline, struct nvgpu_semaphore *semaphore, struct nvgpu_cond *semaphore_wq, - bool need_sync_fence); + struct sync_fence *os_fence); int gk20a_fence_from_syncpt( struct gk20a_fence *fence_out, struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 value, - bool need_sync_fence); + struct sync_fence *os_fence); int gk20a_alloc_fence_pool( struct channel_gk20a *c, @@ -87,7 +86,7 @@ struct gk20a_fence *gk20a_alloc_fence( void gk20a_init_fence(struct gk20a_fence *f, const struct gk20a_fence_ops *ops, - struct sync_fence *sync_fence); + struct sync_fence *os_fence); /* Fence operations */ void gk20a_fence_put(struct gk20a_fence *f); @@ -95,6 +94,7 @@ struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f); int gk20a_fence_wait(struct gk20a *g, struct gk20a_fence *f, unsigned long timeout); bool gk20a_fence_is_expired(struct gk20a_fence *f); -int gk20a_fence_install_fd(struct gk20a_fence *f); +bool gk20a_fence_is_valid(struct gk20a_fence *f); +int gk20a_fence_install_fd(struct gk20a_fence *f, int fd); #endif diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 64ae4401..8c81b5b6 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -335,10 +335,6 @@ int gk20a_finalize_poweron(struct gk20a *g) if (g->ops.fifo.channel_resume) g->ops.fifo.channel_resume(g); - nvgpu_init_mm_ce_context(g); - - 
nvgpu_vidmem_thread_unpause(&g->mm); - done: if (err) g->power_on = false; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index bb0b572f..57854e11 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -1408,6 +1408,11 @@ struct gk20a { void (*close)(struct channel_gk20a *ch); void (*work_completion_signal)(struct channel_gk20a *ch); void (*work_completion_cancel_sync)(struct channel_gk20a *ch); + bool (*os_fence_framework_inst_exists)(struct channel_gk20a *ch); + int (*init_os_fence_framework)( + struct channel_gk20a *ch, const char *fmt, ...); + void (*signal_os_fence_framework)(struct channel_gk20a *ch); + void (*destroy_os_fence_framework)(struct channel_gk20a *ch); } os_channel; struct gk20a_scale_profile *scale_profile; diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c index a8600bce..56c90da7 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c @@ -32,6 +32,7 @@ #include #include #include +#include "../common/linux/channel.h" #include "../drivers/staging/android/sync.h" @@ -373,15 +374,9 @@ void gk20a_sync_timeline_destroy(struct sync_timeline *timeline) } struct sync_timeline *gk20a_sync_timeline_create( - const char *fmt, ...) + const char *name) { struct gk20a_sync_timeline *obj; - char name[30]; - va_list args; - - va_start(args, fmt); - vsnprintf(name, sizeof(name), fmt, args); - va_end(args); obj = (struct gk20a_sync_timeline *) sync_timeline_create(&gk20a_sync_timeline_ops, @@ -395,8 +390,7 @@ struct sync_timeline *gk20a_sync_timeline_create( } struct sync_fence *gk20a_sync_fence_create( - struct gk20a *g, - struct sync_timeline *obj, + struct channel_gk20a *c, struct nvgpu_semaphore *sema, const char *fmt, ...) { @@ -404,7 +398,15 @@ struct sync_fence *gk20a_sync_fence_create( va_list args; struct sync_pt *pt; struct sync_fence *fence; - struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj); + struct gk20a *g = c->g; + + struct nvgpu_channel_linux *os_channel_priv = c->os_priv; + struct nvgpu_os_fence_framework *fence_framework = NULL; + struct gk20a_sync_timeline *timeline = NULL; + + fence_framework = &os_channel_priv->fence_framework; + + timeline = to_gk20a_timeline(fence_framework->timeline); pt = gk20a_sync_pt_create_inst(g, timeline, sema); if (pt == NULL) diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h index 8a6439ab..ffdfaec3 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h @@ -34,12 +34,11 @@ struct nvgpu_semaphore; struct fence; #ifdef CONFIG_SYNC -struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...); +struct sync_timeline *gk20a_sync_timeline_create(const char *name); void gk20a_sync_timeline_destroy(struct sync_timeline *); void gk20a_sync_timeline_signal(struct sync_timeline *); struct sync_fence *gk20a_sync_fence_create( - struct gk20a *g, - struct sync_timeline *, + struct channel_gk20a *c, struct nvgpu_semaphore *, const char *fmt, ...); struct sync_fence *gk20a_sync_fence_fdget(int fd); @@ -51,6 +50,10 @@ static inline struct sync_fence *gk20a_sync_fence_fdget(int fd) { return NULL; } +static inline struct sync_timeline *gk20a_sync_timeline_create( + const char *name) { + return NULL; +} #endif #endif -- cgit v1.2.2
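
For reference, a condensed sketch of the fd pre-allocation flow that change
4) introduces in gk20a_ioctl_channel_submit_gpfifo(). It is assembled from
the hunks above (error labels and surrounding locals omitted) and is not an
additional change:

	int fd = -1;

	/* Reserve an fd up front so a later allocation failure cannot
	 * strand an already-submitted job without a sync_fence fd. */
	if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
	    (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) {
		fd = get_unused_fd_flags(O_RDWR);
		if (fd < 0)
			return fd;	/* nothing has been submitted yet */
	}

	ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
					  submit_flags, &fence, &fence_out,
					  false, profile);
	if (ret) {
		if (fd != -1)
			put_unused_fd(fd);	/* give the reserved fd back */
		goto clean_up;
	}

	if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
	    (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) {
		/* Only on success is the sync_fence bound to the reserved fd. */
		ret = gk20a_fence_install_fd(fence_out, fd);
		if (ret)
			put_unused_fd(fd);
		else
			args->fence.id = fd;
	}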
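
Similarly, a short sketch of how common code drives the new
os_fence_framework hooks from change 1). The example_* wrappers are
hypothetical and only group calls that appear in channel_sync_gk20a.c and
channel_gk20a.c; every call is guarded so builds that do not install the
hooks are unaffected:

/* Hypothetical wrapper: create the per-channel timeline at channel_sync
 * creation time (see gk20a_channel_semaphore_create()). */
static int example_init_channel_timeline(struct gk20a *g,
					 struct channel_gk20a *c, int asid)
{
	if (!c->has_os_fence_framework_support)
		return 0;

	return g->os_channel.init_os_fence_framework(c,
			"gk20a_ch%d_as%d", c->chid, asid);
}

/* Hypothetical wrapper: signal expired post_fences during job cleanup
 * (see gk20a_channel_clean_up_jobs()). */
static void example_signal_channel_timeline(struct gk20a *g,
					    struct channel_gk20a *c)
{
	if (c->has_os_fence_framework_support &&
	    g->os_channel.os_fence_framework_inst_exists(c))
		g->os_channel.signal_os_fence_framework(c);
}

/* Hypothetical wrapper: tear the timeline down when the sync object is
 * destroyed (see gk20a_channel_semaphore_destroy()). */
static void example_destroy_channel_timeline(struct gk20a *g,
					     struct channel_gk20a *c)
{
	if (c->has_os_fence_framework_support &&
	    g->os_channel.os_fence_framework_inst_exists(c))
		g->os_channel.destroy_os_fence_framework(c);
}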