/*
 * drivers/video/tegra/host/gk20a/channel_sync_gk20a.c
 *
 * GK20A Channel Synchronization Abstraction
 *
 * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/gk20a.h>

#include "channel_sync_gk20a.h"
#include "gk20a.h"
#include "fence_gk20a.h"
#include "semaphore_gk20a.h"
#include "sync_gk20a.h"
#include "mm_gk20a.h"

#ifdef CONFIG_SYNC
#include "../drivers/staging/android/sync.h"
#endif

#ifdef CONFIG_TEGRA_GK20A
#include <linux/nvhost.h>
#endif

#ifdef CONFIG_TEGRA_GK20A

struct gk20a_channel_syncpt {
	struct gk20a_channel_sync ops;
	struct channel_gk20a *c;
	struct platform_device *host1x_pdev;
	u32 id;
};

/*
 * Emit a host1x syncpoint wait into a priv cmdbuf: the channel stalls
 * until syncpoint 'id' reaches 'thresh'. The raw words are GPFIFO
 * method headers plus arguments for the host subchannel.
 */
static void add_wait_cmd(u32 *ptr, u32 id, u32 thresh)
{
	/* syncpoint_a */
	ptr[0] = 0x2001001C;
	/* payload */
	ptr[1] = thresh;
	/* syncpoint_b */
	ptr[2] = 0x2001001D;
	/* syncpt_id, switch_en, wait */
	ptr[3] = (id << 8) | 0x10;
}

static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
		u32 id, u32 thresh, struct priv_cmd_entry **entry,
		struct gk20a_fence **fence)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct priv_cmd_entry *wait_cmd = NULL;
	int err = 0;

	if (!nvhost_syncpt_is_valid_pt_ext(sp->host1x_pdev, id)) {
		dev_warn(dev_from_gk20a(sp->c->g),
				"invalid wait id in gpfifo submit, elided");
		return 0;
	}

	/* An already satisfied wait needs no pushbuffer work. */
	if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh))
		return 0;

	err = gk20a_channel_alloc_priv_cmdbuf(sp->c, 4, &wait_cmd);
	if (err) {
		gk20a_err(dev_from_gk20a(sp->c->g),
				"not enough priv cmd buffer space");
		return err;
	}

	add_wait_cmd(&wait_cmd->ptr[0], id, thresh);

	*entry = wait_cmd;
	*fence = NULL;
	return 0;
}

static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
		struct priv_cmd_entry **entry, struct gk20a_fence **fence)
{
#ifdef CONFIG_SYNC
	int i;
	int num_wait_cmds;
	struct sync_fence *sync_fence;
	struct priv_cmd_entry *wait_cmd = NULL;
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;
	int err = 0;

	sync_fence = nvhost_sync_fdget(fd);
	if (!sync_fence)
		return -EINVAL;

	/* validate syncpt ids */
	for (i = 0; i < sync_fence->num_fences; i++) {
		struct sync_pt *pt = sync_pt_from_fence(
				sync_fence->cbs[i].sync_pt);
		u32 wait_id = nvhost_sync_pt_id(pt);

		if (!wait_id || !nvhost_syncpt_is_valid_pt_ext(
					sp->host1x_pdev, wait_id)) {
			sync_fence_put(sync_fence);
			return -EINVAL;
		}
	}

	num_wait_cmds = nvhost_sync_num_pts(sync_fence);
	if (num_wait_cmds == 0) {
		sync_fence_put(sync_fence);
		return 0;
	}

	err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd);
	if (err) {
		gk20a_err(dev_from_gk20a(c->g),
				"not enough priv cmd buffer space");
		sync_fence_put(sync_fence);
		return err;
	}

	for (i = 0; i < sync_fence->num_fences; i++) {
		struct fence *f = sync_fence->cbs[i].sync_pt;
		struct sync_pt *pt = sync_pt_from_fence(f);
		u32 wait_id = nvhost_sync_pt_id(pt);
		u32 wait_value = nvhost_sync_pt_thresh(pt);

		if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev,
				wait_id, wait_value)) {
			/* Expired point: fill the slot with no-ops. */
			wait_cmd->ptr[i * 4 + 0] = 0;
			wait_cmd->ptr[i * 4 + 1] = 0;
			wait_cmd->ptr[i * 4 + 2] = 0;
			wait_cmd->ptr[i * 4 + 3] = 0;
		} else
			add_wait_cmd(&wait_cmd->ptr[i * 4], wait_id,
					wait_value);
	}
	WARN_ON(i != num_wait_cmds);
	sync_fence_put(sync_fence);

	*entry = wait_cmd;
	*fence = NULL;
	return 0;
#else
	return -ENODEV;
#endif
}
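/*
 * Job completion on the syncpoint path: the pushbuffer increments the
 * channel's host1x syncpoint, host1x raises an interrupt when the
 * threshold is reached, and gk20a_channel_syncpt_update() below runs to
 * clean up the channel. Note the WAR for hw bug 1491360: every submit
 * increments the syncpoint twice, so the max value is advanced by 2 and
 * the returned fence carries the post-increment threshold.
 */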
static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
{
	struct channel_gk20a *ch = priv;

	gk20a_channel_update(ch, nr_completed);

	/* note: channel_get() is in __gk20a_channel_syncpt_incr() */
	gk20a_channel_put(ch);
}

static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
				       bool wfi_cmd,
				       bool register_irq,
				       struct priv_cmd_entry **entry,
				       struct gk20a_fence **fence,
				       bool need_sync_fence)
{
	u32 thresh;
	int incr_cmd_size;
	int j = 0;
	int err;
	struct priv_cmd_entry *incr_cmd = NULL;
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;

	incr_cmd_size = 6;
	if (wfi_cmd)
		incr_cmd_size += 2;

	err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
	if (err) {
		gk20a_err(dev_from_gk20a(c->g),
				"not enough priv cmd buffer space");
		return err;
	}

	/* WAR for hw bug 1491360: syncpt needs to be incremented twice */

	if (wfi_cmd) {
		/* wfi */
		incr_cmd->ptr[j++] = 0x2001001E;
		/* handle, ignored */
		incr_cmd->ptr[j++] = 0x00000000;
	}
	/* syncpoint_a */
	incr_cmd->ptr[j++] = 0x2001001C;
	/* payload, ignored */
	incr_cmd->ptr[j++] = 0;
	/* syncpoint_b */
	incr_cmd->ptr[j++] = 0x2001001D;
	/* syncpt_id, incr */
	incr_cmd->ptr[j++] = (sp->id << 8) | 0x1;
	/* syncpoint_b */
	incr_cmd->ptr[j++] = 0x2001001D;
	/* syncpt_id, incr */
	incr_cmd->ptr[j++] = (sp->id << 8) | 0x1;
	WARN_ON(j != incr_cmd_size);

	thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2);

	if (register_irq) {
		struct channel_gk20a *referenced = gk20a_channel_get(c);

		WARN_ON(!referenced);

		if (referenced) {
			/* note: channel_put() is in
			 * gk20a_channel_syncpt_update() */

			err = nvhost_intr_register_notifier(
				sp->host1x_pdev,
				sp->id, thresh,
				gk20a_channel_syncpt_update, c);
			if (err)
				gk20a_channel_put(referenced);

			/* Adding the interrupt action should never fail.
			 * Proper error handling here would require us to
			 * decrement the syncpt max back to its original
			 * value. */
			WARN(err, "failed to set submit complete interrupt");
		}
	}

	*fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,
					 wfi_cmd, need_sync_fence);
	*entry = incr_cmd;
	return 0;
}

static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
				  struct priv_cmd_entry **entry,
				  struct gk20a_fence **fence)
{
	return __gk20a_channel_syncpt_incr(s,
			true /* wfi */,
			false /* no irq handler */,
			entry, fence, true);
}

static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
			      struct priv_cmd_entry **entry,
			      struct gk20a_fence **fence,
			      bool need_sync_fence)
{
	/* Don't put wfi cmd to this one since we're not returning
	 * a fence to user space. */
	return __gk20a_channel_syncpt_incr(s,
			false /* no wfi */,
			true /* register irq */,
			entry, fence, need_sync_fence);
}

static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
				   int wait_fence_fd,
				   struct priv_cmd_entry **entry,
				   struct gk20a_fence **fence,
				   bool wfi,
				   bool need_sync_fence)
{
	/* Need to do 'wfi + host incr' since we return the fence
	 * to user space. */
	return __gk20a_channel_syncpt_incr(s,
			wfi,
			true /* register irq */,
			entry, fence, need_sync_fence);
}
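/*
 * The remaining syncpoint ops are housekeeping. set_min_eq_max
 * fast-forwards the syncpoint to its max value so that anything still
 * waiting on it sees it as expired (this appears to be used when a
 * channel is aborted or recovered), and destroy additionally returns
 * the host-managed syncpoint to host1x.
 */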
static void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);
}

static void gk20a_channel_syncpt_signal_timeline(
		struct gk20a_channel_sync *s)
{
	/* Nothing to do. */
}

static int gk20a_channel_syncpt_id(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	return sp->id;
}

static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);
	nvhost_syncpt_put_ref_ext(sp->host1x_pdev, sp->id);
	kfree(sp);
}

static struct gk20a_channel_sync *
gk20a_channel_syncpt_create(struct channel_gk20a *c)
{
	struct gk20a_channel_syncpt *sp;
	char syncpt_name[32];

	sp = kzalloc(sizeof(*sp), GFP_KERNEL);
	if (!sp)
		return NULL;

	sp->c = c;
	sp->host1x_pdev = c->g->host1x_dev;

	snprintf(syncpt_name, sizeof(syncpt_name),
		"%s_%d", dev_name(&c->g->dev->dev), c->hw_chid);

	sp->id = nvhost_get_syncpt_host_managed(sp->host1x_pdev,
						c->hw_chid, syncpt_name);
	if (!sp->id) {
		kfree(sp);
		gk20a_err(&c->g->dev->dev, "failed to get free syncpt");
		return NULL;
	}

	nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);

	sp->ops.wait_syncpt	= gk20a_channel_syncpt_wait_syncpt;
	sp->ops.wait_fd		= gk20a_channel_syncpt_wait_fd;
	sp->ops.incr		= gk20a_channel_syncpt_incr;
	sp->ops.incr_wfi	= gk20a_channel_syncpt_incr_wfi;
	sp->ops.incr_user	= gk20a_channel_syncpt_incr_user;
	sp->ops.set_min_eq_max	= gk20a_channel_syncpt_set_min_eq_max;
	sp->ops.signal_timeline	= gk20a_channel_syncpt_signal_timeline;
	sp->ops.syncpt_id	= gk20a_channel_syncpt_id;
	sp->ops.destroy		= gk20a_channel_syncpt_destroy;

	return &sp->ops;
}
#endif /* CONFIG_TEGRA_GK20A */

struct gk20a_channel_semaphore {
	struct gk20a_channel_sync ops;
	struct channel_gk20a *c;

	/* A semaphore pool owned by this channel. */
	struct gk20a_semaphore_pool *pool;

	/* A sync timeline that advances when gpu completes work. */
	struct sync_timeline *timeline;
};
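/*
 * GPU-semaphore-backed synchronization, used when the platform has no
 * host1x syncpoints. Semaphores live in a pool owned by the channel and
 * mapped into its VM; add_sema_cmd() below emits either an ACQ_GEQ
 * (wait until the semaphore value reaches the payload, with channel
 * switching enabled) or a RELEASE (write the payload, optionally after
 * wait-for-idle, followed by a non-stalling interrupt so the CPU side
 * can advance the timeline).
 */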
#ifdef CONFIG_SYNC
struct wait_fence_work {
	struct sync_fence_waiter waiter;
	struct channel_gk20a *ch;
	struct gk20a_semaphore *sema;
};

static void gk20a_channel_semaphore_launcher(
		struct sync_fence *fence,
		struct sync_fence_waiter *waiter)
{
	int err;
	struct wait_fence_work *w =
		container_of(waiter, struct wait_fence_work, waiter);
	struct gk20a *g = w->ch->g;

	gk20a_dbg_info("waiting for pre fence %p '%s'",
			fence, fence->name);
	err = sync_fence_wait(fence, -1);
	if (err < 0)
		dev_err(&g->dev->dev, "error waiting pre-fence: %d\n", err);

	gk20a_dbg_info(
		  "wait completed (%d) for fence %p '%s', triggering gpu work",
		  err, fence, fence->name);
	sync_fence_put(fence);
	gk20a_semaphore_release(w->sema);
	gk20a_semaphore_put(w->sema);
	kfree(w);
}
#endif

static int add_sema_cmd(u32 *ptr, u64 sema, u32 payload,
			bool acquire, bool wfi)
{
	int i = 0;

	/* semaphore_a */
	ptr[i++] = 0x20010004;
	/* offset_upper */
	ptr[i++] = (sema >> 32) & 0xff;
	/* semaphore_b */
	ptr[i++] = 0x20010005;
	/* offset */
	ptr[i++] = sema & 0xffffffff;
	/* semaphore_c */
	ptr[i++] = 0x20010006;
	/* payload */
	ptr[i++] = payload;
	if (acquire) {
		/* semaphore_d */
		ptr[i++] = 0x20010007;
		/* operation: acq_geq, switch_en */
		ptr[i++] = 0x4 | (0x1 << 12);
	} else {
		/* semaphore_d */
		ptr[i++] = 0x20010007;
		/* operation: release, wfi */
		ptr[i++] = 0x2 | ((wfi ? 0x0 : 0x1) << 20);
		/* non_stall_int */
		ptr[i++] = 0x20010008;
		/* ignored */
		ptr[i++] = 0;
	}
	return i;
}

static int gk20a_channel_semaphore_wait_syncpt(
		struct gk20a_channel_sync *s, u32 id,
		u32 thresh, struct priv_cmd_entry **entry,
		struct gk20a_fence **fence)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct device *dev = dev_from_gk20a(sema->c->g);

	gk20a_err(dev, "trying to use syncpoint synchronization");
	return -ENODEV;
}

static int gk20a_channel_semaphore_wait_fd(
		struct gk20a_channel_sync *s, int fd,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct channel_gk20a *c = sema->c;
#ifdef CONFIG_SYNC
	struct sync_fence *sync_fence;
	struct priv_cmd_entry *wait_cmd = NULL;
	struct wait_fence_work *w;
	int written;
	int err, ret;
	u64 va;

	sync_fence = gk20a_sync_fence_fdget(fd);
	if (!sync_fence)
		return -EINVAL;

	w = kzalloc(sizeof(*w), GFP_KERNEL);
	if (!w) {
		err = -ENOMEM;
		goto fail;
	}
	sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
	w->ch = c;
	w->sema = gk20a_semaphore_alloc(sema->pool);
	if (!w->sema) {
		gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores");
		err = -ENOMEM;
		goto fail;
	}

	/* worker takes one reference */
	gk20a_semaphore_get(w->sema);

	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd);
	if (err) {
		gk20a_err(dev_from_gk20a(c->g),
				"not enough priv cmd buffer space");
		goto fail;
	}

	va = gk20a_semaphore_gpu_va(w->sema, c->vm);
	/* The GPU is unblocked when the semaphore value becomes 1. */
	written = add_sema_cmd(wait_cmd->ptr, va, 1, true, false);
	WARN_ON(written != wait_cmd->size);
	ret = sync_fence_wait_async(sync_fence, &w->waiter);

	/*
	 * If the sync_fence has already signaled then the above async_wait
	 * will never trigger. This causes the semaphore release op to never
	 * happen which, in turn, hangs the GPU. That's bad. So let's just
	 * do the semaphore_release right now.
	 */
	if (ret == 1)
		gk20a_semaphore_release(w->sema);

	/* XXX - this fixes an actual bug: we need to hold a ref to this
	 * semaphore while the job is in flight. */
	*fence = gk20a_fence_from_semaphore(sema->timeline, w->sema,
					    &c->semaphore_wq,
					    NULL, false);
	*entry = wait_cmd;
	return 0;
fail:
	if (w && w->sema)
		gk20a_semaphore_put(w->sema);
	kfree(w);
	sync_fence_put(sync_fence);
	return err;
#else
	gk20a_err(dev_from_gk20a(c->g),
		  "trying to use sync fds with CONFIG_SYNC disabled");
	return -ENODEV;
#endif
}
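/*
 * Job completion on the semaphore path: each submit allocates a fresh
 * semaphore and appends a release of it to payload 1 at the end of the
 * job. The returned gk20a_fence wraps that semaphore together with the
 * channel's semaphore_wq so the CPU side can wait for completion.
 */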
static int __gk20a_channel_semaphore_incr(
		struct gk20a_channel_sync *s, bool wfi_cmd,
		struct sync_fence *dependency,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence,
		bool need_sync_fence)
{
	u64 va;
	int incr_cmd_size;
	int written;
	struct priv_cmd_entry *incr_cmd = NULL;
	struct gk20a_channel_semaphore *sp =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct channel_gk20a *c = sp->c;
	struct gk20a_semaphore *semaphore;
	int err = 0;

	semaphore = gk20a_semaphore_alloc(sp->pool);
	if (!semaphore) {
		gk20a_err(dev_from_gk20a(c->g),
				"ran out of semaphores");
		return -ENOMEM;
	}

	incr_cmd_size = 10;
	err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
	if (err) {
		gk20a_err(dev_from_gk20a(c->g),
				"not enough priv cmd buffer space");
		gk20a_semaphore_put(semaphore);
		return err;
	}

	/* Release the completion semaphore. */
	va = gk20a_semaphore_gpu_va(semaphore, c->vm);
	written = add_sema_cmd(incr_cmd->ptr, va, 1, false, wfi_cmd);
	WARN_ON(written != incr_cmd_size);

	*fence = gk20a_fence_from_semaphore(sp->timeline, semaphore,
					    &c->semaphore_wq,
					    dependency, wfi_cmd);
	*entry = incr_cmd;
	return 0;
}

static int gk20a_channel_semaphore_incr_wfi(
		struct gk20a_channel_sync *s,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence)
{
	return __gk20a_channel_semaphore_incr(s,
			true /* wfi */,
			NULL,
			entry, fence, true);
}

static int gk20a_channel_semaphore_incr(
		struct gk20a_channel_sync *s,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence,
		bool need_sync_fence)
{
	/* Don't put wfi cmd to this one since we're not returning
	 * a fence to user space. */
	return __gk20a_channel_semaphore_incr(s,
			false /* no wfi */,
			NULL,
			entry, fence, need_sync_fence);
}

static int gk20a_channel_semaphore_incr_user(
		struct gk20a_channel_sync *s,
		int wait_fence_fd,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence,
		bool wfi,
		bool need_sync_fence)
{
#ifdef CONFIG_SYNC
	struct sync_fence *dependency = NULL;
	int err;

	if (wait_fence_fd >= 0) {
		dependency = gk20a_sync_fence_fdget(wait_fence_fd);
		if (!dependency)
			return -EINVAL;
	}

	err = __gk20a_channel_semaphore_incr(s, wfi, dependency,
					     entry, fence, need_sync_fence);
	if (err) {
		if (dependency)
			sync_fence_put(dependency);
		return err;
	}

	return 0;
#else
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	gk20a_err(dev_from_gk20a(sema->c->g),
		  "trying to use sync fds with CONFIG_SYNC disabled");
	return -ENODEV;
#endif
}

static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s)
{
	/* Nothing to do. */
}
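/*
 * Teardown note: gk20a_channel_semaphore_destroy() below also serves as
 * the error path of gk20a_channel_semaphore_create(), so it must cope
 * with a partially constructed object (NULL timeline and/or pool).
 */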
static void gk20a_channel_semaphore_signal_timeline(
		struct gk20a_channel_sync *s)
{
	struct gk20a_channel_semaphore *sp =
		container_of(s, struct gk20a_channel_semaphore, ops);
	gk20a_sync_timeline_signal(sp->timeline);
}

static int gk20a_channel_semaphore_syncpt_id(struct gk20a_channel_sync *s)
{
	return -EINVAL;
}

static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	if (sema->timeline)
		gk20a_sync_timeline_destroy(sema->timeline);
	if (sema->pool) {
		gk20a_semaphore_pool_unmap(sema->pool, sema->c->vm);
		gk20a_semaphore_pool_put(sema->pool);
	}
	kfree(sema);
}

static struct gk20a_channel_sync *
gk20a_channel_semaphore_create(struct channel_gk20a *c)
{
	int err;
	int asid = -1;
	struct gk20a_channel_semaphore *sema;
	char pool_name[20];

	if (WARN_ON(!c->vm))
		return NULL;

	sema = kzalloc(sizeof(*sema), GFP_KERNEL);
	if (!sema)
		return NULL;
	sema->c = c;

	if (c->vm->as_share)
		asid = c->vm->as_share->id;

	/* snprintf guards against overflowing the small name buffer. */
	snprintf(pool_name, sizeof(pool_name),
		 "semaphore_pool-%d", c->hw_chid);
	sema->pool = gk20a_semaphore_pool_alloc(dev_from_gk20a(c->g),
						pool_name, 1024);
	if (!sema->pool)
		goto clean_up;

	/* Map the semaphore pool to the channel vm. Map as read-write to the
	 * owner channel (all other channels should map as read only!). */
	err = gk20a_semaphore_pool_map(sema->pool, c->vm,
				       gk20a_mem_flag_none);
	if (err)
		goto clean_up;

#ifdef CONFIG_SYNC
	sema->timeline = gk20a_sync_timeline_create(
			"gk20a_ch%d_as%d", c->hw_chid, asid);
	if (!sema->timeline)
		goto clean_up;
#endif
	sema->ops.wait_syncpt	= gk20a_channel_semaphore_wait_syncpt;
	sema->ops.wait_fd	= gk20a_channel_semaphore_wait_fd;
	sema->ops.incr		= gk20a_channel_semaphore_incr;
	sema->ops.incr_wfi	= gk20a_channel_semaphore_incr_wfi;
	sema->ops.incr_user	= gk20a_channel_semaphore_incr_user;
	sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max;
	sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline;
	sema->ops.syncpt_id	= gk20a_channel_semaphore_syncpt_id;
	sema->ops.destroy	= gk20a_channel_semaphore_destroy;

	return &sema->ops;
clean_up:
	gk20a_channel_semaphore_destroy(&sema->ops);
	return NULL;
}

struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
{
#ifdef CONFIG_TEGRA_GK20A
	if (gk20a_platform_has_syncpoints(c->g->dev))
		return gk20a_channel_syncpt_create(c);
#endif
	return gk20a_channel_semaphore_create(c);
}
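/*
 * Usage sketch (illustrative only; the real driver drives these ops
 * from its channel submit path, where the sequencing is more involved):
 *
 *	struct gk20a_channel_sync *sync = gk20a_channel_sync_create(ch);
 *	struct priv_cmd_entry *wait_cmd = NULL, *incr_cmd = NULL;
 *	struct gk20a_fence *pre = NULL, *post = NULL;
 *
 *	sync->wait_fd(sync, fd, &wait_cmd, &pre);
 *	... job pushbuffer goes between wait_cmd and incr_cmd ...
 *	sync->incr(sync, &incr_cmd, &post, false);
 *	... once the channel is idle and the job retired ...
 *	sync->destroy(sync);
 */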