From aa36d3786aeed6755b9744fed37aad000b582322 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Thu, 12 Jan 2017 18:50:34 -0800 Subject: gpu: nvgpu: Organize semaphore_gk20a.[ch] Move semaphore_gk20a.c to drivers/gpu/nvgpu/common/ since the semaphore code is common to all chips. Move the semaphore_gk20a.h header file to drivers/gpu/nvgpu/include/nvgpu and rename it to semaphore.h. Also update all places where the header is included to use the new path. This revealed an odd location for the enum gk20a_mem_rw_flag. This should be in the mm headers. As a result many places that did not need anything semaphore related had to include the semaphore header file. Fixing this oddity allowed the semaphore include to be removed from many C files that did not need it. Bug 1799159 Change-Id: Ie017219acf34c4c481747323b9f3ac33e76e064c Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1284627 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/Makefile.nvgpu | 2 +- drivers/gpu/nvgpu/common/semaphore.c | 460 ++++++++++++++++++++++++++ drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 1 - drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 3 +- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 6 +- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 7 +- drivers/gpu/nvgpu/gk20a/debug_gk20a.c | 6 +- drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 5 +- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 2 +- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 1 - drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 2 +- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 6 + drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 1 - drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c | 466 --------------------------- drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h | 318 ------------------ drivers/gpu/nvgpu/gk20a/sync_gk20a.c | 11 +- drivers/gpu/nvgpu/gm206/acr_gm206.c | 1 - drivers/gpu/nvgpu/gm206/ce_gm206.c | 3 +- drivers/gpu/nvgpu/gm20b/acr_gm20b.c | 1 - drivers/gpu/nvgpu/gp106/acr_gp106.c | 1 - drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 1 - drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 3 +- 
drivers/gpu/nvgpu/include/nvgpu/semaphore.h | 312 ++++++++++++++++++ drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c | 3 +- drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 3 +- 25 files changed, 804 insertions(+), 821 deletions(-) create mode 100644 drivers/gpu/nvgpu/common/semaphore.c delete mode 100644 drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c delete mode 100644 drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h create mode 100644 drivers/gpu/nvgpu/include/nvgpu/semaphore.h (limited to 'drivers/gpu/nvgpu') diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index 0f8f5bc1..5c2bbb79 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu @@ -32,6 +32,7 @@ nvgpu-y := \ common/mm/page_allocator.o \ common/mm/lockless_allocator.o \ common/nvgpu_common.o \ + common/semaphore.o \ gk20a/gk20a.o \ gk20a/sched_gk20a.o \ gk20a/as_gk20a.o \ @@ -48,7 +49,6 @@ nvgpu-y := \ gk20a/mm_gk20a.o \ gk20a/pmu_gk20a.o \ gk20a/priv_ring_gk20a.o \ - gk20a/semaphore_gk20a.o \ gk20a/fence_gk20a.o \ gk20a/therm_gk20a.o \ gk20a/gr_ctx_gk20a_sim.o \ diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c new file mode 100644 index 00000000..ea4910f1 --- /dev/null +++ b/drivers/gpu/nvgpu/common/semaphore.c @@ -0,0 +1,460 @@ +/* + * Nvgpu Semaphores + * + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#define pr_fmt(fmt) "gpu_sema: " fmt + +#include +#include +#include + +#include + +#define __lock_sema_sea(s) \ + do { \ + gpu_sema_verbose_dbg("Acquiring sema lock..."); \ + mutex_lock(&s->sea_lock); \ + gpu_sema_verbose_dbg("Sema lock aquried!"); \ + } while (0) + +#define __unlock_sema_sea(s) \ + do { \ + mutex_unlock(&s->sea_lock); \ + gpu_sema_verbose_dbg("Released sema lock"); \ + } while (0) + +/* + * Return the sema_sea pointer. + */ +struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g) +{ + return g->sema_sea; +} + +static int __gk20a_semaphore_sea_grow(struct gk20a_semaphore_sea *sea) +{ + int ret = 0; + struct gk20a *gk20a = sea->gk20a; + + __lock_sema_sea(sea); + + ret = gk20a_gmmu_alloc_attr_sys(gk20a, DMA_ATTR_NO_KERNEL_MAPPING, + PAGE_SIZE * SEMAPHORE_POOL_COUNT, + &sea->sea_mem); + if (ret) + goto out; + + sea->ro_sg_table = sea->sea_mem.sgt; + sea->size = SEMAPHORE_POOL_COUNT; + sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE; + +out: + __unlock_sema_sea(sea); + return ret; +} + +/* + * Create the semaphore sea. Only create it once - subsequent calls to this will + * return the originally created sea pointer. 
+ */ +struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *g) +{ + if (g->sema_sea) + return g->sema_sea; + + g->sema_sea = kzalloc(sizeof(*g->sema_sea), GFP_KERNEL); + if (!g->sema_sea) + return NULL; + + g->sema_sea->size = 0; + g->sema_sea->page_count = 0; + g->sema_sea->gk20a = g; + INIT_LIST_HEAD(&g->sema_sea->pool_list); + mutex_init(&g->sema_sea->sea_lock); + + if (__gk20a_semaphore_sea_grow(g->sema_sea)) + goto cleanup; + + gpu_sema_dbg("Created semaphore sea!"); + return g->sema_sea; + +cleanup: + kfree(g->sema_sea); + g->sema_sea = NULL; + gpu_sema_dbg("Failed to creat semaphore sea!"); + return NULL; +} + +static int __semaphore_bitmap_alloc(unsigned long *bitmap, unsigned long len) +{ + unsigned long idx = find_first_zero_bit(bitmap, len); + + if (idx == len) + return -ENOSPC; + + set_bit(idx, bitmap); + + return (int)idx; +} + +/* + * Allocate a pool from the sea. + */ +struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc( + struct gk20a_semaphore_sea *sea) +{ + struct gk20a_semaphore_pool *p; + unsigned long page_idx; + int ret, err = 0; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return ERR_PTR(-ENOMEM); + + __lock_sema_sea(sea); + + ret = __semaphore_bitmap_alloc(sea->pools_alloced, SEMAPHORE_POOL_COUNT); + if (ret < 0) { + err = ret; + goto fail; + } + + page_idx = (unsigned long)ret; + + p->page = sea->sea_mem.pages[page_idx]; + p->ro_sg_table = sea->ro_sg_table; + p->page_idx = page_idx; + p->sema_sea = sea; + INIT_LIST_HEAD(&p->hw_semas); + kref_init(&p->ref); + mutex_init(&p->pool_lock); + + sea->page_count++; + list_add(&p->pool_list_entry, &sea->pool_list); + __unlock_sema_sea(sea); + + gpu_sema_dbg("Allocated semaphore pool: page-idx=%d", p->page_idx); + + return p; + +fail: + __unlock_sema_sea(sea); + kfree(p); + gpu_sema_dbg("Failed to allocate semaphore pool!"); + return ERR_PTR(err); +} + +/* + * Map a pool into the passed vm's address space. 
This handles both the fixed + * global RO mapping and the non-fixed private RW mapping. + */ +int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p, + struct vm_gk20a *vm) +{ + int ents, err = 0; + u64 addr; + + gpu_sema_dbg("Mapping sempahore pool! (idx=%d)", p->page_idx); + + p->cpu_va = vmap(&p->page, 1, 0, + pgprot_writecombine(PAGE_KERNEL)); + + gpu_sema_dbg(" %d: CPU VA = 0x%p!", p->page_idx, p->cpu_va); + + /* First do the RW mapping. */ + p->rw_sg_table = kzalloc(sizeof(*p->rw_sg_table), GFP_KERNEL); + if (!p->rw_sg_table) + return -ENOMEM; + + err = sg_alloc_table_from_pages(p->rw_sg_table, &p->page, 1, 0, + PAGE_SIZE, GFP_KERNEL); + if (err) { + err = -ENOMEM; + goto fail; + } + + /* Add IOMMU mapping... */ + ents = dma_map_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1, + DMA_BIDIRECTIONAL); + if (ents != 1) { + err = -ENOMEM; + goto fail_free_sgt; + } + + gpu_sema_dbg(" %d: DMA addr = 0x%pad", p->page_idx, + &sg_dma_address(p->rw_sg_table->sgl)); + + /* Map into the GPU... Doesn't need to be fixed. */ + p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE, + 0, gk20a_mem_flag_none, false, + APERTURE_SYSMEM); + if (!p->gpu_va) { + err = -ENOMEM; + goto fail_unmap_sgt; + } + + gpu_sema_dbg(" %d: GPU read-write VA = 0x%llx", p->page_idx, + p->gpu_va); + + /* + * And now the global mapping. Take the sea lock so that we don't race + * with a concurrent remap. 
+ */ + __lock_sema_sea(p->sema_sea); + + BUG_ON(p->mapped); + addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->ro_sg_table, + p->sema_sea->gpu_va, p->sema_sea->map_size, + 0, + gk20a_mem_flag_read_only, + false, + APERTURE_SYSMEM); + if (!addr) { + err = -ENOMEM; + BUG(); + goto fail_unlock; + } + p->gpu_va_ro = addr; + p->mapped = 1; + + gpu_sema_dbg(" %d: GPU read-only VA = 0x%llx", p->page_idx, + p->gpu_va_ro); + + __unlock_sema_sea(p->sema_sea); + + return 0; + +fail_unlock: + __unlock_sema_sea(p->sema_sea); +fail_unmap_sgt: + dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1, + DMA_BIDIRECTIONAL); +fail_free_sgt: + sg_free_table(p->rw_sg_table); +fail: + kfree(p->rw_sg_table); + p->rw_sg_table = NULL; + gpu_sema_dbg(" %d: Failed to map semaphore pool!", p->page_idx); + return err; +} + +/* + * Unmap a semaphore_pool. + */ +void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p, + struct vm_gk20a *vm) +{ + struct gk20a_semaphore_int *hw_sema; + + kunmap(p->cpu_va); + + /* First the global RO mapping... */ + __lock_sema_sea(p->sema_sea); + gk20a_gmmu_unmap(vm, p->gpu_va_ro, + p->sema_sea->map_size, gk20a_mem_flag_none); + p->ro_sg_table = NULL; + __unlock_sema_sea(p->sema_sea); + + /* And now the private RW mapping. */ + gk20a_gmmu_unmap(vm, p->gpu_va, PAGE_SIZE, gk20a_mem_flag_none); + p->gpu_va = 0; + + dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1, + DMA_BIDIRECTIONAL); + + sg_free_table(p->rw_sg_table); + kfree(p->rw_sg_table); + p->rw_sg_table = NULL; + + list_for_each_entry(hw_sema, &p->hw_semas, hw_sema_list) + /* + * Make sure the mem addresses are all NULL so if this gets + * reused we will fault. + */ + hw_sema->value = NULL; + + gpu_sema_dbg("Unmapped semaphore pool! (idx=%d)", p->page_idx); +} + +/* + * Completely free a sempahore_pool. You should make sure this pool is not + * mapped otherwise there's going to be a memory leak. 
+ */ +static void gk20a_semaphore_pool_free(struct kref *ref) +{ + struct gk20a_semaphore_pool *p = + container_of(ref, struct gk20a_semaphore_pool, ref); + struct gk20a_semaphore_sea *s = p->sema_sea; + struct gk20a_semaphore_int *hw_sema, *tmp; + + WARN_ON(p->gpu_va || p->rw_sg_table || p->ro_sg_table); + + __lock_sema_sea(s); + list_del(&p->pool_list_entry); + clear_bit(p->page_idx, s->pools_alloced); + s->page_count--; + __unlock_sema_sea(s); + + list_for_each_entry_safe(hw_sema, tmp, &p->hw_semas, hw_sema_list) + kfree(hw_sema); + + gpu_sema_dbg("Freed semaphore pool! (idx=%d)", p->page_idx); + kfree(p); +} + +void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p) +{ + kref_get(&p->ref); +} + +void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p) +{ + kref_put(&p->ref, gk20a_semaphore_pool_free); +} + +/* + * Get the address for a semaphore_pool - if global is true then return the + * global RO address instead of the RW address owned by the semaphore's VM. + */ +u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global) +{ + if (!global) + return p->gpu_va; + + return p->gpu_va_ro + (PAGE_SIZE * p->page_idx); +} + +static int __gk20a_init_hw_sema(struct channel_gk20a *ch) +{ + int hw_sema_idx; + int ret = 0; + struct gk20a_semaphore_int *hw_sema; + struct gk20a_semaphore_pool *p = ch->vm->sema_pool; + + BUG_ON(!p); + + mutex_lock(&p->pool_lock); + + /* Find an available HW semaphore. 
*/ + hw_sema_idx = __semaphore_bitmap_alloc(p->semas_alloced, + PAGE_SIZE / SEMAPHORE_SIZE); + if (hw_sema_idx < 0) { + ret = hw_sema_idx; + goto fail; + } + + hw_sema = kzalloc(sizeof(struct gk20a_semaphore_int), GFP_KERNEL); + if (!hw_sema) { + ret = -ENOMEM; + goto fail_free_idx; + } + + ch->hw_sema = hw_sema; + hw_sema->ch = ch; + hw_sema->p = p; + hw_sema->idx = hw_sema_idx; + hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx; + atomic_set(&hw_sema->next_value, 0); + hw_sema->value = p->cpu_va + hw_sema->offset; + writel(0, hw_sema->value); + + list_add(&hw_sema->hw_sema_list, &p->hw_semas); + + mutex_unlock(&p->pool_lock); + + return 0; + +fail_free_idx: + clear_bit(hw_sema_idx, p->semas_alloced); +fail: + mutex_unlock(&p->pool_lock); + return ret; +} + +/* + * Free the channel used semaphore index + */ +void gk20a_semaphore_free_hw_sema(struct channel_gk20a *ch) +{ + struct gk20a_semaphore_pool *p = ch->vm->sema_pool; + + BUG_ON(!p); + + mutex_lock(&p->pool_lock); + + clear_bit(ch->hw_sema->idx, p->semas_alloced); + + /* Make sure that when the ch is re-opened it will get a new HW sema. */ + list_del(&ch->hw_sema->hw_sema_list); + kfree(ch->hw_sema); + ch->hw_sema = NULL; + + mutex_unlock(&p->pool_lock); +} + +/* + * Allocate a semaphore from the passed pool. + * + * Since semaphores are ref-counted there's no explicit free for external code + * to use. When the ref-count hits 0 the internal free will happen. + */ +struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch) +{ + struct gk20a_semaphore *s; + int ret; + + if (!ch->hw_sema) { + ret = __gk20a_init_hw_sema(ch); + if (ret) + return NULL; + } + + s = kzalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return NULL; + + kref_init(&s->ref); + s->hw_sema = ch->hw_sema; + atomic_set(&s->value, 0); + + /* + * Take a ref on the pool so that we can keep this pool alive for + * as long as this semaphore is alive. 
+ */ + gk20a_semaphore_pool_get(s->hw_sema->p); + + gpu_sema_dbg("Allocated semaphore (c=%d)", ch->hw_chid); + + return s; +} + +static void gk20a_semaphore_free(struct kref *ref) +{ + struct gk20a_semaphore *s = + container_of(ref, struct gk20a_semaphore, ref); + + gk20a_semaphore_pool_put(s->hw_sema->p); + + kfree(s); +} + +void gk20a_semaphore_put(struct gk20a_semaphore *s) +{ + kref_put(&s->ref, gk20a_semaphore_free); +} + +void gk20a_semaphore_get(struct gk20a_semaphore *s) +{ + kref_get(&s->ref); +} diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index b4a1f6f4..2a9ad40d 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c @@ -34,7 +34,6 @@ #include "fence_gk20a.h" #include "gr_gk20a.h" #include "debug_gk20a.h" -#include "semaphore_gk20a.h" #include #include diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index 7afed41f..62b0a05e 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c @@ -1,7 +1,7 @@ /* * GK20A Graphics Copy Engine (gr host) * - * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -28,7 +28,6 @@ #include "gk20a.h" #include "debug_gk20a.h" -#include "semaphore_gk20a.h" #include #include diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index c8b1c105..3fa6bb25 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -28,13 +28,13 @@ #include #include -#include "debug_gk20a.h" -#include "ctxsw_trace_gk20a.h" +#include #include "gk20a.h" +#include "debug_gk20a.h" +#include "ctxsw_trace_gk20a.h" #include "dbg_gpu_gk20a.h" #include "fence_gk20a.h" -#include "semaphore_gk20a.h" #include diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index c3c6fbb8..0eba1c30 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -1,9 +1,7 @@ /* - * drivers/video/tegra/host/gk20a/channel_sync_gk20a.c - * * GK20A Channel Synchronization Abstraction * - * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -20,10 +18,11 @@ #include #include +#include + #include "channel_sync_gk20a.h" #include "gk20a.h" #include "fence_gk20a.h" -#include "semaphore_gk20a.h" #include "sync_gk20a.h" #include "mm_gk20a.h" diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c index 37ba720a..83fdc05d 100644 --- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c @@ -1,6 +1,4 @@ /* - * drivers/video/tegra/host/t20/debug_gk20a.c - * * Copyright (C) 2011-2017 NVIDIA Corporation. All rights reserved. 
* * This software is licensed under the terms of the GNU General Public @@ -20,12 +18,12 @@ #include #include - #include +#include + #include "gk20a.h" #include "debug_gk20a.h" -#include "semaphore_gk20a.h" #include #include diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c index b8a1dcbc..6bd59067 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -17,8 +17,9 @@ #include #include +#include + #include "gk20a.h" -#include "semaphore_gk20a.h" #include "channel_gk20a.h" #include "sync_gk20a.h" diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 469148c2..c6b444f9 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -25,11 +25,11 @@ #include #include +#include #include "gk20a.h" #include "debug_gk20a.h" #include "ctxsw_trace_gk20a.h" -#include "semaphore_gk20a.h" #include #include diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index cddb3316..0e1c88a4 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -41,7 +41,6 @@ #include "regops_gk20a.h" #include "dbg_gpu_gk20a.h" #include "debug_gk20a.h" -#include "semaphore_gk20a.h" #include "platform_gk20a.h" #include "ctxsw_trace_gk20a.h" diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index ea5ea73f..cafb1233 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -33,13 +33,13 @@ #include #include +#include #include #include "gk20a.h" #include "mm_gk20a.h" #include "fence_gk20a.h" #include "kind_gk20a.h" 
-#include "semaphore_gk20a.h" #include #include diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index f3dffa46..d39ca2d0 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -52,6 +52,12 @@ enum gk20a_aperture { APERTURE_VIDMEM }; +enum gk20a_mem_rw_flag { + gk20a_mem_flag_none = 0, + gk20a_mem_flag_read_only = 1, + gk20a_mem_flag_write_only = 2, +}; + static inline const char *gk20a_aperture_str(enum gk20a_aperture aperture) { switch (aperture) { diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 9924e48f..d53cf09b 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -29,7 +29,6 @@ #include "gk20a.h" #include "gr_gk20a.h" -#include "semaphore_gk20a.h" #include #include diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c deleted file mode 100644 index 2038e300..00000000 --- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c +++ /dev/null @@ -1,466 +0,0 @@ -/* - * drivers/video/tegra/host/gk20a/semaphore_gk20a.c - * - * GK20A Semaphores - * - * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- */ - -#define pr_fmt(fmt) "gpu_sema: " fmt - -#include -#include -#include - -#include - -#include "gk20a.h" -#include "mm_gk20a.h" -#include "semaphore_gk20a.h" - -#define __lock_sema_sea(s) \ - do { \ - gpu_sema_verbose_dbg("Acquiring sema lock..."); \ - mutex_lock(&s->sea_lock); \ - gpu_sema_verbose_dbg("Sema lock aquried!"); \ - } while (0) - -#define __unlock_sema_sea(s) \ - do { \ - mutex_unlock(&s->sea_lock); \ - gpu_sema_verbose_dbg("Released sema lock"); \ - } while (0) - -/* - * Return the sema_sea pointer. - */ -struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g) -{ - return g->sema_sea; -} - -static int __gk20a_semaphore_sea_grow(struct gk20a_semaphore_sea *sea) -{ - int ret = 0; - struct gk20a *gk20a = sea->gk20a; - - __lock_sema_sea(sea); - - ret = gk20a_gmmu_alloc_attr_sys(gk20a, DMA_ATTR_NO_KERNEL_MAPPING, - PAGE_SIZE * SEMAPHORE_POOL_COUNT, - &sea->sea_mem); - if (ret) - goto out; - - sea->ro_sg_table = sea->sea_mem.sgt; - sea->size = SEMAPHORE_POOL_COUNT; - sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE; - -out: - __unlock_sema_sea(sea); - return ret; -} - -/* - * Create the semaphore sea. Only create it once - subsequent calls to this will - * return the originally created sea pointer. 
- */ -struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *g) -{ - if (g->sema_sea) - return g->sema_sea; - - g->sema_sea = kzalloc(sizeof(*g->sema_sea), GFP_KERNEL); - if (!g->sema_sea) - return NULL; - - g->sema_sea->size = 0; - g->sema_sea->page_count = 0; - g->sema_sea->gk20a = g; - INIT_LIST_HEAD(&g->sema_sea->pool_list); - mutex_init(&g->sema_sea->sea_lock); - - if (__gk20a_semaphore_sea_grow(g->sema_sea)) - goto cleanup; - - gpu_sema_dbg("Created semaphore sea!"); - return g->sema_sea; - -cleanup: - kfree(g->sema_sea); - g->sema_sea = NULL; - gpu_sema_dbg("Failed to creat semaphore sea!"); - return NULL; -} - -static int __semaphore_bitmap_alloc(unsigned long *bitmap, unsigned long len) -{ - unsigned long idx = find_first_zero_bit(bitmap, len); - - if (idx == len) - return -ENOSPC; - - set_bit(idx, bitmap); - - return (int)idx; -} - -/* - * Allocate a pool from the sea. - */ -struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc( - struct gk20a_semaphore_sea *sea) -{ - struct gk20a_semaphore_pool *p; - unsigned long page_idx; - int ret, err = 0; - - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return ERR_PTR(-ENOMEM); - - __lock_sema_sea(sea); - - ret = __semaphore_bitmap_alloc(sea->pools_alloced, SEMAPHORE_POOL_COUNT); - if (ret < 0) { - err = ret; - goto fail; - } - - page_idx = (unsigned long)ret; - - p->page = sea->sea_mem.pages[page_idx]; - p->ro_sg_table = sea->ro_sg_table; - p->page_idx = page_idx; - p->sema_sea = sea; - INIT_LIST_HEAD(&p->hw_semas); - kref_init(&p->ref); - mutex_init(&p->pool_lock); - - sea->page_count++; - list_add(&p->pool_list_entry, &sea->pool_list); - __unlock_sema_sea(sea); - - gpu_sema_dbg("Allocated semaphore pool: page-idx=%d", p->page_idx); - - return p; - -fail: - __unlock_sema_sea(sea); - kfree(p); - gpu_sema_dbg("Failed to allocate semaphore pool!"); - return ERR_PTR(err); -} - -/* - * Map a pool into the passed vm's address space. 
This handles both the fixed - * global RO mapping and the non-fixed private RW mapping. - */ -int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p, - struct vm_gk20a *vm) -{ - int ents, err = 0; - u64 addr; - - gpu_sema_dbg("Mapping sempahore pool! (idx=%d)", p->page_idx); - - p->cpu_va = vmap(&p->page, 1, 0, - pgprot_writecombine(PAGE_KERNEL)); - - gpu_sema_dbg(" %d: CPU VA = 0x%p!", p->page_idx, p->cpu_va); - - /* First do the RW mapping. */ - p->rw_sg_table = kzalloc(sizeof(*p->rw_sg_table), GFP_KERNEL); - if (!p->rw_sg_table) - return -ENOMEM; - - err = sg_alloc_table_from_pages(p->rw_sg_table, &p->page, 1, 0, - PAGE_SIZE, GFP_KERNEL); - if (err) { - err = -ENOMEM; - goto fail; - } - - /* Add IOMMU mapping... */ - ents = dma_map_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1, - DMA_BIDIRECTIONAL); - if (ents != 1) { - err = -ENOMEM; - goto fail_free_sgt; - } - - gpu_sema_dbg(" %d: DMA addr = 0x%pad", p->page_idx, - &sg_dma_address(p->rw_sg_table->sgl)); - - /* Map into the GPU... Doesn't need to be fixed. */ - p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE, - 0, gk20a_mem_flag_none, false, - APERTURE_SYSMEM); - if (!p->gpu_va) { - err = -ENOMEM; - goto fail_unmap_sgt; - } - - gpu_sema_dbg(" %d: GPU read-write VA = 0x%llx", p->page_idx, - p->gpu_va); - - /* - * And now the global mapping. Take the sea lock so that we don't race - * with a concurrent remap. 
- */ - __lock_sema_sea(p->sema_sea); - - BUG_ON(p->mapped); - addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->ro_sg_table, - p->sema_sea->gpu_va, p->sema_sea->map_size, - 0, - gk20a_mem_flag_read_only, - false, - APERTURE_SYSMEM); - if (!addr) { - err = -ENOMEM; - BUG(); - goto fail_unlock; - } - p->gpu_va_ro = addr; - p->mapped = 1; - - gpu_sema_dbg(" %d: GPU read-only VA = 0x%llx", p->page_idx, - p->gpu_va_ro); - - __unlock_sema_sea(p->sema_sea); - - return 0; - -fail_unlock: - __unlock_sema_sea(p->sema_sea); -fail_unmap_sgt: - dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1, - DMA_BIDIRECTIONAL); -fail_free_sgt: - sg_free_table(p->rw_sg_table); -fail: - kfree(p->rw_sg_table); - p->rw_sg_table = NULL; - gpu_sema_dbg(" %d: Failed to map semaphore pool!", p->page_idx); - return err; -} - -/* - * Unmap a semaphore_pool. - */ -void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p, - struct vm_gk20a *vm) -{ - struct gk20a_semaphore_int *hw_sema; - - kunmap(p->cpu_va); - - /* First the global RO mapping... */ - __lock_sema_sea(p->sema_sea); - gk20a_gmmu_unmap(vm, p->gpu_va_ro, - p->sema_sea->map_size, gk20a_mem_flag_none); - p->ro_sg_table = NULL; - __unlock_sema_sea(p->sema_sea); - - /* And now the private RW mapping. */ - gk20a_gmmu_unmap(vm, p->gpu_va, PAGE_SIZE, gk20a_mem_flag_none); - p->gpu_va = 0; - - dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1, - DMA_BIDIRECTIONAL); - - sg_free_table(p->rw_sg_table); - kfree(p->rw_sg_table); - p->rw_sg_table = NULL; - - list_for_each_entry(hw_sema, &p->hw_semas, hw_sema_list) - /* - * Make sure the mem addresses are all NULL so if this gets - * reused we will fault. - */ - hw_sema->value = NULL; - - gpu_sema_dbg("Unmapped semaphore pool! (idx=%d)", p->page_idx); -} - -/* - * Completely free a sempahore_pool. You should make sure this pool is not - * mapped otherwise there's going to be a memory leak. 
- */ -static void gk20a_semaphore_pool_free(struct kref *ref) -{ - struct gk20a_semaphore_pool *p = - container_of(ref, struct gk20a_semaphore_pool, ref); - struct gk20a_semaphore_sea *s = p->sema_sea; - struct gk20a_semaphore_int *hw_sema, *tmp; - - WARN_ON(p->gpu_va || p->rw_sg_table || p->ro_sg_table); - - __lock_sema_sea(s); - list_del(&p->pool_list_entry); - clear_bit(p->page_idx, s->pools_alloced); - s->page_count--; - __unlock_sema_sea(s); - - list_for_each_entry_safe(hw_sema, tmp, &p->hw_semas, hw_sema_list) - kfree(hw_sema); - - gpu_sema_dbg("Freed semaphore pool! (idx=%d)", p->page_idx); - kfree(p); -} - -void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p) -{ - kref_get(&p->ref); -} - -void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p) -{ - kref_put(&p->ref, gk20a_semaphore_pool_free); -} - -/* - * Get the address for a semaphore_pool - if global is true then return the - * global RO address instead of the RW address owned by the semaphore's VM. - */ -u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global) -{ - if (!global) - return p->gpu_va; - - return p->gpu_va_ro + (PAGE_SIZE * p->page_idx); -} - -static int __gk20a_init_hw_sema(struct channel_gk20a *ch) -{ - int hw_sema_idx; - int ret = 0; - struct gk20a_semaphore_int *hw_sema; - struct gk20a_semaphore_pool *p = ch->vm->sema_pool; - - BUG_ON(!p); - - mutex_lock(&p->pool_lock); - - /* Find an available HW semaphore. 
*/ - hw_sema_idx = __semaphore_bitmap_alloc(p->semas_alloced, - PAGE_SIZE / SEMAPHORE_SIZE); - if (hw_sema_idx < 0) { - ret = hw_sema_idx; - goto fail; - } - - hw_sema = kzalloc(sizeof(struct gk20a_semaphore_int), GFP_KERNEL); - if (!hw_sema) { - ret = -ENOMEM; - goto fail_free_idx; - } - - ch->hw_sema = hw_sema; - hw_sema->ch = ch; - hw_sema->p = p; - hw_sema->idx = hw_sema_idx; - hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx; - atomic_set(&hw_sema->next_value, 0); - hw_sema->value = p->cpu_va + hw_sema->offset; - writel(0, hw_sema->value); - - list_add(&hw_sema->hw_sema_list, &p->hw_semas); - - mutex_unlock(&p->pool_lock); - - return 0; - -fail_free_idx: - clear_bit(hw_sema_idx, p->semas_alloced); -fail: - mutex_unlock(&p->pool_lock); - return ret; -} - -/* - * Free the channel used semaphore index - */ -void gk20a_semaphore_free_hw_sema(struct channel_gk20a *ch) -{ - struct gk20a_semaphore_pool *p = ch->vm->sema_pool; - - BUG_ON(!p); - - mutex_lock(&p->pool_lock); - - clear_bit(ch->hw_sema->idx, p->semas_alloced); - - /* Make sure that when the ch is re-opened it will get a new HW sema. */ - list_del(&ch->hw_sema->hw_sema_list); - kfree(ch->hw_sema); - ch->hw_sema = NULL; - - mutex_unlock(&p->pool_lock); -} - -/* - * Allocate a semaphore from the passed pool. - * - * Since semaphores are ref-counted there's no explicit free for external code - * to use. When the ref-count hits 0 the internal free will happen. - */ -struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch) -{ - struct gk20a_semaphore *s; - int ret; - - if (!ch->hw_sema) { - ret = __gk20a_init_hw_sema(ch); - if (ret) - return NULL; - } - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return NULL; - - kref_init(&s->ref); - s->hw_sema = ch->hw_sema; - atomic_set(&s->value, 0); - - /* - * Take a ref on the pool so that we can keep this pool alive for - * as long as this semaphore is alive. 
- */ - gk20a_semaphore_pool_get(s->hw_sema->p); - - gpu_sema_dbg("Allocated semaphore (c=%d)", ch->hw_chid); - - return s; -} - -static void gk20a_semaphore_free(struct kref *ref) -{ - struct gk20a_semaphore *s = - container_of(ref, struct gk20a_semaphore, ref); - - gk20a_semaphore_pool_put(s->hw_sema->p); - - kfree(s); -} - -void gk20a_semaphore_put(struct gk20a_semaphore *s) -{ - kref_put(&s->ref, gk20a_semaphore_free); -} - -void gk20a_semaphore_get(struct gk20a_semaphore *s) -{ - kref_get(&s->ref); -} diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h deleted file mode 100644 index 8e09fcfc..00000000 --- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h +++ /dev/null @@ -1,318 +0,0 @@ -/* - * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#ifndef SEMAPHORE_GK20A_H -#define SEMAPHORE_GK20A_H - -#include -#include -#include - -#include - -#include "gk20a.h" -#include "mm_gk20a.h" -#include "channel_gk20a.h" - -#define gpu_sema_dbg(fmt, args...) \ - gk20a_dbg(gpu_dbg_sema, fmt, ##args) -#define gpu_sema_verbose_dbg(fmt, args...) \ - gk20a_dbg(gpu_dbg_sema_v, fmt, ##args) - -/* - * Max number of channels that can be used is 512. This of course needs to be - * fixed to be dynamic but still fast. - */ -#define SEMAPHORE_POOL_COUNT 512 -#define SEMAPHORE_SIZE 16 -#define SEMAPHORE_SEA_GROWTH_RATE 32 - -struct gk20a_semaphore_sea; - -/* - * Underlying semaphore data structure. This semaphore can be shared amongst - * other semaphore instances. 
- */ -struct gk20a_semaphore_int { - int idx; /* Semaphore index. */ - u32 offset; /* Offset into the pool. */ - atomic_t next_value; /* Next available value. */ - u32 *value; /* Current value (access w/ readl()). */ - u32 nr_incrs; /* Number of increments programmed. */ - struct gk20a_semaphore_pool *p; /* Pool that owns this sema. */ - struct channel_gk20a *ch; /* Channel that owns this sema. */ - struct list_head hw_sema_list; /* List of HW semaphores. */ -}; - -/* - * A semaphore which the rest of the driver actually uses. This consists of a - * pointer to a real semaphore and a value to wait for. This allows one physical - * semaphore to be shared among an essentially infinite number of submits. - */ -struct gk20a_semaphore { - struct gk20a_semaphore_int *hw_sema; - - atomic_t value; - int incremented; - - struct kref ref; -}; - -/* - * A semaphore pool. Each address space will own exactly one of these. - */ -struct gk20a_semaphore_pool { - struct page *page; /* This pool's page of memory */ - struct list_head pool_list_entry; /* Node for list of pools. */ - void *cpu_va; /* CPU access to the pool. */ - u64 gpu_va; /* GPU access to the pool. */ - u64 gpu_va_ro; /* GPU access to the pool. */ - int page_idx; /* Index into sea bitmap. */ - - struct list_head hw_semas; /* List of HW semas. */ - DECLARE_BITMAP(semas_alloced, PAGE_SIZE / SEMAPHORE_SIZE); - - struct gk20a_semaphore_sea *sema_sea; /* Sea that owns this pool. */ - - struct mutex pool_lock; - - /* - * This is the address spaces's personal RW table. Other channels will - * ultimately map this page as RO. - */ - struct sg_table *rw_sg_table; - - /* - * This is to keep track of whether the pool has had its sg_table - * updated during sea resizing. - */ - struct sg_table *ro_sg_table; - - int mapped; - - /* - * Sometimes a channel can be released before other channels are - * done waiting on it. 
This ref count ensures that the pool doesn't - * go away until all semaphores using this pool are cleaned up first. - */ - struct kref ref; -}; - -/* - * A sea of semaphores pools. Each pool is owned by a single VM. Since multiple - * channels can share a VM each channel gets it's own HW semaphore from the - * pool. Channels then allocate regular semaphores - basically just a value that - * signifies when a particular job is done. - */ -struct gk20a_semaphore_sea { - struct list_head pool_list; /* List of pools in this sea. */ - struct gk20a *gk20a; - - size_t size; /* Number of pages available. */ - u64 gpu_va; /* GPU virtual address of sema sea. */ - u64 map_size; /* Size of the mapping. */ - - /* - * TODO: - * List of pages that we use to back the pools. The number of pages - * can grow dynamically since allocating 512 pages for all channels at - * once would be a tremendous waste. - */ - int page_count; /* Pages allocated to pools. */ - - struct sg_table *ro_sg_table; - /* - struct page *pages[SEMAPHORE_POOL_COUNT]; - */ - - struct mem_desc sea_mem; - - /* - * Can't use a regular allocator here since the full range of pools are - * not always allocated. Instead just use a bitmap. - */ - DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT); - - struct mutex sea_lock; /* Lock alloc/free calls. */ -}; - -enum gk20a_mem_rw_flag { - gk20a_mem_flag_none = 0, - gk20a_mem_flag_read_only = 1, - gk20a_mem_flag_write_only = 2, -}; - -/* - * Semaphore sea functions. - */ -struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *gk20a); -int gk20a_semaphore_sea_map(struct gk20a_semaphore_pool *sea, - struct vm_gk20a *vm); -void gk20a_semaphore_sea_unmap(struct gk20a_semaphore_pool *sea, - struct vm_gk20a *vm); -struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g); - -/* - * Semaphore pool functions. 
- */ -struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc( - struct gk20a_semaphore_sea *sea); -int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *pool, - struct vm_gk20a *vm); -void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *pool, - struct vm_gk20a *vm); -u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global); -void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p); -void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p); - -/* - * Semaphore functions. - */ -struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch); -void gk20a_semaphore_put(struct gk20a_semaphore *s); -void gk20a_semaphore_get(struct gk20a_semaphore *s); -void gk20a_semaphore_free_hw_sema(struct channel_gk20a *ch); - -/* - * Return the address of a specific semaphore. - * - * Don't call this on a semaphore you don't own - the VA returned will make no - * sense in your specific channel's VM. - */ -static inline u64 gk20a_semaphore_gpu_rw_va(struct gk20a_semaphore *s) -{ - return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, false) + - s->hw_sema->offset; -} - -/* - * Get the global RO address for the semaphore. Can be called on any semaphore - * regardless of whether you own it. - */ -static inline u64 gk20a_semaphore_gpu_ro_va(struct gk20a_semaphore *s) -{ - return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, true) + - s->hw_sema->offset; -} - -static inline u64 gk20a_hw_sema_addr(struct gk20a_semaphore_int *hw_sema) -{ - return __gk20a_semaphore_pool_gpu_va(hw_sema->p, true) + - hw_sema->offset; -} - -/* - * TODO: handle wrap around... Hmm, how to do this? - */ -static inline bool gk20a_semaphore_is_released(struct gk20a_semaphore *s) -{ - u32 sema_val = readl(s->hw_sema->value); - - /* - * If the underlying semaphore value is greater than or equal to - * the value of the semaphore then the semaphore has been signaled - * (a.k.a. released). 
- */ - return (int)sema_val >= atomic_read(&s->value); -} - -static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s) -{ - return !gk20a_semaphore_is_released(s); -} - -/* - * Read the underlying value from a semaphore. - */ -static inline u32 gk20a_semaphore_read(struct gk20a_semaphore *s) -{ - return readl(s->hw_sema->value); -} - -static inline u32 gk20a_semaphore_get_value(struct gk20a_semaphore *s) -{ - return (u32)atomic_read(&s->value); -} - -static inline u32 gk20a_semaphore_next_value(struct gk20a_semaphore *s) -{ - return (u32)atomic_read(&s->hw_sema->next_value); -} - -/* - * If @force is set then this will not wait for the underlying semaphore to - * catch up to the passed semaphore. - */ -static inline void __gk20a_semaphore_release(struct gk20a_semaphore *s, - bool force) -{ - u32 current_val; - u32 val = gk20a_semaphore_get_value(s); - int attempts = 0; - - /* - * Wait until the sema value is 1 less than the write value. That - * way this function is essentially an increment. - * - * TODO: tune the wait a little better. - */ - while ((current_val = gk20a_semaphore_read(s)) < (val - 1)) { - if (force) - break; - msleep(100); - attempts += 1; - if (attempts > 100) { - WARN(1, "Stall on sema release!"); - return; - } - } - - /* - * If the semaphore has already passed the value we would write then - * this is really just a NO-OP. - */ - if (current_val >= val) - return; - - writel(val, s->hw_sema->value); - - gpu_sema_verbose_dbg("(c=%d) WRITE %u", - s->hw_sema->ch->hw_chid, val); -} - -static inline void gk20a_semaphore_release(struct gk20a_semaphore *s) -{ - __gk20a_semaphore_release(s, false); -} - -/* - * Configure a software based increment on this semaphore. This is useful for - * when we want the GPU to wait on a SW event before processing a channel. - * Another way to describe this is when the GPU needs to wait on a SW pre-fence. 
- * The pre-fence signals SW which in turn calls gk20a_semaphore_release() which - * then allows the GPU to continue. - * - * Also used to prep a semaphore for an INCR by the GPU. - */ -static inline void gk20a_semaphore_incr(struct gk20a_semaphore *s) -{ - BUG_ON(s->incremented); - - atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value)); - s->incremented = 1; - - gpu_sema_verbose_dbg("INCR sema for c=%d (%u)", - s->hw_sema->ch->hw_chid, - gk20a_semaphore_next_value(s)); -} -#endif diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c index b642981c..e7bacac8 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c @@ -1,7 +1,7 @@ /* * GK20A Sync Framework Integration * - * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -13,8 +13,6 @@ * more details. */ -#include "sync_gk20a.h" - #include #include #include @@ -23,9 +21,14 @@ #include #include #include + #include + +#include + #include "../drivers/staging/android/sync.h" -#include "semaphore_gk20a.h" + +#include "sync_gk20a.h" static const struct sync_timeline_ops gk20a_sync_timeline_ops; diff --git a/drivers/gpu/nvgpu/gm206/acr_gm206.c b/drivers/gpu/nvgpu/gm206/acr_gm206.c index 238114e3..3af59374 100644 --- a/drivers/gpu/nvgpu/gm206/acr_gm206.c +++ b/drivers/gpu/nvgpu/gm206/acr_gm206.c @@ -21,7 +21,6 @@ #include "gk20a/gk20a.h" #include "gk20a/pmu_gk20a.h" -#include "gk20a/semaphore_gk20a.h" #include "acr.h" #include "acr_gm206.h" diff --git a/drivers/gpu/nvgpu/gm206/ce_gm206.c b/drivers/gpu/nvgpu/gm206/ce_gm206.c index 5d5fd432..dd3eac95 100644 --- a/drivers/gpu/nvgpu/gm206/ce_gm206.c +++ b/drivers/gpu/nvgpu/gm206/ce_gm206.c @@ -1,7 +1,7 @@ /* * GM206 Copy Engine. 
* - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -29,7 +29,6 @@ #include #include "gk20a/debug_gk20a.h" -#include "gk20a/semaphore_gk20a.h" #include #include diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index 060dc778..40a28136 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c @@ -26,7 +26,6 @@ #include "gk20a/gk20a.h" #include "gk20a/pmu_gk20a.h" -#include "gk20a/semaphore_gk20a.h" #include diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c index 9e844994..f8d7ba70 100644 --- a/drivers/gpu/nvgpu/gp106/acr_gp106.c +++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c @@ -21,7 +21,6 @@ #include "gk20a/gk20a.h" #include "gk20a/pmu_gk20a.h" -#include "gk20a/semaphore_gk20a.h" #include "gm206/acr_gm206.h" #include "gm20b/acr_gm20b.h" diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index e04aec7d..a7aa4003 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -24,7 +24,6 @@ #include "gk20a/gk20a.h" #include "gk20a/gr_gk20a.h" -#include "gk20a/semaphore_gk20a.h" #include "gk20a/dbg_gpu_gk20a.h" #include "gm20b/gr_gm20b.h" diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 835d33f3..776bbe85 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c @@ -1,7 +1,7 @@ /* * GP10B MMU * - * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -17,7 +17,6 @@ #include #include "gk20a/gk20a.h" -#include "gk20a/semaphore_gk20a.h" #include "mm_gp10b.h" #include "rpfb_gp10b.h" diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h new file mode 100644 index 00000000..07a27584 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef SEMAPHORE_GK20A_H +#define SEMAPHORE_GK20A_H + +#include +#include +#include + +#include + +#include "gk20a/gk20a.h" +#include "gk20a/mm_gk20a.h" +#include "gk20a/channel_gk20a.h" + +#define gpu_sema_dbg(fmt, args...) \ + gk20a_dbg(gpu_dbg_sema, fmt, ##args) +#define gpu_sema_verbose_dbg(fmt, args...) \ + gk20a_dbg(gpu_dbg_sema_v, fmt, ##args) + +/* + * Max number of channels that can be used is 512. This of course needs to be + * fixed to be dynamic but still fast. + */ +#define SEMAPHORE_POOL_COUNT 512 +#define SEMAPHORE_SIZE 16 +#define SEMAPHORE_SEA_GROWTH_RATE 32 + +struct gk20a_semaphore_sea; + +/* + * Underlying semaphore data structure. This semaphore can be shared amongst + * other semaphore instances. + */ +struct gk20a_semaphore_int { + int idx; /* Semaphore index. */ + u32 offset; /* Offset into the pool. */ + atomic_t next_value; /* Next available value. */ + u32 *value; /* Current value (access w/ readl()). 
*/ + u32 nr_incrs; /* Number of increments programmed. */ + struct gk20a_semaphore_pool *p; /* Pool that owns this sema. */ + struct channel_gk20a *ch; /* Channel that owns this sema. */ + struct list_head hw_sema_list; /* List of HW semaphores. */ +}; + +/* + * A semaphore which the rest of the driver actually uses. This consists of a + * pointer to a real semaphore and a value to wait for. This allows one physical + * semaphore to be shared among an essentially infinite number of submits. + */ +struct gk20a_semaphore { + struct gk20a_semaphore_int *hw_sema; + + atomic_t value; + int incremented; + + struct kref ref; +}; + +/* + * A semaphore pool. Each address space will own exactly one of these. + */ +struct gk20a_semaphore_pool { + struct page *page; /* This pool's page of memory */ + struct list_head pool_list_entry; /* Node for list of pools. */ + void *cpu_va; /* CPU access to the pool. */ + u64 gpu_va; /* GPU access to the pool. */ + u64 gpu_va_ro; /* GPU access to the pool. */ + int page_idx; /* Index into sea bitmap. */ + + struct list_head hw_semas; /* List of HW semas. */ + DECLARE_BITMAP(semas_alloced, PAGE_SIZE / SEMAPHORE_SIZE); + + struct gk20a_semaphore_sea *sema_sea; /* Sea that owns this pool. */ + + struct mutex pool_lock; + + /* + * This is the address spaces's personal RW table. Other channels will + * ultimately map this page as RO. + */ + struct sg_table *rw_sg_table; + + /* + * This is to keep track of whether the pool has had its sg_table + * updated during sea resizing. + */ + struct sg_table *ro_sg_table; + + int mapped; + + /* + * Sometimes a channel can be released before other channels are + * done waiting on it. This ref count ensures that the pool doesn't + * go away until all semaphores using this pool are cleaned up first. + */ + struct kref ref; +}; + +/* + * A sea of semaphores pools. Each pool is owned by a single VM. Since multiple + * channels can share a VM each channel gets it's own HW semaphore from the + * pool. 
Channels then allocate regular semaphores - basically just a value that + * signifies when a particular job is done. + */ +struct gk20a_semaphore_sea { + struct list_head pool_list; /* List of pools in this sea. */ + struct gk20a *gk20a; + + size_t size; /* Number of pages available. */ + u64 gpu_va; /* GPU virtual address of sema sea. */ + u64 map_size; /* Size of the mapping. */ + + /* + * TODO: + * List of pages that we use to back the pools. The number of pages + * can grow dynamically since allocating 512 pages for all channels at + * once would be a tremendous waste. + */ + int page_count; /* Pages allocated to pools. */ + + struct sg_table *ro_sg_table; + /* + struct page *pages[SEMAPHORE_POOL_COUNT]; + */ + + struct mem_desc sea_mem; + + /* + * Can't use a regular allocator here since the full range of pools are + * not always allocated. Instead just use a bitmap. + */ + DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT); + + struct mutex sea_lock; /* Lock alloc/free calls. */ +}; + +/* + * Semaphore sea functions. + */ +struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *gk20a); +int gk20a_semaphore_sea_map(struct gk20a_semaphore_pool *sea, + struct vm_gk20a *vm); +void gk20a_semaphore_sea_unmap(struct gk20a_semaphore_pool *sea, + struct vm_gk20a *vm); +struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g); + +/* + * Semaphore pool functions. + */ +struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc( + struct gk20a_semaphore_sea *sea); +int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *pool, + struct vm_gk20a *vm); +void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *pool, + struct vm_gk20a *vm); +u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global); +void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p); +void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p); + +/* + * Semaphore functions. 
+ */ +struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch); +void gk20a_semaphore_put(struct gk20a_semaphore *s); +void gk20a_semaphore_get(struct gk20a_semaphore *s); +void gk20a_semaphore_free_hw_sema(struct channel_gk20a *ch); + +/* + * Return the address of a specific semaphore. + * + * Don't call this on a semaphore you don't own - the VA returned will make no + * sense in your specific channel's VM. + */ +static inline u64 gk20a_semaphore_gpu_rw_va(struct gk20a_semaphore *s) +{ + return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, false) + + s->hw_sema->offset; +} + +/* + * Get the global RO address for the semaphore. Can be called on any semaphore + * regardless of whether you own it. + */ +static inline u64 gk20a_semaphore_gpu_ro_va(struct gk20a_semaphore *s) +{ + return __gk20a_semaphore_pool_gpu_va(s->hw_sema->p, true) + + s->hw_sema->offset; +} + +static inline u64 gk20a_hw_sema_addr(struct gk20a_semaphore_int *hw_sema) +{ + return __gk20a_semaphore_pool_gpu_va(hw_sema->p, true) + + hw_sema->offset; +} + +/* + * TODO: handle wrap around... Hmm, how to do this? + */ +static inline bool gk20a_semaphore_is_released(struct gk20a_semaphore *s) +{ + u32 sema_val = readl(s->hw_sema->value); + + /* + * If the underlying semaphore value is greater than or equal to + * the value of the semaphore then the semaphore has been signaled + * (a.k.a. released). + */ + return (int)sema_val >= atomic_read(&s->value); +} + +static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s) +{ + return !gk20a_semaphore_is_released(s); +} + +/* + * Read the underlying value from a semaphore. 
+ */ +static inline u32 gk20a_semaphore_read(struct gk20a_semaphore *s) +{ + return readl(s->hw_sema->value); +} + +static inline u32 gk20a_semaphore_get_value(struct gk20a_semaphore *s) +{ + return (u32)atomic_read(&s->value); +} + +static inline u32 gk20a_semaphore_next_value(struct gk20a_semaphore *s) +{ + return (u32)atomic_read(&s->hw_sema->next_value); +} + +/* + * If @force is set then this will not wait for the underlying semaphore to + * catch up to the passed semaphore. + */ +static inline void __gk20a_semaphore_release(struct gk20a_semaphore *s, + bool force) +{ + u32 current_val; + u32 val = gk20a_semaphore_get_value(s); + int attempts = 0; + + /* + * Wait until the sema value is 1 less than the write value. That + * way this function is essentially an increment. + * + * TODO: tune the wait a little better. + */ + while ((current_val = gk20a_semaphore_read(s)) < (val - 1)) { + if (force) + break; + msleep(100); + attempts += 1; + if (attempts > 100) { + WARN(1, "Stall on sema release!"); + return; + } + } + + /* + * If the semaphore has already passed the value we would write then + * this is really just a NO-OP. + */ + if (current_val >= val) + return; + + writel(val, s->hw_sema->value); + + gpu_sema_verbose_dbg("(c=%d) WRITE %u", + s->hw_sema->ch->hw_chid, val); +} + +static inline void gk20a_semaphore_release(struct gk20a_semaphore *s) +{ + __gk20a_semaphore_release(s, false); +} + +/* + * Configure a software based increment on this semaphore. This is useful for + * when we want the GPU to wait on a SW event before processing a channel. + * Another way to describe this is when the GPU needs to wait on a SW pre-fence. + * The pre-fence signals SW which in turn calls gk20a_semaphore_release() which + * then allows the GPU to continue. + * + * Also used to prep a semaphore for an INCR by the GPU. 
+ */ +static inline void gk20a_semaphore_incr(struct gk20a_semaphore *s) +{ + BUG_ON(s->incremented); + + atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value)); + s->incremented = 1; + + gpu_sema_verbose_dbg("INCR sema for c=%d (%u)", + s->hw_sema->ch->hw_chid, + gk20a_semaphore_next_value(s)); +} +#endif diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c index 66fda2d9..3d908b0d 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c @@ -1,7 +1,7 @@ /* * Virtualized GPU Memory Management * - * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -16,7 +16,6 @@ #include #include "vgpu/vgpu.h" #include "vgpu_mm_gp10b.h" -#include "gk20a/semaphore_gk20a.h" #include "gk20a/mm_gk20a.h" static int vgpu_gp10b_init_mm_setup_hw(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index f97acd47..eb5f7749 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c @@ -1,7 +1,7 @@ /* * Virtualized GPU Memory Management * - * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -15,7 +15,6 @@ #include #include "vgpu/vgpu.h" -#include "gk20a/semaphore_gk20a.h" #include "gk20a/mm_gk20a.h" static int vgpu_init_mm_setup_sw(struct gk20a *g) -- cgit v1.2.2