From 84dadb1a9ae2ab0473976ebf5ece1cb0d1e60205 Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Wed, 12 Apr 2017 11:27:48 -0700
Subject: gpu: nvgpu: Move semaphore impl to nvgpu_mem

Use struct nvgpu_mem for DMA allocations (and the corresponding
nvgpu_dma_alloc_sys()) instead of hand-rolled code. This migrates away
from using Linux scatter-gather tables directly; they are now hidden
inside the nvgpu_mem struct. With this change the semaphore.c code no
longer has any direct Linux dependencies.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: I92167c98aac9b413ae87496744dcee051cd60207
Signed-off-by: Alex Waterman
Reviewed-on: http://git-master/r/1464081
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
Reviewed-by: svccoveritychecker
---
 drivers/gpu/nvgpu/common/semaphore.c         | 201 ++++++++++++---------------
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c |   6 +-
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c         |   2 +-
 drivers/gpu/nvgpu/include/nvgpu/semaphore.h  |  72 +++++-----
 4 files changed, 129 insertions(+), 152 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index bf7b6348..fa86985b 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -13,11 +13,6 @@
  * more details.
  */
 
-#define pr_fmt(fmt) "gpu_sema: " fmt
-
-#include
-#include
-
 #include
 #include
 #include
@@ -26,17 +21,19 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+#define pool_to_gk20a(p) ((p)->sema_sea->gk20a)
+
 #define __lock_sema_sea(s) \
 	do { \
-		gpu_sema_verbose_dbg("Acquiring sema lock..."); \
+		gpu_sema_verbose_dbg(s->gk20a, "Acquiring sema lock..."); \
 		nvgpu_mutex_acquire(&s->sea_lock); \
-		gpu_sema_verbose_dbg("Sema lock aquried!"); \
+		gpu_sema_verbose_dbg(s->gk20a, "Sema lock acquired!"); \
 	} while (0)
 
 #define __unlock_sema_sea(s) \
 	do { \
 		nvgpu_mutex_release(&s->sea_lock); \
-		gpu_sema_verbose_dbg("Released sema lock"); \
+		gpu_sema_verbose_dbg(s->gk20a, "Released sema lock"); \
 	} while (0)
 
 /*
@@ -54,13 +51,12 @@ static int __nvgpu_semaphore_sea_grow(struct nvgpu_semaphore_sea *sea)
 
 	__lock_sema_sea(sea);
 
-	ret = nvgpu_dma_alloc_flags_sys(gk20a, NVGPU_DMA_NO_KERNEL_MAPPING,
-					PAGE_SIZE * SEMAPHORE_POOL_COUNT,
-					&sea->sea_mem);
+	ret = nvgpu_dma_alloc_sys(gk20a,
+				  PAGE_SIZE * SEMAPHORE_POOL_COUNT,
+				  &sea->sea_mem);
 	if (ret)
 		goto out;
 
-	sea->ro_sg_table = sea->sea_mem.priv.sgt;
 	sea->size = SEMAPHORE_POOL_COUNT;
 	sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE;
 
@@ -102,7 +98,7 @@ struct nvgpu_semaphore_sea *nvgpu_semaphore_sea_create(struct gk20a *g)
 	if (__nvgpu_semaphore_sea_grow(g->sema_sea))
 		goto cleanup_destroy;
 
-	gpu_sema_dbg("Created semaphore sea!");
+	gpu_sema_dbg(g, "Created semaphore sea!");
 	return g->sema_sea;
 
 cleanup_destroy:
@@ -110,7 +106,7 @@ cleanup_destroy:
 cleanup_free:
 	nvgpu_kfree(g, g->sema_sea);
 	g->sema_sea = NULL;
-	gpu_sema_dbg("Failed to creat semaphore sea!");
+	gpu_sema_dbg(g, "Failed to create semaphore sea!");
 	return NULL;
 }
 
@@ -146,7 +142,8 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc(
 	if (err)
 		goto fail;
 
-	ret = __semaphore_bitmap_alloc(sea->pools_alloced, SEMAPHORE_POOL_COUNT);
+	ret = __semaphore_bitmap_alloc(sea->pools_alloced,
+				       SEMAPHORE_POOL_COUNT);
 	if (ret < 0) {
 		err = ret;
 		goto fail_alloc;
@@ -154,8 +151,6 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc(
 
 	page_idx = (unsigned long)ret;
 
-	p->page = sea->sea_mem.priv.pages[page_idx];
-	p->ro_sg_table = sea->ro_sg_table;
 	p->page_idx = page_idx;
 	p->sema_sea = sea;
 	nvgpu_init_list_node(&p->hw_semas);
@@ -166,7 +161,8 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc(
 	nvgpu_list_add(&p->pool_list_entry, &sea->pool_list);
 	__unlock_sema_sea(sea);
 
-	gpu_sema_dbg("Allocated semaphore pool: page-idx=%d", p->page_idx);
+	gpu_sema_dbg(sea->gk20a,
+		     "Allocated semaphore pool: page-idx=%d", p->page_idx);
 
 	return p;
 
@@ -175,7 +171,7 @@ fail_alloc:
 fail:
 	__unlock_sema_sea(sea);
 	nvgpu_kfree(sea->gk20a, p);
-	gpu_sema_dbg("Failed to allocate semaphore pool!");
+	gpu_sema_dbg(sea->gk20a, "Failed to allocate semaphore pool!");
 	return ERR_PTR(err);
 }
 
@@ -186,91 +182,82 @@ fail:
 int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p,
 			     struct vm_gk20a *vm)
 {
-	int ents, err = 0;
+	int err = 0;
 	u64 addr;
 
-	gpu_sema_dbg("Mapping sempahore pool! (idx=%d)", p->page_idx);
-
-	p->cpu_va = vmap(&p->page, 1, 0,
-			 pgprot_writecombine(PAGE_KERNEL));
-
-	gpu_sema_dbg("  %d: CPU VA = 0x%p!", p->page_idx, p->cpu_va);
+	if (p->mapped)
+		return -EBUSY;
 
-	/* First do the RW mapping. */
-	p->rw_sg_table = nvgpu_kzalloc(p->sema_sea->gk20a,
-				       sizeof(*p->rw_sg_table));
-	if (!p->rw_sg_table)
-		return -ENOMEM;
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     "Mapping semaphore pool! (idx=%d)", p->page_idx);
 
-	err = sg_alloc_table_from_pages(p->rw_sg_table, &p->page, 1, 0,
-					PAGE_SIZE, GFP_KERNEL);
-	if (err) {
-		err = -ENOMEM;
-		goto fail;
-	}
+	/*
+	 * Take the sea lock so that we don't race with a possible change to
+	 * the nvgpu_mem in the sema sea.
+	 */
+	__lock_sema_sea(p->sema_sea);
 
-	/* Add IOMMU mapping... */
-	ents = dma_map_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-			  DMA_BIDIRECTIONAL);
-	if (ents != 1) {
+	addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->sea_mem.priv.sgt,
+				    p->sema_sea->gpu_va,
+				    p->sema_sea->map_size,
+				    0, gk20a_mem_flag_read_only, 0,
+				    p->sema_sea->sea_mem.aperture);
+	if (!addr) {
 		err = -ENOMEM;
-		goto fail_free_sgt;
+		goto fail_unlock;
 	}
 
-	gpu_sema_dbg("  %d: DMA addr = 0x%pad", p->page_idx,
-		     &sg_dma_address(p->rw_sg_table->sgl));
-
-	/* Map into the GPU... Doesn't need to be fixed. */
-	p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE,
-				   0, gk20a_mem_flag_none, false,
-				   APERTURE_SYSMEM);
-	if (!p->gpu_va) {
-		err = -ENOMEM;
-		goto fail_unmap_sgt;
-	}
+	p->gpu_va_ro = addr;
+	p->mapped = 1;
 
-	gpu_sema_dbg("  %d: GPU read-write VA = 0x%llx", p->page_idx,
-		     p->gpu_va);
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     "  %d: GPU read-only VA = 0x%llx",
+		     p->page_idx, p->gpu_va_ro);
 
 	/*
-	 * And now the global mapping. Take the sea lock so that we don't race
-	 * with a concurrent remap.
+	 * Now the RW mapping. This is a bit more complicated. We make an
+	 * nvgpu_mem describing a page of the bigger RO space and then map
+	 * that. Unlike above this does not need to be a fixed address.
 	 */
-	__lock_sema_sea(p->sema_sea);
+	err = nvgpu_mem_create_from_mem(vm->mm->g,
+					&p->rw_mem, &p->sema_sea->sea_mem,
+					p->page_idx, 1);
+	if (err)
+		goto fail_unmap;
+
+	addr = gk20a_gmmu_map(vm, &p->rw_mem.priv.sgt, SZ_4K, 0,
+			      gk20a_mem_flag_none, 0,
+			      p->rw_mem.aperture);
 
-	BUG_ON(p->mapped);
-	addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->ro_sg_table,
-				    p->sema_sea->gpu_va, p->sema_sea->map_size,
-				    0,
-				    gk20a_mem_flag_read_only,
-				    false,
-				    APERTURE_SYSMEM);
 	if (!addr) {
 		err = -ENOMEM;
-		BUG();
-		goto fail_unlock;
+		goto fail_free_submem;
 	}
 
-	p->gpu_va_ro = addr;
-	p->mapped = 1;
-	gpu_sema_dbg("  %d: GPU read-only VA = 0x%llx", p->page_idx,
-		     p->gpu_va_ro);
+	p->gpu_va = addr;
 
 	__unlock_sema_sea(p->sema_sea);
 
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     "  %d: GPU read-write VA = 0x%llx",
+		     p->page_idx, p->gpu_va);
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     "  %d: CPU VA = 0x%p",
+		     p->page_idx, p->rw_mem.cpu_va);
+
 	return 0;
 
+fail_free_submem:
+	nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem);
+fail_unmap:
+	gk20a_gmmu_unmap(vm,
+			 p->sema_sea->sea_mem.gpu_va,
+			 p->sema_sea->map_size,
+			 gk20a_mem_flag_none);
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     "  %d: Failed to map semaphore pool!", p->page_idx);
 fail_unlock:
 	__unlock_sema_sea(p->sema_sea);
-fail_unmap_sgt:
-	dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-		     DMA_BIDIRECTIONAL);
-fail_free_sgt:
-	sg_free_table(p->rw_sg_table);
-fail:
-	nvgpu_kfree(p->sema_sea->gk20a, p->rw_sg_table);
-	p->rw_sg_table = NULL;
-	gpu_sema_dbg("  %d: Failed to map semaphore pool!", p->page_idx);
 	return err;
 }
 
@@ -280,41 +267,30 @@ fail:
 void nvgpu_semaphore_pool_unmap(struct nvgpu_semaphore_pool *p,
 				struct vm_gk20a *vm)
 {
-	struct nvgpu_semaphore_int *hw_sema;
-
-	kunmap(p->cpu_va);
-
-	/* First the global RO mapping... */
 	__lock_sema_sea(p->sema_sea);
-	gk20a_gmmu_unmap(vm, p->gpu_va_ro,
-			 p->sema_sea->map_size, gk20a_mem_flag_none);
-	p->ro_sg_table = NULL;
-	__unlock_sema_sea(p->sema_sea);
 
-	/* And now the private RW mapping. */
-	gk20a_gmmu_unmap(vm, p->gpu_va, PAGE_SIZE, gk20a_mem_flag_none);
-	p->gpu_va = 0;
-
-	dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-		     DMA_BIDIRECTIONAL);
+	gk20a_gmmu_unmap(vm,
+			 p->sema_sea->sea_mem.gpu_va,
+			 p->sema_sea->sea_mem.size,
+			 gk20a_mem_flag_none);
+	gk20a_gmmu_unmap(vm,
+			 p->rw_mem.gpu_va,
+			 p->rw_mem.size,
+			 gk20a_mem_flag_none);
+	nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem);
 
-	sg_free_table(p->rw_sg_table);
-	nvgpu_kfree(p->sema_sea->gk20a, p->rw_sg_table);
-	p->rw_sg_table = NULL;
+	p->gpu_va = 0;
+	p->gpu_va_ro = 0;
+	p->mapped = 0;
 
-	nvgpu_list_for_each_entry(hw_sema, &p->hw_semas,
-				  nvgpu_semaphore_int, hw_sema_list)
-		/*
-		 * Make sure the mem addresses are all NULL so if this gets
-		 * reused we will fault.
-		 */
-		hw_sema->value = NULL;
+	__unlock_sema_sea(p->sema_sea);
 
-	gpu_sema_dbg("Unmapped semaphore pool! (idx=%d)", p->page_idx);
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     "Unmapped semaphore pool! (idx=%d)", p->page_idx);
 }
 
 /*
- * Completely free a sempahore_pool. You should make sure this pool is not
+ * Completely free a semaphore_pool. You should make sure this pool is not
  * mapped otherwise there's going to be a memory leak.
  */
 static void nvgpu_semaphore_pool_free(struct kref *ref)
@@ -324,7 +300,8 @@ static void nvgpu_semaphore_pool_free(struct kref *ref)
 	struct nvgpu_semaphore_sea *s = p->sema_sea;
 	struct nvgpu_semaphore_int *hw_sema, *tmp;
 
-	WARN_ON(p->gpu_va || p->rw_sg_table || p->ro_sg_table);
+	/* Freeing a mapped pool is a bad idea. */
+	WARN_ON(p->mapped || p->gpu_va || p->gpu_va_ro);
 
 	__lock_sema_sea(s);
 	nvgpu_list_del(&p->pool_list_entry);
@@ -338,7 +315,8 @@ static void nvgpu_semaphore_pool_free(struct kref *ref)
 
 	nvgpu_mutex_destroy(&p->pool_lock);
 
-	gpu_sema_dbg("Freed semaphore pool! (idx=%d)", p->page_idx);
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     "Freed semaphore pool! (idx=%d)", p->page_idx);
 
 	nvgpu_kfree(p->sema_sea->gk20a, p);
 }
@@ -395,9 +373,8 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch)
 	hw_sema->idx = hw_sema_idx;
 	hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx;
 	atomic_set(&hw_sema->next_value, 0);
-	hw_sema->value = p->cpu_va + hw_sema->offset;
-	writel(0, hw_sema->value);
 	nvgpu_init_list_node(&hw_sema->hw_sema_list);
+	nvgpu_mem_wr(ch->g, &p->rw_mem, hw_sema->offset, 0);
 
 	nvgpu_list_add(&hw_sema->hw_sema_list, &p->hw_semas);
 
@@ -464,7 +441,7 @@ struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch)
 	 */
 	nvgpu_semaphore_pool_get(s->hw_sema->p);
 
-	gpu_sema_dbg("Allocated semaphore (c=%d)", ch->hw_chid);
+	gpu_sema_dbg(ch->g, "Allocated semaphore (c=%d)", ch->hw_chid);
 
 	return s;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index b509c5c4..3fb35e94 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -576,16 +576,16 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
 	}
 
 	if (acquire)
-		gpu_sema_verbose_dbg("(A) c=%d ACQ_GE %-4u owner=%-3d"
+		gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u owner=%-3d"
 				     "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
 				     ch, nvgpu_semaphore_get_value(s),
 				     s->hw_sema->ch->hw_chid, va, cmd->gva,
 				     cmd->mem->gpu_va, ob);
 	else
-		gpu_sema_verbose_dbg("(R) c=%d INCR %u (%u) va=0x%llx "
+		gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) va=0x%llx "
 				     "cmd_mem=0x%llx b=0x%llx off=%u",
 				     ch, nvgpu_semaphore_get_value(s),
-				     readl(s->hw_sema->value), va, cmd->gva,
+				     nvgpu_semaphore_read(s), va, cmd->gva,
 				     cmd->mem->gpu_va, ob);
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 314d4551..e89e9f68 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -3833,7 +3833,7 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
 	if (hw_sema)
 		gk20a_debug_output(o, "SEMA STATE: value: 0x%08x "
 				   "next_val: 0x%08x addr: 0x%010llx\n",
-				  readl(hw_sema->value),
+				  __nvgpu_semaphore_read(hw_sema),
 				  atomic_read(&hw_sema->next_value),
 				  nvgpu_hw_sema_addr(hw_sema));
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index f197a918..45a3af5a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -14,23 +14,22 @@
 #ifndef SEMAPHORE_GK20A_H
 #define SEMAPHORE_GK20A_H
 
-#include
-
 #include
-#include
+#include
 
 #include
 #include
 #include
 #include
+#include
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 #include "gk20a/channel_gk20a.h"
 
-#define gpu_sema_dbg(fmt, args...) \
-	gk20a_dbg(gpu_dbg_sema, fmt, ##args)
-#define gpu_sema_verbose_dbg(fmt, args...) \
-	gk20a_dbg(gpu_dbg_sema_v, fmt, ##args)
+#define gpu_sema_dbg(g, fmt, args...) \
+	nvgpu_log(g, gpu_dbg_sema, fmt, ##args)
+#define gpu_sema_verbose_dbg(g, fmt, args...) \
+	nvgpu_log(g, gpu_dbg_sema_v, fmt, ##args)
 
 /*
  * Max number of channels that can be used is 512. This of course needs to be
@@ -50,7 +49,6 @@ struct nvgpu_semaphore_int {
 	int idx;		/* Semaphore index. */
 	u32 offset;		/* Offset into the pool. */
 	atomic_t next_value;	/* Next available value. */
-	u32 *value;		/* Current value (access w/ readl()). */
 	u32 nr_incrs;		/* Number of increments programmed. */
 	struct nvgpu_semaphore_pool *p;		/* Pool that owns this sema. */
 	struct channel_gk20a *ch;		/* Channel that owns this sema. */
@@ -82,9 +80,7 @@ struct nvgpu_semaphore {
  * A semaphore pool. Each address space will own exactly one of these.
  */
 struct nvgpu_semaphore_pool {
-	struct page *page;			/* This pool's page of memory */
 	struct nvgpu_list_node pool_list_entry;	/* Node for list of pools. */
-	void *cpu_va;				/* CPU access to the pool. */
 	u64 gpu_va;				/* GPU access to the pool. */
 	u64 gpu_va_ro;				/* GPU access to the pool. */
 	int page_idx;				/* Index into sea bitmap. */
@@ -98,15 +94,10 @@ struct nvgpu_semaphore_pool {
 
 	/*
 	 * This is the address spaces's personal RW table. Other channels will
-	 * ultimately map this page as RO.
-	 */
-	struct sg_table *rw_sg_table;
-
-	/*
-	 * This is to keep track of whether the pool has had its sg_table
-	 * updated during sea resizing.
+	 * ultimately map this page as RO. This is a sub-nvgpu_mem from the
+	 * sea's mem.
 	 */
-	struct sg_table *ro_sg_table;
+	struct nvgpu_mem rw_mem;
 
 	int mapped;
 
@@ -148,11 +139,12 @@ struct nvgpu_semaphore_sea {
 	 */
 	int page_count;			/* Pages allocated to pools. */
 
-	struct sg_table *ro_sg_table;
 	/*
-	struct page *pages[SEMAPHORE_POOL_COUNT];
-	*/
-
+	 * The read-only memory for the entire semaphore sea. Each semaphore
+	 * pool needs a sub-nvgpu_mem that will be mapped as RW in its address
+	 * space. This sea_mem cannot be freed until all semaphore_pools have
+	 * been freed.
+	 */
 	struct nvgpu_mem sea_mem;
 
 	/*
@@ -224,12 +216,26 @@ static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
 			hw_sema->offset;
 }
 
+static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
+{
+	return nvgpu_mem_rd(hw_sema->ch->g,
+			    &hw_sema->p->rw_mem, hw_sema->offset);
+}
+
+/*
+ * Read the underlying value from a semaphore.
+ */
+static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
+{
+	return __nvgpu_semaphore_read(s->hw_sema);
+}
+
 /*
  * TODO: handle wrap around... Hmm, how to do this?
  */
 static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
 {
-	u32 sema_val = readl(s->hw_sema->value);
+	u32 sema_val = nvgpu_semaphore_read(s);
 
 	/*
 	 * If the underlying semaphore value is greater than or equal to
@@ -244,14 +250,6 @@ static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
 	return !nvgpu_semaphore_is_released(s);
 }
 
-/*
- * Read the underlying value from a semaphore.
- */
-static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
-{
-	return readl(s->hw_sema->value);
-}
-
 static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
 {
 	return (u32)atomic_read(&s->value);
@@ -269,6 +267,7 @@ static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s)
 static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
 					     bool force)
 {
+	struct nvgpu_semaphore_int *hw_sema = s->hw_sema;
 	u32 current_val;
 	u32 val = nvgpu_semaphore_get_value(s);
 	int attempts = 0;
@@ -282,7 +281,7 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
 	while ((current_val = nvgpu_semaphore_read(s)) < (val - 1)) {
 		if (force)
 			break;
-		msleep(100);
+		nvgpu_msleep(100);
 		attempts += 1;
 		if (attempts > 100) {
 			WARN(1, "Stall on sema release!");
@@ -297,10 +296,10 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
 	if (current_val >= val)
 		return;
 
-	writel(val, s->hw_sema->value);
+	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset, val);
 
-	gpu_sema_verbose_dbg("(c=%d) WRITE %u",
-			     s->hw_sema->ch->hw_chid, val);
+	gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a,
+			     "(c=%d) WRITE %u", hw_sema->ch->hw_chid, val);
 }
 
 static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s)
@@ -324,7 +323,8 @@ static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s)
 	atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value));
 	s->incremented = 1;
 
-	gpu_sema_verbose_dbg("INCR sema for c=%d (%u)",
+	gpu_sema_verbose_dbg(s->hw_sema->p->sema_sea->gk20a,
+			     "INCR sema for c=%d (%u)",
 			     s->hw_sema->ch->hw_chid,
 			     nvgpu_semaphore_next_value(s));
 }
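
For readers new to the nvgpu_mem API, a minimal sketch of the allocation
pattern this patch adopts in __nvgpu_semaphore_sea_grow(): the whole sea is
now backed by one plain sysmem buffer rather than a hand-built sg_table. The
nvgpu_dma_alloc_sys() signature is taken from the diff above; the helper name
and the include path are assumptions, not part of the patch.

	#include <nvgpu/dma.h>	/* assumed home of nvgpu_dma_alloc_sys() */

	/* Hypothetical helper: allocate the sea's single DMA-backed buffer. */
	static int sea_alloc_backing(struct gk20a *g,
				     struct nvgpu_semaphore_sea *sea)
	{
		/* One PAGE_SIZE page per pool, SEMAPHORE_POOL_COUNT pools. */
		return nvgpu_dma_alloc_sys(g, PAGE_SIZE * SEMAPHORE_POOL_COUNT,
					   &sea->sea_mem);
	}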
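The structural heart of the change is in nvgpu_semaphore_pool_map(): each
pool's private read-write view is now a sub-nvgpu_mem carved out of sea_mem
with nvgpu_mem_create_from_mem(), and only that one page is mapped RW, while
the entire sea stays mapped read-only at a fixed GPU VA. A sketch of the RW
half under those assumptions, with the sea lock and the RO mapping elided
(pool_map_rw_window() is a hypothetical name; the call signatures mirror the
diff):

	/* Hypothetical condensation of the RW-window setup in this patch. */
	static int pool_map_rw_window(struct vm_gk20a *vm,
				      struct nvgpu_semaphore_pool *p)
	{
		int err;

		/* Describe page 'page_idx' of the sea as its own nvgpu_mem. */
		err = nvgpu_mem_create_from_mem(vm->mm->g, &p->rw_mem,
						&p->sema_sea->sea_mem,
						p->page_idx, 1 /* one page */);
		if (err)
			return err;

		/* Map just that page RW; gk20a_gmmu_map() returns 0 on
		 * failure per the !addr check in the diff. */
		p->gpu_va = gk20a_gmmu_map(vm, &p->rw_mem.priv.sgt, SZ_4K, 0,
					   gk20a_mem_flag_none, 0,
					   p->rw_mem.aperture);
		if (!p->gpu_va) {
			nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem);
			return -ENOMEM;
		}

		return 0;
	}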
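Finally, the part that removes the last direct Linux dependency from
semaphore.c: CPU-side semaphore accesses no longer go through a vmap()'d
pointer with readl()/writel(); they use nvgpu_mem_rd()/nvgpu_mem_wr(). A
before/after sketch using the signatures visible in the diff (the
sema_read_value()/sema_write_value() helper names are hypothetical):

	#include <nvgpu/nvgpu_mem.h>	/* assumed home of nvgpu_mem_rd()/wr() */

	/* Before:  return readl(hw_sema->value);  -- Linux-only path. */

	/* After: all accesses go through the nvgpu_mem abstraction. */
	static inline u32 sema_read_value(struct nvgpu_semaphore_int *hw_sema)
	{
		return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->p->rw_mem,
				    hw_sema->offset);
	}

	static inline void sema_write_value(struct nvgpu_semaphore_int *hw_sema,
					    u32 val)
	{
		nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem,
			     hw_sema->offset, val);
	}

Because nvgpu_mem hides whether the backing store is sysmem or vidmem, the
same accessors work regardless of aperture, which is what lets the common
semaphore code shed its platform #includes.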