From 1d9fba8804fb811771eac0f68f334f51f101ed01 Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Tue, 7 Oct 2014 15:02:35 +0300 Subject: gpu: nvgpu: Per-alloc alignment Change-Id: I8b7e86afb68adf6dd33b05995d0978f42d57e7b7 Signed-off-by: Terje Bergstrom Reviewed-on: http://git-master/r/554185 GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/gk20a/gk20a_allocator.c | 22 +++++++--------- drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | 11 ++++---- drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | 3 +-- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 22 +++++++--------- drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 44 ++++++++++++++++++------------- drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c | 8 +++--- drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 3 +-- drivers/gpu/nvgpu/vgpu/ltc_vgpu.c | 3 +-- drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 6 ++--- 9 files changed, 60 insertions(+), 62 deletions(-) (limited to 'drivers/gpu/nvgpu') diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c index 0b5f9f6f..fee3e4ea 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c @@ -20,7 +20,7 @@ /* init allocator struct */ int gk20a_allocator_init(struct gk20a_allocator *allocator, - const char *name, u32 start, u32 len, u32 align) + const char *name, u32 start, u32 len) { memset(allocator, 0, sizeof(struct gk20a_allocator)); @@ -28,16 +28,14 @@ int gk20a_allocator_init(struct gk20a_allocator *allocator, allocator->base = start; allocator->limit = start + len - 1; - allocator->align = align; allocator->bitmap = kzalloc(BITS_TO_LONGS(len) * sizeof(long), GFP_KERNEL); if (!allocator->bitmap) return -ENOMEM; - allocator_dbg(allocator, "%s : base %d, limit %d, align %d", - allocator->name, allocator->base, - allocator->limit, allocator->align); + allocator_dbg(allocator, "%s : base %d, limit %d", + allocator->name, allocator->base, allocator->limit); init_rwsem(&allocator->rw_sema); @@ -65,7 +63,7 @@ void gk20a_allocator_destroy(struct gk20a_allocator *allocator) * 
contiguous address. */ int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator, - u32 *addr, u32 len) + u32 *addr, u32 len, u32 align) { unsigned long _addr; @@ -73,11 +71,11 @@ int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator, if ((*addr != 0 && *addr < allocator->base) || /* check addr range */ *addr + len > allocator->limit || /* check addr range */ - *addr & (allocator->align - 1) || /* check addr alignment */ + *addr & (align - 1) || /* check addr alignment */ len == 0) /* check len */ return -EINVAL; - len = ALIGN(len, allocator->align); + len = ALIGN(len, align); if (!len) return -ENOMEM; @@ -87,7 +85,7 @@ int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator, allocator->limit - allocator->base + 1, *addr ? (*addr - allocator->base) : 0, len, - allocator->align - 1); + align - 1); if ((_addr > allocator->limit - allocator->base + 1) || (*addr && *addr != (_addr + allocator->base))) { up_write(&allocator->rw_sema); @@ -106,16 +104,16 @@ int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator, /* free all blocks between start and end */ int gk20a_allocator_block_free(struct gk20a_allocator *allocator, - u32 addr, u32 len) + u32 addr, u32 len, u32 align) { allocator_dbg(allocator, "[in] addr %d, len %d", addr, len); if (addr + len > allocator->limit || /* check addr range */ addr < allocator->base || - addr & (allocator->align - 1)) /* check addr alignment */ + addr & (align - 1)) /* check addr alignment */ return -EINVAL; - len = ALIGN(len, allocator->align); + len = ALIGN(len, align); if (!len) return -EINVAL; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h index 154f953a..69a227bd 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h @@ -31,7 +31,6 @@ struct gk20a_allocator { u32 base; /* min value of this linear space */ u32 limit; /* max value = limit - 1 */ - u32 align; /* alignment size, power of 2 */ 
unsigned long *bitmap; /* bitmap */ @@ -58,21 +57,21 @@ struct gk20a_allocator { } constraint; int (*alloc)(struct gk20a_allocator *allocator, - u32 *addr, u32 len); + u32 *addr, u32 len, u32 align); int (*free)(struct gk20a_allocator *allocator, - u32 addr, u32 len); + u32 addr, u32 len, u32 align); }; int gk20a_allocator_init(struct gk20a_allocator *allocator, - const char *name, u32 base, u32 size, u32 align); + const char *name, u32 base, u32 size); void gk20a_allocator_destroy(struct gk20a_allocator *allocator); int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator, - u32 *addr, u32 len); + u32 *addr, u32 len, u32 align); int gk20a_allocator_block_free(struct gk20a_allocator *allocator, - u32 addr, u32 len); + u32 addr, u32 len, u32 align); #if defined(ALLOCATOR_DEBUG) diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c index 71d87b5c..4c6543bb 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c @@ -92,8 +92,7 @@ static int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) gk20a_allocator_init(&gr->comp_tags, "comptag", 1, /* start */ - max_comptag_lines - 1, /* length*/ - 1); /* align */ + max_comptag_lines - 1); /* length*/ gr->comptags_per_cacheline = comptags_per_cacheline; gr->slices_per_ltc = slices_per_fbp / g->ltc_count; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index e4186c95..f588d112 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -129,7 +129,7 @@ static void gk20a_mm_delete_priv(void *_priv) BUG_ON(!priv->comptag_allocator); priv->comptag_allocator->free(priv->comptag_allocator, priv->comptags.offset, - priv->comptags.lines); + priv->comptags.lines, 1); } /* Free buffer states */ @@ -229,7 +229,7 @@ static int gk20a_alloc_comptags(struct device *dev, /* store the allocator so we can use it when we free the ctags */ priv->comptag_allocator = allocator; - err = 
allocator->alloc(allocator, &offset, lines); + err = allocator->alloc(allocator, &offset, lines, 1); if (!err) { priv->comptags.lines = lines; priv->comptags.offset = offset; @@ -837,7 +837,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, /* The vma allocator represents page accounting. */ num_pages = size >> ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]); - err = vma->alloc(vma, &start_page_nr, num_pages); + err = vma->alloc(vma, &start_page_nr, num_pages, 1); if (err) { gk20a_err(dev_from_vm(vm), @@ -868,7 +868,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm, start_page_nr = (u32)(offset >> page_shift); num_pages = (u32)((size + page_size - 1) >> page_shift); - err = vma->free(vma, start_page_nr, num_pages); + err = vma->free(vma, start_page_nr, num_pages, 1); if (err) { gk20a_err(dev_from_vm(vm), "not found: offset=0x%llx, sz=0x%llx", @@ -2290,9 +2290,8 @@ static int gk20a_init_vm(struct mm_gk20a *mm, vm->gmmu_page_sizes[gmmu_page_size_small]>>10); err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small], alloc_name, - low_hole_pages, /*start*/ - num_pages - low_hole_pages,/* length*/ - 1); /* align */ + low_hole_pages, /*start*/ + num_pages - low_hole_pages);/* length*/ if (err) goto clean_up_map_pde; @@ -2305,8 +2304,7 @@ static int gk20a_init_vm(struct mm_gk20a *mm, err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big], alloc_name, num_pages, /* start */ - num_pages, /* length */ - 1); /* align */ + num_pages); /* length */ if (err) goto clean_up_small_allocator; } @@ -2435,7 +2433,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, ilog2(vm->gmmu_page_sizes[pgsz_idx])); vma = &vm->vma[pgsz_idx]; - err = vma->alloc(vma, &start_page_nr, args->pages); + err = vma->alloc(vma, &start_page_nr, args->pages, 1); if (err) { kfree(va_node); goto clean_up; @@ -2458,7 +2456,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, pgsz_idx, true); if (err) { mutex_unlock(&vm->update_gmmu_lock); - vma->free(vma, start_page_nr, args->pages); + vma->free(vma, 
start_page_nr, args->pages, 1); kfree(va_node); goto clean_up; } @@ -2506,7 +2504,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, ilog2(vm->gmmu_page_sizes[pgsz_idx])); vma = &vm->vma[pgsz_idx]; - err = vma->free(vma, start_page_nr, args->pages); + err = vma->free(vma, start_page_nr, args->pages, 1); if (err) goto clean_up; diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 47ee7a1b..5d973938 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -2603,7 +2603,8 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu) if (!pmu->sample_buffer) err = pmu->dmem.alloc(&pmu->dmem, - &pmu->sample_buffer, 2 * sizeof(u16)); + &pmu->sample_buffer, 2 * sizeof(u16), + PMU_DMEM_ALLOC_ALIGNMENT); if (err) { gk20a_err(dev_from_gk20a(g), "failed to allocate perfmon sample buffer"); @@ -2707,8 +2708,7 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu, if (!pmu->dmem.alloc) gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem", pv->get_pmu_init_msg_pmu_sw_mg_off(init), - pv->get_pmu_init_msg_pmu_sw_mg_size(init), - PMU_DMEM_ALLOC_ALIGNMENT); + pv->get_pmu_init_msg_pmu_sw_mg_size(init)); pmu->pmu_ready = true; pmu->pmu_state = PMU_STATE_INIT_RECEIVED; @@ -2845,17 +2845,19 @@ static int pmu_response_handle(struct pmu_gk20a *pmu, if (pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_in_a_ptr(seq)) != 0) pmu->dmem.free(&pmu->dmem, - pv->pmu_allocation_get_dmem_offset(pmu, - pv->get_pmu_seq_in_a_ptr(seq)), - pv->pmu_allocation_get_dmem_size(pmu, - pv->get_pmu_seq_in_a_ptr(seq))); + pv->pmu_allocation_get_dmem_offset(pmu, + pv->get_pmu_seq_in_a_ptr(seq)), + pv->pmu_allocation_get_dmem_size(pmu, + pv->get_pmu_seq_in_a_ptr(seq)), + PMU_DMEM_ALLOC_ALIGNMENT); if (pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq)) != 0) pmu->dmem.free(&pmu->dmem, - pv->pmu_allocation_get_dmem_offset(pmu, - pv->get_pmu_seq_out_a_ptr(seq)), - pv->pmu_allocation_get_dmem_size(pmu, - 
pv->get_pmu_seq_out_a_ptr(seq))); + pv->pmu_allocation_get_dmem_offset(pmu, + pv->get_pmu_seq_out_a_ptr(seq)), + pv->pmu_allocation_get_dmem_size(pmu, + pv->get_pmu_seq_out_a_ptr(seq)), + PMU_DMEM_ALLOC_ALIGNMENT); if (seq->callback) seq->callback(g, msg, seq->cb_params, seq->desc, ret); @@ -3493,8 +3495,9 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, (u16)max(payload->in.size, payload->out.size)); err = pmu->dmem.alloc(&pmu->dmem, - pv->pmu_allocation_get_dmem_offset_addr(pmu, in), - pv->pmu_allocation_get_dmem_size(pmu, in)); + pv->pmu_allocation_get_dmem_offset_addr(pmu, in), + pv->pmu_allocation_get_dmem_size(pmu, in), + PMU_DMEM_ALLOC_ALIGNMENT); if (err) goto clean_up; @@ -3517,8 +3520,9 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, if (payload->out.buf != payload->in.buf) { err = pmu->dmem.alloc(&pmu->dmem, - pv->pmu_allocation_get_dmem_offset_addr(pmu, out), - pv->pmu_allocation_get_dmem_size(pmu, out)); + pv->pmu_allocation_get_dmem_offset_addr(pmu, out), + pv->pmu_allocation_get_dmem_size(pmu, out), + PMU_DMEM_ALLOC_ALIGNMENT); if (err) goto clean_up; } else { @@ -3548,12 +3552,14 @@ clean_up: gk20a_dbg_fn("fail"); if (in) pmu->dmem.free(&pmu->dmem, - pv->pmu_allocation_get_dmem_offset(pmu, in), - pv->pmu_allocation_get_dmem_size(pmu, in)); + pv->pmu_allocation_get_dmem_offset(pmu, in), + pv->pmu_allocation_get_dmem_size(pmu, in), + PMU_DMEM_ALLOC_ALIGNMENT); if (out) pmu->dmem.free(&pmu->dmem, - pv->pmu_allocation_get_dmem_offset(pmu, out), - pv->pmu_allocation_get_dmem_size(pmu, out)); + pv->pmu_allocation_get_dmem_offset(pmu, out), + pv->pmu_allocation_get_dmem_size(pmu, out), + PMU_DMEM_ALLOC_ALIGNMENT); pmu_seq_release(pmu, seq); return err; diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c index 493c7b63..04f61c58 100644 --- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c @@ -45,7 +45,7 @@ struct gk20a_semaphore_pool 
*gk20a_semaphore_pool_alloc(struct device *d, goto clean_up; if (gk20a_allocator_init(&p->alloc, unique_name, 0, - p->size, SEMAPHORE_SIZE)) + p->size)) goto clean_up; gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->cpu_va, @@ -163,7 +163,8 @@ struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool) if (!s) return NULL; - if (pool->alloc.alloc(&pool->alloc, &s->offset, SEMAPHORE_SIZE)) { + if (pool->alloc.alloc(&pool->alloc, &s->offset, SEMAPHORE_SIZE, + SEMAPHORE_SIZE)) { gk20a_err(pool->dev, "failed to allocate semaphore"); kfree(s); return NULL; @@ -185,7 +186,8 @@ static void gk20a_semaphore_free(struct kref *ref) struct gk20a_semaphore *s = container_of(ref, struct gk20a_semaphore, ref); - s->pool->alloc.free(&s->pool->alloc, s->offset, SEMAPHORE_SIZE); + s->pool->alloc.free(&s->pool->alloc, s->offset, SEMAPHORE_SIZE, + SEMAPHORE_SIZE); gk20a_semaphore_pool_put(s->pool); kfree(s); } diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 10e3ba7f..fe2e06d5 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c @@ -92,8 +92,7 @@ static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) gk20a_allocator_init(&gr->comp_tags, "comptag", 1, /* start */ - max_comptag_lines - 1, /* length*/ - 1); /* align */ + max_comptag_lines - 1); /* length*/ gr->comptags_per_cacheline = comptags_per_cacheline; gr->slices_per_ltc = slices_per_ltc; diff --git a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c index ddff23b7..1beac216 100644 --- a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c @@ -43,8 +43,7 @@ static int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) gk20a_allocator_init(&gr->comp_tags, "comptag", 1, /* start */ - max_comptag_lines - 1, /* length*/ - 1); /* align */ + max_comptag_lines - 1); /* length*/ return 0; } diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c 
b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index eb67c01f..7f1a5856 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c @@ -297,8 +297,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name, low_hole_pages, /* start */ - num_pages - low_hole_pages, /* length */ - 1); /* align */ + num_pages - low_hole_pages); /* length */ snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, gmmu_page_sizes[gmmu_page_size_big]>>10); @@ -307,8 +306,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, ilog2(gmmu_page_sizes[gmmu_page_size_big])); gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name, num_pages, /* start */ - num_pages, /* length */ - 1); /* align */ + num_pages); /* length */ vm->mapped_buffers = RB_ROOT; -- cgit v1.2.2