diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2016-05-18 07:19:53 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-05-18 14:55:11 -0400 |
commit | abec0ddc19b89822138f3c8970516eed4d0e17d9 (patch) | |
tree | f788095b1bc9118c9afac81c77d27bd0044ac5ad /drivers/gpu/nvgpu | |
parent | dc45473eeb39d93100290a0f09bd787b3a5ce3f2 (diff) |
gpu: nvgpu: use mem_desc for semaphores
Replace manual buffer allocation and cpu_va pointer accesses with
gk20a_gmmu_{alloc,free}() and gk20a_mem_{rd,wr}() using a struct
mem_desc in gk20a_semaphore_pool, for buffer aperture flexibility.
JIRA DNVGPU-23
Change-Id: I394c38f407a9da02480bfd35062a892eec242ea3
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1146684
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 12 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c | 44 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h | 20 |
4 files changed, 33 insertions, 46 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 0a769b56..011c980e 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -702,8 +702,7 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c) | |||
702 | asid = c->vm->as_share->id; | 702 | asid = c->vm->as_share->id; |
703 | 703 | ||
704 | sprintf(pool_name, "semaphore_pool-%d", c->hw_chid); | 704 | sprintf(pool_name, "semaphore_pool-%d", c->hw_chid); |
705 | sema->pool = gk20a_semaphore_pool_alloc(dev_from_gk20a(c->g), | 705 | sema->pool = gk20a_semaphore_pool_alloc(c->g, pool_name, 1024); |
706 | pool_name, 1024); | ||
707 | if (!sema->pool) | 706 | if (!sema->pool) |
708 | goto clean_up; | 707 | goto clean_up; |
709 | 708 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c index 1bda5902..fe38ae57 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c | |||
@@ -154,17 +154,9 @@ struct gk20a_fence *gk20a_fence_from_semaphore( | |||
154 | struct sync_fence *sync_fence = NULL; | 154 | struct sync_fence *sync_fence = NULL; |
155 | 155 | ||
156 | #ifdef CONFIG_SYNC | 156 | #ifdef CONFIG_SYNC |
157 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) | ||
158 | sync_fence = gk20a_sync_fence_create(timeline, semaphore, | 157 | sync_fence = gk20a_sync_fence_create(timeline, semaphore, |
159 | dependency, "f-gk20a-0x%04llx", | 158 | dependency, "f-gk20a-0x%04x", |
160 | ((uintptr_t)(void *)semaphore->value) & | 159 | semaphore->offset & 0xffff); |
161 | 0xffff); | ||
162 | #else | ||
163 | sync_fence = gk20a_sync_fence_create(timeline, semaphore, | ||
164 | dependency, "f-gk20a-0x%04llx", | ||
165 | ((u64)(void *)semaphore->value) & | ||
166 | 0xffff); | ||
167 | #endif | ||
168 | if (!sync_fence) | 160 | if (!sync_fence) |
169 | return NULL; | 161 | return NULL; |
170 | #endif | 162 | #endif |
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c index cf855463..3b17bfcb 100644 --- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c | |||
@@ -23,7 +23,7 @@ | |||
23 | 23 | ||
24 | static const int SEMAPHORE_SIZE = 16; | 24 | static const int SEMAPHORE_SIZE = 16; |
25 | 25 | ||
26 | struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct device *d, | 26 | struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct gk20a *g, |
27 | const char *unique_name, size_t capacity) | 27 | const char *unique_name, size_t capacity) |
28 | { | 28 | { |
29 | struct gk20a_semaphore_pool *p; | 29 | struct gk20a_semaphore_pool *p; |
@@ -34,30 +34,27 @@ struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct device *d, | |||
34 | kref_init(&p->ref); | 34 | kref_init(&p->ref); |
35 | INIT_LIST_HEAD(&p->maps); | 35 | INIT_LIST_HEAD(&p->maps); |
36 | mutex_init(&p->maps_mutex); | 36 | mutex_init(&p->maps_mutex); |
37 | p->dev = d; | 37 | p->g = g; |
38 | 38 | ||
39 | /* Alloc one 4k page of semaphore per channel. */ | 39 | /* Alloc one 4k page of semaphore per channel. */ |
40 | p->size = roundup(capacity * SEMAPHORE_SIZE, PAGE_SIZE); | 40 | if (gk20a_gmmu_alloc(g, roundup(capacity * SEMAPHORE_SIZE, PAGE_SIZE), |
41 | p->cpu_va = dma_alloc_coherent(d, p->size, &p->iova, GFP_KERNEL); | 41 | &p->mem)) |
42 | if (!p->cpu_va) | ||
43 | goto clean_up; | ||
44 | if (gk20a_get_sgtable(d, &p->sgt, p->cpu_va, p->iova, p->size)) | ||
45 | goto clean_up; | 42 | goto clean_up; |
46 | 43 | ||
47 | /* Sacrifice one semaphore in the name of returning error codes. */ | 44 | /* Sacrifice one semaphore in the name of returning error codes. */ |
48 | if (gk20a_allocator_init(&p->alloc, unique_name, | 45 | if (gk20a_allocator_init(&p->alloc, unique_name, |
49 | SEMAPHORE_SIZE, p->size - SEMAPHORE_SIZE, | 46 | SEMAPHORE_SIZE, p->mem.size - SEMAPHORE_SIZE, |
50 | SEMAPHORE_SIZE)) | 47 | SEMAPHORE_SIZE)) |
51 | goto clean_up; | 48 | goto clean_up; |
52 | 49 | ||
53 | gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->cpu_va, | 50 | gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->mem.cpu_va, |
54 | (u64)sg_dma_address(p->sgt->sgl), (u64)sg_phys(p->sgt->sgl)); | 51 | (u64)sg_dma_address(p->mem.sgt->sgl), |
52 | (u64)sg_phys(p->mem.sgt->sgl)); | ||
55 | return p; | 53 | return p; |
54 | |||
56 | clean_up: | 55 | clean_up: |
57 | if (p->cpu_va) | 56 | if (p->mem.size) |
58 | dma_free_coherent(d, p->size, p->cpu_va, p->iova); | 57 | gk20a_gmmu_free(p->g, &p->mem); |
59 | if (p->sgt) | ||
60 | gk20a_free_sgtable(&p->sgt); | ||
61 | kfree(p); | 58 | kfree(p); |
62 | return NULL; | 59 | return NULL; |
63 | } | 60 | } |
@@ -69,8 +66,7 @@ static void gk20a_semaphore_pool_free(struct kref *ref) | |||
69 | mutex_lock(&p->maps_mutex); | 66 | mutex_lock(&p->maps_mutex); |
70 | WARN_ON(!list_empty(&p->maps)); | 67 | WARN_ON(!list_empty(&p->maps)); |
71 | mutex_unlock(&p->maps_mutex); | 68 | mutex_unlock(&p->maps_mutex); |
72 | gk20a_free_sgtable(&p->sgt); | 69 | gk20a_gmmu_free(p->g, &p->mem); |
73 | dma_free_coherent(p->dev, p->size, p->cpu_va, p->iova); | ||
74 | gk20a_allocator_destroy(&p->alloc); | 70 | gk20a_allocator_destroy(&p->alloc); |
75 | kfree(p); | 71 | kfree(p); |
76 | } | 72 | } |
@@ -110,7 +106,7 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p, | |||
110 | return -ENOMEM; | 106 | return -ENOMEM; |
111 | map->vm = vm; | 107 | map->vm = vm; |
112 | map->rw_flag = rw_flag; | 108 | map->rw_flag = rw_flag; |
113 | map->gpu_va = gk20a_gmmu_map(vm, &p->sgt, p->size, | 109 | map->gpu_va = gk20a_gmmu_map(vm, &p->mem.sgt, p->mem.size, |
114 | 0/*uncached*/, rw_flag, | 110 | 0/*uncached*/, rw_flag, |
115 | false); | 111 | false); |
116 | if (!map->gpu_va) { | 112 | if (!map->gpu_va) { |
@@ -135,7 +131,7 @@ void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p, | |||
135 | mutex_lock(&p->maps_mutex); | 131 | mutex_lock(&p->maps_mutex); |
136 | map = gk20a_semaphore_pool_find_map_locked(p, vm); | 132 | map = gk20a_semaphore_pool_find_map_locked(p, vm); |
137 | if (map) { | 133 | if (map) { |
138 | gk20a_gmmu_unmap(vm, map->gpu_va, p->size, map->rw_flag); | 134 | gk20a_gmmu_unmap(vm, map->gpu_va, p->mem.size, map->rw_flag); |
139 | gk20a_vm_put(vm); | 135 | gk20a_vm_put(vm); |
140 | list_del(&map->list); | 136 | list_del(&map->list); |
141 | kfree(map); | 137 | kfree(map); |
@@ -168,7 +164,8 @@ struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool) | |||
168 | 164 | ||
169 | s->offset = gk20a_balloc(&pool->alloc, SEMAPHORE_SIZE); | 165 | s->offset = gk20a_balloc(&pool->alloc, SEMAPHORE_SIZE); |
170 | if (!s->offset) { | 166 | if (!s->offset) { |
171 | gk20a_err(pool->dev, "failed to allocate semaphore"); | 167 | gk20a_err(dev_from_gk20a(pool->g), |
168 | "failed to allocate semaphore"); | ||
172 | kfree(s); | 169 | kfree(s); |
173 | return NULL; | 170 | return NULL; |
174 | } | 171 | } |
@@ -177,10 +174,11 @@ struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool) | |||
177 | s->pool = pool; | 174 | s->pool = pool; |
178 | 175 | ||
179 | kref_init(&s->ref); | 176 | kref_init(&s->ref); |
180 | s->value = (volatile u32 *)((uintptr_t)pool->cpu_va + s->offset); | 177 | /* Initially acquired. */ |
181 | *s->value = 0; /* Initially acquired. */ | 178 | gk20a_mem_wr(s->pool->g, &s->pool->mem, s->offset, 0); |
182 | gk20a_dbg_info("created semaphore offset=%d, value_cpu=%p, value=%d", | 179 | gk20a_dbg_info("created semaphore offset=%d, value=%d", |
183 | s->offset, s->value, *s->value); | 180 | s->offset, |
181 | gk20a_mem_rd(s->pool->g, &s->pool->mem, s->offset)); | ||
184 | return s; | 182 | return s; |
185 | } | 183 | } |
186 | 184 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h index 6ffe1fd2..1f12e262 100644 --- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h | |||
@@ -20,11 +20,8 @@ | |||
20 | 20 | ||
21 | /* A memory pool for holding semaphores. */ | 21 | /* A memory pool for holding semaphores. */ |
22 | struct gk20a_semaphore_pool { | 22 | struct gk20a_semaphore_pool { |
23 | void *cpu_va; | 23 | struct mem_desc mem; |
24 | dma_addr_t iova; | 24 | struct gk20a *g; |
25 | size_t size; | ||
26 | struct device *dev; | ||
27 | struct sg_table *sgt; | ||
28 | struct list_head maps; | 25 | struct list_head maps; |
29 | struct mutex maps_mutex; | 26 | struct mutex maps_mutex; |
30 | struct kref ref; | 27 | struct kref ref; |
@@ -48,16 +45,17 @@ struct gk20a_semaphore_pool_map { | |||
48 | /* A semaphore that lives inside a semaphore pool. */ | 45 | /* A semaphore that lives inside a semaphore pool. */ |
49 | struct gk20a_semaphore { | 46 | struct gk20a_semaphore { |
50 | struct gk20a_semaphore_pool *pool; | 47 | struct gk20a_semaphore_pool *pool; |
48 | /* | ||
49 | * value exists within the pool's memory at the specified offset. | ||
50 | * 0=acquired, 1=released. | ||
51 | */ | ||
51 | u32 offset; /* byte offset within pool */ | 52 | u32 offset; /* byte offset within pool */ |
52 | struct kref ref; | 53 | struct kref ref; |
53 | /* value is a pointer within the pool's coherent cpu_va. | ||
54 | * It is shared between CPU and GPU, hence volatile. */ | ||
55 | volatile u32 *value; /* 0=acquired, 1=released */ | ||
56 | }; | 54 | }; |
57 | 55 | ||
58 | /* Create a semaphore pool that can hold at most 'capacity' semaphores. */ | 56 | /* Create a semaphore pool that can hold at most 'capacity' semaphores. */ |
59 | struct gk20a_semaphore_pool * | 57 | struct gk20a_semaphore_pool * |
60 | gk20a_semaphore_pool_alloc(struct device *, const char *unique_name, | 58 | gk20a_semaphore_pool_alloc(struct gk20a *, const char *unique_name, |
61 | size_t capacity); | 59 | size_t capacity); |
62 | void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *); | 60 | void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *); |
63 | int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *, | 61 | int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *, |
@@ -83,7 +81,7 @@ static inline u64 gk20a_semaphore_gpu_va(struct gk20a_semaphore *s, | |||
83 | 81 | ||
84 | static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s) | 82 | static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s) |
85 | { | 83 | { |
86 | u32 v = *s->value; | 84 | u32 v = gk20a_mem_rd(s->pool->g, &s->pool->mem, s->offset); |
87 | 85 | ||
88 | /* We often block on value reaching a certain threshold. We must make | 86 | /* We often block on value reaching a certain threshold. We must make |
89 | * sure that if we get unblocked, we haven't read anything too early. */ | 87 | * sure that if we get unblocked, we haven't read anything too early. */ |
@@ -94,6 +92,6 @@ static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s) | |||
94 | static inline void gk20a_semaphore_release(struct gk20a_semaphore *s) | 92 | static inline void gk20a_semaphore_release(struct gk20a_semaphore *s) |
95 | { | 93 | { |
96 | smp_wmb(); | 94 | smp_wmb(); |
97 | *s->value = 1; | 95 | gk20a_mem_wr(s->pool->g, &s->pool->mem, s->offset, 1); |
98 | } | 96 | } |
99 | #endif | 97 | #endif |