path: root/drivers/gpu/nvgpu/common/semaphore.c
author	Alex Waterman <alexw@nvidia.com>	2017-04-12 14:27:48 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-04-25 17:26:00 -0400
commit	84dadb1a9ae2ab0473976ebf5ece1cb0d1e60205 (patch)
tree	8ec8d404c319082dc472eae1ca1b56f2b7e7c197 /drivers/gpu/nvgpu/common/semaphore.c
parent	aff9d46c00a2a82c93d6cc43d790584e7e474d0e (diff)
gpu: nvgpu: Move semaphore impl to nvgpu_mem
Use struct nvgpu_mem for DMA allocations (and the corresponding
nvgpu_dma_alloc_sys()) instead of hand-rolled code. This migrates away
from using Linux scatter-gather tables directly; they are now hidden
inside the nvgpu_mem struct. With this change the semaphore.c code no
longer has any direct Linux dependencies.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: I92167c98aac9b413ae87496744dcee051cd60207
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1464081
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
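In practice the conversion boils down to the pattern below — a minimal
sketch, assuming only the nvgpu_mem API visible in the diff that follows
(the function name sema_sea_grow_sketch and the comment placement are
illustrative, not taken from the tree):

#include <nvgpu/dma.h>        /* nvgpu_dma_alloc_sys(), nvgpu_dma_free() */
#include <nvgpu/semaphore.h>  /* SEMAPHORE_POOL_COUNT */

/*
 * Sketch: grow a semaphore sea the nvgpu_mem way. One call allocates
 * the sysmem backing and fills in *mem; the scatter-gather table is
 * kept in mem->priv, so callers no longer touch dma_map_sg() or
 * sg_alloc_table_from_pages() themselves.
 */
static int sema_sea_grow_sketch(struct gk20a *g, struct nvgpu_mem *mem)
{
	int err;

	err = nvgpu_dma_alloc_sys(g, PAGE_SIZE * SEMAPHORE_POOL_COUNT, mem);
	if (err)
		return err;

	/*
	 * CPU-side writes now go through the aperture-aware accessor
	 * instead of writel() on a vmap()'d page.
	 */
	nvgpu_mem_wr(g, mem, 0 /* byte offset */, 0 /* value */);

	return 0;
}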
Diffstat (limited to 'drivers/gpu/nvgpu/common/semaphore.c')
-rw-r--r--  drivers/gpu/nvgpu/common/semaphore.c | 201
1 file changed, 89 insertions(+), 112 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index bf7b6348..fa86985b 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -13,11 +13,6 @@
  * more details.
  */
 
-#define pr_fmt(fmt) "gpu_sema: " fmt
-
-#include <linux/dma-mapping.h>
-#include <linux/highmem.h>
-
 #include <nvgpu/dma.h>
 #include <nvgpu/semaphore.h>
 #include <nvgpu/kmem.h>
@@ -26,17 +21,19 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+#define pool_to_gk20a(p) ((p)->sema_sea->gk20a)
+
 #define __lock_sema_sea(s) \
 	do { \
-		gpu_sema_verbose_dbg("Acquiring sema lock..."); \
+		gpu_sema_verbose_dbg(s->gk20a, "Acquiring sema lock..."); \
 		nvgpu_mutex_acquire(&s->sea_lock); \
-		gpu_sema_verbose_dbg("Sema lock aquried!"); \
+		gpu_sema_verbose_dbg(s->gk20a, "Sema lock aquried!"); \
 	} while (0)
 
 #define __unlock_sema_sea(s) \
 	do { \
 		nvgpu_mutex_release(&s->sea_lock); \
-		gpu_sema_verbose_dbg("Released sema lock"); \
+		gpu_sema_verbose_dbg(s->gk20a, "Released sema lock"); \
 	} while (0)
 
 /*
@@ -54,13 +51,12 @@ static int __nvgpu_semaphore_sea_grow(struct nvgpu_semaphore_sea *sea)
 
 	__lock_sema_sea(sea);
 
-	ret = nvgpu_dma_alloc_flags_sys(gk20a, NVGPU_DMA_NO_KERNEL_MAPPING,
-					PAGE_SIZE * SEMAPHORE_POOL_COUNT,
-					&sea->sea_mem);
+	ret = nvgpu_dma_alloc_sys(gk20a,
+				  PAGE_SIZE * SEMAPHORE_POOL_COUNT,
+				  &sea->sea_mem);
 	if (ret)
 		goto out;
 
-	sea->ro_sg_table = sea->sea_mem.priv.sgt;
 	sea->size = SEMAPHORE_POOL_COUNT;
 	sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE;
 
@@ -102,7 +98,7 @@ struct nvgpu_semaphore_sea *nvgpu_semaphore_sea_create(struct gk20a *g)
 	if (__nvgpu_semaphore_sea_grow(g->sema_sea))
 		goto cleanup_destroy;
 
-	gpu_sema_dbg("Created semaphore sea!");
+	gpu_sema_dbg(g, "Created semaphore sea!");
 	return g->sema_sea;
 
 cleanup_destroy:
@@ -110,7 +106,7 @@ cleanup_destroy:
 cleanup_free:
 	nvgpu_kfree(g, g->sema_sea);
 	g->sema_sea = NULL;
-	gpu_sema_dbg("Failed to creat semaphore sea!");
+	gpu_sema_dbg(g, "Failed to creat semaphore sea!");
 	return NULL;
 }
 
@@ -146,7 +142,8 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc(
 	if (err)
 		goto fail;
 
-	ret = __semaphore_bitmap_alloc(sea->pools_alloced, SEMAPHORE_POOL_COUNT);
+	ret = __semaphore_bitmap_alloc(sea->pools_alloced,
+				       SEMAPHORE_POOL_COUNT);
 	if (ret < 0) {
 		err = ret;
 		goto fail_alloc;
@@ -154,8 +151,6 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc(
 
 	page_idx = (unsigned long)ret;
 
-	p->page = sea->sea_mem.priv.pages[page_idx];
-	p->ro_sg_table = sea->ro_sg_table;
 	p->page_idx = page_idx;
 	p->sema_sea = sea;
 	nvgpu_init_list_node(&p->hw_semas);
@@ -166,7 +161,8 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc(
 	nvgpu_list_add(&p->pool_list_entry, &sea->pool_list);
 	__unlock_sema_sea(sea);
 
-	gpu_sema_dbg("Allocated semaphore pool: page-idx=%d", p->page_idx);
+	gpu_sema_dbg(sea->gk20a,
+		     "Allocated semaphore pool: page-idx=%d", p->page_idx);
 
 	return p;
 
@@ -175,7 +171,7 @@ fail_alloc:
 fail:
 	__unlock_sema_sea(sea);
 	nvgpu_kfree(sea->gk20a, p);
-	gpu_sema_dbg("Failed to allocate semaphore pool!");
+	gpu_sema_dbg(sea->gk20a, "Failed to allocate semaphore pool!");
 	return ERR_PTR(err);
 }
 
@@ -186,91 +182,82 @@ fail:
 int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p,
 			     struct vm_gk20a *vm)
 {
-	int ents, err = 0;
+	int err = 0;
 	u64 addr;
 
-	gpu_sema_dbg("Mapping sempahore pool! (idx=%d)", p->page_idx);
-
-	p->cpu_va = vmap(&p->page, 1, 0,
-			 pgprot_writecombine(PAGE_KERNEL));
-
-	gpu_sema_dbg(" %d: CPU VA = 0x%p!", p->page_idx, p->cpu_va);
-
-	/* First do the RW mapping. */
-	p->rw_sg_table = nvgpu_kzalloc(p->sema_sea->gk20a,
-				       sizeof(*p->rw_sg_table));
-	if (!p->rw_sg_table)
-		return -ENOMEM;
-
-	err = sg_alloc_table_from_pages(p->rw_sg_table, &p->page, 1, 0,
-					PAGE_SIZE, GFP_KERNEL);
-	if (err) {
-		err = -ENOMEM;
-		goto fail;
-	}
-
-	/* Add IOMMU mapping... */
-	ents = dma_map_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-			  DMA_BIDIRECTIONAL);
-	if (ents != 1) {
+	if (p->mapped)
+		return -EBUSY;
+
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     "Mapping semaphore pool! (idx=%d)", p->page_idx);
+
+	/*
+	 * Take the sea lock so that we don't race with a possible change to the
+	 * nvgpu_mem in the sema sea.
+	 */
+	__lock_sema_sea(p->sema_sea);
+
+	addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->sea_mem.priv.sgt,
+				    p->sema_sea->gpu_va,
+				    p->sema_sea->map_size,
+				    0, gk20a_mem_flag_read_only, 0,
+				    p->sema_sea->sea_mem.aperture);
+	if (!addr) {
 		err = -ENOMEM;
-		goto fail_free_sgt;
+		goto fail_unlock;
 	}
 
-	gpu_sema_dbg(" %d: DMA addr = 0x%pad", p->page_idx,
-		     &sg_dma_address(p->rw_sg_table->sgl));
-
-	/* Map into the GPU... Doesn't need to be fixed. */
-	p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE,
-				   0, gk20a_mem_flag_none, false,
-				   APERTURE_SYSMEM);
-	if (!p->gpu_va) {
-		err = -ENOMEM;
-		goto fail_unmap_sgt;
-	}
+	p->gpu_va_ro = addr;
+	p->mapped = 1;
 
-	gpu_sema_dbg(" %d: GPU read-write VA = 0x%llx", p->page_idx,
-		     p->gpu_va);
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     " %d: GPU read-only VA = 0x%llx",
+		     p->page_idx, p->gpu_va_ro);
 
 	/*
-	 * And now the global mapping. Take the sea lock so that we don't race
-	 * with a concurrent remap.
+	 * Now the RW mapping. This is a bit more complicated. We make a
+	 * nvgpu_mem describing a page of the bigger RO space and then map
+	 * that. Unlike above this does not need to be a fixed address.
 	 */
-	__lock_sema_sea(p->sema_sea);
+	err = nvgpu_mem_create_from_mem(vm->mm->g,
+					&p->rw_mem, &p->sema_sea->sea_mem,
+					p->page_idx, 1);
+	if (err)
+		goto fail_unmap;
+
+	addr = gk20a_gmmu_map(vm, &p->rw_mem.priv.sgt, SZ_4K, 0,
+			      gk20a_mem_flag_none, 0,
+			      p->rw_mem.aperture);
 
-	BUG_ON(p->mapped);
-	addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->ro_sg_table,
-				    p->sema_sea->gpu_va, p->sema_sea->map_size,
-				    0,
-				    gk20a_mem_flag_read_only,
-				    false,
-				    APERTURE_SYSMEM);
 	if (!addr) {
 		err = -ENOMEM;
-		BUG();
-		goto fail_unlock;
+		goto fail_free_submem;
 	}
-	p->gpu_va_ro = addr;
-	p->mapped = 1;
 
-	gpu_sema_dbg(" %d: GPU read-only VA = 0x%llx", p->page_idx,
-		     p->gpu_va_ro);
+	p->gpu_va = addr;
 
 	__unlock_sema_sea(p->sema_sea);
 
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     " %d: GPU read-write VA = 0x%llx",
+		     p->page_idx, p->gpu_va);
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     " %d: CPU VA = 0x%p",
+		     p->page_idx, p->rw_mem.cpu_va);
+
 	return 0;
 
+fail_free_submem:
+	nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem);
+fail_unmap:
+	gk20a_gmmu_unmap(vm,
+			 p->sema_sea->sea_mem.gpu_va,
+			 p->sema_sea->map_size,
+			 gk20a_mem_flag_none);
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     " %d: Failed to map semaphore pool!", p->page_idx);
 fail_unlock:
 	__unlock_sema_sea(p->sema_sea);
-fail_unmap_sgt:
-	dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-		     DMA_BIDIRECTIONAL);
-fail_free_sgt:
-	sg_free_table(p->rw_sg_table);
-fail:
-	nvgpu_kfree(p->sema_sea->gk20a, p->rw_sg_table);
-	p->rw_sg_table = NULL;
-	gpu_sema_dbg(" %d: Failed to map semaphore pool!", p->page_idx);
 	return err;
 }
 
@@ -280,41 +267,30 @@ fail:
 void nvgpu_semaphore_pool_unmap(struct nvgpu_semaphore_pool *p,
 				struct vm_gk20a *vm)
 {
-	struct nvgpu_semaphore_int *hw_sema;
-
-	kunmap(p->cpu_va);
-
-	/* First the global RO mapping... */
 	__lock_sema_sea(p->sema_sea);
-	gk20a_gmmu_unmap(vm, p->gpu_va_ro,
-			 p->sema_sea->map_size, gk20a_mem_flag_none);
-	p->ro_sg_table = NULL;
-	__unlock_sema_sea(p->sema_sea);
 
-	/* And now the private RW mapping. */
-	gk20a_gmmu_unmap(vm, p->gpu_va, PAGE_SIZE, gk20a_mem_flag_none);
-	p->gpu_va = 0;
-
-	dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-		     DMA_BIDIRECTIONAL);
+	gk20a_gmmu_unmap(vm,
+			 p->sema_sea->sea_mem.gpu_va,
+			 p->sema_sea->sea_mem.size,
+			 gk20a_mem_flag_none);
+	gk20a_gmmu_unmap(vm,
+			 p->rw_mem.gpu_va,
+			 p->rw_mem.size,
+			 gk20a_mem_flag_none);
+	nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem);
 
-	sg_free_table(p->rw_sg_table);
-	nvgpu_kfree(p->sema_sea->gk20a, p->rw_sg_table);
-	p->rw_sg_table = NULL;
+	p->gpu_va = 0;
+	p->gpu_va_ro = 0;
+	p->mapped = 0;
 
-	nvgpu_list_for_each_entry(hw_sema, &p->hw_semas,
-				  nvgpu_semaphore_int, hw_sema_list)
-		/*
-		 * Make sure the mem addresses are all NULL so if this gets
-		 * reused we will fault.
-		 */
-		hw_sema->value = NULL;
+	__unlock_sema_sea(p->sema_sea);
 
-	gpu_sema_dbg("Unmapped semaphore pool! (idx=%d)", p->page_idx);
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     "Unmapped semaphore pool! (idx=%d)", p->page_idx);
 }
 
 /*
- * Completely free a sempahore_pool. You should make sure this pool is not
+ * Completely free a semaphore_pool. You should make sure this pool is not
  * mapped otherwise there's going to be a memory leak.
  */
 static void nvgpu_semaphore_pool_free(struct kref *ref)
@@ -324,7 +300,8 @@ static void nvgpu_semaphore_pool_free(struct kref *ref)
 	struct nvgpu_semaphore_sea *s = p->sema_sea;
 	struct nvgpu_semaphore_int *hw_sema, *tmp;
 
-	WARN_ON(p->gpu_va || p->rw_sg_table || p->ro_sg_table);
+	/* Freeing a mapped pool is a bad idea. */
+	WARN_ON(p->mapped || p->gpu_va || p->gpu_va_ro);
 
 	__lock_sema_sea(s);
 	nvgpu_list_del(&p->pool_list_entry);
@@ -338,7 +315,8 @@ static void nvgpu_semaphore_pool_free(struct kref *ref)
 
 	nvgpu_mutex_destroy(&p->pool_lock);
 
-	gpu_sema_dbg("Freed semaphore pool! (idx=%d)", p->page_idx);
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     "Freed semaphore pool! (idx=%d)", p->page_idx);
 	nvgpu_kfree(p->sema_sea->gk20a, p);
 }
 
@@ -395,9 +373,8 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch)
 	hw_sema->idx = hw_sema_idx;
 	hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx;
 	atomic_set(&hw_sema->next_value, 0);
-	hw_sema->value = p->cpu_va + hw_sema->offset;
-	writel(0, hw_sema->value);
 	nvgpu_init_list_node(&hw_sema->hw_sema_list);
+	nvgpu_mem_wr(ch->g, &p->rw_mem, hw_sema->offset, 0);
 
 	nvgpu_list_add(&hw_sema->hw_sema_list, &p->hw_semas);
 
@@ -464,7 +441,7 @@ struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch)
 	 */
 	nvgpu_semaphore_pool_get(s->hw_sema->p);
 
-	gpu_sema_dbg("Allocated semaphore (c=%d)", ch->hw_chid);
+	gpu_sema_dbg(ch->g, "Allocated semaphore (c=%d)", ch->hw_chid);
 
 	return s;
 }
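For reference, the mapping scheme the new nvgpu_semaphore_pool_map()
sets up can be reduced to the sketch below. It uses only the calls that
appear in the diff above, with error unwinding trimmed, so treat it as
an illustration (pool_map_sketch is a made-up name) rather than the
driver code itself:

/*
 * Sketch of the two GPU mappings made per pool:
 *  1. the whole sea, read-only, at a fixed GPU VA shared by all pools;
 *  2. this pool's single page, read-write, at any free GPU VA.
 */
static int pool_map_sketch(struct nvgpu_semaphore_pool *p,
			   struct vm_gk20a *vm)
{
	p->gpu_va_ro = gk20a_gmmu_fixed_map(vm, &p->sema_sea->sea_mem.priv.sgt,
					    p->sema_sea->gpu_va,
					    p->sema_sea->map_size,
					    0, gk20a_mem_flag_read_only, 0,
					    p->sema_sea->sea_mem.aperture);
	if (!p->gpu_va_ro)
		return -ENOMEM;

	/* Describe one page of the sea as its own nvgpu_mem... */
	if (nvgpu_mem_create_from_mem(vm->mm->g, &p->rw_mem,
				      &p->sema_sea->sea_mem, p->page_idx, 1))
		return -ENOMEM;

	/* ...and map it read-write; this mapping need not be fixed. */
	p->gpu_va = gk20a_gmmu_map(vm, &p->rw_mem.priv.sgt, SZ_4K, 0,
				   gk20a_mem_flag_none, 0,
				   p->rw_mem.aperture);
	return p->gpu_va ? 0 : -ENOMEM;
}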