author	Alex Waterman <alexw@nvidia.com>	2017-04-12 14:27:48 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-04-25 17:26:00 -0400
commit	84dadb1a9ae2ab0473976ebf5ece1cb0d1e60205 (patch)
tree	8ec8d404c319082dc472eae1ca1b56f2b7e7c197
parent	aff9d46c00a2a82c93d6cc43d790584e7e474d0e (diff)
gpu: nvgpu: Move semaphore impl to nvgpu_mem
Use struct nvgpu_mem (and the corresponding nvgpu_dma_alloc_sys()) for
DMA allocations instead of custom-rolled code. This migrates away from
using Linux scatter-gather tables directly; that detail is now hidden
inside the nvgpu_mem struct. With this change the semaphore.c code no
longer has any direct Linux dependencies.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: I92167c98aac9b413ae87496744dcee051cd60207
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1464081
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
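The pattern this change standardizes on: a single struct nvgpu_mem owns the DMA buffer, and all CPU access goes through accessors instead of raw pointers. A rough sketch of that usage, built only from the calls visible in this diff (error handling and surrounding driver state trimmed, so not a complete excerpt from the tree):

	struct nvgpu_mem sea_mem;
	u32 val;
	int err;

	/* One sysmem DMA allocation backs the whole semaphore sea. */
	err = nvgpu_dma_alloc_sys(gk20a, PAGE_SIZE * SEMAPHORE_POOL_COUNT,
				  &sea_mem);
	if (err)
		return err;

	/* CPU access is by byte offset, not via a vmap()'d pointer and
	 * readl()/writel(). */
	nvgpu_mem_wr(gk20a, &sea_mem, 0, 0);
	val = nvgpu_mem_rd(gk20a, &sea_mem, 0);

	/* The backing scatter-gather table stays hidden in sea_mem.priv. */
	nvgpu_dma_free(gk20a, &sea_mem);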
-rw-r--r--	drivers/gpu/nvgpu/common/semaphore.c           | 201
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c   |   6
-rw-r--r--	drivers/gpu/nvgpu/gk20a/fifo_gk20a.c           |   2
-rw-r--r--	drivers/gpu/nvgpu/include/nvgpu/semaphore.h    |  72
4 files changed, 129 insertions(+), 152 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index bf7b6348..fa86985b 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -13,11 +13,6 @@
  * more details.
  */
 
-#define pr_fmt(fmt) "gpu_sema: " fmt
-
-#include <linux/dma-mapping.h>
-#include <linux/highmem.h>
-
 #include <nvgpu/dma.h>
 #include <nvgpu/semaphore.h>
 #include <nvgpu/kmem.h>
@@ -26,17 +21,19 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+#define pool_to_gk20a(p) ((p)->sema_sea->gk20a)
+
 #define __lock_sema_sea(s) \
 	do { \
-		gpu_sema_verbose_dbg("Acquiring sema lock..."); \
+		gpu_sema_verbose_dbg(s->gk20a, "Acquiring sema lock..."); \
 		nvgpu_mutex_acquire(&s->sea_lock); \
-		gpu_sema_verbose_dbg("Sema lock aquried!"); \
+		gpu_sema_verbose_dbg(s->gk20a, "Sema lock aquried!"); \
 	} while (0)
 
 #define __unlock_sema_sea(s) \
 	do { \
 		nvgpu_mutex_release(&s->sea_lock); \
-		gpu_sema_verbose_dbg("Released sema lock"); \
+		gpu_sema_verbose_dbg(s->gk20a, "Released sema lock"); \
 	} while (0)
 
 /*
@@ -54,13 +51,12 @@ static int __nvgpu_semaphore_sea_grow(struct nvgpu_semaphore_sea *sea)
 
 	__lock_sema_sea(sea);
 
-	ret = nvgpu_dma_alloc_flags_sys(gk20a, NVGPU_DMA_NO_KERNEL_MAPPING,
+	ret = nvgpu_dma_alloc_sys(gk20a,
 				  PAGE_SIZE * SEMAPHORE_POOL_COUNT,
 				  &sea->sea_mem);
 	if (ret)
 		goto out;
 
-	sea->ro_sg_table = sea->sea_mem.priv.sgt;
 	sea->size = SEMAPHORE_POOL_COUNT;
 	sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE;
 
@@ -102,7 +98,7 @@ struct nvgpu_semaphore_sea *nvgpu_semaphore_sea_create(struct gk20a *g)
 	if (__nvgpu_semaphore_sea_grow(g->sema_sea))
 		goto cleanup_destroy;
 
-	gpu_sema_dbg("Created semaphore sea!");
+	gpu_sema_dbg(g, "Created semaphore sea!");
 	return g->sema_sea;
 
 cleanup_destroy:
@@ -110,7 +106,7 @@ cleanup_destroy:
 cleanup_free:
 	nvgpu_kfree(g, g->sema_sea);
 	g->sema_sea = NULL;
-	gpu_sema_dbg("Failed to creat semaphore sea!");
+	gpu_sema_dbg(g, "Failed to creat semaphore sea!");
 	return NULL;
 }
 
@@ -146,7 +142,8 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc(
 	if (err)
 		goto fail;
 
-	ret = __semaphore_bitmap_alloc(sea->pools_alloced, SEMAPHORE_POOL_COUNT);
+	ret = __semaphore_bitmap_alloc(sea->pools_alloced,
+				       SEMAPHORE_POOL_COUNT);
 	if (ret < 0) {
 		err = ret;
 		goto fail_alloc;
@@ -154,8 +151,6 @@
 
 	page_idx = (unsigned long)ret;
 
-	p->page = sea->sea_mem.priv.pages[page_idx];
-	p->ro_sg_table = sea->ro_sg_table;
 	p->page_idx = page_idx;
 	p->sema_sea = sea;
 	nvgpu_init_list_node(&p->hw_semas);
@@ -166,7 +161,8 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc(
 	nvgpu_list_add(&p->pool_list_entry, &sea->pool_list);
 	__unlock_sema_sea(sea);
 
-	gpu_sema_dbg("Allocated semaphore pool: page-idx=%d", p->page_idx);
+	gpu_sema_dbg(sea->gk20a,
+		     "Allocated semaphore pool: page-idx=%d", p->page_idx);
 
 	return p;
 
@@ -175,7 +171,7 @@ fail_alloc:
 fail:
 	__unlock_sema_sea(sea);
 	nvgpu_kfree(sea->gk20a, p);
-	gpu_sema_dbg("Failed to allocate semaphore pool!");
+	gpu_sema_dbg(sea->gk20a, "Failed to allocate semaphore pool!");
 	return ERR_PTR(err);
 }
 
@@ -186,91 +182,82 @@ fail:
 int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p,
 			     struct vm_gk20a *vm)
 {
-	int ents, err = 0;
+	int err = 0;
 	u64 addr;
 
-	gpu_sema_dbg("Mapping sempahore pool! (idx=%d)", p->page_idx);
-
-	p->cpu_va = vmap(&p->page, 1, 0,
-			 pgprot_writecombine(PAGE_KERNEL));
-
-	gpu_sema_dbg(" %d: CPU VA = 0x%p!", p->page_idx, p->cpu_va);
+	if (p->mapped)
+		return -EBUSY;
 
-	/* First do the RW mapping. */
-	p->rw_sg_table = nvgpu_kzalloc(p->sema_sea->gk20a,
-				       sizeof(*p->rw_sg_table));
-	if (!p->rw_sg_table)
-		return -ENOMEM;
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     "Mapping semaphore pool! (idx=%d)", p->page_idx);
 
-	err = sg_alloc_table_from_pages(p->rw_sg_table, &p->page, 1, 0,
-					PAGE_SIZE, GFP_KERNEL);
-	if (err) {
-		err = -ENOMEM;
-		goto fail;
-	}
+	/*
+	 * Take the sea lock so that we don't race with a possible change to the
+	 * nvgpu_mem in the sema sea.
+	 */
+	__lock_sema_sea(p->sema_sea);
 
-	/* Add IOMMU mapping... */
-	ents = dma_map_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-			  DMA_BIDIRECTIONAL);
-	if (ents != 1) {
+	addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->sea_mem.priv.sgt,
+				    p->sema_sea->gpu_va,
+				    p->sema_sea->map_size,
+				    0, gk20a_mem_flag_read_only, 0,
+				    p->sema_sea->sea_mem.aperture);
+	if (!addr) {
 		err = -ENOMEM;
-		goto fail_free_sgt;
+		goto fail_unlock;
 	}
 
-	gpu_sema_dbg(" %d: DMA addr = 0x%pad", p->page_idx,
-		     &sg_dma_address(p->rw_sg_table->sgl));
-
-	/* Map into the GPU... Doesn't need to be fixed. */
-	p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE,
-				   0, gk20a_mem_flag_none, false,
-				   APERTURE_SYSMEM);
-	if (!p->gpu_va) {
-		err = -ENOMEM;
-		goto fail_unmap_sgt;
-	}
+	p->gpu_va_ro = addr;
+	p->mapped = 1;
 
-	gpu_sema_dbg(" %d: GPU read-write VA = 0x%llx", p->page_idx,
-		     p->gpu_va);
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     " %d: GPU read-only VA = 0x%llx",
+		     p->page_idx, p->gpu_va_ro);
 
 	/*
-	 * And now the global mapping. Take the sea lock so that we don't race
-	 * with a concurrent remap.
+	 * Now the RW mapping. This is a bit more complicated. We make a
+	 * nvgpu_mem describing a page of the bigger RO space and then map
+	 * that. Unlike above this does not need to be a fixed address.
 	 */
-	__lock_sema_sea(p->sema_sea);
+	err = nvgpu_mem_create_from_mem(vm->mm->g,
+					&p->rw_mem, &p->sema_sea->sea_mem,
+					p->page_idx, 1);
+	if (err)
+		goto fail_unmap;
+
+	addr = gk20a_gmmu_map(vm, &p->rw_mem.priv.sgt, SZ_4K, 0,
+			      gk20a_mem_flag_none, 0,
+			      p->rw_mem.aperture);
 
-	BUG_ON(p->mapped);
-	addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->ro_sg_table,
-				    p->sema_sea->gpu_va, p->sema_sea->map_size,
-				    0,
-				    gk20a_mem_flag_read_only,
-				    false,
-				    APERTURE_SYSMEM);
 	if (!addr) {
 		err = -ENOMEM;
-		BUG();
-		goto fail_unlock;
+		goto fail_free_submem;
 	}
-	p->gpu_va_ro = addr;
-	p->mapped = 1;
 
-	gpu_sema_dbg(" %d: GPU read-only VA = 0x%llx", p->page_idx,
-		     p->gpu_va_ro);
+	p->gpu_va = addr;
 
 	__unlock_sema_sea(p->sema_sea);
 
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     " %d: GPU read-write VA = 0x%llx",
+		     p->page_idx, p->gpu_va);
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     " %d: CPU VA = 0x%p",
+		     p->page_idx, p->rw_mem.cpu_va);
+
 	return 0;
 
+fail_free_submem:
+	nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem);
+fail_unmap:
+	gk20a_gmmu_unmap(vm,
+			 p->sema_sea->sea_mem.gpu_va,
+			 p->sema_sea->map_size,
+			 gk20a_mem_flag_none);
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     " %d: Failed to map semaphore pool!", p->page_idx);
 fail_unlock:
 	__unlock_sema_sea(p->sema_sea);
-fail_unmap_sgt:
-	dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-		     DMA_BIDIRECTIONAL);
-fail_free_sgt:
-	sg_free_table(p->rw_sg_table);
-fail:
-	nvgpu_kfree(p->sema_sea->gk20a, p->rw_sg_table);
-	p->rw_sg_table = NULL;
-	gpu_sema_dbg(" %d: Failed to map semaphore pool!", p->page_idx);
 	return err;
 }
 
@@ -280,41 +267,30 @@ fail:
 void nvgpu_semaphore_pool_unmap(struct nvgpu_semaphore_pool *p,
 				struct vm_gk20a *vm)
 {
-	struct nvgpu_semaphore_int *hw_sema;
-
-	kunmap(p->cpu_va);
-
-	/* First the global RO mapping... */
 	__lock_sema_sea(p->sema_sea);
-	gk20a_gmmu_unmap(vm, p->gpu_va_ro,
-			 p->sema_sea->map_size, gk20a_mem_flag_none);
-	p->ro_sg_table = NULL;
-	__unlock_sema_sea(p->sema_sea);
 
-	/* And now the private RW mapping. */
-	gk20a_gmmu_unmap(vm, p->gpu_va, PAGE_SIZE, gk20a_mem_flag_none);
-	p->gpu_va = 0;
-
-	dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-		     DMA_BIDIRECTIONAL);
+	gk20a_gmmu_unmap(vm,
+			 p->sema_sea->sea_mem.gpu_va,
+			 p->sema_sea->sea_mem.size,
+			 gk20a_mem_flag_none);
+	gk20a_gmmu_unmap(vm,
+			 p->rw_mem.gpu_va,
+			 p->rw_mem.size,
+			 gk20a_mem_flag_none);
+	nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem);
 
-	sg_free_table(p->rw_sg_table);
-	nvgpu_kfree(p->sema_sea->gk20a, p->rw_sg_table);
-	p->rw_sg_table = NULL;
+	p->gpu_va = 0;
+	p->gpu_va_ro = 0;
+	p->mapped = 0;
 
-	nvgpu_list_for_each_entry(hw_sema, &p->hw_semas,
-				  nvgpu_semaphore_int, hw_sema_list)
-		/*
-		 * Make sure the mem addresses are all NULL so if this gets
-		 * reused we will fault.
-		 */
-		hw_sema->value = NULL;
+	__unlock_sema_sea(p->sema_sea);
 
-	gpu_sema_dbg("Unmapped semaphore pool! (idx=%d)", p->page_idx);
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     "Unmapped semaphore pool! (idx=%d)", p->page_idx);
 }
 
 /*
- * Completely free a sempahore_pool. You should make sure this pool is not
+ * Completely free a semaphore_pool. You should make sure this pool is not
  * mapped otherwise there's going to be a memory leak.
  */
 static void nvgpu_semaphore_pool_free(struct kref *ref)
@@ -324,7 +300,8 @@ static void nvgpu_semaphore_pool_free(struct kref *ref)
 	struct nvgpu_semaphore_sea *s = p->sema_sea;
 	struct nvgpu_semaphore_int *hw_sema, *tmp;
 
-	WARN_ON(p->gpu_va || p->rw_sg_table || p->ro_sg_table);
+	/* Freeing a mapped pool is a bad idea. */
+	WARN_ON(p->mapped || p->gpu_va || p->gpu_va_ro);
 
 	__lock_sema_sea(s);
 	nvgpu_list_del(&p->pool_list_entry);
@@ -338,7 +315,8 @@ static void nvgpu_semaphore_pool_free(struct kref *ref)
 
 	nvgpu_mutex_destroy(&p->pool_lock);
 
-	gpu_sema_dbg("Freed semaphore pool! (idx=%d)", p->page_idx);
+	gpu_sema_dbg(pool_to_gk20a(p),
+		     "Freed semaphore pool! (idx=%d)", p->page_idx);
 	nvgpu_kfree(p->sema_sea->gk20a, p);
 }
 
@@ -395,9 +373,8 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch)
 	hw_sema->idx = hw_sema_idx;
 	hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx;
 	atomic_set(&hw_sema->next_value, 0);
-	hw_sema->value = p->cpu_va + hw_sema->offset;
-	writel(0, hw_sema->value);
 	nvgpu_init_list_node(&hw_sema->hw_sema_list);
+	nvgpu_mem_wr(ch->g, &p->rw_mem, hw_sema->offset, 0);
 
 	nvgpu_list_add(&hw_sema->hw_sema_list, &p->hw_semas);
 
@@ -464,7 +441,7 @@ struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch)
 	 */
 	nvgpu_semaphore_pool_get(s->hw_sema->p);
 
-	gpu_sema_dbg("Allocated semaphore (c=%d)", ch->hw_chid);
+	gpu_sema_dbg(ch->g, "Allocated semaphore (c=%d)", ch->hw_chid);
 
 	return s;
 }
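Condensed, the reworked nvgpu_semaphore_pool_map() above comes down to two GPU mappings per pool: a fixed read-only mapping of the entire sea, then a movable read-write mapping of just this pool's page, carved out with nvgpu_mem_create_from_mem(). A sketch of that flow with locking, debug output, and error unwinding elided (names and signatures as they appear in the hunk above):

	/* 1) Fixed RO mapping of the whole sea at its reserved GPU VA. */
	p->gpu_va_ro = gk20a_gmmu_fixed_map(vm, &p->sema_sea->sea_mem.priv.sgt,
					    p->sema_sea->gpu_va,
					    p->sema_sea->map_size,
					    0, gk20a_mem_flag_read_only, 0,
					    p->sema_sea->sea_mem.aperture);

	/* 2) Sub-nvgpu_mem covering one page of the sea (this pool's page)... */
	err = nvgpu_mem_create_from_mem(vm->mm->g, &p->rw_mem,
					&p->sema_sea->sea_mem, p->page_idx, 1);

	/* ...mapped RW wherever the GMMU allocator finds room. */
	p->gpu_va = gk20a_gmmu_map(vm, &p->rw_mem.priv.sgt, SZ_4K, 0,
				   gk20a_mem_flag_none, 0,
				   p->rw_mem.aperture);

The result: every address space sees every pool read-only at the same well-known VA, while only its own page is mapped writable.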
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index b509c5c4..3fb35e94 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -576,16 +576,16 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
 	}
 
 	if (acquire)
-		gpu_sema_verbose_dbg("(A) c=%d ACQ_GE %-4u owner=%-3d"
+		gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u owner=%-3d"
 				     "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
 				     ch, nvgpu_semaphore_get_value(s),
 				     s->hw_sema->ch->hw_chid, va, cmd->gva,
 				     cmd->mem->gpu_va, ob);
 	else
-		gpu_sema_verbose_dbg("(R) c=%d INCR %u (%u) va=0x%llx "
+		gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) va=0x%llx "
 				     "cmd_mem=0x%llx b=0x%llx off=%u",
 				     ch, nvgpu_semaphore_get_value(s),
-				     readl(s->hw_sema->value), va, cmd->gva,
+				     nvgpu_semaphore_read(s), va, cmd->gva,
 				     cmd->mem->gpu_va, ob);
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 314d4551..e89e9f68 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -3833,7 +3833,7 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
 	if (hw_sema)
 		gk20a_debug_output(o, "SEMA STATE: value: 0x%08x "
 				   "next_val: 0x%08x addr: 0x%010llx\n",
-				   readl(hw_sema->value),
+				   __nvgpu_semaphore_read(hw_sema),
 				   atomic_read(&hw_sema->next_value),
 				   nvgpu_hw_sema_addr(hw_sema));
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index f197a918..45a3af5a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -14,23 +14,22 @@
 #ifndef SEMAPHORE_GK20A_H
 #define SEMAPHORE_GK20A_H
 
-#include <linux/delay.h>
-
 #include <nvgpu/log.h>
-#include <nvgpu/allocator.h>
+#include <nvgpu/timers.h>
 #include <nvgpu/atomic.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/kref.h>
 #include <nvgpu/list.h>
+#include <nvgpu/nvgpu_mem.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 #include "gk20a/channel_gk20a.h"
 
-#define gpu_sema_dbg(fmt, args...) \
-	gk20a_dbg(gpu_dbg_sema, fmt, ##args)
-#define gpu_sema_verbose_dbg(fmt, args...) \
-	gk20a_dbg(gpu_dbg_sema_v, fmt, ##args)
+#define gpu_sema_dbg(g, fmt, args...) \
+	nvgpu_log(g, gpu_dbg_sema, fmt, ##args)
+#define gpu_sema_verbose_dbg(g, fmt, args...) \
+	nvgpu_log(g, gpu_dbg_sema_v, fmt, ##args)
 
 /*
  * Max number of channels that can be used is 512. This of course needs to be
@@ -50,7 +49,6 @@ struct nvgpu_semaphore_int {
 	int idx;			/* Semaphore index. */
 	u32 offset;			/* Offset into the pool. */
 	atomic_t next_value;		/* Next available value. */
-	u32 *value;			/* Current value (access w/ readl()). */
 	u32 nr_incrs;			/* Number of increments programmed. */
 	struct nvgpu_semaphore_pool *p;	/* Pool that owns this sema. */
 	struct channel_gk20a *ch;	/* Channel that owns this sema. */
@@ -82,9 +80,7 @@ struct nvgpu_semaphore {
  * A semaphore pool. Each address space will own exactly one of these.
  */
 struct nvgpu_semaphore_pool {
-	struct page *page;			/* This pool's page of memory */
 	struct nvgpu_list_node pool_list_entry;	/* Node for list of pools. */
-	void *cpu_va;				/* CPU access to the pool. */
 	u64 gpu_va;				/* GPU access to the pool. */
 	u64 gpu_va_ro;				/* GPU access to the pool. */
 	int page_idx;				/* Index into sea bitmap. */
@@ -98,15 +94,10 @@ struct nvgpu_semaphore_pool {
 
 	/*
 	 * This is the address spaces's personal RW table. Other channels will
-	 * ultimately map this page as RO.
-	 */
-	struct sg_table *rw_sg_table;
-
-	/*
-	 * This is to keep track of whether the pool has had its sg_table
-	 * updated during sea resizing.
+	 * ultimately map this page as RO. This is a sub-nvgpu_mem from the
+	 * sea's mem.
 	 */
-	struct sg_table *ro_sg_table;
+	struct nvgpu_mem rw_mem;
 
 	int mapped;
 
@@ -148,11 +139,12 @@ struct nvgpu_semaphore_sea {
 	 */
 	int page_count;			/* Pages allocated to pools. */
 
-	struct sg_table *ro_sg_table;
 	/*
-	struct page *pages[SEMAPHORE_POOL_COUNT];
-	 */
-
+	 * The read-only memory for the entire semaphore sea. Each semaphore
+	 * pool needs a sub-nvgpu_mem that will be mapped as RW in its address
+	 * space. This sea_mem cannot be freed until all semaphore_pools have
+	 * been freed.
+	 */
 	struct nvgpu_mem sea_mem;
 
 	/*
@@ -224,12 +216,26 @@ static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
 		hw_sema->offset;
 }
 
+static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
+{
+	return nvgpu_mem_rd(hw_sema->ch->g,
+			    &hw_sema->p->rw_mem, hw_sema->offset);
+}
+
+/*
+ * Read the underlying value from a semaphore.
+ */
+static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
+{
+	return __nvgpu_semaphore_read(s->hw_sema);
+}
+
 /*
  * TODO: handle wrap around... Hmm, how to do this?
  */
 static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
 {
-	u32 sema_val = readl(s->hw_sema->value);
+	u32 sema_val = nvgpu_semaphore_read(s);
 
 	/*
 	 * If the underlying semaphore value is greater than or equal to
@@ -244,14 +250,6 @@ static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
 	return !nvgpu_semaphore_is_released(s);
 }
 
-/*
- * Read the underlying value from a semaphore.
- */
-static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
-{
-	return readl(s->hw_sema->value);
-}
-
 static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
 {
 	return (u32)atomic_read(&s->value);
@@ -269,6 +267,7 @@ static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s)
 static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
 					     bool force)
 {
+	struct nvgpu_semaphore_int *hw_sema = s->hw_sema;
 	u32 current_val;
 	u32 val = nvgpu_semaphore_get_value(s);
 	int attempts = 0;
@@ -282,7 +281,7 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
 	while ((current_val = nvgpu_semaphore_read(s)) < (val - 1)) {
 		if (force)
 			break;
-		msleep(100);
+		nvgpu_msleep(100);
 		attempts += 1;
 		if (attempts > 100) {
 			WARN(1, "Stall on sema release!");
@@ -297,10 +296,10 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
 	if (current_val >= val)
 		return;
 
-	writel(val, s->hw_sema->value);
+	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset, val);
 
-	gpu_sema_verbose_dbg("(c=%d) WRITE %u",
-			     s->hw_sema->ch->hw_chid, val);
+	gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a,
+			     "(c=%d) WRITE %u", hw_sema->ch->hw_chid, val);
 }
 
 static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s)
306static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s) 305static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s)
@@ -324,7 +323,8 @@ static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s)
324 atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value)); 323 atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value));
325 s->incremented = 1; 324 s->incremented = 1;
326 325
327 gpu_sema_verbose_dbg("INCR sema for c=%d (%u)", 326 gpu_sema_verbose_dbg(s->hw_sema->p->sema_sea->gk20a,
327 "INCR sema for c=%d (%u)",
328 s->hw_sema->ch->hw_chid, 328 s->hw_sema->ch->hw_chid,
329 nvgpu_semaphore_next_value(s)); 329 nvgpu_semaphore_next_value(s));
330} 330}
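Taken together, the header changes replace the raw u32 *value pointer with byte-offset access into the pool's rw_mem. Roughly, the before/after idioms compare as follows (a sketch for illustration, not an excerpt from the tree):

	u32 before, after;

	/* Before: raw pointer into a vmap()'d page of the pool. */
	before = readl(hw_sema->value);
	writel(0, hw_sema->value);

	/* After: byte-offset access through the nvgpu_mem accessors, so the
	 * header needs no Linux mapping or MMIO helpers. */
	after = __nvgpu_semaphore_read(hw_sema);	/* nvgpu_mem_rd() inside */
	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset, 0);

This is also why <linux/delay.h> can drop out in favor of <nvgpu/timers.h>: the last Linux-specific calls in this header (readl(), writel(), msleep()) now all have nvgpu equivalents.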