diff options
author | Alex Waterman <alexw@nvidia.com> | 2017-04-12 14:27:48 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-04-25 17:26:00 -0400 |
commit | 84dadb1a9ae2ab0473976ebf5ece1cb0d1e60205 (patch) | |
tree | 8ec8d404c319082dc472eae1ca1b56f2b7e7c197 /drivers/gpu/nvgpu | |
parent | aff9d46c00a2a82c93d6cc43d790584e7e474d0e (diff) |
gpu: nvgpu: Move semaphore impl to nvgpu_mem
Use struct nvgpu_mem for DMA allocations (and the corresponding
nvgpu_dma_alloc_sys()) instead of custom-rolled code. This migrates
away from using Linux scatter-gather tables directly; instead, they
are hidden inside the nvgpu_mem struct. With this change the semaphore.c
code no longer has any direct Linux dependencies.
JIRA NVGPU-12
JIRA NVGPU-30
Change-Id: I92167c98aac9b413ae87496744dcee051cd60207
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1464081
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/common/semaphore.c | 201 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/semaphore.h | 72 |
4 files changed, 129 insertions, 152 deletions
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c index bf7b6348..fa86985b 100644 --- a/drivers/gpu/nvgpu/common/semaphore.c +++ b/drivers/gpu/nvgpu/common/semaphore.c | |||
@@ -13,11 +13,6 @@ | |||
13 | * more details. | 13 | * more details. |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #define pr_fmt(fmt) "gpu_sema: " fmt | ||
17 | |||
18 | #include <linux/dma-mapping.h> | ||
19 | #include <linux/highmem.h> | ||
20 | |||
21 | #include <nvgpu/dma.h> | 16 | #include <nvgpu/dma.h> |
22 | #include <nvgpu/semaphore.h> | 17 | #include <nvgpu/semaphore.h> |
23 | #include <nvgpu/kmem.h> | 18 | #include <nvgpu/kmem.h> |
@@ -26,17 +21,19 @@ | |||
26 | #include "gk20a/gk20a.h" | 21 | #include "gk20a/gk20a.h" |
27 | #include "gk20a/mm_gk20a.h" | 22 | #include "gk20a/mm_gk20a.h" |
28 | 23 | ||
24 | #define pool_to_gk20a(p) ((p)->sema_sea->gk20a) | ||
25 | |||
29 | #define __lock_sema_sea(s) \ | 26 | #define __lock_sema_sea(s) \ |
30 | do { \ | 27 | do { \ |
31 | gpu_sema_verbose_dbg("Acquiring sema lock..."); \ | 28 | gpu_sema_verbose_dbg(s->gk20a, "Acquiring sema lock..."); \ |
32 | nvgpu_mutex_acquire(&s->sea_lock); \ | 29 | nvgpu_mutex_acquire(&s->sea_lock); \ |
33 | gpu_sema_verbose_dbg("Sema lock aquried!"); \ | 30 | gpu_sema_verbose_dbg(s->gk20a, "Sema lock aquried!"); \ |
34 | } while (0) | 31 | } while (0) |
35 | 32 | ||
36 | #define __unlock_sema_sea(s) \ | 33 | #define __unlock_sema_sea(s) \ |
37 | do { \ | 34 | do { \ |
38 | nvgpu_mutex_release(&s->sea_lock); \ | 35 | nvgpu_mutex_release(&s->sea_lock); \ |
39 | gpu_sema_verbose_dbg("Released sema lock"); \ | 36 | gpu_sema_verbose_dbg(s->gk20a, "Released sema lock"); \ |
40 | } while (0) | 37 | } while (0) |
41 | 38 | ||
42 | /* | 39 | /* |
@@ -54,13 +51,12 @@ static int __nvgpu_semaphore_sea_grow(struct nvgpu_semaphore_sea *sea) | |||
54 | 51 | ||
55 | __lock_sema_sea(sea); | 52 | __lock_sema_sea(sea); |
56 | 53 | ||
57 | ret = nvgpu_dma_alloc_flags_sys(gk20a, NVGPU_DMA_NO_KERNEL_MAPPING, | 54 | ret = nvgpu_dma_alloc_sys(gk20a, |
58 | PAGE_SIZE * SEMAPHORE_POOL_COUNT, | 55 | PAGE_SIZE * SEMAPHORE_POOL_COUNT, |
59 | &sea->sea_mem); | 56 | &sea->sea_mem); |
60 | if (ret) | 57 | if (ret) |
61 | goto out; | 58 | goto out; |
62 | 59 | ||
63 | sea->ro_sg_table = sea->sea_mem.priv.sgt; | ||
64 | sea->size = SEMAPHORE_POOL_COUNT; | 60 | sea->size = SEMAPHORE_POOL_COUNT; |
65 | sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE; | 61 | sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE; |
66 | 62 | ||
@@ -102,7 +98,7 @@ struct nvgpu_semaphore_sea *nvgpu_semaphore_sea_create(struct gk20a *g) | |||
102 | if (__nvgpu_semaphore_sea_grow(g->sema_sea)) | 98 | if (__nvgpu_semaphore_sea_grow(g->sema_sea)) |
103 | goto cleanup_destroy; | 99 | goto cleanup_destroy; |
104 | 100 | ||
105 | gpu_sema_dbg("Created semaphore sea!"); | 101 | gpu_sema_dbg(g, "Created semaphore sea!"); |
106 | return g->sema_sea; | 102 | return g->sema_sea; |
107 | 103 | ||
108 | cleanup_destroy: | 104 | cleanup_destroy: |
@@ -110,7 +106,7 @@ cleanup_destroy: | |||
110 | cleanup_free: | 106 | cleanup_free: |
111 | nvgpu_kfree(g, g->sema_sea); | 107 | nvgpu_kfree(g, g->sema_sea); |
112 | g->sema_sea = NULL; | 108 | g->sema_sea = NULL; |
113 | gpu_sema_dbg("Failed to creat semaphore sea!"); | 109 | gpu_sema_dbg(g, "Failed to creat semaphore sea!"); |
114 | return NULL; | 110 | return NULL; |
115 | } | 111 | } |
116 | 112 | ||
@@ -146,7 +142,8 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc( | |||
146 | if (err) | 142 | if (err) |
147 | goto fail; | 143 | goto fail; |
148 | 144 | ||
149 | ret = __semaphore_bitmap_alloc(sea->pools_alloced, SEMAPHORE_POOL_COUNT); | 145 | ret = __semaphore_bitmap_alloc(sea->pools_alloced, |
146 | SEMAPHORE_POOL_COUNT); | ||
150 | if (ret < 0) { | 147 | if (ret < 0) { |
151 | err = ret; | 148 | err = ret; |
152 | goto fail_alloc; | 149 | goto fail_alloc; |
@@ -154,8 +151,6 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc( | |||
154 | 151 | ||
155 | page_idx = (unsigned long)ret; | 152 | page_idx = (unsigned long)ret; |
156 | 153 | ||
157 | p->page = sea->sea_mem.priv.pages[page_idx]; | ||
158 | p->ro_sg_table = sea->ro_sg_table; | ||
159 | p->page_idx = page_idx; | 154 | p->page_idx = page_idx; |
160 | p->sema_sea = sea; | 155 | p->sema_sea = sea; |
161 | nvgpu_init_list_node(&p->hw_semas); | 156 | nvgpu_init_list_node(&p->hw_semas); |
@@ -166,7 +161,8 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc( | |||
166 | nvgpu_list_add(&p->pool_list_entry, &sea->pool_list); | 161 | nvgpu_list_add(&p->pool_list_entry, &sea->pool_list); |
167 | __unlock_sema_sea(sea); | 162 | __unlock_sema_sea(sea); |
168 | 163 | ||
169 | gpu_sema_dbg("Allocated semaphore pool: page-idx=%d", p->page_idx); | 164 | gpu_sema_dbg(sea->gk20a, |
165 | "Allocated semaphore pool: page-idx=%d", p->page_idx); | ||
170 | 166 | ||
171 | return p; | 167 | return p; |
172 | 168 | ||
@@ -175,7 +171,7 @@ fail_alloc: | |||
175 | fail: | 171 | fail: |
176 | __unlock_sema_sea(sea); | 172 | __unlock_sema_sea(sea); |
177 | nvgpu_kfree(sea->gk20a, p); | 173 | nvgpu_kfree(sea->gk20a, p); |
178 | gpu_sema_dbg("Failed to allocate semaphore pool!"); | 174 | gpu_sema_dbg(sea->gk20a, "Failed to allocate semaphore pool!"); |
179 | return ERR_PTR(err); | 175 | return ERR_PTR(err); |
180 | } | 176 | } |
181 | 177 | ||
@@ -186,91 +182,82 @@ fail: | |||
186 | int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p, | 182 | int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p, |
187 | struct vm_gk20a *vm) | 183 | struct vm_gk20a *vm) |
188 | { | 184 | { |
189 | int ents, err = 0; | 185 | int err = 0; |
190 | u64 addr; | 186 | u64 addr; |
191 | 187 | ||
192 | gpu_sema_dbg("Mapping sempahore pool! (idx=%d)", p->page_idx); | 188 | if (p->mapped) |
193 | 189 | return -EBUSY; | |
194 | p->cpu_va = vmap(&p->page, 1, 0, | ||
195 | pgprot_writecombine(PAGE_KERNEL)); | ||
196 | |||
197 | gpu_sema_dbg(" %d: CPU VA = 0x%p!", p->page_idx, p->cpu_va); | ||
198 | 190 | ||
199 | /* First do the RW mapping. */ | 191 | gpu_sema_dbg(pool_to_gk20a(p), |
200 | p->rw_sg_table = nvgpu_kzalloc(p->sema_sea->gk20a, | 192 | "Mapping semaphore pool! (idx=%d)", p->page_idx); |
201 | sizeof(*p->rw_sg_table)); | ||
202 | if (!p->rw_sg_table) | ||
203 | return -ENOMEM; | ||
204 | 193 | ||
205 | err = sg_alloc_table_from_pages(p->rw_sg_table, &p->page, 1, 0, | 194 | /* |
206 | PAGE_SIZE, GFP_KERNEL); | 195 | * Take the sea lock so that we don't race with a possible change to the |
207 | if (err) { | 196 | * nvgpu_mem in the sema sea. |
208 | err = -ENOMEM; | 197 | */ |
209 | goto fail; | 198 | __lock_sema_sea(p->sema_sea); |
210 | } | ||
211 | 199 | ||
212 | /* Add IOMMU mapping... */ | 200 | addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->sea_mem.priv.sgt, |
213 | ents = dma_map_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1, | 201 | p->sema_sea->gpu_va, |
214 | DMA_BIDIRECTIONAL); | 202 | p->sema_sea->map_size, |
215 | if (ents != 1) { | 203 | 0, gk20a_mem_flag_read_only, 0, |
204 | p->sema_sea->sea_mem.aperture); | ||
205 | if (!addr) { | ||
216 | err = -ENOMEM; | 206 | err = -ENOMEM; |
217 | goto fail_free_sgt; | 207 | goto fail_unlock; |
218 | } | 208 | } |
219 | 209 | ||
220 | gpu_sema_dbg(" %d: DMA addr = 0x%pad", p->page_idx, | 210 | p->gpu_va_ro = addr; |
221 | &sg_dma_address(p->rw_sg_table->sgl)); | 211 | p->mapped = 1; |
222 | |||
223 | /* Map into the GPU... Doesn't need to be fixed. */ | ||
224 | p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE, | ||
225 | 0, gk20a_mem_flag_none, false, | ||
226 | APERTURE_SYSMEM); | ||
227 | if (!p->gpu_va) { | ||
228 | err = -ENOMEM; | ||
229 | goto fail_unmap_sgt; | ||
230 | } | ||
231 | 212 | ||
232 | gpu_sema_dbg(" %d: GPU read-write VA = 0x%llx", p->page_idx, | 213 | gpu_sema_dbg(pool_to_gk20a(p), |
233 | p->gpu_va); | 214 | " %d: GPU read-only VA = 0x%llx", |
215 | p->page_idx, p->gpu_va_ro); | ||
234 | 216 | ||
235 | /* | 217 | /* |
236 | * And now the global mapping. Take the sea lock so that we don't race | 218 | * Now the RW mapping. This is a bit more complicated. We make a |
237 | * with a concurrent remap. | 219 | * nvgpu_mem describing a page of the bigger RO space and then map |
220 | * that. Unlike above this does not need to be a fixed address. | ||
238 | */ | 221 | */ |
239 | __lock_sema_sea(p->sema_sea); | 222 | err = nvgpu_mem_create_from_mem(vm->mm->g, |
223 | &p->rw_mem, &p->sema_sea->sea_mem, | ||
224 | p->page_idx, 1); | ||
225 | if (err) | ||
226 | goto fail_unmap; | ||
227 | |||
228 | addr = gk20a_gmmu_map(vm, &p->rw_mem.priv.sgt, SZ_4K, 0, | ||
229 | gk20a_mem_flag_none, 0, | ||
230 | p->rw_mem.aperture); | ||
240 | 231 | ||
241 | BUG_ON(p->mapped); | ||
242 | addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->ro_sg_table, | ||
243 | p->sema_sea->gpu_va, p->sema_sea->map_size, | ||
244 | 0, | ||
245 | gk20a_mem_flag_read_only, | ||
246 | false, | ||
247 | APERTURE_SYSMEM); | ||
248 | if (!addr) { | 232 | if (!addr) { |
249 | err = -ENOMEM; | 233 | err = -ENOMEM; |
250 | BUG(); | 234 | goto fail_free_submem; |
251 | goto fail_unlock; | ||
252 | } | 235 | } |
253 | p->gpu_va_ro = addr; | ||
254 | p->mapped = 1; | ||
255 | 236 | ||
256 | gpu_sema_dbg(" %d: GPU read-only VA = 0x%llx", p->page_idx, | 237 | p->gpu_va = addr; |
257 | p->gpu_va_ro); | ||
258 | 238 | ||
259 | __unlock_sema_sea(p->sema_sea); | 239 | __unlock_sema_sea(p->sema_sea); |
260 | 240 | ||
241 | gpu_sema_dbg(pool_to_gk20a(p), | ||
242 | " %d: GPU read-write VA = 0x%llx", | ||
243 | p->page_idx, p->gpu_va); | ||
244 | gpu_sema_dbg(pool_to_gk20a(p), | ||
245 | " %d: CPU VA = 0x%p", | ||
246 | p->page_idx, p->rw_mem.cpu_va); | ||
247 | |||
261 | return 0; | 248 | return 0; |
262 | 249 | ||
250 | fail_free_submem: | ||
251 | nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem); | ||
252 | fail_unmap: | ||
253 | gk20a_gmmu_unmap(vm, | ||
254 | p->sema_sea->sea_mem.gpu_va, | ||
255 | p->sema_sea->map_size, | ||
256 | gk20a_mem_flag_none); | ||
257 | gpu_sema_dbg(pool_to_gk20a(p), | ||
258 | " %d: Failed to map semaphore pool!", p->page_idx); | ||
263 | fail_unlock: | 259 | fail_unlock: |
264 | __unlock_sema_sea(p->sema_sea); | 260 | __unlock_sema_sea(p->sema_sea); |
265 | fail_unmap_sgt: | ||
266 | dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1, | ||
267 | DMA_BIDIRECTIONAL); | ||
268 | fail_free_sgt: | ||
269 | sg_free_table(p->rw_sg_table); | ||
270 | fail: | ||
271 | nvgpu_kfree(p->sema_sea->gk20a, p->rw_sg_table); | ||
272 | p->rw_sg_table = NULL; | ||
273 | gpu_sema_dbg(" %d: Failed to map semaphore pool!", p->page_idx); | ||
274 | return err; | 261 | return err; |
275 | } | 262 | } |
276 | 263 | ||
@@ -280,41 +267,30 @@ fail: | |||
280 | void nvgpu_semaphore_pool_unmap(struct nvgpu_semaphore_pool *p, | 267 | void nvgpu_semaphore_pool_unmap(struct nvgpu_semaphore_pool *p, |
281 | struct vm_gk20a *vm) | 268 | struct vm_gk20a *vm) |
282 | { | 269 | { |
283 | struct nvgpu_semaphore_int *hw_sema; | ||
284 | |||
285 | kunmap(p->cpu_va); | ||
286 | |||
287 | /* First the global RO mapping... */ | ||
288 | __lock_sema_sea(p->sema_sea); | 270 | __lock_sema_sea(p->sema_sea); |
289 | gk20a_gmmu_unmap(vm, p->gpu_va_ro, | ||
290 | p->sema_sea->map_size, gk20a_mem_flag_none); | ||
291 | p->ro_sg_table = NULL; | ||
292 | __unlock_sema_sea(p->sema_sea); | ||
293 | 271 | ||
294 | /* And now the private RW mapping. */ | 272 | gk20a_gmmu_unmap(vm, |
295 | gk20a_gmmu_unmap(vm, p->gpu_va, PAGE_SIZE, gk20a_mem_flag_none); | 273 | p->sema_sea->sea_mem.gpu_va, |
296 | p->gpu_va = 0; | 274 | p->sema_sea->sea_mem.size, |
297 | 275 | gk20a_mem_flag_none); | |
298 | dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1, | 276 | gk20a_gmmu_unmap(vm, |
299 | DMA_BIDIRECTIONAL); | 277 | p->rw_mem.gpu_va, |
278 | p->rw_mem.size, | ||
279 | gk20a_mem_flag_none); | ||
280 | nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem); | ||
300 | 281 | ||
301 | sg_free_table(p->rw_sg_table); | 282 | p->gpu_va = 0; |
302 | nvgpu_kfree(p->sema_sea->gk20a, p->rw_sg_table); | 283 | p->gpu_va_ro = 0; |
303 | p->rw_sg_table = NULL; | 284 | p->mapped = 0; |
304 | 285 | ||
305 | nvgpu_list_for_each_entry(hw_sema, &p->hw_semas, | 286 | __unlock_sema_sea(p->sema_sea); |
306 | nvgpu_semaphore_int, hw_sema_list) | ||
307 | /* | ||
308 | * Make sure the mem addresses are all NULL so if this gets | ||
309 | * reused we will fault. | ||
310 | */ | ||
311 | hw_sema->value = NULL; | ||
312 | 287 | ||
313 | gpu_sema_dbg("Unmapped semaphore pool! (idx=%d)", p->page_idx); | 288 | gpu_sema_dbg(pool_to_gk20a(p), |
289 | "Unmapped semaphore pool! (idx=%d)", p->page_idx); | ||
314 | } | 290 | } |
315 | 291 | ||
316 | /* | 292 | /* |
317 | * Completely free a sempahore_pool. You should make sure this pool is not | 293 | * Completely free a semaphore_pool. You should make sure this pool is not |
318 | * mapped otherwise there's going to be a memory leak. | 294 | * mapped otherwise there's going to be a memory leak. |
319 | */ | 295 | */ |
320 | static void nvgpu_semaphore_pool_free(struct kref *ref) | 296 | static void nvgpu_semaphore_pool_free(struct kref *ref) |
@@ -324,7 +300,8 @@ static void nvgpu_semaphore_pool_free(struct kref *ref) | |||
324 | struct nvgpu_semaphore_sea *s = p->sema_sea; | 300 | struct nvgpu_semaphore_sea *s = p->sema_sea; |
325 | struct nvgpu_semaphore_int *hw_sema, *tmp; | 301 | struct nvgpu_semaphore_int *hw_sema, *tmp; |
326 | 302 | ||
327 | WARN_ON(p->gpu_va || p->rw_sg_table || p->ro_sg_table); | 303 | /* Freeing a mapped pool is a bad idea. */ |
304 | WARN_ON(p->mapped || p->gpu_va || p->gpu_va_ro); | ||
328 | 305 | ||
329 | __lock_sema_sea(s); | 306 | __lock_sema_sea(s); |
330 | nvgpu_list_del(&p->pool_list_entry); | 307 | nvgpu_list_del(&p->pool_list_entry); |
@@ -338,7 +315,8 @@ static void nvgpu_semaphore_pool_free(struct kref *ref) | |||
338 | 315 | ||
339 | nvgpu_mutex_destroy(&p->pool_lock); | 316 | nvgpu_mutex_destroy(&p->pool_lock); |
340 | 317 | ||
341 | gpu_sema_dbg("Freed semaphore pool! (idx=%d)", p->page_idx); | 318 | gpu_sema_dbg(pool_to_gk20a(p), |
319 | "Freed semaphore pool! (idx=%d)", p->page_idx); | ||
342 | nvgpu_kfree(p->sema_sea->gk20a, p); | 320 | nvgpu_kfree(p->sema_sea->gk20a, p); |
343 | } | 321 | } |
344 | 322 | ||
@@ -395,9 +373,8 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch) | |||
395 | hw_sema->idx = hw_sema_idx; | 373 | hw_sema->idx = hw_sema_idx; |
396 | hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx; | 374 | hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx; |
397 | atomic_set(&hw_sema->next_value, 0); | 375 | atomic_set(&hw_sema->next_value, 0); |
398 | hw_sema->value = p->cpu_va + hw_sema->offset; | ||
399 | writel(0, hw_sema->value); | ||
400 | nvgpu_init_list_node(&hw_sema->hw_sema_list); | 376 | nvgpu_init_list_node(&hw_sema->hw_sema_list); |
377 | nvgpu_mem_wr(ch->g, &p->rw_mem, hw_sema->offset, 0); | ||
401 | 378 | ||
402 | nvgpu_list_add(&hw_sema->hw_sema_list, &p->hw_semas); | 379 | nvgpu_list_add(&hw_sema->hw_sema_list, &p->hw_semas); |
403 | 380 | ||
@@ -464,7 +441,7 @@ struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch) | |||
464 | */ | 441 | */ |
465 | nvgpu_semaphore_pool_get(s->hw_sema->p); | 442 | nvgpu_semaphore_pool_get(s->hw_sema->p); |
466 | 443 | ||
467 | gpu_sema_dbg("Allocated semaphore (c=%d)", ch->hw_chid); | 444 | gpu_sema_dbg(ch->g, "Allocated semaphore (c=%d)", ch->hw_chid); |
468 | 445 | ||
469 | return s; | 446 | return s; |
470 | } | 447 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index b509c5c4..3fb35e94 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -576,16 +576,16 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c, | |||
576 | } | 576 | } |
577 | 577 | ||
578 | if (acquire) | 578 | if (acquire) |
579 | gpu_sema_verbose_dbg("(A) c=%d ACQ_GE %-4u owner=%-3d" | 579 | gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u owner=%-3d" |
580 | "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u", | 580 | "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u", |
581 | ch, nvgpu_semaphore_get_value(s), | 581 | ch, nvgpu_semaphore_get_value(s), |
582 | s->hw_sema->ch->hw_chid, va, cmd->gva, | 582 | s->hw_sema->ch->hw_chid, va, cmd->gva, |
583 | cmd->mem->gpu_va, ob); | 583 | cmd->mem->gpu_va, ob); |
584 | else | 584 | else |
585 | gpu_sema_verbose_dbg("(R) c=%d INCR %u (%u) va=0x%llx " | 585 | gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) va=0x%llx " |
586 | "cmd_mem=0x%llx b=0x%llx off=%u", | 586 | "cmd_mem=0x%llx b=0x%llx off=%u", |
587 | ch, nvgpu_semaphore_get_value(s), | 587 | ch, nvgpu_semaphore_get_value(s), |
588 | readl(s->hw_sema->value), va, cmd->gva, | 588 | nvgpu_semaphore_read(s), va, cmd->gva, |
589 | cmd->mem->gpu_va, ob); | 589 | cmd->mem->gpu_va, ob); |
590 | } | 590 | } |
591 | 591 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 314d4551..e89e9f68 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -3833,7 +3833,7 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g, | |||
3833 | if (hw_sema) | 3833 | if (hw_sema) |
3834 | gk20a_debug_output(o, "SEMA STATE: value: 0x%08x " | 3834 | gk20a_debug_output(o, "SEMA STATE: value: 0x%08x " |
3835 | "next_val: 0x%08x addr: 0x%010llx\n", | 3835 | "next_val: 0x%08x addr: 0x%010llx\n", |
3836 | readl(hw_sema->value), | 3836 | __nvgpu_semaphore_read(hw_sema), |
3837 | atomic_read(&hw_sema->next_value), | 3837 | atomic_read(&hw_sema->next_value), |
3838 | nvgpu_hw_sema_addr(hw_sema)); | 3838 | nvgpu_hw_sema_addr(hw_sema)); |
3839 | 3839 | ||
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h index f197a918..45a3af5a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h +++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h | |||
@@ -14,23 +14,22 @@ | |||
14 | #ifndef SEMAPHORE_GK20A_H | 14 | #ifndef SEMAPHORE_GK20A_H |
15 | #define SEMAPHORE_GK20A_H | 15 | #define SEMAPHORE_GK20A_H |
16 | 16 | ||
17 | #include <linux/delay.h> | ||
18 | |||
19 | #include <nvgpu/log.h> | 17 | #include <nvgpu/log.h> |
20 | #include <nvgpu/allocator.h> | 18 | #include <nvgpu/timers.h> |
21 | #include <nvgpu/atomic.h> | 19 | #include <nvgpu/atomic.h> |
22 | #include <nvgpu/bug.h> | 20 | #include <nvgpu/bug.h> |
23 | #include <nvgpu/kref.h> | 21 | #include <nvgpu/kref.h> |
24 | #include <nvgpu/list.h> | 22 | #include <nvgpu/list.h> |
23 | #include <nvgpu/nvgpu_mem.h> | ||
25 | 24 | ||
26 | #include "gk20a/gk20a.h" | 25 | #include "gk20a/gk20a.h" |
27 | #include "gk20a/mm_gk20a.h" | 26 | #include "gk20a/mm_gk20a.h" |
28 | #include "gk20a/channel_gk20a.h" | 27 | #include "gk20a/channel_gk20a.h" |
29 | 28 | ||
30 | #define gpu_sema_dbg(fmt, args...) \ | 29 | #define gpu_sema_dbg(g, fmt, args...) \ |
31 | gk20a_dbg(gpu_dbg_sema, fmt, ##args) | 30 | nvgpu_log(g, gpu_dbg_sema, fmt, ##args) |
32 | #define gpu_sema_verbose_dbg(fmt, args...) \ | 31 | #define gpu_sema_verbose_dbg(g, fmt, args...) \ |
33 | gk20a_dbg(gpu_dbg_sema_v, fmt, ##args) | 32 | nvgpu_log(g, gpu_dbg_sema_v, fmt, ##args) |
34 | 33 | ||
35 | /* | 34 | /* |
36 | * Max number of channels that can be used is 512. This of course needs to be | 35 | * Max number of channels that can be used is 512. This of course needs to be |
@@ -50,7 +49,6 @@ struct nvgpu_semaphore_int { | |||
50 | int idx; /* Semaphore index. */ | 49 | int idx; /* Semaphore index. */ |
51 | u32 offset; /* Offset into the pool. */ | 50 | u32 offset; /* Offset into the pool. */ |
52 | atomic_t next_value; /* Next available value. */ | 51 | atomic_t next_value; /* Next available value. */ |
53 | u32 *value; /* Current value (access w/ readl()). */ | ||
54 | u32 nr_incrs; /* Number of increments programmed. */ | 52 | u32 nr_incrs; /* Number of increments programmed. */ |
55 | struct nvgpu_semaphore_pool *p; /* Pool that owns this sema. */ | 53 | struct nvgpu_semaphore_pool *p; /* Pool that owns this sema. */ |
56 | struct channel_gk20a *ch; /* Channel that owns this sema. */ | 54 | struct channel_gk20a *ch; /* Channel that owns this sema. */ |
@@ -82,9 +80,7 @@ struct nvgpu_semaphore { | |||
82 | * A semaphore pool. Each address space will own exactly one of these. | 80 | * A semaphore pool. Each address space will own exactly one of these. |
83 | */ | 81 | */ |
84 | struct nvgpu_semaphore_pool { | 82 | struct nvgpu_semaphore_pool { |
85 | struct page *page; /* This pool's page of memory */ | ||
86 | struct nvgpu_list_node pool_list_entry; /* Node for list of pools. */ | 83 | struct nvgpu_list_node pool_list_entry; /* Node for list of pools. */ |
87 | void *cpu_va; /* CPU access to the pool. */ | ||
88 | u64 gpu_va; /* GPU access to the pool. */ | 84 | u64 gpu_va; /* GPU access to the pool. */ |
89 | u64 gpu_va_ro; /* GPU access to the pool. */ | 85 | u64 gpu_va_ro; /* GPU access to the pool. */ |
90 | int page_idx; /* Index into sea bitmap. */ | 86 | int page_idx; /* Index into sea bitmap. */ |
@@ -98,15 +94,10 @@ struct nvgpu_semaphore_pool { | |||
98 | 94 | ||
99 | /* | 95 | /* |
100 | * This is the address spaces's personal RW table. Other channels will | 96 | * This is the address spaces's personal RW table. Other channels will |
101 | * ultimately map this page as RO. | 97 | * ultimately map this page as RO. This is a sub-nvgpu_mem from the |
102 | */ | 98 | * sea's mem. |
103 | struct sg_table *rw_sg_table; | ||
104 | |||
105 | /* | ||
106 | * This is to keep track of whether the pool has had its sg_table | ||
107 | * updated during sea resizing. | ||
108 | */ | 99 | */ |
109 | struct sg_table *ro_sg_table; | 100 | struct nvgpu_mem rw_mem; |
110 | 101 | ||
111 | int mapped; | 102 | int mapped; |
112 | 103 | ||
@@ -148,11 +139,12 @@ struct nvgpu_semaphore_sea { | |||
148 | */ | 139 | */ |
149 | int page_count; /* Pages allocated to pools. */ | 140 | int page_count; /* Pages allocated to pools. */ |
150 | 141 | ||
151 | struct sg_table *ro_sg_table; | ||
152 | /* | 142 | /* |
153 | struct page *pages[SEMAPHORE_POOL_COUNT]; | 143 | * The read-only memory for the entire semaphore sea. Each semaphore |
154 | */ | 144 | * pool needs a sub-nvgpu_mem that will be mapped as RW in its address |
155 | 145 | * space. This sea_mem cannot be freed until all semaphore_pools have | |
146 | * been freed. | ||
147 | */ | ||
156 | struct nvgpu_mem sea_mem; | 148 | struct nvgpu_mem sea_mem; |
157 | 149 | ||
158 | /* | 150 | /* |
@@ -224,12 +216,26 @@ static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema) | |||
224 | hw_sema->offset; | 216 | hw_sema->offset; |
225 | } | 217 | } |
226 | 218 | ||
219 | static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema) | ||
220 | { | ||
221 | return nvgpu_mem_rd(hw_sema->ch->g, | ||
222 | &hw_sema->p->rw_mem, hw_sema->offset); | ||
223 | } | ||
224 | |||
225 | /* | ||
226 | * Read the underlying value from a semaphore. | ||
227 | */ | ||
228 | static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s) | ||
229 | { | ||
230 | return __nvgpu_semaphore_read(s->hw_sema); | ||
231 | } | ||
232 | |||
227 | /* | 233 | /* |
228 | * TODO: handle wrap around... Hmm, how to do this? | 234 | * TODO: handle wrap around... Hmm, how to do this? |
229 | */ | 235 | */ |
230 | static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) | 236 | static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) |
231 | { | 237 | { |
232 | u32 sema_val = readl(s->hw_sema->value); | 238 | u32 sema_val = nvgpu_semaphore_read(s); |
233 | 239 | ||
234 | /* | 240 | /* |
235 | * If the underlying semaphore value is greater than or equal to | 241 | * If the underlying semaphore value is greater than or equal to |
@@ -244,14 +250,6 @@ static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) | |||
244 | return !nvgpu_semaphore_is_released(s); | 250 | return !nvgpu_semaphore_is_released(s); |
245 | } | 251 | } |
246 | 252 | ||
247 | /* | ||
248 | * Read the underlying value from a semaphore. | ||
249 | */ | ||
250 | static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s) | ||
251 | { | ||
252 | return readl(s->hw_sema->value); | ||
253 | } | ||
254 | |||
255 | static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) | 253 | static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) |
256 | { | 254 | { |
257 | return (u32)atomic_read(&s->value); | 255 | return (u32)atomic_read(&s->value); |
@@ -269,6 +267,7 @@ static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s) | |||
269 | static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, | 267 | static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, |
270 | bool force) | 268 | bool force) |
271 | { | 269 | { |
270 | struct nvgpu_semaphore_int *hw_sema = s->hw_sema; | ||
272 | u32 current_val; | 271 | u32 current_val; |
273 | u32 val = nvgpu_semaphore_get_value(s); | 272 | u32 val = nvgpu_semaphore_get_value(s); |
274 | int attempts = 0; | 273 | int attempts = 0; |
@@ -282,7 +281,7 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, | |||
282 | while ((current_val = nvgpu_semaphore_read(s)) < (val - 1)) { | 281 | while ((current_val = nvgpu_semaphore_read(s)) < (val - 1)) { |
283 | if (force) | 282 | if (force) |
284 | break; | 283 | break; |
285 | msleep(100); | 284 | nvgpu_msleep(100); |
286 | attempts += 1; | 285 | attempts += 1; |
287 | if (attempts > 100) { | 286 | if (attempts > 100) { |
288 | WARN(1, "Stall on sema release!"); | 287 | WARN(1, "Stall on sema release!"); |
@@ -297,10 +296,10 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, | |||
297 | if (current_val >= val) | 296 | if (current_val >= val) |
298 | return; | 297 | return; |
299 | 298 | ||
300 | writel(val, s->hw_sema->value); | 299 | nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset, val); |
301 | 300 | ||
302 | gpu_sema_verbose_dbg("(c=%d) WRITE %u", | 301 | gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a, |
303 | s->hw_sema->ch->hw_chid, val); | 302 | "(c=%d) WRITE %u", hw_sema->ch->hw_chid, val); |
304 | } | 303 | } |
305 | 304 | ||
306 | static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s) | 305 | static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s) |
@@ -324,7 +323,8 @@ static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s) | |||
324 | atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value)); | 323 | atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value)); |
325 | s->incremented = 1; | 324 | s->incremented = 1; |
326 | 325 | ||
327 | gpu_sema_verbose_dbg("INCR sema for c=%d (%u)", | 326 | gpu_sema_verbose_dbg(s->hw_sema->p->sema_sea->gk20a, |
327 | "INCR sema for c=%d (%u)", | ||
328 | s->hw_sema->ch->hw_chid, | 328 | s->hw_sema->ch->hw_chid, |
329 | nvgpu_semaphore_next_value(s)); | 329 | nvgpu_semaphore_next_value(s)); |
330 | } | 330 | } |