Diffstat (limited to 'drivers/gpu/nvgpu/common')

-rw-r--r--  drivers/gpu/nvgpu/common/semaphore.c | 201
1 file changed, 89 insertions(+), 112 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index bf7b6348..fa86985b 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -13,11 +13,6 @@
  * more details.
  */
 
-#define pr_fmt(fmt) "gpu_sema: " fmt
-
-#include <linux/dma-mapping.h>
-#include <linux/highmem.h>
-
 #include <nvgpu/dma.h>
 #include <nvgpu/semaphore.h>
 #include <nvgpu/kmem.h>
@@ -26,17 +21,19 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+#define pool_to_gk20a(p) ((p)->sema_sea->gk20a)
+
 #define __lock_sema_sea(s) \
         do { \
-                gpu_sema_verbose_dbg("Acquiring sema lock..."); \
+                gpu_sema_verbose_dbg(s->gk20a, "Acquiring sema lock..."); \
                 nvgpu_mutex_acquire(&s->sea_lock); \
-                gpu_sema_verbose_dbg("Sema lock aquried!"); \
+                gpu_sema_verbose_dbg(s->gk20a, "Sema lock aquried!"); \
         } while (0)
 
 #define __unlock_sema_sea(s) \
         do { \
                 nvgpu_mutex_release(&s->sea_lock); \
-                gpu_sema_verbose_dbg("Released sema lock"); \
+                gpu_sema_verbose_dbg(s->gk20a, "Released sema lock"); \
         } while (0)
 
 /*
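Both macros (and every gpu_sema_dbg() call updated below) now take an explicit struct gk20a pointer, so log lines can be attributed to a specific GPU instead of relying on the file-wide pr_fmt prefix deleted above. A minimal sketch of what such device-aware wrappers could look like, assuming they sit on top of nvgpu's nvgpu_log() with semaphore debug flags; the real definitions live in the semaphore header, not in this diff:

    /* Sketch only: assumed wrappers over nvgpu's per-device logging. */
    #define gpu_sema_dbg(g, fmt, args...) \
            nvgpu_log(g, gpu_dbg_sema, fmt, ##args)
    #define gpu_sema_verbose_dbg(g, fmt, args...) \
            nvgpu_log(g, gpu_dbg_sema_v, fmt, ##args)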
@@ -54,13 +51,12 @@ static int __nvgpu_semaphore_sea_grow(struct nvgpu_semaphore_sea *sea)
 
         __lock_sema_sea(sea);
 
-        ret = nvgpu_dma_alloc_flags_sys(gk20a, NVGPU_DMA_NO_KERNEL_MAPPING,
-                                        PAGE_SIZE * SEMAPHORE_POOL_COUNT,
-                                        &sea->sea_mem);
+        ret = nvgpu_dma_alloc_sys(gk20a,
+                                  PAGE_SIZE * SEMAPHORE_POOL_COUNT,
+                                  &sea->sea_mem);
         if (ret)
                 goto out;
 
-        sea->ro_sg_table = sea->sea_mem.priv.sgt;
         sea->size = SEMAPHORE_POOL_COUNT;
         sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE;
 
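Note the dropped NVGPU_DMA_NO_KERNEL_MAPPING flag: the sea's backing memory is now CPU-mapped by default, which is what lets the later hunks touch semaphore slots through the generic nvgpu_mem accessors instead of a hand-rolled vmap(). A sketch of that access pattern (helper name hypothetical):

    /* Sketch only: CPU-side slot reset, possible once sea_mem is kernel-mapped. */
    static void sema_sea_clear_slot(struct gk20a *g,
                                    struct nvgpu_semaphore_sea *sea, u32 offset)
    {
            nvgpu_mem_wr(g, &sea->sea_mem, offset, 0);
    }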
@@ -102,7 +98,7 @@ struct nvgpu_semaphore_sea *nvgpu_semaphore_sea_create(struct gk20a *g)
         if (__nvgpu_semaphore_sea_grow(g->sema_sea))
                 goto cleanup_destroy;
 
-        gpu_sema_dbg("Created semaphore sea!");
+        gpu_sema_dbg(g, "Created semaphore sea!");
         return g->sema_sea;
 
 cleanup_destroy:
@@ -110,7 +106,7 @@ cleanup_destroy:
 cleanup_free:
         nvgpu_kfree(g, g->sema_sea);
         g->sema_sea = NULL;
-        gpu_sema_dbg("Failed to creat semaphore sea!");
+        gpu_sema_dbg(g, "Failed to creat semaphore sea!");
         return NULL;
 }
 
@@ -146,7 +142,8 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc(
         if (err)
                 goto fail;
 
-        ret = __semaphore_bitmap_alloc(sea->pools_alloced, SEMAPHORE_POOL_COUNT);
+        ret = __semaphore_bitmap_alloc(sea->pools_alloced,
+                                       SEMAPHORE_POOL_COUNT);
         if (ret < 0) {
                 err = ret;
                 goto fail_alloc;
@@ -154,8 +151,6 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc(
 
         page_idx = (unsigned long)ret;
 
-        p->page = sea->sea_mem.priv.pages[page_idx];
-        p->ro_sg_table = sea->ro_sg_table;
         p->page_idx = page_idx;
         p->sema_sea = sea;
         nvgpu_init_list_node(&p->hw_semas);
@@ -166,7 +161,8 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc(
         nvgpu_list_add(&p->pool_list_entry, &sea->pool_list);
         __unlock_sema_sea(sea);
 
-        gpu_sema_dbg("Allocated semaphore pool: page-idx=%d", p->page_idx);
+        gpu_sema_dbg(sea->gk20a,
+                     "Allocated semaphore pool: page-idx=%d", p->page_idx);
 
         return p;
 
@@ -175,7 +171,7 @@ fail_alloc:
 fail:
         __unlock_sema_sea(sea);
         nvgpu_kfree(sea->gk20a, p);
-        gpu_sema_dbg("Failed to allocate semaphore pool!");
+        gpu_sema_dbg(sea->gk20a, "Failed to allocate semaphore pool!");
         return ERR_PTR(err);
 }
 
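On failure the allocator hands back ERR_PTR(err) rather than NULL, so callers are expected to use the kernel's IS_ERR()/PTR_ERR() convention. A short usage sketch (caller code and argument list assumed, not part of the patch):

    /* Sketch only: consuming the ERR_PTR() contract of the allocator. */
    static int pool_alloc_example(struct nvgpu_semaphore_sea *sea)
    {
            struct nvgpu_semaphore_pool *p = nvgpu_semaphore_pool_alloc(sea);

            if (IS_ERR(p))
                    return PTR_ERR(p);   /* error propagated via ERR_PTR() */
            return 0;
    }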
@@ -186,91 +182,82 @@ fail:
 int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p,
                              struct vm_gk20a *vm)
 {
-        int ents, err = 0;
+        int err = 0;
         u64 addr;
 
-        gpu_sema_dbg("Mapping sempahore pool! (idx=%d)", p->page_idx);
-
-        p->cpu_va = vmap(&p->page, 1, 0,
-                         pgprot_writecombine(PAGE_KERNEL));
-
-        gpu_sema_dbg(" %d: CPU VA = 0x%p!", p->page_idx, p->cpu_va);
+        if (p->mapped)
+                return -EBUSY;
 
-        /* First do the RW mapping. */
-        p->rw_sg_table = nvgpu_kzalloc(p->sema_sea->gk20a,
-                                       sizeof(*p->rw_sg_table));
-        if (!p->rw_sg_table)
-                return -ENOMEM;
+        gpu_sema_dbg(pool_to_gk20a(p),
+                     "Mapping semaphore pool! (idx=%d)", p->page_idx);
 
-        err = sg_alloc_table_from_pages(p->rw_sg_table, &p->page, 1, 0,
-                                        PAGE_SIZE, GFP_KERNEL);
-        if (err) {
-                err = -ENOMEM;
-                goto fail;
-        }
+        /*
+         * Take the sea lock so that we don't race with a possible change to the
+         * nvgpu_mem in the sema sea.
+         */
+        __lock_sema_sea(p->sema_sea);
 
-        /* Add IOMMU mapping... */
-        ents = dma_map_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-                          DMA_BIDIRECTIONAL);
-        if (ents != 1) {
+        addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->sea_mem.priv.sgt,
+                                    p->sema_sea->gpu_va,
+                                    p->sema_sea->map_size,
+                                    0, gk20a_mem_flag_read_only, 0,
+                                    p->sema_sea->sea_mem.aperture);
+        if (!addr) {
                 err = -ENOMEM;
-                goto fail_free_sgt;
+                goto fail_unlock;
         }
 
-        gpu_sema_dbg(" %d: DMA addr = 0x%pad", p->page_idx,
-                     &sg_dma_address(p->rw_sg_table->sgl));
-
-        /* Map into the GPU... Doesn't need to be fixed. */
-        p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE,
-                                   0, gk20a_mem_flag_none, false,
-                                   APERTURE_SYSMEM);
-        if (!p->gpu_va) {
-                err = -ENOMEM;
-                goto fail_unmap_sgt;
-        }
+        p->gpu_va_ro = addr;
+        p->mapped = 1;
 
-        gpu_sema_dbg(" %d: GPU read-write VA = 0x%llx", p->page_idx,
-                     p->gpu_va);
+        gpu_sema_dbg(pool_to_gk20a(p),
+                     " %d: GPU read-only VA = 0x%llx",
+                     p->page_idx, p->gpu_va_ro);
 
         /*
-         * And now the global mapping. Take the sea lock so that we don't race
-         * with a concurrent remap.
+         * Now the RW mapping. This is a bit more complicated. We make a
+         * nvgpu_mem describing a page of the bigger RO space and then map
+         * that. Unlike above this does not need to be a fixed address.
          */
-        __lock_sema_sea(p->sema_sea);
+        err = nvgpu_mem_create_from_mem(vm->mm->g,
+                                        &p->rw_mem, &p->sema_sea->sea_mem,
+                                        p->page_idx, 1);
+        if (err)
+                goto fail_unmap;
+
+        addr = gk20a_gmmu_map(vm, &p->rw_mem.priv.sgt, SZ_4K, 0,
+                              gk20a_mem_flag_none, 0,
+                              p->rw_mem.aperture);
 
-        BUG_ON(p->mapped);
-        addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->ro_sg_table,
-                                    p->sema_sea->gpu_va, p->sema_sea->map_size,
-                                    0,
-                                    gk20a_mem_flag_read_only,
-                                    false,
-                                    APERTURE_SYSMEM);
         if (!addr) {
                 err = -ENOMEM;
-                BUG();
-                goto fail_unlock;
+                goto fail_free_submem;
         }
-        p->gpu_va_ro = addr;
-        p->mapped = 1;
 
-        gpu_sema_dbg(" %d: GPU read-only VA = 0x%llx", p->page_idx,
-                     p->gpu_va_ro);
+        p->gpu_va = addr;
 
         __unlock_sema_sea(p->sema_sea);
 
+        gpu_sema_dbg(pool_to_gk20a(p),
+                     " %d: GPU read-write VA = 0x%llx",
+                     p->page_idx, p->gpu_va);
+        gpu_sema_dbg(pool_to_gk20a(p),
+                     " %d: CPU VA = 0x%p",
+                     p->page_idx, p->rw_mem.cpu_va);
+
         return 0;
 
+fail_free_submem:
+        nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem);
+fail_unmap:
+        gk20a_gmmu_unmap(vm,
+                         p->sema_sea->sea_mem.gpu_va,
+                         p->sema_sea->map_size,
+                         gk20a_mem_flag_none);
+        gpu_sema_dbg(pool_to_gk20a(p),
+                     " %d: Failed to map semaphore pool!", p->page_idx);
 fail_unlock:
         __unlock_sema_sea(p->sema_sea);
-fail_unmap_sgt:
-        dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-                     DMA_BIDIRECTIONAL);
-fail_free_sgt:
-        sg_free_table(p->rw_sg_table);
-fail:
-        nvgpu_kfree(p->sema_sea->gk20a, p->rw_sg_table);
-        p->rw_sg_table = NULL;
-        gpu_sema_dbg(" %d: Failed to map semaphore pool!", p->page_idx);
         return err;
 }
 
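Net effect of the rewritten map path: every VM maps the entire sea once, read-only, at a fixed GPU VA, and additionally gets a private one-page read-write window onto its own pool. A sketch of how a pool's two GPU addresses then relate (helper name hypothetical; fields as used in the diff):

    /* Sketch only: the two GPU views of one pool after nvgpu_semaphore_pool_map(). */
    static u64 pool_gpu_va(struct nvgpu_semaphore_pool *p, bool global)
    {
            if (!global)
                    return p->gpu_va;       /* private one-page RW window */

            /* RO view: this pool's page within the fixed whole-sea mapping */
            return p->gpu_va_ro + PAGE_SIZE * p->page_idx;
    }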
@@ -280,41 +267,30 @@ fail:
 void nvgpu_semaphore_pool_unmap(struct nvgpu_semaphore_pool *p,
                                 struct vm_gk20a *vm)
 {
-        struct nvgpu_semaphore_int *hw_sema;
-
-        kunmap(p->cpu_va);
-
-        /* First the global RO mapping... */
         __lock_sema_sea(p->sema_sea);
-        gk20a_gmmu_unmap(vm, p->gpu_va_ro,
-                         p->sema_sea->map_size, gk20a_mem_flag_none);
-        p->ro_sg_table = NULL;
-        __unlock_sema_sea(p->sema_sea);
 
-        /* And now the private RW mapping. */
-        gk20a_gmmu_unmap(vm, p->gpu_va, PAGE_SIZE, gk20a_mem_flag_none);
-        p->gpu_va = 0;
-
-        dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
-                     DMA_BIDIRECTIONAL);
+        gk20a_gmmu_unmap(vm,
+                         p->sema_sea->sea_mem.gpu_va,
+                         p->sema_sea->sea_mem.size,
+                         gk20a_mem_flag_none);
+        gk20a_gmmu_unmap(vm,
+                         p->rw_mem.gpu_va,
+                         p->rw_mem.size,
+                         gk20a_mem_flag_none);
+        nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem);
 
-        sg_free_table(p->rw_sg_table);
-        nvgpu_kfree(p->sema_sea->gk20a, p->rw_sg_table);
-        p->rw_sg_table = NULL;
+        p->gpu_va = 0;
+        p->gpu_va_ro = 0;
+        p->mapped = 0;
 
-        nvgpu_list_for_each_entry(hw_sema, &p->hw_semas,
-                                  nvgpu_semaphore_int, hw_sema_list)
-                /*
-                 * Make sure the mem addresses are all NULL so if this gets
-                 * reused we will fault.
-                 */
-                hw_sema->value = NULL;
+        __unlock_sema_sea(p->sema_sea);
 
-        gpu_sema_dbg("Unmapped semaphore pool! (idx=%d)", p->page_idx);
+        gpu_sema_dbg(pool_to_gk20a(p),
+                     "Unmapped semaphore pool! (idx=%d)", p->page_idx);
 }
 
 /*
- * Completely free a sempahore_pool. You should make sure this pool is not
+ * Completely free a semaphore_pool. You should make sure this pool is not
  * mapped otherwise there's going to be a memory leak.
  */
 static void nvgpu_semaphore_pool_free(struct kref *ref)
@@ -324,7 +300,8 @@ static void nvgpu_semaphore_pool_free(struct kref *ref)
         struct nvgpu_semaphore_sea *s = p->sema_sea;
         struct nvgpu_semaphore_int *hw_sema, *tmp;
 
-        WARN_ON(p->gpu_va || p->rw_sg_table || p->ro_sg_table);
+        /* Freeing a mapped pool is a bad idea. */
+        WARN_ON(p->mapped || p->gpu_va || p->gpu_va_ro);
 
         __lock_sema_sea(s);
         nvgpu_list_del(&p->pool_list_entry);
@@ -338,7 +315,8 @@ static void nvgpu_semaphore_pool_free(struct kref *ref)
 
         nvgpu_mutex_destroy(&p->pool_lock);
 
-        gpu_sema_dbg("Freed semaphore pool! (idx=%d)", p->page_idx);
+        gpu_sema_dbg(pool_to_gk20a(p),
+                     "Freed semaphore pool! (idx=%d)", p->page_idx);
         nvgpu_kfree(p->sema_sea->gk20a, p);
 }
 
@@ -395,9 +373,8 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch)
         hw_sema->idx = hw_sema_idx;
         hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx;
         atomic_set(&hw_sema->next_value, 0);
-        hw_sema->value = p->cpu_va + hw_sema->offset;
-        writel(0, hw_sema->value);
         nvgpu_init_list_node(&hw_sema->hw_sema_list);
+        nvgpu_mem_wr(ch->g, &p->rw_mem, hw_sema->offset, 0);
 
         nvgpu_list_add(&hw_sema->hw_sema_list, &p->hw_semas);
 
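With the cached hw_sema->value pointer gone, readers have to go through the same nvgpu_mem accessor used for the write above. A sketch of the assumed read path (hw_sema's ch and p back-pointers assumed from the surrounding code; not part of this diff):

    /* Sketch only: assumed read-back path mirroring the nvgpu_mem_wr() above. */
    static u32 hw_sema_read(struct nvgpu_semaphore_int *hw_sema)
    {
            return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->p->rw_mem,
                                hw_sema->offset);
    }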
@@ -464,7 +441,7 @@ struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch)
          */
         nvgpu_semaphore_pool_get(s->hw_sema->p);
 
-        gpu_sema_dbg("Allocated semaphore (c=%d)", ch->hw_chid);
+        gpu_sema_dbg(ch->g, "Allocated semaphore (c=%d)", ch->hw_chid);
 
         return s;
 }