Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c | 435
1 file changed, 324 insertions(+), 111 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
index 3b17bfcb..aa375b24 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
@@ -15,63 +15,284 @@
  * more details.
  */
 
-#include "semaphore_gk20a.h"
+#define pr_fmt(fmt) "gpu_sema: " fmt
+
 #include <linux/dma-mapping.h>
+#include <linux/highmem.h>
 #include <linux/slab.h>
+
+#include <asm/pgtable.h>
+
 #include "gk20a.h"
 #include "mm_gk20a.h"
+#include "semaphore_gk20a.h"
+
+#define __lock_sema_sea(s) \
+        do { \
+                mutex_lock(&s->sea_lock); \
+        } while (0)
 
-static const int SEMAPHORE_SIZE = 16;
+#define __unlock_sema_sea(s) \
+        do { \
+                mutex_unlock(&s->sea_lock); \
+        } while (0)
 
-struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct gk20a *g,
-                const char *unique_name, size_t capacity)
+/*
+ * Return the sema_sea pointer.
+ */
+struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g)
+{
+        return g->sema_sea;
+}
+
+static int __gk20a_semaphore_sea_grow(struct gk20a_semaphore_sea *sea)
+{
+        int ret = 0;
+        struct gk20a *gk20a = sea->gk20a;
+
+        __lock_sema_sea(sea);
+
+        ret = gk20a_gmmu_alloc_attr(gk20a, DMA_ATTR_NO_KERNEL_MAPPING,
+                                    PAGE_SIZE * SEMAPHORE_POOL_COUNT,
+                                    &sea->sea_mem);
+        if (ret)
+                goto out;
+
+        sea->ro_sg_table = sea->sea_mem.sgt;
+        sea->size = SEMAPHORE_POOL_COUNT;
+        sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE;
+
+out:
+        __unlock_sema_sea(sea);
+        return ret;
+}
+
+/*
+ * Create the semaphore sea. Only create it once - subsequent calls to this will
+ * return the originally created sea pointer.
+ */
+struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *g)
+{
+        if (g->sema_sea)
+                return g->sema_sea;
+
+        g->sema_sea = kzalloc(sizeof(*g->sema_sea), GFP_KERNEL);
+        if (!g->sema_sea)
+                return NULL;
+
+        g->sema_sea->size = 0;
+        g->sema_sea->page_count = 0;
+        g->sema_sea->gk20a = g;
+        INIT_LIST_HEAD(&g->sema_sea->pool_list);
+        mutex_init(&g->sema_sea->sea_lock);
+
+        if (__gk20a_semaphore_sea_grow(g->sema_sea))
+                goto cleanup;
+
+        return g->sema_sea;
+
+cleanup:
+        kfree(g->sema_sea);
+        g->sema_sea = NULL;
+        return NULL;
+}
+
+static int __semaphore_bitmap_alloc(unsigned long *bitmap, unsigned long len)
+{
+        unsigned long idx = find_first_zero_bit(bitmap, len);
+
+        if (idx == len)
+                return -ENOSPC;
+
+        set_bit(idx, bitmap);
+
+        return (int)idx;
+}
+
+/*
+ * Allocate a pool from the sea.
+ */
+struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
+                struct gk20a_semaphore_sea *sea)
 {
         struct gk20a_semaphore_pool *p;
+        unsigned long page_idx;
+        int err = 0;
+
         p = kzalloc(sizeof(*p), GFP_KERNEL);
         if (!p)
-                return NULL;
+                return ERR_PTR(-ENOMEM);
+
+        __lock_sema_sea(sea);
+
+        page_idx = __semaphore_bitmap_alloc(sea->pools_alloced,
+                                            SEMAPHORE_POOL_COUNT);
+        if (page_idx < 0) {
+                err = page_idx;
+                goto fail;
+        }
 
+        p->page = sea->sea_mem.pages[page_idx];
+        p->ro_sg_table = sea->ro_sg_table;
+        p->page_idx = page_idx;
+        p->sema_sea = sea;
+        INIT_LIST_HEAD(&p->hw_semas);
         kref_init(&p->ref);
-        INIT_LIST_HEAD(&p->maps);
-        mutex_init(&p->maps_mutex);
-        p->g = g;
-
-        /* Alloc one 4k page of semaphore per channel. */
-        if (gk20a_gmmu_alloc(g, roundup(capacity * SEMAPHORE_SIZE, PAGE_SIZE),
-                             &p->mem))
-                goto clean_up;
-
-        /* Sacrifice one semaphore in the name of returning error codes. */
-        if (gk20a_allocator_init(&p->alloc, unique_name,
-                                 SEMAPHORE_SIZE, p->mem.size - SEMAPHORE_SIZE,
-                                 SEMAPHORE_SIZE))
-                goto clean_up;
-
-        gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->mem.cpu_va,
-                       (u64)sg_dma_address(p->mem.sgt->sgl),
-                       (u64)sg_phys(p->mem.sgt->sgl));
+        mutex_init(&p->pool_lock);
+
+        sea->page_count++;
+        list_add(&p->pool_list_entry, &sea->pool_list);
+        __unlock_sema_sea(sea);
+
         return p;
 
-clean_up:
-        if (p->mem.size)
-                gk20a_gmmu_free(p->g, &p->mem);
+fail:
+        __unlock_sema_sea(sea);
         kfree(p);
-        return NULL;
+        return ERR_PTR(err);
+}
+
+/*
+ * Map a pool into the passed vm's address space. This handles both the fixed
+ * global RO mapping and the non-fixed private RW mapping.
+ */
+int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
+                             struct vm_gk20a *vm)
+{
+        int ents, err = 0;
+        u64 addr;
+
+        p->cpu_va = vmap(&p->page, 1, 0,
+                         pgprot_writecombine(PAGE_KERNEL));
+
+        /* First do the RW mapping. */
+        p->rw_sg_table = kzalloc(sizeof(*p->rw_sg_table), GFP_KERNEL);
+        if (!p->rw_sg_table)
+                return -ENOMEM;
+
+        err = sg_alloc_table_from_pages(p->rw_sg_table, &p->page, 1, 0,
+                                        PAGE_SIZE, GFP_KERNEL);
+        if (err) {
+                err = -ENOMEM;
+                goto fail;
+        }
+
+        /* Add IOMMU mapping... */
+        ents = dma_map_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
+                          DMA_BIDIRECTIONAL);
+        if (ents != 1) {
+                err = -ENOMEM;
+                goto fail_free_sgt;
+        }
+
+        /* Map into the GPU... Doesn't need to be fixed. */
+        p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE,
+                                   0, gk20a_mem_flag_none, false);
+        if (!p->gpu_va) {
+                err = -ENOMEM;
+                goto fail_unmap_sgt;
+        }
+
+        /*
+         * And now the global mapping. Take the sea lock so that we don't race
+         * with a concurrent remap.
+         */
+        __lock_sema_sea(p->sema_sea);
+
+        BUG_ON(p->mapped);
+        addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->ro_sg_table,
+                                    p->sema_sea->gpu_va, p->sema_sea->map_size,
+                                    0,
+                                    gk20a_mem_flag_read_only,
+                                    false);
+        if (!addr) {
+                err = -ENOMEM;
+                BUG();
+                goto fail_unlock;
+        }
+        p->gpu_va_ro = addr;
+        p->mapped = 1;
+
+        __unlock_sema_sea(p->sema_sea);
+
+        return 0;
+
+fail_unlock:
+        __unlock_sema_sea(p->sema_sea);
+fail_unmap_sgt:
+        dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
+                     DMA_BIDIRECTIONAL);
+fail_free_sgt:
+        sg_free_table(p->rw_sg_table);
+fail:
+        kfree(p->rw_sg_table);
+        p->rw_sg_table = NULL;
+        return err;
 }
 
+/*
+ * Unmap a semaphore_pool.
+ */
+void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p,
+                                struct vm_gk20a *vm)
+{
+        struct gk20a_semaphore_int *hw_sema;
+
+        kunmap(p->cpu_va);
+
+        /* First the global RO mapping... */
+        __lock_sema_sea(p->sema_sea);
+        gk20a_gmmu_unmap(vm, p->gpu_va_ro,
+                         p->sema_sea->map_size, gk20a_mem_flag_none);
+        p->ro_sg_table = NULL;
+        __unlock_sema_sea(p->sema_sea);
+
+        /* And now the private RW mapping. */
+        gk20a_gmmu_unmap(vm, p->gpu_va, PAGE_SIZE, gk20a_mem_flag_none);
+        p->gpu_va = 0;
+
+        dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
+                     DMA_BIDIRECTIONAL);
+
+        sg_free_table(p->rw_sg_table);
+        kfree(p->rw_sg_table);
+        p->rw_sg_table = NULL;
+
+        gk20a_dbg_info("Unmapped sema-pool: idx = %d", p->page_idx);
+        list_for_each_entry(hw_sema, &p->hw_semas, hw_sema_list)
+                /*
+                 * Make sure the mem addresses are all NULL so if this gets
+                 * reused we will fault.
+                 */
+                hw_sema->value = NULL;
+}
+
+/*
+ * Completely free a semaphore_pool. You should make sure this pool is not
+ * mapped otherwise there's going to be a memory leak.
+ */
 static void gk20a_semaphore_pool_free(struct kref *ref)
 {
         struct gk20a_semaphore_pool *p =
                 container_of(ref, struct gk20a_semaphore_pool, ref);
-        mutex_lock(&p->maps_mutex);
-        WARN_ON(!list_empty(&p->maps));
-        mutex_unlock(&p->maps_mutex);
-        gk20a_gmmu_free(p->g, &p->mem);
-        gk20a_allocator_destroy(&p->alloc);
+        struct gk20a_semaphore_sea *s = p->sema_sea;
+        struct gk20a_semaphore_int *hw_sema, *tmp;
+
+        WARN_ON(p->gpu_va || p->rw_sg_table || p->ro_sg_table);
+
+        __lock_sema_sea(s);
+        list_del(&p->pool_list_entry);
+        clear_bit(p->page_idx, s->pools_alloced);
+        s->page_count--;
+        __unlock_sema_sea(s);
+
+        list_for_each_entry_safe(hw_sema, tmp, &p->hw_semas, hw_sema_list)
+                kfree(hw_sema);
+
         kfree(p);
 }
 
-static void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p)
+void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p)
 {
         kref_get(&p->ref);
 }
@@ -81,104 +302,96 @@ void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p)
         kref_put(&p->ref, gk20a_semaphore_pool_free);
 }
 
-static struct gk20a_semaphore_pool_map *
-gk20a_semaphore_pool_find_map_locked(struct gk20a_semaphore_pool *p,
-                                     struct vm_gk20a *vm)
+/*
+ * Get the address for a semaphore_pool - if global is true then return the
+ * global RO address instead of the RW address owned by the semaphore's VM.
+ */
+u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global)
 {
-        struct gk20a_semaphore_pool_map *map, *found = NULL;
-        list_for_each_entry(map, &p->maps, list) {
-                if (map->vm == vm) {
-                        found = map;
-                        break;
-                }
-        }
-        return found;
+        if (!global)
+                return p->gpu_va;
+
+        return p->gpu_va_ro + (PAGE_SIZE * p->page_idx);
 }
 
-int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
-                             struct vm_gk20a *vm,
-                             enum gk20a_mem_rw_flag rw_flag)
+static int __gk20a_init_hw_sema(struct channel_gk20a *ch)
 {
-        struct gk20a_semaphore_pool_map *map;
+        int hw_sema_idx;
+        int ret = 0;
+        struct gk20a_semaphore_int *hw_sema;
+        struct gk20a_semaphore_pool *p = ch->vm->sema_pool;
 
-        map = kzalloc(sizeof(*map), GFP_KERNEL);
-        if (!map)
-                return -ENOMEM;
-        map->vm = vm;
-        map->rw_flag = rw_flag;
-        map->gpu_va = gk20a_gmmu_map(vm, &p->mem.sgt, p->mem.size,
-                                     0/*uncached*/, rw_flag,
-                                     false);
-        if (!map->gpu_va) {
-                kfree(map);
-                return -ENOMEM;
-        }
-        gk20a_vm_get(vm);
+        BUG_ON(!p);
 
-        mutex_lock(&p->maps_mutex);
-        WARN_ON(gk20a_semaphore_pool_find_map_locked(p, vm));
-        list_add(&map->list, &p->maps);
-        mutex_unlock(&p->maps_mutex);
-        return 0;
-}
+        mutex_lock(&p->pool_lock);
 
-void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p,
-                                struct vm_gk20a *vm)
-{
-        struct gk20a_semaphore_pool_map *map;
-        WARN_ON(!vm);
-
-        mutex_lock(&p->maps_mutex);
-        map = gk20a_semaphore_pool_find_map_locked(p, vm);
-        if (map) {
-                gk20a_gmmu_unmap(vm, map->gpu_va, p->mem.size, map->rw_flag);
-                gk20a_vm_put(vm);
-                list_del(&map->list);
-                kfree(map);
+        /* Find an available HW semaphore. */
+        hw_sema_idx = __semaphore_bitmap_alloc(p->semas_alloced,
+                                               PAGE_SIZE / SEMAPHORE_SIZE);
+        if (hw_sema_idx < 0) {
+                ret = hw_sema_idx;
+                goto fail;
         }
-        mutex_unlock(&p->maps_mutex);
-}
 
-u64 gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p,
-                                struct vm_gk20a *vm)
-{
-        struct gk20a_semaphore_pool_map *map;
-        u64 gpu_va = 0;
+        hw_sema = kzalloc(sizeof(struct gk20a_semaphore_int), GFP_KERNEL);
+        if (!hw_sema) {
+                ret = -ENOMEM;
+                goto fail_free_idx;
+        }
 
-        mutex_lock(&p->maps_mutex);
-        map = gk20a_semaphore_pool_find_map_locked(p, vm);
-        if (map)
-                gpu_va = map->gpu_va;
-        mutex_unlock(&p->maps_mutex);
+        ch->hw_sema = hw_sema;
+        hw_sema->ch = ch;
+        hw_sema->p = p;
+        hw_sema->idx = hw_sema_idx;
+        hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx;
+        atomic_set(&hw_sema->next_value, 0);
+        hw_sema->value = p->cpu_va + hw_sema->offset;
+        writel(0, hw_sema->value);
 
-        return gpu_va;
+        list_add(&hw_sema->hw_sema_list, &p->hw_semas);
+
+        mutex_unlock(&p->pool_lock);
+
+        return 0;
+
+fail_free_idx:
+        clear_bit(hw_sema_idx, p->semas_alloced);
+fail:
+        mutex_unlock(&p->pool_lock);
+        return ret;
 }
 
-struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool)
+/*
+ * Allocate a semaphore from the passed pool.
+ *
+ * Since semaphores are ref-counted there's no explicit free for external code
+ * to use. When the ref-count hits 0 the internal free will happen.
+ */
+struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch)
 {
         struct gk20a_semaphore *s;
+        int ret;
+
+        if (!ch->hw_sema) {
+                ret = __gk20a_init_hw_sema(ch);
+                if (ret)
+                        return ERR_PTR(ret);
+        }
 
         s = kzalloc(sizeof(*s), GFP_KERNEL);
         if (!s)
                 return NULL;
 
-        s->offset = gk20a_balloc(&pool->alloc, SEMAPHORE_SIZE);
-        if (!s->offset) {
-                gk20a_err(dev_from_gk20a(pool->g),
-                          "failed to allocate semaphore");
-                kfree(s);
-                return NULL;
-        }
+        kref_init(&s->ref);
+        s->hw_sema = ch->hw_sema;
+        atomic_set(&s->value, 0);
 
-        gk20a_semaphore_pool_get(pool);
-        s->pool = pool;
+        /*
+         * Take a ref on the pool so that we can keep this pool alive for
+         * as long as this semaphore is alive.
+         */
+        gk20a_semaphore_pool_get(s->hw_sema->p);
 
-        kref_init(&s->ref);
-        /* Initially acquired. */
-        gk20a_mem_wr(s->pool->g, &s->pool->mem, s->offset, 0);
-        gk20a_dbg_info("created semaphore offset=%d, value=%d",
-                       s->offset,
-                       gk20a_mem_rd(s->pool->g, &s->pool->mem, s->offset));
         return s;
 }
 
@@ -187,8 +400,8 @@ static void gk20a_semaphore_free(struct kref *ref)
         struct gk20a_semaphore *s =
                 container_of(ref, struct gk20a_semaphore, ref);
 
-        gk20a_bfree(&s->pool->alloc, s->offset);
-        gk20a_semaphore_pool_put(s->pool);
+        gk20a_semaphore_pool_put(s->hw_sema->p);
+
         kfree(s);
 }
 
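Note on the addressing scheme introduced above: each pool owns one page of the
semaphore "sea"; a channel's VM maps that page read-write at a private GPU VA,
while the whole sea is also mapped read-only at one fixed GPU VA shared by all
VMs (see __gk20a_semaphore_pool_gpu_va() and __gk20a_init_hw_sema()). A
semaphore's RO address is then ro_base + page_idx * PAGE_SIZE + idx *
SEMAPHORE_SIZE. The standalone sketch below only illustrates that arithmetic;
it is not part of the commit, and the 4 KiB page size and the 16-byte
SEMAPHORE_SIZE (taken from the removed "static const int SEMAPHORE_SIZE = 16")
are assumptions, as are the sema_ro_gpu_va()/SKETCH_* names.

        #include <stdint.h>
        #include <stdio.h>

        #define SKETCH_PAGE_SIZE 4096ULL /* assumed 4 KiB page */
        #define SKETCH_SEMA_SIZE 16ULL   /* assumed bytes per HW semaphore */

        /*
         * Read-only GPU VA of semaphore 'sema_idx' in pool 'page_idx',
         * given the fixed base where the whole sea is mapped in every VM.
         */
        static uint64_t sema_ro_gpu_va(uint64_t ro_base, uint64_t page_idx,
                                       uint64_t sema_idx)
        {
                return ro_base + page_idx * SKETCH_PAGE_SIZE +
                       sema_idx * SKETCH_SEMA_SIZE;
        }

        int main(void)
        {
                /* Pool 3, semaphore 7, sea mapped read-only at 0x100000000. */
                printf("0x%llx\n", (unsigned long long)
                       sema_ro_gpu_va(0x100000000ULL, 3, 7));
                return 0;
        }

Because the RO alias is at the same fixed base in every VM, one channel can
wait on another channel's semaphore without sharing the RW mapping.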