path: root/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c | 435
1 file changed, 324 insertions(+), 111 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
index 3b17bfcb..aa375b24 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
@@ -15,63 +15,284 @@
  * more details.
  */
 
-#include "semaphore_gk20a.h"
+#define pr_fmt(fmt) "gpu_sema: " fmt
+
 #include <linux/dma-mapping.h>
+#include <linux/highmem.h>
 #include <linux/slab.h>
+
+#include <asm/pgtable.h>
+
 #include "gk20a.h"
 #include "mm_gk20a.h"
+#include "semaphore_gk20a.h"
+
+#define __lock_sema_sea(s) \
+	do { \
+		mutex_lock(&s->sea_lock); \
+	} while (0)
 
-static const int SEMAPHORE_SIZE = 16;
+#define __unlock_sema_sea(s) \
+	do { \
+		mutex_unlock(&s->sea_lock); \
+	} while (0)
 
-struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct gk20a *g,
-		const char *unique_name, size_t capacity)
+/*
+ * Return the sema_sea pointer.
+ */
+struct gk20a_semaphore_sea *gk20a_semaphore_get_sea(struct gk20a *g)
+{
+	return g->sema_sea;
+}
+
+static int __gk20a_semaphore_sea_grow(struct gk20a_semaphore_sea *sea)
+{
+	int ret = 0;
+	struct gk20a *gk20a = sea->gk20a;
+
+	__lock_sema_sea(sea);
+
+	ret = gk20a_gmmu_alloc_attr(gk20a, DMA_ATTR_NO_KERNEL_MAPPING,
+				    PAGE_SIZE * SEMAPHORE_POOL_COUNT,
+				    &sea->sea_mem);
+	if (ret)
+		goto out;
+
+	sea->ro_sg_table = sea->sea_mem.sgt;
+	sea->size = SEMAPHORE_POOL_COUNT;
+	sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE;
+
+out:
+	__unlock_sema_sea(sea);
+	return ret;
+}
+
+/*
+ * Create the semaphore sea. Only create it once - subsequent calls to this will
+ * return the originally created sea pointer.
+ */
+struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *g)
+{
+	if (g->sema_sea)
+		return g->sema_sea;
+
+	g->sema_sea = kzalloc(sizeof(*g->sema_sea), GFP_KERNEL);
+	if (!g->sema_sea)
+		return NULL;
+
+	g->sema_sea->size = 0;
+	g->sema_sea->page_count = 0;
+	g->sema_sea->gk20a = g;
+	INIT_LIST_HEAD(&g->sema_sea->pool_list);
+	mutex_init(&g->sema_sea->sea_lock);
+
+	if (__gk20a_semaphore_sea_grow(g->sema_sea))
+		goto cleanup;
+
+	return g->sema_sea;
+
+cleanup:
+	kfree(g->sema_sea);
+	g->sema_sea = NULL;
+	return NULL;
+}
+
+static int __semaphore_bitmap_alloc(unsigned long *bitmap, unsigned long len)
+{
+	unsigned long idx = find_first_zero_bit(bitmap, len);
+
+	if (idx == len)
+		return -ENOSPC;
+
+	set_bit(idx, bitmap);
+
+	return (int)idx;
+}
+
+/*
+ * Allocate a pool from the sea.
+ */
+struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(
+		struct gk20a_semaphore_sea *sea)
 {
 	struct gk20a_semaphore_pool *p;
+	unsigned long page_idx;
+	int err = 0;
+
 	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
+
+	__lock_sema_sea(sea);
+
+	page_idx = __semaphore_bitmap_alloc(sea->pools_alloced,
+					    SEMAPHORE_POOL_COUNT);
+	if (page_idx < 0) {
+		err = page_idx;
+		goto fail;
+	}
 
+	p->page = sea->sea_mem.pages[page_idx];
+	p->ro_sg_table = sea->ro_sg_table;
+	p->page_idx = page_idx;
+	p->sema_sea = sea;
+	INIT_LIST_HEAD(&p->hw_semas);
 	kref_init(&p->ref);
-	INIT_LIST_HEAD(&p->maps);
-	mutex_init(&p->maps_mutex);
-	p->g = g;
-
-	/* Alloc one 4k page of semaphore per channel. */
-	if (gk20a_gmmu_alloc(g, roundup(capacity * SEMAPHORE_SIZE, PAGE_SIZE),
-			     &p->mem))
-		goto clean_up;
-
-	/* Sacrifice one semaphore in the name of returning error codes. */
-	if (gk20a_allocator_init(&p->alloc, unique_name,
-				 SEMAPHORE_SIZE, p->mem.size - SEMAPHORE_SIZE,
-				 SEMAPHORE_SIZE))
-		goto clean_up;
-
-	gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->mem.cpu_va,
-		       (u64)sg_dma_address(p->mem.sgt->sgl),
-		       (u64)sg_phys(p->mem.sgt->sgl));
+	mutex_init(&p->pool_lock);
+
+	sea->page_count++;
+	list_add(&p->pool_list_entry, &sea->pool_list);
+	__unlock_sema_sea(sea);
+
 	return p;
 
-clean_up:
-	if (p->mem.size)
-		gk20a_gmmu_free(p->g, &p->mem);
+fail:
+	__unlock_sema_sea(sea);
 	kfree(p);
-	return NULL;
+	return ERR_PTR(err);
+}
+
+/*
+ * Map a pool into the passed vm's address space. This handles both the fixed
+ * global RO mapping and the non-fixed private RW mapping.
+ */
+int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
+			     struct vm_gk20a *vm)
+{
+	int ents, err = 0;
+	u64 addr;
+
+	p->cpu_va = vmap(&p->page, 1, 0,
+			 pgprot_writecombine(PAGE_KERNEL));
+
+	/* First do the RW mapping. */
+	p->rw_sg_table = kzalloc(sizeof(*p->rw_sg_table), GFP_KERNEL);
+	if (!p->rw_sg_table)
+		return -ENOMEM;
+
+	err = sg_alloc_table_from_pages(p->rw_sg_table, &p->page, 1, 0,
+					PAGE_SIZE, GFP_KERNEL);
+	if (err) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	/* Add IOMMU mapping... */
+	ents = dma_map_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
+			  DMA_BIDIRECTIONAL);
+	if (ents != 1) {
+		err = -ENOMEM;
+		goto fail_free_sgt;
+	}
+
+	/* Map into the GPU... Doesn't need to be fixed. */
+	p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE,
+				   0, gk20a_mem_flag_none, false);
+	if (!p->gpu_va) {
+		err = -ENOMEM;
+		goto fail_unmap_sgt;
+	}
+
+	/*
+	 * And now the global mapping. Take the sea lock so that we don't race
+	 * with a concurrent remap.
+	 */
+	__lock_sema_sea(p->sema_sea);
+
+	BUG_ON(p->mapped);
+	addr = gk20a_gmmu_fixed_map(vm, &p->sema_sea->ro_sg_table,
+				    p->sema_sea->gpu_va, p->sema_sea->map_size,
+				    0,
+				    gk20a_mem_flag_read_only,
+				    false);
+	if (!addr) {
+		err = -ENOMEM;
+		BUG();
+		goto fail_unlock;
+	}
+	p->gpu_va_ro = addr;
+	p->mapped = 1;
+
+	__unlock_sema_sea(p->sema_sea);
+
+	return 0;
+
+fail_unlock:
+	__unlock_sema_sea(p->sema_sea);
+fail_unmap_sgt:
+	dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
+		     DMA_BIDIRECTIONAL);
+fail_free_sgt:
+	sg_free_table(p->rw_sg_table);
+fail:
+	kfree(p->rw_sg_table);
+	p->rw_sg_table = NULL;
+	return err;
 }
 
+/*
+ * Unmap a semaphore_pool.
+ */
+void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p,
+				struct vm_gk20a *vm)
+{
+	struct gk20a_semaphore_int *hw_sema;
+
+	kunmap(p->cpu_va);
+
+	/* First the global RO mapping... */
+	__lock_sema_sea(p->sema_sea);
+	gk20a_gmmu_unmap(vm, p->gpu_va_ro,
+			 p->sema_sea->map_size, gk20a_mem_flag_none);
+	p->ro_sg_table = NULL;
+	__unlock_sema_sea(p->sema_sea);
+
+	/* And now the private RW mapping. */
+	gk20a_gmmu_unmap(vm, p->gpu_va, PAGE_SIZE, gk20a_mem_flag_none);
+	p->gpu_va = 0;
+
+	dma_unmap_sg(dev_from_vm(vm), p->rw_sg_table->sgl, 1,
+		     DMA_BIDIRECTIONAL);
+
+	sg_free_table(p->rw_sg_table);
+	kfree(p->rw_sg_table);
+	p->rw_sg_table = NULL;
+
+	gk20a_dbg_info("Unmapped sema-pool: idx = %d", p->page_idx);
+	list_for_each_entry(hw_sema, &p->hw_semas, hw_sema_list)
+		/*
+		 * Make sure the mem addresses are all NULL so if this gets
+		 * reused we will fault.
+		 */
+		hw_sema->value = NULL;
+}
+
+/*
+ * Completely free a sempahore_pool. You should make sure this pool is not
+ * mapped otherwise there's going to be a memory leak.
+ */
 static void gk20a_semaphore_pool_free(struct kref *ref)
 {
 	struct gk20a_semaphore_pool *p =
 		container_of(ref, struct gk20a_semaphore_pool, ref);
-	mutex_lock(&p->maps_mutex);
-	WARN_ON(!list_empty(&p->maps));
-	mutex_unlock(&p->maps_mutex);
-	gk20a_gmmu_free(p->g, &p->mem);
-	gk20a_allocator_destroy(&p->alloc);
+	struct gk20a_semaphore_sea *s = p->sema_sea;
+	struct gk20a_semaphore_int *hw_sema, *tmp;
+
+	WARN_ON(p->gpu_va || p->rw_sg_table || p->ro_sg_table);
+
+	__lock_sema_sea(s);
+	list_del(&p->pool_list_entry);
+	clear_bit(p->page_idx, s->pools_alloced);
+	s->page_count--;
+	__unlock_sema_sea(s);
+
+	list_for_each_entry_safe(hw_sema, tmp, &p->hw_semas, hw_sema_list)
+		kfree(hw_sema);
+
 	kfree(p);
 }
 
-static void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p)
+void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p)
 {
 	kref_get(&p->ref);
 }
@@ -81,104 +302,96 @@ void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p)
 	kref_put(&p->ref, gk20a_semaphore_pool_free);
 }
 
-static struct gk20a_semaphore_pool_map *
-gk20a_semaphore_pool_find_map_locked(struct gk20a_semaphore_pool *p,
-				     struct vm_gk20a *vm)
+/*
+ * Get the address for a semaphore_pool - if global is true then return the
+ * global RO address instead of the RW address owned by the semaphore's VM.
+ */
+u64 __gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p, bool global)
 {
-	struct gk20a_semaphore_pool_map *map, *found = NULL;
-	list_for_each_entry(map, &p->maps, list) {
-		if (map->vm == vm) {
-			found = map;
-			break;
-		}
-	}
-	return found;
+	if (!global)
+		return p->gpu_va;
+
+	return p->gpu_va_ro + (PAGE_SIZE * p->page_idx);
 }
 
-int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
-			     struct vm_gk20a *vm,
-			     enum gk20a_mem_rw_flag rw_flag)
+static int __gk20a_init_hw_sema(struct channel_gk20a *ch)
 {
-	struct gk20a_semaphore_pool_map *map;
+	int hw_sema_idx;
+	int ret = 0;
+	struct gk20a_semaphore_int *hw_sema;
+	struct gk20a_semaphore_pool *p = ch->vm->sema_pool;
 
-	map = kzalloc(sizeof(*map), GFP_KERNEL);
-	if (!map)
-		return -ENOMEM;
-	map->vm = vm;
-	map->rw_flag = rw_flag;
-	map->gpu_va = gk20a_gmmu_map(vm, &p->mem.sgt, p->mem.size,
-				     0/*uncached*/, rw_flag,
-				     false);
-	if (!map->gpu_va) {
-		kfree(map);
-		return -ENOMEM;
-	}
-	gk20a_vm_get(vm);
+	BUG_ON(!p);
 
-	mutex_lock(&p->maps_mutex);
-	WARN_ON(gk20a_semaphore_pool_find_map_locked(p, vm));
-	list_add(&map->list, &p->maps);
-	mutex_unlock(&p->maps_mutex);
-	return 0;
-}
+	mutex_lock(&p->pool_lock);
 
-void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p,
-				struct vm_gk20a *vm)
-{
-	struct gk20a_semaphore_pool_map *map;
-	WARN_ON(!vm);
-
-	mutex_lock(&p->maps_mutex);
-	map = gk20a_semaphore_pool_find_map_locked(p, vm);
-	if (map) {
-		gk20a_gmmu_unmap(vm, map->gpu_va, p->mem.size, map->rw_flag);
-		gk20a_vm_put(vm);
-		list_del(&map->list);
-		kfree(map);
+	/* Find an available HW semaphore. */
+	hw_sema_idx = __semaphore_bitmap_alloc(p->semas_alloced,
+					       PAGE_SIZE / SEMAPHORE_SIZE);
+	if (hw_sema_idx < 0) {
+		ret = hw_sema_idx;
+		goto fail;
 	}
-	mutex_unlock(&p->maps_mutex);
-}
 
-u64 gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p,
-				struct vm_gk20a *vm)
-{
-	struct gk20a_semaphore_pool_map *map;
-	u64 gpu_va = 0;
+	hw_sema = kzalloc(sizeof(struct gk20a_semaphore_int), GFP_KERNEL);
+	if (!hw_sema) {
+		ret = -ENOMEM;
+		goto fail_free_idx;
+	}
 
-	mutex_lock(&p->maps_mutex);
-	map = gk20a_semaphore_pool_find_map_locked(p, vm);
-	if (map)
-		gpu_va = map->gpu_va;
-	mutex_unlock(&p->maps_mutex);
+	ch->hw_sema = hw_sema;
+	hw_sema->ch = ch;
+	hw_sema->p = p;
+	hw_sema->idx = hw_sema_idx;
+	hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx;
+	atomic_set(&hw_sema->next_value, 0);
+	hw_sema->value = p->cpu_va + hw_sema->offset;
+	writel(0, hw_sema->value);
 
-	return gpu_va;
+	list_add(&hw_sema->hw_sema_list, &p->hw_semas);
+
+	mutex_unlock(&p->pool_lock);
+
+	return 0;
+
+fail_free_idx:
+	clear_bit(hw_sema_idx, p->semas_alloced);
+fail:
+	mutex_unlock(&p->pool_lock);
+	return ret;
 }
 
-struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool)
+/*
+ * Allocate a semaphore from the passed pool.
+ *
+ * Since semaphores are ref-counted there's no explicit free for external code
+ * to use. When the ref-count hits 0 the internal free will happen.
+ */
+struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch)
 {
 	struct gk20a_semaphore *s;
+	int ret;
+
+	if (!ch->hw_sema) {
+		ret = __gk20a_init_hw_sema(ch);
+		if (ret)
+			return ERR_PTR(ret);
+	}
 
 	s = kzalloc(sizeof(*s), GFP_KERNEL);
 	if (!s)
 		return NULL;
 
-	s->offset = gk20a_balloc(&pool->alloc, SEMAPHORE_SIZE);
-	if (!s->offset) {
-		gk20a_err(dev_from_gk20a(pool->g),
-			  "failed to allocate semaphore");
-		kfree(s);
-		return NULL;
-	}
+	kref_init(&s->ref);
+	s->hw_sema = ch->hw_sema;
+	atomic_set(&s->value, 0);
 
-	gk20a_semaphore_pool_get(pool);
-	s->pool = pool;
+	/*
+	 * Take a ref on the pool so that we can keep this pool alive for
+	 * as long as this semaphore is alive.
+	 */
+	gk20a_semaphore_pool_get(s->hw_sema->p);
 
-	kref_init(&s->ref);
-	/* Initially acquired. */
-	gk20a_mem_wr(s->pool->g, &s->pool->mem, s->offset, 0);
-	gk20a_dbg_info("created semaphore offset=%d, value=%d",
-		       s->offset,
-		       gk20a_mem_rd(s->pool->g, &s->pool->mem, s->offset));
 	return s;
 }
 
@@ -187,8 +400,8 @@ static void gk20a_semaphore_free(struct kref *ref)
 	struct gk20a_semaphore *s =
 		container_of(ref, struct gk20a_semaphore, ref);
 
-	gk20a_bfree(&s->pool->alloc, s->offset);
-	gk20a_semaphore_pool_put(s->pool);
+	gk20a_semaphore_pool_put(s->hw_sema->p);
+
 	kfree(s);
 }
 
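For orientation, here is a minimal usage sketch of the reworked path this diff introduces (sea -> pool -> per-channel hw_sema -> semaphore). It is illustrative only: the example_* helpers are hypothetical, the per-VM sema_pool wiring and the matching teardown/put calls live elsewhere in the series, and error handling is abbreviated.

/*
 * Hypothetical usage sketch (not part of this change). Assumes the usual
 * nvgpu headers are available.
 */
#include <linux/err.h>

#include "gk20a.h"
#include "semaphore_gk20a.h"

static int example_setup_sema_pool(struct gk20a *g, struct vm_gk20a *vm)
{
	struct gk20a_semaphore_sea *sea;
	struct gk20a_semaphore_pool *pool;
	int err;

	/* One sea per GPU; repeated calls return the same sea. */
	sea = gk20a_semaphore_sea_create(g);
	if (!sea)
		return -ENOMEM;

	/* Carve one page-sized pool out of the sea. */
	pool = gk20a_semaphore_pool_alloc(sea);
	if (IS_ERR(pool))
		return PTR_ERR(pool);

	/* Map it into this VM: private RW mapping plus fixed global RO mapping. */
	err = gk20a_semaphore_pool_map(pool, vm);
	if (err) {
		gk20a_semaphore_pool_put(pool);
		return err;
	}

	/* The per-VM pool is what __gk20a_init_hw_sema() picks up later. */
	vm->sema_pool = pool;
	return 0;
}

static struct gk20a_semaphore *example_get_sema(struct channel_gk20a *ch)
{
	/*
	 * First use on a channel lazily allocates ch->hw_sema from the VM's
	 * pool; the returned semaphore holds a reference on that pool.
	 */
	struct gk20a_semaphore *s = gk20a_semaphore_alloc(ch);

	return IS_ERR_OR_NULL(s) ? NULL : s;
}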