author		Dave Airlie <airlied@redhat.com>	2015-12-14 19:43:27 -0500
committer	Dave Airlie <airlied@redhat.com>	2015-12-14 19:43:27 -0500
commit		21de54b3c4d08d2b20e80876c6def0b421dfec2e (patch)
tree		3ff31275e6b4acdd57a2d05eb2499a3941fb3b72
parent		870a171814da2b3230edbbfbb4b2fa1c4abb5413 (diff)
parent		214613656b5179f0daab6e0a080814b5100d45f0 (diff)

Merge tag 'drm-vc4-next-2015-12-11' of http://github.com/anholt/linux into drm-next

This pull request brings in 3D acceleration support for the VC4 GPU. While
there is still performance work to be done (particularly surrounding RCL
generation), the CL submit ABI should be settled and done now.

* tag 'drm-vc4-next-2015-12-11' of http://github.com/anholt/linux:
  drm/vc4: Add an interface for capturing the GPU state after a hang.
  drm/vc4: Add support for async pageflips.
  drm/vc4: Add support for drawing 3D frames.
  drm/vc4: Bind and initialize the V3D engine.
  drm/vc4: Fix a typo in a V3D debug register.
  drm/vc4: Add an API for creating GPU shaders in GEM BOs.
  drm/vc4: Add create and map BO ioctls.
  drm/vc4: Add a BO cache.
  drm: Create a driver hook for allocating GEM object structs.
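The BO side of the ABI above is reached from userspace through the new DRM_IOCTL_VC4_* ioctls declared in include/uapi/drm/vc4_drm.h. A minimal sketch of creating and CPU-mapping a BO with that interface follows; the device node path, the header include, and the bare-bones error handling are illustrative assumptions and are not part of this merge:

	/* Hedged userspace sketch: create a vc4 BO and map it for CPU access.
	 * Assumes the uapi header from this series is available as "vc4_drm.h"
	 * and that /dev/dri/card0 is the vc4 device node.
	 */
	#include <fcntl.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <unistd.h>
	#include "vc4_drm.h"

	int main(void)
	{
		int fd = open("/dev/dri/card0", O_RDWR);	/* assumed node */
		if (fd < 0)
			return 1;

		/* Allocate a page-sized BO; the kernel returns a GEM handle. */
		struct drm_vc4_create_bo create = { .size = 4096 };
		if (ioctl(fd, DRM_IOCTL_VC4_CREATE_BO, &create))
			return 1;

		/* Look up the fake mmap offset for that handle... */
		struct drm_vc4_mmap_bo map = { .handle = create.handle };
		if (ioctl(fd, DRM_IOCTL_VC4_MMAP_BO, &map))
			return 1;

		/* ...and map the BO through the DRM fd. */
		void *ptr = mmap(NULL, create.size, PROT_READ | PROT_WRITE,
				 MAP_SHARED, fd, map.offset);
		if (ptr == MAP_FAILED)
			return 1;

		memset(ptr, 0, create.size);	/* CPU writes to the new BO */
		munmap(ptr, create.size);
		close(fd);
		return 0;
	}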
-rw-r--r--	drivers/gpu/drm/drm_gem_cma_helper.c	10
-rw-r--r--	drivers/gpu/drm/vc4/Makefile	11
-rw-r--r--	drivers/gpu/drm/vc4/vc4_bo.c	517
-rw-r--r--	drivers/gpu/drm/vc4/vc4_crtc.c	99
-rw-r--r--	drivers/gpu/drm/vc4/vc4_debugfs.c	3
-rw-r--r--	drivers/gpu/drm/vc4/vc4_drv.c	36
-rw-r--r--	drivers/gpu/drm/vc4/vc4_drv.h	318
-rw-r--r--	drivers/gpu/drm/vc4/vc4_gem.c	867
-rw-r--r--	drivers/gpu/drm/vc4/vc4_irq.c	210
-rw-r--r--	drivers/gpu/drm/vc4/vc4_kms.c	149
-rw-r--r--	drivers/gpu/drm/vc4/vc4_packet.h	399
-rw-r--r--	drivers/gpu/drm/vc4/vc4_plane.c	40
-rw-r--r--	drivers/gpu/drm/vc4/vc4_qpu_defines.h	264
-rw-r--r--	drivers/gpu/drm/vc4/vc4_regs.h	2
-rw-r--r--	drivers/gpu/drm/vc4/vc4_render_cl.c	634
-rw-r--r--	drivers/gpu/drm/vc4/vc4_trace.h	63
-rw-r--r--	drivers/gpu/drm/vc4/vc4_trace_points.c	14
-rw-r--r--	drivers/gpu/drm/vc4/vc4_v3d.c	262
-rw-r--r--	drivers/gpu/drm/vc4/vc4_validate.c	900
-rw-r--r--	drivers/gpu/drm/vc4/vc4_validate_shaders.c	513
-rw-r--r--	include/drm/drmP.h	7
-rw-r--r--	include/uapi/drm/Kbuild	1
-rw-r--r--	include/uapi/drm/vc4_drm.h	279
23 files changed, 5577 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index e109b49cd25d..0f7b00ba57da 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@@ -59,11 +59,13 @@ __drm_gem_cma_create(struct drm_device *drm, size_t size)
 	struct drm_gem_object *gem_obj;
 	int ret;
 
-	cma_obj = kzalloc(sizeof(*cma_obj), GFP_KERNEL);
-	if (!cma_obj)
+	if (drm->driver->gem_create_object)
+		gem_obj = drm->driver->gem_create_object(drm, size);
+	else
+		gem_obj = kzalloc(sizeof(*cma_obj), GFP_KERNEL);
+	if (!gem_obj)
 		return ERR_PTR(-ENOMEM);
-
-	gem_obj = &cma_obj->base;
+	cma_obj = container_of(gem_obj, struct drm_gem_cma_object, base);
 
 	ret = drm_gem_object_init(drm, gem_obj, size);
 	if (ret)
diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile
index 32b4f9cd8f52..4c6a99f0398c 100644
--- a/drivers/gpu/drm/vc4/Makefile
+++ b/drivers/gpu/drm/vc4/Makefile
@@ -8,10 +8,19 @@ vc4-y := \
 	vc4_crtc.o \
 	vc4_drv.o \
 	vc4_kms.o \
+	vc4_gem.o \
 	vc4_hdmi.o \
 	vc4_hvs.o \
-	vc4_plane.o
+	vc4_irq.o \
+	vc4_plane.o \
+	vc4_render_cl.o \
+	vc4_trace_points.o \
+	vc4_v3d.o \
+	vc4_validate.o \
+	vc4_validate_shaders.o
 
 vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o
 
 obj-$(CONFIG_DRM_VC4) += vc4.o
+
+CFLAGS_vc4_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index ab9f5108ae1a..18dfe3ec9a62 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -12,19 +12,236 @@
  * access to system memory with no MMU in between. To support it, we
  * use the GEM CMA helper functions to allocate contiguous ranges of
  * physical memory for our BOs.
+ *
+ * Since the CMA allocator is very slow, we keep a cache of recently
+ * freed BOs around so that the kernel's allocation of objects for 3D
+ * rendering can return quickly.
  */
 
 #include "vc4_drv.h"
+#include "uapi/drm/vc4_drm.h"
23
24static void vc4_bo_stats_dump(struct vc4_dev *vc4)
25{
26 DRM_INFO("num bos allocated: %d\n",
27 vc4->bo_stats.num_allocated);
28 DRM_INFO("size bos allocated: %dkb\n",
29 vc4->bo_stats.size_allocated / 1024);
30 DRM_INFO("num bos used: %d\n",
31 vc4->bo_stats.num_allocated - vc4->bo_stats.num_cached);
32 DRM_INFO("size bos used: %dkb\n",
33 (vc4->bo_stats.size_allocated -
34 vc4->bo_stats.size_cached) / 1024);
35 DRM_INFO("num bos cached: %d\n",
36 vc4->bo_stats.num_cached);
37 DRM_INFO("size bos cached: %dkb\n",
38 vc4->bo_stats.size_cached / 1024);
39}
40
41#ifdef CONFIG_DEBUG_FS
42int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
43{
44 struct drm_info_node *node = (struct drm_info_node *)m->private;
45 struct drm_device *dev = node->minor->dev;
46 struct vc4_dev *vc4 = to_vc4_dev(dev);
47 struct vc4_bo_stats stats;
48
49 /* Take a snapshot of the current stats with the lock held. */
50 mutex_lock(&vc4->bo_lock);
51 stats = vc4->bo_stats;
52 mutex_unlock(&vc4->bo_lock);
53
54 seq_printf(m, "num bos allocated: %d\n",
55 stats.num_allocated);
56 seq_printf(m, "size bos allocated: %dkb\n",
57 stats.size_allocated / 1024);
58 seq_printf(m, "num bos used: %d\n",
59 stats.num_allocated - stats.num_cached);
60 seq_printf(m, "size bos used: %dkb\n",
61 (stats.size_allocated - stats.size_cached) / 1024);
62 seq_printf(m, "num bos cached: %d\n",
63 stats.num_cached);
64 seq_printf(m, "size bos cached: %dkb\n",
65 stats.size_cached / 1024);
66
67 return 0;
68}
69#endif
70
71static uint32_t bo_page_index(size_t size)
72{
73 return (size / PAGE_SIZE) - 1;
74}
75
76/* Must be called with bo_lock held. */
77static void vc4_bo_destroy(struct vc4_bo *bo)
78{
79 struct drm_gem_object *obj = &bo->base.base;
80 struct vc4_dev *vc4 = to_vc4_dev(obj->dev);
81
82 if (bo->validated_shader) {
83 kfree(bo->validated_shader->texture_samples);
84 kfree(bo->validated_shader);
85 bo->validated_shader = NULL;
86 }
87
88 vc4->bo_stats.num_allocated--;
89 vc4->bo_stats.size_allocated -= obj->size;
90 drm_gem_cma_free_object(obj);
91}
92
93/* Must be called with bo_lock held. */
94static void vc4_bo_remove_from_cache(struct vc4_bo *bo)
95{
96 struct drm_gem_object *obj = &bo->base.base;
97 struct vc4_dev *vc4 = to_vc4_dev(obj->dev);
98
99 vc4->bo_stats.num_cached--;
100 vc4->bo_stats.size_cached -= obj->size;
101
102 list_del(&bo->unref_head);
103 list_del(&bo->size_head);
104}
105
106static struct list_head *vc4_get_cache_list_for_size(struct drm_device *dev,
107 size_t size)
108{
109 struct vc4_dev *vc4 = to_vc4_dev(dev);
110 uint32_t page_index = bo_page_index(size);
111
112 if (vc4->bo_cache.size_list_size <= page_index) {
113 uint32_t new_size = max(vc4->bo_cache.size_list_size * 2,
114 page_index + 1);
115 struct list_head *new_list;
116 uint32_t i;
117
118 new_list = kmalloc_array(new_size, sizeof(struct list_head),
119 GFP_KERNEL);
120 if (!new_list)
121 return NULL;
122
123 /* Rebase the old cached BO lists to their new list
124 * head locations.
125 */
126 for (i = 0; i < vc4->bo_cache.size_list_size; i++) {
127 struct list_head *old_list =
128 &vc4->bo_cache.size_list[i];
129
130 if (list_empty(old_list))
131 INIT_LIST_HEAD(&new_list[i]);
132 else
133 list_replace(old_list, &new_list[i]);
134 }
135 /* And initialize the brand new BO list heads. */
136 for (i = vc4->bo_cache.size_list_size; i < new_size; i++)
137 INIT_LIST_HEAD(&new_list[i]);
138
139 kfree(vc4->bo_cache.size_list);
140 vc4->bo_cache.size_list = new_list;
141 vc4->bo_cache.size_list_size = new_size;
142 }
143
144 return &vc4->bo_cache.size_list[page_index];
145}
146
147void vc4_bo_cache_purge(struct drm_device *dev)
148{
149 struct vc4_dev *vc4 = to_vc4_dev(dev);
150
151 mutex_lock(&vc4->bo_lock);
152 while (!list_empty(&vc4->bo_cache.time_list)) {
153 struct vc4_bo *bo = list_last_entry(&vc4->bo_cache.time_list,
154 struct vc4_bo, unref_head);
155 vc4_bo_remove_from_cache(bo);
156 vc4_bo_destroy(bo);
157 }
158 mutex_unlock(&vc4->bo_lock);
159}
160
161static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev,
162 uint32_t size)
163{
164 struct vc4_dev *vc4 = to_vc4_dev(dev);
165 uint32_t page_index = bo_page_index(size);
166 struct vc4_bo *bo = NULL;
167
168 size = roundup(size, PAGE_SIZE);
169
170 mutex_lock(&vc4->bo_lock);
171 if (page_index >= vc4->bo_cache.size_list_size)
172 goto out;
 
-struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size)
+	if (list_empty(&vc4->bo_cache.size_list[page_index]))
175 goto out;
176
177 bo = list_first_entry(&vc4->bo_cache.size_list[page_index],
178 struct vc4_bo, size_head);
179 vc4_bo_remove_from_cache(bo);
180 kref_init(&bo->base.base.refcount);
181
182out:
183 mutex_unlock(&vc4->bo_lock);
184 return bo;
185}
186
187/**
188 * vc4_create_object - Implementation of driver->gem_create_object.
189 *
190 * This lets the CMA helpers allocate object structs for us, and keep
191 * our BO stats correct.
192 */
+struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size)
 {
195 struct vc4_dev *vc4 = to_vc4_dev(dev);
196 struct vc4_bo *bo;
197
198 bo = kzalloc(sizeof(*bo), GFP_KERNEL);
199 if (!bo)
200 return ERR_PTR(-ENOMEM);
201
202 mutex_lock(&vc4->bo_lock);
203 vc4->bo_stats.num_allocated++;
204 vc4->bo_stats.size_allocated += size;
205 mutex_unlock(&vc4->bo_lock);
206
207 return &bo->base.base;
208}
209
210struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
211 bool from_cache)
212{
213 size_t size = roundup(unaligned_size, PAGE_SIZE);
214 struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct drm_gem_cma_object *cma_obj;
 
-	cma_obj = drm_gem_cma_create(dev, size);
-	if (IS_ERR(cma_obj))
+	if (size == 0)
 		return NULL;
-	else
-		return to_vc4_bo(&cma_obj->base);
+
+	/* First, try to get a vc4_bo from the kernel BO cache. */
221 if (from_cache) {
222 struct vc4_bo *bo = vc4_bo_get_from_cache(dev, size);
223
224 if (bo)
225 return bo;
226 }
227
228 cma_obj = drm_gem_cma_create(dev, size);
229 if (IS_ERR(cma_obj)) {
230 /*
231 * If we've run out of CMA memory, kill the cache of
232 * CMA allocations we've got laying around and try again.
233 */
234 vc4_bo_cache_purge(dev);
235
236 cma_obj = drm_gem_cma_create(dev, size);
237 if (IS_ERR(cma_obj)) {
238 DRM_ERROR("Failed to allocate from CMA:\n");
239 vc4_bo_stats_dump(vc4);
240 return NULL;
241 }
242 }
243
244 return to_vc4_bo(&cma_obj->base);
 }
 
 int vc4_dumb_create(struct drm_file *file_priv,
@@ -41,7 +258,191 @@ int vc4_dumb_create(struct drm_file *file_priv,
 	if (args->size < args->pitch * args->height)
 		args->size = args->pitch * args->height;
 
-	bo = vc4_bo_create(dev, roundup(args->size, PAGE_SIZE));
+	bo = vc4_bo_create(dev, args->size, false);
262 if (!bo)
263 return -ENOMEM;
264
265 ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
266 drm_gem_object_unreference_unlocked(&bo->base.base);
267
268 return ret;
269}
270
271/* Must be called with bo_lock held. */
272static void vc4_bo_cache_free_old(struct drm_device *dev)
273{
274 struct vc4_dev *vc4 = to_vc4_dev(dev);
275 unsigned long expire_time = jiffies - msecs_to_jiffies(1000);
276
277 while (!list_empty(&vc4->bo_cache.time_list)) {
278 struct vc4_bo *bo = list_last_entry(&vc4->bo_cache.time_list,
279 struct vc4_bo, unref_head);
280 if (time_before(expire_time, bo->free_time)) {
281 mod_timer(&vc4->bo_cache.time_timer,
282 round_jiffies_up(jiffies +
283 msecs_to_jiffies(1000)));
284 return;
285 }
286
287 vc4_bo_remove_from_cache(bo);
288 vc4_bo_destroy(bo);
289 }
290}
291
292/* Called on the last userspace/kernel unreference of the BO. Returns
293 * it to the BO cache if possible, otherwise frees it.
294 *
295 * Note that this is called with the struct_mutex held.
296 */
297void vc4_free_object(struct drm_gem_object *gem_bo)
298{
299 struct drm_device *dev = gem_bo->dev;
300 struct vc4_dev *vc4 = to_vc4_dev(dev);
301 struct vc4_bo *bo = to_vc4_bo(gem_bo);
302 struct list_head *cache_list;
303
304 mutex_lock(&vc4->bo_lock);
305 /* If the object references someone else's memory, we can't cache it.
306 */
307 if (gem_bo->import_attach) {
308 vc4_bo_destroy(bo);
309 goto out;
310 }
311
312 /* Don't cache if it was publicly named. */
313 if (gem_bo->name) {
314 vc4_bo_destroy(bo);
315 goto out;
316 }
317
318 cache_list = vc4_get_cache_list_for_size(dev, gem_bo->size);
319 if (!cache_list) {
320 vc4_bo_destroy(bo);
321 goto out;
322 }
323
324 if (bo->validated_shader) {
325 kfree(bo->validated_shader->texture_samples);
326 kfree(bo->validated_shader);
327 bo->validated_shader = NULL;
328 }
329
330 bo->free_time = jiffies;
331 list_add(&bo->size_head, cache_list);
332 list_add(&bo->unref_head, &vc4->bo_cache.time_list);
333
334 vc4->bo_stats.num_cached++;
335 vc4->bo_stats.size_cached += gem_bo->size;
336
337 vc4_bo_cache_free_old(dev);
338
339out:
340 mutex_unlock(&vc4->bo_lock);
341}
342
343static void vc4_bo_cache_time_work(struct work_struct *work)
344{
345 struct vc4_dev *vc4 =
346 container_of(work, struct vc4_dev, bo_cache.time_work);
347 struct drm_device *dev = vc4->dev;
348
349 mutex_lock(&vc4->bo_lock);
350 vc4_bo_cache_free_old(dev);
351 mutex_unlock(&vc4->bo_lock);
352}
353
354static void vc4_bo_cache_time_timer(unsigned long data)
355{
356 struct drm_device *dev = (struct drm_device *)data;
357 struct vc4_dev *vc4 = to_vc4_dev(dev);
358
359 schedule_work(&vc4->bo_cache.time_work);
360}
361
362struct dma_buf *
363vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags)
364{
365 struct vc4_bo *bo = to_vc4_bo(obj);
366
367 if (bo->validated_shader) {
368 DRM_ERROR("Attempting to export shader BO\n");
369 return ERR_PTR(-EINVAL);
370 }
371
372 return drm_gem_prime_export(dev, obj, flags);
373}
374
375int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
376{
377 struct drm_gem_object *gem_obj;
378 struct vc4_bo *bo;
379 int ret;
380
381 ret = drm_gem_mmap(filp, vma);
382 if (ret)
383 return ret;
384
385 gem_obj = vma->vm_private_data;
386 bo = to_vc4_bo(gem_obj);
387
388 if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
389 DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
390 return -EINVAL;
391 }
392
393 /*
394 * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
395 * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
396 * the whole buffer.
397 */
398 vma->vm_flags &= ~VM_PFNMAP;
399 vma->vm_pgoff = 0;
400
401 ret = dma_mmap_writecombine(bo->base.base.dev->dev, vma,
402 bo->base.vaddr, bo->base.paddr,
403 vma->vm_end - vma->vm_start);
404 if (ret)
405 drm_gem_vm_close(vma);
406
407 return ret;
408}
409
410int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
411{
412 struct vc4_bo *bo = to_vc4_bo(obj);
413
414 if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
415 DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
416 return -EINVAL;
417 }
418
419 return drm_gem_cma_prime_mmap(obj, vma);
420}
421
422void *vc4_prime_vmap(struct drm_gem_object *obj)
423{
424 struct vc4_bo *bo = to_vc4_bo(obj);
425
426 if (bo->validated_shader) {
427 DRM_ERROR("mmaping of shader BOs not allowed.\n");
428 return ERR_PTR(-EINVAL);
429 }
430
431 return drm_gem_cma_prime_vmap(obj);
432}
433
434int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
435 struct drm_file *file_priv)
436{
437 struct drm_vc4_create_bo *args = data;
438 struct vc4_bo *bo = NULL;
439 int ret;
440
441 /*
442 * We can't allocate from the BO cache, because the BOs don't
443 * get zeroed, and that might leak data between users.
444 */
445 bo = vc4_bo_create(dev, args->size, false);
 	if (!bo)
 		return -ENOMEM;
 
@@ -50,3 +451,107 @@ int vc4_dumb_create(struct drm_file *file_priv,
 
 	return ret;
 }
454
455int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
456 struct drm_file *file_priv)
457{
458 struct drm_vc4_mmap_bo *args = data;
459 struct drm_gem_object *gem_obj;
460
461 gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
462 if (!gem_obj) {
463 DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
464 return -EINVAL;
465 }
466
467 /* The mmap offset was set up at BO allocation time. */
468 args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
469
470 drm_gem_object_unreference_unlocked(gem_obj);
471 return 0;
472}
473
474int
475vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
476 struct drm_file *file_priv)
477{
478 struct drm_vc4_create_shader_bo *args = data;
479 struct vc4_bo *bo = NULL;
480 int ret;
481
482 if (args->size == 0)
483 return -EINVAL;
484
485 if (args->size % sizeof(u64) != 0)
486 return -EINVAL;
487
488 if (args->flags != 0) {
489 DRM_INFO("Unknown flags set: 0x%08x\n", args->flags);
490 return -EINVAL;
491 }
492
493 if (args->pad != 0) {
494 DRM_INFO("Pad set: 0x%08x\n", args->pad);
495 return -EINVAL;
496 }
497
498 bo = vc4_bo_create(dev, args->size, true);
499 if (!bo)
500 return -ENOMEM;
501
502 ret = copy_from_user(bo->base.vaddr,
503 (void __user *)(uintptr_t)args->data,
504 args->size);
505 if (ret != 0)
506 goto fail;
507 /* Clear the rest of the memory from allocating from the BO
508 * cache.
509 */
510 memset(bo->base.vaddr + args->size, 0,
511 bo->base.base.size - args->size);
512
513 bo->validated_shader = vc4_validate_shader(&bo->base);
514 if (!bo->validated_shader) {
515 ret = -EINVAL;
516 goto fail;
517 }
518
519 /* We have to create the handle after validation, to avoid
520 * races for users doing things like mmap the shader BO.
521 */
522 ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
523
524 fail:
525 drm_gem_object_unreference_unlocked(&bo->base.base);
526
527 return ret;
528}
529
530void vc4_bo_cache_init(struct drm_device *dev)
531{
532 struct vc4_dev *vc4 = to_vc4_dev(dev);
533
534 mutex_init(&vc4->bo_lock);
535
536 INIT_LIST_HEAD(&vc4->bo_cache.time_list);
537
538 INIT_WORK(&vc4->bo_cache.time_work, vc4_bo_cache_time_work);
539 setup_timer(&vc4->bo_cache.time_timer,
540 vc4_bo_cache_time_timer,
541 (unsigned long)dev);
542}
543
544void vc4_bo_cache_destroy(struct drm_device *dev)
545{
546 struct vc4_dev *vc4 = to_vc4_dev(dev);
547
548 del_timer(&vc4->bo_cache.time_timer);
549 cancel_work_sync(&vc4->bo_cache.time_work);
550
551 vc4_bo_cache_purge(dev);
552
553 if (vc4->bo_stats.num_allocated) {
554 DRM_ERROR("Destroying BO cache while BOs still allocated:\n");
555 vc4_bo_stats_dump(vc4);
556 }
557}
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 2168a99d59aa..8d0d70e51ef2 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -35,6 +35,7 @@
35#include "drm_atomic_helper.h" 35#include "drm_atomic_helper.h"
36#include "drm_crtc_helper.h" 36#include "drm_crtc_helper.h"
37#include "linux/clk.h" 37#include "linux/clk.h"
38#include "drm_fb_cma_helper.h"
38#include "linux/component.h" 39#include "linux/component.h"
39#include "linux/of_device.h" 40#include "linux/of_device.h"
40#include "vc4_drv.h" 41#include "vc4_drv.h"
@@ -476,10 +477,106 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data)
 	return ret;
 }
 
480struct vc4_async_flip_state {
481 struct drm_crtc *crtc;
482 struct drm_framebuffer *fb;
483 struct drm_pending_vblank_event *event;
484
485 struct vc4_seqno_cb cb;
486};
487
488/* Called when the V3D execution for the BO being flipped to is done, so that
489 * we can actually update the plane's address to point to it.
490 */
491static void
492vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
493{
494 struct vc4_async_flip_state *flip_state =
495 container_of(cb, struct vc4_async_flip_state, cb);
496 struct drm_crtc *crtc = flip_state->crtc;
497 struct drm_device *dev = crtc->dev;
498 struct vc4_dev *vc4 = to_vc4_dev(dev);
499 struct drm_plane *plane = crtc->primary;
500
501 vc4_plane_async_set_fb(plane, flip_state->fb);
502 if (flip_state->event) {
503 unsigned long flags;
504
505 spin_lock_irqsave(&dev->event_lock, flags);
506 drm_crtc_send_vblank_event(crtc, flip_state->event);
507 spin_unlock_irqrestore(&dev->event_lock, flags);
508 }
509
510 drm_framebuffer_unreference(flip_state->fb);
511 kfree(flip_state);
512
513 up(&vc4->async_modeset);
514}
515
516/* Implements async (non-vblank-synced) page flips.
517 *
518 * The page flip ioctl needs to return immediately, so we grab the
519 * modeset semaphore on the pipe, and queue the address update for
520 * when V3D is done with the BO being flipped to.
521 */
522static int vc4_async_page_flip(struct drm_crtc *crtc,
523 struct drm_framebuffer *fb,
524 struct drm_pending_vblank_event *event,
525 uint32_t flags)
526{
527 struct drm_device *dev = crtc->dev;
528 struct vc4_dev *vc4 = to_vc4_dev(dev);
529 struct drm_plane *plane = crtc->primary;
530 int ret = 0;
531 struct vc4_async_flip_state *flip_state;
532 struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
533 struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
534
535 flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
536 if (!flip_state)
537 return -ENOMEM;
538
539 drm_framebuffer_reference(fb);
540 flip_state->fb = fb;
541 flip_state->crtc = crtc;
542 flip_state->event = event;
543
544 /* Make sure all other async modesets have landed. */
545 ret = down_interruptible(&vc4->async_modeset);
546 if (ret) {
547 kfree(flip_state);
548 return ret;
549 }
550
551 /* Immediately update the plane's legacy fb pointer, so that later
552 * modeset prep sees the state that will be present when the semaphore
553 * is released.
554 */
555 drm_atomic_set_fb_for_plane(plane->state, fb);
556 plane->fb = fb;
557
558 vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno,
559 vc4_async_page_flip_complete);
560
561 /* Driver takes ownership of state on successful async commit. */
562 return 0;
563}
564
565static int vc4_page_flip(struct drm_crtc *crtc,
566 struct drm_framebuffer *fb,
567 struct drm_pending_vblank_event *event,
568 uint32_t flags)
569{
570 if (flags & DRM_MODE_PAGE_FLIP_ASYNC)
571 return vc4_async_page_flip(crtc, fb, event, flags);
572 else
573 return drm_atomic_helper_page_flip(crtc, fb, event, flags);
574}
575
 static const struct drm_crtc_funcs vc4_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
 	.destroy = vc4_crtc_destroy,
-	.page_flip = drm_atomic_helper_page_flip,
+	.page_flip = vc4_page_flip,
 	.set_property = NULL,
 	.cursor_set = NULL, /* handled by drm_mode_cursor_universal */
 	.cursor_move = NULL, /* handled by drm_mode_cursor_universal */
diff --git a/drivers/gpu/drm/vc4/vc4_debugfs.c b/drivers/gpu/drm/vc4/vc4_debugfs.c
index 4297b0a5b74e..d76ad10b07fd 100644
--- a/drivers/gpu/drm/vc4/vc4_debugfs.c
+++ b/drivers/gpu/drm/vc4/vc4_debugfs.c
@@ -16,11 +16,14 @@
16#include "vc4_regs.h" 16#include "vc4_regs.h"
17 17
18static const struct drm_info_list vc4_debugfs_list[] = { 18static const struct drm_info_list vc4_debugfs_list[] = {
19 {"bo_stats", vc4_bo_stats_debugfs, 0},
19 {"hdmi_regs", vc4_hdmi_debugfs_regs, 0}, 20 {"hdmi_regs", vc4_hdmi_debugfs_regs, 0},
20 {"hvs_regs", vc4_hvs_debugfs_regs, 0}, 21 {"hvs_regs", vc4_hvs_debugfs_regs, 0},
21 {"crtc0_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)0}, 22 {"crtc0_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)0},
22 {"crtc1_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)1}, 23 {"crtc1_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)1},
23 {"crtc2_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)2}, 24 {"crtc2_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)2},
25 {"v3d_ident", vc4_v3d_debugfs_ident, 0},
26 {"v3d_regs", vc4_v3d_debugfs_regs, 0},
24}; 27};
25 28
26#define VC4_DEBUGFS_ENTRIES ARRAY_SIZE(vc4_debugfs_list) 29#define VC4_DEBUGFS_ENTRIES ARRAY_SIZE(vc4_debugfs_list)
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index d5db9e0f3b73..cbcbbb83500e 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -16,6 +16,7 @@
 #include <linux/platform_device.h>
 #include "drm_fb_cma_helper.h"
 
+#include "uapi/drm/vc4_drm.h"
 #include "vc4_drv.h"
 #include "vc4_regs.h"
 
@@ -63,7 +64,7 @@ static const struct file_operations vc4_drm_fops = {
 	.open = drm_open,
 	.release = drm_release,
 	.unlocked_ioctl = drm_ioctl,
-	.mmap = drm_gem_cma_mmap,
+	.mmap = vc4_mmap,
 	.poll = drm_poll,
 	.read = drm_read,
 #ifdef CONFIG_COMPAT
@@ -73,16 +74,30 @@ static const struct file_operations vc4_drm_fops = {
 };
 
 static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl,
+			  DRM_ROOT_ONLY),
 };
 
 static struct drm_driver vc4_drm_driver = {
 	.driver_features = (DRIVER_MODESET |
 			    DRIVER_ATOMIC |
 			    DRIVER_GEM |
+			    DRIVER_HAVE_IRQ |
 			    DRIVER_PRIME),
 	.lastclose = vc4_lastclose,
 	.preclose = vc4_drm_preclose,
 
+	.irq_handler = vc4_irq,
+	.irq_preinstall = vc4_irq_preinstall,
+	.irq_postinstall = vc4_irq_postinstall,
+	.irq_uninstall = vc4_irq_uninstall,
+
 	.enable_vblank = vc4_enable_vblank,
 	.disable_vblank = vc4_disable_vblank,
 	.get_vblank_counter = drm_vblank_count,
@@ -92,18 +107,19 @@ static struct drm_driver vc4_drm_driver = {
 	.debugfs_cleanup = vc4_debugfs_cleanup,
 #endif
 
-	.gem_free_object = drm_gem_cma_free_object,
+	.gem_create_object = vc4_create_object,
+	.gem_free_object = vc4_free_object,
 	.gem_vm_ops = &drm_gem_cma_vm_ops,
 
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.gem_prime_import = drm_gem_prime_import,
-	.gem_prime_export = drm_gem_prime_export,
+	.gem_prime_export = vc4_prime_export,
 	.gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table,
 	.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
-	.gem_prime_vmap = drm_gem_cma_prime_vmap,
+	.gem_prime_vmap = vc4_prime_vmap,
 	.gem_prime_vunmap = drm_gem_cma_prime_vunmap,
-	.gem_prime_mmap = drm_gem_cma_prime_mmap,
+	.gem_prime_mmap = vc4_prime_mmap,
 
 	.dumb_create = vc4_dumb_create,
 	.dumb_map_offset = drm_gem_cma_dumb_map_offset,
@@ -170,13 +186,17 @@ static int vc4_drm_bind(struct device *dev)
 
 	drm_dev_set_unique(drm, dev_name(dev));
 
+	vc4_bo_cache_init(drm);
+
 	drm_mode_config_init(drm);
 	if (ret)
 		goto unref;
 
+	vc4_gem_init(drm);
+
 	ret = component_bind_all(dev, drm);
 	if (ret)
-		goto unref;
+		goto gem_destroy;
 
 	ret = drm_dev_register(drm, 0);
 	if (ret < 0)
@@ -200,8 +220,11 @@ unregister:
 	drm_dev_unregister(drm);
 unbind_all:
 	component_unbind_all(dev, drm);
+gem_destroy:
+	vc4_gem_destroy(drm);
 unref:
 	drm_dev_unref(drm);
+	vc4_bo_cache_destroy(drm);
 	return ret;
 }
 
@@ -228,6 +251,7 @@ static struct platform_driver *const component_drivers[] = {
 	&vc4_hdmi_driver,
 	&vc4_crtc_driver,
 	&vc4_hvs_driver,
+	&vc4_v3d_driver,
 };
 
 static int vc4_platform_drm_probe(struct platform_device *pdev)
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index fd8319fa682e..080865ec2bae 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -15,8 +15,89 @@ struct vc4_dev {
 	struct vc4_hdmi *hdmi;
 	struct vc4_hvs *hvs;
 	struct vc4_crtc *crtc[3];
+	struct vc4_v3d *v3d;
 
 	struct drm_fbdev_cma *fbdev;
21
22 struct vc4_hang_state *hang_state;
23
24 /* The kernel-space BO cache. Tracks buffers that have been
25 * unreferenced by all other users (refcounts of 0!) but not
26 * yet freed, so we can do cheap allocations.
27 */
28 struct vc4_bo_cache {
29 /* Array of list heads for entries in the BO cache,
30 * based on number of pages, so we can do O(1) lookups
31 * in the cache when allocating.
32 */
33 struct list_head *size_list;
34 uint32_t size_list_size;
35
36 /* List of all BOs in the cache, ordered by age, so we
37 * can do O(1) lookups when trying to free old
38 * buffers.
39 */
40 struct list_head time_list;
41 struct work_struct time_work;
42 struct timer_list time_timer;
43 } bo_cache;
44
45 struct vc4_bo_stats {
46 u32 num_allocated;
47 u32 size_allocated;
48 u32 num_cached;
49 u32 size_cached;
50 } bo_stats;
51
52 /* Protects bo_cache and the BO stats. */
53 struct mutex bo_lock;
54
55 /* Sequence number for the last job queued in job_list.
56 * Starts at 0 (no jobs emitted).
57 */
58 uint64_t emit_seqno;
59
60 /* Sequence number for the last completed job on the GPU.
61 * Starts at 0 (no jobs completed).
62 */
63 uint64_t finished_seqno;
64
65 /* List of all struct vc4_exec_info for jobs to be executed.
66 * The first job in the list is the one currently programmed
67 * into ct0ca/ct1ca for execution.
68 */
69 struct list_head job_list;
70 /* List of the finished vc4_exec_infos waiting to be freed by
71 * job_done_work.
72 */
73 struct list_head job_done_list;
74 /* Spinlock used to synchronize the job_list and seqno
75 * accesses between the IRQ handler and GEM ioctls.
76 */
77 spinlock_t job_lock;
78 wait_queue_head_t job_wait_queue;
79 struct work_struct job_done_work;
80
81 /* List of struct vc4_seqno_cb for callbacks to be made from a
82 * workqueue when the given seqno is passed.
83 */
84 struct list_head seqno_cb_list;
85
86 /* The binner overflow memory that's currently set up in
87 * BPOA/BPOS registers. When overflow occurs and a new one is
88 * allocated, the previous one will be moved to
89 * vc4->current_exec's free list.
90 */
91 struct vc4_bo *overflow_mem;
92 struct work_struct overflow_mem_work;
93
94 struct {
95 uint32_t last_ct0ca, last_ct1ca;
96 struct timer_list timer;
97 struct work_struct reset_work;
98 } hangcheck;
99
100 struct semaphore async_modeset;
 };
 
 static inline struct vc4_dev *
@@ -27,6 +108,25 @@ to_vc4_dev(struct drm_device *dev)
 
 struct vc4_bo {
 	struct drm_gem_cma_object base;
111
112 /* seqno of the last job to render to this BO. */
113 uint64_t seqno;
114
115 /* List entry for the BO's position in either
116 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
117 */
118 struct list_head unref_head;
119
120 /* Time in jiffies when the BO was put in vc4->bo_cache. */
121 unsigned long free_time;
122
123 /* List entry for the BO's position in vc4_dev->bo_cache.size_list */
124 struct list_head size_head;
125
126 /* Struct for shader validation state, if created by
127 * DRM_IOCTL_VC4_CREATE_SHADER_BO.
128 */
129 struct vc4_validated_shader_info *validated_shader;
 };
 
 static inline struct vc4_bo *
@@ -35,6 +135,17 @@ to_vc4_bo(struct drm_gem_object *bo)
 	return (struct vc4_bo *)bo;
 }
 
138struct vc4_seqno_cb {
139 struct work_struct work;
140 uint64_t seqno;
141 void (*func)(struct vc4_seqno_cb *cb);
142};
143
144struct vc4_v3d {
145 struct platform_device *pdev;
146 void __iomem *regs;
147};
148
 struct vc4_hvs {
 	struct platform_device *pdev;
 	void __iomem *regs;
@@ -72,9 +183,142 @@ to_vc4_encoder(struct drm_encoder *encoder)
 	return container_of(encoder, struct vc4_encoder, base);
 }
 
+#define V3D_READ(offset) readl(vc4->v3d->regs + offset)
+#define V3D_WRITE(offset, val) writel(val, vc4->v3d->regs + offset)
 #define HVS_READ(offset) readl(vc4->hvs->regs + offset)
 #define HVS_WRITE(offset, val) writel(val, vc4->hvs->regs + offset)
 
191struct vc4_exec_info {
192 /* Sequence number for this bin/render job. */
193 uint64_t seqno;
194
195 /* Kernel-space copy of the ioctl arguments */
196 struct drm_vc4_submit_cl *args;
197
198 /* This is the array of BOs that were looked up at the start of exec.
199 * Command validation will use indices into this array.
200 */
201 struct drm_gem_cma_object **bo;
202 uint32_t bo_count;
203
204 /* Pointers for our position in vc4->job_list */
205 struct list_head head;
206
207 /* List of other BOs used in the job that need to be released
208 * once the job is complete.
209 */
210 struct list_head unref_list;
211
212 /* Current unvalidated indices into @bo loaded by the non-hardware
213 * VC4_PACKET_GEM_HANDLES.
214 */
215 uint32_t bo_index[2];
216
217 /* This is the BO where we store the validated command lists, shader
218 * records, and uniforms.
219 */
220 struct drm_gem_cma_object *exec_bo;
221
222 /**
223 * This tracks the per-shader-record state (packet 64) that
224 * determines the length of the shader record and the offset
225 * it's expected to be found at. It gets read in from the
226 * command lists.
227 */
228 struct vc4_shader_state {
229 uint32_t addr;
230 /* Maximum vertex index referenced by any primitive using this
231 * shader state.
232 */
233 uint32_t max_index;
234 } *shader_state;
235
236 /** How many shader states the user declared they were using. */
237 uint32_t shader_state_size;
238 /** How many shader state records the validator has seen. */
239 uint32_t shader_state_count;
240
241 bool found_tile_binning_mode_config_packet;
242 bool found_start_tile_binning_packet;
243 bool found_increment_semaphore_packet;
244 bool found_flush;
245 uint8_t bin_tiles_x, bin_tiles_y;
246 struct drm_gem_cma_object *tile_bo;
247 uint32_t tile_alloc_offset;
248
249 /**
250 * Computed addresses pointing into exec_bo where we start the
251 * bin thread (ct0) and render thread (ct1).
252 */
253 uint32_t ct0ca, ct0ea;
254 uint32_t ct1ca, ct1ea;
255
256 /* Pointer to the unvalidated bin CL (if present). */
257 void *bin_u;
258
259 /* Pointers to the shader recs. These paddr gets incremented as CL
260 * packets are relocated in validate_gl_shader_state, and the vaddrs
261 * (u and v) get incremented and size decremented as the shader recs
262 * themselves are validated.
263 */
264 void *shader_rec_u;
265 void *shader_rec_v;
266 uint32_t shader_rec_p;
267 uint32_t shader_rec_size;
268
269 /* Pointers to the uniform data. These pointers are incremented, and
270 * size decremented, as each batch of uniforms is uploaded.
271 */
272 void *uniforms_u;
273 void *uniforms_v;
274 uint32_t uniforms_p;
275 uint32_t uniforms_size;
276};
277
278static inline struct vc4_exec_info *
279vc4_first_job(struct vc4_dev *vc4)
280{
281 if (list_empty(&vc4->job_list))
282 return NULL;
283 return list_first_entry(&vc4->job_list, struct vc4_exec_info, head);
284}
285
286/**
287 * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
288 * setup parameters.
289 *
290 * This will be used at draw time to relocate the reference to the texture
291 * contents in p0, and validate that the offset combined with
292 * width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO.
293 * Note that the hardware treats unprovided config parameters as 0, so not all
294 * of them need to be set up for every texture sample, and we'll store ~0 as
295 * the offset to mark the unused ones.
296 *
297 * See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit
298 * Setup") for definitions of the texture parameters.
299 */
300struct vc4_texture_sample_info {
301 bool is_direct;
302 uint32_t p_offset[4];
303};
304
305/**
306 * struct vc4_validated_shader_info - information about validated shaders that
307 * needs to be used from command list validation.
308 *
309 * For a given shader, each time a shader state record references it, we need
310 * to verify that the shader doesn't read more uniforms than the shader state
311 * record's uniform BO pointer can provide, and we need to apply relocations
312 * and validate the shader state record's uniforms that define the texture
313 * samples.
314 */
315struct vc4_validated_shader_info {
316 uint32_t uniforms_size;
317 uint32_t uniforms_src_size;
318 uint32_t num_texture_samples;
319 struct vc4_texture_sample_info *texture_samples;
320};
321
 /**
  * _wait_for - magic (register) wait macro
  *
@@ -104,13 +348,29 @@ to_vc4_encoder(struct drm_encoder *encoder)
 #define wait_for(COND, MS) _wait_for(COND, MS, 1)
 
 /* vc4_bo.c */
+struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size);
 void vc4_free_object(struct drm_gem_object *gem_obj);
-struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size);
+struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size,
+			     bool from_cache);
 int vc4_dumb_create(struct drm_file *file_priv,
 		    struct drm_device *dev,
 		    struct drm_mode_create_dumb *args);
 struct dma_buf *vc4_prime_export(struct drm_device *dev,
 				 struct drm_gem_object *obj, int flags);
360int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
361 struct drm_file *file_priv);
362int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
363 struct drm_file *file_priv);
364int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
365 struct drm_file *file_priv);
366int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
367 struct drm_file *file_priv);
368int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
369int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
370void *vc4_prime_vmap(struct drm_gem_object *obj);
371void vc4_bo_cache_init(struct drm_device *dev);
372void vc4_bo_cache_destroy(struct drm_device *dev);
373int vc4_bo_stats_debugfs(struct seq_file *m, void *arg);
 
 /* vc4_crtc.c */
 extern struct platform_driver vc4_crtc_driver;
@@ -126,10 +386,34 @@ void vc4_debugfs_cleanup(struct drm_minor *minor);
 /* vc4_drv.c */
 void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);
 
389/* vc4_gem.c */
390void vc4_gem_init(struct drm_device *dev);
391void vc4_gem_destroy(struct drm_device *dev);
392int vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
393 struct drm_file *file_priv);
394int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
395 struct drm_file *file_priv);
396int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
397 struct drm_file *file_priv);
398void vc4_submit_next_job(struct drm_device *dev);
399int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
400 uint64_t timeout_ns, bool interruptible);
401void vc4_job_handle_completed(struct vc4_dev *vc4);
402int vc4_queue_seqno_cb(struct drm_device *dev,
403 struct vc4_seqno_cb *cb, uint64_t seqno,
404 void (*func)(struct vc4_seqno_cb *cb));
405
 /* vc4_hdmi.c */
 extern struct platform_driver vc4_hdmi_driver;
 int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused);
 
410/* vc4_irq.c */
411irqreturn_t vc4_irq(int irq, void *arg);
412void vc4_irq_preinstall(struct drm_device *dev);
413int vc4_irq_postinstall(struct drm_device *dev);
414void vc4_irq_uninstall(struct drm_device *dev);
415void vc4_irq_reset(struct drm_device *dev);
416
 /* vc4_hvs.c */
 extern struct platform_driver vc4_hvs_driver;
 void vc4_hvs_dump_state(struct drm_device *dev);
@@ -143,3 +427,35 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
 			  enum drm_plane_type type);
 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
 u32 vc4_plane_dlist_size(struct drm_plane_state *state);
430void vc4_plane_async_set_fb(struct drm_plane *plane,
431 struct drm_framebuffer *fb);
432
433/* vc4_v3d.c */
434extern struct platform_driver vc4_v3d_driver;
435int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused);
436int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused);
437int vc4_v3d_set_power(struct vc4_dev *vc4, bool on);
438
439/* vc4_validate.c */
440int
441vc4_validate_bin_cl(struct drm_device *dev,
442 void *validated,
443 void *unvalidated,
444 struct vc4_exec_info *exec);
445
446int
447vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
448
449struct drm_gem_cma_object *vc4_use_bo(struct vc4_exec_info *exec,
450 uint32_t hindex);
451
452int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);
453
454bool vc4_check_tex_size(struct vc4_exec_info *exec,
455 struct drm_gem_cma_object *fbo,
456 uint32_t offset, uint8_t tiling_format,
457 uint32_t width, uint32_t height, uint8_t cpp);
458
459/* vc4_validate_shader.c */
460struct vc4_validated_shader_info *
461vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
new file mode 100644
index 000000000000..39f29e759334
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -0,0 +1,867 @@
1/*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <linux/module.h>
25#include <linux/platform_device.h>
26#include <linux/device.h>
27#include <linux/io.h>
28
29#include "uapi/drm/vc4_drm.h"
30#include "vc4_drv.h"
31#include "vc4_regs.h"
32#include "vc4_trace.h"
33
34static void
35vc4_queue_hangcheck(struct drm_device *dev)
36{
37 struct vc4_dev *vc4 = to_vc4_dev(dev);
38
39 mod_timer(&vc4->hangcheck.timer,
40 round_jiffies_up(jiffies + msecs_to_jiffies(100)));
41}
42
43struct vc4_hang_state {
44 struct drm_vc4_get_hang_state user_state;
45
46 u32 bo_count;
47 struct drm_gem_object **bo;
48};
49
50static void
51vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
52{
53 unsigned int i;
54
55 mutex_lock(&dev->struct_mutex);
56 for (i = 0; i < state->user_state.bo_count; i++)
57 drm_gem_object_unreference(state->bo[i]);
58 mutex_unlock(&dev->struct_mutex);
59
60 kfree(state);
61}
62
63int
64vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
65 struct drm_file *file_priv)
66{
67 struct drm_vc4_get_hang_state *get_state = data;
68 struct drm_vc4_get_hang_state_bo *bo_state;
69 struct vc4_hang_state *kernel_state;
70 struct drm_vc4_get_hang_state *state;
71 struct vc4_dev *vc4 = to_vc4_dev(dev);
72 unsigned long irqflags;
73 u32 i;
74 int ret;
75
76 spin_lock_irqsave(&vc4->job_lock, irqflags);
77 kernel_state = vc4->hang_state;
78 if (!kernel_state) {
79 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
80 return -ENOENT;
81 }
82 state = &kernel_state->user_state;
83
84 /* If the user's array isn't big enough, just return the
85 * required array size.
86 */
87 if (get_state->bo_count < state->bo_count) {
88 get_state->bo_count = state->bo_count;
89 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
90 return 0;
91 }
92
93 vc4->hang_state = NULL;
94 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
95
96 /* Save the user's BO pointer, so we don't stomp it with the memcpy. */
97 state->bo = get_state->bo;
98 memcpy(get_state, state, sizeof(*state));
99
100 bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
101 if (!bo_state) {
102 ret = -ENOMEM;
103 goto err_free;
104 }
105
106 for (i = 0; i < state->bo_count; i++) {
107 struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
108 u32 handle;
109
110 ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
111 &handle);
112
113 if (ret) {
114 state->bo_count = i - 1;
115 goto err;
116 }
117 bo_state[i].handle = handle;
118 bo_state[i].paddr = vc4_bo->base.paddr;
119 bo_state[i].size = vc4_bo->base.base.size;
120 }
121
122 ret = copy_to_user((void __user *)(uintptr_t)get_state->bo,
123 bo_state,
124 state->bo_count * sizeof(*bo_state));
125 kfree(bo_state);
126
127err_free:
128
129 vc4_free_hang_state(dev, kernel_state);
130
131err:
132 return ret;
133}
134
135static void
136vc4_save_hang_state(struct drm_device *dev)
137{
138 struct vc4_dev *vc4 = to_vc4_dev(dev);
139 struct drm_vc4_get_hang_state *state;
140 struct vc4_hang_state *kernel_state;
141 struct vc4_exec_info *exec;
142 struct vc4_bo *bo;
143 unsigned long irqflags;
144 unsigned int i, unref_list_count;
145
146 kernel_state = kcalloc(1, sizeof(*state), GFP_KERNEL);
147 if (!kernel_state)
148 return;
149
150 state = &kernel_state->user_state;
151
152 spin_lock_irqsave(&vc4->job_lock, irqflags);
153 exec = vc4_first_job(vc4);
154 if (!exec) {
155 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
156 return;
157 }
158
159 unref_list_count = 0;
160 list_for_each_entry(bo, &exec->unref_list, unref_head)
161 unref_list_count++;
162
163 state->bo_count = exec->bo_count + unref_list_count;
164 kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo),
165 GFP_ATOMIC);
166 if (!kernel_state->bo) {
167 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
168 return;
169 }
170
171 for (i = 0; i < exec->bo_count; i++) {
172 drm_gem_object_reference(&exec->bo[i]->base);
173 kernel_state->bo[i] = &exec->bo[i]->base;
174 }
175
176 list_for_each_entry(bo, &exec->unref_list, unref_head) {
177 drm_gem_object_reference(&bo->base.base);
178 kernel_state->bo[i] = &bo->base.base;
179 i++;
180 }
181
182 state->start_bin = exec->ct0ca;
183 state->start_render = exec->ct1ca;
184
185 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
186
187 state->ct0ca = V3D_READ(V3D_CTNCA(0));
188 state->ct0ea = V3D_READ(V3D_CTNEA(0));
189
190 state->ct1ca = V3D_READ(V3D_CTNCA(1));
191 state->ct1ea = V3D_READ(V3D_CTNEA(1));
192
193 state->ct0cs = V3D_READ(V3D_CTNCS(0));
194 state->ct1cs = V3D_READ(V3D_CTNCS(1));
195
196 state->ct0ra0 = V3D_READ(V3D_CT00RA0);
197 state->ct1ra0 = V3D_READ(V3D_CT01RA0);
198
199 state->bpca = V3D_READ(V3D_BPCA);
200 state->bpcs = V3D_READ(V3D_BPCS);
201 state->bpoa = V3D_READ(V3D_BPOA);
202 state->bpos = V3D_READ(V3D_BPOS);
203
204 state->vpmbase = V3D_READ(V3D_VPMBASE);
205
206 state->dbge = V3D_READ(V3D_DBGE);
207 state->fdbgo = V3D_READ(V3D_FDBGO);
208 state->fdbgb = V3D_READ(V3D_FDBGB);
209 state->fdbgr = V3D_READ(V3D_FDBGR);
210 state->fdbgs = V3D_READ(V3D_FDBGS);
211 state->errstat = V3D_READ(V3D_ERRSTAT);
212
213 spin_lock_irqsave(&vc4->job_lock, irqflags);
214 if (vc4->hang_state) {
215 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
216 vc4_free_hang_state(dev, kernel_state);
217 } else {
218 vc4->hang_state = kernel_state;
219 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
220 }
221}
222
223static void
224vc4_reset(struct drm_device *dev)
225{
226 struct vc4_dev *vc4 = to_vc4_dev(dev);
227
228 DRM_INFO("Resetting GPU.\n");
229 vc4_v3d_set_power(vc4, false);
230 vc4_v3d_set_power(vc4, true);
231
232 vc4_irq_reset(dev);
233
234 /* Rearm the hangcheck -- another job might have been waiting
235 * for our hung one to get kicked off, and vc4_irq_reset()
236 * would have started it.
237 */
238 vc4_queue_hangcheck(dev);
239}
240
241static void
242vc4_reset_work(struct work_struct *work)
243{
244 struct vc4_dev *vc4 =
245 container_of(work, struct vc4_dev, hangcheck.reset_work);
246
247 vc4_save_hang_state(vc4->dev);
248
249 vc4_reset(vc4->dev);
250}
251
252static void
253vc4_hangcheck_elapsed(unsigned long data)
254{
255 struct drm_device *dev = (struct drm_device *)data;
256 struct vc4_dev *vc4 = to_vc4_dev(dev);
257 uint32_t ct0ca, ct1ca;
258
259 /* If idle, we can stop watching for hangs. */
260 if (list_empty(&vc4->job_list))
261 return;
262
263 ct0ca = V3D_READ(V3D_CTNCA(0));
264 ct1ca = V3D_READ(V3D_CTNCA(1));
265
266 /* If we've made any progress in execution, rearm the timer
267 * and wait.
268 */
269 if (ct0ca != vc4->hangcheck.last_ct0ca ||
270 ct1ca != vc4->hangcheck.last_ct1ca) {
271 vc4->hangcheck.last_ct0ca = ct0ca;
272 vc4->hangcheck.last_ct1ca = ct1ca;
273 vc4_queue_hangcheck(dev);
274 return;
275 }
276
277 /* We've gone too long with no progress, reset. This has to
278 * be done from a work struct, since resetting can sleep and
279 * this timer hook isn't allowed to.
280 */
281 schedule_work(&vc4->hangcheck.reset_work);
282}
283
284static void
285submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
286{
287 struct vc4_dev *vc4 = to_vc4_dev(dev);
288
289 /* Set the current and end address of the control list.
290 * Writing the end register is what starts the job.
291 */
292 V3D_WRITE(V3D_CTNCA(thread), start);
293 V3D_WRITE(V3D_CTNEA(thread), end);
294}
295
296int
297vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
298 bool interruptible)
299{
300 struct vc4_dev *vc4 = to_vc4_dev(dev);
301 int ret = 0;
302 unsigned long timeout_expire;
303 DEFINE_WAIT(wait);
304
305 if (vc4->finished_seqno >= seqno)
306 return 0;
307
308 if (timeout_ns == 0)
309 return -ETIME;
310
311 timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);
312
313 trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
314 for (;;) {
315 prepare_to_wait(&vc4->job_wait_queue, &wait,
316 interruptible ? TASK_INTERRUPTIBLE :
317 TASK_UNINTERRUPTIBLE);
318
319 if (interruptible && signal_pending(current)) {
320 ret = -ERESTARTSYS;
321 break;
322 }
323
324 if (vc4->finished_seqno >= seqno)
325 break;
326
327 if (timeout_ns != ~0ull) {
328 if (time_after_eq(jiffies, timeout_expire)) {
329 ret = -ETIME;
330 break;
331 }
332 schedule_timeout(timeout_expire - jiffies);
333 } else {
334 schedule();
335 }
336 }
337
338 finish_wait(&vc4->job_wait_queue, &wait);
339 trace_vc4_wait_for_seqno_end(dev, seqno);
340
341 if (ret && ret != -ERESTARTSYS) {
342 DRM_ERROR("timeout waiting for render thread idle\n");
343 return ret;
344 }
345
346 return 0;
347}
348
349static void
350vc4_flush_caches(struct drm_device *dev)
351{
352 struct vc4_dev *vc4 = to_vc4_dev(dev);
353
354 /* Flush the GPU L2 caches. These caches sit on top of system
355 * L3 (the 128kb or so shared with the CPU), and are
356 * non-allocating in the L3.
357 */
358 V3D_WRITE(V3D_L2CACTL,
359 V3D_L2CACTL_L2CCLR);
360
361 V3D_WRITE(V3D_SLCACTL,
362 VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
363 VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
364 VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
365 VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
366}
367
368/* Sets the registers for the next job to actually be executed in
369 * the hardware.
370 *
371 * The job_lock should be held during this.
372 */
373void
374vc4_submit_next_job(struct drm_device *dev)
375{
376 struct vc4_dev *vc4 = to_vc4_dev(dev);
377 struct vc4_exec_info *exec = vc4_first_job(vc4);
378
379 if (!exec)
380 return;
381
382 vc4_flush_caches(dev);
383
384 /* Disable the binner's pre-loaded overflow memory address */
385 V3D_WRITE(V3D_BPOA, 0);
386 V3D_WRITE(V3D_BPOS, 0);
387
388 if (exec->ct0ca != exec->ct0ea)
389 submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
390 submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
391}
392
393static void
394vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
395{
396 struct vc4_bo *bo;
397 unsigned i;
398
399 for (i = 0; i < exec->bo_count; i++) {
400 bo = to_vc4_bo(&exec->bo[i]->base);
401 bo->seqno = seqno;
402 }
403
404 list_for_each_entry(bo, &exec->unref_list, unref_head) {
405 bo->seqno = seqno;
406 }
407}
408
409/* Queues a struct vc4_exec_info for execution. If no job is
410 * currently executing, then submits it.
411 *
412 * Unlike most GPUs, our hardware only handles one command list at a
413 * time. To queue multiple jobs at once, we'd need to edit the
414 * previous command list to have a jump to the new one at the end, and
415 * then bump the end address. That's a change for a later date,
416 * though.
417 */
418static void
419vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
420{
421 struct vc4_dev *vc4 = to_vc4_dev(dev);
422 uint64_t seqno;
423 unsigned long irqflags;
424
425 spin_lock_irqsave(&vc4->job_lock, irqflags);
426
427 seqno = ++vc4->emit_seqno;
428 exec->seqno = seqno;
429 vc4_update_bo_seqnos(exec, seqno);
430
431 list_add_tail(&exec->head, &vc4->job_list);
432
433 /* If no job was executing, kick ours off. Otherwise, it'll
434 * get started when the previous job's frame done interrupt
435 * occurs.
436 */
437 if (vc4_first_job(vc4) == exec) {
438 vc4_submit_next_job(dev);
439 vc4_queue_hangcheck(dev);
440 }
441
442 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
443}
444
445/**
446 * Looks up a bunch of GEM handles for BOs and stores the array for
447 * use in the command validator that actually writes relocated
448 * addresses pointing to them.
449 */
450static int
451vc4_cl_lookup_bos(struct drm_device *dev,
452 struct drm_file *file_priv,
453 struct vc4_exec_info *exec)
454{
455 struct drm_vc4_submit_cl *args = exec->args;
456 uint32_t *handles;
457 int ret = 0;
458 int i;
459
460 exec->bo_count = args->bo_handle_count;
461
462 if (!exec->bo_count) {
463 /* See comment on bo_index for why we have to check
464 * this.
465 */
466 DRM_ERROR("Rendering requires BOs to validate\n");
467 return -EINVAL;
468 }
469
470 exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
471 GFP_KERNEL);
472 if (!exec->bo) {
473 DRM_ERROR("Failed to allocate validated BO pointers\n");
474 return -ENOMEM;
475 }
476
477 handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
478 if (!handles) {
479 DRM_ERROR("Failed to allocate incoming GEM handles\n");
480 ret = -ENOMEM; goto fail;
481 }
482
483 ret = copy_from_user(handles,
484 (void __user *)(uintptr_t)args->bo_handles,
485 exec->bo_count * sizeof(uint32_t));
486 if (ret) {
487 DRM_ERROR("Failed to copy in GEM handles\n");
488 goto fail;
489 }
490
491 spin_lock(&file_priv->table_lock);
492 for (i = 0; i < exec->bo_count; i++) {
493 struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
494 handles[i]);
495 if (!bo) {
496 DRM_ERROR("Failed to look up GEM BO %d: %d\n",
497 i, handles[i]);
498 ret = -EINVAL;
499 spin_unlock(&file_priv->table_lock);
500 goto fail;
501 }
502 drm_gem_object_reference(bo);
503 exec->bo[i] = (struct drm_gem_cma_object *)bo;
504 }
505 spin_unlock(&file_priv->table_lock);
506
507fail:
508 kfree(handles);
509 return ret;
510}
511
512static int
513vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
514{
515 struct drm_vc4_submit_cl *args = exec->args;
516 void *temp = NULL;
517 void *bin;
518 int ret = 0;
519 uint32_t bin_offset = 0;
520 uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
521 16);
522 uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
523 uint32_t exec_size = uniforms_offset + args->uniforms_size;
524 uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
525 args->shader_rec_count);
526 struct vc4_bo *bo;
527
528 if (uniforms_offset < shader_rec_offset ||
529 exec_size < uniforms_offset ||
530 args->shader_rec_count >= (UINT_MAX /
531 sizeof(struct vc4_shader_state)) ||
532 temp_size < exec_size) {
533 DRM_ERROR("overflow in exec arguments\n");
534 ret = -EINVAL; goto fail;
535 }
536
537 /* Allocate space where we'll store the copied in user command lists
538 * and shader records.
539 *
540 * We don't just copy directly into the BOs because we need to
541 * read the contents back for validation, and I think the
542 * bo->vaddr mapping is uncached.
543 */
544 temp = kmalloc(temp_size, GFP_KERNEL);
545 if (!temp) {
546 DRM_ERROR("Failed to allocate storage for copying "
547 "in bin/render CLs.\n");
548 ret = -ENOMEM;
549 goto fail;
550 }
551 bin = temp + bin_offset;
552 exec->shader_rec_u = temp + shader_rec_offset;
553 exec->uniforms_u = temp + uniforms_offset;
554 exec->shader_state = temp + exec_size;
555 exec->shader_state_size = args->shader_rec_count;
556
557 ret = copy_from_user(bin,
558 (void __user *)(uintptr_t)args->bin_cl,
559 args->bin_cl_size);
560 if (ret) {
561 DRM_ERROR("Failed to copy in bin cl\n");
562 goto fail;
563 }
564
565 ret = copy_from_user(exec->shader_rec_u,
566 (void __user *)(uintptr_t)args->shader_rec,
567 args->shader_rec_size);
568 if (ret) {
569 DRM_ERROR("Failed to copy in shader recs\n");
570 goto fail;
571 }
572
573 ret = copy_from_user(exec->uniforms_u,
574 (void __user *)(uintptr_t)args->uniforms,
575 args->uniforms_size);
576 if (ret) {
577 DRM_ERROR("Failed to copy in uniforms cl\n");
578 goto fail;
579 }
580
581 bo = vc4_bo_create(dev, exec_size, true);
582 if (!bo) {
583 DRM_ERROR("Couldn't allocate BO for binning\n");
584 ret = -ENOMEM;
585 goto fail;
586 }
587 exec->exec_bo = &bo->base;
588
589 list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
590 &exec->unref_list);
591
592 exec->ct0ca = exec->exec_bo->paddr + bin_offset;
593
594 exec->bin_u = bin;
595
596 exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
597 exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
598 exec->shader_rec_size = args->shader_rec_size;
599
600 exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
601 exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
602 exec->uniforms_size = args->uniforms_size;
603
604 ret = vc4_validate_bin_cl(dev,
605 exec->exec_bo->vaddr + bin_offset,
606 bin,
607 exec);
608 if (ret)
609 goto fail;
610
611 ret = vc4_validate_shader_recs(dev, exec);
612
613fail:
614 kfree(temp);
615 return ret;
616}
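To make the temp-buffer layout above concrete, a hypothetical submit with bin_cl_size = 100, shader_rec_size = 64, uniforms_size = 32 and shader_rec_count = 2 works out to:

    bin_offset        = 0
    shader_rec_offset = roundup(0 + 100, 16) = 112
    uniforms_offset   = 112 + 64             = 176
    exec_size         = 176 + 32             = 208
    temp_size         = 208 + 2 * sizeof(struct vc4_shader_state)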
617
618static void
619vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
620{
621 unsigned i;
622
623 /* Need the struct lock for drm_gem_object_unreference(). */
624 mutex_lock(&dev->struct_mutex);
625 if (exec->bo) {
626 for (i = 0; i < exec->bo_count; i++)
627 drm_gem_object_unreference(&exec->bo[i]->base);
628 kfree(exec->bo);
629 }
630
631 while (!list_empty(&exec->unref_list)) {
632 struct vc4_bo *bo = list_first_entry(&exec->unref_list,
633 struct vc4_bo, unref_head);
634 list_del(&bo->unref_head);
635 drm_gem_object_unreference(&bo->base.base);
636 }
637 mutex_unlock(&dev->struct_mutex);
638
639 kfree(exec);
640}
641
642void
643vc4_job_handle_completed(struct vc4_dev *vc4)
644{
645 unsigned long irqflags;
646 struct vc4_seqno_cb *cb, *cb_temp;
647
648 spin_lock_irqsave(&vc4->job_lock, irqflags);
649 while (!list_empty(&vc4->job_done_list)) {
650 struct vc4_exec_info *exec =
651 list_first_entry(&vc4->job_done_list,
652 struct vc4_exec_info, head);
653 list_del(&exec->head);
654
655 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
656 vc4_complete_exec(vc4->dev, exec);
657 spin_lock_irqsave(&vc4->job_lock, irqflags);
658 }
659
660 list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
661 if (cb->seqno <= vc4->finished_seqno) {
662 list_del_init(&cb->work.entry);
663 schedule_work(&cb->work);
664 }
665 }
666
667 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
668}
669
670static void vc4_seqno_cb_work(struct work_struct *work)
671{
672 struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);
673
674 cb->func(cb);
675}
676
677int vc4_queue_seqno_cb(struct drm_device *dev,
678 struct vc4_seqno_cb *cb, uint64_t seqno,
679 void (*func)(struct vc4_seqno_cb *cb))
680{
681 struct vc4_dev *vc4 = to_vc4_dev(dev);
682 int ret = 0;
683 unsigned long irqflags;
684
685 cb->func = func;
686 INIT_WORK(&cb->work, vc4_seqno_cb_work);
687
688 spin_lock_irqsave(&vc4->job_lock, irqflags);
689 if (seqno > vc4->finished_seqno) {
690 cb->seqno = seqno;
691 list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
692 } else {
693 schedule_work(&cb->work);
694 }
695 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
696
697 return ret;
698}
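A minimal sketch of how a caller registers a completion callback (my_cleanup and the embedding object are hypothetical names; the atomic commit code in vc4_kms.c below uses the same pattern):

    static void my_cleanup(struct vc4_seqno_cb *cb)
    {
            /* Runs from the workqueue once the seqno has retired. */
    }

    vc4_queue_seqno_cb(dev, &obj->cb, seqno, my_cleanup);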
699
700/* Scheduled when any job has been completed, this walks the list of
701 * completed jobs, unrefs their BOs, and frees their exec
702 * structs.
703 */
704static void
705vc4_job_done_work(struct work_struct *work)
706{
707 struct vc4_dev *vc4 =
708 container_of(work, struct vc4_dev, job_done_work);
709
710 vc4_job_handle_completed(vc4);
711}
712
713static int
714vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
715 uint64_t seqno,
716 uint64_t *timeout_ns)
717{
718 unsigned long start = jiffies;
719 int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);
720
721 if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
722 uint64_t delta = jiffies_to_nsecs(jiffies - start);
723
724 if (*timeout_ns >= delta)
725 *timeout_ns -= delta;
726 }
727
728 return ret;
729}
730
731int
732vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
733 struct drm_file *file_priv)
734{
735 struct drm_vc4_wait_seqno *args = data;
736
737 return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
738 &args->timeout_ns);
739}
740
741int
742vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
743 struct drm_file *file_priv)
744{
745 int ret;
746 struct drm_vc4_wait_bo *args = data;
747 struct drm_gem_object *gem_obj;
748 struct vc4_bo *bo;
749
750 gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
751 if (!gem_obj) {
752 DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
753 return -EINVAL;
754 }
755 bo = to_vc4_bo(gem_obj);
756
757 ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
758 &args->timeout_ns);
759
760 drm_gem_object_unreference_unlocked(gem_obj);
761 return ret;
762}
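From userspace, the corresponding call is roughly the sketch below (DRM_IOCTL_VC4_WAIT_BO and struct drm_vc4_wait_bo come from the uapi header added by this series, drmIoctl() from libdrm; treat the details as assumptions, not part of this patch):

    struct drm_vc4_wait_bo wait = {
            .handle = bo_handle,         /* GEM handle of a BO used by the job */
            .timeout_ns = 1000000000ull, /* give up after one second */
    };
    int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_BO, &wait);
    /* A timeout surfaces as an ETIME error; when interrupted by a signal,
     * the kernel-side helper deducts the time already waited from timeout_ns.
     */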
763
764/**
765 * Submits a command list to the VC4.
766 *
767 * This is what is called batchbuffer emitting on other hardware.
768 */
769int
770vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
771 struct drm_file *file_priv)
772{
773 struct vc4_dev *vc4 = to_vc4_dev(dev);
774 struct drm_vc4_submit_cl *args = data;
775 struct vc4_exec_info *exec;
776 int ret;
777
778 if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
779 DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
780 return -EINVAL;
781 }
782
783 exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
784 if (!exec) {
785 DRM_ERROR("malloc failure on exec struct\n");
786 return -ENOMEM;
787 }
788
789 exec->args = args;
790 INIT_LIST_HEAD(&exec->unref_list);
791
792 ret = vc4_cl_lookup_bos(dev, file_priv, exec);
793 if (ret)
794 goto fail;
795
796 if (exec->args->bin_cl_size != 0) {
797 ret = vc4_get_bcl(dev, exec);
798 if (ret)
799 goto fail;
800 } else {
801 exec->ct0ca = 0;
802 exec->ct0ea = 0;
803 }
804
805 ret = vc4_get_rcl(dev, exec);
806 if (ret)
807 goto fail;
808
809 /* Clear this out of the struct we'll be putting in the queue,
810 * since it's part of our stack.
811 */
812 exec->args = NULL;
813
814 vc4_queue_submit(dev, exec);
815
816 /* Return the seqno for our job. */
817 args->seqno = vc4->emit_seqno;
818
819 return 0;
820
821fail:
822 vc4_complete_exec(vc4->dev, exec);
823
824 return ret;
825}
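For orientation, a userspace submission looks roughly like the sketch below (DRM_IOCTL_VC4_SUBMIT_CL and struct drm_vc4_submit_cl come from the uapi header added by this series, drmIoctl() from libdrm; only a few of the required fields are shown, so read it as a hedged outline rather than a working example):

    struct drm_vc4_submit_cl submit = {
            .bin_cl = (uintptr_t)bin_cl,          /* binner control list bytes */
            .bin_cl_size = bin_cl_size,
            .bo_handles = (uintptr_t)bo_handles,  /* array of u32 GEM handles */
            .bo_handle_count = bo_handle_count,
            /* shader recs, uniforms, RCL surfaces, tile bounds, etc. omitted */
    };
    int ret = drmIoctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
    /* On success, submit.seqno identifies the job for the wait ioctls. */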
826
827void
828vc4_gem_init(struct drm_device *dev)
829{
830 struct vc4_dev *vc4 = to_vc4_dev(dev);
831
832 INIT_LIST_HEAD(&vc4->job_list);
833 INIT_LIST_HEAD(&vc4->job_done_list);
834 INIT_LIST_HEAD(&vc4->seqno_cb_list);
835 spin_lock_init(&vc4->job_lock);
836
837 INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
838 setup_timer(&vc4->hangcheck.timer,
839 vc4_hangcheck_elapsed,
840 (unsigned long)dev);
841
842 INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
843}
844
845void
846vc4_gem_destroy(struct drm_device *dev)
847{
848 struct vc4_dev *vc4 = to_vc4_dev(dev);
849
850 /* Waiting for exec to finish would need to be done before
851 * unregistering V3D.
852 */
853 WARN_ON(vc4->emit_seqno != vc4->finished_seqno);
854
855 /* V3D should already have disabled its interrupt and cleared
856 * the overflow allocation registers. Now free the object.
857 */
858 if (vc4->overflow_mem) {
859 drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
860 vc4->overflow_mem = NULL;
861 }
862
863 vc4_bo_cache_destroy(dev);
864
865 if (vc4->hang_state)
866 vc4_free_hang_state(dev, vc4->hang_state);
867}
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
new file mode 100644
index 000000000000..b68060e758db
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -0,0 +1,210 @@
1/*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/** DOC: Interrupt management for the V3D engine.
25 *
26 * We have an interrupt status register (V3D_INTCTL) which reports
27 * interrupts, and where writing 1 bits clears those interrupts.
28 * There are also a pair of interrupt registers
29 * (V3D_INTENA/V3D_INTDIS) where writing a 1 to their bits enables or
30 * disables that specific interrupt, and 0s written are ignored
31 * (reading either one returns the set of enabled interrupts).
32 *
33 * When we take a render frame interrupt, we need to wake the
34 * processes waiting for some frame to be done, and get the next frame
35 * submitted ASAP (so the hardware doesn't sit idle when there's work
36 * to do).
37 *
38 * When we take the binner out of memory interrupt, we need to
39 * allocate some new memory and pass it to the binner so that the
40 * current job can make progress.
41 */
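As a hedged illustration of that register protocol (register and bit names as used further down in this file and in vc4_regs.h), masking, acking and re-enabling just the frame-done interrupt would look like:

    V3D_WRITE(V3D_INTDIS, V3D_INT_FRDONE);  /* mask frame-done */
    V3D_WRITE(V3D_INTCTL, V3D_INT_FRDONE);  /* ack any pending instance */
    V3D_WRITE(V3D_INTENA, V3D_INT_FRDONE);  /* unmask it again */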
42
43#include "vc4_drv.h"
44#include "vc4_regs.h"
45
46#define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \
47 V3D_INT_FRDONE)
48
49DECLARE_WAIT_QUEUE_HEAD(render_wait);
50
51static void
52vc4_overflow_mem_work(struct work_struct *work)
53{
54 struct vc4_dev *vc4 =
55 container_of(work, struct vc4_dev, overflow_mem_work);
56 struct drm_device *dev = vc4->dev;
57 struct vc4_bo *bo;
58
59 bo = vc4_bo_create(dev, 256 * 1024, true);
60 if (!bo) {
61 DRM_ERROR("Couldn't allocate binner overflow mem\n");
62 return;
63 }
64
65 /* If there's a job executing currently, then our previous
66 * overflow allocation is getting used in that job and we need
67 * to queue it to be released when the job is done. But if no
68 * job is executing at all, then we can free the old overflow
69 * object directly.
70 *
71 * No lock necessary for this pointer since we're the only
72 * ones that update the pointer, and our workqueue won't
73 * reenter.
74 */
75 if (vc4->overflow_mem) {
76 struct vc4_exec_info *current_exec;
77 unsigned long irqflags;
78
79 spin_lock_irqsave(&vc4->job_lock, irqflags);
80 current_exec = vc4_first_job(vc4);
81 if (current_exec) {
82 vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
83 list_add_tail(&vc4->overflow_mem->unref_head,
84 &current_exec->unref_list);
85 vc4->overflow_mem = NULL;
86 }
87 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
88 }
89
90 if (vc4->overflow_mem)
91 drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
92 vc4->overflow_mem = bo;
93
94 V3D_WRITE(V3D_BPOA, bo->base.paddr);
95 V3D_WRITE(V3D_BPOS, bo->base.base.size);
96 V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM);
97 V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
98}
99
100static void
101vc4_irq_finish_job(struct drm_device *dev)
102{
103 struct vc4_dev *vc4 = to_vc4_dev(dev);
104 struct vc4_exec_info *exec = vc4_first_job(vc4);
105
106 if (!exec)
107 return;
108
109 vc4->finished_seqno++;
110 list_move_tail(&exec->head, &vc4->job_done_list);
111 vc4_submit_next_job(dev);
112
113 wake_up_all(&vc4->job_wait_queue);
114 schedule_work(&vc4->job_done_work);
115}
116
117irqreturn_t
118vc4_irq(int irq, void *arg)
119{
120 struct drm_device *dev = arg;
121 struct vc4_dev *vc4 = to_vc4_dev(dev);
122 uint32_t intctl;
123 irqreturn_t status = IRQ_NONE;
124
125 barrier();
126 intctl = V3D_READ(V3D_INTCTL);
127
128 /* Acknowledge the interrupts we're handling here. The render
129 * frame done interrupt will be cleared, while OUTOMEM will
130 * stay high until the underlying cause is cleared.
131 */
132 V3D_WRITE(V3D_INTCTL, intctl);
133
134 if (intctl & V3D_INT_OUTOMEM) {
135 /* Disable OUTOMEM until the work is done. */
136 V3D_WRITE(V3D_INTDIS, V3D_INT_OUTOMEM);
137 schedule_work(&vc4->overflow_mem_work);
138 status = IRQ_HANDLED;
139 }
140
141 if (intctl & V3D_INT_FRDONE) {
142 spin_lock(&vc4->job_lock);
143 vc4_irq_finish_job(dev);
144 spin_unlock(&vc4->job_lock);
145 status = IRQ_HANDLED;
146 }
147
148 return status;
149}
150
151void
152vc4_irq_preinstall(struct drm_device *dev)
153{
154 struct vc4_dev *vc4 = to_vc4_dev(dev);
155
156 init_waitqueue_head(&vc4->job_wait_queue);
157 INIT_WORK(&vc4->overflow_mem_work, vc4_overflow_mem_work);
158
159 /* Clear any pending interrupts someone might have left around
160 * for us.
161 */
162 V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
163}
164
165int
166vc4_irq_postinstall(struct drm_device *dev)
167{
168 struct vc4_dev *vc4 = to_vc4_dev(dev);
169
170 /* Enable both the render done and out of memory interrupts. */
171 V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
172
173 return 0;
174}
175
176void
177vc4_irq_uninstall(struct drm_device *dev)
178{
179 struct vc4_dev *vc4 = to_vc4_dev(dev);
180
181 /* Disable sending interrupts for our driver's IRQs. */
182 V3D_WRITE(V3D_INTDIS, V3D_DRIVER_IRQS);
183
184 /* Clear any pending interrupts we might have left. */
185 V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
186
187 cancel_work_sync(&vc4->overflow_mem_work);
188}
189
190/** Reinitializes interrupt registers when a GPU reset is performed. */
191void vc4_irq_reset(struct drm_device *dev)
192{
193 struct vc4_dev *vc4 = to_vc4_dev(dev);
194 unsigned long irqflags;
195
196 /* Acknowledge any stale IRQs. */
197 V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
198
199 /*
200 * Turn all our interrupts on. Binner out of memory is the
201 * only one we expect to trigger at this point, since we've
202 * just come from poweron and haven't supplied any overflow
203 * memory yet.
204 */
205 V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
206
207 spin_lock_irqsave(&vc4->job_lock, irqflags);
208 vc4_irq_finish_job(dev);
209 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
210}
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index 2e5597d10cc6..f95f2df5f8d1 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -15,6 +15,7 @@
15 */ 15 */
16 16
17#include "drm_crtc.h" 17#include "drm_crtc.h"
18#include "drm_atomic.h"
18#include "drm_atomic_helper.h" 19#include "drm_atomic_helper.h"
19#include "drm_crtc_helper.h" 20#include "drm_crtc_helper.h"
20#include "drm_plane_helper.h" 21#include "drm_plane_helper.h"
@@ -29,10 +30,152 @@ static void vc4_output_poll_changed(struct drm_device *dev)
29 drm_fbdev_cma_hotplug_event(vc4->fbdev); 30 drm_fbdev_cma_hotplug_event(vc4->fbdev);
30} 31}
31 32
33struct vc4_commit {
34 struct drm_device *dev;
35 struct drm_atomic_state *state;
36 struct vc4_seqno_cb cb;
37};
38
39static void
40vc4_atomic_complete_commit(struct vc4_commit *c)
41{
42 struct drm_atomic_state *state = c->state;
43 struct drm_device *dev = state->dev;
44 struct vc4_dev *vc4 = to_vc4_dev(dev);
45
46 drm_atomic_helper_commit_modeset_disables(dev, state);
47
48 drm_atomic_helper_commit_planes(dev, state, false);
49
50 drm_atomic_helper_commit_modeset_enables(dev, state);
51
52 drm_atomic_helper_wait_for_vblanks(dev, state);
53
54 drm_atomic_helper_cleanup_planes(dev, state);
55
56 drm_atomic_state_free(state);
57
58 up(&vc4->async_modeset);
59
60 kfree(c);
61}
62
63static void
64vc4_atomic_complete_commit_seqno_cb(struct vc4_seqno_cb *cb)
65{
66 struct vc4_commit *c = container_of(cb, struct vc4_commit, cb);
67
68 vc4_atomic_complete_commit(c);
69}
70
71static struct vc4_commit *commit_init(struct drm_atomic_state *state)
72{
73 struct vc4_commit *c = kzalloc(sizeof(*c), GFP_KERNEL);
74
75 if (!c)
76 return NULL;
77 c->dev = state->dev;
78 c->state = state;
79
80 return c;
81}
82
83/**
84 * vc4_atomic_commit - commit validated state object
85 * @dev: DRM device
86 * @state: the driver state object
87 * @async: asynchronous commit
88 *
89 * This function commits a state object that has been pre-validated with
90 * drm_atomic_helper_check(). This can still fail when e.g. the framebuffer
91 * reservation fails. Asynchronous commits are completed via a seqno callback.
92 *
93 * RETURNS
94 * Zero for success or -errno.
95 */
96static int vc4_atomic_commit(struct drm_device *dev,
97 struct drm_atomic_state *state,
98 bool async)
99{
100 struct vc4_dev *vc4 = to_vc4_dev(dev);
101 int ret;
102 int i;
103 uint64_t wait_seqno = 0;
104 struct vc4_commit *c;
105
106 c = commit_init(state);
107 if (!c)
108 return -ENOMEM;
109
110 /* Make sure that any outstanding modesets have finished. */
111 ret = down_interruptible(&vc4->async_modeset);
112 if (ret) {
113 kfree(c);
114 return ret;
115 }
116
117 ret = drm_atomic_helper_prepare_planes(dev, state);
118 if (ret) {
119 kfree(c);
120 up(&vc4->async_modeset);
121 return ret;
122 }
123
124 for (i = 0; i < dev->mode_config.num_total_plane; i++) {
125 struct drm_plane *plane = state->planes[i];
126 struct drm_plane_state *new_state = state->plane_states[i];
127
128 if (!plane)
129 continue;
130
131 if ((plane->state->fb != new_state->fb) && new_state->fb) {
132 struct drm_gem_cma_object *cma_bo =
133 drm_fb_cma_get_gem_obj(new_state->fb, 0);
134 struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
135
136 wait_seqno = max(bo->seqno, wait_seqno);
137 }
138 }
139
140 /*
141 * This is the point of no return - everything below never fails except
142 * when the hw goes bonghits. Which means we can commit the new state on
143 * the software side now.
144 */
145
146 drm_atomic_helper_swap_state(dev, state);
147
148 /*
149 * Everything below can be run asynchronously without the need to grab
150 * any modeset locks at all under one condition: It must be guaranteed
151 * that the asynchronous work has either been cancelled (if the driver
152 * supports it, which at least requires that the framebuffers get
153 * cleaned up with drm_atomic_helper_cleanup_planes()) or completed
154 * before the new state gets committed on the software side with
155 * drm_atomic_helper_swap_state().
156 *
157 * This scheme allows new atomic state updates to be prepared and
158 * checked in parallel to the asynchronous completion of the previous
159 * update. Which is important since compositors need to figure out the
160 * composition of the next frame right after having submitted the
161 * current layout.
162 */
163
164 if (async) {
165 vc4_queue_seqno_cb(dev, &c->cb, wait_seqno,
166 vc4_atomic_complete_commit_seqno_cb);
167 } else {
168 vc4_wait_for_seqno(dev, wait_seqno, ~0ull, false);
169 vc4_atomic_complete_commit(c);
170 }
171
172 return 0;
173}
174
32static const struct drm_mode_config_funcs vc4_mode_funcs = { 175static const struct drm_mode_config_funcs vc4_mode_funcs = {
33 .output_poll_changed = vc4_output_poll_changed, 176 .output_poll_changed = vc4_output_poll_changed,
34 .atomic_check = drm_atomic_helper_check, 177 .atomic_check = drm_atomic_helper_check,
35 .atomic_commit = drm_atomic_helper_commit, 178 .atomic_commit = vc4_atomic_commit,
36 .fb_create = drm_fb_cma_create, 179 .fb_create = drm_fb_cma_create,
37}; 180};
38 181
@@ -41,6 +184,8 @@ int vc4_kms_load(struct drm_device *dev)
41 struct vc4_dev *vc4 = to_vc4_dev(dev); 184 struct vc4_dev *vc4 = to_vc4_dev(dev);
42 int ret; 185 int ret;
43 186
187 sema_init(&vc4->async_modeset, 1);
188
44 ret = drm_vblank_init(dev, dev->mode_config.num_crtc); 189 ret = drm_vblank_init(dev, dev->mode_config.num_crtc);
45 if (ret < 0) { 190 if (ret < 0) {
46 dev_err(dev->dev, "failed to initialize vblank\n"); 191 dev_err(dev->dev, "failed to initialize vblank\n");
@@ -51,6 +196,8 @@ int vc4_kms_load(struct drm_device *dev)
51 dev->mode_config.max_height = 2048; 196 dev->mode_config.max_height = 2048;
52 dev->mode_config.funcs = &vc4_mode_funcs; 197 dev->mode_config.funcs = &vc4_mode_funcs;
53 dev->mode_config.preferred_depth = 24; 198 dev->mode_config.preferred_depth = 24;
199 dev->mode_config.async_page_flip = true;
200
54 dev->vblank_disable_allowed = true; 201 dev->vblank_disable_allowed = true;
55 202
56 drm_mode_config_reset(dev); 203 drm_mode_config_reset(dev);
diff --git a/drivers/gpu/drm/vc4/vc4_packet.h b/drivers/gpu/drm/vc4/vc4_packet.h
new file mode 100644
index 000000000000..0f31cc06500f
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_packet.h
@@ -0,0 +1,399 @@
1/*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#ifndef VC4_PACKET_H
25#define VC4_PACKET_H
26
27#include "vc4_regs.h" /* for VC4_MASK, VC4_GET_FIELD, VC4_SET_FIELD */
28
29enum vc4_packet {
30 VC4_PACKET_HALT = 0,
31 VC4_PACKET_NOP = 1,
32
33 VC4_PACKET_FLUSH = 4,
34 VC4_PACKET_FLUSH_ALL = 5,
35 VC4_PACKET_START_TILE_BINNING = 6,
36 VC4_PACKET_INCREMENT_SEMAPHORE = 7,
37 VC4_PACKET_WAIT_ON_SEMAPHORE = 8,
38
39 VC4_PACKET_BRANCH = 16,
40 VC4_PACKET_BRANCH_TO_SUB_LIST = 17,
41
42 VC4_PACKET_STORE_MS_TILE_BUFFER = 24,
43 VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25,
44 VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26,
45 VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27,
46 VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28,
47 VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29,
48
49 VC4_PACKET_GL_INDEXED_PRIMITIVE = 32,
50 VC4_PACKET_GL_ARRAY_PRIMITIVE = 33,
51
52 VC4_PACKET_COMPRESSED_PRIMITIVE = 48,
53 VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49,
54
55 VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56,
56
57 VC4_PACKET_GL_SHADER_STATE = 64,
58 VC4_PACKET_NV_SHADER_STATE = 65,
59 VC4_PACKET_VG_SHADER_STATE = 66,
60
61 VC4_PACKET_CONFIGURATION_BITS = 96,
62 VC4_PACKET_FLAT_SHADE_FLAGS = 97,
63 VC4_PACKET_POINT_SIZE = 98,
64 VC4_PACKET_LINE_WIDTH = 99,
65 VC4_PACKET_RHT_X_BOUNDARY = 100,
66 VC4_PACKET_DEPTH_OFFSET = 101,
67 VC4_PACKET_CLIP_WINDOW = 102,
68 VC4_PACKET_VIEWPORT_OFFSET = 103,
69 VC4_PACKET_Z_CLIPPING = 104,
70 VC4_PACKET_CLIPPER_XY_SCALING = 105,
71 VC4_PACKET_CLIPPER_Z_SCALING = 106,
72
73 VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112,
74 VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113,
75 VC4_PACKET_CLEAR_COLORS = 114,
76 VC4_PACKET_TILE_COORDINATES = 115,
77
78 /* Not an actual hardware packet -- this is what we use to put
79 * references to GEM bos in the command stream, since we need the u32
80 * in the actual address packet in order to store the offset from the
81 * start of the BO.
82 */
83 VC4_PACKET_GEM_HANDLES = 254,
84} __attribute__ ((__packed__));
85
86#define VC4_PACKET_HALT_SIZE 1
87#define VC4_PACKET_NOP_SIZE 1
88#define VC4_PACKET_FLUSH_SIZE 1
89#define VC4_PACKET_FLUSH_ALL_SIZE 1
90#define VC4_PACKET_START_TILE_BINNING_SIZE 1
91#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE 1
92#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE 1
93#define VC4_PACKET_BRANCH_SIZE 5
94#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE 5
95#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE 1
96#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE 1
97#define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE 5
98#define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE 5
99#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE 7
100#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE 7
101#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE 14
102#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE 10
103#define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE 1
104#define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE 1
105#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE 2
106#define VC4_PACKET_GL_SHADER_STATE_SIZE 5
107#define VC4_PACKET_NV_SHADER_STATE_SIZE 5
108#define VC4_PACKET_VG_SHADER_STATE_SIZE 5
109#define VC4_PACKET_CONFIGURATION_BITS_SIZE 4
110#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE 5
111#define VC4_PACKET_POINT_SIZE_SIZE 5
112#define VC4_PACKET_LINE_WIDTH_SIZE 5
113#define VC4_PACKET_RHT_X_BOUNDARY_SIZE 3
114#define VC4_PACKET_DEPTH_OFFSET_SIZE 5
115#define VC4_PACKET_CLIP_WINDOW_SIZE 9
116#define VC4_PACKET_VIEWPORT_OFFSET_SIZE 5
117#define VC4_PACKET_Z_CLIPPING_SIZE 9
118#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE 9
119#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE 9
120#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE 16
121#define VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE 11
122#define VC4_PACKET_CLEAR_COLORS_SIZE 14
123#define VC4_PACKET_TILE_COORDINATES_SIZE 3
124#define VC4_PACKET_GEM_HANDLES_SIZE 9
125
126/* Number of multisamples supported. */
127#define VC4_MAX_SAMPLES 4
128/* Size of a full resolution color or Z tile buffer load/store. */
129#define VC4_TILE_BUFFER_SIZE (64 * 64 * 4)
130
131/** @{
132 * Bits used by packets like VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
133 * VC4_PACKET_TILE_RENDERING_MODE_CONFIG.
134*/
135#define VC4_TILING_FORMAT_LINEAR 0
136#define VC4_TILING_FORMAT_T 1
137#define VC4_TILING_FORMAT_LT 2
138/** @} */
139
140/** @{
141 *
142 * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
143 * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
144 */
145#define VC4_LOADSTORE_FULL_RES_EOF BIT(3)
146#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL BIT(2)
147#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS BIT(1)
148#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR BIT(0)
149
160/** @{
161 *
162 * byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
163 * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address)
164 */
165
166#define VC4_LOADSTORE_TILE_BUFFER_EOF BIT(3)
167#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK BIT(2)
168#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS BIT(1)
169#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR BIT(0)
170
171/** @} */
172
173/** @{
174 *
175 * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
176 * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
177 */
178#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR BIT(15)
179#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR BIT(14)
180#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR BIT(13)
181#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP BIT(12)
182
183#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK VC4_MASK(9, 8)
184#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 8
185#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888 0
186#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER 1
187#define VC4_LOADSTORE_TILE_BUFFER_BGR565 2
188/** @} */
189
190/** @{
191 *
192 * byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
193 * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
194 */
195#define VC4_STORE_TILE_BUFFER_MODE_MASK VC4_MASK(7, 6)
196#define VC4_STORE_TILE_BUFFER_MODE_SHIFT 6
197#define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0 (0 << 6)
198#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4 (1 << 6)
199#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16 (2 << 6)
200
201/** The values of the field are VC4_TILING_FORMAT_* */
202#define VC4_LOADSTORE_TILE_BUFFER_TILING_MASK VC4_MASK(5, 4)
203#define VC4_LOADSTORE_TILE_BUFFER_TILING_SHIFT 4
204
205#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK VC4_MASK(2, 0)
206#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_SHIFT 0
207#define VC4_LOADSTORE_TILE_BUFFER_NONE 0
208#define VC4_LOADSTORE_TILE_BUFFER_COLOR 1
209#define VC4_LOADSTORE_TILE_BUFFER_ZS 2
210#define VC4_LOADSTORE_TILE_BUFFER_Z 3
211#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK 4
212#define VC4_LOADSTORE_TILE_BUFFER_FULL 5
213/** @} */
214
215#define VC4_INDEX_BUFFER_U8 (0 << 4)
216#define VC4_INDEX_BUFFER_U16 (1 << 4)
217
218/* This flag is only present in NV shader state. */
219#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS BIT(3)
220#define VC4_SHADER_FLAG_ENABLE_CLIPPING BIT(2)
221#define VC4_SHADER_FLAG_VS_POINT_SIZE BIT(1)
222#define VC4_SHADER_FLAG_FS_SINGLE_THREAD BIT(0)
223
224/** @{ byte 2 of config bits. */
225#define VC4_CONFIG_BITS_EARLY_Z_UPDATE BIT(1)
226#define VC4_CONFIG_BITS_EARLY_Z BIT(0)
227/** @} */
228
229/** @{ byte 1 of config bits. */
230#define VC4_CONFIG_BITS_Z_UPDATE BIT(7)
231/** same values in this 3-bit field as PIPE_FUNC_* */
232#define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT 4
233#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE BIT(3)
234
235#define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO (0 << 1)
236#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD (1 << 1)
237#define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR (2 << 1)
238#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO (3 << 1)
239
240#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT BIT(0)
241/** @} */
242
243/** @{ byte 0 of config bits. */
244#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_NONE (0 << 6)
245#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X (1 << 6)
246#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X (2 << 6)
247
248#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES BIT(4)
249#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET BIT(3)
250#define VC4_CONFIG_BITS_CW_PRIMITIVES BIT(2)
251#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK BIT(1)
252#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT BIT(0)
253/** @} */
254
255/** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */
256#define VC4_BIN_CONFIG_DB_NON_MS BIT(7)
257
258#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK VC4_MASK(6, 5)
259#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT 5
260#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 0
261#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64 1
262#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128 2
263#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256 3
264
265#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK VC4_MASK(4, 3)
266#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT 3
267#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32 0
268#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64 1
269#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 2
270#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 3
271
272#define VC4_BIN_CONFIG_AUTO_INIT_TSDA BIT(2)
273#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT BIT(1)
274#define VC4_BIN_CONFIG_MS_MODE_4X BIT(0)
275/** @} */
276
277/** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */
278#define VC4_RENDER_CONFIG_DB_NON_MS BIT(12)
279#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE BIT(11)
280#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G BIT(10)
281#define VC4_RENDER_CONFIG_COVERAGE_MODE BIT(9)
282#define VC4_RENDER_CONFIG_ENABLE_VG_MASK BIT(8)
283
284/** The values of the field are VC4_TILING_FORMAT_* */
285#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK VC4_MASK(7, 6)
286#define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT 6
287
288#define VC4_RENDER_CONFIG_DECIMATE_MODE_1X (0 << 4)
289#define VC4_RENDER_CONFIG_DECIMATE_MODE_4X (1 << 4)
290#define VC4_RENDER_CONFIG_DECIMATE_MODE_16X (2 << 4)
291
292#define VC4_RENDER_CONFIG_FORMAT_MASK VC4_MASK(3, 2)
293#define VC4_RENDER_CONFIG_FORMAT_SHIFT 2
294#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED 0
295#define VC4_RENDER_CONFIG_FORMAT_RGBA8888 1
296#define VC4_RENDER_CONFIG_FORMAT_BGR565 2
297
298#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT BIT(1)
299#define VC4_RENDER_CONFIG_MS_MODE_4X BIT(0)
300
301#define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX (1 << 4)
302#define VC4_PRIMITIVE_LIST_FORMAT_32_XY (3 << 4)
303#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS (0 << 0)
304#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES (1 << 0)
305#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES (2 << 0)
306#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT (3 << 0)
307
308enum vc4_texture_data_type {
309 VC4_TEXTURE_TYPE_RGBA8888 = 0,
310 VC4_TEXTURE_TYPE_RGBX8888 = 1,
311 VC4_TEXTURE_TYPE_RGBA4444 = 2,
312 VC4_TEXTURE_TYPE_RGBA5551 = 3,
313 VC4_TEXTURE_TYPE_RGB565 = 4,
314 VC4_TEXTURE_TYPE_LUMINANCE = 5,
315 VC4_TEXTURE_TYPE_ALPHA = 6,
316 VC4_TEXTURE_TYPE_LUMALPHA = 7,
317 VC4_TEXTURE_TYPE_ETC1 = 8,
318 VC4_TEXTURE_TYPE_S16F = 9,
319 VC4_TEXTURE_TYPE_S8 = 10,
320 VC4_TEXTURE_TYPE_S16 = 11,
321 VC4_TEXTURE_TYPE_BW1 = 12,
322 VC4_TEXTURE_TYPE_A4 = 13,
323 VC4_TEXTURE_TYPE_A1 = 14,
324 VC4_TEXTURE_TYPE_RGBA64 = 15,
325 VC4_TEXTURE_TYPE_RGBA32R = 16,
326 VC4_TEXTURE_TYPE_YUV422R = 17,
327};
328
329#define VC4_TEX_P0_OFFSET_MASK VC4_MASK(31, 12)
330#define VC4_TEX_P0_OFFSET_SHIFT 12
331#define VC4_TEX_P0_CSWIZ_MASK VC4_MASK(11, 10)
332#define VC4_TEX_P0_CSWIZ_SHIFT 10
333#define VC4_TEX_P0_CMMODE_MASK VC4_MASK(9, 9)
334#define VC4_TEX_P0_CMMODE_SHIFT 9
335#define VC4_TEX_P0_FLIPY_MASK VC4_MASK(8, 8)
336#define VC4_TEX_P0_FLIPY_SHIFT 8
337#define VC4_TEX_P0_TYPE_MASK VC4_MASK(7, 4)
338#define VC4_TEX_P0_TYPE_SHIFT 4
339#define VC4_TEX_P0_MIPLVLS_MASK VC4_MASK(3, 0)
340#define VC4_TEX_P0_MIPLVLS_SHIFT 0
341
342#define VC4_TEX_P1_TYPE4_MASK VC4_MASK(31, 31)
343#define VC4_TEX_P1_TYPE4_SHIFT 31
344#define VC4_TEX_P1_HEIGHT_MASK VC4_MASK(30, 20)
345#define VC4_TEX_P1_HEIGHT_SHIFT 20
346#define VC4_TEX_P1_ETCFLIP_MASK VC4_MASK(19, 19)
347#define VC4_TEX_P1_ETCFLIP_SHIFT 19
348#define VC4_TEX_P1_WIDTH_MASK VC4_MASK(18, 8)
349#define VC4_TEX_P1_WIDTH_SHIFT 8
350
351#define VC4_TEX_P1_MAGFILT_MASK VC4_MASK(7, 7)
352#define VC4_TEX_P1_MAGFILT_SHIFT 7
353# define VC4_TEX_P1_MAGFILT_LINEAR 0
354# define VC4_TEX_P1_MAGFILT_NEAREST 1
355
356#define VC4_TEX_P1_MINFILT_MASK VC4_MASK(6, 4)
357#define VC4_TEX_P1_MINFILT_SHIFT 4
358# define VC4_TEX_P1_MINFILT_LINEAR 0
359# define VC4_TEX_P1_MINFILT_NEAREST 1
360# define VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR 2
361# define VC4_TEX_P1_MINFILT_NEAR_MIP_LIN 3
362# define VC4_TEX_P1_MINFILT_LIN_MIP_NEAR 4
363# define VC4_TEX_P1_MINFILT_LIN_MIP_LIN 5
364
365#define VC4_TEX_P1_WRAP_T_MASK VC4_MASK(3, 2)
366#define VC4_TEX_P1_WRAP_T_SHIFT 2
367#define VC4_TEX_P1_WRAP_S_MASK VC4_MASK(1, 0)
368#define VC4_TEX_P1_WRAP_S_SHIFT 0
369# define VC4_TEX_P1_WRAP_REPEAT 0
370# define VC4_TEX_P1_WRAP_CLAMP 1
371# define VC4_TEX_P1_WRAP_MIRROR 2
372# define VC4_TEX_P1_WRAP_BORDER 3
373
374#define VC4_TEX_P2_PTYPE_MASK VC4_MASK(31, 30)
375#define VC4_TEX_P2_PTYPE_SHIFT 30
376# define VC4_TEX_P2_PTYPE_IGNORED 0
377# define VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE 1
378# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS 2
379# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS 3
380
381/* VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE bits */
382#define VC4_TEX_P2_CMST_MASK VC4_MASK(29, 12)
383#define VC4_TEX_P2_CMST_SHIFT 12
384#define VC4_TEX_P2_BSLOD_MASK VC4_MASK(0, 0)
385#define VC4_TEX_P2_BSLOD_SHIFT 0
386
387/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS */
388#define VC4_TEX_P2_CHEIGHT_MASK VC4_MASK(22, 12)
389#define VC4_TEX_P2_CHEIGHT_SHIFT 12
390#define VC4_TEX_P2_CWIDTH_MASK VC4_MASK(10, 0)
391#define VC4_TEX_P2_CWIDTH_SHIFT 0
392
393/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS */
394#define VC4_TEX_P2_CYOFF_MASK VC4_MASK(22, 12)
395#define VC4_TEX_P2_CYOFF_SHIFT 12
396#define VC4_TEX_P2_CXOFF_MASK VC4_MASK(10, 0)
397#define VC4_TEX_P2_CXOFF_SHIFT 0
398
399#endif /* VC4_PACKET_H */
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index f34c422733dc..0addbad15832 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -29,6 +29,14 @@ struct vc4_plane_state {
29 u32 *dlist; 29 u32 *dlist;
30 u32 dlist_size; /* Number of dwords allocated for the display list */ 30 u32 dlist_size; /* Number of dwords allocated for the display list */
31 u32 dlist_count; /* Number of used dwords in the display list. */ 31 u32 dlist_count; /* Number of used dwords in the display list. */
32
33 /* Offset in the dlist to pointer word 0. */
34 u32 pw0_offset;
35
36 /* Offset where the plane's dlist was last stored in the
37 hardware at vc4_crtc_atomic_flush() time.
38 */
39 u32 *hw_dlist;
32}; 40};
33 41
34static inline struct vc4_plane_state * 42static inline struct vc4_plane_state *
@@ -207,6 +215,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
207 /* Position Word 3: Context. Written by the HVS. */ 215 /* Position Word 3: Context. Written by the HVS. */
208 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 216 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
209 217
218 vc4_state->pw0_offset = vc4_state->dlist_count;
219
210 /* Pointer Word 0: RGB / Y Pointer */ 220 /* Pointer Word 0: RGB / Y Pointer */
211 vc4_dlist_write(vc4_state, bo->paddr + offset); 221 vc4_dlist_write(vc4_state, bo->paddr + offset);
212 222
@@ -258,6 +268,8 @@ u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
258 struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); 268 struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
259 int i; 269 int i;
260 270
271 vc4_state->hw_dlist = dlist;
272
261 /* Can't memcpy_toio() because it needs to be 32-bit writes. */ 273 /* Can't memcpy_toio() because it needs to be 32-bit writes. */
262 for (i = 0; i < vc4_state->dlist_count; i++) 274 for (i = 0; i < vc4_state->dlist_count; i++)
263 writel(vc4_state->dlist[i], &dlist[i]); 275 writel(vc4_state->dlist[i], &dlist[i]);
@@ -272,6 +284,34 @@ u32 vc4_plane_dlist_size(struct drm_plane_state *state)
272 return vc4_state->dlist_count; 284 return vc4_state->dlist_count;
273} 285}
274 286
287/* Updates the plane to immediately (well, once the FIFO needs
288 * refilling) scan out from a new framebuffer.
289 */
290void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
291{
292 struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
293 struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
294 uint32_t addr;
295
296 /* We're skipping the address adjustment for negative origin,
297 * because this is only called on the primary plane.
298 */
299 WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
300 addr = bo->paddr + fb->offsets[0];
301
302 /* Write the new address into the hardware immediately. The
303 * scanout will start from this address as soon as the FIFO
304 * needs to refill with pixels.
305 */
306 writel(addr, &vc4_state->hw_dlist[vc4_state->pw0_offset]);
307
308 /* Also update the CPU-side dlist copy, so that any later
309 * atomic updates that don't do a new modeset on our plane
310 * also use our updated address.
311 */
312 vc4_state->dlist[vc4_state->pw0_offset] = addr;
313}
314
275static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { 315static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
276 .prepare_fb = NULL, 316 .prepare_fb = NULL,
277 .cleanup_fb = NULL, 317 .cleanup_fb = NULL,
diff --git a/drivers/gpu/drm/vc4/vc4_qpu_defines.h b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
new file mode 100644
index 000000000000..d5c2f3c85ebb
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
@@ -0,0 +1,264 @@
1/*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#ifndef VC4_QPU_DEFINES_H
25#define VC4_QPU_DEFINES_H
26
27enum qpu_op_add {
28 QPU_A_NOP,
29 QPU_A_FADD,
30 QPU_A_FSUB,
31 QPU_A_FMIN,
32 QPU_A_FMAX,
33 QPU_A_FMINABS,
34 QPU_A_FMAXABS,
35 QPU_A_FTOI,
36 QPU_A_ITOF,
37 QPU_A_ADD = 12,
38 QPU_A_SUB,
39 QPU_A_SHR,
40 QPU_A_ASR,
41 QPU_A_ROR,
42 QPU_A_SHL,
43 QPU_A_MIN,
44 QPU_A_MAX,
45 QPU_A_AND,
46 QPU_A_OR,
47 QPU_A_XOR,
48 QPU_A_NOT,
49 QPU_A_CLZ,
50 QPU_A_V8ADDS = 30,
51 QPU_A_V8SUBS = 31,
52};
53
54enum qpu_op_mul {
55 QPU_M_NOP,
56 QPU_M_FMUL,
57 QPU_M_MUL24,
58 QPU_M_V8MULD,
59 QPU_M_V8MIN,
60 QPU_M_V8MAX,
61 QPU_M_V8ADDS,
62 QPU_M_V8SUBS,
63};
64
65enum qpu_raddr {
66 QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */
67 /* 0-31 are the plain regfile a or b fields */
68 QPU_R_UNIF = 32,
69 QPU_R_VARY = 35,
70 QPU_R_ELEM_QPU = 38,
71 QPU_R_NOP,
72 QPU_R_XY_PIXEL_COORD = 41,
73 QPU_R_MS_REV_FLAGS = 41,
74 QPU_R_VPM = 48,
75 QPU_R_VPM_LD_BUSY,
76 QPU_R_VPM_LD_WAIT,
77 QPU_R_MUTEX_ACQUIRE,
78};
79
80enum qpu_waddr {
81 /* 0-31 are the plain regfile a or b fields */
82 QPU_W_ACC0 = 32, /* aka r0 */
83 QPU_W_ACC1,
84 QPU_W_ACC2,
85 QPU_W_ACC3,
86 QPU_W_TMU_NOSWAP,
87 QPU_W_ACC5,
88 QPU_W_HOST_INT,
89 QPU_W_NOP,
90 QPU_W_UNIFORMS_ADDRESS,
91 QPU_W_QUAD_XY, /* X for regfile a, Y for regfile b */
92 QPU_W_MS_FLAGS = 42,
93 QPU_W_REV_FLAG = 42,
94 QPU_W_TLB_STENCIL_SETUP = 43,
95 QPU_W_TLB_Z,
96 QPU_W_TLB_COLOR_MS,
97 QPU_W_TLB_COLOR_ALL,
98 QPU_W_TLB_ALPHA_MASK,
99 QPU_W_VPM,
100 QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */
101 QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */
102 QPU_W_MUTEX_RELEASE,
103 QPU_W_SFU_RECIP,
104 QPU_W_SFU_RECIPSQRT,
105 QPU_W_SFU_EXP,
106 QPU_W_SFU_LOG,
107 QPU_W_TMU0_S,
108 QPU_W_TMU0_T,
109 QPU_W_TMU0_R,
110 QPU_W_TMU0_B,
111 QPU_W_TMU1_S,
112 QPU_W_TMU1_T,
113 QPU_W_TMU1_R,
114 QPU_W_TMU1_B,
115};
116
117enum qpu_sig_bits {
118 QPU_SIG_SW_BREAKPOINT,
119 QPU_SIG_NONE,
120 QPU_SIG_THREAD_SWITCH,
121 QPU_SIG_PROG_END,
122 QPU_SIG_WAIT_FOR_SCOREBOARD,
123 QPU_SIG_SCOREBOARD_UNLOCK,
124 QPU_SIG_LAST_THREAD_SWITCH,
125 QPU_SIG_COVERAGE_LOAD,
126 QPU_SIG_COLOR_LOAD,
127 QPU_SIG_COLOR_LOAD_END,
128 QPU_SIG_LOAD_TMU0,
129 QPU_SIG_LOAD_TMU1,
130 QPU_SIG_ALPHA_MASK_LOAD,
131 QPU_SIG_SMALL_IMM,
132 QPU_SIG_LOAD_IMM,
133 QPU_SIG_BRANCH
134};
135
136enum qpu_mux {
137 /* hardware mux values */
138 QPU_MUX_R0,
139 QPU_MUX_R1,
140 QPU_MUX_R2,
141 QPU_MUX_R3,
142 QPU_MUX_R4,
143 QPU_MUX_R5,
144 QPU_MUX_A,
145 QPU_MUX_B,
146
147 /* non-hardware mux values */
148 QPU_MUX_IMM,
149};
150
151enum qpu_cond {
152 QPU_COND_NEVER,
153 QPU_COND_ALWAYS,
154 QPU_COND_ZS,
155 QPU_COND_ZC,
156 QPU_COND_NS,
157 QPU_COND_NC,
158 QPU_COND_CS,
159 QPU_COND_CC,
160};
161
162enum qpu_pack_mul {
163 QPU_PACK_MUL_NOP,
164 /* replicated to each 8 bits of the 32-bit dst. */
165 QPU_PACK_MUL_8888 = 3,
166 QPU_PACK_MUL_8A,
167 QPU_PACK_MUL_8B,
168 QPU_PACK_MUL_8C,
169 QPU_PACK_MUL_8D,
170};
171
172enum qpu_pack_a {
173 QPU_PACK_A_NOP,
174 /* convert to 16 bit float if float input, or to int16. */
175 QPU_PACK_A_16A,
176 QPU_PACK_A_16B,
177 /* replicated to each 8 bits of the 32-bit dst. */
178 QPU_PACK_A_8888,
179 /* Convert to 8-bit unsigned int. */
180 QPU_PACK_A_8A,
181 QPU_PACK_A_8B,
182 QPU_PACK_A_8C,
183 QPU_PACK_A_8D,
184
185 /* Saturating variants of the previous instructions. */
186 QPU_PACK_A_32_SAT, /* int-only */
187 QPU_PACK_A_16A_SAT, /* int or float */
188 QPU_PACK_A_16B_SAT,
189 QPU_PACK_A_8888_SAT,
190 QPU_PACK_A_8A_SAT,
191 QPU_PACK_A_8B_SAT,
192 QPU_PACK_A_8C_SAT,
193 QPU_PACK_A_8D_SAT,
194};
195
196enum qpu_unpack_r4 {
197 QPU_UNPACK_R4_NOP,
198 QPU_UNPACK_R4_F16A_TO_F32,
199 QPU_UNPACK_R4_F16B_TO_F32,
200 QPU_UNPACK_R4_8D_REP,
201 QPU_UNPACK_R4_8A,
202 QPU_UNPACK_R4_8B,
203 QPU_UNPACK_R4_8C,
204 QPU_UNPACK_R4_8D,
205};
206
207#define QPU_MASK(high, low) \
208 ((((uint64_t)1 << ((high) - (low) + 1)) - 1) << (low))
209
210#define QPU_GET_FIELD(word, field) \
211 ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))
212
213#define QPU_SIG_SHIFT 60
214#define QPU_SIG_MASK QPU_MASK(63, 60)
215
216#define QPU_UNPACK_SHIFT 57
217#define QPU_UNPACK_MASK QPU_MASK(59, 57)
218
219/**
220 * If set, the pack field means PACK_MUL or R4 packing, instead of normal
221 * regfile a packing.
222 */
223#define QPU_PM ((uint64_t)1 << 56)
224
225#define QPU_PACK_SHIFT 52
226#define QPU_PACK_MASK QPU_MASK(55, 52)
227
228#define QPU_COND_ADD_SHIFT 49
229#define QPU_COND_ADD_MASK QPU_MASK(51, 49)
230#define QPU_COND_MUL_SHIFT 46
231#define QPU_COND_MUL_MASK QPU_MASK(48, 46)
232
233#define QPU_SF ((uint64_t)1 << 45)
234
235#define QPU_WADDR_ADD_SHIFT 38
236#define QPU_WADDR_ADD_MASK QPU_MASK(43, 38)
237#define QPU_WADDR_MUL_SHIFT 32
238#define QPU_WADDR_MUL_MASK QPU_MASK(37, 32)
239
240#define QPU_OP_MUL_SHIFT 29
241#define QPU_OP_MUL_MASK QPU_MASK(31, 29)
242
243#define QPU_RADDR_A_SHIFT 18
244#define QPU_RADDR_A_MASK QPU_MASK(23, 18)
245#define QPU_RADDR_B_SHIFT 12
246#define QPU_RADDR_B_MASK QPU_MASK(17, 12)
247#define QPU_SMALL_IMM_SHIFT 12
248#define QPU_SMALL_IMM_MASK QPU_MASK(17, 12)
249
250#define QPU_ADD_A_SHIFT 9
251#define QPU_ADD_A_MASK QPU_MASK(11, 9)
252#define QPU_ADD_B_SHIFT 6
253#define QPU_ADD_B_MASK QPU_MASK(8, 6)
254#define QPU_MUL_A_SHIFT 3
255#define QPU_MUL_A_MASK QPU_MASK(5, 3)
256#define QPU_MUL_B_SHIFT 0
257#define QPU_MUL_B_MASK QPU_MASK(2, 0)
258
259#define QPU_WS ((uint64_t)1 << 44)
260
261#define QPU_OP_ADD_SHIFT 24
262#define QPU_OP_ADD_MASK QPU_MASK(28, 24)
263
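A short sketch of how these field helpers are meant to compose when decoding an instruction (the instruction value here is hypothetical):

    uint64_t inst = 0;                                 /* a 64-bit QPU instruction word */
    uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);       /* signaling bits, 63:60 */
    uint32_t add_op = QPU_GET_FIELD(inst, QPU_OP_ADD); /* add-pipe opcode, 28:24 */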
264#endif /* VC4_QPU_DEFINES_H */
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index 9e4e904c668e..4e52a0a88551 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -154,7 +154,7 @@
154#define V3D_PCTRS14 0x006f4 154#define V3D_PCTRS14 0x006f4
155#define V3D_PCTR15 0x006f8 155#define V3D_PCTR15 0x006f8
156#define V3D_PCTRS15 0x006fc 156#define V3D_PCTRS15 0x006fc
157#define V3D_BGE 0x00f00 157#define V3D_DBGE 0x00f00
158#define V3D_FDBGO 0x00f04 158#define V3D_FDBGO 0x00f04
159#define V3D_FDBGB 0x00f08 159#define V3D_FDBGB 0x00f08
160#define V3D_FDBGR 0x00f0c 160#define V3D_FDBGR 0x00f0c
diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c
new file mode 100644
index 000000000000..8a2a312e2c1b
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
@@ -0,0 +1,634 @@
1/*
2 * Copyright © 2014-2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/**
25 * DOC: Render command list generation
26 *
27 * In the VC4 driver, render command list generation is performed by the
28 * kernel instead of userspace. We do this because validating a
29 * user-submitted command list is hard to get right and has high CPU overhead,
30 * while the number of valid configurations for render command lists is
31 * actually fairly low.
32 */
33
34#include "uapi/drm/vc4_drm.h"
35#include "vc4_drv.h"
36#include "vc4_packet.h"
37
38struct vc4_rcl_setup {
39 struct drm_gem_cma_object *color_read;
40 struct drm_gem_cma_object *color_write;
41 struct drm_gem_cma_object *zs_read;
42 struct drm_gem_cma_object *zs_write;
43 struct drm_gem_cma_object *msaa_color_write;
44 struct drm_gem_cma_object *msaa_zs_write;
45
46 struct drm_gem_cma_object *rcl;
47 u32 next_offset;
48};
49
50static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val)
51{
52 *(u8 *)(setup->rcl->vaddr + setup->next_offset) = val;
53 setup->next_offset += 1;
54}
55
56static inline void rcl_u16(struct vc4_rcl_setup *setup, u16 val)
57{
58 *(u16 *)(setup->rcl->vaddr + setup->next_offset) = val;
59 setup->next_offset += 2;
60}
61
62static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val)
63{
64 *(u32 *)(setup->rcl->vaddr + setup->next_offset) = val;
65 setup->next_offset += 4;
66}
67
68/*
69 * Emits a no-op STORE_TILE_BUFFER_GENERAL.
70 *
71 * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
72 * some sort before another load is triggered.
73 */
74static void vc4_store_before_load(struct vc4_rcl_setup *setup)
75{
76 rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
77 rcl_u16(setup,
78 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE,
79 VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
80 VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |
81 VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |
82 VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR);
83 rcl_u32(setup, 0); /* no address, since we're in None mode */
84}
85
86/*
87 * Calculates the physical address of the start of a tile in a RCL surface.
88 *
89 * Unlike the other load/store packets,
90 * VC4_PACKET_LOAD/STORE_FULL_RES_TILE_BUFFER don't look at the tile
91 * coordinates packet, and instead just store to the address given.
92 */
93static uint32_t vc4_full_res_offset(struct vc4_exec_info *exec,
94 struct drm_gem_cma_object *bo,
95 struct drm_vc4_submit_rcl_surface *surf,
96 uint8_t x, uint8_t y)
97{
98 return bo->paddr + surf->offset + VC4_TILE_BUFFER_SIZE *
99 (DIV_ROUND_UP(exec->args->width, 32) * y + x);
100}
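For instance (hypothetical numbers): with width = 256, DIV_ROUND_UP(256, 32) = 8 full-res tiles per row, so the tile at (x, y) = (2, 1) starts at bo->paddr + surf->offset + VC4_TILE_BUFFER_SIZE * (8 * 1 + 2) = bo->paddr + surf->offset + 163840.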
101
102/*
103 * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
104 *
105 * The tile coordinates packet triggers a pending load if there is one, is
106 * used for clipping during rendering, and determines where loads/stores happen
107 * relative to their base address.
108 */
109static void vc4_tile_coordinates(struct vc4_rcl_setup *setup,
110 uint32_t x, uint32_t y)
111{
112 rcl_u8(setup, VC4_PACKET_TILE_COORDINATES);
113 rcl_u8(setup, x);
114 rcl_u8(setup, y);
115}
116
117static void emit_tile(struct vc4_exec_info *exec,
118 struct vc4_rcl_setup *setup,
119 uint8_t x, uint8_t y, bool first, bool last)
120{
121 struct drm_vc4_submit_cl *args = exec->args;
122 bool has_bin = args->bin_cl_size != 0;
123
124 /* Note that the load doesn't actually occur until the
125 * tile coords packet is processed, and only one load
126 * may be outstanding at a time.
127 */
128 if (setup->color_read) {
129 if (args->color_read.flags &
130 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
131 rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER);
132 rcl_u32(setup,
133 vc4_full_res_offset(exec, setup->color_read,
134 &args->color_read, x, y) |
135 VC4_LOADSTORE_FULL_RES_DISABLE_ZS);
136 } else {
137 rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
138 rcl_u16(setup, args->color_read.bits);
139 rcl_u32(setup, setup->color_read->paddr +
140 args->color_read.offset);
141 }
142 }
143
144 if (setup->zs_read) {
145 if (args->zs_read.flags &
146 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
147 rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER);
148 rcl_u32(setup,
149 vc4_full_res_offset(exec, setup->zs_read,
150 &args->zs_read, x, y) |
151 VC4_LOADSTORE_FULL_RES_DISABLE_COLOR);
152 } else {
153 if (setup->color_read) {
154 /* Exec previous load. */
155 vc4_tile_coordinates(setup, x, y);
156 vc4_store_before_load(setup);
157 }
158
159 rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
160 rcl_u16(setup, args->zs_read.bits);
161 rcl_u32(setup, setup->zs_read->paddr +
162 args->zs_read.offset);
163 }
164 }
165
166 /* Clipping depends on tile coordinates having been
167 * emitted, so we always need one here.
168 */
169 vc4_tile_coordinates(setup, x, y);
170
171 /* Wait for the binner before jumping to the first
172 * tile's lists.
173 */
174 if (first && has_bin)
175 rcl_u8(setup, VC4_PACKET_WAIT_ON_SEMAPHORE);
176
177 if (has_bin) {
178 rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST);
179 rcl_u32(setup, (exec->tile_bo->paddr +
180 exec->tile_alloc_offset +
181 (y * exec->bin_tiles_x + x) * 32));
182 }
183
184 if (setup->msaa_color_write) {
185 bool last_tile_write = (!setup->msaa_zs_write &&
186 !setup->zs_write &&
187 !setup->color_write);
188 uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_ZS;
189
190 if (!last_tile_write)
191 bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL;
192 else if (last)
193 bits |= VC4_LOADSTORE_FULL_RES_EOF;
194 rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER);
195 rcl_u32(setup,
196 vc4_full_res_offset(exec, setup->msaa_color_write,
197 &args->msaa_color_write, x, y) |
198 bits);
199 }
200
201 if (setup->msaa_zs_write) {
202 bool last_tile_write = (!setup->zs_write &&
203 !setup->color_write);
204 uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_COLOR;
205
206 if (setup->msaa_color_write)
207 vc4_tile_coordinates(setup, x, y);
208 if (!last_tile_write)
209 bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL;
210 else if (last)
211 bits |= VC4_LOADSTORE_FULL_RES_EOF;
212 rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER);
213 rcl_u32(setup,
214 vc4_full_res_offset(exec, setup->msaa_zs_write,
215 &args->msaa_zs_write, x, y) |
216 bits);
217 }
218
219 if (setup->zs_write) {
220 bool last_tile_write = !setup->color_write;
221
222 if (setup->msaa_color_write || setup->msaa_zs_write)
223 vc4_tile_coordinates(setup, x, y);
224
225 rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
226 rcl_u16(setup, args->zs_write.bits |
227 (last_tile_write ?
228 0 : VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR));
229 rcl_u32(setup,
230 (setup->zs_write->paddr + args->zs_write.offset) |
231 ((last && last_tile_write) ?
232 VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
233 }
234
235 if (setup->color_write) {
236 if (setup->msaa_color_write || setup->msaa_zs_write ||
237 setup->zs_write) {
238 vc4_tile_coordinates(setup, x, y);
239 }
240
241 if (last)
242 rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
243 else
244 rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER);
245 }
246}
247
248static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
249 struct vc4_rcl_setup *setup)
250{
251 struct drm_vc4_submit_cl *args = exec->args;
252 bool has_bin = args->bin_cl_size != 0;
253 uint8_t min_x_tile = args->min_x_tile;
254 uint8_t min_y_tile = args->min_y_tile;
255 uint8_t max_x_tile = args->max_x_tile;
256 uint8_t max_y_tile = args->max_y_tile;
257 uint8_t xtiles = max_x_tile - min_x_tile + 1;
258 uint8_t ytiles = max_y_tile - min_y_tile + 1;
259 uint8_t x, y;
260 uint32_t size, loop_body_size;
261
262 size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
263 loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;
264
265 if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
266 size += VC4_PACKET_CLEAR_COLORS_SIZE +
267 VC4_PACKET_TILE_COORDINATES_SIZE +
268 VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
269 }
270
271 if (setup->color_read) {
272 if (args->color_read.flags &
273 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
274 loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
275 } else {
276 loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
277 }
278 }
279 if (setup->zs_read) {
280 if (args->zs_read.flags &
281 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
282 loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
283 } else {
284 if (setup->color_read &&
285 !(args->color_read.flags &
286 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES)) {
287 loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
288 loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
289 }
290 loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
291 }
292 }
293
294 if (has_bin) {
295 size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE;
296 loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE;
297 }
298
299 if (setup->msaa_color_write)
300 loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;
301 if (setup->msaa_zs_write)
302 loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;
303
304 if (setup->zs_write)
305 loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
306 if (setup->color_write)
307 loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE;
308
309 /* We need a VC4_PACKET_TILE_COORDINATES in between each store. */
310 loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE *
311 ((setup->msaa_color_write != NULL) +
312 (setup->msaa_zs_write != NULL) +
313 (setup->color_write != NULL) +
314 (setup->zs_write != NULL) - 1);
315
316 size += xtiles * ytiles * loop_body_size;
317
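	/*
	 * Worked example (illustrative): a job with only color_write and
	 * zs_write set, plus a bin CL, has a loop body of one
	 * TILE_COORDINATES, one BRANCH_TO_SUB_LIST, one general Z/S store,
	 * one MS color store, and one extra TILE_COORDINATES between the
	 * two stores ((0 + 0 + 1 + 1 - 1) above).  That body is replicated
	 * for every tile in the (min,max) rectangle on top of the fixed
	 * header counted in "size".
	 */
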
318 setup->rcl = &vc4_bo_create(dev, size, true)->base;
319 if (!setup->rcl)
320 return -ENOMEM;
321 list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head,
322 &exec->unref_list);
323
324 rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
325 rcl_u32(setup,
326 (setup->color_write ? (setup->color_write->paddr +
327 args->color_write.offset) :
328 0));
329 rcl_u16(setup, args->width);
330 rcl_u16(setup, args->height);
331 rcl_u16(setup, args->color_write.bits);
332
333 /* The tile buffer gets cleared when the previous tile is stored. If
334 * the clear values changed between frames, then the tile buffer has
335 * stale clear values in it, so we have to do a store in None mode (no
336 * writes) so that we trigger the tile buffer clear.
337 */
338 if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
339 rcl_u8(setup, VC4_PACKET_CLEAR_COLORS);
340 rcl_u32(setup, args->clear_color[0]);
341 rcl_u32(setup, args->clear_color[1]);
342 rcl_u32(setup, args->clear_z);
343 rcl_u8(setup, args->clear_s);
344
345 vc4_tile_coordinates(setup, 0, 0);
346
347 rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
348 rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE);
349 rcl_u32(setup, 0); /* no address, since we're in None mode */
350 }
351
352 for (y = min_y_tile; y <= max_y_tile; y++) {
353 for (x = min_x_tile; x <= max_x_tile; x++) {
354 bool first = (x == min_x_tile && y == min_y_tile);
355 bool last = (x == max_x_tile && y == max_y_tile);
356
357 emit_tile(exec, setup, x, y, first, last);
358 }
359 }
360
361 BUG_ON(setup->next_offset != size);
362 exec->ct1ca = setup->rcl->paddr;
363 exec->ct1ea = setup->rcl->paddr + setup->next_offset;
364
365 return 0;
366}
367
368static int vc4_full_res_bounds_check(struct vc4_exec_info *exec,
369 struct drm_gem_cma_object *obj,
370 struct drm_vc4_submit_rcl_surface *surf)
371{
372 struct drm_vc4_submit_cl *args = exec->args;
373 u32 render_tiles_stride = DIV_ROUND_UP(exec->args->width, 32);
374
375 if (surf->offset > obj->base.size) {
376 DRM_ERROR("surface offset %d > BO size %zd\n",
377 surf->offset, obj->base.size);
378 return -EINVAL;
379 }
380
381 if ((obj->base.size - surf->offset) / VC4_TILE_BUFFER_SIZE <
382 render_tiles_stride * args->max_y_tile + args->max_x_tile) {
383 DRM_ERROR("MSAA tile %d, %d out of bounds "
384 "(bo size %zd, offset %d).\n",
385 args->max_x_tile, args->max_y_tile,
386 obj->base.size,
387 surf->offset);
388 return -EINVAL;
389 }
390
391 return 0;
392}
393
394static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec,
395 struct drm_gem_cma_object **obj,
396 struct drm_vc4_submit_rcl_surface *surf)
397{
398 if (surf->flags != 0 || surf->bits != 0) {
399 DRM_ERROR("MSAA surface had nonzero flags/bits\n");
400 return -EINVAL;
401 }
402
403 if (surf->hindex == ~0)
404 return 0;
405
406 *obj = vc4_use_bo(exec, surf->hindex);
407 if (!*obj)
408 return -EINVAL;
409
410 if (surf->offset & 0xf) {
 411 DRM_ERROR("MSAA write must be 16-byte aligned.\n");
412 return -EINVAL;
413 }
414
415 return vc4_full_res_bounds_check(exec, *obj, surf);
416}
417
418static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
419 struct drm_gem_cma_object **obj,
420 struct drm_vc4_submit_rcl_surface *surf)
421{
422 uint8_t tiling = VC4_GET_FIELD(surf->bits,
423 VC4_LOADSTORE_TILE_BUFFER_TILING);
424 uint8_t buffer = VC4_GET_FIELD(surf->bits,
425 VC4_LOADSTORE_TILE_BUFFER_BUFFER);
426 uint8_t format = VC4_GET_FIELD(surf->bits,
427 VC4_LOADSTORE_TILE_BUFFER_FORMAT);
428 int cpp;
429 int ret;
430
431 if (surf->flags & ~VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
432 DRM_ERROR("Extra flags set\n");
433 return -EINVAL;
434 }
435
436 if (surf->hindex == ~0)
437 return 0;
438
439 *obj = vc4_use_bo(exec, surf->hindex);
440 if (!*obj)
441 return -EINVAL;
442
443 if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
444 if (surf == &exec->args->zs_write) {
445 DRM_ERROR("general zs write may not be a full-res.\n");
446 return -EINVAL;
447 }
448
449 if (surf->bits != 0) {
450 DRM_ERROR("load/store general bits set with "
451 "full res load/store.\n");
452 return -EINVAL;
453 }
454
455 ret = vc4_full_res_bounds_check(exec, *obj, surf);
 456 if (ret)
457 return ret;
458
459 return 0;
460 }
461
462 if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK |
463 VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK |
464 VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) {
465 DRM_ERROR("Unknown bits in load/store: 0x%04x\n",
466 surf->bits);
467 return -EINVAL;
468 }
469
470 if (tiling > VC4_TILING_FORMAT_LT) {
471 DRM_ERROR("Bad tiling format\n");
472 return -EINVAL;
473 }
474
475 if (buffer == VC4_LOADSTORE_TILE_BUFFER_ZS) {
476 if (format != 0) {
477 DRM_ERROR("No color format should be set for ZS\n");
478 return -EINVAL;
479 }
480 cpp = 4;
481 } else if (buffer == VC4_LOADSTORE_TILE_BUFFER_COLOR) {
482 switch (format) {
483 case VC4_LOADSTORE_TILE_BUFFER_BGR565:
484 case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER:
485 cpp = 2;
486 break;
487 case VC4_LOADSTORE_TILE_BUFFER_RGBA8888:
488 cpp = 4;
489 break;
490 default:
491 DRM_ERROR("Bad tile buffer format\n");
492 return -EINVAL;
493 }
494 } else {
495 DRM_ERROR("Bad load/store buffer %d.\n", buffer);
496 return -EINVAL;
497 }
498
499 if (surf->offset & 0xf) {
 500 DRM_ERROR("load/store buffer must be 16-byte aligned.\n");
501 return -EINVAL;
502 }
503
504 if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,
505 exec->args->width, exec->args->height, cpp)) {
506 return -EINVAL;
507 }
508
509 return 0;
510}
511
512static int
513vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec,
514 struct vc4_rcl_setup *setup,
515 struct drm_gem_cma_object **obj,
516 struct drm_vc4_submit_rcl_surface *surf)
517{
518 uint8_t tiling = VC4_GET_FIELD(surf->bits,
519 VC4_RENDER_CONFIG_MEMORY_FORMAT);
520 uint8_t format = VC4_GET_FIELD(surf->bits,
521 VC4_RENDER_CONFIG_FORMAT);
522 int cpp;
523
524 if (surf->flags != 0) {
525 DRM_ERROR("No flags supported on render config.\n");
526 return -EINVAL;
527 }
528
529 if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK |
530 VC4_RENDER_CONFIG_FORMAT_MASK |
531 VC4_RENDER_CONFIG_MS_MODE_4X |
532 VC4_RENDER_CONFIG_DECIMATE_MODE_4X)) {
533 DRM_ERROR("Unknown bits in render config: 0x%04x\n",
534 surf->bits);
535 return -EINVAL;
536 }
537
538 if (surf->hindex == ~0)
539 return 0;
540
541 *obj = vc4_use_bo(exec, surf->hindex);
542 if (!*obj)
543 return -EINVAL;
544
545 if (tiling > VC4_TILING_FORMAT_LT) {
546 DRM_ERROR("Bad tiling format\n");
547 return -EINVAL;
548 }
549
550 switch (format) {
551 case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED:
552 case VC4_RENDER_CONFIG_FORMAT_BGR565:
553 cpp = 2;
554 break;
555 case VC4_RENDER_CONFIG_FORMAT_RGBA8888:
556 cpp = 4;
557 break;
558 default:
559 DRM_ERROR("Bad tile buffer format\n");
560 return -EINVAL;
561 }
562
563 if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,
564 exec->args->width, exec->args->height, cpp)) {
565 return -EINVAL;
566 }
567
568 return 0;
569}
570
571int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec)
572{
573 struct vc4_rcl_setup setup = {0};
574 struct drm_vc4_submit_cl *args = exec->args;
575 bool has_bin = args->bin_cl_size != 0;
576 int ret;
577
578 if (args->min_x_tile > args->max_x_tile ||
579 args->min_y_tile > args->max_y_tile) {
580 DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n",
581 args->min_x_tile, args->min_y_tile,
582 args->max_x_tile, args->max_y_tile);
583 return -EINVAL;
584 }
585
586 if (has_bin &&
587 (args->max_x_tile > exec->bin_tiles_x ||
588 args->max_y_tile > exec->bin_tiles_y)) {
589 DRM_ERROR("Render tiles (%d,%d) outside of bin config "
590 "(%d,%d)\n",
591 args->max_x_tile, args->max_y_tile,
592 exec->bin_tiles_x, exec->bin_tiles_y);
593 return -EINVAL;
594 }
595
596 ret = vc4_rcl_render_config_surface_setup(exec, &setup,
597 &setup.color_write,
598 &args->color_write);
599 if (ret)
600 return ret;
601
602 ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read);
603 if (ret)
604 return ret;
605
606 ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read);
607 if (ret)
608 return ret;
609
610 ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write);
611 if (ret)
612 return ret;
613
614 ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_color_write,
615 &args->msaa_color_write);
616 if (ret)
617 return ret;
618
619 ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_zs_write,
620 &args->msaa_zs_write);
621 if (ret)
622 return ret;
623
624 /* We shouldn't even have the job submitted to us if there's no
625 * surface to write out.
626 */
627 if (!setup.color_write && !setup.zs_write &&
628 !setup.msaa_color_write && !setup.msaa_zs_write) {
629 DRM_ERROR("RCL requires color or Z/S write\n");
630 return -EINVAL;
631 }
632
633 return vc4_create_rcl_bo(dev, exec, &setup);
634}
diff --git a/drivers/gpu/drm/vc4/vc4_trace.h b/drivers/gpu/drm/vc4/vc4_trace.h
new file mode 100644
index 000000000000..ad7b1ea720c2
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_trace.h
@@ -0,0 +1,63 @@
1/*
2 * Copyright (C) 2015 Broadcom
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#if !defined(_VC4_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
10#define _VC4_TRACE_H_
11
12#include <linux/stringify.h>
13#include <linux/types.h>
14#include <linux/tracepoint.h>
15
16#undef TRACE_SYSTEM
17#define TRACE_SYSTEM vc4
18#define TRACE_INCLUDE_FILE vc4_trace
19
20TRACE_EVENT(vc4_wait_for_seqno_begin,
21 TP_PROTO(struct drm_device *dev, uint64_t seqno, uint64_t timeout),
22 TP_ARGS(dev, seqno, timeout),
23
24 TP_STRUCT__entry(
25 __field(u32, dev)
26 __field(u64, seqno)
27 __field(u64, timeout)
28 ),
29
30 TP_fast_assign(
31 __entry->dev = dev->primary->index;
32 __entry->seqno = seqno;
33 __entry->timeout = timeout;
34 ),
35
36 TP_printk("dev=%u, seqno=%llu, timeout=%llu",
37 __entry->dev, __entry->seqno, __entry->timeout)
38);
39
40TRACE_EVENT(vc4_wait_for_seqno_end,
41 TP_PROTO(struct drm_device *dev, uint64_t seqno),
42 TP_ARGS(dev, seqno),
43
44 TP_STRUCT__entry(
45 __field(u32, dev)
46 __field(u64, seqno)
47 ),
48
49 TP_fast_assign(
50 __entry->dev = dev->primary->index;
51 __entry->seqno = seqno;
52 ),
53
54 TP_printk("dev=%u, seqno=%llu",
55 __entry->dev, __entry->seqno)
56);
57
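/*
 * Usage sketch (an assumption about the caller, not part of this header):
 * TRACE_EVENT() generates trace_vc4_wait_for_seqno_begin()/_end(), so a
 * seqno wait would be bracketed roughly as
 *
 *	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
 *	... block until the seqno signals or the timeout expires ...
 *	trace_vc4_wait_for_seqno_end(dev, seqno);
 */
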
58#endif /* _VC4_TRACE_H_ */
59
60/* This part must be outside protection */
61#undef TRACE_INCLUDE_PATH
62#define TRACE_INCLUDE_PATH .
63#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/vc4/vc4_trace_points.c b/drivers/gpu/drm/vc4/vc4_trace_points.c
new file mode 100644
index 000000000000..e6278f25716b
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_trace_points.c
@@ -0,0 +1,14 @@
1/*
2 * Copyright (C) 2015 Broadcom
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include "vc4_drv.h"
10
11#ifndef __CHECKER__
12#define CREATE_TRACE_POINTS
13#include "vc4_trace.h"
14#endif
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
new file mode 100644
index 000000000000..424d515ffcda
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -0,0 +1,262 @@
1/*
2 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
3 * Copyright (C) 2013 Red Hat
4 * Author: Rob Clark <robdclark@gmail.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include "linux/component.h"
20#include "vc4_drv.h"
21#include "vc4_regs.h"
22
23#ifdef CONFIG_DEBUG_FS
24#define REGDEF(reg) { reg, #reg }
25static const struct {
26 uint32_t reg;
27 const char *name;
28} vc4_reg_defs[] = {
29 REGDEF(V3D_IDENT0),
30 REGDEF(V3D_IDENT1),
31 REGDEF(V3D_IDENT2),
32 REGDEF(V3D_SCRATCH),
33 REGDEF(V3D_L2CACTL),
34 REGDEF(V3D_SLCACTL),
35 REGDEF(V3D_INTCTL),
36 REGDEF(V3D_INTENA),
37 REGDEF(V3D_INTDIS),
38 REGDEF(V3D_CT0CS),
39 REGDEF(V3D_CT1CS),
40 REGDEF(V3D_CT0EA),
41 REGDEF(V3D_CT1EA),
42 REGDEF(V3D_CT0CA),
43 REGDEF(V3D_CT1CA),
44 REGDEF(V3D_CT00RA0),
45 REGDEF(V3D_CT01RA0),
46 REGDEF(V3D_CT0LC),
47 REGDEF(V3D_CT1LC),
48 REGDEF(V3D_CT0PC),
49 REGDEF(V3D_CT1PC),
50 REGDEF(V3D_PCS),
51 REGDEF(V3D_BFC),
52 REGDEF(V3D_RFC),
53 REGDEF(V3D_BPCA),
54 REGDEF(V3D_BPCS),
55 REGDEF(V3D_BPOA),
56 REGDEF(V3D_BPOS),
57 REGDEF(V3D_BXCF),
58 REGDEF(V3D_SQRSV0),
59 REGDEF(V3D_SQRSV1),
60 REGDEF(V3D_SQCNTL),
61 REGDEF(V3D_SRQPC),
62 REGDEF(V3D_SRQUA),
63 REGDEF(V3D_SRQUL),
64 REGDEF(V3D_SRQCS),
65 REGDEF(V3D_VPACNTL),
66 REGDEF(V3D_VPMBASE),
67 REGDEF(V3D_PCTRC),
68 REGDEF(V3D_PCTRE),
69 REGDEF(V3D_PCTR0),
70 REGDEF(V3D_PCTRS0),
71 REGDEF(V3D_PCTR1),
72 REGDEF(V3D_PCTRS1),
73 REGDEF(V3D_PCTR2),
74 REGDEF(V3D_PCTRS2),
75 REGDEF(V3D_PCTR3),
76 REGDEF(V3D_PCTRS3),
77 REGDEF(V3D_PCTR4),
78 REGDEF(V3D_PCTRS4),
79 REGDEF(V3D_PCTR5),
80 REGDEF(V3D_PCTRS5),
81 REGDEF(V3D_PCTR6),
82 REGDEF(V3D_PCTRS6),
83 REGDEF(V3D_PCTR7),
84 REGDEF(V3D_PCTRS7),
85 REGDEF(V3D_PCTR8),
86 REGDEF(V3D_PCTRS8),
87 REGDEF(V3D_PCTR9),
88 REGDEF(V3D_PCTRS9),
89 REGDEF(V3D_PCTR10),
90 REGDEF(V3D_PCTRS10),
91 REGDEF(V3D_PCTR11),
92 REGDEF(V3D_PCTRS11),
93 REGDEF(V3D_PCTR12),
94 REGDEF(V3D_PCTRS12),
95 REGDEF(V3D_PCTR13),
96 REGDEF(V3D_PCTRS13),
97 REGDEF(V3D_PCTR14),
98 REGDEF(V3D_PCTRS14),
99 REGDEF(V3D_PCTR15),
100 REGDEF(V3D_PCTRS15),
101 REGDEF(V3D_DBGE),
102 REGDEF(V3D_FDBGO),
103 REGDEF(V3D_FDBGB),
104 REGDEF(V3D_FDBGR),
105 REGDEF(V3D_FDBGS),
106 REGDEF(V3D_ERRSTAT),
107};
108
109int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused)
110{
111 struct drm_info_node *node = (struct drm_info_node *)m->private;
112 struct drm_device *dev = node->minor->dev;
113 struct vc4_dev *vc4 = to_vc4_dev(dev);
114 int i;
115
116 for (i = 0; i < ARRAY_SIZE(vc4_reg_defs); i++) {
117 seq_printf(m, "%s (0x%04x): 0x%08x\n",
118 vc4_reg_defs[i].name, vc4_reg_defs[i].reg,
119 V3D_READ(vc4_reg_defs[i].reg));
120 }
121
122 return 0;
123}
124
125int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
126{
127 struct drm_info_node *node = (struct drm_info_node *)m->private;
128 struct drm_device *dev = node->minor->dev;
129 struct vc4_dev *vc4 = to_vc4_dev(dev);
130 uint32_t ident1 = V3D_READ(V3D_IDENT1);
131 uint32_t nslc = VC4_GET_FIELD(ident1, V3D_IDENT1_NSLC);
132 uint32_t tups = VC4_GET_FIELD(ident1, V3D_IDENT1_TUPS);
133 uint32_t qups = VC4_GET_FIELD(ident1, V3D_IDENT1_QUPS);
134
135 seq_printf(m, "Revision: %d\n",
136 VC4_GET_FIELD(ident1, V3D_IDENT1_REV));
137 seq_printf(m, "Slices: %d\n", nslc);
138 seq_printf(m, "TMUs: %d\n", nslc * tups);
139 seq_printf(m, "QPUs: %d\n", nslc * qups);
140 seq_printf(m, "Semaphores: %d\n",
141 VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM));
142
143 return 0;
144}
145#endif /* CONFIG_DEBUG_FS */
146
147/*
148 * Asks the firmware to turn on power to the V3D engine.
149 *
150 * This may be doable with just the clocks interface, though this
151 * packet does some other register setup from the firmware, too.
152 */
153int
154vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
155{
 156 if (on)
 157 return pm_generic_resume(&vc4->v3d->pdev->dev);
 158 else
 159 return pm_generic_poweroff(&vc4->v3d->pdev->dev);
160}
161
162static void vc4_v3d_init_hw(struct drm_device *dev)
163{
164 struct vc4_dev *vc4 = to_vc4_dev(dev);
165
166 /* Take all the memory that would have been reserved for user
167 * QPU programs, since we don't have an interface for running
168 * them, anyway.
169 */
170 V3D_WRITE(V3D_VPMBASE, 0);
171}
172
173static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
174{
175 struct platform_device *pdev = to_platform_device(dev);
176 struct drm_device *drm = dev_get_drvdata(master);
177 struct vc4_dev *vc4 = to_vc4_dev(drm);
178 struct vc4_v3d *v3d = NULL;
179 int ret;
180
181 v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL);
182 if (!v3d)
183 return -ENOMEM;
184
185 v3d->pdev = pdev;
186
187 v3d->regs = vc4_ioremap_regs(pdev, 0);
188 if (IS_ERR(v3d->regs))
189 return PTR_ERR(v3d->regs);
190
191 vc4->v3d = v3d;
192
193 if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) {
194 DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n",
195 V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0);
196 return -EINVAL;
197 }
198
199 /* Reset the binner overflow address/size at setup, to be sure
200 * we don't reuse an old one.
201 */
202 V3D_WRITE(V3D_BPOA, 0);
203 V3D_WRITE(V3D_BPOS, 0);
204
205 vc4_v3d_init_hw(drm);
206
207 ret = drm_irq_install(drm, platform_get_irq(pdev, 0));
208 if (ret) {
209 DRM_ERROR("Failed to install IRQ handler\n");
210 return ret;
211 }
212
213 return 0;
214}
215
216static void vc4_v3d_unbind(struct device *dev, struct device *master,
217 void *data)
218{
219 struct drm_device *drm = dev_get_drvdata(master);
220 struct vc4_dev *vc4 = to_vc4_dev(drm);
221
222 drm_irq_uninstall(drm);
223
224 /* Disable the binner's overflow memory address, so the next
225 * driver probe (if any) doesn't try to reuse our old
226 * allocation.
227 */
228 V3D_WRITE(V3D_BPOA, 0);
229 V3D_WRITE(V3D_BPOS, 0);
230
231 vc4->v3d = NULL;
232}
233
234static const struct component_ops vc4_v3d_ops = {
235 .bind = vc4_v3d_bind,
236 .unbind = vc4_v3d_unbind,
237};
238
239static int vc4_v3d_dev_probe(struct platform_device *pdev)
240{
241 return component_add(&pdev->dev, &vc4_v3d_ops);
242}
243
244static int vc4_v3d_dev_remove(struct platform_device *pdev)
245{
246 component_del(&pdev->dev, &vc4_v3d_ops);
247 return 0;
248}
249
250static const struct of_device_id vc4_v3d_dt_match[] = {
251 { .compatible = "brcm,vc4-v3d" },
252 {}
253};
254
255struct platform_driver vc4_v3d_driver = {
256 .probe = vc4_v3d_dev_probe,
257 .remove = vc4_v3d_dev_remove,
258 .driver = {
259 .name = "vc4_v3d",
260 .of_match_table = vc4_v3d_dt_match,
261 },
262};
diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c
new file mode 100644
index 000000000000..0fb5b994b9dd
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_validate.c
@@ -0,0 +1,900 @@
1/*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/**
25 * Command list validator for VC4.
26 *
27 * The VC4 has no IOMMU between it and system memory. So, a user with
28 * access to execute command lists could escalate privilege by
29 * overwriting system memory (drawing to it as a framebuffer) or
30 * reading system memory it shouldn't (reading it as a texture, or
31 * uniform data, or vertex data).
32 *
33 * This validates command lists to ensure that all accesses are within
34 * the bounds of the GEM objects referenced. It explicitly whitelists
35 * packets, and looks at the offsets in any address fields to make
36 * sure they're constrained within the BOs they reference.
37 *
38 * Note that because of the validation that's happening anyway, this
39 * is where GEM relocation processing happens.
40 */
41
42#include "uapi/drm/vc4_drm.h"
43#include "vc4_drv.h"
44#include "vc4_packet.h"
45
46#define VALIDATE_ARGS \
47 struct vc4_exec_info *exec, \
48 void *validated, \
49 void *untrusted
50
51/** Return the width in pixels of a 64-byte microtile. */
52static uint32_t
53utile_width(int cpp)
54{
55 switch (cpp) {
56 case 1:
57 case 2:
58 return 8;
59 case 4:
60 return 4;
61 case 8:
62 return 2;
63 default:
64 DRM_ERROR("unknown cpp: %d\n", cpp);
65 return 1;
66 }
67}
68
69/** Return the height in pixels of a 64-byte microtile. */
70static uint32_t
71utile_height(int cpp)
72{
73 switch (cpp) {
74 case 1:
75 return 8;
76 case 2:
77 case 4:
78 case 8:
79 return 4;
80 default:
81 DRM_ERROR("unknown cpp: %d\n", cpp);
82 return 1;
83 }
84}
85
86/**
 87 * The hardware texture unit decides which tiling format a particular
 88 * miplevel uses with this same size check, so we lay out our miptrees accordingly.
89 */
90static bool
91size_is_lt(uint32_t width, uint32_t height, int cpp)
92{
93 return (width <= 4 * utile_width(cpp) ||
94 height <= 4 * utile_height(cpp));
95}
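
/*
 * Example (illustrative): at cpp = 4 a 64-byte microtile is 4x4 pixels, so
 * any surface whose width or height is at most 4 * 4 = 16 pixels gets the LT
 * layout instead of T.
 */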
96
97struct drm_gem_cma_object *
98vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
99{
100 struct drm_gem_cma_object *obj;
101 struct vc4_bo *bo;
102
103 if (hindex >= exec->bo_count) {
104 DRM_ERROR("BO index %d greater than BO count %d\n",
105 hindex, exec->bo_count);
106 return NULL;
107 }
108 obj = exec->bo[hindex];
109 bo = to_vc4_bo(&obj->base);
110
111 if (bo->validated_shader) {
112 DRM_ERROR("Trying to use shader BO as something other than "
113 "a shader\n");
114 return NULL;
115 }
116
117 return obj;
118}
119
120static struct drm_gem_cma_object *
121vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
122{
123 return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
124}
125
126static bool
127validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
128{
129 /* Note that the untrusted pointer passed to these functions is
130 * incremented past the packet byte.
131 */
132 return (untrusted - 1 == exec->bin_u + pos);
133}
134
135static uint32_t
136gl_shader_rec_size(uint32_t pointer_bits)
137{
138 uint32_t attribute_count = pointer_bits & 7;
139 bool extended = pointer_bits & 8;
140
141 if (attribute_count == 0)
142 attribute_count = 8;
143
144 if (extended)
145 return 100 + attribute_count * 4;
146 else
147 return 36 + attribute_count * 8;
148}
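
/*
 * Example (illustrative): pointer_bits = 0x3 means 3 attributes,
 * non-extended, so the record is 36 + 3 * 8 = 60 bytes; pointer_bits = 0xb
 * (extended bit set, 3 attributes) gives 100 + 3 * 4 = 112 bytes.
 */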
149
150bool
151vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
152 uint32_t offset, uint8_t tiling_format,
153 uint32_t width, uint32_t height, uint8_t cpp)
154{
155 uint32_t aligned_width, aligned_height, stride, size;
156 uint32_t utile_w = utile_width(cpp);
157 uint32_t utile_h = utile_height(cpp);
158
159 /* The shaded vertex format stores signed 12.4 fixed point
160 * (-2048,2047) offsets from the viewport center, so we should
161 * never have a render target larger than 4096. The texture
162 * unit can only sample from 2048x2048, so it's even more
163 * restricted. This lets us avoid worrying about overflow in
164 * our math.
165 */
166 if (width > 4096 || height > 4096) {
167 DRM_ERROR("Surface dimesions (%d,%d) too large", width, height);
168 return false;
169 }
170
171 switch (tiling_format) {
172 case VC4_TILING_FORMAT_LINEAR:
173 aligned_width = round_up(width, utile_w);
174 aligned_height = height;
175 break;
176 case VC4_TILING_FORMAT_T:
177 aligned_width = round_up(width, utile_w * 8);
178 aligned_height = round_up(height, utile_h * 8);
179 break;
180 case VC4_TILING_FORMAT_LT:
181 aligned_width = round_up(width, utile_w);
182 aligned_height = round_up(height, utile_h);
183 break;
184 default:
185 DRM_ERROR("buffer tiling %d unsupported\n", tiling_format);
186 return false;
187 }
188
189 stride = aligned_width * cpp;
190 size = stride * aligned_height;
191
192 if (size + offset < size ||
193 size + offset > fbo->base.size) {
194 DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
195 width, height,
196 aligned_width, aligned_height,
197 size, offset, fbo->base.size);
198 return false;
199 }
200
201 return true;
202}
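
/*
 * Example (illustrative): a 100x100 RGBA8888 (cpp = 4) surface in T format
 * is padded to utile_w * 8 = 32-pixel multiples, i.e. 128x128, giving
 * stride = 128 * 4 = 512 and requiring the BO to hold
 * offset + 512 * 128 = offset + 65536 bytes.
 */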
203
204static int
205validate_flush(VALIDATE_ARGS)
206{
207 if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
208 DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n");
209 return -EINVAL;
210 }
211 exec->found_flush = true;
212
213 return 0;
214}
215
216static int
217validate_start_tile_binning(VALIDATE_ARGS)
218{
219 if (exec->found_start_tile_binning_packet) {
220 DRM_ERROR("Duplicate VC4_PACKET_START_TILE_BINNING\n");
221 return -EINVAL;
222 }
223 exec->found_start_tile_binning_packet = true;
224
225 if (!exec->found_tile_binning_mode_config_packet) {
226 DRM_ERROR("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
227 return -EINVAL;
228 }
229
230 return 0;
231}
232
233static int
234validate_increment_semaphore(VALIDATE_ARGS)
235{
236 if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
237 DRM_ERROR("Bin CL must end with "
238 "VC4_PACKET_INCREMENT_SEMAPHORE\n");
239 return -EINVAL;
240 }
241 exec->found_increment_semaphore_packet = true;
242
243 return 0;
244}
245
246static int
247validate_indexed_prim_list(VALIDATE_ARGS)
248{
249 struct drm_gem_cma_object *ib;
250 uint32_t length = *(uint32_t *)(untrusted + 1);
251 uint32_t offset = *(uint32_t *)(untrusted + 5);
252 uint32_t max_index = *(uint32_t *)(untrusted + 9);
253 uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
254 struct vc4_shader_state *shader_state;
255
256 /* Check overflow condition */
257 if (exec->shader_state_count == 0) {
258 DRM_ERROR("shader state must precede primitives\n");
259 return -EINVAL;
260 }
261 shader_state = &exec->shader_state[exec->shader_state_count - 1];
262
263 if (max_index > shader_state->max_index)
264 shader_state->max_index = max_index;
265
266 ib = vc4_use_handle(exec, 0);
267 if (!ib)
268 return -EINVAL;
269
270 if (offset > ib->base.size ||
271 (ib->base.size - offset) / index_size < length) {
272 DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
273 offset, length, index_size, ib->base.size);
274 return -EINVAL;
275 }
276
277 *(uint32_t *)(validated + 5) = ib->paddr + offset;
278
279 return 0;
280}
281
282static int
283validate_gl_array_primitive(VALIDATE_ARGS)
284{
285 uint32_t length = *(uint32_t *)(untrusted + 1);
286 uint32_t base_index = *(uint32_t *)(untrusted + 5);
287 uint32_t max_index;
288 struct vc4_shader_state *shader_state;
289
290 /* Check overflow condition */
291 if (exec->shader_state_count == 0) {
292 DRM_ERROR("shader state must precede primitives\n");
293 return -EINVAL;
294 }
295 shader_state = &exec->shader_state[exec->shader_state_count - 1];
296
297 if (length + base_index < length) {
298 DRM_ERROR("primitive vertex count overflow\n");
299 return -EINVAL;
300 }
301 max_index = length + base_index - 1;
302
303 if (max_index > shader_state->max_index)
304 shader_state->max_index = max_index;
305
306 return 0;
307}
308
309static int
310validate_gl_shader_state(VALIDATE_ARGS)
311{
312 uint32_t i = exec->shader_state_count++;
313
314 if (i >= exec->shader_state_size) {
315 DRM_ERROR("More requests for shader states than declared\n");
316 return -EINVAL;
317 }
318
319 exec->shader_state[i].addr = *(uint32_t *)untrusted;
320 exec->shader_state[i].max_index = 0;
321
322 if (exec->shader_state[i].addr & ~0xf) {
323 DRM_ERROR("high bits set in GL shader rec reference\n");
324 return -EINVAL;
325 }
326
327 *(uint32_t *)validated = (exec->shader_rec_p +
328 exec->shader_state[i].addr);
329
330 exec->shader_rec_p +=
331 roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);
332
333 return 0;
334}
335
336static int
337validate_tile_binning_config(VALIDATE_ARGS)
338{
339 struct drm_device *dev = exec->exec_bo->base.dev;
340 struct vc4_bo *tile_bo;
341 uint8_t flags;
342 uint32_t tile_state_size, tile_alloc_size;
343 uint32_t tile_count;
344
345 if (exec->found_tile_binning_mode_config_packet) {
346 DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
347 return -EINVAL;
348 }
349 exec->found_tile_binning_mode_config_packet = true;
350
351 exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
352 exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
353 tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
354 flags = *(uint8_t *)(untrusted + 14);
355
356 if (exec->bin_tiles_x == 0 ||
357 exec->bin_tiles_y == 0) {
358 DRM_ERROR("Tile binning config of %dx%d too small\n",
359 exec->bin_tiles_x, exec->bin_tiles_y);
360 return -EINVAL;
361 }
362
363 if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
364 VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
365 DRM_ERROR("unsupported binning config flags 0x%02x\n", flags);
366 return -EINVAL;
367 }
368
369 /* The tile state data array is 48 bytes per tile, and we put it at
370 * the start of a BO containing both it and the tile alloc.
371 */
372 tile_state_size = 48 * tile_count;
373
374 /* Since the tile alloc array will follow us, align. */
375 exec->tile_alloc_offset = roundup(tile_state_size, 4096);
376
377 *(uint8_t *)(validated + 14) =
378 ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
379 VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
380 VC4_BIN_CONFIG_AUTO_INIT_TSDA |
381 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
382 VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
383 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
384 VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));
385
386 /* Initial block size. */
387 tile_alloc_size = 32 * tile_count;
388
389 /*
390 * The initial allocation gets rounded to the next 256 bytes before
391 * the hardware starts fulfilling further allocations.
392 */
393 tile_alloc_size = roundup(tile_alloc_size, 256);
394
395 /* Add space for the extra allocations. This is what gets used first,
396 * before overflow memory. It must have at least 4096 bytes, but we
397 * want to avoid overflow memory usage if possible.
398 */
399 tile_alloc_size += 1024 * 1024;
400
401 tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size,
402 true);
403 exec->tile_bo = &tile_bo->base;
404 if (!exec->tile_bo)
405 return -ENOMEM;
406 list_add_tail(&tile_bo->unref_head, &exec->unref_list);
407
408 /* tile alloc address. */
409 *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr +
410 exec->tile_alloc_offset);
411 /* tile alloc size. */
412 *(uint32_t *)(validated + 4) = tile_alloc_size;
413 /* tile state address. */
414 *(uint32_t *)(validated + 8) = exec->tile_bo->paddr;
415
416 return 0;
417}
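
/*
 * Worked example (illustrative): with bin_tiles_x = 30 and bin_tiles_y = 17
 * (e.g. 1920x1080 at the 64x64 binning tile size), tile_count = 510, so
 * tile_state_size = 48 * 510 = 24480 bytes and tile_alloc_offset rounds up
 * to 24576.  The initial tile alloc is 32 * 510 = 16320 bytes, rounded to
 * 16384, plus the 1MB of extra allocation space added above.
 */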
418
419static int
420validate_gem_handles(VALIDATE_ARGS)
421{
422 memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
423 return 0;
424}
425
426#define VC4_DEFINE_PACKET(packet, func) \
427 [packet] = { packet ## _SIZE, #packet, func }
428
429static const struct cmd_info {
430 uint16_t len;
431 const char *name;
432 int (*func)(struct vc4_exec_info *exec, void *validated,
433 void *untrusted);
434} cmd_info[] = {
435 VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
436 VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
437 VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
438 VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
439 VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
440 validate_start_tile_binning),
441 VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
442 validate_increment_semaphore),
443
444 VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
445 validate_indexed_prim_list),
446 VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
447 validate_gl_array_primitive),
448
449 VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
450
451 VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
452
453 VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
454 VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
455 VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
456 VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
457 VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
458 VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
459 VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
460 VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
461 VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
462 /* Note: The docs say this was also 105, but it was 106 in the
463 * initial userland code drop.
464 */
465 VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
466
467 VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
468 validate_tile_binning_config),
469
470 VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
471};
472
473int
474vc4_validate_bin_cl(struct drm_device *dev,
475 void *validated,
476 void *unvalidated,
477 struct vc4_exec_info *exec)
478{
479 uint32_t len = exec->args->bin_cl_size;
480 uint32_t dst_offset = 0;
481 uint32_t src_offset = 0;
482
483 while (src_offset < len) {
484 void *dst_pkt = validated + dst_offset;
485 void *src_pkt = unvalidated + src_offset;
486 u8 cmd = *(uint8_t *)src_pkt;
487 const struct cmd_info *info;
488
489 if (cmd >= ARRAY_SIZE(cmd_info)) {
490 DRM_ERROR("0x%08x: packet %d out of bounds\n",
491 src_offset, cmd);
492 return -EINVAL;
493 }
494
495 info = &cmd_info[cmd];
496 if (!info->name) {
497 DRM_ERROR("0x%08x: packet %d invalid\n",
498 src_offset, cmd);
499 return -EINVAL;
500 }
501
502 if (src_offset + info->len > len) {
503 DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
504 "exceeds bounds (0x%08x)\n",
505 src_offset, cmd, info->name, info->len,
506 src_offset + len);
507 return -EINVAL;
508 }
509
510 if (cmd != VC4_PACKET_GEM_HANDLES)
511 memcpy(dst_pkt, src_pkt, info->len);
512
513 if (info->func && info->func(exec,
514 dst_pkt + 1,
515 src_pkt + 1)) {
516 DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n",
517 src_offset, cmd, info->name);
518 return -EINVAL;
519 }
520
521 src_offset += info->len;
522 /* GEM handle loading doesn't produce HW packets. */
523 if (cmd != VC4_PACKET_GEM_HANDLES)
524 dst_offset += info->len;
525
526 /* When the CL hits halt, it'll stop reading anything else. */
527 if (cmd == VC4_PACKET_HALT)
528 break;
529 }
530
531 exec->ct0ea = exec->ct0ca + dst_offset;
532
533 if (!exec->found_start_tile_binning_packet) {
534 DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
535 return -EINVAL;
536 }
537
538 /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH. The
539 * semaphore is used to trigger the render CL to start up, and the
540 * FLUSH is what caps the bin lists with
541 * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
542 * render CL when they get called to) and actually triggers the queued
543 * semaphore increment.
544 */
545 if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
546 DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
547 "VC4_PACKET_FLUSH\n");
548 return -EINVAL;
549 }
550
551 return 0;
552}
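
/*
 * Example (illustrative): the only accepted tail for a bin CL is therefore
 *
 *	... packets ...
 *	VC4_PACKET_INCREMENT_SEMAPHORE   at offset bin_cl_size - 2
 *	VC4_PACKET_FLUSH                 at offset bin_cl_size - 1
 *
 * which is exactly what validate_increment_semaphore() and validate_flush()
 * check via validate_bin_pos().
 */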
553
554static bool
555reloc_tex(struct vc4_exec_info *exec,
556 void *uniform_data_u,
557 struct vc4_texture_sample_info *sample,
558 uint32_t texture_handle_index)
559
560{
561 struct drm_gem_cma_object *tex;
562 uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
563 uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
564 uint32_t p2 = (sample->p_offset[2] != ~0 ?
565 *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
566 uint32_t p3 = (sample->p_offset[3] != ~0 ?
567 *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
568 uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
569 uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
570 uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
571 uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
572 uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
573 uint32_t cpp, tiling_format, utile_w, utile_h;
574 uint32_t i;
575 uint32_t cube_map_stride = 0;
576 enum vc4_texture_data_type type;
577
578 tex = vc4_use_bo(exec, texture_handle_index);
579 if (!tex)
580 return false;
581
582 if (sample->is_direct) {
583 uint32_t remaining_size = tex->base.size - p0;
584
585 if (p0 > tex->base.size - 4) {
586 DRM_ERROR("UBO offset greater than UBO size\n");
587 goto fail;
588 }
589 if (p1 > remaining_size - 4) {
590 DRM_ERROR("UBO clamp would allow reads "
591 "outside of UBO\n");
592 goto fail;
593 }
594 *validated_p0 = tex->paddr + p0;
595 return true;
596 }
597
598 if (width == 0)
599 width = 2048;
600 if (height == 0)
601 height = 2048;
602
603 if (p0 & VC4_TEX_P0_CMMODE_MASK) {
604 if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
605 VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
606 cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
607 if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
608 VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
609 if (cube_map_stride) {
610 DRM_ERROR("Cube map stride set twice\n");
611 goto fail;
612 }
613
614 cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
615 }
616 if (!cube_map_stride) {
617 DRM_ERROR("Cube map stride not set\n");
618 goto fail;
619 }
620 }
621
622 type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
623 (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));
624
625 switch (type) {
626 case VC4_TEXTURE_TYPE_RGBA8888:
627 case VC4_TEXTURE_TYPE_RGBX8888:
628 case VC4_TEXTURE_TYPE_RGBA32R:
629 cpp = 4;
630 break;
631 case VC4_TEXTURE_TYPE_RGBA4444:
632 case VC4_TEXTURE_TYPE_RGBA5551:
633 case VC4_TEXTURE_TYPE_RGB565:
634 case VC4_TEXTURE_TYPE_LUMALPHA:
635 case VC4_TEXTURE_TYPE_S16F:
636 case VC4_TEXTURE_TYPE_S16:
637 cpp = 2;
638 break;
639 case VC4_TEXTURE_TYPE_LUMINANCE:
640 case VC4_TEXTURE_TYPE_ALPHA:
641 case VC4_TEXTURE_TYPE_S8:
642 cpp = 1;
643 break;
644 case VC4_TEXTURE_TYPE_ETC1:
645 case VC4_TEXTURE_TYPE_BW1:
646 case VC4_TEXTURE_TYPE_A4:
647 case VC4_TEXTURE_TYPE_A1:
648 case VC4_TEXTURE_TYPE_RGBA64:
649 case VC4_TEXTURE_TYPE_YUV422R:
650 default:
651 DRM_ERROR("Texture format %d unsupported\n", type);
652 goto fail;
653 }
654 utile_w = utile_width(cpp);
655 utile_h = utile_height(cpp);
656
657 if (type == VC4_TEXTURE_TYPE_RGBA32R) {
658 tiling_format = VC4_TILING_FORMAT_LINEAR;
659 } else {
660 if (size_is_lt(width, height, cpp))
661 tiling_format = VC4_TILING_FORMAT_LT;
662 else
663 tiling_format = VC4_TILING_FORMAT_T;
664 }
665
666 if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
667 tiling_format, width, height, cpp)) {
668 goto fail;
669 }
670
671 /* The mipmap levels are stored before the base of the texture. Make
672 * sure there is actually space in the BO.
673 */
674 for (i = 1; i <= miplevels; i++) {
675 uint32_t level_width = max(width >> i, 1u);
676 uint32_t level_height = max(height >> i, 1u);
677 uint32_t aligned_width, aligned_height;
678 uint32_t level_size;
679
680 /* Once the levels get small enough, they drop from T to LT. */
681 if (tiling_format == VC4_TILING_FORMAT_T &&
682 size_is_lt(level_width, level_height, cpp)) {
683 tiling_format = VC4_TILING_FORMAT_LT;
684 }
685
686 switch (tiling_format) {
687 case VC4_TILING_FORMAT_T:
688 aligned_width = round_up(level_width, utile_w * 8);
689 aligned_height = round_up(level_height, utile_h * 8);
690 break;
691 case VC4_TILING_FORMAT_LT:
692 aligned_width = round_up(level_width, utile_w);
693 aligned_height = round_up(level_height, utile_h);
694 break;
695 default:
696 aligned_width = round_up(level_width, utile_w);
697 aligned_height = level_height;
698 break;
699 }
700
701 level_size = aligned_width * cpp * aligned_height;
702
703 if (offset < level_size) {
704 DRM_ERROR("Level %d (%dx%d -> %dx%d) size %db "
705 "overflowed buffer bounds (offset %d)\n",
706 i, level_width, level_height,
707 aligned_width, aligned_height,
708 level_size, offset);
709 goto fail;
710 }
711
712 offset -= level_size;
713 }
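	/*
	 * Example (illustrative): a 16x16 RGBA8888 texture (LT layout,
	 * cpp = 4) with miplevels = 2 walks offset back past the 8x8 level
	 * (8 * 4 * 8 = 256 bytes) and then the 4x4 level (4 * 4 * 4 = 64
	 * bytes), so the base offset in p0 must be at least 320 bytes into
	 * the BO for the loop above to pass.
	 */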
714
715 *validated_p0 = tex->paddr + p0;
716
717 return true;
718 fail:
719 DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
720 DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
721 DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
722 DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
723 return false;
724}
725
726static int
727validate_gl_shader_rec(struct drm_device *dev,
728 struct vc4_exec_info *exec,
729 struct vc4_shader_state *state)
730{
731 uint32_t *src_handles;
732 void *pkt_u, *pkt_v;
733 static const uint32_t shader_reloc_offsets[] = {
734 4, /* fs */
735 16, /* vs */
736 28, /* cs */
737 };
738 uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
739 struct drm_gem_cma_object *bo[shader_reloc_count + 8];
740 uint32_t nr_attributes, nr_relocs, packet_size;
741 int i;
742
743 nr_attributes = state->addr & 0x7;
744 if (nr_attributes == 0)
745 nr_attributes = 8;
746 packet_size = gl_shader_rec_size(state->addr);
747
748 nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
749 if (nr_relocs * 4 > exec->shader_rec_size) {
750 DRM_ERROR("overflowed shader recs reading %d handles "
751 "from %d bytes left\n",
752 nr_relocs, exec->shader_rec_size);
753 return -EINVAL;
754 }
755 src_handles = exec->shader_rec_u;
756 exec->shader_rec_u += nr_relocs * 4;
757 exec->shader_rec_size -= nr_relocs * 4;
758
759 if (packet_size > exec->shader_rec_size) {
760 DRM_ERROR("overflowed shader recs copying %db packet "
761 "from %d bytes left\n",
762 packet_size, exec->shader_rec_size);
763 return -EINVAL;
764 }
765 pkt_u = exec->shader_rec_u;
766 pkt_v = exec->shader_rec_v;
767 memcpy(pkt_v, pkt_u, packet_size);
768 exec->shader_rec_u += packet_size;
769 /* Shader recs have to be aligned to 16 bytes (due to the attribute
770 * flags being in the low bytes), so round the next validated shader
771 * rec address up. This should be safe, since we've got so many
772 * relocations in a shader rec packet.
773 */
774 BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
775 exec->shader_rec_v += roundup(packet_size, 16);
776 exec->shader_rec_size -= packet_size;
777
778 if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) {
779 DRM_ERROR("Multi-threaded fragment shaders not supported.\n");
780 return -EINVAL;
781 }
782
783 for (i = 0; i < shader_reloc_count; i++) {
 784 if (src_handles[i] >= exec->bo_count) {
785 DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
786 return -EINVAL;
787 }
788
789 bo[i] = exec->bo[src_handles[i]];
790 if (!bo[i])
791 return -EINVAL;
792 }
793 for (i = shader_reloc_count; i < nr_relocs; i++) {
794 bo[i] = vc4_use_bo(exec, src_handles[i]);
795 if (!bo[i])
796 return -EINVAL;
797 }
798
799 for (i = 0; i < shader_reloc_count; i++) {
800 struct vc4_validated_shader_info *validated_shader;
801 uint32_t o = shader_reloc_offsets[i];
802 uint32_t src_offset = *(uint32_t *)(pkt_u + o);
803 uint32_t *texture_handles_u;
804 void *uniform_data_u;
805 uint32_t tex;
806
807 *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
808
809 if (src_offset != 0) {
810 DRM_ERROR("Shaders must be at offset 0 of "
811 "the BO.\n");
812 return -EINVAL;
813 }
814
815 validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
816 if (!validated_shader)
817 return -EINVAL;
818
819 if (validated_shader->uniforms_src_size >
820 exec->uniforms_size) {
821 DRM_ERROR("Uniforms src buffer overflow\n");
822 return -EINVAL;
823 }
824
825 texture_handles_u = exec->uniforms_u;
826 uniform_data_u = (texture_handles_u +
827 validated_shader->num_texture_samples);
828
829 memcpy(exec->uniforms_v, uniform_data_u,
830 validated_shader->uniforms_size);
831
832 for (tex = 0;
833 tex < validated_shader->num_texture_samples;
834 tex++) {
835 if (!reloc_tex(exec,
836 uniform_data_u,
837 &validated_shader->texture_samples[tex],
838 texture_handles_u[tex])) {
839 return -EINVAL;
840 }
841 }
842
843 *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
844
845 exec->uniforms_u += validated_shader->uniforms_src_size;
846 exec->uniforms_v += validated_shader->uniforms_size;
847 exec->uniforms_p += validated_shader->uniforms_size;
848 }
849
850 for (i = 0; i < nr_attributes; i++) {
851 struct drm_gem_cma_object *vbo =
852 bo[ARRAY_SIZE(shader_reloc_offsets) + i];
853 uint32_t o = 36 + i * 8;
854 uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
855 uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
856 uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
857 uint32_t max_index;
858
859 if (state->addr & 0x8)
860 stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;
861
862 if (vbo->base.size < offset ||
863 vbo->base.size - offset < attr_size) {
864 DRM_ERROR("BO offset overflow (%d + %d > %d)\n",
865 offset, attr_size, vbo->base.size);
866 return -EINVAL;
867 }
868
869 if (stride != 0) {
870 max_index = ((vbo->base.size - offset - attr_size) /
871 stride);
872 if (state->max_index > max_index) {
873 DRM_ERROR("primitives use index %d out of "
874 "supplied %d\n",
875 state->max_index, max_index);
876 return -EINVAL;
877 }
878 }
879
880 *(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
881 }
882
883 return 0;
884}
885
886int
887vc4_validate_shader_recs(struct drm_device *dev,
888 struct vc4_exec_info *exec)
889{
890 uint32_t i;
891 int ret = 0;
892
893 for (i = 0; i < exec->shader_state_count; i++) {
894 ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
895 if (ret)
896 return ret;
897 }
898
899 return ret;
900}
diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
new file mode 100644
index 000000000000..f67124b4c534
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
@@ -0,0 +1,513 @@
1/*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/**
25 * DOC: Shader validator for VC4.
26 *
27 * The VC4 has no IOMMU between it and system memory, so a user with
28 * access to execute shaders could escalate privilege by overwriting
29 * system memory (using the VPM write address register in the
30 * general-purpose DMA mode) or reading system memory it shouldn't
31 * (reading it as a texture, or uniform data, or vertex data).
32 *
33 * This walks over a shader BO, ensuring that its accesses are
34 * appropriately bounded, and recording how many texture accesses are
35 * made and where so that we can do relocations for them in the
36 * uniform stream.
37 */
38
39#include "vc4_drv.h"
40#include "vc4_qpu_defines.h"
41
42struct vc4_shader_validation_state {
43 struct vc4_texture_sample_info tmu_setup[2];
44 int tmu_write_count[2];
45
46 /* For registers that were last written to by a MIN instruction with
47 * one argument being a uniform, the address of the uniform.
48 * Otherwise, ~0.
49 *
50 * This is used for the validation of direct address memory reads.
51 */
52 uint32_t live_min_clamp_offsets[32 + 32 + 4];
53 bool live_max_clamp_regs[32 + 32 + 4];
54};
55
56static uint32_t
57waddr_to_live_reg_index(uint32_t waddr, bool is_b)
58{
59 if (waddr < 32) {
60 if (is_b)
61 return 32 + waddr;
62 else
63 return waddr;
64 } else if (waddr <= QPU_W_ACC3) {
65 return 64 + waddr - QPU_W_ACC0;
66 } else {
67 return ~0;
68 }
69}
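
/*
 * Example (illustrative): the live-register index space is [0,31] for
 * regfile A, [32,63] for regfile B and [64,67] for accumulators r0-r3,
 * matching the 32 + 32 + 4 tracking arrays above.  So waddr 5 written on
 * the B side maps to index 37, QPU_W_ACC2 maps to 66, and anything else is
 * untracked (~0).
 */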
70
71static uint32_t
72raddr_add_a_to_live_reg_index(uint64_t inst)
73{
74 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
75 uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
76 uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
77 uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
78
79 if (add_a == QPU_MUX_A)
80 return raddr_a;
81 else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
82 return 32 + raddr_b;
83 else if (add_a <= QPU_MUX_R3)
84 return 64 + add_a;
85 else
86 return ~0;
87}
88
89static bool
90is_tmu_submit(uint32_t waddr)
91{
92 return (waddr == QPU_W_TMU0_S ||
93 waddr == QPU_W_TMU1_S);
94}
95
96static bool
97is_tmu_write(uint32_t waddr)
98{
99 return (waddr >= QPU_W_TMU0_S &&
100 waddr <= QPU_W_TMU1_B);
101}
102
103static bool
104record_texture_sample(struct vc4_validated_shader_info *validated_shader,
105 struct vc4_shader_validation_state *validation_state,
106 int tmu)
107{
108 uint32_t s = validated_shader->num_texture_samples;
109 int i;
110 struct vc4_texture_sample_info *temp_samples;
111
112 temp_samples = krealloc(validated_shader->texture_samples,
113 (s + 1) * sizeof(*temp_samples),
114 GFP_KERNEL);
115 if (!temp_samples)
116 return false;
117
118 memcpy(&temp_samples[s],
119 &validation_state->tmu_setup[tmu],
120 sizeof(*temp_samples));
121
122 validated_shader->num_texture_samples = s + 1;
123 validated_shader->texture_samples = temp_samples;
124
125 for (i = 0; i < 4; i++)
126 validation_state->tmu_setup[tmu].p_offset[i] = ~0;
127
128 return true;
129}
130
131static bool
132check_tmu_write(uint64_t inst,
133 struct vc4_validated_shader_info *validated_shader,
134 struct vc4_shader_validation_state *validation_state,
135 bool is_mul)
136{
137 uint32_t waddr = (is_mul ?
138 QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
139 QPU_GET_FIELD(inst, QPU_WADDR_ADD));
140 uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
141 uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
142 int tmu = waddr > QPU_W_TMU0_B;
143 bool submit = is_tmu_submit(waddr);
144 bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
145 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
146
147 if (is_direct) {
148 uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
149 uint32_t clamp_reg, clamp_offset;
150
151 if (sig == QPU_SIG_SMALL_IMM) {
152 DRM_ERROR("direct TMU read used small immediate\n");
153 return false;
154 }
155
156 /* Make sure that this texture load is an add of the base
157 * address of the UBO to a clamped offset within the UBO.
158 */
159 if (is_mul ||
160 QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
161 DRM_ERROR("direct TMU load wasn't an add\n");
162 return false;
163 }
164
 165 /* We assert that the clamped address is the first
166 * argument, and the UBO base address is the second argument.
167 * This is arbitrary, but simpler than supporting flipping the
168 * two either way.
169 */
170 clamp_reg = raddr_add_a_to_live_reg_index(inst);
171 if (clamp_reg == ~0) {
172 DRM_ERROR("direct TMU load wasn't clamped\n");
173 return false;
174 }
175
176 clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
177 if (clamp_offset == ~0) {
178 DRM_ERROR("direct TMU load wasn't clamped\n");
179 return false;
180 }
181
182 /* Store the clamp value's offset in p1 (see reloc_tex() in
183 * vc4_validate.c).
184 */
185 validation_state->tmu_setup[tmu].p_offset[1] =
186 clamp_offset;
187
188 if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
189 !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
190 DRM_ERROR("direct TMU load didn't add to a uniform\n");
191 return false;
192 }
193
194 validation_state->tmu_setup[tmu].is_direct = true;
195 } else {
196 if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
197 raddr_b == QPU_R_UNIF)) {
198 DRM_ERROR("uniform read in the same instruction as "
199 "texture setup.\n");
200 return false;
201 }
202 }
203
204 if (validation_state->tmu_write_count[tmu] >= 4) {
205 DRM_ERROR("TMU%d got too many parameters before dispatch\n",
206 tmu);
207 return false;
208 }
209 validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
210 validated_shader->uniforms_size;
211 validation_state->tmu_write_count[tmu]++;
212 /* Since direct uses a RADDR uniform reference, it will get counted in
213 * check_instruction_reads()
214 */
215 if (!is_direct)
216 validated_shader->uniforms_size += 4;
217
218 if (submit) {
219 if (!record_texture_sample(validated_shader,
220 validation_state, tmu)) {
221 return false;
222 }
223
224 validation_state->tmu_write_count[tmu] = 0;
225 }
226
227 return true;
228}
229
230static bool
231check_reg_write(uint64_t inst,
232 struct vc4_validated_shader_info *validated_shader,
233 struct vc4_shader_validation_state *validation_state,
234 bool is_mul)
235{
236 uint32_t waddr = (is_mul ?
237 QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
238 QPU_GET_FIELD(inst, QPU_WADDR_ADD));
239
240 switch (waddr) {
241 case QPU_W_UNIFORMS_ADDRESS:
242 /* XXX: We'll probably need to support this for reladdr, but
243 * it's definitely a security-related one.
244 */
245 DRM_ERROR("uniforms address load unsupported\n");
246 return false;
247
248 case QPU_W_TLB_COLOR_MS:
249 case QPU_W_TLB_COLOR_ALL:
250 case QPU_W_TLB_Z:
251 /* These only interact with the tile buffer, not main memory,
252 * so they're safe.
253 */
254 return true;
255
256 case QPU_W_TMU0_S:
257 case QPU_W_TMU0_T:
258 case QPU_W_TMU0_R:
259 case QPU_W_TMU0_B:
260 case QPU_W_TMU1_S:
261 case QPU_W_TMU1_T:
262 case QPU_W_TMU1_R:
263 case QPU_W_TMU1_B:
264 return check_tmu_write(inst, validated_shader, validation_state,
265 is_mul);
266
267 case QPU_W_HOST_INT:
268 case QPU_W_TMU_NOSWAP:
269 case QPU_W_TLB_ALPHA_MASK:
270 case QPU_W_MUTEX_RELEASE:
271 /* XXX: I haven't thought about these, so don't support them
272 * for now.
273 */
274 DRM_ERROR("Unsupported waddr %d\n", waddr);
275 return false;
276
277 case QPU_W_VPM_ADDR:
278 DRM_ERROR("General VPM DMA unsupported\n");
279 return false;
280
281 case QPU_W_VPM:
282 case QPU_W_VPMVCD_SETUP:
283 /* We allow VPM setup in general, even including VPM DMA
284 * configuration setup, because the (unsafe) DMA can only be
285 * triggered by QPU_W_VPM_ADDR writes.
286 */
287 return true;
288
289 case QPU_W_TLB_STENCIL_SETUP:
290 return true;
291 }
292
293 return true;
294}
295
296static void
297track_live_clamps(uint64_t inst,
298 struct vc4_validated_shader_info *validated_shader,
299 struct vc4_shader_validation_state *validation_state)
300{
301 uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
302 uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
303 uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
304 uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
305 uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
306 uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
307 uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
308 uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
309 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
310 bool ws = inst & QPU_WS;
311 uint32_t lri_add_a, lri_add, lri_mul;
312 bool add_a_is_min_0;
313
314	/* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
315 * before we clear previous live state.
316 */
317 lri_add_a = raddr_add_a_to_live_reg_index(inst);
318 add_a_is_min_0 = (lri_add_a != ~0 &&
319 validation_state->live_max_clamp_regs[lri_add_a]);
320
321 /* Clear live state for registers written by our instruction. */
322 lri_add = waddr_to_live_reg_index(waddr_add, ws);
323 lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
324 if (lri_mul != ~0) {
325 validation_state->live_max_clamp_regs[lri_mul] = false;
326 validation_state->live_min_clamp_offsets[lri_mul] = ~0;
327 }
328 if (lri_add != ~0) {
329 validation_state->live_max_clamp_regs[lri_add] = false;
330 validation_state->live_min_clamp_offsets[lri_add] = ~0;
331 } else {
332 /* Nothing further to do for live tracking, since only ADDs
333 * generate new live clamp registers.
334 */
335 return;
336 }
337
338 /* Now, handle remaining live clamp tracking for the ADD operation. */
339
340 if (cond_add != QPU_COND_ALWAYS)
341 return;
342
343 if (op_add == QPU_A_MAX) {
344 /* Track live clamps of a value to a minimum of 0 (in either
345 * arg).
346 */
347 if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
348 (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
349 return;
350 }
351
352 validation_state->live_max_clamp_regs[lri_add] = true;
353 } else if (op_add == QPU_A_MIN) {
354 /* Track live clamps of a value clamped to a minimum of 0 and
355 * a maximum of some uniform's offset.
356 */
357 if (!add_a_is_min_0)
358 return;
359
360 if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
361 !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
362 sig != QPU_SIG_SMALL_IMM)) {
363 return;
364 }
365
366 validation_state->live_min_clamp_offsets[lri_add] =
367 validated_shader->uniforms_size;
368 }
369}
370
371static bool
372check_instruction_writes(uint64_t inst,
373 struct vc4_validated_shader_info *validated_shader,
374 struct vc4_shader_validation_state *validation_state)
375{
376 uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
377 uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
378 bool ok;
379
380 if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
381 DRM_ERROR("ADD and MUL both set up textures\n");
382 return false;
383 }
384
385 ok = (check_reg_write(inst, validated_shader, validation_state,
386 false) &&
387 check_reg_write(inst, validated_shader, validation_state,
388 true));
389
390 track_live_clamps(inst, validated_shader, validation_state);
391
392 return ok;
393}
394
395static bool
396check_instruction_reads(uint64_t inst,
397 struct vc4_validated_shader_info *validated_shader)
398{
399 uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
400 uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
401 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
402
403 if (raddr_a == QPU_R_UNIF ||
404 (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
405 /* This can't overflow the uint32_t, because we're reading 8
406 * bytes of instruction to increment by 4 here, so we'd
407 * already be OOM.
408 */
409 validated_shader->uniforms_size += 4;
410 }
411
412 return true;
413}
414
415struct vc4_validated_shader_info *
416vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
417{
418 bool found_shader_end = false;
419 int shader_end_ip = 0;
420 uint32_t ip, max_ip;
421 uint64_t *shader;
422 struct vc4_validated_shader_info *validated_shader;
423 struct vc4_shader_validation_state validation_state;
424 int i;
425
426 memset(&validation_state, 0, sizeof(validation_state));
427
428 for (i = 0; i < 8; i++)
429 validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
430 for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
431 validation_state.live_min_clamp_offsets[i] = ~0;
432
433 shader = shader_obj->vaddr;
434 max_ip = shader_obj->base.size / sizeof(uint64_t);
435
436 validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
437 if (!validated_shader)
438 return NULL;
439
440 for (ip = 0; ip < max_ip; ip++) {
441 uint64_t inst = shader[ip];
442 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
443
444 switch (sig) {
445 case QPU_SIG_NONE:
446 case QPU_SIG_WAIT_FOR_SCOREBOARD:
447 case QPU_SIG_SCOREBOARD_UNLOCK:
448 case QPU_SIG_COLOR_LOAD:
449 case QPU_SIG_LOAD_TMU0:
450 case QPU_SIG_LOAD_TMU1:
451 case QPU_SIG_PROG_END:
452 case QPU_SIG_SMALL_IMM:
453 if (!check_instruction_writes(inst, validated_shader,
454 &validation_state)) {
455 DRM_ERROR("Bad write at ip %d\n", ip);
456 goto fail;
457 }
458
459 if (!check_instruction_reads(inst, validated_shader))
460 goto fail;
461
462 if (sig == QPU_SIG_PROG_END) {
463 found_shader_end = true;
464 shader_end_ip = ip;
465 }
466
467 break;
468
469 case QPU_SIG_LOAD_IMM:
470 if (!check_instruction_writes(inst, validated_shader,
471 &validation_state)) {
472 DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
473 goto fail;
474 }
475 break;
476
477 default:
478 DRM_ERROR("Unsupported QPU signal %d at "
479 "instruction %d\n", sig, ip);
480 goto fail;
481 }
482
483 /* There are two delay slots after program end is signaled
484 * that are still executed, then we're finished.
485 */
486 if (found_shader_end && ip == shader_end_ip + 2)
487 break;
488 }
489
490 if (ip == max_ip) {
491 DRM_ERROR("shader failed to terminate before "
492 "shader BO end at %zd\n",
493 shader_obj->base.size);
494 goto fail;
495 }
496
497 /* Again, no chance of integer overflow here because the worst case
498 * scenario is 8 bytes of uniforms plus handles per 8-byte
499 * instruction.
500 */
501 validated_shader->uniforms_src_size =
502 (validated_shader->uniforms_size +
503 4 * validated_shader->num_texture_samples);
504
505 return validated_shader;
506
507fail:
508 if (validated_shader) {
509 kfree(validated_shader->texture_samples);
510 kfree(validated_shader);
511 }
512 return NULL;
513}
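
For orientation, a minimal userspace sketch (not part of this patch) of how a shader is expected to reach the validator above: the DRM_VC4_CREATE_SHADER_BO ioctl defined in include/uapi/drm/vc4_drm.h below hands the QPU code to the kernel, which is the path where a validator like vc4_validate_shader() is intended to reject bad programs before the BO is usable. The helper name, header paths and error handling here are illustrative assumptions.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include "vc4_drm.h"	/* illustrative include path for the new uapi header */

/* Hedged sketch: create a shader BO from a buffer of 64-bit QPU
 * instructions.  Returns the GEM handle, or 0 on failure.  `fd` is
 * assumed to be an open DRM node for the vc4 device.
 */
static uint32_t create_shader_bo(int fd, const uint64_t *insts, size_t count)
{
	struct drm_vc4_create_shader_bo create;

	memset(&create, 0, sizeof(create));
	create.size = count * sizeof(uint64_t);
	create.data = (uintptr_t)insts;

	/* The kernel copies the code out of user memory here; a shader
	 * that fails validation is expected to be rejected by this
	 * ioctl rather than at command-list submit time.
	 */
	if (ioctl(fd, DRM_IOCTL_VC4_CREATE_SHADER_BO, &create) != 0)
		return 0;

	return create.handle;
}
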
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index a8e01aaca087..5531d7bbe851 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -587,6 +587,13 @@ struct drm_driver {
587 int (*gem_open_object) (struct drm_gem_object *, struct drm_file *); 587 int (*gem_open_object) (struct drm_gem_object *, struct drm_file *);
588 void (*gem_close_object) (struct drm_gem_object *, struct drm_file *); 588 void (*gem_close_object) (struct drm_gem_object *, struct drm_file *);
589 589
590 /**
591 * Hook for allocating the GEM object struct, for use by core
592 * helpers.
593 */
594 struct drm_gem_object *(*gem_create_object)(struct drm_device *dev,
595 size_t size);
596
590 /* prime: */ 597 /* prime: */
591 /* export handle -> fd (see drm_gem_prime_handle_to_fd() helper) */ 598 /* export handle -> fd (see drm_gem_prime_handle_to_fd() helper) */
592 int (*prime_handle_to_fd)(struct drm_device *dev, struct drm_file *file_priv, 599 int (*prime_handle_to_fd)(struct drm_device *dev, struct drm_file *file_priv,
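
A hedged sketch of what a driver-side implementation of the new gem_create_object() hook can look like: per the comment above, the core helpers call it to allocate the GEM object struct, so a driver can hand back a larger, driver-specific struct that embeds the CMA GEM object. The names below (foo_bo, foo_gem_create_object) are hypothetical and not part of this patch.

#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/drm_gem_cma_helper.h>

/* Hypothetical driver BO that wraps the CMA object with extra state. */
struct foo_bo {
	struct drm_gem_cma_object base;
	/* Example of driver-private, per-BO state. */
	u64 seqno;
};

/* Hypothetical implementation of struct drm_driver::gem_create_object.
 * The allocation is sized for the wrapper struct; the embedded GEM
 * object is what gets returned to the core helpers.
 */
static struct drm_gem_object *foo_gem_create_object(struct drm_device *dev,
						    size_t size)
{
	struct foo_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);

	if (!bo)
		return NULL;

	return &bo->base.base;
}

The driver would then point .gem_create_object at this function in its struct drm_driver so that BO allocations made through the core helpers carry the extra per-BO state.
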
diff --git a/include/uapi/drm/Kbuild b/include/uapi/drm/Kbuild
index 159551f49313..9355dd8eff3b 100644
--- a/include/uapi/drm/Kbuild
+++ b/include/uapi/drm/Kbuild
@@ -18,4 +18,5 @@ header-y += tegra_drm.h
18header-y += via_drm.h 18header-y += via_drm.h
19header-y += vmwgfx_drm.h 19header-y += vmwgfx_drm.h
20header-y += msm_drm.h 20header-y += msm_drm.h
21header-y += vc4_drm.h
21header-y += virtgpu_drm.h 22header-y += virtgpu_drm.h
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
new file mode 100644
index 000000000000..eeb37e394f13
--- /dev/null
+++ b/include/uapi/drm/vc4_drm.h
@@ -0,0 +1,279 @@
1/*
2 * Copyright © 2014-2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#ifndef _UAPI_VC4_DRM_H_
25#define _UAPI_VC4_DRM_H_
26
27#include "drm.h"
28
29#define DRM_VC4_SUBMIT_CL 0x00
30#define DRM_VC4_WAIT_SEQNO 0x01
31#define DRM_VC4_WAIT_BO 0x02
32#define DRM_VC4_CREATE_BO 0x03
33#define DRM_VC4_MMAP_BO 0x04
34#define DRM_VC4_CREATE_SHADER_BO 0x05
35#define DRM_VC4_GET_HANG_STATE 0x06
36
37#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
38#define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
39#define DRM_IOCTL_VC4_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo)
40#define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
41#define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
42#define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
43#define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state)
44
45struct drm_vc4_submit_rcl_surface {
46 __u32 hindex; /* Handle index, or ~0 if not present. */
47 __u32 offset; /* Offset to start of buffer. */
48 /*
49 * Bits for either render config (color_write) or load/store packet.
50 * Bits should all be 0 for MSAA load/stores.
51 */
52 __u16 bits;
53
54#define VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES (1 << 0)
55 __u16 flags;
56};
57
58/**
59 * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D
60 * engine.
61 *
62 * Drivers typically use GPU BOs to store batchbuffers / command lists and
63 * their associated state. However, because the VC4 lacks an MMU, we have to
64 * do validation of memory accesses by the GPU commands. If we were to store
65 * our commands in BOs, we'd need to do uncached readback from them to do the
66 * validation process, which is too expensive. Instead, userspace accumulates
67 * commands and associated state in plain memory, then the kernel copies the
68 * data to its own address space, and then validates and stores it in a GPU
69 * BO.
70 */
71struct drm_vc4_submit_cl {
72 /* Pointer to the binner command list.
73 *
74 * This is the first set of commands executed, which runs the
75 * coordinate shader to determine where primitives land on the screen,
76 * then writes out the state updates and draw calls necessary per tile
77 * to the tile allocation BO.
78 */
79 __u64 bin_cl;
80
81 /* Pointer to the shader records.
82 *
83 * Shader records are the structures read by the hardware that contain
84 * pointers to uniforms, shaders, and vertex attributes. The
85 * reference to the shader record has enough information to determine
86 * how many pointers are necessary (fixed number for shaders/uniforms,
87 * and an attribute count), so those BO indices into bo_handles are
88 * just stored as __u32s before each shader record passed in.
89 */
90 __u64 shader_rec;
91
92 /* Pointer to uniform data and texture handles for the textures
93 * referenced by the shader.
94 *
95 * For each shader state record, there is a set of uniform data in the
96 * order referenced by the record (FS, VS, then CS). Each set of
97 * uniform data has a __u32 index into bo_handles per texture
98 * sample operation, in the order the QPU_W_TMUn_S writes appear in
99 * the program. Following the texture BO handle indices is the actual
100 * uniform data.
101 *
102 * The individual uniform state blocks don't have sizes passed in,
103 * because the kernel has to determine the sizes anyway during shader
104 * code validation.
105 */
106 __u64 uniforms;
107 __u64 bo_handles;
108
109 /* Size in bytes of the binner command list. */
110 __u32 bin_cl_size;
111 /* Size in bytes of the set of shader records. */
112 __u32 shader_rec_size;
113 /* Number of shader records.
114 *
115 * This could just be computed from the contents of shader_records and
116 * the address bits of references to them from the bin CL, but it
117 * keeps the kernel from having to resize some allocations it makes.
118 */
119 __u32 shader_rec_count;
120 /* Size in bytes of the uniform state. */
121 __u32 uniforms_size;
122
123 /* Number of BO handles passed in (size is that times 4). */
124 __u32 bo_handle_count;
125
126 /* RCL setup: */
127 __u16 width;
128 __u16 height;
129 __u8 min_x_tile;
130 __u8 min_y_tile;
131 __u8 max_x_tile;
132 __u8 max_y_tile;
133 struct drm_vc4_submit_rcl_surface color_read;
134 struct drm_vc4_submit_rcl_surface color_write;
135 struct drm_vc4_submit_rcl_surface zs_read;
136 struct drm_vc4_submit_rcl_surface zs_write;
137 struct drm_vc4_submit_rcl_surface msaa_color_write;
138 struct drm_vc4_submit_rcl_surface msaa_zs_write;
139 __u32 clear_color[2];
140 __u32 clear_z;
141 __u8 clear_s;
142
143 __u32 pad:24;
144
145#define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0)
146 __u32 flags;
147
148 /* Returned value of the seqno of this render job (for the
149 * wait ioctl).
150 */
151 __u64 seqno;
152};
153
154/**
155 * struct drm_vc4_wait_seqno - ioctl argument for waiting for
156 * DRM_VC4_SUBMIT_CL completion using its returned seqno.
157 *
158 * timeout_ns is the timeout in nanoseconds, where "0" means "don't
159 * block, just return the status."
160 */
161struct drm_vc4_wait_seqno {
162 __u64 seqno;
163 __u64 timeout_ns;
164};
165
166/**
167 * struct drm_vc4_wait_bo - ioctl argument for waiting for
168 * completion of the last DRM_VC4_SUBMIT_CL on a BO.
169 *
170 * This is useful for cases where multiple processes might be
171 * rendering to a BO and you want to wait for all rendering to be
172 * completed.
173 */
174struct drm_vc4_wait_bo {
175 __u32 handle;
176 __u32 pad;
177 __u64 timeout_ns;
178};
179
180/**
181 * struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs.
182 *
183 * There are currently no values for the flags argument, but it may be
184 * used in a future extension.
185 */
186struct drm_vc4_create_bo {
187 __u32 size;
188 __u32 flags;
189 /** Returned GEM handle for the BO. */
190 __u32 handle;
191 __u32 pad;
192};
193
194/**
195 * struct drm_vc4_mmap_bo - ioctl argument for mapping VC4 BOs.
196 *
197 * This doesn't actually perform an mmap. Instead, it returns the
198 * offset you need to use in an mmap on the DRM device node. This
199 * means that tools like valgrind end up knowing about the mapped
200 * memory.
201 *
202 * There are currently no values for the flags argument, but it may be
203 * used in a future extension.
204 */
205struct drm_vc4_mmap_bo {
206 /** Handle for the object being mapped. */
207 __u32 handle;
208 __u32 flags;
209 /** offset into the drm node to use for subsequent mmap call. */
210 __u64 offset;
211};
212
213/**
214 * struct drm_vc4_create_shader_bo - ioctl argument for creating VC4
215 * shader BOs.
216 *
217 * Since allowing a shader to be overwritten while it's also being
218 * executed from would allow privilege escalation, shaders must be
219 * created using this ioctl, and they can't be mmapped later.
220 */
221struct drm_vc4_create_shader_bo {
222 /* Size of the data argument. */
223 __u32 size;
224 /* Flags, currently must be 0. */
225 __u32 flags;
226
227 /* Pointer to the data. */
228 __u64 data;
229
230 /** Returned GEM handle for the BO. */
231 __u32 handle;
232 /* Pad, must be 0. */
233 __u32 pad;
234};
235
236struct drm_vc4_get_hang_state_bo {
237 __u32 handle;
238 __u32 paddr;
239 __u32 size;
240 __u32 pad;
241};
242
243/**
244 * struct drm_vc4_get_hang_state - ioctl argument for collecting state
245 * from a GPU hang for analysis.
246 */
247struct drm_vc4_get_hang_state {
248 /** Pointer to array of struct drm_vc4_get_hang_state_bo. */
249 __u64 bo;
250 /**
251 * On input, the size of the bo array. Output is the number
252 * of bos to be returned.
253 */
254 __u32 bo_count;
255
256 __u32 start_bin, start_render;
257
258 __u32 ct0ca, ct0ea;
259 __u32 ct1ca, ct1ea;
260 __u32 ct0cs, ct1cs;
261 __u32 ct0ra0, ct1ra0;
262
263 __u32 bpca, bpcs;
264 __u32 bpoa, bpos;
265
266 __u32 vpmbase;
267
268 __u32 dbge;
269 __u32 fdbgo;
270 __u32 fdbgb;
271 __u32 fdbgr;
272 __u32 fdbgs;
273 __u32 errstat;
274
275 /* Pad that we may save more registers into in the future. */
276 __u32 pad[16];
277};
278
279#endif /* _UAPI_VC4_DRM_H_ */
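
To close out the new uapi, a hedged userspace sketch of the BO workflow the header documents: create a BO, fetch the mmap offset (the ioctl performs no mapping itself), mmap through the DRM node, and wait for outstanding rendering to the BO. Header paths, helper names and error handling are illustrative, and a 64-bit off_t is assumed for the mmap offset.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include "vc4_drm.h"	/* illustrative include path for the new uapi header */

/* Hedged sketch: allocate and CPU-map a vc4 BO of `size` bytes.
 * Returns the mapping (or MAP_FAILED) and the GEM handle via *handle.
 */
static void *create_and_map_bo(int fd, uint32_t size, uint32_t *handle)
{
	struct drm_vc4_create_bo create;
	struct drm_vc4_mmap_bo map;

	memset(&create, 0, sizeof(create));
	create.size = size;
	if (ioctl(fd, DRM_IOCTL_VC4_CREATE_BO, &create) != 0)
		return MAP_FAILED;
	*handle = create.handle;

	/* DRM_VC4_MMAP_BO doesn't map anything itself; it only returns
	 * the offset to pass to mmap() on the DRM device node.
	 */
	memset(&map, 0, sizeof(map));
	map.handle = create.handle;
	if (ioctl(fd, DRM_IOCTL_VC4_MMAP_BO, &map) != 0)
		return MAP_FAILED;

	return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		    fd, map.offset);
}

/* Wait for the last submitted rendering to a BO, with a one second
 * timeout, following the timeout_ns convention documented for the
 * wait_seqno ioctl (0 just polls the current status).
 */
static int wait_bo_idle(int fd, uint32_t handle)
{
	struct drm_vc4_wait_bo wait;

	memset(&wait, 0, sizeof(wait));
	wait.handle = handle;
	wait.timeout_ns = 1000000000ull;

	return ioctl(fd, DRM_IOCTL_VC4_WAIT_BO, &wait);
}
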