author:    Eric Anholt <eric@anholt.net>  2015-11-30 15:13:37 -0500
committer: Eric Anholt <eric@anholt.net>  2015-12-07 23:05:10 -0500
commit:    d5b1a78a772f1e31a94f8babfa964152ec5e9aa5
tree:      fc74c0df66b4e6fd7d610a96fe8cb2a428db8399
parent:    d3f5168a0810005920e7a3d5ba83e249bd9a750c

drm/vc4: Add support for drawing 3D frames.

The user submission is basically a pointer to a command list and a
pointer to uniforms.  We copy those into the kernel, validate and
relocate them, and store the result in a GPU BO which we queue for
execution.

v2: Drop support for NV shader recs (not necessary for GL), simplify
    vc4_use_bo(), improve bin flush/semaphore checks, use __u32 style
    types.

Signed-off-by: Eric Anholt <eric@anholt.net>
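
[Editor's note] For orientation, a minimal userspace sketch of driving the new interface. The field names follow how vc4_gem.c reads the ioctl arguments below; the DRM_IOCTL_VC4_SUBMIT_CL and DRM_IOCTL_VC4_WAIT_SEQNO request macros and the exact struct layout are assumed to match the new include/uapi/drm/vc4_drm.h, whose body is not shown in this diff, and the render-target surface fields consumed by vc4_get_rcl() are omitted for brevity:

	#include <stdint.h>
	#include <xf86drm.h>
	#include "vc4_drm.h"	/* the new uapi header added by this patch */

	/* Hedged sketch, not part of the patch: queue one frame's bin CL,
	 * shader recs, and uniforms, then block until the GPU finishes it. */
	static int submit_and_wait(int fd,
				   void *bin_cl, uint32_t bin_cl_size,
				   void *shader_recs, uint32_t shader_rec_size,
				   uint32_t shader_rec_count,
				   void *uniforms, uint32_t uniforms_size,
				   uint32_t *bo_handles, uint32_t bo_handle_count)
	{
		struct drm_vc4_submit_cl submit = {
			.bin_cl = (uintptr_t)bin_cl,
			.bin_cl_size = bin_cl_size,
			.shader_rec = (uintptr_t)shader_recs,
			.shader_rec_size = shader_rec_size,
			.shader_rec_count = shader_rec_count,
			.uniforms = (uintptr_t)uniforms,
			.uniforms_size = uniforms_size,
			.bo_handles = (uintptr_t)bo_handles,
			.bo_handle_count = bo_handle_count,
		};
		struct drm_vc4_wait_seqno wait = {
			.timeout_ns = ~0ull,	/* block until finished */
		};
		int ret;

		ret = drmIoctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
		if (ret)
			return ret;

		wait.seqno = submit.seqno;	/* filled in by the kernel */
		return drmIoctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
	}
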
-rw-r--r--	drivers/gpu/drm/vc4/Makefile		7
-rw-r--r--	drivers/gpu/drm/vc4/vc4_drv.c		15
-rw-r--r--	drivers/gpu/drm/vc4/vc4_drv.h		182
-rw-r--r--	drivers/gpu/drm/vc4/vc4_gem.c		642
-rw-r--r--	drivers/gpu/drm/vc4/vc4_irq.c		210
-rw-r--r--	drivers/gpu/drm/vc4/vc4_packet.h	399
-rw-r--r--	drivers/gpu/drm/vc4/vc4_render_cl.c	634
-rw-r--r--	drivers/gpu/drm/vc4/vc4_trace.h		63
-rw-r--r--	drivers/gpu/drm/vc4/vc4_trace_points.c	14
-rw-r--r--	drivers/gpu/drm/vc4/vc4_v3d.c		37
-rw-r--r--	drivers/gpu/drm/vc4/vc4_validate.c	900
-rw-r--r--	include/uapi/drm/vc4_drm.h		141
12 files changed, 3243 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile
index e87a6f2f5916..4c6a99f0398c 100644
--- a/drivers/gpu/drm/vc4/Makefile
+++ b/drivers/gpu/drm/vc4/Makefile
@@ -8,12 +8,19 @@ vc4-y := \
 	vc4_crtc.o \
 	vc4_drv.o \
 	vc4_kms.o \
+	vc4_gem.o \
 	vc4_hdmi.o \
 	vc4_hvs.o \
+	vc4_irq.o \
 	vc4_plane.o \
+	vc4_render_cl.o \
+	vc4_trace_points.o \
 	vc4_v3d.o \
+	vc4_validate.o \
 	vc4_validate_shaders.o
 
 vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o
 
 obj-$(CONFIG_DRM_VC4) += vc4.o
+
+CFLAGS_vc4_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index db58d74efe95..2cfee5959455 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -74,6 +74,9 @@ static const struct file_operations vc4_drm_fops = {
 };
 
 static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
@@ -83,10 +86,16 @@ static struct drm_driver vc4_drm_driver = {
 	.driver_features = (DRIVER_MODESET |
 			    DRIVER_ATOMIC |
 			    DRIVER_GEM |
+			    DRIVER_HAVE_IRQ |
 			    DRIVER_PRIME),
 	.lastclose = vc4_lastclose,
 	.preclose = vc4_drm_preclose,
 
+	.irq_handler = vc4_irq,
+	.irq_preinstall = vc4_irq_preinstall,
+	.irq_postinstall = vc4_irq_postinstall,
+	.irq_uninstall = vc4_irq_uninstall,
+
 	.enable_vblank = vc4_enable_vblank,
 	.disable_vblank = vc4_disable_vblank,
 	.get_vblank_counter = drm_vblank_count,
@@ -181,9 +190,11 @@ static int vc4_drm_bind(struct device *dev)
 	if (ret)
 		goto unref;
 
+	vc4_gem_init(drm);
+
 	ret = component_bind_all(dev, drm);
 	if (ret)
-		goto unref;
+		goto gem_destroy;
 
 	ret = drm_dev_register(drm, 0);
 	if (ret < 0)
@@ -207,6 +218,8 @@ unregister:
 	drm_dev_unregister(drm);
 unbind_all:
 	component_unbind_all(dev, drm);
+gem_destroy:
+	vc4_gem_destroy(drm);
unref:
 	drm_dev_unref(drm);
 	vc4_bo_cache_destroy(drm);
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 8945463e70b6..0bc8c57196ac 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -49,6 +49,48 @@ struct vc4_dev {
 
 	/* Protects bo_cache and the BO stats. */
 	struct mutex bo_lock;
+
+	/* Sequence number for the last job queued in job_list.
+	 * Starts at 0 (no jobs emitted).
+	 */
+	uint64_t emit_seqno;
+
+	/* Sequence number for the last completed job on the GPU.
+	 * Starts at 0 (no jobs completed).
+	 */
+	uint64_t finished_seqno;
+
+	/* List of all struct vc4_exec_info for jobs to be executed.
+	 * The first job in the list is the one currently programmed
+	 * into ct0ca/ct1ca for execution.
+	 */
+	struct list_head job_list;
+	/* List of the finished vc4_exec_infos waiting to be freed by
+	 * job_done_work.
+	 */
+	struct list_head job_done_list;
+	/* Spinlock used to synchronize the job_list and seqno
+	 * accesses between the IRQ handler and GEM ioctls.
+	 */
+	spinlock_t job_lock;
+	wait_queue_head_t job_wait_queue;
+	struct work_struct job_done_work;
+
+	/* The binner overflow memory that's currently set up in the
+	 * BPOA/BPOS registers.  When overflow occurs and a new one is
+	 * allocated, the previous one will be moved to the current
+	 * exec's unref_list.
+	 */
+	struct vc4_bo *overflow_mem;
+	struct work_struct overflow_mem_work;
+
+	struct {
+		uint32_t last_ct0ca, last_ct1ca;
+		struct timer_list timer;
+		struct work_struct reset_work;
+	} hangcheck;
+
+	struct semaphore async_modeset;
 };
 
 static inline struct vc4_dev *
@@ -60,6 +102,9 @@ to_vc4_dev(struct drm_device *dev)
 struct vc4_bo {
 	struct drm_gem_cma_object base;
 
+	/* seqno of the last job to render to this BO. */
+	uint64_t seqno;
+
 	/* List entry for the BO's position in either
 	 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
 	 */
@@ -130,6 +175,101 @@ to_vc4_encoder(struct drm_encoder *encoder)
 #define HVS_READ(offset) readl(vc4->hvs->regs + offset)
 #define HVS_WRITE(offset, val) writel(val, vc4->hvs->regs + offset)
 
+struct vc4_exec_info {
+	/* Sequence number for this bin/render job. */
+	uint64_t seqno;
+
+	/* Kernel-space copy of the ioctl arguments */
+	struct drm_vc4_submit_cl *args;
+
+	/* This is the array of BOs that were looked up at the start of exec.
+	 * Command validation will use indices into this array.
+	 */
+	struct drm_gem_cma_object **bo;
+	uint32_t bo_count;
+
+	/* Pointers for our position in vc4->job_list */
+	struct list_head head;
+
+	/* List of other BOs used in the job that need to be released
+	 * once the job is complete.
+	 */
+	struct list_head unref_list;
+
+	/* Current unvalidated indices into @bo loaded by the non-hardware
+	 * VC4_PACKET_GEM_HANDLES.
+	 */
+	uint32_t bo_index[2];
+
+	/* This is the BO where we store the validated command lists, shader
+	 * records, and uniforms.
+	 */
+	struct drm_gem_cma_object *exec_bo;
+
+	/**
+	 * This tracks the per-shader-record state (packet 64) that
+	 * determines the length of the shader record and the offset
+	 * it's expected to be found at.  It gets read in from the
+	 * command lists.
+	 */
+	struct vc4_shader_state {
+		uint32_t addr;
+		/* Maximum vertex index referenced by any primitive using this
+		 * shader state.
+		 */
+		uint32_t max_index;
+	} *shader_state;
+
+	/** How many shader states the user declared they were using. */
+	uint32_t shader_state_size;
+	/** How many shader state records the validator has seen. */
+	uint32_t shader_state_count;
+
+	bool found_tile_binning_mode_config_packet;
+	bool found_start_tile_binning_packet;
+	bool found_increment_semaphore_packet;
+	bool found_flush;
+	uint8_t bin_tiles_x, bin_tiles_y;
+	struct drm_gem_cma_object *tile_bo;
+	uint32_t tile_alloc_offset;
+
+	/**
+	 * Computed addresses pointing into exec_bo where we start the
+	 * bin thread (ct0) and render thread (ct1).
+	 */
+	uint32_t ct0ca, ct0ea;
+	uint32_t ct1ca, ct1ea;
+
+	/* Pointer to the unvalidated bin CL (if present). */
+	void *bin_u;
+
+	/* Pointers to the shader recs.  The paddr gets incremented as CL
+	 * packets are relocated in validate_gl_shader_state, and the vaddrs
+	 * (u and v) get incremented and size decremented as the shader recs
+	 * themselves are validated.
+	 */
+	void *shader_rec_u;
+	void *shader_rec_v;
+	uint32_t shader_rec_p;
+	uint32_t shader_rec_size;
+
+	/* Pointers to the uniform data.  These pointers are incremented, and
+	 * size decremented, as each batch of uniforms is uploaded.
+	 */
+	void *uniforms_u;
+	void *uniforms_v;
+	uint32_t uniforms_p;
+	uint32_t uniforms_size;
+};
+
+static inline struct vc4_exec_info *
+vc4_first_job(struct vc4_dev *vc4)
+{
+	if (list_empty(&vc4->job_list))
+		return NULL;
+	return list_first_entry(&vc4->job_list, struct vc4_exec_info, head);
+}
+
 /**
  * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
  * setup parameters.
@@ -231,10 +371,31 @@ void vc4_debugfs_cleanup(struct drm_minor *minor);
 /* vc4_drv.c */
 void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);
 
+/* vc4_gem.c */
+void vc4_gem_init(struct drm_device *dev);
+void vc4_gem_destroy(struct drm_device *dev);
+int vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv);
+int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv);
+int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv);
+void vc4_submit_next_job(struct drm_device *dev);
+int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
+		       uint64_t timeout_ns, bool interruptible);
+void vc4_job_handle_completed(struct vc4_dev *vc4);
+
 /* vc4_hdmi.c */
 extern struct platform_driver vc4_hdmi_driver;
 int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused);
 
+/* vc4_irq.c */
+irqreturn_t vc4_irq(int irq, void *arg);
+void vc4_irq_preinstall(struct drm_device *dev);
+int vc4_irq_postinstall(struct drm_device *dev);
+void vc4_irq_uninstall(struct drm_device *dev);
+void vc4_irq_reset(struct drm_device *dev);
+
 /* vc4_hvs.c */
 extern struct platform_driver vc4_hvs_driver;
 void vc4_hvs_dump_state(struct drm_device *dev);
@@ -253,6 +414,27 @@ u32 vc4_plane_dlist_size(struct drm_plane_state *state);
 extern struct platform_driver vc4_v3d_driver;
 int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused);
 int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused);
+int vc4_v3d_set_power(struct vc4_dev *vc4, bool on);
+
+/* vc4_validate.c */
+int
+vc4_validate_bin_cl(struct drm_device *dev,
+		    void *validated,
+		    void *unvalidated,
+		    struct vc4_exec_info *exec);
+
+int
+vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
+
+struct drm_gem_cma_object *vc4_use_bo(struct vc4_exec_info *exec,
+				      uint32_t hindex);
+
+int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);
+
+bool vc4_check_tex_size(struct vc4_exec_info *exec,
+			struct drm_gem_cma_object *fbo,
+			uint32_t offset, uint8_t tiling_format,
+			uint32_t width, uint32_t height, uint8_t cpp);
 
 /* vc4_validate_shader.c */
 struct vc4_validated_shader_info *
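
[Editor's note] The u/v/p naming above deserves a gloss: for both shader recs and uniforms there is a _u pointer (the kernel's scratch copy of the user data), a _v pointer (the CPU view of the validated copy inside exec_bo), and a _p address (the bus address of that same validated copy, which is what gets patched into the command stream for the GPU). A hedged sketch of how the cursors advance together; the helper is illustrative, not code from this patch:

	/* Illustrative only: move one shader record of length len from the
	 * scratch copy into the validated BO, keeping the user (u), kernel
	 * virtual (v), and GPU physical (p) cursors in lockstep. */
	static void advance_shader_rec(struct vc4_exec_info *exec, uint32_t len)
	{
		memcpy(exec->shader_rec_v, exec->shader_rec_u, len);
		exec->shader_rec_u += len;
		exec->shader_rec_v += len;
		exec->shader_rec_p += len;	/* what relocated packets point at */
		exec->shader_rec_size -= len;
	}
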
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
new file mode 100644
index 000000000000..936dddfa890f
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -0,0 +1,642 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/io.h>
+
+#include "uapi/drm/vc4_drm.h"
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+#include "vc4_trace.h"
+
+static void
+vc4_queue_hangcheck(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	mod_timer(&vc4->hangcheck.timer,
+		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
+}
+
+static void
+vc4_reset(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	DRM_INFO("Resetting GPU.\n");
+	vc4_v3d_set_power(vc4, false);
+	vc4_v3d_set_power(vc4, true);
+
+	vc4_irq_reset(dev);
+
+	/* Rearm the hangcheck -- another job might have been waiting
+	 * for our hung one to get kicked off, and vc4_irq_reset()
+	 * would have started it.
+	 */
+	vc4_queue_hangcheck(dev);
+}
+
+static void
+vc4_reset_work(struct work_struct *work)
+{
+	struct vc4_dev *vc4 =
+		container_of(work, struct vc4_dev, hangcheck.reset_work);
+
+	vc4_reset(vc4->dev);
+}
+
+static void
+vc4_hangcheck_elapsed(unsigned long data)
+{
+	struct drm_device *dev = (struct drm_device *)data;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	uint32_t ct0ca, ct1ca;
+
+	/* If idle, we can stop watching for hangs. */
+	if (list_empty(&vc4->job_list))
+		return;
+
+	ct0ca = V3D_READ(V3D_CTNCA(0));
+	ct1ca = V3D_READ(V3D_CTNCA(1));
+
+	/* If we've made any progress in execution, rearm the timer
+	 * and wait.
+	 */
+	if (ct0ca != vc4->hangcheck.last_ct0ca ||
+	    ct1ca != vc4->hangcheck.last_ct1ca) {
+		vc4->hangcheck.last_ct0ca = ct0ca;
+		vc4->hangcheck.last_ct1ca = ct1ca;
+		vc4_queue_hangcheck(dev);
+		return;
+	}
+
+	/* We've gone too long with no progress, reset.  This has to
+	 * be done from a work struct, since resetting can sleep and
+	 * this timer hook isn't allowed to.
+	 */
+	schedule_work(&vc4->hangcheck.reset_work);
+}
+
+static void
+submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Set the current and end address of the control list.
+	 * Writing the end register is what starts the job.
+	 */
+	V3D_WRITE(V3D_CTNCA(thread), start);
+	V3D_WRITE(V3D_CTNEA(thread), end);
+}
+
+int
+vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
+		   bool interruptible)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	int ret = 0;
+	unsigned long timeout_expire;
+	DEFINE_WAIT(wait);
+
+	if (vc4->finished_seqno >= seqno)
+		return 0;
+
+	if (timeout_ns == 0)
+		return -ETIME;
+
+	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);
+
+	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
+	for (;;) {
+		prepare_to_wait(&vc4->job_wait_queue, &wait,
+				interruptible ? TASK_INTERRUPTIBLE :
+						TASK_UNINTERRUPTIBLE);
+
+		if (interruptible && signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		if (vc4->finished_seqno >= seqno)
+			break;
+
+		if (timeout_ns != ~0ull) {
+			if (time_after_eq(jiffies, timeout_expire)) {
+				ret = -ETIME;
+				break;
+			}
+			schedule_timeout(timeout_expire - jiffies);
+		} else {
+			schedule();
+		}
+	}
+
+	finish_wait(&vc4->job_wait_queue, &wait);
+	trace_vc4_wait_for_seqno_end(dev, seqno);
+
+	if (ret && ret != -ERESTARTSYS) {
+		DRM_ERROR("timeout waiting for render thread idle\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void
+vc4_flush_caches(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Flush the GPU L2 caches.  These caches sit on top of system
+	 * L3 (the 128kb or so shared with the CPU), and are
+	 * non-allocating in the L3.
+	 */
+	V3D_WRITE(V3D_L2CACTL,
+		  V3D_L2CACTL_L2CCLR);
+
+	V3D_WRITE(V3D_SLCACTL,
+		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
+		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
+		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
+		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
+}
+
+/* Sets the registers for the next job to actually be executed in
+ * the hardware.
+ *
+ * The job_lock should be held during this.
+ */
+void
+vc4_submit_next_job(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_exec_info *exec = vc4_first_job(vc4);
+
+	if (!exec)
+		return;
+
+	vc4_flush_caches(dev);
+
+	/* Disable the binner's pre-loaded overflow memory address */
+	V3D_WRITE(V3D_BPOA, 0);
+	V3D_WRITE(V3D_BPOS, 0);
+
+	if (exec->ct0ca != exec->ct0ea)
+		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
+	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
+}
+
+static void
+vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
+{
+	struct vc4_bo *bo;
+	unsigned i;
+
+	for (i = 0; i < exec->bo_count; i++) {
+		bo = to_vc4_bo(&exec->bo[i]->base);
+		bo->seqno = seqno;
+	}
+
+	list_for_each_entry(bo, &exec->unref_list, unref_head) {
+		bo->seqno = seqno;
+	}
+}
+
+/* Queues a struct vc4_exec_info for execution.  If no job is
+ * currently executing, then submits it.
+ *
+ * Unlike most GPUs, our hardware only handles one command list at a
+ * time.  To queue multiple jobs at once, we'd need to edit the
+ * previous command list to have a jump to the new one at the end, and
+ * then bump the end address.  That's a change for a later date,
+ * though.
+ */
+static void
+vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	uint64_t seqno;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+
+	seqno = ++vc4->emit_seqno;
+	exec->seqno = seqno;
+	vc4_update_bo_seqnos(exec, seqno);
+
+	list_add_tail(&exec->head, &vc4->job_list);
+
+	/* If no job was executing, kick ours off.  Otherwise, it'll
+	 * get started when the previous job's frame done interrupt
+	 * occurs.
+	 */
+	if (vc4_first_job(vc4) == exec) {
+		vc4_submit_next_job(dev);
+		vc4_queue_hangcheck(dev);
+	}
+
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+}
+
+/**
+ * Looks up a bunch of GEM handles for BOs and stores the array for
+ * use in the command validator that actually writes relocated
+ * addresses pointing to them.
+ */
+static int
+vc4_cl_lookup_bos(struct drm_device *dev,
+		  struct drm_file *file_priv,
+		  struct vc4_exec_info *exec)
+{
+	struct drm_vc4_submit_cl *args = exec->args;
+	uint32_t *handles;
+	int ret = 0;
+	int i;
+
+	exec->bo_count = args->bo_handle_count;
+
+	if (!exec->bo_count) {
+		/* See comment on bo_index for why we have to check
+		 * this.
+		 */
+		DRM_ERROR("Rendering requires BOs to validate\n");
+		return -EINVAL;
+	}
+
+	exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
+			   GFP_KERNEL);
+	if (!exec->bo) {
+		DRM_ERROR("Failed to allocate validated BO pointers\n");
+		return -ENOMEM;
+	}
+
+	handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
+	if (!handles) {
+		DRM_ERROR("Failed to allocate incoming GEM handles\n");
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(handles,
+			   (void __user *)(uintptr_t)args->bo_handles,
+			   exec->bo_count * sizeof(uint32_t))) {
+		DRM_ERROR("Failed to copy in GEM handles\n");
+		ret = -EFAULT;
+		goto fail;
+	}
+
+	spin_lock(&file_priv->table_lock);
+	for (i = 0; i < exec->bo_count; i++) {
+		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
+						     handles[i]);
+		if (!bo) {
+			DRM_ERROR("Failed to look up GEM BO %d: %d\n",
+				  i, handles[i]);
+			ret = -EINVAL;
+			spin_unlock(&file_priv->table_lock);
+			goto fail;
+		}
+		drm_gem_object_reference(bo);
+		exec->bo[i] = (struct drm_gem_cma_object *)bo;
+	}
+	spin_unlock(&file_priv->table_lock);
+
+fail:
+	drm_free_large(handles);
+	return ret;
+}
+
+static int
+vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	struct drm_vc4_submit_cl *args = exec->args;
+	void *temp = NULL;
+	void *bin;
+	int ret = 0;
+	uint32_t bin_offset = 0;
+	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
+					     16);
+	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
+	uint32_t exec_size = uniforms_offset + args->uniforms_size;
+	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
+					  args->shader_rec_count);
+	struct vc4_bo *bo;
+
+	if (uniforms_offset < shader_rec_offset ||
+	    exec_size < uniforms_offset ||
+	    args->shader_rec_count >= (UINT_MAX /
+				       sizeof(struct vc4_shader_state)) ||
+	    temp_size < exec_size) {
+		DRM_ERROR("overflow in exec arguments\n");
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Allocate space where we'll store the copied in user command lists
+	 * and shader records.
+	 *
+	 * We don't just copy directly into the BOs because we need to
+	 * read the contents back for validation, and I think the
+	 * bo->vaddr is uncached access.
+	 */
+	temp = kmalloc(temp_size, GFP_KERNEL);
+	if (!temp) {
+		DRM_ERROR("Failed to allocate storage for copying "
+			  "in bin/render CLs.\n");
+		ret = -ENOMEM;
+		goto fail;
+	}
+	bin = temp + bin_offset;
+	exec->shader_rec_u = temp + shader_rec_offset;
+	exec->uniforms_u = temp + uniforms_offset;
+	exec->shader_state = temp + exec_size;
+	exec->shader_state_size = args->shader_rec_count;
+
+	if (copy_from_user(bin,
+			   (void __user *)(uintptr_t)args->bin_cl,
+			   args->bin_cl_size)) {
+		DRM_ERROR("Failed to copy in bin cl\n");
+		ret = -EFAULT;
+		goto fail;
+	}
+
+	if (copy_from_user(exec->shader_rec_u,
+			   (void __user *)(uintptr_t)args->shader_rec,
+			   args->shader_rec_size)) {
+		DRM_ERROR("Failed to copy in shader recs\n");
+		ret = -EFAULT;
+		goto fail;
+	}
+
+	if (copy_from_user(exec->uniforms_u,
+			   (void __user *)(uintptr_t)args->uniforms,
+			   args->uniforms_size)) {
+		DRM_ERROR("Failed to copy in uniforms cl\n");
+		ret = -EFAULT;
+		goto fail;
+	}
+
+	bo = vc4_bo_create(dev, exec_size, true);
+	if (!bo) {
+		DRM_ERROR("Couldn't allocate BO for binning\n");
+		ret = -ENOMEM;
+		goto fail;
+	}
+	exec->exec_bo = &bo->base;
+
+	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
+		      &exec->unref_list);
+
+	exec->ct0ca = exec->exec_bo->paddr + bin_offset;
+
+	exec->bin_u = bin;
+
+	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
+	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
+	exec->shader_rec_size = args->shader_rec_size;
+
+	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
+	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
+	exec->uniforms_size = args->uniforms_size;
+
+	ret = vc4_validate_bin_cl(dev,
+				  exec->exec_bo->vaddr + bin_offset,
+				  bin,
+				  exec);
+	if (ret)
+		goto fail;
+
+	ret = vc4_validate_shader_recs(dev, exec);
+
+fail:
+	kfree(temp);
+	return ret;
+}
+
+static void
+vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	unsigned i;
+
+	/* Need the struct lock for drm_gem_object_unreference(). */
+	mutex_lock(&dev->struct_mutex);
+	if (exec->bo) {
+		for (i = 0; i < exec->bo_count; i++)
+			drm_gem_object_unreference(&exec->bo[i]->base);
+		kfree(exec->bo);
+	}
+
+	while (!list_empty(&exec->unref_list)) {
+		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
+						     struct vc4_bo, unref_head);
+		list_del(&bo->unref_head);
+		drm_gem_object_unreference(&bo->base.base);
+	}
+	mutex_unlock(&dev->struct_mutex);
+
+	kfree(exec);
+}
+
+void
+vc4_job_handle_completed(struct vc4_dev *vc4)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	while (!list_empty(&vc4->job_done_list)) {
+		struct vc4_exec_info *exec =
+			list_first_entry(&vc4->job_done_list,
+					 struct vc4_exec_info, head);
+		list_del(&exec->head);
+
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		vc4_complete_exec(vc4->dev, exec);
+		spin_lock_irqsave(&vc4->job_lock, irqflags);
+	}
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+}
+
+/* Scheduled when any job has been completed, this walks the list of
+ * jobs that had completed and unrefs their BOs and frees their exec
+ * structs.
+ */
+static void
+vc4_job_done_work(struct work_struct *work)
+{
+	struct vc4_dev *vc4 =
+		container_of(work, struct vc4_dev, job_done_work);
+
+	vc4_job_handle_completed(vc4);
+}
+
+static int
+vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
+				uint64_t seqno,
+				uint64_t *timeout_ns)
+{
+	unsigned long start = jiffies;
+	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);
+
+	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
+		uint64_t delta = jiffies_to_nsecs(jiffies - start);
+
+		if (*timeout_ns >= delta)
+			*timeout_ns -= delta;
+	}
+
+	return ret;
+}
+
+int
+vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	struct drm_vc4_wait_seqno *args = data;
+
+	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
+					       &args->timeout_ns);
+}
+
+int
+vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
+		  struct drm_file *file_priv)
+{
+	int ret;
+	struct drm_vc4_wait_bo *args = data;
+	struct drm_gem_object *gem_obj;
+	struct vc4_bo *bo;
+
+	gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
+		return -EINVAL;
+	}
+	bo = to_vc4_bo(gem_obj);
+
+	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
+					      &args->timeout_ns);
+
+	drm_gem_object_unreference_unlocked(gem_obj);
+	return ret;
+}
+
+/**
+ * Submits a command list to the VC4.
+ *
+ * This is what is called batchbuffer emitting on other hardware.
+ */
+int
+vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file_priv)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct drm_vc4_submit_cl *args = data;
+	struct vc4_exec_info *exec;
+	int ret;
+
+	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
+		DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
+		return -EINVAL;
+	}
+
+	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
+	if (!exec) {
+		DRM_ERROR("malloc failure on exec struct\n");
+		return -ENOMEM;
+	}
+
+	exec->args = args;
+	INIT_LIST_HEAD(&exec->unref_list);
+
+	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
+	if (ret)
+		goto fail;
+
+	if (exec->args->bin_cl_size != 0) {
+		ret = vc4_get_bcl(dev, exec);
+		if (ret)
+			goto fail;
+	} else {
+		exec->ct0ca = 0;
+		exec->ct0ea = 0;
+	}
+
+	ret = vc4_get_rcl(dev, exec);
+	if (ret)
+		goto fail;
+
+	/* Clear this out of the struct we'll be putting in the queue,
+	 * since it's part of our stack.
+	 */
+	exec->args = NULL;
+
+	vc4_queue_submit(dev, exec);
+
+	/* Return the seqno for our job. */
+	args->seqno = vc4->emit_seqno;
+
+	return 0;
+
+fail:
+	vc4_complete_exec(vc4->dev, exec);
+
+	return ret;
+}
+
+void
+vc4_gem_init(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	INIT_LIST_HEAD(&vc4->job_list);
+	INIT_LIST_HEAD(&vc4->job_done_list);
+	spin_lock_init(&vc4->job_lock);
+
+	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
+	setup_timer(&vc4->hangcheck.timer,
+		    vc4_hangcheck_elapsed,
+		    (unsigned long)dev);
+
+	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
+}
+
+void
+vc4_gem_destroy(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Waiting for exec to finish would need to be done before
+	 * unregistering V3D.
+	 */
+	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);
+
+	/* V3D should already have disabled its interrupt and cleared
+	 * the overflow allocation registers.  Now free the object.
+	 */
+	if (vc4->overflow_mem) {
+		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
+		vc4->overflow_mem = NULL;
+	}
+
+	vc4_bo_cache_destroy(dev);
+}
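
[Editor's note] To make the layout computed at the top of vc4_get_bcl() concrete, a worked example with invented sizes: for bin_cl_size = 100, shader_rec_size = 200, uniforms_size = 64 and shader_rec_count = 4, we get shader_rec_offset = roundup(0 + 100, 16) = 112, uniforms_offset = 112 + 200 = 312, and exec_size = 312 + 64 = 376. The temporary allocation then appends the validator's bookkeeping, temp_size = 376 + 4 * sizeof(struct vc4_shader_state), so a single kmalloc holds the scratch copies and the shader-state array back to back, and the same offsets are reused for the validated copies inside exec_bo.
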
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
new file mode 100644
index 000000000000..b68060e758db
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** DOC: Interrupt management for the V3D engine.
+ *
+ * We have an interrupt status register (V3D_INTCTL) which reports
+ * interrupts, and where writing 1 bits clears those interrupts.
+ * There are also a pair of interrupt registers
+ * (V3D_INTENA/V3D_INTDIS) where writing a 1 to their bits enables or
+ * disables that specific interrupt, and 0s written are ignored
+ * (reading either one returns the set of enabled interrupts).
+ *
+ * When we take a render frame interrupt, we need to wake the
+ * processes waiting for some frame to be done, and get the next frame
+ * submitted ASAP (so the hardware doesn't sit idle when there's work
+ * to do).
+ *
+ * When we take the binner out of memory interrupt, we need to
+ * allocate some new memory and pass it to the binner so that the
+ * current job can make progress.
+ */
+
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+#define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \
+			 V3D_INT_FRDONE)
+
+DECLARE_WAIT_QUEUE_HEAD(render_wait);
+
+static void
+vc4_overflow_mem_work(struct work_struct *work)
+{
+	struct vc4_dev *vc4 =
+		container_of(work, struct vc4_dev, overflow_mem_work);
+	struct drm_device *dev = vc4->dev;
+	struct vc4_bo *bo;
+
+	bo = vc4_bo_create(dev, 256 * 1024, true);
+	if (!bo) {
+		DRM_ERROR("Couldn't allocate binner overflow mem\n");
+		return;
+	}
+
+	/* If there's a job executing currently, then our previous
+	 * overflow allocation is getting used in that job and we need
+	 * to queue it to be released when the job is done.  But if no
+	 * job is executing at all, then we can free the old overflow
+	 * object directly.
+	 *
+	 * No lock necessary for this pointer since we're the only
+	 * ones that update the pointer, and our workqueue won't
+	 * reenter.
+	 */
+	if (vc4->overflow_mem) {
+		struct vc4_exec_info *current_exec;
+		unsigned long irqflags;
+
+		spin_lock_irqsave(&vc4->job_lock, irqflags);
+		current_exec = vc4_first_job(vc4);
+		if (current_exec) {
+			vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
+			list_add_tail(&vc4->overflow_mem->unref_head,
+				      &current_exec->unref_list);
+			vc4->overflow_mem = NULL;
+		}
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+	}
+
+	if (vc4->overflow_mem)
+		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
+	vc4->overflow_mem = bo;
+
+	V3D_WRITE(V3D_BPOA, bo->base.paddr);
+	V3D_WRITE(V3D_BPOS, bo->base.base.size);
+	V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM);
+	V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
+}
+
+static void
+vc4_irq_finish_job(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_exec_info *exec = vc4_first_job(vc4);
+
+	if (!exec)
+		return;
+
+	vc4->finished_seqno++;
+	list_move_tail(&exec->head, &vc4->job_done_list);
+	vc4_submit_next_job(dev);
+
+	wake_up_all(&vc4->job_wait_queue);
+	schedule_work(&vc4->job_done_work);
+}
+
+irqreturn_t
+vc4_irq(int irq, void *arg)
+{
+	struct drm_device *dev = arg;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	uint32_t intctl;
+	irqreturn_t status = IRQ_NONE;
+
+	barrier();
+	intctl = V3D_READ(V3D_INTCTL);
+
+	/* Acknowledge the interrupts we're handling here.  The render
+	 * frame done interrupt will be cleared, while OUTOMEM will
+	 * stay high until the underlying cause is cleared.
+	 */
+	V3D_WRITE(V3D_INTCTL, intctl);
+
+	if (intctl & V3D_INT_OUTOMEM) {
+		/* Disable OUTOMEM until the work is done. */
+		V3D_WRITE(V3D_INTDIS, V3D_INT_OUTOMEM);
+		schedule_work(&vc4->overflow_mem_work);
+		status = IRQ_HANDLED;
+	}
+
+	if (intctl & V3D_INT_FRDONE) {
+		spin_lock(&vc4->job_lock);
+		vc4_irq_finish_job(dev);
+		spin_unlock(&vc4->job_lock);
+		status = IRQ_HANDLED;
+	}
+
+	return status;
+}
+
+void
+vc4_irq_preinstall(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	init_waitqueue_head(&vc4->job_wait_queue);
+	INIT_WORK(&vc4->overflow_mem_work, vc4_overflow_mem_work);
+
+	/* Clear any pending interrupts someone might have left around
+	 * for us.
+	 */
+	V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
+}
+
+int
+vc4_irq_postinstall(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Enable both the render done and out of memory interrupts. */
+	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
+
+	return 0;
+}
+
+void
+vc4_irq_uninstall(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Disable sending interrupts for our driver's IRQs. */
+	V3D_WRITE(V3D_INTDIS, V3D_DRIVER_IRQS);
+
+	/* Clear any pending interrupts we might have left. */
+	V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
+
+	cancel_work_sync(&vc4->overflow_mem_work);
+}
+
+/** Reinitializes interrupt registers when a GPU reset is performed. */
+void vc4_irq_reset(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	unsigned long irqflags;
+
+	/* Acknowledge any stale IRQs. */
+	V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
+
+	/*
+	 * Turn all our interrupts on.  Binner out of memory is the
+	 * only one we expect to trigger at this point, since we've
+	 * just come from poweron and haven't supplied any overflow
+	 * memory yet.
+	 */
+	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	vc4_irq_finish_job(dev);
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+}
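
[Editor's note] Because INTENA and INTDIS ignore written 0 bits, a single source can be masked or re-armed without a read-modify-write of the enable set — which is why vc4_irq() can disable just OUTOMEM from hard-IRQ context while vc4_overflow_mem_work() later re-enables it, with no lock around the register writes. A hedged sketch of that idiom (illustrative helpers, not code from this patch):

	/* Illustrative only: mask one interrupt source while its deferred
	 * work runs, then ack and re-enable it afterwards. */
	static void v3d_mask_source(struct vc4_dev *vc4, uint32_t source)
	{
		V3D_WRITE(V3D_INTDIS, source);	/* 1 bits disable, 0 bits ignored */
	}

	static void v3d_unmask_source(struct vc4_dev *vc4, uint32_t source)
	{
		V3D_WRITE(V3D_INTCTL, source);	/* ack any stale pending bit */
		V3D_WRITE(V3D_INTENA, source);	/* 1 bits enable, 0 bits ignored */
	}
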
diff --git a/drivers/gpu/drm/vc4/vc4_packet.h b/drivers/gpu/drm/vc4/vc4_packet.h
new file mode 100644
index 000000000000..0f31cc06500f
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_packet.h
@@ -0,0 +1,399 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC4_PACKET_H
+#define VC4_PACKET_H
+
+#include "vc4_regs.h" /* for VC4_MASK, VC4_GET_FIELD, VC4_SET_FIELD */
+
+enum vc4_packet {
+	VC4_PACKET_HALT = 0,
+	VC4_PACKET_NOP = 1,
+
+	VC4_PACKET_FLUSH = 4,
+	VC4_PACKET_FLUSH_ALL = 5,
+	VC4_PACKET_START_TILE_BINNING = 6,
+	VC4_PACKET_INCREMENT_SEMAPHORE = 7,
+	VC4_PACKET_WAIT_ON_SEMAPHORE = 8,
+
+	VC4_PACKET_BRANCH = 16,
+	VC4_PACKET_BRANCH_TO_SUB_LIST = 17,
+
+	VC4_PACKET_STORE_MS_TILE_BUFFER = 24,
+	VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25,
+	VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26,
+	VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27,
+	VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28,
+	VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29,
+
+	VC4_PACKET_GL_INDEXED_PRIMITIVE = 32,
+	VC4_PACKET_GL_ARRAY_PRIMITIVE = 33,
+
+	VC4_PACKET_COMPRESSED_PRIMITIVE = 48,
+	VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49,
+
+	VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56,
+
+	VC4_PACKET_GL_SHADER_STATE = 64,
+	VC4_PACKET_NV_SHADER_STATE = 65,
+	VC4_PACKET_VG_SHADER_STATE = 66,
+
+	VC4_PACKET_CONFIGURATION_BITS = 96,
+	VC4_PACKET_FLAT_SHADE_FLAGS = 97,
+	VC4_PACKET_POINT_SIZE = 98,
+	VC4_PACKET_LINE_WIDTH = 99,
+	VC4_PACKET_RHT_X_BOUNDARY = 100,
+	VC4_PACKET_DEPTH_OFFSET = 101,
+	VC4_PACKET_CLIP_WINDOW = 102,
+	VC4_PACKET_VIEWPORT_OFFSET = 103,
+	VC4_PACKET_Z_CLIPPING = 104,
+	VC4_PACKET_CLIPPER_XY_SCALING = 105,
+	VC4_PACKET_CLIPPER_Z_SCALING = 106,
+
+	VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112,
+	VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113,
+	VC4_PACKET_CLEAR_COLORS = 114,
+	VC4_PACKET_TILE_COORDINATES = 115,
+
+	/* Not an actual hardware packet -- this is what we use to put
+	 * references to GEM BOs in the command stream, since we need the u32
+	 * in the actual address packet in order to store the offset from the
+	 * start of the BO.
+	 */
+	VC4_PACKET_GEM_HANDLES = 254,
+} __attribute__ ((__packed__));
+
+#define VC4_PACKET_HALT_SIZE				1
+#define VC4_PACKET_NOP_SIZE				1
+#define VC4_PACKET_FLUSH_SIZE				1
+#define VC4_PACKET_FLUSH_ALL_SIZE			1
+#define VC4_PACKET_START_TILE_BINNING_SIZE		1
+#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE		1
+#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE		1
+#define VC4_PACKET_BRANCH_SIZE				5
+#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE		5
+#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE		1
+#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE	1
+#define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE	5
+#define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE	5
+#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE	7
+#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE	7
+#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE		14
+#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE		10
+#define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE		1
+#define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE	1
+#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE		2
+#define VC4_PACKET_GL_SHADER_STATE_SIZE			5
+#define VC4_PACKET_NV_SHADER_STATE_SIZE			5
+#define VC4_PACKET_VG_SHADER_STATE_SIZE			5
+#define VC4_PACKET_CONFIGURATION_BITS_SIZE		4
+#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE		5
+#define VC4_PACKET_POINT_SIZE_SIZE			5
+#define VC4_PACKET_LINE_WIDTH_SIZE			5
+#define VC4_PACKET_RHT_X_BOUNDARY_SIZE			3
+#define VC4_PACKET_DEPTH_OFFSET_SIZE			5
+#define VC4_PACKET_CLIP_WINDOW_SIZE			9
+#define VC4_PACKET_VIEWPORT_OFFSET_SIZE			5
+#define VC4_PACKET_Z_CLIPPING_SIZE			9
+#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE		9
+#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE		9
+#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE	16
+#define VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE	11
+#define VC4_PACKET_CLEAR_COLORS_SIZE			14
+#define VC4_PACKET_TILE_COORDINATES_SIZE		3
+#define VC4_PACKET_GEM_HANDLES_SIZE			9
+
+/* Number of multisamples supported. */
+#define VC4_MAX_SAMPLES					4
+/* Size of a full resolution color or Z tile buffer load/store. */
+#define VC4_TILE_BUFFER_SIZE				(64 * 64 * 4)
+
+/** @{
+ * Bits used by packets like VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_TILE_RENDERING_MODE_CONFIG.
+ */
+#define VC4_TILING_FORMAT_LINEAR	0
+#define VC4_TILING_FORMAT_T		1
+#define VC4_TILING_FORMAT_LT		2
+/** @} */
+
+/** @{
+ *
+ * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
+ * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
+ */
+#define VC4_LOADSTORE_FULL_RES_EOF			BIT(3)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL	BIT(2)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS		BIT(1)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR		BIT(0)
+/** @} */
+
+/** @{
+ *
+ * byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address)
+ */
+
+#define VC4_LOADSTORE_TILE_BUFFER_EOF			BIT(3)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK	BIT(2)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS	BIT(1)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR	BIT(0)
+
+/** @} */
+
+/** @{
+ *
+ * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
+ */
+#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR	BIT(15)
+#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR		BIT(14)
+#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR	BIT(13)
+#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP		BIT(12)
+
+#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK		VC4_MASK(9, 8)
+#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT		8
+#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888		0
+#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER		1
+#define VC4_LOADSTORE_TILE_BUFFER_BGR565		2
+/** @} */
+
+/** @{
+ *
+ * byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
+ */
+#define VC4_STORE_TILE_BUFFER_MODE_MASK			VC4_MASK(7, 6)
+#define VC4_STORE_TILE_BUFFER_MODE_SHIFT		6
+#define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0		(0 << 6)
+#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4		(1 << 6)
+#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16		(2 << 6)
+
+/** The values of the field are VC4_TILING_FORMAT_* */
+#define VC4_LOADSTORE_TILE_BUFFER_TILING_MASK		VC4_MASK(5, 4)
+#define VC4_LOADSTORE_TILE_BUFFER_TILING_SHIFT		4
+
+#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK		VC4_MASK(2, 0)
+#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_SHIFT		0
+#define VC4_LOADSTORE_TILE_BUFFER_NONE			0
+#define VC4_LOADSTORE_TILE_BUFFER_COLOR			1
+#define VC4_LOADSTORE_TILE_BUFFER_ZS			2
+#define VC4_LOADSTORE_TILE_BUFFER_Z			3
+#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK		4
+#define VC4_LOADSTORE_TILE_BUFFER_FULL			5
+/** @} */
+
+#define VC4_INDEX_BUFFER_U8				(0 << 4)
+#define VC4_INDEX_BUFFER_U16				(1 << 4)
+
+/* This flag is only present in NV shader state. */
+#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS		BIT(3)
+#define VC4_SHADER_FLAG_ENABLE_CLIPPING			BIT(2)
+#define VC4_SHADER_FLAG_VS_POINT_SIZE			BIT(1)
+#define VC4_SHADER_FLAG_FS_SINGLE_THREAD		BIT(0)
+
+/** @{ byte 2 of config bits. */
+#define VC4_CONFIG_BITS_EARLY_Z_UPDATE			BIT(1)
+#define VC4_CONFIG_BITS_EARLY_Z				BIT(0)
+/** @} */
+
+/** @{ byte 1 of config bits. */
+#define VC4_CONFIG_BITS_Z_UPDATE			BIT(7)
+/** same values in this 3-bit field as PIPE_FUNC_* */
+#define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT		4
+#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE		BIT(3)
+
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO		(0 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD		(1 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR		(2 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO		(3 << 1)
+
+#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT		BIT(0)
+/** @} */
+
+/** @{ byte 0 of config bits. */
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_NONE	(0 << 6)
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X	(1 << 6)
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X	(2 << 6)
+
+#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES		BIT(4)
+#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET		BIT(3)
+#define VC4_CONFIG_BITS_CW_PRIMITIVES			BIT(2)
+#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK		BIT(1)
+#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT		BIT(0)
+/** @} */
+
+/** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */
+#define VC4_BIN_CONFIG_DB_NON_MS			BIT(7)
+
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK		VC4_MASK(6, 5)
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT		5
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32		0
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64		1
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128		2
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256		3
+
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK	VC4_MASK(4, 3)
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT	3
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32		0
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64		1
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128	2
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256	3
+
+#define VC4_BIN_CONFIG_AUTO_INIT_TSDA			BIT(2)
+#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT		BIT(1)
+#define VC4_BIN_CONFIG_MS_MODE_4X			BIT(0)
+/** @} */
+
+/** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */
+#define VC4_RENDER_CONFIG_DB_NON_MS			BIT(12)
+#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE	BIT(11)
+#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G		BIT(10)
+#define VC4_RENDER_CONFIG_COVERAGE_MODE			BIT(9)
+#define VC4_RENDER_CONFIG_ENABLE_VG_MASK		BIT(8)
+
+/** The values of the field are VC4_TILING_FORMAT_* */
+#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK		VC4_MASK(7, 6)
+#define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT		6
+
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_1X		(0 << 4)
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_4X		(1 << 4)
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_16X		(2 << 4)
+
+#define VC4_RENDER_CONFIG_FORMAT_MASK			VC4_MASK(3, 2)
+#define VC4_RENDER_CONFIG_FORMAT_SHIFT			2
+#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED	0
+#define VC4_RENDER_CONFIG_FORMAT_RGBA8888		1
+#define VC4_RENDER_CONFIG_FORMAT_BGR565			2
+
+#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT		BIT(1)
+#define VC4_RENDER_CONFIG_MS_MODE_4X			BIT(0)
+
+#define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX		(1 << 4)
+#define VC4_PRIMITIVE_LIST_FORMAT_32_XY			(3 << 4)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS		(0 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES		(1 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES	(2 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT		(3 << 0)
+
+enum vc4_texture_data_type {
+	VC4_TEXTURE_TYPE_RGBA8888 = 0,
+	VC4_TEXTURE_TYPE_RGBX8888 = 1,
+	VC4_TEXTURE_TYPE_RGBA4444 = 2,
+	VC4_TEXTURE_TYPE_RGBA5551 = 3,
+	VC4_TEXTURE_TYPE_RGB565 = 4,
+	VC4_TEXTURE_TYPE_LUMINANCE = 5,
+	VC4_TEXTURE_TYPE_ALPHA = 6,
+	VC4_TEXTURE_TYPE_LUMALPHA = 7,
+	VC4_TEXTURE_TYPE_ETC1 = 8,
+	VC4_TEXTURE_TYPE_S16F = 9,
+	VC4_TEXTURE_TYPE_S8 = 10,
+	VC4_TEXTURE_TYPE_S16 = 11,
+	VC4_TEXTURE_TYPE_BW1 = 12,
+	VC4_TEXTURE_TYPE_A4 = 13,
+	VC4_TEXTURE_TYPE_A1 = 14,
+	VC4_TEXTURE_TYPE_RGBA64 = 15,
+	VC4_TEXTURE_TYPE_RGBA32R = 16,
+	VC4_TEXTURE_TYPE_YUV422R = 17,
+};
+
+#define VC4_TEX_P0_OFFSET_MASK				VC4_MASK(31, 12)
+#define VC4_TEX_P0_OFFSET_SHIFT				12
+#define VC4_TEX_P0_CSWIZ_MASK				VC4_MASK(11, 10)
+#define VC4_TEX_P0_CSWIZ_SHIFT				10
+#define VC4_TEX_P0_CMMODE_MASK				VC4_MASK(9, 9)
+#define VC4_TEX_P0_CMMODE_SHIFT				9
+#define VC4_TEX_P0_FLIPY_MASK				VC4_MASK(8, 8)
+#define VC4_TEX_P0_FLIPY_SHIFT				8
+#define VC4_TEX_P0_TYPE_MASK				VC4_MASK(7, 4)
+#define VC4_TEX_P0_TYPE_SHIFT				4
+#define VC4_TEX_P0_MIPLVLS_MASK				VC4_MASK(3, 0)
+#define VC4_TEX_P0_MIPLVLS_SHIFT			0
+
+#define VC4_TEX_P1_TYPE4_MASK				VC4_MASK(31, 31)
+#define VC4_TEX_P1_TYPE4_SHIFT				31
+#define VC4_TEX_P1_HEIGHT_MASK				VC4_MASK(30, 20)
+#define VC4_TEX_P1_HEIGHT_SHIFT				20
+#define VC4_TEX_P1_ETCFLIP_MASK				VC4_MASK(19, 19)
+#define VC4_TEX_P1_ETCFLIP_SHIFT			19
+#define VC4_TEX_P1_WIDTH_MASK				VC4_MASK(18, 8)
+#define VC4_TEX_P1_WIDTH_SHIFT				8
+
+#define VC4_TEX_P1_MAGFILT_MASK				VC4_MASK(7, 7)
+#define VC4_TEX_P1_MAGFILT_SHIFT			7
+# define VC4_TEX_P1_MAGFILT_LINEAR			0
+# define VC4_TEX_P1_MAGFILT_NEAREST			1
+
+#define VC4_TEX_P1_MINFILT_MASK				VC4_MASK(6, 4)
+#define VC4_TEX_P1_MINFILT_SHIFT			4
+# define VC4_TEX_P1_MINFILT_LINEAR			0
+# define VC4_TEX_P1_MINFILT_NEAREST			1
+# define VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR		2
+# define VC4_TEX_P1_MINFILT_NEAR_MIP_LIN		3
+# define VC4_TEX_P1_MINFILT_LIN_MIP_NEAR		4
+# define VC4_TEX_P1_MINFILT_LIN_MIP_LIN			5
+
+#define VC4_TEX_P1_WRAP_T_MASK				VC4_MASK(3, 2)
+#define VC4_TEX_P1_WRAP_T_SHIFT				2
+#define VC4_TEX_P1_WRAP_S_MASK				VC4_MASK(1, 0)
+#define VC4_TEX_P1_WRAP_S_SHIFT				0
+# define VC4_TEX_P1_WRAP_REPEAT				0
+# define VC4_TEX_P1_WRAP_CLAMP				1
+# define VC4_TEX_P1_WRAP_MIRROR				2
+# define VC4_TEX_P1_WRAP_BORDER				3
+
+#define VC4_TEX_P2_PTYPE_MASK				VC4_MASK(31, 30)
+#define VC4_TEX_P2_PTYPE_SHIFT				30
+# define VC4_TEX_P2_PTYPE_IGNORED			0
+# define VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE		1
+# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS	2
+# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS		3
+
+/* VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE bits */
+#define VC4_TEX_P2_CMST_MASK				VC4_MASK(29, 12)
+#define VC4_TEX_P2_CMST_SHIFT				12
+#define VC4_TEX_P2_BSLOD_MASK				VC4_MASK(0, 0)
+#define VC4_TEX_P2_BSLOD_SHIFT				0
+
+/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS */
+#define VC4_TEX_P2_CHEIGHT_MASK				VC4_MASK(22, 12)
+#define VC4_TEX_P2_CHEIGHT_SHIFT			12
+#define VC4_TEX_P2_CWIDTH_MASK				VC4_MASK(10, 0)
+#define VC4_TEX_P2_CWIDTH_SHIFT				0
+
+/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS */
+#define VC4_TEX_P2_CYOFF_MASK				VC4_MASK(22, 12)
+#define VC4_TEX_P2_CYOFF_SHIFT				12
+#define VC4_TEX_P2_CXOFF_MASK				VC4_MASK(10, 0)
+#define VC4_TEX_P2_CXOFF_SHIFT				0
+
+#endif /* VC4_PACKET_H */
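
[Editor's note] All of the MASK/SHIFT pairs above follow the convention consumed by VC4_SET_FIELD()/VC4_GET_FIELD() from vc4_regs.h (seen in use in vc4_gem.c's cache flush). A hedged sketch packing one texture P1 word, with invented values:

	/* Illustrative only: a 1920x1080 texture, linear filtering, repeat
	 * wrap on both axes, packed with the field definitions above. */
	uint32_t p1 = VC4_SET_FIELD(1080, VC4_TEX_P1_HEIGHT) |
		      VC4_SET_FIELD(1920, VC4_TEX_P1_WIDTH) |
		      VC4_SET_FIELD(VC4_TEX_P1_MAGFILT_LINEAR, VC4_TEX_P1_MAGFILT) |
		      VC4_SET_FIELD(VC4_TEX_P1_MINFILT_LINEAR, VC4_TEX_P1_MINFILT) |
		      VC4_SET_FIELD(VC4_TEX_P1_WRAP_REPEAT, VC4_TEX_P1_WRAP_T) |
		      VC4_SET_FIELD(VC4_TEX_P1_WRAP_REPEAT, VC4_TEX_P1_WRAP_S);

	uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);	/* == 1920 */
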
diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c
new file mode 100644
index 000000000000..8a2a312e2c1b
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
@@ -0,0 +1,634 @@
1/*
2 * Copyright © 2014-2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/**
25 * DOC: Render command list generation
26 *
27 * In the VC4 driver, render command list generation is performed by the
28 * kernel instead of userspace. We do this because validating a
29 * user-submitted command list is hard to get right and has high CPU overhead,
30 * while the number of valid configurations for render command lists is
31 * actually fairly low.
32 */
33
34#include "uapi/drm/vc4_drm.h"
35#include "vc4_drv.h"
36#include "vc4_packet.h"
37
38struct vc4_rcl_setup {
39 struct drm_gem_cma_object *color_read;
40 struct drm_gem_cma_object *color_write;
41 struct drm_gem_cma_object *zs_read;
42 struct drm_gem_cma_object *zs_write;
43 struct drm_gem_cma_object *msaa_color_write;
44 struct drm_gem_cma_object *msaa_zs_write;
45
46 struct drm_gem_cma_object *rcl;
47 u32 next_offset;
48};
49
50static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val)
51{
52 *(u8 *)(setup->rcl->vaddr + setup->next_offset) = val;
53 setup->next_offset += 1;
54}
55
56static inline void rcl_u16(struct vc4_rcl_setup *setup, u16 val)
57{
58 *(u16 *)(setup->rcl->vaddr + setup->next_offset) = val;
59 setup->next_offset += 2;
60}
61
62static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val)
63{
64 *(u32 *)(setup->rcl->vaddr + setup->next_offset) = val;
65 setup->next_offset += 4;
66}
67
68/*
69 * Emits a no-op STORE_TILE_BUFFER_GENERAL.
70 *
71 * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
72 * some sort before another load is triggered.
73 */
74static void vc4_store_before_load(struct vc4_rcl_setup *setup)
75{
76 rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
77 rcl_u16(setup,
78 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE,
79 VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
80 VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |
81 VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |
82 VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR);
83 rcl_u32(setup, 0); /* no address, since we're in None mode */
84}
85
86/*
87 * Calculates the physical address of the start of a tile in a RCL surface.
88 *
89 * Unlike the other load/store packets,
90 * VC4_PACKET_LOAD/STORE_FULL_RES_TILE_BUFFER don't look at the tile
91 * coordinates packet, and instead just store to the address given.
92 */
93static uint32_t vc4_full_res_offset(struct vc4_exec_info *exec,
94 struct drm_gem_cma_object *bo,
95 struct drm_vc4_submit_rcl_surface *surf,
96 uint8_t x, uint8_t y)
97{
98 return bo->paddr + surf->offset + VC4_TILE_BUFFER_SIZE *
99 (DIV_ROUND_UP(exec->args->width, 32) * y + x);
100}
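A worked instance of this addressing math, as a standalone sketch (the tile buffer size of 1024 bytes is an assumption for illustration; the real constant is defined in vc4_packet.h):

    #include <stdint.h>
    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
    #define TILE_BUFFER_SIZE   1024u   /* assumed value, illustration only */

    int main(void)
    {
        uint32_t width = 1920;                     /* hypothetical frame */
        uint32_t stride = DIV_ROUND_UP(width, 32); /* 60 tiles per row */
        uint32_t x = 3, y = 2;

        /* Tile (3,2) starts 123 tile buffers past paddr + surf->offset. */
        printf("byte offset = %u\n", TILE_BUFFER_SIZE * (stride * y + x));
        return 0;
    }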
101
102/*
103 * Emits a PACKET_TILE_COORDINATES.
104 *
105 * The tile coordinates packet triggers a pending load if there is one, is
106 * used for clipping during rendering, and determines where loads/stores
107 * happen relative to their base address.
108 */
109static void vc4_tile_coordinates(struct vc4_rcl_setup *setup,
110 uint32_t x, uint32_t y)
111{
112 rcl_u8(setup, VC4_PACKET_TILE_COORDINATES);
113 rcl_u8(setup, x);
114 rcl_u8(setup, y);
115}
116
117static void emit_tile(struct vc4_exec_info *exec,
118 struct vc4_rcl_setup *setup,
119 uint8_t x, uint8_t y, bool first, bool last)
120{
121 struct drm_vc4_submit_cl *args = exec->args;
122 bool has_bin = args->bin_cl_size != 0;
123
124 /* Note that the load doesn't actually occur until the
125 * tile coords packet is processed, and only one load
126 * may be outstanding at a time.
127 */
128 if (setup->color_read) {
129 if (args->color_read.flags &
130 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
131 rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER);
132 rcl_u32(setup,
133 vc4_full_res_offset(exec, setup->color_read,
134 &args->color_read, x, y) |
135 VC4_LOADSTORE_FULL_RES_DISABLE_ZS);
136 } else {
137 rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
138 rcl_u16(setup, args->color_read.bits);
139 rcl_u32(setup, setup->color_read->paddr +
140 args->color_read.offset);
141 }
142 }
143
144 if (setup->zs_read) {
145 if (args->zs_read.flags &
146 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
147 rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER);
148 rcl_u32(setup,
149 vc4_full_res_offset(exec, setup->zs_read,
150 &args->zs_read, x, y) |
151 VC4_LOADSTORE_FULL_RES_DISABLE_COLOR);
152 } else {
153 if (setup->color_read) {
154 /* Exec previous load. */
155 vc4_tile_coordinates(setup, x, y);
156 vc4_store_before_load(setup);
157 }
158
159 rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
160 rcl_u16(setup, args->zs_read.bits);
161 rcl_u32(setup, setup->zs_read->paddr +
162 args->zs_read.offset);
163 }
164 }
165
166 /* Clipping depends on tile coordinates having been
167 * emitted, so we always need one here.
168 */
169 vc4_tile_coordinates(setup, x, y);
170
171 /* Wait for the binner before jumping to the first
172 * tile's lists.
173 */
174 if (first && has_bin)
175 rcl_u8(setup, VC4_PACKET_WAIT_ON_SEMAPHORE);
176
177 if (has_bin) {
178 rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST);
179 rcl_u32(setup, (exec->tile_bo->paddr +
180 exec->tile_alloc_offset +
181 (y * exec->bin_tiles_x + x) * 32));
182 }
183
184 if (setup->msaa_color_write) {
185 bool last_tile_write = (!setup->msaa_zs_write &&
186 !setup->zs_write &&
187 !setup->color_write);
188 uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_ZS;
189
190 if (!last_tile_write)
191 bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL;
192 else if (last)
193 bits |= VC4_LOADSTORE_FULL_RES_EOF;
194 rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER);
195 rcl_u32(setup,
196 vc4_full_res_offset(exec, setup->msaa_color_write,
197 &args->msaa_color_write, x, y) |
198 bits);
199 }
200
201 if (setup->msaa_zs_write) {
202 bool last_tile_write = (!setup->zs_write &&
203 !setup->color_write);
204 uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_COLOR;
205
206 if (setup->msaa_color_write)
207 vc4_tile_coordinates(setup, x, y);
208 if (!last_tile_write)
209 bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL;
210 else if (last)
211 bits |= VC4_LOADSTORE_FULL_RES_EOF;
212 rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER);
213 rcl_u32(setup,
214 vc4_full_res_offset(exec, setup->msaa_zs_write,
215 &args->msaa_zs_write, x, y) |
216 bits);
217 }
218
219 if (setup->zs_write) {
220 bool last_tile_write = !setup->color_write;
221
222 if (setup->msaa_color_write || setup->msaa_zs_write)
223 vc4_tile_coordinates(setup, x, y);
224
225 rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
226 rcl_u16(setup, args->zs_write.bits |
227 (last_tile_write ?
228 0 : VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR));
229 rcl_u32(setup,
230 (setup->zs_write->paddr + args->zs_write.offset) |
231 ((last && last_tile_write) ?
232 VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
233 }
234
235 if (setup->color_write) {
236 if (setup->msaa_color_write || setup->msaa_zs_write ||
237 setup->zs_write) {
238 vc4_tile_coordinates(setup, x, y);
239 }
240
241 if (last)
242 rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
243 else
244 rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER);
245 }
246}
247
248static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
249 struct vc4_rcl_setup *setup)
250{
251 struct drm_vc4_submit_cl *args = exec->args;
252 bool has_bin = args->bin_cl_size != 0;
253 uint8_t min_x_tile = args->min_x_tile;
254 uint8_t min_y_tile = args->min_y_tile;
255 uint8_t max_x_tile = args->max_x_tile;
256 uint8_t max_y_tile = args->max_y_tile;
257 uint8_t xtiles = max_x_tile - min_x_tile + 1;
258 uint8_t ytiles = max_y_tile - min_y_tile + 1;
259 uint8_t x, y;
260 uint32_t size, loop_body_size;
261
262 size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
263 loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;
264
265 if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
266 size += VC4_PACKET_CLEAR_COLORS_SIZE +
267 VC4_PACKET_TILE_COORDINATES_SIZE +
268 VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
269 }
270
271 if (setup->color_read) {
272 if (args->color_read.flags &
273 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
274 loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
275 } else {
276 loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
277 }
278 }
279 if (setup->zs_read) {
280 if (args->zs_read.flags &
281 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
282 loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
283 } else {
284 if (setup->color_read &&
285 !(args->color_read.flags &
286 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES)) {
287 loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
288 loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
289 }
290 loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
291 }
292 }
293
294 if (has_bin) {
295 size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE;
296 loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE;
297 }
298
299 if (setup->msaa_color_write)
300 loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;
301 if (setup->msaa_zs_write)
302 loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;
303
304 if (setup->zs_write)
305 loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
306 if (setup->color_write)
307 loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE;
308
309 /* We need a VC4_PACKET_TILE_COORDINATES in between each store. */
310 loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE *
311 ((setup->msaa_color_write != NULL) +
312 (setup->msaa_zs_write != NULL) +
313 (setup->color_write != NULL) +
314 (setup->zs_write != NULL) - 1);
315
316 size += xtiles * ytiles * loop_body_size;
317
318 setup->rcl = &vc4_bo_create(dev, size, true)->base;
319 if (!setup->rcl)
320 return -ENOMEM;
321 list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head,
322 &exec->unref_list);
323
324 rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
325 rcl_u32(setup,
326 (setup->color_write ? (setup->color_write->paddr +
327 args->color_write.offset) :
328 0));
329 rcl_u16(setup, args->width);
330 rcl_u16(setup, args->height);
331 rcl_u16(setup, args->color_write.bits);
332
333 /* The tile buffer gets cleared when the previous tile is stored. If
334 * the clear values changed between frames, then the tile buffer has
335 * stale clear values in it, so we have to do a store in None mode (no
336 * writes) so that we trigger the tile buffer clear.
337 */
338 if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
339 rcl_u8(setup, VC4_PACKET_CLEAR_COLORS);
340 rcl_u32(setup, args->clear_color[0]);
341 rcl_u32(setup, args->clear_color[1]);
342 rcl_u32(setup, args->clear_z);
343 rcl_u8(setup, args->clear_s);
344
345 vc4_tile_coordinates(setup, 0, 0);
346
347 rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
348 rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE);
349 rcl_u32(setup, 0); /* no address, since we're in None mode */
350 }
351
352 for (y = min_y_tile; y <= max_y_tile; y++) {
353 for (x = min_x_tile; x <= max_x_tile; x++) {
354 bool first = (x == min_x_tile && y == min_y_tile);
355 bool last = (x == max_x_tile && y == max_y_tile);
356
357 emit_tile(exec, setup, x, y, first, last);
358 }
359 }
360
361 BUG_ON(setup->next_offset != size);
362 exec->ct1ca = setup->rcl->paddr;
363 exec->ct1ea = setup->rcl->paddr + setup->next_offset;
364
365 return 0;
366}
367
368static int vc4_full_res_bounds_check(struct vc4_exec_info *exec,
369 struct drm_gem_cma_object *obj,
370 struct drm_vc4_submit_rcl_surface *surf)
371{
372 struct drm_vc4_submit_cl *args = exec->args;
373 u32 render_tiles_stride = DIV_ROUND_UP(exec->args->width, 32);
374
375 if (surf->offset > obj->base.size) {
376 DRM_ERROR("surface offset %d > BO size %zd\n",
377 surf->offset, obj->base.size);
378 return -EINVAL;
379 }
380
381 if ((obj->base.size - surf->offset) / VC4_TILE_BUFFER_SIZE <
382 render_tiles_stride * args->max_y_tile + args->max_x_tile) {
383 DRM_ERROR("MSAA tile %d, %d out of bounds "
384 "(bo size %zd, offset %d).\n",
385 args->max_x_tile, args->max_y_tile,
386 obj->base.size,
387 surf->offset);
388 return -EINVAL;
389 }
390
391 return 0;
392}
393
394static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec,
395 struct drm_gem_cma_object **obj,
396 struct drm_vc4_submit_rcl_surface *surf)
397{
398 if (surf->flags != 0 || surf->bits != 0) {
399 DRM_ERROR("MSAA surface had nonzero flags/bits\n");
400 return -EINVAL;
401 }
402
403 if (surf->hindex == ~0)
404 return 0;
405
406 *obj = vc4_use_bo(exec, surf->hindex);
407 if (!*obj)
408 return -EINVAL;
409
410 if (surf->offset & 0xf) {
411 DRM_ERROR("MSAA write must be 16-byte aligned.\n");
412 return -EINVAL;
413 }
414
415 return vc4_full_res_bounds_check(exec, *obj, surf);
416}
417
418static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
419 struct drm_gem_cma_object **obj,
420 struct drm_vc4_submit_rcl_surface *surf)
421{
422 uint8_t tiling = VC4_GET_FIELD(surf->bits,
423 VC4_LOADSTORE_TILE_BUFFER_TILING);
424 uint8_t buffer = VC4_GET_FIELD(surf->bits,
425 VC4_LOADSTORE_TILE_BUFFER_BUFFER);
426 uint8_t format = VC4_GET_FIELD(surf->bits,
427 VC4_LOADSTORE_TILE_BUFFER_FORMAT);
428 int cpp;
429 int ret;
430
431 if (surf->flags & ~VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
432 DRM_ERROR("Extra flags set\n");
433 return -EINVAL;
434 }
435
436 if (surf->hindex == ~0)
437 return 0;
438
439 *obj = vc4_use_bo(exec, surf->hindex);
440 if (!*obj)
441 return -EINVAL;
442
443 if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
444 if (surf == &exec->args->zs_write) {
445 DRM_ERROR("general zs write may not be full-res.\n");
446 return -EINVAL;
447 }
448
449 if (surf->bits != 0) {
450 DRM_ERROR("load/store general bits set with "
451 "full res load/store.\n");
452 return -EINVAL;
453 }
454
455 ret = vc4_full_res_bounds_check(exec, *obj, surf);
456 if (ret)
457 return ret;
458
459 return 0;
460 }
461
462 if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK |
463 VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK |
464 VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) {
465 DRM_ERROR("Unknown bits in load/store: 0x%04x\n",
466 surf->bits);
467 return -EINVAL;
468 }
469
470 if (tiling > VC4_TILING_FORMAT_LT) {
471 DRM_ERROR("Bad tiling format\n");
472 return -EINVAL;
473 }
474
475 if (buffer == VC4_LOADSTORE_TILE_BUFFER_ZS) {
476 if (format != 0) {
477 DRM_ERROR("No color format should be set for ZS\n");
478 return -EINVAL;
479 }
480 cpp = 4;
481 } else if (buffer == VC4_LOADSTORE_TILE_BUFFER_COLOR) {
482 switch (format) {
483 case VC4_LOADSTORE_TILE_BUFFER_BGR565:
484 case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER:
485 cpp = 2;
486 break;
487 case VC4_LOADSTORE_TILE_BUFFER_RGBA8888:
488 cpp = 4;
489 break;
490 default:
491 DRM_ERROR("Bad tile buffer format\n");
492 return -EINVAL;
493 }
494 } else {
495 DRM_ERROR("Bad load/store buffer %d.\n", buffer);
496 return -EINVAL;
497 }
498
499 if (surf->offset & 0xf) {
500 DRM_ERROR("load/store buffer must be 16-byte aligned.\n");
501 return -EINVAL;
502 }
503
504 if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,
505 exec->args->width, exec->args->height, cpp)) {
506 return -EINVAL;
507 }
508
509 return 0;
510}
511
512static int
513vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec,
514 struct vc4_rcl_setup *setup,
515 struct drm_gem_cma_object **obj,
516 struct drm_vc4_submit_rcl_surface *surf)
517{
518 uint8_t tiling = VC4_GET_FIELD(surf->bits,
519 VC4_RENDER_CONFIG_MEMORY_FORMAT);
520 uint8_t format = VC4_GET_FIELD(surf->bits,
521 VC4_RENDER_CONFIG_FORMAT);
522 int cpp;
523
524 if (surf->flags != 0) {
525 DRM_ERROR("No flags supported on render config.\n");
526 return -EINVAL;
527 }
528
529 if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK |
530 VC4_RENDER_CONFIG_FORMAT_MASK |
531 VC4_RENDER_CONFIG_MS_MODE_4X |
532 VC4_RENDER_CONFIG_DECIMATE_MODE_4X)) {
533 DRM_ERROR("Unknown bits in render config: 0x%04x\n",
534 surf->bits);
535 return -EINVAL;
536 }
537
538 if (surf->hindex == ~0)
539 return 0;
540
541 *obj = vc4_use_bo(exec, surf->hindex);
542 if (!*obj)
543 return -EINVAL;
544
545 if (tiling > VC4_TILING_FORMAT_LT) {
546 DRM_ERROR("Bad tiling format\n");
547 return -EINVAL;
548 }
549
550 switch (format) {
551 case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED:
552 case VC4_RENDER_CONFIG_FORMAT_BGR565:
553 cpp = 2;
554 break;
555 case VC4_RENDER_CONFIG_FORMAT_RGBA8888:
556 cpp = 4;
557 break;
558 default:
559 DRM_ERROR("Bad tile buffer format\n");
560 return -EINVAL;
561 }
562
563 if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,
564 exec->args->width, exec->args->height, cpp)) {
565 return -EINVAL;
566 }
567
568 return 0;
569}
570
571int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec)
572{
573 struct vc4_rcl_setup setup = {0};
574 struct drm_vc4_submit_cl *args = exec->args;
575 bool has_bin = args->bin_cl_size != 0;
576 int ret;
577
578 if (args->min_x_tile > args->max_x_tile ||
579 args->min_y_tile > args->max_y_tile) {
580 DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n",
581 args->min_x_tile, args->min_y_tile,
582 args->max_x_tile, args->max_y_tile);
583 return -EINVAL;
584 }
585
586 if (has_bin &&
587 (args->max_x_tile > exec->bin_tiles_x ||
588 args->max_y_tile > exec->bin_tiles_y)) {
589 DRM_ERROR("Render tiles (%d,%d) outside of bin config "
590 "(%d,%d)\n",
591 args->max_x_tile, args->max_y_tile,
592 exec->bin_tiles_x, exec->bin_tiles_y);
593 return -EINVAL;
594 }
595
596 ret = vc4_rcl_render_config_surface_setup(exec, &setup,
597 &setup.color_write,
598 &args->color_write);
599 if (ret)
600 return ret;
601
602 ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read);
603 if (ret)
604 return ret;
605
606 ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read);
607 if (ret)
608 return ret;
609
610 ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write);
611 if (ret)
612 return ret;
613
614 ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_color_write,
615 &args->msaa_color_write);
616 if (ret)
617 return ret;
618
619 ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_zs_write,
620 &args->msaa_zs_write);
621 if (ret)
622 return ret;
623
624 /* We shouldn't even have the job submitted to us if there's no
625 * surface to write out.
626 */
627 if (!setup.color_write && !setup.zs_write &&
628 !setup.msaa_color_write && !setup.msaa_zs_write) {
629 DRM_ERROR("RCL requires color or Z/S write\n");
630 return -EINVAL;
631 }
632
633 return vc4_create_rcl_bo(dev, exec, &setup);
634}
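For orientation, the stream vc4_create_rcl_bo() emits in the common case (binner enabled, color write only, no clear) is roughly:

    TILE_RENDERING_MODE_CONFIG  (color write paddr, width, height, bits)
    for each tile (x, y) from min to max:
        TILE_COORDINATES (x, y)
        WAIT_ON_SEMAPHORE                      (first tile only)
        BRANCH_TO_SUB_LIST (tile_alloc + (y * bin_tiles_x + x) * 32)
        STORE_MS_TILE_BUFFER                   (or ..._AND_EOF on the last tile)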
diff --git a/drivers/gpu/drm/vc4/vc4_trace.h b/drivers/gpu/drm/vc4/vc4_trace.h
new file mode 100644
index 000000000000..ad7b1ea720c2
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_trace.h
@@ -0,0 +1,63 @@
1/*
2 * Copyright (C) 2015 Broadcom
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#if !defined(_VC4_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
10#define _VC4_TRACE_H_
11
12#include <linux/stringify.h>
13#include <linux/types.h>
14#include <linux/tracepoint.h>
15
16#undef TRACE_SYSTEM
17#define TRACE_SYSTEM vc4
18#define TRACE_INCLUDE_FILE vc4_trace
19
20TRACE_EVENT(vc4_wait_for_seqno_begin,
21 TP_PROTO(struct drm_device *dev, uint64_t seqno, uint64_t timeout),
22 TP_ARGS(dev, seqno, timeout),
23
24 TP_STRUCT__entry(
25 __field(u32, dev)
26 __field(u64, seqno)
27 __field(u64, timeout)
28 ),
29
30 TP_fast_assign(
31 __entry->dev = dev->primary->index;
32 __entry->seqno = seqno;
33 __entry->timeout = timeout;
34 ),
35
36 TP_printk("dev=%u, seqno=%llu, timeout=%llu",
37 __entry->dev, __entry->seqno, __entry->timeout)
38);
39
40TRACE_EVENT(vc4_wait_for_seqno_end,
41 TP_PROTO(struct drm_device *dev, uint64_t seqno),
42 TP_ARGS(dev, seqno),
43
44 TP_STRUCT__entry(
45 __field(u32, dev)
46 __field(u64, seqno)
47 ),
48
49 TP_fast_assign(
50 __entry->dev = dev->primary->index;
51 __entry->seqno = seqno;
52 ),
53
54 TP_printk("dev=%u, seqno=%llu",
55 __entry->dev, __entry->seqno)
56);
57
58#endif /* _VC4_TRACE_H_ */
59
60/* This part must be outside protection */
61#undef TRACE_INCLUDE_PATH
62#define TRACE_INCLUDE_PATH .
63#include <trace/define_trace.h>
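Each TRACE_EVENT above expands to a trace_<event>() helper callable from driver code. A minimal sketch of how the driver's wait path would bracket itself with these events (illustrative only, not the actual wait implementation; it assumes vc4_trace.h has been included):

    static int example_wait(struct drm_device *dev, uint64_t seqno,
                            uint64_t timeout_ns)
    {
        int ret = 0;

        trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
        /* ... sleep until the seqno passes or the timeout expires ... */
        trace_vc4_wait_for_seqno_end(dev, seqno);
        return ret;
    }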
diff --git a/drivers/gpu/drm/vc4/vc4_trace_points.c b/drivers/gpu/drm/vc4/vc4_trace_points.c
new file mode 100644
index 000000000000..e6278f25716b
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_trace_points.c
@@ -0,0 +1,14 @@
1/*
2 * Copyright (C) 2015 Broadcom
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include "vc4_drv.h"
10
11#ifndef __CHECKER__
12#define CREATE_TRACE_POINTS
13#include "vc4_trace.h"
14#endif
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 040ad0d8b8a1..424d515ffcda 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -144,6 +144,21 @@ int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
144} 144}
145#endif /* CONFIG_DEBUG_FS */ 145#endif /* CONFIG_DEBUG_FS */
146 146
147/*
148 * Asks the firmware to turn on power to the V3D engine.
149 *
150 * This may be doable with just the clocks interface, though this
151 * packet does some other register setup in the firmware, too.
152 */
153int
154vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
155{
156 if (on)
157 return pm_generic_resume(&vc4->v3d->pdev->dev);
158 else
159 return pm_generic_poweroff(&vc4->v3d->pdev->dev);
160}
161
147static void vc4_v3d_init_hw(struct drm_device *dev) 162static void vc4_v3d_init_hw(struct drm_device *dev)
148{ 163{
149 struct vc4_dev *vc4 = to_vc4_dev(dev); 164 struct vc4_dev *vc4 = to_vc4_dev(dev);
@@ -161,6 +176,7 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
161 struct drm_device *drm = dev_get_drvdata(master); 176 struct drm_device *drm = dev_get_drvdata(master);
162 struct vc4_dev *vc4 = to_vc4_dev(drm); 177 struct vc4_dev *vc4 = to_vc4_dev(drm);
163 struct vc4_v3d *v3d = NULL; 178 struct vc4_v3d *v3d = NULL;
179 int ret;
164 180
165 v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL); 181 v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL);
166 if (!v3d) 182 if (!v3d)
@@ -180,8 +196,20 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
180 return -EINVAL; 196 return -EINVAL;
181 } 197 }
182 198
199 /* Reset the binner overflow address/size at setup, to be sure
200 * we don't reuse an old one.
201 */
202 V3D_WRITE(V3D_BPOA, 0);
203 V3D_WRITE(V3D_BPOS, 0);
204
183 vc4_v3d_init_hw(drm); 205 vc4_v3d_init_hw(drm);
184 206
207 ret = drm_irq_install(drm, platform_get_irq(pdev, 0));
208 if (ret) {
209 DRM_ERROR("Failed to install IRQ handler\n");
210 return ret;
211 }
212
185 return 0; 213 return 0;
186} 214}
187 215
@@ -191,6 +219,15 @@ static void vc4_v3d_unbind(struct device *dev, struct device *master,
191 struct drm_device *drm = dev_get_drvdata(master); 219 struct drm_device *drm = dev_get_drvdata(master);
192 struct vc4_dev *vc4 = to_vc4_dev(drm); 220 struct vc4_dev *vc4 = to_vc4_dev(drm);
193 221
222 drm_irq_uninstall(drm);
223
224 /* Disable the binner's overflow memory address, so the next
225 * driver probe (if any) doesn't try to reuse our old
226 * allocation.
227 */
228 V3D_WRITE(V3D_BPOA, 0);
229 V3D_WRITE(V3D_BPOS, 0);
230
194 vc4->v3d = NULL; 231 vc4->v3d = NULL;
195} 232}
196 233
diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c
new file mode 100644
index 000000000000..0fb5b994b9dd
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_validate.c
@@ -0,0 +1,900 @@
1/*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/**
25 * Command list validator for VC4.
26 *
27 * The VC4 has no IOMMU between it and system memory. So, a user with
28 * access to execute command lists could escalate privilege by
29 * overwriting system memory (drawing to it as a framebuffer) or
30 * reading system memory it shouldn't (reading it as a texture, or
31 * uniform data, or vertex data).
32 *
33 * This validates command lists to ensure that all accesses are within
34 * the bounds of the GEM objects referenced. It explicitly whitelists
35 * packets, and looks at the offsets in any address fields to make
36 * sure they're constrained within the BOs they reference.
37 *
38 * Note that because of the validation that's happening anyway, this
39 * is where GEM relocation processing happens.
40 */
41
42#include "uapi/drm/vc4_drm.h"
43#include "vc4_drv.h"
44#include "vc4_packet.h"
45
46#define VALIDATE_ARGS \
47 struct vc4_exec_info *exec, \
48 void *validated, \
49 void *untrusted
50
51/** Return the width in pixels of a 64-byte microtile. */
52static uint32_t
53utile_width(int cpp)
54{
55 switch (cpp) {
56 case 1:
57 case 2:
58 return 8;
59 case 4:
60 return 4;
61 case 8:
62 return 2;
63 default:
64 DRM_ERROR("unknown cpp: %d\n", cpp);
65 return 1;
66 }
67}
68
69/** Return the height in pixels of a 64-byte microtile. */
70static uint32_t
71utile_height(int cpp)
72{
73 switch (cpp) {
74 case 1:
75 return 8;
76 case 2:
77 case 4:
78 case 8:
79 return 4;
80 default:
81 DRM_ERROR("unknown cpp: %d\n", cpp);
82 return 1;
83 }
84}
85
86/**
87 * The texture unit picks the tiling format for a miplevel using the same
88 * rule as this function, so we lay out our miptrees accordingly.
89 */
90static bool
91size_is_lt(uint32_t width, uint32_t height, int cpp)
92{
93 return (width <= 4 * utile_width(cpp) ||
94 height <= 4 * utile_height(cpp));
95}
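A standalone copy of this heuristic with worked numbers (a sketch for illustration only):

    #include <assert.h>
    #include <stdint.h>

    static uint32_t utile_width(int cpp)  { return cpp <= 2 ? 8 : cpp == 4 ? 4 : 2; }
    static uint32_t utile_height(int cpp) { return cpp == 1 ? 8 : 4; }

    static int size_is_lt(uint32_t w, uint32_t h, int cpp)
    {
        return w <= 4 * utile_width(cpp) || h <= 4 * utile_height(cpp);
    }

    int main(void)
    {
        /* 32bpp (cpp = 4): utiles are 4x4, so the LT cutoff is 16 pixels. */
        assert(size_is_lt(16, 16, 4));    /* fits in LT tiling */
        assert(!size_is_lt(32, 32, 4));   /* large enough for T tiling */
        return 0;
    }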
96
97struct drm_gem_cma_object *
98vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
99{
100 struct drm_gem_cma_object *obj;
101 struct vc4_bo *bo;
102
103 if (hindex >= exec->bo_count) {
104 DRM_ERROR("BO index %d greater than BO count %d\n",
105 hindex, exec->bo_count);
106 return NULL;
107 }
108 obj = exec->bo[hindex];
109 bo = to_vc4_bo(&obj->base);
110
111 if (bo->validated_shader) {
112 DRM_ERROR("Trying to use shader BO as something other than "
113 "a shader\n");
114 return NULL;
115 }
116
117 return obj;
118}
119
120static struct drm_gem_cma_object *
121vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
122{
123 return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
124}
125
126static bool
127validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
128{
129 /* Note that the untrusted pointer passed to these functions is
130 * incremented past the packet byte.
131 */
132 return (untrusted - 1 == exec->bin_u + pos);
133}
134
135static uint32_t
136gl_shader_rec_size(uint32_t pointer_bits)
137{
138 uint32_t attribute_count = pointer_bits & 7;
139 bool extended = pointer_bits & 8;
140
141 if (attribute_count == 0)
142 attribute_count = 8;
143
144 if (extended)
145 return 100 + attribute_count * 4;
146 else
147 return 36 + attribute_count * 8;
148}
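The encoding packs the attribute count into the low three bits of the shader record pointer (zero meaning eight) and an "extended" flag into bit 3. A standalone check of the size formula with example values:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t gl_shader_rec_size(uint32_t pointer_bits)
    {
        uint32_t attribute_count = pointer_bits & 7;

        if (attribute_count == 0)
            attribute_count = 8;
        return (pointer_bits & 8) ? 100 + attribute_count * 4
                                  : 36 + attribute_count * 8;
    }

    int main(void)
    {
        assert(gl_shader_rec_size(0x3) == 60);   /* 3 attributes */
        assert(gl_shader_rec_size(0x0) == 100);  /* 0 encodes 8 attributes */
        assert(gl_shader_rec_size(0x8) == 132);  /* 8 attributes, extended */
        return 0;
    }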
149
150bool
151vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
152 uint32_t offset, uint8_t tiling_format,
153 uint32_t width, uint32_t height, uint8_t cpp)
154{
155 uint32_t aligned_width, aligned_height, stride, size;
156 uint32_t utile_w = utile_width(cpp);
157 uint32_t utile_h = utile_height(cpp);
158
159 /* The shaded vertex format stores signed 12.4 fixed point
160 * (-2048,2047) offsets from the viewport center, so we should
161 * never have a render target larger than 4096. The texture
162 * unit can only sample from 2048x2048, so it's even more
163 * restricted. This lets us avoid worrying about overflow in
164 * our math.
165 */
166 if (width > 4096 || height > 4096) {
167 DRM_ERROR("Surface dimensions (%d,%d) too large\n", width, height);
168 return false;
169 }
170
171 switch (tiling_format) {
172 case VC4_TILING_FORMAT_LINEAR:
173 aligned_width = round_up(width, utile_w);
174 aligned_height = height;
175 break;
176 case VC4_TILING_FORMAT_T:
177 aligned_width = round_up(width, utile_w * 8);
178 aligned_height = round_up(height, utile_h * 8);
179 break;
180 case VC4_TILING_FORMAT_LT:
181 aligned_width = round_up(width, utile_w);
182 aligned_height = round_up(height, utile_h);
183 break;
184 default:
185 DRM_ERROR("buffer tiling %d unsupported\n", tiling_format);
186 return false;
187 }
188
189 stride = aligned_width * cpp;
190 size = stride * aligned_height;
191
192 if (size + offset < size ||
193 size + offset > fbo->base.size) {
194 DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
195 width, height,
196 aligned_width, aligned_height,
197 size, offset, fbo->base.size);
198 return false;
199 }
200
201 return true;
202}
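A worked instance of the alignment math above, as a standalone sketch (the 100x100 RGBA8888 surface is hypothetical):

    #include <assert.h>
    #include <stdint.h>

    #define ROUND_UP(x, a) (((x) + (a) - 1) / (a) * (a))

    int main(void)
    {
        /* 100x100 RGBA8888 (cpp = 4), T format: utiles are 4x4 pixels,
         * so T alignment is to 32-pixel (utile * 8) boundaries.
         */
        uint32_t aligned_width  = ROUND_UP(100, 4 * 8);  /* -> 128 */
        uint32_t aligned_height = ROUND_UP(100, 4 * 8);  /* -> 128 */
        uint32_t stride = aligned_width * 4;             /* 512 bytes */

        /* The BO must hold offset + 64 KiB for this surface to pass. */
        assert(stride * aligned_height == 65536);
        return 0;
    }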
203
204static int
205validate_flush(VALIDATE_ARGS)
206{
207 if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
208 DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n");
209 return -EINVAL;
210 }
211 exec->found_flush = true;
212
213 return 0;
214}
215
216static int
217validate_start_tile_binning(VALIDATE_ARGS)
218{
219 if (exec->found_start_tile_binning_packet) {
220 DRM_ERROR("Duplicate VC4_PACKET_START_TILE_BINNING\n");
221 return -EINVAL;
222 }
223 exec->found_start_tile_binning_packet = true;
224
225 if (!exec->found_tile_binning_mode_config_packet) {
226 DRM_ERROR("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
227 return -EINVAL;
228 }
229
230 return 0;
231}
232
233static int
234validate_increment_semaphore(VALIDATE_ARGS)
235{
236 if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
237 DRM_ERROR("Bin CL must end with "
238 "VC4_PACKET_INCREMENT_SEMAPHORE\n");
239 return -EINVAL;
240 }
241 exec->found_increment_semaphore_packet = true;
242
243 return 0;
244}
245
246static int
247validate_indexed_prim_list(VALIDATE_ARGS)
248{
249 struct drm_gem_cma_object *ib;
250 uint32_t length = *(uint32_t *)(untrusted + 1);
251 uint32_t offset = *(uint32_t *)(untrusted + 5);
252 uint32_t max_index = *(uint32_t *)(untrusted + 9);
253 uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
254 struct vc4_shader_state *shader_state;
255
256 /* Check overflow condition */
257 if (exec->shader_state_count == 0) {
258 DRM_ERROR("shader state must precede primitives\n");
259 return -EINVAL;
260 }
261 shader_state = &exec->shader_state[exec->shader_state_count - 1];
262
263 if (max_index > shader_state->max_index)
264 shader_state->max_index = max_index;
265
266 ib = vc4_use_handle(exec, 0);
267 if (!ib)
268 return -EINVAL;
269
270 if (offset > ib->base.size ||
271 (ib->base.size - offset) / index_size < length) {
272 DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
273 offset, length, index_size, ib->base.size);
274 return -EINVAL;
275 }
276
277 *(uint32_t *)(validated + 5) = ib->paddr + offset;
278
279 return 0;
280}
281
282static int
283validate_gl_array_primitive(VALIDATE_ARGS)
284{
285 uint32_t length = *(uint32_t *)(untrusted + 1);
286 uint32_t base_index = *(uint32_t *)(untrusted + 5);
287 uint32_t max_index;
288 struct vc4_shader_state *shader_state;
289
290 /* Check overflow condition */
291 if (exec->shader_state_count == 0) {
292 DRM_ERROR("shader state must precede primitives\n");
293 return -EINVAL;
294 }
295 shader_state = &exec->shader_state[exec->shader_state_count - 1];
296
297 if (length + base_index < length) {
298 DRM_ERROR("primitive vertex count overflow\n");
299 return -EINVAL;
300 }
301 max_index = length + base_index - 1;
302
303 if (max_index > shader_state->max_index)
304 shader_state->max_index = max_index;
305
306 return 0;
307}
308
309static int
310validate_gl_shader_state(VALIDATE_ARGS)
311{
312 uint32_t i = exec->shader_state_count++;
313
314 if (i >= exec->shader_state_size) {
315 DRM_ERROR("More requests for shader states than declared\n");
316 return -EINVAL;
317 }
318
319 exec->shader_state[i].addr = *(uint32_t *)untrusted;
320 exec->shader_state[i].max_index = 0;
321
322 if (exec->shader_state[i].addr & ~0xf) {
323 DRM_ERROR("high bits set in GL shader rec reference\n");
324 return -EINVAL;
325 }
326
327 *(uint32_t *)validated = (exec->shader_rec_p +
328 exec->shader_state[i].addr);
329
330 exec->shader_rec_p +=
331 roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);
332
333 return 0;
334}
335
336static int
337validate_tile_binning_config(VALIDATE_ARGS)
338{
339 struct drm_device *dev = exec->exec_bo->base.dev;
340 struct vc4_bo *tile_bo;
341 uint8_t flags;
342 uint32_t tile_state_size, tile_alloc_size;
343 uint32_t tile_count;
344
345 if (exec->found_tile_binning_mode_config_packet) {
346 DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
347 return -EINVAL;
348 }
349 exec->found_tile_binning_mode_config_packet = true;
350
351 exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
352 exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
353 tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
354 flags = *(uint8_t *)(untrusted + 14);
355
356 if (exec->bin_tiles_x == 0 ||
357 exec->bin_tiles_y == 0) {
358 DRM_ERROR("Tile binning config of %dx%d too small\n",
359 exec->bin_tiles_x, exec->bin_tiles_y);
360 return -EINVAL;
361 }
362
363 if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
364 VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
365 DRM_ERROR("unsupported binning config flags 0x%02x\n", flags);
366 return -EINVAL;
367 }
368
369 /* The tile state data array is 48 bytes per tile, and we put it at
370 * the start of a BO containing both it and the tile alloc.
371 */
372 tile_state_size = 48 * tile_count;
373
374 /* Since the tile alloc array will follow us, align. */
375 exec->tile_alloc_offset = roundup(tile_state_size, 4096);
376
377 *(uint8_t *)(validated + 14) =
378 ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
379 VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
380 VC4_BIN_CONFIG_AUTO_INIT_TSDA |
381 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
382 VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
383 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
384 VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));
385
386 /* Initial block size. */
387 tile_alloc_size = 32 * tile_count;
388
389 /*
390 * The initial allocation gets rounded to the next 256 bytes before
391 * the hardware starts fulfilling further allocations.
392 */
393 tile_alloc_size = roundup(tile_alloc_size, 256);
394
395 /* Add space for the extra allocations. This is what gets used first,
396 * before overflow memory. It must have at least 4096 bytes, but we
397 * want to avoid overflow memory usage if possible.
398 */
399 tile_alloc_size += 1024 * 1024;
400
401 tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size,
402 true);
403 if (!tile_bo)
404 return -ENOMEM;
405 exec->tile_bo = &tile_bo->base;
406 list_add_tail(&tile_bo->unref_head, &exec->unref_list);
407
408 /* tile alloc address. */
409 *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr +
410 exec->tile_alloc_offset);
411 /* tile alloc size. */
412 *(uint32_t *)(validated + 4) = tile_alloc_size;
413 /* tile state address. */
414 *(uint32_t *)(validated + 8) = exec->tile_bo->paddr;
415
416 return 0;
417}
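A worked instance of this sizing, as a standalone sketch (the 30x17 tile grid is hypothetical, e.g. a 1920x1080 frame if userspace picked 64x64-pixel bins):

    #include <stdint.h>
    #include <stdio.h>

    #define ROUND_UP(x, a) (((x) + (a) - 1) / (a) * (a))

    int main(void)
    {
        uint32_t tile_count = 30 * 17;                        /* 510 tiles */
        uint32_t tile_state_size   = 48 * tile_count;         /* 24480 */
        uint32_t tile_alloc_offset = ROUND_UP(tile_state_size, 4096);
        uint32_t tile_alloc_size   = ROUND_UP(32 * tile_count, 256)
                                   + 1024 * 1024;

        /* 24576 + 16384 + 1 MiB = 1089536 bytes for the combined BO. */
        printf("BO size: %u\n", tile_alloc_offset + tile_alloc_size);
        return 0;
    }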
418
419static int
420validate_gem_handles(VALIDATE_ARGS)
421{
422 memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
423 return 0;
424}
425
426#define VC4_DEFINE_PACKET(packet, func) \
427 [packet] = { packet ## _SIZE, #packet, func }
428
429static const struct cmd_info {
430 uint16_t len;
431 const char *name;
432 int (*func)(struct vc4_exec_info *exec, void *validated,
433 void *untrusted);
434} cmd_info[] = {
435 VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
436 VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
437 VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
438 VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
439 VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
440 validate_start_tile_binning),
441 VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
442 validate_increment_semaphore),
443
444 VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
445 validate_indexed_prim_list),
446 VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
447 validate_gl_array_primitive),
448
449 VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
450
451 VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
452
453 VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
454 VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
455 VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
456 VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
457 VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
458 VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
459 VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
460 VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
461 VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
462 /* Note: The docs say this was also 105, but it was 106 in the
463 * initial userland code drop.
464 */
465 VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
466
467 VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
468 validate_tile_binning_config),
469
470 VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
471};
472
473int
474vc4_validate_bin_cl(struct drm_device *dev,
475 void *validated,
476 void *unvalidated,
477 struct vc4_exec_info *exec)
478{
479 uint32_t len = exec->args->bin_cl_size;
480 uint32_t dst_offset = 0;
481 uint32_t src_offset = 0;
482
483 while (src_offset < len) {
484 void *dst_pkt = validated + dst_offset;
485 void *src_pkt = unvalidated + src_offset;
486 u8 cmd = *(uint8_t *)src_pkt;
487 const struct cmd_info *info;
488
489 if (cmd >= ARRAY_SIZE(cmd_info)) {
490 DRM_ERROR("0x%08x: packet %d out of bounds\n",
491 src_offset, cmd);
492 return -EINVAL;
493 }
494
495 info = &cmd_info[cmd];
496 if (!info->name) {
497 DRM_ERROR("0x%08x: packet %d invalid\n",
498 src_offset, cmd);
499 return -EINVAL;
500 }
501
502 if (src_offset + info->len > len) {
503 DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
504 "exceeds bounds (0x%08x)\n",
505 src_offset, cmd, info->name, info->len,
506 len);
507 return -EINVAL;
508 }
509
510 if (cmd != VC4_PACKET_GEM_HANDLES)
511 memcpy(dst_pkt, src_pkt, info->len);
512
513 if (info->func && info->func(exec,
514 dst_pkt + 1,
515 src_pkt + 1)) {
516 DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n",
517 src_offset, cmd, info->name);
518 return -EINVAL;
519 }
520
521 src_offset += info->len;
522 /* GEM handle loading doesn't produce HW packets. */
523 if (cmd != VC4_PACKET_GEM_HANDLES)
524 dst_offset += info->len;
525
526 /* When the CL hits halt, it'll stop reading anything else. */
527 if (cmd == VC4_PACKET_HALT)
528 break;
529 }
530
531 exec->ct0ea = exec->ct0ca + dst_offset;
532
533 if (!exec->found_start_tile_binning_packet) {
534 DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
535 return -EINVAL;
536 }
537
538 /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH. The
539 * semaphore is used to trigger the render CL to start up, and the
540 * FLUSH is what caps the bin lists with
541 * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
542 * render CL when they get called to) and actually triggers the queued
543 * semaphore increment.
544 */
545 if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
546 DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
547 "VC4_PACKET_FLUSH\n");
548 return -EINVAL;
549 }
550
551 return 0;
552}
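Putting those two requirements together, the last two bytes of any valid bin CL look like this (a sketch; the opcode values are placeholders, the real ones are defined in vc4_packet.h):

    #include <stdint.h>

    /* Placeholder opcodes, for illustration only. */
    enum {
        PKT_INCREMENT_SEMAPHORE = 0x07,
        PKT_FLUSH               = 0x04,
    };

    /* validate_bin_pos() demands exactly this tail: the semaphore
     * increment at bin_cl_size - 2, the flush at bin_cl_size - 1.
     */
    static const uint8_t bin_cl_tail[2] = {
        PKT_INCREMENT_SEMAPHORE,
        PKT_FLUSH,
    };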
553
554static bool
555reloc_tex(struct vc4_exec_info *exec,
556 void *uniform_data_u,
557 struct vc4_texture_sample_info *sample,
558 uint32_t texture_handle_index)
559
560{
561 struct drm_gem_cma_object *tex;
562 uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
563 uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
564 uint32_t p2 = (sample->p_offset[2] != ~0 ?
565 *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
566 uint32_t p3 = (sample->p_offset[3] != ~0 ?
567 *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
568 uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
569 uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
570 uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
571 uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
572 uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
573 uint32_t cpp, tiling_format, utile_w, utile_h;
574 uint32_t i;
575 uint32_t cube_map_stride = 0;
576 enum vc4_texture_data_type type;
577
578 tex = vc4_use_bo(exec, texture_handle_index);
579 if (!tex)
580 return false;
581
582 if (sample->is_direct) {
583 uint32_t remaining_size = tex->base.size - p0;
584
585 if (p0 > tex->base.size - 4) {
586 DRM_ERROR("UBO offset greater than UBO size\n");
587 goto fail;
588 }
589 if (p1 > remaining_size - 4) {
590 DRM_ERROR("UBO clamp would allow reads "
591 "outside of UBO\n");
592 goto fail;
593 }
594 *validated_p0 = tex->paddr + p0;
595 return true;
596 }
597
598 if (width == 0)
599 width = 2048;
600 if (height == 0)
601 height = 2048;
602
603 if (p0 & VC4_TEX_P0_CMMODE_MASK) {
604 if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
605 VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
606 cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
607 if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
608 VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
609 if (cube_map_stride) {
610 DRM_ERROR("Cube map stride set twice\n");
611 goto fail;
612 }
613
614 cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
615 }
616 if (!cube_map_stride) {
617 DRM_ERROR("Cube map stride not set\n");
618 goto fail;
619 }
620 }
621
622 type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
623 (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));
624
625 switch (type) {
626 case VC4_TEXTURE_TYPE_RGBA8888:
627 case VC4_TEXTURE_TYPE_RGBX8888:
628 case VC4_TEXTURE_TYPE_RGBA32R:
629 cpp = 4;
630 break;
631 case VC4_TEXTURE_TYPE_RGBA4444:
632 case VC4_TEXTURE_TYPE_RGBA5551:
633 case VC4_TEXTURE_TYPE_RGB565:
634 case VC4_TEXTURE_TYPE_LUMALPHA:
635 case VC4_TEXTURE_TYPE_S16F:
636 case VC4_TEXTURE_TYPE_S16:
637 cpp = 2;
638 break;
639 case VC4_TEXTURE_TYPE_LUMINANCE:
640 case VC4_TEXTURE_TYPE_ALPHA:
641 case VC4_TEXTURE_TYPE_S8:
642 cpp = 1;
643 break;
644 case VC4_TEXTURE_TYPE_ETC1:
645 case VC4_TEXTURE_TYPE_BW1:
646 case VC4_TEXTURE_TYPE_A4:
647 case VC4_TEXTURE_TYPE_A1:
648 case VC4_TEXTURE_TYPE_RGBA64:
649 case VC4_TEXTURE_TYPE_YUV422R:
650 default:
651 DRM_ERROR("Texture format %d unsupported\n", type);
652 goto fail;
653 }
654 utile_w = utile_width(cpp);
655 utile_h = utile_height(cpp);
656
657 if (type == VC4_TEXTURE_TYPE_RGBA32R) {
658 tiling_format = VC4_TILING_FORMAT_LINEAR;
659 } else {
660 if (size_is_lt(width, height, cpp))
661 tiling_format = VC4_TILING_FORMAT_LT;
662 else
663 tiling_format = VC4_TILING_FORMAT_T;
664 }
665
666 if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
667 tiling_format, width, height, cpp)) {
668 goto fail;
669 }
670
671 /* The mipmap levels are stored before the base of the texture. Make
672 * sure there is actually space in the BO.
673 */
674 for (i = 1; i <= miplevels; i++) {
675 uint32_t level_width = max(width >> i, 1u);
676 uint32_t level_height = max(height >> i, 1u);
677 uint32_t aligned_width, aligned_height;
678 uint32_t level_size;
679
680 /* Once the levels get small enough, they drop from T to LT. */
681 if (tiling_format == VC4_TILING_FORMAT_T &&
682 size_is_lt(level_width, level_height, cpp)) {
683 tiling_format = VC4_TILING_FORMAT_LT;
684 }
685
686 switch (tiling_format) {
687 case VC4_TILING_FORMAT_T:
688 aligned_width = round_up(level_width, utile_w * 8);
689 aligned_height = round_up(level_height, utile_h * 8);
690 break;
691 case VC4_TILING_FORMAT_LT:
692 aligned_width = round_up(level_width, utile_w);
693 aligned_height = round_up(level_height, utile_h);
694 break;
695 default:
696 aligned_width = round_up(level_width, utile_w);
697 aligned_height = level_height;
698 break;
699 }
700
701 level_size = aligned_width * cpp * aligned_height;
702
703 if (offset < level_size) {
704 DRM_ERROR("Level %d (%dx%d -> %dx%d) size %db "
705 "overflowed buffer bounds (offset %d)\n",
706 i, level_width, level_height,
707 aligned_width, aligned_height,
708 level_size, offset);
709 goto fail;
710 }
711
712 offset -= level_size;
713 }
714
715 *validated_p0 = tex->paddr + p0;
716
717 return true;
718 fail:
719 DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
720 DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
721 DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
722 DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
723 return false;
724}
725
726static int
727validate_gl_shader_rec(struct drm_device *dev,
728 struct vc4_exec_info *exec,
729 struct vc4_shader_state *state)
730{
731 uint32_t *src_handles;
732 void *pkt_u, *pkt_v;
733 static const uint32_t shader_reloc_offsets[] = {
734 4, /* fs */
735 16, /* vs */
736 28, /* cs */
737 };
738 uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
739 struct drm_gem_cma_object *bo[shader_reloc_count + 8];
740 uint32_t nr_attributes, nr_relocs, packet_size;
741 int i;
742
743 nr_attributes = state->addr & 0x7;
744 if (nr_attributes == 0)
745 nr_attributes = 8;
746 packet_size = gl_shader_rec_size(state->addr);
747
748 nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
749 if (nr_relocs * 4 > exec->shader_rec_size) {
750 DRM_ERROR("overflowed shader recs reading %d handles "
751 "from %d bytes left\n",
752 nr_relocs, exec->shader_rec_size);
753 return -EINVAL;
754 }
755 src_handles = exec->shader_rec_u;
756 exec->shader_rec_u += nr_relocs * 4;
757 exec->shader_rec_size -= nr_relocs * 4;
758
759 if (packet_size > exec->shader_rec_size) {
760 DRM_ERROR("overflowed shader recs copying %db packet "
761 "from %d bytes left\n",
762 packet_size, exec->shader_rec_size);
763 return -EINVAL;
764 }
765 pkt_u = exec->shader_rec_u;
766 pkt_v = exec->shader_rec_v;
767 memcpy(pkt_v, pkt_u, packet_size);
768 exec->shader_rec_u += packet_size;
769 /* Shader recs have to be aligned to 16 bytes (due to the attribute
770 * flags being in the low bytes), so round the next validated shader
771 * rec address up. This should be safe, since we've got so many
772 * relocations in a shader rec packet.
773 */
774 BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
775 exec->shader_rec_v += roundup(packet_size, 16);
776 exec->shader_rec_size -= packet_size;
777
778 if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) {
779 DRM_ERROR("Multi-threaded fragment shaders not supported.\n");
780 return -EINVAL;
781 }
782
783 for (i = 0; i < shader_reloc_count; i++) {
784 if (src_handles[i] > exec->bo_count) {
785 DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
786 return -EINVAL;
787 }
788
789 bo[i] = exec->bo[src_handles[i]];
790 if (!bo[i])
791 return -EINVAL;
792 }
793 for (i = shader_reloc_count; i < nr_relocs; i++) {
794 bo[i] = vc4_use_bo(exec, src_handles[i]);
795 if (!bo[i])
796 return -EINVAL;
797 }
798
799 for (i = 0; i < shader_reloc_count; i++) {
800 struct vc4_validated_shader_info *validated_shader;
801 uint32_t o = shader_reloc_offsets[i];
802 uint32_t src_offset = *(uint32_t *)(pkt_u + o);
803 uint32_t *texture_handles_u;
804 void *uniform_data_u;
805 uint32_t tex;
806
807 *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
808
809 if (src_offset != 0) {
810 DRM_ERROR("Shaders must be at offset 0 of "
811 "the BO.\n");
812 return -EINVAL;
813 }
814
815 validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
816 if (!validated_shader)
817 return -EINVAL;
818
819 if (validated_shader->uniforms_src_size >
820 exec->uniforms_size) {
821 DRM_ERROR("Uniforms src buffer overflow\n");
822 return -EINVAL;
823 }
824
825 texture_handles_u = exec->uniforms_u;
826 uniform_data_u = (texture_handles_u +
827 validated_shader->num_texture_samples);
828
829 memcpy(exec->uniforms_v, uniform_data_u,
830 validated_shader->uniforms_size);
831
832 for (tex = 0;
833 tex < validated_shader->num_texture_samples;
834 tex++) {
835 if (!reloc_tex(exec,
836 uniform_data_u,
837 &validated_shader->texture_samples[tex],
838 texture_handles_u[tex])) {
839 return -EINVAL;
840 }
841 }
842
843 *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
844
845 exec->uniforms_u += validated_shader->uniforms_src_size;
846 exec->uniforms_v += validated_shader->uniforms_size;
847 exec->uniforms_p += validated_shader->uniforms_size;
848 }
849
850 for (i = 0; i < nr_attributes; i++) {
851 struct drm_gem_cma_object *vbo =
852 bo[ARRAY_SIZE(shader_reloc_offsets) + i];
853 uint32_t o = 36 + i * 8;
854 uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
855 uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
856 uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
857 uint32_t max_index;
858
859 if (state->addr & 0x8)
860 stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;
861
862 if (vbo->base.size < offset ||
863 vbo->base.size - offset < attr_size) {
864 DRM_ERROR("BO offset overflow (%d + %d > %zd)\n",
865 offset, attr_size, vbo->base.size);
866 return -EINVAL;
867 }
868
869 if (stride != 0) {
870 max_index = ((vbo->base.size - offset - attr_size) /
871 stride);
872 if (state->max_index > max_index) {
873 DRM_ERROR("primitives use index %d out of "
874 "supplied %d\n",
875 state->max_index, max_index);
876 return -EINVAL;
877 }
878 }
879
880 *(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
881 }
882
883 return 0;
884}
885
886int
887vc4_validate_shader_recs(struct drm_device *dev,
888 struct vc4_exec_info *exec)
889{
890 uint32_t i;
891 int ret = 0;
892
893 for (i = 0; i < exec->shader_state_count; i++) {
894 ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
895 if (ret)
896 return ret;
897 }
898
899 return ret;
900}
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
index 74de18416be9..fe4161bc93ae 100644
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -26,14 +26,155 @@
26 26
27#include "drm.h" 27#include "drm.h"
28 28
29#define DRM_VC4_SUBMIT_CL 0x00
30#define DRM_VC4_WAIT_SEQNO 0x01
31#define DRM_VC4_WAIT_BO 0x02
29#define DRM_VC4_CREATE_BO 0x03 32#define DRM_VC4_CREATE_BO 0x03
30#define DRM_VC4_MMAP_BO 0x04 33#define DRM_VC4_MMAP_BO 0x04
31#define DRM_VC4_CREATE_SHADER_BO 0x05 34#define DRM_VC4_CREATE_SHADER_BO 0x05
32 35
36#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
37#define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
38#define DRM_IOCTL_VC4_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo)
33#define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo) 39#define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
34#define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) 40#define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
35#define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo) 41#define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
36 42
43struct drm_vc4_submit_rcl_surface {
44 __u32 hindex; /* Handle index, or ~0 if not present. */
45 __u32 offset; /* Offset to start of buffer. */
46 /*
47 * Bits for either render config (color_write) or load/store packet.
48 * Bits should all be 0 for MSAA load/stores.
49 */
50 __u16 bits;
51
52#define VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES (1 << 0)
53 __u16 flags;
54};
55
56/**
57 * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D
58 * engine.
59 *
60 * Drivers typically use GPU BOs to store batchbuffers / command lists and
61 * their associated state. However, because the VC4 lacks an MMU, we have to
62 * do validation of memory accesses by the GPU commands. If we were to store
63 * our commands in BOs, we'd need to do uncached readback from them to do the
64 * validation process, which is too expensive. Instead, userspace accumulates
65 * commands and associated state in plain memory, then the kernel copies the
66 * data to its own address space, and then validates and stores it in a GPU
67 * BO.
68 */
69struct drm_vc4_submit_cl {
70 /* Pointer to the binner command list.
71 *
72 * This is the first set of commands executed, which runs the
73 * coordinate shader to determine where primitives land on the screen,
74 * then writes out the state updates and draw calls necessary per tile
75 * to the tile allocation BO.
76 */
77 __u64 bin_cl;
78
79 /* Pointer to the shader records.
80 *
81 * Shader records are the structures read by the hardware that contain
82 * pointers to uniforms, shaders, and vertex attributes. The
83 * reference to the shader record has enough information to determine
84 * how many pointers are necessary (fixed number for shaders/uniforms,
85 * and an attribute count), so those BO indices into bo_handles are
86 * just stored as __u32s before each shader record passed in.
87 */
88 __u64 shader_rec;
89
90 /* Pointer to uniform data and texture handles for the textures
91 * referenced by the shader.
92 *
93 * For each shader state record, there is a set of uniform data in the
94 * order referenced by the record (FS, VS, then CS). Each set of
95 * uniform data has a __u32 index into bo_handles per texture
96 * sample operation, in the order the QPU_W_TMUn_S writes appear in
97 * the program. Following the texture BO handle indices is the actual
98 * uniform data.
99 *
100 * The individual uniform state blocks don't have sizes passed in,
101 * because the kernel has to determine the sizes anyway during shader
102 * code validation.
103 */
104 __u64 uniforms;
105 __u64 bo_handles;
106
107 /* Size in bytes of the binner command list. */
108 __u32 bin_cl_size;
109 /* Size in bytes of the set of shader records. */
110 __u32 shader_rec_size;
111 /* Number of shader records.
112 *
113 * This could just be computed from the contents of shader_records and
114 * the address bits of references to them from the bin CL, but it
115 * keeps the kernel from having to resize some allocations it makes.
116 */
117 __u32 shader_rec_count;
118 /* Size in bytes of the uniform state. */
119 __u32 uniforms_size;
120
121 /* Number of BO handles passed in (size is that times 4). */
122 __u32 bo_handle_count;
123
124 /* RCL setup: */
125 __u16 width;
126 __u16 height;
127 __u8 min_x_tile;
128 __u8 min_y_tile;
129 __u8 max_x_tile;
130 __u8 max_y_tile;
131 struct drm_vc4_submit_rcl_surface color_read;
132 struct drm_vc4_submit_rcl_surface color_write;
133 struct drm_vc4_submit_rcl_surface zs_read;
134 struct drm_vc4_submit_rcl_surface zs_write;
135 struct drm_vc4_submit_rcl_surface msaa_color_write;
136 struct drm_vc4_submit_rcl_surface msaa_zs_write;
137 __u32 clear_color[2];
138 __u32 clear_z;
139 __u8 clear_s;
140
141 __u32 pad:24;
142
143#define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0)
144 __u32 flags;
145
146 /* Returned value of the seqno of this render job (for the
147 * wait ioctl).
148 */
149 __u64 seqno;
150};
151
152/**
153 * struct drm_vc4_wait_seqno - ioctl argument for waiting for
154 * DRM_VC4_SUBMIT_CL completion using its returned seqno.
155 *
156 * timeout_ns is the timeout in nanoseconds, where "0" means "don't
157 * block, just return the status."
158 */
159struct drm_vc4_wait_seqno {
160 __u64 seqno;
161 __u64 timeout_ns;
162};
163
164/**
165 * struct drm_vc4_wait_bo - ioctl argument for waiting for
166 * completion of the last DRM_VC4_SUBMIT_CL on a BO.
167 *
168 * This is useful for cases where multiple processes might be
169 * rendering to a BO and you want to wait for all rendering to be
170 * completed.
171 */
172struct drm_vc4_wait_bo {
173 __u32 handle;
174 __u32 pad;
175 __u64 timeout_ns;
176};
177
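A minimal userspace sketch of the submit/wait flow using the ioctls above (it assumes an open DRM fd for the vc4 device and a pre-built bin CL; shader rec, uniform, and RCL surface setup is elided, and error handling is reduced to the return value):

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include "vc4_drm.h"

    int submit_and_wait(int fd, void *bin_cl, uint32_t bin_cl_size,
                        uint32_t *bo_handles, uint32_t bo_count)
    {
        struct drm_vc4_submit_cl submit;
        struct drm_vc4_wait_seqno wait;

        memset(&submit, 0, sizeof(submit));
        submit.bin_cl = (uintptr_t)bin_cl;
        submit.bin_cl_size = bin_cl_size;
        submit.bo_handles = (uintptr_t)bo_handles;
        submit.bo_handle_count = bo_count;
        /* ... shader_rec/uniforms pointers and RCL surfaces go here ... */

        if (ioctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit))
            return -1;

        memset(&wait, 0, sizeof(wait));
        wait.seqno = submit.seqno;        /* filled in by the kernel */
        wait.timeout_ns = 1000000000ull;  /* 1 second */
        return ioctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
    }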
37/** 178/**
38 * struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs. 179 * struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs.
39 * 180 *