author		Eric Anholt <eric@anholt.net>	2015-11-30 15:13:37 -0500
committer	Eric Anholt <eric@anholt.net>	2015-12-07 23:05:10 -0500
commit		d5b1a78a772f1e31a94f8babfa964152ec5e9aa5 (patch)
tree		fc74c0df66b4e6fd7d610a96fe8cb2a428db8399
parent		d3f5168a0810005920e7a3d5ba83e249bd9a750c (diff)
drm/vc4: Add support for drawing 3D frames.
The user submission is basically a pointer to a command list and a
pointer to uniforms.  We copy those into the kernel, validate and
relocate them, and store the result in a GPU BO which we queue for
execution.
v2: Drop support for NV shader recs (not necessary for GL), simplify
vc4_use_bo(), improve bin flush/semaphore checks, use __u32 style
types.
Signed-off-by: Eric Anholt <eric@anholt.net>
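
[Reader's aid, not part of the commit: the new ioctl takes user pointers to the bin CL, shader records, and uniforms, plus an array of GEM handles, and returns the queued job's seqno. A minimal userspace sketch, assuming the struct drm_vc4_submit_cl fields used by the kernel code below and the DRM_IOCTL_VC4_SUBMIT_CL wrapper from the new include/uapi/drm/vc4_drm.h; building the buffers and opening the DRM fd are placeholders.]

#include <stdint.h>
#include <sys/ioctl.h>

#include "vc4_drm.h"	/* the new uapi header from this commit */

/* Queue one bin/render job and return its seqno.  The command-list,
 * shader-rec, and uniform buffers (and the open DRM fd) are assumed
 * to have been built elsewhere; field names follow the kernel code
 * in this commit.
 */
static int vc4_submit_sketch(int fd, void *bin_cl, uint32_t bin_cl_size,
			     void *shader_rec, uint32_t shader_rec_size,
			     uint32_t shader_rec_count,
			     void *uniforms, uint32_t uniforms_size,
			     uint32_t *bo_handles, uint32_t bo_handle_count,
			     uint64_t *seqno)
{
	struct drm_vc4_submit_cl submit = {
		.bin_cl = (uintptr_t)bin_cl,
		.bin_cl_size = bin_cl_size,
		.shader_rec = (uintptr_t)shader_rec,
		.shader_rec_size = shader_rec_size,
		.shader_rec_count = shader_rec_count,
		.uniforms = (uintptr_t)uniforms,
		.uniforms_size = uniforms_size,
		.bo_handles = (uintptr_t)bo_handles,
		.bo_handle_count = bo_handle_count,
	};

	if (ioctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit) != 0)
		return -1;

	*seqno = submit.seqno;	/* filled in by the kernel */
	return 0;
}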
-rw-r--r--   drivers/gpu/drm/vc4/Makefile             |    7
-rw-r--r--   drivers/gpu/drm/vc4/vc4_drv.c            |   15
-rw-r--r--   drivers/gpu/drm/vc4/vc4_drv.h            |  182
-rw-r--r--   drivers/gpu/drm/vc4/vc4_gem.c            |  642
-rw-r--r--   drivers/gpu/drm/vc4/vc4_irq.c            |  210
-rw-r--r--   drivers/gpu/drm/vc4/vc4_packet.h         |  399
-rw-r--r--   drivers/gpu/drm/vc4/vc4_render_cl.c      |  634
-rw-r--r--   drivers/gpu/drm/vc4/vc4_trace.h          |   63
-rw-r--r--   drivers/gpu/drm/vc4/vc4_trace_points.c   |   14
-rw-r--r--   drivers/gpu/drm/vc4/vc4_v3d.c            |   37
-rw-r--r--   drivers/gpu/drm/vc4/vc4_validate.c       |  900
-rw-r--r--   include/uapi/drm/vc4_drm.h               |  141

12 files changed, 3243 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile
index e87a6f2f5916..4c6a99f0398c 100644
--- a/drivers/gpu/drm/vc4/Makefile
+++ b/drivers/gpu/drm/vc4/Makefile
@@ -8,12 +8,19 @@ vc4-y := \
 	vc4_crtc.o \
 	vc4_drv.o \
 	vc4_kms.o \
+	vc4_gem.o \
 	vc4_hdmi.o \
 	vc4_hvs.o \
+	vc4_irq.o \
 	vc4_plane.o \
+	vc4_render_cl.o \
+	vc4_trace_points.o \
 	vc4_v3d.o \
+	vc4_validate.o \
 	vc4_validate_shaders.o

 vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o

 obj-$(CONFIG_DRM_VC4) += vc4.o
+
+CFLAGS_vc4_trace_points.o := -I$(src)
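
[Reader's aid: the new CFLAGS_vc4_trace_points.o := -I$(src) line exists so the tracepoint implementation file can include the trace header by name. vc4_trace_points.c itself appears only in the diffstat above; the conventional kernel shape of such a file is sketched below as an assumption, not taken from the (unshown) file body.]

/* Conventional tracepoint TU layout (assumption -- the real
 * vc4_trace_points.c body is not shown in this excerpt).  Defining
 * CREATE_TRACE_POINTS before including the trace header is what emits
 * the tracepoint definitions, and it's this include that needs the
 * -I$(src) path from the Makefile.
 */
#include "vc4_drv.h"

#define CREATE_TRACE_POINTS
#include "vc4_trace.h"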
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index db58d74efe95..2cfee5959455 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -74,6 +74,9 @@ static const struct file_operations vc4_drm_fops = {
 };

 static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
@@ -83,10 +86,16 @@ static struct drm_driver vc4_drm_driver = {
 	.driver_features = (DRIVER_MODESET |
 			    DRIVER_ATOMIC |
 			    DRIVER_GEM |
+			    DRIVER_HAVE_IRQ |
 			    DRIVER_PRIME),
 	.lastclose = vc4_lastclose,
 	.preclose = vc4_drm_preclose,

+	.irq_handler = vc4_irq,
+	.irq_preinstall = vc4_irq_preinstall,
+	.irq_postinstall = vc4_irq_postinstall,
+	.irq_uninstall = vc4_irq_uninstall,
+
 	.enable_vblank = vc4_enable_vblank,
 	.disable_vblank = vc4_disable_vblank,
 	.get_vblank_counter = drm_vblank_count,
@@ -181,9 +190,11 @@ static int vc4_drm_bind(struct device *dev)
 	if (ret)
 		goto unref;

+	vc4_gem_init(drm);
+
 	ret = component_bind_all(dev, drm);
 	if (ret)
-		goto unref;
+		goto gem_destroy;

 	ret = drm_dev_register(drm, 0);
 	if (ret < 0)
@@ -207,6 +218,8 @@ unregister:
 	drm_dev_unregister(drm);
 unbind_all:
 	component_unbind_all(dev, drm);
+gem_destroy:
+	vc4_gem_destroy(drm);
 unref:
 	drm_dev_unref(drm);
 	vc4_bo_cache_destroy(drm);
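
[Reader's aid on the new irq hooks: with DRIVER_HAVE_IRQ set, the DRM core of this era installs them via drm_irq_install(), which runs .irq_preinstall, request_irq() with .irq_handler, then .irq_postinstall. A sketch of the kind of call a component bind would make -- the platform_get_irq() usage is an assumption here, since the vc4_v3d.c hunk isn't shown in this excerpt.]

/* Sketch: installing the IRQ once the drm_driver hooks exist.
 * drm_irq_install() sequences preinstall -> request_irq() ->
 * postinstall.  platform_get_irq(pdev, 0) is an assumed way to look
 * up the V3D interrupt line, for illustration only.
 */
static int vc4_v3d_irq_bind_sketch(struct drm_device *drm,
				   struct platform_device *pdev)
{
	int irq = platform_get_irq(pdev, 0);

	if (irq < 0)
		return irq;
	return drm_irq_install(drm, irq);
}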
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 8945463e70b6..0bc8c57196ac 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -49,6 +49,48 @@ struct vc4_dev {

 	/* Protects bo_cache and the BO stats. */
 	struct mutex bo_lock;
+
+	/* Sequence number for the last job queued in job_list.
+	 * Starts at 0 (no jobs emitted).
+	 */
+	uint64_t emit_seqno;
+
+	/* Sequence number for the last completed job on the GPU.
+	 * Starts at 0 (no jobs completed).
+	 */
+	uint64_t finished_seqno;
+
+	/* List of all struct vc4_exec_info for jobs to be executed.
+	 * The first job in the list is the one currently programmed
+	 * into ct0ca/ct1ca for execution.
+	 */
+	struct list_head job_list;
+	/* List of the finished vc4_exec_infos waiting to be freed by
+	 * job_done_work.
+	 */
+	struct list_head job_done_list;
+	/* Spinlock used to synchronize the job_list and seqno
+	 * accesses between the IRQ handler and GEM ioctls.
+	 */
+	spinlock_t job_lock;
+	wait_queue_head_t job_wait_queue;
+	struct work_struct job_done_work;
+
+	/* The binner overflow memory that's currently set up in
+	 * BPOA/BPOS registers.  When overflow occurs and a new one is
+	 * allocated, the previous one will be moved to
+	 * vc4->current_exec's free list.
+	 */
+	struct vc4_bo *overflow_mem;
+	struct work_struct overflow_mem_work;
+
+	struct {
+		uint32_t last_ct0ca, last_ct1ca;
+		struct timer_list timer;
+		struct work_struct reset_work;
+	} hangcheck;
+
+	struct semaphore async_modeset;
 };

 static inline struct vc4_dev *
@@ -60,6 +102,9 @@ to_vc4_dev(struct drm_device *dev)
 struct vc4_bo {
 	struct drm_gem_cma_object base;

+	/* seqno of the last job to render to this BO. */
+	uint64_t seqno;
+
 	/* List entry for the BO's position in either
 	 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
 	 */
@@ -130,6 +175,101 @@ to_vc4_encoder(struct drm_encoder *encoder)
 #define HVS_READ(offset) readl(vc4->hvs->regs + offset)
 #define HVS_WRITE(offset, val) writel(val, vc4->hvs->regs + offset)

+struct vc4_exec_info {
+	/* Sequence number for this bin/render job. */
+	uint64_t seqno;
+
+	/* Kernel-space copy of the ioctl arguments */
+	struct drm_vc4_submit_cl *args;
+
+	/* This is the array of BOs that were looked up at the start of exec.
+	 * Command validation will use indices into this array.
+	 */
+	struct drm_gem_cma_object **bo;
+	uint32_t bo_count;
+
+	/* Pointers for our position in vc4->job_list */
+	struct list_head head;
+
+	/* List of other BOs used in the job that need to be released
+	 * once the job is complete.
+	 */
+	struct list_head unref_list;
+
+	/* Current unvalidated indices into @bo loaded by the non-hardware
+	 * VC4_PACKET_GEM_HANDLES.
+	 */
+	uint32_t bo_index[2];
+
+	/* This is the BO where we store the validated command lists, shader
+	 * records, and uniforms.
+	 */
+	struct drm_gem_cma_object *exec_bo;
+
+	/**
+	 * This tracks the per-shader-record state (packet 64) that
+	 * determines the length of the shader record and the offset
+	 * it's expected to be found at.  It gets read in from the
+	 * command lists.
+	 */
+	struct vc4_shader_state {
+		uint32_t addr;
+		/* Maximum vertex index referenced by any primitive using this
+		 * shader state.
+		 */
+		uint32_t max_index;
+	} *shader_state;
+
+	/** How many shader states the user declared they were using. */
+	uint32_t shader_state_size;
+	/** How many shader state records the validator has seen. */
+	uint32_t shader_state_count;
+
+	bool found_tile_binning_mode_config_packet;
+	bool found_start_tile_binning_packet;
+	bool found_increment_semaphore_packet;
+	bool found_flush;
+	uint8_t bin_tiles_x, bin_tiles_y;
+	struct drm_gem_cma_object *tile_bo;
+	uint32_t tile_alloc_offset;
+
+	/**
+	 * Computed addresses pointing into exec_bo where we start the
+	 * bin thread (ct0) and render thread (ct1).
+	 */
+	uint32_t ct0ca, ct0ea;
+	uint32_t ct1ca, ct1ea;
+
+	/* Pointer to the unvalidated bin CL (if present). */
+	void *bin_u;
+
+	/* Pointers to the shader recs.  The paddr gets incremented as CL
+	 * packets are relocated in validate_gl_shader_state, and the vaddrs
+	 * (u and v) get incremented and size decremented as the shader recs
+	 * themselves are validated.
+	 */
+	void *shader_rec_u;
+	void *shader_rec_v;
+	uint32_t shader_rec_p;
+	uint32_t shader_rec_size;
+
+	/* Pointers to the uniform data.  These pointers are incremented, and
+	 * size decremented, as each batch of uniforms is uploaded.
+	 */
+	void *uniforms_u;
+	void *uniforms_v;
+	uint32_t uniforms_p;
+	uint32_t uniforms_size;
+};
+
+static inline struct vc4_exec_info *
+vc4_first_job(struct vc4_dev *vc4)
+{
+	if (list_empty(&vc4->job_list))
+		return NULL;
+	return list_first_entry(&vc4->job_list, struct vc4_exec_info, head);
+}
+
 /**
  * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
  * setup parameters.
@@ -231,10 +371,31 @@ void vc4_debugfs_cleanup(struct drm_minor *minor);
 /* vc4_drv.c */
 void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);

+/* vc4_gem.c */
+void vc4_gem_init(struct drm_device *dev);
+void vc4_gem_destroy(struct drm_device *dev);
+int vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv);
+int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv);
+int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv);
+void vc4_submit_next_job(struct drm_device *dev);
+int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
+		       uint64_t timeout_ns, bool interruptible);
+void vc4_job_handle_completed(struct vc4_dev *vc4);
+
 /* vc4_hdmi.c */
 extern struct platform_driver vc4_hdmi_driver;
 int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused);

+/* vc4_irq.c */
+irqreturn_t vc4_irq(int irq, void *arg);
+void vc4_irq_preinstall(struct drm_device *dev);
+int vc4_irq_postinstall(struct drm_device *dev);
+void vc4_irq_uninstall(struct drm_device *dev);
+void vc4_irq_reset(struct drm_device *dev);
+
 /* vc4_hvs.c */
 extern struct platform_driver vc4_hvs_driver;
 void vc4_hvs_dump_state(struct drm_device *dev);
@@ -253,6 +414,27 @@ u32 vc4_plane_dlist_size(struct drm_plane_state *state);
 extern struct platform_driver vc4_v3d_driver;
 int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused);
 int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused);
+int vc4_v3d_set_power(struct vc4_dev *vc4, bool on);
+
+/* vc4_validate.c */
+int
+vc4_validate_bin_cl(struct drm_device *dev,
+		    void *validated,
+		    void *unvalidated,
+		    struct vc4_exec_info *exec);
+
+int
+vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
+
+struct drm_gem_cma_object *vc4_use_bo(struct vc4_exec_info *exec,
+				      uint32_t hindex);
+
+int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);
+
+bool vc4_check_tex_size(struct vc4_exec_info *exec,
+			struct drm_gem_cma_object *fbo,
+			uint32_t offset, uint8_t tiling_format,
+			uint32_t width, uint32_t height, uint8_t cpp);

 /* vc4_validate_shader.c */
 struct vc4_validated_shader_info *
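
[Reader's aid for the seqno bookkeeping above: a job's BOs are busy from submit (each bo->seqno is stamped with the job's seqno at queue time) until finished_seqno catches up in the frame-done IRQ. A minimal sketch with hypothetical helper names, following the finished_seqno >= seqno test used in vc4_gem.c below.]

/* Hypothetical helpers (names not from the commit) showing how the
 * emit_seqno/finished_seqno pair answers "is this BO still busy?".
 */
static bool vc4_seqno_done(struct vc4_dev *vc4, uint64_t seqno)
{
	return vc4->finished_seqno >= seqno;
}

static bool vc4_bo_is_idle(struct vc4_dev *vc4, struct vc4_bo *bo)
{
	/* bo->seqno was stamped with the job's seqno at submit time. */
	return vc4_seqno_done(vc4, bo->seqno);
}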
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
new file mode 100644
index 000000000000..936dddfa890f
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -0,0 +1,642 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/io.h>
+
+#include "uapi/drm/vc4_drm.h"
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+#include "vc4_trace.h"
+
+static void
+vc4_queue_hangcheck(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	mod_timer(&vc4->hangcheck.timer,
+		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
+}
+
+static void
+vc4_reset(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	DRM_INFO("Resetting GPU.\n");
+	vc4_v3d_set_power(vc4, false);
+	vc4_v3d_set_power(vc4, true);
+
+	vc4_irq_reset(dev);
+
+	/* Rearm the hangcheck -- another job might have been waiting
+	 * for our hung one to get kicked off, and vc4_irq_reset()
+	 * would have started it.
+	 */
+	vc4_queue_hangcheck(dev);
+}
+
+static void
+vc4_reset_work(struct work_struct *work)
+{
+	struct vc4_dev *vc4 =
+		container_of(work, struct vc4_dev, hangcheck.reset_work);
+
+	vc4_reset(vc4->dev);
+}
+
+static void
+vc4_hangcheck_elapsed(unsigned long data)
+{
+	struct drm_device *dev = (struct drm_device *)data;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	uint32_t ct0ca, ct1ca;
+
+	/* If idle, we can stop watching for hangs. */
+	if (list_empty(&vc4->job_list))
+		return;
+
+	ct0ca = V3D_READ(V3D_CTNCA(0));
+	ct1ca = V3D_READ(V3D_CTNCA(1));
+
+	/* If we've made any progress in execution, rearm the timer
+	 * and wait.
+	 */
+	if (ct0ca != vc4->hangcheck.last_ct0ca ||
+	    ct1ca != vc4->hangcheck.last_ct1ca) {
+		vc4->hangcheck.last_ct0ca = ct0ca;
+		vc4->hangcheck.last_ct1ca = ct1ca;
+		vc4_queue_hangcheck(dev);
+		return;
+	}
+
+	/* We've gone too long with no progress, reset.  This has to
+	 * be done from a work struct, since resetting can sleep and
+	 * this timer hook isn't allowed to.
+	 */
+	schedule_work(&vc4->hangcheck.reset_work);
+}
+
+static void
+submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Set the current and end address of the control list.
+	 * Writing the end register is what starts the job.
+	 */
+	V3D_WRITE(V3D_CTNCA(thread), start);
+	V3D_WRITE(V3D_CTNEA(thread), end);
+}
+
+int
+vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
+		   bool interruptible)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	int ret = 0;
+	unsigned long timeout_expire;
+	DEFINE_WAIT(wait);
+
+	if (vc4->finished_seqno >= seqno)
+		return 0;
+
+	if (timeout_ns == 0)
+		return -ETIME;
+
+	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);
+
+	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
+	for (;;) {
+		prepare_to_wait(&vc4->job_wait_queue, &wait,
+				interruptible ? TASK_INTERRUPTIBLE :
+				TASK_UNINTERRUPTIBLE);
+
+		if (interruptible && signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		if (vc4->finished_seqno >= seqno)
+			break;
+
+		if (timeout_ns != ~0ull) {
+			if (time_after_eq(jiffies, timeout_expire)) {
+				ret = -ETIME;
+				break;
+			}
+			schedule_timeout(timeout_expire - jiffies);
+		} else {
+			schedule();
+		}
+	}
+
+	finish_wait(&vc4->job_wait_queue, &wait);
+	trace_vc4_wait_for_seqno_end(dev, seqno);
+
+	if (ret && ret != -ERESTARTSYS) {
+		DRM_ERROR("timeout waiting for render thread idle\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void
+vc4_flush_caches(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Flush the GPU L2 caches.  These caches sit on top of system
+	 * L3 (the 128kb or so shared with the CPU), and are
+	 * non-allocating in the L3.
+	 */
+	V3D_WRITE(V3D_L2CACTL,
+		  V3D_L2CACTL_L2CCLR);
+
+	V3D_WRITE(V3D_SLCACTL,
+		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
+		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
+		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
+		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
+}
+
+/* Sets the registers for the next job to actually be executed in
+ * the hardware.
+ *
+ * The job_lock should be held during this.
+ */
+void
+vc4_submit_next_job(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_exec_info *exec = vc4_first_job(vc4);
+
+	if (!exec)
+		return;
+
+	vc4_flush_caches(dev);
+
+	/* Disable the binner's pre-loaded overflow memory address */
+	V3D_WRITE(V3D_BPOA, 0);
+	V3D_WRITE(V3D_BPOS, 0);
+
+	if (exec->ct0ca != exec->ct0ea)
+		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
+	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
+}
+
+static void
+vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
+{
+	struct vc4_bo *bo;
+	unsigned i;
+
+	for (i = 0; i < exec->bo_count; i++) {
+		bo = to_vc4_bo(&exec->bo[i]->base);
+		bo->seqno = seqno;
+	}
+
+	list_for_each_entry(bo, &exec->unref_list, unref_head) {
+		bo->seqno = seqno;
+	}
+}
+
+/* Queues a struct vc4_exec_info for execution.  If no job is
+ * currently executing, then submits it.
+ *
+ * Unlike most GPUs, our hardware only handles one command list at a
+ * time.  To queue multiple jobs at once, we'd need to edit the
+ * previous command list to have a jump to the new one at the end, and
+ * then bump the end address.  That's a change for a later date,
+ * though.
+ */
+static void
+vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	uint64_t seqno;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+
+	seqno = ++vc4->emit_seqno;
+	exec->seqno = seqno;
+	vc4_update_bo_seqnos(exec, seqno);
+
+	list_add_tail(&exec->head, &vc4->job_list);
+
+	/* If no job was executing, kick ours off.  Otherwise, it'll
+	 * get started when the previous job's frame done interrupt
+	 * occurs.
+	 */
+	if (vc4_first_job(vc4) == exec) {
+		vc4_submit_next_job(dev);
+		vc4_queue_hangcheck(dev);
+	}
+
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+}
+
+/**
+ * Looks up a bunch of GEM handles for BOs and stores the array for
+ * use in the command validator that actually writes relocated
+ * addresses pointing to them.
+ */
+static int
+vc4_cl_lookup_bos(struct drm_device *dev,
+		  struct drm_file *file_priv,
+		  struct vc4_exec_info *exec)
+{
+	struct drm_vc4_submit_cl *args = exec->args;
+	uint32_t *handles;
+	int ret = 0;
+	int i;
+
+	exec->bo_count = args->bo_handle_count;
+
+	if (!exec->bo_count) {
+		/* See comment on bo_index for why we have to check
+		 * this.
+		 */
+		DRM_ERROR("Rendering requires BOs to validate\n");
+		return -EINVAL;
+	}
+
+	exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
+			   GFP_KERNEL);
+	if (!exec->bo) {
+		DRM_ERROR("Failed to allocate validated BO pointers\n");
+		return -ENOMEM;
+	}
+
+	handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
+	if (!handles) {
+		DRM_ERROR("Failed to allocate incoming GEM handles\n");
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(handles,
+			   (void __user *)(uintptr_t)args->bo_handles,
+			   exec->bo_count * sizeof(uint32_t))) {
+		ret = -EFAULT;
+		DRM_ERROR("Failed to copy in GEM handles\n");
+		goto fail;
+	}
+
+	spin_lock(&file_priv->table_lock);
+	for (i = 0; i < exec->bo_count; i++) {
+		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
+						     handles[i]);
+		if (!bo) {
+			DRM_ERROR("Failed to look up GEM BO %d: %d\n",
+				  i, handles[i]);
+			ret = -EINVAL;
+			spin_unlock(&file_priv->table_lock);
+			goto fail;
+		}
+		drm_gem_object_reference(bo);
+		exec->bo[i] = (struct drm_gem_cma_object *)bo;
+	}
+	spin_unlock(&file_priv->table_lock);
+
+fail:
+	drm_free_large(handles);
+	return ret;
+}
+
+static int
+vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	struct drm_vc4_submit_cl *args = exec->args;
+	void *temp = NULL;
+	void *bin;
+	int ret = 0;
+	uint32_t bin_offset = 0;
+	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
+					     16);
+	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
+	uint32_t exec_size = uniforms_offset + args->uniforms_size;
+	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
+					  args->shader_rec_count);
+	struct vc4_bo *bo;
+
+	if (uniforms_offset < shader_rec_offset ||
+	    exec_size < uniforms_offset ||
+	    args->shader_rec_count >= (UINT_MAX /
+					  sizeof(struct vc4_shader_state)) ||
+	    temp_size < exec_size) {
+		DRM_ERROR("overflow in exec arguments\n");
+		return -EINVAL;
+	}
+
+	/* Allocate space where we'll store the copied in user command lists
+	 * and shader records.
+	 *
+	 * We don't just copy directly into the BOs because we need to
+	 * read the contents back for validation, and I think the
+	 * bo->vaddr is uncached access.
+	 */
+	temp = kmalloc(temp_size, GFP_KERNEL);
+	if (!temp) {
+		DRM_ERROR("Failed to allocate storage for copying "
+			  "in bin/render CLs.\n");
+		ret = -ENOMEM;
+		goto fail;
+	}
+	bin = temp + bin_offset;
+	exec->shader_rec_u = temp + shader_rec_offset;
+	exec->uniforms_u = temp + uniforms_offset;
+	exec->shader_state = temp + exec_size;
+	exec->shader_state_size = args->shader_rec_count;
+
+	if (copy_from_user(bin,
+			   (void __user *)(uintptr_t)args->bin_cl,
+			   args->bin_cl_size)) {
+		ret = -EFAULT;
+		DRM_ERROR("Failed to copy in bin cl\n");
+		goto fail;
+	}
+
+	if (copy_from_user(exec->shader_rec_u,
+			   (void __user *)(uintptr_t)args->shader_rec,
+			   args->shader_rec_size)) {
+		ret = -EFAULT;
+		DRM_ERROR("Failed to copy in shader recs\n");
+		goto fail;
+	}
+
+	if (copy_from_user(exec->uniforms_u,
+			   (void __user *)(uintptr_t)args->uniforms,
+			   args->uniforms_size)) {
+		ret = -EFAULT;
+		DRM_ERROR("Failed to copy in uniforms cl\n");
+		goto fail;
+	}
+
+	bo = vc4_bo_create(dev, exec_size, true);
+	if (!bo) {
+		DRM_ERROR("Couldn't allocate BO for binning\n");
+		ret = -ENOMEM;
+		goto fail;
+	}
+	exec->exec_bo = &bo->base;
+
+	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
+		      &exec->unref_list);
+
+	exec->ct0ca = exec->exec_bo->paddr + bin_offset;
+
+	exec->bin_u = bin;
+
+	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
+	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
+	exec->shader_rec_size = args->shader_rec_size;
+
+	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
+	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
+	exec->uniforms_size = args->uniforms_size;
+
+	ret = vc4_validate_bin_cl(dev,
+				  exec->exec_bo->vaddr + bin_offset,
+				  bin,
+				  exec);
+	if (ret)
+		goto fail;
+
+	ret = vc4_validate_shader_recs(dev, exec);
+
+fail:
+	kfree(temp);
+	return ret;
+}
+
+static void
+vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	unsigned i;
+
+	/* Need the struct lock for drm_gem_object_unreference(). */
+	mutex_lock(&dev->struct_mutex);
+	if (exec->bo) {
+		for (i = 0; i < exec->bo_count; i++)
+			drm_gem_object_unreference(&exec->bo[i]->base);
+		kfree(exec->bo);
+	}
+
+	while (!list_empty(&exec->unref_list)) {
+		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
+						     struct vc4_bo, unref_head);
+		list_del(&bo->unref_head);
+		drm_gem_object_unreference(&bo->base.base);
+	}
+	mutex_unlock(&dev->struct_mutex);
+
+	kfree(exec);
+}
+
+void
+vc4_job_handle_completed(struct vc4_dev *vc4)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	while (!list_empty(&vc4->job_done_list)) {
+		struct vc4_exec_info *exec =
+			list_first_entry(&vc4->job_done_list,
+					 struct vc4_exec_info, head);
+		list_del(&exec->head);
+
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		vc4_complete_exec(vc4->dev, exec);
+		spin_lock_irqsave(&vc4->job_lock, irqflags);
+	}
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+}
+
+/* Scheduled when any job has been completed, this walks the list of
+ * jobs that had completed and unrefs their BOs and frees their exec
+ * structs.
+ */
+static void
+vc4_job_done_work(struct work_struct *work)
+{
+	struct vc4_dev *vc4 =
+		container_of(work, struct vc4_dev, job_done_work);
+
+	vc4_job_handle_completed(vc4);
+}
+
+static int
+vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
+				uint64_t seqno,
+				uint64_t *timeout_ns)
+{
+	unsigned long start = jiffies;
+	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);
+
+	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
+		uint64_t delta = jiffies_to_nsecs(jiffies - start);
+
+		if (*timeout_ns >= delta)
+			*timeout_ns -= delta;
+	}
+
+	return ret;
+}
+
+int
+vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	struct drm_vc4_wait_seqno *args = data;
+
+	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
+					       &args->timeout_ns);
+}
+
+int
+vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
+		  struct drm_file *file_priv)
+{
+	int ret;
+	struct drm_vc4_wait_bo *args = data;
+	struct drm_gem_object *gem_obj;
+	struct vc4_bo *bo;
+
+	gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
+		return -EINVAL;
+	}
+	bo = to_vc4_bo(gem_obj);
+
+	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
+					      &args->timeout_ns);
+
+	drm_gem_object_unreference_unlocked(gem_obj);
+	return ret;
+}
+
+/**
+ * Submits a command list to the VC4.
+ *
+ * This is what is called batchbuffer emitting on other hardware.
+ */
+int
+vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file_priv)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct drm_vc4_submit_cl *args = data;
+	struct vc4_exec_info *exec;
+	int ret;
+
+	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
+		DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
+		return -EINVAL;
+	}
+
+	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
+	if (!exec) {
+		DRM_ERROR("malloc failure on exec struct\n");
+		return -ENOMEM;
+	}
+
+	exec->args = args;
+	INIT_LIST_HEAD(&exec->unref_list);
+
+	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
+	if (ret)
+		goto fail;
+
+	if (exec->args->bin_cl_size != 0) {
+		ret = vc4_get_bcl(dev, exec);
+		if (ret)
+			goto fail;
+	} else {
+		exec->ct0ca = 0;
+		exec->ct0ea = 0;
+	}
+
+	ret = vc4_get_rcl(dev, exec);
+	if (ret)
+		goto fail;
+
+	/* Clear this out of the struct we'll be putting in the queue,
+	 * since it's part of our stack.
+	 */
+	exec->args = NULL;
+
+	vc4_queue_submit(dev, exec);
+
+	/* Return the seqno for our job. */
+	args->seqno = vc4->emit_seqno;
+
+	return 0;
+
+fail:
+	vc4_complete_exec(vc4->dev, exec);
+
+	return ret;
+}
+
+void
+vc4_gem_init(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	INIT_LIST_HEAD(&vc4->job_list);
+	INIT_LIST_HEAD(&vc4->job_done_list);
+	spin_lock_init(&vc4->job_lock);
+
+	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
+	setup_timer(&vc4->hangcheck.timer,
+		    vc4_hangcheck_elapsed,
+		    (unsigned long)dev);
+
+	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
+}
+
+void
+vc4_gem_destroy(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Waiting for exec to finish would need to be done before
+	 * unregistering V3D.
+	 */
+	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);
+
+	/* V3D should already have disabled its interrupt and cleared
+	 * the overflow allocation registers.  Now free the object.
+	 */
+	if (vc4->overflow_mem) {
+		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
+		vc4->overflow_mem = NULL;
+	}
+
+	vc4_bo_cache_destroy(dev);
+}
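
[Reader's aid: pairing the submit sketch near the top of this page with a wait. struct drm_vc4_wait_seqno carries the seqno plus a timeout_ns the kernel decrements on interrupted waits, per the helper above; DRM_IOCTL_VC4_WAIT_SEQNO comes from the new uapi header. The one-second timeout is an arbitrary example value.]

#include <stdint.h>
#include <sys/ioctl.h>

#include "vc4_drm.h"	/* the new uapi header from this commit */

/* Block until the given job seqno retires.  From userspace, a timeout
 * shows up as ioctl() failing with errno set to ETIME.
 */
static int vc4_wait_seqno_sketch(int fd, uint64_t seqno)
{
	struct drm_vc4_wait_seqno wait = {
		.seqno = seqno,
		.timeout_ns = 1000000000ull,	/* arbitrary 1s example */
	};

	return ioctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
}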
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
new file mode 100644
index 000000000000..b68060e758db
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** DOC: Interrupt management for the V3D engine.
+ *
+ * We have an interrupt status register (V3D_INTCTL) which reports
+ * interrupts, and where writing 1 bits clears those interrupts.
+ * There are also a pair of interrupt registers
+ * (V3D_INTENA/V3D_INTDIS) where writing a 1 to their bits enables or
+ * disables that specific interrupt, and 0s written are ignored
+ * (reading either one returns the set of enabled interrupts).
+ *
+ * When we take a render frame interrupt, we need to wake the
+ * processes waiting for some frame to be done, and get the next frame
+ * submitted ASAP (so the hardware doesn't sit idle when there's work
+ * to do).
+ *
+ * When we take the binner out of memory interrupt, we need to
+ * allocate some new memory and pass it to the binner so that the
+ * current job can make progress.
+ */
+
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+#define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \
+			 V3D_INT_FRDONE)
+
+DECLARE_WAIT_QUEUE_HEAD(render_wait);
+
+static void
+vc4_overflow_mem_work(struct work_struct *work)
+{
+	struct vc4_dev *vc4 =
+		container_of(work, struct vc4_dev, overflow_mem_work);
+	struct drm_device *dev = vc4->dev;
+	struct vc4_bo *bo;
+
+	bo = vc4_bo_create(dev, 256 * 1024, true);
+	if (!bo) {
+		DRM_ERROR("Couldn't allocate binner overflow mem\n");
+		return;
+	}
+
+	/* If there's a job executing currently, then our previous
+	 * overflow allocation is getting used in that job and we need
+	 * to queue it to be released when the job is done.  But if no
+	 * job is executing at all, then we can free the old overflow
+	 * object directly.
+	 *
+	 * No lock necessary for this pointer since we're the only
+	 * ones that update the pointer, and our workqueue won't
+	 * reenter.
+	 */
+	if (vc4->overflow_mem) {
+		struct vc4_exec_info *current_exec;
+		unsigned long irqflags;
+
+		spin_lock_irqsave(&vc4->job_lock, irqflags);
+		current_exec = vc4_first_job(vc4);
+		if (current_exec) {
+			vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
+			list_add_tail(&vc4->overflow_mem->unref_head,
+				      &current_exec->unref_list);
+			vc4->overflow_mem = NULL;
+		}
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+	}
+
+	if (vc4->overflow_mem)
+		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
+	vc4->overflow_mem = bo;
+
+	V3D_WRITE(V3D_BPOA, bo->base.paddr);
+	V3D_WRITE(V3D_BPOS, bo->base.base.size);
+	V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM);
+	V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
+}
+
+static void
+vc4_irq_finish_job(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_exec_info *exec = vc4_first_job(vc4);
+
+	if (!exec)
+		return;
+
+	vc4->finished_seqno++;
+	list_move_tail(&exec->head, &vc4->job_done_list);
+	vc4_submit_next_job(dev);
+
+	wake_up_all(&vc4->job_wait_queue);
+	schedule_work(&vc4->job_done_work);
+}
+
+irqreturn_t
+vc4_irq(int irq, void *arg)
+{
+	struct drm_device *dev = arg;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	uint32_t intctl;
+	irqreturn_t status = IRQ_NONE;
+
+	barrier();
+	intctl = V3D_READ(V3D_INTCTL);
+
+	/* Acknowledge the interrupts we're handling here.  The render
+	 * frame done interrupt will be cleared, while OUTOMEM will
+	 * stay high until the underlying cause is cleared.
+	 */
+	V3D_WRITE(V3D_INTCTL, intctl);
+
+	if (intctl & V3D_INT_OUTOMEM) {
+		/* Disable OUTOMEM until the work is done. */
+		V3D_WRITE(V3D_INTDIS, V3D_INT_OUTOMEM);
+		schedule_work(&vc4->overflow_mem_work);
+		status = IRQ_HANDLED;
+	}
+
+	if (intctl & V3D_INT_FRDONE) {
+		spin_lock(&vc4->job_lock);
+		vc4_irq_finish_job(dev);
+		spin_unlock(&vc4->job_lock);
+		status = IRQ_HANDLED;
+	}
+
+	return status;
+}
+
+void
+vc4_irq_preinstall(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	init_waitqueue_head(&vc4->job_wait_queue);
+	INIT_WORK(&vc4->overflow_mem_work, vc4_overflow_mem_work);
+
+	/* Clear any pending interrupts someone might have left around
+	 * for us.
+	 */
+	V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
+}
+
+int
+vc4_irq_postinstall(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Enable both the render done and out of memory interrupts. */
+	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
+
+	return 0;
+}
+
+void
+vc4_irq_uninstall(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Disable sending interrupts for our driver's IRQs. */
+	V3D_WRITE(V3D_INTDIS, V3D_DRIVER_IRQS);
+
+	/* Clear any pending interrupts we might have left. */
+	V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
+
+	cancel_work_sync(&vc4->overflow_mem_work);
+}
+
+/** Reinitializes interrupt registers when a GPU reset is performed. */
+void vc4_irq_reset(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	unsigned long irqflags;
+
+	/* Acknowledge any stale IRQs. */
+	V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
+
+	/*
+	 * Turn all our interrupts on.  Binner out of memory is the
+	 * only one we expect to trigger at this point, since we've
+	 * just come from poweron and haven't supplied any overflow
+	 * memory yet.
+	 */
+	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	vc4_irq_finish_job(dev);
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+}
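
[Reader's aid for the register model in the DOC comment above: INTCTL is write-1-to-clear status, while INTENA/INTDIS take 1 bits to enable or disable and ignore 0s, so individual sources can be masked without a read-modify-write. A sketch with a hypothetical helper name, not from the commit.]

/* Hypothetical helper: mask or unmask a set of V3D interrupt sources.
 * Because 0 bits written to INTENA/INTDIS are ignored by the hardware,
 * only the named sources change state.
 */
static void vc4_irq_mask_sketch(struct vc4_dev *vc4, u32 bits, bool enable)
{
	V3D_WRITE(enable ? V3D_INTENA : V3D_INTDIS, bits);
}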
diff --git a/drivers/gpu/drm/vc4/vc4_packet.h b/drivers/gpu/drm/vc4/vc4_packet.h
new file mode 100644
index 000000000000..0f31cc06500f
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_packet.h
@@ -0,0 +1,399 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC4_PACKET_H
+#define VC4_PACKET_H
+
+#include "vc4_regs.h" /* for VC4_MASK, VC4_GET_FIELD, VC4_SET_FIELD */
+
+enum vc4_packet {
+	VC4_PACKET_HALT = 0,
+	VC4_PACKET_NOP = 1,
+
+	VC4_PACKET_FLUSH = 4,
+	VC4_PACKET_FLUSH_ALL = 5,
+	VC4_PACKET_START_TILE_BINNING = 6,
+	VC4_PACKET_INCREMENT_SEMAPHORE = 7,
+	VC4_PACKET_WAIT_ON_SEMAPHORE = 8,
+
+	VC4_PACKET_BRANCH = 16,
+	VC4_PACKET_BRANCH_TO_SUB_LIST = 17,
+
+	VC4_PACKET_STORE_MS_TILE_BUFFER = 24,
+	VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25,
+	VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26,
+	VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27,
+	VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28,
+	VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29,
+
+	VC4_PACKET_GL_INDEXED_PRIMITIVE = 32,
+	VC4_PACKET_GL_ARRAY_PRIMITIVE = 33,
+
+	VC4_PACKET_COMPRESSED_PRIMITIVE = 48,
+	VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49,
+
+	VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56,
+
+	VC4_PACKET_GL_SHADER_STATE = 64,
+	VC4_PACKET_NV_SHADER_STATE = 65,
+	VC4_PACKET_VG_SHADER_STATE = 66,
+
+	VC4_PACKET_CONFIGURATION_BITS = 96,
+	VC4_PACKET_FLAT_SHADE_FLAGS = 97,
+	VC4_PACKET_POINT_SIZE = 98,
+	VC4_PACKET_LINE_WIDTH = 99,
+	VC4_PACKET_RHT_X_BOUNDARY = 100,
+	VC4_PACKET_DEPTH_OFFSET = 101,
+	VC4_PACKET_CLIP_WINDOW = 102,
+	VC4_PACKET_VIEWPORT_OFFSET = 103,
+	VC4_PACKET_Z_CLIPPING = 104,
+	VC4_PACKET_CLIPPER_XY_SCALING = 105,
+	VC4_PACKET_CLIPPER_Z_SCALING = 106,
+
+	VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112,
+	VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113,
+	VC4_PACKET_CLEAR_COLORS = 114,
+	VC4_PACKET_TILE_COORDINATES = 115,
+
+	/* Not an actual hardware packet -- this is what we use to put
+	 * references to GEM BOs in the command stream, since we need the u32
+	 * in the actual address packet in order to store the offset from the
+	 * start of the BO.
+	 */
+	VC4_PACKET_GEM_HANDLES = 254,
+} __attribute__ ((__packed__));
+
+#define VC4_PACKET_HALT_SIZE				1
+#define VC4_PACKET_NOP_SIZE				1
+#define VC4_PACKET_FLUSH_SIZE				1
+#define VC4_PACKET_FLUSH_ALL_SIZE			1
+#define VC4_PACKET_START_TILE_BINNING_SIZE		1
+#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE		1
+#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE		1
+#define VC4_PACKET_BRANCH_SIZE				5
+#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE		5
+#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE		1
+#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE	1
+#define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE	5
+#define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE	5
+#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE	7
+#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE	7
+#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE		14
+#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE		10
+#define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE		1
+#define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE	1
+#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE		2
+#define VC4_PACKET_GL_SHADER_STATE_SIZE			5
+#define VC4_PACKET_NV_SHADER_STATE_SIZE			5
+#define VC4_PACKET_VG_SHADER_STATE_SIZE			5
+#define VC4_PACKET_CONFIGURATION_BITS_SIZE		4
+#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE		5
+#define VC4_PACKET_POINT_SIZE_SIZE			5
+#define VC4_PACKET_LINE_WIDTH_SIZE			5
+#define VC4_PACKET_RHT_X_BOUNDARY_SIZE			3
+#define VC4_PACKET_DEPTH_OFFSET_SIZE			5
+#define VC4_PACKET_CLIP_WINDOW_SIZE			9
+#define VC4_PACKET_VIEWPORT_OFFSET_SIZE			5
+#define VC4_PACKET_Z_CLIPPING_SIZE			9
+#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE		9
+#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE		9
+#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE	16
+#define VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE	11
+#define VC4_PACKET_CLEAR_COLORS_SIZE			14
+#define VC4_PACKET_TILE_COORDINATES_SIZE		3
+#define VC4_PACKET_GEM_HANDLES_SIZE			9
+
+/* Number of multisamples supported. */
+#define VC4_MAX_SAMPLES					4
+/* Size of a full resolution color or Z tile buffer load/store. */
+#define VC4_TILE_BUFFER_SIZE				(64 * 64 * 4)
+
+/** @{
+ * Bits used by packets like VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_TILE_RENDERING_MODE_CONFIG.
+ */
+#define VC4_TILING_FORMAT_LINEAR			0
+#define VC4_TILING_FORMAT_T				1
+#define VC4_TILING_FORMAT_LT				2
+/** @} */
+
+/** @{
+ *
+ * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
+ * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
+ */
+#define VC4_LOADSTORE_FULL_RES_EOF			BIT(3)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL	BIT(2)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS		BIT(1)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR		BIT(0)
+
+/** @{
+ *
+ * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
+ * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
+ */
+#define VC4_LOADSTORE_FULL_RES_EOF			BIT(3)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL	BIT(2)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS		BIT(1)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR		BIT(0)
+
+/** @{
+ *
+ * byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address)
+ */
+
+#define VC4_LOADSTORE_TILE_BUFFER_EOF			BIT(3)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK	BIT(2)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS	BIT(1)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR	BIT(0)
+
+/** @} */
+
+/** @{
+ *
+ * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
+ */
+#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR	BIT(15)
+#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR		BIT(14)
+#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR	BIT(13)
+#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP		BIT(12)
+
+#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK		VC4_MASK(9, 8)
+#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT		8
+#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888		0
+#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER		1
+#define VC4_LOADSTORE_TILE_BUFFER_BGR565		2
+/** @} */
+
+/** @{
+ *
+ * byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
+ */
+#define VC4_STORE_TILE_BUFFER_MODE_MASK			VC4_MASK(7, 6)
+#define VC4_STORE_TILE_BUFFER_MODE_SHIFT		6
+#define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0		(0 << 6)
+#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4		(1 << 6)
+#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16		(2 << 6)
+
+/** The values of the field are VC4_TILING_FORMAT_* */
+#define VC4_LOADSTORE_TILE_BUFFER_TILING_MASK		VC4_MASK(5, 4)
+#define VC4_LOADSTORE_TILE_BUFFER_TILING_SHIFT		4
+
+#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK		VC4_MASK(2, 0)
+#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_SHIFT		0
+#define VC4_LOADSTORE_TILE_BUFFER_NONE			0
+#define VC4_LOADSTORE_TILE_BUFFER_COLOR			1
+#define VC4_LOADSTORE_TILE_BUFFER_ZS			2
+#define VC4_LOADSTORE_TILE_BUFFER_Z			3
+#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK		4
+#define VC4_LOADSTORE_TILE_BUFFER_FULL			5
+/** @} */
+
+#define VC4_INDEX_BUFFER_U8				(0 << 4)
+#define VC4_INDEX_BUFFER_U16				(1 << 4)
+
+/* This flag is only present in NV shader state. */
+#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS		BIT(3)
+#define VC4_SHADER_FLAG_ENABLE_CLIPPING			BIT(2)
+#define VC4_SHADER_FLAG_VS_POINT_SIZE			BIT(1)
+#define VC4_SHADER_FLAG_FS_SINGLE_THREAD		BIT(0)
+
+/** @{ byte 2 of config bits. */
+#define VC4_CONFIG_BITS_EARLY_Z_UPDATE			BIT(1)
+#define VC4_CONFIG_BITS_EARLY_Z				BIT(0)
+/** @} */
+
+/** @{ byte 1 of config bits. */
+#define VC4_CONFIG_BITS_Z_UPDATE			BIT(7)
+/** same values in this 3-bit field as PIPE_FUNC_* */
+#define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT		4
+#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE		BIT(3)
+
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO		(0 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD		(1 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR		(2 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO		(3 << 1)
+
+#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT		BIT(0)
+/** @} */
+
+/** @{ byte 0 of config bits. */
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_NONE	(0 << 6)
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X	(1 << 6)
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X	(2 << 6)
+
+#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES		BIT(4)
+#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET		BIT(3)
+#define VC4_CONFIG_BITS_CW_PRIMITIVES			BIT(2)
+#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK		BIT(1)
+#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT		BIT(0)
+/** @} */
+
+/** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */
+#define VC4_BIN_CONFIG_DB_NON_MS			BIT(7)
+
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK		VC4_MASK(6, 5)
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT		5
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32		0
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64		1
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128		2
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256		3
+
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK	VC4_MASK(4, 3)
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT	3
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32		0
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64		1
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128	2
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256	3
+
+#define VC4_BIN_CONFIG_AUTO_INIT_TSDA			BIT(2)
+#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT		BIT(1)
+#define VC4_BIN_CONFIG_MS_MODE_4X			BIT(0)
+/** @} */
+
+/** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */
+#define VC4_RENDER_CONFIG_DB_NON_MS			BIT(12)
+#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE	BIT(11)
+#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G		BIT(10)
+#define VC4_RENDER_CONFIG_COVERAGE_MODE			BIT(9)
+#define VC4_RENDER_CONFIG_ENABLE_VG_MASK		BIT(8)
+
+/** The values of the field are VC4_TILING_FORMAT_* */
+#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK		VC4_MASK(7, 6)
+#define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT		6
+
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_1X		(0 << 4)
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_4X		(1 << 4)
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_16X		(2 << 4)
+
+#define VC4_RENDER_CONFIG_FORMAT_MASK			VC4_MASK(3, 2)
+#define VC4_RENDER_CONFIG_FORMAT_SHIFT			2
+#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED	0
+#define VC4_RENDER_CONFIG_FORMAT_RGBA8888		1
+#define VC4_RENDER_CONFIG_FORMAT_BGR565			2
+
+#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT		BIT(1)
+#define VC4_RENDER_CONFIG_MS_MODE_4X			BIT(0)
+
+#define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX		(1 << 4)
+#define VC4_PRIMITIVE_LIST_FORMAT_32_XY			(3 << 4)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS		(0 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES		(1 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES	(2 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT		(3 << 0)
+
+enum vc4_texture_data_type {
+	VC4_TEXTURE_TYPE_RGBA8888 = 0,
310 | VC4_TEXTURE_TYPE_RGBX8888 = 1, | ||
311 | VC4_TEXTURE_TYPE_RGBA4444 = 2, | ||
312 | VC4_TEXTURE_TYPE_RGBA5551 = 3, | ||
313 | VC4_TEXTURE_TYPE_RGB565 = 4, | ||
314 | VC4_TEXTURE_TYPE_LUMINANCE = 5, | ||
315 | VC4_TEXTURE_TYPE_ALPHA = 6, | ||
316 | VC4_TEXTURE_TYPE_LUMALPHA = 7, | ||
317 | VC4_TEXTURE_TYPE_ETC1 = 8, | ||
318 | VC4_TEXTURE_TYPE_S16F = 9, | ||
319 | VC4_TEXTURE_TYPE_S8 = 10, | ||
320 | VC4_TEXTURE_TYPE_S16 = 11, | ||
321 | VC4_TEXTURE_TYPE_BW1 = 12, | ||
322 | VC4_TEXTURE_TYPE_A4 = 13, | ||
323 | VC4_TEXTURE_TYPE_A1 = 14, | ||
324 | VC4_TEXTURE_TYPE_RGBA64 = 15, | ||
325 | VC4_TEXTURE_TYPE_RGBA32R = 16, | ||
326 | VC4_TEXTURE_TYPE_YUV422R = 17, | ||
327 | }; | ||
328 | |||
329 | #define VC4_TEX_P0_OFFSET_MASK VC4_MASK(31, 12) | ||
330 | #define VC4_TEX_P0_OFFSET_SHIFT 12 | ||
331 | #define VC4_TEX_P0_CSWIZ_MASK VC4_MASK(11, 10) | ||
332 | #define VC4_TEX_P0_CSWIZ_SHIFT 10 | ||
333 | #define VC4_TEX_P0_CMMODE_MASK VC4_MASK(9, 9) | ||
334 | #define VC4_TEX_P0_CMMODE_SHIFT 9 | ||
335 | #define VC4_TEX_P0_FLIPY_MASK VC4_MASK(8, 8) | ||
336 | #define VC4_TEX_P0_FLIPY_SHIFT 8 | ||
337 | #define VC4_TEX_P0_TYPE_MASK VC4_MASK(7, 4) | ||
338 | #define VC4_TEX_P0_TYPE_SHIFT 4 | ||
339 | #define VC4_TEX_P0_MIPLVLS_MASK VC4_MASK(3, 0) | ||
340 | #define VC4_TEX_P0_MIPLVLS_SHIFT 0 | ||
341 | |||
342 | #define VC4_TEX_P1_TYPE4_MASK VC4_MASK(31, 31) | ||
343 | #define VC4_TEX_P1_TYPE4_SHIFT 31 | ||
344 | #define VC4_TEX_P1_HEIGHT_MASK VC4_MASK(30, 20) | ||
345 | #define VC4_TEX_P1_HEIGHT_SHIFT 20 | ||
346 | #define VC4_TEX_P1_ETCFLIP_MASK VC4_MASK(19, 19) | ||
347 | #define VC4_TEX_P1_ETCFLIP_SHIFT 19 | ||
348 | #define VC4_TEX_P1_WIDTH_MASK VC4_MASK(18, 8) | ||
349 | #define VC4_TEX_P1_WIDTH_SHIFT 8 | ||
350 | |||
351 | #define VC4_TEX_P1_MAGFILT_MASK VC4_MASK(7, 7) | ||
352 | #define VC4_TEX_P1_MAGFILT_SHIFT 7 | ||
353 | # define VC4_TEX_P1_MAGFILT_LINEAR 0 | ||
354 | # define VC4_TEX_P1_MAGFILT_NEAREST 1 | ||
355 | |||
356 | #define VC4_TEX_P1_MINFILT_MASK VC4_MASK(6, 4) | ||
357 | #define VC4_TEX_P1_MINFILT_SHIFT 4 | ||
358 | # define VC4_TEX_P1_MINFILT_LINEAR 0 | ||
359 | # define VC4_TEX_P1_MINFILT_NEAREST 1 | ||
360 | # define VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR 2 | ||
361 | # define VC4_TEX_P1_MINFILT_NEAR_MIP_LIN 3 | ||
362 | # define VC4_TEX_P1_MINFILT_LIN_MIP_NEAR 4 | ||
363 | # define VC4_TEX_P1_MINFILT_LIN_MIP_LIN 5 | ||
364 | |||
365 | #define VC4_TEX_P1_WRAP_T_MASK VC4_MASK(3, 2) | ||
366 | #define VC4_TEX_P1_WRAP_T_SHIFT 2 | ||
367 | #define VC4_TEX_P1_WRAP_S_MASK VC4_MASK(1, 0) | ||
368 | #define VC4_TEX_P1_WRAP_S_SHIFT 0 | ||
369 | # define VC4_TEX_P1_WRAP_REPEAT 0 | ||
370 | # define VC4_TEX_P1_WRAP_CLAMP 1 | ||
371 | # define VC4_TEX_P1_WRAP_MIRROR 2 | ||
372 | # define VC4_TEX_P1_WRAP_BORDER 3 | ||
373 | |||
374 | #define VC4_TEX_P2_PTYPE_MASK VC4_MASK(31, 30) | ||
375 | #define VC4_TEX_P2_PTYPE_SHIFT 30 | ||
376 | # define VC4_TEX_P2_PTYPE_IGNORED 0 | ||
377 | # define VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE 1 | ||
378 | # define VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS 2 | ||
379 | # define VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS 3 | ||
380 | |||
381 | /* VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE bits */ | ||
382 | #define VC4_TEX_P2_CMST_MASK VC4_MASK(29, 12) | ||
383 | #define VC4_TEX_P2_CMST_SHIFT 12 | ||
384 | #define VC4_TEX_P2_BSLOD_MASK VC4_MASK(0, 0) | ||
385 | #define VC4_TEX_P2_BSLOD_SHIFT 0 | ||
386 | |||
387 | /* VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS */ | ||
388 | #define VC4_TEX_P2_CHEIGHT_MASK VC4_MASK(22, 12) | ||
389 | #define VC4_TEX_P2_CHEIGHT_SHIFT 12 | ||
390 | #define VC4_TEX_P2_CWIDTH_MASK VC4_MASK(10, 0) | ||
391 | #define VC4_TEX_P2_CWIDTH_SHIFT 0 | ||
392 | |||
393 | /* VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS */ | ||
394 | #define VC4_TEX_P2_CYOFF_MASK VC4_MASK(22, 12) | ||
395 | #define VC4_TEX_P2_CYOFF_SHIFT 12 | ||
396 | #define VC4_TEX_P2_CXOFF_MASK VC4_MASK(10, 0) | ||
397 | #define VC4_TEX_P2_CXOFF_SHIFT 0 | ||
398 | |||
399 | #endif /* VC4_PACKET_H */ | ||
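The `*_MASK`/`*_SHIFT` pairs above are consumed through the driver's VC4_SET_FIELD()/VC4_GET_FIELD() helpers. As a reading aid, here is a minimal sketch (not part of the patch) of packing and unpacking the texture type, which is split across the P0 and P1 words; it assumes the field helpers from the driver headers and the VC4_TEX_P0/P1 definitions above.

```c
/* Sketch only: the 5-bit texture type keeps its low four bits in P0's
 * TYPE field and its high bit in P1's TYPE4 field.
 */
static void pack_tex_type(uint32_t type, uint32_t *p0, uint32_t *p1)
{
	*p0 |= VC4_SET_FIELD(type & 0xf, VC4_TEX_P0_TYPE);
	*p1 |= VC4_SET_FIELD(type >> 4, VC4_TEX_P1_TYPE4);
}

static uint32_t unpack_tex_type(uint32_t p0, uint32_t p1)
{
	return VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
	       (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4);
}
```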
diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c new file mode 100644 index 000000000000..8a2a312e2c1b --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c | |||
@@ -0,0 +1,634 @@ | |||
1 | /* | ||
2 | * Copyright © 2014-2015 Broadcom | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice (including the next | ||
12 | * paragraph) shall be included in all copies or substantial portions of the | ||
13 | * Software. | ||
14 | * | ||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
21 | * IN THE SOFTWARE. | ||
22 | */ | ||
23 | |||
24 | /** | ||
25 | * DOC: Render command list generation | ||
26 | * | ||
27 | * In the VC4 driver, render command list generation is performed by the | ||
28 | * kernel instead of userspace. We do this because validating a | ||
29 | * user-submitted command list is hard to get right and has high CPU overhead, | ||
30 | * while the number of valid configurations for render command lists is | ||
31 | * actually fairly low. | ||
32 | */ | ||
33 | |||
34 | #include "uapi/drm/vc4_drm.h" | ||
35 | #include "vc4_drv.h" | ||
36 | #include "vc4_packet.h" | ||
37 | |||
38 | struct vc4_rcl_setup { | ||
39 | struct drm_gem_cma_object *color_read; | ||
40 | struct drm_gem_cma_object *color_write; | ||
41 | struct drm_gem_cma_object *zs_read; | ||
42 | struct drm_gem_cma_object *zs_write; | ||
43 | struct drm_gem_cma_object *msaa_color_write; | ||
44 | struct drm_gem_cma_object *msaa_zs_write; | ||
45 | |||
46 | struct drm_gem_cma_object *rcl; | ||
47 | u32 next_offset; | ||
48 | }; | ||
49 | |||
50 | static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val) | ||
51 | { | ||
52 | *(u8 *)(setup->rcl->vaddr + setup->next_offset) = val; | ||
53 | setup->next_offset += 1; | ||
54 | } | ||
55 | |||
56 | static inline void rcl_u16(struct vc4_rcl_setup *setup, u16 val) | ||
57 | { | ||
58 | *(u16 *)(setup->rcl->vaddr + setup->next_offset) = val; | ||
59 | setup->next_offset += 2; | ||
60 | } | ||
61 | |||
62 | static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val) | ||
63 | { | ||
64 | *(u32 *)(setup->rcl->vaddr + setup->next_offset) = val; | ||
65 | setup->next_offset += 4; | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * Emits a no-op STORE_TILE_BUFFER_GENERAL. | ||
70 | * | ||
71 | * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of | ||
72 | * some sort before another load is triggered. | ||
73 | */ | ||
74 | static void vc4_store_before_load(struct vc4_rcl_setup *setup) | ||
75 | { | ||
76 | rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); | ||
77 | rcl_u16(setup, | ||
78 | VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE, | ||
79 | VC4_LOADSTORE_TILE_BUFFER_BUFFER) | | ||
80 | VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR | | ||
81 | VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR | | ||
82 | VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR); | ||
83 | rcl_u32(setup, 0); /* no address, since we're in None mode */ | ||
84 | } | ||
85 | |||
86 | /* | ||
88 | * Calculates the physical address of the start of a tile in an RCL surface. | ||
89 | * | ||
90 | * Unlike the other load/store packets, | ||
91 | * VC4_PACKET_LOAD/STORE_FULL_RES_TILE_BUFFER don't look at the tile | ||
92 | * coordinates packet, and instead just use the address given. | ||
92 | */ | ||
93 | static uint32_t vc4_full_res_offset(struct vc4_exec_info *exec, | ||
94 | struct drm_gem_cma_object *bo, | ||
95 | struct drm_vc4_submit_rcl_surface *surf, | ||
96 | uint8_t x, uint8_t y) | ||
97 | { | ||
98 | return bo->paddr + surf->offset + VC4_TILE_BUFFER_SIZE * | ||
99 | (DIV_ROUND_UP(exec->args->width, 32) * y + x); | ||
100 | } | ||
101 | |||
102 | /* | ||
103 | * Emits a PACKET_TILE_COORDINATES if one isn't already pending. | ||
104 | * | ||
105 | * The tile coordinates packet triggers a pending load if there is one, is | ||
106 | * used for clipping during rendering, and determines where loads/stores happen | ||
107 | * relative to their base address. | ||
108 | */ | ||
109 | static void vc4_tile_coordinates(struct vc4_rcl_setup *setup, | ||
110 | uint32_t x, uint32_t y) | ||
111 | { | ||
112 | rcl_u8(setup, VC4_PACKET_TILE_COORDINATES); | ||
113 | rcl_u8(setup, x); | ||
114 | rcl_u8(setup, y); | ||
115 | } | ||
116 | |||
117 | static void emit_tile(struct vc4_exec_info *exec, | ||
118 | struct vc4_rcl_setup *setup, | ||
119 | uint8_t x, uint8_t y, bool first, bool last) | ||
120 | { | ||
121 | struct drm_vc4_submit_cl *args = exec->args; | ||
122 | bool has_bin = args->bin_cl_size != 0; | ||
123 | |||
124 | /* Note that the load doesn't actually occur until the | ||
125 | * tile coords packet is processed, and only one load | ||
126 | * may be outstanding at a time. | ||
127 | */ | ||
128 | if (setup->color_read) { | ||
129 | if (args->color_read.flags & | ||
130 | VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { | ||
131 | rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER); | ||
132 | rcl_u32(setup, | ||
133 | vc4_full_res_offset(exec, setup->color_read, | ||
134 | &args->color_read, x, y) | | ||
135 | VC4_LOADSTORE_FULL_RES_DISABLE_ZS); | ||
136 | } else { | ||
137 | rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); | ||
138 | rcl_u16(setup, args->color_read.bits); | ||
139 | rcl_u32(setup, setup->color_read->paddr + | ||
140 | args->color_read.offset); | ||
141 | } | ||
142 | } | ||
143 | |||
144 | if (setup->zs_read) { | ||
145 | if (args->zs_read.flags & | ||
146 | VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { | ||
147 | rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER); | ||
148 | rcl_u32(setup, | ||
149 | vc4_full_res_offset(exec, setup->zs_read, | ||
150 | &args->zs_read, x, y) | | ||
151 | VC4_LOADSTORE_FULL_RES_DISABLE_COLOR); | ||
152 | } else { | ||
153 | if (setup->color_read) { | ||
154 | /* Exec previous load. */ | ||
155 | vc4_tile_coordinates(setup, x, y); | ||
156 | vc4_store_before_load(setup); | ||
157 | } | ||
158 | |||
159 | rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); | ||
160 | rcl_u16(setup, args->zs_read.bits); | ||
161 | rcl_u32(setup, setup->zs_read->paddr + | ||
162 | args->zs_read.offset); | ||
163 | } | ||
164 | } | ||
165 | |||
166 | /* Clipping depends on tile coordinates having been | ||
167 | * emitted, so we always need one here. | ||
168 | */ | ||
169 | vc4_tile_coordinates(setup, x, y); | ||
170 | |||
171 | /* Wait for the binner before jumping to the first | ||
172 | * tile's lists. | ||
173 | */ | ||
174 | if (first && has_bin) | ||
175 | rcl_u8(setup, VC4_PACKET_WAIT_ON_SEMAPHORE); | ||
176 | |||
177 | if (has_bin) { | ||
178 | rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST); | ||
179 | rcl_u32(setup, (exec->tile_bo->paddr + | ||
180 | exec->tile_alloc_offset + | ||
181 | (y * exec->bin_tiles_x + x) * 32)); | ||
182 | } | ||
183 | |||
184 | if (setup->msaa_color_write) { | ||
185 | bool last_tile_write = (!setup->msaa_zs_write && | ||
186 | !setup->zs_write && | ||
187 | !setup->color_write); | ||
188 | uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_ZS; | ||
189 | |||
190 | if (!last_tile_write) | ||
191 | bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL; | ||
192 | else if (last) | ||
193 | bits |= VC4_LOADSTORE_FULL_RES_EOF; | ||
194 | rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER); | ||
195 | rcl_u32(setup, | ||
196 | vc4_full_res_offset(exec, setup->msaa_color_write, | ||
197 | &args->msaa_color_write, x, y) | | ||
198 | bits); | ||
199 | } | ||
200 | |||
201 | if (setup->msaa_zs_write) { | ||
202 | bool last_tile_write = (!setup->zs_write && | ||
203 | !setup->color_write); | ||
204 | uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_COLOR; | ||
205 | |||
206 | if (setup->msaa_color_write) | ||
207 | vc4_tile_coordinates(setup, x, y); | ||
208 | if (!last_tile_write) | ||
209 | bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL; | ||
210 | else if (last) | ||
211 | bits |= VC4_LOADSTORE_FULL_RES_EOF; | ||
212 | rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER); | ||
213 | rcl_u32(setup, | ||
214 | vc4_full_res_offset(exec, setup->msaa_zs_write, | ||
215 | &args->msaa_zs_write, x, y) | | ||
216 | bits); | ||
217 | } | ||
218 | |||
219 | if (setup->zs_write) { | ||
220 | bool last_tile_write = !setup->color_write; | ||
221 | |||
222 | if (setup->msaa_color_write || setup->msaa_zs_write) | ||
223 | vc4_tile_coordinates(setup, x, y); | ||
224 | |||
225 | rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); | ||
226 | rcl_u16(setup, args->zs_write.bits | | ||
227 | (last_tile_write ? | ||
228 | 0 : VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR)); | ||
229 | rcl_u32(setup, | ||
230 | (setup->zs_write->paddr + args->zs_write.offset) | | ||
231 | ((last && last_tile_write) ? | ||
232 | VC4_LOADSTORE_TILE_BUFFER_EOF : 0)); | ||
233 | } | ||
234 | |||
235 | if (setup->color_write) { | ||
236 | if (setup->msaa_color_write || setup->msaa_zs_write || | ||
237 | setup->zs_write) { | ||
238 | vc4_tile_coordinates(setup, x, y); | ||
239 | } | ||
240 | |||
241 | if (last) | ||
242 | rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF); | ||
243 | else | ||
244 | rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER); | ||
245 | } | ||
246 | } | ||
247 | |||
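To make the branching above concrete: for the common case of a binned, color-only frame (no loads, no Z/S, no MSAA), the per-tile stream emit_tile() produces reduces to the sketch below (derived from the code above, illustrative only).

```c
/*
 * Per-tile RCL for a binned, color-only frame (illustrative):
 *
 *   VC4_PACKET_TILE_COORDINATES      x, y
 *   VC4_PACKET_WAIT_ON_SEMAPHORE     (first tile only)
 *   VC4_PACKET_BRANCH_TO_SUB_LIST    tile_alloc + (y * bin_tiles_x + x) * 32
 *   VC4_PACKET_STORE_MS_TILE_BUFFER  (..._AND_EOF on the last tile)
 */
```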
248 | static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, | ||
249 | struct vc4_rcl_setup *setup) | ||
250 | { | ||
251 | struct drm_vc4_submit_cl *args = exec->args; | ||
252 | bool has_bin = args->bin_cl_size != 0; | ||
253 | uint8_t min_x_tile = args->min_x_tile; | ||
254 | uint8_t min_y_tile = args->min_y_tile; | ||
255 | uint8_t max_x_tile = args->max_x_tile; | ||
256 | uint8_t max_y_tile = args->max_y_tile; | ||
257 | uint8_t xtiles = max_x_tile - min_x_tile + 1; | ||
258 | uint8_t ytiles = max_y_tile - min_y_tile + 1; | ||
259 | uint8_t x, y; | ||
260 | uint32_t size, loop_body_size; | ||
261 | |||
262 | size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE; | ||
263 | loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE; | ||
264 | |||
265 | if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) { | ||
266 | size += VC4_PACKET_CLEAR_COLORS_SIZE + | ||
267 | VC4_PACKET_TILE_COORDINATES_SIZE + | ||
268 | VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; | ||
269 | } | ||
270 | |||
271 | if (setup->color_read) { | ||
272 | if (args->color_read.flags & | ||
273 | VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { | ||
274 | loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE; | ||
275 | } else { | ||
276 | loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE; | ||
277 | } | ||
278 | } | ||
279 | if (setup->zs_read) { | ||
280 | if (args->zs_read.flags & | ||
281 | VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { | ||
282 | loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE; | ||
283 | } else { | ||
284 | if (setup->color_read && | ||
285 | !(args->color_read.flags & | ||
286 | VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES)) { | ||
287 | loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE; | ||
288 | loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; | ||
289 | } | ||
290 | loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE; | ||
291 | } | ||
292 | } | ||
293 | |||
294 | if (has_bin) { | ||
295 | size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE; | ||
296 | loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE; | ||
297 | } | ||
298 | |||
299 | if (setup->msaa_color_write) | ||
300 | loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE; | ||
301 | if (setup->msaa_zs_write) | ||
302 | loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE; | ||
303 | |||
304 | if (setup->zs_write) | ||
305 | loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; | ||
306 | if (setup->color_write) | ||
307 | loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE; | ||
308 | |||
309 | /* We need a VC4_PACKET_TILE_COORDINATES in between each store. */ | ||
310 | loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE * | ||
311 | ((setup->msaa_color_write != NULL) + | ||
312 | (setup->msaa_zs_write != NULL) + | ||
313 | (setup->color_write != NULL) + | ||
314 | (setup->zs_write != NULL) - 1); | ||
315 | |||
316 | size += xtiles * ytiles * loop_body_size; | ||
317 | |||
318 | setup->rcl = &vc4_bo_create(dev, size, true)->base; | ||
319 | if (!setup->rcl) | ||
320 | return -ENOMEM; | ||
321 | list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head, | ||
322 | &exec->unref_list); | ||
323 | |||
324 | rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG); | ||
325 | rcl_u32(setup, | ||
326 | (setup->color_write ? (setup->color_write->paddr + | ||
327 | args->color_write.offset) : | ||
328 | 0)); | ||
329 | rcl_u16(setup, args->width); | ||
330 | rcl_u16(setup, args->height); | ||
331 | rcl_u16(setup, args->color_write.bits); | ||
332 | |||
333 | /* The tile buffer gets cleared when the previous tile is stored. If | ||
334 | * the clear values changed between frames, then the tile buffer has | ||
335 | * stale clear values in it, so we have to do a store in None mode (no | ||
336 | * writes) so that we trigger the tile buffer clear. | ||
337 | */ | ||
338 | if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) { | ||
339 | rcl_u8(setup, VC4_PACKET_CLEAR_COLORS); | ||
340 | rcl_u32(setup, args->clear_color[0]); | ||
341 | rcl_u32(setup, args->clear_color[1]); | ||
342 | rcl_u32(setup, args->clear_z); | ||
343 | rcl_u8(setup, args->clear_s); | ||
344 | |||
345 | vc4_tile_coordinates(setup, 0, 0); | ||
346 | |||
347 | rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); | ||
348 | rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE); | ||
349 | rcl_u32(setup, 0); /* no address, since we're in None mode */ | ||
350 | } | ||
351 | |||
352 | for (y = min_y_tile; y <= max_y_tile; y++) { | ||
353 | for (x = min_x_tile; x <= max_x_tile; x++) { | ||
354 | bool first = (x == min_x_tile && y == min_y_tile); | ||
355 | bool last = (x == max_x_tile && y == max_y_tile); | ||
356 | |||
357 | emit_tile(exec, setup, x, y, first, last); | ||
358 | } | ||
359 | } | ||
360 | |||
361 | BUG_ON(setup->next_offset != size); | ||
362 | exec->ct1ca = setup->rcl->paddr; | ||
363 | exec->ct1ea = setup->rcl->paddr + setup->next_offset; | ||
364 | |||
365 | return 0; | ||
366 | } | ||
367 | |||
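A quick sanity check of the sizing logic, for the same binned, color-only case (illustrative arithmetic; the packet sizes are inferred from the rcl_u8()/rcl_u16()/rcl_u32() emission in this file).

```c
/*
 * TILE_COORDINATES = 3, BRANCH_TO_SUB_LIST = 5, STORE_MS = 1,
 * WAIT_ON_SEMAPHORE = 1, TILE_RENDERING_MODE_CONFIG = 11 bytes, so:
 *
 *   loop_body_size = 3 + 5 + 1 = 9
 *   size           = 11 + 1 + xtiles * ytiles * 9
 *
 * which the BUG_ON() above cross-checks against next_offset.
 */
```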
368 | static int vc4_full_res_bounds_check(struct vc4_exec_info *exec, | ||
369 | struct drm_gem_cma_object *obj, | ||
370 | struct drm_vc4_submit_rcl_surface *surf) | ||
371 | { | ||
372 | struct drm_vc4_submit_cl *args = exec->args; | ||
373 | u32 render_tiles_stride = DIV_ROUND_UP(exec->args->width, 32); | ||
374 | |||
375 | if (surf->offset > obj->base.size) { | ||
376 | DRM_ERROR("surface offset %d > BO size %zd\n", | ||
377 | surf->offset, obj->base.size); | ||
378 | return -EINVAL; | ||
379 | } | ||
380 | |||
381 | if ((obj->base.size - surf->offset) / VC4_TILE_BUFFER_SIZE < | ||
382 | render_tiles_stride * args->max_y_tile + args->max_x_tile) { | ||
383 | DRM_ERROR("MSAA tile %d, %d out of bounds " | ||
384 | "(bo size %zd, offset %d).\n", | ||
385 | args->max_x_tile, args->max_y_tile, | ||
386 | obj->base.size, | ||
387 | surf->offset); | ||
388 | return -EINVAL; | ||
389 | } | ||
390 | |||
391 | return 0; | ||
392 | } | ||
393 | |||
394 | static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec, | ||
395 | struct drm_gem_cma_object **obj, | ||
396 | struct drm_vc4_submit_rcl_surface *surf) | ||
397 | { | ||
398 | if (surf->flags != 0 || surf->bits != 0) { | ||
399 | DRM_ERROR("MSAA surface had nonzero flags/bits\n"); | ||
400 | return -EINVAL; | ||
401 | } | ||
402 | |||
403 | if (surf->hindex == ~0) | ||
404 | return 0; | ||
405 | |||
406 | *obj = vc4_use_bo(exec, surf->hindex); | ||
407 | if (!*obj) | ||
408 | return -EINVAL; | ||
409 | |||
410 | if (surf->offset & 0xf) { | ||
411 | DRM_ERROR("MSAA write must be 16b aligned.\n"); | ||
412 | return -EINVAL; | ||
413 | } | ||
414 | |||
415 | return vc4_full_res_bounds_check(exec, *obj, surf); | ||
416 | } | ||
417 | |||
418 | static int vc4_rcl_surface_setup(struct vc4_exec_info *exec, | ||
419 | struct drm_gem_cma_object **obj, | ||
420 | struct drm_vc4_submit_rcl_surface *surf) | ||
421 | { | ||
422 | uint8_t tiling = VC4_GET_FIELD(surf->bits, | ||
423 | VC4_LOADSTORE_TILE_BUFFER_TILING); | ||
424 | uint8_t buffer = VC4_GET_FIELD(surf->bits, | ||
425 | VC4_LOADSTORE_TILE_BUFFER_BUFFER); | ||
426 | uint8_t format = VC4_GET_FIELD(surf->bits, | ||
427 | VC4_LOADSTORE_TILE_BUFFER_FORMAT); | ||
428 | int cpp; | ||
429 | int ret; | ||
430 | |||
431 | if (surf->flags & ~VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { | ||
432 | DRM_ERROR("Extra flags set\n"); | ||
433 | return -EINVAL; | ||
434 | } | ||
435 | |||
436 | if (surf->hindex == ~0) | ||
437 | return 0; | ||
438 | |||
439 | *obj = vc4_use_bo(exec, surf->hindex); | ||
440 | if (!*obj) | ||
441 | return -EINVAL; | ||
442 | |||
443 | if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { | ||
444 | if (surf == &exec->args->zs_write) { | ||
445 | DRM_ERROR("general zs write may not be a full-res.\n"); | ||
446 | return -EINVAL; | ||
447 | } | ||
448 | |||
449 | if (surf->bits != 0) { | ||
450 | DRM_ERROR("load/store general bits set with " | ||
451 | "full res load/store.\n"); | ||
452 | return -EINVAL; | ||
453 | } | ||
454 | |||
455 | ret = vc4_full_res_bounds_check(exec, *obj, surf); | ||
456 | if (ret) | ||
457 | return ret; | ||
458 | |||
459 | return 0; | ||
460 | } | ||
461 | |||
462 | if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK | | ||
463 | VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK | | ||
464 | VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) { | ||
465 | DRM_ERROR("Unknown bits in load/store: 0x%04x\n", | ||
466 | surf->bits); | ||
467 | return -EINVAL; | ||
468 | } | ||
469 | |||
470 | if (tiling > VC4_TILING_FORMAT_LT) { | ||
471 | DRM_ERROR("Bad tiling format\n"); | ||
472 | return -EINVAL; | ||
473 | } | ||
474 | |||
475 | if (buffer == VC4_LOADSTORE_TILE_BUFFER_ZS) { | ||
476 | if (format != 0) { | ||
477 | DRM_ERROR("No color format should be set for ZS\n"); | ||
478 | return -EINVAL; | ||
479 | } | ||
480 | cpp = 4; | ||
481 | } else if (buffer == VC4_LOADSTORE_TILE_BUFFER_COLOR) { | ||
482 | switch (format) { | ||
483 | case VC4_LOADSTORE_TILE_BUFFER_BGR565: | ||
484 | case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER: | ||
485 | cpp = 2; | ||
486 | break; | ||
487 | case VC4_LOADSTORE_TILE_BUFFER_RGBA8888: | ||
488 | cpp = 4; | ||
489 | break; | ||
490 | default: | ||
491 | DRM_ERROR("Bad tile buffer format\n"); | ||
492 | return -EINVAL; | ||
493 | } | ||
494 | } else { | ||
495 | DRM_ERROR("Bad load/store buffer %d.\n", buffer); | ||
496 | return -EINVAL; | ||
497 | } | ||
498 | |||
499 | if (surf->offset & 0xf) { | ||
500 | DRM_ERROR("load/store buffer must be 16b aligned.\n"); | ||
501 | return -EINVAL; | ||
502 | } | ||
503 | |||
504 | if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling, | ||
505 | exec->args->width, exec->args->height, cpp)) { | ||
506 | return -EINVAL; | ||
507 | } | ||
508 | |||
509 | return 0; | ||
510 | } | ||
511 | |||
512 | static int | ||
513 | vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec, | ||
514 | struct vc4_rcl_setup *setup, | ||
515 | struct drm_gem_cma_object **obj, | ||
516 | struct drm_vc4_submit_rcl_surface *surf) | ||
517 | { | ||
518 | uint8_t tiling = VC4_GET_FIELD(surf->bits, | ||
519 | VC4_RENDER_CONFIG_MEMORY_FORMAT); | ||
520 | uint8_t format = VC4_GET_FIELD(surf->bits, | ||
521 | VC4_RENDER_CONFIG_FORMAT); | ||
522 | int cpp; | ||
523 | |||
524 | if (surf->flags != 0) { | ||
525 | DRM_ERROR("No flags supported on render config.\n"); | ||
526 | return -EINVAL; | ||
527 | } | ||
528 | |||
529 | if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK | | ||
530 | VC4_RENDER_CONFIG_FORMAT_MASK | | ||
531 | VC4_RENDER_CONFIG_MS_MODE_4X | | ||
532 | VC4_RENDER_CONFIG_DECIMATE_MODE_4X)) { | ||
533 | DRM_ERROR("Unknown bits in render config: 0x%04x\n", | ||
534 | surf->bits); | ||
535 | return -EINVAL; | ||
536 | } | ||
537 | |||
538 | if (surf->hindex == ~0) | ||
539 | return 0; | ||
540 | |||
541 | *obj = vc4_use_bo(exec, surf->hindex); | ||
542 | if (!*obj) | ||
543 | return -EINVAL; | ||
544 | |||
545 | if (tiling > VC4_TILING_FORMAT_LT) { | ||
546 | DRM_ERROR("Bad tiling format\n"); | ||
547 | return -EINVAL; | ||
548 | } | ||
549 | |||
550 | switch (format) { | ||
551 | case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED: | ||
552 | case VC4_RENDER_CONFIG_FORMAT_BGR565: | ||
553 | cpp = 2; | ||
554 | break; | ||
555 | case VC4_RENDER_CONFIG_FORMAT_RGBA8888: | ||
556 | cpp = 4; | ||
557 | break; | ||
558 | default: | ||
559 | DRM_ERROR("Bad tile buffer format\n"); | ||
560 | return -EINVAL; | ||
561 | } | ||
562 | |||
563 | if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling, | ||
564 | exec->args->width, exec->args->height, cpp)) { | ||
565 | return -EINVAL; | ||
566 | } | ||
567 | |||
568 | return 0; | ||
569 | } | ||
570 | |||
571 | int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec) | ||
572 | { | ||
573 | struct vc4_rcl_setup setup = {0}; | ||
574 | struct drm_vc4_submit_cl *args = exec->args; | ||
575 | bool has_bin = args->bin_cl_size != 0; | ||
576 | int ret; | ||
577 | |||
578 | if (args->min_x_tile > args->max_x_tile || | ||
579 | args->min_y_tile > args->max_y_tile) { | ||
580 | DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n", | ||
581 | args->min_x_tile, args->min_y_tile, | ||
582 | args->max_x_tile, args->max_y_tile); | ||
583 | return -EINVAL; | ||
584 | } | ||
585 | |||
586 | if (has_bin && | ||
587 | (args->max_x_tile > exec->bin_tiles_x || | ||
588 | args->max_y_tile > exec->bin_tiles_y)) { | ||
589 | DRM_ERROR("Render tiles (%d,%d) outside of bin config " | ||
590 | "(%d,%d)\n", | ||
591 | args->max_x_tile, args->max_y_tile, | ||
592 | exec->bin_tiles_x, exec->bin_tiles_y); | ||
593 | return -EINVAL; | ||
594 | } | ||
595 | |||
596 | ret = vc4_rcl_render_config_surface_setup(exec, &setup, | ||
597 | &setup.color_write, | ||
598 | &args->color_write); | ||
599 | if (ret) | ||
600 | return ret; | ||
601 | |||
602 | ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read); | ||
603 | if (ret) | ||
604 | return ret; | ||
605 | |||
606 | ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read); | ||
607 | if (ret) | ||
608 | return ret; | ||
609 | |||
610 | ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write); | ||
611 | if (ret) | ||
612 | return ret; | ||
613 | |||
614 | ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_color_write, | ||
615 | &args->msaa_color_write); | ||
616 | if (ret) | ||
617 | return ret; | ||
618 | |||
619 | ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_zs_write, | ||
620 | &args->msaa_zs_write); | ||
621 | if (ret) | ||
622 | return ret; | ||
623 | |||
624 | /* We shouldn't even have the job submitted to us if there's no | ||
625 | * surface to write out. | ||
626 | */ | ||
627 | if (!setup.color_write && !setup.zs_write && | ||
628 | !setup.msaa_color_write && !setup.msaa_zs_write) { | ||
629 | DRM_ERROR("RCL requires color or Z/S write\n"); | ||
630 | return -EINVAL; | ||
631 | } | ||
632 | |||
633 | return vc4_create_rcl_bo(dev, exec, &setup); | ||
634 | } | ||
diff --git a/drivers/gpu/drm/vc4/vc4_trace.h b/drivers/gpu/drm/vc4/vc4_trace.h new file mode 100644 index 000000000000..ad7b1ea720c2 --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_trace.h | |||
@@ -0,0 +1,63 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015 Broadcom | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #if !defined(_VC4_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) | ||
10 | #define _VC4_TRACE_H_ | ||
11 | |||
12 | #include <linux/stringify.h> | ||
13 | #include <linux/types.h> | ||
14 | #include <linux/tracepoint.h> | ||
15 | |||
16 | #undef TRACE_SYSTEM | ||
17 | #define TRACE_SYSTEM vc4 | ||
18 | #define TRACE_INCLUDE_FILE vc4_trace | ||
19 | |||
20 | TRACE_EVENT(vc4_wait_for_seqno_begin, | ||
21 | TP_PROTO(struct drm_device *dev, uint64_t seqno, uint64_t timeout), | ||
22 | TP_ARGS(dev, seqno, timeout), | ||
23 | |||
24 | TP_STRUCT__entry( | ||
25 | __field(u32, dev) | ||
26 | __field(u64, seqno) | ||
27 | __field(u64, timeout) | ||
28 | ), | ||
29 | |||
30 | TP_fast_assign( | ||
31 | __entry->dev = dev->primary->index; | ||
32 | __entry->seqno = seqno; | ||
33 | __entry->timeout = timeout; | ||
34 | ), | ||
35 | |||
36 | TP_printk("dev=%u, seqno=%llu, timeout=%llu", | ||
37 | __entry->dev, __entry->seqno, __entry->timeout) | ||
38 | ); | ||
39 | |||
40 | TRACE_EVENT(vc4_wait_for_seqno_end, | ||
41 | TP_PROTO(struct drm_device *dev, uint64_t seqno), | ||
42 | TP_ARGS(dev, seqno), | ||
43 | |||
44 | TP_STRUCT__entry( | ||
45 | __field(u32, dev) | ||
46 | __field(u64, seqno) | ||
47 | ), | ||
48 | |||
49 | TP_fast_assign( | ||
50 | __entry->dev = dev->primary->index; | ||
51 | __entry->seqno = seqno; | ||
52 | ), | ||
53 | |||
54 | TP_printk("dev=%u, seqno=%llu", | ||
55 | __entry->dev, __entry->seqno) | ||
56 | ); | ||
57 | |||
58 | #endif /* _VC4_TRACE_H_ */ | ||
59 | |||
60 | /* This part must be outside protection */ | ||
61 | #undef TRACE_INCLUDE_PATH | ||
62 | #define TRACE_INCLUDE_PATH . | ||
63 | #include <trace/define_trace.h> | ||
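For context, TRACE_EVENT() generates trace_vc4_wait_for_seqno_begin()/_end() callables. A minimal sketch of the intended call pattern, assuming the real call sites live in vc4_gem.c's seqno-wait path (outside this hunk), with a hypothetical wrapper name:

```c
/* Sketch only; the actual wait helper is in vc4_gem.c. */
static int vc4_wait_for_seqno_sketch(struct drm_device *dev,
				     uint64_t seqno, uint64_t timeout_ns)
{
	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
	/* ... block until the seqno passes or the timeout expires ... */
	trace_vc4_wait_for_seqno_end(dev, seqno);
	return 0;
}
```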
diff --git a/drivers/gpu/drm/vc4/vc4_trace_points.c b/drivers/gpu/drm/vc4/vc4_trace_points.c new file mode 100644 index 000000000000..e6278f25716b --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_trace_points.c | |||
@@ -0,0 +1,14 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015 Broadcom | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include "vc4_drv.h" | ||
10 | |||
11 | #ifndef __CHECKER__ | ||
12 | #define CREATE_TRACE_POINTS | ||
13 | #include "vc4_trace.h" | ||
14 | #endif | ||
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 040ad0d8b8a1..424d515ffcda 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c | |||
@@ -144,6 +144,21 @@ int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused) | |||
144 | } | 144 | } |
145 | #endif /* CONFIG_DEBUG_FS */ | 145 | #endif /* CONFIG_DEBUG_FS */ |
146 | 146 | ||
147 | /* | ||
148 | * Asks the firmware to turn on power to the V3D engine. | ||
149 | * | ||
150 | * This may be doable with just the clocks interface, though this | ||
151 | * packet does some other register setup from the firmware, too. | ||
152 | */ | ||
153 | int | ||
154 | vc4_v3d_set_power(struct vc4_dev *vc4, bool on) | ||
155 | { | ||
156 | if (on) | ||
157 | return pm_generic_resume(&vc4->v3d->pdev->dev); | ||
158 | else | ||
159 | return pm_generic_poweroff(&vc4->v3d->pdev->dev); | ||
160 | } | ||
161 | |||
147 | static void vc4_v3d_init_hw(struct drm_device *dev) | 162 | static void vc4_v3d_init_hw(struct drm_device *dev) |
148 | { | 163 | { |
149 | struct vc4_dev *vc4 = to_vc4_dev(dev); | 164 | struct vc4_dev *vc4 = to_vc4_dev(dev); |
@@ -161,6 +176,7 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) | |||
161 | struct drm_device *drm = dev_get_drvdata(master); | 176 | struct drm_device *drm = dev_get_drvdata(master); |
162 | struct vc4_dev *vc4 = to_vc4_dev(drm); | 177 | struct vc4_dev *vc4 = to_vc4_dev(drm); |
163 | struct vc4_v3d *v3d = NULL; | 178 | struct vc4_v3d *v3d = NULL; |
179 | int ret; | ||
164 | 180 | ||
165 | v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL); | 181 | v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL); |
166 | if (!v3d) | 182 | if (!v3d) |
@@ -180,8 +196,20 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) | |||
180 | return -EINVAL; | 196 | return -EINVAL; |
181 | } | 197 | } |
182 | 198 | ||
199 | /* Reset the binner overflow address/size at setup, to be sure | ||
200 | * we don't reuse an old one. | ||
201 | */ | ||
202 | V3D_WRITE(V3D_BPOA, 0); | ||
203 | V3D_WRITE(V3D_BPOS, 0); | ||
204 | |||
183 | vc4_v3d_init_hw(drm); | 205 | vc4_v3d_init_hw(drm); |
184 | 206 | ||
207 | ret = drm_irq_install(drm, platform_get_irq(pdev, 0)); | ||
208 | if (ret) { | ||
209 | DRM_ERROR("Failed to install IRQ handler\n"); | ||
210 | return ret; | ||
211 | } | ||
212 | |||
185 | return 0; | 213 | return 0; |
186 | } | 214 | } |
187 | 215 | ||
@@ -191,6 +219,15 @@ static void vc4_v3d_unbind(struct device *dev, struct device *master, | |||
191 | struct drm_device *drm = dev_get_drvdata(master); | 219 | struct drm_device *drm = dev_get_drvdata(master); |
192 | struct vc4_dev *vc4 = to_vc4_dev(drm); | 220 | struct vc4_dev *vc4 = to_vc4_dev(drm); |
193 | 221 | ||
222 | drm_irq_uninstall(drm); | ||
223 | |||
224 | /* Disable the binner's overflow memory address, so the next | ||
225 | * driver probe (if any) doesn't try to reuse our old | ||
226 | * allocation. | ||
227 | */ | ||
228 | V3D_WRITE(V3D_BPOA, 0); | ||
229 | V3D_WRITE(V3D_BPOS, 0); | ||
230 | |||
194 | vc4->v3d = NULL; | 231 | vc4->v3d = NULL; |
195 | } | 232 | } |
196 | 233 | ||
diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c new file mode 100644 index 000000000000..0fb5b994b9dd --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_validate.c | |||
@@ -0,0 +1,900 @@ | |||
1 | /* | ||
2 | * Copyright © 2014 Broadcom | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice (including the next | ||
12 | * paragraph) shall be included in all copies or substantial portions of the | ||
13 | * Software. | ||
14 | * | ||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
21 | * IN THE SOFTWARE. | ||
22 | */ | ||
23 | |||
24 | /** | ||
25 | * Command list validator for VC4. | ||
26 | * | ||
27 | * The VC4 has no IOMMU between it and system memory. So, a user with | ||
28 | * access to execute command lists could escalate privilege by | ||
29 | * overwriting system memory (drawing to it as a framebuffer) or | ||
30 | * reading system memory it shouldn't (reading it as a texture, or | ||
31 | * uniform data, or vertex data). | ||
32 | * | ||
33 | * This validates command lists to ensure that all accesses are within | ||
34 | * the bounds of the GEM objects referenced. It explicitly whitelists | ||
35 | * packets, and looks at the offsets in any address fields to make | ||
36 | * sure they're constrained within the BOs they reference. | ||
37 | * | ||
38 | * Note that because of the validation that's happening anyway, this | ||
39 | * is where GEM relocation processing happens. | ||
40 | */ | ||
41 | |||
42 | #include "uapi/drm/vc4_drm.h" | ||
43 | #include "vc4_drv.h" | ||
44 | #include "vc4_packet.h" | ||
45 | |||
46 | #define VALIDATE_ARGS \ | ||
47 | struct vc4_exec_info *exec, \ | ||
48 | void *validated, \ | ||
49 | void *untrusted | ||
50 | |||
51 | /** Return the width in pixels of a 64-byte microtile. */ | ||
52 | static uint32_t | ||
53 | utile_width(int cpp) | ||
54 | { | ||
55 | switch (cpp) { | ||
56 | case 1: | ||
57 | case 2: | ||
58 | return 8; | ||
59 | case 4: | ||
60 | return 4; | ||
61 | case 8: | ||
62 | return 2; | ||
63 | default: | ||
64 | DRM_ERROR("unknown cpp: %d\n", cpp); | ||
65 | return 1; | ||
66 | } | ||
67 | } | ||
68 | |||
69 | /** Return the height in pixels of a 64-byte microtile. */ | ||
70 | static uint32_t | ||
71 | utile_height(int cpp) | ||
72 | { | ||
73 | switch (cpp) { | ||
74 | case 1: | ||
75 | return 8; | ||
76 | case 2: | ||
77 | case 4: | ||
78 | case 8: | ||
79 | return 4; | ||
80 | default: | ||
81 | DRM_ERROR("unknown cpp: %d\n", cpp); | ||
82 | return 1; | ||
83 | } | ||
84 | } | ||
85 | |||
86 | /** | ||
87 | * The texture unit decides what tiling format a particular miplevel uses | ||
88 | * based on this same size check, so we lay out our miptrees accordingly. | ||
89 | */ | ||
90 | static bool | ||
91 | size_is_lt(uint32_t width, uint32_t height, int cpp) | ||
92 | { | ||
93 | return (width <= 4 * utile_width(cpp) || | ||
94 | height <= 4 * utile_height(cpp)); | ||
95 | } | ||
96 | |||
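A worked example of the LT cutoff, using the microtile dimensions above (illustrative):

```c
/* At cpp == 4 a utile is 4x4 pixels (4 * 4 * 4 == 64 bytes), so any
 * miplevel with width <= 16 or height <= 16 is laid out LT rather than T.
 */
```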
97 | struct drm_gem_cma_object * | ||
98 | vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex) | ||
99 | { | ||
100 | struct drm_gem_cma_object *obj; | ||
101 | struct vc4_bo *bo; | ||
102 | |||
103 | if (hindex >= exec->bo_count) { | ||
104 | DRM_ERROR("BO index %d greater than BO count %d\n", | ||
105 | hindex, exec->bo_count); | ||
106 | return NULL; | ||
107 | } | ||
108 | obj = exec->bo[hindex]; | ||
109 | bo = to_vc4_bo(&obj->base); | ||
110 | |||
111 | if (bo->validated_shader) { | ||
112 | DRM_ERROR("Trying to use shader BO as something other than " | ||
113 | "a shader\n"); | ||
114 | return NULL; | ||
115 | } | ||
116 | |||
117 | return obj; | ||
118 | } | ||
119 | |||
120 | static struct drm_gem_cma_object * | ||
121 | vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index) | ||
122 | { | ||
123 | return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]); | ||
124 | } | ||
125 | |||
126 | static bool | ||
127 | validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos) | ||
128 | { | ||
129 | /* Note that the untrusted pointer passed to these functions is | ||
130 | * incremented past the packet byte. | ||
131 | */ | ||
132 | return (untrusted - 1 == exec->bin_u + pos); | ||
133 | } | ||
134 | |||
135 | static uint32_t | ||
136 | gl_shader_rec_size(uint32_t pointer_bits) | ||
137 | { | ||
138 | uint32_t attribute_count = pointer_bits & 7; | ||
139 | bool extended = pointer_bits & 8; | ||
140 | |||
141 | if (attribute_count == 0) | ||
142 | attribute_count = 8; | ||
143 | |||
144 | if (extended) | ||
145 | return 100 + attribute_count * 4; | ||
146 | else | ||
147 | return 36 + attribute_count * 8; | ||
148 | } | ||
149 | |||
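An illustrative decode of pointer_bits (the low four bits of the GL shader rec reference validated below):

```c
/* pointer_bits == 0x0: 8 attributes, non-extended -> 36 + 8 * 8 = 100 bytes.
 * pointer_bits == 0xb: extended, 3 attributes     -> 100 + 3 * 4 = 112 bytes.
 */
```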
150 | bool | ||
151 | vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, | ||
152 | uint32_t offset, uint8_t tiling_format, | ||
153 | uint32_t width, uint32_t height, uint8_t cpp) | ||
154 | { | ||
155 | uint32_t aligned_width, aligned_height, stride, size; | ||
156 | uint32_t utile_w = utile_width(cpp); | ||
157 | uint32_t utile_h = utile_height(cpp); | ||
158 | |||
159 | /* The shaded vertex format stores signed 12.4 fixed point | ||
160 | * (-2048,2047) offsets from the viewport center, so we should | ||
161 | * never have a render target larger than 4096. The texture | ||
162 | * unit can only sample from 2048x2048, so it's even more | ||
163 | * restricted. This lets us avoid worrying about overflow in | ||
164 | * our math. | ||
165 | */ | ||
166 | if (width > 4096 || height > 4096) { | ||
167 | DRM_ERROR("Surface dimesions (%d,%d) too large", width, height); | ||
168 | return false; | ||
169 | } | ||
170 | |||
171 | switch (tiling_format) { | ||
172 | case VC4_TILING_FORMAT_LINEAR: | ||
173 | aligned_width = round_up(width, utile_w); | ||
174 | aligned_height = height; | ||
175 | break; | ||
176 | case VC4_TILING_FORMAT_T: | ||
177 | aligned_width = round_up(width, utile_w * 8); | ||
178 | aligned_height = round_up(height, utile_h * 8); | ||
179 | break; | ||
180 | case VC4_TILING_FORMAT_LT: | ||
181 | aligned_width = round_up(width, utile_w); | ||
182 | aligned_height = round_up(height, utile_h); | ||
183 | break; | ||
184 | default: | ||
185 | DRM_ERROR("buffer tiling %d unsupported\n", tiling_format); | ||
186 | return false; | ||
187 | } | ||
188 | |||
189 | stride = aligned_width * cpp; | ||
190 | size = stride * aligned_height; | ||
191 | |||
192 | if (size + offset < size || | ||
193 | size + offset > fbo->base.size) { | ||
194 | DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n", | ||
195 | width, height, | ||
196 | aligned_width, aligned_height, | ||
197 | size, offset, fbo->base.size); | ||
198 | return false; | ||
199 | } | ||
200 | |||
201 | return true; | ||
202 | } | ||
203 | |||
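A worked example of the bounds math above (illustrative numbers):

```c
/* A 1920x1080 RGBA8888 (cpp == 4) T-format surface: utile_w == 4, so it
 * aligns in 32-pixel steps; aligned_width = 1920, aligned_height =
 * round_up(1080, 32) = 1088, stride = 1920 * 4 = 7680, and size =
 * 7680 * 1088 = 8355840 bytes, which must fit in the BO at the offset.
 */
```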
204 | static int | ||
205 | validate_flush(VALIDATE_ARGS) | ||
206 | { | ||
207 | if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) { | ||
208 | DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n"); | ||
209 | return -EINVAL; | ||
210 | } | ||
211 | exec->found_flush = true; | ||
212 | |||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | static int | ||
217 | validate_start_tile_binning(VALIDATE_ARGS) | ||
218 | { | ||
219 | if (exec->found_start_tile_binning_packet) { | ||
220 | DRM_ERROR("Duplicate VC4_PACKET_START_TILE_BINNING\n"); | ||
221 | return -EINVAL; | ||
222 | } | ||
223 | exec->found_start_tile_binning_packet = true; | ||
224 | |||
225 | if (!exec->found_tile_binning_mode_config_packet) { | ||
226 | DRM_ERROR("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n"); | ||
227 | return -EINVAL; | ||
228 | } | ||
229 | |||
230 | return 0; | ||
231 | } | ||
232 | |||
233 | static int | ||
234 | validate_increment_semaphore(VALIDATE_ARGS) | ||
235 | { | ||
236 | if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) { | ||
237 | DRM_ERROR("Bin CL must end with " | ||
238 | "VC4_PACKET_INCREMENT_SEMAPHORE\n"); | ||
239 | return -EINVAL; | ||
240 | } | ||
241 | exec->found_increment_semaphore_packet = true; | ||
242 | |||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | static int | ||
247 | validate_indexed_prim_list(VALIDATE_ARGS) | ||
248 | { | ||
249 | struct drm_gem_cma_object *ib; | ||
250 | uint32_t length = *(uint32_t *)(untrusted + 1); | ||
251 | uint32_t offset = *(uint32_t *)(untrusted + 5); | ||
252 | uint32_t max_index = *(uint32_t *)(untrusted + 9); | ||
253 | uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1; | ||
254 | struct vc4_shader_state *shader_state; | ||
255 | |||
256 | /* Check overflow condition */ | ||
257 | if (exec->shader_state_count == 0) { | ||
258 | DRM_ERROR("shader state must precede primitives\n"); | ||
259 | return -EINVAL; | ||
260 | } | ||
261 | shader_state = &exec->shader_state[exec->shader_state_count - 1]; | ||
262 | |||
263 | if (max_index > shader_state->max_index) | ||
264 | shader_state->max_index = max_index; | ||
265 | |||
266 | ib = vc4_use_handle(exec, 0); | ||
267 | if (!ib) | ||
268 | return -EINVAL; | ||
269 | |||
270 | if (offset > ib->base.size || | ||
271 | (ib->base.size - offset) / index_size < length) { | ||
272 | DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n", | ||
273 | offset, length, index_size, ib->base.size); | ||
274 | return -EINVAL; | ||
275 | } | ||
276 | |||
277 | *(uint32_t *)(validated + 5) = ib->paddr + offset; | ||
278 | |||
279 | return 0; | ||
280 | } | ||
281 | |||
282 | static int | ||
283 | validate_gl_array_primitive(VALIDATE_ARGS) | ||
284 | { | ||
285 | uint32_t length = *(uint32_t *)(untrusted + 1); | ||
286 | uint32_t base_index = *(uint32_t *)(untrusted + 5); | ||
287 | uint32_t max_index; | ||
288 | struct vc4_shader_state *shader_state; | ||
289 | |||
290 | /* Check overflow condition */ | ||
291 | if (exec->shader_state_count == 0) { | ||
292 | DRM_ERROR("shader state must precede primitives\n"); | ||
293 | return -EINVAL; | ||
294 | } | ||
295 | shader_state = &exec->shader_state[exec->shader_state_count - 1]; | ||
296 | |||
297 | if (length + base_index < length) { | ||
298 | DRM_ERROR("primitive vertex count overflow\n"); | ||
299 | return -EINVAL; | ||
300 | } | ||
301 | max_index = length + base_index - 1; | ||
302 | |||
303 | if (max_index > shader_state->max_index) | ||
304 | shader_state->max_index = max_index; | ||
305 | |||
306 | return 0; | ||
307 | } | ||
308 | |||
309 | static int | ||
310 | validate_gl_shader_state(VALIDATE_ARGS) | ||
311 | { | ||
312 | uint32_t i = exec->shader_state_count++; | ||
313 | |||
314 | if (i >= exec->shader_state_size) { | ||
315 | DRM_ERROR("More requests for shader states than declared\n"); | ||
316 | return -EINVAL; | ||
317 | } | ||
318 | |||
319 | exec->shader_state[i].addr = *(uint32_t *)untrusted; | ||
320 | exec->shader_state[i].max_index = 0; | ||
321 | |||
322 | if (exec->shader_state[i].addr & ~0xf) { | ||
323 | DRM_ERROR("high bits set in GL shader rec reference\n"); | ||
324 | return -EINVAL; | ||
325 | } | ||
326 | |||
327 | *(uint32_t *)validated = (exec->shader_rec_p + | ||
328 | exec->shader_state[i].addr); | ||
329 | |||
330 | exec->shader_rec_p += | ||
331 | roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16); | ||
332 | |||
333 | return 0; | ||
334 | } | ||
335 | |||
336 | static int | ||
337 | validate_tile_binning_config(VALIDATE_ARGS) | ||
338 | { | ||
339 | struct drm_device *dev = exec->exec_bo->base.dev; | ||
340 | struct vc4_bo *tile_bo; | ||
341 | uint8_t flags; | ||
342 | uint32_t tile_state_size, tile_alloc_size; | ||
343 | uint32_t tile_count; | ||
344 | |||
345 | if (exec->found_tile_binning_mode_config_packet) { | ||
346 | DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n"); | ||
347 | return -EINVAL; | ||
348 | } | ||
349 | exec->found_tile_binning_mode_config_packet = true; | ||
350 | |||
351 | exec->bin_tiles_x = *(uint8_t *)(untrusted + 12); | ||
352 | exec->bin_tiles_y = *(uint8_t *)(untrusted + 13); | ||
353 | tile_count = exec->bin_tiles_x * exec->bin_tiles_y; | ||
354 | flags = *(uint8_t *)(untrusted + 14); | ||
355 | |||
356 | if (exec->bin_tiles_x == 0 || | ||
357 | exec->bin_tiles_y == 0) { | ||
358 | DRM_ERROR("Tile binning config of %dx%d too small\n", | ||
359 | exec->bin_tiles_x, exec->bin_tiles_y); | ||
360 | return -EINVAL; | ||
361 | } | ||
362 | |||
363 | if (flags & (VC4_BIN_CONFIG_DB_NON_MS | | ||
364 | VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) { | ||
365 | DRM_ERROR("unsupported binning config flags 0x%02x\n", flags); | ||
366 | return -EINVAL; | ||
367 | } | ||
368 | |||
369 | /* The tile state data array is 48 bytes per tile, and we put it at | ||
370 | * the start of a BO containing both it and the tile alloc. | ||
371 | */ | ||
372 | tile_state_size = 48 * tile_count; | ||
373 | |||
374 | /* Since the tile alloc array will follow us, align. */ | ||
375 | exec->tile_alloc_offset = roundup(tile_state_size, 4096); | ||
376 | |||
377 | *(uint8_t *)(validated + 14) = | ||
378 | ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK | | ||
379 | VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) | | ||
380 | VC4_BIN_CONFIG_AUTO_INIT_TSDA | | ||
381 | VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32, | ||
382 | VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) | | ||
383 | VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128, | ||
384 | VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE)); | ||
385 | |||
386 | /* Initial block size. */ | ||
387 | tile_alloc_size = 32 * tile_count; | ||
388 | |||
389 | /* | ||
390 | * The initial allocation gets rounded to the next 256 bytes before | ||
391 | * the hardware starts fulfilling further allocations. | ||
392 | */ | ||
393 | tile_alloc_size = roundup(tile_alloc_size, 256); | ||
394 | |||
395 | /* Add space for the extra allocations. This is what gets used first, | ||
396 | * before overflow memory. It must have at least 4096 bytes, but we | ||
397 | * want to avoid overflow memory usage if possible. | ||
398 | */ | ||
399 | tile_alloc_size += 1024 * 1024; | ||
400 | |||
401 | tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size, | ||
402 | true); | ||
403 | exec->tile_bo = &tile_bo->base; | ||
404 | if (!exec->tile_bo) | ||
405 | return -ENOMEM; | ||
406 | list_add_tail(&tile_bo->unref_head, &exec->unref_list); | ||
407 | |||
408 | /* tile alloc address. */ | ||
409 | *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr + | ||
410 | exec->tile_alloc_offset); | ||
411 | /* tile alloc size. */ | ||
412 | *(uint32_t *)(validated + 4) = tile_alloc_size; | ||
413 | /* tile state address. */ | ||
414 | *(uint32_t *)(validated + 8) = exec->tile_bo->paddr; | ||
415 | |||
416 | return 0; | ||
417 | } | ||
418 | |||
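A worked example of the tile-memory layout computed above (illustrative, assuming 64x64 bin tiles):

```c
/* A 1920x1080 frame bins as 30x17 tiles, so tile_count = 510:
 *   tile state:        48 * 510 = 24480 bytes at offset 0
 *   tile_alloc_offset: roundup(24480, 4096) = 24576
 *   initial alloc:     roundup(32 * 510, 256) = 16384 bytes
 * plus the 1MB of extra allocation space on top of that.
 */
```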
419 | static int | ||
420 | validate_gem_handles(VALIDATE_ARGS) | ||
421 | { | ||
422 | memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index)); | ||
423 | return 0; | ||
424 | } | ||
425 | |||
426 | #define VC4_DEFINE_PACKET(packet, func) \ | ||
427 | [packet] = { packet ## _SIZE, #packet, func } | ||
428 | |||
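For reference, the macro expands to a designated initializer (illustrative expansion):

```c
/* VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush) becomes:
 *
 *   [VC4_PACKET_FLUSH] = { VC4_PACKET_FLUSH_SIZE, "VC4_PACKET_FLUSH",
 *                          validate_flush },
 *
 * so any opcode not listed keeps a NULL name and is rejected by the
 * validator loop below.
 */
```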
429 | static const struct cmd_info { | ||
430 | uint16_t len; | ||
431 | const char *name; | ||
432 | int (*func)(struct vc4_exec_info *exec, void *validated, | ||
433 | void *untrusted); | ||
434 | } cmd_info[] = { | ||
435 | VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL), | ||
436 | VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL), | ||
437 | VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush), | ||
438 | VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL), | ||
439 | VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, | ||
440 | validate_start_tile_binning), | ||
441 | VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, | ||
442 | validate_increment_semaphore), | ||
443 | |||
444 | VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, | ||
445 | validate_indexed_prim_list), | ||
446 | VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, | ||
447 | validate_gl_array_primitive), | ||
448 | |||
449 | VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL), | ||
450 | |||
451 | VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state), | ||
452 | |||
453 | VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL), | ||
454 | VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL), | ||
455 | VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL), | ||
456 | VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL), | ||
457 | VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL), | ||
458 | VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL), | ||
459 | VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL), | ||
460 | VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL), | ||
461 | VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL), | ||
462 | /* Note: The docs say this was also 105, but it was 106 in the | ||
463 | * initial userland code drop. | ||
464 | */ | ||
465 | VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL), | ||
466 | |||
467 | VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, | ||
468 | validate_tile_binning_config), | ||
469 | |||
470 | VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles), | ||
471 | }; | ||
472 | |||
473 | int | ||
474 | vc4_validate_bin_cl(struct drm_device *dev, | ||
475 | void *validated, | ||
476 | void *unvalidated, | ||
477 | struct vc4_exec_info *exec) | ||
478 | { | ||
479 | uint32_t len = exec->args->bin_cl_size; | ||
480 | uint32_t dst_offset = 0; | ||
481 | uint32_t src_offset = 0; | ||
482 | |||
483 | while (src_offset < len) { | ||
484 | void *dst_pkt = validated + dst_offset; | ||
485 | void *src_pkt = unvalidated + src_offset; | ||
486 | u8 cmd = *(uint8_t *)src_pkt; | ||
487 | const struct cmd_info *info; | ||
488 | |||
489 | if (cmd >= ARRAY_SIZE(cmd_info)) { | ||
490 | DRM_ERROR("0x%08x: packet %d out of bounds\n", | ||
491 | src_offset, cmd); | ||
492 | return -EINVAL; | ||
493 | } | ||
494 | |||
495 | info = &cmd_info[cmd]; | ||
496 | if (!info->name) { | ||
497 | DRM_ERROR("0x%08x: packet %d invalid\n", | ||
498 | src_offset, cmd); | ||
499 | return -EINVAL; | ||
500 | } | ||
501 | |||
502 | if (src_offset + info->len > len) { | ||
503 | DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x " | ||
504 | "exceeds bounds (0x%08x)\n", | ||
505 | src_offset, cmd, info->name, info->len, | ||
506 | src_offset + len); | ||
507 | return -EINVAL; | ||
508 | } | ||
509 | |||
510 | if (cmd != VC4_PACKET_GEM_HANDLES) | ||
511 | memcpy(dst_pkt, src_pkt, info->len); | ||
512 | |||
513 | if (info->func && info->func(exec, | ||
514 | dst_pkt + 1, | ||
515 | src_pkt + 1)) { | ||
516 | DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n", | ||
517 | src_offset, cmd, info->name); | ||
518 | return -EINVAL; | ||
519 | } | ||
520 | |||
521 | src_offset += info->len; | ||
522 | /* GEM handle loading doesn't produce HW packets. */ | ||
523 | if (cmd != VC4_PACKET_GEM_HANDLES) | ||
524 | dst_offset += info->len; | ||
525 | |||
526 | /* When the CL hits halt, it'll stop reading anything else. */ | ||
527 | if (cmd == VC4_PACKET_HALT) | ||
528 | break; | ||
529 | } | ||
530 | |||
531 | exec->ct0ea = exec->ct0ca + dst_offset; | ||
532 | |||
533 | if (!exec->found_start_tile_binning_packet) { | ||
534 | DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n"); | ||
535 | return -EINVAL; | ||
536 | } | ||
537 | |||
538 | /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH. The | ||
539 | * semaphore is used to trigger the render CL to start up, and the | ||
540 | * FLUSH is what caps the bin lists with | ||
541 | * VC4_PACKET_RETURN_FROM_SUB_LIST (so control returns to the main | ||
542 | * render CL after it branches into each tile's list) and actually | ||
543 | * triggers the queued semaphore increment. | ||
544 | */ | ||
545 | if (!exec->found_increment_semaphore_packet || !exec->found_flush) { | ||
546 | DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + " | ||
547 | "VC4_PACKET_FLUSH\n"); | ||
548 | return -EINVAL; | ||
549 | } | ||
550 | |||
551 | return 0; | ||
552 | } | ||
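
To make the required tail concrete: a userspace CL emitter would end every bin CL with the semaphore increment followed by the flush. A minimal sketch, where struct vc4_cl and cl_u8() are assumed helpers for appending bytes to the plain-memory command list and are not part of this patch:

    /* Sketch only: close out a bin CL so vc4_validate_bin_cl() accepts it. */
    static void close_bin_cl(struct vc4_cl *cl)
    {
            cl_u8(cl, VC4_PACKET_INCREMENT_SEMAPHORE); /* lets the render CL proceed */
            cl_u8(cl, VC4_PACKET_FLUSH); /* caps the tile lists, fires the increment */
    }
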
553 | |||
554 | static bool | ||
555 | reloc_tex(struct vc4_exec_info *exec, | ||
556 | void *uniform_data_u, | ||
557 | struct vc4_texture_sample_info *sample, | ||
558 | uint32_t texture_handle_index) | ||
560 | { | ||
561 | struct drm_gem_cma_object *tex; | ||
562 | uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]); | ||
563 | uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]); | ||
564 | uint32_t p2 = (sample->p_offset[2] != ~0 ? | ||
565 | *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0); | ||
566 | uint32_t p3 = (sample->p_offset[3] != ~0 ? | ||
567 | *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0); | ||
568 | uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0]; | ||
569 | uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK; | ||
570 | uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS); | ||
571 | uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH); | ||
572 | uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT); | ||
573 | uint32_t cpp, tiling_format, utile_w, utile_h; | ||
574 | uint32_t i; | ||
575 | uint32_t cube_map_stride = 0; | ||
576 | enum vc4_texture_data_type type; | ||
577 | |||
578 | tex = vc4_use_bo(exec, texture_handle_index); | ||
579 | if (!tex) | ||
580 | return false; | ||
581 | |||
582 | if (sample->is_direct) { | ||
583 | uint32_t remaining_size = tex->base.size - p0; | ||
584 | |||
585 | if (p0 > tex->base.size - 4) { | ||
586 | DRM_ERROR("UBO offset greater than UBO size\n"); | ||
587 | goto fail; | ||
588 | } | ||
589 | if (p1 > remaining_size - 4) { | ||
590 | DRM_ERROR("UBO clamp would allow reads " | ||
591 | "outside of UBO\n"); | ||
592 | goto fail; | ||
593 | } | ||
594 | *validated_p0 = tex->paddr + p0; | ||
595 | return true; | ||
596 | } | ||
597 | |||
598 | if (width == 0) | ||
599 | width = 2048; | ||
600 | if (height == 0) | ||
601 | height = 2048; | ||
602 | |||
603 | if (p0 & VC4_TEX_P0_CMMODE_MASK) { | ||
604 | if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) == | ||
605 | VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) | ||
606 | cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK; | ||
607 | if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) == | ||
608 | VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) { | ||
609 | if (cube_map_stride) { | ||
610 | DRM_ERROR("Cube map stride set twice\n"); | ||
611 | goto fail; | ||
612 | } | ||
613 | |||
614 | cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK; | ||
615 | } | ||
616 | if (!cube_map_stride) { | ||
617 | DRM_ERROR("Cube map stride not set\n"); | ||
618 | goto fail; | ||
619 | } | ||
620 | } | ||
621 | |||
622 | type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) | | ||
623 | (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4)); | ||
624 | |||
625 | switch (type) { | ||
626 | case VC4_TEXTURE_TYPE_RGBA8888: | ||
627 | case VC4_TEXTURE_TYPE_RGBX8888: | ||
628 | case VC4_TEXTURE_TYPE_RGBA32R: | ||
629 | cpp = 4; | ||
630 | break; | ||
631 | case VC4_TEXTURE_TYPE_RGBA4444: | ||
632 | case VC4_TEXTURE_TYPE_RGBA5551: | ||
633 | case VC4_TEXTURE_TYPE_RGB565: | ||
634 | case VC4_TEXTURE_TYPE_LUMALPHA: | ||
635 | case VC4_TEXTURE_TYPE_S16F: | ||
636 | case VC4_TEXTURE_TYPE_S16: | ||
637 | cpp = 2; | ||
638 | break; | ||
639 | case VC4_TEXTURE_TYPE_LUMINANCE: | ||
640 | case VC4_TEXTURE_TYPE_ALPHA: | ||
641 | case VC4_TEXTURE_TYPE_S8: | ||
642 | cpp = 1; | ||
643 | break; | ||
644 | case VC4_TEXTURE_TYPE_ETC1: | ||
645 | case VC4_TEXTURE_TYPE_BW1: | ||
646 | case VC4_TEXTURE_TYPE_A4: | ||
647 | case VC4_TEXTURE_TYPE_A1: | ||
648 | case VC4_TEXTURE_TYPE_RGBA64: | ||
649 | case VC4_TEXTURE_TYPE_YUV422R: | ||
650 | default: | ||
651 | DRM_ERROR("Texture format %d unsupported\n", type); | ||
652 | goto fail; | ||
653 | } | ||
654 | utile_w = utile_width(cpp); | ||
655 | utile_h = utile_height(cpp); | ||
656 | |||
657 | if (type == VC4_TEXTURE_TYPE_RGBA32R) { | ||
658 | tiling_format = VC4_TILING_FORMAT_LINEAR; | ||
659 | } else { | ||
660 | if (size_is_lt(width, height, cpp)) | ||
661 | tiling_format = VC4_TILING_FORMAT_LT; | ||
662 | else | ||
663 | tiling_format = VC4_TILING_FORMAT_T; | ||
664 | } | ||
665 | |||
666 | if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5, | ||
667 | tiling_format, width, height, cpp)) { | ||
668 | goto fail; | ||
669 | } | ||
670 | |||
671 | /* The mipmap levels are stored before the base of the texture. Make | ||
672 | * sure there is actually space in the BO. | ||
673 | */ | ||
674 | for (i = 1; i <= miplevels; i++) { | ||
675 | uint32_t level_width = max(width >> i, 1u); | ||
676 | uint32_t level_height = max(height >> i, 1u); | ||
677 | uint32_t aligned_width, aligned_height; | ||
678 | uint32_t level_size; | ||
679 | |||
680 | /* Once the levels get small enough, they drop from T to LT. */ | ||
681 | if (tiling_format == VC4_TILING_FORMAT_T && | ||
682 | size_is_lt(level_width, level_height, cpp)) { | ||
683 | tiling_format = VC4_TILING_FORMAT_LT; | ||
684 | } | ||
685 | |||
686 | switch (tiling_format) { | ||
687 | case VC4_TILING_FORMAT_T: | ||
688 | aligned_width = round_up(level_width, utile_w * 8); | ||
689 | aligned_height = round_up(level_height, utile_h * 8); | ||
690 | break; | ||
691 | case VC4_TILING_FORMAT_LT: | ||
692 | aligned_width = round_up(level_width, utile_w); | ||
693 | aligned_height = round_up(level_height, utile_h); | ||
694 | break; | ||
695 | default: | ||
696 | aligned_width = round_up(level_width, utile_w); | ||
697 | aligned_height = level_height; | ||
698 | break; | ||
699 | } | ||
700 | |||
701 | level_size = aligned_width * cpp * aligned_height; | ||
702 | |||
703 | if (offset < level_size) { | ||
704 | DRM_ERROR("Level %d (%dx%d -> %dx%d) size %db " | ||
705 | "overflowed buffer bounds (offset %d)\n", | ||
706 | i, level_width, level_height, | ||
707 | aligned_width, aligned_height, | ||
708 | level_size, offset); | ||
709 | goto fail; | ||
710 | } | ||
711 | |||
712 | offset -= level_size; | ||
713 | } | ||
714 | |||
715 | *validated_p0 = tex->paddr + p0; | ||
716 | |||
717 | return true; | ||
718 | fail: | ||
719 | DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0); | ||
720 | DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1); | ||
721 | DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2); | ||
722 | DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3); | ||
723 | return false; | ||
724 | } | ||
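
A worked example of the level walk above, with assumed numbers: a 256x256 VC4_TEXTURE_TYPE_RGBA8888 texture has cpp = 4, so utiles are 4x4 texels and T-format levels align to 32x32.

    /* miplevels = 2, T format:
     *   level 1: 128x128 -> aligned 128x128 -> 128 * 4 * 128 = 65536 bytes
     *   level 2:  64x64  -> aligned  64x64  ->  64 * 4 *  64 = 16384 bytes
     * Each level size is subtracted from the running offset, so the base
     * level's offset in p0 must be at least 81920 bytes into the BO.
     */
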
725 | |||
726 | static int | ||
727 | validate_gl_shader_rec(struct drm_device *dev, | ||
728 | struct vc4_exec_info *exec, | ||
729 | struct vc4_shader_state *state) | ||
730 | { | ||
731 | uint32_t *src_handles; | ||
732 | void *pkt_u, *pkt_v; | ||
733 | static const uint32_t shader_reloc_offsets[] = { | ||
734 | 4, /* fs */ | ||
735 | 16, /* vs */ | ||
736 | 28, /* cs */ | ||
737 | }; | ||
738 | uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets); | ||
739 | struct drm_gem_cma_object *bo[shader_reloc_count + 8]; | ||
740 | uint32_t nr_attributes, nr_relocs, packet_size; | ||
741 | int i; | ||
742 | |||
743 | nr_attributes = state->addr & 0x7; | ||
744 | if (nr_attributes == 0) | ||
745 | nr_attributes = 8; | ||
746 | packet_size = gl_shader_rec_size(state->addr); | ||
747 | |||
748 | nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes; | ||
749 | if (nr_relocs * 4 > exec->shader_rec_size) { | ||
750 | DRM_ERROR("overflowed shader recs reading %d handles " | ||
751 | "from %d bytes left\n", | ||
752 | nr_relocs, exec->shader_rec_size); | ||
753 | return -EINVAL; | ||
754 | } | ||
755 | src_handles = exec->shader_rec_u; | ||
756 | exec->shader_rec_u += nr_relocs * 4; | ||
757 | exec->shader_rec_size -= nr_relocs * 4; | ||
758 | |||
759 | if (packet_size > exec->shader_rec_size) { | ||
760 | DRM_ERROR("overflowed shader recs copying %db packet " | ||
761 | "from %d bytes left\n", | ||
762 | packet_size, exec->shader_rec_size); | ||
763 | return -EINVAL; | ||
764 | } | ||
765 | pkt_u = exec->shader_rec_u; | ||
766 | pkt_v = exec->shader_rec_v; | ||
767 | memcpy(pkt_v, pkt_u, packet_size); | ||
768 | exec->shader_rec_u += packet_size; | ||
769 | /* Shader recs have to be aligned to 16 bytes (due to the attribute | ||
770 | * flags being in the low bytes), so round the next validated shader | ||
771 | * rec address up. This should be safe, since we've got so many | ||
772 | * relocations in a shader rec packet. | ||
773 | */ | ||
774 | BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4); | ||
775 | exec->shader_rec_v += roundup(packet_size, 16); | ||
776 | exec->shader_rec_size -= packet_size; | ||
777 | |||
778 | if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) { | ||
779 | DRM_ERROR("Multi-threaded fragment shaders not supported.\n"); | ||
780 | return -EINVAL; | ||
781 | } | ||
782 | |||
783 | for (i = 0; i < shader_reloc_count; i++) { | ||
784 | if (src_handles[i] >= exec->bo_count) { | ||
785 | DRM_ERROR("Shader handle %d too big\n", src_handles[i]); | ||
786 | return -EINVAL; | ||
787 | } | ||
788 | |||
789 | bo[i] = exec->bo[src_handles[i]]; | ||
790 | if (!bo[i]) | ||
791 | return -EINVAL; | ||
792 | } | ||
793 | for (i = shader_reloc_count; i < nr_relocs; i++) { | ||
794 | bo[i] = vc4_use_bo(exec, src_handles[i]); | ||
795 | if (!bo[i]) | ||
796 | return -EINVAL; | ||
797 | } | ||
798 | |||
799 | for (i = 0; i < shader_reloc_count; i++) { | ||
800 | struct vc4_validated_shader_info *validated_shader; | ||
801 | uint32_t o = shader_reloc_offsets[i]; | ||
802 | uint32_t src_offset = *(uint32_t *)(pkt_u + o); | ||
803 | uint32_t *texture_handles_u; | ||
804 | void *uniform_data_u; | ||
805 | uint32_t tex; | ||
806 | |||
807 | *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset; | ||
808 | |||
809 | if (src_offset != 0) { | ||
810 | DRM_ERROR("Shaders must be at offset 0 of " | ||
811 | "the BO.\n"); | ||
812 | return -EINVAL; | ||
813 | } | ||
814 | |||
815 | validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader; | ||
816 | if (!validated_shader) | ||
817 | return -EINVAL; | ||
818 | |||
819 | if (validated_shader->uniforms_src_size > | ||
820 | exec->uniforms_size) { | ||
821 | DRM_ERROR("Uniforms src buffer overflow\n"); | ||
822 | return -EINVAL; | ||
823 | } | ||
824 | |||
825 | texture_handles_u = exec->uniforms_u; | ||
826 | uniform_data_u = (texture_handles_u + | ||
827 | validated_shader->num_texture_samples); | ||
828 | |||
829 | memcpy(exec->uniforms_v, uniform_data_u, | ||
830 | validated_shader->uniforms_size); | ||
831 | |||
832 | for (tex = 0; | ||
833 | tex < validated_shader->num_texture_samples; | ||
834 | tex++) { | ||
835 | if (!reloc_tex(exec, | ||
836 | uniform_data_u, | ||
837 | &validated_shader->texture_samples[tex], | ||
838 | texture_handles_u[tex])) { | ||
839 | return -EINVAL; | ||
840 | } | ||
841 | } | ||
842 | |||
843 | *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p; | ||
844 | |||
845 | exec->uniforms_u += validated_shader->uniforms_src_size; | ||
846 | exec->uniforms_v += validated_shader->uniforms_size; | ||
847 | exec->uniforms_p += validated_shader->uniforms_size; | ||
848 | } | ||
849 | |||
850 | for (i = 0; i < nr_attributes; i++) { | ||
851 | struct drm_gem_cma_object *vbo = | ||
852 | bo[ARRAY_SIZE(shader_reloc_offsets) + i]; | ||
853 | uint32_t o = 36 + i * 8; | ||
854 | uint32_t offset = *(uint32_t *)(pkt_u + o + 0); | ||
855 | uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1; | ||
856 | uint32_t stride = *(uint8_t *)(pkt_u + o + 5); | ||
857 | uint32_t max_index; | ||
858 | |||
859 | if (state->addr & 0x8) | ||
860 | stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff; | ||
861 | |||
862 | if (vbo->base.size < offset || | ||
863 | vbo->base.size - offset < attr_size) { | ||
864 | DRM_ERROR("BO offset overflow (%d + %d > %d)\n", | ||
865 | offset, attr_size, vbo->base.size); | ||
866 | return -EINVAL; | ||
867 | } | ||
868 | |||
869 | if (stride != 0) { | ||
870 | max_index = ((vbo->base.size - offset - attr_size) / | ||
871 | stride); | ||
872 | if (state->max_index > max_index) { | ||
873 | DRM_ERROR("primitives use index %d out of " | ||
874 | "supplied %d\n", | ||
875 | state->max_index, max_index); | ||
876 | return -EINVAL; | ||
877 | } | ||
878 | } | ||
879 | |||
880 | *(uint32_t *)(pkt_v + o) = vbo->paddr + offset; | ||
881 | } | ||
882 | |||
883 | return 0; | ||
884 | } | ||
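
The hard-coded offsets above imply the following layout for one extended GL shader record as submitted; this is a reconstruction for reference, not a struct defined by the patch. The kernel strips the leading handle indices and copies the rest through with the pointers relocated.

    /* u32 bo_index[3 + nr_attributes]; consumed by the kernel, not copied
     * u16 flags;                       offset 0 (FS_SINGLE_THREAD checked)
     *   ...                           per-shader counts, not validated here
     * u32 fs_code, fs_uniforms;        offsets 4, 8
     * u32 vs_code, vs_uniforms;        offsets 16, 20
     * u32 cs_code, cs_uniforms;        offsets 28, 32
     * { u32 addr; u8 bytes_minus_1; u8 stride; ... } attr[i] at 36 + 8 * i
     * u32 extended_stride[i] at 100 + 4 * i, used when (state->addr & 0x8)
     */
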
885 | |||
886 | int | ||
887 | vc4_validate_shader_recs(struct drm_device *dev, | ||
888 | struct vc4_exec_info *exec) | ||
889 | { | ||
890 | uint32_t i; | ||
891 | int ret = 0; | ||
892 | |||
893 | for (i = 0; i < exec->shader_state_count; i++) { | ||
894 | ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]); | ||
895 | if (ret) | ||
896 | return ret; | ||
897 | } | ||
898 | |||
899 | return ret; | ||
900 | } | ||
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index 74de18416be9..fe4161bc93ae 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h | |||
@@ -26,14 +26,155 @@ | |||
26 | 26 | ||
27 | #include "drm.h" | 27 | #include "drm.h" |
28 | 28 | ||
29 | #define DRM_VC4_SUBMIT_CL 0x00 | ||
30 | #define DRM_VC4_WAIT_SEQNO 0x01 | ||
31 | #define DRM_VC4_WAIT_BO 0x02 | ||
29 | #define DRM_VC4_CREATE_BO 0x03 | 32 | #define DRM_VC4_CREATE_BO 0x03 |
30 | #define DRM_VC4_MMAP_BO 0x04 | 33 | #define DRM_VC4_MMAP_BO 0x04 |
31 | #define DRM_VC4_CREATE_SHADER_BO 0x05 | 34 | #define DRM_VC4_CREATE_SHADER_BO 0x05 |
32 | 35 | ||
36 | #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) | ||
37 | #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) | ||
38 | #define DRM_IOCTL_VC4_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo) | ||
33 | #define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo) | 39 | #define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo) |
34 | #define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) | 40 | #define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) |
35 | #define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo) | 41 | #define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo) |
36 | 42 | ||
43 | struct drm_vc4_submit_rcl_surface { | ||
44 | __u32 hindex; /* Handle index, or ~0 if not present. */ | ||
45 | __u32 offset; /* Offset to start of buffer. */ | ||
46 | /* | ||
47 | * Bits for either render config (color_write) or load/store packet. | ||
48 | * Bits should all be 0 for MSAA load/stores. | ||
49 | */ | ||
50 | __u16 bits; | ||
51 | |||
52 | #define VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES (1 << 0) | ||
53 | __u16 flags; | ||
54 | }; | ||
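
For example, a submission with no depth/stencil buffers would mark those surfaces absent:

    struct drm_vc4_submit_rcl_surface zs_read = { .hindex = ~0u };
    struct drm_vc4_submit_rcl_surface zs_write = { .hindex = ~0u };
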
55 | |||
56 | /** | ||
57 | * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D | ||
58 | * engine. | ||
59 | * | ||
60 | * Drivers typically use GPU BOs to store batchbuffers / command lists and | ||
61 | * their associated state. However, because the VC4 lacks an MMU, the | ||
62 | * kernel has to validate the memory accesses the GPU commands will make. | ||
63 | * If we were to store our commands in BOs, we'd need to do uncached | ||
64 | * readback from them for the validation process, which is too expensive. | ||
65 | * Instead, userspace accumulates commands and associated state in plain | ||
66 | * memory, and the kernel copies the data into its own address space, | ||
67 | * validates it, and stores it in a GPU BO. | ||
68 | */ | ||
69 | struct drm_vc4_submit_cl { | ||
70 | /* Pointer to the binner command list. | ||
71 | * | ||
72 | * This is the first set of commands executed, which runs the | ||
73 | * coordinate shader to determine where primitives land on the screen, | ||
74 | * then writes out the state updates and draw calls necessary per tile | ||
75 | * to the tile allocation BO. | ||
76 | */ | ||
77 | __u64 bin_cl; | ||
78 | |||
79 | /* Pointer to the shader records. | ||
80 | * | ||
81 | * Shader records are the structures read by the hardware that contain | ||
82 | * pointers to uniforms, shaders, and vertex attributes. The | ||
83 | * reference to the shader record has enough information to determine | ||
84 | * how many pointers are necessary (fixed number for shaders/uniforms, | ||
85 | * and an attribute count), so those BO indices into bo_handles are | ||
86 | * just stored as __u32s before each shader record passed in. | ||
87 | */ | ||
88 | __u64 shader_rec; | ||
89 | |||
90 | /* Pointer to uniform data and texture handles for the textures | ||
91 | * referenced by the shader. | ||
92 | * | ||
93 | * For each shader state record, there is a set of uniform data in the | ||
94 | * order referenced by the record (FS, VS, then CS). Each set of | ||
95 | * uniform data has a __u32 index into bo_handles per texture | ||
96 | * sample operation, in the order the QPU_W_TMUn_S writes appear in | ||
97 | * the program. Following the texture BO handle indices is the actual | ||
98 | * uniform data. | ||
99 | * | ||
100 | * The individual uniform state blocks don't have sizes passed in, | ||
101 | * because the kernel has to determine the sizes anyway during shader | ||
102 | * code validation. | ||
103 | */ | ||
104 | __u64 uniforms; | ||
105 | __u64 bo_handles; | ||
106 | |||
107 | /* Size in bytes of the binner command list. */ | ||
108 | __u32 bin_cl_size; | ||
109 | /* Size in bytes of the set of shader records. */ | ||
110 | __u32 shader_rec_size; | ||
111 | /* Number of shader records. | ||
112 | * | ||
113 | * This could be computed from the contents of shader_rec and the | ||
114 | * address bits of the references to them in the bin CL, but passing | ||
115 | * it in keeps the kernel from having to resize some of its allocations. | ||
116 | */ | ||
117 | __u32 shader_rec_count; | ||
118 | /* Size in bytes of the uniform state. */ | ||
119 | __u32 uniforms_size; | ||
120 | |||
121 | /* Number of BO handles passed in (size is that times 4). */ | ||
122 | __u32 bo_handle_count; | ||
123 | |||
124 | /* RCL setup: */ | ||
125 | __u16 width; | ||
126 | __u16 height; | ||
127 | __u8 min_x_tile; | ||
128 | __u8 min_y_tile; | ||
129 | __u8 max_x_tile; | ||
130 | __u8 max_y_tile; | ||
131 | struct drm_vc4_submit_rcl_surface color_read; | ||
132 | struct drm_vc4_submit_rcl_surface color_write; | ||
133 | struct drm_vc4_submit_rcl_surface zs_read; | ||
134 | struct drm_vc4_submit_rcl_surface zs_write; | ||
135 | struct drm_vc4_submit_rcl_surface msaa_color_write; | ||
136 | struct drm_vc4_submit_rcl_surface msaa_zs_write; | ||
137 | __u32 clear_color[2]; | ||
138 | __u32 clear_z; | ||
139 | __u8 clear_s; | ||
140 | |||
141 | __u32 pad:24; | ||
142 | |||
143 | #define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0) | ||
144 | __u32 flags; | ||
145 | |||
146 | /* Returned value of the seqno of this render job (for the | ||
147 | * wait ioctl). | ||
148 | */ | ||
149 | __u64 seqno; | ||
150 | }; | ||
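
A hypothetical userspace submission, assuming the buffers, sizes, and counts below were built as described above and that fd is an open vc4 DRM node; drmIoctl() is libdrm's restartable ioctl wrapper:

    struct drm_vc4_submit_cl submit = {
            .bin_cl = (uintptr_t)bin_cl,
            .bin_cl_size = bin_cl_size,
            .shader_rec = (uintptr_t)shader_recs,
            .shader_rec_size = shader_rec_size,
            .shader_rec_count = shader_rec_count,
            .uniforms = (uintptr_t)uniforms,
            .uniforms_size = uniforms_size,
            .bo_handles = (uintptr_t)bo_handles,
            .bo_handle_count = bo_handle_count,
            /* plus width/height, the tile bounds, and the RCL surfaces */
    };
    uint64_t last_seqno = 0;

    if (drmIoctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit) == 0)
            last_seqno = submit.seqno; /* input to the wait ioctls below */
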
151 | |||
152 | /** | ||
153 | * struct drm_vc4_wait_seqno - ioctl argument for waiting for | ||
154 | * DRM_VC4_SUBMIT_CL completion using its returned seqno. | ||
155 | * | ||
156 | * timeout_ns is the timeout in nanoseconds, where "0" means "don't | ||
157 | * block, just return the status." | ||
158 | */ | ||
159 | struct drm_vc4_wait_seqno { | ||
160 | __u64 seqno; | ||
161 | __u64 timeout_ns; | ||
162 | }; | ||
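
Usage sketch, with last_seqno taken from a previous submit (variable names assumed):

    struct drm_vc4_wait_seqno wait = {
            .seqno = last_seqno,
            .timeout_ns = 1000000000ull, /* wait up to 1s; 0 would just poll */
    };
    int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
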
163 | |||
164 | /** | ||
165 | * struct drm_vc4_wait_bo - ioctl argument for waiting for | ||
166 | * completion of the last DRM_VC4_SUBMIT_CL on a BO. | ||
167 | * | ||
168 | * This is useful for cases where multiple processes might be | ||
169 | * rendering to a BO and you want to wait for all rendering to be | ||
170 | * completed. | ||
171 | */ | ||
172 | struct drm_vc4_wait_bo { | ||
173 | __u32 handle; | ||
174 | __u32 pad; | ||
175 | __u64 timeout_ns; | ||
176 | }; | ||
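
The BO variant is the same shape, keyed by handle instead of seqno (bo_handle is assumed to come from DRM_IOCTL_VC4_CREATE_BO or a shared import):

    struct drm_vc4_wait_bo wait_bo = {
            .handle = bo_handle,
            .timeout_ns = 1000000000ull,
    };
    int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_BO, &wait_bo);
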
177 | |||
37 | /** | 178 | /** |
38 | * struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs. | 179 | * struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs. |
39 | * | 180 | * |