diff options
Diffstat (limited to 'drivers/gpu/drm/vc4/vc4_v3d.c')
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_v3d.c | 180 |
1 files changed, 180 insertions, 0 deletions
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 7cc346ad9b0b..c53afec34586 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c | |||
@@ -16,6 +16,7 @@ | |||
16 | * this program. If not, see <http://www.gnu.org/licenses/>. | 16 | * this program. If not, see <http://www.gnu.org/licenses/>. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include "linux/clk.h" | ||
19 | #include "linux/component.h" | 20 | #include "linux/component.h" |
20 | #include "linux/pm_runtime.h" | 21 | #include "linux/pm_runtime.h" |
21 | #include "vc4_drv.h" | 22 | #include "vc4_drv.h" |
@@ -156,6 +157,144 @@ static void vc4_v3d_init_hw(struct drm_device *dev) | |||
156 | V3D_WRITE(V3D_VPMBASE, 0); | 157 | V3D_WRITE(V3D_VPMBASE, 0); |
157 | } | 158 | } |
158 | 159 | ||
160 | int vc4_v3d_get_bin_slot(struct vc4_dev *vc4) | ||
161 | { | ||
162 | struct drm_device *dev = vc4->dev; | ||
163 | unsigned long irqflags; | ||
164 | int slot; | ||
165 | uint64_t seqno = 0; | ||
166 | struct vc4_exec_info *exec; | ||
167 | |||
168 | try_again: | ||
169 | spin_lock_irqsave(&vc4->job_lock, irqflags); | ||
170 | slot = ffs(~vc4->bin_alloc_used); | ||
171 | if (slot != 0) { | ||
172 | /* Switch from ffs() bit index to a 0-based index. */ | ||
173 | slot--; | ||
174 | vc4->bin_alloc_used |= BIT(slot); | ||
175 | spin_unlock_irqrestore(&vc4->job_lock, irqflags); | ||
176 | return slot; | ||
177 | } | ||
178 | |||
179 | /* Couldn't find an open slot. Wait for render to complete | ||
180 | * and try again. | ||
181 | */ | ||
182 | exec = vc4_last_render_job(vc4); | ||
183 | if (exec) | ||
184 | seqno = exec->seqno; | ||
185 | spin_unlock_irqrestore(&vc4->job_lock, irqflags); | ||
186 | |||
187 | if (seqno) { | ||
188 | int ret = vc4_wait_for_seqno(dev, seqno, ~0ull, true); | ||
189 | |||
190 | if (ret == 0) | ||
191 | goto try_again; | ||
192 | |||
193 | return ret; | ||
194 | } | ||
195 | |||
196 | return -ENOMEM; | ||
197 | } | ||
198 | |||
199 | /** | ||
200 | * vc4_allocate_bin_bo() - allocates the memory that will be used for | ||
201 | * tile binning. | ||
202 | * | ||
203 | * The binner has a limitation that the addresses in the tile state | ||
204 | * buffer that point into the tile alloc buffer or binner overflow | ||
205 | * memory only have 28 bits (256MB), and the top 4 on the bus for | ||
206 | * tile alloc references end up coming from the tile state buffer's | ||
207 | * address. | ||
208 | * | ||
209 | * To work around this, we allocate a single large buffer while V3D is | ||
210 | * in use, make sure that it has the top 4 bits constant across its | ||
211 | * entire extent, and then put the tile state, tile alloc, and binner | ||
212 | * overflow memory inside that buffer. | ||
213 | * | ||
214 | * This creates a limitation where we may not be able to execute a job | ||
215 | * if it doesn't fit within the buffer that we allocated up front. | ||
216 | * However, it turns out that 16MB is "enough for anybody", and | ||
217 | * real-world applications run into allocation failures from the | ||
218 | * overall CMA pool before they make scenes complicated enough to run | ||
219 | * out of bin space. | ||
220 | */ | ||
221 | int | ||
222 | vc4_allocate_bin_bo(struct drm_device *drm) | ||
223 | { | ||
224 | struct vc4_dev *vc4 = to_vc4_dev(drm); | ||
225 | struct vc4_v3d *v3d = vc4->v3d; | ||
226 | uint32_t size = 16 * 1024 * 1024; | ||
227 | int ret = 0; | ||
228 | struct list_head list; | ||
229 | |||
230 | /* We may need to try allocating more than once to get a BO | ||
231 | * that doesn't cross 256MB. Track the ones we've allocated | ||
232 | * that failed so far, so that we can free them when we've got | ||
233 | * one that succeeded (if we freed them right away, our next | ||
234 | * allocation would probably be the same chunk of memory). | ||
235 | */ | ||
236 | INIT_LIST_HEAD(&list); | ||
237 | |||
238 | while (true) { | ||
239 | struct vc4_bo *bo = vc4_bo_create(drm, size, true); | ||
240 | |||
241 | if (IS_ERR(bo)) { | ||
242 | ret = PTR_ERR(bo); | ||
243 | |||
244 | dev_err(&v3d->pdev->dev, | ||
245 | "Failed to allocate memory for tile binning: " | ||
246 | "%d. You may need to enable CMA or give it " | ||
247 | "more memory.", | ||
248 | ret); | ||
249 | break; | ||
250 | } | ||
251 | |||
252 | /* Check if this BO won't trigger the addressing bug. */ | ||
253 | if ((bo->base.paddr & 0xf0000000) == | ||
254 | ((bo->base.paddr + bo->base.base.size - 1) & 0xf0000000)) { | ||
255 | vc4->bin_bo = bo; | ||
256 | |||
257 | /* Set up for allocating 512KB chunks of | ||
258 | * binner memory. The biggest allocation we | ||
259 | * need to do is for the initial tile alloc + | ||
260 | * tile state buffer. We can render to a | ||
261 | * maximum of ((2048*2048) / (32*32) = 4096 | ||
262 | * tiles in a frame (until we do floating | ||
263 | * point rendering, at which point it would be | ||
264 | * 8192). Tile state is 48b/tile (rounded to | ||
265 | * a page), and tile alloc is 32b/tile | ||
266 | * (rounded to a page), plus a page of extra, | ||
267 | * for a total of 320kb for our worst-case. | ||
268 | * We choose 512kb so that it divides evenly | ||
269 | * into our 16MB, and the rest of the 512kb | ||
270 | * will be used as storage for the overflow | ||
271 | * from the initial 32b CL per bin. | ||
272 | */ | ||
273 | vc4->bin_alloc_size = 512 * 1024; | ||
274 | vc4->bin_alloc_used = 0; | ||
275 | vc4->bin_alloc_overflow = 0; | ||
276 | WARN_ON_ONCE(sizeof(vc4->bin_alloc_used) * 8 != | ||
277 | bo->base.base.size / vc4->bin_alloc_size); | ||
278 | |||
279 | break; | ||
280 | } | ||
281 | |||
282 | /* Put it on the list to free later, and try again. */ | ||
283 | list_add(&bo->unref_head, &list); | ||
284 | } | ||
285 | |||
286 | /* Free all the BOs we allocated but didn't choose. */ | ||
287 | while (!list_empty(&list)) { | ||
288 | struct vc4_bo *bo = list_last_entry(&list, | ||
289 | struct vc4_bo, unref_head); | ||
290 | |||
291 | list_del(&bo->unref_head); | ||
292 | drm_gem_object_put_unlocked(&bo->base.base); | ||
293 | } | ||
294 | |||
295 | return ret; | ||
296 | } | ||
297 | |||
159 | #ifdef CONFIG_PM | 298 | #ifdef CONFIG_PM |
160 | static int vc4_v3d_runtime_suspend(struct device *dev) | 299 | static int vc4_v3d_runtime_suspend(struct device *dev) |
161 | { | 300 | { |
@@ -164,6 +303,11 @@ static int vc4_v3d_runtime_suspend(struct device *dev) | |||
164 | 303 | ||
165 | vc4_irq_uninstall(vc4->dev); | 304 | vc4_irq_uninstall(vc4->dev); |
166 | 305 | ||
306 | drm_gem_object_put_unlocked(&vc4->bin_bo->base.base); | ||
307 | vc4->bin_bo = NULL; | ||
308 | |||
309 | clk_disable_unprepare(v3d->clk); | ||
310 | |||
167 | return 0; | 311 | return 0; |
168 | } | 312 | } |
169 | 313 | ||
@@ -171,6 +315,15 @@ static int vc4_v3d_runtime_resume(struct device *dev) | |||
171 | { | 315 | { |
172 | struct vc4_v3d *v3d = dev_get_drvdata(dev); | 316 | struct vc4_v3d *v3d = dev_get_drvdata(dev); |
173 | struct vc4_dev *vc4 = v3d->vc4; | 317 | struct vc4_dev *vc4 = v3d->vc4; |
318 | int ret; | ||
319 | |||
320 | ret = vc4_allocate_bin_bo(vc4->dev); | ||
321 | if (ret) | ||
322 | return ret; | ||
323 | |||
324 | ret = clk_prepare_enable(v3d->clk); | ||
325 | if (ret != 0) | ||
326 | return ret; | ||
174 | 327 | ||
175 | vc4_v3d_init_hw(vc4->dev); | 328 | vc4_v3d_init_hw(vc4->dev); |
176 | vc4_irq_postinstall(vc4->dev); | 329 | vc4_irq_postinstall(vc4->dev); |
@@ -202,12 +355,38 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) | |||
202 | vc4->v3d = v3d; | 355 | vc4->v3d = v3d; |
203 | v3d->vc4 = vc4; | 356 | v3d->vc4 = vc4; |
204 | 357 | ||
358 | v3d->clk = devm_clk_get(dev, NULL); | ||
359 | if (IS_ERR(v3d->clk)) { | ||
360 | int ret = PTR_ERR(v3d->clk); | ||
361 | |||
362 | if (ret == -ENOENT) { | ||
363 | /* bcm2835 didn't have a clock reference in the DT. */ | ||
364 | ret = 0; | ||
365 | v3d->clk = NULL; | ||
366 | } else { | ||
367 | if (ret != -EPROBE_DEFER) | ||
368 | dev_err(dev, "Failed to get V3D clock: %d\n", | ||
369 | ret); | ||
370 | return ret; | ||
371 | } | ||
372 | } | ||
373 | |||
205 | if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) { | 374 | if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) { |
206 | DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n", | 375 | DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n", |
207 | V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0); | 376 | V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0); |
208 | return -EINVAL; | 377 | return -EINVAL; |
209 | } | 378 | } |
210 | 379 | ||
380 | ret = clk_prepare_enable(v3d->clk); | ||
381 | if (ret != 0) | ||
382 | return ret; | ||
383 | |||
384 | ret = vc4_allocate_bin_bo(drm); | ||
385 | if (ret) { | ||
386 | clk_disable_unprepare(v3d->clk); | ||
387 | return ret; | ||
388 | } | ||
389 | |||
211 | /* Reset the binner overflow address/size at setup, to be sure | 390 | /* Reset the binner overflow address/size at setup, to be sure |
212 | * we don't reuse an old one. | 391 | * we don't reuse an old one. |
213 | */ | 392 | */ |
@@ -271,6 +450,7 @@ static int vc4_v3d_dev_remove(struct platform_device *pdev) | |||
271 | 450 | ||
272 | static const struct of_device_id vc4_v3d_dt_match[] = { | 451 | static const struct of_device_id vc4_v3d_dt_match[] = { |
273 | { .compatible = "brcm,bcm2835-v3d" }, | 452 | { .compatible = "brcm,bcm2835-v3d" }, |
453 | { .compatible = "brcm,cygnus-v3d" }, | ||
274 | { .compatible = "brcm,vc4-v3d" }, | 454 | { .compatible = "brcm,vc4-v3d" }, |
275 | {} | 455 | {} |
276 | }; | 456 | }; |