about summary refs log tree commit diff stats
path: root/drivers/gpu/drm/vc4/vc4_v3d.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/vc4/vc4_v3d.c')
-rw-r--r--drivers/gpu/drm/vc4/vc4_v3d.c180
1 files changed, 180 insertions, 0 deletions
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 7cc346ad9b0b..c53afec34586 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -16,6 +16,7 @@
16 * this program. If not, see <http://www.gnu.org/licenses/>. 16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */ 17 */
18 18
19#include "linux/clk.h"
19#include "linux/component.h" 20#include "linux/component.h"
20#include "linux/pm_runtime.h" 21#include "linux/pm_runtime.h"
21#include "vc4_drv.h" 22#include "vc4_drv.h"
@@ -156,6 +157,144 @@ static void vc4_v3d_init_hw(struct drm_device *dev)
156 V3D_WRITE(V3D_VPMBASE, 0); 157 V3D_WRITE(V3D_VPMBASE, 0);
157} 158}
158 159
160int vc4_v3d_get_bin_slot(struct vc4_dev *vc4)
161{
162 struct drm_device *dev = vc4->dev;
163 unsigned long irqflags;
164 int slot;
165 uint64_t seqno = 0;
166 struct vc4_exec_info *exec;
167
168try_again:
169 spin_lock_irqsave(&vc4->job_lock, irqflags);
170 slot = ffs(~vc4->bin_alloc_used);
171 if (slot != 0) {
172 /* Switch from ffs() bit index to a 0-based index. */
173 slot--;
174 vc4->bin_alloc_used |= BIT(slot);
175 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
176 return slot;
177 }
178
179 /* Couldn't find an open slot. Wait for render to complete
180 * and try again.
181 */
182 exec = vc4_last_render_job(vc4);
183 if (exec)
184 seqno = exec->seqno;
185 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
186
187 if (seqno) {
188 int ret = vc4_wait_for_seqno(dev, seqno, ~0ull, true);
189
190 if (ret == 0)
191 goto try_again;
192
193 return ret;
194 }
195
196 return -ENOMEM;
197}
198
199/**
200 * vc4_allocate_bin_bo() - allocates the memory that will be used for
201 * tile binning.
202 *
203 * The binner has a limitation that the addresses in the tile state
204 * buffer that point into the tile alloc buffer or binner overflow
205 * memory only have 28 bits (256MB), and the top 4 on the bus for
206 * tile alloc references end up coming from the tile state buffer's
207 * address.
208 *
209 * To work around this, we allocate a single large buffer while V3D is
210 * in use, make sure that it has the top 4 bits constant across its
211 * entire extent, and then put the tile state, tile alloc, and binner
212 * overflow memory inside that buffer.
213 *
214 * This creates a limitation where we may not be able to execute a job
215 * if it doesn't fit within the buffer that we allocated up front.
216 * However, it turns out that 16MB is "enough for anybody", and
217 * real-world applications run into allocation failures from the
218 * overall CMA pool before they make scenes complicated enough to run
219 * out of bin space.
220 */
221int
222vc4_allocate_bin_bo(struct drm_device *drm)
223{
224 struct vc4_dev *vc4 = to_vc4_dev(drm);
225 struct vc4_v3d *v3d = vc4->v3d;
226 uint32_t size = 16 * 1024 * 1024;
227 int ret = 0;
228 struct list_head list;
229
230 /* We may need to try allocating more than once to get a BO
231 * that doesn't cross 256MB. Track the ones we've allocated
232 * that failed so far, so that we can free them when we've got
233 * one that succeeded (if we freed them right away, our next
234 * allocation would probably be the same chunk of memory).
235 */
236 INIT_LIST_HEAD(&list);
237
238 while (true) {
239 struct vc4_bo *bo = vc4_bo_create(drm, size, true);
240
241 if (IS_ERR(bo)) {
242 ret = PTR_ERR(bo);
243
244 dev_err(&v3d->pdev->dev,
245 "Failed to allocate memory for tile binning: "
246 "%d. You may need to enable CMA or give it "
247 "more memory.",
248 ret);
249 break;
250 }
251
252 /* Check if this BO won't trigger the addressing bug. */
253 if ((bo->base.paddr & 0xf0000000) ==
254 ((bo->base.paddr + bo->base.base.size - 1) & 0xf0000000)) {
255 vc4->bin_bo = bo;
256
257 /* Set up for allocating 512KB chunks of
258 * binner memory. The biggest allocation we
259 * need to do is for the initial tile alloc +
260 * tile state buffer. We can render to a
261 * maximum of ((2048*2048) / (32*32) = 4096
262 * tiles in a frame (until we do floating
263 * point rendering, at which point it would be
264 * 8192). Tile state is 48b/tile (rounded to
265 * a page), and tile alloc is 32b/tile
266 * (rounded to a page), plus a page of extra,
267 * for a total of 320kb for our worst-case.
268 * We choose 512kb so that it divides evenly
269 * into our 16MB, and the rest of the 512kb
270 * will be used as storage for the overflow
271 * from the initial 32b CL per bin.
272 */
273 vc4->bin_alloc_size = 512 * 1024;
274 vc4->bin_alloc_used = 0;
275 vc4->bin_alloc_overflow = 0;
276 WARN_ON_ONCE(sizeof(vc4->bin_alloc_used) * 8 !=
277 bo->base.base.size / vc4->bin_alloc_size);
278
279 break;
280 }
281
282 /* Put it on the list to free later, and try again. */
283 list_add(&bo->unref_head, &list);
284 }
285
286 /* Free all the BOs we allocated but didn't choose. */
287 while (!list_empty(&list)) {
288 struct vc4_bo *bo = list_last_entry(&list,
289 struct vc4_bo, unref_head);
290
291 list_del(&bo->unref_head);
292 drm_gem_object_put_unlocked(&bo->base.base);
293 }
294
295 return ret;
296}
297
159#ifdef CONFIG_PM 298#ifdef CONFIG_PM
160static int vc4_v3d_runtime_suspend(struct device *dev) 299static int vc4_v3d_runtime_suspend(struct device *dev)
161{ 300{
@@ -164,6 +303,11 @@ static int vc4_v3d_runtime_suspend(struct device *dev)
164 303
165 vc4_irq_uninstall(vc4->dev); 304 vc4_irq_uninstall(vc4->dev);
166 305
306 drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
307 vc4->bin_bo = NULL;
308
309 clk_disable_unprepare(v3d->clk);
310
167 return 0; 311 return 0;
168} 312}
169 313
@@ -171,6 +315,15 @@ static int vc4_v3d_runtime_resume(struct device *dev)
171{ 315{
172 struct vc4_v3d *v3d = dev_get_drvdata(dev); 316 struct vc4_v3d *v3d = dev_get_drvdata(dev);
173 struct vc4_dev *vc4 = v3d->vc4; 317 struct vc4_dev *vc4 = v3d->vc4;
318 int ret;
319
320 ret = vc4_allocate_bin_bo(vc4->dev);
321 if (ret)
322 return ret;
323
324 ret = clk_prepare_enable(v3d->clk);
325 if (ret != 0)
326 return ret;
174 327
175 vc4_v3d_init_hw(vc4->dev); 328 vc4_v3d_init_hw(vc4->dev);
176 vc4_irq_postinstall(vc4->dev); 329 vc4_irq_postinstall(vc4->dev);
@@ -202,12 +355,38 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
202 vc4->v3d = v3d; 355 vc4->v3d = v3d;
203 v3d->vc4 = vc4; 356 v3d->vc4 = vc4;
204 357
358 v3d->clk = devm_clk_get(dev, NULL);
359 if (IS_ERR(v3d->clk)) {
360 int ret = PTR_ERR(v3d->clk);
361
362 if (ret == -ENOENT) {
363 /* bcm2835 didn't have a clock reference in the DT. */
364 ret = 0;
365 v3d->clk = NULL;
366 } else {
367 if (ret != -EPROBE_DEFER)
368 dev_err(dev, "Failed to get V3D clock: %d\n",
369 ret);
370 return ret;
371 }
372 }
373
205 if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) { 374 if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) {
206 DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n", 375 DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n",
207 V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0); 376 V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0);
208 return -EINVAL; 377 return -EINVAL;
209 } 378 }
210 379
380 ret = clk_prepare_enable(v3d->clk);
381 if (ret != 0)
382 return ret;
383
384 ret = vc4_allocate_bin_bo(drm);
385 if (ret) {
386 clk_disable_unprepare(v3d->clk);
387 return ret;
388 }
389
211 /* Reset the binner overflow address/size at setup, to be sure 390 /* Reset the binner overflow address/size at setup, to be sure
212 * we don't reuse an old one. 391 * we don't reuse an old one.
213 */ 392 */
@@ -271,6 +450,7 @@ static int vc4_v3d_dev_remove(struct platform_device *pdev)
271 450
272static const struct of_device_id vc4_v3d_dt_match[] = { 451static const struct of_device_id vc4_v3d_dt_match[] = {
273 { .compatible = "brcm,bcm2835-v3d" }, 452 { .compatible = "brcm,bcm2835-v3d" },
453 { .compatible = "brcm,cygnus-v3d" },
274 { .compatible = "brcm,vc4-v3d" }, 454 { .compatible = "brcm,vc4-v3d" },
275 {} 455 {}
276}; 456};