summary refs log tree commit diff stats
path: root/drivers/gpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/cde_gk20a.c 110
1 files changed, 6 insertions, 104 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 49a1c1a8..095f69c7 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -1308,103 +1308,6 @@ enum programs {
1308/* maximum number of WRITE_PATCHes in the below function */ 1308/* maximum number of WRITE_PATCHes in the below function */
1309#define MAX_CDE_LAUNCH_PATCHES 32 1309#define MAX_CDE_LAUNCH_PATCHES 32
1310 1310
1311static int gk20a_buffer_convert_gpu_to_cde_v0(
1312 struct gk20a *g,
1313 struct dma_buf *dmabuf, u32 consumer,
1314 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1315 u32 width, u32 height, u32 block_height_log2,
1316 u32 submit_flags, struct nvgpu_fence *fence_in,
1317 struct gk20a_buffer_state *state)
1318{
1319 struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
1320 int param = 0;
1321 int err = 0;
1322 struct gk20a_fence *new_fence = NULL;
1323 const int wgx = 8;
1324 const int wgy = 8;
1325 const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
1326 const int xalign = compbits_per_byte * wgx;
1327 const int yalign = wgy;
1328
1329 /* firmware v0 needs to call swizzling twice */
1330 int i;
1331 for (i = 0; i < 2; i++) {
1332 /* Compute per launch parameters */
1333 const bool vpass = (i == 1);
1334 const int transposed_width = vpass ? height : width;
1335 const int transposed_height = vpass ? width : height;
1336 const int xtiles = (transposed_width + 7) >> 3;
1337 const int ytiles = (transposed_height + 7) >> 3;
1338 const int gridw = roundup(xtiles, xalign) / xalign;
1339 const int gridh = roundup(ytiles, yalign) / yalign;
1340 const int flags = (vpass ? 4 : 0) |
1341 g->cde_app.shader_parameter;
1342 const int dst_stride = 128; /* chip constant */
1343
1344 if ((vpass && !(consumer & NVGPU_GPU_COMPBITS_CDEV)) ||
1345 (!vpass && !(consumer & NVGPU_GPU_COMPBITS_CDEH)))
1346 continue;
1347
1348 if (xtiles > 4096 / 8 || ytiles > 4096 / 8)
1349 gk20a_warn(&g->dev->dev, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
1350 xtiles, ytiles);
1351
1352 gk20a_dbg(gpu_dbg_cde, "pass=%c", vpass ? 'V' : 'H');
1353 gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx",
1354 width, height, block_height_log2,
1355 compbits_hoffset, compbits_voffset);
1356 gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
1357 width, height, xtiles, ytiles);
1358 gk20a_dbg(gpu_dbg_cde, "group (%d, %d) grid (%d, %d)",
1359 wgx, wgy, gridw, gridh);
1360
1361 /* Write parameters */
1362#define WRITE_PATCH(NAME, VALUE) \
1363 params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE}
1364 param = 0;
1365 WRITE_PATCH(PATCH_USER_CONST_XTILES, xtiles);
1366 WRITE_PATCH(PATCH_USER_CONST_YTILES, ytiles);
1367 WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2,
1368 block_height_log2);
1369 WRITE_PATCH(PATCH_USER_CONST_DSTPITCH, dst_stride);
1370 WRITE_PATCH(PATCH_H_USER_CONST_FLAGS, flags);
1371 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw);
1372 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh);
1373 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1);
1374 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx);
1375 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy);
1376 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);
1377 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw);
1378 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh);
1379 WRITE_PATCH(PATCH_QMD_CTA_RASTER_DEPTH, 1);
1380 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
1381 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
1382 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION2, 1);
1383#undef WRITE_PATCH
1384
1385 err = gk20a_cde_convert(g, dmabuf,
1386 0, /* dst kind */
1387 vpass ?
1388 compbits_voffset :
1389 compbits_hoffset,
1390 0, /* dst_size, 0 = auto */
1391 fence_in, submit_flags,
1392 params, param,
1393 &new_fence);
1394 if (err)
1395 goto out;
1396
1397 /* compbits generated, update state & fence */
1398 gk20a_fence_put(state->fence);
1399 state->fence = new_fence;
1400 state->valid_compbits |= vpass ?
1401 NVGPU_GPU_COMPBITS_CDEV :
1402 NVGPU_GPU_COMPBITS_CDEH;
1403 }
1404out:
1405 return err;
1406}
1407
1408static int gk20a_buffer_convert_gpu_to_cde_v1( 1311static int gk20a_buffer_convert_gpu_to_cde_v1(
1409 struct gk20a *g, 1312 struct gk20a *g,
1410 struct dma_buf *dmabuf, u32 consumer, 1313 struct dma_buf *dmabuf, u32 consumer,
@@ -1446,7 +1349,7 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
1446 PROG_VPASS_SMALL_DEBUG; 1349 PROG_VPASS_SMALL_DEBUG;
1447 } 1350 }
1448 1351
1449 if (xtiles > 4096 / 8 || ytiles > 4096 / 8) 1352 if (xtiles > 8192 / 8 || ytiles > 8192 / 8)
1450 gk20a_warn(&g->dev->dev, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)", 1353 gk20a_warn(&g->dev->dev, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
1451 xtiles, ytiles); 1354 xtiles, ytiles);
1452 1355
@@ -1562,16 +1465,15 @@ static int gk20a_buffer_convert_gpu_to_cde(
1562 gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n", 1465 gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n",
1563 g->cde_app.firmware_version); 1466 g->cde_app.firmware_version);
1564 1467
1565 if (g->cde_app.firmware_version == 0) { 1468 if (g->cde_app.firmware_version == 1) {
1566 err = gk20a_buffer_convert_gpu_to_cde_v0(
1567 g, dmabuf, consumer, offset, compbits_hoffset,
1568 compbits_voffset, width, height, block_height_log2,
1569 submit_flags, fence_in, state);
1570 } else {
1571 err = gk20a_buffer_convert_gpu_to_cde_v1( 1469 err = gk20a_buffer_convert_gpu_to_cde_v1(
1572 g, dmabuf, consumer, offset, compbits_hoffset, 1470 g, dmabuf, consumer, offset, compbits_hoffset,
1573 compbits_voffset, width, height, block_height_log2, 1471 compbits_voffset, width, height, block_height_log2,
1574 submit_flags, fence_in, state); 1472 submit_flags, fence_in, state);
1473 } else {
1474 dev_err(dev_from_gk20a(g), "unsupported CDE firmware version %d",
1475 g->cde_app.firmware_version);
1476 err = -EINVAL;
1575 } 1477 }
1576 1478
1577 gk20a_idle(g->dev); 1479 gk20a_idle(g->dev);