diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 110 |
1 files changed, 6 insertions, 104 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 49a1c1a8..095f69c7 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -1308,103 +1308,6 @@ enum programs { | |||
1308 | /* maximum number of WRITE_PATCHes in the below function */ | 1308 | /* maximum number of WRITE_PATCHes in the below function */ |
1309 | #define MAX_CDE_LAUNCH_PATCHES 32 | 1309 | #define MAX_CDE_LAUNCH_PATCHES 32 |
1310 | 1310 | ||
1311 | static int gk20a_buffer_convert_gpu_to_cde_v0( | ||
1312 | struct gk20a *g, | ||
1313 | struct dma_buf *dmabuf, u32 consumer, | ||
1314 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, | ||
1315 | u32 width, u32 height, u32 block_height_log2, | ||
1316 | u32 submit_flags, struct nvgpu_fence *fence_in, | ||
1317 | struct gk20a_buffer_state *state) | ||
1318 | { | ||
1319 | struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES]; | ||
1320 | int param = 0; | ||
1321 | int err = 0; | ||
1322 | struct gk20a_fence *new_fence = NULL; | ||
1323 | const int wgx = 8; | ||
1324 | const int wgy = 8; | ||
1325 | const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */ | ||
1326 | const int xalign = compbits_per_byte * wgx; | ||
1327 | const int yalign = wgy; | ||
1328 | |||
1329 | /* firmware v0 needs to call swizzling twice */ | ||
1330 | int i; | ||
1331 | for (i = 0; i < 2; i++) { | ||
1332 | /* Compute per launch parameters */ | ||
1333 | const bool vpass = (i == 1); | ||
1334 | const int transposed_width = vpass ? height : width; | ||
1335 | const int transposed_height = vpass ? width : height; | ||
1336 | const int xtiles = (transposed_width + 7) >> 3; | ||
1337 | const int ytiles = (transposed_height + 7) >> 3; | ||
1338 | const int gridw = roundup(xtiles, xalign) / xalign; | ||
1339 | const int gridh = roundup(ytiles, yalign) / yalign; | ||
1340 | const int flags = (vpass ? 4 : 0) | | ||
1341 | g->cde_app.shader_parameter; | ||
1342 | const int dst_stride = 128; /* chip constant */ | ||
1343 | |||
1344 | if ((vpass && !(consumer & NVGPU_GPU_COMPBITS_CDEV)) || | ||
1345 | (!vpass && !(consumer & NVGPU_GPU_COMPBITS_CDEH))) | ||
1346 | continue; | ||
1347 | |||
1348 | if (xtiles > 4096 / 8 || ytiles > 4096 / 8) | ||
1349 | gk20a_warn(&g->dev->dev, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)", | ||
1350 | xtiles, ytiles); | ||
1351 | |||
1352 | gk20a_dbg(gpu_dbg_cde, "pass=%c", vpass ? 'V' : 'H'); | ||
1353 | gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx", | ||
1354 | width, height, block_height_log2, | ||
1355 | compbits_hoffset, compbits_voffset); | ||
1356 | gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)", | ||
1357 | width, height, xtiles, ytiles); | ||
1358 | gk20a_dbg(gpu_dbg_cde, "group (%d, %d) grid (%d, %d)", | ||
1359 | wgx, wgy, gridw, gridh); | ||
1360 | |||
1361 | /* Write parameters */ | ||
1362 | #define WRITE_PATCH(NAME, VALUE) \ | ||
1363 | params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE} | ||
1364 | param = 0; | ||
1365 | WRITE_PATCH(PATCH_USER_CONST_XTILES, xtiles); | ||
1366 | WRITE_PATCH(PATCH_USER_CONST_YTILES, ytiles); | ||
1367 | WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2, | ||
1368 | block_height_log2); | ||
1369 | WRITE_PATCH(PATCH_USER_CONST_DSTPITCH, dst_stride); | ||
1370 | WRITE_PATCH(PATCH_H_USER_CONST_FLAGS, flags); | ||
1371 | WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw); | ||
1372 | WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh); | ||
1373 | WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1); | ||
1374 | WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx); | ||
1375 | WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy); | ||
1376 | WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1); | ||
1377 | WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw); | ||
1378 | WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh); | ||
1379 | WRITE_PATCH(PATCH_QMD_CTA_RASTER_DEPTH, 1); | ||
1380 | WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx); | ||
1381 | WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy); | ||
1382 | WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION2, 1); | ||
1383 | #undef WRITE_PATCH | ||
1384 | |||
1385 | err = gk20a_cde_convert(g, dmabuf, | ||
1386 | 0, /* dst kind */ | ||
1387 | vpass ? | ||
1388 | compbits_voffset : | ||
1389 | compbits_hoffset, | ||
1390 | 0, /* dst_size, 0 = auto */ | ||
1391 | fence_in, submit_flags, | ||
1392 | params, param, | ||
1393 | &new_fence); | ||
1394 | if (err) | ||
1395 | goto out; | ||
1396 | |||
1397 | /* compbits generated, update state & fence */ | ||
1398 | gk20a_fence_put(state->fence); | ||
1399 | state->fence = new_fence; | ||
1400 | state->valid_compbits |= vpass ? | ||
1401 | NVGPU_GPU_COMPBITS_CDEV : | ||
1402 | NVGPU_GPU_COMPBITS_CDEH; | ||
1403 | } | ||
1404 | out: | ||
1405 | return err; | ||
1406 | } | ||
1407 | |||
1408 | static int gk20a_buffer_convert_gpu_to_cde_v1( | 1311 | static int gk20a_buffer_convert_gpu_to_cde_v1( |
1409 | struct gk20a *g, | 1312 | struct gk20a *g, |
1410 | struct dma_buf *dmabuf, u32 consumer, | 1313 | struct dma_buf *dmabuf, u32 consumer, |
@@ -1446,7 +1349,7 @@ static int gk20a_buffer_convert_gpu_to_cde_v1( | |||
1446 | PROG_VPASS_SMALL_DEBUG; | 1349 | PROG_VPASS_SMALL_DEBUG; |
1447 | } | 1350 | } |
1448 | 1351 | ||
1449 | if (xtiles > 4096 / 8 || ytiles > 4096 / 8) | 1352 | if (xtiles > 8192 / 8 || ytiles > 8192 / 8) |
1450 | gk20a_warn(&g->dev->dev, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)", | 1353 | gk20a_warn(&g->dev->dev, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)", |
1451 | xtiles, ytiles); | 1354 | xtiles, ytiles); |
1452 | 1355 | ||
@@ -1562,16 +1465,15 @@ static int gk20a_buffer_convert_gpu_to_cde( | |||
1562 | gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n", | 1465 | gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n", |
1563 | g->cde_app.firmware_version); | 1466 | g->cde_app.firmware_version); |
1564 | 1467 | ||
1565 | if (g->cde_app.firmware_version == 0) { | 1468 | if (g->cde_app.firmware_version == 1) { |
1566 | err = gk20a_buffer_convert_gpu_to_cde_v0( | ||
1567 | g, dmabuf, consumer, offset, compbits_hoffset, | ||
1568 | compbits_voffset, width, height, block_height_log2, | ||
1569 | submit_flags, fence_in, state); | ||
1570 | } else { | ||
1571 | err = gk20a_buffer_convert_gpu_to_cde_v1( | 1469 | err = gk20a_buffer_convert_gpu_to_cde_v1( |
1572 | g, dmabuf, consumer, offset, compbits_hoffset, | 1470 | g, dmabuf, consumer, offset, compbits_hoffset, |
1573 | compbits_voffset, width, height, block_height_log2, | 1471 | compbits_voffset, width, height, block_height_log2, |
1574 | submit_flags, fence_in, state); | 1472 | submit_flags, fence_in, state); |
1473 | } else { | ||
1474 | dev_err(dev_from_gk20a(g), "unsupported CDE firmware version %d", | ||
1475 | g->cde_app.firmware_version); | ||
1476 | err = -EINVAL; | ||
1575 | } | 1477 | } |
1576 | 1478 | ||
1577 | gk20a_idle(g->dev); | 1479 | gk20a_idle(g->dev); |