author    seshendra Gadagottu <sgadagottu@nvidia.com>    2018-01-02 18:48:46 -0500
committer mobile promotions <svcmobile_promotions@nvidia.com>    2018-01-10 11:47:03 -0500
commit    0ac3ba2a99b745f577c752ebf9a6b4291730a36d (patch)
tree      dd111702d91dd5d14369def5fc152960f90a2daf /drivers/gpu/nvgpu/gv11b/gr_gv11b.c
parent    3e9aa581b61a3ecbcf01a8988b1d12a8af8e2a45 (diff)
gpu: nvgpu: gv11b: fix for gfx preemption
Used the chip specific attrib_cb_gfxp_default_size and attrib_cb_gfxp_size buffer sizes when committing the global callback buffer with gfx preemption requested. These sizes differ between gv11b and gp10b. For gp10b, buffer sizes smaller than the values specified in the hw manuals are used, per sw requirement.

Also used the gv11b specific preemption related functions:
gr_gv11b_set_ctxsw_preemption_mode
gr_gv11b_update_ctxsw_preemption_mode

This is required because the preemption related buffer sizes differ between gv11b and gp10b. More optimization will be done as part of NVGPU-484.

Another issue fixed: the gpu va for preemption buffers still needs to be 8 bit aligned, even though 49 bits are available now. This is done because of the legacy implementation of the fecs ucode.

Bug 1976694

Change-Id: I2dc923340d34d0dc5fe45419200d0cf4f53cdb23
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1635027
GVS: Gerrit_Virtual_Submit
Reviewed-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
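As an aside, the alignment constraint can be illustrated with a minimal standalone C sketch (this is not the driver code itself: the u64_lo32()/u64_hi32() helpers imitate nvgpu's, and the example va value is made up):

#include <stdint.h>
#include <stdio.h>

/* 32-bit halves of a 64-bit value, in the style of nvgpu's helpers */
static inline uint32_t u64_lo32(uint64_t n) { return (uint32_t)n; }
static inline uint32_t u64_hi32(uint64_t n) { return (uint32_t)(n >> 32); }

int main(void)
{
        /* hypothetical gpu va from the 49-bit space; low 8 bits must be zero */
        uint64_t gpu_va = 0x1fedcba98700ull;

        /*
         * fecs ucode consumes the va pre-shifted right by 8 bits,
         * a holdover from before 49-bit vas were available.
         */
        gpu_va = gpu_va >> 8;

        printf("addr_lo=0x%08x addr_hi=0x%08x\n",
               u64_lo32(gpu_va), u64_hi32(gpu_va));
        return 0;
}

The shifted value is what gr_gv11b_set_preemption_buffer_va (in the diff below) splits into the lo/hi context header words.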
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c')
-rw-r--r-- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 248
1 file changed, 247 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index e00277f0..f369e12e 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1,7 +1,7 @@
 /*
  * GV11b GPU GR
  *
- * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -1224,6 +1224,10 @@ void gr_gv11b_cb_size_default(struct gk20a *g)
 		gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
 	gr->alpha_cb_default_size =
 		gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
+	gr->attrib_cb_gfxp_default_size =
+		gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
+	gr->attrib_cb_gfxp_size =
+		gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
 }
 
 void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
@@ -1368,6 +1372,245 @@ fail_free:
 	return err;
 }
 
+int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g,
+		struct gr_ctx_desc *gr_ctx,
+		struct vm_gk20a *vm, u32 class,
+		u32 graphics_preempt_mode,
+		u32 compute_preempt_mode)
+{
+	int err = 0;
+
+	if (g->ops.gr.is_valid_gfx_class(g, class) &&
+			g->gr.ctx_vars.force_preemption_gfxp)
+		graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
+
+	if (g->ops.gr.is_valid_compute_class(g, class) &&
+			g->gr.ctx_vars.force_preemption_cilp)
+		compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
+
+	/* check for invalid combinations */
+	if ((graphics_preempt_mode == 0) && (compute_preempt_mode == 0))
+		return -EINVAL;
+
+	if ((graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) &&
+		(compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP))
+		return -EINVAL;
+
+	/* Do not allow lower preemption modes than current ones */
+	if (graphics_preempt_mode &&
+		(graphics_preempt_mode < gr_ctx->graphics_preempt_mode))
+		return -EINVAL;
+
+	if (compute_preempt_mode &&
+		(compute_preempt_mode < gr_ctx->compute_preempt_mode))
+		return -EINVAL;
+
+	/* set preemption modes */
+	switch (graphics_preempt_mode) {
+	case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP:
+	{
+		u32 spill_size =
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
+		u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
+			gr_scc_pagepool_total_pages_byte_granularity_v();
+		u32 betacb_size = g->gr.attrib_cb_default_size +
+			(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
+			 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+		u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
+			g->gr.max_tpc_count;
+		attrib_cb_size = ALIGN(attrib_cb_size, 128);
+
+		gk20a_dbg_info("gfxp context spill_size=%d", spill_size);
+		gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size);
+		gk20a_dbg_info("gfxp context attrib_cb_size=%d",
+				attrib_cb_size);
+
+		err = gr_gp10b_alloc_buffer(vm,
+				g->gr.ctx_vars.preempt_image_size,
+				&gr_ctx->preempt_ctxsw_buffer);
+		if (err) {
+			nvgpu_err(g, "cannot allocate preempt buffer");
+			goto fail;
+		}
+
+		err = gr_gp10b_alloc_buffer(vm,
+				spill_size,
+				&gr_ctx->spill_ctxsw_buffer);
+		if (err) {
+			nvgpu_err(g, "cannot allocate spill buffer");
+			goto fail_free_preempt;
+		}
+
+		err = gr_gp10b_alloc_buffer(vm,
+				attrib_cb_size,
+				&gr_ctx->betacb_ctxsw_buffer);
+		if (err) {
+			nvgpu_err(g, "cannot allocate beta buffer");
+			goto fail_free_spill;
+		}
+
+		err = gr_gp10b_alloc_buffer(vm,
+				pagepool_size,
+				&gr_ctx->pagepool_ctxsw_buffer);
+		if (err) {
+			nvgpu_err(g, "cannot allocate page pool");
+			goto fail_free_betacb;
+		}
+
+		gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
+		break;
+	}
+
+	case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
+		gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
+		break;
+
+	default:
+		break;
+	}
+
+	if (g->ops.gr.is_valid_compute_class(g, class) ||
+			g->ops.gr.is_valid_gfx_class(g, class)) {
+		switch (compute_preempt_mode) {
+		case NVGPU_PREEMPTION_MODE_COMPUTE_WFI:
+		case NVGPU_PREEMPTION_MODE_COMPUTE_CTA:
+		case NVGPU_PREEMPTION_MODE_COMPUTE_CILP:
+			gr_ctx->compute_preempt_mode = compute_preempt_mode;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return 0;
+
+fail_free_betacb:
+	nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
+fail_free_spill:
+	nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
+fail_free_preempt:
+	nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
+fail:
+	return err;
+}
+
+void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
+		struct channel_ctx_gk20a *ch_ctx,
+		struct nvgpu_mem *mem)
+{
+	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
+	struct ctx_header_desc *ctx = &ch_ctx->ctx_header;
+	struct nvgpu_mem *ctxheader = &ctx->mem;
+
+	u32 gfxp_preempt_option =
+		ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
+	u32 cilp_preempt_option =
+		ctxsw_prog_main_image_compute_preemption_options_control_cilp_f();
+	u32 cta_preempt_option =
+		ctxsw_prog_main_image_compute_preemption_options_control_cta_f();
+	int err;
+
+	gk20a_dbg_fn("");
+
+	if (gr_ctx->graphics_preempt_mode ==
+			NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
+		gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
+		nvgpu_mem_wr(g, mem,
+			ctxsw_prog_main_image_graphics_preemption_options_o(),
+			gfxp_preempt_option);
+	}
+
+	if (gr_ctx->compute_preempt_mode ==
+			NVGPU_PREEMPTION_MODE_COMPUTE_CILP) {
+		gk20a_dbg_info("CILP: %x", cilp_preempt_option);
+		nvgpu_mem_wr(g, mem,
+			ctxsw_prog_main_image_compute_preemption_options_o(),
+			cilp_preempt_option);
+	}
+
+	if (gr_ctx->compute_preempt_mode ==
+			NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
+		gk20a_dbg_info("CTA: %x", cta_preempt_option);
+		nvgpu_mem_wr(g, mem,
+			ctxsw_prog_main_image_compute_preemption_options_o(),
+			cta_preempt_option);
+	}
+
+	if (gr_ctx->preempt_ctxsw_buffer.gpu_va) {
+		u32 addr;
+		u32 size;
+		u32 cbes_reserve;
+
+		if (g->ops.gr.set_preemption_buffer_va) {
+			if (ctxheader->gpu_va)
+				g->ops.gr.set_preemption_buffer_va(g, ctxheader,
+					gr_ctx->preempt_ctxsw_buffer.gpu_va);
+			else
+				g->ops.gr.set_preemption_buffer_va(g, mem,
+					gr_ctx->preempt_ctxsw_buffer.gpu_va);
+		}
+
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
+		if (err) {
+			nvgpu_err(g, "can't map patch context");
+			goto out;
+		}
+
+		addr = (u64_lo32(gr_ctx->betacb_ctxsw_buffer.gpu_va) >>
+			gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
+			(u64_hi32(gr_ctx->betacb_ctxsw_buffer.gpu_va) <<
+			 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
+
+		gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
+		g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
+
+		addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >>
+			gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
+			(u64_hi32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) <<
+			 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
+		size = gr_ctx->pagepool_ctxsw_buffer.size;
+
+		if (size == g->ops.gr.pagepool_default_size(g))
+			size = gr_scc_pagepool_total_pages_hwmax_v();
+
+		g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
+
+		addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >>
+			gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
+			(u64_hi32(gr_ctx->spill_ctxsw_buffer.gpu_va) <<
+			 (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()));
+		size = gr_ctx->spill_ctxsw_buffer.size /
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
+
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpc0_swdx_rm_spill_buffer_addr_r(),
+				gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
+				true);
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpc0_swdx_rm_spill_buffer_size_r(),
+				gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
+				true);
+
+		cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpcs_swdx_beta_cb_ctrl_r(),
+				gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
+					cbes_reserve),
+				true);
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
+				gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
+					cbes_reserve),
+				true);
+
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
+	}
+
+out:
+	gk20a_dbg_fn("done");
+}
 static void gr_gv11b_dump_gr_per_sm_regs(struct gk20a *g,
 		struct gk20a_debug_output *o,
 		u32 gpc, u32 tpc, u32 sm, u32 offset)
@@ -2382,6 +2625,9 @@ void gr_gv11b_set_preemption_buffer_va(struct gk20a *g,
 {
 	u32 addr_lo, addr_hi;
 
+	/* gpu va still needs to be 8 bit aligned */
+	gpu_va = gpu_va >> 8;
+
 	addr_lo = u64_lo32(gpu_va);
 	addr_hi = u64_hi32(gpu_va);
 
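For reference, the lo/hi packing pattern that gr_gv11b_update_ctxsw_preemption_mode uses for the betacb, pagepool and spill base addresses can be sketched in isolation (a standalone sketch with assumed helper names; the align-bits values 12 and 8 correspond to the _39_12/_39_8 register field names in the diff):

#include <stdint.h>
#include <stdio.h>

static inline uint32_t u64_lo32(uint64_t n) { return (uint32_t)n; }
static inline uint32_t u64_hi32(uint64_t n) { return (uint32_t)(n >> 32); }

/*
 * Drop `align_bits` low zero bits of a buffer base va and fold the
 * high word in, yielding the single 32-bit value the patch writes use.
 */
static uint32_t pack_base_addr(uint64_t gpu_va, uint32_t align_bits)
{
        return (u64_lo32(gpu_va) >> align_bits) |
               (u64_hi32(gpu_va) << (32 - align_bits));
}

int main(void)
{
        uint64_t va = 0x123456789000ull;        /* hypothetical, 4 KB aligned */

        /* 12 align bits for the attrib/beta cb, 8 for pagepool and spill */
        printf("attrib cb addr: 0x%08x\n", pack_base_addr(va, 12));
        printf("pagepool addr : 0x%08x\n", pack_base_addr(va, 8));
        return 0;
}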