summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c')
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.c248
1 files changed, 247 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index e00277f0..f369e12e 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * GV11b GPU GR 2 * GV11b GPU GR
3 * 3 *
4 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a 6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"), 7 * copy of this software and associated documentation files (the "Software"),
@@ -1224,6 +1224,10 @@ void gr_gv11b_cb_size_default(struct gk20a *g)
1224 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); 1224 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
1225 gr->alpha_cb_default_size = 1225 gr->alpha_cb_default_size =
1226 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); 1226 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
1227 gr->attrib_cb_gfxp_default_size =
1228 gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
1229 gr->attrib_cb_gfxp_size =
1230 gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
1227} 1231}
1228 1232
1229void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) 1233void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
@@ -1368,6 +1372,245 @@ fail_free:
1368 return err; 1372 return err;
1369} 1373}
1370 1374
1375int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g,
1376 struct gr_ctx_desc *gr_ctx,
1377 struct vm_gk20a *vm, u32 class,
1378 u32 graphics_preempt_mode,
1379 u32 compute_preempt_mode)
1380{
1381 int err = 0;
1382
1383 if (g->ops.gr.is_valid_gfx_class(g, class) &&
1384 g->gr.ctx_vars.force_preemption_gfxp)
1385 graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
1386
1387 if (g->ops.gr.is_valid_compute_class(g, class) &&
1388 g->gr.ctx_vars.force_preemption_cilp)
1389 compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
1390
1391 /* check for invalid combinations */
1392 if ((graphics_preempt_mode == 0) && (compute_preempt_mode == 0))
1393 return -EINVAL;
1394
1395 if ((graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) &&
1396 (compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP))
1397 return -EINVAL;
1398
1399 /* Do not allow lower preemption modes than current ones */
1400 if (graphics_preempt_mode &&
1401 (graphics_preempt_mode < gr_ctx->graphics_preempt_mode))
1402 return -EINVAL;
1403
1404 if (compute_preempt_mode &&
1405 (compute_preempt_mode < gr_ctx->compute_preempt_mode))
1406 return -EINVAL;
1407
1408 /* set preemption modes */
1409 switch (graphics_preempt_mode) {
1410 case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP:
1411 {
1412 u32 spill_size =
1413 gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
1414 gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
1415 u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
1416 gr_scc_pagepool_total_pages_byte_granularity_v();
1417 u32 betacb_size = g->gr.attrib_cb_default_size +
1418 (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
1419 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
1420 u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
1421 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
1422 g->gr.max_tpc_count;
1423 attrib_cb_size = ALIGN(attrib_cb_size, 128);
1424
1425 gk20a_dbg_info("gfxp context spill_size=%d", spill_size);
1426 gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size);
1427 gk20a_dbg_info("gfxp context attrib_cb_size=%d",
1428 attrib_cb_size);
1429
1430 err = gr_gp10b_alloc_buffer(vm,
1431 g->gr.ctx_vars.preempt_image_size,
1432 &gr_ctx->preempt_ctxsw_buffer);
1433 if (err) {
1434 nvgpu_err(g, "cannot allocate preempt buffer");
1435 goto fail;
1436 }
1437
1438 err = gr_gp10b_alloc_buffer(vm,
1439 spill_size,
1440 &gr_ctx->spill_ctxsw_buffer);
1441 if (err) {
1442 nvgpu_err(g, "cannot allocate spill buffer");
1443 goto fail_free_preempt;
1444 }
1445
1446 err = gr_gp10b_alloc_buffer(vm,
1447 attrib_cb_size,
1448 &gr_ctx->betacb_ctxsw_buffer);
1449 if (err) {
1450 nvgpu_err(g, "cannot allocate beta buffer");
1451 goto fail_free_spill;
1452 }
1453
1454 err = gr_gp10b_alloc_buffer(vm,
1455 pagepool_size,
1456 &gr_ctx->pagepool_ctxsw_buffer);
1457 if (err) {
1458 nvgpu_err(g, "cannot allocate page pool");
1459 goto fail_free_betacb;
1460 }
1461
1462 gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
1463 break;
1464 }
1465
1466 case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
1467 gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
1468 break;
1469
1470 default:
1471 break;
1472 }
1473
1474 if (g->ops.gr.is_valid_compute_class(g, class) ||
1475 g->ops.gr.is_valid_gfx_class(g, class)) {
1476 switch (compute_preempt_mode) {
1477 case NVGPU_PREEMPTION_MODE_COMPUTE_WFI:
1478 case NVGPU_PREEMPTION_MODE_COMPUTE_CTA:
1479 case NVGPU_PREEMPTION_MODE_COMPUTE_CILP:
1480 gr_ctx->compute_preempt_mode = compute_preempt_mode;
1481 break;
1482 default:
1483 break;
1484 }
1485 }
1486
1487 return 0;
1488
1489fail_free_betacb:
1490 nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
1491fail_free_spill:
1492 nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
1493fail_free_preempt:
1494 nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
1495fail:
1496 return err;
1497}
1498
1499void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
1500 struct channel_ctx_gk20a *ch_ctx,
1501 struct nvgpu_mem *mem)
1502{
1503 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
1504 struct ctx_header_desc *ctx = &ch_ctx->ctx_header;
1505 struct nvgpu_mem *ctxheader = &ctx->mem;
1506
1507 u32 gfxp_preempt_option =
1508 ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
1509 u32 cilp_preempt_option =
1510 ctxsw_prog_main_image_compute_preemption_options_control_cilp_f();
1511 u32 cta_preempt_option =
1512 ctxsw_prog_main_image_compute_preemption_options_control_cta_f();
1513 int err;
1514
1515 gk20a_dbg_fn("");
1516
1517 if (gr_ctx->graphics_preempt_mode ==
1518 NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
1519 gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
1520 nvgpu_mem_wr(g, mem,
1521 ctxsw_prog_main_image_graphics_preemption_options_o(),
1522 gfxp_preempt_option);
1523 }
1524
1525 if (gr_ctx->compute_preempt_mode ==
1526 NVGPU_PREEMPTION_MODE_COMPUTE_CILP) {
1527 gk20a_dbg_info("CILP: %x", cilp_preempt_option);
1528 nvgpu_mem_wr(g, mem,
1529 ctxsw_prog_main_image_compute_preemption_options_o(),
1530 cilp_preempt_option);
1531 }
1532
1533 if (gr_ctx->compute_preempt_mode ==
1534 NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
1535 gk20a_dbg_info("CTA: %x", cta_preempt_option);
1536 nvgpu_mem_wr(g, mem,
1537 ctxsw_prog_main_image_compute_preemption_options_o(),
1538 cta_preempt_option);
1539 }
1540
1541 if (gr_ctx->preempt_ctxsw_buffer.gpu_va) {
1542 u32 addr;
1543 u32 size;
1544 u32 cbes_reserve;
1545
1546 if (g->ops.gr.set_preemption_buffer_va) {
1547 if (ctxheader->gpu_va)
1548 g->ops.gr.set_preemption_buffer_va(g, ctxheader,
1549 gr_ctx->preempt_ctxsw_buffer.gpu_va);
1550 else
1551 g->ops.gr.set_preemption_buffer_va(g, mem,
1552 gr_ctx->preempt_ctxsw_buffer.gpu_va);
1553 }
1554
1555 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
1556 if (err) {
1557 nvgpu_err(g, "can't map patch context");
1558 goto out;
1559 }
1560
1561 addr = (u64_lo32(gr_ctx->betacb_ctxsw_buffer.gpu_va) >>
1562 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
1563 (u64_hi32(gr_ctx->betacb_ctxsw_buffer.gpu_va) <<
1564 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
1565
1566 gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
1567 g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
1568
1569 addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >>
1570 gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
1571 (u64_hi32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) <<
1572 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
1573 size = gr_ctx->pagepool_ctxsw_buffer.size;
1574
1575 if (size == g->ops.gr.pagepool_default_size(g))
1576 size = gr_scc_pagepool_total_pages_hwmax_v();
1577
1578 g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
1579
1580 addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >>
1581 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
1582 (u64_hi32(gr_ctx->spill_ctxsw_buffer.gpu_va) <<
1583 (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()));
1584 size = gr_ctx->spill_ctxsw_buffer.size /
1585 gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
1586
1587 gr_gk20a_ctx_patch_write(g, ch_ctx,
1588 gr_gpc0_swdx_rm_spill_buffer_addr_r(),
1589 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
1590 true);
1591 gr_gk20a_ctx_patch_write(g, ch_ctx,
1592 gr_gpc0_swdx_rm_spill_buffer_size_r(),
1593 gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
1594 true);
1595
1596 cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
1597 gr_gk20a_ctx_patch_write(g, ch_ctx,
1598 gr_gpcs_swdx_beta_cb_ctrl_r(),
1599 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
1600 cbes_reserve),
1601 true);
1602 gr_gk20a_ctx_patch_write(g, ch_ctx,
1603 gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
1604 gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
1605 cbes_reserve),
1606 true);
1607
1608 gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
1609 }
1610
1611out:
1612 gk20a_dbg_fn("done");
1613}
1371static void gr_gv11b_dump_gr_per_sm_regs(struct gk20a *g, 1614static void gr_gv11b_dump_gr_per_sm_regs(struct gk20a *g,
1372 struct gk20a_debug_output *o, 1615 struct gk20a_debug_output *o,
1373 u32 gpc, u32 tpc, u32 sm, u32 offset) 1616 u32 gpc, u32 tpc, u32 sm, u32 offset)
@@ -2382,6 +2625,9 @@ void gr_gv11b_set_preemption_buffer_va(struct gk20a *g,
2382{ 2625{
2383 u32 addr_lo, addr_hi; 2626 u32 addr_lo, addr_hi;
2384 2627
2628 /* the gpu va is stored shifted right by 8, so it must stay 256-byte aligned */
2629 gpu_va = gpu_va >> 8;
2630
2385 addr_lo = u64_lo32(gpu_va); 2631 addr_lo = u64_lo32(gpu_va);
2386 addr_hi = u64_hi32(gpu_va); 2632 addr_hi = u64_hi32(gpu_va);
2387 2633