author    seshendra Gadagottu <sgadagottu@nvidia.com>    2018-01-02 18:48:46 -0500
committer mobile promotions <svcmobile_promotions@nvidia.com>    2018-01-10 11:47:03 -0500
commit    0ac3ba2a99b745f577c752ebf9a6b4291730a36d (patch)
tree      dd111702d91dd5d14369def5fc152960f90a2daf /drivers/gpu/nvgpu/gv11b/gr_gv11b.c
parent    3e9aa581b61a3ecbcf01a8988b1d12a8af8e2a45 (diff)
gpu: nvgpu: gv11b: fix for gfx preemption
Used the chip specific attrib_cb_gfxp_default_size and attrib_cb_gfxp_size buffer sizes when committing the global callback buffer with gfx preemption requested. These sizes differ between gv11b and gp10b. For gp10b, buffer sizes smaller than the values specified in the hw manuals are used, per sw requirement.

Also used the gv11b specific preemption related functions:
gr_gv11b_set_ctxsw_preemption_mode
gr_gv11b_update_ctxsw_preemption_mode

This is required because the preemption related buffer sizes differ between gv11b and gp10b. More optimization will be done as part of NVGPU-484.

Another issue fixed: the gpu va for preemption buffers still needs to be 8 bit aligned, even though 49 bits are available now. This is done because of the legacy implementation of the fecs ucode.

Bug 1976694

Change-Id: I2dc923340d34d0dc5fe45419200d0cf4f53cdb23
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1635027
GVS: Gerrit_Virtual_Submit
Reviewed-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
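As an aside, the alignment constraint can be illustrated with a minimal standalone C sketch (this is not the driver code itself: the u64_lo32()/u64_hi32() helpers imitate nvgpu's, and the example va value is made up):

#include <stdint.h>
#include <stdio.h>

/* 32-bit halves of a 64-bit value, in the style of nvgpu's helpers */
static inline uint32_t u64_lo32(uint64_t n) { return (uint32_t)n; }
static inline uint32_t u64_hi32(uint64_t n) { return (uint32_t)(n >> 32); }

int main(void)
{
        /* hypothetical gpu va from the 49-bit space; low 8 bits must be zero */
        uint64_t gpu_va = 0x1fedcba98700ull;

        /*
         * fecs ucode consumes the va pre-shifted right by 8 bits,
         * a holdover from before 49-bit vas were available.
         */
        gpu_va = gpu_va >> 8;

        printf("addr_lo=0x%08x addr_hi=0x%08x\n",
               u64_lo32(gpu_va), u64_hi32(gpu_va));
        return 0;
}

The shifted value is what gr_gv11b_set_preemption_buffer_va (in the diff below) splits into the lo/hi context header words.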
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c')
-rw-r--r-- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 248
1 file changed, 247 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index e00277f0..f369e12e 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1,7 +1,7 @@
 /*
  * GV11b GPU GR
  *
- * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -1224,6 +1224,10 @@ void gr_gv11b_cb_size_default(struct gk20a *g)
 		gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
 	gr->alpha_cb_default_size =
 		gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
+	gr->attrib_cb_gfxp_default_size =
+		gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
+	gr->attrib_cb_gfxp_size =
+		gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
 }
 
 void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
@@ -1368,6 +1372,245 @@ fail_free:
 	return err;
 }
 
+int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g,
+		struct gr_ctx_desc *gr_ctx,
+		struct vm_gk20a *vm, u32 class,
+		u32 graphics_preempt_mode,
+		u32 compute_preempt_mode)
+{
+	int err = 0;
+
+	if (g->ops.gr.is_valid_gfx_class(g, class) &&
+			g->gr.ctx_vars.force_preemption_gfxp)
+		graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
+
+	if (g->ops.gr.is_valid_compute_class(g, class) &&
+			g->gr.ctx_vars.force_preemption_cilp)
+		compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
+
+	/* check for invalid combinations */
+	if ((graphics_preempt_mode == 0) && (compute_preempt_mode == 0))
+		return -EINVAL;
+
+	if ((graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) &&
+		(compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP))
+		return -EINVAL;
+
+	/* Do not allow lower preemption modes than current ones */
+	if (graphics_preempt_mode &&
+		(graphics_preempt_mode < gr_ctx->graphics_preempt_mode))
+		return -EINVAL;
+
+	if (compute_preempt_mode &&
+		(compute_preempt_mode < gr_ctx->compute_preempt_mode))
+		return -EINVAL;
+
+	/* set preemption modes */
+	switch (graphics_preempt_mode) {
+	case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP:
+	{
+		u32 spill_size =
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
+		u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
+			gr_scc_pagepool_total_pages_byte_granularity_v();
+		u32 betacb_size = g->gr.attrib_cb_default_size +
+			(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
+			 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+		u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
+			g->gr.max_tpc_count;
+		attrib_cb_size = ALIGN(attrib_cb_size, 128);
+
+		gk20a_dbg_info("gfxp context spill_size=%d", spill_size);
+		gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size);
+		gk20a_dbg_info("gfxp context attrib_cb_size=%d",
+				attrib_cb_size);
+
+		err = gr_gp10b_alloc_buffer(vm,
+				g->gr.ctx_vars.preempt_image_size,
+				&gr_ctx->preempt_ctxsw_buffer);
+		if (err) {
+			nvgpu_err(g, "cannot allocate preempt buffer");
+			goto fail;
+		}
+
+		err = gr_gp10b_alloc_buffer(vm,
+				spill_size,
+				&gr_ctx->spill_ctxsw_buffer);
+		if (err) {
+			nvgpu_err(g, "cannot allocate spill buffer");
+			goto fail_free_preempt;
+		}
+
+		err = gr_gp10b_alloc_buffer(vm,
+				attrib_cb_size,
+				&gr_ctx->betacb_ctxsw_buffer);
+		if (err) {
+			nvgpu_err(g, "cannot allocate beta buffer");
+			goto fail_free_spill;
+		}
+
+		err = gr_gp10b_alloc_buffer(vm,
+				pagepool_size,
+				&gr_ctx->pagepool_ctxsw_buffer);
+		if (err) {
+			nvgpu_err(g, "cannot allocate page pool");
+			goto fail_free_betacb;
+		}
+
+		gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
+		break;
+	}
+
+	case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
+		gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
+		break;
+
+	default:
+		break;
+	}
+
+	if (g->ops.gr.is_valid_compute_class(g, class) ||
+			g->ops.gr.is_valid_gfx_class(g, class)) {
+		switch (compute_preempt_mode) {
+		case NVGPU_PREEMPTION_MODE_COMPUTE_WFI:
+		case NVGPU_PREEMPTION_MODE_COMPUTE_CTA:
+		case NVGPU_PREEMPTION_MODE_COMPUTE_CILP:
+			gr_ctx->compute_preempt_mode = compute_preempt_mode;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return 0;
+
+fail_free_betacb:
+	nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
+fail_free_spill:
+	nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
+fail_free_preempt:
+	nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
+fail:
+	return err;
+}
+
+void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
+		struct channel_ctx_gk20a *ch_ctx,
+		struct nvgpu_mem *mem)
+{
+	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
+	struct ctx_header_desc *ctx = &ch_ctx->ctx_header;
+	struct nvgpu_mem *ctxheader = &ctx->mem;
+
+	u32 gfxp_preempt_option =
+		ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
+	u32 cilp_preempt_option =
+		ctxsw_prog_main_image_compute_preemption_options_control_cilp_f();
+	u32 cta_preempt_option =
+		ctxsw_prog_main_image_compute_preemption_options_control_cta_f();
+	int err;
+
+	gk20a_dbg_fn("");
+
+	if (gr_ctx->graphics_preempt_mode ==
+			NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
+		gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
+		nvgpu_mem_wr(g, mem,
+			ctxsw_prog_main_image_graphics_preemption_options_o(),
+			gfxp_preempt_option);
+	}
+
+	if (gr_ctx->compute_preempt_mode ==
+			NVGPU_PREEMPTION_MODE_COMPUTE_CILP) {
+		gk20a_dbg_info("CILP: %x", cilp_preempt_option);
+		nvgpu_mem_wr(g, mem,
+			ctxsw_prog_main_image_compute_preemption_options_o(),
+			cilp_preempt_option);
+	}
+
+	if (gr_ctx->compute_preempt_mode ==
+			NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
+		gk20a_dbg_info("CTA: %x", cta_preempt_option);
+		nvgpu_mem_wr(g, mem,
+			ctxsw_prog_main_image_compute_preemption_options_o(),
+			cta_preempt_option);
+	}
+
+	if (gr_ctx->preempt_ctxsw_buffer.gpu_va) {
+		u32 addr;
+		u32 size;
+		u32 cbes_reserve;
+
+		if (g->ops.gr.set_preemption_buffer_va) {
+			if (ctxheader->gpu_va)
+				g->ops.gr.set_preemption_buffer_va(g, ctxheader,
+					gr_ctx->preempt_ctxsw_buffer.gpu_va);
+			else
+				g->ops.gr.set_preemption_buffer_va(g, mem,
+					gr_ctx->preempt_ctxsw_buffer.gpu_va);
+		}
+
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
+		if (err) {
+			nvgpu_err(g, "can't map patch context");
+			goto out;
+		}
+
+		addr = (u64_lo32(gr_ctx->betacb_ctxsw_buffer.gpu_va) >>
+			gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
+			(u64_hi32(gr_ctx->betacb_ctxsw_buffer.gpu_va) <<
+			 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
+
+		gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
+		g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
+
+		addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >>
+			gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
+			(u64_hi32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) <<
+			 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
+		size = gr_ctx->pagepool_ctxsw_buffer.size;
+
+		if (size == g->ops.gr.pagepool_default_size(g))
+			size = gr_scc_pagepool_total_pages_hwmax_v();
+
+		g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
+
+		addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >>
+			gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
+			(u64_hi32(gr_ctx->spill_ctxsw_buffer.gpu_va) <<
+			 (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()));
+		size = gr_ctx->spill_ctxsw_buffer.size /
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
+
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpc0_swdx_rm_spill_buffer_addr_r(),
+				gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
+				true);
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpc0_swdx_rm_spill_buffer_size_r(),
+				gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
+				true);
+
+		cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpcs_swdx_beta_cb_ctrl_r(),
+				gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
+					cbes_reserve),
+				true);
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
+				gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
+					cbes_reserve),
+				true);
+
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
+	}
+
+out:
+	gk20a_dbg_fn("done");
+}
 static void gr_gv11b_dump_gr_per_sm_regs(struct gk20a *g,
 		struct gk20a_debug_output *o,
 		u32 gpc, u32 tpc, u32 sm, u32 offset)
@@ -2382,6 +2625,9 @@ void gr_gv11b_set_preemption_buffer_va(struct gk20a *g,
 {
 	u32 addr_lo, addr_hi;
 
+	/* gpu va still needs to be 8 bit aligned */
+	gpu_va = gpu_va >> 8;
+
 	addr_lo = u64_lo32(gpu_va);
 	addr_hi = u64_hi32(gpu_va);
 
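For reference, the lo/hi packing pattern that gr_gv11b_update_ctxsw_preemption_mode uses for the betacb, pagepool and spill base addresses can be sketched in isolation (a standalone sketch with assumed helper names; the align-bits values 12 and 8 correspond to the _39_12/_39_8 register field names in the diff):

#include <stdint.h>
#include <stdio.h>

static inline uint32_t u64_lo32(uint64_t n) { return (uint32_t)n; }
static inline uint32_t u64_hi32(uint64_t n) { return (uint32_t)(n >> 32); }

/*
 * Drop `align_bits` low zero bits of a buffer base va and fold the
 * high word in, yielding the single 32-bit value the patch writes use.
 */
static uint32_t pack_base_addr(uint64_t gpu_va, uint32_t align_bits)
{
        return (u64_lo32(gpu_va) >> align_bits) |
               (u64_hi32(gpu_va) << (32 - align_bits));
}

int main(void)
{
        uint64_t va = 0x123456789000ull;        /* hypothetical, 4 KB aligned */

        /* 12 align bits for the attrib/beta cb, 8 for pagepool and spill */
        printf("attrib cb addr: 0x%08x\n", pack_base_addr(va, 12));
        printf("pagepool addr : 0x%08x\n", pack_base_addr(va, 8));
        return 0;
}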