summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c')
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.c248
1 files changed, 247 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index e00277f0..f369e12e 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * GV11b GPU GR 2 * GV11b GPU GR
3 * 3 *
4 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a 6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"), 7 * copy of this software and associated documentation files (the "Software"),
@@ -1224,6 +1224,10 @@ void gr_gv11b_cb_size_default(struct gk20a *g)
1224 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); 1224 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
1225 gr->alpha_cb_default_size = 1225 gr->alpha_cb_default_size =
1226 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); 1226 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
1227 gr->attrib_cb_gfxp_default_size =
1228 gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
1229 gr->attrib_cb_gfxp_size =
1230 gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
1227} 1231}
1228 1232
1229void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) 1233void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
@@ -1368,6 +1372,245 @@ fail_free:
1368 return err; 1372 return err;
1369} 1373}
1370 1374
1375int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g,
1376 struct gr_ctx_desc *gr_ctx,
1377 struct vm_gk20a *vm, u32 class,
1378 u32 graphics_preempt_mode,
1379 u32 compute_preempt_mode)
1380{
1381 int err = 0;
1382
1383 if (g->ops.gr.is_valid_gfx_class(g, class) &&
1384 g->gr.ctx_vars.force_preemption_gfxp)
1385 graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
1386
1387 if (g->ops.gr.is_valid_compute_class(g, class) &&
1388 g->gr.ctx_vars.force_preemption_cilp)
1389 compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
1390
1391 /* check for invalid combinations */
1392 if ((graphics_preempt_mode == 0) && (compute_preempt_mode == 0))
1393 return -EINVAL;
1394
1395 if ((graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) &&
1396 (compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP))
1397 return -EINVAL;
1398
1399 /* Do not allow lower preemption modes than current ones */
1400 if (graphics_preempt_mode &&
1401 (graphics_preempt_mode < gr_ctx->graphics_preempt_mode))
1402 return -EINVAL;
1403
1404 if (compute_preempt_mode &&
1405 (compute_preempt_mode < gr_ctx->compute_preempt_mode))
1406 return -EINVAL;
1407
1408 /* set preemption modes */
1409 switch (graphics_preempt_mode) {
1410 case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP:
1411 {
1412 u32 spill_size =
1413 gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
1414 gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
1415 u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
1416 gr_scc_pagepool_total_pages_byte_granularity_v();
1417 u32 betacb_size = g->gr.attrib_cb_default_size +
1418 (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
1419 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
1420 u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
1421 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
1422 g->gr.max_tpc_count;
1423 attrib_cb_size = ALIGN(attrib_cb_size, 128);
1424
1425 gk20a_dbg_info("gfxp context spill_size=%d", spill_size);
1426 gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size);
1427 gk20a_dbg_info("gfxp context attrib_cb_size=%d",
1428 attrib_cb_size);
1429
1430 err = gr_gp10b_alloc_buffer(vm,
1431 g->gr.ctx_vars.preempt_image_size,
1432 &gr_ctx->preempt_ctxsw_buffer);
1433 if (err) {
1434 nvgpu_err(g, "cannot allocate preempt buffer");
1435 goto fail;
1436 }
1437
1438 err = gr_gp10b_alloc_buffer(vm,
1439 spill_size,
1440 &gr_ctx->spill_ctxsw_buffer);
1441 if (err) {
1442 nvgpu_err(g, "cannot allocate spill buffer");
1443 goto fail_free_preempt;
1444 }
1445
1446 err = gr_gp10b_alloc_buffer(vm,
1447 attrib_cb_size,
1448 &gr_ctx->betacb_ctxsw_buffer);
1449 if (err) {
1450 nvgpu_err(g, "cannot allocate beta buffer");
1451 goto fail_free_spill;
1452 }
1453
1454 err = gr_gp10b_alloc_buffer(vm,
1455 pagepool_size,
1456 &gr_ctx->pagepool_ctxsw_buffer);
1457 if (err) {
1458 nvgpu_err(g, "cannot allocate page pool");
1459 goto fail_free_betacb;
1460 }
1461
1462 gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
1463 break;
1464 }
1465
1466 case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
1467 gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
1468 break;
1469
1470 default:
1471 break;
1472 }
1473
1474 if (g->ops.gr.is_valid_compute_class(g, class) ||
1475 g->ops.gr.is_valid_gfx_class(g, class)) {
1476 switch (compute_preempt_mode) {
1477 case NVGPU_PREEMPTION_MODE_COMPUTE_WFI:
1478 case NVGPU_PREEMPTION_MODE_COMPUTE_CTA:
1479 case NVGPU_PREEMPTION_MODE_COMPUTE_CILP:
1480 gr_ctx->compute_preempt_mode = compute_preempt_mode;
1481 break;
1482 default:
1483 break;
1484 }
1485 }
1486
1487 return 0;
1488
1489fail_free_betacb:
1490 nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
1491fail_free_spill:
1492 nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
1493fail_free_preempt:
1494 nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
1495fail:
1496 return err;
1497}
1498
1499void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
1500 struct channel_ctx_gk20a *ch_ctx,
1501 struct nvgpu_mem *mem)
1502{
1503 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
1504 struct ctx_header_desc *ctx = &ch_ctx->ctx_header;
1505 struct nvgpu_mem *ctxheader = &ctx->mem;
1506
1507 u32 gfxp_preempt_option =
1508 ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
1509 u32 cilp_preempt_option =
1510 ctxsw_prog_main_image_compute_preemption_options_control_cilp_f();
1511 u32 cta_preempt_option =
1512 ctxsw_prog_main_image_compute_preemption_options_control_cta_f();
1513 int err;
1514
1515 gk20a_dbg_fn("");
1516
1517 if (gr_ctx->graphics_preempt_mode ==
1518 NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
1519 gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
1520 nvgpu_mem_wr(g, mem,
1521 ctxsw_prog_main_image_graphics_preemption_options_o(),
1522 gfxp_preempt_option);
1523 }
1524
1525 if (gr_ctx->compute_preempt_mode ==
1526 NVGPU_PREEMPTION_MODE_COMPUTE_CILP) {
1527 gk20a_dbg_info("CILP: %x", cilp_preempt_option);
1528 nvgpu_mem_wr(g, mem,
1529 ctxsw_prog_main_image_compute_preemption_options_o(),
1530 cilp_preempt_option);
1531 }
1532
1533 if (gr_ctx->compute_preempt_mode ==
1534 NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
1535 gk20a_dbg_info("CTA: %x", cta_preempt_option);
1536 nvgpu_mem_wr(g, mem,
1537 ctxsw_prog_main_image_compute_preemption_options_o(),
1538 cta_preempt_option);
1539 }
1540
1541 if (gr_ctx->preempt_ctxsw_buffer.gpu_va) {
1542 u32 addr;
1543 u32 size;
1544 u32 cbes_reserve;
1545
1546 if (g->ops.gr.set_preemption_buffer_va) {
1547 if (ctxheader->gpu_va)
1548 g->ops.gr.set_preemption_buffer_va(g, ctxheader,
1549 gr_ctx->preempt_ctxsw_buffer.gpu_va);
1550 else
1551 g->ops.gr.set_preemption_buffer_va(g, mem,
1552 gr_ctx->preempt_ctxsw_buffer.gpu_va);
1553 }
1554
1555 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true);
1556 if (err) {
1557 nvgpu_err(g, "can't map patch context");
1558 goto out;
1559 }
1560
1561 addr = (u64_lo32(gr_ctx->betacb_ctxsw_buffer.gpu_va) >>
1562 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
1563 (u64_hi32(gr_ctx->betacb_ctxsw_buffer.gpu_va) <<
1564 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
1565
1566 gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
1567 g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
1568
1569 addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >>
1570 gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
1571 (u64_hi32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) <<
1572 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
1573 size = gr_ctx->pagepool_ctxsw_buffer.size;
1574
1575 if (size == g->ops.gr.pagepool_default_size(g))
1576 size = gr_scc_pagepool_total_pages_hwmax_v();
1577
1578 g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
1579
1580 addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >>
1581 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
1582 (u64_hi32(gr_ctx->spill_ctxsw_buffer.gpu_va) <<
1583 (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()));
1584 size = gr_ctx->spill_ctxsw_buffer.size /
1585 gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
1586
1587 gr_gk20a_ctx_patch_write(g, ch_ctx,
1588 gr_gpc0_swdx_rm_spill_buffer_addr_r(),
1589 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
1590 true);
1591 gr_gk20a_ctx_patch_write(g, ch_ctx,
1592 gr_gpc0_swdx_rm_spill_buffer_size_r(),
1593 gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
1594 true);
1595
1596 cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
1597 gr_gk20a_ctx_patch_write(g, ch_ctx,
1598 gr_gpcs_swdx_beta_cb_ctrl_r(),
1599 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
1600 cbes_reserve),
1601 true);
1602 gr_gk20a_ctx_patch_write(g, ch_ctx,
1603 gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
1604 gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
1605 cbes_reserve),
1606 true);
1607
1608 gr_gk20a_ctx_patch_write_end(g, ch_ctx, true);
1609 }
1610
1611out:
1612 gk20a_dbg_fn("done");
1613}
1371static void gr_gv11b_dump_gr_per_sm_regs(struct gk20a *g, 1614static void gr_gv11b_dump_gr_per_sm_regs(struct gk20a *g,
1372 struct gk20a_debug_output *o, 1615 struct gk20a_debug_output *o,
1373 u32 gpc, u32 tpc, u32 sm, u32 offset) 1616 u32 gpc, u32 tpc, u32 sm, u32 offset)
@@ -2382,6 +2625,9 @@ void gr_gv11b_set_preemption_buffer_va(struct gk20a *g,
2382{ 2625{
2383 u32 addr_lo, addr_hi; 2626 u32 addr_lo, addr_hi;
2384 2627
2628 /* the gpu va is stored shifted right by 8, so it must stay 256-byte aligned */
2629 gpu_va = gpu_va >> 8;
2630
2385 addr_lo = u64_lo32(gpu_va); 2631 addr_lo = u64_lo32(gpu_va);
2386 addr_hi = u64_hi32(gpu_va); 2632 addr_hi = u64_hi32(gpu_va);
2387 2633