author    Peter Daifuku <pdaifuku@nvidia.com>    2016-03-09 22:10:20 -0500
committer Terje Bergstrom <tbergstrom@nvidia.com>    2016-04-07 14:05:49 -0400
commit    37155b65f1dd6039bdef92f513d86640956bc12c (patch)
tree      1deb57523c3acc445996c642da6ac96e1cf7c355 /drivers
parent    6675c03603669c667c6ffec34567eaf101a2d09d (diff)
gpu: nvgpu: support for hwpm context switching
Add support for hwpm context switching

Bug 1648200

Change-Id: I482899bf165cd2ef24bb8617be16df01218e462f
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1120450
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h        |   1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c        |  47
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c         |  65
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h         |  14
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c             | 692
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.h             |  23
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h  |   4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h        |  10
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h             |   7
-rw-r--r--  drivers/gpu/nvgpu/gk20a/regops_gk20a.c         |  27
-rw-r--r--  drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h        |  10
11 files changed, 836 insertions, 64 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 29c39160..d8951b94 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -54,6 +54,7 @@ struct channel_ctx_gk20a {
54 struct gr_ctx_desc *gr_ctx; 54 struct gr_ctx_desc *gr_ctx;
55 struct patch_desc patch_ctx; 55 struct patch_desc patch_ctx;
56 struct zcull_ctx_desc zcull_ctx; 56 struct zcull_ctx_desc zcull_ctx;
57 struct pm_ctx_desc pm_ctx;
57 u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA]; 58 u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
58 u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA]; 59 u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
59 bool global_ctx_buffer_mapped; 60 bool global_ctx_buffer_mapped;
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 1ee0189b..d087d89e 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -457,6 +457,9 @@ static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
457static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, 457static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
458 struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args); 458 struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args);
459 459
460static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
461 struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args);
462
460static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( 463static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
461 struct dbg_session_gk20a *dbg_s, 464 struct dbg_session_gk20a *dbg_s,
462 struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); 465 struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args);
@@ -582,6 +585,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
582 (struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *)buf); 585 (struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *)buf);
583 break; 586 break;
584 587
588 case NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE:
589 err = nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(dbg_s,
590 (struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *)buf);
591 break;
592
585 case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS: 593 case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS:
586 err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s, 594 err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s,
587 (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); 595 (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf);
@@ -880,7 +888,7 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
880 ch_gk20a = dbg_s->ch; 888 ch_gk20a = dbg_s->ch;
881 889
882 if (!ch_gk20a) { 890 if (!ch_gk20a) {
883 gk20a_err(dev_from_gk20a(dbg_s->g), 891 gk20a_err(dev_from_gk20a(g),
884 "no bound channel for smpc ctxsw mode update\n"); 892 "no bound channel for smpc ctxsw mode update\n");
885 err = -EINVAL; 893 err = -EINVAL;
886 goto clean_up; 894 goto clean_up;
@@ -889,13 +897,48 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
889 err = gr_gk20a_update_smpc_ctxsw_mode(g, ch_gk20a, 897 err = gr_gk20a_update_smpc_ctxsw_mode(g, ch_gk20a,
890 args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW); 898 args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
891 if (err) { 899 if (err) {
892 gk20a_err(dev_from_gk20a(dbg_s->g), 900 gk20a_err(dev_from_gk20a(g),
893 "error (%d) during smpc ctxsw mode update\n", err); 901 "error (%d) during smpc ctxsw mode update\n", err);
894 goto clean_up; 902 goto clean_up;
895 } 903 }
896 904
897 err = g->ops.regops.apply_smpc_war(dbg_s); 905 err = g->ops.regops.apply_smpc_war(dbg_s);
906 clean_up:
907 mutex_unlock(&g->dbg_sessions_lock);
908 return err;
909}
910
911static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
912 struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args)
913{
914 int err;
915 struct gk20a *g = get_gk20a(dbg_s->pdev);
916 struct channel_gk20a *ch_gk20a;
898 917
918 gk20a_dbg_fn("%s pm ctxsw mode = %d",
919 dev_name(dbg_s->dev), args->mode);
920
921 /* Take the global lock, since we'll be doing global regops */
922 mutex_lock(&g->dbg_sessions_lock);
923
924 ch_gk20a = dbg_s->ch;
925
926 if (!ch_gk20a) {
927 gk20a_err(dev_from_gk20a(g),
928 "no bound channel for pm ctxsw mode update\n");
929 err = -EINVAL;
930 goto clean_up;
931 }
932
933 err = gr_gk20a_update_hwpm_ctxsw_mode(g, ch_gk20a,
934 args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW);
935 if (err)
936 gk20a_err(dev_from_gk20a(g),
937 "error (%d) during pm ctxsw mode update\n", err);
938
939 /* gk20a would require a WAR to set the core PM_ENABLE bit, not
940 * added here with gk20a being deprecated
941 */
899 clean_up: 942 clean_up:
900 mutex_unlock(&g->dbg_sessions_lock); 943 mutex_unlock(&g->dbg_sessions_lock);
901 return err; 944 return err;
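[Editor's note] The ioctl plumbing above mirrors the existing SMPC path. A minimal userspace sketch of driving it is shown below; it is not part of the patch. The ioctl name, argument struct, and mode value are taken from the hunk above, while the UAPI header location and the existence of an already-open, channel-bound debugger fd are assumptions for the sketch. Without a bound channel the handler returns -EINVAL ("no bound channel for pm ctxsw mode update").

/*
 * Userspace sketch (illustration only, not part of this change): enable
 * HWPM context switching on an nvgpu debugger session.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed location of the nvgpu dbg UAPI */

static int enable_hwpm_ctxsw(int dbg_fd)
{
	/* dbg_fd: open fd on the dbg-gpu node with a channel already bound */
	struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args args = {
		.mode = NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW,
	};

	if (ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE, &args) < 0) {
		perror("NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE");
		return -1;
	}
	return 0;
}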
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c
index 94dba7b6..64d6542b 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * GK20A Graphics Context 4 * GK20A Graphics Context
5 * 5 *
6 * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. 6 * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License, 9 * under the terms and conditions of the GNU General Public License,
@@ -281,7 +281,60 @@ static int gr_gk20a_init_ctx_vars_fw(struct gk20a *g, struct gr_gk20a *gr)
281 netlist_num); 281 netlist_num);
282 break; 282 break;
283 case NETLIST_REGIONID_CTXREG_PMPPC: 283 case NETLIST_REGIONID_CTXREG_PMPPC:
284 gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMPPC skipped"); 284 gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMPPC");
285 err = gr_gk20a_alloc_load_netlist_aiv(
286 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_ppc);
287 if (err)
288 goto clean_up;
289 break;
290 case NETLIST_REGIONID_NVPERF_CTXREG_SYS:
291 gk20a_dbg_info("NETLIST_REGIONID_NVPERF_CTXREG_SYS");
292 err = gr_gk20a_alloc_load_netlist_aiv(
293 src, size, &g->gr.ctx_vars.ctxsw_regs.perf_sys);
294 if (err)
295 goto clean_up;
296 break;
297 case NETLIST_REGIONID_NVPERF_FBP_CTXREGS:
298 gk20a_dbg_info("NETLIST_REGIONID_NVPERF_FBP_CTXREGS");
299 err = gr_gk20a_alloc_load_netlist_aiv(
300 src, size, &g->gr.ctx_vars.ctxsw_regs.fbp);
301 if (err)
302 goto clean_up;
303 break;
304 case NETLIST_REGIONID_NVPERF_CTXREG_GPC:
305 gk20a_dbg_info("NETLIST_REGIONID_NVPERF_CTXREG_GPC");
306 err = gr_gk20a_alloc_load_netlist_aiv(
307 src, size, &g->gr.ctx_vars.ctxsw_regs.perf_gpc);
308 if (err)
309 goto clean_up;
310 break;
311 case NETLIST_REGIONID_NVPERF_FBP_ROUTER:
312 gk20a_dbg_info("NETLIST_REGIONID_NVPERF_FBP_ROUTER");
313 err = gr_gk20a_alloc_load_netlist_aiv(
314 src, size, &g->gr.ctx_vars.ctxsw_regs.fbp_router);
315 if (err)
316 goto clean_up;
317 break;
318 case NETLIST_REGIONID_NVPERF_GPC_ROUTER:
319 gk20a_dbg_info("NETLIST_REGIONID_NVPERF_GPC_ROUTER");
320 err = gr_gk20a_alloc_load_netlist_aiv(
321 src, size, &g->gr.ctx_vars.ctxsw_regs.gpc_router);
322 if (err)
323 goto clean_up;
324 break;
325 case NETLIST_REGIONID_CTXREG_PMLTC:
326 gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMLTC");
327 err = gr_gk20a_alloc_load_netlist_aiv(
328 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_ltc);
329 if (err)
330 goto clean_up;
331 break;
332 case NETLIST_REGIONID_CTXREG_PMFBPA:
333 gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMFBPA");
334 err = gr_gk20a_alloc_load_netlist_aiv(
335 src, size, &g->gr.ctx_vars.ctxsw_regs.pm_fbpa);
336 if (err)
337 goto clean_up;
285 break; 338 break;
286 default: 339 default:
287 gk20a_dbg_info("unrecognized region %d skipped", i); 340 gk20a_dbg_info("unrecognized region %d skipped", i);
@@ -319,6 +372,14 @@ clean_up:
319 kfree(g->gr.ctx_vars.ctxsw_regs.pm_sys.l); 372 kfree(g->gr.ctx_vars.ctxsw_regs.pm_sys.l);
320 kfree(g->gr.ctx_vars.ctxsw_regs.pm_gpc.l); 373 kfree(g->gr.ctx_vars.ctxsw_regs.pm_gpc.l);
321 kfree(g->gr.ctx_vars.ctxsw_regs.pm_tpc.l); 374 kfree(g->gr.ctx_vars.ctxsw_regs.pm_tpc.l);
375 kfree(g->gr.ctx_vars.ctxsw_regs.pm_ppc.l);
376 kfree(g->gr.ctx_vars.ctxsw_regs.perf_sys.l);
377 kfree(g->gr.ctx_vars.ctxsw_regs.fbp.l);
378 kfree(g->gr.ctx_vars.ctxsw_regs.perf_gpc.l);
379 kfree(g->gr.ctx_vars.ctxsw_regs.fbp_router.l);
380 kfree(g->gr.ctx_vars.ctxsw_regs.gpc_router.l);
381 kfree(g->gr.ctx_vars.ctxsw_regs.pm_ltc.l);
382 kfree(g->gr.ctx_vars.ctxsw_regs.pm_fbpa.l);
322 release_firmware(netlist_fw); 383 release_firmware(netlist_fw);
323 err = -ENOENT; 384 err = -ENOENT;
324 } 385 }
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h
index 6844ee69..d413942a 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * GK20A Graphics Context 2 * GK20A Graphics Context
3 * 3 *
4 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -75,6 +75,13 @@ union __max_name {
75#define NETLIST_REGIONID_NETLIST_NUM 18 75#define NETLIST_REGIONID_NETLIST_NUM 18
76#define NETLIST_REGIONID_CTXREG_PPC 19 76#define NETLIST_REGIONID_CTXREG_PPC 19
77#define NETLIST_REGIONID_CTXREG_PMPPC 20 77#define NETLIST_REGIONID_CTXREG_PMPPC 20
78#define NETLIST_REGIONID_NVPERF_CTXREG_SYS 21
79#define NETLIST_REGIONID_NVPERF_FBP_CTXREGS 22
80#define NETLIST_REGIONID_NVPERF_CTXREG_GPC 23
81#define NETLIST_REGIONID_NVPERF_FBP_ROUTER 24
82#define NETLIST_REGIONID_NVPERF_GPC_ROUTER 25
83#define NETLIST_REGIONID_CTXREG_PMLTC 26
84#define NETLIST_REGIONID_CTXREG_PMFBPA 27
78 85
79struct netlist_region { 86struct netlist_region {
80 u32 region_id; 87 u32 region_id;
@@ -114,6 +121,11 @@ struct u32_list_gk20a {
114 u32 count; 121 u32 count;
115}; 122};
116 123
124struct ctxsw_buf_offset_map_entry {
125 u32 addr; /* Register address */
126 u32 offset; /* Offset in ctxt switch buffer */
127};
128
117static inline 129static inline
118struct av_gk20a *alloc_av_list_gk20a(struct av_list_gk20a *avl) 130struct av_gk20a *alloc_av_list_gk20a(struct av_list_gk20a *avl)
119{ 131{
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 116fd88f..a8addc7b 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -27,6 +27,8 @@
27#include <linux/dma-mapping.h> 27#include <linux/dma-mapping.h>
28#include <linux/firmware.h> 28#include <linux/firmware.h>
29#include <linux/nvhost.h> 29#include <linux/nvhost.h>
30#include <linux/sort.h>
31#include <linux/bsearch.h>
30#include <trace/events/gk20a.h> 32#include <trace/events/gk20a.h>
31 33
32#include "gk20a.h" 34#include "gk20a.h"
@@ -59,6 +61,10 @@
59#include "ctxsw_trace_gk20a.h" 61#include "ctxsw_trace_gk20a.h"
60 62
61#define BLK_SIZE (256) 63#define BLK_SIZE (256)
64#define NV_PMM_FBP_STRIDE 0x1000
65#define NV_PERF_PMM_FBP_ROUTER_STRIDE 0x0200
66#define NV_PERF_PMMGPC_CHIPLET_OFFSET 0x1000
67#define NV_PERF_PMMGPCROUTER_STRIDE 0x0200
62 68
63static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g); 69static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g);
64static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va); 70static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);
@@ -1591,9 +1597,17 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
1591 u32 data; 1597 u32 data;
1592 int ret; 1598 int ret;
1593 1599
1600 gk20a_dbg_fn("");
1601
1602 if (!ch_ctx->gr_ctx) {
1603 gk20a_err(dev_from_gk20a(g), "no graphics context allocated");
1604 return -EFAULT;
1605 }
1606
1594 c->g->ops.fifo.disable_channel(c); 1607 c->g->ops.fifo.disable_channel(c);
1595 ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid); 1608 ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid);
1596 if (ret) { 1609 if (ret) {
1610 c->g->ops.fifo.enable_channel(c);
1597 gk20a_err(dev_from_gk20a(g), 1611 gk20a_err(dev_from_gk20a(g),
1598 "failed to preempt channel\n"); 1612 "failed to preempt channel\n");
1599 return ret; 1613 return ret;
@@ -1603,11 +1617,18 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
1603 Flush and invalidate before cpu update. */ 1617 Flush and invalidate before cpu update. */
1604 g->ops.mm.l2_flush(g, true); 1618 g->ops.mm.l2_flush(g, true);
1605 1619
1620 if (!ch_ctx->gr_ctx) {
1621 gk20a_err(dev_from_gk20a(g), "no graphics context allocated");
1622 return -EFAULT;
1623 }
1624
1606 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 1625 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
1607 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, 1626 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
1608 0, pgprot_writecombine(PAGE_KERNEL)); 1627 0, pgprot_writecombine(PAGE_KERNEL));
1609 if (!ctx_ptr) 1628 if (!ctx_ptr) {
1629 c->g->ops.fifo.enable_channel(c);
1610 return -ENOMEM; 1630 return -ENOMEM;
1631 }
1611 1632
1612 data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); 1633 data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
1613 data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); 1634 data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
@@ -1620,11 +1641,135 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
1620 vunmap(ctx_ptr); 1641 vunmap(ctx_ptr);
1621 1642
1622 /* enable channel */ 1643 /* enable channel */
1623 gk20a_writel(c->g, ccsr_channel_r(c->hw_chid), 1644 c->g->ops.fifo.enable_channel(c);
1624 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) | 1645
1625 ccsr_channel_enable_set_true_f()); 1646 return 0;
1647}
1648
1649int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1650 struct channel_gk20a *c,
1651 bool enable_hwpm_ctxsw)
1652{
1653 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1654 struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
1655 void *ctx_ptr = NULL;
1656 void *pm_ctx_ptr;
1657 u32 data, virt_addr;
1658 int ret;
1659
1660 gk20a_dbg_fn("");
1661
1662 if (!ch_ctx->gr_ctx) {
1663 gk20a_err(dev_from_gk20a(g), "no graphics context allocated");
1664 return -EFAULT;
1665 }
1666
1667 if (enable_hwpm_ctxsw) {
1668 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
1669 return 0;
1670 } else {
1671 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f())
1672 return 0;
1673 }
1674
1675 c->g->ops.fifo.disable_channel(c);
1676 ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid);
1677 if (ret) {
1678 c->g->ops.fifo.enable_channel(c);
1679 gk20a_err(dev_from_gk20a(g),
1680 "failed to preempt channel\n");
1681 return ret;
1682 }
1683
1684 /* Channel gr_ctx buffer is gpu cacheable.
1685 Flush and invalidate before cpu update. */
1686 g->ops.mm.l2_flush(g, true);
1687
1688 if (enable_hwpm_ctxsw) {
1689 /* Allocate buffer if necessary */
1690 if (pm_ctx->mem.gpu_va == 0) {
1691 ret = gk20a_gmmu_alloc_attr(g, DMA_ATTR_NO_KERNEL_MAPPING,
1692 g->gr.ctx_vars.pm_ctxsw_image_size,
1693 &pm_ctx->mem);
1694 if (ret) {
1695 c->g->ops.fifo.enable_channel(c);
1696 gk20a_err(dev_from_gk20a(g),
1697 "failed to allocate pm ctxt buffer");
1698 return ret;
1699 }
1700
1701 pm_ctx->mem.gpu_va = gk20a_gmmu_map(c->vm,
1702 &pm_ctx->mem.sgt,
1703 pm_ctx->mem.size,
1704 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1705 gk20a_mem_flag_none, true);
1706 if (!pm_ctx->mem.gpu_va) {
1707 gk20a_err(dev_from_gk20a(g),
1708 "failed to map pm ctxt buffer");
1709 gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING,
1710 &pm_ctx->mem);
1711 c->g->ops.fifo.enable_channel(c);
1712 return -ENOMEM;
1713 }
1714 }
1715
1716 /* Now clear the buffer */
1717 pm_ctx_ptr = vmap(pm_ctx->mem.pages,
1718 PAGE_ALIGN(pm_ctx->mem.size) >> PAGE_SHIFT,
1719 0, pgprot_writecombine(PAGE_KERNEL));
1720
1721 if (!pm_ctx_ptr) {
1722 ret = -ENOMEM;
1723 goto cleanup_pm_buf;
1724 }
1725
1726 memset(pm_ctx_ptr, 0, pm_ctx->mem.size);
1727
1728 vunmap(pm_ctx_ptr);
1729 }
1730
1731 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
1732 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
1733 0, pgprot_writecombine(PAGE_KERNEL));
1734 if (!ctx_ptr) {
1735 ret = -ENOMEM;
1736 goto cleanup_pm_buf;
1737 }
1738
1739 data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
1740 data = data & ~ctxsw_prog_main_image_pm_mode_m();
1741
1742 if (enable_hwpm_ctxsw) {
1743 pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();
1744
1745 /* pack upper 32 bits of virtual address into a 32 bit number
1746 * (256 byte boundary)
1747 */
1748 virt_addr = (u32)(pm_ctx->mem.gpu_va >> 8);
1749 } else {
1750 pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
1751 virt_addr = 0;
1752 }
1753
1754 data |= pm_ctx->pm_mode;
1755
1756 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data);
1757 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr);
1758
1759 vunmap(ctx_ptr);
1760
1761 /* enable channel */
1762 c->g->ops.fifo.enable_channel(c);
1626 1763
1627 return 0; 1764 return 0;
1765cleanup_pm_buf:
1766 gk20a_gmmu_unmap(c->vm, pm_ctx->mem.gpu_va, pm_ctx->mem.size,
1767 gk20a_mem_flag_none);
1768 gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &pm_ctx->mem);
1769 memset(&pm_ctx->mem, 0, sizeof(struct mem_desc));
1770
1771 c->g->ops.fifo.enable_channel(c);
1772 return ret;
1628} 1773}
1629 1774
1630/* load saved fresh copy of gloden image into channel gr_ctx */ 1775/* load saved fresh copy of gloden image into channel gr_ctx */
@@ -1635,6 +1780,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1635 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1780 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1636 u32 virt_addr_lo; 1781 u32 virt_addr_lo;
1637 u32 virt_addr_hi; 1782 u32 virt_addr_hi;
1783 u32 virt_addr = 0;
1638 u32 i, v, data; 1784 u32 i, v, data;
1639 int ret = 0; 1785 int ret = 0;
1640 void *ctx_ptr = NULL; 1786 void *ctx_ptr = NULL;
@@ -1663,15 +1809,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1663 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0); 1809 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0);
1664 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0); 1810 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0);
1665 1811
1666 /* no user for client managed performance counter ctx */
1667 data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
1668 data = data & ~ctxsw_prog_main_image_pm_mode_m();
1669 data |= ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
1670 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0,
1671 data);
1672
1673 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, 0);
1674
1675 /* set priv access map */ 1812 /* set priv access map */
1676 virt_addr_lo = 1813 virt_addr_lo =
1677 u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); 1814 u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
@@ -1708,6 +1845,32 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1708 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0, 1845 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0,
1709 virt_addr_hi); 1846 virt_addr_hi);
1710 1847
1848 /* Update main header region of the context buffer with the info needed
1849 * for PM context switching, including mode and possibly a pointer to
1850 * the PM backing store.
1851 */
1852 if (ch_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
1853 if (ch_ctx->pm_ctx.mem.gpu_va == 0) {
1854 gk20a_err(dev_from_gk20a(g),
1855 "context switched pm with no pm buffer!");
1856 vunmap(ctx_ptr);
1857 return -EFAULT;
1858 }
1859
1860 /* pack upper 32 bits of virtual address into a 32 bit number
1861 * (256 byte boundary)
1862 */
1863 virt_addr = (u32)(ch_ctx->pm_ctx.mem.gpu_va >> 8);
1864 } else
1865 virt_addr = 0;
1866
1867 data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
1868 data = data & ~ctxsw_prog_main_image_pm_mode_m();
1869 data |= ch_ctx->pm_ctx.pm_mode;
1870
1871 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data);
1872 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr);
1873
1711 vunmap(ctx_ptr); 1874 vunmap(ctx_ptr);
1712 1875
1713 if (tegra_platform_is_linsim()) { 1876 if (tegra_platform_is_linsim()) {
@@ -2205,7 +2368,6 @@ static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g)
2205 2368
2206int gr_gk20a_init_ctx_state(struct gk20a *g) 2369int gr_gk20a_init_ctx_state(struct gk20a *g)
2207{ 2370{
2208 u32 pm_ctx_image_size;
2209 u32 ret; 2371 u32 ret;
2210 struct fecs_method_op_gk20a op = { 2372 struct fecs_method_op_gk20a op = {
2211 .mailbox = { .id = 0, .data = 0, 2373 .mailbox = { .id = 0, .data = 0,
@@ -2237,7 +2399,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g)
2237 } 2399 }
2238 op.method.addr = 2400 op.method.addr =
2239 gr_fecs_method_push_adr_discover_pm_image_size_v(); 2401 gr_fecs_method_push_adr_discover_pm_image_size_v();
2240 op.mailbox.ret = &pm_ctx_image_size; 2402 op.mailbox.ret = &g->gr.ctx_vars.pm_ctxsw_image_size;
2241 ret = gr_gk20a_submit_fecs_method_op(g, op, false); 2403 ret = gr_gk20a_submit_fecs_method_op(g, op, false);
2242 if (ret) { 2404 if (ret) {
2243 gk20a_err(dev_from_gk20a(g), 2405 gk20a_err(dev_from_gk20a(g),
@@ -2641,14 +2803,30 @@ static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c)
2641 patch_ctx->data_count = 0; 2803 patch_ctx->data_count = 0;
2642} 2804}
2643 2805
2806static void gr_gk20a_free_channel_pm_ctx(struct channel_gk20a *c)
2807{
2808 struct pm_ctx_desc *pm_ctx = &c->ch_ctx.pm_ctx;
2809 struct gk20a *g = c->g;
2810
2811 gk20a_dbg_fn("");
2812
2813 if (pm_ctx->mem.gpu_va) {
2814 gk20a_gmmu_unmap(c->vm, pm_ctx->mem.gpu_va,
2815 pm_ctx->mem.size, gk20a_mem_flag_none);
2816
2817 gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &pm_ctx->mem);
2818 }
2819}
2820
2644void gk20a_free_channel_ctx(struct channel_gk20a *c) 2821void gk20a_free_channel_ctx(struct channel_gk20a *c)
2645{ 2822{
2646 gr_gk20a_unmap_global_ctx_buffers(c); 2823 gr_gk20a_unmap_global_ctx_buffers(c);
2647 gr_gk20a_free_channel_patch_ctx(c); 2824 gr_gk20a_free_channel_patch_ctx(c);
2825 gr_gk20a_free_channel_pm_ctx(c);
2648 if (!gk20a_is_channel_marked_as_tsg(c)) 2826 if (!gk20a_is_channel_marked_as_tsg(c))
2649 gr_gk20a_free_channel_gr_ctx(c); 2827 gr_gk20a_free_channel_gr_ctx(c);
2650 2828
2651 /* zcull_ctx, pm_ctx */ 2829 /* zcull_ctx */
2652 2830
2653 memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a)); 2831 memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));
2654 2832
@@ -2743,6 +2921,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
2743 ch_ctx->gr_ctx = tsg->tsg_gr_ctx; 2921 ch_ctx->gr_ctx = tsg->tsg_gr_ctx;
2744 } 2922 }
2745 2923
2924 /* PM ctxt switch is off by default */
2925 ch_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
2926
2746 /* commit gr ctx buffer */ 2927 /* commit gr ctx buffer */
2747 err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); 2928 err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va);
2748 if (err) { 2929 if (err) {
@@ -2983,6 +3164,10 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
2983 kfree(gr->ctx_vars.local_golden_image); 3164 kfree(gr->ctx_vars.local_golden_image);
2984 gr->ctx_vars.local_golden_image = NULL; 3165 gr->ctx_vars.local_golden_image = NULL;
2985 3166
3167 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map)
3168 nvgpu_free(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map);
3169 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
3170
2986 gk20a_comptag_allocator_destroy(&gr->comp_tags); 3171 gk20a_comptag_allocator_destroy(&gr->comp_tags);
2987} 3172}
2988 3173
@@ -5828,6 +6013,10 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
5828 u32 context_buffer_size, 6013 u32 context_buffer_size,
5829 u32 *priv_offset); 6014 u32 *priv_offset);
5830 6015
6016static int gr_gk20a_find_priv_offset_in_pm_buffer(struct gk20a *g,
6017 u32 addr,
6018 u32 *priv_offset);
6019
5831/* This function will decode a priv address and return the partition type and numbers. */ 6020/* This function will decode a priv address and return the partition type and numbers. */
5832static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr, 6021static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
5833 int *addr_type, /* enum ctxsw_addr_type */ 6022 int *addr_type, /* enum ctxsw_addr_type */
@@ -6056,14 +6245,81 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
6056 offset_addrs[i] = priv_registers[i]; 6245 offset_addrs[i] = priv_registers[i];
6057 } 6246 }
6058 6247
6059 *num_offsets = num_registers; 6248 *num_offsets = num_registers;
6249cleanup:
6250 if (!IS_ERR_OR_NULL(priv_registers))
6251 kfree(priv_registers);
6060 6252
6061 cleanup: 6253 return err;
6254}
6255
6256int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
6257 u32 addr,
6258 u32 max_offsets,
6259 u32 *offsets, u32 *offset_addrs,
6260 u32 *num_offsets)
6261{
6262 u32 i;
6263 u32 priv_offset = 0;
6264 u32 *priv_registers;
6265 u32 num_registers = 0;
6266 int err = 0;
6267 struct gr_gk20a *gr = &g->gr;
6268 u32 potential_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
6269
6270 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6271
6272 /* implementation is crossed-up if either of these happen */
6273 if (max_offsets > potential_offsets)
6274 return -EINVAL;
6275
6276 if (!g->gr.ctx_vars.golden_image_initialized)
6277 return -ENODEV;
6278
6279 priv_registers = kzalloc(sizeof(u32) * potential_offsets, GFP_KERNEL);
6280 if (ZERO_OR_NULL_PTR(priv_registers)) {
6281 gk20a_dbg_fn("failed alloc for potential_offsets=%d", potential_offsets);
6282 return -ENOMEM;
6283 }
6284 memset(offsets, 0, sizeof(u32) * max_offsets);
6285 memset(offset_addrs, 0, sizeof(u32) * max_offsets);
6286 *num_offsets = 0;
6287
6288 gr_gk20a_create_priv_addr_table(g, addr, priv_registers, &num_registers);
6289
6290 if ((max_offsets > 1) && (num_registers > max_offsets)) {
6291 err = -EINVAL;
6292 goto cleanup;
6293 }
6062 6294
6063 if (!IS_ERR_OR_NULL(priv_registers)) 6295 if ((max_offsets == 1) && (num_registers > 1))
6064 kfree(priv_registers); 6296 num_registers = 1;
6297
6298 if (!g->gr.ctx_vars.local_golden_image) {
6299 gk20a_dbg_fn("no context switch header info to work with");
6300 err = -EINVAL;
6301 goto cleanup;
6302 }
6065 6303
6066 return err; 6304 for (i = 0; i < num_registers; i++) {
6305 err = gr_gk20a_find_priv_offset_in_pm_buffer(g,
6306 priv_registers[i],
6307 &priv_offset);
6308 if (err) {
6309 gk20a_dbg_fn("Could not determine priv_offset for addr:0x%x",
6310 addr); /*, grPriRegStr(addr)));*/
6311 goto cleanup;
6312 }
6313
6314 offsets[i] = priv_offset;
6315 offset_addrs[i] = priv_registers[i];
6316 }
6317
6318 *num_offsets = num_registers;
6319cleanup:
6320 kfree(priv_registers);
6321
6322 return err;
6067} 6323}
6068 6324
6069/* Setup some register tables. This looks hacky; our 6325/* Setup some register tables. This looks hacky; our
@@ -6638,8 +6894,6 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
6638 return 0; 6894 return 0;
6639} 6895}
6640 6896
6641
6642
6643/* 6897/*
6644 * This function will return the 32 bit offset for a priv register if it is 6898 * This function will return the 32 bit offset for a priv register if it is
6645 * present in the context buffer. 6899 * present in the context buffer.
@@ -6801,6 +7055,314 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
6801 return -EINVAL; 7055 return -EINVAL;
6802} 7056}
6803 7057
7058static int map_cmp(const void *a, const void *b)
7059{
7060 struct ctxsw_buf_offset_map_entry *e1 =
7061 (struct ctxsw_buf_offset_map_entry *)a;
7062 struct ctxsw_buf_offset_map_entry *e2 =
7063 (struct ctxsw_buf_offset_map_entry *)b;
7064
7065 if (e1->addr < e2->addr)
7066 return -1;
7067
7068 if (e1->addr > e2->addr)
7069 return 1;
7070 return 0;
7071}
7072
7073static int add_ctxsw_buffer_map_entries(struct ctxsw_buf_offset_map_entry *map,
7074 struct aiv_list_gk20a *regs,
7075 u32 *count, u32 *offset,
7076 u32 max_cnt, u32 base, u32 mask)
7077{
7078 u32 idx;
7079 u32 cnt = *count;
7080 u32 off = *offset;
7081
7082 if ((cnt + regs->count) > max_cnt)
7083 return -EINVAL;
7084
7085 for (idx = 0; idx < regs->count; idx++) {
7086 map[cnt].addr = base + (regs->l[idx].addr & mask);
7087 map[cnt++].offset = off;
7088 off += 4;
7089 }
7090 *count = cnt;
7091 *offset = off;
7092 return 0;
7093}
7094
7095/* Helper function to add register entries to the register map for all
7096 * subunits
7097 */
7098static int add_ctxsw_buffer_map_entries_subunits(
7099 struct ctxsw_buf_offset_map_entry *map,
7100 struct aiv_list_gk20a *regs,
7101 u32 *count, u32 *offset,
7102 u32 max_cnt, u32 base,
7103 u32 num_units, u32 stride, u32 mask)
7104{
7105 u32 unit;
7106 u32 idx;
7107 u32 cnt = *count;
7108 u32 off = *offset;
7109
7110 if ((cnt + (regs->count * num_units)) > max_cnt)
7111 return -EINVAL;
7112
7113 /* Data is interleaved for units in ctxsw buffer */
7114 for (idx = 0; idx < regs->count; idx++) {
7115 for (unit = 0; unit < num_units; unit++) {
7116 map[cnt].addr = base + (regs->l[idx].addr & mask) +
7117 (unit * stride);
7118 map[cnt++].offset = off;
7119 off += 4;
7120 }
7121 }
7122 *count = cnt;
7123 *offset = off;
7124 return 0;
7125}
7126
7127static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
7128 struct ctxsw_buf_offset_map_entry *map,
7129 u32 *count, u32 *offset, u32 max_cnt)
7130{
7131 u32 num_gpcs = g->gr.gpc_count;
7132 u32 num_ppcs, num_tpcs, gpc_num, base;
7133
7134 for (gpc_num = 0; gpc_num < num_gpcs; gpc_num++) {
7135 num_tpcs = g->gr.gpc_tpc_count[gpc_num];
7136 base = proj_gpc_base_v() +
7137 (proj_gpc_stride_v() * gpc_num) + proj_tpc_in_gpc_base_v();
7138 if (add_ctxsw_buffer_map_entries_subunits(map,
7139 &g->gr.ctx_vars.ctxsw_regs.pm_tpc,
7140 count, offset, max_cnt, base, num_tpcs,
7141 proj_tpc_in_gpc_stride_v(),
7142 (proj_tpc_in_gpc_stride_v() - 1)))
7143 return -EINVAL;
7144
7145 num_ppcs = g->gr.gpc_ppc_count[gpc_num];
7146 base = proj_gpc_base_v() + (proj_gpc_stride_v() * gpc_num) +
7147 proj_ppc_in_gpc_base_v();
7148 if (add_ctxsw_buffer_map_entries_subunits(map,
7149 &g->gr.ctx_vars.ctxsw_regs.pm_ppc,
7150 count, offset, max_cnt, base, num_ppcs,
7151 proj_ppc_in_gpc_stride_v(),
7152 (proj_ppc_in_gpc_stride_v() - 1)))
7153 return -EINVAL;
7154
7155 base = proj_gpc_base_v() + (proj_gpc_stride_v() * gpc_num);
7156 if (add_ctxsw_buffer_map_entries(map,
7157 &g->gr.ctx_vars.ctxsw_regs.pm_gpc,
7158 count, offset, max_cnt, base,
7159 (proj_gpc_stride_v() - 1)))
7160 return -EINVAL;
7161
7162 base = (NV_PERF_PMMGPC_CHIPLET_OFFSET * gpc_num);
7163 if (add_ctxsw_buffer_map_entries(map,
7164 &g->gr.ctx_vars.ctxsw_regs.perf_gpc,
7165 count, offset, max_cnt, base, ~0))
7166 return -EINVAL;
7167
7168 base = (NV_PERF_PMMGPCROUTER_STRIDE * gpc_num);
7169 if (add_ctxsw_buffer_map_entries(map,
7170 &g->gr.ctx_vars.ctxsw_regs.gpc_router,
7171 count, offset, max_cnt, base, ~0))
7172 return -EINVAL;
7173
7174 *offset = ALIGN(*offset, 256);
7175 }
7176 return 0;
7177}
7178
7179/*
7180 * PM CTXSW BUFFER LAYOUT :
7181 *|---------------------------------------------|0x00 <----PM CTXSW BUFFER BASE
7182 *| |
7183 *| LIST_compressed_pm_ctx_reg_SYS |Space allocated: numRegs words
7184 *|---------------------------------------------|
7185 *| |
7186 *| LIST_compressed_nv_perf_ctx_reg_SYS |Space allocated: numRegs words
7187 *|---------------------------------------------|
7188 *| PADDING for 256 byte alignment |
7189 *|---------------------------------------------|<----256 byte aligned
7190 *| LIST_compressed_nv_perf_fbp_ctx_regs |
7191 *| |Space allocated: numRegs * n words (for n FB units)
7192 *|---------------------------------------------|
7193 *| LIST_compressed_nv_perf_fbprouter_ctx_regs |
7194 *| |Space allocated: numRegs * n words (for n FB units)
7195 *|---------------------------------------------|
7196 *| LIST_compressed_pm_fbpa_ctx_regs |
7197 *| |Space allocated: numRegs * n words (for n FB units)
7198 *|---------------------------------------------|
7199 *| LIST_compressed_pm_ltc_ctx_regs |
7200 *| LTC0 LTS0 |
7201 *| LTC1 LTS0 |Space allocated: numRegs * n words (for n LTC units)
7202 *| LTCn LTS0 |
7203 *| LTC0 LTS1 |
7204 *| LTC1 LTS1 |
7205 *| LTCn LTS1 |
7206 *| LTC0 LTSn |
7207 *| LTC1 LTSn |
7208 *| LTCn LTSn |
7209 *|---------------------------------------------|
7210 *| PADDING for 256 byte alignment |
7211 *|---------------------------------------------|<----256 byte aligned
7212 *| GPC0 REG0 TPC0 |Each GPC has space allocated to accommodate
7213 *| REG0 TPC1 | all the GPC/TPC register lists
7214 *| Lists in each GPC region: REG0 TPCn |Per GPC allocated space is always 256 byte aligned
7215 *| LIST_pm_ctx_reg_TPC REG1 TPC0 |
7216 *| * numTpcs REG1 TPC1 |
7217 *| LIST_pm_ctx_reg_PPC REG1 TPCn |
7218 *| * numPpcs REGn TPC0 |
7219 *| LIST_pm_ctx_reg_GPC REGn TPC1 |
7220 *| LIST_nv_perf_ctx_reg_GPC REGn TPCn |
7221 *| ---- |--
7222 *| GPC1 . |
7223 *| . |<----
7224 *|---------------------------------------------|
7225 *= =
7226 *| GPCn |
7227 *= =
7228 *|---------------------------------------------|
7229 */
7230
7231static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
7232{
7233 u32 hwpm_ctxsw_buffer_size = g->gr.ctx_vars.pm_ctxsw_image_size;
7234 u32 hwpm_ctxsw_reg_count_max;
7235 u32 map_size;
7236 u32 i, count = 0;
7237 u32 offset = 0;
7238 struct ctxsw_buf_offset_map_entry *map;
7239
7240 if (hwpm_ctxsw_buffer_size == 0) {
7241 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
7242 "no PM Ctxsw buffer memory in context buffer");
7243 return -EINVAL;
7244 }
7245
7246 hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2;
7247 map_size = hwpm_ctxsw_reg_count_max * sizeof(*map);
7248
7249 map = nvgpu_alloc(map_size, true);
7250 if (!map)
7251 return -ENOMEM;
7252
7253 /* Add entries from _LIST_pm_ctx_reg_SYS */
7254 if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys,
7255 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0))
7256 goto cleanup;
7257
7258 /* Add entries from _LIST_nv_perf_ctx_reg_SYS */
7259 if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys,
7260 &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0))
7261 goto cleanup;
7262
7263 offset = ALIGN(offset, 256);
7264
7265 /* Add entries from _LIST_nv_perf_fbp_ctx_regs */
7266 if (add_ctxsw_buffer_map_entries_subunits(map,
7267 &g->gr.ctx_vars.ctxsw_regs.fbp,
7268 &count, &offset,
7269 hwpm_ctxsw_reg_count_max, 0,
7270 g->gr.num_fbps, NV_PMM_FBP_STRIDE, ~0))
7271 goto cleanup;
7272
7273 /* Add entries from _LIST_nv_perf_fbprouter_ctx_regs */
7274 if (add_ctxsw_buffer_map_entries_subunits(map,
7275 &g->gr.ctx_vars.ctxsw_regs.fbp_router,
7276 &count, &offset,
7277 hwpm_ctxsw_reg_count_max, 0, g->gr.num_fbps,
7278 NV_PERF_PMM_FBP_ROUTER_STRIDE, ~0))
7279 goto cleanup;
7280
7281 /* Add entries from _LIST_nv_pm_fbpa_ctx_regs */
7282 if (add_ctxsw_buffer_map_entries_subunits(map,
7283 &g->gr.ctx_vars.ctxsw_regs.pm_fbpa,
7284 &count, &offset,
7285 hwpm_ctxsw_reg_count_max, 0,
7286 proj_scal_litter_num_fbpas_v(),
7287 proj_fbpa_stride_v(), ~0))
7288 goto cleanup;
7289
7290 /* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */
7291 if (add_ctxsw_buffer_map_entries_subunits(map,
7292 &g->gr.ctx_vars.ctxsw_regs.pm_ltc,
7293 &count, &offset,
7294 hwpm_ctxsw_reg_count_max, 0,
7295 g->ltc_count, proj_ltc_stride_v(), ~0))
7296 goto cleanup;
7297
7298 offset = ALIGN(offset, 256);
7299
7300 /* Add GPC entries */
7301 if (add_ctxsw_buffer_map_entries_gpcs(g, map, &count, &offset,
7302 hwpm_ctxsw_reg_count_max))
7303 goto cleanup;
7304
7305 if (offset > hwpm_ctxsw_buffer_size) {
7306 gk20a_err(dev_from_gk20a(g), "offset > buffer size");
7307 goto cleanup;
7308 }
7309
7310 sort(map, count, sizeof(*map), map_cmp, NULL);
7311
7312 g->gr.ctx_vars.hwpm_ctxsw_buffer_offset_map = map;
7313 g->gr.ctx_vars.hwpm_ctxsw_buffer_offset_map_count = count;
7314
7315 gk20a_dbg_info("Reg Addr => HWPM Ctxt switch buffer offset");
7316
7317 for (i = 0; i < count; i++)
7318 gk20a_dbg_info("%08x => %08x", map[i].addr, map[i].offset);
7319
7320 return 0;
7321cleanup:
7322 gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map");
7323 nvgpu_free(map);
7324 return -EINVAL;
7325}
7326
7327/*
7328 * This function will return the 32 bit offset for a priv register if it is
7329 * present in the PM context buffer.
7330 */
7331static int gr_gk20a_find_priv_offset_in_pm_buffer(struct gk20a *g,
7332 u32 addr,
7333 u32 *priv_offset)
7334{
7335 struct gr_gk20a *gr = &g->gr;
7336 int err = 0;
7337 u32 count;
7338 struct ctxsw_buf_offset_map_entry *map, *result, map_key;
7339
7340 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
7341
7342 /* Create map of pri address and pm offset if necessary */
7343 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map == NULL) {
7344 err = gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(g);
7345 if (err)
7346 return err;
7347 }
7348
7349 *priv_offset = 0;
7350
7351 map = gr->ctx_vars.hwpm_ctxsw_buffer_offset_map;
7352 count = gr->ctx_vars.hwpm_ctxsw_buffer_offset_map_count;
7353
7354 map_key.addr = addr;
7355 result = bsearch(&map_key, map, count, sizeof(*map), map_cmp);
7356
7357 if (result)
7358 *priv_offset = result->offset;
7359 else {
7360 gk20a_err(dev_from_gk20a(g), "Lookup failed for address 0x%x", addr);
7361 err = -EINVAL;
7362 }
7363 return err;
7364}
7365
6804bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch) 7366bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
6805{ 7367{
6806 int curr_gr_ctx, curr_gr_tsgid; 7368 int curr_gr_ctx, curr_gr_tsgid;
@@ -6840,6 +7402,8 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
6840 struct gk20a *g = ch->g; 7402 struct gk20a *g = ch->g;
6841 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; 7403 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
6842 void *ctx_ptr = NULL; 7404 void *ctx_ptr = NULL;
7405 void *pm_ctx_ptr = NULL;
7406 void *base_ptr = NULL;
6843 bool ch_is_curr_ctx, restart_gr_ctxsw = false; 7407 bool ch_is_curr_ctx, restart_gr_ctxsw = false;
6844 u32 i, j, offset, v; 7408 u32 i, j, offset, v;
6845 struct gr_gk20a *gr = &g->gr; 7409 struct gr_gk20a *gr = &g->gr;
@@ -6940,15 +7504,6 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
6940 } 7504 }
6941 offset_addrs = offsets + max_offsets; 7505 offset_addrs = offsets + max_offsets;
6942 7506
6943 /* would have been a variant of gr_gk20a_apply_instmem_overrides */
6944 /* recoded in-place instead.*/
6945 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
6946 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
6947 0, pgprot_writecombine(PAGE_KERNEL));
6948 if (!ctx_ptr) {
6949 err = -ENOMEM;
6950 goto cleanup;
6951 }
6952 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); 7507 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
6953 if (err) 7508 if (err)
6954 goto cleanup; 7509 goto cleanup;
@@ -6977,13 +7532,52 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
6977 &num_offsets, 7532 &num_offsets,
6978 ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD), 7533 ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD),
6979 ctx_ops[i].quad); 7534 ctx_ops[i].quad);
6980 if (err) { 7535 if (!err) {
6981 gk20a_dbg(gpu_dbg_gpu_dbg, 7536 if (!ctx_ptr) {
7537 /* would have been a variant of
7538 * gr_gk20a_apply_instmem_overrides,
7539 * recoded in-place instead.
7540 */
7541 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
7542 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
7543 0, pgprot_writecombine(PAGE_KERNEL));
7544 if (!ctx_ptr) {
7545 err = -ENOMEM;
7546 goto cleanup;
7547 }
7548 }
7549 base_ptr = ctx_ptr;
7550 } else {
7551 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
7552 ctx_ops[i].offset,
7553 max_offsets,
7554 offsets, offset_addrs,
7555 &num_offsets);
7556 if (err) {
7557 gk20a_dbg(gpu_dbg_gpu_dbg,
6982 "ctx op invalid offset: offset=0x%x", 7558 "ctx op invalid offset: offset=0x%x",
6983 ctx_ops[i].offset); 7559 ctx_ops[i].offset);
6984 ctx_ops[i].status = 7560 ctx_ops[i].status =
6985 NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET; 7561 NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET;
6986 continue; 7562 continue;
7563 }
7564 if (!pm_ctx_ptr) {
7565 /* Make sure ctx buffer was initialized */
7566 if (!ch_ctx->pm_ctx.mem.pages) {
7567 gk20a_err(dev_from_gk20a(g),
7568 "Invalid ctx buffer");
7569 err = -EINVAL;
7570 goto cleanup;
7571 }
7572 pm_ctx_ptr = vmap(ch_ctx->pm_ctx.mem.pages,
7573 PAGE_ALIGN(ch_ctx->pm_ctx.mem.size) >> PAGE_SHIFT,
7574 0, pgprot_writecombine(PAGE_KERNEL));
7575 if (!pm_ctx_ptr) {
7576 err = -ENOMEM;
7577 goto cleanup;
7578 }
7579 }
7580 base_ptr = pm_ctx_ptr;
6987 } 7581 }
6988 7582
6989 /* if this is a quad access, setup for special access*/ 7583 /* if this is a quad access, setup for special access*/
@@ -6993,24 +7587,27 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
6993 ctx_ops[i].offset); 7587 ctx_ops[i].offset);
6994 7588
6995 for (j = 0; j < num_offsets; j++) { 7589 for (j = 0; j < num_offsets; j++) {
6996 /* sanity check, don't write outside, worst case */ 7590 /* sanity check gr ctxt offsets,
6997 if (offsets[j] >= g->gr.ctx_vars.golden_image_size) 7591 * don't write outside, worst case
7592 */
7593 if ((base_ptr == ctx_ptr) &&
7594 (offsets[j] >= g->gr.ctx_vars.golden_image_size))
6998 continue; 7595 continue;
6999 if (pass == 0) { /* write pass */ 7596 if (pass == 0) { /* write pass */
7000 v = gk20a_mem_rd32(ctx_ptr + offsets[j], 0); 7597 v = gk20a_mem_rd32(base_ptr + offsets[j], 0);
7001 v &= ~ctx_ops[i].and_n_mask_lo; 7598 v &= ~ctx_ops[i].and_n_mask_lo;
7002 v |= ctx_ops[i].value_lo; 7599 v |= ctx_ops[i].value_lo;
7003 gk20a_mem_wr32(ctx_ptr + offsets[j], 0, v); 7600 gk20a_mem_wr32(base_ptr + offsets[j], 0, v);
7004 7601
7005 gk20a_dbg(gpu_dbg_gpu_dbg, 7602 gk20a_dbg(gpu_dbg_gpu_dbg,
7006 "context wr: offset=0x%x v=0x%x", 7603 "context wr: offset=0x%x v=0x%x",
7007 offsets[j], v); 7604 offsets[j], v);
7008 7605
7009 if (ctx_ops[i].op == REGOP(WRITE_64)) { 7606 if (ctx_ops[i].op == REGOP(WRITE_64)) {
7010 v = gk20a_mem_rd32(ctx_ptr + offsets[j] + 4, 0); 7607 v = gk20a_mem_rd32(base_ptr + offsets[j] + 4, 0);
7011 v &= ~ctx_ops[i].and_n_mask_hi; 7608 v &= ~ctx_ops[i].and_n_mask_hi;
7012 v |= ctx_ops[i].value_hi; 7609 v |= ctx_ops[i].value_hi;
7013 gk20a_mem_wr32(ctx_ptr + offsets[j] + 4, 0, v); 7610 gk20a_mem_wr32(base_ptr + offsets[j] + 4, 0, v);
7014 7611
7015 gk20a_dbg(gpu_dbg_gpu_dbg, 7612 gk20a_dbg(gpu_dbg_gpu_dbg,
7016 "context wr: offset=0x%x v=0x%x", 7613 "context wr: offset=0x%x v=0x%x",
@@ -7020,18 +7617,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7020 /* check to see if we need to add a special WAR 7617 /* check to see if we need to add a special WAR
7021 for some of the SMPC perf regs */ 7618 for some of the SMPC perf regs */
7022 gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], 7619 gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j],
7023 v, ctx_ptr); 7620 v, base_ptr);
7024 7621
7025 } else { /* read pass */ 7622 } else { /* read pass */
7026 ctx_ops[i].value_lo = 7623 ctx_ops[i].value_lo =
7027 gk20a_mem_rd32(ctx_ptr + offsets[0], 0); 7624 gk20a_mem_rd32(base_ptr + offsets[0], 0);
7028 7625
7029 gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x", 7626 gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x",
7030 offsets[0], ctx_ops[i].value_lo); 7627 offsets[0], ctx_ops[i].value_lo);
7031 7628
7032 if (ctx_ops[i].op == REGOP(READ_64)) { 7629 if (ctx_ops[i].op == REGOP(READ_64)) {
7033 ctx_ops[i].value_hi = 7630 ctx_ops[i].value_hi =
7034 gk20a_mem_rd32(ctx_ptr + offsets[0] + 4, 0); 7631 gk20a_mem_rd32(base_ptr + offsets[0] + 4, 0);
7035 7632
7036 gk20a_dbg(gpu_dbg_gpu_dbg, 7633 gk20a_dbg(gpu_dbg_gpu_dbg,
7037 "context rd: offset=0x%x v=0x%x", 7634 "context rd: offset=0x%x v=0x%x",
@@ -7062,6 +7659,9 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7062 if (ctx_ptr) 7659 if (ctx_ptr)
7063 vunmap(ctx_ptr); 7660 vunmap(ctx_ptr);
7064 7661
7662 if (pm_ctx_ptr)
7663 vunmap(pm_ctx_ptr);
7664
7065 if (restart_gr_ctxsw) { 7665 if (restart_gr_ctxsw) {
7066 int tmp_err = gr_gk20a_enable_ctxsw(g); 7666 int tmp_err = gr_gk20a_enable_ctxsw(g);
7067 if (tmp_err) { 7667 if (tmp_err) {
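[Editor's note] The address-to-offset lookup that gr_gk20a_find_priv_offset_in_pm_buffer() performs against the PM ctxsw buffer layout described above reduces to the standard sorted-array pattern from <linux/sort.h> and <linux/bsearch.h>. A condensed, self-contained restatement follows; pm_map_sort()/pm_map_lookup() are hypothetical helper names for illustration, not functions added by this patch.

#include <linux/sort.h>
#include <linux/bsearch.h>
#include <linux/types.h>

struct ctxsw_buf_offset_map_entry {
	u32 addr;	/* register address */
	u32 offset;	/* offset in ctxsw buffer */
};

static int map_cmp(const void *a, const void *b)
{
	const struct ctxsw_buf_offset_map_entry *e1 = a;
	const struct ctxsw_buf_offset_map_entry *e2 = b;

	if (e1->addr < e2->addr)
		return -1;
	if (e1->addr > e2->addr)
		return 1;
	return 0;
}

/* Build step: sort the table by address once (the patch does this at the
 * end of gr_gk20a_create_hwpm_ctxsw_buffer_offset_map()). */
static void pm_map_sort(struct ctxsw_buf_offset_map_entry *map, u32 count)
{
	sort(map, count, sizeof(*map), map_cmp, NULL);
}

/* Lookup step: O(log n) per regop, as in
 * gr_gk20a_find_priv_offset_in_pm_buffer(). */
static bool pm_map_lookup(const struct ctxsw_buf_offset_map_entry *map,
			  u32 count, u32 addr, u32 *offset)
{
	struct ctxsw_buf_offset_map_entry key = { .addr = addr };
	const struct ctxsw_buf_offset_map_entry *hit;

	hit = bsearch(&key, map, count, sizeof(*map), map_cmp);
	if (!hit)
		return false;
	*offset = hit->offset;
	return true;
}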
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 2c575534..c82cf75c 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -198,8 +198,13 @@ struct gr_gk20a {
198 u32 golden_image_size; 198 u32 golden_image_size;
199 u32 *local_golden_image; 199 u32 *local_golden_image;
200 200
201 u32 hwpm_ctxsw_buffer_offset_map_count;
202 struct ctxsw_buf_offset_map_entry *hwpm_ctxsw_buffer_offset_map;
203
201 u32 zcull_ctxsw_image_size; 204 u32 zcull_ctxsw_image_size;
202 205
206 u32 pm_ctxsw_image_size;
207
203 u32 buffer_header_size; 208 u32 buffer_header_size;
204 209
205 u32 priv_access_map_size; 210 u32 priv_access_map_size;
@@ -219,6 +224,14 @@ struct gr_gk20a {
219 struct aiv_list_gk20a pm_sys; 224 struct aiv_list_gk20a pm_sys;
220 struct aiv_list_gk20a pm_gpc; 225 struct aiv_list_gk20a pm_gpc;
221 struct aiv_list_gk20a pm_tpc; 226 struct aiv_list_gk20a pm_tpc;
227 struct aiv_list_gk20a pm_ppc;
228 struct aiv_list_gk20a perf_sys;
229 struct aiv_list_gk20a perf_gpc;
230 struct aiv_list_gk20a fbp;
231 struct aiv_list_gk20a fbp_router;
232 struct aiv_list_gk20a gpc_router;
233 struct aiv_list_gk20a pm_ltc;
234 struct aiv_list_gk20a pm_fbpa;
222 } ctxsw_regs; 235 } ctxsw_regs;
223 int regs_base_index; 236 int regs_base_index;
224 bool valid; 237 bool valid;
@@ -484,9 +497,17 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
484 u32 *offsets, u32 *offset_addrs, 497 u32 *offsets, u32 *offset_addrs,
485 u32 *num_offsets, 498 u32 *num_offsets,
486 bool is_quad, u32 quad); 499 bool is_quad, u32 quad);
500int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
501 u32 addr,
502 u32 max_offsets,
503 u32 *offsets, u32 *offset_addrs,
504 u32 *num_offsets);
487int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, 505int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
488 struct channel_gk20a *c, 506 struct channel_gk20a *c,
489 bool enable_smpc_ctxsw); 507 bool enable_smpc_ctxsw);
508int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
509 struct channel_gk20a *c,
510 bool enable_hwpm_ctxsw);
490 511
491struct channel_ctx_gk20a; 512struct channel_ctx_gk20a;
492int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, 513int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx,
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h
index da555f7c..08834557 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h
@@ -94,6 +94,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
94{ 94{
95 return 0x7 << 0; 95 return 0x7 << 0;
96} 96}
97static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void)
98{
99 return 0x1;
100}
97static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void) 101static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
98{ 102{
99 return 0x0; 103 return 0x0;
diff --git a/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h
index 93c55c30..ce10db35 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2012-2016, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License, 5 * under the terms and conditions of the GNU General Public License,
@@ -70,6 +70,10 @@ static inline u32 proj_lts_stride_v(void)
70{ 70{
71 return 0x00000400; 71 return 0x00000400;
72} 72}
73static inline u32 proj_fbpa_stride_v(void)
74{
75 return 0x00001000;
76}
73static inline u32 proj_ppc_in_gpc_base_v(void) 77static inline u32 proj_ppc_in_gpc_base_v(void)
74{ 78{
75 return 0x00003000; 79 return 0x00003000;
@@ -114,6 +118,10 @@ static inline u32 proj_scal_litter_num_fbps_v(void)
114{ 118{
115 return 0x00000001; 119 return 0x00000001;
116} 120}
121static inline u32 proj_scal_litter_num_fbpas_v(void)
122{
123 return 0x00000001;
124}
117static inline u32 proj_scal_litter_num_gpcs_v(void) 125static inline u32 proj_scal_litter_num_gpcs_v(void)
118{ 126{
119 return 0x00000001; 127 return 0x00000001;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 368b32d3..833d896d 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * GK20A memory management 2 * GK20A memory management
3 * 3 *
4 * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -73,6 +73,11 @@ struct zcull_ctx_desc {
73 u32 ctx_sw_mode; 73 u32 ctx_sw_mode;
74}; 74};
75 75
76struct pm_ctx_desc {
77 struct mem_desc mem;
78 u32 pm_mode;
79};
80
76struct gk20a; 81struct gk20a;
77struct gr_ctx_buffer_desc { 82struct gr_ctx_buffer_desc {
78 void (*destroy)(struct gk20a *, struct gr_ctx_buffer_desc *); 83 void (*destroy)(struct gk20a *, struct gr_ctx_buffer_desc *);
diff --git a/drivers/gpu/nvgpu/gk20a/regops_gk20a.c b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c
index 1696f759..e6162af2 100644
--- a/drivers/gpu/nvgpu/gk20a/regops_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Tegra GK20A GPU Debugger Driver Register Ops 2 * Tegra GK20A GPU Debugger Driver Register Ops
3 * 3 *
4 * Copyright (c) 2013-2015, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2013-2016, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -644,22 +644,31 @@ static int validate_reg_op_offset(struct dbg_session_gk20a *dbg_s,
644 valid = check_whitelists(dbg_s, op, offset + 4); 644 valid = check_whitelists(dbg_s, op, offset + 4);
645 645
646 if (valid && (op->type != REGOP(TYPE_GLOBAL))) { 646 if (valid && (op->type != REGOP(TYPE_GLOBAL))) {
647 err = gr_gk20a_get_ctx_buffer_offsets(dbg_s->g, 647 err = gr_gk20a_get_ctx_buffer_offsets(dbg_s->g,
648 op->offset,
649 1,
650 &buf_offset_lo,
651 &buf_offset_addr,
652 &num_offsets,
653 op->type == REGOP(TYPE_GR_CTX_QUAD),
654 op->quad);
655 if (err) {
656 err = gr_gk20a_get_pm_ctx_buffer_offsets(dbg_s->g,
648 op->offset, 657 op->offset,
649 1, 658 1,
650 &buf_offset_lo, 659 &buf_offset_lo,
651 &buf_offset_addr, 660 &buf_offset_addr,
652 &num_offsets, 661 &num_offsets);
653 op->type == REGOP(TYPE_GR_CTX_QUAD), 662
654 op->quad);
655 if (err) { 663 if (err) {
656 op->status |= REGOP(STATUS_INVALID_OFFSET); 664 op->status |= REGOP(STATUS_INVALID_OFFSET);
657 return -EINVAL; 665 return -EINVAL;
658 } 666 }
659 if (!buf_offset_lo) { 667 }
660 op->status |= REGOP(STATUS_INVALID_OFFSET); 668 if (!buf_offset_lo) {
661 return -EINVAL; 669 op->status |= REGOP(STATUS_INVALID_OFFSET);
662 } 670 return -EINVAL;
671 }
663 } 672 }
664 673
665 if (!valid) { 674 if (!valid) {
diff --git a/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h
index f9531ae1..b837918c 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License, 5 * under the terms and conditions of the GNU General Public License,
@@ -70,6 +70,10 @@ static inline u32 proj_lts_stride_v(void)
70{ 70{
71 return 0x00000200; 71 return 0x00000200;
72} 72}
73static inline u32 proj_fbpa_stride_v(void)
74{
75 return 0x00001000;
76}
73static inline u32 proj_ppc_in_gpc_base_v(void) 77static inline u32 proj_ppc_in_gpc_base_v(void)
74{ 78{
75 return 0x00003000; 79 return 0x00003000;
@@ -114,6 +118,10 @@ static inline u32 proj_scal_litter_num_fbps_v(void)
114{ 118{
115 return 0x00000001; 119 return 0x00000001;
116} 120}
121static inline u32 proj_scal_litter_num_fbpas_v(void)
122{
123 return 0x00000001;
124}
117static inline u32 proj_scal_litter_num_gpcs_v(void) 125static inline u32 proj_scal_litter_num_gpcs_v(void)
118{ 126{
119 return 0x00000001; 127 return 0x00000001;