diff options
author | Peter Daifuku <pdaifuku@nvidia.com> | 2016-03-09 22:10:20 -0500 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-04-07 14:05:49 -0400 |
commit | 37155b65f1dd6039bdef92f513d86640956bc12c (patch) | |
tree | 1deb57523c3acc445996c642da6ac96e1cf7c355 | |
parent | 6675c03603669c667c6ffec34567eaf101a2d09d (diff) |
gpu: nvgpu: support for hwpm context switching
Add support for hwpm context switching
Bug 1648200
Change-Id: I482899bf165cd2ef24bb8617be16df01218e462f
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1120450
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 47 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c | 65 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h | 14 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 692 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 23 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h | 10 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/regops_gk20a.c | 27 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h | 10 | ||||
-rw-r--r-- | include/uapi/linux/nvgpu.h | 14 |
12 files changed, 849 insertions, 65 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 29c39160..d8951b94 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -54,6 +54,7 @@ struct channel_ctx_gk20a { | |||
54 | struct gr_ctx_desc *gr_ctx; | 54 | struct gr_ctx_desc *gr_ctx; |
55 | struct patch_desc patch_ctx; | 55 | struct patch_desc patch_ctx; |
56 | struct zcull_ctx_desc zcull_ctx; | 56 | struct zcull_ctx_desc zcull_ctx; |
57 | struct pm_ctx_desc pm_ctx; | ||
57 | u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA]; | 58 | u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA]; |
58 | u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA]; | 59 | u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA]; |
59 | bool global_ctx_buffer_mapped; | 60 | bool global_ctx_buffer_mapped; |
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 1ee0189b..d087d89e 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | |||
@@ -457,6 +457,9 @@ static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, | |||
457 | static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | 457 | static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, |
458 | struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args); | 458 | struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args); |
459 | 459 | ||
460 | static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | ||
461 | struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args); | ||
462 | |||
460 | static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( | 463 | static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( |
461 | struct dbg_session_gk20a *dbg_s, | 464 | struct dbg_session_gk20a *dbg_s, |
462 | struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); | 465 | struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); |
@@ -582,6 +585,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, | |||
582 | (struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *)buf); | 585 | (struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *)buf); |
583 | break; | 586 | break; |
584 | 587 | ||
588 | case NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE: | ||
589 | err = nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(dbg_s, | ||
590 | (struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *)buf); | ||
591 | break; | ||
592 | |||
585 | case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS: | 593 | case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS: |
586 | err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s, | 594 | err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s, |
587 | (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); | 595 | (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); |
@@ -880,7 +888,7 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | |||
880 | ch_gk20a = dbg_s->ch; | 888 | ch_gk20a = dbg_s->ch; |
881 | 889 | ||
882 | if (!ch_gk20a) { | 890 | if (!ch_gk20a) { |
883 | gk20a_err(dev_from_gk20a(dbg_s->g), | 891 | gk20a_err(dev_from_gk20a(g), |
884 | "no bound channel for smpc ctxsw mode update\n"); | 892 | "no bound channel for smpc ctxsw mode update\n"); |
885 | err = -EINVAL; | 893 | err = -EINVAL; |
886 | goto clean_up; | 894 | goto clean_up; |
@@ -889,13 +897,48 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | |||
889 | err = gr_gk20a_update_smpc_ctxsw_mode(g, ch_gk20a, | 897 | err = gr_gk20a_update_smpc_ctxsw_mode(g, ch_gk20a, |
890 | args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW); | 898 | args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW); |
891 | if (err) { | 899 | if (err) { |
892 | gk20a_err(dev_from_gk20a(dbg_s->g), | 900 | gk20a_err(dev_from_gk20a(g), |
893 | "error (%d) during smpc ctxsw mode update\n", err); | 901 | "error (%d) during smpc ctxsw mode update\n", err); |
894 | goto clean_up; | 902 | goto clean_up; |
895 | } | 903 | } |
896 | 904 | ||
897 | err = g->ops.regops.apply_smpc_war(dbg_s); | 905 | err = g->ops.regops.apply_smpc_war(dbg_s); |
906 | clean_up: | ||
907 | mutex_unlock(&g->dbg_sessions_lock); | ||
908 | return err; | ||
909 | } | ||
910 | |||
911 | static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | ||
912 | struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args) | ||
913 | { | ||
914 | int err; | ||
915 | struct gk20a *g = get_gk20a(dbg_s->pdev); | ||
916 | struct channel_gk20a *ch_gk20a; | ||
898 | 917 | ||
918 | gk20a_dbg_fn("%s pm ctxsw mode = %d", | ||
919 | dev_name(dbg_s->dev), args->mode); | ||
920 | |||
921 | /* Take the global lock, since we'll be doing global regops */ | ||
922 | mutex_lock(&g->dbg_sessions_lock); | ||
923 | |||
924 | ch_gk20a = dbg_s->ch; | ||
925 | |||
926 | if (!ch_gk20a) { | ||
927 | gk20a_err(dev_from_gk20a(g), | ||
928 | "no bound channel for pm ctxsw mode update\n"); | ||
929 | err = -EINVAL; | ||
930 | goto clean_up; | ||
931 | } | ||
932 | |||
933 | err = gr_gk20a_update_hwpm_ctxsw_mode(g, ch_gk20a, | ||
934 | args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW); | ||
935 | if (err) | ||
936 | gk20a_err(dev_from_gk20a(g), | ||
937 | "error (%d) during pm ctxsw mode update\n", err); | ||
938 | |||
939 | /* gk20a would require a WAR to set the core PM_ENABLE bit, not | ||
940 | * added here with gk20a being deprecated | ||
941 | */ | ||
899 | clean_up: | 942 | clean_up: |
900 | mutex_unlock(&g->dbg_sessions_lock); | 943 | mutex_unlock(&g->dbg_sessions_lock); |
901 | return err; | 944 | return err; |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c index 94dba7b6..64d6542b 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * GK20A Graphics Context | 4 | * GK20A Graphics Context |
5 | * | 5 | * |
6 | * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. | 6 | * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or modify it | 8 | * This program is free software; you can redistribute it and/or modify it |
9 | * under the terms and conditions of the GNU General Public License, | 9 | * under the terms and conditions of the GNU General Public License, |
@@ -281,7 +281,60 @@ static int gr_gk20a_init_ctx_vars_fw(struct gk20a *g, struct gr_gk20a *gr) | |||
281 | netlist_num); | 281 | netlist_num); |
282 | break; | 282 | break; |
283 | case NETLIST_REGIONID_CTXREG_PMPPC: | 283 | case NETLIST_REGIONID_CTXREG_PMPPC: |
284 | gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMPPC skipped"); | 284 | gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMPPC"); |
285 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
286 | src, size, &g->gr.ctx_vars.ctxsw_regs.pm_ppc); | ||
287 | if (err) | ||
288 | goto clean_up; | ||
289 | break; | ||
290 | case NETLIST_REGIONID_NVPERF_CTXREG_SYS: | ||
291 | gk20a_dbg_info("NETLIST_REGIONID_NVPERF_CTXREG_SYS"); | ||
292 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
293 | src, size, &g->gr.ctx_vars.ctxsw_regs.perf_sys); | ||
294 | if (err) | ||
295 | goto clean_up; | ||
296 | break; | ||
297 | case NETLIST_REGIONID_NVPERF_FBP_CTXREGS: | ||
298 | gk20a_dbg_info("NETLIST_REGIONID_NVPERF_FBP_CTXREGS"); | ||
299 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
300 | src, size, &g->gr.ctx_vars.ctxsw_regs.fbp); | ||
301 | if (err) | ||
302 | goto clean_up; | ||
303 | break; | ||
304 | case NETLIST_REGIONID_NVPERF_CTXREG_GPC: | ||
305 | gk20a_dbg_info("NETLIST_REGIONID_NVPERF_CTXREG_GPC"); | ||
306 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
307 | src, size, &g->gr.ctx_vars.ctxsw_regs.perf_gpc); | ||
308 | if (err) | ||
309 | goto clean_up; | ||
310 | break; | ||
311 | case NETLIST_REGIONID_NVPERF_FBP_ROUTER: | ||
312 | gk20a_dbg_info("NETLIST_REGIONID_NVPERF_FBP_ROUTER"); | ||
313 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
314 | src, size, &g->gr.ctx_vars.ctxsw_regs.fbp_router); | ||
315 | if (err) | ||
316 | goto clean_up; | ||
317 | break; | ||
318 | case NETLIST_REGIONID_NVPERF_GPC_ROUTER: | ||
319 | gk20a_dbg_info("NETLIST_REGIONID_NVPERF_GPC_ROUTER"); | ||
320 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
321 | src, size, &g->gr.ctx_vars.ctxsw_regs.gpc_router); | ||
322 | if (err) | ||
323 | goto clean_up; | ||
324 | break; | ||
325 | case NETLIST_REGIONID_CTXREG_PMLTC: | ||
326 | gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMLTC"); | ||
327 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
328 | src, size, &g->gr.ctx_vars.ctxsw_regs.pm_ltc); | ||
329 | if (err) | ||
330 | goto clean_up; | ||
331 | break; | ||
332 | case NETLIST_REGIONID_CTXREG_PMFBPA: | ||
333 | gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMFBPA"); | ||
334 | err = gr_gk20a_alloc_load_netlist_aiv( | ||
335 | src, size, &g->gr.ctx_vars.ctxsw_regs.pm_fbpa); | ||
336 | if (err) | ||
337 | goto clean_up; | ||
285 | break; | 338 | break; |
286 | default: | 339 | default: |
287 | gk20a_dbg_info("unrecognized region %d skipped", i); | 340 | gk20a_dbg_info("unrecognized region %d skipped", i); |
@@ -319,6 +372,14 @@ clean_up: | |||
319 | kfree(g->gr.ctx_vars.ctxsw_regs.pm_sys.l); | 372 | kfree(g->gr.ctx_vars.ctxsw_regs.pm_sys.l); |
320 | kfree(g->gr.ctx_vars.ctxsw_regs.pm_gpc.l); | 373 | kfree(g->gr.ctx_vars.ctxsw_regs.pm_gpc.l); |
321 | kfree(g->gr.ctx_vars.ctxsw_regs.pm_tpc.l); | 374 | kfree(g->gr.ctx_vars.ctxsw_regs.pm_tpc.l); |
375 | kfree(g->gr.ctx_vars.ctxsw_regs.pm_ppc.l); | ||
376 | kfree(g->gr.ctx_vars.ctxsw_regs.perf_sys.l); | ||
377 | kfree(g->gr.ctx_vars.ctxsw_regs.fbp.l); | ||
378 | kfree(g->gr.ctx_vars.ctxsw_regs.perf_gpc.l); | ||
379 | kfree(g->gr.ctx_vars.ctxsw_regs.fbp_router.l); | ||
380 | kfree(g->gr.ctx_vars.ctxsw_regs.gpc_router.l); | ||
381 | kfree(g->gr.ctx_vars.ctxsw_regs.pm_ltc.l); | ||
382 | kfree(g->gr.ctx_vars.ctxsw_regs.pm_fbpa.l); | ||
322 | release_firmware(netlist_fw); | 383 | release_firmware(netlist_fw); |
323 | err = -ENOENT; | 384 | err = -ENOENT; |
324 | } | 385 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h index 6844ee69..d413942a 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GK20A Graphics Context | 2 | * GK20A Graphics Context |
3 | * | 3 | * |
4 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -75,6 +75,13 @@ union __max_name { | |||
75 | #define NETLIST_REGIONID_NETLIST_NUM 18 | 75 | #define NETLIST_REGIONID_NETLIST_NUM 18 |
76 | #define NETLIST_REGIONID_CTXREG_PPC 19 | 76 | #define NETLIST_REGIONID_CTXREG_PPC 19 |
77 | #define NETLIST_REGIONID_CTXREG_PMPPC 20 | 77 | #define NETLIST_REGIONID_CTXREG_PMPPC 20 |
78 | #define NETLIST_REGIONID_NVPERF_CTXREG_SYS 21 | ||
79 | #define NETLIST_REGIONID_NVPERF_FBP_CTXREGS 22 | ||
80 | #define NETLIST_REGIONID_NVPERF_CTXREG_GPC 23 | ||
81 | #define NETLIST_REGIONID_NVPERF_FBP_ROUTER 24 | ||
82 | #define NETLIST_REGIONID_NVPERF_GPC_ROUTER 25 | ||
83 | #define NETLIST_REGIONID_CTXREG_PMLTC 26 | ||
84 | #define NETLIST_REGIONID_CTXREG_PMFBPA 27 | ||
78 | 85 | ||
79 | struct netlist_region { | 86 | struct netlist_region { |
80 | u32 region_id; | 87 | u32 region_id; |
@@ -114,6 +121,11 @@ struct u32_list_gk20a { | |||
114 | u32 count; | 121 | u32 count; |
115 | }; | 122 | }; |
116 | 123 | ||
124 | struct ctxsw_buf_offset_map_entry { | ||
125 | u32 addr; /* Register address */ | ||
126 | u32 offset; /* Offset in ctxt switch buffer */ | ||
127 | }; | ||
128 | |||
117 | static inline | 129 | static inline |
118 | struct av_gk20a *alloc_av_list_gk20a(struct av_list_gk20a *avl) | 130 | struct av_gk20a *alloc_av_list_gk20a(struct av_list_gk20a *avl) |
119 | { | 131 | { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 116fd88f..a8addc7b 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -27,6 +27,8 @@ | |||
27 | #include <linux/dma-mapping.h> | 27 | #include <linux/dma-mapping.h> |
28 | #include <linux/firmware.h> | 28 | #include <linux/firmware.h> |
29 | #include <linux/nvhost.h> | 29 | #include <linux/nvhost.h> |
30 | #include <linux/sort.h> | ||
31 | #include <linux/bsearch.h> | ||
30 | #include <trace/events/gk20a.h> | 32 | #include <trace/events/gk20a.h> |
31 | 33 | ||
32 | #include "gk20a.h" | 34 | #include "gk20a.h" |
@@ -59,6 +61,10 @@ | |||
59 | #include "ctxsw_trace_gk20a.h" | 61 | #include "ctxsw_trace_gk20a.h" |
60 | 62 | ||
61 | #define BLK_SIZE (256) | 63 | #define BLK_SIZE (256) |
64 | #define NV_PMM_FBP_STRIDE 0x1000 | ||
65 | #define NV_PERF_PMM_FBP_ROUTER_STRIDE 0x0200 | ||
66 | #define NV_PERF_PMMGPC_CHIPLET_OFFSET 0x1000 | ||
67 | #define NV_PERF_PMMGPCROUTER_STRIDE 0x0200 | ||
62 | 68 | ||
63 | static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g); | 69 | static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g); |
64 | static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va); | 70 | static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va); |
@@ -1591,9 +1597,17 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1591 | u32 data; | 1597 | u32 data; |
1592 | int ret; | 1598 | int ret; |
1593 | 1599 | ||
1600 | gk20a_dbg_fn(""); | ||
1601 | |||
1602 | if (!ch_ctx->gr_ctx) { | ||
1603 | gk20a_err(dev_from_gk20a(g), "no graphics context allocated"); | ||
1604 | return -EFAULT; | ||
1605 | } | ||
1606 | |||
1594 | c->g->ops.fifo.disable_channel(c); | 1607 | c->g->ops.fifo.disable_channel(c); |
1595 | ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid); | 1608 | ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid); |
1596 | if (ret) { | 1609 | if (ret) { |
1610 | c->g->ops.fifo.enable_channel(c); | ||
1597 | gk20a_err(dev_from_gk20a(g), | 1611 | gk20a_err(dev_from_gk20a(g), |
1598 | "failed to preempt channel\n"); | 1612 | "failed to preempt channel\n"); |
1599 | return ret; | 1613 | return ret; |
@@ -1603,11 +1617,18 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1603 | Flush and invalidate before cpu update. */ | 1617 | Flush and invalidate before cpu update. */ |
1604 | g->ops.mm.l2_flush(g, true); | 1618 | g->ops.mm.l2_flush(g, true); |
1605 | 1619 | ||
1620 | if (!ch_ctx->gr_ctx) { | ||
1621 | gk20a_err(dev_from_gk20a(g), "no graphics context allocated"); | ||
1622 | return -EFAULT; | ||
1623 | } | ||
1624 | |||
1606 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | 1625 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, |
1607 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | 1626 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, |
1608 | 0, pgprot_writecombine(PAGE_KERNEL)); | 1627 | 0, pgprot_writecombine(PAGE_KERNEL)); |
1609 | if (!ctx_ptr) | 1628 | if (!ctx_ptr) { |
1629 | c->g->ops.fifo.enable_channel(c); | ||
1610 | return -ENOMEM; | 1630 | return -ENOMEM; |
1631 | } | ||
1611 | 1632 | ||
1612 | data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); | 1633 | data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); |
1613 | data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); | 1634 | data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); |
@@ -1620,11 +1641,135 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1620 | vunmap(ctx_ptr); | 1641 | vunmap(ctx_ptr); |
1621 | 1642 | ||
1622 | /* enable channel */ | 1643 | /* enable channel */ |
1623 | gk20a_writel(c->g, ccsr_channel_r(c->hw_chid), | 1644 | c->g->ops.fifo.enable_channel(c); |
1624 | gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) | | 1645 | |
1625 | ccsr_channel_enable_set_true_f()); | 1646 | return 0; |
1647 | } | ||
1648 | |||
1649 | int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | ||
1650 | struct channel_gk20a *c, | ||
1651 | bool enable_hwpm_ctxsw) | ||
1652 | { | ||
1653 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | ||
1654 | struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; | ||
1655 | void *ctx_ptr = NULL; | ||
1656 | void *pm_ctx_ptr; | ||
1657 | u32 data, virt_addr; | ||
1658 | int ret; | ||
1659 | |||
1660 | gk20a_dbg_fn(""); | ||
1661 | |||
1662 | if (!ch_ctx->gr_ctx) { | ||
1663 | gk20a_err(dev_from_gk20a(g), "no graphics context allocated"); | ||
1664 | return -EFAULT; | ||
1665 | } | ||
1666 | |||
1667 | if (enable_hwpm_ctxsw) { | ||
1668 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) | ||
1669 | return 0; | ||
1670 | } else { | ||
1671 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) | ||
1672 | return 0; | ||
1673 | } | ||
1674 | |||
1675 | c->g->ops.fifo.disable_channel(c); | ||
1676 | ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid); | ||
1677 | if (ret) { | ||
1678 | c->g->ops.fifo.enable_channel(c); | ||
1679 | gk20a_err(dev_from_gk20a(g), | ||
1680 | "failed to preempt channel\n"); | ||
1681 | return ret; | ||
1682 | } | ||
1683 | |||
1684 | /* Channel gr_ctx buffer is gpu cacheable. | ||
1685 | Flush and invalidate before cpu update. */ | ||
1686 | g->ops.mm.l2_flush(g, true); | ||
1687 | |||
1688 | if (enable_hwpm_ctxsw) { | ||
1689 | /* Allocate buffer if necessary */ | ||
1690 | if (pm_ctx->mem.gpu_va == 0) { | ||
1691 | ret = gk20a_gmmu_alloc_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, | ||
1692 | g->gr.ctx_vars.pm_ctxsw_image_size, | ||
1693 | &pm_ctx->mem); | ||
1694 | if (ret) { | ||
1695 | c->g->ops.fifo.enable_channel(c); | ||
1696 | gk20a_err(dev_from_gk20a(g), | ||
1697 | "failed to allocate pm ctxt buffer"); | ||
1698 | return ret; | ||
1699 | } | ||
1700 | |||
1701 | pm_ctx->mem.gpu_va = gk20a_gmmu_map(c->vm, | ||
1702 | &pm_ctx->mem.sgt, | ||
1703 | pm_ctx->mem.size, | ||
1704 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
1705 | gk20a_mem_flag_none, true); | ||
1706 | if (!pm_ctx->mem.gpu_va) { | ||
1707 | gk20a_err(dev_from_gk20a(g), | ||
1708 | "failed to map pm ctxt buffer"); | ||
1709 | gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, | ||
1710 | &pm_ctx->mem); | ||
1711 | c->g->ops.fifo.enable_channel(c); | ||
1712 | return -ENOMEM; | ||
1713 | } | ||
1714 | } | ||
1715 | |||
1716 | /* Now clear the buffer */ | ||
1717 | pm_ctx_ptr = vmap(pm_ctx->mem.pages, | ||
1718 | PAGE_ALIGN(pm_ctx->mem.size) >> PAGE_SHIFT, | ||
1719 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
1720 | |||
1721 | if (!pm_ctx_ptr) { | ||
1722 | ret = -ENOMEM; | ||
1723 | goto cleanup_pm_buf; | ||
1724 | } | ||
1725 | |||
1726 | memset(pm_ctx_ptr, 0, pm_ctx->mem.size); | ||
1727 | |||
1728 | vunmap(pm_ctx_ptr); | ||
1729 | } | ||
1730 | |||
1731 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | ||
1732 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
1733 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
1734 | if (!ctx_ptr) { | ||
1735 | ret = -ENOMEM; | ||
1736 | goto cleanup_pm_buf; | ||
1737 | } | ||
1738 | |||
1739 | data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); | ||
1740 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); | ||
1741 | |||
1742 | if (enable_hwpm_ctxsw) { | ||
1743 | pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f(); | ||
1744 | |||
1745 | /* pack upper 32 bits of virtual address into a 32 bit number | ||
1746 | * (256 byte boundary) | ||
1747 | */ | ||
1748 | virt_addr = (u32)(pm_ctx->mem.gpu_va >> 8); | ||
1749 | } else { | ||
1750 | pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | ||
1751 | virt_addr = 0; | ||
1752 | } | ||
1753 | |||
1754 | data |= pm_ctx->pm_mode; | ||
1755 | |||
1756 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data); | ||
1757 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr); | ||
1758 | |||
1759 | vunmap(ctx_ptr); | ||
1760 | |||
1761 | /* enable channel */ | ||
1762 | c->g->ops.fifo.enable_channel(c); | ||
1626 | 1763 | ||
1627 | return 0; | 1764 | return 0; |
1765 | cleanup_pm_buf: | ||
1766 | gk20a_gmmu_unmap(c->vm, pm_ctx->mem.gpu_va, pm_ctx->mem.size, | ||
1767 | gk20a_mem_flag_none); | ||
1768 | gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &pm_ctx->mem); | ||
1769 | memset(&pm_ctx->mem, 0, sizeof(struct mem_desc)); | ||
1770 | |||
1771 | c->g->ops.fifo.enable_channel(c); | ||
1772 | return ret; | ||
1628 | } | 1773 | } |
1629 | 1774 | ||
1630 | /* load saved fresh copy of golden image into channel gr_ctx */ | 1775 |
@@ -1635,6 +1780,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1635 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 1780 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; |
1636 | u32 virt_addr_lo; | 1781 | u32 virt_addr_lo; |
1637 | u32 virt_addr_hi; | 1782 | u32 virt_addr_hi; |
1783 | u32 virt_addr = 0; | ||
1638 | u32 i, v, data; | 1784 | u32 i, v, data; |
1639 | int ret = 0; | 1785 | int ret = 0; |
1640 | void *ctx_ptr = NULL; | 1786 | void *ctx_ptr = NULL; |
@@ -1663,15 +1809,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1663 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0); | 1809 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0); |
1664 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0); | 1810 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0); |
1665 | 1811 | ||
1666 | /* no user for client managed performance counter ctx */ | ||
1667 | data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); | ||
1668 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); | ||
1669 | data |= ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | ||
1670 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, | ||
1671 | data); | ||
1672 | |||
1673 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, 0); | ||
1674 | |||
1675 | /* set priv access map */ | 1812 | /* set priv access map */ |
1676 | virt_addr_lo = | 1813 | virt_addr_lo = |
1677 | u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); | 1814 | u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); |
@@ -1708,6 +1845,32 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1708 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0, | 1845 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0, |
1709 | virt_addr_hi); | 1846 | virt_addr_hi); |
1710 | 1847 | ||
1848 | /* Update main header region of the context buffer with the info needed | ||
1849 | * for PM context switching, including mode and possibly a pointer to | ||
1850 | * the PM backing store. | ||
1851 | */ | ||
1852 | if (ch_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) { | ||
1853 | if (ch_ctx->pm_ctx.mem.gpu_va == 0) { | ||
1854 | gk20a_err(dev_from_gk20a(g), | ||
1855 | "context switched pm with no pm buffer!"); | ||
1856 | vunmap(ctx_ptr); | ||
1857 | return -EFAULT; | ||
1858 | } | ||
1859 | |||
1860 | /* pack upper 32 bits of virtual address into a 32 bit number | ||
1861 | * (256 byte boundary) | ||
1862 | */ | ||
1863 | virt_addr = (u32)(ch_ctx->pm_ctx.mem.gpu_va >> 8); | ||
1864 | } else | ||
1865 | virt_addr = 0; | ||
1866 | |||
1867 | data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); | ||
1868 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); | ||
1869 | data |= ch_ctx->pm_ctx.pm_mode; | ||
1870 | |||
1871 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data); | ||
1872 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr); | ||
1873 | |||
1711 | vunmap(ctx_ptr); | 1874 | vunmap(ctx_ptr); |
1712 | 1875 | ||
1713 | if (tegra_platform_is_linsim()) { | 1876 | if (tegra_platform_is_linsim()) { |
@@ -2205,7 +2368,6 @@ static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g) | |||
2205 | 2368 | ||
2206 | int gr_gk20a_init_ctx_state(struct gk20a *g) | 2369 | int gr_gk20a_init_ctx_state(struct gk20a *g) |
2207 | { | 2370 | { |
2208 | u32 pm_ctx_image_size; | ||
2209 | u32 ret; | 2371 | u32 ret; |
2210 | struct fecs_method_op_gk20a op = { | 2372 | struct fecs_method_op_gk20a op = { |
2211 | .mailbox = { .id = 0, .data = 0, | 2373 | .mailbox = { .id = 0, .data = 0, |
@@ -2237,7 +2399,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g) | |||
2237 | } | 2399 | } |
2238 | op.method.addr = | 2400 | op.method.addr = |
2239 | gr_fecs_method_push_adr_discover_pm_image_size_v(); | 2401 | gr_fecs_method_push_adr_discover_pm_image_size_v(); |
2240 | op.mailbox.ret = &pm_ctx_image_size; | 2402 | op.mailbox.ret = &g->gr.ctx_vars.pm_ctxsw_image_size; |
2241 | ret = gr_gk20a_submit_fecs_method_op(g, op, false); | 2403 | ret = gr_gk20a_submit_fecs_method_op(g, op, false); |
2242 | if (ret) { | 2404 | if (ret) { |
2243 | gk20a_err(dev_from_gk20a(g), | 2405 | gk20a_err(dev_from_gk20a(g), |
@@ -2641,14 +2803,30 @@ static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c) | |||
2641 | patch_ctx->data_count = 0; | 2803 | patch_ctx->data_count = 0; |
2642 | } | 2804 | } |
2643 | 2805 | ||
2806 | static void gr_gk20a_free_channel_pm_ctx(struct channel_gk20a *c) | ||
2807 | { | ||
2808 | struct pm_ctx_desc *pm_ctx = &c->ch_ctx.pm_ctx; | ||
2809 | struct gk20a *g = c->g; | ||
2810 | |||
2811 | gk20a_dbg_fn(""); | ||
2812 | |||
2813 | if (pm_ctx->mem.gpu_va) { | ||
2814 | gk20a_gmmu_unmap(c->vm, pm_ctx->mem.gpu_va, | ||
2815 | pm_ctx->mem.size, gk20a_mem_flag_none); | ||
2816 | |||
2817 | gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &pm_ctx->mem); | ||
2818 | } | ||
2819 | } | ||
2820 | |||
2644 | void gk20a_free_channel_ctx(struct channel_gk20a *c) | 2821 | void gk20a_free_channel_ctx(struct channel_gk20a *c) |
2645 | { | 2822 | { |
2646 | gr_gk20a_unmap_global_ctx_buffers(c); | 2823 | gr_gk20a_unmap_global_ctx_buffers(c); |
2647 | gr_gk20a_free_channel_patch_ctx(c); | 2824 | gr_gk20a_free_channel_patch_ctx(c); |
2825 | gr_gk20a_free_channel_pm_ctx(c); | ||
2648 | if (!gk20a_is_channel_marked_as_tsg(c)) | 2826 | if (!gk20a_is_channel_marked_as_tsg(c)) |
2649 | gr_gk20a_free_channel_gr_ctx(c); | 2827 | gr_gk20a_free_channel_gr_ctx(c); |
2650 | 2828 | ||
2651 | /* zcull_ctx, pm_ctx */ | 2829 | /* zcull_ctx */ |
2652 | 2830 | ||
2653 | memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a)); | 2831 | memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a)); |
2654 | 2832 | ||
@@ -2743,6 +2921,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, | |||
2743 | ch_ctx->gr_ctx = tsg->tsg_gr_ctx; | 2921 | ch_ctx->gr_ctx = tsg->tsg_gr_ctx; |
2744 | } | 2922 | } |
2745 | 2923 | ||
2924 | /* PM ctxt switch is off by default */ | ||
2925 | ch_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | ||
2926 | |||
2746 | /* commit gr ctx buffer */ | 2927 | /* commit gr ctx buffer */ |
2747 | err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); | 2928 | err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); |
2748 | if (err) { | 2929 | if (err) { |
@@ -2983,6 +3164,10 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) | |||
2983 | kfree(gr->ctx_vars.local_golden_image); | 3164 | kfree(gr->ctx_vars.local_golden_image); |
2984 | gr->ctx_vars.local_golden_image = NULL; | 3165 | gr->ctx_vars.local_golden_image = NULL; |
2985 | 3166 | ||
3167 | if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map) | ||
3168 | nvgpu_free(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map); | ||
3169 | gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; | ||
3170 | |||
2986 | gk20a_comptag_allocator_destroy(&gr->comp_tags); | 3171 | gk20a_comptag_allocator_destroy(&gr->comp_tags); |
2987 | } | 3172 | } |
2988 | 3173 | ||
@@ -5828,6 +6013,10 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
5828 | u32 context_buffer_size, | 6013 | u32 context_buffer_size, |
5829 | u32 *priv_offset); | 6014 | u32 *priv_offset); |
5830 | 6015 | ||
6016 | static int gr_gk20a_find_priv_offset_in_pm_buffer(struct gk20a *g, | ||
6017 | u32 addr, | ||
6018 | u32 *priv_offset); | ||
6019 | |||
5831 | /* This function will decode a priv address and return the partition type and numbers. */ | 6020 | /* This function will decode a priv address and return the partition type and numbers. */ |
5832 | static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr, | 6021 | static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr, |
5833 | int *addr_type, /* enum ctxsw_addr_type */ | 6022 | int *addr_type, /* enum ctxsw_addr_type */ |
@@ -6056,14 +6245,81 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, | |||
6056 | offset_addrs[i] = priv_registers[i]; | 6245 | offset_addrs[i] = priv_registers[i]; |
6057 | } | 6246 | } |
6058 | 6247 | ||
6059 | *num_offsets = num_registers; | 6248 | *num_offsets = num_registers; |
6249 | cleanup: | ||
6250 | if (!IS_ERR_OR_NULL(priv_registers)) | ||
6251 | kfree(priv_registers); | ||
6060 | 6252 | ||
6061 | cleanup: | 6253 | return err; |
6254 | } | ||
6255 | |||
6256 | int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g, | ||
6257 | u32 addr, | ||
6258 | u32 max_offsets, | ||
6259 | u32 *offsets, u32 *offset_addrs, | ||
6260 | u32 *num_offsets) | ||
6261 | { | ||
6262 | u32 i; | ||
6263 | u32 priv_offset = 0; | ||
6264 | u32 *priv_registers; | ||
6265 | u32 num_registers = 0; | ||
6266 | int err = 0; | ||
6267 | struct gr_gk20a *gr = &g->gr; | ||
6268 | u32 potential_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count; | ||
6269 | |||
6270 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | ||
6271 | |||
6272 | /* implementation is crossed-up if either of these happen */ | ||
6273 | if (max_offsets > potential_offsets) | ||
6274 | return -EINVAL; | ||
6275 | |||
6276 | if (!g->gr.ctx_vars.golden_image_initialized) | ||
6277 | return -ENODEV; | ||
6278 | |||
6279 | priv_registers = kzalloc(sizeof(u32) * potential_offsets, GFP_KERNEL); | ||
6280 | if (ZERO_OR_NULL_PTR(priv_registers)) { | ||
6281 | gk20a_dbg_fn("failed alloc for potential_offsets=%d", potential_offsets); | ||
6282 | return -ENOMEM; | ||
6283 | } | ||
6284 | memset(offsets, 0, sizeof(u32) * max_offsets); | ||
6285 | memset(offset_addrs, 0, sizeof(u32) * max_offsets); | ||
6286 | *num_offsets = 0; | ||
6287 | |||
6288 | gr_gk20a_create_priv_addr_table(g, addr, priv_registers, &num_registers); | ||
6289 | |||
6290 | if ((max_offsets > 1) && (num_registers > max_offsets)) { | ||
6291 | err = -EINVAL; | ||
6292 | goto cleanup; | ||
6293 | } | ||
6062 | 6294 | ||
6063 | if (!IS_ERR_OR_NULL(priv_registers)) | 6295 | if ((max_offsets == 1) && (num_registers > 1)) |
6064 | kfree(priv_registers); | 6296 | num_registers = 1; |
6297 | |||
6298 | if (!g->gr.ctx_vars.local_golden_image) { | ||
6299 | gk20a_dbg_fn("no context switch header info to work with"); | ||
6300 | err = -EINVAL; | ||
6301 | goto cleanup; | ||
6302 | } | ||
6065 | 6303 | ||
6066 | return err; | 6304 | for (i = 0; i < num_registers; i++) { |
6305 | err = gr_gk20a_find_priv_offset_in_pm_buffer(g, | ||
6306 | priv_registers[i], | ||
6307 | &priv_offset); | ||
6308 | if (err) { | ||
6309 | gk20a_dbg_fn("Could not determine priv_offset for addr:0x%x", | ||
6310 | addr); /*, grPriRegStr(addr)));*/ | ||
6311 | goto cleanup; | ||
6312 | } | ||
6313 | |||
6314 | offsets[i] = priv_offset; | ||
6315 | offset_addrs[i] = priv_registers[i]; | ||
6316 | } | ||
6317 | |||
6318 | *num_offsets = num_registers; | ||
6319 | cleanup: | ||
6320 | kfree(priv_registers); | ||
6321 | |||
6322 | return err; | ||
6067 | } | 6323 | } |
6068 | 6324 | ||
6069 | /* Setup some register tables. This looks hacky; our | 6325 | /* Setup some register tables. This looks hacky; our |
@@ -6638,8 +6894,6 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, | |||
6638 | return 0; | 6894 | return 0; |
6639 | } | 6895 | } |
6640 | 6896 | ||
6641 | |||
6642 | |||
6643 | /* | 6897 | /* |
6644 | * This function will return the 32 bit offset for a priv register if it is | 6898 | * This function will return the 32 bit offset for a priv register if it is |
6645 | * present in the context buffer. | 6899 | * present in the context buffer. |
@@ -6801,6 +7055,314 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
6801 | return -EINVAL; | 7055 | return -EINVAL; |
6802 | } | 7056 | } |
6803 | 7057 | ||
7058 | static int map_cmp(const void *a, const void *b) | ||
7059 | { | ||
7060 | struct ctxsw_buf_offset_map_entry *e1 = | ||
7061 | (struct ctxsw_buf_offset_map_entry *)a; | ||
7062 | struct ctxsw_buf_offset_map_entry *e2 = | ||
7063 | (struct ctxsw_buf_offset_map_entry *)b; | ||
7064 | |||
7065 | if (e1->addr < e2->addr) | ||
7066 | return -1; | ||
7067 | |||
7068 | if (e1->addr > e2->addr) | ||
7069 | return 1; | ||
7070 | return 0; | ||
7071 | } | ||
7072 | |||
7073 | static int add_ctxsw_buffer_map_entries(struct ctxsw_buf_offset_map_entry *map, | ||
7074 | struct aiv_list_gk20a *regs, | ||
7075 | u32 *count, u32 *offset, | ||
7076 | u32 max_cnt, u32 base, u32 mask) | ||
7077 | { | ||
7078 | u32 idx; | ||
7079 | u32 cnt = *count; | ||
7080 | u32 off = *offset; | ||
7081 | |||
7082 | if ((cnt + regs->count) > max_cnt) | ||
7083 | return -EINVAL; | ||
7084 | |||
7085 | for (idx = 0; idx < regs->count; idx++) { | ||
7086 | map[cnt].addr = base + (regs->l[idx].addr & mask); | ||
7087 | map[cnt++].offset = off; | ||
7088 | off += 4; | ||
7089 | } | ||
7090 | *count = cnt; | ||
7091 | *offset = off; | ||
7092 | return 0; | ||
7093 | } | ||
7094 | |||
7095 | /* Helper function to add register entries to the register map for all | ||
7096 | * subunits | ||
7097 | */ | ||
7098 | static int add_ctxsw_buffer_map_entries_subunits( | ||
7099 | struct ctxsw_buf_offset_map_entry *map, | ||
7100 | struct aiv_list_gk20a *regs, | ||
7101 | u32 *count, u32 *offset, | ||
7102 | u32 max_cnt, u32 base, | ||
7103 | u32 num_units, u32 stride, u32 mask) | ||
7104 | { | ||
7105 | u32 unit; | ||
7106 | u32 idx; | ||
7107 | u32 cnt = *count; | ||
7108 | u32 off = *offset; | ||
7109 | |||
7110 | if ((cnt + (regs->count * num_units)) > max_cnt) | ||
7111 | return -EINVAL; | ||
7112 | |||
7113 | /* Data is interleaved for units in ctxsw buffer */ | ||
7114 | for (idx = 0; idx < regs->count; idx++) { | ||
7115 | for (unit = 0; unit < num_units; unit++) { | ||
7116 | map[cnt].addr = base + (regs->l[idx].addr & mask) + | ||
7117 | (unit * stride); | ||
7118 | map[cnt++].offset = off; | ||
7119 | off += 4; | ||
7120 | } | ||
7121 | } | ||
7122 | *count = cnt; | ||
7123 | *offset = off; | ||
7124 | return 0; | ||
7125 | } | ||
7126 | |||
7127 | static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, | ||
7128 | struct ctxsw_buf_offset_map_entry *map, | ||
7129 | u32 *count, u32 *offset, u32 max_cnt) | ||
7130 | { | ||
7131 | u32 num_gpcs = g->gr.gpc_count; | ||
7132 | u32 num_ppcs, num_tpcs, gpc_num, base; | ||
7133 | |||
7134 | for (gpc_num = 0; gpc_num < num_gpcs; gpc_num++) { | ||
7135 | num_tpcs = g->gr.gpc_tpc_count[gpc_num]; | ||
7136 | base = proj_gpc_base_v() + | ||
7137 | (proj_gpc_stride_v() * gpc_num) + proj_tpc_in_gpc_base_v(); | ||
7138 | if (add_ctxsw_buffer_map_entries_subunits(map, | ||
7139 | &g->gr.ctx_vars.ctxsw_regs.pm_tpc, | ||
7140 | count, offset, max_cnt, base, num_tpcs, | ||
7141 | proj_tpc_in_gpc_stride_v(), | ||
7142 | (proj_tpc_in_gpc_stride_v() - 1))) | ||
7143 | return -EINVAL; | ||
7144 | |||
7145 | num_ppcs = g->gr.gpc_ppc_count[gpc_num]; | ||
7146 | base = proj_gpc_base_v() + (proj_gpc_stride_v() * gpc_num) + | ||
7147 | proj_ppc_in_gpc_base_v(); | ||
7148 | if (add_ctxsw_buffer_map_entries_subunits(map, | ||
7149 | &g->gr.ctx_vars.ctxsw_regs.pm_ppc, | ||
7150 | count, offset, max_cnt, base, num_ppcs, | ||
7151 | proj_ppc_in_gpc_stride_v(), | ||
7152 | (proj_ppc_in_gpc_stride_v() - 1))) | ||
7153 | return -EINVAL; | ||
7154 | |||
7155 | base = proj_gpc_base_v() + (proj_gpc_stride_v() * gpc_num); | ||
7156 | if (add_ctxsw_buffer_map_entries(map, | ||
7157 | &g->gr.ctx_vars.ctxsw_regs.pm_gpc, | ||
7158 | count, offset, max_cnt, base, | ||
7159 | (proj_gpc_stride_v() - 1))) | ||
7160 | return -EINVAL; | ||
7161 | |||
7162 | base = (NV_PERF_PMMGPC_CHIPLET_OFFSET * gpc_num); | ||
7163 | if (add_ctxsw_buffer_map_entries(map, | ||
7164 | &g->gr.ctx_vars.ctxsw_regs.perf_gpc, | ||
7165 | count, offset, max_cnt, base, ~0)) | ||
7166 | return -EINVAL; | ||
7167 | |||
7168 | base = (NV_PERF_PMMGPCROUTER_STRIDE * gpc_num); | ||
7169 | if (add_ctxsw_buffer_map_entries(map, | ||
7170 | &g->gr.ctx_vars.ctxsw_regs.gpc_router, | ||
7171 | count, offset, max_cnt, base, ~0)) | ||
7172 | return -EINVAL; | ||
7173 | |||
7174 | *offset = ALIGN(*offset, 256); | ||
7175 | } | ||
7176 | return 0; | ||
7177 | } | ||
7178 | |||
7179 | /* | ||
7180 | * PM CTXSW BUFFER LAYOUT : | ||
7181 | *|---------------------------------------------|0x00 <----PM CTXSW BUFFER BASE | ||
7182 | *| | | ||
7183 | *| LIST_compressed_pm_ctx_reg_SYS |Space allocated: numRegs words | ||
7184 | *|---------------------------------------------| | ||
7185 | *| | | ||
7186 | *| LIST_compressed_nv_perf_ctx_reg_SYS |Space allocated: numRegs words | ||
7187 | *|---------------------------------------------| | ||
7188 | *| PADDING for 256 byte alignment | | ||
7189 | *|---------------------------------------------|<----256 byte aligned | ||
7190 | *| LIST_compressed_nv_perf_fbp_ctx_regs | | ||
7191 | *| |Space allocated: numRegs * n words (for n FB units) | ||
7192 | *|---------------------------------------------| | ||
7193 | *| LIST_compressed_nv_perf_fbprouter_ctx_regs | | ||
7194 | *| |Space allocated: numRegs * n words (for n FB units) | ||
7195 | *|---------------------------------------------| | ||
7196 | *| LIST_compressed_pm_fbpa_ctx_regs | | ||
7197 | *| |Space allocated: numRegs * n words (for n FB units) | ||
7198 | *|---------------------------------------------| | ||
7199 | *| LIST_compressed_pm_ltc_ctx_regs | | ||
7200 | *| LTC0 LTS0 | | ||
7201 | *| LTC1 LTS0 |Space allocated: numRegs * n words (for n LTC units) | ||
7202 | *| LTCn LTS0 | | ||
7203 | *| LTC0 LTS1 | | ||
7204 | *| LTC1 LTS1 | | ||
7205 | *| LTCn LTS1 | | ||
7206 | *| LTC0 LTSn | | ||
7207 | *| LTC1 LTSn | | ||
7208 | *| LTCn LTSn | | ||
7209 | *|---------------------------------------------| | ||
7210 | *| PADDING for 256 byte alignment | | ||
7211 | *|---------------------------------------------|<----256 byte aligned | ||
7212 | *| GPC0 REG0 TPC0 |Each GPC has space allocated to accommodate | ||
7213 | *| REG0 TPC1 | all the GPC/TPC register lists | ||
7214 | *| Lists in each GPC region: REG0 TPCn |Per GPC allocated space is always 256 byte aligned | ||
7215 | *| LIST_pm_ctx_reg_TPC REG1 TPC0 | | ||
7216 | *| * numTpcs REG1 TPC1 | | ||
7217 | *| LIST_pm_ctx_reg_PPC REG1 TPCn | | ||
7218 | *| * numPpcs REGn TPC0 | | ||
7219 | *| LIST_pm_ctx_reg_GPC REGn TPC1 | | ||
7220 | *| LIST_nv_perf_ctx_reg_GPC REGn TPCn | | ||
7221 | *| ---- |-- | ||
7222 | *| GPC1 . | | ||
7223 | *| . |<---- | ||
7224 | *|---------------------------------------------| | ||
7225 | *= = | ||
7226 | *| GPCn | | ||
7227 | *= = | ||
7228 | *|---------------------------------------------| | ||
7229 | */ | ||
7230 | |||
7231 | static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) | ||
7232 | { | ||
7233 | u32 hwpm_ctxsw_buffer_size = g->gr.ctx_vars.pm_ctxsw_image_size; | ||
7234 | u32 hwpm_ctxsw_reg_count_max; | ||
7235 | u32 map_size; | ||
7236 | u32 i, count = 0; | ||
7237 | u32 offset = 0; | ||
7238 | struct ctxsw_buf_offset_map_entry *map; | ||
7239 | |||
7240 | if (hwpm_ctxsw_buffer_size == 0) { | ||
7241 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
7242 | "no PM Ctxsw buffer memory in context buffer"); | ||
7243 | return -EINVAL; | ||
7244 | } | ||
7245 | |||
7246 | hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2; | ||
7247 | map_size = hwpm_ctxsw_reg_count_max * sizeof(*map); | ||
7248 | |||
7249 | map = nvgpu_alloc(map_size, true); | ||
7250 | if (!map) | ||
7251 | return -ENOMEM; | ||
7252 | |||
7253 | /* Add entries from _LIST_pm_ctx_reg_SYS */ | ||
7254 | if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys, | ||
7255 | &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) | ||
7256 | goto cleanup; | ||
7257 | |||
7258 | /* Add entries from _LIST_nv_perf_ctx_reg_SYS */ | ||
7259 | if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys, | ||
7260 | &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) | ||
7261 | goto cleanup; | ||
7262 | |||
7263 | offset = ALIGN(offset, 256); | ||
7264 | |||
7265 | /* Add entries from _LIST_nv_perf_fbp_ctx_regs */ | ||
7266 | if (add_ctxsw_buffer_map_entries_subunits(map, | ||
7267 | &g->gr.ctx_vars.ctxsw_regs.fbp, | ||
7268 | &count, &offset, | ||
7269 | hwpm_ctxsw_reg_count_max, 0, | ||
7270 | g->gr.num_fbps, NV_PMM_FBP_STRIDE, ~0)) | ||
7271 | goto cleanup; | ||
7272 | |||
7273 | /* Add entries from _LIST_nv_perf_fbprouter_ctx_regs */ | ||
7274 | if (add_ctxsw_buffer_map_entries_subunits(map, | ||
7275 | &g->gr.ctx_vars.ctxsw_regs.fbp_router, | ||
7276 | &count, &offset, | ||
7277 | hwpm_ctxsw_reg_count_max, 0, g->gr.num_fbps, | ||
7278 | NV_PERF_PMM_FBP_ROUTER_STRIDE, ~0)) | ||
7279 | goto cleanup; | ||
7280 | |||
7281 | /* Add entries from _LIST_nv_pm_fbpa_ctx_regs */ | ||
7282 | if (add_ctxsw_buffer_map_entries_subunits(map, | ||
7283 | &g->gr.ctx_vars.ctxsw_regs.pm_fbpa, | ||
7284 | &count, &offset, | ||
7285 | hwpm_ctxsw_reg_count_max, 0, | ||
7286 | proj_scal_litter_num_fbpas_v(), | ||
7287 | proj_fbpa_stride_v(), ~0)) | ||
7288 | goto cleanup; | ||
7289 | |||
7290 | /* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */ | ||
7291 | if (add_ctxsw_buffer_map_entries_subunits(map, | ||
7292 | &g->gr.ctx_vars.ctxsw_regs.pm_ltc, | ||
7293 | &count, &offset, | ||
7294 | hwpm_ctxsw_reg_count_max, 0, | ||
7295 | g->ltc_count, proj_ltc_stride_v(), ~0)) | ||
7296 | goto cleanup; | ||
7297 | |||
7298 | offset = ALIGN(offset, 256); | ||
7299 | |||
7300 | /* Add GPC entries */ | ||
7301 | if (add_ctxsw_buffer_map_entries_gpcs(g, map, &count, &offset, | ||
7302 | hwpm_ctxsw_reg_count_max)) | ||
7303 | goto cleanup; | ||
7304 | |||
7305 | if (offset > hwpm_ctxsw_buffer_size) { | ||
7306 | gk20a_err(dev_from_gk20a(g), "offset > buffer size"); | ||
7307 | goto cleanup; | ||
7308 | } | ||
7309 | |||
7310 | sort(map, count, sizeof(*map), map_cmp, NULL); | ||
7311 | |||
7312 | g->gr.ctx_vars.hwpm_ctxsw_buffer_offset_map = map; | ||
7313 | g->gr.ctx_vars.hwpm_ctxsw_buffer_offset_map_count = count; | ||
7314 | |||
7315 | gk20a_dbg_info("Reg Addr => HWPM Ctxt switch buffer offset"); | ||
7316 | |||
7317 | for (i = 0; i < count; i++) | ||
7318 | gk20a_dbg_info("%08x => %08x", map[i].addr, map[i].offset); | ||
7319 | |||
7320 | return 0; | ||
7321 | cleanup: | ||
7322 | gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map"); | ||
7323 | nvgpu_free(map); | ||
7324 | return -EINVAL; | ||
7325 | } | ||
7326 | |||
7327 | /* | ||
7328 | * This function will return the 32 bit offset for a priv register if it is | ||
7329 | * present in the PM context buffer. | ||
7330 | */ | ||
7331 | static int gr_gk20a_find_priv_offset_in_pm_buffer(struct gk20a *g, | ||
7332 | u32 addr, | ||
7333 | u32 *priv_offset) | ||
7334 | { | ||
7335 | struct gr_gk20a *gr = &g->gr; | ||
7336 | int err = 0; | ||
7337 | u32 count; | ||
7338 | struct ctxsw_buf_offset_map_entry *map, *result, map_key; | ||
7339 | |||
7340 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | ||
7341 | |||
7342 | /* Create map of pri address and pm offset if necessary */ | ||
7343 | if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map == NULL) { | ||
7344 | err = gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(g); | ||
7345 | if (err) | ||
7346 | return err; | ||
7347 | } | ||
7348 | |||
7349 | *priv_offset = 0; | ||
7350 | |||
7351 | map = gr->ctx_vars.hwpm_ctxsw_buffer_offset_map; | ||
7352 | count = gr->ctx_vars.hwpm_ctxsw_buffer_offset_map_count; | ||
7353 | |||
7354 | map_key.addr = addr; | ||
7355 | result = bsearch(&map_key, map, count, sizeof(*map), map_cmp); | ||
7356 | |||
7357 | if (result) | ||
7358 | *priv_offset = result->offset; | ||
7359 | else { | ||
7360 | gk20a_err(dev_from_gk20a(g), "Lookup failed for address 0x%x", addr); | ||
7361 | err = -EINVAL; | ||
7362 | } | ||
7363 | return err; | ||
7364 | } | ||
7365 | |||
6804 | bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch) | 7366 | bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch) |
6805 | { | 7367 | { |
6806 | int curr_gr_ctx, curr_gr_tsgid; | 7368 | int curr_gr_ctx, curr_gr_tsgid; |
@@ -6840,6 +7402,8 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
6840 | struct gk20a *g = ch->g; | 7402 | struct gk20a *g = ch->g; |
6841 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | 7403 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; |
6842 | void *ctx_ptr = NULL; | 7404 | void *ctx_ptr = NULL; |
7405 | void *pm_ctx_ptr = NULL; | ||
7406 | void *base_ptr = NULL; | ||
6843 | bool ch_is_curr_ctx, restart_gr_ctxsw = false; | 7407 | bool ch_is_curr_ctx, restart_gr_ctxsw = false; |
6844 | u32 i, j, offset, v; | 7408 | u32 i, j, offset, v; |
6845 | struct gr_gk20a *gr = &g->gr; | 7409 | struct gr_gk20a *gr = &g->gr; |
@@ -6940,15 +7504,6 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
6940 | } | 7504 | } |
6941 | offset_addrs = offsets + max_offsets; | 7505 | offset_addrs = offsets + max_offsets; |
6942 | 7506 | ||
6943 | /* would have been a variant of gr_gk20a_apply_instmem_overrides */ | ||
6944 | /* recoded in-place instead.*/ | ||
6945 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | ||
6946 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
6947 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
6948 | if (!ctx_ptr) { | ||
6949 | err = -ENOMEM; | ||
6950 | goto cleanup; | ||
6951 | } | ||
6952 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); | 7507 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); |
6953 | if (err) | 7508 | if (err) |
6954 | goto cleanup; | 7509 | goto cleanup; |
@@ -6977,13 +7532,52 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
6977 | &num_offsets, | 7532 | &num_offsets, |
6978 | ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD), | 7533 | ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD), |
6979 | ctx_ops[i].quad); | 7534 | ctx_ops[i].quad); |
6980 | if (err) { | 7535 | if (!err) { |
6981 | gk20a_dbg(gpu_dbg_gpu_dbg, | 7536 | if (!ctx_ptr) { |
7537 | /* would have been a variant of | ||
7538 | * gr_gk20a_apply_instmem_overrides, | ||
7539 | * recoded in-place instead. | ||
7540 | */ | ||
7541 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | ||
7542 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
7543 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
7544 | if (!ctx_ptr) { | ||
7545 | err = -ENOMEM; | ||
7546 | goto cleanup; | ||
7547 | } | ||
7548 | } | ||
7549 | base_ptr = ctx_ptr; | ||
7550 | } else { | ||
7551 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, | ||
7552 | ctx_ops[i].offset, | ||
7553 | max_offsets, | ||
7554 | offsets, offset_addrs, | ||
7555 | &num_offsets); | ||
7556 | if (err) { | ||
7557 | gk20a_dbg(gpu_dbg_gpu_dbg, | ||
6982 | "ctx op invalid offset: offset=0x%x", | 7558 | "ctx op invalid offset: offset=0x%x", |
6983 | ctx_ops[i].offset); | 7559 | ctx_ops[i].offset); |
6984 | ctx_ops[i].status = | 7560 | ctx_ops[i].status = |
6985 | NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET; | 7561 | NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET; |
6986 | continue; | 7562 | continue; |
7563 | } | ||
7564 | if (!pm_ctx_ptr) { | ||
7565 | /* Make sure ctx buffer was initialized */ | ||
7566 | if (!ch_ctx->pm_ctx.mem.pages) { | ||
7567 | gk20a_err(dev_from_gk20a(g), | ||
7568 | "Invalid ctx buffer"); | ||
7569 | err = -EINVAL; | ||
7570 | goto cleanup; | ||
7571 | } | ||
7572 | pm_ctx_ptr = vmap(ch_ctx->pm_ctx.mem.pages, | ||
7573 | PAGE_ALIGN(ch_ctx->pm_ctx.mem.size) >> PAGE_SHIFT, | ||
7574 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
7575 | if (!pm_ctx_ptr) { | ||
7576 | err = -ENOMEM; | ||
7577 | goto cleanup; | ||
7578 | } | ||
7579 | } | ||
7580 | base_ptr = pm_ctx_ptr; | ||
6987 | } | 7581 | } |
6988 | 7582 | ||
6989 | /* if this is a quad access, setup for special access*/ | 7583 | /* if this is a quad access, setup for special access*/ |
@@ -6993,24 +7587,27 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
6993 | ctx_ops[i].offset); | 7587 | ctx_ops[i].offset); |
6994 | 7588 | ||
6995 | for (j = 0; j < num_offsets; j++) { | 7589 | for (j = 0; j < num_offsets; j++) { |
6996 | /* sanity check, don't write outside, worst case */ | 7590 | /* sanity check gr ctxt offsets, |
6997 | if (offsets[j] >= g->gr.ctx_vars.golden_image_size) | 7591 | * don't write outside, worst case |
7592 | */ | ||
7593 | if ((base_ptr == ctx_ptr) && | ||
7594 | (offsets[j] >= g->gr.ctx_vars.golden_image_size)) | ||
6998 | continue; | 7595 | continue; |
6999 | if (pass == 0) { /* write pass */ | 7596 | if (pass == 0) { /* write pass */ |
7000 | v = gk20a_mem_rd32(ctx_ptr + offsets[j], 0); | 7597 | v = gk20a_mem_rd32(base_ptr + offsets[j], 0); |
7001 | v &= ~ctx_ops[i].and_n_mask_lo; | 7598 | v &= ~ctx_ops[i].and_n_mask_lo; |
7002 | v |= ctx_ops[i].value_lo; | 7599 | v |= ctx_ops[i].value_lo; |
7003 | gk20a_mem_wr32(ctx_ptr + offsets[j], 0, v); | 7600 | gk20a_mem_wr32(base_ptr + offsets[j], 0, v); |
7004 | 7601 | ||
7005 | gk20a_dbg(gpu_dbg_gpu_dbg, | 7602 | gk20a_dbg(gpu_dbg_gpu_dbg, |
7006 | "context wr: offset=0x%x v=0x%x", | 7603 | "context wr: offset=0x%x v=0x%x", |
7007 | offsets[j], v); | 7604 | offsets[j], v); |
7008 | 7605 | ||
7009 | if (ctx_ops[i].op == REGOP(WRITE_64)) { | 7606 | if (ctx_ops[i].op == REGOP(WRITE_64)) { |
7010 | v = gk20a_mem_rd32(ctx_ptr + offsets[j] + 4, 0); | 7607 | v = gk20a_mem_rd32(base_ptr + offsets[j] + 4, 0); |
7011 | v &= ~ctx_ops[i].and_n_mask_hi; | 7608 | v &= ~ctx_ops[i].and_n_mask_hi; |
7012 | v |= ctx_ops[i].value_hi; | 7609 | v |= ctx_ops[i].value_hi; |
7013 | gk20a_mem_wr32(ctx_ptr + offsets[j] + 4, 0, v); | 7610 | gk20a_mem_wr32(base_ptr + offsets[j] + 4, 0, v); |
7014 | 7611 | ||
7015 | gk20a_dbg(gpu_dbg_gpu_dbg, | 7612 | gk20a_dbg(gpu_dbg_gpu_dbg, |
7016 | "context wr: offset=0x%x v=0x%x", | 7613 | "context wr: offset=0x%x v=0x%x", |
@@ -7020,18 +7617,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7020 | /* check to see if we need to add a special WAR | 7617 | /* check to see if we need to add a special WAR |
7021 | for some of the SMPC perf regs */ | 7618 | for some of the SMPC perf regs */ |
7022 | gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], | 7619 | gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], |
7023 | v, ctx_ptr); | 7620 | v, base_ptr); |
7024 | 7621 | ||
7025 | } else { /* read pass */ | 7622 | } else { /* read pass */ |
7026 | ctx_ops[i].value_lo = | 7623 | ctx_ops[i].value_lo = |
7027 | gk20a_mem_rd32(ctx_ptr + offsets[0], 0); | 7624 | gk20a_mem_rd32(base_ptr + offsets[0], 0); |
7028 | 7625 | ||
7029 | gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x", | 7626 | gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x", |
7030 | offsets[0], ctx_ops[i].value_lo); | 7627 | offsets[0], ctx_ops[i].value_lo); |
7031 | 7628 | ||
7032 | if (ctx_ops[i].op == REGOP(READ_64)) { | 7629 | if (ctx_ops[i].op == REGOP(READ_64)) { |
7033 | ctx_ops[i].value_hi = | 7630 | ctx_ops[i].value_hi = |
7034 | gk20a_mem_rd32(ctx_ptr + offsets[0] + 4, 0); | 7631 | gk20a_mem_rd32(base_ptr + offsets[0] + 4, 0); |
7035 | 7632 | ||
7036 | gk20a_dbg(gpu_dbg_gpu_dbg, | 7633 | gk20a_dbg(gpu_dbg_gpu_dbg, |
7037 | "context rd: offset=0x%x v=0x%x", | 7634 | "context rd: offset=0x%x v=0x%x", |
@@ -7062,6 +7659,9 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7062 | if (ctx_ptr) | 7659 | if (ctx_ptr) |
7063 | vunmap(ctx_ptr); | 7660 | vunmap(ctx_ptr); |
7064 | 7661 | ||
7662 | if (pm_ctx_ptr) | ||
7663 | vunmap(pm_ctx_ptr); | ||
7664 | |||
7065 | if (restart_gr_ctxsw) { | 7665 | if (restart_gr_ctxsw) { |
7066 | int tmp_err = gr_gk20a_enable_ctxsw(g); | 7666 | int tmp_err = gr_gk20a_enable_ctxsw(g); |
7067 | if (tmp_err) { | 7667 | if (tmp_err) { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 2c575534..c82cf75c 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -198,8 +198,13 @@ struct gr_gk20a { | |||
198 | u32 golden_image_size; | 198 | u32 golden_image_size; |
199 | u32 *local_golden_image; | 199 | u32 *local_golden_image; |
200 | 200 | ||
201 | u32 hwpm_ctxsw_buffer_offset_map_count; | ||
202 | struct ctxsw_buf_offset_map_entry *hwpm_ctxsw_buffer_offset_map; | ||
203 | |||
201 | u32 zcull_ctxsw_image_size; | 204 | u32 zcull_ctxsw_image_size; |
202 | 205 | ||
206 | u32 pm_ctxsw_image_size; | ||
207 | |||
203 | u32 buffer_header_size; | 208 | u32 buffer_header_size; |
204 | 209 | ||
205 | u32 priv_access_map_size; | 210 | u32 priv_access_map_size; |
@@ -219,6 +224,14 @@ struct gr_gk20a { | |||
219 | struct aiv_list_gk20a pm_sys; | 224 | struct aiv_list_gk20a pm_sys; |
220 | struct aiv_list_gk20a pm_gpc; | 225 | struct aiv_list_gk20a pm_gpc; |
221 | struct aiv_list_gk20a pm_tpc; | 226 | struct aiv_list_gk20a pm_tpc; |
227 | struct aiv_list_gk20a pm_ppc; | ||
228 | struct aiv_list_gk20a perf_sys; | ||
229 | struct aiv_list_gk20a perf_gpc; | ||
230 | struct aiv_list_gk20a fbp; | ||
231 | struct aiv_list_gk20a fbp_router; | ||
232 | struct aiv_list_gk20a gpc_router; | ||
233 | struct aiv_list_gk20a pm_ltc; | ||
234 | struct aiv_list_gk20a pm_fbpa; | ||
222 | } ctxsw_regs; | 235 | } ctxsw_regs; |
223 | int regs_base_index; | 236 | int regs_base_index; |
224 | bool valid; | 237 | bool valid; |
@@ -484,9 +497,17 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, | |||
484 | u32 *offsets, u32 *offset_addrs, | 497 | u32 *offsets, u32 *offset_addrs, |
485 | u32 *num_offsets, | 498 | u32 *num_offsets, |
486 | bool is_quad, u32 quad); | 499 | bool is_quad, u32 quad); |
500 | int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g, | ||
501 | u32 addr, | ||
502 | u32 max_offsets, | ||
503 | u32 *offsets, u32 *offset_addrs, | ||
504 | u32 *num_offsets); | ||
487 | int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | 505 | int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, |
488 | struct channel_gk20a *c, | 506 | struct channel_gk20a *c, |
489 | bool enable_smpc_ctxsw); | 507 | bool enable_smpc_ctxsw); |
508 | int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | ||
509 | struct channel_gk20a *c, | ||
510 | bool enable_hwpm_ctxsw); | ||
490 | 511 | ||
491 | struct channel_ctx_gk20a; | 512 | struct channel_ctx_gk20a; |
492 | int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, | 513 | int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, |
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h index da555f7c..08834557 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h | |||
@@ -94,6 +94,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void) | |||
94 | { | 94 | { |
95 | return 0x7 << 0; | 95 | return 0x7 << 0; |
96 | } | 96 | } |
97 | static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void) | ||
98 | { | ||
99 | return 0x1; | ||
100 | } | ||
97 | static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void) | 101 | static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void) |
98 | { | 102 | { |
99 | return 0x0; | 103 | return 0x0; |
diff --git a/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h index 93c55c30..ce10db35 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2012-2016, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms and conditions of the GNU General Public License, | 5 | * under the terms and conditions of the GNU General Public License, |
@@ -70,6 +70,10 @@ static inline u32 proj_lts_stride_v(void) | |||
70 | { | 70 | { |
71 | return 0x00000400; | 71 | return 0x00000400; |
72 | } | 72 | } |
73 | static inline u32 proj_fbpa_stride_v(void) | ||
74 | { | ||
75 | return 0x00001000; | ||
76 | } | ||
73 | static inline u32 proj_ppc_in_gpc_base_v(void) | 77 | static inline u32 proj_ppc_in_gpc_base_v(void) |
74 | { | 78 | { |
75 | return 0x00003000; | 79 | return 0x00003000; |
@@ -114,6 +118,10 @@ static inline u32 proj_scal_litter_num_fbps_v(void) | |||
114 | { | 118 | { |
115 | return 0x00000001; | 119 | return 0x00000001; |
116 | } | 120 | } |
121 | static inline u32 proj_scal_litter_num_fbpas_v(void) | ||
122 | { | ||
123 | return 0x00000001; | ||
124 | } | ||
117 | static inline u32 proj_scal_litter_num_gpcs_v(void) | 125 | static inline u32 proj_scal_litter_num_gpcs_v(void) |
118 | { | 126 | { |
119 | return 0x00000001; | 127 | return 0x00000001; |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 368b32d3..833d896d 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GK20A memory management | 2 | * GK20A memory management |
3 | * | 3 | * |
4 | * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -73,6 +73,11 @@ struct zcull_ctx_desc { | |||
73 | u32 ctx_sw_mode; | 73 | u32 ctx_sw_mode; |
74 | }; | 74 | }; |
75 | 75 | ||
76 | struct pm_ctx_desc { | ||
77 | struct mem_desc mem; | ||
78 | u32 pm_mode; | ||
79 | }; | ||
80 | |||
76 | struct gk20a; | 81 | struct gk20a; |
77 | struct gr_ctx_buffer_desc { | 82 | struct gr_ctx_buffer_desc { |
78 | void (*destroy)(struct gk20a *, struct gr_ctx_buffer_desc *); | 83 | void (*destroy)(struct gk20a *, struct gr_ctx_buffer_desc *); |
diff --git a/drivers/gpu/nvgpu/gk20a/regops_gk20a.c b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c index 1696f759..e6162af2 100644 --- a/drivers/gpu/nvgpu/gk20a/regops_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Tegra GK20A GPU Debugger Driver Register Ops | 2 | * Tegra GK20A GPU Debugger Driver Register Ops |
3 | * | 3 | * |
4 | * Copyright (c) 2013-2015, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2013-2016, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -644,22 +644,31 @@ static int validate_reg_op_offset(struct dbg_session_gk20a *dbg_s, | |||
644 | valid = check_whitelists(dbg_s, op, offset + 4); | 644 | valid = check_whitelists(dbg_s, op, offset + 4); |
645 | 645 | ||
646 | if (valid && (op->type != REGOP(TYPE_GLOBAL))) { | 646 | if (valid && (op->type != REGOP(TYPE_GLOBAL))) { |
647 | err = gr_gk20a_get_ctx_buffer_offsets(dbg_s->g, | 647 | err = gr_gk20a_get_ctx_buffer_offsets(dbg_s->g, |
648 | op->offset, | ||
649 | 1, | ||
650 | &buf_offset_lo, | ||
651 | &buf_offset_addr, | ||
652 | &num_offsets, | ||
653 | op->type == REGOP(TYPE_GR_CTX_QUAD), | ||
654 | op->quad); | ||
655 | if (err) { | ||
656 | err = gr_gk20a_get_pm_ctx_buffer_offsets(dbg_s->g, | ||
648 | op->offset, | 657 | op->offset, |
649 | 1, | 658 | 1, |
650 | &buf_offset_lo, | 659 | &buf_offset_lo, |
651 | &buf_offset_addr, | 660 | &buf_offset_addr, |
652 | &num_offsets, | 661 | &num_offsets); |
653 | op->type == REGOP(TYPE_GR_CTX_QUAD), | 662 | |
654 | op->quad); | ||
655 | if (err) { | 663 | if (err) { |
656 | op->status |= REGOP(STATUS_INVALID_OFFSET); | 664 | op->status |= REGOP(STATUS_INVALID_OFFSET); |
657 | return -EINVAL; | 665 | return -EINVAL; |
658 | } | 666 | } |
659 | if (!buf_offset_lo) { | 667 | } |
660 | op->status |= REGOP(STATUS_INVALID_OFFSET); | 668 | if (!buf_offset_lo) { |
661 | return -EINVAL; | 669 | op->status |= REGOP(STATUS_INVALID_OFFSET); |
662 | } | 670 | return -EINVAL; |
671 | } | ||
663 | } | 672 | } |
664 | 673 | ||
665 | if (!valid) { | 674 | if (!valid) { |
diff --git a/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h index f9531ae1..b837918c 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms and conditions of the GNU General Public License, | 5 | * under the terms and conditions of the GNU General Public License, |
@@ -70,6 +70,10 @@ static inline u32 proj_lts_stride_v(void) | |||
70 | { | 70 | { |
71 | return 0x00000200; | 71 | return 0x00000200; |
72 | } | 72 | } |
73 | static inline u32 proj_fbpa_stride_v(void) | ||
74 | { | ||
75 | return 0x00001000; | ||
76 | } | ||
73 | static inline u32 proj_ppc_in_gpc_base_v(void) | 77 | static inline u32 proj_ppc_in_gpc_base_v(void) |
74 | { | 78 | { |
75 | return 0x00003000; | 79 | return 0x00003000; |
@@ -114,6 +118,10 @@ static inline u32 proj_scal_litter_num_fbps_v(void) | |||
114 | { | 118 | { |
115 | return 0x00000001; | 119 | return 0x00000001; |
116 | } | 120 | } |
121 | static inline u32 proj_scal_litter_num_fbpas_v(void) | ||
122 | { | ||
123 | return 0x00000001; | ||
124 | } | ||
117 | static inline u32 proj_scal_litter_num_gpcs_v(void) | 125 | static inline u32 proj_scal_litter_num_gpcs_v(void) |
118 | { | 126 | { |
119 | return 0x00000001; | 127 | return 0x00000001; |
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 45d1c217..a75a5ae0 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h | |||
@@ -647,8 +647,20 @@ struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args { | |||
647 | _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 12, struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args) | 647 | _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 12, struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args) |
648 | 648 | ||
649 | 649 | ||
650 | /* PM Context Switch Mode */ | ||
651 | #define NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW (0x00000000) | ||
652 | #define NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW (0x00000001) | ||
653 | |||
654 | struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args { | ||
655 | __u32 mode; | ||
656 | __u32 reserved; | ||
657 | }; | ||
658 | |||
659 | #define NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE \ | ||
660 | _IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 13, struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args) | ||
661 | |||
650 | #define NVGPU_DBG_GPU_IOCTL_LAST \ | 662 | #define NVGPU_DBG_GPU_IOCTL_LAST \ |
651 | _IOC_NR(NVGPU_DBG_GPU_IOCTL_SET_NEXT_STOP_TRIGGER_TYPE) | 663 | _IOC_NR(NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE) |
652 | 664 | ||
653 | #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \ | 665 | #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \ |
654 | sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args) | 666 | sizeof(struct nvgpu_dbg_gpu_perfbuf_map_args) |