diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2017-06-22 15:58:43 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-06-30 21:34:58 -0400 |
commit | 82c0c96290602b1baf296133c7f55ae1848e433a (patch) | |
tree | 1f32dc8571ce777c50984be1194c68ddeea73b38 /drivers | |
parent | 3ffcadc8dd6152d4ce2677b4e5d8e043a3df58ac (diff) |
gpu: nvgpu: Remove gk20a support
Remove gk20a support. Leave only the gk20a code that is reused by other
GPUs.
JIRA NVGPU-38
Change-Id: I3d5f2bc9f71cd9f161e64436561a5eadd5786a3b
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master/r/1507927
GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers')
33 files changed, 9 insertions, 2660 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index 3d0c19c3..d062f42c 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu | |||
@@ -89,9 +89,7 @@ nvgpu-y := \ | |||
89 | gk20a/ltc_gk20a.o \ | 89 | gk20a/ltc_gk20a.o \ |
90 | gk20a/fb_gk20a.o \ | 90 | gk20a/fb_gk20a.o \ |
91 | gk20a/hal.o \ | 91 | gk20a/hal.o \ |
92 | gk20a/hal_gk20a.o \ | ||
93 | gk20a/cde_gk20a.o \ | 92 | gk20a/cde_gk20a.o \ |
94 | gk20a/platform_gk20a_generic.o \ | ||
95 | gk20a/tsg_gk20a.o \ | 93 | gk20a/tsg_gk20a.o \ |
96 | gk20a/ctxsw_trace_gk20a.o \ | 94 | gk20a/ctxsw_trace_gk20a.o \ |
97 | gk20a/fecs_trace_gk20a.o \ | 95 | gk20a/fecs_trace_gk20a.o \ |
@@ -153,8 +151,6 @@ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \ | |||
153 | vgpu/tsg_vgpu.o \ | 151 | vgpu/tsg_vgpu.o \ |
154 | vgpu/clk_vgpu.o \ | 152 | vgpu/clk_vgpu.o \ |
155 | vgpu/css_vgpu.o \ | 153 | vgpu/css_vgpu.o \ |
156 | vgpu/gk20a/vgpu_hal_gk20a.o \ | ||
157 | vgpu/gk20a/vgpu_gr_gk20a.o \ | ||
158 | vgpu/gm20b/vgpu_hal_gm20b.o \ | 154 | vgpu/gm20b/vgpu_hal_gm20b.o \ |
159 | vgpu/gm20b/vgpu_gr_gm20b.o \ | 155 | vgpu/gm20b/vgpu_gr_gm20b.o \ |
160 | vgpu/sysfs_vgpu.o | 156 | vgpu/sysfs_vgpu.o |
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index 30a2fcd9..bb7e260d 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c | |||
@@ -234,8 +234,6 @@ done: | |||
234 | 234 | ||
235 | static struct of_device_id tegra_gk20a_of_match[] = { | 235 | static struct of_device_id tegra_gk20a_of_match[] = { |
236 | #ifdef CONFIG_TEGRA_GK20A | 236 | #ifdef CONFIG_TEGRA_GK20A |
237 | { .compatible = "nvidia,tegra124-gk20a", | ||
238 | .data = &gk20a_tegra_platform }, | ||
239 | { .compatible = "nvidia,tegra210-gm20b", | 237 | { .compatible = "nvidia,tegra210-gm20b", |
240 | .data = &gm20b_tegra_platform }, | 238 | .data = &gm20b_tegra_platform }, |
241 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | 239 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC |
@@ -254,25 +252,8 @@ static struct of_device_id tegra_gk20a_of_match[] = { | |||
254 | { .compatible = "nvidia,tegra124-gk20a-vgpu", | 252 | { .compatible = "nvidia,tegra124-gk20a-vgpu", |
255 | .data = &vgpu_tegra_platform }, | 253 | .data = &vgpu_tegra_platform }, |
256 | #endif | 254 | #endif |
257 | #else | ||
258 | { .compatible = "nvidia,tegra124-gk20a", | ||
259 | .data = &gk20a_generic_platform }, | ||
260 | { .compatible = "nvidia,tegra210-gm20b", | ||
261 | .data = &gk20a_generic_platform }, | ||
262 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | ||
263 | { .compatible = TEGRA_18x_GPU_COMPAT_TEGRA, | ||
264 | .data = &gk20a_generic_platform }, | ||
265 | #endif | 255 | #endif |
266 | 256 | ||
267 | #endif | ||
268 | { .compatible = "nvidia,generic-gk20a", | ||
269 | .data = &gk20a_generic_platform }, | ||
270 | { .compatible = "nvidia,generic-gm20b", | ||
271 | .data = &gk20a_generic_platform }, | ||
272 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | ||
273 | { .compatible = "nvidia,generic-gp10b", | ||
274 | .data = &gk20a_generic_platform }, | ||
275 | #endif | ||
276 | { }, | 257 | { }, |
277 | }; | 258 | }; |
278 | 259 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c index ec14c08e..fda3bc95 100644 --- a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c +++ b/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c | |||
@@ -47,7 +47,6 @@ | |||
47 | #include <nvgpu/linux/dma.h> | 47 | #include <nvgpu/linux/dma.h> |
48 | 48 | ||
49 | #include "gk20a/gk20a.h" | 49 | #include "gk20a/gk20a.h" |
50 | #include "gk20a/hal_gk20a.h" | ||
51 | #include "gk20a/platform_gk20a.h" | 50 | #include "gk20a/platform_gk20a.h" |
52 | #include "gk20a/gk20a_scale.h" | 51 | #include "gk20a/gk20a_scale.h" |
53 | #include "gm20b/clk_gm20b.h" | 52 | #include "gm20b/clk_gm20b.h" |
@@ -232,66 +231,6 @@ static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g, | |||
232 | } | 231 | } |
233 | 232 | ||
234 | /* | 233 | /* |
235 | * gk20a_tegra_postscale(profile, freq) | ||
236 | * | ||
237 | * This function sets emc frequency based on current gpu frequency | ||
238 | */ | ||
239 | |||
240 | static void gk20a_tegra_postscale(struct device *dev, | ||
241 | unsigned long freq) | ||
242 | { | ||
243 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
244 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
245 | struct gk20a_emc_params *emc_params = profile->private_data; | ||
246 | struct gk20a *g = get_gk20a(dev); | ||
247 | struct clk *emc_clk = platform->clk[2]; | ||
248 | enum tegra_chipid chip_id = tegra_get_chip_id(); | ||
249 | unsigned long emc_target; | ||
250 | unsigned long emc_freq_lower, emc_freq_upper, emc_freq_rounded; | ||
251 | |||
252 | emc_target = gk20a_tegra_get_emc_rate(g, emc_params); | ||
253 | |||
254 | switch (chip_id) { | ||
255 | case TEGRA124: | ||
256 | case TEGRA132: | ||
257 | /* T124 and T132 don't apply any rounding. The resulting | ||
258 | * emc frequency gets implicitly rounded up after issuing | ||
259 | * the clock_set_request. | ||
260 | * So explicitly round up the emc target here to achieve | ||
261 | * the same outcome. */ | ||
262 | emc_freq_rounded = | ||
263 | tegra_emc_round_rate_updown(emc_target, true); | ||
264 | break; | ||
265 | |||
266 | case TEGRA210: | ||
267 | emc_freq_lower = (unsigned long) | ||
268 | tegra_emc_round_rate_updown(emc_target, false); | ||
269 | emc_freq_upper = (unsigned long) | ||
270 | tegra_emc_round_rate_updown(emc_target, true); | ||
271 | |||
272 | /* round to the nearest frequency step */ | ||
273 | if (emc_target < (emc_freq_lower + emc_freq_upper) / 2) | ||
274 | emc_freq_rounded = emc_freq_lower; | ||
275 | else | ||
276 | emc_freq_rounded = emc_freq_upper; | ||
277 | break; | ||
278 | |||
279 | default: | ||
280 | /* a proper rounding function needs to be implemented | ||
281 | * for emc in t18x */ | ||
282 | emc_freq_rounded = clk_round_rate(emc_clk, emc_target); | ||
283 | break; | ||
284 | } | ||
285 | |||
286 | /* only change the emc clock if new rounded frequency is different | ||
287 | * from previously set emc rate */ | ||
288 | if (emc_freq_rounded != emc_params->freq_last_set) { | ||
289 | clk_set_rate(emc_clk, emc_freq_rounded); | ||
290 | emc_params->freq_last_set = emc_freq_rounded; | ||
291 | } | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | * gk20a_tegra_prescale(profile, freq) | 234 | * gk20a_tegra_prescale(profile, freq) |
296 | * | 235 | * |
297 | * This function informs EDP about changed constraints. | 236 | * This function informs EDP about changed constraints. |
@@ -619,30 +558,6 @@ err_get_clock: | |||
619 | return ret; | 558 | return ret; |
620 | } | 559 | } |
621 | 560 | ||
622 | static int gk20a_tegra_reset_assert(struct device *dev) | ||
623 | { | ||
624 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
625 | |||
626 | if (!platform->clk_reset) | ||
627 | platform->clk_reset = platform->clk[0]; | ||
628 | |||
629 | tegra_periph_reset_assert(platform->clk_reset); | ||
630 | |||
631 | return 0; | ||
632 | } | ||
633 | |||
634 | static int gk20a_tegra_reset_deassert(struct device *dev) | ||
635 | { | ||
636 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
637 | |||
638 | if (!platform->clk_reset) | ||
639 | return -EINVAL; | ||
640 | |||
641 | tegra_periph_reset_deassert(platform->clk_reset); | ||
642 | |||
643 | return 0; | ||
644 | } | ||
645 | |||
646 | #if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK) | 561 | #if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK) |
647 | static int gm20b_tegra_reset_assert(struct device *dev) | 562 | static int gm20b_tegra_reset_assert(struct device *dev) |
648 | { | 563 | { |
@@ -992,54 +907,6 @@ static int gk20a_clk_get_freqs(struct device *dev, | |||
992 | } | 907 | } |
993 | #endif | 908 | #endif |
994 | 909 | ||
995 | |||
996 | struct gk20a_platform gk20a_tegra_platform = { | ||
997 | .has_syncpoints = true, | ||
998 | .aggressive_sync_destroy_thresh = 64, | ||
999 | |||
1000 | /* power management configuration */ | ||
1001 | .railgate_delay_init = 500, | ||
1002 | .can_railgate_init = true, | ||
1003 | .can_elpg_init = true, | ||
1004 | .enable_slcg = true, | ||
1005 | .enable_blcg = true, | ||
1006 | .enable_elcg = true, | ||
1007 | .enable_elpg = true, | ||
1008 | .enable_aelpg = true, | ||
1009 | .ptimer_src_freq = 12000000, | ||
1010 | |||
1011 | .force_reset_in_do_idle = false, | ||
1012 | |||
1013 | .default_big_page_size = SZ_128K, | ||
1014 | |||
1015 | .ch_wdt_timeout_ms = 7000, | ||
1016 | |||
1017 | .probe = gk20a_tegra_probe, | ||
1018 | .late_probe = gk20a_tegra_late_probe, | ||
1019 | .remove = gk20a_tegra_remove, | ||
1020 | |||
1021 | /* power management callbacks */ | ||
1022 | .suspend = gk20a_tegra_suspend, | ||
1023 | |||
1024 | .busy = gk20a_tegra_busy, | ||
1025 | .idle = gk20a_tegra_idle, | ||
1026 | |||
1027 | .reset_assert = gk20a_tegra_reset_assert, | ||
1028 | .reset_deassert = gk20a_tegra_reset_deassert, | ||
1029 | |||
1030 | /* frequency scaling configuration */ | ||
1031 | .prescale = gk20a_tegra_prescale, | ||
1032 | .postscale = gk20a_tegra_postscale, | ||
1033 | .devfreq_governor = "nvhost_podgov", | ||
1034 | .qos_notify = gk20a_scale_qos_notify, | ||
1035 | |||
1036 | .dump_platform_dependencies = gk20a_tegra_debug_dump, | ||
1037 | |||
1038 | .soc_name = "tegra12x", | ||
1039 | |||
1040 | .unified_memory = true, | ||
1041 | }; | ||
1042 | |||
1043 | struct gk20a_platform gm20b_tegra_platform = { | 910 | struct gk20a_platform gm20b_tegra_platform = { |
1044 | .has_syncpoints = true, | 911 | .has_syncpoints = true, |
1045 | .aggressive_sync_destroy_thresh = 64, | 912 | .aggressive_sync_destroy_thresh = 64, |
@@ -1097,8 +964,6 @@ struct gk20a_platform gm20b_tegra_platform = { | |||
1097 | .prescale = gk20a_tegra_prescale, | 964 | .prescale = gk20a_tegra_prescale, |
1098 | #ifdef CONFIG_TEGRA_BWMGR | 965 | #ifdef CONFIG_TEGRA_BWMGR |
1099 | .postscale = gm20b_tegra_postscale, | 966 | .postscale = gm20b_tegra_postscale, |
1100 | #else | ||
1101 | .postscale = gk20a_tegra_postscale, | ||
1102 | #endif | 967 | #endif |
1103 | .devfreq_governor = "nvhost_podgov", | 968 | .devfreq_governor = "nvhost_podgov", |
1104 | .qos_notify = gk20a_scale_qos_notify, | 969 | .qos_notify = gk20a_scale_qos_notify, |
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c index 5aae14f2..3c76e817 100644 --- a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c | |||
@@ -50,53 +50,6 @@ void gk20a_fb_init_hw(struct gk20a *g) | |||
50 | gk20a_writel(g, fb_niso_flush_sysmem_addr_r(), addr); | 50 | gk20a_writel(g, fb_niso_flush_sysmem_addr_r(), addr); |
51 | } | 51 | } |
52 | 52 | ||
53 | static void gk20a_fb_set_mmu_page_size(struct gk20a *g) | ||
54 | { | ||
55 | /* set large page size in fb */ | ||
56 | u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r()); | ||
57 | |||
58 | fb_mmu_ctrl = (fb_mmu_ctrl & | ||
59 | ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) | | ||
60 | fb_mmu_ctrl_vm_pg_size_128kb_f(); | ||
61 | |||
62 | gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl); | ||
63 | } | ||
64 | |||
65 | static unsigned int gk20a_fb_compression_page_size(struct gk20a *g) | ||
66 | { | ||
67 | return SZ_128K; | ||
68 | } | ||
69 | |||
70 | static unsigned int gk20a_fb_compressible_page_size(struct gk20a *g) | ||
71 | { | ||
72 | return SZ_64K; | ||
73 | } | ||
74 | |||
75 | static bool gk20a_fb_debug_mode_enabled(struct gk20a *g) | ||
76 | { | ||
77 | u32 debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r()); | ||
78 | return fb_mmu_debug_ctrl_debug_v(debug_ctrl) == | ||
79 | fb_mmu_debug_ctrl_debug_enabled_v(); | ||
80 | } | ||
81 | |||
82 | static void gk20a_fb_set_debug_mode(struct gk20a *g, bool enable) | ||
83 | { | ||
84 | u32 reg_val, debug_ctrl; | ||
85 | |||
86 | reg_val = gk20a_readl(g, fb_mmu_debug_ctrl_r()); | ||
87 | if (enable) { | ||
88 | debug_ctrl = fb_mmu_debug_ctrl_debug_enabled_f(); | ||
89 | g->mmu_debug_ctrl = true; | ||
90 | } else { | ||
91 | debug_ctrl = fb_mmu_debug_ctrl_debug_disabled_f(); | ||
92 | g->mmu_debug_ctrl = false; | ||
93 | } | ||
94 | |||
95 | reg_val = set_field(reg_val, | ||
96 | fb_mmu_debug_ctrl_debug_m(), debug_ctrl); | ||
97 | gk20a_writel(g, fb_mmu_debug_ctrl_r(), reg_val); | ||
98 | } | ||
99 | |||
100 | void gk20a_fb_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb) | 53 | void gk20a_fb_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb) |
101 | { | 54 | { |
102 | struct nvgpu_timeout timeout; | 55 | struct nvgpu_timeout timeout; |
@@ -159,17 +112,3 @@ void gk20a_fb_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb) | |||
159 | out: | 112 | out: |
160 | nvgpu_mutex_release(&g->mm.tlb_lock); | 113 | nvgpu_mutex_release(&g->mm.tlb_lock); |
161 | } | 114 | } |
162 | |||
163 | void gk20a_init_fb(struct gpu_ops *gops) | ||
164 | { | ||
165 | gops->fb.init_hw = gk20a_fb_init_hw; | ||
166 | gops->fb.reset = fb_gk20a_reset; | ||
167 | gops->fb.set_mmu_page_size = gk20a_fb_set_mmu_page_size; | ||
168 | gops->fb.compression_page_size = gk20a_fb_compression_page_size; | ||
169 | gops->fb.compressible_page_size = gk20a_fb_compressible_page_size; | ||
170 | gops->fb.is_debug_mode_enabled = gk20a_fb_debug_mode_enabled; | ||
171 | gops->fb.set_debug_mode = gk20a_fb_set_debug_mode; | ||
172 | gops->fb.tlb_invalidate = gk20a_fb_tlb_invalidate; | ||
173 | gk20a_init_uncompressed_kind_map(); | ||
174 | gk20a_init_kind_attr(); | ||
175 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.h b/drivers/gpu/nvgpu/gk20a/fb_gk20a.h index f9f4fbd0..26670722 100644 --- a/drivers/gpu/nvgpu/gk20a/fb_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms and conditions of the GNU General Public License, | 5 | * under the terms and conditions of the GNU General Public License, |
@@ -17,7 +17,6 @@ | |||
17 | struct gk20a; | 17 | struct gk20a; |
18 | struct nvgpu_mem; | 18 | struct nvgpu_mem; |
19 | 19 | ||
20 | void gk20a_init_fb(struct gpu_ops *gops); | ||
21 | void fb_gk20a_reset(struct gk20a *g); | 20 | void fb_gk20a_reset(struct gk20a *g); |
22 | void gk20a_fb_init_hw(struct gk20a *g); | 21 | void gk20a_fb_init_hw(struct gk20a *g); |
23 | void gk20a_fb_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb); | 22 | void gk20a_fb_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb); |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 17f3743f..31b470d4 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -822,51 +822,6 @@ int gk20a_init_fifo_reset_enable_hw(struct gk20a *g) | |||
822 | return 0; | 822 | return 0; |
823 | } | 823 | } |
824 | 824 | ||
825 | static void gk20a_fifo_init_pbdma_intr_descs(struct fifo_gk20a *f) | ||
826 | { | ||
827 | /* | ||
828 | * These are all errors which indicate something really wrong | ||
829 | * going on in the device | ||
830 | */ | ||
831 | f->intr.pbdma.device_fatal_0 = | ||
832 | pbdma_intr_0_memreq_pending_f() | | ||
833 | pbdma_intr_0_memack_timeout_pending_f() | | ||
834 | pbdma_intr_0_memack_extra_pending_f() | | ||
835 | pbdma_intr_0_memdat_timeout_pending_f() | | ||
836 | pbdma_intr_0_memdat_extra_pending_f() | | ||
837 | pbdma_intr_0_memflush_pending_f() | | ||
838 | pbdma_intr_0_memop_pending_f() | | ||
839 | pbdma_intr_0_lbconnect_pending_f() | | ||
840 | pbdma_intr_0_lback_timeout_pending_f() | | ||
841 | pbdma_intr_0_lback_extra_pending_f() | | ||
842 | pbdma_intr_0_lbdat_timeout_pending_f() | | ||
843 | pbdma_intr_0_lbdat_extra_pending_f() | | ||
844 | pbdma_intr_0_xbarconnect_pending_f() | | ||
845 | pbdma_intr_0_pri_pending_f(); | ||
846 | |||
847 | /* | ||
848 | * These are data parsing, framing errors or others which can be | ||
849 | * recovered from with intervention... or just resetting the | ||
850 | * channel | ||
851 | */ | ||
852 | f->intr.pbdma.channel_fatal_0 = | ||
853 | pbdma_intr_0_gpfifo_pending_f() | | ||
854 | pbdma_intr_0_gpptr_pending_f() | | ||
855 | pbdma_intr_0_gpentry_pending_f() | | ||
856 | pbdma_intr_0_gpcrc_pending_f() | | ||
857 | pbdma_intr_0_pbptr_pending_f() | | ||
858 | pbdma_intr_0_pbentry_pending_f() | | ||
859 | pbdma_intr_0_pbcrc_pending_f() | | ||
860 | pbdma_intr_0_method_pending_f() | | ||
861 | pbdma_intr_0_methodcrc_pending_f() | | ||
862 | pbdma_intr_0_pbseg_pending_f() | | ||
863 | pbdma_intr_0_signature_pending_f(); | ||
864 | |||
865 | /* Can be used for sw-methods, or represents a recoverable timeout. */ | ||
866 | f->intr.pbdma.restartable_0 = | ||
867 | pbdma_intr_0_device_pending_f(); | ||
868 | } | ||
869 | |||
870 | static int gk20a_init_fifo_setup_sw(struct gk20a *g) | 825 | static int gk20a_init_fifo_setup_sw(struct gk20a *g) |
871 | { | 826 | { |
872 | struct fifo_gk20a *f = &g->fifo; | 827 | struct fifo_gk20a *f = &g->fifo; |
@@ -1722,46 +1677,6 @@ static void gk20a_fifo_get_faulty_id_type(struct gk20a *g, int engine_id, | |||
1722 | fifo_engine_status_id_type_v(status); | 1677 | fifo_engine_status_id_type_v(status); |
1723 | } | 1678 | } |
1724 | 1679 | ||
1725 | static void gk20a_fifo_trigger_mmu_fault(struct gk20a *g, | ||
1726 | unsigned long engine_ids) | ||
1727 | { | ||
1728 | struct nvgpu_timeout timeout; | ||
1729 | unsigned long delay = GR_IDLE_CHECK_DEFAULT; | ||
1730 | unsigned long engine_id; | ||
1731 | |||
1732 | /* trigger faults for all bad engines */ | ||
1733 | for_each_set_bit(engine_id, &engine_ids, 32) { | ||
1734 | u32 mmu_id; | ||
1735 | |||
1736 | if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) { | ||
1737 | WARN_ON(true); | ||
1738 | break; | ||
1739 | } | ||
1740 | |||
1741 | mmu_id = gk20a_engine_id_to_mmu_id(g, engine_id); | ||
1742 | if (mmu_id != FIFO_INVAL_ENGINE_ID) | ||
1743 | gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id), | ||
1744 | fifo_trigger_mmu_fault_id_f(mmu_id) | | ||
1745 | fifo_trigger_mmu_fault_enable_f(1)); | ||
1746 | } | ||
1747 | |||
1748 | /* Wait for MMU fault to trigger */ | ||
1749 | nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), | ||
1750 | NVGPU_TIMER_CPU_TIMER); | ||
1751 | do { | ||
1752 | if (gk20a_readl(g, fifo_intr_0_r()) & | ||
1753 | fifo_intr_0_mmu_fault_pending_f()) | ||
1754 | break; | ||
1755 | |||
1756 | nvgpu_usleep_range(delay, delay * 2); | ||
1757 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); | ||
1758 | } while (!nvgpu_timeout_expired_msg(&timeout, "mmu fault timeout")); | ||
1759 | |||
1760 | /* release mmu fault trigger */ | ||
1761 | for_each_set_bit(engine_id, &engine_ids, 32) | ||
1762 | gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id), 0); | ||
1763 | } | ||
1764 | |||
1765 | static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg) | 1680 | static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg) |
1766 | { | 1681 | { |
1767 | unsigned int i; | 1682 | unsigned int i; |
@@ -3406,22 +3321,6 @@ int gk20a_fifo_wait_engine_idle(struct gk20a *g) | |||
3406 | return ret; | 3321 | return ret; |
3407 | } | 3322 | } |
3408 | 3323 | ||
3409 | static void gk20a_fifo_apply_pb_timeout(struct gk20a *g) | ||
3410 | { | ||
3411 | u32 timeout; | ||
3412 | |||
3413 | if (nvgpu_platform_is_silicon(g)) { | ||
3414 | timeout = gk20a_readl(g, fifo_pb_timeout_r()); | ||
3415 | timeout &= ~fifo_pb_timeout_detection_enabled_f(); | ||
3416 | gk20a_writel(g, fifo_pb_timeout_r(), timeout); | ||
3417 | } | ||
3418 | } | ||
3419 | |||
3420 | static u32 gk20a_fifo_get_num_fifos(struct gk20a *g) | ||
3421 | { | ||
3422 | return ccsr_channel__size_1_v(); | ||
3423 | } | ||
3424 | |||
3425 | u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g) | 3324 | u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g) |
3426 | { | 3325 | { |
3427 | return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f(); | 3326 | return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f(); |
@@ -3686,38 +3585,6 @@ void gk20a_fifo_disable_channel(struct channel_gk20a *ch) | |||
3686 | ccsr_channel_enable_clr_true_f()); | 3585 | ccsr_channel_enable_clr_true_f()); |
3687 | } | 3586 | } |
3688 | 3587 | ||
3689 | static void gk20a_fifo_channel_bind(struct channel_gk20a *c) | ||
3690 | { | ||
3691 | struct gk20a *g = c->g; | ||
3692 | u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block) >> | ||
3693 | ram_in_base_shift_v(); | ||
3694 | |||
3695 | gk20a_dbg_info("bind channel %d inst ptr 0x%08x", | ||
3696 | c->chid, inst_ptr); | ||
3697 | |||
3698 | |||
3699 | gk20a_writel(g, ccsr_channel_r(c->chid), | ||
3700 | (gk20a_readl(g, ccsr_channel_r(c->chid)) & | ||
3701 | ~ccsr_channel_runlist_f(~0)) | | ||
3702 | ccsr_channel_runlist_f(c->runlist_id)); | ||
3703 | |||
3704 | gk20a_writel(g, ccsr_channel_inst_r(c->chid), | ||
3705 | ccsr_channel_inst_ptr_f(inst_ptr) | | ||
3706 | nvgpu_aperture_mask(g, &c->inst_block, | ||
3707 | ccsr_channel_inst_target_sys_mem_ncoh_f(), | ||
3708 | ccsr_channel_inst_target_vid_mem_f()) | | ||
3709 | ccsr_channel_inst_bind_true_f()); | ||
3710 | |||
3711 | gk20a_writel(g, ccsr_channel_r(c->chid), | ||
3712 | (gk20a_readl(g, ccsr_channel_r(c->chid)) & | ||
3713 | ~ccsr_channel_enable_set_f(~0)) | | ||
3714 | ccsr_channel_enable_set_true_f()); | ||
3715 | |||
3716 | wmb(); | ||
3717 | atomic_set(&c->bound, true); | ||
3718 | |||
3719 | } | ||
3720 | |||
3721 | void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a) | 3588 | void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a) |
3722 | { | 3589 | { |
3723 | struct gk20a *g = ch_gk20a->g; | 3590 | struct gk20a *g = ch_gk20a->g; |
@@ -4080,65 +3947,3 @@ int gk20a_fifo_alloc_syncpt_buf(struct channel_gk20a *c, | |||
4080 | return 0; | 3947 | return 0; |
4081 | } | 3948 | } |
4082 | #endif | 3949 | #endif |
4083 | |||
4084 | |||
4085 | void gk20a_init_fifo(struct gpu_ops *gops) | ||
4086 | { | ||
4087 | gops->fifo.disable_channel = gk20a_fifo_disable_channel; | ||
4088 | gops->fifo.enable_channel = gk20a_fifo_enable_channel; | ||
4089 | gops->fifo.bind_channel = gk20a_fifo_channel_bind; | ||
4090 | gops->fifo.unbind_channel = gk20a_fifo_channel_unbind; | ||
4091 | gops->fifo.init_fifo_setup_hw = gk20a_init_fifo_setup_hw; | ||
4092 | gops->fifo.preempt_channel = gk20a_fifo_preempt_channel; | ||
4093 | gops->fifo.preempt_tsg = gk20a_fifo_preempt_tsg; | ||
4094 | gops->fifo.update_runlist = gk20a_fifo_update_runlist; | ||
4095 | gops->fifo.trigger_mmu_fault = gk20a_fifo_trigger_mmu_fault; | ||
4096 | gops->fifo.get_mmu_fault_info = gk20a_fifo_get_mmu_fault_info; | ||
4097 | gops->fifo.apply_pb_timeout = gk20a_fifo_apply_pb_timeout; | ||
4098 | gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle; | ||
4099 | gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos; | ||
4100 | gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature; | ||
4101 | gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave; | ||
4102 | gops->fifo.tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice; | ||
4103 | gops->fifo.force_reset_ch = gk20a_fifo_force_reset_ch; | ||
4104 | gops->fifo.engine_enum_from_type = gk20a_fifo_engine_enum_from_type; | ||
4105 | /* gk20a doesn't support device_info_data packet parsing */ | ||
4106 | gops->fifo.device_info_data_parse = NULL; | ||
4107 | gops->fifo.eng_runlist_base_size = fifo_eng_runlist_base__size_1_v; | ||
4108 | gops->fifo.init_engine_info = gk20a_fifo_init_engine_info; | ||
4109 | gops->fifo.runlist_entry_size = ram_rl_entry_size_v; | ||
4110 | gops->fifo.get_tsg_runlist_entry = gk20a_get_tsg_runlist_entry; | ||
4111 | gops->fifo.get_ch_runlist_entry = gk20a_get_ch_runlist_entry; | ||
4112 | gops->fifo.is_fault_engine_subid_gpc = gk20a_is_fault_engine_subid_gpc; | ||
4113 | gops->fifo.dump_pbdma_status = gk20a_dump_pbdma_status; | ||
4114 | gops->fifo.dump_eng_status = gk20a_dump_eng_status; | ||
4115 | gops->fifo.dump_channel_status_ramfc = gk20a_dump_channel_status_ramfc; | ||
4116 | gops->fifo.intr_0_error_mask = gk20a_fifo_intr_0_error_mask; | ||
4117 | gops->fifo.is_preempt_pending = gk20a_fifo_is_preempt_pending; | ||
4118 | gops->fifo.init_pbdma_intr_descs = gk20a_fifo_init_pbdma_intr_descs; | ||
4119 | gops->fifo.reset_enable_hw = gk20a_init_fifo_reset_enable_hw; | ||
4120 | gops->fifo.setup_ramfc = gk20a_fifo_setup_ramfc; | ||
4121 | gops->fifo.channel_set_priority = gk20a_fifo_set_priority; | ||
4122 | gops->fifo.channel_set_timeslice = gk20a_fifo_set_timeslice; | ||
4123 | gops->fifo.alloc_inst = gk20a_fifo_alloc_inst; | ||
4124 | gops->fifo.free_inst = gk20a_fifo_free_inst; | ||
4125 | gops->fifo.setup_userd = gk20a_fifo_setup_userd; | ||
4126 | gops->fifo.userd_gp_get = gk20a_fifo_userd_gp_get; | ||
4127 | gops->fifo.userd_gp_put = gk20a_fifo_userd_gp_put; | ||
4128 | gops->fifo.userd_pb_get = gk20a_fifo_userd_pb_get; | ||
4129 | gops->fifo.pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val; | ||
4130 | gops->fifo.teardown_ch_tsg = gk20a_fifo_teardown_ch_tsg; | ||
4131 | gops->fifo.handle_sched_error = gk20a_fifo_handle_sched_error; | ||
4132 | gops->fifo.handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0; | ||
4133 | gops->fifo.handle_pbdma_intr_1 = gk20a_fifo_handle_pbdma_intr_1; | ||
4134 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
4135 | gops->fifo.alloc_syncpt_buf = gk20a_fifo_alloc_syncpt_buf; | ||
4136 | gops->fifo.free_syncpt_buf = gk20a_fifo_free_syncpt_buf; | ||
4137 | gops->fifo.add_syncpt_wait_cmd = gk20a_fifo_add_syncpt_wait_cmd; | ||
4138 | gops->fifo.get_syncpt_wait_cmd_size = | ||
4139 | gk20a_fifo_get_syncpt_wait_cmd_size; | ||
4140 | gops->fifo.add_syncpt_incr_cmd = gk20a_fifo_add_syncpt_incr_cmd; | ||
4141 | gops->fifo.get_syncpt_incr_cmd_size = | ||
4142 | gk20a_fifo_get_syncpt_incr_cmd_size; | ||
4143 | #endif | ||
4144 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 09f0c95f..b19a7b68 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | |||
@@ -268,7 +268,6 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, | |||
268 | u32 err_code, bool verbose); | 268 | u32 err_code, bool verbose); |
269 | void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id); | 269 | void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id); |
270 | int gk20a_init_fifo_reset_enable_hw(struct gk20a *g); | 270 | int gk20a_init_fifo_reset_enable_hw(struct gk20a *g); |
271 | void gk20a_init_fifo(struct gpu_ops *gops); | ||
272 | 271 | ||
273 | void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g, | 272 | void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g, |
274 | unsigned long fault_id); | 273 | unsigned long fault_id); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c index b0a90fc8..eff45cb0 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c | |||
@@ -66,50 +66,6 @@ static int gr_gk20a_alloc_load_netlist_aiv(struct gk20a *g, u32 *src, u32 len, | |||
66 | return 0; | 66 | return 0; |
67 | } | 67 | } |
68 | 68 | ||
69 | static int gr_gk20a_get_netlist_name(struct gk20a *g, int index, char *name) | ||
70 | { | ||
71 | switch (index) { | ||
72 | #ifdef GK20A_NETLIST_IMAGE_FW_NAME | ||
73 | case NETLIST_FINAL: | ||
74 | sprintf(name, GK20A_NETLIST_IMAGE_FW_NAME); | ||
75 | return 0; | ||
76 | #endif | ||
77 | #ifdef GK20A_NETLIST_IMAGE_A | ||
78 | case NETLIST_SLOT_A: | ||
79 | sprintf(name, GK20A_NETLIST_IMAGE_A); | ||
80 | return 0; | ||
81 | #endif | ||
82 | #ifdef GK20A_NETLIST_IMAGE_B | ||
83 | case NETLIST_SLOT_B: | ||
84 | sprintf(name, GK20A_NETLIST_IMAGE_B); | ||
85 | return 0; | ||
86 | #endif | ||
87 | #ifdef GK20A_NETLIST_IMAGE_C | ||
88 | case NETLIST_SLOT_C: | ||
89 | sprintf(name, GK20A_NETLIST_IMAGE_C); | ||
90 | return 0; | ||
91 | #endif | ||
92 | #ifdef GK20A_NETLIST_IMAGE_D | ||
93 | case NETLIST_SLOT_D: | ||
94 | sprintf(name, GK20A_NETLIST_IMAGE_D); | ||
95 | return 0; | ||
96 | #endif | ||
97 | default: | ||
98 | return -1; | ||
99 | } | ||
100 | |||
101 | return -1; | ||
102 | } | ||
103 | |||
104 | static bool gr_gk20a_is_firmware_defined(void) | ||
105 | { | ||
106 | #ifdef GK20A_NETLIST_IMAGE_FW_NAME | ||
107 | return true; | ||
108 | #else | ||
109 | return false; | ||
110 | #endif | ||
111 | } | ||
112 | |||
113 | static int gr_gk20a_init_ctx_vars_fw(struct gk20a *g, struct gr_gk20a *gr) | 69 | static int gr_gk20a_init_ctx_vars_fw(struct gk20a *g, struct gr_gk20a *gr) |
114 | { | 70 | { |
115 | struct nvgpu_firmware *netlist_fw; | 71 | struct nvgpu_firmware *netlist_fw; |
@@ -448,10 +404,3 @@ int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr) | |||
448 | else | 404 | else |
449 | return gr_gk20a_init_ctx_vars_fw(g, gr); | 405 | return gr_gk20a_init_ctx_vars_fw(g, gr); |
450 | } | 406 | } |
451 | |||
452 | void gk20a_init_gr_ctx(struct gpu_ops *gops) | ||
453 | { | ||
454 | gops->gr_ctx.get_netlist_name = gr_gk20a_get_netlist_name; | ||
455 | gops->gr_ctx.is_fw_defined = gr_gk20a_is_firmware_defined; | ||
456 | gops->gr_ctx.use_dma_for_fw_bootstrap = true; | ||
457 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h index b82f5275..d645ed13 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h | |||
@@ -22,10 +22,6 @@ | |||
22 | 22 | ||
23 | struct gr_gk20a; | 23 | struct gr_gk20a; |
24 | 24 | ||
25 | /* production netlist, one and only one from below */ | ||
26 | /*#undef GK20A_NETLIST_IMAGE_FW_NAME*/ | ||
27 | #define GK20A_NETLIST_IMAGE_FW_NAME GK20A_NETLIST_IMAGE_B | ||
28 | |||
29 | /* emulation netlists, match majorV with HW */ | 25 | /* emulation netlists, match majorV with HW */ |
30 | #define GK20A_NETLIST_IMAGE_A "NETA_img.bin" | 26 | #define GK20A_NETLIST_IMAGE_A "NETA_img.bin" |
31 | #define GK20A_NETLIST_IMAGE_B "NETB_img.bin" | 27 | #define GK20A_NETLIST_IMAGE_B "NETB_img.bin" |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index a9632eaa..3ac4e397 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -837,71 +837,6 @@ u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc) | |||
837 | return tpc_offset; | 837 | return tpc_offset; |
838 | } | 838 | } |
839 | 839 | ||
840 | static int gr_gk20a_commit_global_cb_manager(struct gk20a *g, | ||
841 | struct channel_gk20a *c, bool patch) | ||
842 | { | ||
843 | struct gr_gk20a *gr = &g->gr; | ||
844 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | ||
845 | u32 attrib_offset_in_chunk = 0; | ||
846 | u32 alpha_offset_in_chunk = 0; | ||
847 | u32 pd_ab_max_output; | ||
848 | u32 gpc_index, ppc_index; | ||
849 | u32 temp; | ||
850 | u32 cbm_cfg_size1, cbm_cfg_size2; | ||
851 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
852 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
853 | |||
854 | gk20a_dbg_fn(""); | ||
855 | |||
856 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(), | ||
857 | gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) | | ||
858 | gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size), | ||
859 | patch); | ||
860 | |||
861 | pd_ab_max_output = (gr->alpha_cb_default_size * | ||
862 | gr_gpc0_ppc0_cbm_cfg_size_granularity_v()) / | ||
863 | gr_pd_ab_dist_cfg1_max_output_granularity_v(); | ||
864 | |||
865 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(), | ||
866 | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | | ||
867 | gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); | ||
868 | |||
869 | alpha_offset_in_chunk = attrib_offset_in_chunk + | ||
870 | gr->tpc_count * gr->attrib_cb_size; | ||
871 | |||
872 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
873 | temp = gpc_stride * gpc_index; | ||
874 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | ||
875 | ppc_index++) { | ||
876 | cbm_cfg_size1 = gr->attrib_cb_default_size * | ||
877 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
878 | cbm_cfg_size2 = gr->alpha_cb_default_size * | ||
879 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
880 | |||
881 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
882 | gr_gpc0_ppc0_cbm_cfg_r() + temp + | ||
883 | ppc_in_gpc_stride * ppc_index, | ||
884 | gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(gr->timeslice_mode) | | ||
885 | gr_gpc0_ppc0_cbm_cfg_start_offset_f(attrib_offset_in_chunk) | | ||
886 | gr_gpc0_ppc0_cbm_cfg_size_f(cbm_cfg_size1), patch); | ||
887 | |||
888 | attrib_offset_in_chunk += gr->attrib_cb_size * | ||
889 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
890 | |||
891 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
892 | gr_gpc0_ppc0_cbm_cfg2_r() + temp + | ||
893 | ppc_in_gpc_stride * ppc_index, | ||
894 | gr_gpc0_ppc0_cbm_cfg2_start_offset_f(alpha_offset_in_chunk) | | ||
895 | gr_gpc0_ppc0_cbm_cfg2_size_f(cbm_cfg_size2), patch); | ||
896 | |||
897 | alpha_offset_in_chunk += gr->alpha_cb_size * | ||
898 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
899 | } | ||
900 | } | ||
901 | |||
902 | return 0; | ||
903 | } | ||
904 | |||
905 | static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, | 840 | static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, |
906 | struct channel_gk20a *c, bool patch) | 841 | struct channel_gk20a *c, bool patch) |
907 | { | 842 | { |
@@ -964,55 +899,6 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, | |||
964 | return 0; | 899 | return 0; |
965 | } | 900 | } |
966 | 901 | ||
967 | static void gr_gk20a_commit_global_attrib_cb(struct gk20a *g, | ||
968 | struct channel_ctx_gk20a *ch_ctx, | ||
969 | u64 addr, bool patch) | ||
970 | { | ||
971 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(), | ||
972 | gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) | | ||
973 | gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch); | ||
974 | |||
975 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(), | ||
976 | gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) | | ||
977 | gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch); | ||
978 | } | ||
979 | |||
980 | static void gr_gk20a_commit_global_bundle_cb(struct gk20a *g, | ||
981 | struct channel_ctx_gk20a *ch_ctx, | ||
982 | u64 addr, u64 size, bool patch) | ||
983 | { | ||
984 | u32 data; | ||
985 | |||
986 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), | ||
987 | gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); | ||
988 | |||
989 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), | ||
990 | gr_scc_bundle_cb_size_div_256b_f(size) | | ||
991 | gr_scc_bundle_cb_size_valid_true_f(), patch); | ||
992 | |||
993 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_bundle_cb_base_r(), | ||
994 | gr_gpcs_setup_bundle_cb_base_addr_39_8_f(addr), patch); | ||
995 | |||
996 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_bundle_cb_size_r(), | ||
997 | gr_gpcs_setup_bundle_cb_size_div_256b_f(size) | | ||
998 | gr_gpcs_setup_bundle_cb_size_valid_true_f(), patch); | ||
999 | |||
1000 | /* data for state_limit */ | ||
1001 | data = (g->gr.bundle_cb_default_size * | ||
1002 | gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) / | ||
1003 | gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(); | ||
1004 | |||
1005 | data = min_t(u32, data, g->gr.min_gpm_fifo_depth); | ||
1006 | |||
1007 | gk20a_dbg_info("bundle cb token limit : %d, state limit : %d", | ||
1008 | g->gr.bundle_cb_token_limit, data); | ||
1009 | |||
1010 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), | ||
1011 | gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | | ||
1012 | gr_pd_ab_dist_cfg2_state_limit_f(data), patch); | ||
1013 | |||
1014 | } | ||
1015 | |||
1016 | int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c, | 902 | int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c, |
1017 | bool patch) | 903 | bool patch) |
1018 | { | 904 | { |
@@ -1235,135 +1121,6 @@ static inline u32 count_bits(u32 mask) | |||
1235 | return count; | 1121 | return count; |
1236 | } | 1122 | } |
1237 | 1123 | ||
1238 | static inline u32 clear_count_bits(u32 num, u32 clear_count) | ||
1239 | { | ||
1240 | u32 count = clear_count; | ||
1241 | for (; (num != 0) && (count != 0); count--) | ||
1242 | num &= num - 1; | ||
1243 | |||
1244 | return num; | ||
1245 | } | ||
1246 | |||
1247 | static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g, | ||
1248 | struct gr_gk20a *gr) | ||
1249 | { | ||
1250 | u32 table_index_bits = 5; | ||
1251 | u32 rows = (1 << table_index_bits); | ||
1252 | u32 row_stride = gr_pd_alpha_ratio_table__size_1_v() / rows; | ||
1253 | |||
1254 | u32 row; | ||
1255 | u32 index; | ||
1256 | u32 gpc_index; | ||
1257 | u32 gpcs_per_reg = 4; | ||
1258 | u32 pes_index; | ||
1259 | u32 tpc_count_pes; | ||
1260 | u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); | ||
1261 | |||
1262 | u32 alpha_target, beta_target; | ||
1263 | u32 alpha_bits, beta_bits; | ||
1264 | u32 alpha_mask, beta_mask, partial_mask; | ||
1265 | u32 reg_offset; | ||
1266 | bool assign_alpha; | ||
1267 | |||
1268 | u32 *map_alpha; | ||
1269 | u32 *map_beta; | ||
1270 | u32 *map_reg_used; | ||
1271 | |||
1272 | gk20a_dbg_fn(""); | ||
1273 | |||
1274 | map_alpha = nvgpu_kzalloc(g, 3 * gr_pd_alpha_ratio_table__size_1_v() * | ||
1275 | sizeof(u32)); | ||
1276 | if (!map_alpha) | ||
1277 | return -ENOMEM; | ||
1278 | map_beta = map_alpha + gr_pd_alpha_ratio_table__size_1_v(); | ||
1279 | map_reg_used = map_beta + gr_pd_alpha_ratio_table__size_1_v(); | ||
1280 | |||
1281 | for (row = 0; row < rows; ++row) { | ||
1282 | alpha_target = max_t(u32, gr->tpc_count * row / rows, 1); | ||
1283 | beta_target = gr->tpc_count - alpha_target; | ||
1284 | |||
1285 | assign_alpha = (alpha_target < beta_target); | ||
1286 | |||
1287 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
1288 | reg_offset = (row * row_stride) + (gpc_index / gpcs_per_reg); | ||
1289 | alpha_mask = beta_mask = 0; | ||
1290 | |||
1291 | for (pes_index = 0; pes_index < num_pes_per_gpc; pes_index++) { | ||
1292 | tpc_count_pes = gr->pes_tpc_count[pes_index][gpc_index]; | ||
1293 | |||
1294 | if (assign_alpha) { | ||
1295 | alpha_bits = (alpha_target == 0) ? 0 : tpc_count_pes; | ||
1296 | beta_bits = tpc_count_pes - alpha_bits; | ||
1297 | } else { | ||
1298 | beta_bits = (beta_target == 0) ? 0 : tpc_count_pes; | ||
1299 | alpha_bits = tpc_count_pes - beta_bits; | ||
1300 | } | ||
1301 | |||
1302 | partial_mask = gr->pes_tpc_mask[pes_index][gpc_index]; | ||
1303 | partial_mask = clear_count_bits(partial_mask, tpc_count_pes - alpha_bits); | ||
1304 | alpha_mask |= partial_mask; | ||
1305 | |||
1306 | partial_mask = gr->pes_tpc_mask[pes_index][gpc_index] ^ partial_mask; | ||
1307 | beta_mask |= partial_mask; | ||
1308 | |||
1309 | alpha_target -= min(alpha_bits, alpha_target); | ||
1310 | beta_target -= min(beta_bits, beta_target); | ||
1311 | |||
1312 | if ((alpha_bits > 0) || (beta_bits > 0)) | ||
1313 | assign_alpha = !assign_alpha; | ||
1314 | } | ||
1315 | |||
1316 | switch (gpc_index % gpcs_per_reg) { | ||
1317 | case 0: | ||
1318 | map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n0_mask_f(alpha_mask); | ||
1319 | map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n0_mask_f(beta_mask); | ||
1320 | break; | ||
1321 | case 1: | ||
1322 | map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n1_mask_f(alpha_mask); | ||
1323 | map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n1_mask_f(beta_mask); | ||
1324 | break; | ||
1325 | case 2: | ||
1326 | map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n2_mask_f(alpha_mask); | ||
1327 | map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n2_mask_f(beta_mask); | ||
1328 | break; | ||
1329 | case 3: | ||
1330 | map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n3_mask_f(alpha_mask); | ||
1331 | map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n3_mask_f(beta_mask); | ||
1332 | break; | ||
1333 | } | ||
1334 | map_reg_used[reg_offset] = true; | ||
1335 | } | ||
1336 | } | ||
1337 | |||
1338 | for (index = 0; index < gr_pd_alpha_ratio_table__size_1_v(); index++) { | ||
1339 | if (map_reg_used[index]) { | ||
1340 | gk20a_writel(g, gr_pd_alpha_ratio_table_r(index), map_alpha[index]); | ||
1341 | gk20a_writel(g, gr_pd_beta_ratio_table_r(index), map_beta[index]); | ||
1342 | } | ||
1343 | } | ||
1344 | |||
1345 | nvgpu_kfree(g, map_alpha); | ||
1346 | return 0; | ||
1347 | } | ||
1348 | |||
1349 | static u32 gr_gk20a_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) | ||
1350 | { | ||
1351 | /* One TPC for gk20a */ | ||
1352 | return 0x1; | ||
1353 | } | ||
1354 | |||
1355 | static void gr_gk20a_program_active_tpc_counts(struct gk20a *g, u32 gpc_index) | ||
1356 | { | ||
1357 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1358 | u32 gpc_offset = gpc_stride * gpc_index; | ||
1359 | struct gr_gk20a *gr = &g->gr; | ||
1360 | |||
1361 | gk20a_writel(g, gr_gpc0_gpm_pd_active_tpcs_r() + gpc_offset, | ||
1362 | gr_gpc0_gpm_pd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index])); | ||
1363 | gk20a_writel(g, gr_gpc0_gpm_sd_active_tpcs_r() + gpc_offset, | ||
1364 | gr_gpc0_gpm_sd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index])); | ||
1365 | } | ||
1366 | |||
1367 | void gr_gk20a_init_sm_id_table(struct gk20a *g) | 1124 | void gr_gk20a_init_sm_id_table(struct gk20a *g) |
1368 | { | 1125 | { |
1369 | u32 gpc, tpc; | 1126 | u32 gpc, tpc; |
@@ -1385,24 +1142,6 @@ void gr_gk20a_init_sm_id_table(struct gk20a *g) | |||
1385 | g->gr.no_of_sm = sm_id; | 1142 | g->gr.no_of_sm = sm_id; |
1386 | } | 1143 | } |
1387 | 1144 | ||
1388 | static void gr_gk20a_program_sm_id_numbering(struct gk20a *g, | ||
1389 | u32 gpc, u32 tpc, u32 sm_id) | ||
1390 | { | ||
1391 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1392 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
1393 | u32 gpc_offset = gpc_stride * gpc; | ||
1394 | u32 tpc_offset = tpc_in_gpc_stride * tpc; | ||
1395 | |||
1396 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | ||
1397 | gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); | ||
1398 | gk20a_writel(g, gr_gpc0_tpc0_l1c_cfg_smid_r() + gpc_offset + tpc_offset, | ||
1399 | gr_gpc0_tpc0_l1c_cfg_smid_value_f(sm_id)); | ||
1400 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset, | ||
1401 | gr_gpc0_gpm_pd_sm_id_id_f(sm_id)); | ||
1402 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, | ||
1403 | gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id)); | ||
1404 | } | ||
1405 | |||
1406 | /* | 1145 | /* |
1407 | * Return number of TPCs in a GPC | 1146 | * Return number of TPCs in a GPC |
1408 | * Return 0 if GPC index is invalid i.e. GPC is disabled | 1147 | * Return 0 if GPC index is invalid i.e. GPC is disabled |
@@ -2564,23 +2303,6 @@ void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base, | |||
2564 | gr_fecs_bootvec_vec_f(segments->boot_entry)); | 2303 | gr_fecs_bootvec_vec_f(segments->boot_entry)); |
2565 | } | 2304 | } |
2566 | 2305 | ||
2567 | static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, | ||
2568 | struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) | ||
2569 | { | ||
2570 | gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), | ||
2571 | gr_fecs_dmactl_require_ctx_f(0)); | ||
2572 | |||
2573 | /* Copy falcon bootloader into dmem */ | ||
2574 | gr_gk20a_load_ctxsw_ucode_header(g, addr_base, segments, reg_offset); | ||
2575 | gr_gk20a_load_ctxsw_ucode_boot(g, addr_base, segments, reg_offset); | ||
2576 | |||
2577 | /* Write to CPUCTL to start the falcon */ | ||
2578 | gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(), | ||
2579 | gr_fecs_cpuctl_startcpu_f(0x01)); | ||
2580 | |||
2581 | return 0; | ||
2582 | } | ||
2583 | |||
2584 | static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g) | 2306 | static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g) |
2585 | { | 2307 | { |
2586 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; | 2308 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; |
@@ -3116,41 +2838,6 @@ void gk20a_free_channel_ctx(struct channel_gk20a *c) | |||
3116 | c->first_init = false; | 2838 | c->first_init = false; |
3117 | } | 2839 | } |
3118 | 2840 | ||
3119 | static bool gr_gk20a_is_valid_class(struct gk20a *g, u32 class_num) | ||
3120 | { | ||
3121 | bool valid = false; | ||
3122 | |||
3123 | switch (class_num) { | ||
3124 | case KEPLER_COMPUTE_A: | ||
3125 | case KEPLER_C: | ||
3126 | case FERMI_TWOD_A: | ||
3127 | case KEPLER_DMA_COPY_A: | ||
3128 | valid = true; | ||
3129 | break; | ||
3130 | |||
3131 | default: | ||
3132 | break; | ||
3133 | } | ||
3134 | |||
3135 | return valid; | ||
3136 | } | ||
3137 | |||
3138 | static bool gr_gk20a_is_valid_gfx_class(struct gk20a *g, u32 class_num) | ||
3139 | { | ||
3140 | if (class_num == KEPLER_C) | ||
3141 | return true; | ||
3142 | else | ||
3143 | return false; | ||
3144 | } | ||
3145 | |||
3146 | static bool gr_gk20a_is_valid_compute_class(struct gk20a *g, u32 class_num) | ||
3147 | { | ||
3148 | if (class_num == KEPLER_COMPUTE_A) | ||
3149 | return true; | ||
3150 | else | ||
3151 | return false; | ||
3152 | } | ||
3153 | |||
3154 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, | 2841 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, |
3155 | struct nvgpu_alloc_obj_ctx_args *args) | 2842 | struct nvgpu_alloc_obj_ctx_args *args) |
3156 | { | 2843 | { |
@@ -3461,18 +3148,6 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) | |||
3461 | gk20a_comptag_allocator_destroy(&gr->comp_tags); | 3148 | gk20a_comptag_allocator_destroy(&gr->comp_tags); |
3462 | } | 3149 | } |
3463 | 3150 | ||
3464 | static void gr_gk20a_bundle_cb_defaults(struct gk20a *g) | ||
3465 | { | ||
3466 | struct gr_gk20a *gr = &g->gr; | ||
3467 | |||
3468 | gr->bundle_cb_default_size = | ||
3469 | gr_scc_bundle_cb_size_div_256b__prod_v(); | ||
3470 | gr->min_gpm_fifo_depth = | ||
3471 | gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); | ||
3472 | gr->bundle_cb_token_limit = | ||
3473 | gr_pd_ab_dist_cfg2_token_limit_init_v(); | ||
3474 | } | ||
3475 | |||
3476 | static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | 3151 | static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) |
3477 | { | 3152 | { |
3478 | u32 gpc_index, pes_index; | 3153 | u32 gpc_index, pes_index; |
@@ -3954,27 +3629,6 @@ int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, | |||
3954 | return 0; | 3629 | return 0; |
3955 | } | 3630 | } |
3956 | 3631 | ||
3957 | static void gr_gk20a_detect_sm_arch(struct gk20a *g) | ||
3958 | { | ||
3959 | u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r()); | ||
3960 | |||
3961 | u32 raw_version = gr_gpc0_tpc0_sm_arch_spa_version_v(v); | ||
3962 | u32 version = 0; | ||
3963 | |||
3964 | if (raw_version == gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v()) | ||
3965 | version = 0x320; /* SM 3.2 */ | ||
3966 | else | ||
3967 | nvgpu_err(g, "Unknown SM version 0x%x", | ||
3968 | raw_version); | ||
3969 | |||
3970 | /* on Kepler, SM version == SPA version */ | ||
3971 | g->gpu_characteristics.sm_arch_spa_version = version; | ||
3972 | g->gpu_characteristics.sm_arch_sm_version = version; | ||
3973 | |||
3974 | g->gpu_characteristics.sm_arch_warp_count = | ||
3975 | gr_gpc0_tpc0_sm_arch_warp_count_v(v); | ||
3976 | } | ||
3977 | |||
3978 | int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, | 3632 | int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, |
3979 | struct zbc_entry *color_val, u32 index) | 3633 | struct zbc_entry *color_val, u32 index) |
3980 | { | 3634 | { |
@@ -4729,42 +4383,6 @@ void gr_gk20a_enable_hww_exceptions(struct gk20a *g) | |||
4729 | gr_memfmt_hww_esr_reset_active_f()); | 4383 | gr_memfmt_hww_esr_reset_active_f()); |
4730 | } | 4384 | } |
4731 | 4385 | ||
4732 | static void gr_gk20a_set_hww_esr_report_mask(struct gk20a *g) | ||
4733 | { | ||
4734 | /* setup sm warp esr report masks */ | ||
4735 | gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), | ||
4736 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() | | ||
4737 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() | | ||
4738 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() | | ||
4739 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() | | ||
4740 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() | | ||
4741 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() | | ||
4742 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() | | ||
4743 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() | | ||
4744 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() | | ||
4745 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() | | ||
4746 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() | | ||
4747 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() | | ||
4748 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() | | ||
4749 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() | | ||
4750 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() | | ||
4751 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() | | ||
4752 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() | | ||
4753 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() | | ||
4754 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() | | ||
4755 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f()); | ||
4756 | |||
4757 | /* setup sm global esr report mask */ | ||
4758 | gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(), | ||
4759 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() | | ||
4760 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f() | | ||
4761 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f() | | ||
4762 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f() | | ||
4763 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f() | | ||
4764 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f() | | ||
4765 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f()); | ||
4766 | } | ||
4767 | |||
4768 | static int gk20a_init_gr_setup_hw(struct gk20a *g) | 4386 | static int gk20a_init_gr_setup_hw(struct gk20a *g) |
4769 | { | 4387 | { |
4770 | struct gr_gk20a *gr = &g->gr; | 4388 | struct gr_gk20a *gr = &g->gr; |
@@ -5364,107 +4982,6 @@ void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data) | |||
5364 | } | 4982 | } |
5365 | } | 4983 | } |
5366 | 4984 | ||
5367 | static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data) | ||
5368 | { | ||
5369 | struct gr_gk20a *gr = &g->gr; | ||
5370 | u32 gpc_index, ppc_index, stride, val, offset; | ||
5371 | u32 cb_size = data * 4; | ||
5372 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
5373 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
5374 | |||
5375 | gk20a_dbg_fn(""); | ||
5376 | |||
5377 | if (cb_size > gr->attrib_cb_size) | ||
5378 | cb_size = gr->attrib_cb_size; | ||
5379 | |||
5380 | gk20a_writel(g, gr_ds_tga_constraintlogic_r(), | ||
5381 | (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) & | ||
5382 | ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) | | ||
5383 | gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size)); | ||
5384 | |||
5385 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
5386 | stride = gpc_stride * gpc_index; | ||
5387 | |||
5388 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | ||
5389 | ppc_index++) { | ||
5390 | |||
5391 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg_r() + | ||
5392 | stride + | ||
5393 | ppc_in_gpc_stride * ppc_index); | ||
5394 | |||
5395 | offset = gr_gpc0_ppc0_cbm_cfg_start_offset_v(val); | ||
5396 | |||
5397 | val = set_field(val, | ||
5398 | gr_gpc0_ppc0_cbm_cfg_size_m(), | ||
5399 | gr_gpc0_ppc0_cbm_cfg_size_f(cb_size * | ||
5400 | gr->pes_tpc_count[ppc_index][gpc_index])); | ||
5401 | val = set_field(val, | ||
5402 | gr_gpc0_ppc0_cbm_cfg_start_offset_m(), | ||
5403 | (offset + 1)); | ||
5404 | |||
5405 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() + | ||
5406 | stride + | ||
5407 | ppc_in_gpc_stride * ppc_index, val); | ||
5408 | |||
5409 | val = set_field(val, | ||
5410 | gr_gpc0_ppc0_cbm_cfg_start_offset_m(), | ||
5411 | offset); | ||
5412 | |||
5413 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() + | ||
5414 | stride + | ||
5415 | ppc_in_gpc_stride * ppc_index, val); | ||
5416 | } | ||
5417 | } | ||
5418 | } | ||
5419 | |||
5420 | static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) | ||
5421 | { | ||
5422 | struct gr_gk20a *gr = &g->gr; | ||
5423 | u32 gpc_index, ppc_index, stride, val; | ||
5424 | u32 pd_ab_max_output; | ||
5425 | u32 alpha_cb_size = data * 4; | ||
5426 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
5427 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
5428 | |||
5429 | gk20a_dbg_fn(""); | ||
5430 | /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF) | ||
5431 | return; */ | ||
5432 | |||
5433 | if (alpha_cb_size > gr->alpha_cb_size) | ||
5434 | alpha_cb_size = gr->alpha_cb_size; | ||
5435 | |||
5436 | gk20a_writel(g, gr_ds_tga_constraintlogic_r(), | ||
5437 | (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) & | ||
5438 | ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) | | ||
5439 | gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size)); | ||
5440 | |||
5441 | pd_ab_max_output = alpha_cb_size * | ||
5442 | gr_gpc0_ppc0_cbm_cfg_size_granularity_v() / | ||
5443 | gr_pd_ab_dist_cfg1_max_output_granularity_v(); | ||
5444 | |||
5445 | gk20a_writel(g, gr_pd_ab_dist_cfg1_r(), | ||
5446 | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | | ||
5447 | gr_pd_ab_dist_cfg1_max_batches_init_f()); | ||
5448 | |||
5449 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
5450 | stride = gpc_stride * gpc_index; | ||
5451 | |||
5452 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | ||
5453 | ppc_index++) { | ||
5454 | |||
5455 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg2_r() + | ||
5456 | stride + ppc_in_gpc_stride * ppc_index); | ||
5457 | |||
5458 | val = set_field(val, gr_gpc0_ppc0_cbm_cfg2_size_m(), | ||
5459 | gr_gpc0_ppc0_cbm_cfg2_size_f(alpha_cb_size * | ||
5460 | gr->pes_tpc_count[ppc_index][gpc_index])); | ||
5461 | |||
5462 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg2_r() + | ||
5463 | stride + ppc_in_gpc_stride * ppc_index, val); | ||
5464 | } | ||
5465 | } | ||
5466 | } | ||
5467 | |||
5468 | int gk20a_enable_gr_hw(struct gk20a *g) | 4985 | int gk20a_enable_gr_hw(struct gk20a *g) |
5469 | { | 4986 | { |
5470 | int err; | 4987 | int err; |
@@ -5548,44 +5065,6 @@ int gk20a_gr_reset(struct gk20a *g) | |||
5548 | return err; | 5065 | return err; |
5549 | } | 5066 | } |
5550 | 5067 | ||
5551 | static int gr_gk20a_handle_sw_method(struct gk20a *g, u32 addr, | ||
5552 | u32 class_num, u32 offset, u32 data) | ||
5553 | { | ||
5554 | gk20a_dbg_fn(""); | ||
5555 | |||
5556 | trace_gr_gk20a_handle_sw_method(g->name); | ||
5557 | |||
5558 | if (class_num == KEPLER_COMPUTE_A) { | ||
5559 | switch (offset << 2) { | ||
5560 | case NVA0C0_SET_SHADER_EXCEPTIONS: | ||
5561 | gk20a_gr_set_shader_exceptions(g, data); | ||
5562 | break; | ||
5563 | default: | ||
5564 | goto fail; | ||
5565 | } | ||
5566 | } | ||
5567 | |||
5568 | if (class_num == KEPLER_C) { | ||
5569 | switch (offset << 2) { | ||
5570 | case NVA297_SET_SHADER_EXCEPTIONS: | ||
5571 | gk20a_gr_set_shader_exceptions(g, data); | ||
5572 | break; | ||
5573 | case NVA297_SET_CIRCULAR_BUFFER_SIZE: | ||
5574 | g->ops.gr.set_circular_buffer_size(g, data); | ||
5575 | break; | ||
5576 | case NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE: | ||
5577 | g->ops.gr.set_alpha_circular_buffer_size(g, data); | ||
5578 | break; | ||
5579 | default: | ||
5580 | goto fail; | ||
5581 | } | ||
5582 | } | ||
5583 | return 0; | ||
5584 | |||
5585 | fail: | ||
5586 | return -EINVAL; | ||
5587 | } | ||
5588 | |||
5589 | static void gk20a_gr_set_error_notifier(struct gk20a *g, | 5068 | static void gk20a_gr_set_error_notifier(struct gk20a *g, |
5590 | struct gr_gk20a_isr_data *isr_data, u32 error_notifier) | 5069 | struct gr_gk20a_isr_data *isr_data, u32 error_notifier) |
5591 | { | 5070 | { |
@@ -6043,143 +5522,6 @@ u32 gk20a_mask_hww_warp_esr(u32 hww_warp_esr) | |||
6043 | return hww_warp_esr; | 5522 | return hww_warp_esr; |
6044 | } | 5523 | } |
6045 | 5524 | ||
6046 | static int gk20a_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) | ||
6047 | { | ||
6048 | int sm_id; | ||
6049 | struct gr_gk20a *gr = &g->gr; | ||
6050 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
6051 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, | ||
6052 | GPU_LIT_TPC_IN_GPC_STRIDE); | ||
6053 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
6054 | |||
6055 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
6056 | |||
6057 | sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g, | ||
6058 | gr_gpc0_tpc0_sm_cfg_r() + offset)); | ||
6059 | |||
6060 | gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g, | ||
6061 | gr_gpc0_tpc0_sm_hww_global_esr_r() + offset); | ||
6062 | gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g, | ||
6063 | gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset); | ||
6064 | gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g, | ||
6065 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset); | ||
6066 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g, | ||
6067 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset); | ||
6068 | |||
6069 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
6070 | |||
6071 | return 0; | ||
6072 | } | ||
6073 | |||
6074 | static int gk20a_gr_update_sm_error_state(struct gk20a *g, | ||
6075 | struct channel_gk20a *ch, u32 sm_id, | ||
6076 | struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state) | ||
6077 | { | ||
6078 | u32 gpc, tpc, offset; | ||
6079 | struct gr_gk20a *gr = &g->gr; | ||
6080 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | ||
6081 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
6082 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, | ||
6083 | GPU_LIT_TPC_IN_GPC_STRIDE); | ||
6084 | int err = 0; | ||
6085 | |||
6086 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
6087 | |||
6088 | gr->sm_error_states[sm_id].hww_global_esr = | ||
6089 | sm_error_state->hww_global_esr; | ||
6090 | gr->sm_error_states[sm_id].hww_warp_esr = | ||
6091 | sm_error_state->hww_warp_esr; | ||
6092 | gr->sm_error_states[sm_id].hww_global_esr_report_mask = | ||
6093 | sm_error_state->hww_global_esr_report_mask; | ||
6094 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask = | ||
6095 | sm_error_state->hww_warp_esr_report_mask; | ||
6096 | |||
6097 | err = gr_gk20a_disable_ctxsw(g); | ||
6098 | if (err) { | ||
6099 | nvgpu_err(g, "unable to stop gr ctxsw"); | ||
6100 | goto fail; | ||
6101 | } | ||
6102 | |||
6103 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
6104 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
6105 | |||
6106 | offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
6107 | |||
6108 | if (gk20a_is_channel_ctx_resident(ch)) { | ||
6109 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset, | ||
6110 | gr->sm_error_states[sm_id].hww_global_esr); | ||
6111 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset, | ||
6112 | gr->sm_error_states[sm_id].hww_warp_esr); | ||
6113 | gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset, | ||
6114 | gr->sm_error_states[sm_id].hww_global_esr_report_mask); | ||
6115 | gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset, | ||
6116 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask); | ||
6117 | } else { | ||
6118 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); | ||
6119 | if (err) | ||
6120 | goto enable_ctxsw; | ||
6121 | |||
6122 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
6123 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset, | ||
6124 | gr->sm_error_states[sm_id].hww_global_esr_report_mask, | ||
6125 | true); | ||
6126 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
6127 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset, | ||
6128 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask, | ||
6129 | true); | ||
6130 | |||
6131 | gr_gk20a_ctx_patch_write_end(g, ch_ctx); | ||
6132 | } | ||
6133 | |||
6134 | enable_ctxsw: | ||
6135 | err = gr_gk20a_enable_ctxsw(g); | ||
6136 | |||
6137 | fail: | ||
6138 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
6139 | return err; | ||
6140 | } | ||
6141 | |||
6142 | static int gk20a_gr_clear_sm_error_state(struct gk20a *g, | ||
6143 | struct channel_gk20a *ch, u32 sm_id) | ||
6144 | { | ||
6145 | u32 gpc, tpc, offset; | ||
6146 | u32 val; | ||
6147 | struct gr_gk20a *gr = &g->gr; | ||
6148 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
6149 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, | ||
6150 | GPU_LIT_TPC_IN_GPC_STRIDE); | ||
6151 | int err = 0; | ||
6152 | |||
6153 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
6154 | |||
6155 | memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states)); | ||
6156 | |||
6157 | err = gr_gk20a_disable_ctxsw(g); | ||
6158 | if (err) { | ||
6159 | nvgpu_err(g, "unable to stop gr ctxsw"); | ||
6160 | goto fail; | ||
6161 | } | ||
6162 | |||
6163 | if (gk20a_is_channel_ctx_resident(ch)) { | ||
6164 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
6165 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
6166 | |||
6167 | offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
6168 | |||
6169 | val = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset); | ||
6170 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset, | ||
6171 | val); | ||
6172 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset, | ||
6173 | 0); | ||
6174 | } | ||
6175 | |||
6176 | err = gr_gk20a_enable_ctxsw(g); | ||
6177 | |||
6178 | fail: | ||
6179 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
6180 | return err; | ||
6181 | } | ||
6182 | |||
6183 | int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, | 5525 | int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, |
6184 | bool *post_event, struct channel_gk20a *fault_ch, | 5526 | bool *post_event, struct channel_gk20a *fault_ch, |
6185 | u32 *hww_global_esr) | 5527 | u32 *hww_global_esr) |
@@ -7171,12 +6513,6 @@ static const u32 _num_ovr_perf_regs = 17; | |||
7171 | static u32 _ovr_perf_regs[17] = { 0, }; | 6513 | static u32 _ovr_perf_regs[17] = { 0, }; |
7172 | /* Following are the blocks of registers that the ucode | 6514 | /* Following are the blocks of registers that the ucode |
7173 | stores in the extended region.*/ | 6515 | stores in the extended region.*/ |
7174 | /* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */ | ||
7175 | static const u32 _num_sm_dsm_perf_regs = 5; | ||
7176 | /* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/ | ||
7177 | static const u32 _num_sm_dsm_perf_ctrl_regs = 4; | ||
7178 | static u32 _sm_dsm_perf_regs[5]; | ||
7179 | static u32 _sm_dsm_perf_ctrl_regs[4]; | ||
7180 | 6516 | ||
7181 | static void init_ovr_perf_reg_info(void) | 6517 | static void init_ovr_perf_reg_info(void) |
7182 | { | 6518 | { |
@@ -7202,24 +6538,6 @@ static void init_ovr_perf_reg_info(void) | |||
7202 | _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(); | 6538 | _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(); |
7203 | } | 6539 | } |
7204 | 6540 | ||
7205 | static void gr_gk20a_init_sm_dsm_reg_info(void) | ||
7206 | { | ||
7207 | if (_sm_dsm_perf_regs[0] != 0) | ||
7208 | return; | ||
7209 | |||
7210 | _sm_dsm_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r(); | ||
7211 | _sm_dsm_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r(); | ||
7212 | _sm_dsm_perf_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_r(); | ||
7213 | _sm_dsm_perf_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_r(); | ||
7214 | _sm_dsm_perf_regs[4] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_r(); | ||
7215 | |||
7216 | _sm_dsm_perf_ctrl_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control1_r(); | ||
7217 | _sm_dsm_perf_ctrl_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control2_r(); | ||
7218 | _sm_dsm_perf_ctrl_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control3_r(); | ||
7219 | _sm_dsm_perf_ctrl_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control4_r(); | ||
7220 | |||
7221 | } | ||
7222 | |||
7223 | /* TBD: would like to handle this elsewhere, at a higher level. | 6541 | /* TBD: would like to handle this elsewhere, at a higher level. |
7224 | * these are currently constructed in a "test-then-write" style | 6542 | * these are currently constructed in a "test-then-write" style |
7225 | * which makes it impossible to know externally whether a ctx | 6543 | * which makes it impossible to know externally whether a ctx |
@@ -7289,44 +6607,6 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
7289 | return 0; | 6607 | return 0; |
7290 | } | 6608 | } |
7291 | 6609 | ||
7292 | static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset) | ||
7293 | { | ||
7294 | u32 reg; | ||
7295 | u32 quad_ctrl; | ||
7296 | u32 half_ctrl; | ||
7297 | u32 tpc, gpc; | ||
7298 | u32 gpc_tpc_addr; | ||
7299 | u32 gpc_tpc_stride; | ||
7300 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
7301 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
7302 | |||
7303 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "offset=0x%x", offset); | ||
7304 | |||
7305 | gpc = pri_get_gpc_num(g, offset); | ||
7306 | gpc_tpc_addr = pri_gpccs_addr_mask(offset); | ||
7307 | tpc = g->ops.gr.get_tpc_num(g, gpc_tpc_addr); | ||
7308 | |||
7309 | quad_ctrl = quad & 0x1; /* first bit tells us quad */ | ||
7310 | half_ctrl = (quad >> 1) & 0x1; /* second bit tells us half */ | ||
7311 | |||
7312 | gpc_tpc_stride = gpc * gpc_stride + tpc * tpc_in_gpc_stride; | ||
7313 | gpc_tpc_addr = gr_gpc0_tpc0_sm_halfctl_ctrl_r() + gpc_tpc_stride; | ||
7314 | |||
7315 | reg = gk20a_readl(g, gpc_tpc_addr); | ||
7316 | reg = set_field(reg, | ||
7317 | gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(), | ||
7318 | gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(quad_ctrl)); | ||
7319 | |||
7320 | gk20a_writel(g, gpc_tpc_addr, reg); | ||
7321 | |||
7322 | gpc_tpc_addr = gr_gpc0_tpc0_sm_debug_sfe_control_r() + gpc_tpc_stride; | ||
7323 | reg = gk20a_readl(g, gpc_tpc_addr); | ||
7324 | reg = set_field(reg, | ||
7325 | gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(), | ||
7326 | gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(half_ctrl)); | ||
7327 | gk20a_writel(g, gpc_tpc_addr, reg); | ||
7328 | } | ||
7329 | |||
7330 | #define ILLEGAL_ID ((u32)~0) | 6610 | #define ILLEGAL_ID ((u32)~0) |
7331 | 6611 | ||
7332 | static inline bool check_main_image_header_magic(u8 *context) | 6612 | static inline bool check_main_image_header_magic(u8 *context) |
@@ -7349,26 +6629,6 @@ static inline int ctxsw_prog_ucode_header_size_in_bytes(void) | |||
7349 | return 256; | 6629 | return 256; |
7350 | } | 6630 | } |
7351 | 6631 | ||
7352 | static void gr_gk20a_get_sm_dsm_perf_regs(struct gk20a *g, | ||
7353 | u32 *num_sm_dsm_perf_regs, | ||
7354 | u32 **sm_dsm_perf_regs, | ||
7355 | u32 *perf_register_stride) | ||
7356 | { | ||
7357 | *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs; | ||
7358 | *sm_dsm_perf_regs = _sm_dsm_perf_regs; | ||
7359 | *perf_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(); | ||
7360 | } | ||
7361 | |||
7362 | static void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, | ||
7363 | u32 *num_sm_dsm_perf_ctrl_regs, | ||
7364 | u32 **sm_dsm_perf_ctrl_regs, | ||
7365 | u32 *ctrl_register_stride) | ||
7366 | { | ||
7367 | *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs; | ||
7368 | *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs; | ||
7369 | *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); | ||
7370 | } | ||
7371 | |||
7372 | static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | 6632 | static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, |
7373 | u32 addr, | 6633 | u32 addr, |
7374 | bool is_quad, u32 quad, | 6634 | bool is_quad, u32 quad, |
@@ -8639,37 +7899,6 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
8639 | return err; | 7899 | return err; |
8640 | } | 7900 | } |
8641 | 7901 | ||
8642 | static void gr_gk20a_cb_size_default(struct gk20a *g) | ||
8643 | { | ||
8644 | struct gr_gk20a *gr = &g->gr; | ||
8645 | |||
8646 | if (!gr->attrib_cb_default_size) | ||
8647 | gr->attrib_cb_default_size = | ||
8648 | gr_gpc0_ppc0_cbm_cfg_size_default_v(); | ||
8649 | gr->alpha_cb_default_size = | ||
8650 | gr_gpc0_ppc0_cbm_cfg2_size_default_v(); | ||
8651 | } | ||
8652 | |||
8653 | static int gr_gk20a_calc_global_ctx_buffer_size(struct gk20a *g) | ||
8654 | { | ||
8655 | struct gr_gk20a *gr = &g->gr; | ||
8656 | int size; | ||
8657 | |||
8658 | gr->attrib_cb_size = gr->attrib_cb_default_size; | ||
8659 | gr->alpha_cb_size = gr->alpha_cb_default_size | ||
8660 | + (gr->alpha_cb_default_size >> 1); | ||
8661 | |||
8662 | size = gr->attrib_cb_size * | ||
8663 | gr_gpc0_ppc0_cbm_cfg_size_granularity_v() * | ||
8664 | gr->max_tpc_count; | ||
8665 | |||
8666 | size += gr->alpha_cb_size * | ||
8667 | gr_gpc0_ppc0_cbm_cfg2_size_granularity_v() * | ||
8668 | gr->max_tpc_count; | ||
8669 | |||
8670 | return size; | ||
8671 | } | ||
8672 | |||
8673 | void gr_gk20a_commit_global_pagepool(struct gk20a *g, | 7902 | void gr_gk20a_commit_global_pagepool(struct gk20a *g, |
8674 | struct channel_ctx_gk20a *ch_ctx, | 7903 | struct channel_ctx_gk20a *ch_ctx, |
8675 | u64 addr, u32 size, bool patch) | 7904 | u64 addr, u32 size, bool patch) |
@@ -8697,33 +7926,6 @@ void gk20a_init_gr(struct gk20a *g) | |||
8697 | nvgpu_cond_init(&g->gr.init_wq); | 7926 | nvgpu_cond_init(&g->gr.init_wq); |
8698 | } | 7927 | } |
8699 | 7928 | ||
8700 | static bool gr_gk20a_is_tpc_addr(struct gk20a *g, u32 addr) | ||
8701 | { | ||
8702 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
8703 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
8704 | u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); | ||
8705 | return ((addr >= tpc_in_gpc_base) && | ||
8706 | (addr < tpc_in_gpc_base + | ||
8707 | (num_tpc_per_gpc * tpc_in_gpc_stride))) | ||
8708 | || pri_is_tpc_addr_shared(g, addr); | ||
8709 | } | ||
8710 | |||
8711 | static u32 gr_gk20a_get_tpc_num(struct gk20a *g, u32 addr) | ||
8712 | { | ||
8713 | u32 i, start; | ||
8714 | u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); | ||
8715 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
8716 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
8717 | |||
8718 | for (i = 0; i < num_tpcs; i++) { | ||
8719 | start = tpc_in_gpc_base + (i * tpc_in_gpc_stride); | ||
8720 | if ((addr >= start) && | ||
8721 | (addr < (start + tpc_in_gpc_stride))) | ||
8722 | return i; | ||
8723 | } | ||
8724 | return 0; | ||
8725 | } | ||
8726 | |||
8727 | int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, | 7929 | int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, |
8728 | u32 global_esr_mask, bool check_errors) | 7930 | u32 global_esr_mask, bool check_errors) |
8729 | { | 7931 | { |
@@ -8949,176 +8151,6 @@ void gk20a_resume_all_sms(struct gk20a *g) | |||
8949 | gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); | 8151 | gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); |
8950 | } | 8152 | } |
8951 | 8153 | ||
8952 | static u32 gr_gk20a_pagepool_default_size(struct gk20a *g) | ||
8953 | { | ||
8954 | return gr_scc_pagepool_total_pages_hwmax_value_v(); | ||
8955 | } | ||
8956 | |||
8957 | static u32 gr_gk20a_get_max_fbps_count(struct gk20a *g) | ||
8958 | { | ||
8959 | u32 max_fbps_count, tmp; | ||
8960 | tmp = gk20a_readl(g, top_num_fbps_r()); | ||
8961 | max_fbps_count = top_num_fbps_value_v(tmp); | ||
8962 | return max_fbps_count; | ||
8963 | } | ||
8964 | |||
8965 | |||
8966 | static u32 gr_gk20a_get_fbp_en_mask(struct gk20a *g) | ||
8967 | { | ||
8968 | u32 fbp_en_mask, opt_fbio; | ||
8969 | opt_fbio = gk20a_readl(g, top_fs_status_fbp_r()); | ||
8970 | fbp_en_mask = top_fs_status_fbp_cluster_v(opt_fbio); | ||
8971 | return fbp_en_mask; | ||
8972 | } | ||
8973 | |||
8974 | static u32 gr_gk20a_get_max_ltc_per_fbp(struct gk20a *g) | ||
8975 | { | ||
8976 | return 1; | ||
8977 | } | ||
8978 | |||
8979 | static u32 gr_gk20a_get_max_lts_per_ltc(struct gk20a *g) | ||
8980 | { | ||
8981 | return 1; | ||
8982 | } | ||
8983 | |||
8984 | static u32 *gr_gk20a_rop_l2_en_mask(struct gk20a *g) | ||
8985 | { | ||
8986 | /* gk20a doesnt have rop_l2_en_mask */ | ||
8987 | return NULL; | ||
8988 | } | ||
8989 | |||
8990 | |||
8991 | |||
8992 | static int gr_gk20a_dump_gr_status_regs(struct gk20a *g, | ||
8993 | struct gk20a_debug_output *o) | ||
8994 | { | ||
8995 | u32 gr_engine_id; | ||
8996 | |||
8997 | gr_engine_id = gk20a_fifo_get_gr_engine_id(g); | ||
8998 | |||
8999 | gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n", | ||
9000 | gk20a_readl(g, gr_status_r())); | ||
9001 | gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n", | ||
9002 | gk20a_readl(g, gr_status_1_r())); | ||
9003 | gk20a_debug_output(o, "NV_PGRAPH_STATUS2: 0x%x\n", | ||
9004 | gk20a_readl(g, gr_status_2_r())); | ||
9005 | gk20a_debug_output(o, "NV_PGRAPH_ENGINE_STATUS: 0x%x\n", | ||
9006 | gk20a_readl(g, gr_engine_status_r())); | ||
9007 | gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_STATUS : 0x%x\n", | ||
9008 | gk20a_readl(g, gr_gpfifo_status_r())); | ||
9009 | gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_CONTROL : 0x%x\n", | ||
9010 | gk20a_readl(g, gr_gpfifo_ctl_r())); | ||
9011 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n", | ||
9012 | gk20a_readl(g, gr_fecs_host_int_status_r())); | ||
9013 | gk20a_debug_output(o, "NV_PGRAPH_EXCEPTION : 0x%x\n", | ||
9014 | gk20a_readl(g, gr_exception_r())); | ||
9015 | gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n", | ||
9016 | gk20a_readl(g, gr_fecs_intr_r())); | ||
9017 | gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n", | ||
9018 | gk20a_readl(g, fifo_engine_status_r(gr_engine_id))); | ||
9019 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n", | ||
9020 | gk20a_readl(g, gr_activity_0_r())); | ||
9021 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n", | ||
9022 | gk20a_readl(g, gr_activity_1_r())); | ||
9023 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY2: 0x%x\n", | ||
9024 | gk20a_readl(g, gr_activity_2_r())); | ||
9025 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY4: 0x%x\n", | ||
9026 | gk20a_readl(g, gr_activity_4_r())); | ||
9027 | gk20a_debug_output(o, "NV_PGRAPH_PRI_SKED_ACTIVITY: 0x%x\n", | ||
9028 | gk20a_readl(g, gr_pri_sked_activity_r())); | ||
9029 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY0: 0x%x\n", | ||
9030 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity0_r())); | ||
9031 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY1: 0x%x\n", | ||
9032 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity1_r())); | ||
9033 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY2: 0x%x\n", | ||
9034 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity2_r())); | ||
9035 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY3: 0x%x\n", | ||
9036 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r())); | ||
9037 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
9038 | gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r())); | ||
9039 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
9040 | gk20a_readl(g, gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r())); | ||
9041 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n", | ||
9042 | gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r())); | ||
9043 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n", | ||
9044 | gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_1_r())); | ||
9045 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY2: 0x%x\n", | ||
9046 | gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r())); | ||
9047 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n", | ||
9048 | gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r())); | ||
9049 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
9050 | gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r())); | ||
9051 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
9052 | gk20a_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r())); | ||
9053 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_ACTIVITY0: 0x%x\n", | ||
9054 | gk20a_readl(g, gr_pri_be0_becs_be_activity0_r())); | ||
9055 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_BECS_BE_ACTIVITY0: 0x%x\n", | ||
9056 | gk20a_readl(g, gr_pri_bes_becs_be_activity0_r())); | ||
9057 | gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n", | ||
9058 | gk20a_readl(g, gr_pri_ds_mpipe_status_r())); | ||
9059 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_ON_STATUS: 0x%x\n", | ||
9060 | gk20a_readl(g, gr_pri_fe_go_idle_on_status_r())); | ||
9061 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_TIMEOUT : 0x%x\n", | ||
9062 | gk20a_readl(g, gr_fe_go_idle_timeout_r())); | ||
9063 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_CHECK : 0x%x\n", | ||
9064 | gk20a_readl(g, gr_pri_fe_go_idle_check_r())); | ||
9065 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_INFO : 0x%x\n", | ||
9066 | gk20a_readl(g, gr_pri_fe_go_idle_info_r())); | ||
9067 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TEX_M_TEX_SUBUNITS_STATUS: 0x%x\n", | ||
9068 | gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r())); | ||
9069 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_FE_0: 0x%x\n", | ||
9070 | gk20a_readl(g, gr_fecs_ctxsw_status_fe_0_r())); | ||
9071 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_1: 0x%x\n", | ||
9072 | gk20a_readl(g, gr_fecs_ctxsw_status_1_r())); | ||
9073 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_GPC_0: 0x%x\n", | ||
9074 | gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_gpc_0_r())); | ||
9075 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_1: 0x%x\n", | ||
9076 | gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_1_r())); | ||
9077 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_IDLESTATE : 0x%x\n", | ||
9078 | gk20a_readl(g, gr_fecs_ctxsw_idlestate_r())); | ||
9079 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_IDLESTATE : 0x%x\n", | ||
9080 | gk20a_readl(g, gr_gpc0_gpccs_ctxsw_idlestate_r())); | ||
9081 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CURRENT_CTX : 0x%x\n", | ||
9082 | gk20a_readl(g, gr_fecs_current_ctx_r())); | ||
9083 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_NEW_CTX : 0x%x\n", | ||
9084 | gk20a_readl(g, gr_fecs_new_ctx_r())); | ||
9085 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_CROP_STATUS1 : 0x%x\n", | ||
9086 | gk20a_readl(g, gr_pri_be0_crop_status1_r())); | ||
9087 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_CROP_STATUS1 : 0x%x\n", | ||
9088 | gk20a_readl(g, gr_pri_bes_crop_status1_r())); | ||
9089 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS : 0x%x\n", | ||
9090 | gk20a_readl(g, gr_pri_be0_zrop_status_r())); | ||
9091 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS2 : 0x%x\n", | ||
9092 | gk20a_readl(g, gr_pri_be0_zrop_status2_r())); | ||
9093 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS : 0x%x\n", | ||
9094 | gk20a_readl(g, gr_pri_bes_zrop_status_r())); | ||
9095 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS2 : 0x%x\n", | ||
9096 | gk20a_readl(g, gr_pri_bes_zrop_status2_r())); | ||
9097 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION: 0x%x\n", | ||
9098 | gk20a_readl(g, gr_pri_be0_becs_be_exception_r())); | ||
9099 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION_EN: 0x%x\n", | ||
9100 | gk20a_readl(g, gr_pri_be0_becs_be_exception_en_r())); | ||
9101 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION: 0x%x\n", | ||
9102 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_r())); | ||
9103 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_EN: 0x%x\n", | ||
9104 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_en_r())); | ||
9105 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION: 0x%x\n", | ||
9106 | gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r())); | ||
9107 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n", | ||
9108 | gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r())); | ||
9109 | return 0; | ||
9110 | } | ||
9111 | |||
9112 | static void gr_gk20a_init_cyclestats(struct gk20a *g) | ||
9113 | { | ||
9114 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
9115 | g->gpu_characteristics.flags |= | ||
9116 | NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS; | ||
9117 | #else | ||
9118 | (void)g; | ||
9119 | #endif | ||
9120 | } | ||
9121 | |||
9122 | int gr_gk20a_set_sm_debug_mode(struct gk20a *g, | 8154 | int gr_gk20a_set_sm_debug_mode(struct gk20a *g, |
9123 | struct channel_gk20a *ch, u64 sms, bool enable) | 8155 | struct channel_gk20a *ch, u64 sms, bool enable) |
9124 | { | 8156 | { |
@@ -9175,70 +8207,6 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g, | |||
9175 | return err; | 8207 | return err; |
9176 | } | 8208 | } |
9177 | 8209 | ||
9178 | static void gr_gk20a_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) | ||
9179 | { | ||
9180 | /* Check if we have at least one valid warp */ | ||
9181 | struct gr_gk20a *gr = &g->gr; | ||
9182 | u32 gpc, tpc, sm_id; | ||
9183 | u32 tpc_offset, gpc_offset, reg_offset; | ||
9184 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; | ||
9185 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
9186 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
9187 | |||
9188 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
9189 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
9190 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
9191 | |||
9192 | tpc_offset = tpc_in_gpc_stride * tpc; | ||
9193 | gpc_offset = gpc_stride * gpc; | ||
9194 | reg_offset = tpc_offset + gpc_offset; | ||
9195 | |||
9196 | /* 64 bit read */ | ||
9197 | warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32; | ||
9198 | warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset); | ||
9199 | |||
9200 | |||
9201 | /* 64 bit read */ | ||
9202 | warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32; | ||
9203 | warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset); | ||
9204 | |||
9205 | /* 64 bit read */ | ||
9206 | warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32; | ||
9207 | warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset); | ||
9208 | |||
9209 | w_state[sm_id].valid_warps[0] = warps_valid; | ||
9210 | w_state[sm_id].trapped_warps[0] = warps_trapped; | ||
9211 | w_state[sm_id].paused_warps[0] = warps_paused; | ||
9212 | } | ||
9213 | |||
9214 | /* Only for debug purpose */ | ||
9215 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
9216 | gk20a_dbg_fn("w_state[%d].valid_warps[0]: %llx\n", | ||
9217 | sm_id, w_state[sm_id].valid_warps[0]); | ||
9218 | gk20a_dbg_fn("w_state[%d].trapped_warps[0]: %llx\n", | ||
9219 | sm_id, w_state[sm_id].trapped_warps[0]); | ||
9220 | gk20a_dbg_fn("w_state[%d].paused_warps[0]: %llx\n", | ||
9221 | sm_id, w_state[sm_id].paused_warps[0]); | ||
9222 | } | ||
9223 | } | ||
9224 | |||
9225 | static void gr_gk20a_get_access_map(struct gk20a *g, | ||
9226 | u32 **whitelist, int *num_entries) | ||
9227 | { | ||
9228 | static u32 wl_addr_gk20a[] = { | ||
9229 | /* this list must be sorted (low to high) */ | ||
9230 | 0x404468, /* gr_pri_mme_max_instructions */ | ||
9231 | 0x418800, /* gr_pri_gpcs_setup_debug */ | ||
9232 | 0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */ | ||
9233 | 0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */ | ||
9234 | 0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */ | ||
9235 | 0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */ | ||
9236 | }; | ||
9237 | |||
9238 | *whitelist = wl_addr_gk20a; | ||
9239 | *num_entries = ARRAY_SIZE(wl_addr_gk20a); | ||
9240 | } | ||
9241 | |||
9242 | /* | 8210 | /* |
9243 | * gr_gk20a_suspend_context() | 8211 | * gr_gk20a_suspend_context() |
9244 | * This API should be called with dbg_session lock held | 8212 | * This API should be called with dbg_session lock held |
@@ -9356,44 +8324,6 @@ clean_up: | |||
9356 | return err; | 8324 | return err; |
9357 | } | 8325 | } |
9358 | 8326 | ||
9359 | static int gr_gk20a_get_preemption_mode_flags(struct gk20a *g, | ||
9360 | struct nvgpu_preemption_modes_rec *preemption_modes_rec) | ||
9361 | { | ||
9362 | preemption_modes_rec->graphics_preemption_mode_flags = | ||
9363 | NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; | ||
9364 | preemption_modes_rec->compute_preemption_mode_flags = | ||
9365 | NVGPU_COMPUTE_PREEMPTION_MODE_WFI; | ||
9366 | |||
9367 | preemption_modes_rec->default_graphics_preempt_mode = | ||
9368 | NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; | ||
9369 | preemption_modes_rec->default_compute_preempt_mode = | ||
9370 | NVGPU_COMPUTE_PREEMPTION_MODE_WFI; | ||
9371 | |||
9372 | return 0; | ||
9373 | } | ||
9374 | |||
9375 | static bool gr_gk20a_is_ltcs_ltss_addr_stub(struct gk20a *g, u32 addr) | ||
9376 | { | ||
9377 | return false; | ||
9378 | } | ||
9379 | |||
9380 | static bool gr_gk20a_is_ltcn_ltss_addr_stub(struct gk20a *g, u32 addr) | ||
9381 | { | ||
9382 | return false; | ||
9383 | } | ||
9384 | |||
9385 | static void gr_gk20a_split_lts_broadcast_addr_stub(struct gk20a *g, u32 addr, | ||
9386 | u32 *priv_addr_table, | ||
9387 | u32 *priv_addr_table_index) | ||
9388 | { | ||
9389 | } | ||
9390 | |||
9391 | static void gr_gk20a_split_ltc_broadcast_addr_stub(struct gk20a *g, u32 addr, | ||
9392 | u32 *priv_addr_table, | ||
9393 | u32 *priv_addr_table_index) | ||
9394 | { | ||
9395 | } | ||
9396 | |||
9397 | int gr_gk20a_inval_icache(struct gk20a *g, struct channel_gk20a *ch) | 8327 | int gr_gk20a_inval_icache(struct gk20a *g, struct channel_gk20a *ch) |
9398 | { | 8328 | { |
9399 | int err = 0; | 8329 | int err = 0; |
@@ -9565,100 +8495,3 @@ u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g) | |||
9565 | 8495 | ||
9566 | return tpc_exception_en; | 8496 | return tpc_exception_en; |
9567 | } | 8497 | } |
9568 | |||
9569 | void gk20a_init_gr_ops(struct gpu_ops *gops) | ||
9570 | { | ||
9571 | gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; | ||
9572 | gops->gr.bundle_cb_defaults = gr_gk20a_bundle_cb_defaults; | ||
9573 | gops->gr.cb_size_default = gr_gk20a_cb_size_default; | ||
9574 | gops->gr.calc_global_ctx_buffer_size = | ||
9575 | gr_gk20a_calc_global_ctx_buffer_size; | ||
9576 | gops->gr.commit_global_attrib_cb = gr_gk20a_commit_global_attrib_cb; | ||
9577 | gops->gr.commit_global_bundle_cb = gr_gk20a_commit_global_bundle_cb; | ||
9578 | gops->gr.commit_global_cb_manager = gr_gk20a_commit_global_cb_manager; | ||
9579 | gops->gr.commit_global_pagepool = gr_gk20a_commit_global_pagepool; | ||
9580 | gops->gr.handle_sw_method = gr_gk20a_handle_sw_method; | ||
9581 | gops->gr.set_alpha_circular_buffer_size = | ||
9582 | gk20a_gr_set_circular_buffer_size; | ||
9583 | gops->gr.set_circular_buffer_size = | ||
9584 | gk20a_gr_set_alpha_circular_buffer_size; | ||
9585 | gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions; | ||
9586 | gops->gr.is_valid_class = gr_gk20a_is_valid_class; | ||
9587 | gops->gr.is_valid_gfx_class = gr_gk20a_is_valid_gfx_class; | ||
9588 | gops->gr.is_valid_compute_class = gr_gk20a_is_valid_compute_class; | ||
9589 | gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs; | ||
9590 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs; | ||
9591 | gops->gr.init_fs_state = gr_gk20a_init_fs_state; | ||
9592 | gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask; | ||
9593 | gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables; | ||
9594 | gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments; | ||
9595 | gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; | ||
9596 | gops->gr.get_gpc_tpc_mask = gr_gk20a_get_gpc_tpc_mask; | ||
9597 | gops->gr.free_channel_ctx = gk20a_free_channel_ctx; | ||
9598 | gops->gr.alloc_obj_ctx = gk20a_alloc_obj_ctx; | ||
9599 | gops->gr.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull; | ||
9600 | gops->gr.get_zcull_info = gr_gk20a_get_zcull_info; | ||
9601 | gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr; | ||
9602 | gops->gr.get_tpc_num = gr_gk20a_get_tpc_num; | ||
9603 | gops->gr.detect_sm_arch = gr_gk20a_detect_sm_arch; | ||
9604 | gops->gr.add_zbc_color = gr_gk20a_add_zbc_color; | ||
9605 | gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth; | ||
9606 | gops->gr.zbc_set_table = gk20a_gr_zbc_set_table; | ||
9607 | gops->gr.zbc_query_table = gr_gk20a_query_zbc; | ||
9608 | gops->gr.pmu_save_zbc = gr_gk20a_pmu_save_zbc; | ||
9609 | gops->gr.add_zbc = _gk20a_gr_zbc_set_table; | ||
9610 | gops->gr.pagepool_default_size = gr_gk20a_pagepool_default_size; | ||
9611 | gops->gr.init_ctx_state = gr_gk20a_init_ctx_state; | ||
9612 | gops->gr.alloc_gr_ctx = gr_gk20a_alloc_gr_ctx; | ||
9613 | gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx; | ||
9614 | gops->gr.dump_gr_regs = gr_gk20a_dump_gr_status_regs; | ||
9615 | gops->gr.get_max_fbps_count = gr_gk20a_get_max_fbps_count; | ||
9616 | gops->gr.get_fbp_en_mask = gr_gk20a_get_fbp_en_mask; | ||
9617 | gops->gr.get_max_ltc_per_fbp = gr_gk20a_get_max_ltc_per_fbp; | ||
9618 | gops->gr.get_max_lts_per_ltc = gr_gk20a_get_max_lts_per_ltc; | ||
9619 | gops->gr.get_rop_l2_en_mask = gr_gk20a_rop_l2_en_mask; | ||
9620 | gops->gr.init_sm_dsm_reg_info = gr_gk20a_init_sm_dsm_reg_info; | ||
9621 | gops->gr.wait_empty = gr_gk20a_wait_idle; | ||
9622 | gops->gr.init_cyclestats = gr_gk20a_init_cyclestats; | ||
9623 | gops->gr.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode; | ||
9624 | gops->gr.bpt_reg_info = gr_gk20a_bpt_reg_info; | ||
9625 | gops->gr.get_access_map = gr_gk20a_get_access_map; | ||
9626 | gops->gr.handle_fecs_error = gk20a_gr_handle_fecs_error; | ||
9627 | gops->gr.mask_hww_warp_esr = gk20a_mask_hww_warp_esr; | ||
9628 | gops->gr.handle_sm_exception = gr_gk20a_handle_sm_exception; | ||
9629 | gops->gr.handle_tex_exception = gr_gk20a_handle_tex_exception; | ||
9630 | gops->gr.enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions; | ||
9631 | gops->gr.enable_exceptions = gk20a_gr_enable_exceptions; | ||
9632 | gops->gr.get_lrf_tex_ltc_dram_override = NULL; | ||
9633 | gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode; | ||
9634 | gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode; | ||
9635 | gops->gr.record_sm_error_state = gk20a_gr_record_sm_error_state; | ||
9636 | gops->gr.update_sm_error_state = gk20a_gr_update_sm_error_state; | ||
9637 | gops->gr.clear_sm_error_state = gk20a_gr_clear_sm_error_state; | ||
9638 | gops->gr.suspend_contexts = gr_gk20a_suspend_contexts; | ||
9639 | gops->gr.resume_contexts = gr_gk20a_resume_contexts; | ||
9640 | gops->gr.get_preemption_mode_flags = gr_gk20a_get_preemption_mode_flags; | ||
9641 | gops->gr.program_active_tpc_counts = gr_gk20a_program_active_tpc_counts; | ||
9642 | gops->gr.program_sm_id_numbering = gr_gk20a_program_sm_id_numbering; | ||
9643 | gops->gr.init_sm_id_table = gr_gk20a_init_sm_id_table; | ||
9644 | gops->gr.is_ltcs_ltss_addr = gr_gk20a_is_ltcs_ltss_addr_stub; | ||
9645 | gops->gr.is_ltcn_ltss_addr = gr_gk20a_is_ltcn_ltss_addr_stub; | ||
9646 | gops->gr.split_lts_broadcast_addr = | ||
9647 | gr_gk20a_split_lts_broadcast_addr_stub; | ||
9648 | gops->gr.split_ltc_broadcast_addr = | ||
9649 | gr_gk20a_split_ltc_broadcast_addr_stub; | ||
9650 | gops->gr.setup_rop_mapping = gr_gk20a_setup_rop_mapping; | ||
9651 | gops->gr.program_zcull_mapping = gr_gk20a_program_zcull_mapping; | ||
9652 | gops->gr.commit_global_timeslice = gr_gk20a_commit_global_timeslice; | ||
9653 | gops->gr.commit_inst = gr_gk20a_commit_inst; | ||
9654 | gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr; | ||
9655 | gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr; | ||
9656 | gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode; | ||
9657 | gops->gr.inval_icache = gr_gk20a_inval_icache; | ||
9658 | gops->gr.trigger_suspend = gr_gk20a_trigger_suspend; | ||
9659 | gops->gr.wait_for_pause = gr_gk20a_wait_for_pause; | ||
9660 | gops->gr.resume_from_pause = gr_gk20a_resume_from_pause; | ||
9661 | gops->gr.clear_sm_errors = gr_gk20a_clear_sm_errors; | ||
9662 | gops->gr.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions; | ||
9663 | gops->gr.get_esr_sm_sel = gk20a_gr_get_esr_sm_sel; | ||
9664 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 109ae0a3..dd11a082 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -484,7 +484,6 @@ struct gpu_ops; | |||
484 | int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | 484 | int gr_gk20a_load_golden_ctx_image(struct gk20a *g, |
485 | struct channel_gk20a *c); | 485 | struct channel_gk20a *c); |
486 | void gk20a_init_gr(struct gk20a *g); | 486 | void gk20a_init_gr(struct gk20a *g); |
487 | void gk20a_init_gr_ops(struct gpu_ops *gops); | ||
488 | int gk20a_init_gr_support(struct gk20a *g); | 487 | int gk20a_init_gr_support(struct gk20a *g); |
489 | int gk20a_enable_gr_hw(struct gk20a *g); | 488 | int gk20a_enable_gr_hw(struct gk20a *g); |
490 | int gk20a_gr_reset(struct gk20a *g); | 489 | int gk20a_gr_reset(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gk20a/hal.c b/drivers/gpu/nvgpu/gk20a/hal.c index 8fadc199..812d351e 100644 --- a/drivers/gpu/nvgpu/gk20a/hal.c +++ b/drivers/gpu/nvgpu/gk20a/hal.c | |||
@@ -14,7 +14,6 @@ | |||
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include "gk20a.h" | 16 | #include "gk20a.h" |
17 | #include "hal_gk20a.h" | ||
18 | #include "hal.h" | 17 | #include "hal.h" |
19 | #include "gm20b/hal_gm20b.h" | 18 | #include "gm20b/hal_gm20b.h" |
20 | #include "gp10b/hal_gp10b.h" | 19 | #include "gp10b/hal_gp10b.h" |
@@ -30,10 +29,6 @@ int gpu_init_hal(struct gk20a *g) | |||
30 | { | 29 | { |
31 | u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl; | 30 | u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl; |
32 | switch (ver) { | 31 | switch (ver) { |
33 | case GK20A_GPUID_GK20A: | ||
34 | gk20a_dbg_info("gk20a detected"); | ||
35 | gk20a_init_hal(g); | ||
36 | break; | ||
37 | case GK20A_GPUID_GM20B: | 32 | case GK20A_GPUID_GM20B: |
38 | case GK20A_GPUID_GM20B_B: | 33 | case GK20A_GPUID_GM20B_B: |
39 | gk20a_dbg_info("gm20b detected"); | 34 | gk20a_dbg_info("gm20b detected"); |
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c deleted file mode 100644 index 5408c822..00000000 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ /dev/null | |||
@@ -1,208 +0,0 @@ | |||
1 | /* | ||
2 | * drivers/gpu/nvgpu/gk20a/hal_gk20a.c | ||
3 | * | ||
4 | * GK20A Tegra HAL interface. | ||
5 | * | ||
6 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | |||
18 | #include "hal_gk20a.h" | ||
19 | #include "bus_gk20a.h" | ||
20 | #include "ltc_gk20a.h" | ||
21 | #include "fb_gk20a.h" | ||
22 | #include "gk20a.h" | ||
23 | #include "gk20a_gating_reglist.h" | ||
24 | #include "channel_gk20a.h" | ||
25 | #include "gr_ctx_gk20a.h" | ||
26 | #include "fecs_trace_gk20a.h" | ||
27 | #include "mm_gk20a.h" | ||
28 | #include "mc_gk20a.h" | ||
29 | #include "flcn_gk20a.h" | ||
30 | #include "pmu_gk20a.h" | ||
31 | #include "clk_gk20a.h" | ||
32 | #include "regops_gk20a.h" | ||
33 | #include "therm_gk20a.h" | ||
34 | #include "tsg_gk20a.h" | ||
35 | #include "dbg_gpu_gk20a.h" | ||
36 | #include "css_gr_gk20a.h" | ||
37 | #include "pramin_gk20a.h" | ||
38 | #include "priv_ring_gk20a.h" | ||
39 | |||
40 | #include <nvgpu/debug.h> | ||
41 | #include <nvgpu/log.h> | ||
42 | #include <nvgpu/bug.h> | ||
43 | |||
44 | #include <nvgpu/hw/gk20a/hw_proj_gk20a.h> | ||
45 | |||
46 | static const struct gpu_ops gk20a_ops = { | ||
47 | .ltc = { | ||
48 | .determine_L2_size_bytes = gk20a_determine_L2_size_bytes, | ||
49 | .init_comptags = gk20a_ltc_init_comptags, | ||
50 | .cbc_ctrl = gk20a_ltc_cbc_ctrl, | ||
51 | .set_zbc_color_entry = gk20a_ltc_set_zbc_color_entry, | ||
52 | .set_zbc_depth_entry = gk20a_ltc_set_zbc_depth_entry, | ||
53 | .init_cbc = gk20a_ltc_init_cbc, | ||
54 | #ifdef CONFIG_DEBUG_FS | ||
55 | .sync_debugfs = gk20a_ltc_sync_debugfs, | ||
56 | #endif | ||
57 | .init_fs_state = gk20a_ltc_init_fs_state, | ||
58 | .isr = gk20a_ltc_isr, | ||
59 | }, | ||
60 | .clock_gating = { | ||
61 | .slcg_gr_load_gating_prod = | ||
62 | gr_gk20a_slcg_gr_load_gating_prod, | ||
63 | .slcg_perf_load_gating_prod = | ||
64 | gr_gk20a_slcg_perf_load_gating_prod, | ||
65 | .slcg_ltc_load_gating_prod = | ||
66 | ltc_gk20a_slcg_ltc_load_gating_prod, | ||
67 | .blcg_gr_load_gating_prod = | ||
68 | gr_gk20a_blcg_gr_load_gating_prod, | ||
69 | .pg_gr_load_gating_prod = | ||
70 | gr_gk20a_pg_gr_load_gating_prod, | ||
71 | .slcg_therm_load_gating_prod = | ||
72 | gr_gk20a_slcg_therm_load_gating_prod, | ||
73 | }, | ||
74 | }; | ||
75 | |||
76 | static int gk20a_get_litter_value(struct gk20a *g, int value) | ||
77 | { | ||
78 | int ret = EINVAL; | ||
79 | switch (value) { | ||
80 | case GPU_LIT_NUM_GPCS: | ||
81 | ret = proj_scal_litter_num_gpcs_v(); | ||
82 | break; | ||
83 | case GPU_LIT_NUM_PES_PER_GPC: | ||
84 | ret = proj_scal_litter_num_pes_per_gpc_v(); | ||
85 | break; | ||
86 | case GPU_LIT_NUM_ZCULL_BANKS: | ||
87 | ret = proj_scal_litter_num_zcull_banks_v(); | ||
88 | break; | ||
89 | case GPU_LIT_NUM_TPC_PER_GPC: | ||
90 | ret = proj_scal_litter_num_tpc_per_gpc_v(); | ||
91 | break; | ||
92 | case GPU_LIT_NUM_SM_PER_TPC: | ||
93 | ret = 1; | ||
94 | break; | ||
95 | case GPU_LIT_NUM_FBPS: | ||
96 | ret = proj_scal_litter_num_fbps_v(); | ||
97 | break; | ||
98 | case GPU_LIT_GPC_BASE: | ||
99 | ret = proj_gpc_base_v(); | ||
100 | break; | ||
101 | case GPU_LIT_GPC_STRIDE: | ||
102 | ret = proj_gpc_stride_v(); | ||
103 | break; | ||
104 | case GPU_LIT_GPC_SHARED_BASE: | ||
105 | ret = proj_gpc_shared_base_v(); | ||
106 | break; | ||
107 | case GPU_LIT_TPC_IN_GPC_BASE: | ||
108 | ret = proj_tpc_in_gpc_base_v(); | ||
109 | break; | ||
110 | case GPU_LIT_TPC_IN_GPC_STRIDE: | ||
111 | ret = proj_tpc_in_gpc_stride_v(); | ||
112 | break; | ||
113 | case GPU_LIT_TPC_IN_GPC_SHARED_BASE: | ||
114 | ret = proj_tpc_in_gpc_shared_base_v(); | ||
115 | break; | ||
116 | case GPU_LIT_PPC_IN_GPC_BASE: | ||
117 | ret = proj_ppc_in_gpc_base_v(); | ||
118 | break; | ||
119 | case GPU_LIT_PPC_IN_GPC_STRIDE: | ||
120 | ret = proj_ppc_in_gpc_stride_v(); | ||
121 | break; | ||
122 | case GPU_LIT_PPC_IN_GPC_SHARED_BASE: | ||
123 | ret = proj_ppc_in_gpc_shared_base_v(); | ||
124 | break; | ||
125 | case GPU_LIT_ROP_BASE: | ||
126 | ret = proj_rop_base_v(); | ||
127 | break; | ||
128 | case GPU_LIT_ROP_STRIDE: | ||
129 | ret = proj_rop_stride_v(); | ||
130 | break; | ||
131 | case GPU_LIT_ROP_SHARED_BASE: | ||
132 | ret = proj_rop_shared_base_v(); | ||
133 | break; | ||
134 | case GPU_LIT_HOST_NUM_ENGINES: | ||
135 | ret = proj_host_num_engines_v(); | ||
136 | break; | ||
137 | case GPU_LIT_HOST_NUM_PBDMA: | ||
138 | ret = proj_host_num_pbdma_v(); | ||
139 | break; | ||
140 | case GPU_LIT_LTC_STRIDE: | ||
141 | ret = proj_ltc_stride_v(); | ||
142 | break; | ||
143 | case GPU_LIT_LTS_STRIDE: | ||
144 | ret = proj_lts_stride_v(); | ||
145 | break; | ||
146 | /* GK20A does not have a FBPA unit, despite what's listed in the | ||
147 | * hw headers or read back through NV_PTOP_SCAL_NUM_FBPAS, | ||
148 | * so hardcode all values to 0. | ||
149 | */ | ||
150 | case GPU_LIT_NUM_FBPAS: | ||
151 | case GPU_LIT_FBPA_STRIDE: | ||
152 | case GPU_LIT_FBPA_BASE: | ||
153 | case GPU_LIT_FBPA_SHARED_BASE: | ||
154 | ret = 0; | ||
155 | break; | ||
156 | default: | ||
157 | nvgpu_err(g, "Missing definition %d", value); | ||
158 | BUG(); | ||
159 | break; | ||
160 | } | ||
161 | |||
162 | return ret; | ||
163 | } | ||
164 | |||
165 | int gk20a_init_hal(struct gk20a *g) | ||
166 | { | ||
167 | struct gpu_ops *gops = &g->ops; | ||
168 | struct nvgpu_gpu_characteristics *c = &g->gpu_characteristics; | ||
169 | |||
170 | gops->ltc = gk20a_ops.ltc; | ||
171 | gops->clock_gating = gk20a_ops.clock_gating; | ||
172 | gops->privsecurity = 0; | ||
173 | gops->securegpccs = 0; | ||
174 | gops->pmupstate = false; | ||
175 | gk20a_init_bus(gops); | ||
176 | gk20a_init_mc(gops); | ||
177 | gk20a_init_priv_ring(gops); | ||
178 | gk20a_init_gr_ops(gops); | ||
179 | gk20a_init_fecs_trace_ops(gops); | ||
180 | gk20a_init_fb(gops); | ||
181 | gk20a_init_fifo(gops); | ||
182 | gk20a_init_ce2(gops); | ||
183 | gk20a_init_gr_ctx(gops); | ||
184 | gk20a_init_mm(gops); | ||
185 | gk20a_falcon_init_hal(gops); | ||
186 | gk20a_init_pmu_ops(gops); | ||
187 | gk20a_init_regops(gops); | ||
188 | gk20a_init_debug_ops(gops); | ||
189 | gk20a_init_dbg_session_ops(gops); | ||
190 | gk20a_init_therm_ops(gops); | ||
191 | gk20a_init_tsg_ops(gops); | ||
192 | gk20a_init_pramin_ops(gops); | ||
193 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
194 | gk20a_init_css_ops(gops); | ||
195 | #endif | ||
196 | g->name = "gk20a"; | ||
197 | gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; | ||
198 | gops->get_litter_value = gk20a_get_litter_value; | ||
199 | |||
200 | c->twod_class = FERMI_TWOD_A; | ||
201 | c->threed_class = KEPLER_C; | ||
202 | c->compute_class = KEPLER_COMPUTE_A; | ||
203 | c->gpfifo_class = KEPLER_CHANNEL_GPFIFO_C; | ||
204 | c->inline_to_memory_class = KEPLER_INLINE_TO_MEMORY_A; | ||
205 | c->dma_copy_class = KEPLER_DMA_COPY_A; | ||
206 | |||
207 | return 0; | ||
208 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.h b/drivers/gpu/nvgpu/gk20a/hal_gk20a.h deleted file mode 100644 index f5475809..00000000 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.h +++ /dev/null | |||
@@ -1,25 +0,0 @@ | |||
1 | /* | ||
2 | * drivers/gpu/nvgpu/gk20a/hal_gk20a.h | ||
3 | * | ||
4 | * GK20A Hardware Abstraction Layer functions definitions. | ||
5 | * | ||
6 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | |||
18 | #ifndef __HAL_GK20A__ | ||
19 | #define __HAL_GK20A__ | ||
20 | |||
21 | struct gk20a; | ||
22 | |||
23 | int gk20a_init_hal(struct gk20a *g); | ||
24 | |||
25 | #endif /* __HAL_GK20A__ */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c index 9220685a..a543a0d3 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | |||
@@ -16,19 +16,10 @@ | |||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <trace/events/gk20a.h> | ||
20 | #include <nvgpu/timers.h> | ||
21 | #include <nvgpu/log.h> | ||
22 | #include <nvgpu/bug.h> | ||
23 | #include <nvgpu/enabled.h> | ||
24 | #include <nvgpu/dma.h> | 19 | #include <nvgpu/dma.h> |
25 | 20 | ||
26 | #include "gk20a.h" | 21 | #include "gk20a.h" |
27 | #include "gr_gk20a.h" | 22 | #include "gr_gk20a.h" |
28 | #include "ltc_gk20a.h" | ||
29 | |||
30 | #include <nvgpu/hw/gk20a/hw_ltc_gk20a.h> | ||
31 | |||
32 | 23 | ||
33 | /* Non HW reg dependent stuff: */ | 24 | /* Non HW reg dependent stuff: */ |
34 | 25 | ||
@@ -49,317 +40,3 @@ int gk20a_ltc_alloc_virt_cbc(struct gk20a *g, size_t compbit_backing_size) | |||
49 | compbit_backing_size, | 40 | compbit_backing_size, |
50 | &gr->compbit_store.mem); | 41 | &gr->compbit_store.mem); |
51 | } | 42 | } |
52 | |||
53 | /* HW reg dependent stuff: */ | ||
54 | int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | ||
55 | { | ||
56 | /* max memory size (MB) to cover */ | ||
57 | u32 max_size = gr->max_comptag_mem; | ||
58 | /* one tag line covers 128KB */ | ||
59 | u32 max_comptag_lines = max_size << 3; | ||
60 | |||
61 | u32 hw_max_comptag_lines = | ||
62 | ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); | ||
63 | |||
64 | u32 cbc_param = | ||
65 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); | ||
66 | u32 comptags_per_cacheline = | ||
67 | ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); | ||
68 | u32 slices_per_fbp = | ||
69 | ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(cbc_param); | ||
70 | u32 cacheline_size = | ||
71 | 512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param); | ||
72 | |||
73 | u32 compbit_backing_size; | ||
74 | |||
75 | int err; | ||
76 | |||
77 | gk20a_dbg_fn(""); | ||
78 | |||
79 | if (max_comptag_lines == 0) | ||
80 | return 0; | ||
81 | |||
82 | if (max_comptag_lines > hw_max_comptag_lines) | ||
83 | max_comptag_lines = hw_max_comptag_lines; | ||
84 | |||
85 | /* no hybird fb */ | ||
86 | compbit_backing_size = | ||
87 | DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * | ||
88 | cacheline_size * slices_per_fbp * gr->num_fbps; | ||
89 | |||
90 | /* aligned to 2KB * num_fbps */ | ||
91 | compbit_backing_size += | ||
92 | gr->num_fbps << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); | ||
93 | |||
94 | /* must be a multiple of 64KB */ | ||
95 | compbit_backing_size = roundup(compbit_backing_size, 64*1024); | ||
96 | |||
97 | max_comptag_lines = | ||
98 | (compbit_backing_size * comptags_per_cacheline) / | ||
99 | cacheline_size * slices_per_fbp * gr->num_fbps; | ||
100 | |||
101 | if (max_comptag_lines > hw_max_comptag_lines) | ||
102 | max_comptag_lines = hw_max_comptag_lines; | ||
103 | |||
104 | gk20a_dbg_info("compbit backing store size : %d", | ||
105 | compbit_backing_size); | ||
106 | gk20a_dbg_info("max comptag lines : %d", | ||
107 | max_comptag_lines); | ||
108 | |||
109 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
110 | err = gk20a_ltc_alloc_phys_cbc(g, compbit_backing_size); | ||
111 | else | ||
112 | err = gk20a_ltc_alloc_virt_cbc(g, compbit_backing_size); | ||
113 | |||
114 | if (err) | ||
115 | return err; | ||
116 | |||
117 | err = gk20a_comptag_allocator_init(&gr->comp_tags, max_comptag_lines); | ||
118 | if (err) | ||
119 | return err; | ||
120 | |||
121 | gr->comptags_per_cacheline = comptags_per_cacheline; | ||
122 | gr->slices_per_ltc = slices_per_fbp / g->ltc_count; | ||
123 | gr->cacheline_size = cacheline_size; | ||
124 | |||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | ||
129 | u32 min, u32 max) | ||
130 | { | ||
131 | int err = 0; | ||
132 | struct gr_gk20a *gr = &g->gr; | ||
133 | u32 fbp, slice, ctrl1, val, hw_op = 0; | ||
134 | u32 slices_per_fbp = | ||
135 | ltc_ltcs_ltss_cbc_param_slices_per_fbp_v( | ||
136 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); | ||
137 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
138 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
139 | |||
140 | gk20a_dbg_fn(""); | ||
141 | |||
142 | trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); | ||
143 | |||
144 | if (gr->compbit_store.mem.size == 0) | ||
145 | return 0; | ||
146 | |||
147 | nvgpu_mutex_acquire(&g->mm.l2_op_lock); | ||
148 | |||
149 | if (op == gk20a_cbc_op_clear) { | ||
150 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(), | ||
151 | ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min)); | ||
152 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(), | ||
153 | ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max)); | ||
154 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); | ||
155 | } else if (op == gk20a_cbc_op_clean) { | ||
156 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); | ||
157 | } else if (op == gk20a_cbc_op_invalidate) { | ||
158 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); | ||
159 | } else { | ||
160 | BUG_ON(1); | ||
161 | } | ||
162 | |||
163 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), | ||
164 | gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); | ||
165 | |||
166 | for (fbp = 0; fbp < gr->num_fbps; fbp++) { | ||
167 | struct nvgpu_timeout timeout; | ||
168 | |||
169 | nvgpu_timeout_init(g, &timeout, 200, NVGPU_TIMER_RETRY_TIMER); | ||
170 | for (slice = 0; slice < slices_per_fbp; slice++) { | ||
171 | |||
172 | |||
173 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + | ||
174 | fbp * ltc_stride + | ||
175 | slice * lts_stride; | ||
176 | |||
177 | do { | ||
178 | val = gk20a_readl(g, ctrl1); | ||
179 | if (!(val & hw_op)) | ||
180 | break; | ||
181 | nvgpu_udelay(5); | ||
182 | |||
183 | } while (!nvgpu_timeout_expired(&timeout)); | ||
184 | |||
185 | if (nvgpu_timeout_peek_expired(&timeout)) { | ||
186 | nvgpu_err(g, "comp tag clear timeout"); | ||
187 | err = -EBUSY; | ||
188 | goto out; | ||
189 | } | ||
190 | } | ||
191 | } | ||
192 | out: | ||
193 | trace_gk20a_ltc_cbc_ctrl_done(g->name); | ||
194 | nvgpu_mutex_release(&g->mm.l2_op_lock); | ||
195 | return err; | ||
196 | } | ||
197 | |||
198 | |||
199 | void gk20a_ltc_init_fs_state(struct gk20a *g) | ||
200 | { | ||
201 | gk20a_dbg_info("initialize gk20a L2"); | ||
202 | |||
203 | g->max_ltc_count = g->ltc_count = 1; | ||
204 | } | ||
205 | |||
206 | void gk20a_ltc_isr(struct gk20a *g) | ||
207 | { | ||
208 | u32 intr; | ||
209 | |||
210 | intr = gk20a_readl(g, ltc_ltc0_ltss_intr_r()); | ||
211 | nvgpu_err(g, "ltc: %08x", intr); | ||
212 | gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr); | ||
213 | } | ||
214 | |||
215 | int gk20a_determine_L2_size_bytes(struct gk20a *g) | ||
216 | { | ||
217 | u32 lts_per_ltc; | ||
218 | u32 ways; | ||
219 | u32 sets; | ||
220 | u32 bytes_per_line; | ||
221 | u32 active_ltcs; | ||
222 | u32 cache_size; | ||
223 | |||
224 | u32 tmp; | ||
225 | u32 active_sets_value; | ||
226 | |||
227 | tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r()); | ||
228 | ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp)); | ||
229 | |||
230 | active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp); | ||
231 | if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) { | ||
232 | sets = 64; | ||
233 | } else if (active_sets_value == | ||
234 | ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) { | ||
235 | sets = 32; | ||
236 | } else if (active_sets_value == | ||
237 | ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) { | ||
238 | sets = 16; | ||
239 | } else { | ||
240 | nvgpu_err(g, | ||
241 | "Unknown constant %u for active sets", | ||
242 | (unsigned)active_sets_value); | ||
243 | sets = 0; | ||
244 | } | ||
245 | |||
246 | active_ltcs = g->gr.num_fbps; | ||
247 | |||
248 | /* chip-specific values */ | ||
249 | lts_per_ltc = 1; | ||
250 | bytes_per_line = 128; | ||
251 | cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line; | ||
252 | |||
253 | return cache_size; | ||
254 | } | ||
255 | |||
256 | /* | ||
257 | * Sets the ZBC color for the passed index. | ||
258 | */ | ||
259 | void gk20a_ltc_set_zbc_color_entry(struct gk20a *g, | ||
260 | struct zbc_entry *color_val, | ||
261 | u32 index) | ||
262 | { | ||
263 | u32 i; | ||
264 | u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; | ||
265 | |||
266 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), | ||
267 | ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); | ||
268 | |||
269 | for (i = 0; | ||
270 | i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) { | ||
271 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i), | ||
272 | color_val->color_l2[i]); | ||
273 | } | ||
274 | gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()); | ||
275 | } | ||
276 | |||
277 | /* | ||
278 | * Sets the ZBC depth for the passed index. | ||
279 | */ | ||
280 | void gk20a_ltc_set_zbc_depth_entry(struct gk20a *g, | ||
281 | struct zbc_entry *depth_val, | ||
282 | u32 index) | ||
283 | { | ||
284 | u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; | ||
285 | |||
286 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), | ||
287 | ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); | ||
288 | |||
289 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), | ||
290 | depth_val->depth); | ||
291 | |||
292 | gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()); | ||
293 | } | ||
294 | |||
295 | void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) | ||
296 | { | ||
297 | u32 max_size = gr->max_comptag_mem; | ||
298 | u32 max_comptag_lines = max_size << 3; | ||
299 | |||
300 | u32 compbit_base_post_divide; | ||
301 | u64 compbit_base_post_multiply64; | ||
302 | u64 compbit_store_iova; | ||
303 | u64 compbit_base_post_divide64; | ||
304 | |||
305 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
306 | compbit_store_iova = gk20a_mem_phys(&gr->compbit_store.mem); | ||
307 | else | ||
308 | compbit_store_iova = g->ops.mm.get_iova_addr(g, | ||
309 | gr->compbit_store.mem.priv.sgt->sgl, 0); | ||
310 | |||
311 | compbit_base_post_divide64 = compbit_store_iova >> | ||
312 | ltc_ltcs_ltss_cbc_base_alignment_shift_v(); | ||
313 | |||
314 | do_div(compbit_base_post_divide64, g->ltc_count); | ||
315 | compbit_base_post_divide = u64_lo32(compbit_base_post_divide64); | ||
316 | |||
317 | compbit_base_post_multiply64 = ((u64)compbit_base_post_divide * | ||
318 | g->ltc_count) << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); | ||
319 | |||
320 | if (compbit_base_post_multiply64 < compbit_store_iova) | ||
321 | compbit_base_post_divide++; | ||
322 | |||
323 | /* Bug 1477079 indicates sw adjustment on the posted divided base. */ | ||
324 | if (g->ops.ltc.cbc_fix_config) | ||
325 | compbit_base_post_divide = | ||
326 | g->ops.ltc.cbc_fix_config(g, compbit_base_post_divide); | ||
327 | |||
328 | gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(), | ||
329 | compbit_base_post_divide); | ||
330 | |||
331 | gk20a_dbg(gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte, | ||
332 | "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n", | ||
333 | (u32)(compbit_store_iova >> 32), | ||
334 | (u32)(compbit_store_iova & 0xffffffff), | ||
335 | compbit_base_post_divide); | ||
336 | |||
337 | gr->compbit_store.base_hw = compbit_base_post_divide; | ||
338 | |||
339 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate, | ||
340 | 0, max_comptag_lines - 1); | ||
341 | |||
342 | } | ||
343 | |||
#ifdef CONFIG_DEBUG_FS
/*
 * Apply the debugfs-requested L2 enable state to hardware.
 *
 * Under g->debugfs_lock: if mm.ltc_enabled_debug differs from
 * mm.ltc_enabled, set or clear the L2 bypass-mode bit in tstg_set_mgmt_2
 * to match and record the new state in mm.ltc_enabled.
 */
void gk20a_ltc_sync_debugfs(struct gk20a *g)
{
	u32 bypass = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f();
	u32 mgmt2;

	nvgpu_spinlock_acquire(&g->debugfs_lock);

	if (g->mm.ltc_enabled == g->mm.ltc_enabled_debug)
		goto unlock;

	mgmt2 = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r());
	if (g->mm.ltc_enabled_debug)
		mgmt2 &= ~bypass;	/* bypass disabled (normal caching ops) */
	else
		mgmt2 |= bypass;	/* bypass enabled (no caching) */
	gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), mgmt2);

	g->mm.ltc_enabled = g->mm.ltc_enabled_debug;

unlock:
	nvgpu_spinlock_release(&g->debugfs_lock);
}
#endif
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h index ee7d7f91..f871dc4b 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h | |||
@@ -15,27 +15,8 @@ | |||
15 | 15 | ||
16 | #ifndef LTC_GK20A_H | 16 | #ifndef LTC_GK20A_H |
17 | #define LTC_GK20A_H | 17 | #define LTC_GK20A_H |
18 | struct gpu_ops; | 18 | struct gk20a; |
19 | struct gr_gk20a; | ||
20 | struct zbc_entry; | ||
21 | enum gk20a_cbc_op; | ||
22 | 19 | ||
23 | int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr); | ||
24 | int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | ||
25 | u32 min, u32 max); | ||
26 | void gk20a_ltc_init_fs_state(struct gk20a *g); | ||
27 | void gk20a_ltc_isr(struct gk20a *g); | ||
28 | int gk20a_determine_L2_size_bytes(struct gk20a *g); | ||
29 | void gk20a_ltc_set_zbc_color_entry(struct gk20a *g, | ||
30 | struct zbc_entry *color_val, | ||
31 | u32 index); | ||
32 | void gk20a_ltc_set_zbc_depth_entry(struct gk20a *g, | ||
33 | struct zbc_entry *depth_val, | ||
34 | u32 index); | ||
35 | void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr); | ||
36 | #ifdef CONFIG_DEBUG_FS | ||
37 | void gk20a_ltc_sync_debugfs(struct gk20a *g); | ||
38 | #endif | ||
39 | int gk20a_ltc_alloc_phys_cbc(struct gk20a *g, size_t compbit_backing_size); | 20 | int gk20a_ltc_alloc_phys_cbc(struct gk20a *g, size_t compbit_backing_size); |
40 | int gk20a_ltc_alloc_virt_cbc(struct gk20a *g, size_t compbit_backing_size); | 21 | int gk20a_ltc_alloc_virt_cbc(struct gk20a *g, size_t compbit_backing_size); |
41 | #endif | 22 | #endif |
diff --git a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c index 111872a2..accda972 100644 --- a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c | |||
@@ -235,21 +235,3 @@ bool mc_gk20a_is_intr1_pending(struct gk20a *g, | |||
235 | 235 | ||
236 | return is_pending; | 236 | return is_pending; |
237 | } | 237 | } |
238 | |||
239 | void gk20a_init_mc(struct gpu_ops *gops) | ||
240 | { | ||
241 | gops->mc.intr_enable = mc_gk20a_intr_enable; | ||
242 | gops->mc.intr_unit_config = mc_gk20a_intr_unit_config; | ||
243 | gops->mc.isr_stall = mc_gk20a_isr_stall; | ||
244 | gops->mc.intr_stall = mc_gk20a_intr_stall; | ||
245 | gops->mc.intr_stall_pause = mc_gk20a_intr_stall_pause; | ||
246 | gops->mc.intr_stall_resume = mc_gk20a_intr_stall_resume; | ||
247 | gops->mc.intr_nonstall = mc_gk20a_intr_nonstall; | ||
248 | gops->mc.intr_nonstall_pause = mc_gk20a_intr_nonstall_pause; | ||
249 | gops->mc.intr_nonstall_resume = mc_gk20a_intr_nonstall_resume; | ||
250 | gops->mc.enable = gk20a_mc_enable; | ||
251 | gops->mc.disable = gk20a_mc_disable; | ||
252 | gops->mc.reset = gk20a_mc_reset; | ||
253 | gops->mc.boot_0 = gk20a_mc_boot_0; | ||
254 | gops->mc.is_intr1_pending = mc_gk20a_is_intr1_pending; | ||
255 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mc_gk20a.h b/drivers/gpu/nvgpu/gk20a/mc_gk20a.h index 7ecd27bf..dd8678a0 100644 --- a/drivers/gpu/nvgpu/gk20a/mc_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mc_gk20a.h | |||
@@ -15,7 +15,6 @@ | |||
15 | #define MC_GK20A_H | 15 | #define MC_GK20A_H |
16 | struct gk20a; | 16 | struct gk20a; |
17 | 17 | ||
18 | void gk20a_init_mc(struct gpu_ops *gops); | ||
19 | void mc_gk20a_intr_enable(struct gk20a *g); | 18 | void mc_gk20a_intr_enable(struct gk20a *g); |
20 | void mc_gk20a_intr_unit_config(struct gk20a *g, bool enable, | 19 | void mc_gk20a_intr_unit_config(struct gk20a *g, bool enable, |
21 | bool is_stalling, u32 mask); | 20 | bool is_stalling, u32 mask); |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 5ffa3e2f..fa6b5109 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -2545,27 +2545,3 @@ clean_up: | |||
2545 | dma_buf_put(dmabuf); | 2545 | dma_buf_put(dmabuf); |
2546 | return err; | 2546 | return err; |
2547 | } | 2547 | } |
2548 | |||
/* mm.is_bar1_supported callback for GK20A: unconditionally true. */
static bool gk20a_mm_is_bar1_supported(struct gk20a *g)
{
	/* @g is not consulted */
	return true;
}
2553 | |||
2554 | void gk20a_init_mm(struct gpu_ops *gops) | ||
2555 | { | ||
2556 | gops->mm.gmmu_map = gk20a_locked_gmmu_map; | ||
2557 | gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap; | ||
2558 | gops->mm.vm_bind_channel = gk20a_vm_bind_channel; | ||
2559 | gops->mm.fb_flush = gk20a_mm_fb_flush; | ||
2560 | gops->mm.l2_invalidate = gk20a_mm_l2_invalidate; | ||
2561 | gops->mm.l2_flush = gk20a_mm_l2_flush; | ||
2562 | gops->mm.cbc_clean = gk20a_mm_cbc_clean; | ||
2563 | gops->mm.get_iova_addr = gk20a_mm_iova_addr; | ||
2564 | gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; | ||
2565 | gops->mm.get_mmu_levels = gk20a_mm_get_mmu_levels; | ||
2566 | gops->mm.init_pdb = gk20a_mm_init_pdb; | ||
2567 | gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw; | ||
2568 | gops->mm.init_inst_block = gk20a_init_inst_block; | ||
2569 | gops->mm.is_bar1_supported = gk20a_mm_is_bar1_supported; | ||
2570 | gops->mm.mmu_fault_pending = gk20a_fifo_mmu_fault_pending; | ||
2571 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 7c476526..cf37640d 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -439,8 +439,6 @@ void pde_range_from_vaddr_range(struct vm_gk20a *vm, | |||
439 | int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm); | 439 | int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm); |
440 | u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g); | 440 | u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g); |
441 | 441 | ||
442 | struct gpu_ops; | ||
443 | void gk20a_init_mm(struct gpu_ops *gops); | ||
444 | const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, | 442 | const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, |
445 | u32 big_page_size); | 443 | u32 big_page_size); |
446 | void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem, | 444 | void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem, |
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h index 72322e54..3eda1da4 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h | |||
@@ -243,9 +243,7 @@ static inline struct gk20a_platform *gk20a_get_platform( | |||
243 | return (struct gk20a_platform *)dev_get_drvdata(dev); | 243 | return (struct gk20a_platform *)dev_get_drvdata(dev); |
244 | } | 244 | } |
245 | 245 | ||
246 | extern struct gk20a_platform gk20a_generic_platform; | ||
247 | #ifdef CONFIG_TEGRA_GK20A | 246 | #ifdef CONFIG_TEGRA_GK20A |
248 | extern struct gk20a_platform gk20a_tegra_platform; | ||
249 | extern struct gk20a_platform gm20b_tegra_platform; | 247 | extern struct gk20a_platform gm20b_tegra_platform; |
250 | extern struct gk20a_platform gp10b_tegra_platform; | 248 | extern struct gk20a_platform gp10b_tegra_platform; |
251 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | 249 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION |
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c deleted file mode 100644 index 33e2fb37..00000000 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c +++ /dev/null | |||
@@ -1,90 +0,0 @@ | |||
1 | /* | ||
2 | * drivers/gpu/nvgpu/gk20a/platform_gk20a_generic.c | ||
3 | * | ||
4 | * GK20A Generic Platform Interface | ||
5 | * | ||
6 | * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #include <linux/clk.h> | ||
22 | |||
23 | #include "platform_gk20a.h" | ||
24 | #include "hal_gk20a.h" | ||
25 | #include "gk20a.h" | ||
26 | |||
27 | /* | ||
28 | * gk20a_generic_get_clocks() | ||
29 | * | ||
30 | * This function finds clocks in tegra platform and populates | ||
31 | * the clock information to gk20a platform data. | ||
32 | */ | ||
33 | |||
34 | static int gk20a_generic_get_clocks(struct device *pdev) | ||
35 | { | ||
36 | struct gk20a_platform *platform = dev_get_drvdata(pdev); | ||
37 | |||
38 | platform->clk[0] = clk_get_sys("tegra_gk20a.0", "PLLG_ref"); | ||
39 | platform->clk[1] = clk_get_sys("tegra_gk20a.0", "pwr"); | ||
40 | platform->clk[2] = clk_get_sys("tegra_gk20a.0", "emc"); | ||
41 | platform->num_clks = 3; | ||
42 | |||
43 | if (IS_ERR(platform->clk[0]) || | ||
44 | IS_ERR(platform->clk[1]) || | ||
45 | IS_ERR(platform->clk[2])) | ||
46 | goto err_get_clock; | ||
47 | |||
48 | clk_set_rate(platform->clk[0], UINT_MAX); | ||
49 | clk_set_rate(platform->clk[1], 204000000); | ||
50 | clk_set_rate(platform->clk[2], UINT_MAX); | ||
51 | |||
52 | return 0; | ||
53 | |||
54 | err_get_clock: | ||
55 | if (!IS_ERR_OR_NULL(platform->clk[0])) | ||
56 | clk_put(platform->clk[0]); | ||
57 | if (!IS_ERR_OR_NULL(platform->clk[1])) | ||
58 | clk_put(platform->clk[1]); | ||
59 | if (!IS_ERR_OR_NULL(platform->clk[2])) | ||
60 | clk_put(platform->clk[2]); | ||
61 | |||
62 | platform->clk[0] = NULL; | ||
63 | platform->clk[1] = NULL; | ||
64 | platform->clk[2] = NULL; | ||
65 | return -ENODEV; | ||
66 | } | ||
67 | |||
/*
 * Platform probe hook: try to acquire the clocks, then report success.
 * The result of the clock lookup is ignored (presumably best-effort --
 * confirm), so this always returns 0.
 */
static int gk20a_generic_probe(struct device *dev)
{
	(void)gk20a_generic_get_clocks(dev);
	return 0;
}
74 | |||
/* Platform late-probe hook: nothing to do, always returns 0. */
static int gk20a_generic_late_probe(struct device *dev)
{
	/* @dev is not used */
	return 0;
}
79 | |||
/* Platform remove hook: nothing to do, always returns 0. */
static int gk20a_generic_remove(struct device *dev)
{
	/* @dev is not used */
	return 0;
}
84 | |||
85 | struct gk20a_platform gk20a_generic_platform = { | ||
86 | .probe = gk20a_generic_probe, | ||
87 | .late_probe = gk20a_generic_late_probe, | ||
88 | .remove = gk20a_generic_remove, | ||
89 | .default_big_page_size = SZ_128K, | ||
90 | }; | ||
diff --git a/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c index f2c877f9..48b5a90a 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c +++ b/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c | |||
@@ -14,7 +14,6 @@ | |||
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include "gk20a.h" | 16 | #include "gk20a.h" |
17 | #include "hal_gk20a.h" | ||
18 | #include "platform_gk20a.h" | 17 | #include "platform_gk20a.h" |
19 | #include "vgpu/clk_vgpu.h" | 18 | #include "vgpu/clk_vgpu.h" |
20 | 19 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/regops_gk20a.c b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c index 9919fc3d..aee8677c 100644 --- a/drivers/gpu/nvgpu/gk20a/regops_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/regops_gk20a.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Tegra GK20A GPU Debugger Driver Register Ops | 2 | * Tegra GK20A GPU Debugger Driver Register Ops |
3 | * | 3 | * |
4 | * Copyright (c) 2013-2016, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2013-2017, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -729,138 +729,3 @@ bool is_bar0_global_offset_whitelisted_gk20a(struct gk20a *g, u32 offset) | |||
729 | regop_bsearch_range_cmp); | 729 | regop_bsearch_range_cmp); |
730 | return valid; | 730 | return valid; |
731 | } | 731 | } |
732 | |||
733 | static const struct regop_offset_range *gk20a_get_global_whitelist_ranges(void) | ||
734 | { | ||
735 | return gk20a_global_whitelist_ranges; | ||
736 | } | ||
737 | |||
738 | static int gk20a_get_global_whitelist_ranges_count(void) | ||
739 | { | ||
740 | return gk20a_global_whitelist_ranges_count; | ||
741 | } | ||
742 | |||
743 | static const struct regop_offset_range *gk20a_get_context_whitelist_ranges(void) | ||
744 | { | ||
745 | return gk20a_context_whitelist_ranges; | ||
746 | } | ||
747 | |||
748 | static int gk20a_get_context_whitelist_ranges_count(void) | ||
749 | { | ||
750 | return gk20a_context_whitelist_ranges_count; | ||
751 | } | ||
752 | |||
753 | static const u32 *gk20a_get_runcontrol_whitelist(void) | ||
754 | { | ||
755 | return gk20a_runcontrol_whitelist; | ||
756 | } | ||
757 | |||
758 | static int gk20a_get_runcontrol_whitelist_count(void) | ||
759 | { | ||
760 | return gk20a_runcontrol_whitelist_count; | ||
761 | } | ||
762 | |||
763 | static const | ||
764 | struct regop_offset_range *gk20a_get_runcontrol_whitelist_ranges(void) | ||
765 | { | ||
766 | return gk20a_runcontrol_whitelist_ranges; | ||
767 | } | ||
768 | |||
769 | static int gk20a_get_runcontrol_whitelist_ranges_count(void) | ||
770 | { | ||
771 | return gk20a_runcontrol_whitelist_ranges_count; | ||
772 | } | ||
773 | |||
774 | static const u32 *gk20a_get_qctl_whitelist(void) | ||
775 | { | ||
776 | return gk20a_qctl_whitelist; | ||
777 | } | ||
778 | |||
779 | static int gk20a_get_qctl_whitelist_count(void) | ||
780 | { | ||
781 | return gk20a_qctl_whitelist_count; | ||
782 | } | ||
783 | |||
784 | static const struct regop_offset_range *gk20a_get_qctl_whitelist_ranges(void) | ||
785 | { | ||
786 | return gk20a_qctl_whitelist_ranges; | ||
787 | } | ||
788 | |||
789 | static int gk20a_get_qctl_whitelist_ranges_count(void) | ||
790 | { | ||
791 | return gk20a_qctl_whitelist_ranges_count; | ||
792 | } | ||
793 | |||
794 | static int gk20a_apply_smpc_war(struct dbg_session_gk20a *dbg_s) | ||
795 | { | ||
796 | /* The following regops are a hack/war to make up for the fact that we | ||
797 | * just scribbled into the ctxsw image w/o really knowing whether | ||
798 | * it was already swapped out in/out once or not, etc. | ||
799 | */ | ||
800 | struct nvgpu_dbg_gpu_reg_op ops[4]; | ||
801 | unsigned int i; | ||
802 | |||
803 | for (i = 0; i < ARRAY_SIZE(ops); i++) { | ||
804 | ops[i].op = REGOP(WRITE_32); | ||
805 | ops[i].type = REGOP(TYPE_GR_CTX); | ||
806 | ops[i].status = REGOP(STATUS_SUCCESS); | ||
807 | ops[i].value_hi = 0; | ||
808 | ops[i].and_n_mask_lo = 0; | ||
809 | ops[i].and_n_mask_hi = 0; | ||
810 | } | ||
811 | |||
812 | /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control_sel1_r();*/ | ||
813 | ops[0].offset = 0x00419e08; | ||
814 | ops[0].value_lo = 0x1d; | ||
815 | |||
816 | /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control5_r(); */ | ||
817 | ops[1].offset = 0x00419e58; | ||
818 | ops[1].value_lo = 0x1; | ||
819 | |||
820 | /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control3_r(); */ | ||
821 | ops[2].offset = 0x00419e68; | ||
822 | ops[2].value_lo = 0xaaaa; | ||
823 | |||
824 | /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter4_control_r(); */ | ||
825 | ops[3].offset = 0x00419f40; | ||
826 | ops[3].value_lo = 0x18; | ||
827 | |||
828 | return dbg_s->g->ops.dbg_session_ops.exec_reg_ops(dbg_s, ops, | ||
829 | ARRAY_SIZE(ops)); | ||
830 | } | ||
831 | |||
832 | void gk20a_init_regops(struct gpu_ops *gops) | ||
833 | { | ||
834 | gops->regops.get_global_whitelist_ranges = | ||
835 | gk20a_get_global_whitelist_ranges; | ||
836 | gops->regops.get_global_whitelist_ranges_count = | ||
837 | gk20a_get_global_whitelist_ranges_count; | ||
838 | |||
839 | gops->regops.get_context_whitelist_ranges = | ||
840 | gk20a_get_context_whitelist_ranges; | ||
841 | gops->regops.get_context_whitelist_ranges_count = | ||
842 | gk20a_get_context_whitelist_ranges_count; | ||
843 | |||
844 | gops->regops.get_runcontrol_whitelist = | ||
845 | gk20a_get_runcontrol_whitelist; | ||
846 | gops->regops.get_runcontrol_whitelist_count = | ||
847 | gk20a_get_runcontrol_whitelist_count; | ||
848 | |||
849 | gops->regops.get_runcontrol_whitelist_ranges = | ||
850 | gk20a_get_runcontrol_whitelist_ranges; | ||
851 | gops->regops.get_runcontrol_whitelist_ranges_count = | ||
852 | gk20a_get_runcontrol_whitelist_ranges_count; | ||
853 | |||
854 | gops->regops.get_qctl_whitelist = | ||
855 | gk20a_get_qctl_whitelist; | ||
856 | gops->regops.get_qctl_whitelist_count = | ||
857 | gk20a_get_qctl_whitelist_count; | ||
858 | |||
859 | gops->regops.get_qctl_whitelist_ranges = | ||
860 | gk20a_get_qctl_whitelist_ranges; | ||
861 | gops->regops.get_qctl_whitelist_ranges_count = | ||
862 | gk20a_get_qctl_whitelist_ranges_count; | ||
863 | |||
864 | gops->regops.apply_smpc_war = | ||
865 | gk20a_apply_smpc_war; | ||
866 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/regops_gk20a.h b/drivers/gpu/nvgpu/gk20a/regops_gk20a.h index f53545e2..e14bc25e 100644 --- a/drivers/gpu/nvgpu/gk20a/regops_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/regops_gk20a.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Tegra GK20A GPU Debugger Driver Register Ops | 2 | * Tegra GK20A GPU Debugger Driver Register Ops |
3 | * | 3 | * |
4 | * Copyright (c) 2013-2014, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2013-2017, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -49,5 +49,4 @@ static inline bool reg_op_is_read(u8 op) | |||
49 | 49 | ||
50 | bool is_bar0_global_offset_whitelisted_gk20a(struct gk20a *g, u32 offset); | 50 | bool is_bar0_global_offset_whitelisted_gk20a(struct gk20a *g, u32 offset); |
51 | 51 | ||
52 | void gk20a_init_regops(struct gpu_ops *gops); | ||
53 | #endif /* REGOPS_GK20A_H */ | 52 | #endif /* REGOPS_GK20A_H */ |
diff --git a/drivers/gpu/nvgpu/gk20a/therm_gk20a.c b/drivers/gpu/nvgpu/gk20a/therm_gk20a.c index 00159fae..e2fedf21 100644 --- a/drivers/gpu/nvgpu/gk20a/therm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/therm_gk20a.c | |||
@@ -33,58 +33,6 @@ static int gk20a_init_therm_setup_sw(struct gk20a *g) | |||
33 | return 0; | 33 | return 0; |
34 | } | 34 | } |
35 | 35 | ||
36 | static int gk20a_init_therm_setup_hw(struct gk20a *g) | ||
37 | { | ||
38 | u32 v; | ||
39 | |||
40 | /* program NV_THERM registers */ | ||
41 | gk20a_writel(g, therm_use_a_r(), therm_use_a_ext_therm_0_enable_f() | | ||
42 | therm_use_a_ext_therm_1_enable_f() | | ||
43 | therm_use_a_ext_therm_2_enable_f()); | ||
44 | /* priority for EXT_THERM_0 event set to highest */ | ||
45 | gk20a_writel(g, therm_evt_ext_therm_0_r(), | ||
46 | therm_evt_ext_therm_0_slow_factor_f(0x2) | | ||
47 | therm_evt_ext_therm_0_priority_f(3)); | ||
48 | gk20a_writel(g, therm_evt_ext_therm_1_r(), | ||
49 | therm_evt_ext_therm_1_slow_factor_f(0x6) | | ||
50 | therm_evt_ext_therm_1_priority_f(2)); | ||
51 | gk20a_writel(g, therm_evt_ext_therm_2_r(), | ||
52 | therm_evt_ext_therm_2_slow_factor_f(0xe) | | ||
53 | therm_evt_ext_therm_2_priority_f(1)); | ||
54 | |||
55 | |||
56 | gk20a_writel(g, therm_grad_stepping_table_r(0), | ||
57 | therm_grad_stepping_table_slowdown_factor0_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by1p5_f()) | | ||
58 | therm_grad_stepping_table_slowdown_factor1_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by2_f()) | | ||
59 | therm_grad_stepping_table_slowdown_factor2_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by4_f()) | | ||
60 | therm_grad_stepping_table_slowdown_factor3_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) | | ||
61 | therm_grad_stepping_table_slowdown_factor4_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f())); | ||
62 | gk20a_writel(g, therm_grad_stepping_table_r(1), | ||
63 | therm_grad_stepping_table_slowdown_factor0_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) | | ||
64 | therm_grad_stepping_table_slowdown_factor1_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) | | ||
65 | therm_grad_stepping_table_slowdown_factor2_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) | | ||
66 | therm_grad_stepping_table_slowdown_factor3_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) | | ||
67 | therm_grad_stepping_table_slowdown_factor4_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f())); | ||
68 | |||
69 | v = gk20a_readl(g, therm_clk_timing_r(0)); | ||
70 | v |= therm_clk_timing_grad_slowdown_enabled_f(); | ||
71 | gk20a_writel(g, therm_clk_timing_r(0), v); | ||
72 | |||
73 | v = gk20a_readl(g, therm_config2_r()); | ||
74 | v |= therm_config2_grad_enable_f(1); | ||
75 | v |= therm_config2_slowdown_factor_extended_f(1); | ||
76 | gk20a_writel(g, therm_config2_r(), v); | ||
77 | |||
78 | gk20a_writel(g, therm_grad_stepping1_r(), | ||
79 | therm_grad_stepping1_pdiv_duration_f(32)); | ||
80 | |||
81 | v = gk20a_readl(g, therm_grad_stepping0_r()); | ||
82 | v |= therm_grad_stepping0_feature_enable_f(); | ||
83 | gk20a_writel(g, therm_grad_stepping0_r(), v); | ||
84 | |||
85 | return 0; | ||
86 | } | ||
87 | |||
88 | int gk20a_init_therm_support(struct gk20a *g) | 36 | int gk20a_init_therm_support(struct gk20a *g) |
89 | { | 37 | { |
90 | u32 err; | 38 | u32 err; |
@@ -153,9 +101,3 @@ int gk20a_elcg_init_idle_filters(struct gk20a *g) | |||
153 | gk20a_dbg_fn("done"); | 101 | gk20a_dbg_fn("done"); |
154 | return 0; | 102 | return 0; |
155 | } | 103 | } |
156 | |||
157 | void gk20a_init_therm_ops(struct gpu_ops *gops) | ||
158 | { | ||
159 | gops->therm.init_therm_setup_hw = gk20a_init_therm_setup_hw; | ||
160 | gops->therm.elcg_init_idle_filters = gk20a_elcg_init_idle_filters; | ||
161 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/therm_gk20a.h b/drivers/gpu/nvgpu/gk20a/therm_gk20a.h index ddd7ac54..99d06c18 100644 --- a/drivers/gpu/nvgpu/gk20a/therm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/therm_gk20a.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2011 - 2015, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms and conditions of the GNU General Public License, | 5 | * under the terms and conditions of the GNU General Public License, |
@@ -19,7 +19,6 @@ | |||
19 | struct gpu_ops; | 19 | struct gpu_ops; |
20 | struct gk20a; | 20 | struct gk20a; |
21 | 21 | ||
22 | void gk20a_init_therm_ops(struct gpu_ops *gops); | ||
23 | int gk20a_elcg_init_idle_filters(struct gk20a *g); | 22 | int gk20a_elcg_init_idle_filters(struct gk20a *g); |
24 | 23 | ||
25 | int gk20a_init_therm_support(struct gk20a *g); | 24 | int gk20a_init_therm_support(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h index b5661d4e..3b4b16e3 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h | |||
@@ -35,4 +35,8 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | |||
35 | void gm20b_ltc_isr(struct gk20a *g); | 35 | void gm20b_ltc_isr(struct gk20a *g); |
36 | u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base); | 36 | u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base); |
37 | void gm20b_flush_ltc(struct gk20a *g); | 37 | void gm20b_flush_ltc(struct gk20a *g); |
38 | int gm20b_ltc_alloc_phys_cbc(struct gk20a *g, | ||
39 | size_t compbit_backing_size); | ||
40 | int gm20b_ltc_alloc_virt_cbc(struct gk20a *g, | ||
41 | size_t compbit_backing_size); | ||
38 | #endif | 42 | #endif |
diff --git a/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c b/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c deleted file mode 100644 index 23da728e..00000000 --- a/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c +++ /dev/null | |||
@@ -1,47 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include "gk20a/gk20a.h" | ||
15 | #include "vgpu_gr_gk20a.h" | ||
16 | |||
17 | static int vgpu_gk20a_init_fs_state(struct gk20a *g) | ||
18 | { | ||
19 | struct gr_gk20a *gr = &g->gr; | ||
20 | u32 tpc_index, gpc_index; | ||
21 | u32 sm_id = 0; | ||
22 | |||
23 | gk20a_dbg_fn(""); | ||
24 | |||
25 | for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; | ||
26 | tpc_index++) { | ||
27 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
28 | if (tpc_index < gr->gpc_tpc_count[gpc_index]) { | ||
29 | g->gr.sm_to_cluster[sm_id].tpc_index = | ||
30 | tpc_index; | ||
31 | g->gr.sm_to_cluster[sm_id].gpc_index = | ||
32 | gpc_index; | ||
33 | |||
34 | sm_id++; | ||
35 | } | ||
36 | } | ||
37 | } | ||
38 | |||
39 | gr->no_of_sm = sm_id; | ||
40 | |||
41 | return 0; | ||
42 | } | ||
43 | |||
44 | void vgpu_gk20a_init_gr_ops(struct gpu_ops *gops) | ||
45 | { | ||
46 | gops->gr.init_fs_state = vgpu_gk20a_init_fs_state; | ||
47 | } | ||
diff --git a/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.h b/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.h deleted file mode 100644 index bb80aff8..00000000 --- a/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.h +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #ifndef __VGPU_GR_GK20A_H__ | ||
15 | #define __VGPU_GR_GK20A_H__ | ||
16 | |||
17 | #include "gk20a/gk20a.h" | ||
18 | |||
19 | void vgpu_gk20a_init_gr_ops(struct gpu_ops *gops); | ||
20 | |||
21 | #endif | ||
diff --git a/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_hal_gk20a.c b/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_hal_gk20a.c deleted file mode 100644 index aeeb2ad9..00000000 --- a/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_hal_gk20a.c +++ /dev/null | |||
@@ -1,29 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include "gk20a/hal_gk20a.h" | ||
15 | #include "vgpu/vgpu.h" | ||
16 | #include "vgpu_gr_gk20a.h" | ||
17 | |||
18 | int vgpu_gk20a_init_hal(struct gk20a *g) | ||
19 | { | ||
20 | int err; | ||
21 | |||
22 | err = gk20a_init_hal(g); | ||
23 | if (err) | ||
24 | return err; | ||
25 | vgpu_init_hal_common(g); | ||
26 | vgpu_gk20a_init_gr_ops(&g->ops); | ||
27 | |||
28 | return 0; | ||
29 | } | ||
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c index c4647e29..d41c0abb 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c | |||
@@ -29,7 +29,6 @@ | |||
29 | #include "vgpu/vgpu.h" | 29 | #include "vgpu/vgpu.h" |
30 | #include "vgpu/fecs_trace_vgpu.h" | 30 | #include "vgpu/fecs_trace_vgpu.h" |
31 | #include "vgpu/clk_vgpu.h" | 31 | #include "vgpu/clk_vgpu.h" |
32 | #include "gk20a/hal_gk20a.h" | ||
33 | #include "gk20a/ctxsw_trace_gk20a.h" | 32 | #include "gk20a/ctxsw_trace_gk20a.h" |
34 | #include "gk20a/tsg_gk20a.h" | 33 | #include "gk20a/tsg_gk20a.h" |
35 | #include "gk20a/gk20a_scale.h" | 34 | #include "gk20a/gk20a_scale.h" |
@@ -451,10 +450,6 @@ static int vgpu_init_hal(struct gk20a *g) | |||
451 | int err; | 450 | int err; |
452 | 451 | ||
453 | switch (ver) { | 452 | switch (ver) { |
454 | case GK20A_GPUID_GK20A: | ||
455 | gk20a_dbg_info("gk20a detected"); | ||
456 | err = vgpu_gk20a_init_hal(g); | ||
457 | break; | ||
458 | case GK20A_GPUID_GM20B: | 453 | case GK20A_GPUID_GM20B: |
459 | case GK20A_GPUID_GM20B_B: | 454 | case GK20A_GPUID_GM20B_B: |
460 | gk20a_dbg_info("gm20b detected"); | 455 | gk20a_dbg_info("gm20b detected"); |
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.h b/drivers/gpu/nvgpu/vgpu/vgpu.h index ec04c6f7..31ecb737 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.h +++ b/drivers/gpu/nvgpu/vgpu/vgpu.h | |||
@@ -102,7 +102,6 @@ int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in, | |||
102 | size_t size_out); | 102 | size_t size_out); |
103 | 103 | ||
104 | void vgpu_init_hal_common(struct gk20a *g); | 104 | void vgpu_init_hal_common(struct gk20a *g); |
105 | int vgpu_gk20a_init_hal(struct gk20a *g); | ||
106 | int vgpu_gm20b_init_hal(struct gk20a *g); | 105 | int vgpu_gm20b_init_hal(struct gk20a *g); |
107 | int vgpu_gp10b_init_hal(struct gk20a *g); | 106 | int vgpu_gp10b_init_hal(struct gk20a *g); |
108 | 107 | ||