 drivers/gpu/nvgpu/Makefile          |   1 +
 drivers/gpu/nvgpu/gv100/gr_gv100.c  | 289 ++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gv100/gr_gv100.h  |  36 ++++
 drivers/gpu/nvgpu/gv100/hal_gv100.c |  10 +-
 4 files changed, 331 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 8f1d42da..d6d60147 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -28,6 +28,7 @@ nvgpu-y += \
 	$(nvgpu-t19x)/gv100/fb_gv100.o \
 	$(nvgpu-t19x)/gv100/bios_gv100.o \
 	$(nvgpu-t19x)/gv100/fifo_gv100.o \
+	$(nvgpu-t19x)/gv100/gr_gv100.o \
 	$(nvgpu-t19x)/gv100/hal_gv100.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t19x)/gv11b/platform_gv11b_tegra.o
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c
new file mode 100644
index 00000000..4b2038ba
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c
@@ -0,0 +1,289 @@
+/*
+ * GV100 GPU GR
+ *
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/log.h>
+#include <nvgpu/debug.h>
+#include <nvgpu/enabled.h>
+
+#include "gk20a/gk20a.h"
+#include "gk20a/gr_gk20a.h"
+
+#include "gv100/gr_gv100.h"
+#include "gv11b/subctx_gv11b.h"
+
+#include <nvgpu/hw/gv100/hw_gr_gv100.h>
+#include <nvgpu/hw/gv100/hw_proj_gv100.h>
+
+/*
+ * Estimate performance if the given logical TPC in the given logical GPC were
+ * removed.
+ */
+static int gr_gv100_scg_estimate_perf(struct gk20a *g,
+				unsigned long *gpc_tpc_mask,
+				u32 disable_gpc_id, u32 disable_tpc_id,
+				int *perf)
+{
+	struct gr_gk20a *gr = &g->gr;
+	int err = 0;
+	u32 scale_factor = 512UL;	/* Use fx23.9 */
+	u32 pix_scale = 1024*1024UL;	/* Pix perf in [29:20] */
+	u32 world_scale = 1024UL;	/* World performance in [19:10] */
+	u32 tpc_scale = 1;		/* TPC balancing in [9:0] */
+	u32 scg_num_pes = 0;
+	u32 min_scg_gpc_pix_perf = scale_factor; /* Init perf as maximum */
+	u32 average_tpcs = 0;		/* Average of # of TPCs per GPC */
+	u32 deviation;			/* absolute diff between TPC# and
+					 * average_tpcs, averaged across GPCs
+					 */
+	u32 norm_tpc_deviation;		/* deviation/max_tpc_per_gpc */
+	u32 tpc_balance;
+	u32 scg_gpc_pix_perf;
+	u32 scg_world_perf;
+	u32 gpc_id;
+	u32 pes_id;
+	int diff;
+	bool is_tpc_removed_gpc = false;
+	bool is_tpc_removed_pes = false;
+	u32 max_tpc_gpc = 0;
+	u32 num_tpc_mask;
+	u32 *num_tpc_gpc = nvgpu_kzalloc(g, sizeof(u32) *
+			nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
+
+	if (!num_tpc_gpc)
+		return -ENOMEM;
+
+	/* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */
+	for (gpc_id = 0; gpc_id < gr->gpc_count; gpc_id++) {
+		num_tpc_mask = gpc_tpc_mask[gpc_id];
+
+		if ((gpc_id == disable_gpc_id) && num_tpc_mask &
+				(0x1 << disable_tpc_id)) {
+			/* Safety check if a TPC is removed twice */
+			if (is_tpc_removed_gpc) {
+				err = -EINVAL;
+				goto free_resources;
+			}
+			/* Remove logical TPC from set */
+			num_tpc_mask &= ~(0x1 << disable_tpc_id);
+			is_tpc_removed_gpc = true;
+		}
+
+		/* track balancing of tpcs across gpcs */
+		num_tpc_gpc[gpc_id] = hweight32(num_tpc_mask);
+		average_tpcs += num_tpc_gpc[gpc_id];
+
+		/* save the maximum number of TPCs in a GPC */
+		max_tpc_gpc = num_tpc_gpc[gpc_id] > max_tpc_gpc ?
+			num_tpc_gpc[gpc_id] : max_tpc_gpc;
+
+		/*
+		 * Calculate the ratio between the post-SCG and post-FS TPC
+		 * counts; the ratio represents the relative throughput of
+		 * the GPC.
+		 */
+		scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc_id] /
+			gr->gpc_tpc_count[gpc_id];
+
+		if (min_scg_gpc_pix_perf > scg_gpc_pix_perf)
+			min_scg_gpc_pix_perf = scg_gpc_pix_perf;
+
+		/* Calculate # of surviving PES */
+		for (pes_id = 0; pes_id < gr->gpc_ppc_count[gpc_id]; pes_id++) {
+			/* Count the number of TPCs in the set */
+			num_tpc_mask = gr->pes_tpc_mask[pes_id][gpc_id] &
+				gpc_tpc_mask[gpc_id];
+
+			if ((gpc_id == disable_gpc_id) && (num_tpc_mask &
+					(0x1 << disable_tpc_id))) {
+
+				if (is_tpc_removed_pes) {
+					err = -EINVAL;
+					goto free_resources;
+				}
+				num_tpc_mask &= ~(0x1 << disable_tpc_id);
+				is_tpc_removed_pes = true;
+			}
+			if (hweight32(num_tpc_mask))
+				scg_num_pes++;
+		}
+	}
+
+	if (!is_tpc_removed_gpc || !is_tpc_removed_pes) {
+		err = -EINVAL;
+		goto free_resources;
+	}
+
+	if (max_tpc_gpc == 0) {
+		*perf = 0;
+		goto free_resources;
+	}
+
+	/* Now calculate perf */
+	scg_world_perf = (scale_factor * scg_num_pes) / gr->ppc_count;
+	deviation = 0;
+	average_tpcs = scale_factor * average_tpcs / gr->gpc_count;
+	for (gpc_id = 0; gpc_id < gr->gpc_count; gpc_id++) {
+		diff = average_tpcs - scale_factor * num_tpc_gpc[gpc_id];
+		if (diff < 0)
+			diff = -diff;
+		deviation += diff;
+	}
+
+	deviation /= gr->gpc_count;
+
+	norm_tpc_deviation = deviation / max_tpc_gpc;
+
+	tpc_balance = scale_factor - norm_tpc_deviation;
+
+	if ((tpc_balance > scale_factor) ||
+		(scg_world_perf > scale_factor) ||
+		(min_scg_gpc_pix_perf > scale_factor) ||
+		(norm_tpc_deviation > scale_factor)) {
+		err = -EINVAL;
+		goto free_resources;
+	}
+
+	*perf = (pix_scale * min_scg_gpc_pix_perf) +
+		(world_scale * scg_world_perf) +
+		(tpc_scale * tpc_balance);
+free_resources:
+	nvgpu_kfree(g, num_tpc_gpc);
+	return err;
+}
+
+void gr_gv100_bundle_cb_defaults(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+
+	gr->bundle_cb_default_size =
+		gr_scc_bundle_cb_size_div_256b__prod_v();
+	gr->min_gpm_fifo_depth =
+		gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
+	gr->bundle_cb_token_limit =
+		gr_pd_ab_dist_cfg2_token_limit_init_v();
+}
+
+void gr_gv100_cb_size_default(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+
+	if (!gr->attrib_cb_default_size)
+		gr->attrib_cb_default_size =
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
+	gr->alpha_cb_default_size =
+		gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
+}
+
+void gr_gv100_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
+{
+}
+
+void gr_gv100_init_sm_id_table(struct gk20a *g)
+{
+	u32 gpc, tpc, sm, pes, gtpc;
+	u32 sm_id = 0;
+	u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
+	u32 num_sm = sm_per_tpc * g->gr.tpc_count;
+	int perf, maxperf;
+	int err;
+	unsigned long *gpc_tpc_mask;
+	u32 *tpc_table, *gpc_table;
+
+	gpc_table = nvgpu_kzalloc(g, g->gr.tpc_count * sizeof(u32));
+	tpc_table = nvgpu_kzalloc(g, g->gr.tpc_count * sizeof(u32));
+	gpc_tpc_mask = nvgpu_kzalloc(g, sizeof(unsigned long) *
+			nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
+
+	if (!gpc_table || !tpc_table || !gpc_tpc_mask) {
+		nvgpu_err(g, "Error allocating memory for sm tables");
+		goto exit_build_table;
+	}
+
+	for (gpc = 0; gpc < g->gr.gpc_count; gpc++)
+		for (pes = 0; pes < g->gr.gpc_ppc_count[gpc]; pes++)
+			gpc_tpc_mask[gpc] |= g->gr.pes_tpc_mask[pes][gpc];
+
+	for (gtpc = 0; gtpc < g->gr.tpc_count; gtpc++) {
+		maxperf = -1;
+		for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
+			for_each_set_bit(tpc, &gpc_tpc_mask[gpc],
+					g->gr.gpc_tpc_count[gpc]) {
+				perf = -1;
+				err = gr_gv100_scg_estimate_perf(g,
+					gpc_tpc_mask, gpc, tpc, &perf);
+
+				if (err) {
+					nvgpu_err(g,
+						"Error while estimating perf");
+					goto exit_build_table;
+				}
+
+				if (perf >= maxperf) {
+					maxperf = perf;
+					gpc_table[gtpc] = gpc;
+					tpc_table[gtpc] = tpc;
+				}
+			}
+		}
+		gpc_tpc_mask[gpc_table[gtpc]] &= ~(0x1 << tpc_table[gtpc]);
+	}
+
+	for (tpc = 0, sm_id = 0; sm_id < num_sm; tpc++, sm_id += sm_per_tpc) {
+		for (sm = 0; sm < sm_per_tpc; sm++) {
+			g->gr.sm_to_cluster[sm_id + sm].gpc_index =
+				gpc_table[tpc];
+			g->gr.sm_to_cluster[sm_id + sm].tpc_index =
+				tpc_table[tpc];
+			g->gr.sm_to_cluster[sm_id + sm].sm_index = sm;
+			g->gr.sm_to_cluster[sm_id + sm].global_tpc_index = tpc;
+		}
+	}
+
+	g->gr.no_of_sm = num_sm;
+	nvgpu_log_info(g, " total number of sm = %d", g->gr.no_of_sm);
+exit_build_table:
+	nvgpu_kfree(g, gpc_table);
+	nvgpu_kfree(g, tpc_table);
+	nvgpu_kfree(g, gpc_tpc_mask);
+}
+
+void gr_gv100_load_tpc_mask(struct gk20a *g)
+{
+	u64 pes_tpc_mask = 0x0ULL;
+	u32 gpc, pes;
+	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
+			GPU_LIT_NUM_TPC_PER_GPC);
+
+	/* gv100 has 6 GPCs and 7 TPCs per GPC */
+	for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
+		for (pes = 0; pes < g->gr.pe_count_per_gpc; pes++) {
+			pes_tpc_mask |= (u64) g->gr.pes_tpc_mask[pes][gpc] <<
+				(num_tpc_per_gpc * gpc);
+		}
+	}
+
+	nvgpu_log_info(g, "pes_tpc_mask: %016llx\n", pes_tpc_mask);
+	gk20a_writel(g, gr_fe_tpc_fs_r(0), u64_lo32(pes_tpc_mask));
+	gk20a_writel(g, gr_fe_tpc_fs_r(1), u64_hi32(pes_tpc_mask));
+}
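
Note for readers skimming the patch: gr_gv100_scg_estimate_perf() above folds three normalized terms into one integer so candidate TPCs can be ranked with a plain compare. The standalone sketch below is not driver code; the ~80%/~94% figures are invented for illustration and only show how the pix_scale, world_scale and tpc_scale weights place each term in its own bit field.

#include <stdio.h>

int main(void)
{
	const unsigned scale_factor = 512;		/* fx23.9, as in the patch */
	const unsigned pix_scale = 1024 * 1024;		/* pix perf lands in bits [29:20] */
	const unsigned world_scale = 1024;		/* world perf lands in bits [19:10] */
	const unsigned tpc_scale = 1;			/* TPC balance lands in bits [9:0] */

	/* Hypothetical fractions, each normalized so it never exceeds scale_factor */
	unsigned min_gpc_pix_perf = scale_factor * 4 / 5;	/* worst GPC keeps ~80% of its TPCs */
	unsigned world_perf = scale_factor * 15 / 16;		/* ~94% of the PES groups survive */
	unsigned tpc_balance = scale_factor - 12;		/* TPC counts nearly even across GPCs */

	unsigned perf = pix_scale * min_gpc_pix_perf +
			world_scale * world_perf +
			tpc_scale * tpc_balance;

	/* Higher fields dominate the comparison: pix perf first, then world
	 * perf, then TPC balance, so one integer compare ranks candidates. */
	printf("packed perf = %u (0x%08x)\n", perf, perf);
	return 0;
}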
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h
new file mode 100644
index 00000000..460b05ae
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h
@@ -0,0 +1,36 @@
1/*
2 * GV100 GPU GR
3 *
4 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVGPU_GR_GV100_H_
26#define _NVGPU_GR_GV100_H_
27
28void gr_gv100_bundle_cb_defaults(struct gk20a *g);
29void gr_gv100_cb_size_default(struct gk20a *g);
30void gr_gv100_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
31void gr_gv100_init_sm_id_table(struct gk20a *g);
32void gr_gv100_program_sm_id_numbering(struct gk20a *g,
33 u32 gpc, u32 tpc, u32 smid);
34int gr_gv100_load_smid_config(struct gk20a *g);
35
36#endif
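
The greedy ordering in gr_gv100_init_sm_id_table() (declared above, implemented earlier in the patch) may be easier to follow in isolation: for each table slot it scores every still-available TPC by estimating performance with that TPC removed, keeps the best-scoring one, and clears it from the candidate mask before filling the next slot. The sketch below restates that loop outside the driver; both the toy estimator and the 3-GPC/4-TPC topology are invented for illustration.

#include <stdio.h>

#define NUM_GPC		3
#define TPC_PER_GPC	4

/*
 * Toy stand-in for gr_gv100_scg_estimate_perf(): it simply rewards keeping
 * the per-GPC TPC counts balanced.
 */
static int estimate_perf(const unsigned mask[NUM_GPC], int dis_gpc, int dis_tpc)
{
	int gpc, min_left = TPC_PER_GPC;

	for (gpc = 0; gpc < NUM_GPC; gpc++) {
		unsigned m = mask[gpc];
		int left;

		if (gpc == dis_gpc)
			m &= ~(1u << dis_tpc);
		left = __builtin_popcount(m);
		if (left < min_left)
			min_left = left;
	}
	return min_left;	/* higher is better: the bottleneck GPC keeps more TPCs */
}

int main(void)
{
	unsigned mask[NUM_GPC] = { 0xF, 0xF, 0xF };	/* every TPC still available */
	int slot, gpc, tpc;

	for (slot = 0; slot < NUM_GPC * TPC_PER_GPC; slot++) {
		int maxperf = -1, best_gpc = 0, best_tpc = 0;

		/* Score each remaining TPC by what the machine looks like without it */
		for (gpc = 0; gpc < NUM_GPC; gpc++) {
			for (tpc = 0; tpc < TPC_PER_GPC; tpc++) {
				int perf;

				if (!(mask[gpc] & (1u << tpc)))
					continue;
				perf = estimate_perf(mask, gpc, tpc);
				if (perf >= maxperf) {
					maxperf = perf;
					best_gpc = gpc;
					best_tpc = tpc;
				}
			}
		}

		printf("slot %2d -> GPC %d, TPC %d\n", slot, best_gpc, best_tpc);
		mask[best_gpc] &= ~(1u << best_tpc);	/* consume it, like gpc_tpc_mask */
	}
	return 0;
}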
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index f20d2dcf..cefaf1ae 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -78,7 +78,7 @@
 
 #include "gv11b/dbg_gpu_gv11b.h"
 #include "gv11b/hal_gv11b.h"
-#include "gv11b/gr_gv11b.h"
+#include "gv100/gr_gv100.h"
 #include "gv11b/mc_gv11b.h"
 #include "gv11b/ltc_gv11b.h"
 #include "gv11b/gv11b.h"
@@ -263,8 +263,8 @@ static const struct gpu_ops gv100_ops = {
 	},
 	.gr = {
 		.init_gpc_mmu = gr_gv11b_init_gpc_mmu,
-		.bundle_cb_defaults = gr_gv11b_bundle_cb_defaults,
-		.cb_size_default = gr_gv11b_cb_size_default,
+		.bundle_cb_defaults = gr_gv100_bundle_cb_defaults,
+		.cb_size_default = gr_gv100_cb_size_default,
 		.calc_global_ctx_buffer_size =
 			gr_gv11b_calc_global_ctx_buffer_size,
 		.commit_global_attrib_cb = gr_gv11b_commit_global_attrib_cb,
@@ -285,7 +285,7 @@ static const struct gpu_ops gv100_ops = {
 		.set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask,
 		.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
 		.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
-		.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask,
+		.set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
 		.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
 		.free_channel_ctx = gk20a_free_channel_ctx,
 		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
@@ -335,7 +335,7 @@ static const struct gpu_ops gv100_ops = {
 		.resume_contexts = gr_gk20a_resume_contexts,
 		.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
 		.fuse_override = gp10b_gr_fuse_override,
-		.init_sm_id_table = gr_gv11b_init_sm_id_table,
+		.init_sm_id_table = gr_gv100_init_sm_id_table,
 		.load_smid_config = gr_gv11b_load_smid_config,
 		.program_sm_id_numbering = gr_gv11b_program_sm_id_numbering,
 		.is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr,
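
The hal_gv100.c hunks above follow nvgpu's usual HAL pattern: a per-chip ops table points at whichever chip's implementation fits, and GV100 overrides only the entries that differ from GV11B. A minimal sketch of that function-pointer override pattern (names and bodies invented for illustration):

#include <stdio.h>

struct gr_ops {
	void (*init_gpc_mmu)(void);
	void (*bundle_cb_defaults)(void);
};

static void gr_gv11b_init_gpc_mmu(void)
{
	puts("gv11b: init gpc mmu (reused as-is)");
}

static void gr_gv100_bundle_cb_defaults(void)
{
	puts("gv100: bundle cb defaults (chip-specific override)");
}

/* GV100 inherits most GR ops from GV11B and overrides only what differs. */
static const struct gr_ops gv100_gr_ops = {
	.init_gpc_mmu		= gr_gv11b_init_gpc_mmu,
	.bundle_cb_defaults	= gr_gv100_bundle_cb_defaults,
};

int main(void)
{
	gv100_gr_ops.init_gpc_mmu();
	gv100_gr_ops.bundle_cb_defaults();
	return 0;
}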