-rw-r--r--  drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c  | 30
-rw-r--r--  drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c  | 25
-rw-r--r--  drivers/gpu/nvgpu/vgpu/gr_vgpu.c              | 48
-rw-r--r--  include/linux/tegra_vgpu.h                    |  3
4 files changed, 99 insertions, 7 deletions
diff --git a/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c b/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c
index 0a2ca743..2cfe16da 100644
--- a/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -45,7 +45,35 @@ static void vgpu_gk20a_detect_sm_arch(struct gk20a *g)
 		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
 }
 
+static int vgpu_gk20a_init_fs_state(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+	u32 tpc_index, gpc_index;
+	u32 sm_id = 0;
+
+	gk20a_dbg_fn("");
+
+	for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count;
+	     tpc_index++) {
+		for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+			if (tpc_index < gr->gpc_tpc_count[gpc_index]) {
+				g->gr.sm_to_cluster[sm_id].tpc_index =
+					tpc_index;
+				g->gr.sm_to_cluster[sm_id].gpc_index =
+					gpc_index;
+
+				sm_id++;
+			}
+		}
+	}
+
+	gr->no_of_sm = sm_id;
+
+	return 0;
+}
+
 void vgpu_gk20a_init_gr_ops(struct gpu_ops *gops)
 {
 	gops->gr.detect_sm_arch = vgpu_gk20a_detect_sm_arch;
+	gops->gr.init_fs_state = vgpu_gk20a_init_fs_state;
 }
diff --git a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c
index 07d41b2e..fb1f31d8 100644
--- a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c
+++ b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -37,7 +37,30 @@ static void vgpu_gm20b_detect_sm_arch(struct gk20a *g)
 		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
 }
 
+static int vgpu_gm20b_init_fs_state(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+	u32 tpc_index, gpc_index;
+	u32 sm_id = 0;
+
+	gk20a_dbg_fn("");
+
+	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+		for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index];
+		     tpc_index++) {
+			g->gr.sm_to_cluster[sm_id].tpc_index = tpc_index;
+			g->gr.sm_to_cluster[sm_id].gpc_index = gpc_index;
+
+			sm_id++;
+		}
+	}
+
+	gr->no_of_sm = sm_id;
+	return 0;
+}
+
 void vgpu_gm20b_init_gr_ops(struct gpu_ops *gops)
 {
 	gops->gr.detect_sm_arch = vgpu_gm20b_detect_sm_arch;
+	gops->gr.init_fs_state = vgpu_gm20b_init_fs_state;
 }
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index 16d51ad3..420c714e 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -567,6 +567,19 @@ static int vgpu_gr_free_obj_ctx(struct channel_gk20a *c,
 	return 0;
 }
 
+static u32 vgpu_gr_get_gpc_tpc_count(struct gk20a *g, u32 gpc_index)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	u32 data;
+
+	WARN_ON(gpc_index > 0);
+
+	if (vgpu_get_attribute(platform->virt_handle,
+			TEGRA_VGPU_ATTRIB_GPC0_TPC_COUNT, &data))
+		gk20a_err(dev_from_gk20a(g), "failed to retrieve gpc0_tpc_count");
+	return data;
+}
+
 static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
@@ -593,13 +606,23 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 			&gr->tpc_count))
 		return -ENOMEM;
 
+	gr->gpc_tpc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
+	if (!gr->gpc_tpc_count)
+		goto cleanup;
+
 	gr->gpc_tpc_mask = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
-	if (!gr->gpc_tpc_mask) {
-		gk20a_err(dev_from_gk20a(g), "%s: out of memory\n", __func__);
-		return -ENOMEM;
-	}
+	if (!gr->gpc_tpc_mask)
+		goto cleanup;
+
+	gr->sm_to_cluster = kzalloc(gr->gpc_count * gr->max_tpc_per_gpc_count *
+		sizeof(struct sm_info), GFP_KERNEL);
+	if (!gr->sm_to_cluster)
+		goto cleanup;
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+		gr->gpc_tpc_count[gpc_index] =
+			vgpu_gr_get_gpc_tpc_count(g, gpc_index);
+
 		if (g->ops.gr.get_gpc_tpc_mask)
 			gr->gpc_tpc_mask[gpc_index] =
 				g->ops.gr.get_gpc_tpc_mask(g, gpc_index);
@@ -608,7 +631,18 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 	g->ops.gr.bundle_cb_defaults(g);
 	g->ops.gr.cb_size_default(g);
 	g->ops.gr.calc_global_ctx_buffer_size(g);
+	g->ops.gr.init_fs_state(g);
 	return 0;
+cleanup:
+	gk20a_err(dev_from_gk20a(g), "%s: out of memory\n", __func__);
+
+	kfree(gr->gpc_tpc_count);
+	gr->gpc_tpc_count = NULL;
+
+	kfree(gr->gpc_tpc_mask);
+	gr->gpc_tpc_mask = NULL;
+
+	return -ENOMEM;
 }
 
 static int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
@@ -823,6 +857,12 @@ static void vgpu_remove_gr_support(struct gr_gk20a *gr)
 
 	kfree(gr->gpc_tpc_mask);
 	gr->gpc_tpc_mask = NULL;
+
+	kfree(gr->sm_to_cluster);
+	gr->sm_to_cluster = NULL;
+
+	kfree(gr->gpc_tpc_count);
+	gr->gpc_tpc_count = NULL;
 }
 
 static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
diff --git a/include/linux/tegra_vgpu.h b/include/linux/tegra_vgpu.h
index 67bd0d76..d517fabc 100644
--- a/include/linux/tegra_vgpu.h
+++ b/include/linux/tegra_vgpu.h
@@ -117,7 +117,8 @@ enum {
 	TEGRA_VGPU_ATTRIB_COMPTAGS_PER_CACHELINE,
 	TEGRA_VGPU_ATTRIB_SLICES_PER_LTC,
 	TEGRA_VGPU_ATTRIB_LTC_COUNT,
-	TEGRA_VGPU_ATTRIB_TPC_COUNT
+	TEGRA_VGPU_ATTRIB_TPC_COUNT,
+	TEGRA_VGPU_ATTRIB_GPC0_TPC_COUNT,
 };
 
 struct tegra_vgpu_attrib_params {