From a21e56d584641202327f64741b06b1cd9633d0f6 Mon Sep 17 00:00:00 2001 From: Peter Daifuku Date: Thu, 31 Mar 2016 14:57:20 -0700 Subject: gpu: nvgpu: vgpu: add support for VSM ioctls Add virtualized support for NUM_VSMS and VSMS_MAPPING ioctls. This requires adding an attribute request for the RM server, GPC0_TPC_COUNT JIRASW EVLR-253 Change-Id: Icaab4fadbbc9eab5d00cf78132928686944162df Signed-off-by: Peter Daifuku Reviewed-on: http://git-master/r/1130615 (cherry picked from commit 78514079382b0de48457db340e3479e99a012040) Reviewed-on: http://git-master/r/1133865 (cherry picked from commit 27a8e645e2787a43d0073f0be6e8f64c0f183228) Reviewed-on: http://git-master/r/1122553 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c | 30 ++++++++++++++++- drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c | 25 ++++++++++++++- drivers/gpu/nvgpu/vgpu/gr_vgpu.c | 48 +++++++++++++++++++++++++--- include/linux/tegra_vgpu.h | 3 +- 4 files changed, 99 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c b/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c index 0a2ca743..2cfe16da 100644 --- a/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c +++ b/drivers/gpu/nvgpu/vgpu/gk20a/vgpu_gr_gk20a.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -45,7 +45,35 @@ static void vgpu_gk20a_detect_sm_arch(struct gk20a *g) gr_gpc0_tpc0_sm_arch_warp_count_v(v); } +static int vgpu_gk20a_init_fs_state(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + u32 tpc_index, gpc_index; + u32 sm_id = 0; + + gk20a_dbg_fn(""); + + for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; + tpc_index++) { + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + if (tpc_index < gr->gpc_tpc_count[gpc_index]) { + g->gr.sm_to_cluster[sm_id].tpc_index = + tpc_index; + g->gr.sm_to_cluster[sm_id].gpc_index = + gpc_index; + + sm_id++; + } + } + } + + gr->no_of_sm = sm_id; + + return 0; +} + void vgpu_gk20a_init_gr_ops(struct gpu_ops *gops) { gops->gr.detect_sm_arch = vgpu_gk20a_detect_sm_arch; + gops->gr.init_fs_state = vgpu_gk20a_init_fs_state; } diff --git a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c index 07d41b2e..fb1f31d8 100644 --- a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c +++ b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -37,7 +37,30 @@ static void vgpu_gm20b_detect_sm_arch(struct gk20a *g) gr_gpc0_tpc0_sm_arch_warp_count_v(v); } +static int vgpu_gm20b_init_fs_state(struct gk20a *g) +{ + struct gr_gk20a *gr = &g->gr; + u32 tpc_index, gpc_index; + u32 sm_id = 0; + + gk20a_dbg_fn(""); + + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index]; + tpc_index++) { + g->gr.sm_to_cluster[sm_id].tpc_index = tpc_index; + g->gr.sm_to_cluster[sm_id].gpc_index = gpc_index; + + sm_id++; + } + } + + gr->no_of_sm = sm_id; + return 0; +} + void vgpu_gm20b_init_gr_ops(struct gpu_ops *gops) { gops->gr.detect_sm_arch = vgpu_gm20b_detect_sm_arch; + gops->gr.init_fs_state = vgpu_gm20b_init_fs_state; } diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c index 16d51ad3..420c714e 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c @@ -567,6 +567,19 @@ static int vgpu_gr_free_obj_ctx(struct channel_gk20a *c, return 0; } +static u32 vgpu_gr_get_gpc_tpc_count(struct gk20a *g, u32 gpc_index) +{ + struct gk20a_platform *platform = gk20a_get_platform(g->dev); + u32 data; + + WARN_ON(gpc_index > 0); + + if (vgpu_get_attribute(platform->virt_handle, + TEGRA_VGPU_ATTRIB_GPC0_TPC_COUNT, &data)) + gk20a_err(dev_from_gk20a(g), "failed to retrieve gpc0_tpc_count"); + return data; +} + static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) { struct gk20a_platform *platform = gk20a_get_platform(g->dev); @@ -593,13 +606,23 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) &gr->tpc_count)) return -ENOMEM; + gr->gpc_tpc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); + if (!gr->gpc_tpc_count) + goto cleanup; + gr->gpc_tpc_mask = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); - if (!gr->gpc_tpc_mask) { - gk20a_err(dev_from_gk20a(g), "%s: out of memory\n", __func__); - return -ENOMEM; - } + if (!gr->gpc_tpc_mask) + goto cleanup; + + gr->sm_to_cluster = kzalloc(gr->gpc_count * gr->max_tpc_per_gpc_count * + sizeof(struct sm_info), GFP_KERNEL); + if (!gr->sm_to_cluster) + goto cleanup; for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + gr->gpc_tpc_count[gpc_index] = + vgpu_gr_get_gpc_tpc_count(g, gpc_index); + if (g->ops.gr.get_gpc_tpc_mask) gr->gpc_tpc_mask[gpc_index] = g->ops.gr.get_gpc_tpc_mask(g, gpc_index); @@ -608,7 +631,18 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) g->ops.gr.bundle_cb_defaults(g); g->ops.gr.cb_size_default(g); g->ops.gr.calc_global_ctx_buffer_size(g); + g->ops.gr.init_fs_state(g); return 0; +cleanup: + gk20a_err(dev_from_gk20a(g), "%s: out of memory\n", __func__); + + kfree(gr->gpc_tpc_count); + gr->gpc_tpc_count = NULL; + + kfree(gr->gpc_tpc_mask); + gr->gpc_tpc_mask = NULL; + + return -ENOMEM; } static int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, @@ -823,6 +857,12 @@ static void vgpu_remove_gr_support(struct gr_gk20a *gr) kfree(gr->gpc_tpc_mask); gr->gpc_tpc_mask = NULL; + + kfree(gr->sm_to_cluster); + gr->sm_to_cluster = NULL; + + kfree(gr->gpc_tpc_count); + gr->gpc_tpc_count = NULL; } static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) diff --git a/include/linux/tegra_vgpu.h b/include/linux/tegra_vgpu.h index 67bd0d76..d517fabc 100644 --- a/include/linux/tegra_vgpu.h +++ b/include/linux/tegra_vgpu.h @@ -117,7 +117,8 @@ enum { TEGRA_VGPU_ATTRIB_COMPTAGS_PER_CACHELINE, TEGRA_VGPU_ATTRIB_SLICES_PER_LTC, TEGRA_VGPU_ATTRIB_LTC_COUNT, - TEGRA_VGPU_ATTRIB_TPC_COUNT + TEGRA_VGPU_ATTRIB_TPC_COUNT, + TEGRA_VGPU_ATTRIB_GPC0_TPC_COUNT, }; struct tegra_vgpu_attrib_params { -- cgit v1.2.2