From 6d758eb81bcbff4e50df5c9fa67a369a4e1f2074 Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Thu, 29 Jun 2017 15:59:05 -0700 Subject: gpu: nvgpu: gv11b: support for full subcontext Changes to enable 64 subcontexts: 1 SYNC + 63 ASYNC. Currently all subcontexts within a TSG can have only a single address space. Add support for NVGPU_TSG_IOCTL_BIND_CHANNEL_EX for selecting subctx id by client. Bug 1842197 Change-Id: Icf56a41303bd1ad7fc6f2a6fbc691bb7b4a01d22 Signed-off-by: seshendra Gadagottu Reviewed-on: https://git-master/r/1511145 Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/Makefile | 1 + drivers/gpu/nvgpu/channel_t19x.h | 24 +++++++ drivers/gpu/nvgpu/common/linux/ioctl_tsg_t19x.c | 87 ++++++++++++++++++++++++ drivers/gpu/nvgpu/common/linux/ioctl_tsg_t19x.h | 21 ++++++ drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | 16 +++-- drivers/gpu/nvgpu/gv11b/fifo_gv11b.h | 4 +- drivers/gpu/nvgpu/gv11b/gv11b.c | 4 +- drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | 90 ++++++++++++++++--------- drivers/gpu/nvgpu/gv11b/subctx_gv11b.h | 2 +- drivers/gpu/nvgpu/tsg_t19x.h | 21 ++++++ include/uapi/linux/nvgpu-t19x.h | 22 +++++- 11 files changed, 249 insertions(+), 43 deletions(-) create mode 100644 drivers/gpu/nvgpu/channel_t19x.h create mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_tsg_t19x.c create mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_tsg_t19x.h create mode 100644 drivers/gpu/nvgpu/tsg_t19x.h diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 8a7c768e..33391a80 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -1,6 +1,7 @@ nvgpu-t19x := ../../../../nvgpu-t19x/drivers/gpu/nvgpu nvgpu-y += \ + $(nvgpu-t19x)/common/linux/ioctl_tsg_t19x.o \ $(nvgpu-t19x)/gv11b/gv11b.o \ $(nvgpu-t19x)/gv11b/bus_gv11b.o \ $(nvgpu-t19x)/gv11b/mc_gv11b.o \ diff --git a/drivers/gpu/nvgpu/channel_t19x.h b/drivers/gpu/nvgpu/channel_t19x.h new file mode 100644 index 00000000..bb324426 --- /dev/null +++ 
b/drivers/gpu/nvgpu/channel_t19x.h @@ -0,0 +1,24 @@ +/* + * NVIDIA T19x Channel info + * + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _NVGPU_CHANNEL_T19X_H_ +#define _NVGPU_CHANNEL_T19X_H_ + +struct channel_t19x { + u32 subctx_id; /* subcontext id chosen for the channel via BIND_CHANNEL_EX */ + u32 runqueue_sel; /* runqueue selector placed in the channel's runlist entry */ +}; + +#endif diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_tsg_t19x.c b/drivers/gpu/nvgpu/common/linux/ioctl_tsg_t19x.c new file mode 100644 index 00000000..bf6088ab --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/ioctl_tsg_t19x.c @@ -0,0 +1,109 @@ +/* + * GV11B TSG IOCTL Handler + * + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include + +#include "gk20a/gk20a.h" + +#include "gv11b/fifo_gv11b.h" +#include "gv11b/subctx_gv11b.h" +#include "ioctl_tsg_t19x.h" + +/* + * Bind the channel behind arg->channel_fd to @tsg, recording + * arg->subcontext_id as the channel's subcontext id. + * + * Returns 0 on success, -EPERM when sched->control_locked is set, -EINVAL + * when no channel is found for the fd or the subcontext id is not below + * gv11b_get_max_subctx_count(), otherwise the error from gk20a_busy() or + * from the tsg_bind_channel op. + */ +static int gv11b_tsg_ioctl_bind_channel_ex(struct gk20a *g, + struct tsg_gk20a *tsg, struct nvgpu_tsg_bind_channel_ex_args *arg) +{ + struct gk20a_sched_ctrl *sched = &g->sched_ctrl; + struct channel_gk20a *ch; + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); + + nvgpu_mutex_acquire(&sched->control_lock); + if (sched->control_locked) { + err = -EPERM; + goto done; + } + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to power on gpu"); + goto done; + } + + /* + * The control lock and the gk20a_busy() reference are held from here + * on, so failures must unwind through "idle" instead of returning. 
+ */ + ch = gk20a_get_channel_from_file(arg->channel_fd); + if (!ch) { + err = -EINVAL; + goto idle; + } + if (arg->subcontext_id >= gv11b_get_max_subctx_count(g)) { + err = -EINVAL; + goto idle; + } + ch->t19x.subctx_id = arg->subcontext_id; + nvgpu_log(g, gpu_dbg_info, "channel id : %d : subctx: %d", + ch->chid, ch->t19x.subctx_id); + + /* Use runqueue selector 1 for all ASYNC ids */ + if (ch->t19x.subctx_id > CHANNEL_INFO_VEID0) + ch->t19x.runqueue_sel = 1; + + err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); +idle: + gk20a_idle(g); +done: + nvgpu_mutex_release(&sched->control_lock); + return err; +} + +/* + * Dispatch a T19x TSG ioctl: @buf is cast to the argument structure of + * @cmd. Returns the command handler's result, or -ENOTTY when @cmd is not + * handled here. + */ +int t19x_tsg_ioctl_handler(struct gk20a *g, struct tsg_gk20a *tsg, + unsigned int cmd, u8 *buf) +{ + int err = 0; + + nvgpu_log(g, gpu_dbg_fn, "t19x_tsg_ioctl_handler"); + + switch (cmd) { + case NVGPU_TSG_IOCTL_BIND_CHANNEL_EX: + { + err = gv11b_tsg_ioctl_bind_channel_ex(g, tsg, + (struct nvgpu_tsg_bind_channel_ex_args *)buf); + break; + } + + default: + nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x", + cmd); + err = -ENOTTY; + break; + } + return err; +} diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_tsg_t19x.h b/drivers/gpu/nvgpu/common/linux/ioctl_tsg_t19x.h new file mode 100644 index 00000000..3376ffce --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/ioctl_tsg_t19x.h @@ -0,0 +1,21 @@ +/* + * GV11B TSG IOCTL handler + * + * Copyright (c) 2017, NVIDIA 
CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _NVGPU_IOCTL_TSG_T19X +#define _NVGPU_IOCTL_TSG_T19X + +int t19x_tsg_ioctl_handler(struct gk20a *g, struct tsg_gk20a *tsg, + unsigned int cmd, u8 *arg); +#endif diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index a1f6d258..ace873e9 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c @@ -45,7 +45,6 @@ #include "subctx_gv11b.h" #include "gr_gv11b.h" -#define CHANNEL_INFO_VEID0 0 #define PBDMA_SUBDEVICE_ID 1 static void gv11b_fifo_init_ramfc_eng_method_buffer(struct gk20a *g, @@ -94,7 +93,8 @@ static void gv11b_get_ch_runlist_entry(struct channel_gk20a *c, u32 *runlist) /* Time being use 0 pbdma sequencer */ runlist_entry = ram_rl_entry_type_channel_v() | - ram_rl_entry_chan_runqueue_selector_f(0) | + ram_rl_entry_chan_runqueue_selector_f( + c->t19x.runqueue_sel) | ram_rl_entry_chan_userd_target_f( ram_rl_entry_chan_userd_target_sys_mem_ncoh_v()) | ram_rl_entry_chan_inst_target_f( @@ -178,10 +178,14 @@ static int channel_gv11b_setup_ramfc(struct channel_gk20a *c, nvgpu_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->chid)); - /* Until full subcontext is supported, always use VEID0 */ - nvgpu_mem_wr32(g, mem, ram_fc_set_channel_info_w(), - pbdma_set_channel_info_scg_type_graphics_compute0_f() | - pbdma_set_channel_info_veid_f(CHANNEL_INFO_VEID0)); + if (c->t19x.subctx_id == CHANNEL_INFO_VEID0) + nvgpu_mem_wr32(g, mem, ram_fc_set_channel_info_w(), + 
pbdma_set_channel_info_scg_type_graphics_compute0_f() | + pbdma_set_channel_info_veid_f(c->t19x.subctx_id)); + else + nvgpu_mem_wr32(g, mem, ram_fc_set_channel_info_w(), + pbdma_set_channel_info_scg_type_compute1_f() | + pbdma_set_channel_info_veid_f(c->t19x.subctx_id)); gv11b_fifo_init_ramfc_eng_method_buffer(g, c, mem); diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h index 03cca839..032342b2 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h @@ -1,7 +1,7 @@ /* * GV11B Fifo * - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -36,6 +36,8 @@ #define GR_RUNQUE 0 /* pbdma 0 */ #define ASYNC_CE_RUNQUE 2 /* pbdma 2 */ +#define CHANNEL_INFO_VEID0 0 + struct gpu_ops; void gv11b_init_fifo(struct gpu_ops *gops); void gv11b_fifo_reset_pbdma_and_eng_faulted(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/gv11b/gv11b.c b/drivers/gpu/nvgpu/gv11b/gv11b.c index 09628940..69fd0cf4 100644 --- a/drivers/gpu/nvgpu/gv11b/gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gv11b.c @@ -1,7 +1,7 @@ /* * GV11B Graphics * - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -23,5 +23,7 @@ int gv11b_init_gpu_characteristics(struct gk20a *g) { gk20a_init_gpu_characteristics(g); + g->gpu_characteristics.flags |= + NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS; return 0; } diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c index 79ed0d1e..cb042f87 100644 --- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c @@ -31,12 +31,17 @@ static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, struct nvgpu_mem *inst_block); +static void gv11b_subctx_commit_valid_mask(struct channel_gk20a *c, + struct nvgpu_mem *inst_block); +static void gv11b_subctx_commit_pdb(struct channel_gk20a *c, + struct nvgpu_mem *inst_block); + void gv11b_free_subctx_header(struct channel_gk20a *c) { struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; struct gk20a *g = c->g; - gk20a_dbg_fn(""); + nvgpu_log(g, gpu_dbg_fn, "gv11b_free_subctx_header"); if (ctx->mem.gpu_va) { nvgpu_gmmu_unmap(c->vm, &ctx->mem, ctx->mem.gpu_va); @@ -52,7 +57,7 @@ int gv11b_alloc_subctx_header(struct channel_gk20a *c) struct gr_gk20a *gr = &g->gr; int ret = 0; - gk20a_dbg_fn(""); + nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header"); if (ctx->mem.gpu_va == 0) { ret = nvgpu_dma_alloc_flags_sys(g, @@ -82,7 +87,6 @@ int gv11b_alloc_subctx_header(struct channel_gk20a *c) nvgpu_mem_end(g, &ctx->mem); gv11b_init_subcontext_pdb(c, &c->inst_block); - } return ret; } @@ -91,37 +95,13 @@ static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, struct nvgpu_mem *inst_block) { struct gk20a *g = c->g; - struct vm_gk20a *vm; - u64 pdb_addr, pdb_addr_lo, pdb_addr_hi; - u32 format_word; - u32 lo, hi; - gk20a_dbg_fn(""); - /* load main pdb as veid0 pdb also */ - vm = c->vm; - pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.priv.sgt->sgl, 0); - pdb_addr_lo = u64_lo32(pdb_addr >> 
ram_in_base_shift_v()); - pdb_addr_hi = u64_hi32(pdb_addr); - format_word = ram_in_sc_page_dir_base_target_f( - ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) | - ram_in_sc_page_dir_base_vol_f( - ram_in_sc_page_dir_base_vol_true_v(), 0) | - ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) | - ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) | - ram_in_sc_use_ver2_pt_format_f(1, 0) | - ram_in_sc_big_page_size_f(1, 0) | - ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); - lo = ram_in_sc_page_dir_base_vol_0_w(); - hi = ram_in_sc_page_dir_base_hi_0_w(); - nvgpu_mem_wr32(g, inst_block, lo, format_word); - nvgpu_mem_wr32(g, inst_block, hi, pdb_addr_hi); - - /* make subcontext0 address space to valid */ - /* TODO fix proper hw register definations */ - nvgpu_mem_wr32(g, inst_block, 166, 0x1); - nvgpu_mem_wr32(g, inst_block, 167, 0); + gv11b_subctx_commit_pdb(c, inst_block); + gv11b_subctx_commit_valid_mask(c, inst_block); + + nvgpu_log(g, gpu_dbg_info, " subctx %d instblk set", c->t19x.subctx_id); nvgpu_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(), - ram_in_engine_wfi_veid_f(0)); + ram_in_engine_wfi_veid_f(c->t19x.subctx_id)); } @@ -149,7 +129,63 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va) return ret; } -int gv11b_get_max_subctx_count(struct gk20a *g) +/* + * Mark every subcontext PDB in @inst_block as valid by writing 0xffffffff + * to offsets 166 and 167. + * NOTE(review): the offsets are hard-coded; the code this replaces carried + * a TODO to use proper hw register definitions - confirm and convert. + */ +static void gv11b_subctx_commit_valid_mask(struct channel_gk20a *c, + struct nvgpu_mem *inst_block) +{ + struct gk20a *g = c->g; + + /* Make all subctx pdbs valid */ + nvgpu_mem_wr32(g, inst_block, 166, 0xffffffff); + nvgpu_mem_wr32(g, inst_block, 167, 0xffffffff); +} + +/* + * Write the PDB address of c->vm, with the same format word, into the + * @inst_block page directory base entry of every subcontext. + */ +static void gv11b_subctx_commit_pdb(struct channel_gk20a *c, + struct nvgpu_mem *inst_block) +{ + struct gk20a *g = c->g; + u32 lo, hi; + u32 subctx_id = 0; + u32 format_word; + u32 pdb_addr_lo, pdb_addr_hi; + u64 pdb_addr; + u32 max_subctx_count; + + pdb_addr = g->ops.mm.get_iova_addr(g, c->vm->pdb.mem.priv.sgt->sgl, 0); + pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); + pdb_addr_hi = u64_hi32(pdb_addr); + format_word = 
ram_in_sc_page_dir_base_target_f( + ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) | + ram_in_sc_page_dir_base_vol_f( + ram_in_sc_page_dir_base_vol_true_v(), 0) | + ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) | + ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) | + ram_in_sc_use_ver2_pt_format_f(1, 0) | + ram_in_sc_big_page_size_f(1, 0) | + ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); + nvgpu_log(g, gpu_dbg_info, " pdb info lo %x hi %x", + format_word, pdb_addr_hi); + /* Hoisted: gv11b_get_max_subctx_count() reads a register on each call */ + max_subctx_count = gv11b_get_max_subctx_count(g); + for (subctx_id = 0; subctx_id < max_subctx_count; subctx_id++) { + lo = ram_in_sc_page_dir_base_vol_0_w() + (4 * subctx_id); + hi = ram_in_sc_page_dir_base_hi_0_w() + (4 * subctx_id); + nvgpu_mem_wr32(g, inst_block, lo, format_word); + nvgpu_mem_wr32(g, inst_block, hi, pdb_addr_hi); + } +} + + +u32 gv11b_get_max_subctx_count(struct gk20a *g) { u32 data = gk20a_readl(g, gr_pri_fe_chip_def_info_r()); diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h index fdfe9e3b..5e4e99f5 100644 --- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h @@ -25,5 +25,5 @@ void gv11b_free_subctx_header(struct channel_gk20a *c); int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va); -int gv11b_get_max_subctx_count(struct gk20a *g); +u32 gv11b_get_max_subctx_count(struct gk20a *g); #endif /* __SUBCONTEXT_GV11B_H__ */ diff --git a/drivers/gpu/nvgpu/tsg_t19x.h b/drivers/gpu/nvgpu/tsg_t19x.h new file mode 100644 index 00000000..2f359668 --- /dev/null +++ b/drivers/gpu/nvgpu/tsg_t19x.h @@ -0,0 +1,21 @@ +/* + * NVIDIA T19x TSG + * + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _NVGPU_TSG_T19X_H_ +#define _NVGPU_TSG_T19X_H_ + +#include "common/linux/ioctl_tsg_t19x.h" + +#endif diff --git a/include/uapi/linux/nvgpu-t19x.h b/include/uapi/linux/nvgpu-t19x.h index 6a10fb77..96514a88 100644 --- a/include/uapi/linux/nvgpu-t19x.h +++ b/include/uapi/linux/nvgpu-t19x.h @@ -1,7 +1,7 @@ /* * NVGPU Public Interface Header * - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -27,4 +27,24 @@ #define NVGPU_GPU_ARCH_GV110 0x00000150 #define NVGPU_GPU_IMPL_GV11B 0x0000000B +/* subcontexts are available */ +#define NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS (1ULL << 22) + +struct nvgpu_tsg_bind_channel_ex_args { + /* in: channel fd */ + __s32 channel_fd; + + /* in: VEID in Volta */ + __u32 subcontext_id; + + __u64 reserved[2]; +}; + +#define NVGPU_TSG_IOCTL_BIND_CHANNEL_EX \ + _IOWR(NVGPU_TSG_IOCTL_MAGIC, 11, struct nvgpu_tsg_bind_channel_ex_args) + +#define NVGPU_TSG_IOCTL_MAX NVGPU_TSG_IOCTL_BIND_CHANNEL_EX + +#define NVGPU_TSG_IOCTL_MAX_ARG sizeof(struct nvgpu_tsg_bind_channel_ex_args) + #endif /* _UAPI__LINUX_NVGPU_T19X_IOCTL_H_ */ -- cgit v1.2.2