diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/subctx_gv11b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | 185 |
1 file changed, 185 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c new file mode 100644 index 00000000..fe1aa8a5 --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | |||
@@ -0,0 +1,185 @@ | |||
1 | /* | ||
2 | * Volta GPU series Subcontext | ||
3 | * | ||
4 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include "gk20a/gk20a.h" | ||
26 | |||
27 | #include "gv11b/subctx_gv11b.h" | ||
28 | |||
29 | #include <nvgpu/dma.h> | ||
30 | #include <nvgpu/log.h> | ||
31 | #include <nvgpu/gmmu.h> | ||
32 | |||
33 | #include <nvgpu/hw/gv11b/hw_ram_gv11b.h> | ||
34 | #include <nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h> | ||
35 | |||
36 | static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, | ||
37 | struct nvgpu_mem *inst_block); | ||
38 | |||
39 | static void gv11b_subctx_commit_valid_mask(struct channel_gk20a *c, | ||
40 | struct nvgpu_mem *inst_block); | ||
41 | static void gv11b_subctx_commit_pdb(struct channel_gk20a *c, | ||
42 | struct nvgpu_mem *inst_block); | ||
43 | |||
44 | void gv11b_free_subctx_header(struct channel_gk20a *c) | ||
45 | { | ||
46 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | ||
47 | struct gk20a *g = c->g; | ||
48 | |||
49 | nvgpu_log(g, gpu_dbg_fn, "gv11b_free_subctx_header"); | ||
50 | |||
51 | if (ctx->mem.gpu_va) { | ||
52 | nvgpu_gmmu_unmap(c->vm, &ctx->mem, ctx->mem.gpu_va); | ||
53 | |||
54 | nvgpu_dma_free(g, &ctx->mem); | ||
55 | } | ||
56 | } | ||
57 | |||
58 | int gv11b_alloc_subctx_header(struct channel_gk20a *c) | ||
59 | { | ||
60 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | ||
61 | struct gk20a *g = c->g; | ||
62 | int ret = 0; | ||
63 | |||
64 | nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header"); | ||
65 | |||
66 | if (ctx->mem.gpu_va == 0) { | ||
67 | ret = nvgpu_dma_alloc_flags_sys(g, | ||
68 | 0, /* No Special flags */ | ||
69 | ctxsw_prog_fecs_header_v(), | ||
70 | &ctx->mem); | ||
71 | if (ret) { | ||
72 | nvgpu_err(g, "failed to allocate sub ctx header"); | ||
73 | return ret; | ||
74 | } | ||
75 | ctx->mem.gpu_va = nvgpu_gmmu_map(c->vm, | ||
76 | &ctx->mem, | ||
77 | ctx->mem.size, | ||
78 | 0, /* not GPU-cacheable */ | ||
79 | gk20a_mem_flag_none, true, | ||
80 | ctx->mem.aperture); | ||
81 | if (!ctx->mem.gpu_va) { | ||
82 | nvgpu_err(g, "failed to map ctx header"); | ||
83 | nvgpu_dma_free(g, &ctx->mem); | ||
84 | return -ENOMEM; | ||
85 | } | ||
86 | /* Now clear the buffer */ | ||
87 | if (nvgpu_mem_begin(g, &ctx->mem)) | ||
88 | return -ENOMEM; | ||
89 | |||
90 | nvgpu_memset(g, &ctx->mem, 0, 0, ctx->mem.size); | ||
91 | nvgpu_mem_end(g, &ctx->mem); | ||
92 | |||
93 | gv11b_init_subcontext_pdb(c, &c->inst_block); | ||
94 | } | ||
95 | return ret; | ||
96 | } | ||
97 | |||
/*
 * Program the channel instance block for subcontext operation: replicate
 * the channel VM's PDB into every subcontext slot, mark those slots valid,
 * and record this channel's VEID in the engine-wfi field.
 *
 * Called once from gv11b_alloc_subctx_header() right after the subctx
 * header is first allocated and mapped.
 */
static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
				struct nvgpu_mem *inst_block)
{
	struct gk20a *g = c->g;

	/* Write the PDB entries first ... */
	gv11b_subctx_commit_pdb(c, inst_block);
	/* ... and only then flag them as valid. */
	gv11b_subctx_commit_valid_mask(c, inst_block);

	nvgpu_log(g, gpu_dbg_info, " subctx %d instblk set", c->t19x.subctx_id);
	nvgpu_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(),
			ram_in_engine_wfi_veid_f(c->t19x.subctx_id));

}
111 | |||
112 | int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va) | ||
113 | { | ||
114 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | ||
115 | struct nvgpu_mem *gr_mem; | ||
116 | struct gk20a *g = c->g; | ||
117 | int ret = 0; | ||
118 | u32 addr_lo, addr_hi; | ||
119 | |||
120 | addr_lo = u64_lo32(gpu_va); | ||
121 | addr_hi = u64_hi32(gpu_va); | ||
122 | |||
123 | gr_mem = &ctx->mem; | ||
124 | g->ops.mm.l2_flush(g, true); | ||
125 | if (nvgpu_mem_begin(g, gr_mem)) | ||
126 | return -ENOMEM; | ||
127 | |||
128 | nvgpu_mem_wr(g, gr_mem, | ||
129 | ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi); | ||
130 | nvgpu_mem_wr(g, gr_mem, | ||
131 | ctxsw_prog_main_image_context_buffer_ptr_o(), addr_lo); | ||
132 | |||
133 | nvgpu_mem_wr(g, gr_mem, | ||
134 | ctxsw_prog_main_image_ctl_o(), | ||
135 | ctxsw_prog_main_image_ctl_type_per_veid_header_v()); | ||
136 | nvgpu_mem_end(g, gr_mem); | ||
137 | return ret; | ||
138 | } | ||
139 | |||
140 | void gv11b_subctx_commit_valid_mask(struct channel_gk20a *c, | ||
141 | struct nvgpu_mem *inst_block) | ||
142 | { | ||
143 | struct gk20a *g = c->g; | ||
144 | |||
145 | /* Make all subctx pdbs valid */ | ||
146 | nvgpu_mem_wr32(g, inst_block, 166, 0xffffffff); | ||
147 | nvgpu_mem_wr32(g, inst_block, 167, 0xffffffff); | ||
148 | } | ||
149 | |||
150 | void gv11b_subctx_commit_pdb(struct channel_gk20a *c, | ||
151 | struct nvgpu_mem *inst_block) | ||
152 | { | ||
153 | struct gk20a *g = c->g; | ||
154 | struct fifo_gk20a *f = &g->fifo; | ||
155 | struct vm_gk20a *vm = c->vm; | ||
156 | u32 lo, hi; | ||
157 | u32 subctx_id = 0; | ||
158 | u32 format_word; | ||
159 | u32 pdb_addr_lo, pdb_addr_hi; | ||
160 | u64 pdb_addr; | ||
161 | u32 aperture = nvgpu_aperture_mask(g, vm->pdb.mem, | ||
162 | ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), | ||
163 | ram_in_sc_page_dir_base_target_vid_mem_v()); | ||
164 | |||
165 | pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem); | ||
166 | pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); | ||
167 | pdb_addr_hi = u64_hi32(pdb_addr); | ||
168 | format_word = ram_in_sc_page_dir_base_target_f( | ||
169 | aperture, 0) | | ||
170 | ram_in_sc_page_dir_base_vol_f( | ||
171 | ram_in_sc_page_dir_base_vol_true_v(), 0) | | ||
172 | ram_in_sc_page_dir_base_fault_replay_tex_f(1, 0) | | ||
173 | ram_in_sc_page_dir_base_fault_replay_gcc_f(1, 0) | | ||
174 | ram_in_sc_use_ver2_pt_format_f(1, 0) | | ||
175 | ram_in_sc_big_page_size_f(1, 0) | | ||
176 | ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); | ||
177 | nvgpu_log(g, gpu_dbg_info, " pdb info lo %x hi %x", | ||
178 | format_word, pdb_addr_hi); | ||
179 | for (subctx_id = 0; subctx_id < f->t19x.max_subctx_count; subctx_id++) { | ||
180 | lo = ram_in_sc_page_dir_base_vol_0_w() + (4 * subctx_id); | ||
181 | hi = ram_in_sc_page_dir_base_hi_0_w() + (4 * subctx_id); | ||
182 | nvgpu_mem_wr32(g, inst_block, lo, format_word); | ||
183 | nvgpu_mem_wr32(g, inst_block, hi, pdb_addr_hi); | ||
184 | } | ||
185 | } | ||