diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/Makefile | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | 12 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 42 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | 147 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/subctx_gv11b.h | 27 |
5 files changed, 228 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 4ea56d8e..c059e464 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile | |||
@@ -13,6 +13,7 @@ nvgpu-y += \ | |||
13 | $(nvgpu-t19x)/gv11b/ce_gv11b.o \ | 13 | $(nvgpu-t19x)/gv11b/ce_gv11b.o \ |
14 | $(nvgpu-t19x)/gv11b/gr_ctx_gv11b.o \ | 14 | $(nvgpu-t19x)/gv11b/gr_ctx_gv11b.o \ |
15 | $(nvgpu-t19x)/gv11b/pmu_gv11b.o \ | 15 | $(nvgpu-t19x)/gv11b/pmu_gv11b.o \ |
16 | $(nvgpu-t19x)/gv11b/therm_gv11b.o | 16 | $(nvgpu-t19x)/gv11b/therm_gv11b.o \ |
17 | $(nvgpu-t19x)/gv11b/subctx_gv11b.o | ||
17 | 18 | ||
18 | nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t19x)/gv11b/platform_gv11b_tegra.o | 19 | nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t19x)/gv11b/platform_gv11b_tegra.o |
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index bc413a9e..b9276e09 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | |||
@@ -19,6 +19,8 @@ | |||
19 | #include "gp10b/fifo_gp10b.h" | 19 | #include "gp10b/fifo_gp10b.h" |
20 | #include "hw_pbdma_gv11b.h" | 20 | #include "hw_pbdma_gv11b.h" |
21 | #include "fifo_gv11b.h" | 21 | #include "fifo_gv11b.h" |
22 | #include "subctx_gv11b.h" | ||
23 | #include "gr_gv11b.h" | ||
22 | #include "hw_fifo_gv11b.h" | 24 | #include "hw_fifo_gv11b.h" |
23 | #include "hw_ram_gv11b.h" | 25 | #include "hw_ram_gv11b.h" |
24 | #include "hw_ccsr_gv11b.h" | 26 | #include "hw_ccsr_gv11b.h" |
@@ -200,6 +202,15 @@ static void gv11b_userd_gp_put(struct gk20a *g, struct channel_gk20a *c) | |||
200 | 202 | ||
201 | } | 203 | } |
202 | 204 | ||
/*
 * gv11b implementation of the fifo unbind_channel op.
 *
 * Releases the channel's subcontext header through
 * gv11b_free_subctx_header() and then runs the common gk20a unbind.
 */
static void channel_gv11b_unbind(struct channel_gk20a *ch)
{
	gk20a_dbg_fn("");

	/* The subcontext header is dropped first, then the channel is unbound. */
	gv11b_free_subctx_header(ch);
	channel_gk20a_unbind(ch);
}
203 | 214 | ||
204 | static u32 gv11b_fifo_get_num_fifos(struct gk20a *g) | 215 | static u32 gv11b_fifo_get_num_fifos(struct gk20a *g) |
205 | { | 216 | { |
@@ -218,4 +229,5 @@ void gv11b_init_fifo(struct gpu_ops *gops) | |||
218 | gops->fifo.userd_gp_get = gv11b_userd_gp_get; | 229 | gops->fifo.userd_gp_get = gv11b_userd_gp_get; |
219 | gops->fifo.userd_gp_put = gv11b_userd_gp_put; | 230 | gops->fifo.userd_gp_put = gv11b_userd_gp_put; |
220 | gops->fifo.setup_ramfc = channel_gv11b_setup_ramfc; | 231 | gops->fifo.setup_ramfc = channel_gv11b_setup_ramfc; |
232 | gops->fifo.unbind_channel = channel_gv11b_unbind; | ||
221 | } | 233 | } |
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 7f5b8d3f..bdb96329 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -13,6 +13,7 @@ | |||
13 | * more details. | 13 | * more details. |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/tegra_gpu_t19x.h> | ||
16 | #include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */ | 17 | #include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */ |
17 | #include <linux/delay.h> | 18 | #include <linux/delay.h> |
18 | #include <linux/tegra-fuse.h> | 19 | #include <linux/tegra-fuse.h> |
@@ -24,12 +25,16 @@ | |||
24 | 25 | ||
25 | #include "gm20b/gr_gm20b.h" | 26 | #include "gm20b/gr_gm20b.h" |
26 | #include "gv11b/gr_gv11b.h" | 27 | #include "gv11b/gr_gv11b.h" |
28 | #include "gv11b/mm_gv11b.h" | ||
29 | #include "gv11b/subctx_gv11b.h" | ||
27 | #include "hw_gr_gv11b.h" | 30 | #include "hw_gr_gv11b.h" |
28 | #include "hw_fifo_gv11b.h" | 31 | #include "hw_fifo_gv11b.h" |
29 | #include "hw_proj_gv11b.h" | 32 | #include "hw_proj_gv11b.h" |
30 | #include "hw_ctxsw_prog_gv11b.h" | 33 | #include "hw_ctxsw_prog_gv11b.h" |
31 | #include "hw_mc_gv11b.h" | 34 | #include "hw_mc_gv11b.h" |
32 | #include "hw_gr_gv11b.h" | 35 | #include "hw_gr_gv11b.h" |
36 | #include "hw_ram_gv11b.h" | ||
37 | #include "hw_pbdma_gv11b.h" | ||
33 | #include <linux/vmalloc.h> | 38 | #include <linux/vmalloc.h> |
34 | #include <linux/tegra_gpu_t19x.h> | 39 | #include <linux/tegra_gpu_t19x.h> |
35 | 40 | ||
@@ -1583,7 +1588,6 @@ static int gr_gv11b_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) | |||
1583 | return 0; | 1588 | return 0; |
1584 | } | 1589 | } |
1585 | 1590 | ||
1586 | |||
1587 | static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) | 1591 | static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) |
1588 | { | 1592 | { |
1589 | struct av_list_gk20a *sw_veid_bundle_init = | 1593 | struct av_list_gk20a *sw_veid_bundle_init = |
@@ -1766,12 +1770,43 @@ static int gr_gv11b_load_smid_config(struct gk20a *g) | |||
1766 | 1770 | ||
1767 | for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) | 1771 | for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) |
1768 | gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]); | 1772 | gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]); |
1769 | |||
1770 | kfree(tpc_sm_id); | 1773 | kfree(tpc_sm_id); |
1771 | 1774 | ||
1772 | return 0; | 1775 | return 0; |
1773 | } | 1776 | } |
1774 | 1777 | ||
1778 | static int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va) | ||
1779 | { | ||
1780 | u32 addr_lo; | ||
1781 | u32 addr_hi; | ||
1782 | struct ctx_header_desc *ctx; | ||
1783 | |||
1784 | gk20a_dbg_fn(""); | ||
1785 | |||
1786 | gv11b_alloc_subctx_header(c); | ||
1787 | |||
1788 | gv11b_update_subctx_header(c, gpu_va); | ||
1789 | |||
1790 | ctx = &c->ch_ctx.ctx_header; | ||
1791 | addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v(); | ||
1792 | addr_hi = u64_hi32(ctx->mem.gpu_va); | ||
1793 | |||
1794 | /* point this address to engine_wfi_ptr */ | ||
1795 | gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_target_w(), | ||
1796 | ram_in_engine_cs_wfi_v() | | ||
1797 | ram_in_engine_wfi_target_f( | ||
1798 | ram_in_engine_wfi_target_sys_mem_ncoh_v()) | | ||
1799 | ram_in_engine_wfi_mode_f(ram_in_engine_wfi_mode_virtual_v()) | | ||
1800 | ram_in_engine_wfi_ptr_lo_f(addr_lo)); | ||
1801 | |||
1802 | gk20a_mem_wr32(c->g, &c->inst_block, ram_in_engine_wfi_ptr_hi_w(), | ||
1803 | ram_in_engine_wfi_ptr_hi_f(addr_hi)); | ||
1804 | |||
1805 | return 0; | ||
1806 | } | ||
1807 | |||
1808 | |||
1809 | |||
1775 | static int gr_gv11b_commit_global_timeslice(struct gk20a *g, | 1810 | static int gr_gv11b_commit_global_timeslice(struct gk20a *g, |
1776 | struct channel_gk20a *c, bool patch) | 1811 | struct channel_gk20a *c, bool patch) |
1777 | { | 1812 | { |
@@ -1828,6 +1863,7 @@ static int gr_gv11b_commit_global_timeslice(struct gk20a *g, | |||
1828 | void gv11b_init_gr(struct gpu_ops *gops) | 1863 | void gv11b_init_gr(struct gpu_ops *gops) |
1829 | { | 1864 | { |
1830 | gp10b_init_gr(gops); | 1865 | gp10b_init_gr(gops); |
1866 | gops->gr.init_preemption_state = NULL; | ||
1831 | gops->gr.init_fs_state = gr_gv11b_init_fs_state; | 1867 | gops->gr.init_fs_state = gr_gv11b_init_fs_state; |
1832 | gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch; | 1868 | gops->gr.detect_sm_arch = gr_gv11b_detect_sm_arch; |
1833 | gops->gr.is_valid_class = gr_gv11b_is_valid_class; | 1869 | gops->gr.is_valid_class = gr_gv11b_is_valid_class; |
@@ -1872,4 +1908,6 @@ void gv11b_init_gr(struct gpu_ops *gops) | |||
1872 | gops->gr.load_smid_config = gr_gv11b_load_smid_config; | 1908 | gops->gr.load_smid_config = gr_gv11b_load_smid_config; |
1873 | gops->gr.program_sm_id_numbering = | 1909 | gops->gr.program_sm_id_numbering = |
1874 | gr_gv11b_program_sm_id_numbering; | 1910 | gr_gv11b_program_sm_id_numbering; |
1911 | gops->gr.commit_inst = gr_gv11b_commit_inst; | ||
1912 | |||
1875 | } | 1913 | } |
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c new file mode 100644 index 00000000..3acc53f6 --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | |||
@@ -0,0 +1,147 @@ | |||
1 | /* | ||
2 | * Volta GPU series Subcontext | ||
3 | * | ||
4 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License along with | ||
16 | * this program. | ||
17 | */ | ||
18 | |||
19 | #include "gk20a/gk20a.h" | ||
20 | #include "gk20a/semaphore_gk20a.h" | ||
21 | #include "gv11b/subctx_gv11b.h" | ||
22 | #include "gv11b/hw_ram_gv11b.h" | ||
23 | #include "gv11b/hw_ctxsw_prog_gv11b.h" | ||
24 | |||
25 | static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, | ||
26 | struct mem_desc *inst_block); | ||
27 | |||
28 | void gv11b_free_subctx_header(struct channel_gk20a *c) | ||
29 | { | ||
30 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | ||
31 | struct gk20a *g = c->g; | ||
32 | |||
33 | gk20a_dbg_fn(""); | ||
34 | |||
35 | if (ctx->mem.gpu_va) { | ||
36 | gk20a_gmmu_unmap(c->vm, ctx->mem.gpu_va, | ||
37 | ctx->mem.size, gk20a_mem_flag_none); | ||
38 | |||
39 | gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &ctx->mem); | ||
40 | } | ||
41 | } | ||
42 | |||
43 | int gv11b_alloc_subctx_header(struct channel_gk20a *c) | ||
44 | { | ||
45 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | ||
46 | struct gk20a *g = c->g; | ||
47 | int ret = 0; | ||
48 | |||
49 | gk20a_dbg_fn(""); | ||
50 | |||
51 | if (ctx->mem.gpu_va == 0) { | ||
52 | ret = gk20a_gmmu_alloc_attr_sys(g, | ||
53 | DMA_ATTR_NO_KERNEL_MAPPING, | ||
54 | ctxsw_prog_fecs_header_v(), | ||
55 | &ctx->mem); | ||
56 | if (ret) { | ||
57 | gk20a_err(dev_from_gk20a(g), | ||
58 | "failed to allocate sub ctx header"); | ||
59 | return ret; | ||
60 | } | ||
61 | ctx->mem.gpu_va = gk20a_gmmu_map(c->vm, | ||
62 | &ctx->mem.sgt, | ||
63 | ctx->mem.size, | ||
64 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
65 | gk20a_mem_flag_none, true, | ||
66 | ctx->mem.aperture); | ||
67 | if (!ctx->mem.gpu_va) { | ||
68 | gk20a_err(dev_from_gk20a(g), | ||
69 | "failed to map ctx header"); | ||
70 | gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, | ||
71 | &ctx->mem); | ||
72 | return -ENOMEM; | ||
73 | } | ||
74 | /* Now clear the buffer */ | ||
75 | if (gk20a_mem_begin(g, &ctx->mem)) | ||
76 | return -ENOMEM; | ||
77 | |||
78 | gk20a_memset(g, &ctx->mem, 0, 0, ctx->mem.size); | ||
79 | gk20a_mem_end(g, &ctx->mem); | ||
80 | |||
81 | gv11b_init_subcontext_pdb(c, &c->inst_block); | ||
82 | |||
83 | } | ||
84 | return ret; | ||
85 | } | ||
86 | |||
87 | static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, | ||
88 | struct mem_desc *inst_block) | ||
89 | { | ||
90 | struct gk20a *g = c->g; | ||
91 | struct vm_gk20a *vm; | ||
92 | u64 pdb_addr, pdb_addr_lo, pdb_addr_hi; | ||
93 | u32 format_word; | ||
94 | u32 lo, hi; | ||
95 | |||
96 | gk20a_dbg_fn(""); | ||
97 | /* load main pdb as veid0 pdb also */ | ||
98 | vm = c->vm; | ||
99 | pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0); | ||
100 | pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); | ||
101 | pdb_addr_hi = u64_hi32(pdb_addr); | ||
102 | format_word = ram_in_sc_page_dir_base_target_f( | ||
103 | ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 0) | | ||
104 | ram_in_sc_page_dir_base_vol_f( | ||
105 | ram_in_sc_page_dir_base_vol_true_v(), 0) | | ||
106 | ram_in_sc_page_dir_base_fault_replay_tex_f(0, 0) | | ||
107 | ram_in_sc_page_dir_base_fault_replay_gcc_f(0, 0) | | ||
108 | ram_in_sc_use_ver2_pt_format_f(1, 0) | | ||
109 | ram_in_sc_big_page_size_f(1, 0) | | ||
110 | ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); | ||
111 | lo = ram_in_sc_page_dir_base_vol_0_w(); | ||
112 | hi = ram_in_sc_page_dir_base_hi_0_w(); | ||
113 | gk20a_mem_wr32(g, inst_block, lo, format_word); | ||
114 | gk20a_mem_wr32(g, inst_block, hi, pdb_addr_hi); | ||
115 | |||
116 | /* make subcontext0 address space to valid */ | ||
117 | /* TODO fix proper hw register definations */ | ||
118 | gk20a_mem_wr32(g, inst_block, 166, 0x1); | ||
119 | gk20a_mem_wr32(g, inst_block, 167, 0); | ||
120 | gk20a_mem_wr32(g, inst_block, ram_in_engine_wfi_veid_w(), | ||
121 | ram_in_engine_wfi_veid_f(0)); | ||
122 | |||
123 | } | ||
124 | |||
125 | int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va) | ||
126 | { | ||
127 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | ||
128 | struct mem_desc *gr_mem; | ||
129 | struct gk20a *g = c->g; | ||
130 | int ret = 0; | ||
131 | u32 addr_lo, addr_hi; | ||
132 | |||
133 | addr_lo = u64_lo32(gpu_va); | ||
134 | addr_hi = u64_hi32(gpu_va); | ||
135 | |||
136 | gr_mem = &ctx->mem; | ||
137 | g->ops.mm.l2_flush(g, true); | ||
138 | if (gk20a_mem_begin(g, gr_mem)) | ||
139 | return -ENOMEM; | ||
140 | |||
141 | gk20a_mem_wr(g, gr_mem, | ||
142 | ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi); | ||
143 | gk20a_mem_wr(g, gr_mem, | ||
144 | ctxsw_prog_main_image_context_buffer_ptr_o(), addr_lo); | ||
145 | gk20a_mem_end(g, gr_mem); | ||
146 | return ret; | ||
147 | } | ||
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h new file mode 100644 index 00000000..357cd254 --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Volta GPU series Subcontext | ||
4 | * | ||
5 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms and conditions of the GNU General Public License, | ||
9 | * version 2, as published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
14 | * more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License along with | ||
17 | * this program. | ||
18 | */ | ||
19 | #ifndef __SUBCONTEXT_GV11B_H__ | ||
20 | #define __SUBCONTEXT_GV11B_H__ | ||
21 | |||
22 | int gv11b_alloc_subctx_header(struct channel_gk20a *c); | ||
23 | |||
24 | void gv11b_free_subctx_header(struct channel_gk20a *c); | ||
25 | |||
26 | int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va); | ||
27 | #endif /* __SUBCONTEXT_GV11B_H__ */ | ||