diff options
author | Richard Zhao <rizhao@nvidia.com> | 2017-08-10 19:34:16 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-08-11 14:08:41 -0400 |
commit | 3197a918d5052c71ad854f6b22fdb35bfe7cebe2 (patch) | |
tree | 887f155d104926d38d74583739bacf57a57c040b | |
parent | de8e057f7eebcfe676278826ab457bf86b1b36fd (diff) |
gpu: nvgpu: gv11b: add max_subctx_count to g->fifo.t19x
- For better performance: previously the register was read every time
max_subctx_count was referenced.
- Avoid reading registers for vgpu.
Jira VFND-3797
Change-Id: Id6e6b15a0d9a035795e8a9a2c6bb63524c5eb544
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1537009
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/ioctl_tsg_t19x.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/fifo_t19x.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | 13 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/subctx_gv11b.h | 1 |
6 files changed, 11 insertions, 15 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_tsg_t19x.c b/drivers/gpu/nvgpu/common/linux/ioctl_tsg_t19x.c index bf6088ab..b0b1f9c4 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_tsg_t19x.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_tsg_t19x.c | |||
@@ -44,7 +44,7 @@ static int gv11b_tsg_ioctl_bind_channel_ex(struct gk20a *g, | |||
44 | ch = gk20a_get_channel_from_file(arg->channel_fd); | 44 | ch = gk20a_get_channel_from_file(arg->channel_fd); |
45 | if (!ch) | 45 | if (!ch) |
46 | return -EINVAL; | 46 | return -EINVAL; |
47 | if (arg->subcontext_id < gv11b_get_max_subctx_count(g)) | 47 | if (arg->subcontext_id < g->fifo.t19x.max_subctx_count) |
48 | ch->t19x.subctx_id = arg->subcontext_id; | 48 | ch->t19x.subctx_id = arg->subcontext_id; |
49 | else | 49 | else |
50 | return -EINVAL; | 50 | return -EINVAL; |
diff --git a/drivers/gpu/nvgpu/fifo_t19x.h b/drivers/gpu/nvgpu/fifo_t19x.h index 531c6f34..25d5f41d 100644 --- a/drivers/gpu/nvgpu/fifo_t19x.h +++ b/drivers/gpu/nvgpu/fifo_t19x.h | |||
@@ -16,6 +16,7 @@ | |||
16 | 16 | ||
17 | struct fifo_t19x { | 17 | struct fifo_t19x { |
18 | void __iomem *usermode_regs; | 18 | void __iomem *usermode_regs; |
19 | u32 max_subctx_count; | ||
19 | }; | 20 | }; |
20 | 21 | ||
21 | #endif | 22 | #endif |
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index e210d40d..bd769f75 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <nvgpu/hw/gv11b/hw_top_gv11b.h> | 40 | #include <nvgpu/hw/gv11b/hw_top_gv11b.h> |
41 | #include <nvgpu/hw/gv11b/hw_gmmu_gv11b.h> | 41 | #include <nvgpu/hw/gv11b/hw_gmmu_gv11b.h> |
42 | #include <nvgpu/hw/gv11b/hw_mc_gv11b.h> | 42 | #include <nvgpu/hw/gv11b/hw_mc_gv11b.h> |
43 | #include <nvgpu/hw/gv11b/hw_gr_gv11b.h> | ||
43 | 44 | ||
44 | #include "fifo_gv11b.h" | 45 | #include "fifo_gv11b.h" |
45 | #include "subctx_gv11b.h" | 46 | #include "subctx_gv11b.h" |
@@ -1717,16 +1718,20 @@ int gv11b_init_fifo_setup_hw(struct gk20a *g) | |||
1717 | struct fifo_gk20a *f = &g->fifo; | 1718 | struct fifo_gk20a *f = &g->fifo; |
1718 | 1719 | ||
1719 | f->t19x.usermode_regs = g->regs + usermode_cfg0_r(); | 1720 | f->t19x.usermode_regs = g->regs + usermode_cfg0_r(); |
1721 | f->t19x.max_subctx_count = | ||
1722 | gr_pri_fe_chip_def_info_max_veid_count_v( | ||
1723 | gk20a_readl(g, gr_pri_fe_chip_def_info_r())); | ||
1720 | return 0; | 1724 | return 0; |
1721 | } | 1725 | } |
1722 | 1726 | ||
1723 | static u32 gv11b_mmu_fault_id_to_gr_veid(struct gk20a *g, u32 gr_eng_fault_id, | 1727 | static u32 gv11b_mmu_fault_id_to_gr_veid(struct gk20a *g, u32 gr_eng_fault_id, |
1724 | u32 mmu_fault_id) | 1728 | u32 mmu_fault_id) |
1725 | { | 1729 | { |
1730 | struct fifo_gk20a *f = &g->fifo; | ||
1726 | u32 num_subctx; | 1731 | u32 num_subctx; |
1727 | u32 veid = FIFO_INVAL_VEID; | 1732 | u32 veid = FIFO_INVAL_VEID; |
1728 | 1733 | ||
1729 | num_subctx = gv11b_get_max_subctx_count(g); | 1734 | num_subctx = f->t19x.max_subctx_count; |
1730 | 1735 | ||
1731 | if (mmu_fault_id >= gr_eng_fault_id && | 1736 | if (mmu_fault_id >= gr_eng_fault_id && |
1732 | mmu_fault_id < (gr_eng_fault_id + num_subctx)) | 1737 | mmu_fault_id < (gr_eng_fault_id + num_subctx)) |
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 850315f7..b95152eb 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -2021,7 +2021,7 @@ static void gv11b_write_bundle_veid_state(struct gk20a *g, u32 index) | |||
2021 | u32 j; | 2021 | u32 j; |
2022 | u32 num_subctx, err = 0; | 2022 | u32 num_subctx, err = 0; |
2023 | 2023 | ||
2024 | num_subctx = gv11b_get_max_subctx_count(g); | 2024 | num_subctx = g->fifo.t19x.max_subctx_count; |
2025 | 2025 | ||
2026 | for (j = 0; j < num_subctx; j++) { | 2026 | for (j = 0; j < num_subctx; j++) { |
2027 | 2027 | ||
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c index 72a66530..4f64843c 100644 --- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | |||
@@ -26,7 +26,6 @@ | |||
26 | 26 | ||
27 | #include <nvgpu/hw/gv11b/hw_ram_gv11b.h> | 27 | #include <nvgpu/hw/gv11b/hw_ram_gv11b.h> |
28 | #include <nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h> | 28 | #include <nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h> |
29 | #include <nvgpu/hw/gv11b/hw_gr_gv11b.h> | ||
30 | 29 | ||
31 | static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, | 30 | static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, |
32 | struct nvgpu_mem *inst_block); | 31 | struct nvgpu_mem *inst_block); |
@@ -143,6 +142,7 @@ void gv11b_subctx_commit_pdb(struct channel_gk20a *c, | |||
143 | struct nvgpu_mem *inst_block) | 142 | struct nvgpu_mem *inst_block) |
144 | { | 143 | { |
145 | struct gk20a *g = c->g; | 144 | struct gk20a *g = c->g; |
145 | struct fifo_gk20a *f = &g->fifo; | ||
146 | struct vm_gk20a *vm = c->vm; | 146 | struct vm_gk20a *vm = c->vm; |
147 | u32 lo, hi; | 147 | u32 lo, hi; |
148 | u32 subctx_id = 0; | 148 | u32 subctx_id = 0; |
@@ -164,19 +164,10 @@ void gv11b_subctx_commit_pdb(struct channel_gk20a *c, | |||
164 | ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); | 164 | ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); |
165 | nvgpu_log(g, gpu_dbg_info, " pdb info lo %x hi %x", | 165 | nvgpu_log(g, gpu_dbg_info, " pdb info lo %x hi %x", |
166 | format_word, pdb_addr_hi); | 166 | format_word, pdb_addr_hi); |
167 | for (subctx_id = 0; subctx_id < gv11b_get_max_subctx_count(g); | 167 | for (subctx_id = 0; subctx_id < f->t19x.max_subctx_count; subctx_id++) { |
168 | subctx_id++) { | ||
169 | lo = ram_in_sc_page_dir_base_vol_0_w() + (4 * subctx_id); | 168 | lo = ram_in_sc_page_dir_base_vol_0_w() + (4 * subctx_id); |
170 | hi = ram_in_sc_page_dir_base_hi_0_w() + (4 * subctx_id); | 169 | hi = ram_in_sc_page_dir_base_hi_0_w() + (4 * subctx_id); |
171 | nvgpu_mem_wr32(g, inst_block, lo, format_word); | 170 | nvgpu_mem_wr32(g, inst_block, lo, format_word); |
172 | nvgpu_mem_wr32(g, inst_block, hi, pdb_addr_hi); | 171 | nvgpu_mem_wr32(g, inst_block, hi, pdb_addr_hi); |
173 | } | 172 | } |
174 | } | 173 | } |
175 | |||
176 | |||
177 | u32 gv11b_get_max_subctx_count(struct gk20a *g) | ||
178 | { | ||
179 | u32 data = gk20a_readl(g, gr_pri_fe_chip_def_info_r()); | ||
180 | |||
181 | return gr_pri_fe_chip_def_info_max_veid_count_v(data); | ||
182 | } | ||
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h index 5e4e99f5..d199711d 100644 --- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h | |||
@@ -25,5 +25,4 @@ void gv11b_free_subctx_header(struct channel_gk20a *c); | |||
25 | 25 | ||
26 | int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va); | 26 | int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va); |
27 | 27 | ||
28 | u32 gv11b_get_max_subctx_count(struct gk20a *g); | ||
29 | #endif /* __SUBCONTEXT_GV11B_H__ */ | 28 | #endif /* __SUBCONTEXT_GV11B_H__ */ |