diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2018-07-06 11:50:36 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-07-09 17:40:50 -0400 |
commit | 0ddd219697155bcb64aaa04544108519686e16cc (patch) | |
tree | e33da070ae0a486bca5e0510b2c5a24915d16187 | |
parent | bbebc611bc10a824d5d51fc2ea9d0408e350d26a (diff) |
gpu: nvgpu: Conditional enable for replayable fault
Enable replayable faults only for contexts where they are requested.
This required moving the subcontext initialization code so that it
runs later.
Fix signedness issues in the definitions of the flags.
JIRA NVGPU-714
Change-Id: I472004e13b1ea46c1bd202f9b12d2ce221b756f9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1773262
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 20 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/mm_gv11b.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | 22 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/subctx_gv11b.h | 3 |
5 files changed, 34 insertions, 19 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 78325019..9f737192 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -42,18 +42,18 @@ struct fifo_profile_gk20a; | |||
42 | #include "fence_gk20a.h" | 42 | #include "fence_gk20a.h" |
43 | 43 | ||
44 | /* Flags to be passed to gk20a_channel_alloc_gpfifo() */ | 44 | /* Flags to be passed to gk20a_channel_alloc_gpfifo() */ |
45 | #define NVGPU_GPFIFO_FLAGS_SUPPORT_VPR (1 << 0) | 45 | #define NVGPU_GPFIFO_FLAGS_SUPPORT_VPR (1U << 0U) |
46 | #define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC (1 << 1) | 46 | #define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC (1U << 1U) |
47 | #define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE (1 << 2) | 47 | #define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE (1U << 2U) |
48 | #define NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT (1 << 3) | 48 | #define NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT (1U << 3U) |
49 | 49 | ||
50 | /* Flags to be passed to nvgpu_submit_channel_gpfifo() */ | 50 | /* Flags to be passed to nvgpu_submit_channel_gpfifo() */ |
51 | #define NVGPU_SUBMIT_FLAGS_FENCE_WAIT (1 << 0) | 51 | #define NVGPU_SUBMIT_FLAGS_FENCE_WAIT (1U << 0U) |
52 | #define NVGPU_SUBMIT_FLAGS_FENCE_GET (1 << 1) | 52 | #define NVGPU_SUBMIT_FLAGS_FENCE_GET (1U << 1U) |
53 | #define NVGPU_SUBMIT_FLAGS_HW_FORMAT (1 << 2) | 53 | #define NVGPU_SUBMIT_FLAGS_HW_FORMAT (1U << 2U) |
54 | #define NVGPU_SUBMIT_FLAGS_SYNC_FENCE (1 << 3) | 54 | #define NVGPU_SUBMIT_FLAGS_SYNC_FENCE (1U << 3U) |
55 | #define NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI (1 << 4) | 55 | #define NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI (1U << 4U) |
56 | #define NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING (1 << 5) | 56 | #define NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING (1U << 5U) |
57 | 57 | ||
58 | /* | 58 | /* |
59 | * The binary format of 'struct nvgpu_channel_fence' introduced here | 59 | * The binary format of 'struct nvgpu_channel_fence' introduced here |
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index 75ff9525..4edaaac1 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | |||
@@ -138,11 +138,17 @@ int channel_gv11b_setup_ramfc(struct channel_gk20a *c, | |||
138 | struct gk20a *g = c->g; | 138 | struct gk20a *g = c->g; |
139 | struct nvgpu_mem *mem = &c->inst_block; | 139 | struct nvgpu_mem *mem = &c->inst_block; |
140 | u32 data; | 140 | u32 data; |
141 | bool replayable = false; | ||
141 | 142 | ||
142 | nvgpu_log_fn(g, " "); | 143 | nvgpu_log_fn(g, " "); |
143 | 144 | ||
144 | nvgpu_memset(g, mem, 0, 0, ram_fc_size_val_v()); | 145 | nvgpu_memset(g, mem, 0, 0, ram_fc_size_val_v()); |
145 | 146 | ||
147 | if ((flags & NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE) != 0) { | ||
148 | replayable = true; | ||
149 | } | ||
150 | gv11b_init_subcontext_pdb(c->vm, mem, replayable); | ||
151 | |||
146 | nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(), | 152 | nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(), |
147 | pbdma_gp_base_offset_f( | 153 | pbdma_gp_base_offset_f( |
148 | u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s()))); | 154 | u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s()))); |
diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c index e37ea158..5dd43c34 100644 --- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c | |||
@@ -61,7 +61,7 @@ void gv11b_init_inst_block(struct nvgpu_mem *inst_block, | |||
61 | if (big_page_size && g->ops.mm.set_big_page_size) | 61 | if (big_page_size && g->ops.mm.set_big_page_size) |
62 | g->ops.mm.set_big_page_size(g, inst_block, big_page_size); | 62 | g->ops.mm.set_big_page_size(g, inst_block, big_page_size); |
63 | 63 | ||
64 | gv11b_init_subcontext_pdb(vm, inst_block); | 64 | gv11b_init_subcontext_pdb(vm, inst_block, false); |
65 | } | 65 | } |
66 | 66 | ||
67 | bool gv11b_mm_mmu_fault_pending(struct gk20a *g) | 67 | bool gv11b_mm_mmu_fault_pending(struct gk20a *g) |
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c index b0bcb585..8f12bbe5 100644 --- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | |||
@@ -37,7 +37,8 @@ | |||
37 | static void gv11b_subctx_commit_valid_mask(struct vm_gk20a *vm, | 37 | static void gv11b_subctx_commit_valid_mask(struct vm_gk20a *vm, |
38 | struct nvgpu_mem *inst_block); | 38 | struct nvgpu_mem *inst_block); |
39 | static void gv11b_subctx_commit_pdb(struct vm_gk20a *vm, | 39 | static void gv11b_subctx_commit_pdb(struct vm_gk20a *vm, |
40 | struct nvgpu_mem *inst_block); | 40 | struct nvgpu_mem *inst_block, |
41 | bool replayable); | ||
41 | 42 | ||
42 | void gv11b_free_subctx_header(struct channel_gk20a *c) | 43 | void gv11b_free_subctx_header(struct channel_gk20a *c) |
43 | { | 44 | { |
@@ -84,9 +85,10 @@ int gv11b_alloc_subctx_header(struct channel_gk20a *c) | |||
84 | } | 85 | } |
85 | 86 | ||
86 | void gv11b_init_subcontext_pdb(struct vm_gk20a *vm, | 87 | void gv11b_init_subcontext_pdb(struct vm_gk20a *vm, |
87 | struct nvgpu_mem *inst_block) | 88 | struct nvgpu_mem *inst_block, |
89 | bool replayable) | ||
88 | { | 90 | { |
89 | gv11b_subctx_commit_pdb(vm, inst_block); | 91 | gv11b_subctx_commit_pdb(vm, inst_block, replayable); |
90 | gv11b_subctx_commit_valid_mask(vm, inst_block); | 92 | gv11b_subctx_commit_valid_mask(vm, inst_block); |
91 | 93 | ||
92 | } | 94 | } |
@@ -157,8 +159,9 @@ void gv11b_subctx_commit_valid_mask(struct vm_gk20a *vm, | |||
157 | nvgpu_mem_wr32(g, inst_block, 167, 0xffffffff); | 159 | nvgpu_mem_wr32(g, inst_block, 167, 0xffffffff); |
158 | } | 160 | } |
159 | 161 | ||
160 | void gv11b_subctx_commit_pdb(struct vm_gk20a *vm, | 162 | static void gv11b_subctx_commit_pdb(struct vm_gk20a *vm, |
161 | struct nvgpu_mem *inst_block) | 163 | struct nvgpu_mem *inst_block, |
164 | bool replayable) | ||
162 | { | 165 | { |
163 | struct gk20a *g = gk20a_from_vm(vm); | 166 | struct gk20a *g = gk20a_from_vm(vm); |
164 | u32 lo, hi; | 167 | u32 lo, hi; |
@@ -179,11 +182,16 @@ void gv11b_subctx_commit_pdb(struct vm_gk20a *vm, | |||
179 | aperture, 0) | | 182 | aperture, 0) | |
180 | ram_in_sc_page_dir_base_vol_f( | 183 | ram_in_sc_page_dir_base_vol_f( |
181 | ram_in_sc_page_dir_base_vol_true_v(), 0) | | 184 | ram_in_sc_page_dir_base_vol_true_v(), 0) | |
182 | ram_in_sc_page_dir_base_fault_replay_tex_f(1, 0) | | ||
183 | ram_in_sc_page_dir_base_fault_replay_gcc_f(1, 0) | | ||
184 | ram_in_sc_use_ver2_pt_format_f(1, 0) | | 185 | ram_in_sc_use_ver2_pt_format_f(1, 0) | |
185 | ram_in_sc_big_page_size_f(1, 0) | | 186 | ram_in_sc_big_page_size_f(1, 0) | |
186 | ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); | 187 | ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); |
188 | |||
189 | if (replayable) { | ||
190 | format_word |= | ||
191 | ram_in_sc_page_dir_base_fault_replay_tex_f(1, 0) | | ||
192 | ram_in_sc_page_dir_base_fault_replay_gcc_f(1, 0); | ||
193 | } | ||
194 | |||
187 | nvgpu_log(g, gpu_dbg_info, " pdb info lo %x hi %x", | 195 | nvgpu_log(g, gpu_dbg_info, " pdb info lo %x hi %x", |
188 | format_word, pdb_addr_hi); | 196 | format_word, pdb_addr_hi); |
189 | for (subctx_id = 0; subctx_id < max_subctx_count; subctx_id++) { | 197 | for (subctx_id = 0; subctx_id < max_subctx_count; subctx_id++) { |
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h index 3dcb58b3..71d56502 100644 --- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.h | |||
@@ -32,6 +32,7 @@ void gv11b_free_subctx_header(struct channel_gk20a *c); | |||
32 | int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va); | 32 | int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va); |
33 | 33 | ||
34 | void gv11b_init_subcontext_pdb(struct vm_gk20a *vm, | 34 | void gv11b_init_subcontext_pdb(struct vm_gk20a *vm, |
35 | struct nvgpu_mem *inst_block); | 35 | struct nvgpu_mem *inst_block, |
36 | bool replayable); | ||
36 | 37 | ||
37 | #endif /* __SUBCONTEXT_GV11B_H__ */ | 38 | #endif /* __SUBCONTEXT_GV11B_H__ */ |