author     Seema Khowala <seemaj@nvidia.com>  2017-03-07 15:16:07 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-07-08 14:35:48 -0400
commit     2f6d321390a98ace1e52f21c6a399e06b3fe71e7 (patch)
tree       70ab64bacd4b7a66a13a3a27f303f470175dad92 /drivers/gpu/nvgpu/gv11b/fb_gv11b.c
parent     aa05648fd6038b69d1ed841f33b24cf1875efd83 (diff)
gpu: nvgpu: gv11b: add mmu fault handling
HUB reports the following memory sub-system interrupts:
a) ACCESS_COUNTER_NOTIFY:
GET != PUT for access counter notify buffer
b) ACCESS_COUNTER_ERROR:
HUB received a NACK (BAR2 fault) when writing the
notify buffer out to memory
c) MMU_ECC_UNCORRECTED_ERROR_NOTIFY:
Uncorrected ECC error detected by HUB MMU
d) MMU_REPLAYABLE_FAULT_NOTIFY:
GET != PUT for replayable fault buffer
e) MMU_REPLAYABLE_FAULT_OVERFLOW:
Overflow when writing to the replayable fault buffer
f) MMU_NONREPLAYABLE_FAULT_NOTIFY:
GET != PUT for non-replayable fault buffer
g) MMU_NONREPLAYABLE_FAULT_OVERFLOW:
Overflow when writing to the non-replayable fault buffer
h) MMU_OTHER_FAULT_NOTIFY: All other fault notifications from MMU
This change:
-Detects other fault notify interrupts
-Copies fault info from the fault snap registers for other fault
 notify interrupts
-Detects and handles nonreplay/replay fault notify and fault
 overflow interrupts (GET/PUT ring handling; see the sketch after
 the diffstat below)
-Copies fault info from the fault buffer for nonreplay/replay faults
-Prints fault info
JIRA GPUT19X-7
JIRA GPUT19X-12
Change-Id: Ifa08a4ebcd119a7d81c2eae3f52dc825d1ce3898
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master/r/1493394
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/fb_gv11b.c')
-rw-r--r--  drivers/gpu/nvgpu/gv11b/fb_gv11b.c  748
1 file changed, 739 insertions(+), 9 deletions(-)
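The notify and overflow conditions above come down to ring-buffer
arithmetic on the fault buffer's GET and PUT pointers. Below is a
minimal, self-contained sketch of that arithmetic; the function names
are illustrative only, while the modulo conventions mirror
gv11b_fb_is_fault_buffer_full() and the get-index advance in the diff
that follows (where "entries" is the value read through
fb_mmu_fault_buffer_size_val_v()):

	#include <stdbool.h>
	#include <stdint.h>

	/* NOTIFY is raised while GET != PUT, i.e. unread fault
	 * entries are pending in the buffer.
	 */
	static bool fault_buf_pending(uint32_t get, uint32_t put)
	{
		return get != put;
	}

	/* One slot is sacrificed so "full" can be told apart from
	 * "empty": the buffer is full when PUT is one step behind GET.
	 */
	static bool fault_buf_full(uint32_t get, uint32_t put,
				   uint32_t entries)
	{
		return get == ((put + 1) % entries);
	}

	/* After consuming one entry the handler publishes a new GET;
	 * per the driver's own comment, usable entries are one less
	 * than the reported buffer size.
	 */
	static uint32_t fault_buf_advance_get(uint32_t get,
					      uint32_t entries)
	{
		return (get + 1) % (entries - 1);
	}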
diff --git a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
index da7c7d4a..6d1fbca9 100644
--- a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
@@ -23,7 +23,9 @@
 
 #include "gp10b/fb_gp10b.h"
 
+#include "gv11b/fifo_gv11b.h"
 #include "gv11b/fb_gv11b.h"
+#include "gv11b/ce_gv11b.h"
 
 #include <nvgpu/hw/gv11b/hw_gmmu_gv11b.h>
 #include <nvgpu/hw/gv11b/hw_fb_gv11b.h>
@@ -160,6 +162,89 @@ static void gv11b_fb_reset(struct gk20a *g)
 	}
 }
 
+static const char * const invalid_str = "invalid";
+
+static const char *const fault_type_descs_gv11b[] = {
+	"invalid pde",
+	"invalid pde size",
+	"invalid pte",
+	"limit violation",
+	"unbound inst block",
+	"priv violation",
+	"write",
+	"read",
+	"pitch mask violation",
+	"work creation",
+	"unsupported aperture",
+	"compression failure",
+	"unsupported kind",
+	"region violation",
+	"poison",
+	"atomic"
+};
+
+static const char *const fault_client_type_descs_gv11b[] = {
+	"gpc",
+	"hub",
+};
+
+static const char *const fault_access_type_descs_gv11b[] = {
+	"virt read",
+	"virt write",
+	"virt atomic strong",
+	"virt prefetch",
+	"virt atomic weak",
+	"xxx",
+	"xxx",
+	"xxx",
+	"phys read",
+	"phys write",
+	"phys atomic",
+	"phys prefetch",
+};
+
+static const char *const hub_client_descs_gv11b[] = {
+	"vip", "ce0", "ce1", "dniso", "fe", "fecs", "host", "host cpu",
+	"host cpu nb", "iso", "mmu", "nvdec", "nvenc1", "nvenc2",
+	"niso", "p2p", "pd", "perf", "pmu", "raster twod", "scc",
+	"scc nb", "sec", "ssync", "gr copy", "xv", "mmu nb",
+	"nvenc", "d falcon", "sked", "a falcon", "hsce0", "hsce1",
+	"hsce2", "hsce3", "hsce4", "hsce5", "hsce6", "hsce7", "hsce8",
+	"hsce9", "hshub", "ptp x0", "ptp x1", "ptp x2", "ptp x3",
+	"ptp x4", "ptp x5", "ptp x6", "ptp x7", "vpr scrubber0",
+	"vpr scrubber1", "dwbif", "fbfalcon", "ce shim", "gsp",
+	"dont care"
+};
+
+static const char *const gpc_client_descs_gv11b[] = {
+	"t1 0", "t1 1", "t1 2", "t1 3",
+	"t1 4", "t1 5", "t1 6", "t1 7",
+	"pe 0", "pe 1", "pe 2", "pe 3",
+	"pe 4", "pe 5", "pe 6", "pe 7",
+	"rast", "gcc", "gpccs",
+	"prop 0", "prop 1", "prop 2", "prop 3",
+	"gpm",
+	"ltp utlb 0", "ltp utlb 1", "ltp utlb 2", "ltp utlb 3",
+	"ltp utlb 4", "ltp utlb 5", "ltp utlb 6", "ltp utlb 7",
+	"utlb",
+	"t1 8", "t1 9", "t1 10", "t1 11",
+	"t1 12", "t1 13", "t1 14", "t1 15",
+	"tpccs 0", "tpccs 1", "tpccs 2", "tpccs 3",
+	"tpccs 4", "tpccs 5", "tpccs 6", "tpccs 7",
+	"pe 8", "pe 9", "tpccs 8", "tpccs 9",
+	"t1 16", "t1 17", "t1 18", "t1 19",
+	"pe 10", "pe 11", "tpccs 10", "tpccs 11",
+	"t1 20", "t1 21", "t1 22", "t1 23",
+	"pe 12", "pe 13", "tpccs 12", "tpccs 13",
+	"t1 24", "t1 25", "t1 26", "t1 27",
+	"pe 14", "pe 15", "tpccs 14", "tpccs 15",
+	"t1 28", "t1 29", "t1 30", "t1 31",
+	"pe 16", "pe 17", "tpccs 16", "tpccs 17",
+	"t1 32", "t1 33", "t1 34", "t1 35",
+	"pe 18", "pe 19", "tpccs 18", "tpccs 19",
+	"t1 36", "t1 37", "t1 38", "t1 39",
+};
+
 static void gv11b_init_uncompressed_kind_map(void)
 {
 	gk20a_uc_kind_map[gmmu_pte_kind_c32_ms2_4cbra_v()] =
@@ -218,6 +303,84 @@ u32 gv11b_fb_is_fault_buf_enabled(struct gk20a *g,
 	return fb_mmu_fault_buffer_size_enable_v(reg_val);
 }
 
+static void gv11b_fb_fault_buffer_get_ptr_update(struct gk20a *g,
+		unsigned int index, u32 next)
+{
+	u32 reg_val;
+
+	nvgpu_log(g, gpu_dbg_intr, "updating get index with = %d", next);
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_buffer_get_r(index));
+	reg_val = set_field(reg_val, fb_mmu_fault_buffer_get_ptr_m(),
+			fb_mmu_fault_buffer_get_ptr_f(next));
+
+	/* while the fault is being handled it is possible for overflow
+	 * to happen,
+	 */
+	if (reg_val & fb_mmu_fault_buffer_get_overflow_m())
+		reg_val |= fb_mmu_fault_buffer_get_overflow_clear_f();
+
+	gk20a_writel(g, fb_mmu_fault_buffer_get_r(index), reg_val);
+
+	/* make sure get ptr update is visible to everyone to avoid
+	 * reading already read entry
+	 */
+	mb();
+}
+
+static u32 gv11b_fb_fault_buffer_get_index(struct gk20a *g,
+		unsigned int index)
+{
+	u32 reg_val;
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_buffer_get_r(index));
+	return fb_mmu_fault_buffer_get_ptr_v(reg_val);
+}
+
+static u32 gv11b_fb_fault_buffer_put_index(struct gk20a *g,
+		unsigned int index)
+{
+	u32 reg_val;
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_buffer_put_r(index));
+	return fb_mmu_fault_buffer_put_ptr_v(reg_val);
+}
+
+static u32 gv11b_fb_fault_buffer_size_val(struct gk20a *g,
+		unsigned int index)
+{
+	u32 reg_val;
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_buffer_size_r(index));
+	return fb_mmu_fault_buffer_size_val_v(reg_val);
+}
+
+static bool gv11b_fb_is_fault_buffer_empty(struct gk20a *g,
+		unsigned int index, u32 *get_idx)
+{
+	u32 put_idx;
+
+	*get_idx = gv11b_fb_fault_buffer_get_index(g, index);
+	put_idx = gv11b_fb_fault_buffer_put_index(g, index);
+
+	return *get_idx == put_idx;
+}
+
+static bool gv11b_fb_is_fault_buffer_full(struct gk20a *g,
+		unsigned int index)
+{
+	u32 get_idx, put_idx, entries;
+
+
+	get_idx = gv11b_fb_fault_buffer_get_index(g, index);
+
+	put_idx = gv11b_fb_fault_buffer_put_index(g, index);
+
+	entries = gv11b_fb_fault_buffer_size_val(g, index);
+
+	return get_idx == ((put_idx + 1) % entries);
+}
+
 void gv11b_fb_fault_buf_set_state_hw(struct gk20a *g,
 		unsigned int index, unsigned int state)
 {
@@ -275,7 +438,6 @@ void gv11b_fb_fault_buf_configure_hw(struct gk20a *g, unsigned int index)
 
 	gv11b_fb_fault_buf_set_state_hw(g, index,
 			FAULT_BUF_DISABLED);
-
 	addr_lo = u64_lo32(g->mm.hw_fault_buf[index].gpu_va >>
 			ram_in_base_shift_v());
 	addr_hi = u64_hi32(g->mm.hw_fault_buf[index].gpu_va);
@@ -586,18 +748,586 @@ static void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
 		g->ecc.eng.t19x.mmu_fillunit_uncorrected_err_count.counters[0]);
 }
 
-static void gv11b_fb_hub_isr(struct gk20a *g)
+static void gv11b_fb_parse_mmfault(struct mmu_fault_info *mmfault)
 {
-	u32 status;
-	u32 niso_intr = gk20a_readl(g, fb_niso_intr_r());
+	if (WARN_ON(mmfault->fault_type >=
+			ARRAY_SIZE(fault_type_descs_gv11b)))
+		mmfault->fault_type_desc = invalid_str;
+	else
+		mmfault->fault_type_desc =
+			fault_type_descs_gv11b[mmfault->fault_type];
 
-	nvgpu_info(g, "enter hub isr, niso_intr = 0x%08x", niso_intr);
+	if (WARN_ON(mmfault->client_type >=
+			ARRAY_SIZE(fault_client_type_descs_gv11b)))
+		mmfault->client_type_desc = invalid_str;
+	else
+		mmfault->client_type_desc =
+			fault_client_type_descs_gv11b[mmfault->client_type];
+
+	mmfault->client_id_desc = invalid_str;
+	if (mmfault->client_type ==
+			gmmu_fault_client_type_hub_v()) {
+
+		if (!(WARN_ON(mmfault->client_id >=
+				ARRAY_SIZE(hub_client_descs_gv11b))))
+			mmfault->client_id_desc =
+				hub_client_descs_gv11b[mmfault->client_id];
+	} else if (mmfault->client_type ==
+			gmmu_fault_client_type_gpc_v()) {
+		if (!(WARN_ON(mmfault->client_id >=
+				ARRAY_SIZE(gpc_client_descs_gv11b))))
+			mmfault->client_id_desc =
+				gpc_client_descs_gv11b[mmfault->client_id];
+	}
+
+}
+
+static void gv11b_fb_print_fault_info(struct gk20a *g,
+		struct mmu_fault_info *mmfault)
+{
+	if (mmfault && mmfault->valid) {
+		nvgpu_err(g, "[MMU FAULT] "
+			"mmu engine id: %d, "
+			"ch id: %d, "
+			"fault addr: 0x%llx, "
+			"fault addr aperture: %d, "
+			"fault type: %s, "
+			"access type: %s, ",
+			mmfault->mmu_engine_id,
+			mmfault->chid,
+			mmfault->fault_addr,
+			mmfault->fault_addr_aperture,
+			mmfault->fault_type_desc,
+			fault_access_type_descs_gv11b[mmfault->access_type]);
+		nvgpu_log(g, gpu_dbg_intr, "[MMU FAULT] "
+			"mmu engine id: %d, "
+			"faulted act eng id if any: 0x%x, "
+			"faulted veid if any: 0x%x, "
+			"faulted pbdma id if any: 0x%x, "
+			"fault addr: 0x%llx, ",
+			mmfault->mmu_engine_id,
+			mmfault->faulted_engine,
+			mmfault->faulted_subid,
+			mmfault->faulted_pbdma,
+			mmfault->fault_addr);
+		nvgpu_log(g, gpu_dbg_intr, "[MMU FAULT] "
+			"fault addr aperture: %d, "
+			"fault type: %s, "
+			"access type: %s, "
+			"inst ptr: 0x%llx, "
+			"inst ptr aperture: %d, ",
+			mmfault->fault_addr_aperture,
+			mmfault->fault_type_desc,
+			fault_access_type_descs_gv11b[mmfault->access_type],
+			mmfault->inst_ptr,
+			mmfault->inst_aperture);
+		nvgpu_log(g, gpu_dbg_intr, "[MMU FAULT] "
+			"ch id: %d, "
+			"timestamp hi:lo 0x%08x:0x%08x, "
+			"client type: %s, "
+			"client id: %s, "
+			"gpc id if client type is gpc: %d, ",
+			mmfault->chid,
+			mmfault->timestamp_hi, mmfault->timestamp_lo,
+			mmfault->client_type_desc,
+			mmfault->client_id_desc,
+			mmfault->gpc_id);
+		nvgpu_log(g, gpu_dbg_intr, "[MMU FAULT] "
+			"protected mode: %d, "
+			"replayable fault: %d, "
+			"replayable fault en: %d ",
+			mmfault->protected_mode,
+			mmfault->replayable_fault,
+			mmfault->replay_fault_en);
+	}
+}
+
+/*
+ *Fault buffer format
+ *
+ * 31    28     24 23           16 15            8 7     4       0
+ *.-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-.
+ *|              inst_lo                  |0 0|apr|0 0 0 0 0 0 0 0|
+ *`-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-'
+ *|                            inst_hi                            |
+ *`-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-'
+ *|             addr_31_12                |                   |AP |
+ *`-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-'
+ *|                          addr_63_32                           |
+ *`-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-'
+ *|                         timestamp_lo                          |
+ *`-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-'
+ *|                         timestamp_hi                          |
+ *`-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-'
+ *|                (reserved)             |       engine_id       |
+ *`-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-'
+ *|V|R|P|  gpc_id |0 0 0|t|0|acctp|0|   client   |RF0 0|faulttype|
+ */
+
+static void gv11b_fb_copy_from_hw_fault_buf(struct gk20a *g,
+	 struct nvgpu_mem *mem, u32 offset, struct mmu_fault_info *mmfault)
+{
+	u32 rd32_val;
+	u32 addr_lo, addr_hi;
+	u64 inst_ptr;
+	u32 chid = FIFO_INVAL_CHANNEL_ID;
+	struct channel_gk20a *refch;
+
+	memset(mmfault, 0, sizeof(*mmfault));
+
+	rd32_val = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_inst_lo_w());
+	addr_lo = gmmu_fault_buf_entry_inst_lo_v(rd32_val);
+	addr_lo = addr_lo << ram_in_base_shift_v();
+
+	addr_hi = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_inst_hi_w());
+	addr_hi = gmmu_fault_buf_entry_inst_hi_v(addr_hi);
+
+	inst_ptr = hi32_lo32_to_u64(addr_hi, addr_lo);
+
+	/* refch will be put back after fault is handled */
+	refch = gk20a_refch_from_inst_ptr(g, inst_ptr);
+	if (refch)
+		chid = refch->chid;
+
+	/* it is ok to continue even if refch is NULL */
+	mmfault->refch = refch;
+	mmfault->chid = chid;
+	mmfault->inst_ptr = inst_ptr;
+	mmfault->inst_aperture = gmmu_fault_buf_entry_inst_aperture_v(rd32_val);
+
+	rd32_val = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_addr_lo_w());
+
+	mmfault->fault_addr_aperture =
+		gmmu_fault_buf_entry_addr_phys_aperture_v(rd32_val);
+	addr_lo = gmmu_fault_buf_entry_addr_lo_v(rd32_val);
+	addr_lo = addr_lo << ram_in_base_shift_v();
+
+	rd32_val = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_addr_hi_w());
+	addr_hi = gmmu_fault_buf_entry_addr_hi_v(rd32_val);
+	mmfault->fault_addr = hi32_lo32_to_u64(addr_hi, addr_lo);
+
+	rd32_val = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_timestamp_lo_w());
+	mmfault->timestamp_lo =
+		gmmu_fault_buf_entry_timestamp_lo_v(rd32_val);
+
+	rd32_val = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_timestamp_hi_w());
+	mmfault->timestamp_hi =
+		gmmu_fault_buf_entry_timestamp_hi_v(rd32_val);
+
+	rd32_val = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_engine_id_w());
+
+	mmfault->mmu_engine_id =
+		gmmu_fault_buf_entry_engine_id_v(rd32_val);
+	gv11b_mmu_fault_id_to_eng_pbdma_id_and_veid(g, mmfault->mmu_engine_id,
+		&mmfault->faulted_engine, &mmfault->faulted_subid,
+		&mmfault->faulted_pbdma);
+
+	rd32_val = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_fault_type_w());
+	mmfault->client_id =
+		gmmu_fault_buf_entry_client_v(rd32_val);
+	mmfault->replayable_fault =
+		gmmu_fault_buf_entry_replayable_fault_v(rd32_val);
+
+	mmfault->fault_type =
+		gmmu_fault_buf_entry_fault_type_v(rd32_val);
+	mmfault->access_type =
+		gmmu_fault_buf_entry_access_type_v(rd32_val);
+
+	mmfault->client_type =
+		gmmu_fault_buf_entry_mmu_client_type_v(rd32_val);
+
+	mmfault->gpc_id =
+		gmmu_fault_buf_entry_gpc_id_v(rd32_val);
+	mmfault->protected_mode =
+		gmmu_fault_buf_entry_protected_mode_v(rd32_val);
+
+	mmfault->replay_fault_en =
+		gmmu_fault_buf_entry_replayable_fault_en_v(rd32_val);
+
+	mmfault->valid = gmmu_fault_buf_entry_valid_v(rd32_val);
+
+	rd32_val = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_fault_type_w());
+	rd32_val &= ~(gmmu_fault_buf_entry_valid_m());
+	nvgpu_mem_wr32(g, mem, offset + gmmu_fault_buf_entry_valid_w(),
+			rd32_val);
+
+	gv11b_fb_parse_mmfault(mmfault);
+}
+
+static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
+		struct mmu_fault_info *mmfault)
+{
+	if (!mmfault->valid)
+		return;
+
+	gv11b_fb_print_fault_info(g, mmfault);
+
+	if (mmfault->fault_type == gmmu_fault_type_unbound_inst_block_v()) {
+		/*
+		 * Bug 1847172: When an engine faults due to an unbound
+		 * instance block, the fault cannot be isolated to a
+		 * single context so we need to reset the entire runlist
+		 */
+		nvgpu_log(g, gpu_dbg_intr, "UNBOUND INST");
+	}
+
+	if (mmfault->refch) {
+		gk20a_channel_put(mmfault->refch);
+		mmfault->refch = NULL;
+	}
+}
+
+static void gv11b_fb_handle_mmu_nonreplay_replay_fault(struct gk20a *g,
+		u32 fault_status, unsigned int index)
+{
+	u32 get_indx, offset, rd32_val, entries;
+	struct nvgpu_mem *mem;
+	struct mmu_fault_info *mmfault;
+
+	if (gv11b_fb_is_fault_buffer_empty(g, index,
+		 &get_indx)) {
+		nvgpu_log(g, gpu_dbg_intr, "SPURIOUS fault");
+		return;
+	}
+	nvgpu_log(g, gpu_dbg_intr, "get ptr = %d", get_indx);
+
+	mem = &g->mm.hw_fault_buf[index];
+	mmfault = g->mm.fault_info[index];
+
+	entries = gv11b_fb_fault_buffer_size_val(g, index);
+	nvgpu_log(g, gpu_dbg_intr, "buffer num entries = %d", entries);
+
+	offset = (get_indx * gmmu_fault_buf_size_v()) / sizeof(u32);
+	nvgpu_log(g, gpu_dbg_intr, "starting word offset = 0x%x", offset);
+
+	rd32_val = nvgpu_mem_rd32(g, mem,
+		 offset + gmmu_fault_buf_entry_valid_w());
+	nvgpu_log(g, gpu_dbg_intr, "entry valid offset val = 0x%x", rd32_val);
+
+	while ((rd32_val & gmmu_fault_buf_entry_valid_m())) {
+
+		nvgpu_log(g, gpu_dbg_intr, "entry valid = 0x%x", rd32_val);
+
+		gv11b_fb_copy_from_hw_fault_buf(g, mem, offset, mmfault);
+
+		/* Extra 1 in buffer size is to detect buffer full.
+		 * Actual number of entries for faults to be snapped are
+		 * one less than number in fault_buffer_size_val
+		 */
+		get_indx = (get_indx + 1) % (entries - 1);
+		nvgpu_log(g, gpu_dbg_intr, "new get index = %d", get_indx);
+
+		gv11b_fb_fault_buffer_get_ptr_update(g, index, get_indx);
+
+		gv11b_fb_handle_mmu_fault_common(g, mmfault);
+
+		offset = (get_indx * gmmu_fault_buf_size_v()) / sizeof(u32);
+		nvgpu_log(g, gpu_dbg_intr, "next word offset = 0x%x", offset);
+
+		rd32_val = nvgpu_mem_rd32(g, mem,
+			 offset + gmmu_fault_buf_entry_valid_w());
+	}
+}
+
+static void gv11b_mm_copy_from_fault_snap_reg(struct gk20a *g,
+		u32 fault_status, struct mmu_fault_info *mmfault)
+{
+	u32 reg_val;
+	u32 addr_lo, addr_hi;
+	u64 inst_ptr;
+	int chid = FIFO_INVAL_CHANNEL_ID;
+	struct channel_gk20a *refch;
+
+	memset(mmfault, 0, sizeof(*mmfault));
+
+	if (!(fault_status & fb_mmu_fault_status_valid_set_f())) {
+
+		nvgpu_log(g, gpu_dbg_intr, "mmu fault status valid not set");
+		return;
+	}
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_inst_lo_r());
+	addr_lo = fb_mmu_fault_inst_lo_addr_v(reg_val);
+	addr_lo = addr_lo << ram_in_base_shift_v();
+
+	addr_hi = gk20a_readl(g, fb_mmu_fault_inst_hi_r());
+	addr_hi = fb_mmu_fault_inst_hi_addr_v(addr_hi);
+	inst_ptr = hi32_lo32_to_u64(addr_hi, addr_lo);
+
+	/* refch will be put back after fault is handled */
+	refch = gk20a_refch_from_inst_ptr(g, inst_ptr);
+	if (refch)
+		chid = refch->chid;
+
+	/* It is still ok to continue if refch is NULL */
+	mmfault->refch = refch;
+	mmfault->chid = chid;
+	mmfault->inst_ptr = inst_ptr;
+	mmfault->inst_aperture = fb_mmu_fault_inst_lo_aperture_v(reg_val);
+	mmfault->mmu_engine_id = fb_mmu_fault_inst_lo_engine_id_v(reg_val);
+
+	gv11b_mmu_fault_id_to_eng_pbdma_id_and_veid(g, mmfault->mmu_engine_id,
+		&mmfault->faulted_engine, &mmfault->faulted_subid,
+		&mmfault->faulted_pbdma);
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_addr_lo_r());
+	addr_lo = fb_mmu_fault_addr_lo_addr_v(reg_val);
+	addr_lo = addr_lo << ram_in_base_shift_v();
+
+	mmfault->fault_addr_aperture =
+		fb_mmu_fault_addr_lo_phys_aperture_v(reg_val);
+
+	addr_hi = gk20a_readl(g, fb_mmu_fault_addr_hi_r());
+	addr_hi = fb_mmu_fault_addr_hi_addr_v(addr_hi);
+	mmfault->fault_addr = hi32_lo32_to_u64(addr_hi, addr_lo);
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_info_r());
+	mmfault->fault_type = fb_mmu_fault_info_fault_type_v(reg_val);
+	mmfault->replayable_fault =
+		fb_mmu_fault_info_replayable_fault_v(reg_val);
+	mmfault->client_id = fb_mmu_fault_info_client_v(reg_val);
+	mmfault->access_type = fb_mmu_fault_info_access_type_v(reg_val);
+	mmfault->client_type = fb_mmu_fault_info_client_type_v(reg_val);
+	mmfault->gpc_id = fb_mmu_fault_info_gpc_id_v(reg_val);
+	mmfault->protected_mode =
+		fb_mmu_fault_info_protected_mode_v(reg_val);
+	mmfault->replay_fault_en =
+		fb_mmu_fault_info_replayable_fault_en_v(reg_val);
+
+	mmfault->valid = fb_mmu_fault_info_valid_v(reg_val);
+
+	fault_status &= ~(fb_mmu_fault_status_valid_m());
+	gk20a_writel(g, fb_mmu_fault_status_r(), fault_status);
+
+	gv11b_fb_parse_mmfault(mmfault);
+
+}
+
+static void gv11b_fb_handle_replay_fault_overflow(struct gk20a *g,
+		u32 fault_status)
+{
+	u32 reg_val;
+	unsigned int index = REPLAY_REG_INDEX;
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_buffer_get_r(index));
+
+	if (fault_status &
+		fb_mmu_fault_status_replayable_getptr_corrupted_m()) {
+
+		nvgpu_err(g, "replayable getptr corrupted set");
+
+		gv11b_fb_fault_buf_configure_hw(g, index);
+
+		reg_val = set_field(reg_val,
+			fb_mmu_fault_buffer_get_getptr_corrupted_m(),
+			fb_mmu_fault_buffer_get_getptr_corrupted_clear_f());
+	}
+
+	if (fault_status &
+		fb_mmu_fault_status_replayable_overflow_m()) {
+		bool buffer_full = gv11b_fb_is_fault_buffer_full(g, index);
+
+		nvgpu_err(g, "replayable overflow: buffer full:%s",
+			buffer_full?"true":"false");
+
+		reg_val = set_field(reg_val,
+			fb_mmu_fault_buffer_get_overflow_m(),
+			fb_mmu_fault_buffer_get_overflow_clear_f());
+	}
+
+	gk20a_writel(g, fb_mmu_fault_buffer_get_r(index), reg_val);
+}
+
+static void gv11b_fb_handle_nonreplay_fault_overflow(struct gk20a *g,
+		u32 fault_status)
+{
+	u32 reg_val;
+	unsigned int index = NONREPLAY_REG_INDEX;
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_buffer_get_r(index));
+
+	if (fault_status &
+		fb_mmu_fault_status_non_replayable_getptr_corrupted_m()) {
+
+		nvgpu_err(g, "non replayable getptr corrupted set");
+
+		gv11b_fb_fault_buf_configure_hw(g, index);
+
+		reg_val = set_field(reg_val,
+			fb_mmu_fault_buffer_get_getptr_corrupted_m(),
+			fb_mmu_fault_buffer_get_getptr_corrupted_clear_f());
+	}
+
+	if (fault_status &
+		fb_mmu_fault_status_non_replayable_overflow_m()) {
+
+		bool buffer_full = gv11b_fb_is_fault_buffer_full(g, index);
+
+		nvgpu_err(g, "non replayable overflow: buffer full:%s",
+			buffer_full?"true":"false");
+
+		reg_val = set_field(reg_val,
+			fb_mmu_fault_buffer_get_overflow_m(),
+			fb_mmu_fault_buffer_get_overflow_clear_f());
+	}
+
+	gk20a_writel(g, fb_mmu_fault_buffer_get_r(index), reg_val);
+}
+
+static void gv11b_fb_handle_bar2_fault(struct gk20a *g,
+		struct mmu_fault_info *mmfault, u32 fault_status)
+{
+	gv11b_fb_disable_hub_intr(g, STALL_REG_INDEX,
+		HUB_INTR_TYPE_NONREPLAY | HUB_INTR_TYPE_REPLAY);
+
+
+	if (fault_status & fb_mmu_fault_status_non_replayable_error_m()) {
+		if (gv11b_fb_is_fault_buf_enabled(g, NONREPLAY_REG_INDEX))
+			gv11b_fb_fault_buf_configure_hw(g, NONREPLAY_REG_INDEX);
+	}
+
+	if (fault_status & fb_mmu_fault_status_replayable_error_m()) {
+		if (gv11b_fb_is_fault_buf_enabled(g, REPLAY_REG_INDEX))
+			gv11b_fb_fault_buf_configure_hw(g, REPLAY_REG_INDEX);
+	}
+	gv11b_ce_mthd_buffer_fault_in_bar2_fault(g);
+
+	g->ops.mm.init_bar2_mm_hw_setup(g);
+
+	if (mmfault->refch) {
+		gk20a_channel_put(mmfault->refch);
+		mmfault->refch = NULL;
+	}
+	gv11b_fb_enable_hub_intr(g, STALL_REG_INDEX,
+		HUB_INTR_TYPE_NONREPLAY | HUB_INTR_TYPE_REPLAY);
+}
+
+static void gv11b_fb_handle_other_fault_notify(struct gk20a *g,
+		u32 fault_status)
+{
+	struct mmu_fault_info *mmfault;
+
+	mmfault = g->mm.fault_info[FAULT_TYPE_OTHER_AND_NONREPLAY];
+
+	gv11b_mm_copy_from_fault_snap_reg(g, fault_status, mmfault);
+
+	/* BAR2/Physical faults will not be snapped in hw fault buf */
+	if (mmfault->mmu_engine_id == gmmu_fault_mmu_eng_id_bar2_v()) {
+		nvgpu_err(g, "BAR2 MMU FAULT");
+		gv11b_fb_handle_bar2_fault(g, mmfault, fault_status);
+
+	} else if (mmfault->mmu_engine_id ==
+			gmmu_fault_mmu_eng_id_physical_v()) {
+		/* usually means VPR or out of bounds physical accesses */
+		nvgpu_err(g, "PHYSICAL MMU FAULT");
+
+	} else {
+		gv11b_fb_handle_mmu_fault_common(g, mmfault);
+	}
+}
+
+static void gv11b_fb_handle_dropped_mmu_fault(struct gk20a *g, u32 fault_status)
+{
+	u32 dropped_faults = 0;
+
+	dropped_faults = fb_mmu_fault_status_dropped_bar1_phys_set_f() |
+			fb_mmu_fault_status_dropped_bar1_virt_set_f() |
+			fb_mmu_fault_status_dropped_bar2_phys_set_f() |
+			fb_mmu_fault_status_dropped_bar2_virt_set_f() |
+			fb_mmu_fault_status_dropped_ifb_phys_set_f() |
+			fb_mmu_fault_status_dropped_ifb_virt_set_f() |
+			fb_mmu_fault_status_dropped_other_phys_set_f()|
+			fb_mmu_fault_status_dropped_other_virt_set_f();
+
+	if (fault_status & dropped_faults) {
+		nvgpu_err(g, "dropped mmu fault (0x%08x)",
+				fault_status & dropped_faults);
+		gk20a_writel(g, fb_mmu_fault_status_r(), dropped_faults);
+	}
+}
+
+
+static void gv11b_fb_handle_mmu_fault(struct gk20a *g, u32 niso_intr)
+{
+	u32 fault_status = gk20a_readl(g, fb_mmu_fault_status_r());
+
+	nvgpu_log(g, gpu_dbg_intr, "mmu_fault_status = 0x%08x", fault_status);
+
+	if (niso_intr &
+		fb_niso_intr_mmu_other_fault_notify_m()) {
+
+		gv11b_fb_handle_dropped_mmu_fault(g, fault_status);
+
+		gv11b_fb_handle_other_fault_notify(g, fault_status);
+	}
+
+	if (gv11b_fb_is_fault_buf_enabled(g, NONREPLAY_REG_INDEX)) {
+
+		if (niso_intr &
+			fb_niso_intr_mmu_nonreplayable_fault_notify_m()) {
+
+			gv11b_fb_handle_mmu_nonreplay_replay_fault(g,
+					fault_status, NONREPLAY_REG_INDEX);
+
+			/*
+			 * When all the faults are processed,
+			 * GET and PUT will have same value and mmu fault status
+			 * bit will be reset by HW
+			 */
+		}
+		if (niso_intr &
+			fb_niso_intr_mmu_nonreplayable_fault_overflow_m()) {
+
+			gv11b_fb_handle_nonreplay_fault_overflow(g,
+				fault_status);
+		}
+
+	}
+
+	if (gv11b_fb_is_fault_buf_enabled(g, REPLAY_REG_INDEX)) {
+
+		if (niso_intr &
+			fb_niso_intr_mmu_replayable_fault_notify_m()) {
+
+			gv11b_fb_handle_mmu_nonreplay_replay_fault(g,
+					fault_status, REPLAY_REG_INDEX);
+		}
+		if (niso_intr &
+			fb_niso_intr_mmu_replayable_fault_overflow_m()) {
+
+			gv11b_fb_handle_replay_fault_overflow(g,
+				fault_status);
+		}
+
+	}
+
+	nvgpu_log(g, gpu_dbg_intr, "clear mmu fault status");
+	gk20a_writel(g, fb_mmu_fault_status_r(),
+			fb_mmu_fault_status_valid_clear_f());
+}
+
+static void gv11b_fb_hub_isr(struct gk20a *g)
+{
+	u32 status, niso_intr;
 
 	nvgpu_mutex_acquire(&g->mm.hub_isr_mutex);
 
+	niso_intr = gk20a_readl(g, fb_niso_intr_r());
+
+	nvgpu_info(g, "enter hub isr, niso_intr = 0x%08x", niso_intr);
+
 	if (niso_intr &
-		(fb_niso_intr_hub_access_counter_notify_pending_f() |
-		 fb_niso_intr_hub_access_counter_error_pending_f())) {
+		(fb_niso_intr_hub_access_counter_notify_m() |
+		 fb_niso_intr_hub_access_counter_error_m())) {
 
 		nvgpu_info(g, "hub access counter notify/error");
 	}
@@ -634,8 +1364,8 @@ static void gv11b_fb_hub_isr(struct gk20a *g)
 		fb_niso_intr_mmu_nonreplayable_fault_notify_m() |
 		fb_niso_intr_mmu_nonreplayable_fault_overflow_m())) {
 
-		nvgpu_info(g, "mmu fault : No handling in place");
-
+		nvgpu_info(g, "MMU Fault");
+		gv11b_fb_handle_mmu_fault(g, niso_intr);
 	}
 
 	nvgpu_mutex_release(&g->mm.hub_isr_mutex);