author     Seema Khowala <seemaj@nvidia.com>                      2017-03-07 15:16:07 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>    2017-07-08 14:35:48 -0400
commit     2f6d321390a98ace1e52f21c6a399e06b3fe71e7 (patch)
tree       70ab64bacd4b7a66a13a3a27f303f470175dad92 /drivers/gpu/nvgpu/gv11b/fb_gv11b.c
parent     aa05648fd6038b69d1ed841f33b24cf1875efd83 (diff)
gpu: nvgpu: gv11b: add mmu fault handling
HUB reports the following memory sub-system interrupts:

 a) ACCESS_COUNTER_NOTIFY: GET != PUT for access counter notify buffer
 b) ACCESS_COUNTER_ERROR: HUB received a NACK (BAR2 fault) when writing
    the notify buffer out to memory
 c) MMU_ECC_UNCORRECTED_ERROR_NOTIFY: Uncorrected ECC error detected by
    HUB MMU
 d) MMU_REPLAYABLE_FAULT_NOTIFY: GET != PUT for replayable fault buffer
 e) MMU_REPLAYABLE_FAULT_OVERFLOW: Overflow when writing to the
    replayable fault buffer
 f) MMU_NONREPLAYABLE_FAULT_NOTIFY: GET != PUT for non-replayable fault
    buffer
 g) MMU_NONREPLAYABLE_FAULT_OVERFLOW: Overflow when writing to the
    non-replayable fault buffer
 h) MMU_OTHER_FAULT_NOTIFY: All other fault notifications from MMU

This change:
 - detects the other-fault notify interrupt
 - copies fault info from the fault snap registers for the other-fault
   notify interrupt
 - detects and handles nonreplay/replay fault notify and fault overflow
 - copies fault info from the fault buffer for nonreplay/replay faults
 - prints fault info

JIRA GPUT19X-7
JIRA GPUT19X-12

Change-Id: Ifa08a4ebcd119a7d81c2eae3f52dc825d1ce3898
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master/r/1493394
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/fb_gv11b.c')
-rw-r--r--   drivers/gpu/nvgpu/gv11b/fb_gv11b.c   748
1 file changed, 739 insertions(+), 9 deletions(-)
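The replayable and non-replayable fault buffers that this patch drains are circular buffers indexed by hardware GET/PUT pointers: the HUB raises the *_FAULT_NOTIFY interrupts while GET != PUT, and software reads entries, clears each entry's valid bit, and advances GET until the two pointers match. The sketch below shows that ring-buffer discipline in isolation. It is a simplified model under assumed types, not the nvgpu code in this patch; struct fault_buf, fault_buf_drain() and the handle callback are hypothetical stand-ins for the fb_mmu_fault_buffer_* register accessors and gv11b_fb_handle_mmu_fault_common().

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-ins for one HW fault buffer and its entries; the real
 * driver reads these through fb_mmu_fault_buffer_* registers and nvgpu_mem
 * accessors. */
struct fault_entry {
	uint64_t fault_addr;
	uint32_t fault_type;
	bool     valid;        /* set by HW once the entry is fully written */
};

struct fault_buf {
	struct fault_entry *ring; /* backing memory the HUB writes into     */
	uint32_t size;            /* slot count, including one sentinel slot */
	uint32_t get;             /* SW-owned read index  (GET register)     */
	uint32_t put;             /* HW-owned write index (PUT register)     */
};

static bool fault_buf_empty(const struct fault_buf *b)
{
	return b->get == b->put;                   /* nothing left to drain */
}

static bool fault_buf_full(const struct fault_buf *b)
{
	/* next HW write would land on GET: the overflow condition */
	return ((b->put + 1) % b->size) == b->get;
}

/* Drain every published entry, oldest first, and hand each to the caller. */
static void fault_buf_drain(struct fault_buf *b,
			    void (*handle)(const struct fault_entry *e))
{
	while (!fault_buf_empty(b)) {
		struct fault_entry *e = &b->ring[b->get];

		if (!e->valid)      /* PUT moved but entry body not visible */
			break;

		handle(e);          /* print/teardown, as the patch does     */
		e->valid = false;   /* mirror clearing the HW valid bit      */

		b->get = (b->get + 1) % b->size;  /* publish the new GET so
						   * the HUB can reuse the slot */
	}
}

Keeping one slot unused is what makes "full" distinguishable from "empty"; the patch reflects the same idea when it notes that the usable entry count is one less than fault_buffer_size_val and when gv11b_fb_is_fault_buffer_full() checks get_idx == ((put_idx + 1) % entries).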
diff --git a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
index da7c7d4a..6d1fbca9 100644
--- a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
@@ -23,7 +23,9 @@
 
 #include "gp10b/fb_gp10b.h"
 
+#include "gv11b/fifo_gv11b.h"
 #include "gv11b/fb_gv11b.h"
+#include "gv11b/ce_gv11b.h"
 
 #include <nvgpu/hw/gv11b/hw_gmmu_gv11b.h>
 #include <nvgpu/hw/gv11b/hw_fb_gv11b.h>
@@ -160,6 +162,89 @@ static void gv11b_fb_reset(struct gk20a *g)
 	}
 }
 
+static const char * const invalid_str = "invalid";
+
+static const char *const fault_type_descs_gv11b[] = {
+	"invalid pde",
+	"invalid pde size",
+	"invalid pte",
+	"limit violation",
+	"unbound inst block",
+	"priv violation",
+	"write",
+	"read",
+	"pitch mask violation",
+	"work creation",
+	"unsupported aperture",
+	"compression failure",
+	"unsupported kind",
+	"region violation",
+	"poison",
+	"atomic"
+};
+
+static const char *const fault_client_type_descs_gv11b[] = {
+	"gpc",
+	"hub",
+};
+
+static const char *const fault_access_type_descs_gv11b[] = {
+	"virt read",
+	"virt write",
+	"virt atomic strong",
+	"virt prefetch",
+	"virt atomic weak",
+	"xxx",
+	"xxx",
+	"xxx",
+	"phys read",
+	"phys write",
+	"phys atomic",
+	"phys prefetch",
+};
+
+static const char *const hub_client_descs_gv11b[] = {
+	"vip", "ce0", "ce1", "dniso", "fe", "fecs", "host", "host cpu",
+	"host cpu nb", "iso", "mmu", "nvdec", "nvenc1", "nvenc2",
+	"niso", "p2p", "pd", "perf", "pmu", "raster twod", "scc",
+	"scc nb", "sec", "ssync", "gr copy", "xv", "mmu nb",
+	"nvenc", "d falcon", "sked", "a falcon", "hsce0", "hsce1",
+	"hsce2", "hsce3", "hsce4", "hsce5", "hsce6", "hsce7", "hsce8",
+	"hsce9", "hshub", "ptp x0", "ptp x1", "ptp x2", "ptp x3",
+	"ptp x4", "ptp x5", "ptp x6", "ptp x7", "vpr scrubber0",
+	"vpr scrubber1", "dwbif", "fbfalcon", "ce shim", "gsp",
+	"dont care"
+};
+
+static const char *const gpc_client_descs_gv11b[] = {
+	"t1 0", "t1 1", "t1 2", "t1 3",
+	"t1 4", "t1 5", "t1 6", "t1 7",
+	"pe 0", "pe 1", "pe 2", "pe 3",
+	"pe 4", "pe 5", "pe 6", "pe 7",
+	"rast", "gcc", "gpccs",
+	"prop 0", "prop 1", "prop 2", "prop 3",
+	"gpm",
+	"ltp utlb 0", "ltp utlb 1", "ltp utlb 2", "ltp utlb 3",
+	"ltp utlb 4", "ltp utlb 5", "ltp utlb 6", "ltp utlb 7",
+	"utlb",
+	"t1 8", "t1 9", "t1 10", "t1 11",
+	"t1 12", "t1 13", "t1 14", "t1 15",
+	"tpccs 0", "tpccs 1", "tpccs 2", "tpccs 3",
+	"tpccs 4", "tpccs 5", "tpccs 6", "tpccs 7",
+	"pe 8", "pe 9", "tpccs 8", "tpccs 9",
+	"t1 16", "t1 17", "t1 18", "t1 19",
+	"pe 10", "pe 11", "tpccs 10", "tpccs 11",
+	"t1 20", "t1 21", "t1 22", "t1 23",
+	"pe 12", "pe 13", "tpccs 12", "tpccs 13",
+	"t1 24", "t1 25", "t1 26", "t1 27",
+	"pe 14", "pe 15", "tpccs 14", "tpccs 15",
+	"t1 28", "t1 29", "t1 30", "t1 31",
+	"pe 16", "pe 17", "tpccs 16", "tpccs 17",
+	"t1 32", "t1 33", "t1 34", "t1 35",
+	"pe 18", "pe 19", "tpccs 18", "tpccs 19",
+	"t1 36", "t1 37", "t1 38", "t1 39",
+};
+
 static void gv11b_init_uncompressed_kind_map(void)
 {
 	gk20a_uc_kind_map[gmmu_pte_kind_c32_ms2_4cbra_v()] =
@@ -218,6 +303,84 @@ u32 gv11b_fb_is_fault_buf_enabled(struct gk20a *g,
 	return fb_mmu_fault_buffer_size_enable_v(reg_val);
 }
 
+static void gv11b_fb_fault_buffer_get_ptr_update(struct gk20a *g,
+		unsigned int index, u32 next)
+{
+	u32 reg_val;
+
+	nvgpu_log(g, gpu_dbg_intr, "updating get index with = %d", next);
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_buffer_get_r(index));
+	reg_val = set_field(reg_val, fb_mmu_fault_buffer_get_ptr_m(),
+			fb_mmu_fault_buffer_get_ptr_f(next));
+
+	/* while the fault is being handled it is possible for overflow
+	 * to happen,
+	 */
+	if (reg_val & fb_mmu_fault_buffer_get_overflow_m())
+		reg_val |= fb_mmu_fault_buffer_get_overflow_clear_f();
+
+	gk20a_writel(g, fb_mmu_fault_buffer_get_r(index), reg_val);
+
+	/* make sure get ptr update is visible to everyone to avoid
+	 * reading already read entry
+	 */
+	mb();
+}
+
+static u32 gv11b_fb_fault_buffer_get_index(struct gk20a *g,
+		unsigned int index)
+{
+	u32 reg_val;
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_buffer_get_r(index));
+	return fb_mmu_fault_buffer_get_ptr_v(reg_val);
+}
+
+static u32 gv11b_fb_fault_buffer_put_index(struct gk20a *g,
+		unsigned int index)
+{
+	u32 reg_val;
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_buffer_put_r(index));
+	return fb_mmu_fault_buffer_put_ptr_v(reg_val);
+}
+
+static u32 gv11b_fb_fault_buffer_size_val(struct gk20a *g,
+		unsigned int index)
+{
+	u32 reg_val;
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_buffer_size_r(index));
+	return fb_mmu_fault_buffer_size_val_v(reg_val);
+}
+
+static bool gv11b_fb_is_fault_buffer_empty(struct gk20a *g,
+		unsigned int index, u32 *get_idx)
+{
+	u32 put_idx;
+
+	*get_idx = gv11b_fb_fault_buffer_get_index(g, index);
+	put_idx = gv11b_fb_fault_buffer_put_index(g, index);
+
+	return *get_idx == put_idx;
+}
+
+static bool gv11b_fb_is_fault_buffer_full(struct gk20a *g,
+		unsigned int index)
+{
+	u32 get_idx, put_idx, entries;
+
+
+	get_idx = gv11b_fb_fault_buffer_get_index(g, index);
+
+	put_idx = gv11b_fb_fault_buffer_put_index(g, index);
+
+	entries = gv11b_fb_fault_buffer_size_val(g, index);
+
+	return get_idx == ((put_idx + 1) % entries);
+}
+
 void gv11b_fb_fault_buf_set_state_hw(struct gk20a *g,
 		unsigned int index, unsigned int state)
 {
@@ -275,7 +438,6 @@ void gv11b_fb_fault_buf_configure_hw(struct gk20a *g, unsigned int index)
 
 	gv11b_fb_fault_buf_set_state_hw(g, index,
 			FAULT_BUF_DISABLED);
-
 	addr_lo = u64_lo32(g->mm.hw_fault_buf[index].gpu_va >>
 			ram_in_base_shift_v());
 	addr_hi = u64_hi32(g->mm.hw_fault_buf[index].gpu_va);
@@ -586,18 +748,586 @@ static void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
 		g->ecc.eng.t19x.mmu_fillunit_uncorrected_err_count.counters[0]);
 }
 
-static void gv11b_fb_hub_isr(struct gk20a *g)
-{
-	u32 status;
-	u32 niso_intr = gk20a_readl(g, fb_niso_intr_r());
-
-	nvgpu_info(g, "enter hub isr, niso_intr = 0x%08x", niso_intr);
+static void gv11b_fb_parse_mmfault(struct mmu_fault_info *mmfault)
+{
+	if (WARN_ON(mmfault->fault_type >=
+			ARRAY_SIZE(fault_type_descs_gv11b)))
+		mmfault->fault_type_desc = invalid_str;
+	else
+		mmfault->fault_type_desc =
+			fault_type_descs_gv11b[mmfault->fault_type];
+
+	if (WARN_ON(mmfault->client_type >=
+			ARRAY_SIZE(fault_client_type_descs_gv11b)))
+		mmfault->client_type_desc = invalid_str;
+	else
+		mmfault->client_type_desc =
+			fault_client_type_descs_gv11b[mmfault->client_type];
+
+	mmfault->client_id_desc = invalid_str;
+	if (mmfault->client_type ==
+			gmmu_fault_client_type_hub_v()) {
+
+		if (!(WARN_ON(mmfault->client_id >=
+				ARRAY_SIZE(hub_client_descs_gv11b))))
+			mmfault->client_id_desc =
+				hub_client_descs_gv11b[mmfault->client_id];
+	} else if (mmfault->client_type ==
+			gmmu_fault_client_type_gpc_v()) {
+		if (!(WARN_ON(mmfault->client_id >=
+				ARRAY_SIZE(gpc_client_descs_gv11b))))
+			mmfault->client_id_desc =
+				gpc_client_descs_gv11b[mmfault->client_id];
+	}
+
+}
+
+static void gv11b_fb_print_fault_info(struct gk20a *g,
+			struct mmu_fault_info *mmfault)
+{
+	if (mmfault && mmfault->valid) {
+		nvgpu_err(g, "[MMU FAULT] "
+			"mmu engine id: %d, "
+			"ch id: %d, "
+			"fault addr: 0x%llx, "
+			"fault addr aperture: %d, "
+			"fault type: %s, "
+			"access type: %s, ",
+			mmfault->mmu_engine_id,
+			mmfault->chid,
+			mmfault->fault_addr,
+			mmfault->fault_addr_aperture,
+			mmfault->fault_type_desc,
+			fault_access_type_descs_gv11b[mmfault->access_type]);
+		nvgpu_log(g, gpu_dbg_intr, "[MMU FAULT] "
+			"mmu engine id: %d, "
+			"faulted act eng id if any: 0x%x, "
+			"faulted veid if any: 0x%x, "
+			"faulted pbdma id if any: 0x%x, "
+			"fault addr: 0x%llx, ",
+			mmfault->mmu_engine_id,
+			mmfault->faulted_engine,
+			mmfault->faulted_subid,
+			mmfault->faulted_pbdma,
+			mmfault->fault_addr);
+		nvgpu_log(g, gpu_dbg_intr, "[MMU FAULT] "
+			"fault addr aperture: %d, "
+			"fault type: %s, "
+			"access type: %s, "
+			"inst ptr: 0x%llx, "
+			"inst ptr aperture: %d, ",
+			mmfault->fault_addr_aperture,
+			mmfault->fault_type_desc,
+			fault_access_type_descs_gv11b[mmfault->access_type],
+			mmfault->inst_ptr,
+			mmfault->inst_aperture);
+		nvgpu_log(g, gpu_dbg_intr, "[MMU FAULT] "
+			"ch id: %d, "
+			"timestamp hi:lo 0x%08x:0x%08x, "
+			"client type: %s, "
+			"client id: %s, "
+			"gpc id if client type is gpc: %d, ",
+			mmfault->chid,
+			mmfault->timestamp_hi, mmfault->timestamp_lo,
+			mmfault->client_type_desc,
+			mmfault->client_id_desc,
+			mmfault->gpc_id);
+		nvgpu_log(g, gpu_dbg_intr, "[MMU FAULT] "
+			"protected mode: %d, "
+			"replayable fault: %d, "
+			"replayable fault en: %d ",
+			mmfault->protected_mode,
+			mmfault->replayable_fault,
+			mmfault->replay_fault_en);
+	}
+}
+
+/*
+ *Fault buffer format
+ *
+ * 31    28     24 23           16 15            8 7     4       0
+ *.-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-.
+ *|              inst_lo                  |0 0|apr|0 0 0 0 0 0 0 0|
+ *`-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-'
+ *|                           inst_hi                             |
+ *`-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-'
+ *|              addr_31_12               |                 |AP   |
+ *`-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-'
+ *|                           addr_63_32                          |
+ *`-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-'
+ *|                          timestamp_lo                         |
+ *`-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-'
+ *|                          timestamp_hi                         |
+ *`-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-'
+ *|                          (reserved)           |   engine_id   |
+ *`-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-'
+ *|V|R|P|  gpc_id |0 0 0|t|0|acctp|0|   client    |RF0 0|faulttype|
+ */
+
+static void gv11b_fb_copy_from_hw_fault_buf(struct gk20a *g,
+	 struct nvgpu_mem *mem, u32 offset, struct mmu_fault_info *mmfault)
+{
+	u32 rd32_val;
+	u32 addr_lo, addr_hi;
+	u64 inst_ptr;
+	u32 chid = FIFO_INVAL_CHANNEL_ID;
+	struct channel_gk20a *refch;
+
+	memset(mmfault, 0, sizeof(*mmfault));
+
+	rd32_val = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_inst_lo_w());
+	addr_lo = gmmu_fault_buf_entry_inst_lo_v(rd32_val);
+	addr_lo = addr_lo << ram_in_base_shift_v();
+
+	addr_hi = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_inst_hi_w());
+	addr_hi = gmmu_fault_buf_entry_inst_hi_v(addr_hi);
+
+	inst_ptr = hi32_lo32_to_u64(addr_hi, addr_lo);
+
+	/* refch will be put back after fault is handled */
+	refch = gk20a_refch_from_inst_ptr(g, inst_ptr);
+	if (refch)
+		chid = refch->chid;
+
+	/* it is ok to continue even if refch is NULL */
+	mmfault->refch = refch;
+	mmfault->chid = chid;
+	mmfault->inst_ptr = inst_ptr;
+	mmfault->inst_aperture = gmmu_fault_buf_entry_inst_aperture_v(rd32_val);
+
+	rd32_val = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_addr_lo_w());
+
+	mmfault->fault_addr_aperture =
+		gmmu_fault_buf_entry_addr_phys_aperture_v(rd32_val);
+	addr_lo = gmmu_fault_buf_entry_addr_lo_v(rd32_val);
+	addr_lo = addr_lo << ram_in_base_shift_v();
+
+	rd32_val = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_addr_hi_w());
+	addr_hi = gmmu_fault_buf_entry_addr_hi_v(rd32_val);
+	mmfault->fault_addr = hi32_lo32_to_u64(addr_hi, addr_lo);
+
+	rd32_val = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_timestamp_lo_w());
+	mmfault->timestamp_lo =
+		gmmu_fault_buf_entry_timestamp_lo_v(rd32_val);
+
+	rd32_val = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_timestamp_hi_w());
+	mmfault->timestamp_hi =
+		gmmu_fault_buf_entry_timestamp_hi_v(rd32_val);
+
+	rd32_val = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_engine_id_w());
+
+	mmfault->mmu_engine_id =
+		gmmu_fault_buf_entry_engine_id_v(rd32_val);
+	gv11b_mmu_fault_id_to_eng_pbdma_id_and_veid(g, mmfault->mmu_engine_id,
+		 &mmfault->faulted_engine, &mmfault->faulted_subid,
+		 &mmfault->faulted_pbdma);
+
+	rd32_val = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_fault_type_w());
+	mmfault->client_id =
+		gmmu_fault_buf_entry_client_v(rd32_val);
+	mmfault->replayable_fault =
+		gmmu_fault_buf_entry_replayable_fault_v(rd32_val);
+
+	mmfault->fault_type =
+		gmmu_fault_buf_entry_fault_type_v(rd32_val);
+	mmfault->access_type =
+		gmmu_fault_buf_entry_access_type_v(rd32_val);
+
+	mmfault->client_type =
+		gmmu_fault_buf_entry_mmu_client_type_v(rd32_val);
+
+	mmfault->gpc_id =
+		gmmu_fault_buf_entry_gpc_id_v(rd32_val);
+	mmfault->protected_mode =
+		gmmu_fault_buf_entry_protected_mode_v(rd32_val);
+
+	mmfault->replay_fault_en =
+		gmmu_fault_buf_entry_replayable_fault_en_v(rd32_val);
+
+	mmfault->valid = gmmu_fault_buf_entry_valid_v(rd32_val);
+
+	rd32_val = nvgpu_mem_rd32(g, mem, offset +
+			gmmu_fault_buf_entry_fault_type_w());
+	rd32_val &= ~(gmmu_fault_buf_entry_valid_m());
+	nvgpu_mem_wr32(g, mem, offset + gmmu_fault_buf_entry_valid_w(),
+		 rd32_val);
+
+	gv11b_fb_parse_mmfault(mmfault);
+}
+
+static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
+		 struct mmu_fault_info *mmfault)
+{
+	if (!mmfault->valid)
+		return;
+
+	gv11b_fb_print_fault_info(g, mmfault);
+
+	if (mmfault->fault_type == gmmu_fault_type_unbound_inst_block_v()) {
+		/*
+		 * Bug 1847172: When an engine faults due to an unbound
+		 * instance block, the fault cannot be isolated to a
+		 * single context so we need to reset the entire runlist
+		 */
+		nvgpu_log(g, gpu_dbg_intr, "UNBOUND INST");
+	}
+
+	if (mmfault->refch) {
+		gk20a_channel_put(mmfault->refch);
+		mmfault->refch = NULL;
+	}
+}
+
+static void gv11b_fb_handle_mmu_nonreplay_replay_fault(struct gk20a *g,
+		 u32 fault_status, unsigned int index)
+{
+	u32 get_indx, offset, rd32_val, entries;
+	struct nvgpu_mem *mem;
+	struct mmu_fault_info *mmfault;
+
+	if (gv11b_fb_is_fault_buffer_empty(g, index,
+		 &get_indx)) {
+		nvgpu_log(g, gpu_dbg_intr, "SPURIOUS fault");
+		return;
+	}
+	nvgpu_log(g, gpu_dbg_intr, "get ptr = %d", get_indx);
+
+	mem = &g->mm.hw_fault_buf[index];
+	mmfault = g->mm.fault_info[index];
+
+	entries = gv11b_fb_fault_buffer_size_val(g, index);
+	nvgpu_log(g, gpu_dbg_intr, "buffer num entries = %d", entries);
+
+	offset = (get_indx * gmmu_fault_buf_size_v()) / sizeof(u32);
+	nvgpu_log(g, gpu_dbg_intr, "starting word offset = 0x%x", offset);
+
+	rd32_val = nvgpu_mem_rd32(g, mem,
+		 offset + gmmu_fault_buf_entry_valid_w());
+	nvgpu_log(g, gpu_dbg_intr, "entry valid offset val = 0x%x", rd32_val);
+
+	while ((rd32_val & gmmu_fault_buf_entry_valid_m())) {
+
+		nvgpu_log(g, gpu_dbg_intr, "entry valid = 0x%x", rd32_val);
+
+		gv11b_fb_copy_from_hw_fault_buf(g, mem, offset, mmfault);
+
+		/* Extra 1 in buffer size is to detect buffer full.
+		 * Actual number of entries for faults to be snapped are
+		 * one less than number in fault_buffer_size_val
+		 */
+		get_indx = (get_indx + 1) % (entries - 1);
+		nvgpu_log(g, gpu_dbg_intr, "new get index = %d", get_indx);
+
+		gv11b_fb_fault_buffer_get_ptr_update(g, index, get_indx);
+
+		gv11b_fb_handle_mmu_fault_common(g, mmfault);
+
+		offset = (get_indx * gmmu_fault_buf_size_v()) / sizeof(u32);
+		nvgpu_log(g, gpu_dbg_intr, "next word offset = 0x%x", offset);
+
+		rd32_val = nvgpu_mem_rd32(g, mem,
+			 offset + gmmu_fault_buf_entry_valid_w());
+	}
+}
+
+static void gv11b_mm_copy_from_fault_snap_reg(struct gk20a *g,
+		u32 fault_status, struct mmu_fault_info *mmfault)
+{
+	u32 reg_val;
+	u32 addr_lo, addr_hi;
+	u64 inst_ptr;
+	int chid = FIFO_INVAL_CHANNEL_ID;
+	struct channel_gk20a *refch;
+
+	memset(mmfault, 0, sizeof(*mmfault));
+
+	if (!(fault_status & fb_mmu_fault_status_valid_set_f())) {
+
+		nvgpu_log(g, gpu_dbg_intr, "mmu fault status valid not set");
+		return;
+	}
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_inst_lo_r());
+	addr_lo = fb_mmu_fault_inst_lo_addr_v(reg_val);
+	addr_lo = addr_lo << ram_in_base_shift_v();
+
+	addr_hi = gk20a_readl(g, fb_mmu_fault_inst_hi_r());
+	addr_hi = fb_mmu_fault_inst_hi_addr_v(addr_hi);
+	inst_ptr = hi32_lo32_to_u64(addr_hi, addr_lo);
+
+	/* refch will be put back after fault is handled */
+	refch = gk20a_refch_from_inst_ptr(g, inst_ptr);
+	if (refch)
+		chid = refch->chid;
+
+	/* It is still ok to continue if refch is NULL */
+	mmfault->refch = refch;
+	mmfault->chid = chid;
+	mmfault->inst_ptr = inst_ptr;
+	mmfault->inst_aperture = fb_mmu_fault_inst_lo_aperture_v(reg_val);
+	mmfault->mmu_engine_id = fb_mmu_fault_inst_lo_engine_id_v(reg_val);
+
+	gv11b_mmu_fault_id_to_eng_pbdma_id_and_veid(g, mmfault->mmu_engine_id,
+		 &mmfault->faulted_engine, &mmfault->faulted_subid,
+		 &mmfault->faulted_pbdma);
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_addr_lo_r());
+	addr_lo = fb_mmu_fault_addr_lo_addr_v(reg_val);
+	addr_lo = addr_lo << ram_in_base_shift_v();
+
+	mmfault->fault_addr_aperture =
+		 fb_mmu_fault_addr_lo_phys_aperture_v(reg_val);
+
+	addr_hi = gk20a_readl(g, fb_mmu_fault_addr_hi_r());
+	addr_hi = fb_mmu_fault_addr_hi_addr_v(addr_hi);
+	mmfault->fault_addr = hi32_lo32_to_u64(addr_hi, addr_lo);
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_info_r());
+	mmfault->fault_type = fb_mmu_fault_info_fault_type_v(reg_val);
+	mmfault->replayable_fault =
+		 fb_mmu_fault_info_replayable_fault_v(reg_val);
+	mmfault->client_id = fb_mmu_fault_info_client_v(reg_val);
+	mmfault->access_type = fb_mmu_fault_info_access_type_v(reg_val);
+	mmfault->client_type = fb_mmu_fault_info_client_type_v(reg_val);
+	mmfault->gpc_id = fb_mmu_fault_info_gpc_id_v(reg_val);
+	mmfault->protected_mode =
+		fb_mmu_fault_info_protected_mode_v(reg_val);
+	mmfault->replay_fault_en =
+		fb_mmu_fault_info_replayable_fault_en_v(reg_val);
+
+	mmfault->valid = fb_mmu_fault_info_valid_v(reg_val);
+
+	fault_status &= ~(fb_mmu_fault_status_valid_m());
+	gk20a_writel(g, fb_mmu_fault_status_r(), fault_status);
+
+	gv11b_fb_parse_mmfault(mmfault);
+
+}
+
+static void gv11b_fb_handle_replay_fault_overflow(struct gk20a *g,
+		 u32 fault_status)
+{
+	u32 reg_val;
+	unsigned int index = REPLAY_REG_INDEX;
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_buffer_get_r(index));
+
+	if (fault_status &
+		 fb_mmu_fault_status_replayable_getptr_corrupted_m()) {
+
+		nvgpu_err(g, "replayable getptr corrupted set");
+
+		gv11b_fb_fault_buf_configure_hw(g, index);
+
+		reg_val = set_field(reg_val,
+			fb_mmu_fault_buffer_get_getptr_corrupted_m(),
+			fb_mmu_fault_buffer_get_getptr_corrupted_clear_f());
+	}
+
+	if (fault_status &
+		 fb_mmu_fault_status_replayable_overflow_m()) {
+		bool buffer_full = gv11b_fb_is_fault_buffer_full(g, index);
+
+		nvgpu_err(g, "replayable overflow: buffer full:%s",
+				buffer_full?"true":"false");
+
+		reg_val = set_field(reg_val,
+			fb_mmu_fault_buffer_get_overflow_m(),
+			fb_mmu_fault_buffer_get_overflow_clear_f());
+	}
+
+	gk20a_writel(g, fb_mmu_fault_buffer_get_r(index), reg_val);
+}
+
+static void gv11b_fb_handle_nonreplay_fault_overflow(struct gk20a *g,
+		 u32 fault_status)
+{
+	u32 reg_val;
+	unsigned int index = NONREPLAY_REG_INDEX;
+
+	reg_val = gk20a_readl(g, fb_mmu_fault_buffer_get_r(index));
+
+	if (fault_status &
+		 fb_mmu_fault_status_non_replayable_getptr_corrupted_m()) {
+
+		nvgpu_err(g, "non replayable getptr corrupted set");
+
+		gv11b_fb_fault_buf_configure_hw(g, index);
+
+		reg_val = set_field(reg_val,
+			fb_mmu_fault_buffer_get_getptr_corrupted_m(),
+			fb_mmu_fault_buffer_get_getptr_corrupted_clear_f());
+	}
+
+	if (fault_status &
+		 fb_mmu_fault_status_non_replayable_overflow_m()) {
+
+		bool buffer_full = gv11b_fb_is_fault_buffer_full(g, index);
+
+		nvgpu_err(g, "non replayable overflow: buffer full:%s",
+				buffer_full?"true":"false");
+
+		reg_val = set_field(reg_val,
+			fb_mmu_fault_buffer_get_overflow_m(),
+			fb_mmu_fault_buffer_get_overflow_clear_f());
+	}
+
+	gk20a_writel(g, fb_mmu_fault_buffer_get_r(index), reg_val);
+}
+
+static void gv11b_fb_handle_bar2_fault(struct gk20a *g,
+			struct mmu_fault_info *mmfault, u32 fault_status)
+{
+	gv11b_fb_disable_hub_intr(g, STALL_REG_INDEX,
+		 HUB_INTR_TYPE_NONREPLAY | HUB_INTR_TYPE_REPLAY);
+
+
+	if (fault_status & fb_mmu_fault_status_non_replayable_error_m()) {
+		if (gv11b_fb_is_fault_buf_enabled(g, NONREPLAY_REG_INDEX))
+			gv11b_fb_fault_buf_configure_hw(g, NONREPLAY_REG_INDEX);
+	}
+
+	if (fault_status & fb_mmu_fault_status_replayable_error_m()) {
+		if (gv11b_fb_is_fault_buf_enabled(g, REPLAY_REG_INDEX))
+			gv11b_fb_fault_buf_configure_hw(g, REPLAY_REG_INDEX);
+	}
+	gv11b_ce_mthd_buffer_fault_in_bar2_fault(g);
+
+	g->ops.mm.init_bar2_mm_hw_setup(g);
+
+	if (mmfault->refch) {
+		gk20a_channel_put(mmfault->refch);
+		mmfault->refch = NULL;
+	}
+	gv11b_fb_enable_hub_intr(g, STALL_REG_INDEX,
+		 HUB_INTR_TYPE_NONREPLAY | HUB_INTR_TYPE_REPLAY);
+}
+
+static void gv11b_fb_handle_other_fault_notify(struct gk20a *g,
+			 u32 fault_status)
+{
+	struct mmu_fault_info *mmfault;
+
+	mmfault = g->mm.fault_info[FAULT_TYPE_OTHER_AND_NONREPLAY];
+
+	gv11b_mm_copy_from_fault_snap_reg(g, fault_status, mmfault);
+
+	/* BAR2/Physical faults will not be snapped in hw fault buf */
+	if (mmfault->mmu_engine_id == gmmu_fault_mmu_eng_id_bar2_v()) {
+		nvgpu_err(g, "BAR2 MMU FAULT");
+		gv11b_fb_handle_bar2_fault(g, mmfault, fault_status);
+
+	} else if (mmfault->mmu_engine_id ==
+			gmmu_fault_mmu_eng_id_physical_v()) {
+		/* usually means VPR or out of bounds physical accesses */
+		nvgpu_err(g, "PHYSICAL MMU FAULT");
+
+	} else {
+		gv11b_fb_handle_mmu_fault_common(g, mmfault);
+	}
+}
+
+static void gv11b_fb_handle_dropped_mmu_fault(struct gk20a *g, u32 fault_status)
+{
+	u32 dropped_faults = 0;
+
+	dropped_faults = fb_mmu_fault_status_dropped_bar1_phys_set_f() |
+			fb_mmu_fault_status_dropped_bar1_virt_set_f() |
+			fb_mmu_fault_status_dropped_bar2_phys_set_f() |
+			fb_mmu_fault_status_dropped_bar2_virt_set_f() |
+			fb_mmu_fault_status_dropped_ifb_phys_set_f() |
+			fb_mmu_fault_status_dropped_ifb_virt_set_f() |
+			fb_mmu_fault_status_dropped_other_phys_set_f()|
+			fb_mmu_fault_status_dropped_other_virt_set_f();
+
+	if (fault_status & dropped_faults) {
+		nvgpu_err(g, "dropped mmu fault (0x%08x)",
+				fault_status & dropped_faults);
+		gk20a_writel(g, fb_mmu_fault_status_r(), dropped_faults);
+	}
+}
+
+
+static void gv11b_fb_handle_mmu_fault(struct gk20a *g, u32 niso_intr)
+{
+	u32 fault_status = gk20a_readl(g, fb_mmu_fault_status_r());
+
+	nvgpu_log(g, gpu_dbg_intr, "mmu_fault_status = 0x%08x", fault_status);
+
+	if (niso_intr &
+		 fb_niso_intr_mmu_other_fault_notify_m()) {
+
+		gv11b_fb_handle_dropped_mmu_fault(g, fault_status);
+
+		gv11b_fb_handle_other_fault_notify(g, fault_status);
+	}
+
+	if (gv11b_fb_is_fault_buf_enabled(g, NONREPLAY_REG_INDEX)) {
+
+		if (niso_intr &
+			 fb_niso_intr_mmu_nonreplayable_fault_notify_m()) {
+
+			gv11b_fb_handle_mmu_nonreplay_replay_fault(g,
+					fault_status, NONREPLAY_REG_INDEX);
+
+			/*
+			 * When all the faults are processed,
+			 * GET and PUT will have same value and mmu fault status
+			 * bit will be reset by HW
+			 */
+		}
+		if (niso_intr &
+			 fb_niso_intr_mmu_nonreplayable_fault_overflow_m()) {
+
+			gv11b_fb_handle_nonreplay_fault_overflow(g,
+				 fault_status);
+		}
+
+	}
+
+	if (gv11b_fb_is_fault_buf_enabled(g, REPLAY_REG_INDEX)) {
+
+		if (niso_intr &
+			 fb_niso_intr_mmu_replayable_fault_notify_m()) {
+
+			gv11b_fb_handle_mmu_nonreplay_replay_fault(g,
+					fault_status, REPLAY_REG_INDEX);
+		}
+		if (niso_intr &
+			 fb_niso_intr_mmu_replayable_fault_overflow_m()) {
+
+			gv11b_fb_handle_replay_fault_overflow(g,
+				 fault_status);
+		}
+
+	}
+
+	nvgpu_log(g, gpu_dbg_intr, "clear mmu fault status");
+	gk20a_writel(g, fb_mmu_fault_status_r(),
+			fb_mmu_fault_status_valid_clear_f());
+}
+
+static void gv11b_fb_hub_isr(struct gk20a *g)
+{
+	u32 status, niso_intr;
 
 	nvgpu_mutex_acquire(&g->mm.hub_isr_mutex);
 
+	niso_intr = gk20a_readl(g, fb_niso_intr_r());
+
+	nvgpu_info(g, "enter hub isr, niso_intr = 0x%08x", niso_intr);
+
 	if (niso_intr &
-		 (fb_niso_intr_hub_access_counter_notify_pending_f() |
-		  fb_niso_intr_hub_access_counter_error_pending_f())) {
+		 (fb_niso_intr_hub_access_counter_notify_m() |
+		  fb_niso_intr_hub_access_counter_error_m())) {
 
 		nvgpu_info(g, "hub access counter notify/error");
 	}
@@ -634,8 +1364,8 @@ static void gv11b_fb_hub_isr(struct gk20a *g)
 		  fb_niso_intr_mmu_nonreplayable_fault_notify_m() |
 		  fb_niso_intr_mmu_nonreplayable_fault_overflow_m())) {
 
-		nvgpu_info(g, "mmu fault : No handling in place");
-
+		nvgpu_info(g, "MMU Fault");
+		gv11b_fb_handle_mmu_fault(g, niso_intr);
 	}
 
 	nvgpu_mutex_release(&g->mm.hub_isr_mutex);