diff options
author | Vinod G <vinodg@nvidia.com> | 2018-08-29 15:32:25 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-08-31 21:57:23 -0400 |
commit | f187e0bf442c3b0a08c46b21196f06a18c8220a0 (patch) | |
tree | 48820c076f6ab4a2bad6ab6053d26293c99326c3 /drivers/gpu | |
parent | b25d5d86caa049201ddcea77cf1a733a85090698 (diff) |
gpu: nvgpu: Move SM_MASK_TYPE setting to TSG level
Moved the SM_MASK_TYPE variable from the GR struct to
the TSG struct, since SM error registers are context based.
In the dbg_session IOCTL to SET_SM_MASK_TYPE, kernel
code iterates to the TSG associated with the first channel
and sets the mask_type on that context.
Bug 200412641
Change-Id: Ic91944037ad2447f403b4803d5266ae6250ba4c9
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1809322
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/tsg_gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 37 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/ioctl_dbg.c | 71 |
6 files changed, 64 insertions, 61 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h index 4d3c4d74..50002557 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h | |||
@@ -72,12 +72,6 @@ struct dbg_session_gk20a { | |||
72 | bool broadcast_stop_trigger; | 72 | bool broadcast_stop_trigger; |
73 | 73 | ||
74 | struct nvgpu_mutex ioctl_lock; | 74 | struct nvgpu_mutex ioctl_lock; |
75 | |||
76 | /* | ||
77 | * sm set exception type mask flag, to check whether | ||
78 | * exception type mask is requested or not. | ||
79 | */ | ||
80 | bool is_sm_exception_type_mask_set; | ||
81 | }; | 75 | }; |
82 | 76 | ||
83 | struct dbg_session_data { | 77 | struct dbg_session_data { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 0d32cca3..303e1f53 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -420,11 +420,6 @@ struct gr_gk20a { | |||
420 | u32 no_of_sm; | 420 | u32 no_of_sm; |
421 | struct sm_info *sm_to_cluster; | 421 | struct sm_info *sm_to_cluster; |
422 | 422 | ||
423 | #define NVGPU_SM_EXCEPTION_TYPE_MASK_NONE (0x0U) | ||
424 | #define NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL (0x1U << 0) | ||
425 | u32 sm_exception_mask_type; | ||
426 | u32 sm_exception_mask_refcount; | ||
427 | |||
428 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 423 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
429 | struct nvgpu_mutex cs_lock; | 424 | struct nvgpu_mutex cs_lock; |
430 | struct gk20a_cs_snapshot *cs_data; | 425 | struct gk20a_cs_snapshot *cs_data; |
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c index 624ee1d7..506d4330 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c | |||
@@ -304,6 +304,7 @@ struct tsg_gk20a *gk20a_tsg_open(struct gk20a *g, pid_t pid) | |||
304 | tsg->timeslice_scale = 0; | 304 | tsg->timeslice_scale = 0; |
305 | tsg->runlist_id = ~0; | 305 | tsg->runlist_id = ~0; |
306 | tsg->tgid = pid; | 306 | tsg->tgid = pid; |
307 | tsg->sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE; | ||
307 | 308 | ||
308 | if (g->ops.fifo.init_eng_method_buffers) | 309 | if (g->ops.fifo.init_eng_method_buffers) |
309 | g->ops.fifo.init_eng_method_buffers(g, tsg); | 310 | g->ops.fifo.init_eng_method_buffers(g, tsg); |
@@ -373,6 +374,7 @@ void gk20a_tsg_release(struct nvgpu_ref *ref) | |||
373 | release_used_tsg(&g->fifo, tsg); | 374 | release_used_tsg(&g->fifo, tsg); |
374 | 375 | ||
375 | tsg->runlist_id = ~0; | 376 | tsg->runlist_id = ~0; |
377 | tsg->sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE; | ||
376 | 378 | ||
377 | nvgpu_log(g, gpu_dbg_fn, "tsg released %d\n", tsg->tsgid); | 379 | nvgpu_log(g, gpu_dbg_fn, "tsg released %d\n", tsg->tsgid); |
378 | } | 380 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h index 67ccb9f5..1e3be553 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | |||
@@ -78,6 +78,10 @@ struct tsg_gk20a { | |||
78 | bool in_use; | 78 | bool in_use; |
79 | 79 | ||
80 | struct nvgpu_tsg_sm_error_state *sm_error_states; | 80 | struct nvgpu_tsg_sm_error_state *sm_error_states; |
81 | |||
82 | #define NVGPU_SM_EXCEPTION_TYPE_MASK_NONE (0x0U) | ||
83 | #define NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL (0x1U << 0) | ||
84 | u32 sm_exception_mask_type; | ||
81 | }; | 85 | }; |
82 | 86 | ||
83 | int gk20a_enable_tsg(struct tsg_gk20a *tsg); | 87 | int gk20a_enable_tsg(struct tsg_gk20a *tsg); |
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 9a6afa3e..aeb49982 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -2239,7 +2239,7 @@ static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g, | |||
2239 | static bool gr_gv11b_check_warp_esr_error(struct gk20a *g, u32 warp_esr_error) | 2239 | static bool gr_gv11b_check_warp_esr_error(struct gk20a *g, u32 warp_esr_error) |
2240 | { | 2240 | { |
2241 | u32 index = 0U; | 2241 | u32 index = 0U; |
2242 | u32 esr_err = gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_f(); | 2242 | bool esr_err = false; |
2243 | 2243 | ||
2244 | struct warp_esr_error_table_s { | 2244 | struct warp_esr_error_table_s { |
2245 | u32 error_value; | 2245 | u32 error_value; |
@@ -2285,7 +2285,7 @@ static bool gr_gv11b_check_warp_esr_error(struct gk20a *g, u32 warp_esr_error) | |||
2285 | 2285 | ||
2286 | for (index = 0; index < ARRAY_SIZE(warp_esr_error_table); index++) { | 2286 | for (index = 0; index < ARRAY_SIZE(warp_esr_error_table); index++) { |
2287 | if (warp_esr_error_table[index].error_value == warp_esr_error) { | 2287 | if (warp_esr_error_table[index].error_value == warp_esr_error) { |
2288 | esr_err = warp_esr_error_table[index].error_value; | 2288 | esr_err = true; |
2289 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | 2289 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, |
2290 | "WARP_ESR %s(0x%x)", | 2290 | "WARP_ESR %s(0x%x)", |
2291 | warp_esr_error_table[index].error_name, | 2291 | warp_esr_error_table[index].error_name, |
@@ -2294,8 +2294,9 @@ static bool gr_gv11b_check_warp_esr_error(struct gk20a *g, u32 warp_esr_error) | |||
2294 | } | 2294 | } |
2295 | } | 2295 | } |
2296 | 2296 | ||
2297 | return (esr_err == 0U) ? false : true; | 2297 | return esr_err; |
2298 | } | 2298 | } |
2299 | |||
2299 | static int gr_gv11b_handle_all_warp_esr_errors(struct gk20a *g, | 2300 | static int gr_gv11b_handle_all_warp_esr_errors(struct gk20a *g, |
2300 | u32 gpc, u32 tpc, u32 sm, | 2301 | u32 gpc, u32 tpc, u32 sm, |
2301 | u32 warp_esr_error, | 2302 | u32 warp_esr_error, |
@@ -2316,24 +2317,24 @@ static int gr_gv11b_handle_all_warp_esr_errors(struct gk20a *g, | |||
2316 | return 0; | 2317 | return 0; |
2317 | } | 2318 | } |
2318 | 2319 | ||
2319 | /* | ||
2320 | * Check SET_EXCEPTION_TYPE_MASK is being set. | ||
2321 | * If set, skip the recovery and trigger CILP | ||
2322 | * If not set, trigger the recovery. | ||
2323 | */ | ||
2324 | if ((g->gr.sm_exception_mask_type & | ||
2325 | NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL) == | ||
2326 | NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL) { | ||
2327 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
2328 | "SM Exception Type Mask set %d," | ||
2329 | "skip recovery", | ||
2330 | g->gr.sm_exception_mask_type); | ||
2331 | return 0; | ||
2332 | } | ||
2333 | |||
2334 | if (fault_ch) { | 2320 | if (fault_ch) { |
2335 | tsg = &g->fifo.tsg[fault_ch->tsgid]; | 2321 | tsg = &g->fifo.tsg[fault_ch->tsgid]; |
2336 | 2322 | ||
2323 | /* | ||
2324 | * Check SET_EXCEPTION_TYPE_MASK is being set. | ||
2325 | * If set, skip the recovery and trigger CILP | ||
2326 | * If not set, trigger the recovery. | ||
2327 | */ | ||
2328 | if ((tsg->sm_exception_mask_type & | ||
2329 | NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL) == | ||
2330 | NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL) { | ||
2331 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
2332 | "SM Exception Type Mask set %d," | ||
2333 | "skip recovery", | ||
2334 | tsg->sm_exception_mask_type); | ||
2335 | return 0; | ||
2336 | } | ||
2337 | |||
2337 | nvgpu_rwsem_down_read(&tsg->ch_list_lock); | 2338 | nvgpu_rwsem_down_read(&tsg->ch_list_lock); |
2338 | nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list, | 2339 | nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list, |
2339 | channel_gk20a, ch_entry) { | 2340 | channel_gk20a, ch_entry) { |
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c index 4ac4fb62..3931ab12 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c | |||
@@ -223,10 +223,6 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp) | |||
223 | nvgpu_kfree(g, prof_obj); | 223 | nvgpu_kfree(g, prof_obj); |
224 | } | 224 | } |
225 | } | 225 | } |
226 | |||
227 | nvgpu_set_sm_exception_type_mask_locked(dbg_s, | ||
228 | NVGPU_SM_EXCEPTION_TYPE_MASK_NONE); | ||
229 | |||
230 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 226 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
231 | 227 | ||
232 | nvgpu_mutex_destroy(&dbg_s->ch_list_lock); | 228 | nvgpu_mutex_destroy(&dbg_s->ch_list_lock); |
@@ -499,7 +495,6 @@ static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, | |||
499 | dbg_s->is_profiler = is_profiler; | 495 | dbg_s->is_profiler = is_profiler; |
500 | dbg_s->is_pg_disabled = false; | 496 | dbg_s->is_pg_disabled = false; |
501 | dbg_s->is_timeout_disabled = false; | 497 | dbg_s->is_timeout_disabled = false; |
502 | dbg_s->is_sm_exception_type_mask_set = false; | ||
503 | 498 | ||
504 | nvgpu_cond_init(&dbg_s->dbg_events.wait_queue); | 499 | nvgpu_cond_init(&dbg_s->dbg_events.wait_queue); |
505 | nvgpu_init_list_node(&dbg_s->ch_list); | 500 | nvgpu_init_list_node(&dbg_s->ch_list); |
@@ -512,9 +507,6 @@ static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, | |||
512 | dbg_s->dbg_events.events_enabled = false; | 507 | dbg_s->dbg_events.events_enabled = false; |
513 | dbg_s->dbg_events.num_pending_events = 0; | 508 | dbg_s->dbg_events.num_pending_events = 0; |
514 | 509 | ||
515 | nvgpu_set_sm_exception_type_mask_locked(dbg_s, | ||
516 | NVGPU_SM_EXCEPTION_TYPE_MASK_NONE); | ||
517 | |||
518 | return 0; | 510 | return 0; |
519 | 511 | ||
520 | err_destroy_lock: | 512 | err_destroy_lock: |
@@ -1887,34 +1879,29 @@ static int nvgpu_set_sm_exception_type_mask_locked( | |||
1887 | u32 exception_mask) | 1879 | u32 exception_mask) |
1888 | { | 1880 | { |
1889 | struct gk20a *g = dbg_s->g; | 1881 | struct gk20a *g = dbg_s->g; |
1890 | struct gr_gk20a *gr = &g->gr; | ||
1891 | int err = 0; | 1882 | int err = 0; |
1883 | struct channel_gk20a *ch = NULL; | ||
1892 | 1884 | ||
1893 | switch (exception_mask) { | 1885 | /* |
1894 | case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL: | 1886 | * Obtain the fisrt channel from the channel list in |
1895 | gr->sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL; | 1887 | * dbg_session, find the context associated with channel |
1896 | if (dbg_s->is_sm_exception_type_mask_set == false) { | 1888 | * and set the sm_mask_type to that context |
1897 | gr->sm_exception_mask_refcount++; | 1889 | */ |
1898 | dbg_s->is_sm_exception_type_mask_set = true; | 1890 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); |
1899 | } | 1891 | if (ch != NULL) { |
1900 | break; | 1892 | struct tsg_gk20a *tsg; |
1901 | case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_NONE: | 1893 | |
1902 | if (dbg_s->is_sm_exception_type_mask_set) { | 1894 | tsg = tsg_gk20a_from_ch(ch); |
1903 | gr->sm_exception_mask_refcount--; | 1895 | if (tsg != NULL) { |
1904 | dbg_s->is_sm_exception_type_mask_set = false; | 1896 | tsg->sm_exception_mask_type = exception_mask; |
1897 | goto type_mask_end; | ||
1905 | } | 1898 | } |
1906 | if (gr->sm_exception_mask_refcount == 0) | ||
1907 | gr->sm_exception_mask_type = | ||
1908 | NVGPU_SM_EXCEPTION_TYPE_MASK_NONE; | ||
1909 | break; | ||
1910 | default: | ||
1911 | nvgpu_err(g, | ||
1912 | "unrecognized dbg sm exception type mask: 0x%x", | ||
1913 | exception_mask); | ||
1914 | err = -EINVAL; | ||
1915 | break; | ||
1916 | } | 1899 | } |
1917 | 1900 | ||
1901 | nvgpu_log_fn(g, "unable to find the TSG\n"); | ||
1902 | err = -EINVAL; | ||
1903 | |||
1904 | type_mask_end: | ||
1918 | return err; | 1905 | return err; |
1919 | } | 1906 | } |
1920 | 1907 | ||
@@ -1924,10 +1911,30 @@ static int nvgpu_dbg_gpu_set_sm_exception_type_mask( | |||
1924 | { | 1911 | { |
1925 | int err = 0; | 1912 | int err = 0; |
1926 | struct gk20a *g = dbg_s->g; | 1913 | struct gk20a *g = dbg_s->g; |
1914 | u32 sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE; | ||
1915 | |||
1916 | switch (args->exception_type_mask) { | ||
1917 | case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL: | ||
1918 | sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL; | ||
1919 | break; | ||
1920 | case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_NONE: | ||
1921 | sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE; | ||
1922 | break; | ||
1923 | default: | ||
1924 | nvgpu_err(g, | ||
1925 | "unrecognized dbg sm exception type mask: 0x%x", | ||
1926 | args->exception_type_mask); | ||
1927 | err = -EINVAL; | ||
1928 | break; | ||
1929 | } | ||
1930 | |||
1931 | if (err != 0) { | ||
1932 | return err; | ||
1933 | } | ||
1927 | 1934 | ||
1928 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | 1935 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); |
1929 | err = nvgpu_set_sm_exception_type_mask_locked(dbg_s, | 1936 | err = nvgpu_set_sm_exception_type_mask_locked(dbg_s, |
1930 | args->exception_type_mask); | 1937 | sm_exception_mask_type); |
1931 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 1938 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
1932 | 1939 | ||
1933 | return err; | 1940 | return err; |