summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
diff options
context:
space:
mode:
authorVijayakumar <vsubbu@nvidia.com>2015-06-29 05:12:56 -0400
committerVijayakumar Subbu <vsubbu@nvidia.com>2015-07-17 04:16:48 -0400
commit55c85cfa7bc297b525a3b099d469eee0b71b155a (patch)
tree9835cc4b39d45fa1dac2e4b9dc477a6d39569232 /drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
parent37869170e4f3c42fa31faa1bcda1e6c0a188179c (diff)
gpu: nvgpu: improve sched err handling
bug 200114561 1) When handling a sched error, if the CTXSW status reads "switch", check the FECS mailbox register to know whether the next or the current channel caused the error. 2) Update the recovery function to use the channel id passed to it. 3) The recovery function now passes mmu_engine_id to the mmu fault handler instead of fifo_engine_id. Change-Id: I3576cc4a90408b2f76b2c42cce19c27344531b1c Signed-off-by: Vijayakumar <vsubbu@nvidia.com> Reviewed-on: http://git-master/r/763538 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Sachin Nikam <snikam@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.c132
1 file changed, 83 insertions, 49 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 034d060a..b195cf88 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -34,6 +34,7 @@
34#include "hw_top_gk20a.h" 34#include "hw_top_gk20a.h"
35#include "hw_mc_gk20a.h" 35#include "hw_mc_gk20a.h"
36#include "hw_gr_gk20a.h" 36#include "hw_gr_gk20a.h"
37#define FECS_METHOD_WFI_RESTORE 0x80000
37 38
38static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, 39static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
39 u32 hw_chid, bool add, 40 u32 hw_chid, bool add,
@@ -1177,7 +1178,6 @@ static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg)
1177 fifo_engine_status_id_type_v(status); 1178 fifo_engine_status_id_type_v(status);
1178 bool busy = fifo_engine_status_engine_v(status) == 1179 bool busy = fifo_engine_status_engine_v(status) ==
1179 fifo_engine_status_engine_busy_v(); 1180 fifo_engine_status_engine_busy_v();
1180
1181 if (busy && ctx_id == id) { 1181 if (busy && ctx_id == id) {
1182 if ((is_tsg && type == 1182 if ((is_tsg && type ==
1183 fifo_engine_status_id_type_tsgid_v()) || 1183 fifo_engine_status_id_type_tsgid_v()) ||
@@ -1202,7 +1202,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose)
1202 engines = gk20a_fifo_engines_on_id(g, hw_chid, false); 1202 engines = gk20a_fifo_engines_on_id(g, hw_chid, false);
1203 1203
1204 if (engines) 1204 if (engines)
1205 gk20a_fifo_recover(g, engines, hw_chid, false, verbose); 1205 gk20a_fifo_recover(g, engines, hw_chid, false, true, verbose);
1206 else { 1206 else {
1207 struct channel_gk20a *ch = &g->fifo.channel[hw_chid]; 1207 struct channel_gk20a *ch = &g->fifo.channel[hw_chid];
1208 1208
@@ -1232,7 +1232,7 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose)
1232 engines = gk20a_fifo_engines_on_id(g, tsgid, true); 1232 engines = gk20a_fifo_engines_on_id(g, tsgid, true);
1233 1233
1234 if (engines) 1234 if (engines)
1235 gk20a_fifo_recover(g, engines, tsgid, true, verbose); 1235 gk20a_fifo_recover(g, engines, tsgid, true, true, verbose);
1236 else { 1236 else {
1237 struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid]; 1237 struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid];
1238 1238
@@ -1248,13 +1248,16 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose)
1248 1248
1249void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, 1249void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
1250 u32 hw_id, bool id_is_tsg, 1250 u32 hw_id, bool id_is_tsg,
1251 bool verbose) 1251 bool id_is_known, bool verbose)
1252{ 1252{
1253 unsigned long engine_id, i; 1253 unsigned long engine_id, i;
1254 unsigned long _engine_ids = __engine_ids; 1254 unsigned long _engine_ids = __engine_ids;
1255 unsigned long engine_ids = 0; 1255 unsigned long engine_ids = 0;
1256 u32 val; 1256 u32 val;
1257 u32 mmu_fault_engines = 0; 1257 u32 mmu_fault_engines = 0;
1258 u32 ref_type;
1259 u32 ref_id;
1260 u32 ref_id_is_tsg = false;
1258 1261
1259 if (verbose) 1262 if (verbose)
1260 gk20a_debug_dump(g->dev); 1263 gk20a_debug_dump(g->dev);
@@ -1262,44 +1265,65 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
1262 if (g->ops.ltc.flush) 1265 if (g->ops.ltc.flush)
1263 g->ops.ltc.flush(g); 1266 g->ops.ltc.flush(g);
1264 1267
1265 /* store faulted engines in advance */ 1268 if (id_is_known) {
1266 for_each_set_bit(engine_id, &_engine_ids, 32) { 1269 engine_ids = gk20a_fifo_engines_on_id(g, hw_id, id_is_tsg);
1267 u32 ref_type; 1270 ref_id = hw_id;
1268 u32 ref_id; 1271 ref_type = id_is_tsg ?
1269 gk20a_fifo_get_faulty_id_type(g, engine_id, &ref_id, 1272 fifo_engine_status_id_type_tsgid_v() :
1270 &ref_type); 1273 fifo_engine_status_id_type_chid_v();
1271 1274 ref_id_is_tsg = id_is_tsg;
1272 /* Reset *all* engines that use the 1275 /* atleast one engine will get passed during sched err*/
1273 * same channel as faulty engine */ 1276 engine_ids |= __engine_ids;
1274 for (i = 0; i < g->fifo.max_engines; i++) { 1277 for_each_set_bit(engine_id, &engine_ids, 32) {
1275 u32 type; 1278 mmu_fault_engines |=
1276 u32 id; 1279 BIT(gk20a_engine_id_to_mmu_id(engine_id));
1277 gk20a_fifo_get_faulty_id_type(g, i, &id, &type); 1280 }
1278 if (ref_type == type && ref_id == id) { 1281 } else {
1279 engine_ids |= BIT(i); 1282 /* store faulted engines in advance */
1280 mmu_fault_engines |= 1283 for_each_set_bit(engine_id, &_engine_ids, 32) {
1284 gk20a_fifo_get_faulty_id_type(g, engine_id, &ref_id,
1285 &ref_type);
1286 if (ref_type == fifo_engine_status_id_type_tsgid_v())
1287 ref_id_is_tsg = true;
1288 else
1289 ref_id_is_tsg = false;
1290 /* Reset *all* engines that use the
1291 * same channel as faulty engine */
1292 for (i = 0; i < g->fifo.max_engines; i++) {
1293 u32 type;
1294 u32 id;
1295
1296 gk20a_fifo_get_faulty_id_type(g, i, &id, &type);
1297 if (ref_type == type && ref_id == id) {
1298 engine_ids |= BIT(i);
1299 mmu_fault_engines |=
1281 BIT(gk20a_engine_id_to_mmu_id(i)); 1300 BIT(gk20a_engine_id_to_mmu_id(i));
1301 }
1282 } 1302 }
1283 } 1303 }
1284 } 1304 }
1285 1305
1286 /* 1306 if (mmu_fault_engines) {
1287 * sched error prevents recovery, and ctxsw error will retrigger 1307 /*
1288 * every 100ms. Disable the sched error to allow recovery. 1308 * sched error prevents recovery, and ctxsw error will retrigger
1289 */ 1309 * every 100ms. Disable the sched error to allow recovery.
1290 val = gk20a_readl(g, fifo_intr_en_0_r()); 1310 */
1291 val &= ~(fifo_intr_en_0_sched_error_m() | fifo_intr_en_0_mmu_fault_m()); 1311 val = gk20a_readl(g, fifo_intr_en_0_r());
1292 gk20a_writel(g, fifo_intr_en_0_r(), val); 1312 val &= ~(fifo_intr_en_0_sched_error_m() |
1293 gk20a_writel(g, fifo_intr_0_r(), 1313 fifo_intr_en_0_mmu_fault_m());
1294 fifo_intr_0_sched_error_reset_f()); 1314 gk20a_writel(g, fifo_intr_en_0_r(), val);
1295 1315 gk20a_writel(g, fifo_intr_0_r(),
1296 g->ops.fifo.trigger_mmu_fault(g, engine_ids); 1316 fifo_intr_0_sched_error_reset_f());
1297 gk20a_fifo_handle_mmu_fault(g, engine_ids, hw_id, id_is_tsg); 1317
1298 1318 g->ops.fifo.trigger_mmu_fault(g, engine_ids);
1299 val = gk20a_readl(g, fifo_intr_en_0_r()); 1319 gk20a_fifo_handle_mmu_fault(g, mmu_fault_engines, ref_id,
1300 val |= fifo_intr_en_0_mmu_fault_f(1) 1320 ref_id_is_tsg);
1301 | fifo_intr_en_0_sched_error_f(1); 1321
1302 gk20a_writel(g, fifo_intr_en_0_r(), val); 1322 val = gk20a_readl(g, fifo_intr_en_0_r());
1323 val |= fifo_intr_en_0_mmu_fault_f(1)
1324 | fifo_intr_en_0_sched_error_f(1);
1325 gk20a_writel(g, fifo_intr_en_0_r(), val);
1326 }
1303} 1327}
1304 1328
1305/* force reset channel and tsg (if it's part of one) */ 1329/* force reset channel and tsg (if it's part of one) */
@@ -1340,7 +1364,7 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
1340 int id = -1; 1364 int id = -1;
1341 bool non_chid = false; 1365 bool non_chid = false;
1342 bool ret = false; 1366 bool ret = false;
1343 1367 u32 mailbox2;
1344 /* read the scheduler error register */ 1368 /* read the scheduler error register */
1345 sched_error = gk20a_readl(g, fifo_intr_sched_error_r()); 1369 sched_error = gk20a_readl(g, fifo_intr_sched_error_r());
1346 1370
@@ -1362,15 +1386,24 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
1362 || ctx_status == 1386 || ctx_status ==
1363 fifo_engine_status_ctx_status_ctxsw_load_v()); 1387 fifo_engine_status_ctx_status_ctxsw_load_v());
1364 1388
1365 if (failing_engine) { 1389 if (!failing_engine)
1366 id = (ctx_status == 1390 continue;
1367 fifo_engine_status_ctx_status_ctxsw_load_v()) ? 1391 if (ctx_status ==
1368 fifo_engine_status_next_id_v(status) : 1392 fifo_engine_status_ctx_status_ctxsw_load_v()) {
1369 fifo_engine_status_id_v(status); 1393 id = fifo_engine_status_next_id_v(status);
1370 non_chid = fifo_pbdma_status_id_type_v(status) != 1394 non_chid = fifo_pbdma_status_id_type_v(status)
1371 fifo_pbdma_status_id_type_chid_v(); 1395 != fifo_pbdma_status_id_type_chid_v();
1372 break; 1396 } else if (ctx_status ==
1397 fifo_engine_status_ctx_status_ctxsw_switch_v()) {
1398 mailbox2 = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(2));
1399 if (mailbox2 & FECS_METHOD_WFI_RESTORE)
1400 id = fifo_engine_status_next_id_v(status);
1401 else
1402 id = fifo_engine_status_id_v(status);
1403 } else {
1404 id = fifo_engine_status_id_v(status);
1373 } 1405 }
1406 break;
1374 } 1407 }
1375 1408
1376 /* could not find the engine - should never happen */ 1409 /* could not find the engine - should never happen */
@@ -1387,7 +1420,8 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
1387 struct channel_gk20a *ch = &f->channel[id]; 1420 struct channel_gk20a *ch = &f->channel[id];
1388 1421
1389 if (non_chid) { 1422 if (non_chid) {
1390 gk20a_fifo_recover(g, BIT(engine_id), id, true, true); 1423 gk20a_fifo_recover(g, BIT(engine_id), id, true,
1424 true, true);
1391 ret = true; 1425 ret = true;
1392 goto err; 1426 goto err;
1393 } 1427 }
@@ -1404,7 +1438,7 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
1404 "engine = %u, ch = %d", engine_id, id); 1438 "engine = %u, ch = %d", engine_id, id);
1405 gk20a_gr_debug_dump(g->dev); 1439 gk20a_gr_debug_dump(g->dev);
1406 gk20a_fifo_recover(g, BIT(engine_id), id, false, 1440 gk20a_fifo_recover(g, BIT(engine_id), id, false,
1407 ch->timeout_debug_dump); 1441 true, ch->timeout_debug_dump);
1408 ret = true; 1442 ret = true;
1409 } else { 1443 } else {
1410 gk20a_dbg_info( 1444 gk20a_dbg_info(
@@ -1899,7 +1933,7 @@ static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id)
1899 } 1933 }
1900 1934
1901 if (engines) 1935 if (engines)
1902 gk20a_fifo_recover(g, engines, ~(u32)0, false, true); 1936 gk20a_fifo_recover(g, engines, ~(u32)0, false, false, true);
1903} 1937}
1904 1938
1905static int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id) 1939static int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id)