author    Deepak Nibade <dnibade@nvidia.com>    2015-12-18 02:05:04 -0500
committer Deepak Nibade <dnibade@nvidia.com>    2016-12-27 04:52:10 -0500
commit    de47308b2c2ef2d24951a7e1c4ece9964417c167 (patch)
tree      e35cf4a956fb2580cd63f50cdf9d422b2d0763df /drivers/gpu/nvgpu/gp10b/gr_gp10b.c
parent    095bd5e59d896ebab12af25ac05aa4071257ecb1 (diff)
gpu: nvgpu: add CILP support for gp10b
Add CILP support for gp10b by defining below function pointers
(with detailed explanation)

pre_process_sm_exception()
- for CILP enabled channels, get the mask of errors
- if we need to broadcast the stop_trigger, suspend all SMs
- otherwise suspend only current SM
- clear hww_global_esr values in h/w
- gr_gp10b_set_cilp_preempt_pending()
  - get ctx_id
  - using sideband method, program FECS to generate interrupt on next ctxsw
  - disable and preempt the channel/TSG
  - set cilp_preempt_pending = true
- clear single step mode
- resume current SM

handle_fecs_error()
- we get ctxsw_intr1 upon next ctxsw
- clear this interrupt
- get handle of channel on which we first triggered SM exception
- gr_gp10b_clear_cilp_preempt_pending()
  - set cilp_preempt_pending = false
- send events to channel and debug session fd

Bug 200156699

Change-Id: Ia765db47e68fb968fada6409609af505c079df53
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/925897
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
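For orientation before the diff: the heart of this patch is the cilp_preempt_pending handshake between the SM exception path (which arms the flag and records the faulting channel) and the FECS ctxsw_intr1 path (which consumes it). The following is a minimal, self-contained user-space model of that protocol; the struct layouts and function names are simplified stand-ins invented for illustration, not the driver's real types.

    /* Sketch of the cilp_preempt_pending handshake; simplified types. */
    #include <stdbool.h>
    #include <stdio.h>

    struct gr_ctx   { bool cilp_preempt_pending; };
    struct gr_state { int cilp_preempt_pending_chid; };

    /* SM exception path: arm the pending flag and remember the channel */
    static void set_cilp_preempt_pending(struct gr_state *g,
                                         struct gr_ctx *ctx, int chid)
    {
            if (ctx->cilp_preempt_pending)
                    return;                 /* already armed, nothing to do */
            ctx->cilp_preempt_pending = true;
            g->cilp_preempt_pending_chid = chid;
    }

    /* FECS ctxsw_intr1 path: consume the flag once the save completes */
    static void clear_cilp_preempt_pending(struct gr_state *g,
                                           struct gr_ctx *ctx)
    {
            if (!ctx->cilp_preempt_pending)
                    return;
            ctx->cilp_preempt_pending = false;
            g->cilp_preempt_pending_chid = -1;
    }

    int main(void)
    {
            struct gr_state g = { .cilp_preempt_pending_chid = -1 };
            struct gr_ctx ctx = { .cilp_preempt_pending = false };

            set_cilp_preempt_pending(&g, &ctx, 5);  /* SM exception, chid 5 */
            printf("pending chid = %d\n", g.cilp_preempt_pending_chid);
            clear_cilp_preempt_pending(&g, &ctx);   /* ctxsw save completed */
            printf("pending chid = %d\n", g.cilp_preempt_pending_chid);
            return 0;
    }

The single pending chid in the global gr state mirrors the driver's design choice: at most one CILP preemption is in flight at a time, so the FECS interrupt handler can recover the faulting channel without extra bookkeeping.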
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/gr_gp10b.c')
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 314 +++++++++++++++++++++++++++++++++++
 1 file changed, 314 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index a13b9a2c..91adf20c 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -19,6 +19,7 @@
 
 #include "gk20a/gr_gk20a.h"
 #include "gk20a/semaphore_gk20a.h"
+#include "gk20a/dbg_gpu_gk20a.h"
 
 #include "gm20b/gr_gm20b.h" /* for MAXWELL classes */
 #include "gp10b/gr_gp10b.h"
@@ -657,6 +658,8 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
         if (err)
                 return err;
 
+        (*gr_ctx)->t18x.ctx_id_valid = false;
+
         if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp)
                 flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP;
 
@@ -1224,6 +1227,314 @@ static void gr_gp10b_get_access_map(struct gk20a *g,
         *num_entries = ARRAY_SIZE(wl_addr_gp10b);
 }
 
+static int gr_gp10b_disable_channel_or_tsg(struct gk20a *g, struct channel_gk20a *fault_ch)
+{
+        int ret = 0;
+
+        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+        ret = gk20a_disable_channel_tsg(g, fault_ch);
+        if (ret) {
+                gk20a_err(dev_from_gk20a(g),
+                                "CILP: failed to disable channel/TSG!\n");
+                return ret;
+        }
+
+        ret = g->ops.fifo.update_runlist(g, 0, ~0, true, false);
+        if (ret) {
+                gk20a_err(dev_from_gk20a(g),
+                                "CILP: failed to restart runlist 0!");
+                return ret;
+        }
+
+        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: restarted runlist");
+
+        if (gk20a_is_channel_marked_as_tsg(fault_ch))
+                gk20a_fifo_issue_preempt(g, fault_ch->tsgid, true);
+        else
+                gk20a_fifo_issue_preempt(g, fault_ch->hw_chid, false);
+
+        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: preempted the channel/tsg");
+
+        return ret;
+}
+
+static int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g, struct channel_gk20a *fault_ch)
+{
+        int ret;
+        struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
+
+        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+        if (!gr_ctx)
+                return -EINVAL;
+
+        if (gr_ctx->t18x.cilp_preempt_pending) {
+                gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+                                "CILP is already pending for chid %d",
+                                fault_ch->hw_chid);
+                return 0;
+        }
+
+        /* get ctx_id from the ucode image */
+        if (!gr_ctx->t18x.ctx_id_valid) {
+                gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+                                "CILP: looking up ctx id");
+                ret = gr_gk20a_get_ctx_id(g, fault_ch, &gr_ctx->t18x.ctx_id);
+                if (ret) {
+                        gk20a_err(dev_from_gk20a(g), "CILP: error looking up ctx id!\n");
+                        return ret;
+                }
+                gr_ctx->t18x.ctx_id_valid = true;
+        }
+
+        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+                        "CILP: ctx id is 0x%x", gr_ctx->t18x.ctx_id);
+
+        /* send ucode method to set ctxsw interrupt */
+        ret = gr_gk20a_submit_fecs_sideband_method_op(g,
+                        (struct fecs_method_op_gk20a) {
+                        .method.data = gr_ctx->t18x.ctx_id,
+                        .method.addr =
+                        gr_fecs_method_push_adr_configure_interrupt_completion_option_v(),
+                        .mailbox = {
+                                .id = 1 /* sideband */, .data = 0,
+                                .clr = ~0, .ret = NULL,
+                                .ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
+                                .fail = 0},
+                        .cond.ok = GR_IS_UCODE_OP_EQUAL,
+                        .cond.fail = GR_IS_UCODE_OP_SKIP});
+
+        if (ret) {
+                gk20a_err(dev_from_gk20a(g),
+                                "CILP: failed to enable ctxsw interrupt!");
+                return ret;
+        }
+
+        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+                        "CILP: enabled ctxsw completion interrupt");
+
+        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+                        "CILP: disabling channel %d",
+                        fault_ch->hw_chid);
+
+        ret = gr_gp10b_disable_channel_or_tsg(g, fault_ch);
+        if (ret) {
+                gk20a_err(dev_from_gk20a(g),
+                                "CILP: failed to disable channel!!");
+                return ret;
+        }
+
+        /* set cilp_preempt_pending = true and record the channel */
+        gr_ctx->t18x.cilp_preempt_pending = true;
+        g->gr.t18x.cilp_preempt_pending_chid = fault_ch->hw_chid;
+
+        return 0;
+}
+
+static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g,
+                struct channel_gk20a *fault_ch)
+{
+        struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
+
+        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+        if (!gr_ctx)
+                return -EINVAL;
+
+        /* The ucode is self-clearing, so all we need to do here is
+           to clear cilp_preempt_pending. */
+        if (!gr_ctx->t18x.cilp_preempt_pending) {
+                gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+                                "CILP is already cleared for chid %d\n",
+                                fault_ch->hw_chid);
+                return 0;
+        }
+
+        gr_ctx->t18x.cilp_preempt_pending = false;
+        g->gr.t18x.cilp_preempt_pending_chid = -1;
+
+        return 0;
+}
+
+/**
+ * @brief Pre-process work on the SM exceptions to determine if we clear them or not.
+ *
+ * On Pascal, if we are in CILP preemption mode, preempt the channel and handle errors with special processing.
+ */
+int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
+                u32 gpc, u32 tpc, u32 global_esr, u32 warp_esr,
+                bool sm_debugger_attached, struct channel_gk20a *fault_ch,
+                bool *early_exit, bool *ignore_debugger)
+{
+        int ret;
+        bool cilp_enabled = (fault_ch->ch_ctx.gr_ctx->preempt_mode ==
+                        NVGPU_GR_PREEMPTION_MODE_CILP);
+        u32 global_mask = 0, dbgr_control0, global_esr_copy;
+        u32 offset = proj_gpc_stride_v() * gpc +
+                        proj_tpc_in_gpc_stride_v() * tpc;
+
+        *early_exit = false;
+        *ignore_debugger = false;
+
+        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n",
+                        gpc, tpc, global_esr);
+
+        if (cilp_enabled && sm_debugger_attached) {
+                if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f())
+                        gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
+                                        gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f());
+
+                if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f())
+                        gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
+                                        gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f());
+
+                global_mask = gr_gpc0_tpc0_sm_hww_global_esr_sm_to_sm_fault_pending_f() |
+                        gr_gpcs_tpcs_sm_hww_global_esr_l1_error_pending_f() |
+                        gr_gpcs_tpcs_sm_hww_global_esr_multiple_warp_errors_pending_f() |
+                        gr_gpcs_tpcs_sm_hww_global_esr_physical_stack_overflow_error_pending_f() |
+                        gr_gpcs_tpcs_sm_hww_global_esr_timeout_error_pending_f() |
+                        gr_gpcs_tpcs_sm_hww_global_esr_bpt_pause_pending_f();
+
+                if (warp_esr != 0 || (global_esr & global_mask) != 0) {
+                        *ignore_debugger = true;
+
+                        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+                                        "CILP: starting wait for LOCKED_DOWN on gpc %d tpc %d\n",
+                                        gpc, tpc);
+
+                        if (gk20a_dbg_gpu_broadcast_stop_trigger(fault_ch)) {
+                                gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+                                                "CILP: Broadcasting STOP_TRIGGER from gpc %d tpc %d\n",
+                                                gpc, tpc);
+                                gk20a_suspend_all_sms(g, global_mask, false);
+
+                                gk20a_dbg_gpu_clear_broadcast_stop_trigger(fault_ch);
+                        } else {
+                                gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+                                                "CILP: STOP_TRIGGER from gpc %d tpc %d\n",
+                                                gpc, tpc);
+                                gk20a_suspend_single_sm(g, gpc, tpc, global_mask, true);
+                        }
+
+                        /* reset the HWW errors after locking down */
+                        global_esr_copy = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+                        gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr_copy);
+                        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+                                        "CILP: HWWs cleared for gpc %d tpc %d\n",
+                                        gpc, tpc);
+
+                        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: Setting CILP preempt pending\n");
+                        ret = gr_gp10b_set_cilp_preempt_pending(g, fault_ch);
+                        if (ret) {
+                                gk20a_err(dev_from_gk20a(g), "CILP: error while setting CILP preempt pending!\n");
+                                return ret;
+                        }
+
+                        dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset);
+                        if (dbgr_control0 & gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_enable_f()) {
+                                gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+                                                "CILP: clearing SINGLE_STEP_MODE before resume for gpc %d tpc %d\n",
+                                                gpc, tpc);
+                                dbgr_control0 = set_field(dbgr_control0,
+                                                gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_m(),
+                                                gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_disable_f());
+                                gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
+                        }
+
+                        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+                                        "CILP: resume for gpc %d tpc %d\n",
+                                        gpc, tpc);
+                        gk20a_resume_single_sm(g, gpc, tpc);
+
+                        *ignore_debugger = true;
+                        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: All done on gpc %d, tpc %d\n", gpc, tpc);
+                }
+
+                *early_exit = true;
+        }
+        return 0;
+}
+
+static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid)
+{
+        struct gr_ctx_desc *gr_ctx;
+        struct channel_gk20a *ch;
+        int chid;
+        int ret = -EINVAL;
+
+        chid = g->gr.t18x.cilp_preempt_pending_chid;
+
+        ch = gk20a_channel_get(gk20a_fifo_channel_from_hw_chid(g, chid));
+        if (!ch)
+                return ret;
+
+        gr_ctx = ch->ch_ctx.gr_ctx;
+
+        if (gr_ctx->t18x.cilp_preempt_pending) {
+                *__chid = chid;
+                ret = 0;
+        }
+
+        gk20a_channel_put(ch);
+
+        return ret;
+}
+
+static int gr_gp10b_handle_fecs_error(struct gk20a *g,
+                struct channel_gk20a *__ch,
+                struct gr_gk20a_isr_data *isr_data)
+{
+        u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r());
+        struct channel_gk20a *ch;
+        int chid = -1;
+        int ret = 0;
+
+        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+        /*
+         * INTR1 (bit 1 of the HOST_INT_STATUS_CTXSW_INTR)
+         * indicates that a CILP ctxsw save has finished
+         */
+        if (gr_fecs_intr & gr_fecs_host_int_status_ctxsw_intr_f(2)) {
+                gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+                                "CILP: ctxsw save completed!\n");
+
+                /* now clear the interrupt */
+                gk20a_writel(g, gr_fecs_host_int_clear_r(),
+                                gr_fecs_host_int_clear_ctxsw_intr1_clear_f());
+
+                ret = gr_gp10b_get_cilp_preempt_pending_chid(g, &chid);
+                if (ret)
+                        goto clean_up;
+
+                ch = gk20a_channel_get(
+                                gk20a_fifo_channel_from_hw_chid(g, chid));
+                if (!ch)
+                        goto clean_up;
+
+                /* set preempt_pending to false */
+                ret = gr_gp10b_clear_cilp_preempt_pending(g, ch);
+                if (ret) {
+                        gk20a_err(dev_from_gk20a(g), "CILP: error while unsetting CILP preempt pending!\n");
+                        gk20a_channel_put(ch);
+                        goto clean_up;
+                }
+
+                if (gk20a_gr_sm_debugger_attached(g)) {
+                        gk20a_err(dev_from_gk20a(g), "CILP: posting usermode event");
+                        gk20a_dbg_gpu_post_events(ch);
+                        gk20a_channel_post_event(ch);
+                }
+
+                gk20a_channel_put(ch);
+        }
+
+clean_up:
+        /* handle any remaining interrupts */
+        return gk20a_gr_handle_fecs_error(g, __ch, isr_data);
+}
+
 static u32 gp10b_mask_hww_warp_esr(u32 hww_warp_esr)
 {
         if (!(hww_warp_esr & gr_gpc0_tpc0_sm_hww_warp_esr_addr_valid_m()))
@@ -1267,4 +1578,7 @@ void gp10b_init_gr(struct gpu_ops *gops)
         gops->gr.handle_sm_exception = gr_gp10b_handle_sm_exception;
         gops->gr.handle_tex_exception = gr_gp10b_handle_tex_exception;
         gops->gr.mask_hww_warp_esr = gp10b_mask_hww_warp_esr;
+        gops->gr.pre_process_sm_exception =
+                gr_gp10b_pre_process_sm_exception;
+        gops->gr.handle_fecs_error = gr_gp10b_handle_fecs_error;
 }
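
A note on the SINGLE_STEP_MODE clear in gr_gp10b_pre_process_sm_exception(): it uses nvgpu's set_field() read-modify-write helper, which replaces exactly the bits selected by a mask and leaves the rest of the register untouched. Below is a small standalone sketch of that pattern; the mask and bit position are assumptions made up for the demo, not the real gr_gpcs_tpcs_sm_dbgr_control0 encoding.

    #include <stdint.h>
    #include <stdio.h>

    /* same shape as nvgpu's set_field(): clear the masked bits,
     * then OR in the new field value */
    static uint32_t set_field(uint32_t val, uint32_t mask, uint32_t field_val)
    {
            return (val & ~mask) | field_val;
    }

    /* hypothetical encoding -- bit 3 stands in for SINGLE_STEP_MODE */
    #define SINGLE_STEP_MODE_M          (1u << 3)
    #define SINGLE_STEP_MODE_DISABLE_F  (0u << 3)

    int main(void)
    {
            uint32_t dbgr_control0 = 0x000000ffu; /* pretend register readback */

            dbgr_control0 = set_field(dbgr_control0, SINGLE_STEP_MODE_M,
                            SINGLE_STEP_MODE_DISABLE_F);

            printf("0x%08x\n", dbgr_control0);    /* prints 0x000000f7 */
            return 0;
    }

This is why the driver reads dbgr_control0 first: only the single-step field may change before the SM is resumed, since other debugger control bits must survive the write.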