From 6299b00beb9dabdd53c211b02658d022827b3232 Mon Sep 17 00:00:00 2001
From: Lakshmanan M
Date: Thu, 2 Jun 2016 09:34:46 +0530
Subject: gpu: nvgpu: Add multiple engine and runlist support

This CL covers the following modifications:

1) Added multiple engine_info support
2) Added multiple runlist_info support
3) Initial changes for ASYNC CE support
4) Added ASYNC CE interrupt handling support for gm206 GPU family
5) Added generic mechanism to identify the CE engine pri_base address for
   gm206 (CE0, CE1 and CE2)
6) Removed hard-coded engine_id logic and replaced it with a generic lookup
7) Code cleanup for readability

JIRA DNVGPU-26

Change-Id: I2c3846c40bcc8d10c2dfb225caa4105fc9123b65
Signed-off-by: Lakshmanan M
Reviewed-on: http://git-master/r/1155963
Reviewed-by: Terje Bergstrom
Tested-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/Makefile              |   3 +-
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.c     |   6 +-
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.h     |   6 +-
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c |  12 +-
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c |  10 +-
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c    | 489 +++++++++++++++++++++++---------
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.h    |  19 +-
 drivers/gpu/nvgpu/gk20a/gk20a.c         |   2 +-
 drivers/gpu/nvgpu/gk20a/gk20a.h         |   6 +-
 drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c   |   7 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c      |  73 +++--
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h      |   8 +
 drivers/gpu/nvgpu/gk20a/hal_gk20a.c     |   3 +
 drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h |   4 +
 drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h  |  16 ++
 drivers/gpu/nvgpu/gk20a/mc_gk20a.c      |  57 +++-
 drivers/gpu/nvgpu/gk20a/pmu_gk20a.c     |  38 ++-
 drivers/gpu/nvgpu/gm206/ce_gm206.c      | 107 +++++++
 drivers/gpu/nvgpu/gm206/ce_gm206.h      |  26 ++
 drivers/gpu/nvgpu/gm206/fifo_gm206.c    |   2 +
 drivers/gpu/nvgpu/gm206/hal_gm206.c     |   7 +-
 drivers/gpu/nvgpu/gm206/hw_proj_gm206.h |   4 +
 drivers/gpu/nvgpu/gm206/hw_top_gm206.h  |  16 ++
 drivers/gpu/nvgpu/gm20b/fifo_gm20b.c    |  15 +-
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c      |   5 +-
 drivers/gpu/nvgpu/gm20b/hal_gm20b.c     |   3 +
 drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h |   4 +
 drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h  |  16 ++
 drivers/gpu/nvgpu/vgpu/fifo_vgpu.c      |  64 ++---
 29 files changed, 791 insertions(+), 237 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gm206/ce_gm206.c
 create mode 100644 drivers/gpu/nvgpu/gm206/ce_gm206.h

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 2857cd5a..9e60e6a5 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -80,7 +80,8 @@ nvgpu-y := \
 	gm206/hal_gm206.o \
 	gm206/gr_gm206.o \
 	gm206/acr_gm206.o \
-	gm206/pmu_gm206.o
+	gm206/pmu_gm206.o \
+	gm206/ce_gm206.o

 nvgpu-$(CONFIG_TEGRA_GK20A) += gk20a/platform_gk20a_tegra.o
 nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index 00635c4d..96d38b11 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -1,7 +1,7 @@
 /*
  * GK20A Graphics Copy Engine (gr host)
  *
- * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -57,7 +57,7 @@ static u32 ce2_launcherr_isr(struct gk20a *g, u32 fifo_intr) return ce2_intr_status_launcherr_pending_f(); } -void gk20a_ce2_isr(struct gk20a *g) +void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base) { u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r()); u32 clear_intr = 0; @@ -75,7 +75,7 @@ void gk20a_ce2_isr(struct gk20a *g) return; } -void gk20a_ce2_nonstall_isr(struct gk20a *g) +void gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) { u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r()); diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h index bf4e3066..5ceb69e1 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h @@ -3,7 +3,7 @@ * * GK20A graphics copy engine (gr host) * - * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -25,7 +25,7 @@ #include "tsg_gk20a.h" void gk20a_init_ce2(struct gpu_ops *gops); -void gk20a_ce2_isr(struct gk20a *g); -void gk20a_ce2_nonstall_isr(struct gk20a *g); +void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base); +void gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); #endif /*__CE2_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 3159f026..a5243834 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -329,11 +329,17 @@ static void channel_gk20a_bind(struct channel_gk20a *c) { struct gk20a *g = c->g; struct fifo_gk20a *f = &g->fifo; - struct fifo_engine_info_gk20a *engine_info = - f->engine_info + ENGINE_GR_GK20A; + u32 engine_id; + struct fifo_engine_info_gk20a *engine_info = NULL; u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block) >> ram_in_base_shift_v(); + /* TODO:Need to handle non GR engine channel bind path */ + engine_id = gk20a_fifo_get_gr_engine_id(g); + + /* Consider 1st available GR engine */ + engine_info = (f->engine_info + engine_id); + gk20a_dbg_info("bind channel %d inst ptr 0x%08x", c->hw_chid, inst_ptr); @@ -1716,7 +1722,7 @@ static void gk20a_channel_timeout_handler(struct work_struct *work) /* Get failing engine data */ engine_id = gk20a_fifo_get_failing_engine_data(g, &id, &is_tsg); - if (engine_id >= g->fifo.max_engines) { + if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) { /* If no failing engine, abort the channels */ if (gk20a_is_channel_marked_as_tsg(ch)) { struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid]; diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 6b9238e9..5e57502e 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -1102,11 +1102,10 @@ static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s, false); g->ops.clock_gating.slcg_ltc_load_gating_prod(g, false); - gr_gk20a_init_blcg_mode(g, BLCG_RUN, ENGINE_GR_GK20A); + gr_gk20a_init_cg_mode(g, BLCG_MODE, BLCG_RUN); g->elcg_enabled = false; - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); } @@ -1126,9 +1125,8 @@ static int 
dbg_set_powergate(struct dbg_session_gk20a *dbg_s, --g->dbg_powergating_disabled_refcount == 0) { g->elcg_enabled = true; - gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A); - gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A); - gr_gk20a_init_blcg_mode(g, BLCG_AUTO, ENGINE_GR_GK20A); + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_AUTO); + gr_gk20a_init_cg_mode(g, BLCG_MODE, BLCG_AUTO); g->ops.clock_gating.slcg_ltc_load_gating_prod(g, g->slcg_enabled); diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 17efe5ca..766ea749 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -40,6 +40,131 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, u32 hw_chid, bool add, bool wait_for_finish); +u32 gk20a_fifo_get_engine_ids(struct gk20a *g, + u32 engine_id[], u32 engine_id_sz, + u32 engine_enum) +{ + struct fifo_gk20a *f = NULL; + u32 instance_cnt = 0; + u32 engine_id_idx; + u32 active_engine_id = 0; + struct fifo_engine_info_gk20a *info = NULL; + + if (g && engine_id_sz && (engine_enum < ENGINE_INVAL_GK20A)) { + f = &g->fifo; + for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) { + active_engine_id = f->active_engines_list[engine_id_idx]; + info = &f->engine_info[active_engine_id]; + + if (info->engine_enum == engine_enum) { + if (instance_cnt < engine_id_sz) { + engine_id[instance_cnt] = active_engine_id; + ++instance_cnt; + } else { + gk20a_dbg_info("warning engine_id table sz is small %d", + engine_id_sz); + } + } + } + } + return instance_cnt; +} + +struct fifo_engine_info_gk20a *gk20a_fifo_get_engine_info(struct gk20a *g, u32 engine_id) +{ + struct fifo_gk20a *f = NULL; + u32 engine_id_idx; + struct fifo_engine_info_gk20a *info = NULL; + + if (!g) + return info; + + f = &g->fifo; + + if (engine_id < f->max_engines) { + for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) { + if (engine_id == f->active_engines_list[engine_id_idx]) { + info = &f->engine_info[engine_id]; + break; + } + } + } + + if (!info) + gk20a_err(g->dev, "engine_id is not in active list/invalid %d", engine_id); + + return info; +} + +bool gk20a_fifo_is_valid_engine_id(struct gk20a *g, u32 engine_id) +{ + struct fifo_gk20a *f = NULL; + u32 engine_id_idx; + bool valid = false; + + if (!g) + return valid; + + f = &g->fifo; + + if (engine_id < f->max_engines) { + for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) { + if (engine_id == f->active_engines_list[engine_id_idx]) { + valid = true; + break; + } + } + } + + if (!valid) + gk20a_err(g->dev, "engine_id is not in active list/invalid %d", engine_id); + + return valid; +} + +u32 gk20a_fifo_get_gr_engine_id(struct gk20a *g) +{ + u32 gr_engine_cnt = 0; + u32 gr_engine_id = FIFO_INVAL_ENGINE_ID; + + /* Consider 1st available GR engine */ + gr_engine_cnt = gk20a_fifo_get_engine_ids(g, &gr_engine_id, + 1, ENGINE_GR_GK20A); + + if (!gr_engine_cnt) { + gk20a_err(dev_from_gk20a(g), "No GR engine available on this device!\n"); + } + + return gr_engine_id; +} + +u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g) +{ + u32 reset_mask = 0; + u32 engine_enum = ENGINE_INVAL_GK20A; + struct fifo_gk20a *f = NULL; + u32 engine_id_idx; + struct fifo_engine_info_gk20a *engine_info; + u32 active_engine_id = 0; + + if (!g) + return reset_mask; + + f = &g->fifo; + + for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) { + active_engine_id = f->active_engines_list[engine_id_idx]; + 
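/* Illustration only, not part of this patch: a minimal sketch of how a
 * caller can use the new enumeration helper introduced above. The
 * function name and the table size of 8 are hypothetical. */
static u32 my_first_async_ce_id(struct gk20a *g)
{
	u32 ce_ids[8];
	u32 count;

	/* Collect active engine IDs whose type is async copy engine */
	count = gk20a_fifo_get_engine_ids(g, ce_ids, 8,
				ENGINE_ASYNC_CE_GK20A);

	/* Take the first instance; fall back to the invalid marker */
	return count ? ce_ids[0] : FIFO_INVAL_ENGINE_ID;
}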
engine_info = &f->engine_info[active_engine_id];
+		engine_enum = engine_info->engine_enum;
+
+		if ((engine_enum == ENGINE_GRCE_GK20A) ||
+			(engine_enum == ENGINE_ASYNC_CE_GK20A))
+			reset_mask |= engine_info->reset_mask;
+	}
+
+	return reset_mask;
+}
+
 /*
  * Link engine IDs to MMU IDs and vice versa.
  */
@@ -47,12 +172,14 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 static inline u32 gk20a_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id)
 {
 	u32 fault_id = ~0;
+	struct fifo_engine_info_gk20a *engine_info;

-	if (engine_id < ENGINE_INVAL_GK20A) {
-		struct fifo_engine_info_gk20a *info =
-			&g->fifo.engine_info[engine_id];
+	engine_info = gk20a_fifo_get_engine_info(g, engine_id);

-		fault_id = info->fault_id;
+	if (engine_info) {
+		fault_id = engine_info->fault_id;
+	} else {
+		gk20a_err(g->dev, "engine_id is not in active list/invalid %d", engine_id);
 	}
 	return fault_id;
 }
@@ -60,18 +187,19 @@ static inline u32 gk20a_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id)
 static inline u32 gk20a_mmu_id_to_engine_id(struct gk20a *g, u32 fault_id)
 {
 	u32 engine_id;
-	u32 return_engine_id = ~0;
+	u32 active_engine_id;
+	struct fifo_engine_info_gk20a *engine_info;
+	struct fifo_gk20a *f = &g->fifo;

-	for (engine_id = 0; engine_id < ENGINE_INVAL_GK20A; engine_id++) {
-		struct fifo_engine_info_gk20a *info =
-			&g->fifo.engine_info[engine_id];
+	for (engine_id = 0; engine_id < f->num_engines; engine_id++) {
+		active_engine_id = f->active_engines_list[engine_id];
+		engine_info = &g->fifo.engine_info[active_engine_id];

-		if (info->fault_id == fault_id) {
-			return_engine_id = engine_id;
+		if (engine_info->fault_id == fault_id)
 			break;
-		}
+		active_engine_id = FIFO_INVAL_ENGINE_ID;
 	}
-	return return_engine_id;
+	return active_engine_id;
 }

 int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type,
@@ -82,10 +210,15 @@ int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type,
 	gk20a_dbg_info("engine type %d", engine_type);
 	if (engine_type == top_device_info_type_enum_graphics_v())
 		ret = ENGINE_GR_GK20A;
-	else if (engine_type == top_device_info_type_enum_copy2_v()) {
-		ret = ENGINE_CE2_GK20A;
+	else if ((engine_type >= top_device_info_type_enum_copy0_v()) &&
+		(engine_type <= top_device_info_type_enum_copy2_v())) {
+		/* Let's assume every CE engine has a separate runlist at this point.
+		 * We can identify the ENGINE_GRCE_GK20A type CE using runlist_id
+		 * comparison logic with the GR runlist_id in init_engine_info() */
+		ret = ENGINE_ASYNC_CE_GK20A;
+		/* inst_id starts from CE0 to CE2 */
 		if (inst_id)
-			*inst_id = 0x2;
+			*inst_id = (engine_type - top_device_info_type_enum_copy0_v());
 	} else
 		gk20a_err(g->dev, "unknown engine %d", engine_type);

@@ -108,12 +241,11 @@ static int init_engine_info(struct fifo_gk20a *f)
 	u32 inst_id = 0;
 	u32 pri_base = 0;
 	u32 fault_id = 0;
+	u32 gr_runlist_id = ~0;

 	gk20a_dbg_fn("");

-	/* all we really care about finding is the graphics entry */
-	/* especially early on in sim it probably thinks it has more */
-	f->num_engines = 2;
+	f->num_engines = 0;

 	for (i = 0; i < max_info_entries; i++) {
 		u32 table_entry = gk20a_readl(f->g, top_device_info_r(i));
@@ -168,8 +300,7 @@ static int init_engine_info(struct fifo_gk20a *f)
 				g->ops.fifo.engine_enum_from_type(g,
 						engine_type, &inst_id);
 		} else if (entry == top_device_info_entry_data_v()) {
-			/* gk20a don't support device_info_data
-			   packet parsing */
+			/* gk20a doesn't support device_info_data packet parsing */
 			if (g->ops.fifo.device_info_data_parse)
 				g->ops.fifo.device_info_data_parse(g,
table_entry, &inst_id, &pri_base, @@ -179,7 +310,7 @@ static int init_engine_info(struct fifo_gk20a *f) if (!top_device_info_chain_v(table_entry)) { if (engine_enum < ENGINE_INVAL_GK20A) { struct fifo_engine_info_gk20a *info = - &g->fifo.engine_info[engine_enum]; + &g->fifo.engine_info[engine_id]; info->intr_mask |= BIT(intr_id); info->reset_mask |= BIT(reset_id); @@ -188,11 +319,25 @@ static int init_engine_info(struct fifo_gk20a *f) info->inst_id = inst_id; info->pri_base = pri_base; - if (!fault_id && - (engine_enum == ENGINE_CE2_GK20A)) + if (engine_enum == ENGINE_GR_GK20A) + gr_runlist_id = runlist_id; + + /* GR and GR_COPY shares same runlist_id */ + if ((engine_enum == ENGINE_ASYNC_CE_GK20A) && + (gr_runlist_id == runlist_id)) + engine_enum = ENGINE_GRCE_GK20A; + + info->engine_enum = engine_enum; + + if (!fault_id && (engine_enum == ENGINE_GRCE_GK20A)) fault_id = 0x1b; info->fault_id = fault_id; + /* engine_id starts from 0 to NV_HOST_NUM_ENGINES */ + f->active_engines_list[f->num_engines] = engine_id; + + ++f->num_engines; + engine_enum = ENGINE_INVAL_GK20A; } } @@ -204,13 +349,19 @@ static int init_engine_info(struct fifo_gk20a *f) u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g) { u32 eng_intr_mask = 0; - int i = 0; + int i; + u32 active_engine_id = 0; + u32 engine_enum = ENGINE_INVAL_GK20A; - for (i = 0; i < g->fifo.max_engines; i++) { - u32 intr_mask = g->fifo.engine_info[i].intr_mask; - if (i == ENGINE_CE2_GK20A && + for (i = 0; i < g->fifo.num_engines; i++) { + u32 intr_mask; + active_engine_id = g->fifo.active_engines_list[i]; + intr_mask = g->fifo.engine_info[active_engine_id].intr_mask; + engine_enum = g->fifo.engine_info[active_engine_id].engine_enum; + if (((engine_enum == ENGINE_GRCE_GK20A) || + (engine_enum == ENGINE_ASYNC_CE_GK20A)) && (!g->ops.ce2.isr_stall || !g->ops.ce2.isr_nonstall)) - continue; + continue; eng_intr_mask |= intr_mask; } @@ -218,13 +369,44 @@ u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g) return eng_intr_mask; } +void gk20a_fifo_delete_runlist(struct fifo_gk20a *f) +{ + u32 i; + u32 runlist_id; + struct fifo_runlist_info_gk20a *runlist; + struct gk20a *g = NULL; + + if (!f || !f->runlist_info) + return; + + g = f->g; + + for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) { + runlist = &f->runlist_info[runlist_id]; + for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { + gk20a_gmmu_free(g, &runlist->mem[i]); + } + + kfree(runlist->active_channels); + runlist->active_channels = NULL; + + kfree(runlist->active_tsgs); + runlist->active_tsgs = NULL; + + mutex_destroy(&runlist->mutex); + + } + memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) * + f->max_runlists)); + + kfree(f->runlist_info); + f->runlist_info = NULL; + f->max_runlists = 0; +} + static void gk20a_remove_fifo_support(struct fifo_gk20a *f) { struct gk20a *g = f->g; - struct fifo_engine_info_gk20a *engine_info; - struct fifo_runlist_info_gk20a *runlist; - u32 runlist_id; - u32 i; gk20a_dbg_fn(""); @@ -232,19 +414,14 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f) vfree(f->tsg); gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd); - engine_info = f->engine_info + ENGINE_GR_GK20A; - runlist_id = engine_info->runlist_id; - runlist = &f->runlist_info[runlist_id]; - - for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) - gk20a_gmmu_free(g, &runlist->mem[i]); + gk20a_fifo_delete_runlist(f); - kfree(runlist->active_channels); - kfree(runlist->active_tsgs); - - kfree(f->runlist_info); kfree(f->pbdma_map); + f->pbdma_map = NULL; kfree(f->engine_info); + 
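/* Illustration only, not part of this patch: gk20a_fifo_delete_runlist()
 * clears each pointer after freeing it and bails out early once
 * f->runlist_info is NULL, so the teardown is idempotent. That is what
 * lets the reworked init_runlist() funnel every failure through the
 * single clean_up_runlist label. The wrapper name is hypothetical. */
static void my_safe_teardown(struct fifo_gk20a *f)
{
	gk20a_fifo_delete_runlist(f);
	/* A second call is harmless: f->runlist_info is already NULL. */
	gk20a_fifo_delete_runlist(f);
}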
f->engine_info = NULL; + kfree(f->active_engines_list); + f->active_engines_list = NULL; } /* reads info from hardware and fills in pbmda exception info record */ @@ -327,69 +504,58 @@ static void fifo_engine_exception_status(struct gk20a *g, static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) { - struct fifo_engine_info_gk20a *engine_info; struct fifo_runlist_info_gk20a *runlist; struct device *d = dev_from_gk20a(g); - u32 runlist_id; + s32 runlist_id = -1; u32 i; u64 runlist_size; gk20a_dbg_fn(""); - f->max_runlists = fifo_eng_runlist_base__size_1_v(); + f->max_runlists = g->ops.fifo.eng_runlist_base_size(); f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) * f->max_runlists, GFP_KERNEL); if (!f->runlist_info) - goto clean_up; + goto clean_up_runlist; - engine_info = f->engine_info + ENGINE_GR_GK20A; - runlist_id = engine_info->runlist_id; - runlist = &f->runlist_info[runlist_id]; + memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) * + f->max_runlists)); - runlist->active_channels = - kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), - GFP_KERNEL); - if (!runlist->active_channels) - goto clean_up_runlist_info; - - runlist->active_tsgs = - kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), - GFP_KERNEL); - if (!runlist->active_tsgs) - goto clean_up_runlist_info; - - runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries; - for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { - int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); - if (err) { - dev_err(d, "memory allocation failed\n"); + for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) { + runlist = &f->runlist_info[runlist_id]; + + runlist->active_channels = + kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), + GFP_KERNEL); + if (!runlist->active_channels) goto clean_up_runlist; + + runlist->active_tsgs = + kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), + GFP_KERNEL); + if (!runlist->active_tsgs) + goto clean_up_runlist; + + runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries; + for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { + int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); + if (err) { + dev_err(d, "memory allocation failed\n"); + goto clean_up_runlist; + } } - } - mutex_init(&runlist->mutex); + mutex_init(&runlist->mutex); - /* None of buffers is pinned if this value doesn't change. - Otherwise, one of them (cur_buffer) must have been pinned. */ - runlist->cur_buffer = MAX_RUNLIST_BUFFERS; + /* None of buffers is pinned if this value doesn't change. + Otherwise, one of them (cur_buffer) must have been pinned. 
*/ + runlist->cur_buffer = MAX_RUNLIST_BUFFERS; + } gk20a_dbg_fn("done"); return 0; clean_up_runlist: - for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) - gk20a_gmmu_free(g, &runlist->mem[i]); - -clean_up_runlist_info: - kfree(runlist->active_channels); - runlist->active_channels = NULL; - - kfree(runlist->active_tsgs); - runlist->active_tsgs = NULL; - - kfree(f->runlist_info); - f->runlist_info = NULL; - -clean_up: + gk20a_fifo_delete_runlist(f); gk20a_dbg_fn("fail"); return -ENOMEM; } @@ -543,7 +709,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) f->num_channels = g->ops.fifo.get_num_fifos(g); f->num_runlist_entries = fifo_eng_runlist_length_max_v(); f->num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA); - f->max_engines = ENGINE_INVAL_GK20A; + f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES); f->userd_entry_size = 1 << ram_userd_base_shift_v(); @@ -563,11 +729,15 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) GFP_KERNEL); f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info), GFP_KERNEL); + f->active_engines_list = kzalloc(f->max_engines * sizeof(u32), + GFP_KERNEL); - if (!(f->channel && f->pbdma_map && f->engine_info)) { + if (!(f->channel && f->pbdma_map && f->engine_info && + f->active_engines_list)) { err = -ENOMEM; goto clean_up; } + memset(f->active_engines_list, 0xff, (f->max_engines * sizeof(u32))); /* pbdma map needs to be in place before calling engine info init */ for (i = 0; i < f->num_pbdma; ++i) @@ -614,6 +784,8 @@ clean_up: f->pbdma_map = NULL; kfree(f->engine_info); f->engine_info = NULL; + kfree(f->active_engines_list); + f->active_engines_list = NULL; return err; } @@ -829,9 +1001,29 @@ static inline void get_exception_mmu_fault_info( void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id) { + struct fifo_gk20a *f = NULL; + u32 engine_enum = ENGINE_INVAL_GK20A; + u32 inst_id = 0; + struct fifo_engine_info_gk20a *engine_info; + gk20a_dbg_fn(""); - if (engine_id == ENGINE_GR_GK20A) { + if (!g) + return; + + f = &g->fifo; + + engine_info = gk20a_fifo_get_engine_info(g, engine_id); + + if (engine_info) { + engine_enum = engine_info->engine_enum; + inst_id = engine_info->inst_id; + } + + if (engine_enum == ENGINE_INVAL_GK20A) + gk20a_err(dev_from_gk20a(g), "unsupported engine_id %d", engine_id); + + if (engine_enum == ENGINE_GR_GK20A) { if (support_gk20a_pmu(g->dev) && g->elpg_enabled) gk20a_pmu_disable_elpg(g); /* resetting engine will alter read/write index. 
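/* Illustration only, not part of this patch: the engine-reset path being
 * reworked here keys off engine_info->engine_enum instead of hard-coded
 * engine IDs. A hypothetical helper that resets any copy engine: */
static void my_reset_if_ce(struct gk20a *g, u32 engine_id)
{
	struct fifo_engine_info_gk20a *info =
		gk20a_fifo_get_engine_info(g, engine_id);

	/* Both CE flavors carry a per-engine reset_mask, so no
	 * hard-coded mc_enable_ce2 bit is needed anymore. */
	if (info && ((info->engine_enum == ENGINE_GRCE_GK20A) ||
		     (info->engine_enum == ENGINE_ASYNC_CE_GK20A)))
		gk20a_reset(g, info->reset_mask);
}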
@@ -848,8 +1040,10 @@ void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id) if (support_gk20a_pmu(g->dev) && g->elpg_enabled) gk20a_pmu_enable_elpg(g); } - if (engine_id == ENGINE_CE2_GK20A) - gk20a_reset(g, mc_enable_ce2_m()); + if ((engine_enum == ENGINE_GRCE_GK20A) || + (engine_enum == ENGINE_ASYNC_CE_GK20A)) { + gk20a_reset(g, engine_info->reset_mask); + } } static void gk20a_fifo_handle_chsw_fault(struct gk20a *g) @@ -872,6 +1066,24 @@ static void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g) static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id, struct fifo_mmu_fault_info_gk20a *f, bool fake_fault) { + u32 engine_enum = ENGINE_INVAL_GK20A; + struct fifo_gk20a *fifo = NULL; + struct fifo_engine_info_gk20a *engine_info; + + if (!g || !f) + return false; + + fifo = &g->fifo; + + engine_info = gk20a_fifo_get_engine_info(g, engine_id); + + if (engine_info) { + engine_enum = engine_info->engine_enum; + } + + if (engine_enum == ENGINE_INVAL_GK20A) + return false; + /* channel recovery is only deferred if an sm debugger is attached and has MMU debug mode is enabled */ if (!gk20a_gr_sm_debugger_attached(g) || @@ -882,7 +1094,7 @@ static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id, if (fake_fault) return false; - if (engine_id != ENGINE_GR_GK20A || + if (engine_enum != ENGINE_GR_GK20A || f->engine_subid_v != fifo_intr_mmu_fault_info_engine_subid_gpc_v()) return false; @@ -1001,8 +1213,8 @@ static bool gk20a_fifo_handle_mmu_fault( false); g->ops.clock_gating.slcg_ltc_load_gating_prod(g, false); - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); + + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); /* Disable fifo access */ grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r()); @@ -1219,7 +1431,7 @@ static void gk20a_fifo_trigger_mmu_fault(struct gk20a *g, /* trigger faults for all bad engines */ for_each_set_bit(engine_id, &engine_ids, 32) { - if (engine_id > g->fifo.max_engines) { + if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) { WARN_ON(true); break; } @@ -1257,8 +1469,9 @@ static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg) int i; u32 engines = 0; - for (i = 0; i < g->fifo.max_engines; i++) { - u32 status = gk20a_readl(g, fifo_engine_status_r(i)); + for (i = 0; i < g->fifo.num_engines; i++) { + u32 active_engine_id = g->fifo.active_engines_list[i]; + u32 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id)); u32 ctx_status = fifo_engine_status_ctx_status_v(status); u32 ctx_id = (ctx_status == @@ -1276,7 +1489,7 @@ static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg) fifo_engine_status_id_type_tsgid_v()) || (!is_tsg && type == fifo_engine_status_id_type_chid_v())) - engines |= BIT(i); + engines |= BIT(active_engine_id); } } @@ -1382,15 +1595,16 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, ref_id_is_tsg = false; /* Reset *all* engines that use the * same channel as faulty engine */ - for (i = 0; i < g->fifo.max_engines; i++) { + for (i = 0; i < g->fifo.num_engines; i++) { + u32 active_engine_id = g->fifo.active_engines_list[i]; u32 type; u32 id; - gk20a_fifo_get_faulty_id_type(g, i, &id, &type); + gk20a_fifo_get_faulty_id_type(g, active_engine_id, &id, &type); if (ref_type == type && ref_id == id) { - engine_ids |= BIT(i); + engine_ids |= BIT(active_engine_id); mmu_fault_engines |= - BIT(gk20a_engine_id_to_mmu_id(g, i)); + BIT(gk20a_engine_id_to_mmu_id(g, active_engine_id)); } } } @@ -1453,16 
+1667,21 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose) u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g, int *__id, bool *__is_tsg) { - u32 engine_id = -1; + u32 engine_id; int id = -1; bool is_tsg = false; u32 mailbox2; + u32 active_engine_id = FIFO_INVAL_ENGINE_ID; - for (engine_id = 0; engine_id < g->fifo.max_engines; engine_id++) { - u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id)); - u32 ctx_status = fifo_engine_status_ctx_status_v(status); + for (engine_id = 0; engine_id < g->fifo.num_engines; engine_id++) { + u32 status; + u32 ctx_status; bool failing_engine; + active_engine_id = g->fifo.active_engines_list[engine_id]; + status = gk20a_readl(g, fifo_engine_status_r(active_engine_id)); + ctx_status = fifo_engine_status_ctx_status_v(status); + /* we are interested in busy engines */ failing_engine = fifo_engine_status_engine_v(status) == fifo_engine_status_engine_busy_v(); @@ -1476,8 +1695,10 @@ u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g, || ctx_status == fifo_engine_status_ctx_status_ctxsw_load_v()); - if (!failing_engine) + if (!failing_engine) { + active_engine_id = FIFO_INVAL_ENGINE_ID; continue; + } if (ctx_status == fifo_engine_status_ctx_status_ctxsw_load_v()) { @@ -1500,7 +1721,7 @@ u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g, *__id = id; *__is_tsg = is_tsg; - return engine_id; + return active_engine_id; } static bool gk20a_fifo_handle_sched_error(struct gk20a *g) @@ -1517,7 +1738,7 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g) engine_id = gk20a_fifo_get_failing_engine_data(g, &id, &is_tsg); /* could not find the engine - should never happen */ - if (unlikely(engine_id >= g->fifo.max_engines)) { + if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) { gk20a_err(dev_from_gk20a(g), "fifo sched error : 0x%08x, failed to find engine\n", sched_error); ret = false; @@ -1627,14 +1848,16 @@ static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr) "channel reset initiated from %s; intr=0x%08x", __func__, fifo_intr); for (engine_id = 0; - engine_id < g->fifo.max_engines; + engine_id < g->fifo.num_engines; engine_id++) { - gk20a_dbg_fn("enum:%d -> engine_id:%d", engine_id, - g->fifo.engine_info[engine_id].engine_id); - fifo_pbdma_exception_status(g, - &g->fifo.engine_info[engine_id]); - fifo_engine_exception_status(g, - &g->fifo.engine_info[engine_id]); + u32 active_engine_id = g->fifo.active_engines_list[engine_id]; + u32 engine_enum = g->fifo.engine_info[active_engine_id].engine_enum; + gk20a_dbg_fn("enum:%d -> engine_id:%d", engine_enum, + active_engine_id); + fifo_pbdma_exception_status(g, + &g->fifo.engine_info[active_engine_id]); + fifo_engine_exception_status(g, + &g->fifo.engine_info[active_engine_id]); } } @@ -2057,12 +2280,13 @@ int gk20a_fifo_enable_all_engine_activity(struct gk20a *g) int i; int err = 0, ret = 0; - for (i = 0; i < g->fifo.max_engines; i++) { + for (i = 0; i < g->fifo.num_engines; i++) { + u32 active_engine_id = g->fifo.active_engines_list[i]; err = gk20a_fifo_enable_engine_activity(g, - &g->fifo.engine_info[i]); + &g->fifo.engine_info[active_engine_id]); if (err) { gk20a_err(dev_from_gk20a(g), - "failed to enable engine %d activity\n", i); + "failed to enable engine %d activity\n", active_engine_id); ret = err; } } @@ -2149,14 +2373,16 @@ int gk20a_fifo_disable_all_engine_activity(struct gk20a *g, { int i; int err = 0, ret = 0; + u32 active_engine_id; - for (i = 0; i < g->fifo.max_engines; i++) { + for (i = 0; i < g->fifo.num_engines; i++) { + active_engine_id = 
g->fifo.active_engines_list[i]; err = gk20a_fifo_disable_engine_activity(g, - &g->fifo.engine_info[i], + &g->fifo.engine_info[active_engine_id], wait_for_idle); if (err) { gk20a_err(dev_from_gk20a(g), - "failed to disable engine %d activity\n", i); + "failed to disable engine %d activity\n", active_engine_id); ret = err; break; } @@ -2164,11 +2390,12 @@ int gk20a_fifo_disable_all_engine_activity(struct gk20a *g, if (err) { while (--i >= 0) { + active_engine_id = g->fifo.active_engines_list[i]; err = gk20a_fifo_enable_engine_activity(g, - &g->fifo.engine_info[i]); + &g->fifo.engine_info[active_engine_id]); if (err) gk20a_err(dev_from_gk20a(g), - "failed to re-enable engine %d activity\n", i); + "failed to re-enable engine %d activity\n", active_engine_id); } } @@ -2181,14 +2408,15 @@ static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id) u32 engines = 0; int i; - for (i = 0; i < f->max_engines; i++) { - u32 status = gk20a_readl(g, fifo_engine_status_r(i)); + for (i = 0; i < f->num_engines; i++) { + u32 active_engine_id = g->fifo.active_engines_list[i]; + u32 status = gk20a_readl(g, fifo_engine_status_r(active_engine_id)); bool engine_busy = fifo_engine_status_engine_v(status) == fifo_engine_status_engine_busy_v(); if (engine_busy && - (f->engine_info[i].runlist_id == runlist_id)) - engines |= BIT(i); + (f->engine_info[active_engine_id].runlist_id == runlist_id)) + engines |= BIT(active_engine_id); } if (engines) @@ -2669,8 +2897,10 @@ static int gk20a_fifo_sched_debugfs_seq_show( struct fifo_runlist_info_gk20a *runlist; u32 runlist_id; int ret = SEQ_SKIP; + u32 engine_id; - engine_info = f->engine_info + ENGINE_GR_GK20A; + engine_id = gk20a_fifo_get_gr_engine_id(g); + engine_info = (f->engine_info + engine_id); runlist_id = engine_info->runlist_id; runlist = &f->runlist_info[runlist_id]; @@ -2772,6 +3002,7 @@ void gk20a_init_fifo(struct gpu_ops *gops) gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave; gops->fifo.force_reset_ch = gk20a_fifo_force_reset_ch; gops->fifo.engine_enum_from_type = gk20a_fifo_engine_enum_from_type; - /* gk20a don't support device_info_data packet parsing */ + /* gk20a doesn't support device_info_data packet parsing */ gops->fifo.device_info_data_parse = NULL; + gops->fifo.eng_runlist_base_size = fifo_eng_runlist_base__size_1_v; } diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 5fb5f550..25d2cd9f 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -26,6 +26,8 @@ #define MAX_RUNLIST_BUFFERS 2 +#define FIFO_INVAL_ENGINE_ID ~0 + /* generally corresponds to the "pbdma" engine */ struct fifo_runlist_info_gk20a { @@ -40,10 +42,10 @@ struct fifo_runlist_info_gk20a { struct mutex mutex; /* protect channel preempt and runlist upate */ }; -/* so far gk20a has two engines: gr and ce2(gr_copy) */ enum { ENGINE_GR_GK20A = 0, - ENGINE_CE2_GK20A = 1, + ENGINE_GRCE_GK20A = 1, + ENGINE_ASYNC_CE_GK20A = 2, ENGINE_INVAL_GK20A }; @@ -85,6 +87,7 @@ struct fifo_engine_info_gk20a { u32 inst_id; u32 pri_base; u32 fault_id; + u32 engine_enum; struct fifo_pbdma_exception_info_gk20a pbdma_exception_info; struct fifo_engine_exception_info_gk20a engine_exception_info; struct fifo_mmu_fault_info_gk20a mmu_fault_info; @@ -102,6 +105,7 @@ struct fifo_gk20a { struct fifo_engine_info_gk20a *engine_info; u32 max_engines; u32 num_engines; + u32 *active_engines_list; struct fifo_runlist_info_gk20a *runlist_info; u32 max_runlists; @@ -228,4 +232,15 @@ const char 
*gk20a_fifo_interleave_level_name(u32 interleave_level); int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type, u32 *inst_id); +u32 gk20a_fifo_get_engine_ids(struct gk20a *g, u32 engine_id[], u32 engine_id_sz, u32 engine_enum); + +void gk20a_fifo_delete_runlist(struct fifo_gk20a *f); + +struct fifo_engine_info_gk20a *gk20a_fifo_get_engine_info(struct gk20a *g, u32 engine_id); + +bool gk20a_fifo_is_valid_engine_id(struct gk20a *g, u32 engine_id); + +u32 gk20a_fifo_get_gr_engine_id(struct gk20a *g); + +u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g); #endif /*__GR_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index a817abcd..d21d0527 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -1890,7 +1890,7 @@ void gk20a_enable(struct gk20a *g, u32 units) void gk20a_reset(struct gk20a *g, u32 units) { gk20a_disable(g, units); - if (units & mc_enable_ce2_enabled_f()) + if (units & gk20a_fifo_get_all_ce_engine_reset_mask(g)) udelay(500); else udelay(20); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 3da19cc8..b7f3f6f1 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -90,6 +90,7 @@ enum nvgpu_litter_value { GPU_LIT_ROP_BASE, GPU_LIT_ROP_STRIDE, GPU_LIT_ROP_SHARED_BASE, + GPU_LIT_HOST_NUM_ENGINES, GPU_LIT_HOST_NUM_PBDMA, GPU_LIT_LTC_STRIDE, GPU_LIT_LTS_STRIDE, @@ -120,8 +121,8 @@ struct gpu_ops { void (*flush)(struct gk20a *g); } ltc; struct { - void (*isr_stall)(struct gk20a *g); - void (*isr_nonstall)(struct gk20a *g); + void (*isr_stall)(struct gk20a *g, u32 inst_id, u32 pri_base); + void (*isr_nonstall)(struct gk20a *g, u32 inst_id, u32 pri_base); } ce2; struct { int (*init_fs_state)(struct gk20a *g); @@ -350,6 +351,7 @@ struct gpu_ops { int (*tsg_bind_channel)(struct tsg_gk20a *tsg, struct channel_gk20a *ch); int (*tsg_unbind_channel)(struct channel_gk20a *ch); + u32 (*eng_runlist_base_size)(void); } fifo; struct pmu_v { /*used for change of enum zbc update cmd id from ver 0 to ver1*/ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c index 0c94e246..6784cd11 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c @@ -50,13 +50,12 @@ static ssize_t elcg_enable_store(struct device *dev, if (val) { g->elcg_enabled = true; - gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A); - gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A); + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_AUTO); } else { g->elcg_enabled = false; - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); } + gk20a_idle(g->dev); dev_info(dev, "ELCG is %s.\n", g->elcg_enabled ? 
"enabled" : diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 17307b63..fb777948 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -326,9 +326,12 @@ int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, bool gr_enabled; bool ctxsw_active; bool gr_busy; + u32 gr_engine_id; gk20a_dbg_fn(""); + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); + do { /* fmodel: host gets fifo_engine_status(gr) from gr only when gr_status is read */ @@ -338,7 +341,7 @@ int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, mc_enable_pgraph_enabled_f(); ctxsw_active = gk20a_readl(g, - fifo_engine_status_r(ENGINE_GR_GK20A)) & + fifo_engine_status_r(gr_engine_id)) & fifo_engine_status_ctxsw_in_progress_f(); gr_busy = gk20a_readl(g, gr_engine_status_r()) & @@ -3905,11 +3908,14 @@ int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries) { struct fifo_gk20a *f = &g->fifo; - struct fifo_engine_info_gk20a *gr_info = - f->engine_info + ENGINE_GR_GK20A; + struct fifo_engine_info_gk20a *gr_info = NULL; unsigned long end_jiffies = jiffies + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); u32 ret; + u32 engine_id; + + engine_id = gk20a_fifo_get_gr_engine_id(g); + gr_info = (f->engine_info + engine_id); ret = gk20a_fifo_disable_engine_activity(g, gr_info, true); if (ret) { @@ -4187,9 +4193,13 @@ int _gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr, struct zbc_entry *zbc_val) { struct fifo_gk20a *f = &g->fifo; - struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A; + struct fifo_engine_info_gk20a *gr_info = NULL; unsigned long end_jiffies; int ret; + u32 engine_id; + + engine_id = gk20a_fifo_get_gr_engine_id(g); + gr_info = (f->engine_info + engine_id); ret = gk20a_fifo_disable_engine_activity(g, gr_info, true); if (ret) { @@ -4308,6 +4318,29 @@ void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine) gk20a_writel(g, therm_hubmmu_idle_filter_r(), idle_filter); } +void gr_gk20a_init_cg_mode(struct gk20a *g, u32 cgmode, u32 mode_config) +{ + u32 engine_idx; + u32 active_engine_id = 0; + struct fifo_engine_info_gk20a *engine_info = NULL; + struct fifo_gk20a *f = &g->fifo; + + for (engine_idx = 0; engine_idx < f->num_engines; ++engine_idx) { + active_engine_id = f->active_engines_list[engine_idx]; + engine_info = &f->engine_info[active_engine_id]; + + /* gr_engine supports both BLCG and ELCG */ + if ((cgmode == BLCG_MODE) && + (engine_info->engine_enum == ENGINE_GR_GK20A)) { + gr_gk20a_init_blcg_mode(g, mode_config, active_engine_id); + break; + } else if (cgmode == ELCG_MODE) + gr_gk20a_init_elcg_mode(g, mode_config, active_engine_id); + else + gk20a_err(dev_from_gk20a(g), "invalid cg mode %d %d", cgmode, mode_config); + } +} + static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) { u32 gpc_index, gpc_tpc_count, gpc_zcull_count; @@ -4710,8 +4743,9 @@ static int gk20a_init_gr_prepare(struct gk20a *g) { u32 gpfifo_ctrl, pmc_en; u32 err = 0; - struct fifo_engine_info_gk20a *ce_info = - g->fifo.engine_info + ENGINE_CE2_GK20A; + u32 ce_reset_mask; + + ce_reset_mask = gk20a_fifo_get_all_ce_engine_reset_mask(g); /* disable fifo access */ pmc_en = gk20a_readl(g, mc_enable_r()); @@ -4725,12 +4759,12 @@ static int gk20a_init_gr_prepare(struct gk20a *g) gk20a_reset(g, mc_enable_pgraph_enabled_f() | mc_enable_blg_enabled_f() | mc_enable_perfmon_enabled_f() - | ce_info->reset_mask); + | ce_reset_mask); 
gr_gk20a_load_gating_prod(g); + /* Disable elcg until it gets enabled later in the init*/ - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); /* enable fifo access */ gk20a_writel(g, gr_gpfifo_ctl_r(), @@ -5210,11 +5244,9 @@ int gk20a_enable_gr_hw(struct gk20a *g) static void gr_gk20a_enable_elcg(struct gk20a *g) { if (g->elcg_enabled) { - gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A); - gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A); + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_AUTO); } else { - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_GR_GK20A); - gr_gk20a_init_elcg_mode(g, ELCG_RUN, ENGINE_CE2_GK20A); + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); } } @@ -6106,6 +6138,7 @@ int gk20a_gr_isr(struct gk20a *g) u32 gr_intr = gk20a_readl(g, gr_intr_r()); struct channel_gk20a *ch = NULL; int tsgid = NVGPU_INVALID_TSG_ID; + u32 gr_engine_id; gk20a_dbg_fn(""); gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr); @@ -6113,6 +6146,8 @@ int gk20a_gr_isr(struct gk20a *g) if (!gr_intr) return 0; + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); + grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r()); grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1); grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1); @@ -6283,13 +6318,13 @@ int gk20a_gr_isr(struct gk20a *g) if (need_reset) { if (tsgid != NVGPU_INVALID_TSG_ID) - gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), + gk20a_fifo_recover(g, BIT(gr_engine_id), tsgid, true, true, true); else if (ch) - gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), + gk20a_fifo_recover(g, BIT(gr_engine_id), ch->hw_chid, false, true, true); else - gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), + gk20a_fifo_recover(g, BIT(gr_engine_id), 0, false, false, true); } @@ -8441,6 +8476,10 @@ static u32 *gr_gk20a_rop_l2_en_mask(struct gk20a *g) static int gr_gk20a_dump_gr_status_regs(struct gk20a *g, struct gk20a_debug_output *o) { + u32 gr_engine_id; + + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); + gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n", gk20a_readl(g, gr_status_r())); gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n", @@ -8460,7 +8499,7 @@ static int gr_gk20a_dump_gr_status_regs(struct gk20a *g, gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n", gk20a_readl(g, gr_fecs_intr_r())); gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n", - gk20a_readl(g, fifo_engine_status_r(ENGINE_GR_GK20A))); + gk20a_readl(g, fifo_engine_status_r(gr_engine_id))); gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n", gk20a_readl(g, gr_activity_0_r())); gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n", diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index b5d97727..189994ef 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -81,6 +81,12 @@ enum { eUcodeHandshakeMethodFinished }; +enum { + ELCG_MODE = (1 << 0), + BLCG_MODE = (1 << 1), + INVALID_MODE = (1 << 2) +}; + enum { ELCG_RUN, /* clk always run, i.e. 
disable elcg */ ELCG_STOP, /* clk is stopped */ @@ -476,6 +482,8 @@ int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va); void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine); void gr_gk20a_init_blcg_mode(struct gk20a *g, u32 mode, u32 engine); +void gr_gk20a_init_cg_mode(struct gk20a *g, u32 cgmode, u32 mode_config); + /* sm */ bool gk20a_gr_sm_debugger_attached(struct gk20a *g); void gk20a_gr_clear_sm_hww(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c index 5112af55..bf58d298 100644 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c @@ -99,6 +99,9 @@ static int gk20a_get_litter_value(struct gk20a *g, case GPU_LIT_ROP_SHARED_BASE: ret = proj_rop_shared_base_v(); break; + case GPU_LIT_HOST_NUM_ENGINES: + ret = proj_host_num_engines_v(); + break; case GPU_LIT_HOST_NUM_PBDMA: ret = proj_host_num_pbdma_v(); break; diff --git a/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h index ce10db35..7dce9f91 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h @@ -106,6 +106,10 @@ static inline u32 proj_tpc_in_gpc_shared_base_v(void) { return 0x00001800; } +static inline u32 proj_host_num_engines_v(void) +{ + return 0x00000002; +} static inline u32 proj_host_num_pbdma_v(void) { return 0x00000001; diff --git a/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h index 517cffef..d99e6135 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h @@ -118,6 +118,22 @@ static inline u32 top_device_info_type_enum_graphics_f(void) { return 0x0; } +static inline u32 top_device_info_type_enum_copy0_v(void) +{ + return 0x00000001; +} +static inline u32 top_device_info_type_enum_copy0_f(void) +{ + return 0x4; +} +static inline u32 top_device_info_type_enum_copy1_v(void) +{ + return 0x00000002; +} +static inline u32 top_device_info_type_enum_copy1_f(void) +{ + return 0x8; +} static inline u32 top_device_info_type_enum_copy2_v(void) { return 0x00000003; diff --git a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c index 70da7a02..57368235 100644 --- a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c @@ -74,6 +74,9 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g) { u32 mc_intr_0; int hw_irq_count; + u32 engine_id_idx; + u32 active_engine_id = 0; + u32 engine_enum = ENGINE_INVAL_GK20A; gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); @@ -84,11 +87,26 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g) gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0); - if (mc_intr_0 & g->fifo.engine_info[ENGINE_GR_GK20A].intr_mask) - gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g)); - if (mc_intr_0 & g->fifo.engine_info[ENGINE_CE2_GK20A].intr_mask - && g->ops.ce2.isr_stall) - g->ops.ce2.isr_stall(g); + for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; engine_id_idx++) { + active_engine_id = g->fifo.active_engines_list[engine_id_idx]; + + if (mc_intr_0 & g->fifo.engine_info[active_engine_id].intr_mask) { + engine_enum = g->fifo.engine_info[active_engine_id].engine_enum; + /* GR Engine */ + if (engine_enum == ENGINE_GR_GK20A) { + gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g)); + } + + /* CE Engine */ + if (((engine_enum == ENGINE_GRCE_GK20A) || + (engine_enum == ENGINE_ASYNC_CE_GK20A)) && + g->ops.ce2.isr_stall){ + g->ops.ce2.isr_stall(g, + 
g->fifo.engine_info[active_engine_id].inst_id, + g->fifo.engine_info[active_engine_id].pri_base); + } + } + } if (mc_intr_0 & mc_intr_0_pfifo_pending_f()) gk20a_fifo_isr(g); if (mc_intr_0 & mc_intr_0_pmu_pending_f()) @@ -120,6 +138,9 @@ irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g) { u32 mc_intr_1; int hw_irq_count; + u32 engine_id_idx; + u32 active_engine_id = 0; + u32 engine_enum = ENGINE_INVAL_GK20A; gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); @@ -132,11 +153,27 @@ irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g) gk20a_fifo_nonstall_isr(g); if (mc_intr_1 & mc_intr_0_priv_ring_pending_f()) gk20a_priv_ring_isr(g); - if (mc_intr_1 & g->fifo.engine_info[ENGINE_GR_GK20A].intr_mask) - gk20a_gr_nonstall_isr(g); - if (mc_intr_1 & g->fifo.engine_info[ENGINE_CE2_GK20A].intr_mask - && g->ops.ce2.isr_nonstall) - g->ops.ce2.isr_nonstall(g); + + for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; engine_id_idx++) { + active_engine_id = g->fifo.active_engines_list[engine_id_idx]; + + if (mc_intr_1 & g->fifo.engine_info[active_engine_id].intr_mask) { + engine_enum = g->fifo.engine_info[active_engine_id].engine_enum; + /* GR Engine */ + if (engine_enum == ENGINE_GR_GK20A) { + gk20a_gr_nonstall_isr(g); + } + + /* CE Engine */ + if (((engine_enum == ENGINE_GRCE_GK20A) || + (engine_enum == ENGINE_ASYNC_CE_GK20A)) && + g->ops.ce2.isr_nonstall) { + g->ops.ce2.isr_nonstall(g, + g->fifo.engine_info[active_engine_id].inst_id, + g->fifo.engine_info[active_engine_id].pri_base); + } + } + } /* sync handled irq counter before re-enabling interrupts */ atomic_set(&g->sw_irq_nonstall_last_handled, hw_irq_count); diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index bca57585..f6e2df00 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -3061,8 +3061,12 @@ int gk20a_init_pmu_bind_fecs(struct gk20a *g) struct pmu_cmd cmd; u32 desc; int err = 0; + u32 gr_engine_id; + gk20a_dbg_fn(""); + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); + memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + @@ -3070,7 +3074,7 @@ int gk20a_init_pmu_bind_fecs(struct gk20a *g) g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type(&cmd.cmd.pg, PMU_PG_CMD_ID_ENG_BUF_LOAD); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id(&cmd.cmd.pg, - ENGINE_GR_GK20A); + gr_engine_id); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx(&cmd.cmd.pg, PMU_PGENG_GR_BUFFER_IDX_FECS); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size(&cmd.cmd.pg, @@ -3095,6 +3099,9 @@ static void pmu_setup_hw_load_zbc(struct gk20a *g) struct pmu_gk20a *pmu = &g->pmu; struct pmu_cmd cmd; u32 desc; + u32 gr_engine_id; + + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; @@ -3103,7 +3110,7 @@ static void pmu_setup_hw_load_zbc(struct gk20a *g) g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type(&cmd.cmd.pg, PMU_PG_CMD_ID_ENG_BUF_LOAD); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id(&cmd.cmd.pg, - ENGINE_GR_GK20A); + gr_engine_id); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx(&cmd.cmd.pg, PMU_PGENG_GR_BUFFER_IDX_ZBC); g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size(&cmd.cmd.pg, @@ -3288,20 +3295,23 @@ static int pmu_init_powergating(struct gk20a *g) struct pmu_gk20a *pmu = &g->pmu; struct pmu_cmd cmd; u32 seq; + u32 gr_engine_id; gk20a_dbg_fn(""); + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); + if (tegra_cpu_is_asim()) { /* TBD: calculate threshold for 
silicon */ - gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A), + gk20a_writel(g, pwr_pmu_pg_idlefilth_r(gr_engine_id), PMU_PG_IDLE_THRESHOLD_SIM); - gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A), + gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(gr_engine_id), PMU_PG_POST_POWERUP_IDLE_THRESHOLD_SIM); } else { /* TBD: calculate threshold for silicon */ - gk20a_writel(g, pwr_pmu_pg_idlefilth_r(ENGINE_GR_GK20A), + gk20a_writel(g, pwr_pmu_pg_idlefilth_r(gr_engine_id), PMU_PG_IDLE_THRESHOLD); - gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(ENGINE_GR_GK20A), + gk20a_writel(g, pwr_pmu_pg_ppuidlefilth_r(gr_engine_id), PMU_PG_POST_POWERUP_IDLE_THRESHOLD); } @@ -3316,7 +3326,7 @@ static int pmu_init_powergating(struct gk20a *g) cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; - cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.elpg_cmd.engine_id = gr_engine_id; cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT; gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_INIT"); @@ -3329,7 +3339,7 @@ static int pmu_init_powergating(struct gk20a *g) cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat); cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT; - cmd.cmd.pg.stat.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.stat.engine_id = gr_engine_id; cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM; cmd.cmd.pg.stat.data = 0; @@ -3344,7 +3354,7 @@ static int pmu_init_powergating(struct gk20a *g) cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; - cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.elpg_cmd.engine_id = gr_engine_id; cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW; gk20a_dbg_pmu("cmd post PMU_PG_ELPG_CMD_DISALLOW"); @@ -4429,14 +4439,17 @@ static int gk20a_pmu_enable_elpg_locked(struct gk20a *g) struct pmu_gk20a *pmu = &g->pmu; struct pmu_cmd cmd; u32 seq, status; + u32 gr_engine_id; gk20a_dbg_fn(""); + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); + memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; - cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.elpg_cmd.engine_id = gr_engine_id; cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW; /* no need to wait ack for ELPG enable but set pending to sync @@ -4503,9 +4516,12 @@ int gk20a_pmu_disable_elpg(struct gk20a *g) struct pmu_cmd cmd; u32 seq; int ret = 0; + u32 gr_engine_id; gk20a_dbg_fn(""); + gr_engine_id = gk20a_fifo_get_gr_engine_id(g); + if (!support_gk20a_pmu(g->dev)) return ret; @@ -4553,7 +4569,7 @@ int gk20a_pmu_disable_elpg(struct gk20a *g) cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD; - cmd.cmd.pg.elpg_cmd.engine_id = ENGINE_GR_GK20A; + cmd.cmd.pg.elpg_cmd.engine_id = gr_engine_id; cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW; pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING; diff --git a/drivers/gpu/nvgpu/gm206/ce_gm206.c b/drivers/gpu/nvgpu/gm206/ce_gm206.c new file mode 100644 index 00000000..9ec42831 --- /dev/null +++ b/drivers/gpu/nvgpu/gm206/ce_gm206.c @@ -0,0 +1,107 @@ +/* + * GM206 Copy Engine. + * + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 
*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.
+ */
+
+/* TODO: remove unnecessary includes */
+#include "gk20a/gk20a.h"
+#include "ce_gm206.h"
+
+/* TODO: remove unnecessary includes */
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "gk20a/debug_gk20a.h"
+#include "gk20a/semaphore_gk20a.h"
+#include "hw_ce2_gm206.h"
+#include "hw_pbdma_gm206.h"
+#include "hw_ccsr_gm206.h"
+#include "hw_ram_gm206.h"
+#include "hw_top_gm206.h"
+#include "hw_mc_gm206.h"
+#include "hw_gr_gm206.h"
+
+/* TODO: We need a generic way to query the intr_status register offset.
+ * As of now, there is no way to query this information from dev_ceN_pri.h */
+#define COP_INTR_STATUS_OFFSET 0x908
+
+static u32 ce_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr, u32 inst_id)
+{
+	gk20a_dbg(gpu_dbg_intr, "ce non-blocking pipe interrupt\n");
+
+	return ce2_intr_status_nonblockpipe_pending_f();
+}
+
+static u32 ce_blockpipe_isr(struct gk20a *g, u32 fifo_intr, u32 inst_id)
+{
+	gk20a_dbg(gpu_dbg_intr, "ce blocking pipe interrupt\n");
+
+	return ce2_intr_status_blockpipe_pending_f();
+}
+
+static u32 ce_launcherr_isr(struct gk20a *g, u32 fifo_intr, u32 inst_id)
+{
+	gk20a_dbg(gpu_dbg_intr, "ce launch error interrupt\n");
+
+	return ce2_intr_status_launcherr_pending_f();
+}
+
+void gm206_ce_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
+{
+	u32 ce_intr_status_reg = (pri_base + COP_INTR_STATUS_OFFSET);
+	u32 ce_intr = gk20a_readl(g, ce_intr_status_reg);
+	u32 clear_intr = 0;
+
+	gk20a_dbg(gpu_dbg_intr, "ce isr %08x %08x\n", ce_intr, inst_id);
+
+	/* clear blocking interrupts: they exhibit broken behavior */
+	if (ce_intr & ce2_intr_status_blockpipe_pending_f())
+		clear_intr |= ce_blockpipe_isr(g, ce_intr, inst_id);
+
+	if (ce_intr & ce2_intr_status_launcherr_pending_f())
+		clear_intr |= ce_launcherr_isr(g, ce_intr, inst_id);
+
+	gk20a_writel(g, ce_intr_status_reg, clear_intr);
+	return;
+}
+
+void gm206_ce_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
+{
+	u32 ce_intr_status_reg = (pri_base + COP_INTR_STATUS_OFFSET);
+	u32 ce_intr = gk20a_readl(g, ce_intr_status_reg);
+
+	gk20a_dbg(gpu_dbg_intr, "ce nonstall isr %08x %08x\n", ce_intr, inst_id);
+
+	if (ce_intr & ce2_intr_status_nonblockpipe_pending_f()) {
+		gk20a_writel(g, ce_intr_status_reg,
+			ce_nonblockpipe_isr(g, ce_intr, inst_id));
+
+		/* wake threads waiting in this channel */
+		gk20a_channel_semaphore_wakeup(g, true);
+	}
+
+	return;
+}
+
+void gm206_init_ce(struct gpu_ops *gops)
+{
+	gops->ce2.isr_stall = gm206_ce_isr;
+	gops->ce2.isr_nonstall = gm206_ce_nonstall_isr;
+}
diff --git a/drivers/gpu/nvgpu/gm206/ce_gm206.h b/drivers/gpu/nvgpu/gm206/ce_gm206.h
new file mode 100644
index 00000000..c2fb1586
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm206/ce_gm206.h
@@ -0,0 +1,26 @@
+/*
+ * GM206 copy engine.
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. + */ +#ifndef __CE_GM206_H__ +#define __CE_GM206_H__ + +#include "gk20a/channel_gk20a.h" +#include "gk20a/tsg_gk20a.h" + +void gm206_init_ce(struct gpu_ops *gops); + +#endif /*__CE2_GM20B_H__*/ diff --git a/drivers/gpu/nvgpu/gm206/fifo_gm206.c b/drivers/gpu/nvgpu/gm206/fifo_gm206.c index c78f256c..6caf30f4 100644 --- a/drivers/gpu/nvgpu/gm206/fifo_gm206.c +++ b/drivers/gpu/nvgpu/gm206/fifo_gm206.c @@ -18,6 +18,7 @@ #include "gm20b/fifo_gm20b.h" #include "fifo_gm206.h" #include "hw_ccsr_gm206.h" +#include "hw_fifo_gm206.h" static u32 gm206_fifo_get_num_fifos(struct gk20a *g) { @@ -28,4 +29,5 @@ void gm206_init_fifo(struct gpu_ops *gops) { gm20b_init_fifo(gops); gops->fifo.get_num_fifos = gm206_fifo_get_num_fifos; + gops->fifo.eng_runlist_base_size = fifo_eng_runlist_base__size_1_v; } diff --git a/drivers/gpu/nvgpu/gm206/hal_gm206.c b/drivers/gpu/nvgpu/gm206/hal_gm206.c index 048a109f..6b5c70e2 100644 --- a/drivers/gpu/nvgpu/gm206/hal_gm206.c +++ b/drivers/gpu/nvgpu/gm206/hal_gm206.c @@ -21,7 +21,7 @@ #include "gm20b/mc_gm20b.h" #include "gm20b/ltc_gm20b.h" #include "gm20b/mm_gm20b.h" -#include "gm20b/ce2_gm20b.h" +#include "ce_gm206.h" #include "gm20b/fb_gm20b.h" #include "gm20b/pmu_gm20b.h" #include "gm20b/gr_gm20b.h" @@ -142,6 +142,9 @@ static int gm206_get_litter_value(struct gk20a *g, case GPU_LIT_ROP_SHARED_BASE: ret = proj_rop_shared_base_v(); break; + case GPU_LIT_HOST_NUM_ENGINES: + ret = proj_host_num_engines_v(); + break; case GPU_LIT_HOST_NUM_PBDMA: ret = proj_host_num_pbdma_v(); break; @@ -183,7 +186,7 @@ int gm206_init_hal(struct gk20a *g) gm20b_init_fb(gops); g->ops.fb.set_use_full_comp_tag_line = NULL; gm206_init_fifo(gops); - gm20b_init_ce2(gops); + gm206_init_ce(gops); gm20b_init_gr_ctx(gops); gm20b_init_mm(gops); gm206_init_pmu_ops(gops); diff --git a/drivers/gpu/nvgpu/gm206/hw_proj_gm206.h b/drivers/gpu/nvgpu/gm206/hw_proj_gm206.h index 2f4187d0..6c21b39a 100644 --- a/drivers/gpu/nvgpu/gm206/hw_proj_gm206.h +++ b/drivers/gpu/nvgpu/gm206/hw_proj_gm206.h @@ -106,6 +106,10 @@ static inline u32 proj_tpc_in_gpc_shared_base_v(void) { return 0x00001800; } +static inline u32 proj_host_num_engines_v(void) +{ + return 0x00000008; +} static inline u32 proj_host_num_pbdma_v(void) { return 0x00000003; diff --git a/drivers/gpu/nvgpu/gm206/hw_top_gm206.h b/drivers/gpu/nvgpu/gm206/hw_top_gm206.h index e6ec1d27..988f24ea 100644 --- a/drivers/gpu/nvgpu/gm206/hw_top_gm206.h +++ b/drivers/gpu/nvgpu/gm206/hw_top_gm206.h @@ -146,6 +146,22 @@ static inline u32 top_device_info_type_enum_copy0_f(void) { return 0x4; } +static inline u32 top_device_info_type_enum_copy1_v(void) +{ + return 0x00000002; +} +static inline u32 top_device_info_type_enum_copy1_f(void) +{ + return 0x8; +} +static inline u32 top_device_info_type_enum_copy2_v(void) +{ + return 0x00000003; +} +static inline u32 top_device_info_type_enum_copy2_f(void) +{ + return 0xc; +} static inline u32 top_device_info_entry_v(u32 r) { return (r >> 0) & 0x3; diff --git 
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index bb873bcc..031c5bae 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -49,12 +49,14 @@ static void channel_gm20b_bind(struct channel_gk20a *c)
 static inline u32 gm20b_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id)
 {
 	u32 fault_id = ~0;
+	struct fifo_engine_info_gk20a *engine_info;
 
-	if (engine_id < ENGINE_INVAL_GK20A) {
-		struct fifo_engine_info_gk20a *info =
-			&g->fifo.engine_info[engine_id];
+	engine_info = gk20a_fifo_get_engine_info(g, engine_id);
 
-		fault_id = info->fault_id;
+	if (engine_info) {
+		fault_id = engine_info->fault_id;
+	} else {
+		gk20a_err(g->dev, "invalid engine_id %d (not in active list)", engine_id);
 	}
 	return fault_id;
 }
@@ -72,7 +74,7 @@ static void gm20b_fifo_trigger_mmu_fault(struct gk20a *g,
 	for_each_set_bit(engine_id, &engine_ids, 32) {
 		u32 engine_mmu_fault_id;
 
-		if (engine_id > g->fifo.max_engines) {
+		if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) {
 			gk20a_err(dev_from_gk20a(g),
 				  "faulting unknown engine %ld", engine_id);
 		} else {
@@ -109,7 +111,7 @@ static u32 gm20b_fifo_get_num_fifos(struct gk20a *g)
 	return ccsr_channel__size_1_v();
 }
 
-void gm20b_device_info_data_parse(struct gk20a *g,
+static void gm20b_device_info_data_parse(struct gk20a *g,
 	u32 table_entry, u32 *inst_id,
 	u32 *pri_base, u32 *fault_id)
 {
@@ -152,4 +154,5 @@ void gm20b_init_fifo(struct gpu_ops *gops)
 	gops->fifo.force_reset_ch = gk20a_fifo_force_reset_ch;
 	gops->fifo.engine_enum_from_type = gk20a_fifo_engine_enum_from_type;
 	gops->fifo.device_info_data_parse = gm20b_device_info_data_parse;
+	gops->fifo.eng_runlist_base_size = fifo_eng_runlist_base__size_1_v;
 }
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index d5131b7a..cc709c78 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -855,6 +855,9 @@ static int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
 		struct gk20a_debug_output *o)
 {
 	struct gr_gk20a *gr = &g->gr;
+	u32 gr_engine_id;
+
+	gr_engine_id = gk20a_fifo_get_gr_engine_id(g);
 
 	gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n",
 		gk20a_readl(g, gr_status_r()));
@@ -875,7 +878,7 @@ static int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
 	gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n",
 		gk20a_readl(g, gr_fecs_intr_r()));
 	gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n",
-		gk20a_readl(g, fifo_engine_status_r(ENGINE_GR_GK20A)));
+		gk20a_readl(g, fifo_engine_status_r(gr_engine_id)));
 	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n",
 		gk20a_readl(g, gr_activity_0_r()));
 	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n",
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index c0113498..cbd3f50b 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -140,6 +140,9 @@ static int gm20b_get_litter_value(struct gk20a *g,
 	case GPU_LIT_ROP_SHARED_BASE:
 		ret = proj_rop_shared_base_v();
 		break;
+	case GPU_LIT_HOST_NUM_ENGINES:
+		ret = proj_host_num_engines_v();
+		break;
 	case GPU_LIT_HOST_NUM_PBDMA:
 		ret = proj_host_num_pbdma_v();
 		break;
diff --git a/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h
index b837918c..d4d412e1 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h
@@ -106,6 +106,10 @@ static inline u32 proj_tpc_in_gpc_shared_base_v(void)
 {
 	return 0x00001800;
 }
+static inline u32 proj_host_num_engines_v(void)
+{
+	return 0x00000002;
+}
 static inline u32 proj_host_num_pbdma_v(void)
 {
 	return 0x00000001;
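/*
 * Assumed shape of gk20a_fifo_is_valid_engine_id() (it is defined in
 * fifo_gk20a.c, which is not part of this excerpt); the field names
 * num_engines and active_engines_list are assumptions. The point of
 * the checks introduced above is that an engine_id is valid only if it
 * was discovered in the device info table, not merely smaller than a
 * hard-coded constant such as ENGINE_INVAL_GK20A.
 */
static inline bool example_is_valid_engine_id(struct gk20a *g, u32 engine_id)
{
	struct fifo_gk20a *f = &g->fifo;
	u32 i;

	/* linear scan is fine: the active list has at most
	 * GPU_LIT_HOST_NUM_ENGINES entries */
	for (i = 0; i < f->num_engines; i++)
		if (f->active_engines_list[i] == engine_id)
			return true;

	return false;
}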
diff --git a/drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h
index c70f388c..cca37294 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h
@@ -138,6 +138,22 @@ static inline u32 top_device_info_type_enum_graphics_f(void)
 {
 	return 0x0;
 }
+static inline u32 top_device_info_type_enum_copy0_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 top_device_info_type_enum_copy0_f(void)
+{
+	return 0x4;
+}
+static inline u32 top_device_info_type_enum_copy1_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 top_device_info_type_enum_copy1_f(void)
+{
+	return 0x8;
+}
 static inline u32 top_device_info_type_enum_copy2_v(void)
 {
 	return 0x00000003;
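/*
 * Assumed shape of gk20a_fifo_delete_runlist() (it lives in
 * fifo_gk20a.c, outside this excerpt). The vgpu init_runlist() rework
 * below jumps to it from every failure point, so it must tolerate
 * partially initialized state; the f->g back-pointer and the behavior
 * of gk20a_gmmu_free() on zeroed descriptors are assumptions here.
 */
static void example_delete_runlist(struct fifo_gk20a *f)
{
	struct gk20a *g = f->g;
	u32 i, runlist_id;

	if (!f->runlist_info)
		return;

	for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
		struct fifo_runlist_info_gk20a *runlist =
			&f->runlist_info[runlist_id];

		/* kfree(NULL) is a no-op, so unallocated bitmaps are fine */
		kfree(runlist->active_channels);
		runlist->active_channels = NULL;

		/* mem[] was zeroed by kzalloc; freeing never-allocated
		 * buffers is assumed to be a no-op */
		for (i = 0; i < MAX_RUNLIST_BUFFERS; i++)
			gk20a_gmmu_free(g, &runlist->mem[i]);
	}

	kfree(f->runlist_info);
	f->runlist_info = NULL;
}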
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index 0895b757..3e89e81f 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -163,60 +163,52 @@ static int init_engine_info(struct fifo_gk20a *f)
 
 static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 {
-	struct fifo_engine_info_gk20a *engine_info;
 	struct fifo_runlist_info_gk20a *runlist;
 	struct device *d = dev_from_gk20a(g);
-	u32 runlist_id;
+	s32 runlist_id = -1;
 	u32 i;
 	u64 runlist_size;
 
 	gk20a_dbg_fn("");
 
-	f->max_runlists = fifo_eng_runlist_base__size_1_v();
+	f->max_runlists = g->ops.fifo.eng_runlist_base_size();
 	f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) *
 				  f->max_runlists, GFP_KERNEL);
 	if (!f->runlist_info)
-		goto clean_up;
+		goto clean_up_runlist;
 
-	engine_info = f->engine_info + ENGINE_GR_GK20A;
-	runlist_id = engine_info->runlist_id;
-	runlist = &f->runlist_info[runlist_id];
+	memset(f->runlist_info, 0, (sizeof(struct fifo_runlist_info_gk20a) *
+		f->max_runlists));
+
+	for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
+		runlist = &f->runlist_info[runlist_id];
 
-	runlist->active_channels =
-		kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
-			GFP_KERNEL);
-	if (!runlist->active_channels)
-		goto clean_up_runlist_info;
-
-	runlist_size = sizeof(u16) * f->num_channels;
-	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
-		int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
-		if (err) {
-			dev_err(d, "memory allocation failed\n");
+		runlist->active_channels =
+			kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
+				GFP_KERNEL);
+		if (!runlist->active_channels)
 			goto clean_up_runlist;
-		}
-	}
-	mutex_init(&runlist->mutex);
 
+		runlist_size = sizeof(u16) * f->num_channels;
+		for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
+			int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
+			if (err) {
+				dev_err(d, "memory allocation failed\n");
+				goto clean_up_runlist;
+			}
+		}
+		mutex_init(&runlist->mutex);
+
-	/* None of buffers is pinned if this value doesn't change.
-	   Otherwise, one of them (cur_buffer) must have been pinned. */
-	runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
+		/* None of the buffers is pinned if this value doesn't change.
+		   Otherwise, one of them (cur_buffer) must have been pinned. */
+		runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
+	}
 
 	gk20a_dbg_fn("done");
 	return 0;
 
 clean_up_runlist:
-	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++)
-		gk20a_gmmu_free(g, &runlist->mem[i]);
-
-clean_up_runlist_info:
-	kfree(runlist->active_channels);
-	runlist->active_channels = NULL;
-
-	kfree(f->runlist_info);
-	f->runlist_info = NULL;
-
-clean_up:
+	gk20a_fifo_delete_runlist(f);
 	gk20a_dbg_fn("fail");
 	return -ENOMEM;
 }
@@ -243,7 +235,7 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
 	if (err)
 		return -ENXIO;
 
-	f->max_engines = ENGINE_INVAL_GK20A;
+	f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
 
 	f->userd_entry_size = 1 << ram_userd_base_shift_v();
-- 
cgit v1.2.2