From dfdd5ba3cbc52f7359188783159b103d1d2edcf2 Mon Sep 17 00:00:00 2001
From: Ken Adams <kadams@nvidia.com>
Date: Fri, 25 Jul 2014 16:47:24 -0400
Subject: gpu: nvgpu: gp10b headers

first cut.  just to get started...

Change-Id: I3682909f9ac0a5395ec834046789356f53d0c47d
Signed-off-by: Ken Adams <kadams@nvidia.com>
Reviewed-on: http://git-master/r/447753
---
 drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h             |  105 +
 drivers/gpu/nvgpu/gp10b/hw_ccsr_gp10b.h            |  109 +
 drivers/gpu/nvgpu/gp10b/hw_chiplet_pwr_gp10b.h     |   85 +
 drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h      |  181 ++
 drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h              |  221 ++
 drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h            |  509 ++++
 drivers/gpu/nvgpu/gp10b/hw_flush_gp10b.h           |  141 +
 drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h            |   57 +
 drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h            | 1153 +++++++
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h              | 3173 ++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h             |  281 ++
 drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h              |  217 ++
 drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h           |  469 +++
 drivers/gpu/nvgpu/gp10b/hw_pri_ringmaster_gp10b.h  |  145 +
 .../gpu/nvgpu/gp10b/hw_pri_ringstation_sys_gp10b.h |   69 +
 drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h            |  141 +
 drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h             |  805 +++++
 drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h             |  385 +++
 drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h           |  217 ++
 drivers/gpu/nvgpu/gp10b/hw_timer_gp10b.h           |  101 +
 drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h             |  137 +
 drivers/gpu/nvgpu/gp10b/hw_trim_gp10b.h            |  289 ++
 22 files changed, 8990 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_ccsr_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_chiplet_pwr_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_flush_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_pri_ringmaster_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_pri_ringstation_sys_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_timer_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_trim_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h
new file mode 100644
index 00000000..e443738f
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_bus_gp10b_h_
+#define _hw_bus_gp10b_h_
+
+static inline u32 bus_bar1_block_r(void)
+{
+	return 0x00001704U; /* register offset */
+}
+static inline u32 bus_bar1_block_ptr_f(u32 v)
+{
+	return v & 0x0fffffffU; /* field sits at bit 0, so no shift */
+}
+static inline u32 bus_bar1_block_target_vid_mem_f(void)
+{
+	return 0x0U; /* in-place field value */
+}
+static inline u32 bus_bar1_block_mode_virtual_f(void)
+{
+	return 0x80000000U; /* in-place field value */
+}
+static inline u32 bus_bar1_block_ptr_shift_v(void)
+{
+	return 0x0000000cU; /* constant value */
+}
+static inline u32 bus_intr_0_r(void)
+{
+	return 0x00001100U; /* register offset */
+}
+static inline u32 bus_intr_0_pri_squash_m(void)
+{
+	return 1U << 1; /* field mask */
+}
+static inline u32 bus_intr_0_pri_fecserr_m(void)
+{
+	return 1U << 2; /* field mask */
+}
+static inline u32 bus_intr_0_pri_timeout_m(void)
+{
+	return 1U << 3; /* field mask */
+}
+static inline u32 bus_intr_en_0_r(void)
+{
+	return 0x00001140U; /* register offset */
+}
+static inline u32 bus_intr_en_0_pri_squash_m(void)
+{
+	return 1U << 1; /* field mask */
+}
+static inline u32 bus_intr_en_0_pri_fecserr_m(void)
+{
+	return 1U << 2; /* field mask */
+}
+static inline u32 bus_intr_en_0_pri_timeout_m(void)
+{
+	return 1U << 3; /* field mask */
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ccsr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ccsr_gp10b.h
new file mode 100644
index 00000000..cd5265b3
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_ccsr_gp10b.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_ccsr_gp10b_h_
+#define _hw_ccsr_gp10b_h_
+
+static inline u32 ccsr_channel_inst_r(u32 i)
+{
+	return 0x00800000U + (i << 3); /* offset of instance i, stride 8 */
+}
+static inline u32 ccsr_channel_inst__size_1_v(void)
+{
+	return 0x00000200U; /* constant value */
+}
+static inline u32 ccsr_channel_inst_ptr_f(u32 v)
+{
+	return v & 0x0fffffffU; /* field sits at bit 0, so no shift */
+}
+static inline u32 ccsr_channel_inst_target_vid_mem_f(void)
+{
+	return 0x0U; /* in-place field value */
+}
+static inline u32 ccsr_channel_inst_bind_false_f(void)
+{
+	return 0x0U; /* in-place field value */
+}
+static inline u32 ccsr_channel_inst_bind_true_f(void)
+{
+	return 0x80000000U; /* in-place field value */
+}
+static inline u32 ccsr_channel_r(u32 i)
+{
+	return 0x00800004U + (i << 3); /* offset of instance i, stride 8 */
+}
+static inline u32 ccsr_channel__size_1_v(void)
+{
+	return 0x00000200U; /* constant value */
+}
+static inline u32 ccsr_channel_enable_v(u32 r)
+{
+	return r & 0x1U; /* field extracted from r (bit 0) */
+}
+static inline u32 ccsr_channel_enable_set_f(u32 v)
+{
+	return (v << 10) & 0x400U; /* bit 0 of v placed at bit 10 */
+}
+static inline u32 ccsr_channel_enable_set_true_f(void)
+{
+	return 0x400U; /* in-place field value */
+}
+static inline u32 ccsr_channel_enable_clr_true_f(void)
+{
+	return 0x800U; /* in-place field value */
+}
+static inline u32 ccsr_channel_status_v(u32 r)
+{
+	return (r & 0x0f000000U) >> 24; /* field extracted from r */
+}
+static inline u32 ccsr_channel_busy_v(u32 r)
+{
+	return (r & 0x10000000U) >> 28; /* field extracted from r */
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_chiplet_pwr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_chiplet_pwr_gp10b.h
new file mode 100644
index 00000000..640453ce
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_chiplet_pwr_gp10b.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_chiplet_pwr_gp10b_h_
+#define _hw_chiplet_pwr_gp10b_h_
+
+static inline u32 chiplet_pwr_gpcs_weight_6_r(void)
+{
+	return 0x0010e018U; /* register offset */
+}
+static inline u32 chiplet_pwr_gpcs_weight_7_r(void)
+{
+	return 0x0010e01cU; /* register offset */
+}
+static inline u32 chiplet_pwr_gpcs_config_1_r(void)
+{
+	return 0x0010e03cU; /* register offset */
+}
+static inline u32 chiplet_pwr_gpcs_config_1_ba_enable_yes_f(void)
+{
+	return 0x1U; /* in-place field value */
+}
+static inline u32 chiplet_pwr_fbps_weight_0_r(void)
+{
+	return 0x0010e100U; /* register offset */
+}
+static inline u32 chiplet_pwr_fbps_weight_1_r(void)
+{
+	return 0x0010e104U; /* register offset */
+}
+static inline u32 chiplet_pwr_fbps_config_1_r(void)
+{
+	return 0x0010e13cU; /* register offset */
+}
+static inline u32 chiplet_pwr_fbps_config_1_ba_enable_yes_f(void)
+{
+	return 0x1U; /* in-place field value */
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
new file mode 100644
index 00000000..6339cf5b
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_ctxsw_prog_gp10b_h_
+#define _hw_ctxsw_prog_gp10b_h_
+
+static inline u32 ctxsw_prog_fecs_header_v(void)
+{
+	return 0x00000100U; /* constant value */
+}
+static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
+{
+	return 0x00000008U; /* element offset */
+}
+static inline u32 ctxsw_prog_main_image_patch_count_o(void)
+{
+	return 0x00000010U; /* element offset */
+}
+static inline u32 ctxsw_prog_main_image_patch_adr_lo_o(void)
+{
+	return 0x00000014U; /* element offset */
+}
+static inline u32 ctxsw_prog_main_image_patch_adr_hi_o(void)
+{
+	return 0x00000018U; /* element offset */
+}
+static inline u32 ctxsw_prog_main_image_zcull_o(void)
+{
+	return 0x0000001cU; /* element offset */
+}
+static inline u32 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v(void)
+{
+	return 0x00000001U; /* constant value */
+}
+static inline u32 ctxsw_prog_main_image_zcull_mode_separate_buffer_v(void)
+{
+	return 0x00000002U; /* constant value */
+}
+static inline u32 ctxsw_prog_main_image_zcull_ptr_o(void)
+{
+	return 0x00000020U; /* element offset */
+}
+static inline u32 ctxsw_prog_main_image_pm_o(void)
+{
+	return 0x00000028U; /* element offset */
+}
+static inline u32 ctxsw_prog_main_image_pm_mode_v(u32 r)
+{
+	return r & 0x7U; /* field extracted from r (bits 2:0) */
+}
+static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_v(void)
+{
+	return 0x00000000U; /* constant value */
+}
+static inline u32 ctxsw_prog_main_image_pm_ptr_o(void)
+{
+	return 0x0000002cU; /* element offset */
+}
+static inline u32 ctxsw_prog_main_image_num_save_ops_o(void)
+{
+	return 0x000000f4U; /* element offset */
+}
+static inline u32 ctxsw_prog_main_image_num_restore_ops_o(void)
+{
+	return 0x000000f8U; /* element offset */
+}
+static inline u32 ctxsw_prog_main_image_magic_value_o(void)
+{
+	return 0x000000fcU; /* element offset */
+}
+static inline u32 ctxsw_prog_main_image_magic_value_v_value_v(void)
+{
+	return 0x600dc0deU; /* constant value */
+}
+static inline u32 ctxsw_prog_local_priv_register_ctl_o(void)
+{
+	return 0x0000000cU; /* element offset */
+}
+static inline u32 ctxsw_prog_local_priv_register_ctl_offset_v(u32 r)
+{
+	return r & 0xffffU; /* field extracted from r (low 16 bits) */
+}
+static inline u32 ctxsw_prog_local_image_ppc_info_o(void)
+{
+	return 0x000000f4U; /* element offset */
+}
+static inline u32 ctxsw_prog_local_image_ppc_info_num_ppcs_v(u32 r)
+{
+	return r & 0xffffU; /* field extracted from r (low 16 bits) */
+}
+static inline u32 ctxsw_prog_local_image_ppc_info_ppc_mask_v(u32 r)
+{
+	return (r & 0xffff0000U) >> 16; /* field extracted from r */
+}
+static inline u32 ctxsw_prog_local_image_num_tpcs_o(void)
+{
+	return 0x000000f8U; /* element offset */
+}
+static inline u32 ctxsw_prog_local_magic_value_o(void)
+{
+	return 0x000000fcU; /* element offset */
+}
+static inline u32 ctxsw_prog_local_magic_value_v_value_v(void)
+{
+	return 0xad0becabU; /* constant value */
+}
+static inline u32 ctxsw_prog_main_extended_buffer_ctl_o(void)
+{
+	return 0x000000ecU; /* element offset */
+}
+static inline u32 ctxsw_prog_main_extended_buffer_ctl_offset_v(u32 r)
+{
+	return r & 0xffffU; /* field extracted from r (low 16 bits) */
+}
+static inline u32 ctxsw_prog_main_extended_buffer_ctl_size_v(u32 r)
+{
+	return (r & 0x00ff0000U) >> 16; /* field extracted from r */
+}
+static inline u32 ctxsw_prog_extended_buffer_segments_size_in_bytes_v(void)
+{
+	return 0x00000100U; /* constant value */
+}
+static inline u32 ctxsw_prog_extended_marker_size_in_bytes_v(void)
+{
+	return 0x00000004U; /* constant value */
+}
+static inline u32 ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(void)
+{
+	return 0x00000000U; /* constant value */
+}
+static inline u32 ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(void)
+{
+	return 0x00000002U; /* constant value */
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
new file mode 100644
index 00000000..9dacabce
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_fb_gp10b_h_
+#define _hw_fb_gp10b_h_
+
+static inline u32 fb_fbhub_num_active_ltcs_r(void)
+{
+	return 0x00100800U; /* register offset */
+}
+static inline u32 fb_mmu_ctrl_r(void)
+{
+	return 0x00100c80U; /* register offset */
+}
+static inline u32 fb_mmu_ctrl_vm_pg_size_f(u32 v)
+{
+	return v & 0x1U; /* field sits at bit 0, so no shift */
+}
+static inline u32 fb_mmu_ctrl_vm_pg_size_128kb_f(void)
+{
+	return 0x0U; /* in-place field value */
+}
+static inline u32 fb_mmu_ctrl_pri_fifo_empty_v(u32 r)
+{
+	return (r & 0x8000U) >> 15; /* field extracted from r */
+}
+static inline u32 fb_mmu_ctrl_pri_fifo_empty_false_f(void)
+{
+	return 0x0U; /* in-place field value */
+}
+static inline u32 fb_mmu_ctrl_pri_fifo_space_v(u32 r)
+{
+	return (r & 0x00ff0000U) >> 16; /* field extracted from r */
+}
+static inline u32 fb_mmu_invalidate_pdb_r(void)
+{
+	return 0x00100cb8U; /* register offset */
+}
+static inline u32 fb_mmu_invalidate_pdb_aperture_vid_mem_f(void)
+{
+	return 0x0U; /* in-place field value */
+}
+static inline u32 fb_mmu_invalidate_pdb_addr_f(u32 v)
+{
+	return (v << 4) & 0xfffffff0U; /* low 28 bits of v placed at bit 4 */
+}
+static inline u32 fb_mmu_invalidate_r(void)
+{
+	return 0x00100cbcU; /* register offset */
+}
+static inline u32 fb_mmu_invalidate_all_va_true_f(void)
+{
+	return 0x1U; /* in-place field value */
+}
+static inline u32 fb_mmu_invalidate_all_pdb_true_f(void)
+{
+	return 0x2U; /* in-place field value */
+}
+static inline u32 fb_mmu_invalidate_trigger_s(void)
+{
+	return 1U; /* field width in bits */
+}
+static inline u32 fb_mmu_invalidate_trigger_f(u32 v)
+{
+	return (v << 31) & 0x80000000U; /* bit 0 of v placed at bit 31 */
+}
+static inline u32 fb_mmu_invalidate_trigger_m(void)
+{
+	return 0x1U << 31; /* mask, bit 31; unsigned as int 1 << 31 is UB */
+}
+static inline u32 fb_mmu_invalidate_trigger_v(u32 r)
+{
+	return (r & 0x80000000U) >> 31; /* field extracted from r */
+}
+static inline u32 fb_mmu_invalidate_trigger_true_f(void)
+{
+	return 0x80000000U; /* in-place field value */
+}
+static inline u32 fb_mmu_debug_wr_r(void)
+{
+	return 0x00100cc8U; /* register offset */
+}
+static inline u32 fb_mmu_debug_wr_aperture_s(void)
+{
+	return 2U; /* field width in bits */
+}
+static inline u32 fb_mmu_debug_wr_aperture_f(u32 v)
+{
+	return v & 0x3U; /* field sits at bit 0, so no shift */
+}
+static inline u32 fb_mmu_debug_wr_aperture_m(void)
+{
+	return 0x3U; /* field mask */
+}
+static inline u32 fb_mmu_debug_wr_aperture_v(u32 r)
+{
+	return r & 0x3U; /* field extracted from r (bits 1:0) */
+}
+static inline u32 fb_mmu_debug_wr_aperture_vid_mem_f(void)
+{
+	return 0x0U; /* in-place field value */
+}
+static inline u32 fb_mmu_debug_wr_vol_false_f(void)
+{
+	return 0x0U; /* in-place field value */
+}
+static inline u32 fb_mmu_debug_wr_vol_true_v(void)
+{
+	return 0x00000001U; /* constant value */
+}
+static inline u32 fb_mmu_debug_wr_vol_true_f(void)
+{
+	return 0x4U; /* in-place field value */
+}
+static inline u32 fb_mmu_debug_wr_addr_v(u32 r)
+{
+	return (r & 0xfffffff0U) >> 4; /* field extracted from r */
+}
+static inline u32 fb_mmu_debug_wr_addr_alignment_v(void)
+{
+	return 0x0000000cU; /* constant value */
+}
+static inline u32 fb_mmu_debug_rd_r(void)
+{
+	return 0x00100cccU; /* register offset */
+}
+static inline u32 fb_mmu_debug_rd_aperture_vid_mem_f(void)
+{
+	return 0x0U; /* in-place field value */
+}
+static inline u32 fb_mmu_debug_rd_vol_false_f(void)
+{
+	return 0x0U; /* in-place field value */
+}
+static inline u32 fb_mmu_debug_rd_addr_v(u32 r)
+{
+	return (r & 0xfffffff0U) >> 4; /* field extracted from r */
+}
+static inline u32 fb_mmu_debug_rd_addr_alignment_v(void)
+{
+	return 0x0000000cU; /* constant value */
+}
+static inline u32 fb_mmu_debug_ctrl_r(void)
+{
+	return 0x00100cc4U; /* register offset */
+}
+static inline u32 fb_mmu_debug_ctrl_debug_v(u32 r)
+{
+	return (r & 0x10000U) >> 16; /* field extracted from r */
+}
+static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void)
+{
+	return 0x00000001U; /* constant value */
+}
+static inline u32 fb_mmu_vpr_info_r(void)
+{
+	return 0x00100cd0U; /* register offset */
+}
+static inline u32 fb_mmu_vpr_info_fetch_f(u32 v)
+{
+	return (v << 2) & 0x4U; /* bit 0 of v placed at bit 2 */
+}
+static inline u32 fb_mmu_vpr_info_fetch_v(u32 r)
+{
+	return (r & 0x4U) >> 2; /* field extracted from r */
+}
+static inline u32 fb_mmu_vpr_info_fetch_false_v(void)
+{
+	return 0x00000000U; /* constant value */
+}
+static inline u32 fb_mmu_vpr_info_fetch_true_v(void)
+{
+	return 0x00000001U; /* constant value */
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
new file mode 100644
index 00000000..764c1b6c
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
@@ -0,0 +1,509 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_fifo_gp10b_h_
+#define _hw_fifo_gp10b_h_
+
+static inline u32 fifo_bar1_base_r(void)
+{
+	return 0x00002254U; /* register offset */
+}
+static inline u32 fifo_bar1_base_ptr_f(u32 v)
+{
+	return v & 0x0fffffffU; /* field sits at bit 0, so no shift */
+}
+static inline u32 fifo_bar1_base_ptr_align_shift_v(void)
+{
+	return 0x0000000cU; /* constant value */
+}
+static inline u32 fifo_bar1_base_valid_false_f(void)
+{
+	return 0x0U; /* in-place field value */
+}
+static inline u32 fifo_bar1_base_valid_true_f(void)
+{
+	return 0x10000000U; /* in-place field value */
+}
+static inline u32 fifo_runlist_base_r(void)
+{
+	return 0x00002270U; /* register offset */
+}
+static inline u32 fifo_runlist_base_ptr_f(u32 v)
+{
+	return v & 0x0fffffffU; /* field sits at bit 0, so no shift */
+}
+static inline u32 fifo_runlist_base_target_vid_mem_f(void)
+{
+	return 0x0U; /* in-place field value */
+}
+static inline u32 fifo_runlist_r(void)
+{
+	return 0x00002274U; /* register offset */
+}
+static inline u32 fifo_runlist_engine_f(u32 v)
+{
+	return (v << 20) & 0x00f00000U; /* low 4 bits of v placed at bit 20 */
+}
+static inline u32 fifo_eng_runlist_base_r(u32 i)
+{
+	return 0x00002280U + (i << 3); /* offset of instance i, stride 8 */
+}
+static inline u32 fifo_eng_runlist_base__size_1_v(void)
+{
+	return 0x00000001U; /* constant value */
+}
+static inline u32 fifo_eng_runlist_r(u32 i)
+{
+	return 0x00002284U + (i << 3); /* offset of instance i, stride 8 */
+}
+static inline u32 fifo_eng_runlist__size_1_v(void)
+{
+	return 0x00000001U; /* constant value */
+}
+static inline u32 fifo_eng_runlist_length_f(u32 v)
+{
+	return v & 0xffffU; /* field sits at bit 0, so no shift */
+}
+static inline u32 fifo_eng_runlist_pending_true_f(void)
+{
+	return 0x100000U; /* in-place field value */
+}
+static inline u32 fifo_pb_timeslice_r(u32 i)
+{
+	return 0x00002350U + (i << 2); /* offset of instance i, stride 4 */
+}
+static inline u32 fifo_pb_timeslice_timeout_16_f(void)
+{
+	return 0x10U; /* in-place field value */
+}
+static inline u32 fifo_pb_timeslice_timescale_0_f(void)
+{
+	return 0x0U; /* in-place field value */
+}
+static inline u32 fifo_pb_timeslice_enable_true_f(void)
+{
+	return 0x10000000U; /* in-place field value */
+}
+static inline u32 fifo_pbdma_map_r(u32 i)
+{
+	return 0x00002390U + (i << 2); /* offset of instance i, stride 4 */
+}
+static inline u32 fifo_intr_0_r(void)
+{
+	return 0x00002100U; /* register offset */
+}
+static inline u32 fifo_intr_0_bind_error_pending_f(void)
+{
+	return 0x1U; /* in-place field value */
+}
+static inline u32 fifo_intr_0_bind_error_reset_f(void)
+{
+	return 0x1U; /* in-place field value */
+}
+static inline u32 fifo_intr_0_sched_error_pending_f(void)
+{
+	return 0x100U; /* in-place field value */
+}
+static inline u32 fifo_intr_0_sched_error_reset_f(void)
+{
+	return 0x100U; /* in-place field value */
+}
+static inline u32 fifo_intr_0_chsw_error_pending_f(void)
+{
+	return 0x10000U; /* in-place field value */
+}
+static inline u32 fifo_intr_0_chsw_error_reset_f(void)
+{
+	return 0x10000U; /* in-place field value */
+}
+static inline u32 fifo_intr_0_fb_flush_timeout_pending_f(void)
+{
+	return 0x800000U; /* in-place field value */
+}
+static inline u32 fifo_intr_0_fb_flush_timeout_reset_f(void)
+{
+	return 0x800000U; /* in-place field value */
+}
+static inline u32 fifo_intr_0_lb_error_pending_f(void)
+{
+	return 0x1000000U; /* in-place field value */
+}
+static inline u32 fifo_intr_0_lb_error_reset_f(void)
+{
+	return 0x1000000U; /* in-place field value */
+}
+static inline u32 fifo_intr_0_dropped_mmu_fault_pending_f(void)
+{
+	return 0x8000000U; /* in-place field value */
+}
+static inline u32 fifo_intr_0_dropped_mmu_fault_reset_f(void)
+{
+	return 0x8000000U; /* in-place field value */
+}
+static inline u32 fifo_intr_0_mmu_fault_pending_f(void)
+{
+	return 0x10000000U; /* in-place field value */
+}
+static inline u32 fifo_intr_0_pbdma_intr_pending_f(void)
+{
+	return 0x20000000U; /* in-place field value */
+}
+static inline u32 fifo_intr_0_runlist_event_pending_f(void)
+{
+	return 0x40000000U; /* in-place field value */
+}
+static inline u32 fifo_intr_0_channel_intr_pending_f(void)
+{
+	return 0x80000000U; /* in-place field value */
+}
+static inline u32 fifo_intr_en_0_r(void)
+{
+	return 0x00002140U; /* register offset */
+}
+static inline u32 fifo_intr_en_1_r(void)
+{
+	return 0x00002528U; /* register offset */
+}
+static inline u32 fifo_intr_bind_error_r(void)
+{
+	return 0x0000252cU; /* register offset */
+}
+static inline u32 fifo_intr_sched_error_r(void)
+{
+	return 0x0000254cU; /* register offset */
+}
+static inline u32 fifo_intr_sched_error_code_f(u32 v)
+{
+	return v & 0xffU; /* field sits at bit 0, so no shift */
+}
+static inline u32 fifo_intr_sched_error_code_ctxsw_timeout_v(void)
+{
+	return 0x0000000aU; /* constant value */
+}
+static inline u32 fifo_intr_chsw_error_r(void)
+{
+	return 0x0000256cU; /* register offset */
+}
+static inline u32 fifo_intr_mmu_fault_id_r(void)
+{
+	return 0x0000259cU; /* register offset */
+}
+static inline u32 fifo_intr_mmu_fault_eng_id_graphics_v(void)
+{
+	return 0x00000000U; /* constant value */
+}
+static inline u32 fifo_intr_mmu_fault_eng_id_graphics_f(void)
+{
+	return 0x0U; /* in-place field value */
+}
+static inline u32 fifo_intr_mmu_fault_inst_r(u32 i)
+{
+	return 0x00002800U + (i << 4); /* offset of instance i, stride 16 */
+}
+static inline u32 fifo_intr_mmu_fault_inst_ptr_v(u32 r)
+{
+	return r & 0x0fffffffU; /* field extracted from r (low 28 bits) */
+}
+static inline u32 fifo_intr_mmu_fault_inst_ptr_align_shift_v(void)
+{
+	return 0x0000000cU; /* constant value */
+}
+static inline u32 fifo_intr_mmu_fault_lo_r(u32 i)
+{
+	return 0x00002804U + (i << 4); /* offset of instance i, stride 16 */
+}
+static inline u32 fifo_intr_mmu_fault_hi_r(u32 i)
+{
+	return 0x00002808U + (i << 4); /* offset of instance i, stride 16 */
+}
+static inline u32 fifo_intr_mmu_fault_info_r(u32 i)
+{
+	return 0x0000280cU + (i << 4); /* offset of instance i, stride 16 */
+}
+static inline u32 fifo_intr_mmu_fault_info_type_v(u32 r)
+{
+	return r & 0xfU; /* field extracted from r (bits 3:0) */
+}
+static inline u32 fifo_intr_mmu_fault_info_engine_subid_v(u32 r)
+{
+	return (r & 0x40U) >> 6; /* field extracted from r */
+}
+static inline u32 fifo_intr_mmu_fault_info_engine_subid_gpc_v(void)
+{
+	return 0x00000000U; /* constant value */
+}
+static inline u32 fifo_intr_mmu_fault_info_engine_subid_hub_v(void)
+{
+	return 0x00000001U; /* constant value */
+}
+static inline u32 fifo_intr_mmu_fault_info_client_v(u32 r)
+{
+	return (r & 0x3f00U) >> 8; /* field extracted from r */
+}
+static inline u32 fifo_intr_pbdma_id_r(void)
+{
+	return 0x25a0U; /* register offset */
+}
+static inline u32 fifo_intr_pbdma_id_status_f(u32 v, u32 i)
+{
+	return (v & 0x1U) << i; /* one bit per index, placed at bit i */
+}
+static inline u32 fifo_intr_pbdma_id_status__size_1_v(void)
+{
+	return 0x1U;
+}
+static inline u32 fifo_intr_runlist_r(void)
+{
+	return 0x2a00U; /* register offset */
+}
+static inline u32 fifo_fb_timeout_r(void)
+{
+	return 0x2a04U; /* register offset */
+}
+static inline u32 fifo_fb_timeout_period_m(void)
+{
+	return 0x3fffffffU; /* mask for bits 29:0 */
+}
+static inline u32 fifo_fb_timeout_period_max_f(void)
+{
+	return 0x3fffffffU;
+}
+static inline u32 fifo_error_sched_disable_r(void)
+{
+	return 0x262cU; /* register offset */
+}
+static inline u32 fifo_sched_disable_r(void)
+{
+	return 0x2630U; /* register offset */
+}
+static inline u32 fifo_sched_disable_runlist_f(u32 v, u32 i)
+{
+	return (v & 0x1U) << i; /* one bit per index, placed at bit i */
+}
+static inline u32 fifo_sched_disable_runlist_m(u32 i) /* i must be < 32 */
+{
+	return 0x1U << i; /* unsigned literal: int 1 << 31 is undefined */
+}
+static inline u32 fifo_sched_disable_true_v(void)
+{
+	return 0x1U;
+}
+static inline u32 fifo_preempt_r(void)
+{
+	return 0x2634U; /* register offset */
+}
+static inline u32 fifo_preempt_pending_true_f(void)
+{
+	return 0x1U << 20; /* bit 20 */
+}
+static inline u32 fifo_preempt_type_channel_f(void)
+{
+	return 0x0U;
+}
+static inline u32 fifo_preempt_chid_f(u32 v)
+{
+	return v & 0xfffU; /* bits 11:0, no shift needed */
+}
+static inline u32 fifo_trigger_mmu_fault_r(u32 i)
+{
+	return 0x2a30U + i * 4U; /* 4-byte stride per index */
+}
+static inline u32 fifo_trigger_mmu_fault_id_f(u32 v)
+{
+	return v & 0x1fU; /* bits 4:0, no shift needed */
+}
+static inline u32 fifo_trigger_mmu_fault_enable_f(u32 v)
+{
+	return (v & 0x1U) << 8; /* bit 8 */
+}
+static inline u32 fifo_engine_status_r(u32 i)
+{
+	return 0x2640U + i * 8U; /* 8-byte stride per index */
+}
+static inline u32 fifo_engine_status__size_1_v(void)
+{
+	return 0x2U;
+}
+static inline u32 fifo_engine_status_id_v(u32 r)
+{
+	return r & 0xfffU; /* bits 11:0 */
+}
+static inline u32 fifo_engine_status_id_type_v(u32 r)
+{
+	return (r >> 12) & 0x1U; /* bit 12 */
+}
+static inline u32 fifo_engine_status_id_type_chid_v(void)
+{
+	return 0x0U;
+}
+static inline u32 fifo_engine_status_ctx_status_v(u32 r)
+{
+	return (r >> 13) & 0x7U; /* bits 15:13 */
+}
+static inline u32 fifo_engine_status_ctx_status_valid_v(void)
+{
+	return 0x1U;
+}
+static inline u32 fifo_engine_status_ctx_status_ctxsw_load_v(void)
+{
+	return 0x5U;
+}
+static inline u32 fifo_engine_status_ctx_status_ctxsw_save_v(void)
+{
+	return 0x6U;
+}
+static inline u32 fifo_engine_status_ctx_status_ctxsw_switch_v(void)
+{
+	return 0x7U;
+}
+static inline u32 fifo_engine_status_next_id_v(u32 r)
+{
+	return (r >> 16) & 0xfffU; /* bits 27:16 */
+}
+static inline u32 fifo_engine_status_next_id_type_v(u32 r)
+{
+	return (r >> 28) & 0x1U; /* bit 28 */
+}
+static inline u32 fifo_engine_status_next_id_type_chid_v(void)
+{
+	return 0x0U;
+}
+static inline u32 fifo_engine_status_faulted_v(u32 r)
+{
+	return (r >> 30) & 0x1U; /* bit 30 */
+}
+static inline u32 fifo_engine_status_faulted_true_v(void)
+{
+	return 0x1U;
+}
+static inline u32 fifo_engine_status_engine_v(u32 r)
+{
+	return (r >> 31) & 0x1U; /* bit 31 */
+}
+static inline u32 fifo_engine_status_engine_idle_v(void)
+{
+	return 0x0U;
+}
+static inline u32 fifo_engine_status_engine_busy_v(void)
+{
+	return 0x1U;
+}
+static inline u32 fifo_engine_status_ctxsw_v(u32 r)
+{
+	return (r >> 15) & 0x1U; /* bit 15 */
+}
+static inline u32 fifo_engine_status_ctxsw_in_progress_v(void)
+{
+	return 0x1U;
+}
+static inline u32 fifo_engine_status_ctxsw_in_progress_f(void)
+{
+	return 0x1U << 15; /* bit 15 */
+}
+static inline u32 fifo_pbdma_status_r(u32 i)
+{
+	return 0x3080U + i * 4U; /* 4-byte stride per index */
+}
+static inline u32 fifo_pbdma_status__size_1_v(void)
+{
+	return 0x1U;
+}
+static inline u32 fifo_pbdma_status_id_v(u32 r)
+{
+	return r & 0xfffU; /* bits 11:0 */
+}
+static inline u32 fifo_pbdma_status_id_type_v(u32 r)
+{
+	return (r >> 12) & 0x1U; /* bit 12 */
+}
+static inline u32 fifo_pbdma_status_id_type_chid_v(void)
+{
+	return 0x0U;
+}
+static inline u32 fifo_pbdma_status_chan_status_v(u32 r)
+{
+	return (r >> 13) & 0x7U; /* bits 15:13 */
+}
+static inline u32 fifo_pbdma_status_chan_status_valid_v(void)
+{
+	return 0x1U;
+}
+static inline u32 fifo_pbdma_status_chan_status_chsw_load_v(void)
+{
+	return 0x5U;
+}
+static inline u32 fifo_pbdma_status_chan_status_chsw_save_v(void)
+{
+	return 0x6U;
+}
+static inline u32 fifo_pbdma_status_chan_status_chsw_switch_v(void)
+{
+	return 0x7U;
+}
+static inline u32 fifo_pbdma_status_next_id_v(u32 r)
+{
+	return (r >> 16) & 0xfffU; /* bits 27:16 */
+}
+static inline u32 fifo_pbdma_status_next_id_type_v(u32 r)
+{
+	return (r >> 28) & 0x1U; /* bit 28 */
+}
+static inline u32 fifo_pbdma_status_next_id_type_chid_v(void)
+{
+	return 0x0U;
+}
+static inline u32 fifo_pbdma_status_chsw_v(u32 r)
+{
+	return (r >> 15) & 0x1U; /* bit 15 */
+}
+static inline u32 fifo_pbdma_status_chsw_in_progress_v(void)
+{
+	return 0x1U;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_flush_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_flush_gp10b.h
new file mode 100644
index 00000000..b8e236b8
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_flush_gp10b.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_flush_gp10b_h_
+#define _hw_flush_gp10b_h_
+
+static inline u32 flush_l2_system_invalidate_r(void)
+{
+	return 0x70004U; /* register offset */
+}
+static inline u32 flush_l2_system_invalidate_pending_v(u32 r)
+{
+	return r & 0x1U; /* bit 0 */
+}
+static inline u32 flush_l2_system_invalidate_pending_busy_v(void)
+{
+	return 0x1U;
+}
+static inline u32 flush_l2_system_invalidate_pending_busy_f(void)
+{
+	return 0x1U << 0; /* bit 0 */
+}
+static inline u32 flush_l2_system_invalidate_outstanding_v(u32 r)
+{
+	return (r >> 1) & 0x1U; /* bit 1 */
+}
+static inline u32 flush_l2_system_invalidate_outstanding_true_v(void)
+{
+	return 0x1U;
+}
+static inline u32 flush_l2_flush_dirty_r(void)
+{
+	return 0x70010U; /* register offset */
+}
+static inline u32 flush_l2_flush_dirty_pending_v(u32 r)
+{
+	return r & 0x1U; /* bit 0 */
+}
+static inline u32 flush_l2_flush_dirty_pending_empty_v(void)
+{
+	return 0x0U;
+}
+static inline u32 flush_l2_flush_dirty_pending_empty_f(void)
+{
+	return 0x0U;
+}
+static inline u32 flush_l2_flush_dirty_pending_busy_v(void)
+{
+	return 0x1U;
+}
+static inline u32 flush_l2_flush_dirty_pending_busy_f(void)
+{
+	return 0x1U << 0; /* bit 0 */
+}
+static inline u32 flush_l2_flush_dirty_outstanding_v(u32 r)
+{
+	return (r >> 1) & 0x1U; /* bit 1 */
+}
+static inline u32 flush_l2_flush_dirty_outstanding_false_v(void)
+{
+	return 0x0U;
+}
+static inline u32 flush_l2_flush_dirty_outstanding_false_f(void)
+{
+	return 0x0U;
+}
+static inline u32 flush_l2_flush_dirty_outstanding_true_v(void)
+{
+	return 0x1U;
+}
+static inline u32 flush_fb_flush_r(void)
+{
+	return 0x70000U; /* register offset */
+}
+static inline u32 flush_fb_flush_pending_v(u32 r)
+{
+	return r & 0x1U; /* bit 0 */
+}
+static inline u32 flush_fb_flush_pending_busy_v(void)
+{
+	return 0x1U;
+}
+static inline u32 flush_fb_flush_pending_busy_f(void)
+{
+	return 0x1U << 0; /* bit 0 */
+}
+static inline u32 flush_fb_flush_outstanding_v(u32 r)
+{
+	return (r >> 1) & 0x1U; /* bit 1 */
+}
+static inline u32 flush_fb_flush_outstanding_true_v(void)
+{
+	return 0x1U;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
new file mode 100644
index 00000000..00291d30
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_fuse_gp10b_h_
+#define _hw_fuse_gp10b_h_
+
+static inline u32 fuse_status_opt_tpc_gpc_r(u32 i)
+{
+	return 0x21c38U + i * 4U; /* 4-byte stride per index */
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
new file mode 100644
index 00000000..161c1ce0
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
@@ -0,0 +1,1153 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_gmmu_gp10b_h_
+#define _hw_gmmu_gp10b_h_
+
+static inline u32 gmmu_pde_aperture_big_w(void)
+{
+	return 0U; /* word 0 */
+}
+static inline u32 gmmu_pde_aperture_big_invalid_f(void)
+{
+	return 0x0U;
+}
+static inline u32 gmmu_pde_aperture_big_video_memory_f(void)
+{
+	return 0x1U;
+}
+static inline u32 gmmu_pde_size_w(void)
+{
+	return 0U; /* word 0 */
+}
+static inline u32 gmmu_pde_size_full_f(void)
+{
+	return 0x0U;
+}
+static inline u32 gmmu_pde_address_big_sys_f(u32 v)
+{
+	return (v & 0xfffffffU) << 4; /* bits 31:4 */
+}
+static inline u32 gmmu_pde_address_big_sys_w(void)
+{
+	return 0U; /* word 0 */
+}
+static inline u32 gmmu_pde_aperture_small_w(void)
+{
+	return 1U; /* word 1 */
+}
+static inline u32 gmmu_pde_aperture_small_invalid_f(void)
+{
+	return 0x0U;
+}
+static inline u32 gmmu_pde_aperture_small_video_memory_f(void)
+{
+	return 0x1U;
+}
+static inline u32 gmmu_pde_vol_small_w(void)
+{
+	return 1U; /* word 1 */
+}
+static inline u32 gmmu_pde_vol_small_true_f(void)
+{
+	return 0x1U << 2; /* bit 2 */
+}
+static inline u32 gmmu_pde_vol_small_false_f(void)
+{
+	return 0x0U;
+}
+static inline u32 gmmu_pde_vol_big_w(void)
+{
+	return 1U; /* word 1 */
+}
+static inline u32 gmmu_pde_vol_big_true_f(void)
+{
+	return 0x1U << 3; /* bit 3 */
+}
+static inline u32 gmmu_pde_vol_big_false_f(void)
+{
+	return 0x0U;
+}
+static inline u32 gmmu_pde_address_small_sys_f(u32 v)
+{
+	return (v & 0xfffffffU) << 4; /* bits 31:4 */
+}
+static inline u32 gmmu_pde_address_small_sys_w(void)
+{
+	return 1U; /* word 1 */
+}
+static inline u32 gmmu_pde_address_shift_v(void)
+{
+	return 0xcU; /* 12 */
+}
+static inline u32 gmmu_pde__size_v(void)
+{
+	return 0x8U;
+}
+static inline u32 gmmu_pte__size_v(void)
+{
+	return 0x8U;
+}
+static inline u32 gmmu_pte_valid_w(void)
+{
+	return 0U; /* word 0 */
+}
+static inline u32 gmmu_pte_valid_true_f(void)
+{
+	return 0x1U << 0; /* bit 0 */
+}
+static inline u32 gmmu_pte_valid_false_f(void)
+{
+	return 0x0U;
+}
+static inline u32 gmmu_pte_address_sys_f(u32 v)
+{
+	return (v & 0xfffffffU) << 4; /* bits 31:4 */
+}
+static inline u32 gmmu_pte_address_sys_w(void)
+{
+	return 0U; /* word 0 */
+}
+static inline u32 gmmu_pte_vol_w(void)
+{
+	return 1U; /* word 1 */
+}
+static inline u32 gmmu_pte_vol_true_f(void)
+{
+	return 0x1U << 0; /* bit 0 */
+}
+static inline u32 gmmu_pte_vol_false_f(void)
+{
+	return 0x0U;
+}
+static inline u32 gmmu_pte_aperture_w(void)
+{
+	return 1U; /* word 1 */
+}
+static inline u32 gmmu_pte_aperture_video_memory_f(void)
+{
+	return 0x0U;
+}
+static inline u32 gmmu_pte_read_only_w(void)
+{
+	return 0U; /* word 0 */
+}
+static inline u32 gmmu_pte_read_only_true_f(void)
+{
+	return 0x1U << 2; /* bit 2 */
+}
+static inline u32 gmmu_pte_write_disable_w(void)
+{
+	return 1U; /* word 1 */
+}
+static inline u32 gmmu_pte_write_disable_true_f(void)
+{
+	return 0x1U << 31; /* bit 31 */
+}
+static inline u32 gmmu_pte_read_disable_w(void)
+{
+	return 1U; /* word 1 */
+}
+static inline u32 gmmu_pte_read_disable_true_f(void)
+{
+	return 0x1U << 30; /* bit 30 */
+}
+static inline u32 gmmu_pte_comptagline_f(u32 v)
+{
+	return (v & 0x1ffffU) << 12; /* bits 28:12 */
+}
+static inline u32 gmmu_pte_comptagline_w(void)
+{
+	return 1U; /* word 1 */
+}
+static inline u32 gmmu_pte_address_shift_v(void)
+{
+	return 0xcU; /* 12 */
+}
+static inline u32 gmmu_pte_kind_f(u32 v)
+{
+	return (v & 0xffU) << 4; /* bits 11:4 */
+}
+static inline u32 gmmu_pte_kind_w(void)
+{
+	return 1U; /* word 1 */
+}
+static inline u32 gmmu_pte_kind_invalid_v(void)
+{
+	return 0xffU;
+}
+static inline u32 gmmu_pte_kind_pitch_v(void)
+{
+	return 0x00U;
+}
+static inline u32 gmmu_pte_kind_z16_v(void)
+{
+	return 0x01U;
+}
+static inline u32 gmmu_pte_kind_z16_2c_v(void)
+{
+	return 0x02U;
+}
+static inline u32 gmmu_pte_kind_z16_ms2_2c_v(void)
+{
+	return 0x03U;
+}
+static inline u32 gmmu_pte_kind_z16_ms4_2c_v(void)
+{
+	return 0x04U;
+}
+static inline u32 gmmu_pte_kind_z16_ms8_2c_v(void)
+{
+	return 0x05U;
+}
+static inline u32 gmmu_pte_kind_z16_ms16_2c_v(void)
+{
+	return 0x06U;
+}
+static inline u32 gmmu_pte_kind_z16_2z_v(void)
+{
+	return 0x07U;
+}
+static inline u32 gmmu_pte_kind_z16_ms2_2z_v(void)
+{
+	return 0x08U;
+}
+static inline u32 gmmu_pte_kind_z16_ms4_2z_v(void)
+{
+	return 0x09U;
+}
+static inline u32 gmmu_pte_kind_z16_ms8_2z_v(void)
+{
+	return 0x0aU;
+}
+static inline u32 gmmu_pte_kind_z16_ms16_2z_v(void)
+{
+	return 0x0bU;
+}
+static inline u32 gmmu_pte_kind_z16_4cz_v(void)
+{
+	return 0x0cU;
+}
+static inline u32 gmmu_pte_kind_z16_ms2_4cz_v(void)
+{
+	return 0x0dU;
+}
+static inline u32 gmmu_pte_kind_z16_ms4_4cz_v(void)
+{
+	return 0x0eU;
+}
+static inline u32 gmmu_pte_kind_z16_ms8_4cz_v(void)
+{
+	return 0x0fU;
+}
+static inline u32 gmmu_pte_kind_z16_ms16_4cz_v(void)
+{
+	return 0x10U;
+}
+static inline u32 gmmu_pte_kind_s8z24_v(void)
+{
+	return 0x11U;
+}
+static inline u32 gmmu_pte_kind_s8z24_1z_v(void)
+{
+	return 0x12U;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms2_1z_v(void)
+{
+	return 0x13U;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms4_1z_v(void)
+{
+	return 0x14U;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms8_1z_v(void)
+{
+	return 0x15U;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms16_1z_v(void)
+{
+	return 0x16U;
+}
+static inline u32 gmmu_pte_kind_s8z24_2cz_v(void)
+{
+	return 0x17U;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms2_2cz_v(void)
+{
+	return 0x18U;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms4_2cz_v(void)
+{
+	return 0x19U;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms8_2cz_v(void)
+{
+	return 0x1aU;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms16_2cz_v(void)
+{
+	return 0x1bU;
+}
+static inline u32 gmmu_pte_kind_s8z24_2cs_v(void)
+{
+	return 0x1cU;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms2_2cs_v(void)
+{
+	return 0x1dU;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms4_2cs_v(void)
+{
+	return 0x1eU;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms8_2cs_v(void)
+{
+	return 0x1fU;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms16_2cs_v(void)
+{
+	return 0x20U;
+}
+static inline u32 gmmu_pte_kind_s8z24_4cszv_v(void)
+{
+	return 0x21U;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms2_4cszv_v(void)
+{
+	return 0x22U;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms4_4cszv_v(void)
+{
+	return 0x23U;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms8_4cszv_v(void)
+{
+	return 0x24U;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms16_4cszv_v(void)
+{
+	return 0x25U;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_v(void)
+{
+	return 0x26U;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_v(void)
+{
+	return 0x27U;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_v(void)
+{
+	return 0x28U;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_v(void)
+{
+	return 0x29U;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_1zv_v(void)
+{
+	return 0x2eU;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_1zv_v(void)
+{
+	return 0x2fU;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_1zv_v(void)
+{
+	return 0x30U;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_1zv_v(void)
+{
+	return 0x31U;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2cs_v(void)
+{
+	return 0x32U;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2cs_v(void)
+{
+	return 0x33U;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2cs_v(void)
+{
+	return 0x34U;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2cs_v(void)
+{
+	return 0x35U;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2czv_v(void)
+{
+	return 0x3aU;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2czv_v(void)
+{
+	return 0x3bU;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2czv_v(void)
+{
+	return 0x3cU;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2czv_v(void)
+{
+	return 0x3dU;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2zv_v(void)
+{
+	return 0x3eU;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2zv_v(void)
+{
+	return 0x3fU;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2zv_v(void)
+{
+	return 0x40U;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2zv_v(void)
+{
+	return 0x41U;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_4cszv_v(void)
+{
+	return 0x42U;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_4cszv_v(void)
+{
+	return 0x43U;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_4cszv_v(void)
+{
+	return 0x44U;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_4cszv_v(void)
+{
+	return 0x45U;
+}
+static inline u32 gmmu_pte_kind_z24s8_v(void)
+{
+	return 0x46U;
+}
+static inline u32 gmmu_pte_kind_z24s8_1z_v(void)
+{
+	return 0x47U;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms2_1z_v(void)
+{
+	return 0x48U;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms4_1z_v(void)
+{
+	return 0x49U;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms8_1z_v(void)
+{
+	return 0x4aU;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms16_1z_v(void)
+{
+	return 0x4bU;
+}
+static inline u32 gmmu_pte_kind_z24s8_2cs_v(void)
+{
+	return 0x4cU;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms2_2cs_v(void)
+{
+	return 0x4dU;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms4_2cs_v(void)
+{
+	return 0x4eU;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms8_2cs_v(void)
+{
+	return 0x4fU;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms16_2cs_v(void)
+{
+	return 0x50U;
+}
+static inline u32 gmmu_pte_kind_z24s8_2cz_v(void)
+{
+	return 0x51U;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms2_2cz_v(void)
+{
+	return 0x52U;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms4_2cz_v(void)
+{
+	return 0x53U;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms8_2cz_v(void)
+{
+	return 0x54U;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms16_2cz_v(void)
+{
+	return 0x55U;
+}
+static inline u32 gmmu_pte_kind_z24s8_4cszv_v(void)
+{
+	return 0x56U;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms2_4cszv_v(void)
+{
+	return 0x57U;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms4_4cszv_v(void)
+{
+	return 0x58U;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms8_4cszv_v(void)
+{
+	return 0x59U;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms16_4cszv_v(void)
+{
+	return 0x5aU;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_v(void)
+{
+	return 0x5bU;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_v(void)
+{
+	return 0x5cU;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_v(void)
+{
+	return 0x5dU;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_v(void)
+{
+	return 0x5eU;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_1zv_v(void)
+{
+	return 0x63U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_1zv_v(void)
+{
+	return 0x64U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_1zv_v(void)
+{
+	return 0x65U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_1zv_v(void)
+{
+	return 0x66U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2cs_v(void)
+{
+	return 0x67U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2cs_v(void)
+{
+	return 0x68U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2cs_v(void)
+{
+	return 0x69U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2cs_v(void)
+{
+	return 0x6aU;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2czv_v(void)
+{
+	return 0x6fU;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2czv_v(void)
+{
+	return 0x70U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2czv_v(void)
+{
+	return 0x71U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2czv_v(void)
+{
+	return 0x72U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2zv_v(void)
+{
+	return 0x73U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2zv_v(void)
+{
+	return 0x74U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2zv_v(void)
+{
+	return 0x75U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2zv_v(void)
+{
+	return 0x76U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_4cszv_v(void)
+{
+	return 0x77U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_4cszv_v(void)
+{
+	return 0x78U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_4cszv_v(void)
+{
+	return 0x79U;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_4cszv_v(void)
+{
+	return 0x7aU;
+}
+static inline u32 gmmu_pte_kind_zf32_v(void)
+{
+	return 0x7bU;
+}
+static inline u32 gmmu_pte_kind_zf32_1z_v(void)
+{
+	return 0x7cU;
+}
+static inline u32 gmmu_pte_kind_zf32_ms2_1z_v(void)
+{
+	return 0x7dU;
+}
+static inline u32 gmmu_pte_kind_zf32_ms4_1z_v(void)
+{
+	return 0x7eU;
+}
+static inline u32 gmmu_pte_kind_zf32_ms8_1z_v(void)
+{
+	return 0x7fU;
+}
+static inline u32 gmmu_pte_kind_zf32_ms16_1z_v(void)
+{
+	return 0x80U;
+}
+static inline u32 gmmu_pte_kind_zf32_2cs_v(void)
+{
+	return 0x81U;
+}
+static inline u32 gmmu_pte_kind_zf32_ms2_2cs_v(void)
+{
+	return 0x82U;
+}
+static inline u32 gmmu_pte_kind_zf32_ms4_2cs_v(void)
+{
+	return 0x83U;
+}
+static inline u32 gmmu_pte_kind_zf32_ms8_2cs_v(void)
+{
+	return 0x84U;
+}
+static inline u32 gmmu_pte_kind_zf32_ms16_2cs_v(void)
+{
+	return 0x85U;
+}
+static inline u32 gmmu_pte_kind_zf32_2cz_v(void)
+{
+	return 0x86U;
+}
+static inline u32 gmmu_pte_kind_zf32_ms2_2cz_v(void)
+{
+	return 0x87U;
+}
+static inline u32 gmmu_pte_kind_zf32_ms4_2cz_v(void)
+{
+	return 0x88U;
+}
+static inline u32 gmmu_pte_kind_zf32_ms8_2cz_v(void)
+{
+	return 0x89U;
+}
+static inline u32 gmmu_pte_kind_zf32_ms16_2cz_v(void)
+{
+	return 0x8aU;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v(void)
+{
+	return 0x8bU;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_v(void)
+{
+	return 0x8cU;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_v(void)
+{
+	return 0x8dU;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v(void)
+{
+	return 0x8eU;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1cs_v(void)
+{
+	return 0x8fU;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1cs_v(void)
+{
+	return 0x90U;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1cs_v(void)
+{
+	return 0x91U;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v(void)
+{
+	return 0x92U;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1zv_v(void)
+{
+	return 0x97U;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1zv_v(void)
+{
+	return 0x98U;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1zv_v(void)
+{
+	return 0x99U;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1zv_v(void)
+{
+	return 0x9aU;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1czv_v(void)
+{
+	return 0x9bU;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1czv_v(void)
+{
+	return 0x9cU;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1czv_v(void)
+{
+	return 0x9dU;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1czv_v(void)
+{
+	return 0x9eU;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cs_v(void)
+{
+	return 0x9fU;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cs_v(void)
+{
+	return 0xa0U;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cs_v(void)
+{
+	return 0xa1U;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cs_v(void)
+{
+	return 0xa2U;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cszv_v(void)
+{
+	return 0xa3U;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cszv_v(void)
+{
+	return 0xa4U;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cszv_v(void)
+{
+	return 0xa5U;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v(void)
+{
+	return 0xa6U;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_v(void)
+{
+	return 0xa7U;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_v(void)
+{
+	return 0xa8U;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_v(void)
+{
+	return 0xa9U;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v(void)
+{
+	return 0xaaU;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1cs_v(void)
+{
+	return 0xabU;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1cs_v(void)
+{
+	return 0xacU;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1cs_v(void)
+{
+	return 0xadU;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v(void)
+{
+	return 0xaeU;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1zv_v(void)
+{
+	return 0xb3U;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1zv_v(void)
+{
+	return 0xb4U;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1zv_v(void)
+{
+	return 0xb5U;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1zv_v(void)
+{
+	return 0xb6U;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1czv_v(void)
+{
+	return 0xb7U;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1czv_v(void)
+{
+	return 0xb8U;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1czv_v(void)
+{
+	return 0xb9U;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1czv_v(void)
+{
+	return 0xbaU;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cs_v(void)
+{
+	return 0xbbU;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cs_v(void)
+{
+	return 0xbcU;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cs_v(void)
+{
+	return 0xbdU;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cs_v(void)
+{
+	return 0xbeU;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cszv_v(void)
+{
+	return 0xbfU;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cszv_v(void)
+{
+	return 0xc0U;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cszv_v(void)
+{
+	return 0xc1U;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v(void)
+{
+	return 0xc2U;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_v(void)
+{
+	return 0x000000c3;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_1cs_v(void)
+{
+	return 0x000000c4;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_1cs_v(void)
+{
+	return 0x000000c5;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_1cs_v(void)
+{
+	return 0x000000c6;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_1cs_v(void)
+{
+	return 0x000000c7;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_1cs_v(void)
+{
+	return 0x000000c8;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_2cszv_v(void)
+{
+	return 0x000000ce;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_2cszv_v(void)
+{
+	return 0x000000cf;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_2cszv_v(void)
+{
+	return 0x000000d0;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_2cszv_v(void)
+{
+	return 0x000000d1;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_2cszv_v(void)
+{
+	return 0x000000d2;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_2cs_v(void)
+{
+	return 0x000000d3;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_2cs_v(void)
+{
+	return 0x000000d4;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_2cs_v(void)
+{
+	return 0x000000d5;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_2cs_v(void)
+{
+	return 0x000000d6;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_2cs_v(void)
+{
+	return 0x000000d7;
+}
+static inline u32 gmmu_pte_kind_generic_16bx2_v(void)
+{
+	return 0x000000fe;
+}
+static inline u32 gmmu_pte_kind_c32_2c_v(void)
+{
+	return 0x000000d8;
+}
+static inline u32 gmmu_pte_kind_c32_2cbr_v(void)
+{
+	return 0x000000d9;
+}
+static inline u32 gmmu_pte_kind_c32_2cba_v(void)
+{
+	return 0x000000da;
+}
+static inline u32 gmmu_pte_kind_c32_2cra_v(void)
+{
+	return 0x000000db;
+}
+static inline u32 gmmu_pte_kind_c32_2bra_v(void)
+{
+	return 0x000000dc;
+}
+static inline u32 gmmu_pte_kind_c32_ms2_2c_v(void)
+{
+	return 0x000000dd;
+}
+static inline u32 gmmu_pte_kind_c32_ms2_2cbr_v(void)
+{
+	return 0x000000de;
+}
+static inline u32 gmmu_pte_kind_c32_ms2_2cra_v(void)
+{
+	return 0x000000cc;
+}
+static inline u32 gmmu_pte_kind_c32_ms4_2c_v(void)
+{
+	return 0x000000df;
+}
+static inline u32 gmmu_pte_kind_c32_ms4_2cbr_v(void)
+{
+	return 0x000000e0;
+}
+static inline u32 gmmu_pte_kind_c32_ms4_2cba_v(void)
+{
+	return 0x000000e1;
+}
+static inline u32 gmmu_pte_kind_c32_ms4_2cra_v(void)
+{
+	return 0x000000e2;
+}
+static inline u32 gmmu_pte_kind_c32_ms4_2bra_v(void)
+{
+	return 0x000000e3;
+}
+static inline u32 gmmu_pte_kind_c32_ms8_ms16_2c_v(void)
+{
+	return 0x000000e4;
+}
+static inline u32 gmmu_pte_kind_c32_ms8_ms16_2cra_v(void)
+{
+	return 0x000000e5;
+}
+static inline u32 gmmu_pte_kind_c64_2c_v(void)
+{
+	return 0x000000e6;
+}
+static inline u32 gmmu_pte_kind_c64_2cbr_v(void)
+{
+	return 0x000000e7;
+}
+static inline u32 gmmu_pte_kind_c64_2cba_v(void)
+{
+	return 0x000000e8;
+}
+static inline u32 gmmu_pte_kind_c64_2cra_v(void)
+{
+	return 0x000000e9;
+}
+static inline u32 gmmu_pte_kind_c64_2bra_v(void)
+{
+	return 0x000000ea;
+}
+static inline u32 gmmu_pte_kind_c64_ms2_2c_v(void)
+{
+	return 0x000000eb;
+}
+static inline u32 gmmu_pte_kind_c64_ms2_2cbr_v(void)
+{
+	return 0x000000ec;
+}
+static inline u32 gmmu_pte_kind_c64_ms2_2cra_v(void)
+{
+	return 0x000000cd;
+}
+static inline u32 gmmu_pte_kind_c64_ms4_2c_v(void)
+{
+	return 0x000000ed;
+}
+static inline u32 gmmu_pte_kind_c64_ms4_2cbr_v(void)
+{
+	return 0x000000ee;
+}
+static inline u32 gmmu_pte_kind_c64_ms4_2cba_v(void)
+{
+	return 0x000000ef;
+}
+static inline u32 gmmu_pte_kind_c64_ms4_2cra_v(void)
+{
+	return 0x000000f0;
+}
+static inline u32 gmmu_pte_kind_c64_ms4_2bra_v(void)
+{
+	return 0x000000f1;
+}
+static inline u32 gmmu_pte_kind_c64_ms8_ms16_2c_v(void)
+{
+	return 0x000000f2;
+}
+static inline u32 gmmu_pte_kind_c64_ms8_ms16_2cra_v(void)
+{
+	return 0x000000f3;
+}
+static inline u32 gmmu_pte_kind_c128_2c_v(void)
+{
+	return 0x000000f4;
+}
+static inline u32 gmmu_pte_kind_c128_2cr_v(void)
+{
+	return 0x000000f5;
+}
+static inline u32 gmmu_pte_kind_c128_ms2_2c_v(void)
+{
+	return 0x000000f6;
+}
+static inline u32 gmmu_pte_kind_c128_ms2_2cr_v(void)
+{
+	return 0x000000f7;
+}
+static inline u32 gmmu_pte_kind_c128_ms4_2c_v(void)
+{
+	return 0x000000f8;
+}
+static inline u32 gmmu_pte_kind_c128_ms4_2cr_v(void)
+{
+	return 0x000000f9;
+}
+static inline u32 gmmu_pte_kind_c128_ms8_ms16_2c_v(void)
+{
+	return 0x000000fa;
+}
+static inline u32 gmmu_pte_kind_c128_ms8_ms16_2cr_v(void)
+{
+	return 0x000000fb;
+}
+static inline u32 gmmu_pte_kind_x8c24_v(void)
+{
+	return 0x000000fc;
+}
+static inline u32 gmmu_pte_kind_pitch_no_swizzle_v(void)
+{
+	return 0x000000fd;
+}
+static inline u32 gmmu_pte_kind_smsked_message_v(void)
+{
+	return 0x000000ca;
+}
+static inline u32 gmmu_pte_kind_smhost_message_v(void)
+{
+	return 0x000000cb;
+}
+static inline u32 gmmu_pte_kind_s8_v(void)
+{
+	return 0x0000002a;
+}
+static inline u32 gmmu_pte_kind_s8_2s_v(void)
+{
+	return 0x0000002b;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
new file mode 100644
index 00000000..7a4761d6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -0,0 +1,3173 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_gr_gp10b_h_
+#define _hw_gr_gp10b_h_
+
+static inline u32 gr_intr_r(void)
+{
+	return 0x00400100;
+}
+static inline u32 gr_intr_notify_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_intr_notify_reset_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_intr_semaphore_pending_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_intr_semaphore_reset_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_intr_illegal_method_pending_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_intr_illegal_method_reset_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_intr_illegal_class_pending_f(void)
+{
+	return 0x20;
+}
+static inline u32 gr_intr_illegal_class_reset_f(void)
+{
+	return 0x20;
+}
+static inline u32 gr_intr_class_error_pending_f(void)
+{
+	return 0x100000;
+}
+static inline u32 gr_intr_class_error_reset_f(void)
+{
+	return 0x100000;
+}
+static inline u32 gr_intr_exception_pending_f(void)
+{
+	return 0x200000;
+}
+static inline u32 gr_intr_exception_reset_f(void)
+{
+	return 0x200000;
+}
+static inline u32 gr_intr_en_r(void)
+{
+	return 0x0040013c;
+}
+static inline u32 gr_exception_r(void)
+{
+	return 0x00400108;
+}
+static inline u32 gr_exception_fe_m(void)
+{
+	return 0x1U << 0U; /* gr_exception.fe field mask: bit 0 */
+}
+static inline u32 gr_exception_gpc_m(void)
+{
+	return 0x1U << 24U; /* gr_exception.gpc field mask: bit 24 */
+}
+static inline u32 gr_exception1_r(void)
+{
+	return 0x00400118;
+}
+static inline u32 gr_exception1_gpc_0_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_exception2_r(void)
+{
+	return 0x0040011c;
+}
+static inline u32 gr_exception_en_r(void)
+{
+	return 0x00400138;
+}
+static inline u32 gr_exception_en_fe_m(void)
+{
+	return 0x1U << 0U; /* gr_exception_en.fe field mask: bit 0 */
+}
+static inline u32 gr_exception1_en_r(void)
+{
+	return 0x00400130;
+}
+static inline u32 gr_exception2_en_r(void)
+{
+	return 0x00400134;
+}
+static inline u32 gr_gpfifo_ctl_r(void)
+{
+	return 0x00400500;
+}
+static inline u32 gr_gpfifo_ctl_access_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 gr_gpfifo_ctl_access_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpfifo_ctl_access_enabled_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_gpfifo_ctl_semaphore_access_f(u32 v)
+{
+	return (v & 0x1) << 16;
+}
+static inline u32 gr_gpfifo_ctl_semaphore_access_enabled_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 gr_gpfifo_ctl_semaphore_access_enabled_f(void)
+{
+	return 0x10000;
+}
+static inline u32 gr_trapped_addr_r(void)
+{
+	return 0x00400704;
+}
+static inline u32 gr_trapped_addr_mthd_v(u32 r)
+{
+	return (r >> 2) & 0xfff;
+}
+static inline u32 gr_trapped_addr_subch_v(u32 r)
+{
+	return (r >> 16) & 0x7;
+}
+static inline u32 gr_trapped_data_lo_r(void)
+{
+	return 0x00400708;
+}
+static inline u32 gr_trapped_data_hi_r(void)
+{
+	return 0x0040070c;
+}
+static inline u32 gr_status_r(void)
+{
+	return 0x00400700;
+}
+static inline u32 gr_status_fe_method_lower_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 gr_status_fe_method_lower_idle_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_status_mask_r(void)
+{
+	return 0x00400610;
+}
+static inline u32 gr_engine_status_r(void)
+{
+	return 0x0040060c;
+}
+static inline u32 gr_engine_status_value_busy_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_pipe_bundle_address_r(void)
+{
+	return 0x00400200;
+}
+static inline u32 gr_pipe_bundle_address_value_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 gr_pipe_bundle_data_r(void)
+{
+	return 0x00400204;
+}
+static inline u32 gr_pipe_bundle_config_r(void)
+{
+	return 0x00400208;
+}
+static inline u32 gr_pipe_bundle_config_override_pipe_mode_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_pipe_bundle_config_override_pipe_mode_enabled_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_fe_hww_esr_r(void)
+{
+	return 0x00404000;
+}
+static inline u32 gr_fe_hww_esr_reset_active_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 gr_fe_hww_esr_en_enable_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_fe_go_idle_timeout_r(void)
+{
+	return 0x00404154;
+}
+static inline u32 gr_fe_go_idle_timeout_count_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fe_go_idle_timeout_count_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fe_object_table_r(u32 i)
+{
+	return 0x00404200 + i*4;
+}
+static inline u32 gr_fe_object_table_nvclass_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 gr_fe_tpc_fs_r(void)
+{
+	return 0x004041c4;
+}
+static inline u32 gr_pri_mme_shadow_raw_index_r(void)
+{
+	return 0x00404488;
+}
+static inline u32 gr_pri_mme_shadow_raw_index_write_trigger_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_pri_mme_shadow_raw_data_r(void)
+{
+	return 0x0040448c;
+}
+static inline u32 gr_mme_hww_esr_r(void)
+{
+	return 0x00404490;
+}
+static inline u32 gr_mme_hww_esr_reset_active_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 gr_mme_hww_esr_en_enable_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_memfmt_hww_esr_r(void)
+{
+	return 0x00404600;
+}
+static inline u32 gr_memfmt_hww_esr_reset_active_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 gr_memfmt_hww_esr_en_enable_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_fecs_cpuctl_r(void)
+{
+	return 0x00409100;
+}
+static inline u32 gr_fecs_cpuctl_startcpu_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
+static inline u32 gr_fecs_cpuctl_alias_r(void)
+{
+	return 0x00409130;
+}
+static inline u32 gr_fecs_cpuctl_alias_startcpu_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
+static inline u32 gr_fecs_dmactl_r(void)
+{
+	return 0x0040910c;
+}
+static inline u32 gr_fecs_dmactl_require_ctx_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 gr_fecs_dmactl_dmem_scrubbing_m(void)
+{
+	return 0x1U << 1U; /* dmem_scrubbing field mask: bit 1 */
+}
+static inline u32 gr_fecs_dmactl_imem_scrubbing_m(void)
+{
+	return 0x1U << 2U; /* imem_scrubbing field mask: bit 2 */
+}
+static inline u32 gr_fecs_os_r(void)
+{
+	return 0x00409080;
+}
+static inline u32 gr_fecs_idlestate_r(void)
+{
+	return 0x0040904c;
+}
+static inline u32 gr_fecs_mailbox0_r(void)
+{
+	return 0x00409040;
+}
+static inline u32 gr_fecs_mailbox1_r(void)
+{
+	return 0x00409044;
+}
+static inline u32 gr_fecs_irqstat_r(void)
+{
+	return 0x00409008;
+}
+static inline u32 gr_fecs_irqmode_r(void)
+{
+	return 0x0040900c;
+}
+static inline u32 gr_fecs_irqmask_r(void)
+{
+	return 0x00409018;
+}
+static inline u32 gr_fecs_irqdest_r(void)
+{
+	return 0x0040901c;
+}
+static inline u32 gr_fecs_curctx_r(void)
+{
+	return 0x00409050;
+}
+static inline u32 gr_fecs_nxtctx_r(void)
+{
+	return 0x00409054;
+}
+static inline u32 gr_fecs_engctl_r(void)
+{
+	return 0x004090a4;
+}
+static inline u32 gr_fecs_debug1_r(void)
+{
+	return 0x00409090;
+}
+static inline u32 gr_fecs_debuginfo_r(void)
+{
+	return 0x00409094;
+}
+static inline u32 gr_fecs_icd_cmd_r(void)
+{
+	return 0x00409200;
+}
+static inline u32 gr_fecs_icd_cmd_opc_s(void)
+{
+	return 4;
+}
+static inline u32 gr_fecs_icd_cmd_opc_f(u32 v)
+{
+	return (v & 0xf) << 0;
+}
+static inline u32 gr_fecs_icd_cmd_opc_m(void)
+{
+	return 0xfU << 0U; /* opc field mask: bits 3:0 */
+}
+static inline u32 gr_fecs_icd_cmd_opc_v(u32 r)
+{
+	return (r >> 0) & 0xf;
+}
+static inline u32 gr_fecs_icd_cmd_opc_rreg_f(void)
+{
+	return 0x8;
+}
+static inline u32 gr_fecs_icd_cmd_opc_rstat_f(void)
+{
+	return 0xe;
+}
+static inline u32 gr_fecs_icd_cmd_idx_f(u32 v)
+{
+	return (v & 0x1f) << 8;
+}
+static inline u32 gr_fecs_icd_rdata_r(void)
+{
+	return 0x0040920c;
+}
+static inline u32 gr_fecs_imemc_r(u32 i)
+{
+	return 0x00409180 + i*16;
+}
+static inline u32 gr_fecs_imemc_offs_f(u32 v)
+{
+	return (v & 0x3f) << 2;
+}
+static inline u32 gr_fecs_imemc_blk_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 gr_fecs_imemc_aincw_f(u32 v)
+{
+	return (v & 0x1) << 24;
+}
+static inline u32 gr_fecs_imemd_r(u32 i)
+{
+	return 0x00409184 + i*16;
+}
+static inline u32 gr_fecs_imemt_r(u32 i)
+{
+	return 0x00409188 + i*16;
+}
+static inline u32 gr_fecs_imemt_tag_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+static inline u32 gr_fecs_dmemc_r(u32 i)
+{
+	return 0x004091c0 + i*8;
+}
+static inline u32 gr_fecs_dmemc_offs_s(void)
+{
+	return 6;
+}
+static inline u32 gr_fecs_dmemc_offs_f(u32 v)
+{
+	return (v & 0x3f) << 2;
+}
+static inline u32 gr_fecs_dmemc_offs_m(void)
+{
+	return 0x3fU << 2U; /* offs field mask: bits 7:2 */
+}
+static inline u32 gr_fecs_dmemc_offs_v(u32 r)
+{
+	return (r >> 2) & 0x3f;
+}
+static inline u32 gr_fecs_dmemc_blk_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 gr_fecs_dmemc_aincw_f(u32 v)
+{
+	return (v & 0x1) << 24;
+}
+static inline u32 gr_fecs_dmemd_r(u32 i)
+{
+	return 0x004091c4 + i*8;
+}
+static inline u32 gr_fecs_dmatrfbase_r(void)
+{
+	return 0x00409110;
+}
+static inline u32 gr_fecs_dmatrfmoffs_r(void)
+{
+	return 0x00409114;
+}
+static inline u32 gr_fecs_dmatrffboffs_r(void)
+{
+	return 0x0040911c;
+}
+static inline u32 gr_fecs_dmatrfcmd_r(void)
+{
+	return 0x00409118;
+}
+static inline u32 gr_fecs_dmatrfcmd_imem_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 gr_fecs_dmatrfcmd_write_f(u32 v)
+{
+	return (v & 0x1) << 5;
+}
+static inline u32 gr_fecs_dmatrfcmd_size_f(u32 v)
+{
+	return (v & 0x7) << 8;
+}
+static inline u32 gr_fecs_dmatrfcmd_ctxdma_f(u32 v)
+{
+	return (v & 0x7) << 12;
+}
+static inline u32 gr_fecs_bootvec_r(void)
+{
+	return 0x00409104;
+}
+static inline u32 gr_fecs_bootvec_vec_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fecs_falcon_hwcfg_r(void)
+{
+	return 0x00409108;
+}
+static inline u32 gr_gpcs_gpccs_falcon_hwcfg_r(void)
+{
+	return 0x0041a108;
+}
+static inline u32 gr_fecs_falcon_rm_r(void)
+{
+	return 0x00409084;
+}
+static inline u32 gr_fecs_current_ctx_r(void)
+{
+	return 0x00409b00;
+}
+static inline u32 gr_fecs_current_ctx_ptr_f(u32 v)
+{
+	return (v & 0xfffffff) << 0;
+}
+static inline u32 gr_fecs_current_ctx_ptr_v(u32 r)
+{
+	return (r >> 0) & 0xfffffff;
+}
+static inline u32 gr_fecs_current_ctx_target_s(void)
+{
+	return 2;
+}
+static inline u32 gr_fecs_current_ctx_target_f(u32 v)
+{
+	return (v & 0x3) << 28;
+}
+static inline u32 gr_fecs_current_ctx_target_m(void)
+{
+	return 0x3U << 28U; /* target field mask: bits 29:28 */
+}
+static inline u32 gr_fecs_current_ctx_target_v(u32 r)
+{
+	return (r >> 28) & 0x3;
+}
+static inline u32 gr_fecs_current_ctx_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_current_ctx_valid_s(void)
+{
+	return 1;
+}
+static inline u32 gr_fecs_current_ctx_valid_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+static inline u32 gr_fecs_current_ctx_valid_m(void)
+{
+	return 0x1U << 31; /* bit-31 mask; 1U avoids signed-shift UB */
+}
+static inline u32 gr_fecs_current_ctx_valid_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 gr_fecs_current_ctx_valid_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_method_data_r(void)
+{
+	return 0x00409500;
+}
+static inline u32 gr_fecs_method_push_r(void)
+{
+	return 0x00409504;
+}
+static inline u32 gr_fecs_method_push_adr_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
+static inline u32 gr_fecs_method_push_adr_bind_pointer_v(void)
+{
+	return 0x00000003;
+}
+static inline u32 gr_fecs_method_push_adr_bind_pointer_f(void)
+{
+	return 0x3;
+}
+static inline u32 gr_fecs_method_push_adr_discover_image_size_v(void)
+{
+	return 0x00000010;
+}
+static inline u32 gr_fecs_method_push_adr_wfi_golden_save_v(void)
+{
+	return 0x00000009;
+}
+static inline u32 gr_fecs_method_push_adr_restore_golden_v(void)
+{
+	return 0x00000015;
+}
+static inline u32 gr_fecs_method_push_adr_discover_zcull_image_size_v(void)
+{
+	return 0x00000016;
+}
+static inline u32 gr_fecs_method_push_adr_discover_pm_image_size_v(void)
+{
+	return 0x00000025;
+}
+static inline u32 gr_fecs_method_push_adr_discover_reglist_image_size_v(void)
+{
+	return 0x00000030;
+}
+static inline u32 gr_fecs_method_push_adr_set_reglist_bind_instance_v(void)
+{
+	return 0x00000031;
+}
+static inline u32 gr_fecs_method_push_adr_set_reglist_virtual_address_v(void)
+{
+	return 0x00000032;
+}
+static inline u32 gr_fecs_method_push_adr_stop_ctxsw_v(void)
+{
+	return 0x00000038;
+}
+static inline u32 gr_fecs_method_push_adr_start_ctxsw_v(void)
+{
+	return 0x00000039;
+}
+static inline u32 gr_fecs_method_push_adr_set_watchdog_timeout_f(void)
+{
+	return 0x21;
+}
+static inline u32 gr_fecs_host_int_enable_r(void)
+{
+	return 0x00409c24;
+}
+static inline u32 gr_fecs_host_int_enable_fault_during_ctxsw_enable_f(void)
+{
+	return 0x10000;
+}
+static inline u32 gr_fecs_host_int_enable_umimp_firmware_method_enable_f(void)
+{
+	return 0x20000;
+}
+static inline u32 gr_fecs_host_int_enable_umimp_illegal_method_enable_f(void)
+{
+	return 0x40000;
+}
+static inline u32 gr_fecs_host_int_enable_watchdog_enable_f(void)
+{
+	return 0x80000;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_r(void)
+{
+	return 0x00409614;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f(void)
+{
+	return 0x20;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f(void)
+{
+	return 0x40;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f(void)
+{
+	return 0x100;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f(void)
+{
+	return 0x200;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_s(void)
+{
+	return 1;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_f(u32 v)
+{
+	return (v & 0x1) << 10;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_m(void)
+{
+	return 0x1U << 10U; /* be_context_reset field mask: bit 10 */
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_v(u32 r)
+{
+	return (r >> 10) & 0x1;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f(void)
+{
+	return 0x400;
+}
+static inline u32 gr_fecs_ctx_state_store_major_rev_id_r(void)
+{
+	return 0x0040960c;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_r(u32 i)
+{
+	return 0x00409800 + i*4;
+}
+static inline u32 gr_fecs_ctxsw_mailbox__size_1_v(void)
+{
+	return 0x00000010;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_value_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_value_pass_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_value_fail_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_set_r(u32 i)
+{
+	return 0x004098c0 + i*4;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_set_value_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_clear_r(u32 i)
+{
+	return 0x00409840 + i*4;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_clear_value_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fecs_fs_r(void)
+{
+	return 0x00409604;
+}
+static inline u32 gr_fecs_fs_num_available_gpcs_s(void)
+{
+	return 5;
+}
+static inline u32 gr_fecs_fs_num_available_gpcs_f(u32 v)
+{
+	return (v & 0x1f) << 0;
+}
+static inline u32 gr_fecs_fs_num_available_gpcs_m(void)
+{
+	return 0x1fU << 0U; /* num_available_gpcs mask: bits 4:0 */
+}
+static inline u32 gr_fecs_fs_num_available_gpcs_v(u32 r)
+{
+	return (r >> 0) & 0x1f;
+}
+static inline u32 gr_fecs_fs_num_available_fbps_s(void)
+{
+	return 5;
+}
+static inline u32 gr_fecs_fs_num_available_fbps_f(u32 v)
+{
+	return (v & 0x1f) << 16;
+}
+static inline u32 gr_fecs_fs_num_available_fbps_m(void)
+{
+	return 0x1fU << 16U; /* num_available_fbps mask: bits 20:16 */
+}
+static inline u32 gr_fecs_fs_num_available_fbps_v(u32 r)
+{
+	return (r >> 16) & 0x1f;
+}
+static inline u32 gr_fecs_cfg_r(void)
+{
+	return 0x00409620;
+}
+static inline u32 gr_fecs_cfg_imem_sz_v(u32 r)
+{
+	return (r >> 0) & 0xff;
+}
+static inline u32 gr_fecs_rc_lanes_r(void)
+{
+	return 0x00409880;
+}
+static inline u32 gr_fecs_rc_lanes_num_chains_s(void)
+{
+	return 6;
+}
+static inline u32 gr_fecs_rc_lanes_num_chains_f(u32 v)
+{
+	return (v & 0x3f) << 0;
+}
+static inline u32 gr_fecs_rc_lanes_num_chains_m(void)
+{
+	return 0x3fU << 0U; /* num_chains field mask: bits 5:0 */
+}
+static inline u32 gr_fecs_rc_lanes_num_chains_v(u32 r)
+{
+	return (r >> 0) & 0x3f;
+}
+static inline u32 gr_fecs_ctxsw_status_1_r(void)
+{
+	return 0x00409400;
+}
+static inline u32 gr_fecs_ctxsw_status_1_arb_busy_s(void)
+{
+	return 1;
+}
+static inline u32 gr_fecs_ctxsw_status_1_arb_busy_f(u32 v)
+{
+	return (v & 0x1) << 12;
+}
+static inline u32 gr_fecs_ctxsw_status_1_arb_busy_m(void)
+{
+	return 0x1U << 12U; /* arb_busy field mask: bit 12 */
+}
+static inline u32 gr_fecs_ctxsw_status_1_arb_busy_v(u32 r)
+{
+	return (r >> 12) & 0x1;
+}
+static inline u32 gr_fecs_arb_ctx_adr_r(void)
+{
+	return 0x00409a24;
+}
+static inline u32 gr_fecs_new_ctx_r(void)
+{
+	return 0x00409b04;
+}
+static inline u32 gr_fecs_new_ctx_ptr_s(void)
+{
+	return 28;
+}
+static inline u32 gr_fecs_new_ctx_ptr_f(u32 v)
+{
+	return (v & 0xfffffff) << 0;
+}
+static inline u32 gr_fecs_new_ctx_ptr_m(void)
+{
+	return 0xfffffffU << 0U; /* ptr field mask: bits 27:0 */
+}
+static inline u32 gr_fecs_new_ctx_ptr_v(u32 r)
+{
+	return (r >> 0) & 0xfffffff;
+}
+static inline u32 gr_fecs_new_ctx_target_s(void)
+{
+	return 2;
+}
+static inline u32 gr_fecs_new_ctx_target_f(u32 v)
+{
+	return (v & 0x3) << 28;
+}
+static inline u32 gr_fecs_new_ctx_target_m(void)
+{
+	return 0x3U << 28U; /* target field mask: bits 29:28 */
+}
+static inline u32 gr_fecs_new_ctx_target_v(u32 r)
+{
+	return (r >> 28) & 0x3;
+}
+static inline u32 gr_fecs_new_ctx_valid_s(void)
+{
+	return 1;
+}
+static inline u32 gr_fecs_new_ctx_valid_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+static inline u32 gr_fecs_new_ctx_valid_m(void)
+{
+	return 0x1U << 31; /* bit-31 mask; 1U avoids signed-shift UB */
+}
+static inline u32 gr_fecs_new_ctx_valid_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_r(void)
+{
+	return 0x00409a0c;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_ptr_s(void)
+{
+	return 28;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_ptr_f(u32 v)
+{
+	return (v & 0xfffffff) << 0;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_ptr_m(void)
+{
+	return 0xfffffffU << 0U; /* ptr field mask: bits 27:0 */
+}
+static inline u32 gr_fecs_arb_ctx_ptr_ptr_v(u32 r)
+{
+	return (r >> 0) & 0xfffffff;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_target_s(void)
+{
+	return 2;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_target_f(u32 v)
+{
+	return (v & 0x3) << 28;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_target_m(void)
+{
+	return 0x3U << 28U; /* target field mask: bits 29:28 */
+}
+static inline u32 gr_fecs_arb_ctx_ptr_target_v(u32 r)
+{
+	return (r >> 28) & 0x3;
+}
+static inline u32 gr_fecs_arb_ctx_cmd_r(void)
+{
+	return 0x00409a10;
+}
+static inline u32 gr_fecs_arb_ctx_cmd_cmd_s(void)
+{
+	return 5;
+}
+static inline u32 gr_fecs_arb_ctx_cmd_cmd_f(u32 v)
+{
+	return (v & 0x1f) << 0;
+}
+static inline u32 gr_fecs_arb_ctx_cmd_cmd_m(void)
+{
+	return 0x1fU << 0U; /* cmd field mask: bits 4:0 */
+}
+static inline u32 gr_fecs_arb_ctx_cmd_cmd_v(u32 r)
+{
+	return (r >> 0) & 0x1f;
+}
+static inline u32 gr_rstr2d_gpc_map0_r(void)
+{
+	return 0x0040780c;
+}
+static inline u32 gr_rstr2d_gpc_map1_r(void)
+{
+	return 0x00407810;
+}
+static inline u32 gr_rstr2d_gpc_map2_r(void)
+{
+	return 0x00407814;
+}
+static inline u32 gr_rstr2d_gpc_map3_r(void)
+{
+	return 0x00407818;
+}
+static inline u32 gr_rstr2d_gpc_map4_r(void)
+{
+	return 0x0040781c;
+}
+static inline u32 gr_rstr2d_gpc_map5_r(void)
+{
+	return 0x00407820;
+}
+static inline u32 gr_rstr2d_map_table_cfg_r(void)
+{
+	return 0x004078bc;
+}
+static inline u32 gr_rstr2d_map_table_cfg_row_offset_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+static inline u32 gr_rstr2d_map_table_cfg_num_entries_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 gr_pd_hww_esr_r(void)
+{
+	return 0x00406018;
+}
+static inline u32 gr_pd_hww_esr_reset_active_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 gr_pd_hww_esr_en_enable_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_r(u32 i)
+{
+	return 0x00406028 + i*4;
+}
+static inline u32 gr_pd_num_tpc_per_gpc__size_1_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count0_f(u32 v)
+{
+	return (v & 0xf) << 0;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count1_f(u32 v)
+{
+	return (v & 0xf) << 4;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count2_f(u32 v)
+{
+	return (v & 0xf) << 8;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count3_f(u32 v)
+{
+	return (v & 0xf) << 12;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count4_f(u32 v)
+{
+	return (v & 0xf) << 16;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count5_f(u32 v)
+{
+	return (v & 0xf) << 20;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count6_f(u32 v)
+{
+	return (v & 0xf) << 24;
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count7_f(u32 v)
+{
+	return (v & 0xf) << 28;
+}
+static inline u32 gr_pd_ab_dist_cfg0_r(void)
+{
+	return 0x004064c0;
+}
+static inline u32 gr_pd_ab_dist_cfg0_timeslice_enable_en_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_pd_ab_dist_cfg0_timeslice_enable_dis_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_pd_ab_dist_cfg1_r(void)
+{
+	return 0x004064c4;
+}
+static inline u32 gr_pd_ab_dist_cfg1_max_batches_init_f(void)
+{
+	return 0xffff;
+}
+static inline u32 gr_pd_ab_dist_cfg1_max_output_f(u32 v)
+{
+	return (v & 0xffff) << 16;
+}
+static inline u32 gr_pd_ab_dist_cfg1_max_output_granularity_v(void)
+{
+	return 0x00000080;
+}
+static inline u32 gr_pd_ab_dist_cfg2_r(void)
+{
+	return 0x004064c8;
+}
+static inline u32 gr_pd_ab_dist_cfg2_token_limit_f(u32 v)
+{
+	return (v & 0x1fff) << 0;
+}
+static inline u32 gr_pd_ab_dist_cfg2_token_limit_init_v(void)
+{
+	return 0x000001c0;
+}
+static inline u32 gr_pd_ab_dist_cfg2_state_limit_f(u32 v)
+{
+	return (v & 0x1fff) << 16;
+}
+static inline u32 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(void)
+{
+	return 0x00000020;
+}
+static inline u32 gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(void)
+{
+	return 0x00000182;
+}
+static inline u32 gr_pd_pagepool_r(void)
+{
+	return 0x004064cc;
+}
+static inline u32 gr_pd_pagepool_total_pages_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+static inline u32 gr_pd_pagepool_valid_true_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_pd_dist_skip_table_r(u32 i)
+{
+	return 0x004064d0 + i*4;
+}
+static inline u32 gr_pd_dist_skip_table__size_1_v(void)
+{
+	return 0x00000008;
+}
+static inline u32 gr_pd_dist_skip_table_gpc_4n0_mask_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+static inline u32 gr_pd_dist_skip_table_gpc_4n1_mask_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 gr_pd_dist_skip_table_gpc_4n2_mask_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+static inline u32 gr_pd_dist_skip_table_gpc_4n3_mask_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+static inline u32 gr_ds_debug_r(void)
+{
+	return 0x00405800;
+}
+static inline u32 gr_ds_debug_timeslice_mode_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_ds_debug_timeslice_mode_enable_f(void)
+{
+	return 0x8000000;
+}
+static inline u32 gr_ds_zbc_color_r_r(void)
+{
+	return 0x00405804;
+}
+static inline u32 gr_ds_zbc_color_r_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_ds_zbc_color_g_r(void)
+{
+	return 0x00405808;
+}
+static inline u32 gr_ds_zbc_color_g_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_ds_zbc_color_b_r(void)
+{
+	return 0x0040580c;
+}
+static inline u32 gr_ds_zbc_color_b_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_ds_zbc_color_a_r(void)
+{
+	return 0x00405810;
+}
+static inline u32 gr_ds_zbc_color_a_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_ds_zbc_color_fmt_r(void)
+{
+	return 0x00405814;
+}
+static inline u32 gr_ds_zbc_color_fmt_val_f(u32 v)
+{
+	return (v & 0x7f) << 0;
+}
+static inline u32 gr_ds_zbc_color_fmt_val_invalid_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_ds_zbc_color_fmt_val_zero_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 gr_ds_zbc_color_fmt_val_unorm_one_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 gr_ds_zbc_z_r(void)
+{
+	return 0x00405818; /* constant for gr_ds_zbc_z; presumably its register offset -- confirm */
+}
+static inline u32 gr_ds_zbc_z_val_s(void)
+{
+	return 32; /* matches the 32-bit mask used by _f/_m/_v below */
+}
+static inline u32 gr_ds_zbc_z_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit value, returned unchanged */
+}
+static inline u32 gr_ds_zbc_z_val_m(void)
+{
+	return 0xffffffff << 0; /* all 32 bits; the literal is already unsigned */
+}
+static inline u32 gr_ds_zbc_z_val_v(u32 r)
+{
+	return (r >> 0) & 0xffffffff; /* extracts all 32 bits of r */
+}
+static inline u32 gr_ds_zbc_z_val__init_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_ds_zbc_z_val__init_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_ds_zbc_z_fmt_r(void)
+{
+	return 0x0040581c;
+}
+static inline u32 gr_ds_zbc_z_fmt_val_f(u32 v)
+{
+	return (v & 0x1) << 0; /* 1-bit value at bit 0 */
+}
+static inline u32 gr_ds_zbc_z_fmt_val_invalid_f(void)
+{
+	return 0x0; /* format value 0 */
+}
+static inline u32 gr_ds_zbc_z_fmt_val_fp32_v(void)
+{
+	return 0x00000001; /* format value 1 */
+}
+static inline u32 gr_ds_zbc_tbl_index_r(void)
+{
+	return 0x00405820;
+}
+static inline u32 gr_ds_zbc_tbl_index_val_f(u32 v)
+{
+	return (v & 0xf) << 0; /* 4-bit value at bits 3:0 */
+}
+static inline u32 gr_ds_zbc_tbl_ld_r(void)
+{
+	return 0x00405824;
+}
+static inline u32 gr_ds_zbc_tbl_ld_select_c_f(void)
+{
+	return 0x0; /* bit 0 clear */
+}
+static inline u32 gr_ds_zbc_tbl_ld_select_z_f(void)
+{
+	return 0x1; /* bit 0 set */
+}
+static inline u32 gr_ds_zbc_tbl_ld_action_write_f(void)
+{
+	return 0x0; /* no bits set */
+}
+static inline u32 gr_ds_zbc_tbl_ld_trigger_active_f(void)
+{
+	return 0x4; /* bit 2 set */
+}
+static inline u32 gr_ds_hww_esr_r(void)
+{
+	return 0x00405840; /* constant for gr_ds_hww_esr; presumably its register offset -- confirm */
+}
+static inline u32 gr_ds_hww_esr_reset_s(void)
+{
+	return 1; /* matches the 1-bit mask used by _f/_m/_v below */
+}
+static inline u32 gr_ds_hww_esr_reset_f(u32 v)
+{
+	return (v & 0x1) << 30; /* 1-bit value placed at bit 30 */
+}
+static inline u32 gr_ds_hww_esr_reset_m(void)
+{
+	return 0x1 << 30; /* mask for bit 30 */
+}
+static inline u32 gr_ds_hww_esr_reset_v(u32 r)
+{
+	return (r >> 30) & 0x1; /* extracts bit 30 of r */
+}
+static inline u32 gr_ds_hww_esr_reset_task_v(void)
+{
+	return 0x00000001; /* unshifted value 1 */
+}
+static inline u32 gr_ds_hww_esr_reset_task_f(void)
+{
+	return 0x40000000; /* the value 1 at bit 30, i.e. reset_f(reset_task_v()) */
+}
+static inline u32 gr_ds_hww_esr_en_enabled_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_ds_hww_esr_2_r(void)
+{
+	return 0x00405848; /* same field layout as gr_ds_hww_esr above, different constant */
+}
+static inline u32 gr_ds_hww_esr_2_reset_s(void)
+{
+	return 1;
+}
+static inline u32 gr_ds_hww_esr_2_reset_f(u32 v)
+{
+	return (v & 0x1) << 30; /* 1-bit value placed at bit 30 */
+}
+static inline u32 gr_ds_hww_esr_2_reset_m(void)
+{
+	return 0x1 << 30; /* mask for bit 30 */
+}
+static inline u32 gr_ds_hww_esr_2_reset_v(u32 r)
+{
+	return (r >> 30) & 0x1; /* extracts bit 30 of r */
+}
+static inline u32 gr_ds_hww_esr_2_reset_task_v(void)
+{
+	return 0x00000001; /* unshifted value 1 */
+}
+static inline u32 gr_ds_hww_esr_2_reset_task_f(void)
+{
+	return 0x40000000; /* the value 1 at bit 30 */
+}
+static inline u32 gr_ds_hww_esr_2_en_enabled_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_ds_hww_report_mask_r(void)
+{
+	return 0x00405844; /* the sphN constants below are single bits: sph0 = bit 0 ... sph23 = bit 23 */
+}
+static inline u32 gr_ds_hww_report_mask_sph0_err_report_f(void)
+{
+	return 0x1; /* bit 0 */
+}
+static inline u32 gr_ds_hww_report_mask_sph1_err_report_f(void)
+{
+	return 0x2; /* bit 1 */
+}
+static inline u32 gr_ds_hww_report_mask_sph2_err_report_f(void)
+{
+	return 0x4; /* bit 2 */
+}
+static inline u32 gr_ds_hww_report_mask_sph3_err_report_f(void)
+{
+	return 0x8; /* bit 3 */
+}
+static inline u32 gr_ds_hww_report_mask_sph4_err_report_f(void)
+{
+	return 0x10; /* bit 4 */
+}
+static inline u32 gr_ds_hww_report_mask_sph5_err_report_f(void)
+{
+	return 0x20; /* bit 5 */
+}
+static inline u32 gr_ds_hww_report_mask_sph6_err_report_f(void)
+{
+	return 0x40; /* bit 6 */
+}
+static inline u32 gr_ds_hww_report_mask_sph7_err_report_f(void)
+{
+	return 0x80; /* bit 7 */
+}
+static inline u32 gr_ds_hww_report_mask_sph8_err_report_f(void)
+{
+	return 0x100; /* bit 8 */
+}
+static inline u32 gr_ds_hww_report_mask_sph9_err_report_f(void)
+{
+	return 0x200; /* bit 9 */
+}
+static inline u32 gr_ds_hww_report_mask_sph10_err_report_f(void)
+{
+	return 0x400; /* bit 10 */
+}
+static inline u32 gr_ds_hww_report_mask_sph11_err_report_f(void)
+{
+	return 0x800; /* bit 11 */
+}
+static inline u32 gr_ds_hww_report_mask_sph12_err_report_f(void)
+{
+	return 0x1000; /* bit 12 */
+}
+static inline u32 gr_ds_hww_report_mask_sph13_err_report_f(void)
+{
+	return 0x2000; /* bit 13 */
+}
+static inline u32 gr_ds_hww_report_mask_sph14_err_report_f(void)
+{
+	return 0x4000; /* bit 14 */
+}
+static inline u32 gr_ds_hww_report_mask_sph15_err_report_f(void)
+{
+	return 0x8000; /* bit 15 */
+}
+static inline u32 gr_ds_hww_report_mask_sph16_err_report_f(void)
+{
+	return 0x10000; /* bit 16 */
+}
+static inline u32 gr_ds_hww_report_mask_sph17_err_report_f(void)
+{
+	return 0x20000; /* bit 17 */
+}
+static inline u32 gr_ds_hww_report_mask_sph18_err_report_f(void)
+{
+	return 0x40000; /* bit 18 */
+}
+static inline u32 gr_ds_hww_report_mask_sph19_err_report_f(void)
+{
+	return 0x80000; /* bit 19 */
+}
+static inline u32 gr_ds_hww_report_mask_sph20_err_report_f(void)
+{
+	return 0x100000; /* bit 20 */
+}
+static inline u32 gr_ds_hww_report_mask_sph21_err_report_f(void)
+{
+	return 0x200000; /* bit 21 */
+}
+static inline u32 gr_ds_hww_report_mask_sph22_err_report_f(void)
+{
+	return 0x400000; /* bit 22 */
+}
+static inline u32 gr_ds_hww_report_mask_sph23_err_report_f(void)
+{
+	return 0x800000; /* bit 23 */
+}
+static inline u32 gr_ds_hww_report_mask_2_r(void)
+{
+	return 0x0040584c; /* separate constant; sph24 below restarts at bit 0 */
+}
+static inline u32 gr_ds_hww_report_mask_2_sph24_err_report_f(void)
+{
+	return 0x1; /* bit 0 */
+}
+static inline u32 gr_ds_num_tpc_per_gpc_r(u32 i)
+{
+	return 0x00405870 + i*4; /* base 0x00405870 plus a 4-byte stride per index i */
+}
+static inline u32 gr_scc_bundle_cb_base_r(void)
+{
+	return 0x00408004; /* constant for gr_scc_bundle_cb_base; presumably its register offset -- confirm */
+}
+static inline u32 gr_scc_bundle_cb_base_addr_39_8_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit value, returned unchanged */
+}
+static inline u32 gr_scc_bundle_cb_base_addr_39_8_align_bits_v(void)
+{
+	return 0x00000008; /* 8; presumably the caller shifts the address right by this -- confirm */
+}
+static inline u32 gr_scc_bundle_cb_size_r(void)
+{
+	return 0x00408008;
+}
+static inline u32 gr_scc_bundle_cb_size_div_256b_f(u32 v)
+{
+	return (v & 0x7ff) << 0; /* 11-bit value at bits 10:0 */
+}
+static inline u32 gr_scc_bundle_cb_size_div_256b__prod_v(void)
+{
+	return 0x00000018; /* 24 decimal */
+}
+static inline u32 gr_scc_bundle_cb_size_div_256b_byte_granularity_v(void)
+{
+	return 0x00000100; /* 256 decimal */
+}
+static inline u32 gr_scc_bundle_cb_size_valid_false_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_scc_bundle_cb_size_valid_false_f(void)
+{
+	return 0x0; /* bit 31 clear */
+}
+static inline u32 gr_scc_bundle_cb_size_valid_true_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_scc_pagepool_base_r(void)
+{
+	return 0x0040800c; /* constant for gr_scc_pagepool_base; presumably its register offset -- confirm */
+}
+static inline u32 gr_scc_pagepool_base_addr_39_8_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit value, returned unchanged */
+}
+static inline u32 gr_scc_pagepool_base_addr_39_8_align_bits_v(void)
+{
+	return 0x00000008; /* 8; presumably the caller shifts the address right by this -- confirm */
+}
+static inline u32 gr_scc_pagepool_r(void)
+{
+	return 0x00408010;
+}
+static inline u32 gr_scc_pagepool_total_pages_f(u32 v)
+{
+	return (v & 0xff) << 0; /* 8-bit value at bits 7:0 */
+}
+static inline u32 gr_scc_pagepool_total_pages_hwmax_v(void)
+{
+	return 0x00000000; /* field encoding 0; compare hwmax_value_v below */
+}
+static inline u32 gr_scc_pagepool_total_pages_hwmax_value_v(void)
+{
+	return 0x00000080; /* 128 decimal */
+}
+static inline u32 gr_scc_pagepool_total_pages_byte_granularity_v(void)
+{
+	return 0x00000100; /* 256 decimal */
+}
+static inline u32 gr_scc_pagepool_max_valid_pages_s(void)
+{
+	return 8; /* matches the 8-bit mask used by _f/_m/_v below */
+}
+static inline u32 gr_scc_pagepool_max_valid_pages_f(u32 v)
+{
+	return (v & 0xff) << 8; /* 8-bit value placed at bits 15:8 */
+}
+static inline u32 gr_scc_pagepool_max_valid_pages_m(void)
+{
+	return 0xff << 8; /* mask for bits 15:8 */
+}
+static inline u32 gr_scc_pagepool_max_valid_pages_v(u32 r)
+{
+	return (r >> 8) & 0xff; /* extracts bits 15:8 of r */
+}
+static inline u32 gr_scc_pagepool_valid_true_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_scc_init_r(void)
+{
+	return 0x0040802c; /* constant for gr_scc_init; presumably its register offset -- confirm */
+}
+static inline u32 gr_scc_init_ram_trigger_f(void)
+{
+	return 0x1; /* bit 0 set */
+}
+static inline u32 gr_scc_hww_esr_r(void)
+{
+	return 0x00408030;
+}
+static inline u32 gr_scc_hww_esr_reset_active_f(void)
+{
+	return 0x40000000; /* bit 30 set */
+}
+static inline u32 gr_scc_hww_esr_en_enable_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_sked_hww_esr_r(void)
+{
+	return 0x00407020;
+}
+static inline u32 gr_sked_hww_esr_reset_active_f(void)
+{
+	return 0x40000000; /* bit 30 set */
+}
+static inline u32 gr_cwd_fs_r(void)
+{
+	return 0x00405b00; /* constant for gr_cwd_fs; presumably its register offset -- confirm */
+}
+static inline u32 gr_cwd_fs_num_gpcs_f(u32 v)
+{
+	return (v & 0xff) << 0; /* 8-bit value at bits 7:0 */
+}
+static inline u32 gr_cwd_fs_num_tpcs_f(u32 v)
+{
+	return (v & 0xff) << 8; /* 8-bit value at bits 15:8 */
+}
+static inline u32 gr_cwd_gpc_tpc_id_r(u32 i)
+{
+	return 0x00405b60 + i*4; /* base 0x00405b60 plus a 4-byte stride per index i */
+}
+static inline u32 gr_cwd_gpc_tpc_id_tpc0_f(u32 v)
+{
+	return (v & 0xf) << 0; /* 4-bit value at bits 3:0 */
+}
+static inline u32 gr_cwd_gpc_tpc_id_tpc1_f(u32 v)
+{
+	return (v & 0xf) << 8; /* 4-bit value at bits 11:8 (bits 7:4 are not produced here) */
+}
+static inline u32 gr_cwd_sm_id_r(u32 i)
+{
+	return 0x00405ba0 + i*4; /* base 0x00405ba0 plus a 4-byte stride per index i */
+}
+static inline u32 gr_cwd_sm_id_tpc0_f(u32 v)
+{
+	return (v & 0xff) << 0; /* 8-bit value at bits 7:0 */
+}
+static inline u32 gr_cwd_sm_id_tpc1_f(u32 v)
+{
+	return (v & 0xff) << 8; /* 8-bit value at bits 15:8 */
+}
+static inline u32 gr_gpc0_fs_gpc_r(void)
+{
+	return 0x00502608; /* constant for gr_gpc0_fs_gpc; presumably its register offset -- confirm */
+}
+static inline u32 gr_gpc0_fs_gpc_num_available_tpcs_v(u32 r)
+{
+	return (r >> 0) & 0x1f; /* extracts bits 4:0 of r */
+}
+static inline u32 gr_gpc0_fs_gpc_num_available_zculls_v(u32 r)
+{
+	return (r >> 16) & 0x1f; /* extracts bits 20:16 of r */
+}
+static inline u32 gr_gpc0_cfg_r(void)
+{
+	return 0x00502620;
+}
+static inline u32 gr_gpc0_cfg_imem_sz_v(u32 r)
+{
+	return (r >> 0) & 0xff; /* extracts bits 7:0 of r */
+}
+static inline u32 gr_gpccs_rc_lanes_r(void)
+{
+	return 0x00502880;
+}
+static inline u32 gr_gpccs_rc_lanes_num_chains_s(void)
+{
+	return 6; /* matches the 6-bit mask used by _f/_m/_v below */
+}
+static inline u32 gr_gpccs_rc_lanes_num_chains_f(u32 v)
+{
+	return (v & 0x3f) << 0; /* 6-bit value at bits 5:0 */
+}
+static inline u32 gr_gpccs_rc_lanes_num_chains_m(void)
+{
+	return 0x3f << 0; /* mask for bits 5:0 */
+}
+static inline u32 gr_gpccs_rc_lanes_num_chains_v(u32 r)
+{
+	return (r >> 0) & 0x3f; /* extracts bits 5:0 of r */
+}
+static inline u32 gr_gpc0_zcull_fs_r(void)
+{
+	return 0x00500910; /* constant for gr_gpc0_zcull_fs; presumably its register offset -- confirm */
+}
+static inline u32 gr_gpc0_zcull_fs_num_sms_f(u32 v)
+{
+	return (v & 0x1ff) << 0; /* 9-bit value at bits 8:0 */
+}
+static inline u32 gr_gpc0_zcull_fs_num_active_banks_f(u32 v)
+{
+	return (v & 0xf) << 16; /* 4-bit value at bits 19:16 */
+}
+static inline u32 gr_gpc0_zcull_ram_addr_r(void)
+{
+	return 0x00500914;
+}
+static inline u32 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(u32 v)
+{
+	return (v & 0xf) << 0; /* 4-bit value at bits 3:0 */
+}
+static inline u32 gr_gpc0_zcull_ram_addr_row_offset_f(u32 v)
+{
+	return (v & 0xf) << 8; /* 4-bit value at bits 11:8 */
+}
+static inline u32 gr_gpc0_zcull_sm_num_rcp_r(void)
+{
+	return 0x00500918;
+}
+static inline u32 gr_gpc0_zcull_sm_num_rcp_conservative_f(u32 v)
+{
+	return (v & 0xffffff) << 0; /* 24-bit value at bits 23:0 */
+}
+static inline u32 gr_gpc0_zcull_sm_num_rcp_conservative__max_v(void)
+{
+	return 0x00800000; /* 1 << 23; fits inside the 24-bit field above */
+}
+static inline u32 gr_gpc0_zcull_total_ram_size_r(void)
+{
+	return 0x00500920;
+}
+static inline u32 gr_gpc0_zcull_total_ram_size_num_aliquots_f(u32 v)
+{
+	return (v & 0xffff) << 0; /* 16-bit value at bits 15:0 */
+}
+static inline u32 gr_gpc0_zcull_zcsize_r(u32 i)
+{
+	return 0x00500a04 + i*32; /* base 0x00500a04 plus a 32-byte stride per index i */
+}
+static inline u32 gr_gpc0_zcull_zcsize_height_subregion__multiple_v(void)
+{
+	return 0x00000040; /* 64 decimal */
+}
+static inline u32 gr_gpc0_zcull_zcsize_width_subregion__multiple_v(void)
+{
+	return 0x00000010; /* 16 decimal */
+}
+static inline u32 gr_gpc0_gpm_pd_sm_id_r(u32 i)
+{
+	return 0x00500c10 + i*4; /* base 0x00500c10 plus a 4-byte stride per index i */
+}
+static inline u32 gr_gpc0_gpm_pd_sm_id_id_f(u32 v)
+{
+	return (v & 0xff) << 0; /* 8-bit value at bits 7:0 */
+}
+static inline u32 gr_gpc0_gpm_pd_pes_tpc_id_mask_r(u32 i)
+{
+	return 0x00500c30 + i*4; /* base 0x00500c30 plus a 4-byte stride per index i */
+}
+static inline u32 gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(u32 r)
+{
+	return (r >> 0) & 0xff; /* extracts bits 7:0 of r */
+}
+static inline u32 gr_gpc0_tpc0_pe_cfg_smid_r(void)
+{
+	return 0x00504088; /* constant for gr_gpc0_tpc0_pe_cfg_smid; presumably its register offset -- confirm */
+}
+static inline u32 gr_gpc0_tpc0_pe_cfg_smid_value_f(u32 v)
+{
+	return (v & 0xffff) << 0; /* 16-bit value at bits 15:0 */
+}
+static inline u32 gr_gpc0_tpc0_sm_cfg_r(void)
+{
+	return 0x00504698;
+}
+static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
+{
+	return (v & 0xffff) << 0; /* 16-bit value at bits 15:0 */
+}
+static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
+{
+	return 0x00503018; /* NOTE(review): "strem" is the generated name; do not rename, callers depend on it */
+}
+static inline u32 gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(void)
+{
+	return 0x1 << 0; /* mask for bit 0 */
+}
+static inline u32 gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f(void)
+{
+	return 0x1; /* bit 0 set */
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_r(void)
+{
+	return 0x005030c0;
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_f(u32 v)
+{
+	return (v & 0x3fffff) << 0; /* 22-bit value at bits 21:0 */
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_m(void)
+{
+	return 0x3fffff << 0; /* mask for bits 21:0 */
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(void)
+{
+	return 0x00100000; /* 1 << 20 */
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v(void)
+{
+	return 0x00000020; /* 32 decimal */
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_cb_offset_r(void)
+{
+	return 0x005030f4;
+}
+static inline u32 gr_gpc0_ppc0_cbm_alpha_cb_size_r(void)
+{
+	return 0x005030e4;
+}
+static inline u32 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(u32 v)
+{
+	return (v & 0xffff) << 0; /* 16-bit value at bits 15:0 */
+}
+static inline u32 gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(void)
+{
+	return 0xffff << 0; /* mask for bits 15:0 */
+}
+static inline u32 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(void)
+{
+	return 0x00000800; /* 2048 decimal */
+}
+static inline u32 gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v(void)
+{
+	return 0x00000020; /* 32 decimal */
+}
+static inline u32 gr_gpc0_ppc0_cbm_alpha_cb_offset_r(void)
+{
+	return 0x005030f8;
+}
+static inline u32 gr_gpccs_falcon_addr_r(void)
+{
+	return 0x0041a0ac; /* three fields follow: lsb (bits 5:0), msb (bits 11:6), ext (bits 11:0, overlapping both) */
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_s(void)
+{
+	return 6; /* matches the 6-bit mask used by _f/_m/_v below */
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_f(u32 v)
+{
+	return (v & 0x3f) << 0; /* 6-bit value at bits 5:0 */
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_m(void)
+{
+	return 0x3f << 0; /* mask for bits 5:0 */
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_v(u32 r)
+{
+	return (r >> 0) & 0x3f; /* extracts bits 5:0 of r */
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_init_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_init_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpccs_falcon_addr_msb_s(void)
+{
+	return 6; /* matches the 6-bit mask used by _f/_m/_v below */
+}
+static inline u32 gr_gpccs_falcon_addr_msb_f(u32 v)
+{
+	return (v & 0x3f) << 6; /* 6-bit value placed at bits 11:6 */
+}
+static inline u32 gr_gpccs_falcon_addr_msb_m(void)
+{
+	return 0x3f << 6; /* mask for bits 11:6 */
+}
+static inline u32 gr_gpccs_falcon_addr_msb_v(u32 r)
+{
+	return (r >> 6) & 0x3f; /* extracts bits 11:6 of r */
+}
+static inline u32 gr_gpccs_falcon_addr_msb_init_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpccs_falcon_addr_msb_init_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpccs_falcon_addr_ext_s(void)
+{
+	return 12; /* matches the 12-bit mask used by _f/_m/_v below */
+}
+static inline u32 gr_gpccs_falcon_addr_ext_f(u32 v)
+{
+	return (v & 0xfff) << 0; /* 12-bit value at bits 11:0 */
+}
+static inline u32 gr_gpccs_falcon_addr_ext_m(void)
+{
+	return 0xfff << 0; /* mask for bits 11:0 */
+}
+static inline u32 gr_gpccs_falcon_addr_ext_v(u32 r)
+{
+	return (r >> 0) & 0xfff; /* extracts bits 11:0 of r */
+}
+static inline u32 gr_gpccs_cpuctl_r(void)
+{
+	return 0x0041a100; /* constant for gr_gpccs_cpuctl; presumably its register offset -- confirm */
+}
+static inline u32 gr_gpccs_cpuctl_startcpu_f(u32 v)
+{
+	return (v & 0x1) << 1; /* 1-bit value placed at bit 1 */
+}
+static inline u32 gr_gpccs_dmactl_r(void)
+{
+	return 0x0041a10c;
+}
+static inline u32 gr_gpccs_dmactl_require_ctx_f(u32 v)
+{
+	return (v & 0x1) << 0; /* 1-bit value at bit 0 */
+}
+static inline u32 gr_gpccs_dmactl_dmem_scrubbing_m(void)
+{
+	return 0x1 << 1; /* mask for bit 1 */
+}
+static inline u32 gr_gpccs_dmactl_imem_scrubbing_m(void)
+{
+	return 0x1 << 2; /* mask for bit 2 */
+}
+static inline u32 gr_gpccs_imemc_r(u32 i)
+{
+	return 0x0041a180 + i*16; /* imemc/imemd/imemt share a 16-byte stride per index i */
+}
+static inline u32 gr_gpccs_imemc_offs_f(u32 v)
+{
+	return (v & 0x3f) << 2; /* 6-bit value placed at bits 7:2 */
+}
+static inline u32 gr_gpccs_imemc_blk_f(u32 v)
+{
+	return (v & 0xff) << 8; /* 8-bit value placed at bits 15:8 */
+}
+static inline u32 gr_gpccs_imemc_aincw_f(u32 v)
+{
+	return (v & 0x1) << 24; /* 1-bit value placed at bit 24 */
+}
+static inline u32 gr_gpccs_imemd_r(u32 i)
+{
+	return 0x0041a184 + i*16; /* 4 bytes above imemc for the same i */
+}
+static inline u32 gr_gpccs_imemt_r(u32 i)
+{
+	return 0x0041a188 + i*16; /* 8 bytes above imemc for the same i */
+}
+static inline u32 gr_gpccs_imemt__size_1_v(void)
+{
+	return 0x00000004; /* 4; presumably the number of valid indices for imemt_r(i) -- TODO confirm */
+}
+static inline u32 gr_gpccs_imemt_tag_f(u32 v)
+{
+	return (v & 0xffff) << 0; /* 16-bit value at bits 15:0 */
+}
+static inline u32 gr_gpccs_dmemc_r(u32 i)
+{
+	return 0x0041a1c0 + i*8; /* dmemc/dmemd share an 8-byte stride per index i */
+}
+static inline u32 gr_gpccs_dmemc_offs_f(u32 v)
+{
+	return (v & 0x3f) << 2; /* 6-bit value placed at bits 7:2 */
+}
+static inline u32 gr_gpccs_dmemc_blk_f(u32 v)
+{
+	return (v & 0xff) << 8; /* 8-bit value placed at bits 15:8 */
+}
+static inline u32 gr_gpccs_dmemc_aincw_f(u32 v)
+{
+	return (v & 0x1) << 24; /* 1-bit value placed at bit 24 */
+}
+static inline u32 gr_gpccs_dmemd_r(u32 i)
+{
+	return 0x0041a1c4 + i*8; /* 4 bytes above dmemc for the same i */
+}
+static inline u32 gr_gpccs_ctxsw_mailbox_r(u32 i)
+{
+	return 0x0041a800 + i*4; /* base 0x0041a800 plus a 4-byte stride per index i */
+}
+static inline u32 gr_gpccs_ctxsw_mailbox_value_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit value, returned unchanged */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_base_r(void)
+{
+	return 0x00418e24; /* constant for gr_gpcs_swdx_bundle_cb_base; presumably its register offset -- confirm */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_base_addr_39_8_s(void)
+{
+	return 32; /* matches the 32-bit mask used by _f/_m/_v below */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit value, returned unchanged */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_base_addr_39_8_m(void)
+{
+	return 0xffffffff << 0; /* all 32 bits; the literal is already unsigned */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_base_addr_39_8_v(u32 r)
+{
+	return (r >> 0) & 0xffffffff; /* extracts all 32 bits of r */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_base_addr_39_8_init_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_base_addr_39_8_init_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_r(void)
+{
+	return 0x00418e28;
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_div_256b_s(void)
+{
+	return 11; /* matches the 11-bit mask used by _f/_m/_v below */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_div_256b_f(u32 v)
+{
+	return (v & 0x7ff) << 0; /* 11-bit value at bits 10:0 */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_div_256b_m(void)
+{
+	return 0x7ff << 0; /* mask for bits 10:0 */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_div_256b_v(u32 r)
+{
+	return (r >> 0) & 0x7ff; /* extracts bits 10:0 of r */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_div_256b_init_v(void)
+{
+	return 0x00000018; /* 24 decimal */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_div_256b_init_f(void)
+{
+	return 0x18; /* same value as init_v: the field sits at bit 0 */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_s(void)
+{
+	return 1; /* matches the 1-bit mask used by valid_f */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_f(u32 v)
+{
+	return (v & 0x1) << 31; /* 1-bit value placed at bit 31; v is u32, so the shift is unsigned */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_m(void)
+{
+	return 0x1U << 31; /* mask for bit 31 (0x80000000); unsigned literal because shifting a signed 1 into the sign bit is undefined (C11 6.5.7p4) */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_v(u32 r)
+{
+	return (r >> 31) & 0x1; /* extracts bit 31 of r */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_false_v(void)
+{
+	return 0x00000000; /* unshifted value 0 */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_false_f(void)
+{
+	return 0x0; /* bit 31 clear */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_true_v(void)
+{
+	return 0x00000001; /* unshifted value 1 */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_true_f(void)
+{
+	return 0x80000000; /* the value 1 at bit 31 */
+}
+static inline u32 gr_gpcs_swdx_tc_beta_cb_size_r(u32 i)
+{
+	return 0x00418ea0 + i*4; /* base 0x00418ea0 plus a 4-byte stride per index i */
+}
+static inline u32 gr_gpcs_swdx_tc_beta_cb_size_v_f(u32 v)
+{
+	return (v & 0x3fffff) << 0; /* 22-bit value at bits 21:0 */
+}
+static inline u32 gr_gpcs_swdx_tc_beta_cb_size_v_m(void)
+{
+	return 0x3fffff << 0; /* mask for bits 21:0 */
+}
+static inline u32 gr_gpcs_swdx_rm_pagepool_r(void)
+{
+	return 0x00418e30;
+}
+static inline u32 gr_gpcs_swdx_rm_pagepool_total_pages_f(u32 v)
+{
+	return (v & 0xff) << 0; /* 8-bit value at bits 7:0 */
+}
+static inline u32 gr_gpcs_swdx_rm_pagepool_valid_true_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_gpcs_setup_attrib_cb_base_r(void)
+{
+	return 0x00418810;
+}
+static inline u32 gr_gpcs_setup_attrib_cb_base_addr_39_12_f(u32 v)
+{
+	return (v & 0xfffffff) << 0; /* 28-bit value at bits 27:0 */
+}
+static inline u32 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v(void)
+{
+	return 0x0000000c; /* 12; presumably the caller shifts the address right by this -- confirm */
+}
+static inline u32 gr_gpcs_setup_attrib_cb_base_valid_true_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_crstr_gpc_map0_r(void)
+{
+	return 0x00418b08; /* map0..map2 are 4 bytes apart; each packs six 3-bit tile values on a 5-bit pitch */
+}
+static inline u32 gr_crstr_gpc_map0_tile0_f(u32 v)
+{
+	return (v & 0x7) << 0; /* bits 2:0 */
+}
+static inline u32 gr_crstr_gpc_map0_tile1_f(u32 v)
+{
+	return (v & 0x7) << 5; /* bits 7:5 */
+}
+static inline u32 gr_crstr_gpc_map0_tile2_f(u32 v)
+{
+	return (v & 0x7) << 10; /* bits 12:10 */
+}
+static inline u32 gr_crstr_gpc_map0_tile3_f(u32 v)
+{
+	return (v & 0x7) << 15; /* bits 17:15 */
+}
+static inline u32 gr_crstr_gpc_map0_tile4_f(u32 v)
+{
+	return (v & 0x7) << 20; /* bits 22:20 */
+}
+static inline u32 gr_crstr_gpc_map0_tile5_f(u32 v)
+{
+	return (v & 0x7) << 25; /* bits 27:25 */
+}
+static inline u32 gr_crstr_gpc_map1_r(void)
+{
+	return 0x00418b0c;
+}
+static inline u32 gr_crstr_gpc_map1_tile6_f(u32 v)
+{
+	return (v & 0x7) << 0; /* bits 2:0 */
+}
+static inline u32 gr_crstr_gpc_map1_tile7_f(u32 v)
+{
+	return (v & 0x7) << 5; /* bits 7:5 */
+}
+static inline u32 gr_crstr_gpc_map1_tile8_f(u32 v)
+{
+	return (v & 0x7) << 10; /* bits 12:10 */
+}
+static inline u32 gr_crstr_gpc_map1_tile9_f(u32 v)
+{
+	return (v & 0x7) << 15; /* bits 17:15 */
+}
+static inline u32 gr_crstr_gpc_map1_tile10_f(u32 v)
+{
+	return (v & 0x7) << 20; /* bits 22:20 */
+}
+static inline u32 gr_crstr_gpc_map1_tile11_f(u32 v)
+{
+	return (v & 0x7) << 25; /* bits 27:25 */
+}
+static inline u32 gr_crstr_gpc_map2_r(void)
+{
+	return 0x00418b10;
+}
+static inline u32 gr_crstr_gpc_map2_tile12_f(u32 v)
+{
+	return (v & 0x7) << 0; /* bits 2:0 */
+}
+static inline u32 gr_crstr_gpc_map2_tile13_f(u32 v)
+{
+	return (v & 0x7) << 5; /* bits 7:5 */
+}
+static inline u32 gr_crstr_gpc_map2_tile14_f(u32 v)
+{
+	return (v & 0x7) << 10; /* bits 12:10 */
+}
+static inline u32 gr_crstr_gpc_map2_tile15_f(u32 v)
+{
+	return (v & 0x7) << 15; /* bits 17:15 */
+}
+static inline u32 gr_crstr_gpc_map2_tile16_f(u32 v)
+{
+	return (v & 0x7) << 20; /* bits 22:20 */
+}
+static inline u32 gr_crstr_gpc_map2_tile17_f(u32 v)
+{
+	return (v & 0x7) << 25; /* bits 27:25 */
+}
+static inline u32 gr_crstr_gpc_map3_r(void)
+{
+	return 0x00418b14; /* map3..map5 are 4 bytes apart; each packs six 3-bit tile values on a 5-bit pitch */
+}
+static inline u32 gr_crstr_gpc_map3_tile18_f(u32 v)
+{
+	return (v & 0x7) << 0; /* bits 2:0 */
+}
+static inline u32 gr_crstr_gpc_map3_tile19_f(u32 v)
+{
+	return (v & 0x7) << 5; /* bits 7:5 */
+}
+static inline u32 gr_crstr_gpc_map3_tile20_f(u32 v)
+{
+	return (v & 0x7) << 10; /* bits 12:10 */
+}
+static inline u32 gr_crstr_gpc_map3_tile21_f(u32 v)
+{
+	return (v & 0x7) << 15; /* bits 17:15 */
+}
+static inline u32 gr_crstr_gpc_map3_tile22_f(u32 v)
+{
+	return (v & 0x7) << 20; /* bits 22:20 */
+}
+static inline u32 gr_crstr_gpc_map3_tile23_f(u32 v)
+{
+	return (v & 0x7) << 25; /* bits 27:25 */
+}
+static inline u32 gr_crstr_gpc_map4_r(void)
+{
+	return 0x00418b18;
+}
+static inline u32 gr_crstr_gpc_map4_tile24_f(u32 v)
+{
+	return (v & 0x7) << 0; /* bits 2:0 */
+}
+static inline u32 gr_crstr_gpc_map4_tile25_f(u32 v)
+{
+	return (v & 0x7) << 5; /* bits 7:5 */
+}
+static inline u32 gr_crstr_gpc_map4_tile26_f(u32 v)
+{
+	return (v & 0x7) << 10; /* bits 12:10 */
+}
+static inline u32 gr_crstr_gpc_map4_tile27_f(u32 v)
+{
+	return (v & 0x7) << 15; /* bits 17:15 */
+}
+static inline u32 gr_crstr_gpc_map4_tile28_f(u32 v)
+{
+	return (v & 0x7) << 20; /* bits 22:20 */
+}
+static inline u32 gr_crstr_gpc_map4_tile29_f(u32 v)
+{
+	return (v & 0x7) << 25; /* bits 27:25 */
+}
+static inline u32 gr_crstr_gpc_map5_r(void)
+{
+	return 0x00418b1c;
+}
+static inline u32 gr_crstr_gpc_map5_tile30_f(u32 v)
+{
+	return (v & 0x7) << 0; /* bits 2:0 */
+}
+static inline u32 gr_crstr_gpc_map5_tile31_f(u32 v)
+{
+	return (v & 0x7) << 5; /* bits 7:5 */
+}
+static inline u32 gr_crstr_gpc_map5_tile32_f(u32 v)
+{
+	return (v & 0x7) << 10; /* bits 12:10 */
+}
+static inline u32 gr_crstr_gpc_map5_tile33_f(u32 v)
+{
+	return (v & 0x7) << 15; /* bits 17:15 */
+}
+static inline u32 gr_crstr_gpc_map5_tile34_f(u32 v)
+{
+	return (v & 0x7) << 20; /* bits 22:20 */
+}
+static inline u32 gr_crstr_gpc_map5_tile35_f(u32 v)
+{
+	return (v & 0x7) << 25; /* bits 27:25 */
+}
+static inline u32 gr_crstr_map_table_cfg_r(void)
+{
+	return 0x00418bb8;
+}
+static inline u32 gr_crstr_map_table_cfg_row_offset_f(u32 v)
+{
+	return (v & 0xff) << 0; /* 8-bit value at bits 7:0 */
+}
+static inline u32 gr_crstr_map_table_cfg_num_entries_f(u32 v)
+{
+	return (v & 0xff) << 8; /* 8-bit value at bits 15:8 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_r(void)
+{
+	return 0x00418980; /* map0 and map1 are 4 bytes apart; each packs eight 3-bit tile values on a 4-bit pitch */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(u32 v)
+{
+	return (v & 0x7) << 0; /* bits 2:0 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(u32 v)
+{
+	return (v & 0x7) << 4; /* bits 6:4 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(u32 v)
+{
+	return (v & 0x7) << 8; /* bits 10:8 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(u32 v)
+{
+	return (v & 0x7) << 12; /* bits 14:12 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(u32 v)
+{
+	return (v & 0x7) << 16; /* bits 18:16 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(u32 v)
+{
+	return (v & 0x7) << 20; /* bits 22:20 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(u32 v)
+{
+	return (v & 0x7) << 24; /* bits 26:24 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(u32 v)
+{
+	return (v & 0x7) << 28; /* bits 30:28 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_r(void)
+{
+	return 0x00418984;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(u32 v)
+{
+	return (v & 0x7) << 0; /* bits 2:0 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(u32 v)
+{
+	return (v & 0x7) << 4; /* bits 6:4 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(u32 v)
+{
+	return (v & 0x7) << 8; /* bits 10:8 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(u32 v)
+{
+	return (v & 0x7) << 12; /* bits 14:12 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(u32 v)
+{
+	return (v & 0x7) << 16; /* bits 18:16 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(u32 v)
+{
+	return (v & 0x7) << 20; /* bits 22:20 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(u32 v)
+{
+	return (v & 0x7) << 24; /* bits 26:24 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(u32 v)
+{
+	return (v & 0x7) << 28; /* bits 30:28 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_r(void)
+{
+	return 0x00418988; /* map2 and map3 are 4 bytes apart; each packs eight 3-bit tile values on a 4-bit pitch */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(u32 v)
+{
+	return (v & 0x7) << 0; /* bits 2:0 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(u32 v)
+{
+	return (v & 0x7) << 4; /* bits 6:4 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(u32 v)
+{
+	return (v & 0x7) << 8; /* bits 10:8 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(u32 v)
+{
+	return (v & 0x7) << 12; /* bits 14:12 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(u32 v)
+{
+	return (v & 0x7) << 16; /* bits 18:16 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(u32 v)
+{
+	return (v & 0x7) << 20; /* bits 22:20 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(u32 v)
+{
+	return (v & 0x7) << 24; /* bits 26:24 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_s(void)
+{
+	return 3; /* matches the 3-bit mask used by _f/_m/_v below */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(u32 v)
+{
+	return (v & 0x7) << 28; /* bits 30:28 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_m(void)
+{
+	return 0x7 << 28; /* mask for bits 30:28; 0x70000000 still fits in a signed int */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_v(u32 r)
+{
+	return (r >> 28) & 0x7; /* extracts bits 30:28 of r */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_r(void)
+{
+	return 0x0041898c;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(u32 v)
+{
+	return (v & 0x7) << 0; /* bits 2:0 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(u32 v)
+{
+	return (v & 0x7) << 4; /* bits 6:4 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(u32 v)
+{
+	return (v & 0x7) << 8; /* bits 10:8 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(u32 v)
+{
+	return (v & 0x7) << 12; /* bits 14:12 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(u32 v)
+{
+	return (v & 0x7) << 16; /* bits 18:16 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(u32 v)
+{
+	return (v & 0x7) << 20; /* bits 22:20 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(u32 v)
+{
+	return (v & 0x7) << 24; /* bits 26:24 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(u32 v)
+{
+	return (v & 0x7) << 28; /* bits 30:28 */
+}
+static inline u32 gr_gpcs_gpm_pd_cfg_r(void)
+{
+	return 0x00418c6c; /* constant for gr_gpcs_gpm_pd_cfg; presumably its register offset -- confirm */
+}
+static inline u32 gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f(void)
+{
+	return 0x0; /* bit 0 clear */
+}
+static inline u32 gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f(void)
+{
+	return 0x1; /* bit 0 set */
+}
+static inline u32 gr_gpcs_gcc_pagepool_base_r(void)
+{
+	return 0x00419004;
+}
+static inline u32 gr_gpcs_gcc_pagepool_base_addr_39_8_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit value, returned unchanged */
+}
+static inline u32 gr_gpcs_gcc_pagepool_r(void)
+{
+	return 0x00419008;
+}
+static inline u32 gr_gpcs_gcc_pagepool_total_pages_f(u32 v)
+{
+	return (v & 0xff) << 0; /* 8-bit value at bits 7:0 */
+}
+static inline u32 gr_gpcs_tpcs_pe_vaf_r(void)
+{
+	return 0x0041980c;
+}
+static inline u32 gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f(void)
+{
+	return 0x10; /* bit 4 set */
+}
+static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(void)
+{
+	return 0x00419848;
+}
+static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(u32 v)
+{
+	return (v & 0xfffffff) << 0; /* 28-bit value at bits 27:0 */
+}
+static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_f(u32 v)
+{
+	return (v & 0x1) << 28; /* 1-bit value placed at bit 28 */
+}
+static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(void)
+{
+	return 0x10000000; /* bit 28 set */
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_r(void)
+{
+	return 0x00419c00;
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f(void)
+{
+	return 0x0; /* bit 3 clear */
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f(void)
+{
+	return 0x8; /* bit 3 set */
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(void)
+{
+	return 0x00419c2c;
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(u32 v)
+{
+	return (v & 0xfffffff) << 0; /* 28-bit value at bits 27:0 */
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_f(u32 v)
+{
+	return (v & 0x1) << 28; /* 1-bit value placed at bit 28 */
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(void)
+{
+	return 0x10000000; /* bit 28 set */
+}
+static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_r(void)
+{
+	return 0x00419e00;
+}
+static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_core_enable_m(void)
+{
+	return 0x1 << 7; /* mask for bit 7 */
+}
+static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_core_enable_enable_f(void)
+{
+	return 0x80; /* bit 7 set; equals core_enable_m() */
+}
+static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_m(void)
+{
+	return 0x1 << 15; /* mask for bit 15 */
+}
+static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_enable_f(void)
+{
+	return 0x8000; /* bit 15 set; equals qctl_enable_m() */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(void)
+{
+	return 0x00419e44; /* constants below are single bits; bits 0 and 21 have no accessor in this run */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f(void)
+{
+	return 0x2; /* bit 1 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f(void)
+{
+	return 0x4; /* bit 2 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f(void)
+{
+	return 0x8; /* bit 3 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f(void)
+{
+	return 0x10; /* bit 4 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f(void)
+{
+	return 0x20; /* bit 5 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f(void)
+{
+	return 0x40; /* bit 6 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f(void)
+{
+	return 0x80; /* bit 7 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f(void)
+{
+	return 0x100; /* bit 8 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f(void)
+{
+	return 0x200; /* bit 9 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f(void)
+{
+	return 0x400; /* bit 10 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f(void)
+{
+	return 0x800; /* bit 11 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f(void)
+{
+	return 0x1000; /* bit 12 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f(void)
+{
+	return 0x2000; /* bit 13 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f(void)
+{
+	return 0x4000; /* bit 14 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f(void)
+{
+	return 0x8000; /* bit 15 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f(void)
+{
+	return 0x10000; /* bit 16 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f(void)
+{
+	return 0x20000; /* bit 17 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f(void)
+{
+	return 0x40000; /* bit 18 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_mmu_fault_report_f(void)
+{
+	return 0x800000; /* bit 23 (listed out of bit order) */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_overflow_report_f(void)
+{
+	return 0x400000; /* bit 22 (listed out of bit order) */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f(void)
+{
+	return 0x80000; /* bit 19 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f(void)
+{
+	return 0x100000; /* bit 20 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(void)
+{
+	return 0x00419e4c; /* constants below are single bits, bit 0 through bit 6 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f(void)
+{
+	return 0x1; /* bit 0 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f(void)
+{
+	return 0x2; /* bit 1 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f(void)
+{
+	return 0x4; /* bit 2 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f(void)
+{
+	return 0x8; /* bit 3 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f(void)
+{
+	return 0x10; /* bit 4 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f(void)
+{
+	return 0x20; /* bit 5 */
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f(void)
+{
+	return 0x40; /* bit 6 */
+}
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
+{
+	return 0x0050450c; /* constant for gr_gpc0_tpc0_tpccs_tpc_exception_en; presumably its register offset -- confirm */
+}
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void)
+{
+	return 0x2; /* bit 1 set */
+}
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_disabled_f(void)
+{
+	return 0x0; /* bit 1 clear */
+}
+static inline u32 gr_gpc0_gpccs_gpc_exception_en_r(void)
+{
+	return 0x00502c94;
+}
+static inline u32 gr_gpc0_gpccs_gpc_exception_en_tpc_0_enabled_f(void)
+{
+	return 0x10000; /* bit 16 set */
+}
+static inline u32 gr_gpc0_gpccs_gpc_exception_en_tpc_0_disabled_f(void)
+{
+	return 0x0; /* bit 16 clear */
+}
+static inline u32 gr_gpcs_gpccs_gpc_exception_r(void)
+{
+	return 0x0041ac90;
+}
+static inline u32 gr_gpcs_gpccs_gpc_exception_tpc_v(u32 r)
+{
+	return (r >> 16) & 0xff; /* extracts bits 23:16 of r */
+}
+static inline u32 gr_gpcs_gpccs_gpc_exception_tpc_0_pending_v(void)
+{
+	return 0x00000001; /* unshifted value 1 */
+}
+static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_r(void)
+{
+	return 0x00419d08;
+}
+static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_sm_v(u32 r)
+{
+	return (r >> 1) & 0x1; /* extracts bit 1 of r */
+}
+static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_sm_pending_v(void)
+{
+	return 0x00000001; /* unshifted value 1 */
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_r(void)
+{
+	return 0x00504610;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(u32 r)
+{
+	return (r >> 0) & 0x1; /* extracts bit 0 of r */
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v(void)
+{
+	return 0x00000001; /* unshifted value 1 */
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
+{
+	return 0x0050460c;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(u32 r)
+{
+	return (r >> 4) & 0x1; /* extracts bit 4 of r */
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v(void)
+{
+	return 0x00000001; /* unshifted value 1 */
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_r(void)
+{
+	return 0x00504650;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f(void)
+{
+	return 0x10; /* bit 4 set */
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f(void)
+{
+	return 0x20; /* bit 5 set */
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(void)
+{
+	return 0x40; /* bit 6 set */
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_r(void)
+{
+	return 0x00504648;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)
+{
+	return 0x00504770;
+}
+static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_r(void)
+{
+	return 0x00419f70;
+}
+static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(void)
+{
+	return 0x1 << 4;
+}
+static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 gr_gpc0_tpc0_sm_debug_sfe_control_r(void)
+{
+	return 0x0050477c;
+}
+static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_r(void)
+{
+	return 0x00419f7c;
+}
+static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(void)
+{
+	return 0x1 << 0;
+}
+static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 gr_gpcs_tpcs_sm_power_throttle_r(void)
+{
+	return 0x00419ed8;
+}
+static inline u32 gr_gpcs_tpcs_pes_vsc_vpc_r(void)
+{
+	return 0x0041be08;
+}
+static inline u32 gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f(void)
+{
+	return 0x4;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map0_r(void)
+{
+	return 0x0041bf00;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map1_r(void)
+{
+	return 0x0041bf04;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map2_r(void)
+{
+	return 0x0041bf08;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map3_r(void)
+{
+	return 0x0041bf0c;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map4_r(void)
+{
+	return 0x0041bf10;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map5_r(void)
+{
+	return 0x0041bf14;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_r(void)
+{
+	return 0x0041bfd0;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_row_offset_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_num_entries_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(u32 v)
+{
+	return (v & 0x1f) << 16;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(u32 v)
+{
+	return (v & 0x7) << 21;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(u32 v)
+{
+	return (v & 0x1f) << 24;
+}
+static inline u32 gr_gpcs_ppcs_wwdx_sm_num_rcp_r(void)
+{
+	return 0x0041bfd4;
+}
+static inline u32 gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_r(void)
+{
+	return 0x0041bfe4;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(u32 v)
+{
+	return (v & 0x1f) << 0;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(u32 v)
+{
+	return (v & 0x1f) << 5;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(u32 v)
+{
+	return (v & 0x1f) << 10;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(u32 v)
+{
+	return (v & 0x1f) << 15;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(u32 v)
+{
+	return (v & 0x1f) << 20;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(u32 v)
+{
+	return (v & 0x1f) << 25;
+}
+static inline u32 gr_bes_zrop_settings_r(void)
+{
+	return 0x00408850;
+}
+static inline u32 gr_bes_zrop_settings_num_active_ltcs_f(u32 v)
+{
+	return (v & 0xf) << 0;
+}
+static inline u32 gr_be0_crop_debug3_r(void)
+{
+	return 0x00410108;
+}
+static inline u32 gr_bes_crop_debug3_r(void)
+{
+	return 0x00408908;
+}
+static inline u32 gr_bes_crop_debug3_comp_vdc_4to2_disable_m(void)
+{
+	return 0x1U << 31; /* bit-31 mask; unsigned literal, since 1 << 31 overflows int */
+}
+static inline u32 gr_bes_crop_settings_r(void)
+{
+	return 0x00408958;
+}
+static inline u32 gr_bes_crop_settings_num_active_ltcs_f(u32 v)
+{
+	return (v & 0xf) << 0;
+}
+static inline u32 gr_zcull_bytes_per_aliquot_per_gpu_v(void)
+{
+	return 0x00000020;
+}
+static inline u32 gr_zcull_save_restore_header_bytes_per_gpc_v(void)
+{
+	return 0x00000020;
+}
+static inline u32 gr_zcull_save_restore_subregion_header_bytes_per_gpc_v(void)
+{
+	return 0x000000c0;
+}
+static inline u32 gr_zcull_subregion_qty_v(void)
+{
+	return 0x00000010;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r(void)
+{
+	return 0x00504604;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r(void)
+{
+	return 0x00504608;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r(void)
+{
+	return 0x0050465c;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control1_r(void)
+{
+	return 0x00504660;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control2_r(void)
+{
+	return 0x00504664;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control3_r(void)
+{
+	return 0x00504668;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control4_r(void)
+{
+	return 0x0050466c;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r(void)
+{
+	return 0x00504658;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_control_r(void)
+{
+	return 0x00504730;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_control_r(void)
+{
+	return 0x00504734;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_control_r(void)
+{
+	return 0x00504738;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_control_r(void)
+{
+	return 0x0050473c;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_control_r(void)
+{
+	return 0x00504740;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_control_r(void)
+{
+	return 0x00504744;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_control_r(void)
+{
+	return 0x00504748;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_control_r(void)
+{
+	return 0x0050474c;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_s1_r(void)
+{
+	return 0x00504678;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status1_r(void)
+{
+	return 0x00504694;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_s0_r(void)
+{
+	return 0x005046f0;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_s1_r(void)
+{
+	return 0x00504700;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_s0_r(void)
+{
+	return 0x005046f4;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_s1_r(void)
+{
+	return 0x00504704;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_s0_r(void)
+{
+	return 0x005046f8;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_s1_r(void)
+{
+	return 0x00504708;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_s0_r(void)
+{
+	return 0x005046fc;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_s1_r(void)
+{
+	return 0x0050470c;
+}
+static inline u32 gr_fe_pwr_mode_r(void)
+{
+	return 0x00404170;
+}
+static inline u32 gr_fe_pwr_mode_mode_auto_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fe_pwr_mode_mode_force_on_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_fe_pwr_mode_req_v(u32 r)
+{
+	return (r >> 4) & 0x1;
+}
+static inline u32 gr_fe_pwr_mode_req_send_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_fe_pwr_mode_req_done_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpcs_tpcs_sm_sfe_ba_control_r(void)
+{
+	return 0x00419f88;
+}
+static inline u32 gr_gpcs_tpcs_sm_sfe_ba_control_blkactivity_enable_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+static inline u32 gr_gpcs_tpcs_sm_sfe_ba_control_blkactivity_enable_m(void)
+{
+	return 0x1U << 31; /* bit-31 mask; unsigned literal, since 1 << 31 overflows int */
+}
+static inline u32 gr_gpcs_tpcs_sm_quad_ba_control_r(void)
+{
+	return 0x00419f80;
+}
+static inline u32 gr_gpcs_tpcs_sm_quad_ba_control_blkactivity_enable_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+static inline u32 gr_gpcs_tpcs_sm_quad_ba_control_blkactivity_enable_m(void)
+{
+	return 0x1U << 31; /* bit-31 mask; unsigned literal, since 1 << 31 overflows int */
+}
+static inline u32 gr_gpcs_tpcs_sm_mio_ba_control_r(void)
+{
+	return 0x00419ccc;
+}
+static inline u32 gr_gpcs_tpcs_sm_mio_ba_control_blkactivity_enable_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+static inline u32 gr_gpcs_tpcs_sm_mio_ba_control_blkactivity_enable_m(void)
+{
+	return 0x1U << 31; /* bit-31 mask; unsigned literal, since 1 << 31 overflows int */
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_r(void)
+{
+	return 0x00418880;
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_vm_pg_size_m(void)
+{
+	return 0x1 << 0;
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m(void)
+{
+	return 0x1 << 11;
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_vol_fault_m(void)
+{
+	return 0x1 << 1;
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_comp_fault_m(void)
+{
+	return 0x1 << 2;
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_miss_gran_m(void)
+{
+	return 0x3 << 3;
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_cache_mode_m(void)
+{
+	return 0x3 << 5;
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_mmu_aperture_m(void)
+{
+	return 0x3 << 28;
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_mmu_vol_m(void)
+{
+	return 0x1 << 30;
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_mmu_disable_m(void)
+{
+	return 0x1U << 31; /* bit-31 mask; unsigned literal, since 1 << 31 overflows int */
+}
+static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void)
+{
+	return 0x00418890;
+}
+static inline u32 gr_gpcs_pri_mmu_pm_req_mask_r(void)
+{
+	return 0x00418894;
+}
+static inline u32 gr_gpcs_pri_mmu_debug_ctrl_r(void)
+{
+	return 0x004188b0;
+}
+static inline u32 gr_gpcs_pri_mmu_debug_wr_r(void)
+{
+	return 0x004188b4;
+}
+static inline u32 gr_gpcs_pri_mmu_debug_rd_r(void)
+{
+	return 0x004188b8;
+}
+static inline u32 gr_gpcs_mmu_num_active_ltcs_r(void)
+{
+	return 0x004188ac;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
new file mode 100644
index 00000000..1ead0679
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_ltc_gp10b_h_
+#define _hw_ltc_gp10b_h_
+
+static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void)
+{
+	return 0x0014046c;
+}
+static inline u32 ltc_ltc0_lts0_dstg_cfg0_r(void)
+{
+	return 0x00140518;
+}
+static inline u32 ltc_ltcs_ltss_dstg_cfg0_r(void)
+{
+	return 0x0017e318;
+}
+static inline u32 ltc_ltcs_ltss_dstg_cfg0_vdc_4to2_disable_m(void)
+{
+	return 0x1 << 15;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_r(void)
+{
+	return 0x00140494;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_ways_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_v(u32 r)
+{
+	return (r >> 16) & 0x3;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void)
+{
+	return 0x0017e26c;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(void)
+{
+	return 0x1;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(void)
+{
+	return 0x2;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(void)
+{
+	return 0x4;
+}
+static inline u32 ltc_ltc0_lts0_cbc_ctrl1_r(void)
+{
+	return 0x0014046c; /* LTC0/LTS0 unicast offset; 0x0017e26c is the ltcs_ltss broadcast register */
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl2_r(void)
+{
+	return 0x0017e270;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(u32 v)
+{
+	return (v & 0x1ffff) << 0;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl3_r(void)
+{
+	return 0x0017e274;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(u32 v)
+{
+	return (v & 0x1ffff) << 0;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(void)
+{
+	return 0x0001ffff;
+}
+static inline u32 ltc_ltcs_ltss_cbc_base_r(void)
+{
+	return 0x0017e278;
+}
+static inline u32 ltc_ltcs_ltss_cbc_base_alignment_shift_v(void)
+{
+	return 0x0000000b;
+}
+static inline u32 ltc_ltcs_ltss_cbc_base_address_v(u32 r)
+{
+	return (r >> 0) & 0x3ffffff;
+}
+static inline u32 ltc_ltcs_ltss_cbc_num_active_ltcs_r(void)
+{
+	return 0x0017e27c;
+}
+static inline u32 ltc_ltcs_misc_ltc_num_active_ltcs_r(void)
+{
+	return 0x0017e000;
+}
+static inline u32 ltc_ltcs_ltss_cbc_param_r(void)
+{
+	return 0x0017e280;
+}
+static inline u32 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 ltc_ltcs_ltss_cbc_param_cache_line_size_v(u32 r)
+{
+	return (r >> 24) & 0xf;
+}
+static inline u32 ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(u32 r)
+{
+	return (r >> 28) & 0xf;
+}
+static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_r(void)
+{
+	return 0x0017e2ac;
+}
+static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(u32 v)
+{
+	return (v & 0x1f) << 16;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_index_r(void)
+{
+	return 0x0017e338;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_index_address_f(u32 v)
+{
+	return (v & 0xf) << 0;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(u32 i)
+{
+	return 0x0017e33c + i*4;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(void)
+{
+	return 0x0017e34c;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_s(void)
+{
+	return 32;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_m(void)
+{
+	return 0xffffffff << 0;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_v(u32 r)
+{
+	return (r >> 0) & 0xffffffff;
+}
+static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_2_r(void)
+{
+	return 0x0017e2b0;
+}
+static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(void)
+{
+	return 0x10000000;
+}
+static inline u32 ltc_ltcs_ltss_g_elpg_r(void)
+{
+	return 0x0017e214;
+}
+static inline u32 ltc_ltcs_ltss_g_elpg_flush_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 ltc_ltcs_ltss_g_elpg_flush_pending_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltcs_ltss_g_elpg_flush_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 ltc_ltc0_ltss_g_elpg_r(void)
+{
+	return 0x00140214;
+}
+static inline u32 ltc_ltc0_ltss_g_elpg_flush_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 ltc_ltc0_ltss_g_elpg_flush_pending_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltc0_ltss_g_elpg_flush_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_r(void)
+{
+	return 0x00142214;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 ltc_ltc0_ltss_intr_r(void)
+{
+	return 0x0014020c;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
new file mode 100644
index 00000000..f45fdc99
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_mc_gp10b_h_
+#define _hw_mc_gp10b_h_
+
+static inline u32 mc_intr_0_r(void)
+{
+	return 0x00000100;
+}
+static inline u32 mc_intr_0_pfifo_pending_f(void)
+{
+	return 0x100;
+}
+static inline u32 mc_intr_0_pgraph_pending_f(void)
+{
+	return 0x1000;
+}
+static inline u32 mc_intr_0_pmu_pending_f(void)
+{
+	return 0x1000000;
+}
+static inline u32 mc_intr_0_ltc_pending_f(void)
+{
+	return 0x2000000;
+}
+static inline u32 mc_intr_0_priv_ring_pending_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 mc_intr_0_pbus_pending_f(void)
+{
+	return 0x10000000;
+}
+static inline u32 mc_intr_mask_0_r(void)
+{
+	return 0x00000640;
+}
+static inline u32 mc_intr_mask_0_pmu_enabled_f(void)
+{
+	return 0x1000000;
+}
+static inline u32 mc_intr_en_0_r(void)
+{
+	return 0x00000140;
+}
+static inline u32 mc_intr_en_0_inta_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 mc_intr_en_0_inta_hardware_f(void)
+{
+	return 0x1;
+}
+static inline u32 mc_intr_en_1_r(void)
+{
+	return 0x00000144;
+}
+static inline u32 mc_intr_en_1_inta_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 mc_enable_r(void)
+{
+	return 0x00000200;
+}
+static inline u32 mc_enable_xbar_enabled_f(void)
+{
+	return 0x4;
+}
+static inline u32 mc_enable_l2_enabled_f(void)
+{
+	return 0x8;
+}
+static inline u32 mc_enable_pmedia_s(void)
+{
+	return 1;
+}
+static inline u32 mc_enable_pmedia_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 mc_enable_pmedia_m(void)
+{
+	return 0x1 << 4;
+}
+static inline u32 mc_enable_pmedia_v(u32 r)
+{
+	return (r >> 4) & 0x1;
+}
+static inline u32 mc_enable_priv_ring_enabled_f(void)
+{
+	return 0x20;
+}
+static inline u32 mc_enable_ce0_m(void)
+{
+	return 0x1 << 6;
+}
+static inline u32 mc_enable_pfifo_enabled_f(void)
+{
+	return 0x100;
+}
+static inline u32 mc_enable_pgraph_enabled_f(void)
+{
+	return 0x1000;
+}
+static inline u32 mc_enable_pwr_v(u32 r)
+{
+	return (r >> 13) & 0x1;
+}
+static inline u32 mc_enable_pwr_disabled_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 mc_enable_pwr_enabled_f(void)
+{
+	return 0x2000;
+}
+static inline u32 mc_enable_pfb_enabled_f(void)
+{
+	return 0x100000;
+}
+static inline u32 mc_enable_ce2_m(void)
+{
+	return 0x1 << 21;
+}
+static inline u32 mc_enable_ce2_enabled_f(void)
+{
+	return 0x200000;
+}
+static inline u32 mc_enable_blg_enabled_f(void)
+{
+	return 0x8000000;
+}
+static inline u32 mc_enable_perfmon_enabled_f(void)
+{
+	return 0x10000000;
+}
+static inline u32 mc_enable_hub_enabled_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 mc_enable_pb_r(void)
+{
+	return 0x00000204;
+}
+static inline u32 mc_enable_pb_0_s(void)
+{
+	return 1;
+}
+static inline u32 mc_enable_pb_0_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 mc_enable_pb_0_m(void)
+{
+	return 0x1 << 0;
+}
+static inline u32 mc_enable_pb_0_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 mc_enable_pb_0_enabled_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 mc_enable_pb_sel_f(u32 v, u32 i)
+{
+	return (v & 0x1) << (0 + i*1);
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
new file mode 100644
index 00000000..d3fa8553
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
@@ -0,0 +1,469 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_pbdma_gp10b_h_
+#define _hw_pbdma_gp10b_h_
+
+static inline u32 pbdma_gp_entry1_r(void)
+{
+	return 0x10000004;
+}
+static inline u32 pbdma_gp_entry1_get_hi_v(u32 r)
+{
+	return (r >> 0) & 0xff;
+}
+static inline u32 pbdma_gp_entry1_length_f(u32 v)
+{
+	return (v & 0x1fffff) << 10;
+}
+static inline u32 pbdma_gp_entry1_length_v(u32 r)
+{
+	return (r >> 10) & 0x1fffff;
+}
+static inline u32 pbdma_gp_base_r(u32 i)
+{
+	return 0x00040048 + i*8192;
+}
+static inline u32 pbdma_gp_base__size_1_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 pbdma_gp_base_offset_f(u32 v)
+{
+	return (v & 0x1fffffff) << 3;
+}
+static inline u32 pbdma_gp_base_rsvd_s(void)
+{
+	return 3;
+}
+static inline u32 pbdma_gp_base_hi_r(u32 i)
+{
+	return 0x0004004c + i*8192;
+}
+static inline u32 pbdma_gp_base_hi_offset_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+static inline u32 pbdma_gp_base_hi_limit2_f(u32 v)
+{
+	return (v & 0x1f) << 16;
+}
+static inline u32 pbdma_gp_fetch_r(u32 i)
+{
+	return 0x00040050 + i*8192;
+}
+static inline u32 pbdma_gp_get_r(u32 i)
+{
+	return 0x00040014 + i*8192;
+}
+static inline u32 pbdma_gp_put_r(u32 i)
+{
+	return 0x00040000 + i*8192;
+}
+static inline u32 pbdma_pb_fetch_r(u32 i)
+{
+	return 0x00040054 + i*8192;
+}
+static inline u32 pbdma_pb_fetch_hi_r(u32 i)
+{
+	return 0x00040058 + i*8192;
+}
+static inline u32 pbdma_get_r(u32 i)
+{
+	return 0x00040018 + i*8192;
+}
+static inline u32 pbdma_get_hi_r(u32 i)
+{
+	return 0x0004001c + i*8192;
+}
+static inline u32 pbdma_put_r(u32 i)
+{
+	return 0x0004005c + i*8192;
+}
+static inline u32 pbdma_put_hi_r(u32 i)
+{
+	return 0x00040060 + i*8192;
+}
+static inline u32 pbdma_formats_r(u32 i)
+{
+	return 0x0004009c + i*8192;
+}
+static inline u32 pbdma_formats_gp_fermi0_f(void)
+{
+	return 0x0;
+}
+static inline u32 pbdma_formats_pb_fermi1_f(void)
+{
+	return 0x100;
+}
+static inline u32 pbdma_formats_mp_fermi0_f(void)
+{
+	return 0x0;
+}
+static inline u32 pbdma_pb_header_r(u32 i)
+{
+	return 0x00040084 + i*8192;
+}
+static inline u32 pbdma_pb_header_priv_user_f(void)
+{
+	return 0x0;
+}
+static inline u32 pbdma_pb_header_method_zero_f(void)
+{
+	return 0x0;
+}
+static inline u32 pbdma_pb_header_subchannel_zero_f(void)
+{
+	return 0x0;
+}
+static inline u32 pbdma_pb_header_level_main_f(void)
+{
+	return 0x0;
+}
+static inline u32 pbdma_pb_header_first_true_f(void)
+{
+	return 0x400000;
+}
+static inline u32 pbdma_pb_header_type_inc_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 pbdma_subdevice_r(u32 i)
+{
+	return 0x00040094 + i*8192;
+}
+static inline u32 pbdma_subdevice_id_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
+static inline u32 pbdma_subdevice_status_active_f(void)
+{
+	return 0x10000000;
+}
+static inline u32 pbdma_subdevice_channel_dma_enable_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 pbdma_method0_r(u32 i)
+{
+	return 0x000400c0 + i*8192;
+}
+static inline u32 pbdma_data0_r(u32 i)
+{
+	return 0x000400c4 + i*8192;
+}
+static inline u32 pbdma_target_r(u32 i)
+{
+	return 0x000400ac + i*8192;
+}
+static inline u32 pbdma_target_engine_sw_f(void)
+{
+	return 0x1f;
+}
+static inline u32 pbdma_acquire_r(u32 i)
+{
+	return 0x00040030 + i*8192;
+}
+static inline u32 pbdma_acquire_retry_man_2_f(void)
+{
+	return 0x2;
+}
+static inline u32 pbdma_acquire_retry_exp_2_f(void)
+{
+	return 0x100;
+}
+static inline u32 pbdma_acquire_timeout_exp_max_f(void)
+{
+	return 0x7800;
+}
+static inline u32 pbdma_acquire_timeout_man_max_f(void)
+{
+	return 0x7fff8000;
+}
+static inline u32 pbdma_acquire_timeout_en_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 pbdma_status_r(u32 i)
+{
+	return 0x00040100 + i*8192;
+}
+static inline u32 pbdma_channel_r(u32 i)
+{
+	return 0x00040120 + i*8192;
+}
+static inline u32 pbdma_signature_r(u32 i)
+{
+	return 0x00040010 + i*8192;
+}
+static inline u32 pbdma_signature_hw_valid_f(void)
+{
+	return 0xface;
+}
+static inline u32 pbdma_signature_sw_zero_f(void)
+{
+	return 0x0;
+}
+static inline u32 pbdma_userd_r(u32 i)
+{
+	return 0x00040008 + i*8192;
+}
+static inline u32 pbdma_userd_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 pbdma_userd_addr_f(u32 v)
+{
+	return (v & 0x7fffff) << 9;
+}
+static inline u32 pbdma_userd_hi_r(u32 i)
+{
+	return 0x0004000c + i*8192;
+}
+static inline u32 pbdma_userd_hi_addr_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+static inline u32 pbdma_hce_ctrl_r(u32 i)
+{
+	return 0x000400e4 + i*8192;
+}
+static inline u32 pbdma_hce_ctrl_hce_priv_mode_yes_f(void)
+{
+	return 0x20;
+}
+static inline u32 pbdma_intr_0_r(u32 i)
+{
+	return 0x00040108 + i*8192;
+}
+static inline u32 pbdma_intr_0_memreq_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 pbdma_intr_0_memreq_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 pbdma_intr_0_memack_timeout_pending_f(void)
+{
+	return 0x2;
+}
+static inline u32 pbdma_intr_0_memack_extra_pending_f(void)
+{
+	return 0x4;
+}
+static inline u32 pbdma_intr_0_memdat_timeout_pending_f(void)
+{
+	return 0x8;
+}
+static inline u32 pbdma_intr_0_memdat_extra_pending_f(void)
+{
+	return 0x10;
+}
+static inline u32 pbdma_intr_0_memflush_pending_f(void)
+{
+	return 0x20;
+}
+static inline u32 pbdma_intr_0_memop_pending_f(void)
+{
+	return 0x40;
+}
+static inline u32 pbdma_intr_0_lbconnect_pending_f(void)
+{
+	return 0x80;
+}
+static inline u32 pbdma_intr_0_lbreq_pending_f(void)
+{
+	return 0x100;
+}
+static inline u32 pbdma_intr_0_lback_timeout_pending_f(void)
+{
+	return 0x200;
+}
+static inline u32 pbdma_intr_0_lback_extra_pending_f(void)
+{
+	return 0x400;
+}
+static inline u32 pbdma_intr_0_lbdat_timeout_pending_f(void)
+{
+	return 0x800;
+}
+static inline u32 pbdma_intr_0_lbdat_extra_pending_f(void)
+{
+	return 0x1000;
+}
+static inline u32 pbdma_intr_0_gpfifo_pending_f(void)
+{
+	return 0x2000;
+}
+static inline u32 pbdma_intr_0_gpptr_pending_f(void)
+{
+	return 0x4000;
+}
+static inline u32 pbdma_intr_0_gpentry_pending_f(void)
+{
+	return 0x8000;
+}
+static inline u32 pbdma_intr_0_gpcrc_pending_f(void)
+{
+	return 0x10000;
+}
+static inline u32 pbdma_intr_0_pbptr_pending_f(void)
+{
+	return 0x20000;
+}
+static inline u32 pbdma_intr_0_pbentry_pending_f(void)
+{
+	return 0x40000;
+}
+static inline u32 pbdma_intr_0_pbcrc_pending_f(void)
+{
+	return 0x80000;
+}
+static inline u32 pbdma_intr_0_xbarconnect_pending_f(void)
+{
+	return 0x100000;
+}
+static inline u32 pbdma_intr_0_method_pending_f(void)
+{
+	return 0x200000;
+}
+static inline u32 pbdma_intr_0_methodcrc_pending_f(void)
+{
+	return 0x400000;
+}
+static inline u32 pbdma_intr_0_device_pending_f(void)
+{
+	return 0x800000;
+}
+static inline u32 pbdma_intr_0_semaphore_pending_f(void)
+{
+	return 0x2000000;
+}
+static inline u32 pbdma_intr_0_acquire_pending_f(void)
+{
+	return 0x4000000;
+}
+static inline u32 pbdma_intr_0_pri_pending_f(void)
+{
+	return 0x8000000;
+}
+static inline u32 pbdma_intr_0_no_ctxsw_seg_pending_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 pbdma_intr_0_pbseg_pending_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 pbdma_intr_0_signature_pending_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 pbdma_intr_1_r(u32 i)
+{
+	return 0x00040148 + i*8192;
+}
+static inline u32 pbdma_intr_en_0_r(u32 i)
+{
+	return 0x0004010c + i*8192;
+}
+static inline u32 pbdma_intr_en_0_lbreq_enabled_f(void)
+{
+	return 0x100;
+}
+static inline u32 pbdma_intr_en_1_r(u32 i)
+{
+	return 0x0004014c + i*8192;
+}
+static inline u32 pbdma_intr_stall_r(u32 i)
+{
+	return 0x0004013c + i*8192;
+}
+static inline u32 pbdma_intr_stall_lbreq_enabled_f(void)
+{
+	return 0x100;
+}
+static inline u32 pbdma_udma_nop_r(void)
+{
+	return 0x00000008;
+}
+static inline u32 pbdma_syncpointa_r(u32 i)
+{
+	return 0x000400a4 + i*8192;
+}
+static inline u32 pbdma_syncpointa_payload_v(u32 r)
+{
+	return (r >> 0) & 0xffffffff;
+}
+static inline u32 pbdma_syncpointb_r(u32 i)
+{
+	return 0x000400a8 + i*8192;
+}
+static inline u32 pbdma_syncpointb_op_v(u32 r)
+{
+	return (r >> 0) & 0x3;
+}
+static inline u32 pbdma_syncpointb_op_wait_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 pbdma_syncpointb_wait_switch_v(u32 r)
+{
+	return (r >> 4) & 0x1;
+}
+static inline u32 pbdma_syncpointb_wait_switch_en_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 pbdma_syncpointb_syncpt_index_v(u32 r)
+{
+	return (r >> 8) & 0xff;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_pri_ringmaster_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pri_ringmaster_gp10b.h
new file mode 100644
index 00000000..7a458858
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_pri_ringmaster_gp10b.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_pri_ringmaster_gp10b_h_
+#define _hw_pri_ringmaster_gp10b_h_
+
+static inline u32 pri_ringmaster_command_r(void)
+{
+	return 0x0012004c;
+}
+static inline u32 pri_ringmaster_command_cmd_m(void)
+{
+	return 0x3f; /* cmd field mask: bits 5:0 */
+}
+static inline u32 pri_ringmaster_command_cmd_v(u32 r)
+{
+	return r & 0x3f; /* cmd field: bits 5:0 */
+}
+static inline u32 pri_ringmaster_command_cmd_no_cmd_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 pri_ringmaster_command_cmd_start_ring_f(void)
+{
+	return 0x1;
+}
+static inline u32 pri_ringmaster_command_cmd_ack_interrupt_f(void)
+{
+	return 0x2;
+}
+static inline u32 pri_ringmaster_command_cmd_enumerate_stations_f(void)
+{
+	return 0x3;
+}
+static inline u32 pri_ringmaster_command_cmd_enumerate_stations_bc_grp_all_f(void)
+{
+	return 0x0;
+}
+static inline u32 pri_ringmaster_command_data_r(void)
+{
+	return 0x00120048;
+}
+static inline u32 pri_ringmaster_start_results_r(void)
+{
+	return 0x00120050;
+}
+static inline u32 pri_ringmaster_start_results_connectivity_v(u32 r)
+{
+	return r & 0x1; /* connectivity field: bit 0 */
+}
+static inline u32 pri_ringmaster_start_results_connectivity_pass_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 pri_ringmaster_intr_status0_r(void)
+{
+	return 0x00120058;
+}
+static inline u32 pri_ringmaster_intr_status1_r(void)
+{
+	return 0x0012005c;
+}
+static inline u32 pri_ringmaster_global_ctl_r(void)
+{
+	return 0x00120060;
+}
+static inline u32 pri_ringmaster_global_ctl_ring_reset_asserted_f(void)
+{
+	return 0x1;
+}
+static inline u32 pri_ringmaster_global_ctl_ring_reset_deasserted_f(void)
+{
+	return 0x0;
+}
+static inline u32 pri_ringmaster_enum_fbp_r(void)
+{
+	return 0x00120074;
+}
+static inline u32 pri_ringmaster_enum_fbp_count_v(u32 r)
+{
+	return r & 0x1f; /* count field: bits 4:0 */
+}
+static inline u32 pri_ringmaster_enum_gpc_r(void)
+{
+	return 0x00120078;
+}
+static inline u32 pri_ringmaster_enum_gpc_count_v(u32 r)
+{
+	return r & 0x1f; /* count field: bits 4:0 */
+}
+static inline u32 pri_ringmaster_enum_ltc_r(void)
+{
+	return 0x0012006c;
+}
+static inline u32 pri_ringmaster_enum_ltc_count_v(u32 r)
+{
+	return r & 0x1f; /* count field: bits 4:0 */
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_pri_ringstation_sys_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pri_ringstation_sys_gp10b.h
new file mode 100644
index 00000000..eb711452
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_pri_ringstation_sys_gp10b.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_pri_ringstation_sys_gp10b_h_
+#define _hw_pri_ringstation_sys_gp10b_h_
+
+static inline u32 pri_ringstation_sys_master_config_r(u32 i)
+{
+	return 0x00122300 + i*4;
+}
+static inline u32 pri_ringstation_sys_decode_config_r(void)
+{
+	return 0x00122204;
+}
+static inline u32 pri_ringstation_sys_decode_config_ring_m(void)
+{
+	return 0x7; /* ring field mask: bits 2:0 */
+}
+static inline u32 pri_ringstation_sys_decode_config_ring_drop_on_ring_not_started_f(void)
+{
+	return 0x1;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
new file mode 100644
index 00000000..a315ae2d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_proj_gp10b_h_
+#define _hw_proj_gp10b_h_
+
+static inline u32 proj_gpc_base_v(void)
+{
+	return 0x00500000;
+}
+static inline u32 proj_gpc_shared_base_v(void)
+{
+	return 0x00418000;
+}
+static inline u32 proj_gpc_stride_v(void)
+{
+	return 0x00008000;
+}
+static inline u32 proj_ltc_stride_v(void)
+{
+	return 0x00002000;
+}
+static inline u32 proj_lts_stride_v(void)
+{
+	return 0x00000200;
+}
+static inline u32 proj_ppc_in_gpc_base_v(void)
+{
+	return 0x00003000;
+}
+static inline u32 proj_ppc_in_gpc_stride_v(void)
+{
+	return 0x00000200;
+}
+static inline u32 proj_rop_base_v(void)
+{
+	return 0x00410000;
+}
+static inline u32 proj_rop_shared_base_v(void)
+{
+	return 0x00408800;
+}
+static inline u32 proj_rop_stride_v(void)
+{
+	return 0x00000400;
+}
+static inline u32 proj_tpc_in_gpc_base_v(void)
+{
+	return 0x00004000;
+}
+static inline u32 proj_tpc_in_gpc_stride_v(void)
+{
+	return 0x00000800;
+}
+static inline u32 proj_tpc_in_gpc_shared_base_v(void)
+{
+	return 0x00001800;
+}
+static inline u32 proj_host_num_pbdma_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 proj_scal_litter_num_tpc_per_gpc_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 proj_scal_litter_num_fbps_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 proj_scal_litter_num_gpcs_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 proj_scal_litter_num_pes_per_gpc_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 proj_scal_litter_num_tpcs_per_pes_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 proj_scal_litter_num_zcull_banks_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 proj_scal_max_gpcs_v(void)
+{
+	return 0x00000020;
+}
+static inline u32 proj_scal_max_tpc_per_gpc_v(void)
+{
+	return 0x00000008;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h
new file mode 100644
index 00000000..d76095ac
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h
@@ -0,0 +1,805 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_pwr_gp10b_h_
+#define _hw_pwr_gp10b_h_
+
+static inline u32 pwr_falcon_irqsset_r(void)
+{
+	return 0x0010a000;
+}
+static inline u32 pwr_falcon_irqsset_swgen0_set_f(void)
+{
+	return 0x40;
+}
+static inline u32 pwr_falcon_irqsclr_r(void)
+{
+	return 0x0010a004;
+}
+static inline u32 pwr_falcon_irqstat_r(void)
+{
+	return 0x0010a008;
+}
+static inline u32 pwr_falcon_irqstat_halt_true_f(void)
+{
+	return 0x10;
+}
+static inline u32 pwr_falcon_irqstat_exterr_true_f(void)
+{
+	return 0x20;
+}
+static inline u32 pwr_falcon_irqstat_swgen0_true_f(void)
+{
+	return 0x40;
+}
+static inline u32 pwr_falcon_irqmode_r(void)
+{
+	return 0x0010a00c;
+}
+static inline u32 pwr_falcon_irqmset_r(void)
+{
+	return 0x0010a010;
+}
+static inline u32 pwr_falcon_irqmset_gptmr_f(u32 v)
+{
+	return v & 0x1; /* gptmr field: bit 0 */
+}
+static inline u32 pwr_falcon_irqmset_wdtmr_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
+static inline u32 pwr_falcon_irqmset_mthd_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+static inline u32 pwr_falcon_irqmset_ctxsw_f(u32 v)
+{
+	return (v & 0x1) << 3;
+}
+static inline u32 pwr_falcon_irqmset_halt_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 pwr_falcon_irqmset_exterr_f(u32 v)
+{
+	return (v & 0x1) << 5;
+}
+static inline u32 pwr_falcon_irqmset_swgen0_f(u32 v)
+{
+	return (v & 0x1) << 6;
+}
+static inline u32 pwr_falcon_irqmset_swgen1_f(u32 v)
+{
+	return (v & 0x1) << 7;
+}
+static inline u32 pwr_falcon_irqmclr_r(void)
+{
+	return 0x0010a014;
+}
+static inline u32 pwr_falcon_irqmclr_gptmr_f(u32 v)
+{
+	return v & 0x1; /* gptmr field: bit 0 */
+}
+static inline u32 pwr_falcon_irqmclr_wdtmr_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
+static inline u32 pwr_falcon_irqmclr_mthd_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+static inline u32 pwr_falcon_irqmclr_ctxsw_f(u32 v)
+{
+	return (v & 0x1) << 3;
+}
+static inline u32 pwr_falcon_irqmclr_halt_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 pwr_falcon_irqmclr_exterr_f(u32 v)
+{
+	return (v & 0x1) << 5;
+}
+static inline u32 pwr_falcon_irqmclr_swgen0_f(u32 v)
+{
+	return (v & 0x1) << 6;
+}
+static inline u32 pwr_falcon_irqmclr_swgen1_f(u32 v)
+{
+	return (v & 0x1) << 7;
+}
+static inline u32 pwr_falcon_irqmclr_ext_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 pwr_falcon_irqmask_r(void)
+{
+	return 0x0010a018;
+}
+static inline u32 pwr_falcon_irqdest_r(void)
+{
+	return 0x0010a01c;
+}
+static inline u32 pwr_falcon_irqdest_host_gptmr_f(u32 v)
+{
+	return v & 0x1; /* host_gptmr field: bit 0 */
+}
+static inline u32 pwr_falcon_irqdest_host_wdtmr_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
+static inline u32 pwr_falcon_irqdest_host_mthd_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+static inline u32 pwr_falcon_irqdest_host_ctxsw_f(u32 v)
+{
+	return (v & 0x1) << 3;
+}
+static inline u32 pwr_falcon_irqdest_host_halt_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 pwr_falcon_irqdest_host_exterr_f(u32 v)
+{
+	return (v & 0x1) << 5;
+}
+static inline u32 pwr_falcon_irqdest_host_swgen0_f(u32 v)
+{
+	return (v & 0x1) << 6;
+}
+static inline u32 pwr_falcon_irqdest_host_swgen1_f(u32 v)
+{
+	return (v & 0x1) << 7;
+}
+static inline u32 pwr_falcon_irqdest_host_ext_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 pwr_falcon_irqdest_target_gptmr_f(u32 v)
+{
+	return (v & 0x1) << 16;
+}
+static inline u32 pwr_falcon_irqdest_target_wdtmr_f(u32 v)
+{
+	return (v & 0x1) << 17;
+}
+static inline u32 pwr_falcon_irqdest_target_mthd_f(u32 v)
+{
+	return (v & 0x1) << 18;
+}
+static inline u32 pwr_falcon_irqdest_target_ctxsw_f(u32 v)
+{
+	return (v & 0x1) << 19;
+}
+static inline u32 pwr_falcon_irqdest_target_halt_f(u32 v)
+{
+	return (v & 0x1) << 20;
+}
+static inline u32 pwr_falcon_irqdest_target_exterr_f(u32 v)
+{
+	return (v & 0x1) << 21;
+}
+static inline u32 pwr_falcon_irqdest_target_swgen0_f(u32 v)
+{
+	return (v & 0x1) << 22;
+}
+static inline u32 pwr_falcon_irqdest_target_swgen1_f(u32 v)
+{
+	return (v & 0x1) << 23;
+}
+static inline u32 pwr_falcon_irqdest_target_ext_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+static inline u32 pwr_falcon_curctx_r(void)
+{
+	return 0x0010a050;
+}
+static inline u32 pwr_falcon_nxtctx_r(void)
+{
+	return 0x0010a054;
+}
+static inline u32 pwr_falcon_mailbox0_r(void)
+{
+	return 0x0010a040;
+}
+static inline u32 pwr_falcon_mailbox1_r(void)
+{
+	return 0x0010a044;
+}
+static inline u32 pwr_falcon_itfen_r(void)
+{
+	return 0x0010a048;
+}
+static inline u32 pwr_falcon_itfen_ctxen_enable_f(void)
+{
+	return 0x1;
+}
+static inline u32 pwr_falcon_idlestate_r(void)
+{
+	return 0x0010a04c;
+}
+static inline u32 pwr_falcon_idlestate_falcon_busy_v(u32 r)
+{
+	return r & 0x1; /* falcon_busy field: bit 0 */
+}
+static inline u32 pwr_falcon_idlestate_ext_busy_v(u32 r)
+{
+	return (r >> 1) & 0x7fff;
+}
+static inline u32 pwr_falcon_os_r(void)
+{
+	return 0x0010a080;
+}
+static inline u32 pwr_falcon_engctl_r(void)
+{
+	return 0x0010a0a4;
+}
+static inline u32 pwr_falcon_cpuctl_r(void)
+{
+	return 0x0010a100;
+}
+static inline u32 pwr_falcon_cpuctl_startcpu_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
+static inline u32 pwr_falcon_cpuctl_halt_intr_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 pwr_falcon_cpuctl_halt_intr_m(void)
+{
+	return 0x1 << 4;
+}
+static inline u32 pwr_falcon_cpuctl_halt_intr_v(u32 r)
+{
+	return (r >> 4) & 0x1;
+}
+static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_f(u32 v)
+{
+	return (v & 0x1) << 6;
+}
+static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_m(void)
+{
+	return 0x1 << 6;
+}
+static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_v(u32 r)
+{
+	return (r >> 6) & 0x1;
+}
+static inline u32 pwr_falcon_cpuctl_alias_r(void)
+{
+	return 0x0010a130;
+}
+static inline u32 pwr_falcon_cpuctl_alias_startcpu_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
+static inline u32 pwr_pmu_scpctl_stat_r(void)
+{
+	return 0x0010ac08;
+}
+static inline u32 pwr_pmu_scpctl_stat_debug_mode_f(u32 v)
+{
+	return (v & 0x1) << 20;
+}
+static inline u32 pwr_pmu_scpctl_stat_debug_mode_m(void)
+{
+	return 0x1 << 20;
+}
+static inline u32 pwr_pmu_scpctl_stat_debug_mode_v(u32 r)
+{
+	return (r >> 20) & 0x1;
+}
+static inline u32 pwr_falcon_imemc_r(u32 i)
+{
+	return 0x0010a180 + i*16;
+}
+static inline u32 pwr_falcon_imemc_offs_f(u32 v)
+{
+	return (v & 0x3f) << 2;
+}
+static inline u32 pwr_falcon_imemc_blk_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 pwr_falcon_imemc_aincw_f(u32 v)
+{
+	return (v & 0x1) << 24;
+}
+static inline u32 pwr_falcon_imemd_r(u32 i)
+{
+	return 0x0010a184 + i*16;
+}
+static inline u32 pwr_falcon_imemt_r(u32 i)
+{
+	return 0x0010a188 + i*16;
+}
+static inline u32 pwr_falcon_sctl_r(void)
+{
+	return 0x0010a240;
+}
+static inline u32 pwr_falcon_mmu_phys_sec_r(void)
+{
+	return 0x00100ce4;
+}
+static inline u32 pwr_falcon_bootvec_r(void)
+{
+	return 0x0010a104;
+}
+static inline u32 pwr_falcon_bootvec_vec_f(u32 v)
+{
+	return v; /* vec field covers all 32 bits of the register */
+}
+static inline u32 pwr_falcon_hwcfg_r(void)
+{
+	return 0x0010a108;
+}
+static inline u32 pwr_falcon_hwcfg_imem_size_v(u32 r)
+{
+	return r & 0x1ff; /* imem_size field: bits 8:0 */
+}
+static inline u32 pwr_falcon_hwcfg_dmem_size_v(u32 r)
+{
+	return (r >> 9) & 0x1ff;
+}
+static inline u32 pwr_falcon_dmatrfbase_r(void)
+{
+	return 0x0010a110;
+}
+static inline u32 pwr_falcon_dmatrfmoffs_r(void)
+{
+	return 0x0010a114;
+}
+static inline u32 pwr_falcon_dmatrfcmd_r(void)
+{
+	return 0x0010a118;
+}
+static inline u32 pwr_falcon_dmatrfcmd_imem_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 pwr_falcon_dmatrfcmd_write_f(u32 v)
+{
+	return (v & 0x1) << 5;
+}
+static inline u32 pwr_falcon_dmatrfcmd_size_f(u32 v)
+{
+	return (v & 0x7) << 8;
+}
+static inline u32 pwr_falcon_dmatrfcmd_ctxdma_f(u32 v)
+{
+	return (v & 0x7) << 12;
+}
+static inline u32 pwr_falcon_dmatrffboffs_r(void)
+{
+	return 0x0010a11c;
+}
+static inline u32 pwr_falcon_exterraddr_r(void)
+{
+	return 0x0010a168;
+}
+static inline u32 pwr_falcon_exterrstat_r(void)
+{
+	return 0x0010a16c;
+}
+static inline u32 pwr_falcon_exterrstat_valid_m(void)
+{
+	return 0x1U << 31; /* unsigned literal: 1 << 31 overflows a signed int */
+}
+static inline u32 pwr_falcon_exterrstat_valid_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 pwr_falcon_exterrstat_valid_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_r(void)
+{
+	return 0x0010a200;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_s(void)
+{
+	return 4;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_f(u32 v)
+{
+	return v & 0xf; /* opc field: bits 3:0 */
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_m(void)
+{
+	return 0xf; /* opc field mask: bits 3:0 */
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_v(u32 r)
+{
+	return r & 0xf; /* opc field: bits 3:0 */
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_rreg_f(void)
+{
+	return 0x8;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_rstat_f(void)
+{
+	return 0xe;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_idx_f(u32 v)
+{
+	return (v & 0x1f) << 8;
+}
+static inline u32 pwr_pmu_falcon_icd_rdata_r(void)
+{
+	return 0x0010a20c;
+}
+static inline u32 pwr_falcon_dmemc_r(u32 i)
+{
+	return 0x0010a1c0 + i*8;
+}
+static inline u32 pwr_falcon_dmemc_offs_f(u32 v)
+{
+	return (v & 0x3f) << 2;
+}
+static inline u32 pwr_falcon_dmemc_offs_m(void)
+{
+	return 0x3f << 2;
+}
+static inline u32 pwr_falcon_dmemc_blk_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 pwr_falcon_dmemc_blk_m(void)
+{
+	return 0xff << 8;
+}
+static inline u32 pwr_falcon_dmemc_aincw_f(u32 v)
+{
+	return (v & 0x1) << 24;
+}
+static inline u32 pwr_falcon_dmemc_aincr_f(u32 v)
+{
+	return (v & 0x1) << 25;
+}
+static inline u32 pwr_falcon_dmemd_r(u32 i)
+{
+	return 0x0010a1c4 + i*8;
+}
+static inline u32 pwr_pmu_new_instblk_r(void)
+{
+	return 0x0010a480;
+}
+static inline u32 pwr_pmu_new_instblk_ptr_f(u32 v)
+{
+	return v & 0xfffffff; /* ptr field: bits 27:0 */
+}
+static inline u32 pwr_pmu_new_instblk_target_fb_f(void)
+{
+	return 0x0;
+}
+static inline u32 pwr_pmu_new_instblk_target_sys_coh_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 pwr_pmu_new_instblk_valid_f(u32 v)
+{
+	return (v & 0x1) << 30;
+}
+static inline u32 pwr_pmu_mutex_id_r(void)
+{
+	return 0x0010a488;
+}
+static inline u32 pwr_pmu_mutex_id_value_v(u32 r)
+{
+	return r & 0xff; /* value field: bits 7:0 */
+}
+static inline u32 pwr_pmu_mutex_id_value_init_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 pwr_pmu_mutex_id_value_not_avail_v(void)
+{
+	return 0x000000ff;
+}
+static inline u32 pwr_pmu_mutex_id_release_r(void)
+{
+	return 0x0010a48c;
+}
+static inline u32 pwr_pmu_mutex_id_release_value_f(u32 v)
+{
+	return v & 0xff; /* value field: bits 7:0 */
+}
+static inline u32 pwr_pmu_mutex_id_release_value_m(void)
+{
+	return 0xff; /* value field mask: bits 7:0 */
+}
+static inline u32 pwr_pmu_mutex_id_release_value_init_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 pwr_pmu_mutex_id_release_value_init_f(void)
+{
+	return 0x0;
+}
+static inline u32 pwr_pmu_mutex_r(u32 i)
+{
+	return 0x0010a580 + i*4;
+}
+static inline u32 pwr_pmu_mutex__size_1_v(void)
+{
+	return 0x00000010;
+}
+static inline u32 pwr_pmu_mutex_value_f(u32 v)
+{
+	return v & 0xff; /* value field: bits 7:0 */
+}
+static inline u32 pwr_pmu_mutex_value_v(u32 r)
+{
+	return r & 0xff; /* value field: bits 7:0 */
+}
+static inline u32 pwr_pmu_mutex_value_initial_lock_f(void)
+{
+	return 0x0;
+}
+static inline u32 pwr_pmu_queue_head_r(u32 i)
+{
+	return 0x0010a4a0 + i*4;
+}
+static inline u32 pwr_pmu_queue_head__size_1_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 pwr_pmu_queue_head_address_f(u32 v)
+{
+	return v; /* address field covers all 32 bits of the register */
+}
+static inline u32 pwr_pmu_queue_head_address_v(u32 r)
+{
+	return r; /* address field covers all 32 bits of the register */
+}
+static inline u32 pwr_pmu_queue_tail_r(u32 i)
+{
+	return 0x0010a4b0 + i*4;
+}
+static inline u32 pwr_pmu_queue_tail__size_1_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 pwr_pmu_queue_tail_address_f(u32 v)
+{
+	return v; /* address field covers all 32 bits of the register */
+}
+static inline u32 pwr_pmu_queue_tail_address_v(u32 r)
+{
+	return r; /* address field covers all 32 bits of the register */
+}
+static inline u32 pwr_pmu_msgq_head_r(void)
+{
+	return 0x0010a4c8;
+}
+static inline u32 pwr_pmu_msgq_head_val_f(u32 v)
+{
+	return v; /* val field covers all 32 bits of the register */
+}
+static inline u32 pwr_pmu_msgq_head_val_v(u32 r)
+{
+	return r; /* val field covers all 32 bits of the register */
+}
+static inline u32 pwr_pmu_msgq_tail_r(void)
+{
+	return 0x0010a4cc;
+}
+static inline u32 pwr_pmu_msgq_tail_val_f(u32 v)
+{
+	return v; /* val field covers all 32 bits of the register */
+}
+static inline u32 pwr_pmu_msgq_tail_val_v(u32 r)
+{
+	return r; /* val field covers all 32 bits of the register */
+}
+static inline u32 pwr_pmu_idle_mask_r(u32 i)
+{
+	return 0x0010a504 + i*16;
+}
+static inline u32 pwr_pmu_idle_mask_gr_enabled_f(void)
+{
+	return 0x1;
+}
+static inline u32 pwr_pmu_idle_mask_ce_2_enabled_f(void)
+{
+	return 0x200000;
+}
+static inline u32 pwr_pmu_idle_count_r(u32 i)
+{
+	return 0x0010a508 + i*16;
+}
+static inline u32 pwr_pmu_idle_count_value_f(u32 v)
+{
+	return v & 0x7fffffff; /* value field: bits 30:0 */
+}
+static inline u32 pwr_pmu_idle_count_value_v(u32 r)
+{
+	return r & 0x7fffffff; /* value field: bits 30:0 */
+}
+static inline u32 pwr_pmu_idle_count_reset_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+static inline u32 pwr_pmu_idle_ctrl_r(u32 i)
+{
+	return 0x0010a50c + i*16;
+}
+static inline u32 pwr_pmu_idle_ctrl_value_m(void)
+{
+	return 0x3; /* value field mask: bits 1:0 */
+}
+static inline u32 pwr_pmu_idle_ctrl_value_busy_f(void)
+{
+	return 0x2;
+}
+static inline u32 pwr_pmu_idle_ctrl_value_always_f(void)
+{
+	return 0x3;
+}
+static inline u32 pwr_pmu_idle_ctrl_filter_m(void)
+{
+	return 0x1 << 2;
+}
+static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
+{
+	return 0x0010a9f0 + i*8;
+}
+static inline u32 pwr_pmu_idle_mask_1_supp_r(u32 i)
+{
+	return 0x0010a9f4 + i*8;
+}
+static inline u32 pwr_pmu_idle_ctrl_supp_r(u32 i)
+{
+	return 0x0010aa30 + i*8;
+}
+static inline u32 pwr_pmu_debug_r(u32 i)
+{
+	return 0x0010a5c0 + i*4;
+}
+static inline u32 pwr_pmu_debug__size_1_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 pwr_pmu_mailbox_r(u32 i)
+{
+	return 0x0010a450 + i*4;
+}
+static inline u32 pwr_pmu_mailbox__size_1_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 pwr_pmu_bar0_addr_r(void)
+{
+	return 0x0010a7a0;
+}
+static inline u32 pwr_pmu_bar0_data_r(void)
+{
+	return 0x0010a7a4;
+}
+static inline u32 pwr_pmu_bar0_ctl_r(void)
+{
+	return 0x0010a7ac;
+}
+static inline u32 pwr_pmu_bar0_timeout_r(void)
+{
+	return 0x0010a7a8;
+}
+static inline u32 pwr_pmu_bar0_fecs_error_r(void)
+{
+	return 0x0010a988;
+}
+static inline u32 pwr_pmu_bar0_error_status_r(void)
+{
+	return 0x0010a7b0;
+}
+static inline u32 pwr_pmu_pg_idlefilth_r(u32 i)
+{
+	return 0x0010a6c0 + i*4;
+}
+static inline u32 pwr_pmu_pg_ppuidlefilth_r(u32 i)
+{
+	return 0x0010a6e8 + i*4;
+}
+static inline u32 pwr_pmu_pg_idle_cnt_r(u32 i)
+{
+	return 0x0010a710 + i*4;
+}
+static inline u32 pwr_pmu_pg_intren_r(u32 i)
+{
+	return 0x0010a760 + i*4;
+}
+static inline u32 pwr_fbif_transcfg_r(u32 i)
+{
+	return 0x0010ae00 + i*4;
+}
+static inline u32 pwr_fbif_transcfg_target_local_fb_f(void)
+{
+	return 0x0;
+}
+static inline u32 pwr_fbif_transcfg_target_coherent_sysmem_f(void)
+{
+	return 0x1;
+}
+static inline u32 pwr_fbif_transcfg_target_noncoherent_sysmem_f(void)
+{
+	return 0x2;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_s(void)
+{
+	return 1;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_m(void)
+{
+	return 0x1 << 2;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_virtual_f(void)
+{
+	return 0x0;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_physical_f(void)
+{
+	return 0x4;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
new file mode 100644
index 00000000..ef53882b
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
@@ -0,0 +1,385 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_ram_gp10b_h_
+#define _hw_ram_gp10b_h_
+
+static inline u32 ram_in_ramfc_s(void)
+{
+	return 4096;
+}
+static inline u32 ram_in_ramfc_w(void)
+{
+	return 0;
+}
+static inline u32 ram_in_page_dir_base_target_f(u32 v)
+{
+	return (v & 0x3) << 0;
+}
+static inline u32 ram_in_page_dir_base_target_w(void)
+{
+	return 128;
+}
+static inline u32 ram_in_page_dir_base_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 ram_in_page_dir_base_vol_w(void)
+{
+	return 128;
+}
+static inline u32 ram_in_page_dir_base_vol_true_f(void)
+{
+	return 0x4;
+}
+static inline u32 ram_in_page_dir_base_lo_f(u32 v)
+{
+	return (v & 0xfffff) << 12;
+}
+static inline u32 ram_in_page_dir_base_lo_w(void)
+{
+	return 128;
+}
+static inline u32 ram_in_page_dir_base_hi_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+static inline u32 ram_in_page_dir_base_hi_w(void)
+{
+	return 129;
+}
+static inline u32 ram_in_adr_limit_lo_f(u32 v)
+{
+	return (v & 0xfffff) << 12;
+}
+static inline u32 ram_in_adr_limit_lo_w(void)
+{
+	return 130;
+}
+static inline u32 ram_in_adr_limit_hi_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 ram_in_adr_limit_hi_w(void)
+{
+	return 131;
+}
+static inline u32 ram_in_engine_cs_w(void)
+{
+	return 132;
+}
+static inline u32 ram_in_engine_cs_wfi_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 ram_in_engine_cs_wfi_f(void)
+{
+	return 0x0;
+}
+static inline u32 ram_in_engine_cs_fg_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ram_in_engine_cs_fg_f(void)
+{
+	return 0x8;
+}
+static inline u32 ram_in_gr_cs_w(void)
+{
+	return 132;
+}
+static inline u32 ram_in_gr_cs_wfi_f(void)
+{
+	return 0x0;
+}
+static inline u32 ram_in_gr_wfi_target_w(void)
+{
+	return 132;
+}
+static inline u32 ram_in_gr_wfi_mode_w(void)
+{
+	return 132;
+}
+static inline u32 ram_in_gr_wfi_mode_physical_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 ram_in_gr_wfi_mode_physical_f(void)
+{
+	return 0x0;
+}
+static inline u32 ram_in_gr_wfi_mode_virtual_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ram_in_gr_wfi_mode_virtual_f(void)
+{
+	return 0x4;
+}
+static inline u32 ram_in_gr_wfi_ptr_lo_f(u32 v)
+{
+	return (v & 0xfffff) << 12;
+}
+static inline u32 ram_in_gr_wfi_ptr_lo_w(void)
+{
+	return 132;
+}
+static inline u32 ram_in_gr_wfi_ptr_hi_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+static inline u32 ram_in_gr_wfi_ptr_hi_w(void)
+{
+	return 133;
+}
+static inline u32 ram_in_base_shift_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 ram_in_alloc_size_v(void)
+{
+	return 0x00001000;
+}
+static inline u32 ram_fc_size_val_v(void)
+{
+	return 0x00000200;
+}
+static inline u32 ram_fc_gp_put_w(void)
+{
+	return 0;
+}
+static inline u32 ram_fc_userd_w(void)
+{
+	return 2;
+}
+static inline u32 ram_fc_userd_hi_w(void)
+{
+	return 3;
+}
+static inline u32 ram_fc_signature_w(void)
+{
+	return 4;
+}
+static inline u32 ram_fc_gp_get_w(void)
+{
+	return 5;
+}
+static inline u32 ram_fc_pb_get_w(void)
+{
+	return 6;
+}
+static inline u32 ram_fc_pb_get_hi_w(void)
+{
+	return 7;
+}
+static inline u32 ram_fc_pb_top_level_get_w(void)
+{
+	return 8;
+}
+static inline u32 ram_fc_pb_top_level_get_hi_w(void)
+{
+	return 9;
+}
+static inline u32 ram_fc_acquire_w(void)
+{
+	return 12;
+}
+static inline u32 ram_fc_semaphorea_w(void)
+{
+	return 14;
+}
+static inline u32 ram_fc_semaphoreb_w(void)
+{
+	return 15;
+}
+static inline u32 ram_fc_semaphorec_w(void)
+{
+	return 16;
+}
+static inline u32 ram_fc_semaphored_w(void)
+{
+	return 17;
+}
+static inline u32 ram_fc_gp_base_w(void)
+{
+	return 18;
+}
+static inline u32 ram_fc_gp_base_hi_w(void)
+{
+	return 19;
+}
+static inline u32 ram_fc_gp_fetch_w(void)
+{
+	return 20;
+}
+static inline u32 ram_fc_pb_fetch_w(void)
+{
+	return 21;
+}
+static inline u32 ram_fc_pb_fetch_hi_w(void)
+{
+	return 22;
+}
+static inline u32 ram_fc_pb_put_w(void)
+{
+	return 23;
+}
+static inline u32 ram_fc_pb_put_hi_w(void)
+{
+	return 24;
+}
+static inline u32 ram_fc_pb_header_w(void)
+{
+	return 33;
+}
+static inline u32 ram_fc_pb_count_w(void)
+{
+	return 34;
+}
+static inline u32 ram_fc_subdevice_w(void)
+{
+	return 37;
+}
+static inline u32 ram_fc_formats_w(void)
+{
+	return 39;
+}
+static inline u32 ram_fc_syncpointa_w(void)
+{
+	return 41;
+}
+static inline u32 ram_fc_syncpointb_w(void)
+{
+	return 42;
+}
+static inline u32 ram_fc_target_w(void)
+{
+	return 43;
+}
+static inline u32 ram_fc_hce_ctrl_w(void)
+{
+	return 57;
+}
+static inline u32 ram_fc_chid_w(void)
+{
+	return 58;
+}
+static inline u32 ram_fc_chid_id_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
+static inline u32 ram_fc_chid_id_w(void)
+{
+	return 0;
+}
+static inline u32 ram_fc_pb_timeslice_w(void)
+{
+	return 62;
+}
+static inline u32 ram_userd_base_shift_v(void)
+{
+	return 0x00000009;
+}
+static inline u32 ram_userd_chan_size_v(void)
+{
+	return 0x00000200;
+}
+static inline u32 ram_userd_put_w(void)
+{
+	return 16;
+}
+static inline u32 ram_userd_get_w(void)
+{
+	return 17;
+}
+static inline u32 ram_userd_ref_w(void)
+{
+	return 18;
+}
+static inline u32 ram_userd_put_hi_w(void)
+{
+	return 19;
+}
+static inline u32 ram_userd_ref_threshold_w(void)
+{
+	return 20;
+}
+static inline u32 ram_userd_top_level_get_w(void)
+{
+	return 22;
+}
+static inline u32 ram_userd_top_level_get_hi_w(void)
+{
+	return 23;
+}
+static inline u32 ram_userd_get_hi_w(void)
+{
+	return 24;
+}
+static inline u32 ram_userd_gp_get_w(void)
+{
+	return 34;
+}
+static inline u32 ram_userd_gp_put_w(void)
+{
+	return 35;
+}
+static inline u32 ram_userd_gp_top_level_get_w(void)
+{
+	return 22;
+}
+static inline u32 ram_userd_gp_top_level_get_hi_w(void)
+{
+	return 23;
+}
+static inline u32 ram_rl_entry_size_v(void)
+{
+	return 0x00000008;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h
new file mode 100644
index 00000000..16bbb3ca
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_therm_gp10b_h_
+#define _hw_therm_gp10b_h_
+
+static inline u32 therm_use_a_r(void)
+{
+	return 0x00020798;
+}
+static inline u32 therm_evt_ext_therm_0_r(void)
+{
+	return 0x00020700;
+}
+static inline u32 therm_evt_ext_therm_1_r(void)
+{
+	return 0x00020704;
+}
+static inline u32 therm_evt_ext_therm_2_r(void)
+{
+	return 0x00020708;
+}
+static inline u32 therm_weight_1_r(void)
+{
+	return 0x00020024;
+}
+static inline u32 therm_peakpower_config1_r(u32 i)
+{
+	return 0x00020154 + i*4;
+}
+static inline u32 therm_peakpower_config1_window_period_2m_v(void)
+{
+	return 0x00000015;
+}
+static inline u32 therm_peakpower_config1_window_period_2m_f(void)
+{
+	return 0x15;
+}
+static inline u32 therm_peakpower_config1_window_en_enabled_f(void)
+{
+	return 0x80000000;
+}
+/*
+ * A second therm_peakpower_config1_r() (0x000202e8 + i*4) was dropped here: it
+ * redefined the accessor above; that offset is therm_peakpower_config8_r().
+ */
+static inline u32 therm_peakpower_config1_ba_sum_shift_s(void)
+{
+	return 5;
+}
+static inline u32 therm_peakpower_config1_ba_sum_shift_f(u32 v)
+{
+	return (v & 0x1f) << 8;
+}
+static inline u32 therm_peakpower_config1_ba_sum_shift_m(void)
+{
+	return 0x1f << 8;
+}
+static inline u32 therm_peakpower_config1_ba_sum_shift_v(u32 r)
+{
+	return (r >> 8) & 0x1f;
+}
+static inline u32 therm_peakpower_config2_r(u32 i)
+{
+	return 0x00020170 + i*4;
+}
+static inline u32 therm_peakpower_config4_r(u32 i)
+{
+	return 0x000201c0 + i*4;
+}
+static inline u32 therm_peakpower_config8_r(u32 i)
+{
+	return 0x000202e8 + i*4;
+}
+static inline u32 therm_peakpower_config9_r(u32 i)
+{
+	return 0x000202f4 + i*4;
+}
+static inline u32 therm_config1_r(void)
+{
+	return 0x00020050;
+}
+static inline u32 therm_gate_ctrl_r(u32 i)
+{
+	return 0x00020200 + i*4;
+}
+static inline u32 therm_gate_ctrl_eng_clk_m(void)
+{
+	return 0x3 << 0;
+}
+static inline u32 therm_gate_ctrl_eng_clk_run_f(void)
+{
+	return 0x0;
+}
+static inline u32 therm_gate_ctrl_eng_clk_auto_f(void)
+{
+	return 0x1;
+}
+static inline u32 therm_gate_ctrl_eng_clk_stop_f(void)
+{
+	return 0x2;
+}
+static inline u32 therm_gate_ctrl_blk_clk_m(void)
+{
+	return 0x3 << 2;
+}
+static inline u32 therm_gate_ctrl_blk_clk_run_f(void)
+{
+	return 0x0;
+}
+static inline u32 therm_gate_ctrl_blk_clk_auto_f(void)
+{
+	return 0x4;
+}
+static inline u32 therm_gate_ctrl_eng_pwr_m(void)
+{
+	return 0x3 << 4;
+}
+static inline u32 therm_gate_ctrl_eng_pwr_auto_f(void)
+{
+	return 0x10;
+}
+static inline u32 therm_gate_ctrl_eng_pwr_off_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 therm_gate_ctrl_eng_pwr_off_f(void)
+{
+	return 0x20;
+}
+static inline u32 therm_gate_ctrl_eng_idle_filt_exp_f(u32 v)
+{
+	return (v & 0x1f) << 8;
+}
+static inline u32 therm_gate_ctrl_eng_idle_filt_exp_m(void)
+{
+	return 0x1f << 8;
+}
+static inline u32 therm_gate_ctrl_eng_idle_filt_mant_f(u32 v)
+{
+	return (v & 0x7) << 13;
+}
+static inline u32 therm_gate_ctrl_eng_idle_filt_mant_m(void)
+{
+	return 0x7 << 13;
+}
+static inline u32 therm_gate_ctrl_eng_delay_after_f(u32 v)
+{
+	return (v & 0xf) << 20;
+}
+static inline u32 therm_gate_ctrl_eng_delay_after_m(void)
+{
+	return 0xf << 20;
+}
+static inline u32 therm_fecs_idle_filter_r(void)
+{
+	return 0x00020288;
+}
+static inline u32 therm_fecs_idle_filter_value_m(void)
+{
+	return 0xffffffff << 0;
+}
+static inline u32 therm_hubmmu_idle_filter_r(void)
+{
+	return 0x0002028c;
+}
+static inline u32 therm_hubmmu_idle_filter_value_m(void)
+{
+	return 0xffffffff << 0;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_timer_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_timer_gp10b.h
new file mode 100644
index 00000000..88d22128
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_timer_gp10b.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_timer_gp10b_h_
+#define _hw_timer_gp10b_h_
+
+static inline u32 timer_pri_timeout_r(void)
+{
+	return 0x00009080;
+}
+static inline u32 timer_pri_timeout_period_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+static inline u32 timer_pri_timeout_period_m(void)
+{
+	return 0xffffff << 0;
+}
+static inline u32 timer_pri_timeout_period_v(u32 r)
+{
+	return (r >> 0) & 0xffffff;
+}
+static inline u32 timer_pri_timeout_en_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+static inline u32 timer_pri_timeout_en_m(void)
+{
+	return 0x1U << 31; /* unsigned literal: 0x1 << 31 overflows signed int */
+}
+static inline u32 timer_pri_timeout_en_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 timer_pri_timeout_en_en_enabled_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 timer_pri_timeout_en_en_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 timer_pri_timeout_save_0_r(void)
+{
+	return 0x00009084;
+}
+static inline u32 timer_pri_timeout_save_1_r(void)
+{
+	return 0x00009088;
+}
+static inline u32 timer_pri_timeout_fecs_errcode_r(void)
+{
+	return 0x0000908c;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h
new file mode 100644
index 00000000..ca6457c7
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_top_gp10b_h_
+#define _hw_top_gp10b_h_
+
+static inline u32 top_num_gpcs_r(void)
+{
+	return 0x00022430;
+}
+static inline u32 top_num_gpcs_value_v(u32 r)
+{
+	return (r >> 0) & 0x1f;
+}
+static inline u32 top_tpc_per_gpc_r(void)
+{
+	return 0x00022434;
+}
+static inline u32 top_tpc_per_gpc_value_v(u32 r)
+{
+	return (r >> 0) & 0x1f;
+}
+static inline u32 top_num_fbps_r(void)
+{
+	return 0x00022438;
+}
+static inline u32 top_num_fbps_value_v(u32 r)
+{
+	return (r >> 0) & 0x1f;
+}
+static inline u32 top_num_ltcs_r(void)
+{
+	return 0x00022454;
+}
+static inline u32 top_device_info_r(u32 i)
+{
+	return 0x00022700 + i*4;
+}
+static inline u32 top_device_info__size_1_v(void)
+{
+	return 0x00000040;
+}
+static inline u32 top_device_info_chain_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 top_device_info_chain_enable_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 top_device_info_engine_enum_v(u32 r)
+{
+	return (r >> 26) & 0xf;
+}
+static inline u32 top_device_info_runlist_enum_v(u32 r)
+{
+	return (r >> 21) & 0xf;
+}
+static inline u32 top_device_info_type_enum_v(u32 r)
+{
+	return (r >> 2) & 0x1fffffff;
+}
+static inline u32 top_device_info_type_enum_graphics_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 top_device_info_type_enum_graphics_f(void)
+{
+	return 0x0;
+}
+static inline u32 top_device_info_type_enum_copy0_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 top_device_info_type_enum_copy0_f(void)
+{
+	return 0x4;
+}
+static inline u32 top_device_info_entry_v(u32 r)
+{
+	return (r >> 0) & 0x3;
+}
+static inline u32 top_device_info_entry_not_valid_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 top_device_info_entry_enum_v(void)
+{
+	return 0x00000002;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_trim_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_trim_gp10b.h
new file mode 100644
index 00000000..76597f69
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_trim_gp10b.h
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_trim_gp10b_h_
+#define _hw_trim_gp10b_h_
+
+static inline u32 trim_sys_gpcpll_cfg_r(void)
+{
+	return 0x00137000;
+}
+static inline u32 trim_sys_gpcpll_cfg_enable_m(void)
+{
+	return 0x1 << 0;
+}
+static inline u32 trim_sys_gpcpll_cfg_enable_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 trim_sys_gpcpll_cfg_enable_no_f(void)
+{
+	return 0x0;
+}
+static inline u32 trim_sys_gpcpll_cfg_enable_yes_f(void)
+{
+	return 0x1;
+}
+static inline u32 trim_sys_gpcpll_cfg_iddq_m(void)
+{
+	return 0x1 << 1;
+}
+static inline u32 trim_sys_gpcpll_cfg_iddq_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+static inline u32 trim_sys_gpcpll_cfg_iddq_power_on_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_m(void)
+{
+	return 0x1 << 4;
+}
+static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_power_on_f(void)
+{
+	return 0x0;
+}
+static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_power_off_f(void)
+{
+	return 0x10;
+}
+static inline u32 trim_sys_gpcpll_cfg_pll_lock_v(u32 r)
+{
+	return (r >> 17) & 0x1;
+}
+static inline u32 trim_sys_gpcpll_cfg_pll_lock_true_f(void)
+{
+	return 0x20000;
+}
+static inline u32 trim_sys_gpcpll_coeff_r(void)
+{
+	return 0x00137004;
+}
+static inline u32 trim_sys_gpcpll_coeff_mdiv_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+static inline u32 trim_sys_gpcpll_coeff_mdiv_v(u32 r)
+{
+	return (r >> 0) & 0xff;
+}
+static inline u32 trim_sys_gpcpll_coeff_ndiv_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 trim_sys_gpcpll_coeff_ndiv_m(void)
+{
+	return 0xff << 8;
+}
+static inline u32 trim_sys_gpcpll_coeff_ndiv_v(u32 r)
+{
+	return (r >> 8) & 0xff;
+}
+static inline u32 trim_sys_gpcpll_coeff_pldiv_f(u32 v)
+{
+	return (v & 0x3f) << 16;
+}
+static inline u32 trim_sys_gpcpll_coeff_pldiv_v(u32 r)
+{
+	return (r >> 16) & 0x3f;
+}
+static inline u32 trim_sys_sel_vco_r(void)
+{
+	return 0x00137100;
+}
+static inline u32 trim_sys_sel_vco_gpc2clk_out_m(void)
+{
+	return 0x1 << 0;
+}
+static inline u32 trim_sys_sel_vco_gpc2clk_out_init_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 trim_sys_sel_vco_gpc2clk_out_init_f(void)
+{
+	return 0x0;
+}
+static inline u32 trim_sys_sel_vco_gpc2clk_out_bypass_f(void)
+{
+	return 0x0;
+}
+static inline u32 trim_sys_sel_vco_gpc2clk_out_vco_f(void)
+{
+	return 0x1;
+}
+static inline u32 trim_sys_gpc2clk_out_r(void)
+{
+	return 0x00137250;
+}
+static inline u32 trim_sys_gpc2clk_out_bypdiv_s(void)
+{
+	return 6;
+}
+static inline u32 trim_sys_gpc2clk_out_bypdiv_f(u32 v)
+{
+	return (v & 0x3f) << 0;
+}
+static inline u32 trim_sys_gpc2clk_out_bypdiv_m(void)
+{
+	return 0x3f << 0;
+}
+static inline u32 trim_sys_gpc2clk_out_bypdiv_v(u32 r)
+{
+	return (r >> 0) & 0x3f;
+}
+static inline u32 trim_sys_gpc2clk_out_bypdiv_by31_f(void)
+{
+	return 0x3c;
+}
+static inline u32 trim_sys_gpc2clk_out_vcodiv_m(void)
+{
+	return 0x3f << 8;
+}
+static inline u32 trim_sys_gpc2clk_out_vcodiv_by1_f(void)
+{
+	return 0x0;
+}
+static inline u32 trim_sys_gpc2clk_out_sdiv14_m(void)
+{
+	return 0x1U << 31; /* unsigned literal: 0x1 << 31 overflows signed int */
+}
+static inline u32 trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_r(u32 i)
+{
+	return 0x00134124 + i*512;
+}
+static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(u32 v)
+{
+	return (v & 0x3fff) << 0;
+}
+static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f(void)
+{
+	return 0x10000;
+}
+static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f(void)
+{
+	return 0x100000;
+}
+static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f(void)
+{
+	return 0x1000000;
+}
+static inline u32 trim_gpc_clk_cntr_ncgpcclk_cnt_r(u32 i)
+{
+	return 0x00134128 + i*512;
+}
+static inline u32 trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(u32 r)
+{
+	return (r >> 0) & 0xfffff;
+}
+static inline u32 trim_sys_gpcpll_cfg2_r(void)
+{
+	return 0x0013700c;
+}
+static inline u32 trim_sys_gpcpll_cfg2_pll_stepa_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+static inline u32 trim_sys_gpcpll_cfg2_pll_stepa_m(void)
+{
+	return 0xffU << 24; /* unsigned literal: 0xff << 24 overflows signed int */
+}
+static inline u32 trim_sys_gpcpll_cfg3_r(void)
+{
+	return 0x00137018;
+}
+static inline u32 trim_sys_gpcpll_cfg3_pll_stepb_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+static inline u32 trim_sys_gpcpll_cfg3_pll_stepb_m(void)
+{
+	return 0xff << 16;
+}
+static inline u32 trim_sys_gpcpll_ndiv_slowdown_r(void)
+{
+	return 0x0013701c;
+}
+static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(void)
+{
+	return 0x1 << 22;
+}
+static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f(void)
+{
+	return 0x400000;
+}
+static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f(void)
+{
+	return 0x0;
+}
+static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(void)
+{
+	return 0x1U << 31; /* unsigned literal: 0x1 << 31 overflows signed int */
+}
+static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f(void)
+{
+	return 0x0;
+}
+static inline u32 trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r(void)
+{
+	return 0x001328a0;
+}
+static inline u32 trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(u32 r)
+{
+	return (r >> 24) & 0x1;
+}
+#endif
-- 
cgit v1.2.2


From badee8f41a6304817b66287e39d81b382c575163 Mon Sep 17 00:00:00 2001
From: Adeel Raza <araza@nvidia.com>
Date: Thu, 2 Oct 2014 20:39:32 -0700
Subject: gpu: nvgpu: headers for linsim CL 33688874

Bug 1561645

Change-Id: Iccd909d54fc5b1d1c8fbc903b5908bf6f7f22ec8
Signed-off-by: Adeel Raza <araza@nvidia.com>
Reviewed-on: http://git-master/r/553151
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Alexander Van Brunt <avanbrunt@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h    | 54 ++++++++++++++++++--------------
 drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h   | 24 ++++++++++++++
 drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h    | 32 +++----------------
 drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h |  4 +--
 drivers/gpu/nvgpu/gp10b/hw_trim_gp10b.h  |  8 ++---
 5 files changed, 65 insertions(+), 57 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 7a4761d6..03164957 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -1126,18 +1126,6 @@ static inline u32 gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(void)
 {
 	return 0x00000182;
 }
-static inline u32 gr_pd_pagepool_r(void)
-{
-	return 0x004064cc;
-}
-static inline u32 gr_pd_pagepool_total_pages_f(u32 v)
-{
-	return (v & 0xff) << 0;
-}
-static inline u32 gr_pd_pagepool_valid_true_f(void)
-{
-	return 0x80000000;
-}
 static inline u32 gr_pd_dist_skip_table_r(u32 i)
 {
 	return 0x004064d0 + i*4;
@@ -1302,6 +1290,18 @@ static inline u32 gr_ds_zbc_tbl_ld_trigger_active_f(void)
 {
 	return 0x4;
 }
+static inline u32 gr_ds_tga_constraintlogic_r(void)
+{
+	return 0xffffffff; /* NOTE(review): looks like a placeholder, not a real register offset - confirm */
+}
+static inline u32 gr_ds_tga_constraintlogic_beta_cbsize_f(u32 v)
+{
+	return (v & 0x1) << -1; /* FIXME(review): negative shift count is undefined in C; field position looks unresolved */
+}
+static inline u32 gr_ds_tga_constraintlogic_alpha_cbsize_f(u32 v)
+{
+	return (v & 0x1) << -1; /* FIXME(review): negative shift count is undefined in C; field position looks unresolved */
+}
 static inline u32 gr_ds_hww_esr_r(void)
 {
 	return 0x00405840;
@@ -1536,7 +1536,7 @@ static inline u32 gr_scc_pagepool_r(void)
 }
 static inline u32 gr_scc_pagepool_total_pages_f(u32 v)
 {
-	return (v & 0xff) << 0;
+	return (v & 0x3ff) << 0;
 }
 static inline u32 gr_scc_pagepool_total_pages_hwmax_v(void)
 {
@@ -1544,7 +1544,7 @@ static inline u32 gr_scc_pagepool_total_pages_hwmax_v(void)
 }
 static inline u32 gr_scc_pagepool_total_pages_hwmax_value_v(void)
 {
-	return 0x00000080;
+	return 0x00000200;
 }
 static inline u32 gr_scc_pagepool_total_pages_byte_granularity_v(void)
 {
@@ -1552,19 +1552,19 @@ static inline u32 gr_scc_pagepool_total_pages_byte_granularity_v(void)
 }
 static inline u32 gr_scc_pagepool_max_valid_pages_s(void)
 {
-	return 8;
+	return 10;
 }
 static inline u32 gr_scc_pagepool_max_valid_pages_f(u32 v)
 {
-	return (v & 0xff) << 8;
+	return (v & 0x3ff) << 10;
 }
 static inline u32 gr_scc_pagepool_max_valid_pages_m(void)
 {
-	return 0xff << 8;
+	return 0x3ff << 10;
 }
 static inline u32 gr_scc_pagepool_max_valid_pages_v(u32 r)
 {
-	return (r >> 8) & 0xff;
+	return (r >> 10) & 0x3ff;
 }
 static inline u32 gr_scc_pagepool_valid_true_f(void)
 {
@@ -1788,7 +1788,7 @@ static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_m(void)
 }
 static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(void)
 {
-	return 0x00100000;
+	return 0x00030000;
 }
 static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v(void)
 {
@@ -2068,11 +2068,19 @@ static inline u32 gr_gpcs_swdx_tc_beta_cb_size_r(u32 i)
 }
 static inline u32 gr_gpcs_swdx_tc_beta_cb_size_v_f(u32 v)
 {
-	return (v & 0x3fffff) << 0;
+	return (v & 0xffffffff) << -1;
 }
 static inline u32 gr_gpcs_swdx_tc_beta_cb_size_v_m(void)
 {
-	return 0x3fffff << 0;
+	return 0xffffffff << -1;
+}
+static inline u32 gr_gpcs_swdx_tc_beta_cb_size_div3_f(u32 v)
+{
+	return (v & 0xffffffff) << -1;
+}
+static inline u32 gr_gpcs_swdx_tc_beta_cb_size_div3_m(void)
+{
+	return 0xffffffff << -1;
 }
 static inline u32 gr_gpcs_swdx_rm_pagepool_r(void)
 {
@@ -2080,7 +2088,7 @@ static inline u32 gr_gpcs_swdx_rm_pagepool_r(void)
 }
 static inline u32 gr_gpcs_swdx_rm_pagepool_total_pages_f(u32 v)
 {
-	return (v & 0xff) << 0;
+	return (v & 0x3ff) << 0;
 }
 static inline u32 gr_gpcs_swdx_rm_pagepool_valid_true_f(void)
 {
@@ -2464,7 +2472,7 @@ static inline u32 gr_gpcs_gcc_pagepool_r(void)
 }
 static inline u32 gr_gpcs_gcc_pagepool_total_pages_f(u32 v)
 {
-	return (v & 0xff) << 0;
+	return (v & 0x3ff) << 0;
 }
 static inline u32 gr_gpcs_tpcs_pe_vaf_r(void)
 {
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
index 1ead0679..32c4a01d 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
@@ -50,6 +50,30 @@
 #ifndef _hw_ltc_gp10b_h_
 #define _hw_ltc_gp10b_h_
 
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt0_r(void)
+{
+	return 0xffffffff;
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt1_r(void)
+{
+	return 0xffffffff;
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt0_r(void)
+{
+	return 0xffffffff;
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt1_r(void)
+{
+	return 0xffffffff;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_r(void)
+{
+	return 0xffffffff;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_r(void)
+{
+	return 0xffffffff;
+}
 static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void)
 {
 	return 0x0014046c;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
index f45fdc99..83e06e8e 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
@@ -50,9 +50,9 @@
 #ifndef _hw_mc_gp10b_h_
 #define _hw_mc_gp10b_h_
 
-static inline u32 mc_intr_0_r(void)
+static inline u32 mc_intr_0_r(u32 i)
 {
-	return 0x00000100;
+	return 0x00000100 + i*4;
 }
 static inline u32 mc_intr_0_pfifo_pending_f(void)
 {
@@ -78,33 +78,9 @@ static inline u32 mc_intr_0_pbus_pending_f(void)
 {
 	return 0x10000000;
 }
-static inline u32 mc_intr_mask_0_r(void)
+static inline u32 mc_intr_en_0_r(u32 i)
 {
-	return 0x00000640;
-}
-static inline u32 mc_intr_mask_0_pmu_enabled_f(void)
-{
-	return 0x1000000;
-}
-static inline u32 mc_intr_en_0_r(void)
-{
-	return 0x00000140;
-}
-static inline u32 mc_intr_en_0_inta_disabled_f(void)
-{
-	return 0x0;
-}
-static inline u32 mc_intr_en_0_inta_hardware_f(void)
-{
-	return 0x1;
-}
-static inline u32 mc_intr_en_1_r(void)
-{
-	return 0x00000144;
-}
-static inline u32 mc_intr_en_1_inta_disabled_f(void)
-{
-	return 0x0;
+	return 0x00000140 + i*4;
 }
 static inline u32 mc_enable_r(void)
 {
diff --git a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
index d3fa8553..5720cde1 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
@@ -448,7 +448,7 @@ static inline u32 pbdma_syncpointb_r(u32 i)
 }
 static inline u32 pbdma_syncpointb_op_v(u32 r)
 {
-	return (r >> 0) & 0x3;
+	return (r >> 0) & 0x1;
 }
 static inline u32 pbdma_syncpointb_op_wait_v(void)
 {
@@ -464,6 +464,6 @@ static inline u32 pbdma_syncpointb_wait_switch_en_v(void)
 }
 static inline u32 pbdma_syncpointb_syncpt_index_v(u32 r)
 {
-	return (r >> 8) & 0xff;
+	return (r >> 8) & 0xfff;
 }
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_trim_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_trim_gp10b.h
index 76597f69..94da91b0 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_trim_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_trim_gp10b.h
@@ -200,11 +200,11 @@ static inline u32 trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f(void)
 }
 static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_r(u32 i)
 {
-	return 0x00134124 + i*512;
+	return 0x001e0124 + i*1024;
 }
 static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(u32 v)
 {
-	return (v & 0x3fff) << 0;
+	return (v & 0xffff) << 0;
 }
 static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f(void)
 {
@@ -220,11 +220,11 @@ static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f(void)
 }
 static inline u32 trim_gpc_clk_cntr_ncgpcclk_cnt_r(u32 i)
 {
-	return 0x00134128 + i*512;
+	return 0x001e0128 + i*1024;
 }
 static inline u32 trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(u32 r)
 {
-	return (r >> 0) & 0xfffff;
+	return (r >> 0) & 0xfffffff;
 }
 static inline u32 trim_sys_gpcpll_cfg2_r(void)
 {
-- 
cgit v1.2.2


From 1f3b9d851a0beb716596040f77b1431cc1fd8670 Mon Sep 17 00:00:00 2001
From: Adeel Raza <araza@nvidia.com>
Date: Tue, 14 Oct 2014 18:16:32 -0700
Subject: gpu: nvgpu: headers for linsim CL 33759297

Change-Id: Iaafb651875481b7fa31504642df86311ec9933a5
Signed-off-by: Adeel Raza <araza@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h | 2 +-
 drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
index 161c1ce0..f6020434 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
@@ -200,7 +200,7 @@ static inline u32 gmmu_pte_read_disable_true_f(void)
 }
 static inline u32 gmmu_pte_comptagline_f(u32 v)
 {
-	return (v & 0x1ffff) << 12;
+	return (v & 0x3ffff) << 12;
 }
 static inline u32 gmmu_pte_comptagline_w(void)
 {
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
index 32c4a01d..2b20199e 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
@@ -148,7 +148,7 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl2_r(void)
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(u32 v)
 {
-	return (v & 0x1ffff) << 0;
+	return (v & 0x3ffff) << 0;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl3_r(void)
 {
@@ -156,7 +156,7 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl3_r(void)
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(u32 v)
 {
-	return (v & 0x1ffff) << 0;
+	return (v & 0x3ffff) << 0;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(void)
 {
-- 
cgit v1.2.2


From 07b7a534fa8d5e93420521fcb5e745acad386f00 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 27 Oct 2014 09:47:25 +0200
Subject: gpu: nvgpu: Synchronize gp10b headers with gm20b

Add to gp10b all registers that were added to gk20a and gm20b. Remove
the gp10b trim registers, because they will not be accessed by the CPU.

Bug 1567274

Change-Id: Ib6be34ce3d55901bd7e1f30eea8e43725719a912
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/590312
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h |  68 +++++-
 drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h         |  32 ++-
 drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h       |  20 ++
 drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h       |  44 ++++
 drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h       |   2 +-
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h         | 228 ++++++++++++--------
 drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h        | 250 +++++++++++++++++++---
 drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h         |  50 ++++-
 drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h      |  20 ++
 drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h        |  12 ++
 drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h        |  62 +++++-
 drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h      |  72 ++-----
 drivers/gpu/nvgpu/gp10b/hw_trim_gp10b.h       | 289 --------------------------
 13 files changed, 662 insertions(+), 487 deletions(-)
 delete mode 100644 drivers/gpu/nvgpu/gp10b/hw_trim_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
index 6339cf5b..79890f3c 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
@@ -90,13 +90,25 @@ static inline u32 ctxsw_prog_main_image_pm_o(void)
 {
 	return 0x00000028;
 }
-static inline u32 ctxsw_prog_main_image_pm_mode_v(u32 r)
+static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
 {
-	return (r >> 0) & 0x7;
+	return 0x7 << 0;
 }
-static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_v(void)
+static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
 {
-	return 0x00000000;
+	return 0x0;
+}
+static inline u32 ctxsw_prog_main_image_pm_smpc_mode_m(void)
+{
+	return 0x7 << 3;
+}
+static inline u32 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f(void)
+{
+	return 0x8;
+}
+static inline u32 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(void)
+{
+	return 0x0;
 }
 static inline u32 ctxsw_prog_main_image_pm_ptr_o(void)
 {
@@ -178,4 +190,52 @@ static inline u32 ctxsw_prog_extended_sm_dsm_perf_counter_control_register_strid
 {
 	return 0x00000002;
 }
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_o(void)
+{
+	return 0x000000a0;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_s(void)
+{
+	return 2;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_f(u32 v)
+{
+	return (v & 0x3) << 0;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_m(void)
+{
+	return 0x3 << 0;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_v(u32 r)
+{
+	return (r >> 0) & 0x3;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f(void)
+{
+	return 0x0;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(void)
+{
+	return 0x2;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_addr_lo_o(void)
+{
+	return 0x000000a4;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_addr_hi_o(void)
+{
+	return 0x000000a8;
+}
+static inline u32 ctxsw_prog_main_image_misc_options_o(void)
+{
+	return 0x0000003c;
+}
+static inline u32 ctxsw_prog_main_image_misc_options_verif_features_m(void)
+{
+	return 0x1 << 3;
+}
+static inline u32 ctxsw_prog_main_image_misc_options_verif_features_disabled_f(void)
+{
+	return 0x0;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
index 9dacabce..d2ecdce1 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
@@ -66,6 +66,10 @@ static inline u32 fb_mmu_ctrl_vm_pg_size_128kb_f(void)
 {
 	return 0x0;
 }
+static inline u32 fb_mmu_ctrl_vm_pg_size_64kb_f(void)
+{
+	return 0x1;
+}
 static inline u32 fb_mmu_ctrl_pri_fifo_empty_v(u32 r)
 {
 	return (r >> 15) & 0x1;
@@ -78,6 +82,22 @@ static inline u32 fb_mmu_ctrl_pri_fifo_space_v(u32 r)
 {
 	return (r >> 16) & 0xff;
 }
+static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_v(u32 r)
+{
+	return (r >> 11) & 0x1;
+}
+static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_true_f(void)
+{
+	return 0x800;
+}
+static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_priv_mmu_phy_secure_r(void)
+{
+	return 0x00100ce4;
+}
 static inline u32 fb_mmu_invalidate_pdb_r(void)
 {
 	return 0x00100cb8;
@@ -158,9 +178,9 @@ static inline u32 fb_mmu_debug_wr_vol_true_f(void)
 {
 	return 0x4;
 }
-static inline u32 fb_mmu_debug_wr_addr_v(u32 r)
+static inline u32 fb_mmu_debug_wr_addr_f(u32 v)
 {
-	return (r >> 4) & 0xfffffff;
+	return (v & 0xfffffff) << 4;
 }
 static inline u32 fb_mmu_debug_wr_addr_alignment_v(void)
 {
@@ -178,9 +198,9 @@ static inline u32 fb_mmu_debug_rd_vol_false_f(void)
 {
 	return 0x0;
 }
-static inline u32 fb_mmu_debug_rd_addr_v(u32 r)
+static inline u32 fb_mmu_debug_rd_addr_f(u32 v)
 {
-	return (r >> 4) & 0xfffffff;
+	return (v & 0xfffffff) << 4;
 }
 static inline u32 fb_mmu_debug_rd_addr_alignment_v(void)
 {
@@ -202,10 +222,6 @@ static inline u32 fb_mmu_vpr_info_r(void)
 {
 	return 0x00100cd0;
 }
-static inline u32 fb_mmu_vpr_info_fetch_f(u32 v)
-{
-	return (v & 0x1) << 2;
-}
 static inline u32 fb_mmu_vpr_info_fetch_v(u32 r)
 {
 	return (r >> 2) & 0x1;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
index 764c1b6c..b79758d2 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
@@ -206,6 +206,10 @@ static inline u32 fifo_intr_en_0_r(void)
 {
 	return 0x00002140;
 }
+static inline u32 fifo_intr_en_0_sched_error_m(void)
+{
+	return 0x1 << 8;
+}
 static inline u32 fifo_intr_en_1_r(void)
 {
 	return 0x00002528;
@@ -346,10 +350,18 @@ static inline u32 fifo_preempt_type_channel_f(void)
 {
 	return 0x0;
 }
+static inline u32 fifo_preempt_type_tsg_f(void)
+{
+	return 0x1000000;
+}
 static inline u32 fifo_preempt_chid_f(u32 v)
 {
 	return (v & 0xfff) << 0;
 }
+static inline u32 fifo_preempt_id_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
 static inline u32 fifo_trigger_mmu_fault_r(u32 i)
 {
 	return 0x00002a30 + i*4;
@@ -382,6 +394,10 @@ static inline u32 fifo_engine_status_id_type_chid_v(void)
 {
 	return 0x00000000;
 }
+static inline u32 fifo_engine_status_id_type_tsgid_v(void)
+{
+	return 0x00000001;
+}
 static inline u32 fifo_engine_status_ctx_status_v(u32 r)
 {
 	return (r >> 13) & 0x7;
@@ -466,6 +482,10 @@ static inline u32 fifo_pbdma_status_id_type_chid_v(void)
 {
 	return 0x00000000;
 }
+static inline u32 fifo_pbdma_status_id_type_tsgid_v(void)
+{
+	return 0x00000001;
+}
 static inline u32 fifo_pbdma_status_chan_status_v(u32 r)
 {
 	return (r >> 13) & 0x7;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
index 00291d30..272f7fb3 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
@@ -54,4 +54,48 @@ static inline u32 fuse_status_opt_tpc_gpc_r(u32 i)
 {
 	return 0x00021c38 + i*4;
 }
+static inline u32 fuse_ctrl_opt_tpc_gpc_r(u32 i)
+{
+	return 0x00021838 + i*4;
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_r(void)
+{
+	return 0x00021944;
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_data_f(u32 v)
+{
+	return (v & 0x3) << 0;
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_data_m(void)
+{
+	return 0x3 << 0;
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_data_v(u32 r)
+{
+	return (r >> 0) & 0x3;
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_r(void)
+{
+	return 0x00021948;
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_data_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_data_m(void)
+{
+	return 0x1 << 0;
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_data_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_data_yes_f(void)
+{
+	return 0x1;
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_data_no_f(void)
+{
+	return 0x0;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
index f6020434..161c1ce0 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
@@ -200,7 +200,7 @@ static inline u32 gmmu_pte_read_disable_true_f(void)
 }
 static inline u32 gmmu_pte_comptagline_f(u32 v)
 {
-	return (v & 0x3ffff) << 12;
+	return (v & 0x1ffff) << 12;
 }
 static inline u32 gmmu_pte_comptagline_w(void)
 {
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 03164957..f8607618 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -78,6 +78,26 @@ static inline u32 gr_intr_illegal_method_reset_f(void)
 {
 	return 0x10;
 }
+static inline u32 gr_intr_illegal_notify_pending_f(void)
+{
+	return 0x40;
+}
+static inline u32 gr_intr_illegal_notify_reset_f(void)
+{
+	return 0x40;
+}
+static inline u32 gr_intr_firmware_method_f(u32 v)
+{
+	return (v & 0x1) << 8;
+}
+static inline u32 gr_intr_firmware_method_pending_f(void)
+{
+	return 0x100;
+}
+static inline u32 gr_intr_firmware_method_reset_f(void)
+{
+	return 0x100;
+}
 static inline u32 gr_intr_illegal_class_pending_f(void)
 {
 	return 0x20;
@@ -86,6 +106,14 @@ static inline u32 gr_intr_illegal_class_reset_f(void)
 {
 	return 0x20;
 }
+static inline u32 gr_intr_fecs_error_pending_f(void)
+{
+	return 0x80000;
+}
+static inline u32 gr_intr_fecs_error_reset_f(void)
+{
+	return 0x80000;
+}
 static inline u32 gr_intr_class_error_pending_f(void)
 {
 	return 0x100000;
@@ -102,6 +130,26 @@ static inline u32 gr_intr_exception_reset_f(void)
 {
 	return 0x200000;
 }
+static inline u32 gr_fecs_intr_r(void)
+{
+	return 0x00400144;
+}
+static inline u32 gr_class_error_r(void)
+{
+	return 0x00400110;
+}
+static inline u32 gr_class_error_code_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 gr_intr_nonstall_r(void)
+{
+	return 0x00400120;
+}
+static inline u32 gr_intr_nonstall_trap_pending_f(void)
+{
+	return 0x2;
+}
 static inline u32 gr_intr_en_r(void)
 {
 	return 0x0040013c;
@@ -198,6 +246,10 @@ static inline u32 gr_status_r(void)
 {
 	return 0x00400700;
 }
+static inline u32 gr_status_fe_method_upper_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
 static inline u32 gr_status_fe_method_lower_v(u32 r)
 {
 	return (r >> 2) & 0x1;
@@ -206,6 +258,10 @@ static inline u32 gr_status_fe_method_lower_idle_v(void)
 {
 	return 0x00000000;
 }
+static inline u32 gr_status_fe_gi_v(u32 r)
+{
+	return (r >> 21) & 0x1;
+}
 static inline u32 gr_status_mask_r(void)
 {
 	return 0x00400610;
@@ -662,6 +718,22 @@ static inline u32 gr_fecs_method_push_adr_set_watchdog_timeout_f(void)
 {
 	return 0x21;
 }
+static inline u32 gr_fecs_host_int_status_r(void)
+{
+	return 0x00409c18;
+}
+static inline u32 gr_fecs_host_int_status_umimp_firmware_method_f(u32 v)
+{
+	return (v & 0x1) << 17;
+}
+static inline u32 gr_fecs_host_int_status_umimp_illegal_method_f(u32 v)
+{
+	return (v & 0x1) << 18;
+}
+static inline u32 gr_fecs_host_int_clear_r(void)
+{
+	return 0x00409c20;
+}
 static inline u32 gr_fecs_host_int_enable_r(void)
 {
 	return 0x00409c24;
@@ -1292,15 +1364,19 @@ static inline u32 gr_ds_zbc_tbl_ld_trigger_active_f(void)
 }
 static inline u32 gr_ds_tga_constraintlogic_r(void)
 {
-	return 0xffffffff;
+	return 0x00405830;
 }
 static inline u32 gr_ds_tga_constraintlogic_beta_cbsize_f(u32 v)
 {
-	return (v & 0x1) << -1;
+	return (v & 0x3fffff) << 0;
+}
+static inline u32 gr_ds_tga_constraintlogic_r(void)
+{
+	return 0x0040585c;
 }
 static inline u32 gr_ds_tga_constraintlogic_alpha_cbsize_f(u32 v)
 {
-	return (v & 0x1) << -1;
+	return (v & 0xffff) << 0;
 }
 static inline u32 gr_ds_hww_esr_r(void)
 {
@@ -1674,6 +1750,34 @@ static inline u32 gr_gpccs_rc_lanes_num_chains_v(u32 r)
 {
 	return (r >> 0) & 0x3f;
 }
+static inline u32 gr_gpccs_rc_lane_size_r(void)
+{
+	return 0x00502910;
+}
+static inline u32 gr_gpccs_rc_lane_size_v_s(void)
+{
+	return 24;
+}
+static inline u32 gr_gpccs_rc_lane_size_v_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+static inline u32 gr_gpccs_rc_lane_size_v_m(void)
+{
+	return 0xffffff << 0;
+}
+static inline u32 gr_gpccs_rc_lane_size_v_v(u32 r)
+{
+	return (r >> 0) & 0xffffff;
+}
+static inline u32 gr_gpccs_rc_lane_size_v_0_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpccs_rc_lane_size_v_0_f(void)
+{
+	return 0x0;
+}
 static inline u32 gr_gpc0_zcull_fs_r(void)
 {
 	return 0x00500910;
@@ -2068,19 +2172,11 @@ static inline u32 gr_gpcs_swdx_tc_beta_cb_size_r(u32 i)
 }
 static inline u32 gr_gpcs_swdx_tc_beta_cb_size_v_f(u32 v)
 {
-	return (v & 0xffffffff) << -1;
+	return (v & 0x3fffff) << 0;
 }
 static inline u32 gr_gpcs_swdx_tc_beta_cb_size_v_m(void)
 {
-	return 0xffffffff << -1;
-}
-static inline u32 gr_gpcs_swdx_tc_beta_cb_size_div3_f(u32 v)
-{
-	return (v & 0xffffffff) << -1;
-}
-static inline u32 gr_gpcs_swdx_tc_beta_cb_size_div3_m(void)
-{
-	return 0xffffffff << -1;
+	return 0x3fffff << 0;
 }
 static inline u32 gr_gpcs_swdx_rm_pagepool_r(void)
 {
@@ -2526,26 +2622,6 @@ static inline u32 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(void)
 {
 	return 0x10000000;
 }
-static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_r(void)
-{
-	return 0x00419e00;
-}
-static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_core_enable_m(void)
-{
-	return 0x1 << 7;
-}
-static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_core_enable_enable_f(void)
-{
-	return 0x80;
-}
-static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_m(void)
-{
-	return 0x1 << 15;
-}
-static inline u32 gr_gpcs_tpcs_sm_pm_ctrl_qctl_enable_enable_f(void)
-{
-	return 0x8000;
-}
 static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(void)
 {
 	return 0x00419e44;
@@ -2670,51 +2746,51 @@ static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complet
 {
 	return 0x40;
 }
-static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
+static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_en_r(void)
 {
-	return 0x0050450c;
+	return 0x00419d0c;
 }
-static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void)
+static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f(void)
 {
 	return 0x2;
 }
-static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_disabled_f(void)
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
 {
-	return 0x0;
+	return 0x0050450c;
 }
-static inline u32 gr_gpc0_gpccs_gpc_exception_en_r(void)
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void)
 {
-	return 0x00502c94;
+	return 0x2;
 }
-static inline u32 gr_gpc0_gpccs_gpc_exception_en_tpc_0_enabled_f(void)
+static inline u32 gr_gpcs_gpccs_gpc_exception_en_r(void)
 {
-	return 0x10000;
+	return 0x0041ac94;
 }
-static inline u32 gr_gpc0_gpccs_gpc_exception_en_tpc_0_disabled_f(void)
+static inline u32 gr_gpcs_gpccs_gpc_exception_en_tpc_f(u32 v)
 {
-	return 0x0;
+	return (v & 0xff) << 16;
 }
-static inline u32 gr_gpcs_gpccs_gpc_exception_r(void)
+static inline u32 gr_gpc0_gpccs_gpc_exception_r(void)
 {
-	return 0x0041ac90;
+	return 0x00502c90;
 }
-static inline u32 gr_gpcs_gpccs_gpc_exception_tpc_v(u32 r)
+static inline u32 gr_gpc0_gpccs_gpc_exception_tpc_v(u32 r)
 {
 	return (r >> 16) & 0xff;
 }
-static inline u32 gr_gpcs_gpccs_gpc_exception_tpc_0_pending_v(void)
+static inline u32 gr_gpc0_gpccs_gpc_exception_tpc_0_pending_v(void)
 {
 	return 0x00000001;
 }
-static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_r(void)
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_r(void)
 {
-	return 0x00419d08;
+	return 0x00504508;
 }
-static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_sm_v(u32 r)
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_sm_v(u32 r)
 {
 	return (r >> 1) & 0x1;
 }
-static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_sm_pending_v(void)
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_sm_pending_v(void)
 {
 	return 0x00000001;
 }
@@ -2810,10 +2886,6 @@ static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(u32 v)
 {
 	return (v & 0x1) << 0;
 }
-static inline u32 gr_gpcs_tpcs_sm_power_throttle_r(void)
-{
-	return 0x00419ed8;
-}
 static inline u32 gr_gpcs_tpcs_pes_vsc_vpc_r(void)
 {
 	return 0x0041be08;
@@ -3078,42 +3150,6 @@ static inline u32 gr_fe_pwr_mode_req_done_v(void)
 {
 	return 0x00000000;
 }
-static inline u32 gr_gpcs_tpcs_sm_sfe_ba_control_r(void)
-{
-	return 0x00419f88;
-}
-static inline u32 gr_gpcs_tpcs_sm_sfe_ba_control_blkactivity_enable_f(u32 v)
-{
-	return (v & 0x1) << 31;
-}
-static inline u32 gr_gpcs_tpcs_sm_sfe_ba_control_blkactivity_enable_m(void)
-{
-	return 0x1 << 31;
-}
-static inline u32 gr_gpcs_tpcs_sm_quad_ba_control_r(void)
-{
-	return 0x00419f80;
-}
-static inline u32 gr_gpcs_tpcs_sm_quad_ba_control_blkactivity_enable_f(u32 v)
-{
-	return (v & 0x1) << 31;
-}
-static inline u32 gr_gpcs_tpcs_sm_quad_ba_control_blkactivity_enable_m(void)
-{
-	return 0x1 << 31;
-}
-static inline u32 gr_gpcs_tpcs_sm_mio_ba_control_r(void)
-{
-	return 0x00419ccc;
-}
-static inline u32 gr_gpcs_tpcs_sm_mio_ba_control_blkactivity_enable_f(u32 v)
-{
-	return (v & 0x1) << 31;
-}
-static inline u32 gr_gpcs_tpcs_sm_mio_ba_control_blkactivity_enable_m(void)
-{
-	return 0x1 << 31;
-}
 static inline u32 gr_gpcs_pri_mmu_ctrl_r(void)
 {
 	return 0x00418880;
@@ -3166,6 +3202,14 @@ static inline u32 gr_gpcs_pri_mmu_debug_ctrl_r(void)
 {
 	return 0x004188b0;
 }
+static inline u32 gr_gpcs_pri_mmu_debug_ctrl_debug_v(u32 r)
+{
+	return (r >> 16) & 0x1;
+}
+static inline u32 gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_v(void)
+{
+	return 0x00000001;
+}
 static inline u32 gr_gpcs_pri_mmu_debug_wr_r(void)
 {
 	return 0x004188b4;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
index 2b20199e..a38cfe8d 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
@@ -50,30 +50,6 @@
 #ifndef _hw_ltc_gp10b_h_
 #define _hw_ltc_gp10b_h_
 
-static inline u32 ltc_ltc0_ltss_tstg_cmgmt0_r(void)
-{
-	return 0xffffffff;
-}
-static inline u32 ltc_ltc0_ltss_tstg_cmgmt1_r(void)
-{
-	return 0xffffffff;
-}
-static inline u32 ltc_ltc1_ltss_tstg_cmgmt0_r(void)
-{
-	return 0xffffffff;
-}
-static inline u32 ltc_ltc1_ltss_tstg_cmgmt1_r(void)
-{
-	return 0xffffffff;
-}
-static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_r(void)
-{
-	return 0xffffffff;
-}
-static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_r(void)
-{
-	return 0xffffffff;
-}
 static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void)
 {
 	return 0x0014046c;
@@ -140,7 +116,7 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(void)
 }
 static inline u32 ltc_ltc0_lts0_cbc_ctrl1_r(void)
 {
-	return 0x0017e26c;
+	return 0x0014046c;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl2_r(void)
 {
@@ -148,7 +124,7 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl2_r(void)
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(u32 v)
 {
-	return (v & 0x3ffff) << 0;
+	return (v & 0x1ffff) << 0;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl3_r(void)
 {
@@ -156,7 +132,7 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl3_r(void)
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(u32 v)
 {
-	return (v & 0x3ffff) << 0;
+	return (v & 0x1ffff) << 0;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(void)
 {
@@ -298,8 +274,224 @@ static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_f(void)
 {
 	return 0x1;
 }
-static inline u32 ltc_ltc0_ltss_intr_r(void)
+static inline u32 ltc_ltcs_ltss_intr_r(void)
 {
-	return 0x0014020c;
+	return 0x0017e20c;
+}
+static inline u32 ltc_ltcs_ltss_intr_en_evicted_cb_m(void)
+{
+	return 0x1 << 20;
+}
+static inline u32 ltc_ltcs_ltss_intr_en_illegal_compstat_access_m(void)
+{
+	return 0x1 << 30;
+}
+static inline u32 ltc_ltc0_lts0_intr_r(void)
+{
+	return 0x0014040c;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_r(void)
+{
+	return 0x0017e2a0;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_pending_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_v(u32 r)
+{
+	return (r >> 8) & 0xf;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_3_v(void)
+{
+	return 0x00000003;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_3_f(void)
+{
+	return 0x300;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_v(u32 r)
+{
+	return (r >> 28) & 0x1;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_true_f(void)
+{
+	return 0x10000000;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_v(u32 r)
+{
+	return (r >> 29) & 0x1;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_true_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_v(u32 r)
+{
+	return (r >> 30) & 0x1;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_r(void)
+{
+	return 0x0017e2a4;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_v(u32 r)
+{
+	return (r >> 8) & 0xf;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_3_v(void)
+{
+	return 0x00000003;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_3_f(void)
+{
+	return 0x300;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_v(u32 r)
+{
+	return (r >> 16) & 0x1;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_true_f(void)
+{
+	return 0x10000;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_v(u32 r)
+{
+	return (r >> 28) & 0x1;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_true_f(void)
+{
+	return 0x10000000;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_v(u32 r)
+{
+	return (r >> 29) & 0x1;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_true_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_v(u32 r)
+{
+	return (r >> 30) & 0x1;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt0_r(void)
+{
+	return 0x001402a0;
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt0_invalidate_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt1_r(void)
+{
+	return 0x001402a4;
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt1_clean_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt0_r(void)
+{
+	return 0x001422a0;
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt0_invalidate_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt0_invalidate_pending_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt0_invalidate_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt1_r(void)
+{
+	return 0x001422a4;
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt1_clean_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt1_clean_pending_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt1_clean_pending_f(void)
+{
+	return 0x1;
 }
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
index 83e06e8e..ba0af497 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
@@ -50,38 +50,62 @@
 #ifndef _hw_mc_gp10b_h_
 #define _hw_mc_gp10b_h_
 
-static inline u32 mc_intr_0_r(u32 i)
+static inline u32 mc_boot_0_r(void)
 {
-	return 0x00000100 + i*4;
+	return 0x00000000;
 }
-static inline u32 mc_intr_0_pfifo_pending_f(void)
+static inline u32 mc_boot_0_architecture_v(u32 r)
 {
-	return 0x100;
+	return (r >> 24) & 0x1f;
 }
-static inline u32 mc_intr_0_pgraph_pending_f(void)
+static inline u32 mc_boot_0_implementation_v(u32 r)
 {
-	return 0x1000;
+	return (r >> 20) & 0xf;
+}
+static inline u32 mc_boot_0_major_revision_v(u32 r)
+{
+	return (r >> 4) & 0xf;
+}
+static inline u32 mc_boot_0_minor_revision_v(u32 r)
+{
+	return (r >> 0) & 0xf;
+}
+static inline u32 mc_intr_r(u32 i)
+{
+	return 0x00000100 + i*4;
 }
-static inline u32 mc_intr_0_pmu_pending_f(void)
+static inline u32 mc_intr_pfifo_pending_f(void)
+{
+	return 0x100;
+}
+static inline u32 mc_intr_pmu_pending_f(void)
 {
 	return 0x1000000;
 }
-static inline u32 mc_intr_0_ltc_pending_f(void)
+static inline u32 mc_intr_ltc_pending_f(void)
 {
 	return 0x2000000;
 }
-static inline u32 mc_intr_0_priv_ring_pending_f(void)
+static inline u32 mc_intr_priv_ring_pending_f(void)
 {
 	return 0x40000000;
 }
-static inline u32 mc_intr_0_pbus_pending_f(void)
+static inline u32 mc_intr_pbus_pending_f(void)
 {
 	return 0x10000000;
 }
-static inline u32 mc_intr_en_0_r(u32 i)
+static inline u32 mc_intr_en_r(u32 i)
 {
 	return 0x00000140 + i*4;
 }
+static inline u32 mc_intr_en_set_r(u32 i)
+{
+	return 0x00000160 + i*4;
+}
+static inline u32 mc_intr_en_clear_r(u32 i)
+{
+	return 0x00000180 + i*4;
+}
 static inline u32 mc_enable_r(void)
 {
 	return 0x00000200;
@@ -162,6 +186,10 @@ static inline u32 mc_enable_hub_enabled_f(void)
 {
 	return 0x20000000;
 }
+static inline u32 mc_intr_ltc_r(void)
+{
+	return 0x000001c0;
+}
 static inline u32 mc_enable_pb_r(void)
 {
 	return 0x00000204;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
index 5720cde1..91429b47 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
@@ -174,6 +174,10 @@ static inline u32 pbdma_pb_header_type_inc_f(void)
 {
 	return 0x20000000;
 }
+static inline u32 pbdma_hdr_shadow_r(u32 i)
+{
+	return 0x00040118 + i*8192;
+}
 static inline u32 pbdma_subdevice_r(u32 i)
 {
 	return 0x00040094 + i*8192;
@@ -466,4 +470,20 @@ static inline u32 pbdma_syncpointb_syncpt_index_v(u32 r)
 {
 	return (r >> 8) & 0xfff;
 }
+static inline u32 pbdma_runlist_timeslice_r(u32 i)
+{
+	return 0x000400f8 + i*8192;
+}
+static inline u32 pbdma_runlist_timeslice_timeout_128_f(void)
+{
+	return 0x80;
+}
+static inline u32 pbdma_runlist_timeslice_timescale_3_f(void)
+{
+	return 0x3000;
+}
+static inline u32 pbdma_runlist_timeslice_enable_true_f(void)
+{
+	return 0x10000000;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h
index d76095ac..0de70b96 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h
@@ -378,6 +378,18 @@ static inline u32 pwr_falcon_bootvec_vec_f(u32 v)
 {
 	return (v & 0xffffffff) << 0;
 }
+static inline u32 pwr_falcon_dmactl_r(void)
+{
+	return 0x0010a10c;
+}
+static inline u32 pwr_falcon_dmactl_dmem_scrubbing_m(void)
+{
+	return 0x1 << 1;
+}
+static inline u32 pwr_falcon_dmactl_imem_scrubbing_m(void)
+{
+	return 0x1 << 2;
+}
 static inline u32 pwr_falcon_hwcfg_r(void)
 {
 	return 0x0010a108;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
index ef53882b..509031e5 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
@@ -78,6 +78,26 @@ static inline u32 ram_in_page_dir_base_vol_true_f(void)
 {
 	return 0x4;
 }
+static inline u32 ram_in_big_page_size_f(u32 v)
+{
+	return (v & 0x1) << 11;
+}
+static inline u32 ram_in_big_page_size_m(void)
+{
+	return 0x1 << 11;
+}
+static inline u32 ram_in_big_page_size_w(void)
+{
+	return 128;
+}
+static inline u32 ram_in_big_page_size_128kb_f(void)
+{
+	return 0x0;
+}
+static inline u32 ram_in_big_page_size_64kb_f(void)
+{
+	return 0x800;
+}
 static inline u32 ram_in_page_dir_base_lo_f(u32 v)
 {
 	return (v & 0xfffff) << 12;
@@ -318,7 +338,7 @@ static inline u32 ram_fc_chid_id_w(void)
 {
 	return 0;
 }
-static inline u32 ram_fc_pb_timeslice_w(void)
+static inline u32 ram_fc_runlist_timeslice_w(void)
 {
 	return 62;
 }
@@ -382,4 +402,44 @@ static inline u32 ram_rl_entry_size_v(void)
 {
 	return 0x00000008;
 }
+static inline u32 ram_rl_entry_chid_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
+static inline u32 ram_rl_entry_id_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
+static inline u32 ram_rl_entry_type_f(u32 v)
+{
+	return (v & 0x1) << 13;
+}
+static inline u32 ram_rl_entry_type_chid_f(void)
+{
+	return 0x0;
+}
+static inline u32 ram_rl_entry_type_tsg_f(void)
+{
+	return 0x2000;
+}
+static inline u32 ram_rl_entry_timeslice_scale_f(u32 v)
+{
+	return (v & 0xf) << 14;
+}
+static inline u32 ram_rl_entry_timeslice_scale_3_f(void)
+{
+	return 0xc000;
+}
+static inline u32 ram_rl_entry_timeslice_timeout_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+static inline u32 ram_rl_entry_timeslice_timeout_128_f(void)
+{
+	return 0x2000000;
+}
+static inline u32 ram_rl_entry_tsg_length_f(u32 v)
+{
+	return (v & 0x3f) << 26;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h
index 16bbb3ca..25eecb70 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h
@@ -70,58 +70,6 @@ static inline u32 therm_weight_1_r(void)
 {
 	return 0x00020024;
 }
-static inline u32 therm_peakpower_config1_r(u32 i)
-{
-	return 0x00020154 + i*4;
-}
-static inline u32 therm_peakpower_config1_window_period_2m_v(void)
-{
-	return 0x00000015;
-}
-static inline u32 therm_peakpower_config1_window_period_2m_f(void)
-{
-	return 0x15;
-}
-static inline u32 therm_peakpower_config1_window_en_enabled_f(void)
-{
-	return 0x80000000;
-}
-static inline u32 therm_peakpower_config1_r(u32 i)
-{
-	return 0x000202e8 + i*4;
-}
-static inline u32 therm_peakpower_config1_ba_sum_shift_s(void)
-{
-	return 5;
-}
-static inline u32 therm_peakpower_config1_ba_sum_shift_f(u32 v)
-{
-	return (v & 0x1f) << 8;
-}
-static inline u32 therm_peakpower_config1_ba_sum_shift_m(void)
-{
-	return 0x1f << 8;
-}
-static inline u32 therm_peakpower_config1_ba_sum_shift_v(u32 r)
-{
-	return (r >> 8) & 0x1f;
-}
-static inline u32 therm_peakpower_config2_r(u32 i)
-{
-	return 0x00020170 + i*4;
-}
-static inline u32 therm_peakpower_config4_r(u32 i)
-{
-	return 0x000201c0 + i*4;
-}
-static inline u32 therm_peakpower_config8_r(u32 i)
-{
-	return 0x000202e8 + i*4;
-}
-static inline u32 therm_peakpower_config9_r(u32 i)
-{
-	return 0x000202f4 + i*4;
-}
 static inline u32 therm_config1_r(void)
 {
 	return 0x00020050;
@@ -214,4 +162,24 @@ static inline u32 therm_hubmmu_idle_filter_value_m(void)
 {
 	return 0xffffffff << 0;
 }
+static inline u32 therm_clk_slowdown_r(u32 i)
+{
+	return 0x00020160 + i*4;
+}
+static inline u32 therm_clk_slowdown_idle_factor_f(u32 v)
+{
+	return (v & 0x3f) << 16;
+}
+static inline u32 therm_clk_slowdown_idle_factor_m(void)
+{
+	return 0x3f << 16;
+}
+static inline u32 therm_clk_slowdown_idle_factor_v(u32 r)
+{
+	return (r >> 16) & 0x3f;
+}
+static inline u32 therm_clk_slowdown_idle_factor_disabled_f(void)
+{
+	return 0x0;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_trim_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_trim_gp10b.h
deleted file mode 100644
index 94da91b0..00000000
--- a/drivers/gpu/nvgpu/gp10b/hw_trim_gp10b.h
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-/*
- * Function naming determines intended use:
- *
- *     <x>_r(void) : Returns the offset for register <x>.
- *
- *     <x>_o(void) : Returns the offset for element <x>.
- *
- *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
- *
- *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
- *
- *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
- *         and masked to place it at field <y> of register <x>.  This value
- *         can be |'d with others to produce a full register value for
- *         register <x>.
- *
- *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
- *         value can be ~'d and then &'d to clear the value of field <y> for
- *         register <x>.
- *
- *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
- *         to place it at field <y> of register <x>.  This value can be |'d
- *         with others to produce a full register value for <x>.
- *
- *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
- *         <x> value 'r' after being shifted to place its LSB at bit 0.
- *         This value is suitable for direct comparison with other unshifted
- *         values appropriate for use in field <y> of register <x>.
- *
- *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
- *         field <y> of register <x>.  This value is suitable for direct
- *         comparison with unshifted values appropriate for use in field <y>
- *         of register <x>.
- */
-#ifndef _hw_trim_gp10b_h_
-#define _hw_trim_gp10b_h_
-
-static inline u32 trim_sys_gpcpll_cfg_r(void)
-{
-	return 0x00137000;
-}
-static inline u32 trim_sys_gpcpll_cfg_enable_m(void)
-{
-	return 0x1 << 0;
-}
-static inline u32 trim_sys_gpcpll_cfg_enable_v(u32 r)
-{
-	return (r >> 0) & 0x1;
-}
-static inline u32 trim_sys_gpcpll_cfg_enable_no_f(void)
-{
-	return 0x0;
-}
-static inline u32 trim_sys_gpcpll_cfg_enable_yes_f(void)
-{
-	return 0x1;
-}
-static inline u32 trim_sys_gpcpll_cfg_iddq_m(void)
-{
-	return 0x1 << 1;
-}
-static inline u32 trim_sys_gpcpll_cfg_iddq_v(u32 r)
-{
-	return (r >> 1) & 0x1;
-}
-static inline u32 trim_sys_gpcpll_cfg_iddq_power_on_v(void)
-{
-	return 0x00000000;
-}
-static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_m(void)
-{
-	return 0x1 << 4;
-}
-static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_power_on_f(void)
-{
-	return 0x0;
-}
-static inline u32 trim_sys_gpcpll_cfg_enb_lckdet_power_off_f(void)
-{
-	return 0x10;
-}
-static inline u32 trim_sys_gpcpll_cfg_pll_lock_v(u32 r)
-{
-	return (r >> 17) & 0x1;
-}
-static inline u32 trim_sys_gpcpll_cfg_pll_lock_true_f(void)
-{
-	return 0x20000;
-}
-static inline u32 trim_sys_gpcpll_coeff_r(void)
-{
-	return 0x00137004;
-}
-static inline u32 trim_sys_gpcpll_coeff_mdiv_f(u32 v)
-{
-	return (v & 0xff) << 0;
-}
-static inline u32 trim_sys_gpcpll_coeff_mdiv_v(u32 r)
-{
-	return (r >> 0) & 0xff;
-}
-static inline u32 trim_sys_gpcpll_coeff_ndiv_f(u32 v)
-{
-	return (v & 0xff) << 8;
-}
-static inline u32 trim_sys_gpcpll_coeff_ndiv_m(void)
-{
-	return 0xff << 8;
-}
-static inline u32 trim_sys_gpcpll_coeff_ndiv_v(u32 r)
-{
-	return (r >> 8) & 0xff;
-}
-static inline u32 trim_sys_gpcpll_coeff_pldiv_f(u32 v)
-{
-	return (v & 0x3f) << 16;
-}
-static inline u32 trim_sys_gpcpll_coeff_pldiv_v(u32 r)
-{
-	return (r >> 16) & 0x3f;
-}
-static inline u32 trim_sys_sel_vco_r(void)
-{
-	return 0x00137100;
-}
-static inline u32 trim_sys_sel_vco_gpc2clk_out_m(void)
-{
-	return 0x1 << 0;
-}
-static inline u32 trim_sys_sel_vco_gpc2clk_out_init_v(void)
-{
-	return 0x00000000;
-}
-static inline u32 trim_sys_sel_vco_gpc2clk_out_init_f(void)
-{
-	return 0x0;
-}
-static inline u32 trim_sys_sel_vco_gpc2clk_out_bypass_f(void)
-{
-	return 0x0;
-}
-static inline u32 trim_sys_sel_vco_gpc2clk_out_vco_f(void)
-{
-	return 0x1;
-}
-static inline u32 trim_sys_gpc2clk_out_r(void)
-{
-	return 0x00137250;
-}
-static inline u32 trim_sys_gpc2clk_out_bypdiv_s(void)
-{
-	return 6;
-}
-static inline u32 trim_sys_gpc2clk_out_bypdiv_f(u32 v)
-{
-	return (v & 0x3f) << 0;
-}
-static inline u32 trim_sys_gpc2clk_out_bypdiv_m(void)
-{
-	return 0x3f << 0;
-}
-static inline u32 trim_sys_gpc2clk_out_bypdiv_v(u32 r)
-{
-	return (r >> 0) & 0x3f;
-}
-static inline u32 trim_sys_gpc2clk_out_bypdiv_by31_f(void)
-{
-	return 0x3c;
-}
-static inline u32 trim_sys_gpc2clk_out_vcodiv_m(void)
-{
-	return 0x3f << 8;
-}
-static inline u32 trim_sys_gpc2clk_out_vcodiv_by1_f(void)
-{
-	return 0x0;
-}
-static inline u32 trim_sys_gpc2clk_out_sdiv14_m(void)
-{
-	return 0x1 << 31;
-}
-static inline u32 trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f(void)
-{
-	return 0x80000000;
-}
-static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_r(u32 i)
-{
-	return 0x001e0124 + i*1024;
-}
-static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(u32 v)
-{
-	return (v & 0xffff) << 0;
-}
-static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f(void)
-{
-	return 0x10000;
-}
-static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f(void)
-{
-	return 0x100000;
-}
-static inline u32 trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f(void)
-{
-	return 0x1000000;
-}
-static inline u32 trim_gpc_clk_cntr_ncgpcclk_cnt_r(u32 i)
-{
-	return 0x001e0128 + i*1024;
-}
-static inline u32 trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(u32 r)
-{
-	return (r >> 0) & 0xfffffff;
-}
-static inline u32 trim_sys_gpcpll_cfg2_r(void)
-{
-	return 0x0013700c;
-}
-static inline u32 trim_sys_gpcpll_cfg2_pll_stepa_f(u32 v)
-{
-	return (v & 0xff) << 24;
-}
-static inline u32 trim_sys_gpcpll_cfg2_pll_stepa_m(void)
-{
-	return 0xff << 24;
-}
-static inline u32 trim_sys_gpcpll_cfg3_r(void)
-{
-	return 0x00137018;
-}
-static inline u32 trim_sys_gpcpll_cfg3_pll_stepb_f(u32 v)
-{
-	return (v & 0xff) << 16;
-}
-static inline u32 trim_sys_gpcpll_cfg3_pll_stepb_m(void)
-{
-	return 0xff << 16;
-}
-static inline u32 trim_sys_gpcpll_ndiv_slowdown_r(void)
-{
-	return 0x0013701c;
-}
-static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(void)
-{
-	return 0x1 << 22;
-}
-static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f(void)
-{
-	return 0x400000;
-}
-static inline u32 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f(void)
-{
-	return 0x0;
-}
-static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(void)
-{
-	return 0x1 << 31;
-}
-static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f(void)
-{
-	return 0x80000000;
-}
-static inline u32 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f(void)
-{
-	return 0x0;
-}
-static inline u32 trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r(void)
-{
-	return 0x001328a0;
-}
-static inline u32 trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(u32 r)
-{
-	return (r >> 24) & 0x1;
-}
-#endif
-- 
cgit v1.2.2


From 16c511220ecda4a0220976f649fddabcfbee86e0 Mon Sep 17 00:00:00 2001
From: Kenneth Adams <kadams@nvidia.com>
Date: Wed, 1 Oct 2014 08:27:17 -0700
Subject: gpu: nvgpu: t18x, gp10b framework

This change adds gp10b to the nvgpu build as
well as enabling CMA for buffer allocation.

Change-Id: Id3d45ad6ffdab14120395952e68b285dd7364c76
Signed-off-by: Ken Adams <kadams@nvidia.com>
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/553324
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/Makefile             | 10 ++++
 drivers/gpu/nvgpu/gp10b/Makefile       | 13 +++++
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c     | 54 +++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h     | 31 +++++++++++
 drivers/gpu/nvgpu/gp10b/gr_ops_gp10b.h | 28 ++++++++++
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c    | 98 ++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/hal_gp10b.h    | 21 ++++++++
 drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h   | 30 +++++++++++
 8 files changed, 285 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/Makefile
 create mode 100644 drivers/gpu/nvgpu/gp10b/Makefile
 create mode 100644 drivers/gpu/nvgpu/gp10b/gr_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/gr_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/gr_ops_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hal_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/hal_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
new file mode 100644
index 00000000..c583f6d5
--- /dev/null
+++ b/drivers/gpu/nvgpu/Makefile
@@ -0,0 +1,10 @@
+GCOV_PROFILE := y
+
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel/drivers/gpu/nvgpu
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel/include/linux
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include
+ccflags-$(CONFIG_GK20A) += -Wno-multichar
+ccflags-$(CONFIG_GK20A) += -Werror
+
+obj-$(CONFIG_GK20A)	+= gp10b/
+
diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
new file mode 100644
index 00000000..64cd4179
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -0,0 +1,13 @@
+GCOV_PROFILE := y
+
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel/drivers/gpu/nvgpu
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel/include
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/drivers/gpu/nvgpu
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include/uapi
+
+ccflags-$(CONFIG_GK20A) += -Wno-multichar
+
+obj-$(CONFIG_GK20A)  += \
+	gr_gp10b.o  \
+	hal_gp10b.o
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
new file mode 100644
index 00000000..f4a63fad
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -0,0 +1,54 @@
+/*
+ * GP10B GPU GR
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */
+
+#include "gk20a/gr_gk20a.h"
+
+#include "gm20b/gr_gm20b.h" /* for MAXWELL classes */
+#include "gp10b/gr_gp10b.h"
+
+
+bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
+{
+	bool valid = false;
+
+	switch (class_num) {
+	case PASCAL_COMPUTE_A:
+	case PASCAL_A:
+	case PASCAL_DMA_COPY_A:
+		valid = true;
+		break;
+
+	case MAXWELL_COMPUTE_B:
+	case MAXWELL_B:
+	case FERMI_TWOD_A:
+	case KEPLER_DMA_COPY_A:
+	case MAXWELL_DMA_COPY_A:
+		valid = true;
+		break;
+
+	default:
+		break;
+	}
+	gk20a_dbg_info("class=0x%x valid=%d", class_num, valid);
+	return valid;
+}
+
+void gp10b_init_gr(struct gpu_ops *gops)
+{
+	gm20b_init_gr(gops);
+	gops->gr.is_valid_class = gr_gp10b_is_valid_class;
+}
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
new file mode 100644
index 00000000..58616deb
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -0,0 +1,31 @@
+/*
+ * GP10B GPU GR
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVGPU_GR_GP10B_H_
+#define _NVGPU_GR_GP10B_H_
+
+struct gk20a;
+
+enum {
+	PASCAL_CHANNEL_GPFIFO_A  = 0xC06F,
+	PASCAL_A                 = 0xC097,
+	PASCAL_COMPUTE_A         = 0xC0C0,
+	PASCAL_DMA_COPY_A        = 0xC0B5,
+};
+
+void gp10b_init_gr(struct gpu_ops *ops);
+
+
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/gr_ops_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_ops_gp10b.h
new file mode 100644
index 00000000..c3277017
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/gr_ops_gp10b.h
@@ -0,0 +1,28 @@
+/*
+ * GP10B GPU graphics ops
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _GR_OPS_GP10B_H_
+#define _GR_OPS_GP10B_H_
+
+#include "gr_ops.h"
+
+#define __gr_gp10b_op(X)            gr_gp10b_ ## X
+#define __set_gr_gp10b_op(X)  . X = gr_gp10b_ ## X
+
+bool __gr_gp10b_op(is_valid_class)(struct gk20a *, u32);
+int  __gr_gp10b_op(alloc_obj_ctx)(struct channel_gk20a  *, struct nvgpu_alloc_obj_ctx_args *);
+
+
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
new file mode 100644
index 00000000..61bae5c7
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -0,0 +1,98 @@
+/*
+ * GP10B Tegra HAL interface
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/printk.h>
+
+#include <linux/types.h>
+
+#include "gk20a/gk20a.h"
+
+#include "gp10b/gr_gp10b.h"
+
+#include "gm20b/ltc_gm20b.h"
+#include "gm20b/fb_gm20b.h"
+#include "gm20b/gm20b_gating_reglist.h"
+#include "gm20b/fifo_gm20b.h"
+#include "gm20b/gr_ctx_gm20b.h"
+#include "gm20b/mm_gm20b.h"
+#include "gm20b/pmu_gm20b.h"
+#include "gm20b/clk_gm20b.h"
+
+struct gpu_ops gp10b_ops = {
+	.clock_gating = {
+		.slcg_bus_load_gating_prod =
+			gm20b_slcg_bus_load_gating_prod,
+		.slcg_ce2_load_gating_prod =
+			gm20b_slcg_ce2_load_gating_prod,
+		.slcg_chiplet_load_gating_prod =
+			gm20b_slcg_chiplet_load_gating_prod,
+		.slcg_ctxsw_firmware_load_gating_prod =
+			gm20b_slcg_ctxsw_firmware_load_gating_prod,
+		.slcg_fb_load_gating_prod =
+			gm20b_slcg_fb_load_gating_prod,
+		.slcg_fifo_load_gating_prod =
+			gm20b_slcg_fifo_load_gating_prod,
+		.slcg_gr_load_gating_prod =
+			gr_gm20b_slcg_gr_load_gating_prod,
+		.slcg_ltc_load_gating_prod =
+			ltc_gm20b_slcg_ltc_load_gating_prod,
+		.slcg_perf_load_gating_prod =
+			gm20b_slcg_perf_load_gating_prod,
+		.slcg_priring_load_gating_prod =
+			gm20b_slcg_priring_load_gating_prod,
+		.slcg_pmu_load_gating_prod =
+			gm20b_slcg_pmu_load_gating_prod,
+		.slcg_therm_load_gating_prod =
+			gm20b_slcg_therm_load_gating_prod,
+		.slcg_xbar_load_gating_prod =
+			gm20b_slcg_xbar_load_gating_prod,
+		.blcg_bus_load_gating_prod =
+			gm20b_blcg_bus_load_gating_prod,
+		.blcg_ctxsw_firmware_load_gating_prod =
+			gm20b_blcg_ctxsw_firmware_load_gating_prod,
+		.blcg_fb_load_gating_prod =
+			gm20b_blcg_fb_load_gating_prod,
+		.blcg_fifo_load_gating_prod =
+			gm20b_blcg_fifo_load_gating_prod,
+		.blcg_gr_load_gating_prod =
+			gm20b_blcg_gr_load_gating_prod,
+		.blcg_ltc_load_gating_prod =
+			gm20b_blcg_ltc_load_gating_prod,
+		.blcg_pwr_csb_load_gating_prod =
+			gm20b_blcg_pwr_csb_load_gating_prod,
+		.blcg_pmu_load_gating_prod =
+			gm20b_blcg_pmu_load_gating_prod,
+		.pg_gr_load_gating_prod =
+			gr_gm20b_pg_gr_load_gating_prod,
+	}
+};
+
+int gp10b_init_hal(struct gpu_ops *gops)
+{
+	*gops = gp10b_ops;
+	gm20b_init_ltc(gops);
+	gp10b_init_gr(gops);
+	gm20b_init_ltc(gops);
+	gm20b_init_fb(gops);
+	gm20b_init_fifo(gops);
+	gm20b_init_gr_ctx(gops);
+	gm20b_init_mm(gops);
+	gm20b_init_pmu_ops(gops);
+	gm20b_init_clk_ops(gops);
+	gops->name = "gp10b";
+
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.h b/drivers/gpu/nvgpu/gp10b/hal_gp10b.h
new file mode 100644
index 00000000..78615ed1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.h
@@ -0,0 +1,21 @@
+/*
+ * GP10B Tegra HAL interface
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVGPU_HAL_GP10B_H
+#define _NVGPU_HAL_GP10B_H
+struct gpu_ops;
+
+int gp10b_init_hal(struct gpu_ops *gops);
+#endif
diff --git a/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h b/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h
new file mode 100644
index 00000000..fe027fb0
--- /dev/null
+++ b/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h
@@ -0,0 +1,30 @@
+/*
+ * NVIDIA GPU ID functions, definitions.
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _NVGPU_GPUID_T18X_H_
+#define _NVGPU_GPUID_T18X_H_
+
+#define NVGPU_GPUID_GP10B \
+	GK20A_GPUID(NVGPU_GPU_ARCH_GP100, NVGPU_GPU_IMPL_GP10B)
+
+#define NVGPU_COMPAT_TEGRA_GP10B "nvidia,tegra186-gp10b"
+#define NVGPU_COMPAT_GENERIC_GP10B "nvidia,generic-gp10b"
+
+#define TEGRA_18x_GPUID NVGPU_GPUID_GP10B
+#define TEGRA_18x_GPUID_HAL gp10b_init_hal
+#define TEGRA_18x_GPU_COMPAT_TEGRA NVGPU_COMPAT_TEGRA_GP10B
+#define TEGRA_18x_GPU_COMPAT_GENERIC NVGPU_COMPAT_GENERIC_GP10B
+struct gpu_ops;
+extern int gp10b_init_hal(struct gpu_ops *);
+#endif
-- 
cgit v1.2.2


From 0b50f2a2020c81f00999a8f06a67dde4c214821f Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 27 Oct 2014 15:05:45 +0200
Subject: gpu: nvgpu: Implement gp10b intr processing

Bug 1567274

Change-Id: I2a6cef954b56d1f97208d29584e0748ec1c5e29d
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/591628
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/Makefile      |   1 +
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c   |   2 +
 drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h |   4 +
 drivers/gpu/nvgpu/gp10b/mc_gp10b.c    | 135 ++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/mc_gp10b.h    |  24 ++++++
 5 files changed, 166 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/gp10b/mc_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/mc_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index 64cd4179..722cc550 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -10,4 +10,5 @@ ccflags-$(CONFIG_GK20A) += -Wno-multichar
 
 obj-$(CONFIG_GK20A)  += \
 	gr_gp10b.o  \
+	mc_gp10b.o  \
 	hal_gp10b.o
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 61bae5c7..235254c8 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -21,6 +21,7 @@
 #include "gk20a/gk20a.h"
 
 #include "gp10b/gr_gp10b.h"
+#include "gp10b/mc_gp10b.h"
 
 #include "gm20b/ltc_gm20b.h"
 #include "gm20b/fb_gm20b.h"
@@ -83,6 +84,7 @@ struct gpu_ops gp10b_ops = {
 int gp10b_init_hal(struct gpu_ops *gops)
 {
 	*gops = gp10b_ops;
+	gp10b_init_mc(gops);
 	gm20b_init_ltc(gops);
 	gp10b_init_gr(gops);
 	gm20b_init_ltc(gops);
diff --git a/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
index ba0af497..21c592da 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
@@ -78,6 +78,10 @@ static inline u32 mc_intr_pfifo_pending_f(void)
 {
 	return 0x100;
 }
+static inline u32 mc_intr_pgraph_pending_f(void)
+{
+	return 0x1000;
+}
 static inline u32 mc_intr_pmu_pending_f(void)
 {
 	return 0x1000000;
diff --git a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
new file mode 100644
index 00000000..cdafaf56
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
@@ -0,0 +1,135 @@
+/*
+ * GP10B master
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+
+#include "gk20a/gk20a.h"
+#include "mc_gp10b.h"
+#include "hw_mc_gp10b.h"
+
+void mc_gp10b_intr_enable(struct gk20a *g)
+{
+	if (!tegra_platform_is_linsim()) {
+		gk20a_writel(g, mc_intr_en_clear_r(0), 0xffffffff);
+		gk20a_writel(g, mc_intr_en_set_r(0),
+			     mc_intr_pfifo_pending_f()
+			     | mc_intr_pgraph_pending_f());
+		gk20a_writel(g, mc_intr_en_clear_r(1), 0xffffffff);
+		gk20a_writel(g, mc_intr_en_set_r(1),
+			     mc_intr_pfifo_pending_f()
+			     | mc_intr_pgraph_pending_f()
+			     | mc_intr_priv_ring_pending_f()
+			     | mc_intr_ltc_pending_f()
+			     | mc_intr_pbus_pending_f());
+	}
+}
+
+irqreturn_t mc_gp10b_isr_stall(struct gk20a *g)
+{
+	u32 mc_intr_0;
+
+	if (!g->power_on)
+		return IRQ_NONE;
+
+	/* not from gpu when sharing irq with others */
+	mc_intr_0 = gk20a_readl(g, mc_intr_r(0));
+	if (unlikely(!mc_intr_0))
+		return IRQ_NONE;
+
+	gk20a_writel(g, mc_intr_en_clear_r(0), 0xffffffff);
+
+	return IRQ_WAKE_THREAD;
+}
+
+irqreturn_t mc_gp10b_isr_nonstall(struct gk20a *g)
+{
+	u32 mc_intr_1;
+
+	if (!g->power_on)
+		return IRQ_NONE;
+
+	/* not from gpu when sharing irq with others */
+	mc_intr_1 = gk20a_readl(g, mc_intr_r(1));
+	if (unlikely(!mc_intr_1))
+		return IRQ_NONE;
+
+	gk20a_writel(g, mc_intr_en_clear_r(1), 0xffffffff);
+
+	return IRQ_WAKE_THREAD;
+}
+
+irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g)
+{
+	u32 mc_intr_0;
+
+	gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
+
+	mc_intr_0 = gk20a_readl(g, mc_intr_r(0));
+
+	gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0);
+
+	if (mc_intr_0 & mc_intr_pgraph_pending_f())
+		gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g));
+	if (mc_intr_0 & mc_intr_pfifo_pending_f())
+		gk20a_fifo_isr(g);
+	if (mc_intr_0 & mc_intr_pmu_pending_f())
+		gk20a_pmu_isr(g);
+	if (mc_intr_0 & mc_intr_priv_ring_pending_f())
+		gk20a_priv_ring_isr(g);
+	if (mc_intr_0 & mc_intr_ltc_pending_f())
+		g->ops.ltc.isr(g);
+	if (mc_intr_0 & mc_intr_pbus_pending_f())
+		gk20a_pbus_isr(g);
+
+	gk20a_writel(g, mc_intr_en_set_r(0),
+		     mc_intr_pfifo_pending_f()
+		     | mc_intr_pgraph_pending_f());
+
+	return IRQ_HANDLED;
+}
+
+irqreturn_t mc_gp10b_intr_thread_nonstall(struct gk20a *g)
+{
+	u32 mc_intr_1;
+
+	gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
+
+	mc_intr_1 = gk20a_readl(g, mc_intr_r(1));
+
+	gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1);
+
+	if (mc_intr_1 & mc_intr_pfifo_pending_f())
+		gk20a_fifo_nonstall_isr(g);
+	if (mc_intr_1 & mc_intr_pgraph_pending_f())
+		gk20a_gr_nonstall_isr(g);
+
+	gk20a_writel(g, mc_intr_en_set_r(1),
+		     mc_intr_pfifo_pending_f()
+		     | mc_intr_pgraph_pending_f()
+		     | mc_intr_priv_ring_pending_f()
+		     | mc_intr_ltc_pending_f()
+		     | mc_intr_pbus_pending_f());
+
+	return IRQ_HANDLED;
+}
+
+void gp10b_init_mc(struct gpu_ops *gops)
+{
+	gops->mc.intr_enable = mc_gp10b_intr_enable;
+	gops->mc.isr_stall = mc_gp10b_isr_stall;
+	gops->mc.isr_nonstall = mc_gp10b_isr_nonstall;
+	gops->mc.isr_thread_stall = mc_gp10b_intr_thread_stall;
+	gops->mc.isr_thread_nonstall = mc_gp10b_intr_thread_nonstall;
+}
diff --git a/drivers/gpu/nvgpu/gp10b/mc_gp10b.h b/drivers/gpu/nvgpu/gp10b/mc_gp10b.h
new file mode 100644
index 00000000..f274ce05
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/mc_gp10b.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+/*
+ * gp10b master-control (MC) interrupt entry points and the hook that
+ * registers them in struct gpu_ops.
+ */
+#ifndef MC_GP10B_H
+#define MC_GP10B_H
+/* Only pointers are used below, so forward declarations are sufficient. */
+struct gk20a;
+struct gpu_ops;
+
+void gp10b_init_mc(struct gpu_ops *gops);
+void mc_gp10b_intr_enable(struct gk20a *g);
+irqreturn_t mc_gp10b_isr_stall(struct gk20a *g);
+irqreturn_t mc_gp10b_isr_nonstall(struct gk20a *g);
+irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g);
+irqreturn_t mc_gp10b_intr_thread_nonstall(struct gk20a *g);
+#endif
-- 
cgit v1.2.2


From 1e4861a347eb4ae602ff494596bacf01a6ddd4cc Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 31 Oct 2014 12:12:25 +0200
Subject: gpu: nvgpu: gp10b specific CB callbacks

Bug 1570662

Change-Id: Icb7e90b1216acfd19bb3027dc9e9844eb08c99d9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/592101
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c    | 115 +++++++++++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h |   8 ++-
 2 files changed, 120 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index f4a63fad..b7a52be0 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -19,7 +19,8 @@
 
 #include "gm20b/gr_gm20b.h" /* for MAXWELL classes */
 #include "gp10b/gr_gp10b.h"
-
+#include "hw_gr_gp10b.h"
+#include "hw_proj_gp10b.h"
 
 bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 {
@@ -47,8 +48,120 @@ bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 	return valid;
 }
 
+/*
+ * Program the circular-buffer (CB) manager registers: the default
+ * beta/alpha sizes, the PD alpha/beta distribution limit, and the per-PPC
+ * beta and alpha CB size/offset registers for every GPC.
+ *
+ * All writes go through gr_gk20a_ctx_patch_write().  When @patch is true
+ * they are bracketed by gr_gk20a_ctx_patch_write_begin()/_end() on
+ * c->ch_ctx; otherwise ch_ctx stays NULL and is passed through as such.
+ *
+ * Returns 0 on success, or the error from
+ * gr_gk20a_ctx_patch_write_begin().
+ */
+int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
+			struct channel_gk20a *c, bool patch)
+{
+	struct gr_gk20a *gr = &g->gr;
+	struct channel_ctx_gk20a *ch_ctx = NULL;
+	u32 attrib_offset_in_chunk = 0;
+	u32 alpha_offset_in_chunk = 0;
+	u32 pd_ab_max_output;
+	u32 gpc_index, ppc_index;
+	u32 temp;
+	u32 cbm_cfg_size1, cbm_cfg_size2;
+
+	gk20a_dbg_fn("");
+
+	if (patch) {
+		int err;
+		ch_ctx = &c->ch_ctx;
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		if (err)
+			return err;
+	}
+
+	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_beta_r(),
+		gr->attrib_cb_default_size, patch);
+	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_alpha_r(),
+		gr->alpha_cb_default_size, patch);
+
+	/* Rescale the alpha default size from CB granularity to PD units. */
+	pd_ab_max_output = (gr->alpha_cb_default_size *
+		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
+		gr_pd_ab_dist_cfg1_max_output_granularity_v();
+
+	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
+		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
+		gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
+
+	/* Alpha offsets start after the attrib space of all TPCs. */
+	alpha_offset_in_chunk = attrib_offset_in_chunk +
+		gr->tpc_count * gr->attrib_cb_size;
+
+	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+		/* Register offset of this GPC relative to the gpc0 registers. */
+		temp = proj_gpc_stride_v() * gpc_index;
+		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
+		     ppc_index++) {
+			/* Sizes scale with pes_tpc_count[ppc_index][gpc_index]. */
+			cbm_cfg_size1 = gr->attrib_cb_default_size *
+				gr->pes_tpc_count[ppc_index][gpc_index];
+			cbm_cfg_size2 = gr->alpha_cb_default_size *
+				gr->pes_tpc_count[ppc_index][gpc_index];
+
+			gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
+				proj_ppc_in_gpc_stride_v() * ppc_index,
+				cbm_cfg_size1, patch);
+
+			gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
+				proj_ppc_in_gpc_stride_v() * ppc_index,
+				attrib_offset_in_chunk, patch);
+
+			attrib_offset_in_chunk += gr->attrib_cb_size *
+				gr->pes_tpc_count[ppc_index][gpc_index];
+
+			gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
+				proj_ppc_in_gpc_stride_v() * ppc_index,
+				cbm_cfg_size2, patch);
+
+			gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
+				proj_ppc_in_gpc_stride_v() * ppc_index,
+				alpha_offset_in_chunk, patch);
+
+			alpha_offset_in_chunk += gr->alpha_cb_size *
+				gr->pes_tpc_count[ppc_index][gpc_index];
+
+			/*
+			 * NOTE(review): the *beta* steady-state size is
+			 * programmed from alpha_cb_default_size (same value
+			 * as cbm_cfg_size2) - confirm this is not meant to
+			 * use the attrib/beta default size.
+			 */
+			gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
+				proj_ppc_in_gpc_stride_v() * ppc_index,
+				gr->alpha_cb_default_size * gr->pes_tpc_count[ppc_index][gpc_index],
+				patch);
+
+			/*
+			 * NOTE(review): the register index is
+			 * ppc_index + gpc_index, which repeats for different
+			 * (gpc, ppc) pairs once there is more than one of
+			 * each - confirm the intended indexing.
+			 */
+			gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + gpc_index),
+				gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size1),
+				patch);
+		}
+	}
+
+	if (patch)
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+
+	return 0;
+}
+
+/*
+ * Program the global page pool base and size.
+ *
+ * The base field (built from @addr) and the page count (built from
+ * @size) are written to both the SCC and the GPCS GCC page-pool
+ * registers through gr_gk20a_ctx_patch_write(); only the SCC size
+ * register additionally carries the "valid" flag.  @ch_ctx and @patch
+ * are forwarded unchanged to every write.
+ */
+void gr_gp10b_commit_global_pagepool(struct gk20a *g,
+					    struct channel_ctx_gk20a *ch_ctx,
+					    u64 addr, u32 size, bool patch)
+{
+	const u32 scc_base = gr_scc_pagepool_base_addr_39_8_f(addr);
+	const u32 scc_pool = gr_scc_pagepool_total_pages_f(size) |
+			     gr_scc_pagepool_valid_true_f();
+	const u32 gcc_base = gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr);
+	const u32 gcc_pool = gr_gpcs_gcc_pagepool_total_pages_f(size);
+
+	/* SCC: base, then size with the valid flag. */
+	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(),
+				 scc_base, patch);
+	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(),
+				 scc_pool, patch);
+
+	/* GPCS GCC: base, then size. */
+	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(),
+				 gcc_base, patch);
+	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(),
+				 gcc_pool, patch);
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
 	gops->gr.is_valid_class = gr_gp10b_is_valid_class;
+	gops->gr.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager;
+	gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index f8607618..9b681104 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -1362,7 +1362,7 @@ static inline u32 gr_ds_zbc_tbl_ld_trigger_active_f(void)
 {
 	return 0x4;
 }
-static inline u32 gr_ds_tga_constraintlogic_r(void)
+static inline u32 gr_ds_tga_constraintlogic_beta_r(void)
 {
 	return 0x00405830;
 }
@@ -1370,7 +1370,7 @@ static inline u32 gr_ds_tga_constraintlogic_beta_cbsize_f(u32 v)
 {
 	return (v & 0x3fffff) << 0;
 }
-static inline u32 gr_ds_tga_constraintlogic_r(void)
+static inline u32 gr_ds_tga_constraintlogic_alpha_r(void)
 {
 	return 0x0040585c;
 }
@@ -1926,6 +1926,10 @@ static inline u32 gr_gpc0_ppc0_cbm_alpha_cb_offset_r(void)
 {
 	return 0x005030f8;
 }
+static inline u32 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r(void)
+{
+	return 0x005030f0;
+}
 static inline u32 gr_gpccs_falcon_addr_r(void)
 {
 	return 0x0041a0ac;
-- 
cgit v1.2.2


From 23a4456260f163881b54b89fc14ec14a2b0d1f35 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 3 Nov 2014 10:37:29 +0200
Subject: gpu: nvgpu: gp10b: Add SM debug registers

Add SM debug registers to gp10b, and regenerate headers.

Bug 1567274

Change-Id: Ifcfa65a6fbf16e89023caa5aaf4ae3a7846df749
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/592646
---
 drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h |  2 +-
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h   | 64 +++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h  |  4 +--
 3 files changed, 67 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
index 161c1ce0..f6020434 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
@@ -200,7 +200,7 @@ static inline u32 gmmu_pte_read_disable_true_f(void)
 }
 static inline u32 gmmu_pte_comptagline_f(u32 v)
 {
-	return (v & 0x1ffff) << 12;
+	return (v & 0x3ffff) << 12;
 }
 static inline u32 gmmu_pte_comptagline_w(void)
 {
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 9b681104..f314c75c 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -2814,6 +2814,14 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(void)
 {
 	return 0x80000000;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_run_trigger_task_f(void)
+{
+	return 0x40000000;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
 {
 	return 0x0050460c;
@@ -2826,6 +2834,22 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v(void)
 {
 	return 0x00000001;
 }
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_r(void)
+{
+	return 0x00419e50;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_bpt_int_pending_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_bpt_pause_pending_f(void)
+{
+	return 0x20;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_single_step_complete_pending_f(void)
+{
+	return 0x40;
+}
 static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_r(void)
 {
 	return 0x00504650;
@@ -3226,4 +3250,44 @@ static inline u32 gr_gpcs_mmu_num_active_ltcs_r(void)
 {
 	return 0x004188ac;
 }
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_r(void)
+{
+	return 0x00419e10;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_debugger_mode_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_debugger_mode_on_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_m(void)
+{
+	return 0x1 << 31;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_m(void)
+{
+	return 0x1 << 30;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_v(u32 r)
+{
+	return (r >> 30) & 0x1;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_task_f(void)
+{
+	return 0x40000000;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
index a38cfe8d..68f5a128 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
@@ -124,7 +124,7 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl2_r(void)
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(u32 v)
 {
-	return (v & 0x1ffff) << 0;
+	return (v & 0x3ffff) << 0;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl3_r(void)
 {
@@ -132,7 +132,7 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl3_r(void)
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(u32 v)
 {
-	return (v & 0x1ffff) << 0;
+	return (v & 0x3ffff) << 0;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(void)
 {
-- 
cgit v1.2.2


From 0f4da5e11825ae26f86efcd06dede8dfb80e73fd Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 31 Oct 2014 11:59:39 +0200
Subject: gpu: nvgpu: Add own platform data to enable host1x

Add gp10b platform data to enable sync point support.

Bug 1572701

Change-Id: Iaf03ecb8fb6b8bf4bb824e2a012c80dfe3f4fcae
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/592099
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gp10b/Makefile               |  2 +
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 99 ++++++++++++++++++++++++++
 drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h           |  1 +
 3 files changed, 102 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index 722cc550..193efee0 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -12,3 +12,5 @@ obj-$(CONFIG_GK20A)  += \
 	gr_gp10b.o  \
 	mc_gp10b.o  \
 	hal_gp10b.o
+
+obj-$(CONFIG_TEGRA_GK20A) += platform_gp10b_tegra.o
diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
new file mode 100644
index 00000000..7a25a3fc
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -0,0 +1,99 @@
+/*
+ * drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+ *
+ * GP10B Tegra Platform Interface
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/of_platform.h>
+#include <linux/nvhost.h>
+#include <linux/debugfs.h>
+#include <linux/tegra-powergate.h>
+#include <linux/platform_data/tegra_edp.h>
+#include <uapi/linux/nvgpu.h>
+#include <linux/dma-buf.h>
+#include <linux/nvmap.h>
+#include <linux/tegra_pm_domains.h>
+#include "gk20a/platform_gk20a.h"
+#include "gk20a/gk20a.h"
+
+/*
+ * Resolve the host1x platform device referenced by the GPU node's
+ * "nvidia,host1x" device-tree phandle and store it in
+ * platform->g->host1x_dev.
+ *
+ * Returns 0 on success, or -ENOSYS when the property is missing, the
+ * phandle does not resolve to a node, or no platform device exists for
+ * that node.
+ */
+static int gp10b_tegra_probe(struct platform_device *pdev)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(pdev);
+	struct device_node *np = pdev->dev.of_node;
+	struct device_node *host1x_node;
+	struct platform_device *host1x_pdev;
+	const __be32 *host1x_ptr;
+
+	host1x_ptr = of_get_property(np, "nvidia,host1x", NULL);
+	if (!host1x_ptr) {
+		gk20a_err(&pdev->dev, "host1x device not available");
+		return -ENOSYS;
+	}
+
+	host1x_node = of_find_node_by_phandle(be32_to_cpup(host1x_ptr));
+	if (!host1x_node) {
+		/*
+		 * Dangling phandle: do not hand a NULL node to
+		 * of_find_device_by_node(), it would not identify host1x.
+		 */
+		gk20a_err(&pdev->dev, "host1x device not available");
+		return -ENOSYS;
+	}
+
+	host1x_pdev = of_find_device_by_node(host1x_node);
+	/* The node reference was only needed for the lookup; drop it. */
+	of_node_put(host1x_node);
+	if (!host1x_pdev) {
+		gk20a_err(&pdev->dev, "host1x device not available");
+		return -ENOSYS;
+	}
+
+	platform->g->host1x_dev = host1x_pdev;
+
+	return 0;
+}
+
+/* Late-probe hook: nothing to do, always returns 0. */
+static int gp10b_tegra_late_probe(struct platform_device *pdev)
+{
+	return 0;
+}
+
+/* Rail-gate query stub: always reports the GPU as not rail-gated. */
+static bool gp10b_tegra_is_railgated(struct platform_device *pdev)
+{
+	return false;
+}
+
+/* Rail-gate stub: performs no action, always returns 0. */
+static int gp10b_tegra_railgate(struct platform_device *pdev)
+{
+	return 0;
+}
+
+/* Un-rail-gate stub: performs no action, always returns 0. */
+static int gp10b_tegra_unrailgate(struct platform_device *pdev)
+{
+	return 0;
+}
+
+/* Suspend stub: performs no action, always returns 0. */
+static int gp10b_tegra_suspend(struct device *dev)
+{
+	return 0;
+}
+
+/*
+ * Platform description for the t18x (gp10b) GPU.  It wires in the probe
+ * hooks and the power-management stubs defined in this file; the
+ * busy/idle/debug-dump callbacks are gk20a_tegra_* functions that are
+ * not defined in this file.
+ */
+struct gk20a_platform t18x_gpu_tegra_platform = {
+	/* Sync points are available (host1x is looked up in probe). */
+	.has_syncpoints = true,
+
+	.default_big_page_size	= SZ_128K,
+
+	.probe = gp10b_tegra_probe,
+	.late_probe = gp10b_tegra_late_probe,
+
+	/* power management callbacks */
+	.suspend = gp10b_tegra_suspend,
+	.railgate = gp10b_tegra_railgate,
+	.unrailgate = gp10b_tegra_unrailgate,
+	.is_railgated = gp10b_tegra_is_railgated,
+
+	.busy = gk20a_tegra_busy,
+	.idle = gk20a_tegra_idle,
+
+	.dump_platform_dependencies = gk20a_tegra_debug_dump,
+};
diff --git a/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h b/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h
index fe027fb0..ac65af18 100644
--- a/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h
+++ b/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h
@@ -27,4 +27,5 @@
 #define TEGRA_18x_GPU_COMPAT_GENERIC NVGPU_COMPAT_GENERIC_GP10B
 struct gpu_ops;
 extern int gp10b_init_hal(struct gpu_ops *);
+extern struct gk20a_platform t18x_gpu_tegra_platform;
 #endif
-- 
cgit v1.2.2


From 317e7bb75862eb7e7272271435a6387a4f5c9839 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 5 Nov 2014 11:21:03 +0200
Subject: gpu: nvgpu: gp10b: Fill class numbers

Fill class numbers to characteristics structure.

Bug 1567274

Change-Id: I129e79fa3f850899ae0c7d93704dc4786ad514d9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/594404
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c  | 13 ++++++++++++-
 drivers/gpu/nvgpu/gp10b/hal_gp10b.h  |  4 ++--
 drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h |  2 +-
 3 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 235254c8..067c9bf4 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -23,6 +23,7 @@
 #include "gp10b/gr_gp10b.h"
 #include "gp10b/mc_gp10b.h"
 
+#include "gm20b/gr_gm20b.h"
 #include "gm20b/ltc_gm20b.h"
 #include "gm20b/fb_gm20b.h"
 #include "gm20b/gm20b_gating_reglist.h"
@@ -81,8 +82,11 @@ struct gpu_ops gp10b_ops = {
 	}
 };
 
-int gp10b_init_hal(struct gpu_ops *gops)
+int gp10b_init_hal(struct gk20a *g)
 {
+	struct gpu_ops *gops = &g->ops;
+	struct nvgpu_gpu_characteristics *c = &g->gpu_characteristics;
+
 	*gops = gp10b_ops;
 	gp10b_init_mc(gops);
 	gm20b_init_ltc(gops);
@@ -96,5 +100,12 @@ int gp10b_init_hal(struct gpu_ops *gops)
 	gm20b_init_clk_ops(gops);
 	gops->name = "gp10b";
 
+	c->twod_class = FERMI_TWOD_A;
+	c->threed_class = PASCAL_A;
+	c->compute_class = PASCAL_COMPUTE_A;
+	c->gpfifo_class = MAXWELL_CHANNEL_GPFIFO_A;
+	c->inline_to_memory_class = KEPLER_INLINE_TO_MEMORY_B;
+	c->dma_copy_class = MAXWELL_DMA_COPY_A;
+
 	return 0;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.h b/drivers/gpu/nvgpu/gp10b/hal_gp10b.h
index 78615ed1..0b464d07 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.h
@@ -15,7 +15,7 @@
 
 #ifndef _NVGPU_HAL_GP10B_H
 #define _NVGPU_HAL_GP10B_H
-struct gpu_ops;
+struct gk20a;
 
-int gp10b_init_hal(struct gpu_ops *gops);
+int gp10b_init_hal(struct gk20a *gops);
 #endif
diff --git a/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h b/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h
index ac65af18..59ecefee 100644
--- a/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h
+++ b/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h
@@ -26,6 +26,6 @@
 #define TEGRA_18x_GPU_COMPAT_TEGRA NVGPU_COMPAT_TEGRA_GP10B
 #define TEGRA_18x_GPU_COMPAT_GENERIC NVGPU_COMPAT_GENERIC_GP10B
 struct gpu_ops;
-extern int gp10b_init_hal(struct gpu_ops *);
+extern int gp10b_init_hal(struct gk20a *);
 extern struct gk20a_platform t18x_gpu_tegra_platform;
 #endif
-- 
cgit v1.2.2


From caeddb940fb81b5956939168faf5804cfb3bebfd Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Sat, 1 Nov 2014 19:29:28 +0200
Subject: gpu: nvgpu: gp10b: Enable interrupts in linsim

Change-Id: I7d4211743793b905a20080bb44c62c036f23c854
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/592336
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gp10b/mc_gp10b.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
index cdafaf56..4f7ab698 100644
--- a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
@@ -21,19 +21,17 @@
 
 void mc_gp10b_intr_enable(struct gk20a *g)
 {
-	if (!tegra_platform_is_linsim()) {
-		gk20a_writel(g, mc_intr_en_clear_r(0), 0xffffffff);
-		gk20a_writel(g, mc_intr_en_set_r(0),
-			     mc_intr_pfifo_pending_f()
-			     | mc_intr_pgraph_pending_f());
-		gk20a_writel(g, mc_intr_en_clear_r(1), 0xffffffff);
-		gk20a_writel(g, mc_intr_en_set_r(1),
-			     mc_intr_pfifo_pending_f()
-			     | mc_intr_pgraph_pending_f()
-			     | mc_intr_priv_ring_pending_f()
-			     | mc_intr_ltc_pending_f()
-			     | mc_intr_pbus_pending_f());
-	}
+	gk20a_writel(g, mc_intr_en_clear_r(0), 0xffffffff);
+	gk20a_writel(g, mc_intr_en_set_r(0),
+		     mc_intr_pfifo_pending_f()
+		     | mc_intr_pgraph_pending_f());
+	gk20a_writel(g, mc_intr_en_clear_r(1), 0xffffffff);
+	gk20a_writel(g, mc_intr_en_set_r(1),
+		     mc_intr_pfifo_pending_f()
+		     | mc_intr_pgraph_pending_f()
+		     | mc_intr_priv_ring_pending_f()
+		     | mc_intr_ltc_pending_f()
+		     | mc_intr_pbus_pending_f());
 }
 
 irqreturn_t mc_gp10b_isr_stall(struct gk20a *g)
-- 
cgit v1.2.2


From 68ad020887e93c2eb7f1bdf4fc83ec747397033a Mon Sep 17 00:00:00 2001
From: Adeel Raza <araza@nvidia.com>
Date: Wed, 5 Nov 2014 19:31:45 -0800
Subject: gpu: nvgpu: headers for linsim CL 33823014

Change-Id: I1b9172f0afa0391ce6289aa24dc1a993c723c90e
Signed-off-by: Adeel Raza <araza@nvidia.com>
Reviewed-on: http://git-master/r/594681
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h  | 12 ------------
 drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h |  2 +-
 2 files changed, 1 insertion(+), 13 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index f314c75c..372c5e51 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -2182,18 +2182,6 @@ static inline u32 gr_gpcs_swdx_tc_beta_cb_size_v_m(void)
 {
 	return 0x3fffff << 0;
 }
-static inline u32 gr_gpcs_swdx_rm_pagepool_r(void)
-{
-	return 0x00418e30;
-}
-static inline u32 gr_gpcs_swdx_rm_pagepool_total_pages_f(u32 v)
-{
-	return (v & 0x3ff) << 0;
-}
-static inline u32 gr_gpcs_swdx_rm_pagepool_valid_true_f(void)
-{
-	return 0x80000000;
-}
 static inline u32 gr_gpcs_setup_attrib_cb_base_r(void)
 {
 	return 0x00418810;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
index 68f5a128..45bb1d42 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
@@ -136,7 +136,7 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(u32 v)
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(void)
 {
-	return 0x0001ffff;
+	return 0x0003ffff;
 }
 static inline u32 ltc_ltcs_ltss_cbc_base_r(void)
 {
-- 
cgit v1.2.2


From 7918de1c1b05ae126f830588de1cac533ef1c0cf Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 13 Nov 2014 13:30:52 +0200
Subject: gpu: nvgpu: gp10b: Implement L2 query

Bug 1567274

Change-Id: I0b8eaebc0949e70f6d8bfbb101048a3d95bec5e3
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/602858
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gp10b/Makefile       |  1 +
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c    |  5 ++--
 drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h | 12 +++++++++
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c    | 47 ++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.h    | 19 ++++++++++++++
 5 files changed, 81 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/ltc_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index 193efee0..4ecf761a 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -11,6 +11,7 @@ ccflags-$(CONFIG_GK20A) += -Wno-multichar
 obj-$(CONFIG_GK20A)  += \
 	gr_gp10b.o  \
 	mc_gp10b.o  \
+	ltc_gp10b.o \
 	hal_gp10b.o
 
 obj-$(CONFIG_TEGRA_GK20A) += platform_gp10b_tegra.o
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 067c9bf4..5ef64e1f 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -22,9 +22,9 @@
 
 #include "gp10b/gr_gp10b.h"
 #include "gp10b/mc_gp10b.h"
+#include "gp10b/ltc_gp10b.h"
 
 #include "gm20b/gr_gm20b.h"
-#include "gm20b/ltc_gm20b.h"
 #include "gm20b/fb_gm20b.h"
 #include "gm20b/gm20b_gating_reglist.h"
 #include "gm20b/fifo_gm20b.h"
@@ -89,9 +89,8 @@ int gp10b_init_hal(struct gk20a *g)
 
 	*gops = gp10b_ops;
 	gp10b_init_mc(gops);
-	gm20b_init_ltc(gops);
 	gp10b_init_gr(gops);
-	gm20b_init_ltc(gops);
+	gp10b_init_ltc(gops);
 	gm20b_init_fb(gops);
 	gm20b_init_fifo(gops);
 	gm20b_init_gr_ctx(gops);
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
index 45bb1d42..32683dc2 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
@@ -494,4 +494,16 @@ static inline u32 ltc_ltc1_ltss_tstg_cmgmt1_clean_pending_f(void)
 {
 	return 0x1;
 }
+static inline u32 ltc_ltc0_lts0_tstg_info_1_r(void)
+{
+	return 0x0014058c;
+}
+static inline u32 ltc_ltc0_lts0_tstg_info_1_slice_size_in_kb_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 ltc_ltc0_lts0_tstg_info_1_slices_per_l2_v(u32 r)
+{
+	return (r >> 16) & 0x1f;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
new file mode 100644
index 00000000..9f5c16ba
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -0,0 +1,47 @@
+/*
+ * GP10B L2
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+
+#include "gk20a/gk20a.h"
+#include "gm20b/ltc_gm20b.h"
+#include "hw_ltc_gp10b.h"
+
+/*
+ * Compute the total L2 cache size in bytes.
+ *
+ * Reads ltc_ltc0_lts0_tstg_info_1 and multiplies the per-slice size
+ * (reported by the register in KB), the number of slices per L2 and
+ * g->ltc_count.
+ *
+ * Returns the size in bytes.
+ */
+static int gp10b_determine_L2_size_bytes(struct gk20a *g)
+{
+	u32 tmp;
+	int ret;
+
+	gk20a_dbg_fn("");
+
+	tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_info_1_r());
+
+	/* The slice size field is in KB; scale by 1024 to return bytes. */
+	ret = g->ltc_count *
+		ltc_ltc0_lts0_tstg_info_1_slice_size_in_kb_v(tmp) * 1024 *
+		ltc_ltc0_lts0_tstg_info_1_slices_per_l2_v(tmp);
+
+	gk20a_dbg(gpu_dbg_info, "L2 size: %d\n", ret);
+
+	gk20a_dbg_fn("done");
+
+	return ret;
+}
+
+/*
+ * Set up the LTC (L2 cache) ops for gp10b: start from the gm20b ops and
+ * replace only the L2 size query with the gp10b implementation.
+ */
+void gp10b_init_ltc(struct gpu_ops *gops)
+{
+	/* Must run first: the assignment below overrides one of its hooks. */
+	gm20b_init_ltc(gops);
+
+	gops->ltc.determine_L2_size_bytes = gp10b_determine_L2_size_bytes;
+}
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.h
new file mode 100644
index 00000000..7408348e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef LTC_GP10B_H
+#define LTC_GP10B_H
+/* Only a pointer is used below, so a forward declaration is enough. */
+struct gpu_ops;
+
+/* Install the gp10b LTC (L2 cache) callbacks into @gops. */
+void gp10b_init_ltc(struct gpu_ops *gops);
+#endif
-- 
cgit v1.2.2


From 2d23236ae26ec6dcbbc934bb372fe56ef839bb80 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 11 Nov 2014 11:13:11 +0200
Subject: gpu: nvgpu: Use queried interrupt ids

Change-Id: I258b54447d09b32adc076de50997d792f0567af5
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/601019
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h |  8 ++++++++
 drivers/gpu/nvgpu/gp10b/mc_gp10b.c     | 20 ++++++++++++--------
 2 files changed, 20 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h
index ca6457c7..0982bc09 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h
@@ -102,6 +102,14 @@ static inline u32 top_device_info_runlist_enum_v(u32 r)
 {
 	return (r >> 21) & 0xf;
 }
+static inline u32 top_device_info_intr_enum_v(u32 r)
+{
+	return (r >> 15) & 0x1f;
+}
+static inline u32 top_device_info_reset_enum_v(u32 r)
+{
+	return (r >> 9) & 0x1f;
+}
 static inline u32 top_device_info_type_enum_v(u32 r)
 {
 	return (r >> 2) & 0x1fffffff;
diff --git a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
index 4f7ab698..3fae4ea3 100644
--- a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
@@ -21,17 +21,19 @@
 
 void mc_gp10b_intr_enable(struct gk20a *g)
 {
+	u32 eng_intr_mask = gk20a_fifo_engine_interrupt_mask(g);
+
 	gk20a_writel(g, mc_intr_en_clear_r(0), 0xffffffff);
 	gk20a_writel(g, mc_intr_en_set_r(0),
 		     mc_intr_pfifo_pending_f()
-		     | mc_intr_pgraph_pending_f());
+		     | eng_intr_mask);
 	gk20a_writel(g, mc_intr_en_clear_r(1), 0xffffffff);
 	gk20a_writel(g, mc_intr_en_set_r(1),
 		     mc_intr_pfifo_pending_f()
-		     | mc_intr_pgraph_pending_f()
 		     | mc_intr_priv_ring_pending_f()
 		     | mc_intr_ltc_pending_f()
-		     | mc_intr_pbus_pending_f());
+		     | mc_intr_pbus_pending_f()
+		     | eng_intr_mask);
 }
 
 irqreturn_t mc_gp10b_isr_stall(struct gk20a *g)
@@ -71,6 +73,7 @@ irqreturn_t mc_gp10b_isr_nonstall(struct gk20a *g)
 irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g)
 {
 	u32 mc_intr_0;
+	u32 eng_intr_mask = gk20a_fifo_engine_interrupt_mask(g);
 
 	gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
 
@@ -78,7 +81,7 @@ irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g)
 
 	gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0);
 
-	if (mc_intr_0 & mc_intr_pgraph_pending_f())
+	if (mc_intr_0 & BIT(g->fifo.engine_info[ENGINE_GR_GK20A].intr_id))
 		gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g));
 	if (mc_intr_0 & mc_intr_pfifo_pending_f())
 		gk20a_fifo_isr(g);
@@ -93,7 +96,7 @@ irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g)
 
 	gk20a_writel(g, mc_intr_en_set_r(0),
 		     mc_intr_pfifo_pending_f()
-		     | mc_intr_pgraph_pending_f());
+		     | eng_intr_mask);
 
 	return IRQ_HANDLED;
 }
@@ -101,6 +104,7 @@ irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g)
 irqreturn_t mc_gp10b_intr_thread_nonstall(struct gk20a *g)
 {
 	u32 mc_intr_1;
+	u32 eng_intr_mask = gk20a_fifo_engine_interrupt_mask(g);
 
 	gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
 
@@ -110,15 +114,15 @@ irqreturn_t mc_gp10b_intr_thread_nonstall(struct gk20a *g)
 
 	if (mc_intr_1 & mc_intr_pfifo_pending_f())
 		gk20a_fifo_nonstall_isr(g);
-	if (mc_intr_1 & mc_intr_pgraph_pending_f())
+	if (mc_intr_1 & BIT(g->fifo.engine_info[ENGINE_GR_GK20A].intr_id))
 		gk20a_gr_nonstall_isr(g);
 
 	gk20a_writel(g, mc_intr_en_set_r(1),
 		     mc_intr_pfifo_pending_f()
-		     | mc_intr_pgraph_pending_f()
 		     | mc_intr_priv_ring_pending_f()
 		     | mc_intr_ltc_pending_f()
-		     | mc_intr_pbus_pending_f());
+		     | mc_intr_pbus_pending_f()
+		     | eng_intr_mask);
 
 	return IRQ_HANDLED;
 }
-- 
cgit v1.2.2


From e8c5b7dd170d4bf9c49c90c4f0e8eb0e8b17c9b2 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 11 Nov 2014 11:13:41 +0200
Subject: gpu: nvgpu: Add SM registers

Add SM registers which were taken into use in GPU
characteristics.

Bug 1551769
Bug 1558186

Change-Id: I705da9ac25556b6b94137199e0acd9af3c8e6422
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/601020
---
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 372c5e51..ca4aa6bd 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -1866,6 +1866,22 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
 {
 	return (v & 0xffff) << 0;
 }
+static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
+{
+	return 0x0050469c;
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_warp_count_v(u32 r)
+{
+	return (r >> 0) & 0xff;
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_v(u32 r)
+{
+	return (r >> 8) & 0xfff;
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_sm_version_v(u32 r)
+{
+	return (r >> 20) & 0xfff;
+}
 static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
 {
 	return 0x00503018;
-- 
cgit v1.2.2


From 951100f63652138374476a722faed19557a7a46b Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Sat, 8 Nov 2014 19:11:24 +0200
Subject: gpu: nvgpu: Define gp10b big page size

Set default big page size of 128kB.

Bug 1567274

Change-Id: Ie27c6ffa23b8d75ebd21afca267068604fb57f0b
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/603498
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 7a25a3fc..f199d569 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -96,4 +96,6 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	.idle = gk20a_tegra_idle,
 
 	.dump_platform_dependencies = gk20a_tegra_debug_dump,
+
+	.default_big_page_size	= SZ_128K,
 };
-- 
cgit v1.2.2


From 3cfc020b91fed07598bea39367a505a6e5bc9684 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 11 Nov 2014 13:47:03 +0200
Subject: gpu: nvgpu: Write ZBC registers to DSS

Bug 1567274

Change-Id: Ife98ae512c62bd26450e59338719c7a10635b5dd
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/601108
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c    | 141 ++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h |  48 ++++++++++++
 2 files changed, 189 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index b7a52be0..250dc65c 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -158,10 +158,151 @@ void gr_gp10b_commit_global_pagepool(struct gk20a *g,
 		gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
 }
 
+/* Program ZBC color entry 'index' into the L2, DS and SWDX DSS tables. */
+static int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
+				  struct zbc_entry *color_val, u32 index)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
+	u32 i;
+	unsigned long end_jiffies = jiffies +
+		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+	int ret, err;
+	u32 zbc_c;
+
+	ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+			"failed to disable gr engine activity\n");
+		return ret;
+	}
+
+	ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g), "failed to idle graphics\n");
+		goto clean_up;
+	}
+
+	/* update l2 table */
+	g->ops.ltc.set_zbc_color_entry(g, color_val, index);
+
+	/* update ds table */
+	gk20a_writel(g, gr_ds_zbc_color_r_r(),
+		gr_ds_zbc_color_r_val_f(color_val->color_ds[0]));
+	gk20a_writel(g, gr_ds_zbc_color_g_r(),
+		gr_ds_zbc_color_g_val_f(color_val->color_ds[1]));
+	gk20a_writel(g, gr_ds_zbc_color_b_r(),
+		gr_ds_zbc_color_b_val_f(color_val->color_ds[2]));
+	gk20a_writel(g, gr_ds_zbc_color_a_r(),
+		gr_ds_zbc_color_a_val_f(color_val->color_ds[3]));
+	gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
+		gr_ds_zbc_color_fmt_val_f(color_val->format));
+
+	gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
+		gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
+
+	/* trigger the write */
+	gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
+		gr_ds_zbc_tbl_ld_select_c_f() |
+		gr_ds_zbc_tbl_ld_action_write_f() |
+		gr_ds_zbc_tbl_ld_trigger_active_f());
+
+	/* update local copy */
+	for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
+		gr->zbc_col_tbl[index].color_l2[i] = color_val->color_l2[i];
+		gr->zbc_col_tbl[index].color_ds[i] = color_val->color_ds[i];
+	}
+	gr->zbc_col_tbl[index].format = color_val->format;
+	gr->zbc_col_tbl[index].ref_cnt++;
+
+	/* update dss table; format registers pack four entries, 6 bits apart */
+	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_color_r_r(index), color_val->color_ds[0]);
+	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_color_g_r(index), color_val->color_ds[1]);
+	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_color_b_r(index), color_val->color_ds[2]);
+	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_color_a_r(index), color_val->color_ds[3]);
+	zbc_c = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3));
+	zbc_c |= color_val->format << ((index % 4) * 6);
+	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3), zbc_c);
+
+clean_up:
+	err = gk20a_fifo_enable_engine_activity(g, gr_info);
+	if (err)
+		gk20a_err(dev_from_gk20a(g),
+			"failed to enable gr engine activity\n");
+
+	/* keep the first failure: an idle timeout must not be masked */
+	return ret ? ret : err;
+}
+
+/* Program ZBC depth entry 'index' into the L2, DS and SWDX DSS tables. */
+static int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
+				struct zbc_entry *depth_val, u32 index)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
+	unsigned long end_jiffies = jiffies +
+		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+	int ret, err;
+	u32 zbc_z;
+
+	ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+			"failed to disable gr engine activity\n");
+		return ret;
+	}
+
+	ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g), "failed to idle graphics\n");
+		goto clean_up;
+	}
+
+	/* update l2 table */
+	g->ops.ltc.set_zbc_depth_entry(g, depth_val, index);
+
+	/* update ds table */
+	gk20a_writel(g, gr_ds_zbc_z_r(),
+		gr_ds_zbc_z_val_f(depth_val->depth));
+	gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
+		gr_ds_zbc_z_fmt_val_f(depth_val->format));
+
+	gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
+		gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
+
+	/* trigger the write */
+	gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
+		gr_ds_zbc_tbl_ld_select_z_f() |
+		gr_ds_zbc_tbl_ld_action_write_f() |
+		gr_ds_zbc_tbl_ld_trigger_active_f());
+
+	/* update local copy */
+	gr->zbc_dep_tbl[index].depth = depth_val->depth;
+	gr->zbc_dep_tbl[index].format = depth_val->format;
+	gr->zbc_dep_tbl[index].ref_cnt++;
+
+	/* update dss table; format registers pack four entries, 6 bits apart */
+	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_z_r(index), depth_val->depth);
+	zbc_z = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + (index & ~3));
+	zbc_z |= depth_val->format << ((index % 4) * 6);
+	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + (index & ~3), zbc_z);
+
+clean_up:
+	err = gk20a_fifo_enable_engine_activity(g, gr_info);
+	if (err)
+		gk20a_err(dev_from_gk20a(g),
+			"failed to enable gr engine activity\n");
+
+	/* keep the first failure: an idle timeout must not be masked */
+	return ret ? ret : err;
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
 	gops->gr.is_valid_class = gr_gp10b_is_valid_class;
 	gops->gr.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager;
 	gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool;
+	gops->gr.add_zbc_color = gr_gp10b_add_zbc_color;
+	gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index ca4aa6bd..f2237a50 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -2198,6 +2198,54 @@ static inline u32 gr_gpcs_swdx_tc_beta_cb_size_v_m(void)
 {
 	return 0x3fffff << 0;
 }
+static inline u32 gr_gpcs_swdx_dss_zbc_color_r_r(u32 i)
+{
+	return 0x00418010 + i*4;
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_color_r_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_color_g_r(u32 i)
+{
+	return 0x0041804c + i*4;
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_color_g_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_color_b_r(u32 i)
+{
+	return 0x00418088 + i*4;
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_color_b_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_color_a_r(u32 i)
+{
+	return 0x004180c4 + i*4;
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_color_a_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r(void)
+{
+	return 0x00500100;
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_z_r(u32 i)
+{
+	return 0x00418110 + i*4;
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_z_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r(void)
+{
+	return 0x0050014c;
+}
 static inline u32 gr_gpcs_setup_attrib_cb_base_r(void)
 {
 	return 0x00418810;
-- 
cgit v1.2.2


From c23f7708ac84c1866b2d9f1b8d5a9e560026e859 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 12 Nov 2014 14:32:29 +0200
Subject: gpu: nvgpu: gp10b: Define physical address width

GP10B physical address width is 37 bits. Use old width for now,
and add gp10b specific definition. We can switch to new definition
once we've verified them.

Bug 1567274

Change-Id: I33cc1b99f14f1a7ee5f6fe3bd3d8b3126c23ecbe
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/601703
---
 drivers/gpu/nvgpu/gp10b/Makefile    |  1 +
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c |  4 ++--
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c  | 28 ++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/mm_gp10b.h  | 19 +++++++++++++++++++
 4 files changed, 50 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp10b/mm_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/mm_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index 4ecf761a..ecb09cf6 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_GK20A)  += \
 	gr_gp10b.o  \
 	mc_gp10b.o  \
 	ltc_gp10b.o \
+	mm_gp10b.o \
 	hal_gp10b.o
 
 obj-$(CONFIG_TEGRA_GK20A) += platform_gp10b_tegra.o
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 5ef64e1f..acd1b73c 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -23,13 +23,13 @@
 #include "gp10b/gr_gp10b.h"
 #include "gp10b/mc_gp10b.h"
 #include "gp10b/ltc_gp10b.h"
+#include "gp10b/mm_gp10b.h"
 
 #include "gm20b/gr_gm20b.h"
 #include "gm20b/fb_gm20b.h"
 #include "gm20b/gm20b_gating_reglist.h"
 #include "gm20b/fifo_gm20b.h"
 #include "gm20b/gr_ctx_gm20b.h"
-#include "gm20b/mm_gm20b.h"
 #include "gm20b/pmu_gm20b.h"
 #include "gm20b/clk_gm20b.h"
 
@@ -94,7 +94,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gm20b_init_fb(gops);
 	gm20b_init_fifo(gops);
 	gm20b_init_gr_ctx(gops);
-	gm20b_init_mm(gops);
+	gp10b_init_mm(gops);
 	gm20b_init_pmu_ops(gops);
 	gm20b_init_clk_ops(gops);
 	gops->name = "gp10b";
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
new file mode 100644
index 00000000..a0b6a7d1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -0,0 +1,28 @@
+/*
+ * GP10B MMU
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/pm_runtime.h>
+#include "gk20a/gk20a.h"
+
+u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g)
+{
+	return 37;
+}
+
+void gp10b_init_mm(struct gpu_ops *gops)
+{
+	gm20b_init_mm(gops);
+	gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits;
+}
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.h b/drivers/gpu/nvgpu/gp10b/mm_gp10b.h
new file mode 100644
index 00000000..38ca93a4
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef MM_GP10B_H
+#define MM_GP10B_H
+struct gpu_ops;
+
+void gp10b_init_mm(struct gpu_ops *gops);
+#endif
-- 
cgit v1.2.2


From a83e5281af81bbf56b6d5ce4c7acc8a28beda00a Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 20 Nov 2014 15:51:50 +0200
Subject: gpu: nvgpu: gp10b: Define pagepool size

Bug 1567274

Change-Id: I4369458d3af0c4da32af8a5881c8fe60b11f7632
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/606932
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 250dc65c..7a316106 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -297,6 +297,14 @@ clean_up:
 	return ret;
 }
 
+static void gr_gp10b_buffer_size_defaults(struct gk20a *g)
+{
+	g->gr.pagepool_default_size =
+		gr_scc_pagepool_total_pages_hwmax_value_v();
+	g->gr.pagepool_max_size =
+		gr_scc_pagepool_total_pages_hwmax_value_v();
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -305,4 +313,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool;
 	gops->gr.add_zbc_color = gr_gp10b_add_zbc_color;
 	gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth;
+	gops->gr.buffer_size_defaults = gr_gp10b_buffer_size_defaults;
 }
-- 
cgit v1.2.2


From 1f11c7ffe745571753903fdca7024d4428bd99bd Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 13 Nov 2014 14:55:51 +0200
Subject: gpu: nvgpu: gp10b: Add new supported kind

Bug 1567274

Change-Id: I38c3ffd6129893b02f6bef878a579925cf2bfa1e
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/606931
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/Makefile        |  1 +
 drivers/gpu/nvgpu/gp10b/fb_gp10b.c      | 96 +++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/fb_gp10b.h      | 21 ++++++++
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c     |  4 +-
 drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h | 28 ++++++++++
 5 files changed, 148 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp10b/fb_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/fb_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index ecb09cf6..6f1fb9e2 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_GK20A)  += \
 	mc_gp10b.o  \
 	ltc_gp10b.o \
 	mm_gp10b.o \
+	fb_gp10b.o \
 	hal_gp10b.o
 
 obj-$(CONFIG_TEGRA_GK20A) += platform_gp10b_tegra.o
diff --git a/drivers/gpu/nvgpu/gp10b/fb_gp10b.c b/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
new file mode 100644
index 00000000..df35c5b0
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
@@ -0,0 +1,96 @@
+/*
+ * GP10B FB
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+
+#include "gk20a/gk20a.h"
+#include "gm20b/fb_gm20b.h"
+#include "gk20a/kind_gk20a.h"
+
+#include "hw_gmmu_gp10b.h"
+
+static void gp10b_init_uncompressed_kind_map(void)
+{
+	gm20b_init_uncompressed_kind_map();
+
+	gk20a_uc_kind_map[gmmu_pte_kind_z16_2cz_v()] =
+	gk20a_uc_kind_map[gmmu_pte_kind_z16_ms2_2cz_v()] =
+	gk20a_uc_kind_map[gmmu_pte_kind_z16_ms4_2cz_v()] =
+	gk20a_uc_kind_map[gmmu_pte_kind_z16_ms8_2cz_v()] =
+	gk20a_uc_kind_map[gmmu_pte_kind_z16_ms16_2cz_v()] =
+		gmmu_pte_kind_z16_v();
+
+	gk20a_uc_kind_map[gmmu_pte_kind_c32_ms4_4cbra_v()] =
+	gk20a_uc_kind_map[gmmu_pte_kind_c64_ms4_4cbra_v()] =
+		gmmu_pte_kind_generic_16bx2_v();
+}
+
+static bool gp10b_kind_supported(u8 k)
+{
+	return (k >= gmmu_pte_kind_z16_2cz_v() &&
+		k <= gmmu_pte_kind_z16_ms8_2cz_v())
+		|| k == gmmu_pte_kind_z16_ms16_2cz_v()
+		|| k == gmmu_pte_kind_c32_ms4_4cbra_v()
+		|| k == gmmu_pte_kind_c64_ms4_4cbra_v();
+}
+
+static bool gp10b_kind_z(u8 k)
+{
+	return (k >= gmmu_pte_kind_z16_2cz_v() &&
+		 k <= gmmu_pte_kind_z16_ms8_2cz_v()) ||
+		k == gmmu_pte_kind_z16_ms16_2cz_v();
+}
+
+static bool gp10b_kind_compressible(u8 k)
+{
+	return (k >= gmmu_pte_kind_z16_2cz_v() &&
+		 k <= gmmu_pte_kind_z16_ms8_2cz_v()) ||
+		k == gmmu_pte_kind_z16_ms16_2cz_v() ||
+	       (k >= gmmu_pte_kind_z16_4cz_v() &&
+		 k <= gmmu_pte_kind_z16_ms16_4cz_v());
+}
+
+static bool gp10b_kind_zbc(u8 k)
+{
+	return (k >= gmmu_pte_kind_z16_2cz_v() &&
+		 k <= gmmu_pte_kind_z16_ms8_2cz_v()) ||
+		k == gmmu_pte_kind_z16_ms16_2cz_v();
+}
+
+static void gp10b_init_kind_attr(void)
+{
+	u16 k;
+
+	gm20b_init_kind_attr();
+
+	for (k = 0; k < 256; k++) {
+		if (gp10b_kind_supported((u8)k))
+			gk20a_kind_attr[k] |= GK20A_KIND_ATTR_SUPPORTED;
+		if (gp10b_kind_compressible((u8)k))
+			gk20a_kind_attr[k] |= GK20A_KIND_ATTR_COMPRESSIBLE;
+		if (gp10b_kind_z((u8)k))
+			gk20a_kind_attr[k] |= GK20A_KIND_ATTR_Z;
+		if (gp10b_kind_zbc((u8)k))
+			gk20a_kind_attr[k] |= GK20A_KIND_ATTR_ZBC;
+	}
+}
+
+void gp10b_init_fb(struct gpu_ops *gops)
+{
+	gm20b_init_fb(gops);
+
+	gp10b_init_uncompressed_kind_map();
+	gp10b_init_kind_attr();
+}
diff --git a/drivers/gpu/nvgpu/gp10b/fb_gp10b.h b/drivers/gpu/nvgpu/gp10b/fb_gp10b.h
new file mode 100644
index 00000000..76efd331
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/fb_gp10b.h
@@ -0,0 +1,21 @@
+/*
+ * GP10B FB
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVGPU_GP10B_FB
+#define _NVGPU_GP10B_FB
+struct gpu_ops;
+
+void gp10b_init_fb(struct gpu_ops *gops);
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index acd1b73c..a739ce77 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -24,9 +24,9 @@
 #include "gp10b/mc_gp10b.h"
 #include "gp10b/ltc_gp10b.h"
 #include "gp10b/mm_gp10b.h"
+#include "gp10b/fb_gp10b.h"
 
 #include "gm20b/gr_gm20b.h"
-#include "gm20b/fb_gm20b.h"
 #include "gm20b/gm20b_gating_reglist.h"
 #include "gm20b/fifo_gm20b.h"
 #include "gm20b/gr_ctx_gm20b.h"
@@ -91,7 +91,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gp10b_init_mc(gops);
 	gp10b_init_gr(gops);
 	gp10b_init_ltc(gops);
-	gm20b_init_fb(gops);
+	gp10b_init_fb(gops);
 	gm20b_init_fifo(gops);
 	gm20b_init_gr_ctx(gops);
 	gp10b_init_mm(gops);
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
index f6020434..5a0f9fe2 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
@@ -270,6 +270,26 @@ static inline u32 gmmu_pte_kind_z16_ms16_2z_v(void)
 {
 	return 0x0000000b;
 }
+static inline u32 gmmu_pte_kind_z16_2cz_v(void)
+{
+	return 0x00000036;
+}
+static inline u32 gmmu_pte_kind_z16_ms2_2cz_v(void)
+{
+	return 0x00000037;
+}
+static inline u32 gmmu_pte_kind_z16_ms4_2cz_v(void)
+{
+	return 0x00000038;
+}
+static inline u32 gmmu_pte_kind_z16_ms8_2cz_v(void)
+{
+	return 0x00000039;
+}
+static inline u32 gmmu_pte_kind_z16_ms16_2cz_v(void)
+{
+	return 0x0000005f;
+}
 static inline u32 gmmu_pte_kind_z16_4cz_v(void)
 {
 	return 0x0000000c;
@@ -1026,6 +1046,10 @@ static inline u32 gmmu_pte_kind_c32_ms4_2bra_v(void)
 {
 	return 0x000000e3;
 }
+static inline u32 gmmu_pte_kind_c32_ms4_4cbra_v(void)
+{
+	return 0x0000002c;
+}
 static inline u32 gmmu_pte_kind_c32_ms8_ms16_2c_v(void)
 {
 	return 0x000000e4;
@@ -1086,6 +1110,10 @@ static inline u32 gmmu_pte_kind_c64_ms4_2bra_v(void)
 {
 	return 0x000000f1;
 }
+static inline u32 gmmu_pte_kind_c64_ms4_4cbra_v(void)
+{
+	return 0x0000002d;
+}
 static inline u32 gmmu_pte_kind_c64_ms8_ms16_2c_v(void)
 {
 	return 0x000000f2;
-- 
cgit v1.2.2


From 230779e25b16af48d4fb7add7a22fce4bb1bb474 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 26 Nov 2014 08:23:34 +0200
Subject: gpu: nvgpu: gp10b: Calc global context buffer size

In gp10b we need to limit global context buffer size, and it needs
to be 128b aligned.

Change-Id: I51570e2457a374c09be4d611e683ae30917f9fc0
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/657911
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 7a316106..b8fbc28e 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -305,6 +305,34 @@ static void gr_gp10b_buffer_size_defaults(struct gk20a *g)
 		gr_scc_pagepool_total_pages_hwmax_value_v();
 }
 
+static int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+	int size;
+
+	/* Start from 1.5x the default size of each circular buffer. */
+	gr->attrib_cb_size = gr->attrib_cb_default_size
+		+ (gr->attrib_cb_default_size >> 1);
+	gr->alpha_cb_size = gr->alpha_cb_default_size
+		+ (gr->alpha_cb_default_size >> 1);
+
+	/* Clamp each size against its own cb_size field, not the other's. */
+	gr->attrib_cb_size = min(gr->attrib_cb_size,
+				 gr_gpc0_ppc0_cbm_beta_cb_size_v_f(0xffffffff));
+	gr->alpha_cb_size = min(gr->alpha_cb_size,
+				 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(0xffffffff));
+
+	size = gr->attrib_cb_size *
+		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
+		gr->max_tpc_count;
+	size += gr->alpha_cb_size *
+		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
+		gr->max_tpc_count;
+
+	/* The total is returned rounded up to a multiple of 128. */
+	return ALIGN(size, 128);
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -314,4 +342,6 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.add_zbc_color = gr_gp10b_add_zbc_color;
 	gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth;
 	gops->gr.buffer_size_defaults = gr_gp10b_buffer_size_defaults;
+	gops->gr.calc_global_ctx_buffer_size =
+		gr_gp10b_calc_global_ctx_buffer_size;
 }
-- 
cgit v1.2.2


From e5161d1518e227050f94213e6615f8b35a5fb115 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 21 Nov 2014 14:15:13 +0200
Subject: gpu: nvgpu: gp10b: Implement SW methods

Bug 1567274

Change-Id: I6d760eca7ac0931847f9a04a9d4a408519ade511
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/654098
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 37 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h |  6 +++++-
 2 files changed, 42 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index b8fbc28e..844ba81f 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -333,6 +333,42 @@ static int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
 	return size;
 }
 
+static int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
+				     u32 class_num, u32 offset, u32 data)
+{
+	gk20a_dbg_fn("");
+
+	if (class_num == PASCAL_COMPUTE_A) {
+		switch (offset << 2) {
+		case NVC0C0_SET_SHADER_EXCEPTIONS:
+			gk20a_gr_set_shader_exceptions(g, data);
+			break;
+		default:
+			goto fail;
+		}
+	}
+
+	if (class_num == PASCAL_A) {
+		switch (offset << 2) {
+		case NVC097_SET_SHADER_EXCEPTIONS:
+			gk20a_gr_set_shader_exceptions(g, data);
+			break;
+		case NVC097_SET_CIRCULAR_BUFFER_SIZE:
+			g->ops.gr.set_circular_buffer_size(g, data);
+			break;
+		case NVC097_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
+			g->ops.gr.set_alpha_circular_buffer_size(g, data);
+			break;
+		default:
+			goto fail;
+		}
+	}
+	return 0;
+
+fail:
+	return -EINVAL;
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -344,4 +380,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.buffer_size_defaults = gr_gp10b_buffer_size_defaults;
 	gops->gr.calc_global_ctx_buffer_size =
 		gr_gp10b_calc_global_ctx_buffer_size;
+	gops->gr.handle_sw_method = gr_gp10b_handle_sw_method;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index 58616deb..536a7d27 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -25,7 +25,11 @@ enum {
 	PASCAL_DMA_COPY_A        = 0xC0B5,
 };
 
-void gp10b_init_gr(struct gpu_ops *ops);
+#define NVC097_SET_ALPHA_CIRCULAR_BUFFER_SIZE	0x02dc
+#define NVC097_SET_CIRCULAR_BUFFER_SIZE		0x1280
+#define NVC097_SET_SHADER_EXCEPTIONS		0x1528
+#define NVC0C0_SET_SHADER_EXCEPTIONS		0x1528
 
+void gp10b_init_gr(struct gpu_ops *ops);
 
 #endif
-- 
cgit v1.2.2


From 59f267981c3024cc3b73c5743d2481f80228b15f Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 21 Nov 2014 14:08:07 +0200
Subject: gpu: nvgpu: gp10b: Program CB sizes

Program CB sizes.

Bug 1567274

Change-Id: Idc88f69b70e85bf950af852a9ca80a328d95883f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/654097
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 109 +++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 844ba81f..81e04095 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -369,6 +369,110 @@ fail:
 	return -EINVAL;
 }
 
+static void gr_gp10b_cb_size_default(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+
+	gr->attrib_cb_default_size =
+		gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
+	gr->alpha_cb_default_size =
+		gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
+}
+
+static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
+{
+	struct gr_gk20a *gr = &g->gr;
+	u32 gpc_index, ppc_index, stride, val;
+	u32 pd_ab_max_output;
+	u32 alpha_cb_size = data * 4;
+
+	gk20a_dbg_fn("");
+
+	if (alpha_cb_size > gr->alpha_cb_size)
+		alpha_cb_size = gr->alpha_cb_size;
+
+	gk20a_writel(g, gr_ds_tga_constraintlogic_alpha_r(),
+		(gk20a_readl(g, gr_ds_tga_constraintlogic_alpha_r()) &
+		 ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) |
+		 gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size));
+
+	pd_ab_max_output = alpha_cb_size *
+		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() /
+		gr_pd_ab_dist_cfg1_max_output_granularity_v();
+
+	gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
+		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output));
+
+	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+		stride = proj_gpc_stride_v() * gpc_index;
+
+		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
+			ppc_index++) {
+
+			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
+				stride +
+				proj_ppc_in_gpc_stride_v() * ppc_index);
+
+			val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
+					gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
+						gr->pes_tpc_count[ppc_index][gpc_index]));
+
+			gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
+				stride +
+				proj_ppc_in_gpc_stride_v() * ppc_index, val);
+		}
+	}
+}
+
+static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
+{
+	struct gr_gk20a *gr = &g->gr;
+	u32 gpc_index, ppc_index, stride, val;
+	u32 cb_size = data * 4;
+
+	gk20a_dbg_fn("");
+
+	if (cb_size > gr->attrib_cb_size)
+		cb_size = gr->attrib_cb_size;
+
+	gk20a_writel(g, gr_ds_tga_constraintlogic_beta_r(),
+		(gk20a_readl(g, gr_ds_tga_constraintlogic_beta_r()) &
+		 ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
+		 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));
+
+	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
+		stride = proj_gpc_stride_v() * gpc_index;
+
+		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
+			ppc_index++) {
+
+			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
+				stride +
+				proj_ppc_in_gpc_stride_v() * ppc_index);
+
+			val = set_field(val,
+				gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
+				gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
+					gr->pes_tpc_count[ppc_index][gpc_index]));
+
+			gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
+				stride +
+				proj_ppc_in_gpc_stride_v() * ppc_index, val);
+
+			val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r(
+						ppc_index + gpc_index));
+
+			val = set_field(val,
+				gr_gpcs_swdx_tc_beta_cb_size_v_m(),
+				gr_gpcs_swdx_tc_beta_cb_size_v_f(cb_size *
+					gr->gpc_ppc_count[gpc_index]));
+
+			gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
+						ppc_index + gpc_index), val);
+		}
+	}
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -381,4 +485,9 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.calc_global_ctx_buffer_size =
 		gr_gp10b_calc_global_ctx_buffer_size;
 	gops->gr.handle_sw_method = gr_gp10b_handle_sw_method;
+	gops->gr.cb_size_default = gr_gp10b_cb_size_default;
+	gops->gr.set_alpha_circular_buffer_size =
+		gr_gp10b_set_alpha_circular_buffer_size;
+	gops->gr.set_circular_buffer_size =
+		gr_gp10b_set_circular_buffer_size;
 }
-- 
cgit v1.2.2


From 5d54f4660cfd5f2abee41ac63bc5fe1a1c43a87d Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 1 Dec 2014 14:33:01 +0200
Subject: gpu: nvgpu: gp10b: Change order of alpha & beta

Change order of alpha & attribute buffers in CB. The new order
follows RM.

Change-Id: I2b24daa46055b3bd667a1026c282f74d56882623
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/657907
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 81e04095..0dce115a 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -83,8 +83,8 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
 		gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
 
-	alpha_offset_in_chunk = attrib_offset_in_chunk +
-		gr->tpc_count * gr->attrib_cb_size;
+	attrib_offset_in_chunk = alpha_offset_in_chunk +
+		gr->tpc_count * gr->alpha_cb_size;
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
 		temp = proj_gpc_stride_v() * gpc_index;
@@ -105,6 +105,12 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 				proj_ppc_in_gpc_stride_v() * ppc_index,
 				attrib_offset_in_chunk, patch);
 
+			gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
+				proj_ppc_in_gpc_stride_v() * ppc_index,
+				gr->alpha_cb_default_size * gr->pes_tpc_count[ppc_index][gpc_index],
+				patch);
+
 			attrib_offset_in_chunk += gr->attrib_cb_size *
 				gr->pes_tpc_count[ppc_index][gpc_index];
 
@@ -121,12 +127,6 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 			alpha_offset_in_chunk += gr->alpha_cb_size *
 				gr->pes_tpc_count[ppc_index][gpc_index];
 
-			gr_gk20a_ctx_patch_write(g, ch_ctx,
-				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
-				gr->alpha_cb_default_size * gr->pes_tpc_count[ppc_index][gpc_index],
-				patch);
-
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + gpc_index),
 				gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size1),
-- 
cgit v1.2.2


From 945e5e6832bd2461b9eafa61e8dd06b793a6f6b9 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 3 Dec 2014 09:35:37 +0200
Subject: gpu: nvgpu: gp10b: Correct SMMU bit number

Bit 36 is the correct bit to indicate SMMU translation.

Bug 1580756

Change-Id: I761e70265d5981b07940f1d43716416829993827
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/658827
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Hiroshi Doyu <hdoyu@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index a0b6a7d1..ff248f51 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -18,7 +18,7 @@
 
 u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g)
 {
-	return 37;
+	return 36;
 }
 
 void gp10b_init_mm(struct gpu_ops *gops)
-- 
cgit v1.2.2


From 15839d4763e0651e789a6511476851cccef0febb Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 3 Dec 2014 16:11:50 +0200
Subject: gpu: nvgpu: Implement gp10b context creation

Implement context creation for gp10b. GfxP contexts need per channel
buffers.

Bug 1517461

Change-Id: Ifecb59002f89f0407457730a35bfb3fe988b907a
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/660236
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c            | 221 +++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h            |  19 ++-
 drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h |  16 ++
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h         |  64 ++++++++
 drivers/gpu/nvgpu/gr_t18x.h                   |  20 +++
 5 files changed, 333 insertions(+), 7 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gr_t18x.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 0dce115a..0a60612d 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -21,6 +21,7 @@
 #include "gp10b/gr_gp10b.h"
 #include "hw_gr_gp10b.h"
 #include "hw_proj_gp10b.h"
+#include "hw_ctxsw_prog_gp10b.h"
 
 bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 {
@@ -297,12 +298,9 @@ clean_up:
 	return ret;
 }
 
-static void gr_gp10b_buffer_size_defaults(struct gk20a *g)
+static u32 gr_gp10b_pagepool_default_size(struct gk20a *g)
 {
-	g->gr.pagepool_default_size =
-		gr_scc_pagepool_total_pages_hwmax_value_v();
-	g->gr.pagepool_max_size =
-		gr_scc_pagepool_total_pages_hwmax_value_v();
+	return gr_scc_pagepool_total_pages_hwmax_value_v();
 }
 
 static int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
@@ -473,6 +471,212 @@ static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
 	}
 }
 
+static int gr_gp10b_init_ctx_state(struct gk20a *g)
+{
+	struct fecs_method_op_gk20a op = {
+		.mailbox = { .id = 0, .data = 0,
+			     .clr = ~0, .ok = 0, .fail = 0},
+		.method.data = 0,
+		.cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
+		.cond.fail = GR_IS_UCODE_OP_SKIP,
+		};
+	int err;
+
+	gk20a_dbg_fn("");
+
+	err = gr_gk20a_init_ctx_state(g);
+	if (err)
+		return err;
+
+	if (!g->gr.t18x.ctx_vars.preempt_image_size) {
+		op.method.addr =
+			gr_fecs_method_push_adr_discover_preemption_image_size_v();
+		op.mailbox.ret = &g->gr.t18x.ctx_vars.preempt_image_size;
+		err = gr_gk20a_submit_fecs_method_op(g, op);
+		if (err) {
+			gk20a_err(dev_from_gk20a(g),
+					"query preempt image size failed");
+			return err;
+		}
+	}
+
+	gk20a_dbg_info("preempt image size: %u",
+		g->gr.t18x.ctx_vars.preempt_image_size);
+
+	gk20a_dbg_fn("done");
+
+	return 0;
+}
+
+int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
+			  struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
+			  u32 flags)
+{
+	int err;
+
+	gk20a_dbg_fn("");
+
+	err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, flags);
+	if (err)
+		return err;
+
+	if (flags == NVGPU_GR_PREEMPTION_MODE_GFXP) {
+		u32 spill_size =
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v();
+		u32 betacb_size = ALIGN(
+			(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() *
+			 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
+			 g->gr.max_tpc_count) +
+			(g->gr.alpha_cb_size *
+			 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
+			 g->gr.max_tpc_count),
+			128);
+		u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
+			gr_scc_pagepool_total_pages_byte_granularity_v();
+
+		err = gk20a_gmmu_alloc_map(vm, g->gr.t18x.ctx_vars.preempt_image_size,
+				&(*gr_ctx)->t18x.preempt_ctxsw_buffer);
+		if (err) {
+			gk20a_err(dev_from_gk20a(vm->mm->g),
+				  "cannot allocate preempt buffer");
+			goto fail_free_gk20a_ctx;
+		}
+
+		err = gk20a_gmmu_alloc_map(vm, spill_size,
+				&(*gr_ctx)->t18x.spill_ctxsw_buffer);
+		if (err) {
+			gk20a_err(dev_from_gk20a(vm->mm->g),
+				  "cannot allocate spill buffer");
+			goto fail_free_preempt;
+		}
+
+		err = gk20a_gmmu_alloc_map(vm, betacb_size,
+					   &(*gr_ctx)->t18x.betacb_ctxsw_buffer);
+		if (err) {
+			gk20a_err(dev_from_gk20a(vm->mm->g),
+				  "cannot allocate beta buffer");
+			goto fail_free_spill;
+		}
+
+		err = gk20a_gmmu_alloc_map(vm, pagepool_size,
+					   &(*gr_ctx)->t18x.pagepool_ctxsw_buffer);
+		if (err) {
+			gk20a_err(dev_from_gk20a(vm->mm->g),
+				  "cannot allocate page pool");
+			goto fail_free_betacb;
+		}
+
+		(*gr_ctx)->t18x.preempt_mode = flags;
+	}
+
+	gk20a_dbg_fn("done");
+
+	return err;
+
+fail_free_betacb:
+	gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.betacb_ctxsw_buffer);
+fail_free_spill:
+	gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.spill_ctxsw_buffer);
+fail_free_preempt:
+	gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.preempt_ctxsw_buffer);
+fail_free_gk20a_ctx:
+	gr_gk20a_free_gr_ctx(g, vm, *gr_ctx);
+	*gr_ctx = NULL;
+
+	return err;
+}
+
+static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
+			  struct gr_ctx_desc *gr_ctx)
+{
+	gk20a_dbg_fn("");
+
+	if (!gr_ctx)
+		return;
+
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer);
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
+	gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
+
+	gk20a_dbg_fn("done");
+}
+
+static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
+		struct channel_ctx_gk20a *ch_ctx,
+		void *ctx_ptr)
+{
+	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
+	u32 gfxp_preempt_option =
+		ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
+	int err;
+
+	gk20a_dbg_fn("");
+
+	if (gr_ctx->t18x.preempt_mode == NVGPU_GR_PREEMPTION_MODE_GFXP) {
+		gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
+		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_graphics_preemption_options_o(), 0,
+				gfxp_preempt_option);
+	}
+
+	if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va) {
+		u32 addr;
+		u32 size;
+		u32 cbes_reserve;
+
+		gk20a_mem_wr32(ctx_ptr, ctxsw_prog_main_image_full_preemption_ptr_o(),
+				gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8);
+
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+
+		addr = (u64_lo32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) >>
+			gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
+			(u64_hi32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) <<
+			 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
+
+		gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
+		g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true);
+
+		addr = (u64_lo32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) >>
+			gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
+			(u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) <<
+			 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
+		size = gr_ctx->t18x.pagepool_ctxsw_buffer.size;
+		g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
+
+		addr = (u64_lo32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) >>
+			gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
+			(u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) <<
+			 (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()));
+
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpc0_swdx_rm_spill_buffer_addr_r(),
+				gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
+				true);
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpc0_swdx_rm_spill_buffer_size_r(),
+				gr_gpc0_swdx_rm_spill_buffer_size_256b_f(addr),
+				true);
+
+		cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpcs_swdx_beta_cb_ctrl_r(),
+				gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
+					cbes_reserve),
+				true);
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
+				gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
+					cbes_reserve),
+				true);
+
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+	}
+
+	gk20a_dbg_fn("done");
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -481,7 +685,7 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool;
 	gops->gr.add_zbc_color = gr_gp10b_add_zbc_color;
 	gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth;
-	gops->gr.buffer_size_defaults = gr_gp10b_buffer_size_defaults;
+	gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size;
 	gops->gr.calc_global_ctx_buffer_size =
 		gr_gp10b_calc_global_ctx_buffer_size;
 	gops->gr.handle_sw_method = gr_gp10b_handle_sw_method;
@@ -490,4 +694,9 @@ void gp10b_init_gr(struct gpu_ops *gops)
 		gr_gp10b_set_alpha_circular_buffer_size;
 	gops->gr.set_circular_buffer_size =
 		gr_gp10b_set_circular_buffer_size;
+	gops->gr.init_ctx_state = gr_gp10b_init_ctx_state;
+	gops->gr.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx;
+	gops->gr.free_gr_ctx = gr_gp10b_free_gr_ctx;
+	gops->gr.update_ctxsw_preemption_mode =
+		gr_gp10b_update_ctxsw_preemption_mode;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index 536a7d27..6bbda564 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -16,7 +16,7 @@
 #ifndef _NVGPU_GR_GP10B_H_
 #define _NVGPU_GR_GP10B_H_
 
-struct gk20a;
+struct gpu_ops;
 
 enum {
 	PASCAL_CHANNEL_GPFIFO_A  = 0xC06F,
@@ -32,4 +32,21 @@ enum {
 
 void gp10b_init_gr(struct gpu_ops *ops);
 
+struct gr_t18x {
+	struct {
+		u32 preempt_image_size;
+	} ctx_vars;
+};
+
+struct gr_ctx_desc_t18x {
+	int preempt_mode;
+	struct mem_desc preempt_ctxsw_buffer;
+	struct mem_desc spill_ctxsw_buffer;
+	struct mem_desc betacb_ctxsw_buffer;
+	struct mem_desc pagepool_ctxsw_buffer;
+};
+
+#define NVGPU_GR_PREEMPTION_MODE_WFI		0
+#define NVGPU_GR_PREEMPTION_MODE_GFXP		1
+
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
index 79890f3c..0892f12e 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
@@ -238,4 +238,20 @@ static inline u32 ctxsw_prog_main_image_misc_options_verif_features_disabled_f(v
 {
 	return 0x0;
 }
+static inline u32 ctxsw_prog_main_image_graphics_preemption_options_o(void)
+{
+	return 0x00000080;
+}
+static inline u32 ctxsw_prog_main_image_graphics_preemption_options_control_f(u32 v)
+{
+	return (v & 0x3) << 0;
+}
+static inline u32 ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(void)
+{
+	return 0x1;
+}
+static inline u32 ctxsw_prog_main_image_full_preemption_ptr_o(void)
+{
+	return 0x00000068;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index f2237a50..0bd707db 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -718,6 +718,10 @@ static inline u32 gr_fecs_method_push_adr_set_watchdog_timeout_f(void)
 {
 	return 0x21;
 }
+static inline u32 gr_fecs_method_push_adr_discover_preemption_image_size_v(void)
+{
+	return 0x0000001a;
+}
 static inline u32 gr_fecs_host_int_status_r(void)
 {
 	return 0x00409c18;
@@ -1910,6 +1914,10 @@ static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(void)
 {
 	return 0x00030000;
 }
+static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(void)
+{
+	return 0x00030a00;
+}
 static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v(void)
 {
 	return 0x00000020;
@@ -2186,6 +2194,50 @@ static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_true_f(void)
 {
 	return 0x80000000;
 }
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_r(void)
+{
+	return 0x00500ee4;
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_256b_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v(void)
+{
+	return 0x00000250;
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_addr_r(void)
+{
+	return 0x00500ee0;
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v(void)
+{
+	return 0x00000008;
+}
+static inline u32 gr_gpcs_swdx_beta_cb_ctrl_r(void)
+{
+	return 0x00418eec;
+}
+static inline u32 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
+static inline u32 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(void)
+{
+	return 0x00000100;
+}
+static inline u32 gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(void)
+{
+	return 0x0041befc;
+}
+static inline u32 gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
 static inline u32 gr_gpcs_swdx_tc_beta_cb_size_r(u32 i)
 {
 	return 0x00418ea0 + i*4;
@@ -3342,4 +3394,16 @@ static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_task_f(void)
 {
 	return 0x40000000;
 }
+static inline u32 gr_fe_gfxp_wfi_timeout_r(void)
+{
+	return 0x004041c0;
+}
+static inline u32 gr_fe_gfxp_wfi_timeout_count_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fe_gfxp_wfi_timeout_count_disabled_f(void)
+{
+	return 0x0;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gr_t18x.h b/drivers/gpu/nvgpu/gr_t18x.h
new file mode 100644
index 00000000..95601116
--- /dev/null
+++ b/drivers/gpu/nvgpu/gr_t18x.h
@@ -0,0 +1,20 @@
+/*
+ * NVIDIA T18x GR
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _NVGPU_GR_T18X_H_
+#define _NVGPU_GR_T18X_H_
+
+#include "gp10b/gr_gp10b.h"
+
+#endif
-- 
cgit v1.2.2


From 5452d161544f40778f75dda06bfddb14bcb48707 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Thu, 11 Dec 2014 12:40:03 +0530
Subject: gpu: nvgpu: gp10b: gpmu elpg support

Temporarily use gm20b elpg sequencing values for gp10b elpg.

Bug 1525971

Change-Id: Ibffb5180979be9d7ee68cad67cd6f10cf23590c3
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/662517
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/Makefile               |   1 +
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c            |   3 +-
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c |   3 +
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c            | 167 +++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.h            |  21 ++++
 5 files changed, 194 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/pmu_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index 6f1fb9e2..b2c143a6 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_GK20A)  += \
 	ltc_gp10b.o \
 	mm_gp10b.o \
 	fb_gp10b.o \
+	pmu_gp10b.o \
 	hal_gp10b.o
 
 obj-$(CONFIG_TEGRA_GK20A) += platform_gp10b_tegra.o
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index a739ce77..526caff1 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -25,6 +25,7 @@
 #include "gp10b/ltc_gp10b.h"
 #include "gp10b/mm_gp10b.h"
 #include "gp10b/fb_gp10b.h"
+#include "gp10b/pmu_gp10b.h"
 
 #include "gm20b/gr_gm20b.h"
 #include "gm20b/gm20b_gating_reglist.h"
@@ -95,7 +96,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gm20b_init_fifo(gops);
 	gm20b_init_gr_ctx(gops);
 	gp10b_init_mm(gops);
-	gm20b_init_pmu_ops(gops);
+	gp10b_init_pmu_ops(gops);
 	gm20b_init_clk_ops(gops);
 	gops->name = "gp10b";
 
diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index f199d569..0f2d290f 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -81,6 +81,9 @@ static int gp10b_tegra_suspend(struct device *dev)
 struct gk20a_platform t18x_gpu_tegra_platform = {
 	.has_syncpoints = true,
 
+	/* power management configuration */
+	.enable_elpg            = false,
+
 	.default_big_page_size	= SZ_128K,
 
 	.probe = gp10b_tegra_probe,
diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
new file mode 100644
index 00000000..3db0d4c3
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -0,0 +1,167 @@
+/*
+ * GP10B PMU
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/delay.h>	/* for udelay */
+#include "gk20a/gk20a.h"
+#include "gk20a/pmu_gk20a.h"
+#include "gm20b/acr_gm20b.h"
+#include "gm20b/pmu_gm20b.h"
+
+#include "pmu_gp10b.h"
+
+/*!
+ * Structure/object which single register write need to be done during PG init
+ * sequence to set PROD values.
+ */
+struct pg_init_sequence_list {
+	u32 regaddr;
+	u32 writeval;
+};
+
+/* PROD settings for ELPG sequencing registers*/
+static struct pg_init_sequence_list _pginitseq_gm20b[] = {
+		{ 0x0010ab10, 0x8180},
+		{ 0x0010e118, 0x83828180},
+		{ 0x0010e068, 0},
+		{ 0x0010e06c, 0x00000080},
+		{ 0x0010e06c, 0x00000081},
+		{ 0x0010e06c, 0x00000082},
+		{ 0x0010e06c, 0x00000083},
+		{ 0x0010e06c, 0x00000084},
+		{ 0x0010e06c, 0x00000085},
+		{ 0x0010e06c, 0x00000086},
+		{ 0x0010e06c, 0x00000087},
+		{ 0x0010e06c, 0x00000088},
+		{ 0x0010e06c, 0x00000089},
+		{ 0x0010e06c, 0x0000008a},
+		{ 0x0010e06c, 0x0000008b},
+		{ 0x0010e06c, 0x0000008c},
+		{ 0x0010e06c, 0x0000008d},
+		{ 0x0010e06c, 0x0000008e},
+		{ 0x0010e06c, 0x0000008f},
+		{ 0x0010e06c, 0x00000090},
+		{ 0x0010e06c, 0x00000091},
+		{ 0x0010e06c, 0x00000092},
+		{ 0x0010e06c, 0x00000093},
+		{ 0x0010e06c, 0x00000094},
+		{ 0x0010e06c, 0x00000095},
+		{ 0x0010e06c, 0x00000096},
+		{ 0x0010e06c, 0x00000097},
+		{ 0x0010e06c, 0x00000098},
+		{ 0x0010e06c, 0x00000099},
+		{ 0x0010e06c, 0x0000009a},
+		{ 0x0010e06c, 0x0000009b},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010e06c, 0x00000000},
+		{ 0x0010ab14, 0x00000000},
+		{ 0x0010ab18, 0x00000000},
+		{ 0x0010e024, 0x00000000},
+		{ 0x0010e028, 0x00000000},
+		{ 0x0010e11c, 0x00000000},
+		{ 0x0010e120, 0x00000000},
+		{ 0x0010ab1c, 0x02010155},
+		{ 0x0010e020, 0x001b1b55},
+		{ 0x0010e124, 0x01030355},
+		{ 0x0010ab20, 0x89abcdef},
+		{ 0x0010ab24, 0x00000000},
+		{ 0x0010e02c, 0x89abcdef},
+		{ 0x0010e030, 0x00000000},
+		{ 0x0010e128, 0x89abcdef},
+		{ 0x0010e12c, 0x00000000},
+		{ 0x0010ab28, 0x74444444},
+		{ 0x0010ab2c, 0x70000000},
+		{ 0x0010e034, 0x74444444},
+		{ 0x0010e038, 0x70000000},
+		{ 0x0010e130, 0x74444444},
+		{ 0x0010e134, 0x70000000},
+		{ 0x0010ab30, 0x00000000},
+		{ 0x0010ab34, 0x00000001},
+		{ 0x00020004, 0x00000000},
+		{ 0x0010e138, 0x00000000},
+		{ 0x0010e040, 0x00000000},
+};
+
+static int gp10b_pmu_setup_elpg(struct gk20a *g)
+{
+	int ret = 0;
+	u32 reg_writes;
+	u32 index;
+
+	gk20a_dbg_fn("");
+
+	if (g->elpg_enabled) {
+		reg_writes = ((sizeof(_pginitseq_gm20b) /
+				sizeof((_pginitseq_gm20b)[0])));
+		/* Initialize registers with production values*/
+		for (index = 0; index < reg_writes; index++) {
+			gk20a_writel(g, _pginitseq_gm20b[index].regaddr,
+				_pginitseq_gm20b[index].writeval);
+		}
+	}
+
+	gk20a_dbg_fn("done");
+	return ret;
+}
+
+void gp10b_init_pmu_ops(struct gpu_ops *gops)
+{
+	if (gops->privsecurity) {
+		gm20b_init_secure_pmu(gops);
+		gops->pmu.init_wpr_region = NULL;
+	} else {
+		gk20a_init_pmu_ops(gops);
+		gops->pmu.init_wpr_region = NULL;
+	}
+	gops->pmu.pmu_setup_elpg = gp10b_pmu_setup_elpg;
+	gops->pmu.lspmuwprinitdone = false;
+	gops->pmu.fecsbootstrapdone = false;
+	gops->pmu.fecsrecoveryinprogress = 0;
+}
diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h
new file mode 100644
index 00000000..f61f6a93
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h
@@ -0,0 +1,21 @@
+/*
+ * GP10B PMU
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __PMU_GP10B_H_
+#define __PMU_GP10B_H_
+
+void gp10b_init_pmu_ops(struct gpu_ops *gops);
+
+#endif /*__PMU_GP10B_H_*/
-- 
cgit v1.2.2


From d40f3fb2731b9e0586677026dcdeaabf63398933 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Tue, 9 Dec 2014 11:48:54 +0530
Subject: gpu: nvgpu: Handle MC pmu interrupts

- Made changes to MC to get pmu interrupts

Change-Id: I07aaec8392b1fbb34ae727bc7547a571aaeeb814
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/661212
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
Tested-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mc_gp10b.c | 65 +++++++++++++++++++++++++-------------
 drivers/gpu/nvgpu/gp10b/mc_gp10b.h |  7 ++++
 2 files changed, 50 insertions(+), 22 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
index 3fae4ea3..c7a4bc75 100644
--- a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
@@ -23,17 +23,44 @@ void mc_gp10b_intr_enable(struct gk20a *g)
 {
 	u32 eng_intr_mask = gk20a_fifo_engine_interrupt_mask(g);
 
-	gk20a_writel(g, mc_intr_en_clear_r(0), 0xffffffff);
-	gk20a_writel(g, mc_intr_en_set_r(0),
-		     mc_intr_pfifo_pending_f()
-		     | eng_intr_mask);
-	gk20a_writel(g, mc_intr_en_clear_r(1), 0xffffffff);
-	gk20a_writel(g, mc_intr_en_set_r(1),
-		     mc_intr_pfifo_pending_f()
-		     | mc_intr_priv_ring_pending_f()
-		     | mc_intr_ltc_pending_f()
-		     | mc_intr_pbus_pending_f()
-		     | eng_intr_mask);
+	gk20a_writel(g, mc_intr_en_clear_r(NVGPU_MC_INTR_STALLING),
+				0xffffffff);
+	g->ops.mc.intr_mask_restore[NVGPU_MC_INTR_STALLING] =
+				mc_intr_pfifo_pending_f()
+				| eng_intr_mask;
+	gk20a_writel(g, mc_intr_en_set_r(NVGPU_MC_INTR_STALLING),
+			g->ops.mc.intr_mask_restore[NVGPU_MC_INTR_STALLING]);
+
+	gk20a_writel(g, mc_intr_en_clear_r(NVGPU_MC_INTR_NONSTALLING),
+				0xffffffff);
+	g->ops.mc.intr_mask_restore[NVGPU_MC_INTR_NONSTALLING] =
+				mc_intr_pfifo_pending_f()
+			     | mc_intr_priv_ring_pending_f()
+			     | mc_intr_ltc_pending_f()
+			     | mc_intr_pbus_pending_f()
+			     | eng_intr_mask;
+	gk20a_writel(g, mc_intr_en_set_r(NVGPU_MC_INTR_NONSTALLING),
+			g->ops.mc.intr_mask_restore[NVGPU_MC_INTR_NONSTALLING]);
+}
+
+void mc_gp10b_intr_unit_config(struct gk20a *g, bool enable,
+		bool is_stalling, u32 mask)
+{
+	u32 intr_index = 0;
+	u32 reg = 0;
+
+	intr_index = (is_stalling ? NVGPU_MC_INTR_STALLING :
+			NVGPU_MC_INTR_NONSTALLING);
+	if (enable) {
+		reg = mc_intr_en_set_r(intr_index);
+		g->ops.mc.intr_mask_restore[intr_index] |= mask;
+
+	} else {
+		reg = mc_intr_en_clear_r(intr_index);
+		g->ops.mc.intr_mask_restore[intr_index] &= ~mask;
+	}
+
+	gk20a_writel(g, reg, mask);
 }
 
 irqreturn_t mc_gp10b_isr_stall(struct gk20a *g)
@@ -73,7 +100,6 @@ irqreturn_t mc_gp10b_isr_nonstall(struct gk20a *g)
 irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g)
 {
 	u32 mc_intr_0;
-	u32 eng_intr_mask = gk20a_fifo_engine_interrupt_mask(g);
 
 	gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
 
@@ -94,9 +120,8 @@ irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g)
 	if (mc_intr_0 & mc_intr_pbus_pending_f())
 		gk20a_pbus_isr(g);
 
-	gk20a_writel(g, mc_intr_en_set_r(0),
-		     mc_intr_pfifo_pending_f()
-		     | eng_intr_mask);
+	gk20a_writel(g, mc_intr_en_set_r(NVGPU_MC_INTR_STALLING),
+			g->ops.mc.intr_mask_restore[NVGPU_MC_INTR_STALLING]);
 
 	return IRQ_HANDLED;
 }
@@ -104,7 +129,6 @@ irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g)
 irqreturn_t mc_gp10b_intr_thread_nonstall(struct gk20a *g)
 {
 	u32 mc_intr_1;
-	u32 eng_intr_mask = gk20a_fifo_engine_interrupt_mask(g);
 
 	gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
 
@@ -117,12 +141,8 @@ irqreturn_t mc_gp10b_intr_thread_nonstall(struct gk20a *g)
 	if (mc_intr_1 & BIT(g->fifo.engine_info[ENGINE_GR_GK20A].intr_id))
 		gk20a_gr_nonstall_isr(g);
 
-	gk20a_writel(g, mc_intr_en_set_r(1),
-		     mc_intr_pfifo_pending_f()
-		     | mc_intr_priv_ring_pending_f()
-		     | mc_intr_ltc_pending_f()
-		     | mc_intr_pbus_pending_f()
-		     | eng_intr_mask);
+	gk20a_writel(g, mc_intr_en_set_r(NVGPU_MC_INTR_NONSTALLING),
+			g->ops.mc.intr_mask_restore[NVGPU_MC_INTR_NONSTALLING]);
 
 	return IRQ_HANDLED;
 }
@@ -130,6 +150,7 @@ irqreturn_t mc_gp10b_intr_thread_nonstall(struct gk20a *g)
 void gp10b_init_mc(struct gpu_ops *gops)
 {
 	gops->mc.intr_enable = mc_gp10b_intr_enable;
+	gops->mc.intr_unit_config = mc_gp10b_intr_unit_config;
 	gops->mc.isr_stall = mc_gp10b_isr_stall;
 	gops->mc.isr_nonstall = mc_gp10b_isr_nonstall;
 	gops->mc.isr_thread_stall = mc_gp10b_intr_thread_stall;
diff --git a/drivers/gpu/nvgpu/gp10b/mc_gp10b.h b/drivers/gpu/nvgpu/gp10b/mc_gp10b.h
index f274ce05..b2ec4be4 100644
--- a/drivers/gpu/nvgpu/gp10b/mc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/mc_gp10b.h
@@ -15,8 +15,15 @@
 #define MC_GP20B_H
 struct gk20a;
 
+enum MC_INTERRUPT_REGLIST {
+	NVGPU_MC_INTR_STALLING = 0,
+	NVGPU_MC_INTR_NONSTALLING,
+};
+
 void gp10b_init_mc(struct gpu_ops *gops);
 void mc_gp10b_intr_enable(struct gk20a *g);
+void mc_gp10b_intr_unit_config(struct gk20a *g, bool enable,
+		bool is_stalling, u32 mask);
 irqreturn_t mc_gp10b_isr_stall(struct gk20a *g);
 irqreturn_t mc_gp10b_isr_nonstall(struct gk20a *g);
 irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g);
-- 
cgit v1.2.2


From 4493b6b2004420eacab07f4e063377599ec9fe53 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 9 Dec 2014 10:49:08 +0200
Subject: gpu: nvgpu: gp10b: Enable CILP mode for compute

Allow enabling CILP for compute. Set CTA by default.

Bug 1517461

Change-Id: I85cc931b810afb3ee6116de1200d01b52e1bc29e
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/661298
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c            | 21 ++++++++++++++++++---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h            |  3 +--
 drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h | 16 ++++++++++++++++
 3 files changed, 35 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 0a60612d..576cdf45 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -510,13 +510,14 @@ static int gr_gp10b_init_ctx_state(struct gk20a *g)
 
 int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 			  struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
+			  u32 class,
 			  u32 flags)
 {
 	int err;
 
 	gk20a_dbg_fn("");
 
-	err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, flags);
+	err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags);
 	if (err)
 		return err;
 
@@ -566,9 +567,15 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 			goto fail_free_betacb;
 		}
 
-		(*gr_ctx)->t18x.preempt_mode = flags;
+		(*gr_ctx)->preempt_mode = flags;
 	}
 
+	if (class == PASCAL_COMPUTE_A)
+		if (flags == NVGPU_GR_PREEMPTION_MODE_CILP)
+			(*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CILP;
+		else
+			(*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CTA;
+
 	gk20a_dbg_fn("done");
 
 	return err;
@@ -610,16 +617,24 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
 	u32 gfxp_preempt_option =
 		ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
+	u32 cilp_preempt_option =
+		ctxsw_prog_main_image_compute_preemption_options_control_cilp_f();
 	int err;
 
 	gk20a_dbg_fn("");
 
-	if (gr_ctx->t18x.preempt_mode == NVGPU_GR_PREEMPTION_MODE_GFXP) {
+	if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_GFXP) {
 		gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
 		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_graphics_preemption_options_o(), 0,
 				gfxp_preempt_option);
 	}
 
+	if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_CILP) {
+		gk20a_dbg_info("CILP: %x", cilp_preempt_option);
+		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0,
+				cilp_preempt_option);
+	}
+
 	if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va) {
 		u32 addr;
 		u32 size;
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index 6bbda564..1b99cafb 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -39,14 +39,13 @@ struct gr_t18x {
 };
 
 struct gr_ctx_desc_t18x {
-	int preempt_mode;
 	struct mem_desc preempt_ctxsw_buffer;
 	struct mem_desc spill_ctxsw_buffer;
 	struct mem_desc betacb_ctxsw_buffer;
 	struct mem_desc pagepool_ctxsw_buffer;
 };
 
-#define NVGPU_GR_PREEMPTION_MODE_WFI		0
 #define NVGPU_GR_PREEMPTION_MODE_GFXP		1
+#define NVGPU_GR_PREEMPTION_MODE_CILP		3
 
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
index 0892f12e..7872c19c 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
@@ -254,4 +254,20 @@ static inline u32 ctxsw_prog_main_image_full_preemption_ptr_o(void)
 {
 	return 0x00000068;
 }
+static inline u32 ctxsw_prog_main_image_compute_preemption_options_o(void)
+{
+	return 0x00000084;
+}
+static inline u32 ctxsw_prog_main_image_compute_preemption_options_control_f(u32 v)
+{
+	return (v & 0x3) << 0;
+}
+static inline u32 ctxsw_prog_main_image_compute_preemption_options_control_cta_f(void)
+{
+	return 0x1;
+}
+static inline u32 ctxsw_prog_main_image_compute_preemption_options_control_cilp_f(void)
+{
+	return 0x2;
+}
 #endif
-- 
cgit v1.2.2


From d6ef9c657895e505c6cd2c521f1ee784760f4290 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 12 Jan 2015 19:19:16 +0200
Subject: gpu: nvgpu: gp10b: Fix L2 size calculation

L2 size is expressed in kB, so add a multiplier.

Bug 1592495

Change-Id: I4c10034cd21bf874c84c96f1adc25261b195063d
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/671704
---
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 9f5c16ba..88f7b072 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -29,7 +29,7 @@ static int gp10b_determine_L2_size_bytes(struct gk20a *g)
 	tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_info_1_r());
 
 	ret = g->ltc_count *
-		ltc_ltc0_lts0_tstg_info_1_slice_size_in_kb_v(tmp) *
+		ltc_ltc0_lts0_tstg_info_1_slice_size_in_kb_v(tmp)*1024 *
 		ltc_ltc0_lts0_tstg_info_1_slices_per_l2_v(tmp);
 
 	gk20a_dbg(gpu_dbg_info, "L2 size: %d\n", ret);
-- 
cgit v1.2.2


From 0cb992afd76e2748d5ae5759914352d1a3ece7e7 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 12 Jan 2015 19:20:59 +0200
Subject: gpu: nvgpu: gp10b: Default page size 64kB

Set default big page size to 64kB.

Bug 1592495

Change-Id: Id23dac012cde75f2809a49779e1a1cee879d08a0
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/671705
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 0f2d290f..0542086c 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -84,8 +84,6 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	/* power management configuration */
 	.enable_elpg            = false,
 
-	.default_big_page_size	= SZ_128K,
-
 	.probe = gp10b_tegra_probe,
 	.late_probe = gp10b_tegra_late_probe,
 
@@ -100,5 +98,5 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 
 	.dump_platform_dependencies = gk20a_tegra_debug_dump,
 
-	.default_big_page_size	= SZ_128K,
+	.default_big_page_size	= SZ_64K,
 };
-- 
cgit v1.2.2


From 667143ed939494f311ba45e3cfd89546e625bbca Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 22 Jan 2015 08:02:21 -0800
Subject: gpu: nvgpu: gp10b: Enable cycling through ctx bins

Remove hard coded NETB for gp10b. This enables cycling through
available firmware files.

Change-Id: I60765a05b1cf6c2e6003341f611c5ecc3f16e9b7
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/676557
Reviewed-by: Peng Du <pdu@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/Makefile       |  1 +
 drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c | 72 ++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.h | 28 +++++++++++++
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c    |  4 +-
 4 files changed, 103 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index b2c143a6..3575d414 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -10,6 +10,7 @@ ccflags-$(CONFIG_GK20A) += -Wno-multichar
 
 obj-$(CONFIG_GK20A)  += \
 	gr_gp10b.o  \
+	gr_ctx_gp10b.o  \
 	mc_gp10b.o  \
 	ltc_gp10b.o \
 	mm_gp10b.o \
diff --git a/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c
new file mode 100644
index 00000000..1d77ad65
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c
@@ -0,0 +1,72 @@
+/*
+ * drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c
+ *
+ * GP10B Graphics Context
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "gk20a/gk20a.h"
+#include "gr_ctx_gp10b.h"
+
+/* Format the image-name macro of netlist slot @index into @name.
+ * Returns 0 if that slot's macro is defined at build time, else -1. */
+static int gr_gp10b_get_netlist_name(int index, char *name)
+{
+	switch (index) {
+#ifdef GP10B_NETLIST_IMAGE_FW_NAME
+	case NETLIST_FINAL:
+		sprintf(name, GP10B_NETLIST_IMAGE_FW_NAME);
+		return 0;
+#endif
+#ifdef GK20A_NETLIST_IMAGE_A
+	case NETLIST_SLOT_A:
+		sprintf(name, GK20A_NETLIST_IMAGE_A);
+		return 0;
+#endif
+#ifdef GK20A_NETLIST_IMAGE_B
+	case NETLIST_SLOT_B:
+		sprintf(name, GK20A_NETLIST_IMAGE_B);
+		return 0;
+#endif
+#ifdef GK20A_NETLIST_IMAGE_C
+	case NETLIST_SLOT_C:
+		sprintf(name, GK20A_NETLIST_IMAGE_C);
+		return 0;
+#endif
+#ifdef GK20A_NETLIST_IMAGE_D
+	case NETLIST_SLOT_D:
+		sprintf(name, GK20A_NETLIST_IMAGE_D);
+		return 0;
+#endif
+	default:
+		return -1;	/* slot unknown or not compiled in */
+	}
+}
+
+static bool gr_gp10b_is_firmware_defined(void)
+{
+#ifdef GP10B_NETLIST_IMAGE_FW_NAME	/* also gates NETLIST_FINAL */
+	return true;
+#else
+	return false;
+#endif
+}
+
+void gp10b_init_gr_ctx(struct gpu_ops *gops) {
+	gops->gr_ctx.get_netlist_name = gr_gp10b_get_netlist_name;
+	gops->gr_ctx.is_fw_defined = gr_gp10b_is_firmware_defined;
+}
diff --git a/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.h
new file mode 100644
index 00000000..b1184f9d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.h
@@ -0,0 +1,28 @@
+/*
+ * GP10B Graphics Context
+ *
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __GR_CTX_GP10B_H__
+#define __GR_CTX_GP10B_H__
+
+#include "gk20a/gr_ctx_gk20a.h"
+
+/* production netlist: define GP10B_NETLIST_IMAGE_FW_NAME here (none yet) */
+/*#undef GP10B_NETLIST_IMAGE_FW_NAME*/
+
+void gp10b_init_gr_ctx(struct gpu_ops *gops);
+
+#endif /*__GR_CTX_GP10B_H__*/
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 526caff1..161c20c6 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -26,11 +26,11 @@
 #include "gp10b/mm_gp10b.h"
 #include "gp10b/fb_gp10b.h"
 #include "gp10b/pmu_gp10b.h"
+#include "gp10b/gr_ctx_gp10b.h"
 
 #include "gm20b/gr_gm20b.h"
 #include "gm20b/gm20b_gating_reglist.h"
 #include "gm20b/fifo_gm20b.h"
-#include "gm20b/gr_ctx_gm20b.h"
 #include "gm20b/pmu_gm20b.h"
 #include "gm20b/clk_gm20b.h"
 
@@ -94,7 +94,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gp10b_init_ltc(gops);
 	gp10b_init_fb(gops);
 	gm20b_init_fifo(gops);
-	gm20b_init_gr_ctx(gops);
+	gp10b_init_gr_ctx(gops);
 	gp10b_init_mm(gops);
 	gp10b_init_pmu_ops(gops);
 	gm20b_init_clk_ops(gops);
-- 
cgit v1.2.2


From 587a7b1e931d421b7526a0328c2c82ee78075f0c Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Mon, 8 Dec 2014 18:15:37 -0800
Subject: gpu: nvgpu: gp10b: update headers

Update replayable page fault fifo, interrupt and bar2 block
headers.

Bug 1587825

Change-Id: Ifa0d3b640bdd5f3f6fbc7826c1d1edba494340df
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/661117
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h        |  20 ++++
 drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h |   2 +-
 drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h       | 130 +++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h         |   6 +-
 drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h         |   4 +
 drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h        |  34 ++++++-
 6 files changed, 192 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h
index e443738f..c04b01c1 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h
@@ -66,10 +66,30 @@ static inline u32 bus_bar1_block_mode_virtual_f(void)
 {
 	return 0x80000000;
 }
+static inline u32 bus_bar2_block_r(void)
+{
+	return 0x00001714;
+}
+static inline u32 bus_bar2_block_ptr_f(u32 v)
+{
+	return (v & 0xfffffff) << 0;
+}
+static inline u32 bus_bar2_block_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 bus_bar2_block_mode_virtual_f(void)
+{
+	return 0x80000000;
+}
 static inline u32 bus_bar1_block_ptr_shift_v(void)
 {
 	return 0x0000000c;
 }
+static inline u32 bus_bar2_block_ptr_shift_v(void)
+{
+	return 0x0000000c;
+}
 static inline u32 bus_intr_0_r(void)
 {
 	return 0x00001100;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
index 7872c19c..3b97c9da 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
diff --git a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
index b79758d2..d2629b08 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -178,6 +178,10 @@ static inline u32 fifo_intr_0_lb_error_reset_f(void)
 {
 	return 0x1000000;
 }
+static inline u32 fifo_intr_0_replayable_fault_error_pending_f(void)
+{
+	return 0x2000000;
+}
 static inline u32 fifo_intr_0_dropped_mmu_fault_pending_f(void)
 {
 	return 0x8000000;
@@ -526,4 +530,128 @@ static inline u32 fifo_pbdma_status_chsw_in_progress_v(void)
 {
 	return 0x00000001;
 }
+static inline u32 fifo_replay_fault_buffer_lo_r(void)
+{
+	return 0x00002a70;
+}
+static inline u32 fifo_replay_fault_buffer_lo_enable_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 fifo_replay_fault_buffer_lo_enable_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_replay_fault_buffer_lo_enable_false_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_replay_fault_buffer_lo_base_f(u32 v)
+{
+	return (v & 0xfffff) << 12;
+}
+static inline u32 fifo_replay_fault_buffer_lo_base_reset_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_replay_fault_buffer_hi_r(void)
+{
+	return 0x00002a74;
+}
+static inline u32 fifo_replay_fault_buffer_hi_base_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+static inline u32 fifo_replay_fault_buffer_hi_base_reset_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_replay_fault_buffer_size_r(void)
+{
+	return 0x00002a78;
+}
+static inline u32 fifo_replay_fault_buffer_size_hw_f(u32 v)
+{
+	return (v & 0x1ff) << 0;
+}
+static inline u32 fifo_replay_fault_buffer_size_hw_entries_v(void)
+{
+	return 0x000000c0;
+}
+static inline u32 fifo_replay_fault_buffer_get_r(void)
+{
+	return 0x00002a7c;
+}
+static inline u32 fifo_replay_fault_buffer_get_offset_hw_f(u32 v)
+{
+	return (v & 0x1ff) << 0;
+}
+static inline u32 fifo_replay_fault_buffer_get_offset_hw_init_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_replay_fault_buffer_put_r(void)
+{
+	return 0x00002a80;
+}
+static inline u32 fifo_replay_fault_buffer_put_offset_hw_f(u32 v)
+{
+	return (v & 0x1ff) << 0;
+}
+static inline u32 fifo_replay_fault_buffer_put_offset_hw_init_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_replay_fault_buffer_info_r(void)
+{
+	return 0x00002a84;
+}
+static inline u32 fifo_replay_fault_buffer_info_overflow_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 fifo_replay_fault_buffer_info_overflow_false_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_replay_fault_buffer_info_overflow_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_replay_fault_buffer_info_overflow_clear_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_replay_fault_buffer_info_write_nack_f(u32 v)
+{
+	return (v & 0x1) << 24;
+}
+static inline u32 fifo_replay_fault_buffer_info_write_nack_false_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_replay_fault_buffer_info_write_nack_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_replay_fault_buffer_info_write_nack_clear_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_replay_fault_buffer_info_fault_while_buffer_disabled_f(u32 v)
+{
+	return (v & 0x1) << 28;
+}
+static inline u32 fifo_replay_fault_buffer_info_fault_while_buffer_disabled_false_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_replay_fault_buffer_info_fault_while_buffer_disabled_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_replay_fault_buffer_info_fault_while_buffer_disabled_clear_v(void)
+{
+	return 0x00000001;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 0bd707db..49078f11 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -1294,6 +1294,10 @@ static inline u32 gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v(void)
 {
 	return 0x00000004;
 }
+static inline u32 gr_ds_zbc_color_fmt_val_a8_b8_g8_r8_v(void)
+{
+	return 0x00000028;
+}
 static inline u32 gr_ds_zbc_z_r(void)
 {
 	return 0x00405818;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
index 21c592da..7d153b6f 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
@@ -78,6 +78,10 @@ static inline u32 mc_intr_pfifo_pending_f(void)
 {
 	return 0x100;
 }
+static inline u32 mc_intr_replayable_fault_pending_f(void)
+{
+	return 0x200;
+}
 static inline u32 mc_intr_pgraph_pending_f(void)
 {
 	return 0x1000;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
index 509031e5..dea53f96 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -78,6 +78,38 @@ static inline u32 ram_in_page_dir_base_vol_true_f(void)
 {
 	return 0x4;
 }
+static inline u32 ram_in_page_dir_base_fault_replay_tex_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_tex_m(void)
+{
+	return 0x1 << 4;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_tex_w(void)
+{
+	return 128;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_tex_true_f(void)
+{
+	return 0x10;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_gcc_f(u32 v)
+{
+	return (v & 0x1) << 5;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_gcc_m(void)
+{
+	return 0x1 << 5;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_gcc_w(void)
+{
+	return 128;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_gcc_true_f(void)
+{
+	return 0x20;
+}
 static inline u32 ram_in_big_page_size_f(u32 v)
 {
 	return (v & 0x1) << 11;
-- 
cgit v1.2.2


From 6056528af809a9a861149e218bcaff250f964eea Mon Sep 17 00:00:00 2001
From: Adeel Raza <araza@nvidia.com>
Date: Mon, 22 Dec 2014 16:16:29 -0800
Subject: gpu: nvgpu: headers for linsim CL 34000094

Change-Id: I43380fda328414e96601e1c03c3e0ec28c0b4871
Signed-off-by: Adeel Raza <araza@nvidia.com>
Reviewed-on: http://git-master/r/666905
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h | 12 ++++++------
 drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
index d2629b08..d10345c3 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
@@ -276,23 +276,23 @@ static inline u32 fifo_intr_mmu_fault_info_r(u32 i)
 }
 static inline u32 fifo_intr_mmu_fault_info_type_v(u32 r)
 {
-	return (r >> 0) & 0xf;
+	return (r >> 0) & 0x1f;
 }
-static inline u32 fifo_intr_mmu_fault_info_engine_subid_v(u32 r)
+static inline u32 fifo_intr_mmu_fault_info_client_type_v(u32 r)
 {
-	return (r >> 6) & 0x1;
+	return (r >> 20) & 0x1;
 }
-static inline u32 fifo_intr_mmu_fault_info_engine_subid_gpc_v(void)
+static inline u32 fifo_intr_mmu_fault_info_client_type_gpc_v(void)
 {
 	return 0x00000000;
 }
-static inline u32 fifo_intr_mmu_fault_info_engine_subid_hub_v(void)
+static inline u32 fifo_intr_mmu_fault_info_client_type_hub_v(void)
 {
 	return 0x00000001;
 }
 static inline u32 fifo_intr_mmu_fault_info_client_v(u32 r)
 {
-	return (r >> 8) & 0x3f;
+	return (r >> 8) & 0x7f;
 }
 static inline u32 fifo_intr_pbdma_id_r(void)
 {
diff --git a/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
index 272f7fb3..cdb28d08 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
@@ -64,15 +64,15 @@ static inline u32 fuse_ctrl_opt_ram_svop_pdp_r(void)
 }
 static inline u32 fuse_ctrl_opt_ram_svop_pdp_data_f(u32 v)
 {
-	return (v & 0x3) << 0;
+	return (v & 0xff) << 0;
 }
 static inline u32 fuse_ctrl_opt_ram_svop_pdp_data_m(void)
 {
-	return 0x3 << 0;
+	return 0xff << 0;
 }
 static inline u32 fuse_ctrl_opt_ram_svop_pdp_data_v(u32 r)
 {
-	return (r >> 0) & 0x3;
+	return (r >> 0) & 0xff;
 }
 static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_r(void)
 {
-- 
cgit v1.2.2


From 08b8c0564866c9a37e1790f603f8f4ebdfe4dbf4 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Wed, 3 Dec 2014 11:20:11 -0800
Subject: gpu: nvgpu: gp10b: enable replayable fault interrupt

Bug 1587825

Change-Id: I6df2f870b4488bb3d5ada52b4819f6f80624becd
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/659092
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mc_gp10b.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
index c7a4bc75..1760b6ad 100644
--- a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * GP20B master
  *
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -27,6 +27,7 @@ void mc_gp10b_intr_enable(struct gk20a *g)
 				0xffffffff);
 	g->ops.mc.intr_mask_restore[NVGPU_MC_INTR_STALLING] =
 				mc_intr_pfifo_pending_f()
+				| mc_intr_replayable_fault_pending_f()
 				| eng_intr_mask;
 	gk20a_writel(g, mc_intr_en_set_r(NVGPU_MC_INTR_STALLING),
 			g->ops.mc.intr_mask_restore[NVGPU_MC_INTR_STALLING]);
-- 
cgit v1.2.2


From df6d5ab07b5f68704ec392b1465331a47e682d4d Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Wed, 3 Dec 2014 17:55:05 -0800
Subject: gpu: nvgpu: gp10b: Add Bar2 support

Add bar2 support for gp10b and set-up bar2 binding.

Bug 1587825

Change-Id: I46660b3a28a5667ec782dd45b4528ae5f79e17c8
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/659236
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 58 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index ff248f51..00e41fa7 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * GP10B MMU
  *
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -14,15 +14,71 @@
  */
 
 #include <linux/pm_runtime.h>
+#include <linux/dma-mapping.h>
 #include "gk20a/gk20a.h"
+#include "mm_gp10b.h"
+#include "hw_ram_gp10b.h"
+#include "hw_bus_gp10b.h"
 
 u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g)
 {
 	return 36;
 }
 
+static int gb10b_init_bar2_vm(struct gk20a *g)
+{
+	int err;
+	struct mm_gk20a *mm = &g->mm;
+	struct vm_gk20a *vm = &mm->bar2.vm;
+	struct inst_desc *inst_block = &mm->bar2.inst_block;
+	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
+
+	/* BAR2 aperture size is 32MB */
+	mm->bar2.aperture_size = 32 << 20;
+	gk20a_dbg_info("bar2 vm size = 0x%x", mm->bar2.aperture_size);
+	gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
+		mm->bar2.aperture_size, false, "bar2");
+
+	/* allocate instance mem for bar2 */
+	err = gk20a_alloc_inst_block(g, inst_block);
+	if (err)
+		goto clean_up_va;
+
+	gk20a_init_inst_block(inst_block, vm, big_page_size);
+
+	return 0;
+
+clean_up_va:
+	gk20a_deinit_vm(vm);
+	return err;
+}
+
+
+static int gb10b_init_bar2_mm_hw_setup(struct gk20a *g)
+{
+	struct mm_gk20a *mm = &g->mm;
+	struct inst_desc *inst_block = &mm->bar2.inst_block;
+	phys_addr_t inst_pa = inst_block->cpu_pa;
+
+	gk20a_dbg_fn("");
+
+	g->ops.fb.set_mmu_page_size(g);
+
+	inst_pa = (u32)(inst_pa >> bus_bar2_block_ptr_shift_v());
+	gk20a_dbg_info("bar2 inst block ptr: 0x%08x",  (u32)inst_pa);
+
+	gk20a_writel(g, bus_bar2_block_r(),
+		bus_bar2_block_target_vid_mem_f() |
+		bus_bar2_block_mode_virtual_f() |
+		bus_bar2_block_ptr_f(inst_pa));
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
 void gp10b_init_mm(struct gpu_ops *gops)
 {
 	gm20b_init_mm(gops);
 	gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits;
+	gops->mm.init_bar2_vm = gb10b_init_bar2_vm;
+	gops->mm.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup;
 }
-- 
cgit v1.2.2


From c0fcbdf2fc853e6eaf60af131c6de1624d4d4858 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 27 Jan 2015 09:19:10 -0800
Subject: gpu: nvgpu: gp10b: Compression page size to 64k

Define compression page size for gp10b to be 64k. We also need to
copy some LTC initialization code from gm20b to gp10b.

Change-Id: I0235c32cdb1486a23d33eb98ebbc79c97a3c32d4
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/677837
---
 drivers/gpu/nvgpu/gp10b/fb_gp10b.c  |  6 +++
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 95 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 99 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fb_gp10b.c b/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
index df35c5b0..bd1b7bf0 100644
--- a/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
@@ -87,9 +87,15 @@ static void gp10b_init_kind_attr(void)
 	}
 }
 
+static int gp10b_fb_compression_page_size(struct gk20a *g)
+{
+	return SZ_64K;
+}
+
 void gp10b_init_fb(struct gpu_ops *gops)
 {
 	gm20b_init_fb(gops);
+	gops->fb.compression_page_size = gp10b_fb_compression_page_size;
 
 	gp10b_init_uncompressed_kind_map();
 	gp10b_init_kind_attr();
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 88f7b072..03454240 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -19,6 +19,8 @@
 #include "gm20b/ltc_gm20b.h"
 #include "hw_ltc_gp10b.h"
 
+#include "gk20a/ltc_common.c"
+
 static int gp10b_determine_L2_size_bytes(struct gk20a *g)
 {
 	u32 tmp;
@@ -39,9 +41,98 @@ static int gp10b_determine_L2_size_bytes(struct gk20a *g)
 	return ret;
 }
 
-void gp10b_init_ltc(struct gpu_ops *gops)
+static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 {
-	gm20b_init_ltc(gops);
+	/* max memory size (MB) to cover */
+	u32 max_size = gr->max_comptag_mem;
+	/* one tag line covers 64KB */
+	u32 max_comptag_lines = max_size << 4;
+
+	u32 hw_max_comptag_lines =
+		ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v();
+
+	u32 cbc_param =
+		gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
+	u32 comptags_per_cacheline =
+		ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param);
+	u32 cacheline_size =
+		512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param);
+	u32 slices_per_ltc =
+		ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(cbc_param);
+
+	u32 compbit_backing_size;
+
+	int err;
+
+	gk20a_dbg_fn("");
+
+	if (max_comptag_lines == 0) {
+		gr->compbit_store.size = 0;
+		return 0;
+	}
+
+	if (max_comptag_lines > hw_max_comptag_lines)
+		max_comptag_lines = hw_max_comptag_lines;
+
+	compbit_backing_size =
+		DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) *
+		cacheline_size * slices_per_ltc * g->ltc_count;
+
+	/* aligned to 2KB * ltc_count */
+	compbit_backing_size +=
+		g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v();
+
+	/* must be a multiple of 64KB */
+	compbit_backing_size = roundup(compbit_backing_size, 64*1024);
 
+	max_comptag_lines =
+		(compbit_backing_size * comptags_per_cacheline) /
+		(cacheline_size * slices_per_ltc * g->ltc_count);
+
+	if (max_comptag_lines > hw_max_comptag_lines)
+		max_comptag_lines = hw_max_comptag_lines;
+
+	gk20a_dbg_info("compbit backing store size : %d",
+		compbit_backing_size);
+	gk20a_dbg_info("max comptag lines : %d",
+		max_comptag_lines);
+
+	if (tegra_platform_is_linsim())
+		err = gk20a_ltc_alloc_phys_cbc(g, compbit_backing_size);
+	else
+		err = gk20a_ltc_alloc_virt_cbc(g, compbit_backing_size);
+
+	if (err)
+		return err;
+
+	gk20a_allocator_init(&gr->comp_tags, "comptag",
+			      1, /* start */
+			      max_comptag_lines - 1); /* length*/
+
+	gr->comptags_per_cacheline = comptags_per_cacheline;
+	gr->slices_per_ltc = slices_per_ltc;
+	gr->cacheline_size = cacheline_size;
+
+	return 0;
+}
+
+void gp10b_init_ltc(struct gpu_ops *gops)
+{
 	gops->ltc.determine_L2_size_bytes = gp10b_determine_L2_size_bytes;
+	gops->ltc.set_max_ways_evict_last = gk20a_ltc_set_max_ways_evict_last;
+	gops->ltc.set_zbc_color_entry = gk20a_ltc_set_zbc_color_entry;
+	gops->ltc.set_zbc_depth_entry = gk20a_ltc_set_zbc_depth_entry;
+	gops->ltc.init_cbc = gk20a_ltc_init_cbc;
+
+	/* GM20b specific ops. */
+	gops->ltc.init_fs_state = gm20b_ltc_init_fs_state;
+	gops->ltc.init_comptags = gp10b_ltc_init_comptags;
+	gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl;
+	gops->ltc.elpg_flush = gm20b_ltc_g_elpg_flush_locked;
+	gops->ltc.isr = gm20b_ltc_isr;
+	gops->ltc.cbc_fix_config = gm20b_ltc_cbc_fix_config;
+	gops->ltc.flush = gm20b_flush_ltc;
+#ifdef CONFIG_DEBUG_FS
+	gops->ltc.sync_debugfs = gk20a_ltc_sync_debugfs;
+#endif
 }
-- 
cgit v1.2.2


From 99d41c05f5f724f960e5b0320cdadac356750e9c Mon Sep 17 00:00:00 2001
From: Jussi Rasanen <jrasanen@nvidia.com>
Date: Fri, 16 Jan 2015 12:55:28 +0200
Subject: gpu: nvgpu: read gobs_per_comptagline_per_slice

Add code to read NV_PLTCG_LTCS_LTSS_CBC_PARAM2_GOBS_PER_COMPTAGLINE_PER_SLICE
during t18x ltc init and store it for use in CDE code.

Change-Id: I4d4a3a6c7e3ad369d8359ff838e7040a0521b441
Signed-off-by: Jussi Rasanen <jrasanen@nvidia.com>
Reviewed-on: http://git-master/r/673150
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h | 10 +++++++++-
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c    | 10 +++++++++-
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
index 32683dc2..01b66c9c 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -174,6 +174,14 @@ static inline u32 ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(u32 r)
 {
 	return (r >> 28) & 0xf;
 }
+static inline u32 ltc_ltcs_ltss_cbc_param2_r(void)
+{
+	return 0x0017e3f4;
+}
+static inline u32 ltc_ltcs_ltss_cbc_param2_gobs_per_comptagline_per_slice_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
 static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_r(void)
 {
 	return 0x0017e2ac;
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 03454240..beda3038 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * GP10B L2
  *
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -59,6 +59,10 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 		512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param);
 	u32 slices_per_ltc =
 		ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(cbc_param);
+	u32 cbc_param2 =
+		gk20a_readl(g, ltc_ltcs_ltss_cbc_param2_r());
+	u32 gobs_per_comptagline_per_slice =
+		ltc_ltcs_ltss_cbc_param2_gobs_per_comptagline_per_slice_v(cbc_param2);
 
 	u32 compbit_backing_size;
 
@@ -96,6 +100,8 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 		compbit_backing_size);
 	gk20a_dbg_info("max comptag lines : %d",
 		max_comptag_lines);
+	gk20a_dbg_info("gobs_per_comptagline_per_slice: %d",
+		gobs_per_comptagline_per_slice);
 
 	if (tegra_platform_is_linsim())
 		err = gk20a_ltc_alloc_phys_cbc(g, compbit_backing_size);
@@ -112,6 +118,7 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	gr->comptags_per_cacheline = comptags_per_cacheline;
 	gr->slices_per_ltc = slices_per_ltc;
 	gr->cacheline_size = cacheline_size;
+	gr->gobs_per_comptagline_per_slice = gobs_per_comptagline_per_slice;
 
 	return 0;
 }
@@ -136,3 +143,4 @@ void gp10b_init_ltc(struct gpu_ops *gops)
 	gops->ltc.sync_debugfs = gk20a_ltc_sync_debugfs;
 #endif
 }
+
-- 
cgit v1.2.2


From 1214aabe95a7b19689f7dd732ce544cae6f837fe Mon Sep 17 00:00:00 2001
From: Jussi Rasanen <jrasanen@nvidia.com>
Date: Tue, 13 Jan 2015 11:36:49 +0200
Subject: gpu: nvgpu: enable CDE for t18x

Mark CDE as supported on t18x.

Change-Id: I03c23178712b9018137edddfa8e1ff3a2ad9106c
Signed-off-by: Jussi Rasanen <jrasanen@nvidia.com>
Reviewed-on: http://git-master/r/672384
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 0542086c..ab98cbde 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -3,7 +3,7 @@
  *
  * GK20A Tegra Platform Interface
  *
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -99,4 +99,6 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	.dump_platform_dependencies = gk20a_tegra_debug_dump,
 
 	.default_big_page_size	= SZ_64K,
+
+	.has_cde = true,
 };
-- 
cgit v1.2.2


From ea29b9e779ebbc7ab5fc9daa2dc4ebcde63b3550 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 30 Jan 2015 15:14:55 -0800
Subject: gpu: nvgpu: gp10b: Enable debug spew

Change-Id: I58811bbce0e39b85074f3aa9022a730f696e407e
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/679704
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 161c20c6..1a34688a 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -98,6 +98,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gp10b_init_mm(gops);
 	gp10b_init_pmu_ops(gops);
 	gm20b_init_clk_ops(gops);
+	gk20a_init_debug_ops(gops);
 	gops->name = "gp10b";
 
 	c->twod_class = FERMI_TWOD_A;
-- 
cgit v1.2.2


From fdbf60a84f4e7d0e90d0b10f5a98ef6a93b7c54e Mon Sep 17 00:00:00 2001
From: Peng Du <pdu@nvidia.com>
Date: Mon, 26 Jan 2015 14:49:49 -0800
Subject: gpu: nvgpu: headers for linsim CL 34116551

Change-Id: Ia8760772b0135813475f96a786484d7caef3759d
Signed-off-by: Peng Du <pdu@nvidia.com>
Reviewed-on: http://git-master/r/677464
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bo Yan <byan@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h |  2 +-
 drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h  | 16 ----------------
 2 files changed, 1 insertion(+), 17 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
index d10345c3..2c0367d5 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
@@ -560,7 +560,7 @@ static inline u32 fifo_replay_fault_buffer_hi_r(void)
 }
 static inline u32 fifo_replay_fault_buffer_hi_base_f(u32 v)
 {
-	return (v & 0xffff) << 0;
+	return (v & 0xff) << 0;
 }
 static inline u32 fifo_replay_fault_buffer_hi_base_reset_v(void)
 {
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
index dea53f96..6ddff281 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
@@ -78,14 +78,6 @@ static inline u32 ram_in_page_dir_base_vol_true_f(void)
 {
 	return 0x4;
 }
-static inline u32 ram_in_page_dir_base_fault_replay_tex_f(u32 v)
-{
-	return (v & 0x1) << 4;
-}
-static inline u32 ram_in_page_dir_base_fault_replay_tex_m(void)
-{
-	return 0x1 << 4;
-}
 static inline u32 ram_in_page_dir_base_fault_replay_tex_w(void)
 {
 	return 128;
@@ -94,14 +86,6 @@ static inline u32 ram_in_page_dir_base_fault_replay_tex_true_f(void)
 {
 	return 0x10;
 }
-static inline u32 ram_in_page_dir_base_fault_replay_gcc_f(u32 v)
-{
-	return (v & 0x1) << 5;
-}
-static inline u32 ram_in_page_dir_base_fault_replay_gcc_m(void)
-{
-	return 0x1 << 5;
-}
 static inline u32 ram_in_page_dir_base_fault_replay_gcc_w(void)
 {
 	return 128;
-- 
cgit v1.2.2


From 83c223ac56465e72771aca073cc0648230616d18 Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Thu, 12 Feb 2015 16:09:02 +0530
Subject: gpu: nvgpu: gp10b: use tight loop for fecs method

bug 200078367

Change-Id: I9a68e988fa7921276e334c75afa5ee4b15aab464
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/707313
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 576cdf45..90514b82 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * GP10B GPU GR
  *
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -492,7 +492,7 @@ static int gr_gp10b_init_ctx_state(struct gk20a *g)
 		op.method.addr =
 			gr_fecs_method_push_adr_discover_preemption_image_size_v();
 		op.mailbox.ret = &g->gr.t18x.ctx_vars.preempt_image_size;
-		err = gr_gk20a_submit_fecs_method_op(g, op);
+		err = gr_gk20a_submit_fecs_method_op(g, op, false);
 		if (err) {
 			gk20a_err(dev_from_gk20a(g),
 					"query preempt image size failed");
-- 
cgit v1.2.2


From 6cf9d594f01d9de7a602956a15c2aec6ea570b6a Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Thu, 29 Jan 2015 12:20:49 +0530
Subject: gpu: nvgpu: gp10b: dump GR status registers

Add function pointer gr_gp10b_dump_gr_status_regs()
which will enable dumping GR status registers for gp10b

Bug 200062436

Change-Id: Iaecc2f9c9364232079bb03e114f68550bd035372
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/678832
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c    | 120 ++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 156 ++++++++++++++++++++++++++++++++++
 2 files changed, 276 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 90514b82..e3d4b973 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -20,6 +20,7 @@
 #include "gm20b/gr_gm20b.h" /* for MAXWELL classes */
 #include "gp10b/gr_gp10b.h"
 #include "hw_gr_gp10b.h"
+#include "hw_fifo_gp10b.h"
 #include "hw_proj_gp10b.h"
 #include "hw_ctxsw_prog_gp10b.h"
 
@@ -692,6 +693,124 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 	gk20a_dbg_fn("done");
 }
 
+static int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
+			   struct gk20a_debug_output *o)
+{
+	struct gr_gk20a *gr = &g->gr;
+
+	gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n",
+		gk20a_readl(g, gr_status_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n",
+		gk20a_readl(g, gr_status_1_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_STATUS2: 0x%x\n",
+		gk20a_readl(g, gr_status_2_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_ENGINE_STATUS: 0x%x\n",
+		gk20a_readl(g, gr_engine_status_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_STATUS : 0x%x\n",
+		gk20a_readl(g, gr_gpfifo_status_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_CONTROL : 0x%x\n",
+		gk20a_readl(g, gr_gpfifo_ctl_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n",
+		gk20a_readl(g, gr_fecs_host_int_status_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_EXCEPTION  : 0x%x\n",
+		gk20a_readl(g, gr_exception_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR  : 0x%x\n",
+		gk20a_readl(g, gr_fecs_intr_r()));
+	gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n",
+		gk20a_readl(g, fifo_engine_status_r(ENGINE_GR_GK20A)));
+	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n",
+		gk20a_readl(g, gr_activity_0_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n",
+		gk20a_readl(g, gr_activity_1_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY2: 0x%x\n",
+		gk20a_readl(g, gr_activity_2_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY4: 0x%x\n",
+		gk20a_readl(g, gr_activity_4_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_SKED_ACTIVITY: 0x%x\n",
+		gk20a_readl(g, gr_pri_sked_activity_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY0: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity0_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY1: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity1_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY2: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity2_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY3: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
+	if (gr->gpc_tpc_count[0] == 2)
+		gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
+			gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_1_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY2: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
+	if (gr->gpc_tpc_count[0] == 2)
+		gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
+			gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_ACTIVITY0: 0x%x\n",
+		gk20a_readl(g, gr_pri_be0_becs_be_activity0_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE1_BECS_BE_ACTIVITY0: 0x%x\n",
+		gk20a_readl(g, gr_pri_be1_becs_be_activity0_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_BECS_BE_ACTIVITY0: 0x%x\n",
+		gk20a_readl(g, gr_pri_bes_becs_be_activity0_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n",
+		gk20a_readl(g, gr_pri_ds_mpipe_status_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_TIMEOUT : 0x%x\n",
+		gk20a_readl(g, gr_fe_go_idle_timeout_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_INFO : 0x%x\n",
+		gk20a_readl(g, gr_pri_fe_go_idle_info_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TEX_M_TEX_SUBUNITS_STATUS: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_FS: 0x%x\n",
+		gk20a_readl(g, gr_cwd_fs_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_TPC_FS: 0x%x\n",
+		gk20a_readl(g, gr_fe_tpc_fs_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_GPC_TPC_ID(0): 0x%x\n",
+		gk20a_readl(g, gr_cwd_gpc_tpc_id_r(0)));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_SM_ID(0): 0x%x\n",
+		gk20a_readl(g, gr_cwd_sm_id_r(0)));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_FE_0: 0x%x\n",
+		gk20a_readl(g, gr_fecs_ctxsw_status_fe_0_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_1: 0x%x\n",
+		gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_GPC_0: 0x%x\n",
+		gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_gpc_0_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_1: 0x%x\n",
+		gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_1_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_IDLESTATE : 0x%x\n",
+		gk20a_readl(g, gr_fecs_ctxsw_idlestate_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_IDLESTATE : 0x%x\n",
+		gk20a_readl(g, gr_gpc0_gpccs_ctxsw_idlestate_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CURRENT_CTX : 0x%x\n",
+		gk20a_readl(g, gr_fecs_current_ctx_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_NEW_CTX : 0x%x\n",
+		gk20a_readl(g, gr_fecs_new_ctx_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_CROP_STATUS1 : 0x%x\n",
+		gk20a_readl(g, gr_pri_be0_crop_status1_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_CROP_STATUS1 : 0x%x\n",
+		gk20a_readl(g, gr_pri_bes_crop_status1_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS : 0x%x\n",
+		gk20a_readl(g, gr_pri_be0_zrop_status_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS2 : 0x%x\n",
+		gk20a_readl(g, gr_pri_be0_zrop_status2_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS : 0x%x\n",
+		gk20a_readl(g, gr_pri_bes_zrop_status_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS2 : 0x%x\n",
+		gk20a_readl(g, gr_pri_bes_zrop_status2_r()));
+	return 0;
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -714,4 +833,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.free_gr_ctx = gr_gp10b_free_gr_ctx;
 	gops->gr.update_ctxsw_preemption_mode =
 		gr_gp10b_update_ctxsw_preemption_mode;
+	gops->gr.dump_gr_regs = gr_gp10b_dump_gr_status_regs;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 49078f11..a1cfab57 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -222,6 +222,10 @@ static inline u32 gr_gpfifo_ctl_semaphore_access_enabled_f(void)
 {
 	return 0x10000;
 }
+static inline u32 gr_gpfifo_status_r(void)
+{
+	return 0x00400504;
+}
 static inline u32 gr_trapped_addr_r(void)
 {
 	return 0x00400704;
@@ -266,6 +270,14 @@ static inline u32 gr_status_mask_r(void)
 {
 	return 0x00400610;
 }
+static inline u32 gr_status_1_r(void)
+{
+	return 0x00400604;
+}
+static inline u32 gr_status_2_r(void)
+{
+	return 0x00400608;
+}
 static inline u32 gr_engine_status_r(void)
 {
 	return 0x0040060c;
@@ -274,6 +286,130 @@ static inline u32 gr_engine_status_value_busy_f(void)
 {
 	return 0x1;
 }
+static inline u32 gr_activity_0_r(void)
+{
+	return 0x00400380;
+}
+static inline u32 gr_activity_1_r(void)
+{
+	return 0x00400384;
+}
+static inline u32 gr_activity_2_r(void)
+{
+	return 0x00400388;
+}
+static inline u32 gr_activity_4_r(void)
+{
+	return 0x00400390;
+}
+static inline u32 gr_pri_sked_activity_r(void)
+{
+	return 0x00407054;
+}
+static inline u32 gr_pri_gpc0_gpccs_gpc_activity0_r(void)
+{
+	return 0x00502c80;
+}
+static inline u32 gr_pri_gpc0_gpccs_gpc_activity1_r(void)
+{
+	return 0x00502c84;
+}
+static inline u32 gr_pri_gpc0_gpccs_gpc_activity2_r(void)
+{
+	return 0x00502c88;
+}
+static inline u32 gr_pri_gpc0_gpccs_gpc_activity3_r(void)
+{
+	return 0x00502c8c;
+}
+static inline u32 gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r(void)
+{
+	return 0x00504500;
+}
+static inline u32 gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r(void)
+{
+	return 0x00504d00;
+}
+static inline u32 gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r(void)
+{
+	return 0x00501d00;
+}
+static inline u32 gr_pri_gpcs_gpccs_gpc_activity_0_r(void)
+{
+	return 0x0041ac80;
+}
+static inline u32 gr_pri_gpcs_gpccs_gpc_activity_1_r(void)
+{
+	return 0x0041ac84;
+}
+static inline u32 gr_pri_gpcs_gpccs_gpc_activity_2_r(void)
+{
+	return 0x0041ac88;
+}
+static inline u32 gr_pri_gpcs_gpccs_gpc_activity_3_r(void)
+{
+	return 0x0041ac8c;
+}
+static inline u32 gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r(void)
+{
+	return 0x0041c500;
+}
+static inline u32 gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r(void)
+{
+	return 0x0041cd00;
+}
+static inline u32 gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r(void)
+{
+	return 0x00419d00;
+}
+static inline u32 gr_pri_be0_becs_be_activity0_r(void)
+{
+	return 0x00410200;
+}
+static inline u32 gr_pri_be1_becs_be_activity0_r(void)
+{
+	return 0x00410600;
+}
+static inline u32 gr_pri_bes_becs_be_activity0_r(void)
+{
+	return 0x00408a00;
+}
+static inline u32 gr_pri_ds_mpipe_status_r(void)
+{
+	return 0x00405858;
+}
+static inline u32 gr_pri_fe_go_idle_info_r(void)
+{
+	return 0x00404194;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r(void)
+{
+	return 0x00504238;
+}
+static inline u32 gr_pri_be0_crop_status1_r(void)
+{
+	return 0x00410134;
+}
+static inline u32 gr_pri_bes_crop_status1_r(void)
+{
+	return 0x00408934;
+}
+static inline u32 gr_pri_be0_zrop_status_r(void)
+{
+	return 0x00410048;
+}
+static inline u32 gr_pri_be0_zrop_status2_r(void)
+{
+	return 0x0041004c;
+}
+static inline u32 gr_pri_bes_zrop_status_r(void)
+{
+	return 0x00408848;
+}
+static inline u32 gr_pri_bes_zrop_status2_r(void)
+{
+	return 0x0040884c;
+}
 static inline u32 gr_pipe_bundle_address_r(void)
 {
 	return 0x00400200;
@@ -1062,6 +1198,26 @@ static inline u32 gr_fecs_arb_ctx_cmd_cmd_v(u32 r)
 {
 	return (r >> 0) & 0x1f;
 }
+static inline u32 gr_fecs_ctxsw_status_fe_0_r(void)
+{
+	return 0x00409c00;
+}
+static inline u32 gr_gpc0_gpccs_ctxsw_status_gpc_0_r(void)
+{
+	return 0x00502c04;
+}
+static inline u32 gr_gpc0_gpccs_ctxsw_status_1_r(void)
+{
+	return 0x00502400;
+}
+static inline u32 gr_fecs_ctxsw_idlestate_r(void)
+{
+	return 0x00409420;
+}
+static inline u32 gr_gpc0_gpccs_ctxsw_idlestate_r(void)
+{
+	return 0x00502420;
+}
 static inline u32 gr_rstr2d_gpc_map0_r(void)
 {
 	return 0x0040780c;
-- 
cgit v1.2.2


From 8d717d1e7c09c9cff25d671be3cd25ea6aa57f31 Mon Sep 17 00:00:00 2001
From: Supriya <ssharatkumar@nvidia.com>
Date: Thu, 19 Feb 2015 10:40:22 +0530
Subject: gpu: nvgpu: reg with FECS HALT method

Change-Id: Ia196b98c79a71c9545e555260660e274982455a3
Signed-off-by: Supriya <ssharatkumar@nvidia.com>
Reviewed-on: http://git-master/r/709279
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index a1cfab57..ca425447 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -858,6 +858,10 @@ static inline u32 gr_fecs_method_push_adr_discover_preemption_image_size_v(void)
 {
 	return 0x0000001a;
 }
+static inline u32 gr_fecs_method_push_adr_halt_pipeline_v(void)
+{
+	return 0x00000004;
+}
 static inline u32 gr_fecs_host_int_status_r(void)
 {
 	return 0x00409c18;
-- 
cgit v1.2.2


From 7b70eb224a008b5debd1a6220d7835d6ebef70a0 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 20 Feb 2015 08:39:31 -0800
Subject: gpu: nvgpu: gp10b: Enable warnings as errors

Change-Id: I86de27309ebecd038a7b32c6f86d87ce0156eb14
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/709867
---
 drivers/gpu/nvgpu/gp10b/Makefile   | 1 +
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index 3575d414..e897ab9c 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -7,6 +7,7 @@ ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include
 ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include/uapi
 
 ccflags-$(CONFIG_GK20A) += -Wno-multichar
+ccflags-y += -Werror
 
 obj-$(CONFIG_GK20A)  += \
 	gr_gp10b.o  \
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index e3d4b973..271a6d0c 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -571,11 +571,12 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 		(*gr_ctx)->preempt_mode = flags;
 	}
 
-	if (class == PASCAL_COMPUTE_A)
+	if (class == PASCAL_COMPUTE_A) {
 		if (flags == NVGPU_GR_PREEMPTION_MODE_CILP)
 			(*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CILP;
 		else
 			(*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CTA;
+	}
 
 	gk20a_dbg_fn("done");
 
-- 
cgit v1.2.2


From eff1aa4d9212f76f24d362bc1f871bf82baa5d98 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 16 Jan 2015 09:15:20 -0800
Subject: gpu: nvgpu: gp10b: Set correct PBDMA signature

GPFIFO class was set to Maxwell class number. Also implement the
PBDMA signature HAL.

Change-Id: Ieaebcda8af96d5779289b311c0c433e8b4349234
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/672921
---
 drivers/gpu/nvgpu/gp10b/Makefile     |  1 +
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | 32 ++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.h | 18 ++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c  |  5 +++--
 4 files changed, 54 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/fifo_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index e897ab9c..e14cd0ee 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_GK20A)  += \
 	mm_gp10b.o \
 	fb_gp10b.o \
 	pmu_gp10b.o \
+	fifo_gp10b.o \
 	hal_gp10b.o
 
 obj-$(CONFIG_TEGRA_GK20A) += platform_gp10b_tegra.o
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
new file mode 100644
index 00000000..d67c7ee2
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -0,0 +1,32 @@
+/*
+ * GP10B fifo
+ *
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+
+#include "gk20a/gk20a.h"
+#include "gm20b/fifo_gm20b.h"
+#include "hw_pbdma_gp10b.h"
+
+static u32 gp10b_fifo_get_pbdma_signature(struct gk20a *g)
+{
+	return g->gpu_characteristics.gpfifo_class 
+		| pbdma_signature_sw_zero_f();
+}
+
+void gp10b_init_fifo(struct gpu_ops *gops)
+{
+	gm20b_init_fifo(gops);
+	gops->fifo.get_pbdma_signature = gp10b_fifo_get_pbdma_signature;
+}
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.h b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.h
new file mode 100644
index 00000000..18e2034f
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef FIFO_GP10B_H
+#define FIFO_GP10B_H
+struct gpu_ops;
+void gp10b_init_fifo(struct gpu_ops *gops);
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 1a34688a..30b56a5c 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -31,6 +31,7 @@
 #include "gm20b/gr_gm20b.h"
 #include "gm20b/gm20b_gating_reglist.h"
 #include "gm20b/fifo_gm20b.h"
+#include "gp10b/fifo_gp10b.h"
 #include "gm20b/pmu_gm20b.h"
 #include "gm20b/clk_gm20b.h"
 
@@ -93,7 +94,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gp10b_init_gr(gops);
 	gp10b_init_ltc(gops);
 	gp10b_init_fb(gops);
-	gm20b_init_fifo(gops);
+	gp10b_init_fifo(gops);
 	gp10b_init_gr_ctx(gops);
 	gp10b_init_mm(gops);
 	gp10b_init_pmu_ops(gops);
@@ -104,7 +105,7 @@ int gp10b_init_hal(struct gk20a *g)
 	c->twod_class = FERMI_TWOD_A;
 	c->threed_class = PASCAL_A;
 	c->compute_class = PASCAL_COMPUTE_A;
-	c->gpfifo_class = MAXWELL_CHANNEL_GPFIFO_A;
+	c->gpfifo_class = PASCAL_CHANNEL_GPFIFO_A;
 	c->inline_to_memory_class = KEPLER_INLINE_TO_MEMORY_B;
 	c->dma_copy_class = MAXWELL_DMA_COPY_A;
 
-- 
cgit v1.2.2


From fc898d8f56b4d1cdc5efcb44d572313e41189620 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 19 Feb 2015 12:55:19 -0800
Subject: gpu: nvgpu: gp10b specific LTC ISR

LTC interrupt register got moved, so use the new offset.

Bug 1587638

Change-Id: I3dbd44d92f2bcb3634c21ed46870ec1620d936cf
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/709571
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index beda3038..b32e2979 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -17,6 +17,8 @@
 
 #include "gk20a/gk20a.h"
 #include "gm20b/ltc_gm20b.h"
+#include "hw_proj_gp10b.h"
+#include "hw_mc_gp10b.h"
 #include "hw_ltc_gp10b.h"
 
 #include "gk20a/ltc_common.c"
@@ -123,6 +125,31 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	return 0;
 }
 
+void gp10b_ltc_isr(struct gk20a *g)
+{
+	u32 mc_intr, ltc_intr;
+	int ltc, slice;
+
+	mc_intr = gk20a_readl(g, mc_intr_ltc_r());
+	gk20a_err(dev_from_gk20a(g), "mc_ltc_intr: %08x",
+		  mc_intr);
+	for (ltc = 0; ltc < g->ltc_count; ltc++) {
+		if ((mc_intr & 1 << ltc) == 0)
+			continue;
+		for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
+			ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() +
+					   proj_ltc_stride_v() * ltc +
+					   proj_lts_stride_v() * slice);
+			gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x",
+				  ltc, slice, ltc_intr);
+			gk20a_writel(g, ltc_ltc0_lts0_intr_r() +
+					   proj_ltc_stride_v() * ltc +
+					   proj_lts_stride_v() * slice,
+				     ltc_intr);
+		}
+	}
+}
+
 void gp10b_init_ltc(struct gpu_ops *gops)
 {
 	gops->ltc.determine_L2_size_bytes = gp10b_determine_L2_size_bytes;
@@ -136,7 +163,7 @@ void gp10b_init_ltc(struct gpu_ops *gops)
 	gops->ltc.init_comptags = gp10b_ltc_init_comptags;
 	gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl;
 	gops->ltc.elpg_flush = gm20b_ltc_g_elpg_flush_locked;
-	gops->ltc.isr = gm20b_ltc_isr;
+	gops->ltc.isr = gp10b_ltc_isr;
 	gops->ltc.cbc_fix_config = gm20b_ltc_cbc_fix_config;
 	gops->ltc.flush = gm20b_flush_ltc;
 #ifdef CONFIG_DEBUG_FS
-- 
cgit v1.2.2


From 3d08b0dc35cd94612d77aab7e348ae509cc096f2 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 11 Feb 2015 14:34:48 -0800
Subject: gpu: nvgpu: Add ELPG_ENABLE register

Change-Id: I8b2272641c7f406cec9bb2649846e4b4b195e21a
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/708720
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
index 7d153b6f..30165e66 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_mc_gp10b.h
@@ -226,4 +226,20 @@ static inline u32 mc_enable_pb_sel_f(u32 v, u32 i)
 {
 	return (v & 0x1) << (0 + i*1);
 }
+static inline u32 mc_elpg_enable_r(void)
+{
+	return 0x0000020c;
+}
+static inline u32 mc_elpg_enable_xbar_enabled_f(void)
+{
+	return 0x4;
+}
+static inline u32 mc_elpg_enable_pfb_enabled_f(void)
+{
+	return 0x100000;
+}
+static inline u32 mc_elpg_enable_hub_enabled_f(void)
+{
+	return 0x20000000;
+}
 #endif
-- 
cgit v1.2.2


From 20a1ab078546c7206bb65ab007882195953df1dd Mon Sep 17 00:00:00 2001
From: Sam Payne <spayne@nvidia.com>
Date: Thu, 5 Feb 2015 10:46:35 -0800
Subject: gpu: nvgpu: gp10b: add ce interrupt support

ce interrupts use different register mapping
and format from gk20a and gm20b.

Change-Id: Icfe33bad940b2b829b6f57d07a3300adaf53d43c
Signed-off-by: Sam Payne <spayne@nvidia.com>
Reviewed-on: http://git-master/r/681646
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/Makefile       |  1 +
 drivers/gpu/nvgpu/gp10b/ce2_gp10b.c    | 83 ++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/ce2_gp10b.h    | 29 ++++++++++++
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c    |  6 ++-
 drivers/gpu/nvgpu/gp10b/hw_ce2_gp10b.h | 81 +++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/mc_gp10b.c     |  8 ++++
 6 files changed, 206 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp10b/ce2_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/ce2_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_ce2_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index e14cd0ee..421e50f6 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -12,6 +12,7 @@ ccflags-y += -Werror
 obj-$(CONFIG_GK20A)  += \
 	gr_gp10b.o  \
 	gr_ctx_gp10b.o  \
+	ce2_gp10b.o \
 	mc_gp10b.o  \
 	ltc_gp10b.o \
 	mm_gp10b.o \
diff --git a/drivers/gpu/nvgpu/gp10b/ce2_gp10b.c b/drivers/gpu/nvgpu/gp10b/ce2_gp10b.c
new file mode 100644
index 00000000..d76b97a5
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/ce2_gp10b.c
@@ -0,0 +1,83 @@
+/*
+ * GP10B Graphics Copy Engine  (gr host)
+ *
+ * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */
+#include "hw_ce2_gp10b.h"
+#include "ce2_gp10b.h"
+
+static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
+{
+	gk20a_dbg(gpu_dbg_intr, "ce2 non-blocking pipe interrupt\n");
+
+	/* wake threads waiting in this channel */
+	gk20a_channel_semaphore_wakeup(g);
+	return ce2_intr_status_nonblockpipe_pending_f();
+}
+
+static u32 ce2_blockpipe_isr(struct gk20a *g, u32 fifo_intr)
+{
+	gk20a_dbg(gpu_dbg_intr, "ce2 blocking pipe interrupt\n");
+
+	return ce2_intr_status_blockpipe_pending_f();
+}
+
+static u32 ce2_launcherr_isr(struct gk20a *g, u32 fifo_intr)
+{
+	gk20a_dbg(gpu_dbg_intr, "ce2 launch error interrupt\n");
+
+	return ce2_intr_status_launcherr_pending_f();
+}
+
+void gp10b_ce2_isr(struct gk20a *g)
+{
+	u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r(0));
+	u32 clear_intr = 0;
+
+	gk20a_dbg(gpu_dbg_intr, "ce2 isr %08x\n", ce2_intr);
+
+	/* clear blocking interrupts: they exhibit broken behavior */
+	if (ce2_intr & ce2_intr_status_blockpipe_pending_f())
+		clear_intr |= ce2_blockpipe_isr(g, ce2_intr);
+
+	if (ce2_intr & ce2_intr_status_launcherr_pending_f())
+		clear_intr |= ce2_launcherr_isr(g, ce2_intr);
+
+	gk20a_writel(g, ce2_intr_status_r(0), clear_intr);
+	return;
+}
+
+void gp10b_ce2_nonstall_isr(struct gk20a *g)
+{
+	u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r(0));
+	u32 clear_intr = 0;
+
+	gk20a_dbg(gpu_dbg_intr, "ce2 nonstall isr %08x\n", ce2_intr);
+
+	if (ce2_intr & ce2_intr_status_nonblockpipe_pending_f())
+		clear_intr |= ce2_nonblockpipe_isr(g, ce2_intr);
+
+	gk20a_writel(g, ce2_intr_status_r(0), clear_intr);
+
+	return;
+}
+void gp10b_init_ce2(struct gpu_ops *gops)
+{
+	gops->ce2.isr_stall = gp10b_ce2_isr;
+	gops->ce2.isr_nonstall = gp10b_ce2_nonstall_isr;
+}
diff --git a/drivers/gpu/nvgpu/gp10b/ce2_gp10b.h b/drivers/gpu/nvgpu/gp10b/ce2_gp10b.h
new file mode 100644
index 00000000..d432d1e0
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/ce2_gp10b.h
@@ -0,0 +1,29 @@
+/*
+ * drivers/gpu/nvgpu/gp10b/ce2_gp10b.h
+ *
+ * GP10B graphics copy engine (gr host)
+ *
+ * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef __CE2_GP10B_H__
+#define __CE2_GP10B_H__
+
+#include "gk20a/channel_gk20a.h"
+#include "gk20a/tsg_gk20a.h"
+
+void gp10b_init_ce2(struct gpu_ops *gops);
+
+#endif /*__CE2_GP10B_H__*/
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 30b56a5c..c23c0f17 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * GP10B Tegra HAL interface
  *
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -24,6 +24,7 @@
 #include "gp10b/mc_gp10b.h"
 #include "gp10b/ltc_gp10b.h"
 #include "gp10b/mm_gp10b.h"
+#include "gp10b/ce2_gp10b.h"
 #include "gp10b/fb_gp10b.h"
 #include "gp10b/pmu_gp10b.h"
 #include "gp10b/gr_ctx_gp10b.h"
@@ -94,7 +95,8 @@ int gp10b_init_hal(struct gk20a *g)
 	gp10b_init_gr(gops);
 	gp10b_init_ltc(gops);
 	gp10b_init_fb(gops);
-	gp10b_init_fifo(gops);
+	gm20b_init_fifo(gops);
+	gp10b_init_ce2(gops);
 	gp10b_init_gr_ctx(gops);
 	gp10b_init_mm(gops);
 	gp10b_init_pmu_ops(gops);
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ce2_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ce2_gp10b.h
new file mode 100644
index 00000000..b0c35a30
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_ce2_gp10b.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_ce2_gp10b_h_
+#define _hw_ce2_gp10b_h_
+
+static inline u32 ce2_intr_status_r(u32 i)
+{
+	return 0x00104410 + i*128;
+}
+static inline u32 ce2_intr_status_blockpipe_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 ce2_intr_status_blockpipe_reset_f(void)
+{
+	return 0x1;
+}
+static inline u32 ce2_intr_status_nonblockpipe_pending_f(void)
+{
+	return 0x2;
+}
+static inline u32 ce2_intr_status_nonblockpipe_reset_f(void)
+{
+	return 0x2;
+}
+static inline u32 ce2_intr_status_launcherr_pending_f(void)
+{
+	return 0x4;
+}
+static inline u32 ce2_intr_status_launcherr_reset_f(void)
+{
+	return 0x4;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
index 1760b6ad..47c8fcc6 100644
--- a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
@@ -110,6 +110,9 @@ irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g)
 
 	if (mc_intr_0 & BIT(g->fifo.engine_info[ENGINE_GR_GK20A].intr_id))
 		gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g));
+	if (mc_intr_0 & BIT(g->fifo.engine_info[ENGINE_CE2_GK20A].intr_id)
+		&& g->ops.ce2.isr_stall)
+		g->ops.ce2.isr_stall(g);
 	if (mc_intr_0 & mc_intr_pfifo_pending_f())
 		gk20a_fifo_isr(g);
 	if (mc_intr_0 & mc_intr_pmu_pending_f())
@@ -141,6 +144,11 @@ irqreturn_t mc_gp10b_intr_thread_nonstall(struct gk20a *g)
 		gk20a_fifo_nonstall_isr(g);
 	if (mc_intr_1 & BIT(g->fifo.engine_info[ENGINE_GR_GK20A].intr_id))
 		gk20a_gr_nonstall_isr(g);
+	if (mc_intr_1 & BIT(g->fifo.engine_info[ENGINE_CE2_GK20A].intr_id)
+		&& g->ops.ce2.isr_nonstall)
+		g->ops.ce2.isr_nonstall(g);
+
+
 
 	gk20a_writel(g, mc_intr_en_set_r(NVGPU_MC_INTR_NONSTALLING),
 			g->ops.mc.intr_mask_restore[NVGPU_MC_INTR_NONSTALLING]);
-- 
cgit v1.2.2


From 82bc7a9f2e50be7ffb99483f98b162b0a6e49765 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Fri, 20 Feb 2015 10:09:08 -0800
Subject: gpu: nvgpu: gp10b: update headers

Sync with latest hw includes and generated
header files.

Bug 1587825

Change-Id: I165b541e3215245eb43614e34670093b8420a7df
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/709881
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
index 6ddff281..dea53f96 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
@@ -78,6 +78,14 @@ static inline u32 ram_in_page_dir_base_vol_true_f(void)
 {
 	return 0x4;
 }
+static inline u32 ram_in_page_dir_base_fault_replay_tex_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_tex_m(void)
+{
+	return 0x1 << 4;
+}
 static inline u32 ram_in_page_dir_base_fault_replay_tex_w(void)
 {
 	return 128;
@@ -86,6 +94,14 @@ static inline u32 ram_in_page_dir_base_fault_replay_tex_true_f(void)
 {
 	return 0x10;
 }
+static inline u32 ram_in_page_dir_base_fault_replay_gcc_f(u32 v)
+{
+	return (v & 0x1) << 5;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_gcc_m(void)
+{
+	return 0x1 << 5;
+}
 static inline u32 ram_in_page_dir_base_fault_replay_gcc_w(void)
 {
 	return 128;
-- 
cgit v1.2.2


From 1f9b2f2852a48ee571ce631a2d72ac02db2d6c87 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Tue, 3 Feb 2015 12:56:24 -0800
Subject: gpu: nvgpu: gp10b: update fb headers

Update fb header with new mmu invalidate fields.

Bug 1587836

Change-Id: I33a30dc742f35d325c528a9bc73fea8cfc21e856
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/680800
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h | 214 +++++++++++++++++++++++++++++++++-
 1 file changed, 213 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
index d2ecdce1..2078bdca 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -122,6 +122,218 @@ static inline u32 fb_mmu_invalidate_all_pdb_true_f(void)
 {
 	return 0x2;
 }
+static inline u32 fb_mmu_invalidate_hubtlb_only_s(void)
+{
+	return 1;
+}
+static inline u32 fb_mmu_invalidate_hubtlb_only_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+static inline u32 fb_mmu_invalidate_hubtlb_only_m(void)
+{
+	return 0x1 << 2;
+}
+static inline u32 fb_mmu_invalidate_hubtlb_only_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 fb_mmu_invalidate_hubtlb_only_true_f(void)
+{
+	return 0x4;
+}
+static inline u32 fb_mmu_invalidate_replay_s(void)
+{
+	return 3;
+}
+static inline u32 fb_mmu_invalidate_replay_f(u32 v)
+{
+	return (v & 0x7) << 3;
+}
+static inline u32 fb_mmu_invalidate_replay_m(void)
+{
+	return 0x7 << 3;
+}
+static inline u32 fb_mmu_invalidate_replay_v(u32 r)
+{
+	return (r >> 3) & 0x7;
+}
+static inline u32 fb_mmu_invalidate_replay_none_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_mmu_invalidate_replay_start_f(void)
+{
+	return 0x8;
+}
+static inline u32 fb_mmu_invalidate_replay_start_ack_all_f(void)
+{
+	return 0x10;
+}
+static inline u32 fb_mmu_invalidate_replay_cancel_targeted_f(void)
+{
+	return 0x18;
+}
+static inline u32 fb_mmu_invalidate_replay_cancel_global_f(void)
+{
+	return 0x20;
+}
+static inline u32 fb_mmu_invalidate_replay_cancel_f(void)
+{
+	return 0x20;
+}
+static inline u32 fb_mmu_invalidate_sys_membar_s(void)
+{
+	return 1;
+}
+static inline u32 fb_mmu_invalidate_sys_membar_f(u32 v)
+{
+	return (v & 0x1) << 6;
+}
+static inline u32 fb_mmu_invalidate_sys_membar_m(void)
+{
+	return 0x1 << 6;
+}
+static inline u32 fb_mmu_invalidate_sys_membar_v(u32 r)
+{
+	return (r >> 6) & 0x1;
+}
+static inline u32 fb_mmu_invalidate_sys_membar_true_f(void)
+{
+	return 0x40;
+}
+static inline u32 fb_mmu_invalidate_ack_s(void)
+{
+	return 2;
+}
+static inline u32 fb_mmu_invalidate_ack_f(u32 v)
+{
+	return (v & 0x3) << 7;
+}
+static inline u32 fb_mmu_invalidate_ack_m(void)
+{
+	return 0x3 << 7;
+}
+static inline u32 fb_mmu_invalidate_ack_v(u32 r)
+{
+	return (r >> 7) & 0x3;
+}
+static inline u32 fb_mmu_invalidate_ack_ack_none_required_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_mmu_invalidate_ack_ack_intranode_f(void)
+{
+	return 0x100;
+}
+static inline u32 fb_mmu_invalidate_ack_ack_globally_f(void)
+{
+	return 0x80;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_id_s(void)
+{
+	return 6;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_id_f(u32 v)
+{
+	return (v & 0x3f) << 9;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_id_m(void)
+{
+	return 0x3f << 9;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_id_v(u32 r)
+{
+	return (r >> 9) & 0x3f;
+}
+static inline u32 fb_mmu_invalidate_cancel_gpc_id_s(void)
+{
+	return 5;
+}
+static inline u32 fb_mmu_invalidate_cancel_gpc_id_f(u32 v)
+{
+	return (v & 0x1f) << 15;
+}
+static inline u32 fb_mmu_invalidate_cancel_gpc_id_m(void)
+{
+	return 0x1f << 15;
+}
+static inline u32 fb_mmu_invalidate_cancel_gpc_id_v(u32 r)
+{
+	return (r >> 15) & 0x1f;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_type_s(void)
+{
+	return 1;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_type_f(u32 v)
+{
+	return (v & 0x1) << 20;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_type_m(void)
+{
+	return 0x1 << 20;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_type_v(u32 r)
+{
+	return (r >> 20) & 0x1;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_type_gpc_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_type_hub_f(void)
+{
+	return 0x100000;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_s(void)
+{
+	return 3;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_f(u32 v)
+{
+	return (v & 0x7) << 24;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_m(void)
+{
+	return 0x7 << 24;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_v(u32 r)
+{
+	return (r >> 24) & 0x7;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_all_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_pte_only_f(void)
+{
+	return 0x1000000;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_up_to_pde0_f(void)
+{
+	return 0x2000000;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_up_to_pde1_f(void)
+{
+	return 0x3000000;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_up_to_pde2_f(void)
+{
+	return 0x4000000;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_up_to_pde3_f(void)
+{
+	return 0x5000000;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_up_to_pde4_f(void)
+{
+	return 0x6000000;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_up_to_pde5_f(void)
+{
+	return 0x7000000;
+}
 static inline u32 fb_mmu_invalidate_trigger_s(void)
 {
 	return 1;
-- 
cgit v1.2.2


From c965d7a54a347dc71191561ea5cd1a389ee8b091 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Thu, 5 Feb 2015 17:38:49 -0800
Subject: gpu: nvgpu: gp10b: setup mm hw init

Add support for gp10b specific mm hw init.

Bug 1587825

Change-Id: Iaccf1bf73468cfdd1842a001ab5e682ac06f1950
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/681787
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 00e41fa7..293eb999 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -25,6 +25,39 @@ u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g)
 	return 36;
 }
 
+static int gp10b_init_mm_setup_hw(struct gk20a *g)
+{
+	struct mm_gk20a *mm = &g->mm;
+	struct inst_desc *inst_block = &mm->bar1.inst_block;
+	phys_addr_t inst_pa = inst_block->cpu_pa;
+	int err = 0;
+
+	gk20a_dbg_fn("");
+
+	g->ops.fb.set_mmu_page_size(g);
+
+	inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a());
+	gk20a_dbg_info("bar1 inst block ptr: 0x%08x",  (u32)inst_pa);
+
+	gk20a_writel(g, bus_bar1_block_r(),
+			bus_bar1_block_target_vid_mem_f() |
+			bus_bar1_block_mode_virtual_f() |
+			bus_bar1_block_ptr_f(inst_pa));
+
+	if (g->ops.mm.init_bar2_mm_hw_setup) {
+		err = g->ops.mm.init_bar2_mm_hw_setup(g);
+		if (err)
+			return err;
+	}
+
+	if (gk20a_mm_fb_flush(g) || gk20a_mm_fb_flush(g))
+		return -EBUSY;
+
+	gk20a_dbg_fn("done");
+	return err;
+
+}
+
 static int gb10b_init_bar2_vm(struct gk20a *g)
 {
 	int err;
@@ -79,6 +112,7 @@ void gp10b_init_mm(struct gpu_ops *gops)
 {
 	gm20b_init_mm(gops);
 	gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits;
+	gops->mm.init_mm_setup_hw = gp10b_init_mm_setup_hw;
 	gops->mm.init_bar2_vm = gb10b_init_bar2_vm;
 	gops->mm.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup;
 }
-- 
cgit v1.2.2


From 750014be79cce9562653db96e735f78fdc2e058f Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Thu, 5 Feb 2015 14:01:59 -0800
Subject: gpu: nvgpu: gp10b: support for replayable faults

Add support for enabling replayable faults during
channel instance block binding. Also fixed register
programming sequence for setting channel pbdma timeout.

Bug 1587825

Change-Id: I5a25819b960001d184507bc597aca051f2ac43ad
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/681703
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/Makefile     |   2 +-
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | 124 +++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.h |   4 +-
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c  |   3 +-
 4 files changed, 130 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index 421e50f6..e44aaeca 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -14,11 +14,11 @@ obj-$(CONFIG_GK20A)  += \
 	gr_ctx_gp10b.o  \
 	ce2_gp10b.o \
 	mc_gp10b.o  \
+	fifo_gp10b.o  \
 	ltc_gp10b.o \
 	mm_gp10b.o \
 	fb_gp10b.o \
 	pmu_gp10b.o \
-	fifo_gp10b.o \
 	hal_gp10b.o
 
 obj-$(CONFIG_TEGRA_GK20A) += platform_gp10b_tegra.o
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index d67c7ee2..db5d4ede 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -13,11 +13,133 @@
  * more details.
  */
 
+#include <linux/delay.h>
 #include <linux/types.h>
 
 #include "gk20a/gk20a.h"
 #include "gm20b/fifo_gm20b.h"
 #include "hw_pbdma_gp10b.h"
+#include "fifo_gp10b.h"
+#include "hw_ccsr_gp10b.h"
+#include "hw_fifo_gp10b.h"
+#include "hw_ram_gp10b.h"
+
+static void gp10b_set_pdb_fault_replay_flags(struct gk20a *g,
+				void *inst_ptr)
+{
+	u32 val;
+
+	gk20a_dbg_fn("");
+
+	val = gk20a_mem_rd32(inst_ptr,
+			ram_in_page_dir_base_fault_replay_tex_w());
+	val &= ~ram_in_page_dir_base_fault_replay_tex_m();
+	val |= ram_in_page_dir_base_fault_replay_tex_true_f();
+	gk20a_mem_wr32(inst_ptr,
+		ram_in_page_dir_base_fault_replay_tex_w(), val);
+
+	val = gk20a_mem_rd32(inst_ptr,
+			ram_in_page_dir_base_fault_replay_gcc_w());
+	val &= ~ram_in_page_dir_base_fault_replay_gcc_m();
+	val |= ram_in_page_dir_base_fault_replay_gcc_true_f();
+	gk20a_mem_wr32(inst_ptr,
+		ram_in_page_dir_base_fault_replay_gcc_w(), val);
+
+	gk20a_dbg_fn("done");
+}
+
+static int channel_gp10b_commit_userd(struct channel_gk20a *c)
+{
+	u32 addr_lo;
+	u32 addr_hi;
+	void *inst_ptr;
+
+	gk20a_dbg_fn("");
+
+	inst_ptr = c->inst_block.cpuva;
+	if (!inst_ptr)
+		return -ENOMEM;
+
+	addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
+	addr_hi = u64_hi32(c->userd_iova);
+
+	gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
+		c->hw_chid, (u64)c->userd_iova);
+
+	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
+		 pbdma_userd_target_vid_mem_f() |
+		 pbdma_userd_addr_f(addr_lo));
+
+	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
+		 pbdma_userd_target_vid_mem_f() |
+		 pbdma_userd_hi_addr_f(addr_hi));
+
+	return 0;
+}
+
+static int channel_gp10b_setup_ramfc(struct channel_gk20a *c,
+			u64 gpfifo_base, u32 gpfifo_entries)
+{
+	void *inst_ptr;
+
+	gk20a_dbg_fn("");
+
+	inst_ptr = c->inst_block.cpuva;
+	if (!inst_ptr)
+		return -ENOMEM;
+
+	memset(inst_ptr, 0, ram_fc_size_val_v());
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
+		pbdma_gp_base_offset_f(
+		u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
+		pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
+		pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
+		 pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
+		pbdma_formats_gp_fermi0_f() |
+		pbdma_formats_pb_fermi1_f() |
+		pbdma_formats_mp_fermi0_f());
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
+		pbdma_pb_header_priv_user_f() |
+		pbdma_pb_header_method_zero_f() |
+		pbdma_pb_header_subchannel_zero_f() |
+		pbdma_pb_header_level_main_f() |
+		pbdma_pb_header_first_true_f() |
+		pbdma_pb_header_type_inc_f());
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
+		pbdma_subdevice_id_f(1) |
+		pbdma_subdevice_status_active_f() |
+		pbdma_subdevice_channel_dma_enable_f());
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
+		pbdma_acquire_retry_man_2_f() |
+		pbdma_acquire_retry_exp_2_f() |
+		pbdma_acquire_timeout_exp_max_f() |
+		pbdma_acquire_timeout_man_max_f() |
+		pbdma_acquire_timeout_en_disable_f());
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
+		pbdma_runlist_timeslice_timeout_128_f() |
+		pbdma_runlist_timeslice_timescale_3_f() |
+		pbdma_runlist_timeslice_enable_true_f());
+
+	gp10b_set_pdb_fault_replay_flags(c->g, inst_ptr);
+
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
+
+	return channel_gp10b_commit_userd(c);
+}
 
 static u32 gp10b_fifo_get_pbdma_signature(struct gk20a *g)
 {
@@ -28,5 +150,7 @@ static u32 gp10b_fifo_get_pbdma_signature(struct gk20a *g)
 void gp10b_init_fifo(struct gpu_ops *gops)
 {
 	gm20b_init_fifo(gops);
+	gops->fifo.setup_ramfc = channel_gp10b_setup_ramfc;
 	gops->fifo.get_pbdma_signature = gp10b_fifo_get_pbdma_signature;
+
 }
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.h b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.h
index 18e2034f..ad3679aa 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.h
@@ -1,5 +1,7 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * GP10B Fifo
+ *
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index c23c0f17..ff140a04 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -28,6 +28,7 @@
 #include "gp10b/fb_gp10b.h"
 #include "gp10b/pmu_gp10b.h"
 #include "gp10b/gr_ctx_gp10b.h"
+#include "gp10b/fifo_gp10b.h"
 
 #include "gm20b/gr_gm20b.h"
 #include "gm20b/gm20b_gating_reglist.h"
@@ -95,7 +96,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gp10b_init_gr(gops);
 	gp10b_init_ltc(gops);
 	gp10b_init_fb(gops);
-	gm20b_init_fifo(gops);
+	gp10b_init_fifo(gops);
 	gp10b_init_ce2(gops);
 	gp10b_init_gr_ctx(gops);
 	gp10b_init_mm(gops);
-- 
cgit v1.2.2


From 8fe7abebbbbfdc8b1acedd41aa8ac1926a24dc93 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Fri, 6 Feb 2015 12:39:05 -0800
Subject: gpu: nvgpu: gp10b: Add replayable pagefault buffer

Add support for replayable fault buffer and enable it.

Bug 1587836

Change-Id: Iee4ba42ab175c0d72d2c041fdb3ac9d845358847
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/661668
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/Makefile     |   3 +-
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c   |   3 +
 drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c | 146 +++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/rpfb_gp10b.h |  30 +++++++
 4 files changed, 181 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/rpfb_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index e44aaeca..9d0c0311 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_GK20A)  += \
 	mm_gp10b.o \
 	fb_gp10b.o \
 	pmu_gp10b.o \
-	hal_gp10b.o
+	hal_gp10b.o \
+	rpfb_gp10b.o
 
 obj-$(CONFIG_TEGRA_GK20A) += platform_gp10b_tegra.o
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 293eb999..65bc6cbc 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -17,6 +17,7 @@
 #include <linux/dma-mapping.h>
 #include "gk20a/gk20a.h"
 #include "mm_gp10b.h"
+#include "rpfb_gp10b.h"
 #include "hw_ram_gp10b.h"
 #include "hw_bus_gp10b.h"
 
@@ -53,6 +54,8 @@ static int gp10b_init_mm_setup_hw(struct gk20a *g)
 	if (gk20a_mm_fb_flush(g) || gk20a_mm_fb_flush(g))
 		return -EBUSY;
 
+	err = gp10b_replayable_pagefault_buffer_init(g);
+
 	gk20a_dbg_fn("done");
 	return err;
 
diff --git a/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c b/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
new file mode 100644
index 00000000..ba91403c
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
@@ -0,0 +1,146 @@
+/*
+ * GP10B RPFB
+ *
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/pm_runtime.h>
+#include <linux/dma-mapping.h>
+#include "gk20a/gk20a.h"
+#include "rpfb_gp10b.h"
+#include "hw_fifo_gp10b.h"
+#include "hw_fb_gp10b.h"
+#include "hw_bus_gp10b.h"
+#include "hw_gmmu_gp10b.h"
+
+int gp10b_replayable_pagefault_buffer_init(struct gk20a *g)
+{
+	u32 addr_lo;
+	u32 addr_hi;
+	struct vm_gk20a *vm = &g->mm.bar2.vm;
+	int err;
+	size_t rbfb_size = NV_UVM_FAULT_BUF_SIZE *
+		fifo_replay_fault_buffer_size_hw_entries_v();
+
+	gk20a_dbg_fn("");
+
+	err = gk20a_gmmu_alloc_map(vm, rbfb_size, &g->mm.bar2_desc);
+	if (err) {
+		dev_err(dev_from_gk20a(g), "%s Error in replayable fault buffer\n",
+			__func__);
+		return err;
+	}
+	addr_lo = u64_lo32(g->mm.bar2_desc.gpu_va >> 12);
+	addr_hi = u64_hi32(g->mm.bar2_desc.gpu_va);
+	gk20a_writel(g, fifo_replay_fault_buffer_hi_r(),
+			fifo_replay_fault_buffer_hi_base_f(addr_hi));
+
+	gk20a_writel(g, fifo_replay_fault_buffer_lo_r(),
+			fifo_replay_fault_buffer_lo_base_f(addr_lo) |
+			fifo_replay_fault_buffer_lo_enable_true_v());
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
+void gp10b_replayable_pagefault_buffer_deinit(struct gk20a *g)
+{
+	struct vm_gk20a *vm = &g->mm.bar2.vm;
+	gk20a_gmmu_unmap_free(vm, &g->mm.bar2_desc);
+}
+
+u32 gp10b_replayable_pagefault_buffer_get_index(struct gk20a *g)
+{
+	u32 get_idx = 0;
+	gk20a_dbg_fn("");
+
+	get_idx = gk20a_readl(g, fifo_replay_fault_buffer_get_r());
+
+	if (get_idx >= fifo_replay_fault_buffer_size_hw_entries_v())
+		dev_err(dev_from_gk20a(g), "%s Error in replayable fault buffer\n",
+			__func__);
+	gk20a_dbg_fn("done");
+	return get_idx;
+}
+
+u32 gp10b_replayable_pagefault_buffer_put_index(struct gk20a *g)
+{
+	u32 put_idx = 0;
+
+	gk20a_dbg_fn("");
+	put_idx = gk20a_readl(g, fifo_replay_fault_buffer_put_r());
+
+	if (put_idx >= fifo_replay_fault_buffer_size_hw_entries_v())
+		dev_err(dev_from_gk20a(g), "%s Error in UVM\n",
+			__func__);
+	gk20a_dbg_fn("done");
+	return put_idx;
+}
+
+bool gp10b_replayable_pagefault_buffer_is_empty(struct gk20a *g)
+{
+	u32 get = gk20a_readl(g, fifo_replay_fault_buffer_get_r());
+	u32 put = gk20a_readl(g, fifo_replay_fault_buffer_put_r());
+
+	return get == put; /* true iff GET and PUT read equal */
+}
+
+bool gp10b_replayable_pagefault_buffer_is_full(struct gk20a *g)
+{
+	u32 get_idx = gk20a_readl(g, fifo_replay_fault_buffer_get_r());
+	u32 put_idx = gk20a_readl(g, fifo_replay_fault_buffer_put_r());
+	u32 hw_entries = gk20a_readl(g, fifo_replay_fault_buffer_size_r());
+	/* Size reads 0 => '%' would divide by zero; report not full. */
+	return hw_entries && get_idx == ((put_idx + 1) % hw_entries);
+}
+
+bool gp10b_replayable_pagefault_buffer_is_overflow(struct gk20a *g)
+{
+	u32 info = gk20a_readl(g, fifo_replay_fault_buffer_info_r());
+	/* NOTE(review): _f() applied to a register read; was _v() meant? */
+	return fifo_replay_fault_buffer_info_overflow_f(info);
+}
+
+void gp10b_replayable_pagefault_buffer_clear_overflow(struct gk20a *g)
+{
+	u32 info = gk20a_readl(g, fifo_replay_fault_buffer_info_r());
+
+	info |= fifo_replay_fault_buffer_info_overflow_clear_v();
+	gk20a_writel(g, fifo_replay_fault_buffer_info_r(), info);
+
+}
+
+void gp10b_replayable_pagefault_buffer_info(struct gk20a *g)
+{
+
+	gk20a_dbg_fn("");
+	pr_info("rpfb low: 0x%x\n",
+		(gk20a_readl(g, fifo_replay_fault_buffer_lo_r()) >> 12));
+	pr_info("rpfb hi: 0x%x\n",
+		gk20a_readl(g, fifo_replay_fault_buffer_hi_r()));
+	pr_info("rpfb enabled: 0x%x\n",
+		(gk20a_readl(g, fifo_replay_fault_buffer_lo_r()) & 0x1));
+	pr_info("rpfb size: %d\n",
+		gk20a_readl(g, fifo_replay_fault_buffer_size_r()));
+	pr_info("rpfb get index: %d\n",
+		gp10b_replayable_pagefault_buffer_get_index(g));
+	pr_info("rpfb put index: %d\n",
+		gp10b_replayable_pagefault_buffer_put_index(g));
+	pr_info("rpfb empty: %d\n",
+		gp10b_replayable_pagefault_buffer_is_empty(g));
+	pr_info("rpfb full  %d\n",
+		gp10b_replayable_pagefault_buffer_is_full(g));
+	pr_info("rpfb overflow  %d\n",
+		gp10b_replayable_pagefault_buffer_is_overflow(g));
+
+	gk20a_dbg_fn("done");
+}
diff --git a/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.h b/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.h
new file mode 100644
index 00000000..965c9573
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef RPFB_GP10B_H
+#define RPFB_GP10B_H
+struct gk20a;
+
+#define NV_UVM_FAULT_BUF_SIZE 32
+/* Replayable pagefault buffer (rpfb) interface for gp10b. */
+int gp10b_replayable_pagefault_buffer_init(struct gk20a *g);
+u32 gp10b_replayable_pagefault_buffer_get_index(struct gk20a *g);
+u32 gp10b_replayable_pagefault_buffer_put_index(struct gk20a *g);
+bool gp10b_replayable_pagefault_buffer_is_empty(struct gk20a *g);
+bool gp10b_replayable_pagefault_buffer_is_full(struct gk20a *g);
+bool gp10b_replayable_pagefault_buffer_is_overflow(struct gk20a *g);
+void gp10b_replayable_pagefault_buffer_clear_overflow(struct gk20a *g);
+void gp10b_replayable_pagefault_buffer_info(struct gk20a *g);
+void gp10b_replayable_pagefault_buffer_deinit(struct gk20a *g);
+
+#endif /* RPFB_GP10B_H */
-- 
cgit v1.2.2


From ce85eae72a1aa54518fae68ef616e9b95bccb052 Mon Sep 17 00:00:00 2001
From: Kirill Artamonov <kartamonov@nvidia.com>
Date: Tue, 3 Mar 2015 17:29:19 +0200
Subject: gpu: nvgpu: gp10b: fix swdx_rm_spill size and pointer

Fixed incorrectly encoded pointer and size.

bug 1525327
bug 1581799

Change-Id: Ie6e94e47c3b11e9d9aa63a70b61e6e89f69e971b
Signed-off-by: Kirill Artamonov <kartamonov@nvidia.com>
Reviewed-on: http://git-master/r/713209
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 271a6d0c..4d0de15f 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -664,8 +664,9 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 
 		addr = (u64_lo32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) >>
 			gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
-			(u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) <<
+			(u64_hi32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) <<
 			 (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()));
+		size = gr_ctx->t18x.spill_ctxsw_buffer.size;
 
 		gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_swdx_rm_spill_buffer_addr_r(),
@@ -673,7 +674,7 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 				true);
 		gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_swdx_rm_spill_buffer_size_r(),
-				gr_gpc0_swdx_rm_spill_buffer_size_256b_f(addr),
+				gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
 				true);
 
 		cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
-- 
cgit v1.2.2


From 3be18b463bfc7cfa66c16219e4432601ee769a26 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Mon, 2 Mar 2015 14:48:00 +0530
Subject: gpu: nvgpu: add exception registers to dump

Add below exception registers to GR dump :
NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION
NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION_EN
NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION
NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_EN
NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION
NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN

Bug 200078514

Change-Id: I2400e360fea0b3bdcdf5f3dd6ef250867fb191e6
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/712481
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c    | 12 ++++++++++++
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 24 ++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 4d0de15f..306c4223 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -810,6 +810,18 @@ static int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
 		gk20a_readl(g, gr_pri_bes_zrop_status_r()));
 	gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS2 : 0x%x\n",
 		gk20a_readl(g, gr_pri_bes_zrop_status2_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION: 0x%x\n",
+		gk20a_readl(g, gr_pri_be0_becs_be_exception_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION_EN: 0x%x\n",
+		gk20a_readl(g, gr_pri_be0_becs_be_exception_en_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_EN: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_en_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
+	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
+		gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));
 	return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index ca425447..3ee1cbfe 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -286,6 +286,30 @@ static inline u32 gr_engine_status_value_busy_f(void)
 {
 	return 0x1;
 }
+static inline u32 gr_pri_be0_becs_be_exception_r(void)
+{
+	return 0x00410204;
+}
+static inline u32 gr_pri_be0_becs_be_exception_en_r(void)
+{
+	return 0x00410208;
+}
+static inline u32 gr_pri_gpc0_gpccs_gpc_exception_r(void)
+{
+	return 0x00502c90;
+}
+static inline u32 gr_pri_gpc0_gpccs_gpc_exception_en_r(void)
+{
+	return 0x00502c94;
+}
+static inline u32 gr_pri_gpc0_tpc0_tpccs_tpc_exception_r(void)
+{
+	return 0x00504508;
+}
+static inline u32 gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
+{
+	return 0x0050450c;
+}
 static inline u32 gr_activity_0_r(void)
 {
 	return 0x00400380;
-- 
cgit v1.2.2


From 6539c538c1d1fcd0227096959790c625d28e791a Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 5 Mar 2015 09:34:06 -0800
Subject: gpu: nvgpu: gp10b: Use gp10b version of phys bits

Use gp10b version of get_physical_addr_bits.

Change-Id: I56d1299e259e91a61fa82dc061e7ca3a5130b9d4
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/714402
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 65bc6cbc..6b140c92 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -114,7 +114,7 @@ static int gb10b_init_bar2_mm_hw_setup(struct gk20a *g)
 void gp10b_init_mm(struct gpu_ops *gops)
 {
 	gm20b_init_mm(gops);
-	gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits;
+	gops->mm.get_physical_addr_bits = gp10b_mm_get_physical_addr_bits;
 	gops->mm.init_mm_setup_hw = gp10b_init_mm_setup_hw;
 	gops->mm.init_bar2_vm = gb10b_init_bar2_vm;
 	gops->mm.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup;
-- 
cgit v1.2.2


From 208e2c33534eebf48239df04b0a25d3f82e681d0 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 11 Mar 2015 15:55:14 -0700
Subject: gpu: nvgpu: gp10b: Fix offset for preemption ptr

Offset for preemption pointer was calculated incorrectly.

Bug 1617214

Change-Id: I9c1a9ae24dcd523f4ae17eae0a5b07831839fadb
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/716528
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 306c4223..84faa252 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -642,7 +642,7 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 		u32 size;
 		u32 cbes_reserve;
 
-		gk20a_mem_wr32(ctx_ptr, ctxsw_prog_main_image_full_preemption_ptr_o(),
+		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_full_preemption_ptr_o(), 0,
 				gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8);
 
 		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
-- 
cgit v1.2.2


From e1339b8589017221f921e7ed95b1445ac289a4a9 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 2 Mar 2015 17:52:21 -0800
Subject: gpu: nvgpu: gp10b: Use mem_desc for buffers

Change-Id: Ia986125bf1a6e06121291f6dde24e580f0a1b61f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/712836
---
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c |  4 ++--
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c   | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index db5d4ede..59f7deef 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -56,7 +56,7 @@ static int channel_gp10b_commit_userd(struct channel_gk20a *c)
 
 	gk20a_dbg_fn("");
 
-	inst_ptr = c->inst_block.cpuva;
+	inst_ptr = c->inst_block.cpu_va;
 	if (!inst_ptr)
 		return -ENOMEM;
 
@@ -84,7 +84,7 @@ static int channel_gp10b_setup_ramfc(struct channel_gk20a *c,
 
 	gk20a_dbg_fn("");
 
-	inst_ptr = c->inst_block.cpuva;
+	inst_ptr = c->inst_block.cpu_va;
 	if (!inst_ptr)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 6b140c92..9fde6147 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -29,8 +29,8 @@ u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g)
 static int gp10b_init_mm_setup_hw(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct inst_desc *inst_block = &mm->bar1.inst_block;
-	phys_addr_t inst_pa = inst_block->cpu_pa;
+	struct mem_desc *inst_block = &mm->bar1.inst_block;
+	phys_addr_t inst_pa = gk20a_mem_phys(inst_block);
 	int err = 0;
 
 	gk20a_dbg_fn("");
@@ -66,7 +66,7 @@ static int gb10b_init_bar2_vm(struct gk20a *g)
 	int err;
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm = &mm->bar2.vm;
-	struct inst_desc *inst_block = &mm->bar2.inst_block;
+	struct mem_desc *inst_block = &mm->bar2.inst_block;
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
 	/* BAR2 aperture size is 32MB */
@@ -93,8 +93,8 @@ clean_up_va:
 static int gb10b_init_bar2_mm_hw_setup(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct inst_desc *inst_block = &mm->bar2.inst_block;
-	phys_addr_t inst_pa = inst_block->cpu_pa;
+	struct mem_desc *inst_block = &mm->bar2.inst_block;
+	phys_addr_t inst_pa = gk20a_mem_phys(inst_block);
 
 	gk20a_dbg_fn("");
 
-- 
cgit v1.2.2


From 8db1d8abc2b72ecb5b1ddcbe9ff3eeaeeee072e7 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Wed, 4 Mar 2015 15:15:44 +0530
Subject: gpu: nvgpu: accessor for memfmt exception

Add accessor for NV_PGRAPH_EXCEPTION_MEMFMT

Bug 200078514

Change-Id: Ibf4ce91dfac12d7f6cffb7c65873696e080ff1a5
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/714167
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 3ee1cbfe..3cac1f70 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -166,6 +166,10 @@ static inline u32 gr_exception_gpc_m(void)
 {
 	return 0x1 << 24;
 }
+static inline u32 gr_exception_memfmt_m(void)
+{
+	return 0x1 << 1;
+}
 static inline u32 gr_exception1_r(void)
 {
 	return 0x00400118;
-- 
cgit v1.2.2


From bd65e7611feb4bfaea0e3cb7f57a573391587e2e Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Mon, 9 Mar 2015 12:12:26 +0200
Subject: gpu: nvgpu: zbc: disable activity only from ioctl

Move the fifo engine activity disabling and wait-for-idle from the
lowest-level functions higher, into the ioctl path of zbc operations, so
that the sw initialization path wouldn't call them. During the init
path, the disable isn't necessary, and the code path could result in a
deadlock in the fifo runlist mutex.

Change-Id: I56e73204e288331165358fc9856390f1eb724488
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/715196
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 56 ++------------------------------------
 1 file changed, 2 insertions(+), 54 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 84faa252..1b7dd405 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -163,28 +163,9 @@ void gr_gp10b_commit_global_pagepool(struct gk20a *g,
 static int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
 				  struct zbc_entry *color_val, u32 index)
 {
-	struct fifo_gk20a *f = &g->fifo;
-	struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
 	u32 i;
-	unsigned long end_jiffies = jiffies +
-		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
-	u32 ret;
 	u32 zbc_c;
 
-	ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
-	if (ret) {
-		gk20a_err(dev_from_gk20a(g),
-			"failed to disable gr engine activity\n");
-		return ret;
-	}
-
-	ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
-	if (ret) {
-		gk20a_err(dev_from_gk20a(g),
-			"failed to idle graphics\n");
-		goto clean_up;
-	}
-
 	/* update l2 table */
 	g->ops.ltc.set_zbc_color_entry(g, color_val, index);
 
@@ -226,40 +207,14 @@ static int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
 	zbc_c |= color_val->format << (index % 4) * 6;
 	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + ALIGN(index, 4), zbc_c);
 
-clean_up:
-	ret = gk20a_fifo_enable_engine_activity(g, gr_info);
-	if (ret) {
-		gk20a_err(dev_from_gk20a(g),
-			"failed to enable gr engine activity\n");
-	}
-
-	return ret;
+	return 0;
 }
 
 static int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
 				struct zbc_entry *depth_val, u32 index)
 {
-	struct fifo_gk20a *f = &g->fifo;
-	struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
-	unsigned long end_jiffies = jiffies +
-		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
-	u32 ret;
 	u32 zbc_z;
 
-	ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
-	if (ret) {
-		gk20a_err(dev_from_gk20a(g),
-			"failed to disable gr engine activity\n");
-		return ret;
-	}
-
-	ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
-	if (ret) {
-		gk20a_err(dev_from_gk20a(g),
-			"failed to idle graphics\n");
-		goto clean_up;
-	}
-
 	/* update l2 table */
 	g->ops.ltc.set_zbc_depth_entry(g, depth_val, index);
 
@@ -289,14 +244,7 @@ static int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
 	zbc_z |= depth_val->format << (index % 4) * 6;
 	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + ALIGN(index, 4), zbc_z);
 
-clean_up:
-	ret = gk20a_fifo_enable_engine_activity(g, gr_info);
-	if (ret) {
-		gk20a_err(dev_from_gk20a(g),
-			"failed to enable gr engine activity\n");
-	}
-
-	return ret;
+	return 0;
 }
 
 static u32 gr_gp10b_pagepool_default_size(struct gk20a *g)
-- 
cgit v1.2.2


From e3dcb7546a0d99ca7f1e120f481174e6ec650860 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Sat, 21 Mar 2015 09:02:22 -0700
Subject: gpu: nvgpu: gp10b: Do not clear compbit store size

Do not clear compbit store size if max size is zero. It's already
zero at this point.

Change-Id: I70d99cfe459fae27d8c1be4aa569ac0717a454d7
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/720599
---
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index b32e2979..dbeed474 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -72,10 +72,8 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 
 	gk20a_dbg_fn("");
 
-	if (max_comptag_lines == 0) {
-		gr->compbit_store.size = 0;
+	if (max_comptag_lines == 0)
 		return 0;
-	}
 
 	if (max_comptag_lines > hw_max_comptag_lines)
 		max_comptag_lines = hw_max_comptag_lines;
-- 
cgit v1.2.2


From 9f22ad4687068089696bf61e5e900361e2b62502 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Fri, 20 Mar 2015 13:34:59 +0530
Subject: gpu: nvgpu: add get_iova_addr() for gp10b

Add platform specific gp10b_mm_iova_addr() to get
iova/phys address for gp10b

If SMMU is not enabled and the IO coherence flag is set, set
bit NVGPU_MM_GET_IO_COHERENCE_BIT (35) in the physical address
and return the physical address.

If SMMU is enabled, return the iova address

Bug 1605653

Change-Id: I5c91a8c8d85d8a8e422406e3c91fc1dda3cb0870
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/713106
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 28 ++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/mm_gp10b.h |  3 +++
 2 files changed, 31 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 9fde6147..c316aa9f 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -111,6 +111,33 @@ static int gb10b_init_bar2_mm_hw_setup(struct gk20a *g)
 	gk20a_dbg_fn("done");
 	return 0;
 }
+
+static u64 gp10b_mm_phys_addr_translate(struct gk20a *g, u64 phys_addr,
+		u32 flags)
+{
+	/* The IO-coherence bit is OR'ed in only when there is no IOMMU. */
+	if (!device_is_iommuable(dev_from_gk20a(g)) &&
+	    (flags & NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT))
+		return phys_addr | (1ULL << NVGPU_MM_GET_IO_COHERENCE_BIT);
+
+	return phys_addr;
+}
+
+static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
+		u32 flags)
+{
+	/* No IOMMU, or no DMA address recorded: use the physical address. */
+	if (!device_is_iommuable(dev_from_gk20a(g)) ||
+	    sg_dma_address(sgl) == 0)
+		return gp10b_mm_phys_addr_translate(g, sg_phys(sgl), flags);
+
+	/* A DMA address of DMA_ERROR_CODE is reported to the caller as 0. */
+	if (sg_dma_address(sgl) == DMA_ERROR_CODE)
+		return 0;
+
+	return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
+}
+
 void gp10b_init_mm(struct gpu_ops *gops)
 {
 	gm20b_init_mm(gops);
@@ -118,4 +145,5 @@ void gp10b_init_mm(struct gpu_ops *gops)
 	gops->mm.init_mm_setup_hw = gp10b_init_mm_setup_hw;
 	gops->mm.init_bar2_vm = gb10b_init_bar2_vm;
 	gops->mm.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup;
+	gops->mm.get_iova_addr = gp10b_mm_iova_addr;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.h b/drivers/gpu/nvgpu/gp10b/mm_gp10b.h
index 38ca93a4..034944e0 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.h
@@ -13,6 +13,9 @@
 
 #ifndef MM_GP10B_H
 #define MM_GP10B_H
+
+#define NVGPU_MM_GET_IO_COHERENCE_BIT	35
+
 struct gpu_ops;
 
 void gp10b_init_mm(struct gpu_ops *gops);
-- 
cgit v1.2.2


From d4e870edd0251cb8e8ec4915490e8716b8b67fa9 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 25 Mar 2015 10:50:41 -0700
Subject: gpu: nvgpu: gp10b: Regenerate HW headers

Change-Id: Id1954b6e96dbc75ab217a4b36a11a0457f9ceef1
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/722845
---
 drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h   |  8 ++++
 drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h | 12 ++++++
 drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h | 30 ++++++++++++-
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h   | 76 +++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h  | 18 +++++++-
 5 files changed, 142 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
index 2078bdca..1fee19b1 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
@@ -426,10 +426,18 @@ static inline u32 fb_mmu_debug_ctrl_debug_v(u32 r)
 {
 	return (r >> 16) & 0x1;
 }
+static inline u32 fb_mmu_debug_ctrl_debug_m(void)
+{
+	return 0x1 << 16;
+}
 static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void)
 {
 	return 0x00000001;
 }
+static inline u32 fb_mmu_debug_ctrl_debug_disabled_v(void)
+{
+	return 0x00000000;
+}
 static inline u32 fb_mmu_vpr_info_r(void)
 {
 	return 0x00100cd0;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
index 2c0367d5..6f7e09ff 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
@@ -210,10 +210,22 @@ static inline u32 fifo_intr_en_0_r(void)
 {
 	return 0x00002140;
 }
+static inline u32 fifo_intr_en_0_sched_error_f(u32 v)
+{
+	return (v & 0x1) << 8;
+}
 static inline u32 fifo_intr_en_0_sched_error_m(void)
 {
 	return 0x1 << 8;
 }
+static inline u32 fifo_intr_en_0_mmu_fault_f(u32 v)
+{
+	return (v & 0x1) << 28;
+}
+static inline u32 fifo_intr_en_0_mmu_fault_m(void)
+{
+	return 0x1 << 28;
+}
 static inline u32 fifo_intr_en_1_r(void)
 {
 	return 0x00002528;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
index cdb28d08..b6b68718 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -98,4 +98,32 @@ static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_data_no_f(void)
 {
 	return 0x0;
 }
+static inline u32 fuse_status_opt_fbio_r(void)
+{
+	return 0x00021c14;
+}
+static inline u32 fuse_status_opt_fbio_data_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+static inline u32 fuse_status_opt_fbio_data_m(void)
+{
+	return 0xffff << 0;
+}
+static inline u32 fuse_status_opt_fbio_data_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 fuse_status_opt_rop_l2_fbp_r(u32 i)
+{
+	return 0x00021d70 + i*4;
+}
+static inline u32 fuse_status_opt_fbp_r(void)
+{
+	return 0x00021d38;
+}
+static inline u32 fuse_status_opt_fbp_idx_v(u32 r, u32 i)
+{
+	return (r >> (0 + i*1)) & 0x1;	/* bit i of r */
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 3cac1f70..72f1d68c 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -170,6 +170,10 @@ static inline u32 gr_exception_memfmt_m(void)
 {
 	return 0x1 << 1;
 }
+static inline u32 gr_exception_ds_m(void)
+{
+	return 0x1 << 4;
+}
 static inline u32 gr_exception1_r(void)
 {
 	return 0x00400118;
@@ -330,6 +334,30 @@ static inline u32 gr_activity_4_r(void)
 {
 	return 0x00400390;
 }
+static inline u32 gr_pri_gpc0_gcc_dbg_r(void)
+{
+	return 0x00501000;
+}
+static inline u32 gr_pri_gpcs_gcc_dbg_r(void)
+{
+	return 0x00419000;
+}
+static inline u32 gr_pri_gpcs_gcc_dbg_invalidate_m(void)
+{
+	return 0x1 << 1;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_cache_control_r(void)
+{
+	return 0x005046a4;
+}
+static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_r(void)
+{
+	return 0x00419ea4;
+}
+static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(void)
+{
+	return 0x1 << 0;
+}
 static inline u32 gr_pri_sked_activity_r(void)
 {
 	return 0x00407054;
@@ -3058,6 +3086,10 @@ static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
 {
 	return 0x0050450c;
 }
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
 static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void)
 {
 	return 0x2;
@@ -3106,6 +3138,10 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v(void)
 {
 	return 0x00000001;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_v(void)
+{
+	return 0x00000000;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(void)
 {
 	return 0x80000000;
@@ -3118,10 +3154,50 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_run_trigger_task_f(void)
 {
 	return 0x40000000;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void)
+{
+	return 0x00504614;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void)
+{
+	return 0x00504624;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void)
+{
+	return 0x00504634;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_warp_disable_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_sm_disable_v(void)
+{
+	return 0x00000000;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
 {
 	return 0x0050460c;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_sm_in_trap_mode_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(u32 r)
 {
 	return (r >> 4) & 0x1;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h
index 0982bc09..ab6f6373 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -74,6 +74,22 @@ static inline u32 top_num_fbps_value_v(u32 r)
 {
 	return (r >> 0) & 0x1f;
 }
+static inline u32 top_ltc_per_fbp_r(void)
+{
+	return 0x00022450;
+}
+static inline u32 top_ltc_per_fbp_value_v(u32 r)
+{
+	return (r >> 0) & 0x1f;
+}
+static inline u32 top_slices_per_ltc_r(void)
+{
+	return 0x0002245c;
+}
+static inline u32 top_slices_per_ltc_value_v(u32 r)
+{
+	return (r >> 0) & 0x1f;
+}
 static inline u32 top_num_ltcs_r(void)
 {
 	return 0x00022454;
-- 
cgit v1.2.2


From 4de370c12e948ded4f9602869209ed499f103b0c Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 27 Mar 2015 10:13:16 -0700
Subject: gpu: nvgpu: gp10b: Program stream id to LTC

Program a constant stream id 31 to LTC.

Bug 1610019

Change-Id: I9b5fb794b5ea8da0fba67a2376126d89e056f955
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/724348
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h |  8 ++++++++
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c    | 10 +++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
index 01b66c9c..ea96a9aa 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
@@ -514,4 +514,12 @@ static inline u32 ltc_ltc0_lts0_tstg_info_1_slices_per_l2_v(u32 r)
 {
 	return (r >> 16) & 0x1f;
 }
+static inline u32 ltc_ltca_g_axi_pctrl_r(void)
+{
+	return 0x00160000;
+}
+static inline u32 ltc_ltca_g_axi_pctrl_user_sid_f(u32 v)
+{
+	return (v & 0xff) << 2;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index dbeed474..78c85f3e 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -148,6 +148,14 @@ void gp10b_ltc_isr(struct gk20a *g)
 	}
 }
 
+void gp10b_ltc_init_fs_state(struct gk20a *g)
+{
+	gm20b_ltc_init_fs_state(g);
+	/* After the gm20b setup, program constant stream ID 31 to the LTC. */
+	gk20a_writel(g, ltc_ltca_g_axi_pctrl_r(),
+			ltc_ltca_g_axi_pctrl_user_sid_f(31));
+}
+
 void gp10b_init_ltc(struct gpu_ops *gops)
 {
 	gops->ltc.determine_L2_size_bytes = gp10b_determine_L2_size_bytes;
@@ -157,7 +165,7 @@ void gp10b_init_ltc(struct gpu_ops *gops)
 	gops->ltc.init_cbc = gk20a_ltc_init_cbc;
 
 	/* GM20b specific ops. */
-	gops->ltc.init_fs_state = gm20b_ltc_init_fs_state;
+	gops->ltc.init_fs_state = gp10b_ltc_init_fs_state;
 	gops->ltc.init_comptags = gp10b_ltc_init_comptags;
 	gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl;
 	gops->ltc.elpg_flush = gm20b_ltc_g_elpg_flush_locked;
-- 
cgit v1.2.2


From c258832b99468adba3cc19a38ba07234cd00de93 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 30 Mar 2015 11:21:27 -0700
Subject: gpu: nvgpu: gp10b: Correct steady state CB size

Program steady state CB size to be the HW default.

Bug 1626065

Change-Id: If0bdc5a649f307b6adab4e914a6201222b8453f8
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/725106
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c    | 2 +-
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 1b7dd405..f91ef1ba 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -110,7 +110,7 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
 				proj_ppc_in_gpc_stride_v() * ppc_index,
-				gr->alpha_cb_default_size * gr->pes_tpc_count[ppc_index][gpc_index],
+				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_default_v(),
 				patch);
 
 			attrib_offset_in_chunk += gr->attrib_cb_size *
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 72f1d68c..02674d6b 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -2174,6 +2174,14 @@ static inline u32 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r(void)
 {
 	return 0x005030f0;
 }
+static inline u32 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_f(u32 v)
+{
+	return (v & 0x3fffff) << 0;
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_default_v(void)
+{
+	return 0x00030000;
+}
 static inline u32 gr_gpccs_falcon_addr_r(void)
 {
 	return 0x0041a0ac;
-- 
cgit v1.2.2


From 1c3f7c7e1ee67acb2ab71785e534a38a8952051f Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 31 Mar 2015 14:50:10 -0700
Subject: gpu: nvgpu: gp10b: Define compressible page size

gp10b can compress 4k pages.

Bug 1605769

Change-Id: I15cf4b9ead0fefdfc430cfc4919dcb16721f5cb2
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/725794
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/fb_gp10b.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fb_gp10b.c b/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
index bd1b7bf0..3a143ced 100644
--- a/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
@@ -92,10 +92,16 @@ static int gp10b_fb_compression_page_size(struct gk20a *g)
 	return SZ_64K;
 }
 
+static int gp10b_fb_compressible_page_size(struct gk20a *g)
+{
+	return SZ_4K;
+}
+
 void gp10b_init_fb(struct gpu_ops *gops)
 {
 	gm20b_init_fb(gops);
 	gops->fb.compression_page_size = gp10b_fb_compression_page_size;
+	gops->fb.compressible_page_size = gp10b_fb_compressible_page_size;
 
 	gp10b_init_uncompressed_kind_map();
 	gp10b_init_kind_attr();
-- 
cgit v1.2.2


From 11c589070272d34763559014f1c11738028823a8 Mon Sep 17 00:00:00 2001
From: Alex Van Brunt <avanbrunt@nvidia.com>
Date: Mon, 6 Apr 2015 13:35:36 -0700
Subject: gpu: nvgpu: gp10b: Fix include path

Remove the requirement that srcdir is called kernel.

Change-Id: I9379ef530ac34009bd0461a29d65d6d707bc8014
Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
Reviewed-on: http://git-master/r/728153
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/Makefile       | 4 ++--
 drivers/gpu/nvgpu/gp10b/Makefile | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index c583f6d5..f259eefa 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -1,7 +1,7 @@
 GCOV_PROFILE := y
 
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel/drivers/gpu/nvgpu
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel/include/linux
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/drivers/gpu/nvgpu
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/include/linux
 ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include
 ccflags-$(CONFIG_GK20A) += -Wno-multichar
 ccflags-$(CONFIG_GK20A) += -Werror
diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index 9d0c0311..a51ba15e 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -1,7 +1,7 @@
 GCOV_PROFILE := y
 
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel/drivers/gpu/nvgpu
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel/include
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/drivers/gpu/nvgpu
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/include
 ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/drivers/gpu/nvgpu
 ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include
 ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include/uapi
-- 
cgit v1.2.2


From 0158c380376722636ff696543071427ef3d3739f Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Mon, 6 Apr 2015 16:32:29 +0530
Subject: gpu: nvgpu: gp10b: fix sparse warnings of static symbol

Fix sparse warnings of the type below by making the necessary
symbols static:

warning: symbol '<symbol>' was not declared. Should it be static?

Bug 200088648

Change-Id: Ic20ef3eb73dcbfe5f13506b5afa629c3e1db59d0
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/728012
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/ce2_gp10b.c | 4 ++--
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c  | 8 ++++----
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 2 +-
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 4 ++--
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c  | 2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/ce2_gp10b.c b/drivers/gpu/nvgpu/gp10b/ce2_gp10b.c
index d76b97a5..b2d417b7 100644
--- a/drivers/gpu/nvgpu/gp10b/ce2_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ce2_gp10b.c
@@ -44,7 +44,7 @@ static u32 ce2_launcherr_isr(struct gk20a *g, u32 fifo_intr)
 	return ce2_intr_status_launcherr_pending_f();
 }
 
-void gp10b_ce2_isr(struct gk20a *g)
+static void gp10b_ce2_isr(struct gk20a *g)
 {
 	u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r(0));
 	u32 clear_intr = 0;
@@ -62,7 +62,7 @@ void gp10b_ce2_isr(struct gk20a *g)
 	return;
 }
 
-void gp10b_ce2_nonstall_isr(struct gk20a *g)
+static void gp10b_ce2_nonstall_isr(struct gk20a *g)
 {
 	u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r(0));
 	u32 clear_intr = 0;
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index f91ef1ba..a2c981a0 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -24,7 +24,7 @@
 #include "hw_proj_gp10b.h"
 #include "hw_ctxsw_prog_gp10b.h"
 
-bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
+static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
 
@@ -50,7 +50,7 @@ bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 	return valid;
 }
 
-int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
+static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 			struct channel_gk20a *c, bool patch)
 {
 	struct gr_gk20a *gr = &g->gr;
@@ -142,7 +142,7 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 	return 0;
 }
 
-void gr_gp10b_commit_global_pagepool(struct gk20a *g,
+static void gr_gp10b_commit_global_pagepool(struct gk20a *g,
 					    struct channel_ctx_gk20a *ch_ctx,
 					    u64 addr, u32 size, bool patch)
 {
@@ -457,7 +457,7 @@ static int gr_gp10b_init_ctx_state(struct gk20a *g)
 	return 0;
 }
 
-int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
+static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 			  struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
 			  u32 class,
 			  u32 flags)
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index ff140a04..31597753 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -37,7 +37,7 @@
 #include "gm20b/pmu_gm20b.h"
 #include "gm20b/clk_gm20b.h"
 
-struct gpu_ops gp10b_ops = {
+static struct gpu_ops gp10b_ops = {
 	.clock_gating = {
 		.slcg_bus_load_gating_prod =
 			gm20b_slcg_bus_load_gating_prod,
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 78c85f3e..3c809eaf 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -123,7 +123,7 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	return 0;
 }
 
-void gp10b_ltc_isr(struct gk20a *g)
+static void gp10b_ltc_isr(struct gk20a *g)
 {
 	u32 mc_intr, ltc_intr;
 	int ltc, slice;
@@ -148,7 +148,7 @@ void gp10b_ltc_isr(struct gk20a *g)
 	}
 }
 
-void gp10b_ltc_init_fs_state(struct gk20a *g)
+static void gp10b_ltc_init_fs_state(struct gk20a *g)
 {
 	gm20b_ltc_init_fs_state(g);
 
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index c316aa9f..e3e2c173 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -21,7 +21,7 @@
 #include "hw_ram_gp10b.h"
 #include "hw_bus_gp10b.h"
 
-u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g)
+static u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g)
 {
 	return 36;
 }
-- 
cgit v1.2.2


From 1fcd7fd547daac5374993f243fad77a822a5a048 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Wed, 1 Apr 2015 18:15:44 +0300
Subject: gpu: nvgpu: set zbc format field properly

Add a missing bitmask for clearing existing bits before setting a new
value, and shift the value by the correct amount. Also, the format
register offset needs to be rounded down.

Bug 200087330

Change-Id: I39051be7eb68327fc010495f0c16c879447c8e4c
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/726265
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index a2c981a0..4f7a037b 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -203,9 +203,10 @@ static int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
 	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_color_g_r(index), color_val->color_ds[1]);
 	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_color_b_r(index), color_val->color_ds[2]);
 	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_color_a_r(index), color_val->color_ds[3]);
-	zbc_c = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + ALIGN(index, 4));
-	zbc_c |= color_val->format << (index % 4) * 6;
-	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + ALIGN(index, 4), zbc_c);
+	zbc_c = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3));
+	zbc_c &= ~(0x7f << ((index % 4) * 7));
+	zbc_c |= color_val->format << ((index % 4) * 7);
+	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3), zbc_c);
 
 	return 0;
 }
@@ -240,9 +241,10 @@ static int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
 	gr->zbc_dep_tbl[index].ref_cnt++;
 
 	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_z_r(index), depth_val->depth);
-	zbc_z = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + ALIGN(index, 4));
-	zbc_z |= depth_val->format << (index % 4) * 6;
-	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + ALIGN(index, 4), zbc_z);
+	zbc_z = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + (index & ~3));
+	zbc_z &= ~(0x7f << (index % 4) * 7);
+	zbc_z |= depth_val->format << (index % 4) * 7;
+	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r() + (index & ~3), zbc_z);
 
 	return 0;
 }
-- 
cgit v1.2.2


From ac0cd782ab539d3a89ac2cc50955f80b8be456d1 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 29 Jan 2015 11:29:56 -0800
Subject: gpu: nvgpu: gp10b: Implement new page table format

Implement the 5-level Pascal page table format. It is enabled
only for simulation.

Change-Id: I6767fac8b52fe0f6a2e2f86312de5fc93af6518e
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/682114
---
 drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h | 168 +++++++++++++++++----------
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c      | 197 ++++++++++++++++++++++++++++++++
 2 files changed, 303 insertions(+), 62 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
index 5a0f9fe2..fc65f57d 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -50,163 +50,207 @@
 #ifndef _hw_gmmu_gp10b_h_
 #define _hw_gmmu_gp10b_h_
 
-static inline u32 gmmu_pde_aperture_big_w(void)
+static inline u32 gmmu_new_pde_is_pte_w(void)
 {
 	return 0;
 }
-static inline u32 gmmu_pde_aperture_big_invalid_f(void)
+static inline u32 gmmu_new_pde_is_pte_false_f(void)
 {
 	return 0x0;
 }
-static inline u32 gmmu_pde_aperture_big_video_memory_f(void)
+static inline u32 gmmu_new_pde_aperture_w(void)
 {
-	return 0x1;
+	return 0;
+}
+static inline u32 gmmu_new_pde_aperture_invalid_f(void)
+{
+	return 0x0;
+}
+static inline u32 gmmu_new_pde_aperture_video_memory_f(void)
+{
+	return 0x2;
+}
+static inline u32 gmmu_new_pde_address_sys_f(u32 v)
+{
+	return (v & 0xffffff) << 8;
+}
+static inline u32 gmmu_new_pde_address_sys_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_pde_vol_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_pde_vol_true_f(void)
+{
+	return 0x8;
+}
+static inline u32 gmmu_new_pde_vol_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 gmmu_new_pde_address_shift_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 gmmu_new_pde__size_v(void)
+{
+	return 0x00000008;
+}
+static inline u32 gmmu_new_dual_pde_is_pte_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_dual_pde_is_pte_false_f(void)
+{
+	return 0x0;
 }
-static inline u32 gmmu_pde_size_w(void)
+static inline u32 gmmu_new_dual_pde_aperture_big_w(void)
 {
 	return 0;
 }
-static inline u32 gmmu_pde_size_full_f(void)
+static inline u32 gmmu_new_dual_pde_aperture_big_invalid_f(void)
 {
 	return 0x0;
 }
-static inline u32 gmmu_pde_address_big_sys_f(u32 v)
+static inline u32 gmmu_new_dual_pde_aperture_big_video_memory_f(void)
+{
+	return 0x2;
+}
+static inline u32 gmmu_new_dual_pde_address_big_sys_f(u32 v)
 {
 	return (v & 0xfffffff) << 4;
 }
-static inline u32 gmmu_pde_address_big_sys_w(void)
+static inline u32 gmmu_new_dual_pde_address_big_sys_w(void)
 {
 	return 0;
 }
-static inline u32 gmmu_pde_aperture_small_w(void)
+static inline u32 gmmu_new_dual_pde_aperture_small_w(void)
 {
-	return 1;
+	return 2;
 }
-static inline u32 gmmu_pde_aperture_small_invalid_f(void)
+static inline u32 gmmu_new_dual_pde_aperture_small_invalid_f(void)
 {
 	return 0x0;
 }
-static inline u32 gmmu_pde_aperture_small_video_memory_f(void)
+static inline u32 gmmu_new_dual_pde_aperture_small_video_memory_f(void)
 {
-	return 0x1;
+	return 0x2;
 }
-static inline u32 gmmu_pde_vol_small_w(void)
+static inline u32 gmmu_new_dual_pde_vol_small_w(void)
 {
-	return 1;
+	return 2;
 }
-static inline u32 gmmu_pde_vol_small_true_f(void)
+static inline u32 gmmu_new_dual_pde_vol_small_true_f(void)
 {
-	return 0x4;
+	return 0x8;
 }
-static inline u32 gmmu_pde_vol_small_false_f(void)
+static inline u32 gmmu_new_dual_pde_vol_small_false_f(void)
 {
 	return 0x0;
 }
-static inline u32 gmmu_pde_vol_big_w(void)
+static inline u32 gmmu_new_dual_pde_vol_big_w(void)
 {
-	return 1;
+	return 0;
 }
-static inline u32 gmmu_pde_vol_big_true_f(void)
+static inline u32 gmmu_new_dual_pde_vol_big_true_f(void)
 {
 	return 0x8;
 }
-static inline u32 gmmu_pde_vol_big_false_f(void)
+static inline u32 gmmu_new_dual_pde_vol_big_false_f(void)
 {
 	return 0x0;
 }
-static inline u32 gmmu_pde_address_small_sys_f(u32 v)
+static inline u32 gmmu_new_dual_pde_address_small_sys_f(u32 v)
 {
-	return (v & 0xfffffff) << 4;
+	return (v & 0xffffff) << 8;
 }
-static inline u32 gmmu_pde_address_small_sys_w(void)
+static inline u32 gmmu_new_dual_pde_address_small_sys_w(void)
 {
-	return 1;
+	return 2;
 }
-static inline u32 gmmu_pde_address_shift_v(void)
+static inline u32 gmmu_new_dual_pde_address_shift_v(void)
 {
 	return 0x0000000c;
 }
-static inline u32 gmmu_pde__size_v(void)
+static inline u32 gmmu_new_dual_pde_address_big_shift_v(void)
 {
 	return 0x00000008;
 }
-static inline u32 gmmu_pte__size_v(void)
+static inline u32 gmmu_new_dual_pde__size_v(void)
+{
+	return 0x00000010;
+}
+static inline u32 gmmu_new_pte__size_v(void)
 {
 	return 0x00000008;
 }
-static inline u32 gmmu_pte_valid_w(void)
+static inline u32 gmmu_new_pte_valid_w(void)
 {
 	return 0;
 }
-static inline u32 gmmu_pte_valid_true_f(void)
+static inline u32 gmmu_new_pte_valid_true_f(void)
 {
 	return 0x1;
 }
-static inline u32 gmmu_pte_valid_false_f(void)
+static inline u32 gmmu_new_pte_valid_false_f(void)
 {
 	return 0x0;
 }
-static inline u32 gmmu_pte_address_sys_f(u32 v)
+static inline u32 gmmu_new_pte_address_sys_f(u32 v)
 {
-	return (v & 0xfffffff) << 4;
+	return (v & 0xffffff) << 8;
 }
-static inline u32 gmmu_pte_address_sys_w(void)
+static inline u32 gmmu_new_pte_address_sys_w(void)
 {
 	return 0;
 }
-static inline u32 gmmu_pte_vol_w(void)
+static inline u32 gmmu_new_pte_vol_w(void)
 {
-	return 1;
+	return 0;
 }
-static inline u32 gmmu_pte_vol_true_f(void)
+static inline u32 gmmu_new_pte_vol_true_f(void)
 {
-	return 0x1;
+	return 0x8;
 }
-static inline u32 gmmu_pte_vol_false_f(void)
+static inline u32 gmmu_new_pte_vol_false_f(void)
 {
 	return 0x0;
 }
-static inline u32 gmmu_pte_aperture_w(void)
+static inline u32 gmmu_new_pte_aperture_w(void)
 {
-	return 1;
+	return 0;
 }
-static inline u32 gmmu_pte_aperture_video_memory_f(void)
+static inline u32 gmmu_new_pte_aperture_video_memory_f(void)
 {
 	return 0x0;
 }
-static inline u32 gmmu_pte_read_only_w(void)
+static inline u32 gmmu_new_pte_read_only_w(void)
 {
 	return 0;
 }
-static inline u32 gmmu_pte_read_only_true_f(void)
+static inline u32 gmmu_new_pte_read_only_true_f(void)
 {
-	return 0x4;
+	return 0x40;
 }
-static inline u32 gmmu_pte_write_disable_w(void)
+static inline u32 gmmu_new_pte_comptagline_f(u32 v)
 {
-	return 1;
+	return (v & 0x3ffff) << 4;
 }
-static inline u32 gmmu_pte_write_disable_true_f(void)
-{
-	return 0x80000000;
-}
-static inline u32 gmmu_pte_read_disable_w(void)
+static inline u32 gmmu_new_pte_comptagline_w(void)
 {
 	return 1;
 }
-static inline u32 gmmu_pte_read_disable_true_f(void)
-{
-	return 0x40000000;
-}
-static inline u32 gmmu_pte_comptagline_f(u32 v)
+static inline u32 gmmu_new_pte_kind_f(u32 v)
 {
-	return (v & 0x3ffff) << 12;
+	return (v & 0xff) << 24;
 }
-static inline u32 gmmu_pte_comptagline_w(void)
+static inline u32 gmmu_new_pte_kind_w(void)
 {
 	return 1;
 }
-static inline u32 gmmu_pte_address_shift_v(void)
+static inline u32 gmmu_new_pte_address_shift_v(void)
 {
 	return 0x0000000c;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index e3e2c173..1608b176 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -20,6 +20,8 @@
 #include "rpfb_gp10b.h"
 #include "hw_ram_gp10b.h"
 #include "hw_bus_gp10b.h"
+#include "hw_gmmu_gp10b.h"
+#include "gk20a/semaphore_gk20a.h"
 
 static u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g)
 {
@@ -138,6 +140,197 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
 	return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
 }
 
+u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i)
+{
+	return (u32 *) (((u8 *)entry->cpu_va) + i*gmmu_new_pde__size_v());
+}
+
+static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
+			   struct gk20a_mm_entry *parent,
+			   u32 i, u32 gmmu_pgsz_idx,
+			   u64 iova,
+			   u32 kind_v, u32 *ctag,
+			   bool cacheable, bool unmapped_pte,
+			   int rw_flag, bool sparse, u32 flags)
+{
+	u64 pte_addr = 0;
+	u64 pde_addr = 0;
+	struct gk20a_mm_entry *pte = parent->entries + i;
+	u32 pde_v[2] = {0, 0};
+	u32 *pde;
+	struct gk20a *g = vm->mm->g;
+
+	gk20a_dbg_fn("");
+
+	pte_addr = g->ops.mm.get_iova_addr(g, pte->sgt->sgl, 0)
+		   >> gmmu_new_pde_address_shift_v();
+	pde_addr = g->ops.mm.get_iova_addr(g, parent->sgt->sgl, 0);
+
+	pde_v[0] |= gmmu_new_pde_aperture_video_memory_f();
+	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
+
+	pde = pde3_from_index(parent, i);
+
+	gk20a_mem_wr32(pde, 0, pde_v[0]);
+	gk20a_mem_wr32(pde, 1, pde_v[1]);
+
+	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
+		  i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
+u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i)
+{
+	return (u32 *) (((u8 *)entry->cpu_va) + i*gmmu_new_dual_pde__size_v());
+}
+
+static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
+			   struct gk20a_mm_entry *pte,
+			   u32 i, u32 gmmu_pgsz_idx,
+			   u64 iova,
+			   u32 kind_v, u32 *ctag,
+			   bool cacheable, bool unmapped_pte,
+			   int rw_flag, bool sparse, u32 flags)
+{
+	bool small_valid, big_valid;
+	u32 pte_addr_small = 0, pte_addr_big = 0;
+	struct gk20a_mm_entry *entry = pte->entries + i;
+	u32 pde_v[4] = {0, 0, 0, 0};
+	u32 *pde;
+	struct gk20a *g = vm->mm->g;
+
+	gk20a_dbg_fn("");
+
+	small_valid = entry->size && entry->pgsz == gmmu_page_size_small;
+	big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
+
+	if (small_valid)
+		pte_addr_small = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0)
+				 >> gmmu_new_dual_pde_address_shift_v();
+
+	if (big_valid)
+		pte_addr_big = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0)
+			       >> gmmu_new_dual_pde_address_big_shift_v();
+
+	if (small_valid) {
+		pde_v[2] |= gmmu_new_dual_pde_address_small_sys_f(pte_addr_small);
+		pde_v[2] |= gmmu_new_dual_pde_aperture_small_video_memory_f();
+		pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
+	}
+
+	if (big_valid) {
+		pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(pte_addr_big);
+		pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
+		pde_v[0] |= gmmu_new_dual_pde_aperture_big_video_memory_f();
+	}
+
+	pde = pde0_from_index(pte, i);
+
+	gk20a_mem_wr32(pde, 0, pde_v[0]);
+	gk20a_mem_wr32(pde, 1, pde_v[1]);
+	gk20a_mem_wr32(pde, 2, pde_v[2]);
+	gk20a_mem_wr32(pde, 3, pde_v[3]);
+
+	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]",
+		  i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
+static int update_gmmu_pte_locked(struct vm_gk20a *vm,
+			   struct gk20a_mm_entry *pte,
+			   u32 i, u32 gmmu_pgsz_idx,
+			   u64 iova,
+			   u32 kind_v, u32 *ctag,
+			   bool cacheable, bool unmapped_pte,
+			   int rw_flag, bool sparse, u32 flags)
+{
+	u32 page_size  = vm->gmmu_page_sizes[gmmu_pgsz_idx];
+	u32 pte_w[2] = {0, 0}; /* invalid pte */
+
+	gk20a_dbg_fn("");
+
+	if (iova) {
+		pte_w[0] = gmmu_new_pte_valid_true_f() |
+			gmmu_new_pte_address_sys_f(iova
+				>> gmmu_new_pte_address_shift_v());
+		pte_w[1] = gmmu_new_pte_aperture_video_memory_f() |
+			gmmu_new_pte_kind_f(kind_v) |
+			gmmu_new_pte_comptagline_f(*ctag / SZ_128K);
+
+		if (rw_flag == gk20a_mem_flag_read_only)
+			pte_w[0] |= gmmu_new_pte_read_only_true_f();
+		if (!cacheable)
+			pte_w[1] |= gmmu_new_pte_vol_true_f();
+
+		gk20a_dbg(gpu_dbg_pte, "pte=%d iova=0x%llx kind=%d"
+			   " ctag=%d vol=%d"
+			   " [0x%08x, 0x%08x]",
+			   i, iova,
+			   kind_v, *ctag, !cacheable,
+			   pte_w[1], pte_w[0]);
+
+		if (*ctag)
+			*ctag += page_size;
+	} else if (sparse) {
+		pte_w[0] = gmmu_new_pte_valid_false_f();
+		pte_w[1] |= gmmu_new_pte_vol_true_f();
+	} else {
+		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
+	}
+
+	gk20a_mem_wr32(pte->cpu_va + i*8, 0, pte_w[0]);
+	gk20a_mem_wr32(pte->cpu_va + i*8, 1, pte_w[1]);
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
+const struct gk20a_mmu_level gp10b_mm_levels[] = {
+	{.hi_bit = {48, 48},
+	 .lo_bit = {47, 47},
+	 .update_entry = update_gmmu_pde3_locked,
+	 .entry_size = 8},
+	{.hi_bit = {46, 46},
+	 .lo_bit = {38, 38},
+	 .update_entry = update_gmmu_pde3_locked,
+	 .entry_size = 8},
+	{.hi_bit = {37, 37},
+	 .lo_bit = {29, 29},
+	 .update_entry = update_gmmu_pde3_locked,
+	 .entry_size = 8},
+	{.hi_bit = {28, 28},
+	 .lo_bit = {21, 21},
+	 .update_entry = update_gmmu_pde0_locked,
+	 .entry_size = 16},
+	{.hi_bit = {20, 20},
+	 .lo_bit = {12, 16},
+	 .update_entry = update_gmmu_pte_locked,
+	 .entry_size = 8},
+	{.update_entry = NULL}
+};
+
+const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g, u32 big_page_size)
+{
+	return gp10b_mm_levels;
+}
+
+static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
+{
+	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
+	u32 pdb_addr_hi = u64_hi32(pdb_addr);
+
+	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
+		ram_in_page_dir_base_target_vid_mem_f() |
+		ram_in_page_dir_base_vol_true_f() |
+		ram_in_page_dir_base_lo_f(pdb_addr_lo) |
+		1 << 10);
+
+	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
+		ram_in_page_dir_base_hi_f(pdb_addr_hi));
+}
+
 void gp10b_init_mm(struct gpu_ops *gops)
 {
 	gm20b_init_mm(gops);
@@ -146,4 +339,8 @@ void gp10b_init_mm(struct gpu_ops *gops)
 	gops->mm.init_bar2_vm = gb10b_init_bar2_vm;
 	gops->mm.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup;
 	gops->mm.get_iova_addr = gp10b_mm_iova_addr;
+	if (tegra_platform_is_linsim()) {
+		gops->mm.get_mmu_levels = gp10b_mm_get_mmu_levels;
+		gops->mm.init_pdb = gp10b_mm_init_pdb;
+	}
 }
-- 
cgit v1.2.2


From c5425c5a1b3795e9a7d0887eccd025509186bcd1 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Thu, 16 Apr 2015 16:16:47 +0530
Subject: gpu: nvgpu: gp10b:inherit gm20b acr init wpr func

- Use gm20b_pmu_init_acr() as the gp10b method for initializing
  the ACR WPR region.

Bug 200085428

Change-Id: I897aa42b0a8ef7478d4b3f64fe1834532d35b303
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/732213
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Tested-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index 3db0d4c3..4ba0f997 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * GP10B PMU
  *
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
 *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -155,7 +155,7 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops)
 {
 	if (gops->privsecurity) {
 		gm20b_init_secure_pmu(gops);
-		gops->pmu.init_wpr_region = NULL;
+		gops->pmu.init_wpr_region = gm20b_pmu_init_acr;
 	} else {
 		gk20a_init_pmu_ops(gops);
 		gops->pmu.init_wpr_region = NULL;
-- 
cgit v1.2.2


From 0f2a1edd655be0d6f364c07a409ee100ca940f4b Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Wed, 8 Apr 2015 23:06:11 +0530
Subject: gpu: nvgpu: secure boot flag, default disabled

- Set "privsecurity" to 1 to enable secure boot, or to 0 to
  disable it.

Bug 200085428

Change-Id: Ia4bf214f4a4bb2573c8869ea2182bbe680f67782
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/729101
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Tested-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 31597753..914d8089 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -92,6 +92,8 @@ int gp10b_init_hal(struct gk20a *g)
 	struct nvgpu_gpu_characteristics *c = &g->gpu_characteristics;
 
 	*gops = gp10b_ops;
+	gops->privsecurity = 0;
+
 	gp10b_init_mc(gops);
 	gp10b_init_gr(gops);
 	gp10b_init_ltc(gops);
-- 
cgit v1.2.2


From faa1e5d82b8c3a2d125f04788f8146ba9b9b186c Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Fri, 17 Apr 2015 11:43:40 +0530
Subject: gpu: nvgpu: gp10b: update elpg sequencing value

- Add the final ELPG sequencing values.
- ELPG is disabled by default.

Bug 1525971

Change-Id: I2c306d9f03e361560a95fcfa723eafe14d004191
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/732574
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 201 ++++++++++++++++++------------------
 1 file changed, 101 insertions(+), 100 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index 4ba0f997..9ecf3964 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -31,102 +31,103 @@ struct pg_init_sequence_list {
 };
 
 /* PROD settings for ELPG sequencing registers*/
-static struct pg_init_sequence_list _pginitseq_gm20b[] = {
-		{ 0x0010ab10, 0x8180},
-		{ 0x0010e118, 0x83828180},
-		{ 0x0010e068, 0},
-		{ 0x0010e06c, 0x00000080},
-		{ 0x0010e06c, 0x00000081},
-		{ 0x0010e06c, 0x00000082},
-		{ 0x0010e06c, 0x00000083},
-		{ 0x0010e06c, 0x00000084},
-		{ 0x0010e06c, 0x00000085},
-		{ 0x0010e06c, 0x00000086},
-		{ 0x0010e06c, 0x00000087},
-		{ 0x0010e06c, 0x00000088},
-		{ 0x0010e06c, 0x00000089},
-		{ 0x0010e06c, 0x0000008a},
-		{ 0x0010e06c, 0x0000008b},
-		{ 0x0010e06c, 0x0000008c},
-		{ 0x0010e06c, 0x0000008d},
-		{ 0x0010e06c, 0x0000008e},
-		{ 0x0010e06c, 0x0000008f},
-		{ 0x0010e06c, 0x00000090},
-		{ 0x0010e06c, 0x00000091},
-		{ 0x0010e06c, 0x00000092},
-		{ 0x0010e06c, 0x00000093},
-		{ 0x0010e06c, 0x00000094},
-		{ 0x0010e06c, 0x00000095},
-		{ 0x0010e06c, 0x00000096},
-		{ 0x0010e06c, 0x00000097},
-		{ 0x0010e06c, 0x00000098},
-		{ 0x0010e06c, 0x00000099},
-		{ 0x0010e06c, 0x0000009a},
-		{ 0x0010e06c, 0x0000009b},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010e06c, 0x00000000},
-		{ 0x0010ab14, 0x00000000},
-		{ 0x0010ab18, 0x00000000},
-		{ 0x0010e024, 0x00000000},
-		{ 0x0010e028, 0x00000000},
-		{ 0x0010e11c, 0x00000000},
-		{ 0x0010e120, 0x00000000},
-		{ 0x0010ab1c, 0x02010155},
-		{ 0x0010e020, 0x001b1b55},
-		{ 0x0010e124, 0x01030355},
-		{ 0x0010ab20, 0x89abcdef},
-		{ 0x0010ab24, 0x00000000},
-		{ 0x0010e02c, 0x89abcdef},
-		{ 0x0010e030, 0x00000000},
-		{ 0x0010e128, 0x89abcdef},
-		{ 0x0010e12c, 0x00000000},
-		{ 0x0010ab28, 0x74444444},
-		{ 0x0010ab2c, 0x70000000},
-		{ 0x0010e034, 0x74444444},
-		{ 0x0010e038, 0x70000000},
-		{ 0x0010e130, 0x74444444},
-		{ 0x0010e134, 0x70000000},
-		{ 0x0010ab30, 0x00000000},
-		{ 0x0010ab34, 0x00000001},
-		{ 0x00020004, 0x00000000},
-		{ 0x0010e138, 0x00000000},
-		{ 0x0010e040, 0x00000000},
+static struct pg_init_sequence_list _pginitseq_gp10b[] = {
+		{0x0010ab10, 0x0000868B} ,
+		{0x0010e118, 0x8590848F} ,
+		{0x0010e000, 0} ,
+		{0x0010e06c, 0x000000A3} ,
+		{0x0010e06c, 0x000000A0} ,
+		{0x0010e06c, 0x00000095} ,
+		{0x0010e06c, 0x000000A6} ,
+		{0x0010e06c, 0x0000008C} ,
+		{0x0010e06c, 0x00000080} ,
+		{0x0010e06c, 0x00000081} ,
+		{0x0010e06c, 0x00000087} ,
+		{0x0010e06c, 0x00000088} ,
+		{0x0010e06c, 0x0000008D} ,
+		{0x0010e06c, 0x000000A00} ,
+		{0x0010e06c, 0x000000A01} ,
+		{0x0010e06c, 0x000000A02} ,
+		{0x0010e06c, 0x000000A03} ,
+		{0x0010e06c, 0x000000A04} ,
+		{0x0010e06c, 0x000000A05} ,
+		{0x0010e06c, 0x000000A06} ,
+		{0x0010e06c, 0x000000A07} ,
+		{0x0010e06c, 0x000000A08} ,
+		{0x0010e06c, 0x000000A09} ,
+		{0x0010e06c, 0x000000950} ,
+		{0x0010e06c, 0x000000951} ,
+		{0x0010e06c, 0x000000952} ,
+		{0x0010e06c, 0x000000953} ,
+		{0x0010e06c, 0x000000954} ,
+		{0x0010e06c, 0x000000955} ,
+		{0x0010e06c, 0x000000956} ,
+		{0x0010e06c, 0x000000957} ,
+		{0x0010ab14, 0x00000000} ,
+		{0x0010e024, 0x00000000} ,
+		{0x0010e028, 0x00000000} ,
+		{0x0010e11c, 0x00000000} ,
+		{0x0010ab1c, 0x140B0B55} ,
+		{0x0010e020, 0x0E262655} ,
+		{0x0010e124, 0x25101055} ,
+		{0x0010ab20, 0x89abcdef} ,
+		{0x0010ab24, 0x00000000} ,
+		{0x0010e02c, 0x89abcdef} ,
+		{0x0010e030, 0x00000000} ,
+		{0x0010e128, 0x89abcdef} ,
+		{0x0010e12c, 0x00000000} ,
+		{0x0010ab28, 0x75555555} ,
+		{0x0010ab2c, 0x70000000} ,
+		{0x0010e034, 0x75555555} ,
+		{0x0010e038, 0x70000000} ,
+		{0x0010e130, 0x75555555} ,
+		{0x0010e134, 0x70000000} ,
+		{0x0010ab30, 0x00000000} ,
+		{0x0010ab34, 0x00000001} ,
+		{0x00020004, 0x00000000} ,
+		{0x0010e138, 0x00000000} ,
+		{0x0010e040, 0x00000000} ,
+		{0x0010e168, 0x00000000} ,
+		{0x0010e114, 0x0000A5A4} ,
+		{0x0010e110, 0x00000000} ,
+		{0x0010e10c, 0x8590848F} ,
+		{0x0010e05c, 0x00000000} ,
+		{0x0010e044, 0x00000000} ,
+		{0x0010a644, 0x0000868B} ,
+		{0x0010a648, 0x00000000 } ,
+		{0x0010a64c, 0x00829493 } ,
+		{0x0010a650, 0x00000000} ,
+		{0x0010e000, 0} ,
+		{0x0010e068, 0x000000A3} ,
+		{0x0010e068, 0x000000A0} ,
+		{0x0010e068, 0x00000095} ,
+		{0x0010e068, 0x000000A6} ,
+		{0x0010e068, 0x0000008C} ,
+		{0x0010e068, 0x00000080} ,
+		{0x0010e068, 0x00000081} ,
+		{0x0010e068, 0x00000087} ,
+		{0x0010e068, 0x00000088} ,
+		{0x0010e068, 0x0000008D} ,
+		{0x0010e068, 0x000000A00} ,
+		{0x0010e068, 0x000000A01} ,
+		{0x0010e068, 0x000000A02} ,
+		{0x0010e068, 0x000000A03} ,
+		{0x0010e068, 0x000000A04} ,
+		{0x0010e068, 0x000000A05} ,
+		{0x0010e068, 0x000000A06} ,
+		{0x0010e068, 0x000000A07} ,
+		{0x0010e068, 0x000000A08} ,
+		{0x0010e068, 0x000000A09} ,
+		{0x0010e068, 0x000000950} ,
+		{0x0010e068, 0x000000951} ,
+		{0x0010e068, 0x000000952} ,
+		{0x0010e068, 0x000000953} ,
+		{0x0010e068, 0x000000954} ,
+		{0x0010e068, 0x000000955} ,
+		{0x0010e068, 0x000000956} ,
+		{0x0010e068, 0x000000957} ,
+		{0x0010e000, 0} ,
+		{0x0010e004, 0x0000008E},
 };
 
 static int gp10b_pmu_setup_elpg(struct gk20a *g)
@@ -138,12 +139,12 @@ static int gp10b_pmu_setup_elpg(struct gk20a *g)
 	gk20a_dbg_fn("");
 
 	if (g->elpg_enabled) {
-		reg_writes = ((sizeof(_pginitseq_gm20b) /
-				sizeof((_pginitseq_gm20b)[0])));
+		reg_writes = ((sizeof(_pginitseq_gp10b) /
+				sizeof((_pginitseq_gp10b)[0])));
 		/* Initialize registers with production values*/
 		for (index = 0; index < reg_writes; index++) {
-			gk20a_writel(g, _pginitseq_gm20b[index].regaddr,
-				_pginitseq_gm20b[index].writeval);
+			gk20a_writel(g, _pginitseq_gp10b[index].regaddr,
+				_pginitseq_gp10b[index].writeval);
 		}
 	}
 
-- 
cgit v1.2.2


From 4b02177fd3c84e84a3e6894f3696feecf8f5c508 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 22 Apr 2015 12:47:15 -0700
Subject: gpu: nvgpu: gp10b: Make page mapping scatter aware

Augment new page mapping code to be aware of scattered
buffers.

Bug 1605769

Change-Id: Ifdb326563d28ccf07fc4d3d76a24492a68493fe3
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/734355
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 49 +++++++++++++++++++++++++++++++-------
 1 file changed, 40 insertions(+), 9 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 1608b176..3633e9d9 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -148,7 +148,9 @@ u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i)
 static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 			   struct gk20a_mm_entry *parent,
 			   u32 i, u32 gmmu_pgsz_idx,
-			   u64 iova,
+			   struct scatterlist **sgl,
+			   u64 *offset,
+			   u64 *iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unmapped_pte,
 			   int rw_flag, bool sparse, u32 flags)
@@ -188,7 +190,9 @@ u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i)
 static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 			   struct gk20a_mm_entry *pte,
 			   u32 i, u32 gmmu_pgsz_idx,
-			   u64 iova,
+			   struct scatterlist **sgl,
+			   u64 *offset,
+			   u64 *iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unmapped_pte,
 			   int rw_flag, bool sparse, u32 flags)
@@ -241,7 +245,9 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			   struct gk20a_mm_entry *pte,
 			   u32 i, u32 gmmu_pgsz_idx,
-			   u64 iova,
+			   struct scatterlist **sgl,
+			   u64 *offset,
+			   u64 *iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unmapped_pte,
 			   int rw_flag, bool sparse, u32 flags)
@@ -251,23 +257,31 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 
 	gk20a_dbg_fn("");
 
-	if (iova) {
-		pte_w[0] = gmmu_new_pte_valid_true_f() |
-			gmmu_new_pte_address_sys_f(iova
-				>> gmmu_new_pte_address_shift_v());
+	if (*iova) {
+		if (unmapped_pte)
+			pte_w[0] = gmmu_new_pte_valid_false_f() |
+				gmmu_new_pte_address_sys_f(*iova
+					>> gmmu_new_pte_address_shift_v());
+		else
+			pte_w[0] = gmmu_new_pte_valid_true_f() |
+				gmmu_new_pte_address_sys_f(*iova
+					>> gmmu_new_pte_address_shift_v());
+
 		pte_w[1] = gmmu_new_pte_aperture_video_memory_f() |
 			gmmu_new_pte_kind_f(kind_v) |
 			gmmu_new_pte_comptagline_f(*ctag / SZ_128K);
 
 		if (rw_flag == gk20a_mem_flag_read_only)
 			pte_w[0] |= gmmu_new_pte_read_only_true_f();
-		if (!cacheable)
+		if (unmapped_pte && !cacheable)
+			pte_w[0] |= gmmu_new_pte_read_only_true_f();
+		else if (!cacheable)
 			pte_w[1] |= gmmu_new_pte_vol_true_f();
 
 		gk20a_dbg(gpu_dbg_pte, "pte=%d iova=0x%llx kind=%d"
 			   " ctag=%d vol=%d"
 			   " [0x%08x, 0x%08x]",
-			   i, iova,
+			   i, *iova,
 			   kind_v, *ctag, !cacheable,
 			   pte_w[1], pte_w[0]);
 
@@ -283,6 +297,23 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	gk20a_mem_wr32(pte->cpu_va + i*8, 0, pte_w[0]);
 	gk20a_mem_wr32(pte->cpu_va + i*8, 1, pte_w[1]);
 
+	if (*iova) {
+		*iova += page_size;
+		*offset += page_size;
+		if (*sgl && *offset + page_size > (*sgl)->length) {
+			u64 new_iova;
+			*sgl = sg_next(*sgl);
+			if (*sgl) {
+				new_iova = sg_phys(*sgl);
+				gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
+					  new_iova, (*sgl)->length);
+				if (new_iova) {
+					*offset = 0;
+					*iova = new_iova;
+				}
+			}
+		}
+	}
 	gk20a_dbg_fn("done");
 	return 0;
 }
-- 
cgit v1.2.2


From 93e001d24f9ee31bf4f0810e9aa91e70df992cc5 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 27 Mar 2015 09:09:54 -0700
Subject: gpu: nvgpu: gp10b: Gating reglist

Change-Id: I4931958c21692306d6c78bffdc45e21c553b913c
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/731494
---
 drivers/gpu/nvgpu/gp10b/Makefile               |   3 +-
 drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c | 621 +++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.h |  87 ++++
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c            |  47 +-
 4 files changed, 732 insertions(+), 26 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index a51ba15e..f25f7b34 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_GK20A)  += \
 	fb_gp10b.o \
 	pmu_gp10b.o \
 	hal_gp10b.o \
-	rpfb_gp10b.o
+	rpfb_gp10b.o \
+	gp10b_gating_reglist.o
 
 obj-$(CONFIG_TEGRA_GK20A) += platform_gp10b_tegra.o
diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
new file mode 100644
index 00000000..f8ee80c3
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
@@ -0,0 +1,621 @@
+/*
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * This file is autogenerated.  Do not edit.
+ */
+
+#ifndef __gp10b_gating_reglist_h__
+#define __gp10b_gating_reglist_h__
+
+#include <linux/types.h>
+#include "gp10b_gating_reglist.h"
+
+struct gating_desc {
+	u32 addr;
+	u32 prod;
+	u32 disable;
+};
+/* slcg bus */
+static const struct gating_desc gp10b_slcg_bus[] = {
+	{.addr = 0x00001c04, .prod = 0x00000000, .disable = 0x000003fe},
+};
+
+/* slcg ce2 */
+static const struct gating_desc gp10b_slcg_ce2[] = {
+	{.addr = 0x00106f28, .prod = 0x00000000, .disable = 0x000007fe},
+};
+
+/* slcg chiplet */
+static const struct gating_desc gp10b_slcg_chiplet[] = {
+	{.addr = 0x0010c07c, .prod = 0x00000000, .disable = 0x00000007},
+	{.addr = 0x0010e07c, .prod = 0x00000000, .disable = 0x00000007},
+	{.addr = 0x0010d07c, .prod = 0x00000000, .disable = 0x00000007},
+	{.addr = 0x0010e17c, .prod = 0x00000000, .disable = 0x00000007},
+};
+
+/* slcg fb */
+static const struct gating_desc gp10b_slcg_fb[] = {
+	{.addr = 0x00100d14, .prod = 0x00000000, .disable = 0xfffffffe},
+	{.addr = 0x00100c9c, .prod = 0x00000000, .disable = 0x000001fe},
+};
+
+/* slcg fifo */
+static const struct gating_desc gp10b_slcg_fifo[] = {
+	{.addr = 0x000026ac, .prod = 0x00000100, .disable = 0x0001fffe},
+};
+
+/* slcg gr */
+static const struct gating_desc gp10b_slcg_gr[] = {
+	{.addr = 0x004041f4, .prod = 0x00000000, .disable = 0x03fffffe},
+	{.addr = 0x0040917c, .prod = 0x00020008, .disable = 0x0003fffe},
+	{.addr = 0x00409894, .prod = 0x00000040, .disable = 0x03fffffe},
+	{.addr = 0x004078c4, .prod = 0x00000000, .disable = 0x000001fe},
+	{.addr = 0x00406004, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x00405864, .prod = 0x00000000, .disable = 0x000001fe},
+	{.addr = 0x00405910, .prod = 0xfffffff0, .disable = 0xfffffffe},
+	{.addr = 0x00408044, .prod = 0x00000000, .disable = 0x000007fe},
+	{.addr = 0x00407004, .prod = 0x00000000, .disable = 0x000001fe},
+	{.addr = 0x0041a17c, .prod = 0x00020008, .disable = 0x0003fffe},
+	{.addr = 0x0041a894, .prod = 0x00000040, .disable = 0x03fffffe},
+	{.addr = 0x00418504, .prod = 0x00000000, .disable = 0x0007fffe},
+	{.addr = 0x0041860c, .prod = 0x00000000, .disable = 0x000001fe},
+	{.addr = 0x0041868c, .prod = 0x00000000, .disable = 0x0000001e},
+	{.addr = 0x0041871c, .prod = 0x00000000, .disable = 0x0000003e},
+	{.addr = 0x00418388, .prod = 0x00000000, .disable = 0x00000001},
+	{.addr = 0x0041882c, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x00418bc0, .prod = 0x00000000, .disable = 0x000001fe},
+	{.addr = 0x00418974, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x00418c74, .prod = 0xffffffc0, .disable = 0xfffffffe},
+	{.addr = 0x00418cf4, .prod = 0xfffffffc, .disable = 0xfffffffe},
+	{.addr = 0x00418d74, .prod = 0xffffffe0, .disable = 0xfffffffe},
+	{.addr = 0x00418f10, .prod = 0xffffffe0, .disable = 0xfffffffe},
+	{.addr = 0x00418e10, .prod = 0xfffffffe, .disable = 0xfffffffe},
+	{.addr = 0x00419024, .prod = 0x000001fe, .disable = 0x000001fe},
+	{.addr = 0x0041889c, .prod = 0x00000000, .disable = 0x000001fe},
+	{.addr = 0x00419d24, .prod = 0x00000000, .disable = 0x0000ffff},
+	{.addr = 0x00419a44, .prod = 0x00000000, .disable = 0x0000000e},
+	{.addr = 0x00419a4c, .prod = 0x00000000, .disable = 0x000001fe},
+	{.addr = 0x00419a54, .prod = 0x00000000, .disable = 0x0000003e},
+	{.addr = 0x00419a5c, .prod = 0x00000000, .disable = 0x0000000e},
+	{.addr = 0x00419a64, .prod = 0x00000000, .disable = 0x000001fe},
+	{.addr = 0x00419a6c, .prod = 0x00000000, .disable = 0x0000000e},
+	{.addr = 0x00419a74, .prod = 0x00000000, .disable = 0x0000000e},
+	{.addr = 0x00419a7c, .prod = 0x00000000, .disable = 0x0000003e},
+	{.addr = 0x00419a84, .prod = 0x00000000, .disable = 0x0000000e},
+	{.addr = 0x0041986c, .prod = 0x00000104, .disable = 0x00fffffe},
+	{.addr = 0x00419cd8, .prod = 0x00000000, .disable = 0x001ffffe},
+	{.addr = 0x00419ce0, .prod = 0x00000000, .disable = 0x001ffffe},
+	{.addr = 0x00419c74, .prod = 0x0000001e, .disable = 0x0000001e},
+	{.addr = 0x00419fd4, .prod = 0x00000000, .disable = 0x0003fffe},
+	{.addr = 0x00419fdc, .prod = 0xffedff00, .disable = 0xfffffffe},
+	{.addr = 0x00419fe4, .prod = 0x00001b00, .disable = 0x00001ffe},
+	{.addr = 0x00419ff4, .prod = 0x00000000, .disable = 0x00003ffe},
+	{.addr = 0x00419ffc, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x0041be2c, .prod = 0x04115fc0, .disable = 0xfffffffe},
+	{.addr = 0x0041bfec, .prod = 0xfffffff0, .disable = 0xfffffffe},
+	{.addr = 0x0041bed4, .prod = 0xfffffff8, .disable = 0xfffffffe},
+	{.addr = 0x00408814, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x00408a84, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x004089ac, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x00408a24, .prod = 0x00000000, .disable = 0x0000ffff},
+};
+
+/* slcg ltc */
+static const struct gating_desc gp10b_slcg_ltc[] = {
+	{.addr = 0x0017e050, .prod = 0x00000000, .disable = 0xfffffffe},
+	{.addr = 0x0017e35c, .prod = 0x00000000, .disable = 0xfffffffe},
+};
+
+/* slcg perf */
+static const struct gating_desc gp10b_slcg_perf[] = {
+	{.addr = 0x001be018, .prod = 0x000001ff, .disable = 0x00000000},
+	{.addr = 0x001bc018, .prod = 0x000001ff, .disable = 0x00000000},
+	{.addr = 0x001b8018, .prod = 0x000001ff, .disable = 0x00000000},
+	{.addr = 0x001b4124, .prod = 0x00000001, .disable = 0x00000000},
+};
+
+/* slcg PriRing */
+static const struct gating_desc gp10b_slcg_priring[] = {
+	{.addr = 0x001200a8, .prod = 0x00000000, .disable = 0x00000001},
+};
+
+/* slcg pwr_csb */
+static const struct gating_desc gp10b_slcg_pwr_csb[] = {
+	{.addr = 0x00000134, .prod = 0x00020008, .disable = 0x0003fffe},
+	{.addr = 0x00000e74, .prod = 0x00000000, .disable = 0x0000000f},
+	{.addr = 0x00000a74, .prod = 0x00000000, .disable = 0x00007ffe},
+	{.addr = 0x000016b8, .prod = 0x00000000, .disable = 0x0000000f},
+};
+
+/* slcg pmu */
+static const struct gating_desc gp10b_slcg_pmu[] = {
+	{.addr = 0x0010a134, .prod = 0x00020008, .disable = 0x0003fffe},
+	{.addr = 0x0010aa74, .prod = 0x00000000, .disable = 0x00007ffe},
+	{.addr = 0x0010ae74, .prod = 0x00000000, .disable = 0x0000000f},
+};
+
+/* therm gr */
+static const struct gating_desc gp10b_slcg_therm[] = {
+	{.addr = 0x000206b8, .prod = 0x00000000, .disable = 0x0000000f},
+};
+
+/* slcg Xbar */
+static const struct gating_desc gp10b_slcg_xbar[] = {
+	{.addr = 0x0013cbe4, .prod = 0x00000000, .disable = 0x1ffffffe},
+	{.addr = 0x0013cc04, .prod = 0x00000000, .disable = 0x1ffffffe},
+};
+
+/* blcg bus */
+static const struct gating_desc gp10b_blcg_bus[] = {
+	{.addr = 0x00001c00, .prod = 0x00000042, .disable = 0x00000000},
+};
+
+/* blcg ctxsw prog */
+static const struct gating_desc gp10b_blcg_ctxsw_prog[] = {
+};
+
+/* blcg fb */
+static const struct gating_desc gp10b_blcg_fb[] = {
+	{.addr = 0x00100d10, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00100d30, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00100d3c, .prod = 0x00000242, .disable = 0x00000000},
+	{.addr = 0x00100d48, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00100d1c, .prod = 0x00000042, .disable = 0x00000000},
+	{.addr = 0x00100c98, .prod = 0x00004242, .disable = 0x00000000},
+};
+
+/* blcg fifo */
+static const struct gating_desc gp10b_blcg_fifo[] = {
+	{.addr = 0x000026a4, .prod = 0x0000c242, .disable = 0x00000000},
+};
+
+/* blcg gr */
+static const struct gating_desc gp10b_blcg_gr[] = {
+	{.addr = 0x004041f0, .prod = 0x0000c646, .disable = 0x00000000},
+	{.addr = 0x00409890, .prod = 0x0000007f, .disable = 0x00000000},
+	{.addr = 0x004098b0, .prod = 0x0000007f, .disable = 0x00000000},
+	{.addr = 0x004078c0, .prod = 0x00004242, .disable = 0x00000000},
+	{.addr = 0x00406000, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00405860, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x0040590c, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00408040, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00407000, .prod = 0x4000c141, .disable = 0x00000000},
+	{.addr = 0x00405bf0, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x0041a890, .prod = 0x0000427f, .disable = 0x00000000},
+	{.addr = 0x0041a8b0, .prod = 0x0000007f, .disable = 0x00000000},
+	{.addr = 0x00418500, .prod = 0x0000c244, .disable = 0x00000000},
+	{.addr = 0x00418608, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00418688, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00418718, .prod = 0x00000042, .disable = 0x00000000},
+	{.addr = 0x00418828, .prod = 0x00008444, .disable = 0x00000000},
+	{.addr = 0x00418bbc, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00418970, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00418c70, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00418cf0, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00418d70, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00418f0c, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00418e0c, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00419020, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419038, .prod = 0x00000042, .disable = 0x00000000},
+	{.addr = 0x00418898, .prod = 0x00004242, .disable = 0x00000000},
+	{.addr = 0x00419a40, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419a48, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419a50, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419a58, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419a60, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419a68, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419a70, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419a78, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419a80, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419868, .prod = 0x00008242, .disable = 0x00000000},
+	{.addr = 0x00419cd4, .prod = 0x00000002, .disable = 0x00000000},
+	{.addr = 0x00419cdc, .prod = 0x00000002, .disable = 0x00000000},
+	{.addr = 0x00419c70, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00419fd0, .prod = 0x0000c044, .disable = 0x00000000},
+	{.addr = 0x00419fd8, .prod = 0x0000c046, .disable = 0x00000000},
+	{.addr = 0x00419fe0, .prod = 0x0000c044, .disable = 0x00000000},
+	{.addr = 0x00419fe8, .prod = 0x0000c042, .disable = 0x00000000},
+	{.addr = 0x00419ff0, .prod = 0x0000c045, .disable = 0x00000000},
+	{.addr = 0x00419ff8, .prod = 0x00000002, .disable = 0x00000000},
+	{.addr = 0x00419f90, .prod = 0x00000002, .disable = 0x00000000},
+	{.addr = 0x0041be28, .prod = 0x00008242, .disable = 0x00000000},
+	{.addr = 0x0041bfe8, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x0041bed0, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00408810, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00408a80, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x004089a8, .prod = 0x0000c242, .disable = 0x00000000},
+};
+
+/* blcg ltc */
+static const struct gating_desc gp10b_blcg_ltc[] = {
+	{.addr = 0x0017e030, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x0017e040, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x0017e3e0, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x0017e3c8, .prod = 0x00000044, .disable = 0x00000000},
+};
+
+/* blcg pwr_csb  */
+static const struct gating_desc gp10b_blcg_pwr_csb[] = {
+	{.addr = 0x00000a70, .prod = 0x00000045, .disable = 0x00000000},
+};
+
+/* blcg pmu */
+static const struct gating_desc gp10b_blcg_pmu[] = {
+	{.addr = 0x0010aa70, .prod = 0x00000045, .disable = 0x00000000},
+};
+
+/* blcg Xbar */
+static const struct gating_desc gp10b_blcg_xbar[] = {
+	{.addr = 0x0013cbe0, .prod = 0x00000042, .disable = 0x00000000},
+	{.addr = 0x0013cc00, .prod = 0x00000042, .disable = 0x00000000},
+};
+
+/* pg gr */
+static const struct gating_desc gp10b_pg_gr[] = {
+};
+
+/* gating register load functions */
+void gp10b_slcg_bus_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_slcg_bus) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_slcg_bus[i].addr,
+				gp10b_slcg_bus[i].prod);
+		else
+			gk20a_writel(g, gp10b_slcg_bus[i].addr,
+				 gp10b_slcg_bus[i].disable);
+	}
+}
+
+void gp10b_slcg_ce2_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_slcg_ce2) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_slcg_ce2[i].addr,
+				gp10b_slcg_ce2[i].prod);
+		else
+			gk20a_writel(g, gp10b_slcg_ce2[i].addr,
+				 gp10b_slcg_ce2[i].disable);
+	}
+}
+
+void gp10b_slcg_chiplet_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_slcg_chiplet) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_slcg_chiplet[i].addr,
+				gp10b_slcg_chiplet[i].prod);
+		else
+			gk20a_writel(g, gp10b_slcg_chiplet[i].addr,
+				 gp10b_slcg_chiplet[i].disable);
+	}
+}
+
+void gp10b_slcg_ctxsw_firmware_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+}
+
+void gp10b_slcg_fb_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_slcg_fb) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_slcg_fb[i].addr,
+				gp10b_slcg_fb[i].prod);
+		else
+			gk20a_writel(g, gp10b_slcg_fb[i].addr,
+				 gp10b_slcg_fb[i].disable);
+	}
+}
+
+void gp10b_slcg_fifo_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_slcg_fifo) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_slcg_fifo[i].addr,
+				gp10b_slcg_fifo[i].prod);
+		else
+			gk20a_writel(g, gp10b_slcg_fifo[i].addr,
+				 gp10b_slcg_fifo[i].disable);
+	}
+}
+
+void gr_gp10b_slcg_gr_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_slcg_gr) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_slcg_gr[i].addr,
+				gp10b_slcg_gr[i].prod);
+		else
+			gk20a_writel(g, gp10b_slcg_gr[i].addr,
+				 gp10b_slcg_gr[i].disable);
+	}
+}
+
+void ltc_gp10b_slcg_ltc_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_slcg_ltc) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_slcg_ltc[i].addr,
+				gp10b_slcg_ltc[i].prod);
+		else
+			gk20a_writel(g, gp10b_slcg_ltc[i].addr,
+				gp10b_slcg_ltc[i].disable);
+	}
+}
+
+void gp10b_slcg_perf_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_slcg_perf) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_slcg_perf[i].addr,
+				gp10b_slcg_perf[i].prod);
+		else
+			gk20a_writel(g, gp10b_slcg_perf[i].addr,
+				gp10b_slcg_perf[i].disable);
+	}
+}
+
+void gp10b_slcg_priring_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_slcg_priring) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_slcg_priring[i].addr,
+				gp10b_slcg_priring[i].prod);
+		else
+			gk20a_writel(g, gp10b_slcg_priring[i].addr,
+				gp10b_slcg_priring[i].disable);
+	}
+}
+
+void gp10b_slcg_pwr_csb_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_slcg_pwr_csb) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_slcg_pwr_csb[i].addr,
+				gp10b_slcg_pwr_csb[i].prod);
+		else
+			gk20a_writel(g, gp10b_slcg_pwr_csb[i].addr,
+				gp10b_slcg_pwr_csb[i].disable);
+	}
+}
+
+void gp10b_slcg_pmu_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_slcg_pmu) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_slcg_pmu[i].addr,
+				gp10b_slcg_pmu[i].prod);
+		else
+			gk20a_writel(g, gp10b_slcg_pmu[i].addr,
+				gp10b_slcg_pmu[i].disable);
+	}
+}
+
+void gp10b_slcg_therm_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_slcg_therm) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_slcg_therm[i].addr,
+				gp10b_slcg_therm[i].prod);
+		else
+			gk20a_writel(g, gp10b_slcg_therm[i].addr,
+				gp10b_slcg_therm[i].disable);
+	}
+}
+
+void gp10b_slcg_xbar_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_slcg_xbar) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_slcg_xbar[i].addr,
+				gp10b_slcg_xbar[i].prod);
+		else
+			gk20a_writel(g, gp10b_slcg_xbar[i].addr,
+				gp10b_slcg_xbar[i].disable);
+	}
+}
+
+void gp10b_blcg_bus_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_blcg_bus) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_blcg_bus[i].addr,
+				gp10b_blcg_bus[i].prod);
+		else
+			gk20a_writel(g, gp10b_blcg_bus[i].addr,
+				gp10b_blcg_bus[i].disable);
+	}
+}
+
+void gp10b_blcg_ctxsw_firmware_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_blcg_ctxsw_prog) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_blcg_ctxsw_prog[i].addr,
+				gp10b_blcg_ctxsw_prog[i].prod);
+		else
+			gk20a_writel(g, gp10b_blcg_ctxsw_prog[i].addr,
+				gp10b_blcg_ctxsw_prog[i].disable);
+	}
+}
+
+void gp10b_blcg_fb_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_blcg_fb) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_blcg_fb[i].addr,
+				gp10b_blcg_fb[i].prod);
+		else
+			gk20a_writel(g, gp10b_blcg_fb[i].addr,
+				gp10b_blcg_fb[i].disable);
+	}
+}
+
+void gp10b_blcg_fifo_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_blcg_fifo) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_blcg_fifo[i].addr,
+				gp10b_blcg_fifo[i].prod);
+		else
+			gk20a_writel(g, gp10b_blcg_fifo[i].addr,
+				gp10b_blcg_fifo[i].disable);
+	}
+}
+
+void gp10b_blcg_gr_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_blcg_gr) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_blcg_gr[i].addr,
+				gp10b_blcg_gr[i].prod);
+		else
+			gk20a_writel(g, gp10b_blcg_gr[i].addr,
+				gp10b_blcg_gr[i].disable);
+	}
+}
+
+void gp10b_blcg_ltc_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_blcg_ltc) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_blcg_ltc[i].addr,
+				gp10b_blcg_ltc[i].prod);
+		else
+			gk20a_writel(g, gp10b_blcg_ltc[i].addr,
+				gp10b_blcg_ltc[i].disable);
+	}
+}
+
+void gp10b_blcg_pwr_csb_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_blcg_pwr_csb) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_blcg_pwr_csb[i].addr,
+				gp10b_blcg_pwr_csb[i].prod);
+		else
+			gk20a_writel(g, gp10b_blcg_pwr_csb[i].addr,
+				gp10b_blcg_pwr_csb[i].disable);
+	}
+}
+
+void gp10b_blcg_pmu_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_blcg_pmu) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_blcg_pmu[i].addr,
+				gp10b_blcg_pmu[i].prod);
+		else
+			gk20a_writel(g, gp10b_blcg_pmu[i].addr,
+				gp10b_blcg_pmu[i].disable);
+	}
+}
+
+void gp10b_blcg_xbar_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_blcg_xbar) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_blcg_xbar[i].addr,
+				gp10b_blcg_xbar[i].prod);
+		else
+			gk20a_writel(g, gp10b_blcg_xbar[i].addr,
+				gp10b_blcg_xbar[i].disable);
+	}
+}
+
+void gr_gp10b_pg_gr_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 i;
+	u32 size = sizeof(gp10b_pg_gr) / sizeof(struct gating_desc);
+	for (i = 0; i < size; i++) {
+		if (prod)
+			gk20a_writel(g, gp10b_pg_gr[i].addr,
+				gp10b_pg_gr[i].prod);
+		else
+			gk20a_writel(g, gp10b_pg_gr[i].addr,
+				gp10b_pg_gr[i].disable);
+	}
+}
+
+#endif /* __gp10b_gating_reglist_h__ */
diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.h b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.h
new file mode 100644
index 00000000..465a0b4d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2015, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "gk20a/gk20a.h"
+
+void gp10b_slcg_bus_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_slcg_chiplet_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_slcg_ctxsw_firmware_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_slcg_fb_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_slcg_fifo_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gr_gp10b_slcg_gr_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void ltc_gp10b_slcg_ltc_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_slcg_perf_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_slcg_priring_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_slcg_pwr_csb_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_slcg_pmu_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_slcg_therm_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_slcg_xbar_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_blcg_bus_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_blcg_ctxsw_firmware_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_blcg_fb_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_blcg_fifo_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_blcg_gr_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_blcg_ltc_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_blcg_pwr_csb_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_blcg_pmu_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp10b_blcg_xbar_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gr_gp10b_pg_gr_load_gating_prod(struct gk20a *g,
+	bool prod);
+
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 914d8089..9d099479 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -29,60 +29,57 @@
 #include "gp10b/pmu_gp10b.h"
 #include "gp10b/gr_ctx_gp10b.h"
 #include "gp10b/fifo_gp10b.h"
+#include "gp10b/gp10b_gating_reglist.h"
 
 #include "gm20b/gr_gm20b.h"
-#include "gm20b/gm20b_gating_reglist.h"
 #include "gm20b/fifo_gm20b.h"
-#include "gp10b/fifo_gp10b.h"
 #include "gm20b/pmu_gm20b.h"
 #include "gm20b/clk_gm20b.h"
 
 static struct gpu_ops gp10b_ops = {
 	.clock_gating = {
 		.slcg_bus_load_gating_prod =
-			gm20b_slcg_bus_load_gating_prod,
-		.slcg_ce2_load_gating_prod =
-			gm20b_slcg_ce2_load_gating_prod,
+			gp10b_slcg_bus_load_gating_prod,
 		.slcg_chiplet_load_gating_prod =
-			gm20b_slcg_chiplet_load_gating_prod,
+			gp10b_slcg_chiplet_load_gating_prod,
 		.slcg_ctxsw_firmware_load_gating_prod =
-			gm20b_slcg_ctxsw_firmware_load_gating_prod,
+			gp10b_slcg_ctxsw_firmware_load_gating_prod,
 		.slcg_fb_load_gating_prod =
-			gm20b_slcg_fb_load_gating_prod,
+			gp10b_slcg_fb_load_gating_prod,
 		.slcg_fifo_load_gating_prod =
-			gm20b_slcg_fifo_load_gating_prod,
+			gp10b_slcg_fifo_load_gating_prod,
 		.slcg_gr_load_gating_prod =
-			gr_gm20b_slcg_gr_load_gating_prod,
+			gr_gp10b_slcg_gr_load_gating_prod,
 		.slcg_ltc_load_gating_prod =
-			ltc_gm20b_slcg_ltc_load_gating_prod,
+			ltc_gp10b_slcg_ltc_load_gating_prod,
 		.slcg_perf_load_gating_prod =
-			gm20b_slcg_perf_load_gating_prod,
+			gp10b_slcg_perf_load_gating_prod,
 		.slcg_priring_load_gating_prod =
-			gm20b_slcg_priring_load_gating_prod,
+			gp10b_slcg_priring_load_gating_prod,
 		.slcg_pmu_load_gating_prod =
-			gm20b_slcg_pmu_load_gating_prod,
+			gp10b_slcg_pmu_load_gating_prod,
 		.slcg_therm_load_gating_prod =
-			gm20b_slcg_therm_load_gating_prod,
+			gp10b_slcg_therm_load_gating_prod,
 		.slcg_xbar_load_gating_prod =
-			gm20b_slcg_xbar_load_gating_prod,
+			gp10b_slcg_xbar_load_gating_prod,
 		.blcg_bus_load_gating_prod =
-			gm20b_blcg_bus_load_gating_prod,
+			gp10b_blcg_bus_load_gating_prod,
 		.blcg_ctxsw_firmware_load_gating_prod =
-			gm20b_blcg_ctxsw_firmware_load_gating_prod,
+			gp10b_blcg_ctxsw_firmware_load_gating_prod,
 		.blcg_fb_load_gating_prod =
-			gm20b_blcg_fb_load_gating_prod,
+			gp10b_blcg_fb_load_gating_prod,
 		.blcg_fifo_load_gating_prod =
-			gm20b_blcg_fifo_load_gating_prod,
+			gp10b_blcg_fifo_load_gating_prod,
 		.blcg_gr_load_gating_prod =
-			gm20b_blcg_gr_load_gating_prod,
+			gp10b_blcg_gr_load_gating_prod,
 		.blcg_ltc_load_gating_prod =
-			gm20b_blcg_ltc_load_gating_prod,
+			gp10b_blcg_ltc_load_gating_prod,
 		.blcg_pwr_csb_load_gating_prod =
-			gm20b_blcg_pwr_csb_load_gating_prod,
+			gp10b_blcg_pwr_csb_load_gating_prod,
 		.blcg_pmu_load_gating_prod =
-			gm20b_blcg_pmu_load_gating_prod,
+			gp10b_blcg_pmu_load_gating_prod,
 		.pg_gr_load_gating_prod =
-			gr_gm20b_pg_gr_load_gating_prod,
+			gr_gp10b_pg_gr_load_gating_prod,
 	}
 };
 
-- 
cgit v1.2.2


From b5fd6e4fd5941d042098cd6d92fc2fa12f3eb4fe Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 15 Apr 2015 16:22:05 -0700
Subject: gpu: nvgpu: gp10b: Enable SMMU bypass

Change-Id: I1fcc7e93d3e31bfbb5d540b43b655566f6dc13cd
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/732010
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index ab98cbde..58b55d4b 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -49,6 +49,8 @@ static int gp10b_tegra_probe(struct platform_device *pdev)
 	}
 
 	platform->g->host1x_dev = host1x_pdev;
+	platform->bypass_smmu = !device_is_iommuable(&pdev->dev);
+	platform->disable_bigpage = platform->bypass_smmu;
 
 	return 0;
 }
-- 
cgit v1.2.2


From d2a5cf6e80a65366bc2ba78303cb8ef1f12f1596 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Tue, 28 Apr 2015 14:07:05 +0530
Subject: gpu: nvgpu: gp10b: fix sparse warnings of static symbol

Fix sparse warnings of the type below by making the necessary
symbols static:

warning: symbol '<symbol>' was not declared. Should it be static?

Bug 200088648

Change-Id: I222bebd958e29b3a95d161f05a3052389200fc10
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/736663
GVS: Gerrit_Virtual_Submit
Reviewed-by: Amit Sharma (SW-TEGRA) <amisharma@nvidia.com>
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 3633e9d9..b998ed4d 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -140,7 +140,7 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
 	return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
 }
 
-u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i)
+static u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i)
 {
 	return (u32 *) (((u8 *)entry->cpu_va) + i*gmmu_new_pde__size_v());
 }
@@ -182,7 +182,7 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	return 0;
 }
 
-u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i)
+static u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i)
 {
 	return (u32 *) (((u8 *)entry->cpu_va) + i*gmmu_new_dual_pde__size_v());
 }
@@ -318,7 +318,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	return 0;
 }
 
-const struct gk20a_mmu_level gp10b_mm_levels[] = {
+static const struct gk20a_mmu_level gp10b_mm_levels[] = {
 	{.hi_bit = {48, 48},
 	 .lo_bit = {47, 47},
 	 .update_entry = update_gmmu_pde3_locked,
@@ -342,7 +342,8 @@ const struct gk20a_mmu_level gp10b_mm_levels[] = {
 	{.update_entry = NULL}
 };
 
-const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g, u32 big_page_size)
+static const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g,
+	u32 big_page_size)
 {
 	return gp10b_mm_levels;
 }
-- 
cgit v1.2.2


From 607b8649768ff32ce435cbf5726c9d185a68cd85 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 23 Feb 2015 15:20:36 -0800
Subject: gpu: nvgpu: Implement syncpt protection

Change-Id: I05b2554588e5e1001cdbb54551cf8a064ea531bd
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/711303
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c     | 27 ++++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h | 26 +++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h   |  4 ++++
 3 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index 59f7deef..08f1c13c 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -147,10 +147,35 @@ static u32 gp10b_fifo_get_pbdma_signature(struct gk20a *g)
 		| pbdma_signature_sw_zero_f();
 }
 
+static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c)
+{
+	int syncpt_id;
+	void *inst_ptr;
+
+	gk20a_dbg_fn("");
+
+	inst_ptr = c->inst_block.cpu_va;
+	if (c->sync) {
+		u32 v = pbdma_allowed_syncpoints_0_valid_f(1);
+
+		syncpt_id = c->sync->syncpt_id(c->sync);
+		gk20a_dbg_info("Channel %d, syncpt id %d\n",
+				c->hw_chid, syncpt_id);
+
+		v |= pbdma_allowed_syncpoints_0_index_f(syncpt_id);
+
+		gk20a_mem_wr32(inst_ptr, ram_fc_allowed_syncpoints_w(), v);
+	}
+
+	gk20a_dbg_fn("done");
+
+	return 0;
+}
+
 void gp10b_init_fifo(struct gpu_ops *gops)
 {
 	gm20b_init_fifo(gops);
 	gops->fifo.setup_ramfc = channel_gp10b_setup_ramfc;
 	gops->fifo.get_pbdma_signature = gp10b_fifo_get_pbdma_signature;
-
+	gops->fifo.resetup_ramfc = gp10b_fifo_resetup_ramfc;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
index 91429b47..18db8595 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -410,6 +410,10 @@ static inline u32 pbdma_intr_0_signature_pending_f(void)
 {
 	return 0x80000000;
 }
+static inline u32 pbdma_intr_0_syncpoint_illegal_pending_f(void)
+{
+	return 0x10000000;
+}
 static inline u32 pbdma_intr_1_r(u32 i)
 {
 	return 0x00040148 + i*8192;
@@ -438,6 +442,26 @@ static inline u32 pbdma_udma_nop_r(void)
 {
 	return 0x00000008;
 }
+static inline u32 pbdma_allowed_syncpoints_r(u32 i)
+{
+	return 0x000400e8 + i*8192;
+}
+static inline u32 pbdma_allowed_syncpoints_0_valid_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+static inline u32 pbdma_allowed_syncpoints_0_index_f(u32 v)
+{
+	return (v & 0x7fff) << 16;
+}
+static inline u32 pbdma_allowed_syncpoints_1_valid_f(u32 v)
+{
+	return (v & 0x1) << 15;
+}
+static inline u32 pbdma_allowed_syncpoints_1_index_f(u32 v)
+{
+	return (v & 0x7fff) << 0;
+}
 static inline u32 pbdma_syncpointa_r(u32 i)
 {
 	return 0x000400a4 + i*8192;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
index dea53f96..863b15b8 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
@@ -342,6 +342,10 @@ static inline u32 ram_fc_formats_w(void)
 {
 	return 39;
 }
+static inline u32 ram_fc_allowed_syncpoints_w(void)
+{
+	return 58;
+}
 static inline u32 ram_fc_syncpointa_w(void)
 {
 	return 41;
-- 
cgit v1.2.2


From c0e798c25035daaf7f5b67953f8aa9a0210c76f9 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 20 Apr 2015 15:48:12 -0700
Subject: gpu: nvgpu: gp10b: Use betacb size from debugfs

If betacb size has been given via debugfs, use that instead of the
calculated number.

Bug 1628352

Change-Id: I8c68c27a2bfdd7f013776734ef846377a89b0033
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/733332
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 4f7a037b..96070df7 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -322,8 +322,9 @@ static void gr_gp10b_cb_size_default(struct gk20a *g)
 {
 	struct gr_gk20a *gr = &g->gr;
 
-	gr->attrib_cb_default_size =
-		gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
+	if (!gr->attrib_cb_default_size)
+		gr->attrib_cb_default_size =
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
 	gr->alpha_cb_default_size =
 		gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
 }
-- 
cgit v1.2.2


From b9999f25cce027c50c17200f4d5f1090f31a578b Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 24 Apr 2015 10:13:43 -0700
Subject: gpu: nvgpu: gp10b: Dynamic GfxP buffer size

Calculate GFXP attrib cb buffer size from the global buffer size.

Bug 1628352

Change-Id: If4edfbf5700334b791dbf8e5cf38fd0208ee7fa1
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/735717
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 96070df7..1b88112e 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -476,17 +476,20 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 	if (flags == NVGPU_GR_PREEMPTION_MODE_GFXP) {
 		u32 spill_size =
 			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v();
-		u32 betacb_size = ALIGN(
-			(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() *
-			 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
-			 g->gr.max_tpc_count) +
-			(g->gr.alpha_cb_size *
-			 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
-			 g->gr.max_tpc_count),
-			128);
 		u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
 			gr_scc_pagepool_total_pages_byte_granularity_v();
-
+		u32 betacb_size = g->gr.attrib_cb_default_size +
+				  (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
+				   gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+		u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
+				  gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
+				  g->gr.max_tpc_count;
+		attrib_cb_size = ALIGN(attrib_cb_size, 128);
+
+		gk20a_dbg_info("gfxp context spill_size=%d", spill_size);
+		gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size);
+		gk20a_dbg_info("gfxp context attrib_cb_size=%d",
+				attrib_cb_size);
 		err = gk20a_gmmu_alloc_map(vm, g->gr.t18x.ctx_vars.preempt_image_size,
 				&(*gr_ctx)->t18x.preempt_ctxsw_buffer);
 		if (err) {
-- 
cgit v1.2.2


From ba61cc77936ad4ef4a39b52f6925c5f8d5a2e3ec Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 30 Apr 2015 13:30:28 -0700
Subject: gpu: nvgpu: gp10b: Fix caching attribute

Fix caching attribute on 5-level page tables.

Bug 1525976

Change-Id: I5c5bf336d87c642f42a387206a55a889e6e07ba6
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/737923
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index b998ed4d..1aba16c4 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -170,6 +170,7 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 
 	pde_v[0] |= gmmu_new_pde_aperture_video_memory_f();
 	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
+	pde_v[0] |= gmmu_new_pde_vol_true_f();
 
 	pde = pde3_from_index(parent, i);
 
@@ -259,24 +260,22 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 
 	if (*iova) {
 		if (unmapped_pte)
-			pte_w[0] = gmmu_new_pte_valid_false_f() |
-				gmmu_new_pte_address_sys_f(*iova
-					>> gmmu_new_pte_address_shift_v());
+			pte_w[0] = gmmu_new_pte_valid_false_f();
 		else
-			pte_w[0] = gmmu_new_pte_valid_true_f() |
-				gmmu_new_pte_address_sys_f(*iova
-					>> gmmu_new_pte_address_shift_v());
+			pte_w[0] = gmmu_new_pte_valid_true_f();
+		pte_w[0] |= gmmu_new_pte_aperture_video_memory_f() |
+			    gmmu_new_pte_address_sys_f(*iova
+			      >> gmmu_new_pte_address_shift_v());
 
-		pte_w[1] = gmmu_new_pte_aperture_video_memory_f() |
-			gmmu_new_pte_kind_f(kind_v) |
-			gmmu_new_pte_comptagline_f(*ctag / SZ_128K);
+		pte_w[1] = gmmu_new_pte_kind_f(kind_v) |
+			   gmmu_new_pte_comptagline_f(*ctag / SZ_128K);
 
 		if (rw_flag == gk20a_mem_flag_read_only)
 			pte_w[0] |= gmmu_new_pte_read_only_true_f();
 		if (unmapped_pte && !cacheable)
 			pte_w[0] |= gmmu_new_pte_read_only_true_f();
 		else if (!cacheable)
-			pte_w[1] |= gmmu_new_pte_vol_true_f();
+			pte_w[0] |= gmmu_new_pte_vol_true_f();
 
 		gk20a_dbg(gpu_dbg_pte, "pte=%d iova=0x%llx kind=%d"
 			   " ctag=%d vol=%d"
@@ -289,7 +288,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			*ctag += page_size;
 	} else if (sparse) {
 		pte_w[0] = gmmu_new_pte_valid_false_f();
-		pte_w[1] |= gmmu_new_pte_vol_true_f();
+		pte_w[0] |= gmmu_new_pte_vol_true_f();
 	} else {
 		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
 	}
-- 
cgit v1.2.2


From 046dce93a6a8f36b2e518ab1d0b2596855fd5044 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 30 Apr 2015 14:28:13 -0700
Subject: gpu: nvgpu: gp10b: Enable new page table format

Enable new page table format for all platforms.

Bug 1525976

Change-Id: I9a3cfabdef7dc6ec33e18a8a4f32063c40f680fa
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/737364
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 1aba16c4..9b347f00 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -370,8 +370,6 @@ void gp10b_init_mm(struct gpu_ops *gops)
 	gops->mm.init_bar2_vm = gb10b_init_bar2_vm;
 	gops->mm.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup;
 	gops->mm.get_iova_addr = gp10b_mm_iova_addr;
-	if (tegra_platform_is_linsim()) {
-		gops->mm.get_mmu_levels = gp10b_mm_get_mmu_levels;
-		gops->mm.init_pdb = gp10b_mm_init_pdb;
-	}
+	gops->mm.get_mmu_levels = gp10b_mm_get_mmu_levels;
+	gops->mm.init_pdb = gp10b_mm_init_pdb;
 }
-- 
cgit v1.2.2


From 58613244d2f0ea903cb0209deb8ea05a4c7c318a Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 30 Apr 2015 15:28:57 -0700
Subject: gpu: nvgpu: gp10b: Fix comptag index calculation

The comptag index was computed with 128k comptag spacing, but 64k is the correct spacing.

Bug 1525976

Change-Id: Ie2f926929fa89cf715b86a57ffbf4dd1e4920473
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/737947
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 9b347f00..c651eeb9 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -253,7 +253,9 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			   bool cacheable, bool unmapped_pte,
 			   int rw_flag, bool sparse, u32 flags)
 {
+	struct gk20a *g = vm->mm->g;
 	u32 page_size  = vm->gmmu_page_sizes[gmmu_pgsz_idx];
+	u32 ctag_granularity = g->ops.fb.compression_page_size(g);
 	u32 pte_w[2] = {0, 0}; /* invalid pte */
 
 	gk20a_dbg_fn("");
@@ -268,7 +270,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			      >> gmmu_new_pte_address_shift_v());
 
 		pte_w[1] = gmmu_new_pte_kind_f(kind_v) |
-			   gmmu_new_pte_comptagline_f(*ctag / SZ_128K);
+			   gmmu_new_pte_comptagline_f(*ctag / ctag_granularity);
 
 		if (rw_flag == gk20a_mem_flag_read_only)
 			pte_w[0] |= gmmu_new_pte_read_only_true_f();
@@ -281,7 +283,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			   " ctag=%d vol=%d"
 			   " [0x%08x, 0x%08x]",
 			   i, *iova,
-			   kind_v, *ctag, !cacheable,
+			   kind_v, *ctag / ctag_granularity, !cacheable,
 			   pte_w[1], pte_w[0]);
 
 		if (*ctag)
-- 
cgit v1.2.2


From 648c097b965976d1f94864851acf8e6f43a28c60 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 30 Apr 2015 09:52:37 -0700
Subject: gpu: nvgpu: gp10b: Define VPR allocator

VPR allocator needs to be used when allocating graphics context for
VPR channels. Define it for gp10b.

Bug 1625090

Change-Id: Ie2e3a865c310c34c629627891ac0b579f299983f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/737846
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 58b55d4b..efecb1e3 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -26,6 +26,7 @@
 #include <linux/tegra_pm_domains.h>
 #include "gk20a/platform_gk20a.h"
 #include "gk20a/gk20a.h"
+#include "platform_tegra.h"
 
 static int gp10b_tegra_probe(struct platform_device *pdev)
 {
@@ -103,4 +104,7 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	.default_big_page_size	= SZ_64K,
 
 	.has_cde = true,
+
+	.secure_alloc = gk20a_tegra_secure_alloc,
+	.secure_page_alloc = gk20a_tegra_secure_page_alloc,
 };
-- 
cgit v1.2.2


From 588d8975bda1f740d54af516b7a04521810c3735 Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Fri, 17 Apr 2015 09:49:50 +0530
Subject: gpu:nvgpu:gp10b: support secure gpccs changes

bug 200080684

Change-Id: I5888939017877a50b9bd596393ee8ad1547c18e5
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/732535
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index 9ecf3964..b8b985b3 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -164,5 +164,4 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.pmu_setup_elpg = gp10b_pmu_setup_elpg;
 	gops->pmu.lspmuwprinitdone = false;
 	gops->pmu.fecsbootstrapdone = false;
-	gops->pmu.fecsrecoveryinprogress = 0;
 }
-- 
cgit v1.2.2


From 74bdb403274e897731d1a03c4f3a6cfaff8448d5 Mon Sep 17 00:00:00 2001
From: Alex Waterman <alexw@nvidia.com>
Date: Mon, 23 Mar 2015 16:26:14 -0700
Subject: gpu: nvgpu: gp10b part of new VA allocator

The comptag allocator is made in the chip-specific init code
for the comptags. Thus, a t18x change needs to be made to make
sure the new allocator code compiles and works on t18x.

Change-Id: I57a34f3c61ebd31f875caa577378e829812f2d4c
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/721171
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 3c809eaf..68f4eafa 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -16,6 +16,7 @@
 #include <linux/types.h>
 
 #include "gk20a/gk20a.h"
+#include "gk20a/gk20a_allocator.h"
 #include "gm20b/ltc_gm20b.h"
 #include "hw_proj_gp10b.h"
 #include "hw_mc_gp10b.h"
@@ -111,9 +112,8 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	if (err)
 		return err;
 
-	gk20a_allocator_init(&gr->comp_tags, "comptag",
-			      1, /* start */
-			      max_comptag_lines - 1); /* length*/
+	__gk20a_allocator_init(&gr->comp_tags, NULL, "comptag",
+			       1, max_comptag_lines - 1, 1, 10, 0);
 
 	gr->comptags_per_cacheline = comptags_per_cacheline;
 	gr->slices_per_ltc = slices_per_ltc;
-- 
cgit v1.2.2


From 5e18ae63dbb9d6cdb0561d251e2d301c35af6e03 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 11 May 2015 19:27:34 -0700
Subject: Revert "gpu: nvgpu: gp10b part of new VA allocator"

This reverts commit 3a4f0285c7e9212b394b2c1b151987a7084de927.

Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Change-Id: I886e434ce98e85f99b0a77749179e31c0bd00620
Reviewed-on: http://git-master/r/741468
Reviewed-by: Hiroshi Doyu <hdoyu@nvidia.com>
Tested-by: Hiroshi Doyu <hdoyu@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 68f4eafa..3c809eaf 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -16,7 +16,6 @@
 #include <linux/types.h>
 
 #include "gk20a/gk20a.h"
-#include "gk20a/gk20a_allocator.h"
 #include "gm20b/ltc_gm20b.h"
 #include "hw_proj_gp10b.h"
 #include "hw_mc_gp10b.h"
@@ -112,8 +111,9 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	if (err)
 		return err;
 
-	__gk20a_allocator_init(&gr->comp_tags, NULL, "comptag",
-			       1, max_comptag_lines - 1, 1, 10, 0);
+	gk20a_allocator_init(&gr->comp_tags, "comptag",
+			      1, /* start */
+			      max_comptag_lines - 1); /* length*/
 
 	gr->comptags_per_cacheline = comptags_per_cacheline;
 	gr->slices_per_ltc = slices_per_ltc;
-- 
cgit v1.2.2


From 2907e24e8bb31f41d13692aef76aa7c0ca227525 Mon Sep 17 00:00:00 2001
From: Alex Waterman <alexw@nvidia.com>
Date: Fri, 15 May 2015 09:16:58 -0700
Subject: Revert "Revert "gpu: nvgpu: gp10b part of new VA allocator""

This reverts commit 30e5947fa1f26ed6bb4f137fd76c8869e91b9829.

The original commit was actually fine.

Signed-off-by: Alex Waterman <alexw@nvidia.com>
Change-Id: I0454415981d29ed0b877f7a21db6f54bc4c30470
Reviewed-on: http://git-master/r/743302
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 3c809eaf..68f4eafa 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -16,6 +16,7 @@
 #include <linux/types.h>
 
 #include "gk20a/gk20a.h"
+#include "gk20a/gk20a_allocator.h"
 #include "gm20b/ltc_gm20b.h"
 #include "hw_proj_gp10b.h"
 #include "hw_mc_gp10b.h"
@@ -111,9 +112,8 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	if (err)
 		return err;
 
-	gk20a_allocator_init(&gr->comp_tags, "comptag",
-			      1, /* start */
-			      max_comptag_lines - 1); /* length*/
+	__gk20a_allocator_init(&gr->comp_tags, NULL, "comptag",
+			       1, max_comptag_lines - 1, 1, 10, 0);
 
 	gr->comptags_per_cacheline = comptags_per_cacheline;
 	gr->slices_per_ltc = slices_per_ltc;
-- 
cgit v1.2.2


From 94a7c5ff2cbe8a583e9b8fc4777e5debe4c48810 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 26 May 2015 16:12:19 -0700
Subject: gpu: nvgpu: gp10b: Fix PDE/PTE address handling

We were dropping the part of the address that spans a word boundary. The register
generator does not know how to deal with multi-word fields, so edit things
in manually.

Bug 1646531

Change-Id: I3ef06d6dfcb0a499ed45456d165fe60c91492250
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/747468
---
 drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h | 6 +++---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c      | 7 +++++--
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
index fc65f57d..844cb142 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
@@ -72,7 +72,7 @@ static inline u32 gmmu_new_pde_aperture_video_memory_f(void)
 }
 static inline u32 gmmu_new_pde_address_sys_f(u32 v)
 {
-	return (v & 0xffffff) << 8;
+	return (v & 0xfffffff) << 8;
 }
 static inline u32 gmmu_new_pde_address_sys_w(void)
 {
@@ -164,7 +164,7 @@ static inline u32 gmmu_new_dual_pde_vol_big_false_f(void)
 }
 static inline u32 gmmu_new_dual_pde_address_small_sys_f(u32 v)
 {
-	return (v & 0xffffff) << 8;
+	return (v & 0xfffffff) << 8;
 }
 static inline u32 gmmu_new_dual_pde_address_small_sys_w(void)
 {
@@ -200,7 +200,7 @@ static inline u32 gmmu_new_pte_valid_false_f(void)
 }
 static inline u32 gmmu_new_pte_address_sys_f(u32 v)
 {
-	return (v & 0xffffff) << 8;
+	return (v & 0xfffffff) << 8;
 }
 static inline u32 gmmu_new_pte_address_sys_w(void)
 {
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index c651eeb9..bcdee9fc 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -171,7 +171,7 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	pde_v[0] |= gmmu_new_pde_aperture_video_memory_f();
 	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
 	pde_v[0] |= gmmu_new_pde_vol_true_f();
-
+	pde_v[1] |= pte_addr >> 24;
 	pde = pde3_from_index(parent, i);
 
 	gk20a_mem_wr32(pde, 0, pde_v[0]);
@@ -222,12 +222,14 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 		pde_v[2] |= gmmu_new_dual_pde_address_small_sys_f(pte_addr_small);
 		pde_v[2] |= gmmu_new_dual_pde_aperture_small_video_memory_f();
 		pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
+		pde_v[3] |= pte_addr_small >> 24;
 	}
 
 	if (big_valid) {
 		pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(pte_addr_big);
 		pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
 		pde_v[0] |= gmmu_new_dual_pde_aperture_big_video_memory_f();
+		pde_v[1] |= pte_addr_big >> 28;
 	}
 
 	pde = pde0_from_index(pte, i);
@@ -269,7 +271,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			    gmmu_new_pte_address_sys_f(*iova
 			      >> gmmu_new_pte_address_shift_v());
 
-		pte_w[1] = gmmu_new_pte_kind_f(kind_v) |
+		pte_w[1] = *iova >> (24 + gmmu_new_pte_address_shift_v()) |
+			   gmmu_new_pte_kind_f(kind_v) |
 			   gmmu_new_pte_comptagline_f(*ctag / ctag_granularity);
 
 		if (rw_flag == gk20a_mem_flag_read_only)
-- 
cgit v1.2.2


From 0c5c1bf61ae1bd3e16a398a7b54e78314c361eb1 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 21 May 2015 08:53:53 -0700
Subject: gpu: nvgpu: gp10b: Wait for preempted or empty

ZBC is safe to update and GPU is safe to rail gate when units are
in preempted or empty state. Idle may never be reached in case of
graphics preemption, so relax the ZBC update wait condition.

Bug 1640378

Change-Id: I40c59e9af22a7a30b777c6b9f87e69d130042e44
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/745655
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c    | 66 +++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 24 +++++++++++++
 2 files changed, 90 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 1b88112e..03462d5f 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -14,6 +14,7 @@
  */
 
 #include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */
+#include <linux/delay.h>
 
 #include "gk20a/gr_gk20a.h"
 
@@ -23,6 +24,7 @@
 #include "hw_fifo_gp10b.h"
 #include "hw_proj_gp10b.h"
 #include "hw_ctxsw_prog_gp10b.h"
+#include "hw_mc_gp10b.h"
 
 static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 {
@@ -779,6 +781,69 @@ static int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
 	return 0;
 }
 
+static bool gr_activity_empty_or_preempted(u32 val)
+{
+	while(val) {
+		u32 v = val & 7;
+		if (v != gr_activity_4_gpc0_empty_v() &&
+		    v != gr_activity_4_gpc0_preempted_v())
+			return false;
+		val >>= 3;
+	}
+
+	return true;
+}
+
+static int gr_gp10b_wait_empty(struct gk20a *g, unsigned long end_jiffies,
+		       u32 expect_delay)
+{
+	u32 delay = expect_delay;
+	bool gr_enabled;
+	bool ctxsw_active;
+	bool gr_busy;
+	u32 gr_status;
+	u32 activity0, activity1, activity2, activity4;
+
+	gk20a_dbg_fn("");
+
+	do {
+		/* fmodel: host gets fifo_engine_status(gr) from gr
+		   only when gr_status is read */
+		gr_status = gk20a_readl(g, gr_status_r());
+
+		gr_enabled = gk20a_readl(g, mc_enable_r()) &
+			mc_enable_pgraph_enabled_f();
+
+		ctxsw_active = gr_status & 1<<7;
+
+		activity0 = gk20a_readl(g, gr_activity_0_r());
+		activity1 = gk20a_readl(g, gr_activity_1_r());
+		activity2 = gk20a_readl(g, gr_activity_2_r());
+		activity4 = gk20a_readl(g, gr_activity_4_r());
+
+		gr_busy = !(gr_activity_empty_or_preempted(activity0) &&
+			    gr_activity_empty_or_preempted(activity1) &&
+			    activity2 == 0 &&
+			    gr_activity_empty_or_preempted(activity4));
+
+		if (!gr_enabled || (!gr_busy && !ctxsw_active)) {
+			gk20a_dbg_fn("done");
+			return 0;
+		}
+
+		usleep_range(delay, delay * 2);
+		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
+
+	} while (time_before(jiffies, end_jiffies)
+			|| !tegra_platform_is_silicon());
+
+	gk20a_err(dev_from_gk20a(g),
+		"timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x",
+		ctxsw_active, gr_busy, activity0, activity1, activity2, activity4);
+
+	return -EAGAIN;
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -802,4 +867,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.update_ctxsw_preemption_mode =
 		gr_gp10b_update_ctxsw_preemption_mode;
 	gops->gr.dump_gr_regs = gr_gp10b_dump_gr_status_regs;
+	gops->gr.wait_empty = gr_gp10b_wait_empty;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 02674d6b..b185604e 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -334,6 +334,30 @@ static inline u32 gr_activity_4_r(void)
 {
 	return 0x00400390;
 }
+static inline u32 gr_activity_4_gpc0_s(void)
+{
+	return 3;
+}
+static inline u32 gr_activity_4_gpc0_f(u32 v)
+{
+	return (v & 0x7) << 0;
+}
+static inline u32 gr_activity_4_gpc0_m(void)
+{
+	return 0x7 << 0;
+}
+static inline u32 gr_activity_4_gpc0_v(u32 r)
+{
+	return (r >> 0) & 0x7;
+}
+static inline u32 gr_activity_4_gpc0_empty_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_activity_4_gpc0_preempted_v(void)
+{
+	return 0x00000004;
+}
 static inline u32 gr_pri_gpc0_gcc_dbg_r(void)
 {
 	return 0x00501000;
-- 
cgit v1.2.2


From 4b1d9ad4415f1aa044f02afa1f642c3b7855a447 Mon Sep 17 00:00:00 2001
From: Leonid Moiseichuk <lmoiseichuk@nvidia.com>
Date: Wed, 27 May 2015 14:19:04 +0300
Subject: gpu: nvgpu: gp10b: add hwpm registers

Add the generated wrappers for HW PM register access, which are required for
cyclestats support for snapshot buffer mapping.

See commit 589e7a9ffe2a5a70f8803a88fcf8429f553e2fba for tools:nvhost
generators update.

Bug 1573150
Bug 1517458

Change-Id: I9c9332a55f2282c0c626bc8ddbcfdce1289f778b
Signed-off-by: Leonid Moiseichuk <lmoiseichuk@nvidia.com>
Reviewed-on: http://git-master/r/747717
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_perf_gp10b.h | 205 ++++++++++++++++++++++++++++++++
 1 file changed, 205 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_perf_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_perf_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_perf_gp10b.h
new file mode 100644
index 00000000..ea1a61d2
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_perf_gp10b.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_perf_gp10b_h_
+#define _hw_perf_gp10b_h_
+
+static inline u32 perf_pmasys_control_r(void)
+{
+	return 0x001b4000;
+}
+static inline u32 perf_pmasys_control_membuf_status_v(u32 r)
+{
+	return (r >> 4) & 0x1;
+}
+static inline u32 perf_pmasys_control_membuf_status_overflowed_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 perf_pmasys_control_membuf_status_overflowed_f(void)
+{
+	return 0x10;
+}
+static inline u32 perf_pmasys_control_membuf_clear_status_f(u32 v)
+{
+	return (v & 0x1) << 5;
+}
+static inline u32 perf_pmasys_control_membuf_clear_status_v(u32 r)
+{
+	return (r >> 5) & 0x1;
+}
+static inline u32 perf_pmasys_control_membuf_clear_status_doit_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 perf_pmasys_control_membuf_clear_status_doit_f(void)
+{
+	return 0x20;
+}
+static inline u32 perf_pmasys_mem_block_r(void)
+{
+	return 0x001b4070;
+}
+static inline u32 perf_pmasys_mem_block_base_f(u32 v)
+{
+	return (v & 0xfffffff) << 0;
+}
+static inline u32 perf_pmasys_mem_block_target_f(u32 v)
+{
+	return (v & 0x3) << 28;
+}
+static inline u32 perf_pmasys_mem_block_target_v(u32 r)
+{
+	return (r >> 28) & 0x3;
+}
+static inline u32 perf_pmasys_mem_block_target_lfb_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 perf_pmasys_mem_block_target_lfb_f(void)
+{
+	return 0x0;
+}
+static inline u32 perf_pmasys_mem_block_target_sys_coh_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 perf_pmasys_mem_block_target_sys_coh_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 perf_pmasys_mem_block_target_sys_ncoh_v(void)
+{
+	return 0x00000003;
+}
+static inline u32 perf_pmasys_mem_block_target_sys_ncoh_f(void)
+{
+	return 0x30000000;
+}
+static inline u32 perf_pmasys_mem_block_valid_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+static inline u32 perf_pmasys_mem_block_valid_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 perf_pmasys_mem_block_valid_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 perf_pmasys_mem_block_valid_true_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 perf_pmasys_mem_block_valid_false_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 perf_pmasys_mem_block_valid_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 perf_pmasys_outbase_r(void)
+{
+	return 0x001b4074;
+}
+static inline u32 perf_pmasys_outbase_ptr_f(u32 v)
+{
+	return (v & 0x7ffffff) << 5;
+}
+static inline u32 perf_pmasys_outbaseupper_r(void)
+{
+	return 0x001b4078;
+}
+static inline u32 perf_pmasys_outbaseupper_ptr_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+static inline u32 perf_pmasys_outsize_r(void)
+{
+	return 0x001b407c;
+}
+static inline u32 perf_pmasys_outsize_numbytes_f(u32 v)
+{
+	return (v & 0x7ffffff) << 5;
+}
+static inline u32 perf_pmasys_mem_bytes_r(void)
+{
+	return 0x001b4084;
+}
+static inline u32 perf_pmasys_mem_bytes_numbytes_f(u32 v)
+{
+	return (v & 0xfffffff) << 4;
+}
+static inline u32 perf_pmasys_mem_bump_r(void)
+{
+	return 0x001b4088;
+}
+static inline u32 perf_pmasys_mem_bump_numbytes_f(u32 v)
+{
+	return (v & 0xfffffff) << 4;
+}
+static inline u32 perf_pmasys_enginestatus_r(void)
+{
+	return 0x001b40a4;
+}
+static inline u32 perf_pmasys_enginestatus_rbufempty_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 perf_pmasys_enginestatus_rbufempty_empty_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 perf_pmasys_enginestatus_rbufempty_empty_f(void)
+{
+	return 0x10;
+}
+#endif
-- 
cgit v1.2.2


From 65ef5bc238a782e76c5d104ef90562b1fe6dd038 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Fri, 15 May 2015 12:32:48 -0700
Subject: gpu:nvgpu: gp10b: update channel_setup_ramfc

Enable replayable faults based on the characteristics flags passed
to channel_setup_ramfc.

Bug 1645628

Change-Id: I7176efb3e5af9fefe5fb92cd5b49eb295e8e2c4a
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/743382
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index 08f1c13c..acf6f829 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -78,7 +78,7 @@ static int channel_gp10b_commit_userd(struct channel_gk20a *c)
 }
 
 static int channel_gp10b_setup_ramfc(struct channel_gk20a *c,
-			u64 gpfifo_base, u32 gpfifo_entries)
+			u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
 {
 	void *inst_ptr;
 
@@ -133,7 +133,8 @@ static int channel_gp10b_setup_ramfc(struct channel_gk20a *c,
 		pbdma_runlist_timeslice_timescale_3_f() |
 		pbdma_runlist_timeslice_enable_true_f());
 
-	gp10b_set_pdb_fault_replay_flags(c->g, inst_ptr);
+	if ( flags & NVGPU_ALLOC_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE)
+		gp10b_set_pdb_fault_replay_flags(c->g, inst_ptr);
 
 
 	gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
-- 
cgit v1.2.2


From 021748b782b054ed11d02341ee373f60b1ae0cb3 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Tue, 2 Jun 2015 16:13:01 +0530
Subject: gpu: nvgpu: fix allocator_init() calls

The change for the new VA space allocator is being reverted with
http://git-master/r/#/c/749291/, but only for kernel 3.18.

In Kernel3.10, we support the new VA allocator

Since we support both kernel versions for now,
use a KERNEL_VERSION-based mechanism to select
the appropriate call.

Define new macro NVGPU_USE_NEW_ALLOCATOR for Kernel3.10
where we want to use new allocator

Bug 200106514

Change-Id: I9af26d555278c40e03fe82b0912961a862c8bf55
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/751353
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 7 +++++++
 drivers/gpu/nvgpu/gp10b/mm_gp10b.h  | 8 ++++++++
 2 files changed, 15 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 68f4eafa..9db18aa6 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -15,6 +15,7 @@
 
 #include <linux/types.h>
 
+#include "gp10b/mm_gp10b.h"
 #include "gk20a/gk20a.h"
 #include "gk20a/gk20a_allocator.h"
 #include "gm20b/ltc_gm20b.h"
@@ -112,8 +113,14 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	if (err)
 		return err;
 
+#if NVGPU_USE_NEW_ALLOCATOR
 	__gk20a_allocator_init(&gr->comp_tags, NULL, "comptag",
 			       1, max_comptag_lines - 1, 1, 10, 0);
+#else
+	gk20a_allocator_init(&gr->comp_tags, "comptag",
+				1, /* start */
+				max_comptag_lines - 1); /* length*/
+#endif
 
 	gr->comptags_per_cacheline = comptags_per_cacheline;
 	gr->slices_per_ltc = slices_per_ltc;
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.h b/drivers/gpu/nvgpu/gp10b/mm_gp10b.h
index 034944e0..f34200a0 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.h
@@ -14,8 +14,16 @@
 #ifndef MM_GP10B_H
 #define MM_GP10B_H
 
+#include <linux/version.h>
+
 #define NVGPU_MM_GET_IO_COHERENCE_BIT	35
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
+#define NVGPU_USE_NEW_ALLOCATOR	1
+#else
+#define NVGPU_USE_NEW_ALLOCATOR	0
+#endif
+
 struct gpu_ops;
 
 void gp10b_init_mm(struct gpu_ops *gops);
-- 
cgit v1.2.2


From 4d30fe5a24d9f4416cce66ee1c90c8594879ab7b Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 13 Apr 2015 15:47:13 -0700
Subject: gpu: nvgpu: gp10b: Use correct PBDMA sig

Change-Id: Ic71ff2408bd01a1bf5cf1354453a2fe715438cf0
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/751555
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index acf6f829..d62f7316 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -99,7 +99,7 @@ static int channel_gp10b_setup_ramfc(struct channel_gk20a *c,
 		pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
 
 	gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
-		 pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());
+		 c->g->ops.fifo.get_pbdma_signature(c->g));
 
 	gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
 		pbdma_formats_gp_fermi0_f() |
-- 
cgit v1.2.2


From 634acd7422afb9cec5a390471937dc964ba31025 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 29 May 2015 18:34:37 -0700
Subject: gpu: nvgpu: Expose preemption flags to user space

Expose CILP and GFXP flags to user space ioctl
NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX.

Bug 200111328

Change-Id: I10931db2babd3222e308fd491824d95204355ff3
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/748932
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 03462d5f..33a52db5 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -475,7 +475,7 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 	if (err)
 		return err;
 
-	if (flags == NVGPU_GR_PREEMPTION_MODE_GFXP) {
+	if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) {
 		u32 spill_size =
 			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v();
 		u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
@@ -528,7 +528,7 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 	}
 
 	if (class == PASCAL_COMPUTE_A) {
-		if (flags == NVGPU_GR_PREEMPTION_MODE_CILP)
+		if (flags & NVGPU_ALLOC_OBJ_FLAGS_CILP)
 			(*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CILP;
 		else
 			(*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CTA;
-- 
cgit v1.2.2


From c25a2ac26e11057c1bef0da7b4a661247817140f Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 1 Jun 2015 13:03:38 -0700
Subject: gpu: nvgpu: Disable channel when writing syncpt id

Kick the channel off PBDMA before writing the new sync point id to the
allowed sync points.

Bug 1648297
Bug 1646477

Change-Id: I7c686d474c403fdd54bc64cff63b7d049feecb4d
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/750981
---
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index d62f7316..3a6c3c23 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -156,6 +156,13 @@ static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c)
 	gk20a_dbg_fn("");
 
 	inst_ptr = c->inst_block.cpu_va;
+
+	/* disable channel */
+	c->g->ops.fifo.disable_channel(c);
+
+	/* preempt the channel */
+	WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
+
 	if (c->sync) {
 		u32 v = pbdma_allowed_syncpoints_0_valid_f(1);
 
@@ -166,8 +173,15 @@ static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c)
 		v |= pbdma_allowed_syncpoints_0_index_f(syncpt_id);
 
 		gk20a_mem_wr32(inst_ptr, ram_fc_allowed_syncpoints_w(), v);
+	} else {
+		gk20a_mem_wr32(inst_ptr, ram_fc_allowed_syncpoints_w(), 0);
 	}
 
+	/* enable channel */
+	gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
+		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
+		ccsr_channel_enable_set_true_f());
+
 	gk20a_dbg_fn("done");
 
 	return 0;
-- 
cgit v1.2.2


From dfe6493dcba650318275b73e62fe2e5d35b36622 Mon Sep 17 00:00:00 2001
From: Bharat Nihalani <bnihalani@nvidia.com>
Date: Thu, 4 Jun 2015 05:11:04 -0700
Subject: Revert "gpu: nvgpu: fix allocator_init() calls"

This reverts commit 053037f1450d6ba6c5d01abcdcd9b24019ae8c85
since the issue seen with bug 200106514 is fixed with change
http://git-master/r/#/c/752080/.

Bug 200112195

Change-Id: If54eb570fd2ad5de99d180d03d5d90492283fe33
Signed-off-by: Bharat Nihalani <bnihalani@nvidia.com>
Reviewed-on: http://git-master/r/752504
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 7 -------
 drivers/gpu/nvgpu/gp10b/mm_gp10b.h  | 8 --------
 2 files changed, 15 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 9db18aa6..68f4eafa 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -15,7 +15,6 @@
 
 #include <linux/types.h>
 
-#include "gp10b/mm_gp10b.h"
 #include "gk20a/gk20a.h"
 #include "gk20a/gk20a_allocator.h"
 #include "gm20b/ltc_gm20b.h"
@@ -113,14 +112,8 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	if (err)
 		return err;
 
-#if NVGPU_USE_NEW_ALLOCATOR
 	__gk20a_allocator_init(&gr->comp_tags, NULL, "comptag",
 			       1, max_comptag_lines - 1, 1, 10, 0);
-#else
-	gk20a_allocator_init(&gr->comp_tags, "comptag",
-				1, /* start */
-				max_comptag_lines - 1); /* length*/
-#endif
 
 	gr->comptags_per_cacheline = comptags_per_cacheline;
 	gr->slices_per_ltc = slices_per_ltc;
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.h b/drivers/gpu/nvgpu/gp10b/mm_gp10b.h
index f34200a0..034944e0 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.h
@@ -14,16 +14,8 @@
 #ifndef MM_GP10B_H
 #define MM_GP10B_H
 
-#include <linux/version.h>
-
 #define NVGPU_MM_GET_IO_COHERENCE_BIT	35
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
-#define NVGPU_USE_NEW_ALLOCATOR	1
-#else
-#define NVGPU_USE_NEW_ALLOCATOR	0
-#endif
-
 struct gpu_ops;
 
 void gp10b_init_mm(struct gpu_ops *gops);
-- 
cgit v1.2.2


From 6b4a7ed432327bc9bc0e1983613ac17e5200d822 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 5 Jun 2015 12:42:12 -0700
Subject: gpu: nvgpu: gp10b: Rewrite compbit backing store calc

The compbit backing store size calculation did not take into account
the number of GOBs per comptagline per slice.

Bug 1604102

Change-Id: I42666e72ea54697b6fbc7318e65a6a09d867f5b6
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/754706
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sami Kiminki <skiminki@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 68f4eafa..d6fca6e4 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -80,8 +80,11 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 		max_comptag_lines = hw_max_comptag_lines;
 
 	compbit_backing_size =
-		DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) *
-		cacheline_size * slices_per_ltc * g->ltc_count;
+		roundup(max_comptag_lines * gobs_per_comptagline_per_slice,
+			cacheline_size);
+	compbit_backing_size =
+		roundup(compbit_backing_size * slices_per_ltc * g->ltc_count,
+			g->ops.fb.compressible_page_size(g));
 
 	/* aligned to 2KB * ltc_count */
 	compbit_backing_size +=
@@ -90,13 +93,6 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	/* must be a multiple of 64KB */
 	compbit_backing_size = roundup(compbit_backing_size, 64*1024);
 
-	max_comptag_lines =
-		(compbit_backing_size * comptags_per_cacheline) /
-		(cacheline_size * slices_per_ltc * g->ltc_count);
-
-	if (max_comptag_lines > hw_max_comptag_lines)
-		max_comptag_lines = hw_max_comptag_lines;
-
 	gk20a_dbg_info("compbit backing store size : %d",
 		compbit_backing_size);
 	gk20a_dbg_info("max comptag lines : %d",
-- 
cgit v1.2.2


From 4e55cfd9959c5468de7584306c68bb2d2bae1a4b Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 4 Jun 2015 10:28:18 -0700
Subject: gpu: nvgpu: gp10b: Use alpha+beta size for beta cb

When allocating betacb for a GfxP channel, add both alpha and beta
cb sizes together.

Change-Id: I8cef62f6272bfb3b5e9a3835a51590e5eb91dc92
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/752633
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 33a52db5..240bab81 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -508,7 +508,7 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 			goto fail_free_preempt;
 		}
 
-		err = gk20a_gmmu_alloc_map(vm, betacb_size,
+		err = gk20a_gmmu_alloc_map(vm, attrib_cb_size,
 					   &(*gr_ctx)->t18x.betacb_ctxsw_buffer);
 		if (err) {
 			gk20a_err(dev_from_gk20a(vm->mm->g),
-- 
cgit v1.2.2


From 888a27706b1285b7482e49143ac50f8d08551d84 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 4 Jun 2015 09:17:50 -0700
Subject: gpu: nvgpu: gp10b: Program TEX RM registers

Program CB base to new gp10b registers.

Change-Id: I1ab39a487dade58d3a024fb1aba1af5c878f31bb
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/752634
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c    | 29 +++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 52 +++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 240bab81..973653a0 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -844,6 +844,34 @@ static int gr_gp10b_wait_empty(struct gk20a *g, unsigned long end_jiffies,
 	return -EAGAIN;
 }
 
+static void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
+					     struct channel_ctx_gk20a *ch_ctx,
+					     u64 addr, bool patch)
+{
+	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
+	int attrBufferSize;
+
+	if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va)
+		attrBufferSize = gr_ctx->t18x.preempt_ctxsw_buffer.size;
+	else
+		attrBufferSize = g->ops.gr.calc_global_ctx_buffer_size(g);
+
+	attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
+
+	gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch);
+
+	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
+		gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
+		gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
+
+	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
+		gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch);
+
+	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
+		gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) |
+		gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -855,6 +883,7 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size;
 	gops->gr.calc_global_ctx_buffer_size =
 		gr_gp10b_calc_global_ctx_buffer_size;
+	gops->gr.commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb;
 	gops->gr.handle_sw_method = gr_gp10b_handle_sw_method;
 	gops->gr.cb_size_default = gr_gp10b_cb_size_default;
 	gops->gr.set_alpha_circular_buffer_size =
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index b185604e..32903fba 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -2206,6 +2206,58 @@ static inline u32 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_default_v(void)
 {
 	return 0x00030000;
 }
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_0_r(void)
+{
+	return 0x00419b00;
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_r(void)
+{
+	return 0x00419b04;
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_s(void)
+{
+	return 21;
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(u32 v)
+{
+	return (v & 0x1fffff) << 0;
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_m(void)
+{
+	return 0x1fffff << 0;
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_v(u32 r)
+{
+	return (r >> 0) & 0x1fffff;
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f(void)
+{
+	return 0x80;
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_valid_s(void)
+{
+	return 1;
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_valid_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_valid_m(void)
+{
+	return 0x1 << 31;
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_valid_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(void)
+{
+	return 0x80000000;
+}
 static inline u32 gr_gpccs_falcon_addr_r(void)
 {
 	return 0x0041a0ac;
-- 
cgit v1.2.2


From 32002c59bac11d3bdb9080e653666039e4848bde Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 4 Jun 2015 09:17:50 -0700
Subject: gpu: nvgpu: gp10b: Pascal specific global bundle CB

Some fields have different widths, so duplicate the code to program
global bundle CB.

Change-Id: Ib6af5abf3e90dfa1bcda2fbc6b97ad1031e6ab16
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/752635
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 973653a0..c6f5022b 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -872,6 +872,42 @@ static void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
 		gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
 }
 
+static void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
+					    struct channel_ctx_gk20a *ch_ctx,
+					    u64 addr, u64 size, bool patch)
+{
+	u32 data;
+
+	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
+		gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
+
+	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(),
+		gr_scc_bundle_cb_size_div_256b_f(size) |
+		gr_scc_bundle_cb_size_valid_true_f(), patch);
+
+	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
+		gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch);
+
+	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
+		gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) |
+		gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
+
+	/* data for state_limit */
+	data = (g->gr.bundle_cb_default_size *
+		gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
+		gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
+
+	data = min_t(u32, data, g->gr.min_gpm_fifo_depth);
+
+	gk20a_dbg_info("bundle cb token limit : %d, state limit : %d",
+		   g->gr.bundle_cb_token_limit, data);
+
+	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
+		gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
+		gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
+
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -884,6 +920,7 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.calc_global_ctx_buffer_size =
 		gr_gp10b_calc_global_ctx_buffer_size;
 	gops->gr.commit_global_attrib_cb = gr_gp10b_commit_global_attrib_cb;
+	gops->gr.commit_global_bundle_cb = gr_gp10b_commit_global_bundle_cb;
 	gops->gr.handle_sw_method = gr_gp10b_handle_sw_method;
 	gops->gr.cb_size_default = gr_gp10b_cb_size_default;
 	gops->gr.set_alpha_circular_buffer_size =
-- 
cgit v1.2.2


From 477ca4b64888b02c211d0e0ea9d67544a88bd4b5 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 4 Jun 2015 12:15:36 -0700
Subject: gpu: nvgpu: gp10b: Fix clipping of alpha/beta size

Alpha and beta sizes need to be clipped to a maximum value. For
alpha CB we were using beta size in clipping, and for both we were
not using number of TPCs to determine the max value.

Change-Id: I0c925464ba4c9f575e6e59dd5ba7759aa1cb6381
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/752667
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index c6f5022b..045847b2 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -267,9 +267,9 @@ static int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
 		+ (gr->alpha_cb_default_size >> 1);
 
 	gr->attrib_cb_size = min(gr->attrib_cb_size,
-				 gr_gpc0_ppc0_cbm_beta_cb_size_v_f(0xffffffff));
-	gr->alpha_cb_size = min(gr->attrib_cb_size,
-				 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(0xffffffff));
+		 gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~0) / g->gr.tpc_count);
+	gr->alpha_cb_size = min(gr->alpha_cb_size,
+		 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(~0) / g->gr.tpc_count);
 
 	size = gr->attrib_cb_size *
 		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
-- 
cgit v1.2.2


From 910bb6ad0d326e13b16da5ee0d06f4007cc9439e Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 2 Jun 2015 20:04:18 -0700
Subject: gpu: nvgpu: gp10b: Set address check mode

Set address check mode for SM.

Bug 1625763

Change-Id: I5ddf8334673b414956e57c55aaa5be1a9f9aeaf1
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/752139
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c    | 12 ++++++++++++
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 12 ++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 045847b2..265cad66 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -905,12 +905,24 @@ static void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
 	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
 		gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
 		gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
+}
+
+static int gr_gp10b_init_fs_state(struct gk20a *g)
+{
+	u32 data;
+
+	data = gk20a_readl(g, gr_gpcs_tpcs_sm_texio_control_r());
+	data = set_field(data, gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_m(),
+			gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_arm_63_48_match_f());
+	gk20a_writel(g, gr_gpcs_tpcs_sm_texio_control_r(), data);
 
+	return gr_gm20b_ctx_state_floorsweep(g);
 }
 
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
+	gops->gr.init_fs_state = gr_gp10b_init_fs_state;
 	gops->gr.is_valid_class = gr_gp10b_is_valid_class;
 	gops->gr.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager;
 	gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 32903fba..54d21eb3 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -3758,4 +3758,16 @@ static inline u32 gr_fe_gfxp_wfi_timeout_count_disabled_f(void)
 {
 	return 0x0;
 }
+static inline u32 gr_gpcs_tpcs_sm_texio_control_r(void)
+{
+	return 0x00419c84;
+}
+static inline u32 gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_f(u32 v)
+{
+	return (v & 0x7) << 8;
+}
+static inline u32 gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_arm_63_48_match_f(void)
+{
+	return 0x100;
+}
 #endif
-- 
cgit v1.2.2


From 3b5a1295fa7b19296da4b370a08025d0bc6f5998 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 3 Jun 2015 15:53:12 -0700
Subject: gpu: nvgpu: gp10b: Disable RE suppression

Bug 1642669

Change-Id: I683338256b7f2a165a7933aa59de510eb109ea6f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/755150
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c    |  5 +++++
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 16 ++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 265cad66..1942b1e7 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -916,6 +916,11 @@ static int gr_gp10b_init_fs_state(struct gk20a *g)
 			gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_arm_63_48_match_f());
 	gk20a_writel(g, gr_gpcs_tpcs_sm_texio_control_r(), data);
 
+	data = gk20a_readl(g, gr_gpcs_tpcs_sm_disp_ctrl_r());
+	data = set_field(data, gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_m(),
+			 gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f());
+	gk20a_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), data);
+
 	return gr_gm20b_ctx_state_floorsweep(g);
 }
 
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 54d21eb3..6e4f7d1a 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -3766,8 +3766,24 @@ static inline u32 gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_f(u32 v)
 {
 	return (v & 0x7) << 8;
 }
+static inline u32 gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_m(void)
+{
+	return 0x7 << 8;
+}
 static inline u32 gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_arm_63_48_match_f(void)
 {
 	return 0x100;
 }
+static inline u32 gr_gpcs_tpcs_sm_disp_ctrl_r(void)
+{
+	return 0x00419f78;
+}
+static inline u32 gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_m(void)
+{
+	return 0x3 << 11;
+}
+static inline u32 gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f(void)
+{
+	return 0x1000;
+}
 #endif
-- 
cgit v1.2.2


From d42ca3a0fc70473135a90013515c739da57319a9 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 12 Jun 2015 08:42:27 -0700
Subject: gpu: nvgpu: gp10b: Lazy sync point update

Update sync point protection field only when we have a valid sync
point id, and the new id is different from old id.

Bug 1653328

Change-Id: Ie07e26f8abd7c8239ad562603b62fda00164cbc7
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/757102
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c     | 27 +++++++++++++++------------
 drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h |  4 ++++
 2 files changed, 19 insertions(+), 12 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index 3a6c3c23..8c53978b 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -150,31 +150,34 @@ static u32 gp10b_fifo_get_pbdma_signature(struct gk20a *g)
 
 static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c)
 {
-	int syncpt_id;
+	u32 new_syncpt = 0, old_syncpt;
 	void *inst_ptr;
+	u32 v;
 
 	gk20a_dbg_fn("");
 
 	inst_ptr = c->inst_block.cpu_va;
 
-	/* disable channel */
-	c->g->ops.fifo.disable_channel(c);
+	v = gk20a_mem_rd32(inst_ptr, ram_fc_allowed_syncpoints_w());
+	old_syncpt = pbdma_allowed_syncpoints_0_index_v(v);
+	if (c->sync)
+		new_syncpt = c->sync->syncpt_id(c->sync);
 
-	/* preempt the channel */
-	WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
+	if (new_syncpt && new_syncpt != old_syncpt) {
+		/* disable channel */
+		c->g->ops.fifo.disable_channel(c);
 
-	if (c->sync) {
-		u32 v = pbdma_allowed_syncpoints_0_valid_f(1);
+		/* preempt the channel */
+		WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
+
+		v = pbdma_allowed_syncpoints_0_valid_f(1);
 
-		syncpt_id = c->sync->syncpt_id(c->sync);
 		gk20a_dbg_info("Channel %d, syncpt id %d\n",
-				c->hw_chid, syncpt_id);
+				c->hw_chid, new_syncpt);
 
-		v |= pbdma_allowed_syncpoints_0_index_f(syncpt_id);
+		v |= pbdma_allowed_syncpoints_0_index_f(new_syncpt);
 
 		gk20a_mem_wr32(inst_ptr, ram_fc_allowed_syncpoints_w(), v);
-	} else {
-		gk20a_mem_wr32(inst_ptr, ram_fc_allowed_syncpoints_w(), 0);
 	}
 
 	/* enable channel */
diff --git a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
index 18db8595..977a8ee2 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
@@ -454,6 +454,10 @@ static inline u32 pbdma_allowed_syncpoints_0_index_f(u32 v)
 {
 	return (v & 0x7fff) << 16;
 }
+static inline u32 pbdma_allowed_syncpoints_0_index_v(u32 r)
+{
+	return (r >> 16) & 0x7fff;
+}
 static inline u32 pbdma_allowed_syncpoints_1_valid_f(u32 v)
 {
 	return (v & 0x1) << 15;
-- 
cgit v1.2.2


From a22aa6d4d338f9d8fc126c4062c416b74785d728 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 22 Jun 2015 12:42:23 -0700
Subject: gpu: nvgpu: gp10b: Do not set up gm20b clocks

gm20b clock registers do not exist in gp10b. Skip setting the clock
HAL to gm20b variants.

Change-Id: Ieaa9a04a8afbe772864d947d968e3e1c7f9968e9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/760854
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 9d099479..d50ad791 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -100,7 +100,6 @@ int gp10b_init_hal(struct gk20a *g)
 	gp10b_init_gr_ctx(gops);
 	gp10b_init_mm(gops);
 	gp10b_init_pmu_ops(gops);
-	gm20b_init_clk_ops(gops);
 	gk20a_init_debug_ops(gops);
 	gops->name = "gp10b";
 
-- 
cgit v1.2.2


From 4b806879d582d41b20c17cc1739b537dbd41cb9a Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 22 Jun 2015 16:19:44 -0700
Subject: gpu: nvgpu: gp10b: Add regops whitelists

Add regops whitelists for gp10b. The whitelist is generated, and is the
same for context switched and global registers.

Bug 1633363

Change-Id: I6d4d43d036d684c9f0d836a1a032f2c452604902
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/760935
---
 drivers/gpu/nvgpu/gp10b/Makefile       |   3 +-
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c    |   2 +
 drivers/gpu/nvgpu/gp10b/regops_gp10b.c | 845 +++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/regops_gp10b.h |  24 +
 4 files changed, 873 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/nvgpu/gp10b/regops_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/regops_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index f25f7b34..688965da 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_GK20A)  += \
 	pmu_gp10b.o \
 	hal_gp10b.o \
 	rpfb_gp10b.o \
-	gp10b_gating_reglist.o
+	gp10b_gating_reglist.o \
+	regops_gp10b.o
 
 obj-$(CONFIG_TEGRA_GK20A) += platform_gp10b_tegra.o
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index d50ad791..9eba5571 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -30,6 +30,7 @@
 #include "gp10b/gr_ctx_gp10b.h"
 #include "gp10b/fifo_gp10b.h"
 #include "gp10b/gp10b_gating_reglist.h"
+#include "gp10b/regops_gp10b.h"
 
 #include "gm20b/gr_gm20b.h"
 #include "gm20b/fifo_gm20b.h"
@@ -101,6 +102,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gp10b_init_mm(gops);
 	gp10b_init_pmu_ops(gops);
 	gk20a_init_debug_ops(gops);
+	gp10b_init_regops(gops);
 	gops->name = "gp10b";
 
 	c->twod_class = FERMI_TWOD_A;
diff --git a/drivers/gpu/nvgpu/gp10b/regops_gp10b.c b/drivers/gpu/nvgpu/gp10b/regops_gp10b.c
new file mode 100644
index 00000000..0ab80b78
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/regops_gp10b.c
@@ -0,0 +1,845 @@
+/*
+ * Tegra GP10B GPU Debugger Driver Register Ops
+ *
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/bsearch.h>
+#include <uapi/linux/nvgpu.h>
+
+#include "gk20a/gk20a.h"
+#include "gk20a/dbg_gpu_gk20a.h"
+#include "gk20a/regops_gk20a.h"
+#include "regops_gp10b.h"
+
+static const struct regop_offset_range gp10b_global_whitelist_ranges[] = {
+	{ 0x000004f0,   1},
+	{ 0x00001a00,   3},
+	{ 0x00002800, 128},
+	{ 0x00009400,   1},
+	{ 0x00009410,   1},
+	{ 0x00009480,   1},
+	{ 0x00020200,  24},
+	{ 0x00021c00,   4},
+	{ 0x00021c14,   3},
+	{ 0x00021c24,   1},
+	{ 0x00021c2c,  69},
+	{ 0x00021d44,   1},
+	{ 0x00021d4c,   1},
+	{ 0x00021d54,   1},
+	{ 0x00021d5c,   1},
+	{ 0x00021d64,   2},
+	{ 0x00021d70,  16},
+	{ 0x00022430,   7},
+	{ 0x00022450,   1},
+	{ 0x0002245c,   1},
+	{ 0x00070000,   5},
+	{ 0x0008e00c,   1},
+	{ 0x00100c18,   3},
+	{ 0x00100c84,   1},
+	{ 0x0010a0a8,   1},
+	{ 0x0010a4f0,   1},
+	{ 0x0013cc14,   1},
+	{ 0x00140028,   1},
+	{ 0x00140280,   1},
+	{ 0x001402a0,   1},
+	{ 0x00140350,   1},
+	{ 0x00140480,   1},
+	{ 0x001404a0,   1},
+	{ 0x00140550,   1},
+	{ 0x00142028,   1},
+	{ 0x00142280,   1},
+	{ 0x001422a0,   1},
+	{ 0x00142350,   1},
+	{ 0x00142480,   1},
+	{ 0x001424a0,   1},
+	{ 0x00142550,   1},
+	{ 0x0017e280,   1},
+	{ 0x0017e294,   1},
+	{ 0x0017e29c,   2},
+	{ 0x0017e2ac,   1},
+	{ 0x0017e350,   1},
+	{ 0x0017e39c,   1},
+	{ 0x0017e480,   1},
+	{ 0x0017e4a0,   1},
+	{ 0x0017e550,   1},
+	{ 0x00180040,  41},
+	{ 0x001800ec,   9},
+	{ 0x00180240,  41},
+	{ 0x001802ec,   9},
+	{ 0x00180440,  41},
+	{ 0x001804ec,   9},
+	{ 0x00180640,  41},
+	{ 0x001806ec,   9},
+	{ 0x00180840,  41},
+	{ 0x001808ec,   9},
+	{ 0x00180a40,  41},
+	{ 0x00180aec,   9},
+	{ 0x00180c40,  41},
+	{ 0x00180cec,   9},
+	{ 0x00180e40,  41},
+	{ 0x00180eec,   9},
+	{ 0x001a0040,  41},
+	{ 0x001a00ec,   9},
+	{ 0x001a0240,  41},
+	{ 0x001a02ec,   9},
+	{ 0x001a0440,  41},
+	{ 0x001a04ec,   9},
+	{ 0x001a0640,  41},
+	{ 0x001a06ec,   9},
+	{ 0x001a0840,  41},
+	{ 0x001a08ec,   9},
+	{ 0x001a0a40,  41},
+	{ 0x001a0aec,   9},
+	{ 0x001a0c40,  41},
+	{ 0x001a0cec,   9},
+	{ 0x001a0e40,  41},
+	{ 0x001a0eec,   9},
+	{ 0x001b0040,  41},
+	{ 0x001b00ec,   9},
+	{ 0x001b0240,  41},
+	{ 0x001b02ec,   9},
+	{ 0x001b0440,  41},
+	{ 0x001b04ec,   9},
+	{ 0x001b0640,  41},
+	{ 0x001b06ec,   9},
+	{ 0x001b0840,  41},
+	{ 0x001b08ec,   9},
+	{ 0x001b0a40,  41},
+	{ 0x001b0aec,   9},
+	{ 0x001b0c40,  41},
+	{ 0x001b0cec,   9},
+	{ 0x001b0e40,  41},
+	{ 0x001b0eec,   9},
+	{ 0x001b4000,   1},
+	{ 0x001b4008,   1},
+	{ 0x001b4010,   3},
+	{ 0x001b4020,   3},
+	{ 0x001b4030,   3},
+	{ 0x001b4040,   3},
+	{ 0x001b4050,   3},
+	{ 0x001b4060,   4},
+	{ 0x001b4074,   7},
+	{ 0x001b4094,   3},
+	{ 0x001b40a4,   1},
+	{ 0x001b4100,   6},
+	{ 0x001b4124,   1},
+	{ 0x001b8000,   1},
+	{ 0x001b8008,   1},
+	{ 0x001b8010,   3},
+	{ 0x001bc000,   1},
+	{ 0x001bc008,   1},
+	{ 0x001bc010,   3},
+	{ 0x001be000,   1},
+	{ 0x001be008,   1},
+	{ 0x001be010,   3},
+	{ 0x00400500,   1},
+	{ 0x0040415c,   1},
+	{ 0x00404468,   1},
+	{ 0x00404498,   1},
+	{ 0x00405800,   1},
+	{ 0x00405840,   2},
+	{ 0x00405850,   1},
+	{ 0x00405908,   1},
+	{ 0x00405b40,   1},
+	{ 0x00405b50,   1},
+	{ 0x00406024,   5},
+	{ 0x00407010,   1},
+	{ 0x00407808,   1},
+	{ 0x0040803c,   1},
+	{ 0x00408804,   1},
+	{ 0x0040880c,   1},
+	{ 0x00408900,   2},
+	{ 0x00408910,   1},
+	{ 0x00408944,   1},
+	{ 0x00408984,   1},
+	{ 0x004090a8,   1},
+	{ 0x004098a0,   1},
+	{ 0x00409b00,   1},
+	{ 0x0041000c,   1},
+	{ 0x00410110,   1},
+	{ 0x00410184,   1},
+	{ 0x0041040c,   1},
+	{ 0x00410510,   1},
+	{ 0x00410584,   1},
+	{ 0x00418384,   1},
+	{ 0x00418400,   2},
+	{ 0x004184a0,   1},
+	{ 0x00418604,   1},
+	{ 0x00418680,   1},
+	{ 0x00418704,   1},
+	{ 0x00418714,   1},
+	{ 0x00418800,   1},
+	{ 0x0041881c,   1},
+	{ 0x00418830,   1},
+	{ 0x00418884,   1},
+	{ 0x004188b0,   1},
+	{ 0x004188c8,   3},
+	{ 0x004188fc,   1},
+	{ 0x00418b04,   1},
+	{ 0x00418c04,   1},
+	{ 0x00418c10,   8},
+	{ 0x00418c88,   1},
+	{ 0x00418d00,   1},
+	{ 0x00418e00,   1},
+	{ 0x00418e08,   1},
+	{ 0x00418e34,   1},
+	{ 0x00418e40,   4},
+	{ 0x00418e58,  16},
+	{ 0x00418f08,   1},
+	{ 0x00419000,   1},
+	{ 0x0041900c,   1},
+	{ 0x00419018,   1},
+	{ 0x00419854,   1},
+	{ 0x00419864,   1},
+	{ 0x00419a04,   2},
+	{ 0x00419a14,   1},
+	{ 0x00419ab0,   1},
+	{ 0x00419ab8,   3},
+	{ 0x00419c0c,   1},
+	{ 0x00419c8c,   2},
+	{ 0x00419d00,   1},
+	{ 0x00419d08,   2},
+	{ 0x00419e00,  11},
+	{ 0x00419e34,   2},
+	{ 0x00419e44,  11},
+	{ 0x00419e74,  10},
+	{ 0x00419ea4,   1},
+	{ 0x00419eac,   2},
+	{ 0x00419ee8,   1},
+	{ 0x00419ef0,  28},
+	{ 0x00419f70,   1},
+	{ 0x00419f78,   2},
+	{ 0x00419f98,   2},
+	{ 0x00419fdc,   1},
+	{ 0x0041a02c,   2},
+	{ 0x0041a0a0,   1},
+	{ 0x0041a0a8,   1},
+	{ 0x0041a890,   2},
+	{ 0x0041a8a0,   3},
+	{ 0x0041a8b0,   2},
+	{ 0x0041b014,   1},
+	{ 0x0041b0cc,   1},
+	{ 0x0041b1dc,   1},
+	{ 0x0041be0c,   3},
+	{ 0x0041becc,   1},
+	{ 0x0041bfdc,   1},
+	{ 0x0041c054,   1},
+	{ 0x0041c2b0,   1},
+	{ 0x0041c2b8,   3},
+	{ 0x0041c40c,   1},
+	{ 0x0041c48c,   2},
+	{ 0x0041c500,   1},
+	{ 0x0041c508,   2},
+	{ 0x0041c600,  11},
+	{ 0x0041c634,   2},
+	{ 0x0041c644,  11},
+	{ 0x0041c674,  10},
+	{ 0x0041c6a4,   1},
+	{ 0x0041c6ac,   2},
+	{ 0x0041c6e8,   1},
+	{ 0x0041c6f0,  28},
+	{ 0x0041c770,   1},
+	{ 0x0041c778,   2},
+	{ 0x0041c798,   2},
+	{ 0x0041c7dc,   1},
+	{ 0x0041c854,   1},
+	{ 0x0041cab0,   1},
+	{ 0x0041cab8,   3},
+	{ 0x0041cc0c,   1},
+	{ 0x0041cc8c,   2},
+	{ 0x0041cd00,   1},
+	{ 0x0041cd08,   2},
+	{ 0x0041ce00,  11},
+	{ 0x0041ce34,   2},
+	{ 0x0041ce44,  11},
+	{ 0x0041ce74,  10},
+	{ 0x0041cea4,   1},
+	{ 0x0041ceac,   2},
+	{ 0x0041cee8,   1},
+	{ 0x0041cef0,  28},
+	{ 0x0041cf70,   1},
+	{ 0x0041cf78,   2},
+	{ 0x0041cf98,   2},
+	{ 0x0041cfdc,   1},
+	{ 0x00500384,   1},
+	{ 0x005004a0,   1},
+	{ 0x00500604,   1},
+	{ 0x00500680,   1},
+	{ 0x00500714,   1},
+	{ 0x0050081c,   1},
+	{ 0x00500884,   1},
+	{ 0x005008b0,   1},
+	{ 0x005008c8,   3},
+	{ 0x005008fc,   1},
+	{ 0x00500b04,   1},
+	{ 0x00500c04,   1},
+	{ 0x00500c10,   8},
+	{ 0x00500c88,   1},
+	{ 0x00500d00,   1},
+	{ 0x00500e08,   1},
+	{ 0x00500f08,   1},
+	{ 0x00501000,   1},
+	{ 0x0050100c,   1},
+	{ 0x00501018,   1},
+	{ 0x00501854,   1},
+	{ 0x00501ab0,   1},
+	{ 0x00501ab8,   3},
+	{ 0x00501c0c,   1},
+	{ 0x00501c8c,   2},
+	{ 0x00501d00,   1},
+	{ 0x00501d08,   2},
+	{ 0x00501e00,  11},
+	{ 0x00501e34,   2},
+	{ 0x00501e44,  11},
+	{ 0x00501e74,  10},
+	{ 0x00501ea4,   1},
+	{ 0x00501eac,   2},
+	{ 0x00501ee8,   1},
+	{ 0x00501ef0,  28},
+	{ 0x00501f70,   1},
+	{ 0x00501f78,   2},
+	{ 0x00501f98,   2},
+	{ 0x00501fdc,   1},
+	{ 0x0050202c,   2},
+	{ 0x005020a0,   1},
+	{ 0x005020a8,   1},
+	{ 0x00502890,   2},
+	{ 0x005028a0,   3},
+	{ 0x005028b0,   2},
+	{ 0x00503014,   1},
+	{ 0x005030cc,   1},
+	{ 0x005031dc,   1},
+	{ 0x00503e14,   1},
+	{ 0x00503ecc,   1},
+	{ 0x00503fdc,   1},
+	{ 0x00504054,   1},
+	{ 0x005042b0,   1},
+	{ 0x005042b8,   3},
+	{ 0x0050440c,   1},
+	{ 0x0050448c,   2},
+	{ 0x00504500,   1},
+	{ 0x00504508,   2},
+	{ 0x00504600,  11},
+	{ 0x00504634,   2},
+	{ 0x00504644,  11},
+	{ 0x00504674,  10},
+	{ 0x005046a4,   1},
+	{ 0x005046ac,   2},
+	{ 0x005046e8,   1},
+	{ 0x005046f0,  28},
+	{ 0x00504770,   1},
+	{ 0x00504778,   2},
+	{ 0x00504798,   2},
+	{ 0x005047dc,   1},
+	{ 0x00504854,   1},
+	{ 0x00504ab0,   1},
+	{ 0x00504ab8,   3},
+	{ 0x00504c0c,   1},
+	{ 0x00504c8c,   2},
+	{ 0x00504d00,   1},
+	{ 0x00504d08,   2},
+	{ 0x00504e00,  11},
+	{ 0x00504e34,   2},
+	{ 0x00504e44,  11},
+	{ 0x00504e74,  10},
+	{ 0x00504ea4,   1},
+	{ 0x00504eac,   2},
+	{ 0x00504ee8,   1},
+	{ 0x00504ef0,  28},
+	{ 0x00504f70,   1},
+	{ 0x00504f78,   2},
+	{ 0x00504f98,   2},
+	{ 0x00504fdc,   1},
+	{ 0x00900100,   1},
+	{ 0x009a0100,   1},
+};
+static const u32 gp10b_global_whitelist_ranges_count =
+	ARRAY_SIZE(gp10b_global_whitelist_ranges);
+
+/* context-switched register whitelist (same ranges as the global list) */
+
+static const struct regop_offset_range gp10b_context_whitelist_ranges[] = {
+	{ 0x000004f0,   1},
+	{ 0x00001a00,   3},
+	{ 0x00002800, 128},
+	{ 0x00009400,   1},
+	{ 0x00009410,   1},
+	{ 0x00009480,   1},
+	{ 0x00020200,  24},
+	{ 0x00021c00,   4},
+	{ 0x00021c14,   3},
+	{ 0x00021c24,   1},
+	{ 0x00021c2c,  69},
+	{ 0x00021d44,   1},
+	{ 0x00021d4c,   1},
+	{ 0x00021d54,   1},
+	{ 0x00021d5c,   1},
+	{ 0x00021d64,   2},
+	{ 0x00021d70,  16},
+	{ 0x00022430,   7},
+	{ 0x00022450,   1},
+	{ 0x0002245c,   1},
+	{ 0x00070000,   5},
+	{ 0x0008e00c,   1},
+	{ 0x00100c18,   3},
+	{ 0x00100c84,   1},
+	{ 0x0010a0a8,   1},
+	{ 0x0010a4f0,   1},
+	{ 0x0013cc14,   1},
+	{ 0x00140028,   1},
+	{ 0x00140280,   1},
+	{ 0x001402a0,   1},
+	{ 0x00140350,   1},
+	{ 0x00140480,   1},
+	{ 0x001404a0,   1},
+	{ 0x00140550,   1},
+	{ 0x00142028,   1},
+	{ 0x00142280,   1},
+	{ 0x001422a0,   1},
+	{ 0x00142350,   1},
+	{ 0x00142480,   1},
+	{ 0x001424a0,   1},
+	{ 0x00142550,   1},
+	{ 0x0017e280,   1},
+	{ 0x0017e294,   1},
+	{ 0x0017e29c,   2},
+	{ 0x0017e2ac,   1},
+	{ 0x0017e350,   1},
+	{ 0x0017e39c,   1},
+	{ 0x0017e480,   1},
+	{ 0x0017e4a0,   1},
+	{ 0x0017e550,   1},
+	{ 0x00180040,  41},
+	{ 0x001800ec,   9},
+	{ 0x00180240,  41},
+	{ 0x001802ec,   9},
+	{ 0x00180440,  41},
+	{ 0x001804ec,   9},
+	{ 0x00180640,  41},
+	{ 0x001806ec,   9},
+	{ 0x00180840,  41},
+	{ 0x001808ec,   9},
+	{ 0x00180a40,  41},
+	{ 0x00180aec,   9},
+	{ 0x00180c40,  41},
+	{ 0x00180cec,   9},
+	{ 0x00180e40,  41},
+	{ 0x00180eec,   9},
+	{ 0x001a0040,  41},
+	{ 0x001a00ec,   9},
+	{ 0x001a0240,  41},
+	{ 0x001a02ec,   9},
+	{ 0x001a0440,  41},
+	{ 0x001a04ec,   9},
+	{ 0x001a0640,  41},
+	{ 0x001a06ec,   9},
+	{ 0x001a0840,  41},
+	{ 0x001a08ec,   9},
+	{ 0x001a0a40,  41},
+	{ 0x001a0aec,   9},
+	{ 0x001a0c40,  41},
+	{ 0x001a0cec,   9},
+	{ 0x001a0e40,  41},
+	{ 0x001a0eec,   9},
+	{ 0x001b0040,  41},
+	{ 0x001b00ec,   9},
+	{ 0x001b0240,  41},
+	{ 0x001b02ec,   9},
+	{ 0x001b0440,  41},
+	{ 0x001b04ec,   9},
+	{ 0x001b0640,  41},
+	{ 0x001b06ec,   9},
+	{ 0x001b0840,  41},
+	{ 0x001b08ec,   9},
+	{ 0x001b0a40,  41},
+	{ 0x001b0aec,   9},
+	{ 0x001b0c40,  41},
+	{ 0x001b0cec,   9},
+	{ 0x001b0e40,  41},
+	{ 0x001b0eec,   9},
+	{ 0x001b4000,   1},
+	{ 0x001b4008,   1},
+	{ 0x001b4010,   3},
+	{ 0x001b4020,   3},
+	{ 0x001b4030,   3},
+	{ 0x001b4040,   3},
+	{ 0x001b4050,   3},
+	{ 0x001b4060,   4},
+	{ 0x001b4074,   7},
+	{ 0x001b4094,   3},
+	{ 0x001b40a4,   1},
+	{ 0x001b4100,   6},
+	{ 0x001b4124,   1},
+	{ 0x001b8000,   1},
+	{ 0x001b8008,   1},
+	{ 0x001b8010,   3},
+	{ 0x001bc000,   1},
+	{ 0x001bc008,   1},
+	{ 0x001bc010,   3},
+	{ 0x001be000,   1},
+	{ 0x001be008,   1},
+	{ 0x001be010,   3},
+	{ 0x00400500,   1},
+	{ 0x0040415c,   1},
+	{ 0x00404468,   1},
+	{ 0x00404498,   1},
+	{ 0x00405800,   1},
+	{ 0x00405840,   2},
+	{ 0x00405850,   1},
+	{ 0x00405908,   1},
+	{ 0x00405b40,   1},
+	{ 0x00405b50,   1},
+	{ 0x00406024,   5},
+	{ 0x00407010,   1},
+	{ 0x00407808,   1},
+	{ 0x0040803c,   1},
+	{ 0x00408804,   1},
+	{ 0x0040880c,   1},
+	{ 0x00408900,   2},
+	{ 0x00408910,   1},
+	{ 0x00408944,   1},
+	{ 0x00408984,   1},
+	{ 0x004090a8,   1},
+	{ 0x004098a0,   1},
+	{ 0x00409b00,   1},
+	{ 0x0041000c,   1},
+	{ 0x00410110,   1},
+	{ 0x00410184,   1},
+	{ 0x0041040c,   1},
+	{ 0x00410510,   1},
+	{ 0x00410584,   1},
+	{ 0x00418384,   1},
+	{ 0x00418400,   2},
+	{ 0x004184a0,   1},
+	{ 0x00418604,   1},
+	{ 0x00418680,   1},
+	{ 0x00418704,   1},
+	{ 0x00418714,   1},
+	{ 0x00418800,   1},
+	{ 0x0041881c,   1},
+	{ 0x00418830,   1},
+	{ 0x00418884,   1},
+	{ 0x004188b0,   1},
+	{ 0x004188c8,   3},
+	{ 0x004188fc,   1},
+	{ 0x00418b04,   1},
+	{ 0x00418c04,   1},
+	{ 0x00418c10,   8},
+	{ 0x00418c88,   1},
+	{ 0x00418d00,   1},
+	{ 0x00418e00,   1},
+	{ 0x00418e08,   1},
+	{ 0x00418e34,   1},
+	{ 0x00418e40,   4},
+	{ 0x00418e58,  16},
+	{ 0x00418f08,   1},
+	{ 0x00419000,   1},
+	{ 0x0041900c,   1},
+	{ 0x00419018,   1},
+	{ 0x00419854,   1},
+	{ 0x00419864,   1},
+	{ 0x00419a04,   2},
+	{ 0x00419a14,   1},
+	{ 0x00419ab0,   1},
+	{ 0x00419ab8,   3},
+	{ 0x00419c0c,   1},
+	{ 0x00419c8c,   2},
+	{ 0x00419d00,   1},
+	{ 0x00419d08,   2},
+	{ 0x00419e00,  11},
+	{ 0x00419e34,   2},
+	{ 0x00419e44,  11},
+	{ 0x00419e74,  10},
+	{ 0x00419ea4,   1},
+	{ 0x00419eac,   2},
+	{ 0x00419ee8,   1},
+	{ 0x00419ef0,  28},
+	{ 0x00419f70,   1},
+	{ 0x00419f78,   2},
+	{ 0x00419f98,   2},
+	{ 0x00419fdc,   1},
+	{ 0x0041a02c,   2},
+	{ 0x0041a0a0,   1},
+	{ 0x0041a0a8,   1},
+	{ 0x0041a890,   2},
+	{ 0x0041a8a0,   3},
+	{ 0x0041a8b0,   2},
+	{ 0x0041b014,   1},
+	{ 0x0041b0cc,   1},
+	{ 0x0041b1dc,   1},
+	{ 0x0041be0c,   3},
+	{ 0x0041becc,   1},
+	{ 0x0041bfdc,   1},
+	{ 0x0041c054,   1},
+	{ 0x0041c2b0,   1},
+	{ 0x0041c2b8,   3},
+	{ 0x0041c40c,   1},
+	{ 0x0041c48c,   2},
+	{ 0x0041c500,   1},
+	{ 0x0041c508,   2},
+	{ 0x0041c600,  11},
+	{ 0x0041c634,   2},
+	{ 0x0041c644,  11},
+	{ 0x0041c674,  10},
+	{ 0x0041c6a4,   1},
+	{ 0x0041c6ac,   2},
+	{ 0x0041c6e8,   1},
+	{ 0x0041c6f0,  28},
+	{ 0x0041c770,   1},
+	{ 0x0041c778,   2},
+	{ 0x0041c798,   2},
+	{ 0x0041c7dc,   1},
+	{ 0x0041c854,   1},
+	{ 0x0041cab0,   1},
+	{ 0x0041cab8,   3},
+	{ 0x0041cc0c,   1},
+	{ 0x0041cc8c,   2},
+	{ 0x0041cd00,   1},
+	{ 0x0041cd08,   2},
+	{ 0x0041ce00,  11},
+	{ 0x0041ce34,   2},
+	{ 0x0041ce44,  11},
+	{ 0x0041ce74,  10},
+	{ 0x0041cea4,   1},
+	{ 0x0041ceac,   2},
+	{ 0x0041cee8,   1},
+	{ 0x0041cef0,  28},
+	{ 0x0041cf70,   1},
+	{ 0x0041cf78,   2},
+	{ 0x0041cf98,   2},
+	{ 0x0041cfdc,   1},
+	{ 0x00500384,   1},
+	{ 0x005004a0,   1},
+	{ 0x00500604,   1},
+	{ 0x00500680,   1},
+	{ 0x00500714,   1},
+	{ 0x0050081c,   1},
+	{ 0x00500884,   1},
+	{ 0x005008b0,   1},
+	{ 0x005008c8,   3},
+	{ 0x005008fc,   1},
+	{ 0x00500b04,   1},
+	{ 0x00500c04,   1},
+	{ 0x00500c10,   8},
+	{ 0x00500c88,   1},
+	{ 0x00500d00,   1},
+	{ 0x00500e08,   1},
+	{ 0x00500f08,   1},
+	{ 0x00501000,   1},
+	{ 0x0050100c,   1},
+	{ 0x00501018,   1},
+	{ 0x00501854,   1},
+	{ 0x00501ab0,   1},
+	{ 0x00501ab8,   3},
+	{ 0x00501c0c,   1},
+	{ 0x00501c8c,   2},
+	{ 0x00501d00,   1},
+	{ 0x00501d08,   2},
+	{ 0x00501e00,  11},
+	{ 0x00501e34,   2},
+	{ 0x00501e44,  11},
+	{ 0x00501e74,  10},
+	{ 0x00501ea4,   1},
+	{ 0x00501eac,   2},
+	{ 0x00501ee8,   1},
+	{ 0x00501ef0,  28},
+	{ 0x00501f70,   1},
+	{ 0x00501f78,   2},
+	{ 0x00501f98,   2},
+	{ 0x00501fdc,   1},
+	{ 0x0050202c,   2},
+	{ 0x005020a0,   1},
+	{ 0x005020a8,   1},
+	{ 0x00502890,   2},
+	{ 0x005028a0,   3},
+	{ 0x005028b0,   2},
+	{ 0x00503014,   1},
+	{ 0x005030cc,   1},
+	{ 0x005031dc,   1},
+	{ 0x00503e14,   1},
+	{ 0x00503ecc,   1},
+	{ 0x00503fdc,   1},
+	{ 0x00504054,   1},
+	{ 0x005042b0,   1},
+	{ 0x005042b8,   3},
+	{ 0x0050440c,   1},
+	{ 0x0050448c,   2},
+	{ 0x00504500,   1},
+	{ 0x00504508,   2},
+	{ 0x00504600,  11},
+	{ 0x00504634,   2},
+	{ 0x00504644,  11},
+	{ 0x00504674,  10},
+	{ 0x005046a4,   1},
+	{ 0x005046ac,   2},
+	{ 0x005046e8,   1},
+	{ 0x005046f0,  28},
+	{ 0x00504770,   1},
+	{ 0x00504778,   2},
+	{ 0x00504798,   2},
+	{ 0x005047dc,   1},
+	{ 0x00504854,   1},
+	{ 0x00504ab0,   1},
+	{ 0x00504ab8,   3},
+	{ 0x00504c0c,   1},
+	{ 0x00504c8c,   2},
+	{ 0x00504d00,   1},
+	{ 0x00504d08,   2},
+	{ 0x00504e00,  11},
+	{ 0x00504e34,   2},
+	{ 0x00504e44,  11},
+	{ 0x00504e74,  10},
+	{ 0x00504ea4,   1},
+	{ 0x00504eac,   2},
+	{ 0x00504ee8,   1},
+	{ 0x00504ef0,  28},
+	{ 0x00504f70,   1},
+	{ 0x00504f78,   2},
+	{ 0x00504f98,   2},
+	{ 0x00504fdc,   1},
+	{ 0x00900100,   1},
+	{ 0x009a0100,   1},
+};
+static const u32 gp10b_context_whitelist_ranges_count =
+	ARRAY_SIZE(gp10b_context_whitelist_ranges);
+
+/* runcontrol */
+static const u32 gp10b_runcontrol_whitelist[] = {
+};
+static const u32 gp10b_runcontrol_whitelist_count =
+	ARRAY_SIZE(gp10b_runcontrol_whitelist);
+
+static const struct regop_offset_range gp10b_runcontrol_whitelist_ranges[] = {
+};
+static const u32 gp10b_runcontrol_whitelist_ranges_count =
+	ARRAY_SIZE(gp10b_runcontrol_whitelist_ranges);
+
+
+/* quad ctl */
+static const u32 gp10b_qctl_whitelist[] = {
+};
+static const u32 gp10b_qctl_whitelist_count =
+	ARRAY_SIZE(gp10b_qctl_whitelist);
+
+static const struct regop_offset_range gp10b_qctl_whitelist_ranges[] = {
+};
+static const u32 gp10b_qctl_whitelist_ranges_count =
+	ARRAY_SIZE(gp10b_qctl_whitelist_ranges);
+
+static const struct regop_offset_range *gp10b_get_global_whitelist_ranges(void)
+{
+	return gp10b_global_whitelist_ranges;
+}
+
+static int gp10b_get_global_whitelist_ranges_count(void)
+{
+	return gp10b_global_whitelist_ranges_count;
+}
+
+static const struct regop_offset_range *gp10b_get_context_whitelist_ranges(void)
+{
+	return gp10b_context_whitelist_ranges;
+}
+
+static int gp10b_get_context_whitelist_ranges_count(void)
+{
+	return gp10b_context_whitelist_ranges_count;
+}
+
+static const u32 *gp10b_get_runcontrol_whitelist(void)
+{
+	return gp10b_runcontrol_whitelist;
+}
+
+static int gp10b_get_runcontrol_whitelist_count(void)
+{
+	return gp10b_runcontrol_whitelist_count;
+}
+
+static const
+struct regop_offset_range *gp10b_get_runcontrol_whitelist_ranges(void)
+{
+	return gp10b_runcontrol_whitelist_ranges;
+}
+
+static int gp10b_get_runcontrol_whitelist_ranges_count(void)
+{
+	return gp10b_runcontrol_whitelist_ranges_count;
+}
+
+static const u32 *gp10b_get_qctl_whitelist(void)
+{
+	return gp10b_qctl_whitelist;
+}
+
+static int gp10b_get_qctl_whitelist_count(void)
+{
+	return gp10b_qctl_whitelist_count;
+}
+
+static const struct regop_offset_range *gp10b_get_qctl_whitelist_ranges(void)
+{
+	return gp10b_qctl_whitelist_ranges;
+}
+
+static int gp10b_get_qctl_whitelist_ranges_count(void)
+{
+	return gp10b_qctl_whitelist_ranges_count;
+}
+
+static int gp10b_apply_smpc_war(struct dbg_session_gk20a *dbg_s)
+{
+	/* Not needed on gp10b */
+	return 0;
+}
+
+void gp10b_init_regops(struct gpu_ops *gops)
+{
+	gops->regops.get_global_whitelist_ranges =
+		gp10b_get_global_whitelist_ranges;
+	gops->regops.get_global_whitelist_ranges_count =
+		gp10b_get_global_whitelist_ranges_count;
+
+	gops->regops.get_context_whitelist_ranges =
+		gp10b_get_context_whitelist_ranges;
+	gops->regops.get_context_whitelist_ranges_count =
+		gp10b_get_context_whitelist_ranges_count;
+
+	gops->regops.get_runcontrol_whitelist =
+		gp10b_get_runcontrol_whitelist;
+	gops->regops.get_runcontrol_whitelist_count =
+		gp10b_get_runcontrol_whitelist_count;
+
+	gops->regops.get_runcontrol_whitelist_ranges =
+		gp10b_get_runcontrol_whitelist_ranges;
+	gops->regops.get_runcontrol_whitelist_ranges_count =
+		gp10b_get_runcontrol_whitelist_ranges_count;
+
+	gops->regops.get_qctl_whitelist =
+		gp10b_get_qctl_whitelist;
+	gops->regops.get_qctl_whitelist_count =
+		gp10b_get_qctl_whitelist_count;
+
+	gops->regops.get_qctl_whitelist_ranges =
+		gp10b_get_qctl_whitelist_ranges;
+	gops->regops.get_qctl_whitelist_ranges_count =
+		gp10b_get_qctl_whitelist_ranges_count;
+
+	gops->regops.apply_smpc_war =
+		gp10b_apply_smpc_war;
+}
diff --git a/drivers/gpu/nvgpu/gp10b/regops_gp10b.h b/drivers/gpu/nvgpu/gp10b/regops_gp10b.h
new file mode 100644
index 00000000..8727951a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/regops_gp10b.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * Tegra GP10B GPU Debugger Driver Register Ops
+ *
+ * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __REGOPS_GP10B_H_
+#define __REGOPS_GP10B_H_
+
+void gp10b_init_regops(struct gpu_ops *gops);
+
+#endif /* __REGOPS_GP10B_H_ */
-- 
cgit v1.2.2


From 8d354418ec1ee46e7fcc6489ec26f4b1e6ef9572 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 15 Jun 2015 11:33:35 -0700
Subject: gpu: nvgpu: gp10b: Phys addresses for page tables

Always use physical addresses for page tables. In the new gp10b page table
format each level fits in one page, so we do not need SMMU translation.

Change-Id: Ie46b2bce0f7a4e8d2904d74b1df616e389874141
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/758181
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index bcdee9fc..5371605f 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -160,13 +160,12 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	struct gk20a_mm_entry *pte = parent->entries + i;
 	u32 pde_v[2] = {0, 0};
 	u32 *pde;
-	struct gk20a *g = vm->mm->g;
 
 	gk20a_dbg_fn("");
 
-	pte_addr = g->ops.mm.get_iova_addr(g, pte->sgt->sgl, 0)
+	pte_addr = virt_to_phys(pte->cpu_va)
 		   >> gmmu_new_pde_address_shift_v();
-	pde_addr = g->ops.mm.get_iova_addr(g, parent->sgt->sgl, 0);
+	pde_addr = virt_to_phys(parent->cpu_va);
 
 	pde_v[0] |= gmmu_new_pde_aperture_video_memory_f();
 	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
@@ -203,7 +202,6 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	struct gk20a_mm_entry *entry = pte->entries + i;
 	u32 pde_v[4] = {0, 0, 0, 0};
 	u32 *pde;
-	struct gk20a *g = vm->mm->g;
 
 	gk20a_dbg_fn("");
 
@@ -211,11 +209,11 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
 
 	if (small_valid)
-		pte_addr_small = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0)
+		pte_addr_small = virt_to_phys(entry->cpu_va)
 				 >> gmmu_new_dual_pde_address_shift_v();
 
 	if (big_valid)
-		pte_addr_big = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0)
+		pte_addr_big = virt_to_phys(entry->cpu_va)
 			       >> gmmu_new_dual_pde_address_big_shift_v();
 
 	if (small_valid) {
-- 
cgit v1.2.2


From 4c074ba3021e7fd52b10a5e7267b36e07da5660a Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Mon, 25 May 2015 15:01:04 +0530
Subject: gpu: nvgpu: gp10b: dma support for secure gpccs

bug 200080684

Change-Id: I013a0ca7762f6cca0498bd282303597bf683cb7d
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/746737
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c |  3 +-
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c    | 74 ++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c
index 1d77ad65..515ba630 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c
@@ -3,7 +3,7 @@
  *
  * GM20B Graphics Context
  *
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -69,4 +69,5 @@ static bool gr_gp10b_is_firmware_defined(void)
 void gp10b_init_gr_ctx(struct gpu_ops *gops) {
 	gops->gr_ctx.get_netlist_name = gr_gp10b_get_netlist_name;
 	gops->gr_ctx.is_fw_defined = gr_gp10b_is_firmware_defined;
+	gops->gr_ctx.use_dma_for_fw_bootstrap = false;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index b8b985b3..7b806026 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -21,6 +21,8 @@
 
 #include "pmu_gp10b.h"
 
+#define gp10b_dbg_pmu(fmt, arg...) \
+	gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
 /*!
  * Structure/object which single register write need to be done during PG init
  * sequence to set PROD values.
@@ -130,6 +132,76 @@ static struct pg_init_sequence_list _pginitseq_gp10b[] = {
 		{0x0010e004, 0x0000008E},
 };
 
+void gp10b_pmu_load_multiple_falcons(struct gk20a *g, u32 falconidmask,
+					 u32 flags)
+{
+	struct pmu_gk20a *pmu = &g->pmu;
+	struct pmu_cmd cmd;
+	u32 seq;
+
+	gk20a_dbg_fn("");
+
+	gp10b_dbg_pmu("wprinit status = %x\n", g->ops.pmu.lspmuwprinitdone);
+	if (g->ops.pmu.lspmuwprinitdone) {
+		/* send message to load FECS falcon */
+		memset(&cmd, 0, sizeof(struct pmu_cmd));
+		cmd.hdr.unit_id = PMU_UNIT_ACR;
+		cmd.hdr.size = PMU_CMD_HDR_SIZE +
+		  sizeof(struct pmu_acr_cmd_bootstrap_multiple_falcons);
+		cmd.cmd.acr.boot_falcons.cmd_type =
+		  PMU_ACR_CMD_ID_BOOTSTRAP_MULTIPLE_FALCONS;
+		cmd.cmd.acr.boot_falcons.flags = flags;
+		cmd.cmd.acr.boot_falcons.falconidmask =
+				falconidmask;
+		cmd.cmd.acr.boot_falcons.usevamask =
+				1 << LSF_FALCON_ID_GPCCS;
+		cmd.cmd.acr.boot_falcons.wprvirtualbase.lo =
+				u64_lo32(g->pmu.wpr_buf.gpu_va);
+		cmd.cmd.acr.boot_falcons.wprvirtualbase.hi =
+				u64_hi32(g->pmu.wpr_buf.gpu_va);
+		gp10b_dbg_pmu("PMU_ACR_CMD_ID_BOOTSTRAP_MULTIPLE_FALCONS:%x\n",
+				falconidmask);
+		gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
+				pmu_handle_fecs_boot_acr_msg, pmu, &seq, ~0);
+	}
+
+	gk20a_dbg_fn("done");
+	return;
+}
+
+int gp10b_load_falcon_ucode(struct gk20a *g, u32 falconidmask)
+{
+	u32 flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES;
+
+	/* GM20B PMU supports loading FECS and GPCCS only */
+	if (falconidmask == 0)
+		return -EINVAL;
+	if (falconidmask & ~((1 << LSF_FALCON_ID_FECS) |
+				(1 << LSF_FALCON_ID_GPCCS)))
+				return -EINVAL;
+	g->ops.pmu.lsfloadedfalconid = 0;
+	/* check whether pmu is ready to bootstrap lsf if not wait for it */
+	if (!g->ops.pmu.lspmuwprinitdone) {
+		pmu_wait_message_cond(&g->pmu,
+				gk20a_get_gr_idle_timeout(g),
+				&g->ops.pmu.lspmuwprinitdone, 1);
+		/* check again if it still not ready indicate an error */
+		if (!g->ops.pmu.lspmuwprinitdone) {
+			gk20a_err(dev_from_gk20a(g),
+				"PMU not ready to load LSF");
+			return -ETIMEDOUT;
+		}
+	}
+	/* load falcon(s) */
+	gp10b_pmu_load_multiple_falcons(g, falconidmask, flags);
+	pmu_wait_message_cond(&g->pmu,
+			gk20a_get_gr_idle_timeout(g),
+			&g->ops.pmu.lsfloadedfalconid, falconidmask);
+	if (g->ops.pmu.lsfloadedfalconid != falconidmask)
+		return -ETIMEDOUT;
+	return 0;
+}
+
 static int gp10b_pmu_setup_elpg(struct gk20a *g)
 {
 	int ret = 0;
@@ -157,8 +229,10 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops)
 	if (gops->privsecurity) {
 		gm20b_init_secure_pmu(gops);
 		gops->pmu.init_wpr_region = gm20b_pmu_init_acr;
+		gops->pmu.load_lsfalcon_ucode = gp10b_load_falcon_ucode;
 	} else {
 		gk20a_init_pmu_ops(gops);
+		gops->pmu.load_lsfalcon_ucode = NULL;
 		gops->pmu.init_wpr_region = NULL;
 	}
 	gops->pmu.pmu_setup_elpg = gp10b_pmu_setup_elpg;
-- 
cgit v1.2.2


From 6a071e5ad5581e57a5be109d2fc0f44680207783 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 24 Jun 2015 10:10:57 -0700
Subject: gpu: nvgpu: gp10b: Implement priv pages

Implement support for privileged pages. Use them for kernel allocated buffers.

Change-Id: I24778c2b6063b6bc8a4bfd9d97fa6de01d49569a
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/761920
---
 drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h | 12 ++++++++++++
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c      |  9 ++++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
index 844cb142..9ce9448e 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
@@ -198,6 +198,18 @@ static inline u32 gmmu_new_pte_valid_false_f(void)
 {
 	return 0x0;
 }
+static inline u32 gmmu_new_pte_privilege_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_pte_privilege_true_f(void)
+{
+	return 0x20;
+}
+static inline u32 gmmu_new_pte_privilege_false_f(void)
+{
+	return 0x0;
+}
 static inline u32 gmmu_new_pte_address_sys_f(u32 v)
 {
 	return (v & 0xfffffff) << 8;
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 5371605f..9f66c21f 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -153,7 +153,7 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 			   u64 *iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unmapped_pte,
-			   int rw_flag, bool sparse, u32 flags)
+			   int rw_flag, bool sparse, bool priv)
 {
 	u64 pte_addr = 0;
 	u64 pde_addr = 0;
@@ -195,7 +195,7 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 			   u64 *iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unmapped_pte,
-			   int rw_flag, bool sparse, u32 flags)
+			   int rw_flag, bool sparse, bool priv)
 {
 	bool small_valid, big_valid;
 	u32 pte_addr_small = 0, pte_addr_big = 0;
@@ -251,7 +251,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			   u64 *iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unmapped_pte,
-			   int rw_flag, bool sparse, u32 flags)
+			   int rw_flag, bool sparse, bool priv)
 {
 	struct gk20a *g = vm->mm->g;
 	u32 page_size  = vm->gmmu_page_sizes[gmmu_pgsz_idx];
@@ -269,6 +269,9 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			    gmmu_new_pte_address_sys_f(*iova
 			      >> gmmu_new_pte_address_shift_v());
 
+		if (priv)
+			pte_w[0] |= gmmu_new_pte_privilege_true_f();
+
 		pte_w[1] = *iova >> (24 + gmmu_new_pte_address_shift_v()) |
 			   gmmu_new_pte_kind_f(kind_v) |
 			   gmmu_new_pte_comptagline_f(*ctag / ctag_granularity);
-- 
cgit v1.2.2


From c965e6655800afffd3d4e3d73f28198adef7a118 Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Wed, 1 Jul 2015 12:50:26 +0530
Subject: gpu: nvgpu: gp10b: make local function 'static'

Fixed the following sparse warnings by making the local functions static:
- symbol 'gp10b_pmu_load_multiple_falcons' was not declared.
    Should it be static?
- symbol 'gp10b_load_falcon_ucode' was not declared.
    Should it be static?

bug 200067946

Change-Id: I67d865aef6f57bf614db351929cd4bb1b6077c00
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/764646
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Amit Sharma (SW-TEGRA) <amisharma@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index 7b806026..2a8d968c 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -132,7 +132,7 @@ static struct pg_init_sequence_list _pginitseq_gp10b[] = {
 		{0x0010e004, 0x0000008E},
 };
 
-void gp10b_pmu_load_multiple_falcons(struct gk20a *g, u32 falconidmask,
+static void gp10b_pmu_load_multiple_falcons(struct gk20a *g, u32 falconidmask,
 					 u32 flags)
 {
 	struct pmu_gk20a *pmu = &g->pmu;
@@ -169,7 +169,7 @@ void gp10b_pmu_load_multiple_falcons(struct gk20a *g, u32 falconidmask,
 	return;
 }
 
-int gp10b_load_falcon_ucode(struct gk20a *g, u32 falconidmask)
+static int gp10b_load_falcon_ucode(struct gk20a *g, u32 falconidmask)
 {
 	u32 flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES;
 
-- 
cgit v1.2.2


From b0667dcd8ade3dd61ac3b32000ff6b25b077208d Mon Sep 17 00:00:00 2001
From: Matt Craighead <mcraighead@nvidia.com>
Date: Thu, 9 Jul 2015 14:24:31 -0500
Subject: Revert "gpu: nvgpu: gp10b: Phys addresses for page tables"

This reverts commit f7bf99929cf2ec5a295ac21c74cf9c4f1afd78c5.

Change-Id: I0acfa18e9cf9bedd4051ec00faa497b3cdb9454b
Signed-off-by: Matt Craighead <mcraighead@nvidia.com>
Reviewed-on: http://git-master/r/768599
Reviewed-by: Hiroshi Doyu <hdoyu@nvidia.com>
Tested-by: Hiroshi Doyu <hdoyu@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 9f66c21f..190dc7f6 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -160,12 +160,13 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	struct gk20a_mm_entry *pte = parent->entries + i;
 	u32 pde_v[2] = {0, 0};
 	u32 *pde;
+	struct gk20a *g = vm->mm->g;
 
 	gk20a_dbg_fn("");
 
-	pte_addr = virt_to_phys(pte->cpu_va)
+	pte_addr = g->ops.mm.get_iova_addr(g, pte->sgt->sgl, 0)
 		   >> gmmu_new_pde_address_shift_v();
-	pde_addr = virt_to_phys(parent->cpu_va);
+	pde_addr = g->ops.mm.get_iova_addr(g, parent->sgt->sgl, 0);
 
 	pde_v[0] |= gmmu_new_pde_aperture_video_memory_f();
 	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
@@ -202,6 +203,7 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	struct gk20a_mm_entry *entry = pte->entries + i;
 	u32 pde_v[4] = {0, 0, 0, 0};
 	u32 *pde;
+	struct gk20a *g = vm->mm->g;
 
 	gk20a_dbg_fn("");
 
@@ -209,11 +211,11 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
 
 	if (small_valid)
-		pte_addr_small = virt_to_phys(entry->cpu_va)
+		pte_addr_small = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0)
 				 >> gmmu_new_dual_pde_address_shift_v();
 
 	if (big_valid)
-		pte_addr_big = virt_to_phys(entry->cpu_va)
+		pte_addr_big = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0)
 			       >> gmmu_new_dual_pde_address_big_shift_v();
 
 	if (small_valid) {
-- 
cgit v1.2.2


From 8ae3f0ac28c3ee754ff7bbfe3f75b73e6ab33836 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 10 Aug 2015 14:53:47 -0700
Subject: gpu: nvgpu: gp10b: Use phys addresses in PDEs

Use physical addresses in PDEs. All page table levels fit in 4k, so no
need for SMMU mapping.

Change-Id: Id9e418f35a79343f4a332a230e04abda5e0dd5d2
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/783748
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 190dc7f6..65449534 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -160,13 +160,11 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	struct gk20a_mm_entry *pte = parent->entries + i;
 	u32 pde_v[2] = {0, 0};
 	u32 *pde;
-	struct gk20a *g = vm->mm->g;
 
 	gk20a_dbg_fn("");
 
-	pte_addr = g->ops.mm.get_iova_addr(g, pte->sgt->sgl, 0)
-		   >> gmmu_new_pde_address_shift_v();
-	pde_addr = g->ops.mm.get_iova_addr(g, parent->sgt->sgl, 0);
+	pte_addr = sg_phys(pte->sgt->sgl) >> gmmu_new_pde_address_shift_v();
+	pde_addr = sg_phys(parent->sgt->sgl);
 
 	pde_v[0] |= gmmu_new_pde_aperture_video_memory_f();
 	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
@@ -203,7 +201,6 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	struct gk20a_mm_entry *entry = pte->entries + i;
 	u32 pde_v[4] = {0, 0, 0, 0};
 	u32 *pde;
-	struct gk20a *g = vm->mm->g;
 
 	gk20a_dbg_fn("");
 
@@ -211,11 +208,11 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
 
 	if (small_valid)
-		pte_addr_small = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0)
+		pte_addr_small = sg_phys(entry->sgt->sgl)
 				 >> gmmu_new_dual_pde_address_shift_v();
 
 	if (big_valid)
-		pte_addr_big = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0)
+		pte_addr_big = sg_phys(entry->sgt->sgl)
 			       >> gmmu_new_dual_pde_address_big_shift_v();
 
 	if (small_valid) {
-- 
cgit v1.2.2


From 6b2bfcbfe7f4134b7b640cdc61b250d27de5311f Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Wed, 19 Aug 2015 12:35:29 +0530
Subject: gpu: nvgpu: update t186 slcg prod settings

bug 1675413

work around for timestamp slcg bug

Change-Id: I0950403b89e9ea161bd7eb7052f47de3f9733240
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/785854
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
index f8ee80c3..fbf146a2 100644
--- a/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
+++ b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
@@ -35,7 +35,7 @@ static const struct gating_desc gp10b_slcg_bus[] = {
 
 /* slcg ce2 */
 static const struct gating_desc gp10b_slcg_ce2[] = {
-	{.addr = 0x00106f28, .prod = 0x00000000, .disable = 0x000007fe},
+	{.addr = 0x00104204, .prod = 0x00000000, .disable = 0x000007fe},
 };
 
 /* slcg chiplet */
@@ -59,7 +59,7 @@ static const struct gating_desc gp10b_slcg_fifo[] = {
 
 /* slcg gr */
 static const struct gating_desc gp10b_slcg_gr[] = {
-	{.addr = 0x004041f4, .prod = 0x00000000, .disable = 0x03fffffe},
+	{.addr = 0x004041f4, .prod = 0x00000002, .disable = 0x03fffffe},
 	{.addr = 0x0040917c, .prod = 0x00020008, .disable = 0x0003fffe},
 	{.addr = 0x00409894, .prod = 0x00000040, .disable = 0x03fffffe},
 	{.addr = 0x004078c4, .prod = 0x00000000, .disable = 0x000001fe},
@@ -136,14 +136,14 @@ static const struct gating_desc gp10b_slcg_priring[] = {
 static const struct gating_desc gp10b_slcg_pwr_csb[] = {
 	{.addr = 0x00000134, .prod = 0x00020008, .disable = 0x0003fffe},
 	{.addr = 0x00000e74, .prod = 0x00000000, .disable = 0x0000000f},
-	{.addr = 0x00000a74, .prod = 0x00000000, .disable = 0x00007ffe},
+	{.addr = 0x00000a74, .prod = 0x00004000, .disable = 0x00007ffe},
 	{.addr = 0x000016b8, .prod = 0x00000000, .disable = 0x0000000f},
 };
 
 /* slcg pmu */
 static const struct gating_desc gp10b_slcg_pmu[] = {
 	{.addr = 0x0010a134, .prod = 0x00020008, .disable = 0x0003fffe},
-	{.addr = 0x0010aa74, .prod = 0x00000000, .disable = 0x00007ffe},
+	{.addr = 0x0010aa74, .prod = 0x00004000, .disable = 0x00007ffe},
 	{.addr = 0x0010ae74, .prod = 0x00000000, .disable = 0x0000000f},
 };
 
@@ -192,7 +192,7 @@ static const struct gating_desc gp10b_blcg_gr[] = {
 	{.addr = 0x00405860, .prod = 0x0000c242, .disable = 0x00000000},
 	{.addr = 0x0040590c, .prod = 0x0000c444, .disable = 0x00000000},
 	{.addr = 0x00408040, .prod = 0x0000c444, .disable = 0x00000000},
-	{.addr = 0x00407000, .prod = 0x4000c141, .disable = 0x00000000},
+	{.addr = 0x00407000, .prod = 0x4000c242, .disable = 0x00000000},
 	{.addr = 0x00405bf0, .prod = 0x0000c444, .disable = 0x00000000},
 	{.addr = 0x0041a890, .prod = 0x0000427f, .disable = 0x00000000},
 	{.addr = 0x0041a8b0, .prod = 0x0000007f, .disable = 0x00000000},
-- 
cgit v1.2.2


From 95a271905918d91468134728079c3025ea58b537 Mon Sep 17 00:00:00 2001
From: Sami Kiminki <skiminki@nvidia.com>
Date: Tue, 18 Aug 2015 19:34:51 +0300
Subject: gpu: nvgpu: Add CDE program number selection for GP10B

Add CDE program number selection for GP10B.

Bug 1604102

Change-Id: I0054e670e3bc6b8c2380124eb58204088aaae275
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/785459
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/Makefile    |  3 +-
 drivers/gpu/nvgpu/gp10b/cde_gp10b.c | 64 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/cde_gp10b.h | 23 +++++++++++++
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c |  2 ++
 4 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/nvgpu/gp10b/cde_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/cde_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index 688965da..ad198327 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_GK20A)  += \
 	hal_gp10b.o \
 	rpfb_gp10b.o \
 	gp10b_gating_reglist.o \
-	regops_gp10b.o
+	regops_gp10b.o \
+	cde_gp10b.o
 
 obj-$(CONFIG_TEGRA_GK20A) += platform_gp10b_tegra.o
diff --git a/drivers/gpu/nvgpu/gp10b/cde_gp10b.c b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
new file mode 100644
index 00000000..acb8aee3
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
@@ -0,0 +1,64 @@
+/*
+ * GP10B CDE
+ *
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "cde_gp10b.h"
+
+enum gp10b_programs {
+	GP10B_PROG_HPASS              = 0,
+	GP10B_PROG_HPASS_4K           = 1,
+	GP10B_PROG_VPASS              = 2,
+	GP10B_PROG_VPASS_4K           = 3,
+	GP10B_PROG_HPASS_DEBUG        = 4,
+	GP10B_PROG_HPASS_4K_DEBUG     = 5,
+	GP10B_PROG_VPASS_DEBUG        = 6,
+	GP10B_PROG_VPASS_4K_DEBUG     = 7,
+	GP10B_PROG_PASSTHROUGH        = 8,
+};
+
+static void gp10b_cde_get_program_numbers(struct gk20a *g,
+					  u32 block_height_log2,
+					  int *hprog_out, int *vprog_out)
+{
+	int hprog, vprog;
+
+	if (g->cde_app.shader_parameter == 1) {
+		hprog = GP10B_PROG_PASSTHROUGH;
+		vprog = GP10B_PROG_PASSTHROUGH;
+	} else {
+		hprog = GP10B_PROG_HPASS;
+		vprog = GP10B_PROG_VPASS;
+		if (g->cde_app.shader_parameter == 2) {
+			hprog = GP10B_PROG_HPASS_DEBUG;
+			vprog = GP10B_PROG_VPASS_DEBUG;
+		}
+		if (g->mm.bypass_smmu) {
+			if (!g->mm.disable_bigpage) {
+				gk20a_warn(&g->dev->dev,
+					   "when bypass_smmu is 1, disable_bigpage must be 1 too");
+			}
+			hprog |= 1;
+			vprog |= 1;
+		}
+	}
+
+	*hprog_out = hprog;
+	*vprog_out = vprog;
+}
+
+void gp10b_init_cde_ops(struct gpu_ops *gops)
+{
+	gops->cde.get_program_numbers = gp10b_cde_get_program_numbers;
+}
diff --git a/drivers/gpu/nvgpu/gp10b/cde_gp10b.h b/drivers/gpu/nvgpu/gp10b/cde_gp10b.h
new file mode 100644
index 00000000..52f785f1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/cde_gp10b.h
@@ -0,0 +1,23 @@
+/*
+ * GP10B CDE
+ *
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVHOST_GP10B_CDE
+#define _NVHOST_GP10B_CDE
+
+struct gpu_ops;
+
+void gp10b_init_cde_ops(struct gpu_ops *gops);
+
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 9eba5571..983b985d 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -31,6 +31,7 @@
 #include "gp10b/fifo_gp10b.h"
 #include "gp10b/gp10b_gating_reglist.h"
 #include "gp10b/regops_gp10b.h"
+#include "gp10b/cde_gp10b.h"
 
 #include "gm20b/gr_gm20b.h"
 #include "gm20b/fifo_gm20b.h"
@@ -103,6 +104,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gp10b_init_pmu_ops(gops);
 	gk20a_init_debug_ops(gops);
 	gp10b_init_regops(gops);
+	gp10b_init_cde_ops(gops);
 	gops->name = "gp10b";
 
 	c->twod_class = FERMI_TWOD_A;
-- 
cgit v1.2.2


From 892ed4317b581abc85a481c48272f35b36410b65 Mon Sep 17 00:00:00 2001
From: Supriya <ssharatkumar@nvidia.com>
Date: Thu, 6 Aug 2015 16:39:39 +0530
Subject: gpu: nvgpu: gp10b: Fix NS boot transcfg

Bug 1667322

Accommodate the transcfg address change

Change-Id: I83c5d4921040258a480df44a69792c721ff88f05
Signed-off-by: Supriya <ssharatkumar@nvidia.com>
Reviewed-on: http://git-master/r/779764
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index 2a8d968c..dcf28edf 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -232,6 +232,8 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops)
 		gops->pmu.load_lsfalcon_ucode = gp10b_load_falcon_ucode;
 	} else {
 		gk20a_init_pmu_ops(gops);
+		gops->pmu.pmu_setup_hw_and_bootstrap =
+			gm20b_init_nspmu_setup_hw1;
 		gops->pmu.load_lsfalcon_ucode = NULL;
 		gops->pmu.init_wpr_region = NULL;
 	}
-- 
cgit v1.2.2


From 4711d41bdf3d0b54af4db009a3bb1c09aa105e67 Mon Sep 17 00:00:00 2001
From: Sami Kiminki <skiminki@nvidia.com>
Date: Mon, 25 May 2015 14:49:38 +0300
Subject: gpu: nvgpu: Separate kernel and user GPU VA regions (gp10b)

Specify that everything in bar2 VM is kernel reserved.

Bug 200077571

Change-Id: I8f6c6ac6352ffd64eedc09187593b6c8d05757ef
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/746802
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 65449534..0c76abb5 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -75,6 +75,7 @@ static int gb10b_init_bar2_vm(struct gk20a *g)
 	mm->bar2.aperture_size = 32 << 20;
 	gk20a_dbg_info("bar2 vm size = 0x%x", mm->bar2.aperture_size);
 	gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
+		mm->bar2.aperture_size - SZ_4K,
 		mm->bar2.aperture_size, false, "bar2");
 
 	/* allocate instance mem for bar2 */
-- 
cgit v1.2.2


From 83955e553c9111e544eaa9d269347a61cfe3fa71 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Sun, 13 Sep 2015 13:44:01 -0700
Subject: gpu: nvgpu: priv load for gpccs load.

- clear mask to load gpccs with priv load.

Bug n/a

Change-Id: I21522bda83c4dd5c665d47ae334b9fed5cb8ec74
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/798406
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index dcf28edf..f29bcbad 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -153,8 +153,7 @@ static void gp10b_pmu_load_multiple_falcons(struct gk20a *g, u32 falconidmask,
 		cmd.cmd.acr.boot_falcons.flags = flags;
 		cmd.cmd.acr.boot_falcons.falconidmask =
 				falconidmask;
-		cmd.cmd.acr.boot_falcons.usevamask =
-				1 << LSF_FALCON_ID_GPCCS;
+		cmd.cmd.acr.boot_falcons.usevamask = 0;
 		cmd.cmd.acr.boot_falcons.wprvirtualbase.lo =
 				u64_lo32(g->pmu.wpr_buf.gpu_va);
 		cmd.cmd.acr.boot_falcons.wprvirtualbase.hi =
-- 
cgit v1.2.2


From cc1b124d5a6fdabdd541d3ddd0570a264b46be0c Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Mon, 14 Sep 2015 10:03:05 -0700
Subject: gpu: nvgpu: HAL to write DMATRFBASE

- Must write DMATRFBASE1 to 0 whenever
  DMATRFBASE is written.

Bug 200137618

Change-Id: Id8526d1bafbd116ffc4d8018983791fe9e9fa604
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/798780
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h |  4 ++++
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c    | 10 ++++++++++
 2 files changed, 14 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h
index 0de70b96..f66812d6 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h
@@ -406,6 +406,10 @@ static inline u32 pwr_falcon_dmatrfbase_r(void)
 {
 	return 0x0010a110;
 }
+static inline u32 pwr_falcon_dmatrfbase1_r(void)
+{
+	return 0x0010a128;
+}
 static inline u32 pwr_falcon_dmatrfmoffs_r(void)
 {
 	return 0x0010a114;
diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index f29bcbad..529491d0 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -20,6 +20,7 @@
 #include "gm20b/pmu_gm20b.h"
 
 #include "pmu_gp10b.h"
+#include "hw_pwr_gp10b.h"
 
 #define gp10b_dbg_pmu(fmt, arg...) \
 	gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
@@ -223,6 +224,14 @@ static int gp10b_pmu_setup_elpg(struct gk20a *g)
 	return ret;
 }
 
+void gp10b_write_dmatrfbase(struct gk20a *g, u32 addr)
+{
+	gk20a_writel(g, pwr_falcon_dmatrfbase_r(),
+				addr);
+	gk20a_writel(g, pwr_falcon_dmatrfbase1_r(),
+				0x0);
+}
+
 void gp10b_init_pmu_ops(struct gpu_ops *gops)
 {
 	if (gops->privsecurity) {
@@ -239,4 +248,5 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.pmu_setup_elpg = gp10b_pmu_setup_elpg;
 	gops->pmu.lspmuwprinitdone = false;
 	gops->pmu.fecsbootstrapdone = false;
+	gops->pmu.write_dmatrfbase = gp10b_write_dmatrfbase;
 }
-- 
cgit v1.2.2


From 17bc6e64577f2b0ce2c26b5ed4a8b7a8d06eede2 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Fri, 18 Sep 2015 14:15:32 +0530
Subject: gpu: nvgpu: fix sparse warning

Fix the sparse warning below by declaring gp10b_write_dmatrfbase()
as static

kernel-t18x/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c:227:6: warning: symbol
'gp10b_write_dmatrfbase' was not declared. Should it be static?

Bug 200088648

Change-Id: I3bd2eeaeb7234ab54d7e9342a7512ec28388f751
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/801213
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index 529491d0..a4d7a0f7 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -224,7 +224,7 @@ static int gp10b_pmu_setup_elpg(struct gk20a *g)
 	return ret;
 }
 
-void gp10b_write_dmatrfbase(struct gk20a *g, u32 addr)
+static void gp10b_write_dmatrfbase(struct gk20a *g, u32 addr)
 {
 	gk20a_writel(g, pwr_falcon_dmatrfbase_r(),
 				addr);
-- 
cgit v1.2.2


From 5df1bc28b3708427db3f3dabe78ea864ff085183 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 25 Aug 2015 13:00:38 -0700
Subject: gpu: nvgpu: gp10b: Choose netlist A

Force usage of netlist slot A.

Change-Id: Ib507b0e0c7ff6d0dbb43f91b6c7264424975d681
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/789060
Reviewed-on: http://git-master/r/806183
---
 drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c | 2 +-
 drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c
index 515ba630..b50698e0 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c
@@ -59,7 +59,7 @@ static int gr_gp10b_get_netlist_name(int index, char *name)
 
 static bool gr_gp10b_is_firmware_defined(void)
 {
-#ifdef GM20B_NETLIST_IMAGE_FW_NAME
+#ifdef GP10B_NETLIST_IMAGE_FW_NAME
 	return true;
 #else
 	return false;
diff --git a/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.h
index b1184f9d..b5c76d24 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.h
@@ -21,7 +21,7 @@
 #include "gk20a/gr_ctx_gk20a.h"
 
 /* production netlist, one and only one from below */
-/*#undef GM20B_NETLIST_IMAGE_FW_NAME*/
+#define GP10B_NETLIST_IMAGE_FW_NAME GK20A_NETLIST_IMAGE_A
 
 void gp10b_init_gr_ctx(struct gpu_ops *gops);
 
-- 
cgit v1.2.2


From 48cbfac597021ce163d4df997cdbff2f2a73eb88 Mon Sep 17 00:00:00 2001
From: Sami Kiminki <skiminki@nvidia.com>
Date: Tue, 18 Aug 2015 18:16:46 +0300
Subject: gpu: nvgpu: Add CDE scatter buffer code for GP10B

Add GP10B-specific code for populating the scatter buffer. Essentially,
this enables the use of SMMU bypass mode with 4-kB page compression.

Bug 1604102

Change-Id: Ic586e2f93827b9aa1c7b73b53b8f65d518588c26
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/789434
Reviewed-on: http://git-master/r/806184
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/cde_gp10b.c | 84 +++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/cde_gp10b.c b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
index acb8aee3..dadec4d2 100644
--- a/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
@@ -58,7 +58,91 @@ static void gp10b_cde_get_program_numbers(struct gk20a *g,
 	*vprog_out = vprog;
 }
 
+static bool gp10b_need_scatter_buffer(struct gk20a *g)
+{
+	return g->mm.bypass_smmu;
+}
+
+static u8 parity(u32 a)
+{
+	a ^= a>>16u;
+	a ^= a>>8u;
+	a ^= a>>4u;
+	a &= 0xfu;
+	return (0x6996u >> a) & 1u;
+}
+
+static int gp10b_populate_scatter_buffer(struct gk20a *g,
+					 struct sg_table *sgt,
+					 size_t surface_size,
+					 void *scatter_buffer_ptr,
+					 size_t scatter_buffer_size)
+{
+	/* map scatter buffer to CPU VA and fill it */
+	const u32 page_size_log2 = 12;
+	const u32 page_size = 1 << page_size_log2;
+	const u32 page_size_shift = page_size_log2 - 7u;
+
+	/* 0011 1111 1111 1111 1111 1110 0100 1000 */
+	const u32 getSliceMaskGP10B = 0x3ffffe48;
+	u8 *scatter_buffer = scatter_buffer_ptr;
+
+	size_t i;
+	struct scatterlist *sg = NULL;
+	u8 d = 0;
+	size_t page = 0;
+	size_t pages_left;
+
+	surface_size = round_up(surface_size, page_size);
+
+	pages_left = surface_size >> page_size_log2;
+	if ((pages_left >> 3) > scatter_buffer_size)
+	    return -ENOMEM;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		unsigned int j;
+		u64 surf_pa = sg_phys(sg);
+		unsigned int n = (int)(sg->length >> page_size_log2);
+
+		gk20a_dbg(gpu_dbg_cde, "surfPA=0x%llx + %d pages", surf_pa, n);
+
+		for (j=0; j < n && pages_left > 0; j++, surf_pa += page_size) {
+			u32 addr = (((u32)(surf_pa>>7)) & getSliceMaskGP10B) >> page_size_shift;
+			u8 scatter_bit = parity(addr);
+			u8 bit = page & 7;
+
+			d |= scatter_bit << bit;
+			if (bit == 7) {
+				scatter_buffer[page >> 3] = d;
+				d = 0;
+			}
+
+			++page;
+			--pages_left;
+		}
+
+		if (pages_left == 0)
+			break;
+	}
+
+	/* write the last byte in case the number of pages is not divisible by 8 */
+	if ((page & 7) != 0)
+		scatter_buffer[page >> 3] = d;
+
+#if defined(GK20A_DEBUG)
+	if (unlikely(gpu_dbg_cde & gk20a_dbg_mask)) {
+		gk20a_dbg(gpu_dbg_cde, "scatterBuffer content:");
+		for (i=0; i < page>>3; i++) {
+			gk20a_dbg(gpu_dbg_cde, " %x", scatter_buffer[i]);
+		}
+	}
+#endif
+	return 0;
+}
+
 void gp10b_init_cde_ops(struct gpu_ops *gops)
 {
 	gops->cde.get_program_numbers = gp10b_cde_get_program_numbers;
+	gops->cde.need_scatter_buffer = gp10b_need_scatter_buffer;
+	gops->cde.populate_scatter_buffer = gp10b_populate_scatter_buffer;
 }
-- 
cgit v1.2.2


From c54ebdd78a4a7185f5f0e7a7bcf4026c036c10a9 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 26 Aug 2015 16:00:19 -0700
Subject: gpu: nvgpu: gp10b: Implement NVC0_SET_GO_IDLE_TIMEOUT

Bug 1678603

Change-Id: Ib8fb09dace864567b1ce574c216a584831723684
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/790402
Reviewed-on: http://git-master/r/806185
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 8 ++++++++
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h | 1 +
 2 files changed, 9 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 1942b1e7..7d9f5f47 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -284,6 +284,11 @@ static int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
 	return size;
 }
 
+static void gr_gp10b_set_go_idle_timeout(struct gk20a *g, u32 data)
+{
+	gk20a_writel(g, gr_fe_go_idle_timeout_r(), data);
+}
+
 static int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
 				     u32 class_num, u32 offset, u32 data)
 {
@@ -310,6 +315,9 @@ static int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
 		case NVC097_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
 			g->ops.gr.set_alpha_circular_buffer_size(g, data);
 			break;
+		case NVC097_SET_GO_IDLE_TIMEOUT:
+			gr_gp10b_set_go_idle_timeout(g, data);
+			break;
 		default:
 			goto fail;
 		}
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index 1b99cafb..302ea197 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -28,6 +28,7 @@ enum {
 #define NVC097_SET_ALPHA_CIRCULAR_BUFFER_SIZE	0x02dc
 #define NVC097_SET_CIRCULAR_BUFFER_SIZE		0x1280
 #define NVC097_SET_SHADER_EXCEPTIONS		0x1528
+#define NVC097_SET_GO_IDLE_TIMEOUT		0x152c
 #define NVC0C0_SET_SHADER_EXCEPTIONS		0x1528
 
 void gp10b_init_gr(struct gpu_ops *ops);
-- 
cgit v1.2.2


From cc182623ab24b17ce3828fcd541672ad87191ea9 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 26 Aug 2015 16:04:04 -0700
Subject: gpu: nvgpu: gp10b: Disable deep binning

Disable deep binning by default.

Change-Id: I75da95984ac314015c6927e099a3eaa37fcc26fc
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/790403
Reviewed-on: http://git-master/r/806186
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 7d9f5f47..c9b870c7 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -333,8 +333,7 @@ static void gr_gp10b_cb_size_default(struct gk20a *g)
 	struct gr_gk20a *gr = &g->gr;
 
 	if (!gr->attrib_cb_default_size)
-		gr->attrib_cb_default_size =
-			gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
+		gr->attrib_cb_default_size = 0x800;
 	gr->alpha_cb_default_size =
 		gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
 }
@@ -929,6 +928,9 @@ static int gr_gp10b_init_fs_state(struct gk20a *g)
 			 gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f());
 	gk20a_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), data);
 
+	/* disable deep binning */
+	gk20a_writel(g, gr_fe_go_idle_timeout_r(), 0x800);
+
 	return gr_gm20b_ctx_state_floorsweep(g);
 }
 
-- 
cgit v1.2.2


From e7ab0321d3502515a7ede0bd395e80e5783664fc Mon Sep 17 00:00:00 2001
From: Robert Morell <rmorell@nvidia.com>
Date: Tue, 1 Sep 2015 17:59:57 -0700
Subject: gpu: nvgpu: gp10b: Correct C097_SET_GO_IDLE_TIMEOUT offset

Bug 1678603

Change-Id: I1c2c3c9395e068fabf554779ded6f0f536622c90
Signed-off-by: Robert Morell <rmorell@nvidia.com>
Reviewed-on: http://git-master/r/792831
Reviewed-on: http://git-master/r/806187
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index 302ea197..7c3ddf27 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -25,10 +25,10 @@ enum {
 	PASCAL_DMA_COPY_A        = 0xC0B5,
 };
 
+#define NVC097_SET_GO_IDLE_TIMEOUT		0x022c
 #define NVC097_SET_ALPHA_CIRCULAR_BUFFER_SIZE	0x02dc
 #define NVC097_SET_CIRCULAR_BUFFER_SIZE		0x1280
 #define NVC097_SET_SHADER_EXCEPTIONS		0x1528
-#define NVC097_SET_GO_IDLE_TIMEOUT		0x152c
 #define NVC0C0_SET_SHADER_EXCEPTIONS		0x1528
 
 void gp10b_init_gr(struct gpu_ops *ops);
-- 
cgit v1.2.2


From 71afbe484f0bda343f73c3afcfd5ca4205be4e09 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Sat, 29 Aug 2015 05:12:29 +0530
Subject: gpu: nvgpu: fuse read to boot in SECURE mode

-Read the fuse to boot in secure/production
 mode; otherwise boot in non-secure mode.

Bug N/A

Change-Id: Ia66acff63a4a5ed9351c01cd8907a337e88dc8eb
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/791323
Reviewed-on: http://git-master/r/806191
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 39 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 983b985d..a6131cea 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -37,6 +37,10 @@
 #include "gm20b/fifo_gm20b.h"
 #include "gm20b/pmu_gm20b.h"
 #include "gm20b/clk_gm20b.h"
+#include <linux/tegra-fuse.h>
+
+#define FUSE_OPT_PRIV_SEC_EN_0 0x264
+#define PRIV_SECURITY_ENABLED 0x01
 
 static struct gpu_ops gp10b_ops = {
 	.clock_gating = {
@@ -91,7 +95,40 @@ int gp10b_init_hal(struct gk20a *g)
 	struct nvgpu_gpu_characteristics *c = &g->gpu_characteristics;
 
 	*gops = gp10b_ops;
-	gops->privsecurity = 0;
+
+#ifdef CONFIG_TEGRA_ACR
+	if (tegra_platform_is_linsim()) {
+		gops->privsecurity = 1;
+		gops->securegpccs = 1;
+	} else {
+		if (tegra_fuse_readl(FUSE_OPT_PRIV_SEC_EN_0) &
+				PRIV_SECURITY_ENABLED) {
+			gops->privsecurity = 1;
+			gops->securegpccs =1;
+		} else {
+			gk20a_dbg_info("priv security is disabled in HW");
+			gops->privsecurity = 0;
+			gops->securegpccs = 0;
+		}
+	}
+#else
+	if (tegra_platform_is_linsim()) {
+		gk20a_dbg_info("running ASIM with PRIV security disabled");
+		gops->privsecurity = 0;
+		gops->securegpccs = 0;
+	} else {
+		if (tegra_fuse_readl(FUSE_OPT_PRIV_SEC_EN_0) &
+				PRIV_SECURITY_ENABLED) {
+			gk20a_dbg_info("priv security is not supported but enabled");
+			gops->privsecurity = 1;
+			gops->securegpccs =1;
+			return -EPERM;
+		} else {
+			gops->privsecurity = 0;
+			gops->securegpccs = 0;
+		}
+	}
+#endif
 
 	gp10b_init_mc(gops);
 	gp10b_init_gr(gops);
-- 
cgit v1.2.2


From 8e1c56689639917ca637875434a074fb697435e8 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Sun, 20 Sep 2015 13:36:54 -0700
Subject: gpu: nvgpu: gp10b: Always disable security in sim

Change-Id: I1fc8c4c4c71ebf84fe913af07fc2055959e5ab91
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/801850
Reviewed-on: http://git-master/r/806192
---
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index a6131cea..70486c4a 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -98,8 +98,8 @@ int gp10b_init_hal(struct gk20a *g)
 
 #ifdef CONFIG_TEGRA_ACR
 	if (tegra_platform_is_linsim()) {
-		gops->privsecurity = 1;
-		gops->securegpccs = 1;
+		gops->privsecurity = 0;
+		gops->securegpccs = 0;
 	} else {
 		if (tegra_fuse_readl(FUSE_OPT_PRIV_SEC_EN_0) &
 				PRIV_SECURITY_ENABLED) {
-- 
cgit v1.2.2


From 01ba044bdbbfa831eb9f507230bac0a1ed67e4ce Mon Sep 17 00:00:00 2001
From: Aingara Paramakuru <aparamakuru@nvidia.com>
Date: Thu, 27 Aug 2015 14:01:45 -0400
Subject: gpu: nvgpu: vgpu: add gp10b support

Add support for gp10b in a virtualized environment.

Bug 1677153
VFND-693

Change-Id: I919ffa44c6773940a7a3411ee8bbc403a992b7cb
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/792556
Reviewed-on: http://git-master/r/806193
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile                     |   2 +-
 drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h           |   7 +-
 drivers/gpu/nvgpu/vgpu/gp10b/Makefile          |  16 +++
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.c |  20 +++
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.h |  21 +++
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c   |  20 +++
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.h   |  21 +++
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c  |  35 +++++
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c   | 172 +++++++++++++++++++++++++
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.h   |  21 +++
 10 files changed, 333 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/vgpu/gp10b/Makefile
 create mode 100644 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index f259eefa..37cc15a5 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -7,4 +7,4 @@ ccflags-$(CONFIG_GK20A) += -Wno-multichar
 ccflags-$(CONFIG_GK20A) += -Werror
 
 obj-$(CONFIG_GK20A)	+= gp10b/
-
+obj-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += vgpu/gp10b/
diff --git a/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h b/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h
index 59ecefee..96f02125 100644
--- a/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h
+++ b/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h
@@ -1,7 +1,7 @@
 /*
  * NVIDIA GPU ID functions, definitions.
  *
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -28,4 +28,9 @@
 struct gpu_ops;
 extern int gp10b_init_hal(struct gk20a *);
 extern struct gk20a_platform t18x_gpu_tegra_platform;
+
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+#define TEGRA_18x_GPUID_VGPU_HAL vgpu_gp10b_init_hal
+extern int vgpu_gp10b_init_hal(struct gk20a *);
+#endif
 #endif
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/Makefile b/drivers/gpu/nvgpu/vgpu/gp10b/Makefile
new file mode 100644
index 00000000..fed8a08e
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/Makefile
@@ -0,0 +1,16 @@
+GCOV_PROFILE := y
+
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/drivers/gpu/nvgpu
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/include
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/drivers/gpu/nvgpu
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include/uapi
+
+ccflags-$(CONFIG_GK20A) += -Wno-multichar
+ccflags-y += -Werror
+
+obj-$(CONFIG_GK20A)  += \
+	vgpu_hal_gp10b.o  \
+	vgpu_gr_gp10b.o  \
+	vgpu_mm_gp10b.o \
+	vgpu_fifo_gp10b.o
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.c
new file mode 100644
index 00000000..34d942c1
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.c
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "vgpu_fifo_gp10b.h"
+
+void vgpu_gp10b_init_fifo_ops(struct gpu_ops *gops)
+{
+	/* syncpoint protection not supported yet */
+	gops->fifo.resetup_ramfc = NULL;
+}
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.h b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.h
new file mode 100644
index 00000000..4ede0b6d
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __VGPU_FIFO_GP10B_H__
+#define __VGPU_FIFO_GP10B_H__
+
+#include "gk20a/gk20a.h"
+
+void vgpu_gp10b_init_fifo_ops(struct gpu_ops *gops);
+
+#endif
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
new file mode 100644
index 00000000..9df29eee
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "vgpu_gr_gp10b.h"
+#include "vgpu/gm20b/vgpu_gr_gm20b.h"
+
+void vgpu_gp10b_init_gr_ops(struct gpu_ops *gops)
+{
+	vgpu_gm20b_init_gr_ops(gops);
+}
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.h b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.h
new file mode 100644
index 00000000..b3be49a7
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __VGPU_GR_GP10B_H__
+#define __VGPU_GR_GP10B_H__
+
+#include "gk20a/gk20a.h"
+
+void vgpu_gp10b_init_gr_ops(struct gpu_ops *gops);
+
+#endif
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
new file mode 100644
index 00000000..14a7768a
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "vgpu/vgpu.h"
+#include "gp10b/hal_gp10b.h"
+#include "vgpu_gr_gp10b.h"
+#include "vgpu_fifo_gp10b.h"
+#include "vgpu_mm_gp10b.h"
+
+int vgpu_gp10b_init_hal(struct gk20a *g)
+{
+	int err;
+
+	gk20a_dbg_fn("");
+
+	err = gp10b_init_hal(g);
+	if (err)
+		return err;
+
+	vgpu_init_hal_common(g);
+	vgpu_gp10b_init_gr_ops(&g->ops);
+	vgpu_gp10b_init_fifo_ops(&g->ops);
+	vgpu_gp10b_init_mm_ops(&g->ops);
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
new file mode 100644
index 00000000..0a769e94
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
@@ -0,0 +1,172 @@
+/*
+ * Virtualized GPU Memory Management
+ *
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/dma-mapping.h>
+#include "vgpu/vgpu.h"
+#include "vgpu_mm_gp10b.h"
+#include "gk20a/semaphore_gk20a.h"
+#include "gk20a/mm_gk20a.h"
+
+static int vgpu_gp10b_init_mm_setup_hw(struct gk20a *g)
+{
+	g->mm.bypass_smmu = true;
+	g->mm.disable_bigpage = true;
+	return 0;
+}
+
+static inline int add_mem_desc(struct tegra_vgpu_mem_desc *mem_desc,
+				u64 addr, u64 size, size_t *oob_size)
+{
+	if (*oob_size < sizeof(*mem_desc))
+		return -ENOMEM;
+
+	mem_desc->addr = addr;
+	mem_desc->length = size;
+	*oob_size -= sizeof(*mem_desc);
+	return 0;
+}
+
+/*
+ * Map @size bytes of @sgt (skipping @buffer_offset) at GPU VA @map_offset,
+ * allocating the VA when @map_offset is 0, via an AS_MAP_EX request to the
+ * vgpu server. Returns the mapped GPU VA, or 0 on failure.
+ */
+static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, u64 map_offset,
+				struct sg_table *sgt, u64 buffer_offset,
+				u64 size, int pgsz_idx, u8 kind_v,
+				u32 ctag_offset, u32 flags, int rw_flag,
+				bool clear_ctags, bool sparse, bool priv,
+				struct vm_gk20a_mapping_batch *batch)
+{
+	int err = 0;
+	struct device *d = dev_from_vm(vm);
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_as_map_ex_params *p = &msg.params.as_map_ex;
+	struct tegra_vgpu_mem_desc *mem_desc;
+	u32 page_size  = vm->gmmu_page_sizes[pgsz_idx];
+	u64 space_to_skip = buffer_offset;
+	u64 buffer_size = 0;
+	u32 mem_desc_count = 0;
+	struct scatterlist *sgl;
+	void *handle = NULL;
+	size_t oob_size;
+	u8 prot;
+
+	gk20a_dbg_fn("");
+
+	/* FIXME: add support for sparse mappings */
+
+	if (WARN_ON(!sgt) || WARN_ON(!g->mm.bypass_smmu))
+		return 0;
+
+	if (space_to_skip & (page_size - 1))
+		return 0;
+
+	/* Allocate (or validate when map_offset != 0) the virtual address. */
+	if (!map_offset) {
+		map_offset = gk20a_vm_alloc_va(vm, size, pgsz_idx);
+		if (!map_offset) {
+			gk20a_err(d, "failed to allocate va space");
+			err = -ENOMEM;
+			goto fail;
+		}
+	}
+
+	handle = tegra_gr_comm_oob_get_ptr(TEGRA_GR_COMM_CTX_CLIENT,
+					tegra_gr_comm_get_server_vmid(),
+					TEGRA_VGPU_QUEUE_CMD,
+					(void **)&mem_desc, &oob_size);
+	if (!handle) {
+		err = -EINVAL;
+		goto fail;
+	}
+
+	sgl = sgt->sgl;
+	while (space_to_skip && sgl &&
+		(space_to_skip + page_size > sgl->length)) {
+		space_to_skip -= sgl->length;
+		sgl = sg_next(sgl);
+	}
+	if (WARN_ON(!sgl)) {
+		err = -EINVAL;
+		goto fail;
+	}
+
+	if (add_mem_desc(&mem_desc[mem_desc_count++],
+			sg_phys(sgl) + space_to_skip,
+			sgl->length - space_to_skip,
+			&oob_size)) {
+		err = -ENOMEM;
+		goto fail;
+	}
+	buffer_size += sgl->length - space_to_skip;
+
+	sgl = sg_next(sgl);
+	while (sgl && buffer_size < size) {
+		if (add_mem_desc(&mem_desc[mem_desc_count++], sg_phys(sgl),
+				sgl->length, &oob_size)) {
+			err = -ENOMEM;
+			goto fail;
+		}
+
+		buffer_size += sgl->length;
+		sgl = sg_next(sgl);
+	}
+
+	if (rw_flag == gk20a_mem_flag_read_only)
+		prot = TEGRA_VGPU_MAP_PROT_READ_ONLY;
+	else if (rw_flag == gk20a_mem_flag_write_only)
+		prot = TEGRA_VGPU_MAP_PROT_WRITE_ONLY;
+	else
+		prot = TEGRA_VGPU_MAP_PROT_NONE;
+
+	msg.cmd = TEGRA_VGPU_CMD_AS_MAP_EX;
+	msg.handle = platform->virt_handle;
+	p->handle = vm->handle;
+	p->gpu_va = map_offset;
+	p->size = size;
+	p->mem_desc_count = mem_desc_count;
+	p->pgsz_idx = pgsz_idx;
+	p->iova = 0;
+	p->kind = kind_v;
+	p->cacheable =
+		(flags & NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE) ? 1 : 0;
+	p->prot = prot;
+	p->ctag_offset = ctag_offset;
+	p->clear_ctags = clear_ctags;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	if (err || msg.ret)
+		goto fail;
+
+	/* TLB invalidate handled on server side */
+
+	tegra_gr_comm_oob_put_ptr(handle);
+	return map_offset;
+fail:
+	if (handle)
+		tegra_gr_comm_oob_put_ptr(handle);
+	gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
+	return 0;
+}
+
+void vgpu_gp10b_init_mm_ops(struct gpu_ops *gops)
+{
+	gk20a_dbg_fn("");
+
+	gops->mm.gmmu_map = vgpu_gp10b_locked_gmmu_map;
+	gops->mm.init_mm_setup_hw = vgpu_gp10b_init_mm_setup_hw;
+}
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.h b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.h
new file mode 100644
index 00000000..5bdc9d1b
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __VGPU_MM_GP10B_H__
+#define __VGPU_MM_GP10B_H__
+
+#include "gk20a/gk20a.h"
+
+void vgpu_gp10b_init_mm_ops(struct gpu_ops *gops);
+
+#endif
-- 
cgit v1.2.2


From 3e3d83aff1688cb4e5560799de3deb68bd3617c4 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Fri, 15 May 2015 17:03:18 +0530
Subject: gpu: nvgpu: enable gp10b rail calls to bpmp

Bug 200086985

Change-Id: I9eaa135b96629636a6b949ae1e3874dd3abd5138
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/794723
Reviewed-on: http://git-master/r/743217
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index efecb1e3..44b27fe7 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -63,17 +63,28 @@ static int gp10b_tegra_late_probe(struct platform_device *pdev)
 
 static bool gp10b_tegra_is_railgated(struct platform_device *pdev)
 {
-	return false;
+	bool ret = false;
+
+	if (!tegra_platform_is_linsim())
+		ret = !tegra_powergate_is_powered(TEGRA_POWERGATE_GPU);
+
+	return ret;
 }
 
 static int gp10b_tegra_railgate(struct platform_device *pdev)
 {
+	if (!tegra_platform_is_linsim() &&
+	    tegra_powergate_is_powered(TEGRA_POWERGATE_GPU))
+		tegra_powergate_partition(TEGRA_POWERGATE_GPU);
 	return 0;
 }
 
 static int gp10b_tegra_unrailgate(struct platform_device *pdev)
 {
-	return 0;
+	int ret = 0;
+	if (!tegra_platform_is_linsim())
+		ret = tegra_unpowergate_partition(TEGRA_POWERGATE_GPU);
+	return ret;
 }
 
 static int gp10b_tegra_suspend(struct device *dev)
@@ -85,6 +96,11 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	.has_syncpoints = true,
 
 	/* power management configuration */
+	.railgate_delay		= 500,
+	.clockgate_delay	= 50,
+
+	/* power management configuration */
+	.can_railgate           = false,
 	.enable_elpg            = false,
 
 	.probe = gp10b_tegra_probe,
-- 
cgit v1.2.2


From 6434195dc75df5d574a9ae4f5535924bf4704aec Mon Sep 17 00:00:00 2001
From: Kirill Artamonov <kartamonov@nvidia.com>
Date: Tue, 4 Aug 2015 16:50:15 +0300
Subject: gpu: nvgpu: gp10b: update headers

Add counters for GFXP, WFI, CTA and CILP context switches

bug 1525327
bug 1581799

Signed-off-by: Kirill Artamonov <kartamonov@nvidia.com>
Change-Id: Ifd6ee08af8a83ed827a8996725139416d81ca10e
Reviewed-on: http://git-master/r/794977
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/778761
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
index 3b97c9da..f358d405 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
@@ -118,6 +118,22 @@ static inline u32 ctxsw_prog_main_image_num_save_ops_o(void)
 {
 	return 0x000000f4;
 }
+static inline u32 ctxsw_prog_main_image_num_wfi_save_ops_o(void)
+{
+	return 0x000000d0;
+}
+static inline u32 ctxsw_prog_main_image_num_cta_save_ops_o(void)
+{
+	return 0x000000d4;
+}
+static inline u32 ctxsw_prog_main_image_num_gfxp_save_ops_o(void)
+{
+	return 0x000000d8;
+}
+static inline u32 ctxsw_prog_main_image_num_cilp_save_ops_o(void)
+{
+	return 0x000000dc;
+}
 static inline u32 ctxsw_prog_main_image_num_restore_ops_o(void)
 {
 	return 0x000000f8;
-- 
cgit v1.2.2


From 3b08d73568ddaf0dec2c2abe8e813672da2463ae Mon Sep 17 00:00:00 2001
From: Kirill Artamonov <kartamonov@nvidia.com>
Date: Sun, 25 Jan 2015 18:42:18 +0200
Subject: gpu: nvgpu: gp10b: add debug features for gfxp and cilp

Add debugfs switch to force cilp and gfx preemption
Add debugfs switch to dump context switch stats on channel
destruction.

bug 1525327
bug 1581799

Signed-off-by: Kirill Artamonov <kartamonov@nvidia.com>
Change-Id: I7d0558cc325ce655411388ea66ad982101f2fe66
Reviewed-on: http://git-master/r/794976
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/677231
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c             | 47 ++++++++++++++++++++++++--
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h             |  8 ++++-
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 24 +++++++++++++
 3 files changed, 76 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index c9b870c7..e727ee99 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -25,6 +25,7 @@
 #include "hw_proj_gp10b.h"
 #include "hw_ctxsw_prog_gp10b.h"
 #include "hw_mc_gp10b.h"
+#include <linux/vmalloc.h>
 
 static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 {
@@ -482,6 +483,13 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 	if (err)
 		return err;
 
+	if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp)
+		flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP;
+
+	if (class == PASCAL_COMPUTE_A &&
+			g->gr.t18x.ctx_vars.force_preemption_cilp)
+		flags |= NVGPU_ALLOC_OBJ_FLAGS_CILP;
+
 	if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) {
 		u32 spill_size =
 			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v();
@@ -531,7 +539,7 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 			goto fail_free_betacb;
 		}
 
-		(*gr_ctx)->preempt_mode = flags;
+		(*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_GFXP;
 	}
 
 	if (class == PASCAL_COMPUTE_A) {
@@ -558,6 +566,38 @@ fail_free_gk20a_ctx:
 	return err;
 }
 
+/* Log ctxsw save counters and the gfx preemption option from @gr_ctx. */
+static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm,
+		  struct gr_ctx_desc *gr_ctx)
+{
+	void *ctx_ptr = vmap(gr_ctx->mem.pages,
+		PAGE_ALIGN(gr_ctx->mem.size) >> PAGE_SHIFT,
+		0, pgprot_writecombine(PAGE_KERNEL));
+	if (WARN(!ctx_ptr, "Cannot map context\n"))
+		return;
+	gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_num_save_ops_o(), 0));
+	gk20a_err(dev_from_gk20a(g), "WFI_SAVE_OPERATIONS : %d\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_num_wfi_save_ops_o(), 0));
+	gk20a_err(dev_from_gk20a(g), "CTA_SAVE_OPERATIONS : %d\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_num_cta_save_ops_o(), 0));
+	gk20a_err(dev_from_gk20a(g), "GFXP_SAVE_OPERATIONS : %d\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0));
+	gk20a_err(dev_from_gk20a(g), "CILP_SAVE_OPERATIONS : %d\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_num_cilp_save_ops_o(), 0));
+	gk20a_err(dev_from_gk20a(g),
+		"image gfx preemption option (GFXP is 1) %x\n",
+		gk20a_mem_rd32(ctx_ptr +
+			ctxsw_prog_main_image_graphics_preemption_options_o(),
+			0));
+	vunmap(ctx_ptr);
+}
+
 static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 			  struct gr_ctx_desc *gr_ctx)
 {
@@ -566,15 +606,18 @@ static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 	if (!gr_ctx)
 		return;
 
+	if (g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close)
+		dump_ctx_switch_stats(g, vm, gr_ctx);
+
 	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer);
 	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
 	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
 	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
 	gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
-
 	gk20a_dbg_fn("done");
 }
 
+
 static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx,
 		void *ctx_ptr)
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index 7c3ddf27..370e0ea3 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -1,7 +1,7 @@
 /*
  * GM20B GPU GR
  *
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -36,6 +36,12 @@ void gp10b_init_gr(struct gpu_ops *ops);
 struct gr_t18x {
 	struct {
 		u32 preempt_image_size;
+		u32 force_preemption_gfxp;
+		u32 force_preemption_cilp;
+		u32 dump_ctxsw_stats_on_channel_close;
+		struct dentry *debugfs_force_preemption_cilp;
+		struct dentry *debugfs_force_preemption_gfxp;
+		struct dentry *debugfs_dump_ctxsw_stats;
 	} ctx_vars;
 };
 
diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 44b27fe7..2f81378d 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -53,6 +53,30 @@ static int gp10b_tegra_probe(struct platform_device *pdev)
 	platform->bypass_smmu = !device_is_iommuable(&pdev->dev);
 	platform->disable_bigpage = platform->bypass_smmu;
 
+	platform->g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close
+		= false;
+	platform->g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close
+		= false;
+
+	platform->g->gr.t18x.ctx_vars.force_preemption_gfxp = false;
+	platform->g->gr.t18x.ctx_vars.force_preemption_cilp = false;
+
+	platform->g->gr.t18x.ctx_vars.debugfs_force_preemption_gfxp =
+		debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR,
+			platform->debugfs,
+			&platform->g->gr.t18x.ctx_vars.force_preemption_gfxp);
+
+	platform->g->gr.t18x.ctx_vars.debugfs_force_preemption_cilp =
+		debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR,
+			platform->debugfs,
+			&platform->g->gr.t18x.ctx_vars.force_preemption_cilp);
+
+	platform->g->gr.t18x.ctx_vars.debugfs_dump_ctxsw_stats =
+		debugfs_create_bool("dump_ctxsw_stats_on_channel_close",
+			S_IRUGO|S_IWUSR,
+			platform->debugfs,
+			&platform->g->gr.t18x.
+				ctx_vars.dump_ctxsw_stats_on_channel_close);
 	return 0;
 }
 
-- 
cgit v1.2.2


From e51dfa9d61048d0c38e93e3873aa7c74b922a3a9 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Tue, 18 Aug 2015 12:49:38 +0530
Subject: gpu: nvgpu: gp10b: Use clock API to enable clocks

Use CCF to enable GPU clocks. Keep an extra reference to prevent
runtime PM callbacks from disabling clocks while GPU is powered up.

Bug 1673672

Change-Id: I8c34be5ec338fedea62aa3e05bd6bed0513bf1b6
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/788814
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-on: http://git-master/r/785265
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 66 +++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 2f81378d..fbca62b0 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -28,6 +28,49 @@
 #include "gk20a/gk20a.h"
 #include "platform_tegra.h"
 
+static struct {
+	char *name;
+	unsigned long default_rate;
+} tegra_gp10b_clocks[] = {
+	{"gpu", 1900000000},
+	{"gpu_sys", 204000000} };
+
+/*
+ * gp10b_tegra_get_clocks()
+ *
+ * This function looks up the gp10b clocks on the Tegra platform, sets their
+ * default rates, and stores them in the gp10b platform data.
+ */
+
+static int gp10b_tegra_get_clocks(struct platform_device *pdev)
+{
+	struct gk20a_platform *platform = platform_get_drvdata(pdev);
+	struct gk20a *g = get_gk20a(pdev);
+	struct device *dev = dev_from_gk20a(g);
+	int i;
+
+	if (tegra_platform_is_linsim())
+		return 0;
+
+	platform->num_clks = 0;
+	for (i = 0; i < ARRAY_SIZE(tegra_gp10b_clocks); i++) {
+		long rate = tegra_gp10b_clocks[i].default_rate;
+		struct clk *c;
+
+		c = clk_get(dev, tegra_gp10b_clocks[i].name);
+		if (IS_ERR(c)) {
+			gk20a_err(&pdev->dev, "cannot get clock %s",
+					tegra_gp10b_clocks[i].name);
+		} else {
+			clk_set_rate(c, rate);
+			platform->clk[i] = c;
+		}
+	}
+	platform->num_clks = i;
+
+	return 0;
+}
+
 static int gp10b_tegra_probe(struct platform_device *pdev)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(pdev);
@@ -77,6 +120,9 @@ static int gp10b_tegra_probe(struct platform_device *pdev)
 			platform->debugfs,
 			&platform->g->gr.t18x.
 				ctx_vars.dump_ctxsw_stats_on_channel_close);
+
+	gp10b_tegra_get_clocks(pdev);
+
 	return 0;
 }
 
@@ -97,17 +143,33 @@ static bool gp10b_tegra_is_railgated(struct platform_device *pdev)
 
 static int gp10b_tegra_railgate(struct platform_device *pdev)
 {
+	struct gk20a_platform *platform = gk20a_get_platform(pdev);
+
 	if (!tegra_platform_is_linsim() &&
-	    tegra_powergate_is_powered(TEGRA_POWERGATE_GPU))
+	    tegra_powergate_is_powered(TEGRA_POWERGATE_GPU)) {
+		int i;
+		for (i = 0; i < platform->num_clks; i++) {
+			if (platform->clk[i])
+				clk_disable_unprepare(platform->clk[i]);
+		}
 		tegra_powergate_partition(TEGRA_POWERGATE_GPU);
+	}
 	return 0;
 }
 
 static int gp10b_tegra_unrailgate(struct platform_device *pdev)
 {
 	int ret = 0;
-	if (!tegra_platform_is_linsim())
+	struct gk20a_platform *platform = gk20a_get_platform(pdev);
+
+	if (!tegra_platform_is_linsim()) {
+		int i;
 		ret = tegra_unpowergate_partition(TEGRA_POWERGATE_GPU);
+		for (i = 0; i < platform->num_clks; i++) {
+			if (platform->clk[i])
+				clk_prepare_enable(platform->clk[i]);
+		}
+	}
 	return ret;
 }
 
-- 
cgit v1.2.2


From 1ef64423f91a4add0351bed5bf55577768ccebf2 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Mon, 21 Sep 2015 14:57:54 -0700
Subject: gpu: nvgpu: ELPG init & statistics update

- Add the init parameter required to start ELPG
- Change the ELPG statistics dump

Bug  1684939

Change-Id: Icc482c08303d0870ec2e1c18a845074968b15e77
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/802455
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/806194
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 56 +++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index a4d7a0f7..6832bf41 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -202,6 +202,60 @@ static int gp10b_load_falcon_ucode(struct gk20a *g, u32 falconidmask)
 	return 0;
 }
 
+static void pmu_handle_gr_param_msg(struct gk20a *g, struct pmu_msg *msg,
+			void *param, u32 handle, u32 status)
+{
+	gk20a_dbg_fn("");
+
+	if (status != 0) {
+		gk20a_err(dev_from_gk20a(g), "GR PARAM cmd aborted");
+		/* TBD: disable ELPG */
+		return;
+	}
+
+	gp10b_dbg_pmu("GR PARAM is acknowledged from PMU %x \n",
+			msg->msg.pg.msg_type);
+
+	return;
+}
+
+static int gp10b_pg_gr_init(struct gk20a *g, u8 grfeaturemask)
+{
+	struct pmu_gk20a *pmu = &g->pmu;
+	struct pmu_cmd cmd;
+	u32 seq;
+
+	memset(&cmd, 0, sizeof(struct pmu_cmd));
+	cmd.hdr.unit_id = PMU_UNIT_PG;
+	cmd.hdr.size = PMU_CMD_HDR_SIZE +
+			sizeof(struct pmu_pg_cmd_gr_init_param);
+	cmd.cmd.pg.gr_init_param.cmd_type =
+			PMU_PG_CMD_ID_PG_PARAM;
+	cmd.cmd.pg.gr_init_param.sub_cmd_id =
+			PMU_PG_PARAM_CMD_GR_INIT_PARAM;
+	cmd.cmd.pg.gr_init_param.featuremask =
+			grfeaturemask;
+
+	gp10b_dbg_pmu("cmd post PMU_PG_CMD_ID_PG_PARAM %x", grfeaturemask);
+	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
+			pmu_handle_gr_param_msg, pmu, &seq, ~0);
+
+	return 0;
+}
+void gp10b_pmu_elpg_statistics(struct gk20a *g,
+		u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt)
+{
+	struct pmu_gk20a *pmu = &g->pmu;
+	struct pmu_pg_stats_v1 stats;
+
+	pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
+		(u8 *)&stats, sizeof(struct pmu_pg_stats_v1), 0);
+
+	*ingating_time = stats.total_sleep_timeus;
+	*ungating_time = stats.total_nonsleep_timeus;
+	*gating_cnt = stats.entry_count;
+}
+
 static int gp10b_pmu_setup_elpg(struct gk20a *g)
 {
 	int ret = 0;
@@ -249,4 +303,6 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.lspmuwprinitdone = false;
 	gops->pmu.fecsbootstrapdone = false;
 	gops->pmu.write_dmatrfbase = gp10b_write_dmatrfbase;
+	gops->pmu.pmu_elpg_statistics = gp10b_pmu_elpg_statistics;
+	gops->pmu.pmu_pg_grinit_param = gp10b_pg_gr_init;
 }
-- 
cgit v1.2.2


From 0e6a87cf229831b302d667df6aed052e726fd3c6 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 1 Sep 2015 13:54:49 -0700
Subject: gpu: nvgpu: gp10b: Fix CB size for GfxP

Program correct CB size for GfxP channels. We were accidentally
using the context image size.

Change-Id: I273215256e41e89b7d76f3294a73641804beeb79
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/792713
Reviewed-on: http://git-master/r/806188
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index e727ee99..b614da64 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -902,7 +902,7 @@ static void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
 	int attrBufferSize;
 
 	if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va)
-		attrBufferSize = gr_ctx->t18x.preempt_ctxsw_buffer.size;
+		attrBufferSize = gr_ctx->t18x.betacb_ctxsw_buffer.size;
 	else
 		attrBufferSize = g->ops.gr.calc_global_ctx_buffer_size(g);
 
-- 
cgit v1.2.2


From 0b0ce7de093facfcd2e56ba6f47faef6a8ab025a Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 7 Sep 2015 08:24:09 -0700
Subject: gpu: nvgpu: gp10b: Fix steady state beta CB size

We program the default steady state beta CB size. The default is
for deep binning, but we've disabled deep binning. As a result, the
steady state CB size was left too high.

Bug 1683535

Change-Id: I17029078d9c83e55eec6faacfc83c6d812f8c3c0
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/795306
Reviewed-on: http://git-master/r/806189
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index b614da64..dff8adae 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -113,7 +113,7 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
 				proj_ppc_in_gpc_stride_v() * ppc_index,
-				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_default_v(),
+				gr->attrib_cb_default_size,
 				patch);
 
 			attrib_offset_in_chunk += gr->attrib_cb_size *
-- 
cgit v1.2.2


From 177a9716e2324f428d3aa72b427461228c06bd7e Mon Sep 17 00:00:00 2001
From: Leonid Moiseichuk <lmoiseichuk@nvidia.com>
Date: Wed, 2 Sep 2015 10:41:23 +0300
Subject: gpu: nvgpu: enabling cyclestats for gp10b

Enabling cyclestats and cyclestats snapshot support for gp10b (t186) devices.

Bug 1674079

Change-Id: I2e14801de3c61d180630bb9dcd2c607749814893
Signed-off-by: Leonid Moiseichuk <lmoiseichuk@nvidia.com>
Reviewed-on: http://git-master/r/792953
Reviewed-on: http://git-master/r/806190
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index dff8adae..c339b14d 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -977,6 +977,18 @@ static int gr_gp10b_init_fs_state(struct gk20a *g)
 	return gr_gm20b_ctx_state_floorsweep(g);
 }
 
+static void gr_gp10b_init_cyclestats(struct gk20a *g)
+{
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+	g->gpu_characteristics.flags |=
+		NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
+	g->gpu_characteristics.flags |=
+		NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT;
+#else
+	(void)g;
+#endif
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -1004,4 +1016,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 		gr_gp10b_update_ctxsw_preemption_mode;
 	gops->gr.dump_gr_regs = gr_gp10b_dump_gr_status_regs;
 	gops->gr.wait_empty = gr_gp10b_wait_empty;
+	gops->gr.init_cyclestats = gr_gp10b_init_cyclestats;
 }
-- 
cgit v1.2.2


From c9da53da4e595271fc6a9639cf1907d84061e356 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Fri, 2 Oct 2015 17:27:32 +0530
Subject: gpu: nvgpu: fix sparse warning

Fix the sparse warning below:
drivers/gpu/nvgpu/gm20b/gr_gm20b.c:1055:6: warning: symbol
'gr_gm20b_enable_cde_in_fecs' was not declared. Should it be static?

Bug 200088648

Change-Id: I862100d76f2ed5669d15a8f3b8cb9211df7f98ee
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/810394
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Amit Sharma (SW-TEGRA) <amisharma@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index 6832bf41..77727ff2 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -242,7 +242,7 @@ static int gp10b_pg_gr_init(struct gk20a *g, u8 grfeaturemask)
 
 	return 0;
 }
-void gp10b_pmu_elpg_statistics(struct gk20a *g,
+static void gp10b_pmu_elpg_statistics(struct gk20a *g,
 		u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt)
 {
 	struct pmu_gk20a *pmu = &g->pmu;
-- 
cgit v1.2.2


From d1331bd07d9e9b9c6432ec9406db29e197eabd8a Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 21 Sep 2015 11:10:30 -0700
Subject: gpu: nvgpu: gp10b: Implement SetCoalesceBufferSize

Implement method for setting the coalesce buffer size at runtime.

Bug 1681992

Change-Id: Ice6c00a27f642c2d68d6cd0e30c12df2e48f5374
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/802366
(cherry picked from commit bd763bc8a16b80ccc8f79b2229eccf2fe2417611)
Reviewed-on: http://git-master/r/808239
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c    | 17 +++++++++++++++++
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h    |  1 +
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 12 ++++++++++++
 3 files changed, 30 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index c339b14d..1a50d29a 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -290,6 +290,20 @@ static void gr_gp10b_set_go_idle_timeout(struct gk20a *g, u32 data)
 	gk20a_writel(g, gr_fe_go_idle_timeout_r(), data);
 }
 
+static void gr_gp10b_set_coalesce_buffer_size(struct gk20a *g, u32 data)
+{
+	u32 val;
+
+	gk20a_dbg_fn("");
+
+	val = gk20a_readl(g, gr_gpcs_tc_debug0_r());
+	val = set_field(val, gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m(),
+			     gr_gpcs_tc_debug0_limit_coalesce_buffer_size_f(data));
+	gk20a_writel(g, gr_gpcs_tc_debug0_r(), val);
+
+	gk20a_dbg_fn("done");
+}
+
 static int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
 				     u32 class_num, u32 offset, u32 data)
 {
@@ -319,6 +333,9 @@ static int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
 		case NVC097_SET_GO_IDLE_TIMEOUT:
 			gr_gp10b_set_go_idle_timeout(g, data);
 			break;
+		case NVC097_SET_COALESCE_BUFFER_SIZE:
+			gr_gp10b_set_coalesce_buffer_size(g, data);
+			break;
 		default:
 			goto fail;
 		}
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index 370e0ea3..bf49ba6f 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -27,6 +27,7 @@ enum {
 
 #define NVC097_SET_GO_IDLE_TIMEOUT		0x022c
 #define NVC097_SET_ALPHA_CIRCULAR_BUFFER_SIZE	0x02dc
+#define NVC097_SET_COALESCE_BUFFER_SIZE		0x1028
 #define NVC097_SET_CIRCULAR_BUFFER_SIZE		0x1280
 #define NVC097_SET_SHADER_EXCEPTIONS		0x1528
 #define NVC0C0_SET_SHADER_EXCEPTIONS		0x1528
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 6e4f7d1a..e33f0734 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -3786,4 +3786,16 @@ static inline u32 gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f(void)
 {
 	return 0x1000;
 }
+static inline u32 gr_gpcs_tc_debug0_r(void)
+{
+	return 0x00418708;
+}
+static inline u32 gr_gpcs_tc_debug0_limit_coalesce_buffer_size_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+static inline u32 gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m(void)
+{
+	return 0xff << 0;
+}
 #endif
-- 
cgit v1.2.2


From d3c12a335d5e04bdfabfe07877695085c4f26612 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Thu, 10 Sep 2015 21:54:35 +0530
Subject: gpu: nvgpu: implement reset_assert/deassert for gp10b

Implement platform specific reset_assert() and reset_deassert()
calls for gp10b

These APIs will in turn use the reset_control APIs to do
their work.

Also, set force_reset_in_do_idle = true for gp10b, since
railgating is not supported yet

Bug 200137963

Change-Id: I2c0fe1273d3ecfd0c46704a44374712052ff51d6
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/797150
(cherry picked from commit 6ac04ca84cee8a4d3b089678c81534799880712d)
Reviewed-on: http://git-master/r/808240
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 32 ++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index fbca62b0..a4348f6c 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -24,6 +24,7 @@
 #include <linux/dma-buf.h>
 #include <linux/nvmap.h>
 #include <linux/tegra_pm_domains.h>
+#include <linux/reset.h>
 #include "gk20a/platform_gk20a.h"
 #include "gk20a/gk20a.h"
 #include "platform_tegra.h"
@@ -178,6 +179,32 @@ static int gp10b_tegra_suspend(struct device *dev)
 	return 0;
 }
 
+static int gp10b_tegra_reset_assert(struct platform_device *dev)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	int ret = 0;
+
+	if (!platform->reset_control)
+		return -EINVAL;
+
+	ret = reset_control_assert(platform->reset_control);
+
+	return ret;
+}
+
+static int gp10b_tegra_reset_deassert(struct platform_device *dev)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	int ret = 0;
+
+	if (!platform->reset_control)
+		return -EINVAL;
+
+	ret = reset_control_deassert(platform->reset_control);
+
+	return ret;
+}
+
 struct gk20a_platform t18x_gpu_tegra_platform = {
 	.has_syncpoints = true,
 
@@ -209,4 +236,9 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 
 	.secure_alloc = gk20a_tegra_secure_alloc,
 	.secure_page_alloc = gk20a_tegra_secure_page_alloc,
+
+	.reset_assert = gp10b_tegra_reset_assert,
+	.reset_deassert = gp10b_tegra_reset_deassert,
+
+	.force_reset_in_do_idle = true,
 };
-- 
cgit v1.2.2


From f2b4fcdce747015dddddc35dc96eb5d3a4a2bc5d Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Thu, 10 Sep 2015 17:56:39 +0530
Subject: gpu: nvgpu: implement set_gpc_tpc_mask for gp10b

Bug 200137963

Change-Id: Ibd09b206620e6d6826586bb40e1125fc178dd8e4
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/797151
(cherry picked from commit 343c4704564f4b4f22a943a94e66d2c83f63a28f)
Reviewed-on: http://git-master/r/808241
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 1a50d29a..d61ac5bb 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -15,6 +15,7 @@
 
 #include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */
 #include <linux/delay.h>
+#include <linux/tegra-fuse.h>
 
 #include "gk20a/gr_gk20a.h"
 
@@ -1006,6 +1007,19 @@ static void gr_gp10b_init_cyclestats(struct gk20a *g)
 #endif
 }
 
+static void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
+{
+	tegra_fuse_writel(0x1, FUSE_FUSEBYPASS_0);
+	tegra_fuse_writel(0x0, FUSE_WRITE_ACCESS_SW_0);
+
+	if (g->gr.gpc_tpc_mask[gpc_index] == 0x1)
+		tegra_fuse_writel(0x2, FUSE_OPT_GPU_TPC0_DISABLE_0);
+	else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2)
+		tegra_fuse_writel(0x1, FUSE_OPT_GPU_TPC0_DISABLE_0);
+	else
+		tegra_fuse_writel(0x0, FUSE_OPT_GPU_TPC0_DISABLE_0);
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -1034,4 +1048,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.dump_gr_regs = gr_gp10b_dump_gr_status_regs;
 	gops->gr.wait_empty = gr_gp10b_wait_empty;
 	gops->gr.init_cyclestats = gr_gp10b_init_cyclestats;
+	gops->gr.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask;
 }
-- 
cgit v1.2.2


From 040d71d07bdad49bbd2aac4564f0ea33800fa595 Mon Sep 17 00:00:00 2001
From: Jussi Rasanen <jrasanen@nvidia.com>
Date: Tue, 29 Sep 2015 13:21:05 +0300
Subject: gpu: nvgpu: fix ctag computation overflow with 8GB

Bug 1689976

Change-Id: Ibf1c296fac4f2a2c6fcf062cbd80b3526a4fd4ed
Signed-off-by: Jussi Rasanen <jrasanen@nvidia.com>
Reviewed-on: http://git-master/r/806588
(cherry picked from commit 24b57989dc9636b41004bac32ee56dce90318350)
Reviewed-on: http://git-master/r/808242
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 0c76abb5..ae9c5c7e 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -152,7 +152,7 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 			   struct scatterlist **sgl,
 			   u64 *offset,
 			   u64 *iova,
-			   u32 kind_v, u32 *ctag,
+			   u32 kind_v, u64 *ctag,
 			   bool cacheable, bool unmapped_pte,
 			   int rw_flag, bool sparse, bool priv)
 {
@@ -193,7 +193,7 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 			   struct scatterlist **sgl,
 			   u64 *offset,
 			   u64 *iova,
-			   u32 kind_v, u32 *ctag,
+			   u32 kind_v, u64 *ctag,
 			   bool cacheable, bool unmapped_pte,
 			   int rw_flag, bool sparse, bool priv)
 {
@@ -249,13 +249,13 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			   struct scatterlist **sgl,
 			   u64 *offset,
 			   u64 *iova,
-			   u32 kind_v, u32 *ctag,
+			   u32 kind_v, u64 *ctag,
 			   bool cacheable, bool unmapped_pte,
 			   int rw_flag, bool sparse, bool priv)
 {
 	struct gk20a *g = vm->mm->g;
 	u32 page_size  = vm->gmmu_page_sizes[gmmu_pgsz_idx];
-	u32 ctag_granularity = g->ops.fb.compression_page_size(g);
+	u64 ctag_granularity = g->ops.fb.compression_page_size(g);
 	u32 pte_w[2] = {0, 0}; /* invalid pte */
 
 	gk20a_dbg_fn("");
@@ -274,7 +274,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 
 		pte_w[1] = *iova >> (24 + gmmu_new_pte_address_shift_v()) |
 			   gmmu_new_pte_kind_f(kind_v) |
-			   gmmu_new_pte_comptagline_f(*ctag / ctag_granularity);
+			   gmmu_new_pte_comptagline_f((u32)(*ctag / ctag_granularity));
 
 		if (rw_flag == gk20a_mem_flag_read_only)
 			pte_w[0] |= gmmu_new_pte_read_only_true_f();
@@ -287,7 +287,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			   " ctag=%d vol=%d"
 			   " [0x%08x, 0x%08x]",
 			   i, *iova,
-			   kind_v, *ctag / ctag_granularity, !cacheable,
+			   kind_v, (u32)(*ctag / ctag_granularity), !cacheable,
 			   pte_w[1], pte_w[0]);
 
 		if (*ctag)
-- 
cgit v1.2.2


From c6766cc798efe070eac8944d686e27592661e849 Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Fri, 13 Feb 2015 17:11:37 +0530
Subject: gpu :nvgpu: gp10b: add ptimer scaling factor as 1x

bug 1603226

t18x fixes the ptimer bug and its ptimer ticks at 1 ns, so the scaling factor is 1x (ptimerscaling10x = 10).

Change-Id: I590c94957c93adf70263f81a0cdfcb8dc913639e
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/799989
(cherry picked from commit 44866e195113b0a44ed2513a81dcaaf079c2a5f1)
Reviewed-on: http://git-master/r/707810
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index a4348f6c..c28c71b5 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -216,6 +216,9 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	.can_railgate           = false,
 	.enable_elpg            = false,
 
+	/* ptimer scaling constant */
+	.ptimerscaling10x	= 10,
+
 	.probe = gp10b_tegra_probe,
 	.late_probe = gp10b_tegra_late_probe,
 
-- 
cgit v1.2.2


From 9fb5c25782af6ef1e4a60056afbce7a7e1bff46c Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Fri, 25 Sep 2015 17:32:29 -0700
Subject: gpu: nvgpu: gp10b: update slcg xbar prod settings

Bug 1689806

Change-Id: I98ca5fe006ecdf056ac45b15b2dc128929ea4fd5
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/806115
(cherry picked from commit fc15b029187db4f2aba213e89672bd84b5d020cd)
Reviewed-on: http://git-master/r/805482
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 70486c4a..381ee8b1 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -84,6 +84,8 @@ static struct gpu_ops gp10b_ops = {
 			gp10b_blcg_pwr_csb_load_gating_prod,
 		.blcg_pmu_load_gating_prod =
 			gp10b_blcg_pmu_load_gating_prod,
+		.blcg_xbar_load_gating_prod =
+			gp10b_blcg_xbar_load_gating_prod,
 		.pg_gr_load_gating_prod =
 			gr_gp10b_pg_gr_load_gating_prod,
 	}
-- 
cgit v1.2.2


From 1ba28cf44d91c0d3c83372ea2f248b5588893f6a Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 29 Sep 2015 12:39:57 -0700
Subject: gpu: nvgpu: gp10b: Force always SMMU bypass

Bug 1688709

Change-Id: If778034225dabbd0f9e6ff843ea6f06011c432bd
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/807030
(cherry picked from commit 32f03899ca689f6af12760afe04cf4c8e60ebba1)
Reviewed-on: http://git-master/r/808243
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index c28c71b5..eb99a66c 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -94,7 +94,7 @@ static int gp10b_tegra_probe(struct platform_device *pdev)
 	}
 
 	platform->g->host1x_dev = host1x_pdev;
-	platform->bypass_smmu = !device_is_iommuable(&pdev->dev);
+	platform->bypass_smmu = 1;
 	platform->disable_bigpage = platform->bypass_smmu;
 
 	platform->g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close
-- 
cgit v1.2.2


From 6f4d1bb2e70e97424d44cba131199b41deb102d8 Mon Sep 17 00:00:00 2001
From: Matt Craighead <mcraighead@nvidia.com>
Date: Tue, 6 Oct 2015 17:22:08 -0500
Subject: gpu: nvgpu: gp10b: skip powergate if no BPMP

The powergating APIs only work if the BPMP is running.  Skip
these calls if it's not available, instead of relying on
is_linsim, which doesn't work under all environments.

Change-Id: I34325847b2ebf33c5db2f31111c57d22ed28ef53
Signed-off-by: Matt Craighead <mcraighead@nvidia.com>
Reviewed-on: http://git-master/r/812415
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index eb99a66c..559cad2e 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -25,6 +25,7 @@
 #include <linux/nvmap.h>
 #include <linux/tegra_pm_domains.h>
 #include <linux/reset.h>
+#include <soc/tegra/tegra_bpmp.h>
 #include "gk20a/platform_gk20a.h"
 #include "gk20a/gk20a.h"
 #include "platform_tegra.h"
@@ -136,7 +137,7 @@ static bool gp10b_tegra_is_railgated(struct platform_device *pdev)
 {
 	bool ret = false;
 
-	if (!tegra_platform_is_linsim())
+	if (tegra_bpmp_running())
 		ret = !tegra_powergate_is_powered(TEGRA_POWERGATE_GPU);
 
 	return ret;
@@ -146,7 +147,7 @@ static int gp10b_tegra_railgate(struct platform_device *pdev)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(pdev);
 
-	if (!tegra_platform_is_linsim() &&
+	if (tegra_bpmp_running() &&
 	    tegra_powergate_is_powered(TEGRA_POWERGATE_GPU)) {
 		int i;
 		for (i = 0; i < platform->num_clks; i++) {
@@ -163,7 +164,7 @@ static int gp10b_tegra_unrailgate(struct platform_device *pdev)
 	int ret = 0;
 	struct gk20a_platform *platform = gk20a_get_platform(pdev);
 
-	if (!tegra_platform_is_linsim()) {
+	if (tegra_bpmp_running()) {
 		int i;
 		ret = tegra_unpowergate_partition(TEGRA_POWERGATE_GPU);
 		for (i = 0; i < platform->num_clks; i++) {
-- 
cgit v1.2.2


From 8d864432f5b411d6aaade01520c17c97c6282ac3 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Mon, 5 Oct 2015 11:48:20 +0530
Subject: gpu: nvgpu: set wdt timeout for gp10b

Set the platform-specific channel watchdog timeout to 5 s
for gp10b.

Bug 200133289

Change-Id: I4478463e22a8167c2fc1235dd9a80e069a27b47c
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/811509
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 559cad2e..5dc8e33d 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -220,6 +220,8 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	/* ptimer scaling constant */
 	.ptimerscaling10x	= 10,
 
+	.ch_wdt_timeout_ms = 5000,
+
 	.probe = gp10b_tegra_probe,
 	.late_probe = gp10b_tegra_late_probe,
 
-- 
cgit v1.2.2


From 8066fc9b7be169e29f294a34eaa6e699f13baa5d Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Tue, 6 Oct 2015 09:37:11 -0700
Subject: gpu:nvgpu: gp10b: modify gpmu hw init

Modify gpmu hwinit to take gp10b specific register offsets in
non-secure GPMU boot path.

Bug 1685722

Change-Id: Id6696fb20c4fd40ee1b168c952a438771721c792
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/812271
(cherry picked from commit b9408892dd08beca5f4b2e056287a2bc28ccff0e)
Reviewed-on: http://git-master/r/813979
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 39 +++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index 77727ff2..df515d1b 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -286,6 +286,44 @@ static void gp10b_write_dmatrfbase(struct gk20a *g, u32 addr)
 				0x0);
 }
 
+static int gp10b_init_pmu_setup_hw1(struct gk20a *g)
+{
+	struct pmu_gk20a *pmu = &g->pmu;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	mutex_lock(&pmu->isr_mutex);
+	pmu_reset(pmu);
+	pmu->isr_enabled = true;
+	mutex_unlock(&pmu->isr_mutex);
+
+	/* setup apertures - virtual */
+	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
+		pwr_fbif_transcfg_mem_type_virtual_f());
+	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
+		pwr_fbif_transcfg_mem_type_virtual_f());
+
+	/* setup apertures - physical */
+	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
+		pwr_fbif_transcfg_mem_type_physical_f() |
+		pwr_fbif_transcfg_target_local_fb_f());
+	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
+		pwr_fbif_transcfg_mem_type_physical_f() |
+		pwr_fbif_transcfg_target_coherent_sysmem_f());
+	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
+		pwr_fbif_transcfg_mem_type_physical_f() |
+		pwr_fbif_transcfg_target_noncoherent_sysmem_f());
+
+	err = pmu_bootstrap(pmu);
+	if (err)
+		return err;
+
+	gk20a_dbg_fn("done");
+	return 0;
+
+}
+
 void gp10b_init_pmu_ops(struct gpu_ops *gops)
 {
 	if (gops->privsecurity) {
@@ -299,6 +337,7 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops)
 		gops->pmu.load_lsfalcon_ucode = NULL;
 		gops->pmu.init_wpr_region = NULL;
 	}
+	gops->pmu.pmu_setup_hw_and_bootstrap = gp10b_init_pmu_setup_hw1;
 	gops->pmu.pmu_setup_elpg = gp10b_pmu_setup_elpg;
 	gops->pmu.lspmuwprinitdone = false;
 	gops->pmu.fecsbootstrapdone = false;
-- 
cgit v1.2.2


From 520b461aa7b1befdeee9d4226904f2a1ed370e82 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Tue, 6 Oct 2015 16:07:57 -0700
Subject: gpu: nvgpu: gp10b: enable dma for firmware loading

Bug 1692799

Change-Id: Idf825c954c646f649d85b8fa7f76b5b45150bfe5
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/812442
(cherry picked from commit f72c0738238c3f9a034c6a8b064226f0d7d5dd63)
Reviewed-on: http://git-master/r/813978
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c
index b50698e0..b2956257 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c
@@ -69,5 +69,5 @@ static bool gr_gp10b_is_firmware_defined(void)
 void gp10b_init_gr_ctx(struct gpu_ops *gops) {
 	gops->gr_ctx.get_netlist_name = gr_gp10b_get_netlist_name;
 	gops->gr_ctx.is_fw_defined = gr_gp10b_is_firmware_defined;
-	gops->gr_ctx.use_dma_for_fw_bootstrap = false;
+	gops->gr_ctx.use_dma_for_fw_bootstrap = true;
 }
-- 
cgit v1.2.2


From a982fab35152126e1ea072e40441a7e869bbbfff Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 18 Sep 2015 14:44:36 -0700
Subject: gpu: nvgpu: gp10b: Fix pagepool max size

If pagepool size equals max we should use zero. Add the comparison
to do that.

Bug 1686189

Change-Id: I15bd43663550b1089a726c0256b89f849c193e21
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/801526
(cherry picked from commit 9d89ea5ba345b19d2cff86130ba9d3c4c5f07e6e)
Reviewed-on: http://git-master/r/815681
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index d61ac5bb..c5f45816 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -684,6 +684,10 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 			(u64_hi32(gr_ctx->t18x.pagepool_ctxsw_buffer.gpu_va) <<
 			 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
 		size = gr_ctx->t18x.pagepool_ctxsw_buffer.size;
+
+		if (size == g->ops.gr.pagepool_default_size(g))
+			size = gr_scc_pagepool_total_pages_hwmax_v();
+
 		g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true);
 
 		addr = (u64_lo32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) >>
-- 
cgit v1.2.2


From 959756873a2445c024df2f27c316b606a59e7e59 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 18 Sep 2015 08:16:23 -0700
Subject: gpu: nvgpu: gp10b: Fix spill buffer size

Spill buffer size is in chunks of 256B. Multiply the size by
granularity to get the size in bytes.

Bug 1686189

Change-Id: I0462293668322645bd1eab190c12faaeb6c316c1
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/801344
(cherry picked from commit 4bf6de7d9c9014a9eaeff56b19437d1841d7cfb0)
Reviewed-on: http://git-master/r/815680
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c    | 6 ++++--
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 4 ++++
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index c5f45816..61ecddef 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -510,7 +510,8 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 
 	if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) {
 		u32 spill_size =
-			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v();
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
 		u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
 			gr_scc_pagepool_total_pages_byte_granularity_v();
 		u32 betacb_size = g->gr.attrib_cb_default_size +
@@ -694,7 +695,8 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 			gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
 			(u64_hi32(gr_ctx->t18x.spill_ctxsw_buffer.gpu_va) <<
 			 (32 - gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()));
-		size = gr_ctx->t18x.spill_ctxsw_buffer.size;
+		size = gr_ctx->t18x.spill_ctxsw_buffer.size /
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
 
 		gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_swdx_rm_spill_buffer_addr_r(),
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index e33f0734..0aa68fa2 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -2510,6 +2510,10 @@ static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v(void)
 {
 	return 0x00000250;
 }
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(void)
+{
+	return 0x00000100;
+}
 static inline u32 gr_gpc0_swdx_rm_spill_buffer_addr_r(void)
 {
 	return 0x00500ee0;
-- 
cgit v1.2.2


From 4d3f44849bd48f1a2390692ccce7e7203d3198ae Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 1 Oct 2015 10:38:32 -0700
Subject: gpu: nvgpu: gp10b: Report Pascal DMA copy class

Announce supporting Pascal DMA copy class instead of Maxwell.

Change-Id: Ic0b9d50e7423648c5573857142c86b8a8bc87e35
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/808140
(cherry picked from commit c779975d6b40ecb0780ae4167ab26aed4886c7a7)
Reviewed-on: http://git-master/r/815679
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 381ee8b1..5222fca6 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -151,7 +151,7 @@ int gp10b_init_hal(struct gk20a *g)
 	c->compute_class = PASCAL_COMPUTE_A;
 	c->gpfifo_class = PASCAL_CHANNEL_GPFIFO_A;
 	c->inline_to_memory_class = KEPLER_INLINE_TO_MEMORY_B;
-	c->dma_copy_class = MAXWELL_DMA_COPY_A;
+	c->dma_copy_class = PASCAL_DMA_COPY_A;
 
 	return 0;
 }
-- 
cgit v1.2.2


From f1fe07c123099644d89a56b9cf878f764bb1820e Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 18 Sep 2015 14:55:39 -0700
Subject: gpu: nvgpu: gp10b: Fix beta CB sizing

Handle beta CB sizing differences for GfxP versus WFI channels.

Bug 1686189

Change-Id: Icc421eeb8305f7e4156a74c957662f19504ddad7
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/801533
(cherry picked from commit 95b9ae4e5f3c29fdb97567d846b9d2139f1a8ec4)
Reviewed-on: http://git-master/r/815682
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 46 +++++++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 20 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 61ecddef..d2acba96 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -58,22 +58,28 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 			struct channel_gk20a *c, bool patch)
 {
 	struct gr_gk20a *gr = &g->gr;
-	struct channel_ctx_gk20a *ch_ctx = NULL;
+	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
 	u32 attrib_offset_in_chunk = 0;
 	u32 alpha_offset_in_chunk = 0;
 	u32 pd_ab_max_output;
 	u32 gpc_index, ppc_index;
-	u32 temp;
-	u32 cbm_cfg_size1, cbm_cfg_size2;
+	u32 temp, temp2;
+	u32 cbm_cfg_size_beta, cbm_cfg_size_alpha, cbm_cfg_size_steadystate;
+	u32 attrib_size_in_chunk, cb_attrib_cache_size_init;
 
 	gk20a_dbg_fn("");
 
-	if (patch) {
-		int err;
-		ch_ctx = &c->ch_ctx;
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
-		if (err)
-			return err;
+	if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_GFXP) {
+		attrib_size_in_chunk = gr->attrib_cb_default_size +
+				  (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
+				   gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+		cb_attrib_cache_size_init = gr->attrib_cb_default_size +
+				  (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
+				   gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+	} else {
+		attrib_size_in_chunk = gr->attrib_cb_size;
+		cb_attrib_cache_size_init = gr->attrib_cb_default_size;
 	}
 
 	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_beta_r(),
@@ -94,17 +100,20 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
 		temp = proj_gpc_stride_v() * gpc_index;
+		temp2 = proj_scal_litter_num_pes_per_gpc_v() * gpc_index;
 		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
 		     ppc_index++) {
-			cbm_cfg_size1 = gr->attrib_cb_default_size *
+			cbm_cfg_size_beta = cb_attrib_cache_size_init *
+				gr->pes_tpc_count[ppc_index][gpc_index];
+			cbm_cfg_size_alpha = gr->alpha_cb_default_size *
 				gr->pes_tpc_count[ppc_index][gpc_index];
-			cbm_cfg_size2 = gr->alpha_cb_default_size *
+			cbm_cfg_size_steadystate = gr->attrib_cb_default_size *
 				gr->pes_tpc_count[ppc_index][gpc_index];
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
 				proj_ppc_in_gpc_stride_v() * ppc_index,
-				cbm_cfg_size1, patch);
+				cbm_cfg_size_beta, patch);
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
@@ -114,16 +123,16 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
 				proj_ppc_in_gpc_stride_v() * ppc_index,
-				gr->attrib_cb_default_size,
+				cbm_cfg_size_steadystate,
 				patch);
 
-			attrib_offset_in_chunk += gr->attrib_cb_size *
+			attrib_offset_in_chunk += attrib_size_in_chunk *
 				gr->pes_tpc_count[ppc_index][gpc_index];
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
 				proj_ppc_in_gpc_stride_v() * ppc_index,
-				cbm_cfg_size2, patch);
+				cbm_cfg_size_alpha, patch);
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
@@ -134,15 +143,12 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 				gr->pes_tpc_count[ppc_index][gpc_index];
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
-				gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + gpc_index),
-				gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size1),
+				gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
+				gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate),
 				patch);
 		}
 	}
 
-	if (patch)
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
-
 	return 0;
 }
 
-- 
cgit v1.2.2


From fa467827b747ef67ed6ee195d01812dd7df9d6cb Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Sun, 20 Sep 2015 17:14:29 -0700
Subject: gpu: nvgpu: gp10b: Make CB size and default size same

We used to allocate 1.5x buffer size. This leads to memory waste, as
we do not set the CB size via SW methods anymore.

Bug 1686189

Change-Id: I45cbdeadc154f59b65138f99f50a72d97511cb78
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/801865
(cherry picked from commit 791f2fe03d16521206649ab90498443e91e284e2)
Reviewed-on: http://git-master/r/815683
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index d2acba96..49ba21e0 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -269,10 +269,8 @@ static int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g)
 	struct gr_gk20a *gr = &g->gr;
 	int size;
 
-	gr->attrib_cb_size = gr->attrib_cb_default_size
-		+ (gr->attrib_cb_default_size >> 1);
-	gr->alpha_cb_size = gr->alpha_cb_default_size
-		+ (gr->alpha_cb_default_size >> 1);
+	gr->attrib_cb_size = gr->attrib_cb_default_size;
+	gr->alpha_cb_size = gr->alpha_cb_default_size;
 
 	gr->attrib_cb_size = min(gr->attrib_cb_size,
 		 gr_gpc0_ppc0_cbm_beta_cb_size_v_f(~0) / g->gr.tpc_count);
-- 
cgit v1.2.2


From 50f5c87f1cb452fc4338cf932c35428aeee57dd7 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Mon, 12 Oct 2015 13:54:43 +0530
Subject: gpu: nvgpu: gp10b non-secure gpmu hw init

call gp10b_init_pmu_setup_hw1 during non-secure boot only.

Change-Id: Ia90474c7c04edd9be029d013f1da5f73de1b5326
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/815843
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index df515d1b..57accfb0 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -336,8 +336,8 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops)
 			gm20b_init_nspmu_setup_hw1;
 		gops->pmu.load_lsfalcon_ucode = NULL;
 		gops->pmu.init_wpr_region = NULL;
+		gops->pmu.pmu_setup_hw_and_bootstrap = gp10b_init_pmu_setup_hw1;
 	}
-	gops->pmu.pmu_setup_hw_and_bootstrap = gp10b_init_pmu_setup_hw1;
 	gops->pmu.pmu_setup_elpg = gp10b_pmu_setup_elpg;
 	gops->pmu.lspmuwprinitdone = false;
 	gops->pmu.fecsbootstrapdone = false;
-- 
cgit v1.2.2


From 9f2d5c97f9555ecae52347121101d89fdb366184 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Thu, 8 Oct 2015 11:32:47 -0700
Subject: gpu: nvgpu: gp10b: create fault buffer only once

Create only one instance of replayable fault buffer
mapping.

Change-Id: Id766298f338ce54cfca7510cbb9e4528ef1945a3
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/814615
(cherry picked from commit 422d2ced384220668347dc8422876d75f6e8807d)
Reviewed-on: http://git-master/r/817696
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c b/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
index ba91403c..59af5cde 100644
--- a/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
@@ -33,11 +33,14 @@ int gp10b_replayable_pagefault_buffer_init(struct gk20a *g)
 
 	gk20a_dbg_fn("");
 
-	err = gk20a_gmmu_alloc_map(vm, rbfb_size, &g->mm.bar2_desc);
-	if (err) {
-		dev_err(dev_from_gk20a(g), "%s Error in replayable fault buffer\n",
-			__func__);
-		return err;
+	if (!g->mm.bar2_desc.gpu_va) {
+		err = gk20a_gmmu_alloc_map(vm, rbfb_size,
+						&g->mm.bar2_desc);
+		if (err) {
+			dev_err(dev_from_gk20a(g),
+			"%s Error in replayable fault buffer\n", __func__);
+			return err;
+		}
 	}
 	addr_lo = u64_lo32(g->mm.bar2_desc.gpu_va >> 12);
 	addr_hi = u64_hi32(g->mm.bar2_desc.gpu_va);
@@ -47,7 +50,6 @@ int gp10b_replayable_pagefault_buffer_init(struct gk20a *g)
 	gk20a_writel(g, fifo_replay_fault_buffer_lo_r(),
 			fifo_replay_fault_buffer_lo_base_f(addr_lo) |
 			fifo_replay_fault_buffer_lo_enable_true_v());
-
 	gk20a_dbg_fn("done");
 	return 0;
 }
@@ -55,12 +57,14 @@ int gp10b_replayable_pagefault_buffer_init(struct gk20a *g)
 void gp10b_replayable_pagefault_buffer_deinit(struct gk20a *g)
 {
 	struct vm_gk20a *vm = &g->mm.bar2.vm;
+
 	gk20a_gmmu_unmap_free(vm, &g->mm.bar2_desc);
 }
 
 u32 gp10b_replayable_pagefault_buffer_get_index(struct gk20a *g)
 {
 	u32 get_idx = 0;
+
 	gk20a_dbg_fn("");
 
 	get_idx = gk20a_readl(g, fifo_replay_fault_buffer_get_r());
-- 
cgit v1.2.2


From 35b5e65166529f68f306c8ac7ae64dd938b8e8c0 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Fri, 9 Oct 2015 15:38:15 +0530
Subject: gpu: nvgpu: add h/w headers for pbdma_methods

Bug 200134238

Change-Id: I263a12b7a3a74d1ab07bca03d5dda685b1e4f22f
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/815128
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h | 40 ++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
index 977a8ee2..d3f97a44 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
@@ -174,6 +174,10 @@ static inline u32 pbdma_pb_header_type_inc_f(void)
 {
 	return 0x20000000;
 }
+static inline u32 pbdma_pb_header_type_non_inc_f(void)
+{
+	return 0x60000000;
+}
 static inline u32 pbdma_hdr_shadow_r(u32 i)
 {
 	return 0x00040118 + i*8192;
@@ -198,6 +202,42 @@ static inline u32 pbdma_method0_r(u32 i)
 {
 	return 0x000400c0 + i*8192;
 }
+static inline u32 pbdma_method0_fifo_size_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 pbdma_method0_addr_f(u32 v)
+{
+	return (v & 0xfff) << 2;
+}
+static inline u32 pbdma_method0_addr_v(u32 r)
+{
+	return (r >> 2) & 0xfff;
+}
+static inline u32 pbdma_method0_subch_v(u32 r)
+{
+	return (r >> 16) & 0x7;
+}
+static inline u32 pbdma_method0_first_true_f(void)
+{
+	return 0x400000;
+}
+static inline u32 pbdma_method0_valid_true_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 pbdma_method1_r(u32 i)
+{
+	return 0x000400c8 + i*8192;
+}
+static inline u32 pbdma_method2_r(u32 i)
+{
+	return 0x000400d0 + i*8192;
+}
+static inline u32 pbdma_method3_r(u32 i)
+{
+	return 0x000400d8 + i*8192;
+}
 static inline u32 pbdma_data0_r(u32 i)
 {
 	return 0x000400c4 + i*8192;
-- 
cgit v1.2.2


From 2643f200cfdb655e5ee00fa406c0dea534859df3 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 7 Oct 2015 14:52:55 -0700
Subject: gpu: nvgpu: gp10b: Use PROD value for FE_GO_IDLE_TIMEOUT

Add gp10b PROD value for FE_GO_IDLE_TIMEOUT. Use the PROD value
written in gk20a_init_gr_setup_hw() instead of hard coding here.

Change-Id: If3bd981c1c0d9cc8ad19c21c220b7de81fdb529e
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/813959
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c    | 3 ---
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 4 ++++
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 49ba21e0..de6023b5 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -999,9 +999,6 @@ static int gr_gp10b_init_fs_state(struct gk20a *g)
 			 gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f());
 	gk20a_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), data);
 
-	/* disable deep binning */
-	gk20a_writel(g, gr_fe_go_idle_timeout_r(), 0x800);
-
 	return gr_gm20b_ctx_state_floorsweep(g);
 }
 
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 0aa68fa2..347e530d 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -538,6 +538,10 @@ static inline u32 gr_fe_go_idle_timeout_count_disabled_f(void)
 {
 	return 0x0;
 }
+static inline u32 gr_fe_go_idle_timeout_count_prod_f(void)
+{
+	return 0x7fffffff;
+}
 static inline u32 gr_fe_object_table_r(u32 i)
 {
 	return 0x00404200 + i*4;
-- 
cgit v1.2.2


From 428b9eb5523d478499c7ef023ea7287bf7ac617f Mon Sep 17 00:00:00 2001
From: Sami Kiminki <skiminki@nvidia.com>
Date: Mon, 12 Oct 2015 14:13:16 +0300
Subject: gpu: nvgpu: gp10b: Fix support for new color compression kinds

Fix support for kinds C32_MS4_4CBRA and C64_MS4_4CBRA. They're both
compressible and ZBC kinds, so mark them as such, too.

Change-Id: Ide09ea79a885361ecfc3c188606799c6b2fbdd2e
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/816015
(cherry picked from commit 302b06b76aed5278286487225d6e7280b747d4b3)
Reviewed-on: http://git-master/r/816014
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/fb_gp10b.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fb_gp10b.c b/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
index 3a143ced..8b3b2153 100644
--- a/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
@@ -59,14 +59,18 @@ static bool gp10b_kind_compressible(u8 k)
 		 k <= gmmu_pte_kind_z16_ms8_2cz_v()) ||
 		k == gmmu_pte_kind_z16_ms16_2cz_v() ||
 	       (k >= gmmu_pte_kind_z16_4cz_v() &&
-		 k <= gmmu_pte_kind_z16_ms16_4cz_v());
+		 k <= gmmu_pte_kind_z16_ms16_4cz_v()) ||
+		k == gmmu_pte_kind_c32_ms4_4cbra_v() ||
+		k == gmmu_pte_kind_c64_ms4_4cbra_v();
 }
 
 static bool gp10b_kind_zbc(u8 k)
 {
 	return (k >= gmmu_pte_kind_z16_2cz_v() &&
 		 k <= gmmu_pte_kind_z16_ms8_2cz_v()) ||
-		k == gmmu_pte_kind_z16_ms16_2cz_v();
+		k == gmmu_pte_kind_z16_ms16_2cz_v() ||
+		k == gmmu_pte_kind_c32_ms4_4cbra_v() ||
+		k == gmmu_pte_kind_c64_ms4_4cbra_v();
 }
 
 static void gp10b_init_kind_attr(void)
-- 
cgit v1.2.2


From 9320d4711f3e39d90d27daae97211d8fc753ba37 Mon Sep 17 00:00:00 2001
From: Aingara Paramakuru <aparamakuru@nvidia.com>
Date: Tue, 29 Sep 2015 09:57:37 -0700
Subject: gpu: nvgpu: vgpu: add interface to alloc ctxsw buffers

gp10b introduces support for preemption (GfxP and CILP).
Add a new interface to allow allocating buffers needed
to support this functionality.

Bug 1677153

Change-Id: I8578a7b0a4327f3496d852eeb8be5fc778e2c225
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/806963
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: http://git-master/r/817039
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c | 199 +++++++++++++++++++++++++++
 1 file changed, 199 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
index 9df29eee..5edaa819 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
@@ -11,10 +11,209 @@
  * more details.
  */
 
+#include "vgpu/vgpu.h"
 #include "vgpu_gr_gp10b.h"
 #include "vgpu/gm20b/vgpu_gr_gm20b.h"
 
+#include "gp10b/hw_gr_gp10b.h"
+
+/*
+ * Release a context built by vgpu_gr_gp10b_alloc_gr_ctx(): unmap and free
+ * the four ctxsw buffers, then pass the descriptor to vgpu_gr_free_gr_ctx().
+ * Does nothing when gr_ctx is NULL or its context memory has no GPU VA.
+ */
+static void vgpu_gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
+				struct gr_ctx_desc *gr_ctx)
+{
+	gk20a_dbg_fn("");
+
+	if (!gr_ctx || !gr_ctx->mem.gpu_va)
+		return;
+
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer);
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
+	vgpu_gr_free_gr_ctx(g, vm, gr_ctx);
+}
+
+/*
+ * Allocate a graphics context and bind its preemption ctxsw buffers.
+ *
+ * The base context comes from vgpu_gr_alloc_gr_ctx(). With the GFXP flag the
+ * preempt, spill, pagepool and beta CB buffers are allocated and mapped in
+ * vm; for PASCAL_COMPUTE_A the mode is CILP or CTA. Whenever a preemption
+ * mode is set, the buffer addresses, sizes and mode are sent with
+ * TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTXSW_BUFFERS.
+ *
+ * Returns 0 on success, otherwise the error from vgpu_gr_alloc_gr_ctx() or
+ * -ENOMEM; later failures release it via vgpu_gr_gp10b_free_gr_ctx().
+ */
+static int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
+				struct gr_ctx_desc **__gr_ctx,
+				struct vm_gk20a *vm,
+				u32 class,
+				u32 flags)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	/*
+	 * Zeroed: gpu_va[] and size[] are filled in only on the GfxP path, so
+	 * a compute-only bind would otherwise send uninitialized stack data.
+	 */
+	struct tegra_vgpu_cmd_msg msg = {0};
+	struct tegra_vgpu_gr_bind_ctxsw_buffers_params *p =
+			&msg.params.gr_bind_ctxsw_buffers;
+	struct gr_ctx_desc *gr_ctx;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	WARN_ON(TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_MAX !=
+		TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_LAST);
+
+	err = vgpu_gr_alloc_gr_ctx(g, __gr_ctx, vm, class, flags);
+	if (err)
+		return err;
+
+	/*
+	 * Fetch the descriptor only now: __gr_ctx is handed to the allocator
+	 * above as an output, so a copy taken before the call can be stale.
+	 * NOTE(review): presumably vgpu_gr_alloc_gr_ctx() stores the new
+	 * descriptor in *__gr_ctx - confirm against its definition.
+	 */
+	gr_ctx = *__gr_ctx;
+
+	if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp)
+		flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP;
+
+	if (class == PASCAL_COMPUTE_A &&
+			g->gr.t18x.ctx_vars.force_preemption_cilp)
+		flags |= NVGPU_ALLOC_OBJ_FLAGS_CILP;
+
+	if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) {
+		u32 spill_size =
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
+		u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
+			gr_scc_pagepool_total_pages_byte_granularity_v();
+		u32 betacb_size = g->gr.attrib_cb_default_size +
+				  (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
+				   gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+		u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
+				  gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
+				  g->gr.max_tpc_count;
+		struct mem_desc *desc;
+
+		attrib_cb_size = ALIGN(attrib_cb_size, 128);
+
+		gk20a_dbg_info("gfxp context preempt size=%d",
+			g->gr.t18x.ctx_vars.preempt_image_size);
+		gk20a_dbg_info("gfxp context spill size=%d", spill_size);
+		gk20a_dbg_info("gfxp context pagepool size=%d", pagepool_size);
+		gk20a_dbg_info("gfxp context attrib cb size=%d",
+			attrib_cb_size);
+
+		err = gk20a_gmmu_alloc_map(vm,
+				g->gr.t18x.ctx_vars.preempt_image_size,
+				&gr_ctx->t18x.preempt_ctxsw_buffer);
+		if (err) {
+			err = -ENOMEM;
+			goto fail;
+		}
+		desc = &gr_ctx->t18x.preempt_ctxsw_buffer;
+		p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_MAIN] = desc->gpu_va;
+		p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_MAIN] = desc->size;
+
+		err = gk20a_gmmu_alloc_map(vm, spill_size,
+				&gr_ctx->t18x.spill_ctxsw_buffer);
+		if (err) {
+			err = -ENOMEM;
+			goto fail;
+		}
+		desc = &gr_ctx->t18x.spill_ctxsw_buffer;
+		p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_SPILL] = desc->gpu_va;
+		p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_SPILL] = desc->size;
+
+		err = gk20a_gmmu_alloc_map(vm, pagepool_size,
+					   &gr_ctx->t18x.pagepool_ctxsw_buffer);
+		if (err) {
+			err = -ENOMEM;
+			goto fail;
+		}
+		desc = &gr_ctx->t18x.pagepool_ctxsw_buffer;
+		p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_PAGEPOOL] =
+			desc->gpu_va;
+		p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_PAGEPOOL] = desc->size;
+
+		err = gk20a_gmmu_alloc_map(vm, attrib_cb_size,
+					   &gr_ctx->t18x.betacb_ctxsw_buffer);
+		if (err) {
+			err = -ENOMEM;
+			goto fail;
+		}
+		desc = &gr_ctx->t18x.betacb_ctxsw_buffer;
+		p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_BETACB] =
+			desc->gpu_va;
+		p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_BETACB] = desc->size;
+
+		gr_ctx->preempt_mode = NVGPU_GR_PREEMPTION_MODE_GFXP;
+		p->mode = TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_GFX_GFXP;
+	}
+
+	if (class == PASCAL_COMPUTE_A) {
+		if (flags & NVGPU_ALLOC_OBJ_FLAGS_CILP) {
+			gr_ctx->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CILP;
+			p->mode = TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_COMPUTE_CILP;
+		} else {
+			gr_ctx->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CTA;
+			p->mode = TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_COMPUTE_CTA;
+		}
+	}
+
+	/* Nothing to bind unless a preemption mode is set. */
+	if (gr_ctx->preempt_mode) {
+		msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTXSW_BUFFERS;
+		msg.handle = platform->virt_handle;
+		p->handle = gr_ctx->virt_ctx;
+		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+		if (err || msg.ret) {
+			err = -ENOMEM;
+			goto fail;
+		}
+	}
+
+	gk20a_dbg_fn("done");
+	return err;
+
+fail:
+	vgpu_gr_gp10b_free_gr_ctx(g, vm, gr_ctx);
+	return err;
+}
+
+static int vgpu_gr_gp10b_init_ctx_state(struct gk20a *g)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	int err;
+
+	gk20a_dbg_fn("");
+
+	err = vgpu_gr_init_ctx_state(g);
+	if (err)
+		return err;
+
+	vgpu_get_attribute(platform->virt_handle,
+			TEGRA_VGPU_ATTRIB_PREEMPT_CTX_SIZE,
+			&g->gr.t18x.ctx_vars.preempt_image_size);
+	if (!g->gr.t18x.ctx_vars.preempt_image_size)
+		return -ENXIO;
+
+	return 0;
+}
+
 void vgpu_gp10b_init_gr_ops(struct gpu_ops *gops)
 {
 	vgpu_gm20b_init_gr_ops(gops);
+	gops->gr.alloc_gr_ctx = vgpu_gr_gp10b_alloc_gr_ctx;
+	gops->gr.free_gr_ctx = vgpu_gr_gp10b_free_gr_ctx;
+	gops->gr.init_ctx_state = vgpu_gr_gp10b_init_ctx_state;
 }
-- 
cgit v1.2.2


From 242623a0a8115a823cb386b0a04b9be2e253bf0a Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Thu, 8 Oct 2015 09:41:49 -0700
Subject: gpu: nvgpu: gp10b: enable clock gating features

Enable clock gating power features: slcg, blcg and elcg

Bug 200144583

Reviewed-on: http://git-master/r/821149
(cherry picked from commit 1980d443c64e6660e3cd41b8908964c07459dcce)

Change-Id: I6ce813552fa57d0fd14dd7ed6a3d9864c88dc58b
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/818636
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 5dc8e33d..4ddc47fb 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -216,6 +216,9 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	/* power management configuration */
 	.can_railgate           = false,
 	.enable_elpg            = false,
+	.enable_blcg		= true,
+	.enable_slcg		= true,
+	.enable_elcg		= true,
 
 	/* ptimer scaling constant */
 	.ptimerscaling10x	= 10,
-- 
cgit v1.2.2


From 35e3018be12b56ecfa8487ba479cb2df38c6fbf3 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Thu, 8 Oct 2015 10:12:46 -0700
Subject: gpu: nvgpu: gp10b: support to remove bar2 vm

Implement function to support bar2 vm clean-up.

Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/814573

Change-Id: If5d884e4e1ed87bec6284719d90e9e1963c69bed
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/815428
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index ae9c5c7e..d3297e31 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -368,6 +368,15 @@ static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
 		ram_in_page_dir_base_hi_f(pdb_addr_hi));
 }
 
+static void gp10b_remove_bar2_vm(struct gk20a *g)
+{
+	struct mm_gk20a *mm = &g->mm;
+
+	gp10b_replayable_pagefault_buffer_deinit(g);
+	gk20a_remove_vm(&mm->bar2.vm, &mm->bar2.inst_block);
+}
+
+
 void gp10b_init_mm(struct gpu_ops *gops)
 {
 	gm20b_init_mm(gops);
@@ -378,4 +387,5 @@ void gp10b_init_mm(struct gpu_ops *gops)
 	gops->mm.get_iova_addr = gp10b_mm_iova_addr;
 	gops->mm.get_mmu_levels = gp10b_mm_get_mmu_levels;
 	gops->mm.init_pdb = gp10b_mm_init_pdb;
+	gops->mm.remove_bar2_vm = gp10b_remove_bar2_vm;
 }
-- 
cgit v1.2.2


From 313fcdb1d3c12026246df01c81e2ecd212132de8 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Thu, 22 Oct 2015 15:31:43 -0700
Subject: gpu: nvgpu: gp10b: update thermal programming

Add the required fields and values for thermal slow-down
settings to the thermal header file, and correct the
thermal register programming to use those values.

Bug 1695567

Reviewed-on: http://git-master/r/822200
(cherry picked from commit 859d1bda6a059b321d859c887fab8d51d2caa981)

Change-Id: Id90ebd46bc3d6e4284a91e7f2b775d78502a3eca
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/823013
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/Makefile         |  3 +-
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c      |  2 +
 drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h | 98 +++++++++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gp10b/therm_gp10b.c    | 46 +++++++++++++++
 drivers/gpu/nvgpu/gp10b/therm_gp10b.h    | 19 +++++++
 5 files changed, 166 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp10b/therm_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/therm_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index ad198327..f34d836d 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_GK20A)  += \
 	rpfb_gp10b.o \
 	gp10b_gating_reglist.o \
 	regops_gp10b.o \
-	cde_gp10b.o
+	cde_gp10b.o \
+	therm_gp10b.o
 
 obj-$(CONFIG_TEGRA_GK20A) += platform_gp10b_tegra.o
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 5222fca6..544be96b 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -32,6 +32,7 @@
 #include "gp10b/gp10b_gating_reglist.h"
 #include "gp10b/regops_gp10b.h"
 #include "gp10b/cde_gp10b.h"
+#include "gp10b/therm_gp10b.h"
 
 #include "gm20b/gr_gm20b.h"
 #include "gm20b/fifo_gm20b.h"
@@ -144,6 +145,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gk20a_init_debug_ops(gops);
 	gp10b_init_regops(gops);
 	gp10b_init_cde_ops(gops);
+	gp10b_init_therm_ops(gops);
 	gops->name = "gp10b";
 
 	c->twod_class = FERMI_TWOD_A;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h
index 25eecb70..aed75481 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -54,18 +54,114 @@ static inline u32 therm_use_a_r(void)
 {
 	return 0x00020798;
 }
+static inline u32 therm_use_a_ext_therm_0_enable_f(void)
+{
+	return 0x1;
+}
+static inline u32 therm_use_a_ext_therm_1_enable_f(void)
+{
+	return 0x2;
+}
+static inline u32 therm_use_a_ext_therm_2_enable_f(void)
+{
+	return 0x4;
+}
 static inline u32 therm_evt_ext_therm_0_r(void)
 {
 	return 0x00020700;
 }
+static inline u32 therm_evt_ext_therm_0_slow_factor_f(u32 v)
+{
+	return (v & 0x3f) << 24;
+}
+static inline u32 therm_evt_ext_therm_0_slow_factor_init_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 therm_evt_ext_therm_0_mode_f(u32 v)
+{
+	return (v & 0x3) << 30;
+}
+static inline u32 therm_evt_ext_therm_0_mode_normal_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 therm_evt_ext_therm_0_mode_inverted_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 therm_evt_ext_therm_0_mode_forced_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 therm_evt_ext_therm_0_mode_cleared_v(void)
+{
+	return 0x00000003;
+}
 static inline u32 therm_evt_ext_therm_1_r(void)
 {
 	return 0x00020704;
 }
+static inline u32 therm_evt_ext_therm_1_slow_factor_f(u32 v)
+{
+	return (v & 0x3f) << 24;
+}
+static inline u32 therm_evt_ext_therm_1_slow_factor_init_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 therm_evt_ext_therm_1_mode_f(u32 v)
+{
+	return (v & 0x3) << 30;
+}
+static inline u32 therm_evt_ext_therm_1_mode_normal_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 therm_evt_ext_therm_1_mode_inverted_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 therm_evt_ext_therm_1_mode_forced_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 therm_evt_ext_therm_1_mode_cleared_v(void)
+{
+	return 0x00000003;
+}
 static inline u32 therm_evt_ext_therm_2_r(void)
 {
 	return 0x00020708;
 }
+static inline u32 therm_evt_ext_therm_2_slow_factor_f(u32 v)
+{
+	return (v & 0x3f) << 24;
+}
+static inline u32 therm_evt_ext_therm_2_slow_factor_init_v(void)
+{
+	return 0x00000003;
+}
+static inline u32 therm_evt_ext_therm_2_mode_f(u32 v)
+{
+	return (v & 0x3) << 30;
+}
+static inline u32 therm_evt_ext_therm_2_mode_normal_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 therm_evt_ext_therm_2_mode_inverted_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 therm_evt_ext_therm_2_mode_forced_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 therm_evt_ext_therm_2_mode_cleared_v(void)
+{
+	return 0x00000003;
+}
 static inline u32 therm_weight_1_r(void)
 {
 	return 0x00020024;
diff --git a/drivers/gpu/nvgpu/gp10b/therm_gp10b.c b/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
new file mode 100644
index 00000000..471edb87
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
@@ -0,0 +1,46 @@
+/*
+ * drivers/gpu/nvgpu/gp10b/therm_gp10b.c
+ *
+ * GP10B Therm
+ *
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "hw_therm_gp10b.h"
+
+static int gp10b_init_therm_setup_hw(struct gk20a *g)
+{
+	gk20a_dbg_fn("");
+
+	/* program NV_THERM registers */
+	gk20a_writel(g, therm_use_a_r(), therm_use_a_ext_therm_0_enable_f() |
+					therm_use_a_ext_therm_1_enable_f()  |
+					therm_use_a_ext_therm_2_enable_f());
+	gk20a_writel(g, therm_evt_ext_therm_0_r(),
+		therm_evt_ext_therm_0_slow_factor_f(
+			therm_evt_ext_therm_0_slow_factor_init_v()));
+	gk20a_writel(g, therm_evt_ext_therm_1_r(),
+		therm_evt_ext_therm_1_slow_factor_f(
+			therm_evt_ext_therm_1_slow_factor_init_v()));
+	gk20a_writel(g, therm_evt_ext_therm_2_r(),
+		therm_evt_ext_therm_2_slow_factor_f(
+			therm_evt_ext_therm_2_slow_factor_init_v()));
+
+	return 0;
+}
+
+void gp10b_init_therm_ops(struct gpu_ops *gops)
+{
+	gops->therm.init_therm_setup_hw = gp10b_init_therm_setup_hw;
+
+}
diff --git a/drivers/gpu/nvgpu/gp10b/therm_gp10b.h b/drivers/gpu/nvgpu/gp10b/therm_gp10b.h
new file mode 100644
index 00000000..18c102fe
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/therm_gp10b.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef THERM_GP10B_H
+#define THERM_GP10B_H
+
+struct gpu_ops;
+void gp10b_init_therm_ops(struct gpu_ops *gops);
+
+#endif /* THERM_GP10B_H */
-- 
cgit v1.2.2


From fb7065a2e484cd7eb90d76158d63903029600e58 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 15 Jun 2015 18:09:47 -0700
Subject: gpu: nvgpu: gp10b: Implement sparse PDEs

Change-Id: I260958d8dea1b445f91b8d15bf76d5321bdc76d1
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/758653
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index d3297e31..b5ea5d68 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -157,17 +157,18 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 			   int rw_flag, bool sparse, bool priv)
 {
 	u64 pte_addr = 0;
-	u64 pde_addr = 0;
 	struct gk20a_mm_entry *pte = parent->entries + i;
 	u32 pde_v[2] = {0, 0};
 	u32 *pde;
 
 	gk20a_dbg_fn("");
 
-	pte_addr = sg_phys(pte->sgt->sgl) >> gmmu_new_pde_address_shift_v();
-	pde_addr = sg_phys(parent->sgt->sgl);
+	if (!sparse)
+		pte_addr = sg_phys(pte->sgt->sgl)
+			   >> gmmu_new_pde_address_shift_v();
 
-	pde_v[0] |= gmmu_new_pde_aperture_video_memory_f();
+	pde_v[0] |= sparse ? gmmu_new_pde_aperture_invalid_f()
+			   : gmmu_new_pde_aperture_video_memory_f();
 	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
 	pde_v[0] |= gmmu_new_pde_vol_true_f();
 	pde_v[1] |= pte_addr >> 24;
@@ -204,9 +205,12 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	u32 *pde;
 
 	gk20a_dbg_fn("");
+	gk20a_dbg(gpu_dbg_pte, "entry %p\n", entry);
 
-	small_valid = entry->size && entry->pgsz == gmmu_page_size_small;
-	big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
+	small_valid = !sparse && entry->size
+			      && entry->pgsz == gmmu_page_size_small;
+	big_valid = !sparse && entry->size
+			    && entry->pgsz == gmmu_page_size_big;
 
 	if (small_valid)
 		pte_addr_small = sg_phys(entry->sgt->sgl)
@@ -230,6 +234,11 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 		pde_v[1] |= pte_addr_big >> 28;
 	}
 
+	if (sparse) {
+		pde_v[0] |= gmmu_new_dual_pde_aperture_big_invalid_f();
+		pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
+	}
+
 	pde = pde0_from_index(pte, i);
 
 	gk20a_mem_wr32(pde, 0, pde_v[0]);
-- 
cgit v1.2.2


From 4ff59992afa50fb946b57e5556513b106cd17e8c Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Fri, 16 Oct 2015 12:38:35 -0700
Subject: gpu: nvgpu: gp10b: set ptimer source frequency

Set the ptimer source frequency in the platform data.
Remove the ptimerscaling10x platform data and use the
ptimer source frequency to calculate the ptimer scaling
factor instead.

Reviewed-on: http://git-master/r/819031
(cherry picked from commit 6849603024943184b0463233bedd95934c353663)

Change-Id: I14b0735fcb602cda2e692f6b842a5ecf469ab724
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/827301
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 4ddc47fb..c08202ec 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -220,8 +220,8 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	.enable_slcg		= true,
 	.enable_elcg		= true,
 
-	/* ptimer scaling constant */
-	.ptimerscaling10x	= 10,
+	/* ptimer source frequency in Hz */
+	.ptimer_src_freq	= 31250000,
 
 	.ch_wdt_timeout_ms = 5000,
 
-- 
cgit v1.2.2


From de2656300ae74df5075a3a7e38a701c8048af3b2 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 9 Nov 2015 09:06:37 -0800
Subject: Revert "gpu: nvgpu: gp10b: Implement sparse PDEs"

This reverts commit c2707054192b058eec24a52c7f586b030f9ff007. That
commit introduced a regression on T124.

Bug 1702063

Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Change-Id: I8516c0bfe129bb1ac3d7a1983846061df8ae967b
Reviewed-on: http://git-master/r/830787
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index b5ea5d68..d3297e31 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -157,18 +157,17 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 			   int rw_flag, bool sparse, bool priv)
 {
 	u64 pte_addr = 0;
+	u64 pde_addr = 0;
 	struct gk20a_mm_entry *pte = parent->entries + i;
 	u32 pde_v[2] = {0, 0};
 	u32 *pde;
 
 	gk20a_dbg_fn("");
 
-	if (!sparse)
-		pte_addr = sg_phys(pte->sgt->sgl)
-			   >> gmmu_new_pde_address_shift_v();
+	pte_addr = sg_phys(pte->sgt->sgl) >> gmmu_new_pde_address_shift_v();
+	pde_addr = sg_phys(parent->sgt->sgl);
 
-	pde_v[0] |= sparse ? gmmu_new_pde_aperture_invalid_f()
-			   : gmmu_new_pde_aperture_video_memory_f();
+	pde_v[0] |= gmmu_new_pde_aperture_video_memory_f();
 	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
 	pde_v[0] |= gmmu_new_pde_vol_true_f();
 	pde_v[1] |= pte_addr >> 24;
@@ -205,12 +204,9 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	u32 *pde;
 
 	gk20a_dbg_fn("");
-	gk20a_dbg(gpu_dbg_pte, "entry %p\n", entry);
 
-	small_valid = !sparse && entry->size
-			      && entry->pgsz == gmmu_page_size_small;
-	big_valid = !sparse && entry->size
-			    && entry->pgsz == gmmu_page_size_big;
+	small_valid = entry->size && entry->pgsz == gmmu_page_size_small;
+	big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
 
 	if (small_valid)
 		pte_addr_small = sg_phys(entry->sgt->sgl)
@@ -234,11 +230,6 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 		pde_v[1] |= pte_addr_big >> 28;
 	}
 
-	if (sparse) {
-		pde_v[0] |= gmmu_new_dual_pde_aperture_big_invalid_f();
-		pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
-	}
-
 	pde = pde0_from_index(pte, i);
 
 	gk20a_mem_wr32(pde, 0, pde_v[0]);
-- 
cgit v1.2.2


From b7de6b004be56e489879616a57bd65a1fbcb0ece Mon Sep 17 00:00:00 2001
From: Richard Zhao <rizhao@nvidia.com>
Date: Mon, 12 Oct 2015 15:24:25 -0700
Subject: gpu: nvgpu: vgpu: set correct page size index for gp10b

The VM server only knows about big and small pages, so convert
gmmu_page_size_kernel to the corresponding page size index.

JIRA VFND-890

Change-Id: Id1f932752b8ca33d14635ac9d71019364aa89dc4
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/816359
(cherry picked from commit 5bfc4a2a55889f5457bd34aa06861c042ee67421)
Reviewed-on: http://git-master/r/827131
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vladislav Buzov <vbuzov@nvidia.com>
---
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
index 0a769e94..3a286249 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
@@ -134,6 +134,19 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 	else
 		prot = TEGRA_VGPU_MAP_PROT_NONE;
 
+	if (pgsz_idx == gmmu_page_size_kernel) {
+		if (page_size == vm->gmmu_page_sizes[gmmu_page_size_small]) {
+			pgsz_idx = gmmu_page_size_small;
+		} else if (page_size ==
+				vm->gmmu_page_sizes[gmmu_page_size_big]) {
+			pgsz_idx = gmmu_page_size_big;
+		} else {
+			gk20a_err(d, "invalid kernel page size %d\n",
+				page_size);
+			goto fail;
+		}
+	}
+
 	msg.cmd = TEGRA_VGPU_CMD_AS_MAP_EX;
 	msg.handle = platform->virt_handle;
 	p->handle = vm->handle;
-- 
cgit v1.2.2


From f4b2a02b68d79d30a1292f9b3551d08c71fb899f Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 6 Nov 2015 08:33:15 -0800
Subject: gpu: nvgpu: gp10b: Add L2 clean comptags regs

Bug 1698618

Change-Id: I5bad939d94171d2296897260043f0e67e43802e7
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/829097
(cherry picked from commit a067cfeb8dda03641ba981d86bef93fa9041e18e)
Reviewed-on: http://git-master/r/829414
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/hw_flush_gp10b.h | 42 +++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_flush_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_flush_gp10b.h
index b8e236b8..f442991e 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_flush_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_flush_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -114,6 +114,46 @@ static inline u32 flush_l2_flush_dirty_outstanding_true_v(void)
 {
 	return 0x00000001;
 }
+static inline u32 flush_clean_comptags_r(void)
+{
+	return 0x0007000c;
+}
+static inline u32 flush_clean_comptags_pending_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 flush_clean_comptags_pending_empty_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 flush_clean_comptags_pending_empty_f(void)
+{
+	return 0x0;
+}
+static inline u32 flush_clean_comptags_pending_busy_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 flush_clean_comptags_pending_busy_f(void)
+{
+	return 0x1;
+}
+static inline u32 flush_clean_comptags_outstanding_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+static inline u32 flush_clean_comptags_outstanding_false_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 flush_clean_comptags_outstanding_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 flush_clean_comptags_outstanding_true_v(void)
+{
+	return 0x00000001;
+}
 static inline u32 flush_fb_flush_r(void)
 {
 	return 0x00070000;
-- 
cgit v1.2.2


From 9ab9436268ae2121d3dc57c98d16890953f6cd35 Mon Sep 17 00:00:00 2001
From: Aingara Paramakuru <aparamakuru@nvidia.com>
Date: Tue, 3 Nov 2015 11:44:14 -0500
Subject: gpu: nvgpu: gp10b: map GfxP buffers as GPU cacheable

Some of the allocated buffers are used during normal graphics
processing. Mark them as GPU cacheable to improve performance.

Bug 1695718

Change-Id: I71d5d1538516e966526abe5e38a557776321597f
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/827087
(cherry picked from commit 60b40ac144c94e24a2c449c8be937edf8865e1ed)
Reviewed-on: http://git-master/r/828493
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c           | 51 +++++++++++++++++++++++-----
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h           |  2 ++
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c | 21 +++++++-----
 3 files changed, 57 insertions(+), 17 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index de6023b5..c801a2b8 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -18,6 +18,7 @@
 #include <linux/tegra-fuse.h>
 
 #include "gk20a/gr_gk20a.h"
+#include "gk20a/semaphore_gk20a.h"
 
 #include "gm20b/gr_gm20b.h" /* for MAXWELL classes */
 #include "gp10b/gr_gp10b.h"
@@ -492,6 +493,36 @@ static int gr_gp10b_init_ctx_state(struct gk20a *g)
 	return 0;
 }
 
+int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
+			struct mem_desc *mem)
+{
+	int err;
+
+	gk20a_dbg_fn("");
+
+	err = gk20a_gmmu_alloc_attr(vm->mm->g, 0, size, mem);
+	if (err)
+		return err;
+
+	mem->gpu_va = gk20a_gmmu_map(vm,
+				&mem->sgt,
+				size,
+				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+				gk20a_mem_flag_none,
+				false);
+
+	if (!mem->gpu_va) {
+		err = -ENOMEM;
+		goto fail_free;
+	}
+
+	return 0;
+
+fail_free:
+	gk20a_gmmu_free(vm->mm->g, mem);
+	return err;
+}
+
 static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 			  struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
 			  u32 class,
@@ -530,32 +561,36 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 		gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size);
 		gk20a_dbg_info("gfxp context attrib_cb_size=%d",
 				attrib_cb_size);
-		err = gk20a_gmmu_alloc_map(vm, g->gr.t18x.ctx_vars.preempt_image_size,
-				&(*gr_ctx)->t18x.preempt_ctxsw_buffer);
+		err = gr_gp10b_alloc_buffer(vm,
+					g->gr.t18x.ctx_vars.preempt_image_size,
+					&(*gr_ctx)->t18x.preempt_ctxsw_buffer);
 		if (err) {
 			gk20a_err(dev_from_gk20a(vm->mm->g),
 				  "cannot allocate preempt buffer");
 			goto fail_free_gk20a_ctx;
 		}
 
-		err = gk20a_gmmu_alloc_map(vm, spill_size,
-				&(*gr_ctx)->t18x.spill_ctxsw_buffer);
+		err = gr_gp10b_alloc_buffer(vm,
+					spill_size,
+					&(*gr_ctx)->t18x.spill_ctxsw_buffer);
 		if (err) {
 			gk20a_err(dev_from_gk20a(vm->mm->g),
 				  "cannot allocate spill buffer");
 			goto fail_free_preempt;
 		}
 
-		err = gk20a_gmmu_alloc_map(vm, attrib_cb_size,
-					   &(*gr_ctx)->t18x.betacb_ctxsw_buffer);
+		err = gr_gp10b_alloc_buffer(vm,
+					attrib_cb_size,
+					&(*gr_ctx)->t18x.betacb_ctxsw_buffer);
 		if (err) {
 			gk20a_err(dev_from_gk20a(vm->mm->g),
 				  "cannot allocate beta buffer");
 			goto fail_free_spill;
 		}
 
-		err = gk20a_gmmu_alloc_map(vm, pagepool_size,
-					   &(*gr_ctx)->t18x.pagepool_ctxsw_buffer);
+		err = gr_gp10b_alloc_buffer(vm,
+					pagepool_size,
+					&(*gr_ctx)->t18x.pagepool_ctxsw_buffer);
 		if (err) {
 			gk20a_err(dev_from_gk20a(vm->mm->g),
 				  "cannot allocate page pool");
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index bf49ba6f..62b70a22 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -33,6 +33,8 @@ enum {
 #define NVC0C0_SET_SHADER_EXCEPTIONS		0x1528
 
 void gp10b_init_gr(struct gpu_ops *ops);
+int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
+			struct mem_desc *mem);
 
 struct gr_t18x {
 	struct {
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
index 5edaa819..c5c53b58 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
@@ -84,9 +84,9 @@ static int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 		gk20a_dbg_info("gfxp context attrib cb size=%d",
 			attrib_cb_size);
 
-		err = gk20a_gmmu_alloc_map(vm,
-				g->gr.t18x.ctx_vars.preempt_image_size,
-				&gr_ctx->t18x.preempt_ctxsw_buffer);
+		err = gr_gp10b_alloc_buffer(vm,
+					g->gr.t18x.ctx_vars.preempt_image_size,
+					&gr_ctx->t18x.preempt_ctxsw_buffer);
 		if (err) {
 			err = -ENOMEM;
 			goto fail;
@@ -95,8 +95,9 @@ static int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 		p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_MAIN] = desc->gpu_va;
 		p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_MAIN] = desc->size;
 
-		err = gk20a_gmmu_alloc_map(vm, spill_size,
-				&gr_ctx->t18x.spill_ctxsw_buffer);
+		err = gr_gp10b_alloc_buffer(vm,
+					spill_size,
+					&gr_ctx->t18x.spill_ctxsw_buffer);
 		if (err) {
 			err = -ENOMEM;
 			goto fail;
@@ -105,8 +106,9 @@ static int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 		p->gpu_va[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_SPILL] = desc->gpu_va;
 		p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_SPILL] = desc->size;
 
-		err = gk20a_gmmu_alloc_map(vm, pagepool_size,
-					   &gr_ctx->t18x.pagepool_ctxsw_buffer);
+		err = gr_gp10b_alloc_buffer(vm,
+					pagepool_size,
+					&gr_ctx->t18x.pagepool_ctxsw_buffer);
 		if (err) {
 			err = -ENOMEM;
 			goto fail;
@@ -116,8 +118,9 @@ static int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 			desc->gpu_va;
 		p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_PAGEPOOL] = desc->size;
 
-		err = gk20a_gmmu_alloc_map(vm, attrib_cb_size,
-					   &gr_ctx->t18x.betacb_ctxsw_buffer);
+		err = gr_gp10b_alloc_buffer(vm,
+					attrib_cb_size,
+					&gr_ctx->t18x.betacb_ctxsw_buffer);
 		if (err) {
 			err = -ENOMEM;
 			goto fail;
-- 
cgit v1.2.2


From 528c08b5010e52bfd4422b76fe73f75ff451bf82 Mon Sep 17 00:00:00 2001
From: Alex Waterman <alexw@nvidia.com>
Date: Tue, 29 Sep 2015 10:13:18 -0700
Subject: arm64: tegra: dts: Use new SID dt-bindings

Use bindings more specific to the ARM SMMU.

Change-Id: I0e2df8e8e7bfa51036a84e923fa06e42bbed3cd7
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/812438
(cherry picked from commit 98cb259c87e9531b0a21dfd3132a3f3db07ff6f0)
Reviewed-on: http://git-master/r/831515
Reviewed-by: Krishna Reddy <vdumpa@nvidia.com>
Tested-by: Krishna Reddy <vdumpa@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index d6fca6e4..91a4964c 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -15,6 +15,8 @@
 
 #include <linux/types.h>
 
+#include <dt-bindings/memory/tegra-swgroup.h>
+
 #include "gk20a/gk20a.h"
 #include "gk20a/gk20a_allocator.h"
 #include "gm20b/ltc_gm20b.h"
@@ -149,7 +151,7 @@ static void gp10b_ltc_init_fs_state(struct gk20a *g)
 	gm20b_ltc_init_fs_state(g);
 
 	gk20a_writel(g, ltc_ltca_g_axi_pctrl_r(),
-			ltc_ltca_g_axi_pctrl_user_sid_f(31));
+			ltc_ltca_g_axi_pctrl_user_sid_f(TEGRA_SID_GPUB));
 }
 
 void gp10b_init_ltc(struct gpu_ops *gops)
@@ -172,4 +174,3 @@ void gp10b_init_ltc(struct gpu_ops *gops)
 	gops->ltc.sync_debugfs = gk20a_ltc_sync_debugfs;
 #endif
 }
-
-- 
cgit v1.2.2


From 1dde902b50dbc90fc77a58c536be175e6837995f Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 9 Nov 2015 10:23:02 -0800
Subject: Revert "gpu: nvgpu: gp10b: Force always SMMU bypass"

This reverts commit cc9bd2dc24f562e97a87641e7436594fd3b469f2.

Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Change-Id: Ic4493bc7b71a2ebfb49644c91b34222dd15a9be1
Reviewed-on: http://git-master/r/830854
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index c08202ec..e7c55c97 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -95,7 +95,7 @@ static int gp10b_tegra_probe(struct platform_device *pdev)
 	}
 
 	platform->g->host1x_dev = host1x_pdev;
-	platform->bypass_smmu = 1;
+	platform->bypass_smmu = !device_is_iommuable(&pdev->dev);
 	platform->disable_bigpage = platform->bypass_smmu;
 
 	platform->g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close
-- 
cgit v1.2.2


From e390f6e95aaf88063d7356454cff4bf2b8bf286b Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 6 Nov 2015 09:46:57 -0800
Subject: gpu: nvgpu: ZBC update without idle

Do ZBC updates without forcing engine idle first.

Bug 1698013

Change-Id: I188563dd60ba511b087e9b9bdacd7f9445efd7a4
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/829146
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index c801a2b8..defda1c3 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -210,14 +210,18 @@ static int gr_gp10b_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
 	gr->zbc_col_tbl[index].format = color_val->format;
 	gr->zbc_col_tbl[index].ref_cnt++;
 
-	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_color_r_r(index), color_val->color_ds[0]);
-	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_color_g_r(index), color_val->color_ds[1]);
-	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_color_b_r(index), color_val->color_ds[2]);
-	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_color_a_r(index), color_val->color_ds[3]);
+	gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_r_r(index),
+			   color_val->color_ds[0]);
+	gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_g_r(index),
+			   color_val->color_ds[1]);
+	gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_b_r(index),
+			   color_val->color_ds[2]);
+	gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_color_a_r(index),
+			   color_val->color_ds[3]);
 	zbc_c = gk20a_readl(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3));
 	zbc_c &= ~(0x7f << ((index % 4) * 7));
 	zbc_c |= color_val->format << ((index % 4) * 7);
-	gk20a_writel(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3), zbc_c);
+	gk20a_writel_check(g, gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r() + (index & ~3), zbc_c);
 
 	return 0;
 }
-- 
cgit v1.2.2


From ac335e6fb54a9c283296f1dffe077b78d9cb436e Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Tue, 17 Nov 2015 12:13:03 -0800
Subject: gpu: nvgpu: gp10b: correct initial gpcclk rate

Set initial gpcclk rate to 1GHz.

Bug 200151332

Reviewed-on: http://git-master/r/834113
(cherry picked from commit 9ed69164da7afeec20c3a557885f74db4cbea9cb)

Change-Id: I85107eb5852b25977b30663f6ae173b271ecafeb
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/834322
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index e7c55c97..3476543d 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -34,7 +34,7 @@ static struct {
 	char *name;
 	unsigned long default_rate;
 } tegra_gp10b_clocks[] = {
-	{"gpu", 1900000000},
+	{"gpu", 1000000000},
 	{"gpu_sys", 204000000} };
 
 /*
-- 
cgit v1.2.2


From 4181fa718597c2731eb7ae50ca3e6d9705830ff3 Mon Sep 17 00:00:00 2001
From: Sami Kiminki <skiminki@nvidia.com>
Date: Mon, 25 May 2015 14:51:56 +0300
Subject: gpu: nvgpu: User-space managed address space support (gp10b)

Tell gk20a_init_vm() that bar2 VM is kernel-managed.

Bug 200077571

Change-Id: I151c540a6dec76238e7959f745cfca280927f2d4
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/746803
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index d3297e31..d4a4e7f3 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -76,7 +76,7 @@ static int gb10b_init_bar2_vm(struct gk20a *g)
 	gk20a_dbg_info("bar2 vm size = 0x%x", mm->bar2.aperture_size);
 	gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
 		mm->bar2.aperture_size - SZ_4K,
-		mm->bar2.aperture_size, false, "bar2");
+		mm->bar2.aperture_size, false, false, "bar2");
 
 	/* allocate instance mem for bar2 */
 	err = gk20a_alloc_inst_block(g, inst_block);
-- 
cgit v1.2.2


From 1146dbae18aeb872d55ebb00f850a603911c191c Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Tue, 13 Oct 2015 12:22:12 -0700
Subject: gpu: nvgpu: gp10b: add support for freq scaling

Add support for gp10b freq scaling.

Bug 200147662

Reviewed-on: http://git-master/r/816962
(cherry picked from commit 62de7dba758e46ee80c896dcfcbccb0f8b979438)

Change-Id: I71ddfa394d490c002761d2a8bbb95090a4c0e799
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/834758
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/Makefile               |  1 +
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 94 ++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index f34d836d..0542fd67 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -2,6 +2,7 @@ GCOV_PROFILE := y
 
 ccflags-$(CONFIG_GK20A) += -I$(srctree)/drivers/gpu/nvgpu
 ccflags-$(CONFIG_GK20A) += -I$(srctree)/include
+ccflags-$(CONFIG_GK20A) += -I$(srctree)/drivers/devfreq
 ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/drivers/gpu/nvgpu
 ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include
 ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include/uapi
diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 3476543d..48665ddb 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -30,6 +30,9 @@
 #include "gk20a/gk20a.h"
 #include "platform_tegra.h"
 
+#define GP10B_MAX_SUPPORTED_FREQS 11
+unsigned long gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS];
+
 static struct {
 	char *name;
 	unsigned long default_rate;
@@ -130,7 +133,19 @@ static int gp10b_tegra_probe(struct platform_device *pdev)
 
 static int gp10b_tegra_late_probe(struct platform_device *pdev)
 {
+	/* Make gk20a power domain a subdomain of host1x */
+	nvhost_register_client_domain(dev_to_genpd(&pdev->dev));
+
+	return 0;
+}
+
+static int gp10b_tegra_remove(struct platform_device *pdev)
+{
+	/* remove gk20a power subdomain from host1x */
+	nvhost_unregister_client_domain(dev_to_genpd(&pdev->dev));
+
 	return 0;
+
 }
 
 static bool gp10b_tegra_is_railgated(struct platform_device *pdev)
@@ -206,6 +221,72 @@ static int gp10b_tegra_reset_deassert(struct platform_device *dev)
 	return ret;
 }
 
+static void gp10b_tegra_prescale(struct platform_device *pdev)
+{
+	struct gk20a *g = get_gk20a(pdev);
+	u32 avg = 0;
+
+	gk20a_dbg_fn("");
+
+	gk20a_pmu_load_norm(g, &avg);
+	/* TBD - Notify EDP about changed constrains */
+
+	gk20a_dbg_fn("done");
+}
+
+static void gp10b_tegra_postscale(struct platform_device *pdev,
+					unsigned long freq)
+{
+	/* TBD -  notify EMC about frequency change */
+	gk20a_dbg_fn("");
+}
+
+static unsigned long gp10b_get_clk_rate(struct platform_device *dev)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+
+	return clk_get_rate(platform->clk[0]);
+
+}
+
+static long gp10b_round_clk_rate(struct platform_device *dev,
+						unsigned long rate)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+
+	return clk_round_rate(platform->clk[0], rate);
+}
+
+static int gp10b_set_clk_rate(struct platform_device *dev, unsigned long rate)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+
+	return clk_set_rate(platform->clk[0], rate);
+}
+
+static int gp10b_clk_get_freqs(struct platform_device *pdev,
+				unsigned long **freqs, int *num_freqs)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(pdev);
+	unsigned long min_rate, max_rate, freq_step, rate;
+	int i;
+
+	min_rate = clk_round_rate(platform->clk[0], 0);
+	max_rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1));
+	freq_step = (max_rate - min_rate)/(GP10B_MAX_SUPPORTED_FREQS - 1);
+	gk20a_dbg_info("min rate: %ld max rate: %ld freq step %ld\n",
+						min_rate, max_rate, freq_step);
+
+	for (i = 0; i < GP10B_MAX_SUPPORTED_FREQS; i++) {
+		rate = min_rate + i * freq_step;
+		gp10b_freq_table[i] = clk_round_rate(platform->clk[0], rate);
+	}
+	/* Fill freq table */
+	*freqs = gp10b_freq_table;
+	*num_freqs = GP10B_MAX_SUPPORTED_FREQS;
+	return 0;
+}
+
 struct gk20a_platform t18x_gpu_tegra_platform = {
 	.has_syncpoints = true,
 
@@ -227,6 +308,7 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 
 	.probe = gp10b_tegra_probe,
 	.late_probe = gp10b_tegra_late_probe,
+	.remove = gp10b_tegra_remove,
 
 	/* power management callbacks */
 	.suspend = gp10b_tegra_suspend,
@@ -243,6 +325,18 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 
 	.has_cde = true,
 
+	.clk_get_rate = gp10b_get_clk_rate,
+	.clk_round_rate = gp10b_round_clk_rate,
+	.clk_set_rate = gp10b_set_clk_rate,
+	.get_clk_freqs = gp10b_clk_get_freqs,
+
+	/* frequency scaling configuration */
+	.prescale = gp10b_tegra_prescale,
+	.postscale = gp10b_tegra_postscale,
+
+	.devfreq_governor = "nvhost_podgov",
+	.qos_id = PM_QOS_GPU_FREQ_MIN,
+
 	.secure_alloc = gk20a_tegra_secure_alloc,
 	.secure_page_alloc = gk20a_tegra_secure_page_alloc,
 
-- 
cgit v1.2.2


From b76acb0ef67d45fe775e915fed2648da03cfc424 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Tue, 24 Nov 2015 10:15:28 +0530
Subject: gpu: nvgpu: ELPG prod values update

Bug 200151348

Change-Id: I44851b69adfe9c6bf5d4c897730d6da7df9bedd8
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/836877
(cherry picked from commit 69de3f3c439f544fd5f9223f5663010f5ec80193)
Reviewed-on: http://git-master/r/837228
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 88 ++++++++++++++++++-------------------
 1 file changed, 44 insertions(+), 44 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index 57accfb0..dc7539a8 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -48,42 +48,42 @@ static struct pg_init_sequence_list _pginitseq_gp10b[] = {
 		{0x0010e06c, 0x00000087} ,
 		{0x0010e06c, 0x00000088} ,
 		{0x0010e06c, 0x0000008D} ,
-		{0x0010e06c, 0x000000A00} ,
-		{0x0010e06c, 0x000000A01} ,
-		{0x0010e06c, 0x000000A02} ,
-		{0x0010e06c, 0x000000A03} ,
-		{0x0010e06c, 0x000000A04} ,
-		{0x0010e06c, 0x000000A05} ,
-		{0x0010e06c, 0x000000A06} ,
-		{0x0010e06c, 0x000000A07} ,
-		{0x0010e06c, 0x000000A08} ,
-		{0x0010e06c, 0x000000A09} ,
-		{0x0010e06c, 0x000000950} ,
-		{0x0010e06c, 0x000000951} ,
-		{0x0010e06c, 0x000000952} ,
-		{0x0010e06c, 0x000000953} ,
-		{0x0010e06c, 0x000000954} ,
-		{0x0010e06c, 0x000000955} ,
-		{0x0010e06c, 0x000000956} ,
-		{0x0010e06c, 0x000000957} ,
+		{0x0010e06c, 0x00000082} ,
+		{0x0010e06c, 0x00000083} ,
+		{0x0010e06c, 0x00000089} ,
+		{0x0010e06c, 0x0000008A} ,
+		{0x0010e06c, 0x000000A2} ,
+		{0x0010e06c, 0x00000097} ,
+		{0x0010e06c, 0x00000092} ,
+		{0x0010e06c, 0x00000099} ,
+		{0x0010e06c, 0x0000009B} ,
+		{0x0010e06c, 0x0000009D} ,
+		{0x0010e06c, 0x0000009F} ,
+		{0x0010e06c, 0x000000A1} ,
+		{0x0010e06c, 0x00000096} ,
+		{0x0010e06c, 0x00000091} ,
+		{0x0010e06c, 0x00000098} ,
+		{0x0010e06c, 0x0000009A} ,
+		{0x0010e06c, 0x0000009C} ,
+		{0x0010e06c, 0x0000009E} ,
 		{0x0010ab14, 0x00000000} ,
 		{0x0010e024, 0x00000000} ,
 		{0x0010e028, 0x00000000} ,
 		{0x0010e11c, 0x00000000} ,
-		{0x0010ab1c, 0x140B0B55} ,
-		{0x0010e020, 0x0E262655} ,
-		{0x0010e124, 0x25101055} ,
+		{0x0010ab1c, 0x140B0BFF} ,
+		{0x0010e020, 0x0E2626FF} ,
+		{0x0010e124, 0x251010FF} ,
 		{0x0010ab20, 0x89abcdef} ,
 		{0x0010ab24, 0x00000000} ,
 		{0x0010e02c, 0x89abcdef} ,
 		{0x0010e030, 0x00000000} ,
 		{0x0010e128, 0x89abcdef} ,
 		{0x0010e12c, 0x00000000} ,
-		{0x0010ab28, 0x75555555} ,
+		{0x0010ab28, 0x7FFFFFFF} ,
 		{0x0010ab2c, 0x70000000} ,
-		{0x0010e034, 0x75555555} ,
+		{0x0010e034, 0x7FFFFFFF} ,
 		{0x0010e038, 0x70000000} ,
-		{0x0010e130, 0x75555555} ,
+		{0x0010e130, 0x7FFFFFFF} ,
 		{0x0010e134, 0x70000000} ,
 		{0x0010ab30, 0x00000000} ,
 		{0x0010ab34, 0x00000001} ,
@@ -97,8 +97,8 @@ static struct pg_init_sequence_list _pginitseq_gp10b[] = {
 		{0x0010e05c, 0x00000000} ,
 		{0x0010e044, 0x00000000} ,
 		{0x0010a644, 0x0000868B} ,
-		{0x0010a648, 0x00000000 } ,
-		{0x0010a64c, 0x00829493 } ,
+		{0x0010a648, 0x00000000} ,
+		{0x0010a64c, 0x00829493} ,
 		{0x0010a650, 0x00000000} ,
 		{0x0010e000, 0} ,
 		{0x0010e068, 0x000000A3} ,
@@ -111,24 +111,24 @@ static struct pg_init_sequence_list _pginitseq_gp10b[] = {
 		{0x0010e068, 0x00000087} ,
 		{0x0010e068, 0x00000088} ,
 		{0x0010e068, 0x0000008D} ,
-		{0x0010e068, 0x000000A00} ,
-		{0x0010e068, 0x000000A01} ,
-		{0x0010e068, 0x000000A02} ,
-		{0x0010e068, 0x000000A03} ,
-		{0x0010e068, 0x000000A04} ,
-		{0x0010e068, 0x000000A05} ,
-		{0x0010e068, 0x000000A06} ,
-		{0x0010e068, 0x000000A07} ,
-		{0x0010e068, 0x000000A08} ,
-		{0x0010e068, 0x000000A09} ,
-		{0x0010e068, 0x000000950} ,
-		{0x0010e068, 0x000000951} ,
-		{0x0010e068, 0x000000952} ,
-		{0x0010e068, 0x000000953} ,
-		{0x0010e068, 0x000000954} ,
-		{0x0010e068, 0x000000955} ,
-		{0x0010e068, 0x000000956} ,
-		{0x0010e068, 0x000000957} ,
+		{0x0010e068, 0x00000082} ,
+		{0x0010e068, 0x00000083} ,
+		{0x0010e068, 0x00000089} ,
+		{0x0010e068, 0x0000008A} ,
+		{0x0010e068, 0x000000A2} ,
+		{0x0010e068, 0x00000097} ,
+		{0x0010e068, 0x00000092} ,
+		{0x0010e068, 0x00000099} ,
+		{0x0010e068, 0x0000009B} ,
+		{0x0010e068, 0x0000009D} ,
+		{0x0010e068, 0x0000009F} ,
+		{0x0010e068, 0x000000A1} ,
+		{0x0010e068, 0x00000096} ,
+		{0x0010e068, 0x00000091} ,
+		{0x0010e068, 0x00000098} ,
+		{0x0010e068, 0x0000009A} ,
+		{0x0010e068, 0x0000009C} ,
+		{0x0010e068, 0x0000009E} ,
 		{0x0010e000, 0} ,
 		{0x0010e004, 0x0000008E},
 };
-- 
cgit v1.2.2


From 1cde817120fe82cbb0b6cfc03e0c952bff8cf669 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Mon, 23 Nov 2015 11:19:00 -0800
Subject: gpu: nvgpu: t18x: make gp10b_freq_table static

Make gp10b_freq_table static to fix sparse warning

Bug 200088648

Change-Id: Ibaaabd145e37685e049ac3a49e2b276fb6545d0e
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/837421
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 48665ddb..6abb70c3 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -31,7 +31,7 @@
 #include "platform_tegra.h"
 
 #define GP10B_MAX_SUPPORTED_FREQS 11
-unsigned long gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS];
+static unsigned long gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS];
 
 static struct {
 	char *name;
-- 
cgit v1.2.2


From fd624a1f4ef5207ef5fe0b70b063c08e7678ec2e Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 6 Oct 2015 10:58:54 -0700
Subject: gpu: nvgpu: gp10b: Install gp10b access map

Bug 1692373

Change-Id: I63bb1f8a40fe5d2c7b61440c989b78e4cb3ece98
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/812351
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index defda1c3..9c83030f 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1066,6 +1066,23 @@ static void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 		tegra_fuse_writel(0x0, FUSE_OPT_GPU_TPC0_DISABLE_0);
 }
 
+static void gr_gp10b_get_access_map(struct gk20a *g,
+				   u32 **whitelist, int *num_entries)
+{
+	static u32 wl_addr_gp10b[] = {
+		/* this list must be sorted (low to high) */
+		0x404468, /* gr_pri_mme_max_instructions       */
+		0x418800, /* gr_pri_gpcs_setup_debug           */
+		0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg      */
+		0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg     */
+		0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */
+		0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl     */
+	};
+
+	*whitelist = wl_addr_gp10b;
+	*num_entries = ARRAY_SIZE(wl_addr_gp10b);
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -1095,4 +1112,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.wait_empty = gr_gp10b_wait_empty;
 	gops->gr.init_cyclestats = gr_gp10b_init_cyclestats;
 	gops->gr.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask;
+	gops->gr.get_access_map = gr_gp10b_get_access_map;
 }
-- 
cgit v1.2.2


From 108c0ac8bdeb6b27a9ab8756137e5f58af0da9d1 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 6 Oct 2015 12:45:15 -0700
Subject: gpu: nvgpu: gp10b: Add tile caching registers

Add tile caching registers to access map.

Bug 1692373

Change-Id: Ic95fce02c564fa8d5556543a744c9828b542fb1f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/812352
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 9c83030f..45befc51 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1072,7 +1072,31 @@ static void gr_gp10b_get_access_map(struct gk20a *g,
 	static u32 wl_addr_gp10b[] = {
 		/* this list must be sorted (low to high) */
 		0x404468, /* gr_pri_mme_max_instructions       */
+		0x418300, /* gr_pri_gpcs_rasterarb_line_class  */
 		0x418800, /* gr_pri_gpcs_setup_debug           */
+		0x418e00, /* gr_pri_gpcs_swdx_config           */
+		0x418e40, /* gr_pri_gpcs_swdx_tc_bundle_ctrl   */
+		0x418e44, /* gr_pri_gpcs_swdx_tc_bundle_ctrl   */
+		0x418e48, /* gr_pri_gpcs_swdx_tc_bundle_ctrl   */
+		0x418e4c, /* gr_pri_gpcs_swdx_tc_bundle_ctrl   */
+		0x418e50, /* gr_pri_gpcs_swdx_tc_bundle_ctrl   */
+		0x418e58, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
+		0x418e5c, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
+		0x418e60, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
+		0x418e64, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
+		0x418e68, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
+		0x418e6c, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
+		0x418e70, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
+		0x418e74, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
+		0x418e78, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
+		0x418e7c, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
+		0x418e80, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
+		0x418e84, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
+		0x418e88, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
+		0x418e8c, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
+		0x418e90, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
+		0x418e94, /* gr_pri_gpcs_swdx_tc_bundle_addr   */
+		0x419864, /* gr_pri_gpcs_tpcs_pe_l2_evict_policy */
 		0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg      */
 		0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg     */
 		0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */
-- 
cgit v1.2.2


From d44b5ecc30eaf4c307d56ee6205f8ca0b0f737c4 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 30 Nov 2015 09:30:22 -0800
Subject: gpu: nvgpu: Recreate HW headers

Add gradual slowdown registers, and fix names for L2 flush registers.

Change-Id: If085c4febef494ae299d2147ca5201cd373bee0b
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/839369
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_flush_gp10b.h |  20 +++---
 drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h   |   2 +-
 drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h | 120 +++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+), 11 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_flush_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_flush_gp10b.h
index f442991e..e2dff490 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_flush_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_flush_gp10b.h
@@ -114,43 +114,43 @@ static inline u32 flush_l2_flush_dirty_outstanding_true_v(void)
 {
 	return 0x00000001;
 }
-static inline u32 flush_clean_comptags_r(void)
+static inline u32 flush_l2_clean_comptags_r(void)
 {
 	return 0x0007000c;
 }
-static inline u32 flush_clean_comptags_pending_v(u32 r)
+static inline u32 flush_l2_clean_comptags_pending_v(u32 r)
 {
 	return (r >> 0) & 0x1;
 }
-static inline u32 flush_clean_comptags_pending_empty_v(void)
+static inline u32 flush_l2_clean_comptags_pending_empty_v(void)
 {
 	return 0x00000000;
 }
-static inline u32 flush_clean_comptags_pending_empty_f(void)
+static inline u32 flush_l2_clean_comptags_pending_empty_f(void)
 {
 	return 0x0;
 }
-static inline u32 flush_clean_comptags_pending_busy_v(void)
+static inline u32 flush_l2_clean_comptags_pending_busy_v(void)
 {
 	return 0x00000001;
 }
-static inline u32 flush_clean_comptags_pending_busy_f(void)
+static inline u32 flush_l2_clean_comptags_pending_busy_f(void)
 {
 	return 0x1;
 }
-static inline u32 flush_clean_comptags_outstanding_v(u32 r)
+static inline u32 flush_l2_clean_comptags_outstanding_v(u32 r)
 {
 	return (r >> 1) & 0x1;
 }
-static inline u32 flush_clean_comptags_outstanding_false_v(void)
+static inline u32 flush_l2_clean_comptags_outstanding_false_v(void)
 {
 	return 0x00000000;
 }
-static inline u32 flush_clean_comptags_outstanding_false_f(void)
+static inline u32 flush_l2_clean_comptags_outstanding_false_f(void)
 {
 	return 0x0;
 }
-static inline u32 flush_clean_comptags_outstanding_true_v(void)
+static inline u32 flush_l2_clean_comptags_outstanding_true_v(void)
 {
 	return 0x00000001;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h
index f66812d6..75bf59a3 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
diff --git a/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h
index aed75481..17b1fb7e 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h
@@ -170,6 +170,18 @@ static inline u32 therm_config1_r(void)
 {
 	return 0x00020050;
 }
+static inline u32 therm_config2_r(void)
+{
+	return 0x00020130;
+}
+static inline u32 therm_config2_slowdown_factor_extended_f(u32 v)
+{
+	return (v & 0x1) << 24;
+}
+static inline u32 therm_config2_grad_enable_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
 static inline u32 therm_gate_ctrl_r(u32 i)
 {
 	return 0x00020200 + i*4;
@@ -278,4 +290,112 @@ static inline u32 therm_clk_slowdown_idle_factor_disabled_f(void)
 {
 	return 0x0;
 }
+static inline u32 therm_grad_stepping_table_r(u32 i)
+{
+	return 0x000202c8 + i*4;
+}
+static inline u32 therm_grad_stepping_table_slowdown_factor0_f(u32 v)
+{
+	return (v & 0x3f) << 0;
+}
+static inline u32 therm_grad_stepping_table_slowdown_factor0_m(void)
+{
+	return 0x3f << 0;
+}
+static inline u32 therm_grad_stepping_table_slowdown_factor0_fpdiv_by1p5_f(void)
+{
+	return 0x1;
+}
+static inline u32 therm_grad_stepping_table_slowdown_factor0_fpdiv_by2_f(void)
+{
+	return 0x2;
+}
+static inline u32 therm_grad_stepping_table_slowdown_factor0_fpdiv_by4_f(void)
+{
+	return 0x6;
+}
+static inline u32 therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f(void)
+{
+	return 0xe;
+}
+static inline u32 therm_grad_stepping_table_slowdown_factor1_f(u32 v)
+{
+	return (v & 0x3f) << 6;
+}
+static inline u32 therm_grad_stepping_table_slowdown_factor1_m(void)
+{
+	return 0x3f << 6;
+}
+static inline u32 therm_grad_stepping_table_slowdown_factor2_f(u32 v)
+{
+	return (v & 0x3f) << 12;
+}
+static inline u32 therm_grad_stepping_table_slowdown_factor2_m(void)
+{
+	return 0x3f << 12;
+}
+static inline u32 therm_grad_stepping_table_slowdown_factor3_f(u32 v)
+{
+	return (v & 0x3f) << 18;
+}
+static inline u32 therm_grad_stepping_table_slowdown_factor3_m(void)
+{
+	return 0x3f << 18;
+}
+static inline u32 therm_grad_stepping_table_slowdown_factor4_f(u32 v)
+{
+	return (v & 0x3f) << 24;
+}
+static inline u32 therm_grad_stepping_table_slowdown_factor4_m(void)
+{
+	return 0x3f << 24;
+}
+static inline u32 therm_grad_stepping0_r(void)
+{
+	return 0x000202c0;
+}
+static inline u32 therm_grad_stepping0_feature_s(void)
+{
+	return 1;
+}
+static inline u32 therm_grad_stepping0_feature_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 therm_grad_stepping0_feature_m(void)
+{
+	return 0x1 << 0;
+}
+static inline u32 therm_grad_stepping0_feature_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 therm_grad_stepping0_feature_enable_f(void)
+{
+	return 0x1;
+}
+static inline u32 therm_grad_stepping1_r(void)
+{
+	return 0x000202c4;
+}
+static inline u32 therm_grad_stepping1_pdiv_duration_f(u32 v)
+{
+	return (v & 0x1ffff) << 0;
+}
+static inline u32 therm_clk_timing_r(u32 i)
+{
+	return 0x000203c0 + i*4;
+}
+static inline u32 therm_clk_timing_grad_slowdown_f(u32 v)
+{
+	return (v & 0x1) << 16;
+}
+static inline u32 therm_clk_timing_grad_slowdown_m(void)
+{
+	return 0x1 << 16;
+}
+static inline u32 therm_clk_timing_grad_slowdown_enabled_f(void)
+{
+	return 0x10000;
+}
 #endif
-- 
cgit v1.2.2


From 6430abceef37de5bd6dcbc3d4d33f9b90b6f08a7 Mon Sep 17 00:00:00 2001
From: David Li <davli@nvidia.com>
Date: Wed, 30 Sep 2015 23:20:44 +0000
Subject: gpu: nvgpu: gp10b: fix set_circular_buffer_size

gr_gp10b_set_circular_buffer_size() did not program
gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r, causing a GPU MMU fault
when it was used.

Bug 200141640
Bug 200141981

Change-Id: I8b9f71e480553ead2827ff1f1dde2ba2e6efe697
Signed-off-by: David Li <davli@nvidia.com>
Reviewed-on: http://git-master/r/807694
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 45befc51..04856e44 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -415,17 +415,26 @@ static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
 {
 	struct gr_gk20a *gr = &g->gr;
 	u32 gpc_index, ppc_index, stride, val;
-	u32 cb_size = data * 4;
+	u32 cb_size_steady = data * 4, cb_size;
 
 	gk20a_dbg_fn("");
 
-	if (cb_size > gr->attrib_cb_size)
-		cb_size = gr->attrib_cb_size;
+	if (cb_size_steady > gr->attrib_cb_size)
+		cb_size_steady = gr->attrib_cb_size;
+	if (gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r()) !=
+		gk20a_readl(g,
+			gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r())) {
+		cb_size = cb_size_steady +
+			(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
+			 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+	} else {
+		cb_size = cb_size_steady;
+	}
 
 	gk20a_writel(g, gr_ds_tga_constraintlogic_beta_r(),
 		(gk20a_readl(g, gr_ds_tga_constraintlogic_beta_r()) &
 		 ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
-		 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));
+		 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady));
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
 		stride = proj_gpc_stride_v() * gpc_index;
@@ -446,12 +455,19 @@ static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
 				stride +
 				proj_ppc_in_gpc_stride_v() * ppc_index, val);
 
+			gk20a_writel(g, proj_ppc_in_gpc_stride_v() * ppc_index +
+				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() +
+				stride,
+				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_f(
+					cb_size_steady));
+
 			val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r(
 						ppc_index + gpc_index));
 
 			val = set_field(val,
 				gr_gpcs_swdx_tc_beta_cb_size_v_m(),
-				gr_gpcs_swdx_tc_beta_cb_size_v_f(cb_size *
+				gr_gpcs_swdx_tc_beta_cb_size_v_f(
+					cb_size_steady *
 					gr->gpc_ppc_count[gpc_index]));
 
 			gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
-- 
cgit v1.2.2


From 97ba307f517c889a5dafd7b415e0df7ccdc68e4f Mon Sep 17 00:00:00 2001
From: David Li <davli@nvidia.com>
Date: Fri, 4 Sep 2015 18:28:10 -0700
Subject: gpu: nvgpu: fix setting gr_pd_ab_dist_cfg1_r()

gr_*_set_alpha_circular_buffer_size() left the max_batches field of
gr_pd_ab_dist_cfg1_r() as 0, which results in too many alpha/beta
transitions and poor performance when tessellation or geometry
shaders are used.

Change-Id: Ic3673f45b60674b3527641a6fdda0cedc6861db5
Signed-off-by: David Li <davli@nvidia.com>
Reviewed-on: http://git-master/r/840079
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 04856e44..6bdb9a7c 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -388,7 +388,8 @@ static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 		gr_pd_ab_dist_cfg1_max_output_granularity_v();
 
 	gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
-		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output));
+		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
+		gr_pd_ab_dist_cfg1_max_batches_init_f());
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
 		stride = proj_gpc_stride_v() * gpc_index;
-- 
cgit v1.2.2


From 36fa64cab4559e3c066b159303fcade7da9124c3 Mon Sep 17 00:00:00 2001
From: Aingara Paramakuru <aparamakuru@nvidia.com>
Date: Mon, 14 Dec 2015 20:39:47 -0500
Subject: gpu: nvgpu: vgpu: update interface to free GR ctx

The server only releases ownership of the ctxsw buffer mappings
after the GR ctx has been released. Update the sequence to
account for this.

JIRA VFND-1117
Bug 1708163

Change-Id: I3aed015805b4ca51433e7d37ad32de2f8353999f
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/922817
Reviewed-by: Richard Zhao <rizhao@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vladislav Buzov <vbuzov@nvidia.com>
---
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
index c5c53b58..3023ef4b 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
@@ -20,16 +20,30 @@
 static void vgpu_gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 				struct gr_ctx_desc *gr_ctx)
 {
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
+	int err;
+
 	gk20a_dbg_fn("");
 
 	if (!gr_ctx || !gr_ctx->mem.gpu_va)
 		return;
 
+	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_CTX;
+	msg.handle = platform->virt_handle;
+	p->handle = gr_ctx->virt_ctx;
+	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+	WARN_ON(err || msg.ret);
+
+	gk20a_vm_free_va(vm, gr_ctx->mem.gpu_va, gr_ctx->mem.size, 0);
+
 	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer);
 	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
 	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
 	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
-	vgpu_gr_free_gr_ctx(g, vm, gr_ctx);
+
+	kfree(gr_ctx);
 }
 
 static int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
-- 
cgit v1.2.2


From 03afa9b0608126e1652d7e489ab8e1e05547a6e1 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 11 Jan 2016 10:38:25 -0800
Subject: gpu: nvgpu: gp10b: Refresh regops whitelist

The context and global whitelists are the same, so delete the second
copy. Also update the list.

Bug 200164983

Change-Id: I440ce04316120b8128baeabc002c55436cf41d5b
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/931178
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sandarbh Jain <sanjain@nvidia.com>
Tested-by: Sandarbh Jain <sanjain@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/regops_gp10b.c | 402 +++------------------------------
 1 file changed, 30 insertions(+), 372 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/regops_gp10b.c b/drivers/gpu/nvgpu/gp10b/regops_gp10b.c
index 0ab80b78..48f6161d 100644
--- a/drivers/gpu/nvgpu/gp10b/regops_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/regops_gp10b.c
@@ -48,6 +48,7 @@ static const struct regop_offset_range gp10b_global_whitelist_ranges[] = {
 	{ 0x00022450,   1},
 	{ 0x0002245c,   1},
 	{ 0x00070000,   5},
+	{ 0x000884e0,   1},
 	{ 0x0008e00c,   1},
 	{ 0x00100c18,   3},
 	{ 0x00100c84,   1},
@@ -78,53 +79,53 @@ static const struct regop_offset_range gp10b_global_whitelist_ranges[] = {
 	{ 0x0017e4a0,   1},
 	{ 0x0017e550,   1},
 	{ 0x00180040,  41},
-	{ 0x001800ec,   9},
+	{ 0x001800ec,  10},
 	{ 0x00180240,  41},
-	{ 0x001802ec,   9},
+	{ 0x001802ec,  10},
 	{ 0x00180440,  41},
-	{ 0x001804ec,   9},
+	{ 0x001804ec,  10},
 	{ 0x00180640,  41},
-	{ 0x001806ec,   9},
+	{ 0x001806ec,  10},
 	{ 0x00180840,  41},
-	{ 0x001808ec,   9},
+	{ 0x001808ec,  10},
 	{ 0x00180a40,  41},
-	{ 0x00180aec,   9},
+	{ 0x00180aec,  10},
 	{ 0x00180c40,  41},
-	{ 0x00180cec,   9},
+	{ 0x00180cec,  10},
 	{ 0x00180e40,  41},
-	{ 0x00180eec,   9},
+	{ 0x00180eec,  10},
 	{ 0x001a0040,  41},
-	{ 0x001a00ec,   9},
+	{ 0x001a00ec,  10},
 	{ 0x001a0240,  41},
-	{ 0x001a02ec,   9},
+	{ 0x001a02ec,  10},
 	{ 0x001a0440,  41},
-	{ 0x001a04ec,   9},
+	{ 0x001a04ec,  10},
 	{ 0x001a0640,  41},
-	{ 0x001a06ec,   9},
+	{ 0x001a06ec,  10},
 	{ 0x001a0840,  41},
-	{ 0x001a08ec,   9},
+	{ 0x001a08ec,  10},
 	{ 0x001a0a40,  41},
-	{ 0x001a0aec,   9},
+	{ 0x001a0aec,  10},
 	{ 0x001a0c40,  41},
-	{ 0x001a0cec,   9},
+	{ 0x001a0cec,  10},
 	{ 0x001a0e40,  41},
-	{ 0x001a0eec,   9},
+	{ 0x001a0eec,  10},
 	{ 0x001b0040,  41},
-	{ 0x001b00ec,   9},
+	{ 0x001b00ec,  10},
 	{ 0x001b0240,  41},
-	{ 0x001b02ec,   9},
+	{ 0x001b02ec,  10},
 	{ 0x001b0440,  41},
-	{ 0x001b04ec,   9},
+	{ 0x001b04ec,  10},
 	{ 0x001b0640,  41},
-	{ 0x001b06ec,   9},
+	{ 0x001b06ec,  10},
 	{ 0x001b0840,  41},
-	{ 0x001b08ec,   9},
+	{ 0x001b08ec,  10},
 	{ 0x001b0a40,  41},
-	{ 0x001b0aec,   9},
+	{ 0x001b0aec,  10},
 	{ 0x001b0c40,  41},
-	{ 0x001b0cec,   9},
+	{ 0x001b0cec,  10},
 	{ 0x001b0e40,  41},
-	{ 0x001b0eec,   9},
+	{ 0x001b0eec,  10},
 	{ 0x001b4000,   1},
 	{ 0x001b4008,   1},
 	{ 0x001b4010,   3},
@@ -176,7 +177,9 @@ static const struct regop_offset_range gp10b_global_whitelist_ranges[] = {
 	{ 0x0041040c,   1},
 	{ 0x00410510,   1},
 	{ 0x00410584,   1},
-	{ 0x00418384,   1},
+	{ 0x00418000,   1},
+	{ 0x00418008,   1},
+	{ 0x00418380,   2},
 	{ 0x00418400,   2},
 	{ 0x004184a0,   1},
 	{ 0x00418604,   1},
@@ -373,351 +376,6 @@ static const u32 gp10b_global_whitelist_ranges_count =
 
 /* context */
 
-static const struct regop_offset_range gp10b_context_whitelist_ranges[] = {
-	{ 0x000004f0,   1},
-	{ 0x00001a00,   3},
-	{ 0x00002800, 128},
-	{ 0x00009400,   1},
-	{ 0x00009410,   1},
-	{ 0x00009480,   1},
-	{ 0x00020200,  24},
-	{ 0x00021c00,   4},
-	{ 0x00021c14,   3},
-	{ 0x00021c24,   1},
-	{ 0x00021c2c,  69},
-	{ 0x00021d44,   1},
-	{ 0x00021d4c,   1},
-	{ 0x00021d54,   1},
-	{ 0x00021d5c,   1},
-	{ 0x00021d64,   2},
-	{ 0x00021d70,  16},
-	{ 0x00022430,   7},
-	{ 0x00022450,   1},
-	{ 0x0002245c,   1},
-	{ 0x00070000,   5},
-	{ 0x0008e00c,   1},
-	{ 0x00100c18,   3},
-	{ 0x00100c84,   1},
-	{ 0x0010a0a8,   1},
-	{ 0x0010a4f0,   1},
-	{ 0x0013cc14,   1},
-	{ 0x00140028,   1},
-	{ 0x00140280,   1},
-	{ 0x001402a0,   1},
-	{ 0x00140350,   1},
-	{ 0x00140480,   1},
-	{ 0x001404a0,   1},
-	{ 0x00140550,   1},
-	{ 0x00142028,   1},
-	{ 0x00142280,   1},
-	{ 0x001422a0,   1},
-	{ 0x00142350,   1},
-	{ 0x00142480,   1},
-	{ 0x001424a0,   1},
-	{ 0x00142550,   1},
-	{ 0x0017e280,   1},
-	{ 0x0017e294,   1},
-	{ 0x0017e29c,   2},
-	{ 0x0017e2ac,   1},
-	{ 0x0017e350,   1},
-	{ 0x0017e39c,   1},
-	{ 0x0017e480,   1},
-	{ 0x0017e4a0,   1},
-	{ 0x0017e550,   1},
-	{ 0x00180040,  41},
-	{ 0x001800ec,   9},
-	{ 0x00180240,  41},
-	{ 0x001802ec,   9},
-	{ 0x00180440,  41},
-	{ 0x001804ec,   9},
-	{ 0x00180640,  41},
-	{ 0x001806ec,   9},
-	{ 0x00180840,  41},
-	{ 0x001808ec,   9},
-	{ 0x00180a40,  41},
-	{ 0x00180aec,   9},
-	{ 0x00180c40,  41},
-	{ 0x00180cec,   9},
-	{ 0x00180e40,  41},
-	{ 0x00180eec,   9},
-	{ 0x001a0040,  41},
-	{ 0x001a00ec,   9},
-	{ 0x001a0240,  41},
-	{ 0x001a02ec,   9},
-	{ 0x001a0440,  41},
-	{ 0x001a04ec,   9},
-	{ 0x001a0640,  41},
-	{ 0x001a06ec,   9},
-	{ 0x001a0840,  41},
-	{ 0x001a08ec,   9},
-	{ 0x001a0a40,  41},
-	{ 0x001a0aec,   9},
-	{ 0x001a0c40,  41},
-	{ 0x001a0cec,   9},
-	{ 0x001a0e40,  41},
-	{ 0x001a0eec,   9},
-	{ 0x001b0040,  41},
-	{ 0x001b00ec,   9},
-	{ 0x001b0240,  41},
-	{ 0x001b02ec,   9},
-	{ 0x001b0440,  41},
-	{ 0x001b04ec,   9},
-	{ 0x001b0640,  41},
-	{ 0x001b06ec,   9},
-	{ 0x001b0840,  41},
-	{ 0x001b08ec,   9},
-	{ 0x001b0a40,  41},
-	{ 0x001b0aec,   9},
-	{ 0x001b0c40,  41},
-	{ 0x001b0cec,   9},
-	{ 0x001b0e40,  41},
-	{ 0x001b0eec,   9},
-	{ 0x001b4000,   1},
-	{ 0x001b4008,   1},
-	{ 0x001b4010,   3},
-	{ 0x001b4020,   3},
-	{ 0x001b4030,   3},
-	{ 0x001b4040,   3},
-	{ 0x001b4050,   3},
-	{ 0x001b4060,   4},
-	{ 0x001b4074,   7},
-	{ 0x001b4094,   3},
-	{ 0x001b40a4,   1},
-	{ 0x001b4100,   6},
-	{ 0x001b4124,   1},
-	{ 0x001b8000,   1},
-	{ 0x001b8008,   1},
-	{ 0x001b8010,   3},
-	{ 0x001bc000,   1},
-	{ 0x001bc008,   1},
-	{ 0x001bc010,   3},
-	{ 0x001be000,   1},
-	{ 0x001be008,   1},
-	{ 0x001be010,   3},
-	{ 0x00400500,   1},
-	{ 0x0040415c,   1},
-	{ 0x00404468,   1},
-	{ 0x00404498,   1},
-	{ 0x00405800,   1},
-	{ 0x00405840,   2},
-	{ 0x00405850,   1},
-	{ 0x00405908,   1},
-	{ 0x00405b40,   1},
-	{ 0x00405b50,   1},
-	{ 0x00406024,   5},
-	{ 0x00407010,   1},
-	{ 0x00407808,   1},
-	{ 0x0040803c,   1},
-	{ 0x00408804,   1},
-	{ 0x0040880c,   1},
-	{ 0x00408900,   2},
-	{ 0x00408910,   1},
-	{ 0x00408944,   1},
-	{ 0x00408984,   1},
-	{ 0x004090a8,   1},
-	{ 0x004098a0,   1},
-	{ 0x00409b00,   1},
-	{ 0x0041000c,   1},
-	{ 0x00410110,   1},
-	{ 0x00410184,   1},
-	{ 0x0041040c,   1},
-	{ 0x00410510,   1},
-	{ 0x00410584,   1},
-	{ 0x00418384,   1},
-	{ 0x00418400,   2},
-	{ 0x004184a0,   1},
-	{ 0x00418604,   1},
-	{ 0x00418680,   1},
-	{ 0x00418704,   1},
-	{ 0x00418714,   1},
-	{ 0x00418800,   1},
-	{ 0x0041881c,   1},
-	{ 0x00418830,   1},
-	{ 0x00418884,   1},
-	{ 0x004188b0,   1},
-	{ 0x004188c8,   3},
-	{ 0x004188fc,   1},
-	{ 0x00418b04,   1},
-	{ 0x00418c04,   1},
-	{ 0x00418c10,   8},
-	{ 0x00418c88,   1},
-	{ 0x00418d00,   1},
-	{ 0x00418e00,   1},
-	{ 0x00418e08,   1},
-	{ 0x00418e34,   1},
-	{ 0x00418e40,   4},
-	{ 0x00418e58,  16},
-	{ 0x00418f08,   1},
-	{ 0x00419000,   1},
-	{ 0x0041900c,   1},
-	{ 0x00419018,   1},
-	{ 0x00419854,   1},
-	{ 0x00419864,   1},
-	{ 0x00419a04,   2},
-	{ 0x00419a14,   1},
-	{ 0x00419ab0,   1},
-	{ 0x00419ab8,   3},
-	{ 0x00419c0c,   1},
-	{ 0x00419c8c,   2},
-	{ 0x00419d00,   1},
-	{ 0x00419d08,   2},
-	{ 0x00419e00,  11},
-	{ 0x00419e34,   2},
-	{ 0x00419e44,  11},
-	{ 0x00419e74,  10},
-	{ 0x00419ea4,   1},
-	{ 0x00419eac,   2},
-	{ 0x00419ee8,   1},
-	{ 0x00419ef0,  28},
-	{ 0x00419f70,   1},
-	{ 0x00419f78,   2},
-	{ 0x00419f98,   2},
-	{ 0x00419fdc,   1},
-	{ 0x0041a02c,   2},
-	{ 0x0041a0a0,   1},
-	{ 0x0041a0a8,   1},
-	{ 0x0041a890,   2},
-	{ 0x0041a8a0,   3},
-	{ 0x0041a8b0,   2},
-	{ 0x0041b014,   1},
-	{ 0x0041b0cc,   1},
-	{ 0x0041b1dc,   1},
-	{ 0x0041be0c,   3},
-	{ 0x0041becc,   1},
-	{ 0x0041bfdc,   1},
-	{ 0x0041c054,   1},
-	{ 0x0041c2b0,   1},
-	{ 0x0041c2b8,   3},
-	{ 0x0041c40c,   1},
-	{ 0x0041c48c,   2},
-	{ 0x0041c500,   1},
-	{ 0x0041c508,   2},
-	{ 0x0041c600,  11},
-	{ 0x0041c634,   2},
-	{ 0x0041c644,  11},
-	{ 0x0041c674,  10},
-	{ 0x0041c6a4,   1},
-	{ 0x0041c6ac,   2},
-	{ 0x0041c6e8,   1},
-	{ 0x0041c6f0,  28},
-	{ 0x0041c770,   1},
-	{ 0x0041c778,   2},
-	{ 0x0041c798,   2},
-	{ 0x0041c7dc,   1},
-	{ 0x0041c854,   1},
-	{ 0x0041cab0,   1},
-	{ 0x0041cab8,   3},
-	{ 0x0041cc0c,   1},
-	{ 0x0041cc8c,   2},
-	{ 0x0041cd00,   1},
-	{ 0x0041cd08,   2},
-	{ 0x0041ce00,  11},
-	{ 0x0041ce34,   2},
-	{ 0x0041ce44,  11},
-	{ 0x0041ce74,  10},
-	{ 0x0041cea4,   1},
-	{ 0x0041ceac,   2},
-	{ 0x0041cee8,   1},
-	{ 0x0041cef0,  28},
-	{ 0x0041cf70,   1},
-	{ 0x0041cf78,   2},
-	{ 0x0041cf98,   2},
-	{ 0x0041cfdc,   1},
-	{ 0x00500384,   1},
-	{ 0x005004a0,   1},
-	{ 0x00500604,   1},
-	{ 0x00500680,   1},
-	{ 0x00500714,   1},
-	{ 0x0050081c,   1},
-	{ 0x00500884,   1},
-	{ 0x005008b0,   1},
-	{ 0x005008c8,   3},
-	{ 0x005008fc,   1},
-	{ 0x00500b04,   1},
-	{ 0x00500c04,   1},
-	{ 0x00500c10,   8},
-	{ 0x00500c88,   1},
-	{ 0x00500d00,   1},
-	{ 0x00500e08,   1},
-	{ 0x00500f08,   1},
-	{ 0x00501000,   1},
-	{ 0x0050100c,   1},
-	{ 0x00501018,   1},
-	{ 0x00501854,   1},
-	{ 0x00501ab0,   1},
-	{ 0x00501ab8,   3},
-	{ 0x00501c0c,   1},
-	{ 0x00501c8c,   2},
-	{ 0x00501d00,   1},
-	{ 0x00501d08,   2},
-	{ 0x00501e00,  11},
-	{ 0x00501e34,   2},
-	{ 0x00501e44,  11},
-	{ 0x00501e74,  10},
-	{ 0x00501ea4,   1},
-	{ 0x00501eac,   2},
-	{ 0x00501ee8,   1},
-	{ 0x00501ef0,  28},
-	{ 0x00501f70,   1},
-	{ 0x00501f78,   2},
-	{ 0x00501f98,   2},
-	{ 0x00501fdc,   1},
-	{ 0x0050202c,   2},
-	{ 0x005020a0,   1},
-	{ 0x005020a8,   1},
-	{ 0x00502890,   2},
-	{ 0x005028a0,   3},
-	{ 0x005028b0,   2},
-	{ 0x00503014,   1},
-	{ 0x005030cc,   1},
-	{ 0x005031dc,   1},
-	{ 0x00503e14,   1},
-	{ 0x00503ecc,   1},
-	{ 0x00503fdc,   1},
-	{ 0x00504054,   1},
-	{ 0x005042b0,   1},
-	{ 0x005042b8,   3},
-	{ 0x0050440c,   1},
-	{ 0x0050448c,   2},
-	{ 0x00504500,   1},
-	{ 0x00504508,   2},
-	{ 0x00504600,  11},
-	{ 0x00504634,   2},
-	{ 0x00504644,  11},
-	{ 0x00504674,  10},
-	{ 0x005046a4,   1},
-	{ 0x005046ac,   2},
-	{ 0x005046e8,   1},
-	{ 0x005046f0,  28},
-	{ 0x00504770,   1},
-	{ 0x00504778,   2},
-	{ 0x00504798,   2},
-	{ 0x005047dc,   1},
-	{ 0x00504854,   1},
-	{ 0x00504ab0,   1},
-	{ 0x00504ab8,   3},
-	{ 0x00504c0c,   1},
-	{ 0x00504c8c,   2},
-	{ 0x00504d00,   1},
-	{ 0x00504d08,   2},
-	{ 0x00504e00,  11},
-	{ 0x00504e34,   2},
-	{ 0x00504e44,  11},
-	{ 0x00504e74,  10},
-	{ 0x00504ea4,   1},
-	{ 0x00504eac,   2},
-	{ 0x00504ee8,   1},
-	{ 0x00504ef0,  28},
-	{ 0x00504f70,   1},
-	{ 0x00504f78,   2},
-	{ 0x00504f98,   2},
-	{ 0x00504fdc,   1},
-	{ 0x00900100,   1},
-	{ 0x009a0100,   1},
-};
-static const u32 gp10b_context_whitelist_ranges_count =
-	ARRAY_SIZE(gp10b_context_whitelist_ranges);
-
 /* runcontrol */
 static const u32 gp10b_runcontrol_whitelist[] = {
 };
@@ -753,12 +411,12 @@ static int gp10b_get_global_whitelist_ranges_count(void)
 
 static const struct regop_offset_range *gp10b_get_context_whitelist_ranges(void)
 {
-	return gp10b_context_whitelist_ranges;
+	return gp10b_global_whitelist_ranges;
 }
 
 static int gp10b_get_context_whitelist_ranges_count(void)
 {
-	return gp10b_context_whitelist_ranges_count;
+	return gp10b_global_whitelist_ranges_count;
 }
 
 static const u32 *gp10b_get_runcontrol_whitelist(void)
-- 
cgit v1.2.2


From 5dcbe39a71ccd7c09c128b92d612036e57293a71 Mon Sep 17 00:00:00 2001
From: Richard Zhao <rizhao@nvidia.com>
Date: Mon, 4 Jan 2016 16:27:33 -0800
Subject: gpu: nvgpu: enable semaphore acquire timeout for gp10b

It'll detect dead semaphore acquire. The worst case is when
ACQUIRE_SWITCH is disabled, semaphore acquire will poll and
consume full gpu timeslices.

The timeout value is set to half of channel WDT.

Bug 1636800

Change-Id: Idbd4bfa52981e8a849b62a168e3a6828330112f5
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/928830
Reviewed-by: Aingara Paramakuru <aparamakuru@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c     |  8 ++------
 drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h | 22 +++++++++++++++++++++-
 2 files changed, 23 insertions(+), 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index 8c53978b..ade6ff0a 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * GP10B fifo
  *
- * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -122,11 +122,7 @@ static int channel_gp10b_setup_ramfc(struct channel_gk20a *c,
 	gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
 
 	gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
-		pbdma_acquire_retry_man_2_f() |
-		pbdma_acquire_retry_exp_2_f() |
-		pbdma_acquire_timeout_exp_max_f() |
-		pbdma_acquire_timeout_man_max_f() |
-		pbdma_acquire_timeout_en_disable_f());
+		channel_gk20a_pbdma_acquire_val(c));
 
 	gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
 		pbdma_runlist_timeslice_timeout_128_f() |
diff --git a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
index d3f97a44..f66fa934 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -262,14 +262,34 @@ static inline u32 pbdma_acquire_retry_exp_2_f(void)
 {
 	return 0x100;
 }
+static inline u32 pbdma_acquire_timeout_exp_f(u32 v)
+{
+	return (v & 0xf) << 11;
+}
+static inline u32 pbdma_acquire_timeout_exp_max_v(void)
+{
+	return 0x0000000f;
+}
 static inline u32 pbdma_acquire_timeout_exp_max_f(void)
 {
 	return 0x7800;
 }
+static inline u32 pbdma_acquire_timeout_man_f(u32 v)
+{
+	return (v & 0xffff) << 15;
+}
+static inline u32 pbdma_acquire_timeout_man_max_v(void)
+{
+	return 0x0000ffff;
+}
 static inline u32 pbdma_acquire_timeout_man_max_f(void)
 {
 	return 0x7fff8000;
 }
+static inline u32 pbdma_acquire_timeout_en_enable_f(void)
+{
+	return 0x80000000;
+}
 static inline u32 pbdma_acquire_timeout_en_disable_f(void)
 {
 	return 0x0;
-- 
cgit v1.2.2


From 1ec6d2b6d6e746d93b430cb035d3702c5e1df4c4 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Mon, 30 Nov 2015 12:51:13 +0200
Subject: gpu: nvgpu: bitmap allocator for comptags

Restore comptags to be bitmap-allocated, like they were before we had
the buddy allocator.

Bug 200145635

Change-Id: I681493871096f437014b7eca1182fefbaf7f6a74
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/839240
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 91a4964c..47992988 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -18,7 +18,6 @@
 #include <dt-bindings/memory/tegra-swgroup.h>
 
 #include "gk20a/gk20a.h"
-#include "gk20a/gk20a_allocator.h"
 #include "gm20b/ltc_gm20b.h"
 #include "hw_proj_gp10b.h"
 #include "hw_mc_gp10b.h"
@@ -110,8 +109,9 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	if (err)
 		return err;
 
-	__gk20a_allocator_init(&gr->comp_tags, NULL, "comptag",
-			       1, max_comptag_lines - 1, 1, 10, 0);
+	err = gk20a_comptag_allocator_init(&gr->comp_tags, max_comptag_lines);
+	if (err)
+		return err;
 
 	gr->comptags_per_cacheline = comptags_per_cacheline;
 	gr->slices_per_ltc = slices_per_ltc;
-- 
cgit v1.2.2


From b8db86a6b63231c488dc0ebca14c61e4c54a3c23 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Fri, 15 Jan 2016 12:33:16 -0800
Subject: gpu: nvgpu: gp10b: enable gradual slowdown

Enable gradual slowdown for gp10b and also correct
thermal slowdown factors with extended mode.

Bug 1719974

Change-Id: I31a5d7df71c98135273a980c49b70bc76fac0b40
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/933279
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/therm_gp10b.c | 53 ++++++++++++++++++++++++++++++-----
 1 file changed, 46 insertions(+), 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/therm_gp10b.c b/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
index 471edb87..7bf0c0e5 100644
--- a/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
@@ -3,7 +3,7 @@
  *
  * GP10B Therm
  *
- * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -20,6 +20,8 @@
 
 static int gp10b_init_therm_setup_hw(struct gk20a *g)
 {
+	u32 v;
+
 	gk20a_dbg_fn("");
 
 	/* program NV_THERM registers */
@@ -27,14 +29,51 @@ static int gp10b_init_therm_setup_hw(struct gk20a *g)
 					therm_use_a_ext_therm_1_enable_f()  |
 					therm_use_a_ext_therm_2_enable_f());
 	gk20a_writel(g, therm_evt_ext_therm_0_r(),
-		therm_evt_ext_therm_0_slow_factor_f(
-			therm_evt_ext_therm_0_slow_factor_init_v()));
+		therm_evt_ext_therm_0_slow_factor_f(0x2));
 	gk20a_writel(g, therm_evt_ext_therm_1_r(),
-		therm_evt_ext_therm_1_slow_factor_f(
-			therm_evt_ext_therm_1_slow_factor_init_v()));
+		therm_evt_ext_therm_1_slow_factor_f(0x6));
 	gk20a_writel(g, therm_evt_ext_therm_2_r(),
-		therm_evt_ext_therm_2_slow_factor_f(
-			therm_evt_ext_therm_2_slow_factor_init_v()));
+		therm_evt_ext_therm_2_slow_factor_f(0xe));
+
+	gk20a_writel(g, therm_grad_stepping_table_r(0),
+		therm_grad_stepping_table_slowdown_factor0_f(
+		therm_grad_stepping_table_slowdown_factor0_fpdiv_by1p5_f()) |
+		therm_grad_stepping_table_slowdown_factor1_f(
+		therm_grad_stepping_table_slowdown_factor0_fpdiv_by2_f()) |
+		therm_grad_stepping_table_slowdown_factor2_f(
+		therm_grad_stepping_table_slowdown_factor0_fpdiv_by4_f()) |
+		therm_grad_stepping_table_slowdown_factor3_f(
+		therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
+		therm_grad_stepping_table_slowdown_factor4_f(
+		therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()));
+
+	gk20a_writel(g, therm_grad_stepping_table_r(1),
+		therm_grad_stepping_table_slowdown_factor0_f(
+		therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
+		therm_grad_stepping_table_slowdown_factor1_f(
+		therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
+		therm_grad_stepping_table_slowdown_factor2_f(
+		therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
+		therm_grad_stepping_table_slowdown_factor3_f(
+		therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
+		therm_grad_stepping_table_slowdown_factor4_f(
+		therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()));
+
+	v = gk20a_readl(g, therm_clk_timing_r(0));
+	v |= therm_clk_timing_grad_slowdown_enabled_f();
+	gk20a_writel(g, therm_clk_timing_r(0), v);
+
+	v = gk20a_readl(g, therm_config2_r());
+	v |= therm_config2_grad_enable_f(1);
+	v |= therm_config2_slowdown_factor_extended_f(1);
+	gk20a_writel(g, therm_config2_r(), v);
+
+	gk20a_writel(g, therm_grad_stepping1_r(),
+			therm_grad_stepping1_pdiv_duration_f(32));
+
+	v = gk20a_readl(g, therm_grad_stepping0_r());
+	v |= therm_grad_stepping0_feature_enable_f();
+	gk20a_writel(g, therm_grad_stepping0_r(), v);
 
 	return 0;
 }
-- 
cgit v1.2.2


From d730381f938e67313968c76b34fec0c6f79f0532 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Tue, 26 Jan 2016 17:53:12 -0800
Subject: gpu: nvgpu: gp10b: add delay cycles before engine gating

For copy engine, add 16 clock cycle delay
before engine clock gating.

Bug 1717152

Change-Id: Ife92299c052f44000bc0d900f0129a2eab13f3b5
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/998408
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h | 10 +++++++++-
 drivers/gpu/nvgpu/gp10b/therm_gp10b.c    | 14 ++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h
index 17b1fb7e..8a587b7c 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_therm_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -246,6 +246,14 @@ static inline u32 therm_gate_ctrl_eng_idle_filt_mant_m(void)
 {
 	return 0x7 << 13;
 }
+static inline u32 therm_gate_ctrl_eng_delay_before_f(u32 v)
+{
+	return (v & 0xf) << 16;
+}
+static inline u32 therm_gate_ctrl_eng_delay_before_m(void)
+{
+	return 0xf << 16;
+}
 static inline u32 therm_gate_ctrl_eng_delay_after_f(u32 v)
 {
 	return (v & 0xf) << 20;
diff --git a/drivers/gpu/nvgpu/gp10b/therm_gp10b.c b/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
index 7bf0c0e5..687593b0 100644
--- a/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
@@ -78,8 +78,22 @@ static int gp10b_init_therm_setup_hw(struct gk20a *g)
 	return 0;
 }
 
+static int gp10b_update_therm_gate_ctrl(struct gk20a *g)
+{
+	u32 gate_ctrl;
+
+	gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(ENGINE_CE2_GK20A));
+	gate_ctrl = set_field(gate_ctrl,
+		therm_gate_ctrl_eng_delay_before_m(),
+		therm_gate_ctrl_eng_delay_before_f(4));
+	gk20a_writel(g, therm_gate_ctrl_r(ENGINE_CE2_GK20A), gate_ctrl);
+
+	return 0;
+}
+
 void gp10b_init_therm_ops(struct gpu_ops *gops)
 {
 	gops->therm.init_therm_setup_hw = gp10b_init_therm_setup_hw;
+	gops->therm.update_therm_gate_ctrl = gp10b_update_therm_gate_ctrl;
 
 }
-- 
cgit v1.2.2


From acc62a236fa631c8bd75ac9c6234fc9ae4187f38 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Thu, 8 Oct 2015 09:43:26 -0700
Subject: gpu: nvgpu: gp10b: enable power gating

Enable engine level power gating (ELPG)

Bug 200144583

Change-Id: I66f3be841625c2c9e07cafbf19af8f1dbdbfd390
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/818637
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 6abb70c3..8bddff3d 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -3,7 +3,7 @@
  *
  * GK20A Tegra Platform Interface
  *
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -296,7 +296,7 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 
 	/* power management configuration */
 	.can_railgate           = false,
-	.enable_elpg            = false,
+	.enable_elpg            = true,
 	.enable_blcg		= true,
 	.enable_slcg		= true,
 	.enable_elcg		= true,
-- 
cgit v1.2.2


From 4c5bc9c93b86d9de022d6baff343217f1d047a62 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Tue, 8 Dec 2015 15:43:39 -0800
Subject: gpu: nvgpu: gp10b: clean-up pmu init operations

Removed unwanted initialization of function pointer.

Bug 200157852

Change-Id: I3b44ccce366f1b72c3ff769a7b9ab350bb2c0066
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/843218
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index dc7539a8..00701a50 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * GP10B PMU
  *
- * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
 *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -332,8 +332,6 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops)
 		gops->pmu.load_lsfalcon_ucode = gp10b_load_falcon_ucode;
 	} else {
 		gk20a_init_pmu_ops(gops);
-		gops->pmu.pmu_setup_hw_and_bootstrap =
-			gm20b_init_nspmu_setup_hw1;
 		gops->pmu.load_lsfalcon_ucode = NULL;
 		gops->pmu.init_wpr_region = NULL;
 		gops->pmu.pmu_setup_hw_and_bootstrap = gp10b_init_pmu_setup_hw1;
-- 
cgit v1.2.2


From f17e0d822b47465cca23afa2054bfa1267b52b95 Mon Sep 17 00:00:00 2001
From: Adeel Raza <araza@nvidia.com>
Date: Thu, 18 Jun 2015 16:31:50 -0700
Subject: gpu: nvgpu: gp10b: add ECC support

Add ECC exception handling support for SM, TEX, and LTC.

Bug 1635727
Bug 1637486

Change-Id: I8862ead5784f48742355432ec07c71a82b1b6735
Signed-off-by: Adeel Raza <araza@nvidia.com>
Reviewed-on: http://git-master/r/935362
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c     | 102 ++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h  | 110 ++++++++++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h |  16 +++++
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c    |  23 +++++++
 4 files changed, 250 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 6bdb9a7c..f8c31bd3 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -55,6 +55,106 @@ static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 	return valid;
 }
 
+static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
+			bool *post_event, struct channel_gk20a *fault_ch)
+{
+	int ret = 0;
+	u32 offset = proj_gpc_stride_v() * gpc +
+			proj_tpc_in_gpc_stride_v() * tpc;
+	u32 lrf_ecc_status, shm_ecc_status;
+
+	gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, NULL);
+
+	/* Check for LRF ECC errors. */
+        lrf_ecc_status = gk20a_readl(g,
+			gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset);
+	if ( (lrf_ecc_status &
+		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f()) ||
+		(lrf_ecc_status &
+		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f()) ||
+		(lrf_ecc_status &
+		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f()) ||
+		(lrf_ecc_status &
+		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f()) ) {
+
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
+			"Single bit error detected in SM LRF!");
+	}
+	if ( (lrf_ecc_status &
+		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f()) ||
+		(lrf_ecc_status &
+		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f()) ||
+		(lrf_ecc_status &
+		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f()) ||
+		(lrf_ecc_status &
+		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f()) ) {
+
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
+			"Double bit error detected in SM LRF!");
+	}
+	gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
+			lrf_ecc_status);
+
+	/* Check for SHM ECC errors. */
+        shm_ecc_status = gk20a_readl(g,
+			gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset);
+	if ((shm_ecc_status &
+		gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm0_pending_f()) ||
+		(shm_ecc_status &
+		gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm1_pending_f()) ||
+		(shm_ecc_status &
+		gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f()) ||
+		(shm_ecc_status &
+		gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f()) ) {
+
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
+			"Single bit error detected in SM SHM!");
+	}
+	if ( (shm_ecc_status &
+		gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f()) ||
+		(shm_ecc_status &
+		gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f()) ) {
+
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
+			"Double bit error detected in SM SHM!");
+	}
+	gk20a_writel(g, gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset,
+			shm_ecc_status);
+
+
+	return ret;
+}
+
+static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
+		bool *post_event)
+{
+	int ret = 0;
+	u32 offset = proj_gpc_stride_v() * gpc +
+		     proj_tpc_in_gpc_stride_v() * tpc;
+	u32 esr;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+
+	esr = gk20a_readl(g,
+			 gr_gpc0_tpc0_tex_m_hww_esr_r() + offset);
+	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "0x%08x", esr);
+
+	if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_sec_pending_f()) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
+			"Single bit error detected in TEX!");
+	}
+	if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f()) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
+			"Double bit error detected in TEX!");
+	}
+
+	gk20a_writel(g,
+		     gr_gpc0_tpc0_tex_m_hww_esr_r() + offset,
+		     esr);
+
+	return ret;
+}
+
 static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 			struct channel_gk20a *c, bool patch)
 {
@@ -1154,4 +1254,6 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.init_cyclestats = gr_gp10b_init_cyclestats;
 	gops->gr.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask;
 	gops->gr.get_access_map = gr_gp10b_get_access_map;
+	gops->gr.handle_sm_exception = gr_gp10b_handle_sm_exception;
+	gops->gr.handle_tex_exception = gr_gp10b_handle_tex_exception;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 347e530d..9569bb9c 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -466,6 +466,70 @@ static inline u32 gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r(void)
 {
 	return 0x00504238;
 }
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r(void)
+{
+	return 0x005046b8;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f(void)
+{
+	return 0x20;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f(void)
+{
+	return 0x40;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f(void)
+{
+	return 0x80;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f(void)
+{
+	return 0x100;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f(void)
+{
+	return 0x200;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f(void)
+{
+	return 0x400;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f(void)
+{
+	return 0x800;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_r(void)
+{
+	return 0x005044a0;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm0_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm1_pending_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f(void)
+{
+	return 0x20;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f(void)
+{
+	return 0x100;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f(void)
+{
+	return 0x200;
+}
 static inline u32 gr_pri_be0_crop_status1_r(void)
 {
 	return 0x00410134;
@@ -3158,6 +3222,14 @@ static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f(vo
 {
 	return 0x10;
 }
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_ecc_sec_error_report_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_ecc_ded_error_report_f(void)
+{
+	return 0x40000000;
+}
 static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f(void)
 {
 	return 0x20;
@@ -3174,6 +3246,10 @@ static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f(void)
 {
 	return 0x2;
 }
+static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_en_tex_enabled_f(void)
+{
+	return 0x1;
+}
 static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
 {
 	return 0x0050450c;
@@ -3210,6 +3286,14 @@ static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_r(void)
 {
 	return 0x00504508;
 }
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_tex_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_tex_pending_v(void)
+{
+	return 0x00000001;
+}
 static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_sm_v(u32 r)
 {
 	return (r >> 1) & 0x1;
@@ -3322,6 +3406,14 @@ static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f(void)
 {
 	return 0x10;
 }
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_ecc_sec_error_pending_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_ecc_ded_error_pending_f(void)
+{
+	return 0x40000000;
+}
 static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f(void)
 {
 	return 0x20;
@@ -3330,6 +3422,22 @@ static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(
 {
 	return 0x40;
 }
+static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_r(void)
+{
+	return 0x00504224;
+}
+static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_intr_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_ecc_sec_pending_f(void)
+{
+	return 0x80;
+}
+static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f(void)
+{
+	return 0x100;
+}
 static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_r(void)
 {
 	return 0x00504648;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
index ea96a9aa..302c2243 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
@@ -286,6 +286,14 @@ static inline u32 ltc_ltcs_ltss_intr_r(void)
 {
 	return 0x0017e20c;
 }
+static inline u32 ltc_ltcs_ltss_intr_ecc_sec_error_pending_f(void)
+{
+	return 0x100;
+}
+static inline u32 ltc_ltcs_ltss_intr_ecc_ded_error_pending_f(void)
+{
+	return 0x200;
+}
 static inline u32 ltc_ltcs_ltss_intr_en_evicted_cb_m(void)
 {
 	return 0x1 << 20;
@@ -294,6 +302,14 @@ static inline u32 ltc_ltcs_ltss_intr_en_illegal_compstat_access_m(void)
 {
 	return 0x1 << 30;
 }
+static inline u32 ltc_ltcs_ltss_intr_en_ecc_sec_error_enabled_f(void)
+{
+	return 0x1000000;
+}
+static inline u32 ltc_ltcs_ltss_intr_en_ecc_ded_error_enabled_f(void)
+{
+	return 0x2000000;
+}
 static inline u32 ltc_ltc0_lts0_intr_r(void)
 {
 	return 0x0014040c;
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 47992988..d0be86a4 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -136,6 +136,20 @@ static void gp10b_ltc_isr(struct gk20a *g)
 			ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() +
 					   proj_ltc_stride_v() * ltc +
 					   proj_lts_stride_v() * slice);
+
+			/* Detect and handle ECC errors */
+			if (ltc_intr &
+				ltc_ltcs_ltss_intr_ecc_sec_error_pending_f()) {
+				gk20a_err(dev_from_gk20a(g),
+					"Single bit error detected in GPU L2!");
+				g->ops.mm.l2_flush(g, true);
+			}
+			if (ltc_intr &
+				ltc_ltcs_ltss_intr_ecc_ded_error_pending_f()) {
+				gk20a_err(dev_from_gk20a(g),
+					"Double bit error detected in GPU L2!");
+			}
+
 			gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x",
 				  ltc, slice, ltc_intr);
 			gk20a_writel(g, ltc_ltc0_lts0_intr_r() +
@@ -148,10 +162,19 @@ static void gp10b_ltc_isr(struct gk20a *g)
 
 static void gp10b_ltc_init_fs_state(struct gk20a *g)
 {
+	u32 ltc_intr;
+
 	gm20b_ltc_init_fs_state(g);
 
 	gk20a_writel(g, ltc_ltca_g_axi_pctrl_r(),
 			ltc_ltca_g_axi_pctrl_user_sid_f(TEGRA_SID_GPUB));
+
+	/* Enable ECC interrupts */
+	ltc_intr = gk20a_readl(g, ltc_ltcs_ltss_intr_r());
+	ltc_intr |= ltc_ltcs_ltss_intr_en_ecc_sec_error_enabled_f() |
+			ltc_ltcs_ltss_intr_en_ecc_ded_error_enabled_f();
+	gk20a_writel(g, ltc_ltcs_ltss_intr_r(),
+			ltc_intr);
 }
 
 void gp10b_init_ltc(struct gpu_ops *gops)
-- 
cgit v1.2.2


From 095bd5e59d896ebab12af25ac05aa4071257ecb1 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Mon, 28 Dec 2015 16:21:58 +0530
Subject: gpu: nvgpu: mask hww_warp_esr for gp10b

Add gp10b_mask_hww_warp_esr() to mask hww_warp_esr on gp10b: when the
ADDR_VALID bit is not set, the ADDR_ERROR_TYPE field is forced to NONE.

Bug 200156699

Change-Id: I451b5e949bd4e6d286e5d0c7cd7616e6cfaf3ea9
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/927129
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index f8c31bd3..a13b9a2c 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1224,6 +1224,16 @@ static void gr_gp10b_get_access_map(struct gk20a *g,
 	*num_entries = ARRAY_SIZE(wl_addr_gp10b);
 }
 
+static u32 gp10b_mask_hww_warp_esr(u32 hww_warp_esr)
+{
+	if (!(hww_warp_esr & gr_gpc0_tpc0_sm_hww_warp_esr_addr_valid_m()))
+		hww_warp_esr = set_field(hww_warp_esr,
+			gr_gpc0_tpc0_sm_hww_warp_esr_addr_error_type_m(),
+			gr_gpc0_tpc0_sm_hww_warp_esr_addr_error_type_none_f());
+
+	return hww_warp_esr;
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -1256,4 +1266,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.get_access_map = gr_gp10b_get_access_map;
 	gops->gr.handle_sm_exception = gr_gp10b_handle_sm_exception;
 	gops->gr.handle_tex_exception = gr_gp10b_handle_tex_exception;
+	gops->gr.mask_hww_warp_esr = gp10b_mask_hww_warp_esr;
 }
-- 
cgit v1.2.2


From de47308b2c2ef2d24951a7e1c4ece9964417c167 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Fri, 18 Dec 2015 12:35:04 +0530
Subject: gpu: nvgpu: add CILP support for gp10b

Add CILP support for gp10b by defining the function pointers
below (each is followed by a step-by-step description)

pre_process_sm_exception()
- for CILP enabled channels, get the mask of errors
- if we need to broadcast the stop_trigger, suspend all SMs
- otherwise suspend only current SM
- clear hww_global_esr values in h/w
- gr_gp10b_set_cilp_preempt_pending()
  - get ctx_id
  - using sideband method, program FECS to generate
    interrupt on next ctxsw
  - disable and preempt the channel/TSG
  - set cilp_preempt_pending = true
- clear single step mode
- resume current SM

handle_fecs_error()
- we get ctxsw_intr1 upon next ctxsw
- clear this interrupt
- get handle of channel on which we first
  triggered SM exception
- gr_gp10b_clear_cilp_preempt_pending()
  - set cilp_preempt_pending = false
- send events to channel and debug session fd

Bug 200156699

Change-Id: Ia765db47e68fb968fada6409609af505c079df53
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/925897
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c            | 314 ++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h            |   5 +
 drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h |   6 +-
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h         | 100 ++++++++
 4 files changed, 424 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index a13b9a2c..91adf20c 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -19,6 +19,7 @@
 
 #include "gk20a/gr_gk20a.h"
 #include "gk20a/semaphore_gk20a.h"
+#include "gk20a/dbg_gpu_gk20a.h"
 
 #include "gm20b/gr_gm20b.h" /* for MAXWELL classes */
 #include "gp10b/gr_gp10b.h"
@@ -657,6 +658,8 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 	if (err)
 		return err;
 
+	(*gr_ctx)->t18x.ctx_id_valid = false;
+
 	if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp)
 		flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP;
 
@@ -1224,6 +1227,314 @@ static void gr_gp10b_get_access_map(struct gk20a *g,
 	*num_entries = ARRAY_SIZE(wl_addr_gp10b);
 }
 
+static int gr_gp10b_disable_channel_or_tsg(struct gk20a *g, struct channel_gk20a *fault_ch)
+{
+	int ret = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+	ret = gk20a_disable_channel_tsg(g, fault_ch);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+				"CILP: failed to disable channel/TSG!\n");
+		return ret;
+	}
+
+	ret = g->ops.fifo.update_runlist(g, 0, ~0, true, false);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+				"CILP: failed to restart runlist 0!");
+		return ret;
+	}
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: restarted runlist");
+
+	if (gk20a_is_channel_marked_as_tsg(fault_ch))
+		gk20a_fifo_issue_preempt(g, fault_ch->tsgid, true);
+	else
+		gk20a_fifo_issue_preempt(g, fault_ch->hw_chid, false);
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: preempted the channel/tsg");
+
+	return ret;
+}
+
+static int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g, struct channel_gk20a *fault_ch)
+{
+	int ret;
+	struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+	if (!gr_ctx)
+		return -EINVAL;
+
+	if (gr_ctx->t18x.cilp_preempt_pending) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+				"CILP is already pending for chid %d",
+				fault_ch->hw_chid);
+		return 0;
+	}
+
+	/* get ctx_id from the ucode image */
+	if (!gr_ctx->t18x.ctx_id_valid) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+				"CILP: looking up ctx id");
+		ret = gr_gk20a_get_ctx_id(g, fault_ch, &gr_ctx->t18x.ctx_id);
+		if (ret) {
+			gk20a_err(dev_from_gk20a(g), "CILP: error looking up ctx id!\n");
+			return ret;
+		}
+		gr_ctx->t18x.ctx_id_valid = true;
+	}
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+			"CILP: ctx id is 0x%x", gr_ctx->t18x.ctx_id);
+
+	/* send ucode method to set ctxsw interrupt */
+	ret = gr_gk20a_submit_fecs_sideband_method_op(g,
+			(struct fecs_method_op_gk20a) {
+			.method.data = gr_ctx->t18x.ctx_id,
+			.method.addr =
+			gr_fecs_method_push_adr_configure_interrupt_completion_option_v(),
+			.mailbox = {
+			.id = 1 /* sideband */, .data = 0,
+			.clr = ~0, .ret = NULL,
+			.ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
+			.fail = 0},
+			.cond.ok = GR_IS_UCODE_OP_EQUAL,
+			.cond.fail = GR_IS_UCODE_OP_SKIP});
+
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+				"CILP: failed to enable ctxsw interrupt!");
+		return ret;
+	}
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+				"CILP: enabled ctxsw completion interrupt");
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+			"CILP: disabling channel %d",
+			fault_ch->hw_chid);
+
+	ret = gr_gp10b_disable_channel_or_tsg(g, fault_ch);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+				"CILP: failed to disable channel!!");
+		return ret;
+	}
+
+	/* set cilp_preempt_pending = true and record the channel */
+	gr_ctx->t18x.cilp_preempt_pending = true;
+	g->gr.t18x.cilp_preempt_pending_chid = fault_ch->hw_chid;
+
+	return 0;
+}
+
+static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g,
+					       struct channel_gk20a *fault_ch)
+{
+	struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+	if (!gr_ctx)
+		return -EINVAL;
+
+	/* The ucode is self-clearing, so all we need to do here is
+	   to clear cilp_preempt_pending. */
+	if (!gr_ctx->t18x.cilp_preempt_pending) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+				"CILP is already cleared for chid %d\n",
+				fault_ch->hw_chid);
+		return 0;
+	}
+
+	gr_ctx->t18x.cilp_preempt_pending = false;
+	g->gr.t18x.cilp_preempt_pending_chid = -1;
+
+	return 0;
+}
+
+/* @brief pre-process work on the SM exceptions to determine if we clear them or not.
+ *
+ * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing
+ */
+int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
+		u32 gpc, u32 tpc, u32 global_esr, u32 warp_esr,
+		bool sm_debugger_attached, struct channel_gk20a *fault_ch,
+		bool *early_exit, bool *ignore_debugger)
+{
+	int ret;
+	bool cilp_enabled = (fault_ch->ch_ctx.gr_ctx->preempt_mode ==
+			NVGPU_GR_PREEMPTION_MODE_CILP) ;
+	u32 global_mask = 0, dbgr_control0, global_esr_copy;
+	u32 offset = proj_gpc_stride_v() * gpc +
+		     proj_tpc_in_gpc_stride_v() * tpc;
+
+	*early_exit = false;
+	*ignore_debugger = false;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n",
+			gpc, tpc, global_esr);
+
+	if (cilp_enabled && sm_debugger_attached) {
+		if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f())
+			gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
+					gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f());
+
+		if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f())
+			gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
+					gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f());
+
+		global_mask = gr_gpc0_tpc0_sm_hww_global_esr_sm_to_sm_fault_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_l1_error_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_multiple_warp_errors_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_physical_stack_overflow_error_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_timeout_error_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_bpt_pause_pending_f();
+
+		if (warp_esr != 0 || (global_esr & global_mask) != 0) {
+			*ignore_debugger = true;
+
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+					"CILP: starting wait for LOCKED_DOWN on gpc %d tpc %d\n",
+					gpc, tpc);
+
+			if (gk20a_dbg_gpu_broadcast_stop_trigger(fault_ch)) {
+				gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+						"CILP: Broadcasting STOP_TRIGGER from gpc %d tpc %d\n",
+						gpc, tpc);
+				gk20a_suspend_all_sms(g, global_mask, false);
+
+				gk20a_dbg_gpu_clear_broadcast_stop_trigger(fault_ch);
+			} else {
+				gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+						"CILP: STOP_TRIGGER from gpc %d tpc %d\n",
+						gpc, tpc);
+				gk20a_suspend_single_sm(g, gpc, tpc, global_mask, true);
+			}
+
+			/* reset the HWW errors after locking down */
+			global_esr_copy = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+			gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr_copy);
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+					"CILP: HWWs cleared for gpc %d tpc %d\n",
+					gpc, tpc);
+
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: Setting CILP preempt pending\n");
+			ret = gr_gp10b_set_cilp_preempt_pending(g, fault_ch);
+			if (ret) {
+				gk20a_err(dev_from_gk20a(g), "CILP: error while setting CILP preempt pending!\n");
+				return ret;
+			}
+
+			dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset);
+			if (dbgr_control0 & gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_enable_f()) {
+				gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+						"CILP: clearing SINGLE_STEP_MODE before resume for gpc %d tpc %d\n",
+						gpc, tpc);
+				dbgr_control0 = set_field(dbgr_control0,
+						gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_m(),
+						gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_disable_f());
+				gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
+			}
+
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+					"CILP: resume for gpc %d tpc %d\n",
+					gpc, tpc);
+			gk20a_resume_single_sm(g, gpc, tpc);
+
+			*ignore_debugger = true;
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: All done on gpc %d, tpc %d\n", gpc, tpc);
+		}
+
+		*early_exit = true;
+	}
+	return 0;
+}
+
+static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid)
+{
+	struct gr_ctx_desc *gr_ctx;
+	struct channel_gk20a *ch;
+	int chid;
+	int ret = -EINVAL;
+
+	chid = g->gr.t18x.cilp_preempt_pending_chid;
+
+	ch = gk20a_channel_get(gk20a_fifo_channel_from_hw_chid(g, chid));
+	if (!ch)
+		return ret;
+
+	gr_ctx = ch->ch_ctx.gr_ctx;
+
+	if (gr_ctx->t18x.cilp_preempt_pending) {
+		*__chid = chid;
+		ret = 0;
+	}
+
+	gk20a_channel_put(ch);
+
+	return ret;
+}
+
+static int gr_gp10b_handle_fecs_error(struct gk20a *g,
+				struct channel_gk20a *__ch,
+				struct gr_gk20a_isr_data *isr_data)
+{
+	u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r());
+	struct channel_gk20a *ch;
+	int chid = -1;
+	int ret = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+	/*
+	 * INTR1 (bit 1 of the HOST_INT_STATUS_CTXSW_INTR)
+	 * indicates that a CILP ctxsw save has finished
+	 */
+	if (gr_fecs_intr & gr_fecs_host_int_status_ctxsw_intr_f(2)) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+				"CILP: ctxsw save completed!\n");
+
+		/* now clear the interrupt */
+		gk20a_writel(g, gr_fecs_host_int_clear_r(),
+				gr_fecs_host_int_clear_ctxsw_intr1_clear_f());
+
+		ret = gr_gp10b_get_cilp_preempt_pending_chid(g, &chid);
+		if (ret)
+			goto clean_up;
+
+		ch = gk20a_channel_get(
+				gk20a_fifo_channel_from_hw_chid(g, chid));
+		if (!ch)
+			goto clean_up;
+
+
+		/* set preempt_pending to false */
+		ret = gr_gp10b_clear_cilp_preempt_pending(g, ch);
+		if (ret) {
+			gk20a_err(dev_from_gk20a(g), "CILP: error while unsetting CILP preempt pending!\n");
+			gk20a_channel_put(ch);
+			goto clean_up;
+		}
+
+		if (gk20a_gr_sm_debugger_attached(g)) {
+			gk20a_err(dev_from_gk20a(g), "CILP: posting usermode event");
+			gk20a_dbg_gpu_post_events(ch);
+			gk20a_channel_post_event(ch);
+		}
+
+		gk20a_channel_put(ch);
+	}
+
+clean_up:
+	/* handle any remaining interrupts */
+	return gk20a_gr_handle_fecs_error(g, __ch, isr_data);
+}
+
 static u32 gp10b_mask_hww_warp_esr(u32 hww_warp_esr)
 {
 	if (!(hww_warp_esr & gr_gpc0_tpc0_sm_hww_warp_esr_addr_valid_m()))
@@ -1267,4 +1578,7 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.handle_sm_exception = gr_gp10b_handle_sm_exception;
 	gops->gr.handle_tex_exception = gr_gp10b_handle_tex_exception;
 	gops->gr.mask_hww_warp_esr = gp10b_mask_hww_warp_esr;
+	gops->gr.pre_process_sm_exception =
+		gr_gp10b_pre_process_sm_exception;
+	gops->gr.handle_fecs_error = gr_gp10b_handle_fecs_error;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index 62b70a22..c35fb384 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -46,6 +46,8 @@ struct gr_t18x {
 		struct dentry *debugfs_force_preemption_gfxp;
 		struct dentry *debugfs_dump_ctxsw_stats;
 	} ctx_vars;
+
+	int cilp_preempt_pending_chid;
 };
 
 struct gr_ctx_desc_t18x {
@@ -53,6 +55,9 @@ struct gr_ctx_desc_t18x {
 	struct mem_desc spill_ctxsw_buffer;
 	struct mem_desc betacb_ctxsw_buffer;
 	struct mem_desc pagepool_ctxsw_buffer;
+	u32 ctx_id;
+	bool ctx_id_valid;
+	bool cilp_preempt_pending;
 };
 
 #define NVGPU_GR_PREEMPTION_MODE_GFXP		1
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
index f358d405..49e92df9 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -286,4 +286,8 @@ static inline u32 ctxsw_prog_main_image_compute_preemption_options_control_cilp_
 {
 	return 0x2;
 }
+static inline u32 ctxsw_prog_main_image_context_id_o(void)
+{
+	return 0x000000f0;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 9569bb9c..b494482a 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -1010,6 +1010,10 @@ static inline u32 gr_fecs_method_push_adr_halt_pipeline_v(void)
 {
 	return 0x00000004;
 }
+static inline u32 gr_fecs_method_push_adr_configure_interrupt_completion_option_v(void)
+{
+	return 0x0000003a;
+}
 static inline u32 gr_fecs_host_int_status_r(void)
 {
 	return 0x00409c18;
@@ -1022,14 +1026,30 @@ static inline u32 gr_fecs_host_int_status_umimp_illegal_method_f(u32 v)
 {
 	return (v & 0x1) << 18;
 }
+static inline u32 gr_fecs_host_int_status_ctxsw_intr_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
 static inline u32 gr_fecs_host_int_clear_r(void)
 {
 	return 0x00409c20;
 }
+static inline u32 gr_fecs_host_int_clear_ctxsw_intr1_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
+static inline u32 gr_fecs_host_int_clear_ctxsw_intr1_clear_f(void)
+{
+	return 0x2;
+}
 static inline u32 gr_fecs_host_int_enable_r(void)
 {
 	return 0x00409c24;
 }
+static inline u32 gr_fecs_host_int_enable_ctxsw_intr1_enable_f(void)
+{
+	return 0x2;
+}
 static inline u32 gr_fecs_host_int_enable_fault_during_ctxsw_enable_f(void)
 {
 	return 0x10000;
@@ -2182,6 +2202,10 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
 {
 	return (v & 0xffff) << 0;
 }
+static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
 static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
 {
 	return 0x0050469c;
@@ -3326,6 +3350,14 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_disable_f(void)
 {
 	return 0x0;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_single_step_mode_enable_f(void)
+{
+	return 0x8;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_single_step_mode_disable_f(void)
+{
+	return 0x0;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_run_trigger_task_f(void)
 {
 	return 0x40000000;
@@ -3398,6 +3430,26 @@ static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_single_step_complete_pending_f(
 {
 	return 0x40;
 }
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_sm_to_sm_fault_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_l1_error_pending_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_multiple_warp_errors_pending_f(void)
+{
+	return 0x4;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_physical_stack_overflow_error_pending_f(void)
+{
+	return 0x8;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_timeout_error_pending_f(void)
+{
+	return 0x80000000;
+}
 static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_r(void)
 {
 	return 0x00504650;
@@ -3438,6 +3490,26 @@ static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f(void)
 {
 	return 0x100;
 }
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_sm_to_sm_fault_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_l1_error_pending_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_multiple_warp_errors_pending_f(void)
+{
+	return 0x4;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_physical_stack_overflow_error_pending_f(void)
+{
+	return 0x8;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_timeout_error_pending_f(void)
+{
+	return 0x80000000;
+}
 static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_r(void)
 {
 	return 0x00504648;
@@ -3454,6 +3526,22 @@ static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f(void)
 {
 	return 0x0;
 }
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_addr_valid_m(void)
+{
+	return 0x1 << 24;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_addr_error_type_m(void)
+{
+	return 0x7 << 25;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_addr_error_type_none_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_pc_r(void)
+{
+	return 0x00504654;
+}
 static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)
 {
 	return 0x00504770;
@@ -3850,6 +3938,18 @@ static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_disable_f(void)
 {
 	return 0x0;
 }
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_m(void)
+{
+	return 0x1 << 3;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_enable_f(void)
+{
+	return 0x8;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_disable_f(void)
+{
+	return 0x0;
+}
 static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_m(void)
 {
 	return 0x1 << 30;
-- 
cgit v1.2.2


From 333b839b278f004bdb49cd7c72118a7137cb802e Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Tue, 2 Feb 2016 19:18:10 +0530
Subject: gpu: nvgpu: post events on all channels of TSG

When posting the CILP preemption-complete event to
user space, raise the event on all channels of the TSG
(if the channel is part of a TSG).

This is a workaround (WAR) until we have a proper sync
mechanism with user space for raising CILP events.

Bug 200156699

Change-Id: Ieedc866498a8c5464cf65962257a803b37da6826
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1001696
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 91adf20c..6ed91bb1 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1523,8 +1523,21 @@ static int gr_gp10b_handle_fecs_error(struct gk20a *g,
 
 		if (gk20a_gr_sm_debugger_attached(g)) {
 			gk20a_err(dev_from_gk20a(g), "CILP: posting usermode event");
-			gk20a_dbg_gpu_post_events(ch);
-			gk20a_channel_post_event(ch);
+
+			if (gk20a_is_channel_marked_as_tsg(ch)) {
+				struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
+				struct channel_gk20a *__ch;
+
+				mutex_lock(&tsg->ch_list_lock);
+				list_for_each_entry(__ch, &tsg->ch_list, ch_entry) {
+					gk20a_dbg_gpu_post_events(__ch);
+					gk20a_channel_post_event(__ch);
+				}
+				mutex_unlock(&tsg->ch_list_lock);
+			} else {
+				gk20a_dbg_gpu_post_events(ch);
+				gk20a_channel_post_event(ch);
+			}
 		}
 
 		gk20a_channel_put(ch);
-- 
cgit v1.2.2


From 83e9c506eed0bc61b707694756ff716b13fef45d Mon Sep 17 00:00:00 2001
From: Richard Zhao <rizhao@nvidia.com>
Date: Fri, 29 Jan 2016 11:32:44 -0800
Subject: gpu: nvgpu: vgpu: fix sparse warnings

Bug 200088648

Change-Id: I61be7b4787e9bc9bac310a8739977f43c38a67ee
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/1000174
Reviewed-by: Aingara Paramakuru <aparamakuru@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
index 14a7768a..b665a8dd 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -16,6 +16,7 @@
 #include "vgpu_gr_gp10b.h"
 #include "vgpu_fifo_gp10b.h"
 #include "vgpu_mm_gp10b.h"
+#include "nvgpu_gpuid_t18x.h"
 
 int vgpu_gp10b_init_hal(struct gk20a *g)
 {
-- 
cgit v1.2.2


From 0c9ba5c067f777413ee1132a05be50f03c79db89 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Tue, 9 Feb 2016 18:32:35 +0530
Subject: gpu: nvgpu: fix sparse warning

Fix the sparse warning below by making the function static:
drivers/gpu/nvgpu/gp10b/gr_gp10b.c:1364:5: warning: symbol
'gr_gp10b_pre_process_sm_exception' was not declared. Should it be
static?

Bug 200088648

Change-Id: Ie55ffc12eb653b10358001e2aef8766562fd0df9
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1009938
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 6ed91bb1..ae2ffc0a 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * GP10B GPU GR
  *
- * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -1361,7 +1361,7 @@ static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g,
  *
  * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing
  */
-int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
+static int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
 		u32 gpc, u32 tpc, u32 global_esr, u32 warp_esr,
 		bool sm_debugger_attached, struct channel_gk20a *fault_ch,
 		bool *early_exit, bool *ignore_debugger)
-- 
cgit v1.2.2


From f7d327985fca67266ea409e24c0ef6505d98f338 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Fri, 12 Feb 2016 16:28:28 +0530
Subject: gpu: nvgpu: pass channel pointer to handle sm exception

Pass the faulting channel pointer (fault_ch) to
gr_gk20a_handle_sm_exception() instead of NULL

Bug 200156699

Change-Id: I909327e2a000bea8bc91cfd0820a759960664b46
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1011289
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index ae2ffc0a..c66dea92 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -64,7 +64,7 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 			proj_tpc_in_gpc_stride_v() * tpc;
 	u32 lrf_ecc_status, shm_ecc_status;
 
-	gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, NULL);
+	gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch);
 
 	/* Check for LRF ECC errors. */
         lrf_ecc_status = gk20a_readl(g,
-- 
cgit v1.2.2


From e9b03e903c10e1fce9daf5fa7e51b8c4a0b65c95 Mon Sep 17 00:00:00 2001
From: Adeel Raza <araza@nvidia.com>
Date: Fri, 11 Dec 2015 16:16:21 -0800
Subject: gpu: nvgpu: gp10b: add ECC stats sysfs nodes

Add sysfs nodes for querying ECC single/double bit error counts.

Bug 1699676

Change-Id: I6d5219facadaa17207ac759b88fe19077207d8f1
Signed-off-by: Adeel Raza <araza@nvidia.com>
Reviewed-on: http://git-master/r/935363
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c             | 145 +++++++++++
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h             |  28 +++
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h          |  92 +++++++
 drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h         |  22 +-
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c            |  33 ++-
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 328 +++++++++++++++++++++++++
 6 files changed, 644 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index c66dea92..90d0ce8d 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -80,6 +80,13 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 
 		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
 			"Single bit error detected in SM LRF!");
+
+		g->gr.t18x.ecc_stats.sm_lrf_single_err_count.counters[tpc] +=
+			gk20a_readl(g,
+				gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset);
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset,
+			0);
 	}
 	if ( (lrf_ecc_status &
 		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f()) ||
@@ -92,6 +99,13 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 
 		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
 			"Double bit error detected in SM LRF!");
+
+		g->gr.t18x.ecc_stats.sm_lrf_double_err_count.counters[tpc] +=
+			gk20a_readl(g,
+				gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset);
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset,
+			0);
 	}
 	gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
 			lrf_ecc_status);
@@ -107,17 +121,42 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 		gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f()) ||
 		(shm_ecc_status &
 		gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f()) ) {
+		u32 ecc_stats_reg_val;
 
 		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
 			"Single bit error detected in SM SHM!");
+
+		ecc_stats_reg_val =
+			gk20a_readl(g,
+				gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
+		g->gr.t18x.ecc_stats.sm_shm_sec_count.counters[tpc] +=
+			gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val);
+		g->gr.t18x.ecc_stats.sm_shm_sed_count.counters[tpc] +=
+			gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val);
+		ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() |
+					gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m());
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset,
+			ecc_stats_reg_val);
 	}
 	if ( (shm_ecc_status &
 		gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f()) ||
 		(shm_ecc_status &
 		gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f()) ) {
+		u32 ecc_stats_reg_val;
 
 		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
 			"Double bit error detected in SM SHM!");
+
+		ecc_stats_reg_val =
+			gk20a_readl(g,
+				gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
+		g->gr.t18x.ecc_stats.sm_shm_ded_count.counters[tpc] +=
+			gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val);
+		ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m());
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset,
+			ecc_stats_reg_val);
 	}
 	gk20a_writel(g, gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset,
 			shm_ecc_status);
@@ -133,6 +172,7 @@ static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
 	u32 offset = proj_gpc_stride_v() * gpc +
 		     proj_tpc_in_gpc_stride_v() * tpc;
 	u32 esr;
+	u32 ecc_stats_reg_val;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 
@@ -143,10 +183,114 @@ static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
 	if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_sec_pending_f()) {
 		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
 			"Single bit error detected in TEX!");
+
+		/* Pipe 0 counters */
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
+			gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f());
+
+		ecc_stats_reg_val = gk20a_readl(g,
+				gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
+		g->gr.t18x.ecc_stats.tex_total_sec_pipe0_count.counters[tpc] +=
+				gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
+		ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
+			ecc_stats_reg_val);
+
+		ecc_stats_reg_val = gk20a_readl(g,
+				gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
+		g->gr.t18x.ecc_stats.tex_unique_sec_pipe0_count.counters[tpc] +=
+				gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
+		ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
+			ecc_stats_reg_val);
+
+
+		/* Pipe 1 counters */
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
+			gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f());
+
+		ecc_stats_reg_val = gk20a_readl(g,
+				gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
+		g->gr.t18x.ecc_stats.tex_total_sec_pipe1_count.counters[tpc] +=
+				gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
+		ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
+			ecc_stats_reg_val);
+
+		ecc_stats_reg_val = gk20a_readl(g,
+				gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
+		g->gr.t18x.ecc_stats.tex_unique_sec_pipe1_count.counters[tpc] +=
+				gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
+		ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
+			ecc_stats_reg_val);
+
+
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
+			gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f());
 	}
 	if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f()) {
 		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
 			"Double bit error detected in TEX!");
+
+		/* Pipe 0 counters */
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
+			gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f());
+
+		ecc_stats_reg_val = gk20a_readl(g,
+				gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
+		g->gr.t18x.ecc_stats.tex_total_ded_pipe0_count.counters[tpc] +=
+				gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
+		ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
+			ecc_stats_reg_val);
+
+		ecc_stats_reg_val = gk20a_readl(g,
+				gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
+		g->gr.t18x.ecc_stats.tex_unique_ded_pipe0_count.counters[tpc] +=
+				gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
+		ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
+			ecc_stats_reg_val);
+
+
+		/* Pipe 1 counters */
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
+			gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f());
+
+		ecc_stats_reg_val = gk20a_readl(g,
+				gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
+		g->gr.t18x.ecc_stats.tex_total_ded_pipe1_count.counters[tpc] +=
+				gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
+		ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
+			ecc_stats_reg_val);
+
+		ecc_stats_reg_val = gk20a_readl(g,
+				gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
+		g->gr.t18x.ecc_stats.tex_unique_ded_pipe1_count.counters[tpc] +=
+				gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
+		ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
+			ecc_stats_reg_val);
+
+
+		gk20a_writel(g,
+			gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
+			gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f());
 	}
 
 	gk20a_writel(g,
@@ -1594,4 +1738,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.pre_process_sm_exception =
 		gr_gp10b_pre_process_sm_exception;
 	gops->gr.handle_fecs_error = gr_gp10b_handle_fecs_error;
+	gops->gr.create_gr_sysfs = gr_gp10b_create_sysfs;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index c35fb384..bd4b5879 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -35,6 +35,13 @@ enum {
 void gp10b_init_gr(struct gpu_ops *ops);
 int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
 			struct mem_desc *mem);
+void gr_gp10b_create_sysfs(struct platform_device *dev);
+
+struct ecc_stat {
+	char **names;	/* sysfs attribute name of each hardware unit */
+	u32 *counters;	/* error count accumulated for each hardware unit */
+	struct hlist_node hash_node;	/* link used by ecc_hash_table */
+};
 
 struct gr_t18x {
 	struct {
@@ -47,6 +54,27 @@ struct gr_t18x {
 		struct dentry *debugfs_dump_ctxsw_stats;
 	} ctx_vars;
 
+	struct {
+		struct ecc_stat sm_lrf_single_err_count;
+		struct ecc_stat sm_lrf_double_err_count;
+
+		struct ecc_stat sm_shm_sec_count;
+		struct ecc_stat sm_shm_sed_count;
+		struct ecc_stat sm_shm_ded_count;
+
+		struct ecc_stat tex_total_sec_pipe0_count;
+		struct ecc_stat tex_total_ded_pipe0_count;
+		struct ecc_stat tex_unique_sec_pipe0_count;
+		struct ecc_stat tex_unique_ded_pipe0_count;
+		struct ecc_stat tex_total_sec_pipe1_count;
+		struct ecc_stat tex_total_ded_pipe1_count;
+		struct ecc_stat tex_unique_sec_pipe1_count;
+		struct ecc_stat tex_unique_ded_pipe1_count;
+
+		struct ecc_stat l2_sec_count;
+		struct ecc_stat l2_ded_count;
+	} ecc_stats;
+
 	int cilp_preempt_pending_chid;
 };
 
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index b494482a..0480527c 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -530,6 +530,98 @@ static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pe
 {
 	return 0x200;
 }
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r(void)
+{
+	return 0x005046bc;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r(void)
+{
+	return 0x005046c0;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r(void)
+{
+	return 0x005044a4;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m(void)
+{
+	return 0xff << 0;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(u32 r)
+{
+	return (r >> 0) & 0xff;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m(void)
+{
+	return 0xff << 8;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(u32 r)
+{
+	return (r >> 8) & 0xff;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m(void)
+{
+	return 0xff << 16;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(u32 r)
+{
+	return (r >> 16) & 0xff;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_routing_r(void)
+{
+	return 0x005042c4;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r(void)
+{
+	return 0x00504218;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m(void)
+{
+	return 0xffff << 0;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m(void)
+{
+	return 0xffffU << 16; /* DED field mask; unsigned, as 0xffff << 16 overflows int */
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(u32 r)
+{
+	return (r >> 16) & 0xffff;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r(void)
+{
+	return 0x005042ec;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m(void)
+{
+	return 0xffff << 0;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m(void)
+{
+	return 0xffffU << 16; /* DED field mask; unsigned, as 0xffff << 16 overflows int */
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(u32 r)
+{
+	return (r >> 16) & 0xffff;
+}
 static inline u32 gr_pri_be0_crop_status1_r(void)
 {
 	return 0x00410134;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
index 302c2243..5916f695 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -314,6 +314,26 @@ static inline u32 ltc_ltc0_lts0_intr_r(void)
 {
 	return 0x0014040c;
 }
+static inline u32 ltc_ltc0_lts0_dstg_ecc_report_r(void)
+{
+	return 0x0014051c;
+}
+static inline u32 ltc_ltc0_lts0_dstg_ecc_report_sec_count_m(void)
+{
+	return 0xff << 0;
+}
+static inline u32 ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(u32 r)
+{
+	return (r >> 0) & 0xff;
+}
+static inline u32 ltc_ltc0_lts0_dstg_ecc_report_ded_count_m(void)
+{
+	return 0xff << 16;
+}
+static inline u32 ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(u32 r)
+{
+	return (r >> 16) & 0xff;
+}
 static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_r(void)
 {
 	return 0x0017e2a0;
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index d0be86a4..e68e762d 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -133,21 +133,48 @@ static void gp10b_ltc_isr(struct gk20a *g)
 		if ((mc_intr & 1 << ltc) == 0)
 			continue;
 		for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
-			ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() +
-					   proj_ltc_stride_v() * ltc +
-					   proj_lts_stride_v() * slice);
+			u32 offset = proj_ltc_stride_v() * ltc +
+					proj_lts_stride_v() * slice;
+			ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() + offset);
 
 			/* Detect and handle ECC errors */
 			if (ltc_intr &
 				ltc_ltcs_ltss_intr_ecc_sec_error_pending_f()) {
+				u32 ecc_stats_reg_val;
+
 				gk20a_err(dev_from_gk20a(g),
 					"Single bit error detected in GPU L2!");
+
+				ecc_stats_reg_val =
+					gk20a_readl(g,
+						ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
+				g->gr.t18x.ecc_stats.l2_sec_count.counters[ltc] +=
+					ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(ecc_stats_reg_val);
+				ecc_stats_reg_val &=
+					~(ltc_ltc0_lts0_dstg_ecc_report_sec_count_m());
+				gk20a_writel(g,
+					ltc_ltc0_lts0_dstg_ecc_report_r() + offset,
+					ecc_stats_reg_val);
+
 				g->ops.mm.l2_flush(g, true);
 			}
 			if (ltc_intr &
 				ltc_ltcs_ltss_intr_ecc_ded_error_pending_f()) {
+				u32 ecc_stats_reg_val;
+
 				gk20a_err(dev_from_gk20a(g),
 					"Double bit error detected in GPU L2!");
+
+				ecc_stats_reg_val =
+					gk20a_readl(g,
+						ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
+				g->gr.t18x.ecc_stats.l2_ded_count.counters[ltc] +=
+					ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(ecc_stats_reg_val);
+				ecc_stats_reg_val &=
+					~(ltc_ltc0_lts0_dstg_ecc_report_ded_count_m());
+				gk20a_writel(g,
+					ltc_ltc0_lts0_dstg_ecc_report_r() + offset,
+					ecc_stats_reg_val);
 			}
 
 			gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x",
diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 8bddff3d..0cfb1d91 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -26,9 +26,14 @@
 #include <linux/tegra_pm_domains.h>
 #include <linux/reset.h>
 #include <soc/tegra/tegra_bpmp.h>
+#include <linux/hashtable.h>
 #include "gk20a/platform_gk20a.h"
 #include "gk20a/gk20a.h"
 #include "platform_tegra.h"
+#include "gr_gp10b.h"
+#include "ltc_gp10b.h"
+#include "hw_gr_gp10b.h"
+#include "hw_ltc_gp10b.h"
 
 #define GP10B_MAX_SUPPORTED_FREQS 11
 static unsigned long gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS];
@@ -40,6 +45,8 @@ static struct {
 	{"gpu", 1000000000},
 	{"gpu_sys", 204000000} };
 
+static void gr_gp10b_remove_sysfs(struct device *dev);
+
 /*
  * gp10b_tegra_get_clocks()
  *
@@ -144,6 +151,8 @@ static int gp10b_tegra_remove(struct platform_device *pdev)
 	/* remove gk20a power subdomain from host1x */
 	nvhost_unregister_client_domain(dev_to_genpd(&pdev->dev));
 
+	gr_gp10b_remove_sysfs(&pdev->dev);
+
 	return 0;
 
 }
@@ -345,3 +354,322 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 
 	.force_reset_in_do_idle = true,
 };
+
+
+#define ECC_STAT_NAME_MAX_SIZE	100
+
+
+DEFINE_HASHTABLE(ecc_hash_table, 5);
+
+static struct device_attribute *dev_attr_sm_lrf_ecc_single_err_count_array;
+static struct device_attribute *dev_attr_sm_lrf_ecc_double_err_count_array;
+
+static struct device_attribute *dev_attr_sm_shm_ecc_sec_count_array;
+static struct device_attribute *dev_attr_sm_shm_ecc_sed_count_array;
+static struct device_attribute *dev_attr_sm_shm_ecc_ded_count_array;
+
+static struct device_attribute *dev_attr_tex_ecc_total_sec_pipe0_count_array;
+static struct device_attribute *dev_attr_tex_ecc_total_ded_pipe0_count_array;
+static struct device_attribute *dev_attr_tex_ecc_unique_sec_pipe0_count_array;
+static struct device_attribute *dev_attr_tex_ecc_unique_ded_pipe0_count_array;
+static struct device_attribute *dev_attr_tex_ecc_total_sec_pipe1_count_array;
+static struct device_attribute *dev_attr_tex_ecc_total_ded_pipe1_count_array;
+static struct device_attribute *dev_attr_tex_ecc_unique_sec_pipe1_count_array;
+static struct device_attribute *dev_attr_tex_ecc_unique_ded_pipe1_count_array;
+
+static struct device_attribute *dev_attr_l2_ecc_sec_count_array;
+static struct device_attribute *dev_attr_l2_ecc_ded_count_array;
+
+
+/* Hash key for an ECC stat name: the sum of its characters, each cast to u32. */
+static u32 gen_ecc_hash_key(char *str)
+{
+	const char *cursor;
+	u32 sum = 0;
+
+	/* Stops at the terminating NUL, so an empty string hashes to 0. */
+	for (cursor = str; *cursor; cursor++)
+		sum += (u32)(*cursor);
+
+	return sum;
+}
+
+/*
+ * sysfs show callback shared by all ECC stat nodes: parses the hardware unit
+ * index from the attribute name ("ltc<N>_<stat>" or "gpc0_tpc<N>_<stat>"),
+ * looks the stat up in ecc_hash_table and prints that unit's counter.
+ */
+static ssize_t ecc_stat_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	const char *ecc_stat_full_name = attr->attr.name;
+	struct ecc_stat *ecc_stat;
+	unsigned int hw_unit;
+	int prefix_len = 0;
+	u32 hash_key;
+
+	/* %n yields the real prefix length, so multi-digit unit indices work */
+	if (sscanf(ecc_stat_full_name, "ltc%u_%n", &hw_unit, &prefix_len) != 1 &&
+	    sscanf(ecc_stat_full_name, "gpc0_tpc%u_%n", &hw_unit, &prefix_len) != 1)
+		return snprintf(buf, PAGE_SIZE, "Error: Invalid ECC stat name!\n");
+
+	/*
+	 * Stats are hashed by base name. NOTE(review): names[hw_unit] is read
+	 * for every stat in the bucket, even one with fewer units - confirm.
+	 */
+	hash_key = gen_ecc_hash_key((char *)&ecc_stat_full_name[prefix_len]);
+	hash_for_each_possible(ecc_hash_table, ecc_stat, hash_node, hash_key) {
+		if (!strcmp(ecc_stat_full_name, ecc_stat->names[hw_unit]))
+			return snprintf(buf, PAGE_SIZE, "%u\n", ecc_stat->counters[hw_unit]);
+	}
+
+	return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n");
+}
+
+/*
+ * Create one read-only sysfs node per hardware unit (LTCs when is_l2 is set,
+ * TPCs otherwise), backed by ecc_stat, and add ecc_stat to ecc_hash_table.
+ * Returns 0, -ENOMEM on allocation failure, or OR-ed device_create_file() errors.
+ *
+ * NOTE(review): dev_attr_array is received by value, so the array allocated
+ * here never reaches the caller's pointer; fixing that needs a new signature.
+ */
+static int ecc_stat_create(struct platform_device *dev, int is_l2,
+				char *ecc_stat_name,
+				struct ecc_stat *ecc_stat,
+				struct device_attribute *dev_attr_array)
+{
+	struct gk20a *g = get_gk20a(dev);
+	int num_hw_units = is_l2 ? g->ltc_count : g->gr.tpc_count;
+	u32 hash_key = gen_ecc_hash_key(ecc_stat_name);
+	int hw_unit;
+	int error = 0;
+
+	/* Allocate arrays; bail out rather than dereference a failed allocation */
+	dev_attr_array = kzalloc(sizeof(struct device_attribute) * num_hw_units, GFP_KERNEL);
+	ecc_stat->counters = kzalloc(sizeof(u32) * num_hw_units, GFP_KERNEL);
+	ecc_stat->names = kzalloc(sizeof(char *) * num_hw_units, GFP_KERNEL);
+	if (!dev_attr_array || !ecc_stat->counters || !ecc_stat->names)
+		goto fail;
+	for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
+		ecc_stat->names[hw_unit] = kzalloc(sizeof(char) * ECC_STAT_NAME_MAX_SIZE, GFP_KERNEL);
+		if (!ecc_stat->names[hw_unit])
+			goto fail;
+	}
+
+	for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
+		/* Fill in struct device_attribute members */
+		snprintf(ecc_stat->names[hw_unit], ECC_STAT_NAME_MAX_SIZE,
+			is_l2 ? "ltc%d_%s" : "gpc0_tpc%d_%s", hw_unit, ecc_stat_name);
+		sysfs_attr_init(&dev_attr_array[hw_unit].attr); /* heap-allocated attr */
+		dev_attr_array[hw_unit].attr.name = ecc_stat->names[hw_unit];
+		dev_attr_array[hw_unit].attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
+		dev_attr_array[hw_unit].show = ecc_stat_show;
+		dev_attr_array[hw_unit].store = NULL;
+		error |= device_create_file(&dev->dev, &dev_attr_array[hw_unit]);
+	}
+
+	/* Add hash table entry */
+	hash_add(ecc_hash_table, &ecc_stat->hash_node, hash_key);
+	return error;
+
+fail:
+	for (hw_unit = 0; ecc_stat->names && hw_unit < num_hw_units; hw_unit++)
+		kfree(ecc_stat->names[hw_unit]);
+	kfree(ecc_stat->names);
+	kfree(ecc_stat->counters);
+	kfree(dev_attr_array);
+	ecc_stat->names = NULL;
+	ecc_stat->counters = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * Tear down one ECC stat: remove its per-unit sysfs files, unlink it from
+ * ecc_hash_table and free its name and counter arrays. A NULL dev_attr_array
+ * is tolerated because ecc_stat_create() assigns the array only to its own
+ * by-value parameter; a NULL ecc_stat->names is skipped instead of indexed.
+ */
+static void ecc_stat_remove(struct device *dev,
+				int is_l2,
+				struct ecc_stat *ecc_stat,
+				struct device_attribute *dev_attr_array)
+{
+	struct gk20a *g = get_gk20a(to_platform_device(dev));
+	int num_hw_units = is_l2 ? g->ltc_count : g->gr.tpc_count;
+	int hw_unit;
+
+	/* Remove sysfs files (nothing to do without an attribute array) */
+	for (hw_unit = 0; dev_attr_array && hw_unit < num_hw_units; hw_unit++)
+		device_remove_file(dev, &dev_attr_array[hw_unit]);
+
+	/* Remove hash table entry */
+	hash_del(&ecc_stat->hash_node);
+
+	/* Free arrays and clear the pointers so they cannot be reused */
+	kfree(ecc_stat->counters);
+	for (hw_unit = 0; ecc_stat->names && hw_unit < num_hw_units; hw_unit++)
+		kfree(ecc_stat->names[hw_unit]);
+	kfree(ecc_stat->names);
+	kfree(dev_attr_array);
+	ecc_stat->counters = NULL;
+	ecc_stat->names = NULL;
+}
+
+/*
+ * Create the ECC statistics sysfs nodes: thirteen per-TPC stats (SM LRF, SM
+ * SHM, TEX pipe 0 and pipe 1) followed by two per-LTC L2 stats. The return
+ * values of all ecc_stat_create() calls are OR-ed; a failure is only logged.
+ */
+void gr_gp10b_create_sysfs(struct platform_device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	const int per_tpc = 0;	/* is_l2 argument for TPC-indexed stats */
+	const int per_ltc = 1;	/* is_l2 argument for LTC-indexed stats */
+	int error = 0;
+
+	/* SM LRF single/double bit error counts */
+	error |= ecc_stat_create(dev, per_tpc,
+				"sm_lrf_ecc_single_err_count",
+				&g->gr.t18x.ecc_stats.sm_lrf_single_err_count,
+				dev_attr_sm_lrf_ecc_single_err_count_array);
+	error |= ecc_stat_create(dev, per_tpc,
+				"sm_lrf_ecc_double_err_count",
+				&g->gr.t18x.ecc_stats.sm_lrf_double_err_count,
+				dev_attr_sm_lrf_ecc_double_err_count_array);
+
+	/* SM SHM SEC/SED/DED counts */
+	error |= ecc_stat_create(dev, per_tpc,
+				"sm_shm_ecc_sec_count",
+				&g->gr.t18x.ecc_stats.sm_shm_sec_count,
+				dev_attr_sm_shm_ecc_sec_count_array);
+	error |= ecc_stat_create(dev, per_tpc,
+				"sm_shm_ecc_sed_count",
+				&g->gr.t18x.ecc_stats.sm_shm_sed_count,
+				dev_attr_sm_shm_ecc_sed_count_array);
+	error |= ecc_stat_create(dev, per_tpc,
+				"sm_shm_ecc_ded_count",
+				&g->gr.t18x.ecc_stats.sm_shm_ded_count,
+				dev_attr_sm_shm_ecc_ded_count_array);
+
+	/* TEX pipe 0 total/unique SEC and DED counts */
+	error |= ecc_stat_create(dev, per_tpc,
+				"tex_ecc_total_sec_pipe0_count",
+				&g->gr.t18x.ecc_stats.tex_total_sec_pipe0_count,
+				dev_attr_tex_ecc_total_sec_pipe0_count_array);
+	error |= ecc_stat_create(dev, per_tpc,
+				"tex_ecc_total_ded_pipe0_count",
+				&g->gr.t18x.ecc_stats.tex_total_ded_pipe0_count,
+				dev_attr_tex_ecc_total_ded_pipe0_count_array);
+	error |= ecc_stat_create(dev, per_tpc,
+				"tex_ecc_unique_sec_pipe0_count",
+				&g->gr.t18x.ecc_stats.tex_unique_sec_pipe0_count,
+				dev_attr_tex_ecc_unique_sec_pipe0_count_array);
+	error |= ecc_stat_create(dev, per_tpc,
+				"tex_ecc_unique_ded_pipe0_count",
+				&g->gr.t18x.ecc_stats.tex_unique_ded_pipe0_count,
+				dev_attr_tex_ecc_unique_ded_pipe0_count_array);
+
+	/* TEX pipe 1 total/unique SEC and DED counts */
+	error |= ecc_stat_create(dev, per_tpc,
+				"tex_ecc_total_sec_pipe1_count",
+				&g->gr.t18x.ecc_stats.tex_total_sec_pipe1_count,
+				dev_attr_tex_ecc_total_sec_pipe1_count_array);
+	error |= ecc_stat_create(dev, per_tpc,
+				"tex_ecc_total_ded_pipe1_count",
+				&g->gr.t18x.ecc_stats.tex_total_ded_pipe1_count,
+				dev_attr_tex_ecc_total_ded_pipe1_count_array);
+	error |= ecc_stat_create(dev, per_tpc,
+				"tex_ecc_unique_sec_pipe1_count",
+				&g->gr.t18x.ecc_stats.tex_unique_sec_pipe1_count,
+				dev_attr_tex_ecc_unique_sec_pipe1_count_array);
+	error |= ecc_stat_create(dev, per_tpc,
+				"tex_ecc_unique_ded_pipe1_count",
+				&g->gr.t18x.ecc_stats.tex_unique_ded_pipe1_count,
+				dev_attr_tex_ecc_unique_ded_pipe1_count_array);
+
+	/* L2 SEC/DED counts */
+	error |= ecc_stat_create(dev, per_ltc,
+				"lts0_ecc_sec_count",
+				&g->gr.t18x.ecc_stats.l2_sec_count,
+				dev_attr_l2_ecc_sec_count_array);
+	error |= ecc_stat_create(dev, per_ltc,
+				"lts0_ecc_ded_count",
+				&g->gr.t18x.ecc_stats.l2_ded_count,
+				dev_attr_l2_ecc_ded_count_array);
+
+	if (!error)
+		return;
+
+	dev_err(&dev->dev, "Failed to create sysfs attributes!\n");
+}
+
+/*
+ * Undo gr_gp10b_create_sysfs(): call ecc_stat_remove() for every ECC stat,
+ * in the same order the stats were created (thirteen per-TPC stats, then
+ * the two per-LTC L2 stats).
+ *
+ * Each call is handed the file-scope device_attribute array pointer that
+ * belongs to the stat being removed.
+ */
+static void gr_gp10b_remove_sysfs(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(to_platform_device(dev));
+	const int per_tpc = 0;	/* is_l2 argument for TPC-indexed stats */
+	const int per_ltc = 1;	/* is_l2 argument for LTC-indexed stats */
+
+	/* SM LRF single/double bit error counts */
+	ecc_stat_remove(dev, per_tpc,
+			&g->gr.t18x.ecc_stats.sm_lrf_single_err_count,
+			dev_attr_sm_lrf_ecc_single_err_count_array);
+	ecc_stat_remove(dev, per_tpc,
+			&g->gr.t18x.ecc_stats.sm_lrf_double_err_count,
+			dev_attr_sm_lrf_ecc_double_err_count_array);
+
+	/* SM SHM SEC/SED/DED counts */
+	ecc_stat_remove(dev, per_tpc,
+			&g->gr.t18x.ecc_stats.sm_shm_sec_count,
+			dev_attr_sm_shm_ecc_sec_count_array);
+	ecc_stat_remove(dev, per_tpc,
+			&g->gr.t18x.ecc_stats.sm_shm_sed_count,
+			dev_attr_sm_shm_ecc_sed_count_array);
+	ecc_stat_remove(dev, per_tpc,
+			&g->gr.t18x.ecc_stats.sm_shm_ded_count,
+			dev_attr_sm_shm_ecc_ded_count_array);
+
+	/* TEX pipe 0 total/unique SEC and DED counts */
+	ecc_stat_remove(dev, per_tpc,
+			&g->gr.t18x.ecc_stats.tex_total_sec_pipe0_count,
+			dev_attr_tex_ecc_total_sec_pipe0_count_array);
+	ecc_stat_remove(dev, per_tpc,
+			&g->gr.t18x.ecc_stats.tex_total_ded_pipe0_count,
+			dev_attr_tex_ecc_total_ded_pipe0_count_array);
+	ecc_stat_remove(dev, per_tpc,
+			&g->gr.t18x.ecc_stats.tex_unique_sec_pipe0_count,
+			dev_attr_tex_ecc_unique_sec_pipe0_count_array);
+	ecc_stat_remove(dev, per_tpc,
+			&g->gr.t18x.ecc_stats.tex_unique_ded_pipe0_count,
+			dev_attr_tex_ecc_unique_ded_pipe0_count_array);
+
+	/* TEX pipe 1 total/unique SEC and DED counts */
+	ecc_stat_remove(dev, per_tpc,
+			&g->gr.t18x.ecc_stats.tex_total_sec_pipe1_count,
+			dev_attr_tex_ecc_total_sec_pipe1_count_array);
+	ecc_stat_remove(dev, per_tpc,
+			&g->gr.t18x.ecc_stats.tex_total_ded_pipe1_count,
+			dev_attr_tex_ecc_total_ded_pipe1_count_array);
+	ecc_stat_remove(dev, per_tpc,
+			&g->gr.t18x.ecc_stats.tex_unique_sec_pipe1_count,
+			dev_attr_tex_ecc_unique_sec_pipe1_count_array);
+	ecc_stat_remove(dev, per_tpc,
+			&g->gr.t18x.ecc_stats.tex_unique_ded_pipe1_count,
+			dev_attr_tex_ecc_unique_ded_pipe1_count_array);
+
+	/* L2 SEC/DED counts */
+	ecc_stat_remove(dev, per_ltc,
+			&g->gr.t18x.ecc_stats.l2_sec_count,
+			dev_attr_l2_ecc_sec_count_array);
+	ecc_stat_remove(dev, per_ltc,
+			&g->gr.t18x.ecc_stats.l2_ded_count,
+			dev_attr_l2_ecc_ded_count_array);
+}
-- 
cgit v1.2.2


From 6ad20e900419596c286096d6444272b3f11ce27f Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Fri, 19 Feb 2016 15:23:30 +0530
Subject: gpu: nvgpu: gp10b: Enable adaptive ELPG

ELPG is enabled on TOT.

Bug 200144583

Change-Id: Icbdcb5f575a4ca37becf47b098fbd6a1f89feec7
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1013845
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 0cfb1d91..3221e423 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -309,6 +309,7 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	.enable_blcg		= true,
 	.enable_slcg		= true,
 	.enable_elcg		= true,
+	.enable_aelpg       = true,
 
 	/* ptimer src frequency in hz*/
 	.ptimer_src_freq	= 31250000,
-- 
cgit v1.2.2


From cdf3fdd63bd74dd37d1857f7d5a8e43df23693b6 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Tue, 22 Sep 2015 12:40:24 -0700
Subject: gpu: nvgpu: gp10b: enable gpu rail gating

Bug 1698618

Change-Id: Iabfd726891165d7879376ab96445b7b81b907153
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/841856
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 3221e423..65e11d37 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -304,7 +304,7 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	.clockgate_delay	= 50,
 
 	/* power management configuration */
-	.can_railgate           = false,
+	.can_railgate           = true,
 	.enable_elpg            = true,
 	.enable_blcg		= true,
 	.enable_slcg		= true,
-- 
cgit v1.2.2


From 02ee4d418834c99746487b72b04d5f10139eea90 Mon Sep 17 00:00:00 2001
From: Prashant Gaikwad <pgaikwad@nvidia.com>
Date: Wed, 24 Feb 2016 11:47:28 +0530
Subject: Revert "gpu: nvgpu: gp10b: enable gpu rail gating"

This reverts commit 71b59d75fc49e2159830026bce387ef4d829faa8
since it causes suspend_sanity to fail on quill platform.

On system resume, we see the following error dump from GPU

gk20a 17000000.gp10b: gk20a_channel_timeout_handler: Job on channel 501 timed out

gk20a 17000000.gp10b: gk20a_fifo_set_ctx_mmu_error_ch: channel 501 generated a mmu fault
gk20a 17000000.gp10b: gk20a_set_error_notifier: error notifier set to 31 for ch 501
gk20a 17000000.gp10b: gk20a_channel_timeout_handler: Job on channel 509 timed out

Change-Id: I61bc3b0745fe136675ab79b13f54e9126602f51c
Signed-off-by: Prashant Gaikwad <pgaikwad@nvidia.com>
Reviewed-on: http://git-master/r/1017967
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 65e11d37..3221e423 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -304,7 +304,7 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	.clockgate_delay	= 50,
 
 	/* power management configuration */
-	.can_railgate           = true,
+	.can_railgate           = false,
 	.enable_elpg            = true,
 	.enable_blcg		= true,
 	.enable_slcg		= true,
-- 
cgit v1.2.2


From 640d0e2c3b58294cd11f420a5fc8377d490c43d3 Mon Sep 17 00:00:00 2001
From: Supriya <ssharatkumar@nvidia.com>
Date: Thu, 10 Dec 2015 12:54:38 +0530
Subject: gpu: nvgpu: ECC override

-sysfs functions to call into LS PMU and modify
 ECC override register

Bug 1699676

Change-Id: Iaf6cc3a86160b806e52ab168577caad42b2c5d22
Signed-off-by: Supriya <ssharatkumar@nvidia.com>
Reviewed-on: http://git-master/r/921252
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/Makefile               |  3 +-
 drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c          | 66 ++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/gp10b_sysfs.h          | 25 +++++++++
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c             | 10 ++++
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h          |  4 ++
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c |  8 ++-
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c            | 76 +++++++++++++++++++++++++-
 7 files changed, 187 insertions(+), 5 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/gp10b_sysfs.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
index 0542fd67..8b930bca 100644
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ b/drivers/gpu/nvgpu/gp10b/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_GK20A)  += \
 	gp10b_gating_reglist.o \
 	regops_gp10b.o \
 	cde_gp10b.o \
-	therm_gp10b.o
+	therm_gp10b.o \
+	gp10b_sysfs.o
 
 obj-$(CONFIG_TEGRA_GK20A) += platform_gp10b_tegra.o
diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c
new file mode 100644
index 00000000..800f39c3
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c
@@ -0,0 +1,66 @@
+/*
+ * GP10B specific sysfs files
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/platform_device.h>
+
+#include "gk20a/gk20a.h"
+#include "gp10b_sysfs.h"
+
+#define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH)
+
+/* sysfs store: parse an unsigned ECC mask from buf and send it to the PMU. */
+static ssize_t ecc_enable_store(struct device *device,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct gk20a *g = get_gk20a(to_platform_device(device));
+	u32 ecc_mask;
+	u32 err;
+
+	/* ecc_mask is a u32, so it must be scanned with %u, not %d */
+	if (sscanf(buf, "%u", &ecc_mask) != 1)
+		return -EINVAL;
+
+	err = g->ops.pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd(g, ecc_mask);
+	if (err)
+		dev_err(device, "ECC override did not happen\n");
+	/* A command failure is only logged; the write still reports count */
+	return count;
+}
+
+/* sysfs show: print the value from gr.get_lrf_tex_ltc_dram_override(). */
+static ssize_t ecc_enable_read(struct device *device,
+		struct device_attribute *attr, char *buf)
+{
+	struct gk20a *g = get_gk20a(to_platform_device(device));
+	u32 override_val = g->ops.gr.get_lrf_tex_ltc_dram_override(g);
+
+	return sprintf(buf, "ecc override =0x%x\n", override_val);
+}
+
+static DEVICE_ATTR(ecc_enable, ROOTRW, ecc_enable_read, ecc_enable_store);
+
+/* Register the ecc_enable node on dev; a failure is logged, not returned. */
+void gp10b_create_sysfs(struct platform_device *dev)
+{
+	struct device *d = &dev->dev;
+
+	if (device_create_file(d, &dev_attr_ecc_enable))
+		dev_err(d, "Failed to create sysfs attributes!\n");
+}
+
+void gp10b_remove_sysfs(struct device *dev)
+{
+	device_remove_file(dev, &dev_attr_ecc_enable); /* drops the ecc_enable node */
+}
diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.h b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.h
new file mode 100644
index 00000000..c1d101da
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.h
@@ -0,0 +1,25 @@
+/*
+ * GP10B specific sysfs files
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _GP10B_SYSFS_H_
+#define _GP10B_SYSFS_H_
+
+struct device;
+struct platform_device;
+
+#define FUSE_OPT_ECC_EN  0x358	/* ECC fuse; read with tegra_fuse_readl() */
+void gp10b_create_sysfs(struct platform_device *dev);
+void gp10b_remove_sysfs(struct device *dev);
+#endif /*_GP10B_SYSFS_H_*/
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 90d0ce8d..9eea7d43 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -28,6 +28,7 @@
 #include "hw_proj_gp10b.h"
 #include "hw_ctxsw_prog_gp10b.h"
 #include "hw_mc_gp10b.h"
+#include "gp10b_sysfs.h"
 #include <linux/vmalloc.h>
 
 static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
@@ -1702,6 +1703,14 @@ static u32 gp10b_mask_hww_warp_esr(u32 hww_warp_esr)
 	return hww_warp_esr;
 }
 
+/* ECC override register value, or 0 if the ECC fuse reads as zero. */
+static u32 get_ecc_override_val(struct gk20a *g)
+{
+	if (!tegra_fuse_readl(FUSE_OPT_ECC_EN))
+		return 0;
+	return gk20a_readl(g, gr_fecs_feature_override_ecc_r());
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -1739,4 +1748,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 		gr_gp10b_pre_process_sm_exception;
 	gops->gr.handle_fecs_error = gr_gp10b_handle_fecs_error;
 	gops->gr.create_gr_sysfs = gr_gp10b_create_sysfs;
+	gops->gr.get_lrf_tex_ltc_dram_override = get_ecc_override_val;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 0480527c..62ac1327 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -1478,6 +1478,10 @@ static inline u32 gr_fecs_ctxsw_idlestate_r(void)
 {
 	return 0x00409420;
 }
+static inline u32 gr_fecs_feature_override_ecc_r(void)
+{
+	return 0x00409658;
+}
 static inline u32 gr_gpc0_gpccs_ctxsw_idlestate_r(void)
 {
 	return 0x00502420;
diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 3221e423..c4a5179a 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -34,6 +34,7 @@
 #include "ltc_gp10b.h"
 #include "hw_gr_gp10b.h"
 #include "hw_ltc_gp10b.h"
+#include "gp10b_sysfs.h"
 
 #define GP10B_MAX_SUPPORTED_FREQS 11
 static unsigned long gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS];
@@ -142,7 +143,8 @@ static int gp10b_tegra_late_probe(struct platform_device *pdev)
 {
 	/* Make gk20a power domain a subdomain of host1x */
 	nvhost_register_client_domain(dev_to_genpd(&pdev->dev));
-
+	/*Create GP10B specific sysfs*/
+	gp10b_create_sysfs(pdev);
 	return 0;
 }
 
@@ -150,9 +152,9 @@ static int gp10b_tegra_remove(struct platform_device *pdev)
 {
 	/* remove gk20a power subdomain from host1x */
 	nvhost_unregister_client_domain(dev_to_genpd(&pdev->dev));
-
 	gr_gp10b_remove_sysfs(&pdev->dev);
-
+	/*Remove GP10B specific sysfs*/
+	gp10b_remove_sysfs(&pdev->dev);
 	return 0;
 
 }
diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index 00701a50..6a704813 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -2,7 +2,7 @@
  * GP10B PMU
  *
  * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
-*
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
  * version 2, as published by the Free Software Foundation.
@@ -14,6 +14,7 @@
  */
 
 #include <linux/delay.h>	/* for udelay */
+#include <linux/tegra-fuse.h>
 #include "gk20a/gk20a.h"
 #include "gk20a/pmu_gk20a.h"
 #include "gm20b/acr_gm20b.h"
@@ -21,6 +22,7 @@
 
 #include "pmu_gp10b.h"
 #include "hw_pwr_gp10b.h"
+#include "gp10b_sysfs.h"
 
 #define gp10b_dbg_pmu(fmt, arg...) \
 	gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
@@ -324,6 +326,76 @@ static int gp10b_init_pmu_setup_hw1(struct gk20a *g)
 
 }
 
+/* PMU reply handler for the ECC en/dis command: flag success or log why not. */
+static void pmu_handle_ecc_en_dis_msg(struct gk20a *g, struct pmu_msg *msg,
+			void *param, u32 handle, u32 status)
+{
+	struct pmu_msg_lrf_tex_ltc_dram_en_dis *reply =
+			&msg->msg.lrf_tex_ltc_dram.en_dis;
+	struct device *dev = dev_from_gk20a(g);
+
+	gk20a_dbg_fn("");
+	if (status != 0) {
+		gk20a_err(dev, "ECC en dis cmd aborted");
+		return;
+	}
+	if (msg->msg.lrf_tex_ltc_dram.msg_type !=
+			PMU_LRF_TEX_LTC_DRAM_MSG_ID_EN_DIS) {
+		gk20a_err(dev,
+			"Invalid msg for LRF_TEX_LTC_DRAM_CMD_ID_EN_DIS cmd");
+		return;
+	}
+	if (reply->pmu_status == 0) {
+		g->pmu.override_done = 1;
+	} else {
+		gk20a_err(dev, "LRF_TEX_LTC_DRAM_MSG_ID_EN_DIS msg status = %x",
+			reply->pmu_status);
+		gk20a_err(dev, "LRF_TEX_LTC_DRAM_MSG_ID_EN_DIS msg en fail = %x",
+			reply->en_fail_mask);
+		gk20a_err(dev, "LRF_TEX_LTC_DRAM_MSG_ID_EN_DIS msg dis fail = %x",
+			reply->dis_fail_mask);
+	}
+	gk20a_dbg_fn("done");
+}
+
+/* Send the ECC override mask to the PMU; 0 only once the PMU confirms it. */
+static int send_ecc_overide_en_dis_cmd(struct gk20a *g, u32 bitmask)
+{
+	struct pmu_gk20a *pmu = &g->pmu;
+	struct pmu_cmd cmd;
+	u32 seq;
+	int status;
+	gk20a_dbg_fn("");
+
+	if (!tegra_fuse_readl(FUSE_OPT_ECC_EN)) {
+		gk20a_err(dev_from_gk20a(g), "Board not ECC capable");
+		return -ENODEV;
+	}
+	if (!(g->acr.capabilities &
+			ACR_LRF_TEX_LTC_DRAM_PRIV_MASK_ENABLE_LS_OVERRIDE)) {
+		gk20a_err(dev_from_gk20a(g), "check ACR capabilities");
+		return -EPERM;
+	}
+	memset(&cmd, 0, sizeof(struct pmu_cmd));
+	cmd.hdr.unit_id = PMU_UNIT_FECS_MEM_OVERRIDE;
+	cmd.hdr.size = PMU_CMD_HDR_SIZE +
+			sizeof(struct pmu_cmd_lrf_tex_ltc_dram_en_dis);
+	cmd.cmd.lrf_tex_ltc_dram.en_dis.cmd_type =
+			PMU_LRF_TEX_LTC_DRAM_CMD_ID_EN_DIS;
+	cmd.cmd.lrf_tex_ltc_dram.en_dis.en_dis_mask = (u8)(bitmask & 0xff);
+	gp10b_dbg_pmu("cmd post PMU_ECC_CMD_ID_EN_DIS_ECC");
+	pmu->override_done = 0;
+	status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
+			pmu_handle_ecc_en_dis_msg, NULL, &seq, ~0);
+	if (status)
+		gk20a_err(dev_from_gk20a(g), "ECC override failed");
+	else
+		pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
+				      &pmu->override_done, 1);
+	gk20a_dbg_fn("done");
+	return (status || pmu->override_done) ? status : -EIO;
+}
+
 void gp10b_init_pmu_ops(struct gpu_ops *gops)
 {
 	if (gops->privsecurity) {
@@ -342,4 +414,6 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.write_dmatrfbase = gp10b_write_dmatrfbase;
 	gops->pmu.pmu_elpg_statistics = gp10b_pmu_elpg_statistics;
 	gops->pmu.pmu_pg_grinit_param = gp10b_pg_gr_init;
+	gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd =
+			send_ecc_overide_en_dis_cmd;
 }
-- 
cgit v1.2.2


From 6ce874a30c0e97354de2975d1e7055e67e9aba1e Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Fri, 26 Feb 2016 10:12:49 -0800
Subject: Revert "Revert "gpu: nvgpu: gp10b: enable gpu rail gating""

This reverts commit 7c1f6f0b2998c354f315b431e00f3c8f861cb190.

Bug 200176691

Change-Id: Ia546513ec5c61999f6eb4d56ccd7e45ae072167c
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1020813
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index c4a5179a..43d90528 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -306,7 +306,7 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	.clockgate_delay	= 50,
 
 	/* power management configuration */
-	.can_railgate           = false,
+	.can_railgate           = true,
 	.enable_elpg            = true,
 	.enable_blcg		= true,
 	.enable_slcg		= true,
-- 
cgit v1.2.2


From 9a2ecd3efbc68d19604e33a348acf8c7e18fe904 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Tue, 1 Mar 2016 13:32:42 -0800
Subject: gpu: nvgpu: t18x: update slcg prod settings

Update prod settings to disable slcg pbdma related
domains.

Bug 1703083

Change-Id: I9f9192da69d07c5cea5bc7d79a031e5d2428b685
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1022219
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
index fbf146a2..7be9b60f 100644
--- a/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
+++ b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -54,7 +54,7 @@ static const struct gating_desc gp10b_slcg_fb[] = {
 
 /* slcg fifo */
 static const struct gating_desc gp10b_slcg_fifo[] = {
-	{.addr = 0x000026ac, .prod = 0x00000100, .disable = 0x0001fffe},
+	{.addr = 0x000026ac, .prod = 0x00000f00, .disable = 0x0001fffe},
 };
 
 /* slcg gr */
-- 
cgit v1.2.2


From 63d463c1ac1aa618111cbaee5c90ae69698816b6 Mon Sep 17 00:00:00 2001
From: Amit Sharma <amisharma@nvidia.com>
Date: Wed, 2 Mar 2016 10:00:44 +0530
Subject: gpu: nvgpu: gp10b: make local symbol static

Fixed the following sparse warning by making local symbol static:
- platform_gp10b_tegra.c:365: warning: symbol 'ecc_hash_table' was not declared.
                                       Should it be static?

Bug 200088648

Change-Id: Iea1a682c3ee0609730366d44fab91849cd59c9ad
Signed-off-by: Amit Sharma <amisharma@nvidia.com>
Reviewed-on: http://git-master/r/1022410
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
Tested-by: Sachin Nikam <snikam@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 43d90528..dfeba9c4 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -362,7 +362,7 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 #define ECC_STAT_NAME_MAX_SIZE	100
 
 
-DEFINE_HASHTABLE(ecc_hash_table, 5);
+static DEFINE_HASHTABLE(ecc_hash_table, 5);
 
 static struct device_attribute *dev_attr_sm_lrf_ecc_single_err_count_array;
 static struct device_attribute *dev_attr_sm_lrf_ecc_double_err_count_array;
-- 
cgit v1.2.2


From f03ee50232bbd562f47b44430ae9f1e83b424dd6 Mon Sep 17 00:00:00 2001
From: Adeel Raza <araza@nvidia.com>
Date: Mon, 29 Feb 2016 14:25:15 -0800
Subject: gpu: nvgpu: gp10b: only create ECC stats once

The ECC sysfs stat creation function is called on GR init. GR can get
initialized multiple times but we only need to create the ECC stats
once. Therefore, add a check to avoid creating duplicate stat sysfs
nodes.

Change-Id: Ifb338e57643f2f15492df137d2a7521e0c990cf2
Signed-off-by: Adeel Raza <araza@nvidia.com>
Reviewed-on: http://git-master/r/1021660
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index dfeba9c4..e1a96e4c 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -524,6 +524,13 @@ void gr_gp10b_create_sysfs(struct platform_device *dev)
 	int error = 0;
 	struct gk20a *g = get_gk20a(dev);
 
+	/* This stat creation function is called on GR init. GR can get
+	   initialized multiple times but we only need to create the ECC
+	   stats once. Therefore, add the following check to avoid
+	   creating duplicate stat sysfs nodes. */
+	if (g->gr.t18x.ecc_stats.sm_lrf_single_err_count.counters != NULL)
+		return;
+
 	error |= ecc_stat_create(dev,
 				0,
 				"sm_lrf_ecc_single_err_count",
-- 
cgit v1.2.2


From 5244299cdf56047a28d5ba0944649ea0f8cd0928 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Tue, 8 Mar 2016 11:01:47 -0800
Subject: gpu: nvgpu: t18x: update blcg prod settings

Update prod settings to disable stall blcg.

Bug 1729471

Change-Id: I1123bf47159fc9dbb1223aebcacf37361b90743f
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1026611
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
index 7be9b60f..6bcd9f35 100644
--- a/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
+++ b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
@@ -207,7 +207,7 @@ static const struct gating_desc gp10b_blcg_gr[] = {
 	{.addr = 0x00418cf0, .prod = 0x0000c444, .disable = 0x00000000},
 	{.addr = 0x00418d70, .prod = 0x0000c444, .disable = 0x00000000},
 	{.addr = 0x00418f0c, .prod = 0x0000c444, .disable = 0x00000000},
-	{.addr = 0x00418e0c, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00418e0c, .prod = 0x00008444, .disable = 0x00000000},
 	{.addr = 0x00419020, .prod = 0x0000c242, .disable = 0x00000000},
 	{.addr = 0x00419038, .prod = 0x00000042, .disable = 0x00000000},
 	{.addr = 0x00418898, .prod = 0x00004242, .disable = 0x00000000},
-- 
cgit v1.2.2


From eada66b2a90d08e3323ec76dfd4ad78f3c9f0ee5 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 8 Mar 2016 10:54:02 -0800
Subject: gpu: nvgpu: gp10b: Allow importing makefile via include

Refactor makefiles so that there is one makefile, and that file
can be included in the main nvgpu build.

Bug 1476801

Change-Id: I23ac451d695fc64064de2300e83b9d9487c52743
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1028353
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/Makefile            | 33 +++++++++++++++++++++++++--------
 drivers/gpu/nvgpu/gp10b/Makefile      | 31 -------------------------------
 drivers/gpu/nvgpu/vgpu/gp10b/Makefile | 16 ----------------
 3 files changed, 25 insertions(+), 55 deletions(-)
 delete mode 100644 drivers/gpu/nvgpu/gp10b/Makefile
 delete mode 100644 drivers/gpu/nvgpu/vgpu/gp10b/Makefile

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 37cc15a5..c4873257 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -1,10 +1,27 @@
-GCOV_PROFILE := y
+nvgpu-t18x := ../../../../kernel-t18x/drivers/gpu/nvgpu
 
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/drivers/gpu/nvgpu
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/include/linux
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include
-ccflags-$(CONFIG_GK20A) += -Wno-multichar
-ccflags-$(CONFIG_GK20A) += -Werror
+nvgpu-y += \
+	$(nvgpu-t18x)/gp10b/gr_gp10b.o  \
+	$(nvgpu-t18x)/gp10b/gr_ctx_gp10b.o  \
+	$(nvgpu-t18x)/gp10b/ce2_gp10b.o \
+	$(nvgpu-t18x)/gp10b/mc_gp10b.o  \
+	$(nvgpu-t18x)/gp10b/fifo_gp10b.o  \
+	$(nvgpu-t18x)/gp10b/ltc_gp10b.o \
+	$(nvgpu-t18x)/gp10b/mm_gp10b.o \
+	$(nvgpu-t18x)/gp10b/fb_gp10b.o \
+	$(nvgpu-t18x)/gp10b/pmu_gp10b.o \
+	$(nvgpu-t18x)/gp10b/hal_gp10b.o \
+	$(nvgpu-t18x)/gp10b/rpfb_gp10b.o \
+	$(nvgpu-t18x)/gp10b/gp10b_gating_reglist.o \
+	$(nvgpu-t18x)/gp10b/regops_gp10b.o \
+	$(nvgpu-t18x)/gp10b/cde_gp10b.o \
+	$(nvgpu-t18x)/gp10b/therm_gp10b.o \
+	$(nvgpu-t18x)/gp10b/gp10b_sysfs.o
 
-obj-$(CONFIG_GK20A)	+= gp10b/
-obj-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += vgpu/gp10b/
+nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
+
+nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
+	$(nvgpu-t18x)/vgpu/gp10b/vgpu_hal_gp10b.o  \
+	$(nvgpu-t18x)/vgpu/gp10b/vgpu_gr_gp10b.o  \
+	$(nvgpu-t18x)/vgpu/gp10b/vgpu_mm_gp10b.o \
+	$(nvgpu-t18x)/vgpu/gp10b/vgpu_fifo_gp10b.o
diff --git a/drivers/gpu/nvgpu/gp10b/Makefile b/drivers/gpu/nvgpu/gp10b/Makefile
deleted file mode 100644
index 8b930bca..00000000
--- a/drivers/gpu/nvgpu/gp10b/Makefile
+++ /dev/null
@@ -1,31 +0,0 @@
-GCOV_PROFILE := y
-
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/drivers/gpu/nvgpu
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/include
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/drivers/devfreq
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/drivers/gpu/nvgpu
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include/uapi
-
-ccflags-$(CONFIG_GK20A) += -Wno-multichar
-ccflags-y += -Werror
-
-obj-$(CONFIG_GK20A)  += \
-	gr_gp10b.o  \
-	gr_ctx_gp10b.o  \
-	ce2_gp10b.o \
-	mc_gp10b.o  \
-	fifo_gp10b.o  \
-	ltc_gp10b.o \
-	mm_gp10b.o \
-	fb_gp10b.o \
-	pmu_gp10b.o \
-	hal_gp10b.o \
-	rpfb_gp10b.o \
-	gp10b_gating_reglist.o \
-	regops_gp10b.o \
-	cde_gp10b.o \
-	therm_gp10b.o \
-	gp10b_sysfs.o
-
-obj-$(CONFIG_TEGRA_GK20A) += platform_gp10b_tegra.o
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/Makefile b/drivers/gpu/nvgpu/vgpu/gp10b/Makefile
deleted file mode 100644
index fed8a08e..00000000
--- a/drivers/gpu/nvgpu/vgpu/gp10b/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-GCOV_PROFILE := y
-
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/drivers/gpu/nvgpu
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/include
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/drivers/gpu/nvgpu
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include
-ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include/uapi
-
-ccflags-$(CONFIG_GK20A) += -Wno-multichar
-ccflags-y += -Werror
-
-obj-$(CONFIG_GK20A)  += \
-	vgpu_hal_gp10b.o  \
-	vgpu_gr_gp10b.o  \
-	vgpu_mm_gp10b.o \
-	vgpu_fifo_gp10b.o
-- 
cgit v1.2.2


From 57a75c3ba632a0300b932ce574ea3caab8275f4c Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Mon, 14 Mar 2016 12:00:05 -0700
Subject: gpu: nvgpu: gp10b: update prod settings

Add/update following prod settings:
  blcg ce
  slcg ce2

Change-Id: I10a62d980479ad23efd7033d29e269c4aac08834
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1030986
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c | 20 ++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.h |  8 +++++++-
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c            |  6 +++++-
 3 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
index 6bcd9f35..4719b13e 100644
--- a/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
+++ b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
@@ -163,6 +163,11 @@ static const struct gating_desc gp10b_blcg_bus[] = {
 	{.addr = 0x00001c00, .prod = 0x00000042, .disable = 0x00000000},
 };
 
+/* blcg ce */
+static const struct gating_desc gp10b_blcg_ce[] = {
+	{.addr = 0x00104200, .prod = 0x00008242, .disable = 0x00000000},
+};
+
 /* blcg ctxsw prog */
 static const struct gating_desc gp10b_blcg_ctxsw_prog[] = {
 };
@@ -483,6 +488,21 @@ void gp10b_blcg_bus_load_gating_prod(struct gk20a *g,
 	}
 }
 
+/* Program each BLCG CE gating register with its prod or disable value. */
+void gp10b_blcg_ce_load_gating_prod(struct gk20a *g,
+	bool prod)
+{
+	u32 idx;
+	const u32 count = sizeof(gp10b_blcg_ce) / sizeof(gp10b_blcg_ce[0]);
+
+	for (idx = 0; idx < count; idx++) {
+		const struct gating_desc *desc = &gp10b_blcg_ce[idx];
+
+		/* prod selects the .prod setting, otherwise .disable */
+		gk20a_writel(g, desc->addr, prod ? desc->prod : desc->disable);
+	}
+}
+
 void gp10b_blcg_ctxsw_firmware_load_gating_prod(struct gk20a *g,
 	bool prod)
 {
diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.h b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.h
index 465a0b4d..e4080def 100644
--- a/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.h
+++ b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, NVIDIA Corporation. All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA Corporation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -19,6 +19,9 @@
 void gp10b_slcg_bus_load_gating_prod(struct gk20a *g,
 	bool prod);
 
+void gp10b_slcg_ce2_load_gating_prod(struct gk20a *g,
+	bool prod);
+
 void gp10b_slcg_chiplet_load_gating_prod(struct gk20a *g,
 	bool prod);
 
@@ -58,6 +61,9 @@ void gp10b_slcg_xbar_load_gating_prod(struct gk20a *g,
 void gp10b_blcg_bus_load_gating_prod(struct gk20a *g,
 	bool prod);
 
+void gp10b_blcg_ce_load_gating_prod(struct gk20a *g,
+	bool prod);
+
 void gp10b_blcg_ctxsw_firmware_load_gating_prod(struct gk20a *g,
 	bool prod);
 
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 544be96b..37fad41a 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * GP10B Tegra HAL interface
  *
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -47,6 +47,8 @@ static struct gpu_ops gp10b_ops = {
 	.clock_gating = {
 		.slcg_bus_load_gating_prod =
 			gp10b_slcg_bus_load_gating_prod,
+		.slcg_ce2_load_gating_prod =
+			gp10b_slcg_ce2_load_gating_prod,
 		.slcg_chiplet_load_gating_prod =
 			gp10b_slcg_chiplet_load_gating_prod,
 		.slcg_ctxsw_firmware_load_gating_prod =
@@ -71,6 +73,8 @@ static struct gpu_ops gp10b_ops = {
 			gp10b_slcg_xbar_load_gating_prod,
 		.blcg_bus_load_gating_prod =
 			gp10b_blcg_bus_load_gating_prod,
+		.blcg_ce_load_gating_prod =
+			gp10b_blcg_ce_load_gating_prod,
 		.blcg_ctxsw_firmware_load_gating_prod =
 			gp10b_blcg_ctxsw_firmware_load_gating_prod,
 		.blcg_fb_load_gating_prod =
-- 
cgit v1.2.2


From f2bb4f10ce7ff750d40a7ac881385d5902d7dbcd Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 11 Mar 2016 12:45:53 -0800
Subject: gpu: nvgpu: gp10b: Update regops whitelist

Update regops whitelist with two new registers.

Bug 1734151

Change-Id: Id09bdfb1733620bb75d4558299c5e9c7f66bb00b
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1029772
GVS: Gerrit_Virtual_Submit
Reviewed-by: Richard Zhao <rizhao@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/regops_gp10b.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/regops_gp10b.c b/drivers/gpu/nvgpu/gp10b/regops_gp10b.c
index 48f6161d..8934c324 100644
--- a/drivers/gpu/nvgpu/gp10b/regops_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/regops_gp10b.c
@@ -69,6 +69,7 @@ static const struct regop_offset_range gp10b_global_whitelist_ranges[] = {
 	{ 0x00142480,   1},
 	{ 0x001424a0,   1},
 	{ 0x00142550,   1},
+	{ 0x0017e028,   1},
 	{ 0x0017e280,   1},
 	{ 0x0017e294,   1},
 	{ 0x0017e29c,   2},
@@ -138,7 +139,7 @@ static const struct regop_offset_range gp10b_global_whitelist_ranges[] = {
 	{ 0x001b4094,   3},
 	{ 0x001b40a4,   1},
 	{ 0x001b4100,   6},
-	{ 0x001b4124,   1},
+	{ 0x001b4124,   2},
 	{ 0x001b8000,   1},
 	{ 0x001b8008,   1},
 	{ 0x001b8010,   3},
@@ -371,6 +372,7 @@ static const struct regop_offset_range gp10b_global_whitelist_ranges[] = {
 	{ 0x00900100,   1},
 	{ 0x009a0100,   1},
 };
+
 static const u32 gp10b_global_whitelist_ranges_count =
 	ARRAY_SIZE(gp10b_global_whitelist_ranges);
 
-- 
cgit v1.2.2


From 9864f1b0773c079f4517f3bfadddd24b1d5859a6 Mon Sep 17 00:00:00 2001
From: Arul Sekar <aruls@nvidia.com>
Date: Fri, 11 Mar 2016 11:25:40 -0800
Subject: gpu: nvgpu: add function to access ptimer time

bug 1648908

Change-Id: I32211b13489b21eba25f7473a18b9d1a303d2642
Signed-off-by: Arul Sekar <aruls@nvidia.com>
Reviewed-on: http://git-master/r/1029733
Reviewed-by: Arun Gona <agona@nvidia.com>
Tested-by: Arun Gona <agona@nvidia.com>
Reviewed-on: http://git-master/r/1111716
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_timer_gp10b.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_timer_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_timer_gp10b.h
index 88d22128..e0fc59df 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_timer_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_timer_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -98,4 +98,12 @@ static inline u32 timer_pri_timeout_fecs_errcode_r(void)
 {
 	return 0x0000908c;
 }
+static inline u32 timer_time_0_r(void)
+{
+	return 0x00009400;
+}
+static inline u32 timer_time_1_r(void)
+{
+	return 0x00009410;
+}
 #endif
-- 
cgit v1.2.2


From f7872bec493d285d1f8a2c0bda69d9247f932b27 Mon Sep 17 00:00:00 2001
From: Thomas Fleury <tfleury@nvidia.com>
Date: Mon, 15 Feb 2016 11:15:19 +0100
Subject: gpu: nvgpu: setup fecs_trace hal operations

bug 1648908

Change-Id: I630f74f09e0a4143f5028c88634b9793ec86b279
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1022730
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile                    |   1 +
 drivers/gpu/nvgpu/gp10b/fecs_trace_gp10b.c    |  53 ++++++++
 drivers/gpu/nvgpu/gp10b/fecs_trace_gp10b.h    |  23 ++++
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c            |  17 +++
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c           |   2 +
 drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h | 184 +++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h         |   4 +
 drivers/gpu/nvgpu/gp10b/hw_timer_gp10b.h      |   2 +-
 8 files changed, 283 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp10b/fecs_trace_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/fecs_trace_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index c4873257..c29173f5 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -16,6 +16,7 @@ nvgpu-y += \
 	$(nvgpu-t18x)/gp10b/regops_gp10b.o \
 	$(nvgpu-t18x)/gp10b/cde_gp10b.o \
 	$(nvgpu-t18x)/gp10b/therm_gp10b.o \
+	$(nvgpu-t18x)/gp10b/fecs_trace_gp10b.o \
 	$(nvgpu-t18x)/gp10b/gp10b_sysfs.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
diff --git a/drivers/gpu/nvgpu/gp10b/fecs_trace_gp10b.c b/drivers/gpu/nvgpu/gp10b/fecs_trace_gp10b.c
new file mode 100644
index 00000000..7dd200a9
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/fecs_trace_gp10b.c
@@ -0,0 +1,53 @@
+/*
+ * GP10B GPU FECS traces
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "gk20a/fecs_trace_gk20a.h"
+#include "gp10b/hw_ctxsw_prog_gp10b.h"
+#include "gp10b/hw_gr_gp10b.h"
+
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+/* Submit the FECS write-timestamp-record method via the ELPG-protected call. */
+static int gp10b_fecs_trace_flush(struct gk20a *g)
+{
+	int ret;
+	struct fecs_method_op_gk20a op = {
+		.mailbox = { .id = 0, .data = 0,
+			.clr = ~0, .ok = 0, .fail = 0},
+		.method = {
+			.addr = gr_fecs_method_push_adr_write_timestamp_record_v(),
+			.data = 0 },
+		.cond = { .ok = GR_IS_UCODE_OP_NOT_EQUAL,
+			  .fail = GR_IS_UCODE_OP_SKIP },
+	};
+
+	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "");
+	ret = gr_gk20a_elpg_protected_call(g,
+			gr_gk20a_submit_fecs_method_op(g, op, false));
+	if (ret)
+		gk20a_err(dev_from_gk20a(g), "write timestamp record failed");
+	return ret;
+}
+
+void gp10b_init_fecs_trace_ops(struct gpu_ops *ops)
+{
+	gk20a_init_fecs_trace_ops(ops);	/* start from the gk20a ops */
+	ops->fecs_trace.flush = gp10b_fecs_trace_flush;	/* gp10b flush */
+}
+#else
+void gp10b_init_fecs_trace_ops(struct gpu_ops *ops)
+{
+}
+#endif /* CONFIG_GK20A_CTXSW_TRACE */
diff --git a/drivers/gpu/nvgpu/gp10b/fecs_trace_gp10b.h b/drivers/gpu/nvgpu/gp10b/fecs_trace_gp10b.h
new file mode 100644
index 00000000..2a25f4f6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/fecs_trace_gp10b.h
@@ -0,0 +1,23 @@
+/*
+ * GP10B GPU FECS traces
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVGPU_FECS_TRACE_GP10B_H_
+#define _NVGPU_FECS_TRACE_GP10B_H_
+
+struct gpu_ops;
+/* Installs the FECS trace HAL ops; a no-op without CONFIG_GK20A_CTXSW_TRACE. */
+void gp10b_init_fecs_trace_ops(struct gpu_ops *ops);
+
+#endif /* _NVGPU_FECS_TRACE_GP10B_H_ */
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 9eea7d43..f2308ab5 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -902,6 +902,23 @@ static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm,
 		WARN_ON("Cannot map context");
 		return;
 	}
+	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_magic_value_o : %x (expect %x)\n",
+		gk20a_mem_rd32(ctx_ptr +
+				ctxsw_prog_main_image_magic_value_o(), 0),
+		ctxsw_prog_main_image_magic_value_v_value_v());
+
+	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x\n",
+		gk20a_mem_rd32(ctx_ptr +
+				ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), 0));
+
+	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x\n",
+		gk20a_mem_rd32(ctx_ptr +
+				ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), 0));
+
+	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_control : %x\n",
+		gk20a_mem_rd32(ctx_ptr +
+				ctxsw_prog_main_image_context_timestamp_buffer_control_o(), 0));
+
 	gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n",
 		gk20a_mem_rd32(ctx_ptr +
 			ctxsw_prog_main_image_num_save_ops_o(), 0));
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 37fad41a..4f67cb09 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -21,6 +21,7 @@
 #include "gk20a/gk20a.h"
 
 #include "gp10b/gr_gp10b.h"
+#include "gp10b/fecs_trace_gp10b.h"
 #include "gp10b/mc_gp10b.h"
 #include "gp10b/ltc_gp10b.h"
 #include "gp10b/mm_gp10b.h"
@@ -139,6 +140,7 @@ int gp10b_init_hal(struct gk20a *g)
 
 	gp10b_init_mc(gops);
 	gp10b_init_gr(gops);
+	gp10b_init_fecs_trace_ops(gops);
 	gp10b_init_ltc(gops);
 	gp10b_init_fb(gops);
 	gp10b_init_fifo(gops);
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
index 49e92df9..eef9a96f 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ctxsw_prog_gp10b.h
@@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_main_image_patch_count_o(void)
 {
 	return 0x00000010;
 }
+static inline u32 ctxsw_prog_main_image_context_id_o(void)
+{
+	return 0x000000f0;
+}
 static inline u32 ctxsw_prog_main_image_patch_adr_lo_o(void)
 {
 	return 0x00000014;
@@ -286,8 +290,184 @@ static inline u32 ctxsw_prog_main_image_compute_preemption_options_control_cilp_
 {
 	return 0x2;
 }
-static inline u32 ctxsw_prog_main_image_context_id_o(void)
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_control_o(void)
 {
-	return 0x000000f0;
+	return 0x000000ac;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(void)
+{
+	return 0x000000b0;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_v_m(void)
+{
+	return 0xfffffff << 0;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_m(void)
+{
+	return 0x3 << 28;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(void)
+{
+	return 0x30000000;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(void)
+{
+	return 0x000000b4;
+}
+static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 ctxsw_prog_record_timestamp_record_size_in_bytes_v(void)
+{
+	return 0x00000080;
+}
+static inline u32 ctxsw_prog_record_timestamp_record_size_in_words_v(void)
+{
+	return 0x00000020;
+}
+static inline u32 ctxsw_prog_record_timestamp_magic_value_lo_o(void)
+{
+	return 0x00000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_magic_value_lo_v_value_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_magic_value_hi_o(void)
+{
+	return 0x00000004;
+}
+static inline u32 ctxsw_prog_record_timestamp_magic_value_hi_v_value_v(void)
+{
+	return 0x600dbeef;
+}
+static inline u32 ctxsw_prog_record_timestamp_context_id_o(void)
+{
+	return 0x00000008;
+}
+static inline u32 ctxsw_prog_record_timestamp_context_ptr_o(void)
+{
+	return 0x0000000c;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_lo_o(void)
+{
+	return 0x00000018;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_o(void)
+{
+	return 0x0000001c;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_v_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_v_v(u32 r)
+{
+	return (r >> 0) & 0xffffff;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_m(void)
+{
+	return 0xff << 24;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_v(u32 r)
+{
+	return (r >> 24) & 0xff;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_ctxsw_req_by_host_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_ctxsw_req_by_host_f(void)
+{
+	return 0x1000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_f(void)
+{
+	return 0x2000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_wfi_v(void)
+{
+	return 0x0000000a;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_wfi_f(void)
+{
+	return 0xa000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_gfxp_v(void)
+{
+	return 0x0000000b;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_gfxp_f(void)
+{
+	return 0xb000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_ctap_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_ctap_f(void)
+{
+	return 0xc000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_cilp_v(void)
+{
+	return 0x0000000d;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_cilp_f(void)
+{
+	return 0xd000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_save_end_v(void)
+{
+	return 0x00000003;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_save_end_f(void)
+{
+	return 0x3000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_restore_start_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_restore_start_f(void)
+{
+	return 0x4000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_context_start_v(void)
+{
+	return 0x00000005;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_context_start_f(void)
+{
+	return 0x5000000;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(void)
+{
+	return 0x000000ff;
+}
+static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_f(void)
+{
+	return 0xff000000;
 }
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 62ac1327..26578bb5 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -1094,6 +1094,10 @@ static inline u32 gr_fecs_method_push_adr_set_watchdog_timeout_f(void)
 {
 	return 0x21;
 }
+static inline u32 gr_fecs_method_push_adr_write_timestamp_record_v(void)
+{
+	return 0x0000003d;
+}
 static inline u32 gr_fecs_method_push_adr_discover_preemption_image_size_v(void)
 {
 	return 0x0000001a;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_timer_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_timer_gp10b.h
index e0fc59df..df27154f 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_timer_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_timer_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
-- 
cgit v1.2.2


From 0a98bb9fcf09eaf6f39aa8a5bc69c1457c11933a Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Wed, 9 Dec 2015 13:54:54 -0800
Subject: gpu: nvgpu: gp10b: add emc clock request

Use Bandwidth manager API to request required
emc clock.

Bug 1673672

Change-Id: I909213d2a69a45939247fd079b1c57ce93be6e0e
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/843777
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 56 +++++++++++++++++++++++++-
 1 file changed, 54 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index e1a96e4c..27274e95 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -29,16 +29,23 @@
 #include <linux/hashtable.h>
 #include "gk20a/platform_gk20a.h"
 #include "gk20a/gk20a.h"
+#include "gk20a/gk20a_scale.h"
 #include "platform_tegra.h"
 #include "gr_gp10b.h"
 #include "ltc_gp10b.h"
 #include "hw_gr_gp10b.h"
 #include "hw_ltc_gp10b.h"
 #include "gp10b_sysfs.h"
+#include <linux/platform/tegra/emc_bwmgr.h>
 
 #define GP10B_MAX_SUPPORTED_FREQS 11
 static unsigned long gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS];
 
+#define TEGRA_GP10B_BW_PER_FREQ 64
+#define TEGRA_DDR4_BW_PER_FREQ 16
+
+#define EMC_BW_RATIO  (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ)
+
 static struct {
 	char *name;
 	unsigned long default_rate;
@@ -84,6 +91,30 @@ static int gp10b_tegra_get_clocks(struct platform_device *pdev)
 	return 0;
 }
 
+static void gp10b_tegra_scale_init(struct platform_device *pdev)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(pdev);
+	struct gk20a_scale_profile *profile = platform->g->scale_profile;
+	struct tegra_bwmgr_client *bwmgr_handle;
+
+	bwmgr_handle = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU);
+	if (!bwmgr_handle)
+		return;
+
+	profile->private_data = (void *)bwmgr_handle;
+}
+
+static void gp10b_tegra_scale_exit(struct platform_device *pdev)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(pdev);
+	struct gk20a_scale_profile *profile = platform->g->scale_profile;
+
+	if (profile)
+		tegra_bwmgr_unregister(
+			(struct tegra_bwmgr_client *)profile->private_data);
+}
+
+
 static int gp10b_tegra_probe(struct platform_device *pdev)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(pdev);
@@ -145,6 +176,9 @@ static int gp10b_tegra_late_probe(struct platform_device *pdev)
 	nvhost_register_client_domain(dev_to_genpd(&pdev->dev));
 	/*Create GP10B specific sysfs*/
 	gp10b_create_sysfs(pdev);
+
+	/* Initialise tegra specific scaling quirks */
+	gp10b_tegra_scale_init(pdev);
 	return 0;
 }
 
@@ -155,6 +189,10 @@ static int gp10b_tegra_remove(struct platform_device *pdev)
 	gr_gp10b_remove_sysfs(&pdev->dev);
 	/*Remove GP10B specific sysfs*/
 	gp10b_remove_sysfs(&pdev->dev);
+
+	/* deinitialise tegra specific scaling quirks */
+	gp10b_tegra_scale_exit(pdev);
+
 	return 0;
 
 }
@@ -240,7 +278,6 @@ static void gp10b_tegra_prescale(struct platform_device *pdev)
 	gk20a_dbg_fn("");
 
 	gk20a_pmu_load_norm(g, &avg);
-	/* TBD - Notify EDP about changed constrains */
 
 	gk20a_dbg_fn("done");
 }
@@ -248,8 +285,23 @@ static void gp10b_tegra_prescale(struct platform_device *pdev)
 static void gp10b_tegra_postscale(struct platform_device *pdev,
 					unsigned long freq)
 {
-	/* TBD -  notify EMC about frequency change */
+	struct gk20a_platform *platform = gk20a_get_platform(pdev);
+	struct gk20a_scale_profile *profile = platform->g->scale_profile;
+	struct gk20a *g = get_gk20a(pdev);
+	unsigned long emc_rate;
+
 	gk20a_dbg_fn("");
+	if (profile) {
+		emc_rate = (freq * EMC_BW_RATIO * g->emc3d_ratio) / 1000;
+
+		if (emc_rate > tegra_bwmgr_get_max_emc_rate())
+			emc_rate = tegra_bwmgr_get_max_emc_rate();
+
+		tegra_bwmgr_set_emc(
+			(struct tegra_bwmgr_client *)profile->private_data,
+			emc_rate, TEGRA_BWMGR_SET_EMC_FLOOR);
+	}
+	gk20a_dbg_fn("done");
 }
 
 static unsigned long gp10b_get_clk_rate(struct platform_device *dev)
-- 
cgit v1.2.2


From 960704ca2579ba78cd7996f3b5d29c0f8461596b Mon Sep 17 00:00:00 2001
From: Alex Van Brunt <avanbrunt@nvidia.com>
Date: Mon, 4 Apr 2016 14:05:12 -0700
Subject: gpu: nvgpu: move t18x code to kernel-nvgpu-t18x

Part of moving the nvgpu driver out of the common kernel is moving the T18x
part of the nvgpu driver out of kernel-t18x. So, update the Makefile to
reflect this change.

bug 200187033

Change-Id: I61288943ee210840e483b3e3e14758d4a47a0a2f
Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
Reviewed-on: http://git-master/r/1119965
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index c29173f5..e5ed32d4 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -1,4 +1,4 @@
-nvgpu-t18x := ../../../../kernel-t18x/drivers/gpu/nvgpu
+nvgpu-t18x := ../../../../kernel-nvgpu-t18x/drivers/gpu/nvgpu
 
 nvgpu-y += \
 	$(nvgpu-t18x)/gp10b/gr_gp10b.o  \
-- 
cgit v1.2.2


From 58adb7385de5dd3dee6d1493edbf5ee33d142dbc Mon Sep 17 00:00:00 2001
From: Sami Kiminki <skiminki@nvidia.com>
Date: Mon, 10 Aug 2015 12:06:18 +0300
Subject: gpu: nvgpu: Determine ECC-enabled units for GP10B

Determine ECC-enabled units for GP10B by reading fuses/registers.

Bug 1637486

Change-Id: I6431709e3c405d6156dd96438df14d4054b48644
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Signed-off-by: Adeel Raza <araza@nvidia.com>
Reviewed-on: http://git-master/r/780992
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1120463
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile              |   3 +-
 drivers/gpu/nvgpu/gp10b/gp10b.c         | 110 ++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/gp10b.h         |  26 ++++++++
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c     |   3 +
 drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h |  10 ++-
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h   |  32 ++++++++++
 6 files changed, 182 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp10b/gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index e5ed32d4..13d52f84 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -17,7 +17,8 @@ nvgpu-y += \
 	$(nvgpu-t18x)/gp10b/cde_gp10b.o \
 	$(nvgpu-t18x)/gp10b/therm_gp10b.o \
 	$(nvgpu-t18x)/gp10b/fecs_trace_gp10b.o \
-	$(nvgpu-t18x)/gp10b/gp10b_sysfs.o
+	$(nvgpu-t18x)/gp10b/gp10b_sysfs.o \
+	$(nvgpu-t18x)/gp10b/gp10b.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
 
diff --git a/drivers/gpu/nvgpu/gp10b/gp10b.c b/drivers/gpu/nvgpu/gp10b/gp10b.c
new file mode 100644
index 00000000..a541dda3
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/gp10b.c
@@ -0,0 +1,110 @@
+/*
+ * GP10B Graphics
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "gk20a/gk20a.h"
+#include "hw_fuse_gp10b.h"
+#include "hw_gr_gp10b.h"
+
+static u64 gp10b_detect_ecc_enabled_units(struct gk20a *g)
+{
+	u64 ecc_enabled_units = 0;
+	u32 opt_ecc_en = gk20a_readl(g, fuse_opt_ecc_en_r());
+	u32 opt_feature_fuses_override_disable =
+			gk20a_readl(g,
+				fuse_opt_feature_fuses_override_disable_r());
+	u32 fecs_feature_override_ecc =
+				gk20a_readl(g,
+					gr_fecs_feature_override_ecc_r());
+
+	if (opt_feature_fuses_override_disable) {
+		if (opt_ecc_en)
+			ecc_enabled_units = NVGPU_GPU_FLAGS_ALL_ECC_ENABLED;
+		else
+			ecc_enabled_units = 0;
+	} else {
+		/* SM LRF */
+		if (gr_fecs_feature_override_ecc_sm_lrf_override_v(
+						fecs_feature_override_ecc)) {
+			if (gr_fecs_feature_override_ecc_sm_lrf_v(
+						fecs_feature_override_ecc)) {
+				ecc_enabled_units |=
+					NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF;
+			}
+		} else {
+			if (opt_ecc_en) {
+				ecc_enabled_units |=
+					NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF;
+			}
+		}
+
+		/* SM SHM */
+		if (gr_fecs_feature_override_ecc_sm_shm_override_v(
+						fecs_feature_override_ecc)) {
+			if (gr_fecs_feature_override_ecc_sm_shm_v(
+						fecs_feature_override_ecc)) {
+				ecc_enabled_units |=
+					NVGPU_GPU_FLAGS_ECC_ENABLED_SM_SHM;
+			}
+		} else {
+			if (opt_ecc_en) {
+				ecc_enabled_units |=
+					NVGPU_GPU_FLAGS_ECC_ENABLED_SM_SHM;
+			}
+		}
+
+		/* TEX */
+		if (gr_fecs_feature_override_ecc_tex_override_v(
+						fecs_feature_override_ecc)) {
+			if (gr_fecs_feature_override_ecc_tex_v(
+						fecs_feature_override_ecc)) {
+				ecc_enabled_units |=
+					NVGPU_GPU_FLAGS_ECC_ENABLED_TEX;
+			}
+		} else {
+			if (opt_ecc_en) {
+				ecc_enabled_units |=
+					NVGPU_GPU_FLAGS_ECC_ENABLED_TEX;
+			}
+		}
+
+		/* LTC */
+		if (gr_fecs_feature_override_ecc_ltc_override_v(
+						fecs_feature_override_ecc)) {
+			if (gr_fecs_feature_override_ecc_ltc_v(
+						fecs_feature_override_ecc)) {
+				ecc_enabled_units |=
+					NVGPU_GPU_FLAGS_ECC_ENABLED_LTC;
+			}
+		} else {
+			if (opt_ecc_en) {
+				ecc_enabled_units |=
+					NVGPU_GPU_FLAGS_ECC_ENABLED_LTC;
+			}
+		}
+	}
+
+	return ecc_enabled_units;
+}
+
+int gp10b_init_gpu_characteristics(struct gk20a *g)
+{
+	gk20a_init_gpu_characteristics(g);
+	g->gpu_characteristics.flags |= gp10b_detect_ecc_enabled_units(g);
+
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/gp10b/gp10b.h b/drivers/gpu/nvgpu/gp10b/gp10b.h
new file mode 100644
index 00000000..263f3cbe
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/gp10b.h
@@ -0,0 +1,26 @@
+/*
+ * GP10B Graphics
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GP10B_H
+#define GP10B_H
+
+#include "gk20a/gk20a.h"
+
+int gp10b_init_gpu_characteristics(struct gk20a *g);
+
+#endif /* GP10B_H */
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 4f67cb09..427936c7 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -41,6 +41,8 @@
 #include "gm20b/clk_gm20b.h"
 #include <linux/tegra-fuse.h>
 
+#include "gp10b.h"
+
 #define FUSE_OPT_PRIV_SEC_EN_0 0x264
 #define PRIV_SECURITY_ENABLED 0x01
 
@@ -153,6 +155,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gp10b_init_cde_ops(gops);
 	gp10b_init_therm_ops(gops);
 	gops->name = "gp10b";
+	gops->chip_init_gpu_characteristics = gp10b_init_gpu_characteristics;
 
 	c->twod_class = FERMI_TWOD_A;
 	c->threed_class = PASCAL_A;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
index b6b68718..ae524ce5 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -126,4 +126,12 @@ static inline u32 fuse_status_opt_fbp_idx_v(u32 r, u32 i)
 {
 	return (r >> (0 + i*0)) & 0x1;
 }
+static inline u32 fuse_opt_ecc_en_r(void)
+{
+	return 0x00021228;
+}
+static inline u32 fuse_opt_feature_fuses_override_disable_r(void)
+{
+	return 0x000213f0;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 26578bb5..b3fd704b 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -1486,6 +1486,38 @@ static inline u32 gr_fecs_feature_override_ecc_r(void)
 {
 	return 0x00409658;
 }
+static inline u32 gr_fecs_feature_override_ecc_sm_lrf_override_v(u32 r)
+{
+	return (r >> 3) & 0x1;
+}
+static inline u32 gr_fecs_feature_override_ecc_sm_shm_override_v(u32 r)
+{
+	return (r >> 7) & 0x1;
+}
+static inline u32 gr_fecs_feature_override_ecc_tex_override_v(u32 r)
+{
+	return (r >> 11) & 0x1;
+}
+static inline u32 gr_fecs_feature_override_ecc_ltc_override_v(u32 r)
+{
+	return (r >> 15) & 0x1;
+}
+static inline u32 gr_fecs_feature_override_ecc_sm_lrf_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 gr_fecs_feature_override_ecc_sm_shm_v(u32 r)
+{
+	return (r >> 4) & 0x1;
+}
+static inline u32 gr_fecs_feature_override_ecc_tex_v(u32 r)
+{
+	return (r >> 8) & 0x1;
+}
+static inline u32 gr_fecs_feature_override_ecc_ltc_v(u32 r)
+{
+	return (r >> 12) & 0x1;
+}
 static inline u32 gr_gpc0_gpccs_ctxsw_idlestate_r(void)
 {
 	return 0x00502420;
-- 
cgit v1.2.2


From 4dee2dd64c6df38000477cd826ab70508cb8c017 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Mon, 21 Mar 2016 18:24:49 +0530
Subject: gpu: nvgpu: post CILP_PREEMPTION_STARTED/COMPLETE events

Remove posting of events using old channel event API i.e.
gk20a_channel_post_event()

Also, update gk20a_channel_semaphore_wakeup() to post
events when called from ce2_nonblockpipe_isr()

Bug 200089620

Change-Id: I677cdab11183a649663ff9272a527c63b9994430
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1112275
(cherry picked from commit 4840efda393cd5928f1a8463db8b52cc586860bc)
Reviewed-on: http://git-master/r/1120289
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/ce2_gp10b.c |  2 +-
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c  | 20 ++++++++++++++++----
 2 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/ce2_gp10b.c b/drivers/gpu/nvgpu/gp10b/ce2_gp10b.c
index b2d417b7..4cb13f3b 100644
--- a/drivers/gpu/nvgpu/gp10b/ce2_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ce2_gp10b.c
@@ -26,7 +26,7 @@ static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
 	gk20a_dbg(gpu_dbg_intr, "ce2 non-blocking pipe interrupt\n");
 
 	/* wake theads waiting in this channel */
-	gk20a_channel_semaphore_wakeup(g);
+	gk20a_channel_semaphore_wakeup(g, true);
 	return ce2_intr_status_nonblockpipe_pending_f();
 }
 
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index f2308ab5..1aba35f3 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1491,6 +1491,16 @@ static int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g, struct channel_gk2
 	gr_ctx->t18x.cilp_preempt_pending = true;
 	g->gr.t18x.cilp_preempt_pending_chid = fault_ch->hw_chid;
 
+	if (gk20a_is_channel_marked_as_tsg(fault_ch)) {
+		struct tsg_gk20a *tsg = &g->fifo.tsg[fault_ch->tsgid];
+
+		gk20a_tsg_event_id_post_event(tsg,
+				NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED);
+	} else {
+		gk20a_channel_event_id_post_event(fault_ch,
+				NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED);
+	}
+
 	return 0;
 }
 
@@ -1684,8 +1694,6 @@ static int gr_gp10b_handle_fecs_error(struct gk20a *g,
 		}
 
 		if (gk20a_gr_sm_debugger_attached(g)) {
-			gk20a_err(dev_from_gk20a(g), "CILP: posting usermode event");
-
 			if (gk20a_is_channel_marked_as_tsg(ch)) {
 				struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
 				struct channel_gk20a *__ch;
@@ -1693,12 +1701,16 @@ static int gr_gp10b_handle_fecs_error(struct gk20a *g,
 				mutex_lock(&tsg->ch_list_lock);
 				list_for_each_entry(__ch, &tsg->ch_list, ch_entry) {
 					gk20a_dbg_gpu_post_events(__ch);
-					gk20a_channel_post_event(__ch);
 				}
 				mutex_unlock(&tsg->ch_list_lock);
+
+				gk20a_tsg_event_id_post_event(tsg,
+					NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE);
 			} else {
 				gk20a_dbg_gpu_post_events(ch);
-				gk20a_channel_post_event(ch);
+
+				gk20a_channel_event_id_post_event(ch,
+					NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE);
 			}
 		}
 
-- 
cgit v1.2.2


From be7ee41989008b76ba118f6f520ba9b1b1efd44c Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 5 Apr 2016 19:41:01 -0700
Subject: gpu: nvgpu: gp10b: Sync with register generator

Use re-generated register definitions. This synchronizes
kernel with the register generator.

Change-Id: I5ad34ad0b92327091758a2d10581a1b4170fa919
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1120811
---
 drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h |  6 +++-
 drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h |  2 +-
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h   | 64 +++++++++++++++++++++------------
 3 files changed, 48 insertions(+), 24 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
index 6f7e09ff..34977523 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -110,6 +110,10 @@ static inline u32 fifo_eng_runlist_length_f(u32 v)
 {
 	return (v & 0xffff) << 0;
 }
+static inline u32 fifo_eng_runlist_length_max_v(void)
+{
+	return 0x0000ffff;
+}
 static inline u32 fifo_eng_runlist_pending_true_f(void)
 {
 	return 0x100000;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
index 9ce9448e..30e4307d 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index b3fd704b..78792f50 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -1114,6 +1114,10 @@ static inline u32 gr_fecs_host_int_status_r(void)
 {
 	return 0x00409c18;
 }
+static inline u32 gr_fecs_host_int_status_fault_during_ctxsw_f(u32 v)
+{
+	return (v & 0x1) << 16;
+}
 static inline u32 gr_fecs_host_int_status_umimp_firmware_method_f(u32 v)
 {
 	return (v & 0x1) << 17;
@@ -3462,6 +3466,10 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_r(void)
 {
 	return 0x00504610;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_m(void)
+{
+	return 0x1 << 0;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(u32 r)
 {
 	return (r >> 0) & 0x1;
@@ -3494,6 +3502,10 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_run_trigger_task_f(void)
 {
 	return 0x40000000;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_m(void)
+{
+	return 0x1 << 1;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_v(u32 r)
 {
 	return (r >> 1) & 0x1;
@@ -3502,6 +3514,10 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f(void)
 {
 	return 0x0;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_m(void)
+{
+	return 0x1 << 2;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_v(u32 r)
 {
 	return (r >> 2) & 0x1;
@@ -3510,6 +3526,14 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f(void)
 {
 	return 0x0;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_stop_on_any_warp_disable_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_stop_on_any_sm_disable_v(void)
+{
+	return 0x00000000;
+}
 static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void)
 {
 	return 0x00504614;
@@ -3522,13 +3546,9 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void)
 {
 	return 0x00504634;
 }
-static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_warp_disable_v(void)
+static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(void)
 {
-	return 0x00000000;
-}
-static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_sm_disable_v(void)
-{
-	return 0x00000000;
+	return 0x00419e24;
 }
 static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
 {
@@ -3606,22 +3626,6 @@ static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(
 {
 	return 0x40;
 }
-static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_r(void)
-{
-	return 0x00504224;
-}
-static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_intr_pending_f(void)
-{
-	return 0x1;
-}
-static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_ecc_sec_pending_f(void)
-{
-	return 0x80;
-}
-static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f(void)
-{
-	return 0x100;
-}
 static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_sm_to_sm_fault_pending_f(void)
 {
 	return 0x1;
@@ -3642,6 +3646,22 @@ static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_timeout_error_pending_f(void)
 {
 	return 0x80000000;
 }
+static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_r(void)
+{
+	return 0x00504224;
+}
+static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_intr_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_ecc_sec_pending_f(void)
+{
+	return 0x80;
+}
+static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f(void)
+{
+	return 0x100;
+}
 static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_r(void)
 {
 	return 0x00504648;
-- 
cgit v1.2.2


From 7ed45599dc18c4c1b82792b8f451329fbc72f712 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Tue, 29 Mar 2016 17:48:03 -0700
Subject: gpu: nvgpu: gp10b: disable force_reset_in_do_idle

Since gpu rail gating is enabled, force_reset in
idle can be disabled.

Bug 200183798

Change-Id: I04ed04b66e3059459ec32cbffbfdb6756b009200
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1120147
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 27274e95..6cb89cae 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -407,7 +407,7 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	.reset_assert = gp10b_tegra_reset_assert,
 	.reset_deassert = gp10b_tegra_reset_deassert,
 
-	.force_reset_in_do_idle = true,
+	.force_reset_in_do_idle = false,
 };
 
 
-- 
cgit v1.2.2


From bd688d31ce7df4a2d9134a521fa4c4f5eba06c2d Mon Sep 17 00:00:00 2001
From: Peter Daifuku <pdaifuku@nvidia.com>
Date: Wed, 16 Mar 2016 20:03:10 -0700
Subject: gpu: nvgpu: Add fbpa number and stride

Add fbpa number and stride, used in hwpm context switch code

Bug 1648200

Change-Id: I44570c072b1266d7ec2fc5dfb7fa73000ac01831
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1120451
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
index a315ae2d..dedc5a3f 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -70,6 +70,10 @@ static inline u32 proj_lts_stride_v(void)
 {
 	return 0x00000200;
 }
+static inline u32 proj_fbpa_stride_v(void)
+{
+	return 0x00004000;
+}
 static inline u32 proj_ppc_in_gpc_base_v(void)
 {
 	return 0x00003000;
@@ -114,6 +118,10 @@ static inline u32 proj_scal_litter_num_fbps_v(void)
 {
 	return 0x00000001;
 }
+static inline u32 proj_scal_litter_num_fbpas_v(void)
+{
+	return 0x00000001;
+}
 static inline u32 proj_scal_litter_num_gpcs_v(void)
 {
 	return 0x00000001;
-- 
cgit v1.2.2


From fce01666d57c9c76c21d4ac31adc225a2a6c2e42 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 30 Mar 2016 14:03:35 -0700
Subject: gpu: nvgpu: Use device instead of platform_device

Use struct device instead of struct platform_device wherever
possible. This allows adding other bus types later.

Change-Id: I90623c020919ca8e2e5b31d53914c324d2dc6af9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1120464
---
 drivers/gpu/nvgpu/gp10b/cde_gp10b.c            |   4 +-
 drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c          |  18 ++---
 drivers/gpu/nvgpu/gp10b/gp10b_sysfs.h          |   2 +-
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h             |   6 +-
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 101 ++++++++++++-------------
 5 files changed, 61 insertions(+), 70 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/cde_gp10b.c b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
index dadec4d2..4a16abd1 100644
--- a/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * GP10B CDE
  *
- * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -46,7 +46,7 @@ static void gp10b_cde_get_program_numbers(struct gk20a *g,
 		}
 		if (g->mm.bypass_smmu) {
 			if (!g->mm.disable_bigpage) {
-				gk20a_warn(&g->dev->dev,
+				gk20a_warn(g->dev,
 					   "when bypass_smmu is 1, disable_bigpage must be 1 too");
 			}
 			hprog |= 1;
diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c
index 800f39c3..5035bb99 100644
--- a/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c
+++ b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c
@@ -20,11 +20,10 @@
 
 #define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH)
 
-static ssize_t ecc_enable_store(struct device *device,
+static ssize_t ecc_enable_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
-	struct platform_device *ndev = to_platform_device(device);
-	struct gk20a *g = get_gk20a(ndev);
+	struct gk20a *g = get_gk20a(dev);
 	u32 ecc_mask;
 	u32 err = 0;
 
@@ -33,17 +32,16 @@ static ssize_t ecc_enable_store(struct device *device,
 		err = g->ops.pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd
 			(g, ecc_mask);
 		if (err)
-			dev_err(device, "ECC override did not happen\n");
+			dev_err(dev, "ECC override did not happen\n");
 	} else
 		return -EINVAL;
 	return count;
 }
 
-static ssize_t ecc_enable_read(struct device *device,
+static ssize_t ecc_enable_read(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct platform_device *ndev = to_platform_device(device);
-	struct gk20a *g = get_gk20a(ndev);
+	struct gk20a *g = get_gk20a(dev);
 
 	return sprintf(buf, "ecc override =0x%x\n",
 			g->ops.gr.get_lrf_tex_ltc_dram_override(g));
@@ -51,13 +49,13 @@ static ssize_t ecc_enable_read(struct device *device,
 
 static DEVICE_ATTR(ecc_enable, ROOTRW, ecc_enable_read, ecc_enable_store);
 
-void gp10b_create_sysfs(struct platform_device *dev)
+void gp10b_create_sysfs(struct device *dev)
 {
 	int error = 0;
 
-	error |= device_create_file(&dev->dev, &dev_attr_ecc_enable);
+	error |= device_create_file(dev, &dev_attr_ecc_enable);
 	if (error)
-		dev_err(&dev->dev, "Failed to create sysfs attributes!\n");
+		dev_err(dev, "Failed to create sysfs attributes!\n");
 }
 
 void gp10b_remove_sysfs(struct device *dev)
diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.h b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.h
index c1d101da..7c3d3400 100644
--- a/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.h
+++ b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.h
@@ -19,7 +19,7 @@
 /*ECC Fuse*/
 #define FUSE_OPT_ECC_EN  0x358
 
-void gp10b_create_sysfs(struct platform_device *dev);
+void gp10b_create_sysfs(struct device *dev);
 void gp10b_remove_sysfs(struct device *dev);
 
 #endif /*_GP10B_SYSFS_H_*/
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index bd4b5879..25509d50 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -1,7 +1,7 @@
 /*
- * GM20B GPU GR
+ * GP10B GPU GR
  *
- * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -35,7 +35,7 @@ enum {
 void gp10b_init_gr(struct gpu_ops *ops);
 int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
 			struct mem_desc *mem);
-void gr_gp10b_create_sysfs(struct platform_device *dev);
+void gr_gp10b_create_sysfs(struct device *dev);
 
 struct ecc_stat {
 	char **names;
diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 6cb89cae..7d8312fc 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -1,7 +1,5 @@
 /*
- * drivers/video/tegra/host/gk20a/platform_gk20a_tegra.c
- *
- * GK20A Tegra Platform Interface
+ * GP10B Tegra Platform Interface
  *
  * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
@@ -62,11 +60,9 @@ static void gr_gp10b_remove_sysfs(struct device *dev);
  * the clock information to gp10b platform data.
  */
 
-static int gp10b_tegra_get_clocks(struct platform_device *pdev)
+static int gp10b_tegra_get_clocks(struct device *dev)
 {
-	struct gk20a_platform *platform = platform_get_drvdata(pdev);
-	struct gk20a *g = get_gk20a(pdev);
-	struct device *dev = dev_from_gk20a(g);
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
 	int i;
 
 	if (tegra_platform_is_linsim())
@@ -79,7 +75,7 @@ static int gp10b_tegra_get_clocks(struct platform_device *pdev)
 
 		c = clk_get(dev, tegra_gp10b_clocks[i].name);
 		if (IS_ERR(c)) {
-			gk20a_err(&pdev->dev, "cannot get clock %s",
+			gk20a_err(dev, "cannot get clock %s",
 					tegra_gp10b_clocks[i].name);
 		} else {
 			clk_set_rate(c, rate);
@@ -91,9 +87,9 @@ static int gp10b_tegra_get_clocks(struct platform_device *pdev)
 	return 0;
 }
 
-static void gp10b_tegra_scale_init(struct platform_device *pdev)
+static void gp10b_tegra_scale_init(struct device *dev)
 {
-	struct gk20a_platform *platform = gk20a_get_platform(pdev);
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
 	struct gk20a_scale_profile *profile = platform->g->scale_profile;
 	struct tegra_bwmgr_client *bwmgr_handle;
 
@@ -104,9 +100,9 @@ static void gp10b_tegra_scale_init(struct platform_device *pdev)
 	profile->private_data = (void *)bwmgr_handle;
 }
 
-static void gp10b_tegra_scale_exit(struct platform_device *pdev)
+static void gp10b_tegra_scale_exit(struct device *dev)
 {
-	struct gk20a_platform *platform = gk20a_get_platform(pdev);
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
 	struct gk20a_scale_profile *profile = platform->g->scale_profile;
 
 	if (profile)
@@ -114,30 +110,29 @@ static void gp10b_tegra_scale_exit(struct platform_device *pdev)
 			(struct tegra_bwmgr_client *)profile->private_data);
 }
 
-
-static int gp10b_tegra_probe(struct platform_device *pdev)
+static int gp10b_tegra_probe(struct device *dev)
 {
-	struct gk20a_platform *platform = gk20a_get_platform(pdev);
-	struct device_node *np = pdev->dev.of_node;
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	struct device_node *np = dev->of_node;
 	struct device_node *host1x_node;
 	struct platform_device *host1x_pdev;
 	const __be32 *host1x_ptr;
 
 	host1x_ptr = of_get_property(np, "nvidia,host1x", NULL);
 	if (!host1x_ptr) {
-		gk20a_err(&pdev->dev, "host1x device not available");
+		gk20a_err(dev, "host1x device not available");
 		return -ENOSYS;
 	}
 
 	host1x_node = of_find_node_by_phandle(be32_to_cpup(host1x_ptr));
 	host1x_pdev = of_find_device_by_node(host1x_node);
 	if (!host1x_pdev) {
-		gk20a_err(&pdev->dev, "host1x device not available");
+		gk20a_err(dev, "host1x device not available");
 		return -ENOSYS;
 	}
 
 	platform->g->host1x_dev = host1x_pdev;
-	platform->bypass_smmu = !device_is_iommuable(&pdev->dev);
+	platform->bypass_smmu = !device_is_iommuable(dev);
 	platform->disable_bigpage = platform->bypass_smmu;
 
 	platform->g->gr.t18x.ctx_vars.dump_ctxsw_stats_on_channel_close
@@ -165,39 +160,39 @@ static int gp10b_tegra_probe(struct platform_device *pdev)
 			&platform->g->gr.t18x.
 				ctx_vars.dump_ctxsw_stats_on_channel_close);
 
-	gp10b_tegra_get_clocks(pdev);
+	gp10b_tegra_get_clocks(dev);
 
 	return 0;
 }
 
-static int gp10b_tegra_late_probe(struct platform_device *pdev)
+static int gp10b_tegra_late_probe(struct device *dev)
 {
 	/* Make gk20a power domain a subdomain of host1x */
-	nvhost_register_client_domain(dev_to_genpd(&pdev->dev));
+	nvhost_register_client_domain(dev_to_genpd(dev));
 	/*Create GP10B specific sysfs*/
-	gp10b_create_sysfs(pdev);
+	gp10b_create_sysfs(dev);
 
 	/* Initialise tegra specific scaling quirks */
-	gp10b_tegra_scale_init(pdev);
+	gp10b_tegra_scale_init(dev);
 	return 0;
 }
 
-static int gp10b_tegra_remove(struct platform_device *pdev)
+static int gp10b_tegra_remove(struct device *dev)
 {
 	/* remove gk20a power subdomain from host1x */
-	nvhost_unregister_client_domain(dev_to_genpd(&pdev->dev));
-	gr_gp10b_remove_sysfs(&pdev->dev);
+	nvhost_unregister_client_domain(dev_to_genpd(dev));
+	gr_gp10b_remove_sysfs(dev);
 	/*Remove GP10B specific sysfs*/
-	gp10b_remove_sysfs(&pdev->dev);
+	gp10b_remove_sysfs(dev);
 
 	/* deinitialise tegra specific scaling quirks */
-	gp10b_tegra_scale_exit(pdev);
+	gp10b_tegra_scale_exit(dev);
 
 	return 0;
 
 }
 
-static bool gp10b_tegra_is_railgated(struct platform_device *pdev)
+static bool gp10b_tegra_is_railgated(struct device *dev)
 {
 	bool ret = false;
 
@@ -207,9 +202,9 @@ static bool gp10b_tegra_is_railgated(struct platform_device *pdev)
 	return ret;
 }
 
-static int gp10b_tegra_railgate(struct platform_device *pdev)
+static int gp10b_tegra_railgate(struct device *dev)
 {
-	struct gk20a_platform *platform = gk20a_get_platform(pdev);
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
 
 	if (tegra_bpmp_running() &&
 	    tegra_powergate_is_powered(TEGRA_POWERGATE_GPU)) {
@@ -223,10 +218,10 @@ static int gp10b_tegra_railgate(struct platform_device *pdev)
 	return 0;
 }
 
-static int gp10b_tegra_unrailgate(struct platform_device *pdev)
+static int gp10b_tegra_unrailgate(struct device *dev)
 {
 	int ret = 0;
-	struct gk20a_platform *platform = gk20a_get_platform(pdev);
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
 
 	if (tegra_bpmp_running()) {
 		int i;
@@ -244,7 +239,7 @@ static int gp10b_tegra_suspend(struct device *dev)
 	return 0;
 }
 
-static int gp10b_tegra_reset_assert(struct platform_device *dev)
+static int gp10b_tegra_reset_assert(struct device *dev)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(dev);
 	int ret = 0;
@@ -257,7 +252,7 @@ static int gp10b_tegra_reset_assert(struct platform_device *dev)
 	return ret;
 }
 
-static int gp10b_tegra_reset_deassert(struct platform_device *dev)
+static int gp10b_tegra_reset_deassert(struct device *dev)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(dev);
 	int ret = 0;
@@ -270,9 +265,9 @@ static int gp10b_tegra_reset_deassert(struct platform_device *dev)
 	return ret;
 }
 
-static void gp10b_tegra_prescale(struct platform_device *pdev)
+static void gp10b_tegra_prescale(struct device *dev)
 {
-	struct gk20a *g = get_gk20a(pdev);
+	struct gk20a *g = get_gk20a(dev);
 	u32 avg = 0;
 
 	gk20a_dbg_fn("");
@@ -282,7 +277,7 @@ static void gp10b_tegra_prescale(struct platform_device *pdev)
 	gk20a_dbg_fn("done");
 }
 
-static void gp10b_tegra_postscale(struct platform_device *pdev,
+static void gp10b_tegra_postscale(struct device *pdev,
 					unsigned long freq)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(pdev);
@@ -304,7 +299,7 @@ static void gp10b_tegra_postscale(struct platform_device *pdev,
 	gk20a_dbg_fn("done");
 }
 
-static unsigned long gp10b_get_clk_rate(struct platform_device *dev)
+static unsigned long gp10b_get_clk_rate(struct device *dev)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(dev);
 
@@ -312,25 +307,24 @@ static unsigned long gp10b_get_clk_rate(struct platform_device *dev)
 
 }
 
-static long gp10b_round_clk_rate(struct platform_device *dev,
-						unsigned long rate)
+static long gp10b_round_clk_rate(struct device *dev, unsigned long rate)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(dev);
 
 	return clk_round_rate(platform->clk[0], rate);
 }
 
-static int gp10b_set_clk_rate(struct platform_device *dev, unsigned long rate)
+static int gp10b_set_clk_rate(struct device *dev, unsigned long rate)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(dev);
 
 	return clk_set_rate(platform->clk[0], rate);
 }
 
-static int gp10b_clk_get_freqs(struct platform_device *pdev,
+static int gp10b_clk_get_freqs(struct device *dev,
 				unsigned long **freqs, int *num_freqs)
 {
-	struct gk20a_platform *platform = gk20a_get_platform(pdev);
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
 	unsigned long min_rate, max_rate, freq_step, rate;
 	int i;
 
@@ -408,6 +402,8 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	.reset_deassert = gp10b_tegra_reset_deassert,
 
 	.force_reset_in_do_idle = false,
+
+	.soc_name = "tegra18x",
 };
 
 
@@ -481,7 +477,7 @@ static ssize_t ecc_stat_show(struct device *dev,
 	return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n");
 }
 
-static int ecc_stat_create(struct platform_device *dev,
+static int ecc_stat_create(struct device *dev,
 				int is_l2,
 				char *ecc_stat_name,
 				struct ecc_stat *ecc_stat,
@@ -526,8 +522,7 @@ static int ecc_stat_create(struct platform_device *dev,
 		dev_attr_array[hw_unit].store = NULL;
 
 		/* Create sysfs file */
-		error |= device_create_file(&dev->dev,
-				&dev_attr_array[hw_unit]);
+		error |= device_create_file(dev, &dev_attr_array[hw_unit]);
 	}
 
 	/* Add hash table entry */
@@ -544,8 +539,7 @@ static void ecc_stat_remove(struct device *dev,
 				struct ecc_stat *ecc_stat,
 				struct device_attribute *dev_attr_array)
 {
-	struct platform_device *ndev = to_platform_device(dev);
-	struct gk20a *g = get_gk20a(ndev);
+	struct gk20a *g = get_gk20a(dev);
 	int num_hw_units = 0;
 	int hw_unit = 0;
 
@@ -571,7 +565,7 @@ static void ecc_stat_remove(struct device *dev,
 	kfree(dev_attr_array);
 }
 
-void gr_gp10b_create_sysfs(struct platform_device *dev)
+void gr_gp10b_create_sysfs(struct device *dev)
 {
 	int error = 0;
 	struct gk20a *g = get_gk20a(dev);
@@ -663,13 +657,12 @@ void gr_gp10b_create_sysfs(struct platform_device *dev)
 				dev_attr_l2_ecc_ded_count_array);
 
 	if (error)
-		dev_err(&dev->dev, "Failed to create sysfs attributes!\n");
+		dev_err(dev, "Failed to create sysfs attributes!\n");
 }
 
 static void gr_gp10b_remove_sysfs(struct device *dev)
 {
-	struct platform_device *ndev = to_platform_device(dev);
-	struct gk20a *g = get_gk20a(ndev);
+	struct gk20a *g = get_gk20a(dev);
 
 	ecc_stat_remove(dev,
 			0,
-- 
cgit v1.2.2


From 03614bff771ee7ca93382d4464b1df373b6fe501 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 8 Apr 2016 12:02:37 -0700
Subject: gpu: nvgpu: gp10b: Support GPUs with no physical mode

Support GPUs which cannot choose between SMMU and physical
addressing.

Change-Id: Ic097fccb313d98fcea918a705eefb5cd619138f1
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1122590
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index d4a4e7f3..deb8c138 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * GP10B MMU
  *
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -32,7 +32,7 @@ static int gp10b_init_mm_setup_hw(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
 	struct mem_desc *inst_block = &mm->bar1.inst_block;
-	phys_addr_t inst_pa = gk20a_mem_phys(inst_block);
+	u64 inst_pa = gk20a_mm_inst_block_addr(g, inst_block);
 	int err = 0;
 
 	gk20a_dbg_fn("");
@@ -97,7 +97,7 @@ static int gb10b_init_bar2_mm_hw_setup(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
 	struct mem_desc *inst_block = &mm->bar2.inst_block;
-	phys_addr_t inst_pa = gk20a_mem_phys(inst_block);
+	u64 inst_pa = gk20a_mm_inst_block_addr(g, inst_block);
 
 	gk20a_dbg_fn("");
 
@@ -146,6 +146,17 @@ static u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i)
 	return (u32 *) (((u8 *)entry->cpu_va) + i*gmmu_new_pde__size_v());
 }
 
+static u64 entry_addr(struct gk20a *g, struct gk20a_mm_entry *entry)
+{
+	u64 addr;
+	if (g->mm.has_physical_mode)
+		addr = sg_phys(entry->sgt->sgl);
+	else
+		addr = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0);
+
+	return addr;
+}
+
 static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 			   struct gk20a_mm_entry *parent,
 			   u32 i, u32 gmmu_pgsz_idx,
@@ -156,6 +167,7 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 			   bool cacheable, bool unmapped_pte,
 			   int rw_flag, bool sparse, bool priv)
 {
+	struct gk20a *g = gk20a_from_vm(vm);
 	u64 pte_addr = 0;
 	u64 pde_addr = 0;
 	struct gk20a_mm_entry *pte = parent->entries + i;
@@ -164,8 +176,8 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 
 	gk20a_dbg_fn("");
 
-	pte_addr = sg_phys(pte->sgt->sgl) >> gmmu_new_pde_address_shift_v();
-	pde_addr = sg_phys(parent->sgt->sgl);
+	pte_addr = entry_addr(g, pte) >> gmmu_new_pde_address_shift_v();
+	pde_addr = entry_addr(g, parent);
 
 	pde_v[0] |= gmmu_new_pde_aperture_video_memory_f();
 	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
@@ -197,6 +209,7 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 			   bool cacheable, bool unmapped_pte,
 			   int rw_flag, bool sparse, bool priv)
 {
+	struct gk20a *g = gk20a_from_vm(vm);
 	bool small_valid, big_valid;
 	u32 pte_addr_small = 0, pte_addr_big = 0;
 	struct gk20a_mm_entry *entry = pte->entries + i;
@@ -208,12 +221,13 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	small_valid = entry->size && entry->pgsz == gmmu_page_size_small;
 	big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
 
-	if (small_valid)
-		pte_addr_small = sg_phys(entry->sgt->sgl)
+	if (small_valid) {
+		pte_addr_small = entry_addr(g, entry)
 				 >> gmmu_new_dual_pde_address_shift_v();
+	}
 
 	if (big_valid)
-		pte_addr_big = sg_phys(entry->sgt->sgl)
+		pte_addr_big = entry_addr(g, entry)
 			       >> gmmu_new_dual_pde_address_big_shift_v();
 
 	if (small_valid) {
-- 
cgit v1.2.2


From 9acab4c9759c23cc355839187f7c07c48b4dc017 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Tue, 12 Apr 2016 16:09:48 +0530
Subject: gpu: nvgpu: pass bool pointer to debugfs_create_bool()

Port the change 621a5f7ad9cd1ce7933f1d302067cbd58354173c from
kernel.org to the nvgpu driver

Change-Id: I3a8aa873e1f0b601bfe89f836c400113e50b638e
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1125443
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index 25509d50..edf536f5 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -16,6 +16,8 @@
 #ifndef _NVGPU_GR_GP10B_H_
 #define _NVGPU_GR_GP10B_H_
 
+#include <linux/version.h>
+
 struct gpu_ops;
 
 enum {
@@ -46,9 +48,15 @@ struct ecc_stat {
 struct gr_t18x {
 	struct {
 		u32 preempt_image_size;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)
 		u32 force_preemption_gfxp;
 		u32 force_preemption_cilp;
 		u32 dump_ctxsw_stats_on_channel_close;
+#else
+		bool force_preemption_gfxp;
+		bool force_preemption_cilp;
+		bool dump_ctxsw_stats_on_channel_close;
+#endif
 		struct dentry *debugfs_force_preemption_cilp;
 		struct dentry *debugfs_force_preemption_gfxp;
 		struct dentry *debugfs_dump_ctxsw_stats;
-- 
cgit v1.2.2


From 545dd0e3707c317a8b47dc0660b74d81f74bd416 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Tue, 12 Apr 2016 18:00:18 +0530
Subject: gpu: nvgpu: return from scale_init() if no profile

In gp10b_tegra_scale_init(), return immediately
if CONFIG_GK20A_DEVFREQ is disabled and
profile is NULL

Change-Id: I08e15afdc72bef62a4fb43f30b74cebf8a4b0d68
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1125444
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 7d8312fc..25673873 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -93,6 +93,9 @@ static void gp10b_tegra_scale_init(struct device *dev)
 	struct gk20a_scale_profile *profile = platform->g->scale_profile;
 	struct tegra_bwmgr_client *bwmgr_handle;
 
+	if (!profile)
+		return;
+
 	bwmgr_handle = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU);
 	if (!bwmgr_handle)
 		return;
-- 
cgit v1.2.2


From b268c9103760fd26706cb3a5b651b82fb82fb105 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Thu, 14 Apr 2016 17:36:14 +0530
Subject: gpu: nvgpu: register to nvhost for debug dump

Register debug dump callback gk20a_debug_dump_device()
to nvhost using nvhost_register_dump_device()

Unregister the callback in gp10b_tegra_remove()

Bug 200188753

Change-Id: I9161cfdf969208bd8b6160742bf89e327aa2a6b4
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1126792
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 25673873..587e0a7a 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -135,6 +135,11 @@ static int gp10b_tegra_probe(struct device *dev)
 	}
 
 	platform->g->host1x_dev = host1x_pdev;
+	if (platform->g->host1x_dev)
+		nvhost_register_dump_device(platform->g->host1x_dev,
+					gk20a_debug_dump_device,
+					platform->g->dev);
+
 	platform->bypass_smmu = !device_is_iommuable(dev);
 	platform->disable_bigpage = platform->bypass_smmu;
 
@@ -182,6 +187,11 @@ static int gp10b_tegra_late_probe(struct device *dev)
 
 static int gp10b_tegra_remove(struct device *dev)
 {
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+
+	if (platform->g->host1x_dev)
+		nvhost_unregister_dump_device(platform->g->host1x_dev);
+
 	/* remove gk20a power subdomain from host1x */
 	nvhost_unregister_client_domain(dev_to_genpd(dev));
 	gr_gp10b_remove_sysfs(dev);
-- 
cgit v1.2.2


From 342d45e060ba9c6a7815633c351ec8d95422dcbb Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 6 Apr 2016 13:10:46 -0700
Subject: gpu: nvgpu: gp10b: Add litter values HAL

Move per-chip constants to be returned by a chip specific function.
Implement get_litter_value() for each chip.

Change-Id: I8bda9bf99b2cc6aba0fb88a69cc374e0a6abab6b
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1121384
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c  | 51 ++++++++++++++-----------
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 74 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c |  9 ++---
 3 files changed, 108 insertions(+), 26 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 1aba35f3..6ab8e923 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -25,7 +25,6 @@
 #include "gp10b/gr_gp10b.h"
 #include "hw_gr_gp10b.h"
 #include "hw_fifo_gp10b.h"
-#include "hw_proj_gp10b.h"
 #include "hw_ctxsw_prog_gp10b.h"
 #include "hw_mc_gp10b.h"
 #include "gp10b_sysfs.h"
@@ -61,8 +60,9 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 			bool *post_event, struct channel_gk20a *fault_ch)
 {
 	int ret = 0;
-	u32 offset = proj_gpc_stride_v() * gpc +
-			proj_tpc_in_gpc_stride_v() * tpc;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
 	u32 lrf_ecc_status, shm_ecc_status;
 
 	gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch);
@@ -170,8 +170,9 @@ static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
 		bool *post_event)
 {
 	int ret = 0;
-	u32 offset = proj_gpc_stride_v() * gpc +
-		     proj_tpc_in_gpc_stride_v() * tpc;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
 	u32 esr;
 	u32 ecc_stats_reg_val;
 
@@ -314,6 +315,9 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 	u32 temp, temp2;
 	u32 cbm_cfg_size_beta, cbm_cfg_size_alpha, cbm_cfg_size_steadystate;
 	u32 attrib_size_in_chunk, cb_attrib_cache_size_init;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
+	u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
 
 	gk20a_dbg_fn("");
 
@@ -346,8 +350,8 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 		gr->tpc_count * gr->alpha_cb_size;
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		temp = proj_gpc_stride_v() * gpc_index;
-		temp2 = proj_scal_litter_num_pes_per_gpc_v() * gpc_index;
+		temp = gpc_stride * gpc_index;
+		temp2 = num_pes_per_gpc * gpc_index;
 		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
 		     ppc_index++) {
 			cbm_cfg_size_beta = cb_attrib_cache_size_init *
@@ -359,17 +363,17 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
+				ppc_in_gpc_stride * ppc_index,
 				cbm_cfg_size_beta, patch);
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
+				ppc_in_gpc_stride * ppc_index,
 				attrib_offset_in_chunk, patch);
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
+				ppc_in_gpc_stride * ppc_index,
 				cbm_cfg_size_steadystate,
 				patch);
 
@@ -378,12 +382,12 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
+				ppc_in_gpc_stride * ppc_index,
 				cbm_cfg_size_alpha, patch);
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
+				ppc_in_gpc_stride * ppc_index,
 				alpha_offset_in_chunk, patch);
 
 			alpha_offset_in_chunk += gr->alpha_cb_size *
@@ -618,6 +622,8 @@ static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 	u32 gpc_index, ppc_index, stride, val;
 	u32 pd_ab_max_output;
 	u32 alpha_cb_size = data * 4;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
 
 	gk20a_dbg_fn("");
 
@@ -638,14 +644,14 @@ static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 		gr_pd_ab_dist_cfg1_max_batches_init_f());
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		stride = proj_gpc_stride_v() * gpc_index;
+		stride = gpc_stride * gpc_index;
 
 		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
 			ppc_index++) {
 
 			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
 				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index);
+				ppc_in_gpc_stride * ppc_index);
 
 			val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
 					gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
@@ -653,7 +659,7 @@ static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 
 			gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
 				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index, val);
+				ppc_in_gpc_stride * ppc_index, val);
 		}
 	}
 }
@@ -663,6 +669,8 @@ static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
 	struct gr_gk20a *gr = &g->gr;
 	u32 gpc_index, ppc_index, stride, val;
 	u32 cb_size_steady = data * 4, cb_size;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
 
 	gk20a_dbg_fn("");
 
@@ -684,14 +692,14 @@ static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
 		 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady));
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		stride = proj_gpc_stride_v() * gpc_index;
+		stride = gpc_stride * gpc_index;
 
 		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
 			ppc_index++) {
 
 			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
 				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index);
+				ppc_in_gpc_stride * ppc_index);
 
 			val = set_field(val,
 				gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
@@ -700,9 +708,9 @@ static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
 
 			gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
 				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index, val);
+				ppc_in_gpc_stride * ppc_index, val);
 
-			gk20a_writel(g, proj_ppc_in_gpc_stride_v() * ppc_index +
+			gk20a_writel(g, ppc_in_gpc_stride * ppc_index +
 				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() +
 				stride,
 				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_f(
@@ -1542,8 +1550,9 @@ static int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
 	bool cilp_enabled = (fault_ch->ch_ctx.gr_ctx->preempt_mode ==
 			NVGPU_GR_PREEMPTION_MODE_CILP) ;
 	u32 global_mask = 0, dbgr_control0, global_esr_copy;
-	u32 offset = proj_gpc_stride_v() * gpc +
-		     proj_tpc_in_gpc_stride_v() * tpc;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
 
 	*early_exit = false;
 	*ignore_debugger = false;
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 427936c7..ea5e3f15 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -42,6 +42,7 @@
 #include <linux/tegra-fuse.h>
 
 #include "gp10b.h"
+#include "hw_proj_gp10b.h"
 
 #define FUSE_OPT_PRIV_SEC_EN_0 0x264
 #define PRIV_SECURITY_ENABLED 0x01
@@ -99,6 +100,78 @@ static struct gpu_ops gp10b_ops = {
 	}
 };
 
+static int gp10b_get_litter_value(struct gk20a *g,
+		enum nvgpu_litter_value value)
+{
+	int ret = EINVAL;
+	switch (value) {
+	case GPU_LIT_NUM_GPCS:
+		ret = proj_scal_litter_num_gpcs_v();
+		break;
+	case GPU_LIT_NUM_PES_PER_GPC:
+		ret = proj_scal_litter_num_pes_per_gpc_v();
+		break;
+	case GPU_LIT_NUM_ZCULL_BANKS:
+		ret = proj_scal_litter_num_zcull_banks_v();
+		break;
+	case GPU_LIT_NUM_TPC_PER_GPC:
+		ret = proj_scal_litter_num_tpc_per_gpc_v();
+		break;
+	case GPU_LIT_NUM_FBPS:
+		ret = proj_scal_litter_num_fbps_v();
+		break;
+	case GPU_LIT_GPC_BASE:
+		ret = proj_gpc_base_v();
+		break;
+	case GPU_LIT_GPC_STRIDE:
+		ret = proj_gpc_stride_v();
+		break;
+	case GPU_LIT_GPC_SHARED_BASE:
+		ret = proj_gpc_shared_base_v();
+		break;
+	case GPU_LIT_TPC_IN_GPC_BASE:
+		ret = proj_tpc_in_gpc_base_v();
+		break;
+	case GPU_LIT_TPC_IN_GPC_STRIDE:
+		ret = proj_tpc_in_gpc_stride_v();
+		break;
+	case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
+		ret = proj_tpc_in_gpc_shared_base_v();
+		break;
+	case GPU_LIT_PPC_IN_GPC_STRIDE:
+		ret = proj_ppc_in_gpc_stride_v();
+		break;
+	case GPU_LIT_ROP_BASE:
+		ret = proj_rop_base_v();
+		break;
+	case GPU_LIT_ROP_STRIDE:
+		ret = proj_rop_stride_v();
+		break;
+	case GPU_LIT_ROP_SHARED_BASE:
+		ret = proj_rop_shared_base_v();
+		break;
+	case GPU_LIT_HOST_NUM_PBDMA:
+		ret = proj_host_num_pbdma_v();
+		break;
+	case GPU_LIT_LTC_STRIDE:
+		ret = proj_ltc_stride_v();
+		break;
+	case GPU_LIT_LTS_STRIDE:
+		ret = proj_lts_stride_v();
+		break;
+	case GPU_LIT_NUM_FBPAS:
+		ret = proj_scal_litter_num_fbpas_v();
+		break;
+	case GPU_LIT_FBPA_STRIDE:
+		ret = proj_fbpa_stride_v();
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
 int gp10b_init_hal(struct gk20a *g)
 {
 	struct gpu_ops *gops = &g->ops;
@@ -156,6 +229,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gp10b_init_therm_ops(gops);
 	gops->name = "gp10b";
 	gops->chip_init_gpu_characteristics = gp10b_init_gpu_characteristics;
+	gops->get_litter_value = gp10b_get_litter_value;
 
 	c->twod_class = FERMI_TWOD_A;
 	c->threed_class = PASCAL_A;
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index e68e762d..92735f1a 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -19,7 +19,6 @@
 
 #include "gk20a/gk20a.h"
 #include "gm20b/ltc_gm20b.h"
-#include "hw_proj_gp10b.h"
 #include "hw_mc_gp10b.h"
 #include "hw_ltc_gp10b.h"
 
@@ -125,6 +124,8 @@ static void gp10b_ltc_isr(struct gk20a *g)
 {
 	u32 mc_intr, ltc_intr;
 	int ltc, slice;
+	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
+	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
 
 	mc_intr = gk20a_readl(g, mc_intr_ltc_r());
 	gk20a_err(dev_from_gk20a(g), "mc_ltc_intr: %08x",
@@ -133,8 +134,7 @@ static void gp10b_ltc_isr(struct gk20a *g)
 		if ((mc_intr & 1 << ltc) == 0)
 			continue;
 		for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
-			u32 offset = proj_ltc_stride_v() * ltc +
-					proj_lts_stride_v() * slice;
+			u32 offset = ltc_stride * ltc + lts_stride * slice;
 			ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() + offset);
 
 			/* Detect and handle ECC errors */
@@ -180,8 +180,7 @@ static void gp10b_ltc_isr(struct gk20a *g)
 			gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x",
 				  ltc, slice, ltc_intr);
 			gk20a_writel(g, ltc_ltc0_lts0_intr_r() +
-					   proj_ltc_stride_v() * ltc +
-					   proj_lts_stride_v() * slice,
+					   ltc_stride * ltc + lts_stride * slice,
 				     ltc_intr);
 		}
 	}
-- 
cgit v1.2.2


From ae893b37c014b13f68aa7547640bda3589363f4d Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 8 Apr 2016 12:03:32 -0700
Subject: gpu: nvgpu: gp10b: Use sysmem aperture for SoC memory

In Tegra GPU, SoC memory has to be accessed as vidmem. In discrete GPU, it
has to be accessed as sysmem.

Change-Id: Id26588df17b4921533804f72bc8c0ac3892ae154
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1122591
GVS: Gerrit_Virtual_Submit
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c     | 10 +++++----
 drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h   | 18 ++++++++++++++-
 drivers/gpu/nvgpu/gp10b/hw_ccsr_gp10b.h  | 10 ++++++++-
 drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h    | 22 +++++++++++++++++-
 drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h  |  8 +++++++
 drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h  | 32 +++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h    |  8 +++++++
 drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h |  8 +++++++
 drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h   |  6 ++++-
 drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h   | 10 ++++++++-
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c       | 38 ++++++++++++++++++++++----------
 11 files changed, 149 insertions(+), 21 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index ade6ff0a..9cb26d3f 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -53,6 +53,7 @@ static int channel_gp10b_commit_userd(struct channel_gk20a *c)
 	u32 addr_lo;
 	u32 addr_hi;
 	void *inst_ptr;
+	struct gk20a *g = c->g;
 
 	gk20a_dbg_fn("");
 
@@ -67,12 +68,13 @@ static int channel_gp10b_commit_userd(struct channel_gk20a *c)
 		c->hw_chid, (u64)c->userd_iova);
 
 	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
-		 pbdma_userd_target_vid_mem_f() |
-		 pbdma_userd_addr_f(addr_lo));
+		       (g->mm.vidmem_is_vidmem ?
+			pbdma_userd_target_sys_mem_ncoh_f() :
+			pbdma_userd_target_vid_mem_f()) |
+		       pbdma_userd_addr_f(addr_lo));
 
 	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
-		 pbdma_userd_target_vid_mem_f() |
-		 pbdma_userd_hi_addr_f(addr_hi));
+		       pbdma_userd_hi_addr_f(addr_hi));
 
 	return 0;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h
index c04b01c1..bf251cd6 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -62,6 +62,14 @@ static inline u32 bus_bar1_block_target_vid_mem_f(void)
 {
 	return 0x0;
 }
+static inline u32 bus_bar1_block_target_sys_mem_coh_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 bus_bar1_block_target_sys_mem_ncoh_f(void)
+{
+	return 0x30000000;
+}
 static inline u32 bus_bar1_block_mode_virtual_f(void)
 {
 	return 0x80000000;
@@ -78,6 +86,14 @@ static inline u32 bus_bar2_block_target_vid_mem_f(void)
 {
 	return 0x0;
 }
+static inline u32 bus_bar2_block_target_sys_mem_coh_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 bus_bar2_block_target_sys_mem_ncoh_f(void)
+{
+	return 0x30000000;
+}
 static inline u32 bus_bar2_block_mode_virtual_f(void)
 {
 	return 0x80000000;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ccsr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ccsr_gp10b.h
index cd5265b3..99398961 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ccsr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ccsr_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -66,6 +66,14 @@ static inline u32 ccsr_channel_inst_target_vid_mem_f(void)
 {
 	return 0x0;
 }
+static inline u32 ccsr_channel_inst_target_sys_mem_coh_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 ccsr_channel_inst_target_sys_mem_ncoh_f(void)
+{
+	return 0x30000000;
+}
 static inline u32 ccsr_channel_inst_bind_false_f(void)
 {
 	return 0x0;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
index 1fee19b1..236c2f8a 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -106,6 +106,10 @@ static inline u32 fb_mmu_invalidate_pdb_aperture_vid_mem_f(void)
 {
 	return 0x0;
 }
+static inline u32 fb_mmu_invalidate_pdb_aperture_sys_mem_f(void)
+{
+	return 0x2;
+}
 static inline u32 fb_mmu_invalidate_pdb_addr_f(u32 v)
 {
 	return (v & 0xfffffff) << 4;
@@ -378,6 +382,14 @@ static inline u32 fb_mmu_debug_wr_aperture_vid_mem_f(void)
 {
 	return 0x0;
 }
+static inline u32 fb_mmu_debug_wr_aperture_sys_mem_coh_f(void)
+{
+	return 0x2;
+}
+static inline u32 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(void)
+{
+	return 0x3;
+}
 static inline u32 fb_mmu_debug_wr_vol_false_f(void)
 {
 	return 0x0;
@@ -406,6 +418,14 @@ static inline u32 fb_mmu_debug_rd_aperture_vid_mem_f(void)
 {
 	return 0x0;
 }
+static inline u32 fb_mmu_debug_rd_aperture_sys_mem_coh_f(void)
+{
+	return 0x2;
+}
+static inline u32 fb_mmu_debug_rd_aperture_sys_mem_ncoh_f(void)
+{
+	return 0x3;
+}
 static inline u32 fb_mmu_debug_rd_vol_false_f(void)
 {
 	return 0x0;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
index 34977523..4781ff85 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
@@ -82,6 +82,14 @@ static inline u32 fifo_runlist_base_target_vid_mem_f(void)
 {
 	return 0x0;
 }
+static inline u32 fifo_runlist_base_target_sys_mem_coh_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 fifo_runlist_base_target_sys_mem_ncoh_f(void)
+{
+	return 0x30000000;
+}
 static inline u32 fifo_runlist_r(void)
 {
 	return 0x00002274;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
index 30e4307d..86870aea 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
@@ -70,6 +70,14 @@ static inline u32 gmmu_new_pde_aperture_video_memory_f(void)
 {
 	return 0x2;
 }
+static inline u32 gmmu_new_pde_aperture_sys_mem_coh_f(void)
+{
+	return 0x4;
+}
+static inline u32 gmmu_new_pde_aperture_sys_mem_ncoh_f(void)
+{
+	return 0x6;
+}
 static inline u32 gmmu_new_pde_address_sys_f(u32 v)
 {
 	return (v & 0xfffffff) << 8;
@@ -118,6 +126,14 @@ static inline u32 gmmu_new_dual_pde_aperture_big_video_memory_f(void)
 {
 	return 0x2;
 }
+static inline u32 gmmu_new_dual_pde_aperture_big_sys_mem_coh_f(void)
+{
+	return 0x4;
+}
+static inline u32 gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(void)
+{
+	return 0x6;
+}
 static inline u32 gmmu_new_dual_pde_address_big_sys_f(u32 v)
 {
 	return (v & 0xfffffff) << 4;
@@ -138,6 +154,14 @@ static inline u32 gmmu_new_dual_pde_aperture_small_video_memory_f(void)
 {
 	return 0x2;
 }
+static inline u32 gmmu_new_dual_pde_aperture_small_sys_mem_coh_f(void)
+{
+	return 0x4;
+}
+static inline u32 gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(void)
+{
+	return 0x6;
+}
 static inline u32 gmmu_new_dual_pde_vol_small_w(void)
 {
 	return 2;
@@ -238,6 +262,14 @@ static inline u32 gmmu_new_pte_aperture_video_memory_f(void)
 {
 	return 0x0;
 }
+static inline u32 gmmu_new_pte_aperture_sys_mem_coh_f(void)
+{
+	return 0x4;
+}
+static inline u32 gmmu_new_pte_aperture_sys_mem_ncoh_f(void)
+{
+	return 0x6;
+}
 static inline u32 gmmu_new_pte_read_only_w(void)
 {
 	return 0;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 78792f50..78304fb1 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -1010,6 +1010,14 @@ static inline u32 gr_fecs_current_ctx_target_vid_mem_f(void)
 {
 	return 0x0;
 }
+static inline u32 gr_fecs_current_ctx_target_sys_mem_coh_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 gr_fecs_current_ctx_target_sys_mem_ncoh_f(void)
+{
+	return 0x30000000;
+}
 static inline u32 gr_fecs_current_ctx_valid_s(void)
 {
 	return 1;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
index f66fa934..0caffb22 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
@@ -322,6 +322,14 @@ static inline u32 pbdma_userd_target_vid_mem_f(void)
 {
 	return 0x0;
 }
+static inline u32 pbdma_userd_target_sys_mem_coh_f(void)
+{
+	return 0x2;
+}
+static inline u32 pbdma_userd_target_sys_mem_ncoh_f(void)
+{
+	return 0x3;
+}
 static inline u32 pbdma_userd_addr_f(u32 v)
 {
 	return (v & 0x7fffff) << 9;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h
index 75bf59a3..9a3591c7 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_pwr_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -542,6 +542,10 @@ static inline u32 pwr_pmu_new_instblk_target_sys_coh_f(void)
 {
 	return 0x20000000;
 }
+static inline u32 pwr_pmu_new_instblk_target_sys_ncoh_f(void)
+{
+	return 0x30000000;
+}
 static inline u32 pwr_pmu_new_instblk_valid_f(u32 v)
 {
 	return (v & 0x1) << 30;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
index 863b15b8..55323579 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -70,6 +70,14 @@ static inline u32 ram_in_page_dir_base_target_vid_mem_f(void)
 {
 	return 0x0;
 }
+static inline u32 ram_in_page_dir_base_target_sys_mem_coh_f(void)
+{
+	return 0x2;
+}
+static inline u32 ram_in_page_dir_base_target_sys_mem_ncoh_f(void)
+{
+	return 0x3;
+}
 static inline u32 ram_in_page_dir_base_vol_w(void)
 {
 	return 128;
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index deb8c138..0c00feb4 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -43,9 +43,11 @@ static int gp10b_init_mm_setup_hw(struct gk20a *g)
 	gk20a_dbg_info("bar1 inst block ptr: 0x%08x",  (u32)inst_pa);
 
 	gk20a_writel(g, bus_bar1_block_r(),
-			bus_bar1_block_target_vid_mem_f() |
-			bus_bar1_block_mode_virtual_f() |
-			bus_bar1_block_ptr_f(inst_pa));
+		     (g->mm.vidmem_is_vidmem ?
+		       bus_bar1_block_target_sys_mem_ncoh_f() :
+		       bus_bar1_block_target_vid_mem_f()) |
+		     bus_bar1_block_mode_virtual_f() |
+		     bus_bar1_block_ptr_f(inst_pa));
 
 	if (g->ops.mm.init_bar2_mm_hw_setup) {
 		err = g->ops.mm.init_bar2_mm_hw_setup(g);
@@ -107,9 +109,11 @@ static int gb10b_init_bar2_mm_hw_setup(struct gk20a *g)
 	gk20a_dbg_info("bar2 inst block ptr: 0x%08x",  (u32)inst_pa);
 
 	gk20a_writel(g, bus_bar2_block_r(),
-		bus_bar2_block_target_vid_mem_f() |
-		bus_bar2_block_mode_virtual_f() |
-		bus_bar2_block_ptr_f(inst_pa));
+		     (g->mm.vidmem_is_vidmem ?
+		       bus_bar2_block_target_sys_mem_ncoh_f() :
+		       bus_bar2_block_target_vid_mem_f()) |
+		     bus_bar2_block_mode_virtual_f() |
+		     bus_bar2_block_ptr_f(inst_pa));
 
 	gk20a_dbg_fn("done");
 	return 0;
@@ -179,7 +183,9 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	pte_addr = entry_addr(g, pte) >> gmmu_new_pde_address_shift_v();
 	pde_addr = entry_addr(g, parent);
 
-	pde_v[0] |= gmmu_new_pde_aperture_video_memory_f();
+	pde_v[0] |= g->mm.vidmem_is_vidmem ?
+			gmmu_new_pde_aperture_sys_mem_ncoh_f() :
+			gmmu_new_pde_aperture_video_memory_f();
 	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
 	pde_v[0] |= gmmu_new_pde_vol_true_f();
 	pde_v[1] |= pte_addr >> 24;
@@ -232,7 +238,9 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 
 	if (small_valid) {
 		pde_v[2] |= gmmu_new_dual_pde_address_small_sys_f(pte_addr_small);
-		pde_v[2] |= gmmu_new_dual_pde_aperture_small_video_memory_f();
+		pde_v[2] |= g->mm.vidmem_is_vidmem ?
+			gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() :
+			gmmu_new_dual_pde_aperture_small_video_memory_f();
 		pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
 		pde_v[3] |= pte_addr_small >> 24;
 	}
@@ -240,7 +248,9 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	if (big_valid) {
 		pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(pte_addr_big);
 		pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
-		pde_v[0] |= gmmu_new_dual_pde_aperture_big_video_memory_f();
+		pde_v[0] |= g->mm.vidmem_is_vidmem ?
+			gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f() :
+			gmmu_new_dual_pde_aperture_big_video_memory_f();
 		pde_v[1] |= pte_addr_big >> 28;
 	}
 
@@ -279,8 +289,10 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			pte_w[0] = gmmu_new_pte_valid_false_f();
 		else
 			pte_w[0] = gmmu_new_pte_valid_true_f();
-		pte_w[0] |= gmmu_new_pte_aperture_video_memory_f() |
-			    gmmu_new_pte_address_sys_f(*iova
+		pte_w[0] |= g->mm.vidmem_is_vidmem ?
+			gmmu_new_pte_aperture_sys_mem_ncoh_f() :
+			gmmu_new_pte_aperture_video_memory_f();
+		pte_w[0] |= gmmu_new_pte_address_sys_f(*iova
 			      >> gmmu_new_pte_address_shift_v());
 
 		if (priv)
@@ -373,7 +385,9 @@ static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
 	u32 pdb_addr_hi = u64_hi32(pdb_addr);
 
 	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
-		ram_in_page_dir_base_target_vid_mem_f() |
+		(g->mm.vidmem_is_vidmem ?
+		  ram_in_page_dir_base_target_sys_mem_ncoh_f() :
+		  ram_in_page_dir_base_target_vid_mem_f()) |
 		ram_in_page_dir_base_vol_true_f() |
 		ram_in_page_dir_base_lo_f(pdb_addr_lo) |
 		1 << 10);
-- 
cgit v1.2.2


From 2c939d35bb07b9ab6643c301efd63e65fc29ed46 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 11 Apr 2016 13:02:26 -0700
Subject: gpu: nvgpu: gp10b: Wait for BAR1 bind

Wait for BAR1 bind to complete before continuing. The register to
wait on exists from Maxwell onwards.

Change-Id: Icf03ae66aeb265808c4ba8da24ba4e1ebb91564e
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1123939
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h | 52 ++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h
index bf251cd6..e8f7f3fb 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h
@@ -106,6 +106,58 @@ static inline u32 bus_bar2_block_ptr_shift_v(void)
 {
 	return 0x0000000c;
 }
+static inline u32 bus_bind_status_r(void)
+{
+	return 0x00001710;
+}
+static inline u32 bus_bind_status_bar1_pending_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 bus_bind_status_bar1_pending_empty_f(void)
+{
+	return 0x0;
+}
+static inline u32 bus_bind_status_bar1_pending_busy_f(void)
+{
+	return 0x1;
+}
+static inline u32 bus_bind_status_bar1_outstanding_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+static inline u32 bus_bind_status_bar1_outstanding_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 bus_bind_status_bar1_outstanding_true_f(void)
+{
+	return 0x2;
+}
+static inline u32 bus_bind_status_bar2_pending_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 bus_bind_status_bar2_pending_empty_f(void)
+{
+	return 0x0;
+}
+static inline u32 bus_bind_status_bar2_pending_busy_f(void)
+{
+	return 0x4;
+}
+static inline u32 bus_bind_status_bar2_outstanding_v(u32 r)
+{
+	return (r >> 3) & 0x1;
+}
+static inline u32 bus_bind_status_bar2_outstanding_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 bus_bind_status_bar2_outstanding_true_f(void)
+{
+	return 0x8;
+}
 static inline u32 bus_intr_0_r(void)
 {
 	return 0x00001100;
-- 
cgit v1.2.2


From d0965c746d1824b34b589952b247fe5f0e23f8ea Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Wed, 9 Mar 2016 14:55:20 +0530
Subject: gpu: nvgpu: suspend context support for gp10b

Add API gr_gp10b_suspend_contexts() to support context
suspend on gp10b

sequence to suspend:
- disable ctxsw
- loop through list of channels
- if channel is ctx resident, suspend all SMs
  - if CILP channel, set CILP preempt pending = true
  - resume all SMs
- otherwise, disable channel/TSG
- enable ctxsw
- if CILP preempt is pending, wait for it to complete

Bug 200156699

Change-Id: Id9609077c283f99f420ad21c636b29f74b8eff6b
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1120334
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 104 +++++++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 6ab8e923..e55c5768 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1749,6 +1749,109 @@ static u32 get_ecc_override_val(struct gk20a *g)
 		return 0;
 }
 
+static bool gr_gp10b_suspend_context(struct channel_gk20a *ch,
+				bool *cilp_preempt_pending)
+{
+	struct gk20a *g = ch->g;
+	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
+	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
+	bool ctx_resident = false;
+	int err = 0;
+
+	*cilp_preempt_pending = false;
+
+	if (gk20a_is_channel_ctx_resident(ch)) {
+		gk20a_suspend_all_sms(g, 0, false);
+
+		if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_CILP) {
+			err = gr_gp10b_set_cilp_preempt_pending(g, ch);
+			if (err)
+				gk20a_err(dev_from_gk20a(g),
+					"unable to set CILP preempt pending\n");
+			else
+				*cilp_preempt_pending = true;
+
+			gk20a_resume_all_sms(g);
+		}
+
+		ctx_resident = true;
+	} else {
+		gk20a_disable_channel_tsg(g, ch);
+	}
+
+	return ctx_resident;
+}
+
+/*
+ * Suspend the contexts of @dbg_s; unless toggling ctxsw fails, store the fd
+ * of a resident channel (or -1) in *@ctx_resident_ch_fd.
+ */
+static int gr_gp10b_suspend_contexts(struct gk20a *g,
+				struct dbg_session_gk20a *dbg_s,
+				int *ctx_resident_ch_fd)
+{
+	unsigned long end_jiffies = jiffies +
+		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+	u32 delay = GR_IDLE_CHECK_DEFAULT;
+	bool cilp_preempt_pending = false;
+	struct channel_gk20a *cilp_preempt_pending_ch = NULL;
+	struct channel_gk20a *ch;
+	struct dbg_session_channel_data *ch_data;
+	int err = 0;
+	int local_ctx_resident_ch_fd = -1;
+	bool ctx_resident;
+
+	mutex_lock(&g->dbg_sessions_lock);
+	err = gr_gk20a_disable_ctxsw(g);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "unable to stop gr ctxsw");
+		mutex_unlock(&g->dbg_sessions_lock);
+		goto clean_up;
+	}
+
+	mutex_lock(&dbg_s->ch_list_lock);
+	list_for_each_entry(ch_data, &dbg_s->ch_list, ch_entry) {
+		ch = g->fifo.channel + ch_data->chid;
+		ctx_resident = gr_gp10b_suspend_context(ch,
+					&cilp_preempt_pending);
+		if (ctx_resident)
+			local_ctx_resident_ch_fd = ch_data->channel_fd;
+		if (cilp_preempt_pending)
+			cilp_preempt_pending_ch = ch;
+	}
+	mutex_unlock(&dbg_s->ch_list_lock);
+
+	err = gr_gk20a_enable_ctxsw(g);
+	if (err) {
+		mutex_unlock(&g->dbg_sessions_lock);
+		goto clean_up;
+	}
+
+	mutex_unlock(&g->dbg_sessions_lock);
+
+	if (cilp_preempt_pending_ch) {
+		struct gr_ctx_desc *gr_ctx =
+				cilp_preempt_pending_ch->ch_ctx.gr_ctx;
+
+		do {
+			if (!gr_ctx->t18x.cilp_preempt_pending)
+				break;
+			usleep_range(delay, delay * 2);
+			delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
+		} while (time_before(jiffies, end_jiffies)
+			|| !tegra_platform_is_silicon());
+
+		/* Only a preempt that is still pending has timed out */
+		if (gr_ctx->t18x.cilp_preempt_pending)
+			err = -ETIMEDOUT;
+	}
+
+	*ctx_resident_ch_fd = local_ctx_resident_ch_fd;
+
+clean_up:
+	return err;
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -1787,4 +1890,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.handle_fecs_error = gr_gp10b_handle_fecs_error;
 	gops->gr.create_gr_sysfs = gr_gp10b_create_sysfs;
 	gops->gr.get_lrf_tex_ltc_dram_override = get_ecc_override_val;
+	gops->gr.suspend_contexts = gr_gp10b_suspend_contexts;
 }
-- 
cgit v1.2.2


From 24568369348cbe1a09d279b65d772d320128228b Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Wed, 20 Apr 2016 17:00:30 -0700
Subject: gpu: nvgpu: gp10b: add delay cycles before elcg

Update prod value for gr engine delay cycles before
engine clock gating. For copy engine, it was updated
earlier and now it is extended to both gr and ce.

Bug 1689806

Change-Id: I457ad6f9c461db89d53c57e68ad937ab5292849e
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1129922
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/therm_gp10b.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/therm_gp10b.c b/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
index 687593b0..5763b3b1 100644
--- a/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
@@ -81,12 +81,15 @@ static int gp10b_init_therm_setup_hw(struct gk20a *g)
 static int gp10b_update_therm_gate_ctrl(struct gk20a *g)
 {
 	u32 gate_ctrl;
-
-	gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(ENGINE_CE2_GK20A));
-	gate_ctrl = set_field(gate_ctrl,
-		therm_gate_ctrl_eng_delay_before_m(),
-		therm_gate_ctrl_eng_delay_before_f(4));
-	gk20a_writel(g, therm_gate_ctrl_r(ENGINE_CE2_GK20A), gate_ctrl);
+	u32 engine_id;
+
+	for (engine_id = 0; engine_id < ENGINE_INVAL_GK20A; engine_id++) {
+		gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine_id));
+		gate_ctrl = set_field(gate_ctrl,
+			therm_gate_ctrl_eng_delay_before_m(),
+			therm_gate_ctrl_eng_delay_before_f(4));
+		gk20a_writel(g, therm_gate_ctrl_r(engine_id), gate_ctrl);
+	}
 
 	return 0;
 }
-- 
cgit v1.2.2


From dd55c1c44fa5f8ee2b73c1a081e12af46c08f235 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Tue, 19 Apr 2016 14:21:13 -0700
Subject: gpu: nvgpu: gp10b: set soc memory aperture type

For gp10b, set platform data for soc memory aperture type
as vidmem.

Bug 1749338

Change-Id: I7961734d3ebcca4af459c7c7d49bc31f0fc8ce5d
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1129168
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Ken Adams <kadams@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 587e0a7a..64d9d917 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -168,6 +168,8 @@ static int gp10b_tegra_probe(struct device *dev)
 			&platform->g->gr.t18x.
 				ctx_vars.dump_ctxsw_stats_on_channel_close);
 
+	platform->g->mm.vidmem_is_vidmem = platform->vidmem_is_vidmem;
+
 	gp10b_tegra_get_clocks(dev);
 
 	return 0;
@@ -417,6 +419,8 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	.force_reset_in_do_idle = false,
 
 	.soc_name = "tegra18x",
+
+	.vidmem_is_vidmem = false,
 };
 
 
-- 
cgit v1.2.2


From 7be0ee4bb9c1436ecf37984d2e3f5e39a48537fe Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 19 Apr 2016 15:49:01 -0700
Subject: gpu: nvgpu: gp10b: Add def for NISO sysmem flush addr

Add definition for NISO sysmem flush addr. This makes gp10b in sync
with rest of chips.

Change-Id: Ic3548585000602497e9d7ff271144b9ca9b2acca
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1129217
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
index 236c2f8a..ec340777 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fb_gp10b.h
@@ -474,4 +474,8 @@ static inline u32 fb_mmu_vpr_info_fetch_true_v(void)
 {
 	return 0x00000001;
 }
+static inline u32 fb_niso_flush_sysmem_addr_r(void)
+{
+	return 0x00100c10;
+}
 #endif
-- 
cgit v1.2.2


From 5237f4a2a143a6410cc2eac04a62511a637fd321 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Mon, 2 May 2016 17:43:33 +0300
Subject: gpu: nvgpu: adapt gk20a_mm_entry for mem_desc

For the upcoming vidmem refactor, replace the fields of struct
gk20a_mm_entry that duplicate struct mem_desc with a struct mem_desc
member. This makes it possible to use the page table buffers like the
others too.

JIRA DNVGPU-23
JIRA DNVGPU-20

Change-Id: Ia82da07b5a3bb9fb14a86bcf96a46b3a3c80bf28
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1139696
GVS: Gerrit_Virtual_Submit
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Ken Adams <kadams@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 0c00feb4..c9a47d70 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -147,16 +147,16 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
 
 static u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i)
 {
-	return (u32 *) (((u8 *)entry->cpu_va) + i*gmmu_new_pde__size_v());
+	return (u32 *) (((u8 *)entry->mem.cpu_va) + i*gmmu_new_pde__size_v());
 }
 
 static u64 entry_addr(struct gk20a *g, struct gk20a_mm_entry *entry)
 {
 	u64 addr;
 	if (g->mm.has_physical_mode)
-		addr = sg_phys(entry->sgt->sgl);
+		addr = sg_phys(entry->mem.sgt->sgl);
 	else
-		addr = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0);
+		addr = g->ops.mm.get_iova_addr(g, entry->mem.sgt->sgl, 0);
 
 	return addr;
 }
@@ -202,7 +202,7 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 
 static u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i)
 {
-	return (u32 *) (((u8 *)entry->cpu_va) + i*gmmu_new_dual_pde__size_v());
+	return (u32 *) (((u8 *)entry->mem.cpu_va) + i*gmmu_new_dual_pde__size_v());
 }
 
 static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
@@ -224,8 +224,8 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 
 	gk20a_dbg_fn("");
 
-	small_valid = entry->size && entry->pgsz == gmmu_page_size_small;
-	big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
+	small_valid = entry->mem.size && entry->pgsz == gmmu_page_size_small;
+	big_valid = entry->mem.size && entry->pgsz == gmmu_page_size_big;
 
 	if (small_valid) {
 		pte_addr_small = entry_addr(g, entry)
@@ -325,8 +325,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
 	}
 
-	gk20a_mem_wr32(pte->cpu_va + i*8, 0, pte_w[0]);
-	gk20a_mem_wr32(pte->cpu_va + i*8, 1, pte_w[1]);
+	gk20a_mem_wr32(pte->mem.cpu_va + i*8, 0, pte_w[0]);
+	gk20a_mem_wr32(pte->mem.cpu_va + i*8, 1, pte_w[1]);
 
 	if (*iova) {
 		*iova += page_size;
-- 
cgit v1.2.2


From 6113c679a99ca09256d33a582a4dfe648e100c23 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Tue, 19 Apr 2016 16:58:28 +0530
Subject: gpu: nvgpu: API to set preemption mode

Separate out a new API, gr_gp10b_set_ctxsw_preemption_mode(),
which checks the requested preemption modes and takes the appropriate
action for each preemption mode.
This API also does some sanity checking for valid
preemption modes and combinations.

Define API set_preemption_mode() for gp10b which will set the
preemption modes passed as argument and then use
gr_gp10b_set_ctxsw_preemption_mode() and
update_ctxsw_preemption_mode() to update preemption mode

Legacy path from gr_gp10b_alloc_gr_ctx() will convert
flags NVGPU_ALLOC_OBJ_FLAGS_* into appropriate preemption modes
and then call gr_gp10b_set_ctxsw_preemption_mode()

New API set_preemption_mode() will use new flags
NVGPU_GRAPHICS/COMPUTE_PREEMPTION_MODE_* and set and update
ctxsw preemption mode

In gr_gp10b_update_ctxsw_preemption_mode(), update the graphics
context to set CTA preemption mode if mode
NVGPU_COMPUTE_PREEMPTION_MODE_CTA is set.

Also, define preemption modes in nvgpu-t18x.h
and use them everywhere.
Remove old definitions of modes from gr_gp10b.h.

Bug 1646259

Change-Id: Ib4dc1fb9933b15d32f0122a9e52665b69402df18
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1131806
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c           | 204 +++++++++++++++++++++------
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h           |   3 -
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c |   8 +-
 3 files changed, 166 insertions(+), 49 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index e55c5768..a1a13a2b 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -321,7 +321,7 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 
 	gk20a_dbg_fn("");
 
-	if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_GFXP) {
+	if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) {
 		attrib_size_in_chunk = gr->attrib_cb_default_size +
 				  (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
 				   gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
@@ -798,29 +798,33 @@ fail_free:
 	return err;
 }
 
-static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
-			  struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
-			  u32 class,
-			  u32 flags)
+static int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
+				struct gr_ctx_desc *gr_ctx,
+				struct vm_gk20a *vm, u32 class,
+				u32 graphics_preempt_mode,
+				u32 compute_preempt_mode)
 {
-	int err;
-
-	gk20a_dbg_fn("");
-
-	err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags);
-	if (err)
-		return err;
-
-	(*gr_ctx)->t18x.ctx_id_valid = false;
+	int err = 0;
 
 	if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp)
-		flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP;
+		graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
 
 	if (class == PASCAL_COMPUTE_A &&
 			g->gr.t18x.ctx_vars.force_preemption_cilp)
-		flags |= NVGPU_ALLOC_OBJ_FLAGS_CILP;
+		compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
+
+	/* check for invalid combinations */
+	if ((graphics_preempt_mode == 0) && (compute_preempt_mode == 0))
+		return -EINVAL;
 
-	if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) {
+	if ((graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) &&
+		   (compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP))
+		return -EINVAL;
+
+	/* set preemption modes */
+	switch (graphics_preempt_mode) {
+	case NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP:
+		{
 		u32 spill_size =
 			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
 			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
@@ -838,62 +842,112 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 		gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size);
 		gk20a_dbg_info("gfxp context attrib_cb_size=%d",
 				attrib_cb_size);
+
 		err = gr_gp10b_alloc_buffer(vm,
 					g->gr.t18x.ctx_vars.preempt_image_size,
-					&(*gr_ctx)->t18x.preempt_ctxsw_buffer);
+					&gr_ctx->t18x.preempt_ctxsw_buffer);
 		if (err) {
-			gk20a_err(dev_from_gk20a(vm->mm->g),
+			gk20a_err(dev_from_gk20a(g),
 				  "cannot allocate preempt buffer");
-			goto fail_free_gk20a_ctx;
+			goto fail;
 		}
 
 		err = gr_gp10b_alloc_buffer(vm,
 					spill_size,
-					&(*gr_ctx)->t18x.spill_ctxsw_buffer);
+					&gr_ctx->t18x.spill_ctxsw_buffer);
 		if (err) {
-			gk20a_err(dev_from_gk20a(vm->mm->g),
+			gk20a_err(dev_from_gk20a(g),
 				  "cannot allocate spill buffer");
 			goto fail_free_preempt;
 		}
 
 		err = gr_gp10b_alloc_buffer(vm,
 					attrib_cb_size,
-					&(*gr_ctx)->t18x.betacb_ctxsw_buffer);
+					&gr_ctx->t18x.betacb_ctxsw_buffer);
 		if (err) {
-			gk20a_err(dev_from_gk20a(vm->mm->g),
+			gk20a_err(dev_from_gk20a(g),
 				  "cannot allocate beta buffer");
 			goto fail_free_spill;
 		}
 
 		err = gr_gp10b_alloc_buffer(vm,
 					pagepool_size,
-					&(*gr_ctx)->t18x.pagepool_ctxsw_buffer);
+					&gr_ctx->t18x.pagepool_ctxsw_buffer);
 		if (err) {
-			gk20a_err(dev_from_gk20a(vm->mm->g),
+			gk20a_err(dev_from_gk20a(g),
 				  "cannot allocate page pool");
 			goto fail_free_betacb;
 		}
 
-		(*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_GFXP;
+		gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
+		break;
+		}
+
+	case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI:
+		gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
+		break;
+
+	default:
+		break;
 	}
 
 	if (class == PASCAL_COMPUTE_A) {
-		if (flags & NVGPU_ALLOC_OBJ_FLAGS_CILP)
-			(*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CILP;
-		else
-			(*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CTA;
+		switch (compute_preempt_mode) {
+		case NVGPU_COMPUTE_PREEMPTION_MODE_WFI:
+		case NVGPU_COMPUTE_PREEMPTION_MODE_CTA:
+		case NVGPU_COMPUTE_PREEMPTION_MODE_CILP:
+			gr_ctx->compute_preempt_mode = compute_preempt_mode;
+			break;
+		default:
+			break;
+		}
 	}
 
-	gk20a_dbg_fn("done");
-
-	return err;
+	return 0;
 
 fail_free_betacb:
-	gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.betacb_ctxsw_buffer);
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
 fail_free_spill:
-	gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.spill_ctxsw_buffer);
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
 fail_free_preempt:
-	gk20a_gmmu_unmap_free(vm, &(*gr_ctx)->t18x.preempt_ctxsw_buffer);
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
+fail:
+	return err;
+}
+
+static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
+			  struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
+			  u32 class,
+			  u32 flags)
+{
+	int err;
+	u32 graphics_preempt_mode = 0;
+	u32 compute_preempt_mode = 0;
+
+	gk20a_dbg_fn("");
+
+	err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags);
+	if (err)
+		return err;
+
+	(*gr_ctx)->t18x.ctx_id_valid = false;
+
+	if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP)
+		graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
+	if (flags & NVGPU_ALLOC_OBJ_FLAGS_CILP)
+		compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
+
+	if (graphics_preempt_mode || compute_preempt_mode) {
+		err = gr_gp10b_set_ctxsw_preemption_mode(g, *gr_ctx, vm,
+			    class, graphics_preempt_mode, compute_preempt_mode);
+		if (err)
+			goto fail_free_gk20a_ctx;
+	}
+
+	gk20a_dbg_fn("done");
+
+	return 0;
+
 fail_free_gk20a_ctx:
 	gr_gk20a_free_gr_ctx(g, vm, *gr_ctx);
 	*gr_ctx = NULL;
@@ -979,22 +1033,30 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 		ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
 	u32 cilp_preempt_option =
 		ctxsw_prog_main_image_compute_preemption_options_control_cilp_f();
+	u32 cta_preempt_option =
+		ctxsw_prog_main_image_compute_preemption_options_control_cta_f();
 	int err;
 
 	gk20a_dbg_fn("");
 
-	if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_GFXP) {
+	if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) {
 		gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
 		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_graphics_preemption_options_o(), 0,
 				gfxp_preempt_option);
 	}
 
-	if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_CILP) {
+	if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) {
 		gk20a_dbg_info("CILP: %x", cilp_preempt_option);
 		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0,
 				cilp_preempt_option);
 	}
 
+	if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CTA) {
+		gk20a_dbg_info("CTA: %x", cta_preempt_option);
+		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0,
+				cta_preempt_option);
+	}
+
 	if (gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va) {
 		u32 addr;
 		u32 size;
@@ -1547,8 +1609,8 @@ static int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
 		bool *early_exit, bool *ignore_debugger)
 {
 	int ret;
-	bool cilp_enabled = (fault_ch->ch_ctx.gr_ctx->preempt_mode ==
-			NVGPU_GR_PREEMPTION_MODE_CILP) ;
+	bool cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode ==
+			NVGPU_COMPUTE_PREEMPTION_MODE_CILP) ;
 	u32 global_mask = 0, dbgr_control0, global_esr_copy;
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
@@ -1763,7 +1825,7 @@ static bool gr_gp10b_suspend_context(struct channel_gk20a *ch,
 	if (gk20a_is_channel_ctx_resident(ch)) {
 		gk20a_suspend_all_sms(g, 0, false);
 
-		if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_CILP) {
+		if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) {
 			err = gr_gp10b_set_cilp_preempt_pending(g, ch);
 			if (err)
 				gk20a_err(dev_from_gk20a(g),
@@ -1852,6 +1914,63 @@ clean_up:
 	return err;
 }
 
+static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
+					u32 graphics_preempt_mode,
+					u32 compute_preempt_mode)
+{
+	struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx;
+	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
+	struct gk20a *g = ch->g;
+	struct tsg_gk20a *tsg;
+	struct vm_gk20a *vm;
+	void *ctx_ptr;
+	u32 class;
+	int err = 0;
+
+	class = ch->obj_class;
+	if (!class)
+		return -EINVAL;
+
+	/* preemption already set ? */
+	if (gr_ctx->graphics_preempt_mode || gr_ctx->compute_preempt_mode)
+		return -EINVAL;
+
+	if (gk20a_is_channel_marked_as_tsg(ch)) {
+		tsg = &g->fifo.tsg[ch->tsgid];
+		vm = tsg->vm;
+	} else {
+		vm = ch->vm;
+	}
+
+	err = gr_gp10b_set_ctxsw_preemption_mode(g, gr_ctx, vm, class,
+					graphics_preempt_mode, compute_preempt_mode);
+	if (err)
+		return err;
+
+	ctx_ptr = vmap(gr_ctx->mem.pages,
+			PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
+			0, pgprot_writecombine(PAGE_KERNEL));
+	if (!ctx_ptr)
+		return -ENOMEM;
+
+	g->ops.fifo.disable_channel(ch);
+	err = g->ops.fifo.preempt_channel(g, ch->hw_chid);
+	if (err)
+		goto unmap_ctx;
+
+	if (g->ops.gr.update_ctxsw_preemption_mode) {
+		g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, ctx_ptr);
+		g->ops.gr.commit_global_cb_manager(g, ch, true);
+	}
+
+	g->ops.fifo.enable_channel(ch);
+
+unmap_ctx:
+	vunmap(ctx_ptr);
+
+	return err;
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -1891,4 +2010,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.create_gr_sysfs = gr_gp10b_create_sysfs;
 	gops->gr.get_lrf_tex_ltc_dram_override = get_ecc_override_val;
 	gops->gr.suspend_contexts = gr_gp10b_suspend_contexts;
+	gops->gr.set_preemption_mode = gr_gp10b_set_preemption_mode;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index edf536f5..8c544f14 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -96,7 +96,4 @@ struct gr_ctx_desc_t18x {
 	bool cilp_preempt_pending;
 };
 
-#define NVGPU_GR_PREEMPTION_MODE_GFXP		1
-#define NVGPU_GR_PREEMPTION_MODE_CILP		3
-
 #endif
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
index 3023ef4b..08793e18 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
@@ -144,21 +144,21 @@ static int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 			desc->gpu_va;
 		p->size[TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_BETACB] = desc->size;
 
-		gr_ctx->preempt_mode = NVGPU_GR_PREEMPTION_MODE_GFXP;
+		gr_ctx->graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
 		p->mode = TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_GFX_GFXP;
 	}
 
 	if (class == PASCAL_COMPUTE_A) {
 		if (flags & NVGPU_ALLOC_OBJ_FLAGS_CILP) {
-			gr_ctx->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CILP;
+			gr_ctx->compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
 			p->mode = TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_COMPUTE_CILP;
 		} else {
-			gr_ctx->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CTA;
+			gr_ctx->compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CTA;
 			p->mode = TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_COMPUTE_CTA;
 		}
 	}
 
-	if (gr_ctx->preempt_mode) {
+	if (gr_ctx->graphics_preempt_mode || gr_ctx->compute_preempt_mode) {
 		msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTXSW_BUFFERS;
 		msg.handle = platform->virt_handle;
 		p->handle = gr_ctx->virt_ctx;
-- 
cgit v1.2.2


From c09f0baf5bce8f2533c6df10ede5c1a40bf6d4e7 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Wed, 27 Apr 2016 19:02:51 +0530
Subject: gpu: nvgpu: API to return preemption modes

Add API gr_gp10b_get_preemption_mode_flags() to return
supported and default graphics/compute preemption modes
on gp10b

Bug 1646259

Change-Id: I291a82a911e021b605b6d1ccae9cef663cc7a01a
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1133596
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index a1a13a2b..8ea9235d 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1971,6 +1971,25 @@ unmap_ctx:
 	return err;
 }
 
+static int gr_gp10b_get_preemption_mode_flags(struct gk20a *g,
+	struct nvgpu_preemption_modes_rec *preemption_modes_rec)
+{
+	preemption_modes_rec->graphics_preemption_mode_flags = (
+			NVGPU_GRAPHICS_PREEMPTION_MODE_WFI |
+			NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP);
+	preemption_modes_rec->compute_preemption_mode_flags = (
+			NVGPU_COMPUTE_PREEMPTION_MODE_WFI |
+			NVGPU_COMPUTE_PREEMPTION_MODE_CTA |
+			NVGPU_COMPUTE_PREEMPTION_MODE_CILP);
+
+	preemption_modes_rec->default_graphics_preempt_mode =
+			NVGPU_GRAPHICS_PREEMPTION_MODE_WFI;
+	preemption_modes_rec->default_compute_preempt_mode =
+			NVGPU_COMPUTE_PREEMPTION_MODE_WFI;
+
+	return 0;
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
@@ -2011,4 +2030,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.get_lrf_tex_ltc_dram_override = get_ecc_override_val;
 	gops->gr.suspend_contexts = gr_gp10b_suspend_contexts;
 	gops->gr.set_preemption_mode = gr_gp10b_set_preemption_mode;
+	gops->gr.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags;
 }
-- 
cgit v1.2.2


From 869b4dd2748263a4c88569973a17c787834427c2 Mon Sep 17 00:00:00 2001
From: Adeel Raza <araza@nvidia.com>
Date: Tue, 3 May 2016 17:42:07 -0700
Subject: gpu: nvgpu: add code to handle DT fuse overrides

Add code for handling GP10B fuse overrides specified in the device tree.
Also add specific handling for the ECC fuse override.

Bug 1699676

Change-Id: Ifa07983054cd143f7f1745a6a6de36f4d4e08126
Signed-off-by: Adeel Raza <araza@nvidia.com>
Reviewed-on: http://git-master/r/1140893
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 45 ++++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h |  2 ++
 2 files changed, 47 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 8ea9235d..b36eff8f 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -17,6 +17,8 @@
 #include <linux/delay.h>
 #include <linux/tegra-fuse.h>
 
+#include <dt-bindings/soc/gp10b-fuse.h>
+
 #include "gk20a/gr_gk20a.h"
 #include "gk20a/semaphore_gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
@@ -1390,6 +1392,12 @@ static int gr_gp10b_init_fs_state(struct gk20a *g)
 			 gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f());
 	gk20a_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), data);
 
+	if (g->gr.t18x.fecs_feature_override_ecc_val != 0) {
+		gk20a_writel(g,
+			gr_fecs_feature_override_ecc_r(),
+			g->gr.t18x.fecs_feature_override_ecc_val);
+	}
+
 	return gr_gm20b_ctx_state_floorsweep(g);
 }
 
@@ -1989,6 +1997,42 @@ static int gr_gp10b_get_preemption_mode_flags(struct gk20a *g,
 
 	return 0;
 }
+static int gp10b_gr_fuse_override(struct gk20a *g)
+{
+	struct device_node *np = g->dev->of_node;
+	u32 *fuses;
+	int count, i;
+
+	if (!np) /* may be pcie device */
+		return 0;
+
+	count = of_property_count_elems_of_size(np, "fuse-overrides", 8);
+	if (count <= 0)
+		return count;
+
+	fuses = kmalloc(sizeof(u32) * count * 2, GFP_KERNEL);
+	if (!fuses)
+		return -ENOMEM;
+	of_property_read_u32_array(np, "fuse-overrides", fuses, count * 2);
+	for (i = 0; i < count; i++) {
+		u32 fuse, value;
+
+		fuse = fuses[2 * i];
+		value = fuses[2 * i + 1];
+		switch (fuse) {
+		case GP10B_FUSE_OPT_ECC_EN:
+			g->gr.t18x.fecs_feature_override_ecc_val = value;
+			break;
+		default:
+			gk20a_err(dev_from_gk20a(g),
+				"ignore unknown fuse override %08x", fuse);
+			break;
+		}
+	}
+
+	kfree(fuses);
+	return 0;
+}
 
 void gp10b_init_gr(struct gpu_ops *gops)
 {
@@ -2031,4 +2075,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.suspend_contexts = gr_gp10b_suspend_contexts;
 	gops->gr.set_preemption_mode = gr_gp10b_set_preemption_mode;
 	gops->gr.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags;
+	gops->gr.fuse_override = gp10b_gr_fuse_override;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index 8c544f14..eb361820 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -83,6 +83,8 @@ struct gr_t18x {
 		struct ecc_stat l2_ded_count;
 	} ecc_stats;
 
+	u32 fecs_feature_override_ecc_val;
+
 	int cilp_preempt_pending_chid;
 };
 
-- 
cgit v1.2.2


From 2f4efc7f3d2be417c3fd3487dce64652e5d468ba Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 10 May 2016 08:53:41 -0700
Subject: gpu: nvgpu: Remove fn debug from PTE update

The function trace in update_gmmu_pte_locked() causes too much spew on
UART.

Change-Id: I94c79be76394631cdee343b2f77e4bf0f830e0a8
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1144808
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: David Martinez Nieto <dmartineznie@nvidia.com>
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Ken Adams <kadams@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index c9a47d70..0e1f3c4b 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -282,8 +282,6 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	u64 ctag_granularity = g->ops.fb.compression_page_size(g);
 	u32 pte_w[2] = {0, 0}; /* invalid pte */
 
-	gk20a_dbg_fn("");
-
 	if (*iova) {
 		if (unmapped_pte)
 			pte_w[0] = gmmu_new_pte_valid_false_f();
@@ -345,7 +343,6 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			}
 		}
 	}
-	gk20a_dbg_fn("done");
 	return 0;
 }
 
-- 
cgit v1.2.2


From e746a16f7abbaacba89e390c692620941fc1b34c Mon Sep 17 00:00:00 2001
From: Remi Denis-Courmont <remid@nvidia.com>
Date: Wed, 11 May 2016 17:54:43 +0300
Subject: gp10b: initialize dynamic sysfs attributes

All dynamically allocated sysfs attributes MUST be initialized
explicitly. Otherwise lock debugging fails.

Change-Id: I8f77857831221b5ceddb43f9d161c3bf4ca049d6
Signed-off-by: Remi Denis-Courmont <remid@nvidia.com>
Reviewed-on: http://git-master/r/1145929
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Juha Tukkinen <jtukkinen@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 64d9d917..a857b838 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -533,6 +533,8 @@ static int ecc_stat_create(struct device *dev,
 				"gpc0_tpc%d_%s",
 				hw_unit,
 				ecc_stat_name);
+
+		sysfs_attr_init(&dev_attr_array[hw_unit].attr);
 		dev_attr_array[hw_unit].attr.name = ecc_stat->names[hw_unit];
 		dev_attr_array[hw_unit].attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
 		dev_attr_array[hw_unit].show = ecc_stat_show;
-- 
cgit v1.2.2


From 18a017865946617fd63256858a0d2300160643f4 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Thu, 12 May 2016 09:31:30 +0300
Subject: gpu: nvgpu: refactor gk20a_mem_{wr,rd} for vidmem

To support vidmem, pass g and mem_desc to the buffer memory accessor
functions. This allows the functions to select the memory access method
based on the buffer aperture instead of using the cpu pointer directly
(as has been done until now). The selection and aperture support will
be in another patch; this patch only refactors these accessors, but
keeps the underlying functionality as-is.

JIRA DNVGPU-23

Change-Id: I21d4a54827b0e2741012dfde7952c0555a583435
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1121914
GVS: Gerrit_Virtual_Submit
Reviewed-by: Ken Adams <kadams@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | 61 +++++++++++++---------------
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c   | 78 ++++++++++++++++++------------------
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c   | 44 +++++++++++---------
 3 files changed, 91 insertions(+), 92 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index 9cb26d3f..4766e0e4 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -25,24 +25,24 @@
 #include "hw_ram_gp10b.h"
 
 static void gp10b_set_pdb_fault_replay_flags(struct gk20a *g,
-				void *inst_ptr)
+				struct mem_desc *mem)
 {
 	u32 val;
 
 	gk20a_dbg_fn("");
 
-	val = gk20a_mem_rd32(inst_ptr,
+	val = gk20a_mem_rd32(g, mem,
 			ram_in_page_dir_base_fault_replay_tex_w());
 	val &= ~ram_in_page_dir_base_fault_replay_tex_m();
 	val |= ram_in_page_dir_base_fault_replay_tex_true_f();
-	gk20a_mem_wr32(inst_ptr,
+	gk20a_mem_wr32(g, mem,
 		ram_in_page_dir_base_fault_replay_tex_w(), val);
 
-	val = gk20a_mem_rd32(inst_ptr,
+	val = gk20a_mem_rd32(g, mem,
 			ram_in_page_dir_base_fault_replay_gcc_w());
 	val &= ~ram_in_page_dir_base_fault_replay_gcc_m();
 	val |= ram_in_page_dir_base_fault_replay_gcc_true_f();
-	gk20a_mem_wr32(inst_ptr,
+	gk20a_mem_wr32(g, mem,
 		ram_in_page_dir_base_fault_replay_gcc_w(), val);
 
 	gk20a_dbg_fn("done");
@@ -52,28 +52,25 @@ static int channel_gp10b_commit_userd(struct channel_gk20a *c)
 {
 	u32 addr_lo;
 	u32 addr_hi;
-	void *inst_ptr;
 	struct gk20a *g = c->g;
 
 	gk20a_dbg_fn("");
 
-	inst_ptr = c->inst_block.cpu_va;
-	if (!inst_ptr)
-		return -ENOMEM;
-
 	addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
 	addr_hi = u64_hi32(c->userd_iova);
 
 	gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
 		c->hw_chid, (u64)c->userd_iova);
 
-	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
+	gk20a_mem_wr32(g, &c->inst_block,
+		       ram_in_ramfc_w() + ram_fc_userd_w(),
 		       (g->mm.vidmem_is_vidmem ?
 			pbdma_userd_target_sys_mem_ncoh_f() :
 			pbdma_userd_target_vid_mem_f()) |
 		       pbdma_userd_addr_f(addr_lo));
 
-	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
+	gk20a_mem_wr32(g, &c->inst_block,
+		       ram_in_ramfc_w() + ram_fc_userd_hi_w(),
 		       pbdma_userd_hi_addr_f(addr_hi));
 
 	return 0;
@@ -82,33 +79,30 @@ static int channel_gp10b_commit_userd(struct channel_gk20a *c)
 static int channel_gp10b_setup_ramfc(struct channel_gk20a *c,
 			u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
 {
-	void *inst_ptr;
+	struct gk20a *g = c->g;
+	struct mem_desc *mem = &c->inst_block;
 
 	gk20a_dbg_fn("");
 
-	inst_ptr = c->inst_block.cpu_va;
-	if (!inst_ptr)
-		return -ENOMEM;
+	gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v());
 
-	memset(inst_ptr, 0, ram_fc_size_val_v());
-
-	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(),
 		pbdma_gp_base_offset_f(
 		u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(),
 		pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
 		pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_signature_w(),
 		 c->g->ops.fifo.get_pbdma_signature(c->g));
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_formats_w(),
 		pbdma_formats_gp_fermi0_f() |
 		pbdma_formats_pb_fermi1_f() |
 		pbdma_formats_mp_fermi0_f());
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(),
 		pbdma_pb_header_priv_user_f() |
 		pbdma_pb_header_method_zero_f() |
 		pbdma_pb_header_subchannel_zero_f() |
@@ -116,26 +110,26 @@ static int channel_gp10b_setup_ramfc(struct channel_gk20a *c,
 		pbdma_pb_header_first_true_f() |
 		pbdma_pb_header_type_inc_f());
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(),
 		pbdma_subdevice_id_f(1) |
 		pbdma_subdevice_status_active_f() |
 		pbdma_subdevice_channel_dma_enable_f());
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
+	gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f());
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_acquire_w(),
 		channel_gk20a_pbdma_acquire_val(c));
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(),
 		pbdma_runlist_timeslice_timeout_128_f() |
 		pbdma_runlist_timeslice_timescale_3_f() |
 		pbdma_runlist_timeslice_enable_true_f());
 
 	if ( flags & NVGPU_ALLOC_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE)
-		gp10b_set_pdb_fault_replay_flags(c->g, inst_ptr);
+		gp10b_set_pdb_fault_replay_flags(c->g, mem);
 
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
+	gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
 
 	return channel_gp10b_commit_userd(c);
 }
@@ -149,14 +143,12 @@ static u32 gp10b_fifo_get_pbdma_signature(struct gk20a *g)
 static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c)
 {
 	u32 new_syncpt = 0, old_syncpt;
-	void *inst_ptr;
 	u32 v;
 
 	gk20a_dbg_fn("");
 
-	inst_ptr = c->inst_block.cpu_va;
-
-	v = gk20a_mem_rd32(inst_ptr, ram_fc_allowed_syncpoints_w());
+	v = gk20a_mem_rd32(c->g, &c->inst_block,
+			ram_fc_allowed_syncpoints_w());
 	old_syncpt = pbdma_allowed_syncpoints_0_index_v(v);
 	if (c->sync)
 		new_syncpt = c->sync->syncpt_id(c->sync);
@@ -175,7 +167,8 @@ static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c)
 
 		v |= pbdma_allowed_syncpoints_0_index_f(new_syncpt);
 
-		gk20a_mem_wr32(inst_ptr, ram_fc_allowed_syncpoints_w(), v);
+		gk20a_mem_wr32(c->g, &c->inst_block,
+				ram_fc_allowed_syncpoints_w(), v);
 	}
 
 	/* enable channel */
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index b36eff8f..07f1014f 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -958,52 +958,51 @@ fail_free_gk20a_ctx:
 }
 
 static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm,
-		  struct gr_ctx_desc *gr_ctx) {
-	void *ctx_ptr = vmap(gr_ctx->mem.pages,
-		PAGE_ALIGN(gr_ctx->mem.size) >> PAGE_SHIFT,
-		0, pgprot_writecombine(PAGE_KERNEL));
-	if (!ctx_ptr) {
+		  struct gr_ctx_desc *gr_ctx)
+{
+	struct mem_desc *mem = &gr_ctx->mem;
+
+	if (gk20a_mem_begin(g, mem)) {
 		WARN_ON("Cannot map context");
 		return;
 	}
 	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_magic_value_o : %x (expect %x)\n",
-		gk20a_mem_rd32(ctx_ptr +
-				ctxsw_prog_main_image_magic_value_o(), 0),
+		gk20a_mem_rd(g, mem,
+				ctxsw_prog_main_image_magic_value_o()),
 		ctxsw_prog_main_image_magic_value_v_value_v());
 
 	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x\n",
-		gk20a_mem_rd32(ctx_ptr +
-				ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), 0));
+		gk20a_mem_rd(g, mem,
+				ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o()));
 
 	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x\n",
-		gk20a_mem_rd32(ctx_ptr +
-				ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), 0));
+		gk20a_mem_rd(g, mem,
+				ctxsw_prog_main_image_context_timestamp_buffer_ptr_o()));
 
 	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_control : %x\n",
-		gk20a_mem_rd32(ctx_ptr +
-				ctxsw_prog_main_image_context_timestamp_buffer_control_o(), 0));
+		gk20a_mem_rd(g, mem,
+				ctxsw_prog_main_image_context_timestamp_buffer_control_o()));
 
 	gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_num_save_ops_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_num_save_ops_o()));
 	gk20a_err(dev_from_gk20a(g), "WFI_SAVE_OPERATIONS : %d\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_num_wfi_save_ops_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_num_wfi_save_ops_o()));
 	gk20a_err(dev_from_gk20a(g), "CTA_SAVE_OPERATIONS : %d\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_num_cta_save_ops_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_num_cta_save_ops_o()));
 	gk20a_err(dev_from_gk20a(g), "GFXP_SAVE_OPERATIONS : %d\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_num_gfxp_save_ops_o()));
 	gk20a_err(dev_from_gk20a(g), "CILP_SAVE_OPERATIONS : %d\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_num_cilp_save_ops_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_num_cilp_save_ops_o()));
 	gk20a_err(dev_from_gk20a(g),
 		"image gfx preemption option (GFXP is 1) %x\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_graphics_preemption_options_o(),
-			0));
-	vunmap(ctx_ptr);
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_graphics_preemption_options_o()));
+	gk20a_mem_end(g, mem);
 }
 
 static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
@@ -1028,7 +1027,7 @@ static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 
 static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx,
-		void *ctx_ptr)
+		struct mem_desc *mem)
 {
 	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
 	u32 gfxp_preempt_option =
@@ -1043,19 +1042,22 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 
 	if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) {
 		gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
-		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_graphics_preemption_options_o(), 0,
+		gk20a_mem_wr(g, mem,
+				ctxsw_prog_main_image_graphics_preemption_options_o(),
 				gfxp_preempt_option);
 	}
 
 	if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) {
 		gk20a_dbg_info("CILP: %x", cilp_preempt_option);
-		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0,
+		gk20a_mem_wr(g, mem,
+				ctxsw_prog_main_image_compute_preemption_options_o(),
 				cilp_preempt_option);
 	}
 
 	if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CTA) {
 		gk20a_dbg_info("CTA: %x", cta_preempt_option);
-		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0,
+		gk20a_mem_wr(g, mem,
+				ctxsw_prog_main_image_compute_preemption_options_o(),
 				cta_preempt_option);
 	}
 
@@ -1064,7 +1066,8 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 		u32 size;
 		u32 cbes_reserve;
 
-		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_full_preemption_ptr_o(), 0,
+		gk20a_mem_wr(g, mem,
+				ctxsw_prog_main_image_full_preemption_ptr_o(),
 				gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8);
 
 		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
@@ -1931,7 +1934,7 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 	struct gk20a *g = ch->g;
 	struct tsg_gk20a *tsg;
 	struct vm_gk20a *vm;
-	void *ctx_ptr;
+	struct mem_desc *mem = &gr_ctx->mem;
 	u32 class;
 	int err = 0;
 
@@ -1955,10 +1958,7 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 	if (err)
 		return err;
 
-	ctx_ptr = vmap(gr_ctx->mem.pages,
-			PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
-			0, pgprot_writecombine(PAGE_KERNEL));
-	if (!ctx_ptr)
+	if (gk20a_mem_begin(g, mem))
 		return -ENOMEM;
 
 	g->ops.fifo.disable_channel(ch);
@@ -1967,14 +1967,14 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 		goto unmap_ctx;
 
 	if (g->ops.gr.update_ctxsw_preemption_mode) {
-		g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, ctx_ptr);
+		g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, mem);
 		g->ops.gr.commit_global_cb_manager(g, ch, true);
 	}
 
 	g->ops.fifo.enable_channel(ch);
 
 unmap_ctx:
-	vunmap(ctx_ptr);
+	gk20a_mem_end(g, mem);
 
 	return err;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 0e1f3c4b..0b693f7c 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -145,9 +145,14 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
 	return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
 }
 
-static u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i)
+static u32 pde3_from_index(u32 i)
 {
-	return (u32 *) (((u8 *)entry->mem.cpu_va) + i*gmmu_new_pde__size_v());
+	return i * gmmu_new_pde__size_v() / sizeof(u32);
+}
+
+static u32 pte3_from_index(u32 i)
+{
+	return i * gmmu_new_pte__size_v() / sizeof(u32);
 }
 
 static u64 entry_addr(struct gk20a *g, struct gk20a_mm_entry *entry)
@@ -176,7 +181,7 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	u64 pde_addr = 0;
 	struct gk20a_mm_entry *pte = parent->entries + i;
 	u32 pde_v[2] = {0, 0};
-	u32 *pde;
+	u32 pde;
 
 	gk20a_dbg_fn("");
 
@@ -189,10 +194,10 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
 	pde_v[0] |= gmmu_new_pde_vol_true_f();
 	pde_v[1] |= pte_addr >> 24;
-	pde = pde3_from_index(parent, i);
+	pde = pde3_from_index(i);
 
-	gk20a_mem_wr32(pde, 0, pde_v[0]);
-	gk20a_mem_wr32(pde, 1, pde_v[1]);
+	gk20a_mem_wr32(g, &parent->mem, pde + 0, pde_v[0]);
+	gk20a_mem_wr32(g, &parent->mem, pde + 1, pde_v[1]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
 		  i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
@@ -200,9 +205,9 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	return 0;
 }
 
-static u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i)
+static u32 pde0_from_index(u32 i)
 {
-	return (u32 *) (((u8 *)entry->mem.cpu_va) + i*gmmu_new_dual_pde__size_v());
+	return i * gmmu_new_dual_pde__size_v() / sizeof(u32);
 }
 
 static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
@@ -220,7 +225,7 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	u32 pte_addr_small = 0, pte_addr_big = 0;
 	struct gk20a_mm_entry *entry = pte->entries + i;
 	u32 pde_v[4] = {0, 0, 0, 0};
-	u32 *pde;
+	u32 pde;
 
 	gk20a_dbg_fn("");
 
@@ -254,12 +259,12 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 		pde_v[1] |= pte_addr_big >> 28;
 	}
 
-	pde = pde0_from_index(pte, i);
+	pde = pde0_from_index(i);
 
-	gk20a_mem_wr32(pde, 0, pde_v[0]);
-	gk20a_mem_wr32(pde, 1, pde_v[1]);
-	gk20a_mem_wr32(pde, 2, pde_v[2]);
-	gk20a_mem_wr32(pde, 3, pde_v[3]);
+	gk20a_mem_wr32(g, &pte->mem, pde + 0, pde_v[0]);
+	gk20a_mem_wr32(g, &pte->mem, pde + 1, pde_v[1]);
+	gk20a_mem_wr32(g, &pte->mem, pde + 2, pde_v[2]);
+	gk20a_mem_wr32(g, &pte->mem, pde + 3, pde_v[3]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]",
 		  i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
@@ -323,8 +328,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
 	}
 
-	gk20a_mem_wr32(pte->mem.cpu_va + i*8, 0, pte_w[0]);
-	gk20a_mem_wr32(pte->mem.cpu_va + i*8, 1, pte_w[1]);
+	gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 0, pte_w[0]);
+	gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 1, pte_w[1]);
 
 	if (*iova) {
 		*iova += page_size;
@@ -376,12 +381,13 @@ static const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g,
 	return gp10b_mm_levels;
 }
 
-static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
+static void gp10b_mm_init_pdb(struct gk20a *g, struct mem_desc *mem,
+		u64 pdb_addr)
 {
 	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
 	u32 pdb_addr_hi = u64_hi32(pdb_addr);
 
-	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
+	gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(),
 		(g->mm.vidmem_is_vidmem ?
 		  ram_in_page_dir_base_target_sys_mem_ncoh_f() :
 		  ram_in_page_dir_base_target_vid_mem_f()) |
@@ -389,7 +395,7 @@ static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
 		ram_in_page_dir_base_lo_f(pdb_addr_lo) |
 		1 << 10);
 
-	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
+	gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(),
 		ram_in_page_dir_base_hi_f(pdb_addr_hi));
 }
 
-- 
cgit v1.2.2


From da21fb5d060d9f92d3da05db4d7b49806710ad2a Mon Sep 17 00:00:00 2001
From: Lakshmanan M <lm@nvidia.com>
Date: Wed, 11 May 2016 16:52:55 +0530
Subject: gpu: nvgpu: Add support for multiple PBDMAs

Add support for handling multiple PBDMAs in the
fifo_pbdma_isr and gk20a_init_fifo_reset_enable_hw
code paths.

JIRA DNVGPU-26

Change-Id: I3ce65fdeacb012551d15eed85dc61602f7dadbbb
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1145601
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
index 4781ff85..89037a7c 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
@@ -326,6 +326,10 @@ static inline u32 fifo_intr_pbdma_id_status_f(u32 v, u32 i)
 {
 	return (v & 0x1) << (0 + i*1);
 }
+static inline u32 fifo_intr_pbdma_id_status_v(u32 r, u32 i)
+{
+	return (r >> (0 + i*1)) & 0x1;
+}
 static inline u32 fifo_intr_pbdma_id_status__size_1_v(void)
 {
 	return 0x00000001;
-- 
cgit v1.2.2


From 205559cf31212af1c3c602f4889421748a433416 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 12 May 2016 10:19:22 -0700
Subject: gpu: nvgpu: Remove setting op set_max_ways_evict_last

Do not set op set_max_ways_evict_last. It gets removed from
ltc_gk20a.c, and it's never called in gm20b and beyond anyway.

Change-Id: Ib8851057810aa8ddf2088c9e9245e4caf469bddf
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1146882
---
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 92735f1a..0ccabaf8 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -206,7 +206,6 @@ static void gp10b_ltc_init_fs_state(struct gk20a *g)
 void gp10b_init_ltc(struct gpu_ops *gops)
 {
 	gops->ltc.determine_L2_size_bytes = gp10b_determine_L2_size_bytes;
-	gops->ltc.set_max_ways_evict_last = gk20a_ltc_set_max_ways_evict_last;
 	gops->ltc.set_zbc_color_entry = gk20a_ltc_set_zbc_color_entry;
 	gops->ltc.set_zbc_depth_entry = gk20a_ltc_set_zbc_depth_entry;
 	gops->ltc.init_cbc = gk20a_ltc_init_cbc;
-- 
cgit v1.2.2


From a6682186de77b90fa41718d4b0012b35aba95ae0 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 6 May 2016 15:00:17 -0700
Subject: gpu: nvgpu: gp10b: Fix CWD floorsweep programming

Program CWD TPC and SM registers correctly. The old code did not work
when there are more than 4 TPCs.

Change-Id: I18a14a0f76d97b0962607ec0bbd71aafcd768bca
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1143075
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c    | 52 +++++++++++++++++++++++++++++++++--
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h    |  1 +
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 16 +++++++++++
 3 files changed, 67 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 07f1014f..ebe11c67 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1381,10 +1381,57 @@ static void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
 		gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
 }
 
-static int gr_gp10b_init_fs_state(struct gk20a *g)
+static int gr_gp10b_load_smid_config(struct gk20a *g)
+{
+	u32 *tpc_sm_id;
+	u32 i, j;
+	u32 tpc_index, gpc_index;
+	u32 max_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
+
+	tpc_sm_id = kcalloc(gr_cwd_sm_id__size_1_v(), sizeof(u32), GFP_KERNEL);
+	if (!tpc_sm_id)
+		return -ENOMEM;
+
+	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
+	for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) {
+		u32 reg = 0;
+		u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
+				 gr_cwd_gpc_tpc_id_tpc0_s();
+
+		for (j = 0; j < 4; j++) {
+			u32 sm_id = (i / 4) + j;
+			u32 bits;
+
+			if (sm_id >= g->gr.tpc_count)
+				break;
+
+			gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
+			tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+			bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
+			       gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
+			reg |= bits << (j * bit_stride);
+
+			tpc_sm_id[gpc_index + max_gpcs * ((tpc_index & 4) >> 2)]
+				|= sm_id << (bit_stride * (tpc_index & 3));
+		}
+		gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
+	}
+
+	for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
+		gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
+
+	kfree(tpc_sm_id);
+
+	return 0;
+}
+
+int gr_gp10b_init_fs_state(struct gk20a *g)
 {
 	u32 data;
 
+	gk20a_dbg_fn("");
+
 	data = gk20a_readl(g, gr_gpcs_tpcs_sm_texio_control_r());
 	data = set_field(data, gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_m(),
 			gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_arm_63_48_match_f());
@@ -1401,7 +1448,7 @@ static int gr_gp10b_init_fs_state(struct gk20a *g)
 			g->gr.t18x.fecs_feature_override_ecc_val);
 	}
 
-	return gr_gm20b_ctx_state_floorsweep(g);
+	return gr_gm20b_init_fs_state(g);
 }
 
 static void gr_gp10b_init_cyclestats(struct gk20a *g)
@@ -2076,4 +2123,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.set_preemption_mode = gr_gp10b_set_preemption_mode;
 	gops->gr.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags;
 	gops->gr.fuse_override = gp10b_gr_fuse_override;
+	gops->gr.load_smid_config = gr_gp10b_load_smid_config;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index eb361820..e08a7dc5 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -35,6 +35,7 @@ enum {
 #define NVC0C0_SET_SHADER_EXCEPTIONS		0x1528
 
 void gp10b_init_gr(struct gpu_ops *ops);
+int gr_gp10b_init_fs_state(struct gk20a *g);
 int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
 			struct mem_desc *mem);
 void gr_gp10b_create_sysfs(struct device *dev);
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 78304fb1..f7fd3b09 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -2170,10 +2170,22 @@ static inline u32 gr_cwd_gpc_tpc_id_r(u32 i)
 {
 	return 0x00405b60 + i*4;
 }
+static inline u32 gr_cwd_gpc_tpc_id_tpc0_s(void)
+{
+	return 4;
+}
 static inline u32 gr_cwd_gpc_tpc_id_tpc0_f(u32 v)
 {
 	return (v & 0xf) << 0;
 }
+static inline u32 gr_cwd_gpc_tpc_id_gpc0_s(void)
+{
+	return 4;
+}
+static inline u32 gr_cwd_gpc_tpc_id_gpc0_f(u32 v)
+{
+	return (v & 0xf) << 4;
+}
 static inline u32 gr_cwd_gpc_tpc_id_tpc1_f(u32 v)
 {
 	return (v & 0xf) << 8;
@@ -2182,6 +2194,10 @@ static inline u32 gr_cwd_sm_id_r(u32 i)
 {
 	return 0x00405ba0 + i*4;
 }
+static inline u32 gr_cwd_sm_id__size_1_v(void)
+{
+	return 0x00000010;
+}
 static inline u32 gr_cwd_sm_id_tpc0_f(u32 v)
 {
 	return (v & 0xff) << 0;
-- 
cgit v1.2.2


From 2580fa57fb4d1c0b6c002bade851a358ac867b24 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 17 May 2016 07:57:47 -0700
Subject: gpu: nvgpu: gp10b: Program NISO sysmem flush addr

Program sysmem flush address to prevent random accesses of
address 0.

Change-Id: Ia577106c63a80589c154af41d18b70480ed7c7d7
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1149174
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 0b693f7c..c25abc78 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -18,6 +18,7 @@
 #include "gk20a/gk20a.h"
 #include "mm_gp10b.h"
 #include "rpfb_gp10b.h"
+#include "hw_fb_gp10b.h"
 #include "hw_ram_gp10b.h"
 #include "hw_bus_gp10b.h"
 #include "hw_gmmu_gp10b.h"
@@ -39,15 +40,11 @@ static int gp10b_init_mm_setup_hw(struct gk20a *g)
 
 	g->ops.fb.set_mmu_page_size(g);
 
-	inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a());
-	gk20a_dbg_info("bar1 inst block ptr: 0x%08x",  (u32)inst_pa);
+	gk20a_writel(g, fb_niso_flush_sysmem_addr_r(),
+		     (g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0)
+		     >> 8ULL));
 
-	gk20a_writel(g, bus_bar1_block_r(),
-		     (g->mm.vidmem_is_vidmem ?
-		       bus_bar1_block_target_sys_mem_ncoh_f() :
-		       bus_bar1_block_target_vid_mem_f()) |
-		     bus_bar1_block_mode_virtual_f() |
-		     bus_bar1_block_ptr_f(inst_pa));
+	g->ops.mm.bar1_bind(g, inst_pa);
 
 	if (g->ops.mm.init_bar2_mm_hw_setup) {
 		err = g->ops.mm.init_bar2_mm_hw_setup(g);
-- 
cgit v1.2.2


From 1f225fa73167ec31c9332e4031c156d8b04a41f6 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 13 May 2016 12:22:21 -0700
Subject: gpu: nvgpu: Implement engine_enum_from_type

Implement a helper function engine_enum_from_type. This allows
parsing device_info entries for LCE engine type.

Pascal has a logical copy engine (LCE) instead of CE2, so add a definition
of that.

Change-Id: I71f59c308641d84ac59fd57fc37d9b627bb07a43
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1147747
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c   | 17 +++++++++++++++
 drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h | 38 +++++++++++++++++++++++++++++-----
 drivers/gpu/nvgpu/gp10b/mc_gp10b.c     |  8 +++----
 3 files changed, 54 insertions(+), 9 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index 4766e0e4..45de221e 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -23,6 +23,7 @@
 #include "hw_ccsr_gp10b.h"
 #include "hw_fifo_gp10b.h"
 #include "hw_ram_gp10b.h"
+#include "hw_top_gp10b.h"
 
 static void gp10b_set_pdb_fault_replay_flags(struct gk20a *g,
 				struct mem_desc *mem)
@@ -181,10 +182,26 @@ static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c)
 	return 0;
 }
 
+static int gp10b_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type)
+{
+	int ret = ENGINE_INVAL_GK20A;
+
+	gk20a_dbg_info("engine type %d", engine_type);
+	if (engine_type == top_device_info_type_enum_graphics_v())
+		ret = ENGINE_GR_GK20A;
+	else if (engine_type == top_device_info_type_enum_lce_v())
+		ret = ENGINE_CE2_GK20A;
+	else
+		gk20a_err(g->dev, "unknown engine %d", engine_type);
+
+	return ret;
+}
+
 void gp10b_init_fifo(struct gpu_ops *gops)
 {
 	gm20b_init_fifo(gops);
 	gops->fifo.setup_ramfc = channel_gp10b_setup_ramfc;
 	gops->fifo.get_pbdma_signature = gp10b_fifo_get_pbdma_signature;
 	gops->fifo.resetup_ramfc = gp10b_fifo_resetup_ramfc;
+	gops->fifo.engine_enum_from_type = gp10b_fifo_engine_enum_from_type;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h
index ab6f6373..5376717f 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -138,13 +138,37 @@ static inline u32 top_device_info_type_enum_graphics_f(void)
 {
 	return 0x0;
 }
-static inline u32 top_device_info_type_enum_copy0_v(void)
+static inline u32 top_device_info_type_enum_copy2_v(void)
 {
-	return 0x00000001;
+	return 0x00000003;
+}
+static inline u32 top_device_info_type_enum_copy2_f(void)
+{
+	return 0xc;
+}
+static inline u32 top_device_info_type_enum_lce_v(void)
+{
+	return 0x00000013;
+}
+static inline u32 top_device_info_type_enum_lce_f(void)
+{
+	return 0x4c;
+}
+static inline u32 top_device_info_engine_v(u32 r)
+{
+	return (r >> 5) & 0x1;
+}
+static inline u32 top_device_info_runlist_v(u32 r)
+{
+	return (r >> 4) & 0x1;
+}
+static inline u32 top_device_info_intr_v(u32 r)
+{
+	return (r >> 3) & 0x1;
 }
-static inline u32 top_device_info_type_enum_copy0_f(void)
+static inline u32 top_device_info_reset_v(u32 r)
 {
-	return 0x4;
+	return (r >> 2) & 0x1;
 }
 static inline u32 top_device_info_entry_v(u32 r)
 {
@@ -158,4 +182,8 @@ static inline u32 top_device_info_entry_enum_v(void)
 {
 	return 0x00000002;
 }
+static inline u32 top_device_info_entry_engine_type_v(void)
+{
+	return 0x00000002;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
index 47c8fcc6..4d9573d1 100644
--- a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
@@ -108,9 +108,9 @@ irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g)
 
 	gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0);
 
-	if (mc_intr_0 & BIT(g->fifo.engine_info[ENGINE_GR_GK20A].intr_id))
+	if (mc_intr_0 & g->fifo.engine_info[ENGINE_GR_GK20A].intr_mask)
 		gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g));
-	if (mc_intr_0 & BIT(g->fifo.engine_info[ENGINE_CE2_GK20A].intr_id)
+	if (mc_intr_0 & g->fifo.engine_info[ENGINE_CE2_GK20A].intr_mask
 		&& g->ops.ce2.isr_stall)
 		g->ops.ce2.isr_stall(g);
 	if (mc_intr_0 & mc_intr_pfifo_pending_f())
@@ -142,9 +142,9 @@ irqreturn_t mc_gp10b_intr_thread_nonstall(struct gk20a *g)
 
 	if (mc_intr_1 & mc_intr_pfifo_pending_f())
 		gk20a_fifo_nonstall_isr(g);
-	if (mc_intr_1 & BIT(g->fifo.engine_info[ENGINE_GR_GK20A].intr_id))
+	if (mc_intr_1 & g->fifo.engine_info[ENGINE_GR_GK20A].intr_mask)
 		gk20a_gr_nonstall_isr(g);
-	if (mc_intr_1 & BIT(g->fifo.engine_info[ENGINE_CE2_GK20A].intr_id)
+	if (mc_intr_1 & g->fifo.engine_info[ENGINE_CE2_GK20A].intr_mask
 		&& g->ops.ce2.isr_nonstall)
 		g->ops.ce2.isr_nonstall(g);
 
-- 
cgit v1.2.2


From fed910d75fb810267700ef9af1068471f0ce6fb2 Mon Sep 17 00:00:00 2001
From: Peter Daifuku <pdaifuku@nvidia.com>
Date: Fri, 15 Apr 2016 18:16:21 -0700
Subject: gpu: nvgpu: hwpm broadcast register support

Add support for hwpm broadcast registers (ltc and lts)

Bug 1648200

Change-Id: I2aa4e6c0991abaa94b0f58354a826f626f1d43a2
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1131363
(cherry picked from commit 383d195dabed76ecc50bb2bd355d6180bcda082a)
Reviewed-on: http://git-master/r/1133629
(cherry picked from commit 725d02e2690c96fbfa5f49ade550442de5961e82)
Reviewed-on: http://git-master/r/1127750
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
index 5916f695..4a3f634e 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
@@ -50,6 +50,26 @@
 #ifndef _hw_ltc_gp10b_h_
 #define _hw_ltc_gp10b_h_
 
+static inline u32 ltc_pltcg_base_v(void)
+{
+	return 0x00140000;
+}
+static inline u32 ltc_pltcg_extent_v(void)
+{
+	return 0x0017ffff;
+}
+static inline u32 ltc_ltc0_ltss_v(void)
+{
+	return 0x00140200;
+}
+static inline u32 ltc_ltc0_lts0_v(void)
+{
+	return 0x00140400;
+}
+static inline u32 ltc_ltcs_ltss_v(void)
+{
+	return 0x0017e200;
+}
 static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void)
 {
 	return 0x0014046c;
-- 
cgit v1.2.2


From b2b1c6d2be54dad286d2441e5f77aa3c9b67fdd6 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 19 May 2016 15:27:05 -0700
Subject: gpu: nvgpu: Add HWPM registers to regops whitelist

Bug 1763653

Change-Id: Ief7ed56c29dba5836fc8435359a7c615ce53bb84
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1150717
Reviewed-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/regops_gp10b.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/regops_gp10b.c b/drivers/gpu/nvgpu/gp10b/regops_gp10b.c
index 8934c324..a494c9b8 100644
--- a/drivers/gpu/nvgpu/gp10b/regops_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/regops_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * Tegra GK20A GPU Debugger Driver Register Ops
  *
- * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -52,8 +52,10 @@ static const struct regop_offset_range gp10b_global_whitelist_ranges[] = {
 	{ 0x0008e00c,   1},
 	{ 0x00100c18,   3},
 	{ 0x00100c84,   1},
+	{ 0x00104038,   1},
 	{ 0x0010a0a8,   1},
 	{ 0x0010a4f0,   1},
+	{ 0x0010e490,   1},
 	{ 0x0013cc14,   1},
 	{ 0x00140028,   1},
 	{ 0x00140280,   1},
@@ -237,9 +239,11 @@ static const struct regop_offset_range gp10b_global_whitelist_ranges[] = {
 	{ 0x0041a8a0,   3},
 	{ 0x0041a8b0,   2},
 	{ 0x0041b014,   1},
+	{ 0x0041b0a0,   1},
 	{ 0x0041b0cc,   1},
 	{ 0x0041b1dc,   1},
 	{ 0x0041be0c,   3},
+	{ 0x0041bea0,   1},
 	{ 0x0041becc,   1},
 	{ 0x0041bfdc,   1},
 	{ 0x0041c054,   1},
@@ -326,9 +330,11 @@ static const struct regop_offset_range gp10b_global_whitelist_ranges[] = {
 	{ 0x005028a0,   3},
 	{ 0x005028b0,   2},
 	{ 0x00503014,   1},
+	{ 0x005030a0,   1},
 	{ 0x005030cc,   1},
 	{ 0x005031dc,   1},
 	{ 0x00503e14,   1},
+	{ 0x00503ea0,   1},
 	{ 0x00503ecc,   1},
 	{ 0x00503fdc,   1},
 	{ 0x00504054,   1},
-- 
cgit v1.2.2


From 49cedb9650d178ad5653b55885d022aacbd66f61 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 19 May 2016 09:31:14 -0700
Subject: gpu: nvgpu: gp10b: Use gk20a version of PMU reset

Change-Id: I9b6c2e3bcae4ac43a20089e05891654654df1b54
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1150541
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index 6a704813..fca84116 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -416,4 +416,5 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.pmu_pg_grinit_param = gp10b_pg_gr_init;
 	gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd =
 			send_ecc_overide_en_dis_cmd;
+	gops->pmu.reset = gk20a_pmu_reset;
 }
-- 
cgit v1.2.2


From 21eda905ea69a0e090f6e29c444a9129c65f0b1f Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 19 May 2016 11:43:29 -0700
Subject: gpu: nvgpu: Fix SM number when more than 4 TPCs

Use multiplication instead of division to come up with an SM id.

Change-Id: Ib185970ee99cc8c010d02ba846229e0959a5fef3
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1150599
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index ebe11c67..3c04c2e4 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1399,7 +1399,7 @@ static int gr_gp10b_load_smid_config(struct gk20a *g)
 				 gr_cwd_gpc_tpc_id_tpc0_s();
 
 		for (j = 0; j < 4; j++) {
-			u32 sm_id = (i / 4) + j;
+			u32 sm_id = (i * 4) + j;
 			u32 bits;
 
 			if (sm_id >= g->gr.tpc_count)
-- 
cgit v1.2.2


From 3d0f9a751784ac9eb27f9f989f3b584ff5dc8f17 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 23 Mar 2016 08:41:04 -0700
Subject: gpu: nvgpu: Add support for gp104 and gp106

Add support for chips gp104 and gp106.

Change-Id: Ied5f239bdd0ec85245bce1fb6ef51330871d0f05
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1120465
GVS: Gerrit_Virtual_Submit
Reviewed-by: Ken Adams <kadams@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile                         |    6 +-
 drivers/gpu/nvgpu/gp106/gr_ctx_gp106.c             |   35 +
 drivers/gpu/nvgpu/gp106/gr_ctx_gp106.h             |   26 +
 drivers/gpu/nvgpu/gp106/gr_gp106.c                 |  111 +
 drivers/gpu/nvgpu/gp106/gr_gp106.h                 |   26 +
 drivers/gpu/nvgpu/gp106/hal_gp106.c                |  215 ++
 drivers/gpu/nvgpu/gp106/hal_gp106.h                |   21 +
 drivers/gpu/nvgpu/gp106/hw_bus_gp106.h             |  193 +
 drivers/gpu/nvgpu/gp106/hw_ccsr_gp106.h            |  125 +
 drivers/gpu/nvgpu/gp106/hw_ce2_gp106.h             |   81 +
 drivers/gpu/nvgpu/gp106/hw_ctxsw_prog_gp106.h      |  289 ++
 drivers/gpu/nvgpu/gp106/hw_fb_gp106.h              |  489 +++
 drivers/gpu/nvgpu/gp106/hw_fifo_gp106.h            |  681 ++++
 drivers/gpu/nvgpu/gp106/hw_flush_gp106.h           |  181 +
 drivers/gpu/nvgpu/gp106/hw_fuse_gp106.h            |  129 +
 drivers/gpu/nvgpu/gp106/hw_gmmu_gp106.h            | 1261 ++++++
 drivers/gpu/nvgpu/gp106/hw_gr_gp106.h              | 4001 ++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/hw_ltc_gp106.h             |  553 +++
 drivers/gpu/nvgpu/gp106/hw_mc_gp106.h              |  245 ++
 drivers/gpu/nvgpu/gp106/hw_pbdma_gp106.h           |  505 +++
 drivers/gpu/nvgpu/gp106/hw_perf_gp106.h            |  205 +
 drivers/gpu/nvgpu/gp106/hw_pri_ringmaster_gp106.h  |  145 +
 .../gpu/nvgpu/gp106/hw_pri_ringstation_sys_gp106.h |   69 +
 drivers/gpu/nvgpu/gp106/hw_proj_gp106.h            |  149 +
 drivers/gpu/nvgpu/gp106/hw_pwr_gp106.h             |  841 ++++
 drivers/gpu/nvgpu/gp106/hw_ram_gp106.h             |  477 +++
 drivers/gpu/nvgpu/gp106/hw_timer_gp106.h           |  109 +
 drivers/gpu/nvgpu/gp106/hw_top_gp106.h             |  169 +
 drivers/gpu/nvgpu/gp106/hw_xve_gp106.h             |   69 +
 drivers/gpu/nvgpu/gp106/pmu_gp106.c                |   46 +
 drivers/gpu/nvgpu/gp106/pmu_gp106.h                |   19 +
 drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h               |   11 +-
 32 files changed, 11480 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp106/gr_ctx_gp106.c
 create mode 100644 drivers/gpu/nvgpu/gp106/gr_ctx_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/gr_gp106.c
 create mode 100644 drivers/gpu/nvgpu/gp106/gr_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hal_gp106.c
 create mode 100644 drivers/gpu/nvgpu/gp106/hal_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_bus_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_ccsr_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_ce2_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_ctxsw_prog_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_fb_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_fifo_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_flush_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_fuse_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_gmmu_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_gr_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_ltc_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_mc_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_pbdma_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_perf_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_pri_ringmaster_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_pri_ringstation_sys_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_proj_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_pwr_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_ram_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_timer_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_top_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_xve_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/pmu_gp106.c
 create mode 100644 drivers/gpu/nvgpu/gp106/pmu_gp106.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 13d52f84..75329a8d 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -18,7 +18,11 @@ nvgpu-y += \
 	$(nvgpu-t18x)/gp10b/therm_gp10b.o \
 	$(nvgpu-t18x)/gp10b/fecs_trace_gp10b.o \
 	$(nvgpu-t18x)/gp10b/gp10b_sysfs.o \
-	$(nvgpu-t18x)/gp10b/gp10b.o
+	$(nvgpu-t18x)/gp10b/gp10b.o \
+	$(nvgpu-t18x)/gp106/hal_gp106.o \
+	$(nvgpu-t18x)/gp106/pmu_gp106.o \
+	$(nvgpu-t18x)/gp106/gr_gp106.o \
+	$(nvgpu-t18x)/gp106/gr_ctx_gp106.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
 
diff --git a/drivers/gpu/nvgpu/gp106/gr_ctx_gp106.c b/drivers/gpu/nvgpu/gp106/gr_ctx_gp106.c
new file mode 100644
index 00000000..34e1f859
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/gr_ctx_gp106.c
@@ -0,0 +1,50 @@
+/*
+ * GP106 Graphics Context
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "gr_ctx_gp106.h"
+
+/*
+ * Write the GP106 netlist firmware image name into @name.
+ *
+ * @index is not consulted; the same image name is produced for every index.
+ * @name must have room for the image name and its terminating NUL: no
+ * buffer size is passed in, so the copy cannot be bounded here.
+ *
+ * Always returns 0.
+ */
+static int gr_gp106_get_netlist_name(int index, char *name)
+{
+	/* Copy through "%s" so the name is never parsed as a format string. */
+	sprintf(name, "%s", GP106_NETLIST_IMAGE_FW_NAME);
+	return 0;
+}
+
+/* Always returns true. */
+static bool gr_gp106_is_firmware_defined(void)
+{
+	return true;
+}
+
+/*
+ * Fill the gr_ctx entries of @gops with the two callbacks defined above
+ * and set use_dma_for_fw_bootstrap to false.
+ */
+void gp106_init_gr_ctx(struct gpu_ops *gops)
+{
+	gops->gr_ctx.get_netlist_name = gr_gp106_get_netlist_name;
+	gops->gr_ctx.is_fw_defined = gr_gp106_is_firmware_defined;
+	gops->gr_ctx.use_dma_for_fw_bootstrap = false;
+}
diff --git a/drivers/gpu/nvgpu/gp106/gr_ctx_gp106.h b/drivers/gpu/nvgpu/gp106/gr_ctx_gp106.h
new file mode 100644
index 00000000..d14a9126
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/gr_ctx_gp106.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __GR_CTX_GP106_H__
+#define __GR_CTX_GP106_H__
+
+#include "gk20a/gr_ctx_gk20a.h"
+
+/* production netlist: the single image name used for GP106 */
+#define GP106_NETLIST_IMAGE_FW_NAME GK20A_NETLIST_IMAGE_C
+
+void gp106_init_gr_ctx(struct gpu_ops *gops);
+
+#endif /*__GR_CTX_GP106_H__*/
diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c
new file mode 100644
index 00000000..e4768e0d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c
@@ -0,0 +1,130 @@
+/*
+ * GP106 GPU GR
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */
+
+#include "gk20a/gr_gk20a.h"
+
+#include "gm20b/gr_gm20b.h" /* for MAXWELL classes */
+#include "gp10b/gr_gp10b.h"
+#include "gr_gp106.h"
+#include "hw_gr_gp106.h"
+
+/* Return true when @class_num is one of the classes listed in the switch. */
+static bool gr_gp106_is_valid_class(struct gk20a *g, u32 class_num)
+{
+	bool supported;
+
+	switch (class_num) {
+	/* Pascal */
+	case PASCAL_COMPUTE_A:
+	case PASCAL_COMPUTE_B:
+	case PASCAL_A:
+	case PASCAL_B:
+	case PASCAL_DMA_COPY_A:
+	/* Maxwell, Fermi and Kepler */
+	case MAXWELL_COMPUTE_B:
+	case MAXWELL_B:
+	case FERMI_TWOD_A:
+	case KEPLER_DMA_COPY_A:
+	case MAXWELL_DMA_COPY_A:
+		supported = true;
+		break;
+	default:
+		supported = false;
+		break;
+	}
+
+	gk20a_dbg_info("class=0x%x valid=%d", class_num, supported);
+	return supported;
+}
+
+/* The default page pool size is the pagepool total_pages hwmax value. */
+static u32 gr_gp106_pagepool_default_size(struct gk20a *g)
+{
+	u32 hwmax_pages = gr_scc_pagepool_total_pages_hwmax_value_v();
+
+	return hwmax_pages;
+}
+
+/*
+ * Handle a software method for PASCAL_COMPUTE_B or PASCAL_B.
+ *
+ * @offset is shifted left by two before being matched against the method
+ * constants.  Returns 0 when the method was dispatched or when @class_num
+ * is neither of the two classes, and -EINVAL for an unrecognised method on
+ * one of them.  @addr is unused.
+ */
+static int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr,
+				     u32 class_num, u32 offset, u32 data)
+{
+	u32 method = offset << 2;
+
+	gk20a_dbg_fn("");
+
+	if (class_num == PASCAL_COMPUTE_B) {
+		if (method != NVC0C0_SET_SHADER_EXCEPTIONS)
+			return -EINVAL;
+		gk20a_gr_set_shader_exceptions(g, data);
+		return 0;
+	}
+
+	if (class_num != PASCAL_B)
+		return 0;
+
+	switch (method) {
+	case NVC097_SET_SHADER_EXCEPTIONS:
+		gk20a_gr_set_shader_exceptions(g, data);
+		return 0;
+	case NVC097_SET_CIRCULAR_BUFFER_SIZE:
+		g->ops.gr.set_circular_buffer_size(g, data);
+		return 0;
+	case NVC097_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
+		g->ops.gr.set_alpha_circular_buffer_size(g, data);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Seed the default circular buffer sizes: attrib_cb_default_size becomes
+ * 0x800 only when it is still zero; alpha_cb_default_size is always
+ * overwritten with the alpha CB size register default.
+ */
+static void gr_gp106_cb_size_default(struct gk20a *g)
+{
+	struct gr_gk20a *gr_state = &g->gr;
+
+	if (gr_state->attrib_cb_default_size == 0)
+		gr_state->attrib_cb_default_size = 0x800;
+
+	gr_state->alpha_cb_default_size =
+		gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
+}
+
+/*
+ * Start from the GP10B GR ops, then override the entries that have a
+ * GP106 implementation in this file.
+ */
+void gp106_init_gr(struct gpu_ops *gops)
+{
+	gp10b_init_gr(gops);
+
+	gops->gr.cb_size_default = gr_gp106_cb_size_default;
+	gops->gr.handle_sw_method = gr_gp106_handle_sw_method;
+	gops->gr.is_valid_class = gr_gp106_is_valid_class;
+	gops->gr.pagepool_default_size = gr_gp106_pagepool_default_size;
+}
diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.h b/drivers/gpu/nvgpu/gp106/gr_gp106.h
new file mode 100644
index 00000000..4fe22ee9
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/gr_gp106.h
@@ -0,0 +1,31 @@
+/*
+ * GP106 GPU GR
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVGPU_GR_GP106_H_
+#define _NVGPU_GR_GP106_H_
+
+/* Only a pointer is needed here, so a forward declaration suffices. */
+struct gpu_ops;
+
+/* Class numbers for the PASCAL_B and PASCAL_COMPUTE_B object classes. */
+enum {
+	PASCAL_B                 = 0xC197,
+	PASCAL_COMPUTE_B         = 0xC1C0,
+};
+
+/* Install the GP106 GR callbacks into @gops (defined in gr_gp106.c). */
+void gp106_init_gr(struct gpu_ops *gops);
+
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
new file mode 100644
index 00000000..5c9e012d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -0,0 +1,226 @@
+/*
+ * GP106 HAL interface
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/printk.h>
+
+#include "gk20a/gk20a.h"
+
+#include "gp10b/gr_gp10b.h"
+#include "gp10b/mc_gp10b.h"
+#include "gp10b/ltc_gp10b.h"
+#include "gp10b/mm_gp10b.h"
+#include "gp10b/ce2_gp10b.h"
+#include "gp10b/fb_gp10b.h"
+#include "gp10b/fifo_gp10b.h"
+#include "gp10b/gp10b_gating_reglist.h"
+#include "gp10b/regops_gp10b.h"
+#include "gp10b/cde_gp10b.h"
+#include "gp10b/therm_gp10b.h"
+
+#include "gm206/bios_gm206.h"
+
+#include "gm20b/gr_gm20b.h"
+#include "gm20b/fifo_gm20b.h"
+#include "gm20b/pmu_gm20b.h"
+#include "gm20b/clk_gm20b.h"
+
+#include "gp106/pmu_gp106.h"
+#include "gp106/gr_ctx_gp106.h"
+#include "gp106/gr_gp106.h"
+#include "nvgpu_gpuid_t18x.h"
+#include "hw_proj_gp106.h"
+
+/* GP106 clock gating uses the GP10B gating load routines throughout. */
+static const struct gpu_ops gp106_ops = {
+	.clock_gating = {
+		.slcg_bus_load_gating_prod =
+			gp10b_slcg_bus_load_gating_prod,
+		.slcg_ce2_load_gating_prod =
+			gp10b_slcg_ce2_load_gating_prod,
+		.slcg_chiplet_load_gating_prod =
+			gp10b_slcg_chiplet_load_gating_prod,
+		.slcg_ctxsw_firmware_load_gating_prod =
+			gp10b_slcg_ctxsw_firmware_load_gating_prod,
+		.slcg_fb_load_gating_prod =
+			gp10b_slcg_fb_load_gating_prod,
+		.slcg_fifo_load_gating_prod =
+			gp10b_slcg_fifo_load_gating_prod,
+		.slcg_gr_load_gating_prod =
+			gr_gp10b_slcg_gr_load_gating_prod,
+		.slcg_ltc_load_gating_prod =
+			ltc_gp10b_slcg_ltc_load_gating_prod,
+		.slcg_perf_load_gating_prod =
+			gp10b_slcg_perf_load_gating_prod,
+		.slcg_priring_load_gating_prod =
+			gp10b_slcg_priring_load_gating_prod,
+		.slcg_pmu_load_gating_prod =
+			gp10b_slcg_pmu_load_gating_prod,
+		.slcg_therm_load_gating_prod =
+			gp10b_slcg_therm_load_gating_prod,
+		.slcg_xbar_load_gating_prod =
+			gp10b_slcg_xbar_load_gating_prod,
+		.blcg_bus_load_gating_prod =
+			gp10b_blcg_bus_load_gating_prod,
+		.blcg_ce_load_gating_prod =
+			gp10b_blcg_ce_load_gating_prod,
+		.blcg_ctxsw_firmware_load_gating_prod =
+			gp10b_blcg_ctxsw_firmware_load_gating_prod,
+		.blcg_fb_load_gating_prod =
+			gp10b_blcg_fb_load_gating_prod,
+		.blcg_fifo_load_gating_prod =
+			gp10b_blcg_fifo_load_gating_prod,
+		.blcg_gr_load_gating_prod =
+			gp10b_blcg_gr_load_gating_prod,
+		.blcg_ltc_load_gating_prod =
+			gp10b_blcg_ltc_load_gating_prod,
+		.blcg_pwr_csb_load_gating_prod =
+			gp10b_blcg_pwr_csb_load_gating_prod,
+		.blcg_pmu_load_gating_prod =
+			gp10b_blcg_pmu_load_gating_prod,
+		.blcg_xbar_load_gating_prod =
+			gp10b_blcg_xbar_load_gating_prod,
+		.pg_gr_load_gating_prod =
+			gr_gp10b_pg_gr_load_gating_prod,
+	}
+};
+
+/*
+ * Translate a litter-value selector into the matching GP106 hw_proj constant.
+ *
+ * An unrecognised selector triggers BUG(); ret is left at -EINVAL.
+ */
+static int gp106_get_litter_value(struct gk20a *g,
+		enum nvgpu_litter_value value)
+{
+	int ret = -EINVAL;
+
+	switch (value) {
+	case GPU_LIT_NUM_GPCS:
+		ret = proj_scal_litter_num_gpcs_v();
+		break;
+	case GPU_LIT_NUM_PES_PER_GPC:
+		ret = proj_scal_litter_num_pes_per_gpc_v();
+		break;
+	case GPU_LIT_NUM_ZCULL_BANKS:
+		ret = proj_scal_litter_num_zcull_banks_v();
+		break;
+	case GPU_LIT_NUM_TPC_PER_GPC:
+		ret = proj_scal_litter_num_tpc_per_gpc_v();
+		break;
+	case GPU_LIT_NUM_FBPS:
+		ret = proj_scal_litter_num_fbps_v();
+		break;
+	case GPU_LIT_GPC_BASE:
+		ret = proj_gpc_base_v();
+		break;
+	case GPU_LIT_GPC_STRIDE:
+		ret = proj_gpc_stride_v();
+		break;
+	case GPU_LIT_GPC_SHARED_BASE:
+		ret = proj_gpc_shared_base_v();
+		break;
+	case GPU_LIT_TPC_IN_GPC_BASE:
+		ret = proj_tpc_in_gpc_base_v();
+		break;
+	case GPU_LIT_TPC_IN_GPC_STRIDE:
+		ret = proj_tpc_in_gpc_stride_v();
+		break;
+	case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
+		ret = proj_tpc_in_gpc_shared_base_v();
+		break;
+	case GPU_LIT_PPC_IN_GPC_STRIDE:
+		ret = proj_ppc_in_gpc_stride_v();
+		break;
+	case GPU_LIT_ROP_BASE:
+		ret = proj_rop_base_v();
+		break;
+	case GPU_LIT_ROP_STRIDE:
+		ret = proj_rop_stride_v();
+		break;
+	case GPU_LIT_ROP_SHARED_BASE:
+		ret = proj_rop_shared_base_v();
+		break;
+	case GPU_LIT_HOST_NUM_PBDMA:
+		ret = proj_host_num_pbdma_v();
+		break;
+	case GPU_LIT_LTC_STRIDE:
+		ret = proj_ltc_stride_v();
+		break;
+	case GPU_LIT_LTS_STRIDE:
+		ret = proj_lts_stride_v();
+		break;
+	case GPU_LIT_NUM_FBPAS:
+		ret = proj_scal_litter_num_fbpas_v();
+		break;
+	case GPU_LIT_FBPA_STRIDE:
+		ret = proj_fbpa_stride_v();
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Install the GP106 HAL into @g.
+ *
+ * g->ops is first overwritten with the static clock-gating table, then each
+ * unit's init routine fills in its own callbacks, and finally the class
+ * numbers stored in g->gpu_characteristics are set.  Always returns 0.
+ */
+int gp106_init_hal(struct gk20a *g)
+{
+	struct gpu_ops *gops = &g->ops;
+	struct nvgpu_gpu_characteristics *c = &g->gpu_characteristics;
+
+	gk20a_dbg_fn("");
+
+	*gops = gp106_ops;
+
+	gops->privsecurity = 0;
+	gops->securegpccs = 0;
+
+	gp10b_init_mc(gops);
+	gp106_init_gr(gops);
+	gp10b_init_ltc(gops);
+	gp10b_init_fb(gops);
+	gp10b_init_fifo(gops);
+	gp10b_init_ce2(gops);
+	gp106_init_gr_ctx(gops);
+	gp10b_init_mm(gops);
+	gp106_init_pmu_ops(gops);
+	gk20a_init_debug_ops(gops);
+	gp10b_init_regops(gops);
+	gp10b_init_cde_ops(gops);
+	gp10b_init_therm_ops(gops);
+	gm206_init_bios(gops);
+	gops->name = "gp106";
+	gops->get_litter_value = gp106_get_litter_value;
+	gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
+
+	c->twod_class = FERMI_TWOD_A;
+	c->threed_class = PASCAL_B;
+	c->compute_class = PASCAL_COMPUTE_B;
+	c->gpfifo_class = PASCAL_CHANNEL_GPFIFO_A;
+	c->inline_to_memory_class = KEPLER_INLINE_TO_MEMORY_B;
+	c->dma_copy_class = PASCAL_DMA_COPY_A;
+
+	gk20a_dbg_fn("done");
+
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.h b/drivers/gpu/nvgpu/gp106/hal_gp106.h
new file mode 100644
index 00000000..af91267b
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.h
@@ -0,0 +1,22 @@
+/*
+ * GP106 HAL interface
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVGPU_HAL_GP106_H
+#define _NVGPU_HAL_GP106_H
+struct gk20a;
+
+/* Populate g->ops with the GP106 HAL; returns 0 (see hal_gp106.c). */
+int gp106_init_hal(struct gk20a *g);
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_bus_gp106.h b/drivers/gpu/nvgpu/gp106/hw_bus_gp106.h
new file mode 100644
index 00000000..6d80b6a6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_bus_gp106.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_bus_gp106_h_
+#define _hw_bus_gp106_h_
+
+static inline u32 bus_bar1_block_r(void)
+{
+	return 0x00001704;
+}
+static inline u32 bus_bar1_block_ptr_f(u32 v)
+{
+	return (v & 0xfffffff) << 0;
+}
+static inline u32 bus_bar1_block_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 bus_bar1_block_target_sys_mem_coh_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 bus_bar1_block_target_sys_mem_ncoh_f(void)
+{
+	return 0x30000000;
+}
+static inline u32 bus_bar1_block_mode_virtual_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 bus_bar2_block_r(void)
+{
+	return 0x00001714;
+}
+static inline u32 bus_bar2_block_ptr_f(u32 v)
+{
+	return (v & 0xfffffff) << 0;
+}
+static inline u32 bus_bar2_block_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 bus_bar2_block_target_sys_mem_coh_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 bus_bar2_block_target_sys_mem_ncoh_f(void)
+{
+	return 0x30000000;
+}
+static inline u32 bus_bar2_block_mode_virtual_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 bus_bar1_block_ptr_shift_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 bus_bar2_block_ptr_shift_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 bus_bind_status_r(void)
+{
+	return 0x00001710;
+}
+static inline u32 bus_bind_status_bar1_pending_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 bus_bind_status_bar1_pending_empty_f(void)
+{
+	return 0x0;
+}
+static inline u32 bus_bind_status_bar1_pending_busy_f(void)
+{
+	return 0x1;
+}
+static inline u32 bus_bind_status_bar1_outstanding_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+static inline u32 bus_bind_status_bar1_outstanding_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 bus_bind_status_bar1_outstanding_true_f(void)
+{
+	return 0x2;
+}
+static inline u32 bus_bind_status_bar2_pending_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 bus_bind_status_bar2_pending_empty_f(void)
+{
+	return 0x0;
+}
+static inline u32 bus_bind_status_bar2_pending_busy_f(void)
+{
+	return 0x4;
+}
+static inline u32 bus_bind_status_bar2_outstanding_v(u32 r)
+{
+	return (r >> 3) & 0x1;
+}
+static inline u32 bus_bind_status_bar2_outstanding_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 bus_bind_status_bar2_outstanding_true_f(void)
+{
+	return 0x8;
+}
+static inline u32 bus_intr_0_r(void)
+{
+	return 0x00001100;
+}
+static inline u32 bus_intr_0_pri_squash_m(void)
+{
+	return 0x1 << 1;
+}
+static inline u32 bus_intr_0_pri_fecserr_m(void)
+{
+	return 0x1 << 2;
+}
+static inline u32 bus_intr_0_pri_timeout_m(void)
+{
+	return 0x1 << 3;
+}
+static inline u32 bus_intr_en_0_r(void)
+{
+	return 0x00001140;
+}
+static inline u32 bus_intr_en_0_pri_squash_m(void)
+{
+	return 0x1 << 1;
+}
+static inline u32 bus_intr_en_0_pri_fecserr_m(void)
+{
+	return 0x1 << 2;
+}
+static inline u32 bus_intr_en_0_pri_timeout_m(void)
+{
+	return 0x1 << 3;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_ccsr_gp106.h b/drivers/gpu/nvgpu/gp106/hw_ccsr_gp106.h
new file mode 100644
index 00000000..65146d39
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_ccsr_gp106.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_ccsr_gp106_h_
+#define _hw_ccsr_gp106_h_
+
+static inline u32 ccsr_channel_inst_r(u32 i)
+{
+	return 0x00800000 + i*8;
+}
+static inline u32 ccsr_channel_inst__size_1_v(void)
+{
+	return 0x00001000;
+}
+static inline u32 ccsr_channel_inst_ptr_f(u32 v)
+{
+	return (v & 0xfffffff) << 0;
+}
+static inline u32 ccsr_channel_inst_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 ccsr_channel_inst_target_sys_mem_coh_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 ccsr_channel_inst_target_sys_mem_ncoh_f(void)
+{
+	return 0x30000000;
+}
+static inline u32 ccsr_channel_inst_bind_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 ccsr_channel_inst_bind_true_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 ccsr_channel_r(u32 i)
+{
+	return 0x00800004 + i*8;
+}
+static inline u32 ccsr_channel__size_1_v(void)
+{
+	return 0x00001000;
+}
+static inline u32 ccsr_channel_enable_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 ccsr_channel_enable_set_f(u32 v)
+{
+	return (v & 0x1) << 10;
+}
+static inline u32 ccsr_channel_enable_set_true_f(void)
+{
+	return 0x400;
+}
+static inline u32 ccsr_channel_enable_clr_true_f(void)
+{
+	return 0x800;
+}
+static inline u32 ccsr_channel_status_v(u32 r)
+{
+	return (r >> 24) & 0xf;
+}
+static inline u32 ccsr_channel_status_pending_ctx_reload_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 ccsr_channel_busy_v(u32 r)
+{
+	return (r >> 28) & 0x1;
+}
+static inline u32 ccsr_channel_next_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_ce2_gp106.h b/drivers/gpu/nvgpu/gp106/hw_ce2_gp106.h
new file mode 100644
index 00000000..d56b930b
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_ce2_gp106.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_ce2_gp106_h_
+#define _hw_ce2_gp106_h_
+
+static inline u32 ce2_intr_status_r(u32 i)
+{
+	return 0x00104410 + i*128;
+}
+static inline u32 ce2_intr_status_blockpipe_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 ce2_intr_status_blockpipe_reset_f(void)
+{
+	return 0x1;
+}
+static inline u32 ce2_intr_status_nonblockpipe_pending_f(void)
+{
+	return 0x2;
+}
+static inline u32 ce2_intr_status_nonblockpipe_reset_f(void)
+{
+	return 0x2;
+}
+static inline u32 ce2_intr_status_launcherr_pending_f(void)
+{
+	return 0x4;
+}
+static inline u32 ce2_intr_status_launcherr_reset_f(void)
+{
+	return 0x4;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_ctxsw_prog_gp106.h b/drivers/gpu/nvgpu/gp106/hw_ctxsw_prog_gp106.h
new file mode 100644
index 00000000..ed3e6009
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_ctxsw_prog_gp106.h
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_ctxsw_prog_gp106_h_
+#define _hw_ctxsw_prog_gp106_h_
+
+static inline u32 ctxsw_prog_fecs_header_v(void)
+{
+	return 0x00000100;
+}
+static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
+{
+	return 0x00000008;
+}
+static inline u32 ctxsw_prog_main_image_patch_count_o(void)
+{
+	return 0x00000010;
+}
+static inline u32 ctxsw_prog_main_image_patch_adr_lo_o(void)
+{
+	return 0x00000014;
+}
+static inline u32 ctxsw_prog_main_image_patch_adr_hi_o(void)
+{
+	return 0x00000018;
+}
+static inline u32 ctxsw_prog_main_image_zcull_o(void)
+{
+	return 0x0000001c;
+}
+static inline u32 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ctxsw_prog_main_image_zcull_mode_separate_buffer_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 ctxsw_prog_main_image_zcull_ptr_o(void)
+{
+	return 0x00000020;
+}
+static inline u32 ctxsw_prog_main_image_pm_o(void)
+{
+	return 0x00000028;
+}
+static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
+{
+	return 0x7 << 0;
+}
+static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
+{
+	return 0x0;
+}
+static inline u32 ctxsw_prog_main_image_pm_smpc_mode_m(void)
+{
+	return 0x7 << 3;
+}
+static inline u32 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f(void)
+{
+	return 0x8;
+}
+static inline u32 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(void)
+{
+	return 0x0;
+}
+static inline u32 ctxsw_prog_main_image_pm_ptr_o(void)
+{
+	return 0x0000002c;
+}
+static inline u32 ctxsw_prog_main_image_num_save_ops_o(void)
+{
+	return 0x000000f4;
+}
+static inline u32 ctxsw_prog_main_image_num_wfi_save_ops_o(void)
+{
+	return 0x000000d0;
+}
+static inline u32 ctxsw_prog_main_image_num_cta_save_ops_o(void)
+{
+	return 0x000000d4;
+}
+static inline u32 ctxsw_prog_main_image_num_gfxp_save_ops_o(void)
+{
+	return 0x000000d8;
+}
+static inline u32 ctxsw_prog_main_image_num_cilp_save_ops_o(void)
+{
+	return 0x000000dc;
+}
+static inline u32 ctxsw_prog_main_image_num_restore_ops_o(void)
+{
+	return 0x000000f8;
+}
+static inline u32 ctxsw_prog_main_image_magic_value_o(void)
+{
+	return 0x000000fc;
+}
+static inline u32 ctxsw_prog_main_image_magic_value_v_value_v(void)
+{
+	return 0x600dc0de;
+}
+static inline u32 ctxsw_prog_local_priv_register_ctl_o(void)
+{
+	return 0x0000000c;
+}
+static inline u32 ctxsw_prog_local_priv_register_ctl_offset_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 ctxsw_prog_local_image_ppc_info_o(void)
+{
+	return 0x000000f4;
+}
+static inline u32 ctxsw_prog_local_image_ppc_info_num_ppcs_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 ctxsw_prog_local_image_ppc_info_ppc_mask_v(u32 r)
+{
+	return (r >> 16) & 0xffff;
+}
+static inline u32 ctxsw_prog_local_image_num_tpcs_o(void)
+{
+	return 0x000000f8;
+}
+static inline u32 ctxsw_prog_local_magic_value_o(void)
+{
+	return 0x000000fc;
+}
+static inline u32 ctxsw_prog_local_magic_value_v_value_v(void)
+{
+	return 0xad0becab;
+}
+static inline u32 ctxsw_prog_main_extended_buffer_ctl_o(void)
+{
+	return 0x000000ec;
+}
+static inline u32 ctxsw_prog_main_extended_buffer_ctl_offset_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 ctxsw_prog_main_extended_buffer_ctl_size_v(u32 r)
+{
+	return (r >> 16) & 0xff;
+}
+static inline u32 ctxsw_prog_extended_buffer_segments_size_in_bytes_v(void)
+{
+	return 0x00000100;
+}
+static inline u32 ctxsw_prog_extended_marker_size_in_bytes_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_o(void)
+{
+	return 0x000000a0;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_s(void)
+{
+	return 2;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_f(u32 v)
+{
+	return (v & 0x3) << 0;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_m(void)
+{
+	return 0x3 << 0;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_v(u32 r)
+{
+	return (r >> 0) & 0x3;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f(void)
+{
+	return 0x0;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(void)
+{
+	return 0x2;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_addr_lo_o(void)
+{
+	return 0x000000a4;
+}
+static inline u32 ctxsw_prog_main_image_priv_access_map_addr_hi_o(void)
+{
+	return 0x000000a8;
+}
+static inline u32 ctxsw_prog_main_image_misc_options_o(void)
+{
+	return 0x0000003c;
+}
+static inline u32 ctxsw_prog_main_image_misc_options_verif_features_m(void)
+{
+	return 0x1 << 3;
+}
+static inline u32 ctxsw_prog_main_image_misc_options_verif_features_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 ctxsw_prog_main_image_graphics_preemption_options_o(void)
+{
+	return 0x00000080;
+}
+static inline u32 ctxsw_prog_main_image_graphics_preemption_options_control_f(u32 v)
+{
+	return (v & 0x3) << 0;
+}
+static inline u32 ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(void)
+{
+	return 0x1;
+}
+static inline u32 ctxsw_prog_main_image_full_preemption_ptr_o(void)
+{
+	return 0x00000068;
+}
+static inline u32 ctxsw_prog_main_image_compute_preemption_options_o(void)
+{
+	return 0x00000084;
+}
+static inline u32 ctxsw_prog_main_image_compute_preemption_options_control_f(u32 v)
+{
+	return (v & 0x3) << 0;
+}
+static inline u32 ctxsw_prog_main_image_compute_preemption_options_control_cta_f(void)
+{
+	return 0x1;
+}
+static inline u32 ctxsw_prog_main_image_compute_preemption_options_control_cilp_f(void)
+{
+	return 0x2;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h b/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h
new file mode 100644
index 00000000..42d32ab3
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h
@@ -0,0 +1,489 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_fb_gp106_h_
+#define _hw_fb_gp106_h_
+
+static inline u32 fb_fbhub_num_active_ltcs_r(void)
+{
+	return 0x00100800;
+}
+static inline u32 fb_mmu_ctrl_r(void)
+{
+	return 0x00100c80;
+}
+static inline u32 fb_mmu_ctrl_vm_pg_size_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 fb_mmu_ctrl_vm_pg_size_128kb_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_mmu_ctrl_vm_pg_size_64kb_f(void)
+{
+	return 0x1;
+}
+static inline u32 fb_mmu_ctrl_pri_fifo_empty_v(u32 r)
+{
+	return (r >> 15) & 0x1;
+}
+static inline u32 fb_mmu_ctrl_pri_fifo_empty_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_mmu_ctrl_pri_fifo_space_v(u32 r)
+{
+	return (r >> 16) & 0xff;
+}
+static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_v(u32 r)
+{
+	return (r >> 11) & 0x1;
+}
+static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_true_f(void)
+{
+	return 0x800;
+}
+static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_priv_mmu_phy_secure_r(void)
+{
+	return 0x00100ce4;
+}
+static inline u32 fb_mmu_invalidate_pdb_r(void)
+{
+	return 0x00100cb8;
+}
+static inline u32 fb_mmu_invalidate_pdb_aperture_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_mmu_invalidate_pdb_aperture_sys_mem_f(void)
+{
+	return 0x2;
+}
+static inline u32 fb_mmu_invalidate_pdb_addr_f(u32 v)
+{
+	return (v & 0xfffffff) << 4;
+}
+static inline u32 fb_mmu_invalidate_r(void)
+{
+	return 0x00100cbc;
+}
+static inline u32 fb_mmu_invalidate_all_va_true_f(void)
+{
+	return 0x1;
+}
+static inline u32 fb_mmu_invalidate_all_pdb_true_f(void)
+{
+	return 0x2;
+}
+static inline u32 fb_mmu_invalidate_hubtlb_only_s(void)
+{
+	return 1;
+}
+static inline u32 fb_mmu_invalidate_hubtlb_only_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+static inline u32 fb_mmu_invalidate_hubtlb_only_m(void)
+{
+	return 0x1 << 2;
+}
+static inline u32 fb_mmu_invalidate_hubtlb_only_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 fb_mmu_invalidate_hubtlb_only_true_f(void)
+{
+	return 0x4;
+}
+static inline u32 fb_mmu_invalidate_replay_s(void)
+{
+	return 3;
+}
+static inline u32 fb_mmu_invalidate_replay_f(u32 v)
+{
+	return (v & 0x7) << 3;
+}
+static inline u32 fb_mmu_invalidate_replay_m(void)
+{
+	return 0x7 << 3;
+}
+static inline u32 fb_mmu_invalidate_replay_v(u32 r)
+{
+	return (r >> 3) & 0x7;
+}
+static inline u32 fb_mmu_invalidate_replay_none_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_mmu_invalidate_replay_start_f(void)
+{
+	return 0x8;
+}
+static inline u32 fb_mmu_invalidate_replay_start_ack_all_f(void)
+{
+	return 0x10;
+}
+static inline u32 fb_mmu_invalidate_replay_cancel_targeted_f(void)
+{
+	return 0x18;
+}
+static inline u32 fb_mmu_invalidate_replay_cancel_global_f(void)
+{
+	return 0x20;
+}
+static inline u32 fb_mmu_invalidate_replay_cancel_f(void)
+{
+	return 0x20;
+}
+static inline u32 fb_mmu_invalidate_sys_membar_s(void)
+{
+	return 1;
+}
+static inline u32 fb_mmu_invalidate_sys_membar_f(u32 v)
+{
+	return (v & 0x1) << 6;
+}
+static inline u32 fb_mmu_invalidate_sys_membar_m(void)
+{
+	return 0x1 << 6;
+}
+static inline u32 fb_mmu_invalidate_sys_membar_v(u32 r)
+{
+	return (r >> 6) & 0x1;
+}
+static inline u32 fb_mmu_invalidate_sys_membar_true_f(void)
+{
+	return 0x40;
+}
+static inline u32 fb_mmu_invalidate_ack_s(void)
+{
+	return 2;
+}
+static inline u32 fb_mmu_invalidate_ack_f(u32 v)
+{
+	return (v & 0x3) << 7;
+}
+static inline u32 fb_mmu_invalidate_ack_m(void)
+{
+	return 0x3 << 7;
+}
+static inline u32 fb_mmu_invalidate_ack_v(u32 r)
+{
+	return (r >> 7) & 0x3;
+}
+static inline u32 fb_mmu_invalidate_ack_ack_none_required_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_mmu_invalidate_ack_ack_intranode_f(void)
+{
+	return 0x100;
+}
+static inline u32 fb_mmu_invalidate_ack_ack_globally_f(void)
+{
+	return 0x80;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_id_s(void)
+{
+	return 6;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_id_f(u32 v)
+{
+	return (v & 0x3f) << 9;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_id_m(void)
+{
+	return 0x3f << 9;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_id_v(u32 r)
+{
+	return (r >> 9) & 0x3f;
+}
+static inline u32 fb_mmu_invalidate_cancel_gpc_id_s(void)
+{
+	return 5;
+}
+static inline u32 fb_mmu_invalidate_cancel_gpc_id_f(u32 v)
+{
+	return (v & 0x1f) << 15;
+}
+static inline u32 fb_mmu_invalidate_cancel_gpc_id_m(void)
+{
+	return 0x1f << 15;
+}
+static inline u32 fb_mmu_invalidate_cancel_gpc_id_v(u32 r)
+{
+	return (r >> 15) & 0x1f;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_type_s(void)
+{
+	return 1;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_type_f(u32 v)
+{
+	return (v & 0x1) << 20;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_type_m(void)
+{
+	return 0x1 << 20;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_type_v(u32 r)
+{
+	return (r >> 20) & 0x1;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_type_gpc_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_mmu_invalidate_cancel_client_type_hub_f(void)
+{
+	return 0x100000;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_s(void)
+{
+	return 3;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_f(u32 v)
+{
+	return (v & 0x7) << 24;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_m(void)
+{
+	return 0x7 << 24;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_v(u32 r)
+{
+	return (r >> 24) & 0x7;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_all_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_pte_only_f(void)
+{
+	return 0x1000000;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_up_to_pde0_f(void)
+{
+	return 0x2000000;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_up_to_pde1_f(void)
+{
+	return 0x3000000;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_up_to_pde2_f(void)
+{
+	return 0x4000000;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_up_to_pde3_f(void)
+{
+	return 0x5000000;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_up_to_pde4_f(void)
+{
+	return 0x6000000;
+}
+static inline u32 fb_mmu_invalidate_cancel_cache_level_up_to_pde5_f(void)
+{
+	return 0x7000000;
+}
+static inline u32 fb_mmu_invalidate_trigger_s(void)
+{
+	return 1;
+}
+static inline u32 fb_mmu_invalidate_trigger_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+static inline u32 fb_mmu_invalidate_trigger_m(void)
+{
+	return 0x1U << 31;	/* bit 31; U suffix avoids signed-shift UB */
+}
+static inline u32 fb_mmu_invalidate_trigger_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 fb_mmu_invalidate_trigger_true_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 fb_mmu_debug_wr_r(void)
+{
+	return 0x00100cc8;
+}
+static inline u32 fb_mmu_debug_wr_aperture_s(void)
+{
+	return 2;
+}
+static inline u32 fb_mmu_debug_wr_aperture_f(u32 v)
+{
+	return (v & 0x3) << 0;
+}
+static inline u32 fb_mmu_debug_wr_aperture_m(void)
+{
+	return 0x3 << 0;
+}
+static inline u32 fb_mmu_debug_wr_aperture_v(u32 r)
+{
+	return (r >> 0) & 0x3;
+}
+static inline u32 fb_mmu_debug_wr_aperture_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_mmu_debug_wr_aperture_sys_mem_coh_f(void)
+{
+	return 0x2;
+}
+static inline u32 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(void)
+{
+	return 0x3;
+}
+static inline u32 fb_mmu_debug_wr_vol_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_mmu_debug_wr_vol_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fb_mmu_debug_wr_vol_true_f(void)
+{
+	return 0x4;
+}
+static inline u32 fb_mmu_debug_wr_addr_f(u32 v)
+{
+	return (v & 0xfffffff) << 4;
+}
+static inline u32 fb_mmu_debug_wr_addr_alignment_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 fb_mmu_debug_rd_r(void)
+{
+	return 0x00100ccc;
+}
+static inline u32 fb_mmu_debug_rd_aperture_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_mmu_debug_rd_aperture_sys_mem_coh_f(void)
+{
+	return 0x2;
+}
+static inline u32 fb_mmu_debug_rd_aperture_sys_mem_ncoh_f(void)
+{
+	return 0x3;
+}
+static inline u32 fb_mmu_debug_rd_vol_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_mmu_debug_rd_addr_f(u32 v)
+{
+	return (v & 0xfffffff) << 4;
+}
+static inline u32 fb_mmu_debug_rd_addr_alignment_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 fb_mmu_debug_ctrl_r(void)
+{
+	return 0x00100cc4;
+}
+static inline u32 fb_mmu_debug_ctrl_debug_v(u32 r)
+{
+	return (r >> 16) & 0x1;
+}
+static inline u32 fb_mmu_debug_ctrl_debug_m(void)
+{
+	return 0x1 << 16;
+}
+static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fb_mmu_debug_ctrl_debug_enabled_f(void)
+{
+	return 0x10000;
+}
+static inline u32 fb_mmu_debug_ctrl_debug_disabled_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fb_mmu_debug_ctrl_debug_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 fb_mmu_vpr_info_r(void)
+{
+	return 0x00100cd0;
+}
+static inline u32 fb_mmu_vpr_info_fetch_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 fb_mmu_vpr_info_fetch_false_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fb_mmu_vpr_info_fetch_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fb_niso_flush_sysmem_addr_r(void)
+{
+	return 0x00100c10;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_fifo_gp106.h b/drivers/gpu/nvgpu/gp106/hw_fifo_gp106.h
new file mode 100644
index 00000000..763b58df
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_fifo_gp106.h
@@ -0,0 +1,681 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_fifo_gp106_h_
+#define _hw_fifo_gp106_h_
+
+static inline u32 fifo_bar1_base_r(void)
+{
+	return 0x00002254;
+}
+static inline u32 fifo_bar1_base_ptr_f(u32 v)
+{
+	return (v & 0xfffffff) << 0;
+}
+static inline u32 fifo_bar1_base_ptr_align_shift_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 fifo_bar1_base_valid_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 fifo_bar1_base_valid_true_f(void)
+{
+	return 0x10000000;
+}
+static inline u32 fifo_runlist_base_r(void)
+{
+	return 0x00002270;
+}
+static inline u32 fifo_runlist_base_ptr_f(u32 v)
+{
+	return (v & 0xfffffff) << 0;
+}
+static inline u32 fifo_runlist_base_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 fifo_runlist_base_target_sys_mem_coh_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 fifo_runlist_base_target_sys_mem_ncoh_f(void)
+{
+	return 0x30000000;
+}
+static inline u32 fifo_runlist_r(void)
+{
+	return 0x00002274;
+}
+static inline u32 fifo_runlist_engine_f(u32 v)
+{
+	return (v & 0xf) << 20;
+}
+static inline u32 fifo_eng_runlist_base_r(u32 i)
+{
+	return 0x00002280 + i*8;
+}
+static inline u32 fifo_eng_runlist_base__size_1_v(void)
+{
+	return 0x00000007;
+}
+static inline u32 fifo_eng_runlist_r(u32 i)
+{
+	return 0x00002284 + i*8;
+}
+static inline u32 fifo_eng_runlist__size_1_v(void)
+{
+	return 0x00000007;
+}
+static inline u32 fifo_eng_runlist_length_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+static inline u32 fifo_eng_runlist_length_max_v(void)
+{
+	return 0x0000ffff;
+}
+static inline u32 fifo_eng_runlist_pending_true_f(void)
+{
+	return 0x100000;
+}
+static inline u32 fifo_pb_timeslice_r(u32 i)
+{
+	return 0x00002350 + i*4;
+}
+static inline u32 fifo_pb_timeslice_timeout_16_f(void)
+{
+	return 0x10;
+}
+static inline u32 fifo_pb_timeslice_timescale_0_f(void)
+{
+	return 0x0;
+}
+static inline u32 fifo_pb_timeslice_enable_true_f(void)
+{
+	return 0x10000000;
+}
+static inline u32 fifo_pbdma_map_r(u32 i)
+{
+	return 0x00002390 + i*4;
+}
+static inline u32 fifo_intr_0_r(void)
+{
+	return 0x00002100;
+}
+static inline u32 fifo_intr_0_bind_error_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 fifo_intr_0_bind_error_reset_f(void)
+{
+	return 0x1;
+}
+static inline u32 fifo_intr_0_sched_error_pending_f(void)
+{
+	return 0x100;
+}
+static inline u32 fifo_intr_0_sched_error_reset_f(void)
+{
+	return 0x100;
+}
+static inline u32 fifo_intr_0_chsw_error_pending_f(void)
+{
+	return 0x10000;
+}
+static inline u32 fifo_intr_0_chsw_error_reset_f(void)
+{
+	return 0x10000;
+}
+static inline u32 fifo_intr_0_fb_flush_timeout_pending_f(void)
+{
+	return 0x800000;
+}
+static inline u32 fifo_intr_0_fb_flush_timeout_reset_f(void)
+{
+	return 0x800000;
+}
+static inline u32 fifo_intr_0_lb_error_pending_f(void)
+{
+	return 0x1000000;
+}
+static inline u32 fifo_intr_0_lb_error_reset_f(void)
+{
+	return 0x1000000;
+}
+static inline u32 fifo_intr_0_replayable_fault_error_pending_f(void)
+{
+	return 0x2000000;
+}
+static inline u32 fifo_intr_0_dropped_mmu_fault_pending_f(void)
+{
+	return 0x8000000;
+}
+static inline u32 fifo_intr_0_dropped_mmu_fault_reset_f(void)
+{
+	return 0x8000000;
+}
+static inline u32 fifo_intr_0_mmu_fault_pending_f(void)
+{
+	return 0x10000000;
+}
+static inline u32 fifo_intr_0_pbdma_intr_pending_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 fifo_intr_0_runlist_event_pending_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 fifo_intr_0_channel_intr_pending_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 fifo_intr_en_0_r(void)
+{
+	return 0x00002140;
+}
+static inline u32 fifo_intr_en_0_sched_error_f(u32 v)
+{
+	return (v & 0x1) << 8;
+}
+static inline u32 fifo_intr_en_0_sched_error_m(void)
+{
+	return 0x1 << 8;
+}
+static inline u32 fifo_intr_en_0_mmu_fault_f(u32 v)
+{
+	return (v & 0x1) << 28;
+}
+static inline u32 fifo_intr_en_0_mmu_fault_m(void)
+{
+	return 0x1 << 28;
+}
+static inline u32 fifo_intr_en_1_r(void)
+{
+	return 0x00002528;
+}
+static inline u32 fifo_intr_bind_error_r(void)
+{
+	return 0x0000252c;
+}
+static inline u32 fifo_intr_sched_error_r(void)
+{
+	return 0x0000254c;
+}
+static inline u32 fifo_intr_sched_error_code_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+static inline u32 fifo_intr_sched_error_code_ctxsw_timeout_v(void)
+{
+	return 0x0000000a;
+}
+static inline u32 fifo_intr_chsw_error_r(void)
+{
+	return 0x0000256c;
+}
+static inline u32 fifo_intr_mmu_fault_id_r(void)
+{
+	return 0x0000259c;
+}
+static inline u32 fifo_intr_mmu_fault_eng_id_graphics_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_intr_mmu_fault_eng_id_graphics_f(void)
+{
+	return 0x0;
+}
+static inline u32 fifo_intr_mmu_fault_inst_r(u32 i)
+{
+	return 0x00002800 + i*16;
+}
+static inline u32 fifo_intr_mmu_fault_inst_ptr_v(u32 r)
+{
+	return (r >> 0) & 0xfffffff;
+}
+static inline u32 fifo_intr_mmu_fault_inst_ptr_align_shift_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 fifo_intr_mmu_fault_lo_r(u32 i)
+{
+	return 0x00002804 + i*16;
+}
+static inline u32 fifo_intr_mmu_fault_hi_r(u32 i)
+{
+	return 0x00002808 + i*16;
+}
+static inline u32 fifo_intr_mmu_fault_info_r(u32 i)
+{
+	return 0x0000280c + i*16;
+}
+static inline u32 fifo_intr_mmu_fault_info_type_v(u32 r)
+{
+	return (r >> 0) & 0x1f;
+}
+static inline u32 fifo_intr_mmu_fault_info_client_type_v(u32 r)
+{
+	return (r >> 20) & 0x1;
+}
+static inline u32 fifo_intr_mmu_fault_info_client_type_gpc_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_intr_mmu_fault_info_client_type_hub_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_intr_mmu_fault_info_client_v(u32 r)
+{
+	return (r >> 8) & 0x7f;
+}
+static inline u32 fifo_intr_pbdma_id_r(void)
+{
+	return 0x000025a0;
+}
+static inline u32 fifo_intr_pbdma_id_status_f(u32 v, u32 i)
+{
+	return (v & 0x1) << (0 + i*1);
+}
+static inline u32 fifo_intr_pbdma_id_status__size_1_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 fifo_intr_runlist_r(void)
+{
+	return 0x00002a00;
+}
+static inline u32 fifo_fb_timeout_r(void)
+{
+	return 0x00002a04;
+}
+static inline u32 fifo_fb_timeout_period_m(void)
+{
+	return 0x3fffffff << 0;
+}
+static inline u32 fifo_fb_timeout_period_max_f(void)
+{
+	return 0x3fffffff;
+}
+static inline u32 fifo_error_sched_disable_r(void)
+{
+	return 0x0000262c;
+}
+static inline u32 fifo_sched_disable_r(void)
+{
+	return 0x00002630;
+}
+static inline u32 fifo_sched_disable_runlist_f(u32 v, u32 i)
+{
+	return (v & 0x1) << (0 + i*1);
+}
+static inline u32 fifo_sched_disable_runlist_m(u32 i)
+{
+	return 0x1U << (0 + i*1);	/* unsigned so i == 31 is well defined */
+}
+static inline u32 fifo_sched_disable_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_preempt_r(void)
+{
+	return 0x00002634;
+}
+static inline u32 fifo_preempt_pending_true_f(void)
+{
+	return 0x100000;
+}
+static inline u32 fifo_preempt_type_channel_f(void)
+{
+	return 0x0;
+}
+static inline u32 fifo_preempt_type_tsg_f(void)
+{
+	return 0x1000000;
+}
+static inline u32 fifo_preempt_chid_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
+static inline u32 fifo_preempt_id_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
+static inline u32 fifo_trigger_mmu_fault_r(u32 i)
+{
+	return 0x00002a30 + i*4;
+}
+static inline u32 fifo_trigger_mmu_fault_id_f(u32 v)
+{
+	return (v & 0x1f) << 0;
+}
+static inline u32 fifo_trigger_mmu_fault_enable_f(u32 v)
+{
+	return (v & 0x1) << 8;
+}
+static inline u32 fifo_engine_status_r(u32 i)
+{
+	return 0x00002640 + i*8;
+}
+static inline u32 fifo_engine_status__size_1_v(void)
+{
+	return 0x00000009;
+}
+static inline u32 fifo_engine_status_id_v(u32 r)
+{
+	return (r >> 0) & 0xfff;
+}
+static inline u32 fifo_engine_status_id_type_v(u32 r)
+{
+	return (r >> 12) & 0x1;
+}
+static inline u32 fifo_engine_status_id_type_chid_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_engine_status_id_type_tsgid_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_engine_status_ctx_status_v(u32 r)
+{
+	return (r >> 13) & 0x7;
+}
+static inline u32 fifo_engine_status_ctx_status_valid_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_engine_status_ctx_status_ctxsw_load_v(void)
+{
+	return 0x00000005;
+}
+static inline u32 fifo_engine_status_ctx_status_ctxsw_save_v(void)
+{
+	return 0x00000006;
+}
+static inline u32 fifo_engine_status_ctx_status_ctxsw_switch_v(void)
+{
+	return 0x00000007;
+}
+static inline u32 fifo_engine_status_next_id_v(u32 r)
+{
+	return (r >> 16) & 0xfff;
+}
+static inline u32 fifo_engine_status_next_id_type_v(u32 r)
+{
+	return (r >> 28) & 0x1;
+}
+static inline u32 fifo_engine_status_next_id_type_chid_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_engine_status_faulted_v(u32 r)
+{
+	return (r >> 30) & 0x1;
+}
+static inline u32 fifo_engine_status_faulted_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_engine_status_engine_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 fifo_engine_status_engine_idle_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_engine_status_engine_busy_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_engine_status_ctxsw_v(u32 r)
+{
+	return (r >> 15) & 0x1;
+}
+static inline u32 fifo_engine_status_ctxsw_in_progress_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_engine_status_ctxsw_in_progress_f(void)
+{
+	return 0x8000;
+}
+static inline u32 fifo_pbdma_status_r(u32 i)
+{
+	return 0x00003080 + i*4;
+}
+static inline u32 fifo_pbdma_status__size_1_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 fifo_pbdma_status_id_v(u32 r)
+{
+	return (r >> 0) & 0xfff;
+}
+static inline u32 fifo_pbdma_status_id_type_v(u32 r)
+{
+	return (r >> 12) & 0x1;
+}
+static inline u32 fifo_pbdma_status_id_type_chid_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_pbdma_status_id_type_tsgid_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_pbdma_status_chan_status_v(u32 r)
+{
+	return (r >> 13) & 0x7;
+}
+static inline u32 fifo_pbdma_status_chan_status_valid_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_pbdma_status_chan_status_chsw_load_v(void)
+{
+	return 0x00000005;
+}
+static inline u32 fifo_pbdma_status_chan_status_chsw_save_v(void)
+{
+	return 0x00000006;
+}
+static inline u32 fifo_pbdma_status_chan_status_chsw_switch_v(void)
+{
+	return 0x00000007;
+}
+static inline u32 fifo_pbdma_status_next_id_v(u32 r)
+{
+	return (r >> 16) & 0xfff;
+}
+static inline u32 fifo_pbdma_status_next_id_type_v(u32 r)
+{
+	return (r >> 28) & 0x1;
+}
+static inline u32 fifo_pbdma_status_next_id_type_chid_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_pbdma_status_chsw_v(u32 r)
+{
+	return (r >> 15) & 0x1;
+}
+static inline u32 fifo_pbdma_status_chsw_in_progress_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_replay_fault_buffer_lo_r(void)
+{
+	return 0x00002a70;
+}
+static inline u32 fifo_replay_fault_buffer_lo_enable_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 fifo_replay_fault_buffer_lo_enable_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_replay_fault_buffer_lo_enable_false_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_replay_fault_buffer_lo_base_f(u32 v)
+{
+	return (v & 0xfffff) << 12;
+}
+static inline u32 fifo_replay_fault_buffer_lo_base_reset_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_replay_fault_buffer_hi_r(void)
+{
+	return 0x00002a74;
+}
+static inline u32 fifo_replay_fault_buffer_hi_base_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+static inline u32 fifo_replay_fault_buffer_hi_base_reset_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_replay_fault_buffer_size_r(void)
+{
+	return 0x00002a78;
+}
+static inline u32 fifo_replay_fault_buffer_size_hw_f(u32 v)
+{
+	return (v & 0x3fff) << 0;
+}
+static inline u32 fifo_replay_fault_buffer_size_hw_entries_v(void)
+{
+	return 0x00001200;
+}
+static inline u32 fifo_replay_fault_buffer_get_r(void)
+{
+	return 0x00002a7c;
+}
+static inline u32 fifo_replay_fault_buffer_get_offset_hw_f(u32 v)
+{
+	return (v & 0x3fff) << 0;
+}
+static inline u32 fifo_replay_fault_buffer_get_offset_hw_init_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_replay_fault_buffer_put_r(void)
+{
+	return 0x00002a80;
+}
+static inline u32 fifo_replay_fault_buffer_put_offset_hw_f(u32 v)
+{
+	return (v & 0x3fff) << 0;
+}
+static inline u32 fifo_replay_fault_buffer_put_offset_hw_init_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_replay_fault_buffer_info_r(void)
+{
+	return 0x00002a84;
+}
+static inline u32 fifo_replay_fault_buffer_info_overflow_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 fifo_replay_fault_buffer_info_overflow_false_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_replay_fault_buffer_info_overflow_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_replay_fault_buffer_info_overflow_clear_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_replay_fault_buffer_info_write_nack_f(u32 v)
+{
+	return (v & 0x1) << 24;
+}
+static inline u32 fifo_replay_fault_buffer_info_write_nack_false_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_replay_fault_buffer_info_write_nack_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_replay_fault_buffer_info_write_nack_clear_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_replay_fault_buffer_info_fault_while_buffer_disabled_f(u32 v)
+{
+	return (v & 0x1) << 28;
+}
+static inline u32 fifo_replay_fault_buffer_info_fault_while_buffer_disabled_false_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fifo_replay_fault_buffer_info_fault_while_buffer_disabled_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 fifo_replay_fault_buffer_info_fault_while_buffer_disabled_clear_v(void)
+{
+	return 0x00000001;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_flush_gp106.h b/drivers/gpu/nvgpu/gp106/hw_flush_gp106.h
new file mode 100644
index 00000000..83bd65bb
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_flush_gp106.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_flush_gp106_h_
+#define _hw_flush_gp106_h_
+
+static inline u32 flush_l2_system_invalidate_r(void)
+{
+	return 0x00070004;
+}
+static inline u32 flush_l2_system_invalidate_pending_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 flush_l2_system_invalidate_pending_busy_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 flush_l2_system_invalidate_pending_busy_f(void)
+{
+	return 0x1;
+}
+static inline u32 flush_l2_system_invalidate_outstanding_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+static inline u32 flush_l2_system_invalidate_outstanding_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 flush_l2_flush_dirty_r(void)
+{
+	return 0x00070010;
+}
+static inline u32 flush_l2_flush_dirty_pending_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 flush_l2_flush_dirty_pending_empty_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 flush_l2_flush_dirty_pending_empty_f(void)
+{
+	return 0x0;
+}
+static inline u32 flush_l2_flush_dirty_pending_busy_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 flush_l2_flush_dirty_pending_busy_f(void)
+{
+	return 0x1;
+}
+static inline u32 flush_l2_flush_dirty_outstanding_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+static inline u32 flush_l2_flush_dirty_outstanding_false_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 flush_l2_flush_dirty_outstanding_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 flush_l2_flush_dirty_outstanding_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 flush_l2_clean_comptags_r(void)
+{
+	return 0x0007000c;
+}
+static inline u32 flush_l2_clean_comptags_pending_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 flush_l2_clean_comptags_pending_empty_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 flush_l2_clean_comptags_pending_empty_f(void)
+{
+	return 0x0;
+}
+static inline u32 flush_l2_clean_comptags_pending_busy_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 flush_l2_clean_comptags_pending_busy_f(void)
+{
+	return 0x1;
+}
+static inline u32 flush_l2_clean_comptags_outstanding_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+static inline u32 flush_l2_clean_comptags_outstanding_false_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 flush_l2_clean_comptags_outstanding_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 flush_l2_clean_comptags_outstanding_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 flush_fb_flush_r(void)
+{
+	return 0x00070000;
+}
+static inline u32 flush_fb_flush_pending_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 flush_fb_flush_pending_busy_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 flush_fb_flush_pending_busy_f(void)
+{
+	return 0x1;
+}
+static inline u32 flush_fb_flush_outstanding_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+static inline u32 flush_fb_flush_outstanding_true_v(void)
+{
+	return 0x00000001;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_fuse_gp106.h b/drivers/gpu/nvgpu/gp106/hw_fuse_gp106.h
new file mode 100644
index 00000000..0d4c0362
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_fuse_gp106.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_fuse_gp106_h_
+#define _hw_fuse_gp106_h_
+
+static inline u32 fuse_status_opt_tpc_gpc_r(u32 i)
+{
+	return (i * 4) + 0x00021c38; /* base offset plus 4 per index i */
+}
+static inline u32 fuse_ctrl_opt_tpc_gpc_r(u32 i)
+{
+	return (i * 4) + 0x00021838; /* base offset plus 4 per index i */
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_r(void)
+{
+	return 0x00021944;
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_data_f(u32 v)
+{
+	return v & 0x3; /* 2-bit field at bit 0: no shift needed */
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_data_m(void)
+{
+	return 0x3; /* mask of bits 0..1 */
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_data_v(u32 r)
+{
+	return r & 0x3; /* bits 0..1 of r */
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_r(void)
+{
+	return 0x00021948;
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_data_f(u32 v)
+{
+	return v & 0x1; /* 1-bit field at bit 0: no shift needed */
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_data_m(void)
+{
+	return 0x1; /* mask of bit 0 */
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_data_v(u32 r)
+{
+	return r & 0x1; /* bit 0 of r */
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_data_yes_f(void)
+{
+	return 0x1;
+}
+static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_data_no_f(void)
+{
+	return 0x0;
+}
+static inline u32 fuse_status_opt_fbio_r(void)
+{
+	return 0x00021c14;
+}
+static inline u32 fuse_status_opt_fbio_data_f(u32 v)
+{
+	return v & 0xffff; /* 16-bit field at bit 0: no shift needed */
+}
+static inline u32 fuse_status_opt_fbio_data_m(void)
+{
+	return 0xffff; /* mask of bits 0..15 */
+}
+static inline u32 fuse_status_opt_fbio_data_v(u32 r)
+{
+	return r & 0xffff; /* bits 0..15 of r */
+}
+static inline u32 fuse_status_opt_rop_l2_fbp_r(u32 i)
+{
+	return (i * 4) + 0x00021d70; /* base offset plus 4 per index i */
+}
+static inline u32 fuse_status_opt_fbp_r(void)
+{
+	return 0x00021d38;
+}
+static inline u32 fuse_status_opt_fbp_idx_v(u32 r, u32 i)
+{
+	return (r >> i) & 0x1; /* 1-bit entry i of r; i must be < 32 */
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_gmmu_gp106.h b/drivers/gpu/nvgpu/gp106/hw_gmmu_gp106.h
new file mode 100644
index 00000000..96ab77df
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_gmmu_gp106.h
@@ -0,0 +1,1261 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_gmmu_gp106_h_
+#define _hw_gmmu_gp106_h_
+
+static inline u32 gmmu_new_pde_is_pte_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_pde_is_pte_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 gmmu_new_pde_aperture_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_pde_aperture_invalid_f(void)
+{
+	return 0x0;
+}
+static inline u32 gmmu_new_pde_aperture_video_memory_f(void)
+{
+	return 0x2;
+}
+static inline u32 gmmu_new_pde_aperture_sys_mem_coh_f(void)
+{
+	return 0x4;
+}
+static inline u32 gmmu_new_pde_aperture_sys_mem_ncoh_f(void)
+{
+	return 0x6;
+}
+static inline u32 gmmu_new_pde_address_sys_f(u32 v)
+{
+	return (v << 8) & 0xffffff00; /* low 24 bits of v -> bits 8..31 */
+}
+static inline u32 gmmu_new_pde_address_sys_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_pde_vol_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_pde_vol_true_f(void)
+{
+	return 0x8;
+}
+static inline u32 gmmu_new_pde_vol_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 gmmu_new_pde_address_shift_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 gmmu_new_pde__size_v(void)
+{
+	return 0x00000008;
+}
+static inline u32 gmmu_new_dual_pde_is_pte_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_dual_pde_is_pte_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 gmmu_new_dual_pde_aperture_big_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_dual_pde_aperture_big_invalid_f(void)
+{
+	return 0x0;
+}
+static inline u32 gmmu_new_dual_pde_aperture_big_video_memory_f(void)
+{
+	return 0x2;
+}
+static inline u32 gmmu_new_dual_pde_aperture_big_sys_mem_coh_f(void)
+{
+	return 0x4;
+}
+static inline u32 gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(void)
+{
+	return 0x6;
+}
+static inline u32 gmmu_new_dual_pde_address_big_sys_f(u32 v)
+{
+	return (v << 4) & 0xfffffff0; /* low 28 bits of v -> bits 4..31 */
+}
+static inline u32 gmmu_new_dual_pde_address_big_sys_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_dual_pde_aperture_small_w(void)
+{
+	return 2;
+}
+static inline u32 gmmu_new_dual_pde_aperture_small_invalid_f(void)
+{
+	return 0x0;
+}
+static inline u32 gmmu_new_dual_pde_aperture_small_video_memory_f(void)
+{
+	return 0x2;
+}
+static inline u32 gmmu_new_dual_pde_aperture_small_sys_mem_coh_f(void)
+{
+	return 0x4;
+}
+static inline u32 gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(void)
+{
+	return 0x6;
+}
+static inline u32 gmmu_new_dual_pde_vol_small_w(void)
+{
+	return 2;
+}
+static inline u32 gmmu_new_dual_pde_vol_small_true_f(void)
+{
+	return 0x8;
+}
+static inline u32 gmmu_new_dual_pde_vol_small_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 gmmu_new_dual_pde_vol_big_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_dual_pde_vol_big_true_f(void)
+{
+	return 0x8;
+}
+static inline u32 gmmu_new_dual_pde_vol_big_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 gmmu_new_dual_pde_address_small_sys_f(u32 v)
+{
+	return (v << 8) & 0xffffff00; /* low 24 bits of v -> bits 8..31 */
+}
+static inline u32 gmmu_new_dual_pde_address_small_sys_w(void)
+{
+	return 2;
+}
+static inline u32 gmmu_new_dual_pde_address_shift_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 gmmu_new_dual_pde_address_big_shift_v(void)
+{
+	return 0x00000008;
+}
+static inline u32 gmmu_new_dual_pde__size_v(void)
+{
+	return 0x00000010;
+}
+static inline u32 gmmu_new_pte__size_v(void)
+{
+	return 0x00000008;
+}
+static inline u32 gmmu_new_pte_valid_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_pte_valid_true_f(void)
+{
+	return 0x1;
+}
+static inline u32 gmmu_new_pte_valid_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 gmmu_new_pte_privilege_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_pte_privilege_true_f(void)
+{
+	return 0x20;
+}
+static inline u32 gmmu_new_pte_privilege_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 gmmu_new_pte_address_sys_f(u32 v)
+{
+	return (v << 8) & 0xffffff00; /* low 24 bits of v -> bits 8..31 */
+}
+static inline u32 gmmu_new_pte_address_sys_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_pte_vol_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_pte_vol_true_f(void)
+{
+	return 0x8;
+}
+static inline u32 gmmu_new_pte_vol_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 gmmu_new_pte_aperture_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_pte_aperture_video_memory_f(void)
+{
+	return 0x0;
+}
+static inline u32 gmmu_new_pte_aperture_sys_mem_coh_f(void)
+{
+	return 0x4;
+}
+static inline u32 gmmu_new_pte_aperture_sys_mem_ncoh_f(void)
+{
+	return 0x6;
+}
+static inline u32 gmmu_new_pte_read_only_w(void)
+{
+	return 0;
+}
+static inline u32 gmmu_new_pte_read_only_true_f(void)
+{
+	return 0x40;
+}
+static inline u32 gmmu_new_pte_comptagline_f(u32 v)
+{
+	return (v << 4) & 0x3ffff0; /* low 18 bits of v -> bits 4..21 */
+}
+static inline u32 gmmu_new_pte_comptagline_w(void)
+{
+	return 1;
+}
+static inline u32 gmmu_new_pte_kind_f(u32 v)
+{
+	return (v << 24) & 0xff000000; /* low 8 bits of v -> bits 24..31 */
+}
+static inline u32 gmmu_new_pte_kind_w(void)
+{
+	return 1;
+}
+static inline u32 gmmu_new_pte_address_shift_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 gmmu_pte_kind_f(u32 v)
+{
+	return (v << 4) & 0xff0; /* low 8 bits of v -> bits 4..11 */
+}
+static inline u32 gmmu_pte_kind_w(void)
+{
+	return 1;
+}
+static inline u32 gmmu_pte_kind_invalid_v(void)
+{
+	return 0x000000ff;
+}
+static inline u32 gmmu_pte_kind_pitch_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gmmu_pte_kind_z16_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 gmmu_pte_kind_z16_2c_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 gmmu_pte_kind_z16_ms2_2c_v(void)
+{
+	return 0x00000003;
+}
+static inline u32 gmmu_pte_kind_z16_ms4_2c_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 gmmu_pte_kind_z16_ms8_2c_v(void)
+{
+	return 0x00000005;
+}
+static inline u32 gmmu_pte_kind_z16_ms16_2c_v(void)
+{
+	return 0x00000006;
+}
+static inline u32 gmmu_pte_kind_z16_2z_v(void)
+{
+	return 0x00000007;
+}
+static inline u32 gmmu_pte_kind_z16_ms2_2z_v(void)
+{
+	return 0x00000008;
+}
+static inline u32 gmmu_pte_kind_z16_ms4_2z_v(void)
+{
+	return 0x00000009;
+}
+static inline u32 gmmu_pte_kind_z16_ms8_2z_v(void)
+{
+	return 0x0000000a;
+}
+static inline u32 gmmu_pte_kind_z16_ms16_2z_v(void)
+{
+	return 0x0000000b;
+}
+static inline u32 gmmu_pte_kind_z16_2cz_v(void)
+{
+	return 0x00000036;
+}
+static inline u32 gmmu_pte_kind_z16_ms2_2cz_v(void)
+{
+	return 0x00000037;
+}
+static inline u32 gmmu_pte_kind_z16_ms4_2cz_v(void)
+{
+	return 0x00000038;
+}
+static inline u32 gmmu_pte_kind_z16_ms8_2cz_v(void)
+{
+	return 0x00000039;
+}
+static inline u32 gmmu_pte_kind_z16_ms16_2cz_v(void)
+{
+	return 0x0000005f;
+}
+static inline u32 gmmu_pte_kind_z16_4cz_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 gmmu_pte_kind_z16_ms2_4cz_v(void)
+{
+	return 0x0000000d;
+}
+static inline u32 gmmu_pte_kind_z16_ms4_4cz_v(void)
+{
+	return 0x0000000e;
+}
+static inline u32 gmmu_pte_kind_z16_ms8_4cz_v(void)
+{
+	return 0x0000000f;
+}
+static inline u32 gmmu_pte_kind_z16_ms16_4cz_v(void)
+{
+	return 0x00000010;
+}
+static inline u32 gmmu_pte_kind_s8z24_v(void)
+{
+	return 0x00000011;
+}
+static inline u32 gmmu_pte_kind_s8z24_1z_v(void)
+{
+	return 0x00000012;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms2_1z_v(void)
+{
+	return 0x00000013;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms4_1z_v(void)
+{
+	return 0x00000014;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms8_1z_v(void)
+{
+	return 0x00000015;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms16_1z_v(void)
+{
+	return 0x00000016;
+}
+static inline u32 gmmu_pte_kind_s8z24_2cz_v(void)
+{
+	return 0x00000017;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms2_2cz_v(void)
+{
+	return 0x00000018;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms4_2cz_v(void)
+{
+	return 0x00000019;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms8_2cz_v(void)
+{
+	return 0x0000001a;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms16_2cz_v(void)
+{
+	return 0x0000001b;
+}
+static inline u32 gmmu_pte_kind_s8z24_2cs_v(void)
+{
+	return 0x0000001c;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms2_2cs_v(void)
+{
+	return 0x0000001d;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms4_2cs_v(void)
+{
+	return 0x0000001e;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms8_2cs_v(void)
+{
+	return 0x0000001f;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms16_2cs_v(void)
+{
+	return 0x00000020;
+}
+static inline u32 gmmu_pte_kind_s8z24_4cszv_v(void)
+{
+	return 0x00000021;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms2_4cszv_v(void)
+{
+	return 0x00000022;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms4_4cszv_v(void)
+{
+	return 0x00000023;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms8_4cszv_v(void)
+{
+	return 0x00000024;
+}
+static inline u32 gmmu_pte_kind_s8z24_ms16_4cszv_v(void)
+{
+	return 0x00000025;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_v(void)
+{
+	return 0x00000026;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_v(void)
+{
+	return 0x00000027;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_v(void)
+{
+	return 0x00000028;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_v(void)
+{
+	return 0x00000029;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_1zv_v(void)
+{
+	return 0x0000002e;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_1zv_v(void)
+{
+	return 0x0000002f;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_1zv_v(void)
+{
+	return 0x00000030;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_1zv_v(void)
+{
+	return 0x00000031;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2cs_v(void)
+{
+	return 0x00000032;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2cs_v(void)
+{
+	return 0x00000033;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2cs_v(void)
+{
+	return 0x00000034;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2cs_v(void)
+{
+	return 0x00000035;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2czv_v(void)
+{
+	return 0x0000003a;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2czv_v(void)
+{
+	return 0x0000003b;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2czv_v(void)
+{
+	return 0x0000003c;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2czv_v(void)
+{
+	return 0x0000003d;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_2zv_v(void)
+{
+	return 0x0000003e;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_2zv_v(void)
+{
+	return 0x0000003f;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_2zv_v(void)
+{
+	return 0x00000040;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_2zv_v(void)
+{
+	return 0x00000041;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc12_4cszv_v(void)
+{
+	return 0x00000042;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms4_vc4_4cszv_v(void)
+{
+	return 0x00000043;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc8_4cszv_v(void)
+{
+	return 0x00000044;
+}
+static inline u32 gmmu_pte_kind_v8z24_ms8_vc24_4cszv_v(void)
+{
+	return 0x00000045;
+}
+static inline u32 gmmu_pte_kind_z24s8_v(void)
+{
+	return 0x00000046;
+}
+static inline u32 gmmu_pte_kind_z24s8_1z_v(void)
+{
+	return 0x00000047;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms2_1z_v(void)
+{
+	return 0x00000048;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms4_1z_v(void)
+{
+	return 0x00000049;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms8_1z_v(void)
+{
+	return 0x0000004a;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms16_1z_v(void)
+{
+	return 0x0000004b;
+}
+static inline u32 gmmu_pte_kind_z24s8_2cs_v(void)
+{
+	return 0x0000004c;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms2_2cs_v(void)
+{
+	return 0x0000004d;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms4_2cs_v(void)
+{
+	return 0x0000004e;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms8_2cs_v(void)
+{
+	return 0x0000004f;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms16_2cs_v(void)
+{
+	return 0x00000050;
+}
+static inline u32 gmmu_pte_kind_z24s8_2cz_v(void)
+{
+	return 0x00000051;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms2_2cz_v(void)
+{
+	return 0x00000052;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms4_2cz_v(void)
+{
+	return 0x00000053;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms8_2cz_v(void)
+{
+	return 0x00000054;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms16_2cz_v(void)
+{
+	return 0x00000055;
+}
+static inline u32 gmmu_pte_kind_z24s8_4cszv_v(void)
+{
+	return 0x00000056;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms2_4cszv_v(void)
+{
+	return 0x00000057;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms4_4cszv_v(void)
+{
+	return 0x00000058;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms8_4cszv_v(void)
+{
+	return 0x00000059;
+}
+static inline u32 gmmu_pte_kind_z24s8_ms16_4cszv_v(void)
+{
+	return 0x0000005a;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_v(void)
+{
+	return 0x0000005b;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_v(void)
+{
+	return 0x0000005c;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_v(void)
+{
+	return 0x0000005d;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_v(void)
+{
+	return 0x0000005e;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_1zv_v(void)
+{
+	return 0x00000063;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_1zv_v(void)
+{
+	return 0x00000064;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_1zv_v(void)
+{
+	return 0x00000065;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_1zv_v(void)
+{
+	return 0x00000066;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2cs_v(void)
+{
+	return 0x00000067;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2cs_v(void)
+{
+	return 0x00000068;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2cs_v(void)
+{
+	return 0x00000069;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2cs_v(void)
+{
+	return 0x0000006a;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2czv_v(void)
+{
+	return 0x0000006f;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2czv_v(void)
+{
+	return 0x00000070;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2czv_v(void)
+{
+	return 0x00000071;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2czv_v(void)
+{
+	return 0x00000072;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_2zv_v(void)
+{
+	return 0x00000073;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_2zv_v(void)
+{
+	return 0x00000074;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_2zv_v(void)
+{
+	return 0x00000075;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_2zv_v(void)
+{
+	return 0x00000076;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc12_4cszv_v(void)
+{
+	return 0x00000077;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms4_vc4_4cszv_v(void)
+{
+	return 0x00000078;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc8_4cszv_v(void)
+{
+	return 0x00000079;
+}
+static inline u32 gmmu_pte_kind_z24v8_ms8_vc24_4cszv_v(void)
+{
+	return 0x0000007a;
+}
+static inline u32 gmmu_pte_kind_zf32_v(void)
+{
+	return 0x0000007b;
+}
+static inline u32 gmmu_pte_kind_zf32_1z_v(void)
+{
+	return 0x0000007c;
+}
+static inline u32 gmmu_pte_kind_zf32_ms2_1z_v(void)
+{
+	return 0x0000007d;
+}
+static inline u32 gmmu_pte_kind_zf32_ms4_1z_v(void)
+{
+	return 0x0000007e;
+}
+static inline u32 gmmu_pte_kind_zf32_ms8_1z_v(void)
+{
+	return 0x0000007f;
+}
+static inline u32 gmmu_pte_kind_zf32_ms16_1z_v(void)
+{
+	return 0x00000080;
+}
+static inline u32 gmmu_pte_kind_zf32_2cs_v(void)
+{
+	return 0x00000081;
+}
+static inline u32 gmmu_pte_kind_zf32_ms2_2cs_v(void)
+{
+	return 0x00000082;
+}
+static inline u32 gmmu_pte_kind_zf32_ms4_2cs_v(void)
+{
+	return 0x00000083;
+}
+static inline u32 gmmu_pte_kind_zf32_ms8_2cs_v(void)
+{
+	return 0x00000084;
+}
+static inline u32 gmmu_pte_kind_zf32_ms16_2cs_v(void)
+{
+	return 0x00000085;
+}
+static inline u32 gmmu_pte_kind_zf32_2cz_v(void)
+{
+	return 0x00000086;
+}
+static inline u32 gmmu_pte_kind_zf32_ms2_2cz_v(void)
+{
+	return 0x00000087;
+}
+static inline u32 gmmu_pte_kind_zf32_ms4_2cz_v(void)
+{
+	return 0x00000088;
+}
+static inline u32 gmmu_pte_kind_zf32_ms8_2cz_v(void)
+{
+	return 0x00000089;
+}
+static inline u32 gmmu_pte_kind_zf32_ms16_2cz_v(void)
+{
+	return 0x0000008a;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_v(void)
+{
+	return 0x0000008b;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_v(void)
+{
+	return 0x0000008c;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_v(void)
+{
+	return 0x0000008d;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_v(void)
+{
+	return 0x0000008e;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1cs_v(void)
+{
+	return 0x0000008f;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1cs_v(void)
+{
+	return 0x00000090;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1cs_v(void)
+{
+	return 0x00000091;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1cs_v(void)
+{
+	return 0x00000092;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1zv_v(void)
+{
+	return 0x00000097;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1zv_v(void)
+{
+	return 0x00000098;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1zv_v(void)
+{
+	return 0x00000099;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1zv_v(void)
+{
+	return 0x0000009a;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_1czv_v(void)
+{
+	return 0x0000009b;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_1czv_v(void)
+{
+	return 0x0000009c;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_1czv_v(void)
+{
+	return 0x0000009d;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_1czv_v(void)
+{
+	return 0x0000009e;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cs_v(void)
+{
+	return 0x0000009f;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cs_v(void)
+{
+	return 0x000000a0;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cs_v(void)
+{
+	return 0x000000a1;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cs_v(void)
+{
+	return 0x000000a2;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc12_2cszv_v(void)
+{
+	return 0x000000a3;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms4_vc4_2cszv_v(void)
+{
+	return 0x000000a4;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc8_2cszv_v(void)
+{
+	return 0x000000a5;
+}
+static inline u32 gmmu_pte_kind_x8z24_x16v8s8_ms8_vc24_2cszv_v(void)
+{
+	return 0x000000a6;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_v(void)
+{
+	return 0x000000a7;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_v(void)
+{
+	return 0x000000a8;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_v(void)
+{
+	return 0x000000a9;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_v(void)
+{
+	return 0x000000aa;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1cs_v(void)
+{
+	return 0x000000ab;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1cs_v(void)
+{
+	return 0x000000ac;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1cs_v(void)
+{
+	return 0x000000ad;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1cs_v(void)
+{
+	return 0x000000ae;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1zv_v(void)
+{
+	return 0x000000b3;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1zv_v(void)
+{
+	return 0x000000b4;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1zv_v(void)
+{
+	return 0x000000b5;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1zv_v(void)
+{
+	return 0x000000b6;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_1czv_v(void)
+{
+	return 0x000000b7;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_1czv_v(void)
+{
+	return 0x000000b8;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_1czv_v(void)
+{
+	return 0x000000b9;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_1czv_v(void)
+{
+	return 0x000000ba;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cs_v(void)
+{
+	return 0x000000bb;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cs_v(void)
+{
+	return 0x000000bc;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cs_v(void)
+{
+	return 0x000000bd;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cs_v(void)
+{
+	return 0x000000be;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc12_2cszv_v(void)
+{
+	return 0x000000bf;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms4_vc4_2cszv_v(void)
+{
+	return 0x000000c0;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc8_2cszv_v(void)
+{
+	return 0x000000c1;
+}
+static inline u32 gmmu_pte_kind_zf32_x16v8s8_ms8_vc24_2cszv_v(void)
+{
+	return 0x000000c2;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_v(void)
+{
+	return 0x000000c3;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_1cs_v(void)
+{
+	return 0x000000c4;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_1cs_v(void)
+{
+	return 0x000000c5;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_1cs_v(void)
+{
+	return 0x000000c6;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_1cs_v(void)
+{
+	return 0x000000c7;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_1cs_v(void)
+{
+	return 0x000000c8;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_2cszv_v(void)
+{
+	return 0x000000ce;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_2cszv_v(void)
+{
+	return 0x000000cf;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_2cszv_v(void)
+{
+	return 0x000000d0;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_2cszv_v(void)
+{
+	return 0x000000d1;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_2cszv_v(void)
+{
+	return 0x000000d2;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_2cs_v(void)
+{
+	return 0x000000d3;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms2_2cs_v(void)
+{
+	return 0x000000d4;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms4_2cs_v(void)
+{
+	return 0x000000d5;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms8_2cs_v(void)
+{
+	return 0x000000d6;
+}
+static inline u32 gmmu_pte_kind_zf32_x24s8_ms16_2cs_v(void)
+{
+	return 0x000000d7;
+}
+static inline u32 gmmu_pte_kind_generic_16bx2_v(void)
+{
+	return 0x000000fe;
+}
+static inline u32 gmmu_pte_kind_c32_2c_v(void)
+{
+	return 0x000000d8;
+}
+static inline u32 gmmu_pte_kind_c32_2cbr_v(void)
+{
+	return 0x000000d9;
+}
+static inline u32 gmmu_pte_kind_c32_2cba_v(void)
+{
+	return 0x000000da;
+}
+static inline u32 gmmu_pte_kind_c32_2cra_v(void)
+{
+	return 0x000000db;
+}
+static inline u32 gmmu_pte_kind_c32_2bra_v(void)
+{
+	return 0x000000dc;
+}
+static inline u32 gmmu_pte_kind_c32_ms2_2c_v(void)
+{
+	return 0x000000dd;
+}
+static inline u32 gmmu_pte_kind_c32_ms2_2cbr_v(void)
+{
+	return 0x000000de;
+}
+static inline u32 gmmu_pte_kind_c32_ms4_2c_v(void)
+{
+	return 0x000000df;
+}
+static inline u32 gmmu_pte_kind_c32_ms4_2cbr_v(void)
+{
+	return 0x000000e0;
+}
+static inline u32 gmmu_pte_kind_c32_ms4_2cba_v(void)
+{
+	return 0x000000e1;
+}
+static inline u32 gmmu_pte_kind_c32_ms4_2cra_v(void)
+{
+	return 0x000000e2;
+}
+static inline u32 gmmu_pte_kind_c32_ms4_2bra_v(void)
+{
+	return 0x000000e3;
+}
+static inline u32 gmmu_pte_kind_c32_ms4_4cbra_v(void)
+{
+	return 0x0000002c;
+}
+static inline u32 gmmu_pte_kind_c32_ms8_ms16_2c_v(void)
+{
+	return 0x000000e4;
+}
+static inline u32 gmmu_pte_kind_c32_ms8_ms16_2cra_v(void)
+{
+	return 0x000000e5;
+}
+static inline u32 gmmu_pte_kind_c64_2c_v(void)
+{
+	return 0x000000e6;
+}
+static inline u32 gmmu_pte_kind_c64_2cbr_v(void)
+{
+	return 0x000000e7;
+}
+static inline u32 gmmu_pte_kind_c64_2cba_v(void)
+{
+	return 0x000000e8;
+}
+static inline u32 gmmu_pte_kind_c64_2cra_v(void)
+{
+	return 0x000000e9;
+}
+static inline u32 gmmu_pte_kind_c64_2bra_v(void)
+{
+	return 0x000000ea;
+}
+static inline u32 gmmu_pte_kind_c64_ms2_2c_v(void)
+{
+	return 0x000000eb;
+}
+static inline u32 gmmu_pte_kind_c64_ms2_2cbr_v(void)
+{
+	return 0x000000ec;
+}
+static inline u32 gmmu_pte_kind_c64_ms4_2c_v(void)
+{
+	return 0x000000ed;
+}
+static inline u32 gmmu_pte_kind_c64_ms4_2cbr_v(void)
+{
+	return 0x000000ee;
+}
+static inline u32 gmmu_pte_kind_c64_ms4_2cba_v(void)
+{
+	return 0x000000ef;
+}
+static inline u32 gmmu_pte_kind_c64_ms4_2cra_v(void)
+{
+	return 0x000000f0;
+}
+static inline u32 gmmu_pte_kind_c64_ms4_2bra_v(void)
+{
+	return 0x000000f1;
+}
+static inline u32 gmmu_pte_kind_c64_ms4_4cbra_v(void)
+{
+	return 0x0000002d;
+}
+static inline u32 gmmu_pte_kind_c64_ms8_ms16_2c_v(void)
+{
+	return 0x000000f2;
+}
+static inline u32 gmmu_pte_kind_c64_ms8_ms16_2cra_v(void)
+{
+	return 0x000000f3;
+}
+static inline u32 gmmu_pte_kind_c128_2c_v(void)
+{
+	return 0x000000f4;
+}
+static inline u32 gmmu_pte_kind_c128_2cr_v(void)
+{
+	return 0x000000f5;
+}
+static inline u32 gmmu_pte_kind_c128_ms2_2c_v(void)
+{
+	return 0x000000f6;
+}
+static inline u32 gmmu_pte_kind_c128_ms2_2cr_v(void)
+{
+	return 0x000000f7;
+}
+static inline u32 gmmu_pte_kind_c128_ms4_2c_v(void)
+{
+	return 0x000000f8;
+}
+static inline u32 gmmu_pte_kind_c128_ms4_2cr_v(void)
+{
+	return 0x000000f9;
+}
+static inline u32 gmmu_pte_kind_c128_ms8_ms16_2c_v(void)
+{
+	return 0x000000fa;
+}
+static inline u32 gmmu_pte_kind_c128_ms8_ms16_2cr_v(void)
+{
+	return 0x000000fb;
+}
+static inline u32 gmmu_pte_kind_x8c24_v(void)
+{
+	return 0x000000fc;
+}
+static inline u32 gmmu_pte_kind_pitch_no_swizzle_v(void)
+{
+	return 0x000000fd;
+}
+static inline u32 gmmu_pte_kind_smsked_message_v(void)
+{
+	return 0x000000ca;
+}
+static inline u32 gmmu_pte_kind_smhost_message_v(void)
+{
+	return 0x000000cb;
+}
+static inline u32 gmmu_pte_kind_s8_v(void)
+{
+	return 0x0000002a;
+}
+static inline u32 gmmu_pte_kind_s8_2s_v(void)
+{
+	return 0x0000002b;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_gr_gp106.h b/drivers/gpu/nvgpu/gp106/hw_gr_gp106.h
new file mode 100644
index 00000000..e5e1c527
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_gr_gp106.h
@@ -0,0 +1,4001 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_gr_gp106_h_
+#define _hw_gr_gp106_h_
+
+static inline u32 gr_intr_r(void)
+{
+	return 0x00400100;
+}
+static inline u32 gr_intr_notify_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_intr_notify_reset_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_intr_semaphore_pending_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_intr_semaphore_reset_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_intr_illegal_method_pending_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_intr_illegal_method_reset_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_intr_illegal_notify_pending_f(void)
+{
+	return 0x40;
+}
+static inline u32 gr_intr_illegal_notify_reset_f(void)
+{
+	return 0x40;
+}
+static inline u32 gr_intr_firmware_method_f(u32 v)
+{
+	return (v & 0x1) << 8;
+}
+static inline u32 gr_intr_firmware_method_pending_f(void)
+{
+	return 0x100;
+}
+static inline u32 gr_intr_firmware_method_reset_f(void)
+{
+	return 0x100;
+}
+static inline u32 gr_intr_illegal_class_pending_f(void)
+{
+	return 0x20;
+}
+static inline u32 gr_intr_illegal_class_reset_f(void)
+{
+	return 0x20;
+}
+static inline u32 gr_intr_fecs_error_pending_f(void)
+{
+	return 0x80000;
+}
+static inline u32 gr_intr_fecs_error_reset_f(void)
+{
+	return 0x80000;
+}
+static inline u32 gr_intr_class_error_pending_f(void)
+{
+	return 0x100000;
+}
+static inline u32 gr_intr_class_error_reset_f(void)
+{
+	return 0x100000;
+}
+static inline u32 gr_intr_exception_pending_f(void)
+{
+	return 0x200000;
+}
+static inline u32 gr_intr_exception_reset_f(void)
+{
+	return 0x200000;
+}
+static inline u32 gr_fecs_intr_r(void)
+{
+	return 0x00400144;
+}
+static inline u32 gr_class_error_r(void)
+{
+	return 0x00400110;
+}
+static inline u32 gr_class_error_code_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 gr_intr_nonstall_r(void)
+{
+	return 0x00400120;
+}
+static inline u32 gr_intr_nonstall_trap_pending_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_intr_en_r(void)
+{
+	return 0x0040013c;
+}
+static inline u32 gr_exception_r(void)
+{
+	return 0x00400108;
+}
+static inline u32 gr_exception_fe_m(void)
+{
+	return 0x1 << 0;
+}
+static inline u32 gr_exception_gpc_m(void)
+{
+	return 0x1 << 24;
+}
+static inline u32 gr_exception_memfmt_m(void)
+{
+	return 0x1 << 1;
+}
+static inline u32 gr_exception_ds_m(void)
+{
+	return 0x1 << 4;
+}
+static inline u32 gr_exception1_r(void)
+{
+	return 0x00400118;
+}
+static inline u32 gr_exception1_gpc_0_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_exception2_r(void)
+{
+	return 0x0040011c;
+}
+static inline u32 gr_exception_en_r(void)
+{
+	return 0x00400138;
+}
+static inline u32 gr_exception_en_fe_m(void)
+{
+	return 0x1 << 0;
+}
+static inline u32 gr_exception1_en_r(void)
+{
+	return 0x00400130;
+}
+static inline u32 gr_exception2_en_r(void)
+{
+	return 0x00400134;
+}
+static inline u32 gr_gpfifo_ctl_r(void)
+{
+	return 0x00400500;
+}
+static inline u32 gr_gpfifo_ctl_access_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 gr_gpfifo_ctl_access_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpfifo_ctl_access_enabled_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_gpfifo_ctl_semaphore_access_f(u32 v)
+{
+	return (v & 0x1) << 16;
+}
+static inline u32 gr_gpfifo_ctl_semaphore_access_enabled_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 gr_gpfifo_ctl_semaphore_access_enabled_f(void)
+{
+	return 0x10000;
+}
+static inline u32 gr_gpfifo_status_r(void)
+{
+	return 0x00400504;
+}
+static inline u32 gr_trapped_addr_r(void)
+{
+	return 0x00400704;
+}
+static inline u32 gr_trapped_addr_mthd_v(u32 r)
+{
+	return (r >> 2) & 0xfff;
+}
+static inline u32 gr_trapped_addr_subch_v(u32 r)
+{
+	return (r >> 16) & 0x7;
+}
+static inline u32 gr_trapped_data_lo_r(void)
+{
+	return 0x00400708;
+}
+static inline u32 gr_trapped_data_hi_r(void)
+{
+	return 0x0040070c;
+}
+static inline u32 gr_status_r(void)
+{
+	return 0x00400700;
+}
+static inline u32 gr_status_fe_method_upper_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+static inline u32 gr_status_fe_method_lower_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 gr_status_fe_method_lower_idle_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_status_fe_gi_v(u32 r)
+{
+	return (r >> 21) & 0x1;
+}
+static inline u32 gr_status_mask_r(void)
+{
+	return 0x00400610;
+}
+static inline u32 gr_status_1_r(void)
+{
+	return 0x00400604;
+}
+static inline u32 gr_status_2_r(void)
+{
+	return 0x00400608;
+}
+static inline u32 gr_engine_status_r(void)
+{
+	return 0x0040060c;
+}
+static inline u32 gr_engine_status_value_busy_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_pri_be0_becs_be_exception_r(void)
+{
+	return 0x00410204;
+}
+static inline u32 gr_pri_be0_becs_be_exception_en_r(void)
+{
+	return 0x00410208;
+}
+static inline u32 gr_pri_gpc0_gpccs_gpc_exception_r(void)
+{
+	return 0x00502c90;
+}
+static inline u32 gr_pri_gpc0_gpccs_gpc_exception_en_r(void)
+{
+	return 0x00502c94;
+}
+static inline u32 gr_pri_gpc0_tpc0_tpccs_tpc_exception_r(void)
+{
+	return 0x00504508;
+}
+static inline u32 gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
+{
+	return 0x0050450c;
+}
+static inline u32 gr_activity_0_r(void)
+{
+	return 0x00400380;
+}
+static inline u32 gr_activity_1_r(void)
+{
+	return 0x00400384;
+}
+static inline u32 gr_activity_2_r(void)
+{
+	return 0x00400388;
+}
+static inline u32 gr_activity_4_r(void)
+{
+	return 0x00400390;
+}
+static inline u32 gr_activity_4_gpc0_s(void)
+{
+	return 3;
+}
+static inline u32 gr_activity_4_gpc0_f(u32 v)
+{
+	return (v & 0x7) << 0;
+}
+static inline u32 gr_activity_4_gpc0_m(void)
+{
+	return 0x7 << 0;
+}
+static inline u32 gr_activity_4_gpc0_v(u32 r)
+{
+	return (r >> 0) & 0x7;
+}
+static inline u32 gr_activity_4_gpc0_empty_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_activity_4_gpc0_preempted_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 gr_pri_gpc0_gcc_dbg_r(void)
+{
+	return 0x00501000;
+}
+static inline u32 gr_pri_gpcs_gcc_dbg_r(void)
+{
+	return 0x00419000;
+}
+static inline u32 gr_pri_gpcs_gcc_dbg_invalidate_m(void)
+{
+	return 0x1 << 1;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_cache_control_r(void)
+{
+	return 0x005046a4;
+}
+static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_r(void)
+{
+	return 0x00419ea4;
+}
+static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(void)
+{
+	return 0x1 << 0;
+}
+static inline u32 gr_pri_sked_activity_r(void)
+{
+	return 0x00407054;
+}
+static inline u32 gr_pri_gpc0_gpccs_gpc_activity0_r(void)
+{
+	return 0x00502c80;
+}
+static inline u32 gr_pri_gpc0_gpccs_gpc_activity1_r(void)
+{
+	return 0x00502c84;
+}
+static inline u32 gr_pri_gpc0_gpccs_gpc_activity2_r(void)
+{
+	return 0x00502c88;
+}
+static inline u32 gr_pri_gpc0_gpccs_gpc_activity3_r(void)
+{
+	return 0x00502c8c;
+}
+static inline u32 gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r(void)
+{
+	return 0x00504500;
+}
+static inline u32 gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r(void)
+{
+	return 0x00504d00;
+}
+static inline u32 gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r(void)
+{
+	return 0x00501d00;
+}
+static inline u32 gr_pri_gpcs_gpccs_gpc_activity_0_r(void)
+{
+	return 0x0041ac80;
+}
+static inline u32 gr_pri_gpcs_gpccs_gpc_activity_1_r(void)
+{
+	return 0x0041ac84;
+}
+static inline u32 gr_pri_gpcs_gpccs_gpc_activity_2_r(void)
+{
+	return 0x0041ac88;
+}
+static inline u32 gr_pri_gpcs_gpccs_gpc_activity_3_r(void)
+{
+	return 0x0041ac8c;
+}
+static inline u32 gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r(void)
+{
+	return 0x0041c500;
+}
+static inline u32 gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r(void)
+{
+	return 0x0041cd00;
+}
+static inline u32 gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r(void)
+{
+	return 0x00419d00;
+}
+static inline u32 gr_pri_be0_becs_be_activity0_r(void)
+{
+	return 0x00410200;
+}
+static inline u32 gr_pri_be1_becs_be_activity0_r(void)
+{
+	return 0x00410600;
+}
+static inline u32 gr_pri_bes_becs_be_activity0_r(void)
+{
+	return 0x00408a00;
+}
+static inline u32 gr_pri_ds_mpipe_status_r(void)
+{
+	return 0x00405858;
+}
+static inline u32 gr_pri_fe_go_idle_info_r(void)
+{
+	return 0x00404194;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r(void)
+{
+	return 0x00504238;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r(void)
+{
+	return 0x005046b8;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f(void)
+{
+	return 0x20;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f(void)
+{
+	return 0x40;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f(void)
+{
+	return 0x80;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f(void)
+{
+	return 0x100;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f(void)
+{
+	return 0x200;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f(void)
+{
+	return 0x400;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f(void)
+{
+	return 0x800;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_r(void)
+{
+	return 0x005044a0;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm0_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm1_pending_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f(void)
+{
+	return 0x20;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f(void)
+{
+	return 0x100;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f(void)
+{
+	return 0x200;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r(void)
+{
+	return 0x005046bc;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r(void)
+{
+	return 0x005046c0;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r(void)
+{
+	return 0x005044a4;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m(void)
+{
+	return 0xff << 0;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(u32 r)
+{
+	return (r >> 0) & 0xff;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m(void)
+{
+	return 0xff << 8;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(u32 r)
+{
+	return (r >> 8) & 0xff;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m(void)
+{
+	return 0xff << 16;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(u32 r)
+{
+	return (r >> 16) & 0xff;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_routing_r(void)
+{
+	return 0x005042c4;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_pri_be0_crop_status1_r(void)
+{
+	return 0x00410134;
+}
+static inline u32 gr_pri_bes_crop_status1_r(void)
+{
+	return 0x00408934;
+}
+static inline u32 gr_pri_be0_zrop_status_r(void)
+{
+	return 0x00410048;
+}
+static inline u32 gr_pri_be0_zrop_status2_r(void)
+{
+	return 0x0041004c;
+}
+static inline u32 gr_pri_bes_zrop_status_r(void)
+{
+	return 0x00408848;
+}
+static inline u32 gr_pri_bes_zrop_status2_r(void)
+{
+	return 0x0040884c;
+}
+static inline u32 gr_pipe_bundle_address_r(void)
+{
+	return 0x00400200;
+}
+static inline u32 gr_pipe_bundle_address_value_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 gr_pipe_bundle_data_r(void)
+{
+	return 0x00400204;
+}
+static inline u32 gr_pipe_bundle_config_r(void)
+{
+	return 0x00400208;
+}
+static inline u32 gr_pipe_bundle_config_override_pipe_mode_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_pipe_bundle_config_override_pipe_mode_enabled_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_fe_hww_esr_r(void)
+{
+	return 0x00404000;
+}
+static inline u32 gr_fe_hww_esr_reset_active_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 gr_fe_hww_esr_en_enable_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_fe_go_idle_timeout_r(void)
+{
+	return 0x00404154;
+}
+static inline u32 gr_fe_go_idle_timeout_count_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fe_go_idle_timeout_count_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fe_go_idle_timeout_count_prod_f(void)
+{
+	return 0x1800;
+}
+static inline u32 gr_fe_object_table_r(u32 i)
+{
+	return 0x00404200 + i*4;
+}
+static inline u32 gr_fe_object_table_nvclass_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 gr_fe_tpc_fs_r(void)
+{
+	return 0x004041c4;
+}
+static inline u32 gr_pri_mme_shadow_raw_index_r(void)
+{
+	return 0x00404488;
+}
+static inline u32 gr_pri_mme_shadow_raw_index_write_trigger_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_pri_mme_shadow_raw_data_r(void)
+{
+	return 0x0040448c;
+}
+static inline u32 gr_mme_hww_esr_r(void)
+{
+	return 0x00404490;
+}
+static inline u32 gr_mme_hww_esr_reset_active_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 gr_mme_hww_esr_en_enable_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_memfmt_hww_esr_r(void)
+{
+	return 0x00404600;
+}
+static inline u32 gr_memfmt_hww_esr_reset_active_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 gr_memfmt_hww_esr_en_enable_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_fecs_cpuctl_r(void)
+{
+	return 0x00409100;
+}
+static inline u32 gr_fecs_cpuctl_startcpu_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
+static inline u32 gr_fecs_cpuctl_alias_r(void)
+{
+	return 0x00409130;
+}
+static inline u32 gr_fecs_cpuctl_alias_startcpu_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
+static inline u32 gr_fecs_dmactl_r(void)
+{
+	return 0x0040910c;
+}
+static inline u32 gr_fecs_dmactl_require_ctx_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 gr_fecs_dmactl_dmem_scrubbing_m(void)
+{
+	return 0x1 << 1;
+}
+static inline u32 gr_fecs_dmactl_imem_scrubbing_m(void)
+{
+	return 0x1 << 2;
+}
+static inline u32 gr_fecs_os_r(void)
+{
+	return 0x00409080;
+}
+static inline u32 gr_fecs_idlestate_r(void)
+{
+	return 0x0040904c;
+}
+static inline u32 gr_fecs_mailbox0_r(void)
+{
+	return 0x00409040;
+}
+static inline u32 gr_fecs_mailbox1_r(void)
+{
+	return 0x00409044;
+}
+static inline u32 gr_fecs_irqstat_r(void)
+{
+	return 0x00409008;
+}
+static inline u32 gr_fecs_irqmode_r(void)
+{
+	return 0x0040900c;
+}
+static inline u32 gr_fecs_irqmask_r(void)
+{
+	return 0x00409018;
+}
+static inline u32 gr_fecs_irqdest_r(void)
+{
+	return 0x0040901c;
+}
+static inline u32 gr_fecs_curctx_r(void)
+{
+	return 0x00409050;
+}
+static inline u32 gr_fecs_nxtctx_r(void)
+{
+	return 0x00409054;
+}
+static inline u32 gr_fecs_engctl_r(void)
+{
+	return 0x004090a4;
+}
+static inline u32 gr_fecs_debug1_r(void)
+{
+	return 0x00409090;
+}
+static inline u32 gr_fecs_debuginfo_r(void)
+{
+	return 0x00409094;
+}
+static inline u32 gr_fecs_icd_cmd_r(void)
+{
+	return 0x00409200;
+}
+static inline u32 gr_fecs_icd_cmd_opc_s(void)
+{
+	return 4;
+}
+static inline u32 gr_fecs_icd_cmd_opc_f(u32 v)
+{
+	return (v & 0xf) << 0;
+}
+static inline u32 gr_fecs_icd_cmd_opc_m(void)
+{
+	return 0xf << 0;
+}
+static inline u32 gr_fecs_icd_cmd_opc_v(u32 r)
+{
+	return (r >> 0) & 0xf;
+}
+static inline u32 gr_fecs_icd_cmd_opc_rreg_f(void)
+{
+	return 0x8;
+}
+static inline u32 gr_fecs_icd_cmd_opc_rstat_f(void)
+{
+	return 0xe;
+}
+static inline u32 gr_fecs_icd_cmd_idx_f(u32 v)
+{
+	return (v & 0x1f) << 8;
+}
+static inline u32 gr_fecs_icd_rdata_r(void)
+{
+	return 0x0040920c;
+}
+static inline u32 gr_fecs_imemc_r(u32 i)
+{
+	return 0x00409180 + i*16;
+}
+static inline u32 gr_fecs_imemc_offs_f(u32 v)
+{
+	return (v & 0x3f) << 2;
+}
+static inline u32 gr_fecs_imemc_blk_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 gr_fecs_imemc_aincw_f(u32 v)
+{
+	return (v & 0x1) << 24;
+}
+static inline u32 gr_fecs_imemd_r(u32 i)
+{
+	return 0x00409184 + i*16;
+}
+static inline u32 gr_fecs_imemt_r(u32 i)
+{
+	return 0x00409188 + i*16;
+}
+static inline u32 gr_fecs_imemt_tag_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+static inline u32 gr_fecs_dmemc_r(u32 i)
+{
+	return 0x004091c0 + i*8;
+}
+static inline u32 gr_fecs_dmemc_offs_s(void)
+{
+	return 6;
+}
+static inline u32 gr_fecs_dmemc_offs_f(u32 v)
+{
+	return (v & 0x3f) << 2;
+}
+static inline u32 gr_fecs_dmemc_offs_m(void)
+{
+	return 0x3f << 2;
+}
+static inline u32 gr_fecs_dmemc_offs_v(u32 r)
+{
+	return (r >> 2) & 0x3f;
+}
+static inline u32 gr_fecs_dmemc_blk_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 gr_fecs_dmemc_aincw_f(u32 v)
+{
+	return (v & 0x1) << 24;
+}
+static inline u32 gr_fecs_dmemd_r(u32 i)
+{
+	return 0x004091c4 + i*8;
+}
+static inline u32 gr_fecs_dmatrfbase_r(void)
+{
+	return 0x00409110;
+}
+static inline u32 gr_fecs_dmatrfmoffs_r(void)
+{
+	return 0x00409114;
+}
+static inline u32 gr_fecs_dmatrffboffs_r(void)
+{
+	return 0x0040911c;
+}
+static inline u32 gr_fecs_dmatrfcmd_r(void)
+{
+	return 0x00409118;
+}
+static inline u32 gr_fecs_dmatrfcmd_imem_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 gr_fecs_dmatrfcmd_write_f(u32 v)
+{
+	return (v & 0x1) << 5;
+}
+static inline u32 gr_fecs_dmatrfcmd_size_f(u32 v)
+{
+	return (v & 0x7) << 8;
+}
+static inline u32 gr_fecs_dmatrfcmd_ctxdma_f(u32 v)
+{
+	return (v & 0x7) << 12;
+}
+static inline u32 gr_fecs_bootvec_r(void)
+{
+	return 0x00409104;
+}
+static inline u32 gr_fecs_bootvec_vec_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fecs_falcon_hwcfg_r(void)
+{
+	return 0x00409108;
+}
+static inline u32 gr_gpcs_gpccs_falcon_hwcfg_r(void)
+{
+	return 0x0041a108;
+}
+static inline u32 gr_fecs_falcon_rm_r(void)
+{
+	return 0x00409084;
+}
+static inline u32 gr_fecs_current_ctx_r(void)
+{
+	return 0x00409b00;
+}
+static inline u32 gr_fecs_current_ctx_ptr_f(u32 v)
+{
+	return (v & 0xfffffff) << 0;
+}
+static inline u32 gr_fecs_current_ctx_ptr_v(u32 r)
+{
+	return (r >> 0) & 0xfffffff;
+}
+static inline u32 gr_fecs_current_ctx_target_s(void)
+{
+	return 2;
+}
+static inline u32 gr_fecs_current_ctx_target_f(u32 v)
+{
+	return (v & 0x3) << 28;
+}
+static inline u32 gr_fecs_current_ctx_target_m(void)
+{
+	return 0x3 << 28;
+}
+static inline u32 gr_fecs_current_ctx_target_v(u32 r)
+{
+	return (r >> 28) & 0x3;
+}
+static inline u32 gr_fecs_current_ctx_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_current_ctx_target_sys_mem_coh_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 gr_fecs_current_ctx_target_sys_mem_ncoh_f(void)
+{
+	return 0x30000000;
+}
+static inline u32 gr_fecs_current_ctx_valid_s(void)
+{
+	return 1;
+}
+static inline u32 gr_fecs_current_ctx_valid_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+static inline u32 gr_fecs_current_ctx_valid_m(void)
+{
+	return 0x1U << 31; /* bit 31 mask; 0x1U avoids signed-shift UB */
+}
+static inline u32 gr_fecs_current_ctx_valid_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 gr_fecs_current_ctx_valid_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_method_data_r(void)
+{
+	return 0x00409500;
+}
+static inline u32 gr_fecs_method_push_r(void)
+{
+	return 0x00409504;
+}
+static inline u32 gr_fecs_method_push_adr_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
+static inline u32 gr_fecs_method_push_adr_bind_pointer_v(void)
+{
+	return 0x00000003;
+}
+static inline u32 gr_fecs_method_push_adr_bind_pointer_f(void)
+{
+	return 0x3;
+}
+static inline u32 gr_fecs_method_push_adr_discover_image_size_v(void)
+{
+	return 0x00000010;
+}
+static inline u32 gr_fecs_method_push_adr_wfi_golden_save_v(void)
+{
+	return 0x00000009;
+}
+static inline u32 gr_fecs_method_push_adr_restore_golden_v(void)
+{
+	return 0x00000015;
+}
+static inline u32 gr_fecs_method_push_adr_discover_zcull_image_size_v(void)
+{
+	return 0x00000016;
+}
+static inline u32 gr_fecs_method_push_adr_discover_pm_image_size_v(void)
+{
+	return 0x00000025;
+}
+static inline u32 gr_fecs_method_push_adr_discover_reglist_image_size_v(void)
+{
+	return 0x00000030;
+}
+static inline u32 gr_fecs_method_push_adr_set_reglist_bind_instance_v(void)
+{
+	return 0x00000031;
+}
+static inline u32 gr_fecs_method_push_adr_set_reglist_virtual_address_v(void)
+{
+	return 0x00000032;
+}
+static inline u32 gr_fecs_method_push_adr_stop_ctxsw_v(void)
+{
+	return 0x00000038;
+}
+static inline u32 gr_fecs_method_push_adr_start_ctxsw_v(void)
+{
+	return 0x00000039;
+}
+static inline u32 gr_fecs_method_push_adr_set_watchdog_timeout_f(void)
+{
+	return 0x21;
+}
+static inline u32 gr_fecs_method_push_adr_discover_preemption_image_size_v(void)
+{
+	return 0x0000001a;
+}
+static inline u32 gr_fecs_method_push_adr_halt_pipeline_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 gr_fecs_host_int_status_r(void)
+{
+	return 0x00409c18;
+}
+static inline u32 gr_fecs_host_int_status_fault_during_ctxsw_f(u32 v)
+{
+	return (v & 0x1) << 16;
+}
+static inline u32 gr_fecs_host_int_status_umimp_firmware_method_f(u32 v)
+{
+	return (v & 0x1) << 17;
+}
+static inline u32 gr_fecs_host_int_status_umimp_illegal_method_f(u32 v)
+{
+	return (v & 0x1) << 18;
+}
+static inline u32 gr_fecs_host_int_clear_r(void)
+{
+	return 0x00409c20;
+}
+static inline u32 gr_fecs_host_int_enable_r(void)
+{
+	return 0x00409c24;
+}
+static inline u32 gr_fecs_host_int_enable_fault_during_ctxsw_enable_f(void)
+{
+	return 0x10000;
+}
+static inline u32 gr_fecs_host_int_enable_umimp_firmware_method_enable_f(void)
+{
+	return 0x20000;
+}
+static inline u32 gr_fecs_host_int_enable_umimp_illegal_method_enable_f(void)
+{
+	return 0x40000;
+}
+static inline u32 gr_fecs_host_int_enable_watchdog_enable_f(void)
+{
+	return 0x80000;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_r(void)
+{
+	return 0x00409614;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f(void)
+{
+	return 0x20;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f(void)
+{
+	return 0x40;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f(void)
+{
+	return 0x100;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f(void)
+{
+	return 0x200;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_s(void)
+{
+	return 1;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_f(u32 v)
+{
+	return (v & 0x1) << 10;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_m(void)
+{
+	return 0x1 << 10;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_v(u32 r)
+{
+	return (r >> 10) & 0x1;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f(void)
+{
+	return 0x400;
+}
+static inline u32 gr_fecs_ctx_state_store_major_rev_id_r(void)
+{
+	return 0x0040960c;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_r(u32 i)
+{
+	return 0x00409800 + i*4;
+}
+static inline u32 gr_fecs_ctxsw_mailbox__size_1_v(void)
+{
+	return 0x00000010;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_value_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_value_pass_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_value_fail_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_set_r(u32 i)
+{
+	return 0x004098c0 + i*4;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_set_value_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_clear_r(u32 i)
+{
+	return 0x00409840 + i*4;
+}
+static inline u32 gr_fecs_ctxsw_mailbox_clear_value_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fecs_fs_r(void)
+{
+	return 0x00409604;
+}
+static inline u32 gr_fecs_fs_num_available_gpcs_s(void)
+{
+	return 5;
+}
+static inline u32 gr_fecs_fs_num_available_gpcs_f(u32 v)
+{
+	return (v & 0x1f) << 0;
+}
+static inline u32 gr_fecs_fs_num_available_gpcs_m(void)
+{
+	return 0x1f << 0;
+}
+static inline u32 gr_fecs_fs_num_available_gpcs_v(u32 r)
+{
+	return (r >> 0) & 0x1f;
+}
+static inline u32 gr_fecs_fs_num_available_fbps_s(void)
+{
+	return 5;
+}
+static inline u32 gr_fecs_fs_num_available_fbps_f(u32 v)
+{
+	return (v & 0x1f) << 16;
+}
+static inline u32 gr_fecs_fs_num_available_fbps_m(void)
+{
+	return 0x1f << 16;
+}
+static inline u32 gr_fecs_fs_num_available_fbps_v(u32 r)
+{
+	return (r >> 16) & 0x1f;
+}
+static inline u32 gr_fecs_cfg_r(void)
+{
+	return 0x00409620;
+}
+static inline u32 gr_fecs_cfg_imem_sz_v(u32 r)
+{
+	return (r >> 0) & 0xff;
+}
+static inline u32 gr_fecs_rc_lanes_r(void)
+{
+	return 0x00409880;
+}
+static inline u32 gr_fecs_rc_lanes_num_chains_s(void)
+{
+	return 6;
+}
+static inline u32 gr_fecs_rc_lanes_num_chains_f(u32 v)
+{
+	return (v & 0x3f) << 0;
+}
+static inline u32 gr_fecs_rc_lanes_num_chains_m(void)
+{
+	return 0x3f << 0;
+}
+static inline u32 gr_fecs_rc_lanes_num_chains_v(u32 r)
+{
+	return (r >> 0) & 0x3f;
+}
+static inline u32 gr_fecs_ctxsw_status_1_r(void)
+{
+	return 0x00409400;
+}
+static inline u32 gr_fecs_ctxsw_status_1_arb_busy_s(void)
+{
+	return 1;
+}
+static inline u32 gr_fecs_ctxsw_status_1_arb_busy_f(u32 v)
+{
+	return (v & 0x1) << 12;
+}
+static inline u32 gr_fecs_ctxsw_status_1_arb_busy_m(void)
+{
+	return 0x1 << 12;
+}
+static inline u32 gr_fecs_ctxsw_status_1_arb_busy_v(u32 r)
+{
+	return (r >> 12) & 0x1;
+}
+static inline u32 gr_fecs_arb_ctx_adr_r(void)
+{
+	return 0x00409a24;
+}
+static inline u32 gr_fecs_new_ctx_r(void)
+{
+	return 0x00409b04;
+}
+static inline u32 gr_fecs_new_ctx_ptr_s(void)
+{
+	return 28;
+}
+static inline u32 gr_fecs_new_ctx_ptr_f(u32 v)
+{
+	return (v & 0xfffffff) << 0;
+}
+static inline u32 gr_fecs_new_ctx_ptr_m(void)
+{
+	return 0xfffffff << 0;
+}
+static inline u32 gr_fecs_new_ctx_ptr_v(u32 r)
+{
+	return (r >> 0) & 0xfffffff;
+}
+static inline u32 gr_fecs_new_ctx_target_s(void)
+{
+	return 2;
+}
+static inline u32 gr_fecs_new_ctx_target_f(u32 v)
+{
+	return (v & 0x3) << 28;
+}
+static inline u32 gr_fecs_new_ctx_target_m(void)
+{
+	return 0x3 << 28;
+}
+static inline u32 gr_fecs_new_ctx_target_v(u32 r)
+{
+	return (r >> 28) & 0x3;
+}
+static inline u32 gr_fecs_new_ctx_valid_s(void)
+{
+	return 1;
+}
+static inline u32 gr_fecs_new_ctx_valid_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+static inline u32 gr_fecs_new_ctx_valid_m(void)
+{
+	return 0x1U << 31; /* bit 31 mask; 0x1U avoids signed-shift UB */
+}
+static inline u32 gr_fecs_new_ctx_valid_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_r(void)
+{
+	return 0x00409a0c;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_ptr_s(void)
+{
+	return 28;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_ptr_f(u32 v)
+{
+	return (v & 0xfffffff) << 0;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_ptr_m(void)
+{
+	return 0xfffffff << 0;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_ptr_v(u32 r)
+{
+	return (r >> 0) & 0xfffffff;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_target_s(void)
+{
+	return 2;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_target_f(u32 v)
+{
+	return (v & 0x3) << 28;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_target_m(void)
+{
+	return 0x3 << 28;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_target_v(u32 r)
+{
+	return (r >> 28) & 0x3;
+}
+static inline u32 gr_fecs_arb_ctx_cmd_r(void)
+{
+	return 0x00409a10;
+}
+static inline u32 gr_fecs_arb_ctx_cmd_cmd_s(void)
+{
+	return 5;
+}
+static inline u32 gr_fecs_arb_ctx_cmd_cmd_f(u32 v)
+{
+	return (v & 0x1f) << 0;
+}
+static inline u32 gr_fecs_arb_ctx_cmd_cmd_m(void)
+{
+	return 0x1f << 0;
+}
+static inline u32 gr_fecs_arb_ctx_cmd_cmd_v(u32 r)
+{
+	return (r >> 0) & 0x1f;
+}
+static inline u32 gr_fecs_ctxsw_status_fe_0_r(void)
+{
+	return 0x00409c00;
+}
+static inline u32 gr_gpc0_gpccs_ctxsw_status_gpc_0_r(void)
+{
+	return 0x00502c04;
+}
+static inline u32 gr_gpc0_gpccs_ctxsw_status_1_r(void)
+{
+	return 0x00502400;
+}
+static inline u32 gr_fecs_ctxsw_idlestate_r(void)
+{
+	return 0x00409420;
+}
+static inline u32 gr_fecs_feature_override_ecc_r(void)
+{
+	return 0x00409658;
+}
+static inline u32 gr_gpc0_gpccs_ctxsw_idlestate_r(void)
+{
+	return 0x00502420;
+}
+static inline u32 gr_rstr2d_gpc_map0_r(void)
+{
+	return 0x0040780c;
+}
+static inline u32 gr_rstr2d_gpc_map1_r(void)
+{
+	return 0x00407810;
+}
+static inline u32 gr_rstr2d_gpc_map2_r(void)
+{
+	return 0x00407814;
+}
+static inline u32 gr_rstr2d_gpc_map3_r(void)
+{
+	return 0x00407818;
+}
+static inline u32 gr_rstr2d_gpc_map4_r(void)
+{
+	return 0x0040781c;
+}
+static inline u32 gr_rstr2d_gpc_map5_r(void)
+{
+	return 0x00407820;
+}
+static inline u32 gr_rstr2d_map_table_cfg_r(void)
+{
+	return 0x004078bc;
+}
+static inline u32 gr_rstr2d_map_table_cfg_row_offset_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+static inline u32 gr_rstr2d_map_table_cfg_num_entries_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 gr_pd_hww_esr_r(void)
+{
+	return 0x00406018; /* register offset */
+}
+static inline u32 gr_pd_hww_esr_reset_active_f(void)
+{
+	return 0x40000000; /* bit 30 set */
+}
+static inline u32 gr_pd_hww_esr_en_enable_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_pd_num_tpc_per_gpc_r(u32 i)
+{
+	return 0x00406028 + i*4; /* offset of register i (stride 4) */
+}
+static inline u32 gr_pd_num_tpc_per_gpc__size_1_v(void)
+{
+	return 0x00000004; /* presumably the number of _r(i) entries; confirm */
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count0_f(u32 v)
+{
+	return (v & 0xf) << 0; /* place v in bits 3:0 */
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count1_f(u32 v)
+{
+	return (v & 0xf) << 4; /* place v in bits 7:4 */
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count2_f(u32 v)
+{
+	return (v & 0xf) << 8; /* place v in bits 11:8 */
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count3_f(u32 v)
+{
+	return (v & 0xf) << 12; /* place v in bits 15:12 */
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count4_f(u32 v)
+{
+	return (v & 0xf) << 16; /* place v in bits 19:16 */
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count5_f(u32 v)
+{
+	return (v & 0xf) << 20; /* place v in bits 23:20 */
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count6_f(u32 v)
+{
+	return (v & 0xf) << 24; /* place v in bits 27:24 */
+}
+static inline u32 gr_pd_num_tpc_per_gpc_count7_f(u32 v)
+{
+	return (v & 0xf) << 28; /* place v in bits 31:28 */
+}
+static inline u32 gr_pd_ab_dist_cfg0_r(void)
+{
+	return 0x004064c0; /* register offset */
+}
+static inline u32 gr_pd_ab_dist_cfg0_timeslice_enable_en_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_pd_ab_dist_cfg0_timeslice_enable_dis_f(void)
+{
+	return 0x0; /* no bits set */
+}
+static inline u32 gr_pd_ab_dist_cfg1_r(void)
+{
+	return 0x004064c4; /* register offset */
+}
+static inline u32 gr_pd_ab_dist_cfg1_max_batches_init_f(void)
+{
+	return 0xffff; /* bits 15:0 all set */
+}
+static inline u32 gr_pd_ab_dist_cfg1_max_output_f(u32 v)
+{
+	return (v & 0xffff) << 16; /* place v in bits 31:16 */
+}
+static inline u32 gr_pd_ab_dist_cfg1_max_output_granularity_v(void)
+{
+	return 0x00000080; /* 128 */
+}
+static inline u32 gr_pd_ab_dist_cfg2_r(void)
+{
+	return 0x004064c8; /* register offset */
+}
+static inline u32 gr_pd_ab_dist_cfg2_token_limit_f(u32 v)
+{
+	return (v & 0x1fff) << 0; /* place v in bits 12:0 */
+}
+static inline u32 gr_pd_ab_dist_cfg2_token_limit_init_v(void)
+{
+	return 0x00000900; /* 2304 */
+}
+static inline u32 gr_pd_ab_dist_cfg2_state_limit_f(u32 v)
+{
+	return (v & 0x1fff) << 16; /* place v in bits 28:16 */
+}
+static inline u32 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(void)
+{
+	return 0x00000020; /* 32 */
+}
+static inline u32 gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(void)
+{
+	return 0x00000900; /* 2304 */
+}
+static inline u32 gr_pd_dist_skip_table_r(u32 i)
+{
+	return 0x004064d0 + i*4; /* offset of register i (stride 4) */
+}
+static inline u32 gr_pd_dist_skip_table__size_1_v(void)
+{
+	return 0x00000008; /* presumably the number of _r(i) entries; confirm */
+}
+static inline u32 gr_pd_dist_skip_table_gpc_4n0_mask_f(u32 v)
+{
+	return (v & 0xff) << 0; /* place v in bits 7:0 */
+}
+static inline u32 gr_pd_dist_skip_table_gpc_4n1_mask_f(u32 v)
+{
+	return (v & 0xff) << 8; /* place v in bits 15:8 */
+}
+static inline u32 gr_pd_dist_skip_table_gpc_4n2_mask_f(u32 v)
+{
+	return (v & 0xff) << 16; /* place v in bits 23:16 */
+}
+static inline u32 gr_pd_dist_skip_table_gpc_4n3_mask_f(u32 v)
+{
+	return (v & 0xff) << 24; /* place v in bits 31:24 */
+}
+static inline u32 gr_ds_debug_r(void)
+{
+	return 0x00405800; /* register offset */
+}
+static inline u32 gr_ds_debug_timeslice_mode_disable_f(void)
+{
+	return 0x0; /* no bits set */
+}
+static inline u32 gr_ds_debug_timeslice_mode_enable_f(void)
+{
+	return 0x8000000; /* bit 27 set */
+}
+static inline u32 gr_ds_zbc_color_r_r(void)
+{
+	return 0x00405804; /* register offset */
+}
+static inline u32 gr_ds_zbc_color_r_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit field: v unchanged */
+}
+static inline u32 gr_ds_zbc_color_g_r(void)
+{
+	return 0x00405808; /* register offset */
+}
+static inline u32 gr_ds_zbc_color_g_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit field: v unchanged */
+}
+static inline u32 gr_ds_zbc_color_b_r(void)
+{
+	return 0x0040580c; /* register offset */
+}
+static inline u32 gr_ds_zbc_color_b_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit field: v unchanged */
+}
+static inline u32 gr_ds_zbc_color_a_r(void)
+{
+	return 0x00405810; /* register offset */
+}
+static inline u32 gr_ds_zbc_color_a_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit field: v unchanged */
+}
+static inline u32 gr_ds_zbc_color_fmt_r(void)
+{
+	return 0x00405814; /* register offset */
+}
+static inline u32 gr_ds_zbc_color_fmt_val_f(u32 v)
+{
+	return (v & 0x7f) << 0; /* place v in bits 6:0 */
+}
+static inline u32 gr_ds_zbc_color_fmt_val_invalid_f(void)
+{
+	return 0x0; /* no bits set */
+}
+static inline u32 gr_ds_zbc_color_fmt_val_zero_v(void)
+{
+	return 0x00000001; /* 1 */
+}
+static inline u32 gr_ds_zbc_color_fmt_val_unorm_one_v(void)
+{
+	return 0x00000002; /* 2 */
+}
+static inline u32 gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v(void)
+{
+	return 0x00000004; /* 4 */
+}
+static inline u32 gr_ds_zbc_color_fmt_val_a8_b8_g8_r8_v(void)
+{
+	return 0x00000028; /* 40 */
+}
+static inline u32 gr_ds_zbc_z_r(void)
+{
+	return 0x00405818; /* register offset */
+}
+static inline u32 gr_ds_zbc_z_val_s(void)
+{
+	return 32; /* field width in bits (mask below is 0xffffffff) */
+}
+static inline u32 gr_ds_zbc_z_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit field: v unchanged */
+}
+static inline u32 gr_ds_zbc_z_val_m(void)
+{
+	return 0xffffffff << 0; /* mask of bits 31:0 */
+}
+static inline u32 gr_ds_zbc_z_val_v(u32 r)
+{
+	return (r >> 0) & 0xffffffff; /* full 32-bit field: r unchanged */
+}
+static inline u32 gr_ds_zbc_z_val__init_v(void)
+{
+	return 0x00000000; /* 0 */
+}
+static inline u32 gr_ds_zbc_z_val__init_f(void)
+{
+	return 0x0; /* no bits set */
+}
+static inline u32 gr_ds_zbc_z_fmt_r(void)
+{
+	return 0x0040581c; /* register offset */
+}
+static inline u32 gr_ds_zbc_z_fmt_val_f(u32 v)
+{
+	return (v & 0x1) << 0; /* place v in bit 0 */
+}
+static inline u32 gr_ds_zbc_z_fmt_val_invalid_f(void)
+{
+	return 0x0; /* no bits set */
+}
+static inline u32 gr_ds_zbc_z_fmt_val_fp32_v(void)
+{
+	return 0x00000001; /* 1 */
+}
+static inline u32 gr_ds_zbc_tbl_index_r(void)
+{
+	return 0x00405820; /* register offset */
+}
+static inline u32 gr_ds_zbc_tbl_index_val_f(u32 v)
+{
+	return (v & 0xf) << 0; /* place v in bits 3:0 */
+}
+static inline u32 gr_ds_zbc_tbl_ld_r(void)
+{
+	return 0x00405824; /* register offset */
+}
+static inline u32 gr_ds_zbc_tbl_ld_select_c_f(void)
+{
+	return 0x0; /* no bits set */
+}
+static inline u32 gr_ds_zbc_tbl_ld_select_z_f(void)
+{
+	return 0x1; /* bit 0 set */
+}
+static inline u32 gr_ds_zbc_tbl_ld_action_write_f(void)
+{
+	return 0x0; /* no bits set */
+}
+static inline u32 gr_ds_zbc_tbl_ld_trigger_active_f(void)
+{
+	return 0x4; /* bit 2 set */
+}
+static inline u32 gr_ds_tga_constraintlogic_beta_r(void)
+{
+	return 0x00405830; /* register offset */
+}
+static inline u32 gr_ds_tga_constraintlogic_beta_cbsize_f(u32 v)
+{
+	return (v & 0x3fffff) << 0; /* place v in bits 21:0 */
+}
+static inline u32 gr_ds_tga_constraintlogic_alpha_r(void)
+{
+	return 0x0040585c; /* register offset */
+}
+static inline u32 gr_ds_tga_constraintlogic_alpha_cbsize_f(u32 v)
+{
+	return (v & 0xffff) << 0; /* place v in bits 15:0 */
+}
+static inline u32 gr_ds_hww_esr_r(void)
+{
+	return 0x00405840; /* register offset */
+}
+static inline u32 gr_ds_hww_esr_reset_s(void)
+{
+	return 1; /* field width in bits (mask below is 0x1) */
+}
+static inline u32 gr_ds_hww_esr_reset_f(u32 v)
+{
+	return (v & 0x1) << 30; /* place v in bit 30 */
+}
+static inline u32 gr_ds_hww_esr_reset_m(void)
+{
+	return 0x1 << 30; /* mask of bit 30 */
+}
+static inline u32 gr_ds_hww_esr_reset_v(u32 r)
+{
+	return (r >> 30) & 0x1; /* extract bit 30 of r */
+}
+static inline u32 gr_ds_hww_esr_reset_task_v(void)
+{
+	return 0x00000001; /* unshifted value; _task_f is this << 30 */
+}
+static inline u32 gr_ds_hww_esr_reset_task_f(void)
+{
+	return 0x40000000; /* bit 30 set */
+}
+static inline u32 gr_ds_hww_esr_en_enabled_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_ds_hww_esr_2_r(void)
+{
+	return 0x00405848; /* register offset */
+}
+static inline u32 gr_ds_hww_esr_2_reset_s(void)
+{
+	return 1; /* field width in bits (mask below is 0x1) */
+}
+static inline u32 gr_ds_hww_esr_2_reset_f(u32 v)
+{
+	return (v & 0x1) << 30; /* place v in bit 30 */
+}
+static inline u32 gr_ds_hww_esr_2_reset_m(void)
+{
+	return 0x1 << 30; /* mask of bit 30 */
+}
+static inline u32 gr_ds_hww_esr_2_reset_v(u32 r)
+{
+	return (r >> 30) & 0x1; /* extract bit 30 of r */
+}
+static inline u32 gr_ds_hww_esr_2_reset_task_v(void)
+{
+	return 0x00000001; /* unshifted value; _task_f is this << 30 */
+}
+static inline u32 gr_ds_hww_esr_2_reset_task_f(void)
+{
+	return 0x40000000; /* bit 30 set */
+}
+static inline u32 gr_ds_hww_esr_2_en_enabled_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_ds_hww_report_mask_r(void)
+{
+	return 0x00405844; /* register offset */
+}
+static inline u32 gr_ds_hww_report_mask_sph0_err_report_f(void)
+{
+	return 0x1; /* bit 0 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph1_err_report_f(void)
+{
+	return 0x2; /* bit 1 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph2_err_report_f(void)
+{
+	return 0x4; /* bit 2 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph3_err_report_f(void)
+{
+	return 0x8; /* bit 3 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph4_err_report_f(void)
+{
+	return 0x10; /* bit 4 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph5_err_report_f(void)
+{
+	return 0x20; /* bit 5 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph6_err_report_f(void)
+{
+	return 0x40; /* bit 6 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph7_err_report_f(void)
+{
+	return 0x80; /* bit 7 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph8_err_report_f(void)
+{
+	return 0x100; /* bit 8 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph9_err_report_f(void)
+{
+	return 0x200; /* bit 9 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph10_err_report_f(void)
+{
+	return 0x400; /* bit 10 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph11_err_report_f(void)
+{
+	return 0x800; /* bit 11 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph12_err_report_f(void)
+{
+	return 0x1000; /* bit 12 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph13_err_report_f(void)
+{
+	return 0x2000; /* bit 13 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph14_err_report_f(void)
+{
+	return 0x4000; /* bit 14 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph15_err_report_f(void)
+{
+	return 0x8000; /* bit 15 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph16_err_report_f(void)
+{
+	return 0x10000; /* bit 16 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph17_err_report_f(void)
+{
+	return 0x20000; /* bit 17 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph18_err_report_f(void)
+{
+	return 0x40000; /* bit 18 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph19_err_report_f(void)
+{
+	return 0x80000; /* bit 19 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph20_err_report_f(void)
+{
+	return 0x100000; /* bit 20 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph21_err_report_f(void)
+{
+	return 0x200000; /* bit 21 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph22_err_report_f(void)
+{
+	return 0x400000; /* bit 22 set */
+}
+static inline u32 gr_ds_hww_report_mask_sph23_err_report_f(void)
+{
+	return 0x800000; /* bit 23 set */
+}
+static inline u32 gr_ds_hww_report_mask_2_r(void)
+{
+	return 0x0040584c; /* register offset */
+}
+static inline u32 gr_ds_hww_report_mask_2_sph24_err_report_f(void)
+{
+	return 0x1; /* bit 0 set (of the _2 register) */
+}
+static inline u32 gr_ds_num_tpc_per_gpc_r(u32 i)
+{
+	return 0x00405870 + i*4; /* offset of register i (stride 4) */
+}
+static inline u32 gr_scc_bundle_cb_base_r(void)
+{
+	return 0x00408004; /* register offset */
+}
+static inline u32 gr_scc_bundle_cb_base_addr_39_8_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit field: v unchanged */
+}
+static inline u32 gr_scc_bundle_cb_base_addr_39_8_align_bits_v(void)
+{
+	return 0x00000008; /* 8 */
+}
+static inline u32 gr_scc_bundle_cb_size_r(void)
+{
+	return 0x00408008; /* register offset */
+}
+static inline u32 gr_scc_bundle_cb_size_div_256b_f(u32 v)
+{
+	return (v & 0x7ff) << 0; /* place v in bits 10:0 */
+}
+static inline u32 gr_scc_bundle_cb_size_div_256b__prod_v(void)
+{
+	return 0x00000030; /* 48 */
+}
+static inline u32 gr_scc_bundle_cb_size_div_256b_byte_granularity_v(void)
+{
+	return 0x00000100; /* 256 */
+}
+static inline u32 gr_scc_bundle_cb_size_valid_false_v(void)
+{
+	return 0x00000000; /* 0 */
+}
+static inline u32 gr_scc_bundle_cb_size_valid_false_f(void)
+{
+	return 0x0; /* no bits set */
+}
+static inline u32 gr_scc_bundle_cb_size_valid_true_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_scc_pagepool_base_r(void)
+{
+	return 0x0040800c; /* register offset */
+}
+static inline u32 gr_scc_pagepool_base_addr_39_8_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit field: v unchanged */
+}
+static inline u32 gr_scc_pagepool_base_addr_39_8_align_bits_v(void)
+{
+	return 0x00000008; /* 8 */
+}
+static inline u32 gr_scc_pagepool_r(void)
+{
+	return 0x00408010; /* register offset */
+}
+static inline u32 gr_scc_pagepool_total_pages_f(u32 v)
+{
+	return (v & 0x3ff) << 0; /* place v in bits 9:0 */
+}
+static inline u32 gr_scc_pagepool_total_pages_hwmax_v(void)
+{
+	return 0x00000000; /* 0 */
+}
+static inline u32 gr_scc_pagepool_total_pages_hwmax_value_v(void)
+{
+	return 0x00000200; /* 512 */
+}
+static inline u32 gr_scc_pagepool_total_pages_byte_granularity_v(void)
+{
+	return 0x00000100; /* 256 */
+}
+static inline u32 gr_scc_pagepool_max_valid_pages_s(void)
+{
+	return 10; /* field width in bits (mask below is 0x3ff) */
+}
+static inline u32 gr_scc_pagepool_max_valid_pages_f(u32 v)
+{
+	return (v & 0x3ff) << 10; /* place v in bits 19:10 */
+}
+static inline u32 gr_scc_pagepool_max_valid_pages_m(void)
+{
+	return 0x3ff << 10; /* mask of bits 19:10 */
+}
+static inline u32 gr_scc_pagepool_max_valid_pages_v(u32 r)
+{
+	return (r >> 10) & 0x3ff; /* extract bits 19:10 of r */
+}
+static inline u32 gr_scc_pagepool_valid_true_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_scc_init_r(void)
+{
+	return 0x0040802c; /* register offset */
+}
+static inline u32 gr_scc_init_ram_trigger_f(void)
+{
+	return 0x1; /* bit 0 set */
+}
+static inline u32 gr_scc_hww_esr_r(void)
+{
+	return 0x00408030; /* register offset */
+}
+static inline u32 gr_scc_hww_esr_reset_active_f(void)
+{
+	return 0x40000000; /* bit 30 set */
+}
+static inline u32 gr_scc_hww_esr_en_enable_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_sked_hww_esr_r(void)
+{
+	return 0x00407020; /* register offset */
+}
+static inline u32 gr_sked_hww_esr_reset_active_f(void)
+{
+	return 0x40000000; /* bit 30 set */
+}
+static inline u32 gr_cwd_fs_r(void)
+{
+	return 0x00405b00; /* register offset */
+}
+static inline u32 gr_cwd_fs_num_gpcs_f(u32 v)
+{
+	return (v & 0xff) << 0; /* place v in bits 7:0 */
+}
+static inline u32 gr_cwd_fs_num_tpcs_f(u32 v)
+{
+	return (v & 0xff) << 8; /* place v in bits 15:8 */
+}
+static inline u32 gr_cwd_gpc_tpc_id_r(u32 i)
+{
+	return 0x00405b60 + i*4; /* offset of register i (stride 4) */
+}
+static inline u32 gr_cwd_gpc_tpc_id_tpc0_f(u32 v)
+{
+	return (v & 0xf) << 0; /* place v in bits 3:0 */
+}
+static inline u32 gr_cwd_gpc_tpc_id_tpc1_f(u32 v)
+{
+	return (v & 0xf) << 8; /* place v in bits 11:8 */
+}
+static inline u32 gr_cwd_sm_id_r(u32 i)
+{
+	return 0x00405ba0 + i*4; /* offset of register i (stride 4) */
+}
+static inline u32 gr_cwd_sm_id_tpc0_f(u32 v)
+{
+	return (v & 0xff) << 0; /* place v in bits 7:0 */
+}
+static inline u32 gr_cwd_sm_id_tpc1_f(u32 v)
+{
+	return (v & 0xff) << 8; /* place v in bits 15:8 */
+}
+static inline u32 gr_gpc0_fs_gpc_r(void)
+{
+	return 0x00502608; /* register offset */
+}
+static inline u32 gr_gpc0_fs_gpc_num_available_tpcs_v(u32 r)
+{
+	return (r >> 0) & 0x1f; /* extract bits 4:0 of r */
+}
+static inline u32 gr_gpc0_fs_gpc_num_available_zculls_v(u32 r)
+{
+	return (r >> 16) & 0x1f; /* extract bits 20:16 of r */
+}
+static inline u32 gr_gpc0_cfg_r(void)
+{
+	return 0x00502620; /* register offset */
+}
+static inline u32 gr_gpc0_cfg_imem_sz_v(u32 r)
+{
+	return (r >> 0) & 0xff; /* extract bits 7:0 of r */
+}
+static inline u32 gr_gpccs_rc_lanes_r(void)
+{
+	return 0x00502880; /* register offset */
+}
+static inline u32 gr_gpccs_rc_lanes_num_chains_s(void)
+{
+	return 6; /* field width in bits (mask below is 0x3f) */
+}
+static inline u32 gr_gpccs_rc_lanes_num_chains_f(u32 v)
+{
+	return (v & 0x3f) << 0; /* place v in bits 5:0 */
+}
+static inline u32 gr_gpccs_rc_lanes_num_chains_m(void)
+{
+	return 0x3f << 0; /* mask of bits 5:0 */
+}
+static inline u32 gr_gpccs_rc_lanes_num_chains_v(u32 r)
+{
+	return (r >> 0) & 0x3f; /* extract bits 5:0 of r */
+}
+static inline u32 gr_gpccs_rc_lane_size_r(void)
+{
+	return 0x00502910; /* register offset */
+}
+static inline u32 gr_gpccs_rc_lane_size_v_s(void)
+{
+	return 24; /* field width in bits (mask below is 0xffffff) */
+}
+static inline u32 gr_gpccs_rc_lane_size_v_f(u32 v)
+{
+	return (v & 0xffffff) << 0; /* place v in bits 23:0 */
+}
+static inline u32 gr_gpccs_rc_lane_size_v_m(void)
+{
+	return 0xffffff << 0; /* mask of bits 23:0 */
+}
+static inline u32 gr_gpccs_rc_lane_size_v_v(u32 r)
+{
+	return (r >> 0) & 0xffffff; /* extract bits 23:0 of r */
+}
+static inline u32 gr_gpccs_rc_lane_size_v_0_v(void)
+{
+	return 0x00000000; /* 0 */
+}
+static inline u32 gr_gpccs_rc_lane_size_v_0_f(void)
+{
+	return 0x0; /* no bits set */
+}
+static inline u32 gr_gpc0_zcull_fs_r(void)
+{
+	return 0x00500910; /* register offset */
+}
+static inline u32 gr_gpc0_zcull_fs_num_sms_f(u32 v)
+{
+	return (v & 0x1ff) << 0; /* place v in bits 8:0 */
+}
+static inline u32 gr_gpc0_zcull_fs_num_active_banks_f(u32 v)
+{
+	return (v & 0xf) << 16; /* place v in bits 19:16 */
+}
+static inline u32 gr_gpc0_zcull_ram_addr_r(void)
+{
+	return 0x00500914; /* register offset */
+}
+static inline u32 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(u32 v)
+{
+	return (v & 0xf) << 0; /* place v in bits 3:0 */
+}
+static inline u32 gr_gpc0_zcull_ram_addr_row_offset_f(u32 v)
+{
+	return (v & 0xf) << 8; /* place v in bits 11:8 */
+}
+static inline u32 gr_gpc0_zcull_sm_num_rcp_r(void)
+{
+	return 0x00500918; /* register offset */
+}
+static inline u32 gr_gpc0_zcull_sm_num_rcp_conservative_f(u32 v)
+{
+	return (v & 0xffffff) << 0; /* place v in bits 23:0 */
+}
+static inline u32 gr_gpc0_zcull_sm_num_rcp_conservative__max_v(void)
+{
+	return 0x00800000; /* 1 << 23 */
+}
+static inline u32 gr_gpc0_zcull_total_ram_size_r(void)
+{
+	return 0x00500920; /* register offset */
+}
+static inline u32 gr_gpc0_zcull_total_ram_size_num_aliquots_f(u32 v)
+{
+	return (v & 0xffff) << 0; /* place v in bits 15:0 */
+}
+static inline u32 gr_gpc0_zcull_zcsize_r(u32 i)
+{
+	return 0x00500a04 + i*32; /* offset of register i (stride 32) */
+}
+static inline u32 gr_gpc0_zcull_zcsize_height_subregion__multiple_v(void)
+{
+	return 0x00000040; /* 64 */
+}
+static inline u32 gr_gpc0_zcull_zcsize_width_subregion__multiple_v(void)
+{
+	return 0x00000010; /* 16 */
+}
+static inline u32 gr_gpc0_gpm_pd_sm_id_r(u32 i)
+{
+	return 0x00500c10 + i*4; /* offset of register i (stride 4) */
+}
+static inline u32 gr_gpc0_gpm_pd_sm_id_id_f(u32 v)
+{
+	return (v & 0xff) << 0; /* place v in bits 7:0 */
+}
+static inline u32 gr_gpc0_gpm_pd_pes_tpc_id_mask_r(u32 i)
+{
+	return 0x00500c30 + i*4; /* offset of register i (stride 4) */
+}
+static inline u32 gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(u32 r)
+{
+	return (r >> 0) & 0xff; /* extract bits 7:0 of r */
+}
+static inline u32 gr_gpc0_tpc0_pe_cfg_smid_r(void)
+{
+	return 0x00504088; /* register offset */
+}
+static inline u32 gr_gpc0_tpc0_pe_cfg_smid_value_f(u32 v)
+{
+	return (v & 0xffff) << 0; /* place v in bits 15:0 */
+}
+static inline u32 gr_gpc0_tpc0_sm_cfg_r(void)
+{
+	return 0x00504698; /* register offset */
+}
+static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
+{
+	return (v & 0xffff) << 0; /* place v in bits 15:0 */
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
+{
+	return 0x0050469c; /* register offset */
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_warp_count_v(u32 r)
+{
+	return (r >> 0) & 0xff; /* extract bits 7:0 of r */
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_v(u32 r)
+{
+	return (r >> 8) & 0xfff; /* extract bits 19:8 of r */
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_sm_version_v(u32 r)
+{
+	return (r >> 20) & 0xfff; /* extract bits 31:20 of r */
+}
+static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
+{
+	return 0x00503018; /* register offset */
+}
+static inline u32 gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(void)
+{
+	return 0x1 << 0; /* mask of bit 0 */
+}
+static inline u32 gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f(void)
+{
+	return 0x1; /* bit 0 set */
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_r(void)
+{
+	return 0x005030c0; /* register offset */
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_f(u32 v)
+{
+	return (v & 0x3fffff) << 0; /* place v in bits 21:0 */
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_m(void)
+{
+	return 0x3fffff << 0; /* mask of bits 21:0 */
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(void)
+{
+	return 0x00000320; /* 800 */
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v(void)
+{
+	return 0x00000ba8; /* 2984 */
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v(void)
+{
+	return 0x00000020; /* 32 */
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_cb_offset_r(void)
+{
+	return 0x005030f4; /* register offset */
+}
+static inline u32 gr_gpc0_ppc0_cbm_alpha_cb_size_r(void)
+{
+	return 0x005030e4; /* register offset */
+}
+static inline u32 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(u32 v)
+{
+	return (v & 0xffff) << 0; /* place v in bits 15:0 */
+}
+static inline u32 gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(void)
+{
+	return 0xffff << 0; /* mask of bits 15:0 */
+}
+static inline u32 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(void)
+{
+	return 0x00000800; /* 2048 */
+}
+static inline u32 gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v(void)
+{
+	return 0x00000020; /* 32 */
+}
+static inline u32 gr_gpc0_ppc0_cbm_alpha_cb_offset_r(void)
+{
+	return 0x005030f8; /* register offset */
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r(void)
+{
+	return 0x005030f0; /* register offset */
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_f(u32 v)
+{
+	return (v & 0x3fffff) << 0; /* place v in bits 21:0 */
+}
+static inline u32 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_default_v(void)
+{
+	return 0x00000320; /* 800 */
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_0_r(void)
+{
+	return 0x00419b00; /* register offset */
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit field: v unchanged */
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_r(void)
+{
+	return 0x00419b04; /* register offset */
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_s(void)
+{
+	return 21; /* field width in bits (mask below is 0x1fffff) */
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(u32 v)
+{
+	return (v & 0x1fffff) << 0; /* place v in bits 20:0 */
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_m(void)
+{
+	return 0x1fffff << 0; /* mask of bits 20:0 */
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_v(u32 r)
+{
+	return (r >> 0) & 0x1fffff; /* extract bits 20:0 of r */
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f(void)
+{
+	return 0x80; /* 128 */
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_valid_s(void)
+{
+	return 1; /* field width in bits (mask below is 0x1) */
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_valid_f(u32 v)
+{
+	return (v & 0x1) << 31; /* place v in bit 31 (v is unsigned, so no overflow) */
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_valid_m(void)
+{
+	return 0x1U << 31; /* mask of bit 31; unsigned literal avoids signed-shift UB */
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_valid_v(u32 r)
+{
+	return (r >> 31) & 0x1; /* extract bit 31 of r */
+}
+static inline u32 gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_gpccs_falcon_addr_r(void)
+{
+	return 0x0041a0ac; /* register offset */
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_s(void)
+{
+	return 6; /* field width in bits (mask below is 0x3f) */
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_f(u32 v)
+{
+	return (v & 0x3f) << 0; /* place v in bits 5:0 */
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_m(void)
+{
+	return 0x3f << 0; /* mask of bits 5:0 */
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_v(u32 r)
+{
+	return (r >> 0) & 0x3f; /* extract bits 5:0 of r */
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_init_v(void)
+{
+	return 0x00000000; /* 0 */
+}
+static inline u32 gr_gpccs_falcon_addr_lsb_init_f(void)
+{
+	return 0x0; /* no bits set */
+}
+static inline u32 gr_gpccs_falcon_addr_msb_s(void)
+{
+	return 6; /* field width in bits (mask below is 0x3f) */
+}
+static inline u32 gr_gpccs_falcon_addr_msb_f(u32 v)
+{
+	return (v & 0x3f) << 6; /* place v in bits 11:6 */
+}
+static inline u32 gr_gpccs_falcon_addr_msb_m(void)
+{
+	return 0x3f << 6; /* mask of bits 11:6 */
+}
+static inline u32 gr_gpccs_falcon_addr_msb_v(u32 r)
+{
+	return (r >> 6) & 0x3f; /* extract bits 11:6 of r */
+}
+static inline u32 gr_gpccs_falcon_addr_msb_init_v(void)
+{
+	return 0x00000000; /* 0 */
+}
+static inline u32 gr_gpccs_falcon_addr_msb_init_f(void)
+{
+	return 0x0; /* no bits set */
+}
+static inline u32 gr_gpccs_falcon_addr_ext_s(void)
+{
+	return 12; /* field width in bits (mask below is 0xfff) */
+}
+static inline u32 gr_gpccs_falcon_addr_ext_f(u32 v)
+{
+	return (v & 0xfff) << 0; /* place v in bits 11:0 (spans lsb and msb) */
+}
+static inline u32 gr_gpccs_falcon_addr_ext_m(void)
+{
+	return 0xfff << 0; /* mask of bits 11:0 */
+}
+static inline u32 gr_gpccs_falcon_addr_ext_v(u32 r)
+{
+	return (r >> 0) & 0xfff; /* extract bits 11:0 of r */
+}
+static inline u32 gr_gpccs_cpuctl_r(void)
+{
+	return 0x0041a100; /* register offset */
+}
+static inline u32 gr_gpccs_cpuctl_startcpu_f(u32 v)
+{
+	return (v & 0x1) << 1; /* place v in bit 1 */
+}
+static inline u32 gr_gpccs_dmactl_r(void)
+{
+	return 0x0041a10c; /* register offset */
+}
+static inline u32 gr_gpccs_dmactl_require_ctx_f(u32 v)
+{
+	return (v & 0x1) << 0; /* place v in bit 0 */
+}
+static inline u32 gr_gpccs_dmactl_dmem_scrubbing_m(void)
+{
+	return 0x1 << 1; /* mask of bit 1 */
+}
+static inline u32 gr_gpccs_dmactl_imem_scrubbing_m(void)
+{
+	return 0x1 << 2; /* mask of bit 2 */
+}
+static inline u32 gr_gpccs_imemc_r(u32 i)
+{
+	return 0x0041a180 + i*16; /* offset of register i (stride 16) */
+}
+static inline u32 gr_gpccs_imemc_offs_f(u32 v)
+{
+	return (v & 0x3f) << 2; /* place v in bits 7:2 */
+}
+static inline u32 gr_gpccs_imemc_blk_f(u32 v)
+{
+	return (v & 0xff) << 8; /* place v in bits 15:8 */
+}
+static inline u32 gr_gpccs_imemc_aincw_f(u32 v)
+{
+	return (v & 0x1) << 24; /* place v in bit 24 */
+}
+static inline u32 gr_gpccs_imemd_r(u32 i)
+{
+	return 0x0041a184 + i*16; /* offset of register i (stride 16) */
+}
+static inline u32 gr_gpccs_imemt_r(u32 i)
+{
+	return 0x0041a188 + i*16; /* offset of register i (stride 16) */
+}
+static inline u32 gr_gpccs_imemt__size_1_v(void)
+{
+	return 0x00000004; /* presumably the number of _r(i) entries; confirm */
+}
+static inline u32 gr_gpccs_imemt_tag_f(u32 v)
+{
+	return (v & 0xffff) << 0; /* place v in bits 15:0 */
+}
+static inline u32 gr_gpccs_dmemc_r(u32 i)
+{
+	return 0x0041a1c0 + i*8; /* offset of register i (stride 8) */
+}
+static inline u32 gr_gpccs_dmemc_offs_f(u32 v)
+{
+	return (v & 0x3f) << 2; /* place v in bits 7:2 */
+}
+static inline u32 gr_gpccs_dmemc_blk_f(u32 v)
+{
+	return (v & 0xff) << 8; /* place v in bits 15:8 */
+}
+static inline u32 gr_gpccs_dmemc_aincw_f(u32 v)
+{
+	return (v & 0x1) << 24; /* place v in bit 24 */
+}
+static inline u32 gr_gpccs_dmemd_r(u32 i)
+{
+	return 0x0041a1c4 + i*8; /* offset of register i (stride 8) */
+}
+static inline u32 gr_gpccs_ctxsw_mailbox_r(u32 i)
+{
+	return 0x0041a800 + i*4; /* offset of register i (stride 4) */
+}
+static inline u32 gr_gpccs_ctxsw_mailbox_value_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit field: v unchanged */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_base_r(void)
+{
+	return 0x00418e24; /* register offset */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_base_addr_39_8_s(void)
+{
+	return 32; /* field width in bits (mask below is 0xffffffff) */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit field: v unchanged */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_base_addr_39_8_m(void)
+{
+	return 0xffffffff << 0; /* mask of bits 31:0 */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_base_addr_39_8_v(u32 r)
+{
+	return (r >> 0) & 0xffffffff; /* full 32-bit field: r unchanged */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_base_addr_39_8_init_v(void)
+{
+	return 0x00000000; /* 0 */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_base_addr_39_8_init_f(void)
+{
+	return 0x0; /* no bits set */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_r(void)
+{
+	return 0x00418e28; /* register offset */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_div_256b_s(void)
+{
+	return 11; /* field width in bits (mask below is 0x7ff) */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_div_256b_f(u32 v)
+{
+	return (v & 0x7ff) << 0; /* place v in bits 10:0 */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_div_256b_m(void)
+{
+	return 0x7ff << 0; /* mask of bits 10:0 */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_div_256b_v(u32 r)
+{
+	return (r >> 0) & 0x7ff; /* extract bits 10:0 of r */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_div_256b_init_v(void)
+{
+	return 0x00000030; /* 48 */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_div_256b_init_f(void)
+{
+	return 0x30; /* 48 in bits 10:0 */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_s(void)
+{
+	return 1; /* field width in bits (mask below is 0x1) */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_f(u32 v)
+{
+	return (v & 0x1) << 31; /* place v in bit 31 (v is unsigned, so no overflow) */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_m(void)
+{
+	return 0x1U << 31; /* mask of bit 31; unsigned literal avoids signed-shift UB */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_v(u32 r)
+{
+	return (r >> 31) & 0x1; /* extract bit 31 of r */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_false_v(void)
+{
+	return 0x00000000; /* 0 */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_false_f(void)
+{
+	return 0x0; /* no bits set */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_true_v(void)
+{
+	return 0x00000001; /* unshifted value; _true_f is this << 31 */
+}
+static inline u32 gr_gpcs_swdx_bundle_cb_size_valid_true_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_r(void)
+{
+	return 0x005001dc; /* register offset */
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_256b_f(u32 v)
+{
+	return (v & 0xffff) << 0; /* place v in bits 15:0 */
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v(void)
+{
+	return 0x00000de0; /* 3552 */
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(void)
+{
+	return 0x00000100; /* 256 */
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_addr_r(void)
+{
+	return 0x005001d8; /* register offset */
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit field: v unchanged */
+}
+static inline u32 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v(void)
+{
+	return 0x00000008; /* 8 */
+}
+static inline u32 gr_gpcs_swdx_beta_cb_ctrl_r(void)
+{
+	return 0x004181e4; /* register offset */
+}
+static inline u32 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(u32 v)
+{
+	return (v & 0xfff) << 0; /* place v in bits 11:0 */
+}
+static inline u32 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(void)
+{
+	return 0x00000100; /* 256 */
+}
+static inline u32 gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(void)
+{
+	return 0x0041befc; /* register offset */
+}
+static inline u32 gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(u32 v)
+{
+	return (v & 0xfff) << 0; /* place v in bits 11:0 */
+}
+static inline u32 gr_gpcs_swdx_tc_beta_cb_size_r(u32 i)
+{
+	return 0x00418ea0 + i*4; /* offset of register i (stride 4) */
+}
+static inline u32 gr_gpcs_swdx_tc_beta_cb_size_v_f(u32 v)
+{
+	return (v & 0x3fffff) << 0; /* place v in bits 21:0 */
+}
+static inline u32 gr_gpcs_swdx_tc_beta_cb_size_v_m(void)
+{
+	return 0x3fffff << 0; /* mask of bits 21:0 */
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_color_r_r(u32 i)
+{
+	return 0x00418010 + i*4; /* offset of register i (stride 4) */
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_color_r_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit field: v unchanged */
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_color_g_r(u32 i)
+{
+	return 0x0041804c + i*4; /* offset of register i (stride 4) */
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_color_g_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit field: v unchanged */
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_color_b_r(u32 i)
+{
+	return 0x00418088 + i*4; /* offset of register i (stride 4) */
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_color_b_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit field: v unchanged */
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_color_a_r(u32 i)
+{
+	return 0x004180c4 + i*4; /* offset of register i (stride 4) */
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_color_a_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit field: v unchanged */
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_c_01_to_04_format_r(void)
+{
+	return 0x00418100; /* gpcs range (0x0041xxxx), not the gpc0 alias 0x00500100 */
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_z_r(u32 i)
+{
+	return 0x00418110 + i*4; /* offset of register i (stride 4) */
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_z_val_f(u32 v)
+{
+	return (v & 0xffffffff) << 0; /* full 32-bit field: v unchanged */
+}
+static inline u32 gr_gpcs_swdx_dss_zbc_z_01_to_04_format_r(void)
+{
+	return 0x0041814c; /* gpcs range (0x0041xxxx), not the gpc0 alias 0x0050014c */
+}
+static inline u32 gr_gpcs_setup_attrib_cb_base_r(void)
+{
+	return 0x00418810; /* register offset */
+}
+static inline u32 gr_gpcs_setup_attrib_cb_base_addr_39_12_f(u32 v)
+{
+	return (v & 0xfffffff) << 0; /* place v in bits 27:0 */
+}
+static inline u32 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v(void)
+{
+	return 0x0000000c; /* 12 */
+}
+static inline u32 gr_gpcs_setup_attrib_cb_base_valid_true_f(void)
+{
+	return 0x80000000; /* bit 31 set */
+}
+static inline u32 gr_crstr_gpc_map0_r(void)
+{
+	return 0x00418b08; /* register offset */
+}
+static inline u32 gr_crstr_gpc_map0_tile0_f(u32 v)
+{
+	return (v & 0x7) << 0; /* place v in bits 2:0 */
+}
+static inline u32 gr_crstr_gpc_map0_tile1_f(u32 v)
+{
+	return (v & 0x7) << 5; /* place v in bits 7:5 */
+}
+static inline u32 gr_crstr_gpc_map0_tile2_f(u32 v)
+{
+	return (v & 0x7) << 10; /* place v in bits 12:10 */
+}
+static inline u32 gr_crstr_gpc_map0_tile3_f(u32 v)
+{
+	return (v & 0x7) << 15; /* place v in bits 17:15 */
+}
+static inline u32 gr_crstr_gpc_map0_tile4_f(u32 v)
+{
+	return (v & 0x7) << 20; /* place v in bits 22:20 */
+}
+static inline u32 gr_crstr_gpc_map0_tile5_f(u32 v)
+{
+	return (v & 0x7) << 25; /* place v in bits 27:25 */
+}
+static inline u32 gr_crstr_gpc_map1_r(void)
+{
+	return 0x00418b0c; /* register offset */
+}
+static inline u32 gr_crstr_gpc_map1_tile6_f(u32 v)
+{
+	return (v & 0x7) << 0; /* place v in bits 2:0 */
+}
+static inline u32 gr_crstr_gpc_map1_tile7_f(u32 v)
+{
+	return (v & 0x7) << 5; /* place v in bits 7:5 */
+}
+static inline u32 gr_crstr_gpc_map1_tile8_f(u32 v)
+{
+	return (v & 0x7) << 10; /* place v in bits 12:10 */
+}
+static inline u32 gr_crstr_gpc_map1_tile9_f(u32 v)
+{
+	return (v & 0x7) << 15; /* place v in bits 17:15 */
+}
+static inline u32 gr_crstr_gpc_map1_tile10_f(u32 v)
+{
+	return (v & 0x7) << 20; /* place v in bits 22:20 */
+}
+static inline u32 gr_crstr_gpc_map1_tile11_f(u32 v)
+{
+	return (v & 0x7) << 25; /* place v in bits 27:25 */
+}
+static inline u32 gr_crstr_gpc_map2_r(void)
+{
+	return 0x00418b10; /* register offset */
+}
+static inline u32 gr_crstr_gpc_map2_tile12_f(u32 v)
+{
+	return (v & 0x7) << 0; /* place v in bits 2:0 */
+}
+static inline u32 gr_crstr_gpc_map2_tile13_f(u32 v)
+{
+	return (v & 0x7) << 5; /* place v in bits 7:5 */
+}
+static inline u32 gr_crstr_gpc_map2_tile14_f(u32 v)
+{
+	return (v & 0x7) << 10; /* place v in bits 12:10 */
+}
+static inline u32 gr_crstr_gpc_map2_tile15_f(u32 v)
+{
+	return (v & 0x7) << 15; /* place v in bits 17:15 */
+}
+static inline u32 gr_crstr_gpc_map2_tile16_f(u32 v)
+{
+	return (v & 0x7) << 20; /* place v in bits 22:20 */
+}
+static inline u32 gr_crstr_gpc_map2_tile17_f(u32 v)
+{
+	return (v & 0x7) << 25; /* place v in bits 27:25 */
+}
+static inline u32 gr_crstr_gpc_map3_r(void)
+{
+	return 0x00418b14; /* register offset */
+}
+static inline u32 gr_crstr_gpc_map3_tile18_f(u32 v)
+{
+	return (v & 0x7) << 0; /* place v in bits 2:0 */
+}
+static inline u32 gr_crstr_gpc_map3_tile19_f(u32 v)
+{
+	return (v & 0x7) << 5; /* place v in bits 7:5 */
+}
+static inline u32 gr_crstr_gpc_map3_tile20_f(u32 v)
+{
+	return (v & 0x7) << 10; /* place v in bits 12:10 */
+}
+static inline u32 gr_crstr_gpc_map3_tile21_f(u32 v)
+{
+	return (v & 0x7) << 15; /* place v in bits 17:15 */
+}
+static inline u32 gr_crstr_gpc_map3_tile22_f(u32 v)
+{
+	return (v & 0x7) << 20; /* place v in bits 22:20 */
+}
+static inline u32 gr_crstr_gpc_map3_tile23_f(u32 v)
+{
+	return (v & 0x7) << 25; /* place v in bits 27:25 */
+}
+static inline u32 gr_crstr_gpc_map4_r(void)
+{
+	return 0x00418b18; /* register offset */
+}
+static inline u32 gr_crstr_gpc_map4_tile24_f(u32 v)
+{
+	return (v & 0x7) << 0; /* place v in bits 2:0 */
+}
+static inline u32 gr_crstr_gpc_map4_tile25_f(u32 v)
+{
+	return (v & 0x7) << 5; /* place v in bits 7:5 */
+}
+static inline u32 gr_crstr_gpc_map4_tile26_f(u32 v)
+{
+	return (v & 0x7) << 10; /* place v in bits 12:10 */
+}
+static inline u32 gr_crstr_gpc_map4_tile27_f(u32 v)
+{
+	return (v & 0x7) << 15; /* place v in bits 17:15 */
+}
+static inline u32 gr_crstr_gpc_map4_tile28_f(u32 v)
+{
+	return (v & 0x7) << 20; /* place v in bits 22:20 */
+}
+static inline u32 gr_crstr_gpc_map4_tile29_f(u32 v)
+{
+	return (v & 0x7) << 25; /* place v in bits 27:25 */
+}
+static inline u32 gr_crstr_gpc_map5_r(void)
+{
+	return 0x00418b1c; /* register offset */
+}
+static inline u32 gr_crstr_gpc_map5_tile30_f(u32 v)
+{
+	return (v & 0x7) << 0; /* place v in bits 2:0 */
+}
+static inline u32 gr_crstr_gpc_map5_tile31_f(u32 v)
+{
+	return (v & 0x7) << 5; /* place v in bits 7:5 */
+}
+static inline u32 gr_crstr_gpc_map5_tile32_f(u32 v)
+{
+	return (v & 0x7) << 10; /* place v in bits 12:10 */
+}
+static inline u32 gr_crstr_gpc_map5_tile33_f(u32 v)
+{
+	return (v & 0x7) << 15; /* place v in bits 17:15 */
+}
+static inline u32 gr_crstr_gpc_map5_tile34_f(u32 v)
+{
+	return (v & 0x7) << 20; /* place v in bits 22:20 */
+}
+static inline u32 gr_crstr_gpc_map5_tile35_f(u32 v)
+{
+	return (v & 0x7) << 25; /* place v in bits 27:25 */
+}
+static inline u32 gr_crstr_map_table_cfg_r(void)
+{
+	return 0x00418bb8; /* register offset */
+}
+static inline u32 gr_crstr_map_table_cfg_row_offset_f(u32 v)
+{
+	return (v & 0xff) << 0; /* place v in bits 7:0 */
+}
+static inline u32 gr_crstr_map_table_cfg_num_entries_f(u32 v)
+{
+	return (v & 0xff) << 8; /* place v in bits 15:8 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_r(void)
+{
+	return 0x00418980; /* register offset */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(u32 v)
+{
+	return (v & 0x7) << 0; /* place v in bits 2:0 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(u32 v)
+{
+	return (v & 0x7) << 4; /* place v in bits 6:4 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(u32 v)
+{
+	return (v & 0x7) << 8; /* place v in bits 10:8 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(u32 v)
+{
+	return (v & 0x7) << 12; /* place v in bits 14:12 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(u32 v)
+{
+	return (v & 0x7) << 16; /* place v in bits 18:16 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(u32 v)
+{
+	return (v & 0x7) << 20; /* place v in bits 22:20 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(u32 v)
+{
+	return (v & 0x7) << 24; /* place v in bits 26:24 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(u32 v)
+{
+	return (v & 0x7) << 28; /* place v in bits 30:28 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_r(void)
+{
+	return 0x00418984; /* register offset */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(u32 v)
+{
+	return (v & 0x7) << 0; /* place v in bits 2:0 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(u32 v)
+{
+	return (v & 0x7) << 4; /* place v in bits 6:4 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(u32 v)
+{
+	return (v & 0x7) << 8; /* place v in bits 10:8 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(u32 v)
+{
+	return (v & 0x7) << 12; /* place v in bits 14:12 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(u32 v)
+{
+	return (v & 0x7) << 16; /* place v in bits 18:16 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(u32 v)
+{
+	return (v & 0x7) << 20; /* place v in bits 22:20 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(u32 v)
+{
+	return (v & 0x7) << 24; /* place v in bits 26:24 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(u32 v)
+{
+	return (v & 0x7) << 28; /* place v in bits 30:28 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_r(void)
+{
+	return 0x00418988; /* register offset */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(u32 v)
+{
+	return (v & 0x7) << 0; /* place v in bits 2:0 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(u32 v)
+{
+	return (v & 0x7) << 4; /* place v in bits 6:4 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(u32 v)
+{
+	return (v & 0x7) << 8; /* place v in bits 10:8 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(u32 v)
+{
+	return (v & 0x7) << 12; /* place v in bits 14:12 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(u32 v)
+{
+	return (v & 0x7) << 16; /* place v in bits 18:16 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(u32 v)
+{
+	return (v & 0x7) << 20; /* place v in bits 22:20 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(u32 v)
+{
+	return (v & 0x7) << 24; /* place v in bits 26:24 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_s(void)
+{
+	return 3; /* presumably the field width in bits; the mask is outside this view */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(u32 v)
+{
+	return (v & 0x7) << 28;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_m(void)
+{
+	return 0x7U << 28U; /* 3-bit field mask, bits 30:28 */
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_v(u32 r)
+{
+	return (r >> 28) & 0x7;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_r(void)
+{
+	return 0x0041898c;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(u32 v)
+{
+	return (v & 0x7) << 0;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(u32 v)
+{
+	return (v & 0x7) << 4;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(u32 v)
+{
+	return (v & 0x7) << 8;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(u32 v)
+{
+	return (v & 0x7) << 12;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(u32 v)
+{
+	return (v & 0x7) << 16;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(u32 v)
+{
+	return (v & 0x7) << 20;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(u32 v)
+{
+	return (v & 0x7) << 24;
+}
+static inline u32 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(u32 v)
+{
+	return (v & 0x7) << 28;
+}
+static inline u32 gr_gpcs_gpm_pd_cfg_r(void)
+{
+	return 0x00418c6c;
+}
+static inline u32 gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_gpcs_gcc_pagepool_base_r(void)
+{
+	return 0x00419004;
+}
+static inline u32 gr_gpcs_gcc_pagepool_base_addr_39_8_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_gpcs_gcc_pagepool_r(void)
+{
+	return 0x00419008;
+}
+static inline u32 gr_gpcs_gcc_pagepool_total_pages_f(u32 v)
+{
+	return (v & 0x3ff) << 0;
+}
+static inline u32 gr_gpcs_tpcs_pe_vaf_r(void)
+{
+	return 0x0041980c;
+}
+static inline u32 gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(void)
+{
+	return 0x00419848;
+}
+static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(u32 v)
+{
+	return (v & 0xfffffff) << 0;
+}
+static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_f(u32 v)
+{
+	return (v & 0x1) << 28;
+}
+static inline u32 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(void)
+{
+	return 0x10000000;
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_r(void)
+{
+	return 0x00419c00;
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f(void)
+{
+	return 0x8;
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(void)
+{
+	return 0x00419c2c;
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(u32 v)
+{
+	return (v & 0xfffffff) << 0;
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_f(u32 v)
+{
+	return (v & 0x1) << 28;
+}
+static inline u32 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(void)
+{
+	return 0x10000000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(void)
+{
+	return 0x00419e44;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f(void)
+{
+	return 0x4;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f(void)
+{
+	return 0x8;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f(void)
+{
+	return 0x20;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f(void)
+{
+	return 0x40;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f(void)
+{
+	return 0x80;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f(void)
+{
+	return 0x100;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f(void)
+{
+	return 0x200;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f(void)
+{
+	return 0x400;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f(void)
+{
+	return 0x800;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f(void)
+{
+	return 0x1000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f(void)
+{
+	return 0x2000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f(void)
+{
+	return 0x4000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f(void)
+{
+	return 0x8000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f(void)
+{
+	return 0x10000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f(void)
+{
+	return 0x20000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f(void)
+{
+	return 0x40000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_mmu_fault_report_f(void)
+{
+	return 0x800000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_overflow_report_f(void)
+{
+	return 0x400000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f(void)
+{
+	return 0x80000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f(void)
+{
+	return 0x100000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(void)
+{
+	return 0x00419e4c;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f(void)
+{
+	return 0x4;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f(void)
+{
+	return 0x8;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_ecc_sec_error_report_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_ecc_ded_error_report_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f(void)
+{
+	return 0x20;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f(void)
+{
+	return 0x40;
+}
+static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_en_r(void)
+{
+	return 0x00419d0c;
+}
+static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_en_tex_enabled_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
+{
+	return 0x0050450c;
+}
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_gpcs_gpccs_gpc_exception_en_r(void)
+{
+	return 0x0041ac94;
+}
+static inline u32 gr_gpcs_gpccs_gpc_exception_en_tpc_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+static inline u32 gr_gpc0_gpccs_gpc_exception_r(void)
+{
+	return 0x00502c90;
+}
+static inline u32 gr_gpc0_gpccs_gpc_exception_tpc_v(u32 r)
+{
+	return (r >> 16) & 0xff;
+}
+static inline u32 gr_gpc0_gpccs_gpc_exception_tpc_0_pending_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_r(void)
+{
+	return 0x00504508;
+}
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_tex_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_tex_pending_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_sm_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_sm_pending_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_r(void)
+{
+	return 0x00504610;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_m(void)
+{
+	return 0x1U << 0U; /* 1-bit field mask, bit 0 */
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_run_trigger_task_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_m(void)
+{
+	return 0x1U << 1U; /* 1-bit field mask, bit 1 */
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_m(void)
+{
+	return 0x1U << 2U; /* 1-bit field mask, bit 2 */
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void)
+{
+	return 0x00504614;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void)
+{
+	return 0x00504624;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void)
+{
+	return 0x00504634;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(void)
+{
+	return 0x00419e24;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_stop_on_any_warp_disable_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_stop_on_any_sm_disable_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
+{
+	return 0x0050460c;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_sm_in_trap_mode_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(u32 r)
+{
+	return (r >> 4) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_r(void)
+{
+	return 0x00419e50;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_bpt_int_pending_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_bpt_pause_pending_f(void)
+{
+	return 0x20;
+}
+static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_single_step_complete_pending_f(void)
+{
+	return 0x40;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_r(void)
+{
+	return 0x00504650;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_ecc_sec_error_pending_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_ecc_ded_error_pending_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f(void)
+{
+	return 0x20;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(void)
+{
+	return 0x40;
+}
+static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_r(void)
+{
+	return 0x00504224;
+}
+static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_intr_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_r(void)
+{
+	return 0x00504648;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)
+{
+	return 0x00504770;
+}
+static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_r(void)
+{
+	return 0x00419f70;
+}
+static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(void)
+{
+	return 0x1U << 4U; /* 1-bit field mask, bit 4 */
+}
+static inline u32 gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 gr_gpc0_tpc0_sm_debug_sfe_control_r(void)
+{
+	return 0x0050477c;
+}
+static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_r(void)
+{
+	return 0x00419f7c;
+}
+static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(void)
+{
+	return 0x1U << 0U; /* 1-bit field mask, bit 0 */
+}
+static inline u32 gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 gr_gpcs_tpcs_pes_vsc_vpc_r(void)
+{
+	return 0x0041be08;
+}
+static inline u32 gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f(void)
+{
+	return 0x4;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map0_r(void)
+{
+	return 0x0041bf00;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map1_r(void)
+{
+	return 0x0041bf04;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map2_r(void)
+{
+	return 0x0041bf08;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map3_r(void)
+{
+	return 0x0041bf0c;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map4_r(void)
+{
+	return 0x0041bf10;
+}
+static inline u32 gr_ppcs_wwdx_map_gpc_map5_r(void)
+{
+	return 0x0041bf14;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_r(void)
+{
+	return 0x0041bfd0;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_row_offset_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_num_entries_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(u32 v)
+{
+	return (v & 0x1f) << 16;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(u32 v)
+{
+	return (v & 0x7) << 21;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(u32 v)
+{
+	return (v & 0x1f) << 24;
+}
+static inline u32 gr_gpcs_ppcs_wwdx_sm_num_rcp_r(void)
+{
+	return 0x0041bfd4;
+}
+static inline u32 gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_r(void)
+{
+	return 0x0041bfe4;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(u32 v)
+{
+	return (v & 0x1f) << 0;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(u32 v)
+{
+	return (v & 0x1f) << 5;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(u32 v)
+{
+	return (v & 0x1f) << 10;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(u32 v)
+{
+	return (v & 0x1f) << 15;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(u32 v)
+{
+	return (v & 0x1f) << 20;
+}
+static inline u32 gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(u32 v)
+{
+	return (v & 0x1f) << 25;
+}
+static inline u32 gr_bes_zrop_settings_r(void)
+{
+	return 0x00408850;
+}
+static inline u32 gr_bes_zrop_settings_num_active_ltcs_f(u32 v)
+{
+	return (v & 0xf) << 0;
+}
+static inline u32 gr_be0_crop_debug3_r(void)
+{
+	return 0x00410108;
+}
+static inline u32 gr_bes_crop_debug3_r(void)
+{
+	return 0x00408908;
+}
+static inline u32 gr_bes_crop_debug3_comp_vdc_4to2_disable_m(void)
+{
+	return 0x1U << 31; /* bit 31 mask; U avoids signed-shift UB */
+}
+static inline u32 gr_bes_crop_settings_r(void)
+{
+	return 0x00408958;
+}
+static inline u32 gr_bes_crop_settings_num_active_ltcs_f(u32 v)
+{
+	return (v & 0xf) << 0;
+}
+static inline u32 gr_zcull_bytes_per_aliquot_per_gpu_v(void)
+{
+	return 0x00000020;
+}
+static inline u32 gr_zcull_save_restore_header_bytes_per_gpc_v(void)
+{
+	return 0x00000020;
+}
+static inline u32 gr_zcull_save_restore_subregion_header_bytes_per_gpc_v(void)
+{
+	return 0x000000c0;
+}
+static inline u32 gr_zcull_subregion_qty_v(void)
+{
+	return 0x00000010;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel0_r(void)
+{
+	return 0x00504604;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control_sel1_r(void)
+{
+	return 0x00504608;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r(void)
+{
+	return 0x0050465c;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control1_r(void)
+{
+	return 0x00504660;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control2_r(void)
+{
+	return 0x00504664;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control3_r(void)
+{
+	return 0x00504668;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control4_r(void)
+{
+	return 0x0050466c;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r(void)
+{
+	return 0x00504658;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_control_r(void)
+{
+	return 0x00504730;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_control_r(void)
+{
+	return 0x00504734;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_control_r(void)
+{
+	return 0x00504738;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_control_r(void)
+{
+	return 0x0050473c;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter4_control_r(void)
+{
+	return 0x00504740;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_control_r(void)
+{
+	return 0x00504744;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_control_r(void)
+{
+	return 0x00504748;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_control_r(void)
+{
+	return 0x0050474c;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_s1_r(void)
+{
+	return 0x00504678;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status1_r(void)
+{
+	return 0x00504694;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_s0_r(void)
+{
+	return 0x005046f0;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_s1_r(void)
+{
+	return 0x00504700;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_s0_r(void)
+{
+	return 0x005046f4;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_s1_r(void)
+{
+	return 0x00504704;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_s0_r(void)
+{
+	return 0x005046f8;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_s1_r(void)
+{
+	return 0x00504708;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_s0_r(void)
+{
+	return 0x005046fc;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_s1_r(void)
+{
+	return 0x0050470c;
+}
+static inline u32 gr_fe_pwr_mode_r(void)
+{
+	return 0x00404170;
+}
+static inline u32 gr_fe_pwr_mode_mode_auto_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fe_pwr_mode_mode_force_on_f(void)
+{
+	return 0x2;
+}
+static inline u32 gr_fe_pwr_mode_req_v(u32 r)
+{
+	return (r >> 4) & 0x1;
+}
+static inline u32 gr_fe_pwr_mode_req_send_f(void)
+{
+	return 0x10;
+}
+static inline u32 gr_fe_pwr_mode_req_done_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_r(void)
+{
+	return 0x00418880;
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_vm_pg_size_m(void)
+{
+	return 0x1U << 0U; /* 1-bit field mask, bit 0 */
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m(void)
+{
+	return 0x1U << 11U; /* 1-bit field mask, bit 11 */
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_vol_fault_m(void)
+{
+	return 0x1U << 1U; /* 1-bit field mask, bit 1 */
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_comp_fault_m(void)
+{
+	return 0x1U << 2U; /* 1-bit field mask, bit 2 */
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_miss_gran_m(void)
+{
+	return 0x3U << 3U; /* 2-bit field mask, bits 4:3 */
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_cache_mode_m(void)
+{
+	return 0x3U << 5U; /* 2-bit field mask, bits 6:5 */
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_mmu_aperture_m(void)
+{
+	return 0x3U << 28U; /* 2-bit field mask, bits 29:28 */
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_mmu_vol_m(void)
+{
+	return 0x1U << 30U; /* 1-bit field mask, bit 30 */
+}
+static inline u32 gr_gpcs_pri_mmu_ctrl_mmu_disable_m(void)
+{
+	return 0x1U << 31; /* bit 31 mask; U avoids signed-shift UB */
+}
+static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void)
+{
+	return 0x00418890;
+}
+static inline u32 gr_gpcs_pri_mmu_pm_req_mask_r(void)
+{
+	return 0x00418894;
+}
+static inline u32 gr_gpcs_pri_mmu_debug_ctrl_r(void)
+{
+	return 0x004188b0;
+}
+static inline u32 gr_gpcs_pri_mmu_debug_ctrl_debug_v(u32 r)
+{
+	return (r >> 16) & 0x1;
+}
+static inline u32 gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 gr_gpcs_pri_mmu_debug_wr_r(void)
+{
+	return 0x004188b4;
+}
+static inline u32 gr_gpcs_pri_mmu_debug_rd_r(void)
+{
+	return 0x004188b8;
+}
+static inline u32 gr_gpcs_mmu_num_active_ltcs_r(void)
+{
+	return 0x004188ac;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_r(void)
+{
+	return 0x00419e10;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_debugger_mode_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_debugger_mode_on_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_m(void)
+{
+	return 0x1U << 31; /* bit 31 mask; U avoids signed-shift UB */
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_m(void)
+{
+	return 0x1U << 30U; /* 1-bit field mask, bit 30 */
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_v(u32 r)
+{
+	return (r >> 30) & 0x1;
+}
+static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_task_f(void)
+{
+	return 0x40000000;
+}
+static inline u32 gr_fe_gfxp_wfi_timeout_r(void)
+{
+	return 0x004041c0;
+}
+static inline u32 gr_fe_gfxp_wfi_timeout_count_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 gr_fe_gfxp_wfi_timeout_count_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpcs_tpcs_sm_texio_control_r(void)
+{
+	return 0x00419c84;
+}
+static inline u32 gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_f(u32 v)
+{
+	return (v & 0x7) << 8;
+}
+static inline u32 gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_m(void)
+{
+	return 0x7U << 8U; /* 3-bit field mask, bits 10:8 */
+}
+static inline u32 gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_arm_63_48_match_f(void)
+{
+	return 0x100;
+}
+static inline u32 gr_gpcs_tpcs_sm_disp_ctrl_r(void)
+{
+	return 0x00419f78;
+}
+static inline u32 gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_m(void)
+{
+	return 0x3U << 11U; /* 2-bit field mask, bits 12:11 */
+}
+static inline u32 gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f(void)
+{
+	return 0x1000;
+}
+static inline u32 gr_gpcs_tc_debug0_r(void)
+{
+	return 0x00418708;
+}
+static inline u32 gr_gpcs_tc_debug0_limit_coalesce_buffer_size_f(u32 v)
+{
+	return (v & 0x1ff) << 0;
+}
+static inline u32 gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m(void)
+{
+	return 0x1ffU << 0U; /* 9-bit field mask, bits 8:0 */
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_ltc_gp106.h b/drivers/gpu/nvgpu/gp106/hw_ltc_gp106.h
new file mode 100644
index 00000000..d760b588
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_ltc_gp106.h
@@ -0,0 +1,553 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_ltc_gp106_h_
+#define _hw_ltc_gp106_h_
+
+static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void)
+{
+	return 0x0014046c;
+}
+static inline u32 ltc_ltc0_lts0_dstg_cfg0_r(void)
+{
+	return 0x00140518;
+}
+static inline u32 ltc_ltcs_ltss_dstg_cfg0_r(void)
+{
+	return 0x0017e318;
+}
+static inline u32 ltc_ltcs_ltss_dstg_cfg0_vdc_4to2_disable_m(void)
+{
+	return 0x1U << 15U; /* 1-bit field mask, bit 15 */
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_r(void)
+{
+	return 0x00140494;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_ways_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_v(u32 r)
+{
+	return (r >> 16) & 0x3;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void)
+{
+	return 0x0017e26c;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(void)
+{
+	return 0x1;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(void)
+{
+	return 0x2;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(void)
+{
+	return 0x4;
+}
+static inline u32 ltc_ltc0_lts0_cbc_ctrl1_r(void)
+{
+	return 0x0014046c;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl2_r(void)
+{
+	return 0x0017e270;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(u32 v)
+{
+	return (v & 0x3ffff) << 0;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl3_r(void)
+{
+	return 0x0017e274;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(u32 v)
+{
+	return (v & 0x3ffff) << 0;
+}
+static inline u32 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(void)
+{
+	return 0x0003ffff;
+}
+static inline u32 ltc_ltcs_ltss_cbc_base_r(void)
+{
+	return 0x0017e278;
+}
+static inline u32 ltc_ltcs_ltss_cbc_base_alignment_shift_v(void)
+{
+	return 0x0000000b;
+}
+static inline u32 ltc_ltcs_ltss_cbc_base_address_v(u32 r)
+{
+	return (r >> 0) & 0x3ffffff;
+}
+static inline u32 ltc_ltcs_ltss_cbc_num_active_ltcs_r(void)
+{
+	return 0x0017e27c;
+}
+static inline u32 ltc_ltcs_misc_ltc_num_active_ltcs_r(void)
+{
+	return 0x0017e000;
+}
+static inline u32 ltc_ltcs_ltss_cbc_param_r(void)
+{
+	return 0x0017e280;
+}
+static inline u32 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 ltc_ltcs_ltss_cbc_param_cache_line_size_v(u32 r)
+{
+	return (r >> 24) & 0xf;
+}
+static inline u32 ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(u32 r)
+{
+	return (r >> 28) & 0xf;
+}
+static inline u32 ltc_ltcs_ltss_cbc_param2_r(void)
+{
+	return 0x0017e3f4;
+}
+static inline u32 ltc_ltcs_ltss_cbc_param2_gobs_per_comptagline_per_slice_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_r(void)
+{
+	return 0x0017e2ac;
+}
+static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(u32 v)
+{
+	return (v & 0x1f) << 16;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_index_r(void)
+{
+	return 0x0017e338;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_index_address_f(u32 v)
+{
+	return (v & 0xf) << 0;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(u32 i)
+{
+	return 0x0017e33c + i*4;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(void)
+{
+	return 0x0017e34c;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_s(void)
+{
+	return 32;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_m(void)
+{
+	return 0xffffffffU << 0U; /* 32-bit field mask, bits 31:0 */
+}
+static inline u32 ltc_ltcs_ltss_dstg_zbc_depth_clear_value_field_v(u32 r)
+{
+	return (r >> 0) & 0xffffffff;
+}
+static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_2_r(void)
+{
+	return 0x0017e2b0;
+}
+static inline u32 ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(void)
+{
+	return 0x10000000;
+}
+static inline u32 ltc_ltcs_ltss_g_elpg_r(void)
+{
+	return 0x0017e214U;
+}
+static inline u32 ltc_ltcs_ltss_g_elpg_flush_v(u32 r)
+{
+	return r & 0x00000001U; /* bit 0 */
+}
+static inline u32 ltc_ltcs_ltss_g_elpg_flush_pending_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltcs_ltss_g_elpg_flush_pending_f(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltc0_ltss_g_elpg_r(void)
+{
+	return 0x00140214U;
+}
+static inline u32 ltc_ltc0_ltss_g_elpg_flush_v(u32 r)
+{
+	return r & 0x00000001U; /* bit 0 */
+}
+static inline u32 ltc_ltc0_ltss_g_elpg_flush_pending_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltc0_ltss_g_elpg_flush_pending_f(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_r(void)
+{
+	return 0x00142214U;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_v(u32 r)
+{
+	return r & 0x00000001U; /* bit 0 */
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_f(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltcs_ltss_intr_r(void)
+{
+	return 0x0017e20cU;
+}
+static inline u32 ltc_ltcs_ltss_intr_ecc_sec_error_pending_f(void)
+{
+	return 0x00000100U;
+}
+static inline u32 ltc_ltcs_ltss_intr_ecc_ded_error_pending_f(void)
+{
+	return 0x00000200U;
+}
+static inline u32 ltc_ltcs_ltss_intr_en_evicted_cb_m(void)
+{
+	return 0x00100000U; /* bit 20 */
+}
+static inline u32 ltc_ltcs_ltss_intr_en_illegal_compstat_access_m(void)
+{
+	return 0x40000000U; /* bit 30 */
+}
+static inline u32 ltc_ltcs_ltss_intr_en_ecc_sec_error_enabled_f(void)
+{
+	return 0x01000000U;
+}
+static inline u32 ltc_ltcs_ltss_intr_en_ecc_ded_error_enabled_f(void)
+{
+	return 0x02000000U;
+}
+static inline u32 ltc_ltc0_lts0_intr_r(void)
+{
+	return 0x0014040cU;
+}
+static inline u32 ltc_ltc0_lts0_dstg_ecc_report_r(void)
+{
+	return 0x0014051cU;
+}
+static inline u32 ltc_ltc0_lts0_dstg_ecc_report_sec_count_m(void)
+{
+	return 0x000000ffU; /* bits 7:0 */
+}
+static inline u32 ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(u32 r)
+{
+	return r & 0x000000ffU; /* bits 7:0 */
+}
+static inline u32 ltc_ltc0_lts0_dstg_ecc_report_ded_count_m(void)
+{
+	return 0x00ff0000U; /* bits 23:16 */
+}
+static inline u32 ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(u32 r)
+{
+	return (r & 0x00ff0000U) >> 16; /* bits 23:16 */
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_r(void)
+{
+	return 0x0017e2a0U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_v(u32 r)
+{
+	return r & 0x00000001U; /* bit 0 */
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_pending_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_pending_f(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_v(u32 r)
+{
+	return (r & 0x00000f00U) >> 8; /* bits 11:8 */
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_3_v(void)
+{
+	return 0x00000003U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_3_f(void)
+{
+	return 0x00000300U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_v(u32 r)
+{
+	return (r & 0x10000000U) >> 28; /* bit 28 */
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_true_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_true_f(void)
+{
+	return 0x10000000U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_v(u32 r)
+{
+	return (r & 0x20000000U) >> 29; /* bit 29 */
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_true_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_true_f(void)
+{
+	return 0x20000000U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_v(u32 r)
+{
+	return (r & 0x40000000U) >> 30; /* bit 30 */
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_f(void)
+{
+	return 0x40000000U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_r(void)
+{
+	return 0x0017e2a4U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_v(u32 r)
+{
+	return r & 0x00000001U; /* bit 0 */
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_v(u32 r)
+{
+	return (r & 0x00000f00U) >> 8; /* bits 11:8 */
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_3_v(void)
+{
+	return 0x00000003U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_3_f(void)
+{
+	return 0x00000300U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_v(u32 r)
+{
+	return (r & 0x00010000U) >> 16; /* bit 16 */
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_true_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_true_f(void)
+{
+	return 0x00010000U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_v(u32 r)
+{
+	return (r & 0x10000000U) >> 28; /* bit 28 */
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_true_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_true_f(void)
+{
+	return 0x10000000U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_v(u32 r)
+{
+	return (r & 0x20000000U) >> 29; /* bit 29 */
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_true_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_true_f(void)
+{
+	return 0x20000000U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_v(u32 r)
+{
+	return (r & 0x40000000U) >> 30; /* bit 30 */
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_f(void)
+{
+	return 0x40000000U;
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt0_r(void)
+{
+	return 0x001402a0U;
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt0_invalidate_v(u32 r)
+{
+	return r & 0x00000001U; /* bit 0 */
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt1_r(void)
+{
+	return 0x001402a4U;
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt1_clean_v(u32 r)
+{
+	return r & 0x00000001U; /* bit 0 */
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt0_r(void)
+{
+	return 0x001422a0U;
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt0_invalidate_v(u32 r)
+{
+	return r & 0x00000001U; /* bit 0 */
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt0_invalidate_pending_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt0_invalidate_pending_f(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt1_r(void)
+{
+	return 0x001422a4U;
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt1_clean_v(u32 r)
+{
+	return r & 0x00000001U; /* bit 0 */
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt1_clean_pending_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltc1_ltss_tstg_cmgmt1_clean_pending_f(void)
+{
+	return 0x00000001U;
+}
+static inline u32 ltc_ltc0_lts0_tstg_info_1_r(void)
+{
+	return 0x0014058cU;
+}
+static inline u32 ltc_ltc0_lts0_tstg_info_1_slice_size_in_kb_v(u32 r)
+{
+	return r & 0x0000ffffU; /* bits 15:0 */
+}
+static inline u32 ltc_ltc0_lts0_tstg_info_1_slices_per_l2_v(u32 r)
+{
+	return (r & 0x001f0000U) >> 16; /* bits 20:16 */
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_mc_gp106.h b/drivers/gpu/nvgpu/gp106/hw_mc_gp106.h
new file mode 100644
index 00000000..99ad8bc0
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_mc_gp106.h
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_mc_gp106_h_
+#define _hw_mc_gp106_h_
+
+static inline u32 mc_boot_0_r(void)
+{
+	return 0x00000000U;
+}
+static inline u32 mc_boot_0_architecture_v(u32 r)
+{
+	return (r & 0x1f000000U) >> 24; /* bits 28:24 */
+}
+static inline u32 mc_boot_0_implementation_v(u32 r)
+{
+	return (r & 0x00f00000U) >> 20; /* bits 23:20 */
+}
+static inline u32 mc_boot_0_major_revision_v(u32 r)
+{
+	return (r & 0x000000f0U) >> 4; /* bits 7:4 */
+}
+static inline u32 mc_boot_0_minor_revision_v(u32 r)
+{
+	return r & 0x0000000fU; /* bits 3:0 */
+}
+static inline u32 mc_intr_r(u32 i)
+{
+	return 0x00000100U + (i << 2); /* 4-byte stride per index */
+}
+static inline u32 mc_intr_pfifo_pending_f(void)
+{
+	return 0x00000100U;
+}
+static inline u32 mc_intr_replayable_fault_pending_f(void)
+{
+	return 0x00000200U;
+}
+static inline u32 mc_intr_pgraph_pending_f(void)
+{
+	return 0x00001000U;
+}
+static inline u32 mc_intr_pmu_pending_f(void)
+{
+	return 0x01000000U;
+}
+static inline u32 mc_intr_ltc_pending_f(void)
+{
+	return 0x02000000U;
+}
+static inline u32 mc_intr_priv_ring_pending_f(void)
+{
+	return 0x40000000U;
+}
+static inline u32 mc_intr_pbus_pending_f(void)
+{
+	return 0x10000000U;
+}
+static inline u32 mc_intr_en_r(u32 i)
+{
+	return 0x00000140U + (i << 2);
+}
+static inline u32 mc_intr_en_set_r(u32 i)
+{
+	return 0x00000160U + (i << 2);
+}
+static inline u32 mc_intr_en_clear_r(u32 i)
+{
+	return 0x00000180U + (i << 2);
+}
+static inline u32 mc_enable_r(void)
+{
+	return 0x00000200U;
+}
+static inline u32 mc_enable_xbar_enabled_f(void)
+{
+	return 0x00000004U;
+}
+static inline u32 mc_enable_l2_enabled_f(void)
+{
+	return 0x00000008U;
+}
+static inline u32 mc_enable_pmedia_s(void)
+{
+	return 1U;
+}
+static inline u32 mc_enable_pmedia_f(u32 v)
+{
+	return (v << 4) & 0x00000010U; /* bit 4 */
+}
+static inline u32 mc_enable_pmedia_m(void)
+{
+	return 0x00000010U; /* bit 4 */
+}
+static inline u32 mc_enable_pmedia_v(u32 r)
+{
+	return (r & 0x00000010U) >> 4; /* bit 4 */
+}
+static inline u32 mc_enable_priv_ring_enabled_f(void)
+{
+	return 0x00000020U;
+}
+static inline u32 mc_enable_ce0_m(void)
+{
+	return 0x00000040U; /* bit 6 */
+}
+static inline u32 mc_enable_pfifo_enabled_f(void)
+{
+	return 0x00000100U;
+}
+static inline u32 mc_enable_pgraph_enabled_f(void)
+{
+	return 0x00001000U;
+}
+static inline u32 mc_enable_pwr_v(u32 r)
+{
+	return (r & 0x00002000U) >> 13; /* bit 13 */
+}
+static inline u32 mc_enable_pwr_disabled_v(void)
+{
+	return 0x00000000U;
+}
+static inline u32 mc_enable_pwr_enabled_f(void)
+{
+	return 0x00002000U;
+}
+static inline u32 mc_enable_pfb_enabled_f(void)
+{
+	return 0x00100000U;
+}
+static inline u32 mc_enable_ce2_m(void)
+{
+	return 0x00200000U; /* bit 21 */
+}
+static inline u32 mc_enable_ce2_enabled_f(void)
+{
+	return 0x00200000U;
+}
+static inline u32 mc_enable_blg_enabled_f(void)
+{
+	return 0x08000000U;
+}
+static inline u32 mc_enable_perfmon_enabled_f(void)
+{
+	return 0x10000000U;
+}
+static inline u32 mc_enable_hub_enabled_f(void)
+{
+	return 0x20000000U;
+}
+static inline u32 mc_intr_ltc_r(void)
+{
+	return 0x000001c0U;
+}
+static inline u32 mc_enable_pb_r(void)
+{
+	return 0x00000204U;
+}
+static inline u32 mc_enable_pb_0_s(void)
+{
+	return 1U;
+}
+static inline u32 mc_enable_pb_0_f(u32 v)
+{
+	return v & 0x00000001U; /* bit 0 */
+}
+static inline u32 mc_enable_pb_0_m(void)
+{
+	return 0x00000001U; /* bit 0 */
+}
+static inline u32 mc_enable_pb_0_v(u32 r)
+{
+	return r & 0x00000001U; /* bit 0 */
+}
+static inline u32 mc_enable_pb_0_enabled_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 mc_enable_pb_sel_f(u32 v, u32 i)
+{
+	return (i < 32) ? ((v & 0x1) << i) : 0; /* bit i; shift by >= 32 is UB */
+}
+static inline u32 mc_elpg_enable_r(void)
+{
+	return 0x0000020cU;
+}
+static inline u32 mc_elpg_enable_xbar_enabled_f(void)
+{
+	return 0x00000004U;
+}
+static inline u32 mc_elpg_enable_pfb_enabled_f(void)
+{
+	return 0x00100000U;
+}
+static inline u32 mc_elpg_enable_hub_enabled_f(void)
+{
+	return 0x20000000U;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_pbdma_gp106.h b/drivers/gpu/nvgpu/gp106/hw_pbdma_gp106.h
new file mode 100644
index 00000000..a5406672
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_pbdma_gp106.h
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_pbdma_gp106_h_
+#define _hw_pbdma_gp106_h_
+
+static inline u32 pbdma_gp_entry1_r(void)
+{
+	return 0x10000004U;
+}
+static inline u32 pbdma_gp_entry1_get_hi_v(u32 r)
+{
+	return r & 0x000000ffU; /* bits 7:0 */
+}
+static inline u32 pbdma_gp_entry1_length_f(u32 v)
+{
+	return (v << 10) & 0x7ffffc00U; /* bits 30:10 */
+}
+static inline u32 pbdma_gp_entry1_length_v(u32 r)
+{
+	return (r & 0x7ffffc00U) >> 10; /* bits 30:10 */
+}
+static inline u32 pbdma_gp_base_r(u32 i)
+{
+	return 0x00040048U + (i << 13); /* 8192-byte stride per index */
+}
+static inline u32 pbdma_gp_base__size_1_v(void)
+{
+	return 0x00000004U;
+}
+static inline u32 pbdma_gp_base_offset_f(u32 v)
+{
+	return (v << 3) & 0xfffffff8U; /* bits 31:3 */
+}
+static inline u32 pbdma_gp_base_rsvd_s(void)
+{
+	return 3U;
+}
+static inline u32 pbdma_gp_base_hi_r(u32 i)
+{
+	return 0x0004004cU + (i << 13);
+}
+static inline u32 pbdma_gp_base_hi_offset_f(u32 v)
+{
+	return v & 0x000000ffU; /* bits 7:0 */
+}
+static inline u32 pbdma_gp_base_hi_limit2_f(u32 v)
+{
+	return (v << 16) & 0x001f0000U; /* bits 20:16 */
+}
+static inline u32 pbdma_gp_fetch_r(u32 i)
+{
+	return 0x00040050U + (i << 13);
+}
+static inline u32 pbdma_gp_get_r(u32 i)
+{
+	return 0x00040014U + (i << 13);
+}
+static inline u32 pbdma_gp_put_r(u32 i)
+{
+	return 0x00040000U + (i << 13);
+}
+static inline u32 pbdma_pb_fetch_r(u32 i)
+{
+	return 0x00040054U + (i << 13); /* 8192-byte stride per index */
+}
+static inline u32 pbdma_pb_fetch_hi_r(u32 i)
+{
+	return 0x00040058U + (i << 13);
+}
+static inline u32 pbdma_get_r(u32 i)
+{
+	return 0x00040018U + (i << 13);
+}
+static inline u32 pbdma_get_hi_r(u32 i)
+{
+	return 0x0004001cU + (i << 13);
+}
+static inline u32 pbdma_put_r(u32 i)
+{
+	return 0x0004005cU + (i << 13);
+}
+static inline u32 pbdma_put_hi_r(u32 i)
+{
+	return 0x00040060U + (i << 13);
+}
+static inline u32 pbdma_formats_r(u32 i)
+{
+	return 0x0004009cU + (i << 13); /* 8192-byte stride per index */
+}
+static inline u32 pbdma_formats_gp_fermi0_f(void)
+{
+	return 0x00000000U;
+}
+static inline u32 pbdma_formats_pb_fermi1_f(void)
+{
+	return 0x00000100U;
+}
+static inline u32 pbdma_formats_mp_fermi0_f(void)
+{
+	return 0x00000000U;
+}
+static inline u32 pbdma_pb_header_r(u32 i)
+{
+	return 0x00040084U + (i << 13);
+}
+static inline u32 pbdma_pb_header_priv_user_f(void)
+{
+	return 0x00000000U;
+}
+static inline u32 pbdma_pb_header_method_zero_f(void)
+{
+	return 0x00000000U;
+}
+static inline u32 pbdma_pb_header_subchannel_zero_f(void)
+{
+	return 0x00000000U;
+}
+static inline u32 pbdma_pb_header_level_main_f(void)
+{
+	return 0x00000000U;
+}
+static inline u32 pbdma_pb_header_first_true_f(void)
+{
+	return 0x00400000U;
+}
+static inline u32 pbdma_pb_header_type_inc_f(void)
+{
+	return 0x20000000U;
+}
+static inline u32 pbdma_pb_header_type_non_inc_f(void)
+{
+	return 0x60000000U;
+}
+static inline u32 pbdma_hdr_shadow_r(u32 i)
+{
+	return 0x00040118U + (i << 13); /* 8192-byte stride per index */
+}
+static inline u32 pbdma_subdevice_r(u32 i)
+{
+	return 0x00040094U + (i << 13);
+}
+static inline u32 pbdma_subdevice_id_f(u32 v)
+{
+	return v & 0x00000fffU; /* bits 11:0 */
+}
+static inline u32 pbdma_subdevice_status_active_f(void)
+{
+	return 0x10000000U;
+}
+static inline u32 pbdma_subdevice_channel_dma_enable_f(void)
+{
+	return 0x20000000U;
+}
+static inline u32 pbdma_method0_r(u32 i)
+{
+	return 0x000400c0U + (i << 13); /* 8192-byte stride per index */
+}
+static inline u32 pbdma_method0_fifo_size_v(void)
+{
+	return 0x00000004U;
+}
+static inline u32 pbdma_method0_addr_f(u32 v)
+{
+	return (v << 2) & 0x00003ffcU; /* bits 13:2 */
+}
+static inline u32 pbdma_method0_addr_v(u32 r)
+{
+	return (r & 0x00003ffcU) >> 2; /* bits 13:2 */
+}
+static inline u32 pbdma_method0_subch_v(u32 r)
+{
+	return (r & 0x00070000U) >> 16; /* bits 18:16 */
+}
+static inline u32 pbdma_method0_first_true_f(void)
+{
+	return 0x00400000U;
+}
+static inline u32 pbdma_method0_valid_true_f(void)
+{
+	return 0x80000000U;
+}
+static inline u32 pbdma_method1_r(u32 i)
+{
+	return 0x000400c8U + (i << 13);
+}
+static inline u32 pbdma_method2_r(u32 i)
+{
+	return 0x000400d0U + (i << 13);
+}
+static inline u32 pbdma_method3_r(u32 i)
+{
+	return 0x000400d8U + (i << 13);
+}
+static inline u32 pbdma_data0_r(u32 i)
+{
+	return 0x000400c4U + (i << 13);
+}
+static inline u32 pbdma_target_r(u32 i)
+{
+	return 0x000400acU + (i << 13); /* 8192-byte stride per index */
+}
+static inline u32 pbdma_target_engine_sw_f(void)
+{
+	return 0x0000001fU;
+}
+static inline u32 pbdma_acquire_r(u32 i)
+{
+	return 0x00040030U + (i << 13);
+}
+static inline u32 pbdma_acquire_retry_man_2_f(void)
+{
+	return 0x00000002U;
+}
+static inline u32 pbdma_acquire_retry_exp_2_f(void)
+{
+	return 0x00000100U;
+}
+static inline u32 pbdma_acquire_timeout_exp_max_f(void)
+{
+	return 0x00007800U;
+}
+static inline u32 pbdma_acquire_timeout_man_max_f(void)
+{
+	return 0x7fff8000U;
+}
+static inline u32 pbdma_acquire_timeout_en_disable_f(void)
+{
+	return 0x00000000U;
+}
+static inline u32 pbdma_status_r(u32 i)
+{
+	return 0x00040100U + (i << 13); /* 8192-byte stride per index */
+}
+static inline u32 pbdma_channel_r(u32 i)
+{
+	return 0x00040120U + (i << 13);
+}
+static inline u32 pbdma_signature_r(u32 i)
+{
+	return 0x00040010U + (i << 13);
+}
+static inline u32 pbdma_signature_hw_valid_f(void)
+{
+	return 0x0000faceU;
+}
+static inline u32 pbdma_signature_sw_zero_f(void)
+{
+	return 0x00000000U;
+}
+static inline u32 pbdma_userd_r(u32 i)
+{
+	return 0x00040008U + (i << 13);
+}
+static inline u32 pbdma_userd_target_vid_mem_f(void)
+{
+	return 0x00000000U;
+}
+static inline u32 pbdma_userd_target_sys_mem_coh_f(void)
+{
+	return 0x00000002U;
+}
+static inline u32 pbdma_userd_target_sys_mem_ncoh_f(void)
+{
+	return 0x00000003U;
+}
+static inline u32 pbdma_userd_addr_f(u32 v)
+{
+	return (v << 9) & 0xfffffe00U; /* bits 31:9 */
+}
+static inline u32 pbdma_userd_hi_r(u32 i)
+{
+	return 0x0004000cU + (i << 13);
+}
+static inline u32 pbdma_userd_hi_addr_f(u32 v)
+{
+	return v & 0x000000ffU; /* bits 7:0 */
+}
+static inline u32 pbdma_hce_ctrl_r(u32 i)
+{
+	return 0x000400e4U + (i << 13); /* 8192-byte stride per index */
+}
+static inline u32 pbdma_hce_ctrl_hce_priv_mode_yes_f(void)
+{
+	return 0x00000020U;
+}
+static inline u32 pbdma_intr_0_r(u32 i)
+{
+	return 0x00040108U + (i << 13); /* 8192-byte stride per index */
+}
+static inline u32 pbdma_intr_0_memreq_v(u32 r)
+{
+	return r & 0x00000001U; /* bit 0 */
+}
+static inline u32 pbdma_intr_0_memreq_pending_f(void)
+{
+	return 0x00000001U;
+}
+static inline u32 pbdma_intr_0_memack_timeout_pending_f(void)
+{
+	return 0x00000002U;
+}
+static inline u32 pbdma_intr_0_memack_extra_pending_f(void)
+{
+	return 0x00000004U;
+}
+static inline u32 pbdma_intr_0_memdat_timeout_pending_f(void)
+{
+	return 0x00000008U;
+}
+static inline u32 pbdma_intr_0_memdat_extra_pending_f(void)
+{
+	return 0x00000010U;
+}
+static inline u32 pbdma_intr_0_memflush_pending_f(void)
+{
+	return 0x00000020U;
+}
+static inline u32 pbdma_intr_0_memop_pending_f(void)
+{
+	return 0x00000040U;
+}
+static inline u32 pbdma_intr_0_lbconnect_pending_f(void)
+{
+	return 0x00000080U;
+}
+static inline u32 pbdma_intr_0_lbreq_pending_f(void)
+{
+	return 0x00000100U;
+}
+static inline u32 pbdma_intr_0_lback_timeout_pending_f(void)
+{
+	return 0x00000200U;
+}
+static inline u32 pbdma_intr_0_lback_extra_pending_f(void)
+{
+	return 0x00000400U;
+}
+static inline u32 pbdma_intr_0_lbdat_timeout_pending_f(void)
+{
+	return 0x00000800U;
+}
+static inline u32 pbdma_intr_0_lbdat_extra_pending_f(void)
+{
+	return 0x00001000U;
+}
+static inline u32 pbdma_intr_0_gpfifo_pending_f(void)
+{
+	return 0x00002000U;
+}
+static inline u32 pbdma_intr_0_gpptr_pending_f(void)
+{
+	return 0x00004000U;
+}
+static inline u32 pbdma_intr_0_gpentry_pending_f(void)
+{
+	return 0x00008000U;
+}
+static inline u32 pbdma_intr_0_gpcrc_pending_f(void)
+{
+	return 0x00010000U;
+}
+static inline u32 pbdma_intr_0_pbptr_pending_f(void)
+{
+	return 0x00020000U;
+}
+static inline u32 pbdma_intr_0_pbentry_pending_f(void)
+{
+	return 0x00040000U;
+}
+static inline u32 pbdma_intr_0_pbcrc_pending_f(void)
+{
+	return 0x00080000U;
+}
+static inline u32 pbdma_intr_0_xbarconnect_pending_f(void)
+{
+	return 0x00100000U;
+}
+static inline u32 pbdma_intr_0_method_pending_f(void)
+{
+	return 0x00200000U;
+}
+static inline u32 pbdma_intr_0_methodcrc_pending_f(void)
+{
+	return 0x00400000U;
+}
+static inline u32 pbdma_intr_0_device_pending_f(void)
+{
+	return 0x00800000U;
+}
+static inline u32 pbdma_intr_0_semaphore_pending_f(void)
+{
+	return 0x02000000U;
+}
+static inline u32 pbdma_intr_0_acquire_pending_f(void)
+{
+	return 0x04000000U;
+}
+static inline u32 pbdma_intr_0_pri_pending_f(void)
+{
+	return 0x08000000U;
+}
+static inline u32 pbdma_intr_0_no_ctxsw_seg_pending_f(void)
+{
+	return 0x20000000U;
+}
+static inline u32 pbdma_intr_0_pbseg_pending_f(void)
+{
+	return 0x40000000U;
+}
+static inline u32 pbdma_intr_0_signature_pending_f(void)
+{
+	return 0x80000000U;
+}
+static inline u32 pbdma_intr_1_r(u32 i)
+{
+	return 0x00040148U + (i << 13); /* 8192-byte stride per index */
+}
+static inline u32 pbdma_intr_en_0_r(u32 i)
+{
+	return 0x0004010cU + (i << 13);
+}
+static inline u32 pbdma_intr_en_0_lbreq_enabled_f(void)
+{
+	return 0x00000100U;
+}
+static inline u32 pbdma_intr_en_1_r(u32 i)
+{
+	return 0x0004014cU + (i << 13);
+}
+static inline u32 pbdma_intr_stall_r(u32 i)
+{
+	return 0x0004013cU + (i << 13);
+}
+static inline u32 pbdma_intr_stall_lbreq_enabled_f(void)
+{
+	return 0x00000100U;
+}
+static inline u32 pbdma_udma_nop_r(void)
+{
+	return 0x00000008U;
+}
+static inline u32 pbdma_runlist_timeslice_r(u32 i)
+{
+	return 0x000400f8U + (i << 13); /* 8192-byte stride per index */
+}
+static inline u32 pbdma_runlist_timeslice_timeout_128_f(void)
+{
+	return 0x00000080U;
+}
+static inline u32 pbdma_runlist_timeslice_timescale_3_f(void)
+{
+	return 0x00003000U;
+}
+static inline u32 pbdma_runlist_timeslice_enable_true_f(void)
+{
+	return 0x10000000U;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_perf_gp106.h b/drivers/gpu/nvgpu/gp106/hw_perf_gp106.h
new file mode 100644
index 00000000..cd3501a8
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_perf_gp106.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_perf_gp106_h_
+#define _hw_perf_gp106_h_
+
+static inline u32 perf_pmasys_control_r(void)
+{
+	return 0x001b4000U;
+}
+static inline u32 perf_pmasys_control_membuf_status_v(u32 r)
+{
+	return (r & 0x00000010U) >> 4; /* bit 4 */
+}
+static inline u32 perf_pmasys_control_membuf_status_overflowed_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 perf_pmasys_control_membuf_status_overflowed_f(void)
+{
+	return 0x00000010U;
+}
+static inline u32 perf_pmasys_control_membuf_clear_status_f(u32 v)
+{
+	return (v << 5) & 0x00000020U; /* bit 5 */
+}
+static inline u32 perf_pmasys_control_membuf_clear_status_v(u32 r)
+{
+	return (r & 0x00000020U) >> 5; /* bit 5 */
+}
+static inline u32 perf_pmasys_control_membuf_clear_status_doit_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 perf_pmasys_control_membuf_clear_status_doit_f(void)
+{
+	return 0x00000020U;
+}
+static inline u32 perf_pmasys_mem_block_r(void)
+{
+	return 0x001b4070U;
+}
+static inline u32 perf_pmasys_mem_block_base_f(u32 v)
+{
+	return v & 0x0fffffffU; /* bits 27:0 */
+}
+static inline u32 perf_pmasys_mem_block_target_f(u32 v)
+{
+	return (v << 28) & 0x30000000U; /* bits 29:28 */
+}
+static inline u32 perf_pmasys_mem_block_target_v(u32 r)
+{
+	return (r & 0x30000000U) >> 28; /* bits 29:28 */
+}
+static inline u32 perf_pmasys_mem_block_target_lfb_v(void)
+{
+	return 0x00000000U;
+}
+static inline u32 perf_pmasys_mem_block_target_lfb_f(void)
+{
+	return 0x00000000U;
+}
+static inline u32 perf_pmasys_mem_block_target_sys_coh_v(void)
+{
+	return 0x00000002U;
+}
+static inline u32 perf_pmasys_mem_block_target_sys_coh_f(void)
+{
+	return 0x20000000U;
+}
+static inline u32 perf_pmasys_mem_block_target_sys_ncoh_v(void)
+{
+	return 0x00000003U;
+}
+static inline u32 perf_pmasys_mem_block_target_sys_ncoh_f(void)
+{
+	return 0x30000000U;
+}
+static inline u32 perf_pmasys_mem_block_valid_f(u32 v)
+{
+	return (v << 31) & 0x80000000U; /* bit 31 */
+}
+static inline u32 perf_pmasys_mem_block_valid_v(u32 r)
+{
+	return (r & 0x80000000U) >> 31; /* bit 31 */
+}
+static inline u32 perf_pmasys_mem_block_valid_true_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 perf_pmasys_mem_block_valid_true_f(void)
+{
+	return 0x80000000U;
+}
+static inline u32 perf_pmasys_mem_block_valid_false_v(void)
+{
+	return 0x00000000U;
+}
+static inline u32 perf_pmasys_mem_block_valid_false_f(void)
+{
+	return 0x00000000U;
+}
+static inline u32 perf_pmasys_outbase_r(void)
+{
+	return 0x001b4074U;
+}
+static inline u32 perf_pmasys_outbase_ptr_f(u32 v)
+{
+	return (v << 5) & 0xffffffe0U; /* bits 31:5 */
+}
+static inline u32 perf_pmasys_outbaseupper_r(void)
+{
+	return 0x001b4078U;
+}
+static inline u32 perf_pmasys_outbaseupper_ptr_f(u32 v)
+{
+	return v & 0x000000ffU; /* bits 7:0 */
+}
+static inline u32 perf_pmasys_outsize_r(void)
+{
+	return 0x001b407cU;
+}
+static inline u32 perf_pmasys_outsize_numbytes_f(u32 v)
+{
+	return (v << 5) & 0xffffffe0U; /* bits 31:5 */
+}
+static inline u32 perf_pmasys_mem_bytes_r(void)
+{
+	return 0x001b4084U;
+}
+static inline u32 perf_pmasys_mem_bytes_numbytes_f(u32 v)
+{
+	return (v << 4) & 0xfffffff0U; /* bits 31:4 */
+}
+static inline u32 perf_pmasys_mem_bump_r(void)
+{
+	return 0x001b4088U;
+}
+static inline u32 perf_pmasys_mem_bump_numbytes_f(u32 v)
+{
+	return (v << 4) & 0xfffffff0U; /* bits 31:4 */
+}
+static inline u32 perf_pmasys_enginestatus_r(void)
+{
+	return 0x001b40a4U;
+}
+static inline u32 perf_pmasys_enginestatus_rbufempty_f(u32 v)
+{
+	return (v << 4) & 0x00000010U; /* bit 4 */
+}
+static inline u32 perf_pmasys_enginestatus_rbufempty_empty_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 perf_pmasys_enginestatus_rbufempty_empty_f(void)
+{
+	return 0x00000010U;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_pri_ringmaster_gp106.h b/drivers/gpu/nvgpu/gp106/hw_pri_ringmaster_gp106.h
new file mode 100644
index 00000000..0eb2187a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_pri_ringmaster_gp106.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_pri_ringmaster_gp106_h_
+#define _hw_pri_ringmaster_gp106_h_
+
+static inline u32 pri_ringmaster_command_r(void)
+{
+	return 0x0012004cU;
+}
+static inline u32 pri_ringmaster_command_cmd_m(void)
+{
+	return 0x3fU << 0U;
+}
+static inline u32 pri_ringmaster_command_cmd_v(u32 r)
+{
+	return (r >> 0U) & 0x3fU;
+}
+static inline u32 pri_ringmaster_command_cmd_no_cmd_v(void)
+{
+	return 0x00000000U;
+}
+static inline u32 pri_ringmaster_command_cmd_start_ring_f(void)
+{
+	return 0x1U;
+}
+static inline u32 pri_ringmaster_command_cmd_ack_interrupt_f(void)
+{
+	return 0x2U;
+}
+static inline u32 pri_ringmaster_command_cmd_enumerate_stations_f(void)
+{
+	return 0x3U;
+}
+static inline u32 pri_ringmaster_command_cmd_enumerate_stations_bc_grp_all_f(void)
+{
+	return 0x0U;
+}
+static inline u32 pri_ringmaster_command_data_r(void)
+{
+	return 0x00120048U;
+}
+static inline u32 pri_ringmaster_start_results_r(void)
+{
+	return 0x00120050U;
+}
+static inline u32 pri_ringmaster_start_results_connectivity_v(u32 r)
+{
+	return (r >> 0U) & 0x1U;
+}
+static inline u32 pri_ringmaster_start_results_connectivity_pass_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 pri_ringmaster_intr_status0_r(void)
+{
+	return 0x00120058U;
+}
+static inline u32 pri_ringmaster_intr_status1_r(void)
+{
+	return 0x0012005cU;
+}
+static inline u32 pri_ringmaster_global_ctl_r(void)
+{
+	return 0x00120060U;
+}
+static inline u32 pri_ringmaster_global_ctl_ring_reset_asserted_f(void)
+{
+	return 0x1U;
+}
+static inline u32 pri_ringmaster_global_ctl_ring_reset_deasserted_f(void)
+{
+	return 0x0U;
+}
+static inline u32 pri_ringmaster_enum_fbp_r(void)
+{
+	return 0x00120074U;
+}
+static inline u32 pri_ringmaster_enum_fbp_count_v(u32 r)
+{
+	return (r >> 0U) & 0x1fU;
+}
+static inline u32 pri_ringmaster_enum_gpc_r(void)
+{
+	return 0x00120078U;
+}
+static inline u32 pri_ringmaster_enum_gpc_count_v(u32 r)
+{
+	return (r >> 0U) & 0x1fU;
+}
+static inline u32 pri_ringmaster_enum_ltc_r(void)
+{
+	return 0x0012006cU;
+}
+static inline u32 pri_ringmaster_enum_ltc_count_v(u32 r)
+{
+	return (r >> 0U) & 0x1fU;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_pri_ringstation_sys_gp106.h b/drivers/gpu/nvgpu/gp106/hw_pri_ringstation_sys_gp106.h
new file mode 100644
index 00000000..a22d6a05
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_pri_ringstation_sys_gp106.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_pri_ringstation_sys_gp106_h_
+#define _hw_pri_ringstation_sys_gp106_h_
+
+static inline u32 pri_ringstation_sys_master_config_r(u32 i)
+{
+	return 0x00122300U + i * 4U;
+}
+static inline u32 pri_ringstation_sys_decode_config_r(void)
+{
+	return 0x00122204U;
+}
+static inline u32 pri_ringstation_sys_decode_config_ring_m(void)
+{
+	return 0x7U << 0U;
+}
+static inline u32 pri_ringstation_sys_decode_config_ring_drop_on_ring_not_started_f(void)
+{
+	return 0x1U;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_proj_gp106.h b/drivers/gpu/nvgpu/gp106/hw_proj_gp106.h
new file mode 100644
index 00000000..0b4b86b1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_proj_gp106.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_proj_gp106_h_
+#define _hw_proj_gp106_h_
+
+static inline u32 proj_gpc_base_v(void)
+{
+	return 0x00500000U;
+}
+static inline u32 proj_gpc_shared_base_v(void)
+{
+	return 0x00418000U;
+}
+static inline u32 proj_gpc_stride_v(void)
+{
+	return 0x00008000U;
+}
+static inline u32 proj_ltc_stride_v(void)
+{
+	return 0x00002000U;
+}
+static inline u32 proj_lts_stride_v(void)
+{
+	return 0x00000200U;
+}
+static inline u32 proj_fbpa_stride_v(void)
+{
+	return 0x00004000U;
+}
+static inline u32 proj_ppc_in_gpc_base_v(void)
+{
+	return 0x00003000U;
+}
+static inline u32 proj_ppc_in_gpc_stride_v(void)
+{
+	return 0x00000200U;
+}
+static inline u32 proj_rop_base_v(void)
+{
+	return 0x00410000U;
+}
+static inline u32 proj_rop_shared_base_v(void)
+{
+	return 0x00408800U;
+}
+static inline u32 proj_rop_stride_v(void)
+{
+	return 0x00000400U;
+}
+static inline u32 proj_tpc_in_gpc_base_v(void)
+{
+	return 0x00004000U;
+}
+static inline u32 proj_tpc_in_gpc_stride_v(void)
+{
+	return 0x00000800U;
+}
+static inline u32 proj_tpc_in_gpc_shared_base_v(void)
+{
+	return 0x00001800U;
+}
+static inline u32 proj_host_num_pbdma_v(void)
+{
+	return 0x00000004U;
+}
+static inline u32 proj_scal_litter_num_tpc_per_gpc_v(void)
+{
+	return 0x00000005U;
+}
+static inline u32 proj_scal_litter_num_fbps_v(void)
+{
+	return 0x00000006U;
+}
+static inline u32 proj_scal_litter_num_fbpas_v(void)
+{
+	return 0x00000006U;
+}
+static inline u32 proj_scal_litter_num_gpcs_v(void)
+{
+	return 0x00000006U;
+}
+static inline u32 proj_scal_litter_num_pes_per_gpc_v(void)
+{
+	return 0x00000003U;
+}
+static inline u32 proj_scal_litter_num_tpcs_per_pes_v(void)
+{
+	return 0x00000002U;
+}
+static inline u32 proj_scal_litter_num_zcull_banks_v(void)
+{
+	return 0x00000004U;
+}
+static inline u32 proj_scal_max_gpcs_v(void)
+{
+	return 0x00000020U;
+}
+static inline u32 proj_scal_max_tpc_per_gpc_v(void)
+{
+	return 0x00000008U;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_pwr_gp106.h b/drivers/gpu/nvgpu/gp106/hw_pwr_gp106.h
new file mode 100644
index 00000000..b4dfea0d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_pwr_gp106.h
@@ -0,0 +1,841 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_pwr_gp106_h_
+#define _hw_pwr_gp106_h_
+
+static inline u32 pwr_falcon_irqsset_r(void)
+{
+	return 0x0010a000U;
+}
+static inline u32 pwr_falcon_irqsset_swgen0_set_f(void)
+{
+	return 0x40U;
+}
+static inline u32 pwr_falcon_irqsclr_r(void)
+{
+	return 0x0010a004U;
+}
+static inline u32 pwr_falcon_irqstat_r(void)
+{
+	return 0x0010a008U;
+}
+static inline u32 pwr_falcon_irqstat_halt_true_f(void)
+{
+	return 0x10U;
+}
+static inline u32 pwr_falcon_irqstat_exterr_true_f(void)
+{
+	return 0x20U;
+}
+static inline u32 pwr_falcon_irqstat_swgen0_true_f(void)
+{
+	return 0x40U;
+}
+static inline u32 pwr_falcon_irqmode_r(void)
+{
+	return 0x0010a00cU;
+}
+static inline u32 pwr_falcon_irqmset_r(void)
+{
+	return 0x0010a010U;
+}
+static inline u32 pwr_falcon_irqmset_gptmr_f(u32 v)
+{
+	return (v & 0x1U) << 0U;
+}
+static inline u32 pwr_falcon_irqmset_wdtmr_f(u32 v)
+{
+	return (v & 0x1U) << 1U;
+}
+static inline u32 pwr_falcon_irqmset_mthd_f(u32 v)
+{
+	return (v & 0x1U) << 2U;
+}
+static inline u32 pwr_falcon_irqmset_ctxsw_f(u32 v)
+{
+	return (v & 0x1U) << 3U;
+}
+static inline u32 pwr_falcon_irqmset_halt_f(u32 v)
+{
+	return (v & 0x1U) << 4U;
+}
+static inline u32 pwr_falcon_irqmset_exterr_f(u32 v)
+{
+	return (v & 0x1U) << 5U;
+}
+static inline u32 pwr_falcon_irqmset_swgen0_f(u32 v)
+{
+	return (v & 0x1U) << 6U;
+}
+static inline u32 pwr_falcon_irqmset_swgen1_f(u32 v)
+{
+	return (v & 0x1U) << 7U;
+}
+static inline u32 pwr_falcon_irqmclr_r(void)
+{
+	return 0x0010a014U;
+}
+static inline u32 pwr_falcon_irqmclr_gptmr_f(u32 v)
+{
+	return (v & 0x1U) << 0U;
+}
+static inline u32 pwr_falcon_irqmclr_wdtmr_f(u32 v)
+{
+	return (v & 0x1U) << 1U;
+}
+static inline u32 pwr_falcon_irqmclr_mthd_f(u32 v)
+{
+	return (v & 0x1U) << 2U;
+}
+static inline u32 pwr_falcon_irqmclr_ctxsw_f(u32 v)
+{
+	return (v & 0x1U) << 3U;
+}
+static inline u32 pwr_falcon_irqmclr_halt_f(u32 v)
+{
+	return (v & 0x1U) << 4U;
+}
+static inline u32 pwr_falcon_irqmclr_exterr_f(u32 v)
+{
+	return (v & 0x1U) << 5U;
+}
+static inline u32 pwr_falcon_irqmclr_swgen0_f(u32 v)
+{
+	return (v & 0x1U) << 6U;
+}
+static inline u32 pwr_falcon_irqmclr_swgen1_f(u32 v)
+{
+	return (v & 0x1U) << 7U;
+}
+static inline u32 pwr_falcon_irqmclr_ext_f(u32 v)
+{
+	return (v & 0xffU) << 8U;
+}
+static inline u32 pwr_falcon_irqmask_r(void)
+{
+	return 0x0010a018U;
+}
+static inline u32 pwr_falcon_irqdest_r(void)
+{
+	return 0x0010a01cU;
+}
+static inline u32 pwr_falcon_irqdest_host_gptmr_f(u32 v)
+{
+	return (v & 0x1U) << 0U;
+}
+static inline u32 pwr_falcon_irqdest_host_wdtmr_f(u32 v)
+{
+	return (v & 0x1U) << 1U;
+}
+static inline u32 pwr_falcon_irqdest_host_mthd_f(u32 v)
+{
+	return (v & 0x1U) << 2U;
+}
+static inline u32 pwr_falcon_irqdest_host_ctxsw_f(u32 v)
+{
+	return (v & 0x1U) << 3U;
+}
+static inline u32 pwr_falcon_irqdest_host_halt_f(u32 v)
+{
+	return (v & 0x1U) << 4U;
+}
+static inline u32 pwr_falcon_irqdest_host_exterr_f(u32 v)
+{
+	return (v & 0x1U) << 5U;
+}
+static inline u32 pwr_falcon_irqdest_host_swgen0_f(u32 v)
+{
+	return (v & 0x1U) << 6U;
+}
+static inline u32 pwr_falcon_irqdest_host_swgen1_f(u32 v)
+{
+	return (v & 0x1U) << 7U;
+}
+static inline u32 pwr_falcon_irqdest_host_ext_f(u32 v)
+{
+	return (v & 0xffU) << 8U;
+}
+static inline u32 pwr_falcon_irqdest_target_gptmr_f(u32 v)
+{
+	return (v & 0x1U) << 16U;
+}
+static inline u32 pwr_falcon_irqdest_target_wdtmr_f(u32 v)
+{
+	return (v & 0x1U) << 17U;
+}
+static inline u32 pwr_falcon_irqdest_target_mthd_f(u32 v)
+{
+	return (v & 0x1U) << 18U;
+}
+static inline u32 pwr_falcon_irqdest_target_ctxsw_f(u32 v)
+{
+	return (v & 0x1U) << 19U;
+}
+static inline u32 pwr_falcon_irqdest_target_halt_f(u32 v)
+{
+	return (v & 0x1U) << 20U;
+}
+static inline u32 pwr_falcon_irqdest_target_exterr_f(u32 v)
+{
+	return (v & 0x1U) << 21U;
+}
+static inline u32 pwr_falcon_irqdest_target_swgen0_f(u32 v)
+{
+	return (v & 0x1U) << 22U;
+}
+static inline u32 pwr_falcon_irqdest_target_swgen1_f(u32 v)
+{
+	return (v & 0x1U) << 23U;
+}
+static inline u32 pwr_falcon_irqdest_target_ext_f(u32 v)
+{
+	return (v & 0xffU) << 24U;
+}
+static inline u32 pwr_falcon_curctx_r(void)
+{
+	return 0x0010a050U;
+}
+static inline u32 pwr_falcon_nxtctx_r(void)
+{
+	return 0x0010a054U;
+}
+static inline u32 pwr_falcon_mailbox0_r(void)
+{
+	return 0x0010a040U;
+}
+static inline u32 pwr_falcon_mailbox1_r(void)
+{
+	return 0x0010a044U;
+}
+static inline u32 pwr_falcon_itfen_r(void)
+{
+	return 0x0010a048U;
+}
+static inline u32 pwr_falcon_itfen_ctxen_enable_f(void)
+{
+	return 0x1U;
+}
+static inline u32 pwr_falcon_idlestate_r(void)
+{
+	return 0x0010a04cU;
+}
+static inline u32 pwr_falcon_idlestate_falcon_busy_v(u32 r)
+{
+	return (r >> 0U) & 0x1U;
+}
+static inline u32 pwr_falcon_idlestate_ext_busy_v(u32 r)
+{
+	return (r >> 1U) & 0x7fffU;
+}
+static inline u32 pwr_falcon_os_r(void)
+{
+	return 0x0010a080U;
+}
+static inline u32 pwr_falcon_engctl_r(void)
+{
+	return 0x0010a0a4U;
+}
+static inline u32 pwr_falcon_cpuctl_r(void)
+{
+	return 0x0010a100U;
+}
+static inline u32 pwr_falcon_cpuctl_startcpu_f(u32 v)
+{
+	return (v & 0x1U) << 1U;
+}
+static inline u32 pwr_falcon_cpuctl_halt_intr_f(u32 v)
+{
+	return (v & 0x1U) << 4U;
+}
+static inline u32 pwr_falcon_cpuctl_halt_intr_m(void)
+{
+	return 0x1U << 4U;
+}
+static inline u32 pwr_falcon_cpuctl_halt_intr_v(u32 r)
+{
+	return (r >> 4U) & 0x1U;
+}
+static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_f(u32 v)
+{
+	return (v & 0x1U) << 6U;
+}
+static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_m(void)
+{
+	return 0x1U << 6U;
+}
+static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_v(u32 r)
+{
+	return (r >> 6U) & 0x1U;
+}
+static inline u32 pwr_falcon_cpuctl_alias_r(void)
+{
+	return 0x0010a130U;
+}
+static inline u32 pwr_falcon_cpuctl_alias_startcpu_f(u32 v)
+{
+	return (v & 0x1U) << 1U;
+}
+static inline u32 pwr_pmu_scpctl_stat_r(void)
+{
+	return 0x0010ac08U;
+}
+static inline u32 pwr_pmu_scpctl_stat_debug_mode_f(u32 v)
+{
+	return (v & 0x1U) << 20U;
+}
+static inline u32 pwr_pmu_scpctl_stat_debug_mode_m(void)
+{
+	return 0x1U << 20U;
+}
+static inline u32 pwr_pmu_scpctl_stat_debug_mode_v(u32 r)
+{
+	return (r >> 20U) & 0x1U;
+}
+static inline u32 pwr_falcon_imemc_r(u32 i)
+{
+	return 0x0010a180U + i * 16U;
+}
+static inline u32 pwr_falcon_imemc_offs_f(u32 v)
+{
+	return (v & 0x3fU) << 2U;
+}
+static inline u32 pwr_falcon_imemc_blk_f(u32 v)
+{
+	return (v & 0xffU) << 8U;
+}
+static inline u32 pwr_falcon_imemc_aincw_f(u32 v)
+{
+	return (v & 0x1U) << 24U;
+}
+static inline u32 pwr_falcon_imemd_r(u32 i)
+{
+	return 0x0010a184U + i * 16U;
+}
+static inline u32 pwr_falcon_imemt_r(u32 i)
+{
+	return 0x0010a188U + i * 16U;
+}
+static inline u32 pwr_falcon_sctl_r(void)
+{
+	return 0x0010a240U;
+}
+static inline u32 pwr_falcon_mmu_phys_sec_r(void)
+{
+	return 0x00100ce4U;
+}
+static inline u32 pwr_falcon_bootvec_r(void)
+{
+	return 0x0010a104U;
+}
+static inline u32 pwr_falcon_bootvec_vec_f(u32 v)
+{
+	return (v & 0xffffffffU) << 0U;
+}
+static inline u32 pwr_falcon_dmactl_r(void)
+{
+	return 0x0010a10cU;
+}
+static inline u32 pwr_falcon_dmactl_dmem_scrubbing_m(void)
+{
+	return 0x1U << 1U;
+}
+static inline u32 pwr_falcon_dmactl_imem_scrubbing_m(void)
+{
+	return 0x1U << 2U;
+}
+static inline u32 pwr_falcon_dmactl_require_ctx_f(u32 v)
+{
+	return (v & 0x1U) << 0U;
+}
+static inline u32 pwr_falcon_hwcfg_r(void)
+{
+	return 0x0010a108U;
+}
+static inline u32 pwr_falcon_hwcfg_imem_size_v(u32 r)
+{
+	return (r >> 0U) & 0x1ffU;
+}
+static inline u32 pwr_falcon_hwcfg_dmem_size_v(u32 r)
+{
+	return (r >> 9U) & 0x1ffU;
+}
+static inline u32 pwr_falcon_dmatrfbase_r(void)
+{
+	return 0x0010a110U;
+}
+static inline u32 pwr_falcon_dmatrfbase1_r(void)
+{
+	return 0x0010a128U;
+}
+static inline u32 pwr_falcon_dmatrfmoffs_r(void)
+{
+	return 0x0010a114U;
+}
+static inline u32 pwr_falcon_dmatrfcmd_r(void)
+{
+	return 0x0010a118U;
+}
+static inline u32 pwr_falcon_dmatrfcmd_imem_f(u32 v)
+{
+	return (v & 0x1U) << 4U;
+}
+static inline u32 pwr_falcon_dmatrfcmd_write_f(u32 v)
+{
+	return (v & 0x1U) << 5U;
+}
+static inline u32 pwr_falcon_dmatrfcmd_size_f(u32 v)
+{
+	return (v & 0x7U) << 8U;
+}
+static inline u32 pwr_falcon_dmatrfcmd_ctxdma_f(u32 v)
+{
+	return (v & 0x7U) << 12U;
+}
+static inline u32 pwr_falcon_dmatrffboffs_r(void)
+{
+	return 0x0010a11cU;
+}
+static inline u32 pwr_falcon_exterraddr_r(void)
+{
+	return 0x0010a168U;
+}
+static inline u32 pwr_falcon_exterrstat_r(void)
+{
+	return 0x0010a16cU;
+}
+static inline u32 pwr_falcon_exterrstat_valid_m(void)
+{
+	return 0x1U << 31U; /* bit 31; unsigned: 0x1 << 31 overflows int */
+}
+static inline u32 pwr_falcon_exterrstat_valid_v(u32 r)
+{
+	return (r >> 31U) & 0x1U;
+}
+static inline u32 pwr_falcon_exterrstat_valid_true_v(void)
+{
+	return 0x00000001U;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_r(void)
+{
+	return 0x0010a200U;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_s(void)
+{
+	return 4U;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_f(u32 v)
+{
+	return (v & 0xfU) << 0U;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_m(void)
+{
+	return 0xfU << 0U;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_v(u32 r)
+{
+	return (r >> 0U) & 0xfU;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_rreg_f(void)
+{
+	return 0x8U;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_opc_rstat_f(void)
+{
+	return 0xeU;
+}
+static inline u32 pwr_pmu_falcon_icd_cmd_idx_f(u32 v)
+{
+	return (v & 0x1fU) << 8U;
+}
+static inline u32 pwr_pmu_falcon_icd_rdata_r(void)
+{
+	return 0x0010a20cU;
+}
+static inline u32 pwr_falcon_dmemc_r(u32 i)
+{
+	return 0x0010a1c0U + i * 8U;
+}
+static inline u32 pwr_falcon_dmemc_offs_f(u32 v)
+{
+	return (v & 0x3fU) << 2U;
+}
+static inline u32 pwr_falcon_dmemc_offs_m(void)
+{
+	return 0x3fU << 2U;
+}
+static inline u32 pwr_falcon_dmemc_blk_f(u32 v)
+{
+	return (v & 0xffU) << 8U;
+}
+static inline u32 pwr_falcon_dmemc_blk_m(void)
+{
+	return 0xffU << 8U;
+}
+static inline u32 pwr_falcon_dmemc_aincw_f(u32 v)
+{
+	return (v & 0x1U) << 24U;
+}
+static inline u32 pwr_falcon_dmemc_aincr_f(u32 v)
+{
+	return (v & 0x1U) << 25U;
+}
+static inline u32 pwr_falcon_dmemd_r(u32 i)
+{
+	return 0x0010a1c4U + i * 8U;
+}
+static inline u32 pwr_pmu_new_instblk_r(void)
+{
+	return 0x0010a480U;
+}
+static inline u32 pwr_pmu_new_instblk_ptr_f(u32 v)
+{
+	return (v & 0xfffffffU) << 0U;
+}
+static inline u32 pwr_pmu_new_instblk_target_fb_f(void)
+{
+	return 0x0U;
+}
+static inline u32 pwr_pmu_new_instblk_target_sys_coh_f(void)
+{
+	return 0x20000000U;
+}
+static inline u32 pwr_pmu_new_instblk_target_sys_ncoh_f(void)
+{
+	return 0x30000000U;
+}
+static inline u32 pwr_pmu_new_instblk_valid_f(u32 v)
+{
+	return (v & 0x1U) << 30U;
+}
+static inline u32 pwr_pmu_mutex_id_r(void)
+{
+	return 0x0010a488U;
+}
+static inline u32 pwr_pmu_mutex_id_value_v(u32 r)
+{
+	return (r >> 0U) & 0xffU;
+}
+static inline u32 pwr_pmu_mutex_id_value_init_v(void)
+{
+	return 0x00000000U;
+}
+static inline u32 pwr_pmu_mutex_id_value_not_avail_v(void)
+{
+	return 0x000000ffU;
+}
+static inline u32 pwr_pmu_mutex_id_release_r(void)
+{
+	return 0x0010a48cU;
+}
+static inline u32 pwr_pmu_mutex_id_release_value_f(u32 v)
+{
+	return (v & 0xffU) << 0U;
+}
+static inline u32 pwr_pmu_mutex_id_release_value_m(void)
+{
+	return 0xffU << 0U;
+}
+static inline u32 pwr_pmu_mutex_id_release_value_init_v(void)
+{
+	return 0x00000000U;
+}
+static inline u32 pwr_pmu_mutex_id_release_value_init_f(void)
+{
+	return 0x0U;
+}
+static inline u32 pwr_pmu_mutex_r(u32 i)
+{
+	return 0x0010a580U + i * 4U;
+}
+static inline u32 pwr_pmu_mutex__size_1_v(void)
+{
+	return 0x00000010U;
+}
+static inline u32 pwr_pmu_mutex_value_f(u32 v)
+{
+	return (v & 0xffU) << 0U;
+}
+static inline u32 pwr_pmu_mutex_value_v(u32 r)
+{
+	return (r >> 0U) & 0xffU;
+}
+static inline u32 pwr_pmu_mutex_value_initial_lock_f(void)
+{
+	return 0x0U;
+}
+static inline u32 pwr_pmu_queue_head_r(u32 i)
+{
+	return 0x0010a4a0U + i * 4U;
+}
+static inline u32 pwr_pmu_queue_head__size_1_v(void)
+{
+	return 0x00000004U;
+}
+static inline u32 pwr_pmu_queue_head_address_f(u32 v)
+{
+	return (v & 0xffffffffU) << 0U;
+}
+static inline u32 pwr_pmu_queue_head_address_v(u32 r)
+{
+	return (r >> 0U) & 0xffffffffU;
+}
+static inline u32 pwr_pmu_queue_tail_r(u32 i)
+{
+	return 0x0010a4b0U + i * 4U;
+}
+static inline u32 pwr_pmu_queue_tail__size_1_v(void)
+{
+	return 0x00000004U;
+}
+static inline u32 pwr_pmu_queue_tail_address_f(u32 v)
+{
+	return (v & 0xffffffffU) << 0U;
+}
+static inline u32 pwr_pmu_queue_tail_address_v(u32 r)
+{
+	return (r >> 0U) & 0xffffffffU;
+}
+static inline u32 pwr_pmu_msgq_head_r(void)
+{
+	return 0x0010a4c8U;
+}
+static inline u32 pwr_pmu_msgq_head_val_f(u32 v)
+{
+	return (v & 0xffffffffU) << 0U;
+}
+static inline u32 pwr_pmu_msgq_head_val_v(u32 r)
+{
+	return (r >> 0U) & 0xffffffffU;
+}
+static inline u32 pwr_pmu_msgq_tail_r(void)
+{
+	return 0x0010a4ccU;
+}
+static inline u32 pwr_pmu_msgq_tail_val_f(u32 v)
+{
+	return (v & 0xffffffffU) << 0U;
+}
+static inline u32 pwr_pmu_msgq_tail_val_v(u32 r)
+{
+	return (r >> 0U) & 0xffffffffU;
+}
+static inline u32 pwr_pmu_idle_mask_r(u32 i)
+{
+	return 0x0010a504U + i * 16U;
+}
+static inline u32 pwr_pmu_idle_mask_gr_enabled_f(void)
+{
+	return 0x1U;
+}
+static inline u32 pwr_pmu_idle_mask_ce_2_enabled_f(void)
+{
+	return 0x200000U;
+}
+static inline u32 pwr_pmu_idle_count_r(u32 i)
+{
+	return 0x0010a508U + i * 16U;
+}
+static inline u32 pwr_pmu_idle_count_value_f(u32 v)
+{
+	return (v & 0x7fffffffU) << 0U;
+}
+static inline u32 pwr_pmu_idle_count_value_v(u32 r)
+{
+	return (r >> 0U) & 0x7fffffffU;
+}
+static inline u32 pwr_pmu_idle_count_reset_f(u32 v)
+{
+	return (v & 0x1U) << 31U;
+}
+static inline u32 pwr_pmu_idle_ctrl_r(u32 i)
+{
+	return 0x0010a50cU + i * 16U;
+}
+static inline u32 pwr_pmu_idle_ctrl_value_m(void)
+{
+	return 0x3U << 0U;
+}
+static inline u32 pwr_pmu_idle_ctrl_value_busy_f(void)
+{
+	return 0x2U;
+}
+static inline u32 pwr_pmu_idle_ctrl_value_always_f(void)
+{
+	return 0x3U;
+}
+static inline u32 pwr_pmu_idle_ctrl_filter_m(void)
+{
+	return 0x1U << 2U;
+}
+static inline u32 pwr_pmu_idle_ctrl_filter_disabled_f(void)
+{
+	return 0x0U;
+}
+static inline u32 pwr_pmu_idle_mask_supp_r(u32 i)
+{
+	return 0x0010a9f0U + i * 8U;
+}
+static inline u32 pwr_pmu_idle_mask_1_supp_r(u32 i)
+{
+	return 0x0010a9f4U + i * 8U;
+}
+static inline u32 pwr_pmu_idle_ctrl_supp_r(u32 i)
+{
+	return 0x0010aa30U + i * 8U;
+}
+static inline u32 pwr_pmu_debug_r(u32 i)
+{
+	return 0x0010a5c0U + i * 4U;
+}
+static inline u32 pwr_pmu_debug__size_1_v(void)
+{
+	return 0x00000004U;
+}
+static inline u32 pwr_pmu_mailbox_r(u32 i)
+{
+	return 0x0010a450U + i * 4U;
+}
+static inline u32 pwr_pmu_mailbox__size_1_v(void)
+{
+	return 0x0000000cU;
+}
+static inline u32 pwr_pmu_bar0_addr_r(void)
+{
+	return 0x0010a7a0U;
+}
+static inline u32 pwr_pmu_bar0_data_r(void)
+{
+	return 0x0010a7a4U;
+}
+static inline u32 pwr_pmu_bar0_ctl_r(void)
+{
+	return 0x0010a7acU;
+}
+static inline u32 pwr_pmu_bar0_timeout_r(void)
+{
+	return 0x0010a7a8U;
+}
+static inline u32 pwr_pmu_bar0_fecs_error_r(void)
+{
+	return 0x0010a988U;
+}
+static inline u32 pwr_pmu_bar0_error_status_r(void)
+{
+	return 0x0010a7b0U;
+}
+static inline u32 pwr_pmu_pg_idlefilth_r(u32 i)
+{
+	return 0x0010a6c0U + i * 4U;
+}
+static inline u32 pwr_pmu_pg_ppuidlefilth_r(u32 i)
+{
+	return 0x0010a6e8U + i * 4U;
+}
+static inline u32 pwr_pmu_pg_idle_cnt_r(u32 i)
+{
+	return 0x0010a710U + i * 4U;
+}
+static inline u32 pwr_pmu_pg_intren_r(u32 i)
+{
+	return 0x0010a760U + i * 4U;
+}
+static inline u32 pwr_fbif_transcfg_r(u32 i)
+{
+	return 0x0010ae00U + i * 4U;
+}
+static inline u32 pwr_fbif_transcfg_target_local_fb_f(void)
+{
+	return 0x0U;
+}
+static inline u32 pwr_fbif_transcfg_target_coherent_sysmem_f(void)
+{
+	return 0x1U;
+}
+static inline u32 pwr_fbif_transcfg_target_noncoherent_sysmem_f(void)
+{
+	return 0x2U;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_s(void)
+{
+	return 1U;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_f(u32 v)
+{
+	return (v & 0x1U) << 2U;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_m(void)
+{
+	return 0x1U << 2U;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_v(u32 r)
+{
+	return (r >> 2U) & 0x1U;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_virtual_f(void)
+{
+	return 0x0U;
+}
+static inline u32 pwr_fbif_transcfg_mem_type_physical_f(void)
+{
+	return 0x4U;
+}
+static inline u32 pwr_falcon_engine_r(void)
+{
+	return 0x0010a3c0U;
+}
+static inline u32 pwr_falcon_engine_reset_true_f(void)
+{
+	return 0x1U;
+}
+static inline u32 pwr_falcon_engine_reset_false_f(void)
+{
+	return 0x0U;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_ram_gp106.h b/drivers/gpu/nvgpu/gp106/hw_ram_gp106.h
new file mode 100644
index 00000000..b325affc
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_ram_gp106.h
@@ -0,0 +1,477 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_ram_gp106_h_
+#define _hw_ram_gp106_h_
+
+static inline u32 ram_in_ramfc_s(void)
+{
+	return 4096;
+}
+static inline u32 ram_in_ramfc_w(void)
+{
+	return 0;
+}
+static inline u32 ram_in_page_dir_base_target_f(u32 v)
+{
+	return (v & 0x3) << 0;
+}
+static inline u32 ram_in_page_dir_base_target_w(void)
+{
+	return 128;
+}
+static inline u32 ram_in_page_dir_base_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 ram_in_page_dir_base_target_sys_mem_coh_f(void)
+{
+	return 0x2;
+}
+static inline u32 ram_in_page_dir_base_target_sys_mem_ncoh_f(void)
+{
+	return 0x3;
+}
+static inline u32 ram_in_page_dir_base_vol_w(void)
+{
+	return 128;
+}
+static inline u32 ram_in_page_dir_base_vol_true_f(void)
+{
+	return 0x4;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_tex_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_tex_m(void)
+{
+	return 0x1 << 4;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_tex_w(void)
+{
+	return 128;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_tex_true_f(void)
+{
+	return 0x10;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_gcc_f(u32 v)
+{
+	return (v & 0x1) << 5;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_gcc_m(void)
+{
+	return 0x1 << 5;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_gcc_w(void)
+{
+	return 128;
+}
+static inline u32 ram_in_page_dir_base_fault_replay_gcc_true_f(void)
+{
+	return 0x20;
+}
+static inline u32 ram_in_big_page_size_f(u32 v)
+{
+	return (v & 0x1) << 11;
+}
+static inline u32 ram_in_big_page_size_m(void)
+{
+	return 0x1 << 11;
+}
+static inline u32 ram_in_big_page_size_w(void)
+{
+	return 128;
+}
+static inline u32 ram_in_big_page_size_128kb_f(void)
+{
+	return 0x0;
+}
+static inline u32 ram_in_big_page_size_64kb_f(void)
+{
+	return 0x800;
+}
+static inline u32 ram_in_page_dir_base_lo_f(u32 v)
+{
+	return (v & 0xfffff) << 12;
+}
+static inline u32 ram_in_page_dir_base_lo_w(void)
+{
+	return 128;
+}
+static inline u32 ram_in_page_dir_base_hi_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 ram_in_page_dir_base_hi_w(void)
+{
+	return 129;
+}
+static inline u32 ram_in_adr_limit_lo_f(u32 v)
+{
+	return (v & 0xfffff) << 12;
+}
+static inline u32 ram_in_adr_limit_lo_w(void)
+{
+	return 130;
+}
+static inline u32 ram_in_adr_limit_hi_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 ram_in_adr_limit_hi_w(void)
+{
+	return 131;
+}
+static inline u32 ram_in_engine_cs_w(void)
+{
+	return 132;
+}
+static inline u32 ram_in_engine_cs_wfi_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 ram_in_engine_cs_wfi_f(void)
+{
+	return 0x0;
+}
+static inline u32 ram_in_engine_cs_fg_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ram_in_engine_cs_fg_f(void)
+{
+	return 0x8;
+}
+static inline u32 ram_in_gr_cs_w(void)
+{
+	return 132;
+}
+static inline u32 ram_in_gr_cs_wfi_f(void)
+{
+	return 0x0;
+}
+static inline u32 ram_in_gr_wfi_target_w(void)
+{
+	return 132;
+}
+static inline u32 ram_in_gr_wfi_mode_w(void)
+{
+	return 132;
+}
+static inline u32 ram_in_gr_wfi_mode_physical_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 ram_in_gr_wfi_mode_physical_f(void)
+{
+	return 0x0;
+}
+static inline u32 ram_in_gr_wfi_mode_virtual_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ram_in_gr_wfi_mode_virtual_f(void)
+{
+	return 0x4;
+}
+static inline u32 ram_in_gr_wfi_ptr_lo_f(u32 v)
+{
+	return (v & 0xfffff) << 12;
+}
+static inline u32 ram_in_gr_wfi_ptr_lo_w(void)
+{
+	return 132;
+}
+static inline u32 ram_in_gr_wfi_ptr_hi_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+static inline u32 ram_in_gr_wfi_ptr_hi_w(void)
+{
+	return 133;
+}
+static inline u32 ram_in_base_shift_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 ram_in_alloc_size_v(void)
+{
+	return 0x00001000;
+}
+static inline u32 ram_fc_size_val_v(void)
+{
+	return 0x00000200;
+}
+static inline u32 ram_fc_gp_put_w(void)
+{
+	return 0;
+}
+static inline u32 ram_fc_userd_w(void)
+{
+	return 2;
+}
+static inline u32 ram_fc_userd_hi_w(void)
+{
+	return 3;
+}
+static inline u32 ram_fc_signature_w(void)
+{
+	return 4;
+}
+static inline u32 ram_fc_gp_get_w(void)
+{
+	return 5;
+}
+static inline u32 ram_fc_pb_get_w(void)
+{
+	return 6;
+}
+static inline u32 ram_fc_pb_get_hi_w(void)
+{
+	return 7;
+}
+static inline u32 ram_fc_pb_top_level_get_w(void)
+{
+	return 8;
+}
+static inline u32 ram_fc_pb_top_level_get_hi_w(void)
+{
+	return 9;
+}
+static inline u32 ram_fc_acquire_w(void)
+{
+	return 12;
+}
+static inline u32 ram_fc_semaphorea_w(void)
+{
+	return 14;
+}
+static inline u32 ram_fc_semaphoreb_w(void)
+{
+	return 15;
+}
+static inline u32 ram_fc_semaphorec_w(void)
+{
+	return 16;
+}
+static inline u32 ram_fc_semaphored_w(void)
+{
+	return 17;
+}
+static inline u32 ram_fc_gp_base_w(void)
+{
+	return 18;
+}
+static inline u32 ram_fc_gp_base_hi_w(void)
+{
+	return 19;
+}
+static inline u32 ram_fc_gp_fetch_w(void)
+{
+	return 20;
+}
+static inline u32 ram_fc_pb_fetch_w(void)
+{
+	return 21;
+}
+static inline u32 ram_fc_pb_fetch_hi_w(void)
+{
+	return 22;
+}
+static inline u32 ram_fc_pb_put_w(void)
+{
+	return 23;
+}
+static inline u32 ram_fc_pb_put_hi_w(void)
+{
+	return 24;
+}
+static inline u32 ram_fc_pb_header_w(void)
+{
+	return 33;
+}
+static inline u32 ram_fc_pb_count_w(void)
+{
+	return 34;
+}
+static inline u32 ram_fc_subdevice_w(void)
+{
+	return 37;
+}
+static inline u32 ram_fc_formats_w(void)
+{
+	return 39;
+}
+static inline u32 ram_fc_target_w(void)
+{
+	return 43;
+}
+static inline u32 ram_fc_hce_ctrl_w(void)
+{
+	return 57;
+}
+static inline u32 ram_fc_chid_w(void)
+{
+	return 58;
+}
+static inline u32 ram_fc_chid_id_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
+static inline u32 ram_fc_chid_id_w(void)
+{
+	return 0;
+}
+static inline u32 ram_fc_runlist_timeslice_w(void)
+{
+	return 62;
+}
+static inline u32 ram_userd_base_shift_v(void)
+{
+	return 0x00000009;
+}
+static inline u32 ram_userd_chan_size_v(void)
+{
+	return 0x00000200;
+}
+static inline u32 ram_userd_put_w(void)
+{
+	return 16;
+}
+static inline u32 ram_userd_get_w(void)
+{
+	return 17;
+}
+static inline u32 ram_userd_ref_w(void)
+{
+	return 18;
+}
+static inline u32 ram_userd_put_hi_w(void)
+{
+	return 19;
+}
+static inline u32 ram_userd_ref_threshold_w(void)
+{
+	return 20;
+}
+static inline u32 ram_userd_top_level_get_w(void)
+{
+	return 22;
+}
+static inline u32 ram_userd_top_level_get_hi_w(void)
+{
+	return 23;
+}
+static inline u32 ram_userd_get_hi_w(void)
+{
+	return 24;
+}
+static inline u32 ram_userd_gp_get_w(void)
+{
+	return 34;
+}
+static inline u32 ram_userd_gp_put_w(void)
+{
+	return 35;
+}
+static inline u32 ram_userd_gp_top_level_get_w(void)
+{
+	return 22;
+}
+static inline u32 ram_userd_gp_top_level_get_hi_w(void)
+{
+	return 23;
+}
+static inline u32 ram_rl_entry_size_v(void)
+{
+	return 0x00000008;
+}
+static inline u32 ram_rl_entry_chid_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
+static inline u32 ram_rl_entry_id_f(u32 v)
+{
+	return (v & 0xfff) << 0;
+}
+static inline u32 ram_rl_entry_type_f(u32 v)
+{
+	return (v & 0x1) << 13;
+}
+static inline u32 ram_rl_entry_type_chid_f(void)
+{
+	return 0x0;
+}
+static inline u32 ram_rl_entry_type_tsg_f(void)
+{
+	return 0x2000;
+}
+static inline u32 ram_rl_entry_timeslice_scale_f(u32 v)
+{
+	return (v & 0xf) << 14;
+}
+static inline u32 ram_rl_entry_timeslice_scale_3_f(void)
+{
+	return 0xc000;
+}
+static inline u32 ram_rl_entry_timeslice_timeout_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+static inline u32 ram_rl_entry_timeslice_timeout_128_f(void)
+{
+	return 0x2000000;
+}
+static inline u32 ram_rl_entry_tsg_length_f(u32 v)
+{
+	return (v & 0x3f) << 26;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_timer_gp106.h b/drivers/gpu/nvgpu/gp106/hw_timer_gp106.h
new file mode 100644
index 00000000..62771628
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_timer_gp106.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_timer_gp106_h_
+#define _hw_timer_gp106_h_
+
+static inline u32 timer_pri_timeout_r(void)
+{
+	return 0x00009080;
+}
+static inline u32 timer_pri_timeout_period_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+static inline u32 timer_pri_timeout_period_m(void)
+{
+	return 0xffffff << 0;
+}
+static inline u32 timer_pri_timeout_period_v(u32 r)
+{
+	return (r >> 0) & 0xffffff;
+}
+static inline u32 timer_pri_timeout_en_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+static inline u32 timer_pri_timeout_en_m(void)
+{
+	return 0x1U << 31;	/* unsigned literal: 1 << 31 overflows int */
+}
+static inline u32 timer_pri_timeout_en_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 timer_pri_timeout_en_en_enabled_f(void)
+{
+	return 0x80000000;
+}
+static inline u32 timer_pri_timeout_en_en_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 timer_pri_timeout_save_0_r(void)
+{
+	return 0x00009084;
+}
+static inline u32 timer_pri_timeout_save_1_r(void)
+{
+	return 0x00009088;
+}
+static inline u32 timer_pri_timeout_fecs_errcode_r(void)
+{
+	return 0x0000908c;
+}
+static inline u32 timer_time_0_r(void)
+{
+	return 0x00009400;
+}
+static inline u32 timer_time_1_r(void)
+{
+	return 0x00009410;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_top_gp106.h b/drivers/gpu/nvgpu/gp106/hw_top_gp106.h
new file mode 100644
index 00000000..ed8e0888
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_top_gp106.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_top_gp106_h_
+#define _hw_top_gp106_h_
+
+static inline u32 top_num_gpcs_r(void)
+{
+	return 0x00022430;
+}
+static inline u32 top_num_gpcs_value_v(u32 r)
+{
+	return (r >> 0) & 0x1f;
+}
+static inline u32 top_tpc_per_gpc_r(void)
+{
+	return 0x00022434;
+}
+static inline u32 top_tpc_per_gpc_value_v(u32 r)
+{
+	return (r >> 0) & 0x1f;
+}
+static inline u32 top_num_fbps_r(void)
+{
+	return 0x00022438;
+}
+static inline u32 top_num_fbps_value_v(u32 r)
+{
+	return (r >> 0) & 0x1f;
+}
+static inline u32 top_ltc_per_fbp_r(void)
+{
+	return 0x00022450;
+}
+static inline u32 top_ltc_per_fbp_value_v(u32 r)
+{
+	return (r >> 0) & 0x1f;
+}
+static inline u32 top_slices_per_ltc_r(void)
+{
+	return 0x0002245c;
+}
+static inline u32 top_slices_per_ltc_value_v(u32 r)
+{
+	return (r >> 0) & 0x1f;
+}
+static inline u32 top_num_ltcs_r(void)
+{
+	return 0x00022454;
+}
+static inline u32 top_device_info_r(u32 i)
+{
+	return 0x00022700 + i*4;
+}
+static inline u32 top_device_info__size_1_v(void)
+{
+	return 0x00000040;
+}
+static inline u32 top_device_info_chain_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 top_device_info_chain_enable_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 top_device_info_engine_enum_v(u32 r)
+{
+	return (r >> 26) & 0xf;
+}
+static inline u32 top_device_info_runlist_enum_v(u32 r)
+{
+	return (r >> 21) & 0xf;
+}
+static inline u32 top_device_info_intr_enum_v(u32 r)
+{
+	return (r >> 15) & 0x1f;
+}
+static inline u32 top_device_info_reset_enum_v(u32 r)
+{
+	return (r >> 9) & 0x1f;
+}
+static inline u32 top_device_info_type_enum_v(u32 r)
+{
+	return (r >> 2) & 0x1fffffff;
+}
+static inline u32 top_device_info_type_enum_graphics_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 top_device_info_type_enum_graphics_f(void)
+{
+	return 0x0;
+}
+static inline u32 top_device_info_type_enum_copy0_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 top_device_info_type_enum_copy0_f(void)
+{
+	return 0x4;
+}
+static inline u32 top_device_info_entry_v(u32 r)
+{
+	return (r >> 0) & 0x3;
+}
+static inline u32 top_device_info_entry_not_valid_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 top_device_info_entry_enum_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 top_scratch1_r(void)
+{
+	return 0x0002240c;
+}
+static inline u32 top_scratch1_devinit_completed_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_xve_gp106.h b/drivers/gpu/nvgpu/gp106/hw_xve_gp106.h
new file mode 100644
index 00000000..74b6cf7c
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_xve_gp106.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_xve_gp106_h_
+#define _hw_xve_gp106_h_
+
+static inline u32 xve_rom_ctrl_r(void)
+{
+	return 0x00000050;
+}
+static inline u32 xve_rom_ctrl_rom_shadow_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 xve_rom_ctrl_rom_shadow_disabled_f(void)
+{
+	return 0x0;
+}
+static inline u32 xve_rom_ctrl_rom_shadow_enabled_f(void)
+{
+	return 0x1;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/pmu_gp106.c b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
new file mode 100644
index 00000000..a9d05730
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/delay.h>	/* for udelay */
+#include "gk20a/gk20a.h"
+#include "gk20a/pmu_gk20a.h"
+
+#include "gp10b/pmu_gp10b.h"
+#include "hw_mc_gp106.h"
+#include "hw_pwr_gp106.h"
+
+int gp106_pmu_reset(struct gk20a *g)
+{
+	gk20a_dbg_fn("");
+
+	gk20a_reset(g, mc_enable_pwr_enabled_f());
+	/* Pulse the falcon engine reset: set, hold for 10 us, then clear. */
+	gk20a_writel(g, pwr_falcon_engine_r(),
+			pwr_falcon_engine_reset_true_f());
+	udelay(10);
+	gk20a_writel(g, pwr_falcon_engine_r(),
+			pwr_falcon_engine_reset_false_f());
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
+void gp106_init_pmu_ops(struct gpu_ops *gops)
+{
+	gk20a_dbg_fn("");
+	/* Start from the gp10b PMU ops, then install the gp106 reset handler. */
+	gp10b_init_pmu_ops(gops);
+	gops->pmu.reset = gp106_pmu_reset;
+
+	gk20a_dbg_fn("done");
+}
diff --git a/drivers/gpu/nvgpu/gp106/pmu_gp106.h b/drivers/gpu/nvgpu/gp106/pmu_gp106.h
new file mode 100644
index 00000000..8fb4c736
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/pmu_gp106.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __PMU_GP106_H_
+#define __PMU_GP106_H_
+
+void gp106_init_pmu_ops(struct gpu_ops *gops);
+
+#endif /*__PMU_GP106_H_*/
diff --git a/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h b/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h
index 96f02125..84da4b96 100644
--- a/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h
+++ b/drivers/gpu/nvgpu/nvgpu_gpuid_t18x.h
@@ -1,7 +1,7 @@
 /*
  * NVIDIA GPU ID functions, definitions.
  *
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -17,6 +17,10 @@
 
 #define NVGPU_GPUID_GP10B \
 	GK20A_GPUID(NVGPU_GPU_ARCH_GP100, NVGPU_GPU_IMPL_GP10B)
+#define NVGPU_GPUID_GP104 \
+	GK20A_GPUID(NVGPU_GPU_ARCH_GP100, NVGPU_GPU_IMPL_GP104)
+#define NVGPU_GPUID_GP106 \
+	GK20A_GPUID(NVGPU_GPU_ARCH_GP100, NVGPU_GPU_IMPL_GP106)
 
 #define NVGPU_COMPAT_TEGRA_GP10B "nvidia,tegra186-gp10b"
 #define NVGPU_COMPAT_GENERIC_GP10B "nvidia,generic-gp10b"
@@ -25,8 +29,13 @@
 #define TEGRA_18x_GPUID_HAL gp10b_init_hal
 #define TEGRA_18x_GPU_COMPAT_TEGRA NVGPU_COMPAT_TEGRA_GP10B
 #define TEGRA_18x_GPU_COMPAT_GENERIC NVGPU_COMPAT_GENERIC_GP10B
+#define TEGRA_18x_GPUID2 NVGPU_GPUID_GP104
+#define TEGRA_18x_GPUID2_HAL gp106_init_hal
+#define TEGRA_18x_GPUID3 NVGPU_GPUID_GP106
+#define TEGRA_18x_GPUID3_HAL gp106_init_hal
 struct gpu_ops;
 extern int gp10b_init_hal(struct gk20a *);
+extern int gp106_init_hal(struct gk20a *);
 extern struct gk20a_platform t18x_gpu_tegra_platform;
 
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
-- 
cgit v1.2.2


From 4df844f7fc2bdd795445ca15c78df60e0d53e7ad Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Mon, 16 May 2016 11:37:58 +0300
Subject: gpu: nvgpu: gp10b: add PRAMIN support for mem accessors

JIRA DNVGPU-23

Change-Id: I6f4a7018ebeb5c7928667148a52f779ca4938e47
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1148120
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h  | 24 ++++++++++++++
 drivers/gpu/nvgpu/gp10b/hw_pram_gp10b.h | 57 +++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_pram_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h
index e8f7f3fb..02c06610 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_bus_gp10b.h
@@ -50,6 +50,30 @@
 #ifndef _hw_bus_gp10b_h_
 #define _hw_bus_gp10b_h_
 
+static inline u32 bus_bar0_window_r(void)
+{
+	return 0x00001700;
+}
+static inline u32 bus_bar0_window_base_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+static inline u32 bus_bar0_window_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 bus_bar0_window_target_sys_mem_coherent_f(void)
+{
+	return 0x2000000;
+}
+static inline u32 bus_bar0_window_target_sys_mem_noncoherent_f(void)
+{
+	return 0x3000000;
+}
+static inline u32 bus_bar0_window_target_bar0_window_base_shift_v(void)
+{
+	return 0x00000010;
+}
 static inline u32 bus_bar1_block_r(void)
 {
 	return 0x00001704;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_pram_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pram_gp10b.h
new file mode 100644
index 00000000..12a83a71
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_pram_gp10b.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_pram_gp10b_h_
+#define _hw_pram_gp10b_h_
+
+static inline u32 pram_data032_r(u32 i)
+{
+	return 0x00700000 + i*4;
+}
+#endif
-- 
cgit v1.2.2


From 140921cdf8c4c27ccf7b9844b2cc23130ba275f9 Mon Sep 17 00:00:00 2001
From: Pritesh Raithatha <praithatha@nvidia.com>
Date: Mon, 9 May 2016 11:47:17 +0530
Subject: gpu: nvgpu: change kernel path

All kernel versions are getting moved inside $TOP/kernel folder.
Changing kernel paths accordingly.

Bug 200190733

Change-Id: If2f4b8fd77da6c1534558ed34763aa1e1e76cbd6
Signed-off-by: Pritesh Raithatha <praithatha@nvidia.com>
Reviewed-on: http://git-master/r/1143387
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 75329a8d..20ba4b46 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -1,4 +1,4 @@
-nvgpu-t18x := ../../../../kernel-nvgpu-t18x/drivers/gpu/nvgpu
+nvgpu-t18x := ../../../../nvgpu-t18x/drivers/gpu/nvgpu
 
 nvgpu-y += \
 	$(nvgpu-t18x)/gp10b/gr_gp10b.o  \
-- 
cgit v1.2.2


From 5bc7b40524e0cd30ae5a601ed685bc2d470b8d78 Mon Sep 17 00:00:00 2001
From: Adeel Raza <araza@nvidia.com>
Date: Thu, 19 May 2016 17:24:44 -0700
Subject: gpu: nvgpu: gp10b: SM LRF ECC overcount WAR

SM LRF ECC HW overcounts errors in certain situations. Implement SW WAR
to correct error counts.

Bug 1752609
Bug 1761594

Change-Id: I79047d21e2e44e0fca3ece1da80f02faa4cd6c54
Signed-off-by: Adeel Raza <araza@nvidia.com>
Reviewed-on: http://git-master/r/1150773
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c    | 105 ++++++++++++++++++++++++----------
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h |   8 +++
 2 files changed, 84 insertions(+), 29 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 3c04c2e4..86cc0555 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -58,6 +58,41 @@ static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 	return valid;
 }
 
+static void gr_gp10b_sm_lrf_ecc_overcount_war(int single_err,
+						u32 sed_status,
+						u32 ded_status,
+						u32 *count_to_adjust,
+						u32 opposite_count)
+{
+	u32 over_count = 0;
+
+	sed_status >>= gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_b();
+	ded_status >>= gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_b();
+
+	/* One overcount for each partition on which a SBE occurred but not a
+	   DBE (or vice-versa) */
+	if (single_err) {
+		over_count =
+			hweight32(sed_status & ~ded_status);
+	} else {
+		over_count =
+			hweight32(ded_status & ~sed_status);
+	}
+
+	/* If both a SBE and a DBE occur on the same partition, then we have an
+	   overcount for the subpartition if the opposite error counts are
+	   zero. */
+	if ((sed_status & ded_status) && (opposite_count == 0)) {
+		over_count +=
+			hweight32(sed_status & ded_status);
+	}
+
+	if (*count_to_adjust > over_count)
+		*count_to_adjust -= over_count;
+	else
+		*count_to_adjust = 0;
+}
+
 static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 			bool *post_event, struct channel_gk20a *fault_ch)
 {
@@ -65,50 +100,62 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
-	u32 lrf_ecc_status, shm_ecc_status;
+	u32 lrf_ecc_status, lrf_ecc_sed_status, lrf_ecc_ded_status;
+	u32 lrf_single_count_delta, lrf_double_count_delta;
+	u32 shm_ecc_status;
 
 	gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch);
 
 	/* Check for LRF ECC errors. */
         lrf_ecc_status = gk20a_readl(g,
 			gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset);
-	if ( (lrf_ecc_status &
-		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f()) ||
-		(lrf_ecc_status &
-		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f()) ||
-		(lrf_ecc_status &
-		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f()) ||
-		(lrf_ecc_status &
-		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f()) ) {
-
+	lrf_ecc_sed_status = lrf_ecc_status &
+				(gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f() |
+				 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f() |
+				 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f() |
+				 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f());
+	lrf_ecc_ded_status = lrf_ecc_status &
+				(gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f() |
+				 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f() |
+				 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f() |
+				 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f());
+	lrf_single_count_delta =
+		gk20a_readl(g,
+			gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() +
+			offset);
+	lrf_double_count_delta =
+		gk20a_readl(g,
+			gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() +
+			offset);
+	gk20a_writel(g,
+		gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset,
+		0);
+	gk20a_writel(g,
+		gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset,
+		0);
+	if (lrf_ecc_sed_status) {
 		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
 			"Single bit error detected in SM LRF!");
 
+		gr_gp10b_sm_lrf_ecc_overcount_war(1,
+						lrf_ecc_sed_status,
+						lrf_ecc_ded_status,
+						&lrf_single_count_delta,
+						lrf_double_count_delta);
 		g->gr.t18x.ecc_stats.sm_lrf_single_err_count.counters[tpc] +=
-			gk20a_readl(g,
-				gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset);
-		gk20a_writel(g,
-			gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset,
-			0);
+							lrf_single_count_delta;
 	}
-	if ( (lrf_ecc_status &
-		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f()) ||
-		(lrf_ecc_status &
-		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f()) ||
-		(lrf_ecc_status &
-		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f()) ||
-		(lrf_ecc_status &
-		gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f()) ) {
-
+	if (lrf_ecc_ded_status) {
 		gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
 			"Double bit error detected in SM LRF!");
 
+		gr_gp10b_sm_lrf_ecc_overcount_war(0,
+						lrf_ecc_sed_status,
+						lrf_ecc_ded_status,
+						&lrf_double_count_delta,
+						lrf_single_count_delta);
 		g->gr.t18x.ecc_stats.sm_lrf_double_err_count.counters[tpc] +=
-			gk20a_readl(g,
-				gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset);
-		gk20a_writel(g,
-			gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset,
-			0);
+							lrf_double_count_delta;
 	}
 	gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
 			lrf_ecc_status);
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index f7fd3b09..12d84716 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -470,6 +470,10 @@ static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r(void)
 {
 	return 0x005046b8;
 }
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_b(void)
+{
+	return 4;
+}
 static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f(void)
 {
 	return 0x10;
@@ -486,6 +490,10 @@ static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_
 {
 	return 0x80;
 }
+static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_b(void)
+{
+	return 8;
+}
 static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f(void)
 {
 	return 0x100;
-- 
cgit v1.2.2


From a549165e7332c7618a61fbe65b86bf212901fee2 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Mon, 23 May 2016 16:31:45 +0530
Subject: gpu: nvgpu: secure boot HAL update

-And also enable GPCCS load using DMA

Updated/added secure boot HAL with methods
required to support multiple GPU chips.

JIRA DNVGPU-10

Change-Id: Id4546fa74954ba7be7c4544d74ad2b7a31b0ecec
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1151788
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 47 +++++++++++++++++++++++++++++++++----
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.h |  5 +++-
 2 files changed, 47 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index fca84116..ab736fbe 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -156,7 +156,8 @@ static void gp10b_pmu_load_multiple_falcons(struct gk20a *g, u32 falconidmask,
 		cmd.cmd.acr.boot_falcons.flags = flags;
 		cmd.cmd.acr.boot_falcons.falconidmask =
 				falconidmask;
-		cmd.cmd.acr.boot_falcons.usevamask = 0;
+		cmd.cmd.acr.boot_falcons.usevamask =
+				1 << LSF_FALCON_ID_GPCCS;
 		cmd.cmd.acr.boot_falcons.wprvirtualbase.lo =
 				u64_lo32(g->pmu.wpr_buf.gpu_va);
 		cmd.cmd.acr.boot_falcons.wprvirtualbase.hi =
@@ -171,7 +172,7 @@ static void gp10b_pmu_load_multiple_falcons(struct gk20a *g, u32 falconidmask,
 	return;
 }
 
-static int gp10b_load_falcon_ucode(struct gk20a *g, u32 falconidmask)
+int gp10b_load_falcon_ucode(struct gk20a *g, u32 falconidmask)
 {
 	u32 flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES;
 
@@ -221,7 +222,7 @@ static void pmu_handle_gr_param_msg(struct gk20a *g, struct pmu_msg *msg,
 	return;
 }
 
-static int gp10b_pg_gr_init(struct gk20a *g, u8 grfeaturemask)
+int gp10b_pg_gr_init(struct gk20a *g, u8 grfeaturemask)
 {
 	struct pmu_gk20a *pmu = &g->pmu;
 	struct pmu_cmd cmd;
@@ -280,7 +281,7 @@ static int gp10b_pmu_setup_elpg(struct gk20a *g)
 	return ret;
 }
 
-static void gp10b_write_dmatrfbase(struct gk20a *g, u32 addr)
+void gp10b_write_dmatrfbase(struct gk20a *g, u32 addr)
 {
 	gk20a_writel(g, pwr_falcon_dmatrfbase_r(),
 				addr);
@@ -396,12 +397,50 @@ static int send_ecc_overide_en_dis_cmd(struct gk20a *g, u32 bitmask)
 	return status;
 }
 
+static bool gp10b_is_lazy_bootstrap(u32 falcon_id)
+{
+	bool enable_status = false;
+
+	switch (falcon_id) {
+	case LSF_FALCON_ID_FECS:
+		enable_status = false;
+		break;
+	case LSF_FALCON_ID_GPCCS:
+		enable_status = true;
+		break;
+	default:
+		break;
+	}
+
+	return enable_status;
+}
+
+static bool gp10b_is_priv_load(u32 falcon_id)
+{
+	bool enable_status = false;
+
+	switch (falcon_id) {
+	case LSF_FALCON_ID_FECS:
+		enable_status = false;
+		break;
+	case LSF_FALCON_ID_GPCCS:
+		enable_status = false;
+		break;
+	default:
+		break;
+	}
+
+	return enable_status;
+}
+
 void gp10b_init_pmu_ops(struct gpu_ops *gops)
 {
 	if (gops->privsecurity) {
 		gm20b_init_secure_pmu(gops);
 		gops->pmu.init_wpr_region = gm20b_pmu_init_acr;
 		gops->pmu.load_lsfalcon_ucode = gp10b_load_falcon_ucode;
+		gops->pmu.is_lazy_bootstrap = gp10b_is_lazy_bootstrap;
+		gops->pmu.is_priv_load = gp10b_is_priv_load;
 	} else {
 		gk20a_init_pmu_ops(gops);
 		gops->pmu.load_lsfalcon_ucode = NULL;
diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h
index f61f6a93..18e7bdd3 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h
@@ -1,7 +1,7 @@
 /*
  * GP10B PMU
  *
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -17,5 +17,8 @@
 #define __PMU_GP10B_H_
 
 void gp10b_init_pmu_ops(struct gpu_ops *gops);
+int gp10b_load_falcon_ucode(struct gk20a *g, u32 falconidmask);
+int gp10b_pg_gr_init(struct gk20a *g, u8 grfeaturemask);
+void gp10b_write_dmatrfbase(struct gk20a *g, u32 addr);
 
 #endif /*__PMU_GP10B_H_*/
-- 
cgit v1.2.2


From b251b0125a07940fd3417ceec0c2e7c7f4794e30 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Tue, 24 May 2016 15:49:10 +0530
Subject: gpu: nvgpu: Enable ELPG init for gp10b

set can_elpg to true to support ELPG init

Bug N/A

Change-Id: I9bdf264689440ef715cf34a5332d03cb60c5aef7
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1152432
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index a857b838..68179685 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -369,6 +369,7 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	/* power management configuration */
 	.can_railgate           = true,
 	.enable_elpg            = true,
+	.can_elpg               = true,
 	.enable_blcg		= true,
 	.enable_slcg		= true,
 	.enable_elcg		= true,
-- 
cgit v1.2.2


From 85e67e368b4aa41f9a65c77731623d5ffd7029f5 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Thu, 26 May 2016 19:26:45 +0530
Subject: gpu: nvgpu: fix sparse warning

fix below sparse warning :
$TOP/kernel-nvgpu-t18x/drivers/gpu/nvgpu/gp106/pmu_gp106.c:22:5:
warning: symbol 'gp106_pmu_reset' was not declared. Should it be static?

Bug 200088648

Change-Id: I86120fb6b9733f256c96764a77c6ea4bb636934a
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1154452
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Amit Sharma (SW-TEGRA) <amisharma@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/pmu_gp106.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/pmu_gp106.c b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
index a9d05730..42ed85ec 100644
--- a/drivers/gpu/nvgpu/gp106/pmu_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
@@ -19,7 +19,7 @@
 #include "hw_mc_gp106.h"
 #include "hw_pwr_gp106.h"
 
-int gp106_pmu_reset(struct gk20a *g)
+static int gp106_pmu_reset(struct gk20a *g)
 {
 	gk20a_dbg_fn("");
 
-- 
cgit v1.2.2


From 642cc7416ebcf0d1e7b813a1cc67d48d58004297 Mon Sep 17 00:00:00 2001
From: Lakshmanan M <lm@nvidia.com>
Date: Mon, 23 May 2016 12:26:46 +0530
Subject: gpu: nvgpu: Add device_info_data support

Add device_info_data parsing support for the Pascal GPU series.
This is required to identify the instance id of a Logical CE
(NV_PTOP_DEVICE_INFO_TYPE_ENUM_LCE) engine, e.g. CE0, CE1, CE2, CE3, ...

JIRA DNVGPU-26

Change-Id: I35c42cb1d544729e4099db1528c690dd2be025f4
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1151605
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Ken Adams <kadams@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/hw_top_gp106.h | 36 ++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c   | 26 +++++++++++++++++++++++-
 drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h | 36 ++++++++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hw_top_gp106.h b/drivers/gpu/nvgpu/gp106/hw_top_gp106.h
index ed8e0888..bef6b804 100644
--- a/drivers/gpu/nvgpu/gp106/hw_top_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_top_gp106.h
@@ -158,6 +158,42 @@ static inline u32 top_device_info_entry_enum_v(void)
 {
 	return 0x00000002;
 }
+static inline u32 top_device_info_entry_data_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 top_device_info_data_type_v(u32 r)
+{
+	return (r >> 30) & 0x1;
+}
+static inline u32 top_device_info_data_type_enum2_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 top_device_info_data_inst_id_v(u32 r)
+{
+	return (r >> 26) & 0xf;
+}
+static inline u32 top_device_info_data_pri_base_v(u32 r)
+{
+	return (r >> 12) & 0xfff;
+}
+static inline u32 top_device_info_data_pri_base_align_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 top_device_info_data_fault_id_enum_v(u32 r)
+{
+	return (r >> 3) & 0x1f;
+}
+static inline u32 top_device_info_data_fault_id_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 top_device_info_data_fault_id_valid_v(void)
+{
+	return 0x00000001;
+}
 static inline u32 top_scratch1_r(void)
 {
 	return 0x0002240c;
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index 45de221e..89b5527d 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -182,7 +182,8 @@ static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c)
 	return 0;
 }
 
-static int gp10b_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type)
+static int gp10b_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type,
+					u32 *inst_id)
 {
 	int ret = ENGINE_INVAL_GK20A;
 
@@ -197,6 +198,28 @@ static int gp10b_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type)
 	return ret;
 }
 
+void gp10b_device_info_data_parse(struct gk20a *g, u32 table_entry,
+				u32 *inst_id, u32 *pri_base, u32 *fault_id)
+{
+	if (top_device_info_data_type_v(table_entry) ==
+	    top_device_info_data_type_enum2_v()) {
+		if (inst_id)
+			*inst_id = top_device_info_data_inst_id_v(table_entry);
+		if (pri_base) {
+			*pri_base =
+			    (top_device_info_data_pri_base_v(table_entry)
+			    << top_device_info_data_pri_base_align_v());
+		}
+		if (fault_id && (top_device_info_data_fault_id_v(table_entry) ==
+		    top_device_info_data_fault_id_valid_v())) {
+			*fault_id =
+			     top_device_info_data_fault_id_enum_v(table_entry);
+		}
+	} else
+		gk20a_err(g->dev, "unknown device_info_data %d",
+			top_device_info_data_type_v(table_entry));
+}
+
 void gp10b_init_fifo(struct gpu_ops *gops)
 {
 	gm20b_init_fifo(gops);
@@ -204,4 +227,5 @@ void gp10b_init_fifo(struct gpu_ops *gops)
 	gops->fifo.get_pbdma_signature = gp10b_fifo_get_pbdma_signature;
 	gops->fifo.resetup_ramfc = gp10b_fifo_resetup_ramfc;
 	gops->fifo.engine_enum_from_type = gp10b_fifo_engine_enum_from_type;
+	gops->fifo.device_info_data_parse = gp10b_device_info_data_parse;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h
index 5376717f..c6645ca0 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_top_gp10b.h
@@ -186,4 +186,40 @@ static inline u32 top_device_info_entry_engine_type_v(void)
 {
 	return 0x00000002;
 }
+static inline u32 top_device_info_entry_data_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 top_device_info_data_type_v(u32 r)
+{
+	return (r >> 30) & 0x1;
+}
+static inline u32 top_device_info_data_type_enum2_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 top_device_info_data_inst_id_v(u32 r)
+{
+	return (r >> 26) & 0xf;
+}
+static inline u32 top_device_info_data_pri_base_v(u32 r)
+{
+	return (r >> 12) & 0xfff;
+}
+static inline u32 top_device_info_data_pri_base_align_v(void)
+{
+	return 0x0000000c;
+}
+static inline u32 top_device_info_data_fault_id_enum_v(u32 r)
+{
+	return (r >> 3) & 0x1f;
+}
+static inline u32 top_device_info_data_fault_id_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 top_device_info_data_fault_id_valid_v(void)
+{
+	return 0x00000001;
+}
 #endif
-- 
cgit v1.2.2


From 9564aa4abb5faa5cc46cd66bf7d00d1f457828c1 Mon Sep 17 00:00:00 2001
From: Cory Perry <cperry@nvidia.com>
Date: Thu, 19 May 2016 18:45:22 -0700
Subject: gpu: nvgpu: Fix timeout error in suspend_contexts

* Moving jiffy counter after preemption work to more accurately and fairly give
time for preemption to complete.
* Add debug information to coordinate waiting.
* Check if cilp is still pending before returning the timedout error.

Bug 1700310

Change-Id: Ic16bb3b11f2cd5aea9a5a85b5e0d9927732a065c
Signed-off-by: Cory Perry <cperry@nvidia.com>
Reviewed-on: http://git-master/r/1151907
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 86cc0555..205d2c97 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1953,8 +1953,6 @@ static int gr_gp10b_suspend_contexts(struct gk20a *g,
 				struct dbg_session_gk20a *dbg_s,
 				int *ctx_resident_ch_fd)
 {
-	unsigned long end_jiffies = jiffies +
-		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
 	u32 delay = GR_IDLE_CHECK_DEFAULT;
 	bool cilp_preempt_pending = false;
 	struct channel_gk20a *cilp_preempt_pending_ch = NULL;
@@ -2000,6 +1998,12 @@ static int gr_gp10b_suspend_contexts(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx =
 				&cilp_preempt_pending_ch->ch_ctx;
 		struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
+		unsigned long end_jiffies = jiffies +
+			msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+			"CILP preempt pending, waiting %lu msecs for preemption",
+			gk20a_get_gr_idle_timeout(g));
 
 		do {
 			if (!gr_ctx->t18x.cilp_preempt_pending)
@@ -2010,7 +2014,9 @@ static int gr_gp10b_suspend_contexts(struct gk20a *g,
 		} while (time_before(jiffies, end_jiffies)
 			|| !tegra_platform_is_silicon());
 
-		err = -ETIMEDOUT;
+		/* If cilp is still pending at this point, timeout */
+		if (gr_ctx->t18x.cilp_preempt_pending)
+			err = -ETIMEDOUT;
 	}
 
 	*ctx_resident_ch_fd = local_ctx_resident_ch_fd;
-- 
cgit v1.2.2


From a334f78461a1d5a840275a3c55d9b5b41eeca699 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 27 May 2016 12:05:59 -0700
Subject: gpu: nvgpu: Force GPCCS priv load

Use priv load for GPCCS instead of DMA.

Bug 200204675

Change-Id: Ic7ea7d9e0ef98330e0bdd7606284b8fb3c5bfec8
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1155281
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: David Martinez Nieto <dmartineznie@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index ab736fbe..7832b2ed 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -156,8 +156,7 @@ static void gp10b_pmu_load_multiple_falcons(struct gk20a *g, u32 falconidmask,
 		cmd.cmd.acr.boot_falcons.flags = flags;
 		cmd.cmd.acr.boot_falcons.falconidmask =
 				falconidmask;
-		cmd.cmd.acr.boot_falcons.usevamask =
-				1 << LSF_FALCON_ID_GPCCS;
+		cmd.cmd.acr.boot_falcons.usevamask = 0;
 		cmd.cmd.acr.boot_falcons.wprvirtualbase.lo =
 				u64_lo32(g->pmu.wpr_buf.gpu_va);
 		cmd.cmd.acr.boot_falcons.wprvirtualbase.hi =
@@ -424,7 +423,7 @@ static bool gp10b_is_priv_load(u32 falcon_id)
 		enable_status = false;
 		break;
 	case LSF_FALCON_ID_GPCCS:
-		enable_status = false;
+		enable_status = true;
 		break;
 	default:
 		break;
-- 
cgit v1.2.2


From 2029134634fe5292f56ba049fef30fc85a5bcef0 Mon Sep 17 00:00:00 2001
From: Richard Zhao <rizhao@nvidia.com>
Date: Mon, 9 May 2016 15:25:44 -0700
Subject: gpu: nvgpu: vgpu: manage gr_ctx as independent resource

gr_ctx will be managed as an independent resource in the RM server,
and vgpu can get a gr_ctx handle.

Bug 1702773

Change-Id: Ifceb44b7d9a1ba03fc2a4df847f4a78ac4c4a0d4
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/1144934
(cherry picked from commit 0da3101d9b59fe1f9a47ce7b70b30cb8919f35ac)
Reviewed-on: http://git-master/r/1150707
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
index 08793e18..3194fff1 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -21,7 +21,7 @@ static void vgpu_gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 				struct gr_ctx_desc *gr_ctx)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
-	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_cmd_msg msg = {0};
 	struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
 	int err;
 
@@ -30,9 +30,9 @@ static void vgpu_gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 	if (!gr_ctx || !gr_ctx->mem.gpu_va)
 		return;
 
-	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_CTX;
+	msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE;
 	msg.handle = platform->virt_handle;
-	p->handle = gr_ctx->virt_ctx;
+	p->gr_ctx_handle = gr_ctx->virt_ctx;
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 
@@ -53,10 +53,10 @@ static int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 				u32 flags)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
-	struct tegra_vgpu_cmd_msg msg;
+	struct tegra_vgpu_cmd_msg msg = {0};
 	struct tegra_vgpu_gr_bind_ctxsw_buffers_params *p =
 			&msg.params.gr_bind_ctxsw_buffers;
-	struct gr_ctx_desc *gr_ctx = *__gr_ctx;
+	struct gr_ctx_desc *gr_ctx;
 	int err;
 
 	gk20a_dbg_fn("");
@@ -68,6 +68,8 @@ static int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 	if (err)
 		return err;
 
+	gr_ctx = *__gr_ctx;
+
 	if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp)
 		flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP;
 
@@ -161,7 +163,7 @@ static int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 	if (gr_ctx->graphics_preempt_mode || gr_ctx->compute_preempt_mode) {
 		msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTXSW_BUFFERS;
 		msg.handle = platform->virt_handle;
-		p->handle = gr_ctx->virt_ctx;
+		p->gr_ctx_handle = gr_ctx->virt_ctx;
 		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 		if (err || msg.ret) {
 			err = -ENOMEM;
-- 
cgit v1.2.2


From 1e67de6e6ea930091bee381ff4a96ae0ca0c76d6 Mon Sep 17 00:00:00 2001
From: Richard Zhao <rizhao@nvidia.com>
Date: Mon, 9 May 2016 15:27:36 -0700
Subject: gpu: nvgpu: init tsg HAL ops

Bug 1702773

Change-Id: I9b6e1d0f2f4fe979f6fab83347884bd69413ccda
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/1144935
(cherry picked from commit f79eb75272879c869b137cd042312db0a5953412)
Reviewed-on: http://git-master/r/1127031
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index ea5e3f15..a75d2604 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -227,6 +227,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gp10b_init_regops(gops);
 	gp10b_init_cde_ops(gops);
 	gp10b_init_therm_ops(gops);
+	gk20a_init_tsg_ops(gops);
 	gops->name = "gp10b";
 	gops->chip_init_gpu_characteristics = gp10b_init_gpu_characteristics;
 	gops->get_litter_value = gp10b_get_litter_value;
-- 
cgit v1.2.2


From ba949fd8af16283e9712541ed69eaffcc35ed38c Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Tue, 24 May 2016 16:47:13 -0700
Subject: gpu: nvgpu: gp10b: set floor emc freq to bwmgr

Set emc floor frequency as zero during rail-gate and set max emc
frequency as floor frequency during rail-ungate.

Bug 1770241

Change-Id: Ib6b6ea6c8b04518423126c3ca3600b4afac15180
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1152848
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 68179685..d3ccd98e 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -220,6 +220,13 @@ static bool gp10b_tegra_is_railgated(struct device *dev)
 static int gp10b_tegra_railgate(struct device *dev)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	struct gk20a_scale_profile *profile = platform->g->scale_profile;
+
+	/* remove emc frequency floor */
+	if (profile)
+		tegra_bwmgr_set_emc(
+			(struct tegra_bwmgr_client *)profile->private_data,
+			0, TEGRA_BWMGR_SET_EMC_FLOOR);
 
 	if (tegra_bpmp_running() &&
 	    tegra_powergate_is_powered(TEGRA_POWERGATE_GPU)) {
@@ -237,6 +244,7 @@ static int gp10b_tegra_unrailgate(struct device *dev)
 {
 	int ret = 0;
 	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	struct gk20a_scale_profile *profile = platform->g->scale_profile;
 
 	if (tegra_bpmp_running()) {
 		int i;
@@ -246,6 +254,13 @@ static int gp10b_tegra_unrailgate(struct device *dev)
 				clk_prepare_enable(platform->clk[i]);
 		}
 	}
+
+	/* to start with set emc frequency floor to max rate*/
+	if (profile)
+		tegra_bwmgr_set_emc(
+			(struct tegra_bwmgr_client *)profile->private_data,
+			tegra_bwmgr_get_max_emc_rate(),
+			TEGRA_BWMGR_SET_EMC_FLOOR);
 	return ret;
 }
 
@@ -407,7 +422,6 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	/* frequency scaling configuration */
 	.prescale = gp10b_tegra_prescale,
 	.postscale = gp10b_tegra_postscale,
-
 	.devfreq_governor = "nvhost_podgov",
 	.qos_id = PM_QOS_GPU_FREQ_MIN,
 
-- 
cgit v1.2.2


From 85f579c6e57bdfcab18b01e6dba5854e5bc308ae Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Tue, 31 May 2016 18:09:08 +0530
Subject: gpu: nvgpu: use correct APIs for disable and preempt

In gr_gp10b_set_preemption_mode() and in gp10b_fifo_resetup_ramfc(),
we call channel-specific APIs to disable/preempt/enable the channel,
but we do not consider TSGs in this case.

Hence use the correct APIs (listed below) in the above functions; they
handle a channel or a TSG internally:
gk20a_disable_channel_tsg()
gk20a_fifo_preempt()
gk20a_enable_channel_tsg()

Bug 200205041

Change-Id: I2369e79b2af3b8a91699044106293865d5f8f260
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1157192
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c |  8 +++-----
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c   | 11 +++++++----
 2 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index 89b5527d..aa38dc54 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -156,10 +156,10 @@ static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c)
 
 	if (new_syncpt && new_syncpt != old_syncpt) {
 		/* disable channel */
-		c->g->ops.fifo.disable_channel(c);
+		gk20a_disable_channel_tsg(c->g, c);
 
 		/* preempt the channel */
-		WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
+		WARN_ON(gk20a_fifo_preempt(c->g, c));
 
 		v = pbdma_allowed_syncpoints_0_valid_f(1);
 
@@ -173,9 +173,7 @@ static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c)
 	}
 
 	/* enable channel */
-	gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
-		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
-		ccsr_channel_enable_set_true_f());
+	gk20a_enable_channel_tsg(c->g, c);
 
 	gk20a_dbg_fn("done");
 
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 205d2c97..5db65175 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -2061,18 +2061,21 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 	if (gk20a_mem_begin(g, mem))
 		return -ENOMEM;
 
-	g->ops.fifo.disable_channel(ch);
-	err = g->ops.fifo.preempt_channel(g, ch->hw_chid);
+	err = gk20a_disable_channel_tsg(g, ch);
 	if (err)
 		goto unmap_ctx;
 
+	err = gk20a_fifo_preempt(g, ch);
+	if (err)
+		goto enable_ch;
+
 	if (g->ops.gr.update_ctxsw_preemption_mode) {
 		g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, mem);
 		g->ops.gr.commit_global_cb_manager(g, ch, true);
 	}
 
-	g->ops.fifo.enable_channel(ch);
-
+enable_ch:
+	gk20a_enable_channel_tsg(g, ch);
 unmap_ctx:
 	gk20a_mem_end(g, mem);
 
-- 
cgit v1.2.2


From 4e321eb1c84dca5f045b6ad1363cdc35ab763462 Mon Sep 17 00:00:00 2001
From: Supriya <ssharatkumar@nvidia.com>
Date: Wed, 23 Mar 2016 20:33:02 +0530
Subject: gpu: nvgpu: Add Fuse prints on PMU Halt

-Print fuse values in case of PMU halt error
-and mailbox reads 0xDEADDEAD

Bug 1737044

Change-Id: Icb9677ca278bd316232e07f1d92980f6deb17125
Signed-off-by: Supriya <ssharatkumar@nvidia.com>
Reviewed-on: http://git-master/r/1120988
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h |  8 ++++++++
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c     | 13 +++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
index ae524ce5..2b1acf2f 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fuse_gp10b.h
@@ -134,4 +134,12 @@ static inline u32 fuse_opt_feature_fuses_override_disable_r(void)
 {
 	return 0x000213f0;
 }
+static inline u32 fuse_opt_sec_debug_en_r(void)
+{
+	return 0x00021218;
+}
+static inline u32 fuse_opt_priv_sec_en_r(void)
+{
+	return 0x00021434;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index 7832b2ed..f40c1b7b 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -22,6 +22,7 @@
 
 #include "pmu_gp10b.h"
 #include "hw_pwr_gp10b.h"
+#include "hw_fuse_gp10b.h"
 #include "gp10b_sysfs.h"
 
 #define gp10b_dbg_pmu(fmt, arg...) \
@@ -432,6 +433,17 @@ static bool gp10b_is_priv_load(u32 falcon_id)
 	return enable_status;
 }
 
+/*Dump Security related fuses*/
+static void pmu_dump_security_fuses_gp10b(struct gk20a *g)
+{
+	gk20a_err(dev_from_gk20a(g), "FUSE_OPT_SEC_DEBUG_EN_0 : 0x%x",
+			gk20a_readl(g, fuse_opt_sec_debug_en_r()));
+	gk20a_err(dev_from_gk20a(g), "FUSE_OPT_PRIV_SEC_EN_0 : 0x%x",
+			gk20a_readl(g, fuse_opt_priv_sec_en_r()));
+	gk20a_err(dev_from_gk20a(g), "FUSE_GCPLEX_CONFIG_FUSE_0 : 0x%x",
+			tegra_fuse_readl(FUSE_GCPLEX_CONFIG_FUSE_0));
+}
+
 void gp10b_init_pmu_ops(struct gpu_ops *gops)
 {
 	if (gops->privsecurity) {
@@ -455,4 +467,5 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd =
 			send_ecc_overide_en_dis_cmd;
 	gops->pmu.reset = gk20a_pmu_reset;
+	gops->pmu.dump_secure_fuses = pmu_dump_security_fuses_gp10b;
 }
-- 
cgit v1.2.2


From 11e9ba82de18ba3be7701f36628a9f70ebbe1b39 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Wed, 25 May 2016 13:23:52 +0300
Subject: gpu: nvgpu: fix patch write error check in
 update_ctxsw_preemption_mode

Don't attempt to access memory if the patch context can't be mapped, but
print an error message instead.

Change-Id: I374dc94d13674e0bd9d081b790f7c0dac834e868
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1157828
GVS: Gerrit_Virtual_Submit
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 5db65175..4a8a1d8e 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1118,6 +1118,11 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 				gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8);
 
 		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		if (err) {
+			gk20a_err(dev_from_gk20a(g),
+					"can't map patch context");
+			goto out;
+		}
 
 		addr = (u64_lo32(gr_ctx->t18x.betacb_ctxsw_buffer.gpu_va) >>
 			gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
@@ -1169,6 +1174,7 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
 	}
 
+out:
 	gk20a_dbg_fn("done");
 }
 
-- 
cgit v1.2.2


From 8403bb63000fd9aa35a43fcddc82de2760ad0ac3 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Wed, 25 May 2016 13:23:52 +0300
Subject: gpu: nvgpu: map patch ctx in set_preemption_mode

The per-write map/unmap feature from gr_gk20a_ctx_patch_write_begin() is
dropped, so call begin/end explicitly from gr_gp10b_set_preemption_mode
for the commit_global_cb_manager call.

Change-Id: I7bf952fffb54d4f18706e77dea015ffe4b68bcfe
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1157835
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 4a8a1d8e..339f0413 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -2077,7 +2077,15 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 
 	if (g->ops.gr.update_ctxsw_preemption_mode) {
 		g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, mem);
+
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		if (err) {
+			gk20a_err(dev_from_gk20a(g),
+					"can't map patch context");
+			goto enable_ch;
+		}
 		g->ops.gr.commit_global_cb_manager(g, ch, true);
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
 	}
 
 enable_ch:
-- 
cgit v1.2.2


From 943be575ccae312ab4cf10e1bdf1a2203d10d689 Mon Sep 17 00:00:00 2001
From: Adeel Raza <araza@nvidia.com>
Date: Fri, 3 Jun 2016 10:46:31 -0700
Subject: gpu: nvgpu: gp10b: clear TEX ECC interrupt

Fix bug in clearing the TEX ECC interrupt.

Bug 200206379

Change-Id: I758b55d20919173de527aeb98143851edcde4eeb
Signed-off-by: Adeel Raza <araza@nvidia.com>
Reviewed-on: http://git-master/r/1158806
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c    | 2 +-
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 339f0413..2ba18410 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -346,7 +346,7 @@ static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
 
 	gk20a_writel(g,
 		     gr_gpc0_tpc0_tex_m_hww_esr_r() + offset,
-		     esr);
+		     esr | gr_gpc0_tpc0_tex_m_hww_esr_reset_active_f());
 
 	return ret;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 12d84716..0e47c508 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -3694,6 +3694,10 @@ static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f(void)
 {
 	return 0x100;
 }
+static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_reset_active_f(void)
+{
+	return 0x40000000;
+}
 static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_r(void)
 {
 	return 0x00504648;
-- 
cgit v1.2.2


From c8569f1ebfcdd4546d3674458684c7e1315872a4 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Mon, 6 Jun 2016 10:43:56 -0700
Subject: gpu: nvgpu: remove clockgate_delay param

Removed platform data parameter clockgate_delay, since it is not
really used for gpu clock gating any more.

Change-Id: I4c7148c70699cb5ed24f0b034ddc92bfb4b41887
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1159594
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index d3ccd98e..952b79b5 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -379,7 +379,6 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 
 	/* power management configuration */
 	.railgate_delay		= 500,
-	.clockgate_delay	= 50,
 
 	/* power management configuration */
 	.can_railgate           = true,
-- 
cgit v1.2.2


From 9454529abe0ac42d15df01e36898cd2c840de9c8 Mon Sep 17 00:00:00 2001
From: Lakshmanan M <lm@nvidia.com>
Date: Thu, 2 Jun 2016 09:39:52 +0530
Subject: gpu: nvgpu: Add multiple engine and runlist support

This CL covers the following modifications:
1) Added multiple engine_info support
2) Added multiple runlist_info support
3) Initial changes for ASYNC CE support
4) Added ASYNC CE interrupt support for
   Pascal GPU series
5) Removed hard coded engine_id logic and
   made it generic
6) Code cleanup for readability

JIRA DNVGPU-26

Change-Id: Ibf46a89a5308c82f01040ffa979c5014b3206f8e
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1156022
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile              |  2 +-
 drivers/gpu/nvgpu/gp106/hal_gp106.c     |  7 ++-
 drivers/gpu/nvgpu/gp106/hw_ce2_gp106.h  | 81 --------------------------------
 drivers/gpu/nvgpu/gp106/hw_ce_gp106.h   | 81 ++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/hw_proj_gp106.h |  4 ++
 drivers/gpu/nvgpu/gp106/hw_top_gp106.h  |  8 ++++
 drivers/gpu/nvgpu/gp10b/ce2_gp10b.c     | 83 ---------------------------------
 drivers/gpu/nvgpu/gp10b/ce2_gp10b.h     | 29 ------------
 drivers/gpu/nvgpu/gp10b/ce_gp10b.c      | 82 ++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/ce_gp10b.h      | 26 +++++++++++
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c    |  9 ++--
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c      |  5 +-
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c     |  7 ++-
 drivers/gpu/nvgpu/gp10b/hw_ce2_gp10b.h  | 81 --------------------------------
 drivers/gpu/nvgpu/gp10b/hw_ce_gp10b.h   | 81 ++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h |  4 ++
 drivers/gpu/nvgpu/gp10b/mc_gp10b.c      | 59 ++++++++++++++++++-----
 drivers/gpu/nvgpu/gp10b/therm_gp10b.c   |  9 ++--
 18 files changed, 360 insertions(+), 298 deletions(-)
 delete mode 100644 drivers/gpu/nvgpu/gp106/hw_ce2_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_ce_gp106.h
 delete mode 100644 drivers/gpu/nvgpu/gp10b/ce2_gp10b.c
 delete mode 100644 drivers/gpu/nvgpu/gp10b/ce2_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/ce_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/ce_gp10b.h
 delete mode 100644 drivers/gpu/nvgpu/gp10b/hw_ce2_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/gp10b/hw_ce_gp10b.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 20ba4b46..89b9b13a 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -3,7 +3,7 @@ nvgpu-t18x := ../../../../nvgpu-t18x/drivers/gpu/nvgpu
 nvgpu-y += \
 	$(nvgpu-t18x)/gp10b/gr_gp10b.o  \
 	$(nvgpu-t18x)/gp10b/gr_ctx_gp10b.o  \
-	$(nvgpu-t18x)/gp10b/ce2_gp10b.o \
+	$(nvgpu-t18x)/gp10b/ce_gp10b.o \
 	$(nvgpu-t18x)/gp10b/mc_gp10b.o  \
 	$(nvgpu-t18x)/gp10b/fifo_gp10b.o  \
 	$(nvgpu-t18x)/gp10b/ltc_gp10b.o \
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 5c9e012d..1dd16139 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -24,7 +24,7 @@
 #include "gp10b/mc_gp10b.h"
 #include "gp10b/ltc_gp10b.h"
 #include "gp10b/mm_gp10b.h"
-#include "gp10b/ce2_gp10b.h"
+#include "gp10b/ce_gp10b.h"
 #include "gp10b/fb_gp10b.h"
 #include "gp10b/fifo_gp10b.h"
 #include "gp10b/gp10b_gating_reglist.h"
@@ -149,6 +149,9 @@ static int gp106_get_litter_value(struct gk20a *g,
 	case GPU_LIT_ROP_SHARED_BASE:
 		ret = proj_rop_shared_base_v();
 		break;
+	case GPU_LIT_HOST_NUM_ENGINES:
+		ret = proj_host_num_engines_v();
+		break;
 	case GPU_LIT_HOST_NUM_PBDMA:
 		ret = proj_host_num_pbdma_v();
 		break;
@@ -189,7 +192,7 @@ int gp106_init_hal(struct gk20a *g)
 	gp10b_init_ltc(gops);
 	gp10b_init_fb(gops);
 	gp10b_init_fifo(gops);
-	gp10b_init_ce2(gops);
+	gp10b_init_ce(gops);
 	gp106_init_gr_ctx(gops);
 	gp10b_init_mm(gops);
 	gp106_init_pmu_ops(gops);
diff --git a/drivers/gpu/nvgpu/gp106/hw_ce2_gp106.h b/drivers/gpu/nvgpu/gp106/hw_ce2_gp106.h
deleted file mode 100644
index d56b930b..00000000
--- a/drivers/gpu/nvgpu/gp106/hw_ce2_gp106.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-/*
- * Function naming determines intended use:
- *
- *     <x>_r(void) : Returns the offset for register <x>.
- *
- *     <x>_o(void) : Returns the offset for element <x>.
- *
- *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
- *
- *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
- *
- *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
- *         and masked to place it at field <y> of register <x>.  This value
- *         can be |'d with others to produce a full register value for
- *         register <x>.
- *
- *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
- *         value can be ~'d and then &'d to clear the value of field <y> for
- *         register <x>.
- *
- *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
- *         to place it at field <y> of register <x>.  This value can be |'d
- *         with others to produce a full register value for <x>.
- *
- *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
- *         <x> value 'r' after being shifted to place its LSB at bit 0.
- *         This value is suitable for direct comparison with other unshifted
- *         values appropriate for use in field <y> of register <x>.
- *
- *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
- *         field <y> of register <x>.  This value is suitable for direct
- *         comparison with unshifted values appropriate for use in field <y>
- *         of register <x>.
- */
-#ifndef _hw_ce2_gp106_h_
-#define _hw_ce2_gp106_h_
-
-static inline u32 ce2_intr_status_r(u32 i)
-{
-	return 0x00104410 + i*128;
-}
-static inline u32 ce2_intr_status_blockpipe_pending_f(void)
-{
-	return 0x1;
-}
-static inline u32 ce2_intr_status_blockpipe_reset_f(void)
-{
-	return 0x1;
-}
-static inline u32 ce2_intr_status_nonblockpipe_pending_f(void)
-{
-	return 0x2;
-}
-static inline u32 ce2_intr_status_nonblockpipe_reset_f(void)
-{
-	return 0x2;
-}
-static inline u32 ce2_intr_status_launcherr_pending_f(void)
-{
-	return 0x4;
-}
-static inline u32 ce2_intr_status_launcherr_reset_f(void)
-{
-	return 0x4;
-}
-#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_ce_gp106.h b/drivers/gpu/nvgpu/gp106/hw_ce_gp106.h
new file mode 100644
index 00000000..36311136
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_ce_gp106.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_ce_gp106_h_
+#define _hw_ce_gp106_h_
+
+static inline u32 ce_intr_status_r(u32 i)
+{
+	return 0x00104410 + i*128;
+}
+static inline u32 ce_intr_status_blockpipe_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 ce_intr_status_blockpipe_reset_f(void)
+{
+	return 0x1;
+}
+static inline u32 ce_intr_status_nonblockpipe_pending_f(void)
+{
+	return 0x2;
+}
+static inline u32 ce_intr_status_nonblockpipe_reset_f(void)
+{
+	return 0x2;
+}
+static inline u32 ce_intr_status_launcherr_pending_f(void)
+{
+	return 0x4;
+}
+static inline u32 ce_intr_status_launcherr_reset_f(void)
+{
+	return 0x4;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_proj_gp106.h b/drivers/gpu/nvgpu/gp106/hw_proj_gp106.h
index 0b4b86b1..01e835ec 100644
--- a/drivers/gpu/nvgpu/gp106/hw_proj_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_proj_gp106.h
@@ -106,6 +106,10 @@ static inline u32 proj_tpc_in_gpc_shared_base_v(void)
 {
 	return 0x00001800;
 }
+static inline u32 proj_host_num_engines_v(void)
+{
+	return 0x00000009;
+}
 static inline u32 proj_host_num_pbdma_v(void)
 {
 	return 0x00000004;
diff --git a/drivers/gpu/nvgpu/gp106/hw_top_gp106.h b/drivers/gpu/nvgpu/gp106/hw_top_gp106.h
index bef6b804..e833c152 100644
--- a/drivers/gpu/nvgpu/gp106/hw_top_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_top_gp106.h
@@ -146,6 +146,14 @@ static inline u32 top_device_info_type_enum_copy0_f(void)
 {
 	return 0x4;
 }
+static inline u32 top_device_info_type_enum_lce_v(void)
+{
+	return 0x00000013;
+}
+static inline u32 top_device_info_type_enum_lce_f(void)
+{
+	return 0x4c;
+}
 static inline u32 top_device_info_entry_v(u32 r)
 {
 	return (r >> 0) & 0x3;
diff --git a/drivers/gpu/nvgpu/gp10b/ce2_gp10b.c b/drivers/gpu/nvgpu/gp10b/ce2_gp10b.c
deleted file mode 100644
index 4cb13f3b..00000000
--- a/drivers/gpu/nvgpu/gp10b/ce2_gp10b.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * GK20A Graphics Copy Engine  (gr host)
- *
- * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */
-#include "hw_ce2_gp10b.h"
-#include "ce2_gp10b.h"
-
-static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
-{
-	gk20a_dbg(gpu_dbg_intr, "ce2 non-blocking pipe interrupt\n");
-
-	/* wake theads waiting in this channel */
-	gk20a_channel_semaphore_wakeup(g, true);
-	return ce2_intr_status_nonblockpipe_pending_f();
-}
-
-static u32 ce2_blockpipe_isr(struct gk20a *g, u32 fifo_intr)
-{
-	gk20a_dbg(gpu_dbg_intr, "ce2 blocking pipe interrupt\n");
-
-	return ce2_intr_status_blockpipe_pending_f();
-}
-
-static u32 ce2_launcherr_isr(struct gk20a *g, u32 fifo_intr)
-{
-	gk20a_dbg(gpu_dbg_intr, "ce2 launch error interrupt\n");
-
-	return ce2_intr_status_launcherr_pending_f();
-}
-
-static void gp10b_ce2_isr(struct gk20a *g)
-{
-	u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r(0));
-	u32 clear_intr = 0;
-
-	gk20a_dbg(gpu_dbg_intr, "ce2 isr %08x\n", ce2_intr);
-
-	/* clear blocking interrupts: they exibit broken behavior */
-	if (ce2_intr & ce2_intr_status_blockpipe_pending_f())
-		clear_intr |= ce2_blockpipe_isr(g, ce2_intr);
-
-	if (ce2_intr & ce2_intr_status_launcherr_pending_f())
-		clear_intr |= ce2_launcherr_isr(g, ce2_intr);
-
-	gk20a_writel(g, ce2_intr_status_r(0), clear_intr);
-	return;
-}
-
-static void gp10b_ce2_nonstall_isr(struct gk20a *g)
-{
-	u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r(0));
-	u32 clear_intr = 0;
-
-	gk20a_dbg(gpu_dbg_intr, "ce2 nonstall isr %08x\n", ce2_intr);
-
-	if (ce2_intr & ce2_intr_status_nonblockpipe_pending_f())
-		clear_intr |= ce2_nonblockpipe_isr(g, ce2_intr);
-
-	gk20a_writel(g, ce2_intr_status_r(0), clear_intr);
-
-	return;
-}
-void gp10b_init_ce2(struct gpu_ops *gops)
-{
-	gops->ce2.isr_stall = gp10b_ce2_isr;
-	gops->ce2.isr_nonstall = gp10b_ce2_nonstall_isr;
-}
diff --git a/drivers/gpu/nvgpu/gp10b/ce2_gp10b.h b/drivers/gpu/nvgpu/gp10b/ce2_gp10b.h
deleted file mode 100644
index d432d1e0..00000000
--- a/drivers/gpu/nvgpu/gp10b/ce2_gp10b.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * drivers/video/tegra/host/gk20a/fifo_gk20a.h
- *
- * GK20A graphics copy engine (gr host)
- *
- * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- */
-#ifndef __CE2_GP10B_H__
-#define __CE2_GP10B_H__
-
-#include "gk20a/channel_gk20a.h"
-#include "gk20a/tsg_gk20a.h"
-
-void gp10b_init_ce2(struct gpu_ops *gops);
-
-#endif /*__CE2_GP10B_H__*/
diff --git a/drivers/gpu/nvgpu/gp10b/ce_gp10b.c b/drivers/gpu/nvgpu/gp10b/ce_gp10b.c
new file mode 100644
index 00000000..a35c9817
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/ce_gp10b.c
@@ -0,0 +1,82 @@
+/*
+ * Pascal GPU series Copy Engine.
+ *
+ * Copyright (c) 2011-2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.
+ */
+
+#include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */
+#include "hw_ce_gp10b.h"
+#include "ce_gp10b.h"
+
+static u32 ce_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
+{
+	gk20a_dbg(gpu_dbg_intr, "ce non-blocking pipe interrupt\n");
+
+	/* wake threads waiting in this channel */
+	gk20a_channel_semaphore_wakeup(g, true);
+	return ce_intr_status_nonblockpipe_pending_f();
+}
+
+static u32 ce_blockpipe_isr(struct gk20a *g, u32 fifo_intr)
+{
+	gk20a_dbg(gpu_dbg_intr, "ce blocking pipe interrupt\n");
+
+	return ce_intr_status_blockpipe_pending_f();
+}
+
+static u32 ce_launcherr_isr(struct gk20a *g, u32 fifo_intr)
+{
+	gk20a_dbg(gpu_dbg_intr, "ce launch error interrupt\n");
+
+	return ce_intr_status_launcherr_pending_f();
+}
+
+static void gp10b_ce_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
+{
+	u32 ce_intr = gk20a_readl(g, ce_intr_status_r(inst_id));
+	u32 clear_intr = 0;
+
+	gk20a_dbg(gpu_dbg_intr, "ce isr %08x %08x\n", ce_intr, inst_id);
+
+	/* clear blocking interrupts: they exhibit broken behavior */
+	if (ce_intr & ce_intr_status_blockpipe_pending_f())
+		clear_intr |= ce_blockpipe_isr(g, ce_intr);
+
+	if (ce_intr & ce_intr_status_launcherr_pending_f())
+		clear_intr |= ce_launcherr_isr(g, ce_intr);
+
+	gk20a_writel(g, ce_intr_status_r(inst_id), clear_intr);
+	return;
+}
+
+static void gp10b_ce_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
+{
+	u32 ce_intr = gk20a_readl(g, ce_intr_status_r(inst_id));
+	u32 clear_intr = 0;
+
+	gk20a_dbg(gpu_dbg_intr, "ce nonstall isr %08x %08x\n", ce_intr, inst_id);
+
+	if (ce_intr & ce_intr_status_nonblockpipe_pending_f())
+		clear_intr |= ce_nonblockpipe_isr(g, ce_intr);
+
+	gk20a_writel(g, ce_intr_status_r(inst_id), clear_intr);
+
+	return;
+}
+void gp10b_init_ce(struct gpu_ops *gops)
+{
+	gops->ce2.isr_stall = gp10b_ce_isr;
+	gops->ce2.isr_nonstall = gp10b_ce_nonstall_isr;
+}
diff --git a/drivers/gpu/nvgpu/gp10b/ce_gp10b.h b/drivers/gpu/nvgpu/gp10b/ce_gp10b.h
new file mode 100644
index 00000000..948d0454
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/ce_gp10b.h
@@ -0,0 +1,26 @@
+/*
+ * Pascal GPU series Copy Engine.
+ *
+ * Copyright (c) 2011-2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.
+ */
+#ifndef __CE_GP10B_H__
+#define __CE_GP10B_H__
+
+#include "gk20a/channel_gk20a.h"
+#include "gk20a/tsg_gk20a.h"
+
+void gp10b_init_ce(struct gpu_ops *gops);
+
+#endif /*__CE_GP10B_H__*/
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index aa38dc54..0aa6e29e 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -188,15 +188,17 @@ static int gp10b_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type,
 	gk20a_dbg_info("engine type %d", engine_type);
 	if (engine_type == top_device_info_type_enum_graphics_v())
 		ret = ENGINE_GR_GK20A;
-	else if (engine_type == top_device_info_type_enum_lce_v())
-		ret = ENGINE_CE2_GK20A;
+	else if (engine_type == top_device_info_type_enum_lce_v()) {
+		/* Default assumption - all CE engines have separate runlists */
+		ret = ENGINE_ASYNC_CE_GK20A;
+	}
 	else
 		gk20a_err(g->dev, "unknown engine %d", engine_type);
 
 	return ret;
 }
 
-void gp10b_device_info_data_parse(struct gk20a *g, u32 table_entry,
+static void gp10b_device_info_data_parse(struct gk20a *g, u32 table_entry,
 				u32 *inst_id, u32 *pri_base, u32 *fault_id)
 {
 	if (top_device_info_data_type_v(table_entry) ==
@@ -226,4 +228,5 @@ void gp10b_init_fifo(struct gpu_ops *gops)
 	gops->fifo.resetup_ramfc = gp10b_fifo_resetup_ramfc;
 	gops->fifo.engine_enum_from_type = gp10b_fifo_engine_enum_from_type;
 	gops->fifo.device_info_data_parse = gp10b_device_info_data_parse;
+	gops->fifo.eng_runlist_base_size = fifo_eng_runlist_base__size_1_v;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 2ba18410..607fca59 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1182,6 +1182,9 @@ static int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
 			   struct gk20a_debug_output *o)
 {
 	struct gr_gk20a *gr = &g->gr;
+	u32 gr_engine_id;
+
+	gr_engine_id = gk20a_fifo_get_gr_engine_id(g);
 
 	gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n",
 		gk20a_readl(g, gr_status_r()));
@@ -1202,7 +1205,7 @@ static int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
 	gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR  : 0x%x\n",
 		gk20a_readl(g, gr_fecs_intr_r()));
 	gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n",
-		gk20a_readl(g, fifo_engine_status_r(ENGINE_GR_GK20A)));
+		gk20a_readl(g, fifo_engine_status_r(gr_engine_id)));
 	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n",
 		gk20a_readl(g, gr_activity_0_r()));
 	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n",
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index a75d2604..b8fffab3 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -25,7 +25,7 @@
 #include "gp10b/mc_gp10b.h"
 #include "gp10b/ltc_gp10b.h"
 #include "gp10b/mm_gp10b.h"
-#include "gp10b/ce2_gp10b.h"
+#include "gp10b/ce_gp10b.h"
 #include "gp10b/fb_gp10b.h"
 #include "gp10b/pmu_gp10b.h"
 #include "gp10b/gr_ctx_gp10b.h"
@@ -150,6 +150,9 @@ static int gp10b_get_litter_value(struct gk20a *g,
 	case GPU_LIT_ROP_SHARED_BASE:
 		ret = proj_rop_shared_base_v();
 		break;
+	case GPU_LIT_HOST_NUM_ENGINES:
+		ret = proj_host_num_engines_v();
+		break;
 	case GPU_LIT_HOST_NUM_PBDMA:
 		ret = proj_host_num_pbdma_v();
 		break;
@@ -219,7 +222,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gp10b_init_ltc(gops);
 	gp10b_init_fb(gops);
 	gp10b_init_fifo(gops);
-	gp10b_init_ce2(gops);
+	gp10b_init_ce(gops);
 	gp10b_init_gr_ctx(gops);
 	gp10b_init_mm(gops);
 	gp10b_init_pmu_ops(gops);
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ce2_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ce2_gp10b.h
deleted file mode 100644
index b0c35a30..00000000
--- a/drivers/gpu/nvgpu/gp10b/hw_ce2_gp10b.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-/*
- * Function naming determines intended use:
- *
- *     <x>_r(void) : Returns the offset for register <x>.
- *
- *     <x>_o(void) : Returns the offset for element <x>.
- *
- *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
- *
- *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
- *
- *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
- *         and masked to place it at field <y> of register <x>.  This value
- *         can be |'d with others to produce a full register value for
- *         register <x>.
- *
- *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
- *         value can be ~'d and then &'d to clear the value of field <y> for
- *         register <x>.
- *
- *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
- *         to place it at field <y> of register <x>.  This value can be |'d
- *         with others to produce a full register value for <x>.
- *
- *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
- *         <x> value 'r' after being shifted to place its LSB at bit 0.
- *         This value is suitable for direct comparison with other unshifted
- *         values appropriate for use in field <y> of register <x>.
- *
- *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
- *         field <y> of register <x>.  This value is suitable for direct
- *         comparison with unshifted values appropriate for use in field <y>
- *         of register <x>.
- */
-#ifndef _hw_ce2_gp10b_h_
-#define _hw_ce2_gp10b_h_
-
-static inline u32 ce2_intr_status_r(u32 i)
-{
-	return 0x00104410 + i*128;
-}
-static inline u32 ce2_intr_status_blockpipe_pending_f(void)
-{
-	return 0x1;
-}
-static inline u32 ce2_intr_status_blockpipe_reset_f(void)
-{
-	return 0x1;
-}
-static inline u32 ce2_intr_status_nonblockpipe_pending_f(void)
-{
-	return 0x2;
-}
-static inline u32 ce2_intr_status_nonblockpipe_reset_f(void)
-{
-	return 0x2;
-}
-static inline u32 ce2_intr_status_launcherr_pending_f(void)
-{
-	return 0x4;
-}
-static inline u32 ce2_intr_status_launcherr_reset_f(void)
-{
-	return 0x4;
-}
-#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ce_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ce_gp10b.h
new file mode 100644
index 00000000..3f6e1470
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/hw_ce_gp10b.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_ce_gp10b_h_
+#define _hw_ce_gp10b_h_
+
+static inline u32 ce_intr_status_r(u32 i)
+{
+	return 0x00104410 + i*128;
+}
+static inline u32 ce_intr_status_blockpipe_pending_f(void)
+{
+	return 0x1;
+}
+static inline u32 ce_intr_status_blockpipe_reset_f(void)
+{
+	return 0x1;
+}
+static inline u32 ce_intr_status_nonblockpipe_pending_f(void)
+{
+	return 0x2;
+}
+static inline u32 ce_intr_status_nonblockpipe_reset_f(void)
+{
+	return 0x2;
+}
+static inline u32 ce_intr_status_launcherr_pending_f(void)
+{
+	return 0x4;
+}
+static inline u32 ce_intr_status_launcherr_reset_f(void)
+{
+	return 0x4;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
index dedc5a3f..d1a60c29 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
@@ -106,6 +106,10 @@ static inline u32 proj_tpc_in_gpc_shared_base_v(void)
 {
 	return 0x00001800;
 }
+static inline u32 proj_host_num_engines_v(void)
+{
+	return 0x00000002;
+}
 static inline u32 proj_host_num_pbdma_v(void)
 {
 	return 0x00000001;
diff --git a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
index 4d9573d1..eda961b6 100644
--- a/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mc_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * GP20B master
  *
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -101,6 +101,9 @@ irqreturn_t mc_gp10b_isr_nonstall(struct gk20a *g)
 irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g)
 {
 	u32 mc_intr_0;
+	u32 engine_id_idx;
+	u32 active_engine_id = 0;
+	u32 engine_enum = ENGINE_INVAL_GK20A;
 
 	gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
 
@@ -108,11 +111,26 @@ irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g)
 
 	gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0);
 
-	if (mc_intr_0 & g->fifo.engine_info[ENGINE_GR_GK20A].intr_mask)
-		gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g));
-	if (mc_intr_0 & g->fifo.engine_info[ENGINE_CE2_GK20A].intr_mask
-		&& g->ops.ce2.isr_stall)
-		g->ops.ce2.isr_stall(g);
+	for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; engine_id_idx++) {
+		active_engine_id = g->fifo.active_engines_list[engine_id_idx];
+
+		if (mc_intr_0 & g->fifo.engine_info[active_engine_id].intr_mask) {
+			engine_enum = g->fifo.engine_info[active_engine_id].engine_enum;
+			/* GR Engine */
+			if (engine_enum == ENGINE_GR_GK20A) {
+				gr_gk20a_elpg_protected_call(g, gk20a_gr_isr(g));
+			}
+
+			/* CE Engine */
+			if (((engine_enum == ENGINE_GRCE_GK20A) ||
+				(engine_enum == ENGINE_ASYNC_CE_GK20A)) &&
+				g->ops.ce2.isr_stall){
+					g->ops.ce2.isr_stall(g,
+					g->fifo.engine_info[active_engine_id].inst_id,
+					g->fifo.engine_info[active_engine_id].pri_base);
+			}
+		}
+	}
 	if (mc_intr_0 & mc_intr_pfifo_pending_f())
 		gk20a_fifo_isr(g);
 	if (mc_intr_0 & mc_intr_pmu_pending_f())
@@ -133,6 +151,9 @@ irqreturn_t mc_gp10b_intr_thread_stall(struct gk20a *g)
 irqreturn_t mc_gp10b_intr_thread_nonstall(struct gk20a *g)
 {
 	u32 mc_intr_1;
+	u32 engine_id_idx;
+	u32 active_engine_id = 0;
+	u32 engine_enum = ENGINE_INVAL_GK20A;
 
 	gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
 
@@ -142,13 +163,27 @@ irqreturn_t mc_gp10b_intr_thread_nonstall(struct gk20a *g)
 
 	if (mc_intr_1 & mc_intr_pfifo_pending_f())
 		gk20a_fifo_nonstall_isr(g);
-	if (mc_intr_1 & g->fifo.engine_info[ENGINE_GR_GK20A].intr_mask)
-		gk20a_gr_nonstall_isr(g);
-	if (mc_intr_1 & g->fifo.engine_info[ENGINE_CE2_GK20A].intr_mask
-		&& g->ops.ce2.isr_nonstall)
-		g->ops.ce2.isr_nonstall(g);
-
 
+	for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; engine_id_idx++) {
+		active_engine_id = g->fifo.active_engines_list[engine_id_idx];
+
+		if (mc_intr_1 & g->fifo.engine_info[active_engine_id].intr_mask) {
+			engine_enum = g->fifo.engine_info[active_engine_id].engine_enum;
+			/* GR Engine */
+			if (engine_enum == ENGINE_GR_GK20A) {
+				gk20a_gr_nonstall_isr(g);
+			}
+
+			/* CE Engine */
+			if (((engine_enum == ENGINE_GRCE_GK20A) ||
+				(engine_enum == ENGINE_ASYNC_CE_GK20A)) &&
+				g->ops.ce2.isr_nonstall) {
+					g->ops.ce2.isr_nonstall(g,
+					g->fifo.engine_info[active_engine_id].inst_id,
+					g->fifo.engine_info[active_engine_id].pri_base);
+			}
+		}
+	}
 
 	gk20a_writel(g, mc_intr_en_set_r(NVGPU_MC_INTR_NONSTALLING),
 			g->ops.mc.intr_mask_restore[NVGPU_MC_INTR_NONSTALLING]);
diff --git a/drivers/gpu/nvgpu/gp10b/therm_gp10b.c b/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
index 5763b3b1..63efc945 100644
--- a/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
@@ -82,13 +82,16 @@ static int gp10b_update_therm_gate_ctrl(struct gk20a *g)
 {
 	u32 gate_ctrl;
 	u32 engine_id;
+	u32 active_engine_id = 0;
+	struct fifo_gk20a *f = &g->fifo;
 
-	for (engine_id = 0; engine_id < ENGINE_INVAL_GK20A; engine_id++) {
-		gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine_id));
+	for (engine_id = 0; engine_id < f->num_engines; engine_id++) {
+		active_engine_id = f->active_engines_list[engine_id];
+		gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(active_engine_id));
 		gate_ctrl = set_field(gate_ctrl,
 			therm_gate_ctrl_eng_delay_before_m(),
 			therm_gate_ctrl_eng_delay_before_f(4));
-		gk20a_writel(g, therm_gate_ctrl_r(engine_id), gate_ctrl);
+		gk20a_writel(g, therm_gate_ctrl_r(active_engine_id), gate_ctrl);
 	}
 
 	return 0;
-- 
cgit v1.2.2


From 8a2be379cd25f2096ca63a50893688b8cf538691 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Tue, 31 May 2016 10:31:27 +0300
Subject: gpu: nvgpu: detect vidmem configuration from HW

Read video memory size from hardware during initialization for devices
that support it.

JIRA DNVGPU-14

Change-Id: I84e1bca0eaac8dc204e1fb82628acc6b52c3e5cc
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1157212
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile              |  1 +
 drivers/gpu/nvgpu/gp106/hal_gp106.c     |  3 +-
 drivers/gpu/nvgpu/gp106/hw_fb_gp106.h   | 16 +++++++++
 drivers/gpu/nvgpu/gp106/hw_fbpa_gp106.h | 61 +++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/hw_top_gp106.h  |  8 +++++
 drivers/gpu/nvgpu/gp106/mm_gp106.c      | 39 +++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/mm_gp106.h      | 23 +++++++++++++
 7 files changed, 150 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_fbpa_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/mm_gp106.c
 create mode 100644 drivers/gpu/nvgpu/gp106/mm_gp106.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 89b9b13a..7e4c259b 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -20,6 +20,7 @@ nvgpu-y += \
 	$(nvgpu-t18x)/gp10b/gp10b_sysfs.o \
 	$(nvgpu-t18x)/gp10b/gp10b.o \
 	$(nvgpu-t18x)/gp106/hal_gp106.o \
+	$(nvgpu-t18x)/gp106/mm_gp106.o \
 	$(nvgpu-t18x)/gp106/pmu_gp106.o \
 	$(nvgpu-t18x)/gp106/gr_gp106.o \
 	$(nvgpu-t18x)/gp106/gr_ctx_gp106.o
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 1dd16139..7ef72d72 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -39,6 +39,7 @@
 #include "gm20b/pmu_gm20b.h"
 #include "gm20b/clk_gm20b.h"
 
+#include "gp106/mm_gp106.h"
 #include "gp106/pmu_gp106.h"
 #include "gp106/gr_ctx_gp106.h"
 #include "gp106/gr_gp106.h"
@@ -194,7 +195,7 @@ int gp106_init_hal(struct gk20a *g)
 	gp10b_init_fifo(gops);
 	gp10b_init_ce(gops);
 	gp106_init_gr_ctx(gops);
-	gp10b_init_mm(gops);
+	gp106_init_mm(gops);
 	gp106_init_pmu_ops(gops);
 	gk20a_init_debug_ops(gops);
 	gp10b_init_regops(gops);
diff --git a/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h b/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h
index 42d32ab3..1ab876cd 100644
--- a/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h
@@ -486,4 +486,20 @@ static inline u32 fb_niso_flush_sysmem_addr_r(void)
 {
 	return 0x00100c10;
 }
+static inline u32 fb_mmu_local_memory_range_r(void)
+{
+	return 0x00100ce0;
+}
+static inline u32 fb_mmu_local_memory_range_lower_scale_v(u32 r)
+{
+	return (r >> 0) & 0xf;
+}
+static inline u32 fb_mmu_local_memory_range_lower_mag_v(u32 r)
+{
+	return (r >> 4) & 0x3f;
+}
+static inline u32 fb_mmu_local_memory_range_ecc_mode_v(u32 r)
+{
+	return (r >> 30) & 0x1;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_fbpa_gp106.h b/drivers/gpu/nvgpu/gp106/hw_fbpa_gp106.h
new file mode 100644
index 00000000..7f02eeb6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_fbpa_gp106.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_fbpa_gp106_h_
+#define _hw_fbpa_gp106_h_
+
+static inline u32 fbpa_cstatus_r(void)
+{
+	return 0x009a020c;
+}
+static inline u32 fbpa_cstatus_ramamount_v(u32 r)
+{
+	return (r >> 0) & 0x1ffff;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_top_gp106.h b/drivers/gpu/nvgpu/gp106/hw_top_gp106.h
index e833c152..85350954 100644
--- a/drivers/gpu/nvgpu/gp106/hw_top_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_top_gp106.h
@@ -74,6 +74,14 @@ static inline u32 top_num_fbps_value_v(u32 r)
 {
 	return (r >> 0) & 0x1f;
 }
+static inline u32 top_num_fbpas_r(void)
+{
+	return 0x0002243c;
+}
+static inline u32 top_num_fbpas_value_v(u32 r)
+{
+	return (r >> 0) & 0x1f;
+}
 static inline u32 top_ltc_per_fbp_r(void)
 {
 	return 0x00022450;
diff --git a/drivers/gpu/nvgpu/gp106/mm_gp106.c b/drivers/gpu/nvgpu/gp106/mm_gp106.c
new file mode 100644
index 00000000..1a4b6dd1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/mm_gp106.c
@@ -0,0 +1,39 @@
+/*
+ * GP106 memory management
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "gp10b/mm_gp10b.h"
+
+#include "hw_fb_gp106.h"
+
+static size_t gp106_mm_get_vidmem_size(struct gk20a *g)
+{
+	u32 range = gk20a_readl(g, fb_mmu_local_memory_range_r());
+	u32 mag = fb_mmu_local_memory_range_lower_mag_v(range);
+	u32 scale = fb_mmu_local_memory_range_lower_scale_v(range);
+	u32 ecc = fb_mmu_local_memory_range_ecc_mode_v(range);
+	size_t bytes = ((size_t)mag << scale) * SZ_1M;
+
+	if (ecc)
+		bytes = bytes / 16 * 15;
+
+	return bytes;
+}
+
+void gp106_init_mm(struct gpu_ops *gops)
+{
+	gp10b_init_mm(gops);
+	gops->mm.get_vidmem_size = gp106_mm_get_vidmem_size;
+}
diff --git a/drivers/gpu/nvgpu/gp106/mm_gp106.h b/drivers/gpu/nvgpu/gp106/mm_gp106.h
new file mode 100644
index 00000000..36a89a11
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/mm_gp106.h
@@ -0,0 +1,23 @@
+/*
+ * GP106 memory management
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef MM_GP106_H
+#define MM_GP106_H
+
+struct gpu_ops;
+
+void gp106_init_mm(struct gpu_ops *gops);
+
+#endif
-- 
cgit v1.2.2


From 4ded050bd04b8c380f350631745919b674deb16f Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Thu, 9 Jun 2016 09:41:04 +0300
Subject: gpu: nvgpu: include matching header in mm_gp106.c

Include mm_gp106.h in mm_gp106.c to make its function declarations
visible and to fix a Sparse warning.

Bug 200088648

Change-Id: Id76f565021de585bc02a53a01e52084ff70009c2
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1161607
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/mm_gp106.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/mm_gp106.c b/drivers/gpu/nvgpu/gp106/mm_gp106.c
index 1a4b6dd1..37365022 100644
--- a/drivers/gpu/nvgpu/gp106/mm_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/mm_gp106.c
@@ -15,6 +15,7 @@
 
 #include "gk20a/gk20a.h"
 #include "gp10b/mm_gp10b.h"
+#include "gp106/mm_gp106.h"
 
 #include "hw_fb_gp106.h"
 
-- 
cgit v1.2.2


From 24a6dee36ea943d9f7b00cbbebcc0798ed0ee0fc Mon Sep 17 00:00:00 2001
From: Krishna Reddy <vdumpa@nvidia.com>
Date: Wed, 8 Jun 2016 23:05:00 -0700
Subject: Revert "gpu: nvgpu: register to nvhost for debug dump"

This reverts commit fe3adf3d0a72f936788b98365557783b53ecb6ed.

This revert fixes the Vulkan 1.0.1 CTS failures.

Bug 200196104

Change-Id: I8cc90ac9dc3d29a08341f37e83277a0b431e2187
Signed-off-by: Krishna Reddy <vdumpa@nvidia.com>
Reviewed-on: http://git-master/r/1161577
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 952b79b5..b9f50817 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -135,11 +135,6 @@ static int gp10b_tegra_probe(struct device *dev)
 	}
 
 	platform->g->host1x_dev = host1x_pdev;
-	if (platform->g->host1x_dev)
-		nvhost_register_dump_device(platform->g->host1x_dev,
-					gk20a_debug_dump_device,
-					platform->g->dev);
-
 	platform->bypass_smmu = !device_is_iommuable(dev);
 	platform->disable_bigpage = platform->bypass_smmu;
 
@@ -189,11 +184,6 @@ static int gp10b_tegra_late_probe(struct device *dev)
 
 static int gp10b_tegra_remove(struct device *dev)
 {
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-
-	if (platform->g->host1x_dev)
-		nvhost_unregister_dump_device(platform->g->host1x_dev);
-
 	/* remove gk20a power subdomain from host1x */
 	nvhost_unregister_client_domain(dev_to_genpd(dev));
 	gr_gp10b_remove_sysfs(dev);
-- 
cgit v1.2.2


From 7b43eac2bc1e9e5946f1c721686f841af0550aef Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Wed, 8 Jun 2016 17:22:21 +0530
Subject: gpu: nvgpu: ACR interface update for GP104/GP106

JIRA DNVGPU-34

Change-Id: Ieb8e73451a5d73480b8d9e29e78b1a273b17d796
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1161120
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/acr_t18x.h        |  20 ++++++
 drivers/gpu/nvgpu/gp106/acr_gp106.h | 121 ++++++++++++++++++++++++++++++++++++
 2 files changed, 141 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/acr_t18x.h
 create mode 100644 drivers/gpu/nvgpu/gp106/acr_gp106.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/acr_t18x.h b/drivers/gpu/nvgpu/acr_t18x.h
new file mode 100644
index 00000000..1e48d5ca
--- /dev/null
+++ b/drivers/gpu/nvgpu/acr_t18x.h
@@ -0,0 +1,20 @@
+/*
+ * NVIDIA T18x ACR
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _NVGPU_ACR_T18X_H_
+#define _NVGPU_ACR_T18X_H_
+
+#include "gp106/acr_gp106.h"
+
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.h b/drivers/gpu/nvgpu/gp106/acr_gp106.h
new file mode 100644
index 00000000..26e68cd7
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __ACR_GP106_H_
+#define __ACR_GP106_H_
+
+#include "gm20b/acr_gm20b.h"
+#include "gm206/acr_gm206.h"
+
+struct lsf_ucode_desc_v1 {
+	u8  prd_keys[2][16];
+	u8  dbg_keys[2][16];
+	u32 b_prd_present;
+	u32 b_dbg_present;
+	u32 falcon_id;
+	u32 bsupports_versioning;
+	u32 version;
+	u32 dep_map_count;
+	u8  dep_map[LSF_FALCON_ID_END * 2 * 4];
+	u8  kdf[16];
+};
+
+struct lsf_wpr_header_v1 {
+	u32  falcon_id;
+	u32  lsb_offset;
+	u32  bootstrap_owner;
+	u32  lazy_bootstrap;
+	u32  bin_version;
+	u32  status;
+};
+
+struct lsf_lsb_header_v1 {
+	struct lsf_ucode_desc_v1 signature;
+	u32 ucode_off;
+	u32 ucode_size;
+	u32 data_size;
+	u32 bl_code_size;
+	u32 bl_imem_off;
+	u32 bl_data_off;
+	u32 bl_data_size;
+	u32 app_code_off;
+	u32 app_code_size;
+	u32 app_data_off;
+	u32 app_data_size;
+	u32 flags;
+};
+
+struct flcn_ucode_img_v1 {
+	u32 *header; /*only some falcons have header*/
+	u32 *data;
+	struct pmu_ucode_desc_v1 *desc;  /*only some falcons have descriptor*/
+	u32 data_size;
+	void *fw_ver; /*NV2080_CTRL_GPU_GET_FIRMWARE_VERSION_PARAMS struct*/
+	u8 load_entire_os_data; /* load the whole osData section at boot time.*/
+	struct lsf_ucode_desc_v1 *lsf_desc; /* NULL if not a light secure falcon.*/
+	u8 free_res_allocs;/*True if there a resources to freed by the client.*/
+	u32 flcn_inst;
+};
+
+struct lsfm_managed_ucode_img_v2 {
+	struct lsfm_managed_ucode_img_v2 *next;
+	struct lsf_wpr_header_v1 wpr_header;
+	struct lsf_lsb_header_v1 lsb_header;
+	union flcn_bl_generic_desc_v1 bl_gen_desc;
+	u32 bl_gen_desc_size;
+	u32 full_ucode_size;
+	struct flcn_ucode_img_v1 ucode_img;
+};
+struct ls_flcn_mgr_v1 {
+	u16 managed_flcn_cnt;
+	u32 wpr_size;
+	u32 disable_mask;
+	struct lsfm_managed_ucode_img_v2 *ucode_img_list;
+	void *wpr_client_req_state;/*PACR_CLIENT_REQUEST_STATE originally*/
+};
+
+struct flcn_acr_region_prop_v1 {
+	u32   start_addr;
+	u32   end_addr;
+	u32   region_id;
+	u32   read_mask;
+	u32   write_mask;
+	u32   client_mask;
+	u32   shadowmMem_startaddress;
+};
+
+/*!
+ * no_regions   - Number of regions used.
+ * region_props   - Region properties
+ */
+struct flcn_acr_regions_v1 {
+	u32                     no_regions;
+	struct flcn_acr_region_prop_v1   region_props[T210_FLCN_ACR_MAX_REGIONS];
+};
+
+struct flcn_acr_desc_v1 {
+	union {
+		u32 reserved_dmem[(LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE/4)];
+	} ucode_reserved_space;
+	u32 signatures[4];
+	/*Always 1st*/
+	u32 wpr_region_id;
+	u32 wpr_offset;
+	u32 mmu_mem_range;
+	struct flcn_acr_regions_v1 regions;
+	u32 nonwpr_ucode_blob_size;
+	u64 nonwpr_ucode_blob_start;
+	u32 dummy[4];  //ACR_BSI_VPR_DESC
+};
+
+#endif /*__PMU_GP106_H_*/
-- 
cgit v1.2.2


From 6ed3cffb73488b22d671c88d30061cd045417378 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Wed, 8 Jun 2016 17:27:49 +0530
Subject: gpu: nvgpu: ACR boot on SEC2

Add ACR/SEC2 methods to support ACR boot on the
SEC2 falcon.

JIRA DNVGPU-34

Change-Id: I917be1d6c61a1c1ae61a918f50228ea00492cd50
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1161122
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/acr_gp106.c     | 1131 +++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/acr_gp106.h     |    2 +
 drivers/gpu/nvgpu/gp106/hw_psec_gp106.h |  609 +++++++++++++++++
 drivers/gpu/nvgpu/gp106/sec2_gp106.c    |  384 +++++++++++
 drivers/gpu/nvgpu/gp106/sec2_gp106.h    |   29 +
 5 files changed, 2155 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/gp106/acr_gp106.c
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_psec_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/sec2_gp106.c
 create mode 100644 drivers/gpu/nvgpu/gp106/sec2_gp106.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
new file mode 100644
index 00000000..2ea2f817
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -0,0 +1,1131 @@
+/*
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/delay.h>	/* for mdelay */
+#include <linux/firmware.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+
+#include "gk20a/gk20a.h"
+#include "gk20a/pmu_gk20a.h"
+#include "gk20a/semaphore_gk20a.h"
+#include "gm20b/hw_pwr_gm20b.h"
+#include "gm206/acr_gm206.h"
+#include "gm20b/acr_gm20b.h"
+#include "gm206/pmu_gm206.h"
+#include "sec2_gp106.h"
+
+/* Defines */
+/* Debug print, routed to gk20a_dbg() under the gpu_dbg_pmu flag. */
+#define gp106_dbg_pmu(fmt, arg...) \
+	gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
+
+/*
+ * Per-falcon callback that fills in @udata with the falcon's ucode image
+ * details. Callers in this file treat a return value of 0 as success.
+ */
+typedef int (*get_ucode_details)(struct gk20a *g,
+		struct flcn_ucode_img_v1 *udata);
+
+/* Both size and address of WPR need to be 128K-aligned */
+#define WPR_ALIGNMENT	0x20000
+#define GP106_DGPU_NONWPR 0x10000000 /* start from 256MB location at VIDMEM */
+#define GP106_DGPU_WPR 0x20000000 /* 512MB; reported as wpr_base by gp106_wpr_info() */
+#define DGPU_WPR_SIZE 0x100000 /* 1MB; reported as size by gp106_wpr_info() */
+
+/*Externs*/
+
+/*Forwards*/
+static int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img);
+static int fecs_ucode_details(struct gk20a *g,
+		struct flcn_ucode_img_v1 *p_img);
+static int gpccs_ucode_details(struct gk20a *g,
+		struct flcn_ucode_img_v1 *p_img);
+static int gp106_bootstrap_hs_flcn(struct gk20a *g);
+
+static int lsfm_discover_ucode_images(struct gk20a *g,
+	struct ls_flcn_mgr_v1 *plsfm);
+static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr_v1 *plsfm,
+	struct flcn_ucode_img_v1 *ucode_image, u32 falcon_id);
+static void lsfm_free_ucode_img_res(struct flcn_ucode_img_v1 *p_img);
+static void lsfm_free_nonpmu_ucode_img_res(struct flcn_ucode_img_v1 *p_img);
+static int lsf_gen_wpr_requirements(struct gk20a *g,
+		struct ls_flcn_mgr_v1 *plsfm);
+static void lsfm_init_wpr_contents(struct gk20a *g,
+		struct ls_flcn_mgr_v1 *plsfm, struct mem_desc *nonwpr);
+static void free_acr_resources(struct gk20a *g, struct ls_flcn_mgr_v1 *plsfm);
+static int gp106_pmu_populate_loader_cfg(struct gk20a *g,
+	void *lsfm, u32 *p_bl_gen_desc_size);
+static int gp106_flcn_populate_bl_dmem_desc(struct gk20a *g,
+	void *lsfm, u32 *p_bl_gen_desc_size, u32 falconid);
+static int gp106_prepare_ucode_blob(struct gk20a *g);
+
+/*
+ * Globals
+ *
+ * Table of per-falcon "ucode details" callbacks. Index 0 is the PMU, which
+ * lsfm_discover_ucode_images() handles separately before walking the
+ * remaining entries starting at index 1.
+ */
+static get_ucode_details pmu_acr_supp_ucode_list[] = {
+	pmu_ucode_details,
+	fecs_ucode_details,
+	gpccs_ucode_details,
+};
+
+/*
+ * Report the fixed gp106 dGPU carveout layout.
+ *
+ * Fills @inf with the compile-time WPR base, non-WPR base and WPR size
+ * constants defined at the top of this file. @g is not used.
+ */
+void gp106_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf)
+{
+	inf->wpr_base = GP106_DGPU_WPR;
+	inf->nonwpr_base = GP106_DGPU_NONWPR;
+	inf->size = DGPU_WPR_SIZE;
+}
+
+/*
+ * Store a 64-bit DMA address into a falcon lo/hi pair.
+ *
+ * The halves are assigned rather than OR-ed in, so the result is correct
+ * even when @dma_addr already holds a previous (non-zero) address. For a
+ * zero-initialised destination this is identical to the old behaviour.
+ */
+static void flcn64_set_dma(struct falc_u64 *dma_addr, u64 value)
+{
+	dma_addr->lo = u64_lo32(value);
+	dma_addr->hi = u64_hi32(value);
+}
+
+/*
+ * Describe the ucode blob location in video memory.
+ *
+ * Marks @mem as a VIDMEM aperture and gives it a one-entry sg_table whose
+ * DMA address is the non-WPR base reported by g->ops.pmu.get_wpr().
+ * @size is not consulted here.
+ *
+ * Returns 0 on success, -ENOMEM if the sg_table container cannot be
+ * allocated, or the sg_alloc_table() error (the container is then freed).
+ */
+int gp106_alloc_blob_space(struct gk20a *g,
+		size_t size, struct mem_desc *mem)
+{
+	struct wpr_carveout_info wpr_inf;
+	int err;
+
+	g->ops.pmu.get_wpr(g, &wpr_inf);
+
+	mem->aperture = APERTURE_VIDMEM;
+
+	mem->sgt = kzalloc(sizeof(*mem->sgt), GFP_KERNEL);
+	if (mem->sgt == NULL) {
+		gk20a_err(dev_from_gk20a(g), "failed to allocate memory\n");
+		return -ENOMEM;
+	}
+
+	err = sg_alloc_table(mem->sgt, 1, GFP_KERNEL);
+	if (err != 0) {
+		gk20a_err(dev_from_gk20a(g), "failed to allocate sg_table\n");
+		gk20a_free_sgtable(&mem->sgt);
+		return err;
+	}
+
+	/* Single segment: the blob lives at the fixed non-WPR base. */
+	sg_dma_address(mem->sgt->sgl) = wpr_inf.nonwpr_base;
+
+	return 0;
+}
+
+/*
+ * Install the gp106 secure-boot PMU operations into @gops.
+ *
+ * Blob preparation, HS falcon bootstrap, WPR info, blob allocation and the
+ * two loader-descriptor populate hooks come from this file; the bootstrap
+ * policy hooks are the gm206 ones and the halt/setup hooks are the SEC2
+ * ones.
+ */
+void gp106_init_secure_pmu(struct gpu_ops *gops)
+{
+	gops->pmu.prepare_ucode = gp106_prepare_ucode_blob;
+	gops->pmu.pmu_setup_hw_and_bootstrap = gp106_bootstrap_hs_flcn;
+	gops->pmu.is_lazy_bootstrap = gm206_is_lazy_bootstrap;
+	gops->pmu.is_priv_load = gm206_is_priv_load;
+	gops->pmu.get_wpr = gp106_wpr_info;
+	gops->pmu.alloc_blob_space = gp106_alloc_blob_space;
+	gops->pmu.pmu_populate_loader_cfg = gp106_pmu_populate_loader_cfg;
+	gops->pmu.flcn_populate_bl_dmem_desc = gp106_flcn_populate_bl_dmem_desc;
+	gops->pmu.falcon_wait_for_halt = sec2_wait_for_halt;
+	gops->pmu.falcon_clear_halt_interrupt_status =
+			sec2_clear_halt_interrupt_status;
+	gops->pmu.init_falcon_setup_hw = init_sec2_setup_hw1;
+}
+/* TODO - check if any free blob res needed*/
+
+/*
+ * Load the PMU ucode image, descriptor and signature and describe them in
+ * @p_img.
+ *
+ * On success the image and descriptor firmware stay loaded (they are
+ * recorded in g->acr and referenced by @p_img and g->pmu), the signature
+ * firmware is released after being copied into a freshly allocated
+ * lsf_ucode_desc_v1, and 0 is returned. The caller owns p_img->lsf_desc.
+ *
+ * On failure every firmware acquired so far is released, the pointers this
+ * function stored into g->acr / g->pmu are cleared so nothing is left
+ * pointing into released firmware, and a negative errno is returned.
+ */
+int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img)
+{
+	const struct firmware *pmu_fw, *pmu_desc, *pmu_sig;
+	struct pmu_gk20a *pmu = &g->pmu;
+	struct lsf_ucode_desc_v1 *lsf_desc;
+	int err;
+
+	gp106_dbg_pmu("requesting PMU ucode in gp106\n");
+	pmu_fw = gk20a_request_firmware(g, GM20B_PMU_UCODE_IMAGE);
+	if (!pmu_fw) {
+		gk20a_err(dev_from_gk20a(g), "failed to load pmu ucode!!");
+		return -ENOENT;
+	}
+	g->acr.pmu_fw = pmu_fw;
+	gp106_dbg_pmu("Loaded PMU ucode in for blob preparation");
+
+	gp106_dbg_pmu("requesting PMU ucode desc in GM20B\n");
+	pmu_desc = gk20a_request_firmware(g, GM20B_PMU_UCODE_DESC);
+	if (!pmu_desc) {
+		gk20a_err(dev_from_gk20a(g), "failed to load pmu ucode desc!!");
+		err = -ENOENT;
+		goto release_img_fw;
+	}
+	pmu_sig = gk20a_request_firmware(g, GM20B_PMU_UCODE_SIG);
+	if (!pmu_sig) {
+		gk20a_err(dev_from_gk20a(g), "failed to load pmu sig!!");
+		err = -ENOENT;
+		goto release_desc;
+	}
+	pmu->desc_v1 = (struct pmu_ucode_desc_v1 *)pmu_desc->data;
+	pmu->ucode_image = (u32 *)pmu_fw->data;
+	g->acr.pmu_desc = pmu_desc;
+
+	err = gk20a_init_pmu(pmu);
+	if (err) {
+		gp106_dbg_pmu("failed to set function pointers\n");
+		/* The signature is already held here; unwind it too. */
+		goto release_sig;
+	}
+
+	lsf_desc = kzalloc(sizeof(struct lsf_ucode_desc_v1), GFP_KERNEL);
+	if (!lsf_desc) {
+		err = -ENOMEM;
+		goto release_sig;
+	}
+	/*
+	 * NOTE(review): the signature file is assumed to be at least
+	 * sizeof(struct lsf_ucode_desc_v1) bytes long -- confirm.
+	 */
+	memcpy(lsf_desc, (void *)pmu_sig->data, sizeof(struct lsf_ucode_desc_v1));
+	lsf_desc->falcon_id = LSF_FALCON_ID_PMU;
+
+	p_img->desc = pmu->desc_v1;
+	p_img->data = pmu->ucode_image;
+	p_img->data_size = pmu->desc_v1->app_start_offset
+						+ pmu->desc_v1->app_size;
+	p_img->fw_ver = NULL;
+	p_img->header = NULL;
+	p_img->lsf_desc = (struct lsf_ucode_desc_v1 *)lsf_desc;
+	gp106_dbg_pmu("requesting PMU ucode in GM20B exit\n");
+
+	/* The signature has been copied; only image and desc stay loaded. */
+	release_firmware(pmu_sig);
+	return 0;
+release_sig:
+	/* These point into firmware that is about to be released. */
+	pmu->desc_v1 = NULL;
+	pmu->ucode_image = NULL;
+	g->acr.pmu_desc = NULL;
+	release_firmware(pmu_sig);
+release_desc:
+	release_firmware(pmu_desc);
+release_img_fw:
+	g->acr.pmu_fw = NULL;
+	release_firmware(pmu_fw);
+	return err;
+}
+
+/*
+ * Describe the FECS ucode in @p_img.
+ *
+ * The signature firmware is copied into a newly allocated
+ * lsf_ucode_desc_v1 and then released. A pmu_ucode_desc_v1 is allocated and
+ * filled from g->ctxsw_ucode_info.fecs; the image data itself is the ctxsw
+ * ucode surface. Both allocations are owned by the caller on success.
+ *
+ * Returns 0 on success, -ENOENT if the signature cannot be loaded, or
+ * -ENOMEM if either allocation fails.
+ */
+int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img)
+{
+	const struct firmware *fecs_sig;
+	struct lsf_ucode_desc_v1 *lsf_desc;
+	struct pmu_ucode_desc_v1 *desc;
+	int err = -ENOMEM;
+
+	fecs_sig = gk20a_request_firmware(g, GM20B_FECS_UCODE_SIG);
+	if (fecs_sig == NULL) {
+		gk20a_err(dev_from_gk20a(g), "failed to load fecs sig");
+		return -ENOENT;
+	}
+
+	lsf_desc = kzalloc(sizeof(*lsf_desc), GFP_KERNEL);
+	if (lsf_desc == NULL)
+		goto rel_sig;
+	memcpy(lsf_desc, (void *)fecs_sig->data, sizeof(*lsf_desc));
+	lsf_desc->falcon_id = LSF_FALCON_ID_FECS;
+
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	p_img->desc = desc;
+	if (desc == NULL)
+		goto free_lsf_desc;
+
+	/* Bootloader placement */
+	desc->bootloader_start_offset = g->ctxsw_ucode_info.fecs.boot.offset;
+	desc->bootloader_size =
+		ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256);
+	desc->bootloader_imem_offset =
+		g->ctxsw_ucode_info.fecs.boot_imem_offset;
+	desc->bootloader_entry_point = g->ctxsw_ucode_info.fecs.boot_entry;
+
+	/* Whole image = boot + code + data, each padded to 256 bytes */
+	desc->image_size =
+		ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256) +
+		ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) +
+		ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256);
+
+	/* Application (code + data) placement */
+	desc->app_size =
+		ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) +
+		ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256);
+	desc->app_start_offset = g->ctxsw_ucode_info.fecs.code.offset;
+	desc->app_imem_offset = 0;
+	desc->app_imem_entry = 0;
+	desc->app_dmem_offset = 0;
+	desc->app_resident_code_offset = 0;
+	desc->app_resident_code_size = g->ctxsw_ucode_info.fecs.code.size;
+	desc->app_resident_data_offset =
+		g->ctxsw_ucode_info.fecs.data.offset -
+		g->ctxsw_ucode_info.fecs.code.offset;
+	desc->app_resident_data_size = g->ctxsw_ucode_info.fecs.data.size;
+
+	p_img->data = g->ctxsw_ucode_info.surface_desc.cpu_va;
+	p_img->data_size = desc->image_size;
+	p_img->fw_ver = NULL;
+	p_img->header = NULL;
+	p_img->lsf_desc = (struct lsf_ucode_desc_v1 *)lsf_desc;
+
+	gp106_dbg_pmu("fecs fw loaded\n");
+	release_firmware(fecs_sig);
+	return 0;
+
+free_lsf_desc:
+	kfree(lsf_desc);
+rel_sig:
+	release_firmware(fecs_sig);
+	return err;
+}
+/*
+ * Describe the GPCCS ucode in @p_img.
+ *
+ * Only applies when g->ops.securegpccs is set; otherwise -ENOENT is
+ * returned straight away. The signature firmware is copied into a newly
+ * allocated lsf_ucode_desc_v1 and then released. A pmu_ucode_desc_v1 is
+ * allocated and filled from g->ctxsw_ucode_info.gpccs, with offsets
+ * expressed relative to the GPCCS bootloader, which is where p_img->data
+ * points. Both allocations are owned by the caller on success.
+ *
+ * Returns 0 on success, -ENOENT if GPCCS is not secure or the signature
+ * cannot be loaded, or -ENOMEM if either allocation fails.
+ */
+int gpccs_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img)
+{
+	const struct firmware *gpccs_sig;
+	struct lsf_ucode_desc_v1 *lsf_desc;
+	struct pmu_ucode_desc_v1 *desc;
+	int err = -ENOMEM;
+
+	if (!g->ops.securegpccs)
+		return -ENOENT;
+
+	gpccs_sig = gk20a_request_firmware(g, T18x_GPCCS_UCODE_SIG);
+	if (gpccs_sig == NULL) {
+		gk20a_err(dev_from_gk20a(g), "failed to load gpccs sig");
+		return -ENOENT;
+	}
+
+	lsf_desc = kzalloc(sizeof(*lsf_desc), GFP_KERNEL);
+	if (lsf_desc == NULL)
+		goto rel_sig;
+	memcpy(lsf_desc, (void *)gpccs_sig->data, sizeof(*lsf_desc));
+	lsf_desc->falcon_id = LSF_FALCON_ID_GPCCS;
+
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	p_img->desc = desc;
+	if (desc == NULL)
+		goto free_lsf_desc;
+
+	/* Bootloader sits at the very start of the GPCCS image */
+	desc->bootloader_start_offset = 0;
+	desc->bootloader_size =
+		ALIGN(g->ctxsw_ucode_info.gpccs.boot.size, 256);
+	desc->bootloader_imem_offset =
+		g->ctxsw_ucode_info.gpccs.boot_imem_offset;
+	desc->bootloader_entry_point = g->ctxsw_ucode_info.gpccs.boot_entry;
+
+	/* Whole image = boot + code + data, each padded to 256 bytes */
+	desc->image_size =
+		ALIGN(g->ctxsw_ucode_info.gpccs.boot.size, 256) +
+		ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256) +
+		ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
+
+	/* Application (code + data) follows the padded bootloader */
+	desc->app_size =
+		ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256) +
+		ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
+	desc->app_start_offset = desc->bootloader_size;
+	desc->app_imem_offset = 0;
+	desc->app_imem_entry = 0;
+	desc->app_dmem_offset = 0;
+	desc->app_resident_code_offset = 0;
+	desc->app_resident_code_size =
+		ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256);
+	desc->app_resident_data_offset =
+		ALIGN(g->ctxsw_ucode_info.gpccs.data.offset, 256) -
+		ALIGN(g->ctxsw_ucode_info.gpccs.code.offset, 256);
+	desc->app_resident_data_size =
+		ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
+
+	p_img->data = (u32 *)((u8 *)g->ctxsw_ucode_info.surface_desc.cpu_va +
+		g->ctxsw_ucode_info.gpccs.boot.offset);
+	p_img->data_size = ALIGN(desc->image_size, 256);
+	p_img->fw_ver = NULL;
+	p_img->header = NULL;
+	p_img->lsf_desc = (struct lsf_ucode_desc_v1 *)lsf_desc;
+
+	gp106_dbg_pmu("gpccs fw loaded\n");
+	release_firmware(gpccs_sig);
+	return 0;
+
+free_lsf_desc:
+	kfree(lsf_desc);
+rel_sig:
+	release_firmware(gpccs_sig);
+	return err;
+}
+
+/*
+ * Build the LS falcon ucode blob.
+ *
+ * If a blob already exists (g->acr.ucode_blob.cpu_va is set) this is the
+ * recovery case: only gk20a_init_pmu() is re-run. Otherwise the managed
+ * falcon images are discovered, the WPR layout is computed, blob space is
+ * obtained through g->ops.pmu.alloc_blob_space() and the headers and ucode
+ * are written into it.
+ *
+ * The temporary list of managed images is released on the success path and
+ * on every failure after discovery, so a failing layout or allocation step
+ * no longer leaks it.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int gp106_prepare_ucode_blob(struct gk20a *g)
+{
+	int err;
+	struct ls_flcn_mgr_v1 lsfm_l, *plsfm;
+	struct pmu_gk20a *pmu = &g->pmu;
+	struct wpr_carveout_info wpr_inf;
+
+	if (g->acr.ucode_blob.cpu_va) {
+		/* Recovery case, we do not need to form
+		 * the non-WPR blob of ucodes again */
+		err = gk20a_init_pmu(pmu);
+		if (err) {
+			gp106_dbg_pmu("failed to set function pointers\n");
+			return err;
+		}
+		return 0;
+	}
+	plsfm = &lsfm_l;
+	memset((void *)plsfm, 0, sizeof(struct ls_flcn_mgr_v1));
+	gp106_dbg_pmu("fetching GMMU regs\n");
+	gm20b_mm_mmu_vpr_info_fetch(g);
+	gr_gk20a_init_ctxsw_ucode(g);
+
+	if (g->ops.fb.dump_vpr_wpr_info)
+		g->ops.fb.dump_vpr_wpr_info(g);
+
+	g->ops.pmu.get_wpr(g, &wpr_inf);
+	gp106_dbg_pmu("wpr carveout base:%llx\n", (wpr_inf.wpr_base));
+	gp106_dbg_pmu("wpr carveout size :%x\n", (u32)wpr_inf.size);
+
+	/* Discover all managed falcons */
+	err = lsfm_discover_ucode_images(g, plsfm);
+	gp106_dbg_pmu(" Managed Falcon cnt %d\n", plsfm->managed_flcn_cnt);
+	if (err)
+		goto exit_err;
+
+	if (plsfm->managed_flcn_cnt && !g->acr.ucode_blob.cpu_va) {
+		/* Generate WPR requirements */
+		err = lsf_gen_wpr_requirements(g, plsfm);
+		if (err)
+			goto free_lsfm;
+
+		/* Alloc memory to hold ucode blob contents */
+		err = g->ops.pmu.alloc_blob_space(g, plsfm->wpr_size
+							,&g->acr.ucode_blob);
+		if (err)
+			goto free_lsfm;
+
+		gp106_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n",
+			plsfm->managed_flcn_cnt, plsfm->wpr_size);
+
+		lsfm_init_wpr_contents(g, plsfm, &g->acr.ucode_blob);
+	} else {
+		gp106_dbg_pmu("LSFM is managing no falcons.\n");
+	}
+	gp106_dbg_pmu("prepare ucode blob return 0\n");
+
+free_lsfm:
+	/* Reached on success and on post-discovery failures alike. */
+	free_acr_resources(g, plsfm);
+exit_err:
+	return err;
+}
+
+/*
+ * Return 1 if bit @falcon_id is set in plsfm->disable_mask, else 0.
+ * @g is not used.
+ */
+static u8 lsfm_falcon_disabled(struct gk20a *g, struct ls_flcn_mgr_v1 *plsfm,
+	u32 falcon_id)
+{
+	return (plsfm->disable_mask >> falcon_id) & 0x1;
+}
+
+/*
+ * Discover all managed falcon ucode images.
+ *
+ * The PMU is queried first; the remaining entries of
+ * pmu_acr_supp_ucode_list[] are then queried in order until one of them
+ * reports failure. Each image that has a signature and is not masked out by
+ * plsfm->disable_mask is added to plsfm's managed list.
+ *
+ * plsfm->managed_flcn_cnt is incremented only when an image has really been
+ * linked into the list, so the count always matches the list length. An
+ * image that could not be added has its temporary resources freed.
+ *
+ * Always returns 0.
+ */
+static int lsfm_discover_ucode_images(struct gk20a *g,
+	struct ls_flcn_mgr_v1 *plsfm)
+{
+	struct pmu_gk20a *pmu = &g->pmu;
+	struct flcn_ucode_img_v1 ucode_img;
+	u32 falcon_id;
+	u32 i;
+	int status;
+
+	/* LSFM requires a secure PMU, discover it first. */
+	/* Obtain the PMU ucode image and add it to the list if required */
+	memset(&ucode_img, 0, sizeof(ucode_img));
+	status = pmu_ucode_details(g, &ucode_img);
+	if (status == 0) {
+		if (ucode_img.lsf_desc != NULL) {
+			/* The falcon_id is formed by grabbing the static base
+			 * falcon_id from the image and adding the
+			 * engine-designated falcon instance. */
+			pmu->pmu_mode |= PMU_SECURE_MODE;
+			falcon_id = ucode_img.lsf_desc->falcon_id +
+				ucode_img.flcn_inst;
+
+			if (!lsfm_falcon_disabled(g, plsfm, falcon_id)) {
+				pmu->falcon_id = falcon_id;
+				if (lsfm_add_ucode_img(g, plsfm, &ucode_img,
+					pmu->falcon_id) == 0) {
+					pmu->pmu_mode |= PMU_LSFM_MANAGED;
+					/* Count only what is on the list */
+					plsfm->managed_flcn_cnt++;
+				}
+			} else {
+				gp106_dbg_pmu("id not managed %d\n",
+					ucode_img.lsf_desc->falcon_id);
+			}
+		}
+
+		/* Free any ucode image resources if not managing this falcon */
+		if (!(pmu->pmu_mode & PMU_LSFM_MANAGED)) {
+			gp106_dbg_pmu("pmu is not LSFM managed\n");
+			lsfm_free_ucode_img_res(&ucode_img);
+		}
+	}
+
+	/* Enumerate all constructed falcon objects,
+	 * as we need the ucode image info and total falcon count. */
+
+	/* The 0th index is always the PMU, which has already been handled
+	 * above.
+	 * NOTE(review): MAX_SUPPORTED_LSFM is assumed not to exceed the
+	 * number of entries in pmu_acr_supp_ucode_list[] -- confirm. */
+	for (i = 1; i < (MAX_SUPPORTED_LSFM); i++) {
+		memset(&ucode_img, 0, sizeof(ucode_img));
+		if (pmu_acr_supp_ucode_list[i](g, &ucode_img) == 0) {
+			if (ucode_img.lsf_desc != NULL) {
+				/* We have engine sigs, ensure that this falcon
+				 * is aware of the secure mode expectations
+				 * (ACR status) */
+
+				/* falcon_id is formed by grabbing the static
+				 * base falcon_id from the image and adding the
+				 * engine-designated falcon instance. */
+				falcon_id = ucode_img.lsf_desc->falcon_id +
+					ucode_img.flcn_inst;
+
+				if (!lsfm_falcon_disabled(g, plsfm,
+					falcon_id)) {
+					/* Do not manage non-FB ucode */
+					if (lsfm_add_ucode_img(g,
+						plsfm, &ucode_img, falcon_id)
+						== 0)
+						plsfm->managed_flcn_cnt++;
+					else
+						/* Not linked: nobody else
+						 * will free these */
+						lsfm_free_nonpmu_ucode_img_res(
+							&ucode_img);
+				} else {
+					gp106_dbg_pmu("not managed %d\n",
+						ucode_img.lsf_desc->falcon_id);
+					lsfm_free_nonpmu_ucode_img_res(
+						&ucode_img);
+				}
+			}
+		} else {
+			/* Consumed all available falcon objects */
+			gp106_dbg_pmu("Done checking for ucodes %d\n", i);
+			break;
+		}
+	}
+	return 0;
+}
+
+
+/*
+ * Fill the PMU loader_config_v1 held in the managed image node @lsfm.
+ *
+ * The code and data DMA bases are the low 32 bits of
+ * (WPR base + ucode offset in the blob + application start offset +
+ * resident code/data offset). The argument area is placed at the top of
+ * PMU DMEM, below which the command line arguments fit.
+ *
+ * Stores sizeof(struct loader_config_v1) in @p_bl_gen_desc_size and records
+ * the argument address in g->acr.pmu_args.
+ *
+ * Returns 0, or -EINVAL when the image has no descriptor (header based
+ * ucode, for which no BL gen desc is filled).
+ */
+static int gp106_pmu_populate_loader_cfg(struct gk20a *g,
+	void *lsfm, u32 *p_bl_gen_desc_size)
+{
+	struct lsfm_managed_ucode_img_v2 *node =
+		(struct lsfm_managed_ucode_img_v2 *)lsfm;
+	struct loader_config_v1 *cfg = &node->bl_gen_desc.loader_cfg_v1;
+	struct pmu_ucode_desc_v1 *desc = node->ucode_img.desc;
+	struct pmu_gk20a *pmu = &g->pmu;
+	struct wpr_carveout_info wpr_inf;
+	u64 blob_addr;
+	u64 code_addr, data_addr;
+	u32 args_addr;
+
+	if (desc == NULL)
+		return -EINVAL;
+
+	/* Address of this image inside the WPR */
+	blob_addr = node->lsb_header.ucode_off;
+	g->ops.pmu.get_wpr(g, &wpr_inf);
+	blob_addr += (wpr_inf.wpr_base);
+
+	gp106_dbg_pmu("pmu loader cfg u32 addrbase %x\n", (u32)blob_addr);
+
+	code_addr = u64_lo32(blob_addr + desc->app_start_offset +
+			desc->app_resident_code_offset);
+	gp106_dbg_pmu("app start %d app res code off %d\n",
+		desc->app_start_offset, desc->app_resident_code_offset);
+
+	data_addr = u64_lo32(blob_addr + desc->app_start_offset +
+			desc->app_resident_data_offset);
+	gp106_dbg_pmu("app res data offset%d\n",
+		desc->app_resident_data_offset);
+	gp106_dbg_pmu("bl start off %d\n", desc->bootloader_start_offset);
+
+	/* DMEM size in bytes, minus room for the command line arguments */
+	args_addr = ((pwr_falcon_hwcfg_dmem_size_v(
+			gk20a_readl(g, pwr_falcon_hwcfg_r())))
+			<< GK20A_PMU_DMEM_BLKSIZE2);
+	args_addr -= g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
+
+	gp106_dbg_pmu("addr_args %x\n", args_addr);
+
+	/* Populate the loader_config state */
+	cfg->dma_idx = GK20A_PMU_DMAIDX_UCODE;
+	flcn64_set_dma(&cfg->code_dma_base, code_addr);
+	cfg->code_size_total = desc->app_size;
+	cfg->code_size_to_load = desc->app_resident_code_size;
+	cfg->code_entry_point = desc->app_imem_entry;
+	flcn64_set_dma(&cfg->data_dma_base, data_addr);
+	cfg->data_size = desc->app_resident_data_size;
+	flcn64_set_dma(&cfg->overlay_dma_base, code_addr);
+
+	/* Update the argc/argv members */
+	cfg->argc = 1;
+	cfg->argv = args_addr;
+
+	*p_bl_gen_desc_size = sizeof(struct loader_config_v1);
+	g->acr.pmu_args = args_addr;
+	return 0;
+}
+
+/*
+ * Fill the generic falcon flcn_bl_dmem_desc_v1 held in the managed image
+ * node @lsfm.
+ *
+ * The descriptor is zeroed first. The code and data DMA bases are the low
+ * 32 bits of (WPR base + ucode offset in the blob + application start
+ * offset + resident code/data offset). @falconid is not consulted.
+ *
+ * Stores sizeof(struct flcn_bl_dmem_desc_v1) in @p_bl_gen_desc_size.
+ *
+ * Returns 0, or -EINVAL when the image has no descriptor (header based
+ * ucode, for which no BL gen desc is filled).
+ */
+static int gp106_flcn_populate_bl_dmem_desc(struct gk20a *g,
+	void *lsfm, u32 *p_bl_gen_desc_size, u32 falconid)
+{
+	struct lsfm_managed_ucode_img_v2 *node =
+		(struct lsfm_managed_ucode_img_v2 *)lsfm;
+	struct flcn_bl_dmem_desc_v1 *bl_desc =
+		&node->bl_gen_desc.bl_dmem_desc_v1;
+	struct pmu_ucode_desc_v1 *desc = node->ucode_img.desc;
+	struct wpr_carveout_info wpr_inf;
+	u64 blob_addr;
+	u64 code_addr, data_addr;
+
+	if (desc == NULL)
+		return -EINVAL;
+
+	/* Address of this image inside the WPR */
+	blob_addr = node->lsb_header.ucode_off;
+	g->ops.pmu.get_wpr(g, &wpr_inf);
+	blob_addr += (wpr_inf.wpr_base);
+
+	gp106_dbg_pmu("gen loader cfg %x u32 addrbase %x ID\n", (u32)blob_addr,
+			node->wpr_header.falcon_id);
+
+	code_addr = u64_lo32(blob_addr + desc->app_start_offset +
+			desc->app_resident_code_offset);
+	data_addr = u64_lo32(blob_addr + desc->app_start_offset +
+			desc->app_resident_data_offset);
+
+	gp106_dbg_pmu("gen cfg %x u32 addrcode %x & data %x load offset %xID\n",
+			(u32)code_addr, (u32)data_addr,
+			desc->bootloader_start_offset,
+			node->wpr_header.falcon_id);
+
+	/* Populate the LOADER_CONFIG state from a clean slate */
+	memset((void *)bl_desc, 0, sizeof(struct flcn_bl_dmem_desc_v1));
+	bl_desc->ctx_dma = GK20A_PMU_DMAIDX_UCODE;
+	flcn64_set_dma(&bl_desc->code_dma_base, code_addr);
+	bl_desc->non_sec_code_size = desc->app_resident_code_size;
+	flcn64_set_dma(&bl_desc->data_dma_base, data_addr);
+	bl_desc->data_size = desc->app_resident_data_size;
+	bl_desc->code_entry_point = desc->app_imem_entry;
+
+	*p_bl_gen_desc_size = sizeof(struct flcn_bl_dmem_desc_v1);
+	return 0;
+}
+
+/*
+ * Populate the falcon boot loader generic descriptor of @pnode.
+ *
+ * Non-PMU falcons go through g->ops.pmu.flcn_populate_bl_dmem_desc(); the
+ * PMU, when it is LSFM managed, goes through
+ * g->ops.pmu.pmu_populate_loader_cfg(). The result of whichever hook ran is
+ * returned to the caller instead of being discarded.
+ *
+ * Returns the hook's result, or -ENOENT when @pnode is the PMU but the PMU
+ * is not LSFM managed.
+ */
+static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g,
+		struct lsfm_managed_ucode_img_v2 *pnode)
+{
+	struct pmu_gk20a *pmu = &g->pmu;
+
+	if (pnode->wpr_header.falcon_id != pmu->falcon_id) {
+		gp106_dbg_pmu("non pmu. write flcn bl gen desc\n");
+		return g->ops.pmu.flcn_populate_bl_dmem_desc(g,
+				pnode, &pnode->bl_gen_desc_size,
+				pnode->wpr_header.falcon_id);
+	}
+
+	/* From here on @pnode is known to be the PMU image. */
+	if (pmu->pmu_mode & PMU_LSFM_MANAGED) {
+		gp106_dbg_pmu("pmu write flcn bl gen desc\n");
+		return g->ops.pmu.pmu_populate_loader_cfg(g, pnode,
+				&pnode->bl_gen_desc_size);
+	}
+
+	/* Failed to find the falcon requested. */
+	return -ENOENT;
+}
+
+/*
+ * Initialize WPR contents.
+ *
+ * Walks the managed image list and, for each image, writes into @ucode:
+ *   - its WPR header, packed at index i of the header array at offset 0,
+ *   - its LSB header at wpr_header.lsb_offset,
+ *   - for loader based images (no ucode header), the boot loader generic
+ *     descriptor at lsb_header.bl_data_off,
+ *   - the ucode image itself at lsb_header.ucode_off.
+ * Finally the header slot following the last one written is tagged with
+ * LSF_FALCON_ID_INVALID to terminate the array.
+ *
+ * The terminator offset is computed with the same header type that
+ * lsf_gen_wpr_requirements() used to lay the array out, and from the number
+ * of headers actually written.
+ */
+static void lsfm_init_wpr_contents(struct gk20a *g,
+		struct ls_flcn_mgr_v1 *plsfm, struct mem_desc *ucode)
+{
+	struct lsfm_managed_ucode_img_v2 *pnode = plsfm->ucode_img_list;
+	u32 i = 0;
+
+	/*
+	 * Walk the managed falcons, flush WPR and LSB headers to FB.
+	 * flush any bl args to the storage area relative to the
+	 * ucode image (appended on the end as a DMEM area).
+	 */
+	while (pnode) {
+		/* Flush WPR header to memory; the array is at the WPR base */
+		gk20a_mem_wr_n(g, ucode, i * sizeof(pnode->wpr_header),
+				&pnode->wpr_header, sizeof(pnode->wpr_header));
+
+		gp106_dbg_pmu("wpr header");
+		gp106_dbg_pmu("falconid :%d",
+				pnode->wpr_header.falcon_id);
+		gp106_dbg_pmu("lsb_offset :%x",
+				pnode->wpr_header.lsb_offset);
+		gp106_dbg_pmu("bootstrap_owner :%d",
+			pnode->wpr_header.bootstrap_owner);
+		gp106_dbg_pmu("lazy_bootstrap :%d",
+				pnode->wpr_header.lazy_bootstrap);
+		gp106_dbg_pmu("status :%d",
+				pnode->wpr_header.status);
+
+		/* Flush LSB header to memory */
+		gk20a_mem_wr_n(g, ucode, pnode->wpr_header.lsb_offset,
+				&pnode->lsb_header, sizeof(pnode->lsb_header));
+
+		gp106_dbg_pmu("lsb header");
+		gp106_dbg_pmu("ucode_off :%x",
+				pnode->lsb_header.ucode_off);
+		gp106_dbg_pmu("ucode_size :%x",
+				pnode->lsb_header.ucode_size);
+		gp106_dbg_pmu("data_size :%x",
+				pnode->lsb_header.data_size);
+		gp106_dbg_pmu("bl_code_size :%x",
+				pnode->lsb_header.bl_code_size);
+		gp106_dbg_pmu("bl_imem_off :%x",
+				pnode->lsb_header.bl_imem_off);
+		gp106_dbg_pmu("bl_data_off :%x",
+				pnode->lsb_header.bl_data_off);
+		gp106_dbg_pmu("bl_data_size :%x",
+				pnode->lsb_header.bl_data_size);
+		gp106_dbg_pmu("app_code_off :%x",
+				pnode->lsb_header.app_code_off);
+		gp106_dbg_pmu("app_code_size :%x",
+				pnode->lsb_header.app_code_size);
+		gp106_dbg_pmu("app_data_off :%x",
+				pnode->lsb_header.app_data_off);
+		gp106_dbg_pmu("app_data_size :%x",
+				pnode->lsb_header.app_data_size);
+		gp106_dbg_pmu("flags :%x",
+				pnode->lsb_header.flags);
+
+		/* If this falcon has a boot loader and related args,
+		 * flush them. */
+		if (!pnode->ucode_img.header) {
+			/* Populate gen bl and flush to memory */
+			lsfm_fill_flcn_bl_gen_desc(g, pnode);
+			gk20a_mem_wr_n(g, ucode,
+					pnode->lsb_header.bl_data_off,
+					&pnode->bl_gen_desc,
+					pnode->bl_gen_desc_size);
+		}
+		/* Copying of ucode */
+		gk20a_mem_wr_n(g, ucode, pnode->lsb_header.ucode_off,
+				pnode->ucode_img.data,
+				pnode->ucode_img.data_size);
+		pnode = pnode->next;
+		i++;
+	}
+
+	/*
+	 * Tag the terminator WPR header with an invalid falcon ID. It goes
+	 * in the slot right after the last header written above.
+	 * NOTE(review): the offset is passed in bytes, as before; confirm
+	 * that gk20a_mem_wr32() does not expect a 32-bit word index.
+	 */
+	gk20a_mem_wr32(g, ucode,
+			i * sizeof(struct lsf_wpr_header_v1) +
+			offsetof(struct lsf_wpr_header_v1, falcon_id),
+			LSF_FALCON_ID_INVALID);
+}
+
+/*!
+ * lsfm_parse_no_loader_ucode: derive LSB header sizes from a falcon UCODE
+ * header (falcons that do not use a boot loader).
+ *
+ * Total code size is OS code + every application's code + the OS overlay;
+ * total data size is OS data + every application's data.
+ *
+ * @param[in]  p_ucodehdr : UCODE header, indexed by the
+ *                          FLCN_NL_UCODE_HDR_* macros
+ * @param[out] lsb_hdr    : LSB header whose size/offset members are updated
+ *
+ * @return 0
+ */
+static int lsfm_parse_no_loader_ucode(u32 *p_ucodehdr,
+	struct lsf_lsb_header_v1 *lsb_hdr)
+{
+	u32 num_apps = p_ucodehdr[FLCN_NL_UCODE_HDR_NUM_APPS_IND];
+	u32 code_total = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND];
+	u32 data_total = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND];
+	u32 app;
+
+	/* Accumulate per-application code and data sizes in one pass */
+	for (app = 0; app < num_apps; app++) {
+		code_total += p_ucodehdr[
+			FLCN_NL_UCODE_HDR_APP_CODE_SIZE_IND(num_apps, app)];
+		data_total += p_ucodehdr[
+			FLCN_NL_UCODE_HDR_APP_DATA_SIZE_IND(num_apps, app)];
+	}
+	code_total += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_OVL_SIZE_IND(num_apps)];
+
+	lsb_hdr->ucode_size = code_total;
+	lsb_hdr->data_size = data_total;
+
+	/* The OS code/data sections play the boot loader role here */
+	lsb_hdr->bl_code_size = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND];
+	lsb_hdr->bl_imem_off = 0;
+	lsb_hdr->bl_data_off = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_OFF_IND];
+	lsb_hdr->bl_data_size = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND];
+	return 0;
+}
+
+/*!
+ * @brief lsfm_fill_static_lsb_hdr_info
+ * Populate the static LSB header information of @pnode from its ucode
+ * image.
+ *
+ * The signature is copied from the image's lsf_desc when present. Header
+ * based images (no loader) take their sizes from the ucode header and get
+ * the LOAD_CODE_AT_0 and DMACTL_REQ_CTX flags. Descriptor based images
+ * (loader) take 256-byte padded sizes from the descriptor; the
+ * DMACTL_REQ_CTX flag is set only for the PMU, and FORCE_PRIV_LOAD is added
+ * when g->ops.pmu.is_priv_load() says so for @falcon_id.
+ */
+static void lsfm_fill_static_lsb_hdr_info(struct gk20a *g,
+	u32 falcon_id, struct lsfm_managed_ucode_img_v2 *pnode)
+{
+	struct lsf_lsb_header_v1 *lsb = &pnode->lsb_header;
+	struct flcn_ucode_img_v1 *img = &pnode->ucode_img;
+	struct pmu_gk20a *pmu = &g->pmu;
+	u32 full_app_size;
+
+	if (img->lsf_desc != NULL)
+		memcpy(&lsb->signature, img->lsf_desc,
+			sizeof(struct lsf_ucode_desc_v1));
+	lsb->ucode_size = img->data_size;
+
+	/* The remainder of the LSB depends on the loader usage */
+	if (img->header != NULL) {
+		/* No loader: start from zero, then parse the ucode header */
+		lsb->data_size = 0;
+		lsb->bl_code_size = 0;
+		lsb->bl_data_off = 0;
+		lsb->bl_data_size = 0;
+
+		lsfm_parse_no_loader_ucode(img->header, lsb);
+
+		/* Load the first 256 bytes of IMEM.
+		 * LOAD_CODE_AT_0 and DMACTL_REQ_CTX are set for all method
+		 * based falcons. */
+		lsb->flags = NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE |
+			NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE;
+		return;
+	}
+
+	/* Uses a loader, i.e. the image has a descriptor */
+	lsb->data_size = 0;
+
+	/* The loader code size in the descriptor is not aligned although
+	 * the code following it is; pad it up to a 256 byte boundary. */
+	lsb->bl_code_size = ALIGN(img->desc->bootloader_size,
+		LSF_BL_CODE_SIZE_ALIGNMENT);
+	full_app_size = ALIGN(img->desc->app_size,
+		LSF_BL_CODE_SIZE_ALIGNMENT) + lsb->bl_code_size;
+	lsb->ucode_size = ALIGN(img->desc->app_resident_data_offset,
+		LSF_BL_CODE_SIZE_ALIGNMENT) + lsb->bl_code_size;
+	lsb->data_size = full_app_size - lsb->ucode_size;
+
+	/* Though the BL is located at the 0th offset of the image, its VA
+	 * is different so that it does not collide with the actual OS VA
+	 * range. */
+	lsb->bl_imem_off = img->desc->bootloader_imem_offset;
+
+	/* TODO: OBJFLCN should export properties from which the flags
+	 * below should be populated. */
+	if (falcon_id == pmu->falcon_id)
+		lsb->flags = NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE;
+	else
+		lsb->flags = 0;
+
+	if (g->ops.pmu.is_priv_load(falcon_id))
+		lsb->flags |= NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_TRUE;
+}
+
+/*
+ * Add a ucode image to the list of managed ucode images.
+ *
+ * A new list node is allocated, given its own copy of @ucode_image, has its
+ * static WPR and LSB header fields filled in, and is pushed onto the head
+ * of plsfm->ucode_img_list.
+ *
+ * Returns 0 on success or -ENOMEM if the node cannot be allocated.
+ */
+static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr_v1 *plsfm,
+	struct flcn_ucode_img_v1 *ucode_image, u32 falcon_id)
+{
+	struct lsfm_managed_ucode_img_v2 *node;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	/* Keep a copy of the ucode image info locally */
+	memcpy(&node->ucode_img, ucode_image,
+		sizeof(struct flcn_ucode_img_v1));
+
+	/* Static WPR header info */
+	node->wpr_header.falcon_id = falcon_id;
+	node->wpr_header.bootstrap_owner = 0x07; /* LSF_BOOTSTRAP_OWNER_DEFAULT */
+	node->wpr_header.status = LSF_IMAGE_STATUS_COPY;
+	node->wpr_header.lazy_bootstrap =
+		g->ops.pmu.is_lazy_bootstrap(falcon_id);
+
+	/* TODO: check whether PDB_PROP_FLCN_LAZY_BOOTSTRAP is to be
+	 * supported by Android */
+
+	/* Static LSB header info; the signature is available afterwards */
+	lsfm_fill_static_lsb_hdr_info(g, falcon_id, node);
+	node->wpr_header.bin_version = node->lsb_header.signature.version;
+
+	/* Push onto the head of the managed list */
+	node->next = plsfm->ucode_img_list;
+	plsfm->ucode_img_list = node;
+	return 0;
+}
+
+/*
+ * Free the resources of a PMU ucode image: only the lsf_desc copy is
+ * released, and the pointer is cleared afterwards.
+ */
+static void lsfm_free_ucode_img_res(struct flcn_ucode_img_v1 *p_img)
+{
+	/* kfree() accepts NULL, so no guard is required */
+	kfree(p_img->lsf_desc);
+	p_img->lsf_desc = NULL;
+}
+
+/*
+ * Free the resources of a non-PMU ucode image: both the lsf_desc copy and
+ * the descriptor are released, and both pointers are cleared afterwards.
+ */
+static void lsfm_free_nonpmu_ucode_img_res(struct flcn_ucode_img_v1 *p_img)
+{
+	/* kfree() accepts NULL, so no guards are required */
+	kfree(p_img->lsf_desc);
+	p_img->lsf_desc = NULL;
+
+	kfree(p_img->desc);
+	p_img->desc = NULL;
+}
+
+/*
+ * Release every node of plsfm's managed image list along with the image
+ * resources each node owns.
+ *
+ * The list is walked until it is empty instead of trusting
+ * plsfm->managed_flcn_cnt, so a count that disagrees with the list length
+ * can no longer cause a NULL dereference. plsfm->ucode_img_list is NULL on
+ * return; managed_flcn_cnt is left untouched. @g is not used.
+ */
+static void free_acr_resources(struct gk20a *g, struct ls_flcn_mgr_v1 *plsfm)
+{
+	struct lsfm_managed_ucode_img_v2 *mg_ucode_img;
+
+	while (plsfm->ucode_img_list != NULL) {
+		mg_ucode_img = plsfm->ucode_img_list;
+
+		/*
+		 * The PMU image's desc points into the loaded descriptor
+		 * firmware and must not be kfree'd; only non-PMU images own
+		 * an allocated desc.
+		 */
+		if (mg_ucode_img->ucode_img.lsf_desc != NULL &&
+		    mg_ucode_img->ucode_img.lsf_desc->falcon_id ==
+				LSF_FALCON_ID_PMU)
+			lsfm_free_ucode_img_res(&mg_ucode_img->ucode_img);
+		else
+			lsfm_free_nonpmu_ucode_img_res(
+				&mg_ucode_img->ucode_img);
+
+		plsfm->ucode_img_list = mg_ucode_img->next;
+		kfree(mg_ucode_img);
+	}
+}
+
+/*
+ * Generate WPR requirements for ACR allocation request.
+ *
+ * Lays out the WPR contents for every managed image: records each image's
+ * LSB header offset, ucode offset and (for loader based images) boot loader
+ * data offset and size in its headers, and stores the total size needed in
+ * plsfm->wpr_size. @g is not used. Always returns 0.
+ */
+static int lsf_gen_wpr_requirements(struct gk20a *g,
+		struct ls_flcn_mgr_v1 *plsfm)
+{
+	struct lsfm_managed_ucode_img_v2 *pnode = plsfm->ucode_img_list;
+	u32 wpr_offset;
+
+	/* Calculate WPR size required */
+
+	/* Start with an array of WPR headers at the base of the WPR.
+	 The expectation here is that the secure falcon will do a single DMA
+	 read of this array and cache it internally so it's OK to pack these.
+	 Also, we add 1 to the falcon count to indicate the end of the array.*/
+	wpr_offset = sizeof(struct lsf_wpr_header_v1) *
+		(plsfm->managed_flcn_cnt+1);
+
+	/* Walk the managed falcons, accounting for the LSB structs
+	as well as the ucode images. */
+	while (pnode) {
+		/* Align, save off, and include an LSB header size */
+		wpr_offset = ALIGN(wpr_offset,
+			LSF_LSB_HEADER_ALIGNMENT);
+		pnode->wpr_header.lsb_offset = wpr_offset;
+		wpr_offset += sizeof(struct lsf_lsb_header_v1);
+
+		/* Align, save off, and include the original (static)
+		ucode image size */
+		wpr_offset = ALIGN(wpr_offset,
+			LSF_UCODE_DATA_ALIGNMENT);
+		pnode->lsb_header.ucode_off = wpr_offset;
+		wpr_offset += pnode->ucode_img.data_size;
+
+		/* For falcons that use a boot loader (BL), we append a loader
+		desc structure on the end of the ucode image and consider this
+		the boot loader data. The host will then copy the loader desc
+		args to this space within the WPR region (before locking down)
+		and the HS bin will then copy them to DMEM 0 for the loader. */
+		if (!pnode->ucode_img.header) {
+			/* Track the size for LSB details filled in later.
+			Note that at this point we don't know what kind of
+			boot loader desc it is, so we just take the size of
+			the generic one, which is the largest it will ever be.
+			*/
+			/* Align (size bloat) and save off generic
+			descriptor size*/
+			pnode->lsb_header.bl_data_size = ALIGN(
+				sizeof(pnode->bl_gen_desc),
+				LSF_BL_DATA_SIZE_ALIGNMENT);
+
+			/*Align, save off, and include the additional BL data*/
+			wpr_offset = ALIGN(wpr_offset,
+				LSF_BL_DATA_ALIGNMENT);
+			pnode->lsb_header.bl_data_off = wpr_offset;
+			wpr_offset += pnode->lsb_header.bl_data_size;
+		} else {
+			/* bl_data_off is already assigned in static
+			information. But that is from start of the image */
+			pnode->lsb_header.bl_data_off +=
+				(wpr_offset - pnode->ucode_img.data_size);
+		}
+
+		/* Finally, update ucode surface size to include updates */
+		pnode->full_ucode_size = wpr_offset -
+			pnode->lsb_header.ucode_off;
+		/* Non-PMU falcons also get the app code/data split, derived
+		from the sizes already stored in the LSB header */
+		if (pnode->wpr_header.falcon_id != LSF_FALCON_ID_PMU) {
+			pnode->lsb_header.app_code_off =
+				pnode->lsb_header.bl_code_size;
+			pnode->lsb_header.app_code_size =
+				pnode->lsb_header.ucode_size -
+				pnode->lsb_header.bl_code_size;
+			pnode->lsb_header.app_data_off =
+				pnode->lsb_header.ucode_size;
+			pnode->lsb_header.app_data_size =
+				pnode->lsb_header.data_size;
+		}
+		pnode = pnode->next;
+	}
+	plsfm->wpr_size = wpr_offset;
+	return 0;
+}
+
+/* Loads the ACR bin into a GMMU-mapped buffer (first call only) and hands the
+ * bootloader descriptor to pmu_exec_gen_bl(); returns 0 or an error code. */
+int gp106_bootstrap_hs_flcn(struct gk20a *g)
+{
+	struct mm_gk20a *mm = &g->mm;
+	struct vm_gk20a *vm = &mm->pmu.vm;
+	int err = 0;
+	struct flcn_acr_desc_v1 *acr_dmem;
+	u32 img_size_in_bytes = 0;
+	u32 status;
+	struct acr_desc *acr = &g->acr;
+	const struct firmware *acr_fw = acr->acr_fw;
+	struct flcn_bl_dmem_desc_v1 *bl_dmem_desc = &acr->bl_dmem_desc_v1;
+	u32 *acr_ucode_header_t210_load;
+	u32 *acr_ucode_data_t210_load;
+	struct wpr_carveout_info wpr_inf;
+
+	gp106_dbg_pmu("");
+
+	if (!acr_fw) {
+		/*First time init case*/
+		acr_fw = gk20a_request_firmware(g, GM20B_HSBIN_PMU_UCODE_IMAGE);
+		if (!acr_fw) {
+			gk20a_err(dev_from_gk20a(g), "pmu ucode get fail");
+			return -ENOENT;
+		}
+		acr->acr_fw = acr_fw;
+		acr->hsbin_hdr = (struct bin_hdr *)acr_fw->data;
+		acr->fw_hdr = (struct acr_fw_header *)(acr_fw->data +
+				acr->hsbin_hdr->header_offset);
+		acr_ucode_data_t210_load = (u32 *)(acr_fw->data +
+				acr->hsbin_hdr->data_offset);
+		acr_ucode_header_t210_load = (u32 *)(acr_fw->data +
+				acr->fw_hdr->hdr_offset);
+		img_size_in_bytes = ALIGN((acr->hsbin_hdr->data_size), 256);
+
+		/* Lets patch the signatures first.. */
+		if (acr_ucode_patch_sig(g, acr_ucode_data_t210_load,
+					(u32 *)(acr_fw->data +
+						acr->fw_hdr->sig_prod_offset),
+					(u32 *)(acr_fw->data +
+						acr->fw_hdr->sig_dbg_offset),
+					(u32 *)(acr_fw->data +
+						acr->fw_hdr->patch_loc),
+					(u32 *)(acr_fw->data +
+						acr->fw_hdr->patch_sig)) < 0) {
+			gk20a_err(dev_from_gk20a(g), "patch signatures fail");
+			err = -1;
+			goto err_release_acr_fw;
+		}
+		err = gk20a_gmmu_alloc_map(vm, img_size_in_bytes,
+				&acr->acr_ucode);
+		if (err) {
+			err = -ENOMEM;
+			goto err_release_acr_fw;
+		}
+
+		g->ops.pmu.get_wpr(g, &wpr_inf);
+
+		/* Patch the ACR descriptor inside the firmware image; it is
+		 * at byte offset header[2] of the ucode data, and the whole
+		 * image is copied into acr_ucode afterwards. */
+		acr_dmem = (struct flcn_acr_desc_v1 *)
+			&(((u8 *)acr_ucode_data_t210_load)[
+					acr_ucode_header_t210_load[2]]);
+		acr->acr_dmem_desc_v1 = (struct flcn_acr_desc_v1 *)((u8 *)(
+			acr->acr_ucode.cpu_va) + acr_ucode_header_t210_load[2]);
+		acr_dmem->nonwpr_ucode_blob_start = wpr_inf.nonwpr_base;
+		acr_dmem->nonwpr_ucode_blob_size = wpr_inf.size;
+		acr_dmem->regions.no_regions = 1;
+		acr_dmem->wpr_offset = 0;
+
+		acr_dmem->wpr_region_id = 1;
+		acr_dmem->regions.region_props[0].region_id = 1;
+		acr_dmem->regions.region_props[0].start_addr =
+			wpr_inf.wpr_base >> 8;
+		acr_dmem->regions.region_props[0].end_addr =
+			(wpr_inf.wpr_base + wpr_inf.size) >> 8;
+		acr_dmem->regions.region_props[0].shadowmMem_startaddress =
+			wpr_inf.nonwpr_base >> 8;
+
+		gk20a_mem_wr_n(g, &acr->acr_ucode, 0,
+				acr_ucode_data_t210_load, img_size_in_bytes);
+
+		/* To execute this binary we use a bootloader which loads the
+		 * image into PMU IMEM/DMEM. Fill up the bootloader descriptor
+		 * for the PMU HAL to use.
+		 * TODO: Use standard descriptor once the generic bootloader
+		 * is checked in. */
+
+		bl_dmem_desc->signature[0] = 0;
+		bl_dmem_desc->signature[1] = 0;
+		bl_dmem_desc->signature[2] = 0;
+		bl_dmem_desc->signature[3] = 0;
+		bl_dmem_desc->ctx_dma = GK20A_PMU_DMAIDX_VIRT;
+		flcn64_set_dma(&bl_dmem_desc->code_dma_base,
+				acr->acr_ucode.gpu_va);
+		bl_dmem_desc->non_sec_code_off  = acr_ucode_header_t210_load[0];
+		bl_dmem_desc->non_sec_code_size = acr_ucode_header_t210_load[1];
+		bl_dmem_desc->sec_code_off = acr_ucode_header_t210_load[5];
+		bl_dmem_desc->sec_code_size = acr_ucode_header_t210_load[6];
+		bl_dmem_desc->code_entry_point = 0; /* Start at 0th offset */
+		flcn64_set_dma(&bl_dmem_desc->data_dma_base,
+			acr->acr_ucode.gpu_va + acr_ucode_header_t210_load[2]);
+		bl_dmem_desc->data_size = acr_ucode_header_t210_load[3];
+	} else {
+		/* Firmware already loaded: this function only ever sets up
+		 * acr_dmem_desc_v1, so that is the descriptor to update. */
+		acr->acr_dmem_desc_v1->nonwpr_ucode_blob_size = 0;
+	}
+
+	status = pmu_exec_gen_bl(g, bl_dmem_desc, 1);
+	if (status != 0) {
+		err = status;
+		goto err_free_ucode_map;
+	}
+	return 0;
+err_free_ucode_map:
+	gk20a_gmmu_unmap_free(vm, &acr->acr_ucode);
+err_release_acr_fw:
+	release_firmware(acr_fw);
+	acr->acr_fw = NULL;
+	return err;
+}
diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.h b/drivers/gpu/nvgpu/gp106/acr_gp106.h
index 26e68cd7..9afec529 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.h
@@ -118,4 +118,6 @@ struct flcn_acr_desc_v1 {
 	u32 dummy[4];  //ACR_BSI_VPR_DESC
 };
 
+void gp106_init_secure_pmu(struct gpu_ops *gops);
+
 #endif /*__PMU_GP106_H_*/
diff --git a/drivers/gpu/nvgpu/gp106/hw_psec_gp106.h b/drivers/gpu/nvgpu/gp106/hw_psec_gp106.h
new file mode 100644
index 00000000..f9c9f69c
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_psec_gp106.h
@@ -0,0 +1,609 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_psec_gp106_h_
+#define _hw_psec_gp106_h_
+
+static inline u32 psec_falcon_irqsset_r(void)
+{
+	return 0x00087000;
+}
+static inline u32 psec_falcon_irqsset_swgen0_set_f(void)
+{
+	return 0x40;
+}
+static inline u32 psec_falcon_irqsclr_r(void)
+{
+	return 0x00087004;
+}
+static inline u32 psec_falcon_irqstat_r(void)
+{
+	return 0x00087008;
+}
+static inline u32 psec_falcon_irqstat_halt_true_f(void)
+{
+	return 0x10;
+}
+static inline u32 psec_falcon_irqstat_exterr_true_f(void)
+{
+	return 0x20;
+}
+static inline u32 psec_falcon_irqstat_swgen0_true_f(void)
+{
+	return 0x40;
+}
+static inline u32 psec_falcon_irqmode_r(void)
+{
+	return 0x0008700c;
+}
+static inline u32 psec_falcon_irqmset_r(void)
+{
+	return 0x00087010;
+}
+static inline u32 psec_falcon_irqmset_gptmr_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 psec_falcon_irqmset_wdtmr_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
+static inline u32 psec_falcon_irqmset_mthd_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+static inline u32 psec_falcon_irqmset_ctxsw_f(u32 v)
+{
+	return (v & 0x1) << 3;
+}
+static inline u32 psec_falcon_irqmset_halt_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 psec_falcon_irqmset_exterr_f(u32 v)
+{
+	return (v & 0x1) << 5;
+}
+static inline u32 psec_falcon_irqmset_swgen0_f(u32 v)
+{
+	return (v & 0x1) << 6;
+}
+static inline u32 psec_falcon_irqmset_swgen1_f(u32 v)
+{
+	return (v & 0x1) << 7;
+}
+static inline u32 psec_falcon_irqmclr_r(void)
+{
+	return 0x00087014;
+}
+static inline u32 psec_falcon_irqmclr_gptmr_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 psec_falcon_irqmclr_wdtmr_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
+static inline u32 psec_falcon_irqmclr_mthd_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+static inline u32 psec_falcon_irqmclr_ctxsw_f(u32 v)
+{
+	return (v & 0x1) << 3;
+}
+static inline u32 psec_falcon_irqmclr_halt_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 psec_falcon_irqmclr_exterr_f(u32 v)
+{
+	return (v & 0x1) << 5;
+}
+static inline u32 psec_falcon_irqmclr_swgen0_f(u32 v)
+{
+	return (v & 0x1) << 6;
+}
+static inline u32 psec_falcon_irqmclr_swgen1_f(u32 v)
+{
+	return (v & 0x1) << 7;
+}
+static inline u32 psec_falcon_irqmclr_ext_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 psec_falcon_irqmask_r(void)
+{
+	return 0x00087018;
+}
+static inline u32 psec_falcon_irqdest_r(void)
+{
+	return 0x0008701c;
+}
+static inline u32 psec_falcon_irqdest_host_gptmr_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 psec_falcon_irqdest_host_wdtmr_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
+static inline u32 psec_falcon_irqdest_host_mthd_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+static inline u32 psec_falcon_irqdest_host_ctxsw_f(u32 v)
+{
+	return (v & 0x1) << 3;
+}
+static inline u32 psec_falcon_irqdest_host_halt_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 psec_falcon_irqdest_host_exterr_f(u32 v)
+{
+	return (v & 0x1) << 5;
+}
+static inline u32 psec_falcon_irqdest_host_swgen0_f(u32 v)
+{
+	return (v & 0x1) << 6;
+}
+static inline u32 psec_falcon_irqdest_host_swgen1_f(u32 v)
+{
+	return (v & 0x1) << 7;
+}
+static inline u32 psec_falcon_irqdest_host_ext_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 psec_falcon_irqdest_target_gptmr_f(u32 v)
+{
+	return (v & 0x1) << 16;
+}
+static inline u32 psec_falcon_irqdest_target_wdtmr_f(u32 v)
+{
+	return (v & 0x1) << 17;
+}
+static inline u32 psec_falcon_irqdest_target_mthd_f(u32 v)
+{
+	return (v & 0x1) << 18;
+}
+static inline u32 psec_falcon_irqdest_target_ctxsw_f(u32 v)
+{
+	return (v & 0x1) << 19;
+}
+static inline u32 psec_falcon_irqdest_target_halt_f(u32 v)
+{
+	return (v & 0x1) << 20;
+}
+static inline u32 psec_falcon_irqdest_target_exterr_f(u32 v)
+{
+	return (v & 0x1) << 21;
+}
+static inline u32 psec_falcon_irqdest_target_swgen0_f(u32 v)
+{
+	return (v & 0x1) << 22;
+}
+static inline u32 psec_falcon_irqdest_target_swgen1_f(u32 v)
+{
+	return (v & 0x1) << 23;
+}
+static inline u32 psec_falcon_irqdest_target_ext_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+static inline u32 psec_falcon_curctx_r(void)
+{
+	return 0x00087050;
+}
+static inline u32 psec_falcon_nxtctx_r(void)
+{
+	return 0x00087054;
+}
+static inline u32 psec_falcon_mailbox0_r(void)
+{
+	return 0x00087040;
+}
+static inline u32 psec_falcon_mailbox1_r(void)
+{
+	return 0x00087044;
+}
+static inline u32 psec_falcon_itfen_r(void)
+{
+	return 0x00087048;
+}
+static inline u32 psec_falcon_itfen_ctxen_enable_f(void)
+{
+	return 0x1;
+}
+static inline u32 psec_falcon_idlestate_r(void)
+{
+	return 0x0008704c;
+}
+static inline u32 psec_falcon_idlestate_falcon_busy_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 psec_falcon_idlestate_ext_busy_v(u32 r)
+{
+	return (r >> 1) & 0x7fff;
+}
+static inline u32 psec_falcon_os_r(void)
+{
+	return 0x00087080;
+}
+static inline u32 psec_falcon_engctl_r(void)
+{
+	return 0x000870a4;
+}
+static inline u32 psec_falcon_cpuctl_r(void)
+{
+	return 0x00087100;
+}
+static inline u32 psec_falcon_cpuctl_startcpu_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
+static inline u32 psec_falcon_cpuctl_halt_intr_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 psec_falcon_cpuctl_halt_intr_m(void)
+{
+	return 0x1 << 4;
+}
+static inline u32 psec_falcon_cpuctl_halt_intr_v(u32 r)
+{
+	return (r >> 4) & 0x1;
+}
+static inline u32 psec_falcon_cpuctl_cpuctl_alias_en_f(u32 v)
+{
+	return (v & 0x1) << 6;
+}
+static inline u32 psec_falcon_cpuctl_cpuctl_alias_en_m(void)
+{
+	return 0x1 << 6;
+}
+static inline u32 psec_falcon_cpuctl_cpuctl_alias_en_v(u32 r)
+{
+	return (r >> 6) & 0x1;
+}
+static inline u32 psec_falcon_cpuctl_alias_r(void)
+{
+	return 0x00087130;
+}
+static inline u32 psec_falcon_cpuctl_alias_startcpu_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
+static inline u32 psec_falcon_imemc_r(u32 i)
+{
+	return 0x00087180 + i*16;
+}
+static inline u32 psec_falcon_imemc_offs_f(u32 v)
+{
+	return (v & 0x3f) << 2;
+}
+static inline u32 psec_falcon_imemc_blk_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 psec_falcon_imemc_aincw_f(u32 v)
+{
+	return (v & 0x1) << 24;
+}
+static inline u32 psec_falcon_imemd_r(u32 i)
+{
+	return 0x00087184 + i*16;
+}
+static inline u32 psec_falcon_imemt_r(u32 i)
+{
+	return 0x00087188 + i*16;
+}
+static inline u32 psec_falcon_sctl_r(void)
+{
+	return 0x00087240;
+}
+static inline u32 psec_falcon_mmu_phys_sec_r(void)
+{
+	return 0x00100ce4;
+}
+static inline u32 psec_falcon_bootvec_r(void)
+{
+	return 0x00087104;
+}
+static inline u32 psec_falcon_bootvec_vec_f(u32 v)
+{
+	return (v & 0xffffffff) << 0;
+}
+static inline u32 psec_falcon_dmactl_r(void)
+{
+	return 0x0008710c;
+}
+static inline u32 psec_falcon_dmactl_dmem_scrubbing_m(void)
+{
+	return 0x1 << 1;
+}
+static inline u32 psec_falcon_dmactl_imem_scrubbing_m(void)
+{
+	return 0x1 << 2;
+}
+static inline u32 psec_falcon_dmactl_require_ctx_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 psec_falcon_hwcfg_r(void)
+{
+	return 0x00087108;
+}
+static inline u32 psec_falcon_hwcfg_imem_size_v(u32 r)
+{
+	return (r >> 0) & 0x1ff;
+}
+static inline u32 psec_falcon_hwcfg_dmem_size_v(u32 r)
+{
+	return (r >> 9) & 0x1ff;
+}
+static inline u32 psec_falcon_dmatrfbase_r(void)
+{
+	return 0x00087110;
+}
+static inline u32 psec_falcon_dmatrfbase1_r(void)
+{
+	return 0x00087128;
+}
+static inline u32 psec_falcon_dmatrfmoffs_r(void)
+{
+	return 0x00087114;
+}
+static inline u32 psec_falcon_dmatrfcmd_r(void)
+{
+	return 0x00087118;
+}
+static inline u32 psec_falcon_dmatrfcmd_imem_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 psec_falcon_dmatrfcmd_write_f(u32 v)
+{
+	return (v & 0x1) << 5;
+}
+static inline u32 psec_falcon_dmatrfcmd_size_f(u32 v)
+{
+	return (v & 0x7) << 8;
+}
+static inline u32 psec_falcon_dmatrfcmd_ctxdma_f(u32 v)
+{
+	return (v & 0x7) << 12;
+}
+static inline u32 psec_falcon_dmatrffboffs_r(void)
+{
+	return 0x0008711c;
+}
+static inline u32 psec_falcon_exterraddr_r(void)
+{
+	return 0x00087168;
+}
+static inline u32 psec_falcon_exterrstat_r(void)
+{
+	return 0x0008716c;
+}
+static inline u32 psec_falcon_exterrstat_valid_m(void)
+{
+	return 0x1U << 31; /* unsigned: 0x1 << 31 overflows a signed int */
+}
+static inline u32 psec_falcon_exterrstat_valid_v(u32 r)
+{
+	return (r >> 31) & 0x1;
+}
+static inline u32 psec_falcon_exterrstat_valid_true_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 psec_sec2_falcon_icd_cmd_r(void)
+{
+	return 0x00087200;
+}
+static inline u32 psec_sec2_falcon_icd_cmd_opc_s(void)
+{
+	return 4;
+}
+static inline u32 psec_sec2_falcon_icd_cmd_opc_f(u32 v)
+{
+	return (v & 0xf) << 0;
+}
+static inline u32 psec_sec2_falcon_icd_cmd_opc_m(void)
+{
+	return 0xf << 0;
+}
+static inline u32 psec_sec2_falcon_icd_cmd_opc_v(u32 r)
+{
+	return (r >> 0) & 0xf;
+}
+static inline u32 psec_sec2_falcon_icd_cmd_opc_rreg_f(void)
+{
+	return 0x8;
+}
+static inline u32 psec_sec2_falcon_icd_cmd_opc_rstat_f(void)
+{
+	return 0xe;
+}
+static inline u32 psec_sec2_falcon_icd_cmd_idx_f(u32 v)
+{
+	return (v & 0x1f) << 8;
+}
+static inline u32 psec_sec2_falcon_icd_rdata_r(void)
+{
+	return 0x0008720c;
+}
+static inline u32 psec_falcon_dmemc_r(u32 i)
+{
+	return 0x000871c0 + i*8;
+}
+static inline u32 psec_falcon_dmemc_offs_f(u32 v)
+{
+	return (v & 0x3f) << 2;
+}
+static inline u32 psec_falcon_dmemc_offs_m(void)
+{
+	return 0x3f << 2;
+}
+static inline u32 psec_falcon_dmemc_blk_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 psec_falcon_dmemc_blk_m(void)
+{
+	return 0xff << 8;
+}
+static inline u32 psec_falcon_dmemc_aincw_f(u32 v)
+{
+	return (v & 0x1) << 24;
+}
+static inline u32 psec_falcon_dmemc_aincr_f(u32 v)
+{
+	return (v & 0x1) << 25;
+}
+static inline u32 psec_falcon_dmemd_r(u32 i)
+{
+	return 0x000871c4 + i*8;
+}
+static inline u32 psec_falcon_debug1_r(void)
+{
+	return 0x00087090;
+}
+static inline u32 psec_falcon_debug1_ctxsw_mode_s(void)
+{
+	return 1;
+}
+static inline u32 psec_falcon_debug1_ctxsw_mode_f(u32 v)
+{
+	return (v & 0x1) << 16;
+}
+static inline u32 psec_falcon_debug1_ctxsw_mode_m(void)
+{
+	return 0x1 << 16;
+}
+static inline u32 psec_falcon_debug1_ctxsw_mode_v(u32 r)
+{
+	return (r >> 16) & 0x1;
+}
+static inline u32 psec_falcon_debug1_ctxsw_mode_init_f(void)
+{
+	return 0x0;
+}
+static inline u32 psec_fbif_transcfg_r(u32 i)
+{
+	return 0x00087600 + i*4;
+}
+static inline u32 psec_fbif_transcfg_target_local_fb_f(void)
+{
+	return 0x0;
+}
+static inline u32 psec_fbif_transcfg_target_coherent_sysmem_f(void)
+{
+	return 0x1;
+}
+static inline u32 psec_fbif_transcfg_target_noncoherent_sysmem_f(void)
+{
+	return 0x2;
+}
+static inline u32 psec_fbif_transcfg_mem_type_s(void)
+{
+	return 1;
+}
+static inline u32 psec_fbif_transcfg_mem_type_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+static inline u32 psec_fbif_transcfg_mem_type_m(void)
+{
+	return 0x1 << 2;
+}
+static inline u32 psec_fbif_transcfg_mem_type_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 psec_fbif_transcfg_mem_type_virtual_f(void)
+{
+	return 0x0;
+}
+static inline u32 psec_fbif_transcfg_mem_type_physical_f(void)
+{
+	return 0x4;
+}
+static inline u32 psec_falcon_engine_r(void)
+{
+	return 0x000873c0;
+}
+static inline u32 psec_falcon_engine_reset_true_f(void)
+{
+	return 0x1;
+}
+static inline u32 psec_falcon_engine_reset_false_f(void)
+{
+	return 0x0;
+}
+static inline u32 psec_fbif_ctl_r(void)
+{
+	return 0x00087624;
+}
+static inline u32 psec_fbif_ctl_allow_phys_no_ctx_init_f(void)
+{
+	return 0x0;
+}
+static inline u32 psec_fbif_ctl_allow_phys_no_ctx_disallow_f(void)
+{
+	return 0x0;
+}
+static inline u32 psec_fbif_ctl_allow_phys_no_ctx_allow_f(void)
+{
+	return 0x80;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
new file mode 100644
index 00000000..f8b32f8f
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/delay.h>	/* for udelay */
+#include <linux/clk.h>
+#include "gk20a/gk20a.h"
+#include "gk20a/pmu_gk20a.h"
+
+#include "gm206/pmu_gm206.h"
+#include "gm20b/pmu_gm20b.h"
+#include "gp10b/pmu_gp10b.h"
+#include "gp106/pmu_gp106.h"
+#include "gp106/acr_gp106.h"
+#include "gp106/hw_mc_gp106.h"
+#include "gp106/hw_pwr_gp106.h"
+#include "gp106/hw_psec_gp106.h"
+#include "sec2_gp106.h"
+#include "acr.h"
+
+/*Defines*/
+#define gm20b_dbg_pmu(fmt, arg...) \
+	gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
+
+/* Clear the SEC2 halt interrupt. Returns 0 once IRQSTAT shows it clear, or
+ * -EBUSY if it is still set after 'timeout' ms (silicon only; other platforms
+ * keep polling until it clears). */
+int sec2_clear_halt_interrupt_status(struct gk20a *g, unsigned int timeout)
+{
+	u32 data = 0;
+	unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
+
+	while (time_before(jiffies, end_jiffies) ||
+			!tegra_platform_is_silicon()) {
+		gk20a_writel(g, psec_falcon_irqsclr_r(),
+			     gk20a_readl(g, psec_falcon_irqsclr_r()) | (0x10));
+		data = gk20a_readl(g, psec_falcon_irqstat_r());
+		if ((data & psec_falcon_irqstat_halt_true_f()) !=
+			psec_falcon_irqstat_halt_true_f())
+			return 0; /* halt irq is clear */
+		udelay(1);
+	}
+	/* The deadline passed with the halt irq still pending. */
+	return -EBUSY;
+}
+
+/* Poll SEC2 CPUCTL until the halt bit is set ('timeout' in ms), then read the
+ * ACR mailboxes. Returns 0, -EBUSY (no halt) or -EAGAIN (mailbox0 non-zero). */
+int sec2_wait_for_halt(struct gk20a *g, unsigned int timeout)
+{
+	unsigned long deadline = jiffies + msecs_to_jiffies(timeout);
+	bool halted = false;
+	int ret = 0;
+	u32 val;
+
+	while (!halted && (time_before(jiffies, deadline) ||
+			!tegra_platform_is_silicon())) {
+		val = gk20a_readl(g, psec_falcon_cpuctl_r());
+		if (val & psec_falcon_cpuctl_halt_intr_m())
+			halted = true;
+		else
+			udelay(1);
+	}
+	if (!halted) {
+		ret = -EBUSY;
+		gk20a_err(dev_from_gk20a(g), "ACR boot timed out");
+	} else {
+		g->acr.capabilities = gk20a_readl(g, psec_falcon_mailbox1_r());
+		gm20b_dbg_pmu("ACR capabilities %x\n", g->acr.capabilities);
+		val = gk20a_readl(g, psec_falcon_mailbox0_r());
+		if (val) {
+			gk20a_err(dev_from_gk20a(g),
+				"ACR boot failed, err %x", val);
+			ret = -EAGAIN;
+		}
+	}
+
+	/* PMU setup runs whether or not the ACR boot succeeded */
+	init_pmu_setup_hw1(g);
+
+	return ret;
+}
+
+/* Copy 'size' bytes from src to SEC2 DMEM offset dst (must be 4-byte aligned)
+ * through DMEMC/DMEMD 'port'; failures are only logged, nothing is returned. */
+void sec2_copy_to_dmem(struct pmu_gk20a *pmu,
+		u32 dst, u8 *src, u32 size, u8 port)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	u32 i, words, bytes;
+	u32 data, addr_mask;
+	u32 *src_u32 = (u32 *)src;
+
+	if (size == 0) {
+		gk20a_err(dev_from_gk20a(g), "size is zero");
+		return;
+	}
+
+	if (dst & 0x3) {
+		gk20a_err(dev_from_gk20a(g),
+			"dst (0x%08x) not 4-byte aligned", dst);
+		return;
+	}
+
+	mutex_lock(&pmu->pmu_copy_lock);
+
+	words = size >> 2;
+	bytes = size & 0x3;
+
+	addr_mask = psec_falcon_dmemc_offs_m() |
+		    psec_falcon_dmemc_blk_m();
+
+	dst &= addr_mask;
+
+	gk20a_writel(g, psec_falcon_dmemc_r(port),
+		dst | psec_falcon_dmemc_aincw_f(1));
+
+	for (i = 0; i < words; i++)
+		gk20a_writel(g, psec_falcon_dmemd_r(port), src_u32[i]);
+	/* a trailing partial word is written padded with zero bytes */
+	if (bytes > 0) {
+		data = 0;
+		for (i = 0; i < bytes; i++)
+			((u8 *)&data)[i] = src[(words << 2) + i];
+		gk20a_writel(g, psec_falcon_dmemd_r(port), data);
+	}
+	/* DMEMC is expected to have advanced by the word-aligned size */
+	data = gk20a_readl(g, psec_falcon_dmemc_r(port)) & addr_mask;
+	size = ALIGN(size, 4);
+	if (data != dst + size) {
+		gk20a_err(dev_from_gk20a(g),
+			"copy failed. bytes written %u, expected %u",
+			data - dst, size);
+	}
+	mutex_unlock(&pmu->pmu_copy_lock);
+}
+
+int bl_bootstrap_sec2(struct pmu_gk20a *pmu,
+	void *desc, u32 bl_sz)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct acr_desc *acr = &g->acr;
+	struct mm_gk20a *mm = &g->mm;
+	u32 imem_dst_blk = 0;
+	u32 virt_addr = 0;
+	u32 tag = 0;
+	u32 index = 0;
+	struct hsflcn_bl_desc *pmu_bl_gm10x_desc = g->acr.pmu_hsbl_desc;
+	u32 *bl_ucode;
+	u32 data = 0;
+
+	gk20a_dbg_fn("");
+
+	/* SEC2 config: enable ctxen in ITFEN, point NXTCTX at the PMU inst block */
+	gk20a_writel(g, psec_falcon_itfen_r(),
+			gk20a_readl(g, psec_falcon_itfen_r()) |
+			psec_falcon_itfen_ctxen_enable_f());
+
+	gk20a_writel(g, psec_falcon_nxtctx_r(),
+			pwr_pmu_new_instblk_ptr_f(
+			gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
+			pwr_pmu_new_instblk_valid_f(1) |
+			pwr_pmu_new_instblk_target_sys_coh_f());
+	/* Read-modify-write: set DEBUG1 ctxsw_mode, then bit 3 of ENGCTL */
+	data = gk20a_readl(g, psec_falcon_debug1_r());
+	data |= psec_falcon_debug1_ctxsw_mode_m();
+	gk20a_writel(g, psec_falcon_debug1_r(), data);
+
+	data = gk20a_readl(g, psec_falcon_engctl_r());
+	data |= (1 << 3);
+	gk20a_writel(g, psec_falcon_engctl_r(), data);
+
+	/* TBD: load all other surfaces */
+	/* Copy the bootloader interface structure (desc) to DMEM offset 0 */
+	gk20a_writel(g, psec_falcon_dmemc_r(0),
+			psec_falcon_dmemc_offs_f(0) |
+			psec_falcon_dmemc_blk_f(0)  |
+			psec_falcon_dmemc_aincw_f(1));
+	sec2_copy_to_dmem(pmu, 0, (u8 *)desc,
+		sizeof(struct flcn_bl_dmem_desc), 0);
+	/* TODO: desc should be copied to bl_desc_dmem_load_off; that offset
+	 * is 0 at present, so copying to offset 0 is OK for now */
+
+	/* Bootloader goes to the top of IMEM: IMEM size field - bl_sz/256 */
+	imem_dst_blk = (psec_falcon_hwcfg_imem_size_v(
+			gk20a_readl(g, psec_falcon_hwcfg_r()))) - bl_sz/256;
+
+	/* Select the destination block with auto-increment on write */
+	gk20a_writel(g, psec_falcon_imemc_r(0),
+			psec_falcon_imemc_offs_f(0) |
+			psec_falcon_imemc_blk_f(imem_dst_blk)  |
+			psec_falcon_imemc_aincw_f(1));
+	virt_addr = pmu_bl_gm10x_desc->bl_start_tag << 8;
+	tag = virt_addr >> 8; /* tag is always 256B aligned */
+	bl_ucode = (u32 *)(acr->hsbl_ucode.cpu_va);
+	for (index = 0; index < bl_sz/4; index++) {
+		if ((index % 64) == 0) {
+			gk20a_writel(g, psec_falcon_imemt_r(0),
+				(tag & 0xffff) << 0);
+			tag++;
+		}
+		gk20a_writel(g, psec_falcon_imemd_r(0),
+				bl_ucode[index] & 0xffffffff);
+	}
+	gk20a_writel(g, psec_falcon_imemt_r(0), (0 & 0xffff) << 0);
+
+	gm20b_dbg_pmu("Before starting falcon with BL\n");
+	/* mailbox0 marker; sec2_wait_for_halt() treats non-zero as a failure */
+	gk20a_writel(g, psec_falcon_mailbox0_r(), 0xDEADA5A5);
+
+	gk20a_writel(g, psec_falcon_bootvec_r(),
+			psec_falcon_bootvec_vec_f(virt_addr));
+
+	gk20a_writel(g, psec_falcon_cpuctl_r(),
+			psec_falcon_cpuctl_startcpu_f(1));
+
+	return 0;
+}
+
+void sec_enable_irq(struct pmu_gk20a *pmu, bool enable)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+
+	gk20a_dbg_fn("");
+	/* IRQMCLR gets every source bit on each call, regardless of 'enable' */
+	gk20a_writel(g, psec_falcon_irqmclr_r(),
+		psec_falcon_irqmclr_gptmr_f(1)  |
+		psec_falcon_irqmclr_wdtmr_f(1)  |
+		psec_falcon_irqmclr_mthd_f(1)   |
+		psec_falcon_irqmclr_ctxsw_f(1)  |
+		psec_falcon_irqmclr_halt_f(1)   |
+		psec_falcon_irqmclr_exterr_f(1) |
+		psec_falcon_irqmclr_swgen0_f(1) |
+		psec_falcon_irqmclr_swgen1_f(1) |
+		psec_falcon_irqmclr_ext_f(0xff));
+
+	if (enable) {
+		/* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */
+		gk20a_writel(g, psec_falcon_irqdest_r(),
+			psec_falcon_irqdest_host_gptmr_f(0)    |
+			psec_falcon_irqdest_host_wdtmr_f(1)    |
+			psec_falcon_irqdest_host_mthd_f(0)     |
+			psec_falcon_irqdest_host_ctxsw_f(0)    |
+			psec_falcon_irqdest_host_halt_f(1)     |
+			psec_falcon_irqdest_host_exterr_f(0)   |
+			psec_falcon_irqdest_host_swgen0_f(1)   |
+			psec_falcon_irqdest_host_swgen1_f(0)   |
+			psec_falcon_irqdest_host_ext_f(0xff)   |
+			psec_falcon_irqdest_target_gptmr_f(1)  |
+			psec_falcon_irqdest_target_wdtmr_f(0)  |
+			psec_falcon_irqdest_target_mthd_f(0)   |
+			psec_falcon_irqdest_target_ctxsw_f(0)  |
+			psec_falcon_irqdest_target_halt_f(0)   |
+			psec_falcon_irqdest_target_exterr_f(0) |
+			psec_falcon_irqdest_target_swgen0_f(0) |
+			psec_falcon_irqdest_target_swgen1_f(1) |
+			psec_falcon_irqdest_target_ext_f(0xff));
+
+		/* 0=disable, 1=enable; mthd and ctxsw are left disabled */
+		gk20a_writel(g, psec_falcon_irqmset_r(),
+			psec_falcon_irqmset_gptmr_f(1)  |
+			psec_falcon_irqmset_wdtmr_f(1)  |
+			psec_falcon_irqmset_mthd_f(0)   |
+			psec_falcon_irqmset_ctxsw_f(0)  |
+			psec_falcon_irqmset_halt_f(1)   |
+			psec_falcon_irqmset_exterr_f(1) |
+			psec_falcon_irqmset_swgen0_f(1) |
+			psec_falcon_irqmset_swgen1_f(1));
+
+	}
+
+	gk20a_dbg_fn("done");
+}
+
+void init_pmu_setup_hw1(struct gk20a *g)
+{
+	struct mm_gk20a *mm = &g->mm;
+	struct pmu_gk20a *pmu = &g->pmu;
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+
+	/* PMU TRANSCFG: program one FBIF aperture per GK20A_PMU_DMAIDX_* index */
+	/* setup apertures - ucode (physical, local FB) and virtual */
+	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
+			pwr_fbif_transcfg_mem_type_physical_f() |
+			pwr_fbif_transcfg_target_local_fb_f());
+	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
+			pwr_fbif_transcfg_mem_type_virtual_f());
+	/* setup apertures - physical */
+	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
+			pwr_fbif_transcfg_mem_type_physical_f() |
+			pwr_fbif_transcfg_target_local_fb_f());
+	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
+			pwr_fbif_transcfg_mem_type_physical_f() |
+			pwr_fbif_transcfg_target_coherent_sysmem_f());
+	gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
+			pwr_fbif_transcfg_mem_type_physical_f() |
+			pwr_fbif_transcfg_target_noncoherent_sysmem_f());
+
+	/* PMU Config: enable ctxen, program the PMU instance block pointer */
+	gk20a_writel(g, pwr_falcon_itfen_r(),
+				gk20a_readl(g, pwr_falcon_itfen_r()) |
+				pwr_falcon_itfen_ctxen_enable_f());
+	gk20a_writel(g, pwr_pmu_new_instblk_r(),
+				pwr_pmu_new_instblk_ptr_f(
+					gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
+				pwr_pmu_new_instblk_valid_f(1) |
+				pwr_pmu_new_instblk_target_sys_coh_f());
+
+	/* Fill in the PMU cmdline args, then copy them to DMEM at acr.pmu_args */
+	g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
+				clk_get_rate(platform->clk[1]));
+	g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode(pmu, 1);
+	g->ops.pmu_ver.set_pmu_cmdline_args_trace_size(
+		pmu, GK20A_PMU_TRACE_BUFSIZE);
+	g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base(pmu);
+	g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx(
+		pmu, GK20A_PMU_DMAIDX_VIRT);
+
+	pmu_copy_to_dmem(pmu, g->acr.pmu_args,
+			(u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
+			g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);
+
+}
+
+int init_sec2_setup_hw1(struct gk20a *g,
+		void *desc, u32 bl_sz)
+{
+	struct pmu_gk20a *pmu = &g->pmu;
+	int err;
+	u32 data = 0;
+
+	gk20a_dbg_fn("");
+
+	mutex_lock(&pmu->isr_mutex);
+	g->ops.pmu.reset(g);
+	pmu->isr_enabled = true;
+	mutex_unlock(&pmu->isr_mutex);
+	/* FBIF: allow phys access without ctx; DMACTL: clear require_ctx */
+	data = gk20a_readl(g, psec_fbif_ctl_r());
+	data |= psec_fbif_ctl_allow_phys_no_ctx_allow_f();
+	gk20a_writel(g, psec_fbif_ctl_r(), data);
+
+	data = gk20a_readl(g, psec_falcon_dmactl_r());
+	data &= ~(psec_falcon_dmactl_require_ctx_f(1));
+	gk20a_writel(g, psec_falcon_dmactl_r(), data);
+
+	/* setup apertures - ucode (physical, local FB) and virtual */
+	gk20a_writel(g, psec_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
+			psec_fbif_transcfg_mem_type_physical_f() |
+			psec_fbif_transcfg_target_local_fb_f());
+	gk20a_writel(g, psec_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
+			psec_fbif_transcfg_mem_type_virtual_f());
+	/* setup apertures - physical */
+	gk20a_writel(g, psec_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
+			psec_fbif_transcfg_mem_type_physical_f() |
+			psec_fbif_transcfg_target_local_fb_f());
+	gk20a_writel(g, psec_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
+			psec_fbif_transcfg_mem_type_physical_f() |
+			psec_fbif_transcfg_target_coherent_sysmem_f());
+	gk20a_writel(g, psec_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
+			psec_fbif_transcfg_mem_type_physical_f() |
+			psec_fbif_transcfg_target_noncoherent_sysmem_f());
+
+	/* Disable PMU and SEC2 irqs for HS falcon booting: halt is polled for */
+	mutex_lock(&pmu->isr_mutex);
+	pmu_enable_irq(pmu, false);
+	sec_enable_irq(pmu, false);
+	pmu->isr_enabled = false;
+	mutex_unlock(&pmu->isr_mutex);
+	err = bl_bootstrap_sec2(pmu, desc, bl_sz);
+	if (err)
+		return err;
+
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.h b/drivers/gpu/nvgpu/gp106/sec2_gp106.h
new file mode 100644
index 00000000..336bb0f0
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __SEC2_H_
+#define __SEC2_H_
+
+int sec2_clear_halt_interrupt_status(struct gk20a *g, unsigned int timeout);
+int sec2_wait_for_halt(struct gk20a *g, unsigned int timeout);
+void sec2_copy_to_dmem(struct pmu_gk20a *pmu,
+		u32 dst, u8 *src, u32 size, u8 port);
+void sec2_dump_falcon_stats(struct pmu_gk20a *pmu);
+int bl_bootstrap_sec2(struct pmu_gk20a *pmu,
+	void *desc, u32 bl_sz);
+void sec_enable_irq(struct pmu_gk20a *pmu, bool enable);
+void init_pmu_setup_hw1(struct gk20a *g);
+int init_sec2_setup_hw1(struct gk20a *g,
+		void *desc, u32 bl_sz);
+
+#endif /*__SEC2_H_*/
-- 
cgit v1.2.2


From ee6be7beca896e8fbb324c164c6382fe8d695971 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Wed, 8 Jun 2016 17:29:48 +0530
Subject: gpu: nvgpu: PMU/SEC2 reset sequence & OPS update

- Enable OPS to support secure boot
- PMU/SEC2 reset sequence change for GP104/GP106

JIRA DNVGPU-34

Change-Id: I583a6af1d5354649c3df9d9b4d74141d52d6ca9d
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1161132
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/pmu_gp106.c | 165 ++++++++++++++++++++++++++++++++++--
 1 file changed, 159 insertions(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/pmu_gp106.c b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
index 42ed85ec..be3e8c64 100644
--- a/drivers/gpu/nvgpu/gp106/pmu_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
@@ -15,32 +15,185 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/pmu_gk20a.h"
 
+#include "gm206/pmu_gm206.h"
+#include "gm20b/pmu_gm20b.h"
 #include "gp10b/pmu_gp10b.h"
+#include "gp106/pmu_gp106.h"
+#include "gp106/acr_gp106.h"
+#include "gp106/hw_psec_gp106.h"
 #include "hw_mc_gp106.h"
 #include "hw_pwr_gp106.h"
 
-static int gp106_pmu_reset(struct gk20a *g)
+#define PMU_MEM_SCRUBBING_TIMEOUT_MAX 1000
+#define PMU_MEM_SCRUBBING_TIMEOUT_DEFAULT 10
+
+int gp106_pmu_enable_hw(struct pmu_gk20a *pmu, bool enable)
 {
+	struct gk20a *g = gk20a_from_pmu(pmu);
+
 	gk20a_dbg_fn("");
 
-	gk20a_reset(g, mc_enable_pwr_enabled_f());
+	/*
+	* From GP10X onwards, PPWR_FALCON_ENGINE is used for reset. Every write
+	* to it below is followed by a read-back of NV_PPWR_FALCON_ENGINE
+	* (this includes the writes that put the PMU back into reset).
+	*/
+
+	if (enable) {
+		int retries = PMU_MEM_SCRUBBING_TIMEOUT_MAX /
+				PMU_MEM_SCRUBBING_TIMEOUT_DEFAULT;
+		gk20a_writel(g, pwr_falcon_engine_r(),
+			pwr_falcon_engine_reset_false_f());
+		gk20a_readl(g, pwr_falcon_engine_r());
+
+		/* make sure ELPG is in a good state */
+		if (g->ops.clock_gating.slcg_pmu_load_gating_prod)
+			g->ops.clock_gating.slcg_pmu_load_gating_prod(g,
+					g->slcg_enabled);
+		if (g->ops.clock_gating.blcg_pmu_load_gating_prod)
+			g->ops.clock_gating.blcg_pmu_load_gating_prod(g,
+					g->blcg_enabled);
+
+		/* wait for Scrubbing to complete */
+		do {
+			u32 w = gk20a_readl(g, pwr_falcon_dmactl_r()) &
+				(pwr_falcon_dmactl_dmem_scrubbing_m() |
+				 pwr_falcon_dmactl_imem_scrubbing_m());
 
-	gk20a_writel(g, pwr_falcon_engine_r(),
+			if (!w) {
+				gk20a_dbg_fn("done");
+				return 0;
+			}
+			udelay(PMU_MEM_SCRUBBING_TIMEOUT_DEFAULT);
+		} while (--retries || !tegra_platform_is_silicon());
+
+		/* If scrubbing timeout, keep PMU in reset state */
+		gk20a_writel(g, pwr_falcon_engine_r(),
+			pwr_falcon_engine_reset_true_f());
+		gk20a_readl(g, pwr_falcon_engine_r());
+		gk20a_err(dev_from_gk20a(g), "Falcon mem scrubbing timeout");
+		return -ETIMEDOUT;
+	} else {
+		/* DISABLE */
+		gk20a_writel(g, pwr_falcon_engine_r(),
+			pwr_falcon_engine_reset_true_f());
+		gk20a_readl(g, pwr_falcon_engine_r());
+		return 0;
+	}
+}
+
+static int pmu_enable(struct pmu_gk20a *pmu, bool enable)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	u32 reg_reset;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	if (!enable) {
+		reg_reset = gk20a_readl(g, pwr_falcon_engine_r());
+		if (reg_reset !=
+			pwr_falcon_engine_reset_true_f()) {
+
+			pmu_enable_irq(pmu, false);
+			gp106_pmu_enable_hw(pmu, false);
+			udelay(10);
+		}
+	} else {
+		gp106_pmu_enable_hw(pmu, true);
+		/* TBD: post reset */
+
+		/*idle the PMU and enable interrupts on the Falcon*/
+		err = pmu_idle(pmu);
+		if (err)
+			return err;
+		udelay(5);
+		pmu_enable_irq(pmu, true);
+	}
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
+int gp106_pmu_reset(struct gk20a *g)
+{
+	struct pmu_gk20a *pmu = &g->pmu;
+	int err = 0;
+
+	gk20a_dbg_fn("");
+
+	err = pmu_idle(pmu);
+	if (err)
+		return err;
+
+	/* TBD: release pmu hw mutex */
+
+	err = pmu_enable(pmu, false);
+	if (err)
+		return err;
+
+	/* TBD: cancel all sequences */
+	/* TBD: init all sequences and state tables */
+	/* TBD: restore pre-init message handler */
+
+	err = pmu_enable(pmu, true);
+	if (err)
+		return err;
+
+	return err;
+}
+
+int gp106_sec2_reset(struct gk20a *g)
+{
+	gk20a_dbg_fn("");
+	//sec2 reset
+	gk20a_writel(g, psec_falcon_engine_r(),
 			pwr_falcon_engine_reset_true_f());
 	udelay(10);
-	gk20a_writel(g, pwr_falcon_engine_r(),
+	gk20a_writel(g, psec_falcon_engine_r(),
 			pwr_falcon_engine_reset_false_f());
 
 	gk20a_dbg_fn("done");
 	return 0;
 }
 
+static int gp106_falcon_reset(struct gk20a *g)
+{
+	gk20a_dbg_fn("");
+
+	gp106_pmu_reset(g);
+	gp106_sec2_reset(g);
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
 void gp106_init_pmu_ops(struct gpu_ops *gops)
 {
 	gk20a_dbg_fn("");
 
-	gp10b_init_pmu_ops(gops);
-	gops->pmu.reset = gp106_pmu_reset;
+	if (gops->privsecurity) {
+		gp106_init_secure_pmu(gops);
+		gops->pmu.init_wpr_region = gm20b_pmu_init_acr;
+		gops->pmu.load_lsfalcon_ucode = gm206_load_falcon_ucode;
+		gops->pmu.is_lazy_bootstrap = gm206_is_lazy_bootstrap;
+		gops->pmu.is_priv_load = gm206_is_priv_load;
+	} else {
+		gk20a_init_pmu_ops(gops);
+		gops->pmu.pmu_setup_hw_and_bootstrap =
+			gm20b_init_nspmu_setup_hw1;
+		gops->pmu.load_lsfalcon_ucode = NULL;
+		gops->pmu.init_wpr_region = NULL;
+	}
+	gops->pmu.pmu_setup_elpg = NULL;
+	gops->pmu.lspmuwprinitdone = 0;
+	gops->pmu.fecsbootstrapdone = false;
+	gops->pmu.write_dmatrfbase = gp10b_write_dmatrfbase;
+	gops->pmu.pmu_elpg_statistics = NULL;
+	gops->pmu.pmu_pg_grinit_param = NULL;
+	gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd = NULL;
+	gops->pmu.dump_secure_fuses = NULL;
+	gops->pmu.reset = gp106_falcon_reset;
 
 	gk20a_dbg_fn("done");
 }
-- 
cgit v1.2.2


From ca2c4ce83a846bbab953e91af37c762efaa975e6 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Wed, 8 Jun 2016 19:57:15 +0530
Subject: gpu: nvgpu: Enable GP10x GPMU secure boot

Build support & enable GPMU secure boot
for GP10x

JIRA DNVGPU-34

Change-Id: Id1316677ed44790aa150e0ada8ff39daf0ef1d0c
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1161174
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile          |  4 +++-
 drivers/gpu/nvgpu/gp106/hal_gp106.c | 18 +++++++++++++++---
 2 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 7e4c259b..1579e6eb 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -23,7 +23,9 @@ nvgpu-y += \
 	$(nvgpu-t18x)/gp106/mm_gp106.o \
 	$(nvgpu-t18x)/gp106/pmu_gp106.o \
 	$(nvgpu-t18x)/gp106/gr_gp106.o \
-	$(nvgpu-t18x)/gp106/gr_ctx_gp106.o
+	$(nvgpu-t18x)/gp106/gr_ctx_gp106.o \
+	$(nvgpu-t18x)/gp106/acr_gp106.o \
+	$(nvgpu-t18x)/gp106/sec2_gp106.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
 
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 7ef72d72..1b22547d 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -180,13 +180,14 @@ int gp106_init_hal(struct gk20a *g)
 {
 	struct gpu_ops *gops = &g->ops;
 	struct nvgpu_gpu_characteristics *c = &g->gpu_characteristics;
+	u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
 
 	gk20a_dbg_fn("");
 
 	*gops = gp106_ops;
 
-	gops->privsecurity = 0;
-	gops->securegpccs = 0;
+	gops->privsecurity = 1;
+	gops->securegpccs = 1;
 
 	gp10b_init_mc(gops);
 	gp106_init_gr(gops);
@@ -202,9 +203,20 @@ int gp106_init_hal(struct gk20a *g)
 	gp10b_init_cde_ops(gops);
 	gp10b_init_therm_ops(gops);
 	gm206_init_bios(gops);
-	gops->name = "gp106";
+	switch(ver){
+		case NVGPU_GPUID_GP106:
+			gops->name = "gp106";
+			break;
+		case NVGPU_GPUID_GP104:
+			gops->name = "gp104";
+			break;
+		default:
+			gk20a_err(g->dev, "no support for %x", ver);
+			BUG();
+	}
 	gops->get_litter_value = gp106_get_litter_value;
 	gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
+	gops->gr_ctx.use_dma_for_fw_bootstrap = true;
 
 	c->twod_class = FERMI_TWOD_A;
 	c->threed_class = PASCAL_B;
-- 
cgit v1.2.2


From 4306af531d1e0b68b1b24d9c4a625018461d6d34 Mon Sep 17 00:00:00 2001
From: Bharat Nihalani <bnihalani@nvidia.com>
Date: Fri, 10 Jun 2016 17:39:17 +0530
Subject: WAR: gpu: nvgpu: gp10b: disable railgate for K4.4

This is done to mask a race issue seen where power refcount
is zero during ISR or bottom half.

Bug 200198908

Change-Id: I0a8ed774cd4fda9db65429b5aad03c5e001ff666
Signed-off-by: Bharat Nihalani <bnihalani@nvidia.com>
Reviewed-on: http://git-master/r/1162314
Reviewed-by: Juha Tukkinen <jtukkinen@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index b9f50817..b844875b 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -371,7 +371,12 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	.railgate_delay		= 500,
 
 	/* power management configuration */
+	/* WAR: Rail-gating GPU causes hangs on kernel-4.4 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)
 	.can_railgate           = true,
+#else
+	.can_railgate           = false,
+#endif
 	.enable_elpg            = true,
 	.can_elpg               = true,
 	.enable_blcg		= true,
-- 
cgit v1.2.2


From faa11f0bab6984ea6ecba881b55c33263ccac1dd Mon Sep 17 00:00:00 2001
From: Lakshmanan M <lm@nvidia.com>
Date: Thu, 9 Jun 2016 18:46:22 +0530
Subject: gpu: nvgpu: Remove hard coded runlist_id mapping

From this patch onwards, runlist_id is a member of
struct channel_gk20a. So removed hard coded
runlist_id mapping logic.

JIRA DNVGPU-25

Change-Id: Ib87d96a518a490d4167071708a76100a4d4c02dd
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1161776
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 607fca59..534ffdb7 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1586,7 +1586,7 @@ static int gr_gp10b_disable_channel_or_tsg(struct gk20a *g, struct channel_gk20a
 		return ret;
 	}
 
-	ret = g->ops.fifo.update_runlist(g, 0, ~0, true, false);
+	ret = g->ops.fifo.update_runlist(g, fault_ch->runlist_id, ~0, true, false);
 	if (ret) {
 		gk20a_err(dev_from_gk20a(g),
 				"CILP: failed to restart runlist 0!");
-- 
cgit v1.2.2


From 5da9567834299e9bc8190850587132ed0e41c0f0 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Mon, 13 Jun 2016 12:16:28 +0530
Subject: Revert "WAR: gpu: nvgpu: gp10b: disable railgate for K4.4"

This reverts commit 39a62cba57b243632be56e155813b7318e22c273.

Proper fixes are merged for failing tests.
Hence re-enable railgating

Bug 200198908

Change-Id: Ic9693736add36e7ff77d39fed585126bb6281677
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1163167
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index b844875b..b9f50817 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -371,12 +371,7 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	.railgate_delay		= 500,
 
 	/* power management configuration */
-	/* WAR: Rail-gating GPU causes hangs on kernel-4.4 */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)
 	.can_railgate           = true,
-#else
-	.can_railgate           = false,
-#endif
 	.enable_elpg            = true,
 	.can_elpg               = true,
 	.enable_blcg		= true,
-- 
cgit v1.2.2


From a445c27d5bb2989f6865b5c267b22c9edbe6cc31 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Wed, 15 Jun 2016 14:51:51 +0530
Subject: gpu: nvgpu: WPR update

- Set WPR at 388MB of VIDMEM
- Setting the 256/512MB location in
VIDMEM for WPR causes ACR boot failure
on GP104/GP106 PROD boards but works fine
on DEBUG boards
- Removed unwanted WPR info dump

JIRA DNVGPU-34

Change-Id: I44f9861774fe77dd534d316d91ed9f8dfcb298b4
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1164840
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/acr_gp106.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index 2ea2f817..0b4b86f4 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -37,8 +37,8 @@ typedef int (*get_ucode_details)(struct gk20a *g,
 
 /* Both size and address of WPR need to be 128K-aligned */
 #define WPR_ALIGNMENT	0x20000
-#define GP106_DGPU_NONWPR 0x10000000 /* start from 256MB location at VIDMEM */
-#define GP106_DGPU_WPR 0x20000000
+#define GP106_DGPU_NONWPR 0x18000000
+#define GP106_DGPU_WPR (GP106_DGPU_NONWPR + 0x400000)
 #define DGPU_WPR_SIZE 0x100000
 
 /*Externs*/
@@ -365,9 +365,6 @@ int gp106_prepare_ucode_blob(struct gk20a *g)
 	gm20b_mm_mmu_vpr_info_fetch(g);
 	gr_gk20a_init_ctxsw_ucode(g);
 
-	if (g->ops.fb.dump_vpr_wpr_info)
-		g->ops.fb.dump_vpr_wpr_info(g);
-
 	g->ops.pmu.get_wpr(g, &wpr_inf);
 	gp106_dbg_pmu("wpr carveout base:%llx\n", (wpr_inf.wpr_base));
 	gp106_dbg_pmu("wpr carveout size :%x\n", (u32)wpr_inf.size);
-- 
cgit v1.2.2


From 39f3a8b89fbd7589bb911d64f894c0c7d56fb694 Mon Sep 17 00:00:00 2001
From: Lakshmanan M <lm@nvidia.com>
Date: Wed, 15 Jun 2016 10:23:43 +0530
Subject: gpu: nvgpu: Add fifo conf support for gp10x

Added fifo configuration support for gp104 and
gp106. These GPU chips have more channel
fifos and runlists than gp10b.
Added the get_num_fifos and
eng_runlist_base_size function pointers
to obtain the actual values from HW headers.

JIRA DNVGPU-25

Change-Id: I2322a6354eaa2af2b2605f3e9eedebf9827c7dda
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1164653
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile           |  3 ++-
 drivers/gpu/nvgpu/gp106/fifo_gp106.c | 30 ++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/fifo_gp106.h | 18 ++++++++++++++++++
 drivers/gpu/nvgpu/gp106/hal_gp106.c  |  4 ++--
 4 files changed, 52 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp106/fifo_gp106.c
 create mode 100644 drivers/gpu/nvgpu/gp106/fifo_gp106.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 1579e6eb..643c0967 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -25,7 +25,8 @@ nvgpu-y += \
 	$(nvgpu-t18x)/gp106/gr_gp106.o \
 	$(nvgpu-t18x)/gp106/gr_ctx_gp106.o \
 	$(nvgpu-t18x)/gp106/acr_gp106.o \
-	$(nvgpu-t18x)/gp106/sec2_gp106.o
+	$(nvgpu-t18x)/gp106/sec2_gp106.o \
+	$(nvgpu-t18x)/gp106/fifo_gp106.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
 
diff --git a/drivers/gpu/nvgpu/gp106/fifo_gp106.c b/drivers/gpu/nvgpu/gp106/fifo_gp106.c
new file mode 100644
index 00000000..3c70d517
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/fifo_gp106.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "gp10b/fifo_gp10b.h"
+#include "fifo_gp106.h"
+#include "hw_ccsr_gp106.h"
+#include "hw_fifo_gp106.h"
+
+static u32 gp106_fifo_get_num_fifos(struct gk20a *g)
+{
+	return ccsr_channel__size_1_v();
+}
+
+void gp106_init_fifo(struct gpu_ops *gops)
+{
+	gp10b_init_fifo(gops);
+	gops->fifo.get_num_fifos = gp106_fifo_get_num_fifos;
+	gops->fifo.eng_runlist_base_size = fifo_eng_runlist_base__size_1_v;
+}
diff --git a/drivers/gpu/nvgpu/gp106/fifo_gp106.h b/drivers/gpu/nvgpu/gp106/fifo_gp106.h
new file mode 100644
index 00000000..1bcec9ef
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/fifo_gp106.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef NVGPU_FIFO_GP106_H
+#define NVGPU_FIFO_GP106_H
+struct gpu_ops;
+void gp106_init_fifo(struct gpu_ops *gops);
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 1b22547d..f9cd2e07 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -26,7 +26,7 @@
 #include "gp10b/mm_gp10b.h"
 #include "gp10b/ce_gp10b.h"
 #include "gp10b/fb_gp10b.h"
-#include "gp10b/fifo_gp10b.h"
+#include "gp106/fifo_gp106.h"
 #include "gp10b/gp10b_gating_reglist.h"
 #include "gp10b/regops_gp10b.h"
 #include "gp10b/cde_gp10b.h"
@@ -193,7 +193,7 @@ int gp106_init_hal(struct gk20a *g)
 	gp106_init_gr(gops);
 	gp10b_init_ltc(gops);
 	gp10b_init_fb(gops);
-	gp10b_init_fifo(gops);
+	gp106_init_fifo(gops);
 	gp10b_init_ce(gops);
 	gp106_init_gr_ctx(gops);
 	gp106_init_mm(gops);
-- 
cgit v1.2.2


From 14e0681fe5bb39b1773c06c51bc56101a9a1ca40 Mon Sep 17 00:00:00 2001
From: Richard Zhao <rizhao@nvidia.com>
Date: Fri, 3 Jun 2016 15:55:35 -0700
Subject: gpu: nvgpu: set gops.read_ptimer

Bug 1395833

Change-Id: I7e7f453d83db76a46f79d62f205832254fcf401e
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/1159589
(cherry picked from commit a1f43172ebf91066969c4d9e25b8a781edb20724)
Reviewed-on: http://git-master/r/1158898
GVS: Gerrit_Virtual_Submit
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index b8fffab3..e44767a0 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -234,6 +234,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gops->name = "gp10b";
 	gops->chip_init_gpu_characteristics = gp10b_init_gpu_characteristics;
 	gops->get_litter_value = gp10b_get_litter_value;
+	gops->read_ptimer = gk20a_read_ptimer;
 
 	c->twod_class = FERMI_TWOD_A;
 	c->threed_class = PASCAL_A;
-- 
cgit v1.2.2


From dfc2f7e5df52410c2b7c744419f0b94496827bd9 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Thu, 16 Jun 2016 11:55:03 +0300
Subject: gpu: nvgpu: gp106: use vidmem allocator for ucode blob

Use the general video memory allocator for reserving wpr space for acr
ucode blob instead of crafting a mem_desc manually.

Jira DNVGPU-16

Change-Id: I9d34b3b964eb9ab781fcebecd15ba81643c5452d
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1165642
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/acr_gp106.c | 23 ++---------------------
 1 file changed, 2 insertions(+), 21 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index 0b4b86f4..0e49214e 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -91,31 +91,12 @@ static void flcn64_set_dma(struct falc_u64 *dma_addr, u64 value)
 int gp106_alloc_blob_space(struct gk20a *g,
 		size_t size, struct mem_desc *mem)
 {
-	int err = 0;
 	struct wpr_carveout_info wpr_inf;
 
 	g->ops.pmu.get_wpr(g, &wpr_inf);
 
-	mem->aperture = APERTURE_VIDMEM;
-	mem->sgt = kzalloc(sizeof(*mem->sgt), GFP_KERNEL);
-	if (!mem->sgt) {
-		gk20a_err(dev_from_gk20a(g), "failed to allocate memory\n");
-		return -ENOMEM;
-	}
-
-	err = sg_alloc_table(mem->sgt, 1, GFP_KERNEL);
-	if (err) {
-		gk20a_err(dev_from_gk20a(g), "failed to allocate sg_table\n");
-		goto free_sgt;
-	}
-
-	sg_dma_address(mem->sgt->sgl) = wpr_inf.nonwpr_base;
-
-	return err;
-
-free_sgt:
-	gk20a_free_sgtable(&mem->sgt);
-	return err;
+	return gk20a_gmmu_alloc_attr_vid_at(g, 0, wpr_inf.size, mem,
+			wpr_inf.nonwpr_base);
 }
 
 void gp106_init_secure_pmu(struct gpu_ops *gops)
-- 
cgit v1.2.2


From 454cb1631be1a09b25c45a18a97fdaae2f5cdf76 Mon Sep 17 00:00:00 2001
From: Lakshmanan M <lm@nvidia.com>
Date: Fri, 17 Jun 2016 12:40:54 +0530
Subject: gpu: nvgpu: Add new CE class for gp10x

Added new CE class(PASCAL_DMA_COPY_B) for gp106 and gp104.

JIRA DNVGPU-25

Change-Id: I3c85e3ffdedf7594d41bf5c2fbebbf44addd1720
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1166709
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/gr_gp106.c | 1 +
 drivers/gpu/nvgpu/gp10b/gr_gp10b.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c
index e4768e0d..d54a2089 100644
--- a/drivers/gpu/nvgpu/gp106/gr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c
@@ -32,6 +32,7 @@ static bool gr_gp106_is_valid_class(struct gk20a *g, u32 class_num)
 	case PASCAL_A:
 	case PASCAL_B:
 	case PASCAL_DMA_COPY_A:
+	case PASCAL_DMA_COPY_B:
 		valid = true;
 		break;
 
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index e08a7dc5..5338789f 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -25,6 +25,7 @@ enum {
 	PASCAL_A                 = 0xC097,
 	PASCAL_COMPUTE_A         = 0xC0C0,
 	PASCAL_DMA_COPY_A        = 0xC0B5,
+	PASCAL_DMA_COPY_B        = 0xC1B5,
 };
 
 #define NVC097_SET_GO_IDLE_TIMEOUT		0x022c
-- 
cgit v1.2.2


From d4eb7f691ef14263377c0f33777b104e2b1a0c53 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Fri, 17 Jun 2016 14:09:34 +0530
Subject: gpu: nvgpu: select FW based on ARCH

JIRA DNVGPU-34

Change-Id: Iea1964c7d12536591659188c8e969fc7fb632d12
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1166785
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/acr_gp106.c    | 31 +++++++++++++++++++++++++++----
 drivers/gpu/nvgpu/gp106/acr_gp106.h    |  5 +++++
 drivers/gpu/nvgpu/gp106/gr_ctx_gp106.c | 19 +++++++++++++++++--
 drivers/gpu/nvgpu/gp106/gr_ctx_gp106.h |  1 +
 drivers/gpu/nvgpu/gp106/hal_gp106.c    | 13 +------------
 drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c |  2 +-
 6 files changed, 52 insertions(+), 19 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index 0e49214e..a578c4a0 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -27,6 +27,7 @@
 #include "gm20b/acr_gm20b.h"
 #include "gm206/pmu_gm206.h"
 #include "sec2_gp106.h"
+#include "nvgpu_gpuid_t18x.h"
 
 /*Defines*/
 #define gp106_dbg_pmu(fmt, arg...) \
@@ -185,11 +186,22 @@ release_img_fw:
 
 int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img)
 {
+	u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
 	struct lsf_ucode_desc_v1 *lsf_desc;
-	const struct firmware *fecs_sig;
+	const struct firmware *fecs_sig = NULL;
 	int err;
 
-	fecs_sig = gk20a_request_firmware(g, GM20B_FECS_UCODE_SIG);
+	switch (ver) {
+		case NVGPU_GPUID_GP104:
+			fecs_sig = gk20a_request_firmware(g, GP104_FECS_UCODE_SIG);
+			break;
+		case NVGPU_GPUID_GP106:
+			fecs_sig = gk20a_request_firmware(g, GP106_FECS_UCODE_SIG);
+			break;
+		default:
+			gk20a_err(g->dev, "no support for GPUID %x", ver);
+	}
+
 	if (!fecs_sig) {
 		gk20a_err(dev_from_gk20a(g), "failed to load fecs sig");
 		return -ENOENT;
@@ -252,14 +264,25 @@ rel_sig:
 }
 int gpccs_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img)
 {
+	u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
 	struct lsf_ucode_desc_v1 *lsf_desc;
-	const struct firmware *gpccs_sig;
+	const struct firmware *gpccs_sig = NULL;
 	int err;
 
 	if (g->ops.securegpccs == false)
 		return -ENOENT;
 
-	gpccs_sig = gk20a_request_firmware(g, T18x_GPCCS_UCODE_SIG);
+	switch (ver) {
+		case NVGPU_GPUID_GP104:
+			gpccs_sig = gk20a_request_firmware(g, GP104_GPCCS_UCODE_SIG);
+			break;
+		case NVGPU_GPUID_GP106:
+			gpccs_sig = gk20a_request_firmware(g, GP106_GPCCS_UCODE_SIG);
+			break;
+		default:
+			gk20a_err(g->dev, "no support for GPUID %x", ver);
+	}
+
 	if (!gpccs_sig) {
 		gk20a_err(dev_from_gk20a(g), "failed to load gpccs sig");
 		return -ENOENT;
diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.h b/drivers/gpu/nvgpu/gp106/acr_gp106.h
index 9afec529..cd555eb8 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.h
@@ -17,6 +17,11 @@
 #include "gm20b/acr_gm20b.h"
 #include "gm206/acr_gm206.h"
 
+#define GP106_FECS_UCODE_SIG "gp106/fecs_sig.bin"
+#define GP106_GPCCS_UCODE_SIG "gp106/gpccs_sig.bin"
+#define GP104_FECS_UCODE_SIG "gp104/fecs_sig.bin"
+#define GP104_GPCCS_UCODE_SIG "gp104/gpccs_sig.bin"
+
 struct lsf_ucode_desc_v1 {
 	u8  prd_keys[2][16];
 	u8  dbg_keys[2][16];
diff --git a/drivers/gpu/nvgpu/gp106/gr_ctx_gp106.c b/drivers/gpu/nvgpu/gp106/gr_ctx_gp106.c
index 34e1f859..1f47cc5a 100644
--- a/drivers/gpu/nvgpu/gp106/gr_ctx_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/gr_ctx_gp106.c
@@ -15,10 +15,25 @@
 
 #include "gk20a/gk20a.h"
 #include "gr_ctx_gp106.h"
+#include "nvgpu_gpuid_t18x.h"
 
-static int gr_gp106_get_netlist_name(int index, char *name)
+static int gr_gp106_get_netlist_name(struct gk20a *g, int index, char *name)
 {
-	sprintf(name, GP106_NETLIST_IMAGE_FW_NAME);
+	u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
+
+	switch (ver) {
+		case NVGPU_GPUID_GP104:
+			sprintf(name, "%s/%s", "gp104",
+					GP104_NETLIST_IMAGE_FW_NAME);
+			break;
+		case NVGPU_GPUID_GP106:
+			sprintf(name, "%s/%s", "gp106",
+					GP106_NETLIST_IMAGE_FW_NAME);
+			break;
+		default:
+			gk20a_err(g->dev, "no support for GPUID %x", ver);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/gp106/gr_ctx_gp106.h b/drivers/gpu/nvgpu/gp106/gr_ctx_gp106.h
index d14a9126..fef80abb 100644
--- a/drivers/gpu/nvgpu/gp106/gr_ctx_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/gr_ctx_gp106.h
@@ -20,6 +20,7 @@
 
 /* production netlist, one and only one from below */
 #define GP106_NETLIST_IMAGE_FW_NAME GK20A_NETLIST_IMAGE_C
+#define GP104_NETLIST_IMAGE_FW_NAME GK20A_NETLIST_IMAGE_D
 
 void gp106_init_gr_ctx(struct gpu_ops *gops);
 
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index f9cd2e07..a47fa0fd 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -180,7 +180,6 @@ int gp106_init_hal(struct gk20a *g)
 {
 	struct gpu_ops *gops = &g->ops;
 	struct nvgpu_gpu_characteristics *c = &g->gpu_characteristics;
-	u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
 
 	gk20a_dbg_fn("");
 
@@ -203,17 +202,7 @@ int gp106_init_hal(struct gk20a *g)
 	gp10b_init_cde_ops(gops);
 	gp10b_init_therm_ops(gops);
 	gm206_init_bios(gops);
-	switch(ver){
-		case NVGPU_GPUID_GP106:
-			gops->name = "gp106";
-			break;
-		case NVGPU_GPUID_GP104:
-			gops->name = "gp104";
-			break;
-		default:
-			gk20a_err(g->dev, "no support for %x", ver);
-			BUG();
-	}
+	gops->name = "gp10x";
 	gops->get_litter_value = gp106_get_litter_value;
 	gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
 	gops->gr_ctx.use_dma_for_fw_bootstrap = true;
diff --git a/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c
index b2956257..2bb4a313 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_ctx_gp10b.c
@@ -22,7 +22,7 @@
 #include "gk20a/gk20a.h"
 #include "gr_ctx_gp10b.h"
 
-static int gr_gp10b_get_netlist_name(int index, char *name)
+static int gr_gp10b_get_netlist_name(struct gk20a *g, int index, char *name)
 {
 	switch (index) {
 #ifdef GP10B_NETLIST_IMAGE_FW_NAME
-- 
cgit v1.2.2


From 528758f488a8f39c1306d8c17be4799273a94a5d Mon Sep 17 00:00:00 2001
From: Lakshmanan M <lm@nvidia.com>
Date: Wed, 22 Jun 2016 16:07:46 +0530
Subject: gpu: nvgpu: Add interface for privileged channel allocation

Added an interface for privileged channel allocation, used to execute
privileged methods (e.g. CE phys mode transfer).

JIRA DNVGPU-53

Change-Id: I1606f8c9d10f29d5a10738b5110ce9f6a2bb428d
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1169320
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/hw_pbdma_gp106.h | 8 ++++++++
 drivers/gpu/nvgpu/gp106/hw_ram_gp106.h   | 4 ++++
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c     | 8 ++++++++
 drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h | 8 ++++++++
 drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h   | 4 ++++
 5 files changed, 32 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hw_pbdma_gp106.h b/drivers/gpu/nvgpu/gp106/hw_pbdma_gp106.h
index a5406672..1e299bae 100644
--- a/drivers/gpu/nvgpu/gp106/hw_pbdma_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_pbdma_gp106.h
@@ -322,6 +322,14 @@ static inline u32 pbdma_userd_hi_addr_f(u32 v)
 {
 	return (v & 0xff) << 0;
 }
+static inline u32 pbdma_config_r(u32 i)
+{
+	return 0x000400f4 + i*8192;
+}
+static inline u32 pbdma_config_auth_level_privileged_f(void)
+{
+	return 0x100;
+}
 static inline u32 pbdma_hce_ctrl_r(u32 i)
 {
 	return 0x000400e4 + i*8192;
diff --git a/drivers/gpu/nvgpu/gp106/hw_ram_gp106.h b/drivers/gpu/nvgpu/gp106/hw_ram_gp106.h
index b325affc..eb02ac28 100644
--- a/drivers/gpu/nvgpu/gp106/hw_ram_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_ram_gp106.h
@@ -370,6 +370,10 @@ static inline u32 ram_fc_chid_id_w(void)
 {
 	return 0;
 }
+static inline u32 ram_fc_config_w(void)
+{
+	return 61;
+}
 static inline u32 ram_fc_runlist_timeslice_w(void)
 {
 	return 62;
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index 0aa6e29e..349f25fc 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -132,6 +132,14 @@ static int channel_gp10b_setup_ramfc(struct channel_gk20a *c,
 
 	gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
 
+	if (c->is_privileged_channel) {
+		/* Set privilege level for channel */
+		gk20a_mem_wr32(g, mem, ram_fc_config_w(),
+			pbdma_config_auth_level_privileged_f());
+
+		gk20a_channel_setup_ramfc_for_privileged_channel(c);
+	}
+
 	return channel_gp10b_commit_userd(c);
 }
 
diff --git a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
index 0caffb22..65aedccd 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_pbdma_gp10b.h
@@ -342,6 +342,14 @@ static inline u32 pbdma_userd_hi_addr_f(u32 v)
 {
 	return (v & 0xff) << 0;
 }
+static inline u32 pbdma_config_r(u32 i)
+{
+	return 0x000400f4 + i*8192;
+}
+static inline u32 pbdma_config_auth_level_privileged_f(void)
+{
+	return 0x100;
+}
 static inline u32 pbdma_hce_ctrl_r(u32 i)
 {
 	return 0x000400e4 + i*8192;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
index 55323579..89dfbc21 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ram_gp10b.h
@@ -382,6 +382,10 @@ static inline u32 ram_fc_chid_id_w(void)
 {
 	return 0;
 }
+static inline u32 ram_fc_config_w(void)
+{
+	return 61;
+}
 static inline u32 ram_fc_runlist_timeslice_w(void)
 {
 	return 62;
-- 
cgit v1.2.2


From e175580d52759c4faa0e05eb728340b31fa7c4d6 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 23 Jun 2016 09:22:33 -0700
Subject: gpu: nvgpu: vgpu: Add CE engine to engine list

Initialize CE engine also for gp10b.

Change-Id: Ibce2f80b523a09fb1345995c03c5430f3b20844f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1170453
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Tested-by: Aingara Paramakuru <aparamakuru@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Lakshmanan M <lm@nvidia.com>
---
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.c | 30 +++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.c
index 34d942c1..23d945fb 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_fifo_gp10b.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -13,8 +13,36 @@
 
 #include "vgpu_fifo_gp10b.h"
 
+static int vgpu_gp10b_fifo_init_engine_info(struct fifo_gk20a *f)
+{
+	struct fifo_engine_info_gk20a *gr_info;
+	struct fifo_engine_info_gk20a *ce_info;
+	const u32 gr_sw_id = ENGINE_GR_GK20A;
+	const u32 ce_sw_id = ENGINE_GRCE_GK20A;
+
+	gk20a_dbg_fn("");
+
+	f->num_engines = 2;
+
+	gr_info = &f->engine_info[0];
+
+	/* FIXME: retrieve this from server */
+	gr_info->runlist_id = 0;
+	gr_info->engine_enum = gr_sw_id;
+	f->active_engines_list[0] = 0;
+
+	ce_info = &f->engine_info[1];
+	ce_info->runlist_id = 0;
+	ce_info->inst_id = 0;
+	ce_info->engine_enum = ce_sw_id;
+	f->active_engines_list[1] = 1;
+
+	return 0;
+}
+
 void vgpu_gp10b_init_fifo_ops(struct gpu_ops *gops)
 {
 	/* syncpoint protection not supported yet */
+	gops->fifo.init_engine_info = vgpu_gp10b_fifo_init_engine_info;
 	gops->fifo.resetup_ramfc = NULL;
 }
-- 
cgit v1.2.2


From 9704c3ad16e7320dfb79ba6b9ae4c9d06cc9dd42 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Wed, 8 Jun 2016 17:50:14 +0530
Subject: gpu: nvgpu: add QoS notifier for T186

Add QoS notifier callback gk20a_scale_qos_notify()
for T186. This enables QoS for T186.

Bug 1772462

Change-Id: Ie25ff4ba24c94354e08fa019704f5d5cc4ef8f33
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1161162
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Puneet Saxena <puneets@nvidia.com>
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index b9f50817..e8ecb043 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -412,7 +412,8 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 	.prescale = gp10b_tegra_prescale,
 	.postscale = gp10b_tegra_postscale,
 	.devfreq_governor = "nvhost_podgov",
-	.qos_id = PM_QOS_GPU_FREQ_MIN,
+
+	.qos_notify = gk20a_scale_qos_notify,
 
 	.secure_alloc = gk20a_tegra_secure_alloc,
 	.secure_page_alloc = gk20a_tegra_secure_page_alloc,
-- 
cgit v1.2.2


From b21aa660ef15dac60a011b2b1dc4a3bda6330889 Mon Sep 17 00:00:00 2001
From: Thomas Fleury <tfleury@nvidia.com>
Date: Wed, 8 Jun 2016 09:58:43 -0700
Subject: gpu: nvgpu: accessors for nv_pgraph_debug_2

Add accessors for GFXP_WFI_ALWAYS_INJECTS_WFI,
a field that controls FE behaviour for GFXP.

Jira VFND-1900

Change-Id: Id531f795422393dc603859a0f3059d0681cf9464
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1162628
(cherry picked from commit 4175a21dd2fcbf9c25623bf5d472a3bc30476faa)
Reviewed-on: http://git-master/r/1166989
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 0e47c508..00f2ac5e 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -4162,6 +4162,26 @@ static inline u32 gr_fe_gfxp_wfi_timeout_count_disabled_f(void)
 {
 	return 0x0;
 }
+static inline u32 gr_debug_2_r(void)
+{
+	return 0x00400088;
+}
+static inline u32 gr_debug_2_gfxp_wfi_always_injects_wfi_m(void)
+{
+	return 0x1 << 23;
+}
+static inline u32 gr_debug_2_gfxp_wfi_always_injects_wfi_v(u32 r)
+{
+	return (r >> 23) & 0x1;
+}
+static inline u32 gr_debug_2_gfxp_wfi_always_injects_wfi_enabled_f(void)
+{
+	return 0x800000;
+}
+static inline u32 gr_debug_2_gfxp_wfi_always_injects_wfi_disabled_f(void)
+{
+	return 0x0;
+}
 static inline u32 gr_gpcs_tpcs_sm_texio_control_r(void)
 {
 	return 0x00419c84;
-- 
cgit v1.2.2


From d459bd68a6ea0462b7db58722d5ee26c9ce1dd73 Mon Sep 17 00:00:00 2001
From: Thomas Fleury <tfleury@nvidia.com>
Date: Thu, 9 Jun 2016 10:33:04 -0700
Subject: gpu: nvgpu: set graphics preemption state

Raise NV_PGRAPH_PRI_FE_GFXP_WFI_TIMEOUT from the
default of ~20us to ~100us. Also set
NV_PGRAPH_DEBUG_2_GFXP_WFI_ALWAYS_INJECTS_WFI to
avoid going into GFXP all the time.

Bug 1593548
Jira VFND-1894

Change-Id: I6310c3605f7b83178c38de88788d87e36ee428b4
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1162629
(cherry picked from commit 873ddc7288063b1773d31a5bda30d980122d6645)
Reviewed-on: http://git-master/r/1166988
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 534ffdb7..88893886 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -14,6 +14,7 @@
  */
 
 #include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */
+#include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/tegra-fuse.h>
 
@@ -32,6 +33,8 @@
 #include "gp10b_sysfs.h"
 #include <linux/vmalloc.h>
 
+#define NVGPU_GFXP_WFI_TIMEOUT_US	100LL
+
 static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
@@ -2151,6 +2154,28 @@ static int gp10b_gr_fuse_override(struct gk20a *g)
 	}
 
 	kfree(fuses);
+
+	return 0;
+}
+
+static int gr_gp10b_init_preemption_state(struct gk20a *g)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	u32 debug_2;
+	u64 sysclk_rate;
+	u32 sysclk_cycles;
+
+	sysclk_rate = platform->clk_get_rate(g->dev);
+	sysclk_cycles = (u32)((sysclk_rate * NVGPU_GFXP_WFI_TIMEOUT_US) / 1000000ULL);
+	gk20a_writel(g, gr_fe_gfxp_wfi_timeout_r(),
+			gr_fe_gfxp_wfi_timeout_count_f(sysclk_cycles));
+
+	debug_2 = gk20a_readl(g, gr_debug_2_r());
+	debug_2 = set_field(debug_2,
+			gr_debug_2_gfxp_wfi_always_injects_wfi_m(),
+			gr_debug_2_gfxp_wfi_always_injects_wfi_enabled_f());
+	gk20a_writel(g, gr_debug_2_r(), debug_2);
+
 	return 0;
 }
 
@@ -2158,6 +2183,7 @@ void gp10b_init_gr(struct gpu_ops *gops)
 {
 	gm20b_init_gr(gops);
 	gops->gr.init_fs_state = gr_gp10b_init_fs_state;
+	gops->gr.init_preemption_state = gr_gp10b_init_preemption_state;
 	gops->gr.is_valid_class = gr_gp10b_is_valid_class;
 	gops->gr.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager;
 	gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool;
-- 
cgit v1.2.2


From bcf321ed1fe4da2ebb5ef34c09c545cf025ac419 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Wed, 29 Jun 2016 15:30:17 +0530
Subject: gpu: nvgpu: fixing sparse error/warning

nvgpu/gp106/pmu_gp106.c:30:5: warning: symbol
'gp106_pmu_enable_hw' was not declared. Should it be static?
nvgpu/gp106/pmu_gp106.c:118:5: warning: symbol
'gp106_pmu_reset' was not declared. Should it be static?
nvgpu/gp106/pmu_gp106.c:146:5: warning: symbol
'gp106_sec2_reset' was not declared. Should it be static?
nvgpu/gp106/acr_gp106.c:79:6: warning: symbol
'gp106_wpr_info' was not declared. Should it be static?
nvgpu/gp106/acr_gp106.c:92:5: warning: symbol
'gp106_alloc_blob_space' was not declared. Should it be static?
nvgpu/gp106/acr_gp106.c:120:5: warning: symbol
'pmu_ucode_details' was not declared. Should it be static?
nvgpu/gp106/acr_gp106.c:187:5: warning: symbol
'fecs_ucode_details' was not declared. Should it be static?
nvgpu/gp106/acr_gp106.c:265:5: warning: symbol
'gpccs_ucode_details' was not declared. Should it be static?
nvgpu/gp106/acr_gp106.c:348:5: warning: symbol
'gp106_prepare_ucode_blob' was not declared. Should it be static?
nvgpu/gp106/acr_gp106.c:1011:5: warning: symbol
'gp106_bootstrap_hs_flcn' was not declared. Should it be static?

Bug 200088648

Change-Id: I13716e39f540f8674b1c0f917048bb6b63f7b763
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1173076
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/acr_gp106.c | 15 ++++++++-------
 drivers/gpu/nvgpu/gp106/pmu_gp106.c |  6 +++---
 2 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index a578c4a0..5c4afef9 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -76,7 +76,7 @@ static get_ucode_details pmu_acr_supp_ucode_list[] = {
 	gpccs_ucode_details,
 };
 
-void gp106_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf)
+static void gp106_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf)
 {
 	inf->wpr_base = GP106_DGPU_WPR;
 	inf->nonwpr_base = GP106_DGPU_NONWPR;
@@ -89,7 +89,7 @@ static void flcn64_set_dma(struct falc_u64 *dma_addr, u64 value)
 	dma_addr->hi |= u64_hi32(value);
 }
 
-int gp106_alloc_blob_space(struct gk20a *g,
+static int gp106_alloc_blob_space(struct gk20a *g,
 		size_t size, struct mem_desc *mem)
 {
 	struct wpr_carveout_info wpr_inf;
@@ -117,7 +117,7 @@ void gp106_init_secure_pmu(struct gpu_ops *gops)
 }
 /* TODO - check if any free blob res needed*/
 
-int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img)
+static int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img)
 {
 	const struct firmware *pmu_fw, *pmu_desc, *pmu_sig;
 	struct pmu_gk20a *pmu = &g->pmu;
@@ -184,7 +184,7 @@ release_img_fw:
 	return err;
 }
 
-int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img)
+static int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img)
 {
 	u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
 	struct lsf_ucode_desc_v1 *lsf_desc;
@@ -262,7 +262,8 @@ rel_sig:
 	release_firmware(fecs_sig);
 	return err;
 }
-int gpccs_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img)
+
+static int gpccs_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img)
 {
 	u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
 	struct lsf_ucode_desc_v1 *lsf_desc;
@@ -345,7 +346,7 @@ rel_sig:
 	return err;
 }
 
-int gp106_prepare_ucode_blob(struct gk20a *g)
+static int gp106_prepare_ucode_blob(struct gk20a *g)
 {
 
 	int err;
@@ -1008,7 +1009,7 @@ static int lsf_gen_wpr_requirements(struct gk20a *g,
 
 /*Loads ACR bin to FB mem and bootstraps PMU with bootloader code
  * start and end are addresses of ucode blob in non-WPR region*/
-int gp106_bootstrap_hs_flcn(struct gk20a *g)
+static int gp106_bootstrap_hs_flcn(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm = &mm->pmu.vm;
diff --git a/drivers/gpu/nvgpu/gp106/pmu_gp106.c b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
index be3e8c64..f6fcd234 100644
--- a/drivers/gpu/nvgpu/gp106/pmu_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
@@ -27,7 +27,7 @@
 #define PMU_MEM_SCRUBBING_TIMEOUT_MAX 1000
 #define PMU_MEM_SCRUBBING_TIMEOUT_DEFAULT 10
 
-int gp106_pmu_enable_hw(struct pmu_gk20a *pmu, bool enable)
+static int gp106_pmu_enable_hw(struct pmu_gk20a *pmu, bool enable)
 {
 	struct gk20a *g = gk20a_from_pmu(pmu);
 
@@ -115,7 +115,7 @@ static int pmu_enable(struct pmu_gk20a *pmu, bool enable)
 	return 0;
 }
 
-int gp106_pmu_reset(struct gk20a *g)
+static int gp106_pmu_reset(struct gk20a *g)
 {
 	struct pmu_gk20a *pmu = &g->pmu;
 	int err = 0;
@@ -143,7 +143,7 @@ int gp106_pmu_reset(struct gk20a *g)
 	return err;
 }
 
-int gp106_sec2_reset(struct gk20a *g)
+static int gp106_sec2_reset(struct gk20a *g)
 {
 	gk20a_dbg_fn("");
 	//sec2 reset
-- 
cgit v1.2.2


From 6e1428484e31f2a908fc3f9edc3349813d3e3057 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 28 Jun 2016 09:58:03 -0700
Subject: gpu: nvgpu: Add TSG ops for gp106

Bug 200214046

Change-Id: I02a2e5d13f444dbdc1b4eab51ebfda6ab9402734
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1172600
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp106/hal_gp106.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index a47fa0fd..f32a2cd4 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -201,6 +201,7 @@ int gp106_init_hal(struct gk20a *g)
 	gp10b_init_regops(gops);
 	gp10b_init_cde_ops(gops);
 	gp10b_init_therm_ops(gops);
+	gk20a_init_tsg_ops(gops);
 	gm206_init_bios(gops);
 	gops->name = "gp10x";
 	gops->get_litter_value = gp106_get_litter_value;
-- 
cgit v1.2.2


From 81756640cb2c7cadb1b30c0233088268bd57ee6c Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Fri, 17 Jun 2016 15:45:17 +0300
Subject: gpu: nvgpu: gp10x: initial support for vidmem apertures

add gk20a_aperture_mask() for memory target selection now that buffers
can actually be allocated from vidmem, and use it in all cases that have
a mem_desc available.

Jira DNVGPU-76

Change-Id: Ifd1908808d928155a0cadeff8ca451a151bfc8c5
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1169294
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index c25abc78..a183154e 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -33,7 +33,6 @@ static int gp10b_init_mm_setup_hw(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
 	struct mem_desc *inst_block = &mm->bar1.inst_block;
-	u64 inst_pa = gk20a_mm_inst_block_addr(g, inst_block);
 	int err = 0;
 
 	gk20a_dbg_fn("");
@@ -44,7 +43,7 @@ static int gp10b_init_mm_setup_hw(struct gk20a *g)
 		     (g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0)
 		     >> 8ULL));
 
-	g->ops.mm.bar1_bind(g, inst_pa);
+	g->ops.mm.bar1_bind(g, inst_block);
 
 	if (g->ops.mm.init_bar2_mm_hw_setup) {
 		err = g->ops.mm.init_bar2_mm_hw_setup(g);
@@ -378,21 +377,24 @@ static const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g,
 	return gp10b_mm_levels;
 }
 
-static void gp10b_mm_init_pdb(struct gk20a *g, struct mem_desc *mem,
-		u64 pdb_addr)
+static void gp10b_mm_init_pdb(struct gk20a *g, struct mem_desc *inst_block,
+		struct vm_gk20a *vm)
 {
+	u64 pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
 	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
 	u32 pdb_addr_hi = u64_hi32(pdb_addr);
 
-	gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(),
-		(g->mm.vidmem_is_vidmem ?
-		  ram_in_page_dir_base_target_sys_mem_ncoh_f() :
+	gk20a_dbg_info("pde pa=0x%llx", pdb_addr);
+
+	gk20a_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
+		gk20a_aperture_mask(g, &vm->pdb.mem,
+		  ram_in_page_dir_base_target_sys_mem_ncoh_f(),
 		  ram_in_page_dir_base_target_vid_mem_f()) |
 		ram_in_page_dir_base_vol_true_f() |
 		ram_in_page_dir_base_lo_f(pdb_addr_lo) |
 		1 << 10);
 
-	gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(),
+	gk20a_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
 		ram_in_page_dir_base_hi_f(pdb_addr_hi));
 }
 
-- 
cgit v1.2.2


From d6730d5214a941451a298a22a037e5bcc4fb9ea4 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Fri, 17 Jun 2016 15:45:31 +0300
Subject: gpu: nvgpu: gp10x: add support for vidmem in page tables

Modify page table updates to take an aperture flag (up until
gk20a_locked_gmmu_map()), don't hard-assume sysmem and propagate it to
hardware.

Jira DNVGPU-76

Change-Id: I797fdaaf5f42a84fa0446577359147fb6908a720
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1169295
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h |  8 ++++++
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c      | 48 ++++++++++++++++++---------------
 2 files changed, 35 insertions(+), 21 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
index 86870aea..d231ee44 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gmmu_gp10b.h
@@ -242,6 +242,14 @@ static inline u32 gmmu_new_pte_address_sys_w(void)
 {
 	return 0;
 }
+static inline u32 gmmu_new_pte_address_vid_f(u32 v)
+{
+	return (v & 0xffffff) << 8;
+}
+static inline u32 gmmu_new_pte_address_vid_w(void)
+{
+	return 0;
+}
 static inline u32 gmmu_new_pte_vol_w(void)
 {
 	return 0;
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index a183154e..7778883e 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -170,7 +170,8 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 			   u64 *iova,
 			   u32 kind_v, u64 *ctag,
 			   bool cacheable, bool unmapped_pte,
-			   int rw_flag, bool sparse, bool priv)
+			   int rw_flag, bool sparse, bool priv,
+			   enum gk20a_aperture aperture)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	u64 pte_addr = 0;
@@ -184,9 +185,9 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	pte_addr = entry_addr(g, pte) >> gmmu_new_pde_address_shift_v();
 	pde_addr = entry_addr(g, parent);
 
-	pde_v[0] |= g->mm.vidmem_is_vidmem ?
-			gmmu_new_pde_aperture_sys_mem_ncoh_f() :
-			gmmu_new_pde_aperture_video_memory_f();
+	pde_v[0] |= gk20a_aperture_mask(g, &pte->mem,
+			gmmu_new_pde_aperture_sys_mem_ncoh_f(),
+			gmmu_new_pde_aperture_video_memory_f());
 	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
 	pde_v[0] |= gmmu_new_pde_vol_true_f();
 	pde_v[1] |= pte_addr >> 24;
@@ -214,7 +215,8 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 			   u64 *iova,
 			   u32 kind_v, u64 *ctag,
 			   bool cacheable, bool unmapped_pte,
-			   int rw_flag, bool sparse, bool priv)
+			   int rw_flag, bool sparse, bool priv,
+			   enum gk20a_aperture aperture)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	bool small_valid, big_valid;
@@ -239,9 +241,9 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 
 	if (small_valid) {
 		pde_v[2] |= gmmu_new_dual_pde_address_small_sys_f(pte_addr_small);
-		pde_v[2] |= g->mm.vidmem_is_vidmem ?
-			gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() :
-			gmmu_new_dual_pde_aperture_small_video_memory_f();
+		pde_v[2] |= gk20a_aperture_mask(g, &pte->mem,
+			gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(),
+			gmmu_new_dual_pde_aperture_small_video_memory_f());
 		pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
 		pde_v[3] |= pte_addr_small >> 24;
 	}
@@ -249,9 +251,9 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	if (big_valid) {
 		pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(pte_addr_big);
 		pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
-		pde_v[0] |= g->mm.vidmem_is_vidmem ?
-			gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f() :
-			gmmu_new_dual_pde_aperture_big_video_memory_f();
+		pde_v[0] |= gk20a_aperture_mask(g, &pte->mem,
+			gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(),
+			gmmu_new_dual_pde_aperture_big_video_memory_f());
 		pde_v[1] |= pte_addr_big >> 28;
 	}
 
@@ -276,7 +278,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			   u64 *iova,
 			   u32 kind_v, u64 *ctag,
 			   bool cacheable, bool unmapped_pte,
-			   int rw_flag, bool sparse, bool priv)
+			   int rw_flag, bool sparse, bool priv,
+			   enum gk20a_aperture aperture)
 {
 	struct gk20a *g = vm->mm->g;
 	u32 page_size  = vm->gmmu_page_sizes[gmmu_pgsz_idx];
@@ -284,15 +287,18 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	u32 pte_w[2] = {0, 0}; /* invalid pte */
 
 	if (*iova) {
-		if (unmapped_pte)
-			pte_w[0] = gmmu_new_pte_valid_false_f();
-		else
-			pte_w[0] = gmmu_new_pte_valid_true_f();
-		pte_w[0] |= g->mm.vidmem_is_vidmem ?
-			gmmu_new_pte_aperture_sys_mem_ncoh_f() :
-			gmmu_new_pte_aperture_video_memory_f();
-		pte_w[0] |= gmmu_new_pte_address_sys_f(*iova
-			      >> gmmu_new_pte_address_shift_v());
+		u32 pte_valid = unmapped_pte ?
+			gmmu_new_pte_valid_false_f() :
+			gmmu_new_pte_valid_true_f();
+		u32 iova_v = *iova >> gmmu_new_pte_address_shift_v();
+		u32 pte_addr = aperture == APERTURE_SYSMEM ?
+				gmmu_new_pte_address_sys_f(iova_v) :
+				gmmu_new_pte_address_vid_f(iova_v);
+		u32 pte_tgt = __gk20a_aperture_mask(g, aperture,
+				gmmu_new_pte_aperture_sys_mem_ncoh_f(),
+				gmmu_new_pte_aperture_video_memory_f());
+
+		pte_w[0] = pte_valid | pte_addr | pte_tgt;
 
 		if (priv)
 			pte_w[0] |= gmmu_new_pte_privilege_true_f();
-- 
cgit v1.2.2


From bc45c8ef2ba7836eba3ffd7a911905322ed17ab4 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Fri, 17 Jun 2016 15:45:38 +0300
Subject: gpu: nvgpu: gp10x: support in-kernel vidmem mappings

Propagate the buffer aperture flag in gk20a_locked_gmmu_map up so that
buffers represented as a mem_desc and present in vidmem can be mapped to
gpu.

JIRA DNVGPU-18
JIRA DNVGPU-76

Change-Id: Icd675e83e3c28836f0ed8880425748697713bb0a
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1169296
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c           | 3 ++-
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 88893886..6f5016cb 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -836,7 +836,8 @@ int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
 				size,
 				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
 				gk20a_mem_flag_none,
-				false);
+				false,
+				mem->aperture);
 
 	if (!mem->gpu_va) {
 		err = -ENOMEM;
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
index 3a286249..1b6003b3 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
@@ -51,7 +51,8 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 				bool clear_ctags,
 				bool sparse,
 				bool priv,
-				struct vm_gk20a_mapping_batch *batch)
+				struct vm_gk20a_mapping_batch *batch,
+				enum gk20a_aperture aperture)
 {
 	int err = 0;
 	struct device *d = dev_from_vm(vm);
-- 
cgit v1.2.2


From f34a8046d5f594f16eb29bc90a7ca49a66ef3c96 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Thu, 9 Jun 2016 18:15:04 +0530
Subject: gpu: nvgpu: clean up pm_domain code

NvGPU is moving to use runtime PM only for its power
management.
Remove the pm_domain calls that register with nvhost.

Jira DNVGPU-57

Change-Id: Idd01b680af0e8fd601801150fc663afa53b7ce6f
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1163217
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index e8ecb043..dfd1f8a5 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -21,7 +21,6 @@
 #include <uapi/linux/nvgpu.h>
 #include <linux/dma-buf.h>
 #include <linux/nvmap.h>
-#include <linux/tegra_pm_domains.h>
 #include <linux/reset.h>
 #include <soc/tegra/tegra_bpmp.h>
 #include <linux/hashtable.h>
@@ -172,8 +171,6 @@ static int gp10b_tegra_probe(struct device *dev)
 
 static int gp10b_tegra_late_probe(struct device *dev)
 {
-	/* Make gk20a power domain a subdomain of host1x */
-	nvhost_register_client_domain(dev_to_genpd(dev));
 	/*Create GP10B specific sysfs*/
 	gp10b_create_sysfs(dev);
 
@@ -184,8 +181,6 @@ static int gp10b_tegra_late_probe(struct device *dev)
 
 static int gp10b_tegra_remove(struct device *dev)
 {
-	/* remove gk20a power subdomain from host1x */
-	nvhost_unregister_client_domain(dev_to_genpd(dev));
 	gr_gp10b_remove_sysfs(dev);
 	/*Remove GP10B specific sysfs*/
 	gp10b_remove_sysfs(dev);
-- 
cgit v1.2.2


From e6b270e9e4a4d3e99871eaa5bd724a6110e93b5e Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Thu, 7 Jul 2016 10:55:17 +0300
Subject: gpu: nvgpu: use vidmem by default in gmmu_alloc variants

For devices that have vidmem available, use the vidmem allocator in
gk20a_gmmu_alloc{,_attr,_map,_map_attr}. For others, use sysmem.

Because not all of the buffers have been tested to work in vidmem yet,
rename calls to gk20a_gmmu_alloc{,_attr,_map,_map_attr} to have _sys at
the end to declare explicitly that sysmem is used. Enabling vidmem for
each now is a matter of removing "_sys" from the function call.

Jira DNVGPU-18

Change-Id: I4a67eae403f1d9d271118c35e3775b1129170676
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1176806
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/acr_gp106.c  | 2 +-
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c   | 2 +-
 drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index 5c4afef9..9364afec 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -1057,7 +1057,7 @@ static int gp106_bootstrap_hs_flcn(struct gk20a *g)
 			err = -1;
 			goto err_release_acr_fw;
 		}
-		err = gk20a_gmmu_alloc_map(vm, img_size_in_bytes,
+		err = gk20a_gmmu_alloc_map_sys(vm, img_size_in_bytes,
 				&acr->acr_ucode);
 		if (err) {
 			err = -ENOMEM;
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 6f5016cb..ff3c891f 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -827,7 +827,7 @@ int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
 
 	gk20a_dbg_fn("");
 
-	err = gk20a_gmmu_alloc_attr(vm->mm->g, 0, size, mem);
+	err = gk20a_gmmu_alloc_sys(vm->mm->g, size, mem);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c b/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
index 59af5cde..f88718b6 100644
--- a/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
@@ -34,7 +34,7 @@ int gp10b_replayable_pagefault_buffer_init(struct gk20a *g)
 	gk20a_dbg_fn("");
 
 	if (!g->mm.bar2_desc.gpu_va) {
-		err = gk20a_gmmu_alloc_map(vm, rbfb_size,
+		err = gk20a_gmmu_alloc_map_sys(vm, rbfb_size,
 						&g->mm.bar2_desc);
 		if (err) {
 			dev_err(dev_from_gk20a(g),
-- 
cgit v1.2.2


From b583ef729636677c86651685a392849bb8e22431 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 7 Jul 2016 04:03:44 -0700
Subject: gpu: nvgpu: Disable init_preemption_state on dGPU

Discrete GPU does not have clocks wired correctly. They're needed to
be able to calculate correct preemption timeout, so disable setting
the timeout.

Change-Id: I14a6d262f6b004d40432a4e026c5558303aa90a4
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1176904
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Tested-by: Mahantesh Kumbar <mkumbar@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/gr_gp106.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c
index d54a2089..9d6ce6ec 100644
--- a/drivers/gpu/nvgpu/gp106/gr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c
@@ -109,4 +109,5 @@ void gp106_init_gr(struct gpu_ops *gops)
 	gops->gr.pagepool_default_size = gr_gp106_pagepool_default_size;
 	gops->gr.handle_sw_method = gr_gp106_handle_sw_method;
 	gops->gr.cb_size_default = gr_gp106_cb_size_default;
+	gops->gr.init_preemption_state = NULL;
 }
-- 
cgit v1.2.2


From 00a092b729e42bc905db2b785c1564ad386c8ace Mon Sep 17 00:00:00 2001
From: Sami Kiminki <skiminki@nvidia.com>
Date: Tue, 12 Jul 2016 15:31:19 +0300
Subject: gpu: nvgpu: Fix read_ptimer hook for GP106

Add the read_ptimer hook for GP106. This makes NVGPU_GPU_IOCTL_GET_GPU_TIME
not crash on call.

Bug 1787348

Change-Id: I31d7c30bcf0d6ad7fdecccd25a7c9c16276632a2
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/1179661
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/hal_gp106.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index f32a2cd4..55c263cb 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -207,6 +207,7 @@ int gp106_init_hal(struct gk20a *g)
 	gops->get_litter_value = gp106_get_litter_value;
 	gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
 	gops->gr_ctx.use_dma_for_fw_bootstrap = true;
+	gops->read_ptimer = gk20a_read_ptimer;
 
 	c->twod_class = FERMI_TWOD_A;
 	c->threed_class = PASCAL_B;
-- 
cgit v1.2.2


From a536a99385ebf5bc9002597618526bf085fee324 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Mon, 18 Jul 2016 18:50:05 +0300
Subject: gpu: nvgpu: gp106: reserve wpr from allocator

In addition to nonwpr_base address, allocate also the wpr_base that is
configured as wpr, in order to not overlap user allocations on that
area.

Jira DNVGPU-18

Change-Id: Ie2976a091e8084fcdc8ffd9fb4b6c75411450acb
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1182874
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/acr_gp106.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index 9364afec..c43b0165 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -93,9 +93,19 @@ static int gp106_alloc_blob_space(struct gk20a *g,
 		size_t size, struct mem_desc *mem)
 {
 	struct wpr_carveout_info wpr_inf;
+	int err;
 
 	g->ops.pmu.get_wpr(g, &wpr_inf);
 
+	/*
+	 * Even though this mem_desc wouldn't be used, the wpr region needs to
+	 * be reserved in the allocator.
+	 */
+	err = gk20a_gmmu_alloc_attr_vid_at(g, 0, wpr_inf.size,
+				&g->acr.wpr_dummy, wpr_inf.wpr_base);
+	if (err)
+		return err;
+
 	return gk20a_gmmu_alloc_attr_vid_at(g, 0, wpr_inf.size, mem,
 			wpr_inf.nonwpr_base);
 }
-- 
cgit v1.2.2


From 8edf67e2e53c4ffd9c487a90c27eca080cb72246 Mon Sep 17 00:00:00 2001
From: Thomas Fleury <tfleury@nvidia.com>
Date: Mon, 18 Jul 2016 16:00:34 -0700
Subject: gpu: nvgpu: unset get_phys_addr_bits for dGPU

Unset get_phys_addr_bits as PCIe devices do not need
to care if SMMU is enabled or not.

Jira VFND-1965

Change-Id: Ice87ff06087ec6c0a123dcf054717eff80acc8f9
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1183085
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/mm_gp106.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/mm_gp106.c b/drivers/gpu/nvgpu/gp106/mm_gp106.c
index 37365022..3309a0d7 100644
--- a/drivers/gpu/nvgpu/gp106/mm_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/mm_gp106.c
@@ -37,4 +37,5 @@ void gp106_init_mm(struct gpu_ops *gops)
 {
 	gp10b_init_mm(gops);
 	gops->mm.get_vidmem_size = gp106_mm_get_vidmem_size;
+	gops->mm.get_physical_addr_bits = NULL;
 }
-- 
cgit v1.2.2


From a680bd05b15b9695000b04960e36873eb9e4c9d4 Mon Sep 17 00:00:00 2001
From: Lakshmanan M <lm@nvidia.com>
Date: Wed, 29 Jun 2016 16:09:37 +0530
Subject: gpu: nvgpu: Add control flag to allow kernel to create privileged CE
 channels

Added control flag for nvgpu infra to allow kernel to create privileged
CE channels for page migration and clearing support between sysmem
and vidmem.

JIRA DNVGPU-53

Change-Id: I1fc35eea60af3d1ea9a0b5582011f20d58958ccb
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1173091
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index dfd1f8a5..1e4796d4 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -398,6 +398,8 @@ struct gk20a_platform t18x_gpu_tegra_platform = {
 
 	.has_cde = true,
 
+	.has_ce = true,
+
 	.clk_get_rate = gp10b_get_clk_rate,
 	.clk_round_rate = gp10b_round_clk_rate,
 	.clk_set_rate = gp10b_set_clk_rate,
-- 
cgit v1.2.2


From f61d819accfd90a8db59799f36f9ec97e97424f0 Mon Sep 17 00:00:00 2001
From: Peter Daifuku <pdaifuku@nvidia.com>
Date: Fri, 3 Jun 2016 15:29:57 -0700
Subject: gpu: nvgpu: ppc register support

Fix support for ppc_in_gpc_base
Add support for ppc_in_gpc_shared_base

Bug 1771830

Change-Id: I1d04bfd20eac08a26986a2436524b97a008ed913
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1158889
(cherry picked from commit 0f9ac2fd958556ee5d76d4cb2f6a335960227433)
Reviewed-on: http://git-master/r/1164398
(cherry picked from commit aa12f60061bdbeb68094d59258ac2db34f0cfe2a)
Reviewed-on: http://git-master/r/1181501
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c     | 8 ++++++++
 drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h | 4 ++++
 2 files changed, 12 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index e44767a0..d82a03eb 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -138,9 +138,15 @@ static int gp10b_get_litter_value(struct gk20a *g,
 	case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
 		ret = proj_tpc_in_gpc_shared_base_v();
 		break;
+	case GPU_LIT_PPC_IN_GPC_BASE:
+		ret = proj_ppc_in_gpc_base_v();
+		break;
 	case GPU_LIT_PPC_IN_GPC_STRIDE:
 		ret = proj_ppc_in_gpc_stride_v();
 		break;
+	case GPU_LIT_PPC_IN_GPC_SHARED_BASE:
+		ret = proj_ppc_in_gpc_shared_base_v();
+		break;
 	case GPU_LIT_ROP_BASE:
 		ret = proj_rop_base_v();
 		break;
@@ -169,6 +175,8 @@ static int gp10b_get_litter_value(struct gk20a *g,
 		ret = proj_fbpa_stride_v();
 		break;
 	default:
+		gk20a_err(dev_from_gk20a(g), "Missing definition %d", value);
+		BUG();
 		break;
 	}
 
diff --git a/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
index d1a60c29..08a7cb82 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
@@ -78,6 +78,10 @@ static inline u32 proj_ppc_in_gpc_base_v(void)
 {
 	return 0x00003000;
 }
+static inline u32 proj_ppc_in_gpc_shared_base_v(void)
+{
+	return 0x00003e00;
+}
 static inline u32 proj_ppc_in_gpc_stride_v(void)
 {
 	return 0x00000200;
-- 
cgit v1.2.2


From a5fad1ec42d3f1ef1391bed8c768e0b3aa0f3fab Mon Sep 17 00:00:00 2001
From: Hoang Pham <hopham@nvidia.com>
Date: Fri, 22 Jul 2016 09:59:32 -0700
Subject: Revert "gpu: nvgpu: ppc register support"

This reverts commit 3639659575e76f81e31c5c9f3aca8896c4ebcb69.

Change-Id: Ieb6a40e30128bb9c59f64f6e39bb026de9a30397
Signed-off-by: Hoang Pham <hopham@nvidia.com>
Reviewed-on: http://git-master/r/1189599
Reviewed-by: Vladislav Buzov <vbuzov@nvidia.com>
Tested-by: Vladislav Buzov <vbuzov@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c     | 8 --------
 drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h | 4 ----
 2 files changed, 12 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index d82a03eb..e44767a0 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -138,15 +138,9 @@ static int gp10b_get_litter_value(struct gk20a *g,
 	case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
 		ret = proj_tpc_in_gpc_shared_base_v();
 		break;
-	case GPU_LIT_PPC_IN_GPC_BASE:
-		ret = proj_ppc_in_gpc_base_v();
-		break;
 	case GPU_LIT_PPC_IN_GPC_STRIDE:
 		ret = proj_ppc_in_gpc_stride_v();
 		break;
-	case GPU_LIT_PPC_IN_GPC_SHARED_BASE:
-		ret = proj_ppc_in_gpc_shared_base_v();
-		break;
 	case GPU_LIT_ROP_BASE:
 		ret = proj_rop_base_v();
 		break;
@@ -175,8 +169,6 @@ static int gp10b_get_litter_value(struct gk20a *g,
 		ret = proj_fbpa_stride_v();
 		break;
 	default:
-		gk20a_err(dev_from_gk20a(g), "Missing definition %d", value);
-		BUG();
 		break;
 	}
 
diff --git a/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
index 08a7cb82..d1a60c29 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
@@ -78,10 +78,6 @@ static inline u32 proj_ppc_in_gpc_base_v(void)
 {
 	return 0x00003000;
 }
-static inline u32 proj_ppc_in_gpc_shared_base_v(void)
-{
-	return 0x00003e00;
-}
 static inline u32 proj_ppc_in_gpc_stride_v(void)
 {
 	return 0x00000200;
-- 
cgit v1.2.2


From cb80b2315db303e7f27dbbc5a37d1e9eb72ae8e6 Mon Sep 17 00:00:00 2001
From: Peter Daifuku <pdaifuku@nvidia.com>
Date: Fri, 3 Jun 2016 15:29:57 -0700
Subject: gpu: nvgpu: ppc register support

Fix support for ppc_in_gpc_base
Add support for ppc_in_gpc_shared_base

Bug 1771830

Change-Id: Icb0bdedbe78ec4246426789e62302118682ed20a
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1158889
(cherry picked from commit 0f9ac2fd958556ee5d76d4cb2f6a335960227433)
Reviewed-on: http://git-master/r/1164398
(cherry picked from commit aa12f60061bdbeb68094d59258ac2db34f0cfe2a)
Reviewed-on: http://git-master/r/1181501
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-on: http://git-master/r/1189608
Reviewed-by: Vladislav Buzov <vbuzov@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c     | 8 ++++++++
 drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h | 4 ++++
 2 files changed, 12 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index e44767a0..d82a03eb 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -138,9 +138,15 @@ static int gp10b_get_litter_value(struct gk20a *g,
 	case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
 		ret = proj_tpc_in_gpc_shared_base_v();
 		break;
+	case GPU_LIT_PPC_IN_GPC_BASE:
+		ret = proj_ppc_in_gpc_base_v();
+		break;
 	case GPU_LIT_PPC_IN_GPC_STRIDE:
 		ret = proj_ppc_in_gpc_stride_v();
 		break;
+	case GPU_LIT_PPC_IN_GPC_SHARED_BASE:
+		ret = proj_ppc_in_gpc_shared_base_v();
+		break;
 	case GPU_LIT_ROP_BASE:
 		ret = proj_rop_base_v();
 		break;
@@ -169,6 +175,8 @@ static int gp10b_get_litter_value(struct gk20a *g,
 		ret = proj_fbpa_stride_v();
 		break;
 	default:
+		gk20a_err(dev_from_gk20a(g), "Missing definition %d", value);
+		BUG();
 		break;
 	}
 
diff --git a/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
index d1a60c29..08a7cb82 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
@@ -78,6 +78,10 @@ static inline u32 proj_ppc_in_gpc_base_v(void)
 {
 	return 0x00003000;
 }
+static inline u32 proj_ppc_in_gpc_shared_base_v(void)
+{
+	return 0x00003e00;
+}
 static inline u32 proj_ppc_in_gpc_stride_v(void)
 {
 	return 0x00000200;
-- 
cgit v1.2.2


From c88a9e92603afbbafe37a80cb89600a12890d00b Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@nvidia.com>
Date: Mon, 4 Jul 2016 10:23:47 +0200
Subject: nvgpu-t18x: rename Makefile to Makefile.{suffix}

Bug 1783210

Signed-off-by: dmitry pervushin <dpervushin@nvidia.com>
Change-Id: I0ff8be222d18eb2fc81de1b22ae26bd4825e6c97
Reviewed-on: http://git-master/r/1175098
Reviewed-on: http://git-master/r/1178717
Tested-by: Alexander Van Brunt <avanbrunt@nvidia.com>
Reviewed-by: Alexander Van Brunt <avanbrunt@nvidia.com>
Reviewed-on: http://git-master/r/1183066
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/Makefile            | 37 -----------------------------------
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x | 37 +++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 37 deletions(-)
 delete mode 100644 drivers/gpu/nvgpu/Makefile
 create mode 100644 drivers/gpu/nvgpu/Makefile.nvgpu-t18x

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
deleted file mode 100644
index 643c0967..00000000
--- a/drivers/gpu/nvgpu/Makefile
+++ /dev/null
@@ -1,37 +0,0 @@
-nvgpu-t18x := ../../../../nvgpu-t18x/drivers/gpu/nvgpu
-
-nvgpu-y += \
-	$(nvgpu-t18x)/gp10b/gr_gp10b.o  \
-	$(nvgpu-t18x)/gp10b/gr_ctx_gp10b.o  \
-	$(nvgpu-t18x)/gp10b/ce_gp10b.o \
-	$(nvgpu-t18x)/gp10b/mc_gp10b.o  \
-	$(nvgpu-t18x)/gp10b/fifo_gp10b.o  \
-	$(nvgpu-t18x)/gp10b/ltc_gp10b.o \
-	$(nvgpu-t18x)/gp10b/mm_gp10b.o \
-	$(nvgpu-t18x)/gp10b/fb_gp10b.o \
-	$(nvgpu-t18x)/gp10b/pmu_gp10b.o \
-	$(nvgpu-t18x)/gp10b/hal_gp10b.o \
-	$(nvgpu-t18x)/gp10b/rpfb_gp10b.o \
-	$(nvgpu-t18x)/gp10b/gp10b_gating_reglist.o \
-	$(nvgpu-t18x)/gp10b/regops_gp10b.o \
-	$(nvgpu-t18x)/gp10b/cde_gp10b.o \
-	$(nvgpu-t18x)/gp10b/therm_gp10b.o \
-	$(nvgpu-t18x)/gp10b/fecs_trace_gp10b.o \
-	$(nvgpu-t18x)/gp10b/gp10b_sysfs.o \
-	$(nvgpu-t18x)/gp10b/gp10b.o \
-	$(nvgpu-t18x)/gp106/hal_gp106.o \
-	$(nvgpu-t18x)/gp106/mm_gp106.o \
-	$(nvgpu-t18x)/gp106/pmu_gp106.o \
-	$(nvgpu-t18x)/gp106/gr_gp106.o \
-	$(nvgpu-t18x)/gp106/gr_ctx_gp106.o \
-	$(nvgpu-t18x)/gp106/acr_gp106.o \
-	$(nvgpu-t18x)/gp106/sec2_gp106.o \
-	$(nvgpu-t18x)/gp106/fifo_gp106.o
-
-nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
-
-nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
-	$(nvgpu-t18x)/vgpu/gp10b/vgpu_hal_gp10b.o  \
-	$(nvgpu-t18x)/vgpu/gp10b/vgpu_gr_gp10b.o  \
-	$(nvgpu-t18x)/vgpu/gp10b/vgpu_mm_gp10b.o \
-	$(nvgpu-t18x)/vgpu/gp10b/vgpu_fifo_gp10b.o
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
new file mode 100644
index 00000000..643c0967
--- /dev/null
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -0,0 +1,37 @@
+nvgpu-t18x := ../../../../nvgpu-t18x/drivers/gpu/nvgpu
+
+nvgpu-y += \
+	$(nvgpu-t18x)/gp10b/gr_gp10b.o  \
+	$(nvgpu-t18x)/gp10b/gr_ctx_gp10b.o  \
+	$(nvgpu-t18x)/gp10b/ce_gp10b.o \
+	$(nvgpu-t18x)/gp10b/mc_gp10b.o  \
+	$(nvgpu-t18x)/gp10b/fifo_gp10b.o  \
+	$(nvgpu-t18x)/gp10b/ltc_gp10b.o \
+	$(nvgpu-t18x)/gp10b/mm_gp10b.o \
+	$(nvgpu-t18x)/gp10b/fb_gp10b.o \
+	$(nvgpu-t18x)/gp10b/pmu_gp10b.o \
+	$(nvgpu-t18x)/gp10b/hal_gp10b.o \
+	$(nvgpu-t18x)/gp10b/rpfb_gp10b.o \
+	$(nvgpu-t18x)/gp10b/gp10b_gating_reglist.o \
+	$(nvgpu-t18x)/gp10b/regops_gp10b.o \
+	$(nvgpu-t18x)/gp10b/cde_gp10b.o \
+	$(nvgpu-t18x)/gp10b/therm_gp10b.o \
+	$(nvgpu-t18x)/gp10b/fecs_trace_gp10b.o \
+	$(nvgpu-t18x)/gp10b/gp10b_sysfs.o \
+	$(nvgpu-t18x)/gp10b/gp10b.o \
+	$(nvgpu-t18x)/gp106/hal_gp106.o \
+	$(nvgpu-t18x)/gp106/mm_gp106.o \
+	$(nvgpu-t18x)/gp106/pmu_gp106.o \
+	$(nvgpu-t18x)/gp106/gr_gp106.o \
+	$(nvgpu-t18x)/gp106/gr_ctx_gp106.o \
+	$(nvgpu-t18x)/gp106/acr_gp106.o \
+	$(nvgpu-t18x)/gp106/sec2_gp106.o \
+	$(nvgpu-t18x)/gp106/fifo_gp106.o
+
+nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
+
+nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
+	$(nvgpu-t18x)/vgpu/gp10b/vgpu_hal_gp10b.o  \
+	$(nvgpu-t18x)/vgpu/gp10b/vgpu_gr_gp10b.o  \
+	$(nvgpu-t18x)/vgpu/gp10b/vgpu_mm_gp10b.o \
+	$(nvgpu-t18x)/vgpu/gp10b/vgpu_fifo_gp10b.o
-- 
cgit v1.2.2


From 233df0927b68095bb08ffc78157884c4658c517b Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@nvidia.com>
Date: Thu, 7 Jul 2016 20:08:29 +0200
Subject: nvgpu-t18x: use tegra-path for t18x project

Use the tegra-path function to compute the path to the t18x project.
This allows the file to be unmodified if the nvmap project is collapsed
into the main project.

Bug 1783210

Signed-off-by: dmitry pervushin <dpervushin@nvidia.com>
Change-Id: I04e8311fda5ba7984f0985c9abec63beb1b38c74
Reviewed-on: http://git-master/r/1177101
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-on: http://git-master/r/1178718
Tested-by: Alexander Van Brunt <avanbrunt@nvidia.com>
Reviewed-by: Alexander Van Brunt <avanbrunt@nvidia.com>
Reviewed-on: http://git-master/r/1189710
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index 643c0967..548a9c0d 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -1,4 +1,4 @@
-nvgpu-t18x := ../../../../nvgpu-t18x/drivers/gpu/nvgpu
+nvgpu-t18x := $(call tegra-path,nvgpu-t18x,drivers/gpu/nvgpu)
 
 nvgpu-y += \
 	$(nvgpu-t18x)/gp10b/gr_gp10b.o  \
-- 
cgit v1.2.2


From b1ee53e03e37c507f3cebb6c156bb20c6b2a5eb1 Mon Sep 17 00:00:00 2001
From: Richard Zhao <rizhao@nvidia.com>
Date: Thu, 21 Jul 2016 10:42:59 -0700
Subject: gpu: nvgpu: gp10b: add tpc disable fuse override

When gp10b added fuse override callback, tpc disable
fuse override was missed, which was added in gm20b file.

Bug 200220632

Change-Id: I49101a6691f86ccbca390c83f3bd4314eb1a9e36
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/1185066
(cherry picked from commit 46f8b7c66d745e80e8c5fc80c19f759ffadf3308)
Reviewed-on: http://git-master/r/1189783
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vladislav Buzov <vbuzov@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index ff3c891f..374242bf 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -18,6 +18,7 @@
 #include <linux/delay.h>
 #include <linux/tegra-fuse.h>
 
+#include <dt-bindings/soc/gm20b-fuse.h>
 #include <dt-bindings/soc/gp10b-fuse.h>
 
 #include "gk20a/gr_gk20a.h"
@@ -2144,6 +2145,9 @@ static int gp10b_gr_fuse_override(struct gk20a *g)
 		fuse = fuses[2 * i];
 		value = fuses[2 * i + 1];
 		switch (fuse) {
+		case GM20B_FUSE_OPT_TPC_DISABLE:
+			gm20b_gr_tpc_disable_override(g, value);
+			break;
 		case GP10B_FUSE_OPT_ECC_EN:
 			g->gr.t18x.fecs_feature_override_ecc_val = value;
 			break;
-- 
cgit v1.2.2


From 436109f46d49a24b69bab7c85b112f192ab002c0 Mon Sep 17 00:00:00 2001
From: Seema Khowala <seemaj@nvidia.com>
Date: Thu, 7 Jul 2016 15:01:00 -0700
Subject: gpu: nvgpu: gp10b: add is_fmodel check

Check for is_fmodel instead of checking
for simulation platforms.

Bug 1735760

Change-Id: I14e349088e9414a73353a94613fa031e63bfa31f
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: http://git-master/r/1177200
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: Tejal Kudav <tkudav@nvidia.com>
Reviewed-by: Ayoosh Bansal <ayooshb@nvidia.com>
Reviewed-by: Adeel Raza <araza@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c            | 7 ++++---
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c            | 5 +++--
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 +-
 3 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index d82a03eb..87ba5bf6 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -187,11 +187,12 @@ int gp10b_init_hal(struct gk20a *g)
 {
 	struct gpu_ops *gops = &g->ops;
 	struct nvgpu_gpu_characteristics *c = &g->gpu_characteristics;
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
 
 	*gops = gp10b_ops;
 
 #ifdef CONFIG_TEGRA_ACR
-	if (tegra_platform_is_linsim()) {
+	if (platform->is_fmodel) {
 		gops->privsecurity = 0;
 		gops->securegpccs = 0;
 	} else {
@@ -206,8 +207,8 @@ int gp10b_init_hal(struct gk20a *g)
 		}
 	}
 #else
-	if (tegra_platform_is_linsim()) {
-		gk20a_dbg_info("running ASIM with PRIV security disabled");
+	if (platform->is_fmodel) {
+		gk20a_dbg_info("running simulator with PRIV security disabled");
 		gops->privsecurity = 0;
 		gops->securegpccs = 0;
 	} else {
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 0ccabaf8..3e64d435 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * GP10B L2
  *
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -70,6 +70,7 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	u32 compbit_backing_size;
 
 	int err;
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
 
 	gk20a_dbg_fn("");
 
@@ -100,7 +101,7 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	gk20a_dbg_info("gobs_per_comptagline_per_slice: %d",
 		gobs_per_comptagline_per_slice);
 
-	if (tegra_platform_is_linsim())
+	if (platform->is_fmodel)
 		err = gk20a_ltc_alloc_phys_cbc(g, compbit_backing_size);
 	else
 		err = gk20a_ltc_alloc_virt_cbc(g, compbit_backing_size);
diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 1e4796d4..210d9865 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -64,7 +64,7 @@ static int gp10b_tegra_get_clocks(struct device *dev)
 	struct gk20a_platform *platform = dev_get_drvdata(dev);
 	int i;
 
-	if (tegra_platform_is_linsim())
+	if (platform->is_fmodel)
 		return 0;
 
 	platform->num_clks = 0;
-- 
cgit v1.2.2


From cae5d380d8b465f4d1389ae80d6cec1458951e29 Mon Sep 17 00:00:00 2001
From: Lakshmanan M <lm@nvidia.com>
Date: Thu, 28 Jul 2016 16:53:31 +0530
Subject: gpu: nvgpu: Add preemption mode support for gp10x

Added preemption mode (WFI, GFXP, CTA and CILP) support for gp10x
family gr class (PASCAL_B and PASCAL_COMPUTE_B).

Bug 200221149

Change-Id: Ia8b781c5baedba660db5997f190a0b363286ed7f
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1193209
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/gr_gp106.c | 118 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c |  24 ++++++--
 2 files changed, 136 insertions(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c
index 9d6ce6ec..01d06975 100644
--- a/drivers/gpu/nvgpu/gp106/gr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c
@@ -102,6 +102,123 @@ static void gr_gp106_cb_size_default(struct gk20a *g)
 		gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
 }
 
+static int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g,
+				struct gr_ctx_desc *gr_ctx,
+				struct vm_gk20a *vm, u32 class,
+				u32 graphics_preempt_mode,
+				u32 compute_preempt_mode)
+{
+	int err = 0;
+
+	if (class == PASCAL_B && g->gr.t18x.ctx_vars.force_preemption_gfxp)
+		graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
+
+	if (class == PASCAL_COMPUTE_B &&
+			g->gr.t18x.ctx_vars.force_preemption_cilp)
+		compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
+
+	/* check for invalid combinations */
+	if ((graphics_preempt_mode == 0) && (compute_preempt_mode == 0))
+		return -EINVAL;
+
+	if ((graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) &&
+		   (compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP))
+		return -EINVAL;
+
+	/* set preemption modes */
+	switch (graphics_preempt_mode) {
+	case NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP:
+		{
+		u32 spill_size =
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
+			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
+		u32 pagepool_size = g->ops.gr.pagepool_default_size(g) *
+			gr_scc_pagepool_total_pages_byte_granularity_v();
+		u32 betacb_size = g->gr.attrib_cb_default_size +
+				  (gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
+				   gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
+		u32 attrib_cb_size = (betacb_size + g->gr.alpha_cb_size) *
+				  gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
+				  g->gr.max_tpc_count;
+		attrib_cb_size = ALIGN(attrib_cb_size, 128);
+
+		gk20a_dbg_info("gfxp context spill_size=%d", spill_size);
+		gk20a_dbg_info("gfxp context pagepool_size=%d", pagepool_size);
+		gk20a_dbg_info("gfxp context attrib_cb_size=%d",
+				attrib_cb_size);
+
+		err = gr_gp10b_alloc_buffer(vm,
+					g->gr.t18x.ctx_vars.preempt_image_size,
+					&gr_ctx->t18x.preempt_ctxsw_buffer);
+		if (err) {
+			gk20a_err(dev_from_gk20a(g),
+				  "cannot allocate preempt buffer");
+			goto fail;
+		}
+
+		err = gr_gp10b_alloc_buffer(vm,
+					spill_size,
+					&gr_ctx->t18x.spill_ctxsw_buffer);
+		if (err) {
+			gk20a_err(dev_from_gk20a(g),
+				  "cannot allocate spill buffer");
+			goto fail_free_preempt;
+		}
+
+		err = gr_gp10b_alloc_buffer(vm,
+					attrib_cb_size,
+					&gr_ctx->t18x.betacb_ctxsw_buffer);
+		if (err) {
+			gk20a_err(dev_from_gk20a(g),
+				  "cannot allocate beta buffer");
+			goto fail_free_spill;
+		}
+
+		err = gr_gp10b_alloc_buffer(vm,
+					pagepool_size,
+					&gr_ctx->t18x.pagepool_ctxsw_buffer);
+		if (err) {
+			gk20a_err(dev_from_gk20a(g),
+				  "cannot allocate page pool");
+			goto fail_free_betacb;
+		}
+
+		gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
+		break;
+		}
+
+	case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI:
+		gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
+		break;
+
+	default:
+		break;
+	}
+
+	if (class == PASCAL_COMPUTE_B) {
+		switch (compute_preempt_mode) {
+		case NVGPU_COMPUTE_PREEMPTION_MODE_WFI:
+		case NVGPU_COMPUTE_PREEMPTION_MODE_CTA:
+		case NVGPU_COMPUTE_PREEMPTION_MODE_CILP:
+			gr_ctx->compute_preempt_mode = compute_preempt_mode;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return 0;
+
+fail_free_betacb:
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
+fail_free_spill:
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.spill_ctxsw_buffer);
+fail_free_preempt:
+	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.preempt_ctxsw_buffer);
+fail:
+	return err;
+}
+
 void gp106_init_gr(struct gpu_ops *gops)
 {
 	gp10b_init_gr(gops);
@@ -110,4 +227,5 @@ void gp106_init_gr(struct gpu_ops *gops)
 	gops->gr.handle_sw_method = gr_gp106_handle_sw_method;
 	gops->gr.cb_size_default = gr_gp106_cb_size_default;
 	gops->gr.init_preemption_state = NULL;
+	gops->gr.set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 374242bf..ee73fed1 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -992,9 +992,15 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 		compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
 
 	if (graphics_preempt_mode || compute_preempt_mode) {
-		err = gr_gp10b_set_ctxsw_preemption_mode(g, *gr_ctx, vm,
+		if (g->ops.gr.set_ctxsw_preemption_mode) {
+			err = g->ops.gr.set_ctxsw_preemption_mode(g, *gr_ctx, vm,
 			    class, graphics_preempt_mode, compute_preempt_mode);
-		if (err)
+			if (err) {
+				gk20a_err(dev_from_gk20a(g),
+						"set_ctxsw_preemption_mode failed");
+				goto fail_free_gk20a_ctx;
+			}
+		} else
 			goto fail_free_gk20a_ctx;
 	}
 
@@ -2067,10 +2073,15 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 		vm = ch->vm;
 	}
 
-	err = gr_gp10b_set_ctxsw_preemption_mode(g, gr_ctx, vm, class,
-					graphics_preempt_mode, compute_preempt_mode);
-	if (err)
-		return err;
+	if (g->ops.gr.set_ctxsw_preemption_mode) {
+		err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class,
+						graphics_preempt_mode, compute_preempt_mode);
+		if (err) {
+			gk20a_err(dev_from_gk20a(g),
+					"set_ctxsw_preemption_mode failed");
+			return err;
+		}
+	}
 
 	if (gk20a_mem_begin(g, mem))
 		return -ENOMEM;
@@ -2225,6 +2236,7 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.get_lrf_tex_ltc_dram_override = get_ecc_override_val;
 	gops->gr.suspend_contexts = gr_gp10b_suspend_contexts;
 	gops->gr.set_preemption_mode = gr_gp10b_set_preemption_mode;
+	gops->gr.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode;
 	gops->gr.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags;
 	gops->gr.fuse_override = gp10b_gr_fuse_override;
 	gops->gr.load_smid_config = gr_gp10b_load_smid_config;
-- 
cgit v1.2.2


From 0e1758a723541ad9b4507bc34737f4f0f25e2418 Mon Sep 17 00:00:00 2001
From: Peter Daifuku <pdaifuku@nvidia.com>
Date: Fri, 17 Jun 2016 15:26:24 -0700
Subject: gpu: nvgpu: move dbg_session_ops to gops

Move dbg_session_ops to gops for better code consistency

JIRA VFND-1905

Change-Id: I0ac10a69194c8ca485f361cd8cea61d8ab72145a
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1192642
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Richard Zhao <rizhao@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/hal_gp106.c | 2 ++
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 55c263cb..a0db0bfd 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -45,6 +45,7 @@
 #include "gp106/gr_gp106.h"
 #include "nvgpu_gpuid_t18x.h"
 #include "hw_proj_gp106.h"
+#include "gk20a/dbg_gpu_gk20a.h"
 
 static struct gpu_ops gp106_ops = {
 	.clock_gating = {
@@ -198,6 +199,7 @@ int gp106_init_hal(struct gk20a *g)
 	gp106_init_mm(gops);
 	gp106_init_pmu_ops(gops);
 	gk20a_init_debug_ops(gops);
+	gk20a_init_dbg_session_ops(gops);
 	gp10b_init_regops(gops);
 	gp10b_init_cde_ops(gops);
 	gp10b_init_therm_ops(gops);
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 87ba5bf6..ae92608c 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -43,6 +43,7 @@
 
 #include "gp10b.h"
 #include "hw_proj_gp10b.h"
+#include "gk20a/dbg_gpu_gk20a.h"
 
 #define FUSE_OPT_PRIV_SEC_EN_0 0x264
 #define PRIV_SECURITY_ENABLED 0x01
@@ -236,6 +237,7 @@ int gp10b_init_hal(struct gk20a *g)
 	gp10b_init_mm(gops);
 	gp10b_init_pmu_ops(gops);
 	gk20a_init_debug_ops(gops);
+	gk20a_init_dbg_session_ops(gops);
 	gp10b_init_regops(gops);
 	gp10b_init_cde_ops(gops);
 	gp10b_init_therm_ops(gops);
-- 
cgit v1.2.2


From c0cbc337cad85ea962f433366290fa6e84df1244 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Mon, 1 Aug 2016 14:26:21 +0530
Subject: gpu: nvgpu: post bpt events after processing

Receive hww_global_esr in gr_gp10b_handle_sm_exception() and
pass it to gr_gk20a_handle_sm_exception()

Bug 200209410

Change-Id: I467355aa57dd3cf03c4ea2134fbc8691f8e76369
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1194986
GVS: Gerrit_Virtual_Submit
Reviewed-by: Cory Perry <cperry@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index ee73fed1..984241db 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -98,7 +98,8 @@ static void gr_gp10b_sm_lrf_ecc_overcount_war(int single_err,
 }
 
 static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
-			bool *post_event, struct channel_gk20a *fault_ch)
+			bool *post_event, struct channel_gk20a *fault_ch,
+			u32 *hww_global_esr)
 {
 	int ret = 0;
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
@@ -108,7 +109,7 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 	u32 lrf_single_count_delta, lrf_double_count_delta;
 	u32 shm_ecc_status;
 
-	gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch);
+	gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr);
 
 	/* Check for LRF ECC errors. */
         lrf_ecc_status = gk20a_readl(g,
-- 
cgit v1.2.2


From a862dd612204813b603dd0c07442488f47c50448 Mon Sep 17 00:00:00 2001
From: Richard Zhao <rizhao@nvidia.com>
Date: Thu, 21 Jul 2016 16:56:15 -0700
Subject: gpu: nvgpu: vgpu: move to use vgpu_get_handle helper function

JIRA VFND-2103

Change-Id: Ic11cff40e64849cb6abb193bec54d03857433416
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/1185205
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vladislav Buzov <vbuzov@nvidia.com>
---
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c | 9 +++------
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c | 5 ++---
 2 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
index 3194fff1..78205afb 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
@@ -20,7 +20,6 @@
 static void vgpu_gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 				struct gr_ctx_desc *gr_ctx)
 {
-	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
 	struct tegra_vgpu_cmd_msg msg = {0};
 	struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
 	int err;
@@ -31,7 +30,7 @@ static void vgpu_gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 		return;
 
 	msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE;
-	msg.handle = platform->virt_handle;
+	msg.handle = vgpu_get_handle(g);
 	p->gr_ctx_handle = gr_ctx->virt_ctx;
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
@@ -52,7 +51,6 @@ static int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 				u32 class,
 				u32 flags)
 {
-	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
 	struct tegra_vgpu_cmd_msg msg = {0};
 	struct tegra_vgpu_gr_bind_ctxsw_buffers_params *p =
 			&msg.params.gr_bind_ctxsw_buffers;
@@ -162,7 +160,7 @@ static int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 
 	if (gr_ctx->graphics_preempt_mode || gr_ctx->compute_preempt_mode) {
 		msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTXSW_BUFFERS;
-		msg.handle = platform->virt_handle;
+		msg.handle = vgpu_get_handle(g);
 		p->gr_ctx_handle = gr_ctx->virt_ctx;
 		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 		if (err || msg.ret) {
@@ -181,7 +179,6 @@ fail:
 
 static int vgpu_gr_gp10b_init_ctx_state(struct gk20a *g)
 {
-	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
 	int err;
 
 	gk20a_dbg_fn("");
@@ -190,7 +187,7 @@ static int vgpu_gr_gp10b_init_ctx_state(struct gk20a *g)
 	if (err)
 		return err;
 
-	vgpu_get_attribute(platform->virt_handle,
+	vgpu_get_attribute(vgpu_get_handle(g),
 			TEGRA_VGPU_ATTRIB_PREEMPT_CTX_SIZE,
 			&g->gr.t18x.ctx_vars.preempt_image_size);
 	if (!g->gr.t18x.ctx_vars.preempt_image_size)
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
index 1b6003b3..8be6b19c 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
@@ -1,7 +1,7 @@
 /*
  * Virtualized GPU Memory Management
  *
- * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -57,7 +57,6 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 	int err = 0;
 	struct device *d = dev_from_vm(vm);
 	struct gk20a *g = gk20a_from_vm(vm);
-	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_as_map_ex_params *p = &msg.params.as_map_ex;
 	struct tegra_vgpu_mem_desc *mem_desc;
@@ -149,7 +148,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 	}
 
 	msg.cmd = TEGRA_VGPU_CMD_AS_MAP_EX;
-	msg.handle = platform->virt_handle;
+	msg.handle = vgpu_get_handle(g);
 	p->handle = vm->handle;
 	p->gpu_va = map_offset;
 	p->size = size;
-- 
cgit v1.2.2


From fa58dd3f19b286e04907179c4e3b1c75676482e1 Mon Sep 17 00:00:00 2001
From: Peter Daifuku <pdaifuku@nvidia.com>
Date: Thu, 11 Aug 2016 15:06:45 -0700
Subject: gpu: nvgpu: ppc register support

Fix support for ppc_in_gpc_base
Add support for ppc_in_gpc_shared_base

Bug 1771830

Change-Id: I3c4576c4d9233ec05f9a52952f42e3226532ff5b
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1201509
(cherry picked from commit 8594628ad4cb90e3298b0d1a3f94aeb50d9c27ab)
Reviewed-on: http://git-master/r/1203183
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/hal_gp106.c     | 4 ++++
 drivers/gpu/nvgpu/gp106/hw_proj_gp106.h | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index a0db0bfd..d4e843d8 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -139,9 +139,13 @@ static int gp106_get_litter_value(struct gk20a *g,
 	case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
 		ret = proj_tpc_in_gpc_shared_base_v();
 		break;
+	case GPU_LIT_PPC_IN_GPC_BASE:
+		ret = proj_ppc_in_gpc_base_v();
 	case GPU_LIT_PPC_IN_GPC_STRIDE:
 		ret = proj_ppc_in_gpc_stride_v();
 		break;
+	case GPU_LIT_PPC_IN_GPC_SHARED_BASE:
+		ret = proj_ppc_in_gpc_shared_base_v();
 	case GPU_LIT_ROP_BASE:
 		ret = proj_rop_base_v();
 		break;
diff --git a/drivers/gpu/nvgpu/gp106/hw_proj_gp106.h b/drivers/gpu/nvgpu/gp106/hw_proj_gp106.h
index 01e835ec..0063712f 100644
--- a/drivers/gpu/nvgpu/gp106/hw_proj_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_proj_gp106.h
@@ -78,6 +78,10 @@ static inline u32 proj_ppc_in_gpc_base_v(void)
 {
 	return 0x00003000;
 }
+static inline u32 proj_ppc_in_gpc_shared_base_v(void)
+{
+	return 0x00003e00;
+}
 static inline u32 proj_ppc_in_gpc_stride_v(void)
 {
 	return 0x00000200;
-- 
cgit v1.2.2


From 442ee5321eddc0058d46ed4670682a520acbebab Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 23 Aug 2016 10:14:43 -0700
Subject: gpu: nvgpu: Do not print error on unknown engine

Unknown engine is expected, as we do not support all dGPU engines.
Remove the error spew.

JIRA DNVGPU-26

Change-Id: I3d43253b8cab4e51b426536e4899a62156d0da16
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1206465
(cherry picked from commit a3fa13f6be4ff60e90558326474af3d1b315aa43)
Reviewed-on: http://git-master/r/1208408
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index 349f25fc..127d4632 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -200,8 +200,6 @@ static int gp10b_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type,
 		/* Default assumptions - all the CE engine have separate runlist */
 		ret = ENGINE_ASYNC_CE_GK20A;
 	}
-	else
-		gk20a_err(g->dev, "unknown engine %d", engine_type);
 
 	return ret;
 }
-- 
cgit v1.2.2


From 706a1271d83e2b42901ed3fd175b78ca413dd5f7 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 24 Aug 2016 15:11:12 -0700
Subject: gpu: nvgpu: Skip initializing thermals on dGPU

On dGPU devinit handles initializing thermals.

Bug 1799537

Change-Id: I12ade535d2ddb7fc406256e75f21a422195b36d5
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1208017
(cherry picked from commit 0e1327107c43dc9c2f5c5d9b79a54f27d2027e85)
Reviewed-on: http://git-master/r/1209122
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp106/hal_gp106.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index d4e843d8..0423dcfc 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -206,7 +206,6 @@ int gp106_init_hal(struct gk20a *g)
 	gk20a_init_dbg_session_ops(gops);
 	gp10b_init_regops(gops);
 	gp10b_init_cde_ops(gops);
-	gp10b_init_therm_ops(gops);
 	gk20a_init_tsg_ops(gops);
 	gm206_init_bios(gops);
 	gops->name = "gp10x";
-- 
cgit v1.2.2


From c7258e57e757fc4d983ef7de0b92aa9e0e26d235 Mon Sep 17 00:00:00 2001
From: Cory Perry <cperry@nvidia.com>
Date: Thu, 28 Jul 2016 22:31:35 -0700
Subject: gpu: nvgpu: send only one event to the debugger

Event notifications on TSGs should only be sent to the channel that caused the
event to happen in the first place, not every channel in the tsg.  Any more and
the debugger will not be able to tell what channel actually got the event.
Worse yet, if all the channels in a tsg are bound to the same debug session
(as is the case with cuda-gdb), then multiple nvgpu events for the same gpu
event will be triggered, causing events to be buffered and the client to get
out of sync.

One gpu exception, one nvgpu event per tsg.

Bug 1793988

Change-Id: Iee36c774f193554ffb9ab7c1650ee0610e476a99
Signed-off-by: Cory Perry <cperry@nvidia.com>
Reviewed-on: http://git-master/r/1194206
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 984241db..eb6ee70f 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1891,21 +1891,14 @@ static int gr_gp10b_handle_fecs_error(struct gk20a *g,
 		}
 
 		if (gk20a_gr_sm_debugger_attached(g)) {
+			gk20a_dbg_gpu_post_events(ch);
+
 			if (gk20a_is_channel_marked_as_tsg(ch)) {
 				struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
-				struct channel_gk20a *__ch;
-
-				mutex_lock(&tsg->ch_list_lock);
-				list_for_each_entry(__ch, &tsg->ch_list, ch_entry) {
-					gk20a_dbg_gpu_post_events(__ch);
-				}
-				mutex_unlock(&tsg->ch_list_lock);
 
 				gk20a_tsg_event_id_post_event(tsg,
 					NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE);
 			} else {
-				gk20a_dbg_gpu_post_events(ch);
-
 				gk20a_channel_event_id_post_event(ch,
 					NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE);
 			}
-- 
cgit v1.2.2


From 315c8714e950ade879221af04a7be3ed90813801 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Thu, 11 Aug 2016 22:08:54 +0530
Subject: gpu: nvgpu: use bootstrap base for WPR

Use bootstrap allocator's base as base address for WPR
buffers

Jira DNVGPU-84

Change-Id: Ifaeef9f3aa562f9171dd073000c158b513567ede
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1201348
(cherry picked from commit 72f8e727e6f27f867043d024e3d07218359d5faf)
Reviewed-on: http://git-master/r/1210960
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/acr_gp106.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index c43b0165..96a2c1fd 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -38,7 +38,7 @@ typedef int (*get_ucode_details)(struct gk20a *g,
 
 /* Both size and address of WPR need to be 128K-aligned */
 #define WPR_ALIGNMENT	0x20000
-#define GP106_DGPU_NONWPR 0x18000000
+#define GP106_DGPU_NONWPR NVGPU_VIDMEM_BOOTSTRAP_ALLOCATOR_BASE
 #define GP106_DGPU_WPR (GP106_DGPU_NONWPR + 0x400000)
 #define DGPU_WPR_SIZE 0x100000
 
-- 
cgit v1.2.2


From 7a2cc9c58c74ee025625a64461b3045b256626c9 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Tue, 23 Aug 2016 14:29:27 +0530
Subject: gpu: nvgpu: check if pmu blob is already allocated

Jira DNVGPU-20

Change-Id: If917f97ee30f830b05467b15e1ae3f8be296d140
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1206406
(cherry picked from commit bc54e4c24d2f2671b412c79a0ff2944c9575f2a5)
Reviewed-on: http://git-master/r/1210961
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/acr_gp106.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index 96a2c1fd..850d07b1 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -95,6 +95,9 @@ static int gp106_alloc_blob_space(struct gk20a *g,
 	struct wpr_carveout_info wpr_inf;
 	int err;
 
+	if (mem->size)
+		return 0;
+
 	g->ops.pmu.get_wpr(g, &wpr_inf);
 
 	/*
-- 
cgit v1.2.2


From 758add10abe564fb58ed928e0f5a8f1b4d31414f Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Tue, 23 Aug 2016 14:33:56 +0530
Subject: gpu: nvgpu: use get_base_addr() for pdb and mm_entry

Since page tables could reside either in sysmem
or vidmem, use gk20a_mem_get_base_addr() to get the
base address for buffer

This API will take care of returning proper base address

Jira DNVGPU-20

Change-Id: I3422b51c3ffb8fb86f1dc5095263fc8f19dae44d
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1206407
(cherry picked from commit 3c4b22c35b2c4eec33234c2f8dccd9de9422d093)
Reviewed-on: http://git-master/r/1210962
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 7778883e..c9da4c93 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -154,10 +154,11 @@ static u32 pte3_from_index(u32 i)
 static u64 entry_addr(struct gk20a *g, struct gk20a_mm_entry *entry)
 {
 	u64 addr;
+
 	if (g->mm.has_physical_mode)
 		addr = sg_phys(entry->mem.sgt->sgl);
 	else
-		addr = g->ops.mm.get_iova_addr(g, entry->mem.sgt->sgl, 0);
+		addr = gk20a_mem_get_base_addr(g, &entry->mem, 0);
 
 	return addr;
 }
@@ -386,7 +387,7 @@ static const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g,
 static void gp10b_mm_init_pdb(struct gk20a *g, struct mem_desc *inst_block,
 		struct vm_gk20a *vm)
 {
-	u64 pdb_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
+	u64 pdb_addr = gk20a_mem_get_base_addr(g, &vm->pdb.mem, 0);
 	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
 	u32 pdb_addr_hi = u64_hi32(pdb_addr);
 
-- 
cgit v1.2.2


From a74a971f498084bf9131be3964c380c74e9d5960 Mon Sep 17 00:00:00 2001
From: Peter Daifuku <pdaifuku@nvidia.com>
Date: Wed, 7 Sep 2016 17:27:45 -0700
Subject: gpu: nvgpu: vgpu: cyclestat snapshot support

Add support for cyclestats snapshots in the virtual case

Bug 1700143
JIRA EVLR-278

Change-Id: I353efac6a17704c815a99745ac04d2c3d831351b
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1216644
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/hal_gp106.c | 4 ++++
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 0423dcfc..eadeb1b4 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -46,6 +46,7 @@
 #include "nvgpu_gpuid_t18x.h"
 #include "hw_proj_gp106.h"
 #include "gk20a/dbg_gpu_gk20a.h"
+#include "gk20a/css_gr_gk20a.h"
 
 static struct gpu_ops gp106_ops = {
 	.clock_gating = {
@@ -207,6 +208,9 @@ int gp106_init_hal(struct gk20a *g)
 	gp10b_init_regops(gops);
 	gp10b_init_cde_ops(gops);
 	gk20a_init_tsg_ops(gops);
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+	gk20a_init_css_ops(gops);
+#endif
 	gm206_init_bios(gops);
 	gops->name = "gp10x";
 	gops->get_litter_value = gp106_get_litter_value;
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index ae92608c..ec81cf35 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -44,6 +44,7 @@
 #include "gp10b.h"
 #include "hw_proj_gp10b.h"
 #include "gk20a/dbg_gpu_gk20a.h"
+#include "gk20a/css_gr_gk20a.h"
 
 #define FUSE_OPT_PRIV_SEC_EN_0 0x264
 #define PRIV_SECURITY_ENABLED 0x01
@@ -242,6 +243,9 @@ int gp10b_init_hal(struct gk20a *g)
 	gp10b_init_cde_ops(gops);
 	gp10b_init_therm_ops(gops);
 	gk20a_init_tsg_ops(gops);
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+	gk20a_init_css_ops(gops);
+#endif
 	gops->name = "gp10b";
 	gops->chip_init_gpu_characteristics = gp10b_init_gpu_characteristics;
 	gops->get_litter_value = gp10b_get_litter_value;
-- 
cgit v1.2.2


From 6d4851e248f00a0c8188bcaa1375f94ba915f608 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 26 Aug 2016 10:19:14 -0700
Subject: gpu: nvgpu: gp106: Remove clock gating prod vals

We are using gp10b prod values for gp106, and they are incompatible.
Because of this we are accessing invalid registers.

Delete all prod vals for gp106 until we have generated new ones.

Bug 1799537

Change-Id: Id805e933bd19f6ccaf28274cd69140f9f93cd4ea
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1208716
(cherry picked from commit 50d3ecfbfa42795d5eaa20c977cf83613498a804)
Reviewed-on: http://git-master/r/1217287
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/hal_gp106.c | 51 +------------------------------------
 1 file changed, 1 insertion(+), 50 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index eadeb1b4..5414eb4a 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -49,56 +49,7 @@
 #include "gk20a/css_gr_gk20a.h"
 
 static struct gpu_ops gp106_ops = {
-	.clock_gating = {
-		.slcg_bus_load_gating_prod =
-			gp10b_slcg_bus_load_gating_prod,
-		.slcg_ce2_load_gating_prod =
-			gp10b_slcg_ce2_load_gating_prod,
-		.slcg_chiplet_load_gating_prod =
-			gp10b_slcg_chiplet_load_gating_prod,
-		.slcg_ctxsw_firmware_load_gating_prod =
-			gp10b_slcg_ctxsw_firmware_load_gating_prod,
-		.slcg_fb_load_gating_prod =
-			gp10b_slcg_fb_load_gating_prod,
-		.slcg_fifo_load_gating_prod =
-			gp10b_slcg_fifo_load_gating_prod,
-		.slcg_gr_load_gating_prod =
-			gr_gp10b_slcg_gr_load_gating_prod,
-		.slcg_ltc_load_gating_prod =
-			ltc_gp10b_slcg_ltc_load_gating_prod,
-		.slcg_perf_load_gating_prod =
-			gp10b_slcg_perf_load_gating_prod,
-		.slcg_priring_load_gating_prod =
-			gp10b_slcg_priring_load_gating_prod,
-		.slcg_pmu_load_gating_prod =
-			gp10b_slcg_pmu_load_gating_prod,
-		.slcg_therm_load_gating_prod =
-			gp10b_slcg_therm_load_gating_prod,
-		.slcg_xbar_load_gating_prod =
-			gp10b_slcg_xbar_load_gating_prod,
-		.blcg_bus_load_gating_prod =
-			gp10b_blcg_bus_load_gating_prod,
-		.blcg_ce_load_gating_prod =
-			gp10b_blcg_ce_load_gating_prod,
-		.blcg_ctxsw_firmware_load_gating_prod =
-			gp10b_blcg_ctxsw_firmware_load_gating_prod,
-		.blcg_fb_load_gating_prod =
-			gp10b_blcg_fb_load_gating_prod,
-		.blcg_fifo_load_gating_prod =
-			gp10b_blcg_fifo_load_gating_prod,
-		.blcg_gr_load_gating_prod =
-			gp10b_blcg_gr_load_gating_prod,
-		.blcg_ltc_load_gating_prod =
-			gp10b_blcg_ltc_load_gating_prod,
-		.blcg_pwr_csb_load_gating_prod =
-			gp10b_blcg_pwr_csb_load_gating_prod,
-		.blcg_pmu_load_gating_prod =
-			gp10b_blcg_pmu_load_gating_prod,
-		.blcg_xbar_load_gating_prod =
-			gp10b_blcg_xbar_load_gating_prod,
-		.pg_gr_load_gating_prod =
-			gr_gp10b_pg_gr_load_gating_prod,
-	}
+	.clock_gating = { }
 };
 
 static int gp106_get_litter_value(struct gk20a *g,
-- 
cgit v1.2.2


From 5e486b518292b1178eae49d58a1bc0894788a281 Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Tue, 23 Aug 2016 10:47:10 -0700
Subject: gpu: nvgpu: fix cbc base calculation for dGPU

JIRA DNVGPU-9

Change-Id: I22667acfadfcabf79af841ca5389e41d2ac34860
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1206478
(cherry picked from commit 098b932f7633a903c915b1257beb9304735b4113)
Reviewed-on: http://git-master/r/1210288
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x |  3 ++-
 drivers/gpu/nvgpu/gp106/hal_gp106.c   |  4 ++--
 drivers/gpu/nvgpu/gp106/ltc_gp106.c   | 25 +++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/ltc_gp106.h   | 19 +++++++++++++++++++
 4 files changed, 48 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp106/ltc_gp106.c
 create mode 100644 drivers/gpu/nvgpu/gp106/ltc_gp106.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index 548a9c0d..e7d18492 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -26,7 +26,8 @@ nvgpu-y += \
 	$(nvgpu-t18x)/gp106/gr_ctx_gp106.o \
 	$(nvgpu-t18x)/gp106/acr_gp106.o \
 	$(nvgpu-t18x)/gp106/sec2_gp106.o \
-	$(nvgpu-t18x)/gp106/fifo_gp106.o
+	$(nvgpu-t18x)/gp106/fifo_gp106.o \
+	$(nvgpu-t18x)/gp106/ltc_gp106.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
 
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 5414eb4a..a52fab7b 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -22,7 +22,7 @@
 
 #include "gp10b/gr_gp10b.h"
 #include "gp10b/mc_gp10b.h"
-#include "gp10b/ltc_gp10b.h"
+#include "gp106/ltc_gp106.h"
 #include "gp10b/mm_gp10b.h"
 #include "gp10b/ce_gp10b.h"
 #include "gp10b/fb_gp10b.h"
@@ -147,7 +147,7 @@ int gp106_init_hal(struct gk20a *g)
 
 	gp10b_init_mc(gops);
 	gp106_init_gr(gops);
-	gp10b_init_ltc(gops);
+	gp106_init_ltc(gops);
 	gp10b_init_fb(gops);
 	gp106_init_fifo(gops);
 	gp10b_init_ce(gops);
diff --git a/drivers/gpu/nvgpu/gp106/ltc_gp106.c b/drivers/gpu/nvgpu/gp106/ltc_gp106.c
new file mode 100644
index 00000000..8f630a41
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/ltc_gp106.c
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+
+#include "gk20a/gk20a.h"
+#include "gp10b/ltc_gp10b.h"
+
+void gp106_init_ltc(struct gpu_ops *gops)
+{
+	gp10b_init_ltc(gops);
+
+	/* dGPU does not need the LTC hack */
+	gops->ltc.cbc_fix_config = NULL;
+}
diff --git a/drivers/gpu/nvgpu/gp106/ltc_gp106.h b/drivers/gpu/nvgpu/gp106/ltc_gp106.h
new file mode 100644
index 00000000..4720d7a1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/ltc_gp106.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef LTC_GP106_H
+#define LTC_GP106_H
+struct gpu_ops;
+
+void gp106_init_ltc(struct gpu_ops *gops);
+#endif
-- 
cgit v1.2.2


From e4fa9712acacfeced2758b91721d31274e6fe2e7 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 24 Aug 2016 15:10:36 -0700
Subject: gpu: nvgpu: Do not initialize CBC on Pascal dGPU

CBC_BASE register is protected on Pascal dGPUs. Skip initializing it.

Bug 1799537

Change-Id: Ie4b0ac5a37c3c586d1b631ce38823d156b554e1e
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1208016
(cherry picked from commit 5f9dbca140573798bd05b5b27a7b6abe1871e90f)
Reviewed-on: http://git-master/r/1210289
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gp106/ltc_gp106.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/ltc_gp106.c b/drivers/gpu/nvgpu/gp106/ltc_gp106.c
index 8f630a41..ef0e351d 100644
--- a/drivers/gpu/nvgpu/gp106/ltc_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/ltc_gp106.c
@@ -22,4 +22,5 @@ void gp106_init_ltc(struct gpu_ops *gops)
 
 	/* dGPU does not need the LTC hack */
 	gops->ltc.cbc_fix_config = NULL;
+	gops->ltc.init_cbc = NULL;
 }
-- 
cgit v1.2.2


From ff4884c0afc982286211632cd2e08036977b77a4 Mon Sep 17 00:00:00 2001
From: seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Fri, 9 Sep 2016 13:07:31 -0700
Subject: gpu: nvgpu: gp10b: update prod setting for slcg

Update prod settings for slcg fifo.

Bug 1785549

Change-Id: I0371ef7aeacce5933e06dd36d1368ddc06154ff9
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1218109
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
index 4719b13e..df23d89d 100644
--- a/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
+++ b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
@@ -54,7 +54,7 @@ static const struct gating_desc gp10b_slcg_fb[] = {
 
 /* slcg fifo */
 static const struct gating_desc gp10b_slcg_fifo[] = {
-	{.addr = 0x000026ac, .prod = 0x00000f00, .disable = 0x0001fffe},
+	{.addr = 0x000026ac, .prod = 0x00000f40, .disable = 0x0001fffe},
 };
 
 /* slcg gr */
-- 
cgit v1.2.2


From 49840c15efb36b3216357b93ba0477e53dbef3b6 Mon Sep 17 00:00:00 2001
From: Shardar Shariff Md <smohammed@nvidia.com>
Date: Fri, 9 Sep 2016 02:36:04 +0530
Subject: gpu: nvgpu: change the usage of tegra_fuse_readl

tegra_fuse_readl() prototype is changed to match upstreamed
fuse driver, so change implementation accordingly.

Bug 200233653

Change-Id: Ib690cf8a5a69e7b13146471a5ee211834dc40086
Signed-off-by: Shardar Shariff Md <smohammed@nvidia.com>
Reviewed-on: http://git-master/r/1217376
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c  | 9 ++++++---
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 9 +++++----
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 9 +++++++--
 3 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index eb6ee70f..0705d8b6 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1924,10 +1924,13 @@ static u32 gp10b_mask_hww_warp_esr(u32 hww_warp_esr)
 
 static u32 get_ecc_override_val(struct gk20a *g)
 {
-	if (tegra_fuse_readl(FUSE_OPT_ECC_EN))
+	u32 val;
+
+	tegra_fuse_readl(FUSE_OPT_ECC_EN, &val);
+	if (val)
 		return gk20a_readl(g, gr_fecs_feature_override_ecc_r());
-	else
-		return 0;
+
+	return 0;
 }
 
 static bool gr_gp10b_suspend_context(struct channel_gk20a *ch,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index ec81cf35..c4e44483 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -190,6 +190,7 @@ int gp10b_init_hal(struct gk20a *g)
 	struct gpu_ops *gops = &g->ops;
 	struct nvgpu_gpu_characteristics *c = &g->gpu_characteristics;
 	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	u32 val;
 
 	*gops = gp10b_ops;
 
@@ -198,8 +199,8 @@ int gp10b_init_hal(struct gk20a *g)
 		gops->privsecurity = 0;
 		gops->securegpccs = 0;
 	} else {
-		if (tegra_fuse_readl(FUSE_OPT_PRIV_SEC_EN_0) &
-				PRIV_SECURITY_ENABLED) {
+		tegra_fuse_readl(FUSE_OPT_PRIV_SEC_EN_0, &val);
+		if (val & PRIV_SECURITY_ENABLED) {
 			gops->privsecurity = 1;
 			gops->securegpccs =1;
 		} else {
@@ -214,8 +215,8 @@ int gp10b_init_hal(struct gk20a *g)
 		gops->privsecurity = 0;
 		gops->securegpccs = 0;
 	} else {
-		if (tegra_fuse_readl(FUSE_OPT_PRIV_SEC_EN_0) &
-				PRIV_SECURITY_ENABLED) {
+		tegra_fuse_readl(FUSE_OPT_PRIV_SEC_EN_0, &val);
+		if (val & PRIV_SECURITY_ENABLED) {
 			gk20a_dbg_info("priv security is not supported but enabled");
 			gops->privsecurity = 1;
 			gops->securegpccs =1;
diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index f40c1b7b..762e2af7 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -365,9 +365,11 @@ static int send_ecc_overide_en_dis_cmd(struct gk20a *g, u32 bitmask)
 	struct pmu_cmd cmd;
 	u32 seq;
 	int status;
+	u32 val;
 	gk20a_dbg_fn("");
 
-	if (!tegra_fuse_readl(FUSE_OPT_ECC_EN)) {
+	tegra_fuse_readl(FUSE_OPT_ECC_EN, &val);
+	if (!val) {
 		gk20a_err(dev_from_gk20a(g), "Board not ECC capable");
 		return -1;
 	}
@@ -436,12 +438,15 @@ static bool gp10b_is_priv_load(u32 falcon_id)
 /*Dump Security related fuses*/
 static void pmu_dump_security_fuses_gp10b(struct gk20a *g)
 {
+	u32 val;
+
 	gk20a_err(dev_from_gk20a(g), "FUSE_OPT_SEC_DEBUG_EN_0 : 0x%x",
 			gk20a_readl(g, fuse_opt_sec_debug_en_r()));
 	gk20a_err(dev_from_gk20a(g), "FUSE_OPT_PRIV_SEC_EN_0 : 0x%x",
 			gk20a_readl(g, fuse_opt_priv_sec_en_r()));
+	tegra_fuse_readl(FUSE_GCPLEX_CONFIG_FUSE_0, &val);
 	gk20a_err(dev_from_gk20a(g), "FUSE_GCPLEX_CONFIG_FUSE_0 : 0x%x",
-			tegra_fuse_readl(FUSE_GCPLEX_CONFIG_FUSE_0));
+			val);
 }
 
 void gp10b_init_pmu_ops(struct gpu_ops *gops)
-- 
cgit v1.2.2


From 5544272474d7b1e3103eed76fbaf91ba7a739345 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 26 Aug 2016 10:09:24 -0700
Subject: gpu: nvgpu: gp106: Skip LTCA initialization

Skip LTCA initialization on dGPU.

Bug 1799537

Change-Id: Ieb4c72e2169dc6bee73306c9b1e6c80866167a1a
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1208714
(cherry picked from commit 9a8dc5fe96b29b8a67f8203f17126b0093721312)
Reviewed-on: http://git-master/r/1219164
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp106/ltc_gp106.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/ltc_gp106.c b/drivers/gpu/nvgpu/gp106/ltc_gp106.c
index ef0e351d..dcd4fbad 100644
--- a/drivers/gpu/nvgpu/gp106/ltc_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/ltc_gp106.c
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 
 #include "gk20a/gk20a.h"
+#include "gm20b/ltc_gm20b.h"
 #include "gp10b/ltc_gp10b.h"
 
 void gp106_init_ltc(struct gpu_ops *gops)
@@ -23,4 +24,5 @@ void gp106_init_ltc(struct gpu_ops *gops)
 	/* dGPU does not need the LTC hack */
 	gops->ltc.cbc_fix_config = NULL;
 	gops->ltc.init_cbc = NULL;
+	gops->ltc.init_fs_state = gm20b_ltc_init_fs_state;
 }
-- 
cgit v1.2.2


From bb6923908a2873b079ad67d22c9fcb014c00af28 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Tue, 6 Sep 2016 16:13:31 +0530
Subject: gpu: nvgpu: select target based on aperture

For bar2 and pmu instance blocks, use gk20a_aperture_mask()
to select the target address (i.e. whether the address is in
sysmem or vidmem) based on the aperture.

Also add target accessors for gr_fecs_new_ctx and
gr_fecs_arb_ctx_ptr

Jira DNVGPU-22

Change-Id: Ieaa80bd83a4191fe57b7fba6e0f9cdaeb195a077
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1216138
(cherry picked from commit 7a9f4175abc5dddf0879215de4637b7b6eb0ab7b)
Reviewed-on: http://git-master/r/1219712
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/hw_gr_gp106.h | 16 ++++++++++++++++
 drivers/gpu/nvgpu/gp106/sec2_gp106.c  |  8 ++++++--
 drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 16 ++++++++++++++++
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c    |  6 +++---
 4 files changed, 41 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hw_gr_gp106.h b/drivers/gpu/nvgpu/gp106/hw_gr_gp106.h
index e5e1c527..bb1f9fa9 100644
--- a/drivers/gpu/nvgpu/gp106/hw_gr_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_gr_gp106.h
@@ -1342,6 +1342,14 @@ static inline u32 gr_fecs_new_ctx_target_v(u32 r)
 {
 	return (r >> 28) & 0x3;
 }
+static inline u32 gr_fecs_new_ctx_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_new_ctx_target_sys_mem_ncoh_f(void)
+{
+	return 0x30000000;
+}
 static inline u32 gr_fecs_new_ctx_valid_s(void)
 {
 	return 1;
@@ -1394,6 +1402,14 @@ static inline u32 gr_fecs_arb_ctx_ptr_target_v(u32 r)
 {
 	return (r >> 28) & 0x3;
 }
+static inline u32 gr_fecs_arb_ctx_ptr_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(void)
+{
+	return 0x30000000;
+}
 static inline u32 gr_fecs_arb_ctx_cmd_r(void)
 {
 	return 0x00409a10;
diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
index f8b32f8f..8f34edd1 100644
--- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
@@ -169,7 +169,9 @@ int bl_bootstrap_sec2(struct pmu_gk20a *pmu,
 			pwr_pmu_new_instblk_ptr_f(
 			gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
 			pwr_pmu_new_instblk_valid_f(1) |
-			pwr_pmu_new_instblk_target_sys_coh_f());
+			gk20a_aperture_mask(g, &mm->pmu.inst_block,
+				pwr_pmu_new_instblk_target_sys_coh_f(),
+				pwr_pmu_new_instblk_target_fb_f()));
 
 	data = gk20a_readl(g, psec_falcon_debug1_r());
 	data |= psec_falcon_debug1_ctxsw_mode_m();
@@ -313,7 +315,9 @@ void init_pmu_setup_hw1(struct gk20a *g)
 				pwr_pmu_new_instblk_ptr_f(
 					gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
 				pwr_pmu_new_instblk_valid_f(1) |
-				pwr_pmu_new_instblk_target_sys_coh_f());
+				gk20a_aperture_mask(g, &mm->pmu.inst_block,
+					pwr_pmu_new_instblk_target_sys_coh_f(),
+					pwr_pmu_new_instblk_target_fb_f()));
 
 	/*Copying pmu cmdline args*/
 	g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 00f2ac5e..9e3137e7 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -1414,6 +1414,14 @@ static inline u32 gr_fecs_new_ctx_target_v(u32 r)
 {
 	return (r >> 28) & 0x3;
 }
+static inline u32 gr_fecs_new_ctx_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_new_ctx_target_sys_mem_ncoh_f(void)
+{
+	return 0x30000000;
+}
 static inline u32 gr_fecs_new_ctx_valid_s(void)
 {
 	return 1;
@@ -1466,6 +1474,14 @@ static inline u32 gr_fecs_arb_ctx_ptr_target_v(u32 r)
 {
 	return (r >> 28) & 0x3;
 }
+static inline u32 gr_fecs_arb_ctx_ptr_target_vid_mem_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(void)
+{
+	return 0x30000000;
+}
 static inline u32 gr_fecs_arb_ctx_cmd_r(void)
 {
 	return 0x00409a10;
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index c9da4c93..048a4662 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -105,9 +105,9 @@ static int gb10b_init_bar2_mm_hw_setup(struct gk20a *g)
 	gk20a_dbg_info("bar2 inst block ptr: 0x%08x",  (u32)inst_pa);
 
 	gk20a_writel(g, bus_bar2_block_r(),
-		     (g->mm.vidmem_is_vidmem ?
-		       bus_bar2_block_target_sys_mem_ncoh_f() :
-		       bus_bar2_block_target_vid_mem_f()) |
+		     gk20a_aperture_mask(g, inst_block,
+				bus_bar2_block_target_sys_mem_ncoh_f(),
+				bus_bar2_block_target_vid_mem_f()) |
 		     bus_bar2_block_mode_virtual_f() |
 		     bus_bar2_block_ptr_f(inst_pa));
 
-- 
cgit v1.2.2


From f107ff488c8f1ccd225cdd4c40c82f79c45136c7 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Mon, 12 Sep 2016 11:59:21 +0300
Subject: gpu: nvgpu: fix pde0 target bit programming

Use entry->mem for determining the target aperture bits of the memory
block represented by entry->mem in update_gmmu_pde0_locked(), instead of
pte->mem that holds the parent memory where this bit is written to.

Previously this has worked because all page tables have been in the same
aperture, but really large userspace allocations may push a part of them
suddenly to sysmem.

Bug 1809939

Change-Id: I3372487c6ae9793018ce44552ded3fb1ba4d145a
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1218636
(cherry picked from commit a92596f6e8e621e51b6afae9ab7e62044d6311eb)
Reviewed-on: http://git-master/r/1220525
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 048a4662..03bab121 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -242,7 +242,7 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 
 	if (small_valid) {
 		pde_v[2] |= gmmu_new_dual_pde_address_small_sys_f(pte_addr_small);
-		pde_v[2] |= gk20a_aperture_mask(g, &pte->mem,
+		pde_v[2] |= gk20a_aperture_mask(g, &entry->mem,
 			gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(),
 			gmmu_new_dual_pde_aperture_small_video_memory_f());
 		pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
@@ -252,7 +252,7 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	if (big_valid) {
 		pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(pte_addr_big);
 		pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
-		pde_v[0] |= gk20a_aperture_mask(g, &pte->mem,
+		pde_v[0] |= gk20a_aperture_mask(g, &entry->mem,
 			gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(),
 			gmmu_new_dual_pde_aperture_big_video_memory_f());
 		pde_v[1] |= pte_addr_big >> 28;
-- 
cgit v1.2.2


From 190e97f89bd486363db9916967a173e0bde5f43b Mon Sep 17 00:00:00 2001
From: Sami Kiminki <skiminki@nvidia.com>
Date: Wed, 10 Aug 2016 21:57:30 +0300
Subject: gpu: nvgpu: gp106: Add NVC097_SET_GO_IDLE_TIMEOUT SW method

Add the NVC097_SET_GO_IDLE_TIMEOUT SW method for GP106. This
enables booting the X server.

Bug 1732372
Bug 1792002

Change-Id: I73abaaea240039dc91c66e3862ec01a342db2fa9
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/1200637
(cherry picked from commit 0d24a6f3d8e421ea5205279166c6dc2d0f15c6a0)
Reviewed-on: http://git-master/r/1223101
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/gr_gp106.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c
index 01d06975..8d8376d3 100644
--- a/drivers/gpu/nvgpu/gp106/gr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c
@@ -56,6 +56,11 @@ static u32 gr_gp106_pagepool_default_size(struct gk20a *g)
 	return gr_scc_pagepool_total_pages_hwmax_value_v();
 }
 
+static void gr_gp106_set_go_idle_timeout(struct gk20a *g, u32 data)
+{
+	gk20a_writel(g, gr_fe_go_idle_timeout_r(), data);
+}
+
 static int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr,
 				     u32 class_num, u32 offset, u32 data)
 {
@@ -82,6 +87,9 @@ static int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr,
 		case NVC097_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
 			g->ops.gr.set_alpha_circular_buffer_size(g, data);
 			break;
+		case NVC097_SET_GO_IDLE_TIMEOUT:
+			gr_gp106_set_go_idle_timeout(g, data);
+			break;
 		default:
 			goto fail;
 		}
-- 
cgit v1.2.2


From 38ad90b4840434df4650c617a236e1b01f8a43c6 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Mon, 15 Aug 2016 20:19:20 +0530
Subject: gpu: nvgpu: Adding support for mclk module

JIRA DNVGPU-88

Change-Id: Idecfff5a80fadde77887385491dd6b73b1956bac
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1202551
(cherry picked from commit 3bcf9bad93fb6fdd4b87430b346ea41533149108)
Reviewed-on: http://git-master/r/1223854
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x |    3 +-
 drivers/gpu/nvgpu/clk/clk_mclk.c      | 1808 +++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/clk/clk_mclk.h      |   19 +
 drivers/gpu/nvgpu/gp106/pmu_gp106.c   |    2 +
 drivers/gpu/nvgpu/pmuif/gpmuifseq.h   |   73 ++
 5 files changed, 1904 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/nvgpu/clk/clk_mclk.c
 create mode 100644 drivers/gpu/nvgpu/clk/clk_mclk.h
 create mode 100644 drivers/gpu/nvgpu/pmuif/gpmuifseq.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index e7d18492..3e54a989 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -27,7 +27,8 @@ nvgpu-y += \
 	$(nvgpu-t18x)/gp106/acr_gp106.o \
 	$(nvgpu-t18x)/gp106/sec2_gp106.o \
 	$(nvgpu-t18x)/gp106/fifo_gp106.o \
-	$(nvgpu-t18x)/gp106/ltc_gp106.o
+	$(nvgpu-t18x)/gp106/ltc_gp106.o \
+	$(nvgpu-t18x)/clk/clk_mclk.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
 
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.c b/drivers/gpu/nvgpu/clk/clk_mclk.c
new file mode 100644
index 00000000..02e16fcc
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.c
@@ -0,0 +1,1808 @@
+/*
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include <linux/delay.h>
+#include "pmuif/gpmuifseq.h"
+#include "gm206/bios_gm206.h"
+#include "gk20a/pmu_gk20a.h"
+#include "gk20a/hw_pwr_gk20a.h"
+
+#define VREG_COUNT 24
+
+struct memory_link_training_pattern {
+	u32 regaddr;
+	u32 writeval;
+};
+
+static struct memory_link_training_pattern memory_shadow_reglist[] = {
+	{0x9a065c, 0x20},
+	{0x98467c, 0xffff0000},
+	{0x984708, 0x30550},
+	{0x98470c, 0x4C4C},
+	{0x9006a0, 0x03030303},
+	{0x9006a4, 0x03030303},
+	{0x9046a0, 0x03030303},
+	{0x9046a4, 0x03030303},
+	{0x9086a0, 0x03030303},
+	{0x9086a4, 0x03030303},
+	{0x90c6a0, 0x03030303},
+	{0x90c6a4, 0x03030303},
+	{0x9106a0, 0x07070707},
+	{0x9106a4, 0x07070707},
+	{0x9146a0, 0x07070707},
+	{0x9146a4, 0x07070707},
+	{0x9846a8, 0x03030303},
+	{0x9846ac, 0x03030303},
+	{0x9a065c, 0x00},
+};
+
+static struct memory_link_training_pattern memory_pattern_reglist[] = {
+	{0x9a0968,	0x0},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0x0},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0x0},
+	{0x9a096c,	0x0},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0x0},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0x0},
+	{0x9a0968,	0x100},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0xffffffff},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0xffffffff},
+	{0x9a096c,	0x100},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0xffffffff},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0xffffffff},
+	{0x9a0968,	0x200},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0x55555555},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0x55555555},
+	{0x9a096c,	0x200},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0x55555555},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0x55555555},
+	{0x9a0968,	0x300},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xaaaaaaaa},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xaaaaaaaa},
+	{0x9a096c,	0x300},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xaaaaaaaa},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xaaaaaaaa},
+	{0x9a0968,	0x400},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0x33333333},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0x33333333},
+	{0x9a096c,	0x400},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0x33333333},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0x33333333},
+	{0x9a0968,	0x500},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xcccccccc},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xcccccccc},
+	{0x9a096c,	0x500},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xcccccccc},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xcccccccc},
+	{0x9a0968,	0x600},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xf0f0f0f0},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xf0f0f0f0},
+	{0x9a096c,	0x600},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xf0f0f0f0},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xf0f0f0f0},
+	{0x9a0968,	0x700},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0xf0f0f0f},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0xf0f0f0f},
+	{0x9a096c,	0x700},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0xf0f0f0f},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0xf0f0f0f},
+	{0x9a0968,	0x800},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0xff00ff},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0xff00ff},
+	{0x9a096c,	0x800},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0xff00ff},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0xff00ff},
+	{0x9a0968,	0x900},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xff00ff00},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xff00ff00},
+	{0x9a096c,	0x900},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xff00ff00},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xff00ff00},
+	{0x9a0968,	0xa00},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0xffff},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0xffff},
+	{0x9a096c,	0xa00},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0xffff},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0xffff},
+	{0x9a0968,	0xb00},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xffff0000},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xffff0000},
+	{0x9a096c,	0xb00},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xffff0000},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xffff0000},
+	{0x9a0968,	0xc00},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0x0},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0x0},
+	{0x9a096c,	0xc00},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0x0},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0x0},
+	{0x9a0968,	0xd00},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0xffffffff},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0xffffffff},
+	{0x9a096c,	0xd00},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0xffffffff},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0xffffffff},
+	{0x9a0968,	0xe00},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0x55555555},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0x55555555},
+	{0x9a096c,	0xe00},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0x55555555},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0x55555555},
+	{0x9a0968,	0xf00},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xaaaaaaaa},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xaaaaaaaa},
+	{0x9a096c,	0xf00},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xaaaaaaaa},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xaaaaaaaa},
+	{0x9a0968,	0x1000},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0x33333333},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0x33333333},
+	{0x9a096c,	0x1000},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0x33333333},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0x33333333},
+	{0x9a0968,	0x1100},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xcccccccc},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xcccccccc},
+	{0x9a096c,	0x1100},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xcccccccc},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xcccccccc},
+	{0x9a0968,	0x1200},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xf0f0f0f0},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xf0f0f0f0},
+	{0x9a096c,	0x1200},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xf0f0f0f0},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xf0f0f0f0},
+	{0x9a0968,	0x1300},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0xf0f0f0f},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0xf0f0f0f},
+	{0x9a096c,	0x1300},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0xf0f0f0f},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0xf0f0f0f},
+	{0x9a0968,	0x1400},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0xff00ff},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0xff00ff},
+	{0x9a096c,	0x1400},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0xff00ff},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0xff00ff},
+	{0x9a0968,	0x1500},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xff00ff00},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xff00ff00},
+	{0x9a096c,	0x1500},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xff00ff00},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xff00ff00},
+	{0x9a0968,	0x1600},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0xffff},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0xffff},
+	{0x9a096c,	0x1600},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0xffff},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0xffff},
+	{0x9a0968,	0x1700},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xffff0000},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xffff0000},
+	{0x9a096c,	0x1700},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xffff0000},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xffff0000},
+	{0x9a0968,	0x1800},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0x0},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0x0},
+	{0x9a096c,	0x1800},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0x0},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0x0},
+	{0x9a0968,	0x1900},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0xffffffff},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0xffffffff},
+	{0x9a096c,	0x1900},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0xffffffff},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0xffffffff},
+	{0x9a0968,	0x1a00},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0x55555555},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0x55555555},
+	{0x9a096c,	0x1a00},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0x55555555},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0x55555555},
+	{0x9a0968,	0x1b00},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xaaaaaaaa},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xaaaaaaaa},
+	{0x9a096c,	0x1b00},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xaaaaaaaa},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xaaaaaaaa},
+	{0x9a0968,	0x1c00},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0x33333333},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0x33333333},
+	{0x9a096c,	0x1c00},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0x33333333},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0x33333333},
+	{0x9a0968,	0x1d00},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xcccccccc},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xcccccccc},
+	{0x9a096c,	0x1d00},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xcccccccc},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xcccccccc},
+	{0x9a0968,	0x1e00},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xf0f0f0f0},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xf0f0f0f0},
+	{0x9a096c,	0x1e00},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xf0f0f0f0},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xf0f0f0f0},
+	{0x9a0968,	0x1f00},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0xf0f0f0f},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0xf0f0f0f},
+	{0x9a096c,	0x1f00},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0xf0f0f0f},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0xf0f0f0f},
+	{0x9a0968,	0x2000},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0xff00ff},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0xff00ff},
+	{0x9a096c,	0x2000},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0xff00ff},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0xff00ff},
+	{0x9a0968,	0x2100},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xff00ff00},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xff00ff00},
+	{0x9a096c,	0x2100},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xff00ff00},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xff00ff00},
+	{0x9a0968,	0x2200},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0xffff},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0xffff},
+	{0x9a096c,	0x2200},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0xffff},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0xffff},
+	{0x9a0968,	0x2300},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xffff0000},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xffff0000},
+	{0x9a096c,	0x2300},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xffff0000},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xffff0000},
+	{0x9a0968,	0x2400},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0x0},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0x0},
+	{0x9a096c,	0x2400},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0x0},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0x0},
+	{0x9a0968,	0x2500},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0xffffffff},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0xffffffff},
+	{0x9a096c,	0x2500},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0xffffffff},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0xffffffff},
+	{0x9a0968,	0x2600},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0x55555555},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0x55555555},
+	{0x9a096c,	0x2600},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0x55555555},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0x55555555},
+	{0x9a0968,	0x2700},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xaaaaaaaa},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xaaaaaaaa},
+	{0x9a096c,	0x2700},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xaaaaaaaa},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xaaaaaaaa},
+	{0x9a0968,	0x2800},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0x33333333},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0x33333333},
+	{0x9a096c,	0x2800},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0x33333333},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0x33333333},
+	{0x9a0968,	0x2900},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xcccccccc},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xcccccccc},
+	{0x9a096c,	0x2900},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xcccccccc},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xcccccccc},
+	{0x9a0968,	0x2a00},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xf0f0f0f0},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xf0f0f0f0},
+	{0x9a096c,	0x2a00},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xf0f0f0f0},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xf0f0f0f0},
+	{0x9a0968,	0x2b00},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0xf0f0f0f},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0xf0f0f0f},
+	{0x9a096c,	0x2b00},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0xf0f0f0f},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0xf0f0f0f},
+	{0x9a0968,	0x2c00},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0xff00ff},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0xff00ff},
+	{0x9a096c,	0x2c00},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0xff00ff},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0xff00ff},
+	{0x9a0968,	0x2d00},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xff00ff00},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xff00ff00},
+	{0x9a096c,	0x2d00},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xff00ff00},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xff00ff00},
+	{0x9a0968,	0x2e00},
+	{0x9a0920,	0xff},
+	{0x9a0918,	0xffff},
+	{0x9a0920,	0x1ff},
+	{0x9a0918,	0xffff},
+	{0x9a096c,	0x2e00},
+	{0x9a0924,	0xff},
+	{0x9a091c,	0xffff},
+	{0x9a0924,	0x1ff},
+	{0x9a091c,	0xffff},
+	{0x9a0968,	0x2f00},
+	{0x9a0920,	0x0},
+	{0x9a0918,	0xffff0000},
+	{0x9a0920,	0x100},
+	{0x9a0918,	0xffff0000},
+	{0x9a096c,	0x2f00},
+	{0x9a0924,	0x0},
+	{0x9a091c,	0xffff0000},
+	{0x9a0924,	0x100},
+	{0x9a091c,	0xffff0000},
+	{0x9a0968,	0x0},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x1},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x2},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x3},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x4},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x5},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x6},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x7},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x8},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x9},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xa},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xb},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xc},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xd},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xe},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xf},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x10},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x11},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x12},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x13},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x14},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x15},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x16},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x17},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x18},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x19},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x1a},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x1b},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x1c},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x1d},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x1e},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x1f},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x20},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0x21},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0x22},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0x23},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0x24},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0x25},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0x26},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0x27},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0x28},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0x29},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0x2a},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0x2b},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0x2c},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0x2d},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0x2e},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0x2f},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0x30},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0x31},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0x32},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0x33},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0x34},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0x35},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0x36},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0x37},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0x38},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0x39},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0x3a},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0x3b},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0x3c},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0x3d},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0x3e},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0x3f},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0x40},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x41},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x42},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x43},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x44},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x45},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x46},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x47},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x48},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x49},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x4a},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x4b},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x4c},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x4d},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x4e},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x4f},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x50},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x51},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x52},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x53},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x54},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x55},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x56},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x57},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x58},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x59},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x5a},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x5b},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x5c},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x5d},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x5e},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x5f},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x60},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x61},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x62},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x63},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x64},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x65},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x66},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x67},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x68},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x69},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x6a},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x6b},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x6c},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x6d},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0x6e},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0x6f},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x70},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x71},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x72},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x73},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x74},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x75},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x76},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x77},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x78},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x79},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x7a},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x7b},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x7c},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x7d},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0x7e},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0x7f},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0x80},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0x81},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0x82},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0x83},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0x84},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0x85},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0x86},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0x87},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0x88},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0x89},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0x8a},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0x8b},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0x8c},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0x8d},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0x8e},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0x8f},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0x90},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0x91},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0x92},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0x93},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0x94},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0x95},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0x96},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0x97},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0x98},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0x99},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0x9a},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0x9b},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0x9c},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0x9d},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0x9e},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0x9f},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xa0},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xa1},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xa2},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xa3},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xa4},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xa5},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xa6},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xa7},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xa8},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xa9},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xaa},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xab},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xac},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xad},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xae},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xaf},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0xb0},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0xb1},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0xb2},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0xb3},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0xb4},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0xb5},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0xb6},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0xb7},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0xb8},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0xb9},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0xba},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0xbb},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0xbc},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0xbd},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0xbe},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xbf},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xc0},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xc1},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xc2},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xc3},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xc4},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xc5},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xc6},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xc7},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xc8},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xc9},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xca},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xcb},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xcc},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xcd},
+	{0x9a0900,	0xffffffff},
+	{0x9a0968,	0xce},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0xcf},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0xd0},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0xd1},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0xd2},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0xd3},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0xd4},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0xd5},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0xd6},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0xd7},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0xd8},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0xd9},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0xda},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0xdb},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0xdc},
+	{0x9a0900,	0x55555555},
+	{0x9a0968,	0xdd},
+	{0x9a0900,	0xaaaaaaaa},
+	{0x9a0968,	0xde},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0xdf},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0xe0},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0xe1},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0xe2},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0xe3},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0xe4},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0xe5},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0xe6},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0xe7},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0xe8},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0xe9},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0xea},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0xeb},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0xec},
+	{0x9a0900,	0xffff},
+	{0x9a0968,	0xed},
+	{0x9a0900,	0xffff0000},
+	{0x9a0968,	0xee},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0xef},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0xf0},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0xf1},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0xf2},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0xf3},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0xf4},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0xf5},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0xf6},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0xf7},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0xf8},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0xf9},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0xfa},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0xfb},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0xfc},
+	{0x9a0900,	0xff00ff},
+	{0x9a0968,	0xfd},
+	{0x9a0900,	0xff00ff00},
+	{0x9a0968,	0xfe},
+	{0x9a0900,	0x0},
+	{0x9a0968,	0xff},
+	{0x9a0900,	0xffffffff},
+	{0x9a096c,	0x0},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x1},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x2},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x3},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x4},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x5},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x6},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x7},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x8},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x9},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xa},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xb},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xc},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xd},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xe},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xf},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x10},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x11},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x12},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x13},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x14},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x15},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x16},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x17},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x18},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x19},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x1a},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x1b},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x1c},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x1d},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x1e},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x1f},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x20},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0x21},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0x22},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0x23},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0x24},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0x25},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0x26},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0x27},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0x28},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0x29},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0x2a},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0x2b},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0x2c},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0x2d},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0x2e},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0x2f},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0x30},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0x31},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0x32},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0x33},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0x34},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0x35},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0x36},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0x37},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0x38},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0x39},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0x3a},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0x3b},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0x3c},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0x3d},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0x3e},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0x3f},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0x40},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x41},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x42},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x43},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x44},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x45},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x46},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x47},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x48},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x49},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x4a},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x4b},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x4c},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x4d},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x4e},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x4f},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x50},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x51},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x52},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x53},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x54},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x55},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x56},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x57},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x58},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x59},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x5a},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x5b},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x5c},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x5d},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x5e},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x5f},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x60},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x61},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x62},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x63},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x64},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x65},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x66},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x67},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x68},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x69},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x6a},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x6b},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x6c},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x6d},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0x6e},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0x6f},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x70},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x71},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x72},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x73},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x74},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x75},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x76},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x77},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x78},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x79},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x7a},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x7b},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x7c},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x7d},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0x7e},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0x7f},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0x80},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0x81},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0x82},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0x83},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0x84},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0x85},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0x86},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0x87},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0x88},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0x89},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0x8a},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0x8b},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0x8c},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0x8d},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0x8e},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0x8f},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0x90},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0x91},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0x92},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0x93},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0x94},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0x95},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0x96},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0x97},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0x98},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0x99},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0x9a},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0x9b},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0x9c},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0x9d},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0x9e},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0x9f},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xa0},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xa1},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xa2},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xa3},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xa4},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xa5},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xa6},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xa7},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xa8},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xa9},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xaa},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xab},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xac},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xad},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xae},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xaf},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0xb0},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0xb1},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0xb2},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0xb3},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0xb4},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0xb5},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0xb6},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0xb7},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0xb8},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0xb9},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0xba},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0xbb},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0xbc},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0xbd},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0xbe},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xbf},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xc0},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xc1},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xc2},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xc3},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xc4},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xc5},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xc6},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xc7},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xc8},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xc9},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xca},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xcb},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xcc},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xcd},
+	{0x9a0904,	0xffffffff},
+	{0x9a096c,	0xce},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0xcf},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0xd0},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0xd1},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0xd2},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0xd3},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0xd4},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0xd5},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0xd6},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0xd7},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0xd8},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0xd9},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0xda},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0xdb},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0xdc},
+	{0x9a0904,	0x55555555},
+	{0x9a096c,	0xdd},
+	{0x9a0904,	0xaaaaaaaa},
+	{0x9a096c,	0xde},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0xdf},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0xe0},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0xe1},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0xe2},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0xe3},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0xe4},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0xe5},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0xe6},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0xe7},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0xe8},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0xe9},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0xea},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0xeb},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0xec},
+	{0x9a0904,	0xffff},
+	{0x9a096c,	0xed},
+	{0x9a0904,	0xffff0000},
+	{0x9a096c,	0xee},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0xef},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0xf0},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0xf1},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0xf2},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0xf3},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0xf4},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0xf5},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0xf6},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0xf7},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0xf8},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0xf9},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0xfa},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0xfb},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0xfc},
+	{0x9a0904,	0xff00ff},
+	{0x9a096c,	0xfd},
+	{0x9a0904,	0xff00ff00},
+	{0x9a096c,	0xfe},
+	{0x9a0904,	0x0},
+	{0x9a096c,	0xff},
+	{0x9a0904,	0xffffffff},
+};
+
+static u8 seq_script_gp106[] = { /* script binary sent to the PMU sequencer */
+  0x0b, 0x00, 0x02, 0x00, 0x40, 0xc0, 0x62, 0x00, 0x22, 0x00, 0x02, 0x00,
+  0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x02, 0x00, 0x40, 0x63, 0x61, 0x00,
+  0x22, 0x00, 0x02, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x34, 0x00, 0x02, 0x00,
+  0x0c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x21, 0x00, 0x07, 0x00, 0x30, 0x03, 0x9a, 0x00,
+  0x14, 0x00, 0x10, 0x00, 0x38, 0xd6, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00,
+  0x04, 0xd6, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x02, 0x00,
+  0x20, 0x4e, 0x00, 0x00, 0x21, 0x00, 0x09, 0x00, 0x00, 0x02, 0x9a, 0x00,
+  0x00, 0x90, 0x8f, 0x02, 0x10, 0x09, 0x9a, 0x00, 0x00, 0x00, 0x0c, 0x00,
+  0x14, 0x09, 0x9a, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x02, 0x9a, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x02, 0x00, 0xe8, 0x03, 0x00, 0x00,
+  0x21, 0x00, 0x03, 0x00, 0x10, 0x03, 0x9a, 0x00, 0x01, 0x00, 0x00, 0x00,
+  0x2e, 0x00, 0x02, 0x00, 0xe8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x0d, 0x00,
+  0x48, 0x03, 0x9a, 0x00, 0x88, 0x00, 0x70, 0x00, 0x00, 0x02, 0x9a, 0x00,
+  0x00, 0x90, 0x8f, 0x82, 0x14, 0x03, 0x9a, 0x00, 0x01, 0x00, 0x00, 0x00,
+  0x00, 0x02, 0x9a, 0x00, 0x00, 0x90, 0x8f, 0x02, 0x90, 0x00, 0x9a, 0x00,
+  0x61, 0x00, 0x00, 0x00, 0x90, 0x00, 0x9a, 0x00, 0x7f, 0x00, 0x00, 0xc0,
+  0x2e, 0x00, 0x02, 0x00, 0xe8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x27, 0x00,
+  0x98, 0x06, 0x9a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9c, 0x06, 0x9a, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x24, 0x08, 0x9a, 0x00, 0xe7, 0x8f, 0x88, 0xf7,
+  0x40, 0x0d, 0x9a, 0x00, 0x20, 0xe0, 0x01, 0x00, 0x00, 0x02, 0x9a, 0x00,
+  0x00, 0x90, 0x8f, 0x1a, 0x00, 0x08, 0x9a, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0xf0, 0x73, 0x13, 0x00, 0x03, 0x00, 0x00, 0x00, 0x30, 0x08, 0x9a, 0x00,
+  0x90, 0x90, 0x67, 0x00, 0xf4, 0x73, 0x13, 0x00, 0x11, 0x00, 0x01, 0x00,
+  0xf4, 0x73, 0x13, 0x00, 0x10, 0x00, 0x01, 0x00, 0xf4, 0x73, 0x13, 0x00,
+  0x00, 0x00, 0x01, 0x00, 0x20, 0x20, 0x13, 0x00, 0x00, 0x00, 0x03, 0x20,
+  0x20, 0x73, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x20, 0x13, 0x00,
+  0x04, 0x00, 0x00, 0x00, 0x34, 0x20, 0x13, 0x00, 0x00, 0x00, 0x8a, 0xf9,
+  0x24, 0x20, 0x13, 0x00, 0x01, 0x32, 0x05, 0x00, 0x2c, 0x20, 0x13, 0x00,
+  0x00, 0x01, 0x00, 0x00, 0x28, 0x20, 0x13, 0x00, 0x10, 0x00, 0x08, 0x10,
+  0x20, 0x20, 0x13, 0x00, 0x01, 0x00, 0x03, 0x20, 0x34, 0x00, 0x02, 0x00,
+  0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00,
+  0x01, 0x00, 0x02, 0x00, 0x90, 0x73, 0x13, 0x00, 0x15, 0x00, 0x03, 0x00,
+  0x00, 0x00, 0x02, 0x00, 0x00, 0xfa, 0x00, 0x00, 0x34, 0x00, 0x02, 0x00,
+  0x0e, 0x00, 0x00, 0x00, 0x21, 0x00, 0x0d, 0x00, 0x2c, 0x20, 0x13, 0x00,
+  0x00, 0x03, 0x00, 0x00, 0x28, 0x20, 0x13, 0x00, 0x10, 0x00, 0x04, 0x10,
+  0xf4, 0x73, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x13, 0x00,
+  0x00, 0x00, 0x01, 0x98, 0x04, 0x20, 0x13, 0x00, 0x01, 0x0b, 0x01, 0x00,
+  0x00, 0x20, 0x13, 0x00, 0x01, 0x00, 0x01, 0x98, 0x34, 0x00, 0x02, 0x00,
+  0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
+  0x01, 0x00, 0x02, 0x00, 0x90, 0x73, 0x13, 0x00, 0x15, 0x00, 0x03, 0x00,
+  0x02, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x00, 0x34, 0x00, 0x02, 0x00,
+  0x10, 0x00, 0x00, 0x00, 0x21, 0x00, 0x1d, 0x00, 0xf4, 0x73, 0x13, 0x00,
+  0x00, 0x11, 0x00, 0x00, 0xf4, 0x73, 0x13, 0x00, 0x10, 0x11, 0x00, 0x00,
+  0xec, 0x73, 0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0xf0, 0x73, 0x13, 0x00,
+  0x02, 0x00, 0x00, 0x00, 0xf4, 0x73, 0x13, 0x00, 0x12, 0x11, 0x00, 0x00,
+  0xf4, 0x73, 0x13, 0x00, 0x12, 0x00, 0x00, 0x00, 0x08, 0x08, 0x9a, 0x00,
+  0x70, 0x00, 0x08, 0x48, 0x00, 0x02, 0x9a, 0x00, 0x00, 0x10, 0x8f, 0x1a,
+  0x24, 0x08, 0x9a, 0x00, 0xe5, 0x8f, 0x88, 0xf7, 0x08, 0x08, 0x9a, 0x00,
+  0x70, 0x00, 0xa8, 0x4a, 0x24, 0x08, 0x9a, 0x00, 0x85, 0x8f, 0x88, 0xf7,
+  0x38, 0x1f, 0x9a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x34, 0x1f, 0x9a, 0x00,
+  0x00, 0x00, 0x01, 0x00, 0x34, 0x0d, 0x9a, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x13, 0x00, 0x02, 0x00, 0x2c, 0x01, 0x00, 0x00, 0x21, 0x00, 0x09, 0x00,
+  0x5c, 0x06, 0x9a, 0x00, 0x22, 0x00, 0x00, 0x00, 0x0c, 0x06, 0x9a, 0x00,
+  0xd0, 0x20, 0x00, 0xfd, 0xd4, 0x0e, 0x9a, 0x00, 0x00, 0x00, 0x00, 0x40,
+  0xd4, 0x0e, 0x9a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00,
+  0x2c, 0x01, 0x00, 0x00, 0x21, 0x00, 0x27, 0x00, 0x2c, 0x08, 0x9a, 0x00,
+  0x00, 0x00, 0x05, 0x00, 0x30, 0x08, 0x9a, 0x00, 0x90, 0xa0, 0x67, 0x00,
+  0x48, 0x02, 0x9a, 0x00, 0xa2, 0x44, 0x1e, 0x93, 0x90, 0x02, 0x9a, 0x00,
+  0x42, 0xa5, 0x5a, 0x15, 0x94, 0x02, 0x9a, 0x00, 0x95, 0xc2, 0xe5, 0x28,
+  0x98, 0x02, 0x9a, 0x00, 0x00, 0x08, 0x15, 0x88, 0x9c, 0x02, 0x9a, 0x00,
+  0xec, 0x30, 0x00, 0x22, 0xa0, 0x02, 0x9a, 0x00, 0x32, 0x00, 0x83, 0xd5,
+  0xa8, 0x02, 0x9a, 0x00, 0x0f, 0x86, 0x00, 0x02, 0xcc, 0x02, 0x9a, 0x00,
+  0x00, 0x39, 0x0f, 0x12, 0x14, 0x06, 0x9a, 0x00, 0x77, 0x4e, 0x04, 0x40,
+  0x10, 0x06, 0x9a, 0x00, 0x77, 0x4e, 0x04, 0x40, 0x78, 0x07, 0x10, 0x00,
+  0x44, 0x04, 0x00, 0x82, 0x4c, 0x02, 0x9a, 0x00, 0x85, 0x0c, 0x05, 0x13,
+  0xe0, 0x08, 0x9a, 0x00, 0x11, 0x00, 0x00, 0x00, 0x90, 0x03, 0x9a, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x94, 0x03, 0x9a, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x0b, 0x9a, 0x00, 0x06, 0x22, 0x22, 0x22, 0x90, 0x00, 0x9a, 0x00,
+  0x7e, 0x00, 0x00, 0x40, 0x2e, 0x00, 0x02, 0x00, 0xd0, 0x07, 0x00, 0x00,
+  0x21, 0x00, 0x0f, 0x00, 0x14, 0x03, 0x9a, 0x00, 0x01, 0x00, 0x00, 0x00,
+  0x10, 0x03, 0x9a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x02, 0x9a, 0x00,
+  0x00, 0x00, 0x00, 0x80, 0x90, 0x03, 0x9a, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x94, 0x02, 0x9a, 0x00, 0x95, 0xc2, 0xe5, 0x24, 0x10, 0x09, 0x9a, 0x00,
+  0x00, 0x00, 0x01, 0xa4, 0x14, 0x09, 0x9a, 0x00, 0x00, 0x00, 0x01, 0xa4,
+  0x34, 0x00, 0x02, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x74, 0x09, 0x90, 0x00,
+  0x15, 0x00, 0x03, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x20, 0xa1, 0x07, 0x00,
+  0x01, 0x00, 0x02, 0x00, 0x74, 0x49, 0x90, 0x00, 0x15, 0x00, 0x03, 0x00,
+  0x0f, 0x00, 0x00, 0x00, 0x20, 0xa1, 0x07, 0x00, 0x34, 0x00, 0x02, 0x00,
+  0x12, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x02, 0x00, 0xe8, 0x03, 0x00, 0x00,
+  0x21, 0x00, 0x0b, 0x00, 0x94, 0x02, 0x9a, 0x00, 0x95, 0xc2, 0xe5, 0x28,
+  0x38, 0x03, 0x9a, 0x00, 0x03, 0x01, 0x30, 0x00, 0x3c, 0x03, 0x9a, 0x00,
+  0xff, 0x01, 0x40, 0x00, 0x00, 0x03, 0x9a, 0x00, 0x05, 0x01, 0x00, 0x00,
+  0x54, 0x03, 0x9a, 0x00, 0x03, 0x00, 0x80, 0x00, 0x2e, 0x00, 0x02, 0x00,
+  0xe8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x09, 0x00, 0x48, 0x03, 0x9a, 0x00,
+  0x00, 0x00, 0x70, 0x00, 0x00, 0x02, 0x9a, 0x00, 0x00, 0x10, 0x8f, 0x9a,
+  0x18, 0x03, 0x9a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x9a, 0x00,
+  0x00, 0x10, 0x8f, 0x1a, 0x2e, 0x00, 0x02, 0x00, 0xe8, 0x03, 0x00, 0x00,
+  0x21, 0x00, 0x07, 0x00, 0x78, 0x09, 0x9a, 0x00, 0x0f, 0x1e, 0x7e, 0x88,
+  0x10, 0x09, 0x9a, 0x00, 0x00, 0x00, 0x0e, 0xa4, 0x14, 0x09, 0x9a, 0x00,
+  0x00, 0x00, 0x0e, 0xa4, 0x34, 0x00, 0x02, 0x00, 0x13, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00,
+  0x74, 0x09, 0x90, 0x00, 0x15, 0x00, 0x03, 0x00, 0x0f, 0x00, 0x00, 0x00,
+  0x20, 0xa1, 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x74, 0x49, 0x90, 0x00,
+  0x15, 0x00, 0x03, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x20, 0xa1, 0x07, 0x00,
+  0x34, 0x00, 0x02, 0x00, 0x14, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x02, 0x00,
+  0xe8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x07, 0x00, 0x00, 0x02, 0x9a, 0x00,
+  0x00, 0x10, 0x8f, 0x3a, 0x10, 0x09, 0x9a, 0x00, 0x00, 0x00, 0x0c, 0x25,
+  0x14, 0x09, 0x9a, 0x00, 0x00, 0x00, 0x0c, 0x25, 0x20, 0x00, 0x03, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x00, 0x02, 0x00,
+  0x15, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x00, 0xc0, 0x62, 0x00,
+  0x00, 0x0f, 0x0f, 0x0f, 0x16, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static void mclk_memory_load_training_pattern(struct gk20a *g)
+{
+	/* Number of {regaddr, writeval} entries in the pattern table. */
+	const u32 count = sizeof(memory_pattern_reglist) /
+				sizeof(memory_pattern_reglist[0]);
+	u32 i = 0;
+
+	gk20a_dbg_info("");
+
+	/* Replay the table in order: one register write per entry. */
+	while (i < count) {
+		gk20a_writel(g, memory_pattern_reglist[i].regaddr,
+				memory_pattern_reglist[i].writeval);
+		i++;
+	}
+	gk20a_dbg_fn("done");
+}
+
+static void mclk_memory_load_shadow_regs(struct gk20a *g)
+{
+	/* Number of {regaddr, writeval} entries in the shadow table. */
+	const u32 total = sizeof(memory_shadow_reglist) /
+				sizeof(memory_shadow_reglist[0]);
+	u32 n;
+
+	gk20a_dbg_info("");
+
+	/* Program every shadow register, walking the table front to back. */
+	for (n = 0; n != total; n++) {
+		gk20a_writel(g, memory_shadow_reglist[n].regaddr,
+				memory_shadow_reglist[n].writeval);
+	}
+
+	gk20a_dbg_fn("done");
+}
+
+static void mclk_seq_pmucmdhandler(struct gk20a *g, struct pmu_msg *_msg,
+			void *param, u32 handle, u32 status)
+{
+	/*
+	 * PMU callback for the mclk sequencer command.
+	 *
+	 * The u32 that param points to receives 0 when the reply is a
+	 * RUN_SCRIPT message with a zero error_code, -ENOENT otherwise.
+	 */
+	struct nv_pmu_seq_msg_run_script *reply;
+	u32 *result = (u32 *)param;
+
+	gk20a_dbg_info("");
+
+	if (status != 0) {
+		/* Command was aborted: log it and do not look at _msg. */
+		gk20a_err(dev_from_gk20a(g), "mclk seq_script cmd aborted");
+		*result = -ENOENT;
+		return;
+	}
+
+	reply = &((struct nv_pmu_seq_msg *)_msg)->run_script;
+
+	/* A foreign message type and a script error are reported alike. */
+	if (reply->msg_type != NV_PMU_SEQ_MSG_ID_RUN_SCRIPT ||
+	    reply->error_code)
+		*result = -ENOENT;
+	else
+		*result = 0;
+}
+
+/*
+ * Load the shadow registers and training pattern, post the gp106 sequencer
+ * script to the PMU and wait for it. Returns 0 or a negative errno.
+ */
+int clk_mclkseq_build_prgm_gddr5(struct gk20a *g)
+{
+	struct pmu_payload payload = { {0} };
+	void *vreg_buf = NULL;
+	struct nv_pmu_seq_cmd cmd;
+	u32 seqdesc;
+	int status = 0;	/* signed: carries negative errno to the caller */
+	u32 seq_completion_status = ~0x0;
+
+	gk20a_dbg_info("");
+
+	/* Load shadow registers, then the RAM training pattern */
+	mclk_memory_load_shadow_regs(g);
+	mclk_memory_load_training_pattern(g);
+
+	/* Fill command header with SEQ ID & size, then the command type */
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.hdr.unit_id	= PMU_UNIT_SEQ;
+	cmd.hdr.size = sizeof(struct nv_pmu_seq_cmd_run_script) +
+		sizeof(struct pmu_hdr);
+	cmd.run_script.cmd_type = NV_PMU_SEQ_CMD_ID_RUN_SCRIPT;
+
+	gk20a_writel(g, 0x132000, 0x98010000);
+	udelay(0x5);
+	gk20a_writel(g, 0x137300, 0x20000103);
+
+	/* Input payload: the sequencer script binary */
+	payload.in.buf = seq_script_gp106;
+	payload.in.size = sizeof(seq_script_gp106);
+	payload.in.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
+	payload.in.offset = offsetof(struct nv_pmu_seq_cmd_run_script,
+			script_alloc);
+
+	vreg_buf = kzalloc((sizeof(u32) * VREG_COUNT), GFP_KERNEL);
+	if (!vreg_buf) {
+		status = -ENOMEM;
+		gk20a_err(dev_from_gk20a(g),
+				"unable to allocate memory for VREG");
+		goto exit_status;
+	}
+
+	payload.out.buf = vreg_buf;
+	payload.out.size = (VREG_COUNT * sizeof(u32));
+	payload.out.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
+	payload.out.offset = offsetof(struct nv_pmu_seq_cmd_run_script,
+			reg_alloc);
+
+	/* Send command to PMU to execute sequencer script */
+	status = gk20a_pmu_cmd_post(g, (struct pmu_cmd *)&cmd, NULL, &payload,
+			PMU_COMMAND_QUEUE_LPQ,
+			mclk_seq_pmucmdhandler,
+			&seq_completion_status, &seqdesc, ~0);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"unable to post seq script exec cmd for unit %x ",
+			cmd.hdr.unit_id);
+		goto exit_status;
+	}
+
+	/*
+	 * Wait for the handler to store 0 in seq_completion_status.
+	 * NOTE(review): after a timeout the handler may presumably still
+	 * run and write to this dead stack slot (and vreg_buf) - confirm.
+	 */
+	pmu_wait_message_cond(&g->pmu, (gk20a_get_gr_idle_timeout(g)),
+			&seq_completion_status, 0);
+	if (seq_completion_status != 0) {
+		gk20a_err(dev_from_gk20a(g), "seq_script update failed");
+		status = -EBUSY;
+	}
+
+exit_status:
+	kfree(vreg_buf);
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.h b/drivers/gpu/nvgpu/clk/clk_mclk.h
new file mode 100644
index 00000000..f86893f7
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.h
@@ -0,0 +1,19 @@
+/*
+* Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms and conditions of the GNU General Public License,
+* version 2, as published by the Free Software Foundation.
+*
+* This program is distributed in the hope it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+* more details.
+*/
+
+#ifndef _CLKMCLK_H_
+#define _CLKMCLK_H_
+struct gk20a;	/* forward declaration: only a pointer is needed here */
+/* Post the gp106 mclk sequencer script to the PMU; 0 or negative errno. */
+int clk_mclkseq_build_prgm_gddr5(struct gk20a *g);
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/pmu_gp106.c b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
index f6fcd234..48653142 100644
--- a/drivers/gpu/nvgpu/gp106/pmu_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
@@ -21,6 +21,7 @@
 #include "gp106/pmu_gp106.h"
 #include "gp106/acr_gp106.h"
 #include "gp106/hw_psec_gp106.h"
+#include "clk/clk_mclk.h"
 #include "hw_mc_gp106.h"
 #include "hw_pwr_gp106.h"
 
@@ -194,6 +195,7 @@ void gp106_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd = NULL;
 	gops->pmu.dump_secure_fuses = NULL;
 	gops->pmu.reset = gp106_falcon_reset;
+	gops->pmu.mclk_init = clk_mclkseq_build_prgm_gddr5;
 
 	gk20a_dbg_fn("done");
 }
diff --git a/drivers/gpu/nvgpu/pmuif/gpmuifseq.h b/drivers/gpu/nvgpu/pmuif/gpmuifseq.h
new file mode 100644
index 00000000..69d55490
--- /dev/null
+++ b/drivers/gpu/nvgpu/pmuif/gpmuifseq.h
@@ -0,0 +1,73 @@
+/*
+* Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms and conditions of the GNU General Public License,
+* version 2, as published by the Free Software Foundation.
+*
+* This program is distributed in the hope it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+* more details.
+*/
+#ifndef _GPMUIFSEQ_H_
+#define _GPMUIFSEQ_H_
+
+#include "gk20a/pmu_common.h"
+
+#define PMU_UNIT_SEQ            (0x02)	/* hdr.unit_id of the sequencer unit */
+
+/*!
+* @file   gpmuifseq.h
+* @brief  PMU Command/Message Interfaces - Sequencer
+*/
+
+/*!
+* Defines the identifiers for various high-level types of sequencer commands.
+*
+* _RUN_SCRIPT @ref NV_PMU_SEQ_CMD_RUN_SCRIPT
+*/
+enum {
+	NV_PMU_SEQ_CMD_ID_RUN_SCRIPT = 0,	/* cmd_type for run_script */
+};
+
+struct nv_pmu_seq_cmd_run_script {
+	u8 cmd_type;	/* NV_PMU_SEQ_CMD_ID_RUN_SCRIPT */
+	u8 pad[3];
+	struct pmu_allocation_v3 script_alloc;	/* input payload (script) */
+	struct pmu_allocation_v3 reg_alloc;	/* output payload (VREGs) */
+};
+
+#define NV_PMU_SEQ_CMD_ALLOC_OFFSET              4
+/* NOTE(review): the 4 above presumably skips cmd_type + pad[3] - confirm */
+#define NV_PMU_SEQ_MSG_ALLOC_OFFSET                                         \
+	(NV_PMU_SEQ_CMD_ALLOC_OFFSET + NV_PMU_CMD_ALLOC_SIZE)
+
+struct nv_pmu_seq_cmd {
+	struct pmu_hdr hdr;	/* unit_id is PMU_UNIT_SEQ for this unit */
+	union {
+		u8 cmd_type;	/* shares storage with run_script.cmd_type */
+		struct nv_pmu_seq_cmd_run_script run_script;
+	};
+};
+
+enum {
+	NV_PMU_SEQ_MSG_ID_RUN_SCRIPT = 0,	/* msg_type of run_script */
+};
+
+struct nv_pmu_seq_msg_run_script {
+	u8 msg_type;	/* NV_PMU_SEQ_MSG_ID_RUN_SCRIPT */
+	u8 error_code;	/* 0 on success; non-zero is treated as failure */
+	u16 error_pc;	/* presumably script position of the error - confirm */
+	u32 timeout_stat;
+};
+
+struct nv_pmu_seq_msg {
+	struct pmu_hdr hdr;
+	union {
+		u8 msg_type;	/* shares storage with run_script.msg_type */
+		struct nv_pmu_seq_msg_run_script run_script;
+	};
+};
+
+#endif
-- 
cgit v1.2.2


From 432017248e432df0619dc2df30f915a52634338f Mon Sep 17 00:00:00 2001
From: Vijayakumar Subbu <vsubbu@nvidia.com>
Date: Sat, 30 Jul 2016 10:44:30 -0700
Subject: gpu: nvgpu: Add dGPU clocks support

JIRA DNVGPU-42

Change-Id: Ic2fca9d0cf82f2823654ac5e8f0772a1eec7b3b5
Signed-off-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1205850
(cherry picked from commit b9f5c6bc4e649162d63e33d65b725872340ca114)
Reviewed-on: http://git-master/r/1227257
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x   |   12 +-
 drivers/gpu/nvgpu/clk/clk.c             |  190 ++++++
 drivers/gpu/nvgpu/clk/clk.h             |   86 +++
 drivers/gpu/nvgpu/clk/clk_domain.c      |  874 ++++++++++++++++++++++++++
 drivers/gpu/nvgpu/clk/clk_domain.h      |   94 +++
 drivers/gpu/nvgpu/clk/clk_fll.c         |  440 +++++++++++++
 drivers/gpu/nvgpu/clk/clk_fll.h         |   68 ++
 drivers/gpu/nvgpu/clk/clk_prog.c        |  834 ++++++++++++++++++++++++
 drivers/gpu/nvgpu/clk/clk_prog.h        |   71 +++
 drivers/gpu/nvgpu/clk/clk_vf_point.c    |  347 ++++++++++
 drivers/gpu/nvgpu/clk/clk_vf_point.h    |   74 +++
 drivers/gpu/nvgpu/clk/clk_vin.c         |  466 ++++++++++++++
 drivers/gpu/nvgpu/clk/clk_vin.h         |   56 ++
 drivers/gpu/nvgpu/gp106/hal_gp106.c     |    2 +-
 drivers/gpu/nvgpu/gp106/hw_fuse_gp106.h |   88 +++
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c     |    2 +-
 drivers/gpu/nvgpu/include/bios.h        |  411 ++++++++++++
 drivers/gpu/nvgpu/perf/perf.c           |   98 +++
 drivers/gpu/nvgpu/perf/perf.h           |   60 ++
 drivers/gpu/nvgpu/perf/vfe_equ.c        |  590 +++++++++++++++++
 drivers/gpu/nvgpu/perf/vfe_equ.h        |   76 +++
 drivers/gpu/nvgpu/perf/vfe_var.c        | 1048 +++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/perf/vfe_var.h        |   97 +++
 drivers/gpu/nvgpu/pstate/pstate.c       |  101 +++
 drivers/gpu/nvgpu/pstate/pstate.h       |   19 +
 25 files changed, 6201 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/clk/clk.c
 create mode 100644 drivers/gpu/nvgpu/clk/clk.h
 create mode 100644 drivers/gpu/nvgpu/clk/clk_domain.c
 create mode 100644 drivers/gpu/nvgpu/clk/clk_domain.h
 create mode 100644 drivers/gpu/nvgpu/clk/clk_fll.c
 create mode 100644 drivers/gpu/nvgpu/clk/clk_fll.h
 create mode 100644 drivers/gpu/nvgpu/clk/clk_prog.c
 create mode 100644 drivers/gpu/nvgpu/clk/clk_prog.h
 create mode 100644 drivers/gpu/nvgpu/clk/clk_vf_point.c
 create mode 100644 drivers/gpu/nvgpu/clk/clk_vf_point.h
 create mode 100644 drivers/gpu/nvgpu/clk/clk_vin.c
 create mode 100644 drivers/gpu/nvgpu/clk/clk_vin.h
 create mode 100644 drivers/gpu/nvgpu/include/bios.h
 create mode 100644 drivers/gpu/nvgpu/perf/perf.c
 create mode 100644 drivers/gpu/nvgpu/perf/perf.h
 create mode 100644 drivers/gpu/nvgpu/perf/vfe_equ.c
 create mode 100644 drivers/gpu/nvgpu/perf/vfe_equ.h
 create mode 100644 drivers/gpu/nvgpu/perf/vfe_var.c
 create mode 100644 drivers/gpu/nvgpu/perf/vfe_var.h
 create mode 100644 drivers/gpu/nvgpu/pstate/pstate.c
 create mode 100644 drivers/gpu/nvgpu/pstate/pstate.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index 3e54a989..c6b6f0d2 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -28,7 +28,17 @@ nvgpu-y += \
 	$(nvgpu-t18x)/gp106/sec2_gp106.o \
 	$(nvgpu-t18x)/gp106/fifo_gp106.o \
 	$(nvgpu-t18x)/gp106/ltc_gp106.o \
-	$(nvgpu-t18x)/clk/clk_mclk.o
+	$(nvgpu-t18x)/clk/clk_mclk.o \
+	$(nvgpu-t18x)/pstate/pstate.o \
+	$(nvgpu-t18x)/clk/clk_vin.o \
+	$(nvgpu-t18x)/clk/clk_fll.o \
+	$(nvgpu-t18x)/clk/clk_domain.o \
+	$(nvgpu-t18x)/clk/clk_prog.o \
+	$(nvgpu-t18x)/clk/clk_vf_point.o \
+	$(nvgpu-t18x)/perf/vfe_var.o \
+	$(nvgpu-t18x)/perf/vfe_equ.o \
+	$(nvgpu-t18x)/perf/perf.o \
+	$(nvgpu-t18x)/clk/clk.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
 
diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c
new file mode 100644
index 00000000..0679efc0
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "clk.h"
+#include "pmuif/gpmuifclk.h"
+#include "pmuif/gpmuifvolt.h"
+#include "ctrl/ctrlclk.h"
+#include "ctrl/ctrlvolt.h"
+#include "gk20a/pmu_gk20a.h"
+
+/*
+ * Context handed to clkrpc_pmucmdhandler() through the callback parameter
+ * of a posted PMU command.
+ */
+struct clkrpc_pmucmdhandler_params {
+	/* RPC buffer whose b_supported flag the handler inspects */
+	struct nv_pmu_clk_rpc *prpccall;
+	/* set to 1 by the handler when the RPC buffer reports b_supported */
+	u32 success;
+};
+
+/*
+ * Completion callback for the clock RPC commands posted from this file.
+ *
+ * @param must point to the struct clkrpc_pmucmdhandler_params supplied when
+ * the command was posted.  Its success field is set to 1 only when the reply
+ * is a clock RPC message and the RPC buffer reports b_supported; in every
+ * other case it is left untouched.  @handle and @status are not used.
+ */
+static void clkrpc_pmucmdhandler(struct gk20a *g, struct pmu_msg *msg,
+				 void *param, u32 handle, u32 status)
+{
+	struct clkrpc_pmucmdhandler_params *phandlerparams =
+		(struct clkrpc_pmucmdhandler_params *)param;
+
+	gk20a_dbg_info("");
+
+	if (msg->msg.clk.msg_type != NV_PMU_CLK_MSG_ID_RPC) {
+		/*
+		 * This handler serves both the VIN load and the VF inject
+		 * RPCs, so the message names the clock unit generically.
+		 */
+		gk20a_err(dev_from_gk20a(g),
+			  "unsupported msg for CLK RPC %x",
+			  msg->msg.clk.msg_type);
+		return;
+	}
+
+	if (phandlerparams->prpccall->b_supported)
+		phandlerparams->success = 1;
+}
+
+/*
+ * Post a clock LOAD RPC for the VIN feature to the PMU and wait for the
+ * reply.
+ *
+ * Returns 0 when the handler observed a supported RPC reply, the non-zero
+ * status of gk20a_pmu_cmd_post() when posting failed, or -EINVAL (carried
+ * in the u32 return type) when no successful reply was seen after waiting.
+ */
+u32 clk_pmu_vin_load(struct gk20a *g)
+{
+	/*
+	 * Only a few command fields are filled in below; zero everything
+	 * else so no stack garbage is handed to gk20a_pmu_cmd_post().
+	 */
+	struct pmu_cmd cmd = { .hdr = { .unit_id = PMU_UNIT_CLK } };
+	struct pmu_payload payload = { {0} };
+	u32 status;
+	u32 seqdesc;
+	struct nv_pmu_clk_rpc rpccall = {0};
+	struct clkrpc_pmucmdhandler_params handler = {0};
+	struct nv_pmu_clk_load *clkload;
+
+	rpccall.function = NV_PMU_CLK_RPC_ID_LOAD;
+	clkload = &rpccall.params.clk_load;
+	clkload->feature = NV_NV_PMU_CLK_LOAD_FEATURE_VIN;
+	clkload->action_mask = NV_NV_PMU_CLK_LOAD_ACTION_MASK_VIN_HW_CAL_PROGRAM_YES << 4;
+
+	cmd.hdr.size =  (u32)sizeof(struct nv_pmu_clk_cmd) +
+			(u32)sizeof(struct pmu_hdr);
+
+	cmd.cmd.clk.cmd_type = NV_PMU_CLK_CMD_ID_RPC;
+
+	/* the same RPC buffer carries the request in and the reply out */
+	payload.in.buf = (u8 *)&rpccall;
+	payload.in.size = (u32)sizeof(struct nv_pmu_clk_rpc);
+	payload.in.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
+	payload.in.offset = NV_PMU_CLK_CMD_RPC_ALLOC_OFFSET;
+
+	payload.out.buf = (u8 *)&rpccall;
+	payload.out.size = (u32)sizeof(struct nv_pmu_clk_rpc);
+	payload.out.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
+	payload.out.offset = NV_PMU_CLK_MSG_RPC_ALLOC_OFFSET;
+
+	handler.prpccall = &rpccall;
+	handler.success = 0;
+
+	status = gk20a_pmu_cmd_post(g, &cmd, NULL, &payload,
+			PMU_COMMAND_QUEUE_LPQ,
+			clkrpc_pmucmdhandler, (void *)&handler,
+			&seqdesc, ~0);
+
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"unable to post clk RPC cmd %x",
+			cmd.cmd.clk.cmd_type);
+		goto done;
+	}
+
+	/* presumably blocks until handler.success reaches 1 or the timeout */
+	pmu_wait_message_cond(&g->pmu,
+			gk20a_get_gr_idle_timeout(g),
+			&handler.success, 1);
+
+	if (handler.success == 0) {
+		gk20a_err(dev_from_gk20a(g), "rpc call to load vin cal failed");
+		status = -EINVAL;
+	}
+
+done:
+	return status;
+}
+
+/*
+ * Post a clock VF_CHANGE_INJECT RPC to the PMU and wait for the reply.
+ *
+ * The request is built from fixed values: three clock domains (GPC2CLK,
+ * XBAR2CLK, SYS2CLK), each moving from the FFR to the FR regime, and one
+ * LOGIC voltage rail.
+ *
+ * Returns 0 when the handler observed a supported RPC reply, the non-zero
+ * status of gk20a_pmu_cmd_post() when posting failed, or -EINVAL (carried
+ * in the u32 return type) when no successful reply was seen after waiting.
+ */
+u32 clk_pmu_vf_inject(struct gk20a *g)
+{
+	/*
+	 * Only a few command fields are filled in below; zero everything
+	 * else so no stack garbage is handed to gk20a_pmu_cmd_post().
+	 */
+	struct pmu_cmd cmd = { .hdr = { .unit_id = PMU_UNIT_CLK } };
+	struct pmu_payload payload = { {0} };
+	u32 status;
+	u32 seqdesc;
+	struct nv_pmu_clk_rpc rpccall = {0};
+	struct clkrpc_pmucmdhandler_params handler = {0};
+	struct nv_pmu_clk_vf_change_inject *vfchange;
+
+	rpccall.function = NV_PMU_CLK_RPC_ID_CLK_VF_CHANGE_INJECT;
+	vfchange = &rpccall.params.clk_vf_change_inject;
+	vfchange->flags = 0;
+	vfchange->clk_list.num_domains = 3;
+	vfchange->clk_list.clk_domains[0].clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
+	vfchange->clk_list.clk_domains[0].clk_freq_khz = 2581 * 1000;
+	vfchange->clk_list.clk_domains[0].clk_flags = 0;
+	vfchange->clk_list.clk_domains[0].current_regime_id =
+		CTRL_CLK_FLL_REGIME_ID_FFR;
+	vfchange->clk_list.clk_domains[0].target_regime_id =
+		CTRL_CLK_FLL_REGIME_ID_FR;
+	vfchange->clk_list.clk_domains[1].clk_domain = CTRL_CLK_DOMAIN_XBAR2CLK;
+	vfchange->clk_list.clk_domains[1].clk_freq_khz = 2505 * 1000;
+	vfchange->clk_list.clk_domains[1].clk_flags = 0;
+	vfchange->clk_list.clk_domains[1].current_regime_id =
+		CTRL_CLK_FLL_REGIME_ID_FFR;
+	vfchange->clk_list.clk_domains[1].target_regime_id =
+		CTRL_CLK_FLL_REGIME_ID_FR;
+	vfchange->clk_list.clk_domains[2].clk_domain = CTRL_CLK_DOMAIN_SYS2CLK;
+	vfchange->clk_list.clk_domains[2].clk_freq_khz = 2328 * 1000;
+	vfchange->clk_list.clk_domains[2].clk_flags = 0;
+	vfchange->clk_list.clk_domains[2].current_regime_id =
+		CTRL_CLK_FLL_REGIME_ID_FFR;
+	vfchange->clk_list.clk_domains[2].target_regime_id =
+		CTRL_CLK_FLL_REGIME_ID_FR;
+	vfchange->volt_list.num_rails = 1;
+	vfchange->volt_list.rails[0].volt_domain = CTRL_VOLT_DOMAIN_LOGIC;
+	vfchange->volt_list.rails[0].voltage_uv = 825000;
+	vfchange->volt_list.rails[0].voltage_min_noise_unaware_uv = 825000;
+
+	cmd.hdr.size =  (u32)sizeof(struct nv_pmu_clk_cmd) +
+			(u32)sizeof(struct pmu_hdr);
+
+	cmd.cmd.clk.cmd_type = NV_PMU_CLK_CMD_ID_RPC;
+
+	/* the same RPC buffer carries the request in and the reply out */
+	payload.in.buf = (u8 *)&rpccall;
+	payload.in.size = (u32)sizeof(struct nv_pmu_clk_rpc);
+	payload.in.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
+	payload.in.offset = NV_PMU_CLK_CMD_RPC_ALLOC_OFFSET;
+
+	payload.out.buf = (u8 *)&rpccall;
+	payload.out.size = (u32)sizeof(struct nv_pmu_clk_rpc);
+	payload.out.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
+	payload.out.offset = NV_PMU_CLK_MSG_RPC_ALLOC_OFFSET;
+
+	handler.prpccall = &rpccall;
+	handler.success = 0;
+
+	status = gk20a_pmu_cmd_post(g, &cmd, NULL, &payload,
+			PMU_COMMAND_QUEUE_LPQ,
+			clkrpc_pmucmdhandler, (void *)&handler,
+			&seqdesc, ~0);
+
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			  "unable to post clk RPC cmd %x",
+			  cmd.cmd.clk.cmd_type);
+		goto done;
+	}
+
+	/* presumably blocks until handler.success reaches 1 or the timeout */
+	pmu_wait_message_cond(&g->pmu,
+			gk20a_get_gr_idle_timeout(g),
+			&handler.success, 1);
+
+	if (handler.success == 0) {
+		gk20a_err(dev_from_gk20a(g), "rpc call to inject clock failed");
+		status = -EINVAL;
+	}
+done:
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h
new file mode 100644
index 00000000..d638424f
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk.h
@@ -0,0 +1,86 @@
+/*
+ * general clock structures & definitions
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _CLK_H_
+#define _CLK_H_
+
+#include "clk_vin.h"
+#include "clk_fll.h"
+#include "clk_domain.h"
+#include "clk_prog.h"
+#include "clk_vf_point.h"
+#include "gk20a/gk20a.h"
+
+#define NV_PERF_DOMAIN_4X_CLOCK_DOMAIN_SKIP 0x10
+#define NV_PERF_DOMAIN_4X_CLOCK_DOMAIN_MASK 0x1F
+#define NV_PERF_DOMAIN_4X_CLOCK_DOMAIN_SHIFT 0
+
+/*
+ * Clock related state for GPUs supporting clock control from the PMU.
+ * Each member is one group of clock objects; the clock code reaches this
+ * structure as g->clk_pmu.
+ */
+struct clk_pmupstate {
+	struct avfsvinobjs avfs_vinobjs;
+	struct avfsfllobjs avfs_fllobjs;
+	/* clock domain group, filled in by clk_domain_sw_setup() */
+	struct clk_domains clk_domainobjs;
+	struct clk_progs clk_progobjs;
+	struct clk_vf_points clk_vf_pointobjs;
+};
+
+/*
+ * One clock entry of a struct vbios_clock_domain.
+ * NOTE(review): field meanings are inferred from their names only (VBIOS
+ * domain id, clkwhich id, perf table index, API domain) - confirm against
+ * the users of this structure.
+ */
+struct clockentry {
+		u8 vbios_clk_domain;
+		u8 clk_which;
+		u8 perf_index;
+		u32 api_clk_domain;
+};
+
+#define NV_PERF_HEADER_4X_CLOCKS_DOMAINS_MAX_NUMCLKS         9
+
+/*
+ * A set of clock entries, sized for at most
+ * NV_PERF_HEADER_4X_CLOCKS_DOMAINS_MAX_NUMCLKS of them.
+ */
+struct vbios_clock_domain {
+	u8 clock_type;
+	/* presumably the number of valid clock_entry[] slots - TODO confirm */
+	u8 num_domains;
+	struct clockentry clock_entry[NV_PERF_HEADER_4X_CLOCKS_DOMAINS_MAX_NUMCLKS];
+};
+
+/*
+ * Per-entry description used when parsing the VBIOS clocks table: the
+ * entry at table index N is described by element N of an array of these.
+ */
+struct vbios_clocks_table_1x_hal_clock_entry {
+	/* clock that the table entry refers to */
+	enum nv_pmu_clk_clkwhich domain;
+	/* copied into the constructed domain's b_noise_aware_capable */
+	bool b_noise_aware_capable;
+};
+
+#define NV_PERF_HEADER_4X_CLOCKS_DOMAINS_4_GPC2CLK           0
+#define NV_PERF_HEADER_4X_CLOCKS_DOMAINS_4_XBAR2CLK          1
+#define NV_PERF_HEADER_4X_CLOCKS_DOMAINS_4_DRAMCLK           2
+#define NV_PERF_HEADER_4X_CLOCKS_DOMAINS_4_SYS2CLK           3
+#define NV_PERF_HEADER_4X_CLOCKS_DOMAINS_4_HUB2CLK           4
+#define NV_PERF_HEADER_4X_CLOCKS_DOMAINS_4_MSDCLK            5
+#define NV_PERF_HEADER_4X_CLOCKS_DOMAINS_4_PWRCLK            6
+#define NV_PERF_HEADER_4X_CLOCKS_DOMAINS_4_DISPCLK           7
+#define NV_PERF_HEADER_4X_CLOCKS_DOMAINS_4_NUMCLKS           8
+
+#define PERF_CLK_MCLK           0
+#define PERF_CLK_DISPCLK        1
+#define PERF_CLK_GPC2CLK        2
+#define PERF_CLK_HOSTCLK        3
+#define PERF_CLK_LTC2CLK        4
+#define PERF_CLK_SYS2CLK        5
+#define PERF_CLK_HUB2CLK        6
+#define PERF_CLK_LEGCLK         7
+#define PERF_CLK_MSDCLK         8
+#define PERF_CLK_XCLK           9
+#define PERF_CLK_PWRCLK         10
+#define PERF_CLK_XBAR2CLK       11
+#define PERF_CLK_PCIEGENCLK     12
+#define PERF_CLK_NUM            13
+
+u32 clk_pmu_vf_inject(struct gk20a *g);
+u32 clk_pmu_vin_load(struct gk20a *g);
+
+#endif
diff --git a/drivers/gpu/nvgpu/clk/clk_domain.c b/drivers/gpu/nvgpu/clk/clk_domain.c
new file mode 100644
index 00000000..dc485e6b
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_domain.c
@@ -0,0 +1,874 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "clk.h"
+#include "clk_fll.h"
+#include "clk_domain.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "pmuif/gpmuifclk.h"
+#include "gm206/bios_gm206.h"
+#include "ctrl/ctrlclk.h"
+#include "ctrl/ctrlvolt.h"
+#include "gk20a/pmu_gk20a.h"
+
+static struct clk_domain *construct_clk_domain(struct gk20a *g, void *pargs);
+
+static u32 devinit_get_clocks_table(struct gk20a *g,
+	struct clk_domains *pdomainobjs);
+
+static u32 clk_domain_pmudatainit_super(struct gk20a *g, struct boardobj
+	*board_obj_ptr,	struct nv_pmu_boardobj *ppmudata);
+
+/*
+ * Maps a VBIOS clocks table entry index to the clock it describes and to
+ * whether that clock is noise-aware capable.  devinit_get_clocks_table()
+ * indexes this array with the table entry index, so the order of the
+ * elements matters.
+ */
+const struct vbios_clocks_table_1x_hal_clock_entry vbiosclktbl1xhalentry[] = {
+	{ clkwhich_gpc2clk,    true,  },
+	{ clkwhich_xbar2clk,   true,  },
+	{ clkwhich_mclk,       false, },
+	{ clkwhich_sys2clk,    true,  },
+	{ clkwhich_hub2clk,    false, },
+	{ clkwhich_nvdclk,     false, },
+	{ clkwhich_pwrclk,     false, },
+	{ clkwhich_dispclk,    false, },
+	{ clkwhich_pciegenclk, false, }
+};
+
+/*
+ * Translate a bit set of clkwhich_* clocks (bit N set means clock N is
+ * present) into the OR of the matching CTRL_CLK_DOMAIN_* values.  Bits
+ * without a mapping below are ignored.
+ */
+static u32 clktranslatehalmumsettoapinumset(u32 clkhaldomains)
+{
+	const struct {
+		unsigned long hal_bit;
+		u32 api_domain;
+	} xlat[] = {
+		{ BIT(clkwhich_gpc2clk),    CTRL_CLK_DOMAIN_GPC2CLK },
+		{ BIT(clkwhich_xbar2clk),   CTRL_CLK_DOMAIN_XBAR2CLK },
+		{ BIT(clkwhich_sys2clk),    CTRL_CLK_DOMAIN_SYS2CLK },
+		{ BIT(clkwhich_hub2clk),    CTRL_CLK_DOMAIN_HUB2CLK },
+		{ BIT(clkwhich_pwrclk),     CTRL_CLK_DOMAIN_PWRCLK },
+		{ BIT(clkwhich_pciegenclk), CTRL_CLK_DOMAIN_PCIEGENCLK },
+		{ BIT(clkwhich_mclk),       CTRL_CLK_DOMAIN_MCLK },
+		{ BIT(clkwhich_nvdclk),     CTRL_CLK_DOMAIN_NVDCLK },
+		{ BIT(clkwhich_dispclk),    CTRL_CLK_DOMAIN_DISPCLK },
+	};
+	u32 api_set = 0;
+	u32 n;
+
+	for (n = 0; n < sizeof(xlat) / sizeof(xlat[0]); n++) {
+		if (clkhaldomains & xlat[n].hal_bit)
+			api_set |= xlat[n].api_domain;
+	}
+
+	return api_set;
+}
+
+/*
+ * Fill the PMU-side group header for the clock domain group.
+ *
+ * Runs the generic E32 group initialisation first and stops with its status
+ * if that fails.  Otherwise copies the group-wide settings, exports the
+ * master domain mask and copies the deltas.  The returned status is that of
+ * the mask export.
+ */
+static u32 _clk_domains_pmudatainit_3x(struct gk20a *g,
+				       struct boardobjgrp *pboardobjgrp,
+				       struct nv_pmu_boardobjgrp_super *pboardobjgrppmu)
+{
+	struct clk_domains *src = (struct clk_domains *)pboardobjgrp;
+	struct nv_pmu_clk_clk_domain_boardobjgrp_set_header *hdr;
+	u32 rc;
+
+	rc = boardobjgrp_pmudatainit_e32(g, pboardobjgrp, pboardobjgrppmu);
+	if (rc) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error updating pmu boardobjgrp for clk domain 0x%x",
+			  rc);
+		return rc;
+	}
+
+	hdr = (struct nv_pmu_clk_clk_domain_boardobjgrp_set_header *)
+		pboardobjgrppmu;
+
+	hdr->vbios_domains = src->vbios_domains;
+	hdr->cntr_sampling_periodms = src->cntr_sampling_periodms;
+	hdr->b_override_o_v_o_c = false;
+	hdr->b_debug_mode = false;
+	hdr->b_enforce_vf_monotonicity = src->b_enforce_vf_monotonicity;
+	hdr->volt_rails_max = 2;
+
+	rc = boardobjgrpmask_export(&src->master_domains_mask.super,
+				    src->master_domains_mask.super.bitcount,
+				    &hdr->master_domains_mask.super);
+
+	/* the deltas are copied whether or not the export succeeded */
+	memcpy(&hdr->deltas, &src->deltas,
+		(sizeof(struct ctrl_clk_clk_delta)));
+
+	return rc;
+}
+
+/*
+ * Return, through @ppboardobjpmudata, the PMU-side board object stored at
+ * index @idx of the clock domain group set @pmuboardobjgrp.
+ *
+ * Returns 0 on success, or -EINVAL (carried in the u32 return type) when
+ * @idx is not marked in the group's object mask; *ppboardobjpmudata is
+ * left untouched in that case.
+ */
+static u32 _clk_domains_pmudata_instget(struct gk20a *g,
+					struct nv_pmu_boardobjgrp *pmuboardobjgrp,
+					struct nv_pmu_boardobj **ppboardobjpmudata,
+					u8 idx)
+{
+	struct nv_pmu_clk_clk_domain_boardobj_grp_set  *pgrp_set =
+		(struct nv_pmu_clk_clk_domain_boardobj_grp_set *)
+		pmuboardobjgrp;
+
+	gk20a_dbg_info("");
+
+	/*
+	 * The presence test below is done on a 32-bit value, so an index
+	 * of 32 or more can never be present.  Reject it up front: shifting
+	 * by the word width or more would be undefined.
+	 */
+	if (idx >= 8 * sizeof(u32))
+		return -EINVAL;
+
+	/*check whether pmuboardobjgrp has a valid boardobj in index*/
+	if (((u32)BIT(idx) &
+		pgrp_set->hdr.data.super.obj_mask.super.data[0]) == 0)
+		return -EINVAL;
+
+	*ppboardobjpmudata = (struct nv_pmu_boardobj *)
+		&pgrp_set->objects[idx].data.board_obj;
+	gk20a_dbg_info(" Done");
+	return 0;
+}
+
+/*
+ * Build the software state of the clock domain group in
+ * g->clk_pmu.clk_domainobjs.
+ *
+ * Constructs the E32 board object group and its PMU command interface,
+ * installs the group-level callbacks, resets the domain masks, ordering
+ * lists and deltas, populates the group via devinit_get_clocks_table(), and
+ * finally records which domains implement the 3X_PROG and 3X_MASTER types
+ * in the corresponding masks.
+ *
+ * Returns 0 on success or the first non-zero status reported by a step.
+ */
+u32 clk_domain_sw_setup(struct gk20a *g)
+{
+	u32 status;
+	struct boardobjgrp *pboardobjgrp = NULL;
+	struct clk_domains *pclkdomainobjs;
+	struct clk_domain *pdomain;
+	u8 i;
+
+	gk20a_dbg_info("");
+
+	status = boardobjgrpconstruct_e32(&g->clk_pmu.clk_domainobjs.super);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error creating boardobjgrp for clk domain, status - 0x%x",
+			  status);
+		goto done;
+	}
+
+	pboardobjgrp = &g->clk_pmu.clk_domainobjs.super.super;
+	pclkdomainobjs = &(g->clk_pmu.clk_domainobjs);
+
+	BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, CLK, CLK_DOMAIN);
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp,
+			clk, CLK, clk_domain, CLK_DOMAIN);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x",
+			  status);
+		goto done;
+	}
+
+	/* group-level hooks used when the group is sent to the PMU */
+	pboardobjgrp->pmudatainit  = _clk_domains_pmudatainit_3x;
+	pboardobjgrp->pmudatainstget  = _clk_domains_pmudata_instget;
+
+	/* Initialize masks to zero. */
+	boardobjgrpmask_e32_init(&pclkdomainobjs->prog_domains_mask, NULL);
+	boardobjgrpmask_e32_init(&pclkdomainobjs->master_domains_mask, NULL);
+	pclkdomainobjs->b_enforce_vf_monotonicity = true;
+
+	memset(&pclkdomainobjs->ordered_noise_aware_list, 0,
+		sizeof(pclkdomainobjs->ordered_noise_aware_list));
+
+	memset(&pclkdomainobjs->ordered_noise_unaware_list, 0,
+		sizeof(pclkdomainobjs->ordered_noise_unaware_list));
+
+	memset(&pclkdomainobjs->deltas, 0,
+		sizeof(struct ctrl_clk_clk_delta));
+
+	/* create one domain object per VBIOS clocks table entry */
+	status = devinit_get_clocks_table(g, pclkdomainobjs);
+	if (status)
+		goto done;
+
+	/* classify every constructed domain into the prog/master masks */
+	BOARDOBJGRP_FOR_EACH(&(pclkdomainobjs->super.super),
+			     struct clk_domain *, pdomain, i) {
+		if (pdomain->super.implements(g, &pdomain->super,
+				CTRL_CLK_CLK_DOMAIN_TYPE_3X_PROG)) {
+			status = boardobjgrpmask_bitset(
+				&pclkdomainobjs->prog_domains_mask.super, i);
+			if (status)
+				goto done;
+		}
+
+		if (pdomain->super.implements(g, &pdomain->super,
+				CTRL_CLK_CLK_DOMAIN_TYPE_3X_MASTER)) {
+			status = boardobjgrpmask_bitset(
+				&pclkdomainobjs->master_domains_mask.super, i);
+			if (status)
+				goto done;
+		}
+	}
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+/*
+ * Run the PMU init handler of the clock domain group.
+ *
+ * Returns -EINVAL (carried in the u32 return type) when the group has not
+ * been constructed, otherwise whatever the group's pmuinithandle returns.
+ */
+u32 clk_domain_pmu_setup(struct gk20a *g)
+{
+	struct boardobjgrp *grp;
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	grp = &g->clk_pmu.clk_domainobjs.super.super;
+	if (grp->bconstructed) {
+		rc = grp->pmuinithandle(g, grp);
+		gk20a_dbg_info("Done");
+		return rc;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Parse the VBIOS clocks table and construct one clock domain board object
+ * per table entry, inserting each into @pclkdomainobjs at the entry's index.
+ *
+ * The clock of entry N and its noise-aware capability come from
+ * vbiosclktbl1xhalentry[N]; the domain type (fixed, master or slave) and its
+ * parameters come from the entry itself.  The header's counter sampling
+ * period is stored in @pclkdomainobjs.
+ *
+ * Returns 0 on success.  Returns -EINVAL (carried in the u32 return type)
+ * when the table cannot be obtained, its header or entry size is smaller
+ * than expected, it has more entries than vbiosclktbl1xhalentry[] describes,
+ * an entry has an unknown usage, or a domain cannot be constructed or
+ * inserted.
+ */
+static u32 devinit_get_clocks_table(struct gk20a *g,
+				    struct clk_domains *pclkdomainobjs)
+{
+	u32 status = 0;
+	u8 *clocks_table_ptr = NULL;
+	struct vbios_clocks_table_1x_header clocks_table_header = { 0 };
+	struct vbios_clocks_table_1x_entry clocks_table_entry = { 0 };
+	u8 *clocks_tbl_entry_ptr = NULL;
+	u32 index = 0;
+	u32 entry_copy_size;
+	struct clk_domain *pclkdomain_dev;
+	union {
+		struct boardobj boardobj;
+		struct clk_domain clk_domain;
+		struct clk_domain_3x v3x;
+		struct clk_domain_3x_fixed v3x_fixed;
+		struct clk_domain_3x_prog v3x_prog;
+		struct clk_domain_3x_master v3x_master;
+		struct clk_domain_3x_slave v3x_slave;
+	} clk_domain_data;
+
+	gk20a_dbg_info("");
+
+	if (g->ops.bios.get_perf_table_ptrs)
+		clocks_table_ptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+				g->bios.clock_token, CLOCKS_TABLE);
+
+	/* covers both a missing HAL op and a NULL table returned by it */
+	if (clocks_table_ptr == NULL) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	memcpy(&clocks_table_header, clocks_table_ptr,
+			VBIOS_CLOCKS_TABLE_1X_HEADER_SIZE_07);
+	if (clocks_table_header.header_size <
+			VBIOS_CLOCKS_TABLE_1X_HEADER_SIZE_07) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	if (clocks_table_header.entry_size <
+	    VBIOS_CLOCKS_TABLE_1X_ENTRY_SIZE_09) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	/* every entry index must have a matching HAL description */
+	if (clocks_table_header.entry_count >
+	    sizeof(vbiosclktbl1xhalentry) / sizeof(vbiosclktbl1xhalentry[0])) {
+		gk20a_err(dev_from_gk20a(g),
+			  "clocks table has too many entries %d",
+			  clocks_table_header.entry_count);
+		status = -EINVAL;
+		goto done;
+	}
+
+	/*
+	 * The table may use entries larger than the structure parsed here;
+	 * never copy more than the local structure can hold.
+	 */
+	entry_copy_size = clocks_table_header.entry_size;
+	if (entry_copy_size > sizeof(clocks_table_entry))
+		entry_copy_size = sizeof(clocks_table_entry);
+
+	pclkdomainobjs->cntr_sampling_periodms =
+		(u16)clocks_table_header.cntr_sampling_periodms;
+
+	/* Read table entries*/
+	clocks_tbl_entry_ptr = clocks_table_ptr +
+		VBIOS_CLOCKS_TABLE_1X_HEADER_SIZE_07;
+	for (index = 0; index < clocks_table_header.entry_count; index++) {
+		memcpy(&clocks_table_entry, clocks_tbl_entry_ptr,
+				entry_copy_size);
+
+		/*
+		 * Start every entry from a clean argument block so that
+		 * fields a case does not set (and that the constructors
+		 * still copy) are zero rather than stack garbage or
+		 * leftovers from the previous entry.
+		 */
+		memset(&clk_domain_data, 0, sizeof(clk_domain_data));
+
+		clk_domain_data.clk_domain.domain =
+				vbiosclktbl1xhalentry[index].domain;
+		clk_domain_data.clk_domain.api_domain =
+				clktranslatehalmumsettoapinumset(
+					BIT(clk_domain_data.clk_domain.domain));
+		clk_domain_data.v3x.b_noise_aware_capable =
+			vbiosclktbl1xhalentry[index].b_noise_aware_capable;
+
+		switch (BIOS_GET_FIELD(clocks_table_entry.flags0,
+				NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_FLAGS0_USAGE)) {
+		case  NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_FLAGS0_USAGE_FIXED:
+			clk_domain_data.boardobj.type =
+				CTRL_CLK_CLK_DOMAIN_TYPE_3X_FIXED;
+			clk_domain_data.v3x_fixed.freq_mhz = (u16)BIOS_GET_FIELD(
+				clocks_table_entry.param1,
+				NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM1_FIXED_FREQUENCY_MHZ);
+			break;
+
+		case  NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_FLAGS0_USAGE_MASTER:
+			clk_domain_data.boardobj.type =
+				CTRL_CLK_CLK_DOMAIN_TYPE_3X_MASTER;
+			clk_domain_data.v3x_prog.clk_prog_idx_first =
+				(u8)(BIOS_GET_FIELD(clocks_table_entry.param0,
+				     NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM0_PROG_CLK_PROG_IDX_FIRST));
+			clk_domain_data.v3x_prog.clk_prog_idx_last =
+				(u8)(BIOS_GET_FIELD(clocks_table_entry.param0,
+				     NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM0_PROG_CLK_PROG_IDX_LAST));
+			clk_domain_data.v3x_prog.noise_unaware_ordering_index =
+				(u8)(BIOS_GET_FIELD(clocks_table_entry.param2,
+				     NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_NOISE_UNAWARE_ORDERING_IDX));
+
+			if (clk_domain_data.v3x.b_noise_aware_capable) {
+				clk_domain_data.v3x_prog.noise_aware_ordering_index =
+					(u8)(BIOS_GET_FIELD(clocks_table_entry.param2,
+					     NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_NOISE_AWARE_ORDERING_IDX));
+				clk_domain_data.v3x_prog.b_force_noise_unaware_ordering =
+					(u8)(BIOS_GET_FIELD(clocks_table_entry.param2,
+					     NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_FORCE_NOISE_UNAWARE_ORDERING));
+			} else {
+				clk_domain_data.v3x_prog.noise_aware_ordering_index =
+					CTRL_CLK_CLK_DOMAIN_3X_PROG_ORDERING_INDEX_INVALID;
+				clk_domain_data.v3x_prog.b_force_noise_unaware_ordering = false;
+			}
+			clk_domain_data.v3x_prog.factory_offset_khz = 0;
+
+			clk_domain_data.v3x_prog.freq_delta_min_mhz =
+				(u16)(BIOS_GET_FIELD(clocks_table_entry.param1,
+				      NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM1_MASTER_FREQ_OC_DELTA_MIN_MHZ));
+
+			clk_domain_data.v3x_prog.freq_delta_max_mhz =
+				(u16)(BIOS_GET_FIELD(clocks_table_entry.param1,
+				      NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM1_MASTER_FREQ_OC_DELTA_MAX_MHZ));
+			break;
+
+		case  NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_FLAGS0_USAGE_SLAVE:
+			clk_domain_data.boardobj.type =
+				CTRL_CLK_CLK_DOMAIN_TYPE_3X_SLAVE;
+			clk_domain_data.v3x_prog.clk_prog_idx_first =
+				(u8)(BIOS_GET_FIELD(clocks_table_entry.param0,
+				     NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM0_PROG_CLK_PROG_IDX_FIRST));
+			clk_domain_data.v3x_prog.clk_prog_idx_last =
+				(u8)(BIOS_GET_FIELD(clocks_table_entry.param0,
+				     NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM0_PROG_CLK_PROG_IDX_LAST));
+			clk_domain_data.v3x_prog.noise_unaware_ordering_index =
+				(u8)(BIOS_GET_FIELD(clocks_table_entry.param2,
+				     NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_NOISE_UNAWARE_ORDERING_IDX));
+
+			if (clk_domain_data.v3x.b_noise_aware_capable) {
+				clk_domain_data.v3x_prog.noise_aware_ordering_index =
+					(u8)(BIOS_GET_FIELD(clocks_table_entry.param2,
+					     NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_NOISE_AWARE_ORDERING_IDX));
+				clk_domain_data.v3x_prog.b_force_noise_unaware_ordering =
+					(u8)(BIOS_GET_FIELD(clocks_table_entry.param2,
+					     NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_FORCE_NOISE_UNAWARE_ORDERING));
+			} else {
+				clk_domain_data.v3x_prog.noise_aware_ordering_index =
+					CTRL_CLK_CLK_DOMAIN_3X_PROG_ORDERING_INDEX_INVALID;
+				clk_domain_data.v3x_prog.b_force_noise_unaware_ordering = false;
+			}
+			clk_domain_data.v3x_prog.factory_offset_khz = 0;
+			clk_domain_data.v3x_prog.freq_delta_min_mhz = 0;
+			clk_domain_data.v3x_prog.freq_delta_max_mhz = 0;
+			clk_domain_data.v3x_slave.master_idx =
+				(u8)(BIOS_GET_FIELD(clocks_table_entry.param1,
+				     NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM1_SLAVE_MASTER_DOMAIN));
+			break;
+
+		default:
+			gk20a_err(dev_from_gk20a(g),
+				  "error reading clock domain entry %d", index);
+			status = -EINVAL;
+			goto done;
+
+		}
+		pclkdomain_dev = construct_clk_domain(g,
+				(void *)&clk_domain_data);
+		if (pclkdomain_dev == NULL) {
+			gk20a_err(dev_from_gk20a(g),
+				  "unable to construct clock domain boardobj for %d",
+				  index);
+			status = -EINVAL;
+			goto done;
+		}
+		status = boardobjgrp_objinsert(&pclkdomainobjs->super.super,
+				(struct boardobj *)pclkdomain_dev, index);
+		if (status) {
+			/*
+			 * NOTE(review): pclkdomain_dev is not released on
+			 * this path - confirm whether it must be destroyed.
+			 */
+			gk20a_err(dev_from_gk20a(g),
+			"unable to insert clock domain boardobj for %d", index);
+			status = -EINVAL;
+			goto done;
+		}
+		/* advance by the table's own stride, not the copied size */
+		clocks_tbl_entry_ptr += clocks_table_header.entry_size;
+	}
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+/*
+ * Default clkdomainclkproglink hook installed by
+ * clk_domain_construct_super(): always fails with -EINVAL (carried in the
+ * u32 return type).  None of the arguments are used.
+ */
+static u32 clkdomainclkproglink_not_supported(struct gk20a *g,
+					      struct clk_pmupstate *pclk,
+					      struct clk_domain *pdomain)
+{
+	gk20a_dbg_info("");
+	return -EINVAL;
+}
+
+/*
+ * Base constructor shared by all clock domain types.
+ *
+ * Builds the board object via boardobj_construct_super(), installs the base
+ * pmudatainit hook and the "not supported" prog-link hook, and copies the
+ * domain identification fields from the struct clk_domain view of @pargs.
+ *
+ * Returns 0 on success, or -EINVAL (carried in the u32 return type) when
+ * the board object cannot be constructed.
+ */
+static u32 clk_domain_construct_super(struct gk20a *g,
+				      struct boardobj **ppboardobj,
+				      u16 size, void *pargs)
+{
+	struct clk_domain *args = (struct clk_domain *)pargs;
+	struct clk_domain *dom;
+
+	if (boardobj_construct_super(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	dom = (struct clk_domain *)*ppboardobj;
+
+	dom->super.pmudatainit = clk_domain_pmudatainit_super;
+	dom->clkdomainclkproglink = clkdomainclkproglink_not_supported;
+
+	dom->api_domain = args->api_domain;
+	dom->domain = args->domain;
+	dom->perf_domain_grp_idx = args->perf_domain_grp_idx;
+
+	return 0;
+}
+
+/*
+ * PMU data initialisation for a 3X clock domain: runs the base domain
+ * initialisation, then adds the noise-aware capability flag.  Returns the
+ * base initialiser's status unchanged when it fails, 0 otherwise.
+ */
+static u32 _clk_domain_pmudatainit_3x(struct gk20a *g,
+				      struct boardobj *board_obj_ptr,
+				      struct nv_pmu_boardobj *ppmudata)
+{
+	struct clk_domain_3x *src;
+	struct nv_pmu_clk_clk_domain_3x_boardobj_set *dst;
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	rc = clk_domain_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (rc == 0) {
+		src = (struct clk_domain_3x *)board_obj_ptr;
+		dst = (struct nv_pmu_clk_clk_domain_3x_boardobj_set *)ppmudata;
+		dst->b_noise_aware_capable = src->b_noise_aware_capable;
+	}
+
+	return rc;
+}
+
+/*
+ * Constructor for the 3X clock domain layer.
+ *
+ * Sets the type mask of @pargs to the 3X type bit (overwriting any previous
+ * mask) before calling the base constructor, then installs the 3X
+ * pmudatainit hook and copies the noise-aware capability flag.
+ *
+ * Returns 0 on success, or -EINVAL (carried in the u32 return type) when
+ * the base constructor fails.
+ */
+static u32 clk_domain_construct_3x(struct gk20a *g,
+				   struct boardobj **ppboardobj,
+				   u16 size, void *pargs)
+{
+	struct boardobj *args_obj = (struct boardobj *)pargs;
+	struct clk_domain_3x *args_3x = (struct clk_domain_3x *)pargs;
+	struct clk_domain_3x *dom;
+
+	args_obj->type_mask = BIT(CTRL_CLK_CLK_DOMAIN_TYPE_3X);
+
+	if (clk_domain_construct_super(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	dom = (struct clk_domain_3x *)*ppboardobj;
+	dom->super.super.pmudatainit = _clk_domain_pmudatainit_3x;
+	dom->b_noise_aware_capable = args_3x->b_noise_aware_capable;
+
+	return 0;
+}
+
+/*
+ * Prog-link hook for programmable 3X domains: checks that every clock prog
+ * index in [clk_prog_idx_first, clk_prog_idx_last] resolves to an object.
+ *
+ * Returns 0 when all of them do, or -EINVAL (carried in the u32 return
+ * type) when at least one lookup yields NULL.  The whole range is walked
+ * even after a failed lookup.
+ */
+static u32 clkdomainclkproglink_3x_prog(struct gk20a *g,
+					struct clk_pmupstate *pclk,
+					struct clk_domain *pdomain)
+{
+	u32 status = 0;
+	struct clk_domain_3x_prog *p3xprog =
+		(struct clk_domain_3x_prog *)pdomain;
+	struct clk_prog *pprog = NULL;
+	/*
+	 * Wider than the 8-bit indices on purpose: with an 8-bit counter,
+	 * "i <= last" can never become false when last is 0xFF.
+	 */
+	u32 i;
+
+	gk20a_dbg_info("");
+
+	for (i = p3xprog->clk_prog_idx_first;
+	     i <= p3xprog->clk_prog_idx_last;
+	     i++) {
+		pprog = CLK_CLK_PROG_GET(pclk, (u8)i);
+		if (pprog == NULL)
+			status = -EINVAL;
+	}
+	return status;
+}
+
+/*
+ * PMU data initialisation for a programmable 3X clock domain: runs the 3X
+ * initialisation, then copies the prog index range, ordering indices,
+ * offsets and frequency delta limits of the domain, plus the group-wide
+ * deltas taken from g->clk_pmu.clk_domainobjs.  Returns the 3X
+ * initialiser's status unchanged when it fails, 0 otherwise.
+ */
+static u32 _clk_domain_pmudatainit_3x_prog(struct gk20a *g,
+					   struct boardobj *board_obj_ptr,
+					   struct nv_pmu_boardobj *ppmudata)
+{
+	struct clk_domain_3x_prog *src =
+		(struct clk_domain_3x_prog *)board_obj_ptr;
+	struct nv_pmu_clk_clk_domain_3x_prog_boardobj_set *dst =
+		(struct nv_pmu_clk_clk_domain_3x_prog_boardobj_set *)ppmudata;
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	rc = _clk_domain_pmudatainit_3x(g, board_obj_ptr, ppmudata);
+	if (rc != 0)
+		return rc;
+
+	dst->clk_prog_idx_first = src->clk_prog_idx_first;
+	dst->clk_prog_idx_last = src->clk_prog_idx_last;
+	dst->noise_unaware_ordering_index = src->noise_unaware_ordering_index;
+	dst->noise_aware_ordering_index = src->noise_aware_ordering_index;
+	dst->b_force_noise_unaware_ordering =
+		src->b_force_noise_unaware_ordering;
+	dst->factory_offset_khz = src->factory_offset_khz;
+	dst->freq_delta_min_mhz = src->freq_delta_min_mhz;
+	dst->freq_delta_max_mhz = src->freq_delta_max_mhz;
+
+	/* deltas are kept per group, not per domain */
+	memcpy(&dst->deltas, &g->clk_pmu.clk_domainobjs.deltas,
+		(sizeof(struct ctrl_clk_clk_delta)));
+
+	return rc;
+}
+
+/*
+ * Constructor for the programmable 3X clock domain layer.
+ *
+ * ORs the 3X_PROG type bit into the type mask of @pargs, calls the 3X
+ * constructor, installs the 3X_PROG pmudatainit and prog-link hooks and
+ * copies the programmable-domain fields from @pargs.
+ *
+ * Returns 0 on success, or -EINVAL (carried in the u32 return type) when
+ * the 3X constructor fails.
+ */
+static u32 clk_domain_construct_3x_prog(struct gk20a *g,
+					struct boardobj **ppboardobj,
+					u16 size, void *pargs)
+{
+	struct boardobj *args_obj = (struct boardobj *)pargs;
+	struct clk_domain_3x_prog *args_prog =
+		(struct clk_domain_3x_prog *)pargs;
+	struct clk_domain_3x_prog *dom;
+
+	args_obj->type_mask |= BIT(CTRL_CLK_CLK_DOMAIN_TYPE_3X_PROG);
+
+	if (clk_domain_construct_3x(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	dom = (struct clk_domain_3x_prog *)*ppboardobj;
+
+	dom->super.super.super.pmudatainit = _clk_domain_pmudatainit_3x_prog;
+	dom->super.super.clkdomainclkproglink = clkdomainclkproglink_3x_prog;
+
+	dom->clk_prog_idx_first = args_prog->clk_prog_idx_first;
+	dom->clk_prog_idx_last = args_prog->clk_prog_idx_last;
+	dom->noise_unaware_ordering_index =
+		args_prog->noise_unaware_ordering_index;
+	dom->noise_aware_ordering_index =
+		args_prog->noise_aware_ordering_index;
+	dom->b_force_noise_unaware_ordering =
+		args_prog->b_force_noise_unaware_ordering;
+	dom->factory_offset_khz = args_prog->factory_offset_khz;
+	dom->freq_delta_min_mhz = args_prog->freq_delta_min_mhz;
+	dom->freq_delta_max_mhz = args_prog->freq_delta_max_mhz;
+
+	return 0;
+}
+
+/*
+ * PMU data initialisation for a slave 3X clock domain: runs the
+ * programmable-domain initialisation, then adds the master index.  Returns
+ * the programmable initialiser's status unchanged when it fails, 0
+ * otherwise.
+ */
+static u32 _clk_domain_pmudatainit_3x_slave(struct gk20a *g,
+					    struct boardobj *board_obj_ptr,
+					    struct nv_pmu_boardobj *ppmudata)
+{
+	struct clk_domain_3x_slave *src;
+	struct nv_pmu_clk_clk_domain_3x_slave_boardobj_set *dst;
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	rc = _clk_domain_pmudatainit_3x_prog(g, board_obj_ptr, ppmudata);
+	if (rc == 0) {
+		src = (struct clk_domain_3x_slave *)board_obj_ptr;
+		dst = (struct nv_pmu_clk_clk_domain_3x_slave_boardobj_set *)
+			ppmudata;
+		dst->master_idx = src->master_idx;
+	}
+
+	return rc;
+}
+
+/*
+ * Constructor for slave 3X clock domains.
+ *
+ * Rejects @pargs whose board object type is not 3X_SLAVE.  Otherwise ORs
+ * the 3X_SLAVE type bit into its type mask, calls the programmable-domain
+ * constructor, installs the slave pmudatainit hook and copies the master
+ * index.
+ *
+ * Returns 0 on success, or -EINVAL (carried in the u32 return type) on a
+ * type mismatch or when the programmable-domain constructor fails.
+ */
+static u32 clk_domain_construct_3x_slave(struct gk20a *g,
+					 struct boardobj **ppboardobj,
+					 u16 size, void *pargs)
+{
+	struct boardobj *args_obj = (struct boardobj *)pargs;
+	struct clk_domain_3x_slave *args_slave =
+		(struct clk_domain_3x_slave *)pargs;
+	struct clk_domain_3x_slave *dom;
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_CLK_CLK_DOMAIN_TYPE_3X_SLAVE)
+		return -EINVAL;
+
+	args_obj->type_mask |= BIT(CTRL_CLK_CLK_DOMAIN_TYPE_3X_SLAVE);
+
+	if (clk_domain_construct_3x_prog(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	dom = (struct clk_domain_3x_slave *)*ppboardobj;
+	dom->super.super.super.super.pmudatainit =
+		_clk_domain_pmudatainit_3x_slave;
+	dom->master_idx = args_slave->master_idx;
+
+	return 0;
+}
+
+/*
+ * Prog-link hook for master 3X domains.
+ *
+ * First runs the programmable-domain link check, which verifies that every
+ * referenced clock prog exists.  Then, for each clock prog in the domain's
+ * index range, requires it to implement the 1X_MASTER type and calls its
+ * vfflatten hook with this domain's board object index, passing along a
+ * running freq_max_last_mhz value that starts at 0.
+ *
+ * Returns 0 on success, -EINVAL (carried in the u32 return type) when a
+ * prog is missing or is not a master prog, or the first non-zero status
+ * returned by vfflatten.
+ */
+static u32 clkdomainclkproglink_3x_master(struct gk20a *g,
+					  struct clk_pmupstate *pclk,
+					  struct clk_domain *pdomain)
+{
+	u32 status = 0;
+	struct clk_domain_3x_master *p3xmaster  =
+		(struct clk_domain_3x_master *)pdomain;
+	struct clk_prog *pprog = NULL;
+	struct clk_prog_1x_master *pprog1xmaster = NULL;
+	u16 freq_max_last_mhz = 0;
+	/*
+	 * Wider than the 8-bit indices on purpose: with an 8-bit counter,
+	 * "i <= last" can never become false when last is 0xFF.
+	 */
+	u32 i;
+
+	gk20a_dbg_info("");
+
+	status = clkdomainclkproglink_3x_prog(g, pclk, pdomain);
+	if (status)
+		goto done;
+
+	/* Iterate over the set of CLK_PROGs pointed at by this domain.*/
+	for (i = p3xmaster->super.clk_prog_idx_first;
+	     i <= p3xmaster->super.clk_prog_idx_last;
+	     i++) {
+		/* non-NULL: the link check above covered this same range */
+		pprog = CLK_CLK_PROG_GET(pclk, (u8)i);
+
+		/* MASTER CLK_DOMAINs must point to MASTER CLK_PROGs.*/
+		if (!pprog->super.implements(g, &pprog->super,
+				CTRL_CLK_CLK_PROG_TYPE_1X_MASTER)) {
+			status = -EINVAL;
+			goto done;
+		}
+
+		pprog1xmaster = (struct clk_prog_1x_master *)pprog;
+		status = pprog1xmaster->vfflatten(g, pclk, pprog1xmaster,
+			BOARDOBJ_GET_IDX(p3xmaster), &freq_max_last_mhz);
+		if (status)
+			goto done;
+	}
+done:
+	gk20a_dbg_info("done status %x", status);
+	return status;
+}
+
+/*
+ * PMU data initialisation for a master 3X clock domain: runs the
+ * programmable-domain initialisation, then adds the slave index mask.
+ * Returns the programmable initialiser's status unchanged when it fails, 0
+ * otherwise.
+ */
+static u32 _clk_domain_pmudatainit_3x_master(struct gk20a *g,
+					     struct boardobj *board_obj_ptr,
+					     struct nv_pmu_boardobj *ppmudata)
+{
+	struct clk_domain_3x_master *src;
+	struct nv_pmu_clk_clk_domain_3x_master_boardobj_set *dst;
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	rc = _clk_domain_pmudatainit_3x_prog(g, board_obj_ptr, ppmudata);
+	if (rc == 0) {
+		src = (struct clk_domain_3x_master *)board_obj_ptr;
+		dst = (struct nv_pmu_clk_clk_domain_3x_master_boardobj_set *)
+			ppmudata;
+		dst->slave_idxs_mask = src->slave_idxs_mask;
+	}
+
+	return rc;
+}
+
+/*
+ * Construct a 3X MASTER clock domain from the template passed in pargs.
+ *
+ * pargs must be a board object of type CTRL_CLK_CLK_DOMAIN_TYPE_3X_MASTER;
+ * its type_mask gets the MASTER bit OR'ed in before the 3X_PROG constructor
+ * runs. On success the new object in *ppboardobj has the master pmudatainit
+ * and clkdomainclkproglink hooks installed and slave_idxs_mask copied from
+ * the template.
+ *
+ * Returns 0 on success, -EINVAL on a type mismatch or constructor failure.
+ */
+static u32 clk_domain_construct_3x_master(struct gk20a *g,
+					  struct boardobj **ppboardobj,
+					  u16 size, void *pargs)
+{
+	struct boardobj *args_obj = (struct boardobj *)pargs;
+	struct clk_domain_3x_master *args_master =
+			(struct clk_domain_3x_master *)pargs;
+	struct clk_domain_3x_master *master;
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_CLK_CLK_DOMAIN_TYPE_3X_MASTER)
+		return -EINVAL;
+
+	args_obj->type_mask |= BIT(CTRL_CLK_CLK_DOMAIN_TYPE_3X_MASTER);
+
+	if (clk_domain_construct_3x_prog(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	master = (struct clk_domain_3x_master *)*ppboardobj;
+
+	/* Type-specific data first, then the overriding hooks. */
+	master->slave_idxs_mask = args_master->slave_idxs_mask;
+	master->super.super.super.clkdomainclkproglink =
+				clkdomainclkproglink_3x_master;
+	master->super.super.super.super.pmudatainit =
+			_clk_domain_pmudatainit_3x_master;
+
+	return 0;
+}
+
+/*
+ * clkdomainclkproglink hook installed on 3X FIXED clock domains by
+ * clk_domain_construct_3x_fixed(). It emits a debug trace and reports
+ * success without touching pclk or pdomain.
+ */
+static u32 clkdomainclkproglink_fixed(struct gk20a *g,
+				      struct clk_pmupstate *pclk,
+				      struct clk_domain *pdomain)
+{
+	gk20a_dbg_info("");
+	return 0;
+}
+
+/*
+ * Fill the PMU boardobj-set payload for a 3X FIXED clock domain.
+ *
+ * The 3X portion is written by _clk_domain_pmudatainit_3x(); when that
+ * succeeds the domain's freq_mhz is copied across. A non-zero status from
+ * the parent is returned unchanged and freq_mhz is left untouched.
+ */
+static u32 _clk_domain_pmudatainit_3x_fixed(struct gk20a *g,
+					    struct boardobj *board_obj_ptr,
+					    struct nv_pmu_boardobj *ppmudata)
+{
+	struct clk_domain_3x_fixed *fixed =
+		(struct clk_domain_3x_fixed *)board_obj_ptr;
+	struct nv_pmu_clk_clk_domain_3x_fixed_boardobj_set *pmu_set =
+		(struct nv_pmu_clk_clk_domain_3x_fixed_boardobj_set *)ppmudata;
+	u32 status;
+
+	gk20a_dbg_info("");
+
+	status = _clk_domain_pmudatainit_3x(g, board_obj_ptr, ppmudata);
+	if (status == 0)
+		pmu_set->freq_mhz = fixed->freq_mhz;
+
+	return status;
+}
+
+/*
+ * Construct a 3X FIXED clock domain from the template passed in pargs.
+ *
+ * pargs must be a board object of type CTRL_CLK_CLK_DOMAIN_TYPE_3X_FIXED;
+ * its type_mask gets the FIXED bit OR'ed in before the 3X constructor runs.
+ * On success the new object in *ppboardobj has the fixed pmudatainit and
+ * clkdomainclkproglink hooks installed and freq_mhz copied from the template.
+ *
+ * Returns 0 on success, -EINVAL on a type mismatch or constructor failure.
+ */
+static u32 clk_domain_construct_3x_fixed(struct gk20a *g,
+					 struct boardobj **ppboardobj,
+					 u16 size, void *pargs)
+{
+	struct boardobj *args_obj = (struct boardobj *)pargs;
+	struct clk_domain_3x_fixed *args_fixed =
+			(struct clk_domain_3x_fixed *)pargs;
+	struct clk_domain_3x_fixed *fixed;
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_CLK_CLK_DOMAIN_TYPE_3X_FIXED)
+		return -EINVAL;
+
+	args_obj->type_mask |= BIT(CTRL_CLK_CLK_DOMAIN_TYPE_3X_FIXED);
+
+	if (clk_domain_construct_3x(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	fixed = (struct clk_domain_3x_fixed *)*ppboardobj;
+
+	/* Type-specific data first, then the overriding hooks. */
+	fixed->freq_mhz = args_fixed->freq_mhz;
+	fixed->super.super.clkdomainclkproglink =
+			clkdomainclkproglink_fixed;
+	fixed->super.super.super.pmudatainit =
+			_clk_domain_pmudatainit_3x_fixed;
+
+	return 0;
+}
+
+/*
+ * Dispatch to the constructor matching the board object type found in pargs
+ * (3X FIXED, 3X MASTER or 3X SLAVE), passing the size of the matching
+ * structure. Returns the new object viewed as a struct clk_domain, or NULL
+ * when the type is not one of the three or the constructor reports failure.
+ */
+static struct clk_domain *construct_clk_domain(struct gk20a *g, void *pargs)
+{
+	struct boardobj *new_obj = NULL;
+	u32 err;
+
+	gk20a_dbg_info(" %d", BOARDOBJ_GET_TYPE(pargs));
+
+	if (BOARDOBJ_GET_TYPE(pargs) == CTRL_CLK_CLK_DOMAIN_TYPE_3X_FIXED)
+		err = clk_domain_construct_3x_fixed(g, &new_obj,
+			sizeof(struct clk_domain_3x_fixed), pargs);
+	else if (BOARDOBJ_GET_TYPE(pargs) ==
+			CTRL_CLK_CLK_DOMAIN_TYPE_3X_MASTER)
+		err = clk_domain_construct_3x_master(g, &new_obj,
+			sizeof(struct clk_domain_3x_master), pargs);
+	else if (BOARDOBJ_GET_TYPE(pargs) ==
+			CTRL_CLK_CLK_DOMAIN_TYPE_3X_SLAVE)
+		err = clk_domain_construct_3x_slave(g, &new_obj,
+			sizeof(struct clk_domain_3x_slave), pargs);
+	else
+		return NULL;
+
+	if (err)
+		return NULL;
+
+	gk20a_dbg_info(" Done");
+
+	return (struct clk_domain *)new_obj;
+}
+
+/*
+ * Fill the base clock-domain part of the PMU boardobj-set payload.
+ *
+ * The generic board object header is written by
+ * boardobj_pmudatainit_super(); when that succeeds, domain, api_domain and
+ * perf_domain_grp_idx are copied from the clk_domain. A non-zero status from
+ * the parent is returned unchanged.
+ */
+static u32 clk_domain_pmudatainit_super(struct gk20a *g,
+					struct boardobj *board_obj_ptr,
+					struct nv_pmu_boardobj *ppmudata)
+{
+	struct clk_domain *dom = (struct clk_domain *)board_obj_ptr;
+	struct nv_pmu_clk_clk_domain_boardobj_set *pmu_set =
+		(struct nv_pmu_clk_clk_domain_boardobj_set *)ppmudata;
+	u32 status;
+
+	gk20a_dbg_info("");
+
+	status = boardobj_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (status == 0) {
+		pmu_set->domain = dom->domain;
+		pmu_set->api_domain = dom->api_domain;
+		pmu_set->perf_domain_grp_idx = dom->perf_domain_grp_idx;
+	}
+
+	return status;
+}
+
+/*
+ * Invoke the clkdomainclkproglink hook of every clock domain in
+ * pclk->clk_domainobjs. Stops at the first hook that returns non-zero, logs
+ * the offending domain and returns that status; returns 0 otherwise.
+ */
+u32 clk_domain_clk_prog_link(struct gk20a *g, struct clk_pmupstate *pclk)
+{
+	struct clk_domain *pdomain;
+	u32 status = 0;
+	u8 i;
+
+	/* Iterate over all CLK_DOMAINs and flatten their VF curves.*/
+	BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs.super.super),
+			struct clk_domain *, pdomain, i) {
+		status = pdomain->clkdomainclkproglink(g, pclk, pdomain);
+		if (!status)
+			continue;
+
+		gk20a_err(dev_from_gk20a(g),
+			  "error flattening VF for CLK DOMAIN - 0x%x",
+			  pdomain->domain);
+		return status;
+	}
+
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/clk/clk_domain.h b/drivers/gpu/nvgpu/clk/clk_domain.h
new file mode 100644
index 00000000..94d612a7
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_domain.h
@@ -0,0 +1,94 @@
+/*
+* Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms and conditions of the GNU General Public License,
+* version 2, as published by the Free Software Foundation.
+*
+* This program is distributed in the hope it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+* more details.
+*/
+
+#ifndef _CLKDOMAIN_H_
+#define _CLKDOMAIN_H_
+
+#include "ctrl/ctrlclk.h"
+#include "ctrl/ctrlboardobj.h"
+#include "pmuif/gpmuifclk.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "boardobj/boardobjgrpmask.h"
+
+struct clk_domains;
+struct clk_domain;
+
+/*data and function definition to talk to driver*/
+u32 clk_domain_sw_setup(struct gk20a *g);
+u32 clk_domain_pmu_setup(struct gk20a *g);
+typedef u32 clkproglink(struct gk20a *g, struct clk_pmupstate *pclk,
+			struct clk_domain *pdomain);
+/*
+ * Board object group of clock domains plus group-wide settings.
+ * The first member is the E32 group base.
+ * NOTE(review): presumably the type of clk_pmupstate.clk_domainobjs, which
+ * clk_domain_clk_prog_link() iterates - confirm against clk.h.
+ */
+struct clk_domains {
+	struct boardobjgrp_e32 super;
+	u8 n_num_entries;
+	u8 version;
+	bool b_enforce_vf_monotonicity;
+	u32 vbios_domains;
+	struct boardobjgrpmask_e32 prog_domains_mask;
+	struct boardobjgrpmask_e32 master_domains_mask;
+	u16 cntr_sampling_periodms;
+	struct ctrl_clk_clk_delta  deltas;
+
+	struct clk_domain *ordered_noise_aware_list[CTRL_BOARDOBJ_MAX_BOARD_OBJECTS];
+
+	struct clk_domain *ordered_noise_unaware_list[CTRL_BOARDOBJ_MAX_BOARD_OBJECTS];
+};
+
+/*
+ * Base clock domain board object; the 3X variants embed it as their first
+ * member so pointers can be cast between the levels.
+ *
+ * domain, api_domain and perf_domain_grp_idx are the fields exported to the
+ * PMU by clk_domain_pmudatainit_super(). clkdomainclkproglink is the
+ * per-type link hook called by clk_domain_clk_prog_link().
+ */
+struct clk_domain {
+	struct boardobj super;
+	u32 api_domain;
+	u32 part_mask;
+	u8 domain;
+	u8 perf_domain_index;
+	u8 perf_domain_grp_idx;
+	u8 ratio_domain;
+	u8 usage;
+	clkproglink *clkdomainclkproglink;
+};
+
+/*
+ * 3X clock domain: base clk_domain plus a noise-aware capability flag.
+ * Embedded by clk_domain_3x_fixed and clk_domain_3x_prog.
+ */
+struct clk_domain_3x {
+	struct clk_domain super;
+	bool b_noise_aware_capable;
+};
+
+/*
+ * 3X FIXED clock domain. freq_mhz is copied from the construction template
+ * and exported to the PMU by _clk_domain_pmudatainit_3x_fixed().
+ */
+struct clk_domain_3x_fixed {
+	struct clk_domain_3x super;
+	u16  freq_mhz;
+};
+
+/*
+ * 3X programmable clock domain, embedded by the MASTER and SLAVE variants.
+ * clk_prog_idx_first..clk_prog_idx_last is the inclusive CLK_PROG index
+ * range that clkdomainclkproglink_3x_master() walks.
+ */
+struct clk_domain_3x_prog {
+	struct clk_domain_3x super;
+	u8  clk_prog_idx_first;
+	u8  clk_prog_idx_last;
+	u8 noise_unaware_ordering_index;
+	u8 noise_aware_ordering_index;
+	bool b_force_noise_unaware_ordering;
+	int factory_offset_khz;
+	short freq_delta_min_mhz;
+	short freq_delta_max_mhz;
+	struct ctrl_clk_clk_delta deltas;
+};
+
+/*
+ * 3X MASTER clock domain. slave_idxs_mask is copied from the construction
+ * template and exported to the PMU by _clk_domain_pmudatainit_3x_master().
+ */
+struct clk_domain_3x_master {
+	struct clk_domain_3x_prog super;
+	u32 slave_idxs_mask;
+};
+
+/*
+ * 3X SLAVE clock domain. master_idx is copied from the construction
+ * template by clk_domain_construct_3x_slave().
+ */
+struct clk_domain_3x_slave {
+	struct clk_domain_3x_prog super;
+	u8 master_idx;
+};
+
+u32 clk_domain_clk_prog_link(struct gk20a *g, struct clk_pmupstate *pclk);
+
+#endif
diff --git a/drivers/gpu/nvgpu/clk/clk_fll.c b/drivers/gpu/nvgpu/clk/clk_fll.c
new file mode 100644
index 00000000..0de857f5
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_fll.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "clk.h"
+#include "clk_fll.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "pmuif/gpmuifclk.h"
+#include "gm206/bios_gm206.h"
+#include "ctrl/ctrlclk.h"
+#include "ctrl/ctrlvolt.h"
+#include "gk20a/pmu_gk20a.h"
+
+static u32 devinit_get_fll_device_table(struct gk20a *g,
+				   struct avfsfllobjs *pfllobjs);
+static struct fll_device *construct_fll_device(struct gk20a *g,
+		void *pargs);
+static u32 fll_device_init_pmudata_super(struct gk20a *g,
+				    struct boardobj *board_obj_ptr,
+				    struct nv_pmu_boardobj *ppmudata);
+
+/*
+ * Group-level pmudatainit callback for the FLL device group.
+ *
+ * After boardobjgrp_pmudatainit_e32() has filled the generic header, copies
+ * the LUT parameters and max_min_freq_mhz from the avfsfllobjs group and
+ * exports lut_prog_master_mask into the PMU header. Returns the parent's
+ * status on failure, otherwise the status of boardobjgrpmask_export().
+ */
+static u32 _clk_fll_devgrp_pmudatainit_super(struct gk20a *g,
+					       struct boardobjgrp *pboardobjgrp,
+					       struct nv_pmu_boardobjgrp_super *pboardobjgrppmu)
+{
+	struct avfsfllobjs *fll_grp = (struct avfsfllobjs *)pboardobjgrp;
+	struct nv_pmu_clk_clk_fll_device_boardobjgrp_set_header *hdr =
+		(struct nv_pmu_clk_clk_fll_device_boardobjgrp_set_header *)
+		pboardobjgrppmu;
+	u32 status;
+
+	gk20a_dbg_info("");
+
+	status = boardobjgrp_pmudatainit_e32(g, pboardobjgrp, pboardobjgrppmu);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g), "failed to init fll pmuobjgrp");
+		return status;
+	}
+
+	hdr->max_min_freq_mhz = fll_grp->max_min_freq_mhz;
+	hdr->lut_min_voltage_uv = fll_grp->lut_min_voltage_uv;
+	hdr->lut_step_size_uv = fll_grp->lut_step_size_uv;
+	hdr->lut_num_entries = fll_grp->lut_num_entries;
+
+	status = boardobjgrpmask_export(
+		&fll_grp->lut_prog_master_mask.super,
+		fll_grp->lut_prog_master_mask.super.bitcount,
+		&hdr->lut_prog_master_mask.super);
+
+	gk20a_dbg_info(" Done");
+	return status;
+}
+
+/*
+ * Return, through ppboardobjpmudata, the PMU SET payload slot for the FLL
+ * device at index idx. Fails with -EINVAL when bit idx is clear in word 0
+ * of the group's object mask.
+ */
+static u32 _clk_fll_devgrp_pmudata_instget(struct gk20a *g,
+					     struct nv_pmu_boardobjgrp *pmuboardobjgrp,
+					     struct nv_pmu_boardobj **ppboardobjpmudata,
+					     u8 idx)
+{
+	struct nv_pmu_clk_clk_fll_device_boardobj_grp_set *fll_set =
+		(struct nv_pmu_clk_clk_fll_device_boardobj_grp_set *)
+		pmuboardobjgrp;
+
+	gk20a_dbg_info("");
+
+	/* Only hand out a slot for an index present in the object mask. */
+	if ((u32)BIT(idx) &
+		fll_set->hdr.data.super.obj_mask.super.data[0]) {
+		*ppboardobjpmudata = (struct nv_pmu_boardobj *)
+			&fll_set->objects[idx].data.board_obj;
+		gk20a_dbg_info(" Done");
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Return, through ppboardobjpmustatus, the PMU GET_STATUS payload slot for
+ * the FLL device at index idx. Fails with -EINVAL when bit idx is clear in
+ * word 0 of the group's object mask.
+ */
+static u32 _clk_fll_devgrp_pmustatus_instget(struct gk20a *g,
+					       void *pboardobjgrppmu,
+					       struct nv_pmu_boardobj_query **ppboardobjpmustatus,
+					       u8 idx)
+{
+	struct nv_pmu_clk_clk_fll_device_boardobj_grp_get_status *fll_status =
+		(struct nv_pmu_clk_clk_fll_device_boardobj_grp_get_status *)
+		pboardobjgrppmu;
+
+	/* Only hand out a slot for an index present in the object mask. */
+	if ((u32)BIT(idx) &
+		fll_status->hdr.data.super.obj_mask.super.data[0]) {
+		*ppboardobjpmustatus = (struct nv_pmu_boardobj_query *)
+				&fll_status->objects[idx].data.board_obj;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Build the software state for the FLL device board object group.
+ *
+ * Constructs the E32 group in g->clk_pmu.avfs_fllobjs, registers the PMU SET
+ * command interface, installs the group callbacks, seeds the LUT parameters
+ * from the CTRL_CLK_* constants, parses the VBIOS FLL table and then
+ * registers the PMU GET_STATUS interface.
+ *
+ * Finally every FLL is attached to a LUT-programming master: if no FLL
+ * already in lut_prog_master_mask shares its clock domain, the FLL itself is
+ * added to the mask and becomes the master. Each FLL (masters included) is
+ * then registered with its master through lut_broadcast_slave_register().
+ *
+ * Returns 0 on success or the first non-zero status from a failing step.
+ */
+u32 clk_fll_sw_setup(struct gk20a *g)
+{
+	u32 status;
+	struct boardobjgrp *pboardobjgrp = NULL;
+	struct avfsfllobjs *pfllobjs;
+	struct fll_device *pfll;
+	struct fll_device *pfll_master;
+	struct fll_device *pfll_local;
+	u8 i;
+	u8 j;
+
+	gk20a_dbg_info("");
+
+	status = boardobjgrpconstruct_e32(&g->clk_pmu.avfs_fllobjs.super);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+		"error creating boardobjgrp for fll, status - 0x%x", status);
+		goto done;
+	}
+	pfllobjs = &(g->clk_pmu.avfs_fllobjs);
+	pboardobjgrp = &(g->clk_pmu.avfs_fllobjs.super.super);
+
+	BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, CLK, FLL_DEVICE);
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp,
+			clk, CLK, clk_fll_device, CLK_FLL_DEVICE);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x",
+			  status);
+		goto done;
+	}
+
+	pboardobjgrp->pmudatainit  = _clk_fll_devgrp_pmudatainit_super;
+	pboardobjgrp->pmudatainstget  = _clk_fll_devgrp_pmudata_instget;
+	pboardobjgrp->pmustatusinstget  = _clk_fll_devgrp_pmustatus_instget;
+
+	/* pfllobjs already points at the group; no need to re-derive it. */
+	pfllobjs->lut_num_entries = CTRL_CLK_LUT_NUM_ENTRIES;
+	pfllobjs->lut_step_size_uv = CTRL_CLK_VIN_STEP_SIZE_UV;
+	pfllobjs->lut_min_voltage_uv = CTRL_CLK_LUT_MIN_VOLTAGE_UV;
+
+	/* Initialize lut prog master mask to zero.*/
+	boardobjgrpmask_e32_init(&pfllobjs->lut_prog_master_mask, NULL);
+
+	status = devinit_get_fll_device_table(g, pfllobjs);
+	if (status)
+		goto done;
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_GET_STATUS_CONSTRUCT(g,
+				&g->clk_pmu.avfs_fllobjs.super.super,
+				clk, CLK, clk_fll_device, CLK_FLL_DEVICE);
+	if (status) {
+		/* Name the interface that actually failed (was "..._SET"). */
+		gk20a_err(dev_from_gk20a(g),
+			  "error constructing PMU_BOARDOBJ_CMD_GRP_GET_STATUS interface - 0x%x",
+			  status);
+		goto done;
+	}
+
+	BOARDOBJGRP_FOR_EACH(&(pfllobjs->super.super),
+			     struct fll_device *, pfll, i) {
+		/* Look for an existing master in the same clock domain. */
+		pfll_master = NULL;
+		j = 0;
+		BOARDOBJGRP_ITERATOR(&(pfllobjs->super.super),
+				     struct fll_device *, pfll_local, j,
+				     &pfllobjs->lut_prog_master_mask.super) {
+			if (pfll_local->clk_domain == pfll->clk_domain) {
+				pfll_master = pfll_local;
+				break;
+			}
+		}
+
+		/* None found: this FLL becomes the master for its domain. */
+		if (pfll_master == NULL) {
+			status = boardobjgrpmask_bitset(
+				&pfllobjs->lut_prog_master_mask.super,
+				BOARDOBJ_GET_IDX(pfll));
+			if (status) {
+				gk20a_err(dev_from_gk20a(g), "err setting lutprogmask");
+				goto done;
+			}
+			pfll_master = pfll;
+		}
+		status = pfll_master->lut_broadcast_slave_register(
+			g, pfllobjs, pfll_master, pfll);
+
+		if (status) {
+			gk20a_err(dev_from_gk20a(g), "err setting lutslavemask");
+			goto done;
+		}
+	}
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+/*
+ * Hand the FLL device group to the PMU by calling the group's pmuinithandle
+ * hook. Returns -EINVAL if the group has not been constructed, otherwise the
+ * hook's status.
+ */
+u32 clk_fll_pmu_setup(struct gk20a *g)
+{
+	struct boardobjgrp *fll_grp = &g->clk_pmu.avfs_fllobjs.super.super;
+	u32 status;
+
+	gk20a_dbg_info("");
+
+	if (!fll_grp->bconstructed)
+		return -EINVAL;
+
+	status = fll_grp->pmuinithandle(g, fll_grp);
+
+	gk20a_dbg_info("Done");
+	return status;
+}
+
+/*
+ * Parse the VBIOS FLL descriptor table and create one fll_device board
+ * object per usable entry, inserted into pfllobjs at the entry's table index.
+ *
+ * The header is read in its 4- or 6-byte form depending on the size it
+ * advertises; max_min_freq_mhz is taken from the 6-byte form, else set to 0.
+ *
+ * For each entry that is not CTRL_CLK_FLL_TYPE_DISABLED, the logic and SRAM
+ * VIN devices it references get the FLL id OR'ed into flls_shared_mask.
+ * Entries whose VBIOS clock domain is not 0, 1 or 3 are then skipped (their
+ * VIN masks have already been updated at that point).
+ *
+ * Returns 0 on success, -EINVAL when the table or a referenced VIN device is
+ * missing or an FLL device cannot be constructed, or the status of a failed
+ * boardobjgrp_objinsert().
+ */
+static u32 devinit_get_fll_device_table(struct gk20a *g,
+				   struct avfsfllobjs *pfllobjs)
+{
+	u32 status = 0;
+	u8 *fll_table_ptr = NULL;
+	struct fll_descriptor_header fll_desc_table_header_sz = { 0 };
+	struct fll_descriptor_header_10 fll_desc_table_header = { 0 };
+	struct fll_descriptor_entry_10 fll_desc_table_entry = { 0 };
+	u8 *fll_tbl_entry_ptr = NULL;
+	u32 index = 0;
+	struct fll_device fll_dev_data;
+	struct fll_device *pfll_dev;
+	struct vin_device *pvin_dev;
+	u32 desctablesize;
+	u32 vbios_domain = NV_PERF_DOMAIN_4X_CLOCK_DOMAIN_SKIP;
+	struct avfsvinobjs *pvinobjs = &g->clk_pmu.avfs_vinobjs;
+
+	gk20a_dbg_info("");
+
+	/* Without the lookup hook there is no table to parse. */
+	if (!g->ops.bios.get_perf_table_ptrs) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	fll_table_ptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+			  g->bios.clock_token, FLL_TABLE);
+	if (fll_table_ptr == NULL) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	memcpy(&fll_desc_table_header_sz, fll_table_ptr,
+			sizeof(struct fll_descriptor_header));
+	if (fll_desc_table_header_sz.size >= FLL_DESCRIPTOR_HEADER_10_SIZE_6)
+		desctablesize = FLL_DESCRIPTOR_HEADER_10_SIZE_6;
+	else
+		desctablesize = FLL_DESCRIPTOR_HEADER_10_SIZE_4;
+
+	memcpy(&fll_desc_table_header, fll_table_ptr, desctablesize);
+
+	if (desctablesize == FLL_DESCRIPTOR_HEADER_10_SIZE_6)
+		pfllobjs->max_min_freq_mhz =
+			fll_desc_table_header.max_min_freq_mhz;
+	else
+		pfllobjs->max_min_freq_mhz = 0;
+
+	/* Read table entries*/
+	for (index = 0; index < fll_desc_table_header.entry_count; index++) {
+		u32 fll_id;
+
+		/*
+		 * Derive the entry address from the index so that entries
+		 * skipped with "continue" below cannot stall the walk on the
+		 * same entry.
+		 */
+		fll_tbl_entry_ptr = fll_table_ptr + desctablesize +
+			(index * fll_desc_table_header.entry_size);
+
+		memcpy(&fll_desc_table_entry, fll_tbl_entry_ptr,
+				sizeof(struct fll_descriptor_entry_10));
+
+		if (fll_desc_table_entry.fll_device_type == CTRL_CLK_FLL_TYPE_DISABLED)
+			continue;
+
+		fll_id = fll_desc_table_entry.fll_device_id;
+
+		pvin_dev = CLK_GET_VIN_DEVICE(pvinobjs,
+				(u8)fll_desc_table_entry.vin_idx_logic);
+		if (pvin_dev == NULL) {
+			status = -EINVAL;
+			goto done;
+		}
+
+		pvin_dev->flls_shared_mask |= BIT(fll_id);
+
+		pvin_dev = CLK_GET_VIN_DEVICE(pvinobjs,
+				(u8)fll_desc_table_entry.vin_idx_sram);
+		if (pvin_dev == NULL) {
+			status = -EINVAL;
+			goto done;
+		}
+
+		pvin_dev->flls_shared_mask |= BIT(fll_id);
+
+		/*
+		 * Start from a zeroed template so members that are not
+		 * assigned below (e.g. target_regime_id_override and the
+		 * rest of the embedded boardobj) are not stack garbage when
+		 * the constructor reads the template.
+		 */
+		memset(&fll_dev_data, 0, sizeof(fll_dev_data));
+
+		fll_dev_data.super.type =
+			(u8)fll_desc_table_entry.fll_device_type;
+		fll_dev_data.id = (u8)fll_desc_table_entry.fll_device_id;
+		fll_dev_data.mdiv = (u8)BIOS_GET_FIELD(
+			fll_desc_table_entry.fll_params,
+			NV_FLL_DESC_FLL_PARAMS_MDIV);
+		fll_dev_data.input_freq_mhz =
+			(u16)fll_desc_table_entry.ref_freq_mhz;
+		fll_dev_data.min_freq_vfe_idx =
+			(u8)fll_desc_table_entry.min_freq_vfe_idx;
+		fll_dev_data.freq_ctrl_idx = CTRL_BOARDOBJ_IDX_INVALID;
+
+		vbios_domain = (u32)(fll_desc_table_entry.clk_domain &
+					NV_PERF_DOMAIN_4X_CLOCK_DOMAIN_MASK);
+		if (vbios_domain == 0)
+			fll_dev_data.clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
+		else if (vbios_domain == 1)
+			fll_dev_data.clk_domain = CTRL_CLK_DOMAIN_XBAR2CLK;
+		else if (vbios_domain == 3)
+			fll_dev_data.clk_domain = CTRL_CLK_DOMAIN_SYS2CLK;
+		else
+			continue;
+
+		fll_dev_data.rail_idx_for_lut = 0;
+
+		fll_dev_data.vin_idx_logic =
+			(u8)fll_desc_table_entry.vin_idx_logic;
+		fll_dev_data.vin_idx_sram =
+			(u8)fll_desc_table_entry.vin_idx_sram;
+		fll_dev_data.lut_device.vselect_mode =
+			(u8)BIOS_GET_FIELD(fll_desc_table_entry.lut_params,
+					   NV_FLL_DESC_LUT_PARAMS_VSELECT);
+		fll_dev_data.lut_device.hysteresis_threshold =
+			(u8)BIOS_GET_FIELD(fll_desc_table_entry.lut_params,
+					   NV_FLL_DESC_LUT_PARAMS_HYSTERISIS_THRESHOLD);
+		fll_dev_data.regime_desc.regime_id =
+			CTRL_CLK_FLL_REGIME_ID_FFR;
+		fll_dev_data.regime_desc.fixed_freq_regime_limit_mhz =
+			(u16)fll_desc_table_entry.ffr_cutoff_freq_mhz;
+
+		/*construct fll device*/
+		pfll_dev = construct_fll_device(g, (void *)&fll_dev_data);
+		if (pfll_dev == NULL) {
+			gk20a_err(dev_from_gk20a(g),
+				  "unable to construct fll device %d", index);
+			status = -EINVAL;
+			goto done;
+		}
+
+		/* Stop on the first failed insert instead of losing it. */
+		status = boardobjgrp_objinsert(&pfllobjs->super.super,
+				(struct boardobj *)pfll_dev, index);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				  "unable to insert fll device %d", index);
+			goto done;
+		}
+	}
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+/*
+ * Record pfll_slave in pfll's LUT broadcast slave mask. Both devices must
+ * share a clock domain, otherwise -EINVAL is returned; on a match the result
+ * of boardobjgrpmask_bitset() is returned.
+ */
+static u32 lutbroadcastslaveregister(struct gk20a *g,
+				     struct avfsfllobjs *pfllobjs,
+				     struct fll_device *pfll,
+				     struct fll_device *pfll_slave)
+{
+	if (pfll->clk_domain == pfll_slave->clk_domain)
+		return boardobjgrpmask_bitset(
+			&pfll->lut_prog_broadcast_slave_mask.super,
+			BOARDOBJ_GET_IDX(pfll_slave));
+
+	return -EINVAL;
+}
+
+/*
+ * Create an fll_device board object from the fll_device template in pargs.
+ *
+ * The base object is built by boardobj_construct_super(); the pmudatainit
+ * and lut_broadcast_slave_register hooks are then installed, the FLL fields
+ * are copied from the template and the broadcast slave mask is initialised.
+ * Returns the new device, or NULL when the base constructor fails.
+ */
+static struct fll_device *construct_fll_device(struct gk20a *g,
+		void *pargs)
+{
+	struct fll_device *tmpl = (struct fll_device *)pargs;
+	struct boardobj *obj = NULL;
+	struct fll_device *fll;
+
+	gk20a_dbg_info("");
+
+	if (boardobj_construct_super(g, &obj,
+		sizeof(struct fll_device), pargs))
+		return NULL;
+
+	fll = (struct fll_device *)obj;
+
+	/* Hooks. */
+	obj->pmudatainit  = fll_device_init_pmudata_super;
+	fll->lut_broadcast_slave_register = lutbroadcastslaveregister;
+
+	/* Plain fields copied from the template. */
+	fll->id = tmpl->id;
+	fll->mdiv = tmpl->mdiv;
+	fll->rail_idx_for_lut = tmpl->rail_idx_for_lut;
+	fll->input_freq_mhz = tmpl->input_freq_mhz;
+	fll->clk_domain = tmpl->clk_domain;
+	fll->vin_idx_logic = tmpl->vin_idx_logic;
+	fll->vin_idx_sram = tmpl->vin_idx_sram;
+	fll->min_freq_vfe_idx = tmpl->min_freq_vfe_idx;
+	fll->freq_ctrl_idx = tmpl->freq_ctrl_idx;
+
+	/* Embedded descriptors are copied wholesale. */
+	memcpy(&fll->lut_device, &tmpl->lut_device,
+		sizeof(fll->lut_device));
+	memcpy(&fll->regime_desc, &tmpl->regime_desc,
+		sizeof(fll->regime_desc));
+
+	boardobjgrpmask_e32_init(&fll->lut_prog_broadcast_slave_mask, NULL);
+
+	gk20a_dbg_info(" Done");
+
+	return fll;
+}
+
+/*
+ * pmudatainit callback for an fll_device: fills the PMU boardobj-set
+ * payload from the device.
+ *
+ * The generic header is written by boardobj_pmudatainit_super(); its
+ * non-zero status is returned unchanged. Otherwise the FLL fields and the
+ * two embedded descriptors are copied and the broadcast slave mask is
+ * exported; the status of boardobjgrpmask_export() is returned.
+ */
+static u32 fll_device_init_pmudata_super(struct gk20a *g,
+				    struct boardobj *board_obj_ptr,
+				    struct nv_pmu_boardobj *ppmudata)
+{
+	struct fll_device *fll = (struct fll_device *)board_obj_ptr;
+	struct nv_pmu_clk_clk_fll_device_boardobj_set *pmu_set =
+		(struct nv_pmu_clk_clk_fll_device_boardobj_set *)ppmudata;
+	u32 status;
+
+	gk20a_dbg_info("");
+
+	status = boardobj_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (status != 0)
+		return status;
+
+	/* Plain fields. */
+	pmu_set->id = fll->id;
+	pmu_set->mdiv = fll->mdiv;
+	pmu_set->rail_idx_for_lut = fll->rail_idx_for_lut;
+	pmu_set->input_freq_mhz = fll->input_freq_mhz;
+	pmu_set->vin_idx_logic = fll->vin_idx_logic;
+	pmu_set->vin_idx_sram = fll->vin_idx_sram;
+	pmu_set->clk_domain = fll->clk_domain;
+	pmu_set->min_freq_vfe_idx = fll->min_freq_vfe_idx;
+	pmu_set->freq_ctrl_idx = fll->freq_ctrl_idx;
+
+	/* Embedded descriptors are copied wholesale. */
+	memcpy(&pmu_set->lut_device, &fll->lut_device,
+		sizeof(fll->lut_device));
+	memcpy(&pmu_set->regime_desc, &fll->regime_desc,
+		sizeof(fll->regime_desc));
+
+	status = boardobjgrpmask_export(
+		&fll->lut_prog_broadcast_slave_mask.super,
+		fll->lut_prog_broadcast_slave_mask.super.bitcount,
+		&pmu_set->lut_prog_broadcast_slave_mask.super);
+
+	gk20a_dbg_info(" Done");
+
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/clk/clk_fll.h b/drivers/gpu/nvgpu/clk/clk_fll.h
new file mode 100644
index 00000000..06872f48
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_fll.h
@@ -0,0 +1,68 @@
+/*
+* Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms and conditions of the GNU General Public License,
+* version 2, as published by the Free Software Foundation.
+*
+* This program is distributed in the hope it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+* more details.
+*/
+
+#ifndef _CLKFLL_H_
+#define _CLKFLL_H_
+
+#include "pmuif/gpmuifclk.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "boardobj/boardobjgrpmask.h"
+
+/*data and function definition to talk to driver*/
+u32 clk_fll_sw_setup(struct gk20a *g);
+u32 clk_fll_pmu_setup(struct gk20a *g);
+
+/*
+ * Board object group of FLL devices plus group-wide LUT settings.
+ *
+ * lut_prog_master_mask marks the FLLs chosen as LUT-programming masters in
+ * clk_fll_sw_setup(). The three lut_* values are seeded there from CTRL_CLK_*
+ * constants; max_min_freq_mhz comes from the VBIOS FLL table header (0 when
+ * the short header form is used).
+ */
+struct avfsfllobjs {
+	struct boardobjgrp_e32 super;
+	struct boardobjgrpmask_e32 lut_prog_master_mask;
+	u32 lut_step_size_uv;
+	u32 lut_min_voltage_uv;
+	u8 lut_num_entries;
+	u16 max_min_freq_mhz;
+};
+
+struct fll_device;
+
+typedef u32 fll_lut_broadcast_slave_register(struct gk20a *g,
+	struct avfsfllobjs *pfllobjs,
+	struct fll_device *pfll,
+	struct fll_device *pfll_slave);
+
+/*
+ * One FLL device board object.
+ *
+ * Most fields are filled from a VBIOS FLL table entry by
+ * devinit_get_fll_device_table() and copied to the PMU by
+ * fll_device_init_pmudata_super(). lut_prog_broadcast_slave_mask collects
+ * the FLLs registered with this device through lut_broadcast_slave_register.
+ * NOTE(review): target_regime_id_override is not assigned anywhere in
+ * clk_fll.c - confirm who owns it.
+ */
+struct fll_device {
+	struct boardobj super;
+	u8 id;
+	u8 mdiv;
+	u16 input_freq_mhz;
+	u32 clk_domain;
+	u8 vin_idx_logic;
+	u8 vin_idx_sram;
+	u8 rail_idx_for_lut;
+	struct nv_pmu_clk_lut_device_desc lut_device;
+	struct nv_pmu_clk_regime_desc regime_desc;
+	u8 min_freq_vfe_idx;
+	u8 freq_ctrl_idx;
+	u8 target_regime_id_override;
+	struct boardobjgrpmask_e32 lut_prog_broadcast_slave_mask;
+	fll_lut_broadcast_slave_register *lut_broadcast_slave_register;
+};
+
+/*
+ * Accessors for the group-wide FLL LUT parameters reached through a pointer
+ * to a structure that has an avfs_fllobjs member. The argument is
+ * parenthesised so expressions such as &obj or ptr + 1 expand correctly.
+ */
+#define CLK_FLL_LUT_VF_NUM_ENTRIES(pclk) \
+	((pclk)->avfs_fllobjs.lut_num_entries)
+
+#define CLK_FLL_LUT_MIN_VOLTAGE_UV(pclk) \
+	((pclk)->avfs_fllobjs.lut_min_voltage_uv)
+#define CLK_FLL_LUT_STEP_SIZE_UV(pclk) \
+	((pclk)->avfs_fllobjs.lut_step_size_uv)
+
+#endif
+
diff --git a/drivers/gpu/nvgpu/clk/clk_prog.c b/drivers/gpu/nvgpu/clk/clk_prog.c
new file mode 100644
index 00000000..d87581c4
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_prog.c
@@ -0,0 +1,834 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "clk.h"
+#include "clk_prog.h"
+#include "clk_vf_point.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "pmuif/gpmuifclk.h"
+#include "gm206/bios_gm206.h"
+#include "ctrl/ctrlclk.h"
+#include "ctrl/ctrlvolt.h"
+#include "gk20a/pmu_gk20a.h"
+
+static struct clk_prog *construct_clk_prog(struct gk20a *g, void *pargs);
+static u32 devinit_get_clk_prog_table(struct gk20a *g,
+	struct clk_progs *pprogobjs);
+static vf_flatten vfflatten_prog_1x_master;
+
+/*
+ * Group-level pmudatainit callback for the CLK_PROG group: after the generic
+ * header has been filled, copies slave_entry_count and vf_entry_count into
+ * the PMU set header. Returns the parent's status on failure, else 0.
+ *
+ * NOTE(review): the group is built with boardobjgrpconstruct_e255() in
+ * clk_prog_sw_setup() but the header is initialised with the _e32 helper
+ * here - confirm this is intended.
+ */
+static u32 _clk_progs_pmudatainit(struct gk20a *g,
+				  struct boardobjgrp *pboardobjgrp,
+				  struct nv_pmu_boardobjgrp_super *pboardobjgrppmu)
+{
+	struct clk_progs *progs = (struct clk_progs *)pboardobjgrp;
+	struct nv_pmu_clk_clk_prog_boardobjgrp_set_header *hdr =
+		(struct nv_pmu_clk_clk_prog_boardobjgrp_set_header *)
+		pboardobjgrppmu;
+	u32 status;
+
+	status = boardobjgrp_pmudatainit_e32(g, pboardobjgrp, pboardobjgrppmu);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error updating pmu boardobjgrp for clk prog 0x%x",
+			  status);
+		return status;
+	}
+
+	hdr->vf_entry_count = progs->vf_entry_count;
+	hdr->slave_entry_count = progs->slave_entry_count;
+
+	return status;
+}
+
+/*
+ * Return, through ppboardobjpmudata, the PMU SET payload slot for the
+ * CLK_PROG at index idx. Fails with -EINVAL when bit idx is clear in word 0
+ * of the group's object mask.
+ *
+ * NOTE(review): only mask word 0 is tested although the group is built as
+ * E255 in clk_prog_sw_setup(); indices of 32 and above look unreachable
+ * through this check - confirm against the mask layout.
+ */
+static u32 _clk_progs_pmudata_instget(struct gk20a *g,
+				      struct nv_pmu_boardobjgrp *pmuboardobjgrp,
+				      struct nv_pmu_boardobj **ppboardobjpmudata,
+				      u8 idx)
+{
+	struct nv_pmu_clk_clk_prog_boardobj_grp_set *prog_set =
+		(struct nv_pmu_clk_clk_prog_boardobj_grp_set *)pmuboardobjgrp;
+
+	gk20a_dbg_info("");
+
+	/* Only hand out a slot for an index present in the object mask. */
+	if ((u32)BIT(idx) &
+		prog_set->hdr.data.super.obj_mask.super.data[0]) {
+		*ppboardobjpmudata = (struct nv_pmu_boardobj *)
+			&prog_set->objects[idx].data.board_obj;
+		gk20a_dbg_info(" Done");
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Build the software state for the CLK_PROG board object group.
+ *
+ * Constructs the E255 group in g->clk_pmu.clk_progobjs, registers the PMU
+ * SET command interface, installs the group callbacks, parses the VBIOS
+ * clock programming table and finally links the clock domains to their
+ * CLK_PROGs via clk_domain_clk_prog_link().
+ *
+ * Returns 0 on success or the first non-zero status from a failing step.
+ */
+u32 clk_prog_sw_setup(struct gk20a *g)
+{
+	struct clk_progs *progs = &(g->clk_pmu.clk_progobjs);
+	struct boardobjgrp *pboardobjgrp =
+		&g->clk_pmu.clk_progobjs.super.super;
+	u32 status;
+
+	gk20a_dbg_info("");
+
+	status = boardobjgrpconstruct_e255(&g->clk_pmu.clk_progobjs.super);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error creating boardobjgrp for clk prog, status - 0x%x",
+			  status);
+		goto done;
+	}
+
+	BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, CLK, CLK_PROG);
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp,
+			clk, CLK, clk_prog, CLK_PROG);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x",
+			status);
+		goto done;
+	}
+
+	/* Group callbacks used when the set payload is assembled. */
+	pboardobjgrp->pmudatainstget  = _clk_progs_pmudata_instget;
+	pboardobjgrp->pmudatainit = _clk_progs_pmudatainit;
+
+	status = devinit_get_clk_prog_table(g, progs);
+	if (status)
+		goto done;
+
+	status = clk_domain_clk_prog_link(g, &g->clk_pmu);
+	if (status)
+		gk20a_err(dev_from_gk20a(g),
+			  "error constructing VF point board objects");
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+/*
+ * Hand the CLK_PROG group to the PMU by calling the group's pmuinithandle
+ * hook. Returns -EINVAL if the group has not been constructed, otherwise the
+ * hook's status.
+ */
+u32 clk_prog_pmu_setup(struct gk20a *g)
+{
+	struct boardobjgrp *prog_grp = &g->clk_pmu.clk_progobjs.super.super;
+	u32 status;
+
+	gk20a_dbg_info("");
+
+	if (!prog_grp->bconstructed)
+		return -EINVAL;
+
+	status = prog_grp->pmuinithandle(g, prog_grp);
+
+	gk20a_dbg_info("Done");
+	return status;
+}
+
+static u32 devinit_get_clk_prog_table(struct gk20a *g,
+				      struct clk_progs *pclkprogobjs)
+{
+	u32 status = 0;
+	u8 *clkprogs_tbl_ptr = NULL;
+	struct vbios_clock_programming_table_1x_header header = { 0 };
+	struct vbios_clock_programming_table_1x_entry prog = { 0 };
+	struct vbios_clock_programming_table_1x_slave_entry slaveprog = { 0 };
+	struct vbios_clock_programming_table_1x_vf_entry vfprog = { 0 };
+	u8 *entry = NULL;
+	u8 *slaveentry = NULL;
+	u8 *vfentry = NULL;
+	u32 i, j = 0;
+	struct clk_prog *pprog;
+	u8 prog_type;
+	u32 szfmt = VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_SIZE_0D;
+	u32 hszfmt = VBIOS_CLOCK_PROGRAMMING_TABLE_1X_HEADER_SIZE_08;
+	u32 slaveszfmt = VBIOS_CLOCK_PROGRAMMING_TABLE_1X_SLAVE_ENTRY_SIZE_03;
+	u32 vfszfmt = VBIOS_CLOCK_PROGRAMMING_TABLE_1X_VF_ENTRY_SIZE_02;
+	struct ctrl_clk_clk_prog_1x_master_vf_entry
+		vfentries[CTRL_CLK_CLK_PROG_1X_MASTER_VF_ENTRY_MAX_ENTRIES];
+	struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry
+		ratioslaveentries[CTRL_CLK_PROG_1X_MASTER_MAX_SLAVE_ENTRIES];
+	struct ctrl_clk_clk_prog_1x_master_table_slave_entry
+		tableslaveentries[CTRL_CLK_PROG_1X_MASTER_MAX_SLAVE_ENTRIES];
+	union {
+		struct boardobj board_obj;
+		struct clk_prog clkprog;
+		struct clk_prog_1x v1x;
+		struct clk_prog_1x_master v1x_master;
+		struct clk_prog_1x_master_ratio v1x_master_ratio;
+		struct clk_prog_1x_master_table v1x_master_table;
+	} prog_data;
+
+	gk20a_dbg_info("");
+
+	if (g->ops.bios.get_perf_table_ptrs) {
+		clkprogs_tbl_ptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+				g->bios.clock_token, CLOCK_PROGRAMMING_TABLE);
+		if (clkprogs_tbl_ptr == NULL) {
+			status = -EINVAL;
+			goto done;
+		}
+	}
+
+	memcpy(&header, clkprogs_tbl_ptr, hszfmt);
+	if (header.header_size < hszfmt) {
+		status = -EINVAL;
+		goto done;
+	}
+	hszfmt = header.header_size;
+
+	if (header.entry_size <= VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_SIZE_05)
+		szfmt = header.entry_size;
+	else if (header.entry_size <= VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_SIZE_0D)
+		szfmt = header.entry_size;
+	else {
+		status = -EINVAL;
+		goto done;
+	}
+
+	if (header.vf_entry_size < vfszfmt) {
+		status = -EINVAL;
+		goto done;
+	}
+	vfszfmt = header.vf_entry_size;
+	if (header.slave_entry_size < slaveszfmt) {
+		status = -EINVAL;
+		goto done;
+	}
+	slaveszfmt = header.slave_entry_size;
+	if (header.vf_entry_count > CTRL_CLK_CLK_DELTA_MAX_VOLT_RAILS) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	pclkprogobjs->slave_entry_count = header.slave_entry_count;
+	pclkprogobjs->vf_entry_count = header.vf_entry_count;
+
+	for (i = 0; i < header.entry_count; i++) {
+		memset(&prog_data, 0x0, (u32)sizeof(prog_data));
+
+		/* Read table entries*/
+		entry = clkprogs_tbl_ptr + hszfmt +
+			(i * (szfmt + (header.slave_entry_count * slaveszfmt) +
+			(header.vf_entry_count * vfszfmt)));
+
+		memcpy(&prog, entry, szfmt);
+		memset(vfentries, 0xFF,
+			sizeof(struct ctrl_clk_clk_prog_1x_master_vf_entry) *
+			CTRL_CLK_CLK_PROG_1X_MASTER_VF_ENTRY_MAX_ENTRIES);
+		memset(ratioslaveentries, 0xFF,
+			sizeof(struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry) *
+			CTRL_CLK_PROG_1X_MASTER_MAX_SLAVE_ENTRIES);
+		memset(tableslaveentries, 0xFF,
+			sizeof(struct ctrl_clk_clk_prog_1x_master_table_slave_entry) *
+			CTRL_CLK_PROG_1X_MASTER_MAX_SLAVE_ENTRIES);
+		prog_type = (u8)BIOS_GET_FIELD(prog.flags0,
+					       NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_SOURCE);
+
+		switch (prog_type) {
+		case NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_SOURCE_PLL:
+			prog_data.v1x.source = CTRL_CLK_PROG_1X_SOURCE_PLL;
+			prog_data.v1x.source_data.pll.pll_idx =
+				(u8)BIOS_GET_FIELD(prog.param0,
+					NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_PARAM0_PLL_PLL_INDEX);
+			prog_data.v1x.source_data.pll.freq_step_size_mhz =
+				(u8)BIOS_GET_FIELD(prog.param1,
+					NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_PARAM1_PLL_FREQ_STEP_SIZE);
+			break;
+
+		case NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_SOURCE_ONE_SOURCE:
+			prog_data.v1x.source = CTRL_CLK_PROG_1X_SOURCE_ONE_SOURCE;
+			break;
+
+		case NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_SOURCE_FLL:
+			prog_data.v1x.source = CTRL_CLK_PROG_1X_SOURCE_FLL;
+			break;
+
+		default:
+			gk20a_err(dev_from_gk20a(g),
+				  "invalid source %d", prog_type);
+			status = -EINVAL;
+			goto done;
+		}
+
+		prog_data.v1x.freq_max_mhz = (u16)prog.freq_max_mhz;
+
+		prog_type = (u8)BIOS_GET_FIELD(prog.flags0,
+			NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE);
+
+		vfentry = entry + szfmt +
+			header.slave_entry_count * slaveszfmt;
+		slaveentry = entry + szfmt;
+		switch (prog_type) {
+		case NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_MASTER_RATIO:
+		case NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_MASTER_TABLE:
+			prog_data.v1x_master.b_o_c_o_v_enabled = false;
+			for (j = 0; j < header.vf_entry_count; j++) {
+				memcpy(&vfprog, vfentry, vfszfmt);
+
+				vfentries[j].vfe_idx = (u8)vfprog.vfe_idx;
+				if (CTRL_CLK_PROG_1X_SOURCE_FLL ==
+					prog_data.v1x.source) {
+					vfentries[j].gain_vfe_idx = (u8)BIOS_GET_FIELD(
+						vfprog.param0,
+						NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_VF_ENTRY_PARAM0_FLL_GAIN_VFE_IDX);
+				} else {
+					vfentries[j].gain_vfe_idx = CTRL_BOARDOBJ_IDX_INVALID;
+				}
+				vfentry += vfszfmt;
+			}
+
+			prog_data.v1x_master.p_vf_entries = vfentries;
+
+			for (j = 0; j < header.slave_entry_count; j++) {
+				memcpy(&slaveprog, slaveentry, slaveszfmt);
+
+				switch (prog_type) {
+				case NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_MASTER_RATIO:
+					ratioslaveentries[j].clk_dom_idx =
+						(u8)slaveprog.clk_dom_idx;
+					ratioslaveentries[j].ratio = (u8)
+					BIOS_GET_FIELD(slaveprog.param0,
+					NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_SLAVE_ENTRY_PARAM0_MASTER_RATIO_RATIO);
+					break;
+
+				case NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_MASTER_TABLE:
+					tableslaveentries[j].clk_dom_idx =
+						(u8)slaveprog.clk_dom_idx;
+					tableslaveentries[j].freq_mhz =
+						(u16)BIOS_GET_FIELD(slaveprog.param0,
+							NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_SLAVE_ENTRY_PARAM0_MASTER_TABLE_FREQ);
+					break;
+				}
+				slaveentry += slaveszfmt;
+			}
+
+			switch (prog_type) {
+			case NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_MASTER_RATIO:
+				prog_data.board_obj.type = CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_RATIO;
+				prog_data.v1x_master_ratio.p_slave_entries =
+					ratioslaveentries;
+				break;
+
+			case NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_MASTER_TABLE:
+				prog_data.board_obj.type = CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_TABLE;
+
+				prog_data.v1x_master_table.p_slave_entries =
+					tableslaveentries;
+				break;
+
+			}
+			break;
+
+		case NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_SLAVE:
+			prog_data.board_obj.type = CTRL_CLK_CLK_PROG_TYPE_1X;
+			break;
+
+
+		default:
+			gk20a_err(dev_from_gk20a(g),
+				  "source issue %d", prog_type);
+				  status = -EINVAL;
+			goto done;
+		}
+
+		pprog = construct_clk_prog(g, (void *)&prog_data);
+		if (pprog == NULL) {
+			gk20a_err(dev_from_gk20a(g),
+			"error constructing clk_prog boardobj %d", i);
+			status = -EINVAL;
+			goto done;
+		}
+
+		status = boardobjgrp_objinsert(&pclkprogobjs->super.super,
+			(struct boardobj *)pprog, i);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				  "error adding clk_prog boardobj %d", i);
+			status = -EINVAL;
+			goto done;
+		}
+	}
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+/* Base-level PMU data init for a clk_prog; nothing clk_prog-specific here. */
+static u32 _clk_prog_pmudatainit_super(struct gk20a *g,
+				       struct boardobj *board_obj_ptr,
+				       struct nv_pmu_boardobj *ppmudata)
+{
+	gk20a_dbg_info("");
+
+	/* The boardobj base routine's status is handed back unchanged. */
+	return boardobj_pmudatainit_super(g, board_obj_ptr,
+					  ppmudata);
+}
+
+/*
+ * PMU data init for a 1x clk_prog: parent-level init, then source/freq fields.
+ */
+static u32 _clk_prog_pmudatainit_1x(struct gk20a *g,
+				    struct boardobj *board_obj_ptr,
+				    struct nv_pmu_boardobj *ppmudata)
+{
+	struct clk_prog_1x *prog = (struct clk_prog_1x *)board_obj_ptr;
+	struct nv_pmu_clk_clk_prog_1x_boardobj_set *out =
+		(struct nv_pmu_clk_clk_prog_1x_boardobj_set *)ppmudata;
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	/* Parent level first; the 1x fields stay untouched if it fails. */
+	rc = _clk_prog_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (rc != 0)
+		return rc;
+
+	out->freq_max_mhz = prog->freq_max_mhz;
+	out->source = prog->source;
+	out->source_data = prog->source_data;
+
+	return rc;
+}
+
+/*
+ * PMU data init for a 1x master clk_prog: 1x-level init, then the per-rail VF
+ * entries, b_o_c_o_v_enabled and deltas are copied into the PMU set.
+ */
+static u32 _clk_prog_pmudatainit_1x_master(struct gk20a *g,
+					   struct boardobj *board_obj_ptr,
+					   struct nv_pmu_boardobj *ppmudata)
+{
+	u32 status = 0;
+	struct clk_prog_1x_master *pclk_prog_1x_master;
+	struct nv_pmu_clk_clk_prog_1x_master_boardobj_set *pset;
+	u32 vfsize = sizeof(struct ctrl_clk_clk_prog_1x_master_vf_entry) *
+		g->clk_pmu.clk_progobjs.vf_entry_count;
+
+	gk20a_dbg_info("");
+
+	status = _clk_prog_pmudatainit_1x(g, board_obj_ptr, ppmudata);
+	if (status != 0) /* do not build on a failed 1x-level init */
+		return status;
+
+	pclk_prog_1x_master = (struct clk_prog_1x_master *)board_obj_ptr;
+	pset = (struct nv_pmu_clk_clk_prog_1x_master_boardobj_set *)ppmudata;
+
+	memcpy(pset->vf_entries, pclk_prog_1x_master->p_vf_entries, vfsize);
+	pset->b_o_c_o_v_enabled = pclk_prog_1x_master->b_o_c_o_v_enabled;
+	memcpy(&pset->deltas, &pclk_prog_1x_master->deltas,
+		(u32) sizeof(struct ctrl_clk_clk_delta));
+	return status;
+}
+
+/*
+ * PMU data init for a ratio-master clk_prog: master-level init followed by a
+ * copy of slave_entry_count ratio slave entries into the PMU set.
+ */
+static u32 _clk_prog_pmudatainit_1x_master_ratio(struct gk20a *g,
+						 struct boardobj *board_obj_ptr,
+						 struct nv_pmu_boardobj *ppmudata)
+{
+	struct clk_prog_1x_master_ratio *ratio_prog =
+		(struct clk_prog_1x_master_ratio *)board_obj_ptr;
+	struct nv_pmu_clk_clk_prog_1x_master_ratio_boardobj_set *out =
+		(struct nv_pmu_clk_clk_prog_1x_master_ratio_boardobj_set *)ppmudata;
+	u32 nbytes = g->clk_pmu.clk_progobjs.slave_entry_count *
+		sizeof(struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry);
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	rc = _clk_prog_pmudatainit_1x_master(g, board_obj_ptr, ppmudata);
+	if (rc != 0)
+		return rc;
+
+	/* Slave table goes out only once the master-level data is in place. */
+	memcpy(out->slave_entries, ratio_prog->p_slave_entries, nbytes);
+
+	return rc;
+}
+
+/*
+ * PMU data init for a table-master clk_prog: master-level init, then the slave
+ * entries are copied into the PMU set.
+ * NOTE(review): size uses the *ratio* slave entry type for table entries; keep
+ * it in step with the allocation in clk_prog_construct_1x_master_table().
+ */
+static u32 _clk_prog_pmudatainit_1x_master_table(struct gk20a *g,
+						 struct boardobj *board_obj_ptr,
+						 struct nv_pmu_boardobj *ppmudata)
+{
+	struct clk_prog_1x_master_table *table_prog =
+		(struct clk_prog_1x_master_table *)board_obj_ptr;
+	struct nv_pmu_clk_clk_prog_1x_master_table_boardobj_set *out =
+		(struct nv_pmu_clk_clk_prog_1x_master_table_boardobj_set *)ppmudata;
+	u32 nbytes = g->clk_pmu.clk_progobjs.slave_entry_count *
+		sizeof(struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry);
+	u32 rc;
+
+	gk20a_dbg_info("");
+	rc = _clk_prog_pmudatainit_1x_master(g, board_obj_ptr, ppmudata);
+	if (rc != 0)
+		return rc;
+
+	memcpy(out->slave_entries, table_prog->p_slave_entries, nbytes);
+	return rc;
+}
+
+/*
+ * Builds a VF point from the template, inserts it at *p_vf_point_idx, records
+ * that index as the rail's last VF point and then advances the index.
+ * Returns -ENOMEM if construction fails, else the objinsert() status.
+ */
+static u32 _clk_prog_1x_master_rail_construct_vf_point(struct gk20a *g,
+						       struct clk_pmupstate *pclk,
+						       struct clk_prog_1x_master *p1xmaster,
+						       struct ctrl_clk_clk_prog_1x_master_vf_entry *p_vf_rail,
+						       struct clk_vf_point *p_vf_point_tmp,
+						       u8 *p_vf_point_idx)
+{
+	struct boardobjgrp *vf_grp = &pclk->clk_vf_pointobjs.super.super;
+	struct clk_vf_point *new_point;
+	u32 rc = -ENOMEM;
+
+	gk20a_dbg_info("");
+
+	new_point = construct_clk_vf_point(g, (void *)p_vf_point_tmp);
+	if (new_point != NULL) {
+		rc = vf_grp->objinsert(vf_grp, &new_point->super,
+				       *p_vf_point_idx);
+		/* The index is consumed only when the insert succeeded. */
+		if (!rc)
+			p_vf_rail->vf_point_idx_last = (*p_vf_point_idx)++;
+	}
+
+	gk20a_dbg_info("done status %x", rc);
+	return rc;
+}
+
+/*
+ * Base clk_prog constructor: builds the boardobj and installs the base
+ * pmudatainit hook. Any boardobj construction failure is reported as -EINVAL.
+ */
+static u32 clk_prog_construct_super(struct gk20a *g,
+				    struct boardobj **ppboardobj,
+				    u16 size, void *pargs)
+{
+	struct clk_prog *prog;
+
+	if (boardobj_construct_super(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	prog = (struct clk_prog *)*ppboardobj;
+	prog->super.pmudatainit = _clk_prog_pmudatainit_super;
+
+	return 0;
+}
+
+
+/*
+ * Constructor for a 1x clk_prog. Tags the caller's template with the 1X type
+ * bit, builds the base object, then copies source, freq_max_mhz and
+ * source_data from the template. Failures are reported as -EINVAL.
+ */
+static u32 clk_prog_construct_1x(struct gk20a *g,
+				 struct boardobj **ppboardobj,
+				 u16 size, void *pargs)
+{
+	struct clk_prog_1x *tmpl = (struct clk_prog_1x *)pargs;
+	struct clk_prog_1x *prog;
+
+	gk20a_dbg_info(" ");
+
+	/* The type bit is set on the template before it is handed down. */
+	((struct boardobj *)pargs)->type_mask |= BIT(CTRL_CLK_CLK_PROG_TYPE_1X);
+	if (clk_prog_construct_super(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	prog = (struct clk_prog_1x *)*ppboardobj;
+	prog->super.super.pmudatainit = _clk_prog_pmudatainit_1x;
+	prog->freq_max_mhz = tmpl->freq_max_mhz;
+	prog->source = tmpl->source;
+	prog->source_data = tmpl->source_data;
+
+	return 0;
+}
+
+/*
+ * Constructor for a 1x master clk_prog: builds the 1x object, installs the
+ * master hooks and copies the template's per-rail VF entries, resetting their
+ * VF point ranges. Returns 0, -EINVAL (1x constructor) or -ENOMEM (VF array).
+ */
+static u32 clk_prog_construct_1x_master(struct gk20a *g,
+					struct boardobj **ppboardobj,
+					u16 size, void *pargs)
+{
+	struct boardobj *ptmpobj = (struct boardobj *)pargs;
+	struct clk_prog_1x_master *pclkprog;
+	struct clk_prog_1x_master *ptmpprog =
+			(struct clk_prog_1x_master *)pargs;
+	u32 vfsize = sizeof(struct ctrl_clk_clk_prog_1x_master_vf_entry) *
+		g->clk_pmu.clk_progobjs.vf_entry_count;
+	u8 railidx;
+
+	gk20a_dbg_info(" type - %x", BOARDOBJ_GET_TYPE(pargs));
+
+	ptmpobj->type_mask |= BIT(CTRL_CLK_CLK_PROG_TYPE_1X_MASTER);
+	if (clk_prog_construct_1x(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	pclkprog = (struct clk_prog_1x_master *)*ppboardobj;
+	pclkprog->super.super.super.pmudatainit =
+			_clk_prog_pmudatainit_1x_master;
+	pclkprog->vfflatten = vfflatten_prog_1x_master;
+
+	pclkprog->p_vf_entries = kzalloc(vfsize, GFP_KERNEL);
+	if (!pclkprog->p_vf_entries) /* memcpy() below must not see NULL */
+		return -ENOMEM;
+
+	memcpy(pclkprog->p_vf_entries, ptmpprog->p_vf_entries, vfsize);
+	pclkprog->b_o_c_o_v_enabled = ptmpprog->b_o_c_o_v_enabled;
+
+	for (railidx = 0;
+	     railidx < g->clk_pmu.clk_progobjs.vf_entry_count;
+	     railidx++) {
+		pclkprog->p_vf_entries[railidx].vf_point_idx_first =
+			CTRL_CLK_CLK_VF_POINT_IDX_INVALID;
+		pclkprog->p_vf_entries[railidx].vf_point_idx_last =
+			CTRL_CLK_CLK_VF_POINT_IDX_INVALID;
+	}
+
+	return 0;
+}
+
+/*
+ * Constructor for a ratio-master clk_prog: type-checks the template, builds
+ * the master object and copies the template's slave entries.
+ */
+static u32 clk_prog_construct_1x_master_ratio(struct gk20a *g,
+					      struct boardobj **ppboardobj,
+					      u16 size, void *pargs)
+{
+	struct clk_prog_1x_master_ratio *tmpl =
+			(struct clk_prog_1x_master_ratio *)pargs;
+	struct clk_prog_1x_master_ratio *prog;
+	struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry *slaves;
+	u32 nbytes = g->clk_pmu.clk_progobjs.slave_entry_count *
+		sizeof(struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry);
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_RATIO)
+		return -EINVAL;
+
+	((struct boardobj *)pargs)->type_mask |=
+		BIT(CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_RATIO);
+	if (clk_prog_construct_1x_master(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	prog = (struct clk_prog_1x_master_ratio *)*ppboardobj;
+	prog->super.super.super.super.pmudatainit =
+			_clk_prog_pmudatainit_1x_master_ratio;
+
+	slaves = kzalloc(nbytes, GFP_KERNEL);
+	prog->p_slave_entries = slaves;
+	if (slaves == NULL)
+		return -ENOMEM;
+
+	/* Pre-fill with the invalid marker, then overlay the template data. */
+	memset(slaves, CTRL_CLK_CLK_DOMAIN_INDEX_INVALID, nbytes);
+	memcpy(slaves, tmpl->p_slave_entries, nbytes);
+
+	return 0;
+}
+
+/*
+ * Constructor for a table-master clk_prog: type-checks the template, builds
+ * the master object and takes a private copy of the template's slave entries.
+ * Returns 0, -EINVAL (wrong type / master constructor) or -ENOMEM.
+ */
+static u32 clk_prog_construct_1x_master_table(struct gk20a *g,
+					      struct boardobj **ppboardobj,
+					      u16 size, void *pargs)
+{
+	struct boardobj *ptmpobj = (struct boardobj *)pargs;
+	struct clk_prog_1x_master_table *pclkprog;
+	struct clk_prog_1x_master_table *ptmpprog =
+			(struct clk_prog_1x_master_table *)pargs;
+	u32 status = 0;
+	/* Entries here are table slave entries, so size by that type. */
+	u32 slavesize = sizeof(struct ctrl_clk_clk_prog_1x_master_table_slave_entry) *
+		g->clk_pmu.clk_progobjs.slave_entry_count;
+
+	gk20a_dbg_info("type - %x", BOARDOBJ_GET_TYPE(pargs));
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_TABLE)
+		return -EINVAL;
+
+	ptmpobj->type_mask |= BIT(CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_TABLE);
+	status = clk_prog_construct_1x_master(g, ppboardobj, size, pargs);
+	if (status)
+		return -EINVAL;
+
+	pclkprog = (struct clk_prog_1x_master_table *)*ppboardobj;
+	pclkprog->super.super.super.super.pmudatainit =
+			_clk_prog_pmudatainit_1x_master_table;
+
+	pclkprog->p_slave_entries = kzalloc(slavesize, GFP_KERNEL);
+	if (!pclkprog->p_slave_entries)
+		return -ENOMEM;
+
+	memcpy(pclkprog->p_slave_entries, ptmpprog->p_slave_entries, slavesize);
+
+	return status;
+}
+
+/*
+ * Type dispatcher: builds the clk_prog variant named by the template's board
+ * object type. Returns NULL for an unknown type or a failed constructor.
+ */
+static struct clk_prog *construct_clk_prog(struct gk20a *g, void *pargs)
+{
+	struct boardobj *obj = NULL;
+	u32 rc;
+
+	gk20a_dbg_info(" type - %x", BOARDOBJ_GET_TYPE(pargs));
+	switch (BOARDOBJ_GET_TYPE(pargs)) {
+	case CTRL_CLK_CLK_PROG_TYPE_1X:
+		rc = clk_prog_construct_1x(g, &obj,
+			sizeof(struct clk_prog_1x), pargs);
+		break;
+	case CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_TABLE:
+		rc = clk_prog_construct_1x_master_table(g, &obj,
+			sizeof(struct clk_prog_1x_master_table), pargs);
+		break;
+	case CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_RATIO:
+		rc = clk_prog_construct_1x_master_ratio(g, &obj,
+			sizeof(struct clk_prog_1x_master_ratio), pargs);
+		break;
+	default:
+		return NULL;
+	}
+
+	if (rc != 0)
+		return NULL;
+
+	gk20a_dbg_info(" Done");
+	return (struct clk_prog *)obj;
+}
+
+/*
+ * Flattens a master clk_prog into VF points: each rail with a valid vfe_idx
+ * gets frequency points (PLL/ONE_SOURCE) or voltage points (FLL).
+ */
+static u32 vfflatten_prog_1x_master(struct gk20a *g,
+				    struct clk_pmupstate *pclk,
+				    struct clk_prog_1x_master *p1xmaster,
+				    u8 clk_domain_idx, u16 *pfreqmaxlastmhz)
+{
+	struct ctrl_clk_clk_prog_1x_master_vf_entry *p_vf_rail;
+	union {
+		struct boardobj board_obj;
+		struct clk_vf_point vf_point;
+		struct clk_vf_point_freq freq;
+		struct clk_vf_point_volt volt;
+	} vf_point_data; /* template passed to the VF point constructor */
+	u32 status = 0;
+	u8 step_count;
+	u8 freq_step_size_mhz = 0;
+	u8 vf_point_idx;
+	u8 vf_rail_idx;
+
+	gk20a_dbg_info("");
+	memset(&vf_point_data, 0x0, sizeof(vf_point_data));
+	vf_point_idx = BOARDOBJGRP_NEXT_EMPTY_IDX(
+			&pclk->clk_vf_pointobjs.super.super);
+
+	for (vf_rail_idx = 0;
+	     vf_rail_idx < pclk->clk_progobjs.vf_entry_count;
+	     vf_rail_idx++) {
+		u32 voltage_min_uv;
+		u32 voltage_step_size_uv;
+		u8  i;
+
+		p_vf_rail = &p1xmaster->p_vf_entries[vf_rail_idx];
+		if (p_vf_rail->vfe_idx == CTRL_BOARDOBJ_IDX_INVALID)
+			continue; /* no VF points for this rail */
+
+		p_vf_rail->vf_point_idx_first = vf_point_idx;
+		vf_point_data.vf_point.vfe_equ_idx = p_vf_rail->vfe_idx;
+		vf_point_data.vf_point.volt_rail_idx = vf_rail_idx;
+		step_count = 0;
+		/* PLL and FLL overwrite step_count; ONE_SOURCE keeps 0. */
+		switch (p1xmaster->super.source) {
+		case CTRL_CLK_PROG_1X_SOURCE_PLL:
+			freq_step_size_mhz =
+				p1xmaster->super.source_data.pll.freq_step_size_mhz;
+			/* NOTE(review): quotient is narrowed to u8 step_count. */
+			step_count = (freq_step_size_mhz == 0) ? 0 :
+				(p1xmaster->super.freq_max_mhz - *pfreqmaxlastmhz - 1) /
+							freq_step_size_mhz;
+			/* Intentional fall-through.*/
+		case CTRL_CLK_PROG_1X_SOURCE_ONE_SOURCE:
+			vf_point_data.board_obj.type =
+				CTRL_CLK_CLK_VF_POINT_TYPE_FREQ;
+			do { /* step_count + 1 points, lowest frequency first */
+				clkvfpointfreqmhzset(g, &vf_point_data.vf_point,
+					p1xmaster->super.freq_max_mhz -
+					  step_count * freq_step_size_mhz);
+
+				status = _clk_prog_1x_master_rail_construct_vf_point(g, pclk,
+					p1xmaster, p_vf_rail,
+					&vf_point_data.vf_point, &vf_point_idx);
+				if (status)
+					goto done;
+			} while (step_count-- > 0);
+			break;
+
+		case CTRL_CLK_PROG_1X_SOURCE_FLL:
+			voltage_min_uv = CLK_FLL_LUT_MIN_VOLTAGE_UV(pclk);
+			voltage_step_size_uv = CLK_FLL_LUT_STEP_SIZE_UV(pclk);
+			step_count = CLK_FLL_LUT_VF_NUM_ENTRIES(pclk);
+			/* FLL sources use a voltage-based VF_POINT.*/
+			vf_point_data.board_obj.type =
+				CTRL_CLK_CLK_VF_POINT_TYPE_VOLT;
+			vf_point_data.volt.clk_domain_idx = clk_domain_idx;
+			for (i = 0; i < step_count; i++) {
+				vf_point_data.volt.source_voltage_uv =
+					voltage_min_uv + i * voltage_step_size_uv;
+				vf_point_data.volt.vf_gain_vfe_equ_idx = p_vf_rail->gain_vfe_idx;
+
+				status = _clk_prog_1x_master_rail_construct_vf_point(g, pclk,
+					p1xmaster, p_vf_rail,
+					&vf_point_data.vf_point, &vf_point_idx);
+				if (status)
+					goto done;
+			}
+			break;
+		}
+	}
+
+	*pfreqmaxlastmhz = p1xmaster->super.freq_max_mhz; /* success path only */
+
+done:
+	gk20a_dbg_info("done status %x", status);
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/clk/clk_prog.h b/drivers/gpu/nvgpu/clk/clk_prog.h
new file mode 100644
index 00000000..2dd8f6c8
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_prog.h
@@ -0,0 +1,71 @@
+/*
+* Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms and conditions of the GNU General Public License,
+* version 2, as published by the Free Software Foundation.
+*
+* This program is distributed in the hope it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+* more details.
+*/
+
+#ifndef _CLKPROG_H_
+#define _CLKPROG_H_
+#include "ctrl/ctrlclk.h"
+#include "ctrl/ctrlboardobj.h"
+#include "pmuif/gpmuifclk.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "boardobj/boardobjgrpmask.h"
+
+u32 clk_prog_sw_setup(struct gk20a *g);
+u32 clk_prog_pmu_setup(struct gk20a *g);
+struct clk_prog_1x_master;
+/* Signature of the vfflatten hook stored in struct clk_prog_1x_master. */
+typedef u32 vf_flatten(struct gk20a *g, struct clk_pmupstate *pclk,
+			struct clk_prog_1x_master *p1xmaster,
+			u8 clk_domain_idx, u16 *pfreqmaxlastmhz);
+
+/* Board object group of clk_prog objects, plus the per-entry sub-counts. */
+struct clk_progs {
+	struct boardobjgrp_e255 super;
+	u8 slave_entry_count;	/* slave entries per clk_prog */
+	u8 vf_entry_count;	/* VF (per-rail) entries per clk_prog */
+};
+
+struct clk_prog {	/* common base of all clk_prog variants */
+	struct boardobj super;
+};
+
+struct clk_prog_1x {	/* 1x clk_prog: clock source and freq_max_mhz */
+	struct clk_prog super;
+	u8  source;		/* one of CTRL_CLK_PROG_1X_SOURCE_* */
+	u16 freq_max_mhz;
+	union ctrl_clk_clk_prog_1x_source_data source_data; /* per-source data */
+};
+
+/* Master clk_prog: owns the per-rail VF entries and the flatten hook. */
+struct clk_prog_1x_master {
+	struct clk_prog_1x super;
+	bool b_o_c_o_v_enabled;
+	struct ctrl_clk_clk_prog_1x_master_vf_entry *p_vf_entries; /* kzalloc'd */
+	struct ctrl_clk_clk_delta deltas;
+	vf_flatten *vfflatten;	/* set by clk_prog_construct_1x_master() */
+};
+
+struct clk_prog_1x_master_ratio { /* master with kzalloc'd ratio slaves */
+	struct clk_prog_1x_master super;
+	struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry *p_slave_entries;
+};
+
+struct clk_prog_1x_master_table { /* master with kzalloc'd table slaves */
+	struct clk_prog_1x_master super;
+	struct ctrl_clk_clk_prog_1x_master_table_slave_entry *p_slave_entries;
+};
+/* clk_prog at index idx of pclk's clk_progobjs group, as struct clk_prog *. */
+#define CLK_CLK_PROG_GET(pclk, idx)                                            \
+	((struct clk_prog *)BOARDOBJGRP_OBJ_GET_BY_IDX(			\
+		&(pclk)->clk_progobjs.super.super, (u8)(idx)))
+
+#endif
diff --git a/drivers/gpu/nvgpu/clk/clk_vf_point.c b/drivers/gpu/nvgpu/clk/clk_vf_point.c
new file mode 100644
index 00000000..275bef96
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_vf_point.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "clk.h"
+#include "clk_vf_point.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "pmuif/gpmuifclk.h"
+#include "gm206/bios_gm206.h"
+#include "ctrl/ctrlclk.h"
+#include "ctrl/ctrlvolt.h"
+#include "gk20a/pmu_gk20a.h"
+
+static u32 _clk_vf_point_pmudatainit_super(struct gk20a *g, struct boardobj
+	*board_obj_ptr,	struct nv_pmu_boardobj *ppmudata);
+
+/*
+ * Group-level PMU data init for the VF point group; logs any failure.
+ * NOTE(review): uses the e32 init although the group is built as e255; confirm.
+ */
+static u32 _clk_vf_points_pmudatainit(struct gk20a *g,
+				      struct boardobjgrp *pboardobjgrp,
+				      struct nv_pmu_boardobjgrp_super *pboardobjgrppmu)
+{
+	u32 rc = boardobjgrp_pmudatainit_e32(g, pboardobjgrp, pboardobjgrppmu);
+
+	if (rc)
+		gk20a_err(dev_from_gk20a(g),
+			  "error updating pmu boardobjgrp for clk vfpoint 0x%x",
+			  rc);
+
+	return rc;
+}
+
+/*
+ * Returns the PMU SET-payload slot for idx; -EINVAL when idx is out of range.
+ */
+static u32 _clk_vf_points_pmudata_instget(struct gk20a *g,
+					  struct nv_pmu_boardobjgrp *pmuboardobjgrp,
+					  struct nv_pmu_boardobj **ppboardobjpmudata,
+					  u8 idx)
+{
+	gk20a_dbg_info("");
+
+	/* Only the index range is checked here, not slot occupancy. */
+	if (idx >= CTRL_BOARDOBJGRP_E255_MAX_OBJECTS)
+		return -EINVAL;
+
+	*ppboardobjpmudata = (struct nv_pmu_boardobj *)
+		&((struct nv_pmu_clk_clk_vf_point_boardobj_grp_set *)
+		  pmuboardobjgrp)->objects[idx].data.board_obj;
+	gk20a_dbg_info(" Done");
+	return 0;
+}
+
+/* Returns the GET_STATUS payload slot for idx; -EINVAL if out of range. */
+static u32 _clk_vf_points_pmustatus_instget(struct gk20a *g,
+					    void *pboardobjgrppmu,
+					    struct nv_pmu_boardobj_query **ppboardobjpmustatus,
+					    u8 idx)
+{
+	struct nv_pmu_clk_clk_vf_point_boardobj_grp_get_status *grp;
+
+	/* Range check only; slot occupancy is not examined. */
+	if (idx >= CTRL_BOARDOBJGRP_E255_MAX_OBJECTS)
+		return -EINVAL;
+
+	grp = pboardobjgrppmu;
+	*ppboardobjpmustatus = (struct nv_pmu_boardobj_query *)
+			&grp->objects[idx].data.board_obj;
+	return 0;
+}
+
+/*
+ * Builds the clk VF point group and hooks up its PMU interfaces/callbacks.
+ */
+u32 clk_vf_point_sw_setup(struct gk20a *g)
+{
+	u32 status;
+	struct boardobjgrp *pboardobjgrp = NULL;
+
+	gk20a_dbg_info("");
+
+	status = boardobjgrpconstruct_e255(&g->clk_pmu.clk_vf_pointobjs.super);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+		"error creating boardobjgrp for clk vfpoint, status - 0x%x",
+		status);
+		goto done;
+	}
+
+	pboardobjgrp = &g->clk_pmu.clk_vf_pointobjs.super.super;
+	BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, CLK, CLK_VF_POINT);
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp,
+			clk, CLK, clk_vf_point, CLK_VF_POINT);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x",
+			status);
+		goto done;
+	}
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_GET_STATUS_CONSTRUCT(g,
+				&g->clk_pmu.clk_vf_pointobjs.super.super,
+				clk, CLK, clk_vf_point, CLK_VF_POINT);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error constructing PMU_BOARDOBJ_CMD_GRP_GET_STATUS interface - 0x%x",
+			status);
+		goto done;
+	}
+
+	pboardobjgrp->pmudatainit = _clk_vf_points_pmudatainit;
+	pboardobjgrp->pmudatainstget  = _clk_vf_points_pmudata_instget;
+	pboardobjgrp->pmustatusinstget  = _clk_vf_points_pmustatus_instget;
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+/*
+ * Runs the VF point group's pmuinithandle; -EINVAL if the group is unbuilt.
+ */
+u32 clk_vf_point_pmu_setup(struct gk20a *g)
+{
+	struct boardobjgrp *grp = &g->clk_pmu.clk_vf_pointobjs.super.super;
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	if (!grp->bconstructed)
+		return -EINVAL;
+
+	rc = grp->pmuinithandle(g, grp);
+	gk20a_dbg_info("Done");
+	return rc;
+}
+
+/*
+ * Base VF point constructor: builds the boardobj, installs the base
+ * pmudatainit hook and copies vfe_equ_idx / volt_rail_idx from the template.
+ * A boardobj construction failure is reported as -EINVAL.
+ */
+static u32 clk_vf_point_construct_super(struct gk20a *g,
+					struct boardobj **ppboardobj,
+					u16 size, void *pargs)
+{
+	struct clk_vf_point *tmpl = (struct clk_vf_point *)pargs;
+	struct clk_vf_point *point;
+
+	if (boardobj_construct_super(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	point = (struct clk_vf_point *)*ppboardobj;
+	point->super.pmudatainit = _clk_vf_point_pmudatainit_super;
+
+	/* Only the two indices are taken from the template at this level. */
+	point->volt_rail_idx = tmpl->volt_rail_idx;
+	point->vfe_equ_idx = tmpl->vfe_equ_idx;
+
+	return 0;
+}
+
+/*
+ * PMU data init for a voltage-based VF point: base fields, then the source
+ * voltage, gain VFE index, clock domain index and frequency delta.
+ */
+static u32 _clk_vf_point_pmudatainit_volt(struct gk20a *g,
+					  struct boardobj *board_obj_ptr,
+					  struct nv_pmu_boardobj *ppmudata)
+{
+	struct clk_vf_point_volt *volt_point =
+		(struct clk_vf_point_volt *)board_obj_ptr;
+	struct nv_pmu_clk_clk_vf_point_volt_boardobj_set *out =
+		(struct nv_pmu_clk_clk_vf_point_volt_boardobj_set *)ppmudata;
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	rc = _clk_vf_point_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (rc != 0)
+		return rc;
+
+	out->clk_domain_idx = volt_point->clk_domain_idx;
+	out->vf_gain_vfe_equ_idx = volt_point->vf_gain_vfe_equ_idx;
+	out->source_voltage_uv = volt_point->source_voltage_uv;
+	out->freq_delta_khz = volt_point->freq_delta_khz;
+
+	return rc;
+}
+
+/*
+ * PMU data init for a frequency-based VF point: base fields, then the point's
+ * frequency (read through clkvfpointfreqmhzget) and its voltage delta.
+ */
+static u32 _clk_vf_point_pmudatainit_freq(struct gk20a *g,
+					  struct boardobj *board_obj_ptr,
+					  struct nv_pmu_boardobj *ppmudata)
+{
+	struct clk_vf_point_freq *freq_point =
+		(struct clk_vf_point_freq *)board_obj_ptr;
+	struct nv_pmu_clk_clk_vf_point_freq_boardobj_set *out =
+		(struct nv_pmu_clk_clk_vf_point_freq_boardobj_set *)ppmudata;
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	rc = _clk_vf_point_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (rc != 0)
+		return rc;
+
+	out->volt_delta_uv = freq_point->volt_delta_uv;
+
+	/* The frequency lives in the base point's VF pair. */
+	out->freq_mhz = clkvfpointfreqmhzget(g, &freq_point->super);
+
+	return rc;
+}
+
+/*
+ * Constructor for a voltage-based VF point. The template must be of type VOLT;
+ * source_voltage_uv, vf_gain_vfe_equ_idx and clk_domain_idx are copied from
+ * it. Failures are reported as -EINVAL.
+ */
+static u32 clk_vf_point_construct_volt(struct gk20a *g,
+				       struct boardobj **ppboardobj,
+				       u16 size, void *pargs)
+{
+	struct clk_vf_point_volt *tmpl = (struct clk_vf_point_volt *)pargs;
+	struct clk_vf_point_volt *point;
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_CLK_CLK_VF_POINT_TYPE_VOLT)
+		return -EINVAL;
+
+	((struct boardobj *)pargs)->type_mask |=
+		BIT(CTRL_CLK_CLK_VF_POINT_TYPE_VOLT);
+	if (clk_vf_point_construct_super(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	point = (struct clk_vf_point_volt *)*ppboardobj;
+	point->super.super.pmudatainit = _clk_vf_point_pmudatainit_volt;
+
+	point->clk_domain_idx = tmpl->clk_domain_idx;
+	point->vf_gain_vfe_equ_idx = tmpl->vf_gain_vfe_equ_idx;
+	point->source_voltage_uv = tmpl->source_voltage_uv;
+
+	return 0;
+}
+
+/*
+ * Constructor for a frequency-based VF point. The template must be of type
+ * FREQ; its frequency is copied into the new point. Failures are reported as
+ * -EINVAL.
+ */
+static u32 clk_vf_point_construct_freq(struct gk20a *g,
+				       struct boardobj **ppboardobj,
+				       u16 size, void *pargs)
+{
+	struct clk_vf_point_freq *tmpl = (struct clk_vf_point_freq *)pargs;
+	struct clk_vf_point_freq *point;
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_CLK_CLK_VF_POINT_TYPE_FREQ)
+		return -EINVAL;
+
+	((struct boardobj *)pargs)->type_mask |=
+		BIT(CTRL_CLK_CLK_VF_POINT_TYPE_FREQ);
+	if (clk_vf_point_construct_super(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	point = (struct clk_vf_point_freq *)*ppboardobj;
+	point->super.super.pmudatainit = _clk_vf_point_pmudatainit_freq;
+
+	clkvfpointfreqmhzset(g, &point->super,
+		clkvfpointfreqmhzget(g, &tmpl->super));
+
+	return 0;
+}
+
+/*
+ * Builds the FREQ or VOLT VF point selected by the template type, else NULL.
+ */
+struct clk_vf_point *construct_clk_vf_point(struct gk20a *g, void *pargs)
+{
+	struct boardobj *obj = NULL;
+	u32 rc;
+
+	gk20a_dbg_info("");
+	switch (BOARDOBJ_GET_TYPE(pargs)) {
+	case CTRL_CLK_CLK_VF_POINT_TYPE_FREQ:
+		rc = clk_vf_point_construct_freq(g, &obj,
+			sizeof(struct clk_vf_point_freq), pargs);
+		break;
+	case CTRL_CLK_CLK_VF_POINT_TYPE_VOLT:
+		rc = clk_vf_point_construct_volt(g, &obj,
+			sizeof(struct clk_vf_point_volt), pargs);
+		break;
+	default:
+		return NULL;
+	}
+
+	if (rc != 0)
+		return NULL;
+
+	gk20a_dbg_info(" Done");
+	return (struct clk_vf_point *)obj;
+}
+
+/*
+ * Base PMU data init for a VF point: boardobj base fields, then vfe_equ_idx
+ * and volt_rail_idx.
+ */
+static u32 _clk_vf_point_pmudatainit_super(struct gk20a *g,
+					   struct boardobj *board_obj_ptr,
+					   struct nv_pmu_boardobj *ppmudata)
+{
+	struct clk_vf_point *point = (struct clk_vf_point *)board_obj_ptr;
+	struct nv_pmu_clk_clk_vf_point_boardobj_set *out =
+		(struct nv_pmu_clk_clk_vf_point_boardobj_set *)ppmudata;
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	rc = boardobj_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (rc != 0)
+		return rc;
+
+	/* Fields common to every VF point type. */
+	out->volt_rail_idx = point->volt_rail_idx;
+	out->vfe_equ_idx = point->vfe_equ_idx;
+
+	return rc;
+}
diff --git a/drivers/gpu/nvgpu/clk/clk_vf_point.h b/drivers/gpu/nvgpu/clk/clk_vf_point.h
new file mode 100644
index 00000000..708f80f1
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_vf_point.h
@@ -0,0 +1,74 @@
+/*
+* Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms and conditions of the GNU General Public License,
+* version 2, as published by the Free Software Foundation.
+*
+* This program is distributed in the hope it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+* more details.
+*/
+
+#ifndef _CLKVFPOINT_H_
+#define _CLKVFPOINT_H_
+#include "ctrl/ctrlclk.h"
+#include "ctrl/ctrlboardobj.h"
+#include "pmuif/gpmuifclk.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "boardobj/boardobjgrpmask.h"
+
+u32 clk_vf_point_sw_setup(struct gk20a *g);
+u32 clk_vf_point_pmu_setup(struct gk20a *g);
+
+struct clk_vf_points {	/* board object group of the clk VF points */
+	struct boardobjgrp_e255 super;
+};
+
+struct clk_vf_point {	/* common base of the FREQ and VOLT VF points */
+	struct boardobj super;
+	u8  vfe_equ_idx;
+	u8  volt_rail_idx;
+	struct ctrl_clk_vf_pair pair;	/* accessed via the clkvfpoint* macros */
+};
+
+/* Voltage-based VF point (type CTRL_CLK_CLK_VF_POINT_TYPE_VOLT). */
+struct clk_vf_point_volt {
+	struct clk_vf_point super;
+	u32 source_voltage_uv;
+	u8 vf_gain_vfe_equ_idx;
+	u8 clk_domain_idx;
+	u16 vf_gain_value;
+	int freq_delta_khz;
+};
+
+struct clk_vf_point_freq {	/* type CTRL_CLK_CLK_VF_POINT_TYPE_FREQ */
+	struct clk_vf_point super;
+	int volt_delta_uv;
+};
+/* VF point at index idx of pclk's clk_vf_pointobjs group, as a pointer. */
+#define CLK_CLK_VF_POINT_GET(pclk, idx)                                        \
+	((struct clk_vf_point *)BOARDOBJGRP_OBJ_GET_BY_IDX(                    \
+		&(pclk)->clk_vf_pointobjs.super.super, (u8)(idx)))
+/* Address of the VF pair embedded in a clk_vf_point. */
+#define clkvfpointpairget(pvfpoint)                                            \
+	(&((pvfpoint)->pair))
+/* Reads the MHz value of a VF point's pair; pgpu is not used. */
+#define clkvfpointfreqmhzget(pgpu, pvfpoint)                                   \
+	CTRL_CLK_VF_PAIR_FREQ_MHZ_GET(clkvfpointpairget(pvfpoint))
+/* freq_delta_khz / 1000 for VOLT points, 0 for every other type. */
+#define clkvfpointfreqdeltamhzGet(pgpu, pvfpoint)                              \
+	((BOARDOBJ_GET_TYPE(pvfpoint) == CTRL_CLK_CLK_VF_POINT_TYPE_VOLT) ?    \
+	(((struct clk_vf_point_volt *)(pvfpoint))->freq_delta_khz / 1000) : 0)
+/* Stores _freqmhz into a VF point's pair; pgpu is not used. */
+#define clkvfpointfreqmhzset(pgpu, pvfpoint, _freqmhz)                         \
+	CTRL_CLK_VF_PAIR_FREQ_MHZ_SET(clkvfpointpairget(pvfpoint), _freqmhz)
+/* Stores _voltageuv into a VF point's pair; pgpu is not used. */
+#define clkvfpointvoltageuvset(pgpu, pvfpoint, _voltageuv)                     \
+	CTRL_CLK_VF_PAIR_VOLTAGE_UV_SET(clkvfpointpairget(pvfpoint),           \
+	_voltageuv)
+
+struct clk_vf_point *construct_clk_vf_point(struct gk20a *g, void *pargs);
+
+#endif
diff --git a/drivers/gpu/nvgpu/clk/clk_vin.c b/drivers/gpu/nvgpu/clk/clk_vin.c
new file mode 100644
index 00000000..e8e4b753
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_vin.c
@@ -0,0 +1,466 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "clk.h"
+#include "clk_vin.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "pmuif/gpmuifclk.h"
+#include "gm206/bios_gm206.h"
+#include "ctrl/ctrlvolt.h"
+#include "gk20a/pmu_gk20a.h"
+#include "gp106/hw_fuse_gp106.h"
+
+static u32 devinit_get_vin_device_table(struct gk20a *g,
+		struct avfsvinobjs *pvinobjs);
+
+static struct vin_device *construct_vin_device(struct gk20a *g, void *pargs);
+
+static u32 vin_device_init_pmudata_super(struct gk20a *g,
+				  struct boardobj *board_obj_ptr,
+				  struct nv_pmu_boardobj *ppmudata);
+
+/* Read the VIN calibration revision field out of its fuse register. */
+static u32 read_vin_cal_fuse_rev(struct gk20a *g)
+{
+	u32 fuse_reg = gk20a_readl(g, fuse_vin_cal_fuse_rev_r());
+
+	return fuse_vin_cal_fuse_rev_v(fuse_reg);
+}
+
+/*
+ * Compute the fused VIN calibration slope and intercept for @vin_id.
+ *
+ * The GPC0 fuse provides the base values.  Every other VIN id has a delta
+ * fuse whose slope/intercept fields are added to, or (when the matching sign
+ * bit is set) subtracted from, the GPC0 values.  Each raw field is multiplied
+ * by 1000 and then shifted right by that field's fractional size.
+ *
+ * Returns 0 and fills *slope and *intercept on success.  Returns -EINVAL,
+ * leaving the outputs untouched, when a fuse reads back as 0xFFFFFFFF or
+ * @vin_id is not one of the handled CTRL_CLK_VIN_ID_* values.
+ */
+static u32 read_vin_cal_slope_intercept_fuse(struct gk20a *g,
+					     u32 vin_id, u32 *slope,
+					     u32 *intercept)
+{
+	u32 base_fuse;
+	u32 delta_fuse = 0;
+	u32 base_icpt;
+	u32 base_slope;
+	u32 delta_icpt = 0;
+	u32 delta_slope = 0;
+
+	/* the GPC0 fuse is the reference for every VIN, so always read it */
+	base_fuse = gk20a_readl(g, fuse_vin_cal_gpc0_r());
+	if (base_fuse == 0xFFFFFFFF)
+		return -EINVAL;
+
+	/* select the delta fuse; GPC0 has none and keeps a zero delta */
+	switch (vin_id) {
+	case CTRL_CLK_VIN_ID_GPC0:
+		break;
+	case CTRL_CLK_VIN_ID_GPC1:
+		delta_fuse = gk20a_readl(g, fuse_vin_cal_gpc1_delta_r());
+		break;
+	case CTRL_CLK_VIN_ID_GPC2:
+		delta_fuse = gk20a_readl(g, fuse_vin_cal_gpc2_delta_r());
+		break;
+	case CTRL_CLK_VIN_ID_GPC3:
+		delta_fuse = gk20a_readl(g, fuse_vin_cal_gpc3_delta_r());
+		break;
+	case CTRL_CLK_VIN_ID_GPC4:
+		delta_fuse = gk20a_readl(g, fuse_vin_cal_gpc4_delta_r());
+		break;
+	case CTRL_CLK_VIN_ID_GPC5:
+		delta_fuse = gk20a_readl(g, fuse_vin_cal_gpc5_delta_r());
+		break;
+	case CTRL_CLK_VIN_ID_SYS:
+	case CTRL_CLK_VIN_ID_XBAR:
+	case CTRL_CLK_VIN_ID_LTC:
+		delta_fuse = gk20a_readl(g, fuse_vin_cal_shared_delta_r());
+		break;
+	case CTRL_CLK_VIN_ID_SRAM:
+		delta_fuse = gk20a_readl(g, fuse_vin_cal_sram_delta_r());
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (delta_fuse == 0xFFFFFFFF)
+		return -EINVAL;
+
+	/* base values from the GPC0 fuse */
+	base_icpt = (fuse_vin_cal_gpc0_icpt_data_v(base_fuse) * 1000) >>
+		fuse_vin_cal_gpc0_icpt_frac_size_v();
+	base_slope = (fuse_vin_cal_gpc0_slope_data_v(base_fuse) * 1000) >>
+		fuse_vin_cal_gpc0_slope_frac_size_v();
+
+	/*
+	 * vin_id is known to be valid here.  SRAM has its own intercept
+	 * layout; all other non-GPC0 ids use the GPC1 layout.  The slope
+	 * delta uses the GPC1 layout for every non-GPC0 id, SRAM included.
+	 */
+	if (vin_id == CTRL_CLK_VIN_ID_SRAM) {
+		delta_icpt =
+			(fuse_vin_cal_sram_icpt_data_v(delta_fuse) * 1000) >>
+			fuse_vin_cal_sram_icpt_frac_size_v();
+	} else if (vin_id != CTRL_CLK_VIN_ID_GPC0) {
+		delta_icpt =
+			(fuse_vin_cal_gpc1_icpt_data_v(delta_fuse) * 1000) >>
+			fuse_vin_cal_gpc1_icpt_frac_size_v();
+	}
+
+	if (vin_id != CTRL_CLK_VIN_ID_GPC0) {
+		delta_slope =
+			(fuse_vin_cal_gpc1_slope_data_v(delta_fuse) * 1000) >>
+			fuse_vin_cal_gpc1_slope_frac_size_v();
+	}
+
+	/* a set sign bit in the delta fuse means "subtract from the base" */
+	if (delta_fuse & fuse_vin_cal_gpc1_icpt_sign_f())
+		*intercept = base_icpt - delta_icpt;
+	else
+		*intercept = base_icpt + delta_icpt;
+
+	if (delta_fuse & fuse_vin_cal_gpc1_slope_sign_f())
+		*slope = base_slope - delta_slope;
+	else
+		*slope = base_slope + delta_slope;
+
+	return 0;
+}
+
+/*
+ * Group-level PMU data init for the VIN device group: runs the E32 base
+ * initializer and then copies vin_is_disable_allowed into the PMU set header.
+ * Returns the status of the base initializer.
+ */
+static u32 _clk_vin_devgrp_pmudatainit_super(struct gk20a *g,
+					     struct boardobjgrp *pboardobjgrp,
+					     struct nv_pmu_boardobjgrp_super *pboardobjgrppmu)
+{
+	struct avfsvinobjs *vin_objs = (struct avfsvinobjs *)pboardobjgrp;
+	struct nv_pmu_clk_clk_vin_device_boardobjgrp_set_header *set_hdr;
+	u32 status;
+
+	gk20a_dbg_info("");
+
+	status = boardobjgrp_pmudatainit_e32(g, pboardobjgrp, pboardobjgrppmu);
+
+	/* the flag is copied regardless of the base initializer's status */
+	set_hdr = (struct nv_pmu_clk_clk_vin_device_boardobjgrp_set_header *)
+		pboardobjgrppmu;
+	set_hdr->b_vin_is_disable_allowed = vin_objs->vin_is_disable_allowed;
+
+	gk20a_dbg_info(" Done");
+	return status;
+}
+
+/*
+ * Return, through @ppboardobjpmudata, the PMU "set" data slot for object
+ * @idx of the VIN device group.  Fails with -EINVAL when bit @idx is not set
+ * in the first word of the group's object mask.
+ */
+static u32 _clk_vin_devgrp_pmudata_instget(struct gk20a *g,
+					   struct nv_pmu_boardobjgrp *pmuboardobjgrp,
+					   struct nv_pmu_boardobj **ppboardobjpmudata,
+					   u8 idx)
+{
+	struct nv_pmu_clk_clk_vin_device_boardobj_grp_set *set_grp;
+
+	set_grp = (struct nv_pmu_clk_clk_vin_device_boardobj_grp_set *)
+		pmuboardobjgrp;
+
+	gk20a_dbg_info("");
+
+	/* reject indices that have no object in the group */
+	if (!((u32)BIT(idx) &
+	      set_grp->hdr.data.super.obj_mask.super.data[0]))
+		return -EINVAL;
+
+	*ppboardobjpmudata = (struct nv_pmu_boardobj *)
+		&set_grp->objects[idx].data.board_obj;
+
+	gk20a_dbg_info(" Done");
+	return 0;
+}
+
+/*
+ * Return, through @ppboardobjpmustatus, the PMU "get status" slot for object
+ * @idx of the VIN device group.  Fails with -EINVAL when bit @idx is not set
+ * in the first word of the group's object mask.
+ */
+static u32 _clk_vin_devgrp_pmustatus_instget(struct gk20a *g,
+					     void *pboardobjgrppmu,
+					     struct nv_pmu_boardobj_query **ppboardobjpmustatus,
+					     u8 idx)
+{
+	struct nv_pmu_clk_clk_vin_device_boardobj_grp_get_status *status_grp;
+
+	status_grp =
+		(struct nv_pmu_clk_clk_vin_device_boardobj_grp_get_status *)
+		pboardobjgrppmu;
+
+	/* reject indices that have no object in the group */
+	if (!((u32)BIT(idx) &
+	      status_grp->hdr.data.super.obj_mask.super.data[0]))
+		return -EINVAL;
+
+	*ppboardobjpmustatus = (struct nv_pmu_boardobj_query *)
+			&status_grp->objects[idx].data.board_obj;
+	return 0;
+}
+
+/*
+ * Software-side setup of the VIN device board-object group.
+ *
+ * Steps, in order:
+ *  1. construct the E32 board-object group and its PMU SET command interface,
+ *  2. install the group-level PMU callbacks,
+ *  3. populate the group from the VBIOS VIN table,
+ *  4. when the VBIOS calibration revision equals the fused revision, replace
+ *     each device's slope/intercept with the fused values (only if both
+ *     fused values are non-zero),
+ *  5. construct the PMU GET_STATUS command interface.
+ *
+ * Returns 0 on success or the first non-zero status encountered.
+ */
+u32 clk_vin_sw_setup(struct gk20a *g)
+{
+	u32 status;
+	struct boardobjgrp *pboardobjgrp = NULL;
+	u32 slope;
+	u32 intercept;
+	struct vin_device *pvindev;
+	struct avfsvinobjs *pvinobjs;
+	u8 i;
+
+	gk20a_dbg_info("");
+
+	status = boardobjgrpconstruct_e32(&g->clk_pmu.avfs_vinobjs.super);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error creating boardobjgrp for clk vin, status - 0x%x",
+			status);
+		goto done;
+	}
+
+	pboardobjgrp = &g->clk_pmu.avfs_vinobjs.super.super;
+	pvinobjs = &g->clk_pmu.avfs_vinobjs;
+
+	BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, CLK, VIN_DEVICE);
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp,
+			clk, CLK, clk_vin_device, CLK_VIN_DEVICE);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x",
+			status);
+		goto done;
+	}
+
+	pboardobjgrp->pmudatainit  = _clk_vin_devgrp_pmudatainit_super;
+	pboardobjgrp->pmudatainstget  = _clk_vin_devgrp_pmudata_instget;
+	pboardobjgrp->pmustatusinstget  = _clk_vin_devgrp_pmustatus_instget;
+
+	status = devinit_get_vin_device_table(g, &g->clk_pmu.avfs_vinobjs);
+	if (status)
+		goto done;
+
+	/* override the VBIOS calibration with the fused one if revisions match */
+	if (pvinobjs->calibration_rev_vbios == read_vin_cal_fuse_rev(g)) {
+		BOARDOBJGRP_FOR_EACH(&(pvinobjs->super.super),
+				     struct vin_device *, pvindev, i) {
+			slope = 0;
+			intercept = 0;
+			pvindev = CLK_GET_VIN_DEVICE(pvinobjs, i);
+			status = read_vin_cal_slope_intercept_fuse(g,
+					pvindev->id, &slope, &intercept);
+			if (status) {
+				gk20a_err(dev_from_gk20a(g),
+				"err reading vin cal for id %x", pvindev->id);
+				goto done;
+			}
+			/* keep the VBIOS values unless both fused values are set */
+			if (slope != 0 && intercept != 0) {
+				pvindev->slope = slope;
+				pvindev->intercept = intercept;
+			}
+		}
+	}
+	status = BOARDOBJGRP_PMU_CMD_GRP_GET_STATUS_CONSTRUCT(g,
+				&g->clk_pmu.avfs_vinobjs.super.super,
+				clk, CLK, clk_vin_device, CLK_VIN_DEVICE);
+	if (status) {
+		/* name the interface that actually failed (GET_STATUS, not SET) */
+		gk20a_err(dev_from_gk20a(g),
+			"error constructing PMU_BOARDOBJ_CMD_GRP_GET_STATUS interface - 0x%x",
+			status);
+		goto done;
+	}
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+/*
+ * Hand the VIN device group to the PMU by calling the group's pmuinithandle
+ * callback.  Returns -EINVAL when the group has not been constructed,
+ * otherwise the callback's status.
+ */
+u32 clk_vin_pmu_setup(struct gk20a *g)
+{
+	struct boardobjgrp *vin_grp;
+	u32 status;
+
+	gk20a_dbg_info("");
+
+	vin_grp = &g->clk_pmu.avfs_vinobjs.super.super;
+	if (!vin_grp->bconstructed)
+		return -EINVAL;
+
+	status = vin_grp->pmuinithandle(g, vin_grp);
+
+	gk20a_dbg_info("Done");
+	return status;
+}
+
+/*
+ * Parse the VBIOS VIN descriptor table and insert one vin_device per enabled
+ * table entry into @pvinobjs, at the entry's table index.
+ *
+ * The table header supplies the calibration revision, the "disable allowed"
+ * flag and a default slope/intercept that every constructed device starts
+ * with.  Entries of type CTRL_CLK_VIN_TYPE_DISABLED are skipped.
+ *
+ * Returns 0 on success; -1 when the VIN table cannot be located (including
+ * a HAL without get_perf_table_ptrs); -EINVAL when a device object cannot be
+ * constructed; otherwise the failing boardobjgrp_objinsert() status.
+ */
+static u32 devinit_get_vin_device_table(struct gk20a *g,
+		struct avfsvinobjs *pvinobjs)
+{
+	u32 status = 0;
+	u8 *vin_table_ptr = NULL;
+	struct vin_descriptor_header_10 vin_desc_table_header = { 0 };
+	struct vin_descriptor_entry_10 vin_desc_table_entry = { 0 };
+	u8 *vin_tbl_entry_ptr = NULL;
+	u32 index = 0;
+	u32 slope, intercept;
+	struct vin_device vin_dev_data;
+	struct vin_device *pvin_dev;
+
+	gk20a_dbg_info("");
+
+	if (g->ops.bios.get_perf_table_ptrs)
+		vin_table_ptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+				g->bios.clock_token, VIN_TABLE);
+
+	/* also taken when the HAL op is absent: never memcpy() from NULL */
+	if (vin_table_ptr == NULL) {
+		status = -1;
+		goto done;
+	}
+
+	memcpy(&vin_desc_table_header, vin_table_ptr,
+	       sizeof(struct vin_descriptor_header_10));
+
+	pvinobjs->calibration_rev_vbios =
+			BIOS_GET_FIELD(vin_desc_table_header.flags0,
+				NV_VIN_DESC_FLAGS0_VIN_CAL_REVISION);
+	pvinobjs->vin_is_disable_allowed =
+			BIOS_GET_FIELD(vin_desc_table_header.flags0,
+				NV_VIN_DESC_FLAGS0_DISABLE_CONTROL);
+
+	/* VIN calibration slope: XX.YYY mV/code => XXYYY uV/code*/
+	slope = ((BIOS_GET_FIELD(vin_desc_table_header.vin_cal,
+			NV_VIN_DESC_VIN_CAL_SLOPE_INTEGER) * 1000)) +
+		((BIOS_GET_FIELD(vin_desc_table_header.vin_cal,
+			NV_VIN_DESC_VIN_CAL_SLOPE_FRACTION)));
+
+	/* VIN calibration intercept: ZZZ.W mV => ZZZW00 uV */
+	intercept = ((BIOS_GET_FIELD(vin_desc_table_header.vin_cal,
+			NV_VIN_DESC_VIN_CAL_INTERCEPT_INTEGER) * 1000)) +
+		    ((BIOS_GET_FIELD(vin_desc_table_header.vin_cal,
+			NV_VIN_DESC_VIN_CAL_INTERCEPT_FRACTION) * 100));
+
+	/*
+	 * Start from an all-zero template so no indeterminate stack bytes
+	 * (e.g. the unset parts of super, state_load) reach the constructor.
+	 */
+	memset(&vin_dev_data, 0, sizeof(vin_dev_data));
+
+	/*
+	 * Read table entries.  The entry pointer is advanced in the loop
+	 * header so that it also moves past entries skipped with "continue".
+	 */
+	vin_tbl_entry_ptr = vin_table_ptr + vin_desc_table_header.header_sizee;
+	for (index = 0; index < vin_desc_table_header.entry_count;
+	     index++, vin_tbl_entry_ptr += vin_desc_table_header.entry_size) {
+		memcpy(&vin_desc_table_entry, vin_tbl_entry_ptr,
+		       sizeof(struct vin_descriptor_entry_10));
+
+		if (vin_desc_table_entry.vin_device_type == CTRL_CLK_VIN_TYPE_DISABLED)
+			continue;
+
+		vin_dev_data.super.type =
+			(u8)vin_desc_table_entry.vin_device_type;
+		vin_dev_data.id = (u8)vin_desc_table_entry.vin_device_id;
+		vin_dev_data.volt_domain_vbios =
+			(u8)vin_desc_table_entry.volt_domain_vbios;
+		vin_dev_data.slope = slope;
+		vin_dev_data.intercept = intercept;
+
+		vin_dev_data.flls_shared_mask = 0;
+
+		pvin_dev = construct_vin_device(g, (void *)&vin_dev_data);
+		if (pvin_dev == NULL) {
+			status = -EINVAL;
+			goto done;
+		}
+
+		/*
+		 * Stop at the first failed insert instead of letting a later
+		 * iteration overwrite the error.
+		 * NOTE(review): pvin_dev is not released on this path; the
+		 * boardobj release routine is not visible from this file.
+		 */
+		status = boardobjgrp_objinsert(&pvinobjs->super.super,
+				(struct boardobj *)pvin_dev, index);
+		if (status)
+			goto done;
+	}
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+/*
+ * Build a vin_device from the template passed in @pargs (a struct
+ * vin_device).  The base object is created by boardobj_construct_super();
+ * the VIN-specific fields are then copied from the template, the pmudatainit
+ * hook is overridden and volt_domain is set to CTRL_VOLT_DOMAIN_LOGIC.
+ * Returns NULL when the base construction fails.
+ */
+static struct vin_device *construct_vin_device(struct gk20a *g, void *pargs)
+{
+	struct vin_device *tmpl = (struct vin_device *)pargs;
+	struct boardobj *base_obj = NULL;
+	struct vin_device *vin;
+
+	gk20a_dbg_info("");
+
+	if (boardobj_construct_super(g, &base_obj,
+				     sizeof(struct vin_device), pargs))
+		return NULL;
+
+	/* override super class interface */
+	base_obj->pmudatainit = vin_device_init_pmudata_super;
+
+	/* fill the VIN-specific part from the caller's template */
+	vin = (struct vin_device *)base_obj;
+	vin->id = tmpl->id;
+	vin->volt_domain_vbios = tmpl->volt_domain_vbios;
+	vin->slope = tmpl->slope;
+	vin->intercept = tmpl->intercept;
+	vin->flls_shared_mask = tmpl->flls_shared_mask;
+	vin->volt_domain = CTRL_VOLT_DOMAIN_LOGIC;
+
+	gk20a_dbg_info(" Done");
+
+	return vin;
+}
+
+/*
+ * Per-object PMU data init for a vin_device: runs the boardobj base
+ * initializer and, if that succeeds, copies id, intercept, volt_domain,
+ * slope and flls_shared_mask into the PMU set structure.
+ * Returns the base initializer's status when it fails, otherwise 0.
+ */
+static u32 vin_device_init_pmudata_super(struct gk20a *g,
+					 struct boardobj *board_obj_ptr,
+					 struct nv_pmu_boardobj *ppmudata)
+{
+	struct vin_device *vin = (struct vin_device *)board_obj_ptr;
+	struct nv_pmu_clk_clk_vin_device_boardobj_set *pmu_set =
+		(struct nv_pmu_clk_clk_vin_device_boardobj_set *)ppmudata;
+	u32 status;
+
+	gk20a_dbg_info("");
+
+	status = boardobj_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (status)
+		return status;
+
+	pmu_set->id = vin->id;
+	pmu_set->intercept = vin->intercept;
+	pmu_set->volt_domain = vin->volt_domain;
+	pmu_set->slope = vin->slope;
+	pmu_set->flls_shared_mask = vin->flls_shared_mask;
+
+	gk20a_dbg_info(" Done");
+
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/clk/clk_vin.h b/drivers/gpu/nvgpu/clk/clk_vin.h
new file mode 100644
index 00000000..1ffd7971
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_vin.h
@@ -0,0 +1,56 @@
+/*
+* Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms and conditions of the GNU General Public License,
+* version 2, as published by the Free Software Foundation.
+*
+* This program is distributed in the hope it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+* more details.
+*/
+
+#ifndef _CLKVIN_H_
+#define _CLKVIN_H_
+
+#include "boardobj/boardobj.h"
+#include "boardobj/boardobjgrp.h"
+#include "clk.h"
+
+struct vin_device;
+struct clk_pmupstate;
+
+/*
+ * VIN device group state: an E32 board-object group (first member) plus
+ * group-wide calibration/disable information.
+ */
+struct avfsvinobjs {
+	struct boardobjgrp_e32 super;
+	/* calibration revision taken from the VBIOS VIN table header flags */
+	u8 calibration_rev_vbios;
+	u8 calibration_rev_fused;
+	/* forwarded to the PMU as b_vin_is_disable_allowed */
+	bool vin_is_disable_allowed;
+};
+/*
+ * Function type of the per-device state_load hook stored in
+ * struct vin_device; returns a u32 status.
+ */
+typedef u32 vin_device_state_load(struct gk20a *g,
+			struct clk_pmupstate *clk, struct vin_device *pdev);
+
+/*
+ * One VIN device.  struct boardobj is the first member so the object can be
+ * stored in, and retrieved from, a board-object group by pointer cast.
+ */
+struct vin_device {
+	struct boardobj super;
+	u8 id;
+	u8 volt_domain;
+	u8 volt_domain_vbios;
+	/* calibration slope and intercept sent to the PMU for this device */
+	u32 slope;
+	u32 intercept;
+	u32 flls_shared_mask;
+
+	/* optional hook of type vin_device_state_load */
+	vin_device_state_load  *state_load;
+};
+
+/*
+ * Get the vin device object stored at descriptor table index @dev_index of
+ * the group @pvinobjs (a struct avfsvinobjs pointer expression).  Both
+ * arguments are parenthesized so arbitrary expressions expand correctly.
+ */
+#define CLK_GET_VIN_DEVICE(pvinobjs, dev_index)                               \
+	((struct vin_device *)BOARDOBJGRP_OBJ_GET_BY_IDX(                       \
+	((struct boardobjgrp *)&((pvinobjs)->super.super)), (dev_index)))
+
+boardobj_construct construct_vindevice;
+boardobj_pmudatainit vindeviceinit_pmudata_super;
+
+u32 clk_vin_sw_setup(struct gk20a *g);
+u32 clk_vin_pmu_setup(struct gk20a *g);
+
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index a52fab7b..822591ed 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -144,7 +144,7 @@ int gp106_init_hal(struct gk20a *g)
 
 	gops->privsecurity = 1;
 	gops->securegpccs = 1;
-
+	gops->pmupstate = true;
 	gp10b_init_mc(gops);
 	gp106_init_gr(gops);
 	gp106_init_ltc(gops);
diff --git a/drivers/gpu/nvgpu/gp106/hw_fuse_gp106.h b/drivers/gpu/nvgpu/gp106/hw_fuse_gp106.h
index 0d4c0362..afabc943 100644
--- a/drivers/gpu/nvgpu/gp106/hw_fuse_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_fuse_gp106.h
@@ -126,4 +126,92 @@ static inline u32 fuse_status_opt_fbp_idx_v(u32 r, u32 i)
 {
 	return (r >> (0 + i*0)) & 0x1;
 }
+/* Register offset of the VIN calibration fuse-revision register. */
+static inline u32 fuse_vin_cal_fuse_rev_r(void)
+{
+	return 0x0002164c;
+}
+/* Extract the revision field (bits 1:0) from the register value @r. */
+static inline u32 fuse_vin_cal_fuse_rev_v(u32 r)
+{
+	return 0x3 & r;
+}
+/* Register offset of the GPC0 VIN calibration fuse. */
+static inline u32 fuse_vin_cal_gpc0_r(void)
+{
+	return 0x00021650;
+}
+/* Extract the GPC0 intercept field (bits 27:14) from @r. */
+static inline u32 fuse_vin_cal_gpc0_icpt_data_v(u32 r)
+{
+	return ((r & 0xFFFC000) >> 14);
+}
+/* Number of fractional bits in the GPC0 intercept field. */
+static inline u32 fuse_vin_cal_gpc0_icpt_frac_size_v(void)
+{
+	return 2;
+}
+/* Extract the GPC0 slope field (bits 13:0) from @r. */
+static inline u32 fuse_vin_cal_gpc0_slope_data_v(u32 r)
+{
+	return (r & 0x3FFF);
+}
+/* Number of fractional bits in the GPC0 slope field. */
+static inline u32 fuse_vin_cal_gpc0_slope_frac_size_v(void)
+{
+	return 10;
+}
+/* Register offset of the GPC1 VIN calibration delta fuse. */
+static inline u32 fuse_vin_cal_gpc1_delta_r(void)
+{
+	return 0x00021654;
+}
+/* Mask of the intercept-delta sign bit (bit 22). */
+static inline u32 fuse_vin_cal_gpc1_icpt_sign_f(void)
+{
+	return 0x400000;
+}
+/* Mask of the slope-delta sign bit (bit 15). */
+static inline u32 fuse_vin_cal_gpc1_slope_sign_f(void)
+{
+	return 0x8000;
+}
+/* Extract the intercept-delta field (bits 21:12) from @r. */
+static inline u32 fuse_vin_cal_gpc1_icpt_data_v(u32 r)
+{
+	return ((r & 0x3FF000) >> 12);
+}
+/* Number of fractional bits in the intercept-delta field. */
+static inline u32 fuse_vin_cal_gpc1_icpt_frac_size_v(void)
+{
+	return 2;
+}
+/* Extract the slope-delta field (bits 10:0) from @r. */
+static inline u32 fuse_vin_cal_gpc1_slope_data_v(u32 r)
+{
+	return (r & 0x7FF);
+}
+/* Number of fractional bits in the slope-delta field. */
+static inline u32 fuse_vin_cal_gpc1_slope_frac_size_v(void)
+{
+	return 10;
+}
+/* Register offset of the GPC2 VIN calibration delta fuse. */
+static inline u32 fuse_vin_cal_gpc2_delta_r(void)
+{
+	return 0x00021658;
+}
+/* Register offset of the GPC3 VIN calibration delta fuse. */
+static inline u32 fuse_vin_cal_gpc3_delta_r(void)
+{
+	return 0x0002165c;
+}
+/* Register offset of the GPC4 VIN calibration delta fuse. */
+static inline u32 fuse_vin_cal_gpc4_delta_r(void)
+{
+	return 0x00021660;
+}
+/* Register offset of the GPC5 VIN calibration delta fuse. */
+static inline u32 fuse_vin_cal_gpc5_delta_r(void)
+{
+	return 0x00021664;
+}
+/* Register offset of the shared VIN calibration delta fuse. */
+static inline u32 fuse_vin_cal_shared_delta_r(void)
+{
+	return 0x00021668;
+}
+/* Register offset of the SRAM VIN calibration delta fuse. */
+static inline u32 fuse_vin_cal_sram_delta_r(void)
+{
+	return 0x0002166c;
+}
+/* Extract the SRAM intercept-delta field (bits 21:12) from @r. */
+static inline u32 fuse_vin_cal_sram_icpt_data_v(u32 r)
+{
+	return ((r & 0x3FF000) >> 12);
+}
+/* Number of fractional bits in the SRAM intercept-delta field. */
+static inline u32 fuse_vin_cal_sram_icpt_frac_size_v(void)
+{
+	return 1;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index c4e44483..2699dd7a 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -193,7 +193,7 @@ int gp10b_init_hal(struct gk20a *g)
 	u32 val;
 
 	*gops = gp10b_ops;
-
+	gops->pmupstate = false;
 #ifdef CONFIG_TEGRA_ACR
 	if (platform->is_fmodel) {
 		gops->privsecurity = 0;
diff --git a/drivers/gpu/nvgpu/include/bios.h b/drivers/gpu/nvgpu/include/bios.h
new file mode 100644
index 00000000..3af5bcf4
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/bios.h
@@ -0,0 +1,411 @@
+/*
+ * vbios tables support
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef NVGPU_INCLUDE_BIOS_H
+#define NVGPU_INCLUDE_BIOS_H
+
+#include "gk20a/gk20a.h"
+
+/*
+ * Extract a bit field from @value: mask with <name>_MASK, then shift right
+ * by <name>_SHIFT.  @value is parenthesized so that expressions built from
+ * lower-precedence operators (e.g. a | b) are masked as a whole.
+ */
+#define BIOS_GET_FIELD(value, name) (((value) & name##_MASK) >> name##_SHIFT)
+
+/* Leading two bytes of an FLL descriptor table header (packed). */
+struct fll_descriptor_header {
+	u8 version;
+	u8 size;
+} __packed;
+
+#define FLL_DESCRIPTOR_HEADER_10_SIZE_4     4
+#define FLL_DESCRIPTOR_HEADER_10_SIZE_6     6
+
+/*
+ * FLL descriptor table header, packed: 6 bytes in total
+ * (FLL_DESCRIPTOR_HEADER_10_SIZE_6); the fields before max_min_freq_mhz
+ * occupy 4 bytes (FLL_DESCRIPTOR_HEADER_10_SIZE_4).
+ */
+struct fll_descriptor_header_10 {
+	u8 version;
+	u8 header_size;
+	u8 entry_size;
+	u8 entry_count;
+	u16 max_min_freq_mhz;
+} __packed;
+
+#define FLL_DESCRIPTOR_ENTRY_10_SIZE     15
+
+/*
+ * FLL descriptor table entry, packed: 15 bytes, matching
+ * FLL_DESCRIPTOR_ENTRY_10_SIZE.  lut_params and fll_params are bit-packed;
+ * decode them with the NV_FLL_DESC_LUT_PARAMS_* / NV_FLL_DESC_FLL_PARAMS_*
+ * mask/shift pairs.
+ */
+struct fll_descriptor_entry_10 {
+	u8 fll_device_type;
+	u8 clk_domain;
+	u8 fll_device_id;
+	u16 lut_params;
+	u8 vin_idx_logic;
+	u8 vin_idx_sram;
+	u16 fll_params;
+	u8 min_freq_vfe_idx;
+	u8 freq_ctrl_idx;
+	u16 ref_freq_mhz;
+	u16 ffr_cutoff_freq_mhz;
+} __packed;
+
+#define NV_FLL_DESC_FLL_PARAMS_MDIV_MASK 0x1F
+#define NV_FLL_DESC_FLL_PARAMS_MDIV_SHIFT 0
+
+#define NV_FLL_DESC_LUT_PARAMS_VSELECT_MASK 0x3
+#define NV_FLL_DESC_LUT_PARAMS_VSELECT_SHIFT 0
+
+#define NV_FLL_DESC_LUT_PARAMS_HYSTERISIS_THRESHOLD_MASK 0x3C
+#define NV_FLL_DESC_LUT_PARAMS_HYSTERISIS_THRESHOLD_SHIFT 2
+
+/*
+ * VIN descriptor table header, packed (9 bytes).  flags0 and vin_cal are
+ * bit-packed; decode them with the NV_VIN_DESC_FLAGS0_* and
+ * NV_VIN_DESC_VIN_CAL_* mask/shift pairs.
+ * Note: the header-size field is spelled "header_sizee"; code that reads the
+ * table refers to it by that exact name.
+ */
+struct vin_descriptor_header_10 {
+	u8 version;
+	u8 header_sizee;
+	u8 entry_size;
+	u8 entry_count;
+	u8 flags0;
+	u32 vin_cal;
+} __packed;
+
+/* VIN descriptor table entry, packed (3 bytes). */
+struct vin_descriptor_entry_10 {
+	u8 vin_device_type;
+	u8 volt_domain_vbios;
+	u8 vin_device_id;
+} __packed;
+
+#define NV_VIN_DESC_FLAGS0_VIN_CAL_REVISION_MASK 0x7
+#define NV_VIN_DESC_FLAGS0_VIN_CAL_REVISION_SHIFT 0
+
+#define NV_VIN_DESC_FLAGS0_DISABLE_CONTROL_MASK 0x8
+#define NV_VIN_DESC_FLAGS0_DISABLE_CONTROL_SHIFT 3
+
+#define NV_VIN_DESC_VIN_CAL_SLOPE_FRACTION_MASK 0x1FF
+#define NV_VIN_DESC_VIN_CAL_SLOPE_FRACTION_SHIFT 0
+
+#define NV_VIN_DESC_VIN_CAL_SLOPE_INTEGER_MASK 0x3C00
+#define NV_VIN_DESC_VIN_CAL_SLOPE_INTEGER_SHIFT  10
+
+#define NV_VIN_DESC_VIN_CAL_INTERCEPT_FRACTION_MASK 0x3C000
+#define NV_VIN_DESC_VIN_CAL_INTERCEPT_FRACTION_SHIFT 14
+
+#define NV_VIN_DESC_VIN_CAL_INTERCEPT_INTEGER_MASK 0xFFC0000
+#define NV_VIN_DESC_VIN_CAL_INTERCEPT_INTEGER_SHIFT 18
+
+#define VBIOS_CLOCKS_TABLE_1X_HEADER_SIZE_07 0x07
+/*
+ * Clocks table header, packed: 7 bytes, matching
+ * VBIOS_CLOCKS_TABLE_1X_HEADER_SIZE_07.
+ */
+struct vbios_clocks_table_1x_header {
+	u8 version;
+	u8 header_size;
+	u8 entry_size;
+	u8 entry_count;
+	u8 clocks_hal;
+	u16 cntr_sampling_periodms;
+} __packed;
+
+#define VBIOS_CLOCKS_TABLE_1X_ENTRY_SIZE_09                                 0x09
+/*
+ * Clocks table entry, packed: 9 bytes, matching
+ * VBIOS_CLOCKS_TABLE_1X_ENTRY_SIZE_09.  All four fields are bit-packed;
+ * decode them with the NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_* mask/shift pairs.
+ */
+struct vbios_clocks_table_1x_entry {
+	u8 flags0;
+	u16 param0;
+	u32 param1;
+	u16 param2;
+} __packed;
+
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_FLAGS0_USAGE_MASK                    0x1F
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_FLAGS0_USAGE_SHIFT                   0
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_FLAGS0_USAGE_FIXED                   0x00
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_FLAGS0_USAGE_MASTER                  0x01
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_FLAGS0_USAGE_SLAVE                   0x02
+
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM0_PROG_CLK_PROG_IDX_FIRST_MASK  0xFF
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM0_PROG_CLK_PROG_IDX_FIRST_SHIFT  0
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM0_PROG_CLK_PROG_IDX_LAST_MASK  0xFF00
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM0_PROG_CLK_PROG_IDX_LAST_SHIFT 0x08
+
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM1_FIXED_FREQUENCY_MHZ_MASK        0xFFFF
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM1_FIXED_FREQUENCY_MHZ_SHIFT       0
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM1_MASTER_FREQ_OC_DELTA_MIN_MHZ_MASK 0xFFFF
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM1_MASTER_FREQ_OC_DELTA_MIN_MHZ_SHIFT 0
+
+/*
+ * OC delta max occupies the upper 16 bits of param1, so the shift must be 16
+ * for BIOS_GET_FIELD() to return the field right-aligned (with a shift of 0
+ * the value stays in bits 31:16 and truncates to 0 in a 16-bit destination).
+ */
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM1_MASTER_FREQ_OC_DELTA_MAX_MHZ_MASK 0xFFFF0000
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM1_MASTER_FREQ_OC_DELTA_MAX_MHZ_SHIFT 16
+
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM1_SLAVE_MASTER_DOMAIN_MASK         0xF
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM1_SLAVE_MASTER_DOMAIN_SHIFT       0
+
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_NOISE_UNAWARE_ORDERING_IDX_MASK 0xF
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_NOISE_UNAWARE_ORDERING_IDX_SHIFT 0
+
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_NOISE_AWARE_ORDERING_IDX_MASK     0xF0
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_NOISE_AWARE_ORDERING_IDX_SHIFT   4
+
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_FORCE_NOISE_UNAWARE_ORDERING_MASK 0x100
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_FORCE_NOISE_UNAWARE_ORDERING_SHIFT 8
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_FORCE_NOISE_UNAWARE_ORDERING_FALSE   0x00
+#define NV_VBIOS_CLOCKS_TABLE_1X_ENTRY_PARAM2_PROG_FORCE_NOISE_UNAWARE_ORDERING_TRUE    0x01
+
+#define VBIOS_CLOCK_PROGRAMMING_TABLE_1X_HEADER_SIZE_08                              0x08
+/*
+ * Clock programming table header, packed: 8 bytes, matching
+ * VBIOS_CLOCK_PROGRAMMING_TABLE_1X_HEADER_SIZE_08.  Carries size/count
+ * pairs for the main, slave and VF entries.
+ */
+struct vbios_clock_programming_table_1x_header {
+	u8 version;
+	u8 header_size;
+	u8 entry_size;
+	u8 entry_count;
+	u8 slave_entry_size;
+	u8 slave_entry_count;
+	u8 vf_entry_size;
+	u8 vf_entry_count;
+} __packed;
+
+#define VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_SIZE_05                      0x05
+#define VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_SIZE_0D                      0x0D
+/*
+ * Clock programming table entry, packed: 13 bytes in total
+ * (VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_SIZE_0D); the fields up to and
+ * including param1 occupy 5 bytes
+ * (VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_SIZE_05).
+ */
+struct vbios_clock_programming_table_1x_entry {
+	u8 flags0;
+	u16 freq_max_mhz;
+	u8 param0;
+	u8 param1;
+	u32 rsvd;
+	u32 rsvd1;
+} __packed;
+
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_MASK          0xF
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_SHIFT         0
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_MASTER_RATIO   0x00
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_MASTER_TABLE   0x01
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_TYPE_SLAVE          0x02
+
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_SOURCE_MASK          0x70
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_SOURCE_SHIFT         4
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_SOURCE_PLL          0x00
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_SOURCE_ONE_SOURCE   0x01
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_SOURCE_FLL        0x02
+
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_OVOC_ENABLED_MASK    0x80
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_OVOC_ENABLED_SHIFT   7
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_OVOC_ENABLED_FALSE  0x00
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_FLAGS0_OVOC_ENABLED_TRUE   0x01
+
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_PARAM0_PLL_PLL_INDEX_MASK   0xFF
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_PARAM0_PLL_PLL_INDEX_SHIFT  0
+
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_PARAM1_PLL_FREQ_STEP_SIZE_MASK   0xFF
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_ENTRY_PARAM1_PLL_FREQ_STEP_SIZE_SHIFT  0
+
+#define VBIOS_CLOCK_PROGRAMMING_TABLE_1X_SLAVE_ENTRY_SIZE_03              0x03
+/*
+ * Clock programming slave entry, packed: 3 bytes, matching
+ * VBIOS_CLOCK_PROGRAMMING_TABLE_1X_SLAVE_ENTRY_SIZE_03.
+ */
+struct vbios_clock_programming_table_1x_slave_entry {
+	u8 clk_dom_idx;
+	u16 param0;
+} __packed;
+
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_SLAVE_ENTRY_PARAM0_MASTER_RATIO_RATIO_MASK 0xFF
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_SLAVE_ENTRY_PARAM0_MASTER_RATIO_RATIO_SHIFT 0
+
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_SLAVE_ENTRY_PARAM0_MASTER_TABLE_FREQ_MASK  0x3FFF
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_SLAVE_ENTRY_PARAM0_MASTER_TABLE_FREQ_SHIFT  0
+
+#define VBIOS_CLOCK_PROGRAMMING_TABLE_1X_VF_ENTRY_SIZE_02                   0x02
+/*
+ * Clock programming VF entry, packed: 2 bytes, matching
+ * VBIOS_CLOCK_PROGRAMMING_TABLE_1X_VF_ENTRY_SIZE_02.
+ */
+struct vbios_clock_programming_table_1x_vf_entry {
+	u8 vfe_idx;
+	u8 param0;
+} __packed;
+
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_VF_ENTRY_PARAM0_FLL_GAIN_VFE_IDX_MASK 0xFF
+#define NV_VBIOS_CLOCK_PROGRAMMING_TABLE_1X_VF_ENTRY_PARAM0_FLL_GAIN_VFE_IDX_SHIFT 0
+
+/*
+ * VFE 3x table header, packed (7 bytes).  Carries separate size/count pairs
+ * for the variable entries and the equation entries.
+ */
+struct vbios_vfe_3x_header_struct {
+	u8 version;
+	u8 header_size;
+	u8 vfe_var_entry_size;
+	u8 vfe_var_entry_count;
+	u8 vfe_equ_entry_size;
+	u8 vfe_equ_entry_count;
+	u8 polling_periodms;
+} __packed;
+
+#define VBIOS_VFE_3X_VAR_ENTRY_SIZE_11                                      0x11
+#define VBIOS_VFE_3X_VAR_ENTRY_SIZE_19                                      0x19
+/*
+ * VFE 3x variable entry, packed: 25 bytes in total
+ * (VBIOS_VFE_3X_VAR_ENTRY_SIZE_19); the fields up to and including param1
+ * occupy 17 bytes (VBIOS_VFE_3X_VAR_ENTRY_SIZE_11).  type takes one of the
+ * VBIOS_VFE_3X_VAR_ENTRY_TYPE_* values.
+ */
+struct vbios_vfe_3x_var_entry_struct {
+	u8 type;
+	u32 out_range_min;
+	u32 out_range_max;
+	u32 param0;
+	u32 param1;
+	u32 param2;
+	u32 param3;
+} __packed;
+
+#define VBIOS_VFE_3X_VAR_ENTRY_TYPE_DISABLED                                0x00
+#define VBIOS_VFE_3X_VAR_ENTRY_TYPE_SINGLE_FREQUENCY                        0x01
+#define VBIOS_VFE_3X_VAR_ENTRY_TYPE_SINGLE_VOLTAGE                          0x02
+#define VBIOS_VFE_3X_VAR_ENTRY_TYPE_SINGLE_SENSED_TEMP                      0x03
+#define VBIOS_VFE_3X_VAR_ENTRY_TYPE_SINGLE_SENSED_FUSE                      0x04
+#define VBIOS_VFE_3X_VAR_ENTRY_TYPE_DERIVED_PRODUCT                         0x05
+#define VBIOS_VFE_3X_VAR_ENTRY_TYPE_DERIVED_SUM                             0x06
+
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSTEMP_TH_CH_IDX_MASK 0xFF
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSTEMP_TH_CH_IDX_SHIFT 0
+
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSTEMP_HYS_POS_MASK 0xFF00
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSTEMP_HYS_POS_SHIFT 8
+
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSTEMP_HYS_NEG_MASK 0xFF0000
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSTEMP_HYS_NEG_SHIFT 16
+
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_VFIELD_ID_MASK 0xFF
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_VFIELD_ID_SHIFT 0
+
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_VFIELD_ID_VER_MASK 0xFF00
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_VFIELD_ID_VER_SHIFT 8
+
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_EXPECTED_VER_MASK 0xFF0000
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_EXPECTED_VER_SHIFT 16
+
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_USE_DEFAULT_ON_VER_CHECK_FAIL_MASK 0x1000000
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_USE_DEFAULT_ON_VER_CHECK_FAIL_SHIFT 24
+
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_USE_DEFAULT_ON_VER_CHECK_FAIL_YES 0x00000001
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_USE_DEFAULT_ON_VER_CHECK_FAIL_NO 0x00000000
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_DPROD_VFE_VAR_IDX_0_MASK 0xFF
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_DPROD_VFE_VAR_IDX_0_SHIFT 0
+
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_DPROD_VFE_VAR_IDX_1_MASK 0xFF00
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_DPROD_VFE_VAR_IDX_1_SHIFT 8
+
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_DSUM_VFE_VAR_IDX_0_MASK 0xFF
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_DSUM_VFE_VAR_IDX_0_SHIFT 0
+
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_DSUM_VFE_VAR_IDX_1_MASK 0xFF00
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR0_DSUM_VFE_VAR_IDX_1_SHIFT 8
+
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR1_SSFUSE_DEFAULT_VAL_MASK 0xFFFFFFFF
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR1_SSFUSE_DEFAULT_VAL_SHIFT 0
+
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR1_SSFUSE_HW_CORRECTION_SCALE_MASK 0xFFFFFFFF
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR1_SSFUSE_HW_CORRECTION_SCALE_SHIFT 0
+
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR1_SSFUSE_HW_CORRECTION_OFFSET_MASK 0xFFFFFFFF
+#define VBIOS_VFE_3X_VAR_ENTRY_PAR1_SSFUSE_HW_CORRECTION_OFFSET_SHIFT 0
+
+#define VBIOS_VFE_3X_EQU_ENTRY_SIZE_17                                      0x17
+#define VBIOS_VFE_3X_EQU_ENTRY_SIZE_18                                      0x18
+
+/*
+ * VFE 3x equation entry, packed: 24 bytes in total
+ * (VBIOS_VFE_3X_EQU_ENTRY_SIZE_18); without the trailing param3 byte it is
+ * 23 bytes (VBIOS_VFE_3X_EQU_ENTRY_SIZE_17).  type takes one of the
+ * VBIOS_VFE_3X_EQU_ENTRY_TYPE_* values.
+ */
+struct vbios_vfe_3x_equ_entry_struct {
+	u8 type;
+	u8 var_idx;
+	u8 equ_idx_next;
+	u32 out_range_min;
+	u32 out_range_max;
+	u32 param0;
+	u32 param1;
+	u32 param2;
+	u8 param3;
+} __packed;
+
+
+#define VBIOS_VFE_3X_EQU_ENTRY_TYPE_DISABLED                                0x00
+#define VBIOS_VFE_3X_EQU_ENTRY_TYPE_QUADRATIC                               0x01
+#define VBIOS_VFE_3X_EQU_ENTRY_TYPE_MINMAX                                  0x02
+#define VBIOS_VFE_3X_EQU_ENTRY_TYPE_COMPARE                                 0x03
+#define VBIOS_VFE_3X_EQU_ENTRY_TYPE_QUADRATIC_FXP                           0x04
+#define VBIOS_VFE_3X_EQU_ENTRY_TYPE_MINMAX_FXP                              0x05
+
+#define VBIOS_VFE_3X_EQU_ENTRY_IDX_INVALID                                  0xFF
+
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR0_QUADRATIC_C0_MASK 0xFFFFFFFF
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR0_QUADRATIC_C0_SHIFT 0
+
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR0_MINMAX_VFE_EQU_IDX_0_MASK 0xFF
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR0_MINMAX_VFE_EQU_IDX_0_SHIFT 0
+
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR0_MINMAX_VFE_EQU_IDX_1_MASK 0xFF00
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR0_MINMAX_VFE_EQU_IDX_1_SHIFT 8
+
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR0_MINMAX_CRIT_MASK 0x10000
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR0_MINMAX_CRIT_SHIFT 16
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR0_MINMAX_CRIT_MIN 0x00000000
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR0_MINMAX_CRIT_MAX 0x00000001
+
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR0_COMPARE_CRIT_MASK 0xFFFFFFFF
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR0_COMPARE_CRIT_SHIFT 0
+
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR1_QUADRATIC_C1_MASK 0xFFFFFFFF
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR1_QUADRATIC_C1_SHIFT 0
+
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_VFE_EQU_IDX_TRUE_MASK 0xFF
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_VFE_EQU_IDX_TRUE_SHIFT 0
+
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_VFE_EQU_IDX_FALSE_MASK 0xFF00
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_VFE_EQU_IDX_FALSE_SHIFT 8
+
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_FUNCTION_MASK 0x70000
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_FUNCTION_SHIFT 16
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_FUNCTION_EQUAL 0x00000000
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_FUNCTION_GREATER_EQ 0x00000001
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_FUNCTION_GREATER 0x00000002
+
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_MASK 0xF
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_SHIFT 0
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_UNITLESS                     0x0
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_FREQ_MHZ                     0x1
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_VOLT_UV                      0x2
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_VF_GAIN                      0x3
+#define VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_VOLT_DELTA_UV                0x4
+
+#define NV_VFIELD_DESC_SIZE_BYTE            0x00000000
+#define NV_VFIELD_DESC_SIZE_WORD            0x00000001
+#define NV_VFIELD_DESC_SIZE_DWORD           0x00000002
+/*
+ * Size code (bits 4:3 of strap_reg_desc) of the register entry pointed to by
+ * @pvregentry; compare against NV_VFIELD_DESC_SIZE_*.  The argument is
+ * parenthesized so any pointer expression can be passed.
+ */
+#define VFIELD_SIZE(pvregentry) (((pvregentry)->strap_reg_desc & 0x18) >> 3)
+
+#define NV_PMU_BIOS_VFIELD_DESC_CODE_INVALID         0x00000000
+#define NV_PMU_BIOS_VFIELD_DESC_CODE_REG             0x00000001
+#define NV_PMU_BIOS_VFIELD_DESC_CODE_INDEX_REG       0x00000002
+
+#define NV_VFIELD_DESC_CODE_INVALID         NV_PMU_BIOS_VFIELD_DESC_CODE_INVALID
+#define NV_VFIELD_DESC_CODE_REG             NV_PMU_BIOS_VFIELD_DESC_CODE_REG
+#define NV_VFIELD_DESC_CODE_INDEX_REG       NV_PMU_BIOS_VFIELD_DESC_CODE_INDEX_REG
+
+/*
+ * Descriptor code (bits 7:5 of strap_reg_desc) of the register entry pointed
+ * to by @pvregentry; compare against NV_VFIELD_DESC_CODE_*.  The argument is
+ * parenthesized so any pointer expression can be passed.
+ */
+#define VFIELD_CODE(pvregentry) (((pvregentry)->strap_reg_desc & 0xE0) >> 5)
+
+#define VFIELD_ID_STRAP_IDDQ                    0x09
+#define VFIELD_ID_STRAP_IDDQ_1                  0x0B
+
+#define VFIELD_REG_HEADER_SIZE 3
+/*
+ * Header of the vfield register table, packed: 3 bytes, matching
+ * VFIELD_REG_HEADER_SIZE.
+ */
+struct vfield_reg_header {
+	u8 version;
+	u8 entry_size;
+	u8 count;
+} __packed;
+
+#define VBIOS_VFIELD_REG_TABLE_VERSION_1_0  0x10
+
+
+#define VFIELD_REG_ENTRY_SIZE 13
+/*
+ * Entry of the vfield register table, packed: 13 bytes, matching
+ * VFIELD_REG_ENTRY_SIZE.  strap_reg_desc is bit-packed; decode it with
+ * VFIELD_SIZE() and VFIELD_CODE().
+ */
+struct vfield_reg_entry {
+	u8 strap_reg_desc;
+	u32 reg;
+	u32 reg_index;
+	u32 index;
+} __packed;
+
+#define VFIELD_HEADER_SIZE 3
+
+/* Header of the vfield table, packed: 3 bytes, matching VFIELD_HEADER_SIZE. */
+struct vfield_header {
+	u8 version;
+	u8 entry_size;
+	u8 count;
+} __packed;
+
+#define VBIOS_VFIELD_TABLE_VERSION_1_0  0x10
+
+/*
+ * Decode the bit-packed strap_desc of a struct vfield_entry passed by value:
+ * START = bits 4:0, STOP = bits 9:5, REG = bits 13:10.  The argument is
+ * parenthesized so expressions such as *p or arr[i] expand correctly.
+ */
+#define VFIELD_BIT_START(ventry) ((ventry).strap_desc & 0x1F)
+#define VFIELD_BIT_STOP(ventry) (((ventry).strap_desc & 0x3E0) >> 5)
+#define VFIELD_BIT_REG(ventry) (((ventry).strap_desc & 0x3C00) >> 10)
+
+#define VFIELD_ENTRY_SIZE 3
+
+/*
+ * Entry of the vfield table, packed: 3 bytes, matching VFIELD_ENTRY_SIZE.
+ * strap_desc is bit-packed; decode it with the VFIELD_BIT_* macros.
+ */
+struct vfield_entry {
+	u8 strap_id;
+	u16 strap_desc;
+} __packed;
+
+#endif
diff --git a/drivers/gpu/nvgpu/perf/perf.c b/drivers/gpu/nvgpu/perf/perf.c
new file mode 100644
index 00000000..3821a8dc
--- /dev/null
+++ b/drivers/gpu/nvgpu/perf/perf.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "perf.h"
+#include "pmuif/gpmuifperf.h"
+#include "pmuif/gpmuifperfvfe.h"
+#include "gk20a/pmu_gk20a.h"
+
+struct perfrpc_pmucmdhandler_params {
+	struct nv_pmu_perf_rpc *prpccall;	/* RPC buffer whose b_supported the handler tests */
+	u32 success;	/* set to 1 by perfrpc_pmucmdhandler() */
+};
+
+/* PMU completion callback for the VFE load RPC; param is the caller's context. */
+static void perfrpc_pmucmdhandler(struct gk20a *g, struct pmu_msg *msg,
+				  void *param, u32 handle, u32 status)
+{
+	struct perfrpc_pmucmdhandler_params *ctx = param;
+
+	gk20a_dbg_info("");
+
+	if (msg->msg.perf.msg_type != NV_PMU_PERF_MSG_ID_RPC) {
+		gk20a_err(dev_from_gk20a(g), "unsupported msg for VFE LOAD RPC %x",
+			  msg->msg.perf.msg_type);
+		return;
+	}
+
+	/* Flag success only when the RPC buffer has b_supported set. */
+	if (ctx->prpccall->b_supported)
+		ctx->success = 1;
+}
+
+/* Post the VFE load RPC to the PMU and wait for the handler to flag success. */
+u32 perf_pmu_vfe_load(struct gk20a *g)
+{
+	/* Zero-initialised so fields not set below carry no stack garbage. */
+	struct pmu_cmd cmd = { {0} };
+	struct pmu_payload payload = { {0} };
+	u32 status;
+	u32 seqdesc;
+	struct nv_pmu_perf_rpc rpccall = {0};
+	struct perfrpc_pmucmdhandler_params handler = {0};
+
+	cmd.hdr.unit_id = PMU_UNIT_PERF;
+	cmd.hdr.size = (u32)sizeof(struct nv_pmu_perf_cmd) +
+		       (u32)sizeof(struct pmu_hdr);
+	cmd.cmd.perf.cmd_type = NV_PMU_PERF_CMD_ID_RPC;
+
+	rpccall.function = NV_PMU_PERF_RPC_ID_VFE_LOAD;
+	rpccall.params.vfe_load.b_load = true;
+
+	/* One RPC buffer serves as both the request and the reply payload. */
+	payload.in.buf = (u8 *)&rpccall;
+	payload.in.size = (u32)sizeof(struct nv_pmu_perf_rpc);
+	payload.in.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
+	payload.in.offset = NV_PMU_PERF_CMD_RPC_ALLOC_OFFSET;
+
+	payload.out.buf = (u8 *)&rpccall;
+	payload.out.size = (u32)sizeof(struct nv_pmu_perf_rpc);
+	payload.out.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
+	payload.out.offset = NV_PMU_PERF_MSG_RPC_ALLOC_OFFSET;
+
+	handler.prpccall = &rpccall;
+	handler.success = 0;
+
+	status = gk20a_pmu_cmd_post(g, &cmd, NULL, &payload,
+			PMU_COMMAND_QUEUE_LPQ,
+			perfrpc_pmucmdhandler, (void *)&handler,
+			&seqdesc, ~0);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g), "unable to post perf RPC cmd %x",
+			  cmd.cmd.perf.cmd_type);
+		goto done;
+	}
+
+	/* NOTE(review): handler/rpccall are stack locals; confirm no callback can run after a timeout. */
+	pmu_wait_message_cond(&g->pmu,
+			gk20a_get_gr_idle_timeout(g),
+			&handler.success, 1);
+
+	if (handler.success == 0) {
+		status = -EINVAL;
+		gk20a_err(dev_from_gk20a(g), "rpc call to load VFE failed");
+	}
+done:
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/perf/perf.h b/drivers/gpu/nvgpu/perf/perf.h
new file mode 100644
index 00000000..02aed7a6
--- /dev/null
+++ b/drivers/gpu/nvgpu/perf/perf.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _PERF_H_
+#define _PERF_H_
+
+#include "vfe_equ.h"
+#include "vfe_var.h"
+#include "gk20a/gk20a.h"
+
+#define CTRL_PERF_VFE_VAR_TYPE_INVALID                               0x00
+#define CTRL_PERF_VFE_VAR_TYPE_DERIVED                               0x01
+#define CTRL_PERF_VFE_VAR_TYPE_DERIVED_PRODUCT                       0x02
+#define CTRL_PERF_VFE_VAR_TYPE_DERIVED_SUM                           0x03
+#define CTRL_PERF_VFE_VAR_TYPE_SINGLE                                0x04
+#define CTRL_PERF_VFE_VAR_TYPE_SINGLE_FREQUENCY                      0x05
+#define CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED                         0x06
+#define CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_FUSE                    0x07
+#define CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_TEMP                    0x08
+#define CTRL_PERF_VFE_VAR_TYPE_SINGLE_VOLTAGE                        0x09
+
+#define CTRL_PERF_VFE_VAR_SINGLE_OVERRIDE_TYPE_NONE                  0x00
+#define CTRL_PERF_VFE_VAR_SINGLE_OVERRIDE_TYPE_VALUE                 0x01
+#define CTRL_PERF_VFE_VAR_SINGLE_OVERRIDE_TYPE_OFFSET                0x02
+#define CTRL_PERF_VFE_VAR_SINGLE_OVERRIDE_TYPE_SCALE                 0x03
+
+#define CTRL_PERF_VFE_EQU_TYPE_INVALID                               0x00
+#define CTRL_PERF_VFE_EQU_TYPE_COMPARE                               0x01
+#define CTRL_PERF_VFE_EQU_TYPE_MINMAX                                0x02
+#define CTRL_PERF_VFE_EQU_TYPE_QUADRATIC                             0x03
+
+#define CTRL_PERF_VFE_EQU_OUTPUT_TYPE_UNITLESS                       0x00
+#define CTRL_PERF_VFE_EQU_OUTPUT_TYPE_FREQ_MHZ                       0x01
+#define CTRL_PERF_VFE_EQU_OUTPUT_TYPE_VOLT_UV                        0x02
+#define CTRL_PERF_VFE_EQU_OUTPUT_TYPE_VF_GAIN                        0x03
+#define CTRL_PERF_VFE_EQU_OUTPUT_TYPE_VOLT_DELTA_UV                  0x04
+
+#define CTRL_PERF_VFE_EQU_QUADRATIC_COEFF_COUNT                      0x03
+
+#define CTRL_PERF_VFE_EQU_COMPARE_FUNCTION_EQUAL                     0x00
+#define CTRL_PERF_VFE_EQU_COMPARE_FUNCTION_GREATER_EQ                0x01
+#define CTRL_PERF_VFE_EQU_COMPARE_FUNCTION_GREATER                   0x02
+
+struct perf_pmupstate {
+	struct vfe_vars vfe_varobjs;	/* group of VFE variable objects */
+	struct vfe_equs vfe_equobjs;	/* group of VFE equation objects */
+};
+
+u32 perf_pmu_vfe_load(struct gk20a *g);
+
+#endif
diff --git a/drivers/gpu/nvgpu/perf/vfe_equ.c b/drivers/gpu/nvgpu/perf/vfe_equ.c
new file mode 100644
index 00000000..6630fb21
--- /dev/null
+++ b/drivers/gpu/nvgpu/perf/vfe_equ.c
@@ -0,0 +1,590 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "perf.h"
+#include "vfe_equ.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e255.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "pmuif/gpmuifperf.h"
+#include "pmuif/gpmuifperfvfe.h"
+#include "gm206/bios_gm206.h"
+#include "ctrl/ctrlclk.h"
+#include "ctrl/ctrlvolt.h"
+#include "gk20a/pmu_gk20a.h"
+
+static struct vfe_equ *construct_vfe_equ(struct gk20a *g, void *pargs);
+static u32 devinit_get_vfe_equ_table(struct gk20a *g,
+	struct vfe_equs *pequobjs);
+
+/*
+ * Group pmudatainit hook: run the E255 base init; log and return any failure.
+ */
+static u32 _vfe_equs_pmudatainit(struct gk20a *g,
+				 struct boardobjgrp *pboardobjgrp,
+				 struct nv_pmu_boardobjgrp_super *pboardobjgrppmu)
+{
+	u32 rc;
+
+	rc = boardobjgrp_pmudatainit_e255(g, pboardobjgrp, pboardobjgrppmu);
+	if (rc) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error updating pmu boardobjgrp for vfe equ 0x%x",
+			  rc);
+	}
+	return rc;
+}
+
+/* Return the PMU-side slot for object idx of the VFE equation SET payload. */
+static u32 _vfe_equs_pmudata_instget(struct gk20a *g,
+				     struct nv_pmu_boardobjgrp *pmuboardobjgrp,
+				     struct nv_pmu_boardobj **ppboardobjpmudata,
+				     u8 idx)
+{
+	struct nv_pmu_perf_vfe_equ_boardobj_grp_set *set;
+
+	gk20a_dbg_info("");
+
+	if (idx >= CTRL_BOARDOBJGRP_E255_MAX_OBJECTS)
+		return -EINVAL;
+
+	set = (struct nv_pmu_perf_vfe_equ_boardobj_grp_set *)pmuboardobjgrp;
+	*ppboardobjpmudata =
+		(struct nv_pmu_boardobj *)&set->objects[idx].data.board_obj;
+	gk20a_dbg_info(" Done");
+	return 0;
+}
+
+/*
+ * Construct the VFE equation E255 group, wire its PMU SET hooks, parse VBIOS.
+ */
+u32 vfe_equ_sw_setup(struct gk20a *g)
+{
+	u32 status;
+	struct boardobjgrp *pboardobjgrp = NULL;
+	struct vfe_equs *pvfeequobjs;
+
+	gk20a_dbg_info("");
+
+	status = boardobjgrpconstruct_e255(&g->perf_pmu.vfe_equobjs.super);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error creating boardobjgrp for vfe equ, status - 0x%x",
+			  status);
+		goto done;
+	}
+
+	pboardobjgrp = &g->perf_pmu.vfe_equobjs.super.super;
+	pvfeequobjs = &(g->perf_pmu.vfe_equobjs);
+
+	BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, PERF, VFE_EQU);
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp,
+			perf, PERF, vfe_equ, VFE_EQU);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x",
+			status);
+		goto done;
+	}
+
+	pboardobjgrp->pmudatainit  = _vfe_equs_pmudatainit;
+	pboardobjgrp->pmudatainstget  = _vfe_equs_pmudata_instget;
+
+	status = devinit_get_vfe_equ_table(g, pvfeequobjs);
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+/* Hand the constructed VFE equation group to its pmuinithandle() hook. */
+u32 vfe_equ_pmu_setup(struct gk20a *g)
+{
+	struct boardobjgrp *grp = &g->perf_pmu.vfe_equobjs.super.super;
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	/* A group that was never constructed is rejected up front. */
+	if (!grp->bconstructed)
+		return -EINVAL;
+
+	rc = grp->pmuinithandle(g, grp);
+
+	gk20a_dbg_info("Done");
+	return rc;
+}
+
+/*
+ * Parse the VFE equation entries of the VBIOS table into board objects and
+ * insert them into pvfeequobjs; returns 0 or -EINVAL (in the u32 return).
+ */
+static u32 devinit_get_vfe_equ_table(struct gk20a *g,
+				     struct vfe_equs *pvfeequobjs)
+{
+	u32 status = 0;
+	u8 *vfeequs_tbl_ptr = NULL;
+	struct vbios_vfe_3x_header_struct vfeequs_tbl_header = { 0 };
+	struct vbios_vfe_3x_equ_entry_struct equ = { 0 };
+	u8 *vfeequs_tbl_entry_ptr = NULL;
+	u8 *rd_offset_ptr = NULL;
+	u32 index = 0;
+	struct vfe_equ *pequ;
+	u8 equ_type = 0;
+	u32 szfmt;
+	union {
+		struct boardobj board_obj;
+		struct vfe_equ super;
+		struct vfe_equ_compare compare;
+		struct vfe_equ_minmax minmax;
+		struct vfe_equ_quadratic quadratic;
+	} equ_data;
+
+	gk20a_dbg_info("");
+
+	/* A missing lookup op leaves the pointer NULL and is treated as no table. */
+	if (g->ops.bios.get_perf_table_ptrs) {
+		vfeequs_tbl_ptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+				g->bios.perf_token,
+				CONTINUOUS_VIRTUAL_BINNING_TABLE);
+	}
+	if (vfeequs_tbl_ptr == NULL) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	memcpy(&vfeequs_tbl_header, vfeequs_tbl_ptr,
+			VBIOS_CLOCKS_TABLE_1X_HEADER_SIZE_07);
+	if (vfeequs_tbl_header.header_size != VBIOS_CLOCKS_TABLE_1X_HEADER_SIZE_07) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	/* Only the SIZE_17 and SIZE_18 equation entry layouts are accepted. */
+	szfmt = vfeequs_tbl_header.vfe_equ_entry_size;
+	if ((szfmt != VBIOS_VFE_3X_EQU_ENTRY_SIZE_17) &&
+	    (szfmt != VBIOS_VFE_3X_EQU_ENTRY_SIZE_18)) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	/* The equation entries follow the header and all variable entries. */
+	vfeequs_tbl_entry_ptr = vfeequs_tbl_ptr +
+		vfeequs_tbl_header.header_size +
+		(vfeequs_tbl_header.vfe_var_entry_count *
+		 vfeequs_tbl_header.vfe_var_entry_size);
+
+	for (index = 0;
+	     index < vfeequs_tbl_header.vfe_equ_entry_count;
+	     index++) {
+		memset(&equ, 0, sizeof(struct vbios_vfe_3x_equ_entry_struct));
+		/* Start clean so type_mask and unset fields are not stack garbage. */
+		memset(&equ_data, 0, sizeof(equ_data));
+
+		rd_offset_ptr = vfeequs_tbl_entry_ptr +
+			(index * vfeequs_tbl_header.vfe_equ_entry_size);
+
+		memcpy(&equ, rd_offset_ptr, szfmt);
+
+		equ_data.super.var_idx = (u8)equ.var_idx;
+		equ_data.super.equ_idx_next =
+			(equ.equ_idx_next == VBIOS_VFE_3X_EQU_ENTRY_IDX_INVALID) ?
+			CTRL_BOARDOBJ_IDX_INVALID : (u8)equ.equ_idx_next;
+		equ_data.super.out_range_min = equ.out_range_min;
+		equ_data.super.out_range_max = equ.out_range_max;
+
+		switch (BIOS_GET_FIELD(equ.param3, VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE)) {
+		case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_UNITLESS:
+			equ_data.super.output_type =
+				CTRL_PERF_VFE_EQU_OUTPUT_TYPE_UNITLESS;
+			break;
+		case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_FREQ_MHZ:
+			equ_data.super.output_type =
+				CTRL_PERF_VFE_EQU_OUTPUT_TYPE_FREQ_MHZ;
+			break;
+		case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_VOLT_UV:
+			equ_data.super.output_type =
+				CTRL_PERF_VFE_EQU_OUTPUT_TYPE_VOLT_UV;
+			break;
+		case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_VF_GAIN:
+			equ_data.super.output_type =
+				CTRL_PERF_VFE_EQU_OUTPUT_TYPE_VF_GAIN;
+			break;
+		case VBIOS_VFE_3X_EQU_ENTRY_PAR3_OUTPUT_TYPE_VOLT_DELTA_UV:
+			equ_data.super.output_type =
+				CTRL_PERF_VFE_EQU_OUTPUT_TYPE_VOLT_DELTA_UV;
+			break;
+		default:
+			status = -EINVAL;
+			gk20a_err(dev_from_gk20a(g),
+				  "unrecognized output id @vfeequ index %d",
+				  index);
+			goto done;
+		}
+
+		switch ((u8)equ.type) {
+		case VBIOS_VFE_3X_EQU_ENTRY_TYPE_DISABLED:
+		case VBIOS_VFE_3X_EQU_ENTRY_TYPE_QUADRATIC_FXP:
+		case VBIOS_VFE_3X_EQU_ENTRY_TYPE_MINMAX_FXP:
+			/* DISABLED and *_FXP entries are skipped without an object. */
+			continue;
+
+		case VBIOS_VFE_3X_EQU_ENTRY_TYPE_QUADRATIC:
+			equ_type = CTRL_PERF_VFE_EQU_TYPE_QUADRATIC;
+			equ_data.quadratic.coeffs[0] = equ.param0;
+			equ_data.quadratic.coeffs[1] = equ.param1;
+			equ_data.quadratic.coeffs[2] = equ.param2;
+			break;
+
+		case VBIOS_VFE_3X_EQU_ENTRY_TYPE_MINMAX:
+			equ_type = CTRL_PERF_VFE_EQU_TYPE_MINMAX;
+			/* NOTE(review): logical && with a constant; '==' may be intended. */
+			equ_data.minmax.b_max = BIOS_GET_FIELD(equ.param0,
+				VBIOS_VFE_3X_EQU_ENTRY_PAR0_MINMAX_CRIT) &&
+				VBIOS_VFE_3X_EQU_ENTRY_PAR0_MINMAX_CRIT_MAX;
+			equ_data.minmax.equ_idx0 = (u8)BIOS_GET_FIELD(
+				equ.param0,
+				VBIOS_VFE_3X_EQU_ENTRY_PAR0_MINMAX_VFE_EQU_IDX_0);
+			equ_data.minmax.equ_idx1 = (u8)BIOS_GET_FIELD(
+				equ.param0,
+				VBIOS_VFE_3X_EQU_ENTRY_PAR0_MINMAX_VFE_EQU_IDX_1);
+			break;
+
+		case VBIOS_VFE_3X_EQU_ENTRY_TYPE_COMPARE:
+		{
+			u8 cmp_func = (u8)BIOS_GET_FIELD(
+				equ.param1,
+				VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_FUNCTION);
+			equ_type = CTRL_PERF_VFE_EQU_TYPE_COMPARE;
+
+			switch (cmp_func) {
+			case VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_FUNCTION_EQUAL:
+				equ_data.compare.func_id =
+					CTRL_PERF_VFE_EQU_COMPARE_FUNCTION_EQUAL;
+				break;
+			case VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_FUNCTION_GREATER_EQ:
+				equ_data.compare.func_id =
+					CTRL_PERF_VFE_EQU_COMPARE_FUNCTION_GREATER_EQ;
+				break;
+			case VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_FUNCTION_GREATER:
+				equ_data.compare.func_id =
+					CTRL_PERF_VFE_EQU_COMPARE_FUNCTION_GREATER;
+				break;
+			default:
+				gk20a_err(dev_from_gk20a(g),
+					  "invalid vfe compare index %x type %x ",
+					  index, cmp_func);
+				status = -EINVAL;
+				goto done;
+			}
+			equ_data.compare.equ_idx_true = (u8)BIOS_GET_FIELD(
+				equ.param1,
+				VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_VFE_EQU_IDX_TRUE);
+			equ_data.compare.equ_idx_false = (u8)BIOS_GET_FIELD(
+				equ.param1,
+				VBIOS_VFE_3X_EQU_ENTRY_PAR1_COMPARE_VFE_EQU_IDX_FALSE);
+			equ_data.compare.criteria = equ.param0;
+			break;
+		}
+		default:
+			status = -EINVAL;
+			gk20a_err(dev_from_gk20a(g),
+				"Invalid equ[%d].type = 0x%x.",
+				index, (u8)equ.type);
+			goto done;
+		}
+
+		equ_data.board_obj.type = equ_type;
+		pequ = construct_vfe_equ(g, (void *)&equ_data);
+		if (pequ == NULL) {
+			gk20a_err(dev_from_gk20a(g),
+			"error constructing vfe_equ boardobj %d", index);
+			status = -EINVAL;
+			goto done;
+		}
+
+		status = boardobjgrp_objinsert(&pvfeequobjs->super.super,
+					       (struct boardobj *)pequ, index);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				  "error adding vfe_equ boardobj %d", index);
+			status = -EINVAL;
+			goto done;
+		}
+	}
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+/*
+ * Base pmudatainit for a VFE equation: runs the generic board-object init,
+ * then copies the fields shared by every equation type into the PMU struct.
+ */
+static u32 _vfe_equ_pmudatainit_super(struct gk20a *g,
+				      struct boardobj *board_obj_ptr,
+				      struct nv_pmu_boardobj *ppmudata)
+{
+	const struct vfe_equ *src = (struct vfe_equ *)board_obj_ptr;
+	struct nv_pmu_vfe_equ *dst = (struct nv_pmu_vfe_equ *)ppmudata;
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	rc = boardobj_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (rc != 0)
+		return rc;
+
+	/* The COMPARE, MINMAX and QUADRATIC variants all start from these. */
+	dst->var_idx = src->var_idx;
+	dst->equ_idx_next = src->equ_idx_next;
+	dst->output_type = src->output_type;
+	dst->out_range_min = src->out_range_min;
+	dst->out_range_max = src->out_range_max;
+
+	return rc;
+}
+
+/*
+ * Construct the vfe_equ base part of a new object from the template in pargs.
+ * Any failure of boardobj_construct_super() is reported as -EINVAL.
+ */
+static u32 vfe_equ_construct_super(struct gk20a *g,
+				   struct boardobj **ppboardobj,
+				   u16 size, void *pargs)
+{
+	const struct vfe_equ *tmpl = pargs;
+	struct vfe_equ *equ;
+
+	if (boardobj_construct_super(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	equ = (struct vfe_equ *)*ppboardobj;
+	equ->super.pmudatainit = _vfe_equ_pmudatainit_super;
+
+	/* Copy the base-class fields from the template. */
+	equ->var_idx = tmpl->var_idx;
+	equ->equ_idx_next = tmpl->equ_idx_next;
+	equ->output_type = tmpl->output_type;
+	equ->out_range_min = tmpl->out_range_min;
+	equ->out_range_max = tmpl->out_range_max;
+
+	return 0;
+}
+
+/* pmudatainit for COMPARE equations: base fields plus the comparison setup. */
+static u32 _vfe_equ_pmudatainit_compare(struct gk20a *g,
+					struct boardobj *board_obj_ptr,
+					struct nv_pmu_boardobj *ppmudata)
+{
+	const struct vfe_equ_compare *src =
+		(struct vfe_equ_compare *)board_obj_ptr;
+	struct nv_pmu_vfe_equ_compare *dst =
+		(struct nv_pmu_vfe_equ_compare *)ppmudata;
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	rc = _vfe_equ_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (rc != 0)
+		return rc;
+
+	/* COMPARE-specific fields. */
+	dst->func_id = src->func_id;
+	dst->equ_idx_true = src->equ_idx_true;
+	dst->equ_idx_false = src->equ_idx_false;
+	dst->criteria = src->criteria;
+
+	return rc;
+}
+
+
+/*
+ * Construct a COMPARE equation object from the template in pargs.
+ * The template's type_mask is updated in place before the base constructor
+ * runs; a wrong template type or a constructor failure yields -EINVAL.
+ */
+static u32 vfe_equ_construct_compare(struct gk20a *g,
+				     struct boardobj **ppboardobj,
+				     u16 size, void *pargs)
+{
+	struct vfe_equ_compare *tmpl = pargs;
+	struct vfe_equ_compare *equ;
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_PERF_VFE_EQU_TYPE_COMPARE)
+		return -EINVAL;
+
+	/* Tag the template with the COMPARE subtype bit. */
+	tmpl->super.super.type_mask |= BIT(CTRL_PERF_VFE_EQU_TYPE_COMPARE);
+	if (vfe_equ_construct_super(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	equ = (struct vfe_equ_compare *)*ppboardobj;
+	/* Replace the base pmudatainit hook with the COMPARE variant. */
+	equ->super.super.pmudatainit = _vfe_equ_pmudatainit_compare;
+
+	equ->func_id = tmpl->func_id;
+	equ->equ_idx_true = tmpl->equ_idx_true;
+	equ->equ_idx_false = tmpl->equ_idx_false;
+	equ->criteria = tmpl->criteria;
+
+	return 0;
+}
+
+/*
+ * pmudatainit for MINMAX equations: base fields plus b_max and both indices.
+ */
+static u32 _vfe_equ_pmudatainit_minmax(struct gk20a *g,
+				       struct boardobj *board_obj_ptr,
+				       struct nv_pmu_boardobj *ppmudata)
+{
+	const struct vfe_equ_minmax *src =
+		(struct vfe_equ_minmax *)board_obj_ptr;
+	struct nv_pmu_vfe_equ_minmax *dst =
+		(struct nv_pmu_vfe_equ_minmax *)ppmudata;
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	rc = _vfe_equ_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (rc != 0)
+		return rc;
+
+	dst->b_max = src->b_max;
+	dst->equ_idx0 = src->equ_idx0;
+	dst->equ_idx1 = src->equ_idx1;
+
+	return rc;
+}
+
+/*
+ * Construct a MINMAX equation object from the template in pargs; a wrong
+ * template type or a base-constructor failure yields -EINVAL.
+ */
+static u32 vfe_equ_construct_minmax(struct gk20a *g,
+				    struct boardobj **ppboardobj,
+				    u16 size, void *pargs)
+{
+	struct vfe_equ_minmax *tmpl = pargs;
+	struct vfe_equ_minmax *equ;
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_PERF_VFE_EQU_TYPE_MINMAX)
+		return -EINVAL;
+
+	/* Tag the template with the MINMAX subtype bit. */
+	tmpl->super.super.type_mask |= BIT(CTRL_PERF_VFE_EQU_TYPE_MINMAX);
+	if (vfe_equ_construct_super(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	equ = (struct vfe_equ_minmax *)*ppboardobj;
+	/* Replace the base pmudatainit hook with the MINMAX variant. */
+	equ->super.super.pmudatainit = _vfe_equ_pmudatainit_minmax;
+	equ->b_max = tmpl->b_max;
+	equ->equ_idx0 = tmpl->equ_idx0;
+	equ->equ_idx1 = tmpl->equ_idx1;
+
+	return 0;
+}
+
+/* pmudatainit for QUADRATIC equations: base fields plus the coefficients. */
+static u32 _vfe_equ_pmudatainit_quadratic(struct gk20a *g,
+					  struct boardobj *board_obj_ptr,
+					  struct nv_pmu_boardobj *ppmudata)
+{
+	const struct vfe_equ_quadratic *src =
+		(struct vfe_equ_quadratic *)board_obj_ptr;
+	struct nv_pmu_vfe_equ_quadratic *dst =
+		(struct nv_pmu_vfe_equ_quadratic *)ppmudata;
+	u32 rc;
+	u32 k;
+
+	gk20a_dbg_info("");
+
+	rc = _vfe_equ_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (rc != 0)
+		return rc;
+
+	/* Element-wise copy of all CTRL_PERF_VFE_EQU_QUADRATIC_COEFF_COUNT coeffs. */
+	for (k = 0; k < CTRL_PERF_VFE_EQU_QUADRATIC_COEFF_COUNT; k++)
+		dst->coeffs[k] = src->coeffs[k];
+
+	return rc;
+}
+
+/*
+ * Construct a QUADRATIC equation object from the template in pargs; a wrong
+ * template type or a base-constructor failure yields -EINVAL.
+ */
+static u32 vfe_equ_construct_quadratic(struct gk20a *g,
+				       struct boardobj **ppboardobj,
+				       u16 size, void *pargs)
+{
+	struct vfe_equ_quadratic *tmpl = pargs;
+	struct vfe_equ_quadratic *equ;
+	u32 k;
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_PERF_VFE_EQU_TYPE_QUADRATIC)
+		return -EINVAL;
+
+	/* Tag the template with the QUADRATIC subtype bit. */
+	tmpl->super.super.type_mask |= BIT(CTRL_PERF_VFE_EQU_TYPE_QUADRATIC);
+	if (vfe_equ_construct_super(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	equ = (struct vfe_equ_quadratic *)*ppboardobj;
+	/* Replace the base pmudatainit hook with the QUADRATIC variant. */
+	equ->super.super.pmudatainit = _vfe_equ_pmudatainit_quadratic;
+
+	for (k = 0; k < CTRL_PERF_VFE_EQU_QUADRATIC_COEFF_COUNT; k++)
+		equ->coeffs[k] = tmpl->coeffs[k];
+
+	return 0;
+}
+
+/*
+ * Build the equation object matching the template type that
+ * BOARDOBJ_GET_TYPE() reports for pargs.
+ * Returns NULL for an unknown type or when the type's constructor fails.
+ */
+static struct vfe_equ *construct_vfe_equ(struct gk20a *g, void *pargs)
+{
+	struct boardobj *obj = NULL;
+	u32 type;
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	type = BOARDOBJ_GET_TYPE(pargs);
+	if (type == CTRL_PERF_VFE_EQU_TYPE_COMPARE) {
+		rc = vfe_equ_construct_compare(g, &obj,
+			sizeof(struct vfe_equ_compare), pargs);
+	} else if (type == CTRL_PERF_VFE_EQU_TYPE_MINMAX) {
+		rc = vfe_equ_construct_minmax(g, &obj,
+			sizeof(struct vfe_equ_minmax), pargs);
+	} else if (type == CTRL_PERF_VFE_EQU_TYPE_QUADRATIC) {
+		rc = vfe_equ_construct_quadratic(g, &obj,
+			sizeof(struct vfe_equ_quadratic), pargs);
+	} else {
+		return NULL;
+	}
+
+	/* Each constructor reports failure through a non-zero status. */
+	if (rc)
+		return NULL;
+
+	gk20a_dbg_info(" Done");
+
+	return (struct vfe_equ *)obj;
+}
diff --git a/drivers/gpu/nvgpu/perf/vfe_equ.h b/drivers/gpu/nvgpu/perf/vfe_equ.h
new file mode 100644
index 00000000..8aaddccd
--- /dev/null
+++ b/drivers/gpu/nvgpu/perf/vfe_equ.h
@@ -0,0 +1,76 @@
+/*
+ * general perf structures & definitions
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _VFE_EQU_H_
+#define _VFE_EQU_H_
+
+#include "boardobj/boardobjgrp.h"
+#include "perf/vfe_var.h"
+#include "pmuif/gpmuifperf.h"
+#include "pmuif/gpmuifperfvfe.h"
+
+u32 vfe_equ_sw_setup(struct gk20a *g);
+u32 vfe_equ_pmu_setup(struct gk20a *g);
+
+#define VFE_EQU_GET(_pperf, _idx)                                              \
+	((struct vfe_equ *)BOARDOBJGRP_OBJ_GET_BY_IDX(                         \
+		&((_pperf)->vfe.equs.super.super), (_idx)))
+
+#define VFE_EQU_IDX_IS_VALID(_pperf, _idx)                                     \
+	boardobjgrp_idxisvalid(&((_pperf)->vfe.equs.super.super), (_idx))
+/* _idx is valid, _outputtype is not UNITLESS, and the equation outputs _outputtype or UNITLESS. */
+#define VFE_EQU_OUTPUT_TYPE_IS_VALID(_pperf, _idx, _outputtype)                \
+	(VFE_EQU_IDX_IS_VALID((_pperf), (_idx)) &&                             \
+	((_outputtype) != CTRL_PERF_VFE_EQU_OUTPUT_TYPE_UNITLESS) &&           \
+	((VFE_EQU_GET((_pperf), (_idx))->output_type == (_outputtype)) ||      \
+	(VFE_EQU_GET((_pperf), (_idx))->output_type ==                         \
+	CTRL_PERF_VFE_EQU_OUTPUT_TYPE_UNITLESS)))
+
+struct vfe_equ {
+	struct boardobj super;	/* first member: objects are cast to struct boardobj */
+	u8 var_idx;
+	u8 equ_idx_next;	/* CTRL_BOARDOBJ_IDX_INVALID when the VBIOS entry has no next */
+	u8 output_type;	/* one of CTRL_PERF_VFE_EQU_OUTPUT_TYPE_* */
+	u32 out_range_min;
+	u32 out_range_max;
+
+	bool b_is_dynamic_valid;
+	bool b_is_dynamic;
+};
+
+struct vfe_equs {
+	struct boardobjgrp_e255 super;
+};
+
+struct vfe_equ_compare {
+	struct vfe_equ super;
+	u8 func_id;	/* one of CTRL_PERF_VFE_EQU_COMPARE_FUNCTION_* */
+	u8 equ_idx_true;	/* PAR1 COMPARE_VFE_EQU_IDX_TRUE field of the VBIOS entry */
+	u8 equ_idx_false;	/* PAR1 COMPARE_VFE_EQU_IDX_FALSE field of the VBIOS entry */
+	u32 criteria;	/* param0 of the VBIOS entry, stored unmodified */
+};
+
+struct vfe_equ_minmax {
+	struct vfe_equ super;
+	bool b_max;	/* derived from the PAR0 MINMAX_CRIT field */
+	u8 equ_idx0;	/* PAR0 MINMAX_VFE_EQU_IDX_0 field of the VBIOS entry */
+	u8 equ_idx1;	/* PAR0 MINMAX_VFE_EQU_IDX_1 field of the VBIOS entry */
+};
+
+struct vfe_equ_quadratic {
+	struct vfe_equ super;
+	u32   coeffs[CTRL_PERF_VFE_EQU_QUADRATIC_COEFF_COUNT];	/* param0..param2 of the VBIOS entry */
+};
+
+#endif
diff --git a/drivers/gpu/nvgpu/perf/vfe_var.c b/drivers/gpu/nvgpu/perf/vfe_var.c
new file mode 100644
index 00000000..90963478
--- /dev/null
+++ b/drivers/gpu/nvgpu/perf/vfe_var.c
@@ -0,0 +1,1048 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "perf.h"
+#include "vfe_var.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "pmuif/gpmuifperf.h"
+#include "pmuif/gpmuifperfvfe.h"
+#include "gm206/bios_gm206.h"
+#include "ctrl/ctrlclk.h"
+#include "ctrl/ctrlvolt.h"
+#include "gk20a/pmu_gk20a.h"
+
+static u32 devinit_get_vfe_var_table(struct gk20a *g,
+				     struct vfe_vars *pvarobjs);
+static u32 vfe_var_construct_single(struct gk20a *g,
+				    struct boardobj **ppboardobj,
+				    u16 size, void *pargs);
+
+/* Group pmudatainit hook: E32 base init, then the polling period. */
+static u32 _vfe_vars_pmudatainit(struct gk20a *g,
+				 struct boardobjgrp *pboardobjgrp,
+				 struct nv_pmu_boardobjgrp_super *pboardobjgrppmu)
+{
+	struct nv_pmu_perf_vfe_var_boardobjgrp_set_header *hdr =
+		(void *)pboardobjgrppmu;
+	struct vfe_vars *vars = (struct vfe_vars *)pboardobjgrp;
+	u32 rc;
+
+	rc = boardobjgrp_pmudatainit_e32(g, pboardobjgrp, pboardobjgrppmu);
+	if (rc) {
+		gk20a_err(dev_from_gk20a(g),
+			"error updating pmu boardobjgrp for vfe var 0x%x",
+			 rc);
+		return rc;
+	}
+
+	/* Only written once the base init has succeeded. */
+	hdr->polling_periodms = vars->polling_periodms;
+	return 0;
+}
+
+/* Return the PMU-side slot for object idx of the VFE variable SET payload. */
+static u32 _vfe_vars_pmudata_instget(struct gk20a *g,
+				     struct nv_pmu_boardobjgrp *pmuboardobjgrp,
+				     struct nv_pmu_boardobj **ppboardobjpmudata,
+				     u8 idx)
+{
+	struct nv_pmu_perf_vfe_var_boardobj_grp_set *set;
+
+	gk20a_dbg_info("");
+
+	/* Indices at or beyond the E32 object limit are rejected. */
+	if (idx >= CTRL_BOARDOBJGRP_E32_MAX_OBJECTS)
+		return -EINVAL;
+
+	set = (struct nv_pmu_perf_vfe_var_boardobj_grp_set *)pmuboardobjgrp;
+	*ppboardobjpmudata =
+		(struct nv_pmu_boardobj *)&set->objects[idx].data.board_obj;
+
+	gk20a_dbg_info(" Done");
+	return 0;
+}
+
+/* GET_STATUS slot lookup; -EINVAL if idx is out of range or not in obj_mask. */
+static u32 _vfe_vars_pmustatus_instget(struct gk20a *g, void *pboardobjgrppmu,
+	struct nv_pmu_boardobj_query **ppboardobjpmustatus, u8 idx)
+{
+	struct nv_pmu_perf_vfe_var_boardobj_grp_get_status *st =
+		pboardobjgrppmu;
+
+	if ((idx >= CTRL_BOARDOBJGRP_E32_MAX_OBJECTS) || /* checked first: keeps BIT(idx) in range */
+	    (((u32)BIT(idx) & st->hdr.data.super.obj_mask.super.data[0]) == 0))
+		return -EINVAL;
+
+	*ppboardobjpmustatus = (struct nv_pmu_boardobj_query *)
+			&st->objects[idx].data.board_obj;
+	return 0;
+}
+
+
+/*
+ * Construct the VFE variable E32 group, wire its PMU SET/GET_STATUS hooks.
+ */
+u32 vfe_var_sw_setup(struct gk20a *g)
+{
+	u32 status;
+	struct boardobjgrp *pboardobjgrp = NULL;
+	struct vfe_vars *pvfevarobjs;
+
+	gk20a_dbg_info("");
+
+	status = boardobjgrpconstruct_e32(&g->perf_pmu.vfe_varobjs.super);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error creating boardobjgrp for vfe var, status - 0x%x",
+			  status);
+		goto done;
+	}
+
+	pboardobjgrp = &g->perf_pmu.vfe_varobjs.super.super;
+	pvfevarobjs = &g->perf_pmu.vfe_varobjs;
+
+	BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, PERF, VFE_VAR);
+	status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp,
+			perf, PERF, vfe_var, VFE_VAR);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x",
+			 status);
+		goto done;
+	}
+
+	pboardobjgrp->pmudatainit  = _vfe_vars_pmudatainit;
+	pboardobjgrp->pmudatainstget  = _vfe_vars_pmudata_instget;
+	pboardobjgrp->pmustatusinstget  = _vfe_vars_pmustatus_instget;
+
+	status = devinit_get_vfe_var_table(g, pvfevarobjs);
+	if (status)
+		goto done;
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_GET_STATUS_CONSTRUCT(g,
+				&g->perf_pmu.vfe_varobjs.super.super,
+				perf, PERF, vfe_var, VFE_VAR);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+		"error constructing PMU_BOARDOBJ_CMD_GRP_GET_STATUS interface - 0x%x",
+			status);
+	}
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+/* Hand the constructed VFE variable group to its pmuinithandle() hook. */
+u32 vfe_var_pmu_setup(struct gk20a *g)
+{
+	struct boardobjgrp *grp = &g->perf_pmu.vfe_varobjs.super.super;
+	u32 rc;
+
+	gk20a_dbg_info("");
+
+	/* A group that was never constructed is rejected up front. */
+	if (!grp->bconstructed)
+		return -EINVAL;
+
+	rc = grp->pmuinithandle(g, grp);
+
+	gk20a_dbg_info("Done");
+	return rc;
+}
+
+/*
+ * Collect, for the strap ids in pvfevar->vfield_info and ->vfield_ver_info,
+ * the fuse register segments listed in the VBIOS VFIELD tables.
+ * Returns 0, or -EINVAL (in the u32 return) for a missing or malformed table.
+ */
+u32 dev_init_get_vfield_info(struct gk20a *g,
+	struct vfe_var_single_sensed_fuse *pvfevar)
+{
+	u8 *vfieldtableptr = NULL;
+	u8 *vfieldregtableptr = NULL;
+	u32 i;
+	u32 oldindex = 0xFFFFFFFF;
+	u32 currindex;
+	struct vfield_reg_header vregheader;
+	struct vfield_reg_entry vregentry;
+	struct vfield_header vheader;
+	struct vfield_entry ventry;
+	union nv_pmu_bios_vfield_register_segment *psegment = NULL;
+	u8 *psegmentcount = NULL;
+
+	/* Without the lookup op neither table can be located. */
+	if (g->ops.bios.get_perf_table_ptrs == NULL)
+		return -EINVAL;
+
+	vfieldregtableptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+			g->bios.virt_token, VP_FIELD_REGISTER);
+	if (vfieldregtableptr == NULL)
+		return -EINVAL;
+
+	vfieldtableptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+			g->bios.virt_token, VP_FIELD_TABLE);
+	if (vfieldtableptr == NULL)
+		return -EINVAL;
+
+	memcpy(&vregheader, vfieldregtableptr, VFIELD_REG_HEADER_SIZE);
+	if (vregheader.version != VBIOS_VFIELD_REG_TABLE_VERSION_1_0) {
+		gk20a_err(dev_from_gk20a(g), "invalid vreg header version");
+		return -EINVAL;
+	}
+
+	memcpy(&vheader, vfieldtableptr, VFIELD_HEADER_SIZE);
+	if (vheader.version != VBIOS_VFIELD_TABLE_VERSION_1_0) {
+		gk20a_err(dev_from_gk20a(g), "invalid vfield header version");
+		return -EINVAL;
+	}
+
+	/* Entries are copied into fixed-size locals; larger ones would overflow. */
+	if ((vregheader.entry_size > sizeof(vregentry)) ||
+	    (vheader.entry_size > sizeof(ventry))) {
+		gk20a_err(dev_from_gk20a(g), "unsupported vfield entry size");
+		return -EINVAL;
+	}
+	memset(&vregentry, 0, sizeof(vregentry));
+	memset(&ventry, 0, sizeof(ventry));
+
+	pvfevar->vfield_info.fuse.segment_count = 0;
+	pvfevar->vfield_ver_info.fuse.segment_count = 0;
+	for (i = 0; i < (u32)vheader.count; i++) {
+		memcpy(&ventry, vfieldtableptr + VFIELD_HEADER_SIZE +
+			(i * vheader.entry_size),
+			vheader.entry_size);
+
+		/* Reload the register entry only when its index changes. */
+		currindex = VFIELD_BIT_REG(ventry);
+		if (currindex != oldindex) {
+			memcpy(&vregentry, vfieldregtableptr +
+				VFIELD_REG_HEADER_SIZE +
+				(currindex * vregheader.entry_size),
+				vregheader.entry_size);
+			oldindex = currindex;
+		}
+
+		if (pvfevar->vfield_info.v_field_id == ventry.strap_id) {
+			psegmentcount =
+				&(pvfevar->vfield_info.fuse.segment_count);
+			psegment = pvfevar->vfield_info.fuse.segments;
+		} else if (pvfevar->vfield_ver_info.v_field_id_ver == ventry.strap_id) {
+			psegmentcount =
+				&(pvfevar->vfield_ver_info.fuse.segment_count);
+			psegment = pvfevar->vfield_ver_info.fuse.segments;
+		} else {
+			continue;
+		}
+
+		/*
+		 * NOTE(review): should be '>=' if segments[] has exactly MAX slots.
+		 */
+		if (*psegmentcount > NV_PMU_VFE_VAR_SINGLE_SENSED_FUSE_SEGMENTS_MAX)
+			return -EINVAL;
+		psegment += *psegmentcount;
+
+		psegment->super.high_bit = (u8)(VFIELD_BIT_STOP(ventry));
+		psegment->super.low_bit = (u8)(VFIELD_BIT_START(ventry));
+		switch (VFIELD_CODE((&vregentry))) {
+		case NV_VFIELD_DESC_CODE_REG:
+			psegment->reg.super.type =
+				NV_PMU_BIOS_VFIELD_DESC_CODE_REG;
+			psegment->reg.addr = vregentry.reg;
+			break;
+		case NV_VFIELD_DESC_CODE_INDEX_REG:
+			psegment->index_reg.super.type =
+				NV_PMU_BIOS_VFIELD_DESC_CODE_INDEX_REG;
+			psegment->index_reg.addr = vregentry.reg;
+			psegment->index_reg.index = vregentry.index;
+			psegment->index_reg.reg_index = vregentry.reg_index;
+			break;
+		default:
+			psegment->super.type =
+				NV_PMU_BIOS_VFIELD_DESC_CODE_INVALID;
+			return -EINVAL;
+		}
+
+		/* Only DWORD-sized register descriptors are accepted. */
+		if (VFIELD_SIZE((&vregentry)) != NV_VFIELD_DESC_SIZE_DWORD) {
+			psegment->super.type =
+				NV_PMU_BIOS_VFIELD_DESC_CODE_INVALID;
+			return -EINVAL;
+		}
+		(*psegmentcount)++;
+	}
+
+	return 0;
+}
+
+/* Populate the PMU copy of a base vfe_var: boardobj data + out_range_*. */
+static u32 _vfe_var_pmudatainit_super(struct gk20a *g,
+				      struct boardobj *board_obj_ptr,
+				      struct nv_pmu_boardobj *ppmudata)
+{
+	struct vfe_var *src = (struct vfe_var *)board_obj_ptr;
+	struct nv_pmu_vfe_var *dst = (struct nv_pmu_vfe_var *)ppmudata;
+	u32 err;
+
+	gk20a_dbg_info("");
+
+	/* Base boardobj fields first; nothing else is written on failure. */
+	err = boardobj_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (err != 0)
+		return err;
+
+	/* Mirror out_range_min/out_range_max into the PMU structure. */
+	dst->out_range_min = src->out_range_min;
+	dst->out_range_max = src->out_range_max;
+
+	return 0;
+}
+
+/*
+ * Build the common vfe_var part of a VFE variable object from the
+ * struct vfe_var template in pargs; boardobj failures map to -EINVAL.
+ */
+static u32 vfe_var_construct_super(struct gk20a *g,
+				   struct boardobj **ppboardobj,
+				   u16 size, void *pargs)
+{
+	const struct vfe_var *tmpl = (struct vfe_var *)pargs;
+	struct vfe_var *var;
+
+	gk20a_dbg_info("");
+
+	if (boardobj_construct_super(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	var = (struct vfe_var *)*ppboardobj;
+	var->super.pmudatainit = _vfe_var_pmudatainit_super;
+	var->out_range_min = tmpl->out_range_min;
+	var->out_range_max = tmpl->out_range_max;
+	/* Leaf constructors set b_is_dynamic and flip this to true. */
+	var->b_is_dynamic_valid = false;
+
+	gk20a_dbg_info("");
+
+	return 0;
+}
+
+/*
+ * PMU data init for DERIVED variables: no extra fields are written here,
+ * the call is forwarded to the base vfe_var initializer.
+ */
+static u32 _vfe_var_pmudatainit_derived(struct gk20a *g,
+					struct boardobj *board_obj_ptr,
+					struct nv_pmu_boardobj *ppmudata)
+{
+	gk20a_dbg_info("");
+
+	return _vfe_var_pmudatainit_super(g, board_obj_ptr, ppmudata);
+}
+
+/*
+ * Construct the DERIVED layer: tag the template's type_mask, build the base
+ * vfe_var, then install the DERIVED pmudatainit hook.
+ */
+static u32 vfe_var_construct_derived(struct gk20a *g,
+				     struct boardobj **ppboardobj,
+				     u16 size, void *pargs)
+{
+	struct vfe_var_derived *derived;
+
+	/* The mask is updated on the caller's template before construction. */
+	((struct boardobj *)pargs)->type_mask |=
+		BIT(CTRL_PERF_VFE_VAR_TYPE_DERIVED);
+	if (vfe_var_construct_super(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	derived = (struct vfe_var_derived *)*ppboardobj;
+	derived->super.super.pmudatainit = _vfe_var_pmudatainit_derived;
+	return 0;
+}
+
+/* PMU data init for DERIVED_PRODUCT: base data plus var_idx0/var_idx1. */
+static u32 _vfe_var_pmudatainit_derived_product(struct gk20a *g,
+						struct boardobj *board_obj_ptr,
+						struct nv_pmu_boardobj *ppmudata)
+{
+	struct vfe_var_derived_product *src =
+		(struct vfe_var_derived_product *)board_obj_ptr;
+	struct nv_pmu_vfe_var_derived_product *dst =
+		(struct nv_pmu_vfe_var_derived_product *)ppmudata;
+	u32 err;
+
+	gk20a_dbg_info("");
+
+	err = _vfe_var_pmudatainit_derived(g, board_obj_ptr, ppmudata);
+	if (err != 0)
+		return err;
+
+	/* var_idx0/var_idx1 are copied verbatim from the driver object. */
+	dst->var_idx0 = src->var_idx0;
+	dst->var_idx1 = src->var_idx1;
+
+	return 0;
+}
+
+/*
+ * Construct a DERIVED_PRODUCT variable from the template in pargs.
+ * Returns -EINVAL when the template type does not match or when the
+ * DERIVED layer cannot be built; 0 otherwise.
+ */
+static u32 vfe_var_construct_derived_product(struct gk20a *g,
+					     struct boardobj **ppboardobj,
+					     u16 size, void *pargs)
+{
+	struct boardobj *tmpl_obj = (struct boardobj *)pargs;
+	const struct vfe_var_derived_product *tmpl =
+			(struct vfe_var_derived_product *)pargs;
+	struct vfe_var_derived_product *prod;
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_PERF_VFE_VAR_TYPE_DERIVED_PRODUCT)
+		return -EINVAL;
+
+	tmpl_obj->type_mask |= BIT(CTRL_PERF_VFE_VAR_TYPE_DERIVED_PRODUCT);
+	if (vfe_var_construct_derived(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	prod = (struct vfe_var_derived_product *)*ppboardobj;
+	prod->super.super.super.pmudatainit =
+			_vfe_var_pmudatainit_derived_product;
+	prod->var_idx0 = tmpl->var_idx0;
+	prod->var_idx1 = tmpl->var_idx1;
+
+	return 0;
+}
+
+/* PMU data init for DERIVED_SUM: base data plus var_idx0/var_idx1. */
+static u32 _vfe_var_pmudatainit_derived_sum(struct gk20a *g,
+					    struct boardobj *board_obj_ptr,
+					    struct nv_pmu_boardobj *ppmudata)
+{
+	struct vfe_var_derived_sum *src =
+		(struct vfe_var_derived_sum *)board_obj_ptr;
+	struct nv_pmu_vfe_var_derived_sum *dst =
+		(struct nv_pmu_vfe_var_derived_sum *)ppmudata;
+	u32 err;
+
+	gk20a_dbg_info("");
+
+	err = _vfe_var_pmudatainit_derived(g, board_obj_ptr, ppmudata);
+	if (err != 0)
+		return err;
+
+	dst->var_idx0 = src->var_idx0;
+	dst->var_idx1 = src->var_idx1;
+
+	return 0;
+}
+
+/*
+ * Construct a DERIVED_SUM variable from the template in pargs.
+ * Returns -EINVAL on a type mismatch or DERIVED-layer failure, else 0.
+ */
+static u32 vfe_var_construct_derived_sum(struct gk20a *g,
+					 struct boardobj **ppboardobj,
+					 u16 size, void *pargs)
+{
+	struct boardobj *tmpl_obj = (struct boardobj *)pargs;
+	const struct vfe_var_derived_sum *tmpl =
+			(struct vfe_var_derived_sum *)pargs;
+	struct vfe_var_derived_sum *sum;
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_PERF_VFE_VAR_TYPE_DERIVED_SUM)
+		return -EINVAL;
+
+	tmpl_obj->type_mask |= BIT(CTRL_PERF_VFE_VAR_TYPE_DERIVED_SUM);
+	if (vfe_var_construct_derived(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	sum = (struct vfe_var_derived_sum *)*ppboardobj;
+	sum->super.super.super.pmudatainit =
+			_vfe_var_pmudatainit_derived_sum;
+	sum->var_idx0 = tmpl->var_idx0;
+	sum->var_idx1 = tmpl->var_idx1;
+
+	return 0;
+}
+
+/* PMU data init for SINGLE variables: base data plus override settings. */
+static u32 _vfe_var_pmudatainit_single(struct gk20a *g,
+				       struct boardobj *board_obj_ptr,
+				       struct nv_pmu_boardobj *ppmudata)
+{
+	struct vfe_var_single *src = (struct vfe_var_single *)board_obj_ptr;
+	struct nv_pmu_vfe_var_single *dst =
+		(struct nv_pmu_vfe_var_single *)ppmudata;
+	u32 err;
+
+	gk20a_dbg_info("");
+
+	/* Common vfe_var fields are filled by the base initializer. */
+	err = _vfe_var_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (err != 0)
+		return err;
+
+	/* Hand the override type/value over unchanged. */
+	dst->override_type = src->override_type;
+	dst->override_value = src->override_value;
+
+	return 0;
+}
+
+/*
+ * PMU data init for SINGLE_FREQUENCY variables: nothing type-specific is
+ * written, so this forwards to the SINGLE initializer.
+ */
+static u32 _vfe_var_pmudatainit_single_frequency(struct gk20a *g,
+						 struct boardobj *board_obj_ptr,
+						 struct nv_pmu_boardobj *ppmudata)
+{
+	gk20a_dbg_info("");
+
+	return _vfe_var_pmudatainit_single(g, board_obj_ptr, ppmudata);
+}
+
+/*
+ * Construct a SINGLE_FREQUENCY variable. The template type must match;
+ * the result has b_is_dynamic = false and b_is_dynamic_valid = true.
+ */
+static u32 vfe_var_construct_single_frequency(struct gk20a *g,
+					      struct boardobj **ppboardobj,
+					      u16 size, void *pargs)
+{
+	struct vfe_var_single_frequency *freq;
+
+	gk20a_dbg_info("");
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_PERF_VFE_VAR_TYPE_SINGLE_FREQUENCY)
+		return -EINVAL;
+
+	((struct boardobj *)pargs)->type_mask |=
+		BIT(CTRL_PERF_VFE_VAR_TYPE_SINGLE_FREQUENCY);
+	if (vfe_var_construct_single(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	freq = (struct vfe_var_single_frequency *)*ppboardobj;
+	freq->super.super.super.pmudatainit =
+			_vfe_var_pmudatainit_single_frequency;
+	freq->super.super.b_is_dynamic = false;
+	freq->super.super.b_is_dynamic_valid = true;
+
+	gk20a_dbg_info("Done");
+	return 0;
+}
+
+/*
+ * PMU data init for SINGLE_SENSED variables: no extra fields are written
+ * at this level, the call is forwarded to the SINGLE initializer.
+ */
+static u32 _vfe_var_pmudatainit_single_sensed(struct gk20a *g,
+					      struct boardobj *board_obj_ptr,
+					      struct nv_pmu_boardobj *ppmudata)
+{
+	gk20a_dbg_info("");
+
+	return _vfe_var_pmudatainit_single(g, board_obj_ptr, ppmudata);
+}
+
+/*
+ * PMU data init for SINGLE_SENSED_FUSE variables: after the SINGLE_SENSED
+ * part succeeds, the vfield, version-vfield and override info blocks are
+ * copied byte-for-byte into the PMU structure.
+ */
+static u32 _vfe_var_pmudatainit_single_sensed_fuse(struct gk20a *g,
+						   struct boardobj *board_obj_ptr,
+						   struct nv_pmu_boardobj *ppmudata)
+{
+	struct vfe_var_single_sensed_fuse *src =
+		(struct vfe_var_single_sensed_fuse *)board_obj_ptr;
+	struct nv_pmu_vfe_var_single_sensed_fuse *dst =
+		(struct nv_pmu_vfe_var_single_sensed_fuse *)ppmudata;
+	u32 err;
+
+	gk20a_dbg_info("");
+
+	err = _vfe_var_pmudatainit_single_sensed(g, board_obj_ptr, ppmudata);
+	if (err != 0)
+		return err;
+
+	/* Copy lengths are the sizes of the nv_pmu_* sub-structure types. */
+	memcpy(&dst->vfield_info, &src->vfield_info,
+		sizeof(struct nv_pmu_vfe_var_single_sensed_fuse_vfield_info));
+
+	memcpy(&dst->vfield_ver_info, &src->vfield_ver_info,
+		sizeof(struct nv_pmu_vfe_var_single_sensed_fuse_ver_vfield_info));
+
+	memcpy(&dst->override_info, &src->override_info,
+		sizeof(struct nv_pmu_vfe_var_single_sensed_fuse_override_info));
+
+	return 0;
+}
+
+/*
+ * Construct the SINGLE_SENSED layer: tag the template's type_mask, build
+ * the SINGLE part and install the SINGLE_SENSED pmudatainit hook.
+ */
+static u32 vfe_var_construct_single_sensed(struct gk20a *g,
+					   struct boardobj **ppboardobj,
+					   u16 size, void *pargs)
+{
+	struct vfe_var_single_sensed *sensed;
+
+	gk20a_dbg_info(" ");
+
+	((struct boardobj *)pargs)->type_mask |=
+		BIT(CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED);
+	if (vfe_var_construct_single(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	sensed = (struct vfe_var_single_sensed *)*ppboardobj;
+	sensed->super.super.super.pmudatainit =
+			_vfe_var_pmudatainit_single_sensed;
+
+	gk20a_dbg_info("Done");
+
+	return 0;
+}
+
+/*
+ * Construct a SINGLE_SENSED_FUSE variable from the template in pargs and
+ * resolve its fuse segments. Returns -EINVAL on type mismatch, on
+ * construction or vfield lookup failure, or if a segment list stays empty.
+ * NOTE(review): *ppboardobj is not released on the late failure paths.
+ */
+static u32 vfe_var_construct_single_sensed_fuse(struct gk20a *g,
+						struct boardobj **ppboardobj,
+						u16 size, void *pargs)
+{
+	struct boardobj *ptmpobj = (struct boardobj *)pargs;
+	struct vfe_var_single_sensed_fuse *pvfevar;
+	struct vfe_var_single_sensed_fuse *ptmpvar = pargs;
+	u32 status = 0;
+
+	gk20a_dbg_info("");
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_FUSE)
+		return -EINVAL;
+
+	ptmpobj->type_mask |= BIT(CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_FUSE);
+	status = vfe_var_construct_single_sensed(g, ppboardobj, size, pargs);
+	if (status)
+		return -EINVAL;
+
+	pvfevar = (struct vfe_var_single_sensed_fuse *)*ppboardobj;
+	pvfevar->super.super.super.super.pmudatainit =
+			_vfe_var_pmudatainit_single_sensed_fuse;
+	pvfevar->vfield_info.v_field_id = ptmpvar->vfield_info.v_field_id;
+	pvfevar->vfield_info.fuse_val_default =
+		ptmpvar->vfield_info.fuse_val_default;
+	pvfevar->vfield_info.hw_correction_scale =
+		ptmpvar->vfield_info.hw_correction_scale;
+	pvfevar->vfield_info.hw_correction_offset =
+		ptmpvar->vfield_info.hw_correction_offset;
+	pvfevar->vfield_ver_info.v_field_id_ver =
+		ptmpvar->vfield_ver_info.v_field_id_ver;
+	pvfevar->vfield_ver_info.ver_expected =
+		ptmpvar->vfield_ver_info.ver_expected;
+	pvfevar->vfield_ver_info.b_use_default_on_ver_check_fail =
+		ptmpvar->vfield_ver_info.b_use_default_on_ver_check_fail;
+	pvfevar->b_version_check_done = false;
+	pvfevar->super.super.super.b_is_dynamic = false;
+	pvfevar->super.super.super.b_is_dynamic_valid = true;
+
+	status = dev_init_get_vfield_info(g, pvfevar); /* may fail part-way */
+	if (status || pvfevar->vfield_info.fuse.segment_count == 0) {
+		gk20a_err(dev_from_gk20a(g), "unable to get fuse reg info %x",
+			pvfevar->vfield_info.v_field_id);
+		return -EINVAL;
+	}
+	if (pvfevar->vfield_ver_info.fuse.segment_count == 0) {
+		gk20a_err(dev_from_gk20a(g), "unable to get fuse reg info %x",
+			pvfevar->vfield_ver_info.v_field_id_ver);
+		return -EINVAL;
+	}
+	return status;
+}
+
+/*
+ * PMU data init for SINGLE_SENSED_TEMP variables: runs the SINGLE_SENSED
+ * initializer, then copies the thermal channel index, both hysteresis
+ * values and the default temperature into the PMU structure.
+ */
+static u32 _vfe_var_pmudatainit_single_sensed_temp(struct gk20a *g,
+						   struct boardobj *board_obj_ptr,
+						   struct nv_pmu_boardobj *ppmudata)
+{
+	struct vfe_var_single_sensed_temp *src =
+		(struct vfe_var_single_sensed_temp *)board_obj_ptr;
+	struct nv_pmu_vfe_var_single_sensed_temp *dst =
+		(struct nv_pmu_vfe_var_single_sensed_temp *)ppmudata;
+	u32 err;
+
+	gk20a_dbg_info("");
+
+	err = _vfe_var_pmudatainit_single_sensed(g, board_obj_ptr, ppmudata);
+	if (err != 0)
+		return err;
+
+	/* Plain field-by-field assignment; values are passed on as stored. */
+	dst->therm_channel_index = src->therm_channel_index;
+	dst->temp_hysteresis_positive = src->temp_hysteresis_positive;
+	dst->temp_hysteresis_negative = src->temp_hysteresis_negative;
+	dst->temp_default = src->temp_default;
+
+	return 0;
+}
+
+/*
+ * Construct a SINGLE_SENSED_TEMP variable from the template in pargs.
+ * Returns -EINVAL on a type mismatch or when the SINGLE_SENSED layer
+ * cannot be built, 0 otherwise.
+ */
+static u32 vfe_var_construct_single_sensed_temp(struct gk20a *g,
+						struct boardobj **ppboardobj,
+						u16 size, void *pargs)
+{
+	struct boardobj *tmpl_obj = (struct boardobj *)pargs;
+	const struct vfe_var_single_sensed_temp *tmpl =
+			(struct vfe_var_single_sensed_temp *)pargs;
+	struct vfe_var_single_sensed_temp *temp;
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_TEMP)
+		return -EINVAL;
+
+	tmpl_obj->type_mask |= BIT(CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_TEMP);
+	if (vfe_var_construct_single_sensed(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	temp = (struct vfe_var_single_sensed_temp *)*ppboardobj;
+	temp->super.super.super.super.pmudatainit =
+			_vfe_var_pmudatainit_single_sensed_temp;
+
+	temp->therm_channel_index = tmpl->therm_channel_index;
+	temp->temp_hysteresis_positive = tmpl->temp_hysteresis_positive;
+	temp->temp_hysteresis_negative = tmpl->temp_hysteresis_negative;
+	temp->temp_default = tmpl->temp_default;
+
+	/* b_is_dynamic is false and is marked as valid. */
+	temp->super.super.super.b_is_dynamic = false;
+	temp->super.super.super.b_is_dynamic_valid = true;
+
+	return 0;
+}
+
+/*
+ * PMU data init for SINGLE_VOLTAGE variables: nothing type-specific is
+ * written, so this forwards to the SINGLE initializer.
+ */
+static u32 _vfe_var_pmudatainit_single_voltage(struct gk20a *g,
+					       struct boardobj *board_obj_ptr,
+					       struct nv_pmu_boardobj *ppmudata)
+{
+	gk20a_dbg_info("");
+
+	return _vfe_var_pmudatainit_single(g, board_obj_ptr, ppmudata);
+}
+
+/*
+ * Construct a SINGLE_VOLTAGE variable from the template in pargs.
+ * The SINGLE layer is built first so that the override fields read by
+ * _vfe_var_pmudatainit_single() are initialized, as for SINGLE_FREQUENCY.
+ */
+static u32 vfe_var_construct_single_voltage(struct gk20a *g,
+					    struct boardobj **ppboardobj,
+					    u16 size, void *pargs)
+{
+	struct boardobj *ptmpobj = (struct boardobj *)pargs;
+	struct vfe_var_single_voltage *pvfevar;
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_PERF_VFE_VAR_TYPE_SINGLE_VOLTAGE)
+		return -EINVAL;
+
+	ptmpobj->type_mask |= BIT(CTRL_PERF_VFE_VAR_TYPE_SINGLE_VOLTAGE);
+	if (vfe_var_construct_single(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	pvfevar = (struct vfe_var_single_voltage *)*ppboardobj;
+	pvfevar->super.super.super.pmudatainit =
+			_vfe_var_pmudatainit_single_voltage;
+	pvfevar->super.super.b_is_dynamic = false;
+	pvfevar->super.super.b_is_dynamic_valid = true;
+	return 0;
+}
+
+/*
+ * Build the VFE variable matching the boardobj type of the template in
+ * pargs. Only the six concrete types below have constructors; any other
+ * type, or a constructor failure, yields NULL. On success the new object
+ * is returned as a struct vfe_var pointer.
+ */
+static struct vfe_var *construct_vfe_var(struct gk20a *g, void *pargs)
+{
+	struct boardobj *obj = NULL;
+	u32 err;
+
+	gk20a_dbg_info("");
+
+	switch (BOARDOBJ_GET_TYPE(pargs)) {
+	case CTRL_PERF_VFE_VAR_TYPE_DERIVED_PRODUCT:
+		err = vfe_var_construct_derived_product(g, &obj,
+			sizeof(struct vfe_var_derived_product), pargs);
+		break;
+	case CTRL_PERF_VFE_VAR_TYPE_DERIVED_SUM:
+		err = vfe_var_construct_derived_sum(g, &obj,
+			sizeof(struct vfe_var_derived_sum), pargs);
+		break;
+	case CTRL_PERF_VFE_VAR_TYPE_SINGLE_FREQUENCY:
+		err = vfe_var_construct_single_frequency(g, &obj,
+			sizeof(struct vfe_var_single_frequency), pargs);
+		break;
+	case CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_FUSE:
+		err = vfe_var_construct_single_sensed_fuse(g, &obj,
+			sizeof(struct vfe_var_single_sensed_fuse), pargs);
+		break;
+	case CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_TEMP:
+		err = vfe_var_construct_single_sensed_temp(g, &obj,
+			sizeof(struct vfe_var_single_sensed_temp), pargs);
+		break;
+	case CTRL_PERF_VFE_VAR_TYPE_SINGLE_VOLTAGE:
+		err = vfe_var_construct_single_voltage(g, &obj,
+			sizeof(struct vfe_var_single_voltage), pargs);
+		break;
+
+	/* DERIVED, SINGLE_SENSED, SINGLE and unknown types: not buildable. */
+	default:
+		return NULL;
+	}
+
+	if (err)
+		return NULL;
+
+	gk20a_dbg_info("done");
+
+	return (struct vfe_var *)obj;
+}
+
+/*
+ * Parse the VBIOS VFE variable table into pvfevarobjs: every entry that is
+ * not disabled is turned into a template, built by construct_vfe_var()
+ * and inserted at its table index. Returns 0 on success and -EINVAL when
+ * the table is missing or malformed or an object cannot be built/inserted.
+ */
+static u32 devinit_get_vfe_var_table(struct gk20a *g,
+				     struct vfe_vars *pvfevarobjs)
+{
+	u32 status = 0;
+	u8 *vfevars_tbl_ptr = NULL;
+	struct vbios_vfe_3x_header_struct vfevars_tbl_header = { 0 };
+	struct vbios_vfe_3x_var_entry_struct var = { 0 };
+	u8 *vfevars_tbl_entry_ptr = NULL;
+	u8 *rd_offset_ptr = NULL;
+	u32 index = 0;
+	struct vfe_var *pvar;
+	u8 var_type;
+	u32 szfmt;
+	union {
+		struct boardobj board_obj;
+		struct vfe_var super;
+		struct vfe_var_derived_product derived_product;
+		struct vfe_var_derived_sum derived_sum;
+		struct vfe_var_single_sensed_fuse single_sensed_fuse;
+		struct vfe_var_single_sensed_temp single_sensed_temp;
+	} var_data;
+
+	gk20a_dbg_info("");
+
+	/* A missing lookup op leaves the pointer NULL and is an error too. */
+	if (g->ops.bios.get_perf_table_ptrs)
+		vfevars_tbl_ptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+				g->bios.perf_token,
+				CONTINUOUS_VIRTUAL_BINNING_TABLE);
+	if (vfevars_tbl_ptr == NULL) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	/* NOTE(review): size constant is named for the clocks table; confirm. */
+	memcpy(&vfevars_tbl_header, vfevars_tbl_ptr,
+	       VBIOS_CLOCKS_TABLE_1X_HEADER_SIZE_07);
+	if (vfevars_tbl_header.header_size !=
+	    VBIOS_CLOCKS_TABLE_1X_HEADER_SIZE_07){
+		status = -EINVAL;
+		goto done;
+	}
+
+	szfmt = vfevars_tbl_header.vfe_var_entry_size;
+	if (szfmt != VBIOS_VFE_3X_VAR_ENTRY_SIZE_19 &&
+	    szfmt != VBIOS_VFE_3X_VAR_ENTRY_SIZE_11) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	/* Read table entries*/
+	vfevars_tbl_entry_ptr = vfevars_tbl_ptr +
+		vfevars_tbl_header.header_size;
+
+	for (index = 0;
+	     index < vfevars_tbl_header.vfe_var_entry_count;
+	     index++) {
+		rd_offset_ptr = vfevars_tbl_entry_ptr +
+				(index * vfevars_tbl_header.vfe_var_entry_size);
+		memcpy(&var, rd_offset_ptr, szfmt);
+
+		var_data.super.out_range_min = var.out_range_min;
+		var_data.super.out_range_max = var.out_range_max;
+
+		switch ((u8)var.type) {
+		case VBIOS_VFE_3X_VAR_ENTRY_TYPE_DISABLED:
+			continue;
+
+		case VBIOS_VFE_3X_VAR_ENTRY_TYPE_SINGLE_FREQUENCY:
+			var_type = CTRL_PERF_VFE_VAR_TYPE_SINGLE_FREQUENCY;
+			break;
+
+		case VBIOS_VFE_3X_VAR_ENTRY_TYPE_SINGLE_VOLTAGE:
+			var_type = CTRL_PERF_VFE_VAR_TYPE_SINGLE_VOLTAGE;
+			break;
+
+		case VBIOS_VFE_3X_VAR_ENTRY_TYPE_SINGLE_SENSED_TEMP:
+			var_type = CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_TEMP;
+			var_data.single_sensed_temp.temp_default = 105;
+			var_data.single_sensed_temp.therm_channel_index =
+				(u8)BIOS_GET_FIELD(var.param0,
+					VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSTEMP_TH_CH_IDX);
+			var_data.single_sensed_temp.temp_hysteresis_positive =
+				(u8)BIOS_GET_FIELD(var.param0,
+					VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSTEMP_HYS_POS) << 5;
+			var_data.single_sensed_temp.temp_hysteresis_negative =
+				(u8)BIOS_GET_FIELD(var.param0,
+					VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSTEMP_HYS_NEG) << 5;
+			break;
+
+		case VBIOS_VFE_3X_VAR_ENTRY_TYPE_SINGLE_SENSED_FUSE:
+			var_type = CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_FUSE;
+			var_data.single_sensed_fuse.vfield_info.v_field_id =
+				(u8)BIOS_GET_FIELD(var.param0,
+					VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_VFIELD_ID);
+			var_data.single_sensed_fuse.vfield_ver_info.v_field_id_ver =
+				(u8)BIOS_GET_FIELD(var.param0,
+					VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_VFIELD_ID_VER);
+			var_data.single_sensed_fuse.vfield_ver_info.ver_expected =
+				(u8)BIOS_GET_FIELD(var.param0,
+					VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_EXPECTED_VER);
+			/* NOTE(review): '&&' below looks like it was meant to be '==' -- confirm */
+			var_data.single_sensed_fuse.vfield_ver_info.b_use_default_on_ver_check_fail =
+				(BIOS_GET_FIELD(var.param0,
+					VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_USE_DEFAULT_ON_VER_CHECK_FAIL) &&
+					VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSFUSE_USE_DEFAULT_ON_VER_CHECK_FAIL_YES);
+			var_data.single_sensed_fuse.vfield_info.fuse_val_default =
+				var.param1;
+			if (szfmt >= VBIOS_VFE_3X_VAR_ENTRY_SIZE_19) {
+				var_data.single_sensed_fuse.vfield_info.hw_correction_scale  =
+					(int)var.param2;
+				var_data.single_sensed_fuse.vfield_info.hw_correction_offset =
+					var.param3;
+			} else {
+				var_data.single_sensed_fuse.vfield_info.hw_correction_scale =
+					1 << 12;
+				var_data.single_sensed_fuse.vfield_info.hw_correction_offset =
+					0;
+				if ((var_data.single_sensed_fuse.vfield_info.v_field_id ==
+				     VFIELD_ID_STRAP_IDDQ) ||
+				    (var_data.single_sensed_fuse.vfield_info.v_field_id ==
+				     VFIELD_ID_STRAP_IDDQ_1)) {
+					var_data.single_sensed_fuse.vfield_info.hw_correction_scale =
+						50 << 12;
+				}
+			}
+			break;
+
+		case VBIOS_VFE_3X_VAR_ENTRY_TYPE_DERIVED_PRODUCT:
+			var_type = CTRL_PERF_VFE_VAR_TYPE_DERIVED_PRODUCT;
+			var_data.derived_product.var_idx0 =
+				(u8)BIOS_GET_FIELD(var.param0,
+					VBIOS_VFE_3X_VAR_ENTRY_PAR0_DPROD_VFE_VAR_IDX_0);
+			var_data.derived_product.var_idx1 =
+				(u8)BIOS_GET_FIELD(var.param0,
+					VBIOS_VFE_3X_VAR_ENTRY_PAR0_DPROD_VFE_VAR_IDX_1);
+			break;
+
+		case VBIOS_VFE_3X_VAR_ENTRY_TYPE_DERIVED_SUM:
+			var_type = CTRL_PERF_VFE_VAR_TYPE_DERIVED_SUM;
+			var_data.derived_sum.var_idx0 =
+				(u8)BIOS_GET_FIELD(var.param0,
+					VBIOS_VFE_3X_VAR_ENTRY_PAR0_DSUM_VFE_VAR_IDX_0);
+			var_data.derived_sum.var_idx1 =
+				(u8)BIOS_GET_FIELD(var.param0,
+					VBIOS_VFE_3X_VAR_ENTRY_PAR0_DSUM_VFE_VAR_IDX_1);
+			break;
+		default:
+			status = -EINVAL;
+			goto done;
+		}
+		var_data.board_obj.type = var_type;
+		var_data.board_obj.type_mask = 0;
+
+		pvar = construct_vfe_var(g, &var_data);
+		if (pvar == NULL) {
+			gk20a_err(dev_from_gk20a(g),
+				  "error constructing vfe_var boardobj %d",
+				  index);
+			status = -EINVAL;
+			goto done;
+		}
+
+		status = boardobjgrp_objinsert(&pvfevarobjs->super.super,
+					       (struct boardobj *)pvar, index);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+			"error adding vfe_var boardobj %d", index);
+			status = -EINVAL;
+			goto done;
+		}
+	}
+	pvfevarobjs->polling_periodms = vfevars_tbl_header.polling_periodms;
+done:
+	gk20a_dbg_info("done status %x", status);
+	return status;
+}
+
+/*
+ * Construct the SINGLE layer on top of the base vfe_var: tags the
+ * template's type_mask, installs the SINGLE pmudatainit hook and resets
+ * the override fields. Returns -EINVAL if the base cannot be built.
+ */
+static u32 vfe_var_construct_single(struct gk20a *g,
+				    struct boardobj **ppboardobj,
+				    u16 size, void *pargs)
+{
+	struct vfe_var_single *single;
+
+	gk20a_dbg_info("");
+
+	((struct boardobj *)pargs)->type_mask |=
+		BIT(CTRL_PERF_VFE_VAR_TYPE_SINGLE);
+	if (vfe_var_construct_super(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	single = (struct vfe_var_single *)*ppboardobj;
+	single->super.super.pmudatainit = _vfe_var_pmudatainit_single;
+	single->override_type = CTRL_PERF_VFE_VAR_SINGLE_OVERRIDE_TYPE_NONE;
+	single->override_value = 0;
+
+	gk20a_dbg_info("Done");
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/perf/vfe_var.h b/drivers/gpu/nvgpu/perf/vfe_var.h
new file mode 100644
index 00000000..fc43311b
--- /dev/null
+++ b/drivers/gpu/nvgpu/perf/vfe_var.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _VFE_VAR_H_
+#define _VFE_VAR_H_
+
+#include "boardobj/boardobjgrp.h"
+#include "pmuif/gpmuifperf.h"
+#include "pmuif/gpmuifperfvfe.h"
+
+u32 vfe_var_sw_setup(struct gk20a *g);
+u32 vfe_var_pmu_setup(struct gk20a *g);
+/* Fetch the object at _idx from (_pperf)->vfe.vars as a struct vfe_var *. */
+#define VFE_VAR_GET(_pperf, _idx)                                              \
+	((struct vfe_var *)BOARDOBJGRP_OBJ_GET_BY_IDX(                         \
+	&((_pperf)->vfe.vars.super.super), (_idx)))
+/* Wraps boardobjgrp_idxisvalid() for the (_pperf)->vfe.vars group. */
+#define VFE_VAR_IDX_IS_VALID(_pperf, _idx)                                     \
+	boardobjgrp_idxisvalid(&((_pperf)->vfe.vars.super.super), (_idx))
+
+struct vfe_var { /* common base embedded first in every VFE variable */
+	struct boardobj super;
+	u32 out_range_min; /* filled from the VBIOS entry's out_range_min */
+	u32 out_range_max; /* filled from the VBIOS entry's out_range_max */
+	bool b_is_dynamic_valid; /* false after base construction */
+	bool b_is_dynamic; /* set by the leaf constructors */
+};
+
+struct vfe_vars { /* board object group holding the vfe_var objects */
+	struct boardobjgrp_e32 super;
+	u8 polling_periodms; /* copied from the VBIOS VFE table header */
+};
+
+struct vfe_var_derived { /* DERIVED layer; adds no fields to vfe_var */
+	struct vfe_var super;
+};
+
+struct vfe_var_derived_product { /* DERIVED_PRODUCT variable */
+	struct vfe_var_derived super;
+	u8 var_idx0; /* from VBIOS param0, DPROD_VFE_VAR_IDX_0 field */
+	u8 var_idx1; /* from VBIOS param0, DPROD_VFE_VAR_IDX_1 field */
+};
+
+struct vfe_var_derived_sum { /* DERIVED_SUM variable */
+	struct vfe_var_derived super;
+	u8 var_idx0; /* from VBIOS param0, DSUM_VFE_VAR_IDX_0 field */
+	u8 var_idx1; /* from VBIOS param0, DSUM_VFE_VAR_IDX_1 field */
+};
+
+struct vfe_var_single { /* SINGLE layer shared by the SINGLE_* types */
+	struct vfe_var super;
+	u8 override_type; /* constructor sets ..._OVERRIDE_TYPE_NONE */
+	u32 override_value; /* constructor sets 0 */
+};
+
+struct vfe_var_single_frequency { /* adds no fields to vfe_var_single */
+	struct vfe_var_single  super;
+};
+
+struct vfe_var_single_voltage { /* adds no fields to vfe_var_single */
+	struct vfe_var_single super;
+};
+
+struct vfe_var_single_sensed { /* base of the SINGLE_SENSED_* types */
+	struct vfe_var_single super;
+};
+
+struct vfe_var_single_sensed_fuse { /* SINGLE_SENSED_FUSE variable */
+	struct vfe_var_single_sensed super;
+	struct nv_pmu_vfe_var_single_sensed_fuse_override_info	override_info;
+	struct nv_pmu_vfe_var_single_sensed_fuse_vfield_info vfield_info;
+	struct nv_pmu_vfe_var_single_sensed_fuse_ver_vfield_info vfield_ver_info;
+	u32 fuse_value_integer; /* not written by the constructor in vfe_var.c */
+	u32 fuse_value_hw_integer; /* not written by the constructor in vfe_var.c */
+	u8 fuse_version; /* not written by the constructor in vfe_var.c */
+	bool b_version_check_done; /* set to false at construction */
+};
+
+struct vfe_var_single_sensed_temp { /* SINGLE_SENSED_TEMP variable */
+	struct vfe_var_single_sensed super;
+	u8 therm_channel_index; /* from VBIOS param0, SSTEMP_TH_CH_IDX */
+	int temp_hysteresis_positive; /* param0 SSTEMP_HYS_POS field << 5 */
+	int temp_hysteresis_negative; /* param0 SSTEMP_HYS_NEG field << 5 */
+	int temp_default; /* table parser stores 105 here */
+};
+
+#endif
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
new file mode 100644
index 00000000..83f17937
--- /dev/null
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -0,0 +1,101 @@
+/*
+ * general p state infrastructure
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "clk/clk.h"
+#include "perf/perf.h"
+
+/*
+ * Software setup for the pstate components. Calls the *_sw_setup() hooks
+ * in a fixed order (clk_vin, clk_fll, vfe_var, vfe_equ, clk_domain,
+ * clk_vf_point, clk_prog); the first non-zero status stops the sequence
+ * and is returned to the caller.
+ */
+int gk20a_init_pstate_support(struct gk20a *g)
+{
+	u32 err;
+
+	gk20a_dbg_fn("");
+
+	err = clk_vin_sw_setup(g);
+	if (err)
+		goto out;
+	err = clk_fll_sw_setup(g);
+	if (err)
+		goto out;
+	err = vfe_var_sw_setup(g);
+	if (err)
+		goto out;
+	err = vfe_equ_sw_setup(g);
+	if (err)
+		goto out;
+	err = clk_domain_sw_setup(g);
+	if (err)
+		goto out;
+	err = clk_vf_point_sw_setup(g);
+	if (err)
+		goto out;
+	err = clk_prog_sw_setup(g);
+out:
+	return err;
+}
+
+/*
+ * PMU setup for the pstate components. Calls the *_pmu_setup() hooks in
+ * a fixed order (vfe_var, vfe_equ, clk_domain, clk_prog, clk_vin,
+ * clk_fll, clk_vf_point), followed by clk_pmu_vin_load(),
+ * perf_pmu_vfe_load() and clk_pmu_vf_inject(). The first non-zero
+ * status stops the sequence and is returned to the caller.
+ */
+int gk20a_init_pstate_pmu_support(struct gk20a *g)
+{
+	u32 err;
+
+	gk20a_dbg_fn("");
+
+	err = vfe_var_pmu_setup(g);
+	if (err)
+		goto out;
+	err = vfe_equ_pmu_setup(g);
+	if (err)
+		goto out;
+	err = clk_domain_pmu_setup(g);
+	if (err)
+		goto out;
+	err = clk_prog_pmu_setup(g);
+	if (err)
+		goto out;
+	err = clk_vin_pmu_setup(g);
+	if (err)
+		goto out;
+	err = clk_fll_pmu_setup(g);
+	if (err)
+		goto out;
+	err = clk_vf_point_pmu_setup(g);
+	if (err)
+		goto out;
+
+	err = clk_pmu_vin_load(g);
+	if (err)
+		goto out;
+	err = perf_pmu_vfe_load(g);
+	if (err)
+		goto out;
+	err = clk_pmu_vf_inject(g);
+
+out:
+	return err;
+}
+
diff --git a/drivers/gpu/nvgpu/pstate/pstate.h b/drivers/gpu/nvgpu/pstate/pstate.h
new file mode 100644
index 00000000..fb49adf3
--- /dev/null
+++ b/drivers/gpu/nvgpu/pstate/pstate.h
@@ -0,0 +1,19 @@
+/*
+ * general p state infrastructure
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+
+int gk20a_init_pstate_support(struct gk20a *g);
+int gk20a_init_pstate_pmu_support(struct gk20a *g);
-- 
cgit v1.2.2


From d2b67f1ad606733a63d8261e36068e5bd1f96cdc Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Mon, 8 Aug 2016 03:13:37 -0700
Subject: gpu: nvgpu: add debugfs to dump clocks

* Removed unused registers from headers
* Added counter based MCLK
* Removed hardcoding

JIRA DNVGPU-98

Change-Id: Idffcd7fc17024582b41c29371a2295df8f0c206b
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1204019
(cherry picked from commit 48dfa41a641c3adbc4d25a35f418cf73b08d5e8c)
Reviewed-on: http://git-master/r/1227264
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x   |   3 +-
 drivers/gpu/nvgpu/gp106/clk_gp106.c     | 226 ++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/clk_gp106.h     |  55 ++++++++
 drivers/gpu/nvgpu/gp106/hal_gp106.c     |   3 +-
 drivers/gpu/nvgpu/gp106/hw_trim_gp106.h | 189 ++++++++++++++++++++++++++
 5 files changed, 474 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp106/clk_gp106.c
 create mode 100644 drivers/gpu/nvgpu/gp106/clk_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_trim_gp106.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index c6b6f0d2..cd4ce5ce 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -38,7 +38,8 @@ nvgpu-y += \
 	$(nvgpu-t18x)/perf/vfe_var.o \
 	$(nvgpu-t18x)/perf/vfe_equ.o \
 	$(nvgpu-t18x)/perf/perf.o \
-	$(nvgpu-t18x)/clk/clk.o
+	$(nvgpu-t18x)/clk/clk.o \
+	$(nvgpu-t18x)/gp106/clk_gp106.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
 
diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.c b/drivers/gpu/nvgpu/gp106/clk_gp106.c
new file mode 100644
index 00000000..4bf03661
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/clk_gp106.c
@@ -0,0 +1,226 @@
+/*
+ * GP106 Clocks
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>	/* for mdelay */
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/clk/tegra.h>
+#include <linux/tegra-fuse.h>
+
+#include "gk20a/gk20a.h"
+#include "hw_trim_gp106.h"
+#include "clk_gp106.h"
+
+#define gk20a_dbg_clk(fmt, arg...) \
+	gk20a_dbg(gpu_dbg_clk, fmt, ##arg)
+
+#ifdef CONFIG_DEBUG_FS
+static int clk_gp106_debugfs_init(struct gk20a *g);
+#endif
+
+#define NUM_NAMEMAPS	4
+
+static int gp106_init_clk_support(struct gk20a *g) {
+	/* Build the clock table read by debugfs; returns 0 or -ENOMEM. */
+	struct clk_gk20a *clk = &g->clk;
+
+	gk20a_dbg_fn("");
+
+	mutex_init(&clk->clk_mutex);
+
+	/* kcalloc() zeroes the table and checks count * size for overflow. */
+	clk->clk_namemap = kcalloc(NUM_NAMEMAPS, sizeof(*clk->clk_namemap),
+				   GFP_KERNEL);
+	if (!clk->clk_namemap)
+		return -ENOMEM;
+
+	clk->clk_namemap[0] = (struct namemap_cfg) {
+		.namemap = CLK_NAMEMAP_INDEX_GPC2CLK,
+		.is_enable = 1,
+		.is_counter = 1,
+		.g = g,
+		.cntr.reg_ctrl_addr = trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_r(),
+		.cntr.reg_ctrl_idx  =
+			trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_source_gpc2clk_f(),
+		.cntr.reg_cntr_addr = trim_gpc_bcast_clk_cntr_ncgpcclk_cnt_r(),
+		.name = "gpc2clk"
+	};
+	clk->clk_namemap[1] = (struct namemap_cfg) {
+		.namemap = CLK_NAMEMAP_INDEX_SYS2CLK,
+		.is_enable = 1,
+		.is_counter = 1,
+		.g = g,
+		.cntr.reg_ctrl_addr = trim_sys_clk_cntr_ncsyspll_cfg_r(),
+		.cntr.reg_ctrl_idx  = trim_sys_clk_cntr_ncsyspll_cfg_source_sys2clk_f(),
+		.cntr.reg_cntr_addr = trim_sys_clk_cntr_ncsyspll_cnt_r(),
+		.name = "sys2clk"
+	};
+	clk->clk_namemap[2] = (struct namemap_cfg) {
+		.namemap = CLK_NAMEMAP_INDEX_XBAR2CLK,
+		.is_enable = 1,
+		.is_counter = 1,
+		.g = g,
+		.cntr.reg_ctrl_addr = trim_sys_clk_cntr_ncltcpll_cfg_r(),
+		.cntr.reg_ctrl_idx  = trim_sys_clk_cntr_ncltcpll_cfg_source_xbar2clk_f(),
+		.cntr.reg_cntr_addr = trim_sys_clk_cntr_ncltcpll_cnt_r(),
+		.name = "xbar2clk"
+	};
+	clk->clk_namemap[3] = (struct namemap_cfg) {
+		.namemap = CLK_NAMEMAP_INDEX_DRAMCLK,
+		.is_enable = 1,
+		.is_counter = 1,
+		.g = g,
+		.cntr.reg_ctrl_addr = trim_fbpa_bcast_clk_cntr_ncltcclk_cfg_r(),
+		.cntr.reg_ctrl_idx  =
+			trim_fbpa_bcast_clk_cntr_ncltcclk_cfg_source_dramdiv4_rec_clk1_f(),
+		.cntr.reg_cntr_addr = trim_fbpa_bcast_clk_cntr_ncltcclk_cnt_r(),
+		.name = "dramdiv2_rec_clk1"	/* NOTE(review): source is dramdiv4 - confirm name/scale */
+	};
+
+	clk->namemap_num = NUM_NAMEMAPS;
+	clk->g = g;
+
+	/* debugfs setup is best-effort: its failure does not fail clk init. */
+#ifdef CONFIG_DEBUG_FS
+	if (!clk->debugfs_set) {
+		if (!clk_gp106_debugfs_init(g))
+			clk->debugfs_set = true;
+	}
+#endif
+	return 0;
+}
+
+#ifdef CONFIG_DEBUG_FS
+typedef struct namemap_cfg namemap_cfg_t;
+/*
+ * Measure the clock described by @c with its hardware counter.
+ * Returns the rate in kHz (count scaled by XTAL_SCALE_TO_KHZ), or 0 if @c is
+ * unusable or the counter never resets; the cfg register is restored on exit.
+ */
+static u32 gp106_get_rate_cntr(struct gk20a *g, struct namemap_cfg *c) {
+	u32 save_reg;
+	u32 retries;
+	u32 cntr = 0;
+	struct clk_gk20a *clk = &g->clk;
+
+	if (!c || !c->cntr.reg_ctrl_addr || !c->cntr.reg_cntr_addr)
+		return 0;
+
+	mutex_lock(&clk->clk_mutex);
+
+	/* Save the register */
+	save_reg = gk20a_readl(g, c->cntr.reg_ctrl_addr);
+
+	/* Disable and reset the current clock */
+	gk20a_writel(g, c->cntr.reg_ctrl_addr,
+		     trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_reset_asserted_f() |
+		     trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_enable_deasserted_f());
+
+	/* Force wb() */
+	gk20a_readl(g, c->cntr.reg_ctrl_addr);
+
+	/* Wait for reset to happen */
+	retries = CLK_DEFAULT_CNTRL_SETTLE_RETRIES;
+	do {
+		udelay(CLK_DEFAULT_CNTRL_SETTLE_USECS);
+	} while ((--retries) && (cntr = gk20a_readl(g, c->cntr.reg_cntr_addr)));
+
+	if (!retries) {
+		gk20a_err(dev_from_gk20a(g),
+			  "unable to settle counter reset, bailing");
+		cntr = 0;	/* do not report the stale, unsettled sample as a rate */
+		goto read_err;
+	}
+	/* Program counter */
+	gk20a_writel(g, c->cntr.reg_ctrl_addr,
+		     trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_reset_deasserted_f() |
+		     trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_enable_asserted_f() |
+		     trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_write_en_asserted_f() |
+		     trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_noofipclks_f(XTAL_CNTR_CLKS) |
+		     c->cntr.reg_ctrl_idx);
+	gk20a_readl(g, c->cntr.reg_ctrl_addr);
+
+	udelay(XTAL_CNTR_DELAY);
+
+	cntr = XTAL_SCALE_TO_KHZ * gk20a_readl(g, c->cntr.reg_cntr_addr);
+
+read_err:
+	/* reset and restore control register */
+	gk20a_writel(g, c->cntr.reg_ctrl_addr,
+		     trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_reset_asserted_f() |
+		     trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_enable_deasserted_f());
+	gk20a_readl(g, c->cntr.reg_ctrl_addr);
+	gk20a_writel(g, c->cntr.reg_ctrl_addr, save_reg);
+	gk20a_readl(g, c->cntr.reg_ctrl_addr);
+	mutex_unlock(&clk->clk_mutex);
+
+	return cntr;
+}
+
+static int gp106_get_rate_show(void *data, u64 *val) {
+	struct namemap_cfg *cfg = data;
+
+	/* Only counter-backed clocks can be read for now (TODO PLL read) */
+	*val = cfg->is_counter ? gp106_get_rate_cntr(cfg->g, cfg) : 0;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(get_rate_fops, gp106_get_rate_show, NULL, "%llu\n");
+
+
+static int clk_gp106_debugfs_init(struct gk20a *g) {
+	/* Create <gpu debugfs root>/clocks with one read-only file per clock. */
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	struct dentry *clocks_root;
+	struct namemap_cfg *entry;
+	int i;
+
+	clocks_root = debugfs_create_dir("clocks", platform->debugfs);
+	if (!clocks_root)
+		return -ENOMEM;
+
+	gk20a_dbg(gpu_dbg_info, "g=%p", g);
+
+	for (i = 0; i < g->clk.namemap_num; i++) {
+		entry = &g->clk.clk_namemap[i];
+		/* Disabled entries get no debugfs node. */
+		if (!entry->is_enable)
+			continue;
+
+		/* The table entry itself is the file's private data. */
+		if (!debugfs_create_file(entry->name, S_IRUGO, clocks_root,
+					 entry, &get_rate_fops))
+			goto err_out;
+	}
+	return 0;
+
+err_out:
+	/* Remove the directory together with any files already created. */
+	pr_err("%s: Failed to make debugfs node\n", __func__);
+	debugfs_remove_recursive(clocks_root);
+	return -ENOMEM;
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
+/* Install the gp106 clock init hook into the HAL ops table. */
+void gp106_init_clk_ops(struct gpu_ops *gops)
+{
+	gops->clk.init_clk_support = gp106_init_clk_support;
+}
diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.h b/drivers/gpu/nvgpu/gp106/clk_gp106.h
new file mode 100644
index 00000000..a50819aa
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/clk_gp106.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef CLK_GP106_H
+#define CLK_GP106_H
+
+#include <linux/mutex.h>
+
+#define CLK_NAMEMAP_INDEX_GPC2CLK	0x00
+#define CLK_NAMEMAP_INDEX_XBAR2CLK	0x02
+#define CLK_NAMEMAP_INDEX_SYS2CLK	0x07	/* SYSPLL */
+#define CLK_NAMEMAP_INDEX_DRAMCLK	0x20	/* DRAMPLL */
+
+#define CLK_DEFAULT_CNTRL_SETTLE_RETRIES 10
+#define CLK_DEFAULT_CNTRL_SETTLE_USECS   5
+
+#define XTAL_CNTR_CLKS		2700	/* 100usec at 27MHz XTAL */
+#define XTAL_CNTR_DELAY		110	/* leave 10 extra usec   */
+#define XTAL_SCALE_TO_KHZ	10	/* counts per 100usec -> kHz */
+
+
+/* One measurable clock; entries are filled in by gp106_init_clk_support(). */
+struct namemap_cfg {
+	u32 namemap;	/* CLK_NAMEMAP_INDEX_* value for this clock */
+	u32 is_enable;	/* Namemap enabled */
+	u32 is_counter;	/* Using cntr */
+	struct gk20a *g;	/* GPU this entry belongs to */
+	union {
+		struct {
+			u32 reg_ctrl_addr;	/* counter cfg register offset */
+			u32 reg_ctrl_idx;	/* source-select bits OR'd into cfg */
+			u32 reg_cntr_addr;	/* counter value register offset */
+		} cntr;
+		struct {
+			/* Todo */
+		} pll;
+	};
+	char name[24];	/* debugfs file name */
+};
+
+void gp106_init_clk_ops(struct gpu_ops *gops);
+
+#endif /* CLK_GP106_H */
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 822591ed..6c8ac7bc 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -37,7 +37,7 @@
 #include "gm20b/gr_gm20b.h"
 #include "gm20b/fifo_gm20b.h"
 #include "gm20b/pmu_gm20b.h"
-#include "gm20b/clk_gm20b.h"
+#include "gp106/clk_gp106.h"
 
 #include "gp106/mm_gp106.h"
 #include "gp106/pmu_gp106.h"
@@ -156,6 +156,7 @@ int gp106_init_hal(struct gk20a *g)
 	gp106_init_pmu_ops(gops);
 	gk20a_init_debug_ops(gops);
 	gk20a_init_dbg_session_ops(gops);
+	gp106_init_clk_ops(gops);
 	gp10b_init_regops(gops);
 	gp10b_init_cde_ops(gops);
 	gk20a_init_tsg_ops(gops);
diff --git a/drivers/gpu/nvgpu/gp106/hw_trim_gp106.h b/drivers/gpu/nvgpu/gp106/hw_trim_gp106.h
new file mode 100644
index 00000000..42d3fd32
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_trim_gp106.h
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_trim_gp106_h_
+#define _hw_trim_gp106_h_
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_r(void)
+{
+	return 0x00132924;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_noofipclks_s(void)
+{
+	return 16;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_noofipclks_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_noofipclks_m(void)
+{
+	return 0xffff << 0;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_noofipclks_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_write_en_s(void)
+{
+	return 1;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_write_en_f(u32 v)
+{
+	return (v & 0x1) << 16;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_write_en_m(void)
+{
+	return 0x1 << 16;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_write_en_v(u32 r)
+{
+	return (r >> 16) & 0x1;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_write_en_deasserted_f(void)
+{
+	return 0;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_write_en_asserted_f(void)
+{
+	return 0x10000;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_enable_s(void)
+{
+	return 1;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_enable_f(u32 v)
+{
+	return (v & 0x1) << 20;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_enable_m(void)
+{
+	return 0x1 << 20;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_enable_v(u32 r)
+{
+	return (r >> 20) & 0x1;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_enable_deasserted_f(void)
+{
+	return 0;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_enable_asserted_f(void)
+{
+	return 0x100000;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_reset_s(void)
+{
+	return 1;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_reset_f(u32 v)
+{
+	return (v & 0x1) << 24;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_reset_m(void)
+{
+	return 0x1 << 24;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_reset_v(u32 r)
+{
+	return (r >> 24) & 0x1;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_reset_deasserted_f(void)
+{
+	return 0;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_reset_asserted_f(void)
+{
+	return 0x1000000;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_source_gpc2clk_f(void)
+{
+	return 0x70000000;
+}
+static inline u32 trim_gpc_bcast_clk_cntr_ncgpcclk_cnt_r(void)
+{
+	return 0x00132928;
+}
+static inline u32 trim_fbpa_bcast_clk_cntr_ncltcclk_cfg_r(void)
+{
+	return 0x00132128;
+}
+static inline u32 trim_fbpa_bcast_clk_cntr_ncltcclk_cfg_source_dramdiv4_rec_clk1_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 trim_fbpa_bcast_clk_cntr_ncltcclk_cnt_r(void)
+{
+	return 0x0013212c;
+}
+static inline u32 trim_sys_clk_cntr_ncltcpll_cfg_r(void)
+{
+	return 0x001373c0;
+}
+static inline u32 trim_sys_clk_cntr_ncltcpll_cfg_source_xbar2clk_f(void)
+{
+	return 0x20000000;
+}
+static inline u32 trim_sys_clk_cntr_ncltcpll_cnt_r(void)
+{
+	return 0x001373c4;
+}
+static inline u32 trim_sys_clk_cntr_ncsyspll_cfg_r(void)
+{
+	return 0x001373b0;
+}
+static inline u32 trim_sys_clk_cntr_ncsyspll_cfg_source_sys2clk_f(void)
+{
+	return 0x0;
+}
+static inline u32 trim_sys_clk_cntr_ncsyspll_cnt_r(void)
+{
+	return 0x001373b4;
+}
+
+#endif
-- 
cgit v1.2.2


From 3bccca16b88a9629ef4ae72c0b9314761ab033ca Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Fri, 26 Aug 2016 10:18:16 -0700
Subject: gpu: nvgpu: gp106: Skip resetting FB

FB is enabled in devinit. Skip resetting it in GPU boot.

Bug 1799537

Change-Id: I0748127f0962e7d6d2bf0ecece6773fdf9c35bc8
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1208715
(cherry picked from commit ceafac52f5711bd987b746686f11b22807f74698)
Reviewed-on: http://git-master/r/1227265
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x |  1 +
 drivers/gpu/nvgpu/gp106/fb_gp106.c    | 25 +++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/fb_gp106.h    | 19 +++++++++++++++++++
 drivers/gpu/nvgpu/gp106/hal_gp106.c   |  4 ++--
 4 files changed, 47 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp106/fb_gp106.c
 create mode 100644 drivers/gpu/nvgpu/gp106/fb_gp106.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index cd4ce5ce..167a0ae2 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -28,6 +28,7 @@ nvgpu-y += \
 	$(nvgpu-t18x)/gp106/sec2_gp106.o \
 	$(nvgpu-t18x)/gp106/fifo_gp106.o \
 	$(nvgpu-t18x)/gp106/ltc_gp106.o \
+	$(nvgpu-t18x)/gp106/fb_gp106.o \
 	$(nvgpu-t18x)/clk/clk_mclk.o \
 	$(nvgpu-t18x)/pstate/pstate.o \
 	$(nvgpu-t18x)/clk/clk_vin.o \
diff --git a/drivers/gpu/nvgpu/gp106/fb_gp106.c b/drivers/gpu/nvgpu/gp106/fb_gp106.c
new file mode 100644
index 00000000..1c5b3e0a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/fb_gp106.c
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+
+#include "gk20a/gk20a.h"
+#include "gp10b/fb_gp10b.h"
+
+void gp106_init_fb(struct gpu_ops *gops)
+{
+	gp10b_init_fb(gops);
+	/* FB is enabled in devinit on gp106: clear the fs-state/reset hooks. */
+	gops->fb.init_fs_state = NULL;
+	gops->fb.reset = NULL;
+}
diff --git a/drivers/gpu/nvgpu/gp106/fb_gp106.h b/drivers/gpu/nvgpu/gp106/fb_gp106.h
new file mode 100644
index 00000000..87b371e1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/fb_gp106.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef FB_GP106_H
+#define FB_GP106_H
+struct gpu_ops;
+
+void gp106_init_fb(struct gpu_ops *gops);
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 6c8ac7bc..5f15a2c8 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -25,7 +25,6 @@
 #include "gp106/ltc_gp106.h"
 #include "gp10b/mm_gp10b.h"
 #include "gp10b/ce_gp10b.h"
-#include "gp10b/fb_gp10b.h"
 #include "gp106/fifo_gp106.h"
 #include "gp10b/gp10b_gating_reglist.h"
 #include "gp10b/regops_gp10b.h"
@@ -43,6 +42,7 @@
 #include "gp106/pmu_gp106.h"
 #include "gp106/gr_ctx_gp106.h"
 #include "gp106/gr_gp106.h"
+#include "gp106/fb_gp106.h"
 #include "nvgpu_gpuid_t18x.h"
 #include "hw_proj_gp106.h"
 #include "gk20a/dbg_gpu_gk20a.h"
@@ -148,7 +148,7 @@ int gp106_init_hal(struct gk20a *g)
 	gp10b_init_mc(gops);
 	gp106_init_gr(gops);
 	gp106_init_ltc(gops);
-	gp10b_init_fb(gops);
+	gp106_init_fb(gops);
 	gp106_init_fifo(gops);
 	gp10b_init_ce(gops);
 	gp106_init_gr_ctx(gops);
-- 
cgit v1.2.2


From 3d275d19c34b17bde5694b333c25a09aa36d88ab Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 24 Aug 2016 15:11:45 -0700
Subject: gpu: nvgpu: gp106: Prune non-existing registers

Prune non-existing registers from mclk shadow register list.

Bug 1799537

Change-Id: I8034a1820ef21e550616a5135856b05c2f375d6f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1208018
(cherry picked from commit cb988bb28dd914ea291cedec799d055f3d71d877)
Reviewed-on: http://git-master/r/1227266
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/clk/clk_mclk.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.c b/drivers/gpu/nvgpu/clk/clk_mclk.c
index 02e16fcc..f73a6002 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.c
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.c
@@ -36,12 +36,6 @@ static struct memory_link_training_pattern memory_shadow_reglist[] = {
 	{0x9046a4, 0x03030303},
 	{0x9086a0, 0x03030303},
 	{0x9086a4, 0x03030303},
-	{0x90c6a0, 0x03030303},
-	{0x90c6a4, 0x03030303},
-	{0x9106a0, 0x07070707},
-	{0x9106a4, 0x07070707},
-	{0x9146a0, 0x07070707},
-	{0x9146a4, 0x07070707},
 	{0x9846a8, 0x03030303},
 	{0x9846ac, 0x03030303},
 	{0x9a065c, 0x00},
-- 
cgit v1.2.2


From 490a8f3f5ff69fa16df3db495c9a378afd8349b1 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Thu, 1 Sep 2016 15:31:52 -0700
Subject: gpu: nvgpu: Add gp106 clock gating tables

JIRA DNVGPU-72
JIRA DNVGPU-73

Change-Id: I4a979344649ced1bbf8df215c07a15b6149bba69
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1215915
(cherry picked from commit d5f49042010a18e2885e1213b463cb067d765390)
Reviewed-on: http://git-master/r/1227267
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x          |   3 +-
 drivers/gpu/nvgpu/gp106/gp106_gating_reglist.c | 649 +++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/gp106_gating_reglist.h |  87 ++++
 drivers/gpu/nvgpu/gp106/hal_gp106.c            |  49 +-
 4 files changed, 785 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp106/gp106_gating_reglist.c
 create mode 100644 drivers/gpu/nvgpu/gp106/gp106_gating_reglist.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index 167a0ae2..aaf02931 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -40,7 +40,8 @@ nvgpu-y += \
 	$(nvgpu-t18x)/perf/vfe_equ.o \
 	$(nvgpu-t18x)/perf/perf.o \
 	$(nvgpu-t18x)/clk/clk.o \
-	$(nvgpu-t18x)/gp106/clk_gp106.o
+	$(nvgpu-t18x)/gp106/clk_gp106.o \
+	$(nvgpu-t18x)/gp106/gp106_gating_reglist.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
 
diff --git a/drivers/gpu/nvgpu/gp106/gp106_gating_reglist.c b/drivers/gpu/nvgpu/gp106/gp106_gating_reglist.c
new file mode 100644
index 00000000..29870d60
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/gp106_gating_reglist.c
@@ -0,0 +1,649 @@
+/*
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * This file is autogenerated.  Do not edit.
+ */
+
+#ifndef __gp106_gating_reglist_h__
+#define __gp106_gating_reglist_h__
+
+#include <linux/types.h>
+#include "gp106_gating_reglist.h"
+
+struct gating_desc {
+	u32 addr;	/* register offset handed to gk20a_writel() */
+	u32 prod;	/* value written when the loader's prod flag is true */
+	u32 disable;	/* value written when the loader's prod flag is false */
+};
+/* slcg bus */
+static const struct gating_desc gp106_slcg_bus[] = {
+	{.addr = 0x00001c04, .prod = 0x00000000, .disable = 0x000003fe},
+};
+
+/* slcg ce2 */
+static const struct gating_desc gp106_slcg_ce2[] = {
+	{.addr = 0x00104204, .prod = 0x00000000, .disable = 0x000007fe},
+};
+
+/* slcg chiplet */
+static const struct gating_desc gp106_slcg_chiplet[] = {
+	{.addr = 0x0010c07c, .prod = 0x00000000, .disable = 0x00000007},
+	{.addr = 0x0010c0fc, .prod = 0x00000000, .disable = 0x00000007},
+	{.addr = 0x0010c17c, .prod = 0x00000000, .disable = 0x00000007},
+	{.addr = 0x0010c1fc, .prod = 0x00000000, .disable = 0x00000007},
+	{.addr = 0x0010c27c, .prod = 0x00000000, .disable = 0x00000007},
+	{.addr = 0x0010c2fc, .prod = 0x00000000, .disable = 0x00000007},
+	{.addr = 0x0010e07c, .prod = 0x00000000, .disable = 0x00000007},
+	{.addr = 0x0010d07c, .prod = 0x00000000, .disable = 0x00000007},
+	{.addr = 0x0010d0fc, .prod = 0x00000000, .disable = 0x00000007},
+	{.addr = 0x0010d17c, .prod = 0x00000000, .disable = 0x00000007},
+	{.addr = 0x0010d1fc, .prod = 0x00000000, .disable = 0x00000007},
+	{.addr = 0x0010d27c, .prod = 0x00000000, .disable = 0x00000007},
+	{.addr = 0x0010d2fc, .prod = 0x00000000, .disable = 0x00000007},
+	{.addr = 0x0010e17c, .prod = 0x00000000, .disable = 0x00000007},
+};
+
+/* slcg fb */
+static const struct gating_desc gp106_slcg_fb[] = {
+	{.addr = 0x00100d14, .prod = 0x00000000, .disable = 0xfffffffe},
+	{.addr = 0x00100c9c, .prod = 0x00000000, .disable = 0x000001fe},
+};
+
+/* slcg fifo */
+static const struct gating_desc gp106_slcg_fifo[] = {
+	{.addr = 0x000026ac, .prod = 0x00000000, .disable = 0x0001fffe},
+};
+
+/* slcg gr */
+static const struct gating_desc gp106_slcg_gr[] = {
+	{.addr = 0x004041f4, .prod = 0x00000000, .disable = 0x07fffffe},
+	{.addr = 0x0040917c, .prod = 0x00020008, .disable = 0x0003fffe},
+	{.addr = 0x00409894, .prod = 0x00000040, .disable = 0x03fffffe},
+	{.addr = 0x004078c4, .prod = 0x00000000, .disable = 0x000001fe},
+	{.addr = 0x00406004, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x00405864, .prod = 0x00000000, .disable = 0x000001fe},
+	{.addr = 0x00405910, .prod = 0xfffffff0, .disable = 0xfffffffe},
+	{.addr = 0x00408044, .prod = 0x00000000, .disable = 0x000007fe},
+	{.addr = 0x00407004, .prod = 0x00000000, .disable = 0x000001fe},
+	{.addr = 0x0041a17c, .prod = 0x00020008, .disable = 0x0003fffe},
+	{.addr = 0x0041a894, .prod = 0x00000040, .disable = 0x03fffffe},
+	{.addr = 0x00418504, .prod = 0x00000000, .disable = 0x0007fffe},
+	{.addr = 0x0041860c, .prod = 0x00000000, .disable = 0x000001fe},
+	{.addr = 0x0041868c, .prod = 0x00000000, .disable = 0x0000001e},
+	{.addr = 0x0041871c, .prod = 0x00000000, .disable = 0x000003fe},
+	{.addr = 0x00418388, .prod = 0x00000000, .disable = 0x00000001},
+	{.addr = 0x0041882c, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x00418bc0, .prod = 0x00000000, .disable = 0x000001fe},
+	{.addr = 0x00418974, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x00418c74, .prod = 0xffffff80, .disable = 0xfffffffe},
+	{.addr = 0x00418cf4, .prod = 0xfffffff8, .disable = 0xfffffffe},
+	{.addr = 0x00418d74, .prod = 0xffffffe0, .disable = 0xfffffffe},
+	{.addr = 0x00418f10, .prod = 0xffffffe0, .disable = 0xfffffffe},
+	{.addr = 0x00418e10, .prod = 0xfffffffe, .disable = 0xfffffffe},
+	{.addr = 0x00419024, .prod = 0x000001fe, .disable = 0x000001fe},
+	{.addr = 0x0041889c, .prod = 0x00000000, .disable = 0x000001fe},
+	{.addr = 0x00419d24, .prod = 0x00000000, .disable = 0x0000ffff},
+	{.addr = 0x00419a44, .prod = 0x00000000, .disable = 0x0000000e},
+	{.addr = 0x00419a4c, .prod = 0x00000000, .disable = 0x000001fe},
+	{.addr = 0x00419a54, .prod = 0x00000000, .disable = 0x0000003e},
+	{.addr = 0x00419a5c, .prod = 0x00000000, .disable = 0x0000000e},
+	{.addr = 0x00419a64, .prod = 0x00000000, .disable = 0x000001fe},
+	{.addr = 0x00419a6c, .prod = 0x00000000, .disable = 0x0000000e},
+	{.addr = 0x00419a74, .prod = 0x00000000, .disable = 0x0000000e},
+	{.addr = 0x00419a7c, .prod = 0x00000000, .disable = 0x0000003e},
+	{.addr = 0x00419a84, .prod = 0x00000000, .disable = 0x0000000e},
+	{.addr = 0x0041986c, .prod = 0x00000104, .disable = 0x00fffffe},
+	{.addr = 0x00419cd8, .prod = 0x00000000, .disable = 0x001ffffe},
+	{.addr = 0x00419ce0, .prod = 0x00000000, .disable = 0x001ffffe},
+	{.addr = 0x00419c74, .prod = 0x0000001e, .disable = 0x0000001e},
+	{.addr = 0x00419fd4, .prod = 0x00000000, .disable = 0x0003fffe},
+	{.addr = 0x00419fdc, .prod = 0xffedff00, .disable = 0xfffffffe},
+	{.addr = 0x00419fe4, .prod = 0x00001b00, .disable = 0x00001ffe},
+	{.addr = 0x00419ff4, .prod = 0x00000000, .disable = 0x00003ffe},
+	{.addr = 0x00419ffc, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x0041be2c, .prod = 0x04115fc0, .disable = 0xfffffffe},
+	{.addr = 0x0041bfec, .prod = 0xfffffff0, .disable = 0xfffffffe},
+	{.addr = 0x0041bed4, .prod = 0xfffffff8, .disable = 0xfffffffe},
+	{.addr = 0x00412814, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x00412a84, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x004129ac, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x00412a24, .prod = 0x00000000, .disable = 0x0000ffff},
+	{.addr = 0x00412c14, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x00412e84, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x00412dac, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x00412e24, .prod = 0x00000000, .disable = 0x0000ffff},
+	{.addr = 0x00408814, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x00408a84, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x004089ac, .prod = 0x00000000, .disable = 0x0001fffe},
+	{.addr = 0x00408a24, .prod = 0x00000000, .disable = 0x0000ffff},
+};
+
+/* slcg ltc */
+static const struct gating_desc gp106_slcg_ltc[] = {
+	{.addr = 0x00154050, .prod = 0x00000000, .disable = 0xfffffffe},
+	{.addr = 0x0015455c, .prod = 0x00000000, .disable = 0xfffffffe},
+	{.addr = 0x0015475c, .prod = 0x00000000, .disable = 0xfffffffe},
+	{.addr = 0x0015435c, .prod = 0x00000000, .disable = 0xfffffffe},
+	{.addr = 0x00156050, .prod = 0x00000000, .disable = 0xfffffffe},
+	{.addr = 0x0015655c, .prod = 0x00000000, .disable = 0xfffffffe},
+	{.addr = 0x0015675c, .prod = 0x00000000, .disable = 0xfffffffe},
+	{.addr = 0x0015635c, .prod = 0x00000000, .disable = 0xfffffffe},
+	{.addr = 0x0017e050, .prod = 0x00000000, .disable = 0xfffffffe},
+	{.addr = 0x0017e35c, .prod = 0x00000000, .disable = 0xfffffffe},
+};
+
+/* slcg perf */
+static const struct gating_desc gp106_slcg_perf[] = {
+	{.addr = 0x001be018, .prod = 0x000001ff, .disable = 0x00000000},
+	{.addr = 0x001bc018, .prod = 0x000001ff, .disable = 0x00000000},
+	{.addr = 0x001bc218, .prod = 0x000001ff, .disable = 0x00000000},
+	{.addr = 0x001bc418, .prod = 0x000001ff, .disable = 0x00000000},
+	{.addr = 0x001bc618, .prod = 0x000001ff, .disable = 0x00000000},
+	{.addr = 0x001bc818, .prod = 0x000001ff, .disable = 0x00000000},
+	{.addr = 0x001bca18, .prod = 0x000001ff, .disable = 0x00000000},
+	{.addr = 0x001b8018, .prod = 0x000001ff, .disable = 0x00000000},
+	{.addr = 0x001b8218, .prod = 0x000001ff, .disable = 0x00000000},
+	{.addr = 0x001b8418, .prod = 0x000001ff, .disable = 0x00000000},
+	{.addr = 0x001b8618, .prod = 0x000001ff, .disable = 0x00000000},
+	{.addr = 0x001b8818, .prod = 0x000001ff, .disable = 0x00000000},
+	{.addr = 0x001b8a18, .prod = 0x000001ff, .disable = 0x00000000},
+	{.addr = 0x001b4124, .prod = 0x00000001, .disable = 0x00000000},
+};
+
+/* slcg PriRing */
+static const struct gating_desc gp106_slcg_priring[] = {
+	{.addr = 0x001200a8, .prod = 0x00000000, .disable = 0x00000001},
+};
+
+/* slcg pmu */
+static const struct gating_desc gp106_slcg_pmu[] = {
+	{.addr = 0x0010a134, .prod = 0x00020008, .disable = 0x0003fffe},
+	{.addr = 0x0010aa74, .prod = 0x00000000, .disable = 0x00007ffe},
+	{.addr = 0x0010ae74, .prod = 0x00000000, .disable = 0x0000000f},
+};
+
+/* slcg therm */
+static const struct gating_desc gp106_slcg_therm[] = {
+	{.addr = 0x000206b8, .prod = 0x00000000, .disable = 0x0000000f},
+};
+
+/* slcg Xbar */
+static const struct gating_desc gp106_slcg_xbar[] = {
+	{.addr = 0x0013c824, .prod = 0x00000000, .disable = 0x7ffffffe},
+	{.addr = 0x0013dc08, .prod = 0x00000000, .disable = 0xfffffffe},
+	{.addr = 0x0013c924, .prod = 0x00000000, .disable = 0x7ffffffe},
+	{.addr = 0x0013cbe4, .prod = 0x00000000, .disable = 0x1ffffffe},
+	{.addr = 0x0013cc04, .prod = 0x00000000, .disable = 0x1ffffffe},
+	{.addr = 0x0013cc24, .prod = 0x00000000, .disable = 0x1ffffffe},
+	{.addr = 0x0013cc44, .prod = 0x00000000, .disable = 0x1ffffffe},
+	{.addr = 0x0013cc64, .prod = 0x00000000, .disable = 0x1ffffffe},
+	{.addr = 0x0013cc84, .prod = 0x00000000, .disable = 0x1ffffffe},
+	{.addr = 0x0013cca4, .prod = 0x00000000, .disable = 0x1ffffffe},
+};
+
+/* blcg bus */
+static const struct gating_desc gp106_blcg_bus[] = {
+	{.addr = 0x00001c00, .prod = 0x00000042, .disable = 0x00000000},
+};
+
+/* blcg ce */
+static const struct gating_desc gp106_blcg_ce[] = {
+	{.addr = 0x00104200, .prod = 0x0000c242, .disable = 0x00000000},
+};
+
+/* blcg fb */
+static const struct gating_desc gp106_blcg_fb[] = {
+	{.addr = 0x00100d10, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00100d30, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00100d3c, .prod = 0x00000242, .disable = 0x00000000},
+	{.addr = 0x00100d48, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00100d1c, .prod = 0x00000042, .disable = 0x00000000},
+	{.addr = 0x00100c98, .prod = 0x00004242, .disable = 0x00000000},
+};
+
+/* blcg fifo */
+static const struct gating_desc gp106_blcg_fifo[] = {
+	{.addr = 0x000026a4, .prod = 0x0000c242, .disable = 0x00000000},
+};
+
+/* blcg gr */
+static const struct gating_desc gp106_blcg_gr[] = {
+	{.addr = 0x004041f0, .prod = 0x0000c646, .disable = 0x00000000},
+	{.addr = 0x00409890, .prod = 0x0000007f, .disable = 0x00000000},
+	{.addr = 0x004098b0, .prod = 0x0000007f, .disable = 0x00000000},
+	{.addr = 0x004078c0, .prod = 0x00004242, .disable = 0x00000000},
+	{.addr = 0x00406000, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00405860, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x0040590c, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00408040, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00407000, .prod = 0x4000c242, .disable = 0x00000000},
+	{.addr = 0x00405bf0, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x0041a890, .prod = 0x0000427f, .disable = 0x00000000},
+	{.addr = 0x0041a8b0, .prod = 0x0000007f, .disable = 0x00000000},
+	{.addr = 0x00418500, .prod = 0x0000c244, .disable = 0x00000000},
+	{.addr = 0x00418608, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00418688, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00418718, .prod = 0x00000042, .disable = 0x00000000},
+	{.addr = 0x00418828, .prod = 0x00008444, .disable = 0x00000000},
+	{.addr = 0x00418bbc, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00418970, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00418c70, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00418cf0, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00418d70, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00418f0c, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00418e0c, .prod = 0x00008444, .disable = 0x00000000},
+	{.addr = 0x00419020, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419038, .prod = 0x00000042, .disable = 0x00000000},
+	{.addr = 0x00418898, .prod = 0x00004242, .disable = 0x00000000},
+	{.addr = 0x00419a40, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419a48, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419a50, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419a58, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419a60, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419a68, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419a70, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419a78, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419a80, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00419868, .prod = 0x00008242, .disable = 0x00000000},
+	{.addr = 0x00419cd4, .prod = 0x00000002, .disable = 0x00000000},
+	{.addr = 0x00419cdc, .prod = 0x00000002, .disable = 0x00000000},
+	{.addr = 0x00419c70, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00419fd0, .prod = 0x0000c044, .disable = 0x00000000},
+	{.addr = 0x00419fd8, .prod = 0x0000c046, .disable = 0x00000000},
+	{.addr = 0x00419fe0, .prod = 0x0000c044, .disable = 0x00000000},
+	{.addr = 0x00419fe8, .prod = 0x0000c042, .disable = 0x00000000},
+	{.addr = 0x00419ff0, .prod = 0x0000c045, .disable = 0x00000000},
+	{.addr = 0x00419ff8, .prod = 0x00000002, .disable = 0x00000000},
+	{.addr = 0x00419f90, .prod = 0x00000002, .disable = 0x00000000},
+	{.addr = 0x0041be28, .prod = 0x00008242, .disable = 0x00000000},
+	{.addr = 0x0041bfe8, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x0041bed0, .prod = 0x0000c444, .disable = 0x00000000},
+	{.addr = 0x00412810, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00412a80, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x004129a8, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00412c10, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00412e80, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00412da8, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00408810, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x00408a80, .prod = 0x0000c242, .disable = 0x00000000},
+	{.addr = 0x004089a8, .prod = 0x0000c242, .disable = 0x00000000},
+};
+
+/* blcg ltc */
+static const struct gating_desc gp106_blcg_ltc[] = {
+	{.addr = 0x00154030, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x00154040, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x001545e0, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x001545c8, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x001547e0, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x001547c8, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x001543e0, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x001543c8, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x00156030, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x00156040, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x001565e0, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x001565c8, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x001567e0, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x001567c8, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x001563e0, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x001563c8, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x0017e030, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x0017e040, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x0017e3e0, .prod = 0x00000044, .disable = 0x00000000},
+	{.addr = 0x0017e3c8, .prod = 0x00000044, .disable = 0x00000000},
+};
+
+/* blcg pmu */
+static const struct gating_desc gp106_blcg_pmu[] = {
+	{.addr = 0x0010aa70, .prod = 0x00000045, .disable = 0x00000000},
+};
+
+/* blcg Xbar */
+static const struct gating_desc gp106_blcg_xbar[] = {
+	{.addr = 0x0013c820, .prod = 0x0001004a, .disable = 0x00000000},
+	{.addr = 0x0013dc04, .prod = 0x0001004a, .disable = 0x00000000},
+	{.addr = 0x0013c920, .prod = 0x0000004a, .disable = 0x00000000},
+	{.addr = 0x0013cbe0, .prod = 0x00000042, .disable = 0x00000000},
+	{.addr = 0x0013cc00, .prod = 0x00000042, .disable = 0x00000000},
+	{.addr = 0x0013cc20, .prod = 0x00000042, .disable = 0x00000000},
+	{.addr = 0x0013cc40, .prod = 0x00000042, .disable = 0x00000000},
+	{.addr = 0x0013cc60, .prod = 0x00000042, .disable = 0x00000000},
+	{.addr = 0x0013cc80, .prod = 0x00000042, .disable = 0x00000000},
+	{.addr = 0x0013cca0, .prod = 0x00000042, .disable = 0x00000000},
+};
+
+/* pg gr */
+static const struct gating_desc gp106_pg_gr[] = {
+};
+
+/* load functions: write each table's prod or disable values */
+/* Write each slcg bus entry's prod value, or its disable value if !prod. */
+void gp106_slcg_bus_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_slcg_bus;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_slcg_bus) / sizeof(gp106_slcg_bus[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each slcg ce2 entry's prod value, or its disable value if !prod. */
+void gp106_slcg_ce2_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_slcg_ce2;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_slcg_ce2) / sizeof(gp106_slcg_ce2[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each slcg chiplet entry's prod value, or disable value if !prod. */
+void gp106_slcg_chiplet_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_slcg_chiplet;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_slcg_chiplet) / sizeof(gp106_slcg_chiplet[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* No-op: this file defines no slcg ctxsw firmware register table. */
+void gp106_slcg_ctxsw_firmware_load_gating_prod(struct gk20a *g, bool prod)
+{
+}
+
+/* Write each slcg fb entry's prod value, or its disable value if !prod. */
+void gp106_slcg_fb_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_slcg_fb;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_slcg_fb) / sizeof(gp106_slcg_fb[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each slcg fifo entry's prod value, or its disable value if !prod. */
+void gp106_slcg_fifo_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_slcg_fifo;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_slcg_fifo) / sizeof(gp106_slcg_fifo[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each slcg gr entry's prod value, or its disable value if !prod. */
+void gr_gp106_slcg_gr_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_slcg_gr;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_slcg_gr) / sizeof(gp106_slcg_gr[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each slcg ltc entry's prod value, or its disable value if !prod. */
+void ltc_gp106_slcg_ltc_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_slcg_ltc;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_slcg_ltc) / sizeof(gp106_slcg_ltc[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each slcg perf entry's prod value, or its disable value if !prod. */
+void gp106_slcg_perf_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_slcg_perf;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_slcg_perf) / sizeof(gp106_slcg_perf[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each slcg priring entry's prod value, or disable value if !prod. */
+void gp106_slcg_priring_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_slcg_priring;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_slcg_priring) / sizeof(gp106_slcg_priring[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each slcg pmu entry's prod value, or its disable value if !prod. */
+void gp106_slcg_pmu_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_slcg_pmu;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_slcg_pmu) / sizeof(gp106_slcg_pmu[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each slcg therm entry's prod value, or its disable value if !prod. */
+void gp106_slcg_therm_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_slcg_therm;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_slcg_therm) / sizeof(gp106_slcg_therm[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each slcg xbar entry's prod value, or its disable value if !prod. */
+void gp106_slcg_xbar_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_slcg_xbar;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_slcg_xbar) / sizeof(gp106_slcg_xbar[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each blcg bus entry's prod value, or its disable value if !prod. */
+void gp106_blcg_bus_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_blcg_bus;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_blcg_bus) / sizeof(gp106_blcg_bus[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each blcg ce entry's prod value, or its disable value if !prod. */
+void gp106_blcg_ce_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_blcg_ce;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_blcg_ce) / sizeof(gp106_blcg_ce[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each blcg fb entry's prod value, or its disable value if !prod. */
+void gp106_blcg_fb_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_blcg_fb;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_blcg_fb) / sizeof(gp106_blcg_fb[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each blcg fifo entry's prod value, or its disable value if !prod. */
+void gp106_blcg_fifo_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_blcg_fifo;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_blcg_fifo) / sizeof(gp106_blcg_fifo[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each blcg gr entry's prod value, or its disable value if !prod. */
+void gp106_blcg_gr_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_blcg_gr;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_blcg_gr) / sizeof(gp106_blcg_gr[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each blcg ltc entry's prod value, or its disable value if !prod. */
+void gp106_blcg_ltc_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_blcg_ltc;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_blcg_ltc) / sizeof(gp106_blcg_ltc[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each blcg pmu entry's prod value, or its disable value if !prod. */
+void gp106_blcg_pmu_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_blcg_pmu;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_blcg_pmu) / sizeof(gp106_blcg_pmu[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Write each blcg xbar entry's prod value, or its disable value if !prod. */
+void gp106_blcg_xbar_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_blcg_xbar;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_blcg_xbar) / sizeof(gp106_blcg_xbar[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+/* Walk gp106_pg_gr[]; the table is empty here, so nothing is written. */
+void gr_gp106_pg_gr_load_gating_prod(struct gk20a *g, bool prod)
+{
+	const struct gating_desc *desc = gp106_pg_gr;
+	const struct gating_desc *end = desc +
+		sizeof(gp106_pg_gr) / sizeof(gp106_pg_gr[0]);
+
+	/* Entries are written in table order. */
+	for (; desc < end; desc++) {
+		u32 val = prod ? desc->prod : desc->disable;
+
+		gk20a_writel(g, desc->addr, val);
+	}
+}
+
+#endif /* __gp106_gating_reglist_h__ */
diff --git a/drivers/gpu/nvgpu/gp106/gp106_gating_reglist.h b/drivers/gpu/nvgpu/gp106/gp106_gating_reglist.h
new file mode 100644
index 00000000..423ccf54
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/gp106_gating_reglist.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2015-2016, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "gk20a/gk20a.h"
+
+void gp106_slcg_bus_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_slcg_ce2_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_slcg_chiplet_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_slcg_ctxsw_firmware_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_slcg_fb_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_slcg_fifo_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gr_gp106_slcg_gr_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void ltc_gp106_slcg_ltc_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_slcg_perf_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_slcg_priring_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_slcg_pmu_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_slcg_therm_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_slcg_xbar_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_blcg_bus_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_blcg_ce_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_blcg_ctxsw_firmware_load_gating_prod(struct gk20a *g,
+	bool prod); /* NOTE(review): no definition in gp106_gating_reglist.c */
+
+void gp106_blcg_fb_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_blcg_fifo_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_blcg_gr_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_blcg_ltc_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_blcg_pmu_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gp106_blcg_xbar_load_gating_prod(struct gk20a *g,
+	bool prod);
+
+void gr_gp106_pg_gr_load_gating_prod(struct gk20a *g,
+	bool prod);
+
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 5f15a2c8..eb5c4eba 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -26,7 +26,6 @@
 #include "gp10b/mm_gp10b.h"
 #include "gp10b/ce_gp10b.h"
 #include "gp106/fifo_gp106.h"
-#include "gp10b/gp10b_gating_reglist.h"
 #include "gp10b/regops_gp10b.h"
 #include "gp10b/cde_gp10b.h"
 #include "gp10b/therm_gp10b.h"
@@ -43,13 +42,59 @@
 #include "gp106/gr_ctx_gp106.h"
 #include "gp106/gr_gp106.h"
 #include "gp106/fb_gp106.h"
+#include "gp106/gp106_gating_reglist.h"
 #include "nvgpu_gpuid_t18x.h"
 #include "hw_proj_gp106.h"
 #include "gk20a/dbg_gpu_gk20a.h"
 #include "gk20a/css_gr_gk20a.h"
 
 static struct gpu_ops gp106_ops = {
-	.clock_gating = { }
+	.clock_gating = {
+		.slcg_bus_load_gating_prod =
+			gp106_slcg_bus_load_gating_prod,
+		.slcg_ce2_load_gating_prod =
+			gp106_slcg_ce2_load_gating_prod,
+		.slcg_chiplet_load_gating_prod =
+			gp106_slcg_chiplet_load_gating_prod,
+		.slcg_ctxsw_firmware_load_gating_prod =
+			gp106_slcg_ctxsw_firmware_load_gating_prod,
+		.slcg_fb_load_gating_prod =
+			gp106_slcg_fb_load_gating_prod,
+		.slcg_fifo_load_gating_prod =
+			gp106_slcg_fifo_load_gating_prod,
+		.slcg_gr_load_gating_prod =
+			gr_gp106_slcg_gr_load_gating_prod,
+		.slcg_ltc_load_gating_prod =
+			ltc_gp106_slcg_ltc_load_gating_prod,
+		.slcg_perf_load_gating_prod =
+			gp106_slcg_perf_load_gating_prod,
+		.slcg_priring_load_gating_prod =
+			gp106_slcg_priring_load_gating_prod,
+		.slcg_pmu_load_gating_prod =
+			gp106_slcg_pmu_load_gating_prod,
+		.slcg_therm_load_gating_prod =
+			gp106_slcg_therm_load_gating_prod,
+		.slcg_xbar_load_gating_prod =
+			gp106_slcg_xbar_load_gating_prod,
+		.blcg_bus_load_gating_prod =
+			gp106_blcg_bus_load_gating_prod,
+		.blcg_ce_load_gating_prod =
+			gp106_blcg_ce_load_gating_prod,
+		.blcg_fb_load_gating_prod =
+			gp106_blcg_fb_load_gating_prod,
+		.blcg_fifo_load_gating_prod =
+			gp106_blcg_fifo_load_gating_prod,
+		.blcg_gr_load_gating_prod =
+			gp106_blcg_gr_load_gating_prod,
+		.blcg_ltc_load_gating_prod =
+			gp106_blcg_ltc_load_gating_prod,
+		.blcg_pmu_load_gating_prod =
+			gp106_blcg_pmu_load_gating_prod,
+		.blcg_xbar_load_gating_prod =
+			gp106_blcg_xbar_load_gating_prod,
+		.pg_gr_load_gating_prod =
+			gr_gp106_pg_gr_load_gating_prod,
+		}
 };
 
 static int gp106_get_litter_value(struct gk20a *g,
-- 
cgit v1.2.2


From 5159f6bf43dc4822f6f05a957f0cf090ff3e1db7 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Wed, 24 Aug 2016 18:25:09 +0530
Subject: gpu: nvgpu: sequencer-script update

Update to sequencer script to support SKU without display.

Bug 200231242

Change-Id: Ibd983166be823370fc687eb2fe9bae3aa8c0dab7
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1207096
(cherry picked from commit b573a627b3fe7697c90def46eaf83d755c5d2dee)
Reviewed-on: http://git-master/r/1227247
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk_mclk.c | 200 +++++++++++++++++++--------------------
 1 file changed, 96 insertions(+), 104 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.c b/drivers/gpu/nvgpu/clk/clk_mclk.c
index f73a6002..affcb54c 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.c
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.c
@@ -1549,110 +1549,102 @@ static struct memory_link_training_pattern memory_pattern_reglist[] = {
 };
 
 static u8 seq_script_gp106[] = {
-  0x0b, 0x00, 0x02, 0x00, 0x40, 0xc0, 0x62, 0x00, 0x22, 0x00, 0x02, 0x00,
-  0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x02, 0x00, 0x40, 0x63, 0x61, 0x00,
-  0x22, 0x00, 0x02, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x34, 0x00, 0x02, 0x00,
-  0x0c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x21, 0x00, 0x07, 0x00, 0x30, 0x03, 0x9a, 0x00,
-  0x14, 0x00, 0x10, 0x00, 0x38, 0xd6, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00,
-  0x04, 0xd6, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x02, 0x00,
-  0x20, 0x4e, 0x00, 0x00, 0x21, 0x00, 0x09, 0x00, 0x00, 0x02, 0x9a, 0x00,
-  0x00, 0x90, 0x8f, 0x02, 0x10, 0x09, 0x9a, 0x00, 0x00, 0x00, 0x0c, 0x00,
-  0x14, 0x09, 0x9a, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x02, 0x9a, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x02, 0x00, 0xe8, 0x03, 0x00, 0x00,
-  0x21, 0x00, 0x03, 0x00, 0x10, 0x03, 0x9a, 0x00, 0x01, 0x00, 0x00, 0x00,
-  0x2e, 0x00, 0x02, 0x00, 0xe8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x0d, 0x00,
-  0x48, 0x03, 0x9a, 0x00, 0x88, 0x00, 0x70, 0x00, 0x00, 0x02, 0x9a, 0x00,
-  0x00, 0x90, 0x8f, 0x82, 0x14, 0x03, 0x9a, 0x00, 0x01, 0x00, 0x00, 0x00,
-  0x00, 0x02, 0x9a, 0x00, 0x00, 0x90, 0x8f, 0x02, 0x90, 0x00, 0x9a, 0x00,
-  0x61, 0x00, 0x00, 0x00, 0x90, 0x00, 0x9a, 0x00, 0x7f, 0x00, 0x00, 0xc0,
-  0x2e, 0x00, 0x02, 0x00, 0xe8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x27, 0x00,
-  0x98, 0x06, 0x9a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9c, 0x06, 0x9a, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x24, 0x08, 0x9a, 0x00, 0xe7, 0x8f, 0x88, 0xf7,
-  0x40, 0x0d, 0x9a, 0x00, 0x20, 0xe0, 0x01, 0x00, 0x00, 0x02, 0x9a, 0x00,
-  0x00, 0x90, 0x8f, 0x1a, 0x00, 0x08, 0x9a, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0xf0, 0x73, 0x13, 0x00, 0x03, 0x00, 0x00, 0x00, 0x30, 0x08, 0x9a, 0x00,
-  0x90, 0x90, 0x67, 0x00, 0xf4, 0x73, 0x13, 0x00, 0x11, 0x00, 0x01, 0x00,
-  0xf4, 0x73, 0x13, 0x00, 0x10, 0x00, 0x01, 0x00, 0xf4, 0x73, 0x13, 0x00,
-  0x00, 0x00, 0x01, 0x00, 0x20, 0x20, 0x13, 0x00, 0x00, 0x00, 0x03, 0x20,
-  0x20, 0x73, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x20, 0x13, 0x00,
-  0x04, 0x00, 0x00, 0x00, 0x34, 0x20, 0x13, 0x00, 0x00, 0x00, 0x8a, 0xf9,
-  0x24, 0x20, 0x13, 0x00, 0x01, 0x32, 0x05, 0x00, 0x2c, 0x20, 0x13, 0x00,
-  0x00, 0x01, 0x00, 0x00, 0x28, 0x20, 0x13, 0x00, 0x10, 0x00, 0x08, 0x10,
-  0x20, 0x20, 0x13, 0x00, 0x01, 0x00, 0x03, 0x20, 0x34, 0x00, 0x02, 0x00,
-  0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00,
-  0x01, 0x00, 0x02, 0x00, 0x90, 0x73, 0x13, 0x00, 0x15, 0x00, 0x03, 0x00,
-  0x00, 0x00, 0x02, 0x00, 0x00, 0xfa, 0x00, 0x00, 0x34, 0x00, 0x02, 0x00,
-  0x0e, 0x00, 0x00, 0x00, 0x21, 0x00, 0x0d, 0x00, 0x2c, 0x20, 0x13, 0x00,
-  0x00, 0x03, 0x00, 0x00, 0x28, 0x20, 0x13, 0x00, 0x10, 0x00, 0x04, 0x10,
-  0xf4, 0x73, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x13, 0x00,
-  0x00, 0x00, 0x01, 0x98, 0x04, 0x20, 0x13, 0x00, 0x01, 0x0b, 0x01, 0x00,
-  0x00, 0x20, 0x13, 0x00, 0x01, 0x00, 0x01, 0x98, 0x34, 0x00, 0x02, 0x00,
-  0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
-  0x01, 0x00, 0x02, 0x00, 0x90, 0x73, 0x13, 0x00, 0x15, 0x00, 0x03, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x00, 0x34, 0x00, 0x02, 0x00,
-  0x10, 0x00, 0x00, 0x00, 0x21, 0x00, 0x1d, 0x00, 0xf4, 0x73, 0x13, 0x00,
-  0x00, 0x11, 0x00, 0x00, 0xf4, 0x73, 0x13, 0x00, 0x10, 0x11, 0x00, 0x00,
-  0xec, 0x73, 0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0xf0, 0x73, 0x13, 0x00,
-  0x02, 0x00, 0x00, 0x00, 0xf4, 0x73, 0x13, 0x00, 0x12, 0x11, 0x00, 0x00,
-  0xf4, 0x73, 0x13, 0x00, 0x12, 0x00, 0x00, 0x00, 0x08, 0x08, 0x9a, 0x00,
-  0x70, 0x00, 0x08, 0x48, 0x00, 0x02, 0x9a, 0x00, 0x00, 0x10, 0x8f, 0x1a,
-  0x24, 0x08, 0x9a, 0x00, 0xe5, 0x8f, 0x88, 0xf7, 0x08, 0x08, 0x9a, 0x00,
-  0x70, 0x00, 0xa8, 0x4a, 0x24, 0x08, 0x9a, 0x00, 0x85, 0x8f, 0x88, 0xf7,
-  0x38, 0x1f, 0x9a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x34, 0x1f, 0x9a, 0x00,
-  0x00, 0x00, 0x01, 0x00, 0x34, 0x0d, 0x9a, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x13, 0x00, 0x02, 0x00, 0x2c, 0x01, 0x00, 0x00, 0x21, 0x00, 0x09, 0x00,
-  0x5c, 0x06, 0x9a, 0x00, 0x22, 0x00, 0x00, 0x00, 0x0c, 0x06, 0x9a, 0x00,
-  0xd0, 0x20, 0x00, 0xfd, 0xd4, 0x0e, 0x9a, 0x00, 0x00, 0x00, 0x00, 0x40,
-  0xd4, 0x0e, 0x9a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00,
-  0x2c, 0x01, 0x00, 0x00, 0x21, 0x00, 0x27, 0x00, 0x2c, 0x08, 0x9a, 0x00,
-  0x00, 0x00, 0x05, 0x00, 0x30, 0x08, 0x9a, 0x00, 0x90, 0xa0, 0x67, 0x00,
-  0x48, 0x02, 0x9a, 0x00, 0xa2, 0x44, 0x1e, 0x93, 0x90, 0x02, 0x9a, 0x00,
-  0x42, 0xa5, 0x5a, 0x15, 0x94, 0x02, 0x9a, 0x00, 0x95, 0xc2, 0xe5, 0x28,
-  0x98, 0x02, 0x9a, 0x00, 0x00, 0x08, 0x15, 0x88, 0x9c, 0x02, 0x9a, 0x00,
-  0xec, 0x30, 0x00, 0x22, 0xa0, 0x02, 0x9a, 0x00, 0x32, 0x00, 0x83, 0xd5,
-  0xa8, 0x02, 0x9a, 0x00, 0x0f, 0x86, 0x00, 0x02, 0xcc, 0x02, 0x9a, 0x00,
-  0x00, 0x39, 0x0f, 0x12, 0x14, 0x06, 0x9a, 0x00, 0x77, 0x4e, 0x04, 0x40,
-  0x10, 0x06, 0x9a, 0x00, 0x77, 0x4e, 0x04, 0x40, 0x78, 0x07, 0x10, 0x00,
-  0x44, 0x04, 0x00, 0x82, 0x4c, 0x02, 0x9a, 0x00, 0x85, 0x0c, 0x05, 0x13,
-  0xe0, 0x08, 0x9a, 0x00, 0x11, 0x00, 0x00, 0x00, 0x90, 0x03, 0x9a, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x94, 0x03, 0x9a, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x0b, 0x9a, 0x00, 0x06, 0x22, 0x22, 0x22, 0x90, 0x00, 0x9a, 0x00,
-  0x7e, 0x00, 0x00, 0x40, 0x2e, 0x00, 0x02, 0x00, 0xd0, 0x07, 0x00, 0x00,
-  0x21, 0x00, 0x0f, 0x00, 0x14, 0x03, 0x9a, 0x00, 0x01, 0x00, 0x00, 0x00,
-  0x10, 0x03, 0x9a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x02, 0x9a, 0x00,
-  0x00, 0x00, 0x00, 0x80, 0x90, 0x03, 0x9a, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x94, 0x02, 0x9a, 0x00, 0x95, 0xc2, 0xe5, 0x24, 0x10, 0x09, 0x9a, 0x00,
-  0x00, 0x00, 0x01, 0xa4, 0x14, 0x09, 0x9a, 0x00, 0x00, 0x00, 0x01, 0xa4,
-  0x34, 0x00, 0x02, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x74, 0x09, 0x90, 0x00,
-  0x15, 0x00, 0x03, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x20, 0xa1, 0x07, 0x00,
-  0x01, 0x00, 0x02, 0x00, 0x74, 0x49, 0x90, 0x00, 0x15, 0x00, 0x03, 0x00,
-  0x0f, 0x00, 0x00, 0x00, 0x20, 0xa1, 0x07, 0x00, 0x34, 0x00, 0x02, 0x00,
-  0x12, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x02, 0x00, 0xe8, 0x03, 0x00, 0x00,
-  0x21, 0x00, 0x0b, 0x00, 0x94, 0x02, 0x9a, 0x00, 0x95, 0xc2, 0xe5, 0x28,
-  0x38, 0x03, 0x9a, 0x00, 0x03, 0x01, 0x30, 0x00, 0x3c, 0x03, 0x9a, 0x00,
-  0xff, 0x01, 0x40, 0x00, 0x00, 0x03, 0x9a, 0x00, 0x05, 0x01, 0x00, 0x00,
-  0x54, 0x03, 0x9a, 0x00, 0x03, 0x00, 0x80, 0x00, 0x2e, 0x00, 0x02, 0x00,
-  0xe8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x09, 0x00, 0x48, 0x03, 0x9a, 0x00,
-  0x00, 0x00, 0x70, 0x00, 0x00, 0x02, 0x9a, 0x00, 0x00, 0x10, 0x8f, 0x9a,
-  0x18, 0x03, 0x9a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x9a, 0x00,
-  0x00, 0x10, 0x8f, 0x1a, 0x2e, 0x00, 0x02, 0x00, 0xe8, 0x03, 0x00, 0x00,
-  0x21, 0x00, 0x07, 0x00, 0x78, 0x09, 0x9a, 0x00, 0x0f, 0x1e, 0x7e, 0x88,
-  0x10, 0x09, 0x9a, 0x00, 0x00, 0x00, 0x0e, 0xa4, 0x14, 0x09, 0x9a, 0x00,
-  0x00, 0x00, 0x0e, 0xa4, 0x34, 0x00, 0x02, 0x00, 0x13, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00,
-  0x74, 0x09, 0x90, 0x00, 0x15, 0x00, 0x03, 0x00, 0x0f, 0x00, 0x00, 0x00,
-  0x20, 0xa1, 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x74, 0x49, 0x90, 0x00,
-  0x15, 0x00, 0x03, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x20, 0xa1, 0x07, 0x00,
-  0x34, 0x00, 0x02, 0x00, 0x14, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x02, 0x00,
-  0xe8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x07, 0x00, 0x00, 0x02, 0x9a, 0x00,
-  0x00, 0x10, 0x8f, 0x3a, 0x10, 0x09, 0x9a, 0x00, 0x00, 0x00, 0x0c, 0x25,
-  0x14, 0x09, 0x9a, 0x00, 0x00, 0x00, 0x0c, 0x25, 0x20, 0x00, 0x03, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x00, 0x02, 0x00,
-  0x15, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x00, 0xc0, 0x62, 0x00,
-  0x00, 0x0f, 0x0f, 0x0f, 0x16, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00
-};
+  0x34, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x20, 0x00, 0x03, 0x00, 0x01,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x00, 0x07, 0x00, 0x30, 0x03,
+  0x9A, 0x00, 0x14, 0x00, 0x10, 0x00, 0x38, 0xD6, 0x00, 0x00, 0x00, 0x60, 0x00,
+  0x00, 0x04, 0xD6, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x02, 0x00,
+  0x20, 0x4E, 0x00, 0x00, 0x21, 0x00, 0x0D, 0x00, 0x00, 0x02, 0x9A, 0x00, 0x00,
+  0x90, 0x8F, 0x02, 0x10, 0x09, 0x90, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x14, 0x09,
+  0x90, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x10, 0x49, 0x90, 0x00, 0x00, 0x00, 0x0C,
+  0x00, 0x14, 0x49, 0x90, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x10, 0x02, 0x9A, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x02, 0x00, 0xE8, 0x03, 0x00, 0x00, 0x21,
+  0x00, 0x03, 0x00, 0x10, 0x03, 0x9A, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2E, 0x00,
+  0x02, 0x00, 0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x0D, 0x00, 0x48, 0x03, 0x9A,
+  0x00, 0x88, 0x00, 0x70, 0x00, 0x00, 0x02, 0x9A, 0x00, 0x00, 0x90, 0x8F, 0x82,
+  0x14, 0x03, 0x9A, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x9A, 0x00, 0x00,
+  0x90, 0x8F, 0x02, 0x90, 0x00, 0x9A, 0x00, 0x61, 0x00, 0x00, 0x00, 0x90, 0x00,
+  0x9A, 0x00, 0x7F, 0x00, 0x00, 0xC0, 0x2E, 0x00, 0x02, 0x00, 0xE8, 0x03, 0x00,
+  0x00, 0x21, 0x00, 0x27, 0x00, 0x98, 0x06, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x9C, 0x06, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x08, 0x9A, 0x00, 0xE7,
+  0x8F, 0x88, 0xF7, 0x40, 0x0D, 0x9A, 0x00, 0x20, 0xE0, 0x01, 0x00, 0x00, 0x02,
+  0x9A, 0x00, 0x00, 0x90, 0x8F, 0x1A, 0x00, 0x08, 0x9A, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0xF0, 0x73, 0x13, 0x00, 0x03, 0x00, 0x00, 0x00, 0x30, 0x08, 0x9A, 0x00,
+  0x90, 0x90, 0x67, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x11, 0x00, 0x01, 0x00, 0xF4,
+  0x73, 0x13, 0x00, 0x10, 0x00, 0x01, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x00, 0x00,
+  0x01, 0x00, 0x20, 0x20, 0x13, 0x00, 0x00, 0x00, 0x03, 0x20, 0x20, 0x73, 0x13,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x20, 0x13, 0x00, 0x04, 0x00, 0x00, 0x00,
+  0x34, 0x20, 0x13, 0x00, 0x00, 0x00, 0x8A, 0xF9, 0x24, 0x20, 0x13, 0x00, 0x01,
+  0x32, 0x05, 0x00, 0x2C, 0x20, 0x13, 0x00, 0x00, 0x01, 0x00, 0x00, 0x28, 0x20,
+  0x13, 0x00, 0x10, 0x00, 0x08, 0x10, 0x20, 0x20, 0x13, 0x00, 0x01, 0x00, 0x03,
+  0x20, 0x34, 0x00, 0x02, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00,
+  0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x02, 0x00, 0x90, 0x73, 0x13, 0x00, 0x15,
+  0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0xFA, 0x00, 0x00, 0x34, 0x00,
+  0x02, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x21, 0x00, 0x0D, 0x00, 0x2C, 0x20, 0x13,
+  0x00, 0x00, 0x03, 0x00, 0x00, 0x28, 0x20, 0x13, 0x00, 0x10, 0x00, 0x04, 0x10,
+  0xF4, 0x73, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x13, 0x00, 0x00,
+  0x00, 0x01, 0x98, 0x04, 0x20, 0x13, 0x00, 0x01, 0x0B, 0x01, 0x00, 0x00, 0x20,
+  0x13, 0x00, 0x01, 0x00, 0x01, 0x98, 0x34, 0x00, 0x02, 0x00, 0x0D, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00,
+  0x90, 0x73, 0x13, 0x00, 0x15, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00,
+  0xFA, 0x00, 0x00, 0x34, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x21, 0x00,
+  0x1D, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x00, 0x11, 0x00, 0x00, 0xF4, 0x73, 0x13,
+  0x00, 0x10, 0x11, 0x00, 0x00, 0xEC, 0x73, 0x13, 0x00, 0x00, 0x00, 0x03, 0x00,
+  0xF0, 0x73, 0x13, 0x00, 0x02, 0x00, 0x00, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x12,
+  0x11, 0x00, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x12, 0x00, 0x00, 0x00, 0x08, 0x08,
+  0x9A, 0x00, 0x70, 0x00, 0x08, 0x48, 0x00, 0x02, 0x9A, 0x00, 0x00, 0x10, 0x8F,
+  0x1A, 0x24, 0x08, 0x9A, 0x00, 0xE5, 0x8F, 0x88, 0xF7, 0x08, 0x08, 0x9A, 0x00,
+  0x70, 0x00, 0xA8, 0x4A, 0x24, 0x08, 0x9A, 0x00, 0x85, 0x8F, 0x88, 0xF7, 0x38,
+  0x1F, 0x9A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x34, 0x1F, 0x9A, 0x00, 0x00, 0x00,
+  0x01, 0x00, 0x34, 0x0D, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02,
+  0x00, 0x2C, 0x01, 0x00, 0x00, 0x21, 0x00, 0x09, 0x00, 0x5C, 0x06, 0x9A, 0x00,
+  0x22, 0x00, 0x00, 0x00, 0x0C, 0x06, 0x9A, 0x00, 0xD0, 0x20, 0x00, 0xFD, 0xD4,
+  0x0E, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x40, 0xD4, 0x0E, 0x9A, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x2C, 0x01, 0x00, 0x00, 0x21, 0x00, 0x25,
+  0x00, 0x2C, 0x08, 0x9A, 0x00, 0x00, 0x00, 0x05, 0x00, 0x30, 0x08, 0x9A, 0x00,
+  0x90, 0xA0, 0x67, 0x00, 0x48, 0x02, 0x9A, 0x00, 0xA3, 0x44, 0x20, 0x95, 0x90,
+  0x02, 0x9A, 0x00, 0x46, 0xAE, 0x60, 0x16, 0x94, 0x02, 0x9A, 0x00, 0x96, 0x02,
+  0xF6, 0x28, 0x98, 0x02, 0x9A, 0x00, 0x00, 0x09, 0x16, 0x88, 0x9C, 0x02, 0x9A,
+  0x00, 0x4C, 0x39, 0x00, 0x24, 0xA0, 0x02, 0x9A, 0x00, 0x32, 0x80, 0x83, 0xD5,
+  0xA8, 0x02, 0x9A, 0x00, 0x0F, 0x86, 0x00, 0x02, 0x14, 0x06, 0x9A, 0x00, 0x77,
+  0x4E, 0x04, 0x40, 0x10, 0x06, 0x9A, 0x00, 0x77, 0x4E, 0x04, 0x40, 0x78, 0x07,
+  0x10, 0x00, 0x44, 0x04, 0x00, 0x82, 0x4C, 0x02, 0x9A, 0x00, 0x85, 0x0C, 0x05,
+  0x15, 0xE0, 0x08, 0x9A, 0x00, 0x11, 0x00, 0x00, 0x00, 0x90, 0x03, 0x9A, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x94, 0x03, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x0B, 0x9A, 0x00, 0x06, 0x22, 0x22, 0x22, 0x90, 0x00, 0x9A, 0x00, 0x7E, 0x00,
+  0x00, 0x40, 0x2E, 0x00, 0x02, 0x00, 0xD0, 0x07, 0x00, 0x00, 0x21, 0x00, 0x13,
+  0x00, 0x14, 0x03, 0x9A, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x03, 0x9A, 0x00,
+  0x01, 0x00, 0x00, 0x00, 0x10, 0x02, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x80, 0x90,
+  0x03, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x94, 0x02, 0x9A, 0x00, 0x96, 0x02,
+  0xF6, 0x24, 0x10, 0x09, 0x90, 0x00, 0x00, 0x00, 0x01, 0xA4, 0x14, 0x09, 0x90,
+  0x00, 0x00, 0x00, 0x01, 0xA4, 0x10, 0x49, 0x90, 0x00, 0x00, 0x00, 0x01, 0xA4,
+  0x14, 0x49, 0x90, 0x00, 0x00, 0x00, 0x01, 0xA4, 0x34, 0x00, 0x02, 0x00, 0x0F,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
+  0x02, 0x00, 0x74, 0x09, 0x90, 0x00, 0x15, 0x00, 0x03, 0x00, 0x0F, 0x00, 0x00,
+  0x00, 0x20, 0xA1, 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x74, 0x49, 0x90, 0x00,
+  0x15, 0x00, 0x03, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x20, 0xA1, 0x07, 0x00, 0x34,
+  0x00, 0x02, 0x00, 0x10, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x02, 0x00, 0xE8, 0x03,
+  0x00, 0x00, 0x21, 0x00, 0x0B, 0x00, 0x94, 0x02, 0x9A, 0x00, 0x96, 0x02, 0xF6,
+  0x28, 0x38, 0x03, 0x9A, 0x00, 0x03, 0x01, 0x30, 0x00, 0x3C, 0x03, 0x9A, 0x00,
+  0xFF, 0x01, 0x40, 0x00, 0x00, 0x03, 0x9A, 0x00, 0x0D, 0x02, 0x00, 0x00, 0x54,
+  0x03, 0x9A, 0x00, 0x03, 0x00, 0x80, 0x00, 0x2E, 0x00, 0x02, 0x00, 0xE8, 0x03,
+  0x00, 0x00, 0x21, 0x00, 0x09, 0x00, 0x48, 0x03, 0x9A, 0x00, 0x00, 0x00, 0x70,
+  0x00, 0x00, 0x02, 0x9A, 0x00, 0x00, 0x10, 0x8F, 0x9A, 0x18, 0x03, 0x9A, 0x00,
+  0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x9A, 0x00, 0x00, 0x10, 0x8F, 0x1A, 0x2E,
+  0x00, 0x02, 0x00, 0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x0B, 0x00, 0x78, 0x09,
+  0x9A, 0x00, 0x0F, 0x1E, 0x7E, 0x88, 0x10, 0x09, 0x90, 0x00, 0x00, 0x00, 0x0E,
+  0xA4, 0x14, 0x09, 0x90, 0x00, 0x00, 0x00, 0x0E, 0xA4, 0x10, 0x49, 0x90, 0x00,
+  0x00, 0x00, 0x0E, 0xA4, 0x14, 0x49, 0x90, 0x00, 0x00, 0x00, 0x0E, 0xA4, 0x34,
+  0x00, 0x02, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x74, 0x09, 0x90, 0x00, 0x15, 0x00, 0x03,
+  0x00, 0x0F, 0x00, 0x00, 0x00, 0x20, 0xA1, 0x07, 0x00, 0x01, 0x00, 0x02, 0x00,
+  0x74, 0x49, 0x90, 0x00, 0x15, 0x00, 0x03, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x20,
+  0xA1, 0x07, 0x00, 0x34, 0x00, 0x02, 0x00, 0x12, 0x00, 0x00, 0x00, 0x2E, 0x00,
+  0x02, 0x00, 0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x0B, 0x00, 0x00, 0x02, 0x9A,
+  0x00, 0x00, 0x10, 0x8F, 0x3A, 0x10, 0x09, 0x90, 0x00, 0x00, 0x00, 0x0C, 0x25,
+  0x14, 0x09, 0x90, 0x00, 0x00, 0x00, 0x0C, 0x25, 0x10, 0x49, 0x90, 0x00, 0x00,
+  0x00, 0x0C, 0x25, 0x14, 0x49, 0x90, 0x00, 0x00, 0x00, 0x0C, 0x25, 0x20, 0x00,
+  0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x00, 0x02,
+  0x00, 0x13, 0x00, 0x00, 0x00, 0x16, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00};
 
 static void mclk_memory_load_training_pattern(struct gk20a *g)
 {
-- 
cgit v1.2.2


From 2d3ba5478d0d9a12b123a0261653d33eb37e6e87 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Thu, 1 Sep 2016 15:17:34 +0530
Subject: gpu: nvgpu: Clocks params update

- Clocks params update as per r370

JIRA DNVGPU-116

Change-Id: I0aaa1e275aaa2027f2839f3fe24c9aee3e14fd8d
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1212827
(cherry picked from commit 54df6ad9668d46dffb5b9d03265948a47611ff13)
Reviewed-on: http://git-master/r/1227288
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/clk/clk_domain.c   | 2 ++
 drivers/gpu/nvgpu/clk/clk_domain.h   | 1 +
 drivers/gpu/nvgpu/clk/clk_prog.c     | 3 +--
 drivers/gpu/nvgpu/clk/clk_prog.h     | 2 +-
 drivers/gpu/nvgpu/clk/clk_vf_point.c | 4 ----
 drivers/gpu/nvgpu/clk/clk_vf_point.h | 4 ----
 6 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_domain.c b/drivers/gpu/nvgpu/clk/clk_domain.c
index dc485e6b..7371946c 100644
--- a/drivers/gpu/nvgpu/clk/clk_domain.c
+++ b/drivers/gpu/nvgpu/clk/clk_domain.c
@@ -94,6 +94,7 @@ static u32 _clk_domains_pmudatainit_3x(struct gk20a *g,
 	pset->b_override_o_v_o_c = false;
 	pset->b_debug_mode = false;
 	pset->b_enforce_vf_monotonicity = pdomains->b_enforce_vf_monotonicity;
+	pset->b_enforce_vf_smoothening = pdomains->b_enforce_vf_smoothening;
 	pset->volt_rails_max = 2;
 	status = boardobjgrpmask_export(
 				&pdomains->master_domains_mask.super,
@@ -168,6 +169,7 @@ u32 clk_domain_sw_setup(struct gk20a *g)
 	boardobjgrpmask_e32_init(&pclkdomainobjs->prog_domains_mask, NULL);
 	boardobjgrpmask_e32_init(&pclkdomainobjs->master_domains_mask, NULL);
 	pclkdomainobjs->b_enforce_vf_monotonicity = true;
+	pclkdomainobjs->b_enforce_vf_smoothening = true;
 
 	memset(&pclkdomainobjs->ordered_noise_aware_list, 0,
 		sizeof(pclkdomainobjs->ordered_noise_aware_list));
diff --git a/drivers/gpu/nvgpu/clk/clk_domain.h b/drivers/gpu/nvgpu/clk/clk_domain.h
index 94d612a7..2670a066 100644
--- a/drivers/gpu/nvgpu/clk/clk_domain.h
+++ b/drivers/gpu/nvgpu/clk/clk_domain.h
@@ -33,6 +33,7 @@ struct clk_domains {
 	u8 n_num_entries;
 	u8 version;
 	bool b_enforce_vf_monotonicity;
+	bool b_enforce_vf_smoothening;
 	u32 vbios_domains;
 	struct boardobjgrpmask_e32 prog_domains_mask;
 	struct boardobjgrpmask_e32 master_domains_mask;
diff --git a/drivers/gpu/nvgpu/clk/clk_prog.c b/drivers/gpu/nvgpu/clk/clk_prog.c
index d87581c4..4bf473ac 100644
--- a/drivers/gpu/nvgpu/clk/clk_prog.c
+++ b/drivers/gpu/nvgpu/clk/clk_prog.c
@@ -433,6 +433,7 @@ static u32 _clk_prog_pmudatainit_1x_master(struct gk20a *g,
 	memcpy(pset->vf_entries, pclk_prog_1x_master->p_vf_entries, vfsize);
 
 	pset->b_o_c_o_v_enabled = pclk_prog_1x_master->b_o_c_o_v_enabled;
+	pset->source_data = pclk_prog_1x_master->source_data;
 
 	memcpy(&pset->deltas, &pclk_prog_1x_master->deltas,
 		(u32) sizeof(struct ctrl_clk_clk_delta));
@@ -810,11 +811,9 @@ static u32 vfflatten_prog_1x_master(struct gk20a *g,
 			/* FLL sources use a voltage-based VF_POINT.*/
 			vf_point_data.board_obj.type =
 				CTRL_CLK_CLK_VF_POINT_TYPE_VOLT;
-			vf_point_data.volt.clk_domain_idx = clk_domain_idx;
 			for (i = 0; i < step_count; i++) {
 				vf_point_data.volt.source_voltage_uv =
 					voltage_min_uv + i * voltage_step_size_uv;
-				vf_point_data.volt.vf_gain_vfe_equ_idx = p_vf_rail->gain_vfe_idx;
 
 				status = _clk_prog_1x_master_rail_construct_vf_point(g, pclk,
 					p1xmaster, p_vf_rail,
diff --git a/drivers/gpu/nvgpu/clk/clk_prog.h b/drivers/gpu/nvgpu/clk/clk_prog.h
index 2dd8f6c8..8718bdd6 100644
--- a/drivers/gpu/nvgpu/clk/clk_prog.h
+++ b/drivers/gpu/nvgpu/clk/clk_prog.h
@@ -49,8 +49,8 @@ struct clk_prog_1x_master {
 	struct clk_prog_1x super;
 	bool b_o_c_o_v_enabled;
 	struct ctrl_clk_clk_prog_1x_master_vf_entry *p_vf_entries;
-
 	struct ctrl_clk_clk_delta deltas;
+	union ctrl_clk_clk_prog_1x_master_source_data source_data;
 	vf_flatten *vfflatten;
 };
 
diff --git a/drivers/gpu/nvgpu/clk/clk_vf_point.c b/drivers/gpu/nvgpu/clk/clk_vf_point.c
index 275bef96..a8cc6403 100644
--- a/drivers/gpu/nvgpu/clk/clk_vf_point.c
+++ b/drivers/gpu/nvgpu/clk/clk_vf_point.c
@@ -197,8 +197,6 @@ static u32 _clk_vf_point_pmudatainit_volt(struct gk20a *g,
 		ppmudata;
 
 	pset->source_voltage_uv = pclk_vf_point_volt->source_voltage_uv;
-	pset->vf_gain_vfe_equ_idx = pclk_vf_point_volt->vf_gain_vfe_equ_idx;
-	pset->clk_domain_idx = pclk_vf_point_volt->clk_domain_idx;
 	pset->freq_delta_khz = pclk_vf_point_volt->freq_delta_khz;
 
 	return status;
@@ -256,8 +254,6 @@ static u32 clk_vf_point_construct_volt(struct gk20a *g,
 			_clk_vf_point_pmudatainit_volt;
 
 	pclkvfpoint->source_voltage_uv = ptmpvfpoint->source_voltage_uv;
-	pclkvfpoint->vf_gain_vfe_equ_idx = ptmpvfpoint->vf_gain_vfe_equ_idx;
-	pclkvfpoint->clk_domain_idx = ptmpvfpoint->clk_domain_idx;
 
 	return status;
 }
diff --git a/drivers/gpu/nvgpu/clk/clk_vf_point.h b/drivers/gpu/nvgpu/clk/clk_vf_point.h
index 708f80f1..b05ab818 100644
--- a/drivers/gpu/nvgpu/clk/clk_vf_point.h
+++ b/drivers/gpu/nvgpu/clk/clk_vf_point.h
@@ -36,11 +36,7 @@ struct clk_vf_point {
 struct clk_vf_point_volt {
 	struct clk_vf_point super;
 	u32 source_voltage_uv;
-	u8 vf_gain_vfe_equ_idx;
-	u8 clk_domain_idx;
-	u16 vf_gain_value;
 	int freq_delta_khz;
-
 };
 
 struct clk_vf_point_freq {
-- 
cgit v1.2.2


From 455fc2806a06298831130d1adb617ddca48fabb1 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Thu, 1 Sep 2016 15:00:52 +0530
Subject: gpu: nvgpu: Update PMU bootloader params

- The PMU bootloader has changed; it now takes its
  params through the flcn_bl_dmem_desc_v1
  descriptor to boot the PMU

JIRA DNVGPU-116

Change-Id: I005b615b2323678fa605d190c6b9b629976f0b74
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1212818
(cherry picked from commit 89976a03c13cce6bbba25c99270b0da4ca0f2441)
Reviewed-on: http://git-master/r/1223842
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp106/acr_gp106.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index 850d07b1..e47c4eb1 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -510,7 +510,6 @@ static int lsfm_discover_ucode_images(struct gk20a *g,
 	return 0;
 }
 
-
 static int gp106_pmu_populate_loader_cfg(struct gk20a *g,
 	void *lsfm, u32 *p_bl_gen_desc_size)
 {
@@ -519,8 +518,8 @@ static int gp106_pmu_populate_loader_cfg(struct gk20a *g,
 	struct lsfm_managed_ucode_img_v2 *p_lsfm =
 		(struct lsfm_managed_ucode_img_v2 *)lsfm;
 	struct flcn_ucode_img_v1 *p_img = &(p_lsfm->ucode_img);
-	struct loader_config_v1 *ldr_cfg =
-			&(p_lsfm->bl_gen_desc.loader_cfg_v1);
+	struct flcn_bl_dmem_desc_v1 *ldr_cfg =
+				&(p_lsfm->bl_gen_desc.bl_dmem_desc_v1);
 	u64 addr_base;
 	struct pmu_ucode_desc_v1 *desc;
 	u64 addr_code, addr_data;
@@ -564,21 +563,22 @@ static int gp106_pmu_populate_loader_cfg(struct gk20a *g,
 
 	gp106_dbg_pmu("addr_args %x\n", addr_args);
 
-	/* Populate the loader_config state*/
-	ldr_cfg->dma_idx = GK20A_PMU_DMAIDX_UCODE;
+	/* Populate the LOADER_CONFIG state */
+	memset((void *) ldr_cfg, 0, sizeof(struct flcn_bl_dmem_desc_v1));
+	ldr_cfg->ctx_dma = GK20A_PMU_DMAIDX_UCODE;
 	flcn64_set_dma(&ldr_cfg->code_dma_base, addr_code);
-	ldr_cfg->code_size_total = desc->app_size;
-	ldr_cfg->code_size_to_load = desc->app_resident_code_size;
-	ldr_cfg->code_entry_point = desc->app_imem_entry;
+	ldr_cfg->non_sec_code_off = desc->app_resident_code_offset;
+	ldr_cfg->non_sec_code_size = desc->app_resident_code_size;
 	flcn64_set_dma(&ldr_cfg->data_dma_base, addr_data);
 	ldr_cfg->data_size = desc->app_resident_data_size;
-	flcn64_set_dma(&ldr_cfg->overlay_dma_base, addr_code);
+	ldr_cfg->code_entry_point = desc->app_imem_entry;
 
 	/* Update the argc/argv members*/
 	ldr_cfg->argc = 1;
 	ldr_cfg->argv = addr_args;
 
-	*p_bl_gen_desc_size = sizeof(struct loader_config_v1);
+	*p_bl_gen_desc_size = sizeof(struct flcn_bl_dmem_desc_v1);
+
 	g->acr.pmu_args = addr_args;
 	return 0;
 }
-- 
cgit v1.2.2


From 41838fc2bb6135bdd87d080a1efda8403f6f2657 Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Fri, 26 Aug 2016 20:20:02 -0700
Subject: gpu: nvgpu: gp106: MCLK P8/P5 sequences and API

Adds P5/P8 sequences and a simple debugfs API to
change from P0->P5

JIRA DNVGPU-117

Change-Id: I5811a5bddd0e11074524cce421bff1e3d441228d
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1208655
(cherry picked from commit dd410a86263e2407e043743945cf09a77910d745)
Reviewed-on: http://git-master/r/1231035
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk.h         |   2 +
 drivers/gpu/nvgpu/clk/clk_mclk.c    | 627 ++++++++++++++++++++++++++++++++++--
 drivers/gpu/nvgpu/clk/clk_mclk.h    |  30 +-
 drivers/gpu/nvgpu/gp106/pmu_gp106.c |   2 +-
 4 files changed, 632 insertions(+), 29 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h
index d638424f..e24aada2 100644
--- a/drivers/gpu/nvgpu/clk/clk.h
+++ b/drivers/gpu/nvgpu/clk/clk.h
@@ -20,6 +20,7 @@
 #include "clk_domain.h"
 #include "clk_prog.h"
 #include "clk_vf_point.h"
+#include "clk_mclk.h"
 #include "gk20a/gk20a.h"
 
 #define NV_PERF_DOMAIN_4X_CLOCK_DOMAIN_SKIP 0x10
@@ -33,6 +34,7 @@ struct clk_pmupstate {
 	struct clk_domains clk_domainobjs;
 	struct clk_progs clk_progobjs;
 	struct clk_vf_points clk_vf_pointobjs;
+	struct clk_mclk_state clk_mclk;
 };
 
 struct clockentry {
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.c b/drivers/gpu/nvgpu/clk/clk_mclk.c
index affcb54c..df010221 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.c
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.c
@@ -20,12 +20,16 @@
 
 #define VREG_COUNT 24
 
+#define DEFAULT_BOOT_MCLK_SPEED gk20a_mclk_high_speed
+#define MCLK_LOW_SPEED_LIMIT 405
+#define MCLK_MID_SPEED_LIMIT 810
+
 struct memory_link_training_pattern {
 	u32 regaddr;
 	u32 writeval;
 };
 
-static struct memory_link_training_pattern memory_shadow_reglist[] = {
+static struct memory_link_training_pattern memory_shadow_p0_reglist[] = {
 	{0x9a065c, 0x20},
 	{0x98467c, 0xffff0000},
 	{0x984708, 0x30550},
@@ -41,6 +45,47 @@ static struct memory_link_training_pattern memory_shadow_reglist[] = {
 	{0x9a065c, 0x00},
 };
 
+static struct memory_link_training_pattern memory_shadow_p5_reglist[] = { /* P5 table of {regaddr, writeval} pairs */
+	{0x9a065c, 0x10}, /* 0x9a065c is written again, with 0x0, after the group below */
+	{0x98467c, 0xfff10000},
+	{0x984708, 0x30002},
+	{0x98470c, 0x1414},
+	{0x9006a0, 0x12121212}, /* twelve entries share this value: offsets 0x6a0/0x6a4 at six bases 0x4000 apart */
+	{0x9006a4, 0x12121212},
+	{0x9046a0, 0x12121212},
+	{0x9046a4, 0x12121212},
+	{0x9086a0, 0x12121212},
+	{0x9086a4, 0x12121212},
+	{0x90c6a0, 0x12121212},
+	{0x90c6a4, 0x12121212},
+	{0x9106a0, 0x12121212},
+	{0x9106a4, 0x12121212},
+	{0x9146a0, 0x12121212},
+	{0x9146a4, 0x12121212},
+	{0x9a065c, 0x0}, /* second write to 0x9a065c (first entry wrote 0x10) */
+	{0x9a08e0, 0x10}, /* 0x9a08e0 is written again, with 0x0, by the last entry */
+	{0x9846a8, 0x0f0f0f0f},
+	{0x9846ac, 0x0f0f0f0f},
+	{0x984d98, 0x22222222},
+	{0x984d9c, 0x22222222},
+	{0x984da0, 0x22222222},
+	{0x984da4, 0x22222222},
+	{0x984da8, 0x22222222},
+	{0x984dac, 0x22222222},
+	{0x984dac, 0x22222222}, /* NOTE(review): repeats the previous entry exactly - confirm this is intended */
+	{0x984d70, 0x0}, /* 0x984d70..0x984d94 (ten consecutive words) all get 0x0 */
+	{0x984d74, 0x0},
+	{0x984d78, 0x0},
+	{0x984d7c, 0x0},
+	{0x984d80, 0x0},
+	{0x984d84, 0x0},
+	{0x984d88, 0x0},
+	{0x984d8c, 0x0},
+	{0x984d90, 0x0},
+	{0x984d94, 0x0},
+	{0x9a08e0, 0x0}, /* second write to 0x9a08e0 (earlier entry wrote 0x10) */
+};
+
 static struct memory_link_training_pattern memory_pattern_reglist[] = {
 	{0x9a0968,	0x0},
 	{0x9a0920,	0x0},
@@ -1548,7 +1593,154 @@ static struct memory_link_training_pattern memory_pattern_reglist[] = {
 	{0x9a0904,	0xffffffff},
 };
 
-static u8 seq_script_gp106[] = {
+/* MID SPEED TO LOW SPEED */
+static u8 seq_script_step33_ls_gp106[] = {
+  0x34, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x20, 0x00, 0x03, 0x00, 0x01,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x00, 0x0D, 0x00, 0x00, 0x02,
+  0x9A, 0x00, 0x00, 0x90, 0x8F, 0x02, 0x10, 0x09, 0x90, 0x00, 0x00, 0x00, 0x0C,
+  0x00, 0x14, 0x09, 0x90, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x10, 0x49, 0x90, 0x00,
+  0x00, 0x00, 0x0C, 0x00, 0x14, 0x49, 0x90, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x10,
+  0x02, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x02, 0x00, 0xE8, 0x03,
+  0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x10, 0x03, 0x9A, 0x00, 0x01, 0x00, 0x00,
+  0x00, 0x2E, 0x00, 0x02, 0x00, 0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x0D, 0x00,
+  0x48, 0x03, 0x9A, 0x00, 0x88, 0x00, 0x70, 0x00, 0x00, 0x02, 0x9A, 0x00, 0x00,
+  0x90, 0x8F, 0x82, 0x14, 0x03, 0x9A, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02,
+  0x9A, 0x00, 0x00, 0x90, 0x8F, 0x02, 0x90, 0x00, 0x9A, 0x00, 0x61, 0x00, 0x00,
+  0x00, 0x90, 0x00, 0x9A, 0x00, 0x7F, 0x00, 0x00, 0xC0, 0x2E, 0x00, 0x02, 0x00,
+  0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x27, 0x00, 0x98, 0x06, 0x9A, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x9C, 0x06, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x08,
+  0x9A, 0x00, 0xE7, 0x8F, 0x83, 0x40, 0x38, 0x1F, 0x9A, 0x00, 0x00, 0x00, 0x01,
+  0x00, 0x34, 0x1F, 0x9A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x34, 0x0D, 0x9A, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x24, 0x08, 0x9A, 0x00, 0xE7, 0x8F, 0x8B, 0xC0, 0x24,
+  0x08, 0x9A, 0x00, 0xE7, 0x8F, 0x83, 0x40, 0xF4, 0x73, 0x13, 0x00, 0x11, 0x00,
+  0x01, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x10, 0x00, 0x01, 0x00, 0xF4, 0x73, 0x13,
+  0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x20, 0x13, 0x00, 0x00, 0x00, 0x01, 0x18,
+  0x00, 0x20, 0x13, 0x00, 0x02, 0x00, 0x01, 0x18, 0x20, 0x20, 0x13, 0x00, 0x00,
+  0x00, 0x03, 0x20, 0x20, 0x73, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x20,
+  0x13, 0x00, 0x01, 0x3B, 0x04, 0x00, 0x2C, 0x20, 0x13, 0x00, 0x00, 0x01, 0x00,
+  0x00, 0x28, 0x20, 0x13, 0x00, 0x10, 0x00, 0x08, 0x10, 0x20, 0x20, 0x13, 0x00,
+  0x01, 0x00, 0x03, 0x20, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01,
+  0x00, 0x02, 0x00, 0x90, 0x73, 0x13, 0x00, 0x15, 0x00, 0x03, 0x00, 0x00, 0x00,
+  0x02, 0x00, 0x00, 0xFA, 0x00, 0x00, 0x21, 0x00, 0x1F, 0x00, 0x2C, 0x20, 0x13,
+  0x00, 0x00, 0x03, 0x00, 0x00, 0x28, 0x20, 0x13, 0x00, 0x10, 0x00, 0x04, 0x10,
+  0xF4, 0x73, 0x13, 0x00, 0x00, 0x01, 0x01, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x10,
+  0x01, 0x01, 0x00, 0xEC, 0x73, 0x13, 0x00, 0x00, 0x00, 0x01, 0x00, 0xF4, 0x73,
+  0x13, 0x00, 0x11, 0x01, 0x01, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x11, 0x01, 0x00,
+  0x00, 0x5C, 0x06, 0x9A, 0x00, 0x11, 0x00, 0x00, 0x00, 0x70, 0x06, 0x9A, 0x00,
+  0x06, 0x13, 0x08, 0xB4, 0x98, 0x06, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9C,
+  0x06, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x94, 0x06, 0x9A, 0x00, 0x0E, 0x06,
+  0x0E, 0x06, 0xD4, 0x0E, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x40, 0xD4, 0x0E, 0x9A,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x08, 0x9A, 0x00, 0xE7, 0x8F, 0x80, 0x40,
+  0x13, 0x00, 0x02, 0x00, 0x2C, 0x01, 0x00, 0x00, 0x2E, 0x00, 0x02, 0x00, 0x10,
+  0x27, 0x00, 0x00, 0x21, 0x00, 0x1D, 0x00, 0x70, 0x06, 0x9A, 0x00, 0x06, 0x13,
+  0x08, 0x34, 0x48, 0x02, 0x9A, 0x00, 0xA3, 0x44, 0x14, 0x86, 0x90, 0x02, 0x9A,
+  0x00, 0x12, 0x2C, 0x18, 0x06, 0x94, 0x02, 0x9A, 0x00, 0x8A, 0x82, 0x41, 0x24,
+  0x98, 0x02, 0x9A, 0x00, 0x11, 0x05, 0x06, 0x88, 0x9C, 0x02, 0x9A, 0x00, 0x8C,
+  0x10, 0x00, 0x22, 0xA8, 0x02, 0x9A, 0x00, 0x0B, 0x86, 0x00, 0x01, 0x4C, 0x02,
+  0x9A, 0x00, 0x85, 0x0C, 0x05, 0x06, 0x30, 0x1F, 0x9A, 0x00, 0x03, 0x16, 0x2C,
+  0x00, 0xE0, 0x08, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x03, 0x9A, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x94, 0x03, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x0B, 0x9A, 0x00, 0x06, 0x22, 0x22, 0x22, 0x90, 0x00, 0x9A, 0x00, 0x7E, 0x00,
+  0x00, 0x40, 0x2E, 0x00, 0x02, 0x00, 0xD0, 0x07, 0x00, 0x00, 0x21, 0x00, 0x0D,
+  0x00, 0x14, 0x03, 0x9A, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x03, 0x9A, 0x00,
+  0x01, 0x00, 0x00, 0x00, 0x10, 0x02, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x80, 0x90,
+  0x03, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x03, 0x9A, 0x00, 0x23, 0x01,
+  0x30, 0x00, 0x00, 0x03, 0x9A, 0x00, 0x2D, 0x02, 0x00, 0x00, 0x2E, 0x00, 0x02,
+  0x00, 0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x07, 0x00, 0x00, 0x02, 0x9A, 0x00,
+  0x00, 0x90, 0x8F, 0x82, 0x18, 0x03, 0x9A, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
+  0x02, 0x9A, 0x00, 0x00, 0x90, 0x8F, 0x02, 0x2E, 0x00, 0x02, 0x00, 0xE8, 0x03,
+  0x00, 0x00, 0x21, 0x00, 0x0D, 0x00, 0x78, 0x09, 0x9A, 0x00, 0x0B, 0x1E, 0x7A,
+  0x88, 0x30, 0x08, 0x9A, 0x00, 0x91, 0x10, 0x27, 0x00, 0x10, 0x09, 0x90, 0x00,
+  0x00, 0x00, 0x0A, 0x98, 0x14, 0x09, 0x90, 0x00, 0x00, 0x00, 0x0A, 0x98, 0x10,
+  0x49, 0x90, 0x00, 0x00, 0x00, 0x0A, 0x98, 0x14, 0x49, 0x90, 0x00, 0x00, 0x00,
+  0x0A, 0x98, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02,
+  0x00, 0x74, 0x09, 0x90, 0x00, 0x15, 0x00, 0x03, 0x00, 0x0F, 0x00, 0x00, 0x00,
+  0x20, 0xA1, 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x74, 0x49, 0x90, 0x00, 0x15,
+  0x00, 0x03, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x20, 0xA1, 0x07, 0x00, 0x21, 0x00,
+  0x0D, 0x00, 0x30, 0x08, 0x9A, 0x00, 0x91, 0x10, 0x27, 0x01, 0x30, 0x08, 0x9A,
+  0x00, 0x91, 0x10, 0x27, 0x00, 0x10, 0x09, 0x90, 0x00, 0x00, 0x00, 0x08, 0x19,
+  0x14, 0x09, 0x90, 0x00, 0x00, 0x00, 0x08, 0x19, 0x10, 0x49, 0x90, 0x00, 0x00,
+  0x00, 0x08, 0x19, 0x14, 0x49, 0x90, 0x00, 0x00, 0x00, 0x08, 0x19, 0x20, 0x00,
+  0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x00, 0x02,
+  0x00, 0x0B, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x00, 0x02, 0x9A, 0x00,
+  0x00, 0x98, 0x8F, 0x02, 0x16, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+/* LOW SPEED TO MID SPEED */
+static u8 seq_script_step33_gp106[] = {
+  0x34, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x20, 0x00, 0x03, 0x00, 0x01,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x00, 0x0D, 0x00, 0x00, 0x02,
+  0x9A, 0x00, 0x00, 0x90, 0x8F, 0x02, 0x10, 0x09, 0x90, 0x00, 0x00, 0x00, 0x0C,
+  0x00, 0x14, 0x09, 0x90, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x10, 0x49, 0x90, 0x00,
+  0x00, 0x00, 0x0C, 0x00, 0x14, 0x49, 0x90, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x10,
+  0x02, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x02, 0x00, 0xE8, 0x03,
+  0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x10, 0x03, 0x9A, 0x00, 0x01, 0x00, 0x00,
+  0x00, 0x2E, 0x00, 0x02, 0x00, 0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x0D, 0x00,
+  0x48, 0x03, 0x9A, 0x00, 0x88, 0x00, 0x70, 0x00, 0x00, 0x02, 0x9A, 0x00, 0x00,
+  0x90, 0x8F, 0x82, 0x14, 0x03, 0x9A, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02,
+  0x9A, 0x00, 0x00, 0x90, 0x8F, 0x02, 0x90, 0x00, 0x9A, 0x00, 0x61, 0x00, 0x00,
+  0x00, 0x90, 0x00, 0x9A, 0x00, 0x7F, 0x00, 0x00, 0xC0, 0x2E, 0x00, 0x02, 0x00,
+  0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x27, 0x00, 0x98, 0x06, 0x9A, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x9C, 0x06, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x08,
+  0x9A, 0x00, 0xE7, 0x8F, 0x83, 0x40, 0x38, 0x1F, 0x9A, 0x00, 0x00, 0x00, 0x01,
+  0x00, 0x34, 0x1F, 0x9A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x34, 0x0D, 0x9A, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x24, 0x08, 0x9A, 0x00, 0xE7, 0x8F, 0x8B, 0xC0, 0x24,
+  0x08, 0x9A, 0x00, 0xE7, 0x8F, 0x83, 0x40, 0xF4, 0x73, 0x13, 0x00, 0x11, 0x00,
+  0x01, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x10, 0x00, 0x01, 0x00, 0xF4, 0x73, 0x13,
+  0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x20, 0x13, 0x00, 0x00, 0x00, 0x01, 0x18,
+  0x00, 0x20, 0x13, 0x00, 0x02, 0x00, 0x01, 0x18, 0x20, 0x20, 0x13, 0x00, 0x00,
+  0x00, 0x03, 0x20, 0x20, 0x73, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x20,
+  0x13, 0x00, 0x01, 0x3B, 0x02, 0x00, 0x2C, 0x20, 0x13, 0x00, 0x00, 0x01, 0x00,
+  0x00, 0x28, 0x20, 0x13, 0x00, 0x10, 0x00, 0x08, 0x10, 0x20, 0x20, 0x13, 0x00,
+  0x01, 0x00, 0x03, 0x20, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01,
+  0x00, 0x02, 0x00, 0x90, 0x73, 0x13, 0x00, 0x15, 0x00, 0x03, 0x00, 0x00, 0x00,
+  0x02, 0x00, 0x00, 0xFA, 0x00, 0x00, 0x21, 0x00, 0x1F, 0x00, 0x2C, 0x20, 0x13,
+  0x00, 0x00, 0x03, 0x00, 0x00, 0x28, 0x20, 0x13, 0x00, 0x10, 0x00, 0x04, 0x10,
+  0xF4, 0x73, 0x13, 0x00, 0x00, 0x01, 0x01, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x10,
+  0x01, 0x01, 0x00, 0xEC, 0x73, 0x13, 0x00, 0x00, 0x00, 0x01, 0x00, 0xF4, 0x73,
+  0x13, 0x00, 0x11, 0x01, 0x01, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x11, 0x01, 0x00,
+  0x00, 0x5C, 0x06, 0x9A, 0x00, 0x11, 0x00, 0x00, 0x00, 0x70, 0x06, 0x9A, 0x00,
+  0x06, 0x13, 0x08, 0xB4, 0x98, 0x06, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9C,
+  0x06, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x94, 0x06, 0x9A, 0x00, 0x0E, 0x06,
+  0x0E, 0x06, 0xD4, 0x0E, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x40, 0xD4, 0x0E, 0x9A,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x08, 0x9A, 0x00, 0xE7, 0x8F, 0x80, 0x40,
+  0x13, 0x00, 0x02, 0x00, 0x2C, 0x01, 0x00, 0x00, 0x2E, 0x00, 0x02, 0x00, 0x10,
+  0x27, 0x00, 0x00, 0x21, 0x00, 0x1D, 0x00, 0x70, 0x06, 0x9A, 0x00, 0x06, 0x13,
+  0x08, 0x34, 0x48, 0x02, 0x9A, 0x00, 0xA3, 0x44, 0x14, 0x86, 0x90, 0x02, 0x9A,
+  0x00, 0x12, 0x2C, 0x18, 0x06, 0x94, 0x02, 0x9A, 0x00, 0x8A, 0x82, 0x41, 0x24,
+  0x98, 0x02, 0x9A, 0x00, 0x11, 0x05, 0x06, 0x88, 0x9C, 0x02, 0x9A, 0x00, 0x8C,
+  0x10, 0x00, 0x22, 0xA8, 0x02, 0x9A, 0x00, 0x0B, 0x86, 0x00, 0x01, 0x4C, 0x02,
+  0x9A, 0x00, 0x85, 0x0C, 0x05, 0x06, 0x30, 0x1F, 0x9A, 0x00, 0x03, 0x16, 0x2C,
+  0x00, 0xE0, 0x08, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x03, 0x9A, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x94, 0x03, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x0B, 0x9A, 0x00, 0x06, 0x22, 0x22, 0x22, 0x90, 0x00, 0x9A, 0x00, 0x7E, 0x00,
+  0x00, 0x40, 0x2E, 0x00, 0x02, 0x00, 0xD0, 0x07, 0x00, 0x00, 0x21, 0x00, 0x0D,
+  0x00, 0x14, 0x03, 0x9A, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x03, 0x9A, 0x00,
+  0x01, 0x00, 0x00, 0x00, 0x10, 0x02, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x80, 0x90,
+  0x03, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x03, 0x9A, 0x00, 0x23, 0x01,
+  0x30, 0x00, 0x00, 0x03, 0x9A, 0x00, 0x2D, 0x02, 0x00, 0x00, 0x2E, 0x00, 0x02,
+  0x00, 0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x07, 0x00, 0x00, 0x02, 0x9A, 0x00,
+  0x00, 0x90, 0x8F, 0x82, 0x18, 0x03, 0x9A, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
+  0x02, 0x9A, 0x00, 0x00, 0x90, 0x8F, 0x02, 0x2E, 0x00, 0x02, 0x00, 0xE8, 0x03,
+  0x00, 0x00, 0x21, 0x00, 0x0D, 0x00, 0x78, 0x09, 0x9A, 0x00, 0x0B, 0x1E, 0x7A,
+  0x88, 0x30, 0x08, 0x9A, 0x00, 0x91, 0x10, 0x27, 0x00, 0x10, 0x09, 0x90, 0x00,
+  0x00, 0x00, 0x0A, 0x98, 0x14, 0x09, 0x90, 0x00, 0x00, 0x00, 0x0A, 0x98, 0x10,
+  0x49, 0x90, 0x00, 0x00, 0x00, 0x0A, 0x98, 0x14, 0x49, 0x90, 0x00, 0x00, 0x00,
+  0x0A, 0x98, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02,
+  0x00, 0x74, 0x09, 0x90, 0x00, 0x15, 0x00, 0x03, 0x00, 0x0F, 0x00, 0x00, 0x00,
+  0x20, 0xA1, 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x74, 0x49, 0x90, 0x00, 0x15,
+  0x00, 0x03, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x20, 0xA1, 0x07, 0x00, 0x21, 0x00,
+  0x0D, 0x00, 0x30, 0x08, 0x9A, 0x00, 0x91, 0x10, 0x27, 0x01, 0x30, 0x08, 0x9A,
+  0x00, 0x91, 0x10, 0x27, 0x00, 0x10, 0x09, 0x90, 0x00, 0x00, 0x00, 0x08, 0x19,
+  0x14, 0x09, 0x90, 0x00, 0x00, 0x00, 0x08, 0x19, 0x10, 0x49, 0x90, 0x00, 0x00,
+  0x00, 0x08, 0x19, 0x14, 0x49, 0x90, 0x00, 0x00, 0x00, 0x08, 0x19, 0x20, 0x00,
+  0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x00, 0x02,
+  0x00, 0x0B, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x00, 0x02, 0x9A, 0x00,
+  0x00, 0x98, 0x8F, 0x02, 0x16, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+/* LOW/MID SPEED TO HIGH SPEED */
+static u8 seq_script_step28_gp106[] = {
   0x34, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x20, 0x00, 0x03, 0x00, 0x01,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x00, 0x07, 0x00, 0x30, 0x03,
   0x9A, 0x00, 0x14, 0x00, 0x10, 0x00, 0x38, 0xD6, 0x00, 0x00, 0x00, 0x60, 0x00,
@@ -1644,7 +1836,177 @@ static u8 seq_script_gp106[] = {
   0x14, 0x09, 0x90, 0x00, 0x00, 0x00, 0x0C, 0x25, 0x10, 0x49, 0x90, 0x00, 0x00,
   0x00, 0x0C, 0x25, 0x14, 0x49, 0x90, 0x00, 0x00, 0x00, 0x0C, 0x25, 0x20, 0x00,
   0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x00, 0x02,
-  0x00, 0x13, 0x00, 0x00, 0x00, 0x16, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00};
+  0x00, 0x13, 0x00, 0x00, 0x00, 0x16, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+/* HIGH SPEED TO LOW SPEED */
+static u8 seq_script_step32_ls_gp106[] = {
+  0x34, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x20, 0x00, 0x03, 0x00, 0x01,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x00, 0x0B, 0x00, 0x10, 0x09,
+  0x90, 0x00, 0x00, 0x00, 0x0C, 0x24, 0x14, 0x09, 0x90, 0x00, 0x00, 0x00, 0x0C,
+  0x24, 0x10, 0x49, 0x90, 0x00, 0x00, 0x00, 0x0C, 0x24, 0x14, 0x49, 0x90, 0x00,
+  0x00, 0x00, 0x0C, 0x24, 0x10, 0x02, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2E,
+  0x00, 0x02, 0x00, 0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x10, 0x03,
+  0x9A, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x02, 0x00, 0xE8, 0x03, 0x00,
+  0x00, 0x21, 0x00, 0x0D, 0x00, 0x48, 0x03, 0x9A, 0x00, 0x00, 0x00, 0x70, 0x00,
+  0x00, 0x02, 0x9A, 0x00, 0x00, 0x10, 0x8F, 0x82, 0x14, 0x03, 0x9A, 0x00, 0x01,
+  0x00, 0x00, 0x00, 0x00, 0x02, 0x9A, 0x00, 0x00, 0x10, 0x8F, 0x02, 0x90, 0x00,
+  0x9A, 0x00, 0x61, 0x00, 0x00, 0x00, 0x90, 0x00, 0x9A, 0x00, 0x7F, 0x00, 0x00,
+  0xC0, 0x2E, 0x00, 0x02, 0x00, 0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x35, 0x00,
+  0x98, 0x06, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9C, 0x06, 0x9A, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x24, 0x08, 0x9A, 0x00, 0x85, 0x8F, 0x8B, 0xF7, 0x38, 0x1F,
+  0x9A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x34, 0x1F, 0x9A, 0x00, 0x00, 0x00, 0x01,
+  0x00, 0x34, 0x0D, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x08, 0x9A, 0x00,
+  0xE7, 0x8F, 0x8B, 0xF7, 0x40, 0x0D, 0x9A, 0x00, 0x00, 0xE0, 0x01, 0x00, 0x24,
+  0x08, 0x9A, 0x00, 0xE7, 0x8F, 0x83, 0x40, 0x08, 0x08, 0x9A, 0x00, 0x70, 0x00,
+  0xA0, 0x4A, 0x00, 0x02, 0x9A, 0x00, 0x00, 0x90, 0x8F, 0x02, 0x30, 0x08, 0x9A,
+  0x00, 0x90, 0x20, 0x67, 0x01, 0x30, 0x08, 0x9A, 0x00, 0x90, 0x20, 0x67, 0x00,
+  0xF4, 0x73, 0x13, 0x00, 0x12, 0x11, 0x00, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x10,
+  0x11, 0x00, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x20,
+  0x13, 0x00, 0x00, 0x00, 0x03, 0x98, 0x00, 0x20, 0x13, 0x00, 0x02, 0x00, 0x03,
+  0x98, 0x20, 0x20, 0x13, 0x00, 0x00, 0x00, 0x03, 0x20, 0x20, 0x73, 0x13, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x30, 0x20, 0x13, 0x00, 0x06, 0x00, 0x00, 0x10, 0x34,
+  0x20, 0x13, 0x00, 0x00, 0x10, 0x67, 0x06, 0x24, 0x20, 0x13, 0x00, 0x01, 0x3B,
+  0x04, 0x00, 0x2C, 0x20, 0x13, 0x00, 0x00, 0x01, 0x00, 0x00, 0x28, 0x20, 0x13,
+  0x00, 0x10, 0x00, 0x08, 0x10, 0x20, 0x20, 0x13, 0x00, 0x01, 0x00, 0x03, 0x20,
+  0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x02, 0x00, 0x90,
+  0x73, 0x13, 0x00, 0x15, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0xFA,
+  0x00, 0x00, 0x21, 0x00, 0x23, 0x00, 0x2C, 0x20, 0x13, 0x00, 0x00, 0x03, 0x00,
+  0x00, 0x28, 0x20, 0x13, 0x00, 0x10, 0x00, 0x04, 0x10, 0xF4, 0x73, 0x13, 0x00,
+  0x00, 0x11, 0x01, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x10, 0x11, 0x01, 0x00, 0xF0,
+  0x73, 0x13, 0x00, 0x01, 0x00, 0x00, 0x00, 0xEC, 0x73, 0x13, 0x00, 0x00, 0x00,
+  0x01, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x11, 0x11, 0x01, 0x00, 0xF4, 0x73, 0x13,
+  0x00, 0x11, 0x11, 0x00, 0x00, 0x30, 0x08, 0x9A, 0x00, 0x91, 0x20, 0x27, 0x00,
+  0x5C, 0x06, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x98, 0x06, 0x9A, 0x00, 0x06,
+  0x06, 0x06, 0x06, 0x9C, 0x06, 0x9A, 0x00, 0x06, 0x06, 0x06, 0x06, 0x94, 0x06,
+  0x9A, 0x00, 0x0E, 0x06, 0x0E, 0x06, 0x0C, 0x06, 0x9A, 0x00, 0x50, 0x20, 0x00,
+  0xFD, 0xD4, 0x0E, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x40, 0xD4, 0x0E, 0x9A, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x24, 0x08, 0x9A, 0x00, 0xE7, 0x8F, 0x80, 0x40, 0x13,
+  0x00, 0x02, 0x00, 0x2C, 0x01, 0x00, 0x00, 0x21, 0x00, 0x23, 0x00, 0x2C, 0x08,
+  0x9A, 0x00, 0x00, 0x00, 0x15, 0x00, 0x30, 0x08, 0x9A, 0x00, 0x91, 0x10, 0x27,
+  0x00, 0x48, 0x02, 0x9A, 0x00, 0xA3, 0x44, 0x14, 0x84, 0x90, 0x02, 0x9A, 0x00,
+  0x0A, 0x17, 0x0E, 0x03, 0x94, 0x02, 0x9A, 0x00, 0x89, 0x02, 0x21, 0x24, 0x98,
+  0x02, 0x9A, 0x00, 0x11, 0x04, 0x05, 0x88, 0x9C, 0x02, 0x9A, 0x00, 0x6C, 0x10,
+  0x00, 0x22, 0xA0, 0x02, 0x9A, 0x00, 0x32, 0x00, 0x61, 0xD3, 0xA8, 0x02, 0x9A,
+  0x00, 0x0B, 0x86, 0x00, 0x02, 0x14, 0x06, 0x9A, 0x00, 0x77, 0x3E, 0x03, 0x30,
+  0x10, 0x06, 0x9A, 0x00, 0x77, 0x3E, 0x03, 0x30, 0x08, 0x08, 0x9A, 0x00, 0x70,
+  0x00, 0x00, 0x48, 0x78, 0x07, 0x10, 0x00, 0x33, 0x03, 0x00, 0x82, 0x4C, 0x02,
+  0x9A, 0x00, 0x85, 0x0C, 0x05, 0x04, 0x30, 0x1F, 0x9A, 0x00, 0x03, 0x16, 0x2C,
+  0x00, 0xE0, 0x08, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x9A, 0x00,
+  0x7E, 0x00, 0x00, 0x40, 0x2E, 0x00, 0x02, 0x00, 0xD0, 0x07, 0x00, 0x00, 0x21,
+  0x00, 0x0D, 0x00, 0x14, 0x03, 0x9A, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x03,
+  0x9A, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x02, 0x9A, 0x00, 0x00, 0x00, 0x00,
+  0x80, 0x38, 0x03, 0x9A, 0x00, 0x23, 0x00, 0x30, 0x00, 0x00, 0x03, 0x9A, 0x00,
+  0x25, 0x01, 0x00, 0x00, 0x54, 0x03, 0x9A, 0x00, 0x00, 0x00, 0x80, 0x00, 0x2E,
+  0x00, 0x02, 0x00, 0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x09, 0x00, 0x30, 0x03,
+  0x9A, 0x00, 0x30, 0x00, 0x10, 0x00, 0x48, 0x03, 0x9A, 0x00, 0x88, 0x00, 0x70,
+  0x00, 0x38, 0xD6, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x04, 0xD6, 0x00, 0x00,
+  0x01, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x02, 0x00, 0x20, 0x4E, 0x00, 0x00, 0x21,
+  0x00, 0x07, 0x00, 0x00, 0x02, 0x9A, 0x00, 0x00, 0x90, 0x8F, 0x82, 0x18, 0x03,
+  0x9A, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x9A, 0x00, 0x00, 0x90, 0x8F,
+  0x02, 0x2E, 0x00, 0x02, 0x00, 0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x0D, 0x00,
+  0x78, 0x09, 0x9A, 0x00, 0x0F, 0x3E, 0x7A, 0x88, 0x30, 0x08, 0x9A, 0x00, 0x91,
+  0x10, 0x27, 0x00, 0x10, 0x09, 0x90, 0x00, 0x00, 0x00, 0x0A, 0x88, 0x14, 0x09,
+  0x90, 0x00, 0x00, 0x00, 0x0A, 0x88, 0x10, 0x49, 0x90, 0x00, 0x00, 0x00, 0x0A,
+  0x88, 0x14, 0x49, 0x90, 0x00, 0x00, 0x00, 0x0A, 0x88, 0x00, 0x00, 0x02, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x74, 0x09, 0x90, 0x00, 0x15,
+  0x00, 0x03, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x20, 0xA1, 0x07, 0x00, 0x01, 0x00,
+  0x02, 0x00, 0x74, 0x49, 0x90, 0x00, 0x15, 0x00, 0x03, 0x00, 0x0F, 0x00, 0x00,
+  0x00, 0x20, 0xA1, 0x07, 0x00, 0x21, 0x00, 0x05, 0x00, 0x30, 0x08, 0x9A, 0x00,
+  0x91, 0x10, 0x27, 0x01, 0x30, 0x08, 0x9A, 0x00, 0x91, 0x10, 0x27, 0x00, 0x20,
+  0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x00,
+  0x02, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x00, 0x02, 0x9A,
+  0x00, 0x00, 0x98, 0x8F, 0x02, 0x16, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+/* HIGH SPEED TO LOW/MID SPEED */
+static u8 seq_script_step32_gp106[] = {
+  0x34, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x20, 0x00, 0x03, 0x00, 0x01,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x00, 0x0B, 0x00, 0x10, 0x09,
+  0x90, 0x00, 0x00, 0x00, 0x0C, 0x24, 0x14, 0x09, 0x90, 0x00, 0x00, 0x00, 0x0C,
+  0x24, 0x10, 0x49, 0x90, 0x00, 0x00, 0x00, 0x0C, 0x24, 0x14, 0x49, 0x90, 0x00,
+  0x00, 0x00, 0x0C, 0x24, 0x10, 0x02, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2E,
+  0x00, 0x02, 0x00, 0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x10, 0x03,
+  0x9A, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x02, 0x00, 0xE8, 0x03, 0x00,
+  0x00, 0x21, 0x00, 0x0D, 0x00, 0x48, 0x03, 0x9A, 0x00, 0x00, 0x00, 0x70, 0x00,
+  0x00, 0x02, 0x9A, 0x00, 0x00, 0x10, 0x8F, 0x82, 0x14, 0x03, 0x9A, 0x00, 0x01,
+  0x00, 0x00, 0x00, 0x00, 0x02, 0x9A, 0x00, 0x00, 0x10, 0x8F, 0x02, 0x90, 0x00,
+  0x9A, 0x00, 0x61, 0x00, 0x00, 0x00, 0x90, 0x00, 0x9A, 0x00, 0x7F, 0x00, 0x00,
+  0xC0, 0x2E, 0x00, 0x02, 0x00, 0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x35, 0x00,
+  0x98, 0x06, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9C, 0x06, 0x9A, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x24, 0x08, 0x9A, 0x00, 0x85, 0x8F, 0x8B, 0xF7, 0x38, 0x1F,
+  0x9A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x34, 0x1F, 0x9A, 0x00, 0x00, 0x00, 0x01,
+  0x00, 0x34, 0x0D, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x08, 0x9A, 0x00,
+  0xE7, 0x8F, 0x8B, 0xF7, 0x40, 0x0D, 0x9A, 0x00, 0x00, 0xE0, 0x01, 0x00, 0x24,
+  0x08, 0x9A, 0x00, 0xE7, 0x8F, 0x83, 0x40, 0x08, 0x08, 0x9A, 0x00, 0x70, 0x00,
+  0xA0, 0x4A, 0x00, 0x02, 0x9A, 0x00, 0x00, 0x90, 0x8F, 0x02, 0x30, 0x08, 0x9A,
+  0x00, 0x90, 0x20, 0x67, 0x01, 0x30, 0x08, 0x9A, 0x00, 0x90, 0x20, 0x67, 0x00,
+  0xF4, 0x73, 0x13, 0x00, 0x12, 0x11, 0x00, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x10,
+  0x11, 0x00, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x20,
+  0x13, 0x00, 0x00, 0x00, 0x03, 0x98, 0x00, 0x20, 0x13, 0x00, 0x02, 0x00, 0x03,
+  0x98, 0x20, 0x20, 0x13, 0x00, 0x00, 0x00, 0x03, 0x20, 0x20, 0x73, 0x13, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x30, 0x20, 0x13, 0x00, 0x06, 0x00, 0x00, 0x10, 0x34,
+  0x20, 0x13, 0x00, 0x00, 0x10, 0x67, 0x06, 0x24, 0x20, 0x13, 0x00, 0x01, 0x3B,
+  0x02, 0x00, 0x2C, 0x20, 0x13, 0x00, 0x00, 0x01, 0x00, 0x00, 0x28, 0x20, 0x13,
+  0x00, 0x10, 0x00, 0x08, 0x10, 0x20, 0x20, 0x13, 0x00, 0x01, 0x00, 0x03, 0x20,
+  0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x02, 0x00, 0x90,
+  0x73, 0x13, 0x00, 0x15, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0xFA,
+  0x00, 0x00, 0x21, 0x00, 0x25, 0x00, 0x2C, 0x20, 0x13, 0x00, 0x00, 0x03, 0x00,
+  0x00, 0x28, 0x20, 0x13, 0x00, 0x10, 0x00, 0x04, 0x10, 0xF4, 0x73, 0x13, 0x00,
+  0x00, 0x11, 0x01, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x10, 0x11, 0x01, 0x00, 0xF0,
+  0x73, 0x13, 0x00, 0x01, 0x00, 0x00, 0x00, 0xEC, 0x73, 0x13, 0x00, 0x00, 0x00,
+  0x01, 0x00, 0xF4, 0x73, 0x13, 0x00, 0x11, 0x11, 0x01, 0x00, 0xF4, 0x73, 0x13,
+  0x00, 0x11, 0x11, 0x00, 0x00, 0x30, 0x08, 0x9A, 0x00, 0x91, 0x20, 0x27, 0x00,
+  0x5C, 0x06, 0x9A, 0x00, 0x11, 0x00, 0x00, 0x00, 0x70, 0x06, 0x9A, 0x00, 0x06,
+  0x13, 0x08, 0xB4, 0x98, 0x06, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9C, 0x06,
+  0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x94, 0x06, 0x9A, 0x00, 0x0E, 0x06, 0x0E,
+  0x06, 0x0C, 0x06, 0x9A, 0x00, 0x50, 0x20, 0x00, 0xFD, 0xD4, 0x0E, 0x9A, 0x00,
+  0x00, 0x00, 0x00, 0x40, 0xD4, 0x0E, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24,
+  0x08, 0x9A, 0x00, 0xE7, 0x8F, 0x80, 0x40, 0x13, 0x00, 0x02, 0x00, 0x2C, 0x01,
+  0x00, 0x00, 0x2E, 0x00, 0x02, 0x00, 0x10, 0x27, 0x00, 0x00, 0x21, 0x00, 0x25,
+  0x00, 0x70, 0x06, 0x9A, 0x00, 0x06, 0x13, 0x08, 0x34, 0x2C, 0x08, 0x9A, 0x00,
+  0x00, 0x00, 0x15, 0x00, 0x30, 0x08, 0x9A, 0x00, 0x91, 0x10, 0x27, 0x00, 0x48,
+  0x02, 0x9A, 0x00, 0xA3, 0x44, 0x14, 0x86, 0x90, 0x02, 0x9A, 0x00, 0x12, 0x2C,
+  0x18, 0x06, 0x94, 0x02, 0x9A, 0x00, 0x8A, 0x82, 0x41, 0x24, 0x98, 0x02, 0x9A,
+  0x00, 0x11, 0x05, 0x06, 0x88, 0x9C, 0x02, 0x9A, 0x00, 0x8C, 0x10, 0x00, 0x22,
+  0xA0, 0x02, 0x9A, 0x00, 0x32, 0x00, 0x61, 0xD3, 0xA8, 0x02, 0x9A, 0x00, 0x0B,
+  0x86, 0x00, 0x01, 0x14, 0x06, 0x9A, 0x00, 0x77, 0x3E, 0x03, 0x30, 0x10, 0x06,
+  0x9A, 0x00, 0x77, 0x3E, 0x03, 0x30, 0x08, 0x08, 0x9A, 0x00, 0x70, 0x00, 0x00,
+  0x48, 0x78, 0x07, 0x10, 0x00, 0x33, 0x03, 0x00, 0x82, 0x4C, 0x02, 0x9A, 0x00,
+  0x85, 0x0C, 0x05, 0x06, 0x30, 0x1F, 0x9A, 0x00, 0x03, 0x16, 0x2C, 0x00, 0xE0,
+  0x08, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x9A, 0x00, 0x7E, 0x00,
+  0x00, 0x40, 0x2E, 0x00, 0x02, 0x00, 0xD0, 0x07, 0x00, 0x00, 0x21, 0x00, 0x0D,
+  0x00, 0x14, 0x03, 0x9A, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x03, 0x9A, 0x00,
+  0x01, 0x00, 0x00, 0x00, 0x10, 0x02, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x80, 0x38,
+  0x03, 0x9A, 0x00, 0x23, 0x01, 0x30, 0x00, 0x00, 0x03, 0x9A, 0x00, 0x2D, 0x02,
+  0x00, 0x00, 0x54, 0x03, 0x9A, 0x00, 0x00, 0x00, 0x80, 0x00, 0x2E, 0x00, 0x02,
+  0x00, 0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x09, 0x00, 0x30, 0x03, 0x9A, 0x00,
+  0x30, 0x00, 0x10, 0x00, 0x48, 0x03, 0x9A, 0x00, 0x88, 0x00, 0x70, 0x00, 0x38,
+  0xD6, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x04, 0xD6, 0x00, 0x00, 0x01, 0x00,
+  0x00, 0x00, 0x2E, 0x00, 0x02, 0x00, 0x20, 0x4E, 0x00, 0x00, 0x21, 0x00, 0x07,
+  0x00, 0x00, 0x02, 0x9A, 0x00, 0x00, 0x90, 0x8F, 0x82, 0x18, 0x03, 0x9A, 0x00,
+  0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x9A, 0x00, 0x00, 0x90, 0x8F, 0x02, 0x2E,
+  0x00, 0x02, 0x00, 0xE8, 0x03, 0x00, 0x00, 0x21, 0x00, 0x0D, 0x00, 0x78, 0x09,
+  0x9A, 0x00, 0x0B, 0x1E, 0x7A, 0x88, 0x30, 0x08, 0x9A, 0x00, 0x91, 0x10, 0x27,
+  0x00, 0x10, 0x09, 0x90, 0x00, 0x00, 0x00, 0x0A, 0x98, 0x14, 0x09, 0x90, 0x00,
+  0x00, 0x00, 0x0A, 0x98, 0x10, 0x49, 0x90, 0x00, 0x00, 0x00, 0x0A, 0x98, 0x14,
+  0x49, 0x90, 0x00, 0x00, 0x00, 0x0A, 0x98, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x74, 0x09, 0x90, 0x00, 0x15, 0x00, 0x03,
+  0x00, 0x0F, 0x00, 0x00, 0x00, 0x20, 0xA1, 0x07, 0x00, 0x01, 0x00, 0x02, 0x00,
+  0x74, 0x49, 0x90, 0x00, 0x15, 0x00, 0x03, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x20,
+  0xA1, 0x07, 0x00, 0x21, 0x00, 0x0D, 0x00, 0x30, 0x08, 0x9A, 0x00, 0x91, 0x10,
+  0x27, 0x01, 0x30, 0x08, 0x9A, 0x00, 0x91, 0x10, 0x27, 0x00, 0x10, 0x09, 0x90,
+  0x00, 0x00, 0x00, 0x08, 0x19, 0x14, 0x09, 0x90, 0x00, 0x00, 0x00, 0x08, 0x19,
+  0x10, 0x49, 0x90, 0x00, 0x00, 0x00, 0x08, 0x19, 0x14, 0x49, 0x90, 0x00, 0x00,
+  0x00, 0x08, 0x19, 0x20, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x34, 0x00, 0x02, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03,
+  0x00, 0x00, 0x02, 0x9A, 0x00, 0x00, 0x98, 0x8F, 0x02, 0x16, 0x00, 0x02, 0x00,
+  0x00, 0x00, 0x00, 0x00,
+};
+
+#ifdef CONFIG_DEBUG_FS
+static int mclk_debugfs_init(struct gk20a *g);
+#endif
 
 static void mclk_memory_load_training_pattern(struct gk20a *g)
 {
@@ -1671,11 +2033,18 @@ static void mclk_memory_load_shadow_regs(struct gk20a *g)
 
 	gk20a_dbg_info("");
 
-	reg_writes = ((sizeof(memory_shadow_reglist) /
-				sizeof((memory_shadow_reglist)[0])));
+	reg_writes = ((sizeof(memory_shadow_p0_reglist) /
+				sizeof((memory_shadow_p0_reglist)[0])));
+	for (index = 0; index < reg_writes; index++) {
+		gk20a_writel(g, memory_shadow_p0_reglist[index].regaddr,
+				memory_shadow_p0_reglist[index].writeval);
+	}
+
+	reg_writes = ((sizeof(memory_shadow_p5_reglist) /
+				sizeof((memory_shadow_p5_reglist)[0])));
 	for (index = 0; index < reg_writes; index++) {
-		gk20a_writel(g, memory_shadow_reglist[index].regaddr,
-				memory_shadow_reglist[index].writeval);
+		gk20a_writel(g, memory_shadow_p5_reglist[index].regaddr,
+				memory_shadow_p5_reglist[index].writeval);
 	}
 
 	gk20a_dbg_fn("done");
@@ -1713,23 +2082,95 @@ status_update:
 	*((u32 *)param) = msg_status;
 }
 
-int clk_mclkseq_build_prgm_gddr5(struct gk20a *g)
+/* Init MCLK state and memory training data, then switch to the boot speed. */
+int clk_mclkseq_init_mclk_gddr5(struct gk20a *g)
 {
+	struct clk_mclk_state *mclk;
+
+	gk20a_dbg_fn("");
+
+	mclk = &g->clk_pmu.clk_mclk;
+	mutex_init(&mclk->mclk_mutex);
+
+	mclk->speed = gk20a_mclk_low_speed; /* Value from Devinit */
+
+	/* Load Shadow registers */
+	mclk_memory_load_shadow_regs(g);
+
+	/* Load RAM pattern */
+	mclk_memory_load_training_pattern(g);
+
+	mclk->vreg_buf = kzalloc((sizeof(u32) * VREG_COUNT), GFP_KERNEL);
+	if (!mclk->vreg_buf) {
+		gk20a_err(dev_from_gk20a(g),
+				"unable to allocate memory for VREG");
+		return -ENOMEM;
+	}
+
+#ifdef CONFIG_DEBUG_FS
+	if (!mclk->debugfs_set) {
+		if (!mclk_debugfs_init(g)) /* 0 means the nodes were created */
+			mclk->debugfs_set = true;
+	}
+#endif
+	mclk->change = clk_mclkseq_change_mclk_gddr5;
+
+	return mclk->change(g, DEFAULT_BOOT_MCLK_SPEED);
+}
+
+int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, enum gk20a_mclk_speed speed)
+{
+	struct clk_mclk_state *mclk;
 	struct pmu_payload payload = { {0} };
-	void *vreg_buf = NULL;
 	struct nv_pmu_seq_cmd cmd;
 	struct nv_pmu_seq_cmd_run_script *pseq_cmd;
 	u32 seqdesc;
 	u32 status = 0;
 	u32 seq_completion_status = ~0x0;
+	u8 *seq_script_ptr = NULL;
+	size_t seq_script_size = 0;
+#ifdef CONFIG_DEBUG_FS
+	u64 t0, t1;
+#endif
 
 	gk20a_dbg_info("");
 
-	/* Load Shadow registers */
-	mclk_memory_load_shadow_regs(g);
+	mclk = &g->clk_pmu.clk_mclk;
 
-	/* Load RAM pattern */
-	mclk_memory_load_training_pattern(g);
+	mutex_lock(&mclk->mclk_mutex);
+
+	if (speed == mclk->speed)
+		goto exit_status;
+
+	switch (speed) {
+	case gk20a_mclk_mid_speed:
+		if (mclk->speed == gk20a_mclk_low_speed) {
+			seq_script_ptr  = seq_script_step33_gp106;
+			seq_script_size = sizeof(seq_script_step33_gp106);
+		} else {
+			seq_script_ptr  = seq_script_step32_gp106;
+			seq_script_size = sizeof(seq_script_step32_gp106);
+		}
+		break;
+	case gk20a_mclk_high_speed:
+		seq_script_ptr  = seq_script_step28_gp106;
+		seq_script_size = sizeof(seq_script_step28_gp106);
+		break;
+	case gk20a_mclk_low_speed:
+		if (mclk->speed == gk20a_mclk_mid_speed) {
+			seq_script_ptr  = seq_script_step33_ls_gp106;
+			seq_script_size = sizeof(seq_script_step33_ls_gp106);
+		} else {
+			seq_script_ptr  = seq_script_step32_ls_gp106;
+			seq_script_size = sizeof(seq_script_step32_ls_gp106);
+		}
+		break;
+	default:
+		gk20a_err(dev_from_gk20a(g),
+			"Illegal MCLK clock change");
+		status = -EINVAL;
+		goto exit_status;
+	}
 
 	/* Fill command header with SEQ ID & size */
 	memset(&cmd, 0, sizeof(cmd));
@@ -1741,26 +2182,29 @@ int clk_mclkseq_build_prgm_gddr5(struct gk20a *g)
 	pseq_cmd = &cmd.run_script;
 	pseq_cmd->cmd_type = NV_PMU_SEQ_CMD_ID_RUN_SCRIPT;
 
-	gk20a_writel(g, 0x132000, 0x98010000);
-	udelay(0x5);
+#ifdef CONFIG_DEBUG_FS
+	g->ops.read_ptimer(g, &t0);
+#endif
+
+	if (speed == gk20a_mclk_high_speed) {
+		gk20a_writel(g, 0x132000, 0x98010000);
+		/* Introduce delay */
+		gk20a_readl(g, 0x132000);
+		gk20a_readl(g, 0x132000);
+	}
+
 	gk20a_writel(g, 0x137300, 0x20000103);
 
 	/* Read sequencer binary*/
-	payload.in.buf = seq_script_gp106;
-	payload.in.size = sizeof(seq_script_gp106);
+	payload.in.buf = seq_script_ptr;
+	payload.in.size = seq_script_size;
 	payload.in.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
 	payload.in.offset = offsetof(struct nv_pmu_seq_cmd_run_script,
 			script_alloc);
 
-	vreg_buf = kzalloc((sizeof(u32) * VREG_COUNT), GFP_KERNEL);
-	if (!vreg_buf) {
-		status = -ENOMEM;
-		gk20a_err(dev_from_gk20a(g),
-				"unable to allocate memory for VREG");
-		goto exit_status;
-	}
+	memset(mclk->vreg_buf, 0, (sizeof(u32) * VREG_COUNT));
 
-	payload.out.buf = vreg_buf;
+	payload.out.buf = mclk->vreg_buf;
 	payload.out.size = (VREG_COUNT * sizeof(u32));
 	payload.out.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
 	payload.out.offset = offsetof(struct nv_pmu_seq_cmd_run_script,
@@ -1777,7 +2221,6 @@ int clk_mclkseq_build_prgm_gddr5(struct gk20a *g)
 			cmd.hdr.unit_id);
 		goto exit_status;
 	}
-
 	/* wait till sequencer script complete */
 	pmu_wait_message_cond(&g->pmu, (gk20a_get_gr_idle_timeout(g)),
 			&seq_completion_status, 0);
@@ -1788,7 +2231,137 @@ int clk_mclkseq_build_prgm_gddr5(struct gk20a *g)
 		goto exit_status;
 	}
 
+	mclk->speed = speed;
+
+#ifdef CONFIG_DEBUG_FS
+	g->ops.read_ptimer(g, &t1);
+	mclk->switch_num++;
+
+	if (mclk->switch_num == 1) {
+		mclk->switch_max = mclk->switch_min =
+			mclk->switch_avg = (t1-t0)/1000;
+		mclk->switch_std = 0;
+	} else {
+		s64 prev_avg;
+		u64 curr = (t1-t0)/1000;
+
+		mclk->switch_max = curr > mclk->switch_max ?
+			curr : mclk->switch_max;
+		mclk->switch_min = mclk->switch_min ?
+			(curr < mclk->switch_min ?
+				curr : mclk->switch_min) : curr;
+		prev_avg = mclk->switch_avg;
+		mclk->switch_avg = (curr +
+			(mclk->switch_avg * (mclk->switch_num-1))) /
+			mclk->switch_num;
+		mclk->switch_std +=
+			(curr - mclk->switch_avg) * (curr - prev_avg);
+	}
+#endif
 exit_status:
-	kfree(vreg_buf);
+
+	mutex_unlock(&mclk->mclk_mutex);
 	return status;
 }
+
+#ifdef CONFIG_DEBUG_FS
+static int mclk_debug_speed_set(void *data, u64 val)
+{
+	enum gk20a_mclk_speed speed;
+	struct gk20a *g = (struct gk20a *) data;
+	struct clk_mclk_state *mclk;
+
+	mclk = &g->clk_pmu.clk_mclk;
+
+	/* TODO this should be done according to VBIOS tables */
+	/* Bucket the written value: <= LOW limit, <= MID limit, else high. */
+	speed = (val <= MCLK_LOW_SPEED_LIMIT) ? gk20a_mclk_low_speed :
+		(val <= MCLK_MID_SPEED_LIMIT) ? gk20a_mclk_mid_speed :
+						gk20a_mclk_high_speed;
+
+	if (mclk->change)
+		return mclk->change(g, speed);
+	return 0; /* no change handler installed: nothing to do */
+
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(
+	mclk_debug_speed_set_fops,
+	NULL,
+	mclk_debug_speed_set,
+	"%llu\n"
+);
+
+/* debugfs: print the MCLK switch count and switch-latency statistics. */
+static int mclk_switch_stats_show(struct seq_file *s, void *unused)
+{
+	struct gk20a *g = s->private;
+	struct clk_mclk_state *mclk;
+	u64 num, tmp;
+	s64 avg, std, max, min;
+
+	mclk = &g->clk_pmu.clk_mclk;
+
+	/* Make copy of structure to reduce time with lock held */
+	mutex_lock(&mclk->mclk_mutex);
+	std = mclk->switch_std;
+	avg = mclk->switch_avg;
+	max = mclk->switch_max;
+	min = mclk->switch_min;
+	num = mclk->switch_num;
+	mutex_unlock(&mclk->mclk_mutex);
+
+	/* switch_std is a sum of deviation products; std = sqrt(sum / num) */
+	tmp = std;
+	if (num) /* no switch recorded yet: skip the divide-by-zero */
+		do_div(tmp, num);
+	seq_printf(s, "MCLK:\n number of transitions: %llu\n", num);
+	seq_printf(s, "max / min : %lld / %lld usec\n", max, min);
+	seq_printf(s, "avg / std : %lld / %lu usec\n", avg, int_sqrt(tmp));
+
+	return 0;
+}
+
+static int mclk_switch_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mclk_switch_stats_show, inode->i_private);
+}
+
+static const struct file_operations mclk_switch_stats_fops = {
+	.open		= mclk_switch_stats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+
+/* Create the MCLK debugfs nodes; returns 0 on success or -ENOMEM. */
+static int mclk_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	struct dentry *gpu_root = platform->debugfs;
+	struct dentry *d;
+
+	gk20a_dbg(gpu_dbg_info, "g=%p", g);
+
+	d = debugfs_create_file(
+			"mclk_speed_set",
+			S_IWUSR, /* clock control: writable by owner only */
+			gpu_root,
+			g,
+			&mclk_debug_speed_set_fops);
+	if (!d)
+		return -ENOMEM;
+
+	d = debugfs_create_file(
+			"mclk_switch_stats",
+			S_IRUGO,
+			gpu_root,
+			g,
+			&mclk_switch_stats_fops);
+	if (!d)
+		return -ENOMEM;
+
+	return 0;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.h b/drivers/gpu/nvgpu/clk/clk_mclk.h
index f86893f7..c3261eac 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.h
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.h
@@ -14,6 +14,34 @@
 #ifndef _CLKMCLK_H_
 #define _CLKMCLK_H_
 
-int clk_mclkseq_build_prgm_gddr5(struct gk20a *g);
+#include <linux/mutex.h>
+
+enum gk20a_mclk_speed {	/* speed selector taken by the mclk change hook */
+	gk20a_mclk_low_speed,
+	gk20a_mclk_mid_speed,
+	gk20a_mclk_high_speed
+};
+
+struct clk_mclk_state {
+	enum gk20a_mclk_speed speed;
+	struct mutex mclk_mutex;	/* held while switch_* stats are copied */
+	void *vreg_buf;
+
+	/* function pointers; the debugfs speed setter skips change when NULL */
+	int (*change)(struct gk20a *g, enum gk20a_mclk_speed speed);
+
+#ifdef CONFIG_DEBUG_FS	/* switch_* are printed by the mclk_switch_stats file */
+	s64 switch_max;		/* printed as max, in usec */
+	s64 switch_min;		/* printed as min, in usec */
+	u64 switch_num;		/* printed as "number of transitions" */
+	s64 switch_avg;		/* printed as avg, in usec */
+	s64 switch_std;		/* printed as int_sqrt(switch_std / switch_num) */
+	bool debugfs_set;
+#endif
+};
+
+int clk_mclkseq_init_mclk_gddr5(struct gk20a *g);
+int clk_mclkseq_change_mclk_gddr5(struct gk20a *g,
+	enum gk20a_mclk_speed speed);
 
 #endif
diff --git a/drivers/gpu/nvgpu/gp106/pmu_gp106.c b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
index 48653142..f3e7b298 100644
--- a/drivers/gpu/nvgpu/gp106/pmu_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
@@ -195,7 +195,7 @@ void gp106_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd = NULL;
 	gops->pmu.dump_secure_fuses = NULL;
 	gops->pmu.reset = gp106_falcon_reset;
-	gops->pmu.mclk_init = clk_mclkseq_build_prgm_gddr5;
+	gops->pmu.mclk_init = clk_mclkseq_init_mclk_gddr5;
 
 	gk20a_dbg_fn("done");
 }
-- 
cgit v1.2.2


From cb80abb2d2bee2f9feb987b83e8b106acdf14373 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 20 Sep 2016 08:48:34 -0700
Subject: gpu: nvgpu: Suppress error msg from VBIOS overlay

Suppress error message when nvgpu tries to load VBIOS overlay, but
one is not found. This situation is normal. This is done by moving
gk20a_request_firmware() to be nvgpu generic function
nvgpu_request_firmware(), and adding a NO_WARN flag to
it.

Also introduce a NO_SOC flag to suppress the attempt to load firmware
from the SoC specific directory in addition to the chip specific
directory. Use it for dGPU firmware files.

Bug 200236777

Change-Id: I4666bee512ae0914ef92b75f068685cb2b503cc8
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1223839
(cherry picked from commit e9ae74dfbde3c3d2b103e1927aa92ec7d97cd76d)
Reviewed-on: http://git-master/r/1233412
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp106/acr_gp106.c | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index e47c4eb1..39371666 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -28,6 +28,7 @@
 #include "gm206/pmu_gm206.h"
 #include "sec2_gp106.h"
 #include "nvgpu_gpuid_t18x.h"
+#include "nvgpu_common.h"
 
 /*Defines*/
 #define gp106_dbg_pmu(fmt, arg...) \
@@ -138,7 +139,8 @@ static int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img)
 	int err;
 
 	gp106_dbg_pmu("requesting PMU ucode in gp106\n");
-	pmu_fw = gk20a_request_firmware(g, GM20B_PMU_UCODE_IMAGE);
+	pmu_fw = nvgpu_request_firmware(g, GM20B_PMU_UCODE_IMAGE,
+					NVGPU_REQUEST_FIRMWARE_NO_SOC);
 	if (!pmu_fw) {
 		gk20a_err(dev_from_gk20a(g), "failed to load pmu ucode!!");
 		return -ENOENT;
@@ -147,13 +149,15 @@ static int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img)
 	gp106_dbg_pmu("Loaded PMU ucode in for blob preparation");
 
 	gp106_dbg_pmu("requesting PMU ucode desc in GM20B\n");
-	pmu_desc = gk20a_request_firmware(g, GM20B_PMU_UCODE_DESC);
+	pmu_desc = nvgpu_request_firmware(g, GM20B_PMU_UCODE_DESC,
+					NVGPU_REQUEST_FIRMWARE_NO_SOC);
 	if (!pmu_desc) {
 		gk20a_err(dev_from_gk20a(g), "failed to load pmu ucode desc!!");
 		err = -ENOENT;
 		goto release_img_fw;
 	}
-	pmu_sig = gk20a_request_firmware(g, GM20B_PMU_UCODE_SIG);
+	pmu_sig = nvgpu_request_firmware(g, GM20B_PMU_UCODE_SIG,
+					NVGPU_REQUEST_FIRMWARE_NO_SOC);
 	if (!pmu_sig) {
 		gk20a_err(dev_from_gk20a(g), "failed to load pmu sig!!");
 		err = -ENOENT;
@@ -206,10 +210,14 @@ static int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img)
 
 	switch (ver) {
 		case NVGPU_GPUID_GP104:
-			fecs_sig = gk20a_request_firmware(g, GP104_FECS_UCODE_SIG);
+			fecs_sig = nvgpu_request_firmware(g,
+					GP104_FECS_UCODE_SIG,
+					NVGPU_REQUEST_FIRMWARE_NO_SOC);
 			break;
 		case NVGPU_GPUID_GP106:
-			fecs_sig = gk20a_request_firmware(g, GP106_FECS_UCODE_SIG);
+			fecs_sig = nvgpu_request_firmware(g,
+					GP106_FECS_UCODE_SIG,
+					NVGPU_REQUEST_FIRMWARE_NO_SOC);
 			break;
 		default:
 			gk20a_err(g->dev, "no support for GPUID %x", ver);
@@ -288,10 +296,14 @@ static int gpccs_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img)
 
 	switch (ver) {
 		case NVGPU_GPUID_GP104:
-			gpccs_sig = gk20a_request_firmware(g, GP104_GPCCS_UCODE_SIG);
+			gpccs_sig = nvgpu_request_firmware(g,
+					GP104_GPCCS_UCODE_SIG,
+					NVGPU_REQUEST_FIRMWARE_NO_SOC);
 			break;
 		case NVGPU_GPUID_GP106:
-			gpccs_sig = gk20a_request_firmware(g, GP106_GPCCS_UCODE_SIG);
+			gpccs_sig = nvgpu_request_firmware(g,
+					GP106_GPCCS_UCODE_SIG,
+					NVGPU_REQUEST_FIRMWARE_NO_SOC);
 			break;
 		default:
 			gk20a_err(g->dev, "no support for GPUID %x", ver);
@@ -1041,7 +1053,9 @@ static int gp106_bootstrap_hs_flcn(struct gk20a *g)
 
 	if (!acr_fw) {
 		/*First time init case*/
-		acr_fw = gk20a_request_firmware(g, GM20B_HSBIN_PMU_UCODE_IMAGE);
+		acr_fw = nvgpu_request_firmware(g,
+				GM20B_HSBIN_PMU_UCODE_IMAGE,
+				NVGPU_REQUEST_FIRMWARE_NO_SOC);
 		if (!acr_fw) {
 			gk20a_err(dev_from_gk20a(g), "pmu ucode get fail");
 			return -ENOENT;
-- 
cgit v1.2.2


From 49c3fb25822565a9078961cdef1222aaa8c7e89a Mon Sep 17 00:00:00 2001
From: Lakshmanan M <lm@nvidia.com>
Date: Fri, 2 Sep 2016 16:42:04 +0530
Subject: gpu: nvgpu: gp10x: Add debugfs entry for temperature reading

Added current temperature reading support for gp10x.

JIRA DNVGPU-48

Change-Id: I45959da28bbd207dcf899a9eb37900c69895cfc1
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1213717
(cherry picked from commit 805245889d1df8aefce277cff9ea31ea5fb4706b)
Reviewed-on: http://git-master/r/1234092
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x    |  3 +-
 drivers/gpu/nvgpu/gp106/hal_gp106.c      |  3 +-
 drivers/gpu/nvgpu/gp106/hw_therm_gp106.h | 97 ++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/therm_gp106.c    | 62 ++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/therm_gp106.h    | 22 ++++++++
 5 files changed, 185 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_therm_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/therm_gp106.c
 create mode 100644 drivers/gpu/nvgpu/gp106/therm_gp106.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index aaf02931..eb9d5977 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -41,7 +41,8 @@ nvgpu-y += \
 	$(nvgpu-t18x)/perf/perf.o \
 	$(nvgpu-t18x)/clk/clk.o \
 	$(nvgpu-t18x)/gp106/clk_gp106.o \
-	$(nvgpu-t18x)/gp106/gp106_gating_reglist.o
+	$(nvgpu-t18x)/gp106/gp106_gating_reglist.o \
+	$(nvgpu-t18x)/gp106/therm_gp106.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
 
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index eb5c4eba..d07da835 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -28,7 +28,7 @@
 #include "gp106/fifo_gp106.h"
 #include "gp10b/regops_gp10b.h"
 #include "gp10b/cde_gp10b.h"
-#include "gp10b/therm_gp10b.h"
+#include "gp106/therm_gp106.h"
 
 #include "gm206/bios_gm206.h"
 
@@ -209,6 +209,7 @@ int gp106_init_hal(struct gk20a *g)
 	gk20a_init_css_ops(gops);
 #endif
 	gm206_init_bios(gops);
+	gp106_init_therm_ops(gops);
 	gops->name = "gp10x";
 	gops->get_litter_value = gp106_get_litter_value;
 	gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
diff --git a/drivers/gpu/nvgpu/gp106/hw_therm_gp106.h b/drivers/gpu/nvgpu/gp106/hw_therm_gp106.h
new file mode 100644
index 00000000..ecc50980
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_therm_gp106.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_therm_gp106_h_
+#define _hw_therm_gp106_h_
+
+static inline u32 therm_temp_sensor_tsense_r(void)
+{
+	return 0x00020460;
+}
+static inline u32 therm_temp_sensor_tsense_fixed_point_f(u32 v)
+{
+	return (v & 0x3fff) << 3;	/* field occupies bits 16:3 */
+}
+static inline u32 therm_temp_sensor_tsense_fixed_point_m(void)
+{
+	return 0x3fff << 3;
+}
+static inline u32 therm_temp_sensor_tsense_fixed_point_v(u32 r)
+{
+	return (r >> 3) & 0x3fff;
+}
+static inline u32 therm_temp_sensor_tsense_fixed_point_min_v(void)
+{
+	return 0x00003b00;
+}
+static inline u32 therm_temp_sensor_tsense_fixed_point_max_v(void)
+{
+	return 0x000010e0;
+}
+static inline u32 therm_temp_sensor_tsense_state_f(u32 v)
+{
+	return (v & 0x3) << 29;	/* field occupies bits 30:29 */
+}
+static inline u32 therm_temp_sensor_tsense_state_m(void)
+{
+	return 0x3 << 29;
+}
+static inline u32 therm_temp_sensor_tsense_state_v(u32 r)
+{
+	return (r >> 29) & 0x3;
+}
+static inline u32 therm_temp_sensor_tsense_state_valid_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 therm_temp_sensor_tsense_state_shadow_v(void)
+{
+	return 0x00000002;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/therm_gp106.c b/drivers/gpu/nvgpu/gp106/therm_gp106.c
new file mode 100644
index 00000000..153e953d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/therm_gp106.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "therm_gp106.h"
+#include <linux/debugfs.h>
+#include "hw_therm_gp106.h"
+
+#ifdef CONFIG_DEBUG_FS
+/* debugfs getter: store the masked TSENSE temperature field in *val. */
+static int therm_get_internal_sensor_curr_temp(void *data, u64 *val)
+{
+	struct gk20a *g = data;
+	u32 tsense = gk20a_readl(g, therm_temp_sensor_tsense_r());
+	u32 state = therm_temp_sensor_tsense_state_v(tsense);
+	int ret = 0;
+
+	if ((state & therm_temp_sensor_tsense_state_valid_v()) == 0) {
+		gk20a_err(dev_from_gk20a(g),
+			"Attempt to read temperature while sensor is OFF!\n");
+		ret = -EINVAL;
+	} else if ((state & therm_temp_sensor_tsense_state_shadow_v()) != 0) {
+		gk20a_err(dev_from_gk20a(g),
+			"Reading temperature from SHADOWed sensor!\n");
+	}
+
+	/*
+	 * The field is masked but left at bit 3, so the F9.5 reading comes out
+	 * with 8 fractional bits (F24.8). *val is written even on -EINVAL.
+	 */
+	*val = tsense & therm_temp_sensor_tsense_fixed_point_m();
+
+	return ret;
+}
+DEFINE_SIMPLE_ATTRIBUTE(therm_ctrl_fops, therm_get_internal_sensor_curr_temp,
+			NULL, "%llu\n");
+
+/* Expose the sensor reading as a read-only "temp" file in debugfs. */
+static void gp106_therm_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *pdata = dev_get_drvdata(g->dev);
+
+	if (debugfs_create_file("temp", S_IRUGO, pdata->debugfs, g,
+				&therm_ctrl_fops) == NULL)
+		gk20a_err(dev_from_gk20a(g), "debugfs entry create failed for therm_curr_temp");
+}
+#endif
+
+void gp106_init_therm_ops(struct gpu_ops *gops) {
+#ifdef CONFIG_DEBUG_FS	/* without debugfs this function installs nothing */
+	gops->therm.therm_debugfs_init = gp106_therm_debugfs_init;
+#endif /* CONFIG_DEBUG_FS */
+}
diff --git a/drivers/gpu/nvgpu/gp106/therm_gp106.h b/drivers/gpu/nvgpu/gp106/therm_gp106.h
new file mode 100644
index 00000000..6db17c47
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/therm_gp106.h
@@ -0,0 +1,22 @@
+/*
+ * general thermal control structures & definitions
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef NVGPU_THERM_GP106_H
+#define NVGPU_THERM_GP106_H
+
+#include "gk20a/gk20a.h"
+
+void gp106_init_therm_ops(struct gpu_ops *gops);
+#endif
-- 
cgit v1.2.2


From 4afc6a1659ec058fd44953ccff7a1030275bcc92 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Mon, 19 Sep 2016 09:24:13 +0300
Subject: gpu: nvgpu: compact pte buffers

The lowest page table level may hold very few entries for mappings of
large pages, but a new page is allocated for each list of entries at the
lowest level, wasting memory and performance. Compact these so that the
new "allocation" of ptes is appended at the end of the previous
allocation, if there is space.

Bug 1736604

Change-Id: I4c7c4cad9019de202325750aee6034076e7e61c2
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1222810
(cherry picked from commit 97303ecc946c17150496486a2f52bd481311dbf7)
Reviewed-on: http://git-master/r/1234995
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 39 ++++++++++++++------------------------
 1 file changed, 14 insertions(+), 25 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 03bab121..1e073ab2 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -151,18 +151,6 @@ static u32 pte3_from_index(u32 i)
 	return i * gmmu_new_pte__size_v() / sizeof(u32);
 }
 
-static u64 entry_addr(struct gk20a *g, struct gk20a_mm_entry *entry)
-{
-	u64 addr;
-
-	if (g->mm.has_physical_mode)
-		addr = sg_phys(entry->mem.sgt->sgl);
-	else
-		addr = gk20a_mem_get_base_addr(g, &entry->mem, 0);
-
-	return addr;
-}
-
 static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 			   struct gk20a_mm_entry *parent,
 			   u32 i, u32 gmmu_pgsz_idx,
@@ -176,15 +164,13 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	u64 pte_addr = 0;
-	u64 pde_addr = 0;
 	struct gk20a_mm_entry *pte = parent->entries + i;
 	u32 pde_v[2] = {0, 0};
 	u32 pde;
 
 	gk20a_dbg_fn("");
 
-	pte_addr = entry_addr(g, pte) >> gmmu_new_pde_address_shift_v();
-	pde_addr = entry_addr(g, parent);
+	pte_addr = gk20a_pde_addr(g, pte) >> gmmu_new_pde_address_shift_v();
 
 	pde_v[0] |= gk20a_aperture_mask(g, &pte->mem,
 			gmmu_new_pde_aperture_sys_mem_ncoh_f(),
@@ -194,8 +180,8 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	pde_v[1] |= pte_addr >> 24;
 	pde = pde3_from_index(i);
 
-	gk20a_mem_wr32(g, &parent->mem, pde + 0, pde_v[0]);
-	gk20a_mem_wr32(g, &parent->mem, pde + 1, pde_v[1]);
+	gk20a_pde_wr32(g, parent, pde + 0, pde_v[0]);
+	gk20a_pde_wr32(g, parent, pde + 1, pde_v[1]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
 		  i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
@@ -232,12 +218,12 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	big_valid = entry->mem.size && entry->pgsz == gmmu_page_size_big;
 
 	if (small_valid) {
-		pte_addr_small = entry_addr(g, entry)
+		pte_addr_small = gk20a_pde_addr(g, entry)
 				 >> gmmu_new_dual_pde_address_shift_v();
 	}
 
 	if (big_valid)
-		pte_addr_big = entry_addr(g, entry)
+		pte_addr_big = gk20a_pde_addr(g, entry)
 			       >> gmmu_new_dual_pde_address_big_shift_v();
 
 	if (small_valid) {
@@ -260,10 +246,10 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 
 	pde = pde0_from_index(i);
 
-	gk20a_mem_wr32(g, &pte->mem, pde + 0, pde_v[0]);
-	gk20a_mem_wr32(g, &pte->mem, pde + 1, pde_v[1]);
-	gk20a_mem_wr32(g, &pte->mem, pde + 2, pde_v[2]);
-	gk20a_mem_wr32(g, &pte->mem, pde + 3, pde_v[3]);
+	gk20a_pde_wr32(g, pte, pde + 0, pde_v[0]);
+	gk20a_pde_wr32(g, pte, pde + 1, pde_v[1]);
+	gk20a_pde_wr32(g, pte, pde + 2, pde_v[2]);
+	gk20a_pde_wr32(g, pte, pde + 3, pde_v[3]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]",
 		  i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
@@ -286,6 +272,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	u32 page_size  = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 	u64 ctag_granularity = g->ops.fb.compression_page_size(g);
 	u32 pte_w[2] = {0, 0}; /* invalid pte */
+	u32 pte_i;
 
 	if (*iova) {
 		u32 pte_valid = unmapped_pte ?
@@ -331,8 +318,10 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
 	}
 
-	gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 0, pte_w[0]);
-	gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 1, pte_w[1]);
+	pte_i = pte3_from_index(i);
+
+	gk20a_pde_wr32(g, pte, pte_i + 0, pte_w[0]);
+	gk20a_pde_wr32(g, pte, pte_i + 1, pte_w[1]);
 
 	if (*iova) {
 		*iova += page_size;
-- 
cgit v1.2.2


From 82bbd0cd5d3d82bacc5023830d0eeb21065dd5f2 Mon Sep 17 00:00:00 2001
From: Alex Waterman <alexw@nvidia.com>
Date: Fri, 9 Sep 2016 16:59:21 -0700
Subject: gpu: nvgpu: implement PCIe Gen2 frequency swap

Implement the basic code to swap between PCIe bus speeds for the GPU.
Other GPUs are not supported yet. Currently the following speeds can
be used:

  Gen1 (2500 MTPS)
  Gen2 (5000 MTPS)

gp106 on DPX2 does not support Gen3.

JIRA DNVGPU-89

Change-Id: I8bebfc9d99b682bdcff406fa56e806097dd51499
Reviewed-on: http://git-master/r/1218177
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1227925
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x  |   3 +-
 drivers/gpu/nvgpu/gp106/hal_gp106.c    |   3 +
 drivers/gpu/nvgpu/gp106/hw_xp_gp106.h  | 137 ++++++++
 drivers/gpu/nvgpu/gp106/hw_xve_gp106.h |  80 +++++
 drivers/gpu/nvgpu/gp106/xve_gp106.c    | 623 +++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/xve_gp106.h    |  99 ++++++
 6 files changed, 944 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_xp_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/xve_gp106.c
 create mode 100644 drivers/gpu/nvgpu/gp106/xve_gp106.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index eb9d5977..17b33959 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -42,7 +42,8 @@ nvgpu-y += \
 	$(nvgpu-t18x)/clk/clk.o \
 	$(nvgpu-t18x)/gp106/clk_gp106.o \
 	$(nvgpu-t18x)/gp106/gp106_gating_reglist.o \
-	$(nvgpu-t18x)/gp106/therm_gp106.o
+	$(nvgpu-t18x)/gp106/therm_gp106.o \
+	$(nvgpu-t18x)/gp106/xve_gp106.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
 
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index d07da835..2217dfea 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -29,6 +29,7 @@
 #include "gp10b/regops_gp10b.h"
 #include "gp10b/cde_gp10b.h"
 #include "gp106/therm_gp106.h"
+#include "gp106/xve_gp106.h"
 
 #include "gm206/bios_gm206.h"
 
@@ -210,6 +211,8 @@ int gp106_init_hal(struct gk20a *g)
 #endif
 	gm206_init_bios(gops);
 	gp106_init_therm_ops(gops);
+	gp106_init_xve_ops(gops);
+
 	gops->name = "gp10x";
 	gops->get_litter_value = gp106_get_litter_value;
 	gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
diff --git a/drivers/gpu/nvgpu/gp106/hw_xp_gp106.h b/drivers/gpu/nvgpu/gp106/hw_xp_gp106.h
new file mode 100644
index 00000000..40b14da1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_xp_gp106.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_xp_gp106_h_
+#define _hw_xp_gp106_h_
+
+static inline u32 xp_dl_mgr_r(u32 i)
+{
+	return 0x0008b8c0 + i*4;	/* indexed register, 4-byte stride */
+}
+static inline u32 xp_dl_mgr_safe_timing_f(u32 v)
+{
+	return (v & 0x1) << 2;	/* field is bit 2 */
+}
+static inline u32 xp_pl_link_config_r(u32 i)
+{
+	return 0x0008c040 + i*4;	/* indexed register, 4-byte stride */
+}
+static inline u32 xp_pl_link_config_ltssm_status_f(u32 v)
+{
+	return (v & 0x1) << 4;	/* field is bit 4 */
+}
+static inline u32 xp_pl_link_config_ltssm_status_idle_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 xp_pl_link_config_ltssm_directive_f(u32 v)
+{
+	return (v & 0xf) << 0;	/* field occupies bits 3:0 */
+}
+static inline u32 xp_pl_link_config_ltssm_directive_m(void)
+{
+	return 0xf << 0;
+}
+static inline u32 xp_pl_link_config_ltssm_directive_normal_operations_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 xp_pl_link_config_ltssm_directive_change_speed_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 xp_pl_link_config_max_link_rate_f(u32 v)
+{
+	return (v & 0x3) << 18;	/* field occupies bits 19:18 */
+}
+static inline u32 xp_pl_link_config_max_link_rate_m(void)
+{
+	return 0x3 << 18;
+}
+static inline u32 xp_pl_link_config_max_link_rate_2500_mtps_v(void)
+{
+	return 0x00000002;	/* field code; faster rates use smaller codes */
+}
+static inline u32 xp_pl_link_config_max_link_rate_5000_mtps_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 xp_pl_link_config_max_link_rate_8000_mtps_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 xp_pl_link_config_target_tx_width_f(u32 v)
+{
+	return (v & 0x7) << 20;	/* field occupies bits 22:20 */
+}
+static inline u32 xp_pl_link_config_target_tx_width_m(void)
+{
+	return 0x7 << 20;
+}
+static inline u32 xp_pl_link_config_target_tx_width_x1_v(void)
+{
+	return 0x00000007;	/* field code, not a lane count */
+}
+static inline u32 xp_pl_link_config_target_tx_width_x2_v(void)
+{
+	return 0x00000006;
+}
+static inline u32 xp_pl_link_config_target_tx_width_x4_v(void)
+{
+	return 0x00000005;
+}
+static inline u32 xp_pl_link_config_target_tx_width_x8_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 xp_pl_link_config_target_tx_width_x16_v(void)
+{
+	return 0x00000000;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_xve_gp106.h b/drivers/gpu/nvgpu/gp106/hw_xve_gp106.h
index 74b6cf7c..24434ae0 100644
--- a/drivers/gpu/nvgpu/gp106/hw_xve_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_xve_gp106.h
@@ -66,4 +66,84 @@ static inline u32 xve_rom_ctrl_rom_shadow_enabled_f(void)
 {
 	return 0x1;
 }
+static inline u32 xve_link_control_status_r(void)
+{
+	return 0x00000088;
+}
+static inline u32 xve_link_control_status_link_speed_m(void)
+{
+	return 0xf << 16;	/* field occupies bits 19:16 */
+}
+static inline u32 xve_link_control_status_link_speed_v(u32 r)
+{
+	return (r >> 16) & 0xf;
+}
+static inline u32 xve_link_control_status_link_speed_link_speed_2p5_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 xve_link_control_status_link_speed_link_speed_5p0_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 xve_link_control_status_link_speed_link_speed_8p0_v(void)
+{
+	return 0x00000003;
+}
+static inline u32 xve_link_control_status_link_width_m(void)
+{
+	return 0x3f << 20;	/* field occupies bits 25:20 */
+}
+static inline u32 xve_link_control_status_link_width_v(u32 r)
+{
+	return (r >> 20) & 0x3f;
+}
+static inline u32 xve_link_control_status_link_width_x1_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 xve_link_control_status_link_width_x2_v(void)
+{
+	return 0x00000002;
+}
+static inline u32 xve_link_control_status_link_width_x4_v(void)
+{
+	return 0x00000004;
+}
+static inline u32 xve_link_control_status_link_width_x8_v(void)
+{
+	return 0x00000008;
+}
+static inline u32 xve_link_control_status_link_width_x16_v(void)
+{
+	return 0x00000010;
+}
+static inline u32 xve_priv_xv_r(void)
+{
+	return 0x00000150;
+}
+static inline u32 xve_priv_xv_cya_l0s_enable_f(u32 v)
+{
+	return (v & 0x1) << 7;	/* field is bit 7 */
+}
+static inline u32 xve_priv_xv_cya_l0s_enable_m(void)
+{
+	return 0x1 << 7;
+}
+static inline u32 xve_priv_xv_cya_l0s_enable_v(u32 r)
+{
+	return (r >> 7) & 0x1;
+}
+static inline u32 xve_priv_xv_cya_l1_enable_f(u32 v)
+{
+	return (v & 0x1) << 8;	/* field is bit 8 */
+}
+static inline u32 xve_priv_xv_cya_l1_enable_m(void)
+{
+	return 0x1 << 8;
+}
+static inline u32 xve_priv_xv_cya_l1_enable_v(u32 r)
+{
+	return (r >> 8) & 0x1;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp106/xve_gp106.c b/drivers/gpu/nvgpu/gp106/xve_gp106.c
new file mode 100644
index 00000000..23a02fbd
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/xve_gp106.c
@@ -0,0 +1,623 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+
+#include "gk20a/gk20a.h"
+#include "gm206/bios_gm206.h"
+#include "gp106/xve_gp106.h"
+
+#include "gp106/hw_xp_gp106.h"
+#include "gp106/hw_xve_gp106.h"
+
+/**
+ * Init a timer and place the timeout data in @timeout.
+ */
+static void init_timeout(u32 timeout_ms, u32 *timeout)
+{
+	*timeout = jiffies + msecs_to_jiffies(timeout_ms);
+}
+
+/**
+ * Returns 1 if the current time is after @timeout i.e: the timer timed
+ * out. Returns 0 if the timer still has time left.
+ */
+static int check_timeout(u32 *timeout)
+{
+	/*
+	 * @timeout holds only the low 32 bits of the jiffies deadline, so do
+	 * the wrap-safe comparison in 32 bits as well. Widening it back to
+	 * unsigned long would make every deadline look long expired once
+	 * jiffies no longer fits in 32 bits on a 64-bit kernel.
+	 */
+	return (s32)(*timeout - (u32)jiffies) < 0;
+}
+
+static void xve_xve_writel_gp106(struct gk20a *g, u32 reg, u32 val)
+{
+	gk20a_writel(g, NV_PCFG + reg, val);
+}
+
+static u32 xve_xve_readl_gp106(struct gk20a *g, u32 reg)
+{
+	return gk20a_readl(g, NV_PCFG + reg);
+}
+
+/**
+ * Places one of:
+ *
+ *   %GPU_XVE_SPEED_2P5
+ *   %GPU_XVE_SPEED_5P0
+ *   %GPU_XVE_SPEED_8P0
+ *
+ * in the u32 pointed to by @xve_link_speed. If for some reason an unknown PCIe
+ * bus speed is detected then *@xve_link_speed is not touched and -ENODEV is
+ * returned.
+ */
+static int xve_get_speed_gp106(struct gk20a *g, u32 *xve_link_speed)
+{
+	u32 status;
+	u32 link_speed, real_link_speed = 0;
+
+	status = g->ops.xve.xve_readl(g, xve_link_control_status_r());
+
+	link_speed = xve_link_control_status_link_speed_v(status);
+
+	/*
+	 * Can't use a switch statement because case labels must be constant
+	 * expressions and these values come from function calls.
+	 */
+	if (link_speed == xve_link_control_status_link_speed_link_speed_2p5_v())
+		real_link_speed = GPU_XVE_SPEED_2P5;
+	if (link_speed == xve_link_control_status_link_speed_link_speed_5p0_v())
+		real_link_speed = GPU_XVE_SPEED_5P0;
+	if (link_speed == xve_link_control_status_link_speed_link_speed_8p0_v())
+		real_link_speed = GPU_XVE_SPEED_8P0;
+
+	if (!real_link_speed) {
+		pr_warn("%s: Unknown PCIe bus speed!\n", __func__);
+		return -ENODEV;
+	}
+
+	*xve_link_speed = real_link_speed;
+	return 0;
+}
+
+/**
+ * Set the mask for L0s in the XVE.
+ *
+ * When @status is non-zero the mask for L0s is set which _disables_ L0s. When
+ * @status is zero L0s is no longer masked and may be enabled.
+ */
+static void set_xve_l0s_mask(struct gk20a *g, bool status)
+{
+	u32 xve_priv;
+	u32 status_bit = status ? 1 : 0;
+
+	xve_priv = g->ops.xve.xve_readl(g, xve_priv_xv_r());
+
+	xve_priv = set_field(xve_priv,
+		  xve_priv_xv_cya_l0s_enable_m(),
+		  xve_priv_xv_cya_l0s_enable_f(status_bit));
+
+	g->ops.xve.xve_writel(g, xve_priv_xv_r(), xve_priv);
+}
+
+/**
+ * Set the mask for L1 in the XVE.
+ *
+ * When @status is non-zero the mask for L1 is set which _disables_ L1. When
+ * @status is zero L1 is no longer masked and may be enabled.
+ */
+static void set_xve_l1_mask(struct gk20a *g, int status)
+{
+	u32 xve_priv;
+	u32 status_bit = status ? 1 : 0;
+
+	xve_priv = g->ops.xve.xve_readl(g, xve_priv_xv_r());
+
+	xve_priv = set_field(xve_priv,
+		  xve_priv_xv_cya_l1_enable_m(),
+		  xve_priv_xv_cya_l1_enable_f(status_bit));
+
+	g->ops.xve.xve_writel(g, xve_priv_xv_r(), xve_priv);
+}
+
+/**
+ * When doing the speed change disable power saving features.
+ */
+static void disable_aspm_gp106(struct gk20a *g)
+{
+	u32 xve_priv;
+
+	xve_priv = g->ops.xve.xve_readl(g, xve_priv_xv_r());
+
+	/*
+	 * Store prior ASPM state so we can restore it later on.
+	 */
+	g->xve_l0s = xve_priv_xv_cya_l0s_enable_v(xve_priv);
+	g->xve_l1  = xve_priv_xv_cya_l1_enable_v(xve_priv);
+
+	set_xve_l0s_mask(g, true);
+	set_xve_l1_mask(g, true);
+}
+
+/**
+ * Restore the state saved by disable_aspm_gp106().
+ */
+static void enable_aspm_gp106(struct gk20a *g)
+{
+	set_xve_l0s_mask(g, g->xve_l0s);
+	set_xve_l1_mask(g, g->xve_l1);
+}
+
+/*
+ * Error checking is done in xve_set_speed_gp106.
+ */
+static int __do_xve_set_speed_gp106(struct gk20a *g, u32 next_link_speed)
+{
+	u32 current_link_speed, new_link_speed;
+	u32 dl_mgr, saved_dl_mgr;
+	u32 pl_link_config;
+	u32 link_control_status, link_speed_setting, link_width;
+	u32 timeout;
+	int attempts = 10, err_status = 0;
+
+	g->ops.xve.get_speed(g, &current_link_speed);
+	xv_sc_dbg(PRE_CHANGE, "Executing PCIe link change.");
+	xv_sc_dbg(PRE_CHANGE, "  Current speed:  %s",
+		  xve_speed_to_str(current_link_speed));
+	xv_sc_dbg(PRE_CHANGE, "  Next speed:     %s",
+		  xve_speed_to_str(next_link_speed));
+	xv_sc_dbg(PRE_CHANGE, "  PL_LINK_CONFIG: 0x%08x",
+		  gk20a_readl(g, xp_pl_link_config_r(0)));
+
+	xv_sc_dbg(DISABLE_ASPM, "Disabling ASPM...");
+	disable_aspm_gp106(g);
+	xv_sc_dbg(DISABLE_ASPM, "  Done!");
+
+	xv_sc_dbg(DL_SAFE_MODE, "Putting DL in safe mode...");
+	saved_dl_mgr = gk20a_readl(g, xp_dl_mgr_r(0));
+
+	/*
+	 * Put the DL in safe mode.
+	 */
+	dl_mgr = saved_dl_mgr;
+	dl_mgr |= xp_dl_mgr_safe_timing_f(1);
+	gk20a_writel(g, xp_dl_mgr_r(0), dl_mgr);
+	xv_sc_dbg(DL_SAFE_MODE, "  Done!");
+
+	init_timeout(GPU_XVE_TIMEOUT_MS, &timeout);
+
+	xv_sc_dbg(CHECK_LINK, "Checking for link idle...");
+	while (1) {
+		pl_link_config = gk20a_readl(g, xp_pl_link_config_r(0));
+		if ((xp_pl_link_config_ltssm_status_f(pl_link_config) ==
+		     xp_pl_link_config_ltssm_status_idle_v()) &&
+		    (xp_pl_link_config_ltssm_directive_f(pl_link_config) ==
+		     xp_pl_link_config_ltssm_directive_normal_operations_v()))
+			break;
+
+		if (check_timeout(&timeout)) {
+			err_status = -ETIMEDOUT;
+			break;
+		}
+	}
+
+	if (err_status == -ETIMEDOUT)
+		/* TODO: debug message. */
+		goto done;
+
+	xv_sc_dbg(CHECK_LINK, "  Done");
+
+	xv_sc_dbg(LINK_SETTINGS, "Preparing next link settings");
+	pl_link_config &= ~xp_pl_link_config_max_link_rate_m();
+	switch (next_link_speed) {
+	case GPU_XVE_SPEED_2P5:
+		link_speed_setting =
+			xve_link_control_status_link_speed_link_speed_2p5_v();
+		pl_link_config |= xp_pl_link_config_max_link_rate_f(
+			xp_pl_link_config_max_link_rate_2500_mtps_v());
+		break;
+	case GPU_XVE_SPEED_5P0:
+		link_speed_setting =
+			xve_link_control_status_link_speed_link_speed_5p0_v();
+		pl_link_config |= xp_pl_link_config_max_link_rate_f(
+			xp_pl_link_config_max_link_rate_5000_mtps_v());
+		break;
+	case GPU_XVE_SPEED_8P0:
+		link_speed_setting =
+			xve_link_control_status_link_speed_link_speed_8p0_v();
+		pl_link_config |= xp_pl_link_config_max_link_rate_f(
+			xp_pl_link_config_max_link_rate_8000_mtps_v());
+		break;
+	default:
+		BUG(); /* Should never be hit. */
+	}
+
+	link_control_status =
+		g->ops.xve.xve_readl(g, xve_link_control_status_r());
+	link_width = xve_link_control_status_link_width_v(link_control_status);
+
+	pl_link_config &= ~xp_pl_link_config_target_tx_width_m();
+
+	/* Can't use a switch due to oddities in register definitions. */
+	if (link_width == xve_link_control_status_link_width_x1_v())
+		pl_link_config |= xp_pl_link_config_target_tx_width_f(
+			xp_pl_link_config_target_tx_width_x1_v());
+	else if (link_width == xve_link_control_status_link_width_x2_v())
+		pl_link_config |= xp_pl_link_config_target_tx_width_f(
+			xp_pl_link_config_target_tx_width_x2_v());
+	else if (link_width == xve_link_control_status_link_width_x4_v())
+		pl_link_config |= xp_pl_link_config_target_tx_width_f(
+			xp_pl_link_config_target_tx_width_x4_v());
+	else if (link_width == xve_link_control_status_link_width_x8_v())
+		pl_link_config |= xp_pl_link_config_target_tx_width_f(
+			xp_pl_link_config_target_tx_width_x8_v());
+	else if (link_width == xve_link_control_status_link_width_x16_v())
+		pl_link_config |= xp_pl_link_config_target_tx_width_f(
+			xp_pl_link_config_target_tx_width_x16_v());
+	else
+		BUG();
+
+	xv_sc_dbg(LINK_SETTINGS, "  pl_link_config = 0x%08x", pl_link_config);
+	xv_sc_dbg(LINK_SETTINGS, "  Done");
+
+	xv_sc_dbg(EXEC_CHANGE, "Running link speed change...");
+
+	init_timeout(GPU_XVE_TIMEOUT_MS, &timeout);
+	while (1) {
+		gk20a_writel(g, xp_pl_link_config_r(0), pl_link_config);
+		if (pl_link_config ==
+		    gk20a_readl(g, xp_pl_link_config_r(0)))
+			break;
+
+		if (check_timeout(&timeout)) {
+			err_status = -ETIMEDOUT;
+			break;
+		}
+	}
+
+	if (err_status == -ETIMEDOUT)
+		goto done;
+
+	xv_sc_dbg(EXEC_CHANGE, "  Wrote PL_LINK_CONFIG.");
+
+	pl_link_config = gk20a_readl(g, xp_pl_link_config_r(0));
+
+	do {
+		pl_link_config = set_field(pl_link_config,
+			  xp_pl_link_config_ltssm_directive_m(),
+			  xp_pl_link_config_ltssm_directive_f(
+			  xp_pl_link_config_ltssm_directive_change_speed_v()));
+
+		xv_sc_dbg(EXEC_CHANGE, "  Executing change (0x%08x)!",
+			  pl_link_config);
+		gk20a_writel(g, xp_pl_link_config_r(0), pl_link_config);
+
+		/*
+		 * Poll NV_XP_PL_LINK_CONFIG until the LTSSM is idle again;
+		 * all ones means the read failed, so never accept that.
+		 */
+		init_timeout(GPU_XVE_TIMEOUT_MS, &timeout);
+		while (1) {
+			pl_link_config = gk20a_readl(g, xp_pl_link_config_r(0));
+			if (pl_link_config != 0xffffffff &&
+			    (xp_pl_link_config_ltssm_status_f(pl_link_config) ==
+			     xp_pl_link_config_ltssm_status_idle_v()) &&
+			    (xp_pl_link_config_ltssm_directive_f(pl_link_config) ==
+			     xp_pl_link_config_ltssm_directive_normal_operations_v()))
+				break;
+
+			if (check_timeout(&timeout)) {
+				err_status = -ETIMEDOUT;
+				xv_sc_dbg(EXEC_CHANGE, "  timeout; pl_link_config = 0x%x",
+					pl_link_config);
+				break;
+			}
+		}
+
+		xv_sc_dbg(EXEC_CHANGE, "  Change done... Checking status");
+
+		if (pl_link_config == 0xffffffff) {
+			WARN(1, "GPU fell of PCI bus!?");
+
+			/*
+			 * The rest of the driver is probably about to
+			 * explode...
+			 */
+			BUG();
+		}
+
+		link_control_status =
+			g->ops.xve.xve_readl(g, xve_link_control_status_r());
+		xv_sc_dbg(EXEC_CHANGE, "  target %d vs current %d",
+			  link_speed_setting,
+			  xve_link_control_status_link_speed_v(link_control_status));
+
+		if (err_status == -ETIMEDOUT)
+			xv_sc_dbg(EXEC_CHANGE, "  Oops timed out?");
+	} while (attempts-- > 0 &&
+		 link_speed_setting !=
+		 xve_link_control_status_link_speed_v(link_control_status));
+
+	xv_sc_dbg(EXEC_VERIF, "Verifying speed change...");
+
+	/*
+	 * Check that the new link speed is actually active. If we failed to
+	 * change to the new link speed then return to the link speed setting
+	 * pre-speed change.
+	 */
+	new_link_speed = xve_link_control_status_link_speed_v(
+		link_control_status);
+	if (link_speed_setting != new_link_speed) {
+		u32 link_config = gk20a_readl(g, xp_pl_link_config_r(0));
+
+		xv_sc_dbg(EXEC_VERIF, "  Current and target speeds mismatch!");
+		xv_sc_dbg(EXEC_VERIF, "    LINK_CONTROL_STATUS: 0x%08x",
+			  g->ops.xve.xve_readl(g, xve_link_control_status_r()));
+		xv_sc_dbg(EXEC_VERIF, "    Link speed is %s - should be %s",
+			  xve_speed_to_str(new_link_speed),
+			  xve_speed_to_str(link_speed_setting));
+
+		link_config &= ~xp_pl_link_config_max_link_rate_m();
+		if (new_link_speed ==
+		    xve_link_control_status_link_speed_link_speed_2p5_v())
+			link_config |= xp_pl_link_config_max_link_rate_f(
+				xp_pl_link_config_max_link_rate_2500_mtps_v());
+		else if (new_link_speed ==
+			 xve_link_control_status_link_speed_link_speed_5p0_v())
+			link_config |= xp_pl_link_config_max_link_rate_f(
+				xp_pl_link_config_max_link_rate_5000_mtps_v());
+		else if (new_link_speed ==
+			 xve_link_control_status_link_speed_link_speed_8p0_v())
+			link_config |= xp_pl_link_config_max_link_rate_f(
+				xp_pl_link_config_max_link_rate_8000_mtps_v());
+		else
+			link_config |= xp_pl_link_config_max_link_rate_f(
+				xp_pl_link_config_max_link_rate_2500_mtps_v());
+
+		gk20a_writel(g, xp_pl_link_config_r(0), link_config);
+		err_status = -ENODEV;
+	} else {
+		xv_sc_dbg(EXEC_VERIF, "  Current and target speeds match!");
+		err_status = 0;
+	}
+
+done:
+	/* Restore safe timings. */
+	xv_sc_dbg(CLEANUP, "Restoring saved DL settings...");
+	gk20a_writel(g, xp_dl_mgr_r(0), saved_dl_mgr);
+	xv_sc_dbg(CLEANUP, "  Done");
+
+	xv_sc_dbg(CLEANUP, "Re-enabling ASPM settings...");
+	enable_aspm_gp106(g);
+	xv_sc_dbg(CLEANUP, "  Done");
+
+	return err_status;
+}
+
+/**
+ * Sets the PCIe link speed to @xve_link_speed which must be one of:
+ *
+ *   %GPU_XVE_SPEED_2P5
+ *   %GPU_XVE_SPEED_5P0
+ *   %GPU_XVE_SPEED_8P0
+ *
+ * If an error is encountered an appropriate error will be returned.
+ */
+static int xve_set_speed_gp106(struct gk20a *g, u32 next_link_speed)
+{
+	u32 current_link_speed;
+	int err;
+
+	if ((next_link_speed & GPU_XVE_SPEED_MASK) == 0)
+		return -EINVAL;
+
+	err = g->ops.xve.get_speed(g, &current_link_speed);
+	if (err)
+		return err;
+
+	/* No-op. */
+	if (current_link_speed == next_link_speed)
+		return 0;
+
+	return __do_xve_set_speed_gp106(g, next_link_speed);
+}
+
+/**
+ * Places a bitmask of available speeds for gp106 in @speed_mask.
+ */
+static void xve_available_speeds_gp106(struct gk20a *g, u32 *speed_mask)
+{
+	*speed_mask = GPU_XVE_SPEED_2P5 | GPU_XVE_SPEED_5P0;
+}
+
+/* debugfs write handler: parse "Gen1"/"Gen2"/"Gen3" and set that speed. */
+static ssize_t xve_link_speed_write(struct file *filp,
+				    const char __user *buff,
+				    size_t len, loff_t *off)
+{
+	struct gk20a *g = ((struct seq_file *)filp->private_data)->private;
+	char kbuff[16];
+	u32 buff_size, check_len;
+	u32 link_speed = 0;
+	int ret;
+
+	/* Keep the final byte of kbuff zero: it is printed with %s below. */
+	buff_size = min_t(size_t, sizeof(kbuff) - 1, len);
+
+	memset(kbuff, 0, sizeof(kbuff));
+	if (copy_from_user(kbuff, buff, buff_size))
+		return -EFAULT;
+
+	check_len = strlen("Gen1");
+	if (strncmp(kbuff, "Gen1", check_len) == 0)
+		link_speed = GPU_XVE_SPEED_2P5;
+	else if (strncmp(kbuff, "Gen2", check_len) == 0)
+		link_speed = GPU_XVE_SPEED_5P0;
+	else if (strncmp(kbuff, "Gen3", check_len) == 0)
+		link_speed = GPU_XVE_SPEED_8P0;
+	else
+		gk20a_err(g->dev, "%s: Unknown PCIe speed: %s\n",
+			  __func__, kbuff);
+
+	if (!link_speed)
+		return -EINVAL;
+
+	/* Brief pause... To help rate limit this. */
+	msleep(250);
+
+	/* And actually set the speed. */
+	ret = g->ops.xve.set_speed(g, link_speed);
+	if (ret)
+		return ret;
+
+	return len;
+}
+
+static int xve_link_speed_show(struct seq_file *s, void *unused)
+{
+	struct gk20a *g = s->private;
+	u32 speed;
+	int err;
+
+	err = g->ops.xve.get_speed(g, &speed);
+	if (err)
+		return err;
+
+	seq_printf(s, "Current PCIe speed:\n  %s\n", xve_speed_to_str(speed));
+
+	return 0;
+}
+
+static int xve_link_speed_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xve_link_speed_show, inode->i_private);
+}
+
+static const struct file_operations xve_link_speed_fops = {
+	.open = xve_link_speed_open,
+	.read = seq_read,
+	.write = xve_link_speed_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int xve_available_speeds_show(struct seq_file *s, void *unused)
+{
+	struct gk20a *g = s->private;
+	u32 available_speeds;
+
+	g->ops.xve.available_speeds(g, &available_speeds);
+
+	seq_puts(s, "Available PCIe bus speeds:\n");
+	if (available_speeds & GPU_XVE_SPEED_2P5)
+		seq_puts(s, "  Gen1\n");
+	if (available_speeds & GPU_XVE_SPEED_5P0)
+		seq_puts(s, "  Gen2\n");
+	if (available_speeds & GPU_XVE_SPEED_8P0)
+		seq_puts(s, "  Gen3\n");
+
+	return 0;
+}
+
+static int xve_available_speeds_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xve_available_speeds_show, inode->i_private);
+}
+
+static const struct file_operations xve_available_speeds_fops = {
+	.open = xve_available_speeds_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int xve_link_control_status_show(struct seq_file *s, void *unused)
+{
+	struct gk20a *g = s->private;
+	u32 link_status;
+
+	link_status = g->ops.xve.xve_readl(g, xve_link_control_status_r());
+	seq_printf(s, "0x%08x\n", link_status);
+
+	return 0;
+}
+
+static int xve_link_control_status_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xve_link_control_status_show, inode->i_private);
+}
+
+static const struct file_operations xve_link_control_status_fops = {
+	.open = xve_link_control_status_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int xve_sw_init_gp106(struct device *dev)
+{
+	int err = -ENODEV;
+#ifdef CONFIG_DEBUG_FS
+	struct gk20a *g = get_gk20a(dev);
+	struct gk20a_platform *plat = gk20a_get_platform(dev);
+	struct dentry *gpu_root = plat->debugfs;
+
+	g->debugfs_xve = debugfs_create_dir("xve", gpu_root);
+	if (IS_ERR_OR_NULL(g->debugfs_xve))
+		goto fail;
+
+	/*
+	 * These are just debug nodes. If they fail to get made it's not worth
+	 * worrying the higher level SW.
+	 */
+	debugfs_create_file("link_speed", S_IRUGO,
+			    g->debugfs_xve, g,
+			    &xve_link_speed_fops);
+	debugfs_create_file("available_speeds", S_IRUGO,
+			    g->debugfs_xve, g,
+			    &xve_available_speeds_fops);
+	debugfs_create_file("link_control_status", S_IRUGO,
+			    g->debugfs_xve, g,
+			    &xve_link_control_status_fops);
+
+	err = 0;
+fail:
+	return err;
+#else
+	return err;
+#endif
+}
+
+/*
+ * Init the HAL functions and what not. xve_sw_init_gp106() is for initializing
+ * all the other stuff like debugfs nodes, etc.
+ */
+int gp106_init_xve_ops(struct gpu_ops *gops)
+{
+	gops->xve.sw_init          = xve_sw_init_gp106;
+	gops->xve.get_speed        = xve_get_speed_gp106;
+	gops->xve.set_speed        = xve_set_speed_gp106;
+	gops->xve.available_speeds = xve_available_speeds_gp106;
+	gops->xve.xve_readl        = xve_xve_readl_gp106;
+	gops->xve.xve_writel       = xve_xve_writel_gp106;
+
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/gp106/xve_gp106.h b/drivers/gpu/nvgpu/gp106/xve_gp106.h
new file mode 100644
index 00000000..65c75bf0
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/xve_gp106.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __XVE_GP106_H__
+#define __XVE_GP106_H__
+
+#include "gk20a/gk20a.h"
+
+int gp106_init_xve_ops(struct gpu_ops *gops);
+
+/*
+ * Best guess for a reasonable timeout.
+ */
+#define GPU_XVE_TIMEOUT_MS	500
+
+/*
+ * For the available speeds bitmap.
+ */
+#define GPU_XVE_SPEED_2P5	(1 << 0)
+#define GPU_XVE_SPEED_5P0	(1 << 1)
+#define GPU_XVE_SPEED_8P0	(1 << 2)
+#define GPU_XVE_NR_SPEEDS	3
+
+#define GPU_XVE_SPEED_MASK	(GPU_XVE_SPEED_2P5 |	\
+				 GPU_XVE_SPEED_5P0 |	\
+				 GPU_XVE_SPEED_8P0)
+
+/*
+ * The HW uses a 4 bit field where speed is defined by a number:
+ *
+ *   NV_XVE_LINK_CONTROL_STATUS_LINK_SPEED_2P5 = 1
+ *   NV_XVE_LINK_CONTROL_STATUS_LINK_SPEED_5P0 = 2
+ *   NV_XVE_LINK_CONTROL_STATUS_LINK_SPEED_8P0 = 3
+ *
+ * This isn't ideal for a bitmap with available speeds. So the external
+ * APIs think about speeds as a bit in a bitmap and this function converts
+ * from those bits to the actual HW speed setting.
+ *
+ * @speed_bit must have only 1 bit set and must be one of the 3 available
+ * HW speeds; anything else yields -EINVAL converted to u32, which is not a
+ * valid HW setting. Use available_speeds() to see what a chip supports.
+ */
+static inline u32 xve_speed_to_hw_speed_setting(u32 speed_bit)
+{
+	if (!speed_bit ||
+	    !is_power_of_2(speed_bit) ||
+	    !(speed_bit & GPU_XVE_SPEED_MASK))
+		return -EINVAL;
+
+	return ilog2(speed_bit) + 1;
+}
+
+static inline const char *xve_speed_to_str(u32 speed)
+{
+	if (!speed || !is_power_of_2(speed) ||
+	    !(speed & GPU_XVE_SPEED_MASK))
+		return "Unknown ???";
+
+	return speed & GPU_XVE_SPEED_2P5 ? "Gen1" :
+	       speed & GPU_XVE_SPEED_5P0 ? "Gen2" :
+	       speed & GPU_XVE_SPEED_8P0 ? "Gen3" :
+	       "Unknown ???";
+}
+
+/*
+ * Debugging for the speed change.
+ */
+enum xv_speed_change_steps {
+	PRE_CHANGE = 0,
+	DISABLE_ASPM,
+	DL_SAFE_MODE,
+	CHECK_LINK,
+	LINK_SETTINGS,
+	EXEC_CHANGE,
+	EXEC_VERIF,
+	CLEANUP
+};
+
+#define xv_dbg(fmt, args...)			\
+	gk20a_dbg(gpu_dbg_xv, fmt, ##args)
+
+#define xv_sc_dbg(step, fmt, args...)					\
+	xv_dbg("[%d] %15s | " fmt, step, __stringify(step), ##args)
+
+
+#endif
-- 
cgit v1.2.2


From c527b36daa2019bbf1ba2a3c9b9474d8c7316ff0 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Mon, 17 Oct 2016 14:04:24 +0530
Subject: gpu: nvgpu: add accessors for invalid ctx_status

Bug 1826768

Change-Id: I8be2b9c074868206cb95b3bc84d66ea84683b19a
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1237522
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: Cory Perry <cperry@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/hw_fifo_gp106.h | 4 ++++
 drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hw_fifo_gp106.h b/drivers/gpu/nvgpu/gp106/hw_fifo_gp106.h
index 763b58df..ec02257e 100644
--- a/drivers/gpu/nvgpu/gp106/hw_fifo_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_fifo_gp106.h
@@ -430,6 +430,10 @@ static inline u32 fifo_engine_status_ctx_status_v(u32 r)
 {
 	return (r >> 13) & 0x7;
 }
+static inline u32 fifo_engine_status_ctx_status_invalid_v(void)
+{
+	return 0x00000000;
+}
 static inline u32 fifo_engine_status_ctx_status_valid_v(void)
 {
 	return 0x00000001;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
index 89037a7c..8370d4c6 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_fifo_gp10b.h
@@ -434,6 +434,10 @@ static inline u32 fifo_engine_status_ctx_status_v(u32 r)
 {
 	return (r >> 13) & 0x7;
 }
+static inline u32 fifo_engine_status_ctx_status_invalid_v(void)
+{
+	return 0x00000000;
+}
 static inline u32 fifo_engine_status_ctx_status_valid_v(void)
 {
 	return 0x00000001;
-- 
cgit v1.2.2


From 4a94ce451b0352ce67e11a2971bbbd75c2e58df1 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 13 Sep 2016 10:53:14 -0700
Subject: gpu: nvgpu: Move ELCG programming to therm

Implement gp10b and gp106 ELCG programming.

JIRA DNVGPU-74

Change-Id: Ic0349b948a2870e0d39e95ddd2f49231e7b4cbe0
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1220431
(cherry picked from commit d6bc48647982babdf642ea6004d4208c5daa243f)
Reviewed-on: http://git-master/r/1239422
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp106/hw_therm_gp106.h | 80 ++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/therm_gp106.c    | 45 ++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/therm_gp10b.c    | 33 +++++++++++--
 3 files changed, 154 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hw_therm_gp106.h b/drivers/gpu/nvgpu/gp106/hw_therm_gp106.h
index ecc50980..36ffcc7a 100644
--- a/drivers/gpu/nvgpu/gp106/hw_therm_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_therm_gp106.h
@@ -94,4 +94,84 @@ static inline u32 therm_temp_sensor_tsense_state_shadow_v(void)
 {
 	return 0x00000002;
 }
+static inline u32 therm_gate_ctrl_r(u32 i)
+{
+	return 0x00020200 + i*4;
+}
+static inline u32 therm_gate_ctrl_eng_clk_m(void)
+{
+	return 0x3 << 0;
+}
+static inline u32 therm_gate_ctrl_eng_clk_run_f(void)
+{
+	return 0x0;
+}
+static inline u32 therm_gate_ctrl_eng_clk_auto_f(void)
+{
+	return 0x1;
+}
+static inline u32 therm_gate_ctrl_eng_clk_stop_f(void)
+{
+	return 0x2;
+}
+static inline u32 therm_gate_ctrl_blk_clk_m(void)
+{
+	return 0x3 << 2;
+}
+static inline u32 therm_gate_ctrl_blk_clk_run_f(void)
+{
+	return 0x0;
+}
+static inline u32 therm_gate_ctrl_blk_clk_auto_f(void)
+{
+	return 0x4;
+}
+static inline u32 therm_gate_ctrl_eng_idle_filt_exp_f(u32 v)
+{
+	return (v & 0x1f) << 8;
+}
+static inline u32 therm_gate_ctrl_eng_idle_filt_exp_m(void)
+{
+	return 0x1f << 8;
+}
+static inline u32 therm_gate_ctrl_eng_idle_filt_mant_f(u32 v)
+{
+	return (v & 0x7) << 13;
+}
+static inline u32 therm_gate_ctrl_eng_idle_filt_mant_m(void)
+{
+	return 0x7 << 13;
+}
+static inline u32 therm_gate_ctrl_eng_delay_before_f(u32 v)
+{
+	return (v & 0xf) << 16;
+}
+static inline u32 therm_gate_ctrl_eng_delay_before_m(void)
+{
+	return 0xf << 16;
+}
+static inline u32 therm_gate_ctrl_eng_delay_after_f(u32 v)
+{
+	return (v & 0xf) << 20;
+}
+static inline u32 therm_gate_ctrl_eng_delay_after_m(void)
+{
+	return 0xf << 20;
+}
+static inline u32 therm_fecs_idle_filter_r(void)
+{
+	return 0x00020288;
+}
+static inline u32 therm_fecs_idle_filter_value_m(void)
+{
+	return 0xffffffff << 0;
+}
+static inline u32 therm_hubmmu_idle_filter_r(void)
+{
+	return 0x0002028c;
+}
+static inline u32 therm_hubmmu_idle_filter_value_m(void)
+{
+	return 0xffffffff << 0;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp106/therm_gp106.c b/drivers/gpu/nvgpu/gp106/therm_gp106.c
index 153e953d..a3aa3636 100644
--- a/drivers/gpu/nvgpu/gp106/therm_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/therm_gp106.c
@@ -55,8 +55,53 @@ static void gp106_therm_debugfs_init(struct gk20a *g) {
 }
 #endif
 
+static int gp106_elcg_init_idle_filters(struct gk20a *g)
+{
+	u32 gate_ctrl, idle_filter;
+	u32 engine_id;
+	u32 active_engine_id = 0;
+	struct fifo_gk20a *f = &g->fifo;
+
+	gk20a_dbg_fn("");
+
+	for (engine_id = 0; engine_id < f->num_engines; engine_id++) {
+		active_engine_id = f->active_engines_list[engine_id];
+		gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(active_engine_id));
+
+		if (tegra_platform_is_linsim()) {
+			gate_ctrl = set_field(gate_ctrl,
+				therm_gate_ctrl_eng_delay_after_m(),
+				therm_gate_ctrl_eng_delay_after_f(4));
+		}
+
+		gate_ctrl = set_field(gate_ctrl,
+			therm_gate_ctrl_eng_idle_filt_exp_m(),
+			therm_gate_ctrl_eng_idle_filt_exp_f(2));
+		gate_ctrl = set_field(gate_ctrl,
+			therm_gate_ctrl_eng_idle_filt_mant_m(),
+			therm_gate_ctrl_eng_idle_filt_mant_f(1));
+		gate_ctrl = set_field(gate_ctrl,
+			therm_gate_ctrl_eng_delay_before_m(),
+			therm_gate_ctrl_eng_delay_before_f(0));
+		gk20a_writel(g, therm_gate_ctrl_r(active_engine_id), gate_ctrl);
+	}
+
+	/* default fecs_idle_filter to 0 */
+	idle_filter = gk20a_readl(g, therm_fecs_idle_filter_r());
+	idle_filter &= ~therm_fecs_idle_filter_value_m();
+	gk20a_writel(g, therm_fecs_idle_filter_r(), idle_filter);
+	/* default hubmmu_idle_filter to 0 */
+	idle_filter = gk20a_readl(g, therm_hubmmu_idle_filter_r());
+	idle_filter &= ~therm_hubmmu_idle_filter_value_m();
+	gk20a_writel(g, therm_hubmmu_idle_filter_r(), idle_filter);
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
 void gp106_init_therm_ops(struct gpu_ops *gops) {
 #ifdef CONFIG_DEBUG_FS
 	gops->therm.therm_debugfs_init = gp106_therm_debugfs_init;
 #endif
+	gops->therm.elcg_init_idle_filters = gp106_elcg_init_idle_filters;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/therm_gp10b.c b/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
index 63efc945..7f43cb56 100644
--- a/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/therm_gp10b.c
@@ -78,28 +78,53 @@ static int gp10b_init_therm_setup_hw(struct gk20a *g)
 	return 0;
 }
 
-static int gp10b_update_therm_gate_ctrl(struct gk20a *g)
+static int gp10b_elcg_init_idle_filters(struct gk20a *g)
 {
-	u32 gate_ctrl;
+	u32 gate_ctrl, idle_filter;
 	u32 engine_id;
 	u32 active_engine_id = 0;
 	struct fifo_gk20a *f = &g->fifo;
 
+	gk20a_dbg_fn("");
+
 	for (engine_id = 0; engine_id < f->num_engines; engine_id++) {
 		active_engine_id = f->active_engines_list[engine_id];
 		gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(active_engine_id));
+
+		if (tegra_platform_is_linsim()) {
+			gate_ctrl = set_field(gate_ctrl,
+				therm_gate_ctrl_eng_delay_after_m(),
+				therm_gate_ctrl_eng_delay_after_f(4));
+		}
+
+		/* 2 * (1 << 9) = 1024 clks */
+		gate_ctrl = set_field(gate_ctrl,
+			therm_gate_ctrl_eng_idle_filt_exp_m(),
+			therm_gate_ctrl_eng_idle_filt_exp_f(9));
+		gate_ctrl = set_field(gate_ctrl,
+			therm_gate_ctrl_eng_idle_filt_mant_m(),
+			therm_gate_ctrl_eng_idle_filt_mant_f(2));
 		gate_ctrl = set_field(gate_ctrl,
 			therm_gate_ctrl_eng_delay_before_m(),
 			therm_gate_ctrl_eng_delay_before_f(4));
 		gk20a_writel(g, therm_gate_ctrl_r(active_engine_id), gate_ctrl);
 	}
 
+	/* default fecs_idle_filter to 0 */
+	idle_filter = gk20a_readl(g, therm_fecs_idle_filter_r());
+	idle_filter &= ~therm_fecs_idle_filter_value_m();
+	gk20a_writel(g, therm_fecs_idle_filter_r(), idle_filter);
+	/* default hubmmu_idle_filter to 0 */
+	idle_filter = gk20a_readl(g, therm_hubmmu_idle_filter_r());
+	idle_filter &= ~therm_hubmmu_idle_filter_value_m();
+	gk20a_writel(g, therm_hubmmu_idle_filter_r(), idle_filter);
+
+	gk20a_dbg_fn("done");
 	return 0;
 }
 
 void gp10b_init_therm_ops(struct gpu_ops *gops)
 {
 	gops->therm.init_therm_setup_hw = gp10b_init_therm_setup_hw;
-	gops->therm.update_therm_gate_ctrl = gp10b_update_therm_gate_ctrl;
-
+	gops->therm.elcg_init_idle_filters = gp10b_elcg_init_idle_filters;
 }
-- 
cgit v1.2.2


From 905f1c0392bf244b321f56f82661eeb2fe00ee05 Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Fri, 19 Aug 2016 17:09:35 -0700
Subject: gpu: nvgpu: parse and execute mclk shadow script

 * Parsing of shadow registers from VBIOS
 * Partial devinit engine interpreter implementation

JIRA DNVGPU-117

Change-Id: I42179748889f17d674ad0a986e81c418b3b8df11
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1214956
Reviewed-on: http://git-master/r/1237293
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x  |   1 +
 drivers/gpu/nvgpu/clk/clk_mclk.c       | 278 +++++++++++++++++++++++----------
 drivers/gpu/nvgpu/clk/clk_mclk.h       |   3 +-
 drivers/gpu/nvgpu/gp106/bios_gp106.c   | 121 ++++++++++++++
 drivers/gpu/nvgpu/gp106/bios_gp106.h   |  31 ++++
 drivers/gpu/nvgpu/gp106/hal_gp106.c    |   4 +-
 drivers/gpu/nvgpu/gp106/hw_fb_gp106.h  |  72 +++++++++
 drivers/gpu/nvgpu/gp106/hw_gc6_gp106.h |  56 +++++++
 drivers/gpu/nvgpu/include/bios.h       |  94 +++++++++++
 9 files changed, 572 insertions(+), 88 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp106/bios_gp106.c
 create mode 100644 drivers/gpu/nvgpu/gp106/bios_gp106.h
 create mode 100644 drivers/gpu/nvgpu/gp106/hw_gc6_gp106.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index 17b33959..ceae6006 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -29,6 +29,7 @@ nvgpu-y += \
 	$(nvgpu-t18x)/gp106/fifo_gp106.o \
 	$(nvgpu-t18x)/gp106/ltc_gp106.o \
 	$(nvgpu-t18x)/gp106/fb_gp106.o \
+	$(nvgpu-t18x)/gp106/bios_gp106.o \
 	$(nvgpu-t18x)/clk/clk_mclk.o \
 	$(nvgpu-t18x)/pstate/pstate.o \
 	$(nvgpu-t18x)/clk/clk_vin.o \
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.c b/drivers/gpu/nvgpu/clk/clk_mclk.c
index df010221..b63fab1e 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.c
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.c
@@ -17,6 +17,9 @@
 #include "gm206/bios_gm206.h"
 #include "gk20a/pmu_gk20a.h"
 #include "gk20a/hw_pwr_gk20a.h"
+#include "gp106/hw_fb_gp106.h"
+
+#include "include/bios.h"
 
 #define VREG_COUNT 24
 
@@ -29,63 +32,6 @@ struct memory_link_training_pattern {
 	u32 writeval;
 };
 
-static struct memory_link_training_pattern memory_shadow_p0_reglist[] = {
-	{0x9a065c, 0x20},
-	{0x98467c, 0xffff0000},
-	{0x984708, 0x30550},
-	{0x98470c, 0x4C4C},
-	{0x9006a0, 0x03030303},
-	{0x9006a4, 0x03030303},
-	{0x9046a0, 0x03030303},
-	{0x9046a4, 0x03030303},
-	{0x9086a0, 0x03030303},
-	{0x9086a4, 0x03030303},
-	{0x9846a8, 0x03030303},
-	{0x9846ac, 0x03030303},
-	{0x9a065c, 0x00},
-};
-
-static struct memory_link_training_pattern memory_shadow_p5_reglist[] = {
-	{0x9a065c, 0x10},
-	{0x98467c, 0xfff10000},
-	{0x984708, 0x30002},
-	{0x98470c, 0x1414},
-	{0x9006a0, 0x12121212},
-	{0x9006a4, 0x12121212},
-	{0x9046a0, 0x12121212},
-	{0x9046a4, 0x12121212},
-	{0x9086a0, 0x12121212},
-	{0x9086a4, 0x12121212},
-	{0x90c6a0, 0x12121212},
-	{0x90c6a4, 0x12121212},
-	{0x9106a0, 0x12121212},
-	{0x9106a4, 0x12121212},
-	{0x9146a0, 0x12121212},
-	{0x9146a4, 0x12121212},
-	{0x9a065c, 0x0},
-	{0x9a08e0, 0x10},
-	{0x9846a8, 0x0f0f0f0f},
-	{0x9846ac, 0x0f0f0f0f},
-	{0x984d98, 0x22222222},
-	{0x984d9c, 0x22222222},
-	{0x984da0, 0x22222222},
-	{0x984da4, 0x22222222},
-	{0x984da8, 0x22222222},
-	{0x984dac, 0x22222222},
-	{0x984dac, 0x22222222},
-	{0x984d70, 0x0},
-	{0x984d74, 0x0},
-	{0x984d78, 0x0},
-	{0x984d7c, 0x0},
-	{0x984d80, 0x0},
-	{0x984d84, 0x0},
-	{0x984d88, 0x0},
-	{0x984d8c, 0x0},
-	{0x984d90, 0x0},
-	{0x984d94, 0x0},
-	{0x9a08e0, 0x0},
-};
-
 static struct memory_link_training_pattern memory_pattern_reglist[] = {
 	{0x9a0968,	0x0},
 	{0x9a0920,	0x0},
@@ -2026,31 +1972,6 @@ static void mclk_memory_load_training_pattern(struct gk20a *g)
 	gk20a_dbg_fn("done");
 }
 
-static void mclk_memory_load_shadow_regs(struct gk20a *g)
-{
-	u32 reg_writes;
-	u32 index;
-
-	gk20a_dbg_info("");
-
-	reg_writes = ((sizeof(memory_shadow_p0_reglist) /
-				sizeof((memory_shadow_p0_reglist)[0])));
-	for (index = 0; index < reg_writes; index++) {
-		gk20a_writel(g, memory_shadow_p0_reglist[index].regaddr,
-				memory_shadow_p0_reglist[index].writeval);
-	}
-
-	reg_writes = ((sizeof(memory_shadow_p5_reglist) /
-				sizeof((memory_shadow_p5_reglist)[0])));
-	for (index = 0; index < reg_writes; index++) {
-		gk20a_writel(g, memory_shadow_p5_reglist[index].regaddr,
-				memory_shadow_p5_reglist[index].writeval);
-	}
-
-	gk20a_dbg_fn("done");
-
-}
-
 static void mclk_seq_pmucmdhandler(struct gk20a *g, struct pmu_msg *_msg,
 			void *param, u32 handle, u32 status)
 {
@@ -2082,9 +2003,189 @@ status_update:
 	*((u32 *)param) = msg_status;
 }
 
+/*
+ * Parse the VBIOS memory clock table and execute each entry's shadow and cmd
+ * shadow scripts once, with a PRIV index selected in the FBIO delay registers.
+ * Returns 0 on success (or when the BIOS ops are absent), -EPERM without a
+ * table, -EINVAL on a bad table/index, else the execute_script error.
+ */
+static int mclk_get_memclk_table(struct gk20a *g)
+{
+	int status = 0;
+	u8 *mem_table_ptr = NULL;
+	u32 idx_to_ptr_tbl[8];
+	u32 idx_to_cmd_ptr_tbl[8];
+
+	u32 old_fbio_delay;
+	u32 old_fbio_cmd_delay;
+	u32 cmd_idx;
+	u32 shadow_idx;
+
+	struct vbios_memory_clock_header_1x memclock_table_header = { 0 };
+	struct vbios_memory_clock_base_entry_11 memclock_base_entry = { 0 };
+
+	u8 *mem_entry_ptr = NULL;
+	size_t entry_copy_size;
+	u32 entry_stride;
+	int index;
+
+	gk20a_dbg_info("");
+
+	if (!(g->ops.bios.get_perf_table_ptrs &&
+		g->ops.bios.execute_script)) {
+		goto done;
+	}
+
+	mem_table_ptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+					g->bios.perf_token,
+					MEMORY_CLOCK_TABLE);
+	if (mem_table_ptr == NULL) {
+		status = -EPERM;
+		goto done;
+	}
+
+	memcpy(&memclock_table_header, mem_table_ptr,
+		sizeof(memclock_table_header));
+
+	if ((memclock_table_header.version <
+		VBIOS_MEMORY_CLOCK_HEADER_11_VERSION) ||
+		(memclock_table_header.base_entry_size <
+		VBIOS_MEMORY_CLOCK_BASE_ENTRY_11_2_SIZE)) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	/* never copy more than the local entry struct can hold */
+	entry_copy_size = memclock_table_header.base_entry_size;
+	if (entry_copy_size > sizeof(memclock_base_entry))
+		entry_copy_size = sizeof(memclock_base_entry);
+	/* each base entry is followed by its strap entries */
+	entry_stride = memclock_table_header.base_entry_size +
+		memclock_table_header.strap_entry_count *
+		memclock_table_header.strap_entry_size;
+
+	/* reset and save shadow table map and registers */
+	old_fbio_delay = gk20a_readl(g, fb_fbpa_fbio_delay_r());
+	old_fbio_cmd_delay = gk20a_readl(g, fb_fbpa_fbio_cmd_delay_r());
+
+	memset(idx_to_ptr_tbl, 0, sizeof(idx_to_ptr_tbl));
+	memset(idx_to_cmd_ptr_tbl, 0, sizeof(idx_to_cmd_ptr_tbl));
+
+	/* Read table entries; stepping in the for header covers "continue" */
+	mem_entry_ptr = mem_table_ptr + memclock_table_header.header_size;
+	for (index = 0; index < memclock_table_header.entry_count;
+			index++, mem_entry_ptr += entry_stride) {
+		u8 script_index, cmd_script_index;
+		u32 script_ptr = 0, cmd_script_ptr = 0;
+
+		memcpy(&memclock_base_entry, mem_entry_ptr, entry_copy_size);
+		if (memclock_base_entry.maximum == 0)
+			continue;
+
+		script_index = BIOS_GET_FIELD(memclock_base_entry.flags1,
+			VBIOS_MEMORY_CLOCK_BASE_ENTRY_11_FLAGS1_SCRIPT_INDEX);
+
+		script_ptr = gm206_bios_read_u32(g,
+			memclock_table_header.script_list_ptr +
+				script_index * sizeof(u32));
+
+		if (!script_ptr)
+			continue;
+
+		/* Link and execute shadow scripts */
+		for (shadow_idx = 0; shadow_idx <= fb_fbpa_fbio_delay_priv_max_v();
+				++shadow_idx) {
+			if (script_ptr == idx_to_ptr_tbl[shadow_idx])
+				break;
+		}
+
+		/* script has not been executed before */
+		if (shadow_idx > fb_fbpa_fbio_delay_priv_max_v()) {
+			/* find unused index; "<=" so a full table is detected */
+			for (shadow_idx = 0; shadow_idx <=
+					fb_fbpa_fbio_delay_priv_max_v();
+					++shadow_idx) {
+				if (idx_to_ptr_tbl[shadow_idx] == 0)
+					break;
+			}
+
+			if (shadow_idx > fb_fbpa_fbio_delay_priv_max_v()) {
+				gk20a_err(dev_from_gk20a(g),
+				"invalid shadow reg script index");
+				status = -EINVAL;
+				goto done;
+			}
+
+			idx_to_ptr_tbl[shadow_idx] = script_ptr;
+
+			gk20a_writel(g, fb_fbpa_fbio_delay_r(),
+				set_field(old_fbio_delay,
+					fb_fbpa_fbio_delay_priv_m(),
+					fb_fbpa_fbio_delay_priv_f(shadow_idx)));
+
+			status = g->ops.bios.execute_script(g, script_ptr);
+			gk20a_writel(g, fb_fbpa_fbio_delay_r(), old_fbio_delay);
+			if (status < 0)
+				goto done;
+		}
+
+		cmd_script_index = BIOS_GET_FIELD(memclock_base_entry.flags2,
+			VBIOS_MEMORY_CLOCK_BASE_ENTRY_12_FLAGS2_CMD_SCRIPT_INDEX);
+
+		cmd_script_ptr = gm206_bios_read_u32(g,
+			memclock_table_header.cmd_script_list_ptr +
+				cmd_script_index * sizeof(u32));
+
+		if (!cmd_script_ptr)
+			continue;
+
+		/* Link and execute cmd shadow scripts */
+		for (cmd_idx = 0; cmd_idx <= fb_fbpa_fbio_cmd_delay_cmd_priv_max_v();
+				++cmd_idx) {
+			if (cmd_script_ptr == idx_to_cmd_ptr_tbl[cmd_idx])
+				break;
+		}
+
+		/* script has not been executed before */
+		if (cmd_idx > fb_fbpa_fbio_cmd_delay_cmd_priv_max_v()) {
+			/* find unused index; "<=" so a full table is detected */
+			for (cmd_idx = 0; cmd_idx <=
+					fb_fbpa_fbio_cmd_delay_cmd_priv_max_v();
+					++cmd_idx) {
+				if (idx_to_cmd_ptr_tbl[cmd_idx] == 0)
+					break;
+			}
+
+			if (cmd_idx > fb_fbpa_fbio_cmd_delay_cmd_priv_max_v()) {
+				gk20a_err(dev_from_gk20a(g),
+				"invalid shadow reg cmd script index");
+				status = -EINVAL;
+				goto done;
+			}
+
+			idx_to_cmd_ptr_tbl[cmd_idx] = cmd_script_ptr;
+			gk20a_writel(g, fb_fbpa_fbio_cmd_delay_r(),
+				set_field(old_fbio_cmd_delay,
+					fb_fbpa_fbio_cmd_delay_cmd_priv_m(),
+					fb_fbpa_fbio_cmd_delay_cmd_priv_f(
+						cmd_idx)));
+
+			status = g->ops.bios.execute_script(g, cmd_script_ptr);
+			gk20a_writel(g, fb_fbpa_fbio_cmd_delay_r(),
+				old_fbio_cmd_delay);
+			if (status < 0)
+				goto done;
+		}
+	}
+
+done:
+	return status;
+}
+
 int clk_mclkseq_init_mclk_gddr5(struct gk20a *g)
 {
 	struct clk_mclk_state *mclk;
+	int status;
 
 	gk20a_dbg_fn("");
 
@@ -2094,8 +2195,10 @@ int clk_mclkseq_init_mclk_gddr5(struct gk20a *g)
 
 	mclk->speed = gk20a_mclk_low_speed; /* Value from Devinit */
 
-	/* Load Shadow registers */
-	mclk_memory_load_shadow_regs(g);
+	/* Parse VBIOS */
+	status = mclk_get_memclk_table(g);
+	if (status < 0)
+		return status;
 
 	/* Load RAM pattern */
 	mclk_memory_load_training_pattern(g);
@@ -2115,6 +2218,8 @@ int clk_mclkseq_init_mclk_gddr5(struct gk20a *g)
 #endif
 	mclk->change = clk_mclkseq_change_mclk_gddr5;
 
+	mclk->init = true;
+
 	return mclk->change(g, DEFAULT_BOOT_MCLK_SPEED);
 }
 
@@ -2125,7 +2230,7 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, enum gk20a_mclk_speed speed)
 	struct nv_pmu_seq_cmd cmd;
 	struct nv_pmu_seq_cmd_run_script *pseq_cmd;
 	u32 seqdesc;
-	u32 status = 0;
+	int status = 0;
 	u32 seq_completion_status = ~0x0;
 	u8 *seq_script_ptr = NULL;
 	size_t seq_script_size = 0;
@@ -2139,6 +2244,9 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, enum gk20a_mclk_speed speed)
 
 	mutex_lock(&mclk->mclk_mutex);
 
+	if (!mclk->init)
+		goto exit_status;
+
 	if (speed == mclk->speed)
 		goto exit_status;
 
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.h b/drivers/gpu/nvgpu/clk/clk_mclk.h
index c3261eac..edb7eb78 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.h
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.h
@@ -19,13 +19,14 @@
 enum gk20a_mclk_speed {
 	gk20a_mclk_low_speed,
 	gk20a_mclk_mid_speed,
-	gk20a_mclk_high_speed
+	gk20a_mclk_high_speed,
 };
 
 struct clk_mclk_state {
 	enum gk20a_mclk_speed speed;
 	struct mutex mclk_mutex;
 	void *vreg_buf;
+	bool init;
 
 	/* function pointers */
 	int (*change)(struct gk20a *g, enum gk20a_mclk_speed speed);
diff --git a/drivers/gpu/nvgpu/gp106/bios_gp106.c b/drivers/gpu/nvgpu/gp106/bios_gp106.c
new file mode 100644
index 00000000..8be4314d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/bios_gp106.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "gm206/bios_gm206.h"
+#include "bios_gp106.h"
+#include "hw_gc6_gp106.h"
+
+/* INIT_XMEMSEL_ZM_NV_REG_ARRAY handler; a no-op while *condition is false. */
+static void gp106_init_xmemsel_zm_nv_reg_array(struct gk20a *g, bool *condition,
+	u32 reg, u32 stride, u32 count, u32 data_table_offset)
+{
+	u8 entry;
+	u32 strap_sel, value;
+
+	if (!*condition)
+		return;
+
+	/* raw strap is the low nibble; translate it when the VBIOS has a table */
+	strap_sel = gk20a_readl(g, gc6_sci_strap_r()) & 0xf;
+	if (g->bios.mem_strap_xlat_tbl_ptr)
+		strap_sel = gm206_bios_read_u8(g,
+			g->bios.mem_strap_xlat_tbl_ptr + strap_sel);
+
+	for (entry = 0; entry < count; entry++, reg += stride) {
+		value = gm206_bios_read_u32(g, data_table_offset +
+			(entry * g->bios.mem_strap_data_count + strap_sel) *
+			sizeof(u32));
+		gk20a_writel(g, reg, value);
+	}
+}
+
+/* INIT_CONDITION: clear *condition unless (reg & mask) == compare. */
+static void gp106_init_condition(struct gk20a *g, bool *condition,
+	u32 condition_id)
+{
+	struct condition_entry entry;
+	/* entries are packed back to back in the VBIOS condition table */
+	u32 entry_off = g->bios.condition_table_ptr +
+		sizeof(entry) * condition_id;
+
+	entry.cond_addr = gm206_bios_read_u32(g, entry_off);
+	entry.cond_mask = gm206_bios_read_u32(g, entry_off + 4);
+	entry.cond_compare = gm206_bios_read_u32(g, entry_off + 8);
+
+	if ((gk20a_readl(g, entry.cond_addr) & entry.cond_mask) !=
+			entry.cond_compare)
+		*condition = false;
+}
+
+/*
+ * Interpret the devinit script located at @offset in the VBIOS image.
+ *
+ * Only the four opcodes handled below are understood. Returns 0 when
+ * INIT_DONE is reached; any other opcode is logged and ends the run
+ * with -EINVAL.
+ */
+static int gp106_execute_script(struct gk20a *g, u32 offset)
+{
+	u32 ip = offset;
+	bool condition = true;
+
+	for (;;) {
+		u32 reg, stride, count, cond_id;
+		u8 opcode = gm206_bios_read_u8(g, ip++);
+
+		switch (opcode) {
+		case INIT_XMEMSEL_ZM_NV_REG_ARRAY:
+			/* operands: u32 register, u8 stride, u8 count */
+			reg = gm206_bios_read_u32(g, ip);
+			stride = gm206_bios_read_u8(g, ip + 4);
+			count = gm206_bios_read_u8(g, ip + 5);
+			ip += 6;
+
+			/* ip now sits on the inline data table */
+			gp106_init_xmemsel_zm_nv_reg_array(g, &condition,
+				reg, stride, count, ip);
+			/* skip the table: count * strap-count u32 words */
+			ip += count * sizeof(u32) *
+				g->bios.mem_strap_data_count;
+			break;
+
+		case INIT_CONDITION:
+			/* operand: u8 condition table index */
+			cond_id = gm206_bios_read_u8(g, ip);
+			ip++;
+
+			gp106_init_condition(g, &condition, cond_id);
+			break;
+
+		case INIT_RESUME:
+			/* undo the effect of a failed INIT_CONDITION */
+			condition = true;
+			break;
+
+		case INIT_DONE:
+			/* normal end of script */
+			return 0;
+
+		default:
+			gk20a_err(dev_from_gk20a(g), "opcode: 0x%02x", opcode);
+			return -EINVAL;
+		}
+	}
+}
+
+void gp106_init_bios(struct gpu_ops *gops)
+{
+	gm206_init_bios(gops);
+	gops->bios.execute_script = gp106_execute_script;
+}
diff --git a/drivers/gpu/nvgpu/gp106/bios_gp106.h b/drivers/gpu/nvgpu/gp106/bios_gp106.h
new file mode 100644
index 00000000..f47d11ca
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/bios_gp106.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef NVGPU_BIOS_GP106_H
+#define NVGPU_BIOS_GP106_H
+
+struct gpu_ops;
+
+#define INIT_DONE 0x71
+#define INIT_RESUME 0x72
+#define INIT_CONDITION 0x75
+#define INIT_XMEMSEL_ZM_NV_REG_ARRAY 0x8f
+
+struct condition_entry {
+	u32 cond_addr;
+	u32 cond_mask;
+	u32 cond_compare;
+} __packed;
+
+void gp106_init_bios(struct gpu_ops *gops);
+#endif
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 2217dfea..89e0e1fd 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -31,7 +31,7 @@
 #include "gp106/therm_gp106.h"
 #include "gp106/xve_gp106.h"
 
-#include "gm206/bios_gm206.h"
+#include "gp106/bios_gp106.h"
 
 #include "gm20b/gr_gm20b.h"
 #include "gm20b/fifo_gm20b.h"
@@ -209,7 +209,7 @@ int gp106_init_hal(struct gk20a *g)
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 	gk20a_init_css_ops(gops);
 #endif
-	gm206_init_bios(gops);
+	gp106_init_bios(gops);
 	gp106_init_therm_ops(gops);
 	gp106_init_xve_ops(gops);
 
diff --git a/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h b/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h
index 1ab876cd..d76f78b9 100644
--- a/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h
@@ -502,4 +502,76 @@ static inline u32 fb_mmu_local_memory_range_ecc_mode_v(u32 r)
 {
 	return (r >> 30) & 0x1;
 }
+static inline u32 fb_fbpa_fbio_delay_r(void)
+{
+	return 0x9a065c;
+}
+static inline u32 fb_fbpa_fbio_delay_src_m(void)
+{
+	return 0x7;
+}
+static inline u32 fb_fbpa_fbio_delay_src_v(u32 r)
+{
+	return (r >> 0) & 0x7;
+}
+static inline u32 fb_fbpa_fbio_delay_src_f(u32 v)
+{
+	return (v & 0x7) << 0;
+}
+static inline u32 fb_fbpa_fbio_delay_src_max_v(void)
+{
+	return 2;
+}
+static inline u32 fb_fbpa_fbio_delay_priv_m(void)
+{
+	return 0x7 << 4;
+}
+static inline u32 fb_fbpa_fbio_delay_priv_v(u32 r)
+{
+	return (r >> 4) & 0x7;
+}
+static inline u32 fb_fbpa_fbio_delay_priv_f(u32 v)
+{
+	return (v & 0x7) << 4;
+}
+static inline u32 fb_fbpa_fbio_delay_priv_max_v(void)
+{
+	return 2;
+}
+static inline u32 fb_fbpa_fbio_cmd_delay_r(void)
+{
+	return 0x9a08e0;
+}
+static inline u32 fb_fbpa_fbio_cmd_delay_cmd_src_m(void)
+{
+	return 0x7;
+}
+static inline u32 fb_fbpa_fbio_cmd_delay_cmd_src_v(u32 r)
+{
+	return (r >> 0) & 0x7;
+}
+static inline u32 fb_fbpa_fbio_cmd_delay_cmd_src_f(u32 v)
+{
+	return (v & 0x7) << 0;
+}
+static inline u32 fb_fbpa_fbio_cmd_delay_cmd_src_max_v(void)
+{
+	return 1;
+}
+static inline u32 fb_fbpa_fbio_cmd_delay_cmd_priv_m(void)
+{
+	return 0x7 << 4;
+}
+static inline u32 fb_fbpa_fbio_cmd_delay_cmd_priv_v(u32 r)
+{
+	return (r >> 4) & 0x7;
+}
+static inline u32 fb_fbpa_fbio_cmd_delay_cmd_priv_f(u32 v)
+{
+	return (v & 0x7) << 4;
+}
+static inline u32 fb_fbpa_fbio_cmd_delay_cmd_priv_max_v(void)
+{
+	return 1;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gp106/hw_gc6_gp106.h b/drivers/gpu/nvgpu/gp106/hw_gc6_gp106.h
new file mode 100644
index 00000000..25aca9b5
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/hw_gc6_gp106.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Function naming determines intended use:
+ *
+ *     <x>_r(void) : Returns the offset for register <x>.
+ *
+ *     <x>_o(void) : Returns the offset for element <x>.
+ *
+ *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ *         and masked to place it at field <y> of register <x>.  This value
+ *         can be |'d with others to produce a full register value for
+ *         register <x>.
+ *
+ *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+ *         value can be ~'d and then &'d to clear the value of field <y> for
+ *         register <x>.
+ *
+ *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ *         to place it at field <y> of register <x>.  This value can be |'d
+ *         with others to produce a full register value for <x>.
+ *
+ *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ *         <x> value 'r' after being shifted to place its LSB at bit 0.
+ *         This value is suitable for direct comparison with other unshifted
+ *         values appropriate for use in field <y> of register <x>.
+ *
+ *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ *         field <y> of register <x>.  This value is suitable for direct
+ *         comparison with unshifted values appropriate for use in field <y>
+ *         of register <x>.
+ */
+#ifndef _hw_gc6_gp106_h_
+#define _hw_gc6_gp106_h_
+static inline u32 gc6_sci_strap_r(void)
+{
+	return 0x00010ebb0;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/include/bios.h b/drivers/gpu/nvgpu/include/bios.h
index 3af5bcf4..83d972e3 100644
--- a/drivers/gpu/nvgpu/include/bios.h
+++ b/drivers/gpu/nvgpu/include/bios.h
@@ -408,4 +408,98 @@ struct vfield_entry {
 	u16 strap_desc;
 } __packed;
 
+#define PERF_CLK_DOMAINS_IDX_MAX		(32)
+#define PERF_CLK_DOMAINS_IDX_INVALID		PERF_CLK_DOMAINS_IDX_MAX
+
+#define VBIOS_PSTATE_TABLE_VERSION_5X		0x50
+#define VBIOS_PSTATE_HEADER_5X_SIZE_10		(10)
+
+struct vbios_pstate_header_5x {
+	u8 version;
+	u8 header_size;
+	u8 base_entry_size;
+	u8 base_entry_count;
+	u8 clock_entry_size;
+	u8 clock_entry_count;
+	u8 flags0;
+	u8 initial_pstate;
+	u8 cpi_support_level;
+	u8 cpi_features;
+} __packed;	/* ten u8 fields: matches VBIOS_PSTATE_HEADER_5X_SIZE_10 */
+
+#define VBIOS_PSTATE_CLOCK_ENTRY_5X_SIZE_6	6
+
+#define VBIOS_PSTATE_BASE_ENTRY_5X_SIZE_2	0x2
+#define VBIOS_PSTATE_BASE_ENTRY_5X_SIZE_3	0x3
+
+struct vbios_pstate_entry_clock_5x {
+	u16 param0;
+	u32 param1;
+} __packed;
+
+struct vbios_pstate_entry_5x {
+	u8 pstate_level;
+	u8 flags0;
+	u8 lpwr_entry_idx;
+	struct vbios_pstate_entry_clock_5x clockEntry[PERF_CLK_DOMAINS_IDX_MAX];
+} __packed;
+
+#define VBIOS_PSTATE_5X_CLOCK_PROG_PARAM0_NOM_FREQ_MHZ_SHIFT	0
+#define VBIOS_PSTATE_5X_CLOCK_PROG_PARAM0_NOM_FREQ_MHZ_MASK	0x00003FFF
+
+#define VBIOS_PSTATE_5X_CLOCK_PROG_PARAM1_MIN_FREQ_MHZ_SHIFT	0
+#define VBIOS_PSTATE_5X_CLOCK_PROG_PARAM1_MIN_FREQ_MHZ_MASK	0x00003FFF
+
+#define VBIOS_PSTATE_5X_CLOCK_PROG_PARAM1_MAX_FREQ_MHZ_SHIFT	14
+#define VBIOS_PSTATE_5X_CLOCK_PROG_PARAM1_MAX_FREQ_MHZ_MASK	0x0FFFC000
+
+#define VBIOS_PERFLEVEL_SKIP_ENTRY				0xFF
+
+#define VBIOS_MEMORY_CLOCK_HEADER_11_VERSION				0x11
+
+#define VBIOS_MEMORY_CLOCK_HEADER_11_0_SIZE				16
+#define VBIOS_MEMORY_CLOCK_HEADER_11_1_SIZE				21
+#define VBIOS_MEMORY_CLOCK_HEADER_11_2_SIZE				26
+
+struct vbios_memory_clock_header_1x {
+	u8 version;
+	u8 header_size;
+	u8 base_entry_size;
+	u8 strap_entry_size;
+	u8 strap_entry_count;
+	u8 entry_count;
+	u8 flags;
+	u8 fbvdd_settle_time;
+	u32 cfg_pwrd_val;
+	u16 fbvddq_high;
+	u16 fbvddq_low;
+	u32 script_list_ptr;
+	u8 script_list_count;
+	u32 cmd_script_list_ptr;
+	u8 cmd_script_list_count;
+} __packed;
+
+#define VBIOS_MEMORY_CLOCK_BASE_ENTRY_11_2_SIZE				20
+
+struct vbios_memory_clock_base_entry_11 {
+	u16 minimum;
+	u16 maximum;
+	u32 script_pointer;
+	u8 flags0;
+	u32 fbpa_config;
+	u32 fbpa_config1;
+	u8 flags1;
+	u8 ref_mpllssf_freq_delta;
+	u8 flags2;
+} __packed;
+
+/* Script Pointer Index */
+/* #define VBIOS_MEMORY_CLOCK_BASE_ENTRY_11_FLAGS1_SCRIPT_INDEX		3:2*/
+#define VBIOS_MEMORY_CLOCK_BASE_ENTRY_11_FLAGS1_SCRIPT_INDEX_MASK	0xc
+#define VBIOS_MEMORY_CLOCK_BASE_ENTRY_11_FLAGS1_SCRIPT_INDEX_SHIFT	2
+/* #define VBIOS_MEMORY_CLOCK_BASE_ENTRY_12_FLAGS2_CMD_SCRIPT_INDEX	1:0*/
+#define VBIOS_MEMORY_CLOCK_BASE_ENTRY_12_FLAGS2_CMD_SCRIPT_INDEX_MASK	0x3
+#define VBIOS_MEMORY_CLOCK_BASE_ENTRY_12_FLAGS2_CMD_SCRIPT_INDEX_SHIFT	0
+
 #endif
+
-- 
cgit v1.2.2


From 7f7bf15564ad6a1198807e10bab156337f9dde9b Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Mon, 17 Oct 2016 13:39:56 -0700
Subject: gpu: nvgpu: fix sparse warning in LTC code

bug 200088648

sparse warning reported that the function was not
defined. This was due to a missing include

Change-Id: Ia6153a2f3348a86e78add95bcfff998505b47cdd
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1237845
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/ltc_gp106.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/ltc_gp106.c b/drivers/gpu/nvgpu/gp106/ltc_gp106.c
index dcd4fbad..b162ddb3 100644
--- a/drivers/gpu/nvgpu/gp106/ltc_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/ltc_gp106.c
@@ -16,6 +16,7 @@
 #include "gk20a/gk20a.h"
 #include "gm20b/ltc_gm20b.h"
 #include "gp10b/ltc_gp10b.h"
+#include "gp106/ltc_gp106.h"
 
 void gp106_init_ltc(struct gpu_ops *gops)
 {
-- 
cgit v1.2.2


From b6408e26c1a6c4c91233c144bae84db9728c4338 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 12 Oct 2016 15:24:11 -0700
Subject: gpu: nvgpu: At FB reset wait for scrubber

We need to wait for scrubber to have finished before we can allow
any accesses to memory. Do the wait in place where on iGPU we would
do FB reset.

Bug 1799537
Bug 1815139

Change-Id: Ic92dee936388a13c4abf0b295fd99581522c430f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1235541
(cherry picked from commit 1ef73ecb4e37da042e7117426ab2823b7f4528dc)
Reviewed-on: http://git-master/r/1239955
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp106/fb_gp106.c    | 21 ++++++++++++++++++++-
 drivers/gpu/nvgpu/gp106/hw_fb_gp106.h | 28 ++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/fb_gp106.c b/drivers/gpu/nvgpu/gp106/fb_gp106.c
index 1c5b3e0a..ef9f1094 100644
--- a/drivers/gpu/nvgpu/gp106/fb_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/fb_gp106.c
@@ -12,14 +12,33 @@
  */
 
 #include <linux/types.h>
+#include <linux/delay.h>
 
 #include "gk20a/gk20a.h"
 #include "gp10b/fb_gp10b.h"
+#include "hw_fb_gp106.h"
+
+#define HW_SCRUB_TIMEOUT_DEFAULT	100 /* usec */
+#define HW_SCRUB_TIMEOUT_MAX		2000000 /* usec */
+
+/* Wait for the memory scrubber to finish; log an error if it times out. */
+static void gp106_fb_reset(struct gk20a *g)
+{
+	int retries = HW_SCRUB_TIMEOUT_MAX / HW_SCRUB_TIMEOUT_DEFAULT;
+	while (!fb_niso_scrub_status_flag_v(
+			gk20a_readl(g, fb_niso_scrub_status_r())) &&
+			--retries)
+		udelay(HW_SCRUB_TIMEOUT_DEFAULT);
+	if (retries)
+		gk20a_dbg_fn("done");
+	else
+		gk20a_err(dev_from_gk20a(g), "memory scrubber timed out");
+}
 
 void gp106_init_fb(struct gpu_ops *gops)
 {
 	gp10b_init_fb(gops);
 
 	gops->fb.init_fs_state = NULL;
-	gops->fb.reset = NULL;
+	gops->fb.reset = gp106_fb_reset;
 }
diff --git a/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h b/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h
index d76f78b9..19d88464 100644
--- a/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h
@@ -574,4 +574,32 @@ static inline u32 fb_fbpa_fbio_cmd_delay_cmd_priv_max_v(void)
 {
 	return 1;
 }
+static inline u32 fb_niso_scrubber_status_r(void)
+{
+	return 0x00100b20;
+}
+static inline u32 fb_niso_scrubber_status_flag_s(void)
+{
+	return 1;
+}
+static inline u32 fb_niso_scrubber_status_flag_f(u32 v)
+{
+	return (v & 0x1) << 0;
+}
+static inline u32 fb_niso_scrubber_status_flag_m(void)
+{
+	return 0x1 << 0;
+}
+static inline u32 fb_niso_scrubber_status_flag_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 fb_niso_scrub_status_r(void)
+{
+	return 0x00100b20;
+}
+static inline u32 fb_niso_scrub_status_flag_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
 #endif
-- 
cgit v1.2.2


From c6e64649bbb30113c4cbd97f4e4217bab9f21429 Mon Sep 17 00:00:00 2001
From: seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Mon, 17 Oct 2016 09:59:21 -0700
Subject: gpu: nvgpu: gp10b: make commit_userd global

Make channel_gp10b_commit_userd global, so other
gpus can re-use that function.

JIRA GV11B-11

Change-Id: Ibe03063befc2da6c67822121f880a141cad46e84
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1237738
GVS: Gerrit_Virtual_Submit
Reviewed-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | 2 +-
 drivers/gpu/nvgpu/gp10b/fifo_gp10b.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index 127d4632..40bfa2a5 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -49,7 +49,7 @@ static void gp10b_set_pdb_fault_replay_flags(struct gk20a *g,
 	gk20a_dbg_fn("done");
 }
 
-static int channel_gp10b_commit_userd(struct channel_gk20a *c)
+int channel_gp10b_commit_userd(struct channel_gk20a *c)
 {
 	u32 addr_lo;
 	u32 addr_hi;
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.h b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.h
index ad3679aa..3ef8247f 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.h
@@ -17,4 +17,5 @@
 #define FIFO_GP10B_H
 struct gpu_ops;
 void gp10b_init_fifo(struct gpu_ops *gops);
+int channel_gp10b_commit_userd(struct channel_gk20a *c);
 #endif
-- 
cgit v1.2.2


From e28ef73ec9baea7df631606298f8c210dc8f31a8 Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Tue, 30 Aug 2016 16:48:38 +0530
Subject: gpu: nvgpu: add support to cache VF table from PMU

JIRA DNVGPU-118

Change-Id: I5c6a919d18e6de077e03180ba70441cfc9791350
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/1209849
(cherry picked from commit 469f35e3f65964a3402f7e0c49862bd44b68936a)
Reviewed-on: http://git-master/r/1233040
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk_vf_point.c | 79 +++++++++++++++++++++++++++++++++++-
 drivers/gpu/nvgpu/clk/clk_vf_point.h |  1 +
 2 files changed, 78 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_vf_point.c b/drivers/gpu/nvgpu/clk/clk_vf_point.c
index a8cc6403..4fde7226 100644
--- a/drivers/gpu/nvgpu/clk/clk_vf_point.c
+++ b/drivers/gpu/nvgpu/clk/clk_vf_point.c
@@ -243,7 +243,7 @@ static u32 clk_vf_point_construct_volt(struct gk20a *g,
 	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_CLK_CLK_VF_POINT_TYPE_VOLT)
 		return -EINVAL;
 
-	ptmpobj->type_mask |= BIT(CTRL_CLK_CLK_VF_POINT_TYPE_VOLT);
+	ptmpobj->type_mask = BIT(CTRL_CLK_CLK_VF_POINT_TYPE_VOLT);
 	status = clk_vf_point_construct_super(g, ppboardobj, size, pargs);
 	if (status)
 		return -EINVAL;
@@ -271,7 +271,7 @@ static u32 clk_vf_point_construct_freq(struct gk20a *g,
 	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_CLK_CLK_VF_POINT_TYPE_FREQ)
 		return -EINVAL;
 
-	ptmpobj->type_mask |= BIT(CTRL_CLK_CLK_VF_POINT_TYPE_FREQ);
+	ptmpobj->type_mask = BIT(CTRL_CLK_CLK_VF_POINT_TYPE_FREQ);
 	status = clk_vf_point_construct_super(g, ppboardobj, size, pargs);
 	if (status)
 		return -EINVAL;
@@ -341,3 +341,78 @@ static u32 _clk_vf_point_pmudatainit_super(struct gk20a *g,
 	pset->volt_rail_idx = pclk_vf_point->volt_rail_idx;
 	return status;
 }
+
+
+/*
+ * Refresh one cached VF point from the status object the PMU returned for it.
+ * Returns -EINVAL (as u32) when the PMU object type differs from the cached
+ * board object's type, 0 after the VF pair has been copied.
+ */
+static u32 clk_vf_point_update(struct gk20a *g,
+				struct boardobj *board_obj_ptr,
+				struct nv_pmu_boardobj *ppmudata)
+{
+	struct clk_vf_point *vf_point = (struct clk_vf_point *)board_obj_ptr;
+	struct nv_pmu_clk_clk_vf_point_boardobj_get_status *pmu_status =
+		(struct nv_pmu_clk_clk_vf_point_boardobj_get_status *)ppmudata;
+
+	gk20a_dbg_info("");
+
+	if (pmu_status->super.type == vf_point->super.type) {
+		/* types agree: take the PMU's VF pair */
+		memcpy(&vf_point->pair, &pmu_status->pair,
+			sizeof(struct ctrl_clk_vf_pair));
+		return 0;
+	}
+
+	gk20a_err(dev_from_gk20a(g),
+		"pmu data and boardobj type not matching");
+	return -EINVAL;
+}
+
+/*
+ * Ask the PMU for the current status of every VF point and copy each
+ * returned VF pair into the matching cached board object. Returns 0 on
+ * success or the first non-zero status encountered.
+ */
+u32 clk_vf_point_cache(struct gk20a *g)
+{
+	struct clk_vf_points *vf_points = &g->clk_pmu.clk_vf_pointobjs;
+	struct boardobjgrp *grp = &vf_points->super.super;
+	struct boardobjgrpmask *grp_mask = &vf_points->super.mask.super;
+	struct nv_pmu_boardobjgrp_super *pmu_grp;
+	struct boardobj *obj = NULL;
+	struct nv_pmu_boardobj_query *pmu_obj_status = NULL;
+	u32 status;
+	u8 index;
+
+	gk20a_dbg_info("");
+
+	status = grp->pmugetstatus(g, grp, grp_mask);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g), "err getting boardobjs from pmu");
+		return status;
+	}
+
+	pmu_grp = grp->pmu.getstatus.buf;
+	BOARDOBJGRP_FOR_EACH(grp, struct boardobj*, obj, index) {
+		status = grp->pmustatusinstget(g,
+				(struct nv_pmu_boardobjgrp *)pmu_grp,
+				&pmu_obj_status, index);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"could not get status object instance");
+			return status;
+		}
+
+		status = clk_vf_point_update(g, obj,
+			(struct nv_pmu_boardobj *)pmu_obj_status);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"invalid data from pmu at %d", index);
+			return status;
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/clk/clk_vf_point.h b/drivers/gpu/nvgpu/clk/clk_vf_point.h
index b05ab818..306bec41 100644
--- a/drivers/gpu/nvgpu/clk/clk_vf_point.h
+++ b/drivers/gpu/nvgpu/clk/clk_vf_point.h
@@ -21,6 +21,7 @@
 
 u32 clk_vf_point_sw_setup(struct gk20a *g);
 u32 clk_vf_point_pmu_setup(struct gk20a *g);
+u32 clk_vf_point_cache(struct gk20a *g);
 
 struct clk_vf_points {
 	struct boardobjgrp_e255 super;
-- 
cgit v1.2.2


From 1b1090512020369df18dbe36336ac5a85d2cd693 Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Wed, 31 Aug 2016 17:40:24 +0530
Subject: gpu: nvgpu: support to parse VF table

JIRA DNVGPU-123

function was added to retrieve V for F or
F for V for a given clock domain.
Clock domain can be master or slave.
F or V can be intermediate point between two
successive V or F values in VF table.
VF table should be cached before calling this function.
A F value below Fmin will return Vmin.
F > Fmax will return error
A V value above Vmax will return Fmax.
A V value below Vmin will return error.

Change-Id: I28b4e8647510c6933e9e1204cfff31d74616e11a
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/1211234
(cherry-picked from commit 5b83b03f2454fbec8d49a064ed09b09c92d3e9fa)
Reviewed-on: http://git-master/r/1235054
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk.c          |  48 +++++++++++
 drivers/gpu/nvgpu/clk/clk.h          |   8 ++
 drivers/gpu/nvgpu/clk/clk_domain.c   | 125 +++++++++++++++++++++++++++-
 drivers/gpu/nvgpu/clk/clk_domain.h   |  10 +++
 drivers/gpu/nvgpu/clk/clk_prog.c     | 153 +++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/clk/clk_prog.h     |   6 ++
 drivers/gpu/nvgpu/clk/clk_vf_point.h |   7 +-
 drivers/gpu/nvgpu/pstate/pstate.c    |   4 +
 8 files changed, 356 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c
index 0679efc0..34b344c8 100644
--- a/drivers/gpu/nvgpu/clk/clk.c
+++ b/drivers/gpu/nvgpu/clk/clk.c
@@ -188,3 +188,51 @@ u32 clk_pmu_vf_inject(struct gk20a *g)
 done:
 	return status;
 }
+
+u32 clk_domain_print_vf_table(struct gk20a *g, u32 clkapidomain)
+{
+	u32 status = -EINVAL;	/* returned as is when no domain matches */
+	struct clk_domain *pdomain;
+	u8 i;
+	struct clk_pmupstate *pclk = &g->clk_pmu;
+	u16 clkmhz = 0;		/* F and V both zero make the 3x search */
+	u32 volt = 0;		/* callback print the VF table only */
+
+	BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs.super.super),
+			struct clk_domain *, pdomain, i) {
+		if (pdomain->api_domain == clkapidomain) {
+			status = pdomain->clkdomainclkvfsearch(g, pclk,
+				pdomain, &clkmhz, &volt,
+				CLK_PROG_VFE_ENTRY_LOGIC);
+			return status;
+		}
+	}
+	return status;
+}
+/* Find V for a given F, or F for a given V, in clkapidomain's VF table. */
+u32 clk_domain_get_f_or_v(
+	struct gk20a *g,
+	u32 clkapidomain,
+	u16 *pclkmhz,
+	u32 *pvoltuv
+)
+{
+	u32 status = -EINVAL;	/* returned as is when no domain matches */
+	struct clk_domain *pdomain;
+	u8 i;
+	struct clk_pmupstate *pclk = &g->clk_pmu;
+	/* both are in/out; clkdomainvfsearch rejects both being non-zero */
+	if ((pclkmhz == NULL) || (pvoltuv == NULL))
+		return -EINVAL;
+
+	BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs.super.super),
+			struct clk_domain *, pdomain, i) {
+		if (pdomain->api_domain == clkapidomain) {
+			status = pdomain->clkdomainclkvfsearch(g, pclk,
+				pdomain, pclkmhz, pvoltuv,
+				CLK_PROG_VFE_ENTRY_LOGIC);
+			return status;
+		}
+	}
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h
index e24aada2..0d12ba7d 100644
--- a/drivers/gpu/nvgpu/clk/clk.h
+++ b/drivers/gpu/nvgpu/clk/clk.h
@@ -84,5 +84,13 @@ struct vbios_clocks_table_1x_hal_clock_entry {
 
 u32 clk_pmu_vf_inject(struct gk20a *g);
 u32 clk_pmu_vin_load(struct gk20a *g);
+u32 clk_domain_print_vf_table(struct gk20a *g, u32 clkapidomain);
+u32 clk_domain_get_f_or_v
+(
+	struct gk20a *g,
+	u32 clkapidomain,
+	u16 *pclkmhz,
+	u32 *pvoltuv
+);
 
 #endif
diff --git a/drivers/gpu/nvgpu/clk/clk_domain.c b/drivers/gpu/nvgpu/clk/clk_domain.c
index 7371946c..c8da851a 100644
--- a/drivers/gpu/nvgpu/clk/clk_domain.c
+++ b/drivers/gpu/nvgpu/clk/clk_domain.c
@@ -136,6 +136,8 @@ u32 clk_domain_sw_setup(struct gk20a *g)
 	struct boardobjgrp *pboardobjgrp = NULL;
 	struct clk_domains *pclkdomainobjs;
 	struct clk_domain *pdomain;
+	struct clk_domain_3x_master *pdomain_master;
+	struct clk_domain_3x_slave *pdomain_slave;
 	u8 i;
 
 	gk20a_dbg_info("");
@@ -186,6 +188,7 @@ u32 clk_domain_sw_setup(struct gk20a *g)
 
 	BOARDOBJGRP_FOR_EACH(&(pclkdomainobjs->super.super),
 			     struct clk_domain *, pdomain, i) {
+		pdomain_master = NULL;
 		if (pdomain->super.implements(g, &pdomain->super,
 				CTRL_CLK_CLK_DOMAIN_TYPE_3X_PROG)) {
 			status = boardobjgrpmask_bitset(
@@ -201,6 +204,18 @@ u32 clk_domain_sw_setup(struct gk20a *g)
 			if (status)
 				goto done;
 		}
+
+		if (pdomain->super.implements(g, &pdomain->super,
+				CTRL_CLK_CLK_DOMAIN_TYPE_3X_SLAVE)) {
+				pdomain_slave =
+					(struct clk_domain_3x_slave *)pdomain;
+				pdomain_master =
+					(struct clk_domain_3x_master *)
+					(CLK_CLK_DOMAIN_GET((&g->clk_pmu),
+					pdomain_slave->master_idx));
+			pdomain_master->slave_idxs_mask |= BIT(i);
+		}
+
 	}
 
 done:
@@ -407,6 +422,20 @@ static u32 clkdomainclkproglink_not_supported(struct gk20a *g,
 	return -EINVAL;
 }
 
+static u32 clkdomainvfsearch_stub(
+	struct gk20a *g,
+	struct clk_pmupstate *pclk,
+	struct clk_domain *pdomain,
+	u16 *clkmhz,
+	u32 *voltuv,
+	u8 rail
+)
+{	/* default clkdomainclkvfsearch callback: ignores its arguments */
+	gk20a_dbg_info("");
+	return -EINVAL;
+}
+
+
 static u32 clk_domain_construct_super(struct gk20a *g,
 				      struct boardobj **ppboardobj,
 				      u16 size, void *pargs)
@@ -429,6 +458,9 @@ static u32 clk_domain_construct_super(struct gk20a *g,
 	pdomain->clkdomainclkproglink =
 			clkdomainclkproglink_not_supported;
 
+	pdomain->clkdomainclkvfsearch =
+			clkdomainvfsearch_stub;
+
 	pdomain->api_domain = ptmpdomain->api_domain;
 	pdomain->domain = ptmpdomain->domain;
 	pdomain->perf_domain_grp_idx =
@@ -508,6 +540,92 @@ static u32 clkdomainclkproglink_3x_prog(struct gk20a *g,
 	return status;
 }
 
+static u32 clkdomainvfsearch
+(
+	struct gk20a *g,
+	struct clk_pmupstate *pclk,
+	struct clk_domain *pdomain,
+	u16 *pclkmhz,
+	u32 *pvoltuv,
+	u8 rail
+)
+{
+	u32 status = 0;
+	struct clk_domain_3x_master *p3xmaster  =
+		(struct clk_domain_3x_master *)pdomain;
+	struct clk_prog *pprog = NULL;
+	struct clk_prog_1x_master *pprog1xmaster = NULL;
+	u8 i;
+	u8 *pslaveidx = NULL;
+	u8 slaveidx;
+	u16 clkmhz;
+	u32 voltuv;
+	u16 bestclkmhz;
+	u32 bestvoltuv;
+
+	gk20a_dbg_info("");
+	if ((*pclkmhz != 0) && (*pvoltuv != 0))
+		return -EINVAL;
+
+	bestclkmhz = *pclkmhz;
+	bestvoltuv = *pvoltuv;
+
+	if (pdomain->super.implements(g, &pdomain->super,
+			CTRL_CLK_CLK_DOMAIN_TYPE_3X_SLAVE)) {
+		slaveidx = BOARDOBJ_GET_IDX(pdomain);
+		pslaveidx = &slaveidx;
+		p3xmaster = (struct clk_domain_3x_master *)
+				CLK_CLK_DOMAIN_GET(pclk,
+				((struct clk_domain_3x_slave *)
+					pdomain)->master_idx);
+	}
+
+	/* Iterate over the set of CLK_PROGs pointed at by this domain.*/
+	for (i = p3xmaster->super.clk_prog_idx_first;
+	     i <= p3xmaster->super.clk_prog_idx_last;
+	     i++) {
+		clkmhz = *pclkmhz;
+		voltuv = *pvoltuv;
+		pprog = CLK_CLK_PROG_GET(pclk, i);
+
+		/* MASTER CLK_DOMAINs must point to MASTER CLK_PROGs.*/
+		if (!pprog->super.implements(g, &pprog->super,
+				CTRL_CLK_CLK_PROG_TYPE_1X_MASTER)) {
+			status = -EINVAL;
+			goto done;
+		}
+
+		pprog1xmaster = (struct clk_prog_1x_master *)pprog;
+		status = pprog1xmaster->vflookup(g, pclk, pprog1xmaster,
+				pslaveidx, &clkmhz, &voltuv, rail);
+		/* if look up has found the V or F value matching to other
+		 exit */
+		if (status == 0) {
+			if (*pclkmhz == 0) {
+				bestclkmhz = clkmhz;
+			} else {
+				bestvoltuv = voltuv;
+				break;
+			}
+		}
+	}
+	/* clk and volt sent as zero to pring vf table */
+	if ((*pclkmhz == 0) && (*pvoltuv == 0)) {
+		status = 0;
+		goto done;
+	}
+	/* atleast one search found a matching value? */
+	if ((bestvoltuv != 0) && (bestclkmhz != 0)) {
+		*pclkmhz = bestclkmhz;
+		*pvoltuv = bestvoltuv;
+		status = 0;
+		goto done;
+	}
+done:
+	gk20a_dbg_info("done status %x", status);
+	return status;
+}
+
 static u32 _clk_domain_pmudatainit_3x_prog(struct gk20a *g,
 					   struct boardobj *board_obj_ptr,
 					   struct nv_pmu_boardobj *ppmudata)
@@ -568,6 +686,9 @@ static u32 clk_domain_construct_3x_prog(struct gk20a *g,
 	pdomain->super.super.clkdomainclkproglink =
 				clkdomainclkproglink_3x_prog;
 
+	pdomain->super.super.clkdomainclkvfsearch =
+				clkdomainvfsearch;
+
 	pdomain->clk_prog_idx_first = ptmpdomain->clk_prog_idx_first;
 	pdomain->clk_prog_idx_last = ptmpdomain->clk_prog_idx_last;
 	pdomain->noise_unaware_ordering_index =
@@ -707,8 +828,6 @@ static u32 clk_domain_construct_3x_master(struct gk20a *g,
 {
 	struct boardobj *ptmpobj = (struct boardobj *)pargs;
 	struct clk_domain_3x_master *pdomain;
-	struct clk_domain_3x_master *ptmpdomain =
-			(struct clk_domain_3x_master *)pargs;
 	u32 status = 0;
 
 	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_CLK_CLK_DOMAIN_TYPE_3X_MASTER)
@@ -726,7 +845,7 @@ static u32 clk_domain_construct_3x_master(struct gk20a *g,
 	pdomain->super.super.super.clkdomainclkproglink =
 				clkdomainclkproglink_3x_master;
 
-	pdomain->slave_idxs_mask = ptmpdomain->slave_idxs_mask;
+	pdomain->slave_idxs_mask = 0;
 
 	return status;
 }
diff --git a/drivers/gpu/nvgpu/clk/clk_domain.h b/drivers/gpu/nvgpu/clk/clk_domain.h
index 2670a066..07976a2a 100644
--- a/drivers/gpu/nvgpu/clk/clk_domain.h
+++ b/drivers/gpu/nvgpu/clk/clk_domain.h
@@ -26,8 +26,13 @@ struct clk_domain;
 /*data and function definition to talk to driver*/
 u32 clk_domain_sw_setup(struct gk20a *g);
 u32 clk_domain_pmu_setup(struct gk20a *g);
+
 typedef u32 clkproglink(struct gk20a *g, struct clk_pmupstate *pclk,
 			struct clk_domain *pdomain);
+
+typedef u32 clkvfsearch(struct gk20a *g, struct clk_pmupstate *pclk,
+			struct clk_domain *pdomain, u16 *clkmhz,
+			u32 *voltuv, u8 rail);
 struct clk_domains {
 	struct boardobjgrp_e32 super;
 	u8 n_num_entries;
@@ -55,6 +60,7 @@ struct clk_domain {
 	u8 ratio_domain;
 	u8 usage;
 	clkproglink *clkdomainclkproglink;
+	clkvfsearch *clkdomainclkvfsearch;
 };
 
 struct clk_domain_3x {
@@ -92,4 +98,8 @@ struct clk_domain_3x_slave {
 
 u32 clk_domain_clk_prog_link(struct gk20a *g, struct clk_pmupstate *pclk);
 
+#define CLK_CLK_DOMAIN_GET(pclk, idx) /* clk_domain * at index idx */   \
+	((struct clk_domain *)BOARDOBJGRP_OBJ_GET_BY_IDX(		\
+		&(pclk)->clk_domainobjs.super.super, (u8)(idx)))
+
 #endif
diff --git a/drivers/gpu/nvgpu/clk/clk_prog.c b/drivers/gpu/nvgpu/clk/clk_prog.c
index 4bf473ac..5e4700a0 100644
--- a/drivers/gpu/nvgpu/clk/clk_prog.c
+++ b/drivers/gpu/nvgpu/clk/clk_prog.c
@@ -29,6 +29,7 @@ static struct clk_prog *construct_clk_prog(struct gk20a *g, void *pargs);
 static u32 devinit_get_clk_prog_table(struct gk20a *g,
 	struct clk_progs *pprogobjs);
 static vf_flatten vfflatten_prog_1x_master;
+static vf_lookup vflookup_prog_1x_master;
 
 static u32 _clk_progs_pmudatainit(struct gk20a *g,
 				  struct boardobjgrp *pboardobjgrp,
@@ -603,6 +604,9 @@ static u32 clk_prog_construct_1x_master(struct gk20a *g,
 	pclkprog->vfflatten =
 			vfflatten_prog_1x_master;
 
+	pclkprog->vflookup =
+			vflookup_prog_1x_master;
+
 	pclkprog->p_vf_entries = (struct ctrl_clk_clk_prog_1x_master_vf_entry *)
 		kzalloc(vfsize, GFP_KERNEL);
 
@@ -831,3 +835,152 @@ done:
 	gk20a_dbg_info("done status %x", status);
 	return status;
 }
+/* Look up V for F, or F for V, in one master CLK_PROG's VF points for rail. */
+static u32 vflookup_prog_1x_master
+(
+	struct gk20a *g,
+	struct clk_pmupstate *pclk,
+	struct clk_prog_1x_master *p1xmaster,
+	u8 *slave_clk_domain,
+	u16 *pclkmhz,
+	u32 *pvoltuv,
+	u8 rail
+)
+{
+	int j;	/* signed and wider than u8 so no loop bound can wrap */
+	struct ctrl_clk_clk_prog_1x_master_vf_entry
+		*pvfentry;
+	struct clk_vf_point *pvfpoint;
+	struct clk_progs *pclkprogobjs;
+	struct clk_prog_1x_master_ratio *p1xmasterratio;
+	u16 clkmhz;
+	u32 voltuv;
+	u8 slaveentrycount;
+	u8 i;
+	struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry *pslaveents;
+	/* returns 0 only when both *pclkmhz and *pvoltuv end up non-zero */
+	if ((*pclkmhz != 0) && (*pvoltuv != 0))
+		return -EINVAL;
+
+	pclkprogobjs = &(pclk->clk_progobjs);
+
+	slaveentrycount = pclkprogobjs->slave_entry_count;
+
+	if (pclkprogobjs->vf_entry_count >
+		CTRL_CLK_CLK_PROG_1X_MASTER_VF_ENTRY_MAX_ENTRIES)
+		return -EINVAL;
+
+	if (rail >= pclkprogobjs->vf_entry_count)
+		return -EINVAL;
+
+	pvfentry =  p1xmaster->p_vf_entries;
+	/* NOTE(review): indexes entry rail+1 though rail was checked - confirm */
+	pvfentry = (struct ctrl_clk_clk_prog_1x_master_vf_entry *)(
+			(u8 *)pvfentry +
+			(sizeof(struct ctrl_clk_clk_prog_1x_master_vf_entry) *
+			(rail+1)));
+
+	clkmhz = *pclkmhz;
+	voltuv = *pvoltuv;
+
+	/*if domain is slave domain and freq is input
+		then derive master clk */
+	if ((slave_clk_domain != NULL) && (*pclkmhz != 0)) {
+		if (p1xmaster->super.super.super.implements(g,
+			&p1xmaster->super.super.super,
+			CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_RATIO)) {
+			/* master = slave * 100 / ratio; ratio != 0 assumed */
+			p1xmasterratio =
+			(struct clk_prog_1x_master_ratio *)p1xmaster;
+			pslaveents = p1xmasterratio->p_slave_entries;
+			for (i = 0; i < slaveentrycount;  i++) {
+				if (pslaveents->clk_dom_idx ==
+					*slave_clk_domain)
+					break;
+				pslaveents++;
+			}
+			if (i == slaveentrycount)
+				return -EINVAL;
+			clkmhz = (clkmhz * 100)/pslaveents->ratio;
+		} else {
+			/* only support ratio for now */
+			return -EINVAL;
+		}
+	}
+
+	/* if both volt and clks are zero simply print*/
+	if ((*pvoltuv == 0) && (*pclkmhz == 0)) {
+		for (j = pvfentry->vf_point_idx_first;
+			j <= pvfentry->vf_point_idx_last; j++) {
+			pvfpoint = CLK_CLK_VF_POINT_GET(pclk, j);
+			gk20a_err(dev_from_gk20a(g), "v %x c %x",
+				clkvfpointvoltageuvget(g, pvfpoint),
+				clkvfpointfreqmhzget(g, pvfpoint));
+		}
+		return -EINVAL;
+	}
+	/* start looking up f for v or v for f */
+	/* looking for volt? */
+	if (*pvoltuv == 0) {
+		pvfpoint = CLK_CLK_VF_POINT_GET(pclk,
+				pvfentry->vf_point_idx_last);
+		/* above range? */
+		if (clkmhz > clkvfpointfreqmhzget(g, pvfpoint))
+			return -EINVAL;
+		/* walk down: keep V of the lowest point with F >= clkmhz */
+		for (j = pvfentry->vf_point_idx_last;
+			j >= (int)pvfentry->vf_point_idx_first; j--) {
+			pvfpoint = CLK_CLK_VF_POINT_GET(pclk, j);
+			if (clkmhz <= clkvfpointfreqmhzget(g, pvfpoint))
+				voltuv = clkvfpointvoltageuvget(g, pvfpoint);
+			else
+				break;
+		}
+	} else {	/* looking for clk? */
+
+		pvfpoint = CLK_CLK_VF_POINT_GET(pclk,
+				pvfentry->vf_point_idx_first);
+		/* below range? */
+		if (voltuv < clkvfpointvoltageuvget(g, pvfpoint))
+			return -EINVAL;
+		/* walk up: keep F of the highest point with V <= voltuv */
+		for (j = pvfentry->vf_point_idx_first;
+			j <= pvfentry->vf_point_idx_last; j++) {
+			pvfpoint = CLK_CLK_VF_POINT_GET(pclk, j);
+			if (voltuv >= clkvfpointvoltageuvget(g, pvfpoint))
+				clkmhz = clkvfpointfreqmhzget(g, pvfpoint);
+			else
+				break;
+		}
+	}
+
+	/*if domain is slave domain and freq was looked up
+		then derive slave clk */
+	if ((slave_clk_domain != NULL) && (*pclkmhz == 0)) {
+		if (p1xmaster->super.super.super.implements(g,
+			&p1xmaster->super.super.super,
+			CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_RATIO)) {
+			/* slave = master * ratio / 100 */
+			p1xmasterratio =
+			(struct clk_prog_1x_master_ratio *)p1xmaster;
+			pslaveents = p1xmasterratio->p_slave_entries;
+			for (i = 0; i < slaveentrycount;  i++) {
+				if (pslaveents->clk_dom_idx ==
+					*slave_clk_domain)
+					break;
+				pslaveents++;
+			}
+			if (i == slaveentrycount)
+				return -EINVAL;
+			clkmhz = (clkmhz * pslaveents->ratio)/100;
+		} else {
+			/* only support ratio for now */
+			return -EINVAL;
+		}
+	}
+	*pclkmhz = clkmhz;
+	*pvoltuv = voltuv;
+	if ((clkmhz == 0) || (voltuv == 0))
+		return -EINVAL;
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/clk/clk_prog.h b/drivers/gpu/nvgpu/clk/clk_prog.h
index 8718bdd6..979d327d 100644
--- a/drivers/gpu/nvgpu/clk/clk_prog.h
+++ b/drivers/gpu/nvgpu/clk/clk_prog.h
@@ -27,6 +27,11 @@ typedef u32 vf_flatten(struct gk20a *g, struct clk_pmupstate *pclk,
 			struct clk_prog_1x_master *p1xmaster,
 			u8 clk_domain_idx, u16 *pfreqmaxlastmhz);
 
+typedef u32 vf_lookup(struct gk20a *g, struct clk_pmupstate *pclk,
+			struct clk_prog_1x_master *p1xmaster,
+			u8 *slave_clk_domain_idx, u16 *pclkmhz,
+			u32 *pvoltuv, u8 rail);
+
 struct clk_progs {
 	struct boardobjgrp_e255 super;
 	u8 slave_entry_count;
@@ -52,6 +57,7 @@ struct clk_prog_1x_master {
 	struct ctrl_clk_clk_delta deltas;
 	union ctrl_clk_clk_prog_1x_master_source_data source_data;
 	vf_flatten *vfflatten;
+	vf_lookup *vflookup;
 };
 
 struct clk_prog_1x_master_ratio {
diff --git a/drivers/gpu/nvgpu/clk/clk_vf_point.h b/drivers/gpu/nvgpu/clk/clk_vf_point.h
index 306bec41..15920066 100644
--- a/drivers/gpu/nvgpu/clk/clk_vf_point.h
+++ b/drivers/gpu/nvgpu/clk/clk_vf_point.h
@@ -46,8 +46,8 @@ struct clk_vf_point_freq {
 };
 
 #define CLK_CLK_VF_POINT_GET(pclk, idx)                                        \
-	((struct clk_vf_point)BOARDOBJGRP_OBJ_GET_BY_IDX(                      \
-		&pclk->vfpoints.super.super, (u8)(idx)))
+	((struct clk_vf_point *)BOARDOBJGRP_OBJ_GET_BY_IDX(                    \
+		&pclk->clk_vf_pointobjs.super.super, (u8)(idx)))
 
 #define clkvfpointpairget(pvfpoint)                                            \
 	(&((pvfpoint)->pair))
@@ -66,6 +66,9 @@ struct clk_vf_point_freq {
 	CTRL_CLK_VF_PAIR_VOLTAGE_UV_SET(clkvfpointpairget(pvfpoint),           \
 	_voltageuv)
 
+#define clkvfpointvoltageuvget(pgpu, pvfpoint) /* pgpu is not used */    \
+	CTRL_CLK_VF_PAIR_VOLTAGE_UV_GET(clkvfpointpairget(pvfpoint))
+
 struct clk_vf_point *construct_clk_vf_point(struct gk20a *g, void *pargs);
 
 #endif
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index 83f17937..94ff5010 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -96,6 +96,10 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g)
 		return err;
 
 	err = clk_pmu_vf_inject(g);
+	if (err)
+		return err;
+
+	err = clk_vf_point_cache(g);
 	return err;
 }
 
-- 
cgit v1.2.2


From 3c351f5bb2d04c1f70c72f3f2fd758bbb340877c Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Mon, 12 Sep 2016 22:36:33 +0530
Subject: gpu: nvgpu: add function to retrieve clk points

JIRA DNVGPU-123

Function will copy possible clock points for
a given master clock domain to pointer passed.
pointer with NULL value and count of zero can be passed
to query number of clock points for a given domain so that
memory can be allocated and function called again to
fill clock points

Change-Id: Iec6206f23789980036be99793599e934bd221035
Reviewed-on: http://git-master/r/1218912
(cherry picked from commit 9219697bff1e12deb605325055a02a7b387996e9)
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/1235055
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk.c        | 31 ++++++++++++++
 drivers/gpu/nvgpu/clk/clk.h        |  6 +++
 drivers/gpu/nvgpu/clk/clk_domain.c | 83 ++++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/clk/clk_domain.h |  6 +++
 drivers/gpu/nvgpu/clk/clk_prog.c   | 61 ++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/clk/clk_prog.h   |  7 ++++
 6 files changed, 194 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c
index 34b344c8..7ee4f283 100644
--- a/drivers/gpu/nvgpu/clk/clk.c
+++ b/drivers/gpu/nvgpu/clk/clk.c
@@ -236,3 +236,34 @@ u32 clk_domain_get_f_or_v(
 	}
 	return status;
 }
+/* Copy the frequency points (MHz) of clkapidomain into pfreqpointsinmhz. */
+u32 clk_domain_get_f_points(
+	struct gk20a *g,
+	u32 clkapidomain,
+	u32 *pfpointscount,
+	u16 *pfreqpointsinmhz
+)
+{
+	u32 status = -EINVAL;	/* returned as is when no domain matches */
+	struct clk_domain *pdomain;
+	u8 i;
+	struct clk_pmupstate *pclk = &g->clk_pmu;
+	/* *pfpointscount: in = room in the array, out = number of points */
+	if (pfpointscount == NULL)
+		return -EINVAL;
+	/* a NULL array is accepted only with a zero count (count query) */
+	if ((pfreqpointsinmhz == NULL) && (*pfpointscount != 0))
+		return -EINVAL;
+
+	BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs.super.super),
+			struct clk_domain *, pdomain, i) {
+		if (pdomain->api_domain == clkapidomain) {
+			status = pdomain->clkdomainclkgetfpoints(g, pclk,
+				pdomain, pfpointscount,
+				pfreqpointsinmhz,
+				CLK_PROG_VFE_ENTRY_LOGIC);
+			return status;
+		}
+	}
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h
index 0d12ba7d..1f25fa4e 100644
--- a/drivers/gpu/nvgpu/clk/clk.h
+++ b/drivers/gpu/nvgpu/clk/clk.h
@@ -92,5 +92,11 @@ u32 clk_domain_get_f_or_v
 	u16 *pclkmhz,
 	u32 *pvoltuv
 );
+u32 clk_domain_get_f_points(
+	struct gk20a *g,
+	u32 clkapidomain,
+	u32 *fpointscount,
+	u16 *freqpointsinmhz
+);
 
 #endif
diff --git a/drivers/gpu/nvgpu/clk/clk_domain.c b/drivers/gpu/nvgpu/clk/clk_domain.c
index c8da851a..f87530dc 100644
--- a/drivers/gpu/nvgpu/clk/clk_domain.c
+++ b/drivers/gpu/nvgpu/clk/clk_domain.c
@@ -435,6 +435,19 @@ static u32 clkdomainvfsearch_stub(
 	return -EINVAL;
 }
 
+static u32 clkdomaingetfpoints_stub(
+	struct gk20a *g,
+	struct clk_pmupstate *pclk,
+	struct clk_domain *pdomain,
+	u32 *pfpointscount,
+	u16 *pfreqpointsinmhz,
+	u8 rail
+)
+{	/* default clkdomainclkgetfpoints callback: ignores its arguments */
+	gk20a_dbg_info("");
+	return -EINVAL;
+}
+
 
 static u32 clk_domain_construct_super(struct gk20a *g,
 				      struct boardobj **ppboardobj,
@@ -461,6 +474,9 @@ static u32 clk_domain_construct_super(struct gk20a *g,
 	pdomain->clkdomainclkvfsearch =
 			clkdomainvfsearch_stub;
 
+	pdomain->clkdomainclkgetfpoints =
+			clkdomaingetfpoints_stub;
+
 	pdomain->api_domain = ptmpdomain->api_domain;
 	pdomain->domain = ptmpdomain->domain;
 	pdomain->perf_domain_grp_idx =
@@ -626,6 +642,70 @@ done:
 	return status;
 }
 
+static u32 clkdomaingetfpoints
+(
+	struct gk20a *g,
+	struct clk_pmupstate *pclk,
+	struct clk_domain *pdomain,
+	u32 *pfpointscount,
+	u16 *pfreqpointsinmhz,
+	u8 rail
+)
+{
+	u32 status = 0;
+	struct clk_domain_3x_master *p3xmaster  =
+		(struct clk_domain_3x_master *)pdomain;
+	struct clk_prog *pprog = NULL;
+	struct clk_prog_1x_master *pprog1xmaster = NULL;
+	u32 fpointscount = 0;
+	u32 remainingcount;
+	u32 totalcount;
+	u16 *freqpointsdata;
+	u8 i;
+	/* Gather the frequency points of every CLK_PROG of a non-slave domain. */
+	gk20a_dbg_info("");
+
+	if (pfpointscount == NULL)
+		return -EINVAL;
+
+	if ((pfreqpointsinmhz == NULL) && (*pfpointscount != 0))
+		return -EINVAL;
+
+	if (pdomain->super.implements(g, &pdomain->super,
+			CTRL_CLK_CLK_DOMAIN_TYPE_3X_SLAVE))
+		return -EINVAL;
+	/* fpointscount is in/out per CLK_PROG: room left in, points found out */
+	freqpointsdata = pfreqpointsinmhz;
+	totalcount = 0;
+	fpointscount = *pfpointscount;
+	remainingcount = fpointscount;
+	/* Iterate over the set of CLK_PROGs pointed at by this domain.*/
+	for (i = p3xmaster->super.clk_prog_idx_first;
+	     i <= p3xmaster->super.clk_prog_idx_last;
+	     i++) {
+		pprog = CLK_CLK_PROG_GET(pclk, i);
+		pprog1xmaster = (struct clk_prog_1x_master *)pprog;
+		status = pprog1xmaster->getfpoints(g, pclk, pprog1xmaster,
+				&fpointscount, &freqpointsdata, rail);
+		if (status) {
+			*pfpointscount = 0;
+			goto done;
+		}
+		totalcount += fpointscount;
+		if (*pfpointscount) {
+			remainingcount -= fpointscount;
+			fpointscount = remainingcount;
+		} else	/* caller passed a zero count: no room to track */
+			fpointscount = 0;
+		/* NOTE(review): pprog is cast without a 1X_MASTER type check */
+	}
+
+	*pfpointscount = totalcount;
+done:
+	gk20a_dbg_info("done status %x", status);
+	return status;
+}
+
 static u32 _clk_domain_pmudatainit_3x_prog(struct gk20a *g,
 					   struct boardobj *board_obj_ptr,
 					   struct nv_pmu_boardobj *ppmudata)
@@ -689,6 +769,9 @@ static u32 clk_domain_construct_3x_prog(struct gk20a *g,
 	pdomain->super.super.clkdomainclkvfsearch =
 				clkdomainvfsearch;
 
+	pdomain->super.super.clkdomainclkgetfpoints =
+				clkdomaingetfpoints;
+
 	pdomain->clk_prog_idx_first = ptmpdomain->clk_prog_idx_first;
 	pdomain->clk_prog_idx_last = ptmpdomain->clk_prog_idx_last;
 	pdomain->noise_unaware_ordering_index =
diff --git a/drivers/gpu/nvgpu/clk/clk_domain.h b/drivers/gpu/nvgpu/clk/clk_domain.h
index 07976a2a..eeb7c256 100644
--- a/drivers/gpu/nvgpu/clk/clk_domain.h
+++ b/drivers/gpu/nvgpu/clk/clk_domain.h
@@ -33,6 +33,11 @@ typedef u32 clkproglink(struct gk20a *g, struct clk_pmupstate *pclk,
 typedef u32 clkvfsearch(struct gk20a *g, struct clk_pmupstate *pclk,
 			struct clk_domain *pdomain, u16 *clkmhz,
 			u32 *voltuv, u8 rail);
+
+typedef u32 clkgetfpoints(struct gk20a *g, struct clk_pmupstate *pclk,
+			struct clk_domain *pdomain, u32 *pfpointscount,
+			  u16 *pfreqpointsinmhz, u8 rail);
+
 struct clk_domains {
 	struct boardobjgrp_e32 super;
 	u8 n_num_entries;
@@ -61,6 +66,7 @@ struct clk_domain {
 	u8 usage;
 	clkproglink *clkdomainclkproglink;
 	clkvfsearch *clkdomainclkvfsearch;
+	clkgetfpoints *clkdomainclkgetfpoints;
 };
 
 struct clk_domain_3x {
diff --git a/drivers/gpu/nvgpu/clk/clk_prog.c b/drivers/gpu/nvgpu/clk/clk_prog.c
index 5e4700a0..cb9a0e8d 100644
--- a/drivers/gpu/nvgpu/clk/clk_prog.c
+++ b/drivers/gpu/nvgpu/clk/clk_prog.c
@@ -30,6 +30,7 @@ static u32 devinit_get_clk_prog_table(struct gk20a *g,
 	struct clk_progs *pprogobjs);
 static vf_flatten vfflatten_prog_1x_master;
 static vf_lookup vflookup_prog_1x_master;
+static get_fpoints getfpoints_prog_1x_master;
 
 static u32 _clk_progs_pmudatainit(struct gk20a *g,
 				  struct boardobjgrp *pboardobjgrp,
@@ -607,6 +608,9 @@ static u32 clk_prog_construct_1x_master(struct gk20a *g,
 	pclkprog->vflookup =
 			vflookup_prog_1x_master;
 
+	pclkprog->getfpoints =
+			getfpoints_prog_1x_master;
+
 	pclkprog->p_vf_entries = (struct ctrl_clk_clk_prog_1x_master_vf_entry *)
 		kzalloc(vfsize, GFP_KERNEL);
 
@@ -984,3 +988,60 @@ static u32 vflookup_prog_1x_master
 		return -EINVAL;
 	return 0;
 }
+/* Report, and optionally copy out, the frequency points of one CLK_PROG. */
+static u32 getfpoints_prog_1x_master
+(
+	struct gk20a *g,
+	struct clk_pmupstate *pclk,
+	struct clk_prog_1x_master *p1xmaster,
+	u32 *pfpointscount,
+	u16 **ppfreqpointsinmhz,
+	u8 rail
+)
+{
+	/* *pfpointscount: in = room in the array, out = points of this prog */
+	struct ctrl_clk_clk_prog_1x_master_vf_entry
+		*pvfentry;
+	struct clk_vf_point *pvfpoint;
+	struct clk_progs *pclkprogobjs;
+	u8 j;
+	u32 fpointscount = 0;
+
+	if (pfpointscount == NULL)
+		return -EINVAL;
+
+	pclkprogobjs = &(pclk->clk_progobjs);
+
+	if (pclkprogobjs->vf_entry_count >
+		CTRL_CLK_CLK_PROG_1X_MASTER_VF_ENTRY_MAX_ENTRIES)
+		return -EINVAL;
+
+	if (rail >= pclkprogobjs->vf_entry_count)
+		return -EINVAL;
+
+	pvfentry =  p1xmaster->p_vf_entries;
+	/* NOTE(review): indexes entry rail+1 though rail was checked - confirm */
+	pvfentry = (struct ctrl_clk_clk_prog_1x_master_vf_entry *)(
+			(u8 *)pvfentry +
+			(sizeof(struct ctrl_clk_clk_prog_1x_master_vf_entry) *
+			(rail+1)));
+
+	fpointscount = pvfentry->vf_point_idx_last -
+		pvfentry->vf_point_idx_first + 1;
+
+	/* if pointer for freq data is NULL simply return count */
+	if (*ppfreqpointsinmhz == NULL)
+		goto done;
+
+	if (fpointscount > *pfpointscount)
+		return -ENOMEM;	/* not enough room left in the array */
+	for (j = pvfentry->vf_point_idx_first;
+		j <= pvfentry->vf_point_idx_last; j++) {
+		pvfpoint = CLK_CLK_VF_POINT_GET(pclk, j);
+		**ppfreqpointsinmhz = clkvfpointfreqmhzget(g, pvfpoint);
+		(*ppfreqpointsinmhz)++;	/* advance the caller's cursor */
+	}
+done:
+	*pfpointscount = fpointscount;
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/clk/clk_prog.h b/drivers/gpu/nvgpu/clk/clk_prog.h
index 979d327d..be92b3fc 100644
--- a/drivers/gpu/nvgpu/clk/clk_prog.h
+++ b/drivers/gpu/nvgpu/clk/clk_prog.h
@@ -32,6 +32,12 @@ typedef u32 vf_lookup(struct gk20a *g, struct clk_pmupstate *pclk,
 			u8 *slave_clk_domain_idx, u16 *pclkmhz,
 			u32 *pvoltuv, u8 rail);
 
+typedef u32 get_fpoints(struct gk20a *g, struct clk_pmupstate *pclk,
+			struct clk_prog_1x_master *p1xmaster,
+			u32 *pfpointscount,
+			u16 **ppfreqpointsinmhz, u8 rail);
+
+
 struct clk_progs {
 	struct boardobjgrp_e255 super;
 	u8 slave_entry_count;
@@ -58,6 +64,7 @@ struct clk_prog_1x_master {
 	union ctrl_clk_clk_prog_1x_master_source_data source_data;
 	vf_flatten *vfflatten;
 	vf_lookup *vflookup;
+	get_fpoints *getfpoints;
 };
 
 struct clk_prog_1x_master_ratio {
-- 
cgit v1.2.2


From c7fbd76e7101b7dedc8c0f04437288d1d6b78adc Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Fri, 16 Sep 2016 18:56:22 +0530
Subject: gpu: nvgpu: create function to program coreclk

JIRA DNVGPU-123

now a function can be called with GPC2CLK value
It will take care of calculating slave clock values
and calling VF inject to program clock
Made programming of boot clock code to use this
newly created function.

Change-Id: I74de7e9d98e379e94175ed2d9745ce3ab6c70691
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/1221976
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1235056
---
 drivers/gpu/nvgpu/clk/clk.c        | 229 +++++++++++++++++++++++++++++++++++--
 drivers/gpu/nvgpu/clk/clk.h        |  24 +++-
 drivers/gpu/nvgpu/clk/clk_domain.c |  69 ++++++++---
 drivers/gpu/nvgpu/clk/clk_domain.h |   7 +-
 drivers/gpu/nvgpu/clk/clk_prog.c   |  55 ++++++++-
 drivers/gpu/nvgpu/clk/clk_prog.h   |   6 +
 drivers/gpu/nvgpu/pstate/pstate.c  |   4 +-
 7 files changed, 358 insertions(+), 36 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c
index 7ee4f283..918cd43c 100644
--- a/drivers/gpu/nvgpu/clk/clk.c
+++ b/drivers/gpu/nvgpu/clk/clk.c
@@ -104,7 +104,7 @@ done:
 	return status;
 }
 
-u32 clk_pmu_vf_inject(struct gk20a *g)
+static u32 clk_pmu_vf_inject(struct gk20a *g, struct set_fll_clk *setfllclk)
 {
 	struct pmu_cmd cmd;
 	struct pmu_msg msg;
@@ -115,35 +115,48 @@ u32 clk_pmu_vf_inject(struct gk20a *g)
 	struct clkrpc_pmucmdhandler_params handler = {0};
 	struct nv_pmu_clk_vf_change_inject *vfchange;
 
+	if ((setfllclk->gpc2clkmhz == 0) || (setfllclk->xbar2clkmhz == 0) ||
+		(setfllclk->sys2clkmhz == 0) || (setfllclk->voltuv == 0))
+		return -EINVAL;
+
+	if ((setfllclk->target_regime_id_gpc > CTRL_CLK_FLL_REGIME_ID_FR) ||
+		(setfllclk->target_regime_id_sys > CTRL_CLK_FLL_REGIME_ID_FR) ||
+		(setfllclk->target_regime_id_xbar > CTRL_CLK_FLL_REGIME_ID_FR))
+		return -EINVAL;
+
 	rpccall.function = NV_PMU_CLK_RPC_ID_CLK_VF_CHANGE_INJECT;
 	vfchange = &rpccall.params.clk_vf_change_inject;
 	vfchange->flags = 0;
 	vfchange->clk_list.num_domains = 3;
 	vfchange->clk_list.clk_domains[0].clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
-	vfchange->clk_list.clk_domains[0].clk_freq_khz = 2581 * 1000;
+	vfchange->clk_list.clk_domains[0].clk_freq_khz =
+					setfllclk->gpc2clkmhz * 1000;
 	vfchange->clk_list.clk_domains[0].clk_flags = 0;
 	vfchange->clk_list.clk_domains[0].current_regime_id =
-		CTRL_CLK_FLL_REGIME_ID_FFR;
+		setfllclk->current_regime_id_gpc;
 	vfchange->clk_list.clk_domains[0].target_regime_id =
-		CTRL_CLK_FLL_REGIME_ID_FR;
+		setfllclk->target_regime_id_gpc;
 	vfchange->clk_list.clk_domains[1].clk_domain = CTRL_CLK_DOMAIN_XBAR2CLK;
-	vfchange->clk_list.clk_domains[1].clk_freq_khz = 2505 * 1000;
+	vfchange->clk_list.clk_domains[1].clk_freq_khz =
+					setfllclk->xbar2clkmhz * 1000;
 	vfchange->clk_list.clk_domains[1].clk_flags = 0;
 	vfchange->clk_list.clk_domains[1].current_regime_id =
-		CTRL_CLK_FLL_REGIME_ID_FFR;
+		setfllclk->current_regime_id_xbar;
 	vfchange->clk_list.clk_domains[1].target_regime_id =
-		CTRL_CLK_FLL_REGIME_ID_FR;
+		setfllclk->target_regime_id_xbar;
 	vfchange->clk_list.clk_domains[2].clk_domain = CTRL_CLK_DOMAIN_SYS2CLK;
-	vfchange->clk_list.clk_domains[2].clk_freq_khz = 2328 * 1000;
+	vfchange->clk_list.clk_domains[2].clk_freq_khz =
+					setfllclk->sys2clkmhz * 1000;
 	vfchange->clk_list.clk_domains[2].clk_flags = 0;
 	vfchange->clk_list.clk_domains[2].current_regime_id =
-		CTRL_CLK_FLL_REGIME_ID_FFR;
+		setfllclk->current_regime_id_sys;
 	vfchange->clk_list.clk_domains[2].target_regime_id =
-		CTRL_CLK_FLL_REGIME_ID_FR;
+		setfllclk->target_regime_id_sys;
 	vfchange->volt_list.num_rails = 1;
 	vfchange->volt_list.rails[0].volt_domain = CTRL_VOLT_DOMAIN_LOGIC;
-	vfchange->volt_list.rails[0].voltage_uv = 825000;
-	vfchange->volt_list.rails[0].voltage_min_noise_unaware_uv = 825000;
+	vfchange->volt_list.rails[0].voltage_uv = setfllclk->voltuv;
+	vfchange->volt_list.rails[0].voltage_min_noise_unaware_uv =
+				setfllclk->voltuv;
 
 	cmd.hdr.unit_id = PMU_UNIT_CLK;
 	cmd.hdr.size =  (u32)sizeof(struct nv_pmu_clk_cmd) +
@@ -189,6 +202,198 @@ done:
 	return status;
 }
 
+/* Pick the FLL regime for 'domain' at clkmhz: FR when the fixed-frequency
+ * limit is >= clkmhz, FFR otherwise; INVALID when no FLL matches 'domain'. */
+static u32 find_regime_id(struct gk20a *g, u32 domain, u16 clkmhz)
+{
+	struct fll_device *fll;
+	u8 idx;
+
+	BOARDOBJGRP_FOR_EACH(&(g->clk_pmu.avfs_fllobjs.super.super),
+		struct fll_device *, fll, idx) {
+		if (fll->clk_domain == domain) {
+			if (fll->regime_desc.fixed_freq_regime_limit_mhz >=
+							clkmhz)
+				return CTRL_CLK_FLL_REGIME_ID_FR;
+			return CTRL_CLK_FLL_REGIME_ID_FFR;
+		}
+	}
+	return CTRL_CLK_FLL_REGIME_ID_INVALID;
+}
+
+/* Store regimeid on the FLL device matching 'domain'; -EINVAL if none does. */
+static int set_regime_id(struct gk20a *g, u32 domain, u32 regimeid)
+{
+	struct fll_device *fll;
+	u8 idx;
+
+	BOARDOBJGRP_FOR_EACH(&(g->clk_pmu.avfs_fllobjs.super.super),
+		struct fll_device *, fll, idx) {
+		if (fll->clk_domain == domain) {
+			fll->regime_desc.regime_id = regimeid;
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+
+/* Read the regime id of the FLL device matching 'domain'; -EINVAL if none. */
+static int get_regime_id(struct gk20a *g, u32 domain, u32 *regimeid)
+{
+	struct fll_device *fll;
+	u8 idx;
+
+	BOARDOBJGRP_FOR_EACH(&(g->clk_pmu.avfs_fllobjs.super.super),
+		struct fll_device *, fll, idx) {
+		if (fll->clk_domain == domain) {
+			*regimeid = fll->regime_desc.regime_id;
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+
+/*
+ * Program GPC2CLK to fllclk->clkmhz at fllclk->voltuv, with the XBAR2CLK and
+ * SYS2CLK slave clocks derived from the master. Returns 0 or a negative errno.
+ * The cached regime ids are only updated after a successful VF inject.
+ */
+int clk_program_fllclks(struct gk20a *g, struct change_fll_clk *fllclk)
+{
+	int status = -EINVAL;
+	struct clk_domain *pdomain;
+	u8 i;
+	u8 slaveidx;
+	struct clk_pmupstate *pclk = &g->clk_pmu;
+	u16 clkmhz = 0;
+	struct clk_domain_3x_master *p3xmaster;
+	struct clk_domain_3x_slave *p3xslave;
+	unsigned long slaveidxmask;
+	struct set_fll_clk setfllclk;
+	bool foundxbar2clk = false;
+	bool foundsys2clk = false;
+
+	memset(&setfllclk, 0, sizeof(setfllclk));
+	if ((fllclk->api_clk_domain != CTRL_CLK_DOMAIN_GPC2CLK) ||
+	    (fllclk->voltuv == 0) || (fllclk->clkmhz == 0))
+		return -EINVAL;
+
+	mutex_lock(&pclk->changeclkmutex);
+	setfllclk.voltuv = fllclk->voltuv;
+	setfllclk.gpc2clkmhz = fllclk->clkmhz;
+
+	BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs.super.super),
+			struct clk_domain *, pdomain, i) {
+		if (pdomain->api_domain == fllclk->api_clk_domain) {
+			if (!pdomain->super.implements(g, &pdomain->super,
+				CTRL_CLK_CLK_DOMAIN_TYPE_3X_MASTER)) {
+				status = -EINVAL;
+				goto done;
+			}
+			p3xmaster = (struct clk_domain_3x_master *)pdomain;
+			slaveidxmask = p3xmaster->slave_idxs_mask;
+			/* own index so the outer iterator 'i' is not clobbered */
+			for_each_set_bit(slaveidx, &slaveidxmask, 32) {
+				p3xslave = (struct clk_domain_3x_slave *)
+					CLK_CLK_DOMAIN_GET(pclk, slaveidx);
+				if ((p3xslave->super.super.super.api_domain !=
+				     CTRL_CLK_DOMAIN_XBAR2CLK) &&
+				    (p3xslave->super.super.super.api_domain !=
+				     CTRL_CLK_DOMAIN_SYS2CLK))
+					continue;
+				clkmhz = 0;
+				status = p3xslave->clkdomainclkgetslaveclk(g,
+						pclk,
+						(struct clk_domain *)p3xslave,
+						&clkmhz,
+						fllclk->clkmhz);
+				if (status) {
+					status = -EINVAL;
+					goto done;
+				}
+				if (p3xslave->super.super.super.api_domain ==
+					CTRL_CLK_DOMAIN_XBAR2CLK) {
+					setfllclk.xbar2clkmhz = clkmhz;
+					foundxbar2clk = true;
+				}
+				if (p3xslave->super.super.super.api_domain ==
+					CTRL_CLK_DOMAIN_SYS2CLK) {
+					setfllclk.sys2clkmhz = clkmhz;
+					foundsys2clk = true;
+				}
+			}
+		}
+	}
+	if (!(foundxbar2clk && foundsys2clk)) {
+		status = -EINVAL;
+		goto done;
+	}
+	/* read the current regime ids and pick the target ones */
+	status = get_regime_id(g, CTRL_CLK_DOMAIN_GPC2CLK,
+			&setfllclk.current_regime_id_gpc);
+	if (status)
+		goto done;
+
+	setfllclk.target_regime_id_gpc = find_regime_id(g,
+			CTRL_CLK_DOMAIN_GPC2CLK, setfllclk.gpc2clkmhz);
+
+	status = get_regime_id(g, CTRL_CLK_DOMAIN_SYS2CLK,
+			&setfllclk.current_regime_id_sys);
+	if (status)
+		goto done;
+
+	setfllclk.target_regime_id_sys = find_regime_id(g,
+			CTRL_CLK_DOMAIN_SYS2CLK, setfllclk.sys2clkmhz);
+
+	status = get_regime_id(g, CTRL_CLK_DOMAIN_XBAR2CLK,
+			&setfllclk.current_regime_id_xbar);
+	if (status)
+		goto done;
+
+	setfllclk.target_regime_id_xbar = find_regime_id(g,
+			CTRL_CLK_DOMAIN_XBAR2CLK, setfllclk.xbar2clkmhz);
+
+	status = clk_pmu_vf_inject(g, &setfllclk);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"vf inject to change clk failed");
+		goto done;
+	}
+
+	/* save regime ids */
+	status = set_regime_id(g, CTRL_CLK_DOMAIN_XBAR2CLK,
+			setfllclk.target_regime_id_xbar);
+	if (status)
+		goto done;
+
+	status = set_regime_id(g, CTRL_CLK_DOMAIN_GPC2CLK,
+			setfllclk.target_regime_id_gpc);
+	if (status)
+		goto done;
+
+	status = set_regime_id(g, CTRL_CLK_DOMAIN_SYS2CLK,
+			setfllclk.target_regime_id_sys);
+done:
+	mutex_unlock(&pclk->changeclkmutex);
+	return status;
+}
+
+/* Initialise the clock-change mutex and program the boot GPC2CLK setting. */
+int clk_set_boot_fll_clk(struct gk20a *g)
+{
+	int status;
+	struct change_fll_clk bootfllclk;
+
+	mutex_init(&g->clk_pmu.changeclkmutex);
+	bootfllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
+	bootfllclk.clkmhz = 2581;
+	bootfllclk.voltuv = 825000;
+	status = clk_program_fllclks(g, &bootfllclk);
+	if (status)
+		gk20a_err(dev_from_gk20a(g), "attempt to set boot clk failed");
+	return status;
+}
+
 u32 clk_domain_print_vf_table(struct gk20a *g, u32 clkapidomain)
 {
 	u32 status = -EINVAL;
diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h
index 1f25fa4e..e54af521 100644
--- a/drivers/gpu/nvgpu/clk/clk.h
+++ b/drivers/gpu/nvgpu/clk/clk.h
@@ -35,6 +35,7 @@ struct clk_pmupstate {
 	struct clk_progs clk_progobjs;
 	struct clk_vf_points clk_vf_pointobjs;
 	struct clk_mclk_state clk_mclk;
+	struct mutex changeclkmutex;
 };
 
 struct clockentry {
@@ -44,6 +45,25 @@ struct clockentry {
 		u32 api_clk_domain;
 };
 
+struct change_fll_clk {
+		u32 api_clk_domain;
+		u16 clkmhz;
+		u32 voltuv;
+};
+
+struct set_fll_clk {
+		u32 voltuv;
+		u16 gpc2clkmhz;
+		u32 current_regime_id_gpc;
+		u32 target_regime_id_gpc;
+		u16 sys2clkmhz;
+		u32 current_regime_id_sys;
+		u32 target_regime_id_sys;
+		u16 xbar2clkmhz;
+		u32 current_regime_id_xbar;
+		u32 target_regime_id_xbar;
+};
+
 #define NV_PERF_HEADER_4X_CLOCKS_DOMAINS_MAX_NUMCLKS         9
 
 struct vbios_clock_domain {
@@ -82,7 +102,6 @@ struct vbios_clocks_table_1x_hal_clock_entry {
 #define PERF_CLK_PCIEGENCLK     12
 #define PERF_CLK_NUM            13
 
-u32 clk_pmu_vf_inject(struct gk20a *g);
 u32 clk_pmu_vin_load(struct gk20a *g);
 u32 clk_domain_print_vf_table(struct gk20a *g, u32 clkapidomain);
 u32 clk_domain_get_f_or_v
@@ -98,5 +117,6 @@ u32 clk_domain_get_f_points(
 	u32 *fpointscount,
 	u16 *freqpointsinmhz
 );
-
+int clk_set_boot_fll_clk(struct gk20a *g);
+int clk_program_fllclks(struct gk20a *g, struct change_fll_clk *fllclk);
 #endif
diff --git a/drivers/gpu/nvgpu/clk/clk_domain.c b/drivers/gpu/nvgpu/clk/clk_domain.c
index f87530dc..fe3db5d6 100644
--- a/drivers/gpu/nvgpu/clk/clk_domain.c
+++ b/drivers/gpu/nvgpu/clk/clk_domain.c
@@ -422,14 +422,14 @@ static u32 clkdomainclkproglink_not_supported(struct gk20a *g,
 	return -EINVAL;
 }
 
-static u32 clkdomainvfsearch_stub(
+static int clkdomainvfsearch_stub(
 	struct gk20a *g,
 	struct clk_pmupstate *pclk,
 	struct clk_domain *pdomain,
 	u16 *clkmhz,
 	u32 *voltuv,
-	u8 rail
-)
+	u8 rail)
+
 {
 	gk20a_dbg_info("");
 	return -EINVAL;
@@ -441,8 +441,7 @@ static u32 clkdomaingetfpoints_stub(
 	struct clk_domain *pdomain,
 	u32 *pfpointscount,
 	u16 *pfreqpointsinmhz,
-	u8 rail
-)
+	u8 rail)
 {
 	gk20a_dbg_info("");
 	return -EINVAL;
@@ -556,17 +555,47 @@ static u32 clkdomainclkproglink_3x_prog(struct gk20a *g,
 	return status;
 }
 
-static u32 clkdomainvfsearch
-(
-	struct gk20a *g,
-	struct clk_pmupstate *pclk,
-	struct clk_domain *pdomain,
-	u16 *pclkmhz,
-	u32 *pvoltuv,
-	u8 rail
-)
+static int clkdomaingetslaveclk(struct gk20a *g,
+				struct clk_pmupstate *pclk,
+				struct clk_domain *pdomain,
+				u16 *pclkmhz,
+				u16 masterclkmhz)
 {
-	u32 status = 0;
+	int status = 0;
+	struct clk_prog *pprog = NULL;
+	struct clk_prog_1x_master *pprog1xmaster = NULL;
+	u8 slaveidx;
+	struct clk_domain_3x_master *p3xmaster;
+
+	gk20a_dbg_info("");
+
+	if (pclkmhz == NULL)
+		return -EINVAL;
+
+	if (masterclkmhz == 0)
+		return -EINVAL;
+
+	slaveidx = BOARDOBJ_GET_IDX(pdomain);
+	p3xmaster = (struct clk_domain_3x_master *)
+			CLK_CLK_DOMAIN_GET(pclk,
+			((struct clk_domain_3x_slave *)
+				pdomain)->master_idx);
+	pprog = CLK_CLK_PROG_GET(pclk, p3xmaster->super.clk_prog_idx_first);
+	pprog1xmaster = (struct clk_prog_1x_master *)pprog;
+
+	status = pprog1xmaster->getslaveclk(g, pclk, pprog1xmaster,
+			slaveidx, pclkmhz, masterclkmhz);
+	return status;
+}
+
+static int clkdomainvfsearch(struct gk20a *g,
+				struct clk_pmupstate *pclk,
+				struct clk_domain *pdomain,
+				u16 *pclkmhz,
+				u32 *pvoltuv,
+				u8 rail)
+{
+	int status = 0;
 	struct clk_domain_3x_master *p3xmaster  =
 		(struct clk_domain_3x_master *)pdomain;
 	struct clk_prog *pprog = NULL;
@@ -580,6 +609,10 @@ static u32 clkdomainvfsearch
 	u32 bestvoltuv;
 
 	gk20a_dbg_info("");
+
+	if ((pclkmhz == NULL) || (pvoltuv == NULL))
+		return -EINVAL;
+
 	if ((*pclkmhz != 0) && (*pvoltuv != 0))
 		return -EINVAL;
 
@@ -595,7 +628,6 @@ static u32 clkdomainvfsearch
 				((struct clk_domain_3x_slave *)
 					pdomain)->master_idx);
 	}
-
 	/* Iterate over the set of CLK_PROGs pointed at by this domain.*/
 	for (i = p3xmaster->super.clk_prog_idx_first;
 	     i <= p3xmaster->super.clk_prog_idx_last;
@@ -625,7 +657,7 @@ static u32 clkdomainvfsearch
 			}
 		}
 	}
-	/* clk and volt sent as zero to pring vf table */
+	/* clk and volt sent as zero to print vf table */
 	if ((*pclkmhz == 0) && (*pvoltuv == 0)) {
 		status = 0;
 		goto done;
@@ -836,6 +868,9 @@ static u32 clk_domain_construct_3x_slave(struct gk20a *g,
 
 	pdomain->master_idx = ptmpdomain->master_idx;
 
+	pdomain->clkdomainclkgetslaveclk =
+				clkdomaingetslaveclk;
+
 	return status;
 }
 
diff --git a/drivers/gpu/nvgpu/clk/clk_domain.h b/drivers/gpu/nvgpu/clk/clk_domain.h
index eeb7c256..443e1c4c 100644
--- a/drivers/gpu/nvgpu/clk/clk_domain.h
+++ b/drivers/gpu/nvgpu/clk/clk_domain.h
@@ -30,10 +30,14 @@ u32 clk_domain_pmu_setup(struct gk20a *g);
 typedef u32 clkproglink(struct gk20a *g, struct clk_pmupstate *pclk,
 			struct clk_domain *pdomain);
 
-typedef u32 clkvfsearch(struct gk20a *g, struct clk_pmupstate *pclk,
+typedef int clkvfsearch(struct gk20a *g, struct clk_pmupstate *pclk,
 			struct clk_domain *pdomain, u16 *clkmhz,
 			u32 *voltuv, u8 rail);
 
+typedef int clkgetslaveclk(struct gk20a *g, struct clk_pmupstate *pclk,
+			struct clk_domain *pdomain, u16 *clkmhz,
+			u16 masterclkmhz);
+
 typedef u32 clkgetfpoints(struct gk20a *g, struct clk_pmupstate *pclk,
 			struct clk_domain *pdomain, u32 *pfpointscount,
 			  u16 *pfreqpointsinmhz, u8 rail);
@@ -100,6 +104,7 @@ struct clk_domain_3x_master {
 struct clk_domain_3x_slave {
 	struct clk_domain_3x_prog super;
 	u8 master_idx;
+	clkgetslaveclk *clkdomainclkgetslaveclk;
 };
 
 u32 clk_domain_clk_prog_link(struct gk20a *g, struct clk_pmupstate *pclk);
diff --git a/drivers/gpu/nvgpu/clk/clk_prog.c b/drivers/gpu/nvgpu/clk/clk_prog.c
index cb9a0e8d..9fdd8b25 100644
--- a/drivers/gpu/nvgpu/clk/clk_prog.c
+++ b/drivers/gpu/nvgpu/clk/clk_prog.c
@@ -31,6 +31,7 @@ static u32 devinit_get_clk_prog_table(struct gk20a *g,
 static vf_flatten vfflatten_prog_1x_master;
 static vf_lookup vflookup_prog_1x_master;
 static get_fpoints getfpoints_prog_1x_master;
+static get_slaveclk getslaveclk_prog_1x_master;
 
 static u32 _clk_progs_pmudatainit(struct gk20a *g,
 				  struct boardobjgrp *pboardobjgrp,
@@ -611,6 +612,9 @@ static u32 clk_prog_construct_1x_master(struct gk20a *g,
 	pclkprog->getfpoints =
 			getfpoints_prog_1x_master;
 
+	pclkprog->getslaveclk =
+			getslaveclk_prog_1x_master;
+
 	pclkprog->p_vf_entries = (struct ctrl_clk_clk_prog_1x_master_vf_entry *)
 		kzalloc(vfsize, GFP_KERNEL);
 
@@ -851,7 +855,7 @@ static u32 vflookup_prog_1x_master
 	u8 rail
 )
 {
-	u8 j;
+	int j;
 	struct ctrl_clk_clk_prog_1x_master_vf_entry
 		*pvfentry;
 	struct clk_vf_point *pvfpoint;
@@ -860,7 +864,7 @@ static u32 vflookup_prog_1x_master
 	u16 clkmhz;
 	u32 voltuv;
 	u8 slaveentrycount;
-	u8 i;
+	int i;
 	struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry *pslaveents;
 
 	if ((*pclkmhz != 0) && (*pvoltuv != 0))
@@ -1045,3 +1049,50 @@ done:
 	*pfpointscount = fpointscount;
 	return 0;
 }
+
+/*
+ * Compute the frequency of the slave clock domain with index slave_clk_domain
+ * from masterclkmhz, using the ratio stored in the matching slave entry of a
+ * 1X_MASTER_RATIO clk prog. *pclkmhz is zeroed first; returns 0 or -EINVAL.
+ */
+static int getslaveclk_prog_1x_master(struct gk20a *g,
+				struct clk_pmupstate *pclk,
+				struct clk_prog_1x_master *p1xmaster,
+				u8 slave_clk_domain,
+				u16 *pclkmhz,
+				u16 masterclkmhz)
+{
+	struct ctrl_clk_clk_prog_1x_master_ratio_slave_entry *entry;
+	struct clk_prog_1x_master_ratio *ratioprog;
+	u8 nentries;
+	u8 idx;
+
+	if (pclkmhz == NULL)
+		return -EINVAL;
+
+	if (masterclkmhz == 0)
+		return -EINVAL;
+
+	*pclkmhz = 0;
+	nentries = pclk->clk_progobjs.slave_entry_count;
+
+	/* only support ratio for now */
+	if (!p1xmaster->super.super.super.implements(g,
+		&p1xmaster->super.super.super,
+		CTRL_CLK_CLK_PROG_TYPE_1X_MASTER_RATIO))
+		return -EINVAL;
+
+	/* walk the slave entries until one names the requested domain index */
+	ratioprog = (struct clk_prog_1x_master_ratio *)p1xmaster;
+	entry = ratioprog->p_slave_entries;
+	for (idx = 0; idx < nentries; idx++, entry++) {
+		if (entry->clk_dom_idx == slave_clk_domain)
+			break;
+	}
+	if (idx == nentries)
+		return -EINVAL;
+
+	/* the entry's ratio is applied as a percentage of the master clock */
+	*pclkmhz = (masterclkmhz * entry->ratio) / 100;
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/clk/clk_prog.h b/drivers/gpu/nvgpu/clk/clk_prog.h
index be92b3fc..60711b4c 100644
--- a/drivers/gpu/nvgpu/clk/clk_prog.h
+++ b/drivers/gpu/nvgpu/clk/clk_prog.h
@@ -32,6 +32,11 @@ typedef u32 vf_lookup(struct gk20a *g, struct clk_pmupstate *pclk,
 			u8 *slave_clk_domain_idx, u16 *pclkmhz,
 			u32 *pvoltuv, u8 rail);
 
+typedef int get_slaveclk(struct gk20a *g, struct clk_pmupstate *pclk,
+			struct clk_prog_1x_master *p1xmaster,
+			u8 slave_clk_domain_idx, u16 *pclkmhz,
+			u16 masterclkmhz);
+
 typedef u32 get_fpoints(struct gk20a *g, struct clk_pmupstate *pclk,
 			struct clk_prog_1x_master *p1xmaster,
 			u32 *pfpointscount,
@@ -65,6 +70,7 @@ struct clk_prog_1x_master {
 	vf_flatten *vfflatten;
 	vf_lookup *vflookup;
 	get_fpoints *getfpoints;
+	get_slaveclk *getslaveclk;
 };
 
 struct clk_prog_1x_master_ratio {
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index 94ff5010..d6173275 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -95,11 +95,11 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g)
 	if (err)
 		return err;
 
-	err = clk_pmu_vf_inject(g);
+	err = clk_vf_point_cache(g);
 	if (err)
 		return err;
 
-	err = clk_vf_point_cache(g);
+	err = clk_set_boot_fll_clk(g);
 	return err;
 }
 
-- 
cgit v1.2.2


From 9b11fb9b8d5f8d98ae8479d0da455e66a692e6c8 Mon Sep 17 00:00:00 2001
From: Seema Khowala <seemaj@nvidia.com>
Date: Tue, 11 Oct 2016 13:25:21 -0700
Subject: gpu: nvgpu: gp10b: Don't call already called function

gm20b_init_fb already calls gm20b_init_uncompressed_kind_map()
and gm20b_init_kind_attr().

JIRA GV11B-8

Change-Id: Id72ee1ae04d3a47ae7a6a972f6d8bd7e7bda7389
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: http://git-master/r/1234570
GVS: Gerrit_Virtual_Submit
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/fb_gp10b.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/fb_gp10b.c b/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
index 8b3b2153..9c0bc992 100644
--- a/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
@@ -23,8 +23,6 @@
 
 static void gp10b_init_uncompressed_kind_map(void)
 {
-	gm20b_init_uncompressed_kind_map();
-
 	gk20a_uc_kind_map[gmmu_pte_kind_z16_2cz_v()] =
 	gk20a_uc_kind_map[gmmu_pte_kind_z16_ms2_2cz_v()] =
 	gk20a_uc_kind_map[gmmu_pte_kind_z16_ms4_2cz_v()] =
@@ -77,8 +75,6 @@ static void gp10b_init_kind_attr(void)
 {
 	u16 k;
 
-	gm20b_init_kind_attr();
-
 	for (k = 0; k < 256; k++) {
 		if (gp10b_kind_supported((u8)k))
 			gk20a_kind_attr[k] |= GK20A_KIND_ATTR_SUPPORTED;
-- 
cgit v1.2.2


From cb78f5aa749fcea198851ae4adf6e3acd47b37ac Mon Sep 17 00:00:00 2001
From: Richard Zhao <rizhao@nvidia.com>
Date: Mon, 10 Oct 2016 11:32:05 -0700
Subject: gpu: nvgpu: vgpu: add set_preemption_mode

Implement HAL callback set_preemption_mode

Bug 200238497
JIRA VFND-2683

Change-Id: I8fca8e1ba112d8782ce18f0899eca38a1d12b512
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/1236976
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c | 149 ++++++++++++++++++++++++---
 1 file changed, 132 insertions(+), 17 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
index 78205afb..4746f04b 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
@@ -51,31 +51,80 @@ static int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 				u32 class,
 				u32 flags)
 {
-	struct tegra_vgpu_cmd_msg msg = {0};
-	struct tegra_vgpu_gr_bind_ctxsw_buffers_params *p =
-			&msg.params.gr_bind_ctxsw_buffers;
 	struct gr_ctx_desc *gr_ctx;
+	u32 graphics_preempt_mode = 0;
+	u32 compute_preempt_mode = 0;
 	int err;
 
 	gk20a_dbg_fn("");
 
-	WARN_ON(TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_MAX !=
-		TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_LAST);
-
 	err = vgpu_gr_alloc_gr_ctx(g, __gr_ctx, vm, class, flags);
 	if (err)
 		return err;
 
 	gr_ctx = *__gr_ctx;
 
+	if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP)
+		graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
+	if (flags & NVGPU_ALLOC_OBJ_FLAGS_CILP)
+		compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
+
+	if (graphics_preempt_mode || compute_preempt_mode) {
+		if (g->ops.gr.set_ctxsw_preemption_mode) {
+			err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm,
+			    class, graphics_preempt_mode, compute_preempt_mode);
+			if (err) {
+				gk20a_err(dev_from_gk20a(g),
+					"set_ctxsw_preemption_mode failed");
+				goto fail;
+			}
+		} else {
+			err = -ENOSYS;
+			goto fail;
+		}
+	}
+
+	gk20a_dbg_fn("done");
+	return err;
+
+fail:
+	vgpu_gr_gp10b_free_gr_ctx(g, vm, gr_ctx);
+	return err;
+}
+
+static int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
+				struct gr_ctx_desc *gr_ctx,
+				struct vm_gk20a *vm, u32 class,
+				u32 graphics_preempt_mode,
+				u32 compute_preempt_mode)
+{
+	struct tegra_vgpu_cmd_msg msg = {};
+	struct tegra_vgpu_gr_bind_ctxsw_buffers_params *p =
+				&msg.params.gr_bind_ctxsw_buffers;
+	int err = 0;
+
+	WARN_ON(TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_MAX !=
+		TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_LAST);
+
 	if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp)
-		flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP;
+		graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
 
 	if (class == PASCAL_COMPUTE_A &&
 			g->gr.t18x.ctx_vars.force_preemption_cilp)
-		flags |= NVGPU_ALLOC_OBJ_FLAGS_CILP;
+		compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
+
+	/* check for invalid combinations */
+	if ((graphics_preempt_mode == 0) && (compute_preempt_mode == 0))
+		return -EINVAL;
+
+	if ((graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) &&
+		   (compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP))
+		return -EINVAL;
 
-	if (flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) {
+	/* set preemption modes */
+	switch (graphics_preempt_mode) {
+	case NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP:
+	{
 		u32 spill_size =
 			gr_gpc0_swdx_rm_spill_buffer_size_256b_default_v() *
 			gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
@@ -146,15 +195,37 @@ static int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 
 		gr_ctx->graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
 		p->mode = TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_GFX_GFXP;
+		break;
+	}
+	case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI:
+		gr_ctx->graphics_preempt_mode = graphics_preempt_mode;
+		break;
+
+	default:
+		break;
 	}
 
 	if (class == PASCAL_COMPUTE_A) {
-		if (flags & NVGPU_ALLOC_OBJ_FLAGS_CILP) {
-			gr_ctx->compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
-			p->mode = TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_COMPUTE_CILP;
-		} else {
-			gr_ctx->compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CTA;
-			p->mode = TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_COMPUTE_CTA;
+		switch (compute_preempt_mode) {
+		case NVGPU_COMPUTE_PREEMPTION_MODE_WFI:
+			gr_ctx->compute_preempt_mode =
+				NVGPU_COMPUTE_PREEMPTION_MODE_WFI;
+			p->mode = TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_WFI;
+			break;
+		case NVGPU_COMPUTE_PREEMPTION_MODE_CTA:
+			gr_ctx->compute_preempt_mode =
+				NVGPU_COMPUTE_PREEMPTION_MODE_CTA;
+			p->mode =
+				TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_COMPUTE_CTA;
+			break;
+		case NVGPU_COMPUTE_PREEMPTION_MODE_CILP:
+			gr_ctx->compute_preempt_mode =
+				NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
+			p->mode =
+				TEGRA_VGPU_GR_CTXSW_PREEMPTION_MODE_COMPUTE_CILP;
+			break;
+		default:
+			break;
 		}
 	}
 
@@ -169,11 +240,52 @@ static int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 		}
 	}
 
-	gk20a_dbg_fn("done");
 	return err;
 
 fail:
-	vgpu_gr_gp10b_free_gr_ctx(g, vm, gr_ctx);
+	gk20a_err(dev_from_gk20a(g), "%s failed %d", __func__, err);
+	return err;
+}
+
+static int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
+					u32 graphics_preempt_mode,
+					u32 compute_preempt_mode)
+{
+	struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx;
+	struct gk20a *g = ch->g;
+	struct tsg_gk20a *tsg;
+	struct vm_gk20a *vm;
+	u32 class;
+	int err;
+
+	class = ch->obj_class;
+	if (!class)
+		return -EINVAL;
+
+	/* preemption already set ? */
+	if (gr_ctx->graphics_preempt_mode || gr_ctx->compute_preempt_mode)
+		return -EINVAL;
+
+	if (gk20a_is_channel_marked_as_tsg(ch)) {
+		tsg = &g->fifo.tsg[ch->tsgid];
+		vm = tsg->vm;
+	} else {
+		vm = ch->vm;
+	}
+
+	if (g->ops.gr.set_ctxsw_preemption_mode) {
+		err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class,
+						graphics_preempt_mode,
+						compute_preempt_mode);
+		if (err) {
+			gk20a_err(dev_from_gk20a(g),
+					"set_ctxsw_preemption_mode failed");
+			return err;
+		}
+	} else {
+		err = -ENOSYS;
+	}
+
 	return err;
 }
 
@@ -202,4 +314,7 @@ void vgpu_gp10b_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.alloc_gr_ctx = vgpu_gr_gp10b_alloc_gr_ctx;
 	gops->gr.free_gr_ctx = vgpu_gr_gp10b_free_gr_ctx;
 	gops->gr.init_ctx_state = vgpu_gr_gp10b_init_ctx_state;
+	gops->gr.set_preemption_mode = vgpu_gr_gp10b_set_preemption_mode;
+	gops->gr.set_ctxsw_preemption_mode =
+			vgpu_gr_gp10b_set_ctxsw_preemption_mode;
 }
-- 
cgit v1.2.2


From 90f80a282eff04412858361df35c2f88372e88cb Mon Sep 17 00:00:00 2001
From: Lakshmanan M <lm@nvidia.com>
Date: Thu, 8 Sep 2016 22:58:19 +0530
Subject: gpu: nvgpu: Add pmgr support

This CL covers the following implementation,
1) Power Sensor Table parsing.
2) Power Topology Table parsing.
3) Add debugfs interface to get the current power(mW), current(mA) and
   voltage(uV) information from PMU.
4) Power Policy Table Parsing
5) Implement PMU boardobj interface for pmgr module.
6) Over current protection.

JIRA DNVGPU-47

Change-Id: I7b1eefacc4f0a9824ab94ec8dcebefe81b7660d3
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1217189
(cherry picked from commit ecd0b16316cb4110118c6677f5f03e02921c29b6)
Reviewed-on: http://git-master/r/1241953
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x |   7 +-
 drivers/gpu/nvgpu/include/bios.h      | 156 +++++++-
 drivers/gpu/nvgpu/pmgr/pmgr.c         | 143 +++++++
 drivers/gpu/nvgpu/pmgr/pmgr.h         |  31 ++
 drivers/gpu/nvgpu/pmgr/pmgrpmu.c      | 524 ++++++++++++++++++++++++++
 drivers/gpu/nvgpu/pmgr/pmgrpmu.h      |  29 ++
 drivers/gpu/nvgpu/pmgr/pwrdev.c       | 310 ++++++++++++++++
 drivers/gpu/nvgpu/pmgr/pwrdev.h       |  51 +++
 drivers/gpu/nvgpu/pmgr/pwrmonitor.c   | 365 ++++++++++++++++++
 drivers/gpu/nvgpu/pmgr/pwrmonitor.h   |  60 +++
 drivers/gpu/nvgpu/pmgr/pwrpolicy.c    | 680 ++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/pmgr/pwrpolicy.h    | 117 ++++++
 drivers/gpu/nvgpu/pstate/pstate.c     |   9 +
 13 files changed, 2480 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/pmgr/pmgr.c
 create mode 100644 drivers/gpu/nvgpu/pmgr/pmgr.h
 create mode 100644 drivers/gpu/nvgpu/pmgr/pmgrpmu.c
 create mode 100644 drivers/gpu/nvgpu/pmgr/pmgrpmu.h
 create mode 100644 drivers/gpu/nvgpu/pmgr/pwrdev.c
 create mode 100644 drivers/gpu/nvgpu/pmgr/pwrdev.h
 create mode 100644 drivers/gpu/nvgpu/pmgr/pwrmonitor.c
 create mode 100644 drivers/gpu/nvgpu/pmgr/pwrmonitor.h
 create mode 100644 drivers/gpu/nvgpu/pmgr/pwrpolicy.c
 create mode 100644 drivers/gpu/nvgpu/pmgr/pwrpolicy.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index ceae6006..d5162332 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -44,7 +44,12 @@ nvgpu-y += \
 	$(nvgpu-t18x)/gp106/clk_gp106.o \
 	$(nvgpu-t18x)/gp106/gp106_gating_reglist.o \
 	$(nvgpu-t18x)/gp106/therm_gp106.o \
-	$(nvgpu-t18x)/gp106/xve_gp106.o
+	$(nvgpu-t18x)/gp106/xve_gp106.o \
+	$(nvgpu-t18x)/pmgr/pwrdev.o \
+	$(nvgpu-t18x)/pmgr/pmgr.o \
+	$(nvgpu-t18x)/pmgr/pmgrpmu.o \
+	$(nvgpu-t18x)/pmgr/pwrmonitor.o \
+	$(nvgpu-t18x)/pmgr/pwrpolicy.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
 
diff --git a/drivers/gpu/nvgpu/include/bios.h b/drivers/gpu/nvgpu/include/bios.h
index 83d972e3..d3a677f8 100644
--- a/drivers/gpu/nvgpu/include/bios.h
+++ b/drivers/gpu/nvgpu/include/bios.h
@@ -501,5 +501,159 @@ struct vbios_memory_clock_base_entry_11 {
 #define VBIOS_MEMORY_CLOCK_BASE_ENTRY_12_FLAGS2_CMD_SCRIPT_INDEX_MASK	0x3
 #define VBIOS_MEMORY_CLOCK_BASE_ENTRY_12_FLAGS2_CMD_SCRIPT_INDEX_SHIFT	0
 
-#endif
+#define VBIOS_POWER_SENSORS_VERSION_2X                                      0x20
+#define VBIOS_POWER_SENSORS_2X_HEADER_SIZE_08                         0x00000008
+
+struct pwr_sensors_2x_header {
+	u8 version;
+	u8 header_size;
+	u8 table_entry_size;
+	u8 num_table_entries;
+	u32 ba_script_pointer;
+};
+
+#define VBIOS_POWER_SENSORS_2X_ENTRY_SIZE_15                          0x00000015
+
+struct pwr_sensors_2x_entry {
+	u8 flags0;
+	u32 class_param0;
+	u32 sensor_param0;
+	u32 sensor_param1;
+	u32 sensor_param2;
+	u32 sensor_param3;
+};
+
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_FLAGS0_CLASS_MASK                   0xF
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_FLAGS0_CLASS_SHIFT                    0
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_FLAGS0_CLASS_I2C              0x00000001
+
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_CLASS_PARAM0_I2C_INDEX_MASK        0xFF
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_CLASS_PARAM0_I2C_INDEX_SHIFT          0
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_CLASS_PARAM0_I2C_USE_FXP8_8_MASK  0x100
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_CLASS_PARAM0_I2C_USE_FXP8_8_SHIFT  8
+
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM0_INA3221_RSHUNT0_MOHM_MASK  0xFFFF
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM0_INA3221_RSHUNT0_MOHM_SHIFT  0
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM0_INA3221_RSHUNT1_MOHM_MASK  0xFFFF0000
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM0_INA3221_RSHUNT1_MOHM_SHIFT  16
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM1_INA3221_RSHUNT2_MOHM_MASK   0xFFFF
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM1_INA3221_RSHUNT2_MOHM_SHIFT   0
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM1_INA3221_CONFIGURATION_MASK  0xFFFF0000
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM1_INA3221_CONFIGURATION_SHIFT  16
+
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM2_INA3221_MASKENABLE_MASK    0xFFFF
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM2_INA3221_MASKENABLE_SHIFT    0
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM2_INA3221_GPIOFUNCTION_MASK   0xFF0000
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM2_INA3221_GPIOFUNCTION_SHIFT   16
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM3_INA3221_CURR_CORRECT_M_MASK  0xFFFF
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM3_INA3221_CURR_CORRECT_M_SHIFT  0
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM3_INA3221_CURR_CORRECT_B_MASK  0xFFFF0000
+#define NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM3_INA3221_CURR_CORRECT_B_SHIFT  16
+
+#define VBIOS_POWER_TOPOLOGY_VERSION_2X                                      0x20
+#define VBIOS_POWER_TOPOLOGY_2X_HEADER_SIZE_06                         0x00000006
+
+struct pwr_topology_2x_header {
+	u8 version;
+	u8 header_size;
+	u8 table_entry_size;
+	u8 num_table_entries;
+	u8 rel_entry_size;
+	u8 num_rel_entries;
+};
+
+#define VBIOS_POWER_TOPOLOGY_2X_ENTRY_SIZE_16                          0x00000016
+
+struct pwr_topology_2x_entry {
+	u8 flags0;
+	u8 pwr_rail;
+	u32 param0;
+	u32 curr_corr_slope;
+	u32 curr_corr_offset;
+	u32 param1;
+	u32 param2;
+};
+
+#define NV_VBIOS_POWER_TOPOLOGY_2X_ENTRY_FLAGS0_CLASS_MASK                  0xF
+#define NV_VBIOS_POWER_TOPOLOGY_2X_ENTRY_FLAGS0_CLASS_SHIFT                   0
+#define NV_VBIOS_POWER_TOPOLOGY_2X_ENTRY_FLAGS0_CLASS_SENSOR                0x00000001
+
+#define NV_VBIOS_POWER_TOPOLOGY_2X_ENTRY_PARAM1_SENSOR_INDEX_MASK          0xFF
+#define NV_VBIOS_POWER_TOPOLOGY_2X_ENTRY_PARAM1_SENSOR_INDEX_SHIFT            0
+#define NV_VBIOS_POWER_TOPOLOGY_2X_ENTRY_PARAM1_SENSOR_PROVIDER_INDEX_MASK 0xFF00
+#define NV_VBIOS_POWER_TOPOLOGY_2X_ENTRY_PARAM1_SENSOR_PROVIDER_INDEX_SHIFT   8
+
+#define VBIOS_POWER_POLICY_VERSION_3X                                       0x30
+#define VBIOS_POWER_POLICY_3X_HEADER_SIZE_25                          0x00000025
+
+struct pwr_policy_3x_header_struct {
+	u8 version;
+	u8 header_size;
+	u8 table_entry_size;
+	u8 num_table_entries;
+	u16 base_sample_period;
+	u16 min_client_sample_period;
+	u8 table_rel_entry_size;
+	u8 num_table_rel_entries;
+	u8 tgp_policy_idx;
+	u8 rtp_policy_idx;
+	u8 mxm_policy_idx;
+	u8 dnotifier_policy_idx;
+	u32 d2_limit;
+	u32 d3_limit;
+	u32 d4_limit;
+	u32 d5_limit;
+	u8 low_sampling_mult;
+	u8 pwr_tgt_policy_idx;
+	u8 pwr_tgt_floor_policy_idx;
+	u8 sm_bus_policy_idx;
+	u8 table_viol_entry_size;
+	u8 num_table_viol_entries;
+};
+
+#define VBIOS_POWER_POLICY_3X_ENTRY_SIZE_2E                           0x0000002E
+
+struct pwr_policy_3x_entry_struct {
+	u8 flags0;
+	u8 ch_idx;
+	u32 limit_min;
+	u32 limit_rated;
+	u32 limit_max;
+	u32 param0;
+	u32 param1;
+	u32 param2;
+	u32 param3;
+	u32 limit_batt;
+	u8 flags1;
+	u8 past_length;
+	u8 next_length;
+	u16 ratio_min;
+	u16 ratio_max;
+	u8 sample_mult;
+	u32 filter_param;
+};
+
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS0_CLASS_MASK                    0xF
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS0_CLASS_SHIFT                    0
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS0_CLASS_HW_THRESHOLD        0x00000005
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS0_LIMIT_UNIT_MASK              0x10
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS0_LIMIT_UNIT_SHIFT                4
+
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS1_FULL_DEFLECTION_LIMIT_MASK    0x1
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS1_FULL_DEFLECTION_LIMIT_SHIFT     0
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS1_INTEGRAL_CONTROL_MASK         0x2
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS1_INTEGRAL_CONTROL_SHIFT          1
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS1_FILTER_TYPE_MASK             0x3C
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS1_FILTER_TYPE_SHIFT               2
+
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM0_HW_THRESHOLD_THRES_IDX_MASK  0xFF
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM0_HW_THRESHOLD_THRES_IDX_SHIFT    0
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM0_HW_THRESHOLD_LOW_THRESHOLD_IDX_MASK 0xFF00
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM0_HW_THRESHOLD_LOW_THRESHOLD_IDX_SHIFT 8
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM0_HW_THRESHOLD_LOW_THRESHOLD_USE_MASK 0x10000
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM0_HW_THRESHOLD_LOW_THRESHOLD_USE_SHIFT 16
+
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM1_HW_THRESHOLD_LOW_THRESHOLD_VAL_MASK 0xFFFF
+#define NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM1_HW_THRESHOLD_LOW_THRESHOLD_VAL_SHIFT 0
 
+#endif
diff --git a/drivers/gpu/nvgpu/pmgr/pmgr.c b/drivers/gpu/nvgpu/pmgr/pmgr.c
new file mode 100644
index 00000000..f625e37d
--- /dev/null
+++ b/drivers/gpu/nvgpu/pmgr/pmgr.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "pwrdev.h"
+#include "pmgrpmu.h"
+#include <linux/debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+/* debugfs "power" getter; fails without touching *val if the PMU query fails. */
+static int pmgr_pwr_devices_get_current_power(void *data, u64 *val)
+{
+	struct nv_pmu_pmgr_pwr_devices_query_payload payload;
+	struct gk20a *g = (struct gk20a *)data;
+	int status = pmgr_pmu_pwr_devices_query_blocking(g, 1, &payload);
+
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"pmgr_pwr_devices_get_current_power failed %x", status);
+		return status;	/* payload is uninitialized here: never read it */
+	}
+
+	*val = payload.devices[0].powerm_w;	/* device mask 1 => entry 0 */
+	return 0;
+}
+
+/* debugfs "current" getter; fails without touching *val if the PMU query fails. */
+static int pmgr_pwr_devices_get_current(void *data, u64 *val)
+{
+	struct nv_pmu_pmgr_pwr_devices_query_payload payload;
+	struct gk20a *g = (struct gk20a *)data;
+	int status = pmgr_pmu_pwr_devices_query_blocking(g, 1, &payload);
+
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"pmgr_pwr_devices_get_current failed %x", status);
+		return status;	/* payload is uninitialized here: never read it */
+	}
+
+	*val = payload.devices[0].currentm_a;	/* device mask 1 => entry 0 */
+	return 0;
+}
+
+/* debugfs "voltage" getter; fails without touching *val if the PMU query fails. */
+static int pmgr_pwr_devices_get_current_voltage(void *data, u64 *val)
+{
+	struct nv_pmu_pmgr_pwr_devices_query_payload payload;
+	struct gk20a *g = (struct gk20a *)data;
+	int status = pmgr_pmu_pwr_devices_query_blocking(g, 1, &payload);
+
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"pmgr_pwr_devices_get_current_voltage failed %x", status);
+		return status;	/* payload is uninitialized here: never read it */
+	}
+
+	*val = payload.devices[0].voltageu_v;	/* device mask 1 => entry 0 */
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(	/* power: getter only (set hook is NULL), printed with "%llu\n" */
+		pmgr_power_ctrl_fops, pmgr_pwr_devices_get_current_power, NULL, "%llu\n");
+
+DEFINE_SIMPLE_ATTRIBUTE(	/* current: getter only (set hook is NULL), printed with "%llu\n" */
+		pmgr_current_ctrl_fops, pmgr_pwr_devices_get_current, NULL, "%llu\n");
+
+DEFINE_SIMPLE_ATTRIBUTE(	/* voltage: getter only (set hook is NULL), printed with "%llu\n" */
+		pmgr_voltage_ctrl_fops, pmgr_pwr_devices_get_current_voltage, NULL, "%llu\n");
+
+/* Creates the read-only "power", "current" and "voltage" files under
+ * platform->debugfs; a failed creation is only logged. */
+static void pmgr_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *plat = dev_get_drvdata(g->dev);
+	struct dentry *root = plat->debugfs;
+
+	if (!debugfs_create_file("power", S_IRUGO, root, g,
+			&pmgr_power_ctrl_fops))
+		gk20a_err(dev_from_gk20a(g),
+				"debugfs entry create failed for power");
+
+	if (!debugfs_create_file("current", S_IRUGO, root, g,
+			&pmgr_current_ctrl_fops))
+		gk20a_err(dev_from_gk20a(g),
+				"debugfs entry create failed for current");
+
+	if (!debugfs_create_file("voltage", S_IRUGO, root, g,
+			&pmgr_voltage_ctrl_fops))
+		gk20a_err(dev_from_gk20a(g),
+				"debugfs entry create failed for voltage");
+}
+#endif
+
+/*
+ * Builds the pmgr device, monitor and policy board-object groups, in that
+ * order. Returns 0 on success, else the status of the first failing step
+ * (later steps are skipped); debugfs files are added only on full success.
+ */
+u32 pmgr_domain_sw_setup(struct gk20a *g)
+{
+	u32 status;
+
+	status = pmgr_device_sw_setup(g);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error creating boardobjgrp for pmgr devices, status - 0x%x", status);
+		return status;
+	}
+
+	status = pmgr_monitor_sw_setup(g);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error creating boardobjgrp for pmgr monitor, status - 0x%x", status);
+		return status;
+	}
+
+	status = pmgr_policy_sw_setup(g);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error creating boardobjgrp for pmgr policy, status - 0x%x", status);
+		return status;
+	}
+
+#ifdef CONFIG_DEBUG_FS
+	pmgr_debugfs_init(g);
+#endif
+	return 0;
+}
+
+u32 pmgr_domain_pmu_setup(struct gk20a *g)
+{
+	return pmgr_send_pmgr_tables_to_pmu(g);	/* thin wrapper: callee's status is returned unchanged */
+}
diff --git a/drivers/gpu/nvgpu/pmgr/pmgr.h b/drivers/gpu/nvgpu/pmgr/pmgr.h
new file mode 100644
index 00000000..97e7b609
--- /dev/null
+++ b/drivers/gpu/nvgpu/pmgr/pmgr.h
@@ -0,0 +1,31 @@
+/*
+ * general power device structures & definitions
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _PMGR_H_
+#define _PMGR_H_
+
+#include "pwrdev.h"
+#include "pwrmonitor.h"
+#include "pwrpolicy.h"
+
+struct pmgr_pmupstate {	/* NOTE(review): presumably the type behind g->pmgr_pmu - confirm in gk20a.h */
+	struct pwr_devices pmgr_deviceobjs;	/* power device board-object group */
+	struct pmgr_pwr_monitor pmgr_monitorobjs;	/* power channels and channel relationships */
+	struct pmgr_pwr_policy pmgr_policyobjs;	/* power policies, policy relationships, violations */
+};
+
+u32 pmgr_domain_sw_setup(struct gk20a *g);	/* device/monitor/policy sw setup; 0 on success */
+u32 pmgr_domain_pmu_setup(struct gk20a *g);	/* sends the pmgr tables to the PMU; 0 on success */
+
+#endif
diff --git a/drivers/gpu/nvgpu/pmgr/pmgrpmu.c b/drivers/gpu/nvgpu/pmgr/pmgrpmu.c
new file mode 100644
index 00000000..ea070060
--- /dev/null
+++ b/drivers/gpu/nvgpu/pmgr/pmgrpmu.c
@@ -0,0 +1,524 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "pwrdev.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "pmuif/gpmuifpmgr.h"
+#include "gm206/bios_gm206.h"
+#include "gk20a/pmu_gk20a.h"
+#include "pmgrpmu.h"
+
+struct pmgr_pmucmdhandler_params {	/* context handed to pmgr_pmucmdhandler through its param argument */
+	u32 success;	/* posters clear it to 0; the handler stores 1 once a reply is accepted */
+};
+
+/* PMU reply callback for pmgr commands: sets ->success in @param (the poster's
+ * struct pmgr_pmucmdhandler_params) to 1 only for a SET_OBJECT, QUERY or LOAD
+ * reply with b_success == 1 and flcnstatus == 0; anything else is only logged.
+ * @handle and @status are unused. */
+static void pmgr_pmucmdhandler(struct gk20a *g, struct pmu_msg *msg,
+			void *param, u32 handle, u32 status)
+{
+	struct pmgr_pmucmdhandler_params *phandlerparams = param;
+
+	switch (msg->msg.pmgr.msg_type) {
+	case NV_PMU_PMGR_MSG_ID_SET_OBJECT:
+		if ((msg->msg.pmgr.set_object.b_success != 1) ||
+			(msg->msg.pmgr.set_object.flcnstatus != 0)) {
+			gk20a_err(dev_from_gk20a(g), "pmgr msg failed %x %x %x %x",
+				msg->msg.pmgr.set_object.msg_type,
+				msg->msg.pmgr.set_object.b_success,
+				msg->msg.pmgr.set_object.flcnstatus,
+				msg->msg.pmgr.set_object.object_type);
+			return;
+		}
+		break;
+	case NV_PMU_PMGR_MSG_ID_QUERY:
+		if ((msg->msg.pmgr.query.b_success != 1) ||
+			(msg->msg.pmgr.query.flcnstatus != 0)) {
+			gk20a_err(dev_from_gk20a(g), "pmgr msg failed %x %x %x %x",
+				msg->msg.pmgr.query.msg_type,
+				msg->msg.pmgr.query.b_success,
+				msg->msg.pmgr.query.flcnstatus,
+				msg->msg.pmgr.query.cmd_type);
+			return;
+		}
+		break;
+	case NV_PMU_PMGR_MSG_ID_LOAD:
+		/* validate the load view of the reply, not the query view */
+		if ((msg->msg.pmgr.load.b_success != 1) ||
+			(msg->msg.pmgr.load.flcnstatus != 0)) {
+			gk20a_err(dev_from_gk20a(g), "pmgr msg failed %x %x %x",
+				msg->msg.pmgr.load.msg_type,
+				msg->msg.pmgr.load.b_success,
+				msg->msg.pmgr.load.flcnstatus);
+			return;
+		}
+		break;
+	default:
+		gk20a_err(dev_from_gk20a(g), "unknow msg %x",
+			msg->msg.pmgr.msg_type);
+		return;
+	}
+
+	phandlerparams->success = 1;
+}
+
+/*
+ * Posts a SET_OBJECT command for @type to the PMGR unit and waits for the
+ * reply. @pobj is the input payload buffer; @dmem_size and @fb_size are
+ * forwarded as the payload's size and fb_size.
+ *
+ * Returns 0 on success, the gk20a_pmu_cmd_post() status if posting fails, or
+ * -ETIMEDOUT when the handler has not flagged success by the end of the wait.
+ * NOTE(review): the handler context lives on this stack frame; a reply that
+ * arrives after the timeout would presumably write to it - confirm.
+ */
+static u32 pmgr_pmu_set_object(struct gk20a *g,
+		u8 type,
+		u16 dmem_size,
+		u16 fb_size,
+		void *pobj)
+{
+	struct pmgr_pmucmdhandler_params reply = {0};
+	struct pmu_payload payload = { {0} };
+	struct pmu_cmd cmd = { {0} };
+	struct nv_pmu_pmgr_cmd_set_object *set_obj = &cmd.cmd.pmgr.set_object;
+	u32 seqdesc;
+	u32 status;
+
+	cmd.hdr.unit_id = PMU_UNIT_PMGR;
+	cmd.hdr.size = (u32)sizeof(struct nv_pmu_pmgr_cmd_set_object) +
+			(u32)sizeof(struct pmu_hdr);
+	set_obj->cmd_type = NV_PMU_PMGR_CMD_ID_SET_OBJECT;
+	set_obj->object_type = type;
+
+	payload.in.buf = pobj;
+	payload.in.size = dmem_size;
+	payload.in.fb_size = fb_size;
+	payload.in.offset = NV_PMU_PMGR_SET_OBJECT_ALLOC_OFFSET;
+
+	status = gk20a_pmu_cmd_post(g, &cmd, NULL, &payload,
+				PMU_COMMAND_QUEUE_LPQ, pmgr_pmucmdhandler,
+				(void *)&reply, &seqdesc, ~0);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"unable to post pmgr cmd for unit %x cmd id %x obj type %x",
+			cmd.hdr.unit_id, set_obj->cmd_type, set_obj->object_type);
+		return status;
+	}
+
+	/* reply.success becomes 1 once pmgr_pmucmdhandler accepts the reply */
+	pmu_wait_message_cond(&g->pmu, gk20a_get_gr_idle_timeout(g),
+			&reply.success, 1);
+	if (reply.success == 0) {
+		gk20a_err(dev_from_gk20a(g), "could not process cmd\n");
+		return -ETIMEDOUT;
+	}
+
+	return status;
+}
+
+/* Sends a one-entry I2C device table (describing the INA3221) to the PMU as an
+ * NV_PMU_PMGR_OBJECT_I2C_DEVICE_DESC_TABLE object; returns 0 or the non-zero
+ * status of pmgr_pmu_set_object(). */
+static u32 pmgr_send_i2c_device_topology_to_pmu(struct gk20a *g)
+{
+	struct nv_pmu_pmgr_i2c_device_desc_table i2c_desc_table;
+	u32 status;
+
+	/* sizeof() the whole table goes to the PMU: clear what is not set below */
+	memset(&i2c_desc_table, 0, sizeof(i2c_desc_table));
+
+	/* INA3221 I2C device info */
+	i2c_desc_table.dev_mask = 0x01;
+	i2c_desc_table.devices[0].super.type = 0x4E;	/* INA3221 */
+	i2c_desc_table.devices[0].dcb_index = 0;
+	i2c_desc_table.devices[0].i2c_address = 0x84;
+	i2c_desc_table.devices[0].i2c_flags = 0xC2F;
+	i2c_desc_table.devices[0].i2c_port = 0x2;
+
+	/* Pass the table down the PMU as an object */
+	status = pmgr_pmu_set_object(g,
+			NV_PMU_PMGR_OBJECT_I2C_DEVICE_DESC_TABLE,
+			(u16)sizeof(struct nv_pmu_pmgr_i2c_device_desc_table),
+			PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED,
+			&i2c_desc_table);
+	if (status)
+		gk20a_err(dev_from_gk20a(g),
+			"pmgr_pmu_set_object failed %x", status);
+
+	return status;
+}
+
+/*
+ * Fills a power device descriptor table from g->pmgr_pmu.pmgr_deviceobjs and
+ * sends it to the PMU; returns 0 or the first non-zero status encountered.
+ */
+static u32 pmgr_send_pwr_device_topology_to_pmu(struct gk20a *g)
+{
+	struct nv_pmu_pmgr_pwr_device_desc_table pwr_desc_table;
+	struct nv_pmu_pmgr_pwr_device_desc_table_header *ppwr_desc_header;
+	u32 status;
+
+	/* The table is sized by sizeof() below: do not ship unset stack bytes */
+	memset(&pwr_desc_table, 0, sizeof(pwr_desc_table));
+
+	/* Set the BA-device-independent HW information */
+	ppwr_desc_header = &(pwr_desc_table.hdr.data);
+	ppwr_desc_header->ba_info.b_initialized_and_used = false;
+
+	/* populate the table */
+	boardobjgrpe32hdrset((struct nv_pmu_boardobjgrp *)&ppwr_desc_header->super,
+			g->pmgr_pmu.pmgr_deviceobjs.super.super.objmask);
+
+	status = boardobjgrp_pmudatainit_legacy(g,
+			&g->pmgr_pmu.pmgr_deviceobjs.super.super,
+			(struct nv_pmu_boardobjgrp_super *)&pwr_desc_table);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"boardobjgrp_pmudatainit_legacy failed %x", status);
+		return status;
+	}
+
+	/* Pass the table down the PMU as an object */
+	status = pmgr_pmu_set_object(g,
+			NV_PMU_PMGR_OBJECT_PWR_DEVICE_DESC_TABLE,
+			(u16)sizeof(union nv_pmu_pmgr_pwr_device_dmem_size),
+			(u16)sizeof(struct nv_pmu_pmgr_pwr_device_desc_table),
+			&pwr_desc_table);
+	if (status)
+		gk20a_err(dev_from_gk20a(g),
+			"pmgr_pmu_set_object failed %x", status);
+
+	return status;
+}
+
+/*
+ * Builds a power monitor pack (channels plus channel relationships), starting
+ * from the copy kept in g->pmgr_pmu.pmgr_monitorobjs.pmu_data, and sends it
+ * to the PMU as an NV_PMU_PMGR_OBJECT_PWR_MONITOR object.
+ *
+ * Returns 0 on success, or the non-zero status of the first call that failed
+ * (nothing is sent if boardobjgrp_pmudatainit_legacy() fails for a group).
+ */
+static u32 pmgr_send_pwr_mointer_to_pmu(struct gk20a *g)
+{
+	struct nv_pmu_pmgr_pwr_monitor_pack pack;
+	struct nv_pmu_pmgr_pwr_channel_header *chan_hdr;
+	struct nv_pmu_pmgr_pwr_chrelationship_header *rel_hdr;
+	u32 status;
+
+	/* Copy all the global settings from the RM copy */
+	pack = g->pmgr_pmu.pmgr_monitorobjs.pmu_data;
+	chan_hdr = &(pack.channels.hdr.data);
+
+	/* Channel group header: object mask of the power channels */
+	boardobjgrpe32hdrset((struct nv_pmu_boardobjgrp *)&chan_hdr->super,
+			g->pmgr_pmu.pmgr_monitorobjs.pwr_channels.super.objmask);
+
+	/* Copy in each channel */
+	status = boardobjgrp_pmudatainit_legacy(g,
+			&g->pmgr_pmu.pmgr_monitorobjs.pwr_channels.super,
+			(struct nv_pmu_boardobjgrp_super *)&(pack.channels));
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"boardobjgrp_pmudatainit_legacy failed %x", status);
+		return status;
+	}
+
+	/* Copy in each channel relationship */
+	rel_hdr = &(pack.ch_rels.hdr.data);
+
+	boardobjgrpe32hdrset((struct nv_pmu_boardobjgrp *)&rel_hdr->super,
+			g->pmgr_pmu.pmgr_monitorobjs.pwr_ch_rels.super.objmask);
+
+	/* Remaining channel header fields */
+	chan_hdr->physical_channel_mask =
+			g->pmgr_pmu.pmgr_monitorobjs.physical_channel_mask;
+	chan_hdr->type = NV_PMU_PMGR_PWR_MONITOR_TYPE_NO_POLLING;
+
+	status = boardobjgrp_pmudatainit_legacy(g,
+			&g->pmgr_pmu.pmgr_monitorobjs.pwr_ch_rels.super,
+			(struct nv_pmu_boardobjgrp_super *)&(pack.ch_rels));
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"boardobjgrp_pmudatainit_legacy failed %x", status);
+		return status;
+	}
+
+	/* Pass the table down the PMU as an object */
+	/* DMEM size comes from the dmem_size union, FB size from the full pack */
+	status = pmgr_pmu_set_object(g,
+			NV_PMU_PMGR_OBJECT_PWR_MONITOR,
+			(u16)sizeof(union nv_pmu_pmgr_pwr_monitor_dmem_size),
+			(u16)sizeof(struct nv_pmu_pmgr_pwr_monitor_pack),
+			&pack);
+	if (status)
+		gk20a_err(dev_from_gk20a(g),
+			"pmgr_pmu_set_object failed %x", status);
+
+	return status;
+}
+
+/*
+ * Builds a power policy pack on the heap from g->pmgr_pmu.pmgr_policyobjs
+ * (header fields, per-policy data, and the object masks of the policy
+ * relationship and violation groups) and sends it to the PMU as an
+ * NV_PMU_PMGR_OBJECT_PWR_POLICY object. The pack is freed before returning.
+ *
+ * Returns 0 on success, -ENOMEM if the pack cannot be allocated or a policy's
+ * pmudatainit hook fails, or the non-zero status of pmgr_pmu_set_object().
+ */
+u32 pmgr_send_pwr_policy_to_pmu(struct gk20a *g)
+{
+	struct nv_pmu_pmgr_pwr_policy_pack *ppwrpack;
+	struct pwr_policy *ppolicy;
+	u32 status = 0;
+	u8 indx;
+
+	ppwrpack = kzalloc(sizeof(*ppwrpack), GFP_KERNEL);
+	if (!ppwrpack) {
+		/* set the code before logging it, so the log shows the real error */
+		status = -ENOMEM;
+		gk20a_err(dev_from_gk20a(g),
+			"pwr policy alloc failed %x", status);
+		return status;
+	}
+
+	ppwrpack->policies.hdr.data.version = g->pmgr_pmu.pmgr_policyobjs.version;
+	ppwrpack->policies.hdr.data.b_enabled = g->pmgr_pmu.pmgr_policyobjs.b_enabled;
+
+	boardobjgrpe32hdrset((struct nv_pmu_boardobjgrp *)
+			&ppwrpack->policies.hdr.data.super,
+			g->pmgr_pmu.pmgr_policyobjs.pwr_policies.super.objmask);
+
+	memset(&ppwrpack->policies.hdr.data.reserved_pmu_policy_mask,
+			0,
+			sizeof(ppwrpack->policies.hdr.data.reserved_pmu_policy_mask));
+
+	ppwrpack->policies.hdr.data.base_sample_period =
+			g->pmgr_pmu.pmgr_policyobjs.base_sample_period;
+	ppwrpack->policies.hdr.data.min_client_sample_period =
+			g->pmgr_pmu.pmgr_policyobjs.min_client_sample_period;
+	ppwrpack->policies.hdr.data.low_sampling_mult =
+			g->pmgr_pmu.pmgr_policyobjs.low_sampling_mult;
+
+	memcpy(&ppwrpack->policies.hdr.data.global_ceiling,
+			&g->pmgr_pmu.pmgr_policyobjs.global_ceiling,
+			sizeof(struct nv_pmu_perf_domain_group_limits));
+
+	memcpy(&ppwrpack->policies.hdr.data.semantic_policy_tbl,
+			&g->pmgr_pmu.pmgr_policyobjs.policy_idxs,
+			sizeof(g->pmgr_pmu.pmgr_policyobjs.policy_idxs));
+
+	/* Let each policy index in the header's object mask fill in its PMU data */
+	BOARDOBJGRP_FOR_EACH_INDEX_IN_MASK(32, indx,
+			ppwrpack->policies.hdr.data.super.obj_mask.super.data[0]) {
+		ppolicy = PMGR_GET_PWR_POLICY(g, indx);
+
+		status = ((struct boardobj *)ppolicy)->pmudatainit(g, (struct boardobj *)ppolicy,
+				(struct nv_pmu_boardobj *)&(ppwrpack->policies.policies[indx].data));
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"pmudatainit failed %x indx %x", status, indx);
+			status = -ENOMEM;	/* NOTE(review): hides the hook's own status */
+			goto exit;
+		}
+	}
+	BOARDOBJGRP_FOR_EACH_INDEX_IN_MASK_END;
+
+	boardobjgrpe32hdrset((struct nv_pmu_boardobjgrp *)
+			&ppwrpack->policy_rels.hdr.data.super,
+			g->pmgr_pmu.pmgr_policyobjs.pwr_policy_rels.super.objmask);
+
+	boardobjgrpe32hdrset((struct nv_pmu_boardobjgrp *)
+			&ppwrpack->violations.hdr.data.super,
+			g->pmgr_pmu.pmgr_policyobjs.pwr_violations.super.objmask);
+
+	/* Pass the table down the PMU as an object */
+	status = pmgr_pmu_set_object(g,
+			NV_PMU_PMGR_OBJECT_PWR_POLICY,
+			(u16)sizeof(union nv_pmu_pmgr_pwr_policy_dmem_size),
+			(u16)sizeof(struct nv_pmu_pmgr_pwr_policy_pack),
+			ppwrpack);
+	if (status)
+		gk20a_err(dev_from_gk20a(g),
+			"pmgr_pmu_set_object failed %x", status);
+
+exit:
+	kfree(ppwrpack);
+	return status;
+}
+
+/*
+ * Posts a PWR_DEVICES_QUERY command for the devices in @pwr_dev_mask and waits
+ * for the reply; @ppayload is registered as the command's output buffer.
+ *
+ * Returns 0 on success, the gk20a_pmu_cmd_post() status if posting fails, or
+ * -ETIMEDOUT when the handler has not flagged success by the end of the wait.
+ * NOTE(review): presumably @ppayload is only valid to read on a 0 return.
+ */
+u32 pmgr_pmu_pwr_devices_query_blocking(
+		struct gk20a *g,
+		u32 pwr_dev_mask,
+		struct nv_pmu_pmgr_pwr_devices_query_payload *ppayload)
+{
+	struct pmgr_pmucmdhandler_params reply = {0};
+	struct pmu_payload payload = { {0} };
+	struct pmu_cmd cmd = { {0} };
+	struct nv_pmu_pmgr_cmd_pwr_devices_query *query =
+			&cmd.cmd.pmgr.pwr_dev_query;
+	u32 seqdesc;
+	u32 status;
+
+	cmd.hdr.unit_id = PMU_UNIT_PMGR;
+	cmd.hdr.size = (u32)sizeof(struct nv_pmu_pmgr_cmd_pwr_devices_query) +
+			(u32)sizeof(struct pmu_hdr);
+	query->cmd_type = NV_PMU_PMGR_CMD_ID_PWR_DEVICES_QUERY;
+	query->dev_mask = pwr_dev_mask;
+
+	payload.out.buf = ppayload;
+	payload.out.size = sizeof(struct nv_pmu_pmgr_pwr_devices_query_payload);
+	payload.out.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
+	payload.out.offset = NV_PMU_PMGR_PWR_DEVICES_QUERY_ALLOC_OFFSET;
+
+	status = gk20a_pmu_cmd_post(g, &cmd, NULL, &payload,
+				PMU_COMMAND_QUEUE_LPQ, pmgr_pmucmdhandler,
+				(void *)&reply, &seqdesc, ~0);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"unable to post pmgr query cmd for unit %x cmd id %x dev mask %x",
+			cmd.hdr.unit_id, query->cmd_type, query->dev_mask);
+		return status;
+	}
+
+	/* reply.success becomes 1 once pmgr_pmucmdhandler accepts the reply */
+	pmu_wait_message_cond(&g->pmu, gk20a_get_gr_idle_timeout(g),
+			&reply.success, 1);
+
+	if (reply.success == 0) {
+		gk20a_err(dev_from_gk20a(g), "could not process cmd\n");
+		return -ETIMEDOUT;
+	}
+
+	return status;
+}
+
+/*
+ * Posts a LOAD command to the PMGR unit and waits for the reply.
+ * The command is posted on PMU_COMMAND_QUEUE_LPQ with pmgr_pmucmdhandler as
+ * its completion callback.
+ *
+ * Returns 0 on success, the gk20a_pmu_cmd_post() status if posting fails, or
+ * -ETIMEDOUT when the handler has not flagged success by the end of the wait.
+ */
+static u32 pmgr_pmu_load_blocking(struct gk20a *g)
+{
+	struct pmgr_pmucmdhandler_params reply = {0};
+	struct pmu_cmd cmd = { {0} };
+	struct nv_pmu_pmgr_cmd_load *load = &cmd.cmd.pmgr.load;
+	u32 seqdesc;
+	u32 status;
+
+	cmd.hdr.unit_id = PMU_UNIT_PMGR;
+	cmd.hdr.size = (u32)sizeof(struct nv_pmu_pmgr_cmd_load) +
+			(u32)sizeof(struct pmu_hdr);
+	load->cmd_type = NV_PMU_PMGR_CMD_ID_LOAD;
+
+	/* No payload accompanies a LOAD command */
+	status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL,
+				PMU_COMMAND_QUEUE_LPQ, pmgr_pmucmdhandler,
+				(void *)&reply, &seqdesc, ~0);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"unable to post pmgr load cmd for unit %x cmd id %x",
+			cmd.hdr.unit_id, load->cmd_type);
+		return status;
+	}
+
+	/* reply.success becomes 1 once pmgr_pmucmdhandler accepts the reply */
+	pmu_wait_message_cond(&g->pmu, gk20a_get_gr_idle_timeout(g),
+			&reply.success, 1);
+
+	if (reply.success == 0) {
+		gk20a_err(dev_from_gk20a(g), "could not process cmd\n");
+		return -ETIMEDOUT;
+	}
+
+	return status;
+}
+
+/*
+ * Sends the pmgr tables to the PMU in order: I2C device topology (always),
+ * then power devices, power monitor and power policy (each only when one of
+ * its board-object groups is non-empty), and finally the LOAD command.
+ * Returns 0 on success or the status of the first step that failed.
+ */
+u32 pmgr_send_pmgr_tables_to_pmu(struct gk20a *g)
+{
+	u32 status;
+
+	status = pmgr_send_i2c_device_topology_to_pmu(g);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"pmgr_send_i2c_device_topology_to_pmu failed %x", status);
+		return status;
+	}
+
+	if (!BOARDOBJGRP_IS_EMPTY(&g->pmgr_pmu.pmgr_deviceobjs.super.super)) {
+		status = pmgr_send_pwr_device_topology_to_pmu(g);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"pmgr_send_pwr_device_topology_to_pmu failed %x", status);
+			return status;
+		}
+	}
+
+	if (!(BOARDOBJGRP_IS_EMPTY(
+			&g->pmgr_pmu.pmgr_monitorobjs.pwr_channels.super)) ||
+		!(BOARDOBJGRP_IS_EMPTY(
+			&g->pmgr_pmu.pmgr_monitorobjs.pwr_ch_rels.super))) {
+		status = pmgr_send_pwr_mointer_to_pmu(g);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"pmgr_send_pwr_mointer_to_pmu failed %x", status);
+			return status;
+		}
+	}
+
+	if (!(BOARDOBJGRP_IS_EMPTY(
+			&g->pmgr_pmu.pmgr_policyobjs.pwr_policies.super)) ||
+		!(BOARDOBJGRP_IS_EMPTY(
+			&g->pmgr_pmu.pmgr_policyobjs.pwr_policy_rels.super)) ||
+		!(BOARDOBJGRP_IS_EMPTY(
+			&g->pmgr_pmu.pmgr_policyobjs.pwr_violations.super))) {
+		status = pmgr_send_pwr_policy_to_pmu(g);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"pmgr_send_pwr_policy_to_pmu failed %x", status);
+			return status;
+		}
+	}
+
+	status = pmgr_pmu_load_blocking(g);
+	if (status)
+		gk20a_err(dev_from_gk20a(g),
+			"pmgr_pmu_load_blocking failed %x", status);
+
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/pmgr/pmgrpmu.h b/drivers/gpu/nvgpu/pmgr/pmgrpmu.h
new file mode 100644
index 00000000..6b48396c
--- /dev/null
+++ b/drivers/gpu/nvgpu/pmgr/pmgrpmu.h
@@ -0,0 +1,29 @@
+/*
+ * general power device control structures & definitions
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _PMGRPMU_H_
+#define _PMGRPMU_H_
+
+#include "gk20a/gk20a.h"
+#include "pwrdev.h"
+#include "pwrmonitor.h"
+
+u32 pmgr_send_pmgr_tables_to_pmu(struct gk20a *g);	/* 0 on success, else the first failing step's status */
+
+u32 pmgr_pmu_pwr_devices_query_blocking(	/* posts a power-device query and waits; 0 on success */
+		struct gk20a *g,
+		u32 pwr_dev_mask,	/* forwarded as the command's dev_mask */
+		struct nv_pmu_pmgr_pwr_devices_query_payload *ppayload);	/* registered as the command's output buffer */
+
+#endif
diff --git a/drivers/gpu/nvgpu/pmgr/pwrdev.c b/drivers/gpu/nvgpu/pmgr/pwrdev.c
new file mode 100644
index 00000000..03e2eb34
--- /dev/null
+++ b/drivers/gpu/nvgpu/pmgr/pwrdev.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "pwrdev.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "pmuif/gpmuifpmgr.h"
+#include "gm206/bios_gm206.h"
+#include "gk20a/pmu_gk20a.h"
+
+/*
+ * pmudatainstget hook of the power device group: yields the board_obj of
+ * entry @idx in the PMU-side table, or -EINVAL if bit @idx of its mask is 0.
+ */
+static u32 _pwr_device_pmudata_instget(struct gk20a *g,
+			struct nv_pmu_boardobjgrp *pmuboardobjgrp,
+			struct nv_pmu_boardobj **ppboardobjpmudata,
+			u8 idx)
+{
+	struct nv_pmu_pmgr_pwr_device_desc_table *table =
+		(struct nv_pmu_pmgr_pwr_device_desc_table *)pmuboardobjgrp;
+	u32 present = (u32)BIT(idx) & table->hdr.data.super.obj_mask.super.data[0];
+
+	gk20a_dbg_info("");
+	if (present == 0)
+		return -EINVAL;
+
+	*ppboardobjpmudata =
+		(struct nv_pmu_boardobj *)&table->devices[idx].data.board_obj;
+	gk20a_dbg_info(" Done");
+	return 0;
+}
+
+/* pmudatainit hook for INA3221 devices: runs boardobj_pmudatainit_super()
+ * (its status is returned on failure), then copies the INA3221 settings of
+ * @board_obj_ptr into @ppmudata and returns 0. */
+static u32 _pwr_domains_pmudatainit_ina3221(struct gk20a *g,
+			struct boardobj *board_obj_ptr,
+			struct nv_pmu_boardobj *ppmudata)
+{
+	struct pwr_device_ina3221 *src =
+		(struct pwr_device_ina3221 *)board_obj_ptr;
+	struct nv_pmu_pmgr_pwr_device_desc_ina3221 *dst =
+		(struct nv_pmu_pmgr_pwr_device_desc_ina3221 *)ppmudata;
+	u32 status;
+	u32 ch;
+
+	status = boardobj_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error updating pmu boardobjgrp for pwr domain 0x%x",
+			  status);
+		return status;
+	}
+
+	dst->super.power_corr_factor = src->super.power_corr_factor;
+	dst->i2c_dev_idx = src->super.i2c_dev_idx;
+	dst->configuration = src->configuration;
+	dst->mask_enable = src->mask_enable;
+	/* configure NV_PMU_THERM_EVENT_EXT_OVERT */
+	dst->event_mask = (1 << 0);
+	dst->curr_correct_m = src->curr_correct_m;
+	dst->curr_correct_b = src->curr_correct_b;
+
+	for (ch = 0; ch < NV_PMU_PMGR_PWR_DEVICE_INA3221_CH_NUM; ch++)
+		dst->r_shuntm_ohm[ch] = src->r_shuntm_ohm[ch];
+
+	return status;
+}
+
+/*
+ * Creates a board object via boardobj_construct_super() and fills it in as a
+ * pwr_device_ina3221 from the template in @pargs (@pargs_size bytes); @type
+ * is not used. Returns the object, or NULL if the super constructor fails.
+ */
+static struct boardobj *construct_pwr_device(struct gk20a *g,
+			void *pargs, u16 pargs_size, u8 type)
+{
+	struct pwr_device_ina3221 *tmpl = (struct pwr_device_ina3221 *)pargs;
+	struct pwr_device_ina3221 *dev;
+	struct boardobj *obj = NULL;
+	u32 ch;
+
+	if (boardobj_construct_super(g, &obj, pargs_size, pargs))
+		return NULL;
+
+	dev = (struct pwr_device_ina3221 *)obj;
+
+	/* Set Super class interfaces */
+	obj->pmudatainit = _pwr_domains_pmudatainit_ina3221;
+	dev->super.power_rail = tmpl->super.power_rail;
+	dev->super.i2c_dev_idx = tmpl->super.i2c_dev_idx;
+	dev->super.power_corr_factor = (1 << 12);
+	dev->super.bIs_inforom_config = false;
+
+	/* Set INA3221-specific information */
+	dev->configuration = tmpl->configuration;
+	dev->mask_enable = tmpl->mask_enable;
+	dev->gpio_function = tmpl->gpio_function;
+	dev->curr_correct_m = tmpl->curr_correct_m;
+	dev->curr_correct_b = tmpl->curr_correct_b;
+
+	for (ch = 0; ch < NV_PMU_PMGR_PWR_DEVICE_INA3221_CH_NUM; ch++)
+		dev->r_shuntm_ohm[ch] = tmpl->r_shuntm_ohm[ch];
+
+	gk20a_dbg_info(" Done");
+	return obj;
+}
+
+/*
+ * Parses the VBIOS POWER_SENSORS_TABLE (version 2X) and inserts one INA3221
+ * power device board object into @ppwrdeviceobjs for every I2C-class entry;
+ * entries of any other class are skipped.
+ *
+ * Returns 0 on success, or -EINVAL when the table cannot be looked up, its
+ * header fails validation, or an object cannot be constructed or inserted.
+ */
+static u32 devinit_get_pwr_device_table(struct gk20a *g,
+			struct pwr_devices *ppwrdeviceobjs)
+{
+	u32 status = 0;
+	u8 *pwr_device_table_ptr = NULL;
+	u8 *curr_pwr_device_table_ptr = NULL;
+	struct boardobj *boardobj;
+	struct pwr_sensors_2x_header pwr_sensor_table_header = { 0 };
+	struct pwr_sensors_2x_entry pwr_sensor_table_entry = { 0 };
+	u32 index;
+	u32 obj_index = 0;
+	u16 pwr_device_size;
+	union {
+		struct boardobj boardobj;
+		struct pwr_device pwrdev;
+		struct pwr_device_ina3221 ina3221;
+	} pwr_device_data;
+
+	gk20a_dbg_info("");
+
+	/* A missing lookup hook is handled like a missing table. */
+	if (g->ops.bios.get_perf_table_ptrs != NULL)
+		pwr_device_table_ptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+				g->bios.perf_token, POWER_SENSORS_TABLE);
+	if (pwr_device_table_ptr == NULL) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	memcpy(&pwr_sensor_table_header, pwr_device_table_ptr,
+		VBIOS_POWER_SENSORS_2X_HEADER_SIZE_08);
+
+	/* Only the 2X layout with the expected header/entry sizes is parsed. */
+	if ((pwr_sensor_table_header.version !=
+			VBIOS_POWER_SENSORS_VERSION_2X) ||
+		(pwr_sensor_table_header.header_size <
+			VBIOS_POWER_SENSORS_2X_HEADER_SIZE_08) ||
+		(pwr_sensor_table_header.table_entry_size !=
+			VBIOS_POWER_SENSORS_2X_ENTRY_SIZE_15)) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	/* Start the object template from all-zero instead of stack contents. */
+	memset(&pwr_device_data, 0, sizeof(pwr_device_data));
+
+	for (index = 0; index < pwr_sensor_table_header.num_table_entries; index++) {
+		bool use_fxp8_8 = false;
+		u8 i2c_dev_idx;
+		u8 device_type;
+
+		/*
+		 * Entry 'index' starts 'index' entries past the header. Derive it
+		 * from the table base on every pass: adding size * index to a
+		 * running pointer overshoots from the third entry onwards.
+		 */
+		curr_pwr_device_table_ptr = pwr_device_table_ptr +
+			VBIOS_POWER_SENSORS_2X_HEADER_SIZE_08 +
+			(pwr_sensor_table_header.table_entry_size * index);
+
+		pwr_sensor_table_entry.flags0 = *curr_pwr_device_table_ptr;
+
+		memcpy(&pwr_sensor_table_entry.class_param0,
+			(curr_pwr_device_table_ptr + 1),
+			(VBIOS_POWER_SENSORS_2X_ENTRY_SIZE_15 - 1));
+
+		device_type = (u8)BIOS_GET_FIELD(
+			pwr_sensor_table_entry.flags0,
+			NV_VBIOS_POWER_SENSORS_2X_ENTRY_FLAGS0_CLASS);
+
+		/* Only I2C-class sensors are turned into devices. */
+		if (device_type != NV_VBIOS_POWER_SENSORS_2X_ENTRY_FLAGS0_CLASS_I2C)
+			continue;
+
+		i2c_dev_idx = (u8)BIOS_GET_FIELD(
+			pwr_sensor_table_entry.class_param0,
+			NV_VBIOS_POWER_SENSORS_2X_ENTRY_CLASS_PARAM0_I2C_INDEX);
+		use_fxp8_8 = (u8)BIOS_GET_FIELD(
+			pwr_sensor_table_entry.class_param0,
+			NV_VBIOS_POWER_SENSORS_2X_ENTRY_CLASS_PARAM0_I2C_USE_FXP8_8);
+
+		pwr_device_data.ina3221.super.i2c_dev_idx = i2c_dev_idx;
+		pwr_device_data.ina3221.r_shuntm_ohm[0].use_fxp8_8 = use_fxp8_8;
+		pwr_device_data.ina3221.r_shuntm_ohm[1].use_fxp8_8 = use_fxp8_8;
+		pwr_device_data.ina3221.r_shuntm_ohm[2].use_fxp8_8 = use_fxp8_8;
+		pwr_device_data.ina3221.r_shuntm_ohm[0].rshunt_value =
+			(u16)BIOS_GET_FIELD(pwr_sensor_table_entry.sensor_param0,
+			NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM0_INA3221_RSHUNT0_MOHM);
+
+		pwr_device_data.ina3221.r_shuntm_ohm[1].rshunt_value =
+			(u16)BIOS_GET_FIELD(pwr_sensor_table_entry.sensor_param0,
+			NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM0_INA3221_RSHUNT1_MOHM);
+
+		pwr_device_data.ina3221.r_shuntm_ohm[2].rshunt_value =
+			(u16)BIOS_GET_FIELD(pwr_sensor_table_entry.sensor_param1,
+			NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM1_INA3221_RSHUNT2_MOHM);
+		pwr_device_data.ina3221.configuration =
+			(u16)BIOS_GET_FIELD(pwr_sensor_table_entry.sensor_param1,
+			NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM1_INA3221_CONFIGURATION);
+
+		pwr_device_data.ina3221.mask_enable =
+			(u16)BIOS_GET_FIELD(pwr_sensor_table_entry.sensor_param2,
+			NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM2_INA3221_MASKENABLE);
+
+		pwr_device_data.ina3221.gpio_function =
+			(u8)BIOS_GET_FIELD(pwr_sensor_table_entry.sensor_param2,
+			NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM2_INA3221_GPIOFUNCTION);
+
+		pwr_device_data.ina3221.curr_correct_m =
+			(u16)BIOS_GET_FIELD(pwr_sensor_table_entry.sensor_param3,
+			NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM3_INA3221_CURR_CORRECT_M);
+
+		pwr_device_data.ina3221.curr_correct_b =
+			(u16)BIOS_GET_FIELD(pwr_sensor_table_entry.sensor_param3,
+			NV_VBIOS_POWER_SENSORS_2X_ENTRY_SENSOR_PARAM3_INA3221_CURR_CORRECT_B);
+
+		if (!pwr_device_data.ina3221.curr_correct_m)
+			pwr_device_data.ina3221.curr_correct_m = (1 << 12);
+		pwr_device_size = sizeof(struct pwr_device_ina3221);
+
+		pwr_device_data.boardobj.type = CTRL_PMGR_PWR_DEVICE_TYPE_INA3221;
+		pwr_device_data.pwrdev.power_rail = (u8)0;
+
+		boardobj = construct_pwr_device(g, &pwr_device_data,
+					pwr_device_size, pwr_device_data.boardobj.type);
+
+		if (!boardobj) {
+			gk20a_err(dev_from_gk20a(g),
+			"unable to create pwr device for %d type %d", index, pwr_device_data.boardobj.type);
+			status = -EINVAL;
+			goto done;
+		}
+
+		status = boardobjgrp_objinsert(&ppwrdeviceobjs->super.super,
+				boardobj, obj_index);
+
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+			"unable to insert pwr device boardobj for %d", index);
+			status = -EINVAL;
+			goto done;
+		}
+
+		++obj_index;
+	}
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+/*
+ * Software setup for the pmgr power devices: constructs the E32 board-object
+ * group, installs its pmudatainstget hook and populates the group from the
+ * VBIOS power sensor table.
+ *
+ * Returns 0 on success or the non-zero status of the step that failed.
+ */
+u32 pmgr_device_sw_setup(struct gk20a *g)
+{
+	struct pwr_devices *devs = &(g->pmgr_pmu.pmgr_deviceobjs);
+	u32 status;
+
+	/* Construct the Super Class and override the Interfaces */
+	status = boardobjgrpconstruct_e32(&devs->super);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error creating boardobjgrp for pmgr devices, status - 0x%x",
+			status);
+	} else {
+		/* Override the Interfaces */
+		devs->super.super.pmudatainstget = _pwr_device_pmudata_instget;
+
+		status = devinit_get_pwr_device_table(g, devs);
+	}
+
+	gk20a_dbg_info(" done status %x", status);
+
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/pmgr/pwrdev.h b/drivers/gpu/nvgpu/pmgr/pwrdev.h
new file mode 100644
index 00000000..b8592a18
--- /dev/null
+++ b/drivers/gpu/nvgpu/pmgr/pwrdev.h
@@ -0,0 +1,51 @@
+/*
+ * general power device structures & definitions
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _PWRDEV_H_
+#define _PWRDEV_H_
+
+#include "boardobj/boardobj.h"
+#include "pmuif/gpmuifpmgr.h"
+#include "ctrl/ctrlpmgr.h"
+
+#define  PWRDEV_I2CDEV_DEVICE_INDEX_NONE  (0xFF)
+
+#define  PWR_DEVICE_PROV_NUM_DEFAULT                                           1
+
+struct pwr_device {
+	struct boardobj super;		/* base board object */
+	u8 power_rail;			/* the device table parser currently stores 0 here */
+	u8 i2c_dev_idx;			/* NOTE(review): presumably PWRDEV_I2CDEV_DEVICE_INDEX_NONE when unset - confirm */
+	bool bIs_inforom_config;
+	u32 power_corr_factor;
+};
+
+struct pwr_devices {
+	struct boardobjgrp_e32 super;	/* e32 group that the constructed power devices are inserted into */
+};
+
+struct pwr_device_ina3221 {
+	struct pwr_device super;	/* generic power device part */
+	struct ctrl_pmgr_pwr_device_info_rshunt
+		r_shuntm_ohm[NV_PMU_PMGR_PWR_DEVICE_INA3221_CH_NUM];	/* one entry per INA3221 channel */
+	u16 configuration;
+	u16 mask_enable;
+	u8 gpio_function;
+	u16 curr_correct_m;		/* the table parser substitutes (1 << 12) when this would be 0 */
+	s16 curr_correct_b;
+} ;
+
+u32 pmgr_device_sw_setup(struct gk20a *g);
+
+#endif
diff --git a/drivers/gpu/nvgpu/pmgr/pwrmonitor.c b/drivers/gpu/nvgpu/pmgr/pwrmonitor.c
new file mode 100644
index 00000000..c28751fd
--- /dev/null
+++ b/drivers/gpu/nvgpu/pmgr/pwrmonitor.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "pwrdev.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "pmuif/gpmuifpmgr.h"
+#include "gm206/bios_gm206.h"
+#include "gk20a/pmu_gk20a.h"
+
+/*
+ * pmudatainstget hook for the power channel group: returns, through
+ * @ppboardobjpmudata, the PMU board object of channel @idx and stores @idx in
+ * that channel's ch_idx. Fails with -EINVAL when bit @idx is clear in word 0
+ * of the group's object mask.
+ */
+static u32 _pwr_channel_pmudata_instget(struct gk20a *g,
+			struct nv_pmu_boardobjgrp *pmuboardobjgrp,
+			struct nv_pmu_boardobj **ppboardobjpmudata,
+			u8 idx)
+{
+	struct nv_pmu_pmgr_pwr_channel_desc *desc =
+		(struct nv_pmu_pmgr_pwr_channel_desc *)pmuboardobjgrp;
+
+	gk20a_dbg_info("");
+	if (!((u32)BIT(idx) & desc->hdr.data.super.obj_mask.super.data[0]))
+		return -EINVAL;
+
+	*ppboardobjpmudata =
+		(struct nv_pmu_boardobj *)&desc->channels[idx].data.board_obj;
+	/* ch_idx is filled in here because this is where the index is known */
+	desc->channels[idx].data.pwr_channel.ch_idx = idx;
+	gk20a_dbg_info(" Done");
+	return 0;
+}
+
+/*
+ * pmudatainstget hook for the channel relationship group: returns the PMU
+ * board object of relationship @idx, or -EINVAL when bit @idx is clear in
+ * word 0 of the group's object mask.
+ */
+static u32 _pwr_channel_rels_pmudata_instget(struct gk20a *g,
+			struct nv_pmu_boardobjgrp *pmuboardobjgrp,
+			struct nv_pmu_boardobj **ppboardobjpmudata,
+			u8 idx)
+{
+	struct nv_pmu_pmgr_pwr_chrelationship_desc *desc =
+		(struct nv_pmu_pmgr_pwr_chrelationship_desc *)pmuboardobjgrp;
+
+	gk20a_dbg_info("");
+	if (!((u32)BIT(idx) & desc->hdr.data.super.obj_mask.super.data[0]))
+		return -EINVAL;
+
+	*ppboardobjpmudata =
+		(struct nv_pmu_boardobj *)&desc->ch_rels[idx].data.board_obj;
+	gk20a_dbg_info(" Done");
+	return 0;
+}
+
+/* Clear dependent_ch_mask on every power channel present in the group's objmask. */
+static u32 _pwr_channel_state_init(struct gk20a *g)
+{
+	struct pwr_channel *chan;
+	u32 present = g->pmgr_pmu.pmgr_monitorobjs.pwr_channels.super.objmask;
+	u8 i = 0;
+
+	BOARDOBJGRP_FOR_EACH_INDEX_IN_MASK(32, i, present) {
+		chan = PMGR_PWR_MONITOR_GET_PWR_CHANNEL(g, i);
+		if (!chan) {
+			gk20a_err(dev_from_gk20a(g),
+				"PMGR_PWR_MONITOR_GET_PWR_CHANNEL-failed %d", i);
+			return -EINVAL;
+		}
+		/* no dependencies are recorded at init time */
+		chan->dependent_ch_mask = 0;
+	}
+	BOARDOBJGRP_FOR_EACH_INDEX_IN_MASK_END
+
+	return 0;
+}
+
+/* True when the channel's board object type equals @type. */
+static bool _pwr_channel_implements(struct pwr_channel *pchannel, u8 type)
+{
+	return BOARDOBJ_GET_TYPE(pchannel) == type;
+}
+
+/*
+ * pmudatainit hook for sensor power channels: runs the base board object
+ * init, then copies the channel and sensor fields into the PMU structure.
+ * The PMU-side ch_idx is written as 0 here.
+ */
+static u32 _pwr_domains_pmudatainit_sensor(struct gk20a *g,
+					struct boardobj *board_obj_ptr,
+					struct nv_pmu_boardobj *ppmudata)
+{
+	struct pwr_channel_sensor *src = (struct pwr_channel_sensor *)board_obj_ptr;
+	struct nv_pmu_pmgr_pwr_channel_sensor *dst = (struct nv_pmu_pmgr_pwr_channel_sensor *)ppmudata;
+	u32 status = boardobj_pmudatainit_super(g, board_obj_ptr, ppmudata);
+
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error updating pmu boardobjgrp for pwr sensor 0x%x",
+			  status);
+		return status;
+	}
+
+	dst->super.pwr_rail = src->super.pwr_rail;
+	dst->super.volt_fixedu_v = src->super.volt_fixed_uv;
+	dst->super.pwr_corr_slope = src->super.pwr_corr_slope;
+	dst->super.pwr_corr_offsetm_w = src->super.pwr_corr_offset_mw;
+	dst->super.curr_corr_slope = src->super.curr_corr_slope;
+	dst->super.curr_corr_offsetm_a = src->super.curr_corr_offset_ma;
+	dst->super.dependent_ch_mask = src->super.dependent_ch_mask;
+	dst->super.ch_idx = 0;
+
+	dst->pwr_dev_idx = src->pwr_dev_idx;
+	dst->pwr_dev_prov_idx = src->pwr_dev_prov_idx;
+
+	return 0;
+}
+
+/*
+ * Build a sensor power channel board object from the template in @pargs.
+ * Returns NULL if boardobj_construct_super() fails. @type is not consulted;
+ * dependent_ch_mask always starts at 0.
+ */
+static struct boardobj *construct_pwr_topology(struct gk20a *g,
+				void *pargs, u16 pargs_size, u8 type)
+{
+	struct pwr_channel_sensor *tmpl = (struct pwr_channel_sensor *)pargs;
+	struct pwr_channel_sensor *chan;
+	struct boardobj *board_obj_ptr = NULL;
+
+	if (boardobj_construct_super(g, &board_obj_ptr, pargs_size, pargs))
+		return NULL;
+
+	chan = (struct pwr_channel_sensor *)board_obj_ptr;
+	board_obj_ptr->pmudatainit = _pwr_domains_pmudatainit_sensor;
+
+	chan->super.pwr_rail = tmpl->super.pwr_rail;
+	chan->super.volt_fixed_uv = tmpl->super.volt_fixed_uv;
+	chan->super.pwr_corr_slope = tmpl->super.pwr_corr_slope;
+	chan->super.pwr_corr_offset_mw = tmpl->super.pwr_corr_offset_mw;
+	chan->super.curr_corr_slope = tmpl->super.curr_corr_slope;
+	chan->super.curr_corr_offset_ma = tmpl->super.curr_corr_offset_ma;
+	chan->super.dependent_ch_mask = 0;
+
+	chan->pwr_dev_idx = tmpl->pwr_dev_idx;
+	chan->pwr_dev_prov_idx = tmpl->pwr_dev_prov_idx;
+
+	gk20a_dbg_info(" Done");
+
+	return board_obj_ptr;
+}
+
+/*
+ * Parse the VBIOS 2X power topology table and insert one board object into
+ * @ppwrmonitorobjs->pwr_channels per SENSOR-class entry; entries of any other
+ * class are skipped. Returns 0, or -EINVAL if the table is missing, fails
+ * header validation, or an object cannot be constructed or inserted.
+ */
+static u32 devinit_get_pwr_topology_table(struct gk20a *g,
+				struct pmgr_pwr_monitor *ppwrmonitorobjs)
+{
+	u32 status = 0;
+	u8 *pwr_topology_table_ptr = NULL;
+	u8 *curr_pwr_topology_table_ptr = NULL;
+	struct boardobj *boardobj;
+	struct pwr_topology_2x_header pwr_topology_table_header = { 0 };
+	struct pwr_topology_2x_entry pwr_topology_table_entry = { 0 };
+	u32 index;
+	u32 obj_index = 0;
+	u16 pwr_topology_size;
+	union {
+		struct boardobj boardobj;
+		struct pwr_channel pwrchannel;
+		struct pwr_channel_sensor sensor;
+	} pwr_topology_data;
+
+	gk20a_dbg_info("");
+
+	if (g->ops.bios.get_perf_table_ptrs != NULL)
+		pwr_topology_table_ptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+				g->bios.perf_token, POWER_TOPOLOGY_TABLE);
+	/* Also taken when the BIOS hook is absent, so memcpy() never sees NULL */
+	if (pwr_topology_table_ptr == NULL) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	memcpy(&pwr_topology_table_header, pwr_topology_table_ptr,
+		VBIOS_POWER_TOPOLOGY_2X_HEADER_SIZE_06);
+
+	if (pwr_topology_table_header.version != VBIOS_POWER_TOPOLOGY_VERSION_2X) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	g->pmgr_pmu.pmgr_monitorobjs.b_is_topology_tbl_ver_1x = false;
+
+	if (pwr_topology_table_header.header_size <
+			VBIOS_POWER_TOPOLOGY_2X_HEADER_SIZE_06 ||
+	    pwr_topology_table_header.table_entry_size !=
+			VBIOS_POWER_TOPOLOGY_2X_ENTRY_SIZE_16) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	for (index = 0; index < pwr_topology_table_header.num_table_entries;
+		index++) {
+		u8 class_type;
+
+		/* Offset from the array base; "+=" here compounded it on every pass */
+		curr_pwr_topology_table_ptr = pwr_topology_table_ptr +
+			VBIOS_POWER_TOPOLOGY_2X_HEADER_SIZE_06 +
+			(pwr_topology_table_header.table_entry_size * index);
+
+		pwr_topology_table_entry.flags0 = *curr_pwr_topology_table_ptr;
+		pwr_topology_table_entry.pwr_rail = *(curr_pwr_topology_table_ptr + 1);
+
+		memcpy(&pwr_topology_table_entry.param0,
+			(curr_pwr_topology_table_ptr + 2),
+			(VBIOS_POWER_TOPOLOGY_2X_ENTRY_SIZE_16 - 2));
+
+		class_type = (u8)BIOS_GET_FIELD(
+			pwr_topology_table_entry.flags0,
+			NV_VBIOS_POWER_TOPOLOGY_2X_ENTRY_FLAGS0_CLASS);
+
+		if (class_type != NV_VBIOS_POWER_TOPOLOGY_2X_ENTRY_FLAGS0_CLASS_SENSOR)
+			continue;
+
+		/* Zero the template so fields not set below are not stack garbage */
+		memset(&pwr_topology_data, 0, sizeof(pwr_topology_data));
+		pwr_topology_data.sensor.pwr_dev_idx = (u8)BIOS_GET_FIELD(
+			pwr_topology_table_entry.param1,
+			NV_VBIOS_POWER_TOPOLOGY_2X_ENTRY_PARAM1_SENSOR_INDEX);
+		pwr_topology_data.sensor.pwr_dev_prov_idx = (u8)BIOS_GET_FIELD(
+			pwr_topology_table_entry.param1,
+			NV_VBIOS_POWER_TOPOLOGY_2X_ENTRY_PARAM1_SENSOR_PROVIDER_INDEX);
+		pwr_topology_size = sizeof(struct pwr_channel_sensor);
+
+		/* Initialize data for the parent class */
+		pwr_topology_data.boardobj.type = CTRL_PMGR_PWR_CHANNEL_TYPE_SENSOR;
+		pwr_topology_data.pwrchannel.pwr_rail = (u8)pwr_topology_table_entry.pwr_rail;
+		pwr_topology_data.pwrchannel.volt_fixed_uv = pwr_topology_table_entry.param0;
+		pwr_topology_data.pwrchannel.pwr_corr_slope = (1 << 12);
+		pwr_topology_data.pwrchannel.pwr_corr_offset_mw = 0;
+		pwr_topology_data.pwrchannel.curr_corr_slope  =
+			(u32)pwr_topology_table_entry.curr_corr_slope;
+		pwr_topology_data.pwrchannel.curr_corr_offset_ma =
+			(s32)pwr_topology_table_entry.curr_corr_offset;
+
+		boardobj = construct_pwr_topology(g, &pwr_topology_data,
+					pwr_topology_size, pwr_topology_data.boardobj.type);
+		if (!boardobj) {
+			gk20a_err(dev_from_gk20a(g),
+				"unable to create pwr topology for %d type %d",
+				index, pwr_topology_data.boardobj.type);
+			status = -EINVAL;
+			goto done;
+		}
+
+		status = boardobjgrp_objinsert(&ppwrmonitorobjs->pwr_channels.super,
+				boardobj, obj_index);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"unable to insert pwr topology boardobj for %d", index);
+			status = -EINVAL;
+			goto done;
+		}
+
+		++obj_index;
+	}
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+/*
+ * Software setup for PMGR power monitoring.
+ *
+ * Constructs the power channel and channel relationship e32 groups and
+ * installs their PMU-data getters, loads the topology table, resets
+ * per-channel state, and finally sets a bit in physical_channel_mask for
+ * every channel whose type is SENSOR.
+ * Returns 0 on success or the first non-zero status encountered.
+ */
+u32 pmgr_monitor_sw_setup(struct gk20a *g)
+{
+	struct pmgr_pwr_monitor *mon = &g->pmgr_pmu.pmgr_monitorobjs;
+	struct boardobjgrp *grp;
+	struct pwr_channel *chan;
+	u32 status;
+	u8 i = 0;
+
+	/* Power channel group */
+	status = boardobjgrpconstruct_e32(&mon->pwr_channels);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error creating boardobjgrp for pmgr channel, status - 0x%x",
+			status);
+		goto done;
+	}
+
+	/* Channels use their own PMU-data instance getter */
+	mon->pwr_channels.super.pmudatainstget = _pwr_channel_pmudata_instget;
+
+	/* Channel relationship group */
+	status = boardobjgrpconstruct_e32(&mon->pwr_ch_rels);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error creating boardobjgrp for pmgr channel relationship, status - 0x%x",
+			status);
+		goto done;
+	}
+
+	/* Relationships use their own PMU-data instance getter */
+	mon->pwr_ch_rels.super.pmudatainstget = _pwr_channel_rels_pmudata_instget;
+
+	/* Start with an empty total-GPU-power mask and an invalid channel index */
+	mon->pmu_data.channels.hdr.data.total_gpu_power_channel_mask = 0;
+	mon->total_gpu_channel_idx = CTRL_PMGR_PWR_CHANNEL_INDEX_INVALID;
+
+	/* Assume a 1.x table until devinit_get_pwr_topology_table() clears this */
+	mon->b_is_topology_tbl_ver_1x = true;
+
+	status = devinit_get_pwr_topology_table(g, mon);
+	if (status)
+		goto done;
+
+	status = _pwr_channel_state_init(g);
+	if (status)
+		goto done;
+
+	/* Rebuild the mask of SENSOR-type channel indices from scratch */
+	mon->physical_channel_mask = 0;
+
+	grp = &mon->pwr_channels.super;
+
+	BOARDOBJGRP_FOR_EACH(grp, struct pwr_channel *, chan, i) {
+		if (_pwr_channel_implements(chan,
+				CTRL_PMGR_PWR_CHANNEL_TYPE_SENSOR)) {
+			mon->physical_channel_mask |= BIT(i);
+		}
+	}
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/pmgr/pwrmonitor.h b/drivers/gpu/nvgpu/pmgr/pwrmonitor.h
new file mode 100644
index 00000000..7cd6b8c9
--- /dev/null
+++ b/drivers/gpu/nvgpu/pmgr/pwrmonitor.h
@@ -0,0 +1,60 @@
+/*
+ * general power channel structures & definitions
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _PWRMONITOR_H_
+#define _PWRMONITOR_H_
+
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobj.h"
+#include "pmuif/gpmuifpmgr.h"
+#include "ctrl/ctrlpmgr.h"
+
+struct pwr_channel {
+	struct boardobj super;		/* base board object */
+	u8 pwr_rail;			/* pwr_rail byte of the VBIOS topology entry */
+	u32 volt_fixed_uv;		/* param0 of the VBIOS topology entry */
+	u32 pwr_corr_slope;		/* topology parser sets this to (1 << 12) */
+	s32 pwr_corr_offset_mw;		/* topology parser sets this to 0 */
+	u32 curr_corr_slope;		/* curr_corr_slope of the VBIOS topology entry */
+	s32 curr_corr_offset_ma;	/* curr_corr_offset of the VBIOS topology entry */
+	u32 dependent_ch_mask;		/* cleared at construction and by _pwr_channel_state_init() */
+};
+
+struct pwr_chrelationship {
+	struct boardobj super;		/* base board object */
+	u8 chIdx;			/* NOTE(review): never referenced in pwrmonitor.c; presumably a channel index - confirm */
+};
+
+struct pwr_channel_sensor {
+	struct pwr_channel super;	/* generic power channel part */
+	u8 pwr_dev_idx;			/* PARAM1 SENSOR_INDEX field of the VBIOS topology entry */
+	u8 pwr_dev_prov_idx;		/* PARAM1 SENSOR_PROVIDER_INDEX field of the VBIOS topology entry */
+};
+
+struct pmgr_pwr_monitor {
+	bool b_is_topology_tbl_ver_1x;		/* set by sw setup, cleared once a 2X topology table is accepted */
+	struct boardobjgrp_e32 pwr_channels;	/* group holding the pwr_channel objects */
+	struct boardobjgrp_e32 pwr_ch_rels;	/* group for channel relationships */
+	u8 total_gpu_channel_idx;		/* sw setup starts this at CTRL_PMGR_PWR_CHANNEL_INDEX_INVALID */
+	u32 physical_channel_mask;		/* one bit per SENSOR-type channel index */
+	struct nv_pmu_pmgr_pwr_monitor_pack pmu_data;	/* PMU-side data; sw setup zeroes its total_gpu_power_channel_mask */
+};
+
+/* Channel @channel_idx of g's power-channel group, as a struct pwr_channel *. */
+#define PMGR_PWR_MONITOR_GET_PWR_CHANNEL(g, channel_idx) ((struct pwr_channel *) \
+	BOARDOBJGRP_OBJ_GET_BY_IDX(&((g)->pmgr_pmu.pmgr_monitorobjs.pwr_channels.super), (channel_idx)))
+
+u32 pmgr_monitor_sw_setup(struct gk20a *g);
+
+#endif
diff --git a/drivers/gpu/nvgpu/pmgr/pwrpolicy.c b/drivers/gpu/nvgpu/pmgr/pwrpolicy.c
new file mode 100644
index 00000000..bec13b0c
--- /dev/null
+++ b/drivers/gpu/nvgpu/pmgr/pwrpolicy.c
@@ -0,0 +1,680 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "pwrpolicy.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "pmuif/gpmuifpmgr.h"
+#include "gm206/bios_gm206.h"
+#include "gk20a/pmu_gk20a.h"
+
+#define _pwr_policy_limitarboutputget_helper(p_limit_arb) ((p_limit_arb)->output) /* arbitrated output of @p_limit_arb */
+#define _pwr_policy_limitdeltaapply(limit, delta) ((u32)max(((s32)(limit)) + (delta), 0)) /* (limit + delta) as s32, floored at 0 */
+
+/*
+ * Record @limit_value as client @client_idx's input to @p_limit_arb and
+ * recompute the arbitrated output (max or min over all inputs, per b_arb_max).
+ * A new input slot is appended when the client has none; if no slot is free
+ * -EINVAL is returned and the output is left untouched.
+ */
+static u32 _pwr_policy_limitarbinputset_helper(struct gk20a *g,
+			struct ctrl_pmgr_pwr_policy_limit_arbitration *p_limit_arb,
+			u8  client_idx,
+			u32 limit_value)
+{
+	struct ctrl_pmgr_pwr_policy_limit_arbitration *arb = p_limit_arb;
+	u32 arbitrated = limit_value;
+	bool known_client = false;
+	u8 i;
+
+	for (i = 0; i < arb->num_inputs; i++) {
+		if (arb->inputs[i].pwr_policy_idx == client_idx) {
+			arb->inputs[i].limit_value = limit_value;
+			known_client = true;
+			continue;
+		}
+		arbitrated = arb->b_arb_max ?
+			max(arbitrated, arb->inputs[i].limit_value) :
+			min(arbitrated, arb->inputs[i].limit_value);
+	}
+
+	if (!known_client) {
+		if (arb->num_inputs >= CTRL_PMGR_PWR_POLICY_MAX_LIMIT_INPUTS) {
+			gk20a_err(g->dev, "No entries remaining for clientIdx=%d",
+				client_idx);
+			return -EINVAL;
+		}
+		arb->inputs[arb->num_inputs].pwr_policy_idx = client_idx;
+		arb->inputs[arb->num_inputs].limit_value = limit_value;
+		arb->num_inputs++;
+	}
+
+	/* The output is only published once the input has been stored */
+	arb->output = arbitrated;
+	return 0;
+}
+
+/*
+ * Map @limit_id onto the matching arbitration structure inside @ppolicy.
+ * For PWR_POLICY_LIMIT_ID_RATED, *@p_limit_arb_sec (when the pointer is
+ * non-NULL) is additionally pointed at limit_arb_curr; for every other id it
+ * is left as the caller set it. Unknown ids yield -EINVAL.
+ */
+static u32 _pwr_policy_limitid_translate(struct gk20a *g,
+			struct pwr_policy *ppolicy,
+			enum pwr_policy_limit_id limit_id,
+			struct ctrl_pmgr_pwr_policy_limit_arbitration **p_limit_arb,
+			struct ctrl_pmgr_pwr_policy_limit_arbitration **p_limit_arb_sec)
+{
+	switch (limit_id) {
+	case PWR_POLICY_LIMIT_ID_MIN:
+		*p_limit_arb = &ppolicy->limit_arb_min;
+		return 0;
+
+	case PWR_POLICY_LIMIT_ID_RATED:
+		*p_limit_arb = &ppolicy->limit_arb_rated;
+		if (p_limit_arb_sec)
+			*p_limit_arb_sec = &ppolicy->limit_arb_curr;
+		return 0;
+
+	case PWR_POLICY_LIMIT_ID_MAX:
+		*p_limit_arb = &ppolicy->limit_arb_max;
+		return 0;
+
+	case PWR_POLICY_LIMIT_ID_CURR:
+		*p_limit_arb = &ppolicy->limit_arb_curr;
+		return 0;
+
+	case PWR_POLICY_LIMIT_ID_BATT:
+		*p_limit_arb = &ppolicy->limit_arb_batt;
+		return 0;
+
+	default:
+		break;
+	}
+
+	gk20a_err(g->dev, "Unsupported limitId=%d", limit_id);
+	return -EINVAL;
+}
+
+/*
+ * Set client @client_idx's value for limit @limit_id on @ppolicy. When the
+ * translation also yields a secondary arbitration structure, the primary's
+ * new output is fed into it as the RM client's input.
+ * Returns 0 on success or the failing helper's status.
+ */
+static u32 _pwr_policy_limitarbinputset(struct gk20a *g,
+			struct pwr_policy *ppolicy,
+			enum pwr_policy_limit_id limit_id,
+			u8 client_idx,
+			u32 limit)
+{
+	struct ctrl_pmgr_pwr_policy_limit_arbitration *primary = NULL;
+	struct ctrl_pmgr_pwr_policy_limit_arbitration *secondary = NULL;
+	u32 status;
+
+	status = _pwr_policy_limitid_translate(g, ppolicy, limit_id,
+			&primary, &secondary);
+	if (status)
+		return status;
+
+	status = _pwr_policy_limitarbinputset_helper(g, primary, client_idx, limit);
+	if (status) {
+		gk20a_err(g->dev,
+			"Error setting client limit value: status=0x%08x, limitId=0x%x, clientIdx=0x%x, limit=%d",
+			status, limit_id, client_idx, limit);
+		return status;
+	}
+
+	if (secondary == NULL)
+		return 0;
+
+	return _pwr_policy_limitarbinputset_helper(g, secondary,
+			CTRL_PMGR_PWR_POLICY_LIMIT_INPUT_CLIENT_IDX_RM,
+			_pwr_policy_limitarboutputget_helper(primary));
+}
+
+/* Reset @p_limit_arb to zero inputs with max (true) or min (false) arbitration; output is not touched. */
+static inline void _pwr_policy_limitarbconstruct(
+	struct ctrl_pmgr_pwr_policy_limit_arbitration *p_limit_arb, bool b_arb_max)
+{
+	p_limit_arb->b_arb_max = b_arb_max;
+	p_limit_arb->num_inputs = 0;
+}
+
+/*
+ * Current arbitrated output for limit @limit_id of @ppolicy.
+ * When @limit_id cannot be translated to an arbitration structure, 0 is
+ * returned instead.
+ */
+static u32 _pwr_policy_limitarboutputget(struct gk20a *g,
+			struct pwr_policy *ppolicy,
+			enum pwr_policy_limit_id limit_id)
+{
+	struct ctrl_pmgr_pwr_policy_limit_arbitration *arb = NULL;
+
+	if (_pwr_policy_limitid_translate(g, ppolicy, limit_id,
+			&arb, NULL)) {
+		return 0;
+	}
+
+	return _pwr_policy_limitarboutputget_helper(arb);
+}
+
+/*
+ * pmudatainit hook for HW-threshold power policies. After the base board
+ * object init it fills the PMU policy from the driver policy: the limits are
+ * the arbitrated MIN/MAX/CURR outputs with limit_delta applied, ch_idx is
+ * written as 0, and the integral, filter and threshold settings are copied.
+ * A failing base init is reported as -ENOMEM.
+ */
+static u32 _pwr_domains_pmudatainit_hw_threshold(struct gk20a *g,
+				struct boardobj *board_obj_ptr,
+				struct nv_pmu_boardobj *ppmudata)
+{
+	struct pwr_policy_hw_threshold *hw =
+		(struct pwr_policy_hw_threshold *)board_obj_ptr;
+	struct pwr_policy *policy = (struct pwr_policy *)&(hw->super.super);
+	struct nv_pmu_pmgr_pwr_policy_hw_threshold *pmu_hw =
+		(struct nv_pmu_pmgr_pwr_policy_hw_threshold *)ppmudata;
+	struct nv_pmu_pmgr_pwr_policy *pmu_policy =
+		(struct nv_pmu_pmgr_pwr_policy *)ppmudata;
+	u32 status;
+
+	status = boardobj_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error updating pmu boardobjgrp for pwr sensor 0x%x",
+			status);
+		return -ENOMEM;
+	}
+
+	pmu_policy->ch_idx = 0;
+	pmu_policy->limit_unit = policy->limit_unit;
+	pmu_policy->num_limit_inputs = policy->num_limit_inputs;
+
+	/* Each limit is the arbitrated output shifted by limit_delta */
+	pmu_policy->limit_min = _pwr_policy_limitdeltaapply(
+		_pwr_policy_limitarboutputget(g, policy, PWR_POLICY_LIMIT_ID_MIN),
+		policy->limit_delta);
+	pmu_policy->limit_max = _pwr_policy_limitdeltaapply(
+		_pwr_policy_limitarboutputget(g, policy, PWR_POLICY_LIMIT_ID_MAX),
+		policy->limit_delta);
+	pmu_policy->limit_curr = _pwr_policy_limitdeltaapply(
+		_pwr_policy_limitarboutputget(g, policy, PWR_POLICY_LIMIT_ID_CURR),
+		policy->limit_delta);
+
+	memcpy(&pmu_policy->integral, &policy->integral,
+			sizeof(struct ctrl_pmgr_pwr_policy_info_integral));
+
+	pmu_policy->sample_mult = policy->sample_mult;
+	pmu_policy->filter_type = policy->filter_type;
+	pmu_policy->filter_param = policy->filter_param;
+
+	/* HW-threshold specific part */
+	pmu_hw->threshold_idx = hw->threshold_idx;
+	pmu_hw->low_threshold_idx = hw->low_threshold_idx;
+	pmu_hw->b_use_low_threshold = hw->b_use_low_threshold;
+	pmu_hw->low_threshold_value = hw->low_threshold_value;
+
+	return 0;
+}
+
+/*
+ * Build a HW-threshold power policy board object from the template in @pargs,
+ * which is read both as struct pwr_policy and struct pwr_policy_hw_threshold.
+ * Returns NULL only if boardobj_construct_super() fails; @type is not used.
+ *
+ * NOTE(review): the status of each _pwr_policy_limitarbinputset() call is
+ * overwritten without being checked, so the object is returned even when a
+ * limit could not be set - confirm that this is intended.
+ */
+static struct boardobj *construct_pwr_policy(struct gk20a *g,
+			void *pargs, u16 pargs_size, u8 type)
+{
+	struct pwr_policy *tmpl = (struct pwr_policy *)pargs;
+	struct pwr_policy_hw_threshold *hw_tmpl = (struct pwr_policy_hw_threshold *)pargs;
+	struct pwr_policy_hw_threshold *hw;
+	struct pwr_policy *policy;
+	struct boardobj *board_obj_ptr = NULL;
+	u32 status;
+
+	status = boardobj_construct_super(g, &board_obj_ptr, pargs_size, pargs);
+	if (status)
+		return NULL;
+
+	hw = (struct pwr_policy_hw_threshold *)board_obj_ptr;
+	policy = (struct pwr_policy *)board_obj_ptr;
+	board_obj_ptr->pmudatainit = _pwr_domains_pmudatainit_hw_threshold;
+
+	policy->ch_idx = tmpl->ch_idx;
+	policy->num_limit_inputs = 0;
+	policy->limit_unit = tmpl->limit_unit;
+	policy->filter_type = (enum ctrl_pmgr_pwr_policy_filter_type)(tmpl->filter_type);
+	policy->sample_mult = tmpl->sample_mult;
+
+	/* Only the parameter belonging to the selected filter is copied */
+	switch (policy->filter_type) {
+	case CTRL_PMGR_PWR_POLICY_FILTER_TYPE_NONE:
+		break;
+
+	case CTRL_PMGR_PWR_POLICY_FILTER_TYPE_BLOCK:
+		policy->filter_param.block.block_size =
+			tmpl->filter_param.block.block_size;
+		break;
+
+	case CTRL_PMGR_PWR_POLICY_FILTER_TYPE_MOVING_AVERAGE:
+		policy->filter_param.moving_avg.window_size =
+			tmpl->filter_param.moving_avg.window_size;
+		break;
+
+	case CTRL_PMGR_PWR_POLICY_FILTER_TYPE_IIR:
+		policy->filter_param.iir.divisor = tmpl->filter_param.iir.divisor;
+		break;
+
+	default:
+		gk20a_err(g->dev,
+			"Error: unrecognized Power Policy filter type: %d.\n",
+			policy->filter_type);
+		break;
+	}
+
+	/* limit_arb_curr must exist before RATED below, which feeds into it */
+	_pwr_policy_limitarbconstruct(&policy->limit_arb_curr, false);
+	policy->limit_delta = 0;
+
+	/* MIN arbitrates to the largest input, all other limits to the smallest */
+	_pwr_policy_limitarbconstruct(&policy->limit_arb_min, true);
+	status = _pwr_policy_limitarbinputset(g, policy, PWR_POLICY_LIMIT_ID_MIN,
+			CTRL_PMGR_PWR_POLICY_LIMIT_INPUT_CLIENT_IDX_RM,
+			tmpl->limit_min);
+
+	_pwr_policy_limitarbconstruct(&policy->limit_arb_max, false);
+	status = _pwr_policy_limitarbinputset(g, policy, PWR_POLICY_LIMIT_ID_MAX,
+			CTRL_PMGR_PWR_POLICY_LIMIT_INPUT_CLIENT_IDX_RM,
+			tmpl->limit_max);
+
+	_pwr_policy_limitarbconstruct(&policy->limit_arb_rated, false);
+	status = _pwr_policy_limitarbinputset(g, policy, PWR_POLICY_LIMIT_ID_RATED,
+			CTRL_PMGR_PWR_POLICY_LIMIT_INPUT_CLIENT_IDX_RM,
+			tmpl->limit_rated);
+
+	/* A zero battery limit is replaced by CTRL_PMGR_PWR_POLICY_LIMIT_MAX */
+	_pwr_policy_limitarbconstruct(&policy->limit_arb_batt, false);
+	status = _pwr_policy_limitarbinputset(g, policy, PWR_POLICY_LIMIT_ID_BATT,
+			CTRL_PMGR_PWR_POLICY_LIMIT_INPUT_CLIENT_IDX_RM,
+			(tmpl->limit_batt != 0) ?
+				tmpl->limit_batt : CTRL_PMGR_PWR_POLICY_LIMIT_MAX);
+
+	memcpy(&policy->integral, &tmpl->integral,
+			sizeof(struct ctrl_pmgr_pwr_policy_info_integral));
+
+	hw->threshold_idx = hw_tmpl->threshold_idx;
+	hw->b_use_low_threshold = hw_tmpl->b_use_low_threshold;
+	hw->low_threshold_idx = hw_tmpl->low_threshold_idx;
+	hw->low_threshold_value = hw_tmpl->low_threshold_value;
+
+	gk20a_dbg_info(" Done");
+	return board_obj_ptr;
+}
+
+/*
+ * Construct a work-around policy from the template @ppwrpolicydata and insert
+ * it at @obj_index. When bit 0 of @hw_threshold_policy_index is clear a CRIT
+ * policy is built (threshold 0, rated/max 20000), otherwise a WARN policy
+ * (threshold 1, rated/max 11600); limit_min is 1000 in both cases.
+ * Returns 0 on success or -EINVAL.
+ */
+static u32 _pwr_policy_construct_WAR_policy(struct gk20a *g,
+			struct pmgr_pwr_policy *ppwrpolicyobjs,
+			union pwr_policy_data_union *ppwrpolicydata,
+			u16 pwr_policy_size,
+			u32 hw_threshold_policy_index,
+			u32 obj_index)
+{
+	const bool crit = !(hw_threshold_policy_index & 0x1);
+	struct boardobj *boardobj;
+	u32 status;
+
+	/* Patch the variant-specific limits into the template */
+	ppwrpolicydata->pwrpolicy.limit_min = 1000;
+	ppwrpolicydata->pwrpolicy.limit_rated = crit ? 20000 : 11600;
+	ppwrpolicydata->pwrpolicy.limit_max = crit ? 20000 : 11600;
+	ppwrpolicydata->hw_threshold.threshold_idx = crit ? 0 : 1;
+
+	/* Build the policy object from the adjusted template */
+	boardobj = construct_pwr_policy(g, ppwrpolicydata,
+				pwr_policy_size, ppwrpolicydata->boardobj.type);
+
+	if (!boardobj) {
+		gk20a_err(dev_from_gk20a(g),
+			"unable to create pwr policy for type %d", ppwrpolicydata->boardobj.type);
+		return -EINVAL;
+	}
+
+	/* Add it to the policy group at the requested slot */
+	status = boardobjgrp_objinsert(&ppwrpolicyobjs->pwr_policies.super,
+			boardobj, obj_index);
+
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"unable to insert pwr policy boardobj for %d", obj_index);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static u32 devinit_get_pwr_policy_table(struct gk20a *g,
+			struct pmgr_pwr_policy *ppwrpolicyobjs)
+{
+	u32 status = 0;
+	u8 *pwr_policy_table_ptr = NULL;
+	u8 *curr_pwr_policy_table_ptr = NULL;
+	struct boardobj *boardobj;
+	struct pwr_policy_3x_header_struct pwr_policy_table_header = { 0 };
+	struct pwr_policy_3x_entry_struct pwr_policy_table_entry = { 0 };
+	u32 index;
+	u32 obj_index = 0;
+	u16 pwr_policy_size;
+	bool integral_control = false;
+	u32 hw_threshold_policy_index = 0;
+	union pwr_policy_data_union pwr_policy_data;
+
+	gk20a_dbg_info("");
+
+	if (g->ops.bios.get_perf_table_ptrs != NULL) {
+		pwr_policy_table_ptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+				g->bios.perf_token, POWER_CAPPING_TABLE);
+		if (pwr_policy_table_ptr == NULL) {
+			status = -EINVAL;
+			goto done;
+		}
+	}
+
+	memcpy(&pwr_policy_table_header.version,
+		(pwr_policy_table_ptr),
+		14);
+
+	memcpy(&pwr_policy_table_header.d2_limit,
+		(pwr_policy_table_ptr + 14),
+		(VBIOS_POWER_POLICY_3X_ENTRY_SIZE_2E - 14));
+
+	if (pwr_policy_table_header.version !=
+			VBIOS_POWER_POLICY_VERSION_3X) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	if (pwr_policy_table_header.header_size <
+			VBIOS_POWER_POLICY_3X_HEADER_SIZE_25) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	if (pwr_policy_table_header.table_entry_size !=
+			VBIOS_POWER_POLICY_3X_ENTRY_SIZE_2E) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	curr_pwr_policy_table_ptr = (pwr_policy_table_ptr +
+		VBIOS_POWER_POLICY_3X_HEADER_SIZE_25);
+
+	for (index = 0; index < pwr_policy_table_header.num_table_entries;
+		index++) {
+		u8 class_type;
+
+		curr_pwr_policy_table_ptr += (pwr_policy_table_header.table_entry_size * index);
+
+		pwr_policy_table_entry.flags0 = *curr_pwr_policy_table_ptr;
+		pwr_policy_table_entry.ch_idx = *(curr_pwr_policy_table_ptr + 1);
+
+		memcpy(&pwr_policy_table_entry.limit_min,
+			(curr_pwr_policy_table_ptr + 2),
+			35);
+
+		memcpy(&pwr_policy_table_entry.ratio_min,
+			(curr_pwr_policy_table_ptr + 2 + 35),
+			4);
+
+		pwr_policy_table_entry.sample_mult =
+				*(curr_pwr_policy_table_ptr + 2 + 35 + 4);
+
+		memcpy(&pwr_policy_table_entry.filter_param,
+			(curr_pwr_policy_table_ptr + 2 + 35 + 4 + 1),
+			4);
+
+		class_type = (u8)BIOS_GET_FIELD(
+			pwr_policy_table_entry.flags0,
+			NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS0_CLASS);
+
+		if (class_type == NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS0_CLASS_HW_THRESHOLD) {
+			ppwrpolicyobjs->version = CTRL_PMGR_PWR_POLICY_TABLE_VERSION_3X;
+			ppwrpolicyobjs->base_sample_period = (u16)
+					pwr_policy_table_header.base_sample_period;
+			ppwrpolicyobjs->min_client_sample_period = (u16)
+					pwr_policy_table_header.min_client_sample_period;
+			ppwrpolicyobjs->low_sampling_mult =
+					pwr_policy_table_header.low_sampling_mult;
+
+			ppwrpolicyobjs->policy_idxs[1] =
+				(u8)pwr_policy_table_header.tgp_policy_idx;
+			ppwrpolicyobjs->policy_idxs[0] =
+				(u8)pwr_policy_table_header.rtp_policy_idx;
+			ppwrpolicyobjs->policy_idxs[2] =
+				pwr_policy_table_header.mxm_policy_idx;
+			ppwrpolicyobjs->policy_idxs[3] =
+				pwr_policy_table_header.dnotifier_policy_idx;
+			ppwrpolicyobjs->ext_limits[0].limit =
+				pwr_policy_table_header.d2_limit;
+			ppwrpolicyobjs->ext_limits[1].limit =
+				pwr_policy_table_header.d3_limit;
+			ppwrpolicyobjs->ext_limits[2].limit =
+				pwr_policy_table_header.d4_limit;
+			ppwrpolicyobjs->ext_limits[3].limit =
+				pwr_policy_table_header.d5_limit;
+			ppwrpolicyobjs->policy_idxs[4] =
+				pwr_policy_table_header.pwr_tgt_policy_idx;
+			ppwrpolicyobjs->policy_idxs[5] =
+				pwr_policy_table_header.pwr_tgt_floor_policy_idx;
+			ppwrpolicyobjs->policy_idxs[6] =
+				pwr_policy_table_header.sm_bus_policy_idx;
+
+			integral_control = (bool)BIOS_GET_FIELD(
+				pwr_policy_table_entry.flags1,
+				NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS1_INTEGRAL_CONTROL);
+
+			if (integral_control == 0x01) {
+				pwr_policy_data.pwrpolicy.integral.past_sample_count = (u8)
+						pwr_policy_table_entry.past_length;
+				pwr_policy_data.pwrpolicy.integral.next_sample_count = (u8)
+						pwr_policy_table_entry.next_length;
+				pwr_policy_data.pwrpolicy.integral.ratio_limit_max = (u16)
+						pwr_policy_table_entry.ratio_max;
+				pwr_policy_data.pwrpolicy.integral.ratio_limit_min = (u16)
+						pwr_policy_table_entry.ratio_min;
+			} else {
+				memset(&(pwr_policy_data.pwrpolicy.integral), 0x0,
+					sizeof(struct ctrl_pmgr_pwr_policy_info_integral));
+			}
+			pwr_policy_data.hw_threshold.threshold_idx = (u8)
+				BIOS_GET_FIELD(
+					pwr_policy_table_entry.param0,
+					NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM0_HW_THRESHOLD_THRES_IDX);
+
+			pwr_policy_data.hw_threshold.b_use_low_threshold =
+				BIOS_GET_FIELD(
+					pwr_policy_table_entry.param0,
+					NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM0_HW_THRESHOLD_LOW_THRESHOLD_USE);
+
+			if (pwr_policy_data.hw_threshold.b_use_low_threshold) {
+				pwr_policy_data.hw_threshold.low_threshold_idx = (u8)
+					BIOS_GET_FIELD(
+						pwr_policy_table_entry.param0,
+						NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM0_HW_THRESHOLD_LOW_THRESHOLD_IDX);
+
+				pwr_policy_data.hw_threshold.low_threshold_value = (u16)
+					BIOS_GET_FIELD(
+						pwr_policy_table_entry.param1,
+						NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM1_HW_THRESHOLD_LOW_THRESHOLD_VAL);
+			}
+
+			pwr_policy_size = sizeof(struct pwr_policy_hw_threshold);
+		} else
+			continue;
+
+		/* Initialize data for the parent class */
+		pwr_policy_data.boardobj.type = CTRL_PMGR_PWR_POLICY_TYPE_HW_THRESHOLD;
+		pwr_policy_data.pwrpolicy.ch_idx = (u8)pwr_policy_table_entry.ch_idx;
+		pwr_policy_data.pwrpolicy.limit_unit = (u8)
+				BIOS_GET_FIELD(
+					pwr_policy_table_entry.flags0,
+					NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS0_LIMIT_UNIT);
+		pwr_policy_data.pwrpolicy.filter_type = (u8)
+				BIOS_GET_FIELD(
+					pwr_policy_table_entry.flags1,
+					NV_VBIOS_POWER_POLICY_3X_ENTRY_FLAGS1_FILTER_TYPE);
+		pwr_policy_data.pwrpolicy.limit_min = pwr_policy_table_entry.limit_min;
+		pwr_policy_data.pwrpolicy.limit_rated = pwr_policy_table_entry.limit_rated;
+		pwr_policy_data.pwrpolicy.limit_max = pwr_policy_table_entry.limit_max;
+		pwr_policy_data.pwrpolicy.limit_batt = pwr_policy_table_entry.limit_batt;
+
+		pwr_policy_data.pwrpolicy.sample_mult  = (u8)pwr_policy_table_entry.sample_mult;
+
+		/* Filled the entry.filterParam value in the filterParam */
+		pwr_policy_data.pwrpolicy.filter_param.block.block_size = 0;
+		pwr_policy_data.pwrpolicy.filter_param.moving_avg.window_size = 0;
+		pwr_policy_data.pwrpolicy.filter_param.iir.divisor = 0;
+
+		hw_threshold_policy_index |=
+			BIT(pwr_policy_data.hw_threshold.threshold_idx);
+
+		boardobj = construct_pwr_policy(g, &pwr_policy_data,
+					pwr_policy_size, pwr_policy_data.boardobj.type);
+
+		if (!boardobj) {
+			gk20a_err(dev_from_gk20a(g),
+				"unable to create pwr policy for %d type %d", index, pwr_policy_data.boardobj.type);
+			status = -EINVAL;
+			goto done;
+		}
+
+		status = boardobjgrp_objinsert(&ppwrpolicyobjs->pwr_policies.super,
+				boardobj, obj_index);
+
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"unable to insert pwr policy boardobj for %d", index);
+			status = -EINVAL;
+			goto done;
+		}
+
+		++obj_index;
+	}
+
+	if (hw_threshold_policy_index &&
+		(hw_threshold_policy_index < 0x3)) {
+		status = _pwr_policy_construct_WAR_policy(g,
+					ppwrpolicyobjs,
+					&pwr_policy_data,
+					pwr_policy_size,
+					hw_threshold_policy_index,
+					obj_index);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"unable to construct_WAR_policy");
+			status = -EINVAL;
+			goto done;
+		}
+		++obj_index;
+	}
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+/*
+ * SW setup for PMGR power policies: construct the policy, policy-relationship
+ * and violation board object groups, reset the index tables to _INVALID, then
+ * populate the policies through devinit_get_pwr_policy_table().
+ * Returns 0 on success or the first non-zero status encountered.
+ */
+u32 pmgr_policy_sw_setup(struct gk20a *g)
+{
+	struct pmgr_pwr_policy *policyobjs = &(g->pmgr_pmu.pmgr_policyobjs);
+	struct boardobjgrp *policygrp = &(policyobjs->pwr_policies.super);
+	struct pwr_policy *policy;
+	u32 status;
+	u8 i = 0;
+
+	/* Construct the Super Class and override the Interfaces */
+	status = boardobjgrpconstruct_e32(&policyobjs->pwr_policies);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error creating boardobjgrp for pmgr policy, status - 0x%x",
+			status);
+		goto done;
+	}
+
+	status = boardobjgrpconstruct_e32(&policyobjs->pwr_policy_rels);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error creating boardobjgrp for pmgr policy rels, status - 0x%x",
+			status);
+		goto done;
+	}
+
+	status = boardobjgrpconstruct_e32(&policyobjs->pwr_violations);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error creating boardobjgrp for pmgr violations, status - 0x%x",
+			status);
+		goto done;
+	}
+
+	/* Every policy index starts out as _INVALID */
+	memset(policyobjs->policy_idxs, CTRL_PMGR_PWR_POLICY_INDEX_INVALID,
+				sizeof(u8) * CTRL_PMGR_PWR_POLICY_IDX_NUM_INDEXES);
+
+	/* Initialize external power limit policy indexes to _INVALID/0xFF */
+	for (i = 0; i < PWR_POLICY_EXT_POWER_STATE_ID_COUNT; i++)
+		policyobjs->ext_limits[i].policy_table_idx =
+			CTRL_PMGR_PWR_POLICY_INDEX_INVALID;
+
+	/* NOTE(review): the original comment calls this value "_D1" - confirm */
+	policyobjs->ext_power_state = 0xFFFFFFFF;
+
+	status = devinit_get_pwr_policy_table(g, policyobjs);
+	if (status)
+		goto done;
+
+	policyobjs->b_enabled = true;
+
+	/* Bump num_limit_inputs on each policy the group iterator visits */
+	BOARDOBJGRP_FOR_EACH(policygrp, struct pwr_policy *, policy, i) {
+		PMGR_PWR_POLICY_INCREMENT_LIMIT_INPUT_COUNT(policy);
+	}
+
+	policyobjs->global_ceiling.values[0] = 0xFF;
+
+	policyobjs->client_work_item.b_pending = false;
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/pmgr/pwrpolicy.h b/drivers/gpu/nvgpu/pmgr/pwrpolicy.h
new file mode 100644
index 00000000..82289137
--- /dev/null
+++ b/drivers/gpu/nvgpu/pmgr/pwrpolicy.h
@@ -0,0 +1,117 @@
+/*
+ * general power policy structures & definitions
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _PWRPOLICY_H_
+#define _PWRPOLICY_H_
+
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobj.h"
+#include "pmuif/gpmuifpmgr.h"
+#include "ctrl/ctrlpmgr.h"
+
+#define PWR_POLICY_EXT_POWER_STATE_ID_COUNT 0x4
+
+enum pwr_policy_limit_id {
+	PWR_POLICY_LIMIT_ID_MIN    = 0x00000000,
+	PWR_POLICY_LIMIT_ID_RATED,
+	PWR_POLICY_LIMIT_ID_MAX,
+	PWR_POLICY_LIMIT_ID_CURR,
+	PWR_POLICY_LIMIT_ID_BATT,
+};
+
+struct pwr_policy {
+	struct boardobj super;
+	u8 ch_idx;
+	u8 num_limit_inputs;
+	u8 limit_unit;
+	s32 limit_delta;
+	u32 limit_min;
+	u32 limit_rated;
+	u32 limit_max;
+	u32 limit_batt;
+	struct ctrl_pmgr_pwr_policy_info_integral integral;
+	struct ctrl_pmgr_pwr_policy_limit_arbitration limit_arb_min;
+	struct ctrl_pmgr_pwr_policy_limit_arbitration limit_arb_rated;
+	struct ctrl_pmgr_pwr_policy_limit_arbitration limit_arb_max;
+	struct ctrl_pmgr_pwr_policy_limit_arbitration limit_arb_batt;
+	struct ctrl_pmgr_pwr_policy_limit_arbitration limit_arb_curr;
+	u8 sample_mult;
+	enum ctrl_pmgr_pwr_policy_filter_type filter_type;
+	union ctrl_pmgr_pwr_policy_filter_param filter_param;
+};
+
+struct pwr_policy_ext_limit {
+	u8 policy_table_idx;
+	u32 limit;
+};
+
+struct pwr_policy_batt_workitem {
+	u32 power_state;
+	bool b_full_deflection;
+};
+
+struct pwr_policy_client_workitem {
+	u32 limit;
+	bool b_pending;
+};
+
+struct pwr_policy_relationship {
+	struct boardobj super;
+	u8 policy_idx;
+};
+
+struct pmgr_pwr_policy {
+	u8 version;
+	bool b_enabled;	/* set true by pmgr_policy_sw_setup() once the table is read */
+	struct nv_pmu_perf_domain_group_limits global_ceiling;	/* values[0] starts at 0xFF */
+	u8 policy_idxs[CTRL_PMGR_PWR_POLICY_IDX_NUM_INDEXES];	/* memset to _INDEX_INVALID at setup */
+	struct pwr_policy_ext_limit ext_limits[PWR_POLICY_EXT_POWER_STATE_ID_COUNT];	/* limits come from the table header d2..d5_limit */
+	s32 ext_power_state;	/* initialised to 0xFFFFFFFF at setup */
+	u16 base_sample_period;
+	u16 min_client_sample_period;
+	u8 low_sampling_mult;
+	struct boardobjgrp_e32 pwr_policies;	/* constructed with boardobjgrpconstruct_e32() */
+	struct boardobjgrp_e32 pwr_policy_rels;	/* constructed with boardobjgrpconstruct_e32() */
+	struct boardobjgrp_e32 pwr_violations;	/* constructed with boardobjgrpconstruct_e32() */
+	struct pwr_policy_client_workitem client_work_item;	/* b_pending cleared at setup */
+};
+
+struct pwr_policy_limit {
+	struct pwr_policy super;
+};
+
+struct pwr_policy_hw_threshold {
+	struct pwr_policy_limit super;
+	u8 threshold_idx;
+	u8 low_threshold_idx;
+	bool b_use_low_threshold;
+	u16 low_threshold_value;
+};
+
+union pwr_policy_data_union {
+	struct boardobj boardobj;
+	struct pwr_policy pwrpolicy;
+	struct pwr_policy_hw_threshold hw_threshold;
+} ;
+
+#define PMGR_GET_PWR_POLICY(g, policy_idx) /* policy at policy_idx, as pwr_policy */ \
+	((struct pwr_policy *)BOARDOBJGRP_OBJ_GET_BY_IDX(                                 \
+		&((g)->pmgr_pmu.pmgr_policyobjs.pwr_policies.super), (policy_idx)))
+
+#define PMGR_PWR_POLICY_INCREMENT_LIMIT_INPUT_COUNT(ppolicy)                 \
+	((ppolicy)->num_limit_inputs++)
+
+u32 pmgr_policy_sw_setup(struct gk20a *g);
+
+#endif
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index d6173275..da1a49db 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -16,6 +16,7 @@
 #include "gk20a/gk20a.h"
 #include "clk/clk.h"
 #include "perf/perf.h"
+#include "pmgr/pmgr.h"
 
 /*sw setup for pstate components*/
 int gk20a_init_pstate_support(struct gk20a *g)
@@ -49,6 +50,10 @@ int gk20a_init_pstate_support(struct gk20a *g)
 		return err;
 
 	err = clk_prog_sw_setup(g);
+	if (err)
+		return err;
+
+	err = pmgr_domain_sw_setup(g);
 	return err;
 }
 
@@ -100,6 +105,10 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g)
 		return err;
 
 	err = clk_set_boot_fll_clk(g);
+	if (err)
+		return err;
+
+	err = pmgr_domain_pmu_setup(g);
 	return err;
 }
 
-- 
cgit v1.2.2


From 776ab920a7a4d31c6180e7dcb9f3fcea611e92bd Mon Sep 17 00:00:00 2001
From: Lakshmanan M <lm@nvidia.com>
Date: Fri, 7 Oct 2016 14:30:12 +0530
Subject: gpu: nvgpu: Add SW_THRESHOLD policy support

Added SW_THRESHOLD policy support for over power protection.

JIRA DNVGPU-70

Change-Id: I7a9d202619c997d6cab6fb750db7f3018229b2fd
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1233055
(cherry picked from commit b233c74b9ba4a3802f111757aecf24a27c830fc1)
Reviewed-on: http://git-master/r/1241960
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/pmgr/pwrpolicy.c | 85 ++++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/pmgr/pwrpolicy.h | 10 +++++
 2 files changed, 95 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/pmgr/pwrpolicy.c b/drivers/gpu/nvgpu/pmgr/pwrpolicy.c
index bec13b0c..d7926773 100644
--- a/drivers/gpu/nvgpu/pmgr/pwrpolicy.c
+++ b/drivers/gpu/nvgpu/pmgr/pwrpolicy.c
@@ -229,6 +229,17 @@ static u32 _pwr_domains_pmudatainit_hw_threshold(struct gk20a *g,
 	pmu_hw_threshold_data->b_use_low_threshold = p_hw_threshold->b_use_low_threshold;
 	pmu_hw_threshold_data->low_threshold_value = p_hw_threshold->low_threshold_value;
 
+	if (BOARDOBJ_GET_TYPE(board_obj_ptr) ==
+		CTRL_PMGR_PWR_POLICY_TYPE_SW_THRESHOLD) {
+		struct nv_pmu_pmgr_pwr_policy_sw_threshold *pmu_sw_threshold_data;
+		struct pwr_policy_sw_threshold *p_sw_threshold;
+
+		p_sw_threshold = (struct pwr_policy_sw_threshold *)board_obj_ptr;
+		pmu_sw_threshold_data =
+			(struct nv_pmu_pmgr_pwr_policy_sw_threshold *) ppmudata;
+		pmu_sw_threshold_data->event_id =
+			p_sw_threshold->event_id;
+	}
 done:
 	return status;
 }
@@ -326,6 +337,15 @@ static struct boardobj *construct_pwr_policy(struct gk20a *g,
 	pwrpolicyhwthreshold->low_threshold_idx = hwthreshold->low_threshold_idx;
 	pwrpolicyhwthreshold->low_threshold_value = hwthreshold->low_threshold_value;
 
+	if (type == CTRL_PMGR_PWR_POLICY_TYPE_SW_THRESHOLD) {
+		struct pwr_policy_sw_threshold *pwrpolicyswthreshold;
+		struct pwr_policy_sw_threshold *swthreshold =
+			(struct pwr_policy_sw_threshold*)pargs;
+
+		pwrpolicyswthreshold = (struct pwr_policy_sw_threshold*)board_obj_ptr;
+		pwrpolicyswthreshold->event_id = swthreshold->event_id;
+	}
+
 	gk20a_dbg_info(" Done");
 
 	return board_obj_ptr;
@@ -378,6 +398,55 @@ done:
 	return status;
 }
 
+/*
+ * WAR helper: fill *ppwrpolicydata with fixed (not VBIOS-derived) values for
+ * a SW_THRESHOLD policy, construct the policy object and insert it into the
+ * pwr_policies group at obj_index.
+ * Returns 0 on success, -EINVAL if construction or insertion fails.
+ */
+static u32 _pwr_policy_construct_WAR_SW_Threshold_policy(struct gk20a *g,
+			struct pmgr_pwr_policy *ppwrpolicyobjs,
+			union pwr_policy_data_union *ppwrpolicydata,
+			u16 pwr_policy_size,
+			u32 obj_index)
+{
+	struct boardobj *policy_obj;
+
+	/* WARN policy */
+	ppwrpolicydata->boardobj.type = CTRL_PMGR_PWR_POLICY_TYPE_SW_THRESHOLD;
+	ppwrpolicydata->sw_threshold.threshold_idx = 1;
+	ppwrpolicydata->sw_threshold.event_id = 0x01;
+
+	ppwrpolicydata->pwrpolicy.limit_unit = 0;
+	ppwrpolicydata->pwrpolicy.limit_min = 10000;
+	ppwrpolicydata->pwrpolicy.limit_rated = 100000;
+	ppwrpolicydata->pwrpolicy.limit_max = 100000;
+	ppwrpolicydata->pwrpolicy.sample_mult  = 5;
+
+	/* Moving-average filter; the only filter parameter set is its window */
+	ppwrpolicydata->pwrpolicy.filter_type =
+			CTRL_PMGR_PWR_POLICY_FILTER_TYPE_MOVING_AVERAGE;
+	ppwrpolicydata->pwrpolicy.filter_param.moving_avg.window_size = 10;
+
+	policy_obj = construct_pwr_policy(g, ppwrpolicydata,
+				pwr_policy_size, ppwrpolicydata->boardobj.type);
+
+	if (!policy_obj) {
+		gk20a_err(dev_from_gk20a(g),
+			"unable to create pwr policy for type %d", ppwrpolicydata->boardobj.type);
+		return -EINVAL;
+	}
+
+	if (boardobjgrp_objinsert(&ppwrpolicyobjs->pwr_policies.super,
+			policy_obj, obj_index)) {
+		gk20a_err(dev_from_gk20a(g),
+			"unable to insert pwr policy boardobj for %d", obj_index);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static u32 devinit_get_pwr_policy_table(struct gk20a *g,
 			struct pmgr_pwr_policy *ppwrpolicyobjs)
 {
@@ -392,6 +461,7 @@ static u32 devinit_get_pwr_policy_table(struct gk20a *g,
 	u16 pwr_policy_size;
 	bool integral_control = false;
 	u32 hw_threshold_policy_index = 0;
+	u32 sw_threshold_policy_index = 0;
 	union pwr_policy_data_union pwr_policy_data;
 
 	gk20a_dbg_info("");
@@ -603,6 +673,21 @@ static u32 devinit_get_pwr_policy_table(struct gk20a *g,
 		++obj_index;
 	}
 
+	if (!sw_threshold_policy_index) {
+		status = _pwr_policy_construct_WAR_SW_Threshold_policy(g,
+					ppwrpolicyobjs,
+					&pwr_policy_data,
+					sizeof(struct pwr_policy_sw_threshold),
+					obj_index);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"unable to construct_WAR_policy");
+			status = -EINVAL;
+			goto done;
+		}
+		++obj_index;
+	}
+
 done:
 	gk20a_dbg_info(" done status %x", status);
 	return status;
diff --git a/drivers/gpu/nvgpu/pmgr/pwrpolicy.h b/drivers/gpu/nvgpu/pmgr/pwrpolicy.h
index 82289137..008282d3 100644
--- a/drivers/gpu/nvgpu/pmgr/pwrpolicy.h
+++ b/drivers/gpu/nvgpu/pmgr/pwrpolicy.h
@@ -99,10 +99,20 @@ struct pwr_policy_hw_threshold {
 	u16 low_threshold_value;
 };
 
+struct pwr_policy_sw_threshold {
+	struct pwr_policy_limit super;
+	u8 threshold_idx;	/* this field and the next three repeat, in the */
+	u8 low_threshold_idx;	/* same order, the members that follow 'super' */
+	bool b_use_low_threshold;	/* in struct pwr_policy_hw_threshold */
+	u16 low_threshold_value;
+	u8 event_id;	/* the one member struct pwr_policy_hw_threshold lacks */
+};
+
 union pwr_policy_data_union {
 	struct boardobj boardobj;
 	struct pwr_policy pwrpolicy;
 	struct pwr_policy_hw_threshold hw_threshold;
+	struct pwr_policy_sw_threshold sw_threshold;
 } ;
 
 #define PMGR_GET_PWR_POLICY(g, policy_idx)                                 \
-- 
cgit v1.2.2


From db529935a5f50e9e683d44d2eb01d0d76a915792 Mon Sep 17 00:00:00 2001
From: Thomas Fleury <tfleury@nvidia.com>
Date: Thu, 8 Sep 2016 17:35:13 -0700
Subject: gpu: nvgpu: parse performance table

Parse VBIOS performance table to retrieve clock ranges.

Jira DNVGPU-125

Change-Id: Ia8e4ede158de5c5374205a510099d00b497fe1a6
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1218935
(cherry picked from commit b5b7c789e98a20eb4cc5c30f0e2eb45d4a882cc4)
Reviewed-on: http://git-master/r/1232593
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/perf/perf.h     |   2 +
 drivers/gpu/nvgpu/pstate/pstate.c | 204 ++++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/pstate/pstate.h |  32 ++++++
 3 files changed, 238 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/perf/perf.h b/drivers/gpu/nvgpu/perf/perf.h
index 02aed7a6..3ffaf4e1 100644
--- a/drivers/gpu/nvgpu/perf/perf.h
+++ b/drivers/gpu/nvgpu/perf/perf.h
@@ -15,6 +15,7 @@
 
 #include "vfe_equ.h"
 #include "vfe_var.h"
+#include "pstate/pstate.h"
 #include "gk20a/gk20a.h"
 
 #define CTRL_PERF_VFE_VAR_TYPE_INVALID                               0x00
@@ -53,6 +54,7 @@
 struct perf_pmupstate {
 	struct vfe_vars vfe_varobjs;
 	struct vfe_equs vfe_equobjs;
+	struct pstates pstatesobjs;
 };
 
 u32 perf_pmu_vfe_load(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index da1a49db..f4cc50ab 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -17,6 +17,10 @@
 #include "clk/clk.h"
 #include "perf/perf.h"
 #include "pmgr/pmgr.h"
+#include "include/bios.h"
+#include "pstate/pstate.h"
+
+static int pstate_sw_setup(struct gk20a *g);
 
 /*sw setup for pstate components*/
 int gk20a_init_pstate_support(struct gk20a *g)
@@ -53,6 +57,10 @@ int gk20a_init_pstate_support(struct gk20a *g)
 	if (err)
 		return err;
 
+	err = pstate_sw_setup(g);
+	if (err)
+		return err;
+
 	err = pmgr_domain_sw_setup(g);
 	return err;
 }
@@ -112,3 +120,199 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g)
 	return err;
 }
 
+/*
+ * Construct the boardobj base through boardobj_construct_super(), then copy
+ * the pstate-specific fields (num, clklist) over from the template in args.
+ */
+int pstate_construct_super(struct gk20a *g, struct boardobj **ppboardobj,
+				u16 size, void *args)
+{
+	const struct pstate *src = args;
+	struct pstate *dst;
+	int err = boardobj_construct_super(g, ppboardobj, size, args);
+
+	if (!err) {
+		dst = (struct pstate *)*ppboardobj;
+		dst->num = src->num;
+		dst->clklist = src->clklist;
+	}
+	return err;
+}
+
+/* Set the _3X bit in the type_mask of the template in args, then construct. */
+int pstate_construct_3x(struct gk20a *g, struct boardobj **ppboardobj,
+				u16 size, void *args)
+{
+	((struct boardobj *)args)->type_mask |= BIT(CTRL_PERF_PSTATE_TYPE_3X);
+
+	return pstate_construct_super(g, ppboardobj, size, args);
+}
+
+/* Construct a pstate object from the template in args; type must be _3X. */
+struct pstate *pstate_construct(struct gk20a *g, void *args)
+{
+	struct pstate *src = args, *obj = NULL;
+
+	if ((src->super.type == CTRL_PERF_PSTATE_TYPE_3X) &&
+	    !pstate_construct_3x(g, (struct boardobj **)&obj,
+			    sizeof(*obj), args))
+		return obj;
+	gk20a_err(dev_from_gk20a(g),
+		"error constructing pstate num=%u", src->num);
+	return obj;
+}
+
+/*
+ * Insert pstate at slot index of the pstates group; on success the group's
+ * num_levels counter is bumped. Returns boardobjgrp_objinsert()'s status.
+ */
+int pstate_insert(struct gk20a *g, struct pstate *pstate, int index)
+{
+	struct pstates *grp = &(g->perf_pmu.pstatesobjs);
+	int ret = boardobjgrp_objinsert(&grp->super.super,
+			(struct boardobj *)pstate, index);
+
+	if (!ret)
+		grp->num_levels++;
+	else
+		gk20a_err(dev_from_gk20a(g),
+			  "error adding pstate boardobj %d", index);
+	return ret;
+}
+
+/*
+ * Decode one VBIOS 5.x pstate entry into *pstate, which is zeroed first.
+ * Clock sub-entry N is paired with the clock domain board object at index N.
+ * Returns 0, or -EINVAL when no clock domain object exists at that index.
+ */
+static int parse_pstate_entry_5x(struct gk20a *g,
+		struct vbios_pstate_header_5x *hdr,
+		struct vbios_pstate_entry_5x *entry,
+		struct pstate *pstate)
+{
+	u8 *p = (u8 *)entry + hdr->base_entry_size;
+	u32 clkidx;
+
+	memset(pstate, 0, sizeof(struct pstate));
+	pstate->super.type = CTRL_PERF_PSTATE_TYPE_3X;
+	pstate->num = 0x0F - entry->pstate_level;
+	pstate->clklist.clksetinfolistsize = hdr->clock_entry_count;
+
+	gk20a_dbg_info("pstate P%u", pstate->num);
+
+	for (clkidx = 0; clkidx < hdr->clock_entry_count; clkidx++) {
+		struct clk_set_info *pclksetinfo;
+		struct vbios_pstate_entry_clock_5x *clk_entry;
+		struct clk_domain *clk_domain;
+
+		clk_domain = (struct clk_domain *)BOARDOBJGRP_OBJ_GET_BY_IDX(
+			    &g->clk_pmu.clk_domainobjs.super.super, clkidx);
+		if (!clk_domain)	/* do not dereference a failed lookup */
+			return -EINVAL;
+
+		pclksetinfo = &pstate->clklist.clksetinfo[clkidx];
+		clk_entry = (struct vbios_pstate_entry_clock_5x *)p;
+		pclksetinfo->clkwhich = clk_domain->domain;
+		pclksetinfo->nominal_mhz = BIOS_GET_FIELD(clk_entry->param0,
+				VBIOS_PSTATE_5X_CLOCK_PROG_PARAM0_NOM_FREQ_MHZ);
+		pclksetinfo->min_mhz = BIOS_GET_FIELD(clk_entry->param1,
+				VBIOS_PSTATE_5X_CLOCK_PROG_PARAM1_MIN_FREQ_MHZ);
+		pclksetinfo->max_mhz = BIOS_GET_FIELD(clk_entry->param1,
+				VBIOS_PSTATE_5X_CLOCK_PROG_PARAM1_MAX_FREQ_MHZ);
+
+		gk20a_dbg_info(
+			"clk_domain=%u nominal_mhz=%u min_mhz=%u max_mhz=%u",
+			pclksetinfo->clkwhich, pclksetinfo->nominal_mhz,
+			pclksetinfo->min_mhz, pclksetinfo->max_mhz);
+		p += hdr->clock_entry_size;
+	}
+
+	return 0;
+}
+
+/*
+ * Insert one pstate per non-skipped VBIOS 5.x table entry. Each entry is
+ * decoded into the on-stack scratch copy, never into an object that was
+ * already inserted. Returns 0, -EINVAL or a parse/insert error.
+ */
+static int parse_pstate_table_5x(struct gk20a *g,
+		struct vbios_pstate_header_5x *hdr)
+{
+	struct pstate scratch, *pstate;
+	struct vbios_pstate_entry_5x *entry;
+	u8 *p = (u8 *)hdr;
+	u32 entry_size;
+	int err = 0;
+	u8 i;
+
+	if ((hdr->header_size != VBIOS_PSTATE_HEADER_5X_SIZE_10) ||
+		(hdr->base_entry_count == 0) ||
+		((hdr->base_entry_size != VBIOS_PSTATE_BASE_ENTRY_5X_SIZE_2) &&
+		 (hdr->base_entry_size != VBIOS_PSTATE_BASE_ENTRY_5X_SIZE_3)) ||
+		(hdr->clock_entry_size != VBIOS_PSTATE_CLOCK_ENTRY_5X_SIZE_6) ||
+		(hdr->clock_entry_count > CLK_SET_INFO_MAX_SIZE))
+		return -EINVAL;
+
+	p += hdr->header_size;
+	entry_size = hdr->base_entry_size +
+			hdr->clock_entry_count * hdr->clock_entry_size;
+
+	for (i = 0; i < hdr->base_entry_count; i++, p += entry_size) {
+		entry = (struct vbios_pstate_entry_5x *)p;
+		if (entry->pstate_level == VBIOS_PERFLEVEL_SKIP_ENTRY)
+			continue;
+
+		err = parse_pstate_entry_5x(g, hdr, entry, &scratch);
+		if (err)
+			break;
+
+		pstate = pstate_construct(g, &scratch);
+		if (!pstate)	/* previously fell through with err == 0 */
+			return -EINVAL;
+
+		err = pstate_insert(g, pstate, i);
+		if (err)
+			break;
+	}
+	return err;
+}
+
+/*
+ * Create the pstates board object group and populate it from the VBIOS
+ * performance table returned by the optional get_perf_table_ptrs hook.
+ * Returns 0, the group error, -EINVAL (no table / not 5X) or parser status.
+ */
+static int pstate_sw_setup(struct gk20a *g)
+{
+	struct vbios_pstate_header_5x *table = NULL;
+	int ret;
+
+	gk20a_dbg_fn("");
+
+	ret = boardobjgrpconstruct_e32(&g->perf_pmu.pstatesobjs.super);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error creating boardobjgrp for pstates, err=%d",
+			  ret);
+		return ret;
+	}
+
+	if (g->ops.bios.get_perf_table_ptrs)
+		table = (struct vbios_pstate_header_5x *)
+				g->ops.bios.get_perf_table_ptrs(g,
+				g->bios.perf_token, PERFORMANCE_TABLE);
+
+	if (!table) {
+		gk20a_err(dev_from_gk20a(g),
+				"performance table not found");
+		return -EINVAL;
+	}
+
+	if (table->version != VBIOS_PSTATE_TABLE_VERSION_5X) {
+		gk20a_err(dev_from_gk20a(g),
+				"unknown/unsupported clocks table version=0x%02x",
+				table->version);
+		return -EINVAL;
+	}
+
+	return parse_pstate_table_5x(g, table);
+}
diff --git a/drivers/gpu/nvgpu/pstate/pstate.h b/drivers/gpu/nvgpu/pstate/pstate.h
index fb49adf3..11fa4c77 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.h
+++ b/drivers/gpu/nvgpu/pstate/pstate.h
@@ -12,8 +12,40 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  */
+#ifndef __PSTATE_H__
+#define __PSTATE_H__
 
 #include "gk20a/gk20a.h"
+#include "clk/clk.h"
+
+#define CTRL_PERF_PSTATE_TYPE_3X	0x3
+
+#define CLK_SET_INFO_MAX_SIZE		(32)
+
+struct clk_set_info {
+	enum nv_pmu_clk_clkwhich clkwhich;	/* 'domain' of the clock domain object at the same index */
+	u32 nominal_mhz;	/* PARAM0 NOM_FREQ_MHZ field of the VBIOS clock entry */
+	u32 min_mhz;	/* PARAM1 MIN_FREQ_MHZ field of the VBIOS clock entry */
+	u32 max_mhz;	/* PARAM1 MAX_FREQ_MHZ field of the VBIOS clock entry */
+};
+
+struct clk_set_info_list {
+	u32 clksetinfolistsize;	/* set to the table header's clock_entry_count */
+	struct clk_set_info clksetinfo[CLK_SET_INFO_MAX_SIZE];	/* indexed by clock sub-entry */
+};
+
+struct pstate {
+	struct boardobj super;	/* type is CTRL_PERF_PSTATE_TYPE_3X for parsed entries */
+	u32 num;	/* parsed as 0x0F - VBIOS pstate_level */
+	struct clk_set_info_list clklist;	/* per-clock frequency info from the VBIOS entry */
+};
+
+struct pstates {
+	struct boardobjgrp_e32 super;	/* group that pstate_insert() adds pstate objects to */
+	u32  num_levels;	/* incremented by pstate_insert() on each successful insert */
+};
 
 int gk20a_init_pstate_support(struct gk20a *g);
 int gk20a_init_pstate_pmu_support(struct gk20a *g);
+
+#endif /* __PSTATE_H__ */
-- 
cgit v1.2.2


From 173bdefc92e2e4ef8f1e7e6ead7f86e746bee935 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Mon, 19 Sep 2016 11:07:46 +0530
Subject: gpu: nvgpu: add support for voltage config

- changes to read voltage tables from VBIOS
  & create boardobj then send to pmu
- Rail, Device & Policy objects are read from VBIOS & created boardobjs
- RPC support to load, Set & get voltage.

JIRA DNVGPU-122

Change-Id: I61621a514eef9c081a64c4ab066f01dfc28f8402
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1222774
(cherry picked from commit 9da86d8c2c547623cf5f38c89afeb3f5bb1667ac)
Reviewed-on: http://git-master/r/1244656
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x |   7 +-
 drivers/gpu/nvgpu/include/bios.h      | 132 ++++++++
 drivers/gpu/nvgpu/perf/perf.h         |   2 +
 drivers/gpu/nvgpu/volt/volt.h         |  30 ++
 drivers/gpu/nvgpu/volt/volt_dev.c     | 582 ++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/volt/volt_dev.h     |  69 ++++
 drivers/gpu/nvgpu/volt/volt_pmu.c     | 243 ++++++++++++++
 drivers/gpu/nvgpu/volt/volt_pmu.h     |  22 ++
 drivers/gpu/nvgpu/volt/volt_policy.c  | 360 +++++++++++++++++++++
 drivers/gpu/nvgpu/volt/volt_policy.h  |  64 ++++
 drivers/gpu/nvgpu/volt/volt_rail.c    | 438 +++++++++++++++++++++++++
 drivers/gpu/nvgpu/volt/volt_rail.h    |  77 +++++
 12 files changed, 2025 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/nvgpu/volt/volt.h
 create mode 100644 drivers/gpu/nvgpu/volt/volt_dev.c
 create mode 100644 drivers/gpu/nvgpu/volt/volt_dev.h
 create mode 100644 drivers/gpu/nvgpu/volt/volt_pmu.c
 create mode 100644 drivers/gpu/nvgpu/volt/volt_pmu.h
 create mode 100644 drivers/gpu/nvgpu/volt/volt_policy.c
 create mode 100644 drivers/gpu/nvgpu/volt/volt_policy.h
 create mode 100644 drivers/gpu/nvgpu/volt/volt_rail.c
 create mode 100644 drivers/gpu/nvgpu/volt/volt_rail.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index d5162332..ea770e43 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -43,13 +43,18 @@ nvgpu-y += \
 	$(nvgpu-t18x)/clk/clk.o \
 	$(nvgpu-t18x)/gp106/clk_gp106.o \
 	$(nvgpu-t18x)/gp106/gp106_gating_reglist.o \
+	$(nvgpu-t18x)/gp106/xve_gp106.o \
 	$(nvgpu-t18x)/gp106/therm_gp106.o \
 	$(nvgpu-t18x)/gp106/xve_gp106.o \
 	$(nvgpu-t18x)/pmgr/pwrdev.o \
 	$(nvgpu-t18x)/pmgr/pmgr.o \
 	$(nvgpu-t18x)/pmgr/pmgrpmu.o \
 	$(nvgpu-t18x)/pmgr/pwrmonitor.o \
-	$(nvgpu-t18x)/pmgr/pwrpolicy.o
+	$(nvgpu-t18x)/pmgr/pwrpolicy.o \
+	$(nvgpu-t18x)/volt/volt_rail.o \
+	$(nvgpu-t18x)/volt/volt_dev.o \
+	$(nvgpu-t18x)/volt/volt_policy.o \
+	$(nvgpu-t18x)/volt/volt_pmu.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
 
diff --git a/drivers/gpu/nvgpu/include/bios.h b/drivers/gpu/nvgpu/include/bios.h
index d3a677f8..fb1e1f46 100644
--- a/drivers/gpu/nvgpu/include/bios.h
+++ b/drivers/gpu/nvgpu/include/bios.h
@@ -656,4 +656,136 @@ struct pwr_policy_3x_entry_struct {
 #define NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM1_HW_THRESHOLD_LOW_THRESHOLD_VAL_MASK 0xFFFF
 #define NV_VBIOS_POWER_POLICY_3X_ENTRY_PARAM1_HW_THRESHOLD_LOW_THRESHOLD_VAL_SHIFT 0
 
+/* Voltage Rail Table */
+struct vbios_voltage_rail_table_1x_header {
+	u8 version;
+	u8 header_size;
+	u8 table_entry_size;
+	u8 num_table_entries;
+	u8 volt_domain_hal;
+} __packed;
+
+#define NV_VBIOS_VOLTAGE_RAIL_1X_ENTRY_SIZE_07		0X00000007
+#define NV_VBIOS_VOLTAGE_RAIL_1X_ENTRY_SIZE_08		0X00000008
+#define NV_VBIOS_VOLTAGE_RAIL_1X_ENTRY_SIZE_09		0X00000009
+#define NV_VBIOS_VOLTAGE_RAIL_1X_ENTRY_SIZE_0A		0X0000000A
+#define NV_VBIOS_VOLTAGE_RAIL_1X_ENTRY_SIZE_0B		0X0000000B
+
+struct vbios_voltage_rail_table_1x_entry {
+	u32 boot_voltage_uv;
+	u8 rel_limit_vfe_equ_idx;
+	u8 alt_rel_limit_vfe_equidx;
+	u8 ov_limit_vfe_equ_idx;
+	u8 pwr_equ_idx;
+	u8 boot_volt_vfe_equ_idx;
+	u8 vmin_limit_vfe_equ_idx;
+	u8 volt_margin_limit_vfe_equ_idx;
+} __packed;
+
+/* Voltage Device Table */
+struct vbios_voltage_device_table_1x_header {
+	u8 version;
+	u8 header_size;
+	u8 table_entry_size;
+	u8 num_table_entries;
+};
+
+struct vbios_voltage_device_table_1x_entry {
+	u8 type;
+	u8 volt_domain;
+	u16 settle_time_us;
+	u32 param0;
+	u32 param1;
+	u32 param2;
+	u32 param3;
+	u32 param4;
+};
+
+#define NV_VBIOS_VOLTAGE_DEVICE_1X_ENTRY_TYPE_INVALID	0x00
+#define NV_VBIOS_VOLTAGE_DEVICE_1X_ENTRY_TYPE_PSV		0x02
+
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM0_PSV_INPUT_FREQUENCY_MASK	\
+		GENMASK(23, 0)
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM0_PSV_INPUT_FREQUENCY_SHIFT	0
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM0_PSV_EXT_DEVICE_INDEX_MASK	\
+	GENMASK(31, 24)
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM0_PSV_EXT_DEVICE_INDEX_SHIFT	24
+
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM1_PSV_VOLTAGE_MINIMUM_MASK	\
+		GENMASK(23, 0)
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM1_PSV_VOLTAGE_MINIMUM_SHIFT	0
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM1_PSV_OPERATION_TYPE_MASK	\
+	GENMASK(31, 24)
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM1_PSV_OPERATION_TYPE_SHIFT		24
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM1_PSV_OPERATION_TYPE_DEFAULT	0x00
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM1_PSV_OPERATION_TYPE_LPWR_STEADY_STATE \
+		0x01
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM1_PSV_OPERATION_TYPE_LPWR_SLEEP_STATE \
+		0x02
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM2_PSV_VOLTAGE_MAXIMUM_MASK	\
+		GENMASK(23, 0)
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM2_PSV_VOLTAGE_MAXIMUM_SHIFT	0
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM2_PSV_RSVD_MASK		\
+		GENMASK(31, 24)
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM2_PSV_RSVD_SHIFT		24
+
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM3_PSV_VOLTAGE_BASE_MASK	\
+		GENMASK(23, 0)
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM3_PSV_VOLTAGE_BASE_SHIFT	0
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM3_PSV_VOLTAGE_STEPS_MASK	\
+		GENMASK(31, 24)
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM3_PSV_VOLTAGE_STEPS_SHIFT	24
+
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM4_PSV_OFFSET_SCALE_MASK \
+		GENMASK(23, 0)
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM4_PSV_OFFSET_SCALE_SHIFT	0
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM4_PSV_RSVD_MASK		\
+		GENMASK(31, 24)
+#define NV_VBIOS_VDT_1X_ENTRY_PARAM4_PSV_RSVD_SHIFT	24
+
+/* Voltage Policy Table */
+struct vbios_voltage_policy_table_1x_header {
+	u8 version;
+	u8 header_size;
+	u8 table_entry_size;
+	u8 num_table_entries;
+	u8 perf_core_vf_seq_policy_idx;
+};
+
+struct vbios_voltage_policy_table_1x_entry {
+	u8 type;	/* presumably an NV_VBIOS_VOLTAGE_POLICY_1X_ENTRY_TYPE_* value */
+	u32 param0;	/* without packing, 3 padding bytes would precede this */
+	u32 param1;
+} __packed;	/* packed like the voltage rail table structs above */
+
+#define NV_VBIOS_VOLTAGE_POLICY_1X_ENTRY_TYPE_INVALID		0x00
+#define NV_VBIOS_VOLTAGE_POLICY_1X_ENTRY_TYPE_SINGLE_RAIL	0x01
+#define NV_VBIOS_VOLTAGE_POLICY_1X_ENTRY_TYPE_SR_MULTI_STEP	0x02
+#define NV_VBIOS_VOLTAGE_POLICY_1X_ENTRY_TYPE_SR_SINGLE_STEP	0x03
+
+#define NV_VBIOS_VPT_ENTRY_PARAM0_SINGLE_RAIL_VOLT_DOMAIN_MASK \
+		GENMASK(7, 0)
+#define NV_VBIOS_VPT_ENTRY_PARAM0_SINGLE_RAIL_VOLT_DOMAIN_SHIFT	0
+#define NV_VBIOS_VPT_ENTRY_PARAM0_RSVD_MASK	GENMASK(31, 8) /* (high, low) */
+#define NV_VBIOS_VPT_ENTRY_PARAM0_RSVD_SHIFT	8
+
+#define NV_VBIOS_VPT_ENTRY_PARAM0_SR_VD_MASTER_MASK \
+		GENMASK(7, 0)
+#define NV_VBIOS_VPT_ENTRY_PARAM0_SR_VD_MASTER_SHIFT 0
+#define NV_VBIOS_VPT_ENTRY_PARAM0_SR_VD_SLAVE_MASK \
+		GENMASK(15, 8)
+#define NV_VBIOS_VPT_ENTRY_PARAM0_SR_VD_SLAVE_SHIFT 8
+#define NV_VBIOS_VPT_ENTRY_PARAM0_SR_DELTA_SM_MIN_MASK \
+		GENMASK(23, 16)
+#define NV_VBIOS_VPT_ENTRY_PARAM0_SR_DELTA_SM_MIN_SHIFT 16
+#define NV_VBIOS_VPT_ENTRY_PARAM0_SR_DELTA_SM_MAX_MASK \
+		GENMASK(31, 24)
+#define NV_VBIOS_VPT_ENTRY_PARAM0_SR_DELTA_SM_MAX_SHIFT 24
+
+/* Type-Specific Parameter DWORD 1 - Type = _SR_MULTI_STEP */
+#define NV_VBIOS_VPT_ENTRY_PARAM1_SR_SETTLE_TIME_INTERMEDIATE_MASK \
+		GENMASK(15, 0)
+#define NV_VBIOS_VPT_ENTRY_PARAM1_SR_SETTLE_TIME_INTERMEDIATE_SHIFT \
+		0
+
 #endif
diff --git a/drivers/gpu/nvgpu/perf/perf.h b/drivers/gpu/nvgpu/perf/perf.h
index 3ffaf4e1..c03bf2ae 100644
--- a/drivers/gpu/nvgpu/perf/perf.h
+++ b/drivers/gpu/nvgpu/perf/perf.h
@@ -17,6 +17,7 @@
 #include "vfe_var.h"
 #include "pstate/pstate.h"
 #include "gk20a/gk20a.h"
+#include "volt/volt.h"
 
 #define CTRL_PERF_VFE_VAR_TYPE_INVALID                               0x00
 #define CTRL_PERF_VFE_VAR_TYPE_DERIVED                               0x01
@@ -55,6 +56,7 @@ struct perf_pmupstate {
 	struct vfe_vars vfe_varobjs;
 	struct vfe_equs vfe_equobjs;
 	struct pstates pstatesobjs;
+	struct obj_volt volt;
 };
 
 u32 perf_pmu_vfe_load(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/volt/volt.h b/drivers/gpu/nvgpu/volt/volt.h
new file mode 100644
index 00000000..0d64c265
--- /dev/null
+++ b/drivers/gpu/nvgpu/volt/volt.h
@@ -0,0 +1,30 @@
+/*
+* Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms and conditions of the GNU General Public License,
+* version 2, as published by the Free Software Foundation.
+*
+* This program is distributed in the hope it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+* more details.
+*/
+
+#ifndef _VOLT_H_
+#define _VOLT_H_
+
+#include "volt_rail.h"
+#include "volt_dev.h"
+#include "volt_policy.h"
+#include "volt_pmu.h"
+
+#define VOLTAGE_DESCRIPTOR_TABLE_ENTRY_INVALID	0xFF
+
+struct obj_volt {	/* embedded as the volt member of struct perf_pmupstate */
+	struct voltage_rail_metadata volt_rail_metadata;
+	struct voltage_device_metadata volt_dev_metadata;	/* holds the volt_devices group */
+	struct voltage_policy_metadata volt_policy_metadata;	/* source of perf_core_vf_seq_policy_idx */
+};
+
+#endif /* _VOLT_H_ */
diff --git a/drivers/gpu/nvgpu/volt/volt_dev.c b/drivers/gpu/nvgpu/volt/volt_dev.c
new file mode 100644
index 00000000..89040658
--- /dev/null
+++ b/drivers/gpu/nvgpu/volt/volt_dev.c
@@ -0,0 +1,582 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/sort.h>
+
+#include "gk20a/gk20a.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "gm206/bios_gm206.h"
+#include "ctrl/ctrlvolt.h"
+#include "gk20a/pmu_gk20a.h"
+
+#include "pmuif/gpmuifperfvfe.h"
+#include "include/bios.h"
+#include "volt.h"
+
+#define RAW_PERIOD	160	/* raw_period given to every PWM device parsed from VBIOS */
+#define VOLT_DEV_PWM_VOLTAGE_STEPS_INVALID	0	/* VBIOS step count treated as unset */
+#define VOLT_DEV_PWM_VOLTAGE_STEPS_DEFAULT	1	/* step count used when VBIOS gives 0 */
+
+/* Fill the PMU "set" payload with the fields shared by all voltage devices. */
+u32 volt_device_pmu_data_init_super(struct gk20a *g,
+	struct boardobj *pboard_obj, struct nv_pmu_boardobj *ppmudata)
+{
+	struct nv_pmu_volt_volt_device_boardobj_set *pmu_set =
+		(struct nv_pmu_volt_volt_device_boardobj_set *)ppmudata;
+	struct voltage_device *vdev = (struct voltage_device *)pboard_obj;
+	u32 err = boardobj_pmudatainit_super(g, pboard_obj, ppmudata);
+
+	/* Base-class init failed: hand its status back untouched. */
+	if (err != 0)
+		return err;
+
+	pmu_set->volt_step_uv = vdev->volt_step_uv;
+	pmu_set->voltage_max_uv = vdev->voltage_max_uv;
+	pmu_set->voltage_min_uv = vdev->voltage_min_uv;
+	pmu_set->switch_delay_us = vdev->switch_delay_us;
+
+	/* err is zero here. */
+	return err;
+}
+
+/* PWM flavour of pmudatainit: common device fields plus the PWM-only ones. */
+static u32 volt_device_pmu_data_init_pwm(struct gk20a *g,
+		struct boardobj *pboard_obj, struct nv_pmu_boardobj *ppmudata)
+{
+	struct nv_pmu_volt_volt_device_pwm_boardobj_set *pwm_set =
+		(struct nv_pmu_volt_volt_device_pwm_boardobj_set *)ppmudata;
+	struct voltage_device_pwm *pwm_dev =
+		(struct voltage_device_pwm *)pboard_obj;
+	u32 err = volt_device_pmu_data_init_super(g, pboard_obj, ppmudata);
+
+	if (err != 0)
+		return err;
+
+	/* Copy the PWM-specific attributes into the PMU payload. */
+	pwm_set->pwm_source = pwm_dev->source;
+	pwm_set->voltage_offset_scale_uv = pwm_dev->voltage_offset_scale_uv;
+	pwm_set->voltage_base_uv = pwm_dev->voltage_base_uv;
+	pwm_set->raw_period = pwm_dev->raw_period;
+
+	return err;
+}
+
+/*
+ * Build the base VOLTAGE_DEVICE object and copy the template fields (pargs).
+ */
+u32 construct_volt_device(struct gk20a *g,
+	struct boardobj **ppboardobj, u16 size, void *pargs)
+{
+	struct voltage_device *tmpl = (struct voltage_device *)pargs;
+	struct voltage_device *vdev;
+	u32 err = boardobj_construct_super(g, ppboardobj, size, pargs);
+
+	if (err != 0)
+		return err;
+
+	vdev = (struct voltage_device *)*ppboardobj;
+	vdev->super.pmudatainit = volt_device_pmu_data_init_super;
+	vdev->volt_domain = tmpl->volt_domain;
+	vdev->i2c_dev_idx = tmpl->i2c_dev_idx;
+	vdev->switch_delay_us = tmpl->switch_delay_us;
+	vdev->operation_type = tmpl->operation_type;
+	vdev->voltage_min_uv = tmpl->voltage_min_uv;
+	vdev->voltage_max_uv = tmpl->voltage_max_uv;
+	/* Both reserved slots start out as the "invalid" table-entry marker. */
+	vdev->rsvd_0 = VOLTAGE_DESCRIPTOR_TABLE_ENTRY_INVALID;
+	vdev->rsvd_1 = VOLTAGE_DESCRIPTOR_TABLE_ENTRY_INVALID;
+
+	return err;
+}
+
+/* Construct a PWM voltage device: base object first, then PWM attributes. */
+u32 construct_pwm_volt_device(struct gk20a *g, struct boardobj **ppboardobj,
+		u16 size, void *pargs)
+{
+	struct voltage_device_pwm *tmpl = (struct voltage_device_pwm *)pargs;
+	struct voltage_device_pwm *pwm_dev;
+	u32 err;
+
+	err = construct_volt_device(g, ppboardobj, size, pargs);
+	if (err != 0)
+		return err;
+
+	pwm_dev = (struct voltage_device_pwm *)*ppboardobj;
+
+	/* Replace the base-class hook installed by construct_volt_device(). */
+	(*ppboardobj)->pmudatainit = volt_device_pmu_data_init_pwm;
+
+	/* VOLTAGE_DEVICE_PWM-specific parameters come from the template. */
+	pwm_dev->raw_period = tmpl->raw_period;
+	pwm_dev->source = tmpl->source;
+	pwm_dev->voltage_offset_scale_uv = tmpl->voltage_offset_scale_uv;
+	pwm_dev->voltage_base_uv = tmpl->voltage_base_uv;
+
+	/* Zero on this path. */
+	return err;
+}
+
+
+/*
+ * Allocate one PWM voltage-table entry for voltage_uv; the duty cycle comes
+ * from the template in pargs. Returns NULL if the allocation fails.
+ */
+struct voltage_device_entry *volt_dev_construct_dev_entry_pwm(struct gk20a *g,
+		u32 voltage_uv, void *pargs)
+{
+	const struct voltage_device_pwm_entry *tmpl = pargs;
+	struct voltage_device_pwm_entry *entry;
+
+	/* kzalloc() returns zeroed memory, so no extra memset() is needed. */
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL)
+		return NULL;
+	entry->super.voltage_uv = voltage_uv;
+	entry->duty_cycle = tmpl->duty_cycle;
+	return (struct voltage_device_entry *)entry;
+}
+
+/* Map a VBIOS PSV operation-type code to the CTRL_VOLT_DEVICE_* equivalent. */
+static u8 volt_dev_operation_type_convert(u8 vbios_type)
+{
+	if (vbios_type ==
+	    NV_VBIOS_VDT_1X_ENTRY_PARAM1_PSV_OPERATION_TYPE_DEFAULT)
+		return CTRL_VOLT_DEVICE_OPERATION_TYPE_DEFAULT;
+	if (vbios_type ==
+	    NV_VBIOS_VDT_1X_ENTRY_PARAM1_PSV_OPERATION_TYPE_LPWR_STEADY_STATE)
+		return CTRL_VOLT_DEVICE_OPERATION_TYPE_LPWR_STEADY_STATE;
+	if (vbios_type ==
+	    NV_VBIOS_VDT_1X_ENTRY_PARAM1_PSV_OPERATION_TYPE_LPWR_SLEEP_STATE)
+		return CTRL_VOLT_DEVICE_OPERATION_TYPE_LPWR_SLEEP_STATE;
+	/* Anything else is not a recognised operation type. */
+	return CTRL_VOLT_DEVICE_OPERATION_TYPE_INVALID;
+}
+
+/* Build a voltage device from pargs; NULL for non-PWM types or on failure. */
+struct voltage_device *volt_volt_device_construct(struct gk20a *g,
+		void *pargs)
+{
+	struct boardobj *obj = NULL;
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_VOLT_DEVICE_TYPE_PWM)
+		return (struct voltage_device *)obj;
+
+	if (construct_pwm_volt_device(g, &obj,
+			sizeof(struct voltage_device_pwm), pargs) == 0)
+		return (struct voltage_device *)obj;
+
+	gk20a_err(dev_from_gk20a(g),
+		" Could not allocate memory for VOLTAGE_DEVICE type (%x).",
+		BOARDOBJ_GET_TYPE(pargs));
+	return NULL;
+}
+
+static u32 volt_get_voltage_device_table_1x_psv(struct gk20a *g,
+		struct vbios_voltage_device_table_1x_entry *p_bios_entry,
+		struct voltage_device_metadata *p_Volt_Device_Meta_Data,
+		u8 entry_Idx)
+{
+	u32 status = 0;
+	u32 entry_cnt = 0;
+	struct voltage_device *pvolt_dev = NULL;
+	struct voltage_device_pwm *pvolt_dev_pwm = NULL;
+	struct voltage_device_pwm *ptmp_dev = NULL;
+	u32 duty_cycle;
+	u32 frequency_hz;
+	u32 voltage_uv;
+	u8 ext_dev_idx;
+	u8 steps;
+	u8 volt_domain = 0;
+	struct voltage_device_pwm_entry pwm_entry = { { 0 } };
+	/* Parse one PSV VBIOS entry into a PWM device plus its voltage table. */
+	ptmp_dev = kzalloc(sizeof(struct voltage_device_pwm), GFP_KERNEL);
+	if (ptmp_dev == NULL)
+		return -ENOMEM;
+	/* NOTE(review): frequency_hz is decoded but not used in this function. */
+	frequency_hz = (u32)BIOS_GET_FIELD(p_bios_entry->param0,
+		NV_VBIOS_VDT_1X_ENTRY_PARAM0_PSV_INPUT_FREQUENCY);
+
+	ext_dev_idx = (u8)BIOS_GET_FIELD(p_bios_entry->param0,
+		NV_VBIOS_VDT_1X_ENTRY_PARAM0_PSV_EXT_DEVICE_INDEX);
+
+	ptmp_dev->super.operation_type = volt_dev_operation_type_convert(
+			(u8)BIOS_GET_FIELD(p_bios_entry->param1,
+			NV_VBIOS_VDT_1X_ENTRY_PARAM1_PSV_OPERATION_TYPE));
+
+	if (ptmp_dev->super.operation_type ==
+			CTRL_VOLT_DEVICE_OPERATION_TYPE_INVALID) {
+		gk20a_err(dev_from_gk20a(g),
+			" Invalid Voltage Device Operation Type.");
+
+		status = -EINVAL;
+		goto done;
+	}
+
+	ptmp_dev->super.voltage_min_uv =
+		(u32)BIOS_GET_FIELD(p_bios_entry->param1,
+			NV_VBIOS_VDT_1X_ENTRY_PARAM1_PSV_VOLTAGE_MINIMUM);
+
+	ptmp_dev->super.voltage_max_uv =
+		(u32)BIOS_GET_FIELD(p_bios_entry->param2,
+			NV_VBIOS_VDT_1X_ENTRY_PARAM2_PSV_VOLTAGE_MAXIMUM);
+
+	ptmp_dev->voltage_base_uv = BIOS_GET_FIELD(p_bios_entry->param3,
+		NV_VBIOS_VDT_1X_ENTRY_PARAM3_PSV_VOLTAGE_BASE);
+
+	steps = (u8)BIOS_GET_FIELD(p_bios_entry->param3,
+		NV_VBIOS_VDT_1X_ENTRY_PARAM3_PSV_VOLTAGE_STEPS);
+	if (steps == VOLT_DEV_PWM_VOLTAGE_STEPS_INVALID)
+		steps = VOLT_DEV_PWM_VOLTAGE_STEPS_DEFAULT;
+
+	ptmp_dev->voltage_offset_scale_uv =
+			BIOS_GET_FIELD(p_bios_entry->param4,
+				NV_VBIOS_VDT_1X_ENTRY_PARAM4_PSV_OFFSET_SCALE);
+
+	volt_domain = volt_rail_vbios_volt_domain_convert_to_internal(g,
+		(u8)p_bios_entry->volt_domain);
+	if (volt_domain == CTRL_VOLT_DOMAIN_INVALID) {
+		gk20a_err(dev_from_gk20a(g),
+			"invalid voltage domain = %d",
+			(u8)p_bios_entry->volt_domain);
+		status = -EINVAL;
+		goto done;
+	}
+	/* Pick the PWM source that matches the operation type. */
+	if (ptmp_dev->super.operation_type ==
+			CTRL_VOLT_DEVICE_OPERATION_TYPE_DEFAULT) {
+		ptmp_dev->source = NV_PMU_PMGR_PWM_SOURCE_THERM_VID_PWM_1;
+	} else if (ptmp_dev->super.operation_type ==
+		CTRL_VOLT_DEVICE_OPERATION_TYPE_LPWR_STEADY_STATE) {
+		ptmp_dev->source = NV_PMU_PMGR_PWM_SOURCE_RSVD_0;
+	} else if (ptmp_dev->super.operation_type ==
+		CTRL_VOLT_DEVICE_OPERATION_TYPE_LPWR_SLEEP_STATE) {
+		ptmp_dev->source = NV_PMU_PMGR_PWM_SOURCE_RSVD_1;
+	}
+
+	ptmp_dev->raw_period = RAW_PERIOD;
+
+	/* Initialize data for parent class. */
+	ptmp_dev->super.super.type = CTRL_VOLT_DEVICE_TYPE_PWM;
+	ptmp_dev->super.volt_domain = volt_domain;
+	ptmp_dev->super.i2c_dev_idx = ext_dev_idx;
+	ptmp_dev->super.switch_delay_us = (u16)p_bios_entry->settle_time_us;
+	/* Build the real device object from the temporary template. */
+	pvolt_dev = volt_volt_device_construct(g, ptmp_dev);
+	if (pvolt_dev == NULL) {
+		gk20a_err(dev_from_gk20a(g),
+			" Failure to construct VOLTAGE_DEVICE object.");
+
+		status = -EINVAL;
+		goto done;
+	}
+	/* NOTE(review): pvolt_dev appears not to be freed if the insert fails. */
+	status = boardobjgrp_objinsert(
+				&p_Volt_Device_Meta_Data->volt_devices.super,
+				(struct boardobj *)pvolt_dev, entry_Idx);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"could not add VOLTAGE_DEVICE for entry %d into boardobjgrp ",
+			entry_Idx);
+		goto done;
+	}
+
+	pvolt_dev_pwm = (struct voltage_device_pwm *)pvolt_dev;
+	/* Per step: voltage_uv = base + duty_cycle * offset_scale / raw_period. */
+	duty_cycle = 0;
+	do {
+		voltage_uv = (u32)(pvolt_dev_pwm->voltage_base_uv +
+			(s32)((((s64)((s32)duty_cycle)) *
+			pvolt_dev_pwm->voltage_offset_scale_uv)
+			/ ((s64)((s32) pvolt_dev_pwm->raw_period))));
+
+		/* Only voltages within [voltage_min_uv, voltage_max_uv] get an entry. */
+		if ((voltage_uv >= pvolt_dev_pwm->super.voltage_min_uv) &&
+			(voltage_uv <= pvolt_dev_pwm->super.voltage_max_uv)) {
+			if (pvolt_dev_pwm->voltage_offset_scale_uv < 0)
+				pwm_entry.duty_cycle =
+					pvolt_dev_pwm->raw_period - duty_cycle;
+			else
+				pwm_entry.duty_cycle = duty_cycle;
+
+			/* Check if there is room left in the voltage table. */
+			if (entry_cnt == VOLTAGE_TABLE_MAX_ENTRIES) {
+				gk20a_err(dev_from_gk20a(g), "Voltage table is full");
+				status = -EINVAL;
+				goto done;
+			}
+
+			pvolt_dev->pentry[entry_cnt] =
+				volt_dev_construct_dev_entry_pwm(g,
+					voltage_uv, &pwm_entry);
+			if (pvolt_dev->pentry[entry_cnt] == NULL) {
+				gk20a_err(dev_from_gk20a(g),
+					" Error creating voltage_device_pwm_entry!");
+				status = -EINVAL;
+				goto done;
+			}
+
+			entry_cnt++;
+		}
+
+		/* Obtain next value after the specified steps. */
+		duty_cycle = duty_cycle + (u32)steps;
+
+		/* Cap duty cycle to PWM period. */
+		if (duty_cycle > pvolt_dev_pwm->raw_period)
+			duty_cycle = pvolt_dev_pwm->raw_period;
+		/* The loop ends at raw_period; that duty cycle itself is not tried. */
+	} while (duty_cycle < pvolt_dev_pwm->raw_period);
+
+done:
+	if (pvolt_dev != NULL)
+		pvolt_dev->num_entries = entry_cnt;
+	/* Only the temporary template is released here. */
+	kfree(ptmp_dev);
+	return status;
+}
+
+/*
+ * Walk the VBIOS Voltage Device Table and build a device object for each
+ * PSV entry. Returns 0 on success, -EINVAL if the table cannot be located,
+ * or the status of the first entry that fails to parse.
+ */
+static u32 volt_get_volt_devices_table(struct gk20a *g,
+		struct voltage_device_metadata *pvolt_device_metadata)
+{
+	u32 status = 0;
+	u8 *volt_device_table_ptr = NULL;
+	struct vbios_voltage_device_table_1x_header header = { 0 };
+	struct vbios_voltage_device_table_1x_entry entry  = { 0 };
+	u8 entry_idx;
+	u8 *entry_offset;
+
+	if (!g->ops.bios.get_perf_table_ptrs)
+		return -EINVAL;
+
+	volt_device_table_ptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+			g->bios.perf_token, VOLTAGE_DEVICE_TABLE);
+	if (volt_device_table_ptr == NULL)
+		return -EINVAL;
+
+	memcpy(&header, volt_device_table_ptr, sizeof(header));
+
+	/* Read in the entries. */
+	for (entry_idx = 0; entry_idx < header.num_table_entries; entry_idx++) {
+		entry_offset = volt_device_table_ptr + header.header_size +
+				(entry_idx * header.table_entry_size);
+		memcpy(&entry, entry_offset, sizeof(entry));
+
+		if (entry.type != NV_VBIOS_VOLTAGE_DEVICE_1X_ENTRY_TYPE_PSV)
+			continue;
+		status = volt_get_voltage_device_table_1x_psv(g, &entry,
+				pvolt_device_metadata, entry_idx);
+		/* Stop here so a later good entry cannot mask this error. */
+		if (status)
+			break;
+	}
+
+	return status;
+}
+
+/* Return the PMU "set" data slot for board object idx, if the mask has it. */
+static u32 _volt_device_devgrp_pmudata_instget(struct gk20a *g,
+	struct nv_pmu_boardobjgrp *pmuboardobjgrp,
+	struct nv_pmu_boardobj **ppboardobjpmudata, u8 idx)
+{
+	struct nv_pmu_volt_volt_device_boardobj_grp_set *grp_set =
+		(struct nv_pmu_volt_volt_device_boardobj_grp_set *)pmuboardobjgrp;
+	u32 idx_bit = (u32)BIT(idx);
+
+	gk20a_dbg_info("");
+
+	/* A clear mask bit means the group has no object at this index. */
+	if (!(grp_set->hdr.data.super.obj_mask.super.data[0] & idx_bit))
+		return -EINVAL;
+
+	*ppboardobjpmudata = (struct nv_pmu_boardobj *)
+		&grp_set->objects[idx].data.board_obj;
+	gk20a_dbg_info("Done");
+	return 0;
+}
+
+/* Return the PMU status slot for board object idx, if the mask has it. */
+static u32 _volt_device_devgrp_pmustatus_instget(struct gk20a *g,
+	void *pboardobjgrppmu,
+	struct nv_pmu_boardobj_query **ppboardobjpmustatus, u8 idx)
+{
+	struct nv_pmu_volt_volt_device_boardobj_grp_get_status *grp_status =
+		pboardobjgrppmu;
+	u32 idx_bit = (u32)BIT(idx);
+
+	/* A clear mask bit means the group has no object at this index. */
+	if (!(grp_status->hdr.data.super.obj_mask.super.data[0] & idx_bit))
+		return -EINVAL;
+
+	*ppboardobjpmustatus = (struct nv_pmu_boardobj_query *)
+			&grp_status->objects[idx].data.board_obj;
+	return 0;
+}
+
+/* sort() comparator: order voltage_device_entry pointers by ascending uV. */
+static int volt_device_volt_cmp(const void *a, const void *b)
+{
+	u32 lhs = (*(const struct voltage_device_entry * const *)a)->voltage_uv;
+	u32 rhs = (*(const struct voltage_device_entry * const *)b)->voltage_uv;
+	return (lhs > rhs) - (lhs < rhs); /* u32 difference cast to int can wrap */
+}
+
+/*
+ * Sort the device's voltage table, derive its step size, and register the
+ * device with the rail of its voltage domain when VOLT_RAIL is supported.
+ */
+u32 volt_device_state_init(struct gk20a *g, struct voltage_device *pvolt_dev)
+{
+	struct voltage_rail *rail;
+	u32 err = 0;
+	u8 idx;
+
+	sort(pvolt_dev->pentry, pvolt_dev->num_entries,
+	     sizeof(*pvolt_dev->pentry), volt_device_volt_cmp, NULL);
+
+	/* Step size: gap between the first two sorted entries, if present. */
+	if (pvolt_dev->num_entries > VOLTAGE_TABLE_MAX_ENTRIES_ONE)
+		pvolt_dev->volt_step_uv = (pvolt_dev->pentry[1]->voltage_uv -
+				pvolt_dev->pentry[0]->voltage_uv);
+	else
+		pvolt_dev->volt_step_uv = NV_PMU_VOLT_VALUE_0V_IN_UV;
+
+	/* Nothing further to build when VOLT_RAIL isn't supported. */
+	if (!(VOLT_RAIL_VOLT_3X_SUPPORTED(&g->perf_pmu.volt)))
+		return err;
+
+	idx = volt_rail_volt_domain_convert_to_idx(g, pvolt_dev->volt_domain);
+	if (idx == CTRL_BOARDOBJ_IDX_INVALID) {
+		gk20a_err(dev_from_gk20a(g),
+			" could not convert voltage domain to rail index.");
+		err = -EINVAL;
+		goto fail;
+	}
+
+	rail = VOLT_GET_VOLT_RAIL(&g->perf_pmu.volt, idx);
+	if (rail == NULL) {
+		gk20a_err(dev_from_gk20a(g),
+			"could not obtain ptr to rail object from rail index");
+		err = -EINVAL;
+		goto fail;
+	}
+
+	err = volt_rail_volt_dev_register(g, rail,
+		BOARDOBJ_GET_IDX(pvolt_dev), pvolt_dev->operation_type);
+	if (err == 0)
+		return err;
+
+	gk20a_err(dev_from_gk20a(g),
+		"Failed to register the device with rail obj");
+
+fail:
+	gk20a_err(dev_from_gk20a(g),
+		"Error in building rail sw state device sw");
+	return err;
+}
+
+/* Run the pmuinithandle hook of the voltage-device board object group. */
+u32 volt_dev_pmu_setup(struct gk20a *g)
+{
+	struct boardobjgrp *grp =
+		&g->perf_pmu.volt.volt_dev_metadata.volt_devices.super;
+	u32 err;
+
+	gk20a_dbg_info("");
+
+	/* Refuse to run the hook on a group that was never constructed. */
+	if (grp->bconstructed) {
+		err = grp->pmuinithandle(g, grp);
+		gk20a_dbg_info("Done");
+		return err;
+	}
+	return -EINVAL;
+}
+
+/*
+ * Build the voltage-device SW state: construct the board object group, fill
+ * it from the VBIOS table, set up the PMU interfaces and init each device.
+ */
+u32 volt_dev_sw_setup(struct gk20a *g)
+{
+	u32 status = 0;
+	struct boardobjgrp *pboardobjgrp = NULL;
+	struct voltage_device *pvolt_device;
+	u8 i;
+
+	gk20a_dbg_info("");
+
+	status = boardobjgrpconstruct_e32(&g->perf_pmu.volt.volt_dev_metadata.
+			volt_devices);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error creating boardobjgrp for volt device, status - 0x%x",
+			status);
+		goto done;
+	}
+
+	pboardobjgrp = &g->perf_pmu.volt.volt_dev_metadata.volt_devices.super;
+	pboardobjgrp->pmudatainstget  = _volt_device_devgrp_pmudata_instget;
+	pboardobjgrp->pmustatusinstget  = _volt_device_devgrp_pmustatus_instget;
+
+	/* Obtain Voltage Device Table from VBIOS */
+	status = volt_get_volt_devices_table(g, &g->perf_pmu.volt.volt_dev_metadata);
+	if (status)
+		goto done;
+
+	/* Populate data for the VOLT_DEVICE PMU interface */
+	BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, VOLT, VOLT_DEVICE);
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp,
+			volt, VOLT, volt_device, VOLT_DEVICE);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x",
+			status);
+		goto done;
+	}
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_GET_STATUS_CONSTRUCT(g, pboardobjgrp,
+			volt, VOLT, volt_device, VOLT_DEVICE);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error constructing PMU_BOARDOBJ_CMD_GRP_GET_STATUS interface - 0x%x",
+			status);
+		goto done;
+	}
+
+	/* Sort each device's voltage table and register it with its rail. */
+	BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct voltage_device *,
+			     pvolt_device, i) {
+		status = volt_device_state_init(g, pvolt_device);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"failure while executing devices's state init interface");
+			gk20a_err(dev_from_gk20a(g),
+				" devIdx = %d, status = 0x%x", i, status);
+			goto done;
+		}
+	}
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
diff --git a/drivers/gpu/nvgpu/volt/volt_dev.h b/drivers/gpu/nvgpu/volt/volt_dev.h
new file mode 100644
index 00000000..5113567d
--- /dev/null
+++ b/drivers/gpu/nvgpu/volt/volt_dev.h
@@ -0,0 +1,69 @@
+/*
+* Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms and conditions of the GNU General Public License,
+* version 2, as published by the Free Software Foundation.
+*
+* This program is distributed in the hope it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+* more details.
+*/
+
+
+#ifndef _VOLTDEV_H_
+#define _VOLTDEV_H_
+
+#include "boardobj/boardobj.h"
+#include "boardobj/boardobjgrp.h"
+#include "ctrl/ctrlvolt.h"
+
+#define VOLTAGE_TABLE_MAX_ENTRIES_ONE	1	/* at or below this count the step size is 0V */
+#define VOLTAGE_TABLE_MAX_ENTRIES	256	/* capacity of voltage_device.pentry[] */
+
+struct voltage_device {
+	struct boardobj super;	/* must stay first: boardobj pointers are cast to this type */
+	u8 volt_domain;
+	u8 i2c_dev_idx;
+	u32 switch_delay_us;
+	u32 num_entries;	/* number of filled slots in pentry[] */
+	struct voltage_device_entry *pentry[VOLTAGE_TABLE_MAX_ENTRIES];
+	struct voltage_device_entry *pcurr_entry;
+	u8 rsvd_0;
+	u8 rsvd_1;
+	u8 operation_type;	/* a CTRL_VOLT_DEVICE_OPERATION_TYPE_* value */
+	u32 voltage_min_uv;
+	u32 voltage_max_uv;
+	u32 volt_step_uv;	/* pentry[1] - pentry[0] after sorting, else the 0V value */
+};
+
+struct voltage_device_entry {	/* base of one voltage-table entry */
+	u32  voltage_uv;	/* sort key for the device's pentry[] table */
+};
+
+struct voltage_device_metadata {
+	struct boardobjgrp_e32 volt_devices;	/* group of struct voltage_device objects */
+};
+
+/*!
+ * Extends VOLTAGE_DEVICE providing attributes specific to PWM controllers.
+ */
+struct voltage_device_pwm {
+	struct voltage_device super;	/* must stay first: cast from voltage_device */
+	s32 voltage_base_uv;	/* computed voltage at duty cycle 0 */
+	s32 voltage_offset_scale_uv;	/* scaled by duty_cycle / raw_period; may be < 0 */
+	enum nv_pmu_pmgr_pwm_source source;
+	u32 raw_period;	/* divisor applied to the duty cycle */
+};
+
+struct voltage_device_pwm_entry {
+	struct voltage_device_entry  super;	/* must stay first: cast to the base entry */
+	u32 duty_cycle;	/* stored as raw_period - duty when the offset scale is negative */
+};
+/* End of the PWM-specific types. */
+
+u32 volt_dev_sw_setup(struct gk20a *g);	/* builds the device group from the VBIOS table */
+u32 volt_dev_pmu_setup(struct gk20a *g);	/* runs the group's pmuinithandle hook */
+
+#endif /* _VOLTDEV_H_ */
diff --git a/drivers/gpu/nvgpu/volt/volt_pmu.c b/drivers/gpu/nvgpu/volt/volt_pmu.c
new file mode 100644
index 00000000..4d451b65
--- /dev/null
+++ b/drivers/gpu/nvgpu/volt/volt_pmu.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "gm206/bios_gm206.h"
+#include "ctrl/ctrlvolt.h"
+#include "ctrl/ctrlperf.h"
+#include "gk20a/pmu_gk20a.h"
+
+#include "pmuif/gpmuifperfvfe.h"
+#include "pmuif/gpmuifvolt.h"
+#include "include/bios.h"
+#include "volt.h"
+
+#define RAIL_COUNT 2	/* rails filled in by volt_set_voltage(): logic and SRAM */
+
+struct volt_rpc_pmucmdhandler_params {	/* context passed to volt_rpc_pmucmdhandler() */
+	struct nv_pmu_volt_rpc *prpc_call;	/* RPC buffer whose b_supported flag is checked */
+	u32 success;	/* set to 1 by the handler; waited on via pmu_wait_message_cond() */
+};
+
+/* Completion callback for VOLT RPCs; flags success in the param block. */
+static void volt_rpc_pmucmdhandler(struct gk20a *g, struct pmu_msg *msg,
+				  void *param, u32 handle, u32 status)
+{
+	struct volt_rpc_pmucmdhandler_params *ctx = param;
+
+	gk20a_dbg_info("");
+
+	if (msg->msg.volt.msg_type == NV_PMU_VOLT_MSG_ID_RPC) {
+		/* Success requires b_supported to be set in the RPC buffer. */
+		if (ctx->prpc_call->b_supported)
+			ctx->success = 1;
+		return;
+	}
+	gk20a_err(dev_from_gk20a(g), "unsupported msg for VOLT RPC %x",
+		msg->msg.volt.msg_type);
+}
+
+
+/*
+ * Post one VOLT RPC on PMU_COMMAND_QUEUE_LPQ and wait for the completion
+ * handler to flag success. prpc_call is used as both input and output.
+ */
+static u32 volt_pmu_rpc_execute(struct gk20a *g,
+	struct nv_pmu_volt_rpc *prpc_call)
+{
+	struct volt_rpc_pmucmdhandler_params ctx = {0};
+	struct pmu_payload payload = { { 0 } };
+	struct pmu_msg msg = { { 0 } };
+	struct pmu_cmd cmd = { { 0 } };
+	u32 seqdesc;
+	u32 err;
+
+	msg.hdr.size = sizeof(struct pmu_msg);
+	cmd.hdr.unit_id = PMU_UNIT_VOLT;
+	cmd.hdr.size = (u32)sizeof(struct nv_pmu_volt_cmd) +
+					(u32)sizeof(struct pmu_hdr);
+	cmd.cmd.volt.cmd_type = NV_PMU_VOLT_CMD_ID_RPC;
+
+	/* The same buffer carries the request and receives the reply. */
+	payload.out.buf = (u8 *)prpc_call;
+	payload.out.size = (u32)sizeof(struct nv_pmu_volt_rpc);
+	payload.out.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
+	payload.out.offset = NV_PMU_VOLT_MSG_RPC_ALLOC_OFFSET;
+	payload.in.buf = (u8 *)prpc_call;
+	payload.in.size = (u32)sizeof(struct nv_pmu_volt_rpc);
+	payload.in.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
+	payload.in.offset = NV_PMU_VOLT_CMD_RPC_ALLOC_OFFSET;
+
+	ctx.prpc_call = prpc_call;
+	ctx.success = 0;
+
+	err = gk20a_pmu_cmd_post(g, &cmd, NULL, &payload,
+			PMU_COMMAND_QUEUE_LPQ,
+			volt_rpc_pmucmdhandler, (void *)&ctx,
+			&seqdesc, ~0);
+	if (err != 0) {
+		gk20a_err(dev_from_gk20a(g), "unable to post volt RPC cmd %x",
+			cmd.cmd.volt.cmd_type);
+		return err;
+	}
+
+	pmu_wait_message_cond(&g->pmu, gk20a_get_gr_idle_timeout(g),
+			&ctx.success, 1);
+	if (ctx.success != 0)
+		return err;
+
+	gk20a_err(dev_from_gk20a(g), "rpc call to volt failed");
+	return -EINVAL;
+}
+
+/* Issue the VOLT LOAD RPC to the PMU and return the RPC status. */
+u32 volt_pmu_send_load_cmd_to_pmu(struct gk20a *g)
+{
+	struct nv_pmu_volt_rpc load_rpc = { 0 };
+	u32 err;
+
+	load_rpc.function = NV_PMU_VOLT_RPC_ID_LOAD;
+	err = volt_pmu_rpc_execute(g, &load_rpc);
+	if (err == 0)
+		return err;
+
+	gk20a_err(dev_from_gk20a(g),
+		"Error while executing LOAD RPC: status = 0x%08x.", err);
+	return err;
+}
+
+/*
+ * Query the PMU for the current voltage of the rail backing volt_domain.
+ * On success *pvoltage_uv receives the value; it is untouched on failure.
+ */
+static u32 volt_rail_get_voltage(struct gk20a *g,
+	u8 volt_domain, u32 *pvoltage_uv)
+{
+	struct nv_pmu_volt_rpc rpc = { 0 };
+	u8 idx = volt_rail_volt_domain_convert_to_idx(g, volt_domain);
+	u32 err;
+
+	/* Reject domains that do not map to a valid rail index. */
+	if ((idx == CTRL_VOLT_RAIL_INDEX_INVALID) ||
+		(!VOLT_RAIL_INDEX_IS_VALID(&g->perf_pmu.volt, idx))) {
+		gk20a_err(dev_from_gk20a(g),
+			"failed: volt_domain = %d, voltage rail table = %d.",
+			volt_domain, idx);
+		return -EINVAL;
+	}
+
+	rpc.params.volt_rail_get_voltage.rail_idx = idx;
+	rpc.function = NV_PMU_VOLT_RPC_ID_VOLT_RAIL_GET_VOLTAGE;
+
+	err = volt_pmu_rpc_execute(g, &rpc);
+	if (err != 0) {
+		gk20a_err(dev_from_gk20a(g),
+			"Error while executing volt_rail_get_voltage rpc");
+		return err;
+	}
+
+	*pvoltage_uv = rpc.params.volt_rail_get_voltage.voltage_uv;
+	return err;
+}
+
+
+/*
+ * Validate prail_list and submit it to the PMU as a VOLT_POLICY_SET_VOLTAGE
+ * RPC for the policy that belongs to client_id.
+ */
+static u32 volt_policy_set_voltage(struct gk20a *g, u8 client_id,
+		struct ctrl_perf_volt_rail_list *prail_list)
+{
+	struct nv_pmu_volt_rpc rpc = { 0 };
+	struct obj_volt *volt = &g->perf_pmu.volt;
+	u8 policy_idx;
+	u32 err;
+	u8 n;
+
+	/* Every rail needs a valid domain and a target other than the 0V value. */
+	for (n = 0; n < prail_list->num_rails; n++) {
+		if ((prail_list->rails[n].volt_domain !=
+				CTRL_VOLT_DOMAIN_INVALID) &&
+			(prail_list->rails[n].voltage_uv !=
+				NV_PMU_VOLT_VALUE_0V_IN_UV))
+			continue;
+
+		gk20a_err(dev_from_gk20a(g), "Invalid voltage domain or target ");
+		gk20a_err(dev_from_gk20a(g), " client_id = %d, listEntry = %d ",
+				client_id, n);
+		gk20a_err(dev_from_gk20a(g),
+			"volt_domain = %d, voltage_uv = %d uV.",
+			prail_list->rails[n].volt_domain,
+			prail_list->rails[n].voltage_uv);
+		return -EINVAL;
+	}
+
+	/* Only the PERF_CORE_VF_SEQ client has a policy index to look up. */
+	if (client_id != CTRL_VOLT_POLICY_CLIENT_PERF_CORE_VF_SEQ)
+		return -EINVAL;
+
+	policy_idx = volt->volt_policy_metadata.perf_core_vf_seq_policy_idx;
+
+	/* Set RPC parameters. */
+	rpc.function = NV_PMU_VOLT_RPC_ID_VOLT_POLICY_SET_VOLTAGE;
+	rpc.params.volt_policy_voltage_data.policy_idx = policy_idx;
+	memcpy(&rpc.params.volt_policy_voltage_data.rail_list, prail_list,
+		(sizeof(struct ctrl_perf_volt_rail_list)));
+
+	/* Execute the voltage change request via PMU RPC. */
+	err = volt_pmu_rpc_execute(g, &rpc);
+	if (err != 0)
+		gk20a_err(dev_from_gk20a(g),
+			"Error while executing VOLT_POLICY_SET_VOLTAGE RPC");
+
+	return err;
+}
+
+/* Request new logic and SRAM rail voltages via the PERF_CORE_VF_SEQ client. */
+u32 volt_set_voltage(struct gk20a *g, u32 logic_voltage_uv, u32 sram_voltage_uv)
+{
+	struct ctrl_perf_volt_rail_list rails = { 0 };
+
+	/* Entry 0: logic rail. */
+	rails.rails[0].voltage_min_noise_unaware_uv = logic_voltage_uv;
+	rails.rails[0].voltage_uv = logic_voltage_uv;
+	rails.rails[0].volt_domain = CTRL_VOLT_DOMAIN_LOGIC;
+
+	/* Entry 1: SRAM rail. */
+	rails.rails[1].voltage_min_noise_unaware_uv = sram_voltage_uv;
+	rails.rails[1].voltage_uv = sram_voltage_uv;
+	rails.rails[1].volt_domain = CTRL_VOLT_DOMAIN_SRAM;
+	rails.num_rails = RAIL_COUNT;
+
+	return volt_policy_set_voltage(g,
+		CTRL_VOLT_POLICY_CLIENT_PERF_CORE_VF_SEQ, &rails);
+}
+
+/* Read the current voltage of volt_domain; returns 0 when the query fails. */
+u32 volt_get_voltage(struct gk20a *g, u32 volt_domain)
+{
+	u32 uv = 0;
+
+	/* volt_domain is narrowed to u8 by the callee's parameter type. */
+	if (volt_rail_get_voltage(g, volt_domain, &uv) == 0)
+		return uv;
+
+	gk20a_err(dev_from_gk20a(g),
+		"CTRL_VOLT_DOMAIN_LOGIC get voltage failed");
+
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/volt/volt_pmu.h b/drivers/gpu/nvgpu/volt/volt_pmu.h
new file mode 100644
index 00000000..c98ba321
--- /dev/null
+++ b/drivers/gpu/nvgpu/volt/volt_pmu.h
@@ -0,0 +1,22 @@
+/*
+* Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms and conditions of the GNU General Public License,
+* version 2, as published by the Free Software Foundation.
+*
+* This program is distributed in the hope it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+* more details.
+*/
+
+#ifndef _VOLT_PMU_H_
+#define _VOLT_PMU_H_
+
+u32 volt_pmu_send_load_cmd_to_pmu(struct gk20a *g);	/* issues the VOLT LOAD RPC */
+u32 volt_set_voltage(struct gk20a *g, u32 logic_voltage_uv,
+		u32 sram_voltage_uv);	/* sets the logic and SRAM rail targets */
+u32 volt_get_voltage(struct gk20a *g, u32 volt_domain);	/* returns 0 on failure */
+
+#endif
diff --git a/drivers/gpu/nvgpu/volt/volt_policy.c b/drivers/gpu/nvgpu/volt/volt_policy.c
new file mode 100644
index 00000000..ee3e74b8
--- /dev/null
+++ b/drivers/gpu/nvgpu/volt/volt_policy.c
@@ -0,0 +1,360 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "gm206/bios_gm206.h"
+#include "ctrl/ctrlvolt.h"
+#include "gk20a/pmu_gk20a.h"
+
+#include "pmuif/gpmuifperfvfe.h"
+#include "include/bios.h"
+#include "volt.h"
+
+static u32 volt_policy_pmu_data_init_super(struct gk20a *g,
+	struct boardobj *pboardobj, struct nv_pmu_boardobj *ppmudata)
+{
+	return boardobj_pmudatainit_super(g, pboardobj, ppmudata);
+}
+
+static u32 construct_volt_policy(struct gk20a *g,
+	struct boardobj  **ppboardobj, u16 size, void *pArgs)
+{
+	struct voltage_policy *pvolt_policy = NULL;
+	u32 status = 0;
+
+	status = boardobj_construct_super(g, ppboardobj, size, pArgs);
+	if (status)
+		return status;
+
+	pvolt_policy = (struct voltage_policy *)*ppboardobj;
+
+	pvolt_policy->super.pmudatainit = volt_policy_pmu_data_init_super;
+
+	return status;
+}
+
+static u32 construct_volt_policy_split_rail(struct gk20a *g,
+	struct boardobj **ppboardobj, u16 size, void *pArgs)
+{
+	struct voltage_policy_split_rail *ptmp_policy  =
+			(struct voltage_policy_split_rail *)pArgs;
+	struct voltage_policy_split_rail *pvolt_policy = NULL;
+	u32 status = 0;
+
+	status = construct_volt_policy(g, ppboardobj, size, pArgs);
+	if (status)
+		return status;
+
+	pvolt_policy = (struct voltage_policy_split_rail *)*ppboardobj;
+
+	pvolt_policy->rail_idx_master = ptmp_policy->rail_idx_master;
+	pvolt_policy->rail_idx_slave = ptmp_policy->rail_idx_slave;
+	pvolt_policy->delta_min_vfe_equ_idx =
+			ptmp_policy->delta_min_vfe_equ_idx;
+	pvolt_policy->delta_max_vfe_equ_idx =
+			ptmp_policy->delta_max_vfe_equ_idx;
+
+	return status;
+}
+
+u32 volt_policy_pmu_data_init_split_rail(struct gk20a *g,
+	struct boardobj *pboardobj, struct nv_pmu_boardobj *ppmudata)
+{
+	u32 status = 0;
+	struct voltage_policy_split_rail *ppolicy;
+	struct nv_pmu_volt_volt_policy_splt_r_boardobj_set *pset;
+
+	status = volt_policy_pmu_data_init_super(g, pboardobj, ppmudata);
+	if (status)
+		goto done;
+
+	ppolicy = (struct voltage_policy_split_rail *)pboardobj;
+	pset = (struct nv_pmu_volt_volt_policy_splt_r_boardobj_set *)
+				ppmudata;
+
+	pset->rail_idx_master = ppolicy->rail_idx_master;
+	pset->rail_idx_slave = ppolicy->rail_idx_slave;
+	pset->delta_min_vfe_equ_idx = ppolicy->delta_min_vfe_equ_idx;
+	pset->delta_max_vfe_equ_idx = ppolicy->delta_max_vfe_equ_idx;
+	pset->offset_delta_min_uv = ppolicy->offset_delta_min_uv;
+	pset->offset_delta_max_uv = ppolicy->offset_delta_max_uv;
+
+done:
+	return status;
+}
+
+/*
+ * Construct a split-rail single-step policy: build the split-rail base
+ * object, then install the split-rail pmudatainit handler on it.
+ */
+static u32 volt_construct_volt_policy_split_rail_single_step(struct gk20a *g,
+	struct boardobj **ppboardobj, u16 size, void *pargs)
+{
+	struct boardobj *pboardobj = NULL;
+	u32 status = 0;
+
+	status = construct_volt_policy_split_rail(g, ppboardobj, size, pargs);
+	if (status)
+		return status;
+
+	pboardobj = (*ppboardobj);
+	pboardobj->pmudatainit = volt_policy_pmu_data_init_split_rail;
+
+	return status;
+}
+
+struct voltage_policy *volt_volt_policy_construct(struct gk20a *g, void *pargs)
+{
+	struct boardobj *pboard_obj = NULL;
+	u32 status = 0;
+
+	if (BOARDOBJ_GET_TYPE(pargs) ==
+		CTRL_VOLT_POLICY_TYPE_SR_SINGLE_STEP) {
+		status = volt_construct_volt_policy_split_rail_single_step(g,
+			&pboard_obj,
+			sizeof(struct voltage_policy_split_rail_single_step),
+			pargs);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"Could not allocate memory for voltage_policy");
+				pboard_obj = NULL;
+		}
+	}
+
+	return (struct voltage_policy *)pboard_obj;
+}
+
+static u8 volt_policy_type_convert(u8 vbios_type)
+{
+	switch (vbios_type) {
+	case NV_VBIOS_VOLTAGE_POLICY_1X_ENTRY_TYPE_SINGLE_RAIL:
+		return CTRL_VOLT_POLICY_TYPE_SINGLE_RAIL;
+
+	case NV_VBIOS_VOLTAGE_POLICY_1X_ENTRY_TYPE_SR_MULTI_STEP:
+		return CTRL_VOLT_POLICY_TYPE_SR_MULTI_STEP;
+
+	case NV_VBIOS_VOLTAGE_POLICY_1X_ENTRY_TYPE_SR_SINGLE_STEP:
+		return CTRL_VOLT_POLICY_TYPE_SR_SINGLE_STEP;
+	}
+
+	return CTRL_VOLT_POLICY_TYPE_INVALID;
+}
+
+static u32 volt_get_volt_policy_table(struct gk20a *g,
+		struct voltage_policy_metadata *pvolt_policy_metadata)
+{
+	u32 status = 0;
+	u8 *voltage_policy_table_ptr = NULL;
+	struct voltage_policy *ppolicy = NULL;
+	struct vbios_voltage_policy_table_1x_header header = { 0 };
+	struct vbios_voltage_policy_table_1x_entry entry  = { 0 };
+	u8 i;
+	u8 policy_type = 0;
+	u8 *entry_offset;
+	union policy_type {
+		struct boardobj		board_obj;
+		struct voltage_policy	volt_policy;
+		struct voltage_policy_split_rail	split_rail;
+	} policy_type_data;
+
+	if (g->ops.bios.get_perf_table_ptrs) {
+		voltage_policy_table_ptr =
+			(u8 *)g->ops.bios.get_perf_table_ptrs(g,
+				g->bios.perf_token, VOLTAGE_POLICY_TABLE);
+		if (voltage_policy_table_ptr == NULL) {
+			status = -EINVAL;
+			goto done;
+		}
+	} else {
+		status = -EINVAL;
+		goto done;
+	}
+
+	memcpy(&header, voltage_policy_table_ptr,
+			sizeof(struct vbios_voltage_policy_table_1x_header));
+
+	/* Set Voltage Policy Table Index for Perf Core VF Sequence client. */
+	pvolt_policy_metadata->perf_core_vf_seq_policy_idx =
+		(u8)header.perf_core_vf_seq_policy_idx;
+
+	/* Read in the entries. */
+	for (i = 0; i < header.num_table_entries; i++) {
+		entry_offset = (voltage_policy_table_ptr + header.header_size +
+						i * header.table_entry_size);
+
+		memcpy(&entry, entry_offset,
+			sizeof(struct vbios_voltage_policy_table_1x_entry));
+
+		memset(&policy_type_data, 0x0, sizeof(policy_type_data));
+
+		policy_type = volt_policy_type_convert((u8)entry.type);
+
+		if (policy_type == CTRL_VOLT_POLICY_TYPE_SR_SINGLE_STEP) {
+			policy_type_data.split_rail.rail_idx_master =
+				(u8)BIOS_GET_FIELD(entry.param0,
+				  NV_VBIOS_VPT_ENTRY_PARAM0_SR_VD_MASTER);
+
+			policy_type_data.split_rail.rail_idx_slave =
+				(u8)BIOS_GET_FIELD(entry.param0,
+				  NV_VBIOS_VPT_ENTRY_PARAM0_SR_VD_SLAVE);
+
+			policy_type_data.split_rail.delta_min_vfe_equ_idx =
+				(u8)BIOS_GET_FIELD(entry.param0,
+				  NV_VBIOS_VPT_ENTRY_PARAM0_SR_DELTA_SM_MIN);
+
+			policy_type_data.split_rail.delta_max_vfe_equ_idx =
+				(u8)BIOS_GET_FIELD(entry.param0,
+				  NV_VBIOS_VPT_ENTRY_PARAM0_SR_DELTA_SM_MAX);
+		}
+
+		policy_type_data.board_obj.type = policy_type;
+
+		ppolicy = volt_volt_policy_construct(g,
+				(void *)&policy_type_data);
+		if (ppolicy == NULL) {
+			gk20a_err(dev_from_gk20a(g),
+				"Failure to construct VOLT_POLICY object.");
+			status = -EINVAL;
+			goto done;
+		}
+
+		status = boardobjgrp_objinsert(
+				&pvolt_policy_metadata->volt_policies.super,
+				(struct boardobj *)ppolicy, i);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"could not add volt_policy for entry %d into boardobjgrp ",
+				i);
+			goto done;
+		}
+	}
+
+done:
+	return status;
+}
+static u32 _volt_policy_devgrp_pmudata_instget(struct gk20a *g,
+	struct nv_pmu_boardobjgrp *pmuboardobjgrp,
+	struct nv_pmu_boardobj **ppboardobjpmudata, u8 idx)
+{
+	struct nv_pmu_volt_volt_policy_boardobj_grp_set *pgrp_set =
+		(struct nv_pmu_volt_volt_policy_boardobj_grp_set *)
+		pmuboardobjgrp;
+
+	gk20a_dbg_info("");
+
+	/*check whether pmuboardobjgrp has a valid boardobj in index*/
+	if (((u32)BIT(idx) &
+		pgrp_set->hdr.data.super.obj_mask.super.data[0]) == 0)
+		return -EINVAL;
+
+	*ppboardobjpmudata = (struct nv_pmu_boardobj *)
+		&pgrp_set->objects[idx].data.board_obj;
+	gk20a_dbg_info(" Done");
+	return 0;
+}
+
+static u32 _volt_policy_devgrp_pmustatus_instget(struct gk20a *g,
+	void *pboardobjgrppmu,
+	struct nv_pmu_boardobj_query **ppboardobjpmustatus, u8 idx)
+{
+	struct nv_pmu_volt_volt_policy_boardobj_grp_get_status *p_get_status =
+		(struct nv_pmu_volt_volt_policy_boardobj_grp_get_status *)
+		pboardobjgrppmu;
+
+	/*check whether pmuboardobjgrp has a valid boardobj in index*/
+	if (((u32)BIT(idx) &
+		p_get_status->hdr.data.super.obj_mask.super.data[0]) == 0)
+		return -EINVAL;
+
+	*ppboardobjpmustatus = (struct nv_pmu_boardobj_query *)
+			&p_get_status->objects[idx].data.board_obj;
+	return 0;
+}
+
+u32 volt_policy_pmu_setup(struct gk20a *g)
+{
+	u32 status;
+	struct boardobjgrp *pboardobjgrp = NULL;
+
+	gk20a_dbg_info("");
+
+	pboardobjgrp =
+		&g->perf_pmu.volt.volt_policy_metadata.volt_policies.super;
+
+	if (!pboardobjgrp->bconstructed)
+		return -EINVAL;
+
+	status = pboardobjgrp->pmuinithandle(g, pboardobjgrp);
+
+	gk20a_dbg_info("Done");
+	return status;
+}
+
+/* SW init of the VOLT_POLICY group: VBIOS table plus PMU command setup. */
+u32 volt_policy_sw_setup(struct gk20a *g)
+{
+	u32 status = 0;
+	struct boardobjgrp *pboardobjgrp =
+		&g->perf_pmu.volt.volt_policy_metadata.volt_policies.super;
+
+	gk20a_dbg_info("");
+
+	status = boardobjgrpconstruct_e32(
+			&g->perf_pmu.volt.volt_policy_metadata.volt_policies);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error creating boardobjgrp for volt policy, status - 0x%x",
+			status);
+		goto done;
+	}
+
+	/* install this group's PMU data/status instance accessors */
+	pboardobjgrp->pmudatainstget  = _volt_policy_devgrp_pmudata_instget;
+	pboardobjgrp->pmustatusinstget  = _volt_policy_devgrp_pmustatus_instget;
+
+	/* Obtain Voltage Policy Table from VBIOS */
+	status = volt_get_volt_policy_table(g, &g->perf_pmu.volt.
+			volt_policy_metadata);
+	if (status)
+		goto done;
+
+	/* Populate data for the VOLT_POLICY PMU interface */
+	BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, VOLT, VOLT_POLICY);
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp,
+			volt, VOLT, volt_policy, VOLT_POLICY);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x",
+			status);
+		goto done;
+	}
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_GET_STATUS_CONSTRUCT(g,
+		&g->perf_pmu.volt.volt_policy_metadata.volt_policies.super,
+			volt, VOLT, volt_policy, VOLT_POLICY);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error constructing PMU_BOARDOBJ_CMD_GRP_GET_STATUS interface - 0x%x",
+			status);
+		goto done;
+	}
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/volt/volt_policy.h b/drivers/gpu/nvgpu/volt/volt_policy.h
new file mode 100644
index 00000000..6adbfd43
--- /dev/null
+++ b/drivers/gpu/nvgpu/volt/volt_policy.h
@@ -0,0 +1,64 @@
+/*
+* Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms and conditions of the GNU General Public License,
+* version 2, as published by the Free Software Foundation.
+*
+* This program is distributed in the hope it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+* more details.
+*/
+
+#ifndef _VOLT_POLICY_H_
+#define _VOLT_POLICY_H_
+
+#define VOLT_POLICY_INDEX_IS_VALID(pvolt, policy_idx)	\
+		(boardobjgrp_idxisvalid(	\
+		&((pvolt)->volt_policy_metadata.volt_policies.super), \
+		(policy_idx)))
+
+/*!
+ * extends boardobj providing attributes common to all voltage_policies.
+ */
+struct voltage_policy {
+	struct boardobj super;
+};
+
+struct voltage_policy_metadata {
+	u8 perf_core_vf_seq_policy_idx;
+	struct boardobjgrp_e32 volt_policies;
+};
+
+/*!
+ * extends voltage_policy providing attributes
+ * common to all voltage_policy_split_rail.
+ */
+struct voltage_policy_split_rail {
+	struct voltage_policy super;
+	u8 rail_idx_master;
+	u8 rail_idx_slave;
+	u8 delta_min_vfe_equ_idx;
+	u8 delta_max_vfe_equ_idx;
+	s32 offset_delta_min_uv;
+	s32 offset_delta_max_uv;
+};
+
+struct voltage_policy_split_rail_single_step {
+	struct voltage_policy_split_rail super;
+};
+
+struct voltage_policy_split_rail_multi_step {
+	struct voltage_policy_split_rail super;
+	u16 inter_switch_delay_us;
+};
+
+struct voltage_policy_single_rail {
+	struct voltage_policy  super;
+	u8 rail_idx;
+};
+
+u32 volt_policy_sw_setup(struct gk20a *g);
+u32 volt_policy_pmu_setup(struct gk20a *g);
+#endif
diff --git a/drivers/gpu/nvgpu/volt/volt_rail.c b/drivers/gpu/nvgpu/volt/volt_rail.c
new file mode 100644
index 00000000..87b85160
--- /dev/null
+++ b/drivers/gpu/nvgpu/volt/volt_rail.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "gm206/bios_gm206.h"
+#include "ctrl/ctrlvolt.h"
+#include "gk20a/pmu_gk20a.h"
+
+#include "pmuif/gpmuifperfvfe.h"
+#include "include/bios.h"
+#include "volt.h"
+
+u8 volt_rail_volt_domain_convert_to_idx(struct gk20a *g, u8 volt_domain)
+{
+	switch (g->perf_pmu.volt.volt_rail_metadata.volt_domain_hal) {
+	case CTRL_VOLT_DOMAIN_HAL_GP10X_SINGLE_RAIL:
+		if (volt_domain == CTRL_BOARDOBJ_IDX_INVALID)
+			return 0;
+		break;
+	case CTRL_VOLT_DOMAIN_HAL_GP10X_SPLIT_RAIL:
+		switch (volt_domain) {
+		case CTRL_VOLT_DOMAIN_LOGIC:
+			return 0;
+		case CTRL_VOLT_DOMAIN_SRAM:
+			return 1;
+		}
+		break;
+	}
+
+	return CTRL_BOARDOBJ_IDX_INVALID;
+}
+
+u32 volt_rail_volt_dev_register(struct gk20a *g, struct voltage_rail
+	*pvolt_rail, u8 volt_dev_idx, u8 operation_type)
+{
+	u32 status = 0;
+
+	if (operation_type == CTRL_VOLT_DEVICE_OPERATION_TYPE_DEFAULT) {
+		if (pvolt_rail->volt_dev_idx_default ==
+				CTRL_BOARDOBJ_IDX_INVALID) {
+			pvolt_rail->volt_dev_idx_default = volt_dev_idx;
+		} else {
+			status = -EINVAL;
+			goto exit;
+		}
+	} else {
+		goto exit;
+	}
+
+	status = boardobjgrpmask_bitset(&pvolt_rail->volt_dev_mask.super,
+			volt_dev_idx);
+
+exit:
+	if (status)
+		gk20a_err(dev_from_gk20a(g), "Failed to register VOLTAGE_DEVICE");
+
+	return status;
+}
+
+static u32 volt_rail_state_init(struct gk20a *g,
+		struct voltage_rail *pvolt_rail)
+{
+	u32 status = 0;
+	u32 i;
+
+	pvolt_rail->volt_dev_idx_default = CTRL_BOARDOBJ_IDX_INVALID;
+
+	for (i = 0; i < CTRL_VOLT_RAIL_VOLT_DELTA_MAX_ENTRIES; i++) {
+		pvolt_rail->volt_delta_uv[i] = NV_PMU_VOLT_VALUE_0V_IN_UV;
+		g->perf_pmu.volt.volt_rail_metadata.ext_rel_delta_uv[i] =
+			NV_PMU_VOLT_VALUE_0V_IN_UV;
+	}
+
+	pvolt_rail->volt_margin_limit_vfe_equ_mon_handle =
+		NV_PMU_PERF_RPC_VFE_EQU_MONITOR_COUNT_MAX;
+	pvolt_rail->rel_limit_vfe_equ_mon_handle =
+		NV_PMU_PERF_RPC_VFE_EQU_MONITOR_COUNT_MAX;
+	pvolt_rail->alt_rel_limit_vfe_equ_mon_handle =
+		NV_PMU_PERF_RPC_VFE_EQU_MONITOR_COUNT_MAX;
+	pvolt_rail->ov_limit_vfe_equ_mon_handle =
+		NV_PMU_PERF_RPC_VFE_EQU_MONITOR_COUNT_MAX;
+
+	status = boardobjgrpmask_e32_init(&pvolt_rail->volt_dev_mask, NULL);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"Failed to initialize BOARDOBJGRPMASK of VOLTAGE_DEVICEs");
+	}
+
+	return status;
+}
+
+static u32 volt_rail_init_pmudata_super(struct gk20a *g,
+	struct boardobj *board_obj_ptr, struct nv_pmu_boardobj *ppmudata)
+{
+	u32 status = 0;
+	struct voltage_rail *prail;
+	struct nv_pmu_volt_volt_rail_boardobj_set *rail_pmu_data;
+	u32 i;
+
+	gk20a_dbg_info("");
+
+	status = boardobj_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (status)
+		return status;
+
+	prail = (struct voltage_rail *)board_obj_ptr;
+	rail_pmu_data = (struct nv_pmu_volt_volt_rail_boardobj_set *)
+		ppmudata;
+
+	rail_pmu_data->rel_limit_vfe_equ_idx = prail->rel_limit_vfe_equ_idx;
+	rail_pmu_data->alt_rel_limit_vfe_equ_idx =
+			prail->alt_rel_limit_vfe_equ_idx;
+	rail_pmu_data->ov_limit_vfe_equ_idx = prail->ov_limit_vfe_equ_idx;
+	rail_pmu_data->vmin_limit_vfe_equ_idx = prail->vmin_limit_vfe_equ_idx;
+	rail_pmu_data->volt_margin_limit_vfe_equ_idx =
+			prail->volt_margin_limit_vfe_equ_idx;
+	rail_pmu_data->pwr_equ_idx = prail->pwr_equ_idx;
+	rail_pmu_data->volt_dev_idx_default = prail->volt_dev_idx_default;
+
+	for (i = 0; i < CTRL_VOLT_RAIL_VOLT_DELTA_MAX_ENTRIES; i++) {
+		rail_pmu_data->volt_delta_uv[i] = prail->volt_delta_uv[i] +
+			g->perf_pmu.volt.volt_rail_metadata.ext_rel_delta_uv[i];
+	}
+
+	status = boardobjgrpmask_export(&prail->volt_dev_mask.super,
+				prail->volt_dev_mask.super.bitcount,
+				&rail_pmu_data->volt_dev_mask.super);
+	if (status)
+		gk20a_err(dev_from_gk20a(g),
+			"Failed to export BOARDOBJGRPMASK of VOLTAGE_DEVICEs");
+
+	gk20a_dbg_info("Done");
+
+	return status;
+}
+
+static struct voltage_rail *construct_volt_rail(struct gk20a *g, void *pargs)
+{
+	struct boardobj *board_obj_ptr = NULL;
+	struct voltage_rail *ptemp_rail = (struct voltage_rail *)pargs;
+	struct voltage_rail  *board_obj_volt_rail_ptr = NULL;
+	u32 status;
+
+	gk20a_dbg_info("");
+	status = boardobj_construct_super(g, &board_obj_ptr,
+		sizeof(struct voltage_rail), pargs);
+	if (status)
+		return NULL;
+
+	board_obj_volt_rail_ptr = (struct voltage_rail *)board_obj_ptr;
+	/* override super class interface */
+	board_obj_ptr->pmudatainit = volt_rail_init_pmudata_super;
+
+	board_obj_volt_rail_ptr->boot_voltage_uv =
+			ptemp_rail->boot_voltage_uv;
+	board_obj_volt_rail_ptr->rel_limit_vfe_equ_idx =
+			ptemp_rail->rel_limit_vfe_equ_idx;
+	board_obj_volt_rail_ptr->alt_rel_limit_vfe_equ_idx =
+			ptemp_rail->alt_rel_limit_vfe_equ_idx;
+	board_obj_volt_rail_ptr->ov_limit_vfe_equ_idx =
+			ptemp_rail->ov_limit_vfe_equ_idx;
+	board_obj_volt_rail_ptr->pwr_equ_idx =
+			ptemp_rail->pwr_equ_idx;
+	board_obj_volt_rail_ptr->boot_volt_vfe_equ_idx =
+			ptemp_rail->boot_volt_vfe_equ_idx;
+	board_obj_volt_rail_ptr->vmin_limit_vfe_equ_idx =
+			ptemp_rail->vmin_limit_vfe_equ_idx;
+	board_obj_volt_rail_ptr->volt_margin_limit_vfe_equ_idx =
+			ptemp_rail->volt_margin_limit_vfe_equ_idx;
+
+	gk20a_dbg_info("Done");
+
+	return (struct voltage_rail *)board_obj_ptr;
+}
+
+u8 volt_rail_vbios_volt_domain_convert_to_internal(struct gk20a *g,
+	u8 vbios_volt_domain)
+{
+	switch (g->perf_pmu.volt.volt_rail_metadata.volt_domain_hal) {
+	case CTRL_VOLT_DOMAIN_HAL_GP10X_SINGLE_RAIL:
+		if (vbios_volt_domain == 0)
+			return CTRL_VOLT_DOMAIN_LOGIC;
+		break;
+	case CTRL_VOLT_DOMAIN_HAL_GP10X_SPLIT_RAIL:
+		switch (vbios_volt_domain) {
+		case 0:
+			return CTRL_VOLT_DOMAIN_LOGIC;
+		case 1:
+			return CTRL_VOLT_DOMAIN_SRAM;
+		}
+		break;
+	}
+
+	return CTRL_VOLT_DOMAIN_INVALID;
+}
+
+u32 volt_rail_pmu_setup(struct gk20a *g)
+{
+	u32 status;
+	struct boardobjgrp *pboardobjgrp = NULL;
+
+	gk20a_dbg_info("");
+
+	pboardobjgrp = &g->perf_pmu.volt.volt_rail_metadata.volt_rails.super;
+
+	if (!pboardobjgrp->bconstructed)
+		return -EINVAL;
+
+	status = pboardobjgrp->pmuinithandle(g, pboardobjgrp);
+
+	gk20a_dbg_info("Done");
+	return status;
+}
+
+/* Parse the VBIOS Voltage Rail Table into VOLT_RAIL board objects. */
+static u32 volt_get_volt_rail_table(struct gk20a *g,
+		struct voltage_rail_metadata *pvolt_rail_metadata)
+{
+	u32 status = 0;
+	u8 *volt_rail_table_ptr = NULL;
+	struct voltage_rail *prail = NULL;
+	struct vbios_voltage_rail_table_1x_header header = { 0 };
+	struct vbios_voltage_rail_table_1x_entry entry = { 0 };
+	u8 i;
+	u8 volt_domain;
+	u8 *entry_ptr;
+	union rail_type {
+		struct boardobj board_obj;
+		struct voltage_rail volt_rail;
+	} rail_type_data;
+
+	if (g->ops.bios.get_perf_table_ptrs)
+		volt_rail_table_ptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+				g->bios.perf_token, VOLTAGE_RAIL_TABLE);
+	if (volt_rail_table_ptr == NULL)
+		return -EINVAL;
+
+	memcpy(&header, volt_rail_table_ptr,
+			sizeof(struct vbios_voltage_rail_table_1x_header));
+
+	pvolt_rail_metadata->volt_domain_hal = (u8)header.volt_domain_hal;
+
+	for (i = 0; i < header.num_table_entries; i++) {
+		entry_ptr = (volt_rail_table_ptr + header.header_size +
+			(i * header.table_entry_size));
+
+		memset(&rail_type_data, 0x0, sizeof(rail_type_data));
+
+		memcpy(&entry, entry_ptr,
+			sizeof(struct vbios_voltage_rail_table_1x_entry));
+
+		volt_domain = volt_rail_vbios_volt_domain_convert_to_internal(g,
+			i);
+		if (volt_domain == CTRL_VOLT_DOMAIN_INVALID)
+			continue;
+
+		rail_type_data.board_obj.type = volt_domain;
+		rail_type_data.volt_rail.boot_voltage_uv =
+			(u32)entry.boot_voltage_uv;
+		rail_type_data.volt_rail.rel_limit_vfe_equ_idx =
+			(u8)entry.rel_limit_vfe_equ_idx;
+		rail_type_data.volt_rail.alt_rel_limit_vfe_equ_idx =
+			(u8)entry.alt_rel_limit_vfe_equidx;
+		rail_type_data.volt_rail.ov_limit_vfe_equ_idx =
+			(u8)entry.ov_limit_vfe_equ_idx;
+
+		/* entries too short to hold a field get INVALID */
+		rail_type_data.volt_rail.volt_margin_limit_vfe_equ_idx =
+			(header.table_entry_size >=
+			 NV_VBIOS_VOLTAGE_RAIL_1X_ENTRY_SIZE_0B) ?
+			(u8)entry.volt_margin_limit_vfe_equ_idx :
+			CTRL_BOARDOBJ_IDX_INVALID;
+
+		rail_type_data.volt_rail.vmin_limit_vfe_equ_idx =
+			(header.table_entry_size >=
+			 NV_VBIOS_VOLTAGE_RAIL_1X_ENTRY_SIZE_0A) ?
+			(u8)entry.vmin_limit_vfe_equ_idx :
+			CTRL_BOARDOBJ_IDX_INVALID;
+
+		rail_type_data.volt_rail.boot_volt_vfe_equ_idx =
+			(header.table_entry_size >=
+			 NV_VBIOS_VOLTAGE_RAIL_1X_ENTRY_SIZE_09) ?
+			(u8)entry.boot_volt_vfe_equ_idx :
+			CTRL_BOARDOBJ_IDX_INVALID;
+
+		rail_type_data.volt_rail.pwr_equ_idx =
+			(header.table_entry_size >=
+			 NV_VBIOS_VOLTAGE_RAIL_1X_ENTRY_SIZE_08) ?
+			(u8)entry.pwr_equ_idx :
+			CTRL_PMGR_PWR_EQUATION_INDEX_INVALID;
+
+		prail = construct_volt_rail(g, &rail_type_data);
+		if (prail == NULL) {
+			gk20a_err(dev_from_gk20a(g),
+				"Failure to construct VOLT_RAIL object.");
+			status = -EINVAL;
+			goto done;
+		}
+
+		status = boardobjgrp_objinsert(
+				&pvolt_rail_metadata->volt_rails.super,
+				(struct boardobj *)prail, i);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"could not add volt_rail for entry %d into boardobjgrp ",
+				i);
+			goto done;
+		}
+	}
+
+done:
+	return status;
+}
+
+static u32 _volt_rail_devgrp_pmudata_instget(struct gk20a *g,
+	struct nv_pmu_boardobjgrp *pmuboardobjgrp, struct nv_pmu_boardobj
+	**ppboardobjpmudata, u8 idx)
+{
+	struct nv_pmu_volt_volt_rail_boardobj_grp_set *pgrp_set =
+		(struct nv_pmu_volt_volt_rail_boardobj_grp_set *)
+		pmuboardobjgrp;
+
+	gk20a_dbg_info("");
+
+	/*check whether pmuboardobjgrp has a valid boardobj in index*/
+	if (((u32)BIT(idx) &
+		pgrp_set->hdr.data.super.obj_mask.super.data[0]) == 0)
+		return -EINVAL;
+
+	*ppboardobjpmudata = (struct nv_pmu_boardobj *)
+		&pgrp_set->objects[idx].data.board_obj;
+	gk20a_dbg_info(" Done");
+	return 0;
+}
+
+static u32 _volt_rail_devgrp_pmustatus_instget(struct gk20a *g,
+	void *pboardobjgrppmu, struct nv_pmu_boardobj_query
+	**ppboardobjpmustatus, u8 idx)
+{
+	struct nv_pmu_volt_volt_rail_boardobj_grp_get_status *pgrp_get_status =
+		(struct nv_pmu_volt_volt_rail_boardobj_grp_get_status *)
+		pboardobjgrppmu;
+
+	/*check whether pmuboardobjgrp has a valid boardobj in index*/
+	if (((u32)BIT(idx) &
+		pgrp_get_status->hdr.data.super.obj_mask.super.data[0]) == 0)
+		return -EINVAL;
+
+	*ppboardobjpmustatus = (struct nv_pmu_boardobj_query *)
+			&pgrp_get_status->objects[idx].data.board_obj;
+	return 0;
+}
+
+/* SW init of the VOLT_RAIL group: VBIOS table, PMU commands, rail state. */
+u32 volt_rail_sw_setup(struct gk20a *g)
+{
+	u32 status = 0;
+	struct boardobjgrp *pboardobjgrp =
+		&g->perf_pmu.volt.volt_rail_metadata.volt_rails.super;
+	struct voltage_rail *pvolt_rail;
+	u8 i;
+
+	gk20a_dbg_info("");
+
+	status = boardobjgrpconstruct_e32(&g->perf_pmu.volt.volt_rail_metadata.
+			volt_rails);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error creating boardobjgrp for volt rail, status - 0x%x",
+			status);
+		goto done;
+	}
+
+	pboardobjgrp->pmudatainstget  = _volt_rail_devgrp_pmudata_instget;
+	pboardobjgrp->pmustatusinstget  = _volt_rail_devgrp_pmustatus_instget;
+
+	g->perf_pmu.volt.volt_rail_metadata.pct_delta =
+			NV_PMU_VOLT_VALUE_0V_IN_UV;
+
+	/* Obtain Voltage Rail Table from VBIOS */
+	status = volt_get_volt_rail_table(g, &g->perf_pmu.volt.
+			volt_rail_metadata);
+	if (status)
+		goto done;
+
+	/* Populate data for the VOLT_RAIL PMU interface */
+	BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, VOLT, VOLT_RAIL);
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp,
+			volt, VOLT, volt_rail, VOLT_RAIL);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x",
+			status);
+		goto done;
+	}
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_GET_STATUS_CONSTRUCT(g,
+		&g->perf_pmu.volt.volt_rail_metadata.volt_rails.super,
+			volt, VOLT, volt_rail, VOLT_RAIL);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error constructing PMU_BOARDOBJ_CMD_GRP_GET_STATUS interface - 0x%x",
+			status);
+		goto done;
+	}
+
+	/* initialize the software state of every rail in the group */
+	BOARDOBJGRP_FOR_EACH(&(g->perf_pmu.volt.volt_rail_metadata.
+			       volt_rails.super),
+			     struct voltage_rail *, pvolt_rail, i) {
+		status = volt_rail_state_init(g, pvolt_rail);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"Failure while executing RAIL's state init railIdx = %d",
+				i);
+			goto done;
+		}
+	}
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/volt/volt_rail.h b/drivers/gpu/nvgpu/volt/volt_rail.h
new file mode 100644
index 00000000..0180992c
--- /dev/null
+++ b/drivers/gpu/nvgpu/volt/volt_rail.h
@@ -0,0 +1,77 @@
+/*
+* Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms and conditions of the GNU General Public License,
+* version 2, as published by the Free Software Foundation.
+*
+* This program is distributed in the hope it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+* more details.
+*/
+
+
+#ifndef _VOLT_RAIL_H_
+#define _VOLT_RAIL_H_
+
+#include "boardobj/boardobj.h"
+#include "boardobj/boardobjgrp.h"
+
+#define CTRL_VOLT_RAIL_VOLT_DELTA_MAX_ENTRIES	0x04
+#define CTRL_PMGR_PWR_EQUATION_INDEX_INVALID	0xFF
+
+#define VOLT_GET_VOLT_RAIL(pvolt, rail_idx)	\
+	((struct voltage_rail *)BOARDOBJGRP_OBJ_GET_BY_IDX( \
+		&((pvolt)->volt_rail_metadata.volt_rails.super), (rail_idx)))
+
+#define VOLT_RAIL_INDEX_IS_VALID(pvolt, rail_idx)	\
+	(boardobjgrp_idxisvalid( \
+		&((pvolt)->volt_rail_metadata.volt_rails.super), (rail_idx)))
+
+#define VOLT_RAIL_VOLT_3X_SUPPORTED(pvolt) \
+	(!BOARDOBJGRP_IS_EMPTY(&((pvolt)->volt_rail_metadata.volt_rails.super)))
+
+/*!
+ * extends boardobj providing attributes common to all voltage_rails.
+ */
+struct voltage_rail {
+	struct boardobj super;
+	u32 boot_voltage_uv;
+	u8 rel_limit_vfe_equ_idx;
+	u8 alt_rel_limit_vfe_equ_idx;
+	u8 ov_limit_vfe_equ_idx;
+	u8 pwr_equ_idx;
+	u8 volt_dev_idx_default;
+	u8 boot_volt_vfe_equ_idx;
+	u8 vmin_limit_vfe_equ_idx;
+	u8 volt_margin_limit_vfe_equ_idx;
+	u32 volt_margin_limit_vfe_equ_mon_handle;
+	u32 rel_limit_vfe_equ_mon_handle;
+	u32 alt_rel_limit_vfe_equ_mon_handle;
+	u32 ov_limit_vfe_equ_mon_handle;
+	struct boardobjgrpmask_e32 volt_dev_mask;
+	s32  volt_delta_uv[CTRL_VOLT_RAIL_VOLT_DELTA_MAX_ENTRIES];
+};
+
+/*!
+ * metadata of voltage rail functionality.
+ */
+struct voltage_rail_metadata {
+	u8 volt_domain_hal;
+	u8 pct_delta;
+	u32 ext_rel_delta_uv[CTRL_VOLT_RAIL_VOLT_DELTA_MAX_ENTRIES];
+	struct boardobjgrp_e32 volt_rails;
+};
+
+u8 volt_rail_vbios_volt_domain_convert_to_internal
+	(struct gk20a *g, u8 vbios_volt_domain);
+
+u32 volt_rail_volt_dev_register(struct gk20a *g, struct voltage_rail
+	*pvolt_rail, u8 volt_dev_idx, u8 operation_type);
+
+u8 volt_rail_volt_domain_convert_to_idx(struct gk20a *g, u8 volt_domain);
+
+u32 volt_rail_sw_setup(struct gk20a *g);
+u32 volt_rail_pmu_setup(struct gk20a *g);
+#endif
-- 
cgit v1.2.2


From 741d78ec45f6c48348743617ba5ae7163c95e49a Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Wed, 21 Sep 2016 15:02:59 +0530
Subject: gpu: nvgpu: construct/load tabels & set voltage

- Read voltage tables from VBIOS & construct
  then send to PMU.
- compare & set voltage based on
  mclk/gpc2clk clk, take higher voltage
  between two & set.

JIRA DNVGPU-122

Change-Id: I23e7b101a3b1c1b6596620fc6b8319c70bd9a488
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1224365
(cherry picked from commit e0055c3ec798b8312df3fa9bf92bde8c57c6f58c)
Reviewed-on: http://git-master/r/1244657
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk.c       | 33 ++++++++++++++++++++++++++++++---
 drivers/gpu/nvgpu/clk/clk_mclk.c  |  6 +-----
 drivers/gpu/nvgpu/clk/clk_mclk.h  |  3 +++
 drivers/gpu/nvgpu/pstate/pstate.c | 32 ++++++++++++++++++++++++++++++++
 4 files changed, 66 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c
index 918cd43c..ce071018 100644
--- a/drivers/gpu/nvgpu/clk/clk.c
+++ b/drivers/gpu/nvgpu/clk/clk.c
@@ -17,8 +17,12 @@
 #include "pmuif/gpmuifvolt.h"
 #include "ctrl/ctrlclk.h"
 #include "ctrl/ctrlvolt.h"
+#include "volt/volt.h"
 #include "gk20a/pmu_gk20a.h"
 
+#define BOOT_GPC2CLK_MHZ  2581
+#define BOOT_MCLK_MHZ     3003
+
 struct clkrpc_pmucmdhandler_params {
 	struct nv_pmu_clk_rpc *prpccall;
 	u32 success;
@@ -382,15 +386,38 @@ int clk_set_boot_fll_clk(struct gk20a *g)
 {
 	int status;
 	struct change_fll_clk bootfllclk;
+	u16 gpc2clk_clkmhz = BOOT_GPC2CLK_MHZ;
+	u32 gpc2clk_voltuv = 0;
+	u16 mclk_clkmhz = BOOT_MCLK_MHZ;
+	u32 mclk_voltuv = 0;
+	u32 voltuv = 0;
 
 	mutex_init(&g->clk_pmu.changeclkmutex);
 
+	clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, &gpc2clk_clkmhz,
+			&gpc2clk_voltuv);
+	clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, &mclk_clkmhz,
+			&mclk_voltuv);
+
+	voltuv = ((gpc2clk_voltuv) > (mclk_voltuv)) ? (gpc2clk_voltuv)
+			: (mclk_voltuv);
+
+	status = volt_set_voltage(g, voltuv, voltuv);
+	if (status)
+		gk20a_err(dev_from_gk20a(g), "attempt to set boot voltage failed %d",
+			voltuv);
+
 	bootfllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
-	bootfllclk.clkmhz = 2581;
-	bootfllclk.voltuv = 825000;
+	bootfllclk.clkmhz = gpc2clk_clkmhz;
+	bootfllclk.voltuv = voltuv;
 	status = clk_program_fllclks(g, &bootfllclk);
 	if (status)
-		gk20a_err(dev_from_gk20a(g), "attemp to set boot clk failed");
+		gk20a_err(dev_from_gk20a(g), "attempt to set boot gpc2clk failed");
+
+	status = g->clk_pmu.clk_mclk.change(g, DEFAULT_BOOT_MCLK_SPEED);
+	if (status)
+		gk20a_err(dev_from_gk20a(g), "attempt to set boot mclk failed");
+
 	return status;
 }
 
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.c b/drivers/gpu/nvgpu/clk/clk_mclk.c
index b63fab1e..7b15767b 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.c
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.c
@@ -23,10 +23,6 @@
 
 #define VREG_COUNT 24
 
-#define DEFAULT_BOOT_MCLK_SPEED gk20a_mclk_high_speed
-#define MCLK_LOW_SPEED_LIMIT 405
-#define MCLK_MID_SPEED_LIMIT 810
-
 struct memory_link_training_pattern {
 	u32 regaddr;
 	u32 writeval;
@@ -2220,7 +2216,7 @@ int clk_mclkseq_init_mclk_gddr5(struct gk20a *g)
 
 	mclk->init = true;
 
-	return mclk->change(g, DEFAULT_BOOT_MCLK_SPEED);
+	return 0;
 }
 
 int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, enum gk20a_mclk_speed speed)
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.h b/drivers/gpu/nvgpu/clk/clk_mclk.h
index edb7eb78..9d193c96 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.h
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.h
@@ -22,6 +22,9 @@ enum gk20a_mclk_speed {
 	gk20a_mclk_high_speed,
 };
 
+#define DEFAULT_BOOT_MCLK_SPEED gk20a_mclk_high_speed
+#define MCLK_LOW_SPEED_LIMIT 405
+#define MCLK_MID_SPEED_LIMIT 810
 struct clk_mclk_state {
 	enum gk20a_mclk_speed speed;
 	struct mutex mclk_mutex;
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index f4cc50ab..a4787f35 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -29,6 +29,18 @@ int gk20a_init_pstate_support(struct gk20a *g)
 
 	gk20a_dbg_fn("");
 
+	err = volt_rail_sw_setup(g);
+	if (err)
+		return err;
+
+	err = volt_dev_sw_setup(g);
+	if (err)
+		return err;
+
+	err = volt_policy_sw_setup(g);
+	if (err)
+		return err;
+
 	err = clk_vin_sw_setup(g);
 	if (err)
 		return err;
@@ -72,6 +84,26 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g)
 
 	gk20a_dbg_fn("");
 
+	err = volt_rail_pmu_setup(g);
+	if (err)
+		return err;
+
+	err = volt_dev_pmu_setup(g);
+	if (err)
+		return err;
+
+	err = volt_policy_pmu_setup(g);
+	if (err)
+		return err;
+
+	err = volt_pmu_send_load_cmd_to_pmu(g);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g),
+			"Failed to send VOLT LOAD CMD to PMU: status = 0x%08x.",
+			err);
+		return err;
+	}
+
 	err = vfe_var_pmu_setup(g);
 	if (err)
 		return err;
-- 
cgit v1.2.2


From af637c81fe6360dbe81373b6f1e5dbdd3ca35536 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Tue, 27 Sep 2016 14:54:50 +0530
Subject: gpu: nvgpu: Update volt pwm source & raw period

- calculate raw period as per pwm source
- update pwm source for logic & sram rails.

JIRA DNVGPU-123

Change-Id: I50b41d51b6aba760710700522dced7859f815463
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1227626
(cherry picked from commit 6eb5a235dd7bf9031ef1bcfadd6312a2f8758fd4)
Reviewed-on: http://git-master/r/1244663
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/clk_gp106.c |  6 ++++++
 drivers/gpu/nvgpu/volt/volt_dev.c   | 14 ++++++++++----
 2 files changed, 16 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.c b/drivers/gpu/nvgpu/gp106/clk_gp106.c
index 4bf03661..1dd3922a 100644
--- a/drivers/gpu/nvgpu/gp106/clk_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/clk_gp106.c
@@ -36,7 +36,12 @@ static int clk_gp106_debugfs_init(struct gk20a *g);
 #endif
 
 #define NUM_NAMEMAPS	4
+#define XTAL4X_KHZ 108000
 
+static u32 gp106_crystal_clk_hz(struct gk20a *g)
+{
+	return (XTAL4X_KHZ * 1000);
+}
 static int gp106_init_clk_support(struct gk20a *g) {
 	struct clk_gk20a *clk = &g->clk;
 	u32 err = 0;
@@ -221,6 +226,7 @@ err_out:
 
 void gp106_init_clk_ops(struct gpu_ops *gops) {
 	gops->clk.init_clk_support = gp106_init_clk_support;
+	gops->clk.get_crystal_clk_hz = gp106_crystal_clk_hz;
 }
 
 
diff --git a/drivers/gpu/nvgpu/volt/volt_dev.c b/drivers/gpu/nvgpu/volt/volt_dev.c
index 89040658..3a7ed1b5 100644
--- a/drivers/gpu/nvgpu/volt/volt_dev.c
+++ b/drivers/gpu/nvgpu/volt/volt_dev.c
@@ -26,7 +26,6 @@
 #include "include/bios.h"
 #include "volt.h"
 
-#define RAW_PERIOD	160
 #define VOLT_DEV_PWM_VOLTAGE_STEPS_INVALID	0
 #define VOLT_DEV_PWM_VOLTAGE_STEPS_DEFAULT	1
 
@@ -257,17 +256,24 @@ static u32 volt_get_voltage_device_table_1x_psv(struct gk20a *g,
 
 	if (ptmp_dev->super.operation_type ==
 			CTRL_VOLT_DEVICE_OPERATION_TYPE_DEFAULT) {
-		ptmp_dev->source = NV_PMU_PMGR_PWM_SOURCE_THERM_VID_PWM_1;
+		if (volt_domain == CTRL_VOLT_DOMAIN_LOGIC)
+			ptmp_dev->source =
+				NV_PMU_PMGR_PWM_SOURCE_THERM_VID_PWM_0;
+		if (volt_domain == CTRL_VOLT_DOMAIN_SRAM)
+			ptmp_dev->source =
+				NV_PMU_PMGR_PWM_SOURCE_THERM_VID_PWM_1;
+		ptmp_dev->raw_period =
+			g->ops.clk.get_crystal_clk_hz(g) / frequency_hz;
 	} else if (ptmp_dev->super.operation_type ==
 		CTRL_VOLT_DEVICE_OPERATION_TYPE_LPWR_STEADY_STATE) {
 		ptmp_dev->source = NV_PMU_PMGR_PWM_SOURCE_RSVD_0;
+		ptmp_dev->raw_period = 0;
 	} else if (ptmp_dev->super.operation_type ==
 		CTRL_VOLT_DEVICE_OPERATION_TYPE_LPWR_SLEEP_STATE) {
 		ptmp_dev->source = NV_PMU_PMGR_PWM_SOURCE_RSVD_1;
+		ptmp_dev->raw_period = 0;
 	}
 
-	ptmp_dev->raw_period = RAW_PERIOD;
-
 	/* Initialize data for parent class. */
 	ptmp_dev->super.super.type = CTRL_VOLT_DEVICE_TYPE_PWM;
 	ptmp_dev->super.volt_domain = volt_domain;
-- 
cgit v1.2.2


From f533ec3cbddfff7944c4b9021cccf026dc27e572 Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Tue, 27 Sep 2016 16:29:14 +0530
Subject: gpu: nvgpu: fix sram rail volt calculation

JIRA DNVGPU-120

SRAM rail voltage needs to be picked up from SRAM
index of VF entries in CLK prog table.

Change-Id: Iabfff62edeec5aa9c2ead62d6b943fb2ebb952ed
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/1227665
(cherry picked from commit 8053260438bc94397b20e74cf18453624ebeb325)
Reviewed-on: http://git-master/r/1244664
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk.c      | 54 +++++++++++++++++++++++++++++-----------
 drivers/gpu/nvgpu/clk/clk.h      |  6 ++---
 drivers/gpu/nvgpu/clk/clk_prog.c |  2 +-
 3 files changed, 44 insertions(+), 18 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c
index ce071018..5aafa701 100644
--- a/drivers/gpu/nvgpu/clk/clk.c
+++ b/drivers/gpu/nvgpu/clk/clk.c
@@ -82,7 +82,6 @@ u32 clk_pmu_vin_load(struct gk20a *g)
 
 	handler.prpccall = &rpccall;
 	handler.success = 0;
-
 	status = gk20a_pmu_cmd_post(g, &cmd, NULL, &payload,
 			PMU_COMMAND_QUEUE_LPQ,
 			clkrpc_pmucmdhandler, (void *)&handler,
@@ -388,24 +387,42 @@ int clk_set_boot_fll_clk(struct gk20a *g)
 	struct change_fll_clk bootfllclk;
 	u16 gpc2clk_clkmhz = BOOT_GPC2CLK_MHZ;
 	u32 gpc2clk_voltuv = 0;
+	u32 gpc2clk_voltuv_sram = 0;
 	u16 mclk_clkmhz = BOOT_MCLK_MHZ;
 	u32 mclk_voltuv = 0;
+	u32 mclk_voltuv_sram = 0;
 	u32 voltuv = 0;
+	u32 voltuv_sram = 0;
 
 	mutex_init(&g->clk_pmu.changeclkmutex);
-
-	clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, &gpc2clk_clkmhz,
-			&gpc2clk_voltuv);
-	clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, &mclk_clkmhz,
-			&mclk_voltuv);
+	status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
+		&gpc2clk_clkmhz, &gpc2clk_voltuv, CTRL_VOLT_DOMAIN_LOGIC);
+	if (status)
+		return status;
+	status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
+		&gpc2clk_clkmhz, &gpc2clk_voltuv_sram, CTRL_VOLT_DOMAIN_SRAM);
+	if (status)
+		return status;
+	status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
+		&mclk_clkmhz, &mclk_voltuv, CTRL_VOLT_DOMAIN_LOGIC);
+	if (status)
+		return status;
+	status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
+		&mclk_clkmhz, &mclk_voltuv_sram, CTRL_VOLT_DOMAIN_SRAM);
+	if (status)
+		return status;
 
 	voltuv = ((gpc2clk_voltuv) > (mclk_voltuv)) ? (gpc2clk_voltuv)
 			: (mclk_voltuv);
 
-	status = volt_set_voltage(g, voltuv, voltuv);
+	voltuv_sram = ((gpc2clk_voltuv_sram) > (mclk_voltuv_sram)) ?
+		(gpc2clk_voltuv_sram) : (mclk_voltuv_sram);
+
+	status = volt_set_voltage(g, voltuv, voltuv_sram);
 	if (status)
-		gk20a_err(dev_from_gk20a(g), "attempt to set boot voltage failed %d",
-			voltuv);
+		gk20a_err(dev_from_gk20a(g),
+			"attempt to set boot voltage failed %d %d",
+			voltuv, voltuv_sram);
 
 	bootfllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
 	bootfllclk.clkmhz = gpc2clk_clkmhz;
@@ -413,7 +430,6 @@ int clk_set_boot_fll_clk(struct gk20a *g)
 	status = clk_program_fllclks(g, &bootfllclk);
 	if (status)
 		gk20a_err(dev_from_gk20a(g), "attempt to set boot gpc2clk failed");
-
 	status = g->clk_pmu.clk_mclk.change(g, DEFAULT_BOOT_MCLK_SPEED);
 	if (status)
 		gk20a_err(dev_from_gk20a(g), "attempt to set boot mclk failed");
@@ -436,7 +452,9 @@ u32 clk_domain_print_vf_table(struct gk20a *g, u32 clkapidomain)
 			status = pdomain->clkdomainclkvfsearch(g, pclk,
 				pdomain, &clkmhz, &volt,
 				CLK_PROG_VFE_ENTRY_LOGIC);
-			return status;
+			status = pdomain->clkdomainclkvfsearch(g, pclk,
+				pdomain, &clkmhz, &volt,
+				CLK_PROG_VFE_ENTRY_SRAM);
 		}
 	}
 	return status;
@@ -446,23 +464,31 @@ u32 clk_domain_get_f_or_v(
 	struct gk20a *g,
 	u32 clkapidomain,
 	u16 *pclkmhz,
-	u32 *pvoltuv
+	u32 *pvoltuv,
+	u8 railidx
 )
 {
 	u32 status = -EINVAL;
 	struct clk_domain *pdomain;
 	u8 i;
 	struct clk_pmupstate *pclk = &g->clk_pmu;
+	u8 rail;
 
 	if ((pclkmhz == NULL) || (pvoltuv == NULL))
 		return -EINVAL;
 
+	if (railidx == CTRL_VOLT_DOMAIN_LOGIC)
+		rail = CLK_PROG_VFE_ENTRY_LOGIC;
+	else if (railidx == CTRL_VOLT_DOMAIN_SRAM)
+		rail = CLK_PROG_VFE_ENTRY_SRAM;
+	else
+		return -EINVAL;
+
 	BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs.super.super),
 			struct clk_domain *, pdomain, i) {
 		if (pdomain->api_domain == clkapidomain) {
 			status = pdomain->clkdomainclkvfsearch(g, pclk,
-				pdomain, pclkmhz, pvoltuv,
-				CLK_PROG_VFE_ENTRY_LOGIC);
+				pdomain, pclkmhz, pvoltuv, rail);
 			return status;
 		}
 	}
diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h
index e54af521..2d6425b5 100644
--- a/drivers/gpu/nvgpu/clk/clk.h
+++ b/drivers/gpu/nvgpu/clk/clk.h
@@ -104,12 +104,12 @@ struct vbios_clocks_table_1x_hal_clock_entry {
 
 u32 clk_pmu_vin_load(struct gk20a *g);
 u32 clk_domain_print_vf_table(struct gk20a *g, u32 clkapidomain);
-u32 clk_domain_get_f_or_v
-(
+u32 clk_domain_get_f_or_v(
 	struct gk20a *g,
 	u32 clkapidomain,
 	u16 *pclkmhz,
-	u32 *pvoltuv
+	u32 *pvoltuv,
+	u8 railidx
 );
 u32 clk_domain_get_f_points(
 	struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/clk/clk_prog.c b/drivers/gpu/nvgpu/clk/clk_prog.c
index 9fdd8b25..6b81650e 100644
--- a/drivers/gpu/nvgpu/clk/clk_prog.c
+++ b/drivers/gpu/nvgpu/clk/clk_prog.c
@@ -886,7 +886,7 @@ static u32 vflookup_prog_1x_master
 	pvfentry = (struct ctrl_clk_clk_prog_1x_master_vf_entry *)(
 			(u8 *)pvfentry +
 			(sizeof(struct ctrl_clk_clk_prog_1x_master_vf_entry) *
-			(rail+1)));
+			rail));
 
 	clkmhz = *pclkmhz;
 	voltuv = *pvoltuv;
-- 
cgit v1.2.2


From 84219f3a7f022e684c83ed9e6414bd9f2827c025 Mon Sep 17 00:00:00 2001
From: seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Thu, 27 Oct 2016 11:09:10 -0700
Subject: gpu: nvgpu: gp10b: pmu HAL update

Update pmu HAL to have function for
is_pmu_supported.

JIRA GV11B-21

Change-Id: Id08efa82aa04a6f92c7fea0eb5d4735db2699b5a
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1243918
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index 762e2af7..e7b2e70c 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -449,8 +449,14 @@ static void pmu_dump_security_fuses_gp10b(struct gk20a *g)
 			val);
 }
 
+static bool gp10b_is_pmu_supported(struct gk20a *g)
+{
+	return true;
+}
+
 void gp10b_init_pmu_ops(struct gpu_ops *gops)
 {
+	gops->pmu.is_pmu_supported = gp10b_is_pmu_supported;
 	if (gops->privsecurity) {
 		gm20b_init_secure_pmu(gops);
 		gops->pmu.init_wpr_region = gm20b_pmu_init_acr;
-- 
cgit v1.2.2


From 2f4405ddcb1cd7bb939d3b22ab72789afb435da6 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 1 Nov 2016 15:40:06 -0700
Subject: gpu: nvgpu: gp106: Add PMU HAL is_pmu_supported

Add implementation for PMU HAL is_pmu_supported to gp106.

JIRA GV11B-21

Change-Id: If4268465ffade7c3c8e7bb853a1d2070c0e2ae4f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1246026
Tested-by: David Martinez Nieto <dmartineznie@nvidia.com>
Reviewed-by: David Martinez Nieto <dmartineznie@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp106/pmu_gp106.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/pmu_gp106.c b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
index f3e7b298..88be6d22 100644
--- a/drivers/gpu/nvgpu/gp106/pmu_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
@@ -169,6 +169,11 @@ static int gp106_falcon_reset(struct gk20a *g)
 	return 0;
 }
 
+static bool gp106_is_pmu_supported(struct gk20a *g)
+{
+	return true;
+}
+
 void gp106_init_pmu_ops(struct gpu_ops *gops)
 {
 	gk20a_dbg_fn("");
@@ -196,6 +201,7 @@ void gp106_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.dump_secure_fuses = NULL;
 	gops->pmu.reset = gp106_falcon_reset;
 	gops->pmu.mclk_init = clk_mclkseq_init_mclk_gddr5;
+	gops->pmu.is_pmu_supported = gp106_is_pmu_supported;
 
 	gk20a_dbg_fn("done");
 }
-- 
cgit v1.2.2


From 58b85dd106f35d16ff568f8836dcbc7a019854b4 Mon Sep 17 00:00:00 2001
From: Lakshmanan M <lm@nvidia.com>
Date: Fri, 21 Oct 2016 16:57:15 +0530
Subject: gpu: nvgpu: Add thermal module support

This CL adds VBIOS thermal table parsing and PMU interface support
for the following tables:
1) Thermal device table
2) Thermal channel table

JIRA DNVGPU-130

Change-Id: Ie3abab4bf099a022b1b59db96811c2ed44079519
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1240630
(cherry picked from commit 814962a4be0a8cd0cddc7bc5211c62308ab1fea2)
Reviewed-on: http://git-master/r/1246210
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x |   6 +-
 drivers/gpu/nvgpu/include/bios.h      |  54 ++++++++
 drivers/gpu/nvgpu/pstate/pstate.c     |   9 ++
 drivers/gpu/nvgpu/therm/thrm.c        |  45 +++++++
 drivers/gpu/nvgpu/therm/thrm.h        |  29 ++++
 drivers/gpu/nvgpu/therm/thrmchannel.c | 247 ++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/therm/thrmchannel.h |  42 ++++++
 drivers/gpu/nvgpu/therm/thrmdev.c     | 193 ++++++++++++++++++++++++++
 drivers/gpu/nvgpu/therm/thrmdev.h     |  31 +++++
 drivers/gpu/nvgpu/therm/thrmpmu.c     |  51 +++++++
 drivers/gpu/nvgpu/therm/thrmpmu.h     |  20 +++
 11 files changed, 726 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/nvgpu/therm/thrm.c
 create mode 100644 drivers/gpu/nvgpu/therm/thrm.h
 create mode 100644 drivers/gpu/nvgpu/therm/thrmchannel.c
 create mode 100644 drivers/gpu/nvgpu/therm/thrmchannel.h
 create mode 100644 drivers/gpu/nvgpu/therm/thrmdev.c
 create mode 100644 drivers/gpu/nvgpu/therm/thrmdev.h
 create mode 100644 drivers/gpu/nvgpu/therm/thrmpmu.c
 create mode 100644 drivers/gpu/nvgpu/therm/thrmpmu.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index ea770e43..91243de0 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -54,7 +54,11 @@ nvgpu-y += \
 	$(nvgpu-t18x)/volt/volt_rail.o \
 	$(nvgpu-t18x)/volt/volt_dev.o \
 	$(nvgpu-t18x)/volt/volt_policy.o \
-	$(nvgpu-t18x)/volt/volt_pmu.o
+	$(nvgpu-t18x)/volt/volt_pmu.o \
+	$(nvgpu-t18x)/therm/thrm.o \
+	$(nvgpu-t18x)/therm/thrmdev.o \
+	$(nvgpu-t18x)/therm/thrmchannel.o \
+	$(nvgpu-t18x)/therm/thrmpmu.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
 
diff --git a/drivers/gpu/nvgpu/include/bios.h b/drivers/gpu/nvgpu/include/bios.h
index fb1e1f46..02991db9 100644
--- a/drivers/gpu/nvgpu/include/bios.h
+++ b/drivers/gpu/nvgpu/include/bios.h
@@ -788,4 +788,58 @@ struct vbios_voltage_policy_table_1x_entry {
 #define NV_VBIOS_VPT_ENTRY_PARAM1_SR_SETTLE_TIME_INTERMEDIATE_SHIFT \
 		0
 
+#define VBIOS_THERM_DEVICE_VERSION_1X                                      0x10
+
+#define VBIOS_THERM_DEVICE_1X_HEADER_SIZE_04                         0x00000004
+
+struct therm_device_1x_header {
+	u8 version;
+	u8 header_size;
+	u8 table_entry_size;
+	u8 num_table_entries;
+} ;
+
+struct therm_device_1x_entry {
+	u8 class_id;
+	u8 param0;
+	u8 flags;
+} ;
+
+#define NV_VBIOS_THERM_DEVICE_1X_ENTRY_CLASS_GPU                               0x01
+
+#define NV_VBIOS_THERM_DEVICE_1X_ENTRY_PARAM0_I2C_DEVICE_INDEX_MASK        0xFF
+#define NV_VBIOS_THERM_DEVICE_1X_ENTRY_PARAM0_I2C_DEVICE_INDEX_SHIFT          0
+
+#define VBIOS_THERM_CHANNEL_VERSION_1X                                     0x10
+
+#define VBIOS_THERM_CHANNEL_1X_HEADER_SIZE_09                        0x00000009
+
+struct therm_channel_1x_header {
+	u8 version;
+	u8 header_size;
+	u8 table_entry_size;
+	u8 num_table_entries;
+	u8 gpu_avg_pri_ch_idx;
+	u8 gpu_max_pri_ch_idx;
+	u8 board_pri_ch_idx;
+	u8 mem_pri_ch_idx;
+	u8 pwr_supply_pri_ch_idx;
+};
+
+struct therm_channel_1x_entry {
+	u8 class_id;
+	u8 param0;
+	u8 param1;
+	u8 param2;
+	u8 flags;
+};
+
+#define NV_VBIOS_THERM_CHANNEL_1X_ENTRY_CLASS_DEVICE                       0x01
+
+#define NV_VBIOS_THERM_CHANNEL_1X_ENTRY_PARAM0_DEVICE_INDEX_MASK           0xFF
+#define NV_VBIOS_THERM_CHANNEL_1X_ENTRY_PARAM0_DEVICE_INDEX_SHIFT             0
+
+#define NV_VBIOS_THERM_CHANNEL_1X_ENTRY_PARAM1_DEVICE_PROVIDER_INDEX_MASK  0xFF
+#define NV_VBIOS_THERM_CHANNEL_1X_ENTRY_PARAM1_DEVICE_PROVIDER_INDEX_SHIFT    0
+
 #endif
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index a4787f35..e9b9775e 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -19,6 +19,7 @@
 #include "pmgr/pmgr.h"
 #include "include/bios.h"
 #include "pstate/pstate.h"
+#include "therm/thrm.h"
 
 static int pstate_sw_setup(struct gk20a *g);
 
@@ -49,6 +50,10 @@ int gk20a_init_pstate_support(struct gk20a *g)
 	if (err)
 		return err;
 
+	err = therm_domain_sw_setup(g);
+	if (err)
+		return err;
+
 	err = vfe_var_sw_setup(g);
 	if (err)
 		return err;
@@ -104,6 +109,10 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g)
 		return err;
 	}
 
+	err = therm_domain_pmu_setup(g);
+	if (err)
+		return err;
+
 	err = vfe_var_pmu_setup(g);
 	if (err)
 		return err;
diff --git a/drivers/gpu/nvgpu/therm/thrm.c b/drivers/gpu/nvgpu/therm/thrm.c
new file mode 100644
index 00000000..731cf89e
--- /dev/null
+++ b/drivers/gpu/nvgpu/therm/thrm.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "thrm.h"
+#include "thrmpmu.h"
+
+u32 therm_domain_sw_setup(struct gk20a *g)
+{
+	u32 status;
+
+	status = therm_device_sw_setup(g);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error creating boardobjgrp for therm devices, status - 0x%x",
+			status);
+		goto exit;
+	}
+
+	status = therm_channel_sw_setup(g);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error creating boardobjgrp for therm channel, status - 0x%x",
+			status);
+		goto exit;
+	}
+
+exit:
+	return status;
+}
+
+u32 therm_domain_pmu_setup(struct gk20a *g)
+{
+	return therm_send_pmgr_tables_to_pmu(g);
+}
diff --git a/drivers/gpu/nvgpu/therm/thrm.h b/drivers/gpu/nvgpu/therm/thrm.h
new file mode 100644
index 00000000..1db93b49
--- /dev/null
+++ b/drivers/gpu/nvgpu/therm/thrm.h
@@ -0,0 +1,29 @@
+/*
+ * general thermal table structures & definitions
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _THRM_H_
+#define _THRM_H_
+
+#include "thrmdev.h"
+#include "thrmchannel.h"
+
+struct therm_pmupstate {
+	struct therm_devices therm_deviceobjs;
+	struct therm_channels therm_channelobjs;
+};
+
+u32 therm_domain_sw_setup(struct gk20a *g);
+u32 therm_domain_pmu_setup(struct gk20a *g);
+
+#endif
diff --git a/drivers/gpu/nvgpu/therm/thrmchannel.c b/drivers/gpu/nvgpu/therm/thrmchannel.c
new file mode 100644
index 00000000..015e065b
--- /dev/null
+++ b/drivers/gpu/nvgpu/therm/thrmchannel.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "thrmchannel.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "pmuif/gpmuifthermsensor.h"
+#include "gm206/bios_gm206.h"
+#include "gk20a/pmu_gk20a.h"
+
+static u32 _therm_channel_pmudatainit_device(struct gk20a *g,
+			struct boardobj *board_obj_ptr,
+			struct nv_pmu_boardobj *ppmudata)
+{
+	u32 status = 0;
+	struct therm_channel *pchannel;
+	struct therm_channel_device *ptherm_channel;
+	struct nv_pmu_therm_therm_channel_device_boardobj_set *pset;
+
+	status = boardobj_pmudatainit_super(g, board_obj_ptr, ppmudata);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error updating pmu boardobjgrp for therm channel 0x%x",
+			status);
+		status = -ENOMEM;
+		goto done;
+	}
+
+	pchannel = (struct therm_channel *)board_obj_ptr;
+	pset = (struct nv_pmu_therm_therm_channel_device_boardobj_set *)ppmudata;
+	ptherm_channel = (struct therm_channel_device *)board_obj_ptr;
+
+	pset->super.scaling = pchannel->scaling;
+	pset->super.offset = pchannel->offset;
+	pset->super.temp_min = pchannel->temp_min;
+	pset->super.temp_max = pchannel->temp_max;
+
+	pset->therm_dev_idx = ptherm_channel->therm_dev_idx;
+	pset->therm_dev_prov_idx = ptherm_channel->therm_dev_prov_idx;
+
+done:
+	return status;
+}
+static struct boardobj *construct_channel_device(struct gk20a *g,
+			void *pargs, u16 pargs_size, u8 type)
+{
+	struct boardobj *board_obj_ptr = NULL;
+	struct therm_channel *pchannel;
+	struct therm_channel_device *pchannel_device;
+	u32 status;
+	struct therm_channel_device *therm_device = (struct therm_channel_device*)pargs;
+
+	status = boardobj_construct_super(g, &board_obj_ptr,
+		pargs_size, pargs);
+	if (status)
+		return NULL;
+
+	/* Set Super class interfaces */
+	board_obj_ptr->pmudatainit = _therm_channel_pmudatainit_device;
+
+	pchannel = (struct therm_channel *)board_obj_ptr;
+	pchannel_device = (struct therm_channel_device *)board_obj_ptr;
+
+	pchannel->temp_min = 0;
+	pchannel->temp_max = 0;
+
+	pchannel->scaling = (1 << 8);
+	pchannel->offset = 0;
+
+	pchannel_device->therm_dev_idx = therm_device->therm_dev_idx;
+	pchannel_device->therm_dev_prov_idx = therm_device->therm_dev_prov_idx;
+
+	gk20a_dbg_info(" Done");
+
+	return board_obj_ptr;
+}
+
+static u32 _therm_channel_pmudata_instget(struct gk20a *g,
+			struct nv_pmu_boardobjgrp *pmuboardobjgrp,
+			struct nv_pmu_boardobj **ppboardobjpmudata,
+			u8 idx)
+{
+	struct nv_pmu_therm_therm_channel_boardobj_grp_set *pgrp_set =
+		(struct nv_pmu_therm_therm_channel_boardobj_grp_set *)
+		pmuboardobjgrp;
+
+	gk20a_dbg_info("");
+
+	/*check whether pmuboardobjgrp has a valid boardobj in index*/
+	if (((u32)BIT(idx) &
+			pgrp_set->hdr.data.super.obj_mask.super.data[0]) == 0)
+		return -EINVAL;
+
+	*ppboardobjpmudata = (struct nv_pmu_boardobj *)
+		&pgrp_set->objects[idx].data.board_obj;
+
+	gk20a_dbg_info(" Done");
+
+	return 0;
+}
+
+/*
+ * Parse the VBIOS thermal channel table and insert one boardobj into
+ * pthermchannelobjs for every DEVICE-class entry; entries of any other
+ * class are skipped. Returns 0 on success and -EINVAL on any failure.
+ */
+static u32 devinit_get_therm_channel_table(struct gk20a *g,
+				struct therm_channels *pthermchannelobjs)
+{
+	u32 status = 0;
+	u8 *therm_channel_table_ptr = NULL;
+	u8 *curr_therm_channel_table_ptr = NULL;
+	struct boardobj *boardobj;
+	struct therm_channel_1x_header therm_channel_table_header = { 0 };
+	struct therm_channel_1x_entry *therm_channel_table_entry = NULL;
+	u32 index;
+	u32 obj_index = 0;
+	u16 therm_channel_size = 0;
+	union {
+		struct boardobj boardobj;
+		struct therm_channel therm_channel;
+		struct therm_channel_device device;
+	} therm_channel_data;
+
+	gk20a_dbg_info("");
+
+	if (g->ops.bios.get_perf_table_ptrs)
+		therm_channel_table_ptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+				g->bios.perf_token, THERMAL_CHANNEL_TABLE);
+	/* NULL if the HAL hook is missing or the lookup returned no table */
+	if (therm_channel_table_ptr == NULL) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	memcpy(&therm_channel_table_header, therm_channel_table_ptr,
+		VBIOS_THERM_CHANNEL_1X_HEADER_SIZE_09);
+
+	if (therm_channel_table_header.version !=
+			VBIOS_THERM_CHANNEL_VERSION_1X ||
+	    therm_channel_table_header.header_size <
+			VBIOS_THERM_CHANNEL_1X_HEADER_SIZE_09) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	curr_therm_channel_table_ptr = (therm_channel_table_ptr +
+		VBIOS_THERM_CHANNEL_1X_HEADER_SIZE_09);
+
+	for (index = 0; index < therm_channel_table_header.num_table_entries;
+		index++) {
+		therm_channel_table_entry = (struct therm_channel_1x_entry *)
+			(curr_therm_channel_table_ptr +
+				(therm_channel_table_header.table_entry_size * index));
+
+		/* only DEVICE-class channel entries are handled here */
+		if (therm_channel_table_entry->class_id !=
+				NV_VBIOS_THERM_CHANNEL_1X_ENTRY_CLASS_DEVICE) {
+			continue;
+		}
+
+		therm_channel_data.device.therm_dev_idx = therm_channel_table_entry->param0;
+		therm_channel_data.device.therm_dev_prov_idx = therm_channel_table_entry->param1;
+
+		therm_channel_size = sizeof(struct therm_channel_device);
+		therm_channel_data.boardobj.type = CTRL_THERMAL_THERM_CHANNEL_CLASS_DEVICE;
+
+		boardobj = construct_channel_device(g, &therm_channel_data,
+					therm_channel_size, therm_channel_data.boardobj.type);
+		if (!boardobj) {
+			gk20a_err(dev_from_gk20a(g),
+				"unable to create thermal device for %d type %d",
+				index, therm_channel_data.boardobj.type);
+			status = -EINVAL;
+			goto done;
+		}
+
+		status = boardobjgrp_objinsert(&pthermchannelobjs->super.super,
+				boardobj, obj_index);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+			"unable to insert thermal device boardobj for %d", index);
+			status = -EINVAL;
+			goto done;
+		}
+
+		++obj_index;
+	}
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+u32 therm_channel_sw_setup(struct gk20a *g)
+{
+	u32 status;
+	struct boardobjgrp *pboardobjgrp = NULL;
+	struct therm_channels *pthermchannelobjs;
+
+	/* Construct the Super Class and override the Interfaces */
+	status = boardobjgrpconstruct_e32(&g->therm_pmu.therm_channelobjs.super);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error creating boardobjgrp for therm devices, status - 0x%x",
+			  status);
+		goto done;
+	}
+
+	pboardobjgrp = &g->therm_pmu.therm_channelobjs.super.super;
+	pthermchannelobjs = &(g->therm_pmu.therm_channelobjs);
+
+	/* Override the Interfaces */
+	pboardobjgrp->pmudatainstget = _therm_channel_pmudata_instget;
+
+	status = devinit_get_therm_channel_table(g, pthermchannelobjs);
+	if (status)
+		goto done;
+
+	BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, THERM, THERM_CHANNEL);
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp,
+			therm, THERM, therm_channel, THERM_CHANNEL);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x",
+			  status);
+		goto done;
+	}
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/therm/thrmchannel.h b/drivers/gpu/nvgpu/therm/thrmchannel.h
new file mode 100644
index 00000000..4b9d19da
--- /dev/null
+++ b/drivers/gpu/nvgpu/therm/thrmchannel.h
@@ -0,0 +1,42 @@
+/*
+ * general thermal channel structures & definitions
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _THRMCHANNEL_H_
+#define _THRMCHANNEL_H_
+
+#include "boardobj/boardobj.h"
+#include "boardobj/boardobjgrp.h"
+#include "ctrl/ctrltherm.h"
+
+struct therm_channel {
+	struct boardobj super;
+	s16 scaling;
+	s16 offset;
+	s32 temp_min;
+	s32 temp_max;
+};
+
+struct therm_channels {
+	struct boardobjgrp_e32 super;
+};
+
+struct therm_channel_device {
+	struct therm_channel super;
+	u8 therm_dev_idx;
+	u8 therm_dev_prov_idx;
+};
+
+u32 therm_channel_sw_setup(struct gk20a *g);
+
+#endif
diff --git a/drivers/gpu/nvgpu/therm/thrmdev.c b/drivers/gpu/nvgpu/therm/thrmdev.c
new file mode 100644
index 00000000..83ac9739
--- /dev/null
+++ b/drivers/gpu/nvgpu/therm/thrmdev.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "thrmdev.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "pmuif/gpmuifthermsensor.h"
+#include "gm206/bios_gm206.h"
+#include "gk20a/pmu_gk20a.h"
+#include "ctrl/ctrltherm.h"
+
+/* Build a boardobj from pargs through boardobj_construct_super().
+ * Returns the constructed object, or NULL if the constructor reports failure. */
+static struct boardobj *construct_therm_device(struct gk20a *g,
+			void *pargs, u16 pargs_size, u8 type)
+{
+	struct boardobj *pobj = NULL;
+
+	/* 'type' is accepted but not consulted; only pargs is forwarded */
+	if (boardobj_construct_super(g, &pobj, pargs_size, pargs))
+		return NULL;
+
+	gk20a_dbg_info(" Done");
+
+	return pobj;
+}
+
+/* Look up the PMU-side data slot for entry 'idx' of a therm device group set.
+ * Stores it in *ppboardobjpmudata; -EINVAL if mask word 0 lacks bit idx. */
+static u32 _therm_device_pmudata_instget(struct gk20a *g,
+			struct nv_pmu_boardobjgrp *pmuboardobjgrp,
+			struct nv_pmu_boardobj **ppboardobjpmudata,
+			u8 idx)
+{
+	struct nv_pmu_therm_therm_device_boardobj_grp_set *grp_set =
+		(struct nv_pmu_therm_therm_device_boardobj_grp_set *)
+		pmuboardobjgrp;
+	u32 idx_bit = (u32)BIT(idx);
+
+	gk20a_dbg_info("");
+
+	/* only entries flagged in the group's object mask are handed out */
+	if (!(idx_bit & grp_set->hdr.data.super.obj_mask.super.data[0]))
+		return -EINVAL;
+
+	*ppboardobjpmudata =
+		(struct nv_pmu_boardobj *)&grp_set->objects[idx].data;
+	gk20a_dbg_info(" Done");
+	return 0;
+}
+
+/*
+ * Walk the VBIOS thermal device table and add one boardobj per GPU-class
+ * entry to pthermdeviceobjs, in table order.
+ * Returns 0 on success, or -EINVAL when the table cannot be located, its
+ * header is not a supported 1X header, or an object cannot be built/inserted.
+ */
+static u32 devinit_get_therm_device_table(struct gk20a *g,
+				struct therm_devices *pthermdeviceobjs)
+{
+	u32 status = 0;
+	u8 *therm_device_table_ptr = NULL;
+	u8 *curr_therm_device_table_ptr = NULL;
+	struct boardobj *boardobj;
+	struct therm_device_1x_header therm_device_table_header = { 0 };
+	struct therm_device_1x_entry *therm_device_table_entry = NULL;
+	u32 index;
+	u32 obj_index = 0;
+	union {
+		struct boardobj boardobj;
+		struct therm_device therm_device;
+	} therm_device_data;
+
+	gk20a_dbg_info("");
+
+	/* the op is optional: a NULL pointer must not reach memcpy() below */
+	if (g->ops.bios.get_perf_table_ptrs)
+		therm_device_table_ptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+				g->bios.perf_token, THERMAL_DEVICE_TABLE);
+	if (therm_device_table_ptr == NULL) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	memcpy(&therm_device_table_header, therm_device_table_ptr,
+		VBIOS_THERM_DEVICE_1X_HEADER_SIZE_04);
+
+	/* only the 1X layout with at least the 04-sized header is understood */
+	if (therm_device_table_header.version !=
+			VBIOS_THERM_DEVICE_VERSION_1X ||
+	    therm_device_table_header.header_size <
+			VBIOS_THERM_DEVICE_1X_HEADER_SIZE_04) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	curr_therm_device_table_ptr = (therm_device_table_ptr +
+		VBIOS_THERM_DEVICE_1X_HEADER_SIZE_04);
+
+	for (index = 0; index < therm_device_table_header.num_table_entries;
+		index++) {
+		therm_device_table_entry = (struct therm_device_1x_entry *)
+			(curr_therm_device_table_ptr +
+				(therm_device_table_header.table_entry_size * index));
+
+		/* entries of any other class are skipped, not treated as errors */
+		if (therm_device_table_entry->class_id !=
+				NV_VBIOS_THERM_DEVICE_1X_ENTRY_CLASS_GPU)
+			continue;
+
+		therm_device_data.boardobj.type = CTRL_THERMAL_THERM_DEVICE_CLASS_GPU;
+
+		boardobj = construct_therm_device(g, &therm_device_data,
+					sizeof(struct therm_device),
+					therm_device_data.boardobj.type);
+		if (!boardobj) {
+			gk20a_err(dev_from_gk20a(g),
+				"unable to create thermal device for %d type %d",
+				index, therm_device_data.boardobj.type);
+			status = -EINVAL;
+			goto done;
+		}
+
+		status = boardobjgrp_objinsert(&pthermdeviceobjs->super.super,
+				boardobj, obj_index);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+			"unable to insert thermal device boardobj for %d", index);
+			status = -EINVAL;
+			goto done;
+		}
+
+		++obj_index;
+	}
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
+
+/*
+ * Create the therm device boardobj group, fill it from the VBIOS table and
+ * set up its PMU "group set" command interface. Returns 0 or an error code.
+ */
+u32 therm_device_sw_setup(struct gk20a *g)
+{
+	struct therm_devices *pthermdeviceobjs = &g->therm_pmu.therm_deviceobjs;
+	struct boardobjgrp *pboardobjgrp = &pthermdeviceobjs->super.super;
+	u32 status;
+
+	/* the e32 super class must exist before any interface is overridden */
+	status = boardobjgrpconstruct_e32(&pthermdeviceobjs->super);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error creating boardobjgrp for therm devices, status - 0x%x",
+			  status);
+		goto done;
+	}
+
+	/* route PMU data-instance lookups to the therm device implementation */
+	pboardobjgrp->pmudatainstget = _therm_device_pmudata_instget;
+
+	status = devinit_get_therm_device_table(g, pthermdeviceobjs);
+	if (status)
+		goto done;
+
+	BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, THERM, THERM_DEVICE);
+
+	status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp,
+			therm, THERM, therm_device, THERM_DEVICE);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x",
+			  status);
+	}
+
+done:
+	gk20a_dbg_info(" done status %x", status);
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/therm/thrmdev.h b/drivers/gpu/nvgpu/therm/thrmdev.h
new file mode 100644
index 00000000..35be47c0
--- /dev/null
+++ b/drivers/gpu/nvgpu/therm/thrmdev.h
@@ -0,0 +1,31 @@
+/*
+ * general thermal device structures & definitions
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _THRMDEV_H_
+#define _THRMDEV_H_
+
+#include "boardobj/boardobj.h"
+#include "boardobj/boardobjgrp.h"
+
+struct therm_devices { /* therm device group container wrapping an e32 boardobjgrp */
+	struct boardobjgrp_e32 super;
+};
+
+struct therm_device { /* NOTE(review): 'super' is the group type, not a boardobj - confirm */
+	struct therm_devices super;
+};
+
+u32 therm_device_sw_setup(struct gk20a *g);
+
+#endif
diff --git a/drivers/gpu/nvgpu/therm/thrmpmu.c b/drivers/gpu/nvgpu/therm/thrmpmu.c
new file mode 100644
index 00000000..0ff7090b
--- /dev/null
+++ b/drivers/gpu/nvgpu/therm/thrmpmu.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "thrmpmu.h"
+
+u32 therm_send_pmgr_tables_to_pmu(struct gk20a *g)
+{
+	u32 status = 0;
+	struct boardobjgrp *pboardobjgrp = NULL;
+
+	/* Run the PMU init handler of each non-empty therm group: devices first. */
+	pboardobjgrp = &g->therm_pmu.therm_deviceobjs.super.super;
+	if (!BOARDOBJGRP_IS_EMPTY(pboardobjgrp)) {
+		status = pboardobjgrp->pmuinithandle(g, pboardobjgrp);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"therm_send_pmgr_tables_to_pmu - therm_device failed %x",
+				status);
+			goto exit;
+		}
+	}
+
+	/* Then the channel group; the first failure aborts and is returned. */
+	pboardobjgrp = &g->therm_pmu.therm_channelobjs.super.super;
+	if (!BOARDOBJGRP_IS_EMPTY(pboardobjgrp)) {
+		status = pboardobjgrp->pmuinithandle(g, pboardobjgrp);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"therm_send_pmgr_tables_to_pmu - therm_channel failed %x",
+				status);
+		}
+	}
+
+exit:
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/therm/thrmpmu.h b/drivers/gpu/nvgpu/therm/thrmpmu.h
new file mode 100644
index 00000000..007af720
--- /dev/null
+++ b/drivers/gpu/nvgpu/therm/thrmpmu.h
@@ -0,0 +1,20 @@
+/*
+ * general thermal pmu control structures & definitions
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _THRMPMU_H_
+#define _THRMPMU_H_
+
+u32 therm_send_pmgr_tables_to_pmu(struct gk20a *g);
+
+#endif
-- 
cgit v1.2.2


From 3621d35f95d6060d87a31164b7884fc1e896989f Mon Sep 17 00:00:00 2001
From: Lakshmanan M <lm@nvidia.com>
Date: Tue, 25 Oct 2016 16:33:11 +0530
Subject: gpu: nvgpu: Add PMU thermal RPC for WARN_TEMP

Added PMU thermal slct RPC handling for WARN_TEMP threshold
configuration.

JIRA DNVGPU-130

Change-Id: Iac96557080907bd091217fe983d6a951d0be1da4
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1242133
(cherry picked from commit 8bca85490e716b974315093c47d0d54ec5fb7e0f)
Reviewed-on: http://git-master/r/1246213
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/therm/thrmpmu.c | 91 +++++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/therm/thrmpmu.h |  2 +
 2 files changed, 93 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/therm/thrmpmu.c b/drivers/gpu/nvgpu/therm/thrmpmu.c
index 0ff7090b..0d0a4b3a 100644
--- a/drivers/gpu/nvgpu/therm/thrmpmu.c
+++ b/drivers/gpu/nvgpu/therm/thrmpmu.c
@@ -17,6 +17,33 @@
 #include "boardobj/boardobjgrp_e32.h"
 #include "pmuif/gpmuifboardobj.h"
 #include "thrmpmu.h"
+#include "pmuif/gpmuiftherm.h"
+
+struct therm_pmucmdhandler_params { /* context handed to therm_pmucmdhandler() */
+	struct nv_pmu_therm_rpc *prpccall; /* RPC buffer the handler inspects */
+	u32 success; /* set to 1 by the handler when the RPC was supported */
+};
+
+/* PMU reply callback for therm RPCs; flags ->success when the RPC is supported.
+ * The msg union is read via its 'therm' member for both the check and the logs. */
+static void therm_pmucmdhandler(struct gk20a *g, struct pmu_msg *msg,
+			void *param, u32 handle, u32 status)
+{
+	struct therm_pmucmdhandler_params *phandlerparams =
+		(struct therm_pmucmdhandler_params *)param;
+
+	if (msg->msg.therm.msg_type != NV_PMU_THERM_MSG_ID_RPC) {
+		gk20a_err(dev_from_gk20a(g), "unknow msg %x",
+			msg->msg.therm.msg_type);
+		return;
+	}
+
+	if (!phandlerparams->prpccall->b_supported)
+		gk20a_err(dev_from_gk20a(g), "RPC msg %x failed",
+			msg->msg.therm.msg_type);
+	else
+		phandlerparams->success = 1;
+}
 
 u32 therm_send_pmgr_tables_to_pmu(struct gk20a *g)
 {
@@ -49,3 +76,67 @@ u32 therm_send_pmgr_tables_to_pmu(struct gk20a *g)
 exit:
 	return status;
 }
+
+/* Send the g->curr_warn_temp threshold for THERMAL_1 to the PMU via a therm RPC.
+ * Returns 0, the cmd-post error, or -ETIMEDOUT if success was never flagged. */
+u32 therm_set_warn_temp_limit(struct gk20a *g)
+{
+	u32 status;
+	u32 seqdesc = 0;
+	struct pmu_cmd cmd = { {0} };
+	struct pmu_payload payload = { {0} };
+	struct nv_pmu_therm_rpc rpccall = {0};
+	struct therm_pmucmdhandler_params handlerparams = {0};
+
+	rpccall.function = NV_PMU_THERM_RPC_ID_SLCT_EVENT_TEMP_TH_SET;
+	rpccall.params.slct_event_temp_th_set.event_id =
+		NV_PMU_THERM_EVENT_THERMAL_1;
+	rpccall.params.slct_event_temp_th_set.temp_threshold = g->curr_warn_temp;
+	rpccall.b_supported = 0;
+
+	cmd.hdr.unit_id = PMU_UNIT_THERM;
+	cmd.hdr.size = ((u32)sizeof(struct nv_pmu_therm_cmd) +
+			(u32)sizeof(struct pmu_hdr));
+	cmd.cmd.therm.cmd_type = NV_PMU_THERM_CMD_ID_RPC;
+
+	/* the same rpccall buffer carries the request in and the reply out */
+	payload.in.buf = (u8 *)&rpccall;
+	payload.in.size = (u32)sizeof(struct nv_pmu_therm_rpc);
+	payload.in.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
+	payload.in.offset = NV_PMU_THERM_CMD_RPC_ALLOC_OFFSET;
+
+	/* NOTE(review): CLK (not THERM) msg offset is used here - confirm */
+	payload.out.buf = (u8 *)&rpccall;
+	payload.out.size = (u32)sizeof(struct nv_pmu_therm_rpc);
+	payload.out.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
+	payload.out.offset = NV_PMU_CLK_MSG_RPC_ALLOC_OFFSET;
+
+	/* Setup the handler params to communicate back results.*/
+	handlerparams.success = 0;
+	handlerparams.prpccall = &rpccall;
+
+	status = gk20a_pmu_cmd_post(g, &cmd, NULL, &payload,
+				PMU_COMMAND_QUEUE_LPQ,
+				therm_pmucmdhandler,
+				(void *)&handlerparams,
+				&seqdesc, ~0);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"unable to post therm cmd for unit %x cmd id %x size %x",
+			cmd.hdr.unit_id, cmd.cmd.therm.cmd_type, cmd.hdr.size);
+		goto exit;
+	}
+
+	pmu_wait_message_cond(&g->pmu, gk20a_get_gr_idle_timeout(g),
+			&handlerparams.success, 1);
+
+	if (handlerparams.success == 0) {
+		gk20a_err(dev_from_gk20a(g), "could not process cmd\n");
+		status = -ETIMEDOUT;
+		goto exit;
+	}
+
+exit:
+	return status;
+}
+
diff --git a/drivers/gpu/nvgpu/therm/thrmpmu.h b/drivers/gpu/nvgpu/therm/thrmpmu.h
index 007af720..e6f70411 100644
--- a/drivers/gpu/nvgpu/therm/thrmpmu.h
+++ b/drivers/gpu/nvgpu/therm/thrmpmu.h
@@ -17,4 +17,6 @@
 
 u32 therm_send_pmgr_tables_to_pmu(struct gk20a *g);
 
+u32 therm_set_warn_temp_limit(struct gk20a *g);
+
 #endif
-- 
cgit v1.2.2


From 3a032c33fb70453494e0b143a93db61f859381ea Mon Sep 17 00:00:00 2001
From: Shardar Shariff Md <smohammed@nvidia.com>
Date: Tue, 1 Nov 2016 19:09:46 +0530
Subject: gpu: nvgpu: gp10b: define fuse macro depend on kernel version

- Define the fuse macros only for kernels older than 4.4: the fuse
  offsets changed in K4.4, where the defines come from the common
  header file (tegra-fuse.h) instead
- Use the fuse control read/write APIs when accessing control
  registers on K4.4

Bug 200243956

Change-Id: I34dabd1a307d10010cb89ac6a5f1e3f5b177c0fc
Signed-off-by: Shardar Shariff Md <smohammed@nvidia.com>
Reviewed-on: http://git-master/r/1245825
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/gp10b_sysfs.h | 4 ++++
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c    | 6 ++++++
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c   | 3 +++
 3 files changed, 13 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.h b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.h
index 7c3d3400..786a3bb0 100644
--- a/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.h
+++ b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.h
@@ -16,8 +16,12 @@
 #ifndef _GP10B_SYSFS_H_
 #define _GP10B_SYSFS_H_
 
+#include <linux/version.h>
+
 /*ECC Fuse*/
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
 #define FUSE_OPT_ECC_EN  0x358
+#endif
 
 void gp10b_create_sysfs(struct device *dev);
 void gp10b_remove_sysfs(struct device *dev);
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 0705d8b6..09c2558c 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -17,6 +17,7 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/tegra-fuse.h>
+#include <linux/version.h>
 
 #include <dt-bindings/soc/gm20b-fuse.h>
 #include <dt-bindings/soc/gp10b-fuse.h>
@@ -1533,8 +1534,13 @@ static void gr_gp10b_init_cyclestats(struct gk20a *g)
 
 static void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
 	tegra_fuse_writel(0x1, FUSE_FUSEBYPASS_0);
 	tegra_fuse_writel(0x0, FUSE_WRITE_ACCESS_SW_0);
+#else
+	tegra_fuse_control_write(0x1, FUSE_FUSEBYPASS_0);
+	tegra_fuse_control_write(0x0, FUSE_WRITE_ACCESS_SW_0);
+#endif
 
 	if (g->gr.gpc_tpc_mask[gpc_index] == 0x1)
 		tegra_fuse_writel(0x2, FUSE_OPT_GPU_TPC0_DISABLE_0);
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 2699dd7a..f0137a70 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -15,6 +15,7 @@
 
 #include <linux/types.h>
 #include <linux/printk.h>
+#include <linux/version.h>
 
 #include <linux/types.h>
 
@@ -46,7 +47,9 @@
 #include "gk20a/dbg_gpu_gk20a.h"
 #include "gk20a/css_gr_gk20a.h"
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
 #define FUSE_OPT_PRIV_SEC_EN_0 0x264
+#endif
 #define PRIV_SECURITY_ENABLED 0x01
 
 static struct gpu_ops gp10b_ops = {
-- 
cgit v1.2.2


From 3491b6c3217babb47c451e96a0bf2145c111b2d5 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 8 Nov 2016 13:06:21 -0800
Subject: gpu: nvgpu: Return correct GPC base addresses

Due to missing break statements, GPC base and GPC shared base were
overwritten by values in following select cases.

Change-Id: Iba50d8256c1cf07ff8e631e2fcf22a68cdc992e0
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1249970
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-by: Seema Khowala <seemaj@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/hal_gp106.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 89e0e1fd..347f813b 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -139,11 +139,13 @@ static int gp106_get_litter_value(struct gk20a *g,
 		break;
 	case GPU_LIT_PPC_IN_GPC_BASE:
 		ret = proj_ppc_in_gpc_base_v();
+		break;
 	case GPU_LIT_PPC_IN_GPC_STRIDE:
 		ret = proj_ppc_in_gpc_stride_v();
 		break;
 	case GPU_LIT_PPC_IN_GPC_SHARED_BASE:
 		ret = proj_ppc_in_gpc_shared_base_v();
+		break;
 	case GPU_LIT_ROP_BASE:
 		ret = proj_rop_base_v();
 		break;
-- 
cgit v1.2.2


From d1c722b19428f5f1be9aa3dbbca3bd4f13d50b8b Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 8 Nov 2016 13:20:09 -0800
Subject: gpu: nvgpu: gp106: Free pmu_sig if init PMU fails

If gk20a_init_pmu() fails, go to the error path that frees pmu_sig.

Change-Id: I2f6fcb86570aba54ab45aec14ee6f341e3faebd5
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1249971
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Seema Khowala <seemaj@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/acr_gp106.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index 39371666..3bd79bcd 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -170,7 +170,7 @@ static int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img_v1 *p_img)
 	err = gk20a_init_pmu(pmu);
 	if (err) {
 		gp106_dbg_pmu("failed to set function pointers\n");
-		goto release_desc;
+		goto release_sig;
 	}
 
 	lsf_desc = kzalloc(sizeof(struct lsf_ucode_desc_v1), GFP_KERNEL);
-- 
cgit v1.2.2


From fd8555d54e8499f1430ed9c9d1658da177fb30d4 Mon Sep 17 00:00:00 2001
From: Peter Daifuku <pdaifuku@nvidia.com>
Date: Wed, 26 Oct 2016 10:55:30 -0700
Subject: gpu: nvgpu: gp106: Add regops whitelists

Add regops whitelists for gp106. The whitelist is generated, and is the
same for context switched and global registers.

Bug 200239422

Change-Id: Ib6689956c191c8f346da8cc5c7e3791f105db4eb
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1243253
(cherry picked from commit 1bdc23c9f9aac7ba91a50b83397925237851f8db)
Reviewed-on: http://git-master/r/1247645
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x  |    1 +
 drivers/gpu/nvgpu/gp106/hal_gp106.c    |    4 +-
 drivers/gpu/nvgpu/gp106/regops_gp106.c | 1815 ++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/regops_gp106.h |   24 +
 4 files changed, 1842 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/gp106/regops_gp106.c
 create mode 100644 drivers/gpu/nvgpu/gp106/regops_gp106.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index 91243de0..bb19d595 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -30,6 +30,7 @@ nvgpu-y += \
 	$(nvgpu-t18x)/gp106/ltc_gp106.o \
 	$(nvgpu-t18x)/gp106/fb_gp106.o \
 	$(nvgpu-t18x)/gp106/bios_gp106.o \
+	$(nvgpu-t18x)/gp106/regops_gp106.o \
 	$(nvgpu-t18x)/clk/clk_mclk.o \
 	$(nvgpu-t18x)/pstate/pstate.o \
 	$(nvgpu-t18x)/clk/clk_vin.o \
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 347f813b..9afcdb69 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -26,7 +26,7 @@
 #include "gp10b/mm_gp10b.h"
 #include "gp10b/ce_gp10b.h"
 #include "gp106/fifo_gp106.h"
-#include "gp10b/regops_gp10b.h"
+#include "gp106/regops_gp106.h"
 #include "gp10b/cde_gp10b.h"
 #include "gp106/therm_gp106.h"
 #include "gp106/xve_gp106.h"
@@ -205,7 +205,7 @@ int gp106_init_hal(struct gk20a *g)
 	gk20a_init_debug_ops(gops);
 	gk20a_init_dbg_session_ops(gops);
 	gp106_init_clk_ops(gops);
-	gp10b_init_regops(gops);
+	gp106_init_regops(gops);
 	gp10b_init_cde_ops(gops);
 	gk20a_init_tsg_ops(gops);
 #if defined(CONFIG_GK20A_CYCLE_STATS)
diff --git a/drivers/gpu/nvgpu/gp106/regops_gp106.c b/drivers/gpu/nvgpu/gp106/regops_gp106.c
new file mode 100644
index 00000000..5b6897c1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/regops_gp106.c
@@ -0,0 +1,1815 @@
+/*
+ * Tegra GP106 GPU Debugger Driver Register Ops
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/bsearch.h>
+#include <uapi/linux/nvgpu.h>
+
+#include "gk20a/gk20a.h"
+#include "gk20a/dbg_gpu_gk20a.h"
+#include "gk20a/regops_gk20a.h"
+#include "regops_gp106.h"
+
+static const struct regop_offset_range gp106_global_whitelist_ranges[] = {
+	{ 0x000004f0,   1},
+	{ 0x00001a00,   3},
+	{ 0x00002800, 128},
+	{ 0x00009400,   1},
+	{ 0x00009410,   1},
+	{ 0x00009480,   1},
+	{ 0x00020200,  24},
+	{ 0x00021c04,   3},
+	{ 0x00021c14,   3},
+	{ 0x00021c24,  71},
+	{ 0x00021d44,   1},
+	{ 0x00021d4c,   1},
+	{ 0x00021d54,   1},
+	{ 0x00021d5c,   1},
+	{ 0x00021d68,  19},
+	{ 0x00021dbc,  16},
+	{ 0x00022430,   7},
+	{ 0x00022450,   1},
+	{ 0x0002245c,   2},
+	{ 0x00070000,   5},
+	{ 0x000840a8,   1},
+	{ 0x00084b5c,   1},
+	{ 0x000870a8,   1},
+	{ 0x000884e0,   1},
+	{ 0x000884f4,   1},
+	{ 0x0008e00c,   1},
+	{ 0x00100c18,   3},
+	{ 0x00100c84,   1},
+	{ 0x0010a0a8,   1},
+	{ 0x0010a4f0,   1},
+	{ 0x0013c808,   2},
+	{ 0x0013cc14,   1},
+	{ 0x0013cc34,   1},
+	{ 0x0013cc54,   1},
+	{ 0x0013cc74,   1},
+	{ 0x0013cc94,   1},
+	{ 0x0013ccb4,   1},
+	{ 0x0013ec18,   1},
+	{ 0x00140028,   1},
+	{ 0x00140280,   1},
+	{ 0x001402a0,   1},
+	{ 0x00140350,   1},
+	{ 0x00140480,   1},
+	{ 0x001404a0,   1},
+	{ 0x00140550,   1},
+	{ 0x00140680,   1},
+	{ 0x001406a0,   1},
+	{ 0x00140750,   1},
+	{ 0x00142028,   1},
+	{ 0x00142280,   1},
+	{ 0x001422a0,   1},
+	{ 0x00142350,   1},
+	{ 0x00142480,   1},
+	{ 0x001424a0,   1},
+	{ 0x00142550,   1},
+	{ 0x00142680,   1},
+	{ 0x001426a0,   1},
+	{ 0x00142750,   1},
+	{ 0x00144028,   1},
+	{ 0x00144280,   1},
+	{ 0x00144350,   1},
+	{ 0x00144480,   1},
+	{ 0x00144550,   1},
+	{ 0x00144680,   1},
+	{ 0x00144750,   1},
+	{ 0x00146028,   1},
+	{ 0x00146280,   1},
+	{ 0x00146350,   1},
+	{ 0x00146480,   1},
+	{ 0x00146550,   1},
+	{ 0x00146680,   1},
+	{ 0x00146750,   1},
+	{ 0x00148028,   1},
+	{ 0x00148280,   1},
+	{ 0x00148350,   1},
+	{ 0x00148480,   1},
+	{ 0x00148550,   1},
+	{ 0x00148680,   1},
+	{ 0x00148750,   1},
+	{ 0x0014a028,   1},
+	{ 0x0014a280,   1},
+	{ 0x0014a350,   1},
+	{ 0x0014a480,   1},
+	{ 0x0014a550,   1},
+	{ 0x0014a680,   1},
+	{ 0x0014a750,   1},
+	{ 0x0014c028,   1},
+	{ 0x0014c280,   1},
+	{ 0x0014c350,   1},
+	{ 0x0014c480,   1},
+	{ 0x0014c550,   1},
+	{ 0x0014c680,   1},
+	{ 0x0014c750,   1},
+	{ 0x0014e028,   1},
+	{ 0x0014e280,   1},
+	{ 0x0014e350,   1},
+	{ 0x0014e480,   1},
+	{ 0x0014e550,   1},
+	{ 0x0014e680,   1},
+	{ 0x0014e750,   1},
+	{ 0x00150028,   1},
+	{ 0x00150280,   1},
+	{ 0x00150350,   1},
+	{ 0x00150480,   1},
+	{ 0x00150550,   1},
+	{ 0x00150680,   1},
+	{ 0x00150750,   1},
+	{ 0x00152028,   1},
+	{ 0x00152280,   1},
+	{ 0x00152350,   1},
+	{ 0x00152480,   1},
+	{ 0x00152550,   1},
+	{ 0x00152680,   1},
+	{ 0x00152750,   1},
+	{ 0x00154028,   1},
+	{ 0x00154280,   1},
+	{ 0x00154350,   1},
+	{ 0x00154480,   1},
+	{ 0x00154550,   1},
+	{ 0x00154680,   1},
+	{ 0x00154750,   1},
+	{ 0x00156028,   1},
+	{ 0x00156280,   1},
+	{ 0x00156350,   1},
+	{ 0x00156480,   1},
+	{ 0x00156550,   1},
+	{ 0x00156680,   1},
+	{ 0x00156750,   1},
+	{ 0x0017e028,   1},
+	{ 0x0017e280,   1},
+	{ 0x0017e294,   1},
+	{ 0x0017e29c,   2},
+	{ 0x0017e2ac,   1},
+	{ 0x0017e350,   1},
+	{ 0x0017e39c,   1},
+	{ 0x0017e480,   1},
+	{ 0x0017e4a0,   1},
+	{ 0x0017e550,   1},
+	{ 0x0017e680,   1},
+	{ 0x0017e6a0,   1},
+	{ 0x0017e750,   1},
+	{ 0x00180040,  41},
+	{ 0x001800ec,   1},
+	{ 0x001800f8,   7},
+	{ 0x00180240,  41},
+	{ 0x001802ec,   1},
+	{ 0x001802f8,   7},
+	{ 0x00180440,  41},
+	{ 0x001804ec,   1},
+	{ 0x001804f8,   7},
+	{ 0x00180640,  41},
+	{ 0x001806ec,   1},
+	{ 0x001806f8,   7},
+	{ 0x00180840,  41},
+	{ 0x001808ec,   1},
+	{ 0x001808f8,   7},
+	{ 0x00180a40,  41},
+	{ 0x00180aec,   1},
+	{ 0x00180af8,   7},
+	{ 0x00180c40,  41},
+	{ 0x00180cec,   1},
+	{ 0x00180cf8,   7},
+	{ 0x00180e40,  41},
+	{ 0x00180eec,   1},
+	{ 0x00180ef8,   7},
+	{ 0x00181040,  41},
+	{ 0x001810ec,   1},
+	{ 0x001810f8,   7},
+	{ 0x00181240,  41},
+	{ 0x001812ec,   1},
+	{ 0x001812f8,   7},
+	{ 0x00181440,  41},
+	{ 0x001814ec,   1},
+	{ 0x001814f8,   7},
+	{ 0x00181640,  41},
+	{ 0x001816ec,   1},
+	{ 0x001816f8,   7},
+	{ 0x00181840,  41},
+	{ 0x001818ec,   1},
+	{ 0x001818f8,   7},
+	{ 0x00181a40,  41},
+	{ 0x00181aec,   1},
+	{ 0x00181af8,   7},
+	{ 0x00181c40,  41},
+	{ 0x00181cec,   1},
+	{ 0x00181cf8,   7},
+	{ 0x00181e40,  41},
+	{ 0x00181eec,   1},
+	{ 0x00181ef8,   7},
+	{ 0x00182040,  41},
+	{ 0x001820ec,   1},
+	{ 0x001820f8,   7},
+	{ 0x00182240,  41},
+	{ 0x001822ec,   1},
+	{ 0x001822f8,   7},
+	{ 0x00182440,  41},
+	{ 0x001824ec,   1},
+	{ 0x001824f8,   7},
+	{ 0x00182640,  41},
+	{ 0x001826ec,   1},
+	{ 0x001826f8,   7},
+	{ 0x00182840,  41},
+	{ 0x001828ec,   1},
+	{ 0x001828f8,   7},
+	{ 0x00182a40,  41},
+	{ 0x00182aec,   1},
+	{ 0x00182af8,   7},
+	{ 0x00182c40,  41},
+	{ 0x00182cec,   1},
+	{ 0x00182cf8,   7},
+	{ 0x00182e40,  41},
+	{ 0x00182eec,   1},
+	{ 0x00182ef8,   7},
+	{ 0x00183040,  41},
+	{ 0x001830ec,   1},
+	{ 0x001830f8,   7},
+	{ 0x00183240,  41},
+	{ 0x001832ec,   1},
+	{ 0x001832f8,   7},
+	{ 0x00183440,  41},
+	{ 0x001834ec,   1},
+	{ 0x001834f8,   7},
+	{ 0x00183640,  41},
+	{ 0x001836ec,   1},
+	{ 0x001836f8,   7},
+	{ 0x00183840,  41},
+	{ 0x001838ec,   1},
+	{ 0x001838f8,   7},
+	{ 0x00183a40,  41},
+	{ 0x00183aec,   1},
+	{ 0x00183af8,   7},
+	{ 0x00183c40,  41},
+	{ 0x00183cec,   1},
+	{ 0x00183cf8,   7},
+	{ 0x00183e40,  41},
+	{ 0x00183eec,   1},
+	{ 0x00183ef8,   7},
+	{ 0x00184040,  41},
+	{ 0x001840ec,   1},
+	{ 0x001840f8,   7},
+	{ 0x00184240,  41},
+	{ 0x001842ec,   1},
+	{ 0x001842f8,   7},
+	{ 0x00184440,  41},
+	{ 0x001844ec,   1},
+	{ 0x001844f8,   7},
+	{ 0x00184640,  41},
+	{ 0x001846ec,   1},
+	{ 0x001846f8,   7},
+	{ 0x00184840,  41},
+	{ 0x001848ec,   1},
+	{ 0x001848f8,   7},
+	{ 0x00184a40,  41},
+	{ 0x00184aec,   1},
+	{ 0x00184af8,   7},
+	{ 0x00184c40,  41},
+	{ 0x00184cec,   1},
+	{ 0x00184cf8,   7},
+	{ 0x00184e40,  41},
+	{ 0x00184eec,   1},
+	{ 0x00184ef8,   7},
+	{ 0x00185040,  41},
+	{ 0x001850ec,   1},
+	{ 0x001850f8,   7},
+	{ 0x00185240,  41},
+	{ 0x001852ec,   1},
+	{ 0x001852f8,   7},
+	{ 0x00185440,  41},
+	{ 0x001854ec,   1},
+	{ 0x001854f8,   7},
+	{ 0x00185640,  41},
+	{ 0x001856ec,   1},
+	{ 0x001856f8,   7},
+	{ 0x00185840,  41},
+	{ 0x001858ec,   1},
+	{ 0x001858f8,   7},
+	{ 0x00185a40,  41},
+	{ 0x00185aec,   1},
+	{ 0x00185af8,   7},
+	{ 0x00185c40,  41},
+	{ 0x00185cec,   1},
+	{ 0x00185cf8,   7},
+	{ 0x00185e40,  41},
+	{ 0x00185eec,   1},
+	{ 0x00185ef8,   7},
+	{ 0x001a0040,  41},
+	{ 0x001a00ec,   1},
+	{ 0x001a00f8,   7},
+	{ 0x001a0240,  41},
+	{ 0x001a02ec,   1},
+	{ 0x001a02f8,   7},
+	{ 0x001a0440,  41},
+	{ 0x001a04ec,   1},
+	{ 0x001a04f8,   7},
+	{ 0x001a0640,  41},
+	{ 0x001a06ec,   1},
+	{ 0x001a06f8,   7},
+	{ 0x001a0840,  41},
+	{ 0x001a08ec,   1},
+	{ 0x001a08f8,   7},
+	{ 0x001a0a40,  41},
+	{ 0x001a0aec,   1},
+	{ 0x001a0af8,   7},
+	{ 0x001a0c40,  41},
+	{ 0x001a0cec,   1},
+	{ 0x001a0cf8,   7},
+	{ 0x001a0e40,  41},
+	{ 0x001a0eec,   1},
+	{ 0x001a0ef8,   7},
+	{ 0x001a1040,  41},
+	{ 0x001a10ec,   1},
+	{ 0x001a10f8,   7},
+	{ 0x001a1240,  41},
+	{ 0x001a12ec,   1},
+	{ 0x001a12f8,   7},
+	{ 0x001a1440,  41},
+	{ 0x001a14ec,   1},
+	{ 0x001a14f8,   7},
+	{ 0x001a1640,  41},
+	{ 0x001a16ec,   1},
+	{ 0x001a16f8,   7},
+	{ 0x001a1840,  41},
+	{ 0x001a18ec,   1},
+	{ 0x001a18f8,   7},
+	{ 0x001a1a40,  41},
+	{ 0x001a1aec,   1},
+	{ 0x001a1af8,   7},
+	{ 0x001a1c40,  41},
+	{ 0x001a1cec,   1},
+	{ 0x001a1cf8,   7},
+	{ 0x001a1e40,  41},
+	{ 0x001a1eec,   1},
+	{ 0x001a1ef8,   7},
+	{ 0x001a2040,  41},
+	{ 0x001a20ec,   1},
+	{ 0x001a20f8,   7},
+	{ 0x001a2240,  41},
+	{ 0x001a22ec,   1},
+	{ 0x001a22f8,   7},
+	{ 0x001a2440,  41},
+	{ 0x001a24ec,   1},
+	{ 0x001a24f8,   7},
+	{ 0x001a2640,  41},
+	{ 0x001a26ec,   1},
+	{ 0x001a26f8,   7},
+	{ 0x001a2840,  41},
+	{ 0x001a28ec,   1},
+	{ 0x001a28f8,   7},
+	{ 0x001a2a40,  41},
+	{ 0x001a2aec,   1},
+	{ 0x001a2af8,   7},
+	{ 0x001a2c40,  41},
+	{ 0x001a2cec,   1},
+	{ 0x001a2cf8,   7},
+	{ 0x001a2e40,  41},
+	{ 0x001a2eec,   1},
+	{ 0x001a2ef8,   7},
+	{ 0x001a3040,  41},
+	{ 0x001a30ec,   1},
+	{ 0x001a30f8,   7},
+	{ 0x001a3240,  41},
+	{ 0x001a32ec,   1},
+	{ 0x001a32f8,   7},
+	{ 0x001a3440,  41},
+	{ 0x001a34ec,   1},
+	{ 0x001a34f8,   7},
+	{ 0x001a3640,  41},
+	{ 0x001a36ec,   1},
+	{ 0x001a36f8,   7},
+	{ 0x001a3840,  41},
+	{ 0x001a38ec,   1},
+	{ 0x001a38f8,   7},
+	{ 0x001a3a40,  41},
+	{ 0x001a3aec,   1},
+	{ 0x001a3af8,   7},
+	{ 0x001a3c40,  41},
+	{ 0x001a3cec,   1},
+	{ 0x001a3cf8,   7},
+	{ 0x001a3e40,  41},
+	{ 0x001a3eec,   1},
+	{ 0x001a3ef8,   7},
+	{ 0x001a4040,  41},
+	{ 0x001a40ec,   1},
+	{ 0x001a40f8,   7},
+	{ 0x001a4240,  41},
+	{ 0x001a42ec,   1},
+	{ 0x001a42f8,   7},
+	{ 0x001a4440,  41},
+	{ 0x001a44ec,   1},
+	{ 0x001a44f8,   7},
+	{ 0x001a4640,  41},
+	{ 0x001a46ec,   1},
+	{ 0x001a46f8,   7},
+	{ 0x001a4840,  41},
+	{ 0x001a48ec,   1},
+	{ 0x001a48f8,   7},
+	{ 0x001a4a40,  41},
+	{ 0x001a4aec,   1},
+	{ 0x001a4af8,   7},
+	{ 0x001a4c40,  41},
+	{ 0x001a4cec,   1},
+	{ 0x001a4cf8,   7},
+	{ 0x001a4e40,  41},
+	{ 0x001a4eec,   1},
+	{ 0x001a4ef8,   7},
+	{ 0x001a5040,  41},
+	{ 0x001a50ec,   1},
+	{ 0x001a50f8,   7},
+	{ 0x001a5240,  41},
+	{ 0x001a52ec,   1},
+	{ 0x001a52f8,   7},
+	{ 0x001a5440,  41},
+	{ 0x001a54ec,   1},
+	{ 0x001a54f8,   7},
+	{ 0x001a5640,  41},
+	{ 0x001a56ec,   1},
+	{ 0x001a56f8,   7},
+	{ 0x001a5840,  41},
+	{ 0x001a58ec,   1},
+	{ 0x001a58f8,   7},
+	{ 0x001a5a40,  41},
+	{ 0x001a5aec,   1},
+	{ 0x001a5af8,   7},
+	{ 0x001a5c40,  41},
+	{ 0x001a5cec,   1},
+	{ 0x001a5cf8,   7},
+	{ 0x001a5e40,  41},
+	{ 0x001a5eec,   1},
+	{ 0x001a5ef8,   7},
+	{ 0x001b0040,  41},
+	{ 0x001b00ec,   1},
+	{ 0x001b00f8,   7},
+	{ 0x001b0240,  41},
+	{ 0x001b02ec,   1},
+	{ 0x001b02f8,   7},
+	{ 0x001b0440,  41},
+	{ 0x001b04ec,   1},
+	{ 0x001b04f8,   7},
+	{ 0x001b0640,  41},
+	{ 0x001b06ec,   1},
+	{ 0x001b06f8,   7},
+	{ 0x001b0840,  41},
+	{ 0x001b08ec,   1},
+	{ 0x001b08f8,   7},
+	{ 0x001b0a40,  41},
+	{ 0x001b0aec,   1},
+	{ 0x001b0af8,   7},
+	{ 0x001b0c40,  41},
+	{ 0x001b0cec,   1},
+	{ 0x001b0cf8,   7},
+	{ 0x001b0e40,  41},
+	{ 0x001b0eec,   1},
+	{ 0x001b0ef8,   7},
+	{ 0x001b4000,   1},
+	{ 0x001b4008,   1},
+	{ 0x001b4010,   3},
+	{ 0x001b4020,   3},
+	{ 0x001b4030,   3},
+	{ 0x001b4040,   3},
+	{ 0x001b4050,   3},
+	{ 0x001b4060,   4},
+	{ 0x001b4074,   7},
+	{ 0x001b4094,   3},
+	{ 0x001b40a4,   1},
+	{ 0x001b4100,   6},
+	{ 0x001b4124,   2},
+	{ 0x001b8000,   1},
+	{ 0x001b8008,   1},
+	{ 0x001b8010,   3},
+	{ 0x001b8200,   1},
+	{ 0x001b8208,   1},
+	{ 0x001b8210,   3},
+	{ 0x001b8400,   1},
+	{ 0x001b8408,   1},
+	{ 0x001b8410,   3},
+	{ 0x001b8600,   1},
+	{ 0x001b8608,   1},
+	{ 0x001b8610,   3},
+	{ 0x001b8800,   1},
+	{ 0x001b8808,   1},
+	{ 0x001b8810,   3},
+	{ 0x001b8a00,   1},
+	{ 0x001b8a08,   1},
+	{ 0x001b8a10,   3},
+	{ 0x001bc000,   1},
+	{ 0x001bc008,   1},
+	{ 0x001bc010,   3},
+	{ 0x001bc200,   1},
+	{ 0x001bc208,   1},
+	{ 0x001bc210,   3},
+	{ 0x001bc400,   1},
+	{ 0x001bc408,   1},
+	{ 0x001bc410,   3},
+	{ 0x001bc600,   1},
+	{ 0x001bc608,   1},
+	{ 0x001bc610,   3},
+	{ 0x001bc800,   1},
+	{ 0x001bc808,   1},
+	{ 0x001bc810,   3},
+	{ 0x001bca00,   1},
+	{ 0x001bca08,   1},
+	{ 0x001bca10,   3},
+	{ 0x001bd218,   1},
+	{ 0x001be000,   1},
+	{ 0x001be008,   1},
+	{ 0x001be010,   3},
+	{ 0x001be218,   1},
+	{ 0x001bf218,   1},
+	{ 0x001c0218,   1},
+	{ 0x001c1218,   1},
+	{ 0x001c80a8,   1},
+	{ 0x001c9100,   1},
+	{ 0x001cc0a8,   1},
+	{ 0x001cd100,   1},
+	{ 0x00400500,   1},
+	{ 0x0040415c,   1},
+	{ 0x00404468,   1},
+	{ 0x00404498,   1},
+	{ 0x00405800,   1},
+	{ 0x00405840,   2},
+	{ 0x00405850,   1},
+	{ 0x00405908,   1},
+	{ 0x00405a00,   1},
+	{ 0x00405b40,   1},
+	{ 0x00405b50,   1},
+	{ 0x00406024,   5},
+	{ 0x00407010,   1},
+	{ 0x00407808,   1},
+	{ 0x0040803c,   1},
+	{ 0x00408804,   1},
+	{ 0x0040880c,   1},
+	{ 0x00408900,   2},
+	{ 0x00408910,   1},
+	{ 0x00408944,   1},
+	{ 0x00408984,   1},
+	{ 0x004090a8,   1},
+	{ 0x004098a0,   1},
+	{ 0x00409b00,   1},
+	{ 0x0041000c,   1},
+	{ 0x00410110,   1},
+	{ 0x00410184,   1},
+	{ 0x0041040c,   1},
+	{ 0x00410510,   1},
+	{ 0x00410584,   1},
+	{ 0x0041080c,   1},
+	{ 0x00410910,   1},
+	{ 0x00410984,   1},
+	{ 0x00410c0c,   1},
+	{ 0x00410d10,   1},
+	{ 0x00410d84,   1},
+	{ 0x0041100c,   1},
+	{ 0x00411110,   1},
+	{ 0x00411184,   1},
+	{ 0x0041140c,   1},
+	{ 0x00411510,   1},
+	{ 0x00411584,   1},
+	{ 0x0041180c,   1},
+	{ 0x00411910,   1},
+	{ 0x00411984,   1},
+	{ 0x00411c0c,   1},
+	{ 0x00411d10,   1},
+	{ 0x00411d84,   1},
+	{ 0x0041200c,   1},
+	{ 0x00412110,   1},
+	{ 0x00412184,   1},
+	{ 0x0041240c,   1},
+	{ 0x00412510,   1},
+	{ 0x00412584,   1},
+	{ 0x0041280c,   1},
+	{ 0x00412910,   1},
+	{ 0x00412984,   1},
+	{ 0x00412c0c,   1},
+	{ 0x00412d10,   1},
+	{ 0x00412d84,   1},
+	{ 0x00418000,   1},
+	{ 0x00418008,   1},
+	{ 0x00418380,   2},
+	{ 0x00418400,   2},
+	{ 0x004184a0,   1},
+	{ 0x00418604,   1},
+	{ 0x00418680,   1},
+	{ 0x00418704,   1},
+	{ 0x00418714,   1},
+	{ 0x00418800,   1},
+	{ 0x0041881c,   1},
+	{ 0x00418830,   1},
+	{ 0x00418884,   1},
+	{ 0x004188b0,   1},
+	{ 0x004188c8,   3},
+	{ 0x004188fc,   1},
+	{ 0x00418b04,   1},
+	{ 0x00418c04,   1},
+	{ 0x00418c10,   8},
+	{ 0x00418c88,   1},
+	{ 0x00418d00,   1},
+	{ 0x00418e00,   1},
+	{ 0x00418e08,   1},
+	{ 0x00418e34,   1},
+	{ 0x00418e40,   4},
+	{ 0x00418e58,  16},
+	{ 0x00418f08,   1},
+	{ 0x00419000,   1},
+	{ 0x0041900c,   1},
+	{ 0x00419018,   1},
+	{ 0x00419854,   1},
+	{ 0x00419864,   1},
+	{ 0x00419a04,   2},
+	{ 0x00419a14,   1},
+	{ 0x00419ab0,   1},
+	{ 0x00419ab8,   3},
+	{ 0x00419c0c,   1},
+	{ 0x00419c8c,   2},
+	{ 0x00419d00,   1},
+	{ 0x00419d08,   2},
+	{ 0x00419e00,  11},
+	{ 0x00419e34,   2},
+	{ 0x00419e44,  11},
+	{ 0x00419e74,  10},
+	{ 0x00419ea4,   1},
+	{ 0x00419eac,   2},
+	{ 0x00419ee8,   1},
+	{ 0x00419ef0,  28},
+	{ 0x00419f70,   1},
+	{ 0x00419f78,   2},
+	{ 0x00419f98,   2},
+	{ 0x00419fdc,   1},
+	{ 0x0041a02c,   2},
+	{ 0x0041a0a0,   1},
+	{ 0x0041a0a8,   1},
+	{ 0x0041a890,   2},
+	{ 0x0041a8a0,   3},
+	{ 0x0041a8b0,   2},
+	{ 0x0041b014,   1},
+	{ 0x0041b0cc,   1},
+	{ 0x0041b1dc,   1},
+	{ 0x0041b214,   1},
+	{ 0x0041b2cc,   1},
+	{ 0x0041b3dc,   1},
+	{ 0x0041b414,   1},
+	{ 0x0041b4cc,   1},
+	{ 0x0041b5dc,   1},
+	{ 0x0041be0c,   3},
+	{ 0x0041becc,   1},
+	{ 0x0041bfdc,   1},
+	{ 0x0041c054,   1},
+	{ 0x0041c2b0,   1},
+	{ 0x0041c2b8,   3},
+	{ 0x0041c40c,   1},
+	{ 0x0041c48c,   2},
+	{ 0x0041c500,   1},
+	{ 0x0041c508,   2},
+	{ 0x0041c600,  11},
+	{ 0x0041c634,   2},
+	{ 0x0041c644,  11},
+	{ 0x0041c674,  10},
+	{ 0x0041c6a4,   1},
+	{ 0x0041c6ac,   2},
+	{ 0x0041c6e8,   1},
+	{ 0x0041c6f0,  28},
+	{ 0x0041c770,   1},
+	{ 0x0041c778,   2},
+	{ 0x0041c798,   2},
+	{ 0x0041c7dc,   1},
+	{ 0x0041c854,   1},
+	{ 0x0041cab0,   1},
+	{ 0x0041cab8,   3},
+	{ 0x0041cc0c,   1},
+	{ 0x0041cc8c,   2},
+	{ 0x0041cd00,   1},
+	{ 0x0041cd08,   2},
+	{ 0x0041ce00,  11},
+	{ 0x0041ce34,   2},
+	{ 0x0041ce44,  11},
+	{ 0x0041ce74,  10},
+	{ 0x0041cea4,   1},
+	{ 0x0041ceac,   2},
+	{ 0x0041cee8,   1},
+	{ 0x0041cef0,  28},
+	{ 0x0041cf70,   1},
+	{ 0x0041cf78,   2},
+	{ 0x0041cf98,   2},
+	{ 0x0041cfdc,   1},
+	{ 0x0041d054,   1},
+	{ 0x0041d2b0,   1},
+	{ 0x0041d2b8,   3},
+	{ 0x0041d40c,   1},
+	{ 0x0041d48c,   2},
+	{ 0x0041d500,   1},
+	{ 0x0041d508,   2},
+	{ 0x0041d600,  11},
+	{ 0x0041d634,   2},
+	{ 0x0041d644,  11},
+	{ 0x0041d674,  10},
+	{ 0x0041d6a4,   1},
+	{ 0x0041d6ac,   2},
+	{ 0x0041d6e8,   1},
+	{ 0x0041d6f0,  28},
+	{ 0x0041d770,   1},
+	{ 0x0041d778,   2},
+	{ 0x0041d798,   2},
+	{ 0x0041d7dc,   1},
+	{ 0x0041d854,   1},
+	{ 0x0041dab0,   1},
+	{ 0x0041dab8,   3},
+	{ 0x0041dc0c,   1},
+	{ 0x0041dc8c,   2},
+	{ 0x0041dd00,   1},
+	{ 0x0041dd08,   2},
+	{ 0x0041de00,  11},
+	{ 0x0041de34,   2},
+	{ 0x0041de44,  11},
+	{ 0x0041de74,  10},
+	{ 0x0041dea4,   1},
+	{ 0x0041deac,   2},
+	{ 0x0041dee8,   1},
+	{ 0x0041def0,  28},
+	{ 0x0041df70,   1},
+	{ 0x0041df78,   2},
+	{ 0x0041df98,   2},
+	{ 0x0041dfdc,   1},
+	{ 0x0041e054,   1},
+	{ 0x0041e2b0,   1},
+	{ 0x0041e2b8,   3},
+	{ 0x0041e40c,   1},
+	{ 0x0041e48c,   2},
+	{ 0x0041e500,   1},
+	{ 0x0041e508,   2},
+	{ 0x0041e600,  11},
+	{ 0x0041e634,   2},
+	{ 0x0041e644,  11},
+	{ 0x0041e674,  10},
+	{ 0x0041e6a4,   1},
+	{ 0x0041e6ac,   2},
+	{ 0x0041e6e8,   1},
+	{ 0x0041e6f0,  28},
+	{ 0x0041e770,   1},
+	{ 0x0041e778,   2},
+	{ 0x0041e798,   2},
+	{ 0x0041e7dc,   1},
+	{ 0x00500384,   1},
+	{ 0x005004a0,   1},
+	{ 0x00500604,   1},
+	{ 0x00500680,   1},
+	{ 0x00500714,   1},
+	{ 0x0050081c,   1},
+	{ 0x00500884,   1},
+	{ 0x005008b0,   1},
+	{ 0x005008c8,   3},
+	{ 0x005008fc,   1},
+	{ 0x00500b04,   1},
+	{ 0x00500c04,   1},
+	{ 0x00500c10,   8},
+	{ 0x00500c88,   1},
+	{ 0x00500d00,   1},
+	{ 0x00500e08,   1},
+	{ 0x00500f08,   1},
+	{ 0x00501000,   1},
+	{ 0x0050100c,   1},
+	{ 0x00501018,   1},
+	{ 0x00501854,   1},
+	{ 0x00501ab0,   1},
+	{ 0x00501ab8,   3},
+	{ 0x00501c0c,   1},
+	{ 0x00501c8c,   2},
+	{ 0x00501d00,   1},
+	{ 0x00501d08,   2},
+	{ 0x00501e00,  11},
+	{ 0x00501e34,   2},
+	{ 0x00501e44,  11},
+	{ 0x00501e74,  10},
+	{ 0x00501ea4,   1},
+	{ 0x00501eac,   2},
+	{ 0x00501ee8,   1},
+	{ 0x00501ef0,  28},
+	{ 0x00501f70,   1},
+	{ 0x00501f78,   2},
+	{ 0x00501f98,   2},
+	{ 0x00501fdc,   1},
+	{ 0x0050202c,   2},
+	{ 0x005020a0,   1},
+	{ 0x005020a8,   1},
+	{ 0x00502890,   2},
+	{ 0x005028a0,   3},
+	{ 0x005028b0,   2},
+	{ 0x00503014,   1},
+	{ 0x005030cc,   1},
+	{ 0x005031dc,   1},
+	{ 0x00503214,   1},
+	{ 0x005032cc,   1},
+	{ 0x005033dc,   1},
+	{ 0x00503414,   1},
+	{ 0x005034cc,   1},
+	{ 0x005035dc,   1},
+	{ 0x00503e14,   1},
+	{ 0x00503ecc,   1},
+	{ 0x00503fdc,   1},
+	{ 0x00504054,   1},
+	{ 0x005042b0,   1},
+	{ 0x005042b8,   3},
+	{ 0x0050440c,   1},
+	{ 0x0050448c,   2},
+	{ 0x00504500,   1},
+	{ 0x00504508,   2},
+	{ 0x00504600,  11},
+	{ 0x00504634,   2},
+	{ 0x00504644,  11},
+	{ 0x00504674,  10},
+	{ 0x005046a4,   1},
+	{ 0x005046ac,   2},
+	{ 0x005046e8,   1},
+	{ 0x005046f0,  28},
+	{ 0x00504770,   1},
+	{ 0x00504778,   2},
+	{ 0x00504798,   2},
+	{ 0x005047dc,   1},
+	{ 0x00504854,   1},
+	{ 0x00504ab0,   1},
+	{ 0x00504ab8,   3},
+	{ 0x00504c0c,   1},
+	{ 0x00504c8c,   2},
+	{ 0x00504d00,   1},
+	{ 0x00504d08,   2},
+	{ 0x00504e00,  11},
+	{ 0x00504e34,   2},
+	{ 0x00504e44,  11},
+	{ 0x00504e74,  10},
+	{ 0x00504ea4,   1},
+	{ 0x00504eac,   2},
+	{ 0x00504ee8,   1},
+	{ 0x00504ef0,  28},
+	{ 0x00504f70,   1},
+	{ 0x00504f78,   2},
+	{ 0x00504f98,   2},
+	{ 0x00504fdc,   1},
+	{ 0x00505054,   1},
+	{ 0x005052b0,   1},
+	{ 0x005052b8,   3},
+	{ 0x0050540c,   1},
+	{ 0x0050548c,   2},
+	{ 0x00505500,   1},
+	{ 0x00505508,   2},
+	{ 0x00505600,  11},
+	{ 0x00505634,   2},
+	{ 0x00505644,  11},
+	{ 0x00505674,  10},
+	{ 0x005056a4,   1},
+	{ 0x005056ac,   2},
+	{ 0x005056e8,   1},
+	{ 0x005056f0,  28},
+	{ 0x00505770,   1},
+	{ 0x00505778,   2},
+	{ 0x00505798,   2},
+	{ 0x005057dc,   1},
+	{ 0x00505854,   1},
+	{ 0x00505ab0,   1},
+	{ 0x00505ab8,   3},
+	{ 0x00505c0c,   1},
+	{ 0x00505c8c,   2},
+	{ 0x00505d00,   1},
+	{ 0x00505d08,   2},
+	{ 0x00505e00,  11},
+	{ 0x00505e34,   2},
+	{ 0x00505e44,  11},
+	{ 0x00505e74,  10},
+	{ 0x00505ea4,   1},
+	{ 0x00505eac,   2},
+	{ 0x00505ee8,   1},
+	{ 0x00505ef0,  28},
+	{ 0x00505f70,   1},
+	{ 0x00505f78,   2},
+	{ 0x00505f98,   2},
+	{ 0x00505fdc,   1},
+	{ 0x00506054,   1},
+	{ 0x005062b0,   1},
+	{ 0x005062b8,   3},
+	{ 0x0050640c,   1},
+	{ 0x0050648c,   2},
+	{ 0x00506500,   1},
+	{ 0x00506508,   2},
+	{ 0x00506600,  11},
+	{ 0x00506634,   2},
+	{ 0x00506644,  11},
+	{ 0x00506674,  10},
+	{ 0x005066a4,   1},
+	{ 0x005066ac,   2},
+	{ 0x005066e8,   1},
+	{ 0x005066f0,  28},
+	{ 0x00506770,   1},
+	{ 0x00506778,   2},
+	{ 0x00506798,   2},
+	{ 0x005067dc,   1},
+	{ 0x00508384,   1},
+	{ 0x005084a0,   1},
+	{ 0x00508604,   1},
+	{ 0x00508680,   1},
+	{ 0x00508714,   1},
+	{ 0x0050881c,   1},
+	{ 0x00508884,   1},
+	{ 0x005088b0,   1},
+	{ 0x005088c8,   2},
+	{ 0x00508b04,   1},
+	{ 0x00508c04,   1},
+	{ 0x00508c10,   8},
+	{ 0x00508c88,   1},
+	{ 0x00508d00,   1},
+	{ 0x00508e08,   1},
+	{ 0x00508f08,   1},
+	{ 0x00509000,   1},
+	{ 0x0050900c,   1},
+	{ 0x00509018,   1},
+	{ 0x00509854,   1},
+	{ 0x00509ab0,   1},
+	{ 0x00509ab8,   3},
+	{ 0x00509c0c,   1},
+	{ 0x00509c8c,   2},
+	{ 0x00509d00,   1},
+	{ 0x00509d08,   2},
+	{ 0x00509e00,  11},
+	{ 0x00509e34,   2},
+	{ 0x00509e44,  11},
+	{ 0x00509e74,  10},
+	{ 0x00509ea4,   1},
+	{ 0x00509eac,   2},
+	{ 0x00509ee8,   1},
+	{ 0x00509ef0,  28},
+	{ 0x00509f70,   1},
+	{ 0x00509f78,   2},
+	{ 0x00509f98,   2},
+	{ 0x00509fdc,   1},
+	{ 0x0050a02c,   2},
+	{ 0x0050a0a0,   1},
+	{ 0x0050a0a8,   1},
+	{ 0x0050a890,   2},
+	{ 0x0050a8a0,   3},
+	{ 0x0050a8b0,   2},
+	{ 0x0050b014,   1},
+	{ 0x0050b0cc,   1},
+	{ 0x0050b1dc,   1},
+	{ 0x0050b214,   1},
+	{ 0x0050b2cc,   1},
+	{ 0x0050b3dc,   1},
+	{ 0x0050b414,   1},
+	{ 0x0050b4cc,   1},
+	{ 0x0050b5dc,   1},
+	{ 0x0050be14,   1},
+	{ 0x0050becc,   1},
+	{ 0x0050bfdc,   1},
+	{ 0x0050c054,   1},
+	{ 0x0050c2b0,   1},
+	{ 0x0050c2b8,   3},
+	{ 0x0050c40c,   1},
+	{ 0x0050c48c,   2},
+	{ 0x0050c500,   1},
+	{ 0x0050c508,   2},
+	{ 0x0050c600,  11},
+	{ 0x0050c634,   2},
+	{ 0x0050c644,  11},
+	{ 0x0050c674,  10},
+	{ 0x0050c6a4,   1},
+	{ 0x0050c6ac,   2},
+	{ 0x0050c6e8,   1},
+	{ 0x0050c6f0,  28},
+	{ 0x0050c770,   1},
+	{ 0x0050c778,   2},
+	{ 0x0050c798,   2},
+	{ 0x0050c7dc,   1},
+	{ 0x0050c854,   1},
+	{ 0x0050cab0,   1},
+	{ 0x0050cab8,   3},
+	{ 0x0050cc0c,   1},
+	{ 0x0050cc8c,   2},
+	{ 0x0050cd00,   1},
+	{ 0x0050cd08,   2},
+	{ 0x0050ce00,  11},
+	{ 0x0050ce34,   2},
+	{ 0x0050ce44,  11},
+	{ 0x0050ce74,  10},
+	{ 0x0050cea4,   1},
+	{ 0x0050ceac,   2},
+	{ 0x0050cee8,   1},
+	{ 0x0050cef0,  28},
+	{ 0x0050cf70,   1},
+	{ 0x0050cf78,   2},
+	{ 0x0050cf98,   2},
+	{ 0x0050cfdc,   1},
+	{ 0x0050d054,   1},
+	{ 0x0050d2b0,   1},
+	{ 0x0050d2b8,   3},
+	{ 0x0050d40c,   1},
+	{ 0x0050d48c,   2},
+	{ 0x0050d500,   1},
+	{ 0x0050d508,   2},
+	{ 0x0050d600,  11},
+	{ 0x0050d634,   2},
+	{ 0x0050d644,  11},
+	{ 0x0050d674,  10},
+	{ 0x0050d6a4,   1},
+	{ 0x0050d6ac,   2},
+	{ 0x0050d6e8,   1},
+	{ 0x0050d6f0,  28},
+	{ 0x0050d770,   1},
+	{ 0x0050d778,   2},
+	{ 0x0050d798,   2},
+	{ 0x0050d7dc,   1},
+	{ 0x0050d854,   1},
+	{ 0x0050dab0,   1},
+	{ 0x0050dab8,   3},
+	{ 0x0050dc0c,   1},
+	{ 0x0050dc8c,   2},
+	{ 0x0050dd00,   1},
+	{ 0x0050dd08,   2},
+	{ 0x0050de00,  11},
+	{ 0x0050de34,   2},
+	{ 0x0050de44,  11},
+	{ 0x0050de74,  10},
+	{ 0x0050dea4,   1},
+	{ 0x0050deac,   2},
+	{ 0x0050dee8,   1},
+	{ 0x0050def0,  28},
+	{ 0x0050df70,   1},
+	{ 0x0050df78,   2},
+	{ 0x0050df98,   2},
+	{ 0x0050dfdc,   1},
+	{ 0x0050e054,   1},
+	{ 0x0050e2b0,   1},
+	{ 0x0050e2b8,   3},
+	{ 0x0050e40c,   1},
+	{ 0x0050e48c,   2},
+	{ 0x0050e500,   1},
+	{ 0x0050e508,   2},
+	{ 0x0050e600,  11},
+	{ 0x0050e634,   2},
+	{ 0x0050e644,  11},
+	{ 0x0050e674,  10},
+	{ 0x0050e6a4,   1},
+	{ 0x0050e6ac,   2},
+	{ 0x0050e6e8,   1},
+	{ 0x0050e6f0,  28},
+	{ 0x0050e770,   1},
+	{ 0x0050e778,   2},
+	{ 0x0050e798,   2},
+	{ 0x0050e7dc,   1},
+	{ 0x00510384,   1},
+	{ 0x005104a0,   1},
+	{ 0x00510604,   1},
+	{ 0x00510680,   1},
+	{ 0x00510714,   1},
+	{ 0x0051081c,   1},
+	{ 0x00510884,   1},
+	{ 0x005108b0,   1},
+	{ 0x005108c8,   2},
+	{ 0x00510b04,   1},
+	{ 0x00510c04,   1},
+	{ 0x00510c10,   8},
+	{ 0x00510c88,   1},
+	{ 0x00510d00,   1},
+	{ 0x00510e08,   1},
+	{ 0x00510f08,   1},
+	{ 0x00511000,   1},
+	{ 0x0051100c,   1},
+	{ 0x00511018,   1},
+	{ 0x00511854,   1},
+	{ 0x00511ab0,   1},
+	{ 0x00511ab8,   3},
+	{ 0x00511c0c,   1},
+	{ 0x00511c8c,   2},
+	{ 0x00511d00,   1},
+	{ 0x00511d08,   2},
+	{ 0x00511e00,  11},
+	{ 0x00511e34,   2},
+	{ 0x00511e44,  11},
+	{ 0x00511e74,  10},
+	{ 0x00511ea4,   1},
+	{ 0x00511eac,   2},
+	{ 0x00511ee8,   1},
+	{ 0x00511ef0,  28},
+	{ 0x00511f70,   1},
+	{ 0x00511f78,   2},
+	{ 0x00511f98,   2},
+	{ 0x00511fdc,   1},
+	{ 0x0051202c,   2},
+	{ 0x005120a0,   1},
+	{ 0x005120a8,   1},
+	{ 0x00512890,   2},
+	{ 0x005128a0,   3},
+	{ 0x005128b0,   2},
+	{ 0x00513014,   1},
+	{ 0x005130cc,   1},
+	{ 0x005131dc,   1},
+	{ 0x00513214,   1},
+	{ 0x005132cc,   1},
+	{ 0x005133dc,   1},
+	{ 0x00513414,   1},
+	{ 0x005134cc,   1},
+	{ 0x005135dc,   1},
+	{ 0x00513e14,   1},
+	{ 0x00513ecc,   1},
+	{ 0x00513fdc,   1},
+	{ 0x00514054,   1},
+	{ 0x005142b0,   1},
+	{ 0x005142b8,   3},
+	{ 0x0051440c,   1},
+	{ 0x0051448c,   2},
+	{ 0x00514500,   1},
+	{ 0x00514508,   2},
+	{ 0x00514600,  11},
+	{ 0x00514634,   2},
+	{ 0x00514644,  11},
+	{ 0x00514674,  10},
+	{ 0x005146a4,   1},
+	{ 0x005146ac,   2},
+	{ 0x005146e8,   1},
+	{ 0x005146f0,  28},
+	{ 0x00514770,   1},
+	{ 0x00514778,   2},
+	{ 0x00514798,   2},
+	{ 0x005147dc,   1},
+	{ 0x00514854,   1},
+	{ 0x00514ab0,   1},
+	{ 0x00514ab8,   3},
+	{ 0x00514c0c,   1},
+	{ 0x00514c8c,   2},
+	{ 0x00514d00,   1},
+	{ 0x00514d08,   2},
+	{ 0x00514e00,  11},
+	{ 0x00514e34,   2},
+	{ 0x00514e44,  11},
+	{ 0x00514e74,  10},
+	{ 0x00514ea4,   1},
+	{ 0x00514eac,   2},
+	{ 0x00514ee8,   1},
+	{ 0x00514ef0,  28},
+	{ 0x00514f70,   1},
+	{ 0x00514f78,   2},
+	{ 0x00514f98,   2},
+	{ 0x00514fdc,   1},
+	{ 0x00515054,   1},
+	{ 0x005152b0,   1},
+	{ 0x005152b8,   3},
+	{ 0x0051540c,   1},
+	{ 0x0051548c,   2},
+	{ 0x00515500,   1},
+	{ 0x00515508,   2},
+	{ 0x00515600,  11},
+	{ 0x00515634,   2},
+	{ 0x00515644,  11},
+	{ 0x00515674,  10},
+	{ 0x005156a4,   1},
+	{ 0x005156ac,   2},
+	{ 0x005156e8,   1},
+	{ 0x005156f0,  28},
+	{ 0x00515770,   1},
+	{ 0x00515778,   2},
+	{ 0x00515798,   2},
+	{ 0x005157dc,   1},
+	{ 0x00515854,   1},
+	{ 0x00515ab0,   1},
+	{ 0x00515ab8,   3},
+	{ 0x00515c0c,   1},
+	{ 0x00515c8c,   2},
+	{ 0x00515d00,   1},
+	{ 0x00515d08,   2},
+	{ 0x00515e00,  11},
+	{ 0x00515e34,   2},
+	{ 0x00515e44,  11},
+	{ 0x00515e74,  10},
+	{ 0x00515ea4,   1},
+	{ 0x00515eac,   2},
+	{ 0x00515ee8,   1},
+	{ 0x00515ef0,  28},
+	{ 0x00515f70,   1},
+	{ 0x00515f78,   2},
+	{ 0x00515f98,   2},
+	{ 0x00515fdc,   1},
+	{ 0x00516054,   1},
+	{ 0x005162b0,   1},
+	{ 0x005162b8,   3},
+	{ 0x0051640c,   1},
+	{ 0x0051648c,   2},
+	{ 0x00516500,   1},
+	{ 0x00516508,   2},
+	{ 0x00516600,  11},
+	{ 0x00516634,   2},
+	{ 0x00516644,  11},
+	{ 0x00516674,  10},
+	{ 0x005166a4,   1},
+	{ 0x005166ac,   2},
+	{ 0x005166e8,   1},
+	{ 0x005166f0,  28},
+	{ 0x00516770,   1},
+	{ 0x00516778,   2},
+	{ 0x00516798,   2},
+	{ 0x005167dc,   1},
+	{ 0x00518384,   1},
+	{ 0x005184a0,   1},
+	{ 0x00518604,   1},
+	{ 0x00518680,   1},
+	{ 0x00518714,   1},
+	{ 0x0051881c,   1},
+	{ 0x00518884,   1},
+	{ 0x005188b0,   1},
+	{ 0x005188c8,   2},
+	{ 0x00518b04,   1},
+	{ 0x00518c04,   1},
+	{ 0x00518c10,   8},
+	{ 0x00518c88,   1},
+	{ 0x00518d00,   1},
+	{ 0x00518e08,   1},
+	{ 0x00518f08,   1},
+	{ 0x00519000,   1},
+	{ 0x0051900c,   1},
+	{ 0x00519018,   1},
+	{ 0x00519854,   1},
+	{ 0x00519ab0,   1},
+	{ 0x00519ab8,   3},
+	{ 0x00519c0c,   1},
+	{ 0x00519c8c,   2},
+	{ 0x00519d00,   1},
+	{ 0x00519d08,   2},
+	{ 0x00519e00,  11},
+	{ 0x00519e34,   2},
+	{ 0x00519e44,  11},
+	{ 0x00519e74,  10},
+	{ 0x00519ea4,   1},
+	{ 0x00519eac,   2},
+	{ 0x00519ee8,   1},
+	{ 0x00519ef0,  28},
+	{ 0x00519f70,   1},
+	{ 0x00519f78,   2},
+	{ 0x00519f98,   2},
+	{ 0x00519fdc,   1},
+	{ 0x0051a02c,   2},
+	{ 0x0051a0a0,   1},
+	{ 0x0051a0a8,   1},
+	{ 0x0051a890,   2},
+	{ 0x0051a8a0,   3},
+	{ 0x0051a8b0,   2},
+	{ 0x0051b014,   1},
+	{ 0x0051b0cc,   1},
+	{ 0x0051b1dc,   1},
+	{ 0x0051b214,   1},
+	{ 0x0051b2cc,   1},
+	{ 0x0051b3dc,   1},
+	{ 0x0051b414,   1},
+	{ 0x0051b4cc,   1},
+	{ 0x0051b5dc,   1},
+	{ 0x0051be14,   1},
+	{ 0x0051becc,   1},
+	{ 0x0051bfdc,   1},
+	{ 0x0051c054,   1},
+	{ 0x0051c2b0,   1},
+	{ 0x0051c2b8,   3},
+	{ 0x0051c40c,   1},
+	{ 0x0051c48c,   2},
+	{ 0x0051c500,   1},
+	{ 0x0051c508,   2},
+	{ 0x0051c600,  11},
+	{ 0x0051c634,   2},
+	{ 0x0051c644,  11},
+	{ 0x0051c674,  10},
+	{ 0x0051c6a4,   1},
+	{ 0x0051c6ac,   2},
+	{ 0x0051c6e8,   1},
+	{ 0x0051c6f0,  28},
+	{ 0x0051c770,   1},
+	{ 0x0051c778,   2},
+	{ 0x0051c798,   2},
+	{ 0x0051c7dc,   1},
+	{ 0x0051c854,   1},
+	{ 0x0051cab0,   1},
+	{ 0x0051cab8,   3},
+	{ 0x0051cc0c,   1},
+	{ 0x0051cc8c,   2},
+	{ 0x0051cd00,   1},
+	{ 0x0051cd08,   2},
+	{ 0x0051ce00,  11},
+	{ 0x0051ce34,   2},
+	{ 0x0051ce44,  11},
+	{ 0x0051ce74,  10},
+	{ 0x0051cea4,   1},
+	{ 0x0051ceac,   2},
+	{ 0x0051cee8,   1},
+	{ 0x0051cef0,  28},
+	{ 0x0051cf70,   1},
+	{ 0x0051cf78,   2},
+	{ 0x0051cf98,   2},
+	{ 0x0051cfdc,   1},
+	{ 0x0051d054,   1},
+	{ 0x0051d2b0,   1},
+	{ 0x0051d2b8,   3},
+	{ 0x0051d40c,   1},
+	{ 0x0051d48c,   2},
+	{ 0x0051d500,   1},
+	{ 0x0051d508,   2},
+	{ 0x0051d600,  11},
+	{ 0x0051d634,   2},
+	{ 0x0051d644,  11},
+	{ 0x0051d674,  10},
+	{ 0x0051d6a4,   1},
+	{ 0x0051d6ac,   2},
+	{ 0x0051d6e8,   1},
+	{ 0x0051d6f0,  28},
+	{ 0x0051d770,   1},
+	{ 0x0051d778,   2},
+	{ 0x0051d798,   2},
+	{ 0x0051d7dc,   1},
+	{ 0x0051d854,   1},
+	{ 0x0051dab0,   1},
+	{ 0x0051dab8,   3},
+	{ 0x0051dc0c,   1},
+	{ 0x0051dc8c,   2},
+	{ 0x0051dd00,   1},
+	{ 0x0051dd08,   2},
+	{ 0x0051de00,  11},
+	{ 0x0051de34,   2},
+	{ 0x0051de44,  11},
+	{ 0x0051de74,  10},
+	{ 0x0051dea4,   1},
+	{ 0x0051deac,   2},
+	{ 0x0051dee8,   1},
+	{ 0x0051def0,  28},
+	{ 0x0051df70,   1},
+	{ 0x0051df78,   2},
+	{ 0x0051df98,   2},
+	{ 0x0051dfdc,   1},
+	{ 0x0051e054,   1},
+	{ 0x0051e2b0,   1},
+	{ 0x0051e2b8,   3},
+	{ 0x0051e40c,   1},
+	{ 0x0051e48c,   2},
+	{ 0x0051e500,   1},
+	{ 0x0051e508,   2},
+	{ 0x0051e600,  11},
+	{ 0x0051e634,   2},
+	{ 0x0051e644,  11},
+	{ 0x0051e674,  10},
+	{ 0x0051e6a4,   1},
+	{ 0x0051e6ac,   2},
+	{ 0x0051e6e8,   1},
+	{ 0x0051e6f0,  28},
+	{ 0x0051e770,   1},
+	{ 0x0051e778,   2},
+	{ 0x0051e798,   2},
+	{ 0x0051e7dc,   1},
+	{ 0x00520384,   1},
+	{ 0x005204a0,   1},
+	{ 0x00520604,   1},
+	{ 0x00520680,   1},
+	{ 0x00520714,   1},
+	{ 0x0052081c,   1},
+	{ 0x00520884,   1},
+	{ 0x005208b0,   1},
+	{ 0x005208c8,   2},
+	{ 0x00520b04,   1},
+	{ 0x00520c04,   1},
+	{ 0x00520c10,   8},
+	{ 0x00520c88,   1},
+	{ 0x00520d00,   1},
+	{ 0x00520e08,   1},
+	{ 0x00520f08,   1},
+	{ 0x00521000,   1},
+	{ 0x0052100c,   1},
+	{ 0x00521018,   1},
+	{ 0x00521854,   1},
+	{ 0x00521ab0,   1},
+	{ 0x00521ab8,   3},
+	{ 0x00521c0c,   1},
+	{ 0x00521c8c,   2},
+	{ 0x00521d00,   1},
+	{ 0x00521d08,   2},
+	{ 0x00521e00,  11},
+	{ 0x00521e34,   2},
+	{ 0x00521e44,  11},
+	{ 0x00521e74,  10},
+	{ 0x00521ea4,   1},
+	{ 0x00521eac,   2},
+	{ 0x00521ee8,   1},
+	{ 0x00521ef0,  28},
+	{ 0x00521f70,   1},
+	{ 0x00521f78,   2},
+	{ 0x00521f98,   2},
+	{ 0x00521fdc,   1},
+	{ 0x0052202c,   2},
+	{ 0x005220a0,   1},
+	{ 0x005220a8,   1},
+	{ 0x00522890,   2},
+	{ 0x005228a0,   3},
+	{ 0x005228b0,   2},
+	{ 0x00523014,   1},
+	{ 0x005230cc,   1},
+	{ 0x005231dc,   1},
+	{ 0x00523214,   1},
+	{ 0x005232cc,   1},
+	{ 0x005233dc,   1},
+	{ 0x00523414,   1},
+	{ 0x005234cc,   1},
+	{ 0x005235dc,   1},
+	{ 0x00523e14,   1},
+	{ 0x00523ecc,   1},
+	{ 0x00523fdc,   1},
+	{ 0x00524054,   1},
+	{ 0x005242b0,   1},
+	{ 0x005242b8,   3},
+	{ 0x0052440c,   1},
+	{ 0x0052448c,   2},
+	{ 0x00524500,   1},
+	{ 0x00524508,   2},
+	{ 0x00524600,  11},
+	{ 0x00524634,   2},
+	{ 0x00524644,  11},
+	{ 0x00524674,  10},
+	{ 0x005246a4,   1},
+	{ 0x005246ac,   2},
+	{ 0x005246e8,   1},
+	{ 0x005246f0,  28},
+	{ 0x00524770,   1},
+	{ 0x00524778,   2},
+	{ 0x00524798,   2},
+	{ 0x005247dc,   1},
+	{ 0x00524854,   1},
+	{ 0x00524ab0,   1},
+	{ 0x00524ab8,   3},
+	{ 0x00524c0c,   1},
+	{ 0x00524c8c,   2},
+	{ 0x00524d00,   1},
+	{ 0x00524d08,   2},
+	{ 0x00524e00,  11},
+	{ 0x00524e34,   2},
+	{ 0x00524e44,  11},
+	{ 0x00524e74,  10},
+	{ 0x00524ea4,   1},
+	{ 0x00524eac,   2},
+	{ 0x00524ee8,   1},
+	{ 0x00524ef0,  28},
+	{ 0x00524f70,   1},
+	{ 0x00524f78,   2},
+	{ 0x00524f98,   2},
+	{ 0x00524fdc,   1},
+	{ 0x00525054,   1},
+	{ 0x005252b0,   1},
+	{ 0x005252b8,   3},
+	{ 0x0052540c,   1},
+	{ 0x0052548c,   2},
+	{ 0x00525500,   1},
+	{ 0x00525508,   2},
+	{ 0x00525600,  11},
+	{ 0x00525634,   2},
+	{ 0x00525644,  11},
+	{ 0x00525674,  10},
+	{ 0x005256a4,   1},
+	{ 0x005256ac,   2},
+	{ 0x005256e8,   1},
+	{ 0x005256f0,  28},
+	{ 0x00525770,   1},
+	{ 0x00525778,   2},
+	{ 0x00525798,   2},
+	{ 0x005257dc,   1},
+	{ 0x00525854,   1},
+	{ 0x00525ab0,   1},
+	{ 0x00525ab8,   3},
+	{ 0x00525c0c,   1},
+	{ 0x00525c8c,   2},
+	{ 0x00525d00,   1},
+	{ 0x00525d08,   2},
+	{ 0x00525e00,  11},
+	{ 0x00525e34,   2},
+	{ 0x00525e44,  11},
+	{ 0x00525e74,  10},
+	{ 0x00525ea4,   1},
+	{ 0x00525eac,   2},
+	{ 0x00525ee8,   1},
+	{ 0x00525ef0,  28},
+	{ 0x00525f70,   1},
+	{ 0x00525f78,   2},
+	{ 0x00525f98,   2},
+	{ 0x00525fdc,   1},
+	{ 0x00526054,   1},
+	{ 0x005262b0,   1},
+	{ 0x005262b8,   3},
+	{ 0x0052640c,   1},
+	{ 0x0052648c,   2},
+	{ 0x00526500,   1},
+	{ 0x00526508,   2},
+	{ 0x00526600,  11},
+	{ 0x00526634,   2},
+	{ 0x00526644,  11},
+	{ 0x00526674,  10},
+	{ 0x005266a4,   1},
+	{ 0x005266ac,   2},
+	{ 0x005266e8,   1},
+	{ 0x005266f0,  28},
+	{ 0x00526770,   1},
+	{ 0x00526778,   2},
+	{ 0x00526798,   2},
+	{ 0x005267dc,   1},
+	{ 0x00528384,   1},
+	{ 0x005284a0,   1},
+	{ 0x00528604,   1},
+	{ 0x00528680,   1},
+	{ 0x00528714,   1},
+	{ 0x0052881c,   1},
+	{ 0x00528884,   1},
+	{ 0x005288b0,   1},
+	{ 0x005288c8,   2},
+	{ 0x00528b04,   1},
+	{ 0x00528c04,   1},
+	{ 0x00528c10,   8},
+	{ 0x00528c88,   1},
+	{ 0x00528d00,   1},
+	{ 0x00528e08,   1},
+	{ 0x00528f08,   1},
+	{ 0x00529000,   1},
+	{ 0x0052900c,   1},
+	{ 0x00529018,   1},
+	{ 0x00529854,   1},
+	{ 0x00529ab0,   1},
+	{ 0x00529ab8,   3},
+	{ 0x00529c0c,   1},
+	{ 0x00529c8c,   2},
+	{ 0x00529d00,   1},
+	{ 0x00529d08,   2},
+	{ 0x00529e00,  11},
+	{ 0x00529e34,   2},
+	{ 0x00529e44,  11},
+	{ 0x00529e74,  10},
+	{ 0x00529ea4,   1},
+	{ 0x00529eac,   2},
+	{ 0x00529ee8,   1},
+	{ 0x00529ef0,  28},
+	{ 0x00529f70,   1},
+	{ 0x00529f78,   2},
+	{ 0x00529f98,   2},
+	{ 0x00529fdc,   1},
+	{ 0x0052a02c,   2},
+	{ 0x0052a0a0,   1},
+	{ 0x0052a0a8,   1},
+	{ 0x0052a890,   2},
+	{ 0x0052a8a0,   3},
+	{ 0x0052a8b0,   2},
+	{ 0x0052b014,   1},
+	{ 0x0052b0cc,   1},
+	{ 0x0052b1dc,   1},
+	{ 0x0052b214,   1},
+	{ 0x0052b2cc,   1},
+	{ 0x0052b3dc,   1},
+	{ 0x0052b414,   1},
+	{ 0x0052b4cc,   1},
+	{ 0x0052b5dc,   1},
+	{ 0x0052be14,   1},
+	{ 0x0052becc,   1},
+	{ 0x0052bfdc,   1},
+	{ 0x0052c054,   1},
+	{ 0x0052c2b0,   1},
+	{ 0x0052c2b8,   3},
+	{ 0x0052c40c,   1},
+	{ 0x0052c48c,   2},
+	{ 0x0052c500,   1},
+	{ 0x0052c508,   2},
+	{ 0x0052c600,  11},
+	{ 0x0052c634,   2},
+	{ 0x0052c644,  11},
+	{ 0x0052c674,  10},
+	{ 0x0052c6a4,   1},
+	{ 0x0052c6ac,   2},
+	{ 0x0052c6e8,   1},
+	{ 0x0052c6f0,  28},
+	{ 0x0052c770,   1},
+	{ 0x0052c778,   2},
+	{ 0x0052c798,   2},
+	{ 0x0052c7dc,   1},
+	{ 0x0052c854,   1},
+	{ 0x0052cab0,   1},
+	{ 0x0052cab8,   3},
+	{ 0x0052cc0c,   1},
+	{ 0x0052cc8c,   2},
+	{ 0x0052cd00,   1},
+	{ 0x0052cd08,   2},
+	{ 0x0052ce00,  11},
+	{ 0x0052ce34,   2},
+	{ 0x0052ce44,  11},
+	{ 0x0052ce74,  10},
+	{ 0x0052cea4,   1},
+	{ 0x0052ceac,   2},
+	{ 0x0052cee8,   1},
+	{ 0x0052cef0,  28},
+	{ 0x0052cf70,   1},
+	{ 0x0052cf78,   2},
+	{ 0x0052cf98,   2},
+	{ 0x0052cfdc,   1},
+	{ 0x0052d054,   1},
+	{ 0x0052d2b0,   1},
+	{ 0x0052d2b8,   3},
+	{ 0x0052d40c,   1},
+	{ 0x0052d48c,   2},
+	{ 0x0052d500,   1},
+	{ 0x0052d508,   2},
+	{ 0x0052d600,  11},
+	{ 0x0052d634,   2},
+	{ 0x0052d644,  11},
+	{ 0x0052d674,  10},
+	{ 0x0052d6a4,   1},
+	{ 0x0052d6ac,   2},
+	{ 0x0052d6e8,   1},
+	{ 0x0052d6f0,  28},
+	{ 0x0052d770,   1},
+	{ 0x0052d778,   2},
+	{ 0x0052d798,   2},
+	{ 0x0052d7dc,   1},
+	{ 0x0052d854,   1},
+	{ 0x0052dab0,   1},
+	{ 0x0052dab8,   3},
+	{ 0x0052dc0c,   1},
+	{ 0x0052dc8c,   2},
+	{ 0x0052dd00,   1},
+	{ 0x0052dd08,   2},
+	{ 0x0052de00,  11},
+	{ 0x0052de34,   2},
+	{ 0x0052de44,  11},
+	{ 0x0052de74,  10},
+	{ 0x0052dea4,   1},
+	{ 0x0052deac,   2},
+	{ 0x0052dee8,   1},
+	{ 0x0052def0,  28},
+	{ 0x0052df70,   1},
+	{ 0x0052df78,   2},
+	{ 0x0052df98,   2},
+	{ 0x0052dfdc,   1},
+	{ 0x0052e054,   1},
+	{ 0x0052e2b0,   1},
+	{ 0x0052e2b8,   3},
+	{ 0x0052e40c,   1},
+	{ 0x0052e48c,   2},
+	{ 0x0052e500,   1},
+	{ 0x0052e508,   2},
+	{ 0x0052e600,  11},
+	{ 0x0052e634,   2},
+	{ 0x0052e644,  11},
+	{ 0x0052e674,  10},
+	{ 0x0052e6a4,   1},
+	{ 0x0052e6ac,   2},
+	{ 0x0052e6e8,   1},
+	{ 0x0052e6f0,  28},
+	{ 0x0052e770,   1},
+	{ 0x0052e778,   2},
+	{ 0x0052e798,   2},
+	{ 0x0052e7dc,   1},
+	{ 0x00900100,   1},
+	{ 0x00904100,   1},
+	{ 0x00908100,   1},
+	{ 0x0090c100,   1},
+	{ 0x00910100,   1},
+	{ 0x00914100,   1},
+	{ 0x009a0100,   1},
+};
+
+/* Number of entries in gp106_global_whitelist_ranges. */
+static const u32 gp106_global_whitelist_ranges_count =
+	ARRAY_SIZE(gp106_global_whitelist_ranges);
+
+/* context: no separate table; context getters reuse the global ranges */
+
+/* runcontrol: the u32 list and the range list are both empty on gp106 */
+static const u32 gp106_runcontrol_whitelist[] = {
+};
+static const u32 gp106_runcontrol_whitelist_count =
+	ARRAY_SIZE(gp106_runcontrol_whitelist);
+
+static const struct regop_offset_range gp106_runcontrol_whitelist_ranges[] = {
+};
+static const u32 gp106_runcontrol_whitelist_ranges_count =
+	ARRAY_SIZE(gp106_runcontrol_whitelist_ranges);
+
+
+/* quad ctl: the u32 list and the range list are both empty on gp106 */
+static const u32 gp106_qctl_whitelist[] = {
+};
+static const u32 gp106_qctl_whitelist_count =
+	ARRAY_SIZE(gp106_qctl_whitelist);
+
+static const struct regop_offset_range gp106_qctl_whitelist_ranges[] = {
+};
+static const u32 gp106_qctl_whitelist_ranges_count =
+	ARRAY_SIZE(gp106_qctl_whitelist_ranges);
+
+static const struct regop_offset_range *gp106_get_global_whitelist_ranges(void)
+{
+	return gp106_global_whitelist_ranges; /* file-scope const table */
+}
+
+static int gp106_get_global_whitelist_ranges_count(void)
+{
+	return gp106_global_whitelist_ranges_count; /* u32 converted to int */
+}
+
+static const struct regop_offset_range *gp106_get_context_whitelist_ranges(void)
+{
+	return gp106_global_whitelist_ranges; /* reuses the global table */
+}
+
+static int gp106_get_context_whitelist_ranges_count(void)
+{
+	return gp106_global_whitelist_ranges_count; /* global table's count */
+}
+
+static const u32 *gp106_get_runcontrol_whitelist(void)
+{
+	return gp106_runcontrol_whitelist; /* empty on gp106 */
+}
+
+static int gp106_get_runcontrol_whitelist_count(void)
+{
+	return gp106_runcontrol_whitelist_count; /* empty table => 0 */
+}
+
+static const
+struct regop_offset_range *gp106_get_runcontrol_whitelist_ranges(void)
+{
+	return gp106_runcontrol_whitelist_ranges; /* empty on gp106 */
+}
+
+static int gp106_get_runcontrol_whitelist_ranges_count(void)
+{
+	return gp106_runcontrol_whitelist_ranges_count; /* empty table => 0 */
+}
+
+static const u32 *gp106_get_qctl_whitelist(void)
+{
+	return gp106_qctl_whitelist; /* empty on gp106 */
+}
+
+static int gp106_get_qctl_whitelist_count(void)
+{
+	return gp106_qctl_whitelist_count; /* empty table => 0 */
+}
+
+static const struct regop_offset_range *gp106_get_qctl_whitelist_ranges(void)
+{
+	return gp106_qctl_whitelist_ranges; /* empty on gp106 */
+}
+
+static int gp106_get_qctl_whitelist_ranges_count(void)
+{
+	return gp106_qctl_whitelist_ranges_count; /* empty table => 0 */
+}
+
+static int gp106_apply_smpc_war(struct dbg_session_gk20a *dbg_s)
+{
+	/* The SMPC WAR is not needed on gp106; always returns 0. */
+	return 0;
+}
+/* Fill gops->regops with the gp106 whitelist getters and the SMPC WAR hook. */
+void gp106_init_regops(struct gpu_ops *gops)
+{
+	gops->regops.get_global_whitelist_ranges =
+		gp106_get_global_whitelist_ranges;
+	gops->regops.get_global_whitelist_ranges_count =
+		gp106_get_global_whitelist_ranges_count;
+	/* context getters hand back the global ranges on gp106 */
+	gops->regops.get_context_whitelist_ranges =
+		gp106_get_context_whitelist_ranges;
+	gops->regops.get_context_whitelist_ranges_count =
+		gp106_get_context_whitelist_ranges_count;
+	/* runcontrol: u32 list, then ranges (both tables are empty) */
+	gops->regops.get_runcontrol_whitelist =
+		gp106_get_runcontrol_whitelist;
+	gops->regops.get_runcontrol_whitelist_count =
+		gp106_get_runcontrol_whitelist_count;
+
+	gops->regops.get_runcontrol_whitelist_ranges =
+		gp106_get_runcontrol_whitelist_ranges;
+	gops->regops.get_runcontrol_whitelist_ranges_count =
+		gp106_get_runcontrol_whitelist_ranges_count;
+	/* quad ctl: u32 list, then ranges (both tables are empty) */
+	gops->regops.get_qctl_whitelist =
+		gp106_get_qctl_whitelist;
+	gops->regops.get_qctl_whitelist_count =
+		gp106_get_qctl_whitelist_count;
+
+	gops->regops.get_qctl_whitelist_ranges =
+		gp106_get_qctl_whitelist_ranges;
+	gops->regops.get_qctl_whitelist_ranges_count =
+		gp106_get_qctl_whitelist_ranges_count;
+	/* SMPC WAR: the gp106 hook does nothing and returns 0 */
+	gops->regops.apply_smpc_war =
+		gp106_apply_smpc_war;
+}
diff --git a/drivers/gpu/nvgpu/gp106/regops_gp106.h b/drivers/gpu/nvgpu/gp106/regops_gp106.h
new file mode 100644
index 00000000..7f6b6861
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/regops_gp106.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * Tegra GP106 GPU Debugger Driver Register Ops
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __REGOPS_GP106_H_
+#define __REGOPS_GP106_H_
+struct gpu_ops; /* forward declaration: header works regardless of include order */
+void gp106_init_regops(struct gpu_ops *gops);
+
+#endif /* __REGOPS_GP106_H_ */
-- 
cgit v1.2.2


From 6579c1ec6fbe9d8bbd0c5da5dfaa51c02402527b Mon Sep 17 00:00:00 2001
From: Juha Lainema <jlainema@nvidia.com>
Date: Wed, 7 Sep 2016 12:41:05 +0300
Subject: gpu: nvgpu: no emc change when railgated

GPU frequencies can be set by powerhal when GPU is railgated,
and before this change that would cause EMC floors to remain
set until GPU is unrailgated.

After this change, EMC floors are not requested by the GPU
client when the GPU is railgated. It is ok to ignore the
requests, as the GPU client maxes the floor when powering
up.

Bug 1807560

Change-Id: I9a0d58b0288edbd03b2edf09580ecabd9b74f0c2
Signed-off-by: Juha Lainema <jlainema@nvidia.com>
Reviewed-on: http://git-master/r/1216233
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Ilan Aelion <iaelion@nvidia.com>
Reviewed-by: Cyril Raju <craju@nvidia.com>
Tested-by: Cyril Raju <craju@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index 210d9865..db4f14dd 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -301,7 +301,7 @@ static void gp10b_tegra_postscale(struct device *pdev,
 	unsigned long emc_rate;
 
 	gk20a_dbg_fn("");
-	if (profile) {
+	if (profile && !gp10b_tegra_is_railgated(pdev)) {
 		emc_rate = (freq * EMC_BW_RATIO * g->emc3d_ratio) / 1000;
 
 		if (emc_rate > tegra_bwmgr_get_max_emc_rate())
-- 
cgit v1.2.2


From d8da1d64381bb1265e194d81cb02673efa6ca54c Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Wed, 9 Nov 2016 15:54:27 -0800
Subject: gpu: nvgpu: Fix signed comparison bugs

Fix small problems related to signed versus unsigned comparisons
throughout the driver. Bump up the warning level to prevent
such problems from occurring in the future.

Change-Id: Ib7026728ef0e8c3c9e68956fc9794ec3a786a8a2
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1252069
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk_mclk.c               | 2 +-
 drivers/gpu/nvgpu/gp106/clk_gp106.c            | 2 +-
 drivers/gpu/nvgpu/gp10b/fb_gp10b.c             | 4 ++--
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c            | 2 +-
 drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.c b/drivers/gpu/nvgpu/clk/clk_mclk.c
index 7b15767b..ea238c15 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.c
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.c
@@ -2347,7 +2347,7 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, enum gk20a_mclk_speed speed)
 		mclk->switch_std = 0;
 	} else {
 		s64 prev_avg;
-		u64 curr = (t1-t0)/1000;
+		s64 curr = (t1-t0)/1000;
 
 		mclk->switch_max = curr > mclk->switch_max ?
 			curr : mclk->switch_max;
diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.c b/drivers/gpu/nvgpu/gp106/clk_gp106.c
index 1dd3922a..39c308a3 100644
--- a/drivers/gpu/nvgpu/gp106/clk_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/clk_gp106.c
@@ -195,7 +195,7 @@ static int clk_gp106_debugfs_init(struct gk20a *g) {
 	struct dentry *gpu_root = platform->debugfs;
 	struct dentry *clocks_root;
 	struct dentry *d;
-	int i;
+	unsigned int i;
 
 	if (NULL == (clocks_root = debugfs_create_dir("clocks", gpu_root)))
 		return -ENOMEM;
diff --git a/drivers/gpu/nvgpu/gp10b/fb_gp10b.c b/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
index 9c0bc992..5324b5ef 100644
--- a/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fb_gp10b.c
@@ -87,12 +87,12 @@ static void gp10b_init_kind_attr(void)
 	}
 }
 
-static int gp10b_fb_compression_page_size(struct gk20a *g)
+static unsigned int gp10b_fb_compression_page_size(struct gk20a *g)
 {
 	return SZ_64K;
 }
 
-static int gp10b_fb_compressible_page_size(struct gk20a *g)
+static unsigned int gp10b_fb_compressible_page_size(struct gk20a *g)
 {
 	return SZ_4K;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 3e64d435..31c79aff 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -124,7 +124,7 @@ static int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 static void gp10b_ltc_isr(struct gk20a *g)
 {
 	u32 mc_intr, ltc_intr;
-	int ltc, slice;
+	unsigned int ltc, slice;
 	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
 	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
 
diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
index db4f14dd..8cf6d5e8 100644
--- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c
@@ -62,7 +62,7 @@ static void gr_gp10b_remove_sysfs(struct device *dev);
 static int gp10b_tegra_get_clocks(struct device *dev)
 {
 	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	int i;
+	unsigned int i;
 
 	if (platform->is_fmodel)
 		return 0;
-- 
cgit v1.2.2


From 90fbd43cbeb1439dd633f28f9a28b0f4e1cba1a3 Mon Sep 17 00:00:00 2001
From: seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Fri, 11 Nov 2016 17:03:25 -0800
Subject: gpu: nvgpu: gp10x: updated API for get_litter_value

get_litter_value API is updated to use int instead of
enum type.

JIRA GV11B-21

Change-Id: I982fdfe372f4be38aa4ed026a23e936d73190e79
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1252212
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/hal_gp106.c | 3 +--
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 9afcdb69..cdac76b7 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -98,8 +98,7 @@ static struct gpu_ops gp106_ops = {
 		}
 };
 
-static int gp106_get_litter_value(struct gk20a *g,
-		enum nvgpu_litter_value value)
+static int gp106_get_litter_value(struct gk20a *g, int value)
 {
 	int ret = -EINVAL;
 
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index f0137a70..e9385db0 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -105,8 +105,7 @@ static struct gpu_ops gp10b_ops = {
 	}
 };
 
-static int gp10b_get_litter_value(struct gk20a *g,
-		enum nvgpu_litter_value value)
+static int gp10b_get_litter_value(struct gk20a *g, int value)
 {
 	int ret = EINVAL;
 	switch (value) {
-- 
cgit v1.2.2


From 0bef6c98974be712381a95f7fca143b2679a6ea4 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Mon, 14 Nov 2016 10:54:12 -0800
Subject: gpu: nvgpu: gp106: Expose the boot max freq

Expose the currently hard coded boot frequency, which is at the same
time the max frequency. We use it for filling in GPU characteristics.

Bug 200251486

Change-Id: I3c0abb7a385a83f61b93ddfa857b982c850853e3
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1252906
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Shreshtha Sahu <ssahu@nvidia.com>
Tested-by: Shreshtha Sahu <ssahu@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk.c | 1 -
 drivers/gpu/nvgpu/clk/clk.h | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c
index 5aafa701..ef0834f4 100644
--- a/drivers/gpu/nvgpu/clk/clk.c
+++ b/drivers/gpu/nvgpu/clk/clk.c
@@ -20,7 +20,6 @@
 #include "volt/volt.h"
 #include "gk20a/pmu_gk20a.h"
 
-#define BOOT_GPC2CLK_MHZ  2581
 #define BOOT_MCLK_MHZ     3003
 
 struct clkrpc_pmucmdhandler_params {
diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h
index 2d6425b5..a0b88dcb 100644
--- a/drivers/gpu/nvgpu/clk/clk.h
+++ b/drivers/gpu/nvgpu/clk/clk.h
@@ -102,6 +102,8 @@ struct vbios_clocks_table_1x_hal_clock_entry {
 #define PERF_CLK_PCIEGENCLK     12
 #define PERF_CLK_NUM            13
 
+#define BOOT_GPC2CLK_MHZ  2581
+
 u32 clk_pmu_vin_load(struct gk20a *g);
 u32 clk_domain_print_vf_table(struct gk20a *g, u32 clkapidomain);
 u32 clk_domain_get_f_or_v(
-- 
cgit v1.2.2


From 294e2520d5b2fd8225935b30403cfd59018dd525 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 15 Nov 2016 09:46:22 -0800
Subject: gpu: nvgpu: gp106: Program RX bias current

Hard code RX bias current to 0x2.

Bug 1833830

Change-Id: I1107bad52de7b38d311bc5795739777a4bb4239a
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1253656
(cherry picked from commit 7e853b0c62043cc53847e3535e05886d574dc779)
Reviewed-on: http://git-master/r/1255724
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/clk/clk_mclk.c      | 3 +++
 drivers/gpu/nvgpu/gp106/hw_fb_gp106.h | 4 ++++
 2 files changed, 7 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.c b/drivers/gpu/nvgpu/clk/clk_mclk.c
index ea238c15..86f4ff6d 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.c
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.c
@@ -2189,6 +2189,9 @@ int clk_mclkseq_init_mclk_gddr5(struct gk20a *g)
 
 	mutex_init(&mclk->mclk_mutex);
 
+	/* FBPA gain WAR */
+	gk20a_writel(g, fb_fbpa_fbio_iref_byte_rx_ctrl_r(), 0x22222222);
+
 	mclk->speed = gk20a_mclk_low_speed; /* Value from Devinit */
 
 	/* Parse VBIOS */
diff --git a/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h b/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h
index 19d88464..519679f5 100644
--- a/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_fb_gp106.h
@@ -602,4 +602,8 @@ static inline u32 fb_niso_scrub_status_flag_v(u32 r)
 {
 	return (r >> 0) & 0x1;
 }
+static inline u32 fb_fbpa_fbio_iref_byte_rx_ctrl_r(void)
+{
+	return 0x009a0eb0;
+}
 #endif
-- 
cgit v1.2.2


From 06a03fba267ce34c3a601941f25476ae937da1fc Mon Sep 17 00:00:00 2001
From: Peter Daifuku <pdaifuku@nvidia.com>
Date: Thu, 3 Nov 2016 16:14:05 -0700
Subject: gpu: nvgpu: add FBPA base addresses

Add FBPA base addresses

Bug 200249125

Change-Id: I235fa12a00ef2c5b2f0415bb18755523e8a2754b
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1247802
(cherry picked from commit d2c73ee989d3abeae305ff68ab355772c5e0af5a)
Reviewed-on: http://git-master/r/1252163
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/hal_gp106.c     | 6 ++++++
 drivers/gpu/nvgpu/gp106/hw_proj_gp106.h | 8 ++++++++
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c     | 6 ++++++
 drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h | 8 ++++++++
 4 files changed, 28 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index cdac76b7..0f926be8 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -169,6 +169,12 @@ static int gp106_get_litter_value(struct gk20a *g, int value)
 	case GPU_LIT_NUM_FBPAS:
 		ret = proj_scal_litter_num_fbpas_v();
 		break;
+	case GPU_LIT_FBPA_SHARED_BASE:
+		ret = proj_fbpa_shared_base_v();
+		break;
+	case GPU_LIT_FBPA_BASE:
+		ret = proj_fbpa_base_v();
+		break;
 	case GPU_LIT_FBPA_STRIDE:
 		ret = proj_fbpa_stride_v();
 		break;
diff --git a/drivers/gpu/nvgpu/gp106/hw_proj_gp106.h b/drivers/gpu/nvgpu/gp106/hw_proj_gp106.h
index 0063712f..8042bcae 100644
--- a/drivers/gpu/nvgpu/gp106/hw_proj_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_proj_gp106.h
@@ -70,6 +70,14 @@ static inline u32 proj_lts_stride_v(void)
 {
 	return 0x00000200;
 }
+static inline u32 proj_fbpa_base_v(void)
+{
+	return 0x00900000;
+}
+static inline u32 proj_fbpa_shared_base_v(void)
+{
+	return 0x009a0000;
+}
 static inline u32 proj_fbpa_stride_v(void)
 {
 	return 0x00004000;
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index e9385db0..b92bdfe2 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -178,6 +178,12 @@ static int gp10b_get_litter_value(struct gk20a *g, int value)
 	case GPU_LIT_FBPA_STRIDE:
 		ret = proj_fbpa_stride_v();
 		break;
+	case GPU_LIT_FBPA_BASE:
+		ret = proj_fbpa_base_v();
+		break;
+	case GPU_LIT_FBPA_SHARED_BASE:
+		ret = proj_fbpa_shared_base_v();
+		break;
 	default:
 		gk20a_err(dev_from_gk20a(g), "Missing definition %d", value);
 		BUG();
diff --git a/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
index 08a7cb82..3392242c 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_proj_gp10b.h
@@ -70,6 +70,14 @@ static inline u32 proj_lts_stride_v(void)
 {
 	return 0x00000200;
 }
+static inline u32 proj_fbpa_base_v(void)
+{
+	return 0x00900000;
+}
+static inline u32 proj_fbpa_shared_base_v(void)
+{
+	return 0x009a0000;
+}
 static inline u32 proj_fbpa_stride_v(void)
 {
 	return 0x00004000;
-- 
cgit v1.2.2


From 108c98a7d011f6ba275ff039193e5bc35e061e24 Mon Sep 17 00:00:00 2001
From: Sachit Kadle <skadle@nvidia.com>
Date: Mon, 14 Nov 2016 16:26:40 -0800
Subject: gpu: nvgpu: gp10b: clear ce isr before wakeup

In gp10b_ce_nonstall_isr(), we trigger a semaphore wakeup.
Currently, we clear the interrupt status register after the
wakeup is complete. There is potential for an interrupt to
come in while the wake-up operation is in progress, and it
is possible that:

1) We miss processing the interrupt in that ISR iteration AND
2) We clear the interrupt status register anyways

This change clears the status register before triggering wakeup,
so the interrupt will properly re-fire.

Bug 200244458

Change-Id: Ia3338252eeea4eb60d11c0e241279989a46dac04
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1253107
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Richard Zhao <rizhao@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/ce_gp10b.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/ce_gp10b.c b/drivers/gpu/nvgpu/gp10b/ce_gp10b.c
index a35c9817..e5082778 100644
--- a/drivers/gpu/nvgpu/gp10b/ce_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ce_gp10b.c
@@ -20,13 +20,13 @@
 #include "hw_ce_gp10b.h"
 #include "ce_gp10b.h"
 
-static u32 ce_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
+static void ce_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
 {
 	gk20a_dbg(gpu_dbg_intr, "ce non-blocking pipe interrupt\n");
 
 	/* wake theads waiting in this channel */
 	gk20a_channel_semaphore_wakeup(g, true);
-	return ce_intr_status_nonblockpipe_pending_f();
+	return;
 }
 
 static u32 ce_blockpipe_isr(struct gk20a *g, u32 fifo_intr)
@@ -64,14 +64,14 @@ static void gp10b_ce_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
 static void gp10b_ce_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
 {
 	u32 ce_intr = gk20a_readl(g, ce_intr_status_r(inst_id));
-	u32 clear_intr = 0;
 
 	gk20a_dbg(gpu_dbg_intr, "ce nonstall isr %08x %08x\n", ce_intr, inst_id);
 
-	if (ce_intr & ce_intr_status_nonblockpipe_pending_f())
-		clear_intr |= ce_nonblockpipe_isr(g, ce_intr);
-
-	gk20a_writel(g, ce_intr_status_r(inst_id), clear_intr);
+	if (ce_intr & ce_intr_status_nonblockpipe_pending_f()) {
+		gk20a_writel(g, ce_intr_status_r(inst_id),
+			ce_intr_status_nonblockpipe_pending_f());
+		ce_nonblockpipe_isr(g, ce_intr);
+	}
 
 	return;
 }
-- 
cgit v1.2.2


From fc5b7f7c4555e6db9465b0e113a60833d4fb3b77 Mon Sep 17 00:00:00 2001
From: seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Wed, 16 Nov 2016 10:30:40 -0800
Subject: gpu:nvgpu: gp10x: use chip specific init_inst_block

JIRA GV11B-21

Change-Id: I3e2cb2384b6a26ba339a79d38c91dd47480fe6a6
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1254876
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 1e073ab2..1b6b6641 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -81,7 +81,7 @@ static int gb10b_init_bar2_vm(struct gk20a *g)
 	if (err)
 		goto clean_up_va;
 
-	gk20a_init_inst_block(inst_block, vm, big_page_size);
+	g->ops.mm.init_inst_block(inst_block, vm, big_page_size);
 
 	return 0;
 
-- 
cgit v1.2.2


From b3dbc3161e91948b90a42779f28791aa2ed5f3da Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Fri, 11 Nov 2016 01:48:57 +0530
Subject: gpu: nvgpu: correct adc slope offset sign bit

bug 1838549

Change-Id: I40457aebd49a02d0dd54d6dc9c965f89613ee21f
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/1251371
(cherry picked from commit 2c80f36830baf48bab043d7f1ebbcbe7759789c4)
Reviewed-on: http://git-master/r/1251452
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/hw_fuse_gp106.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hw_fuse_gp106.h b/drivers/gpu/nvgpu/gp106/hw_fuse_gp106.h
index afabc943..32d8a4f2 100644
--- a/drivers/gpu/nvgpu/gp106/hw_fuse_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/hw_fuse_gp106.h
@@ -164,7 +164,7 @@ static inline u32 fuse_vin_cal_gpc1_icpt_sign_f(void)
 }
 static inline u32 fuse_vin_cal_gpc1_slope_sign_f(void)
 {
-	return 0x8000;
+	return 0x800;
 }
 static inline u32 fuse_vin_cal_gpc1_icpt_data_v(u32 r)
 {
-- 
cgit v1.2.2


From d8dc7b130e4b93a1c29c26b852af686eb67444de Mon Sep 17 00:00:00 2001
From: Peter Daifuku <pdaifuku@nvidia.com>
Date: Thu, 17 Nov 2016 16:38:57 -0800
Subject: gpu: nvgpu: hardcode gp10b fbpa values

 gp10b does not have an fbpa unit, although the
 hw header files claim it does. Hardcode all fbpa
 values to 0.

 Bug 200249125

Change-Id: I6ed63b3231d7af8e31ccf5047d56bdb85f05a9d9
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1256422
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index b92bdfe2..a656f10d 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -172,17 +172,15 @@ static int gp10b_get_litter_value(struct gk20a *g, int value)
 	case GPU_LIT_LTS_STRIDE:
 		ret = proj_lts_stride_v();
 		break;
+	/* GP10B does not have a FBPA unit, despite what's listed in the
+	 * hw headers or read back through NV_PTOP_SCAL_NUM_FBPAS,
+	 * so hardcode all values to 0.
+	 */
 	case GPU_LIT_NUM_FBPAS:
-		ret = proj_scal_litter_num_fbpas_v();
-		break;
 	case GPU_LIT_FBPA_STRIDE:
-		ret = proj_fbpa_stride_v();
-		break;
 	case GPU_LIT_FBPA_BASE:
-		ret = proj_fbpa_base_v();
-		break;
 	case GPU_LIT_FBPA_SHARED_BASE:
-		ret = proj_fbpa_shared_base_v();
+		ret = 0;
 		break;
 	default:
 		gk20a_err(dev_from_gk20a(g), "Missing definition %d", value);
-- 
cgit v1.2.2


From c320ccfa952a2796db27d97111791bcbeff9f5c7 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Fri, 2 Dec 2016 11:46:17 +0530
Subject: gpu: nvgpu: remove debugger check for cilp completion event

We can trigger CILP only if SM debug mode is on.

So in fecs interrupt handler, we could have graphics context
running for which SM debug mode is disabled.
And in that case we skip posting of cilp completion
events to UMD.

But since the CILP event was triggered anyway, we need to post
events to UMD irrespective of whether SM debug mode is enabled
at that point.

Hence remove check gk20a_gr_sm_debugger_attached() for
posting events to UMD

Bug 200243092

Change-Id: I54ad205be11ec6d5034d524bfbb28f8a1fa72993
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1263591
(cherry picked from commit e6259e2d0d5a4bb5929e70e03e154f8b82ae3600)
Reviewed-on: http://git-master/r/1264780
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 09c2558c..051e16a3 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1896,18 +1896,17 @@ static int gr_gp10b_handle_fecs_error(struct gk20a *g,
 			goto clean_up;
 		}
 
-		if (gk20a_gr_sm_debugger_attached(g)) {
-			gk20a_dbg_gpu_post_events(ch);
+		/* Post events to UMD */
+		gk20a_dbg_gpu_post_events(ch);
 
-			if (gk20a_is_channel_marked_as_tsg(ch)) {
-				struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
+		if (gk20a_is_channel_marked_as_tsg(ch)) {
+			struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
 
-				gk20a_tsg_event_id_post_event(tsg,
-					NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE);
-			} else {
-				gk20a_channel_event_id_post_event(ch,
-					NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE);
-			}
+			gk20a_tsg_event_id_post_event(tsg,
+				NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE);
+		} else {
+			gk20a_channel_event_id_post_event(ch,
+				NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE);
 		}
 
 		gk20a_channel_put(ch);
-- 
cgit v1.2.2


From 3d9c33c5953e383527c7e4af594adfe0c82b5788 Mon Sep 17 00:00:00 2001
From: Thomas Fleury <tfleury@nvidia.com>
Date: Tue, 13 Sep 2016 14:23:45 -0700
Subject: gpu: nvgpu: clk arbiter skeleton

Add clock arbiter skeleton with support of clock sessions,
notifications on clock changes, request numbering, and asynchronous
handling of clock requests. Provides minimum behaviour to allow
unit tests implementation. Actual arbitration and clock settings
will be done separately. For now, dummy arbiter keeps last
requested target mhz. Actual arbiter may move to a lockless
implementation.

Jira DNVGPU-125

Change-Id: I6a8e443fb0d15dc5f1993e7260256d71acddd106
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1223476
(cherry picked from commit cb130825d84e4124d273bd443e2b62d493377461)
Reviewed-on: http://git-master/r/1243105
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x   |   2 +
 drivers/gpu/nvgpu/clk/clk_arb.c         | 387 ++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/clk/clk_arb.h         |  63 ++++++
 drivers/gpu/nvgpu/gp106/clk_arb_gp106.c |  95 ++++++++
 drivers/gpu/nvgpu/gp106/clk_arb_gp106.h |  21 ++
 drivers/gpu/nvgpu/gp106/clk_gp106.c     |   1 +
 drivers/gpu/nvgpu/gp106/hal_gp106.c     |   2 +
 drivers/gpu/nvgpu/pstate/pstate.c       |  40 +++-
 drivers/gpu/nvgpu/pstate/pstate.h       |   9 +-
 9 files changed, 618 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/clk/clk_arb.c
 create mode 100644 drivers/gpu/nvgpu/clk/clk_arb.h
 create mode 100644 drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
 create mode 100644 drivers/gpu/nvgpu/gp106/clk_arb_gp106.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index bb19d595..a096a438 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -38,11 +38,13 @@ nvgpu-y += \
 	$(nvgpu-t18x)/clk/clk_domain.o \
 	$(nvgpu-t18x)/clk/clk_prog.o \
 	$(nvgpu-t18x)/clk/clk_vf_point.o \
+	$(nvgpu-t18x)/clk/clk_arb.o \
 	$(nvgpu-t18x)/perf/vfe_var.o \
 	$(nvgpu-t18x)/perf/vfe_equ.o \
 	$(nvgpu-t18x)/perf/perf.o \
 	$(nvgpu-t18x)/clk/clk.o \
 	$(nvgpu-t18x)/gp106/clk_gp106.o \
+	$(nvgpu-t18x)/gp106/clk_arb_gp106.o \
 	$(nvgpu-t18x)/gp106/gp106_gating_reglist.o \
 	$(nvgpu-t18x)/gp106/xve_gp106.o \
 	$(nvgpu-t18x)/gp106/therm_gp106.o \
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
new file mode 100644
index 00000000..1d02c7d7
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+
+#include <linux/cdev.h>
+#include <linux/file.h>
+#include <linux/anon_inodes.h>
+#include <linux/nvgpu.h>
+#include <linux/bitops.h>
+
+#include "clk/clk_arb.h"
+
+static int nvgpu_clk_arb_release_session_dev(struct inode *inode, struct file *filp);
+static unsigned int nvgpu_clk_arb_poll_session_dev(struct file *filp, poll_table *wait);
+
+static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work);
+
+struct nvgpu_clk_arb {
+	struct mutex wlock;		/* held around the *_mhz fields */
+	struct mutex users_lock;	/* held around the users list */
+	struct list_head users;		/* sessions, via session->user */
+	u32 gpc2clk_current_mhz;
+	u32 gpc2clk_target_mhz;
+	u32 gpc2clk_default_mhz;
+	u32 mclk_current_mhz;
+	u32 mclk_target_mhz;
+	u32 mclk_default_mhz;
+	atomic_t usercount;		/* number of live sessions */
+	struct work_struct update_fn_work;	/* runs the arbiter callback */
+
+	atomic_t req_nr;	/* bumped by each apply_session_constraints */
+	atomic_t last_req_nr;	/* last completed by arbiter */
+};
+
+struct nvgpu_clk_session {
+	struct gk20a *g;
+	int fd;				/* -1 while no fd is installed */
+	atomic_t req_nr;		/* arb->req_nr at the last apply */
+	struct kref refcount;
+	wait_queue_head_t readout_wq;	/* woken by the arbiter callback */
+	atomic_t poll_mask;		/* events for the next poll() */
+	struct list_head user;		/* link in arb->users */
+	u32 gpc2clk_target_mhz;
+	u32 mclk_target_mhz;
+};
+
+const struct file_operations clk_dev_ops = {
+	.owner = THIS_MODULE,	/* fops for the anon-inode session fd */
+	.release = nvgpu_clk_arb_release_session_dev,
+	.poll = nvgpu_clk_arb_poll_session_dev,
+};
+
+int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
+{
+	struct nvgpu_clk_arb *arb;
+	u16 default_mhz;
+	int err;
+
+	gk20a_dbg_fn("");
+	/* Chips without clk_arb ops get no arbiter: nothing to set up */
+	if (!g->ops.clk_arb.get_arbiter_clk_domains)
+		return 0;
+
+	arb = kzalloc(sizeof(struct nvgpu_clk_arb), GFP_KERNEL);
+	if (!arb)
+		return -ENOMEM;
+
+	g->clk_arb = arb;
+
+	mutex_init(&arb->wlock);
+	mutex_init(&arb->users_lock);
+	/* Seed target, current and default MCLK from the chip default */
+	err =  g->ops.clk_arb.get_arbiter_clk_default(g,
+			NVGPU_GPU_CLK_DOMAIN_MCLK, &default_mhz);
+	if (err)
+		return -EINVAL;	/* NOTE(review): g->clk_arb stays set here */
+
+	arb->mclk_target_mhz = default_mhz;
+	arb->mclk_current_mhz = default_mhz;
+	arb->mclk_default_mhz = default_mhz;
+	/* Same for GPC2CLK */
+	err =  g->ops.clk_arb.get_arbiter_clk_default(g,
+			NVGPU_GPU_CLK_DOMAIN_GPC2CLK, &default_mhz);
+	if (err)
+		return -EINVAL;	/* NOTE(review): g->clk_arb stays set here */
+
+	arb->gpc2clk_target_mhz = default_mhz;
+	arb->gpc2clk_current_mhz = default_mhz;
+	arb->gpc2clk_default_mhz = default_mhz;
+
+	atomic_set(&arb->usercount, 0);
+	atomic_set(&arb->req_nr, 0);
+	atomic_set(&arb->last_req_nr, 0);
+
+	INIT_LIST_HEAD(&arb->users);
+	INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
+
+	return 0;
+}
+
+void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
+{
+	kfree(g->clk_arb);	/* NOTE(review): work is not cancelled first */
+}
+
+/* Install an fd for @session (once); returns the fd or a negative errno. */
+int nvgpu_clk_arb_install_session_fd(struct gk20a *g,
+		struct nvgpu_clk_session *session)
+{
+	struct file *file;
+	char *name;
+	int fd;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	if (session->fd >= 0)
+		return session->fd;
+
+	fd = get_unused_fd_flags(O_RDWR);
+	if (fd < 0)
+		return fd;
+
+	name = kasprintf(GFP_KERNEL, "%s-clk-fd%d", dev_name(g->dev), fd);
+	file = name ? anon_inode_getfile(name, &clk_dev_ops, session, O_RDWR) :
+		ERR_PTR(-ENOMEM);	/* kasprintf() ran out of memory */
+	kfree(name);
+	if (IS_ERR(file)) {
+		err = PTR_ERR(file);
+		goto clean_up_fd;
+	}
+
+	BUG_ON(file->private_data != session);
+
+	/* Publish last: once installed, close() can run ->release any time */
+	kref_get(&session->refcount);
+	session->fd = fd;
+	fd_install(fd, file);
+	return fd;
+
+clean_up_fd:
+	put_unused_fd(fd);
+
+	return err;
+}
+
+int nvgpu_clk_arb_init_session(struct gk20a *g,
+		struct nvgpu_clk_session **_session)
+{
+	struct nvgpu_clk_arb *arb = g->clk_arb;
+	struct nvgpu_clk_session *session;
+
+	gk20a_dbg_fn("");
+
+	*_session = NULL;
+	/* No arbiter ops on this chip: succeed and leave *_session NULL */
+	if (!g->ops.clk_arb.get_arbiter_clk_domains)
+		return 0;
+
+	session = kzalloc(sizeof(*session), GFP_KERNEL);
+	if (!session)
+		return -ENOMEM;
+	session->g = g;
+	session->fd = -1;	/* no fd installed yet */
+
+	kref_init(&session->refcount);
+	init_waitqueue_head(&session->readout_wq);
+
+	atomic_set(&session->poll_mask, 0);
+	atomic_set(&session->req_nr, 0);
+	/* Start from the arbiter defaults before the session is listed */
+	mutex_lock(&arb->wlock);
+	session->mclk_target_mhz = arb->mclk_default_mhz;
+	session->gpc2clk_target_mhz = arb->gpc2clk_default_mhz;
+	mutex_unlock(&arb->wlock);
+
+	mutex_lock(&arb->users_lock);
+	list_add_tail(&session->user, &arb->users);
+	mutex_unlock(&arb->users_lock);
+	atomic_inc(&arb->usercount);
+
+	*_session = session;
+
+	return 0;
+}
+
+void nvgpu_clk_arb_free_session(struct kref *refcount)
+{
+	struct nvgpu_clk_session *session = container_of(refcount,
+			struct nvgpu_clk_session, refcount);
+	struct gk20a *g = session->g;
+	struct nvgpu_clk_arb *arb = g->clk_arb;
+
+	mutex_lock(&arb->users_lock);
+	list_del_init(&session->user);
+	mutex_unlock(&arb->users_lock);
+	/* Last session gone: a NULL apply resets the targets to defaults */
+	if (atomic_dec_and_test(&arb->usercount))
+		nvgpu_clk_arb_apply_session_constraints(g, NULL);
+
+	kfree(session);
+}
+
+void nvgpu_clk_arb_cleanup_session(struct gk20a *g,
+		struct nvgpu_clk_session *session)
+{
+	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
+}
+
+static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
+{
+	struct nvgpu_clk_arb *arb =
+		container_of(work, struct nvgpu_clk_arb, update_fn_work);
+	struct nvgpu_clk_session *session;
+
+	mutex_lock(&arb->wlock);
+	/* Dummy arbitration: the current values simply take the targets */
+	/* TODO: look up higher or equal VF points */
+
+	arb->mclk_current_mhz = arb->mclk_target_mhz;
+	arb->gpc2clk_current_mhz = arb->gpc2clk_target_mhz;
+
+	/* TODO: actually program the clocks */
+	/* Every request issued so far now counts as completed */
+	atomic_set(&arb->last_req_nr, atomic_read(&arb->req_nr));
+	mutex_unlock(&arb->wlock);
+	/* Flag every session readable and wake its pollers */
+	mutex_lock(&arb->users_lock);
+	list_for_each_entry(session, &arb->users, user) {
+		atomic_set(&session->poll_mask, POLLIN | POLLRDNORM);
+		wake_up_interruptible(&session->readout_wq);
+	}
+	mutex_unlock(&arb->users_lock);
+
+}
+
+void nvgpu_clk_arb_apply_session_constraints(struct gk20a *g,
+		struct nvgpu_clk_session *session)
+{
+	struct nvgpu_clk_arb *arb = g->clk_arb;
+
+	mutex_lock(&arb->wlock);
+	atomic_inc(&arb->req_nr);
+	/* Every call gets a new request number, even with a NULL session */
+	/* TODO: arbitration between users.
+	 * For now, last session to run arbiter wins.
+	 */
+	/* A NULL session falls back to the arbiter defaults */
+	if (session) {
+		arb->mclk_target_mhz = session->mclk_target_mhz;
+		arb->gpc2clk_target_mhz = session->gpc2clk_target_mhz;
+		/* Remember which request carries this session's targets */
+		atomic_set(&session->req_nr, atomic_read(&arb->req_nr));
+	} else {
+		arb->mclk_target_mhz = arb->mclk_default_mhz;
+		arb->gpc2clk_target_mhz = arb->gpc2clk_default_mhz;
+	}
+	mutex_unlock(&arb->wlock);
+	/* The arbiter callback runs later, from the work item */
+	schedule_work(&arb->update_fn_work);
+}
+
+static unsigned int nvgpu_clk_arb_poll_session_dev(struct file *filp, poll_table *wait)
+{
+	struct nvgpu_clk_session *session = filp->private_data;
+
+	gk20a_dbg_fn("");
+	/* Pending events are reported once: the mask is cleared as read */
+	poll_wait(filp, &session->readout_wq, wait);
+	return atomic_xchg(&session->poll_mask, 0);
+}
+
+static int nvgpu_clk_arb_release_session_dev(struct inode *inode, struct file *filp)
+{
+	struct nvgpu_clk_session *session = filp->private_data;
+	struct gk20a *g = session->g;
+
+	session->fd = -1;	/* the session no longer has an fd */
+	nvgpu_clk_arb_cleanup_session(g, session);	/* drop one ref */
+
+	return 0;
+}
+
+int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
+		u32 api_domain, u16 target_mhz)
+{
+	/* Only stores the target in the session; nothing is applied here */
+	gk20a_dbg_fn("domain=0x%08x target_mhz=%u", api_domain, target_mhz);
+
+	switch (api_domain) {
+	case NVGPU_GPU_CLK_DOMAIN_MCLK:
+		session->mclk_target_mhz = target_mhz;
+		return 0;
+
+	case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
+		session->gpc2clk_target_mhz = target_mhz;
+		return 0;
+
+	default:
+		return -EINVAL;
+	}
+}
+
+int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
+		u32 api_domain, u16 *target_mhz)
+{
+	switch (api_domain) {
+	case NVGPU_GPU_CLK_DOMAIN_MCLK:
+		*target_mhz = session->mclk_target_mhz;	/* u32 into u16 */
+		return 0;
+
+	case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
+		*target_mhz = session->gpc2clk_target_mhz;
+		return 0;
+
+	default:	/* unknown domain: report 0 MHz and fail */
+		*target_mhz = 0;
+		return -EINVAL;
+	}
+}
+
+int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
+		u32 api_domain, u16 *actual_mhz)
+{
+	struct nvgpu_clk_arb *arb = g->clk_arb;
+	int err = 0;
+
+	mutex_lock(&arb->wlock);	/* read the current value under wlock */
+	switch (api_domain) {
+	case NVGPU_GPU_CLK_DOMAIN_MCLK:
+		*actual_mhz = arb->mclk_current_mhz;
+		break;
+
+	case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
+		*actual_mhz = arb->gpc2clk_current_mhz;
+		break;
+
+	default:	/* unknown domain: report 0 MHz and fail */
+		*actual_mhz = 0;
+		err = -EINVAL;
+	}
+	mutex_unlock(&arb->wlock);
+
+	return err;
+}
+
+u32 nvgpu_clk_arb_get_arbiter_req_nr(struct gk20a *g)
+{
+	struct nvgpu_clk_arb *arb = g->clk_arb;
+
+	return atomic_read(&arb->last_req_nr);
+}
+
+int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
+		u16 *min_mhz, u16 *max_mhz)
+{
+	return g->ops.clk_arb.get_arbiter_clk_range(g, api_domain,
+			min_mhz, max_mhz);
+}
+
+u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g)
+{
+	return g->ops.clk_arb.get_arbiter_clk_domains(g);
+}
+
+u32 nvgpu_clk_arb_get_session_req_nr(struct gk20a *g,
+		struct nvgpu_clk_session *session)
+{
+	return atomic_read(&session->req_nr);
+}
+
+int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
+	u32 api_domain, u32 *max_points, u16 *fpoints)
+{
+	return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints);
+}
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.h b/drivers/gpu/nvgpu/clk/clk_arb.h
new file mode 100644
index 00000000..9981041b
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_arb.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+
+#ifndef _CLK_ARB_H_
+#define _CLK_ARB_H_
+
+struct nvgpu_clk_arb;
+struct nvgpu_clk_session;
+
+int nvgpu_clk_arb_init_arbiter(struct gk20a *g);
+
+int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
+		u16 *min_mhz, u16 *max_mhz);
+
+int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
+		u32 api_domain, u16 *actual_mhz);
+
+int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
+	u32 api_domain, u32 *max_points, u16 *fpoints);
+
+u32 nvgpu_clk_arb_get_arbiter_req_nr(struct gk20a *g);
+
+u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g);
+
+void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g);
+
+int nvgpu_clk_arb_install_session_fd(struct gk20a *g,
+		struct nvgpu_clk_session *session);
+
+int nvgpu_clk_arb_init_session(struct gk20a *g,
+		struct nvgpu_clk_session **_session);
+
+void nvgpu_clk_arb_cleanup_session(struct gk20a *g,
+		struct nvgpu_clk_session *session);
+
+void nvgpu_clk_arb_apply_session_constraints(struct gk20a *g,
+		struct nvgpu_clk_session *session);
+
+int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
+		u32 api_domain, u16 target_mhz);
+
+int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
+		u32 api_domain, u16 *target_mhz);
+
+u32 nvgpu_clk_arb_get_session_req_nr(struct gk20a *g,
+		struct nvgpu_clk_session *session);
+
+
+
+#endif /* _CLK_ARB_H_ */
+
diff --git a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
new file mode 100644
index 00000000..d1cbb32b
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+
+#include "clk/clk_arb.h"
+#include "clk_arb_gp106.h"
+
+static u32 gp106_get_arbiter_clk_domains(struct gk20a *g)
+{
+	(void)g;
+	return (CTRL_CLK_DOMAIN_MCLK|CTRL_CLK_DOMAIN_GPC2CLK);
+}
+
+static int gp106_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
+		u16 *min_mhz, u16 *max_mhz)
+{
+	enum nv_pmu_clk_clkwhich clkwhich;
+	struct clk_set_info *p0_info;
+	struct clk_set_info *p5_info;
+
+	switch (api_domain) {
+	case CTRL_CLK_DOMAIN_MCLK:
+		clkwhich = clkwhich_mclk;
+		break;
+
+	case CTRL_CLK_DOMAIN_GPC2CLK:
+		clkwhich = clkwhich_gpc2clk;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	p5_info = pstate_get_clk_set_info(g,
+			CTRL_PERF_PSTATE_P5, clkwhich);
+	if (!p5_info)
+		return -EINVAL;
+
+	p0_info = pstate_get_clk_set_info(g,
+			CTRL_PERF_PSTATE_P0, clkwhich);
+	if (!p0_info)
+		return -EINVAL;
+
+	*min_mhz = p5_info->min_mhz;
+	*max_mhz = p0_info->max_mhz;
+
+	return 0;
+}
+
+static int gp106_get_arbiter_clk_default(struct gk20a *g, u32 api_domain,
+		u16 *default_mhz)
+{
+	enum nv_pmu_clk_clkwhich clkwhich;
+	struct clk_set_info *p0_info;
+
+	switch (api_domain) {
+	case CTRL_CLK_DOMAIN_MCLK:
+		clkwhich = clkwhich_mclk;
+		break;
+
+	case CTRL_CLK_DOMAIN_GPC2CLK:
+		clkwhich = clkwhich_gpc2clk;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	p0_info = pstate_get_clk_set_info(g,
+			CTRL_PERF_PSTATE_P0, clkwhich);
+	if (!p0_info)
+		return -EINVAL;
+
+	*default_mhz = p0_info->max_mhz;
+
+	return 0;
+}
+
+void gp106_init_clk_arb_ops(struct gpu_ops *gops)
+{
+	gops->clk_arb.get_arbiter_clk_domains = gp106_get_arbiter_clk_domains;
+	gops->clk_arb.get_arbiter_clk_range = gp106_get_arbiter_clk_range;
+	gops->clk_arb.get_arbiter_clk_default = gp106_get_arbiter_clk_default;
+}
diff --git a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.h b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.h
new file mode 100644
index 00000000..a9877199
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef CLK_ARB_GP106_H
+#define CLK_ARB_GP106_H
+
+void gp106_init_clk_arb_ops(struct gpu_ops *gops);
+
+#endif /* CLK_ARB_GP106_H */
diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.c b/drivers/gpu/nvgpu/gp106/clk_gp106.c
index 39c308a3..85dde69f 100644
--- a/drivers/gpu/nvgpu/gp106/clk_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/clk_gp106.c
@@ -27,6 +27,7 @@
 #include "gk20a/gk20a.h"
 #include "hw_trim_gp106.h"
 #include "clk_gp106.h"
+#include "clk/clk_arb.h"
 
 #define gk20a_dbg_clk(fmt, arg...) \
 	gk20a_dbg(gpu_dbg_clk, fmt, ##arg)
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 0f926be8..dc27cdae 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -37,6 +37,7 @@
 #include "gm20b/fifo_gm20b.h"
 #include "gm20b/pmu_gm20b.h"
 #include "gp106/clk_gp106.h"
+#include "gp106/clk_arb_gp106.h"
 
 #include "gp106/mm_gp106.h"
 #include "gp106/pmu_gp106.h"
@@ -210,6 +211,7 @@ int gp106_init_hal(struct gk20a *g)
 	gk20a_init_debug_ops(gops);
 	gk20a_init_dbg_session_ops(gops);
 	gp106_init_clk_ops(gops);
+	gp106_init_clk_arb_ops(gops);
 	gp106_init_regops(gops);
 	gp10b_init_cde_ops(gops);
 	gk20a_init_tsg_ops(gops);
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index e9b9775e..0dc15201 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -234,7 +234,7 @@ static int parse_pstate_entry_5x(struct gk20a *g,
 	memset(pstate, 0, sizeof(struct pstate));
 	pstate->super.type = CTRL_PERF_PSTATE_TYPE_3X;
 	pstate->num = 0x0F - entry->pstate_level;
-	pstate->clklist.clksetinfolistsize = hdr->clock_entry_count;
+	pstate->clklist.num_info = hdr->clock_entry_count;
 
 	gk20a_dbg_info("pstate P%u", pstate->num);
 
@@ -357,3 +357,41 @@ static int pstate_sw_setup(struct gk20a *g)
 done:
 	return err;
 }
+
+static struct pstate *pstate_find(struct gk20a *g, u32 num)
+{
+	struct pstates *pstates = &(g->perf_pmu.pstatesobjs);
+	struct pstate *pstate;
+	u8 i;
+
+	gk20a_dbg_info("pstates = %p", pstates);
+
+	BOARDOBJGRP_FOR_EACH(&pstates->super.super,
+			struct pstate *, pstate, i) {
+		gk20a_dbg_info("pstate=%p num=%u (looking for num=%u)",
+				pstate, pstate->num, num);
+		if (pstate->num == num)
+			return pstate;
+	}
+	return NULL;
+}
+
+struct clk_set_info *pstate_get_clk_set_info(struct gk20a *g,
+		u32 pstate_num, enum nv_pmu_clk_clkwhich clkwhich)
+{
+	struct pstate *pstate = pstate_find(g, pstate_num);
+	struct clk_set_info *info;
+	u32 clkidx;
+
+	gk20a_dbg_info("pstate = %p", pstate);
+
+	if (!pstate)
+		return NULL;
+
+	for (clkidx = 0; clkidx < pstate->clklist.num_info; clkidx++) {
+		info = &pstate->clklist.clksetinfo[clkidx];
+		if (info->clkwhich == clkwhich)
+			return info;
+	}
+	return NULL;
+}
diff --git a/drivers/gpu/nvgpu/pstate/pstate.h b/drivers/gpu/nvgpu/pstate/pstate.h
index 11fa4c77..4ae72aa9 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.h
+++ b/drivers/gpu/nvgpu/pstate/pstate.h
@@ -20,6 +20,10 @@
 
 #define CTRL_PERF_PSTATE_TYPE_3X	0x3
 
+#define CTRL_PERF_PSTATE_P0		0
+#define CTRL_PERF_PSTATE_P5		5
+#define CTRL_PERF_PSTATE_P8		8
+
 #define CLK_SET_INFO_MAX_SIZE		(32)
 
 struct clk_set_info {
@@ -30,7 +34,7 @@ struct clk_set_info {
 };
 
 struct clk_set_info_list {
-	u32 clksetinfolistsize;
+	u32 num_info;
 	struct clk_set_info clksetinfo[CLK_SET_INFO_MAX_SIZE];
 };
 
@@ -48,4 +52,7 @@ struct pstates {
 int gk20a_init_pstate_support(struct gk20a *g);
 int gk20a_init_pstate_pmu_support(struct gk20a *g);
 
+struct clk_set_info *pstate_get_clk_set_info(struct gk20a *g, u32 pstate_num,
+		enum nv_pmu_clk_clkwhich clkwhich);
+
 #endif /* __PSTATE_H__ */
-- 
cgit v1.2.2


From 7a8b12ab634c05cd39c08e704c28ee3f4e111c7f Mon Sep 17 00:00:00 2001
From: Thomas Fleury <tfleury@nvidia.com>
Date: Fri, 30 Sep 2016 16:40:29 -0700
Subject: gpu: nvgpu: clk requests completion and event fds

Install one completion fd per SET request.
Notifications on dedicated event fd.
Changed frequencies unit to Hz from MHz.
Remove sequence numbers from dummy arbiter.
Added effective clock type (query frequency from counters).

Jira DNVGPU-125

Change-Id: Id5445c6ae1d6bf06f7f59c82ff6c5d3b34e26483
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1230239
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
(cherry picked from commit d17083f4ceb69725c661678607a3e43148d38560)
Reviewed-on: http://git-master/r/1243106
---
 drivers/gpu/nvgpu/clk/clk_arb.c         | 326 +++++++++++++++++++-------------
 drivers/gpu/nvgpu/clk/clk_arb.h         |  29 +--
 drivers/gpu/nvgpu/gp106/clk_arb_gp106.c |  10 +-
 3 files changed, 211 insertions(+), 154 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index 1d02c7d7..98b7cb5f 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -21,50 +21,63 @@
 
 #include "clk/clk_arb.h"
 
-static int nvgpu_clk_arb_release_session_dev(struct inode *inode, struct file *filp);
-static unsigned int nvgpu_clk_arb_poll_session_dev(struct file *filp, poll_table *wait);
+static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
+		struct file *filp);
+static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
+		struct file *filp);
+static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait);
 
 static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work);
 
 struct nvgpu_clk_arb {
-	struct mutex wlock;
+	struct mutex req_lock;
 	struct mutex users_lock;
 	struct list_head users;
-	u32 gpc2clk_current_mhz;
-	u32 gpc2clk_target_mhz;
-	u32 gpc2clk_default_mhz;
-	u32 mclk_current_mhz;
-	u32 mclk_target_mhz;
-	u32 mclk_default_mhz;
+	struct list_head requests;
+
+	u64 gpc2clk_current_hz;
+	u64 gpc2clk_target_hz;
+	u64 gpc2clk_default_hz;
+	u64 mclk_current_hz;
+	u64 mclk_target_hz;
+	u64 mclk_default_hz;
 	atomic_t usercount;
 	struct work_struct update_fn_work;
+};
+
 
-	atomic_t req_nr;	/* for allocations */
-	atomic_t last_req_nr;	/* last completed by arbiter */
+struct nvgpu_clk_dev {
+	struct nvgpu_clk_session *session;
+	struct list_head link;
+	wait_queue_head_t readout_wq;
+	atomic_t poll_mask;
 };
 
 struct nvgpu_clk_session {
+	bool zombie;
 	struct gk20a *g;
-	int fd;
-	atomic_t req_nr;
 	struct kref refcount;
-	wait_queue_head_t readout_wq;
-	atomic_t poll_mask;
-	struct list_head user;
-	u32 gpc2clk_target_mhz;
-	u32 mclk_target_mhz;
+
+	u64 gpc2clk_target_hz;
+	u64 mclk_target_hz;
 };
 
-const struct file_operations clk_dev_ops = {
+static const struct file_operations completion_dev_ops = {
 	.owner = THIS_MODULE,
-	.release = nvgpu_clk_arb_release_session_dev,
-	.poll = nvgpu_clk_arb_poll_session_dev,
+	.release = nvgpu_clk_arb_release_completion_dev,
+	.poll = nvgpu_clk_arb_poll_dev,
+};
+
+static const struct file_operations event_dev_ops = {
+	.owner = THIS_MODULE,
+	.release = nvgpu_clk_arb_release_event_dev,
+	.poll = nvgpu_clk_arb_poll_dev,
 };
 
 int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 {
 	struct nvgpu_clk_arb *arb;
-	u16 default_mhz;
+	u64 default_hz;
 	int err;
 
 	gk20a_dbg_fn("");
@@ -78,32 +91,31 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 
 	g->clk_arb = arb;
 
-	mutex_init(&arb->wlock);
+	mutex_init(&arb->req_lock);
 	mutex_init(&arb->users_lock);
 
 	err =  g->ops.clk_arb.get_arbiter_clk_default(g,
-			NVGPU_GPU_CLK_DOMAIN_MCLK, &default_mhz);
+			NVGPU_GPU_CLK_DOMAIN_MCLK, &default_hz);
 	if (err)
 		return -EINVAL;
 
-	arb->mclk_target_mhz = default_mhz;
-	arb->mclk_current_mhz = default_mhz;
-	arb->mclk_default_mhz = default_mhz;
+	arb->mclk_target_hz = default_hz;
+	arb->mclk_current_hz = default_hz;
+	arb->mclk_default_hz = default_hz;
 
 	err =  g->ops.clk_arb.get_arbiter_clk_default(g,
-			NVGPU_GPU_CLK_DOMAIN_GPC2CLK, &default_mhz);
+			NVGPU_GPU_CLK_DOMAIN_GPC2CLK, &default_hz);
 	if (err)
 		return -EINVAL;
 
-	arb->gpc2clk_target_mhz = default_mhz;
-	arb->gpc2clk_current_mhz = default_mhz;
-	arb->gpc2clk_default_mhz = default_mhz;
+	arb->gpc2clk_target_hz = default_hz;
+	arb->gpc2clk_current_hz = default_hz;
+	arb->gpc2clk_default_hz = default_hz;
 
 	atomic_set(&arb->usercount, 0);
-	atomic_set(&arb->req_nr, 0);
-	atomic_set(&arb->last_req_nr, 0);
 
 	INIT_LIST_HEAD(&arb->users);
+	INIT_LIST_HEAD(&arb->requests);
 	INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
 
 	return 0;
@@ -114,44 +126,50 @@ void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
 	kfree(g->clk_arb);
 }
 
-
-int nvgpu_clk_arb_install_session_fd(struct gk20a *g,
-		struct nvgpu_clk_session *session)
+static int nvgpu_clk_arb_install_fd(struct gk20a *g,
+		struct nvgpu_clk_session *session,
+		const struct file_operations *fops,
+		struct nvgpu_clk_dev **_dev)
 {
 	struct file *file;
 	char *name;
 	int fd;
 	int err;
+	struct nvgpu_clk_dev *dev;
 
 	gk20a_dbg_fn("");
 
-	if (session->fd >= 0)
-		goto done;
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
 
 	fd = get_unused_fd_flags(O_RDWR);
 	if (fd < 0)
 		return fd;
 
 	name = kasprintf(GFP_KERNEL, "%s-clk-fd%d", dev_name(g->dev), fd);
-	file = anon_inode_getfile(name, &clk_dev_ops, session, O_RDWR);
+	file = anon_inode_getfile(name, fops, dev, O_RDWR);
 	kfree(name);
 	if (IS_ERR(file)) {
 		err = PTR_ERR(file);
-		goto clean_up_fd;
+		goto fail;
 	}
 
-	BUG_ON(file->private_data != session);
-
 	fd_install(fd, file);
+
+	init_waitqueue_head(&dev->readout_wq);
+	atomic_set(&dev->poll_mask, 0);
+
+	dev->session = session;
 	kref_get(&session->refcount);
 
-	session->fd = fd;
-done:
-	return session->fd;
+	*_dev = dev;
 
-clean_up_fd:
-	put_unused_fd(fd);
+	return fd;
 
+fail:
+	kfree(dev);
+	put_unused_fd(fd);
 	return err;
 }
 
@@ -163,8 +181,6 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
 
 	gk20a_dbg_fn("");
 
-	*_session = NULL;
-
 	if (!g->ops.clk_arb.get_arbiter_clk_domains)
 		return 0;
 
@@ -172,23 +188,14 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
 	if (!session)
 		return -ENOMEM;
 	session->g = g;
-	session->fd = -1;
 
 	kref_init(&session->refcount);
-	init_waitqueue_head(&session->readout_wq);
-
-	atomic_set(&session->poll_mask, 0);
-	atomic_set(&session->req_nr, 0);
 
-	mutex_lock(&arb->users_lock);
-	list_add_tail(&session->user, &arb->users);
-	mutex_unlock(&arb->users_lock);
 	atomic_inc(&arb->usercount);
 
-	mutex_lock(&arb->wlock);
-	session->mclk_target_mhz = arb->mclk_default_mhz;
-	session->gpc2clk_target_mhz = arb->gpc2clk_default_mhz;
-	mutex_unlock(&arb->wlock);
+	session->zombie = false;
+	session->mclk_target_hz = arb->mclk_default_hz;
+	session->gpc2clk_target_hz = arb->gpc2clk_default_hz;
 
 	*_session = session;
 
@@ -199,23 +206,41 @@ void nvgpu_clk_arb_free_session(struct kref *refcount)
 {
 	struct nvgpu_clk_session *session = container_of(refcount,
 			struct nvgpu_clk_session, refcount);
-	struct gk20a *g = session->g;
-	struct nvgpu_clk_arb *arb = g->clk_arb;
-
-	mutex_lock(&arb->users_lock);
-	list_del_init(&session->user);
-	mutex_unlock(&arb->users_lock);
-
-	if (atomic_dec_and_test(&arb->usercount))
-		nvgpu_clk_arb_apply_session_constraints(g, NULL);
 
 	kfree(session);
 }
 
-void nvgpu_clk_arb_cleanup_session(struct gk20a *g,
-		struct nvgpu_clk_session *session)
+void nvgpu_clk_arb_release_session(struct gk20a *g,
+	struct nvgpu_clk_session *session)
 {
+	struct nvgpu_clk_arb *arb = g->clk_arb;
+
+	session->zombie = true;
 	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
+
+	/* schedule arbiter if no more user */
+	if (!atomic_dec_and_test(&arb->usercount))
+		schedule_work(&arb->update_fn_work);
+}
+
+int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
+	struct nvgpu_clk_session *session, int *event_fd)
+{
+	struct nvgpu_clk_arb *arb = g->clk_arb;
+	struct nvgpu_clk_dev *dev;
+	int fd;
+
+	fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
+	if (fd < 0)
+		return fd;
+
+	mutex_lock(&arb->users_lock);
+	list_add_tail(&dev->link, &arb->users);
+	mutex_unlock(&arb->users_lock);
+
+	*event_fd = fd;
+
+	return 0;
 }
 
 static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
@@ -223,88 +248,125 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	struct nvgpu_clk_arb *arb =
 		container_of(work, struct nvgpu_clk_arb, update_fn_work);
 	struct nvgpu_clk_session *session;
+	struct nvgpu_clk_dev *dev;
+	struct nvgpu_clk_dev *tmp;
+
+	mutex_lock(&arb->req_lock);
 
-	mutex_lock(&arb->wlock);
+	arb->mclk_target_hz = arb->mclk_default_hz;
+	arb->gpc2clk_target_hz = arb->gpc2clk_default_hz;
+
+	list_for_each_entry(dev, &arb->requests, link) {
+		session = dev->session;
+		if (!session->zombie) {
+			/* TODO: arbiter policy. For now last request wins */
+
+			arb->mclk_target_hz = session->mclk_target_hz;
+			arb->gpc2clk_target_hz = session->gpc2clk_target_hz;
+		}
+	}
 
 	/* TODO: loop up higher or equal VF points */
 
-	arb->mclk_current_mhz = arb->mclk_target_mhz;
-	arb->gpc2clk_current_mhz = arb->gpc2clk_target_mhz;
+	arb->mclk_current_hz = arb->mclk_target_hz;
+	arb->gpc2clk_current_hz = arb->gpc2clk_target_hz;
 
 	/* TODO: actually program the clocks */
 
-	atomic_set(&arb->last_req_nr, atomic_read(&arb->req_nr));
-	mutex_unlock(&arb->wlock);
+	/* notify completion for all requests */
+	list_for_each_entry_safe(dev, tmp, &arb->requests, link) {
+		atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM);
+		wake_up_interruptible(&dev->readout_wq);
+		list_del_init(&dev->link);
+	}
+	mutex_unlock(&arb->req_lock);
 
+	/* notify event for all users */
 	mutex_lock(&arb->users_lock);
-	list_for_each_entry(session, &arb->users, user) {
-		atomic_set(&session->poll_mask, POLLIN | POLLRDNORM);
-		wake_up_interruptible(&session->readout_wq);
+	list_for_each_entry(dev, &arb->users, link) {
+		atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM);
+		wake_up_interruptible(&dev->readout_wq);
 	}
 	mutex_unlock(&arb->users_lock);
 
 }
 
-void nvgpu_clk_arb_apply_session_constraints(struct gk20a *g,
-		struct nvgpu_clk_session *session)
+int nvgpu_clk_arb_apply_session_constraints(struct gk20a *g,
+		struct nvgpu_clk_session *session, int *completion_fd)
 {
 	struct nvgpu_clk_arb *arb = g->clk_arb;
+	struct nvgpu_clk_dev *dev;
+	int fd;
 
-	mutex_lock(&arb->wlock);
-	atomic_inc(&arb->req_nr);
-
-	/* TODO: arbitration between users.
-	   For now, last session to run arbiter wins.
-	 */
+	fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev);
+	if (fd < 0)
+		return fd;
 
-	if (session) {
-		arb->mclk_target_mhz = session->mclk_target_mhz;
-		arb->gpc2clk_target_mhz = session->gpc2clk_target_mhz;
+	*completion_fd = fd;
 
-		atomic_set(&session->req_nr, atomic_read(&arb->req_nr));
-	} else {
-		arb->mclk_target_mhz = arb->mclk_default_mhz;
-		arb->gpc2clk_target_mhz = arb->gpc2clk_default_mhz;
-	}
-	mutex_unlock(&arb->wlock);
+	mutex_lock(&arb->req_lock);
+	list_add_tail(&dev->link, &arb->requests);
+	mutex_unlock(&arb->req_lock);
 
 	schedule_work(&arb->update_fn_work);
+
+	return 0;
 }
 
-static unsigned int nvgpu_clk_arb_poll_session_dev(struct file *filp, poll_table *wait)
+static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait)
 {
-	struct nvgpu_clk_session *session = filp->private_data;
+	struct nvgpu_clk_dev *dev = filp->private_data;
 
 	gk20a_dbg_fn("");
 
-	poll_wait(filp, &session->readout_wq, wait);
-	return atomic_xchg(&session->poll_mask, 0);
+	poll_wait(filp, &dev->readout_wq, wait);
+	return atomic_xchg(&dev->poll_mask, 0);
 }
 
-static int nvgpu_clk_arb_release_session_dev(struct inode *inode, struct file *filp)
+static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
+		struct file *filp)
 {
-	struct nvgpu_clk_session *session = filp->private_data;
-	struct gk20a *g = session->g;
+	struct nvgpu_clk_dev *dev = filp->private_data;
+	struct nvgpu_clk_session *session = dev->session;
 
-	session->fd = -1;
-	nvgpu_clk_arb_cleanup_session(g, session);
+	gk20a_dbg_fn("");
+
+	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
+	kfree(dev);
+	return 0;
+}
+
+static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
+		struct file *filp)
+{
+	struct nvgpu_clk_dev *dev = filp->private_data;
+	struct nvgpu_clk_session *session = dev->session;
+	struct nvgpu_clk_arb *arb = session->g->clk_arb;
+
+	gk20a_dbg_fn("");
 
+	mutex_lock(&arb->users_lock);
+	list_del_init(&dev->link);
+	mutex_unlock(&arb->users_lock);
+
+	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
+	kfree(dev);
 	return 0;
 }
 
-int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
-		u32 api_domain, u16 target_mhz)
+int nvgpu_clk_arb_set_session_target_hz(struct nvgpu_clk_session *session,
+		u32 api_domain, u64 target_hz)
 {
 
-	gk20a_dbg_fn("domain=0x%08x target_mhz=%u", api_domain, target_mhz);
+	gk20a_dbg_fn("domain=0x%08x target_hz=%llu", api_domain, target_hz);
 
 	switch (api_domain) {
 	case NVGPU_GPU_CLK_DOMAIN_MCLK:
-		session->mclk_target_mhz = target_mhz;
+		session->mclk_target_hz = target_hz;
 		return 0;
 
 	case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
-		session->gpc2clk_target_mhz = target_mhz;
+		session->gpc2clk_target_hz = target_hz;
 		return 0;
 
 	default:
@@ -312,61 +374,61 @@ int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
 	}
 }
 
-int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
-		u32 api_domain, u16 *target_mhz)
+int nvgpu_clk_arb_get_session_target_hz(struct nvgpu_clk_session *session,
+		u32 api_domain, u64 *freq_hz)
 {
 	switch (api_domain) {
 	case NVGPU_GPU_CLK_DOMAIN_MCLK:
-		*target_mhz = session->mclk_target_mhz;
+		*freq_hz = session->mclk_target_hz;
 		return 0;
 
 	case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
-		*target_mhz = session->gpc2clk_target_mhz;
+		*freq_hz = session->gpc2clk_target_hz;
 		return 0;
 
 	default:
-		*target_mhz = 0;
+		*freq_hz = 0;
 		return -EINVAL;
 	}
 }
 
-int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
-		u32 api_domain, u16 *actual_mhz)
+int nvgpu_clk_arb_get_arbiter_actual_hz(struct gk20a *g,
+		u32 api_domain, u64 *freq_hz)
 {
 	struct nvgpu_clk_arb *arb = g->clk_arb;
 	int err = 0;
 
-	mutex_lock(&arb->wlock);
+	mutex_lock(&arb->req_lock);
 	switch (api_domain) {
 	case NVGPU_GPU_CLK_DOMAIN_MCLK:
-		*actual_mhz = arb->mclk_current_mhz;
+		*freq_hz = arb->mclk_current_hz;
 		break;
 
 	case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
-		*actual_mhz = arb->gpc2clk_current_mhz;
+		*freq_hz = arb->gpc2clk_current_hz;
 		break;
 
 	default:
-		*actual_mhz = 0;
+		*freq_hz = 0;
 		err = -EINVAL;
 	}
-	mutex_unlock(&arb->wlock);
+	mutex_unlock(&arb->req_lock);
 
 	return err;
 }
 
-u32 nvgpu_clk_arb_get_arbiter_req_nr(struct gk20a *g)
+int nvgpu_clk_arb_get_arbiter_effective_hz(struct gk20a *g,
+		u32 api_domain, u64 *freq_hz)
 {
-	struct nvgpu_clk_arb *arb = g->clk_arb;
-
-	return atomic_read(&arb->last_req_nr);
+	/* TODO: measure clocks from counters */
+	return nvgpu_clk_arb_get_arbiter_actual_hz(g, api_domain, freq_hz);
 }
 
 int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
-		u16 *min_mhz, u16 *max_mhz)
+		u64 *min_hz, u64 *max_hz)
 {
 	return g->ops.clk_arb.get_arbiter_clk_range(g, api_domain,
-			min_mhz, max_mhz);
+			min_hz, max_hz);
 }
 
 u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g)
@@ -374,12 +436,6 @@ u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g)
 	return g->ops.clk_arb.get_arbiter_clk_domains(g);
 }
 
-u32 nvgpu_clk_arb_get_session_req_nr(struct gk20a *g,
-		struct nvgpu_clk_session *session)
-{
-	return atomic_read(&session->req_nr);
-}
-
 int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
 	u32 api_domain, u32 *max_points, u16 *fpoints)
 {
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.h b/drivers/gpu/nvgpu/clk/clk_arb.h
index 9981041b..95749369 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.h
+++ b/drivers/gpu/nvgpu/clk/clk_arb.h
@@ -22,16 +22,17 @@ struct nvgpu_clk_session;
 int nvgpu_clk_arb_init_arbiter(struct gk20a *g);
 
 int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
-		u16 *min_mhz, u16 *max_mhz);
+		u64 *min_hz, u64 *max_hz);
 
-int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
-		u32 api_domain, u16 *actual_mhz);
+int nvgpu_clk_arb_get_arbiter_actual_hz(struct gk20a *g,
+		u32 api_domain, u64 *actual_hz);
+
+int nvgpu_clk_arb_get_arbiter_effective_hz(struct gk20a *g,
+		u32 api_domain, u64 *actual_hz);
 
 int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
 	u32 api_domain, u32 *max_points, u16 *fpoints);
 
-u32 nvgpu_clk_arb_get_arbiter_req_nr(struct gk20a *g);
-
 u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g);
 
 void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g);
@@ -42,20 +43,20 @@ int nvgpu_clk_arb_install_session_fd(struct gk20a *g,
 int nvgpu_clk_arb_init_session(struct gk20a *g,
 		struct nvgpu_clk_session **_session);
 
-void nvgpu_clk_arb_cleanup_session(struct gk20a *g,
+void nvgpu_clk_arb_release_session(struct gk20a *g,
 		struct nvgpu_clk_session *session);
 
-void nvgpu_clk_arb_apply_session_constraints(struct gk20a *g,
-		struct nvgpu_clk_session *session);
+int nvgpu_clk_arb_apply_session_constraints(struct gk20a *g,
+	struct nvgpu_clk_session *session, int *completion_fd);
 
-int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
-		u32 api_domain, u16 target_mhz);
+int nvgpu_clk_arb_set_session_target_hz(struct nvgpu_clk_session *session,
+		u32 api_domain, u64 target_hz);
 
-int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
-		u32 api_domain, u16 *target_mhz);
+int nvgpu_clk_arb_get_session_target_hz(struct nvgpu_clk_session *session,
+		u32 api_domain, u64 *target_hz);
 
-u32 nvgpu_clk_arb_get_session_req_nr(struct gk20a *g,
-		struct nvgpu_clk_session *session);
+int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
+	struct nvgpu_clk_session *session, int *event_fd);
 
 
diff --git a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
index d1cbb32b..112cb588 100644
--- a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
@@ -23,7 +23,7 @@ static u32 gp106_get_arbiter_clk_domains(struct gk20a *g)
 }
 
 static int gp106_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
-		u16 *min_mhz, u16 *max_mhz)
+		u64 *min_hz, u64 *max_hz)
 {
 	enum nv_pmu_clk_clkwhich clkwhich;
 	struct clk_set_info *p0_info;
@@ -52,14 +52,14 @@ static int gp106_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
 	if (!p0_info)
 		return -EINVAL;
 
-	*min_mhz = p5_info->min_mhz;
-	*max_mhz = p0_info->max_mhz;
+	*min_hz = (u64)(p5_info->min_mhz) * (u64)MHZ;
+	*max_hz = (u64)(p0_info->max_mhz) * (u64)MHZ;
 
 	return 0;
 }
 
 static int gp106_get_arbiter_clk_default(struct gk20a *g, u32 api_domain,
-		u16 *default_mhz)
+		u64 *default_hz)
 {
 	enum nv_pmu_clk_clkwhich clkwhich;
 	struct clk_set_info *p0_info;
@@ -82,7 +82,7 @@ static int gp106_get_arbiter_clk_default(struct gk20a *g, u32 api_domain,
 	if (!p0_info)
 		return -EINVAL;
 
-	*default_mhz = p0_info->max_mhz;
+	*default_hz = (u64)p0_info->max_mhz * (u64)MHZ;
 
 	return 0;
 }
-- 
cgit v1.2.2


From d7f15a2b507f05445b97fb410bf788327fef7376 Mon Sep 17 00:00:00 2001
From: Richard Zhao <rizhao@nvidia.com>
Date: Wed, 30 Nov 2016 13:50:27 -0800
Subject: gpu: nvgpu: vgpu: fix va leak when call gk20a_vm_free_va

The page size index needs to be set explicitly when calling gk20a_vm_free_va.

Bug 200255799
JIRA VFND-3033

Change-Id: Ic23ea68905ea423173d1859fd100e7b2c82a1bcc
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/1262590
(cherry picked from commit 918aea147b395f7337db348d2616fb4b195dc53a)
Reviewed-on: http://git-master/r/1263400
Reviewed-by: Aingara Paramakuru <aparamakuru@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vladislav Buzov <vbuzov@nvidia.com>
---
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
index 4746f04b..85dc8c22 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
@@ -35,7 +35,8 @@ static void vgpu_gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 
-	gk20a_vm_free_va(vm, gr_ctx->mem.gpu_va, gr_ctx->mem.size, 0);
+	gk20a_vm_free_va(vm, gr_ctx->mem.gpu_va, gr_ctx->mem.size,
+			gmmu_page_size_kernel);
 
 	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer);
 	gk20a_gmmu_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
-- 
cgit v1.2.2


From 206a5c2d39825fcac63f7a1bb09319252626520d Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Sun, 27 Nov 2016 21:38:05 -0800
Subject: gpu: nvgpu: Remove FB ISO CG from init list

Remove the FB ISO clock gating register from the initialization list. The
register does not exist on GPUs without their own memory.

Change-Id: I86a8c8050baad88a99029771511363f2a1d44341
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1265297
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
index df23d89d..563819de 100644
--- a/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
+++ b/drivers/gpu/nvgpu/gp10b/gp10b_gating_reglist.c
@@ -178,7 +178,6 @@ static const struct gating_desc gp10b_blcg_fb[] = {
 	{.addr = 0x00100d30, .prod = 0x0000c242, .disable = 0x00000000},
 	{.addr = 0x00100d3c, .prod = 0x00000242, .disable = 0x00000000},
 	{.addr = 0x00100d48, .prod = 0x0000c242, .disable = 0x00000000},
-	{.addr = 0x00100d1c, .prod = 0x00000042, .disable = 0x00000000},
 	{.addr = 0x00100c98, .prod = 0x00004242, .disable = 0x00000000},
 };
 
-- 
cgit v1.2.2


From bfc12d25a41c2b5a4d06f233f16331e43c489d8e Mon Sep 17 00:00:00 2001
From: Terje Bergstrom <tbergstrom@nvidia.com>
Date: Tue, 25 Oct 2016 10:05:46 -0700
Subject: gpu: nvgpu: Use bootstrap base for WPR address

Use the bootstrap base address for calculating the address of WPR.
The bootstrap base is dynamic and depends on the amount of memory, so
we should not hard-code any address.

Bug 200244445

Change-Id: Ia700d24c8d572a25946f7b1847faec72c40c6796
Signed-off-by: David Martinez Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1242252
Reviewed-by: David Martinez Nieto <dmartineznie@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-on: http://git-master/r/1267125
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/acr_gp106.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index 3bd79bcd..b883ad83 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -40,7 +40,7 @@ typedef int (*get_ucode_details)(struct gk20a *g,
 /* Both size and address of WPR need to be 128K-aligned */
 #define WPR_ALIGNMENT	0x20000
 #define GP106_DGPU_NONWPR NVGPU_VIDMEM_BOOTSTRAP_ALLOCATOR_BASE
-#define GP106_DGPU_WPR (GP106_DGPU_NONWPR + 0x400000)
+#define GP106_DGPU_WPR_OFFSET 0x400000
 #define DGPU_WPR_SIZE 0x100000
 
 /*Externs*/
@@ -79,8 +79,8 @@ static get_ucode_details pmu_acr_supp_ucode_list[] = {
 
 static void gp106_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf)
 {
-	inf->wpr_base = GP106_DGPU_WPR;
-	inf->nonwpr_base = GP106_DGPU_NONWPR;
+	inf->nonwpr_base = g->mm.vidmem.bootstrap_base;
+	inf->wpr_base = inf->nonwpr_base + GP106_DGPU_WPR_OFFSET;
 	inf->size = DGPU_WPR_SIZE;
 }
 
-- 
cgit v1.2.2


From c4bb19d46e1c9121a0948fa506098cbf2f64e2a6 Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Fri, 7 Oct 2016 16:25:04 -0700
Subject: nvgpu: gpu: arbiter for vf switch management

JIRA DNVGPU-143

The arbiter is charged with selecting the proper
frequencies when multiple applications submit
clock change requests simultaneously.

In the current implementation, the arbiter guarantees
that the selected frequency will always be higher than
or equal to the request, as long as the request is
in range.

The current code is not yet realtime friendly, as
requests are not pre-allocated.

Summary of changes:
(1) pstate/vf switch no longer selects boot frequency
(2) changed the mclk change code to accept an input frequency
(3) added arbiter
(4) a single session can now submit concurrent requests;
the last request is the one that applies for that
session
(5) modified locking mechanism to reduce lock contention
(6) added callback to notify the arbiter that the VF
table has changed and is no longer valid (PMU/Thermals
must call this when the VF table is invalid)
(7) changed internal API to work with MHz
(8) added debugfs for stats

Change-Id: I6a7b05c9447761e8536f84ef86b5ab0793164d63
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1239461
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1267120
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/clk/clk.c             |  61 +--
 drivers/gpu/nvgpu/clk/clk.h             |   1 -
 drivers/gpu/nvgpu/clk/clk_arb.c         | 765 ++++++++++++++++++++++++++++----
 drivers/gpu/nvgpu/clk/clk_arb.h         |  26 +-
 drivers/gpu/nvgpu/clk/clk_mclk.c        |  19 +-
 drivers/gpu/nvgpu/clk/clk_mclk.h        |  10 +-
 drivers/gpu/nvgpu/gp106/clk_arb_gp106.c |  10 +-
 drivers/gpu/nvgpu/pstate/pstate.c       |   4 -
 8 files changed, 703 insertions(+), 193 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c
index ef0834f4..bec5fad1 100644
--- a/drivers/gpu/nvgpu/clk/clk.c
+++ b/drivers/gpu/nvgpu/clk/clk.c
@@ -255,7 +255,7 @@ static int get_regime_id(struct gk20a *g, u32 domain, u32 *regimeid)
 	return -EINVAL;
 }
 
-int clk_program_fllclks(struct gk20a *g, struct change_fll_clk *fllclk)
+int clk_program_fll_clks(struct gk20a *g, struct change_fll_clk *fllclk)
 {
 	int status = -EINVAL;
 	struct clk_domain *pdomain;
@@ -277,8 +277,6 @@ int clk_program_fllclks(struct gk20a *g, struct change_fll_clk *fllclk)
 	if (fllclk->clkmhz == 0)
 		return -EINVAL;
 
-	mutex_lock(&pclk->changeclkmutex);
-
 	setfllclk.voltuv = fllclk->voltuv;
 	setfllclk.gpc2clkmhz = fllclk->clkmhz;
 
@@ -376,63 +374,6 @@ int clk_program_fllclks(struct gk20a *g, struct change_fll_clk *fllclk)
 	if (status)
 		goto done;
 done:
-	mutex_unlock(&pclk->changeclkmutex);
-	return status;
-}
-
-int clk_set_boot_fll_clk(struct gk20a *g)
-{
-	int status;
-	struct change_fll_clk bootfllclk;
-	u16 gpc2clk_clkmhz = BOOT_GPC2CLK_MHZ;
-	u32 gpc2clk_voltuv = 0;
-	u32 gpc2clk_voltuv_sram = 0;
-	u16 mclk_clkmhz = BOOT_MCLK_MHZ;
-	u32 mclk_voltuv = 0;
-	u32 mclk_voltuv_sram = 0;
-	u32 voltuv = 0;
-	u32 voltuv_sram = 0;
-
-	mutex_init(&g->clk_pmu.changeclkmutex);
-	status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
-		&gpc2clk_clkmhz, &gpc2clk_voltuv, CTRL_VOLT_DOMAIN_LOGIC);
-	if (status)
-		return status;
-	status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
-		&gpc2clk_clkmhz, &gpc2clk_voltuv_sram, CTRL_VOLT_DOMAIN_SRAM);
-	if (status)
-		return status;
-	status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
-		&mclk_clkmhz, &mclk_voltuv, CTRL_VOLT_DOMAIN_LOGIC);
-	if (status)
-		return status;
-	status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
-		&mclk_clkmhz, &mclk_voltuv_sram, CTRL_VOLT_DOMAIN_SRAM);
-	if (status)
-		return status;
-
-	voltuv = ((gpc2clk_voltuv) > (mclk_voltuv)) ? (gpc2clk_voltuv)
-			: (mclk_voltuv);
-
-	voltuv_sram = ((gpc2clk_voltuv_sram) > (mclk_voltuv_sram)) ?
-		(gpc2clk_voltuv_sram) : (mclk_voltuv_sram);
-
-	status = volt_set_voltage(g, voltuv, voltuv_sram);
-	if (status)
-		gk20a_err(dev_from_gk20a(g),
-			"attempt to set boot voltage failed %d %d",
-			voltuv, voltuv_sram);
-
-	bootfllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
-	bootfllclk.clkmhz = gpc2clk_clkmhz;
-	bootfllclk.voltuv = voltuv;
-	status = clk_program_fllclks(g, &bootfllclk);
-	if (status)
-		gk20a_err(dev_from_gk20a(g), "attempt to set boot gpc2clk failed");
-	status = g->clk_pmu.clk_mclk.change(g, DEFAULT_BOOT_MCLK_SPEED);
-	if (status)
-		gk20a_err(dev_from_gk20a(g), "attempt to set boot mclk failed");
-
 	return status;
 }
 
diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h
index a0b88dcb..d0e82173 100644
--- a/drivers/gpu/nvgpu/clk/clk.h
+++ b/drivers/gpu/nvgpu/clk/clk.h
@@ -119,6 +119,5 @@ u32 clk_domain_get_f_points(
 	u32 *fpointscount,
 	u16 *freqpointsinmhz
 );
-int clk_set_boot_fll_clk(struct gk20a *g);
 int clk_program_fll_clks(struct gk20a *g, struct change_fll_clk *fllclk);
 #endif
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index 98b7cb5f..f868100b 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -18,9 +18,17 @@
 #include <linux/anon_inodes.h>
 #include <linux/nvgpu.h>
 #include <linux/bitops.h>
+#include <linux/spinlock.h>
 
 #include "clk/clk_arb.h"
 
+
+#define MAX_F_POINTS 127
+
+#ifdef CONFIG_DEBUG_FS
+static int nvgpu_clk_arb_debugfs_init(struct gk20a *g);
+#endif
+
 static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
 		struct file *filp);
 static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
@@ -28,21 +36,57 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
 static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait);
 
 static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work);
+static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work);
+static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *);
+
+struct nvgpu_clk_vf_point {
+	u16 mhz;
+	u32 uvolt;
+	u32 uvolt_sram;
+};
 
 struct nvgpu_clk_arb {
-	struct mutex req_lock;
-	struct mutex users_lock;
+	spinlock_t sessions_lock;
+	spinlock_t users_lock;
+	spinlock_t req_lock;
+
 	struct list_head users;
+	struct list_head sessions;
 	struct list_head requests;
 
-	u64 gpc2clk_current_hz;
-	u64 gpc2clk_target_hz;
-	u64 gpc2clk_default_hz;
-	u64 mclk_current_hz;
-	u64 mclk_target_hz;
-	u64 mclk_default_hz;
-	atomic_t usercount;
+	struct gk20a *g;
+	spinlock_t data_lock;
+	spinlock_t vf_lock;
+
+	u16 gpc2clk_actual_mhz;
+	u16 gpc2clk_default_mhz;
+
+	u16 mclk_actual_mhz;
+	u16 mclk_default_mhz;
+	u32 voltuv_actual;
+
 	struct work_struct update_fn_work;
+	struct work_struct vftable_fn_work;
+	wait_queue_head_t vftable_wq;
+
+	u16 *mclk_f_points;
+	bool vftable_set;
+
+	struct nvgpu_clk_vf_point *mclk_vf_points;
+	u32 mclk_f_numpoints;
+	u16 *gpc2clk_f_points;
+	u32 gpc2clk_f_numpoints;
+	struct nvgpu_clk_vf_point *gpc2clk_vf_points;
+
+#ifdef CONFIG_DEBUG_FS
+	struct mutex debug_lock;
+	s64 switch_max;
+	s64 switch_min;
+	u64 switch_num;
+	s64 switch_avg;
+	s64 switch_std;
+	bool debugfs_set;
+#endif
 };
 
 
@@ -51,15 +95,20 @@ struct nvgpu_clk_dev {
 	struct list_head link;
 	wait_queue_head_t readout_wq;
 	atomic_t poll_mask;
+	u16 gpc2clk_target_mhz;
+	u16 mclk_target_mhz;
 };
 
 struct nvgpu_clk_session {
 	bool zombie;
 	struct gk20a *g;
 	struct kref refcount;
+	struct list_head link;
+	struct list_head targets;
 
-	u64 gpc2clk_target_hz;
-	u64 mclk_target_hz;
+	spinlock_t target_lock;
+	u16 gpc2clk_target_mhz;
+	u16 mclk_target_mhz;
 };
 
 static const struct file_operations completion_dev_ops = {
@@ -77,7 +126,7 @@ static const struct file_operations event_dev_ops = {
 int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 {
 	struct nvgpu_clk_arb *arb;
-	u64 default_hz;
+	u16 default_mhz;
 	int err;
 
 	gk20a_dbg_fn("");
@@ -86,39 +135,104 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 		return 0;
 
 	arb = kzalloc(sizeof(struct nvgpu_clk_arb), GFP_KERNEL);
-	if (!arb)
-		return -ENOMEM;
+	if (!arb) {
+		err = -ENOMEM;
+		goto init_fail;
+	}
+
+	arb->gpc2clk_f_numpoints = MAX_F_POINTS;
+	arb->mclk_f_numpoints = MAX_F_POINTS;
+
+	arb->gpc2clk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL);
+	if (!arb->gpc2clk_f_points) {
+		err = -ENOMEM;
+		goto init_fail;
+	}
+
+	arb->mclk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL);
+	if (!arb->mclk_f_points) {
+		err = -ENOMEM;
+		goto init_fail;
+	}
+
+	arb->gpc2clk_vf_points = kcalloc(MAX_F_POINTS,
+		sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL);
+	if (!arb->gpc2clk_vf_points) {
+		err = -ENOMEM;
+		goto init_fail;
+	}
+
+	arb->mclk_vf_points = kcalloc(MAX_F_POINTS,
+		sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL);
+	if (!arb->mclk_vf_points) {
+		err = -ENOMEM;
+		goto init_fail;
+	}
 
 	g->clk_arb = arb;
+	arb->g = g;
 
-	mutex_init(&arb->req_lock);
-	mutex_init(&arb->users_lock);
+	spin_lock_init(&arb->sessions_lock);
+	spin_lock_init(&arb->users_lock);
+	spin_lock_init(&arb->req_lock);
+	spin_lock_init(&arb->data_lock);
+	spin_lock_init(&arb->vf_lock);
 
 	err =  g->ops.clk_arb.get_arbiter_clk_default(g,
-			NVGPU_GPU_CLK_DOMAIN_MCLK, &default_hz);
-	if (err)
-		return -EINVAL;
+			NVGPU_GPU_CLK_DOMAIN_MCLK, &default_mhz);
+	if (err) {
+		err = -EINVAL;
+		goto init_fail;
+	}
 
-	arb->mclk_target_hz = default_hz;
-	arb->mclk_current_hz = default_hz;
-	arb->mclk_default_hz = default_hz;
+	arb->mclk_default_mhz = default_mhz;
 
 	err =  g->ops.clk_arb.get_arbiter_clk_default(g,
-			NVGPU_GPU_CLK_DOMAIN_GPC2CLK, &default_hz);
-	if (err)
-		return -EINVAL;
-
-	arb->gpc2clk_target_hz = default_hz;
-	arb->gpc2clk_current_hz = default_hz;
-	arb->gpc2clk_default_hz = default_hz;
+			NVGPU_GPU_CLK_DOMAIN_GPC2CLK, &default_mhz);
+	if (err) {
+		err = -EINVAL;
+		goto init_fail;
+	}
 
-	atomic_set(&arb->usercount, 0);
+	arb->gpc2clk_default_mhz = default_mhz;
 
 	INIT_LIST_HEAD(&arb->users);
+	INIT_LIST_HEAD(&arb->sessions);
 	INIT_LIST_HEAD(&arb->requests);
+
+	init_waitqueue_head(&arb->vftable_wq);
+
+	INIT_WORK(&arb->vftable_fn_work, nvgpu_clk_arb_run_vftable_cb);
+
 	INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
 
+#ifdef CONFIG_DEBUG_FS
+	mutex_init(&arb->debug_lock);
+	if (!arb->debugfs_set) {
+		if (nvgpu_clk_arb_debugfs_init(g))
+			arb->debugfs_set = true;
+	}
+#endif
+	err = nvgpu_clk_arb_update_vftable(arb);
+	if (err < 0)
+		goto init_fail;
+
+	/* Schedule first run */
+	schedule_work(&arb->update_fn_work);
+
 	return 0;
+
+init_fail:
+
+	kfree(arb->gpc2clk_f_points);
+	kfree(arb->gpc2clk_vf_points);
+
+	kfree(arb->mclk_f_points);
+	kfree(arb->mclk_vf_points);
+
+	kfree(arb);
+
+	return err;
 }
 
 void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
@@ -170,6 +284,7 @@ static int nvgpu_clk_arb_install_fd(struct gk20a *g,
 fail:
 	kfree(dev);
 	put_unused_fd(fd);
+
 	return err;
 }
 
@@ -190,12 +305,16 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
 	session->g = g;
 
 	kref_init(&session->refcount);
-
-	atomic_inc(&arb->usercount);
+	spin_lock_init(&session->target_lock);
 
 	session->zombie = false;
-	session->mclk_target_hz = arb->mclk_default_hz;
-	session->gpc2clk_target_hz = arb->gpc2clk_default_hz;
+	session->mclk_target_mhz = arb->mclk_default_mhz;
+	session->gpc2clk_target_mhz = arb->gpc2clk_default_mhz;
+	INIT_LIST_HEAD(&session->targets);
+
+	spin_lock(&arb->sessions_lock);
+	list_add_tail(&session->link, &arb->sessions);
+	spin_unlock(&arb->sessions_lock);
 
 	*_session = session;
 
@@ -206,8 +325,15 @@ void nvgpu_clk_arb_free_session(struct kref *refcount)
 {
 	struct nvgpu_clk_session *session = container_of(refcount,
 			struct nvgpu_clk_session, refcount);
+	struct nvgpu_clk_arb *arb = session->g->clk_arb;
 
+	gk20a_dbg_fn("");
+
+	spin_lock(&arb->sessions_lock);
+	list_del(&session->link);
+	spin_unlock(&arb->sessions_lock);
 	kfree(session);
+;
 }
 
 void nvgpu_clk_arb_release_session(struct gk20a *g,
@@ -215,12 +341,12 @@ void nvgpu_clk_arb_release_session(struct gk20a *g,
 {
 	struct nvgpu_clk_arb *arb = g->clk_arb;
 
+	gk20a_dbg_fn("");
+
 	session->zombie = true;
 	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
 
-	/* schedule arbiter if no more user */
-	if (!atomic_dec_and_test(&arb->usercount))
-		schedule_work(&arb->update_fn_work);
+	schedule_work(&arb->update_fn_work);
 }
 
 int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
@@ -230,19 +356,155 @@ int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
 	struct nvgpu_clk_dev *dev;
 	int fd;
 
+	gk20a_dbg_fn("");
+
 	fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
 	if (fd < 0)
 		return fd;
 
-	mutex_lock(&arb->users_lock);
+	spin_lock(&arb->users_lock);
 	list_add_tail(&dev->link, &arb->users);
-	mutex_unlock(&arb->users_lock);
+	spin_unlock(&arb->users_lock);
 
 	*event_fd = fd;
 
 	return 0;
 }
 
+int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
+	struct nvgpu_clk_session *session, int *request_fd)
+{
+	struct nvgpu_clk_dev *dev;
+	int fd;
+
+	gk20a_dbg_fn("");
+
+	fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev);
+	if (fd < 0)
+		return fd;
+
+	*request_fd = fd;
+
+	return 0;
+}
+
+static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *arb)
+{
+	struct gk20a *g = arb->g;
+
+	int i;
+	int status = 0;
+	u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
+	u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
+
+	/* the flag must be visible in all threads */
+	mb();
+	ACCESS_ONCE(arb->vftable_set) = false;
+
+	spin_lock(&arb->vf_lock);
+
+	if (!clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK,
+		&arb->gpc2clk_f_numpoints, arb->gpc2clk_f_points) < 0) {
+		gk20a_err(dev_from_gk20a(g),
+			"failed to fetch GPC2CLK frequency points");
+		goto exit_vftable;
+	}
+	if (clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_MCLK,
+		&arb->mclk_f_numpoints, arb->mclk_f_points) < 0) {
+		gk20a_err(dev_from_gk20a(g),
+			"failed to fetch MCLK frequency points");
+		goto exit_vftable;
+	}
+
+
+	memset(arb->mclk_vf_points, 0,
+		arb->mclk_f_numpoints*sizeof(struct nvgpu_clk_vf_point));
+	memset(arb->gpc2clk_vf_points, 0,
+		arb->gpc2clk_f_numpoints*sizeof(struct nvgpu_clk_vf_point));
+
+	for (i = 0 ; i < arb->mclk_f_numpoints; i++) {
+		arb->mclk_vf_points[i].mhz = arb->mclk_f_points[i];
+		mclk_voltuv = mclk_voltuv_sram = 0;
+
+		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
+			&arb->mclk_vf_points[i].mhz, &mclk_voltuv,
+			CTRL_VOLT_DOMAIN_LOGIC);
+		if (status < 0) {
+			gk20a_err(dev_from_gk20a(g),
+				"failed to get MCLK LOGIC voltage");
+			goto exit_vftable;
+		}
+		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
+			&arb->mclk_vf_points[i].mhz, &mclk_voltuv_sram,
+			CTRL_VOLT_DOMAIN_SRAM);
+		if (status < 0) {
+			gk20a_err(dev_from_gk20a(g),
+				"failed to get MCLK SRAM voltage");
+			goto exit_vftable;
+		}
+
+		arb->mclk_vf_points[i].uvolt = mclk_voltuv;
+		arb->mclk_vf_points[i].uvolt_sram = mclk_voltuv_sram;
+	}
+
+	for (i = 0 ; i < arb->gpc2clk_f_numpoints; i++) {
+		arb->gpc2clk_vf_points[i].mhz = arb->gpc2clk_f_points[i];
+		gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
+
+		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
+			&arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv,
+			CTRL_VOLT_DOMAIN_LOGIC);
+		if (status < 0) {
+			gk20a_err(dev_from_gk20a(g),
+				"failed to get GPC2CLK LOGIC voltage");
+			goto exit_vftable;
+		}
+		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
+			&arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv_sram,
+			CTRL_VOLT_DOMAIN_SRAM);
+		if (status < 0) {
+			gk20a_err(dev_from_gk20a(g),
+				"failed to get GPC2CLK SRAM voltage");
+			goto exit_vftable;
+		}
+
+		arb->gpc2clk_vf_points[i].uvolt = gpc2clk_voltuv;
+		arb->gpc2clk_vf_points[i].uvolt_sram = gpc2clk_voltuv_sram;
+
+	}
+
+	/* make flag visible when all data has resolved in the tables */
+	wmb();
+	ACCESS_ONCE(arb->vftable_set) = true;
+
+	wake_up(&arb->vftable_wq);
+exit_vftable:
+
+	spin_unlock(&arb->vf_lock);
+
+	return status;
+}
+
+void nvgpu_clk_arb_schedule_vftable_update(struct gk20a *g)
+{
+	struct nvgpu_clk_arb *arb = g->clk_arb;
+
+	ACCESS_ONCE(arb->vftable_set) = false;
+	/* Disable the flag in case arbiter gets scheduled first */
+	mb();
+
+	schedule_work(&arb->vftable_fn_work);
+	schedule_work(&arb->update_fn_work);
+}
+
+static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work)
+{
+	struct nvgpu_clk_arb *arb =
+		container_of(work, struct nvgpu_clk_arb, update_fn_work);
+
+	nvgpu_clk_arb_update_vftable(arb);
+}
+
 static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 {
 	struct nvgpu_clk_arb *arb =
@@ -250,67 +512,270 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	struct nvgpu_clk_session *session;
 	struct nvgpu_clk_dev *dev;
 	struct nvgpu_clk_dev *tmp;
+	struct gk20a *g = arb->g;
+
+	struct change_fll_clk fllclk;
+	u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
+	u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
+
+	u32 voltuv, voltuv_sram;
+
+	int status;
+
+	/* Temporary variables for checking target frequency */
+	u16 gpc2clk_target, mclk_target;
 
-	mutex_lock(&arb->req_lock);
+	/* iteration index */
+	u32 index;
 
-	arb->mclk_target_hz = arb->mclk_default_hz;
-	arb->gpc2clk_target_hz = arb->gpc2clk_default_hz;
+#ifdef CONFIG_DEBUG_FS
+	u64 t0, t1;
+#endif
 
-	list_for_each_entry(dev, &arb->requests, link) {
-		session = dev->session;
+	gk20a_dbg_fn("");
+
+#ifdef CONFIG_DEBUG_FS
+	g->ops.read_ptimer(g, &t0);
+#endif
+
+	/* Only one arbiter should be running */
+	gpc2clk_target = 0;
+	mclk_target = 0;
+
+	spin_lock(&arb->sessions_lock);
+	list_for_each_entry(session, &arb->sessions, link) {
 		if (!session->zombie) {
-			/* TODO: arbiter policy. For now last request wins */
+			spin_lock(&arb->req_lock);
+			spin_lock(&session->target_lock);
+
+			mclk_target = mclk_target > session->mclk_target_mhz ?
+				mclk_target : session->mclk_target_mhz;
+
+			gpc2clk_target =
+				gpc2clk_target > session->gpc2clk_target_mhz ?
+				gpc2clk_target : session->gpc2clk_target_mhz;
+			/* Move processed requests to notification list*/
+			list_for_each_entry_safe(dev, tmp, &session->targets,
+						link) {
+				list_del_init(&dev->link);
+				list_add_tail(&dev->link, &arb->requests);
+			}
+			spin_unlock(&session->target_lock);
+			spin_unlock(&arb->req_lock);
+
+		}
+	}
+	spin_unlock(&arb->sessions_lock);
+
+	gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target :
+		arb->gpc2clk_actual_mhz ? gpc2clk_target :
+		arb->gpc2clk_default_mhz;
 
-			arb->mclk_target_hz = session->mclk_target_hz;
-			arb->gpc2clk_target_hz = session->gpc2clk_target_hz;
+	mclk_target = (mclk_target > 0) ? mclk_target :
+		arb->mclk_actual_mhz ? mclk_target :
+		arb->mclk_default_mhz;
+
+	if (!gpc2clk_target && !mclk_target) {
+		mclk_target = arb->mclk_default_mhz;
+		gpc2clk_target = arb->gpc2clk_default_mhz;
+	}
+
+	if (!gpc2clk_target)
+		gpc2clk_target = arb->gpc2clk_actual_mhz;
+
+	do {
+		/* Check that the table is set */
+		mb();
+		wait_event(arb->vftable_wq, arb->vftable_set);
+	} while (!ACCESS_ONCE(arb->vftable_set));
+
+	spin_lock(&arb->vf_lock);
+	/* round up the freq requests */
+	for (index = 0; index < arb->gpc2clk_f_numpoints; index++) {
+		if (arb->gpc2clk_vf_points[index].mhz >= gpc2clk_target) {
+			gpc2clk_target = arb->gpc2clk_vf_points[index].mhz;
+			gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt;
+			gpc2clk_voltuv_sram =
+				arb->gpc2clk_vf_points[index].uvolt_sram;
+			break;
 		}
 	}
 
-	/* TODO: loop up higher or equal VF points */
+	if (index == arb->gpc2clk_f_numpoints) {
+		gpc2clk_target = arb->gpc2clk_vf_points[index].mhz;
+		gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt;
+		gpc2clk_voltuv_sram =
+			arb->gpc2clk_vf_points[index].uvolt_sram;
+	}
+
+	if (!mclk_target)
+		mclk_target = arb->mclk_actual_mhz;
+
+	for (index = 0; index < arb->mclk_f_numpoints; index++) {
+		if (arb->mclk_vf_points[index].mhz >= mclk_target) {
+			mclk_target = arb->mclk_vf_points[index].mhz;
+			mclk_voltuv = arb->mclk_vf_points[index].uvolt;
+			mclk_voltuv_sram =
+				arb->mclk_vf_points[index].uvolt_sram;
+			break;
+		}
+	}
+	if (index == arb->mclk_f_numpoints) {
+		mclk_target = arb->mclk_vf_points[index].mhz;
+		mclk_voltuv = arb->mclk_vf_points[index].uvolt;
+		mclk_voltuv_sram =
+			arb->mclk_vf_points[index].uvolt_sram;
+	}
+	spin_unlock(&arb->vf_lock);
+
+	/* Program clocks */
+	/* A change in either mclk or gpc2clk may require a change in voltage */
+	if ((arb->gpc2clk_actual_mhz == gpc2clk_target) &&
+		(arb->mclk_actual_mhz == mclk_target)) {
+		goto exit_arb;
+	}
+
+	voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
+	voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
+		gpc2clk_voltuv_sram : mclk_voltuv_sram;
+
+	/* if voltage ascends we do:
+	 * (1) FLL change
+	 * (2) Voltage change
+	 * (3) MCLK change
+	 * If it goes down
+	 * (1) MCLK change
+	 * (2) Voltage change
+	 * (3) FLL change
+	 */
+
+	/* descending */
+	if (voltuv <= arb->voltuv_actual) {
+		status = g->clk_pmu.clk_mclk.change(g, mclk_target);
+		if (status < 0)
+			goto exit_arb;
+
+		status = volt_set_voltage(g, voltuv, voltuv_sram);
+		if (status < 0)
+			goto exit_arb;
+
+		fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
+		fllclk.clkmhz = gpc2clk_target;
+		fllclk.voltuv = voltuv;
+		status = clk_program_fll_clks(g, &fllclk);
+		if (status < 0)
+			goto exit_arb;
+	} else {
+		fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
+		fllclk.clkmhz = gpc2clk_target;
+		fllclk.voltuv = voltuv;
+		status = clk_program_fll_clks(g, &fllclk);
+		if (status < 0)
+			goto exit_arb;
+
+		status = volt_set_voltage(g, voltuv, voltuv_sram);
+		if (status < 0)
+			goto exit_arb;
+
+		status = g->clk_pmu.clk_mclk.change(g, mclk_target);
+		if (status < 0)
+			goto exit_arb;
+	}
+
+	spin_lock(&arb->data_lock);
+	arb->gpc2clk_actual_mhz = gpc2clk_target;
+	arb->mclk_actual_mhz = mclk_target;
+	arb->voltuv_actual = voltuv;
+	/* Make changes visible to other threads */
+	wmb();
+
+	spin_unlock(&arb->data_lock);
+
+#ifdef CONFIG_DEBUG_FS
+	g->ops.read_ptimer(g, &t1);
+	arb->switch_num++;
+
+	mutex_lock(&arb->debug_lock);
+	if (arb->switch_num == 1) {
+		arb->switch_max = arb->switch_min =
+			arb->switch_avg = (t1-t0)/1000;
+		arb->switch_std = 0;
+	} else {
+		s64 prev_avg;
+		u64 curr = (t1-t0)/1000;
+
+		arb->switch_max = curr > arb->switch_max ?
+			curr : arb->switch_max;
+		arb->switch_min = arb->switch_min ?
+			(curr < arb->switch_min ?
+				curr : arb->switch_min) : curr;
+		prev_avg = arb->switch_avg;
+		arb->switch_avg = (curr +
+			(arb->switch_avg * (arb->switch_num-1))) /
+			arb->switch_num;
+		arb->switch_std +=
+			(curr - arb->switch_avg) * (curr - prev_avg);
+	}
+	mutex_unlock(&arb->debug_lock);
 
-	arb->mclk_current_hz = arb->mclk_target_hz;
-	arb->gpc2clk_current_hz = arb->gpc2clk_target_hz;
+#endif
 
-	/* TODO: actually program the clocks */
+exit_arb:
 
+	spin_lock(&arb->req_lock);
 	/* notify completion for all requests */
 	list_for_each_entry_safe(dev, tmp, &arb->requests, link) {
 		atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM);
 		wake_up_interruptible(&dev->readout_wq);
 		list_del_init(&dev->link);
 	}
-	mutex_unlock(&arb->req_lock);
+	spin_unlock(&arb->req_lock);
 
 	/* notify event for all users */
-	mutex_lock(&arb->users_lock);
+	spin_lock(&arb->users_lock);
 	list_for_each_entry(dev, &arb->users, link) {
 		atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM);
 		wake_up_interruptible(&dev->readout_wq);
 	}
-	mutex_unlock(&arb->users_lock);
-
+	spin_unlock(&arb->users_lock);
 }
 
-int nvgpu_clk_arb_apply_session_constraints(struct gk20a *g,
-		struct nvgpu_clk_session *session, int *completion_fd)
+int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
+	struct nvgpu_clk_session *session, int request_fd)
 {
 	struct nvgpu_clk_arb *arb = g->clk_arb;
 	struct nvgpu_clk_dev *dev;
-	int fd;
+	struct fd fd;
+	int err = 0;
 
-	fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev);
-	if (fd < 0)
-		return fd;
+	gk20a_dbg_fn("");
+
+	fd  = fdget(request_fd);
+
+	if (!fd.file)
+		return -EINVAL;
+
+	dev = (struct nvgpu_clk_dev *) fd.file->private_data;
 
-	*completion_fd = fd;
+	if (!dev || dev->session != session) {
+		err = -EINVAL;
+		goto fdput_fd;
+	}
+	spin_lock(&session->target_lock);
+	session->mclk_target_mhz = dev->mclk_target_mhz ? dev->mclk_target_mhz :
+		session->mclk_target_mhz;
+	session->gpc2clk_target_mhz = dev->gpc2clk_target_mhz ?
+		dev->gpc2clk_target_mhz :
+		session->gpc2clk_target_mhz;
 
-	mutex_lock(&arb->req_lock);
-	list_add_tail(&dev->link, &arb->requests);
-	mutex_unlock(&arb->req_lock);
+	list_add_tail(&dev->link, &session->targets);
+	spin_unlock(&session->target_lock);
 
 	schedule_work(&arb->update_fn_work);
 
-	return 0;
+fdput_fd:
+	fdput(fd);
+	return err;
 }
 
 static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait)
@@ -328,11 +793,22 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
 {
 	struct nvgpu_clk_dev *dev = filp->private_data;
 	struct nvgpu_clk_session *session = dev->session;
+	struct nvgpu_clk_arb *arb;
+
+	arb = session->g->clk_arb;
 
 	gk20a_dbg_fn("");
 
+	spin_lock(&arb->req_lock);
+	spin_lock(&session->target_lock);
+	if (!list_empty(&dev->link))
+		list_del_init(&dev->link);
+	spin_unlock(&session->target_lock);
+	spin_unlock(&arb->req_lock);
+
 	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
 	kfree(dev);
+
 	return 0;
 }
 
@@ -341,94 +817,123 @@ static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
 {
 	struct nvgpu_clk_dev *dev = filp->private_data;
 	struct nvgpu_clk_session *session = dev->session;
-	struct nvgpu_clk_arb *arb = session->g->clk_arb;
+	struct nvgpu_clk_arb *arb;
+
+	arb = session->g->clk_arb;
 
 	gk20a_dbg_fn("");
 
-	mutex_lock(&arb->users_lock);
-	list_del_init(&dev->link);
-	mutex_unlock(&arb->users_lock);
+	spin_lock(&arb->users_lock);
+	list_del(&dev->link);
+	spin_unlock(&arb->users_lock);
 
 	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
 	kfree(dev);
+
 	return 0;
 }
 
-int nvgpu_clk_arb_set_session_target_hz(struct nvgpu_clk_session *session,
-		u32 api_domain, u64 target_hz)
+int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
+		int request_fd, u32 api_domain, u16 target_mhz)
 {
+	struct nvgpu_clk_dev *dev;
+	struct fd fd;
+	int err = 0;
 
-	gk20a_dbg_fn("domain=0x%08x target_hz=%llu", api_domain, target_hz);
+	gk20a_dbg_fn("domain=0x%08x target_mhz=%u", api_domain, target_mhz);
+
+	fd = fdget(request_fd);
+
+	if (!fd.file)
+		return -EINVAL;
+
+	dev = fd.file->private_data;
+	if (!dev || dev->session != session) {
+		err = -EINVAL;
+		goto fdput_fd;
+	}
 
 	switch (api_domain) {
 	case NVGPU_GPU_CLK_DOMAIN_MCLK:
-		session->mclk_target_hz = target_hz;
-		return 0;
+		dev->mclk_target_mhz = target_mhz;
+		break;
 
 	case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
-		session->gpc2clk_target_hz = target_hz;
-		return 0;
+		dev->gpc2clk_target_mhz = target_mhz;
+		break;
 
 	default:
-		return -EINVAL;
+		err = -EINVAL;
 	}
+
+fdput_fd:
+	fdput(fd);
+	return err;
 }
 
-int nvgpu_clk_arb_get_session_target_hz(struct nvgpu_clk_session *session,
-		u32 api_domain, u64 *freq_hz)
+int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
+		u32 api_domain, u16 *freq_mhz)
 {
+	int err = 0;
+
+	spin_lock(&session->target_lock);
+
 	switch (api_domain) {
 	case NVGPU_GPU_CLK_DOMAIN_MCLK:
-		*freq_hz = session->mclk_target_hz;
-		return 0;
+		*freq_mhz = session->mclk_target_mhz;
+		break;
 
 	case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
-		*freq_hz = session->gpc2clk_target_hz;
-		return 0;
+		*freq_mhz = session->gpc2clk_target_mhz;
+		break;
 
 	default:
-		*freq_hz = 0;
-		return -EINVAL;
+		*freq_mhz = 0;
+		err = -EINVAL;
 	}
+
+	spin_unlock(&session->target_lock);
+	return err;
 }
 
-int nvgpu_clk_arb_get_arbiter_actual_hz(struct gk20a *g,
-		u32 api_domain, u64 *freq_hz)
+int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
+		u32 api_domain, u16 *freq_mhz)
 {
 	struct nvgpu_clk_arb *arb = g->clk_arb;
 	int err = 0;
 
-	mutex_lock(&arb->req_lock);
+	spin_lock(&arb->data_lock);
+
 	switch (api_domain) {
 	case NVGPU_GPU_CLK_DOMAIN_MCLK:
-		*freq_hz = arb->mclk_current_hz;
+		*freq_mhz = arb->mclk_actual_mhz;
 		break;
 
 	case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
-		*freq_hz = arb->gpc2clk_current_hz;
+		*freq_mhz = arb->gpc2clk_actual_mhz;
 		break;
 
 	default:
-		*freq_hz = 0;
+		*freq_mhz = 0;
 		err = -EINVAL;
 	}
-	mutex_unlock(&arb->req_lock);
 
+	spin_unlock(&arb->data_lock);
 	return err;
 }
 
-int nvgpu_clk_arb_get_arbiter_effective_hz(struct gk20a *g,
-		u32 api_domain, u64 *freq_hz)
+int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g,
+		u32 api_domain, u16 *freq_mhz)
 {
 	/* TODO: measure clocks from counters */
-	return nvgpu_clk_arb_get_arbiter_actual_hz(g, api_domain, freq_hz);
+	return nvgpu_clk_arb_get_arbiter_actual_mhz(g, api_domain, freq_mhz);
 }
 
 int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
-		u64 *min_hz, u64 *max_hz)
+		u16 *min_mhz, u16 *max_mhz)
 {
 	return g->ops.clk_arb.get_arbiter_clk_range(g, api_domain,
-			min_hz, max_hz);
+			min_mhz, max_mhz);
 }
 
 u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g)
@@ -441,3 +946,67 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
 {
 	return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints);
 }
+
+#ifdef CONFIG_DEBUG_FS
+static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
+{
+	struct gk20a *g = s->private;
+	struct nvgpu_clk_arb *arb = g->clk_arb;
+	u64 num;
+	s64 tmp, avg, std, max, min;
+
+	/* Make copy of structure to reduce time with lock held */
+	mutex_lock(&arb->debug_lock);
+	std = arb->switch_std;
+	avg = arb->switch_avg;
+	max = arb->switch_max;
+	min = arb->switch_min;
+	num = arb->switch_num;
+	mutex_unlock(&arb->debug_lock);
+
+	tmp = std;
+	do_div(tmp, num);
+	seq_printf(s, "Number of transitions: %lld\n",
+		num);
+	seq_printf(s, "max / min : %lld / %lld usec\n",
+		max, min);
+	seq_printf(s, "avg / std : %lld / %ld usec\n",
+		avg, int_sqrt(tmp));
+
+	return 0;
+}
+
+static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private);
+}
+
+static const struct file_operations nvgpu_clk_arb_stats_fops = {
+	.open		= nvgpu_clk_arb_stats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+
+static int nvgpu_clk_arb_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+
+	struct dentry *gpu_root = platform->debugfs;
+	struct dentry *d;
+
+	gk20a_dbg(gpu_dbg_info, "g=%p", g);
+
+	d = debugfs_create_file(
+			"arb_stats",
+			S_IRUGO,
+			gpu_root,
+			g,
+			&nvgpu_clk_arb_stats_fops);
+	if (!d)
+		return -ENOMEM;
+
+	return 0;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.h b/drivers/gpu/nvgpu/clk/clk_arb.h
index 95749369..717cca9b 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.h
+++ b/drivers/gpu/nvgpu/clk/clk_arb.h
@@ -22,13 +22,13 @@ struct nvgpu_clk_session;
 int nvgpu_clk_arb_init_arbiter(struct gk20a *g);
 
 int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
-		u64 *min_hz, u64 *max_hz);
+		u16 *min_mhz, u16 *max_mhz);
 
-int nvgpu_clk_arb_get_arbiter_actual_hz(struct gk20a *g,
-		u32 api_domain, u64 *actual_hz);
+int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
+		u32 api_domain, u16 *actual_mhz);
 
-int nvgpu_clk_arb_get_arbiter_effective_hz(struct gk20a *g,
-		u32 api_domain, u64 *actual_hz);
+int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g,
+		u32 api_domain, u16 *effective_mhz);
 
 int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
 	u32 api_domain, u32 *max_points, u16 *fpoints);
@@ -46,19 +46,21 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
 void nvgpu_clk_arb_release_session(struct gk20a *g,
 		struct nvgpu_clk_session *session);
 
-int nvgpu_clk_arb_apply_session_constraints(struct gk20a *g,
-	struct nvgpu_clk_session *session, int *completion_fd);
+int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
+	struct nvgpu_clk_session *session, int request_fd);
 
-int nvgpu_clk_arb_set_session_target_hz(struct nvgpu_clk_session *session,
-		u32 api_domain, u64 target_hz);
+int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
+		int fd, u32 api_domain, u16 target_mhz);
 
-int nvgpu_clk_arb_get_session_target_hz(struct nvgpu_clk_session *session,
-		u32 api_domain, u64 *target_hz);
+int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
+		u32 api_domain, u16 *target_mhz);
 
 int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
 	struct nvgpu_clk_session *session, int *event_fd);
 
+int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
+	struct nvgpu_clk_session *session, int *event_fd);
 
-
+void nvgpu_clk_arb_schedule_vftable_update(struct gk20a *g);
 #endif /* _CLK_ARB_H_ */
 
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.c b/drivers/gpu/nvgpu/clk/clk_mclk.c
index 86f4ff6d..6ad6c054 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.c
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.c
@@ -2222,7 +2222,7 @@ int clk_mclkseq_init_mclk_gddr5(struct gk20a *g)
 	return 0;
 }
 
-int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, enum gk20a_mclk_speed speed)
+int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val)
 {
 	struct clk_mclk_state *mclk;
 	struct pmu_payload payload = { {0} };
@@ -2236,6 +2236,7 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, enum gk20a_mclk_speed speed)
 #ifdef CONFIG_DEBUG_FS
 	u64 t0, t1;
 #endif
+	enum gk20a_mclk_speed speed;
 
 	gk20a_dbg_info("");
 
@@ -2246,6 +2247,13 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, enum gk20a_mclk_speed speed)
 	if (!mclk->init)
 		goto exit_status;
 
+	/* TODO thia should be done according to VBIOS tables */
+
+	speed = (val <= MCLK_LOW_SPEED_LIMIT) ? gk20a_mclk_low_speed :
+		(val <= MCLK_MID_SPEED_LIMIT) ? gk20a_mclk_mid_speed :
+						gk20a_mclk_high_speed;
+
+
 	if (speed == mclk->speed)
 		goto exit_status;
 
@@ -2374,20 +2382,13 @@ exit_status:
 #ifdef CONFIG_DEBUG_FS
 static int mclk_debug_speed_set(void *data, u64 val)
 {
-	enum gk20a_mclk_speed speed;
 	struct gk20a *g = (struct gk20a *) data;
 	struct clk_mclk_state *mclk;
 
 	mclk = &g->clk_pmu.clk_mclk;
 
-	/* TODO thia should be done according to VBIOS tables */
-
-	speed = (val <= MCLK_LOW_SPEED_LIMIT) ? gk20a_mclk_low_speed :
-		(val <= MCLK_MID_SPEED_LIMIT) ? gk20a_mclk_mid_speed :
-						gk20a_mclk_high_speed;
-
 	if (mclk->change)
-		return mclk->change(g, speed);
+		return mclk->change(g, (u16) val);
 	return 0;
 
 }
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.h b/drivers/gpu/nvgpu/clk/clk_mclk.h
index 9d193c96..e3e6c1ee 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.h
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.h
@@ -22,9 +22,12 @@ enum gk20a_mclk_speed {
 	gk20a_mclk_high_speed,
 };
 
-#define DEFAULT_BOOT_MCLK_SPEED gk20a_mclk_high_speed
 #define MCLK_LOW_SPEED_LIMIT 405
 #define MCLK_MID_SPEED_LIMIT 810
+#define MCLK_HIGH_SPEED_LIMIT 3003
+
+#define DEFAULT_BOOT_MCLK_SPEED MCLK_HIGH_SPEED_LIMIT
+
 struct clk_mclk_state {
 	enum gk20a_mclk_speed speed;
 	struct mutex mclk_mutex;
@@ -32,7 +35,7 @@ struct clk_mclk_state {
 	bool init;
 
 	/* function pointers */
-	int (*change)(struct gk20a *g, enum gk20a_mclk_speed speed);
+	int (*change)(struct gk20a *g, u16 val);
 
 #ifdef CONFIG_DEBUG_FS
 	s64 switch_max;
@@ -45,7 +48,6 @@ struct clk_mclk_state {
 };
 
 int clk_mclkseq_init_mclk_gddr5(struct gk20a *g);
-int clk_mclkseq_change_mclk_gddr5(struct gk20a *g,
-	enum gk20a_mclk_speed speed);
+int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val);
 
 #endif
diff --git a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
index 112cb588..d1cbb32b 100644
--- a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
@@ -23,7 +23,7 @@ static u32 gp106_get_arbiter_clk_domains(struct gk20a *g)
 }
 
 static int gp106_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
-		u64 *min_hz, u64 *max_hz)
+		u16 *min_mhz, u16 *max_mhz)
 {
 	enum nv_pmu_clk_clkwhich clkwhich;
 	struct clk_set_info *p0_info;
@@ -52,14 +52,14 @@ static int gp106_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
 	if (!p0_info)
 		return -EINVAL;
 
-	*min_hz = (u64)(p5_info->min_mhz) * (u64)MHZ;
-	*max_hz = (u64)(p0_info->max_mhz) * (u64)MHZ;
+	*min_mhz = p5_info->min_mhz;
+	*max_mhz = p0_info->max_mhz;
 
 	return 0;
 }
 
 static int gp106_get_arbiter_clk_default(struct gk20a *g, u32 api_domain,
-		u64 *default_hz)
+		u16 *default_mhz)
 {
 	enum nv_pmu_clk_clkwhich clkwhich;
 	struct clk_set_info *p0_info;
@@ -82,7 +82,7 @@ static int gp106_get_arbiter_clk_default(struct gk20a *g, u32 api_domain,
 	if (!p0_info)
 		return -EINVAL;
 
-	*default_hz = (u64)p0_info->max_mhz * (u64)MHZ;
+	*default_mhz = p0_info->max_mhz;
 
 	return 0;
 }
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index 0dc15201..f01b52ad 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -153,10 +153,6 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g)
 	if (err)
 		return err;
 
-	err = clk_set_boot_fll_clk(g);
-	if (err)
-		return err;
-
 	err = pmgr_domain_pmu_setup(g);
 	return err;
 }
-- 
cgit v1.2.2


From 1cf7baa7fde73ae1e3f28a115dfe7bc28bc5cc75 Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Wed, 12 Oct 2016 14:53:29 -0700
Subject: nvgpu: gpu: Use pstates for MCLK range

JIRA DNVGPU-168

Change-Id: I7ac05dca745b22b411fc0aa797969b97536dd2e6
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1239466
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/1267121
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk_mclk.c | 46 ++++++++++++++++++++++++++++++----------
 drivers/gpu/nvgpu/clk/clk_mclk.h | 13 ++++++------
 2 files changed, 41 insertions(+), 18 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.c b/drivers/gpu/nvgpu/clk/clk_mclk.c
index 6ad6c054..06ff9082 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.c
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.c
@@ -2182,12 +2182,16 @@ int clk_mclkseq_init_mclk_gddr5(struct gk20a *g)
 {
 	struct clk_mclk_state *mclk;
 	int status;
+	struct clk_set_info *p5_info;
+	struct clk_set_info *p0_info;
+
 
 	gk20a_dbg_fn("");
 
 	mclk = &g->clk_pmu.clk_mclk;
 
-	mutex_init(&mclk->mclk_mutex);
+	mutex_init(&mclk->mclk_lock);
+	mutex_init(&mclk->data_lock);
 
 	/* FBPA gain WAR */
 	gk20a_writel(g, fb_fbpa_fbio_iref_byte_rx_ctrl_r(), 0x22222222);
@@ -2202,7 +2206,23 @@ int clk_mclkseq_init_mclk_gddr5(struct gk20a *g)
 	/* Load RAM pattern */
 	mclk_memory_load_training_pattern(g);
 
-	mclk->vreg_buf = kzalloc((sizeof(u32) * VREG_COUNT), GFP_KERNEL);
+	p5_info = pstate_get_clk_set_info(g,
+			CTRL_PERF_PSTATE_P5, clkwhich_mclk);
+	if (!p5_info)
+		return -EINVAL;
+
+	p0_info = pstate_get_clk_set_info(g,
+			CTRL_PERF_PSTATE_P0, clkwhich_mclk);
+	if (!p0_info)
+		return -EINVAL;
+
+
+	mclk->p5_min = p5_info->min_mhz;
+	mclk->p0_min = p0_info->min_mhz;
+
+
+	mclk->vreg_buf = kcalloc(VREG_COUNT,
+		sizeof(u32), GFP_KERNEL);
 	if (!mclk->vreg_buf) {
 		gk20a_err(dev_from_gk20a(g),
 				"unable to allocate memory for VREG");
@@ -2242,15 +2262,13 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val)
 
 	mclk = &g->clk_pmu.clk_mclk;
 
-	mutex_lock(&mclk->mclk_mutex);
+	mutex_lock(&mclk->mclk_lock);
 
 	if (!mclk->init)
 		goto exit_status;
 
-	/* TODO thia should be done according to VBIOS tables */
-
-	speed = (val <= MCLK_LOW_SPEED_LIMIT) ? gk20a_mclk_low_speed :
-		(val <= MCLK_MID_SPEED_LIMIT) ? gk20a_mclk_mid_speed :
+	speed = (val < mclk->p5_min) ? gk20a_mclk_low_speed :
+		(val < mclk->p0_min) ? gk20a_mclk_mid_speed :
 						gk20a_mclk_high_speed;
 
 
@@ -2341,7 +2359,7 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val)
 			&seq_completion_status, 0);
 	if (seq_completion_status != 0) {
 		gk20a_err(dev_from_gk20a(g),
-			"seq_scrip update failed");
+			"seq_script update failed");
 		status = -EBUSY;
 		goto exit_status;
 	}
@@ -2350,6 +2368,8 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val)
 
 #ifdef CONFIG_DEBUG_FS
 	g->ops.read_ptimer(g, &t1);
+
+	mutex_lock(&mclk->data_lock);
 	mclk->switch_num++;
 
 	if (mclk->switch_num == 1) {
@@ -2372,10 +2392,11 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val)
 		mclk->switch_std +=
 			(curr - mclk->switch_avg) * (curr - prev_avg);
 	}
+	mutex_unlock(&mclk->data_lock);
 #endif
 exit_status:
 
-	mutex_unlock(&mclk->mclk_mutex);
+	mutex_unlock(&mclk->mclk_lock);
 	return status;
 }
 
@@ -2387,6 +2408,9 @@ static int mclk_debug_speed_set(void *data, u64 val)
 
 	mclk = &g->clk_pmu.clk_mclk;
 
+	/* This is problematic because it can interrupt the arbiter
+	 * and send it to sleep. we need to consider removing this
+	 */
 	if (mclk->change)
 		return mclk->change(g, (u16) val);
 	return 0;
@@ -2410,13 +2434,13 @@ static int mclk_switch_stats_show(struct seq_file *s, void *unused)
 	mclk = &g->clk_pmu.clk_mclk;
 
 	/* Make copy of structure to reduce time with lock held */
-	mutex_lock(&mclk->mclk_mutex);
+	mutex_lock(&mclk->data_lock);
 	std = mclk->switch_std;
 	avg = mclk->switch_avg;
 	max = mclk->switch_max;
 	min = mclk->switch_min;
 	num = mclk->switch_num;
-	mutex_unlock(&mclk->mclk_mutex);
+	mutex_unlock(&mclk->data_lock);
 
 	tmp = std;
 	do_div(tmp, num);
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.h b/drivers/gpu/nvgpu/clk/clk_mclk.h
index e3e6c1ee..cb7f0de0 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.h
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.h
@@ -22,15 +22,14 @@ enum gk20a_mclk_speed {
 	gk20a_mclk_high_speed,
 };
 
-#define MCLK_LOW_SPEED_LIMIT 405
-#define MCLK_MID_SPEED_LIMIT 810
-#define MCLK_HIGH_SPEED_LIMIT 3003
-
-#define DEFAULT_BOOT_MCLK_SPEED MCLK_HIGH_SPEED_LIMIT
-
 struct clk_mclk_state {
 	enum gk20a_mclk_speed speed;
-	struct mutex mclk_mutex;
+	struct mutex mclk_lock;
+	struct mutex data_lock;
+
+	u16 p5_min;
+	u16 p0_min;
+
 	void *vreg_buf;
 	bool init;
 
-- 
cgit v1.2.2


From c123d5056d3af043e2376fa0c5429db7deb3b31b Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Wed, 12 Oct 2016 14:54:37 -0700
Subject: gpu: nvgpu: gpu: read effective frequence from counter

(1) modified counters debug code to export to the driver
(2) modified arbiter to read from those functions
(3) modified counter for higher accuracy on MHz range

JIRA DNVGPU-164

Change-Id: I2bbf7c9be4dc59718d1d91c53028a39020b5aea5
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1239467
GVS: Gerrit_Virtual_Submit
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1268008
---
 drivers/gpu/nvgpu/clk/clk_arb.c     |  5 ++--
 drivers/gpu/nvgpu/gp106/clk_gp106.c | 59 ++++++++++++++++++++++++++++++-------
 drivers/gpu/nvgpu/gp106/clk_gp106.h |  7 +++--
 3 files changed, 56 insertions(+), 15 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index f868100b..1f7c2aea 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -925,8 +925,9 @@ int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
 int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g,
 		u32 api_domain, u16 *freq_mhz)
 {
-	/* TODO: measure clocks from counters */
-	return nvgpu_clk_arb_get_arbiter_actual_mhz(g, api_domain, freq_mhz);
+
+	*freq_mhz = g->ops.clk.get_rate(g, api_domain);
+	return 0;
 }
 
 int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.c b/drivers/gpu/nvgpu/gp106/clk_gp106.c
index 85dde69f..2a32690d 100644
--- a/drivers/gpu/nvgpu/gp106/clk_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/clk_gp106.c
@@ -39,10 +39,37 @@ static int clk_gp106_debugfs_init(struct gk20a *g);
 #define NUM_NAMEMAPS	4
 #define XTAL4X_KHZ 108000
 
+
+static u32 gp106_get_rate_cntr(struct gk20a *g, struct namemap_cfg *);
+static u16 gp106_clk_get_rate(struct gk20a *g, u32 api_domain);
 static u32 gp106_crystal_clk_hz(struct gk20a *g)
 {
 	return (XTAL4X_KHZ * 1000);
 }
+
+/* Measured rate of api_domain in MHz; 0 when unknown or not counter-backed */
+static u16 gp106_clk_get_rate(struct gk20a *g, u32 api_domain)
+{
+	struct clk_gk20a *clk = &g->clk;
+	u32 freq_khz;
+	int i;
+	struct namemap_cfg *c = NULL;
+
+	for (i = 0; i < clk->namemap_num; i++) {
+		if (api_domain == clk->namemap_xlat_table[i]) {
+			c = &clk->clk_namemap[i];
+			break;
+		}
+	}
+	if (!c)
+		return 0;
+
+	freq_khz = c->is_counter ? c->scale * gp106_get_rate_cntr(g, c) :
+		0; /* TODO: PLL read */
+	/* divide before narrowing: "(u16) freq_khz/1000" cast to 16 bits first */
+	return (u16)(freq_khz / 1000);
+}
+
 static int gp106_init_clk_support(struct gk20a *g) {
 	struct clk_gk20a *clk = &g->clk;
 	u32 err = 0;
@@ -57,6 +84,14 @@ static int gp106_init_clk_support(struct gk20a *g) {
 	if (!clk->clk_namemap)
 		return -ENOMEM;
 
+	clk->namemap_xlat_table = kcalloc(NUM_NAMEMAPS, sizeof(u32),
+		GFP_KERNEL);
+
+	if (!clk->namemap_xlat_table) {
+		kfree(clk->clk_namemap);
+		return -ENOMEM;
+	}
+
 	clk->clk_namemap[0] = (struct namemap_cfg) {
 		.namemap = CLK_NAMEMAP_INDEX_GPC2CLK,
 		.is_enable = 1,
@@ -66,8 +101,10 @@ static int gp106_init_clk_support(struct gk20a *g) {
 		.cntr.reg_ctrl_idx  =
 			trim_gpc_bcast_clk_cntr_ncgpcclk_cfg_source_gpc2clk_f(),
 		.cntr.reg_cntr_addr = trim_gpc_bcast_clk_cntr_ncgpcclk_cnt_r(),
-		.name = "gpc2clk"
+		.name = "gpc2clk",
+		.scale = 1
 	};
+	clk->namemap_xlat_table[0] = CTRL_CLK_DOMAIN_GPC2CLK;
 	clk->clk_namemap[1] = (struct namemap_cfg) {
 		.namemap = CLK_NAMEMAP_INDEX_SYS2CLK,
 		.is_enable = 1,
@@ -76,8 +113,10 @@ static int gp106_init_clk_support(struct gk20a *g) {
 		.cntr.reg_ctrl_addr = trim_sys_clk_cntr_ncsyspll_cfg_r(),
 		.cntr.reg_ctrl_idx  = trim_sys_clk_cntr_ncsyspll_cfg_source_sys2clk_f(),
 		.cntr.reg_cntr_addr = trim_sys_clk_cntr_ncsyspll_cnt_r(),
-		.name = "sys2clk"
+		.name = "sys2clk",
+		.scale = 1
 	};
+	clk->namemap_xlat_table[1] = CTRL_CLK_DOMAIN_SYS2CLK;
 	clk->clk_namemap[2] = (struct namemap_cfg) {
 		.namemap = CLK_NAMEMAP_INDEX_XBAR2CLK,
 		.is_enable = 1,
@@ -86,8 +125,10 @@ static int gp106_init_clk_support(struct gk20a *g) {
 		.cntr.reg_ctrl_addr = trim_sys_clk_cntr_ncltcpll_cfg_r(),
 		.cntr.reg_ctrl_idx  = trim_sys_clk_cntr_ncltcpll_cfg_source_xbar2clk_f(),
 		.cntr.reg_cntr_addr = trim_sys_clk_cntr_ncltcpll_cnt_r(),
-		.name = "xbar2clk"
+		.name = "xbar2clk",
+		.scale = 1
 	};
+	clk->namemap_xlat_table[2] = CTRL_CLK_DOMAIN_XBAR2CLK;
 	clk->clk_namemap[3] = (struct namemap_cfg) {
 		.namemap = CLK_NAMEMAP_INDEX_DRAMCLK,
 		.is_enable = 1,
@@ -97,8 +138,10 @@ static int gp106_init_clk_support(struct gk20a *g) {
 		.cntr.reg_ctrl_idx  =
 			trim_fbpa_bcast_clk_cntr_ncltcclk_cfg_source_dramdiv4_rec_clk1_f(),
 		.cntr.reg_cntr_addr = trim_fbpa_bcast_clk_cntr_ncltcclk_cnt_r(),
-		.name = "dramdiv2_rec_clk1"
+		.name = "dramdiv2_rec_clk1",
+		.scale = 2
 	};
+	clk->namemap_xlat_table[3] = CTRL_CLK_DOMAIN_MCLK;
 
 	clk->namemap_num = NUM_NAMEMAPS;
 
@@ -113,10 +156,6 @@ static int gp106_init_clk_support(struct gk20a *g) {
 	return err;
 }
 
-#ifdef CONFIG_DEBUG_FS
-typedef struct namemap_cfg namemap_cfg_t;
-static u32 gp106_get_rate_cntr(struct gk20a *, struct namemap_cfg *);
-
 static u32 gp106_get_rate_cntr(struct gk20a *g, struct namemap_cfg *c) {
 	u32 save_reg;
 	u32 retries;
@@ -180,6 +219,7 @@ read_err:
 
 }
 
+#ifdef CONFIG_DEBUG_FS
 static int gp106_get_rate_show(void *data , u64 *val) {
 	struct namemap_cfg *c = (struct namemap_cfg *) data;
 	struct gk20a *g = c->g;
@@ -222,12 +262,11 @@ err_out:
 	debugfs_remove_recursive(clocks_root);
 	return -ENOMEM;
 }
-
 #endif /* CONFIG_DEBUG_FS */
 
 void gp106_init_clk_ops(struct gpu_ops *gops) {
 	gops->clk.init_clk_support = gp106_init_clk_support;
 	gops->clk.get_crystal_clk_hz = gp106_crystal_clk_hz;
+	gops->clk.get_rate = gp106_clk_get_rate;
 }
 
-
diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.h b/drivers/gpu/nvgpu/gp106/clk_gp106.h
index a50819aa..7df4b974 100644
--- a/drivers/gpu/nvgpu/gp106/clk_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/clk_gp106.h
@@ -26,9 +26,9 @@
 #define CLK_DEFAULT_CNTRL_SETTLE_RETRIES 10
 #define CLK_DEFAULT_CNTRL_SETTLE_USECS   5
 
-#define XTAL_CNTR_CLKS		2700	/* 100usec at 27KHz XTAL */
-#define XTAL_CNTR_DELAY		110	/* leave 10 extra usec   */
-#define XTAL_SCALE_TO_KHZ	10
+#define XTAL_CNTR_CLKS		27000	/* 1000usec at 27MHz XTAL */
+#define XTAL_CNTR_DELAY		1000	/* we need accuracy up to the ms */
+#define XTAL_SCALE_TO_KHZ	1
 
 
@@ -47,6 +47,7 @@ struct namemap_cfg {
 			/* Todo */
 		} pll;
 	};
+	u32 scale;
 	char name[24];
 };
 
-- 
cgit v1.2.2


From 5ab254c6e84d741f56e9bcc93512f82eb7ce518c Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Mon, 17 Oct 2016 08:49:27 -0700
Subject: gpu: nvgpu: make clock arbiter lockless

Modification of the arbiter to remove all locking
from the arbiter callback and move the callbacks
out of the global queue

Adding modification to call for updated values of
vf table from arbiter init function and on
vf table update

JIRA: DNVGPU-170

Change-Id: I7a7d34c0590522901e06356c06fd8114ebf10f37
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1243212
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/1268009
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk_arb.c   | 893 +++++++++++++++++++++++---------------
 drivers/gpu/nvgpu/pstate/pstate.c |   4 -
 2 files changed, 546 insertions(+), 351 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index 1f7c2aea..aea32cb8 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -19,7 +19,8 @@
 #include <linux/nvgpu.h>
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
-
+#include <linux/rculist.h>
+#include <linux/llist.h>
 #include "clk/clk_arb.h"
 
 
@@ -36,8 +37,15 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
 static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait);
 
 static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work);
-static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work);
-static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *);
+static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work);
+static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb);
+static void nvgpu_clk_arb_free_fd(struct kref *refcount);
+static void nvgpu_clk_arb_free_session(struct kref *refcount);
+static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk,
+	u16 mclk, u32 voltuv, u32 voltuv_sram);
+static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
+		u16 *gpc2clk_target, u16 *mclk_target, u32 *voltuv,
+		u32 *voltuv_sram);
 
 struct nvgpu_clk_vf_point {
 	u16 mhz;
@@ -45,58 +53,80 @@ struct nvgpu_clk_vf_point {
 	u32 uvolt_sram;
 };
 
+struct nvgpu_clk_vf_table {
+	u32 mclk_num_points;	/* number of entries in mclk_points */
+	struct nvgpu_clk_vf_point *mclk_points;
+	u32 gpc2clk_num_points;	/* number of entries in gpc2clk_points */
+	struct nvgpu_clk_vf_point *gpc2clk_points;
+};
+#ifdef CONFIG_DEBUG_FS
+struct nvgpu_clk_arb_debug {
+	s64 switch_max;	/* presumably usec, as the stats file prints - confirm */
+	s64 switch_min;	/* presumably usec, as the stats file prints - confirm */
+	u64 switch_num;	/* presumably the number of recorded switches - confirm */
+	s64 switch_avg;	/* presumably usec, as the stats file prints - confirm */
+	s64 switch_std;	/* NOTE(review): looks like an accumulator, not a std dev */
+};
+#endif
+
+struct nvgpu_clk_arb_target {
+	u16 mclk;	/* MHz; a new session seeds it from mclk_default_mhz */
+	u16 gpc2clk;	/* MHz; a new session seeds it from gpc2clk_default_mhz */
+};
+
 struct nvgpu_clk_arb {
 	spinlock_t sessions_lock;
 	spinlock_t users_lock;
-	spinlock_t req_lock;
 
 	struct list_head users;
 	struct list_head sessions;
-	struct list_head requests;
+	struct llist_head requests;
 
 	struct gk20a *g;
-	spinlock_t data_lock;
-	spinlock_t vf_lock;
 
-	u16 gpc2clk_actual_mhz;
-	u16 gpc2clk_default_mhz;
+	struct nvgpu_clk_arb_target actual_pool[2];
+	struct nvgpu_clk_arb_target *actual;
 
-	u16 mclk_actual_mhz;
+	u16 gpc2clk_default_mhz;
 	u16 mclk_default_mhz;
 	u32 voltuv_actual;
 
 	struct work_struct update_fn_work;
-	struct work_struct vftable_fn_work;
-	wait_queue_head_t vftable_wq;
+	struct workqueue_struct *update_work_queue;
+	struct work_struct vf_table_fn_work;
+	struct workqueue_struct *vf_table_work_queue;
+
+	wait_queue_head_t request_wq;
+
+	struct nvgpu_clk_vf_table *current_vf_table;
+	struct nvgpu_clk_vf_table vf_table_pool[2];
+	u32 vf_table_index;
 
 	u16 *mclk_f_points;
-	bool vftable_set;
+	atomic_t req_nr;
 
-	struct nvgpu_clk_vf_point *mclk_vf_points;
 	u32 mclk_f_numpoints;
 	u16 *gpc2clk_f_points;
 	u32 gpc2clk_f_numpoints;
-	struct nvgpu_clk_vf_point *gpc2clk_vf_points;
 
 #ifdef CONFIG_DEBUG_FS
-	struct mutex debug_lock;
-	s64 switch_max;
-	s64 switch_min;
-	u64 switch_num;
-	s64 switch_avg;
-	s64 switch_std;
+	struct nvgpu_clk_arb_debug debug_pool[2];
+	struct nvgpu_clk_arb_debug *debug;
 	bool debugfs_set;
 #endif
 };
 
-
 struct nvgpu_clk_dev {
 	struct nvgpu_clk_session *session;
-	struct list_head link;
+	union {
+		struct list_head link;
+		struct llist_node node;
+	};
 	wait_queue_head_t readout_wq;
 	atomic_t poll_mask;
 	u16 gpc2clk_target_mhz;
 	u16 mclk_target_mhz;
+	struct kref refcount;
 };
 
 struct nvgpu_clk_session {
@@ -104,11 +134,10 @@ struct nvgpu_clk_session {
 	struct gk20a *g;
 	struct kref refcount;
 	struct list_head link;
-	struct list_head targets;
+	struct llist_head targets;
 
-	spinlock_t target_lock;
-	u16 gpc2clk_target_mhz;
-	u16 mclk_target_mhz;
+	struct nvgpu_clk_arb_target target_pool[2];
+	struct nvgpu_clk_arb_target *target;
 };
 
 static const struct file_operations completion_dev_ops = {
@@ -128,6 +157,8 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 	struct nvgpu_clk_arb *arb;
 	u16 default_mhz;
 	int err;
+	int index;
+	struct nvgpu_clk_vf_table *table;
 
 	gk20a_dbg_fn("");
 
@@ -140,33 +171,37 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 		goto init_fail;
 	}
 
-	arb->gpc2clk_f_numpoints = MAX_F_POINTS;
-	arb->mclk_f_numpoints = MAX_F_POINTS;
-
-	arb->gpc2clk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL);
-	if (!arb->gpc2clk_f_points) {
-		err = -ENOMEM;
-		goto init_fail;
-	}
-
 	arb->mclk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL);
 	if (!arb->mclk_f_points) {
 		err = -ENOMEM;
 		goto init_fail;
 	}
 
-	arb->gpc2clk_vf_points = kcalloc(MAX_F_POINTS,
-		sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL);
-	if (!arb->gpc2clk_vf_points) {
+	arb->gpc2clk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL);
+	if (!arb->gpc2clk_f_points) {
 		err = -ENOMEM;
 		goto init_fail;
 	}
 
-	arb->mclk_vf_points = kcalloc(MAX_F_POINTS,
-		sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL);
-	if (!arb->mclk_vf_points) {
-		err = -ENOMEM;
-		goto init_fail;
+	for (index = 0; index < 2; index++) {
+		table = &arb->vf_table_pool[index];
+		table->gpc2clk_num_points = MAX_F_POINTS;
+		table->mclk_num_points = MAX_F_POINTS;
+
+		table->gpc2clk_points = kcalloc(MAX_F_POINTS,
+			sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL);
+		if (!table->gpc2clk_points) {
+			err = -ENOMEM;
+			goto init_fail;
+		}
+
+
+		table->mclk_points = kcalloc(MAX_F_POINTS,
+			sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL);
+		if (!table->mclk_points) {
+			err = -ENOMEM;
+			goto init_fail;
+		}
 	}
 
 	g->clk_arb = arb;
@@ -174,9 +209,6 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 
 	spin_lock_init(&arb->sessions_lock);
 	spin_lock_init(&arb->users_lock);
-	spin_lock_init(&arb->req_lock);
-	spin_lock_init(&arb->data_lock);
-	spin_lock_init(&arb->vf_lock);
 
 	err =  g->ops.clk_arb.get_arbiter_clk_default(g,
 			NVGPU_GPU_CLK_DOMAIN_MCLK, &default_mhz);
@@ -196,39 +228,58 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 
 	arb->gpc2clk_default_mhz = default_mhz;
 
-	INIT_LIST_HEAD(&arb->users);
-	INIT_LIST_HEAD(&arb->sessions);
-	INIT_LIST_HEAD(&arb->requests);
+	arb->actual = &arb->actual_pool[0];
+
+	atomic_set(&arb->req_nr, 0);
 
-	init_waitqueue_head(&arb->vftable_wq);
+	INIT_LIST_HEAD_RCU(&arb->users);
+	INIT_LIST_HEAD_RCU(&arb->sessions);
+	init_llist_head(&arb->requests);
 
-	INIT_WORK(&arb->vftable_fn_work, nvgpu_clk_arb_run_vftable_cb);
+	init_waitqueue_head(&arb->request_wq);
+	arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
+		"vf_table_update");
+	arb->update_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
+		"arbiter_update");
+
+
+	INIT_WORK(&arb->vf_table_fn_work, nvgpu_clk_arb_run_vf_table_cb);
 
 	INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
 
 #ifdef CONFIG_DEBUG_FS
-	mutex_init(&arb->debug_lock);
+	arb->debug = &arb->debug_pool[0];
+
 	if (!arb->debugfs_set) {
 		if (nvgpu_clk_arb_debugfs_init(g))
 			arb->debugfs_set = true;
 	}
 #endif
-	err = nvgpu_clk_arb_update_vftable(arb);
+	err = clk_vf_point_cache(g);
 	if (err < 0)
 		goto init_fail;
 
-	/* Schedule first run */
-	schedule_work(&arb->update_fn_work);
+	err = nvgpu_clk_arb_update_vf_table(arb);
+	if (err < 0)
+		goto init_fail;
+	do {
+		/* Check that first run is completed */
+		smp_mb();
+		wait_event_interruptible(arb->request_wq,
+			atomic_read(&arb->req_nr));
+	} while (!atomic_read(&arb->req_nr));
 
 	return 0;
 
 init_fail:
 
 	kfree(arb->gpc2clk_f_points);
-	kfree(arb->gpc2clk_vf_points);
-
 	kfree(arb->mclk_f_points);
-	kfree(arb->mclk_vf_points);
+
+	for (index = 0; index < 2; index++) {
+		kfree(arb->vf_table_pool[index].gpc2clk_points);
+		kfree(arb->vf_table_pool[index].mclk_points);
+	}
 
 	kfree(arb);
 
@@ -275,6 +326,8 @@ static int nvgpu_clk_arb_install_fd(struct gk20a *g,
 	atomic_set(&dev->poll_mask, 0);
 
 	dev->session = session;
+	kref_init(&dev->refcount);
+
 	kref_get(&session->refcount);
 
 	*_dev = dev;
@@ -305,15 +358,15 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
 	session->g = g;
 
 	kref_init(&session->refcount);
-	spin_lock_init(&session->target_lock);
 
 	session->zombie = false;
-	session->mclk_target_mhz = arb->mclk_default_mhz;
-	session->gpc2clk_target_mhz = arb->gpc2clk_default_mhz;
-	INIT_LIST_HEAD(&session->targets);
+	session->target = &session->target_pool[0];
+	session->target->mclk  = arb->mclk_default_mhz;
+	session->target->gpc2clk = arb->gpc2clk_default_mhz;
+	init_llist_head(&session->targets);
 
 	spin_lock(&arb->sessions_lock);
-	list_add_tail(&session->link, &arb->sessions);
+	list_add_tail_rcu(&session->link, &arb->sessions);
 	spin_unlock(&arb->sessions_lock);
 
 	*_session = session;
@@ -321,19 +374,34 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
 	return 0;
 }
 
-void nvgpu_clk_arb_free_session(struct kref *refcount)
+static void nvgpu_clk_arb_free_fd(struct kref *refcount)
+{
+	struct nvgpu_clk_dev *dev = container_of(refcount,
+			struct nvgpu_clk_dev, refcount);
+
+	kfree(dev);	/* kref release: free the dev embedding this refcount */
+}
+
+static void nvgpu_clk_arb_free_session(struct kref *refcount)
 {
 	struct nvgpu_clk_session *session = container_of(refcount,
 			struct nvgpu_clk_session, refcount);
 	struct nvgpu_clk_arb *arb = session->g->clk_arb;
+	struct nvgpu_clk_dev *dev, *tmp;
+	struct llist_node *head;
 
 	gk20a_dbg_fn("");
 
 	spin_lock(&arb->sessions_lock);
-	list_del(&session->link);
+	list_del_rcu(&session->link);
 	spin_unlock(&arb->sessions_lock);
+	/* drop one reference on every dev still linked on session->targets */
+	head = llist_del_all(&session->targets);
+	llist_for_each_entry_safe(dev, tmp, head, node) {
+		kref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
+	}
+	synchronize_rcu();	/* session was unlinked with list_del_rcu() above */
 	kfree(session);
-;
 }
 
 void nvgpu_clk_arb_release_session(struct gk20a *g,
@@ -346,7 +414,7 @@ void nvgpu_clk_arb_release_session(struct gk20a *g,
 	session->zombie = true;
 	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
 
-	schedule_work(&arb->update_fn_work);
+	queue_work(arb->update_work_queue, &arb->update_fn_work);
 }
 
 int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
@@ -363,7 +431,7 @@ int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
 		return fd;
 
 	spin_lock(&arb->users_lock);
-	list_add_tail(&dev->link, &arb->users);
+	list_add_tail_rcu(&dev->link, &arb->users);
 	spin_unlock(&arb->users_lock);
 
 	*event_fd = fd;
@@ -388,121 +456,159 @@ int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
 	return 0;
 }
 
-static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *arb)
+static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 {
 	struct gk20a *g = arb->g;
+	struct nvgpu_clk_vf_table *table;
 
-	int i;
+	int i, j;
 	int status = 0;
 	u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
 	u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
+	u16 gpc2clk_min, gpc2clk_max, clk_cur;
+	u16 mclk_min, mclk_max;
+	u32 num_points;
+
+	table = ACCESS_ONCE(arb->current_vf_table);
+	/* read current table pointer before selecting the spare one */
+	smp_rmb();
 
-	/* the flag must be visible in all threads */
-	mb();
-	ACCESS_ONCE(arb->vftable_set) = false;
+	table = (table == &arb->vf_table_pool[0]) ? &arb->vf_table_pool[1] :
+		&arb->vf_table_pool[0];
 
-	spin_lock(&arb->vf_lock);
+	/* Get allowed memory ranges */
+	if (nvgpu_clk_arb_get_arbiter_clk_range(g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK,
+						&gpc2clk_min, &gpc2clk_max) < 0)
+		goto exit_vf_table;
+	if (nvgpu_clk_arb_get_arbiter_clk_range(g, NVGPU_GPU_CLK_DOMAIN_MCLK,
+						&mclk_min, &mclk_max) < 0)
+		goto exit_vf_table;
 
 	if (!clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK,
-		&arb->gpc2clk_f_numpoints, arb->gpc2clk_f_points) < 0) {
+		&table->gpc2clk_num_points, arb->gpc2clk_f_points) < 0) {
 		gk20a_err(dev_from_gk20a(g),
 			"failed to fetch GPC2CLK frequency points");
-		goto exit_vftable;
+		goto exit_vf_table;
 	}
 	if (clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_MCLK,
-		&arb->mclk_f_numpoints, arb->mclk_f_points) < 0) {
+		&table->mclk_num_points, arb->mclk_f_points) < 0) {
 		gk20a_err(dev_from_gk20a(g),
 			"failed to fetch MCLK frequency points");
-		goto exit_vftable;
+		goto exit_vf_table;
 	}
 
+	memset(table->mclk_points, 0,
+		table->mclk_num_points*sizeof(struct nvgpu_clk_vf_point));
+	memset(table->gpc2clk_points, 0,
+		table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point));
+
+	for (i = 0, j = 0, num_points = 0, clk_cur = 0;
+			i < table->mclk_num_points; i++) {
+		if ((arb->mclk_f_points[i] >= mclk_min) &&
+			(arb->mclk_f_points[i] <= mclk_max) &&
+			(arb->mclk_f_points[i] != clk_cur)) {
+
+			table->mclk_points[j].mhz = arb->mclk_f_points[i];
+			mclk_voltuv = mclk_voltuv_sram = 0;
+
+			status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
+				&table->mclk_points[j].mhz, &mclk_voltuv,
+				CTRL_VOLT_DOMAIN_LOGIC);
+			if (status < 0) {
+				gk20a_err(dev_from_gk20a(g),
+					"failed to get MCLK LOGIC voltage");
+				goto exit_vf_table;
+			}
+			status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
+				&table->mclk_points[j].mhz, &mclk_voltuv_sram,
+				CTRL_VOLT_DOMAIN_SRAM);
+			if (status < 0) {
+				gk20a_err(dev_from_gk20a(g),
+					"failed to get MCLK SRAM voltage");
+				goto exit_vf_table;
+			}
 
-	memset(arb->mclk_vf_points, 0,
-		arb->mclk_f_numpoints*sizeof(struct nvgpu_clk_vf_point));
-	memset(arb->gpc2clk_vf_points, 0,
-		arb->gpc2clk_f_numpoints*sizeof(struct nvgpu_clk_vf_point));
-
-	for (i = 0 ; i < arb->mclk_f_numpoints; i++) {
-		arb->mclk_vf_points[i].mhz = arb->mclk_f_points[i];
-		mclk_voltuv = mclk_voltuv_sram = 0;
-
-		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
-			&arb->mclk_vf_points[i].mhz, &mclk_voltuv,
-			CTRL_VOLT_DOMAIN_LOGIC);
-		if (status < 0) {
-			gk20a_err(dev_from_gk20a(g),
-				"failed to get MCLK LOGIC voltage");
-			goto exit_vftable;
-		}
-		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
-			&arb->mclk_vf_points[i].mhz, &mclk_voltuv_sram,
-			CTRL_VOLT_DOMAIN_SRAM);
-		if (status < 0) {
-			gk20a_err(dev_from_gk20a(g),
-				"failed to get MCLK SRAM voltage");
-			goto exit_vftable;
+			table->mclk_points[j].uvolt = mclk_voltuv;
+			table->mclk_points[j].uvolt_sram = mclk_voltuv_sram;
+			clk_cur = table->mclk_points[j].mhz;
+			j++;
+			num_points++;
 		}
-
-		arb->mclk_vf_points[i].uvolt = mclk_voltuv;
-		arb->mclk_vf_points[i].uvolt_sram = mclk_voltuv_sram;
 	}
+	table->mclk_num_points = num_points;
+
+	for (i = 0, j = 0, num_points = 0, clk_cur = 0;
+			i < table->gpc2clk_num_points; i++) {
+		if ((arb->gpc2clk_f_points[i] >= gpc2clk_min) &&
+			(arb->gpc2clk_f_points[i] <= gpc2clk_max) &&
+			(arb->gpc2clk_f_points[i] != clk_cur)) {
+
+			table->gpc2clk_points[j].mhz = arb->gpc2clk_f_points[i];
+			gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
+
+			status = clk_domain_get_f_or_v(g,
+				CTRL_CLK_DOMAIN_GPC2CLK,
+				&table->gpc2clk_points[j].mhz, &gpc2clk_voltuv,
+				CTRL_VOLT_DOMAIN_LOGIC);
+			if (status < 0) {
+				gk20a_err(dev_from_gk20a(g),
+					"failed to get GPC2CLK LOGIC voltage");
+				goto exit_vf_table;
+			}
 
-	for (i = 0 ; i < arb->gpc2clk_f_numpoints; i++) {
-		arb->gpc2clk_vf_points[i].mhz = arb->gpc2clk_f_points[i];
-		gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
-
-		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
-			&arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv,
-			CTRL_VOLT_DOMAIN_LOGIC);
-		if (status < 0) {
-			gk20a_err(dev_from_gk20a(g),
-				"failed to get GPC2CLK LOGIC voltage");
-			goto exit_vftable;
-		}
-		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
-			&arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv_sram,
-			CTRL_VOLT_DOMAIN_SRAM);
-		if (status < 0) {
-			gk20a_err(dev_from_gk20a(g),
-				"failed to get GPC2CLK SRAM voltage");
-			goto exit_vftable;
-		}
-
-		arb->gpc2clk_vf_points[i].uvolt = gpc2clk_voltuv;
-		arb->gpc2clk_vf_points[i].uvolt_sram = gpc2clk_voltuv_sram;
+			status = clk_domain_get_f_or_v(g,
+				CTRL_CLK_DOMAIN_GPC2CLK,
+				&table->gpc2clk_points[j].mhz,
+				&gpc2clk_voltuv_sram,
+				CTRL_VOLT_DOMAIN_SRAM);
+			if (status < 0) {
+				gk20a_err(dev_from_gk20a(g),
+					"failed to get GPC2CLK SRAM voltage");
+				goto exit_vf_table;
+			}
 
+			table->gpc2clk_points[j].uvolt = gpc2clk_voltuv;
+			table->gpc2clk_points[j].uvolt_sram =
+				gpc2clk_voltuv_sram;
+			clk_cur = table->gpc2clk_points[j].mhz;
+			j++;
+			num_points++;
+		}
 	}
+	table->gpc2clk_num_points = num_points;
 
-	/* make flag visible when all data has resolved in the tables */
-	wmb();
-	ACCESS_ONCE(arb->vftable_set) = true;
-
-	wake_up(&arb->vftable_wq);
-exit_vftable:
+	/* make table visible when all data has resolved in the tables */
+	smp_wmb();
+	xchg(&arb->current_vf_table, table);
 
-	spin_unlock(&arb->vf_lock);
+	queue_work(arb->update_work_queue, &arb->update_fn_work);
+exit_vf_table:
 
 	return status;
 }
 
-void nvgpu_clk_arb_schedule_vftable_update(struct gk20a *g)
+void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
 {
 	struct nvgpu_clk_arb *arb = g->clk_arb;
 
-	ACCESS_ONCE(arb->vftable_set) = false;
-	/* Disable the flag in case arbiter gets scheduled first */
-	mb();
-
-	schedule_work(&arb->vftable_fn_work);
-	schedule_work(&arb->update_fn_work);
+	queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work);
 }
 
-static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work)
+static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
 {
 	struct nvgpu_clk_arb *arb =
-		container_of(work, struct nvgpu_clk_arb, update_fn_work);
+		container_of(work, struct nvgpu_clk_arb, vf_table_fn_work);
+	struct gk20a *g = arb->g;
+	u32 err;
 
-	nvgpu_clk_arb_update_vftable(arb);
+	/* refresh the cached VF curve from the PMU, then rebuild the table */
+	err = clk_vf_point_cache(g);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g),
+			"failed to cache VF table");
+		return;
+	}
+	nvgpu_clk_arb_update_vf_table(arb);
 }
 
 static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
@@ -512,24 +618,22 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	struct nvgpu_clk_session *session;
 	struct nvgpu_clk_dev *dev;
 	struct nvgpu_clk_dev *tmp;
+	struct nvgpu_clk_arb_target *target, *actual;
 	struct gk20a *g = arb->g;
-
-	struct change_fll_clk fllclk;
-	u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
-	u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
+	struct llist_node *head;
 
 	u32 voltuv, voltuv_sram;
+	bool mclk_set, gpc2clk_set;
 
 	int status;
 
 	/* Temporary variables for checking target frequency */
 	u16 gpc2clk_target, mclk_target;
 
-	/* iteration index */
-	u32 index;
-
 #ifdef CONFIG_DEBUG_FS
 	u64 t0, t1;
+	struct nvgpu_clk_arb_debug *debug;
+
 #endif
 
 	gk20a_dbg_fn("");
@@ -542,37 +646,61 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	gpc2clk_target = 0;
 	mclk_target = 0;
 
-	spin_lock(&arb->sessions_lock);
-	list_for_each_entry(session, &arb->sessions, link) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(session, &arb->sessions, link) {
 		if (!session->zombie) {
-			spin_lock(&arb->req_lock);
-			spin_lock(&session->target_lock);
+			mclk_set = false;
+			gpc2clk_set = false;
+			target = ACCESS_ONCE(session->target) ==
+				&session->target_pool[0] ?
+					&session->target_pool[1] :
+					&session->target_pool[0];
+			/* Do not reorder pointer */
+			smp_rmb();
+			head = llist_del_all(&session->targets);
+			if (head) {
+
+				/* Copy over state */
+				target->mclk = session->target->mclk;
+				target->gpc2clk = session->target->gpc2clk;
+				/* Query the latest committed request */
+				llist_for_each_entry_safe(dev, tmp, head,
+									node) {
+					if (!mclk_set && dev->mclk_target_mhz) {
+						target->mclk =
+							dev->mclk_target_mhz;
+						mclk_set = true;
+					}
+					if (!gpc2clk_set &&
+						dev->gpc2clk_target_mhz) {
+						target->gpc2clk =
+							dev->gpc2clk_target_mhz;
+						gpc2clk_set = true;
+					}
+					kref_get(&dev->refcount);
+					llist_add(&dev->node, &arb->requests);
+				}
+				/* Ensure target is updated before ptr swap */
+				smp_wmb();
+				xchg(&session->target, target);
+			}
 
-			mclk_target = mclk_target > session->mclk_target_mhz ?
-				mclk_target : session->mclk_target_mhz;
+			mclk_target = mclk_target > session->target->mclk ?
+				mclk_target : session->target->mclk;
 
 			gpc2clk_target =
-				gpc2clk_target > session->gpc2clk_target_mhz ?
-				gpc2clk_target : session->gpc2clk_target_mhz;
-			/* Move processed requests to notification list*/
-			list_for_each_entry_safe(dev, tmp, &session->targets,
-						link) {
-				list_del_init(&dev->link);
-				list_add_tail(&dev->link, &arb->requests);
-			}
-			spin_unlock(&session->target_lock);
-			spin_unlock(&arb->req_lock);
-
+				gpc2clk_target > session->target->gpc2clk ?
+				gpc2clk_target : session->target->gpc2clk;
 		}
 	}
-	spin_unlock(&arb->sessions_lock);
+	rcu_read_unlock();
 
 	gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target :
-		arb->gpc2clk_actual_mhz ? gpc2clk_target :
+		arb->actual->gpc2clk ? gpc2clk_target :
 		arb->gpc2clk_default_mhz;
 
 	mclk_target = (mclk_target > 0) ? mclk_target :
-		arb->mclk_actual_mhz ? mclk_target :
+		arb->actual->mclk ? mclk_target :
 		arb->mclk_default_mhz;
 
 	if (!gpc2clk_target && !mclk_target) {
@@ -581,163 +709,100 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	}
 
 	if (!gpc2clk_target)
-		gpc2clk_target = arb->gpc2clk_actual_mhz;
+		gpc2clk_target = arb->actual->gpc2clk;
 
-	do {
-		/* Check that the table is set */
-		mb();
-		wait_event(arb->vftable_wq, arb->vftable_set);
-	} while (!ACCESS_ONCE(arb->vftable_set));
-
-	spin_lock(&arb->vf_lock);
-	/* round up the freq requests */
-	for (index = 0; index < arb->gpc2clk_f_numpoints; index++) {
-		if (arb->gpc2clk_vf_points[index].mhz >= gpc2clk_target) {
-			gpc2clk_target = arb->gpc2clk_vf_points[index].mhz;
-			gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt;
-			gpc2clk_voltuv_sram =
-				arb->gpc2clk_vf_points[index].uvolt_sram;
-			break;
-		}
-	}
+	if (!mclk_target)
+		mclk_target = arb->actual->mclk;
 
-	if (index == arb->gpc2clk_f_numpoints) {
-		gpc2clk_target = arb->gpc2clk_vf_points[index].mhz;
-		gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt;
-		gpc2clk_voltuv_sram =
-			arb->gpc2clk_vf_points[index].uvolt_sram;
-	}
 
-	if (!mclk_target)
-		mclk_target = arb->mclk_actual_mhz;
+	/* Query the table for the closest vf point to program */
+	nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target, &mclk_target, &voltuv,
+		&voltuv_sram);
 
-	for (index = 0; index < arb->mclk_f_numpoints; index++) {
-		if (arb->mclk_vf_points[index].mhz >= mclk_target) {
-			mclk_target = arb->mclk_vf_points[index].mhz;
-			mclk_voltuv = arb->mclk_vf_points[index].uvolt;
-			mclk_voltuv_sram =
-				arb->mclk_vf_points[index].uvolt_sram;
-			break;
-		}
-	}
-	if (index == arb->mclk_f_numpoints) {
-		mclk_target = arb->mclk_vf_points[index].mhz;
-		mclk_voltuv = arb->mclk_vf_points[index].uvolt;
-		mclk_voltuv_sram =
-			arb->mclk_vf_points[index].uvolt_sram;
+	if ((arb->actual->gpc2clk == gpc2clk_target) &&
+		(arb->actual->mclk == mclk_target) &&
+		(arb->voltuv_actual == voltuv)) {
+		goto exit_arb;
 	}
-	spin_unlock(&arb->vf_lock);
 
 	/* Program clocks */
 	/* A change in both mclk of gpc2clk may require a change in voltage */
-	if ((arb->gpc2clk_actual_mhz == gpc2clk_target) &&
-		(arb->mclk_actual_mhz == mclk_target)) {
-		goto exit_arb;
-	}
-
-	voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
-	voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
-		gpc2clk_voltuv_sram : mclk_voltuv_sram;
 
-	/* if voltage ascends we do:
-	 * (1) FLL change
-	 * (2) Voltage change
-	 * (3) MCLK change
-	 * If it goes down
-	 * (1) MCLK change
-	 * (2) Voltage change
-	 * (3) FLL change
-	 */
+	status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target, mclk_target,
+		voltuv, voltuv_sram);
 
-	/* descending */
-	if (voltuv <= arb->voltuv_actual) {
-		status = g->clk_pmu.clk_mclk.change(g, mclk_target);
-		if (status < 0)
-			goto exit_arb;
+	if (status < 0)
+		goto exit_arb;
 
-		status = volt_set_voltage(g, voltuv, voltuv_sram);
-		if (status < 0)
-			goto exit_arb;
+	actual = ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ?
+			&arb->actual_pool[1] : &arb->actual_pool[0];
 
-		fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
-		fllclk.clkmhz = gpc2clk_target;
-		fllclk.voltuv = voltuv;
-		status = clk_program_fll_clks(g, &fllclk);
-		if (status < 0)
-			goto exit_arb;
-	} else {
-		fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
-		fllclk.clkmhz = gpc2clk_target;
-		fllclk.voltuv = voltuv;
-		status = clk_program_fll_clks(g, &fllclk);
-		if (status < 0)
-			goto exit_arb;
+	/* do not reorder this pointer */
+	smp_rmb();
+	actual->gpc2clk = gpc2clk_target;
+	actual->mclk = mclk_target;
+	arb->voltuv_actual = voltuv;
 
-		status = volt_set_voltage(g, voltuv, voltuv_sram);
-		if (status < 0)
-			goto exit_arb;
+	/* Make changes visible to other threads */
+	smp_wmb();
+	xchg(&arb->actual, actual);
 
-		status = g->clk_pmu.clk_mclk.change(g, mclk_target);
-		if (status < 0)
-			goto exit_arb;
-	}
+	atomic_inc(&arb->req_nr);
 
-	spin_lock(&arb->data_lock);
-	arb->gpc2clk_actual_mhz = gpc2clk_target;
-	arb->mclk_actual_mhz = mclk_target;
-	arb->voltuv_actual = voltuv;
-	/* Make changes visible to other threads */
-	wmb();
+	wake_up_interruptible(&arb->request_wq);
 
-	spin_unlock(&arb->data_lock);
 
 #ifdef CONFIG_DEBUG_FS
 	g->ops.read_ptimer(g, &t1);
-	arb->switch_num++;
 
-	mutex_lock(&arb->debug_lock);
-	if (arb->switch_num == 1) {
-		arb->switch_max = arb->switch_min =
-			arb->switch_avg = (t1-t0)/1000;
-		arb->switch_std = 0;
+	debug = arb->debug == &arb->debug_pool[0] ?
+		&arb->debug_pool[1] : &arb->debug_pool[0];
+
+	memcpy(debug, arb->debug, sizeof(arb->debug_pool[0]));
+	debug->switch_num++;
+
+	if (debug->switch_num == 1) {
+		debug->switch_max = debug->switch_min =
+			debug->switch_avg = (t1-t0)/1000;
+		debug->switch_std = 0;
 	} else {
 		s64 prev_avg;
 		u64 curr = (t1-t0)/1000;
 
-		arb->switch_max = curr > arb->switch_max ?
-			curr : arb->switch_max;
-		arb->switch_min = arb->switch_min ?
-			(curr < arb->switch_min ?
-				curr : arb->switch_min) : curr;
-		prev_avg = arb->switch_avg;
-		arb->switch_avg = (curr +
-			(arb->switch_avg * (arb->switch_num-1))) /
-			arb->switch_num;
-		arb->switch_std +=
-			(curr - arb->switch_avg) * (curr - prev_avg);
+		debug->switch_max = curr > debug->switch_max ?
+			curr : debug->switch_max;
+		debug->switch_min = debug->switch_min ?
+			(curr < debug->switch_min ?
+				curr : debug->switch_min) : curr;
+		prev_avg = debug->switch_avg;
+		debug->switch_avg = (curr +
+			(debug->switch_avg * (debug->switch_num-1))) /
+			debug->switch_num;
+		debug->switch_std +=
+			(curr - debug->switch_avg) * (curr - prev_avg);
 	}
-	mutex_unlock(&arb->debug_lock);
-
+	/* commit changes before exchanging debug pointer */
+	smp_wmb();
+	xchg(&arb->debug, debug);
 #endif
 
 exit_arb:
 
-	spin_lock(&arb->req_lock);
 	/* notify completion for all requests */
-	list_for_each_entry_safe(dev, tmp, &arb->requests, link) {
+	head = llist_del_all(&arb->requests);
+	llist_for_each_entry_safe(dev, tmp, head, node) {
 		atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM);
 		wake_up_interruptible(&dev->readout_wq);
-		list_del_init(&dev->link);
+		kref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
 	}
-	spin_unlock(&arb->req_lock);
 
 	/* notify event for all users */
-	spin_lock(&arb->users_lock);
-	list_for_each_entry(dev, &arb->users, link) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(dev, &arb->users, link) {
 		atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM);
 		wake_up_interruptible(&dev->readout_wq);
 	}
-	spin_unlock(&arb->users_lock);
+	rcu_read_unlock();
 }
 
 int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
@@ -761,17 +826,10 @@ int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
 		err = -EINVAL;
 		goto fdput_fd;
 	}
-	spin_lock(&session->target_lock);
-	session->mclk_target_mhz = dev->mclk_target_mhz ? dev->mclk_target_mhz :
-		session->mclk_target_mhz;
-	session->gpc2clk_target_mhz = dev->gpc2clk_target_mhz ?
-		dev->gpc2clk_target_mhz :
-		session->gpc2clk_target_mhz;
-
-	list_add_tail(&dev->link, &session->targets);
-	spin_unlock(&session->target_lock);
+	kref_get(&dev->refcount);
+	llist_add(&dev->node, &session->targets);
 
-	schedule_work(&arb->update_fn_work);
+	queue_work(arb->update_work_queue, &arb->update_fn_work);
 
 fdput_fd:
 	fdput(fd);
@@ -799,15 +857,8 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
 
 	gk20a_dbg_fn("");
 
-	spin_lock(&arb->req_lock);
-	spin_lock(&session->target_lock);
-	if (!list_empty(&dev->link))
-		list_del_init(&dev->link);
-	spin_unlock(&session->target_lock);
-	spin_unlock(&arb->req_lock);
-
 	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
-	kfree(dev);
+	kref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
 
 	return 0;
 }
@@ -824,10 +875,11 @@ static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
 	gk20a_dbg_fn("");
 
 	spin_lock(&arb->users_lock);
-	list_del(&dev->link);
+	list_del_rcu(&dev->link);
 	spin_unlock(&arb->users_lock);
 
 	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
+	synchronize_rcu();
 	kfree(dev);
 
 	return 0;
@@ -875,24 +927,27 @@ int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
 		u32 api_domain, u16 *freq_mhz)
 {
 	int err = 0;
+	struct nvgpu_clk_arb_target *target;
 
-	spin_lock(&session->target_lock);
-
-	switch (api_domain) {
-	case NVGPU_GPU_CLK_DOMAIN_MCLK:
-		*freq_mhz = session->mclk_target_mhz;
-		break;
+	do {
+		target = ACCESS_ONCE(session->target);
+		/* no reordering of this pointer */
+		smp_rmb();
 
-	case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
-		*freq_mhz = session->gpc2clk_target_mhz;
-		break;
+		switch (api_domain) {
+		case NVGPU_GPU_CLK_DOMAIN_MCLK:
+			*freq_mhz = target->mclk;
+			break;
 
-	default:
-		*freq_mhz = 0;
-		err = -EINVAL;
-	}
+		case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
+			*freq_mhz = target->gpc2clk;
+			break;
 
-	spin_unlock(&session->target_lock);
+		default:
+			*freq_mhz = 0;
+			err = -EINVAL;
+		}
+	} while (target != ACCESS_ONCE(session->target));
 	return err;
 }
 
@@ -901,24 +956,27 @@ int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
 {
 	struct nvgpu_clk_arb *arb = g->clk_arb;
 	int err = 0;
+	struct nvgpu_clk_arb_target *actual;
 
-	spin_lock(&arb->data_lock);
-
-	switch (api_domain) {
-	case NVGPU_GPU_CLK_DOMAIN_MCLK:
-		*freq_mhz = arb->mclk_actual_mhz;
-		break;
+	do {
+		actual = ACCESS_ONCE(arb->actual);
+		/* no reordering of this pointer */
+		smp_rmb();
 
-	case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
-		*freq_mhz = arb->gpc2clk_actual_mhz;
-		break;
+		switch (api_domain) {
+		case NVGPU_GPU_CLK_DOMAIN_MCLK:
+			*freq_mhz = actual->mclk;
+			break;
 
-	default:
-		*freq_mhz = 0;
-		err = -EINVAL;
-	}
+		case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
+			*freq_mhz = actual->gpc2clk;
+			break;
 
-	spin_unlock(&arb->data_lock);
+		default:
+			*freq_mhz = 0;
+			err = -EINVAL;
+		}
+	} while (actual != ACCESS_ONCE(arb->actual));
 	return err;
 }
 
@@ -948,22 +1006,163 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
 	return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints);
 }
 
+/*
+ * Round the requested gpc2clk/mclk (MHz) up to the nearest points of the
+ * current VF table and return the larger logic and SRAM voltage of the two.
+ * Requests above the last point are clamped to it. The lookup is retried
+ * until a table is set and its pointer did not change during the lookup.
+ */
+static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
+		u16 *gpc2clk, u16 *mclk, u32 *voltuv, u32 *voltuv_sram)
+{
+	u16 gpc2clk_target = *gpc2clk, mclk_target = *mclk;
+	u32 gpc2clk_voltuv = 0, gpc2clk_voltuv_sram = 0;
+	u32 mclk_voltuv = 0, mclk_voltuv_sram = 0;
+	struct nvgpu_clk_vf_table *table;
+	int index;
+
+	do {
+		table = ACCESS_ONCE(arb->current_vf_table);
+		/* pointer to table can be updated by callback */
+		smp_rmb();
+
+		if (!table)
+			continue;
+		/* round up the freq requests */
+		for (index = 0; index < table->gpc2clk_num_points; index++) {
+			if (table->gpc2clk_points[index].mhz >=
+			gpc2clk_target) {
+				gpc2clk_target =
+					table->gpc2clk_points[index].mhz;
+				gpc2clk_voltuv =
+					table->gpc2clk_points[index].uvolt;
+				gpc2clk_voltuv_sram =
+					table->gpc2clk_points[index].uvolt_sram;
+				break;
+			}
+		}
+
+		/* no point >= target: clamp to the last valid entry */
+		if (index && (index == table->gpc2clk_num_points)) {
+			gpc2clk_target = table->gpc2clk_points[index - 1].mhz;
+			gpc2clk_voltuv = table->gpc2clk_points[index - 1].uvolt;
+			gpc2clk_voltuv_sram =
+				table->gpc2clk_points[index - 1].uvolt_sram;
+		}
+
+		for (index = 0; index < table->mclk_num_points; index++) {
+			if (table->mclk_points[index].mhz >= mclk_target) {
+				mclk_target = table->mclk_points[index].mhz;
+				mclk_voltuv = table->mclk_points[index].uvolt;
+				mclk_voltuv_sram =
+					table->mclk_points[index].uvolt_sram;
+				break;
+			}
+		}
+		/* no point >= target: clamp to the last valid entry */
+		if (index && (index == table->mclk_num_points)) {
+			mclk_target = table->mclk_points[index - 1].mhz;
+			mclk_voltuv = table->mclk_points[index - 1].uvolt;
+			mclk_voltuv_sram =
+				table->mclk_points[index - 1].uvolt_sram;
+		}
+	} while (!table ||
+		(ACCESS_ONCE(arb->current_vf_table) != table));
+
+	*voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
+	*voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
+		gpc2clk_voltuv_sram : mclk_voltuv_sram;
+
+	*gpc2clk = gpc2clk_target;
+	*mclk = mclk_target;
+}
+
+static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
+	u16 mclk_target, u32 voltuv, u32 voltuv_sram)
+{
+	struct change_fll_clk fllclk;
+	struct nvgpu_clk_arb *arb = g->clk_arb;
+	int status;
+
+	/* if voltage ascends we do:
+	 * (1) FLL change
+	 * (2) Voltage change
+	 * (3) MCLK change
+	 * If it goes down
+	 * (1) MCLK change
+	 * (2) Voltage change
+	 * (3) FLL change
+	 */
+
+	/* descending */
+	if (voltuv < arb->voltuv_actual) {
+		status = g->clk_pmu.clk_mclk.change(g, mclk_target);
+		if (status < 0)
+			return status;
+
+		status = volt_set_voltage(g, voltuv, voltuv_sram);
+		if (status < 0)
+			return status;
+
+		fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
+		fllclk.clkmhz = gpc2clk_target;
+		fllclk.voltuv = voltuv;
+		status = clk_program_fll_clks(g, &fllclk);
+		if (status < 0)
+			return status;
+	} else if (voltuv > arb->voltuv_actual) {
+		fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
+		fllclk.clkmhz = gpc2clk_target;
+		fllclk.voltuv = voltuv;
+		status = clk_program_fll_clks(g, &fllclk);
+		if (status < 0)
+			return status;
+
+		status = volt_set_voltage(g, voltuv, voltuv_sram);
+		if (status < 0)
+			return status;
+
+		status = g->clk_pmu.clk_mclk.change(g, mclk_target);
+		if (status < 0)
+			return status;
+	} else {
+		status = g->clk_pmu.clk_mclk.change(g, mclk_target);
+		if (status < 0)
+			return status;
+
+		fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
+		fllclk.clkmhz = gpc2clk_target;
+		fllclk.voltuv = voltuv;
+		status = clk_program_fll_clks(g, &fllclk);
+		if (status < 0)
+			return status;
+
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_DEBUG_FS
 static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
 {
 	struct gk20a *g = s->private;
 	struct nvgpu_clk_arb *arb = g->clk_arb;
+	struct nvgpu_clk_arb_debug *debug;
+
 	u64 num;
 	s64 tmp, avg, std, max, min;
 
-	/* Make copy of structure to reduce time with lock held */
-	mutex_lock(&arb->debug_lock);
-	std = arb->switch_std;
-	avg = arb->switch_avg;
-	max = arb->switch_max;
-	min = arb->switch_min;
-	num = arb->switch_num;
-	mutex_unlock(&arb->debug_lock);
+	debug = ACCESS_ONCE(arb->debug);
+	/* Make copy of structure and ensure no reordering */
+	smp_rmb();
+	if (!debug)
+		return -EINVAL;
+
+	std = debug->switch_std;
+	avg = debug->switch_avg;
+	max = debug->switch_max;
+	min = debug->switch_min;
+	num = debug->switch_num;
 
 	tmp = std;
 	do_div(tmp, num);
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index f01b52ad..cf758023 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -149,10 +149,6 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g)
 	if (err)
 		return err;
 
-	err = clk_vf_point_cache(g);
-	if (err)
-		return err;
-
 	err = pmgr_domain_pmu_setup(g);
 	return err;
 }
-- 
cgit v1.2.2


From 849c5317e8509b390da626bcb607e66cc5ef847f Mon Sep 17 00:00:00 2001
From: Thomas Fleury <tfleury@nvidia.com>
Date: Fri, 21 Oct 2016 16:43:39 -0700
Subject: gpu: nvgpu: get voltage, current, power and temperature

Add ioctls to retrieve voltage, current, power and temperature.
Add flags in GPU characteristics to indicate if feature is supported.

Jira DNVGPU-166

Change-Id: Ifaafe2efdb6b09d7b28215b641814f28e894151e
Signed-off-by: David Martinez Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1241861
Tested-by: Thomas Fleury <tfleury@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/1267122
---
 drivers/gpu/nvgpu/gp106/hal_gp106.c   | 20 ++++++++++++-
 drivers/gpu/nvgpu/gp106/therm_gp106.c | 21 +++++++++++---
 drivers/gpu/nvgpu/pmgr/pmgr.c         | 53 ++++++++++++++++++++++++++++-------
 drivers/gpu/nvgpu/pmgr/pmgr.h         |  3 ++
 drivers/gpu/nvgpu/volt/volt_pmu.c     | 14 ++-------
 drivers/gpu/nvgpu/volt/volt_pmu.h     |  2 +-
 6 files changed, 85 insertions(+), 28 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index dc27cdae..ee361953 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -187,6 +187,24 @@ static int gp106_get_litter_value(struct gk20a *g, int value)
 	return ret;
 }
 
+int gp106_init_gpu_characteristics(struct gk20a *g)
+{
+	struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics;
+
+	int err;
+
+	err = gk20a_init_gpu_characteristics(g);
+	if (err)
+		return err;
+
+	gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_GET_VOLTAGE |
+			NVGPU_GPU_FLAGS_SUPPORT_GET_CURRENT |
+			NVGPU_GPU_FLAGS_SUPPORT_GET_POWER |
+			NVGPU_GPU_FLAGS_SUPPORT_GET_TEMPERATURE;
+
+	return 0;
+}
+
 int gp106_init_hal(struct gk20a *g)
 {
 	struct gpu_ops *gops = &g->ops;
@@ -224,7 +242,7 @@ int gp106_init_hal(struct gk20a *g)
 
 	gops->name = "gp10x";
 	gops->get_litter_value = gp106_get_litter_value;
-	gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
+	gops->chip_init_gpu_characteristics = gp106_init_gpu_characteristics;
 	gops->gr_ctx.use_dma_for_fw_bootstrap = true;
 	gops->read_ptimer = gk20a_read_ptimer;
 
diff --git a/drivers/gpu/nvgpu/gp106/therm_gp106.c b/drivers/gpu/nvgpu/gp106/therm_gp106.c
index a3aa3636..15aff89c 100644
--- a/drivers/gpu/nvgpu/gp106/therm_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/therm_gp106.c
@@ -15,10 +15,8 @@
 #include <linux/debugfs.h>
 #include "hw_therm_gp106.h"
 
-#ifdef CONFIG_DEBUG_FS
-static int therm_get_internal_sensor_curr_temp(void *data, u64 *val)
+static int gp106_get_internal_sensor_curr_temp(struct gk20a *g, u32 *temp_f24_8)
 {
-	struct gk20a *g = (struct gk20a *)data;
 	int err = 0;
 	u32 readval;
 
@@ -38,7 +36,21 @@ static int therm_get_internal_sensor_curr_temp(void *data, u64 *val)
 	// Convert from F9.5 -> F27.5 -> F24.8.
 	readval &= therm_temp_sensor_tsense_fixed_point_m();
 
-	*val = readval;
+	*temp_f24_8 = readval;
+
+	return err;
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int therm_get_internal_sensor_curr_temp(void *data, u64 *val)
+{
+	struct gk20a *g = (struct gk20a *)data;
+	u32 readval;
+	int err;
+
+	err = gp106_get_internal_sensor_curr_temp(g, &readval);
+	if (!err)
+		*val = readval;
 
 	return err;
 }
@@ -104,4 +116,5 @@ void gp106_init_therm_ops(struct gpu_ops *gops) {
 	gops->therm.therm_debugfs_init = gp106_therm_debugfs_init;
 #endif
 	gops->therm.elcg_init_idle_filters = gp106_elcg_init_idle_filters;
+	gops->therm.get_internal_sensor_curr_temp = gp106_get_internal_sensor_curr_temp;
 }
diff --git a/drivers/gpu/nvgpu/pmgr/pmgr.c b/drivers/gpu/nvgpu/pmgr/pmgr.c
index f625e37d..e101aba8 100644
--- a/drivers/gpu/nvgpu/pmgr/pmgr.c
+++ b/drivers/gpu/nvgpu/pmgr/pmgr.c
@@ -16,12 +16,10 @@
 #include "pmgrpmu.h"
 #include <linux/debugfs.h>
 
-#ifdef CONFIG_DEBUG_FS
-static int pmgr_pwr_devices_get_current_power(void *data, u64 *val)
+int pmgr_pwr_devices_get_power(struct gk20a *g, u32 *val)
 {
 	struct nv_pmu_pmgr_pwr_devices_query_payload payload;
 	int status;
-	struct gk20a *g = (struct gk20a *)data;
 
 	status = pmgr_pmu_pwr_devices_query_blocking(g, 1, &payload);
 	if (status)
@@ -34,11 +32,10 @@ static int pmgr_pwr_devices_get_current_power(void *data, u64 *val)
 	return status;
 }
 
-static int pmgr_pwr_devices_get_current(void *data, u64 *val)
+int pmgr_pwr_devices_get_current(struct gk20a *g, u32 *val)
 {
 	struct nv_pmu_pmgr_pwr_devices_query_payload payload;
 	int status;
-	struct gk20a *g = (struct gk20a *)data;
 
 	status = pmgr_pmu_pwr_devices_query_blocking(g, 1, &payload);
 	if (status)
@@ -51,11 +48,10 @@ static int pmgr_pwr_devices_get_current(void *data, u64 *val)
 	return status;
 }
 
-static int pmgr_pwr_devices_get_current_voltage(void *data, u64 *val)
+int pmgr_pwr_devices_get_voltage(struct gk20a *g, u32 *val)
 {
 	struct nv_pmu_pmgr_pwr_devices_query_payload payload;
 	int status;
-	struct gk20a *g = (struct gk20a *)data;
 
 	status = pmgr_pmu_pwr_devices_query_blocking(g, 1, &payload);
 	if (status)
@@ -68,14 +64,51 @@ static int pmgr_pwr_devices_get_current_voltage(void *data, u64 *val)
 	return status;
 }
 
+#ifdef CONFIG_DEBUG_FS
+int pmgr_pwr_devices_get_power_u64(void *data, u64 *p)
+{
+	struct gk20a *g = (struct gk20a *)data;
+	int err;
+	u32 val = 0;	/* *p is stored even on error; keep it defined */
+
+	err = pmgr_pwr_devices_get_power(g, &val);
+	*p = val;
+
+	return err;
+}
+
+int pmgr_pwr_devices_get_current_u64(void *data, u64 *p)
+{
+	struct gk20a *g = (struct gk20a *)data;
+	int err;
+	u32 val = 0;	/* *p is stored even on error; keep it defined */
+
+	err = pmgr_pwr_devices_get_current(g, &val);
+	*p = val;
+
+	return err;
+}
+
+int pmgr_pwr_devices_get_voltage_u64(void *data, u64 *p)
+{
+	struct gk20a *g = (struct gk20a *)data;
+	int err;
+	u32 val = 0;	/* *p is stored even on error; keep it defined */
+
+	err = pmgr_pwr_devices_get_voltage(g, &val);
+	*p = val;
+
+	return err;
+}
+
 DEFINE_SIMPLE_ATTRIBUTE(
-		pmgr_power_ctrl_fops, pmgr_pwr_devices_get_current_power, NULL, "%llu\n");
+		pmgr_power_ctrl_fops, pmgr_pwr_devices_get_power_u64, NULL, "%llu\n");
 
 DEFINE_SIMPLE_ATTRIBUTE(
-		pmgr_current_ctrl_fops, pmgr_pwr_devices_get_current, NULL, "%llu\n");
+		pmgr_current_ctrl_fops, pmgr_pwr_devices_get_current_u64, NULL, "%llu\n");
 
 DEFINE_SIMPLE_ATTRIBUTE(
-		pmgr_voltage_ctrl_fops, pmgr_pwr_devices_get_current_voltage, NULL, "%llu\n");
+		pmgr_voltage_ctrl_fops, pmgr_pwr_devices_get_voltage_u64, NULL, "%llu\n");
 
 static void pmgr_debugfs_init(struct gk20a *g) {
 	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
diff --git a/drivers/gpu/nvgpu/pmgr/pmgr.h b/drivers/gpu/nvgpu/pmgr/pmgr.h
index 97e7b609..cf511fd1 100644
--- a/drivers/gpu/nvgpu/pmgr/pmgr.h
+++ b/drivers/gpu/nvgpu/pmgr/pmgr.h
@@ -27,5 +27,8 @@ struct pmgr_pmupstate {
 
 u32 pmgr_domain_sw_setup(struct gk20a *g);
 u32 pmgr_domain_pmu_setup(struct gk20a *g);
+int pmgr_pwr_devices_get_current(struct gk20a *g, u32 *val);
+int pmgr_pwr_devices_get_voltage(struct gk20a *g, u32 *val);
+int pmgr_pwr_devices_get_power(struct gk20a *g, u32 *val);
 
 #endif
diff --git a/drivers/gpu/nvgpu/volt/volt_pmu.c b/drivers/gpu/nvgpu/volt/volt_pmu.c
index 4d451b65..a92eb777 100644
--- a/drivers/gpu/nvgpu/volt/volt_pmu.c
+++ b/drivers/gpu/nvgpu/volt/volt_pmu.c
@@ -227,17 +227,7 @@ u32 volt_set_voltage(struct gk20a *g, u32 logic_voltage_uv, u32 sram_voltage_uv)
 
 }
 
-u32 volt_get_voltage(struct gk20a *g, u32 volt_domain)
+u32 volt_get_voltage(struct gk20a *g, u32 volt_domain, u32 *voltage_uv)
 {
-	u32 status = 0;
-	u32 voltage_uv = 0;
-
-	status = volt_rail_get_voltage(g, volt_domain, &voltage_uv);
-	if (status) {
-		gk20a_err(dev_from_gk20a(g),
-			"CTRL_VOLT_DOMAIN_LOGIC get voltage failed");
-		return 0;
-	}
-
-	return voltage_uv;
+	return volt_rail_get_voltage(g, volt_domain, voltage_uv);
 }
diff --git a/drivers/gpu/nvgpu/volt/volt_pmu.h b/drivers/gpu/nvgpu/volt/volt_pmu.h
index c98ba321..9af3fb68 100644
--- a/drivers/gpu/nvgpu/volt/volt_pmu.h
+++ b/drivers/gpu/nvgpu/volt/volt_pmu.h
@@ -17,6 +17,6 @@
 u32 volt_pmu_send_load_cmd_to_pmu(struct gk20a *g);
 u32 volt_set_voltage(struct gk20a *g, u32 logic_voltage_uv,
 		u32 sram_voltage_uv);
-u32 volt_get_voltage(struct gk20a *g, u32 volt_domain);
+u32 volt_get_voltage(struct gk20a *g, u32 volt_domain, u32 *voltage_uv);
 
 #endif
-- 
cgit v1.2.2


From 136a9919e648fd15cc0bb4a2e3de381b29746d3a Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Thu, 8 Dec 2016 19:33:11 -0800
Subject: gpu: nvgpu: resolve signed/unsigned mismatch

JIRA DNVGPU-143

Change-Id: I0536aff4f994efc3dbd2db949068f8e1345ece9c
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1268107
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk_arb.c     | 12 ++++++------
 drivers/gpu/nvgpu/gp106/clk_gp106.c |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index aea32cb8..1b974d17 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -461,7 +461,7 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 	struct gk20a *g = arb->g;
 	struct nvgpu_clk_vf_table *table;
 
-	int i, j;
+	u32 i, j;
 	int status = 0;
 	u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
 	u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
@@ -484,14 +484,14 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 						&mclk_min, &mclk_max) < 0)
 		goto exit_vf_table;
 
-	if (!clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK,
-		&table->gpc2clk_num_points, arb->gpc2clk_f_points) < 0) {
+	if (clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK,
+		&table->gpc2clk_num_points, arb->gpc2clk_f_points)) {
 		gk20a_err(dev_from_gk20a(g),
 			"failed to fetch GPC2CLK frequency points");
 		goto exit_vf_table;
 	}
 	if (clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_MCLK,
-		&table->mclk_num_points, arb->mclk_f_points) < 0) {
+		&table->mclk_num_points, arb->mclk_f_points)) {
 		gk20a_err(dev_from_gk20a(g),
 			"failed to fetch MCLK frequency points");
 		goto exit_vf_table;
@@ -767,7 +767,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 		debug->switch_std = 0;
 	} else {
 		s64 prev_avg;
-		u64 curr = (t1-t0)/1000;
+		s64 curr = (t1-t0)/1000;
 
 		debug->switch_max = curr > debug->switch_max ?
 			curr : debug->switch_max;
@@ -1014,7 +1014,7 @@ static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
 	u32 gpc2clk_voltuv, gpc2clk_voltuv_sram;
 	u32 mclk_voltuv, mclk_voltuv_sram;
 	struct nvgpu_clk_vf_table *table;
-	int index;
+	u32 index;
 
 	gpc2clk_target = *gpc2clk;
 	mclk_target = *mclk;
diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.c b/drivers/gpu/nvgpu/gp106/clk_gp106.c
index 2a32690d..0e9f8d3a 100644
--- a/drivers/gpu/nvgpu/gp106/clk_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/clk_gp106.c
@@ -51,7 +51,7 @@ static u16 gp106_clk_get_rate(struct gk20a *g, u32 api_domain)
 {
 	struct clk_gk20a *clk = &g->clk;
 	u32 freq_khz;
-	int i;
+	u32 i;
 	struct namemap_cfg *c = NULL;
 
 	for (i = 0; i < clk->namemap_num; i++) {
-- 
cgit v1.2.2


From 1f0a38797fbd86b5f5e7f6b43d1c81b2028a82b0 Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Tue, 1 Nov 2016 11:29:39 -0700
Subject: gpu: nvgpu: prevent hang on failing arbiter init

The current code can lead to an application hang when
opening the device for the first time if the arbiter
fails to set the clocks.

This change also fixes a buffer overrun that occurs when
the requested frequency is above the maximum range of
the VF curve.

bug 1835042

Change-Id: I385401ea27d5cc4bfa41b7ca2eb3a1db53138418
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1245911
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1268061
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/clk/clk_arb.c | 46 ++++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 17 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index 1b974d17..f3d6cfab 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -83,6 +83,7 @@ struct nvgpu_clk_arb {
 	struct llist_head requests;
 
 	struct gk20a *g;
+	int status;
 
 	struct nvgpu_clk_arb_target actual_pool[2];
 	struct nvgpu_clk_arb_target *actual;
@@ -269,7 +270,8 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 			atomic_read(&arb->req_nr));
 	} while (!atomic_read(&arb->req_nr));
 
-	return 0;
+
+	return arb->status;
 
 init_fail:
 
@@ -533,6 +535,7 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 			clk_cur = table->mclk_points[j].mhz;
 			j++;
 			num_points++;
+
 		}
 	}
 	table->mclk_num_points = num_points;
@@ -605,7 +608,7 @@ static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
 	err = clk_vf_point_cache(g);
 	if (err) {
 		gk20a_err(dev_from_gk20a(g),
-			"failed to get GPC2CLK SRAM voltage");
+			"failed to cache VF table");
 		return;
 	}
 	nvgpu_clk_arb_update_vf_table(arb);
@@ -625,7 +628,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	u32 voltuv, voltuv_sram;
 	bool mclk_set, gpc2clk_set;
 
-	int status;
+	int status = 0;
 
 	/* Temporary variables for checking target frequency */
 	u16 gpc2clk_target, mclk_target;
@@ -742,15 +745,21 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	actual->gpc2clk = gpc2clk_target;
 	actual->mclk = mclk_target;
 	arb->voltuv_actual = voltuv;
+	arb->status = status;
 
 	/* Make changes visible to other threads */
 	smp_wmb();
 	xchg(&arb->actual, actual);
 
+	/* status must be visible before atomic inc */
+	smp_wmb();
 	atomic_inc(&arb->req_nr);
 
 	wake_up_interruptible(&arb->request_wq);
 
+	if (status < 0)
+		gk20a_err(dev_from_gk20a(g),
+			"Error in arbiter update");
 
 #ifdef CONFIG_DEBUG_FS
 	g->ops.read_ptimer(g, &t1);
@@ -1016,20 +1025,23 @@ static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
 	struct nvgpu_clk_vf_table *table;
 	u32 index;
 
-	gpc2clk_target = *gpc2clk;
-	mclk_target = *mclk;
-	gpc2clk_voltuv = 0;
-	gpc2clk_voltuv_sram = 0;
-	mclk_voltuv = 0;
-	mclk_voltuv_sram = 0;
-
 	do {
+		gpc2clk_target = *gpc2clk;
+		mclk_target = *mclk;
+		gpc2clk_voltuv = 0;
+		gpc2clk_voltuv_sram = 0;
+		mclk_voltuv = 0;
+		mclk_voltuv_sram = 0;
+
 		table = ACCESS_ONCE(arb->current_vf_table);
 		/* pointer to table can be updated by callback */
 		smp_rmb();
 
 		if (!table)
 			continue;
+		if ((!table->gpc2clk_num_points) || (!table->mclk_num_points))
+			goto find_exit;
+
 		/* round up the freq requests */
 		for (index = 0; index < table->gpc2clk_num_points; index++) {
 			if (table->gpc2clk_points[index].mhz >=
@@ -1045,10 +1057,10 @@ static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
 		}
 
 		if (index == table->gpc2clk_num_points) {
-			gpc2clk_target = table->gpc2clk_points[index].mhz;
-			gpc2clk_voltuv = table->gpc2clk_points[index].uvolt;
+			gpc2clk_target = table->gpc2clk_points[index-1].mhz;
+			gpc2clk_voltuv = table->gpc2clk_points[index-1].uvolt;
 			gpc2clk_voltuv_sram =
-				table->gpc2clk_points[index].uvolt_sram;
+				table->gpc2clk_points[index-1].uvolt_sram;
 		}
 
 		for (index = 0; index < table->mclk_num_points; index++) {
@@ -1061,14 +1073,15 @@ static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
 			}
 		}
 		if (index == table->mclk_num_points) {
-			mclk_target = table->mclk_points[index].mhz;
-			mclk_voltuv = table->mclk_points[index].uvolt;
+			mclk_target = table->mclk_points[index-1].mhz;
+			mclk_voltuv = table->mclk_points[index-1].uvolt;
 			mclk_voltuv_sram =
-				table->mclk_points[index].uvolt_sram;
+				table->mclk_points[index-1].uvolt_sram;
 		}
 	} while (!table ||
 		(ACCESS_ONCE(arb->current_vf_table) != table));
 
+find_exit:
 	*voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
 	*voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
 		gpc2clk_voltuv_sram : mclk_voltuv_sram;
@@ -1136,7 +1149,6 @@ static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
 		status = clk_program_fll_clks(g, &fllclk);
 		if (status < 0)
 			return status;
-
 	}
 
 	return 0;
-- 
cgit v1.2.2


From 2ebf09920755daeb7e1be71bf317f88ec9d533e1 Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Thu, 27 Oct 2016 19:05:13 +0530
Subject: gpu: nvgpu: handle vf curve change due to temp

JIRA DNVGPU-129

1) Send 150 degrees C as the default temperature to the PMU so that
the PMU will start reading the temperature from the sensor to
evaluate the VFE equations.

2) Send GP106's minimum and maximum temperature range for the GPU
sensor so that the PMU will read the right temperature.

3) The PMU will send an event whenever the temperature goes above the
positive hysteresis or below the negative hysteresis. Call the
arbiter's VF re-evaluation function in the event handler.

Change-Id: Iaebc0655f60e17998f0864824095f4fc8bba5b62
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/1245392
(cherry picked from commit 7e59d0faa8cee6aace5524c724001e88248b2da7)
Reviewed-on: http://git-master/r/1268062
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Tested-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk_arb.h       |  2 +-
 drivers/gpu/nvgpu/gp106/therm_gp106.c |  8 ++++++++
 drivers/gpu/nvgpu/perf/perf.c         | 20 ++++++++++++++++++++
 drivers/gpu/nvgpu/perf/vfe_var.c      |  2 +-
 drivers/gpu/nvgpu/therm/thrmchannel.c |  5 ++---
 5 files changed, 32 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_arb.h b/drivers/gpu/nvgpu/clk/clk_arb.h
index 717cca9b..8355dac5 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.h
+++ b/drivers/gpu/nvgpu/clk/clk_arb.h
@@ -61,6 +61,6 @@ int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
 int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
 	struct nvgpu_clk_session *session, int *event_fd);
 
-void nvgpu_clk_arb_schedule_vftable_update(struct gk20a *g);
+void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g);
 #endif /* _CLK_ARB_H_ */
 
diff --git a/drivers/gpu/nvgpu/gp106/therm_gp106.c b/drivers/gpu/nvgpu/gp106/therm_gp106.c
index 15aff89c..7bdf0b9e 100644
--- a/drivers/gpu/nvgpu/gp106/therm_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/therm_gp106.c
@@ -15,6 +15,12 @@
 #include <linux/debugfs.h>
 #include "hw_therm_gp106.h"
 
+static void gp106_get_internal_sensor_limits(s32 *max_24_8, s32 *min_24_8)
+{
+	*max_24_8 = (0x87 << 8);
+	*min_24_8 = ((-216) << 8);
+}
+
 static int gp106_get_internal_sensor_curr_temp(struct gk20a *g, u32 *temp_f24_8)
 {
 	int err = 0;
@@ -117,4 +123,6 @@ void gp106_init_therm_ops(struct gpu_ops *gops) {
 #endif
 	gops->therm.elcg_init_idle_filters = gp106_elcg_init_idle_filters;
 	gops->therm.get_internal_sensor_curr_temp = gp106_get_internal_sensor_curr_temp;
+	gops->therm.get_internal_sensor_limits =
+			gp106_get_internal_sensor_limits;
 }
diff --git a/drivers/gpu/nvgpu/perf/perf.c b/drivers/gpu/nvgpu/perf/perf.c
index 3821a8dc..41ebb315 100644
--- a/drivers/gpu/nvgpu/perf/perf.c
+++ b/drivers/gpu/nvgpu/perf/perf.c
@@ -16,6 +16,7 @@
 #include "pmuif/gpmuifperf.h"
 #include "pmuif/gpmuifperfvfe.h"
 #include "gk20a/pmu_gk20a.h"
+#include "clk/clk_arb.h"
 
 struct perfrpc_pmucmdhandler_params {
 	struct nv_pmu_perf_rpc *prpccall;
@@ -41,6 +42,22 @@ static void perfrpc_pmucmdhandler(struct gk20a *g, struct pmu_msg *msg,
 		phandlerparams->success = 1;
 }
 
+static int pmu_handle_perf_event(struct gk20a *g, void *pmu_msg)
+{
+	struct nv_pmu_perf_msg *msg = (struct nv_pmu_perf_msg *)pmu_msg;
+
+	gk20a_dbg_fn("");
+	switch (msg->msg_type) {
+	case NV_PMU_PERF_MSG_ID_VFE_CALLBACK:
+		nvgpu_clk_arb_schedule_vf_table_update(g);
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+	return 0;
+}
+
 u32 perf_pmu_vfe_load(struct gk20a *g)
 {
 	struct pmu_cmd cmd;
@@ -51,6 +68,9 @@ u32 perf_pmu_vfe_load(struct gk20a *g)
 	struct nv_pmu_perf_rpc rpccall = {0};
 	struct perfrpc_pmucmdhandler_params handler = {0};
 
+	/*register call back for future VFE updates*/
+	g->ops.perf.handle_pmu_perf_event = pmu_handle_perf_event;
+
 	rpccall.function = NV_PMU_PERF_RPC_ID_VFE_LOAD;
 	rpccall.params.vfe_load.b_load = true;
 	cmd.hdr.unit_id = PMU_UNIT_PERF;
diff --git a/drivers/gpu/nvgpu/perf/vfe_var.c b/drivers/gpu/nvgpu/perf/vfe_var.c
index 90963478..4f8dc83b 100644
--- a/drivers/gpu/nvgpu/perf/vfe_var.c
+++ b/drivers/gpu/nvgpu/perf/vfe_var.c
@@ -921,7 +921,7 @@ static u32 devinit_get_vfe_var_table(struct gk20a *g,
 
 		case VBIOS_VFE_3X_VAR_ENTRY_TYPE_SINGLE_SENSED_TEMP:
 			var_type = CTRL_PERF_VFE_VAR_TYPE_SINGLE_SENSED_TEMP;
-			var_data.single_sensed_temp.temp_default = 105;
+			var_data.single_sensed_temp.temp_default = 0x9600;
 			var_data.single_sensed_temp.therm_channel_index =
 				(u8)BIOS_GET_FIELD(var.param0,
 					VBIOS_VFE_3X_VAR_ENTRY_PAR0_SSTEMP_TH_CH_IDX);
diff --git a/drivers/gpu/nvgpu/therm/thrmchannel.c b/drivers/gpu/nvgpu/therm/thrmchannel.c
index 015e065b..b5a7dfd2 100644
--- a/drivers/gpu/nvgpu/therm/thrmchannel.c
+++ b/drivers/gpu/nvgpu/therm/thrmchannel.c
@@ -74,9 +74,8 @@ static struct boardobj *construct_channel_device(struct gk20a *g,
 	pchannel = (struct therm_channel *)board_obj_ptr;
 	pchannel_device = (struct therm_channel_device *)board_obj_ptr;
 
-	pchannel->temp_min = 0;
-	pchannel->temp_max = 0;
-
+	g->ops.therm.get_internal_sensor_limits(&pchannel->temp_max,
+		&pchannel->temp_min);
 	pchannel->scaling = (1 << 8);
 	pchannel->offset = 0;
 
-- 
cgit v1.2.2


From 2f258670e40be1c92ab57b6fe77908add768ad9d Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Thu, 3 Nov 2016 22:01:32 -0700
Subject: gpu: nvgpu: p-state bound arbiter

Modify the arbiter clocks to be P-state aware.
Up to now the arbiter just considered the whole range
of the GPC and MCLK domains, which could result in
illegal combinations of MCLK and GPC2CLK, and could set the
SYSCLK and XBARCLK domains below their minimum VCO.

The following has been implemented:

(1) Modified the VF tables to record which P-states are
supported at each point.
(2) Return and store the current P-state in the
arbiter state.
(3) Modified logic to prevent illegal combinations of
MCLK and GPC2CLK.
(4) Modified logic to prevent setting VF points for the
XBAR and SYS domains below their VCO limits.
(5) Modified the voltage calculation to account for increased
values of XBAR and SYS on some VF points.
(6) Modified the arbiter clock target logic to prevent an
application that has not requested a particular VF point
from setting the target to the default targets.
(7) Removed unnecessary mutexes from the critical path.

JIRA DNVGPU-182
JIRA DNVGPU-183

Change-Id: I3d1c30903278f848681b8da833a867835acc99bb
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1247937
(cherry picked from commit b8bcc07eb3b5b70ec1ee19ace237df99d6170138)
Reviewed-on: http://git-master/r/1268063
Tested-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk.c     | 136 ++++++-------
 drivers/gpu/nvgpu/clk/clk.h     |  11 +-
 drivers/gpu/nvgpu/clk/clk_arb.c | 426 +++++++++++++++++++++++++++++++---------
 3 files changed, 397 insertions(+), 176 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c
index bec5fad1..07c80e22 100644
--- a/drivers/gpu/nvgpu/clk/clk.c
+++ b/drivers/gpu/nvgpu/clk/clk.c
@@ -20,6 +20,7 @@
 #include "volt/volt.h"
 #include "gk20a/pmu_gk20a.h"
 
+#define BOOT_GPC2CLK_MHZ  2581
 #define BOOT_MCLK_MHZ     3003
 
 struct clkrpc_pmucmdhandler_params {
@@ -255,7 +256,61 @@ static int get_regime_id(struct gk20a *g, u32 domain, u32 *regimeid)
 	return -EINVAL;
 }
 
-int clk_program_fll_clks(struct gk20a *g, struct change_fll_clk *fllclk)
+int clk_set_fll_clks(struct gk20a *g, struct set_fll_clk *setfllclk)
+{
+	int status = -EINVAL;
+
+	/*set regime ids */
+	status = get_regime_id(g, CTRL_CLK_DOMAIN_GPC2CLK,
+			&setfllclk->current_regime_id_gpc);
+	if (status)
+		goto done;
+
+	setfllclk->target_regime_id_gpc = find_regime_id(g,
+			CTRL_CLK_DOMAIN_GPC2CLK, setfllclk->gpc2clkmhz);
+
+	status = get_regime_id(g, CTRL_CLK_DOMAIN_SYS2CLK,
+			&setfllclk->current_regime_id_sys);
+	if (status)
+		goto done;
+
+	setfllclk->target_regime_id_sys = find_regime_id(g,
+			CTRL_CLK_DOMAIN_SYS2CLK, setfllclk->sys2clkmhz);
+
+	status = get_regime_id(g, CTRL_CLK_DOMAIN_XBAR2CLK,
+			&setfllclk->current_regime_id_xbar);
+	if (status)
+		goto done;
+
+	setfllclk->target_regime_id_xbar = find_regime_id(g,
+			CTRL_CLK_DOMAIN_XBAR2CLK, setfllclk->xbar2clkmhz);
+
+	status = clk_pmu_vf_inject(g, setfllclk);
+
+	if (status)
+		gk20a_err(dev_from_gk20a(g),
+			"vf inject to change clk failed");
+
+	/* save regime ids */
+	status = set_regime_id(g, CTRL_CLK_DOMAIN_XBAR2CLK,
+			setfllclk->target_regime_id_xbar);
+	if (status)
+		goto done;
+
+	status = set_regime_id(g, CTRL_CLK_DOMAIN_GPC2CLK,
+			setfllclk->target_regime_id_gpc);
+	if (status)
+		goto done;
+
+	status = set_regime_id(g, CTRL_CLK_DOMAIN_SYS2CLK,
+			setfllclk->target_regime_id_sys);
+	if (status)
+		goto done;
+done:
+	return status;
+}
+
+int clk_get_fll_clks(struct gk20a *g, struct set_fll_clk *setfllclk)
 {
 	int status = -EINVAL;
 	struct clk_domain *pdomain;
@@ -265,25 +320,14 @@ int clk_program_fll_clks(struct gk20a *g, struct change_fll_clk *fllclk)
 	struct clk_domain_3x_master *p3xmaster;
 	struct clk_domain_3x_slave *p3xslave;
 	unsigned long slaveidxmask;
-	struct set_fll_clk setfllclk;
-	bool foundxbar2clk = false;
-	bool foundsys2clk = false;
 
-	memset(&setfllclk, 0, sizeof(setfllclk));
-	if (fllclk->api_clk_domain != CTRL_CLK_DOMAIN_GPC2CLK)
-		return -EINVAL;
-	if (fllclk->voltuv == 0)
+	if (setfllclk->gpc2clkmhz == 0)
 		return -EINVAL;
-	if (fllclk->clkmhz == 0)
-		return -EINVAL;
-
-	setfllclk.voltuv = fllclk->voltuv;
-	setfllclk.gpc2clkmhz = fllclk->clkmhz;
 
 	BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs.super.super),
 			struct clk_domain *, pdomain, i) {
 
-		if (pdomain->api_domain == fllclk->api_clk_domain) {
+		if (pdomain->api_domain == CTRL_CLK_DOMAIN_GPC2CLK) {
 
 			if (!pdomain->super.implements(g, &pdomain->super,
 				CTRL_CLK_CLK_DOMAIN_TYPE_3X_MASTER)) {
@@ -305,74 +349,20 @@ int clk_program_fll_clks(struct gk20a *g, struct change_fll_clk *fllclk)
 						pclk,
 						(struct clk_domain *)p3xslave,
 						&clkmhz,
-						fllclk->clkmhz);
+						setfllclk->gpc2clkmhz);
 				if (status) {
 					status = -EINVAL;
 					goto done;
 				}
 				if (p3xslave->super.super.super.api_domain ==
-					CTRL_CLK_DOMAIN_XBAR2CLK) {
-					setfllclk.xbar2clkmhz = clkmhz;
-					foundxbar2clk = true;
-				}
+				     CTRL_CLK_DOMAIN_XBAR2CLK)
+					setfllclk->xbar2clkmhz = clkmhz;
 				if (p3xslave->super.super.super.api_domain ==
-					CTRL_CLK_DOMAIN_SYS2CLK) {
-					setfllclk.sys2clkmhz = clkmhz;
-					foundsys2clk = true;
-				}
+				     CTRL_CLK_DOMAIN_SYS2CLK)
+					setfllclk->sys2clkmhz = clkmhz;
 			}
 		}
 	}
-	if (!(foundxbar2clk && foundsys2clk)) {
-		status = -EINVAL;
-		goto done;
-	}
-	/*set regime ids */
-	status = get_regime_id(g, CTRL_CLK_DOMAIN_GPC2CLK,
-			&setfllclk.current_regime_id_gpc);
-	if (status)
-		goto done;
-
-	setfllclk.target_regime_id_gpc = find_regime_id(g,
-			CTRL_CLK_DOMAIN_GPC2CLK, setfllclk.gpc2clkmhz);
-
-	status = get_regime_id(g, CTRL_CLK_DOMAIN_SYS2CLK,
-			&setfllclk.current_regime_id_sys);
-	if (status)
-		goto done;
-
-	setfllclk.target_regime_id_sys = find_regime_id(g,
-			CTRL_CLK_DOMAIN_SYS2CLK, setfllclk.sys2clkmhz);
-
-	status = get_regime_id(g, CTRL_CLK_DOMAIN_XBAR2CLK,
-			&setfllclk.current_regime_id_xbar);
-	if (status)
-		goto done;
-
-	setfllclk.target_regime_id_xbar = find_regime_id(g,
-			CTRL_CLK_DOMAIN_XBAR2CLK, setfllclk.xbar2clkmhz);
-
-	status = clk_pmu_vf_inject(g, &setfllclk);
-
-	if (status)
-		gk20a_err(dev_from_gk20a(g),
-			"vf inject to change clk failed");
-
-	/* save regime ids */
-	status = set_regime_id(g, CTRL_CLK_DOMAIN_XBAR2CLK,
-			setfllclk.target_regime_id_xbar);
-	if (status)
-		goto done;
-
-	status = set_regime_id(g, CTRL_CLK_DOMAIN_GPC2CLK,
-			setfllclk.target_regime_id_gpc);
-	if (status)
-		goto done;
-
-	status = set_regime_id(g, CTRL_CLK_DOMAIN_SYS2CLK,
-			setfllclk.target_regime_id_sys);
-	if (status)
-		goto done;
 done:
 	return status;
 }
diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h
index d0e82173..42cb9f7d 100644
--- a/drivers/gpu/nvgpu/clk/clk.h
+++ b/drivers/gpu/nvgpu/clk/clk.h
@@ -35,7 +35,6 @@ struct clk_pmupstate {
 	struct clk_progs clk_progobjs;
 	struct clk_vf_points clk_vf_pointobjs;
 	struct clk_mclk_state clk_mclk;
-	struct mutex changeclkmutex;
 };
 
 struct clockentry {
@@ -45,12 +44,6 @@ struct clockentry {
 		u32 api_clk_domain;
 };
 
-struct change_fll_clk {
-		u32 api_clk_domain;
-		u16 clkmhz;
-		u32 voltuv;
-};
-
 struct set_fll_clk {
 		u32 voltuv;
 		u16 gpc2clkmhz;
@@ -119,5 +112,7 @@ u32 clk_domain_get_f_points(
 	u32 *fpointscount,
 	u16 *freqpointsinmhz
 );
-int clk_program_fll_clks(struct gk20a *g, struct change_fll_clk *fllclk);
+int clk_get_fll_clks(struct gk20a *g, struct set_fll_clk *fllclk);
+int clk_set_fll_clks(struct gk20a *g, struct set_fll_clk *fllclk);
+
 #endif
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index f3d6cfab..b816a570 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -41,14 +41,32 @@ static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work);
 static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb);
 static void nvgpu_clk_arb_free_fd(struct kref *refcount);
 static void nvgpu_clk_arb_free_session(struct kref *refcount);
-static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk,
-	u16 mclk, u32 voltuv, u32 voltuv_sram);
-static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
-		u16 *gpc2clk_target, u16 *mclk_target, u32 *voltuv,
-		u32 *voltuv_sram);
+static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
+	u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
+	u32 voltuv_sram);
+static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
+		u16 *gpc2clk_target, u16 *sys2clk_target, u16 *xbar2clk_target,
+		u16 *mclk_target, u32 *voltuv, u32 *voltuv_sram);
+
+#define VF_POINT_INVALID_PSTATE ~0U
+#define VF_POINT_SET_PSTATE_SUPPORTED(a, b) ((a)->pstates |= (1UL << (b)))
+#define VF_POINT_GET_PSTATE(a)	(((a)->pstates) ?\
+	__fls((a)->pstates) :\
+	VF_POINT_INVALID_PSTATE)
+#define VF_POINT_COMMON_PSTATE(a, b)	(((a)->pstates & (b)->pstates) ?\
+	__fls((a)->pstates & (b)->pstates) :\
+	VF_POINT_INVALID_PSTATE)
 
 struct nvgpu_clk_vf_point {
-	u16 mhz;
+	u16 pstates;
+	union {
+		struct {
+			u16 gpc_mhz;
+			u16 sys_mhz;
+			u16 xbar_mhz;
+		};
+		u16 mem_mhz;
+	};
 	u32 uvolt;
 	u32 uvolt_sram;
 };
@@ -72,6 +90,7 @@ struct nvgpu_clk_arb_debug {
 struct nvgpu_clk_arb_target {
 	u16 mclk;
 	u16 gpc2clk;
+	u32 pstate;
 };
 
 struct nvgpu_clk_arb {
@@ -362,9 +381,12 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
 	kref_init(&session->refcount);
 
 	session->zombie = false;
+	session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8;
+	/* make sure that the initialization of the pool is visible
+	 * before the update */
+	smp_wmb();
 	session->target = &session->target_pool[0];
-	session->target->mclk  = arb->mclk_default_mhz;
-	session->target->gpc2clk = arb->gpc2clk_default_mhz;
+
 	init_llist_head(&session->targets);
 
 	spin_lock(&arb->sessions_lock);
@@ -464,13 +486,15 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 	struct nvgpu_clk_vf_table *table;
 
 	u32 i, j;
-	int status = 0;
+	int status = -EINVAL;
 	u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
 	u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
 	u16 gpc2clk_min, gpc2clk_max, clk_cur;
 	u16 mclk_min, mclk_max;
 	u32 num_points;
 
+	struct clk_set_info *p5_info, *p0_info;
+
 	table = ACCESS_ONCE(arb->current_vf_table);
 	/* make flag visible when all data has resolved in the tables */
 	smp_rmb();
@@ -504,17 +528,28 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 	memset(table->gpc2clk_points, 0,
 		table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point));
 
+	p5_info = pstate_get_clk_set_info(g,
+			CTRL_PERF_PSTATE_P5, clkwhich_mclk);
+	if (!p5_info)
+		goto exit_vf_table;
+
+	p0_info = pstate_get_clk_set_info(g,
+			CTRL_PERF_PSTATE_P0, clkwhich_mclk);
+	if (!p0_info)
+		goto exit_vf_table;
+
 	for (i = 0, j = 0, num_points = 0, clk_cur = 0;
 			i < table->mclk_num_points; i++) {
+
 		if ((arb->mclk_f_points[i] >= mclk_min) &&
 			(arb->mclk_f_points[i] <= mclk_max) &&
 			(arb->mclk_f_points[i] != clk_cur)) {
 
-			table->mclk_points[j].mhz = arb->mclk_f_points[i];
+			table->mclk_points[j].mem_mhz = arb->mclk_f_points[i];
 			mclk_voltuv = mclk_voltuv_sram = 0;
 
 			status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
-				&table->mclk_points[j].mhz, &mclk_voltuv,
+				&table->mclk_points[j].mem_mhz, &mclk_voltuv,
 				CTRL_VOLT_DOMAIN_LOGIC);
 			if (status < 0) {
 				gk20a_err(dev_from_gk20a(g),
@@ -522,7 +557,8 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 				goto exit_vf_table;
 			}
 			status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
-				&table->mclk_points[j].mhz, &mclk_voltuv_sram,
+				&table->mclk_points[j].mem_mhz,
+				&mclk_voltuv_sram,
 				CTRL_VOLT_DOMAIN_SRAM);
 			if (status < 0) {
 				gk20a_err(dev_from_gk20a(g),
@@ -532,7 +568,19 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 
 			table->mclk_points[j].uvolt = mclk_voltuv;
 			table->mclk_points[j].uvolt_sram = mclk_voltuv_sram;
-			clk_cur = table->mclk_points[j].mhz;
+			clk_cur = table->mclk_points[j].mem_mhz;
+
+			if ((clk_cur >= p5_info->min_mhz) &&
+					(clk_cur <= p5_info->max_mhz))
+				VF_POINT_SET_PSTATE_SUPPORTED(
+					&table->mclk_points[j],
+					CTRL_PERF_PSTATE_P5);
+			if ((clk_cur >= p0_info->min_mhz) &&
+					(clk_cur <= p0_info->max_mhz))
+				VF_POINT_SET_PSTATE_SUPPORTED(
+					&table->mclk_points[j],
+					CTRL_PERF_PSTATE_P0);
+
 			j++;
 			num_points++;
 
@@ -540,45 +588,187 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 	}
 	table->mclk_num_points = num_points;
 
+	p5_info = pstate_get_clk_set_info(g,
+			CTRL_PERF_PSTATE_P5, clkwhich_gpc2clk);
+	if (!p5_info) {
+		status = -EINVAL;
+		goto exit_vf_table;
+	}
+
+	p0_info = pstate_get_clk_set_info(g,
+			CTRL_PERF_PSTATE_P0, clkwhich_gpc2clk);
+	if (!p0_info) {
+		status = -EINVAL;
+		goto exit_vf_table;
+	}
+
+	/* GPC2CLK needs to be checked in two passes. The first determines the
+	 * relationships between GPC2CLK, SYS2CLK and XBAR2CLK, while the
+	 * second verifies that the clocks minimum DVCO is satisfied and sets
+	 * the voltages
+	 */
 	for (i = 0, j = 0, num_points = 0, clk_cur = 0;
 			i < table->gpc2clk_num_points; i++) {
+		struct set_fll_clk setfllclk;
+
 		if ((arb->gpc2clk_f_points[i] >= gpc2clk_min) &&
 			(arb->gpc2clk_f_points[i] <= gpc2clk_max) &&
 			(arb->gpc2clk_f_points[i] != clk_cur)) {
 
-			table->gpc2clk_points[j].mhz = arb->gpc2clk_f_points[i];
-			gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
+			table->gpc2clk_points[j].gpc_mhz =
+				arb->gpc2clk_f_points[i];
 
-			status = clk_domain_get_f_or_v(g,
-				CTRL_CLK_DOMAIN_GPC2CLK,
-				&table->gpc2clk_points[j].mhz, &gpc2clk_voltuv,
-				CTRL_VOLT_DOMAIN_LOGIC);
+			setfllclk.gpc2clkmhz = arb->gpc2clk_f_points[i];
+			status = clk_get_fll_clks(g, &setfllclk);
 			if (status < 0) {
 				gk20a_err(dev_from_gk20a(g),
-					"failed to get GPC2CLK LOGIC voltage");
+					"failed to get GPC2CLK slave clocks");
 				goto exit_vf_table;
 			}
 
-			status = clk_domain_get_f_or_v(g,
-				CTRL_CLK_DOMAIN_GPC2CLK,
-				&table->gpc2clk_points[j].mhz,
-				&gpc2clk_voltuv_sram,
-				CTRL_VOLT_DOMAIN_SRAM);
+
+			table->gpc2clk_points[j].sys_mhz =
+				setfllclk.sys2clkmhz;
+			table->gpc2clk_points[j].xbar_mhz =
+				setfllclk.xbar2clkmhz;
+
+			clk_cur = table->gpc2clk_points[j].gpc_mhz;
+
+			if ((clk_cur >= p5_info->min_mhz) &&
+					(clk_cur <= p5_info->max_mhz))
+				VF_POINT_SET_PSTATE_SUPPORTED(
+					&table->gpc2clk_points[j],
+					CTRL_PERF_PSTATE_P5);
+			if ((clk_cur >= p0_info->min_mhz) &&
+					(clk_cur <= p0_info->max_mhz))
+				VF_POINT_SET_PSTATE_SUPPORTED(
+					&table->gpc2clk_points[j],
+					CTRL_PERF_PSTATE_P0);
+
+			j++;
+			num_points++;
+		}
+	}
+	table->gpc2clk_num_points = num_points;
+
+	/* Second pass */
+	for (i = 0, j = 0; i < table->gpc2clk_num_points; i++) {
+		struct set_fll_clk setfllclk;
+
+		u16 alt_gpc2clk = table->gpc2clk_points[i].gpc_mhz;
+		gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
+
+		/* Check sysclk */
+		p5_info = pstate_get_clk_set_info(g,
+			VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]),
+			clkwhich_sys2clk);
+		if (!p5_info) {
+			status = -EINVAL;
+			goto exit_vf_table;
+		}
+		/* sys2clk below DVCO min, need to find correct clock */
+		if (table->gpc2clk_points[i].sys_mhz < p5_info->min_mhz) {
+			for (j = i + 1; j < table->gpc2clk_num_points; j++) {
+
+				if (table->gpc2clk_points[j].sys_mhz >=
+							p5_info->min_mhz) {
+
+					table->gpc2clk_points[i].sys_mhz =
+						table->gpc2clk_points[j].
+									sys_mhz;
+
+					alt_gpc2clk = alt_gpc2clk <
+						table->gpc2clk_points[j].
+								gpc_mhz ?
+						table->gpc2clk_points[j].
+									gpc_mhz:
+						alt_gpc2clk;
+					break;
+				}
+			}
+			/* no VF exists that satisfies condition */
+			if (j == table->gpc2clk_num_points) {
+				status = -EINVAL;
+				goto exit_vf_table;
+			}
+		}
+
+		/* Check xbarclk */
+		p5_info = pstate_get_clk_set_info(g,
+			VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]),
+			clkwhich_xbar2clk);
+		if (!p5_info) {
+			status = -EINVAL;
+			goto exit_vf_table;
+		}
+
+		/* xbar2clk below DVCO min, need to find correct clock */
+		if (table->gpc2clk_points[i].xbar_mhz < p5_info->min_mhz) {
+			for (j = i; j < table->gpc2clk_num_points; j++) {
+				if (table->gpc2clk_points[j].xbar_mhz >=
+							p5_info->min_mhz) {
+
+					table->gpc2clk_points[i].xbar_mhz =
+						table->gpc2clk_points[j].
+								xbar_mhz;
+					alt_gpc2clk = alt_gpc2clk <
+						table->gpc2clk_points[j].
+								gpc_mhz ?
+						table->gpc2clk_points[j].
+									gpc_mhz:
+						alt_gpc2clk;
+					break;
+				}
+			}
+			/* no VF exists that satisfies condition */
+			if (j == table->gpc2clk_num_points) {
+				status = -EINVAL;
+
+				goto exit_vf_table;
+			}
+		}
+
+		/* alternate gpc2clk clock has been requested, we need to
+		 * calculate new ratios */
+		if (alt_gpc2clk != table->gpc2clk_points[i].gpc_mhz) {
+			setfllclk.gpc2clkmhz = alt_gpc2clk;
+
+			status = clk_get_fll_clks(g, &setfllclk);
 			if (status < 0) {
 				gk20a_err(dev_from_gk20a(g),
-					"failed to get GPC2CLK SRAM voltage");
+					"failed to get GPC2CLK slave clocks");
 				goto exit_vf_table;
 			}
 
-			table->gpc2clk_points[j].uvolt = gpc2clk_voltuv;
-			table->gpc2clk_points[j].uvolt_sram =
-				gpc2clk_voltuv_sram;
-			clk_cur = table->gpc2clk_points[j].mhz;
-			j++;
-			num_points++;
+			table->gpc2clk_points[i].sys_mhz =
+				setfllclk.sys2clkmhz;
+			table->gpc2clk_points[i].xbar_mhz =
+				setfllclk.xbar2clkmhz;
+		}
+
+		/* Calculate voltages */
+		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
+						&alt_gpc2clk, &gpc2clk_voltuv,
+						CTRL_VOLT_DOMAIN_LOGIC);
+		if (status < 0) {
+			gk20a_err(dev_from_gk20a(g),
+				"failed to get GPC2CLK LOGIC voltage");
+			goto exit_vf_table;
+		}
+
+		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
+						&alt_gpc2clk,
+						&gpc2clk_voltuv_sram,
+						CTRL_VOLT_DOMAIN_SRAM);
+		if (status < 0) {
+			gk20a_err(dev_from_gk20a(g),
+				"failed to get GPC2CLK SRAM voltage");
+			goto exit_vf_table;
 		}
+
+		table->gpc2clk_points[i].uvolt = gpc2clk_voltuv;
+		table->gpc2clk_points[i].uvolt_sram = gpc2clk_voltuv_sram;
 	}
-	table->gpc2clk_num_points = num_points;
 
 	/* make table visible when all data has resolved in the tables */
 	smp_wmb();
@@ -625,13 +815,14 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	struct gk20a *g = arb->g;
 	struct llist_node *head;
 
+	u32 pstate = VF_POINT_INVALID_PSTATE;
 	u32 voltuv, voltuv_sram;
 	bool mclk_set, gpc2clk_set;
 
 	int status = 0;
 
 	/* Temporary variables for checking target frequency */
-	u16 gpc2clk_target, mclk_target;
+	u16 gpc2clk_target, sys2clk_target, xbar2clk_target, mclk_target;
 
 #ifdef CONFIG_DEBUG_FS
 	u64 t0, t1;
@@ -699,29 +890,25 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	rcu_read_unlock();
 
 	gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target :
-		arb->actual->gpc2clk ? gpc2clk_target :
-		arb->gpc2clk_default_mhz;
-
-	mclk_target = (mclk_target > 0) ? mclk_target :
-		arb->actual->mclk ? mclk_target :
-		arb->mclk_default_mhz;
-
-	if (!gpc2clk_target && !mclk_target) {
-		mclk_target = arb->mclk_default_mhz;
-		gpc2clk_target = arb->gpc2clk_default_mhz;
-	}
-
-	if (!gpc2clk_target)
-		gpc2clk_target = arb->actual->mclk;
-
-	if (!mclk_target)
-		mclk_target = arb->actual->mclk;
+			arb->gpc2clk_default_mhz;
 
+	mclk_target = (mclk_target > 0) ? mclk_target:
+			arb->mclk_default_mhz;
 
+	sys2clk_target = 0;
+	xbar2clk_target = 0;
 	/* Query the table for the closest vf point to program */
-	nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target, &mclk_target, &voltuv,
+	pstate = nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target,
+		&sys2clk_target, &xbar2clk_target, &mclk_target, &voltuv,
 		&voltuv_sram);
 
+	if (pstate == VF_POINT_INVALID_PSTATE) {
+		arb->status = -EINVAL;
+		/* make status visible */
+		smp_mb();
+		goto exit_arb;
+	}
+
 	if ((arb->actual->gpc2clk == gpc2clk_target) &&
 		(arb->actual->mclk == mclk_target) &&
 		(arb->voltuv_actual == voltuv)) {
@@ -731,12 +918,17 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	/* Program clocks */
 	/* A change in both mclk of gpc2clk may require a change in voltage */
 
-	status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target, mclk_target,
-		voltuv, voltuv_sram);
+	status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target,
+		sys2clk_target, xbar2clk_target, mclk_target, voltuv,
+		voltuv_sram);
 
-	if (status < 0)
-		goto exit_arb;
+	if (status < 0) {
+		arb->status = status;
+		/* make status visible */
+		smp_mb();
 
+		goto exit_arb;
+	}
 	actual = ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ?
 			&arb->actual_pool[1] : &arb->actual_pool[0];
 
@@ -745,6 +937,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	actual->gpc2clk = gpc2clk_target;
 	actual->mclk = mclk_target;
 	arb->voltuv_actual = voltuv;
+	actual->pstate = pstate;
 	arb->status = status;
 
 	/* Make changes visible to other threads */
@@ -1015,15 +1208,17 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
 	return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints);
 }
 
-static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
-		u16 *gpc2clk, u16 *mclk, u32 *voltuv,
-		u32 *voltuv_sram)
+static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
+		u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
+		u32 *voltuv, u32 *voltuv_sram)
 {
 	u16 gpc2clk_target, mclk_target;
 	u32 gpc2clk_voltuv, gpc2clk_voltuv_sram;
 	u32 mclk_voltuv, mclk_voltuv_sram;
+	u32 pstate = VF_POINT_INVALID_PSTATE;
 	struct nvgpu_clk_vf_table *table;
-	u32 index;
+	u32 index, index_mclk;
+	struct nvgpu_clk_vf_point *mclk_vf = NULL;
 
 	do {
 		gpc2clk_target = *gpc2clk;
@@ -1042,12 +1237,39 @@ static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
 		if ((!table->gpc2clk_num_points) || (!table->mclk_num_points))
 			goto find_exit;
 
+		/* First we check MCLK to find out which PSTATE we
+		 * are requesting, and from there try to find the minimum
+		 * GPC2CLK on the same PSTATE that satisfies the request.
+		 * If no GPC2CLK can be found, then we need to up the PSTATE
+		 */
+
+recalculate_vf_point:
+		for (index = 0; index < table->mclk_num_points; index++) {
+			if (table->mclk_points[index].mem_mhz >= mclk_target) {
+				mclk_vf = &table->mclk_points[index];
+				break;
+			}
+		}
+		if (index == table->mclk_num_points) {
+			mclk_vf = &table->mclk_points[index-1];
+		}
+		index_mclk = index;
+
 		/* round up the freq requests */
 		for (index = 0; index < table->gpc2clk_num_points; index++) {
-			if (table->gpc2clk_points[index].mhz >=
-			gpc2clk_target) {
+			pstate = VF_POINT_COMMON_PSTATE(
+					&table->gpc2clk_points[index], mclk_vf);
+
+			if ((table->gpc2clk_points[index].gpc_mhz >=
+							gpc2clk_target) &&
+					(pstate != VF_POINT_INVALID_PSTATE)){
 				gpc2clk_target =
-					table->gpc2clk_points[index].mhz;
+					table->gpc2clk_points[index].gpc_mhz;
+				*sys2clk =
+					table->gpc2clk_points[index].sys_mhz;
+				*xbar2clk =
+					table->gpc2clk_points[index].xbar_mhz;
+
 				gpc2clk_voltuv =
 					table->gpc2clk_points[index].uvolt;
 				gpc2clk_voltuv_sram =
@@ -1057,27 +1279,42 @@ static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
 		}
 
 		if (index == table->gpc2clk_num_points) {
-			gpc2clk_target = table->gpc2clk_points[index-1].mhz;
-			gpc2clk_voltuv = table->gpc2clk_points[index-1].uvolt;
-			gpc2clk_voltuv_sram =
-				table->gpc2clk_points[index-1].uvolt_sram;
-		}
+			pstate = VF_POINT_COMMON_PSTATE(
+				&table->gpc2clk_points[index-1], mclk_vf);
+			if (pstate != VF_POINT_INVALID_PSTATE) {
+				gpc2clk_target =
+					table->gpc2clk_points[index-1].gpc_mhz;
+				*sys2clk =
+					table->gpc2clk_points[index-1].sys_mhz;
+				*xbar2clk  =
+					table->gpc2clk_points[index-1].xbar_mhz;
 
-		for (index = 0; index < table->mclk_num_points; index++) {
-			if (table->mclk_points[index].mhz >= mclk_target) {
-				mclk_target = table->mclk_points[index].mhz;
-				mclk_voltuv = table->mclk_points[index].uvolt;
-				mclk_voltuv_sram =
-					table->mclk_points[index].uvolt_sram;
-				break;
+				gpc2clk_voltuv =
+					table->gpc2clk_points[index-1].uvolt;
+				gpc2clk_voltuv_sram =
+					table->gpc2clk_points[index-1].
+						uvolt_sram;
+			} else if (index_mclk == table->mclk_num_points - 1) {
+				/* There is no available combination of MCLK
+				 * and GPC2CLK, we need to fail this
+				 */
+				gpc2clk_target = 0;
+				mclk_target = 0;
+				pstate = VF_POINT_INVALID_PSTATE;
+				goto find_exit;
+			} else {
+				/* recalculate with higher PSTATE */
+				gpc2clk_target = *gpc2clk;
+				mclk_target = table->mclk_points[index_mclk+1].
+									mem_mhz;
+				goto recalculate_vf_point;
 			}
 		}
-		if (index == table->mclk_num_points) {
-			mclk_target = table->mclk_points[index-1].mhz;
-			mclk_voltuv = table->mclk_points[index-1].uvolt;
-			mclk_voltuv_sram =
-				table->mclk_points[index-1].uvolt_sram;
-		}
+
+		mclk_target = mclk_vf->mem_mhz;
+		mclk_voltuv = mclk_vf->uvolt;
+		mclk_voltuv_sram = mclk_vf->uvolt_sram;
+
 	} while (!table ||
 		(ACCESS_ONCE(arb->current_vf_table) != table));
 
@@ -1088,15 +1325,23 @@ find_exit:
 
 	*gpc2clk = gpc2clk_target;
 	*mclk = mclk_target;
+	return pstate;
 }
 
 static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
-	u16 mclk_target, u32 voltuv, u32 voltuv_sram)
+	u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
+	u32 voltuv_sram)
 {
-	struct change_fll_clk fllclk;
+	struct set_fll_clk fllclk;
 	struct nvgpu_clk_arb *arb = g->clk_arb;
 	int status;
 
+	fllclk.gpc2clkmhz = gpc2clk_target;
+	fllclk.sys2clkmhz = sys2clk_target;
+	fllclk.xbar2clkmhz = xbar2clk_target;
+
+	fllclk.voltuv = voltuv;
+
 	/* if voltage ascends we do:
 	 * (1) FLL change
 	 * (2) Voltage change
@@ -1117,17 +1362,11 @@ static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
 		if (status < 0)
 			return status;
 
-		fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
-		fllclk.clkmhz = gpc2clk_target;
-		fllclk.voltuv = voltuv;
-		status = clk_program_fll_clks(g, &fllclk);
+		status = clk_set_fll_clks(g, &fllclk);
 		if (status < 0)
 			return status;
 	} else if (voltuv > arb->voltuv_actual) {
-		fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
-		fllclk.clkmhz = gpc2clk_target;
-		fllclk.voltuv = voltuv;
-		status = clk_program_fll_clks(g, &fllclk);
+		status = clk_set_fll_clks(g, &fllclk);
 		if (status < 0)
 			return status;
 
@@ -1143,10 +1382,7 @@ static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
 		if (status < 0)
 			return status;
 
-		fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
-		fllclk.clkmhz = gpc2clk_target;
-		fllclk.voltuv = voltuv;
-		status = clk_program_fll_clks(g, &fllclk);
+		status = clk_set_fll_clks(g, &fllclk);
 		if (status < 0)
 			return status;
 	}
-- 
cgit v1.2.2


From 30bf630bded30376a929345247b134b60db1c9f5 Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Mon, 31 Oct 2016 20:19:07 +0530
Subject: gpu: nvgpu: fn to send noise-unaware vmin to pmu

JIRA DNVGPU-184

implement a function which takes noise unaware vmin for
logic and sram rails as input and sends them to pmu via RPC

Change-Id: Ic0d72daf99870477d4dbd17e1c609dd0c39f8197
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/1248210
(cherry picked from commit 2ad833c1edf65ada6c72b56ecd3551e7c4d396f6)
Reviewed-on: http://git-master/r/1270885
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/volt/volt_pmu.c  | 43 ++++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/volt/volt_pmu.h  |  3 ++-
 drivers/gpu/nvgpu/volt/volt_rail.h |  2 ++
 3 files changed, 47 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/volt/volt_pmu.c b/drivers/gpu/nvgpu/volt/volt_pmu.c
index a92eb777..4e7f73c9 100644
--- a/drivers/gpu/nvgpu/volt/volt_pmu.c
+++ b/drivers/gpu/nvgpu/volt/volt_pmu.c
@@ -231,3 +231,46 @@ u32 volt_get_voltage(struct gk20a *g, u32 volt_domain, u32 *voltage_uv)
 {
 	return volt_rail_get_voltage(g, volt_domain, voltage_uv);
 }
+
+/* Send the noise-unaware Vmin of each rail in @prail_list to the PMU by RPC
+ * (the "noiseaware" name is a misnomer). Returns 0, or -EINVAL on failure. */
+static int volt_policy_set_noiseaware_vmin(struct gk20a *g,
+		struct ctrl_volt_volt_rail_list *prail_list)
+{
+	struct nv_pmu_volt_rpc rpc_call = { 0 };
+
+	/* Set RPC parameters. */
+	rpc_call.function = NV_PMU_VOLT_RPC_ID_VOLT_RAIL_SET_NOISE_UNAWARE_VMIN;
+	rpc_call.params.volt_rail_set_noise_unaware_vmin.num_rails =
+			prail_list->num_rails;
+	memcpy(&rpc_call.params.volt_rail_set_noise_unaware_vmin.rail_list,
+		prail_list, (sizeof(struct ctrl_volt_volt_rail_list)));
+
+	/* Execute the Vmin update request via PMU RPC. */
+	if (volt_pmu_rpc_execute(g, &rpc_call)) {
+		gk20a_err(dev_from_gk20a(g),
+			"Error while executing VOLT_RAIL_SET_NOISE_UNAWARE_VMIN RPC");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Wrapper: put @logic_voltage_uv in rail entry 0 and @sram_voltage_uv in rail
+ * entry 1 of a list declaring RAIL_COUNT rails, then forward the list to
+ * volt_policy_set_noiseaware_vmin(), i.e. the noise-unaware Vmin RPC.
+ * Returns that helper's status (0 on success, -EINVAL on RPC failure). */
+int volt_set_noiseaware_vmin(struct gk20a *g, u32 logic_voltage_uv,
+	u32 sram_voltage_uv)
+{
+	struct ctrl_volt_volt_rail_list rails = { 0 };
+
+	rails.num_rails = RAIL_COUNT;
+	rails.rails[0].rail_idx = 0;
+	rails.rails[0].voltage_uv = logic_voltage_uv;
+	rails.rails[1].rail_idx = 1;
+	rails.rails[1].voltage_uv = sram_voltage_uv;
+
+	return volt_policy_set_noiseaware_vmin(g, &rails);
+}
+
diff --git a/drivers/gpu/nvgpu/volt/volt_pmu.h b/drivers/gpu/nvgpu/volt/volt_pmu.h
index 9af3fb68..7e639375 100644
--- a/drivers/gpu/nvgpu/volt/volt_pmu.h
+++ b/drivers/gpu/nvgpu/volt/volt_pmu.h
@@ -18,5 +18,6 @@ u32 volt_pmu_send_load_cmd_to_pmu(struct gk20a *g);
 u32 volt_set_voltage(struct gk20a *g, u32 logic_voltage_uv,
 		u32 sram_voltage_uv);
 u32 volt_get_voltage(struct gk20a *g, u32 volt_domain, u32 *voltage_uv);
-
+int volt_set_noiseaware_vmin(struct gk20a *g, u32 logic_voltage_uv,
+		u32 sram_voltage_uv);
 #endif
diff --git a/drivers/gpu/nvgpu/volt/volt_rail.h b/drivers/gpu/nvgpu/volt/volt_rail.h
index 0180992c..8b930010 100644
--- a/drivers/gpu/nvgpu/volt/volt_rail.h
+++ b/drivers/gpu/nvgpu/volt/volt_rail.h
@@ -61,6 +61,8 @@ struct voltage_rail_metadata {
 	u8 volt_domain_hal;
 	u8 pct_delta;
 	u32 ext_rel_delta_uv[CTRL_VOLT_RAIL_VOLT_DELTA_MAX_ENTRIES];
+	u8 logic_rail_idx;
+	u8 sram_rail_idx;
 	struct boardobjgrp_e32 volt_rails;
 };
 
-- 
cgit v1.2.2


From 8cc67f60644a117eec868fc1b11da9a60d2915d7 Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Fri, 4 Nov 2016 16:47:51 +0530
Subject: gpu: nvgpu: add clock freq controller support

JIRA DNVGPU-170

1) Add clock frequency controller VBIOS structure definitions

2) Parse VBIOS tables and build boardobj structures for clock frequency
controller.

3) send clock frequency controller data structures to PMU

4) implement public function to send load/unload command to pmu
to enable/disable clock frequency controller support

Change-Id: I2f37f6a94f342b6fcc71bb802e6e440a0a454486
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/1248209
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
(cherry picked from commit ed3e27933f21e10b3d7a5257f1b751526945bd07)
Reviewed-on: http://git-master/r/1270897
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x       |   1 +
 drivers/gpu/nvgpu/clk/clk.c                 |  72 +++++
 drivers/gpu/nvgpu/clk/clk.h                 |   4 +-
 drivers/gpu/nvgpu/clk/clk_freq_controller.c | 454 ++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/clk/clk_freq_controller.h |  74 +++++
 drivers/gpu/nvgpu/include/bios.h            |  83 +++++
 drivers/gpu/nvgpu/pstate/pstate.c           |   9 +
 7 files changed, 696 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/nvgpu/clk/clk_freq_controller.c
 create mode 100644 drivers/gpu/nvgpu/clk/clk_freq_controller.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index a096a438..9e08e2c6 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -39,6 +39,7 @@ nvgpu-y += \
 	$(nvgpu-t18x)/clk/clk_prog.o \
 	$(nvgpu-t18x)/clk/clk_vf_point.o \
 	$(nvgpu-t18x)/clk/clk_arb.o \
+	$(nvgpu-t18x)/clk/clk_freq_controller.o \
 	$(nvgpu-t18x)/perf/vfe_var.o \
 	$(nvgpu-t18x)/perf/vfe_equ.o \
 	$(nvgpu-t18x)/perf/perf.o \
diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c
index 07c80e22..dffbefec 100644
--- a/drivers/gpu/nvgpu/clk/clk.c
+++ b/drivers/gpu/nvgpu/clk/clk.c
@@ -47,6 +47,78 @@ static void clkrpc_pmucmdhandler(struct gk20a *g, struct pmu_msg *msg,
 		phandlerparams->success = 1;
 }
 
+/* Post the CLK LOAD RPC for the frequency controllers in freq_ctrl_load_mask,
+ * with the callback action set to YES if @bload, NO otherwise. Returns 0, or
+ * non-zero when the mask export, the command post or the RPC itself fails. */
+int clk_pmu_freq_controller_load(struct gk20a *g, bool bload)
+{
+	struct pmu_cmd cmd;
+	struct pmu_payload payload = { {0} };
+	int status;
+	u32 seqdesc;
+	struct nv_pmu_clk_rpc rpccall = {0};
+	struct clkrpc_pmucmdhandler_params handler = {0};
+	struct nv_pmu_clk_load *clkload;
+	struct clk_freq_controllers *pclk_freq_controllers;
+	struct ctrl_boardobjgrp_mask_e32 *load_mask;
+
+	pclk_freq_controllers = &g->clk_pmu.clk_freq_controllers;
+	rpccall.function = NV_PMU_CLK_RPC_ID_LOAD;
+	clkload = &rpccall.params.clk_load;
+	clkload->feature = NV_NV_PMU_CLK_LOAD_FEATURE_FREQ_CONTROLLER;
+	clkload->action_mask = bload ?
+		NV_NV_PMU_CLK_LOAD_ACTION_MASK_FREQ_CONTROLLER_CALLBACK_YES :
+		NV_NV_PMU_CLK_LOAD_ACTION_MASK_FREQ_CONTROLLER_CALLBACK_NO;
+
+	load_mask = &rpccall.params.clk_load.payload.freq_controllers.load_mask;
+	status = boardobjgrpmask_export(
+		&pclk_freq_controllers->freq_ctrl_load_mask.super,
+		pclk_freq_controllers->freq_ctrl_load_mask.super.bitcount,
+		&load_mask->super);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"unable to export freq controller load mask");
+		goto done;
+	}
+
+	cmd.hdr.unit_id = PMU_UNIT_CLK;
+	cmd.hdr.size =  (u32)sizeof(struct nv_pmu_clk_cmd) +
+			(u32)sizeof(struct pmu_hdr);
+	cmd.cmd.clk.cmd_type = NV_PMU_CLK_CMD_ID_RPC;
+
+	payload.in.buf = (u8 *)&rpccall;
+	payload.in.size = (u32)sizeof(struct nv_pmu_clk_rpc);
+	payload.in.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
+	payload.in.offset = NV_PMU_CLK_CMD_RPC_ALLOC_OFFSET;
+
+	payload.out.buf = (u8 *)&rpccall;
+	payload.out.size = (u32)sizeof(struct nv_pmu_clk_rpc);
+	payload.out.fb_size = PMU_CMD_SUBMIT_PAYLOAD_PARAMS_FB_SIZE_UNUSED;
+	payload.out.offset = NV_PMU_CLK_MSG_RPC_ALLOC_OFFSET;
+
+	handler.prpccall = &rpccall;
+	handler.success = 0;
+	status = gk20a_pmu_cmd_post(g, &cmd, NULL, &payload,
+			PMU_COMMAND_QUEUE_LPQ, clkrpc_pmucmdhandler,
+			(void *)&handler, &seqdesc, ~0);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g), "unable to post clk RPC cmd %x",
+			cmd.cmd.clk.cmd_type);
+		goto done;
+	}
+
+	pmu_wait_message_cond(&g->pmu, gk20a_get_gr_idle_timeout(g),
+			&handler.success, 1);
+
+	if (handler.success == 0) {
+		gk20a_err(dev_from_gk20a(g), "rpc call to load freq cntlr cal failed");
+		status = -EINVAL;
+	}
+
+done:
+	return status;
+}
+
 u32 clk_pmu_vin_load(struct gk20a *g)
 {
 	struct pmu_cmd cmd;
diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h
index 42cb9f7d..b173a09e 100644
--- a/drivers/gpu/nvgpu/clk/clk.h
+++ b/drivers/gpu/nvgpu/clk/clk.h
@@ -21,6 +21,7 @@
 #include "clk_prog.h"
 #include "clk_vf_point.h"
 #include "clk_mclk.h"
+#include "clk_freq_controller.h"
 #include "gk20a/gk20a.h"
 
 #define NV_PERF_DOMAIN_4X_CLOCK_DOMAIN_SKIP 0x10
@@ -35,6 +36,7 @@ struct clk_pmupstate {
 	struct clk_progs clk_progobjs;
 	struct clk_vf_points clk_vf_pointobjs;
 	struct clk_mclk_state clk_mclk;
+	struct clk_freq_controllers clk_freq_controllers;
 };
 
 struct clockentry {
@@ -114,5 +116,5 @@ u32 clk_domain_get_f_points(
 );
 int clk_get_fll_clks(struct gk20a *g, struct set_fll_clk *fllclk);
 int clk_set_fll_clks(struct gk20a *g, struct set_fll_clk *fllclk);
-
+int clk_pmu_freq_controller_load(struct gk20a *g, bool bload);
 #endif
diff --git a/drivers/gpu/nvgpu/clk/clk_freq_controller.c b/drivers/gpu/nvgpu/clk/clk_freq_controller.c
new file mode 100644
index 00000000..17f79168
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_freq_controller.c
@@ -0,0 +1,454 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "clk.h"
+#include "clk_fll.h"
+#include "clk_domain.h"
+#include "clk_freq_controller.h"
+#include "include/bios.h"
+#include "boardobj/boardobjgrp.h"
+#include "boardobj/boardobjgrp_e32.h"
+#include "pmuif/gpmuifboardobj.h"
+#include "pmuif/gpmuifclk.h"
+#include "gm206/bios_gm206.h"
+#include "ctrl/ctrlclk.h"
+#include "ctrl/ctrlvolt.h"
+#include "gk20a/pmu_gk20a.h"
+
+/* Base pmudatainit hook: once boardobj_pmudatainit_super() has succeeded,
+ * mirror the common clk_freq_controller fields into the PMU set payload that
+ * @ppmudata points at. Returns 0, or the error from the boardobj layer. */
+static u32 clk_freq_controller_pmudatainit_super(struct gk20a *g,
+	struct boardobj *board_obj_ptr,
+	struct nv_pmu_boardobj *ppmudata)
+{
+	struct clk_freq_controller *src =
+		(struct clk_freq_controller *)board_obj_ptr;
+	struct nv_pmu_clk_clk_freq_controller_boardobj_set *dst =
+		(struct nv_pmu_clk_clk_freq_controller_boardobj_set *)ppmudata;
+	u32 err = boardobj_pmudatainit_super(g, board_obj_ptr, ppmudata);
+
+	if (err)
+		return err;
+
+	dst->controller_id = src->controller_id;
+	dst->clk_domain = src->clk_domain;
+	dst->parts_freq_mode = src->parts_freq_mode;
+	dst->bdisable = src->bdisable;
+	dst->freq_cap_noise_unaware_vmin_above =
+		src->freq_cap_noise_unaware_vmin_above;
+	dst->freq_cap_noise_unaware_vmin_below =
+		src->freq_cap_noise_unaware_vmin_below;
+	dst->freq_hyst_pos_mhz = src->freq_hyst_pos_mhz;
+	dst->freq_hyst_neg_mhz = src->freq_hyst_neg_mhz;
+
+	return 0;
+}
+
+/* PI pmudatainit hook: run the _super() hook for the common fields, then copy
+ * the PI gains/limits into the PMU set payload. Returns 0 or its error. */
+static u32 clk_freq_controller_pmudatainit_pi(struct gk20a *g,
+	struct boardobj *board_obj_ptr, struct nv_pmu_boardobj *ppmudata)
+{
+	struct nv_pmu_clk_clk_freq_controller_pi_boardobj_set
+		*pfreq_cntlr_pi_set;
+	struct clk_freq_controller_pi *pfreq_cntlr_pi;
+	u32 status = 0;
+
+	status = clk_freq_controller_pmudatainit_super(g,
+		board_obj_ptr, ppmudata);
+	if (status)
+		return status;	/* keep the real error code, not a bare -1 */
+
+	pfreq_cntlr_pi_set =
+		(struct nv_pmu_clk_clk_freq_controller_pi_boardobj_set *)
+		ppmudata;
+	pfreq_cntlr_pi = (struct clk_freq_controller_pi *)board_obj_ptr;
+	pfreq_cntlr_pi_set->prop_gain = pfreq_cntlr_pi->prop_gain;
+	pfreq_cntlr_pi_set->integ_gain = pfreq_cntlr_pi->integ_gain;
+	pfreq_cntlr_pi_set->integ_decay = pfreq_cntlr_pi->integ_decay;
+	pfreq_cntlr_pi_set->volt_delta_min = pfreq_cntlr_pi->volt_delta_min;
+	pfreq_cntlr_pi_set->volt_delta_max = pfreq_cntlr_pi->volt_delta_max;
+	pfreq_cntlr_pi_set->slowdown_pct_min = pfreq_cntlr_pi->slowdown_pct_min;
+	pfreq_cntlr_pi_set->bpoison = pfreq_cntlr_pi->bpoison;
+
+	return status;
+}
+
+/* Base constructor: after boardobj_construct_super() succeeds, hook up the
+ * base pmudatainit callback on *@ppboardobj and copy the common fields from
+ * the template in @pargs (bdisable is not copied). Returns 0 or -EINVAL. */
+static u32 clk_freq_controller_construct_super(struct gk20a *g,
+	struct boardobj **ppboardobj,
+	u16 size, void *pargs)
+{
+	struct clk_freq_controller *tmpl = (struct clk_freq_controller *)pargs;
+	struct clk_freq_controller *ctrl;
+
+	if (boardobj_construct_super(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	/* *ppboardobj is only read once the boardobj layer reported success */
+	ctrl = (struct clk_freq_controller *)*ppboardobj;
+	ctrl->super.pmudatainit = clk_freq_controller_pmudatainit_super;
+
+	ctrl->controller_id = tmpl->controller_id;
+	ctrl->clk_domain = tmpl->clk_domain;
+	ctrl->parts_freq_mode = tmpl->parts_freq_mode;
+	ctrl->freq_cap_noise_unaware_vmin_above =
+		tmpl->freq_cap_noise_unaware_vmin_above;
+	ctrl->freq_cap_noise_unaware_vmin_below =
+		tmpl->freq_cap_noise_unaware_vmin_below;
+	ctrl->freq_hyst_pos_mhz = tmpl->freq_hyst_pos_mhz;
+	ctrl->freq_hyst_neg_mhz = tmpl->freq_hyst_neg_mhz;
+
+	return 0;
+}
+
+/* PI constructor: run the base constructor, then replace the pmudatainit
+ * callback it installed with the PI variant and copy the PI gains/limits from
+ * the template in @pargs. Returns 0 on success, -EINVAL if the base
+ * constructor fails. */
+static u32 clk_freq_controller_construct_pi(struct gk20a *g,
+	struct boardobj **ppboardobj,
+	u16 size, void *pargs)
+{
+	struct clk_freq_controller_pi *tmpl =
+		(struct clk_freq_controller_pi *)pargs;
+	struct clk_freq_controller_pi *pi;
+
+	if (clk_freq_controller_construct_super(g, ppboardobj, size, pargs))
+		return -EINVAL;
+
+	pi = (struct clk_freq_controller_pi *)*ppboardobj;
+	/* override the base-class hook set by construct_super() */
+	pi->super.super.pmudatainit = clk_freq_controller_pmudatainit_pi;
+
+	pi->prop_gain = tmpl->prop_gain;
+	pi->integ_gain = tmpl->integ_gain;
+	pi->integ_decay = tmpl->integ_decay;
+	pi->volt_delta_min = tmpl->volt_delta_min;
+	pi->volt_delta_max = tmpl->volt_delta_max;
+	pi->slowdown_pct_min = tmpl->slowdown_pct_min;
+	pi->bpoison = tmpl->bpoison;
+
+	return 0;
+}
+
+/* Build a frequency controller boardobj from the template in @pargs. Only the
+ * PI type is accepted; returns the new object, or NULL on any failure. */
+struct clk_freq_controller *clk_clk_freq_controller_construct(struct gk20a *g,
+	void *pargs)
+{
+	struct boardobj *obj = NULL;
+
+	if (BOARDOBJ_GET_TYPE(pargs) != CTRL_CLK_CLK_FREQ_CONTROLLER_TYPE_PI)
+		return NULL;
+
+	if (clk_freq_controller_construct_pi(g, &obj,
+			sizeof(struct clk_freq_controller_pi), pargs))
+		return NULL;
+
+	return (struct clk_freq_controller *)obj;
+}
+
+
+/* Parse the VBIOS frequency controller table and insert one controller
+ * boardobj per usable entry into @pclk_freq_controllers. Returns 0 on
+ * success, -EINVAL when the table is missing or an entry cannot be used. */
+static u32 clk_get_freq_controller_table(struct gk20a *g,
+		struct clk_freq_controllers *pclk_freq_controllers)
+{
+	u32 status = 0;
+	u8 *pfreq_controller_table_ptr = NULL;
+	struct vbios_fct_1x_header header = { 0 };
+	struct vbios_fct_1x_entry entry  = { 0 };
+	u8 entry_idx;
+	u8 *entry_offset;
+	struct clk_freq_controller *pclk_freq_cntr = NULL;
+	struct clk_freq_controller *ptmp_freq_cntr = NULL;
+	struct clk_freq_controller_pi *ptmp_freq_cntr_pi = NULL;
+	struct clk_domain *pclk_domain;
+
+	struct freq_controller_data_type {
+		union {
+			struct boardobj board_obj;
+			struct clk_freq_controller freq_controller;
+			struct clk_freq_controller_pi  freq_controller_pi;
+		};
+	} freq_controller_data;
+
+	if (g->ops.bios.get_perf_table_ptrs == NULL)
+		return -EINVAL;
+
+	pfreq_controller_table_ptr = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+			g->bios.clock_token, FREQUENCY_CONTROLLER_TABLE);
+	if (pfreq_controller_table_ptr == NULL)
+		return -EINVAL;
+
+	memcpy(&header, pfreq_controller_table_ptr,
+			sizeof(struct vbios_fct_1x_header));
+
+	pclk_freq_controllers->sampling_period_ms = header.sampling_period_ms;
+	pclk_freq_controllers->volt_policy_idx = 0;
+
+	/* Read in the entries. */
+	for (entry_idx = 0; entry_idx < header.entry_count; entry_idx++) {
+		entry_offset = (pfreq_controller_table_ptr +
+			header.header_size + (entry_idx * header.entry_size));
+
+		memset(&freq_controller_data, 0x0,
+				sizeof(struct freq_controller_data_type));
+		ptmp_freq_cntr = &freq_controller_data.freq_controller;
+		ptmp_freq_cntr_pi = &freq_controller_data.freq_controller_pi;
+
+		memcpy(&entry, entry_offset, sizeof(struct vbios_fct_1x_entry));
+
+		/* entries whose TYPE field is zero are skipped */
+		if (!BIOS_GET_FIELD(entry.flags0,
+				NV_VBIOS_FCT_1X_ENTRY_FLAGS0_TYPE))
+			continue;
+
+		freq_controller_data.board_obj.type = (u8)BIOS_GET_FIELD(
+			entry.flags0, NV_VBIOS_FCT_1X_ENTRY_FLAGS0_TYPE);
+
+		ptmp_freq_cntr->controller_id =
+			(u8)BIOS_GET_FIELD(entry.param0,
+				NV_VBIOS_FCT_1X_ENTRY_PARAM0_ID);
+
+		pclk_domain = CLK_CLK_DOMAIN_GET((&g->clk_pmu),
+				(u32)entry.clk_domain_idx);
+		if (pclk_domain == NULL) {
+			status = -EINVAL;
+			goto done;
+		}
+		freq_controller_data.freq_controller.clk_domain =
+			pclk_domain->api_domain;
+
+		ptmp_freq_cntr->parts_freq_mode =
+			(u8)BIOS_GET_FIELD(entry.param0,
+				NV_VBIOS_FCT_1X_ENTRY_PARAM0_FREQ_MODE);
+
+		/* Populate PI specific data */
+		ptmp_freq_cntr_pi->slowdown_pct_min =
+			(u8)BIOS_GET_FIELD(entry.param1,
+				NV_VBIOS_FCT_1X_ENTRY_PARAM1_SLOWDOWN_PCT_MIN);
+
+		ptmp_freq_cntr_pi->bpoison =
+			BIOS_GET_FIELD(entry.param1,
+				NV_VBIOS_FCT_1X_ENTRY_PARAM1_POISON);
+
+		ptmp_freq_cntr_pi->prop_gain =
+			(s32)BIOS_GET_FIELD(entry.param2,
+				NV_VBIOS_FCT_1X_ENTRY_PARAM2_PROP_GAIN);
+
+		ptmp_freq_cntr_pi->integ_gain =
+			(s32)BIOS_GET_FIELD(entry.param3,
+				NV_VBIOS_FCT_1X_ENTRY_PARAM3_INTEG_GAIN);
+
+		ptmp_freq_cntr_pi->integ_decay =
+			(s32)BIOS_GET_FIELD(entry.param4,
+				NV_VBIOS_FCT_1X_ENTRY_PARAM4_INTEG_DECAY);
+
+		ptmp_freq_cntr_pi->volt_delta_min =
+			(s32)BIOS_GET_FIELD(entry.param5,
+				NV_VBIOS_FCT_1X_ENTRY_PARAM5_VOLT_DELTA_MIN);
+
+		ptmp_freq_cntr_pi->volt_delta_max =
+			(s32)BIOS_GET_FIELD(entry.param6,
+				NV_VBIOS_FCT_1X_ENTRY_PARAM6_VOLT_DELTA_MAX);
+
+		ptmp_freq_cntr->freq_cap_noise_unaware_vmin_above =
+			(s16)BIOS_GET_FIELD(entry.param7,
+				NV_VBIOS_FCT_1X_ENTRY_PARAM7_FREQ_CAP_VF);
+
+		ptmp_freq_cntr->freq_cap_noise_unaware_vmin_below =
+			(s16)BIOS_GET_FIELD(entry.param7,
+				NV_VBIOS_FCT_1X_ENTRY_PARAM7_FREQ_CAP_VMIN);
+
+		ptmp_freq_cntr->freq_hyst_pos_mhz =
+			(s16)BIOS_GET_FIELD(entry.param8,
+				NV_VBIOS_FCT_1X_ENTRY_PARAM8_FREQ_HYST_POS);
+		ptmp_freq_cntr->freq_hyst_neg_mhz =
+			(s16)BIOS_GET_FIELD(entry.param8,
+				NV_VBIOS_FCT_1X_ENTRY_PARAM8_FREQ_HYST_NEG);
+
+		/* an inverted voltage delta range is reported as an error */
+		if (ptmp_freq_cntr_pi->volt_delta_max <
+			ptmp_freq_cntr_pi->volt_delta_min) {
+			status = -EINVAL;
+			goto done;
+		}
+
+		pclk_freq_cntr = clk_clk_freq_controller_construct(g,
+						(void *)&freq_controller_data);
+		if (pclk_freq_cntr == NULL) {
+			gk20a_err(dev_from_gk20a(g),
+				"unable to construct clock freq cntlr boardobj for %d",
+				entry_idx);
+			status = -EINVAL;
+			goto done;
+		}
+
+		status = boardobjgrp_objinsert(
+				&pclk_freq_controllers->super.super,
+				(struct boardobj *)pclk_freq_cntr, entry_idx);
+		if (status) {
+			gk20a_err(dev_from_gk20a(g),
+				"unable to insert clock freq cntlr boardobj for %d",
+				entry_idx);
+			status = -EINVAL;
+			goto done;
+		}
+	}
+
+done:
+	return status;
+}
+
+/* Run the pmuinithandle callback of the frequency controller boardobj group.
+ * Returns -EINVAL if the group was never constructed, otherwise the
+ * callback's status. */
+u32 clk_freq_controller_pmu_setup(struct gk20a *g)
+{
+	struct boardobjgrp *grp = &g->clk_pmu.clk_freq_controllers.super.super;
+	u32 err;
+
+	gk20a_dbg_info("");
+
+	if (!grp->bconstructed)
+		return -EINVAL;
+
+	err = grp->pmuinithandle(g, grp);
+	gk20a_dbg_info("Done");
+	return err;
+}
+
+/* Return in *@ppboardobjpmudata the PMU board object stored at slot @idx of
+ * the group set, or fail with -EINVAL when bit @idx is clear in the mask. */
+static u32 _clk_freq_controller_devgrp_pmudata_instget(struct gk20a *g,
+		struct nv_pmu_boardobjgrp *pmuboardobjgrp,
+		struct nv_pmu_boardobj **ppboardobjpmudata, u8 idx)
+{
+	struct nv_pmu_clk_clk_freq_controller_boardobj_grp_set *set =
+		(struct nv_pmu_clk_clk_freq_controller_boardobj_grp_set *)
+		pmuboardobjgrp;
+	u32 present;
+
+	gk20a_dbg_info("");
+	present = (u32)BIT(idx) & set->hdr.data.super.obj_mask.super.data[0];
+	if (present == 0)
+		return -EINVAL;
+
+	*ppboardobjpmudata =
+		(struct nv_pmu_boardobj *)&set->objects[idx].data.board_obj;
+	gk20a_dbg_info(" Done");
+	return 0;
+}
+
+/* Group-level pmudatainit: after boardobjgrp_pmudatainit_e32() succeeds, add
+ * the sampling period and voltage policy index to the PMU set header. */
+static u32 _clk_freq_controllers_pmudatainit(struct gk20a *g,
+			 struct boardobjgrp *pboardobjgrp,
+			 struct nv_pmu_boardobjgrp_super *pboardobjgrppmu)
+{
+	struct nv_pmu_clk_clk_freq_controller_boardobjgrp_set_header *hdr =
+		(struct nv_pmu_clk_clk_freq_controller_boardobjgrp_set_header *)
+		pboardobjgrppmu;
+	struct clk_freq_controllers *grp =
+		(struct clk_freq_controllers *)pboardobjgrp;
+	u32 err = boardobjgrp_pmudatainit_e32(g, pboardobjgrp, pboardobjgrppmu);
+
+	if (err) {
+		gk20a_err(dev_from_gk20a(g),
+			"error updating pmu boardobjgrp for clk freq ctrs 0x%x",
+			 err);
+		return err;
+	}
+
+	hdr->sampling_period_ms = grp->sampling_period_ms;
+	hdr->volt_policy_idx = grp->volt_policy_idx;
+	return 0;
+}
+
+/* SW setup: build the freq controller boardobj group, install its PMU data
+ * callbacks, parse the VBIOS table, record each controller's index in the FLL
+ * device with the same id and set its load-mask bit. Returns 0 or an error. */
+u32 clk_freq_controller_sw_setup(struct gk20a *g)
+{
+	u32 status = 0;
+	struct boardobjgrp *pboardobjgrp = NULL;
+	struct clk_freq_controllers *pclk_freq_controllers;
+	struct avfsfllobjs *pfllobjs = &(g->clk_pmu.avfs_fllobjs);
+	struct fll_device *pfll;
+	struct clk_freq_controller *pclkfreqctrl;
+	u8 i;
+	u8 j;
+
+	gk20a_dbg_info("");
+	pclk_freq_controllers = &g->clk_pmu.clk_freq_controllers;
+	status = boardobjgrpconstruct_e32(&pclk_freq_controllers->super);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error creating boardobjgrp for clk FCT, status - 0x%x",
+			status);
+		goto done;
+	}
+
+	pboardobjgrp = &g->clk_pmu.clk_freq_controllers.super.super;
+	pboardobjgrp->pmudatainit  = _clk_freq_controllers_pmudatainit;
+	pboardobjgrp->pmudatainstget  =
+			_clk_freq_controller_devgrp_pmudata_instget;
+	pboardobjgrp->pmustatusinstget  = NULL;
+
+	/* Initialize mask to zero.*/
+	boardobjgrpmask_e32_init(&pclk_freq_controllers->freq_ctrl_load_mask,
+		NULL);
+
+	BOARDOBJGRP_PMU_CONSTRUCT(pboardobjgrp, CLK, CLK_FREQ_CONTROLLER);
+	status = BOARDOBJGRP_PMU_CMD_GRP_SET_CONSTRUCT(g, pboardobjgrp,
+			clk, CLK, clk_freq_controller, CLK_FREQ_CONTROLLER);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			  "error constructing PMU_BOARDOBJ_CMD_GRP_SET interface - 0x%x",
+			  status);
+		goto done;
+	}
+
+	status = clk_get_freq_controller_table(g, pclk_freq_controllers);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g),
+			"error reading freq controller table - 0x%x",
+			status);
+		goto done;
+	}
+
+	BOARDOBJGRP_FOR_EACH(&(pclk_freq_controllers->super.super),
+			     struct clk_freq_controller *, pclkfreqctrl, i) {
+		pfll = NULL;
+		j = 0;
+		BOARDOBJGRP_FOR_EACH(&(pfllobjs->super.super),
+			     struct fll_device *, pfll, j) {
+			if (pclkfreqctrl->controller_id == pfll->id) {
+				pfll->freq_ctrl_idx = i;
+				break;
+			}
+		}
+		boardobjgrpmask_bitset(&pclk_freq_controllers->
+			freq_ctrl_load_mask.super, i);
+	}
+done:
+		gk20a_dbg_info(" done status %x", status);
+		return status;
+}
diff --git a/drivers/gpu/nvgpu/clk/clk_freq_controller.h b/drivers/gpu/nvgpu/clk/clk_freq_controller.h
new file mode 100644
index 00000000..957a4f08
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_freq_controller.h
@@ -0,0 +1,74 @@
+/*
+* Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms and conditions of the GNU General Public License,
+* version 2, as published by the Free Software Foundation.
+*
+* This program is distributed in the hope it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+* more details.
+*/
+
+#ifndef _CLK_FREQ_CONTROLLER_H_
+#define _CLK_FREQ_CONTROLLER_H_
+
+#define CTRL_CLK_CLK_FREQ_CONTROLLER_ID_SYS  0x00
+#define CTRL_CLK_CLK_FREQ_CONTROLLER_ID_LTC  0x01
+#define CTRL_CLK_CLK_FREQ_CONTROLLER_ID_XBAR 0x02
+#define CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC0 0x03
+#define CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC1 0x04
+#define CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC2 0x05
+#define CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC3 0x06
+#define CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC4 0x07
+#define CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC5 0x08
+#define CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPCS 0x09
+
+#define CTRL_CLK_CLK_FREQ_CONTROLLER_MASK_UNICAST_GPC     \
+			(BIT(CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC0) | \
+			BIT(CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC1) | \
+			BIT(CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC2) | \
+			BIT(CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC3) | \
+			BIT(CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC4) | \
+			BIT(CTRL_CLK_CLK_FREQ_CONTROLLER_ID_GPC5))
+
+#define CTRL_CLK_CLK_FREQ_CONTROLLER_TYPE_DISABLED  0x00
+#define CTRL_CLK_CLK_FREQ_CONTROLLER_TYPE_PI        0x01
+
+
+struct clk_freq_controller {
+	struct boardobj    super;
+	u8   controller_id;
+	u8   parts_freq_mode;
+	bool bdisable;
+	u32  clk_domain;
+	s16  freq_cap_noise_unaware_vmin_above;
+	s16  freq_cap_noise_unaware_vmin_below;
+	s16  freq_hyst_pos_mhz;
+	s16  freq_hyst_neg_mhz;
+};
+
+struct clk_freq_controller_pi {
+	struct clk_freq_controller super;
+	s32 prop_gain;
+	s32 integ_gain;
+	s32 integ_decay;
+	s32 volt_delta_min;
+	s32 volt_delta_max;
+	u8  slowdown_pct_min;
+	bool bpoison;
+};
+
+struct clk_freq_controllers {
+	struct boardobjgrp_e32 super;
+	u32 sampling_period_ms;
+	struct boardobjgrpmask_e32 freq_ctrl_load_mask;
+	u8 volt_policy_idx;
+	void *pprereq_load;
+};
+
+u32 clk_freq_controller_sw_setup(struct gk20a *g);
+u32 clk_freq_controller_pmu_setup(struct gk20a *g);
+
+#endif
diff --git a/drivers/gpu/nvgpu/include/bios.h b/drivers/gpu/nvgpu/include/bios.h
index 02991db9..f3939d14 100644
--- a/drivers/gpu/nvgpu/include/bios.h
+++ b/drivers/gpu/nvgpu/include/bios.h
@@ -842,4 +842,87 @@ struct therm_channel_1x_entry {
 #define NV_VBIOS_THERM_CHANNEL_1X_ENTRY_PARAM1_DEVICE_PROVIDER_INDEX_MASK  0xFF
 #define NV_VBIOS_THERM_CHANNEL_1X_ENTRY_PARAM1_DEVICE_PROVIDER_INDEX_SHIFT    0
 
+/* Frequency Controller Table */
+struct vbios_fct_1x_header {
+	u8 version;
+	u8 header_size;
+	u8 entry_size;
+	u8 entry_count;
+	u16 sampling_period_ms;
+} __packed;
+
+struct vbios_fct_1x_entry {
+	u8 flags0;
+	u8 clk_domain_idx;
+	u16 param0;
+	u16 param1;
+	u32 param2;
+	u32 param3;
+	u32 param4;
+	u32 param5;
+	u32 param6;
+	u32 param7;
+	u32 param8;
+} __packed;
+
+#define NV_VBIOS_FCT_1X_ENTRY_FLAGS0_TYPE_MASK GENMASK(3, 0)
+#define NV_VBIOS_FCT_1X_ENTRY_FLAGS0_TYPE_SHIFT 0
+#define NV_VBIOS_FCT_1X_ENTRY_FLAGS0_TYPE_DISABLED 0x0
+#define NV_VBIOS_FCT_1X_ENTRY_FLAGS0_TYPE_PI       0x1
+
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_ID_MASK GENMASK(7, 0)
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_ID_SHIFT 0
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_ID_SYS   0x00
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_ID_LTC   0x01
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_ID_XBAR  0x02
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_ID_GPC0  0x03
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_ID_GPC1  0x04
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_ID_GPC2  0x05
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_ID_GPC3  0x06
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_ID_GPC4  0x07
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_ID_GPC5  0x08
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_ID_GPCS  0x09
+
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_FREQ_MODE_MASK GENMASK(9, 8)
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_FREQ_MODE_SHIFT 8
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_FREQ_MODE_BCAST 0x0
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_FREQ_MODE_MIN   0x1
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_FREQ_MODE_MAX   0x2
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM0_FREQ_MODE_AVG   0x3
+
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM1_SLOWDOWN_PCT_MIN_MASK GENMASK(7, 0)
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM1_SLOWDOWN_PCT_MIN_SHIFT 0
+
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM1_POISON_MASK GENMASK(8, 8)
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM1_POISON_SHIFT 8
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM1_POISON_NO  0x0
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM1_POISON_YES 0x1
+
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM2_PROP_GAIN_MASK GENMASK(31, 0)
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM2_PROP_GAIN_SHIFT 0
+
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM3_INTEG_GAIN_MASK GENMASK(31, 0)
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM3_INTEG_GAIN_SHIFT 0
+
+
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM4_INTEG_DECAY_MASK GENMASK(31, 0)
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM4_INTEG_DECAY_SHIFT 0
+
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM5_VOLT_DELTA_MIN_MASK GENMASK(31, 0)
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM5_VOLT_DELTA_MIN_SHIFT 0
+
+
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM6_VOLT_DELTA_MAX_MASK GENMASK(31, 0)
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM6_VOLT_DELTA_MAX_SHIFT 0
+
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM7_FREQ_CAP_VF_MASK GENMASK(15, 0)
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM7_FREQ_CAP_VF_SHIFT 0
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM7_FREQ_CAP_VMIN_MASK GENMASK(31, 16)
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM7_FREQ_CAP_VMIN_SHIFT 16
+
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM8_FREQ_HYST_POS_MASK GENMASK(15, 0)
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM8_FREQ_HYST_POS_SHIFT 0
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM8_FREQ_HYST_NEG_MASK GENMASK(31, 16)
+#define NV_VBIOS_FCT_1X_ENTRY_PARAM8_FREQ_HYST_NEG_SHIFT 16
+
 #endif
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index cf758023..cca6c445 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -79,6 +79,11 @@ int gk20a_init_pstate_support(struct gk20a *g)
 		return err;
 
 	err = pmgr_domain_sw_setup(g);
+	if (err)
+		return err;
+
+	err = clk_freq_controller_sw_setup(g);
+
 	return err;
 }
 
@@ -141,6 +146,10 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g)
 	if (err)
 		return err;
 
+	err = clk_freq_controller_pmu_setup(g);
+	if (err)
+		return err;
+
 	err = clk_pmu_vin_load(g);
 	if (err)
 		return err;
-- 
cgit v1.2.2


From 8edfc9ee67d8c346310b2ad653754440719a29d7 Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Mon, 21 Nov 2016 17:49:13 +0530
Subject: gpu: nvgpu: set p state floor for sys and xbar clk

bug 200254784

If the XBAR and SYS clocks for a given GPC clock point are lower
than the minimum value specified in the P-state, set the floor to
the minimum value specified in the P-state. Previously the floor was
set to a value derived from the ratio of a GPC clock value in the VF
table, which can yield a value higher than the one specified in the
P-state. Ignore the ratio and just use the P-state value.

Change-Id: I9f7cd1d5842d057aff6d8243a31ab503ce35a8ca
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/1257251
Reviewed-by: Automatic_Commit_Validation_User
(cherry picked from commit 5798680286967ff999f674bedd4fc0411615f914)
Reviewed-on: http://git-master/r/1270949
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/clk/clk_arb.c | 29 ++++-------------------------
 1 file changed, 4 insertions(+), 25 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index b816a570..ee75ce64 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -617,7 +617,6 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 
 			table->gpc2clk_points[j].gpc_mhz =
 				arb->gpc2clk_f_points[i];
-
 			setfllclk.gpc2clkmhz = arb->gpc2clk_f_points[i];
 			status = clk_get_fll_clks(g, &setfllclk);
 			if (status < 0) {
@@ -626,7 +625,6 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 				goto exit_vf_table;
 			}
 
-
 			table->gpc2clk_points[j].sys_mhz =
 				setfllclk.sys2clkmhz;
 			table->gpc2clk_points[j].xbar_mhz =
@@ -653,7 +651,6 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 
 	/* Second pass */
 	for (i = 0, j = 0; i < table->gpc2clk_num_points; i++) {
-		struct set_fll_clk setfllclk;
 
 		u16 alt_gpc2clk = table->gpc2clk_points[i].gpc_mhz;
 		gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
@@ -673,9 +670,9 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 				if (table->gpc2clk_points[j].sys_mhz >=
 							p5_info->min_mhz) {
 
+
 					table->gpc2clk_points[i].sys_mhz =
-						table->gpc2clk_points[j].
-									sys_mhz;
+						p5_info->min_mhz;
 
 					alt_gpc2clk = alt_gpc2clk <
 						table->gpc2clk_points[j].
@@ -709,8 +706,8 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 							p5_info->min_mhz) {
 
 					table->gpc2clk_points[i].xbar_mhz =
-						table->gpc2clk_points[j].
-								xbar_mhz;
+						p5_info->min_mhz;
+
 					alt_gpc2clk = alt_gpc2clk <
 						table->gpc2clk_points[j].
 								gpc_mhz ?
@@ -728,24 +725,6 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 			}
 		}
 
-		/* alternate gpc2clk clock has been requested, we need to
-		 * calculate new ratios */
-		if (alt_gpc2clk != table->gpc2clk_points[i].gpc_mhz) {
-			setfllclk.gpc2clkmhz = alt_gpc2clk;
-
-			status = clk_get_fll_clks(g, &setfllclk);
-			if (status < 0) {
-				gk20a_err(dev_from_gk20a(g),
-					"failed to get GPC2CLK slave clocks");
-				goto exit_vf_table;
-			}
-
-			table->gpc2clk_points[i].sys_mhz =
-				setfllclk.sys2clkmhz;
-			table->gpc2clk_points[i].xbar_mhz =
-				setfllclk.xbar2clkmhz;
-		}
-
 		/* Calculate voltages */
 		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
 						&alt_gpc2clk, &gpc2clk_voltuv,
-- 
cgit v1.2.2


From f7290e6a83282ed96b4af225d7d7b63230138d7c Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Mon, 7 Nov 2016 18:12:19 +0530
Subject: gpu: nvgpu: fix fll regime check

For target clocks >= FFR cutoff clock use FR, else use FFR.

JIRA DNVGPU-180

Change-Id: Iefed871d2acf1552230b066c32e1b3f69d96079e
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/1249041
(cherry picked from commit edcb12d8784c62aa857dcab2e27d4e45033fbf11)
Reviewed-on: http://git-master/r/1270883
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c
index dffbefec..ecd53c02 100644
--- a/drivers/gpu/nvgpu/clk/clk.c
+++ b/drivers/gpu/nvgpu/clk/clk.c
@@ -288,9 +288,9 @@ static u32 find_regime_id(struct gk20a *g, u32 domain, u16 clkmhz)
 		if (pflldev->clk_domain == domain) {
 			if (pflldev->regime_desc.fixed_freq_regime_limit_mhz >=
 							clkmhz)
-				return CTRL_CLK_FLL_REGIME_ID_FR;
-			else
 				return CTRL_CLK_FLL_REGIME_ID_FFR;
+			else
+				return CTRL_CLK_FLL_REGIME_ID_FR;
 		}
 	}
 	return CTRL_CLK_FLL_REGIME_ID_INVALID;
-- 
cgit v1.2.2


From fd2b0a48605b8019906650e829f45b6260edaae7 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Thu, 3 Nov 2016 15:53:54 +0530
Subject: gpu: nvgpu: update pg engine init/list/features HAL

- Updated gp10b_pg_gr_init() to post init param based
  on PG engine parameter
- Assigned pg engine list/features HAL to respective
  functions/NULL

JIRA DNVGPU-71

Change-Id: I7d059796746694b22800c6ae0327cbc90331e929
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1247407
(cherry-picked from commit aee4e565ca2b475c0680674e4e6345b3b30cc502)
Reviewed-on: http://git-master/r/1269321
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/pmu_gp106.c |  4 +++-
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 38 +++++++++++++++++++++----------------
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.h |  2 +-
 3 files changed, 26 insertions(+), 18 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/pmu_gp106.c b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
index 88be6d22..6db80abe 100644
--- a/drivers/gpu/nvgpu/gp106/pmu_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
@@ -196,7 +196,9 @@ void gp106_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.fecsbootstrapdone = false;
 	gops->pmu.write_dmatrfbase = gp10b_write_dmatrfbase;
 	gops->pmu.pmu_elpg_statistics = NULL;
-	gops->pmu.pmu_pg_grinit_param = NULL;
+	gops->pmu.pmu_pg_init_param = NULL;
+	gops->pmu.pmu_pg_supported_engines_list = NULL;
+	gops->pmu.pmu_pg_engines_feature_list = NULL;
 	gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd = NULL;
 	gops->pmu.dump_secure_fuses = NULL;
 	gops->pmu.reset = gp106_falcon_reset;
diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index e7b2e70c..b5fdf2fd 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -222,26 +222,30 @@ static void pmu_handle_gr_param_msg(struct gk20a *g, struct pmu_msg *msg,
 	return;
 }
 
-int gp10b_pg_gr_init(struct gk20a *g, u8 grfeaturemask)
+int gp10b_pg_gr_init(struct gk20a *g, u32 pg_engine_id)
 {
 	struct pmu_gk20a *pmu = &g->pmu;
 	struct pmu_cmd cmd;
 	u32 seq;
 
-	memset(&cmd, 0, sizeof(struct pmu_cmd));
-	cmd.hdr.unit_id = PMU_UNIT_PG;
-	cmd.hdr.size = PMU_CMD_HDR_SIZE +
-			sizeof(struct pmu_pg_cmd_gr_init_param);
-	cmd.cmd.pg.gr_init_param.cmd_type =
-			PMU_PG_CMD_ID_PG_PARAM;
-	cmd.cmd.pg.gr_init_param.sub_cmd_id =
-			PMU_PG_PARAM_CMD_GR_INIT_PARAM;
-	cmd.cmd.pg.gr_init_param.featuremask =
-			grfeaturemask;
-
-	gp10b_dbg_pmu("cmd post PMU_PG_CMD_ID_PG_PARAM %x", grfeaturemask);
-	gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
-			pmu_handle_gr_param_msg, pmu, &seq, ~0);
+	if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) {
+		memset(&cmd, 0, sizeof(struct pmu_cmd));
+		cmd.hdr.unit_id = PMU_UNIT_PG;
+		cmd.hdr.size = PMU_CMD_HDR_SIZE +
+				sizeof(struct pmu_pg_cmd_gr_init_param);
+		cmd.cmd.pg.gr_init_param.cmd_type =
+				PMU_PG_CMD_ID_PG_PARAM;
+		cmd.cmd.pg.gr_init_param.sub_cmd_id =
+				PMU_PG_PARAM_CMD_GR_INIT_PARAM;
+		cmd.cmd.pg.gr_init_param.featuremask =
+				PMU_PG_FEATURE_GR_POWER_GATING_ENABLED;
+
+		gp10b_dbg_pmu("cmd post PMU_PG_CMD_ID_PG_PARAM ");
+		gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
+				pmu_handle_gr_param_msg, pmu, &seq, ~0);
+
+	} else
+		return -EINVAL;
 
 	return 0;
 }
@@ -474,7 +478,9 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.fecsbootstrapdone = false;
 	gops->pmu.write_dmatrfbase = gp10b_write_dmatrfbase;
 	gops->pmu.pmu_elpg_statistics = gp10b_pmu_elpg_statistics;
-	gops->pmu.pmu_pg_grinit_param = gp10b_pg_gr_init;
+	gops->pmu.pmu_pg_init_param = gp10b_pg_gr_init;
+	gops->pmu.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list;
+	gops->pmu.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list;
 	gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd =
 			send_ecc_overide_en_dis_cmd;
 	gops->pmu.reset = gk20a_pmu_reset;
diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h
index 18e7bdd3..30245554 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h
@@ -18,7 +18,7 @@
 
 void gp10b_init_pmu_ops(struct gpu_ops *gops);
 int gp10b_load_falcon_ucode(struct gk20a *g, u32 falconidmask);
-int gp10b_pg_gr_init(struct gk20a *g, u8 grfeaturemask);
+int gp10b_pg_gr_init(struct gk20a *g, u32 pg_engine_id);
 void gp10b_write_dmatrfbase(struct gk20a *g, u32 addr);
 
 #endif /*__PMU_GP10B_H_*/
-- 
cgit v1.2.2


From 476f44a0a4bea5ae8a9a22d7d6f05d48ee3096fd Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Thu, 3 Nov 2016 13:16:42 +0530
Subject: gpu: nvgpu: pstate interface update

The lpwr_entry_idx member is required
to map a pstate to the lpwr tables.

JIRA DNVGPU-71

Change-Id: I4cad54c61dec7ad7e3c1a60178938d0eeaf65e24
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1247303
(cherry-picked from commit b1f6e0036922d2104b3d08548219e72a38f2e231)
Reviewed-on: http://git-master/r/1267403
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/pstate/pstate.c | 2 ++
 drivers/gpu/nvgpu/pstate/pstate.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index cca6c445..2e08ef01 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -177,6 +177,7 @@ int pstate_construct_super(struct gk20a *g, struct boardobj **ppboardobj,
 
 	pstate->num = ptmppstate->num;
 	pstate->clklist = ptmppstate->clklist;
+	pstate->lpwr_entry_idx = ptmppstate->lpwr_entry_idx;
 
 	return 0;
 }
@@ -236,6 +237,7 @@ static int parse_pstate_entry_5x(struct gk20a *g,
 	pstate->super.type = CTRL_PERF_PSTATE_TYPE_3X;
 	pstate->num = 0x0F - entry->pstate_level;
 	pstate->clklist.num_info = hdr->clock_entry_count;
+	pstate->lpwr_entry_idx = entry->lpwr_entry_idx;
 
 	gk20a_dbg_info("pstate P%u", pstate->num);
 
diff --git a/drivers/gpu/nvgpu/pstate/pstate.h b/drivers/gpu/nvgpu/pstate/pstate.h
index 4ae72aa9..b6519c20 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.h
+++ b/drivers/gpu/nvgpu/pstate/pstate.h
@@ -41,6 +41,7 @@ struct clk_set_info_list {
 struct pstate {
 	struct boardobj super;
 	u32 num;
+	u8 lpwr_entry_idx;
 	struct clk_set_info_list clklist;
 };
 
-- 
cgit v1.2.2


From 23ff2eb5c9d498eddd36eb710d4058b23619a0c8 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Thu, 3 Nov 2016 12:58:28 +0530
Subject: gpu: nvgpu: keep sec2 idle

Reset the sec2 falcon after secure boot completes
to keep sec2 idle.

Issue: ELPG does not engage if the PG engine
depends on sec2 being idle.

JIRA DNVGPU-71

Change-Id: If264c610dae857ae9b9f9bc77c59f24f239ce1ce
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1247290
(cherry-picked from commit de2337de1211e4167993982e50237fe1115053a1)
Reviewed-on: http://git-master/r/1267260
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gp106/acr_gp106.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index b883ad83..5ed6300c 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -22,7 +22,8 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/pmu_gk20a.h"
 #include "gk20a/semaphore_gk20a.h"
-#include "gm20b/hw_pwr_gm20b.h"
+#include "gp106/hw_psec_gp106.h"
+#include "gp106/hw_pwr_gp106.h"
 #include "gm206/acr_gm206.h"
 #include "gm20b/acr_gm20b.h"
 #include "gm206/pmu_gm206.h"
@@ -1150,6 +1151,14 @@ static int gp106_bootstrap_hs_flcn(struct gk20a *g)
 		err = status;
 		goto err_free_ucode_map;
 	}
+
+	/* sec2 reset - to keep it idle */
+	gk20a_writel(g, psec_falcon_engine_r(),
+		pwr_falcon_engine_reset_true_f());
+	udelay(10);
+	gk20a_writel(g, psec_falcon_engine_r(),
+		pwr_falcon_engine_reset_false_f());
+
 	return 0;
 err_free_ucode_map:
 	gk20a_gmmu_unmap_free(vm, &acr->acr_ucode);
-- 
cgit v1.2.2


From 9664b8e50ab66eb3a690998c4b6541870dd677b6 Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Mon, 31 Oct 2016 20:19:29 +0530
Subject: gpu: nvgpu: actions before and after vf change

JIRA DNVGPU-175

1) Add functions to be called before and after a VF change
2) Capture the noise-unaware vmin value and pass it as a param
   to the VF change functions
3) Before a VF change, disable CLFC and update the noise-unaware vmin
4) After the VF change is done, enable CLFC

Change-Id: I4bb59fbe96ec5a792e8930db3ab4a39ec74c9a71
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/1248211
(cherry picked from commit cf07892204d7ce11a0d27ecbc1f5826fbabbde61)
Reviewed-on: http://git-master/r/1270950
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/clk/clk_arb.c | 67 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 62 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index ee75ce64..762c2466 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -44,9 +44,17 @@ static void nvgpu_clk_arb_free_session(struct kref *refcount);
 static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
 	u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
 	u32 voltuv_sram);
+static int nvgpu_clk_arb_change_vf_point_prefix(struct gk20a *g,
+	u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target,
+	u16 mclk_target, u32 voltuv, u32 voltuv_sram, u32 nuvmin,
+	u32 nuvmin_sram);
+static int nvgpu_clk_arb_change_vf_point_postfix(struct gk20a *g,
+	u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target,
+	u16 mclk_target, u32 voltuv, u32 voltuv_sram, u32 nuvmin,
+	u32 nuvmin_sram);
 static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
-		u16 *gpc2clk_target, u16 *sys2clk_target, u16 *xbar2clk_target,
-		u16 *mclk_target, u32 *voltuv, u32 *voltuv_sram);
+	u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
+	u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram);
 
 #define VF_POINT_INVALID_PSTATE ~0U
 #define VF_POINT_SET_PSTATE_SUPPORTED(a, b) ((a)->pstates |= (1UL << (b)))
@@ -797,6 +805,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	u32 pstate = VF_POINT_INVALID_PSTATE;
 	u32 voltuv, voltuv_sram;
 	bool mclk_set, gpc2clk_set;
+	u32 nuvmin, nuvmin_sram;
 
 	int status = 0;
 
@@ -879,7 +888,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	/* Query the table for the closest vf point to program */
 	pstate = nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target,
 		&sys2clk_target, &xbar2clk_target, &mclk_target, &voltuv,
-		&voltuv_sram);
+		&voltuv_sram, &nuvmin, &nuvmin_sram);
 
 	if (pstate == VF_POINT_INVALID_PSTATE) {
 		arb->status = -EINVAL;
@@ -897,6 +906,17 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	/* Program clocks */
 	/* A change in both mclk of gpc2clk may require a change in voltage */
 
+	status = nvgpu_clk_arb_change_vf_point_prefix(g, gpc2clk_target,
+		sys2clk_target, xbar2clk_target, mclk_target, voltuv,
+		voltuv_sram, nuvmin, nuvmin_sram);
+
+	if (status < 0) {
+		arb->status = status;
+		/* make status visible */
+		smp_mb();
+		goto exit_arb;
+	}
+
 	status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target,
 		sys2clk_target, xbar2clk_target, mclk_target, voltuv,
 		voltuv_sram);
@@ -905,9 +925,20 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 		arb->status = status;
 		/* make status visible */
 		smp_mb();
+		goto exit_arb;
+	}
+
+	status = nvgpu_clk_arb_change_vf_point_postfix(g, gpc2clk_target,
+		sys2clk_target, xbar2clk_target, mclk_target, voltuv,
+		voltuv_sram, nuvmin, nuvmin_sram);
 
+	if (status < 0) {
+		arb->status = status;
+		/* make status visible */
+		smp_mb();
 		goto exit_arb;
 	}
+
 	actual = ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ?
 			&arb->actual_pool[1] : &arb->actual_pool[0];
 
@@ -1189,7 +1220,7 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
 
 static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
 		u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
-		u32 *voltuv, u32 *voltuv_sram)
+		u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram)
 {
 	u16 gpc2clk_target, mclk_target;
 	u32 gpc2clk_voltuv, gpc2clk_voltuv_sram;
@@ -1301,12 +1332,38 @@ find_exit:
 	*voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
 	*voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
 		gpc2clk_voltuv_sram : mclk_voltuv_sram;
-
+	/* noise unaware vmin */
+	*nuvmin = mclk_voltuv;
+	*nuvmin_sram = mclk_voltuv_sram;
 	*gpc2clk = gpc2clk_target;
 	*mclk = mclk_target;
 	return pstate;
 }
 
+static int nvgpu_clk_arb_change_vf_point_prefix(struct gk20a *g,
+	u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target,
+	u16 mclk_target, u32 voltuv, u32 voltuv_sram, u32 nuvmin,
+	u32 nuvmin_sram)
+{
+
+	int status;
+
+	status = clk_pmu_freq_controller_load(g, false);
+	if (status < 0)
+		return status;
+
+	status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram);
+	return status;
+}
+
+static int nvgpu_clk_arb_change_vf_point_postfix(struct gk20a *g,
+	u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target,
+	u16 mclk_target, u32 voltuv, u32 voltuv_sram, u32 nuvmin,
+	u32 nuvmin_sram)
+{
+	return clk_pmu_freq_controller_load(g, true);
+}
+
 static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
 	u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
 	u32 voltuv_sram)
-- 
cgit v1.2.2


From be6e02a6d42b0a7092f92371474c42d41019c917 Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Wed, 7 Dec 2016 19:00:43 +0530
Subject: gpu: nvgpu: call set volt always during vf switch

bug 1845211

CLFC is reset during a VF switch, which resets the delta values stored
in the PMU structure. If the voltage has not changed, the delta
calculation happens on top of the already-corrected voltage, causing the
voltage to keep increasing in cases where it does not change during a
VF switch.

Change-Id: I9d1a58b6b7652f22c3a7304162bb8ca6f7d1da6f
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/1266632
(cherry picked from commit 74e2e97d4d149d3eac65b5f65b358b977fba463e)
Reviewed-on: http://git-master/r/1273911
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk_arb.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index 762c2466..7c22a83d 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -1401,7 +1401,7 @@ static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
 		status = clk_set_fll_clks(g, &fllclk);
 		if (status < 0)
 			return status;
-	} else if (voltuv > arb->voltuv_actual) {
+	} else {
 		status = clk_set_fll_clks(g, &fllclk);
 		if (status < 0)
 			return status;
@@ -1413,14 +1413,6 @@ static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
 		status = g->clk_pmu.clk_mclk.change(g, mclk_target);
 		if (status < 0)
 			return status;
-	} else {
-		status = g->clk_pmu.clk_mclk.change(g, mclk_target);
-		if (status < 0)
-			return status;
-
-		status = clk_set_fll_clks(g, &fllclk);
-		if (status < 0)
-			return status;
 	}
 
 	return 0;
-- 
cgit v1.2.2


From 1710bdb0783a85ce183a77d182fb18d7f98af66e Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Fri, 16 Dec 2016 15:36:10 +0530
Subject: gpu: nvgpu: allow upgrading preemption modes

We currently do not allow resetting preemption
modes once set

Relax this check to allow upgrading preemption
modes. Downgrading of preemption modes is still
not allowed

Bug 200263471

Change-Id: Ie2dae910028929090899a661f4b8b9dd4d6d7ee7
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1269472
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 051e16a3..aa2e3b65 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -877,6 +877,15 @@ static int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
 		   (compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP))
 		return -EINVAL;
 
+	/* Do not allow lower preemption modes than current ones */
+	if (graphics_preempt_mode &&
+	   (graphics_preempt_mode < gr_ctx->graphics_preempt_mode))
+		return -EINVAL;
+
+	if (compute_preempt_mode &&
+	   (compute_preempt_mode < gr_ctx->compute_preempt_mode))
+		return -EINVAL;
+
 	/* set preemption modes */
 	switch (graphics_preempt_mode) {
 	case NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP:
@@ -2064,10 +2073,6 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 	if (!class)
 		return -EINVAL;
 
-	/* preemption already set ? */
-	if (gr_ctx->graphics_preempt_mode || gr_ctx->compute_preempt_mode)
-		return -EINVAL;
-
 	if (gk20a_is_channel_marked_as_tsg(ch)) {
 		tsg = &g->fifo.tsg[ch->tsgid];
 		vm = tsg->vm;
-- 
cgit v1.2.2


From 8f8ee32cd6082b8eee9585e9334656d1365c5273 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Tue, 13 Dec 2016 14:11:24 +0530
Subject: gpu: nvgpu: use common API to write TPC fuses

Use common fuse write API tegra_fuse_control_write
which should work on all kernel versions

Bug 200262155

Change-Id: I29e8514e9660549ecf94711287ec4bbf4c897a86
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1270169
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index aa2e3b65..91ea086c 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1543,20 +1543,15 @@ static void gr_gp10b_init_cyclestats(struct gk20a *g)
 
 static void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
-	tegra_fuse_writel(0x1, FUSE_FUSEBYPASS_0);
-	tegra_fuse_writel(0x0, FUSE_WRITE_ACCESS_SW_0);
-#else
 	tegra_fuse_control_write(0x1, FUSE_FUSEBYPASS_0);
 	tegra_fuse_control_write(0x0, FUSE_WRITE_ACCESS_SW_0);
-#endif
 
 	if (g->gr.gpc_tpc_mask[gpc_index] == 0x1)
-		tegra_fuse_writel(0x2, FUSE_OPT_GPU_TPC0_DISABLE_0);
+		tegra_fuse_control_write(0x2, FUSE_OPT_GPU_TPC0_DISABLE_0);
 	else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2)
-		tegra_fuse_writel(0x1, FUSE_OPT_GPU_TPC0_DISABLE_0);
+		tegra_fuse_control_write(0x1, FUSE_OPT_GPU_TPC0_DISABLE_0);
 	else
-		tegra_fuse_writel(0x0, FUSE_OPT_GPU_TPC0_DISABLE_0);
+		tegra_fuse_control_write(0x0, FUSE_OPT_GPU_TPC0_DISABLE_0);
 }
 
 static void gr_gp10b_get_access_map(struct gk20a *g,
-- 
cgit v1.2.2


From cd25b202361ace2a04594de38a48f66aa75e1cb8 Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Thu, 10 Nov 2016 17:12:33 -0800
Subject: gpu: nvgpu: cap minimum gpc clocks to HW limits

JIRA: DNVGPU-180

Change-Id: I1928e77cea4ac87bf2ba2b6b7b2f2942dfb97de9
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1251493
(cherry picked from commit 7b8a105652a3169d9ec0cb7ce52c3b92e42ca310)
Reviewed-on: http://git-master/r/1274545
Reviewed-by: Automatic_Commit_Validation_User
Tested-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/clk_arb_gp106.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
index d1cbb32b..b4d1afbc 100644
--- a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
@@ -28,6 +28,9 @@ static int gp106_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
 	enum nv_pmu_clk_clkwhich clkwhich;
 	struct clk_set_info *p0_info;
 	struct clk_set_info *p5_info;
+	struct avfsfllobjs *pfllobjs =  &(g->clk_pmu.avfs_fllobjs);
+
+	u16 limit_min_mhz;
 
 	switch (api_domain) {
 	case CTRL_CLK_DOMAIN_MCLK:
@@ -52,7 +55,14 @@ static int gp106_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
 	if (!p0_info)
 		return -EINVAL;
 
-	*min_mhz = p5_info->min_mhz;
+	limit_min_mhz = p5_info->min_mhz;
+	/* WAR for DVCO min */
+	if (api_domain == CTRL_CLK_DOMAIN_GPC2CLK)
+		if ((pfllobjs->max_min_freq_mhz) &&
+		(pfllobjs->max_min_freq_mhz > limit_min_mhz))
+			limit_min_mhz = pfllobjs->max_min_freq_mhz;
+
+	*min_mhz = limit_min_mhz;
 	*max_mhz = p0_info->max_mhz;
 
 	return 0;
-- 
cgit v1.2.2


From 26265b997447c8c1fddac6a904ac4b6ef2b2f3b5 Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Thu, 10 Nov 2016 22:50:20 -0800
Subject: gpu: nvgpu: arb: fix rounding in effective clocks

The current code casts the frequency to 16 bits
before the division, producing incorrect data

JIRA DNVGPU-164

Change-Id: I4eb1fa73fb9e8963f550d3d853db39b49b990fa4
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1251663
(cherry picked from commit c0d91a054f8278af9a2383bb3f837465779132a9)
Reviewed-on: http://git-master/r/1274546
Reviewed-by: Automatic_Commit_Validation_User
Tested-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/clk_gp106.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.c b/drivers/gpu/nvgpu/gp106/clk_gp106.c
index 0e9f8d3a..4c9bc782 100644
--- a/drivers/gpu/nvgpu/gp106/clk_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/clk_gp106.c
@@ -67,7 +67,8 @@ static u16 gp106_clk_get_rate(struct gk20a *g, u32 api_domain)
 	freq_khz = c->is_counter ? c->scale * gp106_get_rate_cntr(g, c) :
 		0; /* TODO: PLL read */
 
-	return (u16) freq_khz/1000;
+	/* Convert to MHZ */
+	return (u16) (freq_khz/1000);
 }
 
 static int gp106_init_clk_support(struct gk20a *g) {
-- 
cgit v1.2.2


From 7e2d79cd65cd1c0dc32b161f6309d51e4bd5b630 Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Fri, 11 Nov 2016 10:02:34 -0800
Subject: gpu: nvgpu: fix CLFC arbiter vf table update

(1) Add additional debug output in case of a VF update failure
(2) Re-initialize the lengths of the tables
prior to requesting the VF table update

JIRA: DNVGPU-193

Change-Id: Id5a369359bc5f52ee58da539bfc3ec1ec7887de1
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1252061
(cherry picked from commit dfb6fec02f8e3ff6a22bbea000347923b0306018)
Reviewed-on: http://git-master/r/1274547
Reviewed-by: Automatic_Commit_Validation_User
Tested-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/clk/clk_arb.c | 60 +++++++++++++++++++++++++++++++++--------
 1 file changed, 49 insertions(+), 11 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index 7c22a83d..3f35fac7 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -503,6 +503,7 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 
 	struct clk_set_info *p5_info, *p0_info;
 
+
 	table = ACCESS_ONCE(arb->current_vf_table);
 	/* make flag visible when all data has resolved in the tables */
 	smp_rmb();
@@ -512,11 +513,18 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 
 	/* Get allowed memory ranges */
 	if (nvgpu_clk_arb_get_arbiter_clk_range(g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK,
-						&gpc2clk_min, &gpc2clk_max) < 0)
+						&gpc2clk_min,
+						&gpc2clk_max) < 0) {
+		gk20a_err(dev_from_gk20a(g),
+			"failed to fetch GPC2CLK range");
 		goto exit_vf_table;
+	}
 	if (nvgpu_clk_arb_get_arbiter_clk_range(g, NVGPU_GPU_CLK_DOMAIN_MCLK,
-						&mclk_min, &mclk_max) < 0)
+						&mclk_min, &mclk_max) < 0) {
+		gk20a_err(dev_from_gk20a(g),
+			"failed to fetch MCLK range");
 		goto exit_vf_table;
+	}
 
 	if (clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK,
 		&table->gpc2clk_num_points, arb->gpc2clk_f_points)) {
@@ -524,12 +532,23 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 			"failed to fetch GPC2CLK frequency points");
 		goto exit_vf_table;
 	}
+
+	table->gpc2clk_num_points = MAX_F_POINTS;
+	table->mclk_num_points = MAX_F_POINTS;
+
 	if (clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_MCLK,
 		&table->mclk_num_points, arb->mclk_f_points)) {
 		gk20a_err(dev_from_gk20a(g),
 			"failed to fetch MCLK frequency points");
 		goto exit_vf_table;
 	}
+	if (!table->mclk_num_points || !table->gpc2clk_num_points) {
+		gk20a_err(dev_from_gk20a(g),
+			"empty queries to f points mclk %d gpc2clk %d",
+			table->mclk_num_points, table->gpc2clk_num_points);
+		status = -EINVAL;
+		goto exit_vf_table;
+	}
 
 	memset(table->mclk_points, 0,
 		table->mclk_num_points*sizeof(struct nvgpu_clk_vf_point));
@@ -538,13 +557,18 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 
 	p5_info = pstate_get_clk_set_info(g,
 			CTRL_PERF_PSTATE_P5, clkwhich_mclk);
-	if (!p5_info)
+	if (!p5_info) {
+		gk20a_err(dev_from_gk20a(g),
+			"failed to get MCLK P5 info");
 		goto exit_vf_table;
-
+	}
 	p0_info = pstate_get_clk_set_info(g,
 			CTRL_PERF_PSTATE_P0, clkwhich_mclk);
-	if (!p0_info)
+	if (!p0_info) {
+		gk20a_err(dev_from_gk20a(g),
+			"failed to get MCLK P0 info");
 		goto exit_vf_table;
+	}
 
 	for (i = 0, j = 0, num_points = 0, clk_cur = 0;
 			i < table->mclk_num_points; i++) {
@@ -600,6 +624,8 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 			CTRL_PERF_PSTATE_P5, clkwhich_gpc2clk);
 	if (!p5_info) {
 		status = -EINVAL;
+		gk20a_err(dev_from_gk20a(g),
+			"failed to get GPC2CLK P5 info");
 		goto exit_vf_table;
 	}
 
@@ -607,12 +633,14 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 			CTRL_PERF_PSTATE_P0, clkwhich_gpc2clk);
 	if (!p0_info) {
 		status = -EINVAL;
+		gk20a_err(dev_from_gk20a(g),
+			"failed to get GPC2CLK P0 info");
 		goto exit_vf_table;
 	}
 
 	/* GPC2CLK needs to be checked in two passes. The first determines the
 	 * relationships between GPC2CLK, SYS2CLK and XBAR2CLK, while the
-	 * second verifies that the clocks minimum DVCO is satisfied and sets
+	 * second verifies that the clocks minimum is satisfied and sets
 	 * the voltages
 	 */
 	for (i = 0, j = 0, num_points = 0, clk_cur = 0;
@@ -669,9 +697,12 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 			clkwhich_sys2clk);
 		if (!p5_info) {
 			status = -EINVAL;
+			gk20a_err(dev_from_gk20a(g),
+				"failed to get SYS2CLK P5 info");
 			goto exit_vf_table;
 		}
-		/* sys2clk below DVCO min, need to find correct clock */
+
+		/* sys2clk below clk min, need to find correct clock */
 		if (table->gpc2clk_points[i].sys_mhz < p5_info->min_mhz) {
 			for (j = i + 1; j < table->gpc2clk_num_points; j++) {
 
@@ -693,6 +724,8 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 			}
 			/* no VF exists that satisfies condition */
 			if (j == table->gpc2clk_num_points) {
+				gk20a_err(dev_from_gk20a(g),
+					"NO SYS2CLK VF point possible");
 				status = -EINVAL;
 				goto exit_vf_table;
 			}
@@ -704,10 +737,12 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 			clkwhich_xbar2clk);
 		if (!p5_info) {
 			status = -EINVAL;
+			gk20a_err(dev_from_gk20a(g),
+				"failed to get SYS2CLK P5 info");
 			goto exit_vf_table;
 		}
 
-		/* xbar2clk below DVCO min, need to find correct clock */
+		/* xbar2clk below clk min, need to find correct clock */
 		if (table->gpc2clk_points[i].xbar_mhz < p5_info->min_mhz) {
 			for (j = i; j < table->gpc2clk_num_points; j++) {
 				if (table->gpc2clk_points[j].xbar_mhz >=
@@ -728,6 +763,8 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 			/* no VF exists that satisfies condition */
 			if (j == table->gpc2clk_num_points) {
 				status = -EINVAL;
+				gk20a_err(dev_from_gk20a(g),
+					"NO XBAR2CLK VF point possible");
 
 				goto exit_vf_table;
 			}
@@ -1244,9 +1281,10 @@ static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
 
 		if (!table)
 			continue;
-		if ((!table->gpc2clk_num_points) || (!table->mclk_num_points))
+		if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) {
+			gk20a_err(dev_from_gk20a(arb->g), "found empty table");
 			goto find_exit;
-
+		}
 		/* First we check MCLK to find out which PSTATE we are
 		 * are requesting, and from there try to find the minimum
 		 * GPC2CLK on the same PSTATE that satisfies the request.
@@ -1304,7 +1342,7 @@ recalculate_vf_point:
 				gpc2clk_voltuv_sram =
 					table->gpc2clk_points[index-1].
 						uvolt_sram;
-			} else if (index_mclk == table->mclk_num_points - 1) {
+			} else if (index_mclk >= table->mclk_num_points - 1) {
 				/* There is no available combination of MCLK
 				 * and GPC2CLK, we need to fail this
 				 */
-- 
cgit v1.2.2


From f41740bf08c77f54561f1b957fe552d8234524b7 Mon Sep 17 00:00:00 2001
From: Aparna Das <aparnad@nvidia.com>
Date: Mon, 12 Dec 2016 12:28:59 -0800
Subject: gpu: nvgpu: vgpu: no support for sparse mapping

Sparse mapping is currently not supported for gp10b
in a virtualized environment. Modify the GPU characteristics
to reflect that this functionality is not implemented.

Also fix the return value of vgpu_gp10b_locked_gmmu_map()
on error: the function returns a u64, so it now returns 0
instead of -EINVAL.

Bug 200243373

Change-Id: Ia367b923b87738a5cad0617cdb074f5a24fb1c81
Signed-off-by: Aparna Das <aparnad@nvidia.com>
Reviewed-on: http://git-master/r/1269710
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vladislav Buzov <vbuzov@nvidia.com>
---
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
index 8be6b19c..66fda2d9 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
@@ -74,10 +74,10 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 	/* FIXME: add support for sparse mappings */
 
 	if (WARN_ON(!sgt) || WARN_ON(!g->mm.bypass_smmu))
-		return -EINVAL;
+		return 0;
 
 	if (space_to_skip & (page_size - 1))
-		return -EINVAL;
+		return 0;
 
 	/* Allocate (or validate when map_offset != 0) the virtual address. */
 	if (!map_offset) {
@@ -182,4 +182,7 @@ void vgpu_gp10b_init_mm_ops(struct gpu_ops *gops)
 
 	gops->mm.gmmu_map = vgpu_gp10b_locked_gmmu_map;
 	gops->mm.init_mm_setup_hw = vgpu_gp10b_init_mm_setup_hw;
+
+	/* FIXME: add support for sparse mappings */
+	gops->mm.support_sparse = NULL;
 }
-- 
cgit v1.2.2


From 62d13e613807e9bce3a9d1ef0c61725ef3a885ce Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Thu, 3 Nov 2016 17:34:12 +0530
Subject: gpu: nvgpu: RPPG support

- Added rppg module to initialize GR/MS-RPPG.
  MSCG depends on GR-RPPG: unless GR-RPPG is
  engaged, MSCG does not engage.
- Updated the pg engines HAL to return the supported
  pg engines and their sub-features

JIRA DNVGPU-71

Change-Id: Ib0fd2d79b509f6f2f1dabae6e2b5aebcc80b5691
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1247486
(cherry picked from commit 86e45fa62e6a6b295f73c0173f0117ae9f78a5e9)
Reviewed-on: http://git-master/r/1270762
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x |   3 +-
 drivers/gpu/nvgpu/gp106/pmu_gp106.c   |  70 ++++++++++++++-
 drivers/gpu/nvgpu/gp106/pmu_gp106.h   |   3 +
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c   |   3 +-
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.h   |   2 +
 drivers/gpu/nvgpu/lpwr/rppg.c         | 158 ++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/lpwr/rppg.h         |  17 ++++
 7 files changed, 250 insertions(+), 6 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/lpwr/rppg.c
 create mode 100644 drivers/gpu/nvgpu/lpwr/rppg.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index 9e08e2c6..2b650ad8 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -62,7 +62,8 @@ nvgpu-y += \
 	$(nvgpu-t18x)/therm/thrm.o \
 	$(nvgpu-t18x)/therm/thrmdev.o \
 	$(nvgpu-t18x)/therm/thrmchannel.o \
-	$(nvgpu-t18x)/therm/thrmpmu.o
+	$(nvgpu-t18x)/therm/thrmpmu.o \
+	$(nvgpu-t18x)/lpwr/rppg.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
 
diff --git a/drivers/gpu/nvgpu/gp106/pmu_gp106.c b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
index 6db80abe..6f5e71eb 100644
--- a/drivers/gpu/nvgpu/gp106/pmu_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
@@ -24,6 +24,7 @@
 #include "clk/clk_mclk.h"
 #include "hw_mc_gp106.h"
 #include "hw_pwr_gp106.h"
+#include "lpwr/rppg.h"
 
 #define PMU_MEM_SCRUBBING_TIMEOUT_MAX 1000
 #define PMU_MEM_SCRUBBING_TIMEOUT_DEFAULT 10
@@ -174,6 +175,67 @@ static bool gp106_is_pmu_supported(struct gk20a *g)
 	return true;
 }
 
+static u32 gp106_pmu_pg_feature_list(struct gk20a *g, u32 pg_engine_id)
+{
+	if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS)
+		return PMU_PG_FEATURE_GR_RPPG_ENABLED;
+
+	return 0;
+}
+
+static u32 gp106_pmu_pg_engines_list(struct gk20a *g)
+{
+	return BIT(PMU_PG_ELPG_ENGINE_ID_GRAPHICS);
+}
+
+static void pmu_handle_param_msg(struct gk20a *g, struct pmu_msg *msg,
+			void *param, u32 handle, u32 status)
+{
+	gk20a_dbg_fn("");
+
+	if (status != 0) {
+		gk20a_err(dev_from_gk20a(g), "PG PARAM cmd aborted");
+		return;
+	}
+
+	gp106_dbg_pmu("PG PARAM is acknowledged from PMU %x",
+			msg->msg.pg.msg_type);
+}
+
+static int gp106_pg_param_init(struct gk20a *g, u32 pg_engine_id)
+{
+	struct pmu_gk20a *pmu = &g->pmu;
+	struct pmu_cmd cmd;
+	u32 seq;
+	u32 status;
+
+	memset(&cmd, 0, sizeof(struct pmu_cmd));
+	if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS) {
+
+		status = init_rppg(g);
+		if (status != 0) {
+			gk20a_err(dev_from_gk20a(g), "RPPG init Failed");
+			return -1;
+		}
+
+		cmd.hdr.unit_id = PMU_UNIT_PG;
+		cmd.hdr.size = PMU_CMD_HDR_SIZE +
+				sizeof(struct pmu_pg_cmd_gr_init_param);
+		cmd.cmd.pg.gr_init_param.cmd_type =
+				PMU_PG_CMD_ID_PG_PARAM;
+		cmd.cmd.pg.gr_init_param.sub_cmd_id =
+				PMU_PG_PARAM_CMD_GR_INIT_PARAM;
+		cmd.cmd.pg.gr_init_param.featuremask =
+				PMU_PG_FEATURE_GR_RPPG_ENABLED;
+
+		gp106_dbg_pmu("cmd post GR PMU_PG_CMD_ID_PG_PARAM");
+		gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
+				pmu_handle_param_msg, pmu, &seq, ~0);
+	}
+
+	return 0;
+}
+
 void gp106_init_pmu_ops(struct gpu_ops *gops)
 {
 	gk20a_dbg_fn("");
@@ -195,10 +257,10 @@ void gp106_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.lspmuwprinitdone = 0;
 	gops->pmu.fecsbootstrapdone = false;
 	gops->pmu.write_dmatrfbase = gp10b_write_dmatrfbase;
-	gops->pmu.pmu_elpg_statistics = NULL;
-	gops->pmu.pmu_pg_init_param = NULL;
-	gops->pmu.pmu_pg_supported_engines_list = NULL;
-	gops->pmu.pmu_pg_engines_feature_list = NULL;
+	gops->pmu.pmu_elpg_statistics = gp10b_pmu_elpg_statistics;
+	gops->pmu.pmu_pg_init_param = gp106_pg_param_init;
+	gops->pmu.pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list;
+	gops->pmu.pmu_pg_engines_feature_list = gp106_pmu_pg_feature_list;
 	gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd = NULL;
 	gops->pmu.dump_secure_fuses = NULL;
 	gops->pmu.reset = gp106_falcon_reset;
diff --git a/drivers/gpu/nvgpu/gp106/pmu_gp106.h b/drivers/gpu/nvgpu/gp106/pmu_gp106.h
index 8fb4c736..a42ff620 100644
--- a/drivers/gpu/nvgpu/gp106/pmu_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/pmu_gp106.h
@@ -14,6 +14,9 @@
 #ifndef __PMU_GP106_H_
 #define __PMU_GP106_H_
 
+#define gp106_dbg_pmu(fmt, arg...) \
+	gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
+
 void gp106_init_pmu_ops(struct gpu_ops *gops);
 
 #endif /*__PMU_GP106_H_*/
diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index b5fdf2fd..cd9cd0b0 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -249,7 +249,8 @@ int gp10b_pg_gr_init(struct gk20a *g, u32 pg_engine_id)
 
 	return 0;
 }
-static void gp10b_pmu_elpg_statistics(struct gk20a *g,
+
+void gp10b_pmu_elpg_statistics(struct gk20a *g,
 		u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt)
 {
 	struct pmu_gk20a *pmu = &g->pmu;
diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h
index 30245554..ee0158ec 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h
@@ -20,5 +20,7 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops);
 int gp10b_load_falcon_ucode(struct gk20a *g, u32 falconidmask);
 int gp10b_pg_gr_init(struct gk20a *g, u32 pg_engine_id);
 void gp10b_write_dmatrfbase(struct gk20a *g, u32 addr);
+void gp10b_pmu_elpg_statistics(struct gk20a *g,
+		u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt);
 
 #endif /*__PMU_GP10B_H_*/
diff --git a/drivers/gpu/nvgpu/lpwr/rppg.c b/drivers/gpu/nvgpu/lpwr/rppg.c
new file mode 100644
index 00000000..40e857ee
--- /dev/null
+++ b/drivers/gpu/nvgpu/lpwr/rppg.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "gk20a/pmu_gk20a.h"
+#include "gp106/pmu_gp106.h"
+#include "gk20a/pmu_api.h"
+#include "gm206/bios_gm206.h"
+#include "pstate/pstate.h"
+#include "include/bios.h"
+#include "pmuif/gpmuif_pg_rppg.h"
+
+static void pmu_handle_rppg_init_msg(struct gk20a *g, struct pmu_msg *msg,
+	void *param, u32 handle, u32 status)
+{
+
+	u8 ctrlId = NV_PMU_RPPG_CTRL_ID_MAX;
+	u32 *success = param;
+
+	if (status == 0) {
+		switch (msg->msg.pg.rppg_msg.cmn.msg_id) {
+		case NV_PMU_RPPG_MSG_ID_INIT_CTRL_ACK:
+			ctrlId = msg->msg.pg.rppg_msg.init_ctrl_ack.ctrl_id;
+			*success = 1;
+			gp106_dbg_pmu("RPPG is acknowledged from PMU %x",
+				msg->msg.pg.msg_type);
+		break;
+		}
+	}
+
+	gp106_dbg_pmu("RPPG is acknowledged from PMU %x",
+				msg->msg.pg.msg_type);
+}
+
+static u32 rppg_send_cmd(struct gk20a *g, struct nv_pmu_rppg_cmd *prppg_cmd)
+{
+	struct pmu_cmd cmd;
+	u32 seq;
+	u32 status = 0;
+	u32 success = 0;
+
+	memset(&cmd, 0, sizeof(struct pmu_cmd));
+	cmd.hdr.unit_id = PMU_UNIT_PG;
+	cmd.hdr.size   = PMU_CMD_HDR_SIZE +
+			sizeof(struct nv_pmu_rppg_cmd);
+
+	cmd.cmd.pg.rppg_cmd.cmn.cmd_type = PMU_PMU_PG_CMD_ID_RPPG;
+	cmd.cmd.pg.rppg_cmd.cmn.cmd_id   = prppg_cmd->cmn.cmd_id;
+
+	switch (prppg_cmd->cmn.cmd_id) {
+	case NV_PMU_RPPG_CMD_ID_INIT:
+		break;
+	case NV_PMU_RPPG_CMD_ID_INIT_CTRL:
+		cmd.cmd.pg.rppg_cmd.init_ctrl.ctrl_id =
+			prppg_cmd->init_ctrl.ctrl_id;
+		cmd.cmd.pg.rppg_cmd.init_ctrl.domain_id =
+			prppg_cmd->init_ctrl.domain_id;
+		break;
+	case NV_PMU_RPPG_CMD_ID_STATS_RESET:
+		cmd.cmd.pg.rppg_cmd.stats_reset.ctrl_id =
+			prppg_cmd->stats_reset.ctrl_id;
+		break;
+	default:
+		gk20a_err(dev_from_gk20a(g), "Inivalid RPPG command %d",
+			prppg_cmd->cmn.cmd_id);
+		return -1;
+	}
+
+	status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
+			pmu_handle_rppg_init_msg, &success, &seq, ~0);
+	if (status) {
+		gk20a_err(dev_from_gk20a(g), "Unable to submit parameter command %d",
+			prppg_cmd->cmn.cmd_id);
+		goto exit;
+	}
+
+	if (prppg_cmd->cmn.cmd_id == NV_PMU_RPPG_CMD_ID_INIT_CTRL) {
+		pmu_wait_message_cond(&g->pmu, gk20a_get_gr_idle_timeout(g),
+			&success, 1);
+		if (success == 0) {
+			status = -EINVAL;
+			gk20a_err(dev_from_gk20a(g), "Ack for the parameter command %x",
+				prppg_cmd->cmn.cmd_id);
+		}
+	}
+
+exit:
+	return status;
+}
+
+static u32 rppg_init(struct gk20a *g)
+{
+	struct nv_pmu_rppg_cmd rppg_cmd;
+
+	rppg_cmd.init.cmd_id = NV_PMU_RPPG_CMD_ID_INIT;
+
+	return rppg_send_cmd(g, &rppg_cmd);
+}
+
+static u32 rppg_ctrl_init(struct gk20a *g, u8 ctrl_id)
+{
+	struct nv_pmu_rppg_cmd rppg_cmd;
+
+	rppg_cmd.init_ctrl.cmd_id  = NV_PMU_RPPG_CMD_ID_INIT_CTRL;
+	rppg_cmd.init_ctrl.ctrl_id = ctrl_id;
+
+	switch (ctrl_id) {
+	case NV_PMU_RPPG_CTRL_ID_GR:
+	case NV_PMU_RPPG_CTRL_ID_MS:
+		rppg_cmd.init_ctrl.domain_id = NV_PMU_RPPG_DOMAIN_ID_GFX;
+		break;
+	}
+
+	return rppg_send_cmd(g, &rppg_cmd);
+}
+
+u32 init_rppg(struct gk20a *g)
+{
+	u32 status;
+
+	status = rppg_init(g);
+	if (status != 0) {
+		gk20a_err(dev_from_gk20a(g),
+			"Failed to initialize RPPG in PMU: 0x%08x", status);
+		return status;
+	}
+
+
+	status = rppg_ctrl_init(g, NV_PMU_RPPG_CTRL_ID_GR);
+	if (status != 0) {
+		gk20a_err(dev_from_gk20a(g),
+			"Failed to initialize RPPG_CTRL: GR in PMU: 0x%08x",
+			status);
+		return status;
+	}
+
+	status = rppg_ctrl_init(g, NV_PMU_RPPG_CTRL_ID_MS);
+	if (status != 0) {
+		gk20a_err(dev_from_gk20a(g),
+			"Failed to initialize RPPG_CTRL: MS in PMU: 0x%08x",
+			status);
+		return status;
+	}
+
+	return status;
+}
+
+
diff --git a/drivers/gpu/nvgpu/lpwr/rppg.h b/drivers/gpu/nvgpu/lpwr/rppg.h
new file mode 100644
index 00000000..8dc8d36c
--- /dev/null
+++ b/drivers/gpu/nvgpu/lpwr/rppg.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _RPPG_H_
+#define _RPPG_H_
+
+u32 init_rppg(struct gk20a *g);
+#endif
-- 
cgit v1.2.2


From e5824d8014c321fbe2c1e04e12307125dd50a472 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Thu, 3 Nov 2016 21:16:21 +0530
Subject: gpu: nvgpu: MSCG support

- Updated gp106 pg engine init/list/features HALs
  to support the MS engine
- Added defines & interface for lpwr tables read from vbios.
- Added lpwr module which reads the idx/gr/ms tables from vbios
  to map rppg/mscg support to the respective p-state
- Added lpwr module public functions to control lpwr
  features: enable/disable mscg/rppg, and issue an mclk-change
  request whenever the mclk-change parameters change
- Added lpwr public functions to query rppg/mscg support for
  the requested pstate
- Added a mutex to prevent PG transitions while the arbiter
  executes a pstate transition
- Added nvgpu_clk_arb_get_current_pstate() to the clk arbiter
  to get the current pstate

JIRA DNVGPU-71

Change-Id: Ifcd640cc19ef630be1e2a9ba07ec84023d8202a0
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1247553
(cherry picked from commit 8a441dea2410e1b5196ef24e56a7768b6980e46b)
Reviewed-on: http://git-master/r/1270989
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu-t18x |   3 +-
 drivers/gpu/nvgpu/clk/clk_arb.c       | 102 ++++----
 drivers/gpu/nvgpu/clk/clk_arb.h       |   5 +
 drivers/gpu/nvgpu/gp106/pmu_gp106.c   |  27 ++-
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c   |   3 +
 drivers/gpu/nvgpu/include/bios.h      |  64 +++++
 drivers/gpu/nvgpu/lpwr/lpwr.c         | 423 ++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/lpwr/lpwr.h         |  92 ++++++++
 drivers/gpu/nvgpu/perf/perf.h         |   2 +
 drivers/gpu/nvgpu/pstate/pstate.c     |   9 +-
 drivers/gpu/nvgpu/pstate/pstate.h     |   4 +
 11 files changed, 691 insertions(+), 43 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/lpwr/lpwr.c
 create mode 100644 drivers/gpu/nvgpu/lpwr/lpwr.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
index 2b650ad8..30119345 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu-t18x
@@ -63,7 +63,8 @@ nvgpu-y += \
 	$(nvgpu-t18x)/therm/thrmdev.o \
 	$(nvgpu-t18x)/therm/thrmchannel.o \
 	$(nvgpu-t18x)/therm/thrmpmu.o \
-	$(nvgpu-t18x)/lpwr/rppg.o
+	$(nvgpu-t18x)/lpwr/rppg.o \
+	$(nvgpu-t18x)/lpwr/lpwr.o
 
 nvgpu-$(CONFIG_TEGRA_GK20A) += $(nvgpu-t18x)/gp10b/platform_gp10b_tegra.o
 
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index 3f35fac7..c440dc3b 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -44,14 +44,6 @@ static void nvgpu_clk_arb_free_session(struct kref *refcount);
 static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
 	u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
 	u32 voltuv_sram);
-static int nvgpu_clk_arb_change_vf_point_prefix(struct gk20a *g,
-	u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target,
-	u16 mclk_target, u32 voltuv, u32 voltuv_sram, u32 nuvmin,
-	u32 nuvmin_sram);
-static int nvgpu_clk_arb_change_vf_point_postfix(struct gk20a *g,
-	u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target,
-	u16 mclk_target, u32 voltuv, u32 voltuv_sram, u32 nuvmin,
-	u32 nuvmin_sram);
 static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
 	u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
 	u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram);
@@ -105,6 +97,7 @@ struct nvgpu_clk_arb {
 	spinlock_t sessions_lock;
 	spinlock_t users_lock;
 
+	struct mutex pstate_lock;
 	struct list_head users;
 	struct list_head sessions;
 	struct llist_head requests;
@@ -235,6 +228,7 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 	g->clk_arb = arb;
 	arb->g = g;
 
+	mutex_init(&arb->pstate_lock);
 	spin_lock_init(&arb->sessions_lock);
 	spin_lock_init(&arb->users_lock);
 
@@ -943,12 +937,23 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	/* Program clocks */
 	/* A change in both mclk of gpc2clk may require a change in voltage */
 
-	status = nvgpu_clk_arb_change_vf_point_prefix(g, gpc2clk_target,
-		sys2clk_target, xbar2clk_target, mclk_target, voltuv,
-		voltuv_sram, nuvmin, nuvmin_sram);
+	mutex_lock(&arb->pstate_lock);
+	status = nvgpu_lpwr_disable_pg(g, false);
 
+	status = clk_pmu_freq_controller_load(g, false);
+	if (status < 0) {
+		arb->status = status;
+		mutex_unlock(&arb->pstate_lock);
+
+		/* make status visible */
+		smp_mb();
+		goto exit_arb;
+	}
+	status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram);
 	if (status < 0) {
 		arb->status = status;
+		mutex_unlock(&arb->pstate_lock);
+
 		/* make status visible */
 		smp_mb();
 		goto exit_arb;
@@ -957,20 +962,30 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target,
 		sys2clk_target, xbar2clk_target, mclk_target, voltuv,
 		voltuv_sram);
-
 	if (status < 0) {
 		arb->status = status;
+		mutex_unlock(&arb->pstate_lock);
+
 		/* make status visible */
 		smp_mb();
 		goto exit_arb;
 	}
 
-	status = nvgpu_clk_arb_change_vf_point_postfix(g, gpc2clk_target,
-		sys2clk_target, xbar2clk_target, mclk_target, voltuv,
-		voltuv_sram, nuvmin, nuvmin_sram);
+	status = clk_pmu_freq_controller_load(g, true);
+	if (status < 0) {
+		arb->status = status;
+		mutex_unlock(&arb->pstate_lock);
+
+		/* make status visible */
+		smp_mb();
+		goto exit_arb;
+	}
 
+	status = nvgpu_lwpr_mclk_change(g, pstate);
 	if (status < 0) {
 		arb->status = status;
+		mutex_unlock(&arb->pstate_lock);
+
 		/* make status visible */
 		smp_mb();
 		goto exit_arb;
@@ -991,15 +1006,24 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	smp_wmb();
 	xchg(&arb->actual, actual);
 
+	status = nvgpu_lpwr_enable_pg(g, false);
+	if (status < 0) {
+		arb->status = status;
+		mutex_unlock(&arb->pstate_lock);
+
+		/* make status visible */
+		smp_mb();
+		goto exit_arb;
+	}
+
 	/* status must be visible before atomic inc */
 	smp_wmb();
 	atomic_inc(&arb->req_nr);
 
-	wake_up_interruptible(&arb->request_wq);
+	/* Unlock pstate change for PG */
+	mutex_unlock(&arb->pstate_lock);
 
-	if (status < 0)
-		gk20a_err(dev_from_gk20a(g),
-			"Error in arbiter update");
+	wake_up_interruptible(&arb->request_wq);
 
 #ifdef CONFIG_DEBUG_FS
 	g->ops.read_ptimer(g, &t1);
@@ -1036,6 +1060,9 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 #endif
 
 exit_arb:
+	if (status < 0)
+		gk20a_err(dev_from_gk20a(g),
+				"Error in arbiter update");
 
 	/* notify completion for all requests */
 	head = llist_del_all(&arb->requests);
@@ -1300,6 +1327,7 @@ recalculate_vf_point:
 		}
 		if (index == table->mclk_num_points) {
 			mclk_vf = &table->mclk_points[index-1];
+			index = table->mclk_num_points - 1;
 		}
 		index_mclk = index;
 
@@ -1378,28 +1406,11 @@ find_exit:
 	return pstate;
 }
 
-static int nvgpu_clk_arb_change_vf_point_prefix(struct gk20a *g,
-	u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target,
-	u16 mclk_target, u32 voltuv, u32 voltuv_sram, u32 nuvmin,
-	u32 nuvmin_sram)
-{
-
-	int status;
-
-	status = clk_pmu_freq_controller_load(g, false);
-	if (status < 0)
-		return status;
-
-	status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram);
-	return status;
-}
-
-static int nvgpu_clk_arb_change_vf_point_postfix(struct gk20a *g,
-	u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target,
-	u16 mclk_target, u32 voltuv, u32 voltuv_sram, u32 nuvmin,
-	u32 nuvmin_sram)
+/* This function is inherently unsafe to call while arbiter is running
+ * arbiter must be blocked before calling this function */
+int nvgpu_clk_arb_get_current_pstate(struct gk20a *g)
 {
-	return clk_pmu_freq_controller_load(g, true);
+	return ACCESS_ONCE(g->clk_arb->actual->pstate);
 }
 
 static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
@@ -1456,6 +1467,17 @@ static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
 	return 0;
 }
 
+void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock)
+{
+	struct nvgpu_clk_arb *arb = g->clk_arb;
+
+	if (lock)
+		mutex_lock(&arb->pstate_lock);
+	else
+		mutex_unlock(&arb->pstate_lock);
+
+}
+
 #ifdef CONFIG_DEBUG_FS
 static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
 {
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.h b/drivers/gpu/nvgpu/clk/clk_arb.h
index 8355dac5..700804b3 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.h
+++ b/drivers/gpu/nvgpu/clk/clk_arb.h
@@ -62,5 +62,10 @@ int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
 	struct nvgpu_clk_session *session, int *event_fd);
 
 void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g);
+
+int nvgpu_clk_arb_get_current_pstate(struct gk20a *g);
+
+void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock);
+
 #endif /* _CLK_ARB_H_ */
 
diff --git a/drivers/gpu/nvgpu/gp106/pmu_gp106.c b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
index 6f5e71eb..eecd7351 100644
--- a/drivers/gpu/nvgpu/gp106/pmu_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/pmu_gp106.c
@@ -24,6 +24,7 @@
 #include "clk/clk_mclk.h"
 #include "hw_mc_gp106.h"
 #include "hw_pwr_gp106.h"
+#include "lpwr/lpwr.h"
 #include "lpwr/rppg.h"
 
 #define PMU_MEM_SCRUBBING_TIMEOUT_MAX 1000
@@ -180,12 +181,16 @@ static u32 gp106_pmu_pg_feature_list(struct gk20a *g, u32 pg_engine_id)
 	if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS)
 		return PMU_PG_FEATURE_GR_RPPG_ENABLED;
 
+	if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS)
+		return NVGPU_PMU_MS_FEATURE_MASK_ALL;
+
 	return 0;
 }
 
 static u32 gp106_pmu_pg_engines_list(struct gk20a *g)
 {
-	return BIT(PMU_PG_ELPG_ENGINE_ID_GRAPHICS);
+	return BIT(PMU_PG_ELPG_ENGINE_ID_GRAPHICS) |
+			BIT(PMU_PG_ELPG_ENGINE_ID_MS);
 }
 
 static void pmu_handle_param_msg(struct gk20a *g, struct pmu_msg *msg,
@@ -231,6 +236,23 @@ static int gp106_pg_param_init(struct gk20a *g, u32 pg_engine_id)
 		gp106_dbg_pmu("cmd post GR PMU_PG_CMD_ID_PG_PARAM");
 		gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
 				pmu_handle_param_msg, pmu, &seq, ~0);
+	} else if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS) {
+		cmd.hdr.unit_id = PMU_UNIT_PG;
+		cmd.hdr.size = PMU_CMD_HDR_SIZE +
+			sizeof(struct pmu_pg_cmd_ms_init_param);
+		cmd.cmd.pg.ms_init_param.cmd_type =
+			PMU_PG_CMD_ID_PG_PARAM;
+		cmd.cmd.pg.ms_init_param.cmd_id =
+			PMU_PG_PARAM_CMD_MS_INIT_PARAM;
+		cmd.cmd.pg.ms_init_param.support_mask =
+			NVGPU_PMU_MS_FEATURE_MASK_CLOCK_GATING |
+			NVGPU_PMU_MS_FEATURE_MASK_SW_ASR |
+			NVGPU_PMU_MS_FEATURE_MASK_RPPG |
+			NVGPU_PMU_MS_FEATURE_MASK_FB_TRAINING;
+
+		gp106_dbg_pmu("cmd post MS PMU_PG_CMD_ID_PG_PARAM");
+		gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
+			pmu_handle_param_msg, pmu, &seq, ~0);
 	}
 
 	return 0;
@@ -261,6 +283,9 @@ void gp106_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.pmu_pg_init_param = gp106_pg_param_init;
 	gops->pmu.pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list;
 	gops->pmu.pmu_pg_engines_feature_list = gp106_pmu_pg_feature_list;
+	gops->pmu.pmu_lpwr_enable_pg = nvgpu_lpwr_enable_pg;
+	gops->pmu.pmu_lpwr_disable_pg = nvgpu_lpwr_disable_pg;
+	gops->pmu.pmu_pg_param_post_init = nvgpu_lpwr_post_init;
 	gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd = NULL;
 	gops->pmu.dump_secure_fuses = NULL;
 	gops->pmu.reset = gp106_falcon_reset;
diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index cd9cd0b0..9274990a 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -482,6 +482,9 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.pmu_pg_init_param = gp10b_pg_gr_init;
 	gops->pmu.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list;
 	gops->pmu.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list;
+	gops->pmu.pmu_lpwr_enable_pg = NULL;
+	gops->pmu.pmu_lpwr_disable_pg = NULL;
+	gops->pmu.pmu_pg_param_post_init = NULL;
 	gops->pmu.send_lrf_tex_ltc_dram_overide_en_dis_cmd =
 			send_ecc_overide_en_dis_cmd;
 	gops->pmu.reset = gk20a_pmu_reset;
diff --git a/drivers/gpu/nvgpu/include/bios.h b/drivers/gpu/nvgpu/include/bios.h
index f3939d14..097e90ec 100644
--- a/drivers/gpu/nvgpu/include/bios.h
+++ b/drivers/gpu/nvgpu/include/bios.h
@@ -925,4 +925,68 @@ struct vbios_fct_1x_entry {
 #define NV_VBIOS_FCT_1X_ENTRY_PARAM8_FREQ_HYST_NEG_MASK GENMASK(31, 16)
 #define NV_VBIOS_FCT_1X_ENTRY_PARAM8_FREQ_HYST_NEG_SHIFT 16
 
+/* LPWR Index Table */
+struct nvgpu_bios_lpwr_idx_table_1x_header {
+	u8 version;
+	u8 header_size;
+	u8 entry_size;
+	u8 entry_count;
+	u16 base_sampling_period;
+} __packed;
+
+struct nvgpu_bios_lpwr_idx_table_1x_entry {
+	u8 pcie_idx;
+	u8 gr_idx;
+	u8 ms_idx;
+	u8 di_idx;
+	u8 gc6_idx;
+} __packed;
+
+/* LPWR MS Table */
+struct nvgpu_bios_lpwr_ms_table_1x_header {
+	u8 version;
+	u8 header_size;
+	u8 entry_size;
+	u8 entry_count;
+	u8 default_entry_idx;
+	u16 idle_threshold_us;
+} __packed;
+
+struct nvgpu_bios_lpwr_ms_table_1x_entry {
+	u32 feautre_mask;
+	u16 dynamic_current_logic;
+	u16 dynamic_current_sram;
+} __packed;
+
+#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_MASK    GENMASK(0, 0)
+#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_SHIFT    0
+#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_SWASR_MASK    GENMASK(2, 2)
+#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_SWASR_SHIFT    2
+#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_CLOCK_GATING_MASK    \
+			GENMASK(3, 3)
+#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_CLOCK_GATING_SHIFT    3
+#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_RPPG_MASK    GENMASK(5, 5)
+#define NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_RPPG_SHIFT    5
+
+/* LPWR GR Table */
+struct nvgpu_bios_lpwr_gr_table_1x_header {
+	u8 version;
+	u8 header_size;
+	u8 entry_size;
+	u8 entry_count;
+	u8 default_entry_idx;
+	u16 idle_threshold_us;
+	u8 adaptive_gr_multiplier;
+} __packed;
+
+struct nvgpu_bios_lpwr_gr_table_1x_entry {
+	u32 feautre_mask;
+} __packed;
+
+#define NV_VBIOS_LPWR_GR_FEATURE_MASK_GR_MASK GENMASK(0, 0)
+#define NV_VBIOS_LPWR_GR_FEATURE_MASK_GR_SHIFT 0
+
+#define NV_VBIOS_LPWR_GR_FEATURE_MASK_GR_RPPG_MASK GENMASK(4, 4)
+#define NV_VBIOS_LPWR_GR_FEATURE_MASK_GR_RPPG_SHIFT 4
+
 #endif
diff --git a/drivers/gpu/nvgpu/lpwr/lpwr.c b/drivers/gpu/nvgpu/lpwr/lpwr.c
new file mode 100644
index 00000000..4f8d2eec
--- /dev/null
+++ b/drivers/gpu/nvgpu/lpwr/lpwr.c
@@ -0,0 +1,423 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "gk20a/pmu_gk20a.h"
+#include "gp106/pmu_gp106.h"
+#include "gk20a/pmu_api.h"
+#include "gm206/bios_gm206.h"
+#include "pstate/pstate.h"
+#include "include/bios.h"
+#include "perf/perf.h"
+#include "lpwr.h"
+
+/*
+ * Copy the VBIOS LPWR index table (LOWPOWER_TABLE) into
+ * g->perf_pmu.lpwr.lwpr_bios_data.idx. Returns 0, or -EINVAL when the table
+ * cannot be obtained or declares too many entries.
+ */
+static int get_lpwr_idx_table(struct gk20a *g)
+{
+	struct nvgpu_lpwr_bios_idx_data *dst =
+			&g->perf_pmu.lpwr.lwpr_bios_data.idx;
+	struct nvgpu_bios_lpwr_idx_table_1x_header hdr = { 0 };
+	struct nvgpu_bios_lpwr_idx_table_1x_entry raw = { 0 };
+	u8 *table;
+	u32 i;
+
+	if (!g->ops.bios.get_perf_table_ptrs)
+		return -EINVAL;
+
+	table = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+			g->bios.perf_token, LOWPOWER_TABLE);
+	if (!table)
+		return -EINVAL;
+
+	memcpy(&hdr, table, sizeof(hdr));
+
+	/* Reject tables with LPWR_VBIOS_IDX_ENTRY_COUNT_MAX or more entries. */
+	if (hdr.entry_count >= LPWR_VBIOS_IDX_ENTRY_COUNT_MAX)
+		return -EINVAL;
+
+	dst->base_sampling_period = (u16)hdr.base_sampling_period;
+
+	/* Entries start after the header, entry_size bytes apart. */
+	for (i = 0; i < hdr.entry_count; i++) {
+		memcpy(&raw, table + hdr.header_size + (i * hdr.entry_size),
+			sizeof(raw));
+
+		dst->entry[i].pcie_idx = raw.pcie_idx;
+		dst->entry[i].gr_idx = raw.gr_idx;
+		dst->entry[i].ms_idx = raw.ms_idx;
+		dst->entry[i].di_idx = raw.di_idx;
+		dst->entry[i].gc6_idx = raw.gc6_idx;
+	}
+
+	return 0;
+}
+
+/* Parse the VBIOS LPWR GR table into g->perf_pmu.lpwr.lwpr_bios_data.gr.
+ * Returns 0, or -EINVAL if the table is missing or has too many entries. */
+static int get_lpwr_gr_table(struct gk20a *g)
+{
+	u32 *lpwr_gr_table_ptr;
+	u8 *entry_addr;
+	u32 idx;
+	struct nvgpu_lpwr_bios_gr_data *pgr_data =
+			&g->perf_pmu.lpwr.lwpr_bios_data.gr;
+	struct nvgpu_bios_lpwr_gr_table_1x_header header = { 0 };
+	struct nvgpu_bios_lpwr_gr_table_1x_entry entry = { 0 };
+
+	if (!g->ops.bios.get_perf_table_ptrs)
+		return -EINVAL;
+
+	lpwr_gr_table_ptr = (u32 *)g->ops.bios.get_perf_table_ptrs(g,
+		g->bios.perf_token, LOWPOWER_GR_TABLE);
+	if (lpwr_gr_table_ptr == NULL)
+		return -EINVAL;
+
+	memcpy(&header, lpwr_gr_table_ptr, sizeof(header));
+	/* entry_count is read from VBIOS; never index past entry[]. */
+	if (header.entry_count > LPWR_VBIOS_GR_ENTRY_COUNT_MAX)
+		return -EINVAL;
+
+	/* Parse the LPWR GR Table entries. */
+	for (idx = 0; idx < header.entry_count; idx++) {
+		entry_addr = (u8 *)lpwr_gr_table_ptr + header.header_size +
+			(idx * header.entry_size);
+		memcpy(&entry, entry_addr, sizeof(entry));
+
+		if (!BIOS_GET_FIELD(entry.feautre_mask,
+			NV_VBIOS_LPWR_GR_FEATURE_MASK_GR))
+			continue;
+
+		pgr_data->entry[idx].gr_enabled = true;
+		pgr_data->entry[idx].feature_mask =
+			NVGPU_PMU_GR_FEATURE_MASK_ALL;
+		if (!BIOS_GET_FIELD(entry.feautre_mask,
+			NV_VBIOS_LPWR_GR_FEATURE_MASK_GR_RPPG))
+			pgr_data->entry[idx].feature_mask &=
+				~NVGPU_PMU_GR_FEATURE_MASK_RPPG;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy the VBIOS LPWR MS table (LOWPOWER_MS_TABLE) into
+ * g->perf_pmu.lpwr.lwpr_bios_data.ms. Returns 0, or -EINVAL when the table
+ * cannot be obtained or declares too many entries.
+ */
+static int get_lpwr_ms_table(struct gk20a *g)
+{
+	struct nvgpu_lpwr_bios_ms_data *dst =
+			&g->perf_pmu.lpwr.lwpr_bios_data.ms;
+	struct nvgpu_bios_lpwr_ms_table_1x_header hdr = { 0 };
+	struct nvgpu_bios_lpwr_ms_table_1x_entry raw = { 0 };
+	struct nvgpu_lpwr_bios_ms_entry *out;
+	u8 *table;
+	u32 i;
+
+	if (!g->ops.bios.get_perf_table_ptrs)
+		return -EINVAL;
+
+	table = (u8 *)g->ops.bios.get_perf_table_ptrs(g,
+			g->bios.perf_token, LOWPOWER_MS_TABLE);
+	if (!table)
+		return -EINVAL;
+
+	memcpy(&hdr, table, sizeof(hdr));
+
+	/* Reject tables with LPWR_VBIOS_MS_ENTRY_COUNT_MAX or more entries. */
+	if (hdr.entry_count >= LPWR_VBIOS_MS_ENTRY_COUNT_MAX)
+		return -EINVAL;
+
+	dst->default_entry_idx = (u8)hdr.default_entry_idx;
+
+	dst->idle_threshold_us = (u32)(hdr.idle_threshold_us * 10);
+
+	/* Entries start after the header, entry_size bytes apart. */
+	for (i = 0; i < hdr.entry_count; i++) {
+		out = &dst->entry[i];
+
+		memcpy(&raw, table + hdr.header_size + (i * hdr.entry_size),
+			sizeof(raw));
+
+		out->dynamic_current_logic = raw.dynamic_current_logic;
+		out->dynamic_current_sram = raw.dynamic_current_sram;
+
+		/* Leave ms_enabled/feature_mask untouched when MS is off. */
+		if (!BIOS_GET_FIELD(raw.feautre_mask,
+			NV_VBIOS_LPWR_MS_FEATURE_MASK_MS))
+			continue;
+
+		out->ms_enabled = true;
+		out->feature_mask = NVGPU_PMU_MS_FEATURE_MASK_ALL;
+
+		/* Drop each sub-feature whose VBIOS bit is clear. */
+		if (!BIOS_GET_FIELD(raw.feautre_mask,
+			NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_CLOCK_GATING))
+			out->feature_mask &=
+				~NVGPU_PMU_MS_FEATURE_MASK_CLOCK_GATING;
+
+		if (!BIOS_GET_FIELD(raw.feautre_mask,
+			NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_SWASR))
+			out->feature_mask &= ~NVGPU_PMU_MS_FEATURE_MASK_SW_ASR;
+
+		if (!BIOS_GET_FIELD(raw.feautre_mask,
+			NV_VBIOS_LPWR_MS_FEATURE_MASK_MS_RPPG))
+			out->feature_mask &= ~NVGPU_PMU_MS_FEATURE_MASK_RPPG;
+	}
+
+	return 0;
+}
+
+/* Load the LPWR GR, MS and index tables from VBIOS, in that order, stopping
+ * at the first failure. Returns 0, or the failing loader's error code. */
+u32 nvgpu_lpwr_pg_setup(struct gk20a *g)
+{
+	u32 status;
+
+	gk20a_dbg_fn("");
+
+	status = get_lpwr_gr_table(g);
+	if (status == 0)
+		status = get_lpwr_ms_table(g);
+
+	if (status == 0)
+		status = get_lpwr_idx_table(g);
+
+	/* The loaders return negative errnos; they are passed on as u32. */
+	return status;
+}
+
+/* PMU completion callback for LPWR PG_PARAM commands: on success sets the
+ * u32 that @param points to to 1; on abort logs and leaves it unchanged. */
+static void nvgpu_pmu_handle_param_lpwr_msg(struct gk20a *g,
+		struct pmu_msg *msg, void *param,
+		u32 handle, u32 status)
+{
+	gk20a_dbg_fn("");
+
+	if (status == 0) {
+		*(u32 *)param = 1;
+		gp106_dbg_pmu("lpwr-param is acknowledged from PMU %x",
+				msg->msg.pg.msg_type);
+		return;
+	}
+
+	/* Any non-zero status is reported as an aborted command. */
+	gk20a_err(dev_from_gk20a(g), "LWPR PARAM cmd aborted");
+}
+
+/* Send the MCLK-change PG param for @pstate to the PMU; 0 or -errno. */
+int nvgpu_lwpr_mclk_change(struct gk20a *g, u32 pstate)
+{
+	struct pmu_cmd cmd;
+	u32 seq, ack_status = 0;
+	int status;
+	u32 payload = NV_PMU_PG_PARAM_MCLK_CHANGE_MS_SWASR_ENABLED;
+	struct clk_set_info *pstate_info;
+
+	gk20a_dbg_fn("");
+
+	pstate_info = pstate_get_clk_set_info(g, pstate, clkwhich_mclk);
+	if (!pstate_info)
+		return -EINVAL;
+
+	if (pstate_info->max_mhz >
+			MAX_SWASR_MCLK_FREQ_WITHOUT_WR_TRAINING_MAXWELL_MHZ)
+		payload |=
+			NV_PMU_PG_PARAM_MCLK_CHANGE_GDDR5_WR_TRAINING_ENABLED;
+
+	if (payload == g->perf_pmu.lpwr.mclk_change_cache)
+		return 0;
+
+	/* Zero the command so no uninitialized stack bytes reach the PMU. */
+	memset(&cmd, 0, sizeof(struct pmu_cmd));
+	cmd.hdr.unit_id = PMU_UNIT_PG;
+	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_mclk_change);
+	cmd.cmd.pg.mclk_change.cmd_type = PMU_PG_CMD_ID_PG_PARAM;
+	cmd.cmd.pg.mclk_change.cmd_id = PMU_PG_PARAM_CMD_MCLK_CHANGE;
+	cmd.cmd.pg.mclk_change.data = payload;
+
+	gp106_dbg_pmu("cmd post MS PMU_PG_PARAM_CMD_MCLK_CHANGE");
+	status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
+		nvgpu_pmu_handle_param_lpwr_msg, &ack_status, &seq, ~0);
+	/* Only wait for an ack if posting the command succeeded. */
+	if (status == 0)
+		pmu_wait_message_cond(&g->pmu, gk20a_get_gr_idle_timeout(g),
+			&ack_status, 1);
+	if (ack_status == 0) {
+		gk20a_err(dev_from_gk20a(g), "MCLK-CHANGE ACK failed");
+		return status ? status : -EINVAL;
+	}
+
+	/* Cache only after the ack so a failed change is retried. */
+	g->perf_pmu.lpwr.mclk_change_cache = payload;
+	return 0;
+}
+
+/* Send PMU_PG_PARAM_CMD_POST_INIT to the PMU and wait for its ack.
+ * Returns 0 on success, else an error code (e.g. -EINVAL as u32). */
+u32 nvgpu_lpwr_post_init(struct gk20a *g)
+{
+	struct pmu_cmd cmd;
+	u32 seq;
+	u32 status, ack_status = 0;
+
+	memset(&cmd, 0, sizeof(struct pmu_cmd));
+	cmd.hdr.unit_id = PMU_UNIT_PG;
+	cmd.hdr.size   = PMU_CMD_HDR_SIZE +
+		sizeof(struct pmu_pg_cmd_post_init_param);
+	cmd.cmd.pg.post_init.cmd_type = PMU_PG_CMD_ID_PG_PARAM;
+	cmd.cmd.pg.post_init.cmd_id = PMU_PG_PARAM_CMD_POST_INIT;
+
+	gp106_dbg_pmu("cmd post post-init PMU_PG_PARAM_CMD_POST_INIT");
+	status = gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
+		nvgpu_pmu_handle_param_lpwr_msg, &ack_status, &seq, ~0);
+
+	/* Only wait for an ack if posting the command succeeded. */
+	if (status == 0)
+		pmu_wait_message_cond(&g->pmu, gk20a_get_gr_idle_timeout(g),
+			&ack_status, 1);
+	if (ack_status == 0) {
+		gk20a_err(dev_from_gk20a(g), "post-init ack failed");
+		if (status == 0)
+			status = -EINVAL;
+	}
+
+	return status;
+}
+
+/* Return 1 if the MS table entry mapped to @pstate_num has MS enabled, else
+ * 0 (also 0 for an unknown pstate or an out-of-range table index). */
+u32 nvgpu_lpwr_is_mscg_supported(struct gk20a *g, u32 pstate_num)
+{
+	struct nvgpu_lpwr_bios_data *bios = &g->perf_pmu.lpwr.lwpr_bios_data;
+	struct pstate *pstate = pstate_find(g, pstate_num);
+	u32 ms_idx;
+
+	gk20a_dbg_fn("");
+
+	/* Bound both indices before using them on the fixed-size tables. */
+	if (!pstate || pstate->lpwr_entry_idx >= LPWR_VBIOS_IDX_ENTRY_COUNT_MAX)
+		return 0;
+
+	ms_idx = bios->idx.entry[pstate->lpwr_entry_idx].ms_idx;
+	if (ms_idx >= LPWR_VBIOS_MS_ENTRY_COUNT_MAX)
+		return 0;
+
+	return bios->ms.entry[ms_idx].ms_enabled ? 1 : 0;
+}
+
+/* Return 1 if the GR table entry mapped to @pstate_num has GR enabled, else
+ * 0 (also 0 for an unknown pstate or an out-of-range table index). */
+u32 nvgpu_lpwr_is_rppg_supported(struct gk20a *g, u32 pstate_num)
+{
+	struct nvgpu_lpwr_bios_data *bios = &g->perf_pmu.lpwr.lwpr_bios_data;
+	struct pstate *pstate = pstate_find(g, pstate_num);
+	u32 idx;
+
+	gk20a_dbg_fn("");
+
+	/* Bound both indices before using them on the fixed-size tables. */
+	if (!pstate || pstate->lpwr_entry_idx >= LPWR_VBIOS_IDX_ENTRY_COUNT_MAX)
+		return 0;
+
+	idx = bios->idx.entry[pstate->lpwr_entry_idx].gr_idx;
+	if (idx >= LPWR_VBIOS_GR_ENTRY_COUNT_MAX)
+		return 0;
+
+	return bios->gr.entry[idx].gr_enabled ? 1 : 0;
+}
+
+
+/*
+ * Enable MSCG and ELPG where the LPWR tables allow them for the current
+ * pstate. With @pstate_lock set, the arbiter pstate lock is held as well.
+ * Returns 0, or the error from gk20a_pmu_enable_elpg().
+ */
+int nvgpu_lpwr_enable_pg(struct gk20a *g, bool pstate_lock)
+{
+	struct pmu_gk20a *pmu = &g->pmu;
+	int status = 0;
+	u32 present_pstate;
+
+	gk20a_dbg_fn("");
+
+	/* Lock order: arbiter pstate lock (optional), then pg_mutex. */
+	if (pstate_lock)
+		nvgpu_clk_arb_pstate_change_lock(g, true);
+	mutex_lock(&pmu->pg_mutex);
+
+	present_pstate = nvgpu_clk_arb_get_current_pstate(g);
+
+	/* MSCG is only flagged here; this function sends nothing for it. */
+	if (nvgpu_lpwr_is_mscg_supported(g, present_pstate) &&
+			g->mscg_enabled && !pmu->mscg_stat)
+		pmu->mscg_stat = PMU_MSCG_ENABLED;
+
+	/* ELPG needs RPPG at this pstate, PMU support and elpg_enabled. */
+	if (nvgpu_lpwr_is_rppg_supported(g, present_pstate) &&
+			support_gk20a_pmu(g->dev) && g->elpg_enabled)
+		status = gk20a_pmu_enable_elpg(g);
+
+	mutex_unlock(&pmu->pg_mutex);
+	if (pstate_lock)
+		nvgpu_clk_arb_pstate_change_lock(g, false);
+
+	return status;
+}
+
+/*
+ * Disable ELPG and MSCG where the LPWR tables list them for the current
+ * pstate. With @pstate_lock set, the arbiter pstate lock is held as well.
+ * Returns 0, or the error from gk20a_pmu_disable_elpg(), in which case the
+ * MSCG state is left unchanged.
+ */
+int nvgpu_lpwr_disable_pg(struct gk20a *g, bool pstate_lock)
+{
+	struct pmu_gk20a *pmu = &g->pmu;
+	u32 present_pstate;
+	int status = 0;
+
+	gk20a_dbg_fn("");
+
+	/* Lock order: arbiter pstate lock (optional), then pg_mutex. */
+	if (pstate_lock)
+		nvgpu_clk_arb_pstate_change_lock(g, true);
+	mutex_lock(&pmu->pg_mutex);
+
+	present_pstate = nvgpu_clk_arb_get_current_pstate(g);
+
+	/* ELPG goes first; a failure here skips the MSCG step below. */
+	if (nvgpu_lpwr_is_rppg_supported(g, present_pstate) &&
+			support_gk20a_pmu(g->dev) && g->elpg_enabled)
+		status = gk20a_pmu_disable_elpg(g);
+
+	/* MSCG is only un-flagged here; nothing is sent for it. */
+	if (status == 0 &&
+			nvgpu_lpwr_is_mscg_supported(g, present_pstate) &&
+			g->mscg_enabled && pmu->mscg_stat)
+		pmu->mscg_stat = PMU_MSCG_DISABLED;
+
+	/* Release in the reverse order of acquisition. */
+	mutex_unlock(&pmu->pg_mutex);
+	if (pstate_lock)
+		nvgpu_clk_arb_pstate_change_lock(g, false);
+
+	gk20a_dbg_fn("done");
+
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/lpwr/lpwr.h b/drivers/gpu/nvgpu/lpwr/lpwr.h
new file mode 100644
index 00000000..6b3259df
--- /dev/null
+++ b/drivers/gpu/nvgpu/lpwr/lpwr.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _MSCG_H_
+#define _MSCG_H_
+
+#define MAX_SWASR_MCLK_FREQ_WITHOUT_WR_TRAINING_MAXWELL_MHZ   540
+
+#define NV_PMU_PG_PARAM_MCLK_CHANGE_MS_SWASR_ENABLED BIT(0x1)
+#define NV_PMU_PG_PARAM_MCLK_CHANGE_GDDR5_WR_TRAINING_ENABLED BIT(0x3)
+
+#define LPWR_ENTRY_COUNT_MAX 0x06
+
+#define LPWR_VBIOS_IDX_ENTRY_COUNT_MAX (LPWR_ENTRY_COUNT_MAX)
+
+#define LPWR_VBIOS_IDX_ENTRY_RSVD \
+	(LPWR_VBIOS_IDX_ENTRY_COUNT_MAX - 1)
+
+#define LPWR_VBIOS_BASE_SAMPLING_PERIOD_DEFAULT    (500)
+
+struct nvgpu_lpwr_bios_idx_entry {
+	u8 pcie_idx;
+	u8 gr_idx;
+	u8 ms_idx;
+	u8 di_idx;
+	u8 gc6_idx;
+};
+
+struct nvgpu_lpwr_bios_idx_data {
+	u16 base_sampling_period;
+	struct nvgpu_lpwr_bios_idx_entry entry[LPWR_VBIOS_IDX_ENTRY_COUNT_MAX];
+};
+
+#define LPWR_VBIOS_MS_ENTRY_COUNT_MAX (LPWR_ENTRY_COUNT_MAX)
+
+struct nvgpu_lpwr_bios_ms_entry {
+	bool ms_enabled;
+	u32 feature_mask;
+	u32 asr_efficiency_thresholdl;
+	u16 dynamic_current_logic;
+	u16 dynamic_current_sram;
+};
+
+struct nvgpu_lpwr_bios_ms_data {
+	u8 default_entry_idx;
+	u32 idle_threshold_us;
+	struct nvgpu_lpwr_bios_ms_entry entry[LPWR_VBIOS_MS_ENTRY_COUNT_MAX];
+};
+
+#define LPWR_VBIOS_GR_ENTRY_COUNT_MAX (LPWR_ENTRY_COUNT_MAX)
+
+struct nvgpu_lpwr_bios_gr_entry {
+	bool  gr_enabled;
+	u32   feature_mask;
+};
+
+struct nvgpu_lpwr_bios_gr_data {
+	u8 default_entry_idx;
+	u32 idle_threshold_us;
+	u8 adaptive_gr_multiplier;
+	struct nvgpu_lpwr_bios_gr_entry  entry[LPWR_VBIOS_GR_ENTRY_COUNT_MAX];
+};
+
+struct nvgpu_lpwr_bios_data {
+	struct nvgpu_lpwr_bios_idx_data idx;
+	struct nvgpu_lpwr_bios_ms_data ms;
+	struct nvgpu_lpwr_bios_gr_data gr;
+};
+
+struct obj_lwpr {
+	struct nvgpu_lpwr_bios_data lwpr_bios_data;
+	u32 mclk_change_cache;
+};
+
+u32 nvgpu_lpwr_pg_setup(struct gk20a *g);
+int nvgpu_lwpr_mclk_change(struct gk20a *g, u32 pstate);
+int nvgpu_lpwr_enable_pg(struct gk20a *g, bool pstate_lock);
+int nvgpu_lpwr_disable_pg(struct gk20a *g, bool pstate_lock);
+u32 nvgpu_lpwr_is_mscg_supported(struct gk20a *g, u32 pstate_num);
+u32 nvgpu_lpwr_is_rppg_supported(struct gk20a *g, u32 pstate_num);
+u32 nvgpu_lpwr_post_init(struct gk20a *g);
+
+#endif
diff --git a/drivers/gpu/nvgpu/perf/perf.h b/drivers/gpu/nvgpu/perf/perf.h
index c03bf2ae..a3213f7a 100644
--- a/drivers/gpu/nvgpu/perf/perf.h
+++ b/drivers/gpu/nvgpu/perf/perf.h
@@ -18,6 +18,7 @@
 #include "pstate/pstate.h"
 #include "gk20a/gk20a.h"
 #include "volt/volt.h"
+#include "lpwr/lpwr.h"
 
 #define CTRL_PERF_VFE_VAR_TYPE_INVALID                               0x00
 #define CTRL_PERF_VFE_VAR_TYPE_DERIVED                               0x01
@@ -57,6 +58,7 @@ struct perf_pmupstate {
 	struct vfe_equs vfe_equobjs;
 	struct pstates pstatesobjs;
 	struct obj_volt volt;
+	struct obj_lwpr lpwr;
 };
 
 u32 perf_pmu_vfe_load(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index 2e08ef01..82e809bb 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -83,6 +83,10 @@ int gk20a_init_pstate_support(struct gk20a *g)
 		return err;
 
 	err = clk_freq_controller_sw_setup(g);
+	if (err)
+		return err;
+
+	err = nvgpu_lpwr_pg_setup(g);
 
 	return err;
 }
@@ -327,6 +331,9 @@ static int pstate_sw_setup(struct gk20a *g)
 
 	gk20a_dbg_fn("");
 
+	init_waitqueue_head(&g->perf_pmu.pstatesobjs.pstate_notifier_wq);
+	mutex_init(&g->perf_pmu.pstatesobjs.pstate_mutex);
+
 	err = boardobjgrpconstruct_e32(&g->perf_pmu.pstatesobjs.super);
 	if (err) {
 		gk20a_err(dev_from_gk20a(g),
@@ -361,7 +368,7 @@ done:
 	return err;
 }
 
-static struct pstate *pstate_find(struct gk20a *g, u32 num)
+struct pstate *pstate_find(struct gk20a *g, u32 num)
 {
 	struct pstates *pstates = &(g->perf_pmu.pstatesobjs);
 	struct pstate *pstate;
diff --git a/drivers/gpu/nvgpu/pstate/pstate.h b/drivers/gpu/nvgpu/pstate/pstate.h
index b6519c20..af0956e8 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.h
+++ b/drivers/gpu/nvgpu/pstate/pstate.h
@@ -48,6 +48,9 @@ struct pstate {
 struct pstates {
 	struct boardobjgrp_e32 super;
 	u32  num_levels;
+	wait_queue_head_t pstate_notifier_wq;
+	u32 is_pstate_switch_on;
+	struct mutex pstate_mutex; /* protect is_pstate_switch_on */
 };
 
 int gk20a_init_pstate_support(struct gk20a *g);
@@ -55,5 +58,6 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g);
 
 struct clk_set_info *pstate_get_clk_set_info(struct gk20a *g, u32 pstate_num,
 		enum nv_pmu_clk_clkwhich clkwhich);
+struct pstate *pstate_find(struct gk20a *g, u32 num);
 
 #endif /* __PSTATE_H__ */
-- 
cgit v1.2.2


From 76a18f5e762c79e8e6902ec93b7d6ea741475365 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Wed, 9 Nov 2016 19:33:41 +0530
Subject: gpu: nvgpu: PG statistics update

- PG statistics read support for multiple engines

JIRA DNVGPU-71

Change-Id: I2dc3aad243300d21dc3d20a54a5e4736977e071b
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: http://git-master/r/1250507
(cherry picked from commit 985cb3be1d6d990bc6651e417d9e6ba9bfe306e0)
Reviewed-on: http://git-master/r/1270991
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.c | 5 +++--
 drivers/gpu/nvgpu/gp10b/pmu_gp10b.h | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
index 9274990a..12337934 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.c
@@ -250,13 +250,14 @@ int gp10b_pg_gr_init(struct gk20a *g, u32 pg_engine_id)
 	return 0;
 }
 
-void gp10b_pmu_elpg_statistics(struct gk20a *g,
+void gp10b_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id,
 		u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt)
 {
 	struct pmu_gk20a *pmu = &g->pmu;
 	struct pmu_pg_stats_v1 stats;
 
-	pmu_copy_from_dmem(pmu, pmu->stat_dmem_offset,
+	pmu_copy_from_dmem(pmu,
+		pmu->stat_dmem_offset[pg_engine_id],
 		(u8 *)&stats, sizeof(struct pmu_pg_stats_v1), 0);
 
 	*ingating_time = stats.total_sleep_timeus;
diff --git a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h
index ee0158ec..c9ac9d41 100644
--- a/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/pmu_gp10b.h
@@ -20,7 +20,7 @@ void gp10b_init_pmu_ops(struct gpu_ops *gops);
 int gp10b_load_falcon_ucode(struct gk20a *g, u32 falconidmask);
 int gp10b_pg_gr_init(struct gk20a *g, u32 pg_engine_id);
 void gp10b_write_dmatrfbase(struct gk20a *g, u32 addr);
-void gp10b_pmu_elpg_statistics(struct gk20a *g,
+void gp10b_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id,
 		u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt);
 
 #endif /*__PMU_GP10B_H_*/
-- 
cgit v1.2.2


From 2aa3c85f8e82b3c07c39e677663abd3687c1822a Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Thu, 22 Dec 2016 13:45:21 +0530
Subject: gpu: nvgpu: skip setting preemption modes if already set

In gr_gp10b_set_preemption_mode(), skip setting anything
if both graphics and compute preemption modes are
already set

Bug 200263471

Change-Id: I2788464750835da8f6396c6c1ca8356a63758c80
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1275465
GVS: Gerrit_Virtual_Submit
Reviewed-by: Lakshmanan M <lm@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 91ea086c..9de7d675 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -2075,6 +2075,18 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 		vm = ch->vm;
 	}
 
+	/* skip setting anything if both modes are already set */
+	if (graphics_preempt_mode &&
+	   (graphics_preempt_mode == gr_ctx->graphics_preempt_mode))
+		graphics_preempt_mode = 0;
+
+	if (compute_preempt_mode &&
+	   (compute_preempt_mode == gr_ctx->compute_preempt_mode))
+		compute_preempt_mode = 0;
+
+	if (graphics_preempt_mode == 0 && compute_preempt_mode == 0)
+		return 0;
+
 	if (g->ops.gr.set_ctxsw_preemption_mode) {
 		err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class,
 						graphics_preempt_mode, compute_preempt_mode);
-- 
cgit v1.2.2