From 16ad9f537979c5f3717fc5781b1c2fad22a76f96 Mon Sep 17 00:00:00 2001
From: Debarshi Dutta <ddutta@nvidia.com>
Date: Tue, 26 Jun 2018 15:41:12 +0530
Subject: gpu: nvgpu: move gp106 specific clk_arbiter code into HAL

Currently, clock arbiter code is extensively using dgpu specific
implementation. This patch restructures the clk_arbiter code and moves
gp106 specific code into HAL. Following changes are made in this patch

1) clk_domain_get_f_points is now invoked via HAL for gp106 i.e.
g->ops.clk.clk_domain_get_f_points.

2) moved nvgpu_clk_arb_change_vf_point and other related static
functions to clk_arb_gp106.c.

3) Instead of only checking if get_arbiter_clk_domain is empty, a
check for support_clk_freq_controller is also added. This is to enable
the clk_arbiter based on support from both the OS and the chips.

Bug 2061372

Change-Id: I65b0a4e02145a86fbbfb420ed591b1fa3c86f6dc
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1774279
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/clk_arb_gp106.c | 649 +++++++++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gp106/clk_arb_gp106.h |   8 +-
 drivers/gpu/nvgpu/gp106/clk_gp106.c     |  33 ++
 drivers/gpu/nvgpu/gp106/clk_gp106.h     |   7 +-
 drivers/gpu/nvgpu/gp106/hal_gp106.c     |   5 +
 5 files changed, 699 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/nvgpu/gp106')

diff --git a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
index ca8015d6..860344d0 100644
--- a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2017, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -21,6 +21,7 @@
  */
 
 #include "gk20a/gk20a.h"
+#include <nvgpu/clk_arb.h>
 
 #include "clk_arb_gp106.h"
 
@@ -109,3 +110,649 @@ int gp106_get_arbiter_clk_default(struct gk20a *g, u32 api_domain,
 
 	return 0;
 }
+
+int gp106_init_clk_arbiter(struct gk20a *g)
+{
+	struct nvgpu_clk_arb *arb;
+	u16 default_mhz;
+	int err;
+	int index;
+	struct nvgpu_clk_vf_table *table;
+
+	clk_arb_dbg(g, " ");
+
+	arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb));
+	if (!arb)
+		return -ENOMEM;
+
+	err = nvgpu_mutex_init(&arb->pstate_lock);
+	if (err)
+		goto mutex_fail;
+	nvgpu_spinlock_init(&arb->sessions_lock);
+	nvgpu_spinlock_init(&arb->users_lock);
+	nvgpu_spinlock_init(&arb->requests_lock);
+
+	arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
+	if (!arb->mclk_f_points) {
+		err = -ENOMEM;
+		goto init_fail;
+	}
+
+	arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
+	if (!arb->gpc2clk_f_points) {
+		err = -ENOMEM;
+		goto init_fail;
+	}
+
+	for (index = 0; index < 2; index++) {
+		table = &arb->vf_table_pool[index];
+		table->gpc2clk_num_points = MAX_F_POINTS;
+		table->mclk_num_points = MAX_F_POINTS;
+
+		table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
+			sizeof(struct nvgpu_clk_vf_point));
+		if (!table->gpc2clk_points) {
+			err = -ENOMEM;
+			goto init_fail;
+		}
+
+
+		table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
+			sizeof(struct nvgpu_clk_vf_point));
+		if (!table->mclk_points) {
+			err = -ENOMEM;
+			goto init_fail;
+		}
+	}
+
+	g->clk_arb = arb;
+	arb->g = g;
+
+	err =  g->ops.clk_arb.get_arbiter_clk_default(g,
+			CTRL_CLK_DOMAIN_MCLK, &default_mhz);
+	if (err < 0) {
+		err = -EINVAL;
+		goto init_fail;
+	}
+
+	arb->mclk_default_mhz = default_mhz;
+
+	err =  g->ops.clk_arb.get_arbiter_clk_default(g,
+			CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz);
+	if (err < 0) {
+		err = -EINVAL;
+		goto init_fail;
+	}
+
+	arb->gpc2clk_default_mhz = default_mhz;
+
+	arb->actual = &arb->actual_pool[0];
+
+	nvgpu_atomic_set(&arb->req_nr, 0);
+
+	nvgpu_atomic64_set(&arb->alarm_mask, 0);
+	err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue,
+		DEFAULT_EVENT_NUMBER);
+	if (err < 0)
+		goto init_fail;
+
+	nvgpu_init_list_node(&arb->users);
+	nvgpu_init_list_node(&arb->sessions);
+	nvgpu_init_list_node(&arb->requests);
+
+	nvgpu_cond_init(&arb->request_wq);
+
+	nvgpu_init_list_node(&arb->update_vf_table_work_item.worker_item);
+	nvgpu_init_list_node(&arb->update_arb_work_item.worker_item);
+	arb->update_vf_table_work_item.arb = arb;
+	arb->update_arb_work_item.arb = arb;
+	arb->update_vf_table_work_item.item_type = CLK_ARB_WORK_UPDATE_VF_TABLE;
+	arb->update_arb_work_item.item_type = CLK_ARB_WORK_UPDATE_ARB;
+
+	err = nvgpu_clk_arb_worker_init(g);
+	if (err < 0)
+		goto init_fail;
+
+#ifdef CONFIG_DEBUG_FS
+	arb->debug = &arb->debug_pool[0];
+
+	if (!arb->debugfs_set) {
+		if (nvgpu_clk_arb_debugfs_init(g))
+			arb->debugfs_set = true;
+	}
+#endif
+	err = clk_vf_point_cache(g);
+	if (err < 0)
+		goto init_fail;
+
+	err = nvgpu_clk_arb_update_vf_table(arb);
+	if (err < 0)
+		goto init_fail;
+	do {
+		/* Check that first run is completed */
+		nvgpu_smp_mb();
+		NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq,
+			nvgpu_atomic_read(&arb->req_nr), 0);
+	} while (!nvgpu_atomic_read(&arb->req_nr));
+
+
+	return arb->status;
+
+init_fail:
+	nvgpu_kfree(g, arb->gpc2clk_f_points);
+	nvgpu_kfree(g, arb->mclk_f_points);
+
+	for (index = 0; index < 2; index++) {
+		nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points);
+		nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
+	}
+
+	nvgpu_mutex_destroy(&arb->pstate_lock);
+
+mutex_fail:
+	nvgpu_kfree(g, arb);
+
+	return err;
+}
+
+static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
+		u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
+		u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram)
+{
+	u16 gpc2clk_target, mclk_target;
+	u32 gpc2clk_voltuv, gpc2clk_voltuv_sram;
+	u32 mclk_voltuv, mclk_voltuv_sram;
+	u32 pstate = VF_POINT_INVALID_PSTATE;
+	struct nvgpu_clk_vf_table *table;
+	u32 index, index_mclk;
+	struct nvgpu_clk_vf_point *mclk_vf = NULL;
+
+	do {
+		gpc2clk_target = *gpc2clk;
+		mclk_target = *mclk;
+		gpc2clk_voltuv = 0;
+		gpc2clk_voltuv_sram = 0;
+		mclk_voltuv = 0;
+		mclk_voltuv_sram = 0;
+
+		table = NV_ACCESS_ONCE(arb->current_vf_table);
+		/* pointer to table can be updated by callback */
+		nvgpu_smp_rmb();
+
+		if (!table)
+			continue;
+		if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) {
+			nvgpu_err(arb->g, "found empty table");
+			goto find_exit;
+		}
+		/* First we check MCLK to find out which PSTATE we are
+		 * are requesting, and from there try to find the minimum
+		 * GPC2CLK on the same PSTATE that satisfies the request.
+		 * If no GPC2CLK can be found, then we need to up the PSTATE
+		 */
+
+recalculate_vf_point:
+		for (index = 0; index < table->mclk_num_points; index++) {
+			if (table->mclk_points[index].mem_mhz >= mclk_target) {
+				mclk_vf = &table->mclk_points[index];
+				break;
+			}
+		}
+		if (index == table->mclk_num_points) {
+			mclk_vf = &table->mclk_points[index-1];
+			index = table->mclk_num_points - 1;
+		}
+		index_mclk = index;
+
+		/* round up the freq requests */
+		for (index = 0; index < table->gpc2clk_num_points; index++) {
+			pstate = VF_POINT_COMMON_PSTATE(
+					&table->gpc2clk_points[index], mclk_vf);
+
+			if ((table->gpc2clk_points[index].gpc_mhz >=
+							gpc2clk_target) &&
+					(pstate != VF_POINT_INVALID_PSTATE)) {
+				gpc2clk_target =
+					table->gpc2clk_points[index].gpc_mhz;
+				*sys2clk =
+					table->gpc2clk_points[index].sys_mhz;
+				*xbar2clk =
+					table->gpc2clk_points[index].xbar_mhz;
+
+				gpc2clk_voltuv =
+					table->gpc2clk_points[index].uvolt;
+				gpc2clk_voltuv_sram =
+					table->gpc2clk_points[index].uvolt_sram;
+				break;
+			}
+		}
+
+		if (index == table->gpc2clk_num_points) {
+			pstate = VF_POINT_COMMON_PSTATE(
+				&table->gpc2clk_points[index-1], mclk_vf);
+			if (pstate != VF_POINT_INVALID_PSTATE) {
+				gpc2clk_target =
+					table->gpc2clk_points[index-1].gpc_mhz;
+				*sys2clk =
+					table->gpc2clk_points[index-1].sys_mhz;
+				*xbar2clk  =
+					table->gpc2clk_points[index-1].xbar_mhz;
+
+				gpc2clk_voltuv =
+					table->gpc2clk_points[index-1].uvolt;
+				gpc2clk_voltuv_sram =
+					table->gpc2clk_points[index-1].
+						uvolt_sram;
+			} else if (index_mclk >= table->mclk_num_points - 1) {
+				/* There is no available combination of MCLK
+				 * and GPC2CLK, we need to fail this
+				 */
+				gpc2clk_target = 0;
+				mclk_target = 0;
+				pstate = VF_POINT_INVALID_PSTATE;
+				goto find_exit;
+			} else {
+				/* recalculate with higher PSTATE */
+				gpc2clk_target = *gpc2clk;
+				mclk_target = table->mclk_points[index_mclk+1].
+									mem_mhz;
+				goto recalculate_vf_point;
+			}
+		}
+
+		mclk_target = mclk_vf->mem_mhz;
+		mclk_voltuv = mclk_vf->uvolt;
+		mclk_voltuv_sram = mclk_vf->uvolt_sram;
+
+	} while (!table ||
+		(NV_ACCESS_ONCE(arb->current_vf_table) != table));
+
+find_exit:
+	*voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
+	*voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
+		gpc2clk_voltuv_sram : mclk_voltuv_sram;
+	/* noise unaware vmin */
+	*nuvmin = mclk_voltuv;
+	*nuvmin_sram = mclk_voltuv_sram;
+	*gpc2clk = gpc2clk_target < *gpc2clk ? gpc2clk_target : *gpc2clk;
+	*mclk = mclk_target;
+	return pstate;
+}
+
+static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
+	u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
+	u32 voltuv_sram)
+{
+	struct set_fll_clk fllclk;
+	struct nvgpu_clk_arb *arb = g->clk_arb;
+	int status;
+
+	fllclk.gpc2clkmhz = gpc2clk_target;
+	fllclk.sys2clkmhz = sys2clk_target;
+	fllclk.xbar2clkmhz = xbar2clk_target;
+
+	fllclk.voltuv = voltuv;
+
+	/* if voltage ascends we do:
+	 * (1) FLL change
+	 * (2) Voltage change
+	 * (3) MCLK change
+	 * If it goes down
+	 * (1) MCLK change
+	 * (2) Voltage change
+	 * (3) FLL change
+	 */
+
+	/* descending */
+	if (voltuv < arb->voltuv_actual) {
+		status = g->ops.clk.mclk_change(g, mclk_target);
+		if (status < 0)
+			return status;
+
+		status = volt_set_voltage(g, voltuv, voltuv_sram);
+		if (status < 0)
+			return status;
+
+		status = clk_set_fll_clks(g, &fllclk);
+		if (status < 0)
+			return status;
+	} else {
+		status = clk_set_fll_clks(g, &fllclk);
+		if (status < 0)
+			return status;
+
+		status = volt_set_voltage(g, voltuv, voltuv_sram);
+		if (status < 0)
+			return status;
+
+		status = g->ops.clk.mclk_change(g, mclk_target);
+		if (status < 0)
+			return status;
+	}
+
+	return 0;
+}
+
+void gp106_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb)
+{
+	struct nvgpu_clk_session *session;
+	struct nvgpu_clk_dev *dev;
+	struct nvgpu_clk_dev *tmp;
+	struct nvgpu_clk_arb_target *target, *actual;
+	struct gk20a *g = arb->g;
+
+	u32 pstate = VF_POINT_INVALID_PSTATE;
+	u32 voltuv, voltuv_sram;
+	bool mclk_set, gpc2clk_set;
+	u32 nuvmin, nuvmin_sram;
+
+	u32 alarms_notified = 0;
+	u32 current_alarm;
+	int status = 0;
+
+	/* Temporary variables for checking target frequency */
+	u16 gpc2clk_target, sys2clk_target, xbar2clk_target, mclk_target;
+	u16 gpc2clk_session_target, mclk_session_target;
+
+#ifdef CONFIG_DEBUG_FS
+	u64 t0, t1;
+	struct nvgpu_clk_arb_debug *debug;
+
+#endif
+
+	clk_arb_dbg(g, " ");
+
+	/* bail out if gpu is down */
+	if (nvgpu_atomic64_read(&arb->alarm_mask) & EVENT(ALARM_GPU_LOST))
+		goto exit_arb;
+
+#ifdef CONFIG_DEBUG_FS
+	g->ops.ptimer.read_ptimer(g, &t0);
+#endif
+
+	/* Only one arbiter should be running */
+	gpc2clk_target = 0;
+	mclk_target = 0;
+
+	nvgpu_spinlock_acquire(&arb->sessions_lock);
+	nvgpu_list_for_each_entry(session, &arb->sessions,
+			nvgpu_clk_session, link) {
+		if (!session->zombie) {
+			mclk_set = false;
+			gpc2clk_set = false;
+			target = (session->target == &session->target_pool[0] ?
+					&session->target_pool[1] :
+					&session->target_pool[0]);
+			nvgpu_spinlock_acquire(&session->session_lock);
+			if (!nvgpu_list_empty(&session->targets)) {
+				/* Copy over state */
+				target->mclk = session->target->mclk;
+				target->gpc2clk = session->target->gpc2clk;
+				/* Query the latest committed request */
+				nvgpu_list_for_each_entry_safe(dev, tmp,
+				 &session->targets, nvgpu_clk_dev, node) {
+					if (!mclk_set && dev->mclk_target_mhz) {
+						target->mclk =
+							dev->mclk_target_mhz;
+						mclk_set = true;
+					}
+					if (!gpc2clk_set &&
+						dev->gpc2clk_target_mhz) {
+						target->gpc2clk =
+							dev->gpc2clk_target_mhz;
+						gpc2clk_set = true;
+					}
+					nvgpu_ref_get(&dev->refcount);
+					nvgpu_list_del(&dev->node);
+					nvgpu_spinlock_acquire(
+						&arb->requests_lock);
+					nvgpu_list_add(
+						&dev->node, &arb->requests);
+					nvgpu_spinlock_release(&arb->requests_lock);
+				}
+				session->target = target;
+			}
+			nvgpu_spinlock_release(
+				&session->session_lock);
+
+			mclk_target = mclk_target > session->target->mclk ?
+				mclk_target : session->target->mclk;
+
+			gpc2clk_target =
+				gpc2clk_target > session->target->gpc2clk ?
+				gpc2clk_target : session->target->gpc2clk;
+		}
+	}
+	nvgpu_spinlock_release(&arb->sessions_lock);
+
+	gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target :
+			arb->gpc2clk_default_mhz;
+
+	if (gpc2clk_target < arb->gpc2clk_min)
+		gpc2clk_target = arb->gpc2clk_min;
+
+	if (gpc2clk_target > arb->gpc2clk_max)
+		gpc2clk_target = arb->gpc2clk_max;
+
+	mclk_target = (mclk_target > 0) ? mclk_target :
+			arb->mclk_default_mhz;
+
+	if (mclk_target < arb->mclk_min)
+		mclk_target = arb->mclk_min;
+
+	if (mclk_target > arb->mclk_max)
+		mclk_target = arb->mclk_max;
+
+	sys2clk_target = 0;
+	xbar2clk_target = 0;
+
+	gpc2clk_session_target = gpc2clk_target;
+	mclk_session_target = mclk_target;
+
+	/* Query the table for the closest vf point to program */
+	pstate = nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target,
+		&sys2clk_target, &xbar2clk_target, &mclk_target, &voltuv,
+		&voltuv_sram, &nuvmin, &nuvmin_sram);
+
+	if (pstate == VF_POINT_INVALID_PSTATE) {
+		arb->status = -EINVAL;
+		/* make status visible */
+		nvgpu_smp_mb();
+		goto exit_arb;
+	}
+
+	if ((gpc2clk_target < gpc2clk_session_target) ||
+			(mclk_target < mclk_session_target))
+		nvgpu_clk_arb_set_global_alarm(g,
+			EVENT(ALARM_TARGET_VF_NOT_POSSIBLE));
+
+	if ((arb->actual->gpc2clk == gpc2clk_target) &&
+		(arb->actual->mclk == mclk_target) &&
+		(arb->voltuv_actual == voltuv)) {
+		goto exit_arb;
+	}
+
+	/* Program clocks */
+	/* A change in both mclk of gpc2clk may require a change in voltage */
+
+	nvgpu_mutex_acquire(&arb->pstate_lock);
+	status = nvgpu_lpwr_disable_pg(g, false);
+
+	status = clk_pmu_freq_controller_load(g, false,
+					CTRL_CLK_CLK_FREQ_CONTROLLER_ID_ALL);
+	if (status < 0) {
+		arb->status = status;
+		nvgpu_mutex_release(&arb->pstate_lock);
+
+		/* make status visible */
+		nvgpu_smp_mb();
+		goto exit_arb;
+	}
+	status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram);
+	if (status < 0) {
+		arb->status = status;
+		nvgpu_mutex_release(&arb->pstate_lock);
+
+		/* make status visible */
+		nvgpu_smp_mb();
+		goto exit_arb;
+	}
+
+	status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target,
+		sys2clk_target, xbar2clk_target, mclk_target, voltuv,
+		voltuv_sram);
+	if (status < 0) {
+		arb->status = status;
+		nvgpu_mutex_release(&arb->pstate_lock);
+
+		/* make status visible */
+		nvgpu_smp_mb();
+		goto exit_arb;
+	}
+
+	status = clk_pmu_freq_controller_load(g, true,
+					 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_ALL);
+	if (status < 0) {
+		arb->status = status;
+		nvgpu_mutex_release(&arb->pstate_lock);
+
+		/* make status visible */
+		nvgpu_smp_mb();
+		goto exit_arb;
+	}
+
+	status = nvgpu_lwpr_mclk_change(g, pstate);
+	if (status < 0) {
+		arb->status = status;
+		nvgpu_mutex_release(&arb->pstate_lock);
+
+		/* make status visible */
+		nvgpu_smp_mb();
+		goto exit_arb;
+	}
+
+	actual = NV_ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ?
+			&arb->actual_pool[1] : &arb->actual_pool[0];
+
+	/* do not reorder this pointer */
+	nvgpu_smp_rmb();
+	actual->gpc2clk = gpc2clk_target;
+	actual->mclk = mclk_target;
+	arb->voltuv_actual = voltuv;
+	actual->pstate = pstate;
+	arb->status = status;
+
+	/* Make changes visible to other threads */
+	nvgpu_smp_wmb();
+	arb->actual = actual;
+
+	status = nvgpu_lpwr_enable_pg(g, false);
+	if (status < 0) {
+		arb->status = status;
+		nvgpu_mutex_release(&arb->pstate_lock);
+
+		/* make status visible */
+		nvgpu_smp_mb();
+		goto exit_arb;
+	}
+
+	/* status must be visible before atomic inc */
+	nvgpu_smp_wmb();
+	nvgpu_atomic_inc(&arb->req_nr);
+
+	/* Unlock pstate change for PG */
+	nvgpu_mutex_release(&arb->pstate_lock);
+
+	/* VF Update complete */
+	nvgpu_clk_arb_set_global_alarm(g, EVENT(VF_UPDATE));
+
+	nvgpu_cond_signal_interruptible(&arb->request_wq);
+
+#ifdef CONFIG_DEBUG_FS
+	g->ops.ptimer.read_ptimer(g, &t1);
+
+	debug = arb->debug == &arb->debug_pool[0] ?
+		&arb->debug_pool[1] : &arb->debug_pool[0];
+
+	memcpy(debug, arb->debug, sizeof(arb->debug_pool[0]));
+	debug->switch_num++;
+
+	if (debug->switch_num == 1) {
+		debug->switch_max = debug->switch_min =
+			debug->switch_avg = (t1-t0)/1000;
+		debug->switch_std = 0;
+	} else {
+		s64 prev_avg;
+		s64 curr = (t1-t0)/1000;
+
+		debug->switch_max = curr > debug->switch_max ?
+			curr : debug->switch_max;
+		debug->switch_min = debug->switch_min ?
+			(curr < debug->switch_min ?
+				curr : debug->switch_min) : curr;
+		prev_avg = debug->switch_avg;
+		debug->switch_avg = (curr +
+			(debug->switch_avg * (debug->switch_num-1))) /
+			debug->switch_num;
+		debug->switch_std +=
+			(curr - debug->switch_avg) * (curr - prev_avg);
+	}
+	/* commit changes before exchanging debug pointer */
+	nvgpu_smp_wmb();
+	arb->debug = debug;
+#endif
+
+exit_arb:
+	if (status < 0) {
+		nvgpu_err(g, "Error in arbiter update");
+		nvgpu_clk_arb_set_global_alarm(g,
+			EVENT(ALARM_CLOCK_ARBITER_FAILED));
+	}
+
+	current_alarm = (u32) nvgpu_atomic64_read(&arb->alarm_mask);
+	/* notify completion for all requests */
+	nvgpu_spinlock_acquire(&arb->requests_lock);
+	nvgpu_list_for_each_entry_safe(dev, tmp, &arb->requests,
+			nvgpu_clk_dev, node) {
+		nvgpu_atomic_set(&dev->poll_mask,
+		 NVGPU_POLLIN | NVGPU_POLLRDNORM);
+		nvgpu_clk_arb_event_post_event(dev);
+		nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
+		nvgpu_list_del(&dev->node);
+	}
+	nvgpu_spinlock_release(&arb->requests_lock);
+
+	nvgpu_atomic_set(&arb->notification_queue.head,
+		nvgpu_atomic_read(&arb->notification_queue.tail));
+	/* notify event for all users */
+	nvgpu_spinlock_acquire(&arb->users_lock);
+	nvgpu_list_for_each_entry(dev, &arb->users, nvgpu_clk_dev, link) {
+		alarms_notified |=
+			nvgpu_clk_arb_notify(dev, arb->actual, current_alarm);
+	}
+	nvgpu_spinlock_release(&arb->users_lock);
+
+	/* clear alarms */
+	nvgpu_clk_arb_clear_global_alarm(g, alarms_notified &
+		~EVENT(ALARM_GPU_LOST));
+}
+
+void gp106_clk_arb_cleanup(struct nvgpu_clk_arb *arb)
+{
+	struct gk20a *g = arb->g;
+	int index;
+
+	nvgpu_kfree(g, arb->gpc2clk_f_points);
+	nvgpu_kfree(g, arb->mclk_f_points);
+
+	for (index = 0; index < 2; index++) {
+		nvgpu_kfree(g,
+			arb->vf_table_pool[index].gpc2clk_points);
+		nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
+	}
+
+	nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
+	nvgpu_kfree(g, g->clk_arb);
+
+	g->clk_arb = NULL;
+}
\ No newline at end of file
diff --git a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.h b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.h
index fc4657f5..e2b2834c 100644
--- a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2017, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -22,10 +22,16 @@
 #ifndef CLK_ARB_GP106_H
 #define CLK_ARB_GP106_H
 
+struct nvgpu_clk_session;
+struct nvgpu_clk_arb;
+
 u32 gp106_get_arbiter_clk_domains(struct gk20a *g);
 int gp106_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
 		u16 *min_mhz, u16 *max_mhz);
 int gp106_get_arbiter_clk_default(struct gk20a *g, u32 api_domain,
 		u16 *default_mhz);
+int gp106_init_clk_arbiter(struct gk20a *g);
+void gp106_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb);
+void gp106_clk_arb_cleanup(struct nvgpu_clk_arb *arb);
 
 #endif /* CLK_ARB_GP106_H */
diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.c b/drivers/gpu/nvgpu/gp106/clk_gp106.c
index 24b07112..dd7a2dd6 100644
--- a/drivers/gpu/nvgpu/gp106/clk_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/clk_gp106.c
@@ -32,7 +32,9 @@
 #include <nvgpu/list.h>
 #include <nvgpu/clk_arb.h>
 #include <nvgpu/timers.h>
+#include <nvgpu/pmu.h>
 
+#include "clk/clk.h"
 #include "gk20a/gk20a.h"
 #include "gp106/mclk_gp106.h"
 
@@ -243,6 +245,37 @@ read_err:
 
 }
 
+int gp106_clk_domain_get_f_points(
+	struct gk20a *g,
+	u32 clkapidomain,
+	u32 *pfpointscount,
+	u16 *pfreqpointsinmhz)
+{
+	int status = -EINVAL;
+	struct clk_domain *pdomain;
+	u8 i;
+	struct clk_pmupstate *pclk = &g->clk_pmu;
+
+	if (pfpointscount == NULL)
+		return -EINVAL;
+
+	if ((pfreqpointsinmhz == NULL) && (*pfpointscount != 0))
+		return -EINVAL;
+
+	BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs.super.super),
+			struct clk_domain *, pdomain, i) {
+		if (pdomain->api_domain == clkapidomain) {
+			status = pdomain->clkdomainclkgetfpoints(g, pclk,
+				pdomain, pfpointscount,
+				pfreqpointsinmhz,
+				CLK_PROG_VFE_ENTRY_LOGIC);
+			return status;
+		}
+	}
+	return status;
+}
+
+
 #ifdef CONFIG_DEBUG_FS
 static int gp106_get_rate_show(void *data , u64 *val) {
 	struct namemap_cfg *c = (struct namemap_cfg *) data;
diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.h b/drivers/gpu/nvgpu/gp106/clk_gp106.h
index 97baa224..b7ab3164 100644
--- a/drivers/gpu/nvgpu/gp106/clk_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/clk_gp106.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2017, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -56,5 +56,10 @@ int gp106_init_clk_support(struct gk20a *g);
 u32 gp106_crystal_clk_hz(struct gk20a *g);
 unsigned long gp106_clk_measure_freq(struct gk20a *g, u32 api_domain);
 int gp106_suspend_clk_support(struct gk20a *g);
+int gp106_clk_domain_get_f_points(
+	struct gk20a *g,
+	u32 clkapidomain,
+	u32 *pfpointscount,
+	u16 *pfreqpointsinmhz);
 
 #endif /* CLK_GP106_H */
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 78a3ea63..167bfaac 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -675,6 +675,7 @@ static const struct gpu_ops gp106_ops = {
 		.get_crystal_clk_hz = gp106_crystal_clk_hz,
 		.measure_freq = gp106_clk_measure_freq,
 		.suspend_clk_support = gp106_suspend_clk_support,
+		.clk_domain_get_f_points = gp106_clk_domain_get_f_points,
 		.mclk_init = gp106_mclk_init,
 		.mclk_change = gp106_mclk_change,
 		.mclk_deinit = gp106_mclk_deinit,
@@ -684,6 +685,9 @@ static const struct gpu_ops gp106_ops = {
 		.get_arbiter_clk_range = gp106_get_arbiter_clk_range,
 		.get_arbiter_clk_default = gp106_get_arbiter_clk_default,
 		.get_current_pstate = nvgpu_clk_arb_get_current_pstate,
+		.arbiter_clk_init = gp106_init_clk_arbiter,
+		.clk_arb_run_arbiter_cb = gp106_clk_arb_run_arbiter_cb,
+		.clk_arb_cleanup = gp106_clk_arb_cleanup,
 	},
 	.regops = {
 		.exec_regops = exec_regops_gk20a,
@@ -849,6 +853,7 @@ int gp106_init_hal(struct gk20a *g)
 	gops->clk.mclk_init = gp106_ops.clk.mclk_init;
 	gops->clk.mclk_change = gp106_ops.clk.mclk_change;
 	gops->clk.mclk_deinit = gp106_ops.clk.mclk_deinit;
+	gops->clk.clk_domain_get_f_points = gp106_ops.clk.clk_domain_get_f_points;
 
 	gops->clk_arb = gp106_ops.clk_arb;
 	gops->regops = gp106_ops.regops;
-- 
cgit v1.2.2