/*
 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/kref.h>
#include <linux/llist.h>
#include <linux/rculist.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <linux/sched.h>
#include <linux/bitops.h>
#include <linux/poll.h>
#include <linux/uaccess.h>
#include <uapi/linux/nvgpu.h>

#include <nvgpu/kmem.h>
#include <nvgpu/lock.h>

#include "gk20a/gk20a.h"
#include "clk/clk_arb.h"

#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include "gk20a/platform_gk20a.h"
#endif

#define MAX_F_POINTS 256
#define DEFAULT_EVENT_NUMBER 32

struct nvgpu_clk_dev;
struct nvgpu_clk_arb_target;
struct nvgpu_clk_notification_queue;

#ifdef CONFIG_DEBUG_FS
static int nvgpu_clk_arb_debugfs_init(struct gk20a *g);
#endif

static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
		struct file *filp);
static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
		struct file *filp);
static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp,
		poll_table *wait);
static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp,
		char __user *buf, size_t size, loff_t *off);
static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd,
		unsigned long arg);

static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work);
static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work);
static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb);
static void nvgpu_clk_arb_free_fd(struct kref *refcount);
static void nvgpu_clk_arb_free_session(struct kref *refcount);
static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
		u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target,
		u32 voltuv, u32 voltuv_sram);
static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
		u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
		u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram);
static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
		struct nvgpu_clk_arb_target *target, u32 alarm_mask);
static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm);
static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm);

static void nvgpu_clk_arb_queue_notification(struct gk20a *g,
		struct nvgpu_clk_notification_queue *queue, u32 alarm_mask);
static int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
		struct nvgpu_clk_notification_queue *queue,
		size_t events_number);
static void nvgpu_clk_notification_queue_free(struct gk20a *g,
		struct nvgpu_clk_notification_queue *queue);

#define VF_POINT_INVALID_PSTATE ~0U
#define VF_POINT_SET_PSTATE_SUPPORTED(a, b) ((a)->pstates |= (1UL << (b)))
#define VF_POINT_GET_PSTATE(a) (((a)->pstates) ?\
	__fls((a)->pstates) :\
	VF_POINT_INVALID_PSTATE)
#define VF_POINT_COMMON_PSTATE(a, b) (((a)->pstates & (b)->pstates) ?\
	__fls((a)->pstates & (b)->pstates) :\
	VF_POINT_INVALID_PSTATE)

/* Local Alarms */
#define EVENT(alarm)	(0x1UL << NVGPU_GPU_EVENT_##alarm)

#define LOCAL_ALARM_MASK (EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE) | \
				EVENT(VF_UPDATE))

#define _WRAPGTEQ(a, b) ((a-b) > 0)

struct nvgpu_clk_notification {
	u32 notification;
	u64 timestamp;
};

struct nvgpu_clk_notification_queue {
	u32 size;
	atomic_t head;
	atomic_t tail;
	struct nvgpu_clk_notification *notifications;
};

struct nvgpu_clk_vf_point {
	u16 pstates;
	union {
		struct {
			u16 gpc_mhz;
u16 sys_mhz; u16 xbar_mhz; }; u16 mem_mhz; }; u32 uvolt; u32 uvolt_sram; }; struct nvgpu_clk_vf_table { u32 mclk_num_points; struct nvgpu_clk_vf_point *mclk_points; u32 gpc2clk_num_points; struct nvgpu_clk_vf_point *gpc2clk_points; }; #ifdef CONFIG_DEBUG_FS struct nvgpu_clk_arb_debug { s64 switch_max; s64 switch_min; u64 switch_num; s64 switch_avg; s64 switch_std; }; #endif struct nvgpu_clk_arb_target { u16 mclk; u16 gpc2clk; u32 pstate; }; struct nvgpu_clk_arb { struct nvgpu_spinlock sessions_lock; struct nvgpu_spinlock users_lock; struct nvgpu_mutex pstate_lock; struct list_head users; struct list_head sessions; struct llist_head requests; struct gk20a *g; int status; struct nvgpu_clk_arb_target actual_pool[2]; struct nvgpu_clk_arb_target *actual; u16 gpc2clk_default_mhz; u16 mclk_default_mhz; u32 voltuv_actual; u16 gpc2clk_min, gpc2clk_max; u16 mclk_min, mclk_max; struct work_struct update_fn_work; struct workqueue_struct *update_work_queue; struct work_struct vf_table_fn_work; struct workqueue_struct *vf_table_work_queue; wait_queue_head_t request_wq; struct nvgpu_clk_vf_table *current_vf_table; struct nvgpu_clk_vf_table vf_table_pool[2]; u32 vf_table_index; u16 *mclk_f_points; atomic_t req_nr; u32 mclk_f_numpoints; u16 *gpc2clk_f_points; u32 gpc2clk_f_numpoints; atomic64_t alarm_mask; struct nvgpu_clk_notification_queue notification_queue; #ifdef CONFIG_DEBUG_FS struct nvgpu_clk_arb_debug debug_pool[2]; struct nvgpu_clk_arb_debug *debug; bool debugfs_set; #endif }; struct nvgpu_clk_dev { struct nvgpu_clk_session *session; union { struct list_head link; struct llist_node node; }; wait_queue_head_t readout_wq; atomic_t poll_mask; u16 gpc2clk_target_mhz; u16 mclk_target_mhz; u32 alarms_reported; atomic_t enabled_mask; struct nvgpu_clk_notification_queue queue; u32 arb_queue_head; struct kref refcount; }; struct nvgpu_clk_session { bool zombie; struct gk20a *g; struct kref refcount; struct list_head link; struct llist_head targets; struct nvgpu_clk_arb_target target_pool[2]; struct nvgpu_clk_arb_target *target; }; static const struct file_operations completion_dev_ops = { .owner = THIS_MODULE, .release = nvgpu_clk_arb_release_completion_dev, .poll = nvgpu_clk_arb_poll_dev, }; static const struct file_operations event_dev_ops = { .owner = THIS_MODULE, .release = nvgpu_clk_arb_release_event_dev, .poll = nvgpu_clk_arb_poll_dev, .read = nvgpu_clk_arb_read_event_dev, #ifdef CONFIG_COMPAT .compat_ioctl = nvgpu_clk_arb_ioctl_event_dev, #endif .unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev, }; static int nvgpu_clk_notification_queue_alloc(struct gk20a *g, struct nvgpu_clk_notification_queue *queue, size_t events_number) { queue->notifications = nvgpu_kcalloc(g, events_number, sizeof(struct nvgpu_clk_notification)); if (!queue->notifications) return -ENOMEM; queue->size = events_number; atomic_set(&queue->head, 0); atomic_set(&queue->tail, 0); return 0; } static void nvgpu_clk_notification_queue_free(struct gk20a *g, struct nvgpu_clk_notification_queue *queue) { nvgpu_kfree(g, queue->notifications); queue->size = 0; atomic_set(&queue->head, 0); atomic_set(&queue->tail, 0); } int nvgpu_clk_arb_init_arbiter(struct gk20a *g) { struct nvgpu_clk_arb *arb; u16 default_mhz; int err; int index; struct nvgpu_clk_vf_table *table; gk20a_dbg_fn(""); if (!g->ops.clk_arb.get_arbiter_clk_domains) return 0; arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb)); if (!arb) return -ENOMEM; err = nvgpu_mutex_init(&arb->pstate_lock); if (err) goto mutex_fail; nvgpu_spinlock_init(&arb->sessions_lock); 
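	/*
	 * Locking overview: sessions_lock and users_lock only guard
	 * insertions into and removals from the RCU lists of sessions and
	 * event users; readers walk those lists under rcu_read_lock().
	 * pstate_lock serializes the clock/voltage programming done by the
	 * arbiter worker and is also exposed to the rest of the driver
	 * through nvgpu_clk_arb_pstate_change_lock().
	 */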
nvgpu_spinlock_init(&arb->users_lock); arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); if (!arb->mclk_f_points) { err = -ENOMEM; goto init_fail; } arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); if (!arb->gpc2clk_f_points) { err = -ENOMEM; goto init_fail; } for (index = 0; index < 2; index++) { table = &arb->vf_table_pool[index]; table->gpc2clk_num_points = MAX_F_POINTS; table->mclk_num_points = MAX_F_POINTS; table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(struct nvgpu_clk_vf_point)); if (!table->gpc2clk_points) { err = -ENOMEM; goto init_fail; } table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(struct nvgpu_clk_vf_point)); if (!table->mclk_points) { err = -ENOMEM; goto init_fail; } } g->clk_arb = arb; arb->g = g; err = g->ops.clk_arb.get_arbiter_clk_default(g, CTRL_CLK_DOMAIN_MCLK, &default_mhz); if (err < 0) { err = -EINVAL; goto init_fail; } arb->mclk_default_mhz = default_mhz; err = g->ops.clk_arb.get_arbiter_clk_default(g, CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz); if (err < 0) { err = -EINVAL; goto init_fail; } arb->gpc2clk_default_mhz = default_mhz; arb->actual = &arb->actual_pool[0]; atomic_set(&arb->req_nr, 0); atomic64_set(&arb->alarm_mask, 0); err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue, DEFAULT_EVENT_NUMBER); if (err < 0) goto init_fail; INIT_LIST_HEAD_RCU(&arb->users); INIT_LIST_HEAD_RCU(&arb->sessions); init_llist_head(&arb->requests); init_waitqueue_head(&arb->request_wq); arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1, "vf_table_update"); arb->update_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1, "arbiter_update"); INIT_WORK(&arb->vf_table_fn_work, nvgpu_clk_arb_run_vf_table_cb); INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb); #ifdef CONFIG_DEBUG_FS arb->debug = &arb->debug_pool[0]; if (!arb->debugfs_set) { if (nvgpu_clk_arb_debugfs_init(g)) arb->debugfs_set = true; } #endif err = clk_vf_point_cache(g); if (err < 0) goto init_fail; err = nvgpu_clk_arb_update_vf_table(arb); if (err < 0) goto init_fail; do { /* Check that first run is completed */ smp_mb(); wait_event_interruptible(arb->request_wq, atomic_read(&arb->req_nr)); } while (!atomic_read(&arb->req_nr)); return arb->status; init_fail: nvgpu_kfree(g, arb->gpc2clk_f_points); nvgpu_kfree(g, arb->mclk_f_points); for (index = 0; index < 2; index++) { nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points); nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); } nvgpu_mutex_destroy(&arb->pstate_lock); mutex_fail: nvgpu_kfree(g, arb); return err; } void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm) { struct nvgpu_clk_arb *arb = g->clk_arb; nvgpu_clk_arb_set_global_alarm(g, alarm); if (arb->update_work_queue) queue_work(arb->update_work_queue, &arb->update_fn_work); } static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm) { struct nvgpu_clk_arb *arb = g->clk_arb; u64 current_mask; u32 refcnt; u32 alarm_mask; u64 new_mask; do { current_mask = atomic64_read(&arb->alarm_mask); /* atomic operations are strong so they do not need masks */ refcnt = ((u32) (current_mask >> 32)) + 1; alarm_mask = (u32) (current_mask & ~alarm); new_mask = ((u64) refcnt << 32) | alarm_mask; } while (unlikely(current_mask != (u64)atomic64_cmpxchg(&arb->alarm_mask, current_mask, new_mask))); } static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm) { struct nvgpu_clk_arb *arb = g->clk_arb; u64 current_mask; u32 refcnt; u32 alarm_mask; u64 new_mask; do { current_mask = 
atomic64_read(&arb->alarm_mask); /* atomic operations are strong so they do not need masks */ refcnt = ((u32) (current_mask >> 32)) + 1; alarm_mask = (u32) (current_mask & ~0) | alarm; new_mask = ((u64) refcnt << 32) | alarm_mask; } while (unlikely(current_mask != (u64)atomic64_cmpxchg(&arb->alarm_mask, current_mask, new_mask))); nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm); } void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g) { struct nvgpu_clk_arb *arb = g->clk_arb; int index; if (arb) { cancel_work_sync(&arb->vf_table_fn_work); destroy_workqueue(arb->vf_table_work_queue); arb->vf_table_work_queue = NULL; cancel_work_sync(&arb->update_fn_work); destroy_workqueue(arb->update_work_queue); arb->update_work_queue = NULL; nvgpu_kfree(g, arb->gpc2clk_f_points); nvgpu_kfree(g, arb->mclk_f_points); for (index = 0; index < 2; index++) { nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points); nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); } } nvgpu_mutex_destroy(&g->clk_arb->pstate_lock); nvgpu_kfree(g, g->clk_arb); g->clk_arb = NULL; } static int nvgpu_clk_arb_install_fd(struct gk20a *g, struct nvgpu_clk_session *session, const struct file_operations *fops, struct nvgpu_clk_dev **_dev) { struct file *file; int fd; int err; int status; char name[64]; struct nvgpu_clk_dev *dev; gk20a_dbg_fn(""); dev = nvgpu_kzalloc(g, sizeof(*dev)); if (!dev) return -ENOMEM; status = nvgpu_clk_notification_queue_alloc(g, &dev->queue, DEFAULT_EVENT_NUMBER); if (status < 0) { err = status; goto fail; } fd = get_unused_fd_flags(O_RDWR); if (fd < 0) { err = fd; goto fail; } snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd); file = anon_inode_getfile(name, fops, dev, O_RDWR); if (IS_ERR(file)) { err = PTR_ERR(file); goto fail_fd; } fd_install(fd, file); init_waitqueue_head(&dev->readout_wq); atomic_set(&dev->poll_mask, 0); dev->session = session; kref_init(&dev->refcount); kref_get(&session->refcount); *_dev = dev; return fd; fail_fd: put_unused_fd(fd); fail: nvgpu_kfree(g, dev); return err; } int nvgpu_clk_arb_init_session(struct gk20a *g, struct nvgpu_clk_session **_session) { struct nvgpu_clk_arb *arb = g->clk_arb; struct nvgpu_clk_session *session = *(_session); gk20a_dbg_fn(""); if (!g->ops.clk_arb.get_arbiter_clk_domains) return 0; session = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_session)); if (!session) return -ENOMEM; session->g = g; kref_init(&session->refcount); session->zombie = false; session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8; /* make sure that the initialization of the pool is visible * before the update */ smp_wmb(); session->target = &session->target_pool[0]; init_llist_head(&session->targets); nvgpu_spinlock_acquire(&arb->sessions_lock); list_add_tail_rcu(&session->link, &arb->sessions); nvgpu_spinlock_release(&arb->sessions_lock); *_session = session; return 0; } static void nvgpu_clk_arb_free_fd(struct kref *refcount) { struct nvgpu_clk_dev *dev = container_of(refcount, struct nvgpu_clk_dev, refcount); struct nvgpu_clk_session *session = dev->session; nvgpu_kfree(session->g, dev); } static void nvgpu_clk_arb_free_session(struct kref *refcount) { struct nvgpu_clk_session *session = container_of(refcount, struct nvgpu_clk_session, refcount); struct nvgpu_clk_arb *arb = session->g->clk_arb; struct gk20a *g = session->g; struct nvgpu_clk_dev *dev, *tmp; struct llist_node *head; gk20a_dbg_fn(""); if (arb) { nvgpu_spinlock_acquire(&arb->sessions_lock); list_del_rcu(&session->link); nvgpu_spinlock_release(&arb->sessions_lock); } head = 
llist_del_all(&session->targets); llist_for_each_entry_safe(dev, tmp, head, node) { kref_put(&dev->refcount, nvgpu_clk_arb_free_fd); } synchronize_rcu(); nvgpu_kfree(g, session); } void nvgpu_clk_arb_release_session(struct gk20a *g, struct nvgpu_clk_session *session) { struct nvgpu_clk_arb *arb = g->clk_arb; gk20a_dbg_fn(""); session->zombie = true; kref_put(&session->refcount, nvgpu_clk_arb_free_session); if (arb && arb->update_work_queue) queue_work(arb->update_work_queue, &arb->update_fn_work); } int nvgpu_clk_arb_install_event_fd(struct gk20a *g, struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask) { struct nvgpu_clk_arb *arb = g->clk_arb; struct nvgpu_clk_dev *dev; int fd; gk20a_dbg_fn(""); fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev); if (fd < 0) return fd; /* TODO: alarm mask needs to be set to default value to prevent * failures of legacy tests. This will be removed when sanity is * updated */ if (alarm_mask) atomic_set(&dev->enabled_mask, alarm_mask); else atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE)); dev->arb_queue_head = atomic_read(&arb->notification_queue.head); nvgpu_spinlock_acquire(&arb->users_lock); list_add_tail_rcu(&dev->link, &arb->users); nvgpu_spinlock_release(&arb->users_lock); *event_fd = fd; return 0; } int nvgpu_clk_arb_install_request_fd(struct gk20a *g, struct nvgpu_clk_session *session, int *request_fd) { struct nvgpu_clk_dev *dev; int fd; gk20a_dbg_fn(""); fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev); if (fd < 0) return fd; *request_fd = fd; return 0; } static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) { struct gk20a *g = arb->g; struct nvgpu_clk_vf_table *table; u32 i, j; int status = -EINVAL; u32 gpc2clk_voltuv = 0, mclk_voltuv = 0; u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0; u16 clk_cur; u32 num_points; struct clk_set_info *p5_info, *p0_info; table = ACCESS_ONCE(arb->current_vf_table); /* make flag visible when all data has resolved in the tables */ smp_rmb(); table = (table == &arb->vf_table_pool[0]) ? 
&arb->vf_table_pool[1] : &arb->vf_table_pool[0]; /* Get allowed memory ranges */ if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPC2CLK, &arb->gpc2clk_min, &arb->gpc2clk_max) < 0) { nvgpu_err(g, "failed to fetch GPC2CLK range"); goto exit_vf_table; } if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_MCLK, &arb->mclk_min, &arb->mclk_max) < 0) { nvgpu_err(g, "failed to fetch MCLK range"); goto exit_vf_table; } table->gpc2clk_num_points = MAX_F_POINTS; table->mclk_num_points = MAX_F_POINTS; if (clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_GPC2CLK, &table->gpc2clk_num_points, arb->gpc2clk_f_points)) { nvgpu_err(g, "failed to fetch GPC2CLK frequency points"); goto exit_vf_table; } if (clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_MCLK, &table->mclk_num_points, arb->mclk_f_points)) { nvgpu_err(g, "failed to fetch MCLK frequency points"); goto exit_vf_table; } if (!table->mclk_num_points || !table->gpc2clk_num_points) { nvgpu_err(g, "empty queries to f points mclk %d gpc2clk %d", table->mclk_num_points, table->gpc2clk_num_points); status = -EINVAL; goto exit_vf_table; } memset(table->mclk_points, 0, table->mclk_num_points*sizeof(struct nvgpu_clk_vf_point)); memset(table->gpc2clk_points, 0, table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point)); p5_info = pstate_get_clk_set_info(g, CTRL_PERF_PSTATE_P5, clkwhich_mclk); if (!p5_info) { nvgpu_err(g, "failed to get MCLK P5 info"); goto exit_vf_table; } p0_info = pstate_get_clk_set_info(g, CTRL_PERF_PSTATE_P0, clkwhich_mclk); if (!p0_info) { nvgpu_err(g, "failed to get MCLK P0 info"); goto exit_vf_table; } for (i = 0, j = 0, num_points = 0, clk_cur = 0; i < table->mclk_num_points; i++) { if ((arb->mclk_f_points[i] >= arb->mclk_min) && (arb->mclk_f_points[i] <= arb->mclk_max) && (arb->mclk_f_points[i] != clk_cur)) { table->mclk_points[j].mem_mhz = arb->mclk_f_points[i]; mclk_voltuv = mclk_voltuv_sram = 0; status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, &table->mclk_points[j].mem_mhz, &mclk_voltuv, CTRL_VOLT_DOMAIN_LOGIC); if (status < 0) { nvgpu_err(g, "failed to get MCLK LOGIC voltage"); goto exit_vf_table; } status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, &table->mclk_points[j].mem_mhz, &mclk_voltuv_sram, CTRL_VOLT_DOMAIN_SRAM); if (status < 0) { nvgpu_err(g, "failed to get MCLK SRAM voltage"); goto exit_vf_table; } table->mclk_points[j].uvolt = mclk_voltuv; table->mclk_points[j].uvolt_sram = mclk_voltuv_sram; clk_cur = table->mclk_points[j].mem_mhz; if ((clk_cur >= p5_info->min_mhz) && (clk_cur <= p5_info->max_mhz)) VF_POINT_SET_PSTATE_SUPPORTED( &table->mclk_points[j], CTRL_PERF_PSTATE_P5); if ((clk_cur >= p0_info->min_mhz) && (clk_cur <= p0_info->max_mhz)) VF_POINT_SET_PSTATE_SUPPORTED( &table->mclk_points[j], CTRL_PERF_PSTATE_P0); j++; num_points++; } } table->mclk_num_points = num_points; p5_info = pstate_get_clk_set_info(g, CTRL_PERF_PSTATE_P5, clkwhich_gpc2clk); if (!p5_info) { status = -EINVAL; nvgpu_err(g, "failed to get GPC2CLK P5 info"); goto exit_vf_table; } p0_info = pstate_get_clk_set_info(g, CTRL_PERF_PSTATE_P0, clkwhich_gpc2clk); if (!p0_info) { status = -EINVAL; nvgpu_err(g, "failed to get GPC2CLK P0 info"); goto exit_vf_table; } /* GPC2CLK needs to be checked in two passes. 
The first determines the * relationships between GPC2CLK, SYS2CLK and XBAR2CLK, while the * second verifies that the clocks minimum is satisfied and sets * the voltages */ for (i = 0, j = 0, num_points = 0, clk_cur = 0; i < table->gpc2clk_num_points; i++) { struct set_fll_clk setfllclk; if ((arb->gpc2clk_f_points[i] >= arb->gpc2clk_min) && (arb->gpc2clk_f_points[i] <= arb->gpc2clk_max) && (arb->gpc2clk_f_points[i] != clk_cur)) { table->gpc2clk_points[j].gpc_mhz = arb->gpc2clk_f_points[i]; setfllclk.gpc2clkmhz = arb->gpc2clk_f_points[i]; status = clk_get_fll_clks(g, &setfllclk); if (status < 0) { nvgpu_err(g, "failed to get GPC2CLK slave clocks"); goto exit_vf_table; } table->gpc2clk_points[j].sys_mhz = setfllclk.sys2clkmhz; table->gpc2clk_points[j].xbar_mhz = setfllclk.xbar2clkmhz; clk_cur = table->gpc2clk_points[j].gpc_mhz; if ((clk_cur >= p5_info->min_mhz) && (clk_cur <= p5_info->max_mhz)) VF_POINT_SET_PSTATE_SUPPORTED( &table->gpc2clk_points[j], CTRL_PERF_PSTATE_P5); if ((clk_cur >= p0_info->min_mhz) && (clk_cur <= p0_info->max_mhz)) VF_POINT_SET_PSTATE_SUPPORTED( &table->gpc2clk_points[j], CTRL_PERF_PSTATE_P0); j++; num_points++; } } table->gpc2clk_num_points = num_points; /* Second pass */ for (i = 0, j = 0; i < table->gpc2clk_num_points; i++) { u16 alt_gpc2clk = table->gpc2clk_points[i].gpc_mhz; gpc2clk_voltuv = gpc2clk_voltuv_sram = 0; /* Check sysclk */ p5_info = pstate_get_clk_set_info(g, VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]), clkwhich_sys2clk); if (!p5_info) { status = -EINVAL; nvgpu_err(g, "failed to get SYS2CLK P5 info"); goto exit_vf_table; } /* sys2clk below clk min, need to find correct clock */ if (table->gpc2clk_points[i].sys_mhz < p5_info->min_mhz) { for (j = i + 1; j < table->gpc2clk_num_points; j++) { if (table->gpc2clk_points[j].sys_mhz >= p5_info->min_mhz) { table->gpc2clk_points[i].sys_mhz = p5_info->min_mhz; alt_gpc2clk = alt_gpc2clk < table->gpc2clk_points[j]. gpc_mhz ? table->gpc2clk_points[j]. gpc_mhz: alt_gpc2clk; break; } } /* no VF exists that satisfies condition */ if (j == table->gpc2clk_num_points) { nvgpu_err(g, "NO SYS2CLK VF point possible"); status = -EINVAL; goto exit_vf_table; } } /* Check xbarclk */ p5_info = pstate_get_clk_set_info(g, VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]), clkwhich_xbar2clk); if (!p5_info) { status = -EINVAL; nvgpu_err(g, "failed to get SYS2CLK P5 info"); goto exit_vf_table; } /* xbar2clk below clk min, need to find correct clock */ if (table->gpc2clk_points[i].xbar_mhz < p5_info->min_mhz) { for (j = i; j < table->gpc2clk_num_points; j++) { if (table->gpc2clk_points[j].xbar_mhz >= p5_info->min_mhz) { table->gpc2clk_points[i].xbar_mhz = p5_info->min_mhz; alt_gpc2clk = alt_gpc2clk < table->gpc2clk_points[j]. gpc_mhz ? table->gpc2clk_points[j]. 
gpc_mhz: alt_gpc2clk; break; } } /* no VF exists that satisfies condition */ if (j == table->gpc2clk_num_points) { status = -EINVAL; nvgpu_err(g, "NO XBAR2CLK VF point possible"); goto exit_vf_table; } } /* Calculate voltages */ status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, &alt_gpc2clk, &gpc2clk_voltuv, CTRL_VOLT_DOMAIN_LOGIC); if (status < 0) { nvgpu_err(g, "failed to get GPC2CLK LOGIC voltage"); goto exit_vf_table; } status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, &alt_gpc2clk, &gpc2clk_voltuv_sram, CTRL_VOLT_DOMAIN_SRAM); if (status < 0) { nvgpu_err(g, "failed to get GPC2CLK SRAM voltage"); goto exit_vf_table; } table->gpc2clk_points[i].uvolt = gpc2clk_voltuv; table->gpc2clk_points[i].uvolt_sram = gpc2clk_voltuv_sram; } /* make table visible when all data has resolved in the tables */ smp_wmb(); xchg(&arb->current_vf_table, table); exit_vf_table: if (status < 0) nvgpu_clk_arb_set_global_alarm(g, EVENT(ALARM_VF_TABLE_UPDATE_FAILED)); if (arb->update_work_queue) queue_work(arb->update_work_queue, &arb->update_fn_work); return status; } void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g) { struct nvgpu_clk_arb *arb = g->clk_arb; if (arb->vf_table_work_queue) queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work); } static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work) { struct nvgpu_clk_arb *arb = container_of(work, struct nvgpu_clk_arb, vf_table_fn_work); struct gk20a *g = arb->g; u32 err; /* get latest vf curve from pmu */ err = clk_vf_point_cache(g); if (err) { nvgpu_err(g, "failed to cache VF table"); nvgpu_clk_arb_set_global_alarm(g, EVENT(ALARM_VF_TABLE_UPDATE_FAILED)); if (arb->update_work_queue) queue_work(arb->update_work_queue, &arb->update_fn_work); return; } nvgpu_clk_arb_update_vf_table(arb); } static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) { struct nvgpu_clk_arb *arb = container_of(work, struct nvgpu_clk_arb, update_fn_work); struct nvgpu_clk_session *session; struct nvgpu_clk_dev *dev; struct nvgpu_clk_dev *tmp; struct nvgpu_clk_arb_target *target, *actual; struct gk20a *g = arb->g; struct llist_node *head; u32 pstate = VF_POINT_INVALID_PSTATE; u32 voltuv, voltuv_sram; bool mclk_set, gpc2clk_set; u32 nuvmin, nuvmin_sram; u32 alarms_notified = 0; u32 current_alarm; int status = 0; /* Temporary variables for checking target frequency */ u16 gpc2clk_target, sys2clk_target, xbar2clk_target, mclk_target; u16 gpc2clk_session_target, mclk_session_target; #ifdef CONFIG_DEBUG_FS u64 t0, t1; struct nvgpu_clk_arb_debug *debug; #endif gk20a_dbg_fn(""); /* bail out if gpu is down */ if (atomic_read(&arb->alarm_mask) & EVENT(ALARM_GPU_LOST)) goto exit_arb; #ifdef CONFIG_DEBUG_FS g->ops.bus.read_ptimer(g, &t0); #endif /* Only one arbiter should be running */ gpc2clk_target = 0; mclk_target = 0; rcu_read_lock(); list_for_each_entry_rcu(session, &arb->sessions, link) { if (!session->zombie) { mclk_set = false; gpc2clk_set = false; target = ACCESS_ONCE(session->target) == &session->target_pool[0] ? 
&session->target_pool[1] : &session->target_pool[0]; /* Do not reorder pointer */ smp_rmb(); head = llist_del_all(&session->targets); if (head) { /* Copy over state */ target->mclk = session->target->mclk; target->gpc2clk = session->target->gpc2clk; /* Query the latest committed request */ llist_for_each_entry_safe(dev, tmp, head, node) { if (!mclk_set && dev->mclk_target_mhz) { target->mclk = dev->mclk_target_mhz; mclk_set = true; } if (!gpc2clk_set && dev->gpc2clk_target_mhz) { target->gpc2clk = dev->gpc2clk_target_mhz; gpc2clk_set = true; } kref_get(&dev->refcount); llist_add(&dev->node, &arb->requests); } /* Ensure target is updated before ptr sawp */ smp_wmb(); xchg(&session->target, target); } mclk_target = mclk_target > session->target->mclk ? mclk_target : session->target->mclk; gpc2clk_target = gpc2clk_target > session->target->gpc2clk ? gpc2clk_target : session->target->gpc2clk; } } rcu_read_unlock(); gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target : arb->gpc2clk_default_mhz; if (gpc2clk_target < arb->gpc2clk_min) gpc2clk_target = arb->gpc2clk_min; if (gpc2clk_target > arb->gpc2clk_max) gpc2clk_target = arb->gpc2clk_max; mclk_target = (mclk_target > 0) ? mclk_target: arb->mclk_default_mhz; if (mclk_target < arb->mclk_min) mclk_target = arb->mclk_min; if (mclk_target > arb->mclk_max) mclk_target = arb->mclk_max; sys2clk_target = 0; xbar2clk_target = 0; gpc2clk_session_target = gpc2clk_target; mclk_session_target = mclk_target; /* Query the table for the closest vf point to program */ pstate = nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target, &sys2clk_target, &xbar2clk_target, &mclk_target, &voltuv, &voltuv_sram, &nuvmin, &nuvmin_sram); if (pstate == VF_POINT_INVALID_PSTATE) { arb->status = -EINVAL; /* make status visible */ smp_mb(); goto exit_arb; } if ((gpc2clk_target < gpc2clk_session_target) || (mclk_target < mclk_session_target)) nvgpu_clk_arb_set_global_alarm(g, EVENT(ALARM_TARGET_VF_NOT_POSSIBLE)); if ((arb->actual->gpc2clk == gpc2clk_target) && (arb->actual->mclk == mclk_target) && (arb->voltuv_actual == voltuv)) { goto exit_arb; } /* Program clocks */ /* A change in both mclk of gpc2clk may require a change in voltage */ nvgpu_mutex_acquire(&arb->pstate_lock); status = nvgpu_lpwr_disable_pg(g, false); status = clk_pmu_freq_controller_load(g, false); if (status < 0) { arb->status = status; nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ smp_mb(); goto exit_arb; } status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram); if (status < 0) { arb->status = status; nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ smp_mb(); goto exit_arb; } status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target, sys2clk_target, xbar2clk_target, mclk_target, voltuv, voltuv_sram); if (status < 0) { arb->status = status; nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ smp_mb(); goto exit_arb; } status = clk_pmu_freq_controller_load(g, true); if (status < 0) { arb->status = status; nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ smp_mb(); goto exit_arb; } status = nvgpu_lwpr_mclk_change(g, pstate); if (status < 0) { arb->status = status; nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ smp_mb(); goto exit_arb; } actual = ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ? 
&arb->actual_pool[1] : &arb->actual_pool[0]; /* do not reorder this pointer */ smp_rmb(); actual->gpc2clk = gpc2clk_target; actual->mclk = mclk_target; arb->voltuv_actual = voltuv; actual->pstate = pstate; arb->status = status; /* Make changes visible to other threads */ smp_wmb(); xchg(&arb->actual, actual); status = nvgpu_lpwr_enable_pg(g, false); if (status < 0) { arb->status = status; nvgpu_mutex_release(&arb->pstate_lock); /* make status visible */ smp_mb(); goto exit_arb; } /* status must be visible before atomic inc */ smp_wmb(); atomic_inc(&arb->req_nr); /* Unlock pstate change for PG */ nvgpu_mutex_release(&arb->pstate_lock); /* VF Update complete */ nvgpu_clk_arb_set_global_alarm(g, EVENT(VF_UPDATE)); wake_up_interruptible(&arb->request_wq); #ifdef CONFIG_DEBUG_FS g->ops.bus.read_ptimer(g, &t1); debug = arb->debug == &arb->debug_pool[0] ? &arb->debug_pool[1] : &arb->debug_pool[0]; memcpy(debug, arb->debug, sizeof(arb->debug_pool[0])); debug->switch_num++; if (debug->switch_num == 1) { debug->switch_max = debug->switch_min = debug->switch_avg = (t1-t0)/1000; debug->switch_std = 0; } else { s64 prev_avg; s64 curr = (t1-t0)/1000; debug->switch_max = curr > debug->switch_max ? curr : debug->switch_max; debug->switch_min = debug->switch_min ? (curr < debug->switch_min ? curr : debug->switch_min) : curr; prev_avg = debug->switch_avg; debug->switch_avg = (curr + (debug->switch_avg * (debug->switch_num-1))) / debug->switch_num; debug->switch_std += (curr - debug->switch_avg) * (curr - prev_avg); } /* commit changes before exchanging debug pointer */ smp_wmb(); xchg(&arb->debug, debug); #endif exit_arb: if (status < 0) { nvgpu_err(g, "Error in arbiter update"); nvgpu_clk_arb_set_global_alarm(g, EVENT(ALARM_CLOCK_ARBITER_FAILED)); } current_alarm = (u32) atomic64_read(&arb->alarm_mask); /* notify completion for all requests */ head = llist_del_all(&arb->requests); llist_for_each_entry_safe(dev, tmp, head, node) { atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); wake_up_interruptible(&dev->readout_wq); kref_put(&dev->refcount, nvgpu_clk_arb_free_fd); } atomic_set(&arb->notification_queue.head, atomic_read(&arb->notification_queue.tail)); /* notify event for all users */ rcu_read_lock(); list_for_each_entry_rcu(dev, &arb->users, link) { alarms_notified |= nvgpu_clk_arb_notify(dev, arb->actual, current_alarm); } rcu_read_unlock(); /* clear alarms */ nvgpu_clk_arb_clear_global_alarm(g, alarms_notified & ~EVENT(ALARM_GPU_LOST)); } static void nvgpu_clk_arb_queue_notification(struct gk20a *g, struct nvgpu_clk_notification_queue *queue, u32 alarm_mask) { u32 queue_index; u64 timestamp; queue_index = (atomic_inc_return(&queue->tail)) % queue->size; /* get current timestamp */ timestamp = (u64) sched_clock(); queue->notifications[queue_index].timestamp = timestamp; queue->notifications[queue_index].notification = alarm_mask; } static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev, struct nvgpu_clk_arb_target *target, u32 alarm) { struct nvgpu_clk_session *session = dev->session; struct nvgpu_clk_arb *arb = session->g->clk_arb; struct nvgpu_clk_notification *notification; u32 queue_alarm_mask = 0; u32 enabled_mask = 0; u32 new_alarms_reported = 0; u32 poll_mask = 0; u32 tail, head; u32 queue_index; size_t size; int index; enabled_mask = atomic_read(&dev->enabled_mask); size = arb->notification_queue.size; /* queue global arbiter notifications in buffer */ do { tail = atomic_read(&arb->notification_queue.tail); /* copy items to the queue */ queue_index = atomic_read(&dev->queue.tail); head = 
dev->arb_queue_head; head = (tail - head) < arb->notification_queue.size ? head : tail - arb->notification_queue.size; for (index = head; _WRAPGTEQ(tail, index); index++) { u32 alarm_detected; notification = &arb->notification_queue. notifications[(index+1) % size]; alarm_detected = ACCESS_ONCE(notification->notification); if (!(enabled_mask & alarm_detected)) continue; queue_index++; dev->queue.notifications[ queue_index % dev->queue.size].timestamp = ACCESS_ONCE(notification->timestamp); dev->queue.notifications[ queue_index % dev->queue.size].notification = alarm_detected; queue_alarm_mask |= alarm_detected; } } while (unlikely(atomic_read(&arb->notification_queue.tail) != (int)tail)); atomic_set(&dev->queue.tail, queue_index); /* update the last notification we processed from global queue */ dev->arb_queue_head = tail; /* Check if current session targets are met */ if (enabled_mask & EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)) { if ((target->gpc2clk < session->target->gpc2clk) || (target->mclk < session->target->mclk)) { poll_mask |= (POLLIN | POLLPRI); nvgpu_clk_arb_queue_notification(arb->g, &dev->queue, EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)); } } /* Check if there is a new VF update */ if (queue_alarm_mask & EVENT(VF_UPDATE)) poll_mask |= (POLLIN | POLLRDNORM); /* Notify sticky alarms that were not reported on previous run*/ new_alarms_reported = (queue_alarm_mask | (alarm & ~dev->alarms_reported & queue_alarm_mask)); if (new_alarms_reported & ~LOCAL_ALARM_MASK) { /* check that we are not re-reporting */ if (new_alarms_reported & EVENT(ALARM_GPU_LOST)) poll_mask |= POLLHUP; poll_mask |= (POLLIN | POLLPRI); /* On next run do not report global alarms that were already * reported, but report SHUTDOWN always */ dev->alarms_reported = new_alarms_reported & ~LOCAL_ALARM_MASK & ~EVENT(ALARM_GPU_LOST); } if (poll_mask) { atomic_set(&dev->poll_mask, poll_mask); wake_up_interruptible_all(&dev->readout_wq); } return new_alarms_reported; } static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev, struct nvgpu_gpu_set_event_filter_args *args) { u32 mask; gk20a_dbg(gpu_dbg_fn, ""); if (args->flags) return -EINVAL; if (args->size != 1) return -EINVAL; if (copy_from_user(&mask, (void __user *) args->buffer, args->size * sizeof(u32))) return -EFAULT; /* update alarm mask */ atomic_set(&dev->enabled_mask, mask); return 0; } static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd, unsigned long arg) { struct nvgpu_clk_dev *dev = filp->private_data; struct gk20a *g = dev->session->g; u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE]; int err = 0; gk20a_dbg(gpu_dbg_fn, "nr=%d", _IOC_NR(cmd)); if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0) || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST)) return -EINVAL; BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE); memset(buf, 0, sizeof(buf)); if (_IOC_DIR(cmd) & _IOC_WRITE) { if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) return -EFAULT; } switch (cmd) { case NVGPU_EVENT_IOCTL_SET_FILTER: err = nvgpu_clk_arb_set_event_filter(dev, (struct nvgpu_gpu_set_event_filter_args *)buf); break; default: nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd); err = -ENOTTY; } if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); return err; } int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, struct nvgpu_clk_session *session, int request_fd) { struct nvgpu_clk_arb *arb = g->clk_arb; struct nvgpu_clk_dev *dev; struct fd fd; int err = 0; gk20a_dbg_fn(""); 
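	/*
	 * Committing a request: resolve the completion fd back to its
	 * nvgpu_clk_dev, take a reference on it, push it onto the session's
	 * lock-less target list and kick the arbiter worker. The worker
	 * drops the reference again once the request has been arbitrated
	 * and the fd has been woken up for poll().
	 */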
fd = fdget(request_fd); if (!fd.file) return -EINVAL; if (fd.file->f_op != &completion_dev_ops) { err = -EINVAL; goto fdput_fd; } dev = (struct nvgpu_clk_dev *) fd.file->private_data; if (!dev || dev->session != session) { err = -EINVAL; goto fdput_fd; } kref_get(&dev->refcount); llist_add(&dev->node, &session->targets); if (arb->update_work_queue) queue_work(arb->update_work_queue, &arb->update_fn_work); fdput_fd: fdput(fd); return err; } static inline u32 __pending_event(struct nvgpu_clk_dev *dev, struct nvgpu_gpu_event_info *info) { u32 tail, head; u32 events = 0; struct nvgpu_clk_notification *p_notif; tail = atomic_read(&dev->queue.tail); head = atomic_read(&dev->queue.head); head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size; if (_WRAPGTEQ(tail, head) && info) { head++; p_notif = &dev->queue.notifications[head % dev->queue.size]; events |= p_notif->notification; info->event_id = ffs(events) - 1; info->timestamp = p_notif->timestamp; atomic_set(&dev->queue.head, head); } return events; } static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf, size_t size, loff_t *off) { struct nvgpu_clk_dev *dev = filp->private_data; struct nvgpu_gpu_event_info info; ssize_t err; gk20a_dbg_fn("filp=%p, buf=%p, size=%zu", filp, buf, size); if ((size - *off) < sizeof(info)) return 0; memset(&info, 0, sizeof(info)); /* Get the oldest event from the queue */ while (!__pending_event(dev, &info)) { if (filp->f_flags & O_NONBLOCK) return -EAGAIN; err = wait_event_interruptible(dev->readout_wq, __pending_event(dev, &info)); if (err) return err; if (info.timestamp) break; } if (copy_to_user(buf + *off, &info, sizeof(info))) return -EFAULT; return sizeof(info); } static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait) { struct nvgpu_clk_dev *dev = filp->private_data; gk20a_dbg_fn(""); poll_wait(filp, &dev->readout_wq, wait); return atomic_xchg(&dev->poll_mask, 0); } static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, struct file *filp) { struct nvgpu_clk_dev *dev = filp->private_data; struct nvgpu_clk_session *session = dev->session; gk20a_dbg_fn(""); kref_put(&session->refcount, nvgpu_clk_arb_free_session); kref_put(&dev->refcount, nvgpu_clk_arb_free_fd); return 0; } static int nvgpu_clk_arb_release_event_dev(struct inode *inode, struct file *filp) { struct nvgpu_clk_dev *dev = filp->private_data; struct nvgpu_clk_session *session = dev->session; struct nvgpu_clk_arb *arb; arb = session->g->clk_arb; gk20a_dbg_fn(""); if (arb) { nvgpu_spinlock_acquire(&arb->users_lock); list_del_rcu(&dev->link); nvgpu_spinlock_release(&arb->users_lock); nvgpu_clk_notification_queue_free(arb->g, &dev->queue); } synchronize_rcu(); kref_put(&session->refcount, nvgpu_clk_arb_free_session); kref_put(&dev->refcount, nvgpu_clk_arb_free_fd); return 0; } int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, int request_fd, u32 api_domain, u16 target_mhz) { struct nvgpu_clk_dev *dev; struct fd fd; int err = 0; gk20a_dbg_fn("domain=0x%08x target_mhz=%u", api_domain, target_mhz); fd = fdget(request_fd); if (!fd.file) return -EINVAL; if (fd.file->f_op != &completion_dev_ops) { err = -EINVAL; goto fdput_fd; } dev = fd.file->private_data; if (!dev || dev->session != session) { err = -EINVAL; goto fdput_fd; } switch (api_domain) { case NVGPU_GPU_CLK_DOMAIN_MCLK: dev->mclk_target_mhz = target_mhz; break; case NVGPU_GPU_CLK_DOMAIN_GPCCLK: dev->gpc2clk_target_mhz = target_mhz * 2ULL; break; default: err = -EINVAL; } fdput_fd: fdput(fd); 
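	/*
	 * Recording the target on the request fd does not reprogram
	 * anything by itself; the new value is applied only when the fd is
	 * committed through nvgpu_clk_arb_commit_request_fd() and the
	 * arbiter worker runs.
	 */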
return err; } int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session, u32 api_domain, u16 *freq_mhz) { int err = 0; struct nvgpu_clk_arb_target *target; do { target = ACCESS_ONCE(session->target); /* no reordering of this pointer */ smp_rmb(); switch (api_domain) { case NVGPU_GPU_CLK_DOMAIN_MCLK: *freq_mhz = target->mclk; break; case NVGPU_GPU_CLK_DOMAIN_GPCCLK: *freq_mhz = target->gpc2clk / 2ULL; break; default: *freq_mhz = 0; err = -EINVAL; } } while (target != ACCESS_ONCE(session->target)); return err; } int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, u32 api_domain, u16 *freq_mhz) { struct nvgpu_clk_arb *arb = g->clk_arb; int err = 0; struct nvgpu_clk_arb_target *actual; do { actual = ACCESS_ONCE(arb->actual); /* no reordering of this pointer */ smp_rmb(); switch (api_domain) { case NVGPU_GPU_CLK_DOMAIN_MCLK: *freq_mhz = actual->mclk; break; case NVGPU_GPU_CLK_DOMAIN_GPCCLK: *freq_mhz = actual->gpc2clk / 2ULL; break; default: *freq_mhz = 0; err = -EINVAL; } } while (actual != ACCESS_ONCE(arb->actual)); return err; } int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g, u32 api_domain, u16 *freq_mhz) { switch(api_domain) { case NVGPU_GPU_CLK_DOMAIN_MCLK: *freq_mhz = g->ops.clk.measure_freq(g, CTRL_CLK_DOMAIN_MCLK) / 1000000ULL; return 0; case NVGPU_GPU_CLK_DOMAIN_GPCCLK: *freq_mhz = g->ops.clk.measure_freq(g, CTRL_CLK_DOMAIN_GPC2CLK) / 2000000ULL; return 0; default: return -EINVAL; } } int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, u16 *min_mhz, u16 *max_mhz) { int ret; switch(api_domain) { case NVGPU_GPU_CLK_DOMAIN_MCLK: ret = g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_MCLK, min_mhz, max_mhz); return ret; case NVGPU_GPU_CLK_DOMAIN_GPCCLK: ret = g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPC2CLK, min_mhz, max_mhz); if (!ret) { *min_mhz /= 2; *max_mhz /= 2; } return ret; default: return -EINVAL; } } u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) { u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); u32 api_domains = 0; if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK); if (clk_domains & CTRL_CLK_DOMAIN_MCLK) api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK); return api_domains; } bool nvgpu_clk_arb_is_valid_domain(struct gk20a *g, u32 api_domain) { u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); switch(api_domain) { case NVGPU_GPU_CLK_DOMAIN_MCLK: return ((clk_domains & CTRL_CLK_DOMAIN_MCLK) != 0); case NVGPU_GPU_CLK_DOMAIN_GPCCLK: return ((clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) != 0); default: return false; } } int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, u32 api_domain, u32 *max_points, u16 *fpoints) { int err; u32 i; switch (api_domain) { case NVGPU_GPU_CLK_DOMAIN_GPCCLK: err = clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPC2CLK, max_points, fpoints); if (err || !fpoints) return err; for (i = 0; i < *max_points; i++) fpoints[i] /= 2; return 0; case NVGPU_GPU_CLK_DOMAIN_MCLK: return clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_MCLK, max_points, fpoints); default: return -EINVAL; } } static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk, u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram) { u16 gpc2clk_target, mclk_target; u32 gpc2clk_voltuv, gpc2clk_voltuv_sram; u32 mclk_voltuv, mclk_voltuv_sram; u32 pstate = VF_POINT_INVALID_PSTATE; struct nvgpu_clk_vf_table *table; u32 index, index_mclk; struct nvgpu_clk_vf_point *mclk_vf = NULL; do { 
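		/*
		 * The VF table is double buffered and can be swapped by the
		 * VF-table update worker while this lookup runs, so the whole
		 * search is retried until arb->current_vf_table is unchanged
		 * across one complete pass.
		 */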
gpc2clk_target = *gpc2clk; mclk_target = *mclk; gpc2clk_voltuv = 0; gpc2clk_voltuv_sram = 0; mclk_voltuv = 0; mclk_voltuv_sram = 0; table = ACCESS_ONCE(arb->current_vf_table); /* pointer to table can be updated by callback */ smp_rmb(); if (!table) continue; if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) { nvgpu_err(arb->g, "found empty table"); goto find_exit; } /* First we check MCLK to find out which PSTATE we are * are requesting, and from there try to find the minimum * GPC2CLK on the same PSTATE that satisfies the request. * If no GPC2CLK can be found, then we need to up the PSTATE */ recalculate_vf_point: for (index = 0; index < table->mclk_num_points; index++) { if (table->mclk_points[index].mem_mhz >= mclk_target) { mclk_vf = &table->mclk_points[index]; break; } } if (index == table->mclk_num_points) { mclk_vf = &table->mclk_points[index-1]; index = table->mclk_num_points - 1; } index_mclk = index; /* round up the freq requests */ for (index = 0; index < table->gpc2clk_num_points; index++) { pstate = VF_POINT_COMMON_PSTATE( &table->gpc2clk_points[index], mclk_vf); if ((table->gpc2clk_points[index].gpc_mhz >= gpc2clk_target) && (pstate != VF_POINT_INVALID_PSTATE)){ gpc2clk_target = table->gpc2clk_points[index].gpc_mhz; *sys2clk = table->gpc2clk_points[index].sys_mhz; *xbar2clk = table->gpc2clk_points[index].xbar_mhz; gpc2clk_voltuv = table->gpc2clk_points[index].uvolt; gpc2clk_voltuv_sram = table->gpc2clk_points[index].uvolt_sram; break; } } if (index == table->gpc2clk_num_points) { pstate = VF_POINT_COMMON_PSTATE( &table->gpc2clk_points[index-1], mclk_vf); if (pstate != VF_POINT_INVALID_PSTATE) { gpc2clk_target = table->gpc2clk_points[index-1].gpc_mhz; *sys2clk = table->gpc2clk_points[index-1].sys_mhz; *xbar2clk = table->gpc2clk_points[index-1].xbar_mhz; gpc2clk_voltuv = table->gpc2clk_points[index-1].uvolt; gpc2clk_voltuv_sram = table->gpc2clk_points[index-1]. uvolt_sram; } else if (index_mclk >= table->mclk_num_points - 1) { /* There is no available combination of MCLK * and GPC2CLK, we need to fail this */ gpc2clk_target = 0; mclk_target = 0; pstate = VF_POINT_INVALID_PSTATE; goto find_exit; } else { /* recalculate with higher PSTATE */ gpc2clk_target = *gpc2clk; mclk_target = table->mclk_points[index_mclk+1]. mem_mhz; goto recalculate_vf_point; } } mclk_target = mclk_vf->mem_mhz; mclk_voltuv = mclk_vf->uvolt; mclk_voltuv_sram = mclk_vf->uvolt_sram; } while (!table || (ACCESS_ONCE(arb->current_vf_table) != table)); find_exit: *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv; *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ? gpc2clk_voltuv_sram : mclk_voltuv_sram; /* noise unaware vmin */ *nuvmin = mclk_voltuv; *nuvmin_sram = mclk_voltuv_sram; *gpc2clk = gpc2clk_target < *gpc2clk ? 
gpc2clk_target : *gpc2clk; *mclk = mclk_target; return pstate; } /* This function is inherently unsafe to call while arbiter is running * arbiter must be blocked before calling this function */ int nvgpu_clk_arb_get_current_pstate(struct gk20a *g) { return ACCESS_ONCE(g->clk_arb->actual->pstate); } static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target, u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv, u32 voltuv_sram) { struct set_fll_clk fllclk; struct nvgpu_clk_arb *arb = g->clk_arb; int status; fllclk.gpc2clkmhz = gpc2clk_target; fllclk.sys2clkmhz = sys2clk_target; fllclk.xbar2clkmhz = xbar2clk_target; fllclk.voltuv = voltuv; /* if voltage ascends we do: * (1) FLL change * (2) Voltage change * (3) MCLK change * If it goes down * (1) MCLK change * (2) Voltage change * (3) FLL change */ /* descending */ if (voltuv < arb->voltuv_actual) { status = g->ops.clk.mclk_change(g, mclk_target); if (status < 0) return status; status = volt_set_voltage(g, voltuv, voltuv_sram); if (status < 0) return status; status = clk_set_fll_clks(g, &fllclk); if (status < 0) return status; } else { status = clk_set_fll_clks(g, &fllclk); if (status < 0) return status; status = volt_set_voltage(g, voltuv, voltuv_sram); if (status < 0) return status; status = g->ops.clk.mclk_change(g, mclk_target); if (status < 0) return status; } return 0; } void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock) { struct nvgpu_clk_arb *arb = g->clk_arb; if (lock) nvgpu_mutex_acquire(&arb->pstate_lock); else nvgpu_mutex_release(&arb->pstate_lock); } #ifdef CONFIG_DEBUG_FS static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) { struct gk20a *g = s->private; struct nvgpu_clk_arb *arb = g->clk_arb; struct nvgpu_clk_arb_debug *debug; u64 num; s64 tmp, avg, std, max, min; debug = ACCESS_ONCE(arb->debug); /* Make copy of structure and ensure no reordering */ smp_rmb(); if (!debug) return -EINVAL; std = debug->switch_std; avg = debug->switch_avg; max = debug->switch_max; min = debug->switch_min; num = debug->switch_num; tmp = std; do_div(tmp, num); seq_printf(s, "Number of transitions: %lld\n", num); seq_printf(s, "max / min : %lld / %lld usec\n", max, min); seq_printf(s, "avg / std : %lld / %ld usec\n", avg, int_sqrt(tmp)); return 0; } static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file) { return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private); } static const struct file_operations nvgpu_clk_arb_stats_fops = { .open = nvgpu_clk_arb_stats_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; static int nvgpu_clk_arb_debugfs_init(struct gk20a *g) { struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); struct dentry *gpu_root = platform->debugfs; struct dentry *d; gk20a_dbg(gpu_dbg_info, "g=%p", g); d = debugfs_create_file( "arb_stats", S_IRUGO, gpu_root, g, &nvgpu_clk_arb_stats_fops); if (!d) return -ENOMEM; return 0; } #endif