-rw-r--r--   drivers/gpu/nvgpu/common/linux/clk_arb.c        255
-rw-r--r--   drivers/gpu/nvgpu/common/linux/clk_arb_linux.h    27
-rw-r--r--   drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c     3
-rw-r--r--   drivers/gpu/nvgpu/gk20a/channel_gk20a.c            2
-rw-r--r--   drivers/gpu/nvgpu/gk20a/gk20a.h                    4
5 files changed, 251 insertions(+), 40 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb.c b/drivers/gpu/nvgpu/common/linux/clk_arb.c
index bb0fd628..2a6278e8 100644
--- a/drivers/gpu/nvgpu/common/linux/clk_arb.c
+++ b/drivers/gpu/nvgpu/common/linux/clk_arb.c
@@ -397,17 +397,14 @@ exit_vf_table:
         if (status < 0)
                 nvgpu_clk_arb_set_global_alarm(g,
                                 EVENT(ALARM_VF_TABLE_UPDATE_FAILED));
-        if (arb->update_work_queue)
-                queue_work(arb->update_work_queue, &arb->update_fn_work);
+        nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
 
         return status;
 }
 
 
-static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
+static void nvgpu_clk_arb_run_vf_table_cb(struct nvgpu_clk_arb *arb)
 {
-        struct nvgpu_clk_arb *arb =
-                container_of(work, struct nvgpu_clk_arb, vf_table_fn_work);
         struct gk20a *g = arb->g;
         u32 err;
 
@@ -417,9 +414,7 @@ static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
                 nvgpu_err(g, "failed to cache VF table");
                 nvgpu_clk_arb_set_global_alarm(g,
                                 EVENT(ALARM_VF_TABLE_UPDATE_FAILED));
-                if (arb->update_work_queue)
-                        queue_work(arb->update_work_queue,
-                                &arb->update_fn_work);
+                nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
 
                 return;
         }
@@ -725,10 +720,8 @@ static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm)
                         current_mask, new_mask)));
 }
 
-static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
+static void nvgpu_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb)
 {
-        struct nvgpu_clk_arb *arb =
-                container_of(work, struct nvgpu_clk_arb, update_fn_work);
         struct nvgpu_clk_session *session;
         struct nvgpu_clk_dev *dev;
         struct nvgpu_clk_dev *tmp;
@@ -1027,6 +1020,205 @@ exit_arb:
                         ~EVENT(ALARM_GPU_LOST));
 }
 
+/*
+ * Process one scheduled work item.
+ */
+static void nvgpu_clk_arb_worker_process_item(
+                struct nvgpu_clk_arb_work_item *work_item)
+{
+        nvgpu_log(work_item->arb->g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
+
+        if (work_item->item_type == CLK_ARB_WORK_UPDATE_VF_TABLE)
+                nvgpu_clk_arb_run_vf_table_cb(work_item->arb);
+        else if (work_item->item_type == CLK_ARB_WORK_UPDATE_ARB)
+                nvgpu_clk_arb_run_arbiter_cb(work_item->arb);
+}
+
+/**
+ * Tell the worker that one more work needs to be done.
+ *
+ * Increase the work counter to synchronize the worker with the new work. Wake
+ * up the worker. If the worker was already running, it will handle this work
+ * before going to sleep.
+ */
+static int nvgpu_clk_arb_worker_wakeup(struct gk20a *g)
+{
+        int put;
+
+        nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
+
+        put = nvgpu_atomic_inc_return(&g->clk_arb_worker.put);
+        nvgpu_cond_signal_interruptible(&g->clk_arb_worker.wq);
+
+        return put;
+}
+
+/**
+ * Test if there is some work pending.
+ *
+ * This is a pair for nvgpu_clk_arb_worker_wakeup to be called from the
+ * worker. The worker has an internal work counter which is incremented once
+ * per finished work item. This is compared with the number of queued jobs.
+ */
+static bool nvgpu_clk_arb_worker_pending(struct gk20a *g, int get)
+{
+        bool pending = nvgpu_atomic_read(&g->clk_arb_worker.put) != get;
+
+        /* We don't need barriers because they are implicit in locking */
+        return pending;
+}
+
+/**
+ * Process the queued works for the worker thread serially.
+ *
+ * Flush all the work items in the queue one by one. This may block timeout
+ * handling for a short while, as these are serialized.
+ */
+static void nvgpu_clk_arb_worker_process(struct gk20a *g, int *get)
+{
+
+        while (nvgpu_clk_arb_worker_pending(g, *get)) {
+                struct nvgpu_clk_arb_work_item *work_item = NULL;
+
+                nvgpu_spinlock_acquire(&g->clk_arb_worker.items_lock);
+                if (!nvgpu_list_empty(&g->clk_arb_worker.items)) {
+                        work_item = nvgpu_list_first_entry(&g->clk_arb_worker.items,
+                                nvgpu_clk_arb_work_item, worker_item);
+                        nvgpu_list_del(&work_item->worker_item);
+                }
+                nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
+
+                if (!work_item) {
+                        /*
+                         * Woke up for some other reason, but there are no
+                         * other reasons than a work item added in the items list
+                         * currently, so warn and ack the message.
+                         */
+                        nvgpu_warn(g, "Spurious worker event!");
+                        ++*get;
+                        break;
+                }
+
+                nvgpu_clk_arb_worker_process_item(work_item);
+                ++*get;
+        }
+}
+
+/*
+ * Process all work items found in the clk arbiter work queue.
+ */
+static int nvgpu_clk_arb_poll_worker(void *arg)
+{
+        struct gk20a *g = (struct gk20a *)arg;
+        struct gk20a_worker *worker = &g->clk_arb_worker;
+        int get = 0;
+
+        nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
+
+        while (!nvgpu_thread_should_stop(&worker->poll_task)) {
+                int ret;
+
+                ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
+                                &worker->wq,
+                                nvgpu_clk_arb_worker_pending(g, get), 0);
+
+                if (ret == 0)
+                        nvgpu_clk_arb_worker_process(g, &get);
+        }
+        return 0;
+}
+
+static int __nvgpu_clk_arb_worker_start(struct gk20a *g)
+{
+        char thread_name[64];
+        int err = 0;
+
+        if (nvgpu_thread_is_running(&g->clk_arb_worker.poll_task))
+                return err;
+
+        nvgpu_mutex_acquire(&g->clk_arb_worker.start_lock);
+
+        /*
+         * Mutexes have implicit barriers, so there is no risk of a thread
+         * having a stale copy of the poll_task variable as the call to
+         * thread_is_running is volatile
+         */
+
+        if (nvgpu_thread_is_running(&g->clk_arb_worker.poll_task)) {
+                nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
+                return err;
+        }
+
+        snprintf(thread_name, sizeof(thread_name),
+                        "nvgpu_clk_arb_poll_%s", g->name);
+
+        err = nvgpu_thread_create(&g->clk_arb_worker.poll_task, g,
+                        nvgpu_clk_arb_poll_worker, thread_name);
+
+        nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
+        return err;
+}
+
+/**
+ * Append a work item to the worker's list.
+ *
+ * This adds work item to the end of the list and wakes the worker
+ * up immediately. If the work item already existed in the list, it's not added,
+ * because in that case it has been scheduled already but has not yet been
+ * processed.
+ */
+void nvgpu_clk_arb_worker_enqueue(struct gk20a *g,
+                struct nvgpu_clk_arb_work_item *work_item)
+{
+        nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
+
+        /*
+         * Warn if worker thread cannot run
+         */
+        if (WARN_ON(__nvgpu_clk_arb_worker_start(g))) {
+                nvgpu_warn(g, "clk arb worker cannot run!");
+                return;
+        }
+
+        nvgpu_spinlock_acquire(&g->clk_arb_worker.items_lock);
+        if (!nvgpu_list_empty(&work_item->worker_item)) {
+                /*
+                 * Already queued, so will get processed eventually.
+                 * The worker is probably awake already.
+                 */
+                nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
+                return;
+        }
+        nvgpu_list_add_tail(&work_item->worker_item, &g->clk_arb_worker.items);
+        nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
+
+        nvgpu_clk_arb_worker_wakeup(g);
+}
+
+/**
+ * Initialize the clk arb worker's metadata and start the background thread.
+ */
+int nvgpu_clk_arb_worker_init(struct gk20a *g)
+{
+        int err;
+
+        nvgpu_atomic_set(&g->clk_arb_worker.put, 0);
+        nvgpu_cond_init(&g->clk_arb_worker.wq);
+        nvgpu_init_list_node(&g->clk_arb_worker.items);
+        nvgpu_spinlock_init(&g->clk_arb_worker.items_lock);
+        err = nvgpu_mutex_init(&g->clk_arb_worker.start_lock);
+        if (err)
+                goto error_check;
+
+        err = __nvgpu_clk_arb_worker_start(g);
+error_check:
+        if (err) {
+                nvgpu_err(g, "failed to start clk arb poller thread");
+                return err;
+        }
+        return 0;
+}
+
 int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 {
         struct nvgpu_clk_arb *arb;
@@ -1120,15 +1312,17 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
         nvgpu_init_list_node(&arb->requests);
 
         nvgpu_cond_init(&arb->request_wq);
-        arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
-                        "vf_table_update");
-        arb->update_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
-                        "arbiter_update");
-
 
-        INIT_WORK(&arb->vf_table_fn_work, nvgpu_clk_arb_run_vf_table_cb);
+        nvgpu_init_list_node(&arb->update_vf_table_work_item.worker_item);
+        nvgpu_init_list_node(&arb->update_arb_work_item.worker_item);
+        arb->update_vf_table_work_item.arb = arb;
+        arb->update_arb_work_item.arb = arb;
+        arb->update_vf_table_work_item.item_type = CLK_ARB_WORK_UPDATE_VF_TABLE;
+        arb->update_arb_work_item.item_type = CLK_ARB_WORK_UPDATE_ARB;
 
-        INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
+        err = nvgpu_clk_arb_worker_init(g);
+        if (err < 0)
+                goto init_fail;
 
 #ifdef CONFIG_DEBUG_FS
         arb->debug = &arb->debug_pool[0];
@@ -1183,8 +1377,14 @@ void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm)
         struct nvgpu_clk_arb *arb = g->clk_arb;
 
         nvgpu_clk_arb_set_global_alarm(g, alarm);
-        if (arb->update_work_queue)
-                queue_work(arb->update_work_queue, &arb->update_fn_work);
+        nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
+}
+
+void nvgpu_clk_arb_worker_deinit(struct gk20a *g)
+{
+        nvgpu_mutex_acquire(&g->clk_arb_worker.start_lock);
+        nvgpu_thread_stop(&g->clk_arb_worker.poll_task);
+        nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
 }
 
 void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
@@ -1193,13 +1393,7 @@ void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
         int index;
 
         if (arb) {
-                cancel_work_sync(&arb->vf_table_fn_work);
-                destroy_workqueue(arb->vf_table_work_queue);
-                arb->vf_table_work_queue = NULL;
-
-                cancel_work_sync(&arb->update_fn_work);
-                destroy_workqueue(arb->update_work_queue);
-                arb->update_work_queue = NULL;
+                nvgpu_clk_arb_worker_deinit(g);
 
                 nvgpu_kfree(g, arb->gpc2clk_f_points);
                 nvgpu_kfree(g, arb->mclk_f_points);
@@ -1298,16 +1492,15 @@ void nvgpu_clk_arb_release_session(struct gk20a *g,
 
         session->zombie = true;
         nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
-        if (arb && arb->update_work_queue)
-                queue_work(arb->update_work_queue, &arb->update_fn_work);
+        if (arb)
+                nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
 }
 
 void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
 {
         struct nvgpu_clk_arb *arb = g->clk_arb;
 
-        if (arb->vf_table_work_queue)
-                queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work);
+        nvgpu_clk_arb_worker_enqueue(g, &arb->update_vf_table_work_item);
 }
 
 /* This function is inherently unsafe to call while arbiter is running
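
The comments on nvgpu_clk_arb_worker_wakeup() and nvgpu_clk_arb_worker_pending() above describe the put/get handshake this patch borrows from the existing channel worker: each producer bumps an atomic "put" counter and signals a condition variable, while the single poller thread keeps a private "get" counter, treats put != get as "work pending", and increments get once per item it drains. The following user-space sketch illustrates that handshake with C11 atomics and pthreads instead of nvgpu's primitives; the names toy_worker, toy_enqueue and toy_poll are made up for illustration and are not part of nvgpu.

/* toy_worker.c - illustrative sketch of the put/get wakeup pattern, not nvgpu code */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_worker {
        atomic_int put;             /* bumped once per enqueued item */
        pthread_mutex_t lock;
        pthread_cond_t wq;
        bool stop;
};

/* Producer: account for one more item and wake the poller (cf. worker_wakeup). */
static void toy_enqueue(struct toy_worker *w)
{
        pthread_mutex_lock(&w->lock);
        atomic_fetch_add(&w->put, 1);
        pthread_cond_signal(&w->wq);
        pthread_mutex_unlock(&w->lock);
}

/* Poller: sleep until put != get, then ack one item per increment (cf. worker_process). */
static void *toy_poll(void *arg)
{
        struct toy_worker *w = arg;
        int get = 0;

        pthread_mutex_lock(&w->lock);
        while (!w->stop) {
                while (atomic_load(&w->put) == get && !w->stop)
                        pthread_cond_wait(&w->wq, &w->lock);
                while (atomic_load(&w->put) != get) {
                        printf("processing item %d\n", get);
                        get++;
                }
        }
        pthread_mutex_unlock(&w->lock);
        return NULL;
}

int main(void)
{
        struct toy_worker w = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .wq = PTHREAD_COND_INITIALIZER,
        };
        pthread_t t;
        int i;

        pthread_create(&t, NULL, toy_poll, &w);
        for (i = 0; i < 3; i++)
                toy_enqueue(&w);

        pthread_mutex_lock(&w.lock);
        w.stop = true;
        pthread_cond_signal(&w.wq);
        pthread_mutex_unlock(&w.lock);
        pthread_join(t, NULL);
        return 0;
}

As in the driver, the counter only records how many wakeups are owed; the actual payload still travels through the locked item list, which is why the real worker can detect a spurious wakeup with no queued item and acknowledge it separately.
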
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h b/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h
index e5ada25d..464590d5 100644
--- a/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h
+++ b/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h
@@ -39,6 +39,18 @@
  * The defines here should finally move to clk_arb.h, once these are
  * refactored to be free of Linux fields.
  */
+
+enum clk_arb_work_item_type {
+        CLK_ARB_WORK_UPDATE_VF_TABLE,
+        CLK_ARB_WORK_UPDATE_ARB
+};
+
+struct nvgpu_clk_arb_work_item {
+        enum clk_arb_work_item_type item_type;
+        struct nvgpu_clk_arb *arb;
+        struct nvgpu_list_node worker_item;
+};
+
 struct nvgpu_clk_arb {
         struct nvgpu_spinlock sessions_lock;
         struct nvgpu_spinlock users_lock;
@@ -62,10 +74,8 @@ struct nvgpu_clk_arb {
         u16 gpc2clk_min, gpc2clk_max;
         u16 mclk_min, mclk_max;
 
-        struct work_struct update_fn_work;
-        struct workqueue_struct *update_work_queue;
-        struct work_struct vf_table_fn_work;
-        struct workqueue_struct *vf_table_work_queue;
+        struct nvgpu_clk_arb_work_item update_vf_table_work_item;
+        struct nvgpu_clk_arb_work_item update_arb_work_item;
 
         struct nvgpu_cond request_wq;
 
@@ -140,5 +150,14 @@ nvgpu_clk_dev_from_link(struct nvgpu_list_node *node)
                 ((uintptr_t)node - offsetof(struct nvgpu_clk_dev, link));
 };
 
+static inline struct nvgpu_clk_arb_work_item *
+nvgpu_clk_arb_work_item_from_worker_item(struct nvgpu_list_node *node)
+{
+        return (struct nvgpu_clk_arb_work_item *)
+                ((uintptr_t)node - offsetof(struct nvgpu_clk_arb_work_item, worker_item));
+};
+
+void nvgpu_clk_arb_worker_enqueue(struct gk20a *g,
+                struct nvgpu_clk_arb_work_item *work_item);
 #endif /* __NVGPU_CLK_ARB_LINUX_H__ */
 
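
The new nvgpu_clk_arb_work_item_from_worker_item() accessor above is the usual intrusive-list idiom: the nvgpu_list_node is embedded in the work item, and the owning structure is recovered by subtracting the member offset from the node pointer, the same container_of idea the removed work_struct callbacks relied on. A small standalone illustration of that idiom follows; the names work_item and list_node are made up for the example, not nvgpu's.

/* intrusive_list.c - illustrative sketch only */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct list_node {
        struct list_node *prev, *next;
};

struct work_item {
        int type;
        struct list_node node;    /* embedded ("intrusive") link */
};

/* Recover the containing work_item from a pointer to its embedded node. */
static inline struct work_item *work_item_from_node(struct list_node *n)
{
        return (struct work_item *)((uintptr_t)n - offsetof(struct work_item, node));
}

int main(void)
{
        struct work_item item = { .type = 42 };
        struct list_node *n = &item.node;

        printf("type = %d\n", work_item_from_node(n)->type);   /* prints 42 */
        return 0;
}

Keeping the link inside the item is also what lets nvgpu_clk_arb_worker_enqueue() test nvgpu_list_empty(&work_item->worker_item) to detect an already-queued item without any extra allocation.
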
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
index 6d09b4b8..039f65f8 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
@@ -424,8 +424,7 @@ int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
         nvgpu_spinlock_acquire(&session->session_lock);
         nvgpu_list_add(&dev->node, &session->targets);
         nvgpu_spinlock_release(&session->session_lock);
-        if (arb->update_work_queue)
-                queue_work(arb->update_work_queue, &arb->update_fn_work);
+        nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
 
 fdput_fd:
         fdput(fd);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index a7a08b5a..e65ed278 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1591,7 +1591,7 @@ static void gk20a_channel_worker_process(struct gk20a *g, int *get)
 static int gk20a_channel_poll_worker(void *arg)
 {
         struct gk20a *g = (struct gk20a *)arg;
-        struct gk20a_channel_worker *worker = &g->channel_worker;
+        struct gk20a_worker *worker = &g->channel_worker;
         unsigned long watchdog_interval = 100; /* milliseconds */
         struct nvgpu_timeout timeout;
         int get = 0;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 75357a82..03cfe285 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -1406,14 +1406,14 @@ struct gk20a {
         u32 ltc_count;
         u32 ltc_streamid;
 
-        struct gk20a_channel_worker {
+        struct gk20a_worker {
                 struct nvgpu_thread poll_task;
                 nvgpu_atomic_t put;
                 struct nvgpu_cond wq;
                 struct nvgpu_list_node items;
                 struct nvgpu_spinlock items_lock;
                 struct nvgpu_mutex start_lock;
-        } channel_worker;
+        } channel_worker, clk_arb_worker;
 
         struct {
                 void (*open)(struct channel_gk20a *ch);