2 files changed, 51 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index ea69d7cb..a0494e31 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1878,34 +1878,70 @@ static int gk20a_channel_poll_worker(void *arg)
        return 0;
 }
+static int __nvgpu_channel_worker_start(struct gk20a *g)
+{ /* Start the channel poll worker thread unless it is already running. */
+        char thread_name[64];
+        int err = 0; /* returned unchanged when the worker is already running */
+        if (nvgpu_thread_is_running(&g->channel_worker.poll_task))
+                return err;
+        nvgpu_mutex_acquire(&g->channel_worker.start_lock);
+        /*
+         * The check above is made without start_lock so that a channel
+         * update does not grab a mutex when the worker is already running.
+         * Check again now that the lock is held, before creating a new
+         * thread.
+         */
+        /*
+         * Mutexes have implicit barriers, so there is no risk of a thread
+         * having a stale copy of the poll_task variable as the call to
+         * thread_is_running is volatile
+         *
+         * NOTE(review): the volatile claim above is the original author's
+         * and cannot be checked from this hunk; confirm it against
+         * nvgpu_thread_is_running().
+         */
+        if (nvgpu_thread_is_running(&g->channel_worker.poll_task)) {
+                nvgpu_mutex_release(&g->channel_worker.start_lock);
+                return err;
+        }
+        snprintf(thread_name, sizeof(thread_name),
+                        "nvgpu_channel_poll_%s", g->name);
+        err = nvgpu_thread_create(&g->channel_worker.poll_task, g,
+                        gk20a_channel_poll_worker, thread_name);
+        nvgpu_mutex_release(&g->channel_worker.start_lock);
+        return err; /* result of nvgpu_thread_create() */
+}
 /**
 * Initialize the channel worker's metadata and start the background thread.
+ *
+ * Sets the worker's put atomic to 0, initializes the wq condition, the items
+ * list, items_lock and start_lock, then starts the poll worker through
+ * __nvgpu_channel_worker_start().
+ *
+ * Returns 0 on success, otherwise the error from nvgpu_mutex_init() or from
+ * __nvgpu_channel_worker_start().
+ *
+ * NOTE(review): a start_lock init failure also jumps to error_check, so it is
+ * logged with the "failed to start channel poller thread" message.
 */
 int nvgpu_channel_worker_init(struct gk20a *g)
 {
        int err;
-        char thread_name[64];
        nvgpu_atomic_set(&g->channel_worker.put, 0);
        nvgpu_cond_init(&g->channel_worker.wq);
        nvgpu_init_list_node(&g->channel_worker.items);
        nvgpu_spinlock_init(&g->channel_worker.items_lock);
-        snprintf(thread_name, sizeof(thread_name),
+        err = nvgpu_mutex_init(&g->channel_worker.start_lock);
-                        "nvgpu_channel_poll_%s", g->name);
+        if (err)
+                goto error_check;
-        err = nvgpu_thread_create(&g->channel_worker.poll_task, g,
+        err = __nvgpu_channel_worker_start(g);
-                        gk20a_channel_poll_worker, thread_name);
+error_check:
        if (err) {
                nvgpu_err(g, "failed to start channel poller thread");
                return err;
        }
        return 0;
 }
 void nvgpu_channel_worker_deinit(struct gk20a *g)
 {
+        nvgpu_mutex_acquire(&g->channel_worker.start_lock); /* keeps the stop from overlapping a worker start */
        nvgpu_thread_stop(&g->channel_worker.poll_task);
+        nvgpu_mutex_release(&g->channel_worker.start_lock); /* worker start may take the lock again */
 }
 /**
@@ -1924,6 +1960,14 @@ static void gk20a_channel_worker_enqueue(struct channel_gk20a *ch)
        gk20a_dbg_fn("");
        /*
+         * Warn if worker thread cannot run
+         */
+        if (WARN_ON(__nvgpu_channel_worker_start(g))) {
+                nvgpu_warn(g, "channel worker cannot run!");
+                return;
+        }
+        /*
         * Ref released when this item gets processed. The caller should hold
         * one ref already, so normally shouldn't fail, but the channel could
         * end up being freed between the time the caller got its reference and
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 0cd77d1e..35d58ef1 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -1215,6 +1215,7 @@ struct gk20a {
                struct nvgpu_cond wq;
                struct nvgpu_list_node items;
                struct nvgpu_spinlock items_lock;
+                struct nvgpu_mutex start_lock;
        } channel_worker;
        struct gk20a_scale_profile *scale_profile;

diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index ea69d7cb..a0494e31 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1878,34 +1878,70 @@ static int gk20a_channel_poll_worker(void *arg)
1878	return 0;	1878	return 0;
1879	}	1879	}
1880		1880
		1881	static int __nvgpu_channel_worker_start(struct gk20a *g)
		1882	{
		1883	char thread_name[64];
		1884	int err = 0;
		1885
		1886	if (nvgpu_thread_is_running(&g->channel_worker.poll_task))
		1887	return err;
		1888
		1889	nvgpu_mutex_acquire(&g->channel_worker.start_lock);
		1890
		1891	/*
		1892	* We don't want to grab a mutex on every channel update so we check
		1893	* again if the worker has been initialized before creating a new thread
		1894	*/
		1895
		1896	/*
		1897	* Mutexes have implicit barriers, so there is no risk of a thread
		1898	* having a stale copy of the poll_task variable as the call to
		1899	* thread_is_running is volatile
		1900	*/
		1901
		1902	if (nvgpu_thread_is_running(&g->channel_worker.poll_task)) {
		1903	nvgpu_mutex_release(&g->channel_worker.start_lock);
		1904	return err;
		1905	}
		1906
		1907	snprintf(thread_name, sizeof(thread_name),
		1908	"nvgpu_channel_poll_%s", g->name);
		1909
		1910	err = nvgpu_thread_create(&g->channel_worker.poll_task, g,
		1911	gk20a_channel_poll_worker, thread_name);
		1912
		1913	nvgpu_mutex_release(&g->channel_worker.start_lock);
		1914	return err;
		1915	}
1881	/**	1916	/**
1882	* Initialize the channel worker's metadata and start the background thread.	1917	* Initialize the channel worker's metadata and start the background thread.
1883	*/	1918	*/
1884	int nvgpu_channel_worker_init(struct gk20a *g)	1919	int nvgpu_channel_worker_init(struct gk20a *g)
1885	{	1920	{
1886	int err;	1921	int err;
1887	char thread_name[64];
1888		1922
1889	nvgpu_atomic_set(&g->channel_worker.put, 0);	1923	nvgpu_atomic_set(&g->channel_worker.put, 0);
1890	nvgpu_cond_init(&g->channel_worker.wq);	1924	nvgpu_cond_init(&g->channel_worker.wq);
1891	nvgpu_init_list_node(&g->channel_worker.items);	1925	nvgpu_init_list_node(&g->channel_worker.items);
1892	nvgpu_spinlock_init(&g->channel_worker.items_lock);	1926	nvgpu_spinlock_init(&g->channel_worker.items_lock);
1893	snprintf(thread_name, sizeof(thread_name),	1927	err = nvgpu_mutex_init(&g->channel_worker.start_lock);
1894	"nvgpu_channel_poll_%s", g->name);	1928	if (err)
		1929	goto error_check;
1895		1930
1896	err = nvgpu_thread_create(&g->channel_worker.poll_task, g,	1931	err = __nvgpu_channel_worker_start(g);
1897	gk20a_channel_poll_worker, thread_name);	1932	error_check:
1898	if (err) {	1933	if (err) {
1899	nvgpu_err(g, "failed to start channel poller thread");	1934	nvgpu_err(g, "failed to start channel poller thread");
1900	return err;	1935	return err;
1901	}	1936	}
1902
1903	return 0;	1937	return 0;
1904	}	1938	}
1905		1939
1906	void nvgpu_channel_worker_deinit(struct gk20a *g)	1940	void nvgpu_channel_worker_deinit(struct gk20a *g)
1907	{	1941	{
		1942	nvgpu_mutex_acquire(&g->channel_worker.start_lock);
1908	nvgpu_thread_stop(&g->channel_worker.poll_task);	1943	nvgpu_thread_stop(&g->channel_worker.poll_task);
		1944	nvgpu_mutex_release(&g->channel_worker.start_lock);
1909	}	1945	}
1910		1946
1911	/**	1947	/**
@@ -1924,6 +1960,14 @@ static void gk20a_channel_worker_enqueue(struct channel_gk20a *ch)
1924	gk20a_dbg_fn("");	1960	gk20a_dbg_fn("");
1925		1961
1926	/*	1962	/*
		1963	* Warn if worker thread cannot run
		1964	*/
		1965	if (WARN_ON(__nvgpu_channel_worker_start(g))) {
		1966	nvgpu_warn(g, "channel worker cannot run!");
		1967	return;
		1968	}
		1969
		1970	/*
1927	* Ref released when this item gets processed. The caller should hold	1971	* Ref released when this item gets processed. The caller should hold
1928	* one ref already, so normally shouldn't fail, but the channel could	1972	* one ref already, so normally shouldn't fail, but the channel could
1929	* end up being freed between the time the caller got its reference and	1973	* end up being freed between the time the caller got its reference and


diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 0cd77d1e..35d58ef1 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -1215,6 +1215,7 @@ struct gk20a {
1215	struct nvgpu_cond wq;	1215	struct nvgpu_cond wq;
1216	struct nvgpu_list_node items;	1216	struct nvgpu_list_node items;
1217	struct nvgpu_spinlock items_lock;	1217	struct nvgpu_spinlock items_lock;
		1218	struct nvgpu_mutex start_lock;
1218	} channel_worker;	1219	} channel_worker;
1219		1220
1220	struct gk20a_scale_profile *scale_profile;	1221	struct gk20a_scale_profile *scale_profile;