diff options
author | David Nieto <dmartineznie@nvidia.com> | 2017-04-06 18:46:36 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-09-22 18:49:37 -0400 |
commit | 7eabc16b8488e20a6cbfe1a80dc99a0b046750eb (patch) | |
tree | 1adf2b98e4473d575ebce14bb45298eb9efc7e35 | |
parent | 90568a2ce58c03f457bdd4fab6675cd327ed13fd (diff) |
gpu: nvgpu: defer channel worker initialization
kthread_run can fail if SIGKILL is triggered on an application during
driver load.
In this change we defer the channel worker init to the enqueue path to avoid
this condition during driver power-on, which would cause the driver state to be
corrupted, leaving subsequent attempts to load the driver unsuccessful.
Because this code now runs at a later time, the task structure must be
protected with a mutex.
JIRA: EVLR-956
Bug 1816515
Change-Id: I3a159de2d1f03e70b2a3969730a927532ede2d6e
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1462490
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Vladislav Buzov <vbuzov@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1460689
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 56 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 1 |
2 files changed, 51 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index ea69d7cb..a0494e31 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -1878,34 +1878,70 @@ static int gk20a_channel_poll_worker(void *arg) | |||
1878 | return 0; | 1878 | return 0; |
1879 | } | 1879 | } |
1880 | 1880 | ||
1881 | static int __nvgpu_channel_worker_start(struct gk20a *g) | ||
1882 | { | ||
1883 | char thread_name[64]; | ||
1884 | int err = 0; | ||
1885 | |||
1886 | if (nvgpu_thread_is_running(&g->channel_worker.poll_task)) | ||
1887 | return err; | ||
1888 | |||
1889 | nvgpu_mutex_acquire(&g->channel_worker.start_lock); | ||
1890 | |||
1891 | /* | ||
1892 | * We don't want to grab a mutex on every channel update so we check | ||
1893 | * again if the worker has been initialized before creating a new thread | ||
1894 | */ | ||
1895 | |||
1896 | /* | ||
1897 | * Mutexes have implicit barriers, so there is no risk of a thread | ||
1898 | * having a stale copy of the poll_task variable as the call to | ||
1899 | * thread_is_running is volatile | ||
1900 | */ | ||
1901 | |||
1902 | if (nvgpu_thread_is_running(&g->channel_worker.poll_task)) { | ||
1903 | nvgpu_mutex_release(&g->channel_worker.start_lock); | ||
1904 | return err; | ||
1905 | } | ||
1906 | |||
1907 | snprintf(thread_name, sizeof(thread_name), | ||
1908 | "nvgpu_channel_poll_%s", g->name); | ||
1909 | |||
1910 | err = nvgpu_thread_create(&g->channel_worker.poll_task, g, | ||
1911 | gk20a_channel_poll_worker, thread_name); | ||
1912 | |||
1913 | nvgpu_mutex_release(&g->channel_worker.start_lock); | ||
1914 | return err; | ||
1915 | } | ||
1881 | /** | 1916 | /** |
1882 | * Initialize the channel worker's metadata and start the background thread. | 1917 | * Initialize the channel worker's metadata and start the background thread. |
1883 | */ | 1918 | */ |
1884 | int nvgpu_channel_worker_init(struct gk20a *g) | 1919 | int nvgpu_channel_worker_init(struct gk20a *g) |
1885 | { | 1920 | { |
1886 | int err; | 1921 | int err; |
1887 | char thread_name[64]; | ||
1888 | 1922 | ||
1889 | nvgpu_atomic_set(&g->channel_worker.put, 0); | 1923 | nvgpu_atomic_set(&g->channel_worker.put, 0); |
1890 | nvgpu_cond_init(&g->channel_worker.wq); | 1924 | nvgpu_cond_init(&g->channel_worker.wq); |
1891 | nvgpu_init_list_node(&g->channel_worker.items); | 1925 | nvgpu_init_list_node(&g->channel_worker.items); |
1892 | nvgpu_spinlock_init(&g->channel_worker.items_lock); | 1926 | nvgpu_spinlock_init(&g->channel_worker.items_lock); |
1893 | snprintf(thread_name, sizeof(thread_name), | 1927 | err = nvgpu_mutex_init(&g->channel_worker.start_lock); |
1894 | "nvgpu_channel_poll_%s", g->name); | 1928 | if (err) |
1929 | goto error_check; | ||
1895 | 1930 | ||
1896 | err = nvgpu_thread_create(&g->channel_worker.poll_task, g, | 1931 | err = __nvgpu_channel_worker_start(g); |
1897 | gk20a_channel_poll_worker, thread_name); | 1932 | error_check: |
1898 | if (err) { | 1933 | if (err) { |
1899 | nvgpu_err(g, "failed to start channel poller thread"); | 1934 | nvgpu_err(g, "failed to start channel poller thread"); |
1900 | return err; | 1935 | return err; |
1901 | } | 1936 | } |
1902 | |||
1903 | return 0; | 1937 | return 0; |
1904 | } | 1938 | } |
1905 | 1939 | ||
1906 | void nvgpu_channel_worker_deinit(struct gk20a *g) | 1940 | void nvgpu_channel_worker_deinit(struct gk20a *g) |
1907 | { | 1941 | { |
1942 | nvgpu_mutex_acquire(&g->channel_worker.start_lock); | ||
1908 | nvgpu_thread_stop(&g->channel_worker.poll_task); | 1943 | nvgpu_thread_stop(&g->channel_worker.poll_task); |
1944 | nvgpu_mutex_release(&g->channel_worker.start_lock); | ||
1909 | } | 1945 | } |
1910 | 1946 | ||
1911 | /** | 1947 | /** |
@@ -1924,6 +1960,14 @@ static void gk20a_channel_worker_enqueue(struct channel_gk20a *ch) | |||
1924 | gk20a_dbg_fn(""); | 1960 | gk20a_dbg_fn(""); |
1925 | 1961 | ||
1926 | /* | 1962 | /* |
1963 | * Warn if worker thread cannot run | ||
1964 | */ | ||
1965 | if (WARN_ON(__nvgpu_channel_worker_start(g))) { | ||
1966 | nvgpu_warn(g, "channel worker cannot run!"); | ||
1967 | return; | ||
1968 | } | ||
1969 | |||
1970 | /* | ||
1927 | * Ref released when this item gets processed. The caller should hold | 1971 | * Ref released when this item gets processed. The caller should hold |
1928 | * one ref already, so normally shouldn't fail, but the channel could | 1972 | * one ref already, so normally shouldn't fail, but the channel could |
1929 | * end up being freed between the time the caller got its reference and | 1973 | * end up being freed between the time the caller got its reference and |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 0cd77d1e..35d58ef1 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -1215,6 +1215,7 @@ struct gk20a { | |||
1215 | struct nvgpu_cond wq; | 1215 | struct nvgpu_cond wq; |
1216 | struct nvgpu_list_node items; | 1216 | struct nvgpu_list_node items; |
1217 | struct nvgpu_spinlock items_lock; | 1217 | struct nvgpu_spinlock items_lock; |
1218 | struct nvgpu_mutex start_lock; | ||
1218 | } channel_worker; | 1219 | } channel_worker; |
1219 | 1220 | ||
1220 | struct gk20a_scale_profile *scale_profile; | 1221 | struct gk20a_scale_profile *scale_profile; |