author    Konsta Holtta <kholtta@nvidia.com>    2017-02-01 03:28:38 -0500
committer mobile promotions <svcmobile_promotions@nvidia.com>    2017-03-02 20:51:03 -0500
commit    f1072a28be09dc7f5433b5e1013a76d8a87c2505
tree      68d1a5b5123834859f8ae8c4481b886b49364811
parent    0c155313e75a82a409d3438cc982ee30bb453d16
gpu: nvgpu: add worker for watchdog and job cleanup
Implement a worker thread to replace the delayed works in channel
watchdog and job cleanups. The watchdog runs by polling the channel
states periodically, and job cleanup is performed on channels that are
appended to a work queue consumed by the worker thread. Handling both
in the same thread makes it impossible for them to deadlock against
each other, as has previously happened.

The watchdog takes references to channels while checking and possibly
recovering them. Jobs in the cleanup queue hold an additional reference
which is released after the channel is processed. The worker is woken
up from its periodic sleep when channels are added to the queue.

Currently, the queue is used only for job cleanup, but it is extendable
to other per-channel work, and the worker can also process other
periodic actions that depend on channels. Neither the semantics of
timeout handling nor of job cleanup are significantly changed yet -
this patch only serializes them into one background thread.

Each job that needs cleanup is tracked and holds a reference to its
channel and a power reference, and timeouts can only be processed on
tracked channels, so the thread is always idle when the system is about
to be suspended; there is currently no need to explicitly suspend or
stop it.

Bug 1848834
Bug 1851689
Bug 1814773
Bug 200270332

Jira NVGPU-21

Change-Id: I355101802f50841ea9bd8042a017f91c931d2dc7
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1297183
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 531
1 file changed, 414 insertions(+), 117 deletions(-)
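As background for the diff below, here is a minimal, self-contained sketch of the pattern this commit introduces: one kernel thread that drains a queue of channels needing job cleanup and, on a wait timeout, also polls channel watchdogs. The struct and function names (demo_worker, demo_worker_thread, demo_worker_start) are invented for illustration and are not part of nvgpu; the kernel primitives (kthread_run(), wait_event_timeout(), the waitqueue, list and atomic counter) are the same ones the patch itself uses.

#include <linux/atomic.h>
#include <linux/err.h>
#include <linux/jiffies.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

/* Illustrative stand-in for the per-GPU worker state the patch adds. */
struct demo_worker {
	struct task_struct *task;
	wait_queue_head_t wq;
	struct list_head items;		/* channels queued for job cleanup */
	spinlock_t items_lock;
	atomic_t put;			/* incremented once per enqueued item */
};

static int demo_worker_thread(void *arg)
{
	struct demo_worker *w = arg;
	unsigned long poll = msecs_to_jiffies(100);
	int get = 0;

	while (!kthread_should_stop()) {
		/* Sleep until new work is enqueued or the poll period elapses. */
		wait_event_timeout(w->wq, atomic_read(&w->put) != get, poll);

		/* Drain queued cleanup items one by one (serialized). */
		while (atomic_read(&w->put) != get) {
			struct list_head *item = NULL;

			spin_lock(&w->items_lock);
			if (!list_empty(&w->items)) {
				item = w->items.next;
				list_del_init(item);
			}
			spin_unlock(&w->items_lock);
			get++;

			if (item) {
				/* job cleanup for this channel would go here */
			}
		}

		/*
		 * Watchdog side: walk the channels and check for expired
		 * timeouts here. (The actual patch rate-limits this with a
		 * jiffies timestamp so it runs at most once per poll period.)
		 */
	}
	return 0;
}

static int demo_worker_start(struct demo_worker *w)
{
	atomic_set(&w->put, 0);
	init_waitqueue_head(&w->wq);
	INIT_LIST_HEAD(&w->items);
	spin_lock_init(&w->items_lock);
	w->task = kthread_run(demo_worker_thread, w, "demo_channel_poll");
	return IS_ERR(w->task) ? PTR_ERR(w->task) : 0;
}

A producer mirrors gk20a_channel_worker_enqueue() in the diff: it adds the channel to items under items_lock, increments put, and calls wake_up(&w->wq).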
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index ef8a3e7d..6eb1cb06 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -20,6 +20,7 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/highmem.h> /* need for nvmap.h*/
+#include <linux/kthread.h>
 #include <trace/events/gk20a.h>
 #include <linux/scatterlist.h>
 #include <linux/file.h>
@@ -91,8 +92,6 @@ static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch);
 
 static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 		bool clean_all);
-static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
-		bool wait_for_completion);
 
 /* allocate GPU channel */
 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
@@ -491,7 +490,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 	bool released_job_semaphore = false;
 	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch);
 
-	gk20a_channel_cancel_job_clean_up(ch, true);
+	/* synchronize with actual job cleanup */
+	nvgpu_mutex_acquire(&ch->joblist.cleanup_lock);
 
 	/* ensure no fences are pending */
 	nvgpu_mutex_acquire(&ch->sync_lock);
@@ -533,10 +533,16 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 	}
 	channel_gk20a_joblist_unlock(ch);
 
+	nvgpu_mutex_release(&ch->joblist.cleanup_lock);
+
 	if (released_job_semaphore)
 		wake_up_interruptible_all(&ch->semaphore_wq);
 
-	gk20a_channel_update(ch, 0);
+	/*
+	 * When closing the channel, this scheduled update holds one ref which
+	 * is waited for before advancing with freeing.
+	 */
+	gk20a_channel_update(ch);
 }
 
 void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
@@ -1016,8 +1022,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 	ch->update_fn_data = NULL;
 	nvgpu_spinlock_release(&ch->update_fn_lock);
 	cancel_work_sync(&ch->update_fn_work);
-	cancel_delayed_work_sync(&ch->clean_up.wq);
-	cancel_delayed_work_sync(&ch->timeout.wq);
 
 	/* make sure we don't have deferred interrupts pending that
 	 * could still touch the channel */
@@ -1345,7 +1349,6 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
 	ch->has_timedout = false;
 	ch->wdt_enabled = true;
 	ch->obj_class = 0;
-	ch->clean_up.scheduled = false;
 	ch->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
 	ch->timeslice_us = g->timeslice_low_priority_us;
 
@@ -2075,6 +2078,30 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 	nvgpu_kfree(g);
 }
 
+static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
+{
+	ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch);
+	ch->timeout.running = true;
+	nvgpu_timeout_init(ch->g, &ch->timeout.timer,
+			gk20a_get_channel_watchdog_timeout(ch),
+			NVGPU_TIMER_CPU_TIMER);
+}
+
+/**
+ * Start a timeout counter (watchdog) on this channel.
+ *
+ * Trigger a watchdog to recover the channel after the per-platform timeout
+ * duration (but strictly no earlier) if the channel hasn't advanced within
+ * that time.
+ *
+ * If the timeout is already running, do nothing. This should be called when
+ * new jobs are submitted. The timeout will stop when the last tracked job
+ * finishes, making the channel idle.
+ *
+ * The channel's gpfifo read pointer will be used to determine if the job has
+ * actually stuck at that time. After the timeout duration has expired, a
+ * worker thread will consider the channel stuck and recover it if stuck.
+ */
 static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
@@ -2087,94 +2114,108 @@ static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
 
 	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
 
-	if (ch->timeout.initialized) {
+	if (ch->timeout.running) {
 		nvgpu_raw_spinlock_release(&ch->timeout.lock);
 		return;
 	}
-
-	ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch);
-	ch->timeout.initialized = true;
+	__gk20a_channel_timeout_start(ch);
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);
-
-	schedule_delayed_work(&ch->timeout.wq,
-		msecs_to_jiffies(gk20a_get_channel_watchdog_timeout(ch)));
 }
 
-static void gk20a_channel_timeout_stop(struct channel_gk20a *ch)
+/**
+ * Stop a running timeout counter (watchdog) on this channel.
+ *
+ * Make the watchdog consider the channel not running, so that it won't get
+ * recovered even if no progress is detected. Progress is not tracked if the
+ * watchdog is turned off.
+ *
+ * No guarantees are made about concurrent execution of the timeout handler.
+ * (This should be called from an update handler running in the same thread
+ * with the watchdog.)
+ */
+static bool gk20a_channel_timeout_stop(struct channel_gk20a *ch)
 {
+	bool was_running;
+
 	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
-	if (!ch->timeout.initialized) {
-		nvgpu_raw_spinlock_release(&ch->timeout.lock);
-		return;
-	}
+	was_running = ch->timeout.running;
+	ch->timeout.running = false;
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);
+	return was_running;
+}
 
-	cancel_delayed_work_sync(&ch->timeout.wq);
-
+/**
+ * Continue a previously stopped timeout
+ *
+ * Enable the timeout again but don't reinitialize its timer.
+ *
+ * No guarantees are made about concurrent execution of the timeout handler.
+ * (This should be called from an update handler running in the same thread
+ * with the watchdog.)
+ */
+static void gk20a_channel_timeout_continue(struct channel_gk20a *ch)
+{
 	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
-	ch->timeout.initialized = false;
+	ch->timeout.running = true;
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);
 }
 
+/**
+ * Rewind the timeout on each non-dormant channel.
+ *
+ * Reschedule the timeout of each active channel for which timeouts are running
+ * as if something was happened on each channel right now. This should be
+ * called when a global hang is detected that could cause a false positive on
+ * other innocent channels.
+ */
 void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
 {
-	u32 chid;
 	struct fifo_gk20a *f = &g->fifo;
+	u32 chid;
 
 	for (chid = 0; chid < f->num_channels; chid++) {
 		struct channel_gk20a *ch = &f->channel[chid];
 
-		if (gk20a_channel_get(ch)) {
-			nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
-			if (!ch->timeout.initialized) {
-				nvgpu_raw_spinlock_release(&ch->timeout.lock);
-				gk20a_channel_put(ch);
-				continue;
-			}
-			nvgpu_raw_spinlock_release(&ch->timeout.lock);
+		if (!gk20a_channel_get(ch))
+			continue;
 
-			cancel_delayed_work_sync(&ch->timeout.wq);
-			if (!ch->has_timedout)
-				schedule_delayed_work(&ch->timeout.wq,
-					msecs_to_jiffies(
-					gk20a_get_channel_watchdog_timeout(ch)));
+		nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
+		if (ch->timeout.running)
+			__gk20a_channel_timeout_start(ch);
+		nvgpu_raw_spinlock_release(&ch->timeout.lock);
 
-			gk20a_channel_put(ch);
-		}
+		gk20a_channel_put(ch);
 	}
 }
 
-static void gk20a_channel_timeout_handler(struct work_struct *work)
+/**
+ * Check if a timed out channel has hung and recover it if it has.
+ *
+ * Test if this channel has really got stuck at this point (should be called
+ * when the watchdog timer has expired) by checking if its gp_get has advanced
+ * or not. If no gp_get action happened since when the watchdog was started,
+ * force-reset the channel.
+ *
+ * The gpu is implicitly on at this point, because the watchdog can only run on
+ * channels that have submitted jobs pending for cleanup.
+ */
+static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
 {
+	struct gk20a *g = ch->g;
 	u32 gp_get;
-	struct gk20a *g;
-	struct channel_gk20a *ch;
 
-	ch = container_of(to_delayed_work(work), struct channel_gk20a,
-			timeout.wq);
-	ch = gk20a_channel_get(ch);
-	if (!ch)
-		return;
-
-	g = ch->g;
-
-	if (gk20a_busy(dev_from_gk20a(g))) {
-		gk20a_channel_put(ch);
-		return;
-	}
-
-	/* Need global lock since multiple channels can timeout at a time */
-	nvgpu_mutex_acquire(&g->ch_wdt_lock);
+	gk20a_dbg_fn("");
 
-	/* Get timed out job and reset the timer */
+	/* Get status and clear the timer */
 	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
 	gp_get = ch->timeout.gp_get;
-	ch->timeout.initialized = false;
+	ch->timeout.running = false;
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);
 
 	if (gk20a_userd_gp_get(ch->g, ch) != gp_get) {
+		/* Channel has advanced, reschedule */
 		gk20a_channel_timeout_start(ch);
-		goto fail_unlock;
+		return;
 	}
 
 	gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out",
@@ -2185,11 +2226,262 @@ static void gk20a_channel_timeout_handler(struct work_struct *work)
 
 	g->ops.fifo.force_reset_ch(ch,
 		NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true);
+}
+
+/**
+ * Test if the per-channel timeout is expired and handle the timeout in that case.
+ *
+ * Each channel has an expiration time based watchdog. The timer is
+ * (re)initialized in two situations: when a new job is submitted on an idle
+ * channel and when the timeout is checked but progress is detected.
+ *
+ * Watchdog timeout does not yet necessarily mean a stuck channel so this may
+ * or may not cause recovery.
+ *
+ * The timeout is stopped (disabled) after the last job in a row finishes
+ * making the channel idle.
+ */
+static void gk20a_channel_timeout_check(struct channel_gk20a *ch)
+{
+	bool timed_out;
 
-fail_unlock:
-	nvgpu_mutex_release(&g->ch_wdt_lock);
+	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
+	timed_out = ch->timeout.running &&
+		nvgpu_timeout_expired(&ch->timeout.timer);
+	nvgpu_raw_spinlock_release(&ch->timeout.lock);
+
+	if (timed_out)
+		gk20a_channel_timeout_handler(ch);
+}
+
+/**
+ * Loop every living channel, check timeouts and handle stuck channels.
+ */
+static void gk20a_channel_poll_timeouts(struct gk20a *g)
+{
+	unsigned int chid;
+
+	gk20a_dbg_fn("");
+
+	for (chid = 0; chid < g->fifo.num_channels; chid++) {
+		struct channel_gk20a *ch = &g->fifo.channel[chid];
+
+		if (gk20a_channel_get(ch)) {
+			gk20a_channel_timeout_check(ch);
+			gk20a_channel_put(ch);
+		}
+	}
+}
+
+/*
+ * Process one scheduled work item for this channel. Currently, the only thing
+ * the worker does is job cleanup handling.
+ */
+static void gk20a_channel_worker_process_ch(struct channel_gk20a *ch)
+{
+	gk20a_dbg_fn("");
+
+	gk20a_channel_clean_up_jobs(ch, true);
+
+	/* ref taken when enqueued */
 	gk20a_channel_put(ch);
-	gk20a_idle(dev_from_gk20a(g));
+}
+
+/**
+ * Tell the worker that one more work needs to be done.
+ *
+ * Increase the work counter to synchronize the worker with the new work. Wake
+ * up the worker. If the worker was already running, it will handle this work
+ * before going to sleep.
+ */
+static int __gk20a_channel_worker_wakeup(struct gk20a *g)
+{
+	int put;
+
+	gk20a_dbg_fn("");
+
+	/*
+	 * Currently, the only work type is associated with a lock, which deals
+	 * with any necessary barriers. If a work type with no locking were
+	 * added, a a wmb() would be needed here. See ..worker_pending() for a
+	 * pair.
+	 */
+
+	put = atomic_inc_return(&g->channel_worker.put);
+	wake_up(&g->channel_worker.wq);
+
+	return put;
+}
+
+/**
+ * Test if there is some work pending.
+ *
+ * This is a pair for __gk20a_channel_worker_wakeup to be called from the
+ * worker. The worker has an internal work counter which is incremented once
+ * per finished work item. This is compared with the number of queued jobs,
+ * which may be channels on the items list or any other types of work.
+ */
+static bool __gk20a_channel_worker_pending(struct gk20a *g, int get)
+{
+	bool pending = atomic_read(&g->channel_worker.put) != get;
+
+	/*
+	 * This would be the place for a rmb() pairing a wmb() for a wakeup
+	 * if we had any work with no implicit barriers caused by locking.
+	 */
+
+	return pending;
+}
+
+/**
+ * Process the queued works for the worker thread serially.
+ *
+ * Flush all the work items in the queue one by one. This may block timeout
+ * handling for a short while, as these are serialized.
+ */
+static void gk20a_channel_worker_process(struct gk20a *g, int *get)
+{
+	gk20a_dbg_fn("");
+
+	while (__gk20a_channel_worker_pending(g, *get)) {
+		struct channel_gk20a *ch;
+
+		/*
+		 * If a channel is on the list, it's guaranteed to be handled
+		 * eventually just once. However, the opposite is not true. A
+		 * channel may be being processed if it's on the list or not.
+		 *
+		 * With this, processing channel works should be conservative
+		 * as follows: it's always safe to look at a channel found in
+		 * the list, and if someone enqueues the channel, it will be
+		 * handled eventually, even if it's being handled at the same
+		 * time. A channel is on the list only once; multiple calls to
+		 * enqueue are harmless.
+		 */
+		nvgpu_spinlock_acquire(&g->channel_worker.items_lock);
+		ch = list_first_entry_or_null(&g->channel_worker.items,
+				struct channel_gk20a,
+				worker_item);
+		if (ch)
+			list_del_init(&ch->worker_item);
+		nvgpu_spinlock_release(&g->channel_worker.items_lock);
+
+		if (!ch) {
+			/*
+			 * Woke up for some other reason, but there are no
+			 * other reasons than a channel added in the items list
+			 * currently, so warn and ack the message.
+			 */
+			gk20a_warn(g->dev, "Spurious worker event!");
+			++*get;
+			break;
+		}
+
+		gk20a_channel_worker_process_ch(ch);
+		++*get;
+	}
+}
+
+/*
+ * Look at channel states periodically, until canceled. Abort timed out
+ * channels serially. Process all work items found in the queue.
+ */
+static int gk20a_channel_poll_worker(void *arg)
+{
+	struct gk20a *g = (struct gk20a *)arg;
+	struct gk20a_channel_worker *worker = &g->channel_worker;
+	unsigned long start_wait;
+	/* event timeout for also polling the watchdog */
+	unsigned long timeout = msecs_to_jiffies(100);
+	int get = 0;
+
+	gk20a_dbg_fn("");
+
+	start_wait = jiffies;
+	while (!kthread_should_stop()) {
+		bool got_events;
+
+		got_events = wait_event_timeout(
+				worker->wq,
+				__gk20a_channel_worker_pending(g, get),
+				timeout) > 0;
+
+		if (got_events)
+			gk20a_channel_worker_process(g, &get);
+
+		if (jiffies - start_wait >= timeout) {
+			gk20a_channel_poll_timeouts(g);
+			start_wait = jiffies;
+		}
+	}
+	return 0;
+}
+
+/**
+ * Initialize the channel worker's metadata and start the background thread.
+ */
+int nvgpu_channel_worker_init(struct gk20a *g)
+{
+	struct task_struct *task;
+
+	atomic_set(&g->channel_worker.put, 0);
+	init_waitqueue_head(&g->channel_worker.wq);
+	INIT_LIST_HEAD(&g->channel_worker.items);
+	nvgpu_spinlock_init(&g->channel_worker.items_lock);
+	task = kthread_run(gk20a_channel_poll_worker, g,
+			"nvgpu_channel_poll_%s", dev_name(g->dev));
+	if (IS_ERR(task)) {
+		gk20a_err(g->dev, "failed to start channel poller thread");
+		return PTR_ERR(task);
+	}
+	g->channel_worker.poll_task = task;
+
+	return 0;
+}
+
+void nvgpu_channel_worker_deinit(struct gk20a *g)
+{
+	kthread_stop(g->channel_worker.poll_task);
+}
+
+/**
+ * Append a channel to the worker's list, if not there already.
+ *
+ * The worker thread processes work items (channels in its work list) and polls
+ * for other things. This adds @ch to the end of the list and wakes the worker
+ * up immediately. If the channel already existed in the list, it's not added,
+ * because in that case it has been scheduled already but has not yet been
+ * processed.
+ */
+void gk20a_channel_worker_enqueue(struct channel_gk20a *ch)
+{
+	struct gk20a *g = ch->g;
+
+	gk20a_dbg_fn("");
+
+	/*
+	 * Ref released when this item gets processed. The caller should hold
+	 * one ref already, so can't fail.
+	 */
+	if (WARN_ON(!gk20a_channel_get(ch))) {
+		gk20a_warn(g->dev, "cannot get ch ref for worker!");
+		return;
+	}
+
+	nvgpu_spinlock_acquire(&g->channel_worker.items_lock);
+	if (!list_empty(&ch->worker_item)) {
+		/*
+		 * Already queued, so will get processed eventually.
+		 * The worker is probably awake already.
+		 */
+		nvgpu_spinlock_release(&g->channel_worker.items_lock);
+		gk20a_channel_put(ch);
+		return;
+	}
+	list_add_tail(&ch->worker_item, &g->channel_worker.items);
+	nvgpu_spinlock_release(&g->channel_worker.items_lock);
+
+	__gk20a_channel_worker_wakeup(g);
 }
 
 int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
@@ -2214,32 +2506,6 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
 	return 0;
 }
 
-static void gk20a_channel_schedule_job_clean_up(struct channel_gk20a *c)
-{
-	nvgpu_mutex_acquire(&c->clean_up.lock);
-
-	if (c->clean_up.scheduled) {
-		nvgpu_mutex_release(&c->clean_up.lock);
-		return;
-	}
-
-	c->clean_up.scheduled = true;
-	schedule_delayed_work(&c->clean_up.wq, 1);
-
-	nvgpu_mutex_release(&c->clean_up.lock);
-}
-
-static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
-		bool wait_for_completion)
-{
-	if (wait_for_completion)
-		cancel_delayed_work_sync(&c->clean_up.wq);
-
-	nvgpu_mutex_acquire(&c->clean_up.lock);
-	c->clean_up.scheduled = false;
-	nvgpu_mutex_release(&c->clean_up.lock);
-}
-
 static int gk20a_channel_add_job(struct channel_gk20a *c,
 		struct channel_gk20a_job *job,
 		bool skip_buffer_refcounting)
@@ -2256,7 +2522,10 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 		return err;
 	}
 
-	/* put() is done in gk20a_channel_update() when the job is done */
+	/*
+	 * Ref to hold the channel open during the job lifetime. This is
+	 * released by job cleanup launched via syncpt or sema interrupt.
+	 */
 	c = gk20a_channel_get(c);
 
 	if (c) {
@@ -2291,14 +2560,16 @@ err_put_buffers:
 	return err;
 }
 
-static void gk20a_channel_clean_up_runcb_fn(struct work_struct *work)
-{
-	struct channel_gk20a *c = container_of(to_delayed_work(work),
-			struct channel_gk20a, clean_up.wq);
-
-	gk20a_channel_clean_up_jobs(c, true);
-}
-
+/**
+ * Clean up job resources for further jobs to use.
+ * @clean_all: If true, process as many jobs as possible, otherwise just one.
+ *
+ * Loop all jobs from the joblist until a pending job is found, or just one if
+ * clean_all is not set. Pending jobs are detected from the job's post fence,
+ * so this is only done for jobs that have job tracking resources. Free all
+ * per-job memory for completed jobs; in case of preallocated resources, this
+ * opens up slots for new jobs to be submitted.
+ */
 static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 		bool clean_all)
 {
@@ -2307,6 +2578,7 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 	struct gk20a_platform *platform;
 	struct gk20a *g;
 	int job_finished = 0;
+	bool watchdog_on = false;
 
 	c = gk20a_channel_get(c);
 	if (!c)
@@ -2321,13 +2593,25 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 	g = c->g;
 	platform = gk20a_get_platform(g->dev);
 
-	gk20a_channel_cancel_job_clean_up(c, false);
+	/*
+	 * If !clean_all, we're in a condition where watchdog isn't supported
+	 * anyway (this would be a no-op).
+	 */
+	if (clean_all)
+		watchdog_on = gk20a_channel_timeout_stop(c);
+
+	/* Synchronize with abort cleanup that needs the jobs. */
+	nvgpu_mutex_acquire(&c->joblist.cleanup_lock);
 
 	while (1) {
 		bool completed;
 
 		channel_gk20a_joblist_lock(c);
 		if (channel_gk20a_joblist_is_empty(c)) {
+			/*
+			 * No jobs in flight, timeout will remain stopped until
+			 * new jobs are submitted.
+			 */
 			channel_gk20a_joblist_unlock(c);
 			break;
 		}
@@ -2343,7 +2627,15 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 
 		completed = gk20a_fence_is_expired(job->post_fence);
 		if (!completed) {
-			gk20a_channel_timeout_start(c);
+			/*
+			 * The watchdog eventually sees an updated gp_get if
+			 * something happened in this loop. A new job can have
+			 * been submitted between the above call to stop and
+			 * this - in that case, this is a no-op and the new
+			 * later timeout is still used.
+			 */
+			if (clean_all && watchdog_on)
+				gk20a_channel_timeout_continue(c);
 			break;
 		}
 
@@ -2394,32 +2686,38 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 		job_finished = 1;
 		gk20a_idle(g->dev);
 
-		if (!clean_all)
+		if (!clean_all) {
+			/* Timeout isn't supported here so don't touch it. */
 			break;
+		}
 	}
 
+	nvgpu_mutex_release(&c->joblist.cleanup_lock);
+
 	if (job_finished && c->update_fn)
 		schedule_work(&c->update_fn_work);
 
 	gk20a_channel_put(c);
 }
 
-void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
+/**
+ * Schedule a job cleanup work on this channel to free resources and to signal
+ * about completion.
+ *
+ * Call this when there has been an interrupt about finished jobs, or when job
+ * cleanup needs to be performed, e.g., when closing a channel. This is always
+ * safe to call even if there is nothing to clean up. Any visible actions on
+ * jobs just before calling this are guaranteed to be processed.
+ */
+void gk20a_channel_update(struct channel_gk20a *c)
 {
-	c = gk20a_channel_get(c);
-	if (!c)
-		return;
-
 	if (!c->g->power_on) { /* shutdown case */
-		gk20a_channel_put(c);
 		return;
 	}
 
 	trace_gk20a_channel_update(c->hw_chid);
-	gk20a_channel_timeout_stop(c);
-	gk20a_channel_schedule_job_clean_up(c);
-
-	gk20a_channel_put(c);
+	/* A queued channel is always checked for job cleanup. */
+	gk20a_channel_worker_enqueue(c);
 }
 
 static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c,
@@ -2809,7 +3107,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	if (c->deterministic && need_deferred_cleanup)
 		return -EINVAL;
 
-	/* gk20a_channel_update releases this ref. */
+	/* released by job cleanup via syncpt or sema interrupt */
 	err = gk20a_busy(g->dev);
 	if (err) {
 		gk20a_err(d, "failed to host gk20a to submit gpfifo, process %s",
@@ -2929,13 +3227,12 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 #endif
 	nvgpu_mutex_init(&c->ioctl_lock);
 	nvgpu_mutex_init(&c->error_notifier_mutex);
+	nvgpu_mutex_init(&c->joblist.cleanup_lock);
 	nvgpu_spinlock_init(&c->joblist.dynamic.lock);
 	nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock);
 	nvgpu_raw_spinlock_init(&c->timeout.lock);
 	nvgpu_mutex_init(&c->sync_lock);
-	INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler);
-	INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_runcb_fn);
-	nvgpu_mutex_init(&c->clean_up.lock);
+
 	INIT_LIST_HEAD(&c->joblist.dynamic.jobs);
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 	nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
@@ -2947,6 +3244,8 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 	nvgpu_mutex_init(&c->dbg_s_lock);
 	list_add(&c->free_chs, &g->fifo.free_chs);
 
+	INIT_LIST_HEAD(&c->worker_item);
+
 	return 0;
 }
 
@@ -3384,8 +3683,6 @@ int gk20a_channel_suspend(struct gk20a *g)
 		gk20a_disable_channel_tsg(g, ch);
 		/* preempt the channel */
 		gk20a_fifo_preempt(g, ch);
-		gk20a_channel_timeout_stop(ch);
-		gk20a_channel_cancel_job_clean_up(ch, true);
 		/* wait for channel update notifiers */
 		if (ch->update_fn)
 			cancel_work_sync(&ch->update_fn_work);
@@ -3481,7 +3778,7 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events)
 		 * semaphore.
 		 */
 		if (!c->deterministic)
-			gk20a_channel_update(c, 0);
+			gk20a_channel_update(c);
 		}
 		gk20a_channel_put(c);
 	}