Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 531 ++++++++++++++++++++++---------
 1 file changed, 414 insertions(+), 117 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index ef8a3e7d..6eb1cb06 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -20,6 +20,7 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/highmem.h> /* need for nvmap.h*/
+#include <linux/kthread.h>
 #include <trace/events/gk20a.h>
 #include <linux/scatterlist.h>
 #include <linux/file.h>
@@ -91,8 +92,6 @@ static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch);
 
 static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 					bool clean_all);
-static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
-					bool wait_for_completion);
 
 /* allocate GPU channel */
 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
@@ -491,7 +490,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 	bool released_job_semaphore = false;
 	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch);
 
-	gk20a_channel_cancel_job_clean_up(ch, true);
+	/* synchronize with actual job cleanup */
+	nvgpu_mutex_acquire(&ch->joblist.cleanup_lock);
 
 	/* ensure no fences are pending */
 	nvgpu_mutex_acquire(&ch->sync_lock);
@@ -533,10 +533,16 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 	}
 	channel_gk20a_joblist_unlock(ch);
 
+	nvgpu_mutex_release(&ch->joblist.cleanup_lock);
+
 	if (released_job_semaphore)
 		wake_up_interruptible_all(&ch->semaphore_wq);
 
-	gk20a_channel_update(ch, 0);
+	/*
+	 * When closing the channel, this scheduled update holds one ref which
+	 * is waited for before advancing with freeing.
+	 */
+	gk20a_channel_update(ch);
 }
 
 void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
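A minimal sketch of how the abort and cleanup paths now serialize on the new joblist.cleanup_lock (using only calls visible in this diff; bodies elided, not part of the patch):

/* abort side: gk20a_channel_abort_clean_up() */
nvgpu_mutex_acquire(&ch->joblist.cleanup_lock);
channel_gk20a_joblist_lock(ch);
/* ... release pending job semaphores ... */
channel_gk20a_joblist_unlock(ch);
nvgpu_mutex_release(&ch->joblist.cleanup_lock);
gk20a_channel_update(ch);

/* cleanup side: gk20a_channel_clean_up_jobs() */
nvgpu_mutex_acquire(&c->joblist.cleanup_lock);
/* ... pop and free completed jobs ... */
nvgpu_mutex_release(&c->joblist.cleanup_lock);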
@@ -1016,8 +1022,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 	ch->update_fn_data = NULL;
 	nvgpu_spinlock_release(&ch->update_fn_lock);
 	cancel_work_sync(&ch->update_fn_work);
-	cancel_delayed_work_sync(&ch->clean_up.wq);
-	cancel_delayed_work_sync(&ch->timeout.wq);
 
 	/* make sure we don't have deferred interrupts pending that
 	 * could still touch the channel */
@@ -1345,7 +1349,6 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
 	ch->has_timedout = false;
 	ch->wdt_enabled = true;
 	ch->obj_class = 0;
-	ch->clean_up.scheduled = false;
 	ch->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
 	ch->timeslice_us = g->timeslice_low_priority_us;
 
@@ -2075,6 +2078,30 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 	nvgpu_kfree(g);
 }
 
+static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
+{
+	ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch);
+	ch->timeout.running = true;
+	nvgpu_timeout_init(ch->g, &ch->timeout.timer,
+			gk20a_get_channel_watchdog_timeout(ch),
+			NVGPU_TIMER_CPU_TIMER);
+}
+
+/**
+ * Start a timeout counter (watchdog) on this channel.
+ *
+ * Trigger a watchdog to recover the channel after the per-platform timeout
+ * duration (but strictly no earlier) if the channel hasn't advanced within
+ * that time.
+ *
+ * If the timeout is already running, do nothing. This should be called when
+ * new jobs are submitted. The timeout will stop when the last tracked job
+ * finishes, making the channel idle.
+ *
+ * The channel's gpfifo read pointer will be used to determine whether the job
+ * is actually stuck at that time. After the timeout duration has expired, a
+ * worker thread checks the channel and recovers it if it is stuck.
+ */
 static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
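As a rough lifecycle summary of the watchdog helpers added here and below (all names are from this diff; the submit-time call site and the pending-jobs condition are assumptions based on the comments):

gk20a_channel_timeout_start(ch);            /* on job submit: arm if not already running */

/* in gk20a_channel_clean_up_jobs(): */
bool watchdog_on = gk20a_channel_timeout_stop(ch);
/* ... free completed jobs ... */
if (jobs_still_pending && watchdog_on)      /* hypothetical condition name */
	gk20a_channel_timeout_continue(ch); /* re-enable without re-arming the timer */

/* worker thread, roughly every 100 ms: */
gk20a_channel_poll_timeouts(g);             /* runs ..timeout_check() on each live channel */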
@@ -2087,94 +2114,108 @@ static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
 
 	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
 
-	if (ch->timeout.initialized) {
+	if (ch->timeout.running) {
 		nvgpu_raw_spinlock_release(&ch->timeout.lock);
 		return;
 	}
-
-	ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch);
-	ch->timeout.initialized = true;
+	__gk20a_channel_timeout_start(ch);
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);
-
-	schedule_delayed_work(&ch->timeout.wq,
-		msecs_to_jiffies(gk20a_get_channel_watchdog_timeout(ch)));
 }
 
-static void gk20a_channel_timeout_stop(struct channel_gk20a *ch)
+/**
+ * Stop a running timeout counter (watchdog) on this channel.
+ *
+ * Make the watchdog consider the channel not running, so that it won't get
+ * recovered even if no progress is detected. Progress is not tracked if the
+ * watchdog is turned off.
+ *
+ * No guarantees are made about concurrent execution of the timeout handler.
+ * (This should be called from an update handler running in the same thread
+ * as the watchdog.)
+ */
+static bool gk20a_channel_timeout_stop(struct channel_gk20a *ch)
 {
+	bool was_running;
+
 	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
-	if (!ch->timeout.initialized) {
-		nvgpu_raw_spinlock_release(&ch->timeout.lock);
-		return;
-	}
+	was_running = ch->timeout.running;
+	ch->timeout.running = false;
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);
+	return was_running;
+}
 
-	cancel_delayed_work_sync(&ch->timeout.wq);
-
+/**
+ * Continue a previously stopped timeout.
+ *
+ * Enable the timeout again but don't reinitialize its timer.
+ *
+ * No guarantees are made about concurrent execution of the timeout handler.
+ * (This should be called from an update handler running in the same thread
+ * as the watchdog.)
+ */
+static void gk20a_channel_timeout_continue(struct channel_gk20a *ch)
+{
 	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
-	ch->timeout.initialized = false;
+	ch->timeout.running = true;
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);
 }
 
+/**
+ * Rewind the timeout on each non-dormant channel.
+ *
+ * Reschedule the timeout of each active channel for which timeouts are running
+ * as if something had just happened on each channel. This should be called
+ * when a global hang is detected that could cause a false positive on other
+ * innocent channels.
+ */
 void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
 {
-	u32 chid;
 	struct fifo_gk20a *f = &g->fifo;
+	u32 chid;
 
 	for (chid = 0; chid < f->num_channels; chid++) {
 		struct channel_gk20a *ch = &f->channel[chid];
 
-		if (gk20a_channel_get(ch)) {
-			nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
-			if (!ch->timeout.initialized) {
-				nvgpu_raw_spinlock_release(&ch->timeout.lock);
-				gk20a_channel_put(ch);
-				continue;
-			}
-			nvgpu_raw_spinlock_release(&ch->timeout.lock);
+		if (!gk20a_channel_get(ch))
+			continue;
 
-			cancel_delayed_work_sync(&ch->timeout.wq);
-			if (!ch->has_timedout)
-				schedule_delayed_work(&ch->timeout.wq,
-					msecs_to_jiffies(
-					gk20a_get_channel_watchdog_timeout(ch)));
+		nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
+		if (ch->timeout.running)
+			__gk20a_channel_timeout_start(ch);
+		nvgpu_raw_spinlock_release(&ch->timeout.lock);
 
-			gk20a_channel_put(ch);
-		}
+		gk20a_channel_put(ch);
 	}
 }
 
-static void gk20a_channel_timeout_handler(struct work_struct *work)
+/**
+ * Check if a timed out channel has hung and recover it if it has.
+ *
+ * Test if this channel has really gotten stuck at this point (should be called
+ * when the watchdog timer has expired) by checking if its gp_get has advanced
+ * or not. If no gp_get action has happened since the watchdog was started,
+ * force-reset the channel.
+ *
+ * The GPU is implicitly on at this point, because the watchdog can only run on
+ * channels that have submitted jobs pending for cleanup.
+ */
+static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
 {
+	struct gk20a *g = ch->g;
 	u32 gp_get;
-	struct gk20a *g;
-	struct channel_gk20a *ch;
 
-	ch = container_of(to_delayed_work(work), struct channel_gk20a,
-			timeout.wq);
-	ch = gk20a_channel_get(ch);
-	if (!ch)
-		return;
-
-	g = ch->g;
-
-	if (gk20a_busy(dev_from_gk20a(g))) {
-		gk20a_channel_put(ch);
-		return;
-	}
-
-	/* Need global lock since multiple channels can timeout at a time */
-	nvgpu_mutex_acquire(&g->ch_wdt_lock);
+	gk20a_dbg_fn("");
 
-	/* Get timed out job and reset the timer */
+	/* Get status and clear the timer */
 	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
 	gp_get = ch->timeout.gp_get;
-	ch->timeout.initialized = false;
+	ch->timeout.running = false;
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);
 
 	if (gk20a_userd_gp_get(ch->g, ch) != gp_get) {
+		/* Channel has advanced, reschedule */
 		gk20a_channel_timeout_start(ch);
-		goto fail_unlock;
+		return;
 	}
 
 	gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out",
@@ -2185,11 +2226,262 @@ static void gk20a_channel_timeout_handler(struct work_struct *work)
 
 	g->ops.fifo.force_reset_ch(ch,
 			NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true);
+}
+
+/**
+ * Test if the per-channel timeout has expired and handle it in that case.
+ *
+ * Each channel has an expiration-time-based watchdog. The timer is
+ * (re)initialized in two situations: when a new job is submitted on an idle
+ * channel and when the timeout is checked but progress is detected.
+ *
+ * A watchdog timeout does not yet necessarily mean a stuck channel, so this
+ * may or may not cause recovery.
+ *
+ * The timeout is stopped (disabled) after the last job in a row finishes,
+ * making the channel idle.
+ */
+static void gk20a_channel_timeout_check(struct channel_gk20a *ch)
+{
+	bool timed_out;
 
-fail_unlock:
-	nvgpu_mutex_release(&g->ch_wdt_lock);
+	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
+	timed_out = ch->timeout.running &&
+		nvgpu_timeout_expired(&ch->timeout.timer);
+	nvgpu_raw_spinlock_release(&ch->timeout.lock);
+
+	if (timed_out)
+		gk20a_channel_timeout_handler(ch);
+}
+
+/**
+ * Loop over every living channel, check timeouts and handle stuck channels.
+ */
+static void gk20a_channel_poll_timeouts(struct gk20a *g)
+{
+	unsigned int chid;
+
+	gk20a_dbg_fn("");
+
+	for (chid = 0; chid < g->fifo.num_channels; chid++) {
+		struct channel_gk20a *ch = &g->fifo.channel[chid];
+
+		if (gk20a_channel_get(ch)) {
+			gk20a_channel_timeout_check(ch);
+			gk20a_channel_put(ch);
+		}
+	}
+}
+
+/*
+ * Process one scheduled work item for this channel. Currently, the only thing
+ * the worker does is job cleanup handling.
+ */
+static void gk20a_channel_worker_process_ch(struct channel_gk20a *ch)
+{
+	gk20a_dbg_fn("");
+
+	gk20a_channel_clean_up_jobs(ch, true);
+
+	/* ref taken when enqueued */
 	gk20a_channel_put(ch);
-	gk20a_idle(dev_from_gk20a(g));
+}
+
+/**
+ * Tell the worker that one more work item needs to be done.
+ *
+ * Increase the work counter to synchronize the worker with the new work. Wake
+ * up the worker. If the worker was already running, it will handle this work
+ * before going to sleep.
+ */
+static int __gk20a_channel_worker_wakeup(struct gk20a *g)
+{
+	int put;
+
+	gk20a_dbg_fn("");
+
+	/*
+	 * Currently, the only work type is associated with a lock, which deals
+	 * with any necessary barriers. If a work type with no locking were
+	 * added, a wmb() would be needed here. See ..worker_pending() for a
+	 * pair.
+	 */
+
+	put = atomic_inc_return(&g->channel_worker.put);
+	wake_up(&g->channel_worker.wq);
+
+	return put;
+}
+
+/**
+ * Test if there is some work pending.
+ *
+ * This is a pair for __gk20a_channel_worker_wakeup to be called from the
+ * worker. The worker has an internal work counter which is incremented once
+ * per finished work item. This is compared with the number of queued jobs,
+ * which may be channels on the items list or any other types of work.
+ */
+static bool __gk20a_channel_worker_pending(struct gk20a *g, int get)
+{
+	bool pending = atomic_read(&g->channel_worker.put) != get;
+
+	/*
+	 * This would be the place for a rmb() pairing a wmb() for a wakeup
+	 * if we had any work with no implicit barriers caused by locking.
+	 */
+
+	return pending;
+}
+
+/**
+ * Process the queued work items for the worker thread serially.
+ *
+ * Flush all the work items in the queue one by one. This may block timeout
+ * handling for a short while, as these are serialized.
+ */
+static void gk20a_channel_worker_process(struct gk20a *g, int *get)
+{
+	gk20a_dbg_fn("");
+
+	while (__gk20a_channel_worker_pending(g, *get)) {
+		struct channel_gk20a *ch;
+
+		/*
+		 * If a channel is on the list, it's guaranteed to be handled
+		 * eventually just once. However, the opposite is not true. A
+		 * channel may be being processed whether or not it's on the
+		 * list.
+		 *
+		 * With this, processing channel work should be conservative
+		 * as follows: it's always safe to look at a channel found in
+		 * the list, and if someone enqueues the channel, it will be
+		 * handled eventually, even if it's being handled at the same
+		 * time. A channel is on the list only once; multiple calls to
+		 * enqueue are harmless.
+		 */
+		nvgpu_spinlock_acquire(&g->channel_worker.items_lock);
+		ch = list_first_entry_or_null(&g->channel_worker.items,
+				struct channel_gk20a,
+				worker_item);
+		if (ch)
+			list_del_init(&ch->worker_item);
+		nvgpu_spinlock_release(&g->channel_worker.items_lock);
+
+		if (!ch) {
+			/*
+			 * Woke up for some other reason, but there are
+			 * currently no other reasons than a channel added to
+			 * the items list, so warn and ack the message.
+			 */
+			gk20a_warn(g->dev, "Spurious worker event!");
+			++*get;
+			break;
+		}
+
+		gk20a_channel_worker_process_ch(ch);
+		++*get;
+	}
+}
+
+/*
+ * Look at channel states periodically, until canceled. Abort timed-out
+ * channels serially. Process all work items found in the queue.
+ */
+static int gk20a_channel_poll_worker(void *arg)
+{
+	struct gk20a *g = (struct gk20a *)arg;
+	struct gk20a_channel_worker *worker = &g->channel_worker;
+	unsigned long start_wait;
+	/* event timeout for also polling the watchdog */
+	unsigned long timeout = msecs_to_jiffies(100);
+	int get = 0;
+
+	gk20a_dbg_fn("");
+
+	start_wait = jiffies;
+	while (!kthread_should_stop()) {
+		bool got_events;
+
+		got_events = wait_event_timeout(
+				worker->wq,
+				__gk20a_channel_worker_pending(g, get),
+				timeout) > 0;
+
+		if (got_events)
+			gk20a_channel_worker_process(g, &get);
+
+		if (jiffies - start_wait >= timeout) {
+			gk20a_channel_poll_timeouts(g);
+			start_wait = jiffies;
+		}
+	}
+	return 0;
+}
+
+/**
+ * Initialize the channel worker's metadata and start the background thread.
+ */
+int nvgpu_channel_worker_init(struct gk20a *g)
+{
+	struct task_struct *task;
+
+	atomic_set(&g->channel_worker.put, 0);
+	init_waitqueue_head(&g->channel_worker.wq);
+	INIT_LIST_HEAD(&g->channel_worker.items);
+	nvgpu_spinlock_init(&g->channel_worker.items_lock);
+	task = kthread_run(gk20a_channel_poll_worker, g,
+			"nvgpu_channel_poll_%s", dev_name(g->dev));
+	if (IS_ERR(task)) {
+		gk20a_err(g->dev, "failed to start channel poller thread");
+		return PTR_ERR(task);
+	}
+	g->channel_worker.poll_task = task;
+
+	return 0;
+}
+
+void nvgpu_channel_worker_deinit(struct gk20a *g)
+{
+	kthread_stop(g->channel_worker.poll_task);
+}
+
+/**
+ * Append a channel to the worker's list, if not there already.
+ *
+ * The worker thread processes work items (channels in its work list) and polls
+ * for other things. This adds @ch to the end of the list and wakes the worker
+ * up immediately. If the channel already existed in the list, it's not added,
+ * because in that case it has been scheduled already but has not yet been
+ * processed.
+ */
+void gk20a_channel_worker_enqueue(struct channel_gk20a *ch)
+{
+	struct gk20a *g = ch->g;
+
+	gk20a_dbg_fn("");
+
+	/*
+	 * Ref released when this item gets processed. The caller should hold
+	 * one ref already, so this can't fail.
+	 */
+	if (WARN_ON(!gk20a_channel_get(ch))) {
+		gk20a_warn(g->dev, "cannot get ch ref for worker!");
+		return;
+	}
+
+	nvgpu_spinlock_acquire(&g->channel_worker.items_lock);
+	if (!list_empty(&ch->worker_item)) {
+		/*
+		 * Already queued, so will get processed eventually.
+		 * The worker is probably awake already.
+		 */
+		nvgpu_spinlock_release(&g->channel_worker.items_lock);
+		gk20a_channel_put(ch);
+		return;
+	}
+	list_add_tail(&ch->worker_item, &g->channel_worker.items);
+	nvgpu_spinlock_release(&g->channel_worker.items_lock);
+
+	__gk20a_channel_worker_wakeup(g);
 }
 
 int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
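A condensed, simplified view of the wakeup counter the new worker uses (names from this diff; the real loop only polls the watchdog when the 100 ms interval elapses):

/* producer, any context: */
gk20a_channel_worker_enqueue(ch);   /* list_add_tail() + atomic put++ + wake_up() */

/* consumer, the nvgpu_channel_poll_* kthread: */
int get = 0;
while (!kthread_should_stop()) {
	wait_event_timeout(worker->wq,
			__gk20a_channel_worker_pending(g, get), /* put != get */
			msecs_to_jiffies(100));
	gk20a_channel_worker_process(g, &get);  /* pops channels, increments get */
	gk20a_channel_poll_timeouts(g);         /* piggybacked watchdog check */
}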
@@ -2214,32 +2506,6 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
 	return 0;
 }
 
-static void gk20a_channel_schedule_job_clean_up(struct channel_gk20a *c)
-{
-	nvgpu_mutex_acquire(&c->clean_up.lock);
-
-	if (c->clean_up.scheduled) {
-		nvgpu_mutex_release(&c->clean_up.lock);
-		return;
-	}
-
-	c->clean_up.scheduled = true;
-	schedule_delayed_work(&c->clean_up.wq, 1);
-
-	nvgpu_mutex_release(&c->clean_up.lock);
-}
-
-static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
-				bool wait_for_completion)
-{
-	if (wait_for_completion)
-		cancel_delayed_work_sync(&c->clean_up.wq);
-
-	nvgpu_mutex_acquire(&c->clean_up.lock);
-	c->clean_up.scheduled = false;
-	nvgpu_mutex_release(&c->clean_up.lock);
-}
-
 static int gk20a_channel_add_job(struct channel_gk20a *c,
 				struct channel_gk20a_job *job,
 				bool skip_buffer_refcounting)
@@ -2256,7 +2522,10 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 		return err;
 	}
 
-	/* put() is done in gk20a_channel_update() when the job is done */
+	/*
+	 * Ref to hold the channel open during the job lifetime. This is
+	 * released by job cleanup launched via syncpt or sema interrupt.
+	 */
 	c = gk20a_channel_get(c);
 
 	if (c) {
@@ -2291,14 +2560,16 @@ err_put_buffers:
 	return err;
 }
 
-static void gk20a_channel_clean_up_runcb_fn(struct work_struct *work)
-{
-	struct channel_gk20a *c = container_of(to_delayed_work(work),
-			struct channel_gk20a, clean_up.wq);
-
-	gk20a_channel_clean_up_jobs(c, true);
-}
-
+/**
+ * Clean up job resources for further jobs to use.
+ * @clean_all: If true, process as many jobs as possible, otherwise just one.
+ *
+ * Loop over all jobs from the joblist until a pending job is found, or just
+ * one if clean_all is not set. Pending jobs are detected from the job's post
+ * fence, so this is only done for jobs that have job tracking resources. Free
+ * all per-job memory for completed jobs; in case of preallocated resources,
+ * this opens up slots for new jobs to be submitted.
+ */
 static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 					bool clean_all)
 {
@@ -2307,6 +2578,7 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 	struct gk20a_platform *platform;
 	struct gk20a *g;
 	int job_finished = 0;
+	bool watchdog_on = false;
 
 	c = gk20a_channel_get(c);
 	if (!c)
@@ -2321,13 +2593,25 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 	g = c->g;
 	platform = gk20a_get_platform(g->dev);
 
-	gk20a_channel_cancel_job_clean_up(c, false);
+	/*
+	 * If !clean_all, we're in a condition where watchdog isn't supported
+	 * anyway (this would be a no-op).
+	 */
+	if (clean_all)
+		watchdog_on = gk20a_channel_timeout_stop(c);
+
+	/* Synchronize with abort cleanup that needs the jobs. */
+	nvgpu_mutex_acquire(&c->joblist.cleanup_lock);
 
 	while (1) {
 		bool completed;
 
 		channel_gk20a_joblist_lock(c);
 		if (channel_gk20a_joblist_is_empty(c)) {
+			/*
+			 * No jobs in flight, timeout will remain stopped until
+			 * new jobs are submitted.
+			 */
 			channel_gk20a_joblist_unlock(c);
 			break;
 		}
@@ -2343,7 +2627,15 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 
 		completed = gk20a_fence_is_expired(job->post_fence);
 		if (!completed) {
-			gk20a_channel_timeout_start(c);
+			/*
+			 * The watchdog eventually sees an updated gp_get if
+			 * something happened in this loop. A new job can have
+			 * been submitted between the call to stop above and
+			 * this point - in that case, this is a no-op and the
+			 * new, later timeout is still used.
+			 */
+			if (clean_all && watchdog_on)
+				gk20a_channel_timeout_continue(c);
 			break;
 		}
 
@@ -2394,32 +2686,38 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 		job_finished = 1;
 		gk20a_idle(g->dev);
 
-		if (!clean_all)
+		if (!clean_all) {
+			/* Timeout isn't supported here so don't touch it. */
 			break;
+		}
 	}
 
+	nvgpu_mutex_release(&c->joblist.cleanup_lock);
+
 	if (job_finished && c->update_fn)
 		schedule_work(&c->update_fn_work);
 
 	gk20a_channel_put(c);
 }
 
-void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
+/**
+ * Schedule job cleanup work on this channel to free resources and to signal
+ * about completion.
+ *
+ * Call this when there has been an interrupt about finished jobs, or when job
+ * cleanup needs to be performed, e.g., when closing a channel. This is always
+ * safe to call even if there is nothing to clean up. Any visible actions on
+ * jobs just before calling this are guaranteed to be processed.
+ */
+void gk20a_channel_update(struct channel_gk20a *c)
 {
-	c = gk20a_channel_get(c);
-	if (!c)
-		return;
-
 	if (!c->g->power_on) { /* shutdown case */
-		gk20a_channel_put(c);
 		return;
 	}
 
 	trace_gk20a_channel_update(c->hw_chid);
-	gk20a_channel_timeout_stop(c);
-	gk20a_channel_schedule_job_clean_up(c);
-
-	gk20a_channel_put(c);
+	/* A queued channel is always checked for job cleanup. */
+	gk20a_channel_worker_enqueue(c);
 }
 
 static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c,
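Illustrative call site for the new signature, assuming the caller already holds its own channel reference (gk20a_channel_update() no longer takes or drops one, as the semaphore-wakeup hunk at the end of this diff shows):

struct channel_gk20a *c = gk20a_channel_get(ch);

if (c) {
	gk20a_channel_update(c);  /* queues deferred job cleanup on the worker */
	gk20a_channel_put(c);
}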
@@ -2809,7 +3107,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	if (c->deterministic && need_deferred_cleanup)
 		return -EINVAL;
 
-	/* gk20a_channel_update releases this ref. */
+	/* released by job cleanup via syncpt or sema interrupt */
 	err = gk20a_busy(g->dev);
 	if (err) {
 		gk20a_err(d, "failed to host gk20a to submit gpfifo, process %s",
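The power reference pairing this comment refers to, using only calls that appear elsewhere in this diff:

/* submit path: keep the GPU on for the job's lifetime */
err = gk20a_busy(g->dev);

/* ... job completes; a syncpt/sema interrupt queues cleanup ... */

/* gk20a_channel_clean_up_jobs(), once per completed job: */
gk20a_idle(g->dev);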
@@ -2929,13 +3227,12 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 #endif
 	nvgpu_mutex_init(&c->ioctl_lock);
 	nvgpu_mutex_init(&c->error_notifier_mutex);
+	nvgpu_mutex_init(&c->joblist.cleanup_lock);
 	nvgpu_spinlock_init(&c->joblist.dynamic.lock);
 	nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock);
 	nvgpu_raw_spinlock_init(&c->timeout.lock);
 	nvgpu_mutex_init(&c->sync_lock);
-	INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler);
-	INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_runcb_fn);
-	nvgpu_mutex_init(&c->clean_up.lock);
+
 	INIT_LIST_HEAD(&c->joblist.dynamic.jobs);
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 	nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
@@ -2947,6 +3244,8 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 	nvgpu_mutex_init(&c->dbg_s_lock);
 	list_add(&c->free_chs, &g->fifo.free_chs);
 
+	INIT_LIST_HEAD(&c->worker_item);
+
 	return 0;
 }
 
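Why the per-channel list node is initialized here: the enqueue path added earlier in this diff uses list_empty() on it as the "already queued" test, so the node must start in (and return to) the self-pointing state. A simplified sketch, with locking and the channel reference handling omitted:

INIT_LIST_HEAD(&c->worker_item);                 /* empty, i.e. not queued */

/* gk20a_channel_worker_enqueue(): */
if (!list_empty(&ch->worker_item))
	return;                                  /* already queued */
list_add_tail(&ch->worker_item, &g->channel_worker.items);

/* gk20a_channel_worker_process(): */
list_del_init(&ch->worker_item);                 /* empty again, can be re-queued */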
@@ -3384,8 +3683,6 @@ int gk20a_channel_suspend(struct gk20a *g)
 		gk20a_disable_channel_tsg(g, ch);
 		/* preempt the channel */
 		gk20a_fifo_preempt(g, ch);
-		gk20a_channel_timeout_stop(ch);
-		gk20a_channel_cancel_job_clean_up(ch, true);
 		/* wait for channel update notifiers */
 		if (ch->update_fn)
 			cancel_work_sync(&ch->update_fn_work);
@@ -3481,7 +3778,7 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events)
 		 * semaphore.
 		 */
 		if (!c->deterministic)
-			gk20a_channel_update(c, 0);
+			gk20a_channel_update(c);
 		}
 		gk20a_channel_put(c);
 	}