 drivers/gpu/nvgpu/common/nvgpu_common.c      |   1
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c      | 531
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h      |  33
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c |   2
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c         |   4
 drivers/gpu/nvgpu/gk20a/gk20a.h              |  10
 drivers/gpu/nvgpu/vgpu/fifo_vgpu.c           |   4
 drivers/gpu/nvgpu/vgpu/vgpu.c                |   1
 8 files changed, 452 insertions(+), 134 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/nvgpu_common.c b/drivers/gpu/nvgpu/common/nvgpu_common.c
index 521ccd9d..16640aa6 100644
--- a/drivers/gpu/nvgpu/common/nvgpu_common.c
+++ b/drivers/gpu/nvgpu/common/nvgpu_common.c
@@ -39,7 +39,6 @@ static void nvgpu_init_vars(struct gk20a *g)
 	nvgpu_mutex_init(&platform->railgate_lock);
 	nvgpu_mutex_init(&g->dbg_sessions_lock);
 	nvgpu_mutex_init(&g->client_lock);
-	nvgpu_mutex_init(&g->ch_wdt_lock);
 	nvgpu_mutex_init(&g->poweroff_lock);
 
 	g->regs_saved = g->regs;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index ef8a3e7d..6eb1cb06 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -20,6 +20,7 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/highmem.h> /* need for nvmap.h*/
+#include <linux/kthread.h>
 #include <trace/events/gk20a.h>
 #include <linux/scatterlist.h>
 #include <linux/file.h>
@@ -91,8 +92,6 @@ static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch);
 
 static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 					bool clean_all);
-static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
-				bool wait_for_completion);
 
 /* allocate GPU channel */
 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
@@ -491,7 +490,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 	bool released_job_semaphore = false;
 	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch);
 
-	gk20a_channel_cancel_job_clean_up(ch, true);
+	/* synchronize with actual job cleanup */
+	nvgpu_mutex_acquire(&ch->joblist.cleanup_lock);
 
 	/* ensure no fences are pending */
 	nvgpu_mutex_acquire(&ch->sync_lock);
@@ -533,10 +533,16 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 	}
 	channel_gk20a_joblist_unlock(ch);
 
+	nvgpu_mutex_release(&ch->joblist.cleanup_lock);
+
 	if (released_job_semaphore)
 		wake_up_interruptible_all(&ch->semaphore_wq);
 
-	gk20a_channel_update(ch, 0);
+	/*
+	 * When closing the channel, this scheduled update holds one ref which
+	 * is waited for before advancing with freeing.
+	 */
+	gk20a_channel_update(ch);
 }
 
 void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
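The change above replaces "cancel any pending deferred cleanup and wait" with plain mutual exclusion: abort and the asynchronous job cleanup now both take joblist.cleanup_lock around the job walk. A minimal userspace sketch of that locking shape, with pthreads standing in for nvgpu_mutex and a counter standing in for the job list (all names here are illustrative, not driver API):

#include <pthread.h>

struct chan {
	pthread_mutex_t cleanup_lock;
	int jobs; /* stand-in for the tracked job list */
};

/* Worker-side cleanup: owns the job list while freeing resources. */
static void worker_clean_up_jobs(struct chan *c)
{
	pthread_mutex_lock(&c->cleanup_lock);
	while (c->jobs > 0)
		c->jobs--; /* free one completed job */
	pthread_mutex_unlock(&c->cleanup_lock);
}

/* Abort (channel close): blocks until any in-flight cleanup pass ends,
 * then releases the remaining jobs' sync resources itself. */
static void abort_clean_up(struct chan *c)
{
	pthread_mutex_lock(&c->cleanup_lock);
	while (c->jobs > 0)
		c->jobs--;
	pthread_mutex_unlock(&c->cleanup_lock);
}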
@@ -1016,8 +1022,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 	ch->update_fn_data = NULL;
 	nvgpu_spinlock_release(&ch->update_fn_lock);
 	cancel_work_sync(&ch->update_fn_work);
-	cancel_delayed_work_sync(&ch->clean_up.wq);
-	cancel_delayed_work_sync(&ch->timeout.wq);
 
 	/* make sure we don't have deferred interrupts pending that
 	 * could still touch the channel */
@@ -1345,7 +1349,6 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
 	ch->has_timedout = false;
 	ch->wdt_enabled = true;
 	ch->obj_class = 0;
-	ch->clean_up.scheduled = false;
 	ch->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
 	ch->timeslice_us = g->timeslice_low_priority_us;
 
@@ -2075,6 +2078,30 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 	nvgpu_kfree(g);
 }
 
+static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
+{
+	ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch);
+	ch->timeout.running = true;
+	nvgpu_timeout_init(ch->g, &ch->timeout.timer,
+			gk20a_get_channel_watchdog_timeout(ch),
+			NVGPU_TIMER_CPU_TIMER);
+}
+
+/**
+ * Start a timeout counter (watchdog) on this channel.
+ *
+ * Trigger a watchdog to recover the channel after the per-platform timeout
+ * duration (but strictly no earlier) if the channel hasn't advanced within
+ * that time.
+ *
+ * If the timeout is already running, do nothing. This should be called when
+ * new jobs are submitted. The timeout will stop when the last tracked job
+ * finishes, making the channel idle.
+ *
+ * The channel's gpfifo read pointer will be used to determine whether the
+ * job has actually gotten stuck at that time. After the timeout duration has
+ * expired, a worker thread will consider the channel stuck and recover it if
+ * it is.
+ */
 static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
@@ -2087,94 +2114,108 @@ static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
 
 	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
 
-	if (ch->timeout.initialized) {
+	if (ch->timeout.running) {
 		nvgpu_raw_spinlock_release(&ch->timeout.lock);
 		return;
 	}
-
-	ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch);
-	ch->timeout.initialized = true;
+	__gk20a_channel_timeout_start(ch);
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);
-
-	schedule_delayed_work(&ch->timeout.wq,
-		msecs_to_jiffies(gk20a_get_channel_watchdog_timeout(ch)));
 }
 
-static void gk20a_channel_timeout_stop(struct channel_gk20a *ch)
+/**
+ * Stop a running timeout counter (watchdog) on this channel.
+ *
+ * Make the watchdog consider the channel not running, so that it won't get
+ * recovered even if no progress is detected. Progress is not tracked if the
+ * watchdog is turned off.
+ *
+ * No guarantees are made about concurrent execution of the timeout handler.
+ * (This should be called from an update handler running in the same thread
+ * as the watchdog.)
+ */
+static bool gk20a_channel_timeout_stop(struct channel_gk20a *ch)
 {
+	bool was_running;
+
 	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
-	if (!ch->timeout.initialized) {
-		nvgpu_raw_spinlock_release(&ch->timeout.lock);
-		return;
-	}
+	was_running = ch->timeout.running;
+	ch->timeout.running = false;
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);
+	return was_running;
+}
 
-	cancel_delayed_work_sync(&ch->timeout.wq);
-
+/**
+ * Continue a previously stopped timeout.
+ *
+ * Enable the timeout again but don't reinitialize its timer.
+ *
+ * No guarantees are made about concurrent execution of the timeout handler.
+ * (This should be called from an update handler running in the same thread
+ * as the watchdog.)
+ */
+static void gk20a_channel_timeout_continue(struct channel_gk20a *ch)
+{
 	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
-	ch->timeout.initialized = false;
+	ch->timeout.running = true;
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);
 }
 
+/**
+ * Rewind the timeout on each non-dormant channel.
+ *
+ * Reschedule the timeout of each active channel on which timeouts are
+ * running, as if something had just happened on each channel. This should be
+ * called when a global hang is detected that could cause a false positive on
+ * other innocent channels.
+ */
 void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
 {
-	u32 chid;
 	struct fifo_gk20a *f = &g->fifo;
+	u32 chid;
 
 	for (chid = 0; chid < f->num_channels; chid++) {
 		struct channel_gk20a *ch = &f->channel[chid];
 
-		if (gk20a_channel_get(ch)) {
-			nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
-			if (!ch->timeout.initialized) {
-				nvgpu_raw_spinlock_release(&ch->timeout.lock);
-				gk20a_channel_put(ch);
-				continue;
-			}
-			nvgpu_raw_spinlock_release(&ch->timeout.lock);
+		if (!gk20a_channel_get(ch))
+			continue;
 
-			cancel_delayed_work_sync(&ch->timeout.wq);
-			if (!ch->has_timedout)
-				schedule_delayed_work(&ch->timeout.wq,
-					msecs_to_jiffies(
-					gk20a_get_channel_watchdog_timeout(ch)));
+		nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
+		if (ch->timeout.running)
+			__gk20a_channel_timeout_start(ch);
+		nvgpu_raw_spinlock_release(&ch->timeout.lock);
 
-			gk20a_channel_put(ch);
-		}
+		gk20a_channel_put(ch);
 	}
 }
 
-static void gk20a_channel_timeout_handler(struct work_struct *work)
+/**
+ * Check if a timed out channel has hung and recover it if it has.
+ *
+ * Test if this channel has really gotten stuck at this point (this should be
+ * called when the watchdog timer has expired) by checking whether its gp_get
+ * has advanced or not. If no gp_get action has happened since the watchdog
+ * was started, force-reset the channel.
+ *
+ * The gpu is implicitly on at this point, because the watchdog can only run
+ * on channels that have submitted jobs pending for cleanup.
+ */
+static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
 {
+	struct gk20a *g = ch->g;
 	u32 gp_get;
-	struct gk20a *g;
-	struct channel_gk20a *ch;
 
-	ch = container_of(to_delayed_work(work), struct channel_gk20a,
-			timeout.wq);
-	ch = gk20a_channel_get(ch);
-	if (!ch)
-		return;
-
-	g = ch->g;
-
-	if (gk20a_busy(dev_from_gk20a(g))) {
-		gk20a_channel_put(ch);
-		return;
-	}
-
-	/* Need global lock since multiple channels can timeout at a time */
-	nvgpu_mutex_acquire(&g->ch_wdt_lock);
+	gk20a_dbg_fn("");
 
-	/* Get timed out job and reset the timer */
+	/* Get status and clear the timer */
 	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
 	gp_get = ch->timeout.gp_get;
-	ch->timeout.initialized = false;
+	ch->timeout.running = false;
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);
 
 	if (gk20a_userd_gp_get(ch->g, ch) != gp_get) {
+		/* Channel has advanced, reschedule */
 		gk20a_channel_timeout_start(ch);
-		goto fail_unlock;
+		return;
 	}
 
 	gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out",
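The stop/continue pair above exists for the cleanup path, reworked later in this patch: cleanup first stops the watchdog, reaps finished jobs, and re-enables the watchdog only when it was running before and unfinished work remains. A condensed sketch of that protocol (struct chan and the helpers are stand-ins for the driver types, purely for illustration):

#include <stdbool.h>

struct chan {
	bool wdt_running;
	int jobs;      /* tracked jobs, oldest first */
	int jobs_done; /* prefix of those that have completed */
};

static bool timeout_stop(struct chan *c)
{
	bool was_running = c->wdt_running;

	c->wdt_running = false;
	return was_running;
}

static void timeout_continue(struct chan *c)
{
	c->wdt_running = true;
}

/* Mirrors the reworked gk20a_channel_clean_up_jobs() flow. */
static void clean_up_jobs(struct chan *c)
{
	bool wdt_was_on = timeout_stop(c);

	while (c->jobs > 0) {
		if (c->jobs_done == 0) {
			/* a job is still pending: keep watching it */
			if (wdt_was_on)
				timeout_continue(c);
			break;
		}
		c->jobs--; /* free one finished job's resources */
		c->jobs_done--;
	}
	/* if the list drained, the watchdog stays off until the next submit */
}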
@@ -2185,11 +2226,262 @@ static void gk20a_channel_timeout_handler(struct work_struct *work)
 
 	g->ops.fifo.force_reset_ch(ch,
 		NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true);
+}
+
+/**
+ * Test if the per-channel timeout is expired and handle the timeout in that
+ * case.
+ *
+ * Each channel has an expiration-time-based watchdog. The timer is
+ * (re)initialized in two situations: when a new job is submitted on an idle
+ * channel and when the timeout is checked but progress is detected.
+ *
+ * Watchdog timeout does not yet necessarily mean a stuck channel, so this
+ * may or may not cause recovery.
+ *
+ * The timeout is stopped (disabled) after the last job in a row finishes,
+ * making the channel idle.
+ */
+static void gk20a_channel_timeout_check(struct channel_gk20a *ch)
+{
+	bool timed_out;
 
-fail_unlock:
-	nvgpu_mutex_release(&g->ch_wdt_lock);
+	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
+	timed_out = ch->timeout.running &&
+		nvgpu_timeout_expired(&ch->timeout.timer);
+	nvgpu_raw_spinlock_release(&ch->timeout.lock);
+
+	if (timed_out)
+		gk20a_channel_timeout_handler(ch);
+}
+
+/**
+ * Loop over every living channel, check timeouts and handle stuck channels.
+ */
+static void gk20a_channel_poll_timeouts(struct gk20a *g)
+{
+	unsigned int chid;
+
+	gk20a_dbg_fn("");
+
+	for (chid = 0; chid < g->fifo.num_channels; chid++) {
+		struct channel_gk20a *ch = &g->fifo.channel[chid];
+
+		if (gk20a_channel_get(ch)) {
+			gk20a_channel_timeout_check(ch);
+			gk20a_channel_put(ch);
+		}
+	}
+}
+
+/*
+ * Process one scheduled work item for this channel. Currently, the only
+ * thing the worker does is job cleanup handling.
+ */
+static void gk20a_channel_worker_process_ch(struct channel_gk20a *ch)
+{
+	gk20a_dbg_fn("");
+
+	gk20a_channel_clean_up_jobs(ch, true);
+
+	/* ref taken when enqueued */
 	gk20a_channel_put(ch);
-	gk20a_idle(dev_from_gk20a(g));
+}
+
+/**
+ * Tell the worker that one more piece of work needs to be done.
+ *
+ * Increase the work counter to synchronize the worker with the new work.
+ * Wake up the worker. If the worker was already running, it will handle this
+ * work before going to sleep.
+ */
+static int __gk20a_channel_worker_wakeup(struct gk20a *g)
+{
+	int put;
+
+	gk20a_dbg_fn("");
+
+	/*
+	 * Currently, the only work type is associated with a lock, which
+	 * deals with any necessary barriers. If a work type with no locking
+	 * were added, a wmb() would be needed here. See ..worker_pending()
+	 * for a pair.
+	 */
+
+	put = atomic_inc_return(&g->channel_worker.put);
+	wake_up(&g->channel_worker.wq);
+
+	return put;
+}
+
+/**
+ * Test if there is some work pending.
+ *
+ * This is the pair of __gk20a_channel_worker_wakeup, to be called from the
+ * worker. The worker has an internal work counter which is incremented once
+ * per finished work item. This is compared with the number of queued jobs,
+ * which may be channels on the items list or any other types of work.
+ */
+static bool __gk20a_channel_worker_pending(struct gk20a *g, int get)
+{
+	bool pending = atomic_read(&g->channel_worker.put) != get;
+
+	/*
+	 * This would be the place for a rmb() pairing a wmb() for a wakeup
+	 * if we had any work with no implicit barriers caused by locking.
+	 */
+
+	return pending;
+}
+
+/**
+ * Process the queued work items for the worker thread serially.
+ *
+ * Flush all the work items in the queue one by one. This may block timeout
+ * handling for a short while, as these are serialized.
+ */
+static void gk20a_channel_worker_process(struct gk20a *g, int *get)
+{
+	gk20a_dbg_fn("");
+
+	while (__gk20a_channel_worker_pending(g, *get)) {
+		struct channel_gk20a *ch;
+
+		/*
+		 * If a channel is on the list, it's guaranteed to be handled
+		 * eventually just once. However, the opposite is not true: a
+		 * channel may be under processing whether or not it's on the
+		 * list.
+		 *
+		 * Because of this, processing channel work should be
+		 * conservative as follows: it's always safe to look at a
+		 * channel found in the list, and if someone enqueues the
+		 * channel, it will be handled eventually, even if it's being
+		 * handled at the same time. A channel is on the list only
+		 * once; multiple calls to enqueue are harmless.
+		 */
+		nvgpu_spinlock_acquire(&g->channel_worker.items_lock);
+		ch = list_first_entry_or_null(&g->channel_worker.items,
+				struct channel_gk20a,
+				worker_item);
+		if (ch)
+			list_del_init(&ch->worker_item);
+		nvgpu_spinlock_release(&g->channel_worker.items_lock);
+
+		if (!ch) {
+			/*
+			 * Woke up for some other reason, but there are no
+			 * other reasons than a channel added in the items
+			 * list currently, so warn and ack the message.
+			 */
+			gk20a_warn(g->dev, "Spurious worker event!");
+			++*get;
+			break;
+		}
+
+		gk20a_channel_worker_process_ch(ch);
+		++*get;
+	}
+}
+
+/*
+ * Look at channel states periodically, until canceled. Abort timed out
+ * channels serially. Process all work items found in the queue.
+ */
+static int gk20a_channel_poll_worker(void *arg)
+{
+	struct gk20a *g = (struct gk20a *)arg;
+	struct gk20a_channel_worker *worker = &g->channel_worker;
+	unsigned long start_wait;
+	/* event timeout for also polling the watchdog */
+	unsigned long timeout = msecs_to_jiffies(100);
+	int get = 0;
+
+	gk20a_dbg_fn("");
+
+	start_wait = jiffies;
+	while (!kthread_should_stop()) {
+		bool got_events;
+
+		got_events = wait_event_timeout(
+				worker->wq,
+				__gk20a_channel_worker_pending(g, get),
+				timeout) > 0;
+
+		if (got_events)
+			gk20a_channel_worker_process(g, &get);
+
+		if (jiffies - start_wait >= timeout) {
+			gk20a_channel_poll_timeouts(g);
+			start_wait = jiffies;
+		}
+	}
+	return 0;
+}
+
+/**
+ * Initialize the channel worker's metadata and start the background thread.
+ */
+int nvgpu_channel_worker_init(struct gk20a *g)
+{
+	struct task_struct *task;
+
+	atomic_set(&g->channel_worker.put, 0);
+	init_waitqueue_head(&g->channel_worker.wq);
+	INIT_LIST_HEAD(&g->channel_worker.items);
+	nvgpu_spinlock_init(&g->channel_worker.items_lock);
+	task = kthread_run(gk20a_channel_poll_worker, g,
+			"nvgpu_channel_poll_%s", dev_name(g->dev));
+	if (IS_ERR(task)) {
+		gk20a_err(g->dev, "failed to start channel poller thread");
+		return PTR_ERR(task);
+	}
+	g->channel_worker.poll_task = task;
+
+	return 0;
+}
+
+void nvgpu_channel_worker_deinit(struct gk20a *g)
+{
+	kthread_stop(g->channel_worker.poll_task);
+}
+
+/**
+ * Append a channel to the worker's list, if not there already.
+ *
+ * The worker thread processes work items (channels in its work list) and
+ * polls for other things. This adds @ch to the end of the list and wakes the
+ * worker up immediately. If the channel already existed in the list, it's
+ * not added, because in that case it has been scheduled already but has not
+ * yet been processed.
+ */
+void gk20a_channel_worker_enqueue(struct channel_gk20a *ch)
+{
+	struct gk20a *g = ch->g;
+
+	gk20a_dbg_fn("");
+
+	/*
+	 * Ref released when this item gets processed. The caller already
+	 * holds one ref, so this can't fail.
+	 */
+	if (WARN_ON(!gk20a_channel_get(ch))) {
+		gk20a_warn(g->dev, "cannot get ch ref for worker!");
+		return;
+	}
+
+	nvgpu_spinlock_acquire(&g->channel_worker.items_lock);
+	if (!list_empty(&ch->worker_item)) {
+		/*
+		 * Already queued, so it will get processed eventually.
+		 * The worker is probably awake already.
+		 */
+		nvgpu_spinlock_release(&g->channel_worker.items_lock);
+		gk20a_channel_put(ch);
+		return;
+	}
+	list_add_tail(&ch->worker_item, &g->channel_worker.items);
+	nvgpu_spinlock_release(&g->channel_worker.items_lock);
+
+	__gk20a_channel_worker_wakeup(g);
 }
 
 int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
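The worker replaces per-channel delayed work with a single kthread driven by a put/get event counter: producers bump "put" and wake the thread, and the thread compares its private "get" against "put" to decide whether work is pending; the bounded wait also doubles as the 100 ms watchdog polling period. A condensed userspace analogue using pthreads, where the condition variable plays the role of the kernel wait queue (names are illustrative):

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int put; /* bumped once per enqueued work item */

static void worker_enqueue(void)
{
	pthread_mutex_lock(&lock);
	put++;                      /* atomic_inc_return() in the driver */
	pthread_cond_signal(&cond); /* wake_up() in the driver */
	pthread_mutex_unlock(&lock);
}

static void *worker_thread(void *arg)
{
	int get = 0; /* worker-private count of processed items */

	(void)arg;
	for (;;) {
		pthread_mutex_lock(&lock);
		while (put == get) /* wait_event_timeout() in the driver */
			pthread_cond_wait(&cond, &lock);
		pthread_mutex_unlock(&lock);

		/* pop one channel off the items list, clean up its jobs */
		get++;
	}
	return NULL;
}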
@@ -2214,32 +2506,6 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
 	return 0;
 }
 
-static void gk20a_channel_schedule_job_clean_up(struct channel_gk20a *c)
-{
-	nvgpu_mutex_acquire(&c->clean_up.lock);
-
-	if (c->clean_up.scheduled) {
-		nvgpu_mutex_release(&c->clean_up.lock);
-		return;
-	}
-
-	c->clean_up.scheduled = true;
-	schedule_delayed_work(&c->clean_up.wq, 1);
-
-	nvgpu_mutex_release(&c->clean_up.lock);
-}
-
-static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
-				bool wait_for_completion)
-{
-	if (wait_for_completion)
-		cancel_delayed_work_sync(&c->clean_up.wq);
-
-	nvgpu_mutex_acquire(&c->clean_up.lock);
-	c->clean_up.scheduled = false;
-	nvgpu_mutex_release(&c->clean_up.lock);
-}
-
 static int gk20a_channel_add_job(struct channel_gk20a *c,
 				struct channel_gk20a_job *job,
 				bool skip_buffer_refcounting)
@@ -2256,7 +2522,10 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 		return err;
 	}
 
-	/* put() is done in gk20a_channel_update() when the job is done */
+	/*
+	 * Ref to hold the channel open during the job lifetime. This is
+	 * released by job cleanup launched via syncpt or sema interrupt.
+	 */
 	c = gk20a_channel_get(c);
 
 	if (c) {
@@ -2291,14 +2560,16 @@ err_put_buffers:
 	return err;
 }
 
-static void gk20a_channel_clean_up_runcb_fn(struct work_struct *work)
-{
-	struct channel_gk20a *c = container_of(to_delayed_work(work),
-			struct channel_gk20a, clean_up.wq);
-
-	gk20a_channel_clean_up_jobs(c, true);
-}
-
+/**
+ * Clean up job resources for further jobs to use.
+ * @clean_all: If true, process as many jobs as possible, otherwise just one.
+ *
+ * Loop over all jobs in the joblist until a pending job is found, or handle
+ * just one if clean_all is not set. Pending jobs are detected from the job's
+ * post fence, so this is only done for jobs that have job tracking
+ * resources. Free all per-job memory for completed jobs; in case of
+ * preallocated resources, this opens up slots for new jobs to be submitted.
+ */
 static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 					bool clean_all)
 {
@@ -2307,6 +2578,7 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 	struct gk20a_platform *platform;
 	struct gk20a *g;
 	int job_finished = 0;
+	bool watchdog_on = false;
 
 	c = gk20a_channel_get(c);
 	if (!c)
@@ -2321,13 +2593,25 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 	g = c->g;
 	platform = gk20a_get_platform(g->dev);
 
-	gk20a_channel_cancel_job_clean_up(c, false);
+	/*
+	 * If !clean_all, we're in a condition where the watchdog isn't
+	 * supported anyway (this would be a no-op).
+	 */
+	if (clean_all)
+		watchdog_on = gk20a_channel_timeout_stop(c);
+
+	/* Synchronize with abort cleanup that needs the jobs. */
+	nvgpu_mutex_acquire(&c->joblist.cleanup_lock);
 
 	while (1) {
 		bool completed;
 
 		channel_gk20a_joblist_lock(c);
 		if (channel_gk20a_joblist_is_empty(c)) {
+			/*
+			 * No jobs in flight, timeout will remain stopped
+			 * until new jobs are submitted.
+			 */
 			channel_gk20a_joblist_unlock(c);
 			break;
 		}
@@ -2343,7 +2627,15 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 
 		completed = gk20a_fence_is_expired(job->post_fence);
 		if (!completed) {
-			gk20a_channel_timeout_start(c);
+			/*
+			 * The watchdog eventually sees an updated gp_get if
+			 * something happened in this loop. A new job can have
+			 * been submitted between the stop call above and this
+			 * point; in that case this is a no-op and the new,
+			 * later timeout is still used.
+			 */
+			if (clean_all && watchdog_on)
+				gk20a_channel_timeout_continue(c);
 			break;
 		}
 
@@ -2394,32 +2686,38 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 		job_finished = 1;
 		gk20a_idle(g->dev);
 
-		if (!clean_all)
+		if (!clean_all) {
+			/* Timeout isn't supported here so don't touch it. */
 			break;
+		}
 	}
 
+	nvgpu_mutex_release(&c->joblist.cleanup_lock);
+
 	if (job_finished && c->update_fn)
 		schedule_work(&c->update_fn_work);
 
 	gk20a_channel_put(c);
 }
 
-void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
+/**
+ * Schedule job cleanup work on this channel to free resources and to signal
+ * completion.
+ *
+ * Call this when there has been an interrupt about finished jobs, or when
+ * job cleanup needs to be performed, e.g., when closing a channel. This is
+ * always safe to call even if there is nothing to clean up. Any visible
+ * actions on jobs just before calling this are guaranteed to be processed.
+ */
+void gk20a_channel_update(struct channel_gk20a *c)
 {
-	c = gk20a_channel_get(c);
-	if (!c)
-		return;
-
 	if (!c->g->power_on) { /* shutdown case */
-		gk20a_channel_put(c);
 		return;
 	}
 
 	trace_gk20a_channel_update(c->hw_chid);
-	gk20a_channel_timeout_stop(c);
-	gk20a_channel_schedule_job_clean_up(c);
-
-	gk20a_channel_put(c);
+	/* A queued channel is always checked for job cleanup. */
+	gk20a_channel_worker_enqueue(c);
 }
 
 static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c,
@@ -2809,7 +3107,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	if (c->deterministic && need_deferred_cleanup)
 		return -EINVAL;
 
-	/* gk20a_channel_update releases this ref. */
+	/* released by job cleanup via syncpt or sema interrupt */
 	err = gk20a_busy(g->dev);
 	if (err) {
 		gk20a_err(d, "failed to host gk20a to submit gpfifo, process %s",
@@ -2929,13 +3227,12 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 #endif
 	nvgpu_mutex_init(&c->ioctl_lock);
 	nvgpu_mutex_init(&c->error_notifier_mutex);
+	nvgpu_mutex_init(&c->joblist.cleanup_lock);
 	nvgpu_spinlock_init(&c->joblist.dynamic.lock);
 	nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock);
 	nvgpu_raw_spinlock_init(&c->timeout.lock);
 	nvgpu_mutex_init(&c->sync_lock);
-	INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler);
-	INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_runcb_fn);
-	nvgpu_mutex_init(&c->clean_up.lock);
+
 	INIT_LIST_HEAD(&c->joblist.dynamic.jobs);
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 	nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
@@ -2947,6 +3244,8 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 	nvgpu_mutex_init(&c->dbg_s_lock);
 	list_add(&c->free_chs, &g->fifo.free_chs);
 
+	INIT_LIST_HEAD(&c->worker_item);
+
 	return 0;
 }
 
@@ -3384,8 +3683,6 @@ int gk20a_channel_suspend(struct gk20a *g)
 			gk20a_disable_channel_tsg(g, ch);
 			/* preempt the channel */
 			gk20a_fifo_preempt(g, ch);
-			gk20a_channel_timeout_stop(ch);
-			gk20a_channel_cancel_job_clean_up(ch, true);
 			/* wait for channel update notifiers */
 			if (ch->update_fn)
 				cancel_work_sync(&ch->update_fn_work);
@@ -3481,7 +3778,7 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events)
 		 * semaphore.
 		 */
 		if (!c->deterministic)
-			gk20a_channel_update(c, 0);
+			gk20a_channel_update(c);
 	}
 	gk20a_channel_put(c);
 }
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 14ee9f69..d9913cd7 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -27,6 +27,7 @@
 #include <uapi/linux/nvgpu.h>
 
 #include <nvgpu/lock.h>
+#include <nvgpu/timers.h>
 
 struct gk20a;
 struct gr_gk20a;
@@ -87,12 +88,19 @@ struct channel_gk20a_joblist { | |||
87 | struct list_head jobs; | 88 | struct list_head jobs; |
88 | struct nvgpu_spinlock lock; | 89 | struct nvgpu_spinlock lock; |
89 | } dynamic; | 90 | } dynamic; |
91 | |||
92 | /* | ||
93 | * Synchronize abort cleanup (when closing a channel) and job cleanup | ||
94 | * (asynchronously from worker) - protect from concurrent access when | ||
95 | * job resources are being freed. | ||
96 | */ | ||
97 | struct nvgpu_mutex cleanup_lock; | ||
90 | }; | 98 | }; |
91 | 99 | ||
92 | struct channel_gk20a_timeout { | 100 | struct channel_gk20a_timeout { |
93 | struct delayed_work wq; | ||
94 | struct nvgpu_raw_spinlock lock; | 101 | struct nvgpu_raw_spinlock lock; |
95 | bool initialized; | 102 | struct nvgpu_timeout timer; |
103 | bool running; | ||
96 | u32 gp_get; | 104 | u32 gp_get; |
97 | }; | 105 | }; |
98 | 106 | ||
@@ -110,12 +118,6 @@ struct gk20a_event_id_data {
 	struct list_head event_id_node;
 };
 
-struct channel_gk20a_clean_up {
-	struct nvgpu_mutex lock;
-	bool scheduled;
-	struct delayed_work wq;
-};
-
 /*
  * Track refcount actions, saving their stack traces. This number specifies how
  * many most recent actions are stored in a buffer. Set to 0 to disable. 128
@@ -214,7 +216,8 @@ struct channel_gk20a {
 	u32 timeout_gpfifo_get;
 
 	struct channel_gk20a_timeout timeout;
-	struct channel_gk20a_clean_up clean_up;
+	/* for job cleanup handling in the background worker */
+	struct list_head worker_item;
 
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 	struct {
@@ -250,8 +253,11 @@ struct channel_gk20a {
 	u64 virt_ctx;
 #endif
 
-	/* signal channel owner via a callback, if set, in gk20a_channel_update
-	 * via schedule_work */
+	/*
+	 * Signal channel owner via a callback, if set, in job cleanup with
+	 * schedule_work. Means that something finished on the channel (perhaps
+	 * more than one job).
+	 */
 	void (*update_fn)(struct channel_gk20a *, void *);
 	void *update_fn_data;
 	struct nvgpu_spinlock update_fn_lock; /* make access to the two above atomic */
@@ -293,6 +299,9 @@ int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
 int gk20a_channel_suspend(struct gk20a *g);
 int gk20a_channel_resume(struct gk20a *g);
 
+int nvgpu_channel_worker_init(struct gk20a *g);
+void nvgpu_channel_worker_deinit(struct gk20a *g);
+
 /* Channel file operations */
 int gk20a_channel_open(struct inode *inode, struct file *filp);
 int gk20a_channel_open_ioctl(struct gk20a *g,
@@ -302,7 +311,7 @@ long gk20a_channel_ioctl(struct file *filp,
 		unsigned long arg);
 int gk20a_channel_release(struct inode *inode, struct file *filp);
 struct channel_gk20a *gk20a_get_channel_from_file(int fd);
-void gk20a_channel_update(struct channel_gk20a *c, int nr_completed);
+void gk20a_channel_update(struct channel_gk20a *c);
 
 void gk20a_init_channel(struct gpu_ops *gops);
 
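One detail worth noting from the header: worker_item is an intrusive list node embedded in the channel, initialized self-linked and re-initialized on removal (list_del_init), so "is this channel already queued?" is a single pointer check and double-enqueueing is a harmless no-op. A self-contained sketch of that trick, with simplified stand-ins for the kernel's list.h helpers:

#include <stdbool.h>

struct list_node {
	struct list_node *prev, *next;
};

static void list_node_init(struct list_node *n)
{
	n->prev = n->next = n; /* self-linked means "not queued" */
}

static bool list_node_queued(struct list_node *n)
{
	return n->next != n;
}

static void list_append(struct list_node *head, struct list_node *n)
{
	n->prev = head->prev;
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}

/* Mirrors gk20a_channel_worker_enqueue()'s dedup: enqueue at most once. */
static bool enqueue_once(struct list_node *work_list, struct list_node *item)
{
	if (list_node_queued(item))
		return false; /* already pending; the worker will see it */
	list_append(work_list, item);
	return true;
}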
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 097635a7..0aa202c5 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -179,7 +179,7 @@ static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
 {
 	struct channel_gk20a *ch = priv;
 
-	gk20a_channel_update(ch, nr_completed);
+	gk20a_channel_update(ch);
 
 	/* note: channel_get() is in __gk20a_channel_syncpt_incr() */
 	gk20a_channel_put(ch);
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 95351a43..e9eab551 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -502,6 +502,7 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
 
 	gk20a_dbg_fn("");
 
+	nvgpu_channel_worker_deinit(g);
 	/*
	 * Make sure all channels are closed before deleting them.
	 */
@@ -900,6 +901,9 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 	}
 	nvgpu_mutex_init(&f->tsg_inuse_mutex);
 
+	err = nvgpu_channel_worker_init(g);
+	if (err)
+		goto clean_up;
 	f->remove_support = gk20a_remove_fifo_support;
 
 	f->deferred_reset_pending = false;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index c30a8eaf..c79cc6c8 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -874,8 +874,6 @@ struct gk20a {
 	bool timeouts_enabled;
 #endif
 
-	struct nvgpu_mutex ch_wdt_lock;
-
 	struct nvgpu_mutex poweroff_lock;
 
 	/* Channel priorities */
@@ -1008,6 +1006,14 @@ struct gk20a {
 	atomic_t sw_irq_nonstall_last_handled;
 	wait_queue_head_t sw_irq_nonstall_last_handled_wq;
 
+	struct gk20a_channel_worker {
+		struct task_struct *poll_task;
+		atomic_t put;
+		wait_queue_head_t wq;
+		struct list_head items;
+		struct nvgpu_spinlock items_lock;
+	} channel_worker;
+
 	struct devfreq *devfreq;
 
 	struct gk20a_scale_profile *scale_profile;
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index 7b6ed322..027a92fc 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -308,6 +308,10 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
 	}
 	nvgpu_mutex_init(&f->tsg_inuse_mutex);
 
+	err = nvgpu_channel_worker_init(g);
+	if (err)
+		goto clean_up;
+
 	f->deferred_reset_pending = false;
 	nvgpu_mutex_init(&f->deferred_reset_mutex);
 
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index 4c88ab96..c8ab23f1 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -252,7 +252,6 @@ static int vgpu_init_support(struct platform_device *pdev)
 
 	nvgpu_mutex_init(&g->dbg_sessions_lock);
 	nvgpu_mutex_init(&g->client_lock);
-	nvgpu_mutex_init(&g->ch_wdt_lock);
 
 	g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
 	if (!g->dbg_regops_tmp_buf) {