Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 328
 1 file changed, 172 insertions(+), 156 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 769960af..029a713f 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -25,6 +25,7 @@
 
 #include "gk20a.h"
 #include "debug_gk20a.h"
+#include "ctxsw_trace_gk20a.h"
 #include "semaphore_gk20a.h"
 #include "hw_fifo_gk20a.h"
 #include "hw_pbdma_gk20a.h"
@@ -303,12 +304,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 	if (!runlist->active_tsgs)
 		goto clean_up_runlist_info;
 
-	runlist->high_prio_channels =
-		kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
-			GFP_KERNEL);
-	if (!runlist->high_prio_channels)
-		goto clean_up_runlist_info;
-
 	runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries;
 	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
 		int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
@@ -337,9 +332,6 @@ clean_up_runlist_info:
 	kfree(runlist->active_tsgs);
 	runlist->active_tsgs = NULL;
 
-	kfree(runlist->high_prio_channels);
-	runlist->high_prio_channels = NULL;
-
 	kfree(f->runlist_info);
 	f->runlist_info = NULL;
 
@@ -471,8 +463,7 @@ static void gk20a_init_fifo_pbdma_intr_descs(struct fifo_gk20a *f)
 	/* Can be used for sw-methods, or represents
 	 * a recoverable timeout. */
 	f->intr.pbdma.restartable_0 =
-		pbdma_intr_0_device_pending_f() |
-		pbdma_intr_0_acquire_pending_f();
+		pbdma_intr_0_device_pending_f();
 }
 
 static int gk20a_init_fifo_setup_sw(struct gk20a *g)
@@ -786,13 +777,17 @@ void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
 	if (engine_id == top_device_info_type_enum_graphics_v()) {
 		if (support_gk20a_pmu(g->dev) && g->elpg_enabled)
 			gk20a_pmu_disable_elpg(g);
 		/*HALT_PIPELINE method, halt GR engine*/
 		if (gr_gk20a_halt_pipe(g))
-			gk20a_err(dev_from_gk20a(g),
-				"failed to HALT gr pipe");
+			gk20a_err(dev_from_gk20a(g), "failed to HALT gr pipe");
+		/* resetting engine will alter read/write index.
+		 * need to flush circular buffer before re-enabling FECS.
+		 */
+		if (g->ops.fecs_trace.reset)
+			g->ops.fecs_trace.reset(g);
 		/* resetting engine using mc_enable_r() is not
 		   enough, we do full init sequence */
 		gk20a_gr_reset(g);
 		if (support_gk20a_pmu(g->dev) && g->elpg_enabled)
 			gk20a_pmu_enable_elpg(g);
 	}
@@ -1662,6 +1657,12 @@ static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev,
 		u32 val = gk20a_readl(g, pbdma_acquire_r(pbdma_id));
 		val &= ~pbdma_acquire_timeout_en_enable_f();
 		gk20a_writel(g, pbdma_acquire_r(pbdma_id), val);
+		if (g->timeouts_enabled) {
+			reset = true;
+			gk20a_err(dev_from_gk20a(g),
+				"semaphore acquire timeout!");
+		}
+		handled |= pbdma_intr_0_acquire_pending_f();
 	}
 
 	if (pbdma_intr_0 & pbdma_intr_0_pbentry_pending_f()) {
@@ -2162,32 +2163,153 @@ static inline u32 gk20a_get_tsg_runlist_entry_0(struct tsg_gk20a *tsg)
 	return runlist_entry_0;
 }
 
-/* add all active high priority channels */
-static inline u32 gk20a_fifo_runlist_add_high_prio_entries(
-		struct fifo_gk20a *f,
-		struct fifo_runlist_info_gk20a *runlist,
-		u32 *runlist_entry)
-{
-	struct channel_gk20a *ch = NULL;
-	unsigned long high_prio_chid;
-	u32 count = 0;
-
-	for_each_set_bit(high_prio_chid,
-			runlist->high_prio_channels, f->num_channels) {
-		ch = &f->channel[high_prio_chid];
-
-		if (!gk20a_is_channel_marked_as_tsg(ch) &&
-			test_bit(high_prio_chid, runlist->active_channels) == 1) {
-			gk20a_dbg_info("add high prio channel %lu to runlist",
-				high_prio_chid);
-			runlist_entry[0] = ram_rl_entry_chid_f(high_prio_chid);
-			runlist_entry[1] = 0;
-			runlist_entry += 2;
-			count++;
-		}
-	}
-
-	return count;
+/* recursively construct a runlist with interleaved bare channels and TSGs */
+static u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f,
+				struct fifo_runlist_info_gk20a *runlist,
+				u32 cur_level,
+				u32 *runlist_entry,
+				bool interleave_enabled,
+				bool prev_empty,
+				u32 *entries_left)
+{
+	bool last_level = cur_level == NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH;
+	struct channel_gk20a *ch;
+	bool skip_next = false;
+	u32 chid, tsgid, count = 0;
+
+	gk20a_dbg_fn("");
+
+	/* for each bare channel, CH, on this level, insert all higher-level
+	   channels and TSGs before inserting CH. */
+	for_each_set_bit(chid, runlist->active_channels, f->num_channels) {
+		ch = &f->channel[chid];
+
+		if (ch->interleave_level != cur_level)
+			continue;
+
+		if (gk20a_is_channel_marked_as_tsg(ch))
+			continue;
+
+		if (!last_level && !skip_next) {
+			runlist_entry = gk20a_runlist_construct_locked(f,
+						runlist,
+						cur_level + 1,
+						runlist_entry,
+						interleave_enabled,
+						false,
+						entries_left);
+			/* if interleaving is disabled, higher-level channels
+			   and TSGs only need to be inserted once */
+			if (!interleave_enabled)
+				skip_next = true;
+		}
+
+		if (!(*entries_left))
+			return NULL;
+
+		gk20a_dbg_info("add channel %d to runlist", chid);
+		runlist_entry[0] = ram_rl_entry_chid_f(chid);
+		runlist_entry[1] = 0;
+		runlist_entry += 2;
+		count++;
+		(*entries_left)--;
+	}
+
+	/* for each TSG, T, on this level, insert all higher-level channels
+	   and TSGs before inserting T. */
+	for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
+		struct tsg_gk20a *tsg = &f->tsg[tsgid];
+
+		if (tsg->interleave_level != cur_level)
+			continue;
+
+		if (!last_level && !skip_next) {
+			runlist_entry = gk20a_runlist_construct_locked(f,
+						runlist,
+						cur_level + 1,
+						runlist_entry,
+						interleave_enabled,
+						false,
+						entries_left);
+			if (!interleave_enabled)
+				skip_next = true;
+		}
+
+		if (!(*entries_left))
+			return NULL;
+
+		/* add TSG entry */
+		gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
+		runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg);
+		runlist_entry[1] = 0;
+		runlist_entry += 2;
+		count++;
+		(*entries_left)--;
+
+		mutex_lock(&tsg->ch_list_lock);
+		/* add runnable channels bound to this TSG */
+		list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
+			if (!test_bit(ch->hw_chid,
+					runlist->active_channels))
+				continue;
+
+			if (!(*entries_left)) {
+				mutex_unlock(&tsg->ch_list_lock);
+				return NULL;
+			}
+
+			gk20a_dbg_info("add channel %d to runlist",
+				ch->hw_chid);
+			runlist_entry[0] = ram_rl_entry_chid_f(ch->hw_chid);
+			runlist_entry[1] = 0;
+			runlist_entry += 2;
+			count++;
+			(*entries_left)--;
+		}
+		mutex_unlock(&tsg->ch_list_lock);
+	}
+
+	/* append entries from higher level if this level is empty */
+	if (!count && !last_level)
+		runlist_entry = gk20a_runlist_construct_locked(f,
+					runlist,
+					cur_level + 1,
+					runlist_entry,
+					interleave_enabled,
+					true,
+					entries_left);
+
+	/*
+	 * if previous and this level have entries, append
+	 * entries from higher level.
+	 *
+	 * ex. dropping from MEDIUM to LOW, need to insert HIGH
+	 */
+	if (interleave_enabled && count && !prev_empty && !last_level)
+		runlist_entry = gk20a_runlist_construct_locked(f,
+					runlist,
+					cur_level + 1,
+					runlist_entry,
+					interleave_enabled,
+					false,
+					entries_left);
+	return runlist_entry;
+}
+
+int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
+				u32 id,
+				bool is_tsg,
+				u32 runlist_id,
+				u32 new_level)
+{
+	gk20a_dbg_fn("");
+
+	if (is_tsg)
+		g->fifo.tsg[id].interleave_level = new_level;
+	else
+		g->fifo.channel[id].interleave_level = new_level;
+
+	return 0;
 }
 
 static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
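
The recursion above is easiest to follow on a concrete input. Below is a minimal, self-contained C sketch (hypothetical item lists, not driver code) that mirrors the construction order for the three interleave levels; it assumes interleaving is enabled and drops the skip_next/entries_left bookkeeping:

    #include <stdio.h>

    /* Stand-ins for NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW/MEDIUM/HIGH. */
    enum { LVL_LOW, LVL_MEDIUM, LVL_HIGH };

    /* Hypothetical per-level item names; NULL terminates each list. */
    static const char *items[3][3] = {
        [LVL_LOW]    = { "l0", NULL },
        [LVL_MEDIUM] = { "m0", NULL },
        [LVL_HIGH]   = { "h0", "h1", NULL },
    };

    /* Mirrors gk20a_runlist_construct_locked: emit all higher levels
     * before each item on this level, once if this level turned out
     * empty, and once more after a non-empty level whenever the
     * previous level was also non-empty. */
    static void construct(int level, int prev_empty)
    {
        int i, count = 0;

        for (i = 0; items[level][i]; i++) {
            if (level != LVL_HIGH)
                construct(level + 1, 0);
            printf("%s ", items[level][i]);
            count++;
        }
        if (!count && level != LVL_HIGH)
            construct(level + 1, 1);
        if (count && !prev_empty && level != LVL_HIGH)
            construct(level + 1, 0);
    }

    int main(void)
    {
        construct(LVL_LOW, 1);  /* prints: h0 h1 m0 h0 h1 l0 */
        printf("\n");
        return 0;
    }

The printed order, h0 h1 m0 h0 h1 l0, shows the intent of the change: HIGH entries bracket every lower-level entry, so a runnable HIGH channel waits at most one lower-level timeslice plus context-switch overhead before it comes around again.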
@@ -2198,14 +2320,11 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 	struct fifo_gk20a *f = &g->fifo;
 	struct fifo_runlist_info_gk20a *runlist = NULL;
 	u32 *runlist_entry_base = NULL;
-	u32 *runlist_entry = NULL;
 	u64 runlist_iova;
 	u32 old_buf, new_buf;
-	u32 chid, tsgid;
 	struct channel_gk20a *ch = NULL;
 	struct tsg_gk20a *tsg = NULL;
 	u32 count = 0;
-	u32 count_channels_in_tsg;
 	runlist = &f->runlist_info[runlist_id];
 
 	/* valid channel, add/remove it from active list.
@@ -2254,91 +2373,23 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 
 	if (hw_chid != ~0 || /* add/remove a valid channel */
 	    add /* resume to add all channels back */) {
-		runlist_entry = runlist_entry_base;
-
-		/* Runlist manipulation:
-		   Insert an entry of all high priority channels inbetween
-		   all lower priority channels. This ensure that the maximum
-		   delay a runnable high priority channel has to wait is one
-		   medium timeslice + any context switching overhead +
-		   wait on other high priority channels.
-		   add non-TSG channels first */
-		for_each_set_bit(chid,
-			runlist->active_channels, f->num_channels) {
-			ch = &f->channel[chid];
-
-			if (!gk20a_is_channel_marked_as_tsg(ch) &&
-					!ch->interleave) {
-				u32 added;
-
-				gk20a_dbg_info("add normal prio channel %d to runlist",
-					chid);
-				runlist_entry[0] = ram_rl_entry_chid_f(chid);
-				runlist_entry[1] = 0;
-				runlist_entry += 2;
-				count++;
-
-				added = gk20a_fifo_runlist_add_high_prio_entries(
-						f,
-						runlist,
-						runlist_entry);
-				count += added;
-				runlist_entry += 2 * added;
-			}
-		}
-
-		/* if there were no lower priority channels, then just
-		 * add the high priority channels once. */
-		if (count == 0) {
-			count = gk20a_fifo_runlist_add_high_prio_entries(
-					f,
-					runlist,
-					runlist_entry);
-			runlist_entry += 2 * count;
-		}
-
-		/* now add TSG entries and channels bound to TSG */
-		mutex_lock(&f->tsg_inuse_mutex);
-		for_each_set_bit(tsgid,
-			runlist->active_tsgs, f->num_channels) {
-			u32 added;
-			tsg = &f->tsg[tsgid];
-			/* add TSG entry */
-			gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
-			runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg);
-			runlist_entry[1] = 0;
-			runlist_entry += 2;
-			count++;
-
-			/* add runnable channels bound to this TSG */
-			count_channels_in_tsg = 0;
-			mutex_lock(&tsg->ch_list_lock);
-			list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
-				if (!test_bit(ch->hw_chid,
-					runlist->active_channels))
-					continue;
-				gk20a_dbg_info("add channel %d to runlist",
-					ch->hw_chid);
-				runlist_entry[0] =
-					ram_rl_entry_chid_f(ch->hw_chid);
-				runlist_entry[1] = 0;
-				runlist_entry += 2;
-				count++;
-				count_channels_in_tsg++;
-			}
-			mutex_unlock(&tsg->ch_list_lock);
-
-			WARN_ON(tsg->num_active_channels !=
-				count_channels_in_tsg);
-
-			added = gk20a_fifo_runlist_add_high_prio_entries(
-					f,
-					runlist,
-					runlist_entry);
-			count += added;
-			runlist_entry += 2 * added;
-		}
-		mutex_unlock(&f->tsg_inuse_mutex);
+		u32 max_entries = f->num_runlist_entries;
+		u32 *runlist_end;
+
+		runlist_end = gk20a_runlist_construct_locked(f,
+						runlist,
+						0,
+						runlist_entry_base,
+						g->runlist_interleave,
+						true,
+						&max_entries);
+		if (!runlist_end) {
+			ret = -E2BIG;
+			goto clean_up;
+		}
+
+		count = (runlist_end - runlist_entry_base) / 2;
+		WARN_ON(count > f->num_runlist_entries);
 	} else /* suspend to remove all channels */
 		count = 0;
 
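
A note on the new -E2BIG path: with interleaving enabled the runlist is no longer bounded by the number of active channels, since every LOW entry pulls in a full expansion of the levels above it. A rough worst-case count for bare channels on three non-empty levels (an illustrative helper under those assumptions, not driver code):

    /* Worst-case entries with interleaving enabled, bare channels only:
     * each MEDIUM channel is bracketed by the full HIGH set, and each
     * LOW channel repeats that entire MEDIUM expansion. */
    static u32 worst_case_entries(u32 nhigh, u32 nmed, u32 nlow)
    {
        u32 med = nmed * (nhigh + 1) + nhigh;   /* one MEDIUM-level pass */

        return nlow * (med + 1);                /* repeated per LOW entry */
    }

Already at 2 HIGH, 1 MEDIUM and 1 LOW channel this gives 6 entries for 4 channels, which is why gk20a_runlist_construct_locked decrements *entries_left as it writes and the caller fails with -E2BIG when it returns NULL.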
@@ -2493,42 +2544,6 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g)
 	return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f();
 }
 
-int gk20a_fifo_set_channel_priority(
-	struct gk20a *g,
-	u32 runlist_id,
-	u32 hw_chid,
-	bool interleave)
-{
-	struct fifo_runlist_info_gk20a *runlist = NULL;
-	struct fifo_gk20a *f = &g->fifo;
-	struct channel_gk20a *ch = NULL;
-
-	if (hw_chid >= f->num_channels)
-		return -EINVAL;
-
-	if (runlist_id >= f->max_runlists)
-		return -EINVAL;
-
-	ch = &f->channel[hw_chid];
-
-	gk20a_dbg_fn("");
-
-	runlist = &f->runlist_info[runlist_id];
-
-	mutex_lock(&runlist->mutex);
-
-	if (ch->interleave)
-		set_bit(hw_chid, runlist->high_prio_channels);
-	else
-		clear_bit(hw_chid, runlist->high_prio_channels);
-
-	gk20a_dbg_fn("done");
-
-	mutex_unlock(&runlist->mutex);
-
-	return 0;
-}
-
 struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
 	u32 hw_chid)
 {
@@ -2545,4 +2560,5 @@ void gk20a_init_fifo(struct gpu_ops *gops)
 	gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
 	gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos;
 	gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature;
+	gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave;
 }
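
For completeness, a caller is expected to set the level through the new ops hook and then resubmit the runlist so the ordering takes effect. A minimal sketch, assuming a valid bare channel ch bound to runlist 0 (gk20a_fifo_update_runlist is the existing entry point in this file; error handling elided):

    /* Sketch: promote one channel to the HIGH interleave level and
     * rebuild runlist 0 so the new interleaving is applied. */
    g->ops.fifo.set_runlist_interleave(g, ch->hw_chid, false /* is_tsg */,
                0, NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH);
    gk20a_fifo_update_runlist(g, 0 /* runlist_id */, ch->hw_chid,
                true /* add */, true /* wait_for_finish */);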