Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r--   drivers/gpu/nvgpu/gk20a/fifo_gk20a.c   300
1 file changed, 153 insertions(+), 147 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 769960af..28cc3086 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -303,12 +303,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
         if (!runlist->active_tsgs)
                 goto clean_up_runlist_info;
 
-        runlist->high_prio_channels =
-                kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
-                        GFP_KERNEL);
-        if (!runlist->high_prio_channels)
-                goto clean_up_runlist_info;
-
         runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries;
         for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
                 int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
@@ -337,9 +331,6 @@ clean_up_runlist_info:
         kfree(runlist->active_tsgs);
         runlist->active_tsgs = NULL;
 
-        kfree(runlist->high_prio_channels);
-        runlist->high_prio_channels = NULL;
-
         kfree(f->runlist_info);
         f->runlist_info = NULL;
 
@@ -2162,32 +2153,153 @@ static inline u32 gk20a_get_tsg_runlist_entry_0(struct tsg_gk20a *tsg)
         return runlist_entry_0;
 }
 
-/* add all active high priority channels */
-static inline u32 gk20a_fifo_runlist_add_high_prio_entries(
-                struct fifo_gk20a *f,
-                struct fifo_runlist_info_gk20a *runlist,
-                u32 *runlist_entry)
+/* recursively construct a runlist with interleaved bare channels and TSGs */
+static u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f,
+                struct fifo_runlist_info_gk20a *runlist,
+                u32 cur_level,
+                u32 *runlist_entry,
+                bool interleave_enabled,
+                bool prev_empty,
+                u32 *entries_left)
 {
-        struct channel_gk20a *ch = NULL;
-        unsigned long high_prio_chid;
-        u32 count = 0;
+        bool last_level = cur_level == NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH;
+        struct channel_gk20a *ch;
+        bool skip_next = false;
+        u32 chid, tsgid, count = 0;
+
+        gk20a_dbg_fn("");
 
-        for_each_set_bit(high_prio_chid,
-                runlist->high_prio_channels, f->num_channels) {
-                ch = &f->channel[high_prio_chid];
+        /* for each bare channel, CH, on this level, insert all higher-level
+           channels and TSGs before inserting CH. */
+        for_each_set_bit(chid, runlist->active_channels, f->num_channels) {
+                ch = &f->channel[chid];
+
+                if (ch->interleave_level != cur_level)
+                        continue;
+
+                if (gk20a_is_channel_marked_as_tsg(ch))
+                        continue;
+
+                if (!last_level && !skip_next) {
+                        runlist_entry = gk20a_runlist_construct_locked(f,
+                                        runlist,
+                                        cur_level + 1,
+                                        runlist_entry,
+                                        interleave_enabled,
+                                        false,
+                                        entries_left);
+                        /* if interleaving is disabled, higher-level channels
+                           and TSGs only need to be inserted once */
+                        if (!interleave_enabled)
+                                skip_next = true;
+                }
+
+                if (!(*entries_left))
+                        return NULL;
+
+                gk20a_dbg_info("add channel %d to runlist", chid);
+                runlist_entry[0] = ram_rl_entry_chid_f(chid);
+                runlist_entry[1] = 0;
+                runlist_entry += 2;
+                count++;
+                (*entries_left)--;
+        }
 
-                if (!gk20a_is_channel_marked_as_tsg(ch) &&
-                        test_bit(high_prio_chid, runlist->active_channels) == 1) {
-                        gk20a_dbg_info("add high prio channel %lu to runlist",
-                                high_prio_chid);
-                        runlist_entry[0] = ram_rl_entry_chid_f(high_prio_chid);
+        /* for each TSG, T, on this level, insert all higher-level channels
+           and TSGs before inserting T. */
+        for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
+                struct tsg_gk20a *tsg = &f->tsg[tsgid];
+
+                if (tsg->interleave_level != cur_level)
+                        continue;
+
+                if (!last_level && !skip_next) {
+                        runlist_entry = gk20a_runlist_construct_locked(f,
+                                        runlist,
+                                        cur_level + 1,
+                                        runlist_entry,
+                                        interleave_enabled,
+                                        false,
+                                        entries_left);
+                        if (!interleave_enabled)
+                                skip_next = true;
+                }
+
+                if (!(*entries_left))
+                        return NULL;
+
+                /* add TSG entry */
+                gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
+                runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg);
+                runlist_entry[1] = 0;
+                runlist_entry += 2;
+                count++;
+                (*entries_left)--;
+
+                mutex_lock(&tsg->ch_list_lock);
+                /* add runnable channels bound to this TSG */
+                list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
+                        if (!test_bit(ch->hw_chid,
+                                      runlist->active_channels))
+                                continue;
+
+                        if (!(*entries_left)) {
+                                mutex_unlock(&tsg->ch_list_lock);
+                                return NULL;
+                        }
+
+                        gk20a_dbg_info("add channel %d to runlist",
+                                ch->hw_chid);
+                        runlist_entry[0] = ram_rl_entry_chid_f(ch->hw_chid);
                         runlist_entry[1] = 0;
                         runlist_entry += 2;
                         count++;
+                        (*entries_left)--;
                 }
+                mutex_unlock(&tsg->ch_list_lock);
         }
 
-        return count;
+        /* append entries from higher level if this level is empty */
+        if (!count && !last_level)
+                runlist_entry = gk20a_runlist_construct_locked(f,
+                                runlist,
+                                cur_level + 1,
+                                runlist_entry,
+                                interleave_enabled,
+                                true,
+                                entries_left);
+
+        /*
+         * if previous and this level have entries, append
+         * entries from higher level.
+         *
+         * ex. dropping from MEDIUM to LOW, need to insert HIGH
+         */
+        if (interleave_enabled && count && !prev_empty && !last_level)
+                runlist_entry = gk20a_runlist_construct_locked(f,
+                                runlist,
+                                cur_level + 1,
+                                runlist_entry,
+                                interleave_enabled,
+                                false,
+                                entries_left);
+        return runlist_entry;
+}
+
+int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
+                u32 id,
+                bool is_tsg,
+                u32 runlist_id,
+                u32 new_level)
+{
+        gk20a_dbg_fn("");
+
+        if (is_tsg)
+                g->fifo.tsg[id].interleave_level = new_level;
+        else
+                g->fifo.channel[id].interleave_level = new_level;
+
+        return 0;
 }
 
 static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
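
The recursion above is the whole scheduling policy: gk20a_runlist_construct_locked() starts at the LOW interleave level and, before emitting each entry of the current level, first emits every entry of the higher levels (MEDIUM, then HIGH), so a runnable higher-priority channel or TSG is never more than one lower-priority timeslice away. With interleaving disabled the higher levels are emitted only once, and an empty level still forwards to the levels above it. The stand-alone program below is a minimal sketch of just that recursion, with invented item names in place of channels/TSGs and printf in place of two-word runlist entries; it is not the driver code:

/* Stand-alone model of the interleaved runlist construction above.
 * The item names and fixed-size tables are assumptions for illustration;
 * only the recursion shape mirrors gk20a_runlist_construct_locked(). */
#include <stdbool.h>
#include <stdio.h>

enum { LEVEL_LOW, LEVEL_MEDIUM, LEVEL_HIGH, NUM_LEVELS };

/* hypothetical per-level work items standing in for channels/TSGs */
static const char *items[NUM_LEVELS][2] = {
        [LEVEL_LOW]    = { "L1", "L2" },
        [LEVEL_MEDIUM] = { "M1", NULL },
        [LEVEL_HIGH]   = { "H1", "H2" },
};

static void construct(int cur_level, bool interleave, bool prev_empty)
{
        bool last_level = cur_level == LEVEL_HIGH;
        bool skip_next = false;
        int count = 0;

        for (int i = 0; i < 2 && items[cur_level][i]; i++) {
                /* insert all higher levels before each entry on this level */
                if (!last_level && !skip_next) {
                        construct(cur_level + 1, interleave, false);
                        if (!interleave)
                                skip_next = true;
                }
                printf("%s ", items[cur_level][i]);
                count++;
        }

        /* empty level: still emit the higher levels once */
        if (!count && !last_level)
                construct(cur_level + 1, interleave, true);

        /* non-empty level below a non-empty one: close with the higher levels */
        if (interleave && count && !prev_empty && !last_level)
                construct(cur_level + 1, interleave, false);
}

int main(void)
{
        construct(LEVEL_LOW, true, true);   /* H1 H2 M1 H1 H2 L1 H1 H2 M1 H1 H2 L2 */
        printf("\n");
        construct(LEVEL_LOW, false, true);  /* H1 H2 M1 L1 L2 */
        printf("\n");
        return 0;
}

With interleaving enabled the model prints "H1 H2 M1 H1 H2 L1 H1 H2 M1 H1 H2 L2"; with it disabled, "H1 H2 M1 L1 L2".
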
@@ -2198,14 +2310,11 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
         struct fifo_gk20a *f = &g->fifo;
         struct fifo_runlist_info_gk20a *runlist = NULL;
         u32 *runlist_entry_base = NULL;
-        u32 *runlist_entry = NULL;
         u64 runlist_iova;
         u32 old_buf, new_buf;
-        u32 chid, tsgid;
         struct channel_gk20a *ch = NULL;
         struct tsg_gk20a *tsg = NULL;
         u32 count = 0;
-        u32 count_channels_in_tsg;
         runlist = &f->runlist_info[runlist_id];
 
         /* valid channel, add/remove it from active list.
@@ -2254,91 +2363,23 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 
         if (hw_chid != ~0 || /* add/remove a valid channel */
             add /* resume to add all channels back */) {
-                runlist_entry = runlist_entry_base;
-
-                /* Runlist manipulation:
-                   Insert an entry of all high priority channels inbetween
-                   all lower priority channels. This ensure that the maximum
-                   delay a runnable high priority channel has to wait is one
-                   medium timeslice + any context switching overhead +
-                   wait on other high priority channels.
-                   add non-TSG channels first */
-                for_each_set_bit(chid,
-                        runlist->active_channels, f->num_channels) {
-                        ch = &f->channel[chid];
-
-                        if (!gk20a_is_channel_marked_as_tsg(ch) &&
-                                !ch->interleave) {
-                                u32 added;
-
-                                gk20a_dbg_info("add normal prio channel %d to runlist",
-                                        chid);
-                                runlist_entry[0] = ram_rl_entry_chid_f(chid);
-                                runlist_entry[1] = 0;
-                                runlist_entry += 2;
-                                count++;
-
-                                added = gk20a_fifo_runlist_add_high_prio_entries(
-                                        f,
-                                        runlist,
-                                        runlist_entry);
-                                count += added;
-                                runlist_entry += 2 * added;
-                        }
-                }
+                u32 max_entries = f->num_runlist_entries;
+                u32 *runlist_end;
 
-                /* if there were no lower priority channels, then just
-                 * add the high priority channels once. */
-                if (count == 0) {
-                        count = gk20a_fifo_runlist_add_high_prio_entries(
-                                f,
-                                runlist,
-                                runlist_entry);
-                        runlist_entry += 2 * count;
+                runlist_end = gk20a_runlist_construct_locked(f,
+                                runlist,
+                                0,
+                                runlist_entry_base,
+                                g->runlist_interleave,
+                                true,
+                                &max_entries);
+                if (!runlist_end) {
+                        ret = -E2BIG;
+                        goto clean_up;
                 }
 
-                /* now add TSG entries and channels bound to TSG */
-                mutex_lock(&f->tsg_inuse_mutex);
-                for_each_set_bit(tsgid,
-                        runlist->active_tsgs, f->num_channels) {
-                        u32 added;
-                        tsg = &f->tsg[tsgid];
-                        /* add TSG entry */
-                        gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
-                        runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg);
-                        runlist_entry[1] = 0;
-                        runlist_entry += 2;
-                        count++;
-
-                        /* add runnable channels bound to this TSG */
-                        count_channels_in_tsg = 0;
-                        mutex_lock(&tsg->ch_list_lock);
-                        list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
-                                if (!test_bit(ch->hw_chid,
-                                        runlist->active_channels))
-                                        continue;
-                                gk20a_dbg_info("add channel %d to runlist",
-                                        ch->hw_chid);
-                                runlist_entry[0] =
-                                        ram_rl_entry_chid_f(ch->hw_chid);
-                                runlist_entry[1] = 0;
-                                runlist_entry += 2;
-                                count++;
-                                count_channels_in_tsg++;
-                        }
-                        mutex_unlock(&tsg->ch_list_lock);
-
-                        WARN_ON(tsg->num_active_channels !=
-                                count_channels_in_tsg);
-
-                        added = gk20a_fifo_runlist_add_high_prio_entries(
-                                f,
-                                runlist,
-                                runlist_entry);
-                        count += added;
-                        runlist_entry += 2 * added;
-                }
-                mutex_unlock(&f->tsg_inuse_mutex);
+                count = (runlist_end - runlist_entry_base) / 2;
+                WARN_ON(count > f->num_runlist_entries);
         } else /* suspend to remove all channels */
                 count = 0;
 
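
The caller-side bookkeeping changes accordingly: gk20a_runlist_construct_locked() returns the next free position in the runlist buffer, or NULL when max_entries is exhausted, which the caller reports as -E2BIG; otherwise the entry count is recovered from the pointer difference, divided by two because every runlist entry is a pair of u32 words. A tiny stand-alone illustration of that arithmetic, with an invented buffer and payload:

/* Toy illustration of the entry-count accounting above: each runlist
 * entry occupies two u32 words, so the count is half the pointer
 * difference.  Buffer size and contents are made up for the example. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t buf[8];                        /* room for 4 two-word entries */
        uint32_t *end = buf;

        for (int chid = 0; chid < 3; chid++) {  /* emit 3 fake entries */
                end[0] = (uint32_t)chid;        /* word 0: entry payload */
                end[1] = 0;                     /* word 1: unused here */
                end += 2;
        }

        printf("count = %td\n", (end - buf) / 2);       /* prints 3 */
        return 0;
}
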
@@ -2493,42 +2534,6 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g)
         return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f();
 }
 
-int gk20a_fifo_set_channel_priority(
-        struct gk20a *g,
-        u32 runlist_id,
-        u32 hw_chid,
-        bool interleave)
-{
-        struct fifo_runlist_info_gk20a *runlist = NULL;
-        struct fifo_gk20a *f = &g->fifo;
-        struct channel_gk20a *ch = NULL;
-
-        if (hw_chid >= f->num_channels)
-                return -EINVAL;
-
-        if (runlist_id >= f->max_runlists)
-                return -EINVAL;
-
-        ch = &f->channel[hw_chid];
-
-        gk20a_dbg_fn("");
-
-        runlist = &f->runlist_info[runlist_id];
-
-        mutex_lock(&runlist->mutex);
-
-        if (ch->interleave)
-                set_bit(hw_chid, runlist->high_prio_channels);
-        else
-                clear_bit(hw_chid, runlist->high_prio_channels);
-
-        gk20a_dbg_fn("done");
-
-        mutex_unlock(&runlist->mutex);
-
-        return 0;
-}
-
 struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
                 u32 hw_chid)
 {
@@ -2545,4 +2550,5 @@ void gk20a_init_fifo(struct gpu_ops *gops)
         gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
         gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos;
         gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature;
+        gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave;
 }
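
For context, a caller of the new hook might look like the sketch below. The helper name and the surrounding policy are hypothetical; only the fifo op set_runlist_interleave (reached as g->ops.fifo.set_runlist_interleave) and NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH come from this patch, and the new level only takes effect once a later runlist update rebuilds the runlist.

/* Hypothetical driver-side helper (illustration only, not compilable on
 * its own): raise a bare channel to the HIGH interleave level via the
 * new fifo op, if the chip has wired one up. */
static int set_channel_interleave_high(struct gk20a *g,
                                       struct channel_gk20a *ch,
                                       u32 runlist_id)
{
        if (!g->ops.fifo.set_runlist_interleave)
                return -ENOSYS;

        /* records the level only; a later runlist update rebuilds the
         * runlist so the channel is actually interleaved */
        return g->ops.fifo.set_runlist_interleave(g, ch->hw_chid,
                        false /* is_tsg */, runlist_id,
                        NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH);
}
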