diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 300 |
1 files changed, 153 insertions, 147 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 769960af..28cc3086 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -303,12 +303,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) | |||
303 | if (!runlist->active_tsgs) | 303 | if (!runlist->active_tsgs) |
304 | goto clean_up_runlist_info; | 304 | goto clean_up_runlist_info; |
305 | 305 | ||
306 | runlist->high_prio_channels = | ||
307 | kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), | ||
308 | GFP_KERNEL); | ||
309 | if (!runlist->high_prio_channels) | ||
310 | goto clean_up_runlist_info; | ||
311 | |||
312 | runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries; | 306 | runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries; |
313 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | 307 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { |
314 | int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); | 308 | int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); |
@@ -337,9 +331,6 @@ clean_up_runlist_info: | |||
337 | kfree(runlist->active_tsgs); | 331 | kfree(runlist->active_tsgs); |
338 | runlist->active_tsgs = NULL; | 332 | runlist->active_tsgs = NULL; |
339 | 333 | ||
340 | kfree(runlist->high_prio_channels); | ||
341 | runlist->high_prio_channels = NULL; | ||
342 | |||
343 | kfree(f->runlist_info); | 334 | kfree(f->runlist_info); |
344 | f->runlist_info = NULL; | 335 | f->runlist_info = NULL; |
345 | 336 | ||
@@ -2162,32 +2153,153 @@ static inline u32 gk20a_get_tsg_runlist_entry_0(struct tsg_gk20a *tsg) | |||
2162 | return runlist_entry_0; | 2153 | return runlist_entry_0; |
2163 | } | 2154 | } |
2164 | 2155 | ||
2165 | /* add all active high priority channels */ | 2156 | /* recursively construct a runlist with interleaved bare channels and TSGs */ |
2166 | static inline u32 gk20a_fifo_runlist_add_high_prio_entries( | 2157 | static u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f, |
2167 | struct fifo_gk20a *f, | 2158 | struct fifo_runlist_info_gk20a *runlist, |
2168 | struct fifo_runlist_info_gk20a *runlist, | 2159 | u32 cur_level, |
2169 | u32 *runlist_entry) | 2160 | u32 *runlist_entry, |
2161 | bool interleave_enabled, | ||
2162 | bool prev_empty, | ||
2163 | u32 *entries_left) | ||
2170 | { | 2164 | { |
2171 | struct channel_gk20a *ch = NULL; | 2165 | bool last_level = cur_level == NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH; |
2172 | unsigned long high_prio_chid; | 2166 | struct channel_gk20a *ch; |
2173 | u32 count = 0; | 2167 | bool skip_next = false; |
2168 | u32 chid, tsgid, count = 0; | ||
2169 | |||
2170 | gk20a_dbg_fn(""); | ||
2174 | 2171 | ||
2175 | for_each_set_bit(high_prio_chid, | 2172 | /* for each bare channel, CH, on this level, insert all higher-level |
2176 | runlist->high_prio_channels, f->num_channels) { | 2173 | channels and TSGs before inserting CH. */ |
2177 | ch = &f->channel[high_prio_chid]; | 2174 | for_each_set_bit(chid, runlist->active_channels, f->num_channels) { |
2175 | ch = &f->channel[chid]; | ||
2176 | |||
2177 | if (ch->interleave_level != cur_level) | ||
2178 | continue; | ||
2179 | |||
2180 | if (gk20a_is_channel_marked_as_tsg(ch)) | ||
2181 | continue; | ||
2182 | |||
2183 | if (!last_level && !skip_next) { | ||
2184 | runlist_entry = gk20a_runlist_construct_locked(f, | ||
2185 | runlist, | ||
2186 | cur_level + 1, | ||
2187 | runlist_entry, | ||
2188 | interleave_enabled, | ||
2189 | false, | ||
2190 | entries_left); | ||
2191 | /* if interleaving is disabled, higher-level channels | ||
2192 | and TSGs only need to be inserted once */ | ||
2193 | if (!interleave_enabled) | ||
2194 | skip_next = true; | ||
2195 | } | ||
2196 | |||
2197 | if (!(*entries_left)) | ||
2198 | return NULL; | ||
2199 | |||
2200 | gk20a_dbg_info("add channel %d to runlist", chid); | ||
2201 | runlist_entry[0] = ram_rl_entry_chid_f(chid); | ||
2202 | runlist_entry[1] = 0; | ||
2203 | runlist_entry += 2; | ||
2204 | count++; | ||
2205 | (*entries_left)--; | ||
2206 | } | ||
2178 | 2207 | ||
2179 | if (!gk20a_is_channel_marked_as_tsg(ch) && | 2208 | /* for each TSG, T, on this level, insert all higher-level channels |
2180 | test_bit(high_prio_chid, runlist->active_channels) == 1) { | 2209 | and TSGs before inserting T. */ |
2181 | gk20a_dbg_info("add high prio channel %lu to runlist", | 2210 | for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) { |
2182 | high_prio_chid); | 2211 | struct tsg_gk20a *tsg = &f->tsg[tsgid]; |
2183 | runlist_entry[0] = ram_rl_entry_chid_f(high_prio_chid); | 2212 | |
2213 | if (tsg->interleave_level != cur_level) | ||
2214 | continue; | ||
2215 | |||
2216 | if (!last_level && !skip_next) { | ||
2217 | runlist_entry = gk20a_runlist_construct_locked(f, | ||
2218 | runlist, | ||
2219 | cur_level + 1, | ||
2220 | runlist_entry, | ||
2221 | interleave_enabled, | ||
2222 | false, | ||
2223 | entries_left); | ||
2224 | if (!interleave_enabled) | ||
2225 | skip_next = true; | ||
2226 | } | ||
2227 | |||
2228 | if (!(*entries_left)) | ||
2229 | return NULL; | ||
2230 | |||
2231 | /* add TSG entry */ | ||
2232 | gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid); | ||
2233 | runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg); | ||
2234 | runlist_entry[1] = 0; | ||
2235 | runlist_entry += 2; | ||
2236 | count++; | ||
2237 | (*entries_left)--; | ||
2238 | |||
2239 | mutex_lock(&tsg->ch_list_lock); | ||
2240 | /* add runnable channels bound to this TSG */ | ||
2241 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) { | ||
2242 | if (!test_bit(ch->hw_chid, | ||
2243 | runlist->active_channels)) | ||
2244 | continue; | ||
2245 | |||
2246 | if (!(*entries_left)) { | ||
2247 | mutex_unlock(&tsg->ch_list_lock); | ||
2248 | return NULL; | ||
2249 | } | ||
2250 | |||
2251 | gk20a_dbg_info("add channel %d to runlist", | ||
2252 | ch->hw_chid); | ||
2253 | runlist_entry[0] = ram_rl_entry_chid_f(ch->hw_chid); | ||
2184 | runlist_entry[1] = 0; | 2254 | runlist_entry[1] = 0; |
2185 | runlist_entry += 2; | 2255 | runlist_entry += 2; |
2186 | count++; | 2256 | count++; |
2257 | (*entries_left)--; | ||
2187 | } | 2258 | } |
2259 | mutex_unlock(&tsg->ch_list_lock); | ||
2188 | } | 2260 | } |
2189 | 2261 | ||
2190 | return count; | 2262 | /* append entries from higher level if this level is empty */ |
2263 | if (!count && !last_level) | ||
2264 | runlist_entry = gk20a_runlist_construct_locked(f, | ||
2265 | runlist, | ||
2266 | cur_level + 1, | ||
2267 | runlist_entry, | ||
2268 | interleave_enabled, | ||
2269 | true, | ||
2270 | entries_left); | ||
2271 | |||
2272 | /* | ||
2273 | * if previous and this level have entries, append | ||
2274 | * entries from higher level. | ||
2275 | * | ||
2276 | * ex. dropping from MEDIUM to LOW, need to insert HIGH | ||
2277 | */ | ||
2278 | if (interleave_enabled && count && !prev_empty && !last_level) | ||
2279 | runlist_entry = gk20a_runlist_construct_locked(f, | ||
2280 | runlist, | ||
2281 | cur_level + 1, | ||
2282 | runlist_entry, | ||
2283 | interleave_enabled, | ||
2284 | false, | ||
2285 | entries_left); | ||
2286 | return runlist_entry; | ||
2287 | } | ||
2288 | |||
2289 | int gk20a_fifo_set_runlist_interleave(struct gk20a *g, | ||
2290 | u32 id, | ||
2291 | bool is_tsg, | ||
2292 | u32 runlist_id, | ||
2293 | u32 new_level) | ||
2294 | { | ||
2295 | gk20a_dbg_fn(""); | ||
2296 | |||
2297 | if (is_tsg) | ||
2298 | g->fifo.tsg[id].interleave_level = new_level; | ||
2299 | else | ||
2300 | g->fifo.channel[id].interleave_level = new_level; | ||
2301 | |||
2302 | return 0; | ||
2191 | } | 2303 | } |
2192 | 2304 | ||
2193 | static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | 2305 | static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, |
@@ -2198,14 +2310,11 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2198 | struct fifo_gk20a *f = &g->fifo; | 2310 | struct fifo_gk20a *f = &g->fifo; |
2199 | struct fifo_runlist_info_gk20a *runlist = NULL; | 2311 | struct fifo_runlist_info_gk20a *runlist = NULL; |
2200 | u32 *runlist_entry_base = NULL; | 2312 | u32 *runlist_entry_base = NULL; |
2201 | u32 *runlist_entry = NULL; | ||
2202 | u64 runlist_iova; | 2313 | u64 runlist_iova; |
2203 | u32 old_buf, new_buf; | 2314 | u32 old_buf, new_buf; |
2204 | u32 chid, tsgid; | ||
2205 | struct channel_gk20a *ch = NULL; | 2315 | struct channel_gk20a *ch = NULL; |
2206 | struct tsg_gk20a *tsg = NULL; | 2316 | struct tsg_gk20a *tsg = NULL; |
2207 | u32 count = 0; | 2317 | u32 count = 0; |
2208 | u32 count_channels_in_tsg; | ||
2209 | runlist = &f->runlist_info[runlist_id]; | 2318 | runlist = &f->runlist_info[runlist_id]; |
2210 | 2319 | ||
2211 | /* valid channel, add/remove it from active list. | 2320 | /* valid channel, add/remove it from active list. |
@@ -2254,91 +2363,23 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2254 | 2363 | ||
2255 | if (hw_chid != ~0 || /* add/remove a valid channel */ | 2364 | if (hw_chid != ~0 || /* add/remove a valid channel */ |
2256 | add /* resume to add all channels back */) { | 2365 | add /* resume to add all channels back */) { |
2257 | runlist_entry = runlist_entry_base; | 2366 | u32 max_entries = f->num_runlist_entries; |
2258 | 2367 | u32 *runlist_end; | |
2259 | /* Runlist manipulation: | ||
2260 | Insert an entry of all high priority channels inbetween | ||
2261 | all lower priority channels. This ensure that the maximum | ||
2262 | delay a runnable high priority channel has to wait is one | ||
2263 | medium timeslice + any context switching overhead + | ||
2264 | wait on other high priority channels. | ||
2265 | add non-TSG channels first */ | ||
2266 | for_each_set_bit(chid, | ||
2267 | runlist->active_channels, f->num_channels) { | ||
2268 | ch = &f->channel[chid]; | ||
2269 | |||
2270 | if (!gk20a_is_channel_marked_as_tsg(ch) && | ||
2271 | !ch->interleave) { | ||
2272 | u32 added; | ||
2273 | |||
2274 | gk20a_dbg_info("add normal prio channel %d to runlist", | ||
2275 | chid); | ||
2276 | runlist_entry[0] = ram_rl_entry_chid_f(chid); | ||
2277 | runlist_entry[1] = 0; | ||
2278 | runlist_entry += 2; | ||
2279 | count++; | ||
2280 | |||
2281 | added = gk20a_fifo_runlist_add_high_prio_entries( | ||
2282 | f, | ||
2283 | runlist, | ||
2284 | runlist_entry); | ||
2285 | count += added; | ||
2286 | runlist_entry += 2 * added; | ||
2287 | } | ||
2288 | } | ||
2289 | 2368 | ||
2290 | /* if there were no lower priority channels, then just | 2369 | runlist_end = gk20a_runlist_construct_locked(f, |
2291 | * add the high priority channels once. */ | 2370 | runlist, |
2292 | if (count == 0) { | 2371 | 0, |
2293 | count = gk20a_fifo_runlist_add_high_prio_entries( | 2372 | runlist_entry_base, |
2294 | f, | 2373 | g->runlist_interleave, |
2295 | runlist, | 2374 | true, |
2296 | runlist_entry); | 2375 | &max_entries); |
2297 | runlist_entry += 2 * count; | 2376 | if (!runlist_end) { |
2377 | ret = -E2BIG; | ||
2378 | goto clean_up; | ||
2298 | } | 2379 | } |
2299 | 2380 | ||
2300 | /* now add TSG entries and channels bound to TSG */ | 2381 | count = (runlist_end - runlist_entry_base) / 2; |
2301 | mutex_lock(&f->tsg_inuse_mutex); | 2382 | WARN_ON(count > f->num_runlist_entries); |
2302 | for_each_set_bit(tsgid, | ||
2303 | runlist->active_tsgs, f->num_channels) { | ||
2304 | u32 added; | ||
2305 | tsg = &f->tsg[tsgid]; | ||
2306 | /* add TSG entry */ | ||
2307 | gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid); | ||
2308 | runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg); | ||
2309 | runlist_entry[1] = 0; | ||
2310 | runlist_entry += 2; | ||
2311 | count++; | ||
2312 | |||
2313 | /* add runnable channels bound to this TSG */ | ||
2314 | count_channels_in_tsg = 0; | ||
2315 | mutex_lock(&tsg->ch_list_lock); | ||
2316 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) { | ||
2317 | if (!test_bit(ch->hw_chid, | ||
2318 | runlist->active_channels)) | ||
2319 | continue; | ||
2320 | gk20a_dbg_info("add channel %d to runlist", | ||
2321 | ch->hw_chid); | ||
2322 | runlist_entry[0] = | ||
2323 | ram_rl_entry_chid_f(ch->hw_chid); | ||
2324 | runlist_entry[1] = 0; | ||
2325 | runlist_entry += 2; | ||
2326 | count++; | ||
2327 | count_channels_in_tsg++; | ||
2328 | } | ||
2329 | mutex_unlock(&tsg->ch_list_lock); | ||
2330 | |||
2331 | WARN_ON(tsg->num_active_channels != | ||
2332 | count_channels_in_tsg); | ||
2333 | |||
2334 | added = gk20a_fifo_runlist_add_high_prio_entries( | ||
2335 | f, | ||
2336 | runlist, | ||
2337 | runlist_entry); | ||
2338 | count += added; | ||
2339 | runlist_entry += 2 * added; | ||
2340 | } | ||
2341 | mutex_unlock(&f->tsg_inuse_mutex); | ||
2342 | } else /* suspend to remove all channels */ | 2383 | } else /* suspend to remove all channels */ |
2343 | count = 0; | 2384 | count = 0; |
2344 | 2385 | ||
@@ -2493,42 +2534,6 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g) | |||
2493 | return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f(); | 2534 | return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f(); |
2494 | } | 2535 | } |
2495 | 2536 | ||
2496 | int gk20a_fifo_set_channel_priority( | ||
2497 | struct gk20a *g, | ||
2498 | u32 runlist_id, | ||
2499 | u32 hw_chid, | ||
2500 | bool interleave) | ||
2501 | { | ||
2502 | struct fifo_runlist_info_gk20a *runlist = NULL; | ||
2503 | struct fifo_gk20a *f = &g->fifo; | ||
2504 | struct channel_gk20a *ch = NULL; | ||
2505 | |||
2506 | if (hw_chid >= f->num_channels) | ||
2507 | return -EINVAL; | ||
2508 | |||
2509 | if (runlist_id >= f->max_runlists) | ||
2510 | return -EINVAL; | ||
2511 | |||
2512 | ch = &f->channel[hw_chid]; | ||
2513 | |||
2514 | gk20a_dbg_fn(""); | ||
2515 | |||
2516 | runlist = &f->runlist_info[runlist_id]; | ||
2517 | |||
2518 | mutex_lock(&runlist->mutex); | ||
2519 | |||
2520 | if (ch->interleave) | ||
2521 | set_bit(hw_chid, runlist->high_prio_channels); | ||
2522 | else | ||
2523 | clear_bit(hw_chid, runlist->high_prio_channels); | ||
2524 | |||
2525 | gk20a_dbg_fn("done"); | ||
2526 | |||
2527 | mutex_unlock(&runlist->mutex); | ||
2528 | |||
2529 | return 0; | ||
2530 | } | ||
2531 | |||
2532 | struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, | 2537 | struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, |
2533 | u32 hw_chid) | 2538 | u32 hw_chid) |
2534 | { | 2539 | { |
@@ -2545,4 +2550,5 @@ void gk20a_init_fifo(struct gpu_ops *gops) | |||
2545 | gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle; | 2550 | gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle; |
2546 | gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos; | 2551 | gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos; |
2547 | gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature; | 2552 | gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature; |
2553 | gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave; | ||
2548 | } | 2554 | } |