diff options
author | Aingara Paramakuru <aparamakuru@nvidia.com> | 2016-02-22 12:35:49 -0500 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-03-15 19:23:44 -0400 |
commit | 2a58d3c27b45ca9d0d9dc2136377b7a41b9ed82d (patch) | |
tree | 9d7464bfd0eea8e4b65f591996db59a98f4070e2 /drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |
parent | f07a046a52e7a8074bd1572a12ac65747d3f827d (diff) |
gpu: nvgpu: improve channel interleave support
Previously, only "high" priority bare channels were interleaved
between all other bare channels and TSGs. This patch decouples
priority from interleaving and introduces 3 levels for interleaving
a bare channel or TSG: high, medium, and low. The levels define
the number of times a channel or TSG will appear on a runlist (see
nvgpu.h for details).
By default, all bare channels and TSGs are set to interleave level
low. Userspace can then request that the interleave level be increased
via the CHANNEL_SET_RUNLIST_INTERLEAVE ioctl (a TSG-specific ioctl will
be added later).
As timeslice settings will soon be coming from userspace, the default
timeslice for "high" priority channels has been restored.
JIRA VFND-1302
Bug 1729664
Change-Id: I178bc1cecda23f5002fec6d791e6dcaedfa05c0c
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/1014962
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 300 |
1 files changed, 153 insertions, 147 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 769960af..28cc3086 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -303,12 +303,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) | |||
303 | if (!runlist->active_tsgs) | 303 | if (!runlist->active_tsgs) |
304 | goto clean_up_runlist_info; | 304 | goto clean_up_runlist_info; |
305 | 305 | ||
306 | runlist->high_prio_channels = | ||
307 | kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), | ||
308 | GFP_KERNEL); | ||
309 | if (!runlist->high_prio_channels) | ||
310 | goto clean_up_runlist_info; | ||
311 | |||
312 | runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries; | 306 | runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries; |
313 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | 307 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { |
314 | int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); | 308 | int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); |
@@ -337,9 +331,6 @@ clean_up_runlist_info: | |||
337 | kfree(runlist->active_tsgs); | 331 | kfree(runlist->active_tsgs); |
338 | runlist->active_tsgs = NULL; | 332 | runlist->active_tsgs = NULL; |
339 | 333 | ||
340 | kfree(runlist->high_prio_channels); | ||
341 | runlist->high_prio_channels = NULL; | ||
342 | |||
343 | kfree(f->runlist_info); | 334 | kfree(f->runlist_info); |
344 | f->runlist_info = NULL; | 335 | f->runlist_info = NULL; |
345 | 336 | ||
@@ -2162,32 +2153,153 @@ static inline u32 gk20a_get_tsg_runlist_entry_0(struct tsg_gk20a *tsg) | |||
2162 | return runlist_entry_0; | 2153 | return runlist_entry_0; |
2163 | } | 2154 | } |
2164 | 2155 | ||
2165 | /* add all active high priority channels */ | 2156 | /* recursively construct a runlist with interleaved bare channels and TSGs */ |
2166 | static inline u32 gk20a_fifo_runlist_add_high_prio_entries( | 2157 | static u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f, |
2167 | struct fifo_gk20a *f, | 2158 | struct fifo_runlist_info_gk20a *runlist, |
2168 | struct fifo_runlist_info_gk20a *runlist, | 2159 | u32 cur_level, |
2169 | u32 *runlist_entry) | 2160 | u32 *runlist_entry, |
2161 | bool interleave_enabled, | ||
2162 | bool prev_empty, | ||
2163 | u32 *entries_left) | ||
2170 | { | 2164 | { |
2171 | struct channel_gk20a *ch = NULL; | 2165 | bool last_level = cur_level == NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH; |
2172 | unsigned long high_prio_chid; | 2166 | struct channel_gk20a *ch; |
2173 | u32 count = 0; | 2167 | bool skip_next = false; |
2168 | u32 chid, tsgid, count = 0; | ||
2169 | |||
2170 | gk20a_dbg_fn(""); | ||
2174 | 2171 | ||
2175 | for_each_set_bit(high_prio_chid, | 2172 | /* for each bare channel, CH, on this level, insert all higher-level |
2176 | runlist->high_prio_channels, f->num_channels) { | 2173 | channels and TSGs before inserting CH. */ |
2177 | ch = &f->channel[high_prio_chid]; | 2174 | for_each_set_bit(chid, runlist->active_channels, f->num_channels) { |
2175 | ch = &f->channel[chid]; | ||
2176 | |||
2177 | if (ch->interleave_level != cur_level) | ||
2178 | continue; | ||
2179 | |||
2180 | if (gk20a_is_channel_marked_as_tsg(ch)) | ||
2181 | continue; | ||
2182 | |||
2183 | if (!last_level && !skip_next) { | ||
2184 | runlist_entry = gk20a_runlist_construct_locked(f, | ||
2185 | runlist, | ||
2186 | cur_level + 1, | ||
2187 | runlist_entry, | ||
2188 | interleave_enabled, | ||
2189 | false, | ||
2190 | entries_left); | ||
2191 | /* if interleaving is disabled, higher-level channels | ||
2192 | and TSGs only need to be inserted once */ | ||
2193 | if (!interleave_enabled) | ||
2194 | skip_next = true; | ||
2195 | } | ||
2196 | |||
2197 | if (!(*entries_left)) | ||
2198 | return NULL; | ||
2199 | |||
2200 | gk20a_dbg_info("add channel %d to runlist", chid); | ||
2201 | runlist_entry[0] = ram_rl_entry_chid_f(chid); | ||
2202 | runlist_entry[1] = 0; | ||
2203 | runlist_entry += 2; | ||
2204 | count++; | ||
2205 | (*entries_left)--; | ||
2206 | } | ||
2178 | 2207 | ||
2179 | if (!gk20a_is_channel_marked_as_tsg(ch) && | 2208 | /* for each TSG, T, on this level, insert all higher-level channels |
2180 | test_bit(high_prio_chid, runlist->active_channels) == 1) { | 2209 | and TSGs before inserting T. */ |
2181 | gk20a_dbg_info("add high prio channel %lu to runlist", | 2210 | for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) { |
2182 | high_prio_chid); | 2211 | struct tsg_gk20a *tsg = &f->tsg[tsgid]; |
2183 | runlist_entry[0] = ram_rl_entry_chid_f(high_prio_chid); | 2212 | |
2213 | if (tsg->interleave_level != cur_level) | ||
2214 | continue; | ||
2215 | |||
2216 | if (!last_level && !skip_next) { | ||
2217 | runlist_entry = gk20a_runlist_construct_locked(f, | ||
2218 | runlist, | ||
2219 | cur_level + 1, | ||
2220 | runlist_entry, | ||
2221 | interleave_enabled, | ||
2222 | false, | ||
2223 | entries_left); | ||
2224 | if (!interleave_enabled) | ||
2225 | skip_next = true; | ||
2226 | } | ||
2227 | |||
2228 | if (!(*entries_left)) | ||
2229 | return NULL; | ||
2230 | |||
2231 | /* add TSG entry */ | ||
2232 | gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid); | ||
2233 | runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg); | ||
2234 | runlist_entry[1] = 0; | ||
2235 | runlist_entry += 2; | ||
2236 | count++; | ||
2237 | (*entries_left)--; | ||
2238 | |||
2239 | mutex_lock(&tsg->ch_list_lock); | ||
2240 | /* add runnable channels bound to this TSG */ | ||
2241 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) { | ||
2242 | if (!test_bit(ch->hw_chid, | ||
2243 | runlist->active_channels)) | ||
2244 | continue; | ||
2245 | |||
2246 | if (!(*entries_left)) { | ||
2247 | mutex_unlock(&tsg->ch_list_lock); | ||
2248 | return NULL; | ||
2249 | } | ||
2250 | |||
2251 | gk20a_dbg_info("add channel %d to runlist", | ||
2252 | ch->hw_chid); | ||
2253 | runlist_entry[0] = ram_rl_entry_chid_f(ch->hw_chid); | ||
2184 | runlist_entry[1] = 0; | 2254 | runlist_entry[1] = 0; |
2185 | runlist_entry += 2; | 2255 | runlist_entry += 2; |
2186 | count++; | 2256 | count++; |
2257 | (*entries_left)--; | ||
2187 | } | 2258 | } |
2259 | mutex_unlock(&tsg->ch_list_lock); | ||
2188 | } | 2260 | } |
2189 | 2261 | ||
2190 | return count; | 2262 | /* append entries from higher level if this level is empty */ |
2263 | if (!count && !last_level) | ||
2264 | runlist_entry = gk20a_runlist_construct_locked(f, | ||
2265 | runlist, | ||
2266 | cur_level + 1, | ||
2267 | runlist_entry, | ||
2268 | interleave_enabled, | ||
2269 | true, | ||
2270 | entries_left); | ||
2271 | |||
2272 | /* | ||
2273 | * if previous and this level have entries, append | ||
2274 | * entries from higher level. | ||
2275 | * | ||
2276 | * ex. dropping from MEDIUM to LOW, need to insert HIGH | ||
2277 | */ | ||
2278 | if (interleave_enabled && count && !prev_empty && !last_level) | ||
2279 | runlist_entry = gk20a_runlist_construct_locked(f, | ||
2280 | runlist, | ||
2281 | cur_level + 1, | ||
2282 | runlist_entry, | ||
2283 | interleave_enabled, | ||
2284 | false, | ||
2285 | entries_left); | ||
2286 | return runlist_entry; | ||
2287 | } | ||
2288 | |||
2289 | int gk20a_fifo_set_runlist_interleave(struct gk20a *g, | ||
2290 | u32 id, | ||
2291 | bool is_tsg, | ||
2292 | u32 runlist_id, | ||
2293 | u32 new_level) | ||
2294 | { | ||
2295 | gk20a_dbg_fn(""); | ||
2296 | |||
2297 | if (is_tsg) | ||
2298 | g->fifo.tsg[id].interleave_level = new_level; | ||
2299 | else | ||
2300 | g->fifo.channel[id].interleave_level = new_level; | ||
2301 | |||
2302 | return 0; | ||
2191 | } | 2303 | } |
2192 | 2304 | ||
2193 | static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | 2305 | static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, |
@@ -2198,14 +2310,11 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2198 | struct fifo_gk20a *f = &g->fifo; | 2310 | struct fifo_gk20a *f = &g->fifo; |
2199 | struct fifo_runlist_info_gk20a *runlist = NULL; | 2311 | struct fifo_runlist_info_gk20a *runlist = NULL; |
2200 | u32 *runlist_entry_base = NULL; | 2312 | u32 *runlist_entry_base = NULL; |
2201 | u32 *runlist_entry = NULL; | ||
2202 | u64 runlist_iova; | 2313 | u64 runlist_iova; |
2203 | u32 old_buf, new_buf; | 2314 | u32 old_buf, new_buf; |
2204 | u32 chid, tsgid; | ||
2205 | struct channel_gk20a *ch = NULL; | 2315 | struct channel_gk20a *ch = NULL; |
2206 | struct tsg_gk20a *tsg = NULL; | 2316 | struct tsg_gk20a *tsg = NULL; |
2207 | u32 count = 0; | 2317 | u32 count = 0; |
2208 | u32 count_channels_in_tsg; | ||
2209 | runlist = &f->runlist_info[runlist_id]; | 2318 | runlist = &f->runlist_info[runlist_id]; |
2210 | 2319 | ||
2211 | /* valid channel, add/remove it from active list. | 2320 | /* valid channel, add/remove it from active list. |
@@ -2254,91 +2363,23 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2254 | 2363 | ||
2255 | if (hw_chid != ~0 || /* add/remove a valid channel */ | 2364 | if (hw_chid != ~0 || /* add/remove a valid channel */ |
2256 | add /* resume to add all channels back */) { | 2365 | add /* resume to add all channels back */) { |
2257 | runlist_entry = runlist_entry_base; | 2366 | u32 max_entries = f->num_runlist_entries; |
2258 | 2367 | u32 *runlist_end; | |
2259 | /* Runlist manipulation: | ||
2260 | Insert an entry of all high priority channels inbetween | ||
2261 | all lower priority channels. This ensure that the maximum | ||
2262 | delay a runnable high priority channel has to wait is one | ||
2263 | medium timeslice + any context switching overhead + | ||
2264 | wait on other high priority channels. | ||
2265 | add non-TSG channels first */ | ||
2266 | for_each_set_bit(chid, | ||
2267 | runlist->active_channels, f->num_channels) { | ||
2268 | ch = &f->channel[chid]; | ||
2269 | |||
2270 | if (!gk20a_is_channel_marked_as_tsg(ch) && | ||
2271 | !ch->interleave) { | ||
2272 | u32 added; | ||
2273 | |||
2274 | gk20a_dbg_info("add normal prio channel %d to runlist", | ||
2275 | chid); | ||
2276 | runlist_entry[0] = ram_rl_entry_chid_f(chid); | ||
2277 | runlist_entry[1] = 0; | ||
2278 | runlist_entry += 2; | ||
2279 | count++; | ||
2280 | |||
2281 | added = gk20a_fifo_runlist_add_high_prio_entries( | ||
2282 | f, | ||
2283 | runlist, | ||
2284 | runlist_entry); | ||
2285 | count += added; | ||
2286 | runlist_entry += 2 * added; | ||
2287 | } | ||
2288 | } | ||
2289 | 2368 | ||
2290 | /* if there were no lower priority channels, then just | 2369 | runlist_end = gk20a_runlist_construct_locked(f, |
2291 | * add the high priority channels once. */ | 2370 | runlist, |
2292 | if (count == 0) { | 2371 | 0, |
2293 | count = gk20a_fifo_runlist_add_high_prio_entries( | 2372 | runlist_entry_base, |
2294 | f, | 2373 | g->runlist_interleave, |
2295 | runlist, | 2374 | true, |
2296 | runlist_entry); | 2375 | &max_entries); |
2297 | runlist_entry += 2 * count; | 2376 | if (!runlist_end) { |
2377 | ret = -E2BIG; | ||
2378 | goto clean_up; | ||
2298 | } | 2379 | } |
2299 | 2380 | ||
2300 | /* now add TSG entries and channels bound to TSG */ | 2381 | count = (runlist_end - runlist_entry_base) / 2; |
2301 | mutex_lock(&f->tsg_inuse_mutex); | 2382 | WARN_ON(count > f->num_runlist_entries); |
2302 | for_each_set_bit(tsgid, | ||
2303 | runlist->active_tsgs, f->num_channels) { | ||
2304 | u32 added; | ||
2305 | tsg = &f->tsg[tsgid]; | ||
2306 | /* add TSG entry */ | ||
2307 | gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid); | ||
2308 | runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg); | ||
2309 | runlist_entry[1] = 0; | ||
2310 | runlist_entry += 2; | ||
2311 | count++; | ||
2312 | |||
2313 | /* add runnable channels bound to this TSG */ | ||
2314 | count_channels_in_tsg = 0; | ||
2315 | mutex_lock(&tsg->ch_list_lock); | ||
2316 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) { | ||
2317 | if (!test_bit(ch->hw_chid, | ||
2318 | runlist->active_channels)) | ||
2319 | continue; | ||
2320 | gk20a_dbg_info("add channel %d to runlist", | ||
2321 | ch->hw_chid); | ||
2322 | runlist_entry[0] = | ||
2323 | ram_rl_entry_chid_f(ch->hw_chid); | ||
2324 | runlist_entry[1] = 0; | ||
2325 | runlist_entry += 2; | ||
2326 | count++; | ||
2327 | count_channels_in_tsg++; | ||
2328 | } | ||
2329 | mutex_unlock(&tsg->ch_list_lock); | ||
2330 | |||
2331 | WARN_ON(tsg->num_active_channels != | ||
2332 | count_channels_in_tsg); | ||
2333 | |||
2334 | added = gk20a_fifo_runlist_add_high_prio_entries( | ||
2335 | f, | ||
2336 | runlist, | ||
2337 | runlist_entry); | ||
2338 | count += added; | ||
2339 | runlist_entry += 2 * added; | ||
2340 | } | ||
2341 | mutex_unlock(&f->tsg_inuse_mutex); | ||
2342 | } else /* suspend to remove all channels */ | 2383 | } else /* suspend to remove all channels */ |
2343 | count = 0; | 2384 | count = 0; |
2344 | 2385 | ||
@@ -2493,42 +2534,6 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g) | |||
2493 | return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f(); | 2534 | return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f(); |
2494 | } | 2535 | } |
2495 | 2536 | ||
2496 | int gk20a_fifo_set_channel_priority( | ||
2497 | struct gk20a *g, | ||
2498 | u32 runlist_id, | ||
2499 | u32 hw_chid, | ||
2500 | bool interleave) | ||
2501 | { | ||
2502 | struct fifo_runlist_info_gk20a *runlist = NULL; | ||
2503 | struct fifo_gk20a *f = &g->fifo; | ||
2504 | struct channel_gk20a *ch = NULL; | ||
2505 | |||
2506 | if (hw_chid >= f->num_channels) | ||
2507 | return -EINVAL; | ||
2508 | |||
2509 | if (runlist_id >= f->max_runlists) | ||
2510 | return -EINVAL; | ||
2511 | |||
2512 | ch = &f->channel[hw_chid]; | ||
2513 | |||
2514 | gk20a_dbg_fn(""); | ||
2515 | |||
2516 | runlist = &f->runlist_info[runlist_id]; | ||
2517 | |||
2518 | mutex_lock(&runlist->mutex); | ||
2519 | |||
2520 | if (ch->interleave) | ||
2521 | set_bit(hw_chid, runlist->high_prio_channels); | ||
2522 | else | ||
2523 | clear_bit(hw_chid, runlist->high_prio_channels); | ||
2524 | |||
2525 | gk20a_dbg_fn("done"); | ||
2526 | |||
2527 | mutex_unlock(&runlist->mutex); | ||
2528 | |||
2529 | return 0; | ||
2530 | } | ||
2531 | |||
2532 | struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, | 2537 | struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, |
2533 | u32 hw_chid) | 2538 | u32 hw_chid) |
2534 | { | 2539 | { |
@@ -2545,4 +2550,5 @@ void gk20a_init_fifo(struct gpu_ops *gops) | |||
2545 | gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle; | 2550 | gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle; |
2546 | gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos; | 2551 | gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos; |
2547 | gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature; | 2552 | gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature; |
2553 | gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave; | ||
2548 | } | 2554 | } |