summary refs log tree commit diff stats
path: root/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
diff options
context:
space:
mode:
author Aingara Paramakuru <aparamakuru@nvidia.com> 2016-02-22 12:35:49 -0500
committer Terje Bergstrom <tbergstrom@nvidia.com> 2016-03-15 19:23:44 -0400
commit 2a58d3c27b45ca9d0d9dc2136377b7a41b9ed82d (patch)
tree 9d7464bfd0eea8e4b65f591996db59a98f4070e2 /drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
parent f07a046a52e7a8074bd1572a12ac65747d3f827d (diff)
gpu: nvgpu: improve channel interleave support
Previously, only "high" priority bare channels were interleaved between all other bare channels and TSGs. This patch decouples priority from interleaving and introduces 3 levels for interleaving a bare channel or TSG: high, medium, and low. The levels define the number of times a channel or TSG will appear on a runlist (see nvgpu.h for details).

By default, all bare channels and TSGs are set to interleave level low. Userspace can then request the interleave level to be increased via the CHANNEL_SET_RUNLIST_INTERLEAVE ioctl (TSG-specific ioctl will be added later).

As timeslice settings will soon be coming from userspace, the default timeslice for "high" priority channels has been restored.

JIRA VFND-1302
Bug 1729664

Change-Id: I178bc1cecda23f5002fec6d791e6dcaedfa05c0c
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/1014962
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c 300
1 files changed, 153 insertions, 147 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 769960af..28cc3086 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -303,12 +303,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
303 if (!runlist->active_tsgs) 303 if (!runlist->active_tsgs)
304 goto clean_up_runlist_info; 304 goto clean_up_runlist_info;
305 305
306 runlist->high_prio_channels =
307 kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
308 GFP_KERNEL);
309 if (!runlist->high_prio_channels)
310 goto clean_up_runlist_info;
311
312 runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries; 306 runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries;
313 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { 307 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
314 int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); 308 int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
@@ -337,9 +331,6 @@ clean_up_runlist_info:
337 kfree(runlist->active_tsgs); 331 kfree(runlist->active_tsgs);
338 runlist->active_tsgs = NULL; 332 runlist->active_tsgs = NULL;
339 333
340 kfree(runlist->high_prio_channels);
341 runlist->high_prio_channels = NULL;
342
343 kfree(f->runlist_info); 334 kfree(f->runlist_info);
344 f->runlist_info = NULL; 335 f->runlist_info = NULL;
345 336
@@ -2162,32 +2153,153 @@ static inline u32 gk20a_get_tsg_runlist_entry_0(struct tsg_gk20a *tsg)
2162 return runlist_entry_0; 2153 return runlist_entry_0;
2163} 2154}
2164 2155
2165/* add all active high priority channels */ 2156/* recursively construct a runlist with interleaved bare channels and TSGs */
2166static inline u32 gk20a_fifo_runlist_add_high_prio_entries( 2157static u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f,
2167 struct fifo_gk20a *f, 2158 struct fifo_runlist_info_gk20a *runlist,
2168 struct fifo_runlist_info_gk20a *runlist, 2159 u32 cur_level,
2169 u32 *runlist_entry) 2160 u32 *runlist_entry,
2161 bool interleave_enabled,
2162 bool prev_empty,
2163 u32 *entries_left)
2170{ 2164{
2171 struct channel_gk20a *ch = NULL; 2165 bool last_level = cur_level == NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH;
2172 unsigned long high_prio_chid; 2166 struct channel_gk20a *ch;
2173 u32 count = 0; 2167 bool skip_next = false;
2168 u32 chid, tsgid, count = 0;
2169
2170 gk20a_dbg_fn("");
2174 2171
2175 for_each_set_bit(high_prio_chid, 2172 /* for each bare channel, CH, on this level, insert all higher-level
2176 runlist->high_prio_channels, f->num_channels) { 2173 channels and TSGs before inserting CH. */
2177 ch = &f->channel[high_prio_chid]; 2174 for_each_set_bit(chid, runlist->active_channels, f->num_channels) {
2175 ch = &f->channel[chid];
2176
2177 if (ch->interleave_level != cur_level)
2178 continue;
2179
2180 if (gk20a_is_channel_marked_as_tsg(ch))
2181 continue;
2182
2183 if (!last_level && !skip_next) {
2184 runlist_entry = gk20a_runlist_construct_locked(f,
2185 runlist,
2186 cur_level + 1,
2187 runlist_entry,
2188 interleave_enabled,
2189 false,
2190 entries_left);
2191 /* if interleaving is disabled, higher-level channels
2192 and TSGs only need to be inserted once */
2193 if (!interleave_enabled)
2194 skip_next = true;
2195 }
2196
2197 if (!(*entries_left))
2198 return NULL;
2199
2200 gk20a_dbg_info("add channel %d to runlist", chid);
2201 runlist_entry[0] = ram_rl_entry_chid_f(chid);
2202 runlist_entry[1] = 0;
2203 runlist_entry += 2;
2204 count++;
2205 (*entries_left)--;
2206 }
2178 2207
2179 if (!gk20a_is_channel_marked_as_tsg(ch) && 2208 /* for each TSG, T, on this level, insert all higher-level channels
2180 test_bit(high_prio_chid, runlist->active_channels) == 1) { 2209 and TSGs before inserting T. */
2181 gk20a_dbg_info("add high prio channel %lu to runlist", 2210 for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
2182 high_prio_chid); 2211 struct tsg_gk20a *tsg = &f->tsg[tsgid];
2183 runlist_entry[0] = ram_rl_entry_chid_f(high_prio_chid); 2212
2213 if (tsg->interleave_level != cur_level)
2214 continue;
2215
2216 if (!last_level && !skip_next) {
2217 runlist_entry = gk20a_runlist_construct_locked(f,
2218 runlist,
2219 cur_level + 1,
2220 runlist_entry,
2221 interleave_enabled,
2222 false,
2223 entries_left);
2224 if (!interleave_enabled)
2225 skip_next = true;
2226 }
2227
2228 if (!(*entries_left))
2229 return NULL;
2230
2231 /* add TSG entry */
2232 gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
2233 runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg);
2234 runlist_entry[1] = 0;
2235 runlist_entry += 2;
2236 count++;
2237 (*entries_left)--;
2238
2239 mutex_lock(&tsg->ch_list_lock);
2240 /* add runnable channels bound to this TSG */
2241 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
2242 if (!test_bit(ch->hw_chid,
2243 runlist->active_channels))
2244 continue;
2245
2246 if (!(*entries_left)) {
2247 mutex_unlock(&tsg->ch_list_lock);
2248 return NULL;
2249 }
2250
2251 gk20a_dbg_info("add channel %d to runlist",
2252 ch->hw_chid);
2253 runlist_entry[0] = ram_rl_entry_chid_f(ch->hw_chid);
2184 runlist_entry[1] = 0; 2254 runlist_entry[1] = 0;
2185 runlist_entry += 2; 2255 runlist_entry += 2;
2186 count++; 2256 count++;
2257 (*entries_left)--;
2187 } 2258 }
2259 mutex_unlock(&tsg->ch_list_lock);
2188 } 2260 }
2189 2261
2190 return count; 2262 /* append entries from higher level if this level is empty */
2263 if (!count && !last_level)
2264 runlist_entry = gk20a_runlist_construct_locked(f,
2265 runlist,
2266 cur_level + 1,
2267 runlist_entry,
2268 interleave_enabled,
2269 true,
2270 entries_left);
2271
2272 /*
2273 * if previous and this level have entries, append
2274 * entries from higher level.
2275 *
2276 * ex. dropping from MEDIUM to LOW, need to insert HIGH
2277 */
2278 if (interleave_enabled && count && !prev_empty && !last_level)
2279 runlist_entry = gk20a_runlist_construct_locked(f,
2280 runlist,
2281 cur_level + 1,
2282 runlist_entry,
2283 interleave_enabled,
2284 false,
2285 entries_left);
2286 return runlist_entry;
2287}
2288
2289int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
2290 u32 id,
2291 bool is_tsg,
2292 u32 runlist_id,
2293 u32 new_level)
2294{
2295 gk20a_dbg_fn("");
2296
2297 if (is_tsg)
2298 g->fifo.tsg[id].interleave_level = new_level;
2299 else
2300 g->fifo.channel[id].interleave_level = new_level;
2301
2302 return 0;
2191} 2303}
2192 2304
2193static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, 2305static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
@@ -2198,14 +2310,11 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
2198 struct fifo_gk20a *f = &g->fifo; 2310 struct fifo_gk20a *f = &g->fifo;
2199 struct fifo_runlist_info_gk20a *runlist = NULL; 2311 struct fifo_runlist_info_gk20a *runlist = NULL;
2200 u32 *runlist_entry_base = NULL; 2312 u32 *runlist_entry_base = NULL;
2201 u32 *runlist_entry = NULL;
2202 u64 runlist_iova; 2313 u64 runlist_iova;
2203 u32 old_buf, new_buf; 2314 u32 old_buf, new_buf;
2204 u32 chid, tsgid;
2205 struct channel_gk20a *ch = NULL; 2315 struct channel_gk20a *ch = NULL;
2206 struct tsg_gk20a *tsg = NULL; 2316 struct tsg_gk20a *tsg = NULL;
2207 u32 count = 0; 2317 u32 count = 0;
2208 u32 count_channels_in_tsg;
2209 runlist = &f->runlist_info[runlist_id]; 2318 runlist = &f->runlist_info[runlist_id];
2210 2319
2211 /* valid channel, add/remove it from active list. 2320 /* valid channel, add/remove it from active list.
@@ -2254,91 +2363,23 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
2254 2363
2255 if (hw_chid != ~0 || /* add/remove a valid channel */ 2364 if (hw_chid != ~0 || /* add/remove a valid channel */
2256 add /* resume to add all channels back */) { 2365 add /* resume to add all channels back */) {
2257 runlist_entry = runlist_entry_base; 2366 u32 max_entries = f->num_runlist_entries;
2258 2367 u32 *runlist_end;
2259 /* Runlist manipulation:
2260 Insert an entry of all high priority channels inbetween
2261 all lower priority channels. This ensure that the maximum
2262 delay a runnable high priority channel has to wait is one
2263 medium timeslice + any context switching overhead +
2264 wait on other high priority channels.
2265 add non-TSG channels first */
2266 for_each_set_bit(chid,
2267 runlist->active_channels, f->num_channels) {
2268 ch = &f->channel[chid];
2269
2270 if (!gk20a_is_channel_marked_as_tsg(ch) &&
2271 !ch->interleave) {
2272 u32 added;
2273
2274 gk20a_dbg_info("add normal prio channel %d to runlist",
2275 chid);
2276 runlist_entry[0] = ram_rl_entry_chid_f(chid);
2277 runlist_entry[1] = 0;
2278 runlist_entry += 2;
2279 count++;
2280
2281 added = gk20a_fifo_runlist_add_high_prio_entries(
2282 f,
2283 runlist,
2284 runlist_entry);
2285 count += added;
2286 runlist_entry += 2 * added;
2287 }
2288 }
2289 2368
2290 /* if there were no lower priority channels, then just 2369 runlist_end = gk20a_runlist_construct_locked(f,
2291 * add the high priority channels once. */ 2370 runlist,
2292 if (count == 0) { 2371 0,
2293 count = gk20a_fifo_runlist_add_high_prio_entries( 2372 runlist_entry_base,
2294 f, 2373 g->runlist_interleave,
2295 runlist, 2374 true,
2296 runlist_entry); 2375 &max_entries);
2297 runlist_entry += 2 * count; 2376 if (!runlist_end) {
2377 ret = -E2BIG;
2378 goto clean_up;
2298 } 2379 }
2299 2380
2300 /* now add TSG entries and channels bound to TSG */ 2381 count = (runlist_end - runlist_entry_base) / 2;
2301 mutex_lock(&f->tsg_inuse_mutex); 2382 WARN_ON(count > f->num_runlist_entries);
2302 for_each_set_bit(tsgid,
2303 runlist->active_tsgs, f->num_channels) {
2304 u32 added;
2305 tsg = &f->tsg[tsgid];
2306 /* add TSG entry */
2307 gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
2308 runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg);
2309 runlist_entry[1] = 0;
2310 runlist_entry += 2;
2311 count++;
2312
2313 /* add runnable channels bound to this TSG */
2314 count_channels_in_tsg = 0;
2315 mutex_lock(&tsg->ch_list_lock);
2316 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
2317 if (!test_bit(ch->hw_chid,
2318 runlist->active_channels))
2319 continue;
2320 gk20a_dbg_info("add channel %d to runlist",
2321 ch->hw_chid);
2322 runlist_entry[0] =
2323 ram_rl_entry_chid_f(ch->hw_chid);
2324 runlist_entry[1] = 0;
2325 runlist_entry += 2;
2326 count++;
2327 count_channels_in_tsg++;
2328 }
2329 mutex_unlock(&tsg->ch_list_lock);
2330
2331 WARN_ON(tsg->num_active_channels !=
2332 count_channels_in_tsg);
2333
2334 added = gk20a_fifo_runlist_add_high_prio_entries(
2335 f,
2336 runlist,
2337 runlist_entry);
2338 count += added;
2339 runlist_entry += 2 * added;
2340 }
2341 mutex_unlock(&f->tsg_inuse_mutex);
2342 } else /* suspend to remove all channels */ 2383 } else /* suspend to remove all channels */
2343 count = 0; 2384 count = 0;
2344 2385
@@ -2493,42 +2534,6 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g)
2493 return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f(); 2534 return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f();
2494} 2535}
2495 2536
2496int gk20a_fifo_set_channel_priority(
2497 struct gk20a *g,
2498 u32 runlist_id,
2499 u32 hw_chid,
2500 bool interleave)
2501{
2502 struct fifo_runlist_info_gk20a *runlist = NULL;
2503 struct fifo_gk20a *f = &g->fifo;
2504 struct channel_gk20a *ch = NULL;
2505
2506 if (hw_chid >= f->num_channels)
2507 return -EINVAL;
2508
2509 if (runlist_id >= f->max_runlists)
2510 return -EINVAL;
2511
2512 ch = &f->channel[hw_chid];
2513
2514 gk20a_dbg_fn("");
2515
2516 runlist = &f->runlist_info[runlist_id];
2517
2518 mutex_lock(&runlist->mutex);
2519
2520 if (ch->interleave)
2521 set_bit(hw_chid, runlist->high_prio_channels);
2522 else
2523 clear_bit(hw_chid, runlist->high_prio_channels);
2524
2525 gk20a_dbg_fn("done");
2526
2527 mutex_unlock(&runlist->mutex);
2528
2529 return 0;
2530}
2531
2532struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, 2537struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
2533 u32 hw_chid) 2538 u32 hw_chid)
2534{ 2539{
@@ -2545,4 +2550,5 @@ void gk20a_init_fifo(struct gpu_ops *gops)
2545 gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle; 2550 gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
2546 gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos; 2551 gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos;
2547 gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature; 2552 gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature;
2553 gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave;
2548} 2554}