diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 133 |
1 files changed, 123 insertions, 10 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 5c99877b..ca5c0ee6 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -303,7 +303,13 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) | |||
303 | if (!runlist->active_tsgs) | 303 | if (!runlist->active_tsgs) |
304 | goto clean_up_runlist_info; | 304 | goto clean_up_runlist_info; |
305 | 305 | ||
306 | runlist_size = ram_rl_entry_size_v() * f->num_channels; | 306 | runlist->high_prio_channels = |
307 | kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), | ||
308 | GFP_KERNEL); | ||
309 | if (!runlist->high_prio_channels) | ||
310 | goto clean_up_runlist_info; | ||
311 | |||
312 | runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries; | ||
307 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | 313 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { |
308 | int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); | 314 | int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); |
309 | if (err) { | 315 | if (err) { |
@@ -324,10 +330,16 @@ clean_up_runlist: | |||
324 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) | 330 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) |
325 | gk20a_gmmu_free(g, &runlist->mem[i]); | 331 | gk20a_gmmu_free(g, &runlist->mem[i]); |
326 | 332 | ||
333 | clean_up_runlist_info: | ||
327 | kfree(runlist->active_channels); | 334 | kfree(runlist->active_channels); |
328 | runlist->active_channels = NULL; | 335 | runlist->active_channels = NULL; |
329 | 336 | ||
330 | clean_up_runlist_info: | 337 | kfree(runlist->active_tsgs); |
338 | runlist->active_tsgs = NULL; | ||
339 | |||
340 | kfree(runlist->high_prio_channels); | ||
341 | runlist->high_prio_channels = NULL; | ||
342 | |||
331 | kfree(f->runlist_info); | 343 | kfree(f->runlist_info); |
332 | f->runlist_info = NULL; | 344 | f->runlist_info = NULL; |
333 | 345 | ||
@@ -483,6 +495,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) | |||
483 | gk20a_init_fifo_pbdma_intr_descs(f); /* just filling in data/tables */ | 495 | gk20a_init_fifo_pbdma_intr_descs(f); /* just filling in data/tables */ |
484 | 496 | ||
485 | f->num_channels = g->ops.fifo.get_num_fifos(g); | 497 | f->num_channels = g->ops.fifo.get_num_fifos(g); |
498 | f->num_runlist_entries = fifo_eng_runlist_length_max_v(); | ||
486 | f->num_pbdma = proj_host_num_pbdma_v(); | 499 | f->num_pbdma = proj_host_num_pbdma_v(); |
487 | f->max_engines = ENGINE_INVAL_GK20A; | 500 | f->max_engines = ENGINE_INVAL_GK20A; |
488 | 501 | ||
@@ -2149,6 +2162,34 @@ static inline u32 gk20a_get_tsg_runlist_entry_0(struct tsg_gk20a *tsg) | |||
2149 | return runlist_entry_0; | 2162 | return runlist_entry_0; |
2150 | } | 2163 | } |
2151 | 2164 | ||
2165 | /* Write one runlist entry for every channel whose bit is set in runlist->high_prio_channels, provided it is also set in runlist->active_channels and is not marked as a TSG channel. Each entry is two u32 words (channel id field, then 0), stored starting at runlist_entry. Returns the number of entries written; the caller must advance its own pointer by 2 * that count. */ | |
2166 | static inline u32 gk20a_fifo_runlist_add_high_prio_entries( | |
2167 | struct fifo_gk20a *f, | |
2168 | struct fifo_runlist_info_gk20a *runlist, | |
2169 | u32 *runlist_entry) | |
2170 | { | |
2171 | struct channel_gk20a *ch = NULL; | |
2172 | unsigned long high_prio_chid; | |
2173 | u32 count = 0; | |
2174 | |||
2175 | for_each_set_bit(high_prio_chid, | |
2176 | runlist->high_prio_channels, f->num_channels) { | |
2177 | ch = &f->channel[high_prio_chid]; | |
2178 | |||
2179 | if (!gk20a_is_channel_marked_as_tsg(ch) && | |
2180 | test_bit(high_prio_chid, runlist->active_channels) == 1) { | |
2181 | gk20a_dbg_info("add high prio channel %lu to runlist", | |
2182 | high_prio_chid); | |
2183 | runlist_entry[0] = ram_rl_entry_chid_f(high_prio_chid); | |
2184 | runlist_entry[1] = 0; | |
2185 | runlist_entry += 2; /* moves only this function's local copy of the pointer */ | |
2186 | count++; | |
2187 | } | |
2188 | } | |
2189 | |||
2190 | return count; | |
2191 | } | |
2192 | |||
2152 | static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | 2193 | static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, |
2153 | u32 hw_chid, bool add, | 2194 | u32 hw_chid, bool add, |
2154 | bool wait_for_finish) | 2195 | bool wait_for_finish) |
@@ -2158,7 +2199,7 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2158 | struct fifo_runlist_info_gk20a *runlist = NULL; | 2199 | struct fifo_runlist_info_gk20a *runlist = NULL; |
2159 | u32 *runlist_entry_base = NULL; | 2200 | u32 *runlist_entry_base = NULL; |
2160 | u32 *runlist_entry = NULL; | 2201 | u32 *runlist_entry = NULL; |
2161 | phys_addr_t runlist_pa; | 2202 | u64 runlist_iova; |
2162 | u32 old_buf, new_buf; | 2203 | u32 old_buf, new_buf; |
2163 | u32 chid, tsgid; | 2204 | u32 chid, tsgid; |
2164 | struct channel_gk20a *ch = NULL; | 2205 | struct channel_gk20a *ch = NULL; |
@@ -2194,11 +2235,13 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2194 | old_buf = runlist->cur_buffer; | 2235 | old_buf = runlist->cur_buffer; |
2195 | new_buf = !runlist->cur_buffer; | 2236 | new_buf = !runlist->cur_buffer; |
2196 | 2237 | ||
2238 | runlist_iova = g->ops.mm.get_iova_addr( | ||
2239 | g, runlist->mem[new_buf].sgt->sgl, 0); | ||
2240 | |||
2197 | gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx", | 2241 | gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx", |
2198 | runlist_id, (u64)gk20a_mem_phys(&runlist->mem[new_buf])); | 2242 | runlist_id, (u64)runlist_iova); |
2199 | 2243 | ||
2200 | runlist_pa = gk20a_mem_phys(&runlist->mem[new_buf]); | 2244 | if (!runlist_iova) { |
2201 | if (!runlist_pa) { | ||
2202 | ret = -EINVAL; | 2245 | ret = -EINVAL; |
2203 | goto clean_up; | 2246 | goto clean_up; |
2204 | } | 2247 | } |
@@ -2213,25 +2256,52 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2213 | add /* resume to add all channels back */) { | 2256 | add /* resume to add all channels back */) { |
2214 | runlist_entry = runlist_entry_base; | 2257 | runlist_entry = runlist_entry_base; |
2215 | 2258 | ||
2216 | /* add non-TSG channels first */ | 2259 | /* Runlist manipulation: |
2260 | Insert an entry of all high priority channels in between | ||
2261 | all lower priority channels. This ensures that the maximum | ||
2262 | delay a runnable high priority channel has to wait is one | ||
2263 | medium timeslice + any context switching overhead + | ||
2264 | wait on other high priority channels. | ||
2265 | add non-TSG channels first */ | ||
2217 | for_each_set_bit(chid, | 2266 | for_each_set_bit(chid, |
2218 | runlist->active_channels, f->num_channels) { | 2267 | runlist->active_channels, f->num_channels) { |
2219 | ch = &f->channel[chid]; | 2268 | ch = &f->channel[chid]; |
2220 | 2269 | ||
2221 | if (!gk20a_is_channel_marked_as_tsg(ch)) { | 2270 | if (!gk20a_is_channel_marked_as_tsg(ch) && |
2222 | gk20a_dbg_info("add channel %d to runlist", | 2271 | !ch->interleave) { |
2272 | u32 added; | ||
2273 | |||
2274 | gk20a_dbg_info("add normal prio channel %d to runlist", | ||
2223 | chid); | 2275 | chid); |
2224 | runlist_entry[0] = ram_rl_entry_chid_f(chid); | 2276 | runlist_entry[0] = ram_rl_entry_chid_f(chid); |
2225 | runlist_entry[1] = 0; | 2277 | runlist_entry[1] = 0; |
2226 | runlist_entry += 2; | 2278 | runlist_entry += 2; |
2227 | count++; | 2279 | count++; |
2280 | |||
2281 | added = gk20a_fifo_runlist_add_high_prio_entries( | ||
2282 | f, | ||
2283 | runlist, | ||
2284 | runlist_entry); | ||
2285 | count += added; | ||
2286 | runlist_entry += 2 * added; | ||
2228 | } | 2287 | } |
2229 | } | 2288 | } |
2230 | 2289 | ||
2290 | /* if there were no lower priority channels, then just | ||
2291 | * add the high priority channels once. */ | ||
2292 | if (count == 0) { | ||
2293 | count = gk20a_fifo_runlist_add_high_prio_entries( | ||
2294 | f, | ||
2295 | runlist, | ||
2296 | runlist_entry); | ||
2297 | runlist_entry += 2 * count; | ||
2298 | } | ||
2299 | |||
2231 | /* now add TSG entries and channels bound to TSG */ | 2300 | /* now add TSG entries and channels bound to TSG */ |
2232 | mutex_lock(&f->tsg_inuse_mutex); | 2301 | mutex_lock(&f->tsg_inuse_mutex); |
2233 | for_each_set_bit(tsgid, | 2302 | for_each_set_bit(tsgid, |
2234 | runlist->active_tsgs, f->num_channels) { | 2303 | runlist->active_tsgs, f->num_channels) { |
2304 | u32 added; | ||
2235 | tsg = &f->tsg[tsgid]; | 2305 | tsg = &f->tsg[tsgid]; |
2236 | /* add TSG entry */ | 2306 | /* add TSG entry */ |
2237 | gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid); | 2307 | gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid); |
@@ -2260,6 +2330,13 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2260 | 2330 | ||
2261 | WARN_ON(tsg->num_active_channels != | 2331 | WARN_ON(tsg->num_active_channels != |
2262 | count_channels_in_tsg); | 2332 | count_channels_in_tsg); |
2333 | |||
2334 | added = gk20a_fifo_runlist_add_high_prio_entries( | ||
2335 | f, | ||
2336 | runlist, | ||
2337 | runlist_entry); | ||
2338 | count += added; | ||
2339 | runlist_entry += 2 * added; | ||
2263 | } | 2340 | } |
2264 | mutex_unlock(&f->tsg_inuse_mutex); | 2341 | mutex_unlock(&f->tsg_inuse_mutex); |
2265 | } else /* suspend to remove all channels */ | 2342 | } else /* suspend to remove all channels */ |
@@ -2267,7 +2344,7 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2267 | 2344 | ||
2268 | if (count != 0) { | 2345 | if (count != 0) { |
2269 | gk20a_writel(g, fifo_runlist_base_r(), | 2346 | gk20a_writel(g, fifo_runlist_base_r(), |
2270 | fifo_runlist_base_ptr_f(u64_lo32(runlist_pa >> 12)) | | 2347 | fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) | |
2271 | fifo_runlist_base_target_vid_mem_f()); | 2348 | fifo_runlist_base_target_vid_mem_f()); |
2272 | } | 2349 | } |
2273 | 2350 | ||
@@ -2416,6 +2493,42 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g) | |||
2416 | return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f(); | 2493 | return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f(); |
2417 | } | 2494 | } |
2418 | 2495 | ||
2496 | int gk20a_fifo_set_channel_priority( | |
2497 | struct gk20a *g, | |
2498 | u32 runlist_id, | |
2499 | u32 hw_chid, | |
2500 | bool interleave) | |
2501 | { /* Sets or clears hw_chid's bit in the high_prio_channels bitmap of runlist runlist_id while holding runlist->mutex. Returns 0 on success, or -EINVAL if hw_chid or runlist_id is out of range. */ | |
2502 | struct fifo_runlist_info_gk20a *runlist = NULL; | |
2503 | struct fifo_gk20a *f = &g->fifo; | |
2504 | struct channel_gk20a *ch = NULL; | |
2505 | |||
2506 | if (hw_chid >= f->num_channels) /* reject ids that would index past f->channel[] */ | |
2507 | return -EINVAL; | |
2508 | |||
2509 | if (runlist_id >= f->max_runlists) /* reject ids that would index past f->runlist_info[] */ | |
2510 | return -EINVAL; | |
2511 | |||
2512 | ch = &f->channel[hw_chid]; | |
2513 | |||
2514 | gk20a_dbg_fn(""); | |
2515 | |||
2516 | runlist = &f->runlist_info[runlist_id]; | |
2517 | |||
2518 | mutex_lock(&runlist->mutex); | |
2519 | |||
2520 | if (ch->interleave) /* NOTE(review): the decision reads ch->interleave; the interleave argument is never used in this function - confirm callers set ch->interleave first */ | |
2521 | set_bit(hw_chid, runlist->high_prio_channels); | |
2522 | else | |
2523 | clear_bit(hw_chid, runlist->high_prio_channels); | |
2524 | |||
2525 | gk20a_dbg_fn("done"); | |
2526 | |||
2527 | mutex_unlock(&runlist->mutex); | |
2528 | |||
2529 | return 0; | |
2530 | } | |
2531 | |||
2419 | void gk20a_init_fifo(struct gpu_ops *gops) | 2532 | void gk20a_init_fifo(struct gpu_ops *gops) |
2420 | { | 2533 | { |
2421 | gk20a_init_channel(gops); | 2534 | gk20a_init_channel(gops); |