diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 50 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 133 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 16 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 14 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hw_fifo_gm20b.h | 4 |
8 files changed, 210 insertions, 18 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index a5c2efb3..0421c0f6 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -175,7 +175,7 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, | |||
175 | } | 175 | } |
176 | 176 | ||
177 | static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, | 177 | static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, |
178 | u32 timeslice_period) | 178 | u32 timeslice_period, bool interleave) |
179 | { | 179 | { |
180 | void *inst_ptr; | 180 | void *inst_ptr; |
181 | int shift = 0, value = 0; | 181 | int shift = 0, value = 0; |
@@ -203,6 +203,30 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, | |||
203 | gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) | | 203 | gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) | |
204 | ccsr_channel_enable_set_true_f()); | 204 | ccsr_channel_enable_set_true_f()); |
205 | 205 | ||
206 | if (c->interleave != interleave) { | ||
207 | mutex_lock(&c->g->interleave_lock); | ||
208 | c->interleave = interleave; | ||
209 | if (interleave) | ||
210 | if (c->g->num_interleaved_channels >= | ||
211 | MAX_INTERLEAVED_CHANNELS) { | ||
212 | gk20a_err(dev_from_gk20a(c->g), | ||
213 | "Change of priority would exceed runlist length, only changing timeslice\n"); | ||
214 | c->interleave = false; | ||
215 | } else | ||
216 | c->g->num_interleaved_channels += 1; | ||
217 | else | ||
218 | c->g->num_interleaved_channels -= 1; | ||
219 | |||
220 | mutex_unlock(&c->g->interleave_lock); | ||
221 | gk20a_dbg_info("Set channel %d to interleave %d", | ||
222 | c->hw_chid, c->interleave); | ||
223 | |||
224 | gk20a_fifo_set_channel_priority( | ||
225 | c->g, 0, c->hw_chid, c->interleave); | ||
226 | c->g->ops.fifo.update_runlist( | ||
227 | c->g, 0, ~0, true, false); | ||
228 | } | ||
229 | |||
206 | return 0; | 230 | return 0; |
207 | } | 231 | } |
208 | 232 | ||
@@ -836,6 +860,17 @@ static void gk20a_free_channel(struct channel_gk20a *ch) | |||
836 | } | 860 | } |
837 | mutex_unlock(&f->deferred_reset_mutex); | 861 | mutex_unlock(&f->deferred_reset_mutex); |
838 | 862 | ||
863 | if (ch->interleave) { | ||
864 | ch->interleave = false; | ||
865 | gk20a_fifo_set_channel_priority( | ||
866 | ch->g, 0, ch->hw_chid, ch->interleave); | ||
867 | |||
868 | mutex_lock(&f->g->interleave_lock); | ||
869 | WARN_ON(f->g->num_interleaved_channels == 0); | ||
870 | f->g->num_interleaved_channels -= 1; | ||
871 | mutex_unlock(&f->g->interleave_lock); | ||
872 | } | ||
873 | |||
839 | if (!ch->bound) | 874 | if (!ch->bound) |
840 | goto release; | 875 | goto release; |
841 | 876 | ||
@@ -1079,6 +1114,10 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g) | |||
1079 | ch->timeout_debug_dump = true; | 1114 | ch->timeout_debug_dump = true; |
1080 | ch->has_timedout = false; | 1115 | ch->has_timedout = false; |
1081 | ch->obj_class = 0; | 1116 | ch->obj_class = 0; |
1117 | ch->interleave = false; | ||
1118 | gk20a_fifo_set_channel_priority( | ||
1119 | ch->g, 0, ch->hw_chid, ch->interleave); | ||
1120 | |||
1082 | 1121 | ||
1083 | /* The channel is *not* runnable at this point. It still needs to have | 1122 | /* The channel is *not* runnable at this point. It still needs to have |
1084 | * an address space bound and allocate a gpfifo and grctx. */ | 1123 | * an address space bound and allocate a gpfifo and grctx. */ |
@@ -2458,6 +2497,7 @@ static int gk20a_channel_set_priority(struct channel_gk20a *ch, | |||
2458 | u32 priority) | 2497 | u32 priority) |
2459 | { | 2498 | { |
2460 | u32 timeslice_timeout; | 2499 | u32 timeslice_timeout; |
2500 | bool interleave = false; | ||
2461 | 2501 | ||
2462 | if (gk20a_is_channel_marked_as_tsg(ch)) { | 2502 | if (gk20a_is_channel_marked_as_tsg(ch)) { |
2463 | gk20a_err(dev_from_gk20a(ch->g), | 2503 | gk20a_err(dev_from_gk20a(ch->g), |
@@ -2474,15 +2514,17 @@ static int gk20a_channel_set_priority(struct channel_gk20a *ch, | |||
2474 | timeslice_timeout = ch->g->timeslice_medium_priority_us; | 2514 | timeslice_timeout = ch->g->timeslice_medium_priority_us; |
2475 | break; | 2515 | break; |
2476 | case NVGPU_PRIORITY_HIGH: | 2516 | case NVGPU_PRIORITY_HIGH: |
2517 | if (ch->g->interleave_high_priority) | ||
2518 | interleave = true; | ||
2477 | timeslice_timeout = ch->g->timeslice_high_priority_us; | 2519 | timeslice_timeout = ch->g->timeslice_high_priority_us; |
2478 | break; | 2520 | break; |
2479 | default: | 2521 | default: |
2480 | pr_err("Unsupported priority"); | 2522 | pr_err("Unsupported priority"); |
2481 | return -EINVAL; | 2523 | return -EINVAL; |
2482 | } | 2524 | } |
2483 | channel_gk20a_set_schedule_params(ch, | 2525 | |
2484 | timeslice_timeout); | 2526 | return channel_gk20a_set_schedule_params(ch, |
2485 | return 0; | 2527 | timeslice_timeout, interleave); |
2486 | } | 2528 | } |
2487 | 2529 | ||
2488 | static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, | 2530 | static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index ddc517b9..91ae0e7a 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -180,6 +180,9 @@ struct channel_gk20a { | |||
180 | void *update_fn_data; | 180 | void *update_fn_data; |
181 | spinlock_t update_fn_lock; /* make access to the two above atomic */ | 181 | spinlock_t update_fn_lock; /* make access to the two above atomic */ |
182 | struct work_struct update_fn_work; | 182 | struct work_struct update_fn_work; |
183 | |||
184 | /* true if channel is interleaved with lower priority channels */ | ||
185 | bool interleave; | ||
183 | }; | 186 | }; |
184 | 187 | ||
185 | static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch) | 188 | static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch) |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 5c99877b..ca5c0ee6 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -303,7 +303,13 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) | |||
303 | if (!runlist->active_tsgs) | 303 | if (!runlist->active_tsgs) |
304 | goto clean_up_runlist_info; | 304 | goto clean_up_runlist_info; |
305 | 305 | ||
306 | runlist_size = ram_rl_entry_size_v() * f->num_channels; | 306 | runlist->high_prio_channels = |
307 | kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), | ||
308 | GFP_KERNEL); | ||
309 | if (!runlist->high_prio_channels) | ||
310 | goto clean_up_runlist_info; | ||
311 | |||
312 | runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries; | ||
307 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | 313 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { |
308 | int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); | 314 | int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); |
309 | if (err) { | 315 | if (err) { |
@@ -324,10 +330,16 @@ clean_up_runlist: | |||
324 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) | 330 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) |
325 | gk20a_gmmu_free(g, &runlist->mem[i]); | 331 | gk20a_gmmu_free(g, &runlist->mem[i]); |
326 | 332 | ||
333 | clean_up_runlist_info: | ||
327 | kfree(runlist->active_channels); | 334 | kfree(runlist->active_channels); |
328 | runlist->active_channels = NULL; | 335 | runlist->active_channels = NULL; |
329 | 336 | ||
330 | clean_up_runlist_info: | 337 | kfree(runlist->active_tsgs); |
338 | runlist->active_tsgs = NULL; | ||
339 | |||
340 | kfree(runlist->high_prio_channels); | ||
341 | runlist->high_prio_channels = NULL; | ||
342 | |||
331 | kfree(f->runlist_info); | 343 | kfree(f->runlist_info); |
332 | f->runlist_info = NULL; | 344 | f->runlist_info = NULL; |
333 | 345 | ||
@@ -483,6 +495,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) | |||
483 | gk20a_init_fifo_pbdma_intr_descs(f); /* just filling in data/tables */ | 495 | gk20a_init_fifo_pbdma_intr_descs(f); /* just filling in data/tables */ |
484 | 496 | ||
485 | f->num_channels = g->ops.fifo.get_num_fifos(g); | 497 | f->num_channels = g->ops.fifo.get_num_fifos(g); |
498 | f->num_runlist_entries = fifo_eng_runlist_length_max_v(); | ||
486 | f->num_pbdma = proj_host_num_pbdma_v(); | 499 | f->num_pbdma = proj_host_num_pbdma_v(); |
487 | f->max_engines = ENGINE_INVAL_GK20A; | 500 | f->max_engines = ENGINE_INVAL_GK20A; |
488 | 501 | ||
@@ -2149,6 +2162,34 @@ static inline u32 gk20a_get_tsg_runlist_entry_0(struct tsg_gk20a *tsg) | |||
2149 | return runlist_entry_0; | 2162 | return runlist_entry_0; |
2150 | } | 2163 | } |
2151 | 2164 | ||
2165 | /* add all active high priority channels */ | ||
2166 | static inline u32 gk20a_fifo_runlist_add_high_prio_entries( | ||
2167 | struct fifo_gk20a *f, | ||
2168 | struct fifo_runlist_info_gk20a *runlist, | ||
2169 | u32 *runlist_entry) | ||
2170 | { | ||
2171 | struct channel_gk20a *ch = NULL; | ||
2172 | unsigned long high_prio_chid; | ||
2173 | u32 count = 0; | ||
2174 | |||
2175 | for_each_set_bit(high_prio_chid, | ||
2176 | runlist->high_prio_channels, f->num_channels) { | ||
2177 | ch = &f->channel[high_prio_chid]; | ||
2178 | |||
2179 | if (!gk20a_is_channel_marked_as_tsg(ch) && | ||
2180 | test_bit(high_prio_chid, runlist->active_channels) == 1) { | ||
2181 | gk20a_dbg_info("add high prio channel %lu to runlist", | ||
2182 | high_prio_chid); | ||
2183 | runlist_entry[0] = ram_rl_entry_chid_f(high_prio_chid); | ||
2184 | runlist_entry[1] = 0; | ||
2185 | runlist_entry += 2; | ||
2186 | count++; | ||
2187 | } | ||
2188 | } | ||
2189 | |||
2190 | return count; | ||
2191 | } | ||
2192 | |||
2152 | static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | 2193 | static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, |
2153 | u32 hw_chid, bool add, | 2194 | u32 hw_chid, bool add, |
2154 | bool wait_for_finish) | 2195 | bool wait_for_finish) |
@@ -2158,7 +2199,7 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2158 | struct fifo_runlist_info_gk20a *runlist = NULL; | 2199 | struct fifo_runlist_info_gk20a *runlist = NULL; |
2159 | u32 *runlist_entry_base = NULL; | 2200 | u32 *runlist_entry_base = NULL; |
2160 | u32 *runlist_entry = NULL; | 2201 | u32 *runlist_entry = NULL; |
2161 | phys_addr_t runlist_pa; | 2202 | u64 runlist_iova; |
2162 | u32 old_buf, new_buf; | 2203 | u32 old_buf, new_buf; |
2163 | u32 chid, tsgid; | 2204 | u32 chid, tsgid; |
2164 | struct channel_gk20a *ch = NULL; | 2205 | struct channel_gk20a *ch = NULL; |
@@ -2194,11 +2235,13 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2194 | old_buf = runlist->cur_buffer; | 2235 | old_buf = runlist->cur_buffer; |
2195 | new_buf = !runlist->cur_buffer; | 2236 | new_buf = !runlist->cur_buffer; |
2196 | 2237 | ||
2238 | runlist_iova = g->ops.mm.get_iova_addr( | ||
2239 | g, runlist->mem[new_buf].sgt->sgl, 0); | ||
2240 | |||
2197 | gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx", | 2241 | gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx", |
2198 | runlist_id, (u64)gk20a_mem_phys(&runlist->mem[new_buf])); | 2242 | runlist_id, (u64)runlist_iova); |
2199 | 2243 | ||
2200 | runlist_pa = gk20a_mem_phys(&runlist->mem[new_buf]); | 2244 | if (!runlist_iova) { |
2201 | if (!runlist_pa) { | ||
2202 | ret = -EINVAL; | 2245 | ret = -EINVAL; |
2203 | goto clean_up; | 2246 | goto clean_up; |
2204 | } | 2247 | } |
@@ -2213,25 +2256,52 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2213 | add /* resume to add all channels back */) { | 2256 | add /* resume to add all channels back */) { |
2214 | runlist_entry = runlist_entry_base; | 2257 | runlist_entry = runlist_entry_base; |
2215 | 2258 | ||
2216 | /* add non-TSG channels first */ | 2259 | /* Runlist manipulation: |
2260 | Insert an entry of all high priority channels inbetween | ||
2261 | all lower priority channels. This ensure that the maximum | ||
2262 | delay a runnable high priority channel has to wait is one | ||
2263 | medium timeslice + any context switching overhead + | ||
2264 | wait on other high priority channels. | ||
2265 | add non-TSG channels first */ | ||
2217 | for_each_set_bit(chid, | 2266 | for_each_set_bit(chid, |
2218 | runlist->active_channels, f->num_channels) { | 2267 | runlist->active_channels, f->num_channels) { |
2219 | ch = &f->channel[chid]; | 2268 | ch = &f->channel[chid]; |
2220 | 2269 | ||
2221 | if (!gk20a_is_channel_marked_as_tsg(ch)) { | 2270 | if (!gk20a_is_channel_marked_as_tsg(ch) && |
2222 | gk20a_dbg_info("add channel %d to runlist", | 2271 | !ch->interleave) { |
2272 | u32 added; | ||
2273 | |||
2274 | gk20a_dbg_info("add normal prio channel %d to runlist", | ||
2223 | chid); | 2275 | chid); |
2224 | runlist_entry[0] = ram_rl_entry_chid_f(chid); | 2276 | runlist_entry[0] = ram_rl_entry_chid_f(chid); |
2225 | runlist_entry[1] = 0; | 2277 | runlist_entry[1] = 0; |
2226 | runlist_entry += 2; | 2278 | runlist_entry += 2; |
2227 | count++; | 2279 | count++; |
2280 | |||
2281 | added = gk20a_fifo_runlist_add_high_prio_entries( | ||
2282 | f, | ||
2283 | runlist, | ||
2284 | runlist_entry); | ||
2285 | count += added; | ||
2286 | runlist_entry += 2 * added; | ||
2228 | } | 2287 | } |
2229 | } | 2288 | } |
2230 | 2289 | ||
2290 | /* if there were no lower priority channels, then just | ||
2291 | * add the high priority channels once. */ | ||
2292 | if (count == 0) { | ||
2293 | count = gk20a_fifo_runlist_add_high_prio_entries( | ||
2294 | f, | ||
2295 | runlist, | ||
2296 | runlist_entry); | ||
2297 | runlist_entry += 2 * count; | ||
2298 | } | ||
2299 | |||
2231 | /* now add TSG entries and channels bound to TSG */ | 2300 | /* now add TSG entries and channels bound to TSG */ |
2232 | mutex_lock(&f->tsg_inuse_mutex); | 2301 | mutex_lock(&f->tsg_inuse_mutex); |
2233 | for_each_set_bit(tsgid, | 2302 | for_each_set_bit(tsgid, |
2234 | runlist->active_tsgs, f->num_channels) { | 2303 | runlist->active_tsgs, f->num_channels) { |
2304 | u32 added; | ||
2235 | tsg = &f->tsg[tsgid]; | 2305 | tsg = &f->tsg[tsgid]; |
2236 | /* add TSG entry */ | 2306 | /* add TSG entry */ |
2237 | gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid); | 2307 | gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid); |
@@ -2260,6 +2330,13 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2260 | 2330 | ||
2261 | WARN_ON(tsg->num_active_channels != | 2331 | WARN_ON(tsg->num_active_channels != |
2262 | count_channels_in_tsg); | 2332 | count_channels_in_tsg); |
2333 | |||
2334 | added = gk20a_fifo_runlist_add_high_prio_entries( | ||
2335 | f, | ||
2336 | runlist, | ||
2337 | runlist_entry); | ||
2338 | count += added; | ||
2339 | runlist_entry += 2 * added; | ||
2263 | } | 2340 | } |
2264 | mutex_unlock(&f->tsg_inuse_mutex); | 2341 | mutex_unlock(&f->tsg_inuse_mutex); |
2265 | } else /* suspend to remove all channels */ | 2342 | } else /* suspend to remove all channels */ |
@@ -2267,7 +2344,7 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2267 | 2344 | ||
2268 | if (count != 0) { | 2345 | if (count != 0) { |
2269 | gk20a_writel(g, fifo_runlist_base_r(), | 2346 | gk20a_writel(g, fifo_runlist_base_r(), |
2270 | fifo_runlist_base_ptr_f(u64_lo32(runlist_pa >> 12)) | | 2347 | fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) | |
2271 | fifo_runlist_base_target_vid_mem_f()); | 2348 | fifo_runlist_base_target_vid_mem_f()); |
2272 | } | 2349 | } |
2273 | 2350 | ||
@@ -2416,6 +2493,42 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g) | |||
2416 | return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f(); | 2493 | return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f(); |
2417 | } | 2494 | } |
2418 | 2495 | ||
2496 | int gk20a_fifo_set_channel_priority( | ||
2497 | struct gk20a *g, | ||
2498 | u32 runlist_id, | ||
2499 | u32 hw_chid, | ||
2500 | bool interleave) | ||
2501 | { | ||
2502 | struct fifo_runlist_info_gk20a *runlist = NULL; | ||
2503 | struct fifo_gk20a *f = &g->fifo; | ||
2504 | struct channel_gk20a *ch = NULL; | ||
2505 | |||
2506 | if (hw_chid >= f->num_channels) | ||
2507 | return -EINVAL; | ||
2508 | |||
2509 | if (runlist_id >= f->max_runlists) | ||
2510 | return -EINVAL; | ||
2511 | |||
2512 | ch = &f->channel[hw_chid]; | ||
2513 | |||
2514 | gk20a_dbg_fn(""); | ||
2515 | |||
2516 | runlist = &f->runlist_info[runlist_id]; | ||
2517 | |||
2518 | mutex_lock(&runlist->mutex); | ||
2519 | |||
2520 | if (ch->interleave) | ||
2521 | set_bit(hw_chid, runlist->high_prio_channels); | ||
2522 | else | ||
2523 | clear_bit(hw_chid, runlist->high_prio_channels); | ||
2524 | |||
2525 | gk20a_dbg_fn("done"); | ||
2526 | |||
2527 | mutex_unlock(&runlist->mutex); | ||
2528 | |||
2529 | return 0; | ||
2530 | } | ||
2531 | |||
2419 | void gk20a_init_fifo(struct gpu_ops *gops) | 2532 | void gk20a_init_fifo(struct gpu_ops *gops) |
2420 | { | 2533 | { |
2421 | gk20a_init_channel(gops); | 2534 | gk20a_init_channel(gops); |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 1b47677b..6ba4153b 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | |||
@@ -31,6 +31,7 @@ | |||
31 | struct fifo_runlist_info_gk20a { | 31 | struct fifo_runlist_info_gk20a { |
32 | unsigned long *active_channels; | 32 | unsigned long *active_channels; |
33 | unsigned long *active_tsgs; | 33 | unsigned long *active_tsgs; |
34 | unsigned long *high_prio_channels; | ||
34 | /* Each engine has its own SW and HW runlist buffer.*/ | 35 | /* Each engine has its own SW and HW runlist buffer.*/ |
35 | struct mem_desc mem[MAX_RUNLIST_BUFFERS]; | 36 | struct mem_desc mem[MAX_RUNLIST_BUFFERS]; |
36 | u32 cur_buffer; | 37 | u32 cur_buffer; |
@@ -91,6 +92,7 @@ struct fifo_engine_info_gk20a { | |||
91 | struct fifo_gk20a { | 92 | struct fifo_gk20a { |
92 | struct gk20a *g; | 93 | struct gk20a *g; |
93 | int num_channels; | 94 | int num_channels; |
95 | int num_runlist_entries; | ||
94 | 96 | ||
95 | int num_pbdma; | 97 | int num_pbdma; |
96 | u32 *pbdma_map; | 98 | u32 *pbdma_map; |
@@ -182,6 +184,8 @@ void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g, | |||
182 | int gk20a_fifo_wait_engine_idle(struct gk20a *g); | 184 | int gk20a_fifo_wait_engine_idle(struct gk20a *g); |
183 | u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g); | 185 | u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g); |
184 | u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g); | 186 | u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g); |
187 | int gk20a_fifo_set_channel_priority(struct gk20a *g, u32 runlist_id, | ||
188 | u32 hw_chid, bool interleave); | ||
185 | u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g, | 189 | u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g, |
186 | int *__id, bool *__is_tsg); | 190 | int *__id, bool *__is_tsg); |
187 | bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, | 191 | bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 9bbc9bd8..c5124c51 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -670,6 +670,9 @@ static int gk20a_init_support(struct platform_device *dev) | |||
670 | mutex_init(&g->client_lock); | 670 | mutex_init(&g->client_lock); |
671 | mutex_init(&g->ch_wdt_lock); | 671 | mutex_init(&g->ch_wdt_lock); |
672 | 672 | ||
673 | mutex_init(&g->interleave_lock); | ||
674 | g->num_interleaved_channels = 0; | ||
675 | |||
673 | g->remove_support = gk20a_remove_support; | 676 | g->remove_support = gk20a_remove_support; |
674 | return 0; | 677 | return 0; |
675 | 678 | ||
@@ -1437,9 +1440,14 @@ static int gk20a_probe(struct platform_device *dev) | |||
1437 | if (tegra_platform_is_silicon()) | 1440 | if (tegra_platform_is_silicon()) |
1438 | gk20a->timeouts_enabled = true; | 1441 | gk20a->timeouts_enabled = true; |
1439 | 1442 | ||
1443 | gk20a->interleave_high_priority = true; | ||
1444 | |||
1440 | gk20a->timeslice_low_priority_us = 1300; | 1445 | gk20a->timeslice_low_priority_us = 1300; |
1441 | gk20a->timeslice_medium_priority_us = 2600; | 1446 | gk20a->timeslice_medium_priority_us = 2600; |
1442 | gk20a->timeslice_high_priority_us = 5200; | 1447 | if (gk20a->interleave_high_priority) |
1448 | gk20a->timeslice_high_priority_us = 3000; | ||
1449 | else | ||
1450 | gk20a->timeslice_high_priority_us = 5200; | ||
1443 | 1451 | ||
1444 | /* Set up initial power settings. For non-slicon platforms, disable * | 1452 | /* Set up initial power settings. For non-slicon platforms, disable * |
1445 | * power features and for silicon platforms, read from platform data */ | 1453 | * power features and for silicon platforms, read from platform data */ |
@@ -1512,6 +1520,12 @@ static int gk20a_probe(struct platform_device *dev) | |||
1512 | platform->debugfs, | 1520 | platform->debugfs, |
1513 | &gk20a->timeslice_high_priority_us); | 1521 | &gk20a->timeslice_high_priority_us); |
1514 | 1522 | ||
1523 | gk20a->debugfs_interleave_high_priority = | ||
1524 | debugfs_create_bool("interleave_high_priority", | ||
1525 | S_IRUGO|S_IWUSR, | ||
1526 | platform->debugfs, | ||
1527 | &gk20a->interleave_high_priority); | ||
1528 | |||
1515 | gr_gk20a_debugfs_init(gk20a); | 1529 | gr_gk20a_debugfs_init(gk20a); |
1516 | gk20a_pmu_debugfs_init(dev); | 1530 | gk20a_pmu_debugfs_init(dev); |
1517 | gk20a_cde_debugfs_init(dev); | 1531 | gk20a_cde_debugfs_init(dev); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index f7b98e39..da115fa8 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -54,6 +54,8 @@ struct acr_gm20b; | |||
54 | 32 ns is the resolution of ptimer. */ | 54 | 32 ns is the resolution of ptimer. */ |
55 | #define PTIMER_REF_FREQ_HZ 31250000 | 55 | #define PTIMER_REF_FREQ_HZ 31250000 |
56 | 56 | ||
57 | #define MAX_INTERLEAVED_CHANNELS 32 | ||
58 | |||
57 | struct cooling_device_gk20a { | 59 | struct cooling_device_gk20a { |
58 | struct thermal_cooling_device *gk20a_cooling_dev; | 60 | struct thermal_cooling_device *gk20a_cooling_dev; |
59 | unsigned int gk20a_freq_state; | 61 | unsigned int gk20a_freq_state; |
@@ -512,6 +514,10 @@ struct gk20a { | |||
512 | u32 timeslice_low_priority_us; | 514 | u32 timeslice_low_priority_us; |
513 | u32 timeslice_medium_priority_us; | 515 | u32 timeslice_medium_priority_us; |
514 | u32 timeslice_high_priority_us; | 516 | u32 timeslice_high_priority_us; |
517 | u32 interleave_high_priority; | ||
518 | |||
519 | struct mutex interleave_lock; | ||
520 | u32 num_interleaved_channels; | ||
515 | 521 | ||
516 | bool slcg_enabled; | 522 | bool slcg_enabled; |
517 | bool blcg_enabled; | 523 | bool blcg_enabled; |
@@ -533,9 +539,11 @@ struct gk20a { | |||
533 | struct dentry *debugfs_disable_bigpage; | 539 | struct dentry *debugfs_disable_bigpage; |
534 | struct dentry *debugfs_gr_default_attrib_cb_size; | 540 | struct dentry *debugfs_gr_default_attrib_cb_size; |
535 | 541 | ||
536 | struct dentry * debugfs_timeslice_low_priority_us; | 542 | struct dentry *debugfs_timeslice_low_priority_us; |
537 | struct dentry * debugfs_timeslice_medium_priority_us; | 543 | struct dentry *debugfs_timeslice_medium_priority_us; |
538 | struct dentry * debugfs_timeslice_high_priority_us; | 544 | struct dentry *debugfs_timeslice_high_priority_us; |
545 | struct dentry *debugfs_interleave_high_priority; | ||
546 | |||
539 | #endif | 547 | #endif |
540 | struct gk20a_ctxsw_ucode_info ctxsw_ucode_info; | 548 | struct gk20a_ctxsw_ucode_info ctxsw_ucode_info; |
541 | 549 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h index a131972e..99d92782 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_fifo_gk20a.h | |||
@@ -110,6 +110,10 @@ static inline u32 fifo_eng_runlist_length_f(u32 v) | |||
110 | { | 110 | { |
111 | return (v & 0xffff) << 0; | 111 | return (v & 0xffff) << 0; |
112 | } | 112 | } |
113 | static inline u32 fifo_eng_runlist_length_max_v(void) | ||
114 | { | ||
115 | return 0x0000ffff; | ||
116 | } | ||
113 | static inline u32 fifo_eng_runlist_pending_true_f(void) | 117 | static inline u32 fifo_eng_runlist_pending_true_f(void) |
114 | { | 118 | { |
115 | return 0x100000; | 119 | return 0x100000; |
diff --git a/drivers/gpu/nvgpu/gm20b/hw_fifo_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_fifo_gm20b.h index f3a24f61..eb9f1694 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_fifo_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_fifo_gm20b.h | |||
@@ -110,6 +110,10 @@ static inline u32 fifo_eng_runlist_length_f(u32 v) | |||
110 | { | 110 | { |
111 | return (v & 0xffff) << 0; | 111 | return (v & 0xffff) << 0; |
112 | } | 112 | } |
113 | static inline u32 fifo_eng_runlist_length_max_v(void) | ||
114 | { | ||
115 | return 0x0000ffff; | ||
116 | } | ||
113 | static inline u32 fifo_eng_runlist_pending_true_f(void) | 117 | static inline u32 fifo_eng_runlist_pending_true_f(void) |
114 | { | 118 | { |
115 | return 0x100000; | 119 | return 0x100000; |