Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c  |  85
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h  |   3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c     | 300
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.h     |   8
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c          |  16
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h          |  12
-rw-r--r--  drivers/gpu/nvgpu/gk20a/tsg_gk20a.c      |   1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/tsg_gk20a.h      |   2
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fifo_gm20b.c     |   3
-rw-r--r--  drivers/gpu/nvgpu/vgpu/fifo_vgpu.c       |  11
10 files changed, 216 insertions, 225 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 2c2850c6..6eecebf5 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -177,7 +177,7 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
 }
 
 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
-		u32 timeslice_period, bool interleave)
+		u32 timeslice_period)
 {
 	void *inst_ptr;
 	int shift = 0, value = 0;
@@ -205,30 +205,6 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
 		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
 		ccsr_channel_enable_set_true_f());
 
-	if (c->interleave != interleave) {
-		mutex_lock(&c->g->interleave_lock);
-		c->interleave = interleave;
-		if (interleave)
-			if (c->g->num_interleaved_channels >=
-				MAX_INTERLEAVED_CHANNELS) {
-				gk20a_err(dev_from_gk20a(c->g),
-					"Change of priority would exceed runlist length, only changing timeslice\n");
-				c->interleave = false;
-			} else
-				c->g->num_interleaved_channels += 1;
-		else
-			c->g->num_interleaved_channels -= 1;
-
-		mutex_unlock(&c->g->interleave_lock);
-		gk20a_dbg_info("Set channel %d to interleave %d",
-			c->hw_chid, c->interleave);
-
-		gk20a_fifo_set_channel_priority(
-			c->g, 0, c->hw_chid, c->interleave);
-		c->g->ops.fifo.update_runlist(
-			c->g, 0, ~0, true, false);
-	}
-
 	return 0;
 }
 
@@ -711,6 +687,32 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
 	return 0;
 }
 
+static int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch,
+					u32 level)
+{
+	struct gk20a *g = ch->g;
+	int ret;
+
+	if (gk20a_is_channel_marked_as_tsg(ch)) {
+		gk20a_err(dev_from_gk20a(g), "invalid operation for TSG!\n");
+		return -EINVAL;
+	}
+
+	switch (level) {
+	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW:
+	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
+	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH:
+		ret = g->ops.fifo.set_runlist_interleave(g, ch->hw_chid,
+					false, 0, level);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret ? ret : g->ops.fifo.update_runlist(g, 0, ~0, true, true);
+}
+
 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
 		struct nvgpu_set_error_notifier *args)
 {
@@ -899,17 +901,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
 	}
 	mutex_unlock(&f->deferred_reset_mutex);
 
-	if (ch->interleave) {
-		ch->interleave = false;
-		gk20a_fifo_set_channel_priority(
-			ch->g, 0, ch->hw_chid, ch->interleave);
-
-		mutex_lock(&f->g->interleave_lock);
-		WARN_ON(f->g->num_interleaved_channels == 0);
-		f->g->num_interleaved_channels -= 1;
-		mutex_unlock(&f->g->interleave_lock);
-	}
-
 	if (!ch->bound)
 		goto release;
 
@@ -1154,11 +1145,8 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
 	ch->has_timedout = false;
 	ch->wdt_enabled = true;
 	ch->obj_class = 0;
-	ch->interleave = false;
 	ch->clean_up.scheduled = false;
-	gk20a_fifo_set_channel_priority(
-		ch->g, 0, ch->hw_chid, ch->interleave);
-
+	ch->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
 
 	/* The channel is *not* runnable at this point. It still needs to have
 	 * an address space bound and allocate a gpfifo and grctx. */
@@ -2613,7 +2601,6 @@ unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
 int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
 {
 	u32 timeslice_timeout;
-	bool interleave = false;
 
 	if (gk20a_is_channel_marked_as_tsg(ch)) {
 		gk20a_err(dev_from_gk20a(ch->g),
@@ -2630,8 +2617,6 @@ int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
 		timeslice_timeout = ch->g->timeslice_medium_priority_us;
 		break;
 	case NVGPU_PRIORITY_HIGH:
-		if (ch->g->interleave_high_priority)
-			interleave = true;
 		timeslice_timeout = ch->g->timeslice_high_priority_us;
 		break;
 	default:
@@ -2640,7 +2625,7 @@ int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
 	}
 
 	return channel_gk20a_set_schedule_params(ch,
-			timeslice_timeout, interleave);
+			timeslice_timeout);
 }
 
 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
@@ -3045,6 +3030,18 @@ long gk20a_channel_ioctl(struct file *filp,
 		err = gk20a_channel_set_wdt_status(ch,
 				(struct nvgpu_channel_wdt_args *)buf);
 		break;
+	case NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE:
+		err = gk20a_busy(dev);
+		if (err) {
+			dev_err(&dev->dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = gk20a_channel_set_runlist_interleave(ch,
+			((struct nvgpu_runlist_interleave_args *)buf)->level);
+		gk20a_idle(dev);
+		break;
 	default:
 		dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
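
Illustration (not part of the patch): from userspace, the new NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE ioctl takes a struct nvgpu_runlist_interleave_args whose level field must be one of NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW/MEDIUM/HIGH; any other value, or calling it on a TSG-bound channel, returns -EINVAL. A minimal sketch follows, assuming an already-open nvgpu channel file descriptor and that the definitions come from the nvgpu uapi header (the <linux/nvgpu.h> include and the fd setup are assumptions, not shown in this patch):

/*
 * Sketch only: raise one channel's runlist interleave level.
 * ch_fd is assumed to be an open nvgpu channel fd.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed location of the nvgpu uapi definitions */

static int set_channel_interleave_high(int ch_fd)
{
	struct nvgpu_runlist_interleave_args args = {
		.level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH,
	};

	/* kernel path: gk20a_channel_ioctl() -> gk20a_channel_set_runlist_interleave() */
	if (ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE, &args) < 0) {
		perror("NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE");
		return -1;
	}
	return 0;
}

On success the handler stores the level on the channel and then rebuilds runlist 0, so the new ordering is in place when the ioctl returns.
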
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 4aea9d19..3f5a657a 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -188,8 +188,7 @@ struct channel_gk20a {
 	spinlock_t update_fn_lock; /* make access to the two above atomic */
 	struct work_struct update_fn_work;
 
-	/* true if channel is interleaved with lower priority channels */
-	bool interleave;
+	u32 interleave_level;
 };
 
 static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 769960af..28cc3086 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -303,12 +303,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 	if (!runlist->active_tsgs)
 		goto clean_up_runlist_info;
 
-	runlist->high_prio_channels =
-		kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
-			GFP_KERNEL);
-	if (!runlist->high_prio_channels)
-		goto clean_up_runlist_info;
-
 	runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries;
 	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
 		int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
@@ -337,9 +331,6 @@ clean_up_runlist_info:
 	kfree(runlist->active_tsgs);
 	runlist->active_tsgs = NULL;
 
-	kfree(runlist->high_prio_channels);
-	runlist->high_prio_channels = NULL;
-
 	kfree(f->runlist_info);
 	f->runlist_info = NULL;
 
@@ -2162,32 +2153,153 @@ static inline u32 gk20a_get_tsg_runlist_entry_0(struct tsg_gk20a *tsg)
 	return runlist_entry_0;
 }
 
-/* add all active high priority channels */
-static inline u32 gk20a_fifo_runlist_add_high_prio_entries(
-				struct fifo_gk20a *f,
-				struct fifo_runlist_info_gk20a *runlist,
-				u32 *runlist_entry)
+/* recursively construct a runlist with interleaved bare channels and TSGs */
+static u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f,
+				struct fifo_runlist_info_gk20a *runlist,
+				u32 cur_level,
+				u32 *runlist_entry,
+				bool interleave_enabled,
+				bool prev_empty,
+				u32 *entries_left)
 {
-	struct channel_gk20a *ch = NULL;
-	unsigned long high_prio_chid;
-	u32 count = 0;
+	bool last_level = cur_level == NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH;
+	struct channel_gk20a *ch;
+	bool skip_next = false;
+	u32 chid, tsgid, count = 0;
+
+	gk20a_dbg_fn("");
 
-	for_each_set_bit(high_prio_chid,
-		runlist->high_prio_channels, f->num_channels) {
-		ch = &f->channel[high_prio_chid];
+	/* for each bare channel, CH, on this level, insert all higher-level
+	   channels and TSGs before inserting CH. */
+	for_each_set_bit(chid, runlist->active_channels, f->num_channels) {
+		ch = &f->channel[chid];
+
+		if (ch->interleave_level != cur_level)
+			continue;
+
+		if (gk20a_is_channel_marked_as_tsg(ch))
+			continue;
+
+		if (!last_level && !skip_next) {
+			runlist_entry = gk20a_runlist_construct_locked(f,
+						runlist,
+						cur_level + 1,
+						runlist_entry,
+						interleave_enabled,
+						false,
+						entries_left);
+			/* if interleaving is disabled, higher-level channels
+			   and TSGs only need to be inserted once */
+			if (!interleave_enabled)
+				skip_next = true;
+		}
+
+		if (!(*entries_left))
+			return NULL;
+
+		gk20a_dbg_info("add channel %d to runlist", chid);
+		runlist_entry[0] = ram_rl_entry_chid_f(chid);
+		runlist_entry[1] = 0;
+		runlist_entry += 2;
+		count++;
+		(*entries_left)--;
+	}
 
-		if (!gk20a_is_channel_marked_as_tsg(ch) &&
-			test_bit(high_prio_chid, runlist->active_channels) == 1) {
-			gk20a_dbg_info("add high prio channel %lu to runlist",
-				high_prio_chid);
-			runlist_entry[0] = ram_rl_entry_chid_f(high_prio_chid);
+	/* for each TSG, T, on this level, insert all higher-level channels
+	   and TSGs before inserting T. */
+	for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
+		struct tsg_gk20a *tsg = &f->tsg[tsgid];
+
+		if (tsg->interleave_level != cur_level)
+			continue;
+
+		if (!last_level && !skip_next) {
+			runlist_entry = gk20a_runlist_construct_locked(f,
+						runlist,
+						cur_level + 1,
+						runlist_entry,
+						interleave_enabled,
+						false,
+						entries_left);
+			if (!interleave_enabled)
+				skip_next = true;
+		}
+
+		if (!(*entries_left))
+			return NULL;
+
+		/* add TSG entry */
+		gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
+		runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg);
+		runlist_entry[1] = 0;
+		runlist_entry += 2;
+		count++;
+		(*entries_left)--;
+
+		mutex_lock(&tsg->ch_list_lock);
+		/* add runnable channels bound to this TSG */
+		list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
+			if (!test_bit(ch->hw_chid,
+				      runlist->active_channels))
+				continue;
+
+			if (!(*entries_left)) {
+				mutex_unlock(&tsg->ch_list_lock);
+				return NULL;
+			}
+
+			gk20a_dbg_info("add channel %d to runlist",
+				ch->hw_chid);
+			runlist_entry[0] = ram_rl_entry_chid_f(ch->hw_chid);
 			runlist_entry[1] = 0;
 			runlist_entry += 2;
 			count++;
+			(*entries_left)--;
 		}
+		mutex_unlock(&tsg->ch_list_lock);
 	}
 
-	return count;
+	/* append entries from higher level if this level is empty */
+	if (!count && !last_level)
+		runlist_entry = gk20a_runlist_construct_locked(f,
+					runlist,
+					cur_level + 1,
+					runlist_entry,
+					interleave_enabled,
+					true,
+					entries_left);
+
+	/*
+	 * if previous and this level have entries, append
+	 * entries from higher level.
+	 *
+	 * ex. dropping from MEDIUM to LOW, need to insert HIGH
+	 */
+	if (interleave_enabled && count && !prev_empty && !last_level)
+		runlist_entry = gk20a_runlist_construct_locked(f,
+					runlist,
+					cur_level + 1,
+					runlist_entry,
+					interleave_enabled,
+					false,
+					entries_left);
+	return runlist_entry;
+}
+
+int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
+				u32 id,
+				bool is_tsg,
+				u32 runlist_id,
+				u32 new_level)
+{
+	gk20a_dbg_fn("");
+
+	if (is_tsg)
+		g->fifo.tsg[id].interleave_level = new_level;
+	else
+		g->fifo.channel[id].interleave_level = new_level;
+
+	return 0;
 }
 
 static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
@@ -2198,14 +2310,11 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 	struct fifo_gk20a *f = &g->fifo;
 	struct fifo_runlist_info_gk20a *runlist = NULL;
 	u32 *runlist_entry_base = NULL;
-	u32 *runlist_entry = NULL;
 	u64 runlist_iova;
 	u32 old_buf, new_buf;
-	u32 chid, tsgid;
 	struct channel_gk20a *ch = NULL;
 	struct tsg_gk20a *tsg = NULL;
 	u32 count = 0;
-	u32 count_channels_in_tsg;
 	runlist = &f->runlist_info[runlist_id];
 
 	/* valid channel, add/remove it from active list.
@@ -2254,91 +2363,23 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 
 	if (hw_chid != ~0 || /* add/remove a valid channel */
 		add /* resume to add all channels back */) {
-		runlist_entry = runlist_entry_base;
-
-		/* Runlist manipulation:
-		   Insert an entry of all high priority channels inbetween
-		   all lower priority channels. This ensure that the maximum
-		   delay a runnable high priority channel has to wait is one
-		   medium timeslice + any context switching overhead +
-		   wait on other high priority channels.
-		   add non-TSG channels first */
-		for_each_set_bit(chid,
-			runlist->active_channels, f->num_channels) {
-			ch = &f->channel[chid];
-
-			if (!gk20a_is_channel_marked_as_tsg(ch) &&
-					!ch->interleave) {
-				u32 added;
-
-				gk20a_dbg_info("add normal prio channel %d to runlist",
-					chid);
-				runlist_entry[0] = ram_rl_entry_chid_f(chid);
-				runlist_entry[1] = 0;
-				runlist_entry += 2;
-				count++;
-
-				added = gk20a_fifo_runlist_add_high_prio_entries(
-					f,
-					runlist,
-					runlist_entry);
-				count += added;
-				runlist_entry += 2 * added;
-			}
-		}
+		u32 max_entries = f->num_runlist_entries;
+		u32 *runlist_end;
 
-		/* if there were no lower priority channels, then just
-		 * add the high priority channels once. */
-		if (count == 0) {
-			count = gk20a_fifo_runlist_add_high_prio_entries(
-				f,
-				runlist,
-				runlist_entry);
-			runlist_entry += 2 * count;
+		runlist_end = gk20a_runlist_construct_locked(f,
+						runlist,
+						0,
+						runlist_entry_base,
+						g->runlist_interleave,
+						true,
+						&max_entries);
+		if (!runlist_end) {
+			ret = -E2BIG;
+			goto clean_up;
 		}
 
-		/* now add TSG entries and channels bound to TSG */
-		mutex_lock(&f->tsg_inuse_mutex);
-		for_each_set_bit(tsgid,
-			runlist->active_tsgs, f->num_channels) {
-			u32 added;
-			tsg = &f->tsg[tsgid];
-			/* add TSG entry */
-			gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
-			runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg);
-			runlist_entry[1] = 0;
-			runlist_entry += 2;
-			count++;
-
-			/* add runnable channels bound to this TSG */
-			count_channels_in_tsg = 0;
-			mutex_lock(&tsg->ch_list_lock);
-			list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
-				if (!test_bit(ch->hw_chid,
-					runlist->active_channels))
-					continue;
-				gk20a_dbg_info("add channel %d to runlist",
-					ch->hw_chid);
-				runlist_entry[0] =
-					ram_rl_entry_chid_f(ch->hw_chid);
-				runlist_entry[1] = 0;
-				runlist_entry += 2;
-				count++;
-				count_channels_in_tsg++;
-			}
-			mutex_unlock(&tsg->ch_list_lock);
-
-			WARN_ON(tsg->num_active_channels !=
-				count_channels_in_tsg);
-
-			added = gk20a_fifo_runlist_add_high_prio_entries(
-				f,
-				runlist,
-				runlist_entry);
-			count += added;
-			runlist_entry += 2 * added;
-		}
-		mutex_unlock(&f->tsg_inuse_mutex);
+		count = (runlist_end - runlist_entry_base) / 2;
+		WARN_ON(count > f->num_runlist_entries);
 	} else /* suspend to remove all channels */
 		count = 0;
 
@@ -2493,42 +2534,6 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g)
 	return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f();
 }
 
-int gk20a_fifo_set_channel_priority(
-		struct gk20a *g,
-		u32 runlist_id,
-		u32 hw_chid,
-		bool interleave)
-{
-	struct fifo_runlist_info_gk20a *runlist = NULL;
-	struct fifo_gk20a *f = &g->fifo;
-	struct channel_gk20a *ch = NULL;
-
-	if (hw_chid >= f->num_channels)
-		return -EINVAL;
-
-	if (runlist_id >= f->max_runlists)
-		return -EINVAL;
-
-	ch = &f->channel[hw_chid];
-
-	gk20a_dbg_fn("");
-
-	runlist = &f->runlist_info[runlist_id];
-
-	mutex_lock(&runlist->mutex);
-
-	if (ch->interleave)
-		set_bit(hw_chid, runlist->high_prio_channels);
-	else
-		clear_bit(hw_chid, runlist->high_prio_channels);
-
-	gk20a_dbg_fn("done");
-
-	mutex_unlock(&runlist->mutex);
-
-	return 0;
-}
-
 struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
 		u32 hw_chid)
 {
@@ -2545,4 +2550,5 @@ void gk20a_init_fifo(struct gpu_ops *gops)
 	gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
 	gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos;
 	gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature;
+	gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave;
 }
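
To make the ordering produced by gk20a_runlist_construct_locked() concrete, here is a small standalone model of the recursion (illustration only, not driver code: channel names are hypothetical, and the TSG loop and the entries_left bound are omitted). With interleaving enabled, every LOW entry is preceded by a full MEDIUM+HIGH pass and every MEDIUM entry by a full HIGH pass; with it disabled, the higher levels are emitted exactly once, in front.

/* Standalone model of the interleave ordering; compile with any C99 compiler. */
#include <stdio.h>

#define LOW	0
#define MEDIUM	1
#define HIGH	2

/* hypothetical channel names per interleave level */
static const char *chans[3][3] = {
	[LOW]    = { "l0", "l1" },
	[MEDIUM] = { "m0" },
	[HIGH]   = { "h0", "h1" },
};

static void construct(int level, int interleave, int prev_empty)
{
	int last = (level == HIGH);
	int skip_next = 0, count = 0;

	for (int i = 0; chans[level][i]; i++) {
		/* insert all higher-level entries before each entry here */
		if (!last && !skip_next) {
			construct(level + 1, interleave, 0);
			if (!interleave)
				skip_next = 1;	/* higher levels only once */
		}
		printf("%s ", chans[level][i]);
		count++;
	}

	/* empty level: fall through to the next one */
	if (!count && !last)
		construct(level + 1, interleave, 1);

	/* close a non-empty level with one more higher-level pass */
	if (interleave && count && !prev_empty && !last)
		construct(level + 1, interleave, 0);
}

int main(void)
{
	construct(LOW, 1, 1);	/* prints: h0 h1 m0 h0 h1 l0 h0 h1 m0 h0 h1 l1 */
	printf("\n");
	construct(LOW, 0, 1);	/* prints: h0 h1 m0 l0 l1 */
	printf("\n");
	return 0;
}

The interleaved output is the property the removed comment in gk20a_fifo_update_runlist_locked() described: a runnable high-priority channel waits at most one lower-level entry plus the other high-priority work, because the full set of HIGH entries reappears between consecutive MEDIUM and LOW entries.
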
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index ee4e7328..0979bf2b 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -31,7 +31,6 @@
 struct fifo_runlist_info_gk20a {
 	unsigned long *active_channels;
 	unsigned long *active_tsgs;
-	unsigned long *high_prio_channels;
 	/* Each engine has its own SW and HW runlist buffer.*/
 	struct mem_desc mem[MAX_RUNLIST_BUFFERS];
 	u32 cur_buffer;
@@ -184,8 +183,6 @@ void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
 int gk20a_fifo_wait_engine_idle(struct gk20a *g);
 u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g);
 u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g);
-int gk20a_fifo_set_channel_priority(struct gk20a *g, u32 runlist_id,
-		u32 hw_chid, bool interleave);
 u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g,
 		int *__id, bool *__is_tsg);
 bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
@@ -198,4 +195,9 @@ struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
 		u32 hw_chid);
 
 void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg);
+int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
+		u32 id,
+		bool is_tsg,
+		u32 runlist_id,
+		u32 new_level);
 #endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index fa2c61e1..0fee58e8 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -672,9 +672,6 @@ static int gk20a_init_support(struct platform_device *dev)
 	mutex_init(&g->ch_wdt_lock);
 	mutex_init(&g->poweroff_lock);
 
-	mutex_init(&g->interleave_lock);
-	g->num_interleaved_channels = 0;
-
 	g->remove_support = gk20a_remove_support;
 	return 0;
 
@@ -1439,14 +1436,11 @@ static int gk20a_probe(struct platform_device *dev)
 	if (tegra_platform_is_silicon())
 		gk20a->timeouts_enabled = true;
 
-	gk20a->interleave_high_priority = true;
+	gk20a->runlist_interleave = true;
 
 	gk20a->timeslice_low_priority_us = 1300;
 	gk20a->timeslice_medium_priority_us = 2600;
-	if (gk20a->interleave_high_priority)
-		gk20a->timeslice_high_priority_us = 3000;
-	else
-		gk20a->timeslice_high_priority_us = 5200;
+	gk20a->timeslice_high_priority_us = 5200;
 
 	/* Set up initial power settings. For non-slicon platforms, disable *
 	 * power features and for silicon platforms, read from platform data */
@@ -1527,11 +1521,11 @@ static int gk20a_probe(struct platform_device *dev)
 					platform->debugfs,
 					&gk20a->timeslice_high_priority_us);
 
-	gk20a->debugfs_interleave_high_priority =
-		debugfs_create_bool("interleave_high_priority",
+	gk20a->debugfs_runlist_interleave =
+		debugfs_create_bool("runlist_interleave",
 					S_IRUGO|S_IWUSR,
 					platform->debugfs,
-					&gk20a->interleave_high_priority);
+					&gk20a->runlist_interleave);
 
 	gr_gk20a_debugfs_init(gk20a);
 	gk20a_pmu_debugfs_init(dev);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index afdbeef7..faccf04a 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -54,8 +54,6 @@ struct acr_gm20b;
    32 ns is the resolution of ptimer. */
 #define PTIMER_REF_FREQ_HZ 31250000
 
-#define MAX_INTERLEAVED_CHANNELS 32
-
 struct cooling_device_gk20a {
 	struct thermal_cooling_device *gk20a_cooling_dev;
 	unsigned int gk20a_freq_state;
@@ -268,6 +266,9 @@ struct gpu_ops {
 		u32 (*get_num_fifos)(struct gk20a *g);
 		u32 (*get_pbdma_signature)(struct gk20a *g);
 		int (*channel_set_priority)(struct channel_gk20a *ch, u32 priority);
+		int (*set_runlist_interleave)(struct gk20a *g, u32 id,
+				bool is_tsg, u32 runlist_id,
+				u32 new_level);
 	} fifo;
 	struct pmu_v {
 		/*used for change of enum zbc update cmd id from ver 0 to ver1*/
@@ -536,10 +537,7 @@ struct gk20a {
 	u32 timeslice_low_priority_us;
 	u32 timeslice_medium_priority_us;
 	u32 timeslice_high_priority_us;
-	u32 interleave_high_priority;
-
-	struct mutex interleave_lock;
-	u32 num_interleaved_channels;
+	u32 runlist_interleave;
 
 	bool slcg_enabled;
 	bool blcg_enabled;
@@ -564,7 +562,7 @@ struct gk20a {
 	struct dentry *debugfs_timeslice_low_priority_us;
 	struct dentry *debugfs_timeslice_medium_priority_us;
 	struct dentry *debugfs_timeslice_high_priority_us;
-	struct dentry *debugfs_interleave_high_priority;
+	struct dentry *debugfs_runlist_interleave;
 
 #endif
 	struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
index 4421744c..b41cca08 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -228,6 +228,7 @@ int gk20a_tsg_open(struct gk20a *g, struct file *filp)
 
 	tsg->tsg_gr_ctx = NULL;
 	tsg->vm = NULL;
+	tsg->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
 
 	filp->private_data = tsg;
 
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
index bcc4d0c4..7e0a75d1 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
@@ -49,6 +49,8 @@ struct tsg_gk20a {
 	struct gr_ctx_desc *tsg_gr_ctx;
 
 	struct vm_gk20a *vm;
+
+	u32 interleave_level;
 };
 
 int gk20a_enable_tsg(struct tsg_gk20a *tsg);
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index d1deffb9..3fded03c 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -1,7 +1,7 @@
 /*
  * GM20B Fifo
  *
- * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -121,4 +121,5 @@ void gm20b_init_fifo(struct gpu_ops *gops)
 	gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
 	gops->fifo.get_num_fifos = gm20b_fifo_get_num_fifos;
 	gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature;
+	gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave;
 }
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index e776e97c..b4bb7f38 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -1,7 +1,7 @@
 /*
  * Virtualized GPU Fifo
  *
- * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -194,12 +194,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 	if (!runlist->active_channels)
 		goto clean_up_runlist_info;
 
-	runlist->high_prio_channels =
-		kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
-			GFP_KERNEL);
-	if (!runlist->high_prio_channels)
-		goto clean_up_runlist_info;
-
 	runlist_size = sizeof(u16) * f->num_channels;
 	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
 		int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
@@ -222,9 +216,6 @@ clean_up_runlist:
 		gk20a_gmmu_free(g, &runlist->mem[i]);
 
 clean_up_runlist_info:
-	kfree(runlist->high_prio_channels);
-	runlist->high_prio_channels = NULL;
-
 	kfree(runlist->active_channels);
 	runlist->active_channels = NULL;
 