summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
author Aingara Paramakuru <aparamakuru@nvidia.com> 2016-02-22 12:35:49 -0500
committer Terje Bergstrom <tbergstrom@nvidia.com> 2016-03-15 19:23:44 -0400
commit 2a58d3c27b45ca9d0d9dc2136377b7a41b9ed82d (patch)
tree 9d7464bfd0eea8e4b65f591996db59a98f4070e2 /drivers
parent f07a046a52e7a8074bd1572a12ac65747d3f827d (diff)
gpu: nvgpu: improve channel interleave support

Previously, only "high" priority bare channels were interleaved between all
other bare channels and TSGs. This patch decouples priority from interleaving
and introduces 3 levels for interleaving a bare channel or TSG: high, medium,
and low. The levels define the number of times a channel or TSG will appear on
a runlist (see nvgpu.h for details).

By default, all bare channels and TSGs are set to interleave level low.
Userspace can then request the interleave level to be increased via the
CHANNEL_SET_RUNLIST_INTERLEAVE ioctl (TSG-specific ioctl will be added later).

As timeslice settings will soon be coming from userspace, the default timeslice
for "high" priority channels has been restored.

JIRA VFND-1302
Bug 1729664

Change-Id: I178bc1cecda23f5002fec6d791e6dcaedfa05c0c
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/1014962
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>

Diffstat (limited to 'drivers')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_gk20a.c 85
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_gk20a.h 3
-rw-r--r-- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c 300
-rw-r--r-- drivers/gpu/nvgpu/gk20a/fifo_gk20a.h 8
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.c 16
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.h 12
-rw-r--r-- drivers/gpu/nvgpu/gk20a/tsg_gk20a.c 1
-rw-r--r-- drivers/gpu/nvgpu/gk20a/tsg_gk20a.h 2
-rw-r--r-- drivers/gpu/nvgpu/gm20b/fifo_gm20b.c 3
-rw-r--r-- drivers/gpu/nvgpu/vgpu/fifo_vgpu.c 11
10 files changed, 216 insertions, 225 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 2c2850c6..6eecebf5 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -177,7 +177,7 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
177} 177}
178 178
179static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, 179static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
180 u32 timeslice_period, bool interleave) 180 u32 timeslice_period)
181{ 181{
182 void *inst_ptr; 182 void *inst_ptr;
183 int shift = 0, value = 0; 183 int shift = 0, value = 0;
@@ -205,30 +205,6 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
205 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) | 205 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
206 ccsr_channel_enable_set_true_f()); 206 ccsr_channel_enable_set_true_f());
207 207
208 if (c->interleave != interleave) {
209 mutex_lock(&c->g->interleave_lock);
210 c->interleave = interleave;
211 if (interleave)
212 if (c->g->num_interleaved_channels >=
213 MAX_INTERLEAVED_CHANNELS) {
214 gk20a_err(dev_from_gk20a(c->g),
215 "Change of priority would exceed runlist length, only changing timeslice\n");
216 c->interleave = false;
217 } else
218 c->g->num_interleaved_channels += 1;
219 else
220 c->g->num_interleaved_channels -= 1;
221
222 mutex_unlock(&c->g->interleave_lock);
223 gk20a_dbg_info("Set channel %d to interleave %d",
224 c->hw_chid, c->interleave);
225
226 gk20a_fifo_set_channel_priority(
227 c->g, 0, c->hw_chid, c->interleave);
228 c->g->ops.fifo.update_runlist(
229 c->g, 0, ~0, true, false);
230 }
231
232 return 0; 208 return 0;
233} 209}
234 210
@@ -711,6 +687,32 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
711 return 0; 687 return 0;
712} 688}
713 689
690static int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch,
691 u32 level)
692{
693 struct gk20a *g = ch->g;
694 int ret;
695
696 if (gk20a_is_channel_marked_as_tsg(ch)) {
697 gk20a_err(dev_from_gk20a(g), "invalid operation for TSG!\n");
698 return -EINVAL;
699 }
700
701 switch (level) {
702 case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW:
703 case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
704 case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH:
705 ret = g->ops.fifo.set_runlist_interleave(g, ch->hw_chid,
706 false, 0, level);
707 break;
708 default:
709 ret = -EINVAL;
710 break;
711 }
712
713 return ret ? ret : g->ops.fifo.update_runlist(g, 0, ~0, true, true);
714}
715
714static int gk20a_init_error_notifier(struct channel_gk20a *ch, 716static int gk20a_init_error_notifier(struct channel_gk20a *ch,
715 struct nvgpu_set_error_notifier *args) 717 struct nvgpu_set_error_notifier *args)
716{ 718{
@@ -899,17 +901,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
899 } 901 }
900 mutex_unlock(&f->deferred_reset_mutex); 902 mutex_unlock(&f->deferred_reset_mutex);
901 903
902 if (ch->interleave) {
903 ch->interleave = false;
904 gk20a_fifo_set_channel_priority(
905 ch->g, 0, ch->hw_chid, ch->interleave);
906
907 mutex_lock(&f->g->interleave_lock);
908 WARN_ON(f->g->num_interleaved_channels == 0);
909 f->g->num_interleaved_channels -= 1;
910 mutex_unlock(&f->g->interleave_lock);
911 }
912
913 if (!ch->bound) 904 if (!ch->bound)
914 goto release; 905 goto release;
915 906
@@ -1154,11 +1145,8 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
1154 ch->has_timedout = false; 1145 ch->has_timedout = false;
1155 ch->wdt_enabled = true; 1146 ch->wdt_enabled = true;
1156 ch->obj_class = 0; 1147 ch->obj_class = 0;
1157 ch->interleave = false;
1158 ch->clean_up.scheduled = false; 1148 ch->clean_up.scheduled = false;
1159 gk20a_fifo_set_channel_priority( 1149 ch->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
1160 ch->g, 0, ch->hw_chid, ch->interleave);
1161
1162 1150
1163 /* The channel is *not* runnable at this point. It still needs to have 1151 /* The channel is *not* runnable at this point. It still needs to have
1164 * an address space bound and allocate a gpfifo and grctx. */ 1152 * an address space bound and allocate a gpfifo and grctx. */
@@ -2613,7 +2601,6 @@ unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
2613int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority) 2601int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
2614{ 2602{
2615 u32 timeslice_timeout; 2603 u32 timeslice_timeout;
2616 bool interleave = false;
2617 2604
2618 if (gk20a_is_channel_marked_as_tsg(ch)) { 2605 if (gk20a_is_channel_marked_as_tsg(ch)) {
2619 gk20a_err(dev_from_gk20a(ch->g), 2606 gk20a_err(dev_from_gk20a(ch->g),
@@ -2630,8 +2617,6 @@ int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
2630 timeslice_timeout = ch->g->timeslice_medium_priority_us; 2617 timeslice_timeout = ch->g->timeslice_medium_priority_us;
2631 break; 2618 break;
2632 case NVGPU_PRIORITY_HIGH: 2619 case NVGPU_PRIORITY_HIGH:
2633 if (ch->g->interleave_high_priority)
2634 interleave = true;
2635 timeslice_timeout = ch->g->timeslice_high_priority_us; 2620 timeslice_timeout = ch->g->timeslice_high_priority_us;
2636 break; 2621 break;
2637 default: 2622 default:
@@ -2640,7 +2625,7 @@ int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
2640 } 2625 }
2641 2626
2642 return channel_gk20a_set_schedule_params(ch, 2627 return channel_gk20a_set_schedule_params(ch,
2643 timeslice_timeout, interleave); 2628 timeslice_timeout);
2644} 2629}
2645 2630
2646static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, 2631static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
@@ -3045,6 +3030,18 @@ long gk20a_channel_ioctl(struct file *filp,
3045 err = gk20a_channel_set_wdt_status(ch, 3030 err = gk20a_channel_set_wdt_status(ch,
3046 (struct nvgpu_channel_wdt_args *)buf); 3031 (struct nvgpu_channel_wdt_args *)buf);
3047 break; 3032 break;
3033 case NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE:
3034 err = gk20a_busy(dev);
3035 if (err) {
3036 dev_err(&dev->dev,
3037 "%s: failed to host gk20a for ioctl cmd: 0x%x",
3038 __func__, cmd);
3039 break;
3040 }
3041 err = gk20a_channel_set_runlist_interleave(ch,
3042 ((struct nvgpu_runlist_interleave_args *)buf)->level);
3043 gk20a_idle(dev);
3044 break;
3048 default: 3045 default:
3049 dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd); 3046 dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
3050 err = -ENOTTY; 3047 err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 4aea9d19..3f5a657a 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -188,8 +188,7 @@ struct channel_gk20a {
188 spinlock_t update_fn_lock; /* make access to the two above atomic */ 188 spinlock_t update_fn_lock; /* make access to the two above atomic */
189 struct work_struct update_fn_work; 189 struct work_struct update_fn_work;
190 190
191 /* true if channel is interleaved with lower priority channels */ 191 u32 interleave_level;
192 bool interleave;
193}; 192};
194 193
195static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch) 194static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 769960af..28cc3086 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -303,12 +303,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
303 if (!runlist->active_tsgs) 303 if (!runlist->active_tsgs)
304 goto clean_up_runlist_info; 304 goto clean_up_runlist_info;
305 305
306 runlist->high_prio_channels =
307 kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
308 GFP_KERNEL);
309 if (!runlist->high_prio_channels)
310 goto clean_up_runlist_info;
311
312 runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries; 306 runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries;
313 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { 307 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
314 int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); 308 int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
@@ -337,9 +331,6 @@ clean_up_runlist_info:
337 kfree(runlist->active_tsgs); 331 kfree(runlist->active_tsgs);
338 runlist->active_tsgs = NULL; 332 runlist->active_tsgs = NULL;
339 333
340 kfree(runlist->high_prio_channels);
341 runlist->high_prio_channels = NULL;
342
343 kfree(f->runlist_info); 334 kfree(f->runlist_info);
344 f->runlist_info = NULL; 335 f->runlist_info = NULL;
345 336
@@ -2162,32 +2153,153 @@ static inline u32 gk20a_get_tsg_runlist_entry_0(struct tsg_gk20a *tsg)
2162 return runlist_entry_0; 2153 return runlist_entry_0;
2163} 2154}
2164 2155
2165/* add all active high priority channels */ 2156/* recursively construct a runlist with interleaved bare channels and TSGs */
2166static inline u32 gk20a_fifo_runlist_add_high_prio_entries( 2157static u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f,
2167 struct fifo_gk20a *f, 2158 struct fifo_runlist_info_gk20a *runlist,
2168 struct fifo_runlist_info_gk20a *runlist, 2159 u32 cur_level,
2169 u32 *runlist_entry) 2160 u32 *runlist_entry,
2161 bool interleave_enabled,
2162 bool prev_empty,
2163 u32 *entries_left)
2170{ 2164{
2171 struct channel_gk20a *ch = NULL; 2165 bool last_level = cur_level == NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH;
2172 unsigned long high_prio_chid; 2166 struct channel_gk20a *ch;
2173 u32 count = 0; 2167 bool skip_next = false;
2168 u32 chid, tsgid, count = 0;
2169
2170 gk20a_dbg_fn("");
2174 2171
2175 for_each_set_bit(high_prio_chid, 2172 /* for each bare channel, CH, on this level, insert all higher-level
2176 runlist->high_prio_channels, f->num_channels) { 2173 channels and TSGs before inserting CH. */
2177 ch = &f->channel[high_prio_chid]; 2174 for_each_set_bit(chid, runlist->active_channels, f->num_channels) {
2175 ch = &f->channel[chid];
2176
2177 if (ch->interleave_level != cur_level)
2178 continue;
2179
2180 if (gk20a_is_channel_marked_as_tsg(ch))
2181 continue;
2182
2183 if (!last_level && !skip_next) {
2184 runlist_entry = gk20a_runlist_construct_locked(f,
2185 runlist,
2186 cur_level + 1,
2187 runlist_entry,
2188 interleave_enabled,
2189 false,
2190 entries_left);
2191 /* if interleaving is disabled, higher-level channels
2192 and TSGs only need to be inserted once */
2193 if (!interleave_enabled)
2194 skip_next = true;
2195 }
2196
2197 if (!(*entries_left))
2198 return NULL;
2199
2200 gk20a_dbg_info("add channel %d to runlist", chid);
2201 runlist_entry[0] = ram_rl_entry_chid_f(chid);
2202 runlist_entry[1] = 0;
2203 runlist_entry += 2;
2204 count++;
2205 (*entries_left)--;
2206 }
2178 2207
2179 if (!gk20a_is_channel_marked_as_tsg(ch) && 2208 /* for each TSG, T, on this level, insert all higher-level channels
2180 test_bit(high_prio_chid, runlist->active_channels) == 1) { 2209 and TSGs before inserting T. */
2181 gk20a_dbg_info("add high prio channel %lu to runlist", 2210 for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
2182 high_prio_chid); 2211 struct tsg_gk20a *tsg = &f->tsg[tsgid];
2183 runlist_entry[0] = ram_rl_entry_chid_f(high_prio_chid); 2212
2213 if (tsg->interleave_level != cur_level)
2214 continue;
2215
2216 if (!last_level && !skip_next) {
2217 runlist_entry = gk20a_runlist_construct_locked(f,
2218 runlist,
2219 cur_level + 1,
2220 runlist_entry,
2221 interleave_enabled,
2222 false,
2223 entries_left);
2224 if (!interleave_enabled)
2225 skip_next = true;
2226 }
2227
2228 if (!(*entries_left))
2229 return NULL;
2230
2231 /* add TSG entry */
2232 gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
2233 runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg);
2234 runlist_entry[1] = 0;
2235 runlist_entry += 2;
2236 count++;
2237 (*entries_left)--;
2238
2239 mutex_lock(&tsg->ch_list_lock);
2240 /* add runnable channels bound to this TSG */
2241 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
2242 if (!test_bit(ch->hw_chid,
2243 runlist->active_channels))
2244 continue;
2245
2246 if (!(*entries_left)) {
2247 mutex_unlock(&tsg->ch_list_lock);
2248 return NULL;
2249 }
2250
2251 gk20a_dbg_info("add channel %d to runlist",
2252 ch->hw_chid);
2253 runlist_entry[0] = ram_rl_entry_chid_f(ch->hw_chid);
2184 runlist_entry[1] = 0; 2254 runlist_entry[1] = 0;
2185 runlist_entry += 2; 2255 runlist_entry += 2;
2186 count++; 2256 count++;
2257 (*entries_left)--;
2187 } 2258 }
2259 mutex_unlock(&tsg->ch_list_lock);
2188 } 2260 }
2189 2261
2190 return count; 2262 /* append entries from higher level if this level is empty */
2263 if (!count && !last_level)
2264 runlist_entry = gk20a_runlist_construct_locked(f,
2265 runlist,
2266 cur_level + 1,
2267 runlist_entry,
2268 interleave_enabled,
2269 true,
2270 entries_left);
2271
2272 /*
2273 * if previous and this level have entries, append
2274 * entries from higher level.
2275 *
2276 * ex. dropping from MEDIUM to LOW, need to insert HIGH
2277 */
2278 if (interleave_enabled && count && !prev_empty && !last_level)
2279 runlist_entry = gk20a_runlist_construct_locked(f,
2280 runlist,
2281 cur_level + 1,
2282 runlist_entry,
2283 interleave_enabled,
2284 false,
2285 entries_left);
2286 return runlist_entry;
2287}
2288
2289int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
2290 u32 id,
2291 bool is_tsg,
2292 u32 runlist_id,
2293 u32 new_level)
2294{
2295 gk20a_dbg_fn("");
2296
2297 if (is_tsg)
2298 g->fifo.tsg[id].interleave_level = new_level;
2299 else
2300 g->fifo.channel[id].interleave_level = new_level;
2301
2302 return 0;
2191} 2303}
2192 2304
2193static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, 2305static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
@@ -2198,14 +2310,11 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
2198 struct fifo_gk20a *f = &g->fifo; 2310 struct fifo_gk20a *f = &g->fifo;
2199 struct fifo_runlist_info_gk20a *runlist = NULL; 2311 struct fifo_runlist_info_gk20a *runlist = NULL;
2200 u32 *runlist_entry_base = NULL; 2312 u32 *runlist_entry_base = NULL;
2201 u32 *runlist_entry = NULL;
2202 u64 runlist_iova; 2313 u64 runlist_iova;
2203 u32 old_buf, new_buf; 2314 u32 old_buf, new_buf;
2204 u32 chid, tsgid;
2205 struct channel_gk20a *ch = NULL; 2315 struct channel_gk20a *ch = NULL;
2206 struct tsg_gk20a *tsg = NULL; 2316 struct tsg_gk20a *tsg = NULL;
2207 u32 count = 0; 2317 u32 count = 0;
2208 u32 count_channels_in_tsg;
2209 runlist = &f->runlist_info[runlist_id]; 2318 runlist = &f->runlist_info[runlist_id];
2210 2319
2211 /* valid channel, add/remove it from active list. 2320 /* valid channel, add/remove it from active list.
@@ -2254,91 +2363,23 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
2254 2363
2255 if (hw_chid != ~0 || /* add/remove a valid channel */ 2364 if (hw_chid != ~0 || /* add/remove a valid channel */
2256 add /* resume to add all channels back */) { 2365 add /* resume to add all channels back */) {
2257 runlist_entry = runlist_entry_base; 2366 u32 max_entries = f->num_runlist_entries;
2258 2367 u32 *runlist_end;
2259 /* Runlist manipulation:
2260 Insert an entry of all high priority channels inbetween
2261 all lower priority channels. This ensure that the maximum
2262 delay a runnable high priority channel has to wait is one
2263 medium timeslice + any context switching overhead +
2264 wait on other high priority channels.
2265 add non-TSG channels first */
2266 for_each_set_bit(chid,
2267 runlist->active_channels, f->num_channels) {
2268 ch = &f->channel[chid];
2269
2270 if (!gk20a_is_channel_marked_as_tsg(ch) &&
2271 !ch->interleave) {
2272 u32 added;
2273
2274 gk20a_dbg_info("add normal prio channel %d to runlist",
2275 chid);
2276 runlist_entry[0] = ram_rl_entry_chid_f(chid);
2277 runlist_entry[1] = 0;
2278 runlist_entry += 2;
2279 count++;
2280
2281 added = gk20a_fifo_runlist_add_high_prio_entries(
2282 f,
2283 runlist,
2284 runlist_entry);
2285 count += added;
2286 runlist_entry += 2 * added;
2287 }
2288 }
2289 2368
2290 /* if there were no lower priority channels, then just 2369 runlist_end = gk20a_runlist_construct_locked(f,
2291 * add the high priority channels once. */ 2370 runlist,
2292 if (count == 0) { 2371 0,
2293 count = gk20a_fifo_runlist_add_high_prio_entries( 2372 runlist_entry_base,
2294 f, 2373 g->runlist_interleave,
2295 runlist, 2374 true,
2296 runlist_entry); 2375 &max_entries);
2297 runlist_entry += 2 * count; 2376 if (!runlist_end) {
2377 ret = -E2BIG;
2378 goto clean_up;
2298 } 2379 }
2299 2380
2300 /* now add TSG entries and channels bound to TSG */ 2381 count = (runlist_end - runlist_entry_base) / 2;
2301 mutex_lock(&f->tsg_inuse_mutex); 2382 WARN_ON(count > f->num_runlist_entries);
2302 for_each_set_bit(tsgid,
2303 runlist->active_tsgs, f->num_channels) {
2304 u32 added;
2305 tsg = &f->tsg[tsgid];
2306 /* add TSG entry */
2307 gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
2308 runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg);
2309 runlist_entry[1] = 0;
2310 runlist_entry += 2;
2311 count++;
2312
2313 /* add runnable channels bound to this TSG */
2314 count_channels_in_tsg = 0;
2315 mutex_lock(&tsg->ch_list_lock);
2316 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
2317 if (!test_bit(ch->hw_chid,
2318 runlist->active_channels))
2319 continue;
2320 gk20a_dbg_info("add channel %d to runlist",
2321 ch->hw_chid);
2322 runlist_entry[0] =
2323 ram_rl_entry_chid_f(ch->hw_chid);
2324 runlist_entry[1] = 0;
2325 runlist_entry += 2;
2326 count++;
2327 count_channels_in_tsg++;
2328 }
2329 mutex_unlock(&tsg->ch_list_lock);
2330
2331 WARN_ON(tsg->num_active_channels !=
2332 count_channels_in_tsg);
2333
2334 added = gk20a_fifo_runlist_add_high_prio_entries(
2335 f,
2336 runlist,
2337 runlist_entry);
2338 count += added;
2339 runlist_entry += 2 * added;
2340 }
2341 mutex_unlock(&f->tsg_inuse_mutex);
2342 } else /* suspend to remove all channels */ 2383 } else /* suspend to remove all channels */
2343 count = 0; 2384 count = 0;
2344 2385
@@ -2493,42 +2534,6 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g)
2493 return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f(); 2534 return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f();
2494} 2535}
2495 2536
2496int gk20a_fifo_set_channel_priority(
2497 struct gk20a *g,
2498 u32 runlist_id,
2499 u32 hw_chid,
2500 bool interleave)
2501{
2502 struct fifo_runlist_info_gk20a *runlist = NULL;
2503 struct fifo_gk20a *f = &g->fifo;
2504 struct channel_gk20a *ch = NULL;
2505
2506 if (hw_chid >= f->num_channels)
2507 return -EINVAL;
2508
2509 if (runlist_id >= f->max_runlists)
2510 return -EINVAL;
2511
2512 ch = &f->channel[hw_chid];
2513
2514 gk20a_dbg_fn("");
2515
2516 runlist = &f->runlist_info[runlist_id];
2517
2518 mutex_lock(&runlist->mutex);
2519
2520 if (ch->interleave)
2521 set_bit(hw_chid, runlist->high_prio_channels);
2522 else
2523 clear_bit(hw_chid, runlist->high_prio_channels);
2524
2525 gk20a_dbg_fn("done");
2526
2527 mutex_unlock(&runlist->mutex);
2528
2529 return 0;
2530}
2531
2532struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, 2537struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
2533 u32 hw_chid) 2538 u32 hw_chid)
2534{ 2539{
@@ -2545,4 +2550,5 @@ void gk20a_init_fifo(struct gpu_ops *gops)
2545 gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle; 2550 gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
2546 gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos; 2551 gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos;
2547 gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature; 2552 gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature;
2553 gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave;
2548} 2554}
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index ee4e7328..0979bf2b 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -31,7 +31,6 @@
31struct fifo_runlist_info_gk20a { 31struct fifo_runlist_info_gk20a {
32 unsigned long *active_channels; 32 unsigned long *active_channels;
33 unsigned long *active_tsgs; 33 unsigned long *active_tsgs;
34 unsigned long *high_prio_channels;
35 /* Each engine has its own SW and HW runlist buffer.*/ 34 /* Each engine has its own SW and HW runlist buffer.*/
36 struct mem_desc mem[MAX_RUNLIST_BUFFERS]; 35 struct mem_desc mem[MAX_RUNLIST_BUFFERS];
37 u32 cur_buffer; 36 u32 cur_buffer;
@@ -184,8 +183,6 @@ void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
184int gk20a_fifo_wait_engine_idle(struct gk20a *g); 183int gk20a_fifo_wait_engine_idle(struct gk20a *g);
185u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g); 184u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g);
186u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g); 185u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g);
187int gk20a_fifo_set_channel_priority(struct gk20a *g, u32 runlist_id,
188 u32 hw_chid, bool interleave);
189u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g, 186u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g,
190 int *__id, bool *__is_tsg); 187 int *__id, bool *__is_tsg);
191bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, 188bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
@@ -198,4 +195,9 @@ struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
198 u32 hw_chid); 195 u32 hw_chid);
199 196
200void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg); 197void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg);
198int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
199 u32 id,
200 bool is_tsg,
201 u32 runlist_id,
202 u32 new_level);
201#endif /*__GR_GK20A_H__*/ 203#endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index fa2c61e1..0fee58e8 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -672,9 +672,6 @@ static int gk20a_init_support(struct platform_device *dev)
672 mutex_init(&g->ch_wdt_lock); 672 mutex_init(&g->ch_wdt_lock);
673 mutex_init(&g->poweroff_lock); 673 mutex_init(&g->poweroff_lock);
674 674
675 mutex_init(&g->interleave_lock);
676 g->num_interleaved_channels = 0;
677
678 g->remove_support = gk20a_remove_support; 675 g->remove_support = gk20a_remove_support;
679 return 0; 676 return 0;
680 677
@@ -1439,14 +1436,11 @@ static int gk20a_probe(struct platform_device *dev)
1439 if (tegra_platform_is_silicon()) 1436 if (tegra_platform_is_silicon())
1440 gk20a->timeouts_enabled = true; 1437 gk20a->timeouts_enabled = true;
1441 1438
1442 gk20a->interleave_high_priority = true; 1439 gk20a->runlist_interleave = true;
1443 1440
1444 gk20a->timeslice_low_priority_us = 1300; 1441 gk20a->timeslice_low_priority_us = 1300;
1445 gk20a->timeslice_medium_priority_us = 2600; 1442 gk20a->timeslice_medium_priority_us = 2600;
1446 if (gk20a->interleave_high_priority) 1443 gk20a->timeslice_high_priority_us = 5200;
1447 gk20a->timeslice_high_priority_us = 3000;
1448 else
1449 gk20a->timeslice_high_priority_us = 5200;
1450 1444
1451 /* Set up initial power settings. For non-slicon platforms, disable * 1445 /* Set up initial power settings. For non-slicon platforms, disable *
1452 * power features and for silicon platforms, read from platform data */ 1446 * power features and for silicon platforms, read from platform data */
@@ -1527,11 +1521,11 @@ static int gk20a_probe(struct platform_device *dev)
1527 platform->debugfs, 1521 platform->debugfs,
1528 &gk20a->timeslice_high_priority_us); 1522 &gk20a->timeslice_high_priority_us);
1529 1523
1530 gk20a->debugfs_interleave_high_priority = 1524 gk20a->debugfs_runlist_interleave =
1531 debugfs_create_bool("interleave_high_priority", 1525 debugfs_create_bool("runlist_interleave",
1532 S_IRUGO|S_IWUSR, 1526 S_IRUGO|S_IWUSR,
1533 platform->debugfs, 1527 platform->debugfs,
1534 &gk20a->interleave_high_priority); 1528 &gk20a->runlist_interleave);
1535 1529
1536 gr_gk20a_debugfs_init(gk20a); 1530 gr_gk20a_debugfs_init(gk20a);
1537 gk20a_pmu_debugfs_init(dev); 1531 gk20a_pmu_debugfs_init(dev);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index afdbeef7..faccf04a 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -54,8 +54,6 @@ struct acr_gm20b;
54 32 ns is the resolution of ptimer. */ 54 32 ns is the resolution of ptimer. */
55#define PTIMER_REF_FREQ_HZ 31250000 55#define PTIMER_REF_FREQ_HZ 31250000
56 56
57#define MAX_INTERLEAVED_CHANNELS 32
58
59struct cooling_device_gk20a { 57struct cooling_device_gk20a {
60 struct thermal_cooling_device *gk20a_cooling_dev; 58 struct thermal_cooling_device *gk20a_cooling_dev;
61 unsigned int gk20a_freq_state; 59 unsigned int gk20a_freq_state;
@@ -268,6 +266,9 @@ struct gpu_ops {
268 u32 (*get_num_fifos)(struct gk20a *g); 266 u32 (*get_num_fifos)(struct gk20a *g);
269 u32 (*get_pbdma_signature)(struct gk20a *g); 267 u32 (*get_pbdma_signature)(struct gk20a *g);
270 int (*channel_set_priority)(struct channel_gk20a *ch, u32 priority); 268 int (*channel_set_priority)(struct channel_gk20a *ch, u32 priority);
269 int (*set_runlist_interleave)(struct gk20a *g, u32 id,
270 bool is_tsg, u32 runlist_id,
271 u32 new_level);
271 } fifo; 272 } fifo;
272 struct pmu_v { 273 struct pmu_v {
273 /*used for change of enum zbc update cmd id from ver 0 to ver1*/ 274 /*used for change of enum zbc update cmd id from ver 0 to ver1*/
@@ -536,10 +537,7 @@ struct gk20a {
536 u32 timeslice_low_priority_us; 537 u32 timeslice_low_priority_us;
537 u32 timeslice_medium_priority_us; 538 u32 timeslice_medium_priority_us;
538 u32 timeslice_high_priority_us; 539 u32 timeslice_high_priority_us;
539 u32 interleave_high_priority; 540 u32 runlist_interleave;
540
541 struct mutex interleave_lock;
542 u32 num_interleaved_channels;
543 541
544 bool slcg_enabled; 542 bool slcg_enabled;
545 bool blcg_enabled; 543 bool blcg_enabled;
@@ -564,7 +562,7 @@ struct gk20a {
564 struct dentry *debugfs_timeslice_low_priority_us; 562 struct dentry *debugfs_timeslice_low_priority_us;
565 struct dentry *debugfs_timeslice_medium_priority_us; 563 struct dentry *debugfs_timeslice_medium_priority_us;
566 struct dentry *debugfs_timeslice_high_priority_us; 564 struct dentry *debugfs_timeslice_high_priority_us;
567 struct dentry *debugfs_interleave_high_priority; 565 struct dentry *debugfs_runlist_interleave;
568 566
569#endif 567#endif
570 struct gk20a_ctxsw_ucode_info ctxsw_ucode_info; 568 struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
index 4421744c..b41cca08 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -228,6 +228,7 @@ int gk20a_tsg_open(struct gk20a *g, struct file *filp)
228 228
229 tsg->tsg_gr_ctx = NULL; 229 tsg->tsg_gr_ctx = NULL;
230 tsg->vm = NULL; 230 tsg->vm = NULL;
231 tsg->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
231 232
232 filp->private_data = tsg; 233 filp->private_data = tsg;
233 234
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
index bcc4d0c4..7e0a75d1 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
@@ -49,6 +49,8 @@ struct tsg_gk20a {
49 struct gr_ctx_desc *tsg_gr_ctx; 49 struct gr_ctx_desc *tsg_gr_ctx;
50 50
51 struct vm_gk20a *vm; 51 struct vm_gk20a *vm;
52
53 u32 interleave_level;
52}; 54};
53 55
54int gk20a_enable_tsg(struct tsg_gk20a *tsg); 56int gk20a_enable_tsg(struct tsg_gk20a *tsg);
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index d1deffb9..3fded03c 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * GM20B Fifo 2 * GM20B Fifo
3 * 3 *
4 * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -121,4 +121,5 @@ void gm20b_init_fifo(struct gpu_ops *gops)
121 gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle; 121 gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
122 gops->fifo.get_num_fifos = gm20b_fifo_get_num_fifos; 122 gops->fifo.get_num_fifos = gm20b_fifo_get_num_fifos;
123 gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature; 123 gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature;
124 gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave;
124} 125}
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index e776e97c..b4bb7f38 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Virtualized GPU Fifo 2 * Virtualized GPU Fifo
3 * 3 *
4 * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -194,12 +194,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
194 if (!runlist->active_channels) 194 if (!runlist->active_channels)
195 goto clean_up_runlist_info; 195 goto clean_up_runlist_info;
196 196
197 runlist->high_prio_channels =
198 kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
199 GFP_KERNEL);
200 if (!runlist->high_prio_channels)
201 goto clean_up_runlist_info;
202
203 runlist_size = sizeof(u16) * f->num_channels; 197 runlist_size = sizeof(u16) * f->num_channels;
204 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { 198 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
205 int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); 199 int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
@@ -222,9 +216,6 @@ clean_up_runlist:
222 gk20a_gmmu_free(g, &runlist->mem[i]); 216 gk20a_gmmu_free(g, &runlist->mem[i]);
223 217
224clean_up_runlist_info: 218clean_up_runlist_info:
225 kfree(runlist->high_prio_channels);
226 runlist->high_prio_channels = NULL;
227
228 kfree(runlist->active_channels); 219 kfree(runlist->active_channels);
229 runlist->active_channels = NULL; 220 runlist->active_channels = NULL;
230 221