diff options
Diffstat (limited to 'drivers')
39 files changed, 2366 insertions, 294 deletions
diff --git a/drivers/gpu/nvgpu/Kconfig b/drivers/gpu/nvgpu/Kconfig index d0e25aa2..94173976 100644 --- a/drivers/gpu/nvgpu/Kconfig +++ b/drivers/gpu/nvgpu/Kconfig | |||
@@ -54,6 +54,16 @@ config GK20A_CYCLE_STATS | |||
54 | help | 54 | help |
55 | Say Y here to enable the cycle stats debugging features. | 55 | Say Y here to enable the cycle stats debugging features. |
56 | 56 | ||
57 | config GK20A_CTXSW_TRACE | ||
58 | bool "Support GK20A Context Switch tracing" | ||
59 | depends on GK20A | ||
60 | default n | ||
61 | help | ||
62 | Enable support for the GK20A Context Switch Tracing. In this mode, | ||
63 | FECS collects timestamps for contexts loaded on GR engine. This | ||
64 | allows tracking context switches on GR engine, as well as | ||
65 | identifying processes that submitted work. | ||
66 | |||
57 | config TEGRA_GK20A | 67 | config TEGRA_GK20A |
58 | bool "Enable the GK20A GPU on Tegra" | 68 | bool "Enable the GK20A GPU on Tegra" |
59 | depends on TEGRA_GRHOST || TEGRA_HOST1X | 69 | depends on TEGRA_GRHOST || TEGRA_HOST1X |
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index f6b3a673..df660eb7 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile | |||
@@ -8,9 +8,9 @@ ccflags-y += -Werror | |||
8 | ccflags-y += -Wno-error=cpp | 8 | ccflags-y += -Wno-error=cpp |
9 | 9 | ||
10 | ifeq ($(CONFIG_ARCH_TEGRA_18x_SOC),y) | 10 | ifeq ($(CONFIG_ARCH_TEGRA_18x_SOC),y) |
11 | ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/drivers/gpu/nvgpu | 11 | ccflags-y += -I$(srctree)/../kernel-t18x/drivers/gpu/nvgpu |
12 | ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include | 12 | ccflags-y += -I$(srctree)/../kernel-t18x/include |
13 | ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include/uapi | 13 | ccflags-y += -I$(srctree)/../kernel-t18x/include/uapi |
14 | endif | 14 | endif |
15 | 15 | ||
16 | obj-$(CONFIG_GK20A) := nvgpu.o | 16 | obj-$(CONFIG_GK20A) := nvgpu.o |
@@ -46,6 +46,8 @@ nvgpu-y := \ | |||
46 | gk20a/cde_gk20a.o \ | 46 | gk20a/cde_gk20a.o \ |
47 | gk20a/platform_gk20a_generic.o \ | 47 | gk20a/platform_gk20a_generic.o \ |
48 | gk20a/tsg_gk20a.o \ | 48 | gk20a/tsg_gk20a.o \ |
49 | gk20a/ctxsw_trace_gk20a.o \ | ||
50 | gk20a/fecs_trace_gk20a.o \ | ||
49 | gk20a/mc_gk20a.o \ | 51 | gk20a/mc_gk20a.o \ |
50 | gm20b/hal_gm20b.o \ | 52 | gm20b/hal_gm20b.o \ |
51 | gm20b/ltc_gm20b.o \ | 53 | gm20b/ltc_gm20b.o \ |
@@ -64,7 +66,6 @@ nvgpu-y := \ | |||
64 | gm20b/debug_gm20b.o \ | 66 | gm20b/debug_gm20b.o \ |
65 | gm20b/cde_gm20b.o \ | 67 | gm20b/cde_gm20b.o \ |
66 | gm20b/therm_gm20b.o | 68 | gm20b/therm_gm20b.o |
67 | |||
68 | nvgpu-$(CONFIG_TEGRA_GK20A) += gk20a/platform_gk20a_tegra.o | 69 | nvgpu-$(CONFIG_TEGRA_GK20A) += gk20a/platform_gk20a_tegra.o |
69 | nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o | 70 | nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o |
70 | 71 | ||
@@ -78,6 +79,7 @@ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \ | |||
78 | vgpu/debug_vgpu.o \ | 79 | vgpu/debug_vgpu.o \ |
79 | vgpu/vgpu.o \ | 80 | vgpu/vgpu.o \ |
80 | vgpu/dbg_vgpu.o \ | 81 | vgpu/dbg_vgpu.o \ |
82 | vgpu/fecs_trace_vgpu.o \ | ||
81 | vgpu/gk20a/vgpu_hal_gk20a.o \ | 83 | vgpu/gk20a/vgpu_hal_gk20a.o \ |
82 | vgpu/gk20a/vgpu_gr_gk20a.o \ | 84 | vgpu/gk20a/vgpu_gr_gk20a.o \ |
83 | vgpu/gm20b/vgpu_hal_gm20b.o \ | 85 | vgpu/gm20b/vgpu_hal_gm20b.o \ |
@@ -94,7 +96,5 @@ nvgpu-$(CONFIG_GK20A_CYCLE_STATS) += \ | |||
94 | gk20a/css_gr_gk20a.o | 96 | gk20a/css_gr_gk20a.o |
95 | 97 | ||
96 | ifeq ($(CONFIG_ARCH_TEGRA_18x_SOC),y) | 98 | ifeq ($(CONFIG_ARCH_TEGRA_18x_SOC),y) |
97 | ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/drivers/gpu/nvgpu | 99 | include ../kernel-t18x/drivers/gpu/nvgpu/Makefile |
98 | ccflags-$(CONFIG_GK20A) += -I$(srctree)/../kernel-t18x/include | ||
99 | obj-$(CONFIG_GK20A) += ../../../../kernel-t18x/drivers/gpu/nvgpu/ | ||
100 | endif | 100 | endif |
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c index b6b38541..0571ca1f 100644 --- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c | |||
@@ -279,13 +279,15 @@ static int gk20a_as_ioctl_get_va_regions( | |||
279 | 279 | ||
280 | for (i = 0; i < write_entries; ++i) { | 280 | for (i = 0; i < write_entries; ++i) { |
281 | struct nvgpu_as_va_region region; | 281 | struct nvgpu_as_va_region region; |
282 | struct gk20a_allocator *vma = vm->fixed.init ? | ||
283 | &vm->fixed : &vm->vma[i]; | ||
282 | 284 | ||
283 | memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); | 285 | memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); |
284 | 286 | ||
285 | region.page_size = vm->gmmu_page_sizes[i]; | 287 | region.page_size = vm->gmmu_page_sizes[i]; |
286 | region.offset = vm->vma[i].base; | 288 | region.offset = vma->base; |
287 | /* No __aeabi_uldivmod() on some platforms... */ | 289 | /* No __aeabi_uldivmod() on some platforms... */ |
288 | region.pages = (vm->vma[i].end - vm->vma[i].start) >> | 290 | region.pages = (vma->end - vma->start) >> |
289 | ilog2(region.page_size); | 291 | ilog2(region.page_size); |
290 | 292 | ||
291 | if (copy_to_user(user_region_ptr + i, ®ion, sizeof(region))) | 293 | if (copy_to_user(user_region_ptr + i, ®ion, sizeof(region))) |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 1f63bbd8..20976992 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/vmalloc.h> | 28 | #include <linux/vmalloc.h> |
29 | 29 | ||
30 | #include "debug_gk20a.h" | 30 | #include "debug_gk20a.h" |
31 | #include "ctxsw_trace_gk20a.h" | ||
31 | 32 | ||
32 | #include "gk20a.h" | 33 | #include "gk20a.h" |
33 | #include "dbg_gpu_gk20a.h" | 34 | #include "dbg_gpu_gk20a.h" |
@@ -44,6 +45,9 @@ | |||
44 | 45 | ||
45 | #define NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT 64 /* channels */ | 46 | #define NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT 64 /* channels */ |
46 | 47 | ||
48 | #define NVGPU_CHANNEL_MIN_TIMESLICE_US 1000 | ||
49 | #define NVGPU_CHANNEL_MAX_TIMESLICE_US 50000 | ||
50 | |||
47 | static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f); | 51 | static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f); |
48 | static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c); | 52 | static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c); |
49 | 53 | ||
@@ -177,7 +181,7 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, | |||
177 | } | 181 | } |
178 | 182 | ||
179 | static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, | 183 | static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, |
180 | u32 timeslice_period, bool interleave) | 184 | u32 timeslice_period) |
181 | { | 185 | { |
182 | void *inst_ptr; | 186 | void *inst_ptr; |
183 | int shift = 0, value = 0; | 187 | int shift = 0, value = 0; |
@@ -205,30 +209,6 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, | |||
205 | gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) | | 209 | gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) | |
206 | ccsr_channel_enable_set_true_f()); | 210 | ccsr_channel_enable_set_true_f()); |
207 | 211 | ||
208 | if (c->interleave != interleave) { | ||
209 | mutex_lock(&c->g->interleave_lock); | ||
210 | c->interleave = interleave; | ||
211 | if (interleave) | ||
212 | if (c->g->num_interleaved_channels >= | ||
213 | MAX_INTERLEAVED_CHANNELS) { | ||
214 | gk20a_err(dev_from_gk20a(c->g), | ||
215 | "Change of priority would exceed runlist length, only changing timeslice\n"); | ||
216 | c->interleave = false; | ||
217 | } else | ||
218 | c->g->num_interleaved_channels += 1; | ||
219 | else | ||
220 | c->g->num_interleaved_channels -= 1; | ||
221 | |||
222 | mutex_unlock(&c->g->interleave_lock); | ||
223 | gk20a_dbg_info("Set channel %d to interleave %d", | ||
224 | c->hw_chid, c->interleave); | ||
225 | |||
226 | gk20a_fifo_set_channel_priority( | ||
227 | c->g, 0, c->hw_chid, c->interleave); | ||
228 | c->g->ops.fifo.update_runlist( | ||
229 | c->g, 0, ~0, true, false); | ||
230 | } | ||
231 | |||
232 | return 0; | 212 | return 0; |
233 | } | 213 | } |
234 | 214 | ||
@@ -238,6 +218,12 @@ u32 channel_gk20a_pbdma_acquire_val(struct channel_gk20a *c) | |||
238 | u64 timeout; | 218 | u64 timeout; |
239 | int val_len; | 219 | int val_len; |
240 | 220 | ||
221 | val = pbdma_acquire_retry_man_2_f() | | ||
222 | pbdma_acquire_retry_exp_2_f(); | ||
223 | |||
224 | if (!c->g->timeouts_enabled) | ||
225 | return val; | ||
226 | |||
241 | timeout = gk20a_get_channel_watchdog_timeout(c); | 227 | timeout = gk20a_get_channel_watchdog_timeout(c); |
242 | do_div(timeout, 2); /* set acquire timeout to half of channel wdt */ | 228 | do_div(timeout, 2); /* set acquire timeout to half of channel wdt */ |
243 | timeout *= 1000000UL; /* ms -> ns */ | 229 | timeout *= 1000000UL; /* ms -> ns */ |
@@ -256,11 +242,10 @@ u32 channel_gk20a_pbdma_acquire_val(struct channel_gk20a *c) | |||
256 | man = timeout; | 242 | man = timeout; |
257 | } | 243 | } |
258 | 244 | ||
259 | val = pbdma_acquire_retry_man_2_f() | | 245 | val |= pbdma_acquire_timeout_exp_f(exp) | |
260 | pbdma_acquire_retry_exp_2_f() | | ||
261 | pbdma_acquire_timeout_exp_f(exp) | | ||
262 | pbdma_acquire_timeout_man_f(man) | | 246 | pbdma_acquire_timeout_man_f(man) | |
263 | pbdma_acquire_timeout_en_enable_f(); | 247 | pbdma_acquire_timeout_en_enable_f(); |
248 | |||
264 | return val; | 249 | return val; |
265 | } | 250 | } |
266 | 251 | ||
@@ -711,11 +696,39 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch, | |||
711 | return 0; | 696 | return 0; |
712 | } | 697 | } |
713 | 698 | ||
714 | static int gk20a_init_error_notifier(struct channel_gk20a *ch, | 699 | static int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch, |
715 | struct nvgpu_set_error_notifier *args) { | 700 | u32 level) |
716 | void *va; | 701 | { |
702 | struct gk20a *g = ch->g; | ||
703 | int ret; | ||
704 | |||
705 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
706 | gk20a_err(dev_from_gk20a(g), "invalid operation for TSG!\n"); | ||
707 | return -EINVAL; | ||
708 | } | ||
709 | |||
710 | switch (level) { | ||
711 | case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW: | ||
712 | case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM: | ||
713 | case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH: | ||
714 | ret = g->ops.fifo.set_runlist_interleave(g, ch->hw_chid, | ||
715 | false, 0, level); | ||
716 | break; | ||
717 | default: | ||
718 | ret = -EINVAL; | ||
719 | break; | ||
720 | } | ||
717 | 721 | ||
722 | return ret ? ret : g->ops.fifo.update_runlist(g, 0, ~0, true, true); | ||
723 | } | ||
724 | |||
725 | static int gk20a_init_error_notifier(struct channel_gk20a *ch, | ||
726 | struct nvgpu_set_error_notifier *args) | ||
727 | { | ||
728 | struct device *dev = dev_from_gk20a(ch->g); | ||
718 | struct dma_buf *dmabuf; | 729 | struct dma_buf *dmabuf; |
730 | void *va; | ||
731 | u64 end = args->offset + sizeof(struct nvgpu_notification); | ||
719 | 732 | ||
720 | if (!args->mem) { | 733 | if (!args->mem) { |
721 | pr_err("gk20a_init_error_notifier: invalid memory handle\n"); | 734 | pr_err("gk20a_init_error_notifier: invalid memory handle\n"); |
@@ -731,6 +744,13 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch, | |||
731 | pr_err("Invalid handle: %d\n", args->mem); | 744 | pr_err("Invalid handle: %d\n", args->mem); |
732 | return -EINVAL; | 745 | return -EINVAL; |
733 | } | 746 | } |
747 | |||
748 | if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) { | ||
749 | dma_buf_put(dmabuf); | ||
750 | gk20a_err(dev, "gk20a_init_error_notifier: invalid offset\n"); | ||
751 | return -EINVAL; | ||
752 | } | ||
753 | |||
734 | /* map handle */ | 754 | /* map handle */ |
735 | va = dma_buf_vmap(dmabuf); | 755 | va = dma_buf_vmap(dmabuf); |
736 | if (!va) { | 756 | if (!va) { |
@@ -890,17 +910,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch) | |||
890 | } | 910 | } |
891 | mutex_unlock(&f->deferred_reset_mutex); | 911 | mutex_unlock(&f->deferred_reset_mutex); |
892 | 912 | ||
893 | if (ch->interleave) { | ||
894 | ch->interleave = false; | ||
895 | gk20a_fifo_set_channel_priority( | ||
896 | ch->g, 0, ch->hw_chid, ch->interleave); | ||
897 | |||
898 | mutex_lock(&f->g->interleave_lock); | ||
899 | WARN_ON(f->g->num_interleaved_channels == 0); | ||
900 | f->g->num_interleaved_channels -= 1; | ||
901 | mutex_unlock(&f->g->interleave_lock); | ||
902 | } | ||
903 | |||
904 | if (!ch->bound) | 913 | if (!ch->bound) |
905 | goto release; | 914 | goto release; |
906 | 915 | ||
@@ -912,6 +921,9 @@ static void gk20a_free_channel(struct channel_gk20a *ch) | |||
912 | 921 | ||
913 | gk20a_free_error_notifiers(ch); | 922 | gk20a_free_error_notifiers(ch); |
914 | 923 | ||
924 | if (g->ops.fecs_trace.unbind_channel) | ||
925 | g->ops.fecs_trace.unbind_channel(g, ch); | ||
926 | |||
915 | /* release channel ctx */ | 927 | /* release channel ctx */ |
916 | g->ops.gr.free_channel_ctx(ch); | 928 | g->ops.gr.free_channel_ctx(ch); |
917 | 929 | ||
@@ -1145,11 +1157,8 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g) | |||
1145 | ch->has_timedout = false; | 1157 | ch->has_timedout = false; |
1146 | ch->wdt_enabled = true; | 1158 | ch->wdt_enabled = true; |
1147 | ch->obj_class = 0; | 1159 | ch->obj_class = 0; |
1148 | ch->interleave = false; | ||
1149 | ch->clean_up.scheduled = false; | 1160 | ch->clean_up.scheduled = false; |
1150 | gk20a_fifo_set_channel_priority( | 1161 | ch->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW; |
1151 | ch->g, 0, ch->hw_chid, ch->interleave); | ||
1152 | |||
1153 | 1162 | ||
1154 | /* The channel is *not* runnable at this point. It still needs to have | 1163 | /* The channel is *not* runnable at this point. It still needs to have |
1155 | * an address space bound and allocate a gpfifo and grctx. */ | 1164 | * an address space bound and allocate a gpfifo and grctx. */ |
@@ -1697,6 +1706,10 @@ static void gk20a_channel_timeout_handler(struct work_struct *work) | |||
1697 | /* Need global lock since multiple channels can timeout at a time */ | 1706 | /* Need global lock since multiple channels can timeout at a time */ |
1698 | mutex_lock(&g->ch_wdt_lock); | 1707 | mutex_lock(&g->ch_wdt_lock); |
1699 | 1708 | ||
1709 | gk20a_debug_dump(g->dev); | ||
1710 | gk20a_gr_debug_dump(g->dev); | ||
1711 | |||
1712 | |||
1700 | /* Get timed out job and reset the timer */ | 1713 | /* Get timed out job and reset the timer */ |
1701 | mutex_lock(&ch->timeout.lock); | 1714 | mutex_lock(&ch->timeout.lock); |
1702 | job = ch->timeout.job; | 1715 | job = ch->timeout.job; |
@@ -2399,6 +2412,7 @@ static int gk20a_channel_wait(struct channel_gk20a *ch, | |||
2399 | u32 offset; | 2412 | u32 offset; |
2400 | unsigned long timeout; | 2413 | unsigned long timeout; |
2401 | int remain, ret = 0; | 2414 | int remain, ret = 0; |
2415 | u64 end; | ||
2402 | 2416 | ||
2403 | gk20a_dbg_fn(""); | 2417 | gk20a_dbg_fn(""); |
2404 | 2418 | ||
@@ -2414,6 +2428,7 @@ static int gk20a_channel_wait(struct channel_gk20a *ch, | |||
2414 | case NVGPU_WAIT_TYPE_NOTIFIER: | 2428 | case NVGPU_WAIT_TYPE_NOTIFIER: |
2415 | id = args->condition.notifier.dmabuf_fd; | 2429 | id = args->condition.notifier.dmabuf_fd; |
2416 | offset = args->condition.notifier.offset; | 2430 | offset = args->condition.notifier.offset; |
2431 | end = offset + sizeof(struct notification); | ||
2417 | 2432 | ||
2418 | dmabuf = dma_buf_get(id); | 2433 | dmabuf = dma_buf_get(id); |
2419 | if (IS_ERR(dmabuf)) { | 2434 | if (IS_ERR(dmabuf)) { |
@@ -2422,6 +2437,12 @@ static int gk20a_channel_wait(struct channel_gk20a *ch, | |||
2422 | return -EINVAL; | 2437 | return -EINVAL; |
2423 | } | 2438 | } |
2424 | 2439 | ||
2440 | if (end > dmabuf->size || end < sizeof(struct notification)) { | ||
2441 | dma_buf_put(dmabuf); | ||
2442 | gk20a_err(d, "invalid notifier offset\n"); | ||
2443 | return -EINVAL; | ||
2444 | } | ||
2445 | |||
2425 | notif = dma_buf_vmap(dmabuf); | 2446 | notif = dma_buf_vmap(dmabuf); |
2426 | if (!notif) { | 2447 | if (!notif) { |
2427 | gk20a_err(d, "failed to map notifier memory"); | 2448 | gk20a_err(d, "failed to map notifier memory"); |
@@ -2596,7 +2617,6 @@ unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait) | |||
2596 | int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority) | 2617 | int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority) |
2597 | { | 2618 | { |
2598 | u32 timeslice_timeout; | 2619 | u32 timeslice_timeout; |
2599 | bool interleave = false; | ||
2600 | 2620 | ||
2601 | if (gk20a_is_channel_marked_as_tsg(ch)) { | 2621 | if (gk20a_is_channel_marked_as_tsg(ch)) { |
2602 | gk20a_err(dev_from_gk20a(ch->g), | 2622 | gk20a_err(dev_from_gk20a(ch->g), |
@@ -2613,8 +2633,6 @@ int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority) | |||
2613 | timeslice_timeout = ch->g->timeslice_medium_priority_us; | 2633 | timeslice_timeout = ch->g->timeslice_medium_priority_us; |
2614 | break; | 2634 | break; |
2615 | case NVGPU_PRIORITY_HIGH: | 2635 | case NVGPU_PRIORITY_HIGH: |
2616 | if (ch->g->interleave_high_priority) | ||
2617 | interleave = true; | ||
2618 | timeslice_timeout = ch->g->timeslice_high_priority_us; | 2636 | timeslice_timeout = ch->g->timeslice_high_priority_us; |
2619 | break; | 2637 | break; |
2620 | default: | 2638 | default: |
@@ -2623,7 +2641,22 @@ int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority) | |||
2623 | } | 2641 | } |
2624 | 2642 | ||
2625 | return channel_gk20a_set_schedule_params(ch, | 2643 | return channel_gk20a_set_schedule_params(ch, |
2626 | timeslice_timeout, interleave); | 2644 | timeslice_timeout); |
2645 | } | ||
2646 | |||
2647 | int gk20a_channel_set_timeslice(struct channel_gk20a *ch, u32 timeslice) | ||
2648 | { | ||
2649 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
2650 | gk20a_err(dev_from_gk20a(ch->g), | ||
2651 | "invalid operation for TSG!\n"); | ||
2652 | return -EINVAL; | ||
2653 | } | ||
2654 | |||
2655 | if (timeslice < NVGPU_CHANNEL_MIN_TIMESLICE_US || | ||
2656 | timeslice > NVGPU_CHANNEL_MAX_TIMESLICE_US) | ||
2657 | return -EINVAL; | ||
2658 | |||
2659 | return channel_gk20a_set_schedule_params(ch, timeslice); | ||
2627 | } | 2660 | } |
2628 | 2661 | ||
2629 | static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, | 2662 | static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, |
@@ -2778,6 +2811,7 @@ void gk20a_init_channel(struct gpu_ops *gops) | |||
2778 | gops->fifo.free_inst = channel_gk20a_free_inst; | 2811 | gops->fifo.free_inst = channel_gk20a_free_inst; |
2779 | gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc; | 2812 | gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc; |
2780 | gops->fifo.channel_set_priority = gk20a_channel_set_priority; | 2813 | gops->fifo.channel_set_priority = gk20a_channel_set_priority; |
2814 | gops->fifo.channel_set_timeslice = gk20a_channel_set_timeslice; | ||
2781 | } | 2815 | } |
2782 | 2816 | ||
2783 | long gk20a_channel_ioctl(struct file *filp, | 2817 | long gk20a_channel_ioctl(struct file *filp, |
@@ -3028,6 +3062,30 @@ long gk20a_channel_ioctl(struct file *filp, | |||
3028 | err = gk20a_channel_set_wdt_status(ch, | 3062 | err = gk20a_channel_set_wdt_status(ch, |
3029 | (struct nvgpu_channel_wdt_args *)buf); | 3063 | (struct nvgpu_channel_wdt_args *)buf); |
3030 | break; | 3064 | break; |
3065 | case NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE: | ||
3066 | err = gk20a_busy(dev); | ||
3067 | if (err) { | ||
3068 | dev_err(&dev->dev, | ||
3069 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
3070 | __func__, cmd); | ||
3071 | break; | ||
3072 | } | ||
3073 | err = gk20a_channel_set_runlist_interleave(ch, | ||
3074 | ((struct nvgpu_runlist_interleave_args *)buf)->level); | ||
3075 | gk20a_idle(dev); | ||
3076 | break; | ||
3077 | case NVGPU_IOCTL_CHANNEL_SET_TIMESLICE: | ||
3078 | err = gk20a_busy(dev); | ||
3079 | if (err) { | ||
3080 | dev_err(&dev->dev, | ||
3081 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
3082 | __func__, cmd); | ||
3083 | break; | ||
3084 | } | ||
3085 | err = ch->g->ops.fifo.channel_set_timeslice(ch, | ||
3086 | ((struct nvgpu_timeslice_args *)buf)->timeslice_us); | ||
3087 | gk20a_idle(dev); | ||
3088 | break; | ||
3031 | default: | 3089 | default: |
3032 | dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd); | 3090 | dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd); |
3033 | err = -ENOTTY; | 3091 | err = -ENOTTY; |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 4aea9d19..e3fbba3e 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -188,8 +188,7 @@ struct channel_gk20a { | |||
188 | spinlock_t update_fn_lock; /* make access to the two above atomic */ | 188 | spinlock_t update_fn_lock; /* make access to the two above atomic */ |
189 | struct work_struct update_fn_work; | 189 | struct work_struct update_fn_work; |
190 | 190 | ||
191 | /* true if channel is interleaved with lower priority channels */ | 191 | u32 interleave_level; |
192 | bool interleave; | ||
193 | }; | 192 | }; |
194 | 193 | ||
195 | static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch) | 194 | static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch) |
@@ -276,5 +275,6 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, | |||
276 | int timeslice_period, | 275 | int timeslice_period, |
277 | int *__timeslice_timeout, int *__timeslice_scale); | 276 | int *__timeslice_timeout, int *__timeslice_scale); |
278 | int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority); | 277 | int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority); |
278 | int gk20a_channel_set_timeslice(struct channel_gk20a *ch, u32 timeslice); | ||
279 | 279 | ||
280 | #endif /* CHANNEL_GK20A_H */ | 280 | #endif /* CHANNEL_GK20A_H */ |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 8ff53d17..87f0bf74 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -115,8 +115,10 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, | |||
115 | } | 115 | } |
116 | 116 | ||
117 | num_wait_cmds = nvhost_sync_num_pts(sync_fence); | 117 | num_wait_cmds = nvhost_sync_num_pts(sync_fence); |
118 | if (num_wait_cmds == 0) | 118 | if (num_wait_cmds == 0) { |
119 | sync_fence_put(sync_fence); | ||
119 | return 0; | 120 | return 0; |
121 | } | ||
120 | 122 | ||
121 | err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd); | 123 | err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd); |
122 | if (err) { | 124 | if (err) { |
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index 9ed5fef3..b2ae224f 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include "hw_gr_gk20a.h" | 29 | #include "hw_gr_gk20a.h" |
30 | #include "hw_fb_gk20a.h" | 30 | #include "hw_fb_gk20a.h" |
31 | #include "hw_proj_gk20a.h" | 31 | #include "hw_proj_gk20a.h" |
32 | #include "hw_timer_gk20a.h" | ||
32 | 33 | ||
33 | int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp) | 34 | int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp) |
34 | { | 35 | { |
@@ -272,6 +273,8 @@ static int nvgpu_gpu_ioctl_inval_icache( | |||
272 | struct nvgpu_dbg_gpu_reg_op ops; | 273 | struct nvgpu_dbg_gpu_reg_op ops; |
273 | 274 | ||
274 | ch = gk20a_get_channel_from_file(args->channel_fd); | 275 | ch = gk20a_get_channel_from_file(args->channel_fd); |
276 | if (!ch) | ||
277 | return -EINVAL; | ||
275 | 278 | ||
276 | ops.op = REGOP(READ_32); | 279 | ops.op = REGOP(READ_32); |
277 | ops.type = REGOP(TYPE_GR_CTX); | 280 | ops.type = REGOP(TYPE_GR_CTX); |
@@ -528,6 +531,94 @@ static int gk20a_ctrl_get_buffer_info( | |||
528 | &args->out.id, &args->out.length); | 531 | &args->out.id, &args->out.length); |
529 | } | 532 | } |
530 | 533 | ||
534 | static inline u64 get_cpu_timestamp_tsc(void) | ||
535 | { | ||
536 | return ((u64) get_cycles()); | ||
537 | } | ||
538 | |||
539 | static inline u64 get_cpu_timestamp_jiffies(void) | ||
540 | { | ||
541 | return (get_jiffies_64() - INITIAL_JIFFIES); | ||
542 | } | ||
543 | |||
544 | static inline u64 get_cpu_timestamp_timeofday(void) | ||
545 | { | ||
546 | struct timeval tv; | ||
547 | |||
548 | do_gettimeofday(&tv); | ||
549 | return timeval_to_jiffies(&tv); | ||
550 | } | ||
551 | |||
552 | static inline int get_timestamps_zipper(struct gk20a *g, | ||
553 | u64 (*get_cpu_timestamp)(void), | ||
554 | struct nvgpu_gpu_get_cpu_time_correlation_info_args *args) | ||
555 | { | ||
556 | int err = 0; | ||
557 | int i = 0; | ||
558 | u32 gpu_timestamp_hi_new = 0; | ||
559 | u32 gpu_timestamp_hi_old = 0; | ||
560 | |||
561 | if (gk20a_busy(g->dev)) { | ||
562 | gk20a_err(dev_from_gk20a(g), "GPU not powered on\n"); | ||
563 | err = -EINVAL; | ||
564 | goto end; | ||
565 | } | ||
566 | |||
567 | /* get zipper reads of gpu and cpu counter values */ | ||
568 | gpu_timestamp_hi_old = gk20a_readl(g, timer_time_1_r()); | ||
569 | for (i = 0; i < args->count; i++) { | ||
570 | u32 gpu_timestamp_lo = 0; | ||
571 | u32 gpu_timestamp_hi = 0; | ||
572 | |||
573 | gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r()); | ||
574 | args->samples[i].cpu_timestamp = get_cpu_timestamp(); | ||
575 | rmb(); /* maintain zipper read order */ | ||
576 | gpu_timestamp_hi_new = gk20a_readl(g, timer_time_1_r()); | ||
577 | |||
578 | /* pick the appropriate gpu counter hi bits */ | ||
579 | gpu_timestamp_hi = (gpu_timestamp_lo & (1L << 31)) ? | ||
580 | gpu_timestamp_hi_old : gpu_timestamp_hi_new; | ||
581 | |||
582 | args->samples[i].gpu_timestamp = | ||
583 | ((u64)gpu_timestamp_hi << 32) | (u64)gpu_timestamp_lo; | ||
584 | |||
585 | gpu_timestamp_hi_old = gpu_timestamp_hi_new; | ||
586 | } | ||
587 | |||
588 | end: | ||
589 | gk20a_idle(g->dev); | ||
590 | return err; | ||
591 | } | ||
592 | |||
593 | static int nvgpu_gpu_get_cpu_time_correlation_info( | ||
594 | struct gk20a *g, | ||
595 | struct nvgpu_gpu_get_cpu_time_correlation_info_args *args) | ||
596 | { | ||
597 | int err = 0; | ||
598 | u64 (*get_cpu_timestamp)(void) = NULL; | ||
599 | |||
600 | if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT) | ||
601 | return -EINVAL; | ||
602 | |||
603 | switch (args->source_id) { | ||
604 | case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC: | ||
605 | get_cpu_timestamp = get_cpu_timestamp_tsc; | ||
606 | break; | ||
607 | case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_JIFFIES: | ||
608 | get_cpu_timestamp = get_cpu_timestamp_jiffies; | ||
609 | break; | ||
610 | case NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TIMEOFDAY: | ||
611 | get_cpu_timestamp = get_cpu_timestamp_timeofday; | ||
612 | break; | ||
613 | default: | ||
614 | gk20a_err(dev_from_gk20a(g), "invalid cpu clock source id\n"); | ||
615 | return -EINVAL; | ||
616 | } | ||
617 | |||
618 | err = get_timestamps_zipper(g, get_cpu_timestamp, args); | ||
619 | return err; | ||
620 | } | ||
621 | |||
531 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 622 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
532 | { | 623 | { |
533 | struct platform_device *dev = filp->private_data; | 624 | struct platform_device *dev = filp->private_data; |
@@ -760,6 +851,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg | |||
760 | (struct nvgpu_gpu_get_buffer_info_args *)buf); | 851 | (struct nvgpu_gpu_get_buffer_info_args *)buf); |
761 | break; | 852 | break; |
762 | 853 | ||
854 | case NVGPU_GPU_IOCTL_GET_CPU_TIME_CORRELATION_INFO: | ||
855 | err = nvgpu_gpu_get_cpu_time_correlation_info(g, | ||
856 | (struct nvgpu_gpu_get_cpu_time_correlation_info_args *)buf); | ||
857 | break; | ||
858 | |||
763 | default: | 859 | default: |
764 | dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); | 860 | dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); |
765 | err = -ENOTTY; | 861 | err = -ENOTTY; |
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c new file mode 100644 index 00000000..9e7c04ad --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | |||
@@ -0,0 +1,586 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <asm/barrier.h> | ||
15 | #include <linux/slab.h> | ||
16 | #include <linux/kthread.h> | ||
17 | #include <linux/circ_buf.h> | ||
18 | #include <linux/delay.h> | ||
19 | #include <linux/jiffies.h> | ||
20 | #include <linux/wait.h> | ||
21 | #include <linux/ktime.h> | ||
22 | #include <linux/nvgpu.h> | ||
23 | #include <linux/hashtable.h> | ||
24 | #include <linux/debugfs.h> | ||
25 | #include <linux/log2.h> | ||
26 | #include <uapi/linux/nvgpu.h> | ||
27 | #include "ctxsw_trace_gk20a.h" | ||
28 | #include "gk20a.h" | ||
29 | #include "gr_gk20a.h" | ||
30 | #include "hw_ctxsw_prog_gk20a.h" | ||
31 | #include "hw_gr_gk20a.h" | ||
32 | |||
33 | #define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE) | ||
34 | |||
/* Userland-facing FIFO (one global + eventually one per VM) */
struct gk20a_ctxsw_dev {
	struct gk20a *g;		/* owning GPU */

	struct nvgpu_ctxsw_ring_header *hdr;	/* vmalloc_user ring; NULL until RING_SETUP/open */
	struct nvgpu_ctxsw_trace_entry *ents;	/* entries, immediately after *hdr */
	struct nvgpu_ctxsw_trace_filter filter;	/* tag filter applied on write */
	bool write_enabled;		/* producers may append entries */
	wait_queue_head_t readout_wq;	/* readers sleep here until data arrives */
	size_t size;			/* total ring allocation size in bytes */

	atomic_t vma_ref;		/* live userspace mappings; ring can't be swapped while >0 */

	struct mutex lock;		/* protects ring state and write_enabled */
};
50 | |||
51 | |||
/* Per-GPU container: one user-facing trace device per VM. */
struct gk20a_ctxsw_trace {
	struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
};
55 | |||
56 | static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr) | ||
57 | { | ||
58 | return (hdr->write_idx == hdr->read_idx); | ||
59 | } | ||
60 | |||
61 | static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr) | ||
62 | { | ||
63 | return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx; | ||
64 | } | ||
65 | |||
66 | static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr) | ||
67 | { | ||
68 | return (hdr->write_idx - hdr->read_idx) % hdr->num_ents; | ||
69 | } | ||
70 | |||
71 | static inline int ring_space(struct nvgpu_ctxsw_ring_header *hdr) | ||
72 | { | ||
73 | return (hdr->read_idx - hdr->write_idx - 1) % hdr->num_ents; | ||
74 | } | ||
75 | |||
/*
 * read() handler: copy whole trace entries from the kernel ring to
 * userspace.  Blocks until at least one entry is available unless the
 * file was opened O_NONBLOCK.  Returns the number of bytes copied,
 * -EAGAIN (non-blocking, empty ring), -EFAULT (bad user buffer), or
 * the error from an interrupted wait.
 */
ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
	loff_t *off)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	struct nvgpu_ctxsw_trace_entry __user *entry =
		(struct nvgpu_ctxsw_trace_entry *) buf;
	size_t copied = 0;
	int err;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
		"filp=%p buf=%p size=%zu", filp, buf, size);

	mutex_lock(&dev->lock);
	/* sleep (lock dropped) until the writer queues something */
	while (ring_is_empty(hdr)) {
		mutex_unlock(&dev->lock);
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;
		err = wait_event_interruptible(dev->readout_wq,
			!ring_is_empty(hdr));
		if (err)
			return err;
		mutex_lock(&dev->lock);
	}

	/* copy out as many whole entries as fit in the user buffer */
	while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
		if (ring_is_empty(hdr))
			break;

		if (copy_to_user(entry, &dev->ents[hdr->read_idx],
			sizeof(*entry))) {
			mutex_unlock(&dev->lock);
			return -EFAULT;
		}

		/* advance and wrap the consumer index */
		hdr->read_idx++;
		if (hdr->read_idx >= hdr->num_ents)
			hdr->read_idx = 0;

		entry++;
		copied += sizeof(*entry);
		size -= sizeof(*entry);
	}

	gk20a_dbg(gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
		hdr->read_idx);

	/* file offset is (ab)used to report the current read index */
	*off = hdr->read_idx;
	mutex_unlock(&dev->lock);

	return copied;
}
128 | |||
129 | static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) | ||
130 | { | ||
131 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); | ||
132 | dev->write_enabled = true; | ||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) | ||
137 | { | ||
138 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); | ||
139 | dev->write_enabled = false; | ||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | static int gk20a_ctxsw_dev_ring_alloc(struct gk20a_ctxsw_dev *dev, | ||
144 | size_t size) | ||
145 | { | ||
146 | struct nvgpu_ctxsw_ring_header *hdr; | ||
147 | |||
148 | if (atomic_read(&dev->vma_ref)) | ||
149 | return -EBUSY; | ||
150 | |||
151 | if ((dev->write_enabled) || (atomic_read(&dev->vma_ref))) | ||
152 | return -EBUSY; | ||
153 | |||
154 | size = roundup(size, PAGE_SIZE); | ||
155 | hdr = vmalloc_user(size); | ||
156 | if (!hdr) | ||
157 | return -ENOMEM; | ||
158 | |||
159 | if (dev->hdr) | ||
160 | vfree(dev->hdr); | ||
161 | |||
162 | dev->hdr = hdr; | ||
163 | dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1); | ||
164 | dev->size = size; | ||
165 | |||
166 | hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC; | ||
167 | hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION; | ||
168 | hdr->num_ents = (size - sizeof(struct nvgpu_ctxsw_ring_header)) | ||
169 | / sizeof(struct nvgpu_ctxsw_trace_entry); | ||
170 | hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry); | ||
171 | hdr->drop_count = 0; | ||
172 | hdr->read_idx = 0; | ||
173 | hdr->write_idx = 0; | ||
174 | hdr->write_seqno = 0; | ||
175 | |||
176 | gk20a_dbg(gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d", | ||
177 | dev->size, dev->hdr, dev->ents, hdr->num_ents); | ||
178 | return 0; | ||
179 | } | ||
180 | |||
181 | static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev, | ||
182 | struct nvgpu_ctxsw_ring_setup_args *args) | ||
183 | { | ||
184 | size_t size = args->size; | ||
185 | |||
186 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size); | ||
187 | |||
188 | if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) | ||
189 | return -EINVAL; | ||
190 | |||
191 | return gk20a_ctxsw_dev_ring_alloc(dev, size); | ||
192 | } | ||
193 | |||
194 | static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, | ||
195 | struct nvgpu_ctxsw_trace_filter_args *args) | ||
196 | { | ||
197 | dev->filter = args->filter; | ||
198 | return 0; | ||
199 | } | ||
200 | |||
201 | static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev, | ||
202 | struct nvgpu_ctxsw_trace_filter_args *args) | ||
203 | { | ||
204 | args->filter = dev->filter; | ||
205 | return 0; | ||
206 | } | ||
207 | |||
208 | static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev) | ||
209 | { | ||
210 | struct gk20a *g = dev->g; | ||
211 | int err; | ||
212 | |||
213 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, ""); | ||
214 | |||
215 | err = gk20a_busy(g->dev); | ||
216 | if (err) | ||
217 | return err; | ||
218 | |||
219 | if (g->ops.fecs_trace.flush(g)) | ||
220 | err = g->ops.fecs_trace.flush(g); | ||
221 | |||
222 | if (likely(!err)) | ||
223 | err = g->ops.fecs_trace.poll(g); | ||
224 | |||
225 | gk20a_idle(g->dev); | ||
226 | return err; | ||
227 | } | ||
228 | |||
229 | int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) | ||
230 | { | ||
231 | struct gk20a *g; | ||
232 | struct gk20a_ctxsw_trace *trace; | ||
233 | struct gk20a_ctxsw_dev *dev; | ||
234 | int err; | ||
235 | size_t size; | ||
236 | u32 n; | ||
237 | |||
238 | /* only one VM for now */ | ||
239 | const int vmid = 0; | ||
240 | |||
241 | g = container_of(inode->i_cdev, struct gk20a, ctxsw.cdev); | ||
242 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g); | ||
243 | |||
244 | if (!capable(CAP_SYS_ADMIN)) | ||
245 | return -EPERM; | ||
246 | |||
247 | err = gk20a_busy(g->dev); | ||
248 | if (err) | ||
249 | return err; | ||
250 | |||
251 | trace = g->ctxsw_trace; | ||
252 | if (!trace) { | ||
253 | err = -ENODEV; | ||
254 | goto idle; | ||
255 | } | ||
256 | |||
257 | /* Allow only one user for this device */ | ||
258 | dev = &trace->devs[vmid]; | ||
259 | mutex_lock(&dev->lock); | ||
260 | if (dev->hdr) { | ||
261 | err = -EBUSY; | ||
262 | goto done; | ||
263 | } | ||
264 | |||
265 | /* By default, allocate ring buffer big enough to accommodate | ||
266 | * FECS records with default event filter */ | ||
267 | |||
268 | /* enable all traces by default */ | ||
269 | NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter); | ||
270 | |||
271 | /* compute max number of entries generated with this filter */ | ||
272 | n = g->ops.fecs_trace.max_entries(g, &dev->filter); | ||
273 | |||
274 | size = sizeof(struct nvgpu_ctxsw_ring_header) + | ||
275 | n * sizeof(struct nvgpu_ctxsw_trace_entry); | ||
276 | gk20a_dbg(gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu", | ||
277 | size, n, sizeof(struct nvgpu_ctxsw_trace_entry)); | ||
278 | |||
279 | err = gk20a_ctxsw_dev_ring_alloc(dev, size); | ||
280 | if (!err) { | ||
281 | filp->private_data = dev; | ||
282 | gk20a_dbg(gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu", | ||
283 | filp, dev, size); | ||
284 | } | ||
285 | |||
286 | err = g->ops.fecs_trace.enable(g); | ||
287 | |||
288 | done: | ||
289 | mutex_unlock(&dev->lock); | ||
290 | |||
291 | idle: | ||
292 | gk20a_idle(g->dev); | ||
293 | |||
294 | return err; | ||
295 | } | ||
296 | |||
297 | int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) | ||
298 | { | ||
299 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
300 | struct gk20a *g = container_of(inode->i_cdev, struct gk20a, ctxsw.cdev); | ||
301 | |||
302 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); | ||
303 | |||
304 | mutex_lock(&dev->lock); | ||
305 | dev->write_enabled = false; | ||
306 | if (dev->hdr) { | ||
307 | vfree(dev->hdr); | ||
308 | dev->hdr = NULL; | ||
309 | } | ||
310 | |||
311 | g->ops.fecs_trace.disable(g); | ||
312 | |||
313 | mutex_unlock(&dev->lock); | ||
314 | |||
315 | return 0; | ||
316 | } | ||
317 | |||
318 | long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, | ||
319 | unsigned long arg) | ||
320 | { | ||
321 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
322 | struct gk20a *g = dev->g; | ||
323 | u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE]; | ||
324 | int err = 0; | ||
325 | |||
326 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd)); | ||
327 | |||
328 | if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0) | ||
329 | || (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST)) | ||
330 | return -EINVAL; | ||
331 | |||
332 | BUG_ON(_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE); | ||
333 | |||
334 | memset(buf, 0, sizeof(buf)); | ||
335 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
336 | if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) | ||
337 | return -EFAULT; | ||
338 | } | ||
339 | |||
340 | mutex_lock(&dev->lock); | ||
341 | |||
342 | switch (cmd) { | ||
343 | case NVGPU_CTXSW_IOCTL_TRACE_ENABLE: | ||
344 | err = gk20a_ctxsw_dev_ioctl_trace_enable(dev); | ||
345 | break; | ||
346 | case NVGPU_CTXSW_IOCTL_TRACE_DISABLE: | ||
347 | err = gk20a_ctxsw_dev_ioctl_trace_disable(dev); | ||
348 | break; | ||
349 | case NVGPU_CTXSW_IOCTL_RING_SETUP: | ||
350 | err = gk20a_ctxsw_dev_ioctl_ring_setup(dev, | ||
351 | (struct nvgpu_ctxsw_ring_setup_args *) buf); | ||
352 | break; | ||
353 | case NVGPU_CTXSW_IOCTL_SET_FILTER: | ||
354 | err = gk20a_ctxsw_dev_ioctl_set_filter(dev, | ||
355 | (struct nvgpu_ctxsw_trace_filter_args *) buf); | ||
356 | break; | ||
357 | case NVGPU_CTXSW_IOCTL_GET_FILTER: | ||
358 | err = gk20a_ctxsw_dev_ioctl_get_filter(dev, | ||
359 | (struct nvgpu_ctxsw_trace_filter_args *) buf); | ||
360 | break; | ||
361 | case NVGPU_CTXSW_IOCTL_POLL: | ||
362 | mutex_unlock(&dev->lock); | ||
363 | err = gk20a_ctxsw_dev_ioctl_poll(dev); | ||
364 | mutex_lock(&dev->lock); | ||
365 | break; | ||
366 | default: | ||
367 | dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", | ||
368 | cmd); | ||
369 | err = -ENOTTY; | ||
370 | } | ||
371 | |||
372 | mutex_unlock(&dev->lock); | ||
373 | |||
374 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
375 | err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); | ||
376 | |||
377 | return err; | ||
378 | } | ||
379 | |||
380 | unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait) | ||
381 | { | ||
382 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
383 | struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; | ||
384 | unsigned int mask = 0; | ||
385 | |||
386 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, ""); | ||
387 | |||
388 | mutex_lock(&dev->lock); | ||
389 | poll_wait(filp, &dev->readout_wq, wait); | ||
390 | if (!ring_is_empty(hdr)) | ||
391 | mask |= POLLIN | POLLRDNORM; | ||
392 | mutex_unlock(&dev->lock); | ||
393 | |||
394 | return mask; | ||
395 | } | ||
396 | |||
397 | static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma) | ||
398 | { | ||
399 | struct gk20a_ctxsw_dev *dev = vma->vm_private_data; | ||
400 | |||
401 | atomic_inc(&dev->vma_ref); | ||
402 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", | ||
403 | atomic_read(&dev->vma_ref)); | ||
404 | } | ||
405 | |||
406 | static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma) | ||
407 | { | ||
408 | struct gk20a_ctxsw_dev *dev = vma->vm_private_data; | ||
409 | |||
410 | atomic_dec(&dev->vma_ref); | ||
411 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", | ||
412 | atomic_read(&dev->vma_ref)); | ||
413 | } | ||
414 | |||
/* Track userspace mappings so the ring is never reallocated while mapped. */
static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
	.open = gk20a_ctxsw_dev_vma_open,
	.close = gk20a_ctxsw_dev_vma_close,
};
419 | |||
420 | int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma) | ||
421 | { | ||
422 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
423 | int ret; | ||
424 | |||
425 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx", | ||
426 | vma->vm_start, vma->vm_end); | ||
427 | |||
428 | ret = remap_vmalloc_range(vma, dev->hdr, 0); | ||
429 | if (likely(!ret)) { | ||
430 | vma->vm_private_data = dev; | ||
431 | vma->vm_ops = &gk20a_ctxsw_dev_vma_ops; | ||
432 | vma->vm_ops->open(vma); | ||
433 | } | ||
434 | |||
435 | return ret; | ||
436 | } | ||
437 | |||
#ifdef CONFIG_GK20A_CTXSW_TRACE
/* One-time initialization of every per-VM user-facing trace device. */
static int gk20a_ctxsw_init_devs(struct gk20a *g)
{
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	int i;

	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
		struct gk20a_ctxsw_dev *dev = &trace->devs[i];

		dev->g = g;
		dev->hdr = NULL;
		dev->write_enabled = false;
		init_waitqueue_head(&dev->readout_wq);
		mutex_init(&dev->lock);
		atomic_set(&dev->vma_ref, 0);
	}
	return 0;
}
#endif
457 | |||
/*
 * Allocate and initialize the per-GPU context-switch tracing state and
 * its FECS backend.  Idempotent: returns 0 immediately if tracing was
 * already initialized.  Compiles to a no-op success when
 * CONFIG_GK20A_CTXSW_TRACE is not set.
 */
int gk20a_ctxsw_trace_init(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	int err;

	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);

	/* already set up by a previous call */
	if (likely(trace))
		return 0;

	trace = kzalloc(sizeof(*trace), GFP_KERNEL);
	if (unlikely(!trace))
		return -ENOMEM;
	/* publish before init so helpers can reach it via g->ctxsw_trace */
	g->ctxsw_trace = trace;

	err = gk20a_ctxsw_init_devs(g);
	if (err)
		goto fail;

	err = g->ops.fecs_trace.init(g);
	if (unlikely(err))
		goto fail;

	return 0;

fail:
	/* undo the allocation and the published pointer */
	kfree(trace);
	g->ctxsw_trace = NULL;
	return err;
#else
	return 0;
#endif
}
492 | |||
/*
 * Tear down context-switch tracing.
 *
 * Fix: tear down in reverse order of gk20a_ctxsw_trace_init() — stop the
 * FECS backend first, then free the tracing state.  The original freed
 * g->ctxsw_trace before calling deinit(), leaving a window where the
 * backend could observe freed/NULL tracing state.
 */
void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	g->ops.fecs_trace.deinit(g);

	kfree(g->ctxsw_trace);
	g->ctxsw_trace = NULL;
#endif
}
502 | |||
/*
 * Push one trace entry into the user-facing ring for entry->vmid.
 * Stamps the entry with a monotonically increasing sequence number,
 * applies the device's tag filter, and drops (counted in the ring
 * header) when writes are disabled or the ring is full.
 *
 * Returns 0 on success or a filtered-out entry, -ENODEV if the vmid is
 * invalid or the device was released, -EBUSY/-ENOSPC for drops.
 */
int gk20a_ctxsw_trace_write(struct gk20a *g,
		struct nvgpu_ctxsw_trace_entry *entry)
{
	struct nvgpu_ctxsw_ring_header *hdr;
	struct gk20a_ctxsw_dev *dev;
	int ret = 0;
	const char *reason;

	if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
		return -ENODEV;

	dev = &g->ctxsw_trace->devs[entry->vmid];
	hdr = dev->hdr;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"dev=%p hdr=%p", dev, hdr);

	mutex_lock(&dev->lock);

	if (unlikely(!hdr)) {
		/* device has been released */
		ret = -ENODEV;
		goto done;
	}

	/* seqno advances even for dropped/filtered entries so userspace
	 * can detect gaps */
	entry->seqno = hdr->write_seqno++;

	if (!dev->write_enabled) {
		ret = -EBUSY;
		reason = "write disabled";
		goto drop;
	}

	if (unlikely(ring_is_full(hdr))) {
		ret = -ENOSPC;
		reason = "user fifo full";
		goto drop;
	}

	if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
		reason = "filtered out";
		goto filter;
	}

	gk20a_dbg(gpu_dbg_ctxsw,
		"seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp);

	dev->ents[hdr->write_idx] = *entry;

	/* ensure record is written before updating write index */
	smp_wmb();

	hdr->write_idx++;
	if (unlikely(hdr->write_idx >= hdr->num_ents))
		hdr->write_idx = 0;
	gk20a_dbg(gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
		hdr->read_idx, hdr->write_idx, ring_len(hdr));

	mutex_unlock(&dev->lock);
	return ret;

drop:
	/* drops are surfaced to userspace through the ring header */
	hdr->drop_count++;

filter:
	gk20a_dbg(gpu_dbg_ctxsw,
		"dropping seqno=%d context_id=%08x pid=%lld "
		"tag=%x time=%llx (%s)",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp, reason);

done:
	mutex_unlock(&dev->lock);
	return ret;
}
580 | |||
581 | void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid) | ||
582 | { | ||
583 | struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[vmid]; | ||
584 | |||
585 | wake_up_interruptible(&dev->readout_wq); | ||
586 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h new file mode 100644 index 00000000..c57d95d1 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h | |||
@@ -0,0 +1,41 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #ifndef __CTXSW_TRACE_GK20A_H | ||
15 | #define __CTXSW_TRACE_GK20A_H | ||
16 | |||
17 | #define GK20A_CTXSW_TRACE_NUM_DEVS 1 | ||
18 | |||
19 | struct gk20a; | ||
20 | struct nvgpu_ctxsw_trace_entry; | ||
21 | struct channel_gk20a; | ||
22 | struct channel_ctx_gk20a; | ||
23 | struct gk20a_ctxsw_dev; | ||
24 | struct gk20a_fecs_trace; | ||
25 | |||
26 | |||
27 | int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp); | ||
28 | int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp); | ||
29 | long gk20a_ctxsw_dev_ioctl(struct file *filp, | ||
30 | unsigned int cmd, unsigned long arg); | ||
31 | ssize_t gk20a_ctxsw_dev_read(struct file *, char __user *, size_t, loff_t *); | ||
32 | unsigned int gk20a_ctxsw_dev_poll(struct file *, struct poll_table_struct *); | ||
33 | int gk20a_ctxsw_dev_mmap(struct file *, struct vm_area_struct *); | ||
34 | |||
35 | int gk20a_ctxsw_trace_init(struct gk20a *); | ||
36 | int gk20a_ctxsw_trace_setup(struct gk20a *, void *ctx_ptr); | ||
37 | void gk20a_ctxsw_trace_cleanup(struct gk20a *); | ||
38 | int gk20a_ctxsw_trace_write(struct gk20a *, struct nvgpu_ctxsw_trace_entry *); | ||
39 | void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid); | ||
40 | |||
41 | #endif /* __CTXSW_TRACE_GK20A_H */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c new file mode 100644 index 00000000..bac36403 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |||
@@ -0,0 +1,763 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <asm/barrier.h> | ||
15 | #include <linux/slab.h> | ||
16 | #include <linux/kthread.h> | ||
17 | #include <linux/circ_buf.h> | ||
18 | #include <linux/delay.h> | ||
19 | #include <linux/jiffies.h> | ||
20 | #include <linux/wait.h> | ||
21 | #include <linux/ktime.h> | ||
22 | #include <linux/nvgpu.h> | ||
23 | #include <linux/hashtable.h> | ||
24 | #include <linux/debugfs.h> | ||
25 | #include <linux/log2.h> | ||
26 | #include <uapi/linux/nvgpu.h> | ||
27 | #include "ctxsw_trace_gk20a.h" | ||
28 | #include "fecs_trace_gk20a.h" | ||
29 | #include "gk20a.h" | ||
30 | #include "gr_gk20a.h" | ||
31 | #include "hw_ctxsw_prog_gk20a.h" | ||
32 | #include "hw_gr_gk20a.h" | ||
33 | |||
34 | /* | ||
35 | * If HW circular buffer is getting too many "buffer full" conditions, | ||
36 | * increasing this constant should help (it drives Linux' internal buffer size). | ||
37 | */ | ||
38 | #define GK20A_FECS_TRACE_NUM_RECORDS (1 << 6) | ||
39 | #define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */ | ||
40 | #define GK20A_FECS_TRACE_FRAME_PERIOD_NS (1000000000ULL/60ULL) | ||
41 | #define GK20A_FECS_TRACE_PTIMER_SHIFT 5 | ||
42 | |||
/* One FECS timestamp record as laid out in the HW circular buffer.
 * NOTE(review): layout must match the FECS ucode — do not reorder. */
struct gk20a_fecs_trace_record {
	u32 magic_lo;		/* sometimes reused as a sequence number */
	u32 magic_hi;		/* validity check — see is_valid_record() */
	u32 context_id;
	u32 context_ptr;	/* outgoing context (instance block PA >> 12) */
	u32 new_context_id;
	u32 new_context_ptr;	/* incoming context */
	u64 ts[];		/* tagged timestamps filling the record */
};
52 | |||
/* pid_hash_table entry: maps a FECS context_ptr to the owning pid. */
struct gk20a_fecs_trace_hash_ent {
	u32 context_ptr;	/* hash key */
	pid_t pid;
	struct hlist_node node;
};
58 | |||
/* Per-GPU FECS tracing backend state. */
struct gk20a_fecs_trace {

	struct mem_desc trace_buf;	/* HW circular buffer of FECS records */
	DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
	struct mutex hash_lock;		/* protects pid_hash_table */
	struct mutex poll_lock;		/* serializes ring draining */
	u64 sof;			/* timestamp used for the next SOF event */
	u32 sof_mask; /* did we already send a SOF for this VM */

	/* NOTE(review): presumably a periodic poller kthread — set up
	 * outside this view; confirm against the rest of the file */
	struct task_struct *poll_task;
};
70 | |||
71 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
72 | static inline u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts) | ||
73 | { | ||
74 | return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32)); | ||
75 | } | ||
76 | |||
77 | static inline u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts) | ||
78 | { | ||
79 | return ts & ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32); | ||
80 | } | ||
81 | |||
82 | |||
83 | static u32 gk20a_fecs_trace_fecs_context_ptr(struct channel_gk20a *ch) | ||
84 | { | ||
85 | return (u32) (sg_phys(ch->inst_block.sgt->sgl) >> 12LL); | ||
86 | } | ||
87 | |||
88 | static inline int gk20a_fecs_trace_num_ts(void) | ||
89 | { | ||
90 | return (ctxsw_prog_record_timestamp_record_size_in_bytes_v() | ||
91 | - sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64); | ||
92 | } | ||
93 | |||
94 | struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record( | ||
95 | struct gk20a_fecs_trace *trace, int idx) | ||
96 | { | ||
97 | return (struct gk20a_fecs_trace_record *) | ||
98 | ((u8 *) trace->trace_buf.cpu_va | ||
99 | + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v())); | ||
100 | } | ||
101 | |||
102 | static bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r) | ||
103 | { | ||
104 | /* | ||
105 | * testing magic_hi should suffice. magic_lo is sometimes used | ||
106 | * as a sequence number in experimental ucode. | ||
107 | */ | ||
108 | return (r->magic_hi | ||
109 | == ctxsw_prog_record_timestamp_magic_value_hi_v_value_v()); | ||
110 | } | ||
111 | |||
/* FECS publishes the SW read pointer in mailbox 1. */
static int gk20a_fecs_trace_get_read_index(struct gk20a *g)
{
	return gr_gk20a_elpg_protected_call(g,
		gk20a_readl(g, gr_fecs_mailbox1_r()));
}
117 | |||
/* FECS publishes its write pointer in mailbox 0. */
static int gk20a_fecs_trace_get_write_index(struct gk20a *g)
{
	return gr_gk20a_elpg_protected_call(g,
		gk20a_readl(g, gr_fecs_mailbox0_r()));
}
123 | |||
124 | static int gk20a_fecs_trace_set_read_index(struct gk20a *g, int index) | ||
125 | { | ||
126 | gk20a_dbg(gpu_dbg_ctxsw, "set read=%d", index); | ||
127 | return gr_gk20a_elpg_protected_call(g, | ||
128 | (gk20a_writel(g, gr_fecs_mailbox1_r(), index), 0)); | ||
129 | } | ||
130 | |||
131 | void gk20a_fecs_trace_hash_dump(struct gk20a *g) | ||
132 | { | ||
133 | u32 bkt; | ||
134 | struct gk20a_fecs_trace_hash_ent *ent; | ||
135 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
136 | |||
137 | gk20a_dbg(gpu_dbg_ctxsw, "dumping hash table"); | ||
138 | |||
139 | mutex_lock(&trace->hash_lock); | ||
140 | hash_for_each(trace->pid_hash_table, bkt, ent, node) | ||
141 | { | ||
142 | gk20a_dbg(gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d", | ||
143 | ent, bkt, ent->context_ptr, ent->pid); | ||
144 | |||
145 | } | ||
146 | mutex_unlock(&trace->hash_lock); | ||
147 | } | ||
148 | |||
149 | static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid) | ||
150 | { | ||
151 | struct gk20a_fecs_trace_hash_ent *he; | ||
152 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
153 | |||
154 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, | ||
155 | "adding hash entry context_ptr=%x -> pid=%d", context_ptr, pid); | ||
156 | |||
157 | he = kzalloc(sizeof(*he), GFP_KERNEL); | ||
158 | if (unlikely(!he)) { | ||
159 | gk20a_warn(dev_from_gk20a(g), | ||
160 | "can't alloc new hash entry for context_ptr=%x pid=%d", | ||
161 | context_ptr, pid); | ||
162 | return -ENOMEM; | ||
163 | } | ||
164 | |||
165 | he->context_ptr = context_ptr; | ||
166 | he->pid = pid; | ||
167 | mutex_lock(&trace->hash_lock); | ||
168 | hash_add(trace->pid_hash_table, &he->node, context_ptr); | ||
169 | mutex_unlock(&trace->hash_lock); | ||
170 | return 0; | ||
171 | } | ||
172 | |||
173 | static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr) | ||
174 | { | ||
175 | struct hlist_node *tmp; | ||
176 | struct gk20a_fecs_trace_hash_ent *ent; | ||
177 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
178 | |||
179 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, | ||
180 | "freeing hash entry context_ptr=%x", context_ptr); | ||
181 | |||
182 | mutex_lock(&trace->hash_lock); | ||
183 | hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node, | ||
184 | context_ptr) { | ||
185 | if (ent->context_ptr == context_ptr) { | ||
186 | hash_del(&ent->node); | ||
187 | gk20a_dbg(gpu_dbg_ctxsw, | ||
188 | "freed hash entry=%p context_ptr=%x", ent, | ||
189 | ent->context_ptr); | ||
190 | kfree(ent); | ||
191 | break; | ||
192 | } | ||
193 | } | ||
194 | mutex_unlock(&trace->hash_lock); | ||
195 | } | ||
196 | |||
197 | static void gk20a_fecs_trace_free_hash_table(struct gk20a *g) | ||
198 | { | ||
199 | u32 bkt; | ||
200 | struct hlist_node *tmp; | ||
201 | struct gk20a_fecs_trace_hash_ent *ent; | ||
202 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
203 | |||
204 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace); | ||
205 | |||
206 | mutex_lock(&trace->hash_lock); | ||
207 | hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) { | ||
208 | hash_del(&ent->node); | ||
209 | kfree(ent); | ||
210 | } | ||
211 | mutex_unlock(&trace->hash_lock); | ||
212 | |||
213 | } | ||
214 | |||
215 | static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr) | ||
216 | { | ||
217 | struct gk20a_fecs_trace_hash_ent *ent; | ||
218 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
219 | pid_t pid = 0; | ||
220 | |||
221 | mutex_lock(&trace->hash_lock); | ||
222 | hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) { | ||
223 | if (ent->context_ptr == context_ptr) { | ||
224 | gk20a_dbg(gpu_dbg_ctxsw, | ||
225 | "found context_ptr=%x -> pid=%d", | ||
226 | ent->context_ptr, ent->pid); | ||
227 | pid = ent->pid; | ||
228 | break; | ||
229 | } | ||
230 | } | ||
231 | mutex_unlock(&trace->hash_lock); | ||
232 | |||
233 | return pid; | ||
234 | } | ||
235 | |||
/*
 * Converts HW entry format to userspace-facing format and pushes it to the
 * queue.
 */
static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
{
	int i;
	struct nvgpu_ctxsw_trace_entry entry = { };
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	pid_t cur_pid;
	pid_t new_pid;

	/* for now, only one VM */
	const int vmid = 0;

	struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(
		trace, index);

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"consuming record trace=%p read=%d record=%p", trace, index, r);

	/* a bad magic means the record was not (fully) written by FECS */
	if (unlikely(!gk20a_fecs_trace_is_valid_record(r))) {
		gk20a_warn(dev_from_gk20a(g),
			"trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
			trace, index, r, r->magic_lo, r->magic_hi);
		return -EINVAL;
	}

	/* map both context_ptrs back to the submitting processes */
	cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr);
	new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr);

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_ctxsw,
		"context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)",
		r->context_ptr, cur_pid, r->new_context_ptr, new_pid);

	entry.context_id = r->context_id;
	entry.vmid = vmid;

	/* insert SOF event if needed */
	if (!(trace->sof_mask & BIT(vmid))) {
		entry.tag = NVGPU_CTXSW_TAG_SOF;
		entry.timestamp = trace->sof;
		entry.context_id = 0;
		entry.pid = 0;

		gk20a_dbg(gpu_dbg_ctxsw, "SOF time=%llx", entry.timestamp);
		gk20a_ctxsw_trace_write(g, &entry);
		/* mark SOF as sent so it is emitted once per frame/VM */
		trace->sof_mask |= BIT(vmid);
	}

	/* break out FECS record into trace events */
	for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {

		entry.tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
		entry.timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
		/* scale raw PTIMER value to nanoseconds */
		entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;

		gk20a_dbg(gpu_dbg_ctxsw,
			"tag=%x timestamp=%llx context_id=%08x new_context_id=%08x",
			entry.tag, entry.timestamp, r->context_id,
			r->new_context_id);

		/* attribute each tag to the incoming or outgoing context */
		switch (entry.tag) {
		case NVGPU_CTXSW_TAG_RESTORE_START:
		case NVGPU_CTXSW_TAG_CONTEXT_START:
			entry.context_id = r->new_context_id;
			entry.pid = new_pid;
			break;

		case NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
		case NVGPU_CTXSW_TAG_FE_ACK:
		case NVGPU_CTXSW_TAG_FE_ACK_WFI:
		case NVGPU_CTXSW_TAG_FE_ACK_GFXP:
		case NVGPU_CTXSW_TAG_FE_ACK_CTAP:
		case NVGPU_CTXSW_TAG_FE_ACK_CILP:
		case NVGPU_CTXSW_TAG_SAVE_END:
			entry.context_id = r->context_id;
			entry.pid = cur_pid;
			break;

		default:
			/* tags are not guaranteed to start at the beginning */
			WARN_ON(entry.tag && (entry.tag != NVGPU_CTXSW_TAG_INVALID_TIMESTAMP));
			continue;
		}

		gk20a_dbg(gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld",
			entry.tag, entry.context_id, entry.pid);

		/* skip events with no associated context */
		if (!entry.context_id)
			continue;

		gk20a_ctxsw_trace_write(g, &entry);
	}

	/* let readers blocked in read()/poll() see the new entries */
	gk20a_ctxsw_trace_wake_up(g, vmid);
	return 0;
}
334 | |||
335 | static int gk20a_fecs_trace_poll(struct gk20a *g) | ||
336 | { | ||
337 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
338 | |||
339 | int read = 0; | ||
340 | int write = 0; | ||
341 | int cnt; | ||
342 | int err; | ||
343 | |||
344 | err = gk20a_busy(g->dev); | ||
345 | if (unlikely(err)) | ||
346 | return err; | ||
347 | |||
348 | mutex_lock(&trace->poll_lock); | ||
349 | write = gk20a_fecs_trace_get_write_index(g); | ||
350 | if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) { | ||
351 | gk20a_err(dev_from_gk20a(g), | ||
352 | "failed to acquire write index, write=%d", write); | ||
353 | err = write; | ||
354 | goto done; | ||
355 | } | ||
356 | |||
357 | read = gk20a_fecs_trace_get_read_index(g); | ||
358 | |||
359 | cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS); | ||
360 | if (!cnt) | ||
361 | goto done; | ||
362 | |||
363 | gk20a_dbg(gpu_dbg_ctxsw, | ||
364 | "circular buffer: read=%d (mailbox=%d) write=%d cnt=%d", | ||
365 | read, gk20a_fecs_trace_get_read_index(g), write, cnt); | ||
366 | |||
367 | /* we did not send any SOF yet */ | ||
368 | trace->sof_mask = 0; | ||
369 | |||
370 | /* consume all records */ | ||
371 | while (read != write) { | ||
372 | gk20a_fecs_trace_ring_read(g, read); | ||
373 | |||
374 | /* Get to next record. */ | ||
375 | read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); | ||
376 | gk20a_fecs_trace_set_read_index(g, read); | ||
377 | } | ||
378 | |||
379 | done: | ||
380 | /* | ||
381 | * OK, we read out all the entries... a new "frame" starts here. | ||
382 | * We remember the Start Of Frame time and insert it on the next | ||
383 | * iteration. | ||
384 | */ | ||
385 | trace->sof = gk20a_read_ptimer(g); | ||
386 | |||
387 | mutex_unlock(&trace->poll_lock); | ||
388 | gk20a_idle(g->dev); | ||
389 | return err; | ||
390 | } | ||
391 | |||
392 | static int gk20a_fecs_trace_periodic_polling(void *arg) | ||
393 | { | ||
394 | struct gk20a *g = (struct gk20a *)arg; | ||
395 | struct timespec ts = ns_to_timespec(GK20A_FECS_TRACE_FRAME_PERIOD_NS); | ||
396 | |||
397 | pr_info("%s: running\n", __func__); | ||
398 | |||
399 | while (!kthread_should_stop()) { | ||
400 | |||
401 | hrtimer_nanosleep(&ts, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC); | ||
402 | |||
403 | gk20a_fecs_trace_poll(g); | ||
404 | } | ||
405 | |||
406 | return 0; | ||
407 | } | ||
408 | |||
409 | static int gk20a_fecs_trace_alloc_ring(struct gk20a *g) | ||
410 | { | ||
411 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
412 | |||
413 | return gk20a_gmmu_alloc(g, GK20A_FECS_TRACE_NUM_RECORDS | ||
414 | * ctxsw_prog_record_timestamp_record_size_in_bytes_v(), | ||
415 | &trace->trace_buf); | ||
416 | } | ||
417 | |||
418 | static void gk20a_fecs_trace_free_ring(struct gk20a *g) | ||
419 | { | ||
420 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
421 | |||
422 | gk20a_gmmu_free(g, &trace->trace_buf); | ||
423 | } | ||
424 | |||
425 | #ifdef CONFIG_DEBUG_FS | ||
426 | /* | ||
427 | * The sequence iterator functions. We simply use the count of the | ||
428 | * next line as our internal position. | ||
429 | */ | ||
430 | static void *gk20a_fecs_trace_debugfs_ring_seq_start( | ||
431 | struct seq_file *s, loff_t *pos) | ||
432 | { | ||
433 | if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS) | ||
434 | return NULL; | ||
435 | |||
436 | return pos; | ||
437 | } | ||
438 | |||
439 | static void *gk20a_fecs_trace_debugfs_ring_seq_next( | ||
440 | struct seq_file *s, void *v, loff_t *pos) | ||
441 | { | ||
442 | ++(*pos); | ||
443 | if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS) | ||
444 | return NULL; | ||
445 | return pos; | ||
446 | } | ||
447 | |||
448 | static void gk20a_fecs_trace_debugfs_ring_seq_stop( | ||
449 | struct seq_file *s, void *v) | ||
450 | { | ||
451 | } | ||
452 | |||
453 | static int gk20a_fecs_trace_debugfs_ring_seq_show( | ||
454 | struct seq_file *s, void *v) | ||
455 | { | ||
456 | loff_t *pos = (loff_t *) v; | ||
457 | struct gk20a *g = *(struct gk20a **)s->private; | ||
458 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
459 | struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(trace, *pos); | ||
460 | int i; | ||
461 | const u32 invalid_tag = | ||
462 | ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(); | ||
463 | u32 tag; | ||
464 | u64 timestamp; | ||
465 | |||
466 | seq_printf(s, "record #%lld (%p)\n", *pos, r); | ||
467 | seq_printf(s, "\tmagic_lo=%08x\n", r->magic_lo); | ||
468 | seq_printf(s, "\tmagic_hi=%08x\n", r->magic_hi); | ||
469 | if (gk20a_fecs_trace_is_valid_record(r)) { | ||
470 | seq_printf(s, "\tcontext_ptr=%08x\n", r->context_ptr); | ||
471 | seq_printf(s, "\tcontext_id=%08x\n", r->context_id); | ||
472 | seq_printf(s, "\tnew_context_ptr=%08x\n", r->new_context_ptr); | ||
473 | seq_printf(s, "\tnew_context_id=%08x\n", r->new_context_id); | ||
474 | for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) { | ||
475 | tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]); | ||
476 | if (tag == invalid_tag) | ||
477 | continue; | ||
478 | timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]); | ||
479 | timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT; | ||
480 | seq_printf(s, "\ttag=%02x timestamp=%012llx\n", tag, timestamp); | ||
481 | } | ||
482 | } | ||
483 | return 0; | ||
484 | } | ||
485 | |||
486 | /* | ||
487 | * Tie them all together into a set of seq_operations. | ||
488 | */ | ||
489 | const struct seq_operations gk20a_fecs_trace_debugfs_ring_seq_ops = { | ||
490 | .start = gk20a_fecs_trace_debugfs_ring_seq_start, | ||
491 | .next = gk20a_fecs_trace_debugfs_ring_seq_next, | ||
492 | .stop = gk20a_fecs_trace_debugfs_ring_seq_stop, | ||
493 | .show = gk20a_fecs_trace_debugfs_ring_seq_show | ||
494 | }; | ||
495 | |||
496 | /* | ||
497 | * Time to set up the file operations for our /proc file. In this case, | ||
498 | * all we need is an open function which sets up the sequence ops. | ||
499 | */ | ||
500 | |||
501 | static int gk20a_ctxsw_debugfs_ring_open(struct inode *inode, | ||
502 | struct file *file) | ||
503 | { | ||
504 | struct gk20a **p; | ||
505 | |||
506 | if (!capable(CAP_SYS_ADMIN)) | ||
507 | return -EPERM; | ||
508 | |||
509 | p = __seq_open_private(file, &gk20a_fecs_trace_debugfs_ring_seq_ops, | ||
510 | sizeof(struct gk20a *)); | ||
511 | if (!p) | ||
512 | return -ENOMEM; | ||
513 | |||
514 | *p = (struct gk20a *)inode->i_private; | ||
515 | return 0; | ||
516 | }; | ||
517 | |||
518 | /* | ||
519 | * The file operations structure contains our open function along with | ||
520 | * set of the canned seq_ ops. | ||
521 | */ | ||
522 | const struct file_operations gk20a_fecs_trace_debugfs_ring_fops = { | ||
523 | .owner = THIS_MODULE, | ||
524 | .open = gk20a_ctxsw_debugfs_ring_open, | ||
525 | .read = seq_read, | ||
526 | .llseek = seq_lseek, | ||
527 | .release = seq_release_private | ||
528 | }; | ||
529 | |||
530 | static int gk20a_fecs_trace_debugfs_read(void *arg, u64 *val) | ||
531 | { | ||
532 | *val = gk20a_fecs_trace_get_read_index((struct gk20a *)arg); | ||
533 | return 0; | ||
534 | } | ||
535 | DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_read_fops, | ||
536 | gk20a_fecs_trace_debugfs_read, NULL, "%llu\n"); | ||
537 | |||
538 | static int gk20a_fecs_trace_debugfs_write(void *arg, u64 *val) | ||
539 | { | ||
540 | *val = gk20a_fecs_trace_get_write_index((struct gk20a *)arg); | ||
541 | return 0; | ||
542 | } | ||
543 | DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_write_fops, | ||
544 | gk20a_fecs_trace_debugfs_write, NULL, "%llu\n"); | ||
545 | |||
546 | static void gk20a_fecs_trace_debugfs_init(struct gk20a *g) | ||
547 | { | ||
548 | struct gk20a_platform *plat = platform_get_drvdata(g->dev); | ||
549 | |||
550 | debugfs_create_file("ctxsw_trace_read", 0600, plat->debugfs, g, | ||
551 | &gk20a_fecs_trace_debugfs_read_fops); | ||
552 | debugfs_create_file("ctxsw_trace_write", 0600, plat->debugfs, g, | ||
553 | &gk20a_fecs_trace_debugfs_write_fops); | ||
554 | debugfs_create_file("ctxsw_trace_ring", 0600, plat->debugfs, g, | ||
555 | &gk20a_fecs_trace_debugfs_ring_fops); | ||
556 | } | ||
557 | |||
558 | static void gk20a_fecs_trace_debugfs_cleanup(struct gk20a *g) | ||
559 | { | ||
560 | struct gk20a_platform *plat = platform_get_drvdata(g->dev); | ||
561 | |||
562 | debugfs_remove_recursive(plat->debugfs); | ||
563 | } | ||
564 | |||
565 | #else | ||
566 | |||
567 | static void gk20a_fecs_trace_debugfs_init(struct gk20a *g) | ||
568 | { | ||
569 | } | ||
570 | |||
571 | static inline void gk20a_fecs_trace_debugfs_cleanup(struct gk20a *g) | ||
572 | { | ||
573 | } | ||
574 | |||
575 | #endif /* CONFIG_DEBUG_FS */ | ||
576 | |||
577 | static int gk20a_fecs_trace_init(struct gk20a *g) | ||
578 | { | ||
579 | struct gk20a_fecs_trace *trace; | ||
580 | int err; | ||
581 | |||
582 | trace = kzalloc(sizeof(struct gk20a_fecs_trace), GFP_KERNEL); | ||
583 | if (!trace) { | ||
584 | gk20a_warn(dev_from_gk20a(g), "failed to allocate fecs_trace"); | ||
585 | return -ENOMEM; | ||
586 | } | ||
587 | g->fecs_trace = trace; | ||
588 | |||
589 | BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS)); | ||
590 | err = gk20a_fecs_trace_alloc_ring(g); | ||
591 | if (err) { | ||
592 | gk20a_warn(dev_from_gk20a(g), "failed to allocate FECS ring"); | ||
593 | goto clean; | ||
594 | } | ||
595 | |||
596 | mutex_init(&trace->poll_lock); | ||
597 | mutex_init(&trace->hash_lock); | ||
598 | hash_init(trace->pid_hash_table); | ||
599 | |||
600 | gk20a_fecs_trace_debugfs_init(g); | ||
601 | return 0; | ||
602 | |||
603 | clean: | ||
604 | kfree(trace); | ||
605 | g->fecs_trace = NULL; | ||
606 | return err; | ||
607 | } | ||
608 | |||
609 | static int gk20a_fecs_trace_bind_channel(struct gk20a *g, | ||
610 | struct channel_gk20a *ch) | ||
611 | { | ||
612 | /* | ||
613 | * map our circ_buf to the context space and store the GPU VA | ||
614 | * in the context header. | ||
615 | */ | ||
616 | |||
617 | u32 lo; | ||
618 | u32 hi; | ||
619 | phys_addr_t pa; | ||
620 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | ||
621 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
622 | void *ctx_ptr; | ||
623 | u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch); | ||
624 | |||
625 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, | ||
626 | "hw_chid=%d context_ptr=%x inst_block=%llx", | ||
627 | ch->hw_chid, context_ptr, gk20a_mem_phys(&ch->inst_block)); | ||
628 | |||
629 | if (!trace) | ||
630 | return -ENOMEM; | ||
631 | |||
632 | pa = gk20a_mem_phys(&trace->trace_buf); | ||
633 | if (!pa) | ||
634 | return -ENOMEM; | ||
635 | |||
636 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | ||
637 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, 0, | ||
638 | pgprot_writecombine(PAGE_KERNEL)); | ||
639 | if (!ctx_ptr) | ||
640 | return -ENOMEM; | ||
641 | |||
642 | lo = u64_lo32(pa); | ||
643 | hi = u64_hi32(pa); | ||
644 | |||
645 | gk20a_dbg(gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi, | ||
646 | lo, GK20A_FECS_TRACE_NUM_RECORDS); | ||
647 | |||
648 | gk20a_mem_wr32(ctx_ptr | ||
649 | + ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), | ||
650 | 0, lo); | ||
651 | gk20a_mem_wr32(ctx_ptr | ||
652 | + ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), | ||
653 | 0, ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi)); | ||
654 | gk20a_mem_wr32(ctx_ptr | ||
655 | + ctxsw_prog_main_image_context_timestamp_buffer_control_o(), | ||
656 | 0, ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( | ||
657 | GK20A_FECS_TRACE_NUM_RECORDS)); | ||
658 | |||
659 | vunmap(ctx_ptr); | ||
660 | gk20a_fecs_trace_hash_add(g, context_ptr, ch->pid); | ||
661 | |||
662 | return 0; | ||
663 | } | ||
664 | |||
665 | static int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch) | ||
666 | { | ||
667 | u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch); | ||
668 | |||
669 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, | ||
670 | "ch=%p context_ptr=%x", ch, context_ptr); | ||
671 | |||
672 | if (g->ops.fecs_trace.flush) | ||
673 | g->ops.fecs_trace.flush(g); | ||
674 | gk20a_fecs_trace_poll(g); | ||
675 | gk20a_fecs_trace_hash_del(g, context_ptr); | ||
676 | return 0; | ||
677 | } | ||
678 | |||
679 | static int gk20a_fecs_trace_reset(struct gk20a *g) | ||
680 | { | ||
681 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, ""); | ||
682 | |||
683 | if (g->ops.fecs_trace.flush) | ||
684 | g->ops.fecs_trace.flush(g); | ||
685 | gk20a_fecs_trace_poll(g); | ||
686 | return gk20a_fecs_trace_set_read_index(g, 0); | ||
687 | } | ||
688 | |||
689 | static int gk20a_fecs_trace_deinit(struct gk20a *g) | ||
690 | { | ||
691 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
692 | |||
693 | gk20a_fecs_trace_debugfs_cleanup(g); | ||
694 | kthread_stop(trace->poll_task); | ||
695 | gk20a_fecs_trace_free_ring(g); | ||
696 | gk20a_fecs_trace_free_hash_table(g); | ||
697 | |||
698 | kfree(g->fecs_trace); | ||
699 | g->fecs_trace = NULL; | ||
700 | return 0; | ||
701 | } | ||
702 | |||
703 | static int gk20a_gr_max_entries(struct gk20a *g, | ||
704 | struct nvgpu_ctxsw_trace_filter *filter) | ||
705 | { | ||
706 | int n; | ||
707 | int tag; | ||
708 | |||
709 | /* Compute number of entries per record, with given filter */ | ||
710 | for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(); tag++) | ||
711 | n += (NVGPU_CTXSW_FILTER_ISSET(tag, filter) != 0); | ||
712 | |||
713 | /* Return max number of entries generated for the whole ring */ | ||
714 | return n * GK20A_FECS_TRACE_NUM_RECORDS; | ||
715 | } | ||
716 | |||
717 | static int gk20a_fecs_trace_enable(struct gk20a *g) | ||
718 | { | ||
719 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
720 | struct task_struct *task; | ||
721 | |||
722 | if (!trace->poll_task) { | ||
723 | task = kthread_run(gk20a_fecs_trace_periodic_polling, g, __func__); | ||
724 | if (unlikely(IS_ERR(task))) { | ||
725 | gk20a_warn(dev_from_gk20a(g), "failed to create FECS polling task"); | ||
726 | return PTR_ERR(task); | ||
727 | } | ||
728 | trace->poll_task = task; | ||
729 | } | ||
730 | |||
731 | return 0; | ||
732 | } | ||
733 | |||
734 | static int gk20a_fecs_trace_disable(struct gk20a *g) | ||
735 | { | ||
736 | struct gk20a_fecs_trace *trace = g->fecs_trace; | ||
737 | |||
738 | if (trace->poll_task) { | ||
739 | kthread_stop(trace->poll_task); | ||
740 | trace->poll_task = NULL; | ||
741 | } | ||
742 | |||
743 | return -EPERM; | ||
744 | } | ||
745 | |||
746 | void gk20a_init_fecs_trace_ops(struct gpu_ops *ops) | ||
747 | { | ||
748 | ops->fecs_trace.init = gk20a_fecs_trace_init; | ||
749 | ops->fecs_trace.deinit = gk20a_fecs_trace_deinit; | ||
750 | ops->fecs_trace.enable = gk20a_fecs_trace_enable; | ||
751 | ops->fecs_trace.disable = gk20a_fecs_trace_disable; | ||
752 | ops->fecs_trace.reset = gk20a_fecs_trace_reset; | ||
753 | ops->fecs_trace.flush = NULL; | ||
754 | ops->fecs_trace.poll = gk20a_fecs_trace_poll; | ||
755 | ops->fecs_trace.bind_channel = gk20a_fecs_trace_bind_channel; | ||
756 | ops->fecs_trace.unbind_channel = gk20a_fecs_trace_unbind_channel; | ||
757 | ops->fecs_trace.max_entries = gk20a_gr_max_entries; | ||
758 | } | ||
759 | #else | ||
760 | void gk20a_init_fecs_trace_ops(struct gpu_ops *ops) | ||
761 | { | ||
762 | } | ||
763 | #endif /* CONFIG_GK20A_CTXSW_TRACE */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h new file mode 100644 index 00000000..4979d6c6 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h | |||
@@ -0,0 +1,20 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #ifndef __FECS_TRACE_GK20A_H | ||
15 | #define __FECS_TRACE_GK20A_H | ||
16 | |||
17 | struct gpu_ops; | ||
18 | void gk20a_init_fecs_trace_ops(struct gpu_ops *ops); | ||
19 | |||
20 | #endif /* __FECS_TRACE_GK20A_H */ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 769960af..029a713f 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -25,6 +25,7 @@ | |||
25 | 25 | ||
26 | #include "gk20a.h" | 26 | #include "gk20a.h" |
27 | #include "debug_gk20a.h" | 27 | #include "debug_gk20a.h" |
28 | #include "ctxsw_trace_gk20a.h" | ||
28 | #include "semaphore_gk20a.h" | 29 | #include "semaphore_gk20a.h" |
29 | #include "hw_fifo_gk20a.h" | 30 | #include "hw_fifo_gk20a.h" |
30 | #include "hw_pbdma_gk20a.h" | 31 | #include "hw_pbdma_gk20a.h" |
@@ -303,12 +304,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) | |||
303 | if (!runlist->active_tsgs) | 304 | if (!runlist->active_tsgs) |
304 | goto clean_up_runlist_info; | 305 | goto clean_up_runlist_info; |
305 | 306 | ||
306 | runlist->high_prio_channels = | ||
307 | kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), | ||
308 | GFP_KERNEL); | ||
309 | if (!runlist->high_prio_channels) | ||
310 | goto clean_up_runlist_info; | ||
311 | |||
312 | runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries; | 307 | runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries; |
313 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | 308 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { |
314 | int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); | 309 | int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); |
@@ -337,9 +332,6 @@ clean_up_runlist_info: | |||
337 | kfree(runlist->active_tsgs); | 332 | kfree(runlist->active_tsgs); |
338 | runlist->active_tsgs = NULL; | 333 | runlist->active_tsgs = NULL; |
339 | 334 | ||
340 | kfree(runlist->high_prio_channels); | ||
341 | runlist->high_prio_channels = NULL; | ||
342 | |||
343 | kfree(f->runlist_info); | 335 | kfree(f->runlist_info); |
344 | f->runlist_info = NULL; | 336 | f->runlist_info = NULL; |
345 | 337 | ||
@@ -471,8 +463,7 @@ static void gk20a_init_fifo_pbdma_intr_descs(struct fifo_gk20a *f) | |||
471 | /* Can be used for sw-methods, or represents | 463 | /* Can be used for sw-methods, or represents |
472 | * a recoverable timeout. */ | 464 | * a recoverable timeout. */ |
473 | f->intr.pbdma.restartable_0 = | 465 | f->intr.pbdma.restartable_0 = |
474 | pbdma_intr_0_device_pending_f() | | 466 | pbdma_intr_0_device_pending_f(); |
475 | pbdma_intr_0_acquire_pending_f(); | ||
476 | } | 467 | } |
477 | 468 | ||
478 | static int gk20a_init_fifo_setup_sw(struct gk20a *g) | 469 | static int gk20a_init_fifo_setup_sw(struct gk20a *g) |
@@ -786,13 +777,17 @@ void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id) | |||
786 | if (engine_id == top_device_info_type_enum_graphics_v()) { | 777 | if (engine_id == top_device_info_type_enum_graphics_v()) { |
787 | if (support_gk20a_pmu(g->dev) && g->elpg_enabled) | 778 | if (support_gk20a_pmu(g->dev) && g->elpg_enabled) |
788 | gk20a_pmu_disable_elpg(g); | 779 | gk20a_pmu_disable_elpg(g); |
789 | /*HALT_PIPELINE method, halt GR engine*/ | 780 | /*HALT_PIPELINE method, halt GR engine*/ |
790 | if (gr_gk20a_halt_pipe(g)) | 781 | if (gr_gk20a_halt_pipe(g)) |
791 | gk20a_err(dev_from_gk20a(g), | 782 | gk20a_err(dev_from_gk20a(g), "failed to HALT gr pipe"); |
792 | "failed to HALT gr pipe"); | 783 | /* resetting engine will alter read/write index. |
793 | /* resetting engine using mc_enable_r() is not | 784 | * need to flush circular buffer before re-enabling FECS. |
794 | enough, we do full init sequence */ | 785 | */ |
795 | gk20a_gr_reset(g); | 786 | if (g->ops.fecs_trace.reset) |
787 | g->ops.fecs_trace.reset(g); | ||
788 | /* resetting engine using mc_enable_r() is not | ||
789 | enough, we do full init sequence */ | ||
790 | gk20a_gr_reset(g); | ||
796 | if (support_gk20a_pmu(g->dev) && g->elpg_enabled) | 791 | if (support_gk20a_pmu(g->dev) && g->elpg_enabled) |
797 | gk20a_pmu_enable_elpg(g); | 792 | gk20a_pmu_enable_elpg(g); |
798 | } | 793 | } |
@@ -1662,6 +1657,12 @@ static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev, | |||
1662 | u32 val = gk20a_readl(g, pbdma_acquire_r(pbdma_id)); | 1657 | u32 val = gk20a_readl(g, pbdma_acquire_r(pbdma_id)); |
1663 | val &= ~pbdma_acquire_timeout_en_enable_f(); | 1658 | val &= ~pbdma_acquire_timeout_en_enable_f(); |
1664 | gk20a_writel(g, pbdma_acquire_r(pbdma_id), val); | 1659 | gk20a_writel(g, pbdma_acquire_r(pbdma_id), val); |
1660 | if (g->timeouts_enabled) { | ||
1661 | reset = true; | ||
1662 | gk20a_err(dev_from_gk20a(g), | ||
1663 | "semaphore acquire timeout!"); | ||
1664 | } | ||
1665 | handled |= pbdma_intr_0_acquire_pending_f(); | ||
1665 | } | 1666 | } |
1666 | 1667 | ||
1667 | if (pbdma_intr_0 & pbdma_intr_0_pbentry_pending_f()) { | 1668 | if (pbdma_intr_0 & pbdma_intr_0_pbentry_pending_f()) { |
@@ -2162,32 +2163,153 @@ static inline u32 gk20a_get_tsg_runlist_entry_0(struct tsg_gk20a *tsg) | |||
2162 | return runlist_entry_0; | 2163 | return runlist_entry_0; |
2163 | } | 2164 | } |
2164 | 2165 | ||
2165 | /* add all active high priority channels */ | 2166 | /* recursively construct a runlist with interleaved bare channels and TSGs */ |
2166 | static inline u32 gk20a_fifo_runlist_add_high_prio_entries( | 2167 | static u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f, |
2167 | struct fifo_gk20a *f, | 2168 | struct fifo_runlist_info_gk20a *runlist, |
2168 | struct fifo_runlist_info_gk20a *runlist, | 2169 | u32 cur_level, |
2169 | u32 *runlist_entry) | 2170 | u32 *runlist_entry, |
2171 | bool interleave_enabled, | ||
2172 | bool prev_empty, | ||
2173 | u32 *entries_left) | ||
2170 | { | 2174 | { |
2171 | struct channel_gk20a *ch = NULL; | 2175 | bool last_level = cur_level == NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH; |
2172 | unsigned long high_prio_chid; | 2176 | struct channel_gk20a *ch; |
2173 | u32 count = 0; | 2177 | bool skip_next = false; |
2178 | u32 chid, tsgid, count = 0; | ||
2179 | |||
2180 | gk20a_dbg_fn(""); | ||
2181 | |||
2182 | /* for each bare channel, CH, on this level, insert all higher-level | ||
2183 | channels and TSGs before inserting CH. */ | ||
2184 | for_each_set_bit(chid, runlist->active_channels, f->num_channels) { | ||
2185 | ch = &f->channel[chid]; | ||
2186 | |||
2187 | if (ch->interleave_level != cur_level) | ||
2188 | continue; | ||
2174 | 2189 | ||
2175 | for_each_set_bit(high_prio_chid, | 2190 | if (gk20a_is_channel_marked_as_tsg(ch)) |
2176 | runlist->high_prio_channels, f->num_channels) { | 2191 | continue; |
2177 | ch = &f->channel[high_prio_chid]; | 2192 | |
2193 | if (!last_level && !skip_next) { | ||
2194 | runlist_entry = gk20a_runlist_construct_locked(f, | ||
2195 | runlist, | ||
2196 | cur_level + 1, | ||
2197 | runlist_entry, | ||
2198 | interleave_enabled, | ||
2199 | false, | ||
2200 | entries_left); | ||
2201 | /* if interleaving is disabled, higher-level channels | ||
2202 | and TSGs only need to be inserted once */ | ||
2203 | if (!interleave_enabled) | ||
2204 | skip_next = true; | ||
2205 | } | ||
2178 | 2206 | ||
2179 | if (!gk20a_is_channel_marked_as_tsg(ch) && | 2207 | if (!(*entries_left)) |
2180 | test_bit(high_prio_chid, runlist->active_channels) == 1) { | 2208 | return NULL; |
2181 | gk20a_dbg_info("add high prio channel %lu to runlist", | 2209 | |
2182 | high_prio_chid); | 2210 | gk20a_dbg_info("add channel %d to runlist", chid); |
2183 | runlist_entry[0] = ram_rl_entry_chid_f(high_prio_chid); | 2211 | runlist_entry[0] = ram_rl_entry_chid_f(chid); |
2212 | runlist_entry[1] = 0; | ||
2213 | runlist_entry += 2; | ||
2214 | count++; | ||
2215 | (*entries_left)--; | ||
2216 | } | ||
2217 | |||
2218 | /* for each TSG, T, on this level, insert all higher-level channels | ||
2219 | and TSGs before inserting T. */ | ||
2220 | for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) { | ||
2221 | struct tsg_gk20a *tsg = &f->tsg[tsgid]; | ||
2222 | |||
2223 | if (tsg->interleave_level != cur_level) | ||
2224 | continue; | ||
2225 | |||
2226 | if (!last_level && !skip_next) { | ||
2227 | runlist_entry = gk20a_runlist_construct_locked(f, | ||
2228 | runlist, | ||
2229 | cur_level + 1, | ||
2230 | runlist_entry, | ||
2231 | interleave_enabled, | ||
2232 | false, | ||
2233 | entries_left); | ||
2234 | if (!interleave_enabled) | ||
2235 | skip_next = true; | ||
2236 | } | ||
2237 | |||
2238 | if (!(*entries_left)) | ||
2239 | return NULL; | ||
2240 | |||
2241 | /* add TSG entry */ | ||
2242 | gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid); | ||
2243 | runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg); | ||
2244 | runlist_entry[1] = 0; | ||
2245 | runlist_entry += 2; | ||
2246 | count++; | ||
2247 | (*entries_left)--; | ||
2248 | |||
2249 | mutex_lock(&tsg->ch_list_lock); | ||
2250 | /* add runnable channels bound to this TSG */ | ||
2251 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) { | ||
2252 | if (!test_bit(ch->hw_chid, | ||
2253 | runlist->active_channels)) | ||
2254 | continue; | ||
2255 | |||
2256 | if (!(*entries_left)) { | ||
2257 | mutex_unlock(&tsg->ch_list_lock); | ||
2258 | return NULL; | ||
2259 | } | ||
2260 | |||
2261 | gk20a_dbg_info("add channel %d to runlist", | ||
2262 | ch->hw_chid); | ||
2263 | runlist_entry[0] = ram_rl_entry_chid_f(ch->hw_chid); | ||
2184 | runlist_entry[1] = 0; | 2264 | runlist_entry[1] = 0; |
2185 | runlist_entry += 2; | 2265 | runlist_entry += 2; |
2186 | count++; | 2266 | count++; |
2267 | (*entries_left)--; | ||
2187 | } | 2268 | } |
2269 | mutex_unlock(&tsg->ch_list_lock); | ||
2188 | } | 2270 | } |
2189 | 2271 | ||
2190 | return count; | 2272 | /* append entries from higher level if this level is empty */ |
2273 | if (!count && !last_level) | ||
2274 | runlist_entry = gk20a_runlist_construct_locked(f, | ||
2275 | runlist, | ||
2276 | cur_level + 1, | ||
2277 | runlist_entry, | ||
2278 | interleave_enabled, | ||
2279 | true, | ||
2280 | entries_left); | ||
2281 | |||
2282 | /* | ||
2283 | * if previous and this level have entries, append | ||
2284 | * entries from higher level. | ||
2285 | * | ||
2286 | * ex. dropping from MEDIUM to LOW, need to insert HIGH | ||
2287 | */ | ||
2288 | if (interleave_enabled && count && !prev_empty && !last_level) | ||
2289 | runlist_entry = gk20a_runlist_construct_locked(f, | ||
2290 | runlist, | ||
2291 | cur_level + 1, | ||
2292 | runlist_entry, | ||
2293 | interleave_enabled, | ||
2294 | false, | ||
2295 | entries_left); | ||
2296 | return runlist_entry; | ||
2297 | } | ||
2298 | |||
2299 | int gk20a_fifo_set_runlist_interleave(struct gk20a *g, | ||
2300 | u32 id, | ||
2301 | bool is_tsg, | ||
2302 | u32 runlist_id, | ||
2303 | u32 new_level) | ||
2304 | { | ||
2305 | gk20a_dbg_fn(""); | ||
2306 | |||
2307 | if (is_tsg) | ||
2308 | g->fifo.tsg[id].interleave_level = new_level; | ||
2309 | else | ||
2310 | g->fifo.channel[id].interleave_level = new_level; | ||
2311 | |||
2312 | return 0; | ||
2191 | } | 2313 | } |
2192 | 2314 | ||
2193 | static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | 2315 | static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, |
@@ -2198,14 +2320,11 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2198 | struct fifo_gk20a *f = &g->fifo; | 2320 | struct fifo_gk20a *f = &g->fifo; |
2199 | struct fifo_runlist_info_gk20a *runlist = NULL; | 2321 | struct fifo_runlist_info_gk20a *runlist = NULL; |
2200 | u32 *runlist_entry_base = NULL; | 2322 | u32 *runlist_entry_base = NULL; |
2201 | u32 *runlist_entry = NULL; | ||
2202 | u64 runlist_iova; | 2323 | u64 runlist_iova; |
2203 | u32 old_buf, new_buf; | 2324 | u32 old_buf, new_buf; |
2204 | u32 chid, tsgid; | ||
2205 | struct channel_gk20a *ch = NULL; | 2325 | struct channel_gk20a *ch = NULL; |
2206 | struct tsg_gk20a *tsg = NULL; | 2326 | struct tsg_gk20a *tsg = NULL; |
2207 | u32 count = 0; | 2327 | u32 count = 0; |
2208 | u32 count_channels_in_tsg; | ||
2209 | runlist = &f->runlist_info[runlist_id]; | 2328 | runlist = &f->runlist_info[runlist_id]; |
2210 | 2329 | ||
2211 | /* valid channel, add/remove it from active list. | 2330 | /* valid channel, add/remove it from active list. |
@@ -2254,91 +2373,23 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
2254 | 2373 | ||
2255 | if (hw_chid != ~0 || /* add/remove a valid channel */ | 2374 | if (hw_chid != ~0 || /* add/remove a valid channel */ |
2256 | add /* resume to add all channels back */) { | 2375 | add /* resume to add all channels back */) { |
2257 | runlist_entry = runlist_entry_base; | 2376 | u32 max_entries = f->num_runlist_entries; |
2258 | 2377 | u32 *runlist_end; | |
2259 | /* Runlist manipulation: | ||
2260 | Insert an entry of all high priority channels inbetween | ||
2261 | all lower priority channels. This ensure that the maximum | ||
2262 | delay a runnable high priority channel has to wait is one | ||
2263 | medium timeslice + any context switching overhead + | ||
2264 | wait on other high priority channels. | ||
2265 | add non-TSG channels first */ | ||
2266 | for_each_set_bit(chid, | ||
2267 | runlist->active_channels, f->num_channels) { | ||
2268 | ch = &f->channel[chid]; | ||
2269 | |||
2270 | if (!gk20a_is_channel_marked_as_tsg(ch) && | ||
2271 | !ch->interleave) { | ||
2272 | u32 added; | ||
2273 | |||
2274 | gk20a_dbg_info("add normal prio channel %d to runlist", | ||
2275 | chid); | ||
2276 | runlist_entry[0] = ram_rl_entry_chid_f(chid); | ||
2277 | runlist_entry[1] = 0; | ||
2278 | runlist_entry += 2; | ||
2279 | count++; | ||
2280 | |||
2281 | added = gk20a_fifo_runlist_add_high_prio_entries( | ||
2282 | f, | ||
2283 | runlist, | ||
2284 | runlist_entry); | ||
2285 | count += added; | ||
2286 | runlist_entry += 2 * added; | ||
2287 | } | ||
2288 | } | ||
2289 | 2378 | ||
2290 | /* if there were no lower priority channels, then just | 2379 | runlist_end = gk20a_runlist_construct_locked(f, |
2291 | * add the high priority channels once. */ | 2380 | runlist, |
2292 | if (count == 0) { | 2381 | 0, |
2293 | count = gk20a_fifo_runlist_add_high_prio_entries( | 2382 | runlist_entry_base, |
2294 | f, | 2383 | g->runlist_interleave, |
2295 | runlist, | 2384 | true, |
2296 | runlist_entry); | 2385 | &max_entries); |
2297 | runlist_entry += 2 * count; | 2386 | if (!runlist_end) { |
2387 | ret = -E2BIG; | ||
2388 | goto clean_up; | ||
2298 | } | 2389 | } |
2299 | 2390 | ||
2300 | /* now add TSG entries and channels bound to TSG */ | 2391 | count = (runlist_end - runlist_entry_base) / 2; |
2301 | mutex_lock(&f->tsg_inuse_mutex); | 2392 | WARN_ON(count > f->num_runlist_entries); |
2302 | for_each_set_bit(tsgid, | ||
2303 | runlist->active_tsgs, f->num_channels) { | ||
2304 | u32 added; | ||
2305 | tsg = &f->tsg[tsgid]; | ||
2306 | /* add TSG entry */ | ||
2307 | gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid); | ||
2308 | runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg); | ||
2309 | runlist_entry[1] = 0; | ||
2310 | runlist_entry += 2; | ||
2311 | count++; | ||
2312 | |||
2313 | /* add runnable channels bound to this TSG */ | ||
2314 | count_channels_in_tsg = 0; | ||
2315 | mutex_lock(&tsg->ch_list_lock); | ||
2316 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) { | ||
2317 | if (!test_bit(ch->hw_chid, | ||
2318 | runlist->active_channels)) | ||
2319 | continue; | ||
2320 | gk20a_dbg_info("add channel %d to runlist", | ||
2321 | ch->hw_chid); | ||
2322 | runlist_entry[0] = | ||
2323 | ram_rl_entry_chid_f(ch->hw_chid); | ||
2324 | runlist_entry[1] = 0; | ||
2325 | runlist_entry += 2; | ||
2326 | count++; | ||
2327 | count_channels_in_tsg++; | ||
2328 | } | ||
2329 | mutex_unlock(&tsg->ch_list_lock); | ||
2330 | |||
2331 | WARN_ON(tsg->num_active_channels != | ||
2332 | count_channels_in_tsg); | ||
2333 | |||
2334 | added = gk20a_fifo_runlist_add_high_prio_entries( | ||
2335 | f, | ||
2336 | runlist, | ||
2337 | runlist_entry); | ||
2338 | count += added; | ||
2339 | runlist_entry += 2 * added; | ||
2340 | } | ||
2341 | mutex_unlock(&f->tsg_inuse_mutex); | ||
2342 | } else /* suspend to remove all channels */ | 2393 | } else /* suspend to remove all channels */ |
2343 | count = 0; | 2394 | count = 0; |
2344 | 2395 | ||
@@ -2493,42 +2544,6 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g) | |||
2493 | return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f(); | 2544 | return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f(); |
2494 | } | 2545 | } |
2495 | 2546 | ||
2496 | int gk20a_fifo_set_channel_priority( | ||
2497 | struct gk20a *g, | ||
2498 | u32 runlist_id, | ||
2499 | u32 hw_chid, | ||
2500 | bool interleave) | ||
2501 | { | ||
2502 | struct fifo_runlist_info_gk20a *runlist = NULL; | ||
2503 | struct fifo_gk20a *f = &g->fifo; | ||
2504 | struct channel_gk20a *ch = NULL; | ||
2505 | |||
2506 | if (hw_chid >= f->num_channels) | ||
2507 | return -EINVAL; | ||
2508 | |||
2509 | if (runlist_id >= f->max_runlists) | ||
2510 | return -EINVAL; | ||
2511 | |||
2512 | ch = &f->channel[hw_chid]; | ||
2513 | |||
2514 | gk20a_dbg_fn(""); | ||
2515 | |||
2516 | runlist = &f->runlist_info[runlist_id]; | ||
2517 | |||
2518 | mutex_lock(&runlist->mutex); | ||
2519 | |||
2520 | if (ch->interleave) | ||
2521 | set_bit(hw_chid, runlist->high_prio_channels); | ||
2522 | else | ||
2523 | clear_bit(hw_chid, runlist->high_prio_channels); | ||
2524 | |||
2525 | gk20a_dbg_fn("done"); | ||
2526 | |||
2527 | mutex_unlock(&runlist->mutex); | ||
2528 | |||
2529 | return 0; | ||
2530 | } | ||
2531 | |||
2532 | struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, | 2547 | struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, |
2533 | u32 hw_chid) | 2548 | u32 hw_chid) |
2534 | { | 2549 | { |
@@ -2545,4 +2560,5 @@ void gk20a_init_fifo(struct gpu_ops *gops) | |||
2545 | gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle; | 2560 | gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle; |
2546 | gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos; | 2561 | gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos; |
2547 | gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature; | 2562 | gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature; |
2563 | gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave; | ||
2548 | } | 2564 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index ee4e7328..0979bf2b 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | |||
@@ -31,7 +31,6 @@ | |||
31 | struct fifo_runlist_info_gk20a { | 31 | struct fifo_runlist_info_gk20a { |
32 | unsigned long *active_channels; | 32 | unsigned long *active_channels; |
33 | unsigned long *active_tsgs; | 33 | unsigned long *active_tsgs; |
34 | unsigned long *high_prio_channels; | ||
35 | /* Each engine has its own SW and HW runlist buffer.*/ | 34 | /* Each engine has its own SW and HW runlist buffer.*/ |
36 | struct mem_desc mem[MAX_RUNLIST_BUFFERS]; | 35 | struct mem_desc mem[MAX_RUNLIST_BUFFERS]; |
37 | u32 cur_buffer; | 36 | u32 cur_buffer; |
@@ -184,8 +183,6 @@ void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g, | |||
184 | int gk20a_fifo_wait_engine_idle(struct gk20a *g); | 183 | int gk20a_fifo_wait_engine_idle(struct gk20a *g); |
185 | u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g); | 184 | u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g); |
186 | u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g); | 185 | u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g); |
187 | int gk20a_fifo_set_channel_priority(struct gk20a *g, u32 runlist_id, | ||
188 | u32 hw_chid, bool interleave); | ||
189 | u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g, | 186 | u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g, |
190 | int *__id, bool *__is_tsg); | 187 | int *__id, bool *__is_tsg); |
191 | bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, | 188 | bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, |
@@ -198,4 +195,9 @@ struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, | |||
198 | u32 hw_chid); | 195 | u32 hw_chid); |
199 | 196 | ||
200 | void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg); | 197 | void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg); |
198 | int gk20a_fifo_set_runlist_interleave(struct gk20a *g, | ||
199 | u32 id, | ||
200 | bool is_tsg, | ||
201 | u32 runlist_id, | ||
202 | u32 new_level); | ||
201 | #endif /*__GR_GK20A_H__*/ | 203 | #endif /*__GR_GK20A_H__*/ |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 6a5986a7..b8753a21 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -60,6 +60,7 @@ | |||
60 | #include "hw_gr_gk20a.h" | 60 | #include "hw_gr_gk20a.h" |
61 | #include "hw_fb_gk20a.h" | 61 | #include "hw_fb_gk20a.h" |
62 | #include "gk20a_scale.h" | 62 | #include "gk20a_scale.h" |
63 | #include "ctxsw_trace_gk20a.h" | ||
63 | #include "dbg_gpu_gk20a.h" | 64 | #include "dbg_gpu_gk20a.h" |
64 | #include "gk20a_allocator.h" | 65 | #include "gk20a_allocator.h" |
65 | #include "hal.h" | 66 | #include "hal.h" |
@@ -80,7 +81,7 @@ | |||
80 | /* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */ | 81 | /* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */ |
81 | #define INTERFACE_NAME "nvhost%s-gpu" | 82 | #define INTERFACE_NAME "nvhost%s-gpu" |
82 | 83 | ||
83 | #define GK20A_NUM_CDEVS 6 | 84 | #define GK20A_NUM_CDEVS 7 |
84 | 85 | ||
85 | #define EMC3D_DEFAULT_RATIO 750 | 86 | #define EMC3D_DEFAULT_RATIO 750 |
86 | 87 | ||
@@ -169,6 +170,19 @@ static const struct file_operations gk20a_tsg_ops = { | |||
169 | .unlocked_ioctl = gk20a_tsg_dev_ioctl, | 170 | .unlocked_ioctl = gk20a_tsg_dev_ioctl, |
170 | }; | 171 | }; |
171 | 172 | ||
173 | static const struct file_operations gk20a_ctxsw_ops = { | ||
174 | .owner = THIS_MODULE, | ||
175 | .release = gk20a_ctxsw_dev_release, | ||
176 | .open = gk20a_ctxsw_dev_open, | ||
177 | #ifdef CONFIG_COMPAT | ||
178 | .compat_ioctl = gk20a_ctxsw_dev_ioctl, | ||
179 | #endif | ||
180 | .unlocked_ioctl = gk20a_ctxsw_dev_ioctl, | ||
181 | .poll = gk20a_ctxsw_dev_poll, | ||
182 | .read = gk20a_ctxsw_dev_read, | ||
183 | .mmap = gk20a_ctxsw_dev_mmap, | ||
184 | }; | ||
185 | |||
172 | static inline void sim_writel(struct gk20a *g, u32 r, u32 v) | 186 | static inline void sim_writel(struct gk20a *g, u32 r, u32 v) |
173 | { | 187 | { |
174 | writel(v, g->sim.regs+r); | 188 | writel(v, g->sim.regs+r); |
@@ -672,9 +686,6 @@ static int gk20a_init_support(struct platform_device *dev) | |||
672 | mutex_init(&g->ch_wdt_lock); | 686 | mutex_init(&g->ch_wdt_lock); |
673 | mutex_init(&g->poweroff_lock); | 687 | mutex_init(&g->poweroff_lock); |
674 | 688 | ||
675 | mutex_init(&g->interleave_lock); | ||
676 | g->num_interleaved_channels = 0; | ||
677 | |||
678 | g->remove_support = gk20a_remove_support; | 689 | g->remove_support = gk20a_remove_support; |
679 | return 0; | 690 | return 0; |
680 | 691 | ||
@@ -884,6 +895,10 @@ static int gk20a_pm_finalize_poweron(struct device *dev) | |||
884 | goto done; | 895 | goto done; |
885 | } | 896 | } |
886 | 897 | ||
898 | err = gk20a_ctxsw_trace_init(g); | ||
899 | if (err) | ||
900 | gk20a_warn(dev, "could not initialize ctxsw tracing"); | ||
901 | |||
887 | /* Restore the debug setting */ | 902 | /* Restore the debug setting */ |
888 | g->ops.mm.set_debug_mode(g, g->mmu_debug_ctrl); | 903 | g->ops.mm.set_debug_mode(g, g->mmu_debug_ctrl); |
889 | 904 | ||
@@ -1012,6 +1027,11 @@ void gk20a_user_deinit(struct platform_device *dev) | |||
1012 | cdev_del(&g->tsg.cdev); | 1027 | cdev_del(&g->tsg.cdev); |
1013 | } | 1028 | } |
1014 | 1029 | ||
1030 | if (g->ctxsw.node) { | ||
1031 | device_destroy(g->class, g->ctxsw.cdev.dev); | ||
1032 | cdev_del(&g->ctxsw.cdev); | ||
1033 | } | ||
1034 | |||
1015 | if (g->cdev_region) | 1035 | if (g->cdev_region) |
1016 | unregister_chrdev_region(g->cdev_region, GK20A_NUM_CDEVS); | 1036 | unregister_chrdev_region(g->cdev_region, GK20A_NUM_CDEVS); |
1017 | 1037 | ||
@@ -1077,6 +1097,15 @@ int gk20a_user_init(struct platform_device *dev) | |||
1077 | if (err) | 1097 | if (err) |
1078 | goto fail; | 1098 | goto fail; |
1079 | 1099 | ||
1100 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
1101 | err = gk20a_create_device(dev, devno++, "-ctxsw", | ||
1102 | &g->ctxsw.cdev, &g->ctxsw.node, | ||
1103 | &gk20a_ctxsw_ops); | ||
1104 | if (err) | ||
1105 | goto fail; | ||
1106 | #endif | ||
1107 | |||
1108 | |||
1080 | return 0; | 1109 | return 0; |
1081 | fail: | 1110 | fail: |
1082 | gk20a_user_deinit(dev); | 1111 | gk20a_user_deinit(dev); |
@@ -1400,9 +1429,11 @@ static int gk20a_probe(struct platform_device *dev) | |||
1400 | 1429 | ||
1401 | spin_lock_init(&gk20a->mc_enable_lock); | 1430 | spin_lock_init(&gk20a->mc_enable_lock); |
1402 | 1431 | ||
1432 | #ifdef CONFIG_RESET_CONTROLLER | ||
1403 | platform->reset_control = devm_reset_control_get(&dev->dev, NULL); | 1433 | platform->reset_control = devm_reset_control_get(&dev->dev, NULL); |
1404 | if (IS_ERR(platform->reset_control)) | 1434 | if (IS_ERR(platform->reset_control)) |
1405 | platform->reset_control = NULL; | 1435 | platform->reset_control = NULL; |
1436 | #endif | ||
1406 | 1437 | ||
1407 | gk20a_debug_init(dev); | 1438 | gk20a_debug_init(dev); |
1408 | 1439 | ||
@@ -1439,14 +1470,11 @@ static int gk20a_probe(struct platform_device *dev) | |||
1439 | if (tegra_platform_is_silicon()) | 1470 | if (tegra_platform_is_silicon()) |
1440 | gk20a->timeouts_enabled = true; | 1471 | gk20a->timeouts_enabled = true; |
1441 | 1472 | ||
1442 | gk20a->interleave_high_priority = true; | 1473 | gk20a->runlist_interleave = true; |
1443 | 1474 | ||
1444 | gk20a->timeslice_low_priority_us = 1300; | 1475 | gk20a->timeslice_low_priority_us = 1300; |
1445 | gk20a->timeslice_medium_priority_us = 2600; | 1476 | gk20a->timeslice_medium_priority_us = 2600; |
1446 | if (gk20a->interleave_high_priority) | 1477 | gk20a->timeslice_high_priority_us = 5200; |
1447 | gk20a->timeslice_high_priority_us = 3000; | ||
1448 | else | ||
1449 | gk20a->timeslice_high_priority_us = 5200; | ||
1450 | 1478 | ||
1451 | /* Set up initial power settings. For non-slicon platforms, disable * | 1479 | /* Set up initial power settings. For non-slicon platforms, disable * |
1452 | * power features and for silicon platforms, read from platform data */ | 1480 | * power features and for silicon platforms, read from platform data */ |
@@ -1527,16 +1555,17 @@ static int gk20a_probe(struct platform_device *dev) | |||
1527 | platform->debugfs, | 1555 | platform->debugfs, |
1528 | &gk20a->timeslice_high_priority_us); | 1556 | &gk20a->timeslice_high_priority_us); |
1529 | 1557 | ||
1530 | gk20a->debugfs_interleave_high_priority = | 1558 | gk20a->debugfs_runlist_interleave = |
1531 | debugfs_create_bool("interleave_high_priority", | 1559 | debugfs_create_bool("runlist_interleave", |
1532 | S_IRUGO|S_IWUSR, | 1560 | S_IRUGO|S_IWUSR, |
1533 | platform->debugfs, | 1561 | platform->debugfs, |
1534 | &gk20a->interleave_high_priority); | 1562 | &gk20a->runlist_interleave); |
1535 | 1563 | ||
1536 | gr_gk20a_debugfs_init(gk20a); | 1564 | gr_gk20a_debugfs_init(gk20a); |
1537 | gk20a_pmu_debugfs_init(dev); | 1565 | gk20a_pmu_debugfs_init(dev); |
1538 | gk20a_cde_debugfs_init(dev); | 1566 | gk20a_cde_debugfs_init(dev); |
1539 | gk20a_alloc_debugfs_init(dev); | 1567 | gk20a_alloc_debugfs_init(dev); |
1568 | gk20a_mm_debugfs_init(dev); | ||
1540 | #endif | 1569 | #endif |
1541 | 1570 | ||
1542 | gk20a_init_gr(gk20a); | 1571 | gk20a_init_gr(gk20a); |
@@ -1558,6 +1587,8 @@ static int __exit gk20a_remove(struct platform_device *dev) | |||
1558 | if (platform->has_cde) | 1587 | if (platform->has_cde) |
1559 | gk20a_cde_destroy(g); | 1588 | gk20a_cde_destroy(g); |
1560 | 1589 | ||
1590 | gk20a_ctxsw_trace_cleanup(g); | ||
1591 | |||
1561 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) | 1592 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) |
1562 | gk20a_scale_exit(dev); | 1593 | gk20a_scale_exit(dev); |
1563 | 1594 | ||
@@ -1774,7 +1805,10 @@ void gk20a_enable(struct gk20a *g, u32 units) | |||
1774 | void gk20a_reset(struct gk20a *g, u32 units) | 1805 | void gk20a_reset(struct gk20a *g, u32 units) |
1775 | { | 1806 | { |
1776 | gk20a_disable(g, units); | 1807 | gk20a_disable(g, units); |
1777 | udelay(20); | 1808 | if (units & mc_enable_ce2_enabled_f()) |
1809 | udelay(500); | ||
1810 | else | ||
1811 | udelay(20); | ||
1778 | gk20a_enable(g, units); | 1812 | gk20a_enable(g, units); |
1779 | } | 1813 | } |
1780 | 1814 | ||
@@ -2095,6 +2129,19 @@ gk20a_request_firmware(struct gk20a *g, const char *fw_name) | |||
2095 | return fw; | 2129 | return fw; |
2096 | } | 2130 | } |
2097 | 2131 | ||
2132 | |||
2133 | u64 gk20a_read_ptimer(struct gk20a *g) | ||
2134 | { | ||
2135 | u32 time_hi0 = gk20a_readl(g, timer_time_1_r()); | ||
2136 | u32 time_lo = gk20a_readl(g, timer_time_0_r()); | ||
2137 | u32 time_hi1 = gk20a_readl(g, timer_time_1_r()); | ||
2138 | u32 time_hi = (time_lo & (1L << 31)) ? time_hi0 : time_hi1; | ||
2139 | u64 time = ((u64)time_hi << 32) | time_lo; | ||
2140 | |||
2141 | return time; | ||
2142 | } | ||
2143 | |||
2144 | |||
2098 | MODULE_LICENSE("GPL v2"); | 2145 | MODULE_LICENSE("GPL v2"); |
2099 | module_init(gk20a_init); | 2146 | module_init(gk20a_init); |
2100 | module_exit(gk20a_exit); | 2147 | module_exit(gk20a_exit); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 340f358a..8a1f82bc 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -25,6 +25,8 @@ struct channel_gk20a; | |||
25 | struct gr_gk20a; | 25 | struct gr_gk20a; |
26 | struct sim_gk20a; | 26 | struct sim_gk20a; |
27 | struct gk20a_ctxsw_ucode_segments; | 27 | struct gk20a_ctxsw_ucode_segments; |
28 | struct gk20a_fecs_trace; | ||
29 | struct gk20a_ctxsw_trace; | ||
28 | struct acr_gm20b; | 30 | struct acr_gm20b; |
29 | 31 | ||
30 | #include <linux/sched.h> | 32 | #include <linux/sched.h> |
@@ -54,8 +56,6 @@ struct acr_gm20b; | |||
54 | 32 ns is the resolution of ptimer. */ | 56 | 32 ns is the resolution of ptimer. */ |
55 | #define PTIMER_REF_FREQ_HZ 31250000 | 57 | #define PTIMER_REF_FREQ_HZ 31250000 |
56 | 58 | ||
57 | #define MAX_INTERLEAVED_CHANNELS 32 | ||
58 | |||
59 | struct cooling_device_gk20a { | 59 | struct cooling_device_gk20a { |
60 | struct thermal_cooling_device *gk20a_cooling_dev; | 60 | struct thermal_cooling_device *gk20a_cooling_dev; |
61 | unsigned int gk20a_freq_state; | 61 | unsigned int gk20a_freq_state; |
@@ -236,6 +236,7 @@ struct gpu_ops { | |||
236 | void (*slcg_therm_load_gating_prod)(struct gk20a *g, bool prod); | 236 | void (*slcg_therm_load_gating_prod)(struct gk20a *g, bool prod); |
237 | void (*slcg_xbar_load_gating_prod)(struct gk20a *g, bool prod); | 237 | void (*slcg_xbar_load_gating_prod)(struct gk20a *g, bool prod); |
238 | void (*blcg_bus_load_gating_prod)(struct gk20a *g, bool prod); | 238 | void (*blcg_bus_load_gating_prod)(struct gk20a *g, bool prod); |
239 | void (*blcg_ce_load_gating_prod)(struct gk20a *g, bool prod); | ||
239 | void (*blcg_ctxsw_firmware_load_gating_prod)(struct gk20a *g, bool prod); | 240 | void (*blcg_ctxsw_firmware_load_gating_prod)(struct gk20a *g, bool prod); |
240 | void (*blcg_fb_load_gating_prod)(struct gk20a *g, bool prod); | 241 | void (*blcg_fb_load_gating_prod)(struct gk20a *g, bool prod); |
241 | void (*blcg_fifo_load_gating_prod)(struct gk20a *g, bool prod); | 242 | void (*blcg_fifo_load_gating_prod)(struct gk20a *g, bool prod); |
@@ -267,6 +268,11 @@ struct gpu_ops { | |||
267 | u32 (*get_num_fifos)(struct gk20a *g); | 268 | u32 (*get_num_fifos)(struct gk20a *g); |
268 | u32 (*get_pbdma_signature)(struct gk20a *g); | 269 | u32 (*get_pbdma_signature)(struct gk20a *g); |
269 | int (*channel_set_priority)(struct channel_gk20a *ch, u32 priority); | 270 | int (*channel_set_priority)(struct channel_gk20a *ch, u32 priority); |
271 | int (*set_runlist_interleave)(struct gk20a *g, u32 id, | ||
272 | bool is_tsg, u32 runlist_id, | ||
273 | u32 new_level); | ||
274 | int (*channel_set_timeslice)(struct channel_gk20a *ch, | ||
275 | u32 timeslice); | ||
270 | } fifo; | 276 | } fifo; |
271 | struct pmu_v { | 277 | struct pmu_v { |
272 | /*used for change of enum zbc update cmd id from ver 0 to ver1*/ | 278 | /*used for change of enum zbc update cmd id from ver 0 to ver1*/ |
@@ -369,6 +375,19 @@ struct gpu_ops { | |||
369 | bool use_dma_for_fw_bootstrap; | 375 | bool use_dma_for_fw_bootstrap; |
370 | } gr_ctx; | 376 | } gr_ctx; |
371 | struct { | 377 | struct { |
378 | int (*init)(struct gk20a *g); | ||
379 | int (*max_entries)(struct gk20a *, | ||
380 | struct nvgpu_ctxsw_trace_filter *); | ||
381 | int (*flush)(struct gk20a *g); | ||
382 | int (*poll)(struct gk20a *g); | ||
383 | int (*enable)(struct gk20a *g); | ||
384 | int (*disable)(struct gk20a *g); | ||
385 | int (*reset)(struct gk20a *g); | ||
386 | int (*bind_channel)(struct gk20a *, struct channel_gk20a *); | ||
387 | int (*unbind_channel)(struct gk20a *, struct channel_gk20a *); | ||
388 | int (*deinit)(struct gk20a *g); | ||
389 | } fecs_trace; | ||
390 | struct { | ||
372 | bool (*support_sparse)(struct gk20a *g); | 391 | bool (*support_sparse)(struct gk20a *g); |
373 | bool (*is_debug_mode_enabled)(struct gk20a *g); | 392 | bool (*is_debug_mode_enabled)(struct gk20a *g); |
374 | void (*set_debug_mode)(struct gk20a *g, bool enable); | 393 | void (*set_debug_mode)(struct gk20a *g, bool enable); |
@@ -535,10 +554,7 @@ struct gk20a { | |||
535 | u32 timeslice_low_priority_us; | 554 | u32 timeslice_low_priority_us; |
536 | u32 timeslice_medium_priority_us; | 555 | u32 timeslice_medium_priority_us; |
537 | u32 timeslice_high_priority_us; | 556 | u32 timeslice_high_priority_us; |
538 | u32 interleave_high_priority; | 557 | u32 runlist_interleave; |
539 | |||
540 | struct mutex interleave_lock; | ||
541 | u32 num_interleaved_channels; | ||
542 | 558 | ||
543 | bool slcg_enabled; | 559 | bool slcg_enabled; |
544 | bool blcg_enabled; | 560 | bool blcg_enabled; |
@@ -563,7 +579,7 @@ struct gk20a { | |||
563 | struct dentry *debugfs_timeslice_low_priority_us; | 579 | struct dentry *debugfs_timeslice_low_priority_us; |
564 | struct dentry *debugfs_timeslice_medium_priority_us; | 580 | struct dentry *debugfs_timeslice_medium_priority_us; |
565 | struct dentry *debugfs_timeslice_high_priority_us; | 581 | struct dentry *debugfs_timeslice_high_priority_us; |
566 | struct dentry *debugfs_interleave_high_priority; | 582 | struct dentry *debugfs_runlist_interleave; |
567 | 583 | ||
568 | #endif | 584 | #endif |
569 | struct gk20a_ctxsw_ucode_info ctxsw_ucode_info; | 585 | struct gk20a_ctxsw_ucode_info ctxsw_ucode_info; |
@@ -575,6 +591,14 @@ struct gk20a { | |||
575 | int dbg_powergating_disabled_refcount; /*refcount for pg disable */ | 591 | int dbg_powergating_disabled_refcount; /*refcount for pg disable */ |
576 | int dbg_timeout_disabled_refcount; /*refcount for timeout disable */ | 592 | int dbg_timeout_disabled_refcount; /*refcount for timeout disable */ |
577 | 593 | ||
594 | /* | ||
595 | * When set subsequent VMAs will separate fixed and non-fixed | ||
596 | * allocations. This avoids conflicts with fixed and non-fixed allocs | ||
597 | * for some tests. The value in separate_fixed_allocs is used to | ||
598 | * determine the split boundary. | ||
599 | */ | ||
600 | u64 separate_fixed_allocs; | ||
601 | |||
578 | void (*remove_support)(struct platform_device *); | 602 | void (*remove_support)(struct platform_device *); |
579 | 603 | ||
580 | u64 pg_ingating_time_us; | 604 | u64 pg_ingating_time_us; |
@@ -612,6 +636,11 @@ struct gk20a { | |||
612 | struct device *node; | 636 | struct device *node; |
613 | } tsg; | 637 | } tsg; |
614 | 638 | ||
639 | struct { | ||
640 | struct cdev cdev; | ||
641 | struct device *node; | ||
642 | } ctxsw; | ||
643 | |||
615 | struct mutex client_lock; | 644 | struct mutex client_lock; |
616 | int client_refcount; /* open channels and ctrl nodes */ | 645 | int client_refcount; /* open channels and ctrl nodes */ |
617 | 646 | ||
@@ -638,6 +667,9 @@ struct gk20a { | |||
638 | 667 | ||
639 | struct gk20a_scale_profile *scale_profile; | 668 | struct gk20a_scale_profile *scale_profile; |
640 | 669 | ||
670 | struct gk20a_ctxsw_trace *ctxsw_trace; | ||
671 | struct gk20a_fecs_trace *fecs_trace; | ||
672 | |||
641 | struct device_dma_parameters dma_parms; | 673 | struct device_dma_parameters dma_parms; |
642 | 674 | ||
643 | struct gk20a_cde_app cde_app; | 675 | struct gk20a_cde_app cde_app; |
@@ -715,6 +747,7 @@ enum gk20a_dbg_categories { | |||
715 | gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */ | 747 | gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */ |
716 | gpu_dbg_cde = BIT(10), /* cde info messages */ | 748 | gpu_dbg_cde = BIT(10), /* cde info messages */ |
717 | gpu_dbg_cde_ctx = BIT(11), /* cde context usage messages */ | 749 | gpu_dbg_cde_ctx = BIT(11), /* cde context usage messages */ |
750 | gpu_dbg_ctxsw = BIT(12), /* ctxsw tracing */ | ||
718 | gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ | 751 | gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ |
719 | }; | 752 | }; |
720 | 753 | ||
@@ -961,4 +994,6 @@ static inline u32 scale_ptimer(u32 timeout , u32 scale10x) | |||
961 | else | 994 | else |
962 | return (timeout * 10) / scale10x; | 995 | return (timeout * 10) / scale10x; |
963 | } | 996 | } |
997 | |||
998 | u64 gk20a_read_ptimer(struct gk20a *g); | ||
964 | #endif /* GK20A_H */ | 999 | #endif /* GK20A_H */ |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c index 0e6b576b..d433c9bb 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * GK20A Graphics | 4 | * GK20A Graphics |
5 | * | 5 | * |
6 | * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. | 6 | * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or modify it | 8 | * This program is free software; you can redistribute it and/or modify it |
9 | * under the terms and conditions of the GNU General Public License, | 9 | * under the terms and conditions of the GNU General Public License, |
@@ -30,7 +30,6 @@ | |||
30 | #include "fifo_gk20a.h" | 30 | #include "fifo_gk20a.h" |
31 | #include "pmu_gk20a.h" | 31 | #include "pmu_gk20a.h" |
32 | 32 | ||
33 | |||
34 | #define PTIMER_FP_FACTOR 1000000 | 33 | #define PTIMER_FP_FACTOR 1000000 |
35 | 34 | ||
36 | #define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH) | 35 | #define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH) |
@@ -100,6 +99,9 @@ static ssize_t blcg_enable_store(struct device *device, | |||
100 | 99 | ||
101 | if (g->ops.clock_gating.blcg_bus_load_gating_prod) | 100 | if (g->ops.clock_gating.blcg_bus_load_gating_prod) |
102 | g->ops.clock_gating.blcg_bus_load_gating_prod(g, g->blcg_enabled); | 101 | g->ops.clock_gating.blcg_bus_load_gating_prod(g, g->blcg_enabled); |
102 | if (g->ops.clock_gating.blcg_ce_load_gating_prod) | ||
103 | g->ops.clock_gating.blcg_ce_load_gating_prod(g, | ||
104 | g->blcg_enabled); | ||
103 | if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod) | 105 | if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod) |
104 | g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g, g->blcg_enabled); | 106 | g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g, g->blcg_enabled); |
105 | if (g->ops.clock_gating.blcg_fb_load_gating_prod) | 107 | if (g->ops.clock_gating.blcg_fb_load_gating_prod) |
@@ -784,8 +786,15 @@ void gk20a_remove_sysfs(struct device *dev) | |||
784 | device_remove_file(dev, &dev_attr_allow_all); | 786 | device_remove_file(dev, &dev_attr_allow_all); |
785 | device_remove_file(dev, &dev_attr_tpc_fs_mask); | 787 | device_remove_file(dev, &dev_attr_tpc_fs_mask); |
786 | 788 | ||
787 | if (g->host1x_dev && (dev->parent != &g->host1x_dev->dev)) | 789 | if (g->host1x_dev && (dev->parent != &g->host1x_dev->dev)) { |
788 | sysfs_remove_link(&g->host1x_dev->dev.kobj, dev_name(dev)); | 790 | sysfs_remove_link(&g->host1x_dev->dev.kobj, dev_name(dev)); |
791 | if (strcmp(dev_name(dev), "gpu.0")) { | ||
792 | struct kobject *kobj = &dev->kobj; | ||
793 | struct device *parent = container_of((kobj->parent), | ||
794 | struct device, kobj); | ||
795 | sysfs_remove_link(&parent->kobj, "gpu.0"); | ||
796 | } | ||
797 | } | ||
789 | } | 798 | } |
790 | 799 | ||
791 | void gk20a_create_sysfs(struct platform_device *dev) | 800 | void gk20a_create_sysfs(struct platform_device *dev) |
@@ -817,10 +826,19 @@ void gk20a_create_sysfs(struct platform_device *dev) | |||
817 | error |= device_create_file(&dev->dev, &dev_attr_allow_all); | 826 | error |= device_create_file(&dev->dev, &dev_attr_allow_all); |
818 | error |= device_create_file(&dev->dev, &dev_attr_tpc_fs_mask); | 827 | error |= device_create_file(&dev->dev, &dev_attr_tpc_fs_mask); |
819 | 828 | ||
820 | if (g->host1x_dev && (dev->dev.parent != &g->host1x_dev->dev)) | 829 | if (g->host1x_dev && (dev->dev.parent != &g->host1x_dev->dev)) { |
821 | error |= sysfs_create_link(&g->host1x_dev->dev.kobj, | 830 | error |= sysfs_create_link(&g->host1x_dev->dev.kobj, |
822 | &dev->dev.kobj, | 831 | &dev->dev.kobj, |
823 | dev_name(&dev->dev)); | 832 | dev_name(&dev->dev)); |
833 | if (strcmp(dev_name(&dev->dev), "gpu.0")) { | ||
834 | struct kobject *kobj = &dev->dev.kobj; | ||
835 | struct device *parent = container_of((kobj->parent), | ||
836 | struct device, kobj); | ||
837 | error |= sysfs_create_link(&parent->kobj, | ||
838 | &dev->dev.kobj, "gpu.0"); | ||
839 | } | ||
840 | |||
841 | } | ||
824 | 842 | ||
825 | if (error) | 843 | if (error) |
826 | dev_err(&dev->dev, "Failed to create sysfs attributes!\n"); | 844 | dev_err(&dev->dev, "Failed to create sysfs attributes!\n"); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 7e37a965..a10650be 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -56,6 +56,7 @@ | |||
56 | #include "debug_gk20a.h" | 56 | #include "debug_gk20a.h" |
57 | #include "semaphore_gk20a.h" | 57 | #include "semaphore_gk20a.h" |
58 | #include "platform_gk20a.h" | 58 | #include "platform_gk20a.h" |
59 | #include "ctxsw_trace_gk20a.h" | ||
59 | 60 | ||
60 | #define BLK_SIZE (256) | 61 | #define BLK_SIZE (256) |
61 | 62 | ||
@@ -2855,6 +2856,13 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, | |||
2855 | "fail to load golden ctx image"); | 2856 | "fail to load golden ctx image"); |
2856 | goto out; | 2857 | goto out; |
2857 | } | 2858 | } |
2859 | if (g->ops.fecs_trace.bind_channel) { | ||
2860 | err = g->ops.fecs_trace.bind_channel(g, c); | ||
2861 | if (err) { | ||
2862 | gk20a_warn(dev_from_gk20a(g), | ||
2863 | "fail to bind channel for ctxsw trace"); | ||
2864 | } | ||
2865 | } | ||
2858 | c->first_init = true; | 2866 | c->first_init = true; |
2859 | } | 2867 | } |
2860 | 2868 | ||
@@ -4217,7 +4225,15 @@ out: | |||
4217 | static void gr_gk20a_load_gating_prod(struct gk20a *g) | 4225 | static void gr_gk20a_load_gating_prod(struct gk20a *g) |
4218 | { | 4226 | { |
4219 | /* slcg prod values */ | 4227 | /* slcg prod values */ |
4220 | g->ops.clock_gating.slcg_gr_load_gating_prod(g, g->slcg_enabled); | 4228 | if (g->ops.clock_gating.slcg_bus_load_gating_prod) |
4229 | g->ops.clock_gating.slcg_bus_load_gating_prod(g, | ||
4230 | g->slcg_enabled); | ||
4231 | if (g->ops.clock_gating.slcg_chiplet_load_gating_prod) | ||
4232 | g->ops.clock_gating.slcg_chiplet_load_gating_prod(g, | ||
4233 | g->slcg_enabled); | ||
4234 | if (g->ops.clock_gating.slcg_gr_load_gating_prod) | ||
4235 | g->ops.clock_gating.slcg_gr_load_gating_prod(g, | ||
4236 | g->slcg_enabled); | ||
4221 | if (g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod) | 4237 | if (g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod) |
4222 | g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod(g, | 4238 | g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod(g, |
4223 | g->slcg_enabled); | 4239 | g->slcg_enabled); |
@@ -4227,6 +4243,12 @@ static void gr_gk20a_load_gating_prod(struct gk20a *g) | |||
4227 | g->slcg_enabled); | 4243 | g->slcg_enabled); |
4228 | 4244 | ||
4229 | /* blcg prod values */ | 4245 | /* blcg prod values */ |
4246 | if (g->ops.clock_gating.blcg_bus_load_gating_prod) | ||
4247 | g->ops.clock_gating.blcg_bus_load_gating_prod(g, | ||
4248 | g->blcg_enabled); | ||
4249 | if (g->ops.clock_gating.blcg_ce_load_gating_prod) | ||
4250 | g->ops.clock_gating.blcg_ce_load_gating_prod(g, | ||
4251 | g->blcg_enabled); | ||
4230 | g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled); | 4252 | g->ops.clock_gating.blcg_gr_load_gating_prod(g, g->blcg_enabled); |
4231 | if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod) | 4253 | if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod) |
4232 | g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g, | 4254 | g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g, |
@@ -7463,6 +7485,7 @@ static int gr_gk20a_dump_gr_status_regs(struct gk20a *g, | |||
7463 | return 0; | 7485 | return 0; |
7464 | } | 7486 | } |
7465 | 7487 | ||
7488 | #ifdef CONFIG_DEBUG_FS | ||
7466 | int gr_gk20a_debugfs_init(struct gk20a *g) | 7489 | int gr_gk20a_debugfs_init(struct gk20a *g) |
7467 | { | 7490 | { |
7468 | struct gk20a_platform *platform = platform_get_drvdata(g->dev); | 7491 | struct gk20a_platform *platform = platform_get_drvdata(g->dev); |
@@ -7474,6 +7497,7 @@ int gr_gk20a_debugfs_init(struct gk20a *g) | |||
7474 | 7497 | ||
7475 | return 0; | 7498 | return 0; |
7476 | } | 7499 | } |
7500 | #endif | ||
7477 | 7501 | ||
7478 | static void gr_gk20a_init_cyclestats(struct gk20a *g) | 7502 | static void gr_gk20a_init_cyclestats(struct gk20a *g) |
7479 | { | 7503 | { |
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c index a9ad970a..9718aad2 100644 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "gk20a_gating_reglist.h" | 22 | #include "gk20a_gating_reglist.h" |
23 | #include "channel_gk20a.h" | 23 | #include "channel_gk20a.h" |
24 | #include "gr_ctx_gk20a.h" | 24 | #include "gr_ctx_gk20a.h" |
25 | #include "fecs_trace_gk20a.h" | ||
25 | #include "mm_gk20a.h" | 26 | #include "mm_gk20a.h" |
26 | #include "mc_gk20a.h" | 27 | #include "mc_gk20a.h" |
27 | #include "pmu_gk20a.h" | 28 | #include "pmu_gk20a.h" |
@@ -57,6 +58,7 @@ int gk20a_init_hal(struct gk20a *g) | |||
57 | gk20a_init_mc(gops); | 58 | gk20a_init_mc(gops); |
58 | gk20a_init_ltc(gops); | 59 | gk20a_init_ltc(gops); |
59 | gk20a_init_gr_ops(gops); | 60 | gk20a_init_gr_ops(gops); |
61 | gk20a_init_fecs_trace_ops(gops); | ||
60 | gk20a_init_fb(gops); | 62 | gk20a_init_fb(gops); |
61 | gk20a_init_fifo(gops); | 63 | gk20a_init_fifo(gops); |
62 | gk20a_init_ce2(gops); | 64 | gk20a_init_ce2(gops); |
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h index 39cbbb58..da555f7c 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2012-2015, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2012-2016, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms and conditions of the GNU General Public License, | 5 | * under the terms and conditions of the GNU General Public License, |
@@ -246,4 +246,192 @@ static inline u32 ctxsw_prog_main_image_context_id_o(void) | |||
246 | { | 246 | { |
247 | return 0x000000f0; | 247 | return 0x000000f0; |
248 | } | 248 | } |
249 | static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_control_o(void) | ||
250 | { | ||
251 | return 0x000000ac; | ||
252 | } | ||
253 | static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(u32 v) | ||
254 | { | ||
255 | return (v & 0xffff) << 0; | ||
256 | } | ||
257 | static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(void) | ||
258 | { | ||
259 | return 0x000000b0; | ||
260 | } | ||
261 | static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_v_m(void) | ||
262 | { | ||
263 | return 0xfffffff << 0; | ||
264 | } | ||
265 | static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_m(void) | ||
266 | { | ||
267 | return 0x3 << 28; | ||
268 | } | ||
269 | static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f(void) | ||
270 | { | ||
271 | return 0x0; | ||
272 | } | ||
273 | static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(void) | ||
274 | { | ||
275 | return 0x20000000; | ||
276 | } | ||
277 | static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(void) | ||
278 | { | ||
279 | return 0x30000000; | ||
280 | } | ||
281 | static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(void) | ||
282 | { | ||
283 | return 0x000000b4; | ||
284 | } | ||
285 | static inline u32 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(u32 v) | ||
286 | { | ||
287 | return (v & 0xffffffff) << 0; | ||
288 | } | ||
289 | static inline u32 ctxsw_prog_record_timestamp_record_size_in_bytes_v(void) | ||
290 | { | ||
291 | return 0x00000080; | ||
292 | } | ||
293 | static inline u32 ctxsw_prog_record_timestamp_record_size_in_words_v(void) | ||
294 | { | ||
295 | return 0x00000020; | ||
296 | } | ||
297 | static inline u32 ctxsw_prog_record_timestamp_magic_value_lo_o(void) | ||
298 | { | ||
299 | return 0x00000000; | ||
300 | } | ||
301 | static inline u32 ctxsw_prog_record_timestamp_magic_value_lo_v_value_v(void) | ||
302 | { | ||
303 | return 0x00000000; | ||
304 | } | ||
305 | static inline u32 ctxsw_prog_record_timestamp_magic_value_hi_o(void) | ||
306 | { | ||
307 | return 0x00000004; | ||
308 | } | ||
309 | static inline u32 ctxsw_prog_record_timestamp_magic_value_hi_v_value_v(void) | ||
310 | { | ||
311 | return 0x600dbeef; | ||
312 | } | ||
313 | static inline u32 ctxsw_prog_record_timestamp_context_id_o(void) | ||
314 | { | ||
315 | return 0x00000008; | ||
316 | } | ||
317 | static inline u32 ctxsw_prog_record_timestamp_context_ptr_o(void) | ||
318 | { | ||
319 | return 0x0000000c; | ||
320 | } | ||
321 | static inline u32 ctxsw_prog_record_timestamp_new_context_id_o(void) | ||
322 | { | ||
323 | return 0x00000010; | ||
324 | } | ||
325 | static inline u32 ctxsw_prog_record_timestamp_new_context_ptr_o(void) | ||
326 | { | ||
327 | return 0x00000014; | ||
328 | } | ||
329 | static inline u32 ctxsw_prog_record_timestamp_timestamp_lo_o(void) | ||
330 | { | ||
331 | return 0x00000018; | ||
332 | } | ||
333 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_o(void) | ||
334 | { | ||
335 | return 0x0000001c; | ||
336 | } | ||
337 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_v_f(u32 v) | ||
338 | { | ||
339 | return (v & 0xffffff) << 0; | ||
340 | } | ||
341 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_v_v(u32 r) | ||
342 | { | ||
343 | return (r >> 0) & 0xffffff; | ||
344 | } | ||
345 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_f(u32 v) | ||
346 | { | ||
347 | return (v & 0xff) << 24; | ||
348 | } | ||
349 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_m(void) | ||
350 | { | ||
351 | return 0xff << 24; | ||
352 | } | ||
353 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_v(u32 r) | ||
354 | { | ||
355 | return (r >> 24) & 0xff; | ||
356 | } | ||
357 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_ctxsw_req_by_host_v(void) | ||
358 | { | ||
359 | return 0x00000001; | ||
360 | } | ||
361 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_ctxsw_req_by_host_f(void) | ||
362 | { | ||
363 | return 0x1000000; | ||
364 | } | ||
365 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_v(void) | ||
366 | { | ||
367 | return 0x00000002; | ||
368 | } | ||
369 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_f(void) | ||
370 | { | ||
371 | return 0x2000000; | ||
372 | } | ||
373 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_wfi_v(void) | ||
374 | { | ||
375 | return 0x0000000a; | ||
376 | } | ||
377 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_wfi_f(void) | ||
378 | { | ||
379 | return 0xa000000; | ||
380 | } | ||
381 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_gfxp_v(void) | ||
382 | { | ||
383 | return 0x0000000b; | ||
384 | } | ||
385 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_gfxp_f(void) | ||
386 | { | ||
387 | return 0xb000000; | ||
388 | } | ||
389 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_ctap_v(void) | ||
390 | { | ||
391 | return 0x0000000c; | ||
392 | } | ||
393 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_ctap_f(void) | ||
394 | { | ||
395 | return 0xc000000; | ||
396 | } | ||
397 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_cilp_v(void) | ||
398 | { | ||
399 | return 0x0000000d; | ||
400 | } | ||
401 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_fe_ack_cilp_f(void) | ||
402 | { | ||
403 | return 0xd000000; | ||
404 | } | ||
405 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_save_end_v(void) | ||
406 | { | ||
407 | return 0x00000003; | ||
408 | } | ||
409 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_save_end_f(void) | ||
410 | { | ||
411 | return 0x3000000; | ||
412 | } | ||
413 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_restore_start_v(void) | ||
414 | { | ||
415 | return 0x00000004; | ||
416 | } | ||
417 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_restore_start_f(void) | ||
418 | { | ||
419 | return 0x4000000; | ||
420 | } | ||
421 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_context_start_v(void) | ||
422 | { | ||
423 | return 0x00000005; | ||
424 | } | ||
425 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_context_start_f(void) | ||
426 | { | ||
427 | return 0x5000000; | ||
428 | } | ||
429 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(void) | ||
430 | { | ||
431 | return 0x000000ff; | ||
432 | } | ||
433 | static inline u32 ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_f(void) | ||
434 | { | ||
435 | return 0xff000000; | ||
436 | } | ||
249 | #endif | 437 | #endif |
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h index 6db5654b..94770431 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2012-2016, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms and conditions of the GNU General Public License, | 5 | * under the terms and conditions of the GNU General Public License, |
@@ -258,6 +258,10 @@ static inline u32 ltc_ltcs_ltss_intr_en_evicted_cb_m(void) | |||
258 | { | 258 | { |
259 | return 0x1 << 20; | 259 | return 0x1 << 20; |
260 | } | 260 | } |
261 | static inline u32 ltc_ltcs_ltss_intr_en_illegal_compstat_m(void) | ||
262 | { | ||
263 | return 0x1 << 21; | ||
264 | } | ||
261 | static inline u32 ltc_ltc0_lts0_intr_r(void) | 265 | static inline u32 ltc_ltc0_lts0_intr_r(void) |
262 | { | 266 | { |
263 | return 0x00141020; | 267 | return 0x00141020; |
diff --git a/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h index 22bc50ac..4cb36cbe 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2013-2016, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms and conditions of the GNU General Public License, | 5 | * under the terms and conditions of the GNU General Public License, |
@@ -98,4 +98,12 @@ static inline u32 timer_pri_timeout_fecs_errcode_r(void) | |||
98 | { | 98 | { |
99 | return 0x0000908c; | 99 | return 0x0000908c; |
100 | } | 100 | } |
101 | static inline u32 timer_time_0_r(void) | ||
102 | { | ||
103 | return 0x00009400; | ||
104 | } | ||
105 | static inline u32 timer_time_1_r(void) | ||
106 | { | ||
107 | return 0x00009410; | ||
108 | } | ||
101 | #endif | 109 | #endif |
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c index c6ff07da..0d9a98b4 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | |||
@@ -1,9 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * drivers/video/tegra/host/gk20a/ltc_gk20a.c | 2 | * GK20A L2 |
3 | * | 3 | * |
4 | * GK20A Graphics | 4 | * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. |
5 | * | ||
6 | * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | 5 | * |
8 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
9 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -173,9 +171,17 @@ out: | |||
173 | 171 | ||
174 | static void gk20a_ltc_init_fs_state(struct gk20a *g) | 172 | static void gk20a_ltc_init_fs_state(struct gk20a *g) |
175 | { | 173 | { |
174 | u32 reg; | ||
175 | |||
176 | gk20a_dbg_info("initialize gk20a L2"); | 176 | gk20a_dbg_info("initialize gk20a L2"); |
177 | 177 | ||
178 | g->max_ltc_count = g->ltc_count = 1; | 178 | g->max_ltc_count = g->ltc_count = 1; |
179 | |||
180 | /* Disable LTC interrupts */ | ||
181 | reg = gk20a_readl(g, ltc_ltcs_ltss_intr_r()); | ||
182 | reg &= ~ltc_ltcs_ltss_intr_en_evicted_cb_m(); | ||
183 | reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_m(); | ||
184 | gk20a_writel(g, ltc_ltcs_ltss_intr_r(), reg); | ||
179 | } | 185 | } |
180 | 186 | ||
181 | static void gk20a_ltc_isr(struct gk20a *g) | 187 | static void gk20a_ltc_isr(struct gk20a *g) |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 738df2af..7a02d68e 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GK20A memory management | 2 | * GK20A memory management |
3 | * | 3 | * |
4 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -323,7 +323,7 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
323 | if (err) | 323 | if (err) |
324 | return err; | 324 | return err; |
325 | 325 | ||
326 | /* | 326 | /* |
327 | * offset needs to be at the start of a page/cacheline boundary; | 327 | * offset needs to be at the start of a page/cacheline boundary; |
328 | * prune the preceding ctaglines that were allocated for alignment. | 328 | * prune the preceding ctaglines that were allocated for alignment. |
329 | */ | 329 | */ |
@@ -1290,12 +1290,6 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | |||
1290 | int ctag_granularity = g->ops.fb.compression_page_size(g); | 1290 | int ctag_granularity = g->ops.fb.compression_page_size(g); |
1291 | u32 ctag_lines = DIV_ROUND_UP_ULL(size, ctag_granularity); | 1291 | u32 ctag_lines = DIV_ROUND_UP_ULL(size, ctag_granularity); |
1292 | 1292 | ||
1293 | if (clear_ctags && ctag_offset) { | ||
1294 | /* init/clear the ctag buffer */ | ||
1295 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, | ||
1296 | ctag_offset, ctag_offset + ctag_lines - 1); | ||
1297 | } | ||
1298 | |||
1299 | /* Allocate (or validate when map_offset != 0) the virtual address. */ | 1293 | /* Allocate (or validate when map_offset != 0) the virtual address. */ |
1300 | if (!map_offset) { | 1294 | if (!map_offset) { |
1301 | map_offset = gk20a_vm_alloc_va(vm, size, | 1295 | map_offset = gk20a_vm_alloc_va(vm, size, |
@@ -1651,17 +1645,14 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1651 | bfr.kind_v = bfr.uc_kind_v; | 1645 | bfr.kind_v = bfr.uc_kind_v; |
1652 | } else { | 1646 | } else { |
1653 | gk20a_get_comptags(d, dmabuf, &comptags); | 1647 | gk20a_get_comptags(d, dmabuf, &comptags); |
1654 | clear_ctags = true; | ||
1655 | |||
1656 | if (comptags.lines < comptags.allocated_lines) { | ||
1657 | /* clear tail-padding comptags */ | ||
1658 | u32 ctagmin = comptags.offset + comptags.lines; | ||
1659 | u32 ctagmax = comptags.offset + | ||
1660 | comptags.allocated_lines - 1; | ||
1661 | 1648 | ||
1649 | if (g->ops.ltc.cbc_ctrl) | ||
1662 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, | 1650 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, |
1663 | ctagmin, ctagmax); | 1651 | comptags.offset, |
1664 | } | 1652 | comptags.offset + |
1653 | comptags.allocated_lines - 1); | ||
1654 | else | ||
1655 | clear_ctags = true; | ||
1665 | } | 1656 | } |
1666 | } | 1657 | } |
1667 | 1658 | ||
@@ -2815,6 +2806,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2815 | u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit, | 2806 | u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit, |
2816 | kernel_vma_start, kernel_vma_limit; | 2807 | kernel_vma_start, kernel_vma_limit; |
2817 | u32 pde_lo, pde_hi; | 2808 | u32 pde_lo, pde_hi; |
2809 | struct gk20a *g = mm->g; | ||
2818 | 2810 | ||
2819 | /* note: this must match gmmu_pgsz_gk20a enum */ | 2811 | /* note: this must match gmmu_pgsz_gk20a enum */ |
2820 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K }; | 2812 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K }; |
@@ -2904,6 +2896,31 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2904 | goto clean_up_pdes; | 2896 | goto clean_up_pdes; |
2905 | } | 2897 | } |
2906 | 2898 | ||
2899 | /* | ||
2900 | * Attempt to make a separate VM for fixed allocations. | ||
2901 | */ | ||
2902 | if (g->separate_fixed_allocs && | ||
2903 | small_vma_start < small_vma_limit) { | ||
2904 | if (g->separate_fixed_allocs >= small_vma_limit) | ||
2905 | goto clean_up_pdes; | ||
2906 | |||
2907 | snprintf(alloc_name, sizeof(alloc_name), | ||
2908 | "gk20a_%s-fixed", name); | ||
2909 | |||
2910 | err = __gk20a_allocator_init(&vm->fixed, | ||
2911 | vm, alloc_name, | ||
2912 | small_vma_start, | ||
2913 | g->separate_fixed_allocs, | ||
2914 | SZ_4K, | ||
2915 | GPU_BALLOC_MAX_ORDER, | ||
2916 | GPU_BALLOC_GVA_SPACE); | ||
2917 | if (err) | ||
2918 | goto clean_up_ptes; | ||
2919 | |||
2920 | /* Make sure to update the user vma size. */ | ||
2921 | small_vma_start = g->separate_fixed_allocs; | ||
2922 | } | ||
2923 | |||
2907 | if (small_vma_start < small_vma_limit) { | 2924 | if (small_vma_start < small_vma_limit) { |
2908 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, | 2925 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, |
2909 | vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); | 2926 | vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); |
@@ -3066,14 +3083,17 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
3066 | } | 3083 | } |
3067 | 3084 | ||
3068 | vma = &vm->vma[pgsz_idx]; | 3085 | vma = &vm->vma[pgsz_idx]; |
3069 | if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) | 3086 | if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) { |
3087 | if (vm->fixed.init) | ||
3088 | vma = &vm->fixed; | ||
3070 | vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset, | 3089 | vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset, |
3071 | (u64)args->pages * | 3090 | (u64)args->pages * |
3072 | (u64)args->page_size); | 3091 | (u64)args->page_size); |
3073 | else | 3092 | } else { |
3074 | vaddr_start = gk20a_balloc(vma, | 3093 | vaddr_start = gk20a_balloc(vma, |
3075 | (u64)args->pages * | 3094 | (u64)args->pages * |
3076 | (u64)args->page_size); | 3095 | (u64)args->page_size); |
3096 | } | ||
3077 | 3097 | ||
3078 | if (!vaddr_start) { | 3098 | if (!vaddr_start) { |
3079 | kfree(va_node); | 3099 | kfree(va_node); |
@@ -3140,7 +3160,10 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
3140 | pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ? | 3160 | pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ? |
3141 | gmmu_page_size_big : gmmu_page_size_small; | 3161 | gmmu_page_size_big : gmmu_page_size_small; |
3142 | 3162 | ||
3143 | vma = &vm->vma[pgsz_idx]; | 3163 | if (vm->fixed.init) |
3164 | vma = &vm->fixed; | ||
3165 | else | ||
3166 | vma = &vm->vma[pgsz_idx]; | ||
3144 | gk20a_bfree(vma, args->offset); | 3167 | gk20a_bfree(vma, args->offset); |
3145 | 3168 | ||
3146 | mutex_lock(&vm->update_gmmu_lock); | 3169 | mutex_lock(&vm->update_gmmu_lock); |
@@ -3330,6 +3353,8 @@ void gk20a_deinit_vm(struct vm_gk20a *vm) | |||
3330 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); | 3353 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); |
3331 | if (vm->vma[gmmu_page_size_small].init) | 3354 | if (vm->vma[gmmu_page_size_small].init) |
3332 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | 3355 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); |
3356 | if (vm->fixed.init) | ||
3357 | gk20a_allocator_destroy(&vm->fixed); | ||
3333 | 3358 | ||
3334 | gk20a_vm_free_entries(vm, &vm->pdb, 0); | 3359 | gk20a_vm_free_entries(vm, &vm->pdb, 0); |
3335 | } | 3360 | } |
@@ -3843,6 +3868,16 @@ clean_up: | |||
3843 | return err; | 3868 | return err; |
3844 | } | 3869 | } |
3845 | 3870 | ||
3871 | void gk20a_mm_debugfs_init(struct platform_device *pdev) | ||
3872 | { | ||
3873 | struct gk20a_platform *platform = platform_get_drvdata(pdev); | ||
3874 | struct dentry *gpu_root = platform->debugfs; | ||
3875 | struct gk20a *g = gk20a_get_platform(pdev)->g; | ||
3876 | |||
3877 | debugfs_create_x64("separate_fixed_allocs", 0664, gpu_root, | ||
3878 | &g->separate_fixed_allocs); | ||
3879 | } | ||
3880 | |||
3846 | void gk20a_init_mm(struct gpu_ops *gops) | 3881 | void gk20a_init_mm(struct gpu_ops *gops) |
3847 | { | 3882 | { |
3848 | gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled; | 3883 | gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled; |
@@ -3863,4 +3898,3 @@ void gk20a_init_mm(struct gpu_ops *gops) | |||
3863 | gops->mm.init_pdb = gk20a_mm_init_pdb; | 3898 | gops->mm.init_pdb = gk20a_mm_init_pdb; |
3864 | gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw; | 3899 | gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw; |
3865 | } | 3900 | } |
3866 | |||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index b8b0ca49..368b32d3 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -259,6 +259,10 @@ struct vm_gk20a { | |||
259 | struct gk20a_mm_entry pdb; | 259 | struct gk20a_mm_entry pdb; |
260 | 260 | ||
261 | struct gk20a_allocator vma[gmmu_nr_page_sizes]; | 261 | struct gk20a_allocator vma[gmmu_nr_page_sizes]; |
262 | |||
263 | /* If necessary, split fixed from non-fixed. */ | ||
264 | struct gk20a_allocator fixed; | ||
265 | |||
262 | struct rb_root mapped_buffers; | 266 | struct rb_root mapped_buffers; |
263 | 267 | ||
264 | struct list_head reserved_va_list; | 268 | struct list_head reserved_va_list; |
@@ -279,6 +283,7 @@ struct channel_gk20a; | |||
279 | int gk20a_init_mm_support(struct gk20a *g); | 283 | int gk20a_init_mm_support(struct gk20a *g); |
280 | int gk20a_init_mm_setup_sw(struct gk20a *g); | 284 | int gk20a_init_mm_setup_sw(struct gk20a *g); |
281 | int gk20a_init_mm_setup_hw(struct gk20a *g); | 285 | int gk20a_init_mm_setup_hw(struct gk20a *g); |
286 | void gk20a_mm_debugfs_init(struct platform_device *pdev); | ||
282 | 287 | ||
283 | int gk20a_mm_fb_flush(struct gk20a *g); | 288 | int gk20a_mm_fb_flush(struct gk20a *g); |
284 | void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); | 289 | void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); |
@@ -332,9 +337,9 @@ struct mm_gk20a { | |||
332 | #ifdef CONFIG_DEBUG_FS | 337 | #ifdef CONFIG_DEBUG_FS |
333 | u32 ltc_enabled; | 338 | u32 ltc_enabled; |
334 | u32 ltc_enabled_debug; | 339 | u32 ltc_enabled_debug; |
340 | #endif | ||
335 | u32 bypass_smmu; | 341 | u32 bypass_smmu; |
336 | u32 disable_bigpage; | 342 | u32 disable_bigpage; |
337 | #endif | ||
338 | }; | 343 | }; |
339 | 344 | ||
340 | int gk20a_mm_init(struct mm_gk20a *mm); | 345 | int gk20a_mm_init(struct mm_gk20a *mm); |
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h index 84b3fcaf..6bffed9e 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h | |||
@@ -59,8 +59,10 @@ struct gk20a_platform { | |||
59 | struct clk *clk[3]; | 59 | struct clk *clk[3]; |
60 | int num_clks; | 60 | int num_clks; |
61 | 61 | ||
62 | #ifdef CONFIG_RESET_CONTROLLER | ||
62 | /* Reset control for device */ | 63 | /* Reset control for device */ |
63 | struct reset_control *reset_control; | 64 | struct reset_control *reset_control; |
65 | #endif | ||
64 | 66 | ||
65 | /* Delay before rail gated */ | 67 | /* Delay before rail gated */ |
66 | int railgate_delay; | 68 | int railgate_delay; |
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c index 60ffa381..15d6609d 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * GK20A Tegra Platform Interface | 4 | * GK20A Tegra Platform Interface |
5 | * | 5 | * |
6 | * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. | 6 | * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or modify it | 8 | * This program is free software; you can redistribute it and/or modify it |
9 | * under the terms and conditions of the GNU General Public License, | 9 | * under the terms and conditions of the GNU General Public License, |
@@ -822,7 +822,7 @@ static long gk20a_round_clk_rate(struct platform_device *dev, | |||
822 | return gk20a_clk_round_rate(g, rate); | 822 | return gk20a_clk_round_rate(g, rate); |
823 | } | 823 | } |
824 | 824 | ||
825 | int gk20a_set_clk_rate(struct platform_device *dev, unsigned long rate) | 825 | static int gk20a_set_clk_rate(struct platform_device *dev, unsigned long rate) |
826 | { | 826 | { |
827 | struct gk20a_platform *platform = gk20a_get_platform(dev); | 827 | struct gk20a_platform *platform = gk20a_get_platform(dev); |
828 | struct gk20a *g = platform->g; | 828 | struct gk20a *g = platform->g; |
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 30592ee2..60c87979 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | |||
@@ -4426,7 +4426,7 @@ int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id) | |||
4426 | return status; | 4426 | return status; |
4427 | } | 4427 | } |
4428 | 4428 | ||
4429 | #if CONFIG_DEBUG_FS | 4429 | #ifdef CONFIG_DEBUG_FS |
4430 | static int elpg_residency_show(struct seq_file *s, void *data) | 4430 | static int elpg_residency_show(struct seq_file *s, void *data) |
4431 | { | 4431 | { |
4432 | struct gk20a *g = s->private; | 4432 | struct gk20a *g = s->private; |
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c index 4421744c..b41cca08 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c | |||
@@ -228,6 +228,7 @@ int gk20a_tsg_open(struct gk20a *g, struct file *filp) | |||
228 | 228 | ||
229 | tsg->tsg_gr_ctx = NULL; | 229 | tsg->tsg_gr_ctx = NULL; |
230 | tsg->vm = NULL; | 230 | tsg->vm = NULL; |
231 | tsg->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW; | ||
231 | 232 | ||
232 | filp->private_data = tsg; | 233 | filp->private_data = tsg; |
233 | 234 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h index bcc4d0c4..7e0a75d1 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | |||
@@ -49,6 +49,8 @@ struct tsg_gk20a { | |||
49 | struct gr_ctx_desc *tsg_gr_ctx; | 49 | struct gr_ctx_desc *tsg_gr_ctx; |
50 | 50 | ||
51 | struct vm_gk20a *vm; | 51 | struct vm_gk20a *vm; |
52 | |||
53 | u32 interleave_level; | ||
52 | }; | 54 | }; |
53 | 55 | ||
54 | int gk20a_enable_tsg(struct tsg_gk20a *tsg); | 56 | int gk20a_enable_tsg(struct tsg_gk20a *tsg); |
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index 9f137246..8a0be106 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c | |||
@@ -362,7 +362,7 @@ int prepare_ucode_blob(struct gk20a *g) | |||
362 | gm20b_dbg_pmu("prepare ucode blob return 0\n"); | 362 | gm20b_dbg_pmu("prepare ucode blob return 0\n"); |
363 | free_acr_resources(g, plsfm); | 363 | free_acr_resources(g, plsfm); |
364 | free_sgt: | 364 | free_sgt: |
365 | kfree(sgt); | 365 | gk20a_free_sgtable(&sgt); |
366 | return err; | 366 | return err; |
367 | } | 367 | } |
368 | 368 | ||
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index d1deffb9..b9763224 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GM20B Fifo | 2 | * GM20B Fifo |
3 | * | 3 | * |
4 | * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -114,6 +114,7 @@ void gm20b_init_fifo(struct gpu_ops *gops) | |||
114 | gops->fifo.free_inst = channel_gk20a_free_inst; | 114 | gops->fifo.free_inst = channel_gk20a_free_inst; |
115 | gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc; | 115 | gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc; |
116 | gops->fifo.channel_set_priority = gk20a_channel_set_priority; | 116 | gops->fifo.channel_set_priority = gk20a_channel_set_priority; |
117 | gops->fifo.channel_set_timeslice = gk20a_channel_set_timeslice; | ||
117 | 118 | ||
118 | gops->fifo.preempt_channel = gk20a_fifo_preempt_channel; | 119 | gops->fifo.preempt_channel = gk20a_fifo_preempt_channel; |
119 | gops->fifo.update_runlist = gk20a_fifo_update_runlist; | 120 | gops->fifo.update_runlist = gk20a_fifo_update_runlist; |
@@ -121,4 +122,5 @@ void gm20b_init_fifo(struct gpu_ops *gops) | |||
121 | gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle; | 122 | gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle; |
122 | gops->fifo.get_num_fifos = gm20b_fifo_get_num_fifos; | 123 | gops->fifo.get_num_fifos = gm20b_fifo_get_num_fifos; |
123 | gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature; | 124 | gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature; |
125 | gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave; | ||
124 | } | 126 | } |
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h index 95e0c43d..aa01e945 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms and conditions of the GNU General Public License, | 5 | * under the terms and conditions of the GNU General Public License, |
@@ -286,6 +286,10 @@ static inline u32 ltc_ltcs_ltss_intr_en_illegal_compstat_access_m(void) | |||
286 | { | 286 | { |
287 | return 0x1 << 30; | 287 | return 0x1 << 30; |
288 | } | 288 | } |
289 | static inline u32 ltc_ltcs_ltss_intr_en_illegal_compstat_m(void) | ||
290 | { | ||
291 | return 0x1 << 21; | ||
292 | } | ||
289 | static inline u32 ltc_ltc0_lts0_intr_r(void) | 293 | static inline u32 ltc_ltc0_lts0_intr_r(void) |
290 | { | 294 | { |
291 | return 0x0014040c; | 295 | return 0x0014040c; |
diff --git a/drivers/gpu/nvgpu/gm20b/hw_timer_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_timer_gm20b.h index 126f7c8c..06d02522 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_timer_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_timer_gm20b.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms and conditions of the GNU General Public License, | 5 | * under the terms and conditions of the GNU General Public License, |
@@ -98,4 +98,12 @@ static inline u32 timer_pri_timeout_fecs_errcode_r(void) | |||
98 | { | 98 | { |
99 | return 0x0000908c; | 99 | return 0x0000908c; |
100 | } | 100 | } |
101 | static inline u32 timer_time_0_r(void) | ||
102 | { | ||
103 | return 0x00009400; | ||
104 | } | ||
105 | static inline u32 timer_time_1_r(void) | ||
106 | { | ||
107 | return 0x00009410; | ||
108 | } | ||
101 | #endif | 109 | #endif |
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 5b6bff7f..ffc36903 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GM20B L2 | 2 | * GM20B L2 |
3 | * | 3 | * |
4 | * Copyright (c) 2014-2015 NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2014-2016 NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -190,6 +190,7 @@ void gm20b_ltc_init_fs_state(struct gk20a *g) | |||
190 | reg = gk20a_readl(g, ltc_ltcs_ltss_intr_r()); | 190 | reg = gk20a_readl(g, ltc_ltcs_ltss_intr_r()); |
191 | reg &= ~ltc_ltcs_ltss_intr_en_evicted_cb_m(); | 191 | reg &= ~ltc_ltcs_ltss_intr_en_evicted_cb_m(); |
192 | reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_access_m(); | 192 | reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_access_m(); |
193 | reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_m(); | ||
193 | gk20a_writel(g, ltc_ltcs_ltss_intr_r(), reg); | 194 | gk20a_writel(g, ltc_ltcs_ltss_intr_r(), reg); |
194 | } | 195 | } |
195 | 196 | ||
diff --git a/drivers/gpu/nvgpu/gm20b/therm_gm20b.c b/drivers/gpu/nvgpu/gm20b/therm_gm20b.c index 5bd22841..6ebc4c91 100644 --- a/drivers/gpu/nvgpu/gm20b/therm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/therm_gm20b.c | |||
@@ -15,6 +15,7 @@ | |||
15 | 15 | ||
16 | #include "gk20a/gk20a.h" | 16 | #include "gk20a/gk20a.h" |
17 | #include "hw_therm_gm20b.h" | 17 | #include "hw_therm_gm20b.h" |
18 | #include "therm_gm20b.h" | ||
18 | 19 | ||
19 | static int gm20b_init_therm_setup_hw(struct gk20a *g) | 20 | static int gm20b_init_therm_setup_hw(struct gk20a *g) |
20 | { | 21 | { |
diff --git a/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c new file mode 100644 index 00000000..cb955811 --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.c | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <linux/string.h> | ||
15 | #include "gk20a/gk20a.h" | ||
16 | #include "fecs_trace_vgpu.h" | ||
17 | |||
18 | void vgpu_init_fecs_trace_ops(struct gpu_ops *ops) | ||
19 | { | ||
20 | memset(&ops->fecs_trace, 0, sizeof(ops->fecs_trace)); | ||
21 | } | ||
diff --git a/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h new file mode 100644 index 00000000..1aace1fe --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/fecs_trace_vgpu.h | |||
@@ -0,0 +1,20 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #ifndef __FECS_TRACE_VGPU_H | ||
15 | #define __FECS_TRACE_VGPU_H | ||
16 | |||
17 | struct gpu_ops; | ||
18 | void vgpu_init_fecs_trace_ops(struct gpu_ops *ops); | ||
19 | |||
20 | #endif /* __FECS_TRACE_VGPU_H */ | ||
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index e776e97c..9e40218d 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Virtualized GPU Fifo | 2 | * Virtualized GPU Fifo |
3 | * | 3 | * |
4 | * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -81,6 +81,7 @@ static int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) | |||
81 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_HWCTX; | 81 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_HWCTX; |
82 | msg.handle = platform->virt_handle; | 82 | msg.handle = platform->virt_handle; |
83 | p->id = ch->hw_chid; | 83 | p->id = ch->hw_chid; |
84 | p->pid = (u64)current->pid; | ||
84 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | 85 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); |
85 | if (err || msg.ret) { | 86 | if (err || msg.ret) { |
86 | gk20a_err(dev_from_gk20a(g), "fail"); | 87 | gk20a_err(dev_from_gk20a(g), "fail"); |
@@ -194,12 +195,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) | |||
194 | if (!runlist->active_channels) | 195 | if (!runlist->active_channels) |
195 | goto clean_up_runlist_info; | 196 | goto clean_up_runlist_info; |
196 | 197 | ||
197 | runlist->high_prio_channels = | ||
198 | kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE), | ||
199 | GFP_KERNEL); | ||
200 | if (!runlist->high_prio_channels) | ||
201 | goto clean_up_runlist_info; | ||
202 | |||
203 | runlist_size = sizeof(u16) * f->num_channels; | 198 | runlist_size = sizeof(u16) * f->num_channels; |
204 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | 199 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { |
205 | int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); | 200 | int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); |
@@ -222,9 +217,6 @@ clean_up_runlist: | |||
222 | gk20a_gmmu_free(g, &runlist->mem[i]); | 217 | gk20a_gmmu_free(g, &runlist->mem[i]); |
223 | 218 | ||
224 | clean_up_runlist_info: | 219 | clean_up_runlist_info: |
225 | kfree(runlist->high_prio_channels); | ||
226 | runlist->high_prio_channels = NULL; | ||
227 | |||
228 | kfree(runlist->active_channels); | 220 | kfree(runlist->active_channels); |
229 | runlist->active_channels = NULL; | 221 | runlist->active_channels = NULL; |
230 | 222 | ||
@@ -550,6 +542,54 @@ static int vgpu_channel_set_priority(struct channel_gk20a *ch, u32 priority) | |||
550 | return err ? err : msg.ret; | 542 | return err ? err : msg.ret; |
551 | } | 543 | } |
552 | 544 | ||
545 | static int vgpu_fifo_set_runlist_interleave(struct gk20a *g, | ||
546 | u32 id, | ||
547 | bool is_tsg, | ||
548 | u32 runlist_id, | ||
549 | u32 new_level) | ||
550 | { | ||
551 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
552 | struct tegra_vgpu_cmd_msg msg; | ||
553 | struct tegra_vgpu_channel_runlist_interleave_params *p = | ||
554 | &msg.params.channel_interleave; | ||
555 | struct channel_gk20a *ch; | ||
556 | int err; | ||
557 | |||
558 | gk20a_dbg_fn(""); | ||
559 | |||
560 | /* FIXME: add support for TSGs */ | ||
561 | if (is_tsg) | ||
562 | return -ENOSYS; | ||
563 | |||
564 | ch = &g->fifo.channel[id]; | ||
565 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_RUNLIST_INTERLEAVE; | ||
566 | msg.handle = platform->virt_handle; | ||
567 | p->handle = ch->virt_ctx; | ||
568 | p->level = new_level; | ||
569 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
570 | WARN_ON(err || msg.ret); | ||
571 | return err ? err : msg.ret; | ||
572 | } | ||
573 | |||
574 | int vgpu_channel_set_timeslice(struct channel_gk20a *ch, u32 timeslice) | ||
575 | { | ||
576 | struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev); | ||
577 | struct tegra_vgpu_cmd_msg msg; | ||
578 | struct tegra_vgpu_channel_timeslice_params *p = | ||
579 | &msg.params.channel_timeslice; | ||
580 | int err; | ||
581 | |||
582 | gk20a_dbg_fn(""); | ||
583 | |||
584 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_TIMESLICE; | ||
585 | msg.handle = platform->virt_handle; | ||
586 | p->handle = ch->virt_ctx; | ||
587 | p->timeslice_us = timeslice; | ||
588 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
589 | WARN_ON(err || msg.ret); | ||
590 | return err ? err : msg.ret; | ||
591 | } | ||
592 | |||
553 | static void vgpu_fifo_set_ctx_mmu_error(struct gk20a *g, | 593 | static void vgpu_fifo_set_ctx_mmu_error(struct gk20a *g, |
554 | struct channel_gk20a *ch) | 594 | struct channel_gk20a *ch) |
555 | { | 595 | { |
@@ -635,5 +675,6 @@ void vgpu_init_fifo_ops(struct gpu_ops *gops) | |||
635 | gops->fifo.update_runlist = vgpu_fifo_update_runlist; | 675 | gops->fifo.update_runlist = vgpu_fifo_update_runlist; |
636 | gops->fifo.wait_engine_idle = vgpu_fifo_wait_engine_idle; | 676 | gops->fifo.wait_engine_idle = vgpu_fifo_wait_engine_idle; |
637 | gops->fifo.channel_set_priority = vgpu_channel_set_priority; | 677 | gops->fifo.channel_set_priority = vgpu_channel_set_priority; |
678 | gops->fifo.set_runlist_interleave = vgpu_fifo_set_runlist_interleave; | ||
679 | gops->fifo.channel_set_timeslice = vgpu_channel_set_timeslice; | ||
638 | } | 680 | } |
639 | |||
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c index e8328326..5a953e20 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/dma-mapping.h> | 18 | #include <linux/dma-mapping.h> |
19 | #include <linux/pm_runtime.h> | 19 | #include <linux/pm_runtime.h> |
20 | #include "vgpu/vgpu.h" | 20 | #include "vgpu/vgpu.h" |
21 | #include "vgpu/fecs_trace_vgpu.h" | ||
21 | #include "gk20a/debug_gk20a.h" | 22 | #include "gk20a/debug_gk20a.h" |
22 | #include "gk20a/hal_gk20a.h" | 23 | #include "gk20a/hal_gk20a.h" |
23 | #include "gk20a/hw_mc_gk20a.h" | 24 | #include "gk20a/hw_mc_gk20a.h" |
@@ -259,6 +260,7 @@ void vgpu_init_hal_common(struct gk20a *g) | |||
259 | vgpu_init_ltc_ops(gops); | 260 | vgpu_init_ltc_ops(gops); |
260 | vgpu_init_mm_ops(gops); | 261 | vgpu_init_mm_ops(gops); |
261 | vgpu_init_debug_ops(gops); | 262 | vgpu_init_debug_ops(gops); |
263 | vgpu_init_fecs_trace_ops(gops); | ||
262 | } | 264 | } |
263 | 265 | ||
264 | static int vgpu_init_hal(struct gk20a *g) | 266 | static int vgpu_init_hal(struct gk20a *g) |