From 9aa7de15c2a644e9c7e9c157e49087e66d4ac3d0 Mon Sep 17 00:00:00 2001 From: Peter Daifuku Date: Wed, 31 Aug 2016 17:04:56 -0700 Subject: gpu: nvgpu: vgpu: cyclestat snapshot support Add support for cyclestats snapshots in the virtual case Bug 1700143 JIRA EVLR-278 Change-Id: I376a8804d57324f43eb16452d857a3b7bb0ecc90 Signed-off-by: Peter Daifuku Reviewed-on: http://git-master/r/1211547 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/Makefile.nvgpu | 1 + drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 6 +- drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | 233 ++++++++++++++------------------ drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h | 119 ++++++++++++++++ drivers/gpu/nvgpu/gk20a/gk20a.h | 18 +++ drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 12 +- drivers/gpu/nvgpu/gk20a/hal_gk20a.c | 4 + drivers/gpu/nvgpu/gm206/hal_gm206.c | 4 + drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 4 + drivers/gpu/nvgpu/vgpu/css_vgpu.c | 221 ++++++++++++++++++++++++++++++ drivers/gpu/nvgpu/vgpu/gr_vgpu.c | 4 + drivers/gpu/nvgpu/vgpu/vgpu.c | 3 + drivers/gpu/nvgpu/vgpu/vgpu.h | 8 ++ 13 files changed, 494 insertions(+), 143 deletions(-) create mode 100644 drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h create mode 100644 drivers/gpu/nvgpu/vgpu/css_vgpu.c (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index 5ca2f56f..b8e38919 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu @@ -105,6 +105,7 @@ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \ vgpu/dbg_vgpu.o \ vgpu/fecs_trace_vgpu.o \ vgpu/tsg_vgpu.o \ + vgpu/css_vgpu.o \ vgpu/gk20a/vgpu_hal_gk20a.o \ vgpu/gk20a/vgpu_gr_gk20a.o \ vgpu/gm20b/vgpu_hal_gm20b.o \ diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 49711af9..d23a8026 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -603,7 +603,7 @@ static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch) 
mutex_lock(&ch->cs_client_mutex); if (ch->cs_client) - ret = gr_gk20a_css_flush(ch->g, ch->cs_client); + ret = gr_gk20a_css_flush(ch, ch->cs_client); else ret = -EBADF; mutex_unlock(&ch->cs_client_mutex); @@ -622,7 +622,7 @@ static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch, if (ch->cs_client) { ret = -EEXIST; } else { - ret = gr_gk20a_css_attach(ch->g, + ret = gr_gk20a_css_attach(ch, dmabuf_fd, perfmon_id_count, perfmon_id_start, @@ -639,7 +639,7 @@ static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch) mutex_lock(&ch->cs_client_mutex); if (ch->cs_client) { - ret = gr_gk20a_css_detach(ch->g, ch->cs_client); + ret = gr_gk20a_css_detach(ch, ch->cs_client); ch->cs_client = NULL; } else { ret = 0; diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c index 62f60761..71614d6e 100644 --- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c @@ -25,93 +25,13 @@ #include "gk20a.h" #include "hw_perf_gk20a.h" #include "hw_mc_gk20a.h" - - - -/* cycle stats fifo header (must match NvSnapshotBufferFifo) */ -struct gk20a_cs_snapshot_fifo { - /* layout description of the buffer */ - u32 start; - u32 end; - - /* snafu bits */ - u32 hw_overflow_events_occured; - u32 sw_overflow_events_occured; - - /* the kernel copies new entries to put and - * increment the put++. 
if put == get then - * overflowEventsOccured++ - */ - u32 put; - u32 _reserved10; - u32 _reserved11; - u32 _reserved12; - - /* the driver/client reads from get until - * put==get, get++ */ - u32 get; - u32 _reserved20; - u32 _reserved21; - u32 _reserved22; - - /* unused */ - u32 _reserved30; - u32 _reserved31; - u32 _reserved32; - u32 _reserved33; -}; - -/* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */ -struct gk20a_cs_snapshot_fifo_entry { - /* global 48 timestamp */ - u32 timestamp31_00:32; - u32 timestamp39_32:8; - - /* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */ - u32 perfmon_id:8; - - /* typically samples_counter is wired to #pmtrigger count */ - u32 samples_counter:12; - - /* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */ - u32 ds:1; - u32 sz:1; - u32 zero0:1; - u32 zero1:1; - - /* counter results */ - u32 event_cnt:32; - u32 trigger0_cnt:32; - u32 trigger1_cnt:32; - u32 sample_cnt:32; - - /* Local PmTrigger results for Maxwell+ or padding otherwise */ - u16 local_trigger_b_count:16; - u16 book_mark_b:16; - u16 local_trigger_a_count:16; - u16 book_mark_a:16; -}; - - -/* cycle stats snapshot client data (e.g. 
associated with channel) */ -struct gk20a_cs_snapshot_client { - struct list_head list; - u32 dmabuf_fd; - struct dma_buf *dma_handler; - struct gk20a_cs_snapshot_fifo *snapshot; - u32 snapshot_size; - u32 perfmon_start; - u32 perfmon_count; -}; +#include "css_gr_gk20a.h" /* check client for pointed perfmon ownership */ #define CONTAINS_PERFMON(cl, pm) \ ((cl)->perfmon_start <= (pm) && \ ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count) -/* the minimal size of HW buffer - should be enough to avoid HW overflows */ -#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024) - /* the minimal size of client buffer */ #define CSS_MIN_CLIENT_SNAPSHOT_SIZE \ (sizeof(struct gk20a_cs_snapshot_fifo) + \ @@ -131,20 +51,6 @@ struct gk20a_cs_snapshot_client { /* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */ #define CSS_MAX_PERFMON_IDS 256 -/* local definitions to avoid hardcodes sizes and shifts */ -#define PM_BITMAP_SIZE DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG) - -/* cycle stats snapshot control structure for one HW entry and many clients */ -struct gk20a_cs_snapshot { - unsigned long perfmon_ids[PM_BITMAP_SIZE]; - struct list_head clients; - struct mem_desc hw_memdesc; - /* pointer to allocated cpu_va memory where GPU place data */ - struct gk20a_cs_snapshot_fifo_entry *hw_snapshot; - struct gk20a_cs_snapshot_fifo_entry *hw_end; - struct gk20a_cs_snapshot_fifo_entry *hw_get; -}; - /* reports whether the hw queue overflowed */ static inline bool css_hw_get_overflow_status(struct gk20a *g) { @@ -215,10 +121,13 @@ static int css_gr_create_shared_data(struct gr_gk20a *gr) return 0; } -static int css_hw_enable_snapshot(struct gr_gk20a *gr, u32 snapshot_size) +static int css_hw_enable_snapshot(struct channel_gk20a *ch, + struct gk20a_cs_snapshot_client *cs_client) { - struct gk20a *g = gr->g; + struct gk20a *g = ch->g; + struct gr_gk20a *gr = &g->gr; struct gk20a_cs_snapshot *data = gr->cs_data; + u32 snapshot_size = cs_client->snapshot_size; int ret; 
u32 virt_addr_lo; @@ -317,9 +226,11 @@ static void css_hw_disable_snapshot(struct gr_gk20a *gr) static void css_gr_free_shared_data(struct gr_gk20a *gr) { + struct gk20a *g = gr->g; + if (gr->cs_data) { /* the clients list is expected to be empty */ - css_hw_disable_snapshot(gr); + g->ops.css.disable_snapshot(gr); /* release the objects */ kfree(gr->cs_data); @@ -344,12 +255,15 @@ css_gr_search_client(struct list_head *clients, u32 perfmon) return NULL; } -static int css_gr_flush_snapshots(struct gr_gk20a *gr) +static int css_gr_flush_snapshots(struct channel_gk20a *ch) { - struct gk20a *g = gr->g; + struct gk20a *g = ch->g; + struct gr_gk20a *gr = &g->gr; struct gk20a_cs_snapshot *css = gr->cs_data; struct gk20a_cs_snapshot_client *cur; - u32 pending; + u32 pending, completed; + bool hw_overflow; + int err; /* variables for iterating over HW entries */ u32 sid; @@ -360,24 +274,25 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr) struct gk20a_cs_snapshot_fifo *dst; struct gk20a_cs_snapshot_fifo_entry *dst_get; struct gk20a_cs_snapshot_fifo_entry *dst_put; + struct gk20a_cs_snapshot_fifo_entry *dst_nxt; struct gk20a_cs_snapshot_fifo_entry *dst_head; struct gk20a_cs_snapshot_fifo_entry *dst_tail; if (!css) return -EINVAL; - if (!css->hw_snapshot) - return -EINVAL; - if (list_empty(&css->clients)) return -EBADF; /* check data available */ - pending = css_hw_get_pending_snapshots(g); + err = g->ops.css.check_data_available(ch, &pending, &hw_overflow); + if (err) + return err; + if (!pending) return 0; - if (css_hw_get_overflow_status(g)) { + if (hw_overflow) { struct list_head *pos; list_for_each(pos, &css->clients) { @@ -387,11 +302,12 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr) } gk20a_warn(dev_from_gk20a(g), - "cyclestats: hardware overflow detected\n"); + "cyclestats: hardware overflow detected\n"); } - /* proceed all items in HW buffer */ + /* process all items in HW buffer */ sid = 0; + completed = 0; cur = NULL; dst = NULL; dst_put = NULL; 
@@ -419,7 +335,11 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr) dst_get = CSS_FIFO_ENTRY(dst, dst->get); dst_put = CSS_FIFO_ENTRY(dst, dst->put); dst_head = CSS_FIFO_ENTRY(dst, dst->start); - dst_tail = CSS_FIFO_ENTRY(dst, dst->end) - 1; + dst_tail = CSS_FIFO_ENTRY(dst, dst->end); + + dst_nxt = dst_put + 1; + if (dst_nxt == dst_tail) + dst_nxt = dst_head; } else { /* client not found - skipping this entry */ gk20a_warn(dev_from_gk20a(g), @@ -430,8 +350,7 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr) } /* check for software overflows */ - if (dst_put + 1 == dst_get || - (dst_put == dst_tail && dst_get == dst_head)) { + if (dst_nxt == dst_get) { /* no data copy, no pointer updates */ dst->sw_overflow_events_occured++; gk20a_warn(dev_from_gk20a(g), @@ -439,10 +358,12 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr) src->perfmon_id); } else { *dst_put = *src; - if (dst_put == dst_tail) - dst_put = dst_head; - else - dst_put++; + completed++; + + dst_put = dst_nxt++; + + if (dst_nxt == dst_tail) + dst_nxt = dst_head; } next_hw_fifo_entry: @@ -465,14 +386,17 @@ next_hw_fifo_entry: (css->hw_end - css->hw_get) * sizeof(*src)); } gr->cs_data->hw_get = src; - css_hw_set_handled_snapshots(g, sid); - if (pending != sid) { + + if (g->ops.css.set_handled_snapshots) + g->ops.css.set_handled_snapshots(g, sid); + + if (completed != sid) { /* not all entries proceed correctly. 
some of problems */ /* reported as overflows, some as orphaned perfmons, */ /* but it will be better notify with summary about it */ gk20a_warn(dev_from_gk20a(g), - "cyclestats: done %u from %u entries\n", - sid, pending); + "cyclestats: completed %u from %u entries\n", + completed, pending); } return 0; @@ -511,7 +435,8 @@ static u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data, } -static int css_gr_free_client_data(struct gk20a_cs_snapshot *data, +static int css_gr_free_client_data(struct gk20a *g, + struct gk20a_cs_snapshot *data, struct gk20a_cs_snapshot_client *client) { int ret = 0; @@ -519,8 +444,9 @@ static int css_gr_free_client_data(struct gk20a_cs_snapshot *data, if (client->list.next && client->list.prev) list_del(&client->list); - if (client->perfmon_start && client->perfmon_count) { - if (client->perfmon_count != css_gr_release_perfmon_ids(data, + if (client->perfmon_start && client->perfmon_count + && g->ops.css.release_perfmon_ids) { + if (client->perfmon_count != g->ops.css.release_perfmon_ids(data, client->perfmon_start, client->perfmon_count)) ret = -EINVAL; } @@ -536,7 +462,8 @@ static int css_gr_free_client_data(struct gk20a_cs_snapshot *data, return ret; } -static int css_gr_create_client_data(struct gk20a_cs_snapshot *data, +static int css_gr_create_client_data(struct gk20a *g, + struct gk20a_cs_snapshot *data, u32 dmabuf_fd, u32 perfmon_count, struct gk20a_cs_snapshot_client **client) { @@ -581,8 +508,12 @@ static int css_gr_create_client_data(struct gk20a_cs_snapshot *data, cur->snapshot->put = cur->snapshot->start; cur->perfmon_count = perfmon_count; - if (cur->perfmon_count) { - cur->perfmon_start = css_gr_allocate_perfmon_ids(data, + + /* In virtual case, perfmon ID allocation is handled by the server + * at the time of the attach (allocate_perfmon_ids is NULL in this case) + */ + if (cur->perfmon_count && g->ops.css.allocate_perfmon_ids) { + cur->perfmon_start = g->ops.css.allocate_perfmon_ids(data, cur->perfmon_count); if 
(!cur->perfmon_start) { ret = -ENOENT; @@ -598,19 +529,20 @@ static int css_gr_create_client_data(struct gk20a_cs_snapshot *data, failed: *client = NULL; if (cur) - css_gr_free_client_data(data, cur); + css_gr_free_client_data(g, data, cur); return ret; } -int gr_gk20a_css_attach(struct gk20a *g, +int gr_gk20a_css_attach(struct channel_gk20a *ch, u32 dmabuf_fd, u32 perfmon_count, u32 *perfmon_start, struct gk20a_cs_snapshot_client **cs_client) { int ret = 0; + struct gk20a *g = ch->g; struct gr_gk20a *gr; /* we must have a placeholder to store pointer to client structure */ @@ -630,14 +562,14 @@ int gr_gk20a_css_attach(struct gk20a *g, if (ret) goto failed; - ret = css_gr_create_client_data(gr->cs_data, + ret = css_gr_create_client_data(g, gr->cs_data, dmabuf_fd, perfmon_count, cs_client); if (ret) goto failed; - ret = css_hw_enable_snapshot(gr, (*cs_client)->snapshot_size); + ret = g->ops.css.enable_snapshot(ch, *cs_client); if (ret) goto failed; @@ -651,7 +583,7 @@ int gr_gk20a_css_attach(struct gk20a *g, failed: if (gr->cs_data) { if (*cs_client) { - css_gr_free_client_data(gr->cs_data, *cs_client); + css_gr_free_client_data(g, gr->cs_data, *cs_client); *cs_client = NULL; } @@ -666,10 +598,11 @@ failed: return ret; } -int gr_gk20a_css_detach(struct gk20a *g, +int gr_gk20a_css_detach(struct channel_gk20a *ch, struct gk20a_cs_snapshot_client *cs_client) { int ret = 0; + struct gk20a *g = ch->g; struct gr_gk20a *gr; if (!cs_client) @@ -680,7 +613,10 @@ int gr_gk20a_css_detach(struct gk20a *g, if (gr->cs_data) { struct gk20a_cs_snapshot *data = gr->cs_data; - ret = css_gr_free_client_data(data, cs_client); + if (g->ops.css.detach_snapshot) + g->ops.css.detach_snapshot(ch, cs_client); + + ret = css_gr_free_client_data(g, data, cs_client); if (list_empty(&data->clients)) css_gr_free_shared_data(gr); } else { @@ -691,10 +627,11 @@ int gr_gk20a_css_detach(struct gk20a *g, return ret; } -int gr_gk20a_css_flush(struct gk20a *g, +int gr_gk20a_css_flush(struct channel_gk20a 
*ch, struct gk20a_cs_snapshot_client *cs_client) { int ret = 0; + struct gk20a *g = ch->g; struct gr_gk20a *gr; if (!cs_client) @@ -702,7 +639,7 @@ int gr_gk20a_css_flush(struct gk20a *g, gr = &g->gr; mutex_lock(&gr->cs_lock); - ret = css_gr_flush_snapshots(gr); + ret = css_gr_flush_snapshots(ch); mutex_unlock(&gr->cs_lock); return ret; @@ -718,3 +655,31 @@ void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g) mutex_unlock(&gr->cs_lock); mutex_destroy(&gr->cs_lock); } + +static int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending, + bool *hw_overflow) +{ + struct gk20a *g = ch->g; + struct gr_gk20a *gr = &g->gr; + struct gk20a_cs_snapshot *css = gr->cs_data; + + if (!css->hw_snapshot) + return -EINVAL; + + *pending = css_hw_get_pending_snapshots(g); + if (!*pending) + return 0; + + *hw_overflow = css_hw_get_overflow_status(g); + return 0; +} + +void gk20a_init_css_ops(struct gpu_ops *gops) +{ + gops->css.enable_snapshot = css_hw_enable_snapshot; + gops->css.disable_snapshot = css_hw_disable_snapshot; + gops->css.check_data_available = css_hw_check_data_available; + gops->css.set_handled_snapshots = css_hw_set_handled_snapshots; + gops->css.allocate_perfmon_ids = css_gr_allocate_perfmon_ids; + gops->css.release_perfmon_ids = css_gr_release_perfmon_ids; +} diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h new file mode 100644 index 00000000..be638abf --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h @@ -0,0 +1,119 @@ +/* + * GK20A Cycle stats snapshots support (subsystem for gr_gk20a). + * + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef CSS_GR_GK20A_H +#define CSS_GR_GK20A_H + +/* the minimal size of HW buffer - should be enough to avoid HW overflows */ +#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024) + +/* cycle stats fifo header (must match NvSnapshotBufferFifo) */ +struct gk20a_cs_snapshot_fifo { + /* layout description of the buffer */ + u32 start; + u32 end; + + /* snafu bits */ + u32 hw_overflow_events_occured; + u32 sw_overflow_events_occured; + + /* the kernel copies new entries to put and + * increment the put++. if put == get then + * overflowEventsOccured++ + */ + u32 put; + u32 _reserved10; + u32 _reserved11; + u32 _reserved12; + + /* the driver/client reads from get until + * put==get, get++ */ + u32 get; + u32 _reserved20; + u32 _reserved21; + u32 _reserved22; + + /* unused */ + u32 _reserved30; + u32 _reserved31; + u32 _reserved32; + u32 _reserved33; +}; + +/* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */ +struct gk20a_cs_snapshot_fifo_entry { + /* global 48 timestamp */ + u32 timestamp31_00:32; + u32 timestamp39_32:8; + + /* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */ + u32 perfmon_id:8; + + /* typically samples_counter is wired to #pmtrigger count */ + u32 samples_counter:12; + + /* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */ + u32 ds:1; + u32 sz:1; + u32 zero0:1; + u32 zero1:1; + + /* counter results */ + u32 event_cnt:32; + u32 trigger0_cnt:32; + u32 trigger1_cnt:32; + u32 sample_cnt:32; + + /* Local PmTrigger results for Maxwell+ or padding otherwise */ + u16 local_trigger_b_count:16; + u16 book_mark_b:16; + u16 local_trigger_a_count:16; + u16 book_mark_a:16; +}; + 
+/* cycle stats snapshot client data (e.g. associated with channel) */ +struct gk20a_cs_snapshot_client { + struct list_head list; + u32 dmabuf_fd; + struct dma_buf *dma_handler; + struct gk20a_cs_snapshot_fifo *snapshot; + u32 snapshot_size; + u32 perfmon_start; + u32 perfmon_count; +}; + +/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */ +#define CSS_MAX_PERFMON_IDS 256 + +/* local definitions to avoid hardcodes sizes and shifts */ +#define PM_BITMAP_SIZE DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG) + +/* cycle stats snapshot control structure for one HW entry and many clients */ +struct gk20a_cs_snapshot { + unsigned long perfmon_ids[PM_BITMAP_SIZE]; + struct list_head clients; + struct mem_desc hw_memdesc; + /* pointer to allocated cpu_va memory where GPU place data */ + struct gk20a_cs_snapshot_fifo_entry *hw_snapshot; + struct gk20a_cs_snapshot_fifo_entry *hw_end; + struct gk20a_cs_snapshot_fifo_entry *hw_get; +}; + +void gk20a_init_css_ops(struct gpu_ops *gops); + +#endif /* CSS_GR_GK20A_H */ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 669ef1b9..1ca8ff77 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -657,6 +657,24 @@ struct gpu_ops { struct { int (*init)(struct gk20a *g); } bios; +#if defined(CONFIG_GK20A_CYCLE_STATS) + struct { + int (*enable_snapshot)(struct channel_gk20a *ch, + struct gk20a_cs_snapshot_client *client); + void (*disable_snapshot)(struct gr_gk20a *gr); + int (*check_data_available)(struct channel_gk20a *ch, + u32 *pending, + bool *hw_overflow); + void (*set_handled_snapshots)(struct gk20a *g, u32 num); + u32 (*allocate_perfmon_ids)(struct gk20a_cs_snapshot *data, + u32 count); + u32 (*release_perfmon_ids)(struct gk20a_cs_snapshot *data, + u32 start, + u32 count); + int (*detach_snapshot)(struct channel_gk20a *ch, + struct gk20a_cs_snapshot_client *client); + } css; +#endif }; struct nvgpu_bios_ucode { diff --git 
a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 2a351bc3..c337a74a 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -603,16 +603,16 @@ int gr_gk20a_halt_pipe(struct gk20a *g); int gr_gk20a_debugfs_init(struct gk20a *g); #if defined(CONFIG_GK20A_CYCLE_STATS) -int gr_gk20a_css_attach(struct gk20a *g, /* in - main hw structure */ - u32 dmabuf_fd, /* in - dma mapped memory */ - u32 perfmon_id_count, /* in - number of perfmons*/ - u32 *perfmon_id_start, /* out- index of first pm */ +int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */ + u32 dmabuf_fd, /* in - dma mapped memory */ + u32 perfmon_id_count, /* in - number of perfmons*/ + u32 *perfmon_id_start, /* out- index of first pm */ /* out - pointer to client data used in later */ struct gk20a_cs_snapshot_client **css_client); -int gr_gk20a_css_detach(struct gk20a *g, +int gr_gk20a_css_detach(struct channel_gk20a *ch, struct gk20a_cs_snapshot_client *css_client); -int gr_gk20a_css_flush(struct gk20a *g, +int gr_gk20a_css_flush(struct channel_gk20a *ch, struct gk20a_cs_snapshot_client *css_client); void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c index 4da7ffad..550dffa6 100644 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c @@ -32,6 +32,7 @@ #include "hw_proj_gk20a.h" #include "tsg_gk20a.h" #include "dbg_gpu_gk20a.h" +#include "css_gr_gk20a.h" static struct gpu_ops gk20a_ops = { .clock_gating = { @@ -157,6 +158,9 @@ int gk20a_init_hal(struct gk20a *g) gk20a_init_dbg_session_ops(gops); gk20a_init_therm_ops(gops); gk20a_init_tsg_ops(gops); +#if defined(CONFIG_GK20A_CYCLE_STATS) + gk20a_init_css_ops(gops); +#endif gops->name = "gk20a"; gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; gops->get_litter_value = gk20a_get_litter_value; diff --git 
a/drivers/gpu/nvgpu/gm206/hal_gm206.c b/drivers/gpu/nvgpu/gm206/hal_gm206.c index 3c6897ea..6b43c8e9 100644 --- a/drivers/gpu/nvgpu/gm206/hal_gm206.c +++ b/drivers/gpu/nvgpu/gm206/hal_gm206.c @@ -41,6 +41,7 @@ #include "gr_gm206.h" #include "hw_proj_gm206.h" #include "gk20a/dbg_gpu_gk20a.h" +#include "gk20a/css_gr_gk20a.h" static struct gpu_ops gm206_ops = { .clock_gating = { @@ -199,6 +200,9 @@ int gm206_init_hal(struct gk20a *g) gm20b_init_cde_ops(gops); gm20b_init_therm_ops(gops); gk20a_init_tsg_ops(gops); +#if defined(CONFIG_GK20A_CYCLE_STATS) + gk20a_init_css_ops(gops); +#endif gm206_init_bios(gops); switch(ver){ case GK20A_GPUID_GM206: diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 34e3b605..e30ca96f 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -36,6 +36,7 @@ #include "therm_gm20b.h" #include "hw_proj_gm20b.h" #include "gk20a/dbg_gpu_gk20a.h" +#include "gk20a/css_gr_gk20a.h" #define FUSE_OPT_PRIV_SEC_DIS_0 0x264 #define PRIV_SECURITY_DISABLE 0x01 @@ -226,6 +227,9 @@ int gm20b_init_hal(struct gk20a *g) gm20b_init_cde_ops(gops); gm20b_init_therm_ops(gops); gk20a_init_tsg_ops(gops); +#if defined(CONFIG_GK20A_CYCLE_STATS) + gk20a_init_css_ops(gops); +#endif gops->name = "gm20b"; gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; gops->get_litter_value = gm20b_get_litter_value; diff --git a/drivers/gpu/nvgpu/vgpu/css_vgpu.c b/drivers/gpu/nvgpu/vgpu/css_vgpu.c new file mode 100644 index 00000000..486d3e88 --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/css_vgpu.c @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#if defined(CONFIG_GK20A_CYCLE_STATS) + +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/channel_gk20a.h" +#include "gk20a/platform_gk20a.h" +#include "gk20a/css_gr_gk20a.h" +#include "vgpu.h" + +struct vgpu_hw_snapshot_buffer { + struct tegra_hv_ivm_cookie *cookie; + void *buf; + struct gk20a_cs_snapshot_fifo_entry *end; + struct gk20a_cs_snapshot_fifo_entry *src_get; + struct gk20a_cs_snapshot_fifo_entry *src_put; +}; + +struct tegra_hv_ivm_cookie *css_cookie; + +int vgpu_css_init_snapshot_buffer(struct gr_gk20a *gr) +{ + struct gk20a *g = gr->g; + struct device *dev = g->dev; + struct gk20a_cs_snapshot *data = gr->cs_data; + struct device_node *np = dev->of_node; + struct of_phandle_args args; + struct device_node *hv_np; + void *buf = NULL; + u32 mempool; + int err; + + gk20a_dbg_fn(""); + + if (data->hw_snapshot) + return 0; + + err = of_parse_phandle_with_fixed_args(np, + "mempool-css", 1, 0, &args); + if (err) { + dev_info(dev_from_gk20a(g), "dt missing mempool-css\n"); + goto fail; + } + + hv_np = args.np; + mempool = args.args[0]; + css_cookie = tegra_hv_mempool_reserve(hv_np, mempool); + if (IS_ERR(css_cookie)) { + dev_info(dev_from_gk20a(g), + "mempool %u reserve failed\n", mempool); + err = -EINVAL; + goto fail; + } + + /* Make sure buffer size is large enough */ + if (css_cookie->size < CSS_MIN_HW_SNAPSHOT_SIZE) { + dev_info(dev_from_gk20a(g), "mempool size %lld too small\n", + css_cookie->size); + err = -ENOMEM; + goto fail; + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) + buf = ioremap_cached(css_cookie->ipa, css_cookie->size); +#else + buf 
= ioremap_cache(css_cookie->ipa, css_cookie->size); +#endif + if (!buf) { + dev_info(dev_from_gk20a(g), "ioremap_cache failed\n"); + err = -EINVAL; + goto fail; + } + + data->hw_snapshot = buf; + data->hw_end = data->hw_snapshot + + css_cookie->size / sizeof(struct gk20a_cs_snapshot_fifo_entry); + data->hw_get = data->hw_snapshot; + memset(data->hw_snapshot, 0xff, css_cookie->size); + return 0; +fail: + if (!IS_ERR_OR_NULL(css_cookie)) + tegra_hv_mempool_unreserve(css_cookie); + return err; +} + +static void vgpu_css_release_snapshot_buffer(struct gr_gk20a *gr) +{ + struct gk20a_cs_snapshot *data = gr->cs_data; + + if (!data->hw_snapshot) + return; + + iounmap(data->hw_snapshot); + data->hw_snapshot = NULL; + + tegra_hv_mempool_unreserve(css_cookie); + + gk20a_dbg_info("cyclestats(vgpu): buffer for snapshots released\n"); +} + +static int vgpu_css_flush_snapshots(struct channel_gk20a *ch, + u32 *pending, bool *hw_overflow) +{ + struct gk20a *g = ch->g; + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_channel_cyclestats_snapshot_params *p; + struct gr_gk20a *gr = &g->gr; + struct gk20a_cs_snapshot *data = gr->cs_data; + int err; + + gk20a_dbg_fn(""); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT; + msg.handle = vgpu_get_handle(g); + p = &msg.params.cyclestats_snapshot; + p->handle = ch->virt_ctx; + p->subcmd = NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH; + p->buf_info = (uintptr_t)data->hw_get - (uintptr_t)data->hw_snapshot; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + + err = (err || msg.ret) ? 
-1 : 0; + + *pending = p->buf_info; + *hw_overflow = p->hw_overflow; + + return err; +} + +static int vgpu_css_attach(struct channel_gk20a *ch, + struct gk20a_cs_snapshot_client *cs_client) +{ + struct gk20a *g = ch->g; + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_channel_cyclestats_snapshot_params *p = + &msg.params.cyclestats_snapshot; + int err; + + gk20a_dbg_fn(""); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT; + msg.handle = vgpu_get_handle(g); + p->handle = ch->virt_ctx; + p->subcmd = NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH; + p->perfmon_count = cs_client->perfmon_count; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err) + gk20a_err(dev_from_gk20a(g), "%s failed", __func__); + else + cs_client->perfmon_start = p->perfmon_start; + + return err; +} + +static int vgpu_css_detach(struct channel_gk20a *ch, + struct gk20a_cs_snapshot_client *cs_client) +{ + struct gk20a *g = ch->g; + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_channel_cyclestats_snapshot_params *p = + &msg.params.cyclestats_snapshot; + int err; + + gk20a_dbg_fn(""); + + msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT; + msg.handle = vgpu_get_handle(g); + p->handle = ch->virt_ctx; + p->subcmd = NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH; + p->perfmon_start = cs_client->perfmon_start; + p->perfmon_count = cs_client->perfmon_count; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? 
err : msg.ret; + if (err) + gk20a_err(dev_from_gk20a(g), "%s failed", __func__); + + return err; +} + +static int vgpu_css_enable_snapshot_buffer(struct channel_gk20a *ch, + struct gk20a_cs_snapshot_client *cs_client) +{ + int ret; + + ret = vgpu_css_attach(ch, cs_client); + if (ret) + return ret; + + ret = vgpu_css_init_snapshot_buffer(&ch->g->gr); + return ret; +} + +void vgpu_init_css_ops(struct gpu_ops *gops) +{ + gops->css.enable_snapshot = vgpu_css_enable_snapshot_buffer; + gops->css.disable_snapshot = vgpu_css_release_snapshot_buffer; + gops->css.check_data_available = vgpu_css_flush_snapshots; + gops->css.detach_snapshot = vgpu_css_detach; +} +#endif /* CONFIG_GK20A_CYCLE_STATS */ diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c index 783b2f55..89223091 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c @@ -875,6 +875,10 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) gr->g = g; +#if defined(CONFIG_GK20A_CYCLE_STATS) + mutex_init(&g->gr.cs_lock); +#endif + err = vgpu_gr_init_gr_config(g, gr); if (err) goto clean_up; diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c index 27d98eb8..bd332583 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c @@ -334,6 +334,9 @@ void vgpu_init_hal_common(struct gk20a *g) vgpu_init_dbg_session_ops(gops); vgpu_init_fecs_trace_ops(gops); vgpu_init_tsg_ops(gops); +#if defined(CONFIG_GK20A_CYCLE_STATS) + vgpu_init_css_ops(gops); +#endif gops->chip_init_gpu_characteristics = vgpu_init_gpu_characteristics; gops->read_ptimer = vgpu_read_ptimer; } diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.h b/drivers/gpu/nvgpu/vgpu/vgpu.h index 6f1059b8..4a7a6b6c 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.h +++ b/drivers/gpu/nvgpu/vgpu/vgpu.h @@ -85,6 +85,9 @@ void vgpu_init_ltc_ops(struct gpu_ops *gops); void vgpu_init_mm_ops(struct gpu_ops *gops); void vgpu_init_debug_ops(struct gpu_ops *gops); void vgpu_init_tsg_ops(struct 
gpu_ops *gops); +#if defined(CONFIG_GK20A_CYCLE_STATS) +void vgpu_init_css_ops(struct gpu_ops *gops); +#endif int vgpu_init_mm_support(struct gk20a *g); int vgpu_init_gr_support(struct gk20a *g); int vgpu_init_fifo_support(struct gk20a *g); @@ -161,6 +164,11 @@ static inline void vgpu_init_mm_ops(struct gpu_ops *gops) static inline void vgpu_init_debug_ops(struct gpu_ops *gops) { } +#if defined(CONFIG_GK20A_CYCLE_STATS) +static inline void vgpu_init_css_ops(struct gpu_ops *gops) +{ +} +#endif static inline int vgpu_init_mm_support(struct gk20a *g) { return -ENOSYS; -- cgit v1.2.2