Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c	233
1 file changed, 99 insertions(+), 134 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
index 62f60761..71614d6e 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -25,93 +25,13 @@
 #include "gk20a.h"
 #include "hw_perf_gk20a.h"
 #include "hw_mc_gk20a.h"
-
-
-
-/* cycle stats fifo header (must match NvSnapshotBufferFifo) */
-struct gk20a_cs_snapshot_fifo {
-	/* layout description of the buffer */
-	u32 start;
-	u32 end;
-
-	/* snafu bits */
-	u32 hw_overflow_events_occured;
-	u32 sw_overflow_events_occured;
-
-	/* the kernel copies new entries to put and
-	 * increment the put++. if put == get then
-	 * overflowEventsOccured++
-	 */
-	u32 put;
-	u32 _reserved10;
-	u32 _reserved11;
-	u32 _reserved12;
-
-	/* the driver/client reads from get until
-	 * put==get, get++ */
-	u32 get;
-	u32 _reserved20;
-	u32 _reserved21;
-	u32 _reserved22;
-
-	/* unused */
-	u32 _reserved30;
-	u32 _reserved31;
-	u32 _reserved32;
-	u32 _reserved33;
-};
-
-/* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */
-struct gk20a_cs_snapshot_fifo_entry {
-	/* global 48 timestamp */
-	u32 timestamp31_00:32;
-	u32 timestamp39_32:8;
-
-	/* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */
-	u32 perfmon_id:8;
-
-	/* typically samples_counter is wired to #pmtrigger count */
-	u32 samples_counter:12;
-
-	/* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */
-	u32 ds:1;
-	u32 sz:1;
-	u32 zero0:1;
-	u32 zero1:1;
-
-	/* counter results */
-	u32 event_cnt:32;
-	u32 trigger0_cnt:32;
-	u32 trigger1_cnt:32;
-	u32 sample_cnt:32;
-
-	/* Local PmTrigger results for Maxwell+ or padding otherwise */
-	u16 local_trigger_b_count:16;
-	u16 book_mark_b:16;
-	u16 local_trigger_a_count:16;
-	u16 book_mark_a:16;
-};
-
-
-/* cycle stats snapshot client data (e.g. associated with channel) */
-struct gk20a_cs_snapshot_client {
-	struct list_head list;
-	u32 dmabuf_fd;
-	struct dma_buf *dma_handler;
-	struct gk20a_cs_snapshot_fifo *snapshot;
-	u32 snapshot_size;
-	u32 perfmon_start;
-	u32 perfmon_count;
-};
+#include "css_gr_gk20a.h"
 
 /* check client for pointed perfmon ownership */
 #define CONTAINS_PERFMON(cl, pm) \
 	((cl)->perfmon_start <= (pm) && \
 	 ((pm) - (cl)->perfmon_start) < (cl)->perfmon_count)
 
-/* the minimal size of HW buffer - should be enough to avoid HW overflows */
-#define CSS_MIN_HW_SNAPSHOT_SIZE	(8 * 1024 * 1024)
-
 /* the minimal size of client buffer */
 #define CSS_MIN_CLIENT_SNAPSHOT_SIZE \
 	(sizeof(struct gk20a_cs_snapshot_fifo) + \
@@ -131,20 +51,6 @@ struct gk20a_cs_snapshot_client {
 /* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */
 #define CSS_MAX_PERFMON_IDS	256
 
-/* local definitions to avoid hardcodes sizes and shifts */
-#define PM_BITMAP_SIZE	DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG)
-
-/* cycle stats snapshot control structure for one HW entry and many clients */
-struct gk20a_cs_snapshot {
-	unsigned long perfmon_ids[PM_BITMAP_SIZE];
-	struct list_head clients;
-	struct mem_desc hw_memdesc;
-	/* pointer to allocated cpu_va memory where GPU place data */
-	struct gk20a_cs_snapshot_fifo_entry *hw_snapshot;
-	struct gk20a_cs_snapshot_fifo_entry *hw_end;
-	struct gk20a_cs_snapshot_fifo_entry *hw_get;
-};
-
 /* reports whether the hw queue overflowed */
 static inline bool css_hw_get_overflow_status(struct gk20a *g)
 {
@@ -215,10 +121,13 @@ static int css_gr_create_shared_data(struct gr_gk20a *gr)
 	return 0;
 }
 
-static int css_hw_enable_snapshot(struct gr_gk20a *gr, u32 snapshot_size)
+static int css_hw_enable_snapshot(struct channel_gk20a *ch,
+		struct gk20a_cs_snapshot_client *cs_client)
 {
-	struct gk20a *g = gr->g;
+	struct gk20a *g = ch->g;
+	struct gr_gk20a *gr = &g->gr;
 	struct gk20a_cs_snapshot *data = gr->cs_data;
+	u32 snapshot_size = cs_client->snapshot_size;
 	int ret;
 
 	u32 virt_addr_lo;
@@ -317,9 +226,11 @@ static void css_hw_disable_snapshot(struct gr_gk20a *gr)
 
 static void css_gr_free_shared_data(struct gr_gk20a *gr)
 {
+	struct gk20a *g = gr->g;
+
 	if (gr->cs_data) {
 		/* the clients list is expected to be empty */
-		css_hw_disable_snapshot(gr);
+		g->ops.css.disable_snapshot(gr);
 
 		/* release the objects */
 		kfree(gr->cs_data);
@@ -344,12 +255,15 @@ css_gr_search_client(struct list_head *clients, u32 perfmon)
 	return NULL;
 }
 
-static int css_gr_flush_snapshots(struct gr_gk20a *gr)
+static int css_gr_flush_snapshots(struct channel_gk20a *ch)
 {
-	struct gk20a *g = gr->g;
+	struct gk20a *g = ch->g;
+	struct gr_gk20a *gr = &g->gr;
 	struct gk20a_cs_snapshot *css = gr->cs_data;
 	struct gk20a_cs_snapshot_client *cur;
-	u32 pending;
+	u32 pending, completed;
+	bool hw_overflow;
+	int err;
 
 	/* variables for iterating over HW entries */
 	u32 sid;
@@ -360,24 +274,25 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
 	struct gk20a_cs_snapshot_fifo *dst;
 	struct gk20a_cs_snapshot_fifo_entry *dst_get;
 	struct gk20a_cs_snapshot_fifo_entry *dst_put;
+	struct gk20a_cs_snapshot_fifo_entry *dst_nxt;
 	struct gk20a_cs_snapshot_fifo_entry *dst_head;
 	struct gk20a_cs_snapshot_fifo_entry *dst_tail;
 
 	if (!css)
 		return -EINVAL;
 
-	if (!css->hw_snapshot)
-		return -EINVAL;
-
 	if (list_empty(&css->clients))
 		return -EBADF;
 
 	/* check data available */
-	pending = css_hw_get_pending_snapshots(g);
+	err = g->ops.css.check_data_available(ch, &pending, &hw_overflow);
+	if (err)
+		return err;
+
 	if (!pending)
 		return 0;
 
-	if (css_hw_get_overflow_status(g)) {
+	if (hw_overflow) {
 		struct list_head *pos;
 
 		list_for_each(pos, &css->clients) {
@@ -387,11 +302,12 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
 		}
 
 		gk20a_warn(dev_from_gk20a(g),
 			"cyclestats: hardware overflow detected\n");
 	}
 
-	/* proceed all items in HW buffer */
+	/* process all items in HW buffer */
 	sid = 0;
+	completed = 0;
 	cur = NULL;
 	dst = NULL;
 	dst_put = NULL;
@@ -419,7 +335,11 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
 			dst_get = CSS_FIFO_ENTRY(dst, dst->get);
 			dst_put = CSS_FIFO_ENTRY(dst, dst->put);
 			dst_head = CSS_FIFO_ENTRY(dst, dst->start);
-			dst_tail = CSS_FIFO_ENTRY(dst, dst->end) - 1;
+			dst_tail = CSS_FIFO_ENTRY(dst, dst->end);
+
+			dst_nxt = dst_put + 1;
+			if (dst_nxt == dst_tail)
+				dst_nxt = dst_head;
 		} else {
 			/* client not found - skipping this entry */
 			gk20a_warn(dev_from_gk20a(g),
@@ -430,8 +350,7 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
 		}
 
 		/* check for software overflows */
-		if (dst_put + 1 == dst_get ||
-		    (dst_put == dst_tail && dst_get == dst_head)) {
+		if (dst_nxt == dst_get) {
 			/* no data copy, no pointer updates */
 			dst->sw_overflow_events_occured++;
 			gk20a_warn(dev_from_gk20a(g),
@@ -439,10 +358,12 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
 				src->perfmon_id);
 		} else {
 			*dst_put = *src;
-			if (dst_put == dst_tail)
-				dst_put = dst_head;
-			else
-				dst_put++;
+			completed++;
+
+			dst_put = dst_nxt++;
+
+			if (dst_nxt == dst_tail)
+				dst_nxt = dst_head;
 		}
 
 next_hw_fifo_entry:
@@ -465,14 +386,17 @@ next_hw_fifo_entry:
 			(css->hw_end - css->hw_get) * sizeof(*src));
 	}
 	gr->cs_data->hw_get = src;
-	css_hw_set_handled_snapshots(g, sid);
-	if (pending != sid) {
+
+	if (g->ops.css.set_handled_snapshots)
+		g->ops.css.set_handled_snapshots(g, sid);
+
+	if (completed != sid) {
 		/* not all entries proceed correctly. some of problems */
 		/* reported as overflows, some as orphaned perfmons, */
 		/* but it will be better notify with summary about it */
 		gk20a_warn(dev_from_gk20a(g),
-			"cyclestats: done %u from %u entries\n",
-			sid, pending);
+			"cyclestats: completed %u from %u entries\n",
+			completed, pending);
 	}
 
 	return 0;
@@ -511,7 +435,8 @@ static u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data,
 }
 
 
-static int css_gr_free_client_data(struct gk20a_cs_snapshot *data,
+static int css_gr_free_client_data(struct gk20a *g,
+				   struct gk20a_cs_snapshot *data,
 				   struct gk20a_cs_snapshot_client *client)
 {
 	int ret = 0;
@@ -519,8 +444,9 @@ static int css_gr_free_client_data(struct gk20a_cs_snapshot *data,
 	if (client->list.next && client->list.prev)
 		list_del(&client->list);
 
-	if (client->perfmon_start && client->perfmon_count) {
-		if (client->perfmon_count != css_gr_release_perfmon_ids(data,
+	if (client->perfmon_start && client->perfmon_count
+	    && g->ops.css.release_perfmon_ids) {
+		if (client->perfmon_count != g->ops.css.release_perfmon_ids(data,
 				client->perfmon_start, client->perfmon_count))
 			ret = -EINVAL;
 	}
@@ -536,7 +462,8 @@ static int css_gr_free_client_data(struct gk20a_cs_snapshot *data,
 	return ret;
 }
 
-static int css_gr_create_client_data(struct gk20a_cs_snapshot *data,
+static int css_gr_create_client_data(struct gk20a *g,
+				     struct gk20a_cs_snapshot *data,
 				     u32 dmabuf_fd, u32 perfmon_count,
 				     struct gk20a_cs_snapshot_client **client)
 {
@@ -581,8 +508,12 @@ static int css_gr_create_client_data(struct gk20a_cs_snapshot *data,
 	cur->snapshot->put = cur->snapshot->start;
 
 	cur->perfmon_count = perfmon_count;
-	if (cur->perfmon_count) {
-		cur->perfmon_start = css_gr_allocate_perfmon_ids(data,
+
+	/* In virtual case, perfmon ID allocation is handled by the server
+	 * at the time of the attach (allocate_perfmon_ids is NULL in this case)
+	 */
+	if (cur->perfmon_count && g->ops.css.allocate_perfmon_ids) {
+		cur->perfmon_start = g->ops.css.allocate_perfmon_ids(data,
 						cur->perfmon_count);
 		if (!cur->perfmon_start) {
 			ret = -ENOENT;
@@ -598,19 +529,20 @@ static int css_gr_create_client_data(struct gk20a_cs_snapshot *data,
 failed:
 	*client = NULL;
 	if (cur)
-		css_gr_free_client_data(data, cur);
+		css_gr_free_client_data(g, data, cur);
 
 	return ret;
 }
 
 
-int gr_gk20a_css_attach(struct gk20a *g,
+int gr_gk20a_css_attach(struct channel_gk20a *ch,
 			u32 dmabuf_fd,
 			u32 perfmon_count,
 			u32 *perfmon_start,
 			struct gk20a_cs_snapshot_client **cs_client)
 {
 	int ret = 0;
+	struct gk20a *g = ch->g;
 	struct gr_gk20a *gr;
 
 	/* we must have a placeholder to store pointer to client structure */
@@ -630,14 +562,14 @@ int gr_gk20a_css_attach(struct gk20a *g,
 	if (ret)
 		goto failed;
 
-	ret = css_gr_create_client_data(gr->cs_data,
+	ret = css_gr_create_client_data(g, gr->cs_data,
 			dmabuf_fd,
 			perfmon_count,
 			cs_client);
 	if (ret)
 		goto failed;
 
-	ret = css_hw_enable_snapshot(gr, (*cs_client)->snapshot_size);
+	ret = g->ops.css.enable_snapshot(ch, *cs_client);
 	if (ret)
 		goto failed;
 
@@ -651,7 +583,7 @@ int gr_gk20a_css_attach(struct gk20a *g,
 failed:
 	if (gr->cs_data) {
 		if (*cs_client) {
-			css_gr_free_client_data(gr->cs_data, *cs_client);
+			css_gr_free_client_data(g, gr->cs_data, *cs_client);
 			*cs_client = NULL;
 		}
 
@@ -666,10 +598,11 @@ failed:
 	return ret;
 }
 
-int gr_gk20a_css_detach(struct gk20a *g,
+int gr_gk20a_css_detach(struct channel_gk20a *ch,
 			struct gk20a_cs_snapshot_client *cs_client)
 {
 	int ret = 0;
+	struct gk20a *g = ch->g;
 	struct gr_gk20a *gr;
 
 	if (!cs_client)
@@ -680,7 +613,10 @@ int gr_gk20a_css_detach(struct gk20a *g,
 	if (gr->cs_data) {
 		struct gk20a_cs_snapshot *data = gr->cs_data;
 
-		ret = css_gr_free_client_data(data, cs_client);
+		if (g->ops.css.detach_snapshot)
+			g->ops.css.detach_snapshot(ch, cs_client);
+
+		ret = css_gr_free_client_data(g, data, cs_client);
 		if (list_empty(&data->clients))
 			css_gr_free_shared_data(gr);
 	} else {
@@ -691,10 +627,11 @@ int gr_gk20a_css_detach(struct gk20a *g,
 	return ret;
 }
 
-int gr_gk20a_css_flush(struct gk20a *g,
+int gr_gk20a_css_flush(struct channel_gk20a *ch,
 		       struct gk20a_cs_snapshot_client *cs_client)
 {
 	int ret = 0;
+	struct gk20a *g = ch->g;
 	struct gr_gk20a *gr;
 
 	if (!cs_client)
@@ -702,7 +639,7 @@ int gr_gk20a_css_flush(struct gk20a *g,
 
 	gr = &g->gr;
 	mutex_lock(&gr->cs_lock);
-	ret = css_gr_flush_snapshots(gr);
+	ret = css_gr_flush_snapshots(ch);
 	mutex_unlock(&gr->cs_lock);
 
 	return ret;
@@ -718,3 +655,31 @@ void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
 	mutex_unlock(&gr->cs_lock);
 	mutex_destroy(&gr->cs_lock);
 }
+
+static int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending,
+					bool *hw_overflow)
+{
+	struct gk20a *g = ch->g;
+	struct gr_gk20a *gr = &g->gr;
+	struct gk20a_cs_snapshot *css = gr->cs_data;
+
+	if (!css->hw_snapshot)
+		return -EINVAL;
+
+	*pending = css_hw_get_pending_snapshots(g);
+	if (!*pending)
+		return 0;
+
+	*hw_overflow = css_hw_get_overflow_status(g);
+	return 0;
+}
+
+void gk20a_init_css_ops(struct gpu_ops *gops)
+{
+	gops->css.enable_snapshot = css_hw_enable_snapshot;
+	gops->css.disable_snapshot = css_hw_disable_snapshot;
+	gops->css.check_data_available = css_hw_check_data_available;
+	gops->css.set_handled_snapshots = css_hw_set_handled_snapshots;
+	gops->css.allocate_perfmon_ids = css_gr_allocate_perfmon_ids;
+	gops->css.release_perfmon_ids = css_gr_release_perfmon_ids;
+}
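
The end of the patch is where the restructuring pays off: gk20a_init_css_ops() registers the native hardware implementations in a per-chip gpu_ops.css function table, and the common code above now calls through that table, testing the optional hooks (set_handled_snapshots, allocate_perfmon_ids, release_perfmon_ids, detach_snapshot) for NULL so a virtualized backend can leave the server-managed ones unset. A minimal standalone sketch of that optional-op pattern follows; the struct and function names here are simplified stand-ins, not the driver's real types.

/*
 * Sketch of an ops table with optional hooks (hypothetical names).
 * Mandatory ops are always provided by a backend; optional ones may be
 * NULL, e.g. when perfmon IDs are managed on the server side.
 */
struct css_snapshot_ops {
	/* mandatory ops */
	int  (*enable_snapshot)(void *ch, void *cs_client);
	void (*disable_snapshot)(void *gr);
	int  (*check_data_available)(void *ch, unsigned int *pending,
				     int *hw_overflow);
	/* optional ops: a virtual backend may leave these NULL */
	unsigned int (*allocate_perfmon_ids)(void *data, unsigned int count);
	unsigned int (*release_perfmon_ids)(void *data, unsigned int start,
					    unsigned int count);
};

static unsigned int css_alloc_ids(const struct css_snapshot_ops *ops,
				  void *data, unsigned int count)
{
	/* optional op absent: IDs are managed elsewhere, nothing to do here */
	if (!ops->allocate_perfmon_ids)
		return 0;

	return ops->allocate_perfmon_ids(data, count);
}

A backend fills in only the hooks it implements, and every caller of an optional hook repeats the NULL check, exactly as the guarded calls in the diff do.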
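
The other substantive change is in css_gr_flush_snapshots(): instead of comparing dst_put + 1 against both dst_get and the wrap point, the loop now precomputes dst_nxt, wraps it at the exclusive end of the client buffer, and treats dst_nxt == dst_get as a software overflow, so one slot is always kept free. A standalone sketch of that advance logic, under hypothetical names (struct fifo, fifo_push) rather than the driver's own API:

#include <stdbool.h>

struct entry { unsigned int payload; };

struct fifo {
	struct entry *head;	/* first slot of the ring */
	struct entry *tail;	/* one past the last slot (exclusive end) */
	struct entry *put;	/* next slot the writer fills */
	struct entry *get;	/* next slot the reader drains */
	unsigned int sw_overflows;
};

static bool fifo_push(struct fifo *f, const struct entry *src)
{
	struct entry *nxt = f->put + 1;

	if (nxt == f->tail)		/* wrap at the exclusive end */
		nxt = f->head;

	if (nxt == f->get) {		/* ring full: keep one slot free */
		f->sw_overflows++;
		return false;		/* no data copy, no pointer update */
	}

	*f->put = *src;			/* copy the snapshot entry */
	f->put = nxt;			/* publish the new put position */
	return true;
}

Dropping the entry on overflow rather than overwriting matches the behaviour in the diff: sw_overflow_events_occured is incremented and neither put nor get moves.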