Diffstat (limited to 'drivers/gpu/drm/nouveau/nouveau_dma.c')
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_dma.c | 108
1 file changed, 73 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index 703553687b20..50d9e67745af 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -29,12 +29,22 @@
 #include "nouveau_drv.h"
 #include "nouveau_dma.h"
 
+void
+nouveau_dma_pre_init(struct nouveau_channel *chan)
+{
+	chan->dma.max = (chan->pushbuf_bo->bo.mem.size >> 2) - 2;
+	chan->dma.put = 0;
+	chan->dma.cur = chan->dma.put;
+	chan->dma.free = chan->dma.max - chan->dma.cur;
+}
+
 int
 nouveau_dma_init(struct nouveau_channel *chan)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *m2mf = NULL;
+	struct nouveau_gpuobj *nvsw = NULL;
 	int ret, i;
 
 	/* Create NV_MEMORY_TO_MEMORY_FORMAT for buffer moves */
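
The new nouveau_dma_pre_init() hoists the push-buffer bookkeeping out of nouveau_dma_init() (the old copy is removed in a later hunk), so the chan->dma tracking fields are valid before the rest of channel setup runs. Below is a minimal userspace model of that bookkeeping; the struct, buffer size and main() are invented for illustration, only the arithmetic mirrors the patch:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct dma_state {
	int max;	/* highest usable dword index in the push buffer */
	int put;	/* last PUT offset handed to the GPU */
	int cur;	/* next dword the CPU will write */
	int free;	/* dwords known to be free for the CPU */
};

static void dma_pre_init(struct dma_state *dma, size_t pushbuf_bytes)
{
	/* ">> 2" converts bytes to dwords; the "- 2" keeps a small
	 * reserve so PUT never lands exactly on GET (see the GET==PUT
	 * discussion in the last hunk) */
	dma->max  = (int)(pushbuf_bytes >> 2) - 2;
	dma->put  = 0;
	dma->cur  = dma->put;
	dma->free = dma->max - dma->cur;
}

int main(void)
{
	struct dma_state dma;

	dma_pre_init(&dma, 64 * 1024);	/* hypothetical 64 KiB buffer */
	assert(dma.free == dma.max);	/* empty ring: all of it is free */
	printf("max=%d cur=%d free=%d\n", dma.max, dma.cur, dma.free);
	return 0;
}
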
@@ -47,6 +57,15 @@ nouveau_dma_init(struct nouveau_channel *chan)
 	if (ret)
 		return ret;
 
+	/* Create an NV_SW object for various sync purposes */
+	ret = nouveau_gpuobj_sw_new(chan, NV_SW, &nvsw);
+	if (ret)
+		return ret;
+
+	ret = nouveau_gpuobj_ref_add(dev, chan, NvSw, nvsw, NULL);
+	if (ret)
+		return ret;
+
 	/* NV_MEMORY_TO_MEMORY_FORMAT requires a notifier object */
 	ret = nouveau_notifier_alloc(chan, NvNotify0, 32, &chan->m2mf_ntfy);
 	if (ret)
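
nouveau_gpuobj_sw_new() allocates a purely driver-side ("software") object, and nouveau_gpuobj_ref_add() then publishes it in the channel under the fixed NvSw handle, mirroring the m2mf setup just above. A toy model of that create-then-publish pattern follows; the helper names, class number and handle value are all invented stand-ins, not the nouveau API:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct gpuobj {
	uint32_t class_id;
};

/* stand-in for nouveau_gpuobj_sw_new(): a driver-side object only,
 * with no GPU memory behind it */
static int gpuobj_sw_new(uint32_t class_id, struct gpuobj **pobj)
{
	*pobj = malloc(sizeof(**pobj));
	if (!*pobj)
		return -ENOMEM;
	(*pobj)->class_id = class_id;
	return 0;
}

/* stand-in for nouveau_gpuobj_ref_add(): make the object reachable
 * from the channel under a fixed handle */
static int gpuobj_ref_add(uint32_t handle, struct gpuobj *obj)
{
	printf("handle 0x%08x -> class 0x%04x\n",
	       (unsigned)handle, (unsigned)obj->class_id);
	return 0;
}

int main(void)
{
	struct gpuobj *nvsw = NULL;
	int ret;

	ret = gpuobj_sw_new(0x506e, &nvsw);	/* hypothetical NV_SW class */
	if (ret)
		return 1;
	ret = gpuobj_ref_add(0xbeef0001, nvsw);	/* hypothetical NvSw handle */
	free(nvsw);
	return ret ? 1 : 0;
}
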
@@ -64,12 +83,6 @@ nouveau_dma_init(struct nouveau_channel *chan)
 		return ret;
 	}
 
-	/* Initialise DMA vars */
-	chan->dma.max = (chan->pushbuf_bo->bo.mem.size >> 2) - 2;
-	chan->dma.put = 0;
-	chan->dma.cur = chan->dma.put;
-	chan->dma.free = chan->dma.max - chan->dma.cur;
-
 	/* Insert NOPS for NOUVEAU_DMA_SKIPS */
 	ret = RING_SPACE(chan, NOUVEAU_DMA_SKIPS);
 	if (ret)
@@ -87,6 +100,13 @@
 	BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1);
 	OUT_RING(chan, NvNotify0);
 
+	/* Initialise NV_SW */
+	ret = RING_SPACE(chan, 2);
+	if (ret)
+		return ret;
+	BEGIN_RING(chan, NvSubSw, 0, 1);
+	OUT_RING(chan, NvSw);
+
 	/* Sit back and pray the channel works.. */
 	FIRE_RING(chan);
 
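
For reference, the BEGIN_RING()/OUT_RING() pair above emits two dwords: a header packing the data-dword count, subchannel index and method offset, then the data itself. Method 0 on a subchannel is the object-bind method, so the data dword is the NvSw handle. The shifts below follow this era's nouveau_dma.h; the subchannel index and handle value are made up for the demo:

#include <stdint.h>
#include <stdio.h>

/* header layout used by BEGIN_RING(): count, subchannel and method
 * offset packed into a single dword */
static uint32_t ring_header(unsigned subc, unsigned mthd, unsigned count)
{
	return (uint32_t)(count << 18 | subc << 13 | mthd);
}

int main(void)
{
	const unsigned NvSubSw = 1;		/* illustrative subchannel */
	const uint32_t NvSw = 0x8000000e;	/* invented handle value */

	/* BEGIN_RING(chan, NvSubSw, 0, 1); OUT_RING(chan, NvSw); */
	printf("header 0x%08x, data 0x%08x\n",
	       (unsigned)ring_header(NvSubSw, 0x0000, 1), (unsigned)NvSw);
	return 0;
}
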
@@ -106,47 +126,52 @@ OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords)
 	chan->dma.cur += nr_dwords;
 }
 
-static inline bool
-READ_GET(struct nouveau_channel *chan, uint32_t *get)
+/* Fetch and adjust GPU GET pointer
+ *
+ * Returns:
+ *   value >= 0, the adjusted GET pointer
+ *   -EINVAL if GET pointer currently outside main push buffer
+ *   -EBUSY if timeout exceeded
+ */
+static inline int
+READ_GET(struct nouveau_channel *chan, uint32_t *prev_get, uint32_t *timeout)
 {
 	uint32_t val;
 
 	val = nvchan_rd32(chan, chan->user_get);
-	if (val < chan->pushbuf_base ||
-	    val >= chan->pushbuf_base + chan->pushbuf_bo->bo.mem.size) {
-		/* meaningless to dma_wait() except to know whether the
-		 * GPU has stalled or not
-		 */
-		*get = val;
-		return false;
+
+	/* reset counter as long as GET is still advancing, this is
+	 * to avoid misdetecting a GPU lockup if the GPU happens to
+	 * just be processing an operation that takes a long time
+	 */
+	if (val != *prev_get) {
+		*prev_get = val;
+		*timeout = 0;
+	}
+
+	if ((++*timeout & 0xff) == 0) {
+		DRM_UDELAY(1);
+		if (*timeout > 100000)
+			return -EBUSY;
 	}
 
-	*get = (val - chan->pushbuf_base) >> 2;
-	return true;
+	if (val < chan->pushbuf_base ||
+	    val > chan->pushbuf_base + (chan->dma.max << 2))
+		return -EINVAL;
+
+	return (val - chan->pushbuf_base) >> 2;
 }
 
 int
 nouveau_dma_wait(struct nouveau_channel *chan, int size)
 {
-	uint32_t get, prev_get = 0, cnt = 0;
-	bool get_valid;
+	uint32_t prev_get = 0, cnt = 0;
+	int get;
 
 	while (chan->dma.free < size) {
-		/* reset counter as long as GET is still advancing, this is
-		 * to avoid misdetecting a GPU lockup if the GPU happens to
-		 * just be processing an operation that takes a long time
-		 */
-		get_valid = READ_GET(chan, &get);
-		if (get != prev_get) {
-			prev_get = get;
-			cnt = 0;
-		}
-
-		if ((++cnt & 0xff) == 0) {
-			DRM_UDELAY(1);
-			if (cnt > 100000)
-				return -EBUSY;
-		}
+		get = READ_GET(chan, &prev_get, &cnt);
+		if (unlikely(get == -EBUSY))
+			return -EBUSY;
 
 		/* loop until we have a usable GET pointer. the value
 		 * we read from the GPU may be outside the main ring if
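
This hunk folds the stall detection from nouveau_dma_wait() into READ_GET() itself: it now returns the adjusted GET dword index, -EBUSY once GET has stopped advancing for too long, or -EINVAL while GET points outside the main ring. A standalone model of that contract, with the register read and DRM_UDELAY() simulated; every name here is a stand-in, not the kernel API:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define PUSHBUF_BASE 0x1000u
#define DMA_MAX      1022		/* e.g. (4096 >> 2) - 2 dwords */

static int read_get(uint32_t val, uint32_t *prev_get, uint32_t *timeout)
{
	/* GET still moving: restart the lockup counter */
	if (val != *prev_get) {
		*prev_get = val;
		*timeout = 0;
	}

	/* poll budget: back off every 256 reads, give up past ~100k */
	if ((++*timeout & 0xff) == 0) {
		/* the kernel does DRM_UDELAY(1) here */
		if (*timeout > 100000)
			return -EBUSY;
	}

	if (val < PUSHBUF_BASE || val > PUSHBUF_BASE + (DMA_MAX << 2))
		return -EINVAL;

	return (int)((val - PUSHBUF_BASE) >> 2);
}

int main(void)
{
	uint32_t prev = 0, timeout = 0;
	int get;

	/* a GPU stuck at one offset eventually reads as -EBUSY */
	do
		get = read_get(PUSHBUF_BASE + 0x40, &prev, &timeout);
	while (get >= 0);

	printf("get=%d (EBUSY=%d)\n", get, -EBUSY);
	return 0;
}
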
@@ -157,7 +182,7 @@ nouveau_dma_wait(struct nouveau_channel *chan, int size)
 		 * from the SKIPS area, so the code below doesn't have to deal
 		 * with some fun corner cases.
 		 */
-		if (!get_valid || get < NOUVEAU_DMA_SKIPS)
+		if (unlikely(get == -EINVAL) || get < NOUVEAU_DMA_SKIPS)
 			continue;
 
 		if (get <= chan->dma.cur) {
@@ -183,6 +208,19 @@ nouveau_dma_wait(struct nouveau_channel *chan, int size)
 			 * after processing the currently pending commands.
 			 */
 			OUT_RING(chan, chan->pushbuf_base | 0x20000000);
+
+			/* wait for GET to depart from the skips area.
+			 * prevents writing GET==PUT and causing a race
+			 * condition that causes us to think the GPU is
+			 * idle when it's not.
+			 */
+			do {
+				get = READ_GET(chan, &prev_get, &cnt);
+				if (unlikely(get == -EBUSY))
+					return -EBUSY;
+				if (unlikely(get == -EINVAL))
+					continue;
+			} while (get <= NOUVEAU_DMA_SKIPS);
 			WRITE_PUT(NOUVEAU_DMA_SKIPS);
 
 			/* we're now submitting commands at the start of
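
The new do/while above is the heart of the fix: after emitting the jump back to pushbuf_base, PUT must not be rewound until GET has left the SKIPS area, because a ring where PUT == GET is indistinguishable from an empty one. A simplified model of the free-space calculation showing that ambiguity (not the kernel's actual code):

#include <stdio.h>

/* free dwords between PUT and GET in a ring of 'max' dwords */
static int ring_free(int put, int get, int max)
{
	if (put >= get)
		return (max - put) + get;	/* wraps past the end */
	return get - put;			/* gap directly ahead */
}

int main(void)
{
	/* PUT == GET reads as a completely free ring... */
	printf("put=100 get=100 -> free=%d of 1024\n",
	       ring_free(100, 100, 1024));
	/* ...while one dword of separation reports almost nothing */
	printf("put=100 get=101 -> free=%d\n",
	       ring_free(100, 101, 1024));
	return 0;
}
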