author     Thomas Hellstrom <thellstrom@vmware.com>    2011-10-04 14:13:30 -0400
committer  Dave Airlie <airlied@redhat.com>            2011-10-05 05:17:22 -0400
commit     e2fa3a76839ada0d788549607263a036aa654243 (patch)
tree       24883fcf9f80483aed537661a49ed389d0dff671 /drivers/gpu/drm/vmwgfx
parent     e93daed8e2fd5ce3dc98efe9938426127a534ccc (diff)
vmwgfx: Fix up query processing
Previously, query results could be placed in any buffer object, but since we didn't allow pinned buffer objects, query results could be written when that buffer was evicted, corrupting data in other buffers.

Now, require that buffers holding query results are no more than two pages large, and allow a single such pinned buffer. When the command submission code encounters query result structures in other buffers, the queries in the pinned buffer will be finished using a query barrier for the last hardware context using the buffer. Likewise, if the command submission code detects that a new hardware context is used for queries, all queries of the previous hardware context are also flushed. Currently we wait for a no-op occlusion query as the query barrier for a specific context.

The query buffer is also flushed and unpinned on context destruction, on master drops and before scanout bo placement.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Jakob Bornecrantz <jakob@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
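For orientation, here is a minimal sketch of how a driver path uses the query barrier described above. The call and its semantics come from the patch below; the wrapper function itself is purely illustrative and not part of the commit.

/*
 * Illustrative only (not part of the patch): a path that tears down
 * hardware context @cid first flushes any outstanding queries.
 * vmw_execbuf_release_pinned_bo() emits a no-op occlusion-query wait as
 * a query barrier, fences the pinned and dummy query buffers, and then
 * unpins them asynchronously once that fence signals.
 */
static void example_context_teardown(struct vmw_private *dev_priv,
                                     uint32_t cid)
{
        /* Only flush if the pinned query buffer belongs to this context. */
        vmw_execbuf_release_pinned_bo(dev_priv, true, cid);
}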
Diffstat (limited to 'drivers/gpu/drm/vmwgfx')
-rw-r--r--   drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c   |  44
-rw-r--r--   drivers/gpu/drm/vmwgfx/vmwgfx_drv.c      |  86
-rw-r--r--   drivers/gpu/drm/vmwgfx/vmwgfx_drv.h      |  24
-rw-r--r--   drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c  | 375
-rw-r--r--   drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c     |  57
-rw-r--r--   drivers/gpu/drm/vmwgfx/vmwgfx_resource.c |   8
6 files changed, 572 insertions, 22 deletions
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
index 7f744a82892a..3fa884db08ab 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
@@ -42,6 +42,7 @@
  * May only be called by the current master since it assumes that the
  * master lock is the current master's lock.
  * This function takes the master's lock in write mode.
+ * Flushes and unpins the query bo to avoid failures.
  *
  * Returns
  * -ERESTARTSYS if interrupted by a signal.
@@ -59,6 +60,8 @@ int vmw_dmabuf_to_placement(struct vmw_private *dev_priv,
         if (unlikely(ret != 0))
                 return ret;
 
+        vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+
         ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
         if (unlikely(ret != 0))
                 goto err;
@@ -78,6 +81,7 @@ err:
  * May only be called by the current master since it assumes that the
  * master lock is the current master's lock.
  * This function takes the master's lock in write mode.
+ * Flushes and unpins the query bo if @pin == true to avoid failures.
  *
  * @dev_priv: Driver private.
  * @buf: DMA buffer to move.
@@ -100,6 +104,9 @@ int vmw_dmabuf_to_vram_or_gmr(struct vmw_private *dev_priv,
         if (unlikely(ret != 0))
                 return ret;
 
+        if (pin)
+                vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+
         ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
         if (unlikely(ret != 0))
                 goto err;
@@ -177,6 +184,7 @@ int vmw_dmabuf_to_vram(struct vmw_private *dev_priv,
  * May only be called by the current master since it assumes that the
  * master lock is the current master's lock.
  * This function takes the master's lock in write mode.
+ * Flushes and unpins the query bo if @pin == true to avoid failures.
  *
  * @dev_priv: Driver private.
  * @buf: DMA buffer to move.
@@ -205,6 +213,9 @@ int vmw_dmabuf_to_start_of_vram(struct vmw_private *dev_priv,
         if (unlikely(ret != 0))
                 return ret;
 
+        if (pin)
+                vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+
         ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
         if (unlikely(ret != 0))
                 goto err_unlock;
@@ -276,3 +287,36 @@ void vmw_bo_get_guest_ptr(const struct ttm_buffer_object *bo,
                 ptr->offset = 0;
         }
 }
+
+
+/**
+ * vmw_bo_pin - Pin or unpin a buffer object without moving it.
+ *
+ * @bo: The buffer object. Must be reserved, and present either in VRAM
+ * or GMR memory.
+ * @pin: Whether to pin or unpin.
+ *
+ */
+void vmw_bo_pin(struct ttm_buffer_object *bo, bool pin)
+{
+        uint32_t pl_flags;
+        struct ttm_placement placement;
+        uint32_t old_mem_type = bo->mem.mem_type;
+        int ret;
+
+        BUG_ON(!atomic_read(&bo->reserved));
+        BUG_ON(old_mem_type != TTM_PL_VRAM &&
+               old_mem_type != VMW_PL_FLAG_GMR);
+
+        pl_flags = TTM_PL_FLAG_VRAM | VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED;
+        if (pin)
+                pl_flags |= TTM_PL_FLAG_NO_EVICT;
+
+        memset(&placement, 0, sizeof(placement));
+        placement.num_placement = 1;
+        placement.placement = &pl_flags;
+
+        ret = ttm_bo_validate(bo, &placement, false, true, true);
+
+        BUG_ON(ret != 0 || bo->mem.mem_type != old_mem_type);
+}
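The hunk above adds vmw_bo_pin(). A minimal usage sketch follows, assuming the buffer is already populated in VRAM or GMR as the helper requires; it is illustrative only and not part of the patch (the patch itself calls vmw_bo_pin() from the execbuf code further below).

/*
 * Illustrative only: pin a buffer in place, then unpin it later.
 * vmw_bo_pin() toggles TTM_PL_FLAG_NO_EVICT through ttm_bo_validate()
 * without changing the buffer's current placement.
 */
static void example_pin_in_place(struct ttm_buffer_object *bo)
{
        ttm_bo_reserve(bo, false, false, false, 0); /* sketch: error handling elided */
        vmw_bo_pin(bo, true);                       /* pin without moving */
        ttm_bo_unreserve(bo);

        /* ... use the buffer knowing it cannot be evicted ... */

        ttm_bo_reserve(bo, false, false, false, 0);
        vmw_bo_pin(bo, false);                      /* unpin */
        ttm_bo_unreserve(bo);
}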
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index ace4402214c6..7b88104144ca 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -213,6 +213,72 @@ static void vmw_print_capabilities(uint32_t capabilities)
                 DRM_INFO(" Screen Object 2.\n");
 }
 
+
+/**
+ * vmw_execbuf_prepare_dummy_query - Initialize a query result structure at
+ * the start of a buffer object.
+ *
+ * @dev_priv: The device private structure.
+ *
+ * This function will idle the buffer using an uninterruptible wait, then
+ * map the first page and initialize a pending occlusion query result structure,
+ * Finally it will unmap the buffer.
+ *
+ * TODO: Since we're only mapping a single page, we should optimize the map
+ * to use kmap_atomic / iomap_atomic.
+ */
+static void vmw_dummy_query_bo_prepare(struct vmw_private *dev_priv)
+{
+        struct ttm_bo_kmap_obj map;
+        volatile SVGA3dQueryResult *result;
+        bool dummy;
+        int ret;
+        struct ttm_bo_device *bdev = &dev_priv->bdev;
+        struct ttm_buffer_object *bo = dev_priv->dummy_query_bo;
+
+        ttm_bo_reserve(bo, false, false, false, 0);
+        spin_lock(&bdev->fence_lock);
+        ret = ttm_bo_wait(bo, false, false, false, TTM_USAGE_READWRITE);
+        spin_unlock(&bdev->fence_lock);
+        if (unlikely(ret != 0))
+                (void) vmw_fallback_wait(dev_priv, false, true, 0, false,
+                                         10*HZ);
+
+        ret = ttm_bo_kmap(bo, 0, 1, &map);
+        if (likely(ret == 0)) {
+                result = ttm_kmap_obj_virtual(&map, &dummy);
+                result->totalSize = sizeof(*result);
+                result->state = SVGA3D_QUERYSTATE_PENDING;
+                result->result32 = 0xff;
+                ttm_bo_kunmap(&map);
+        } else
+                DRM_ERROR("Dummy query buffer map failed.\n");
+        ttm_bo_unreserve(bo);
+}
+
+
+/**
+ * vmw_dummy_query_bo_create - create a bo to hold a dummy query result
+ *
+ * @dev_priv: A device private structure.
+ *
+ * This function creates a small buffer object that holds the query
+ * result for dummy queries emitted as query barriers.
+ * No interruptible waits are done within this function.
+ *
+ * Returns an error if bo creation fails.
+ */
+static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv)
+{
+        return ttm_bo_create(&dev_priv->bdev,
+                             PAGE_SIZE,
+                             ttm_bo_type_device,
+                             &vmw_vram_sys_placement,
+                             0, 0, false, NULL,
+                             &dev_priv->dummy_query_bo);
+}
+
+
 static int vmw_request_device(struct vmw_private *dev_priv)
 {
         int ret;
@@ -223,12 +289,29 @@ static int vmw_request_device(struct vmw_private *dev_priv)
                 return ret;
         }
         vmw_fence_fifo_up(dev_priv->fman);
+        ret = vmw_dummy_query_bo_create(dev_priv);
+        if (unlikely(ret != 0))
+                goto out_no_query_bo;
+        vmw_dummy_query_bo_prepare(dev_priv);
 
         return 0;
+
+out_no_query_bo:
+        vmw_fence_fifo_down(dev_priv->fman);
+        vmw_fifo_release(dev_priv, &dev_priv->fifo);
+        return ret;
 }
 
 static void vmw_release_device(struct vmw_private *dev_priv)
 {
+        /*
+         * Previous destructions should've released
+         * the pinned bo.
+         */
+
+        BUG_ON(dev_priv->pinned_bo != NULL);
+
+        ttm_bo_unref(&dev_priv->dummy_query_bo);
         vmw_fence_fifo_down(dev_priv->fman);
         vmw_fifo_release(dev_priv, &dev_priv->fifo);
 }
@@ -794,6 +877,8 @@ static void vmw_master_drop(struct drm_device *dev,
 
         vmw_fp->locked_master = drm_master_get(file_priv->master);
         ret = ttm_vt_lock(&vmaster->lock, false, vmw_fp->tfile);
+        vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+
         if (unlikely((ret != 0))) {
                 DRM_ERROR("Unable to lock TTM at VT switch.\n");
                 drm_master_put(&vmw_fp->locked_master);
@@ -844,6 +929,7 @@ static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
                  * This empties VRAM and unbinds all GMR bindings.
                  * Buffer contents is moved to swappable memory.
                  */
+                vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
                 ttm_bo_swapout_all(&dev_priv->bdev);
 
                 break;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index b22b96a491a4..d8d6a8659119 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -82,6 +82,7 @@ struct vmw_resource {
         void (*hw_destroy) (struct vmw_resource *res);
         void (*res_free) (struct vmw_resource *res);
         bool on_validate_list;
+        struct list_head query_head; /* Protected by the cmdbuf mutex */
         /* TODO is a generic snooper needed? */
 #if 0
         void (*snoop)(struct vmw_resource *res,
@@ -142,6 +143,7 @@ struct vmw_sw_context{
         uint32_t last_cid;
         bool cid_valid;
         bool kernel; /**< is the called made from the kernel */
+        struct vmw_resource *cur_ctx;
         uint32_t last_sid;
         uint32_t sid_translation;
         bool sid_valid;
@@ -155,6 +157,11 @@ struct vmw_sw_context{
         uint32_t cmd_bounce_size;
         struct vmw_resource *resources[VMWGFX_MAX_VALIDATIONS];
         uint32_t num_ref_resources;
+        uint32_t fence_flags;
+        struct list_head query_list;
+        struct ttm_buffer_object *cur_query_bo;
+        uint32_t cur_query_cid;
+        bool query_cid_valid;
 };
 
 struct vmw_legacy_display;
@@ -294,6 +301,16 @@ struct vmw_private {
 
         struct mutex release_mutex;
         uint32_t num_3d_resources;
+
+        /*
+         * Query processing. These members
+         * are protected by the cmdbuf mutex.
+         */
+
+        struct ttm_buffer_object *dummy_query_bo;
+        struct ttm_buffer_object *pinned_bo;
+        uint32_t query_cid;
+        bool dummy_query_bo_pinned;
 };
 
 static inline struct vmw_private *vmw_priv(struct drm_device *dev)
@@ -418,6 +435,7 @@ extern int vmw_dmabuf_unpin(struct vmw_private *vmw_priv,
                             bool interruptible);
 extern void vmw_bo_get_guest_ptr(const struct ttm_buffer_object *buf,
                                  SVGAGuestPtr *ptr);
+extern void vmw_bo_pin(struct ttm_buffer_object *bo, bool pin);
 
 /**
  * Misc Ioctl functionality - vmwgfx_ioctl.c
@@ -447,6 +465,8 @@ extern int vmw_fifo_send_fence(struct vmw_private *dev_priv,
 extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason);
 extern bool vmw_fifo_have_3d(struct vmw_private *dev_priv);
 extern bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv);
+extern int vmw_fifo_emit_dummy_query(struct vmw_private *dev_priv,
+                                     uint32_t cid);
 
 /**
  * TTM glue - vmwgfx_ttm_glue.c
@@ -485,6 +505,10 @@ extern int vmw_execbuf_process(struct drm_file *file_priv,
                                struct drm_vmw_fence_rep __user
                                *user_fence_rep);
 
+extern void
+vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
+                              bool only_on_cid_match, uint32_t cid);
+
 /**
  * IRQs and wating - vmwgfx_irq.c
  */
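The new query-tracking members added above are documented as protected by the cmdbuf mutex. A minimal sketch of that locking rule, with a hypothetical helper that is not part of the patch (the real enforcement is visible in vmw_execbuf_release_pinned_bo() in the execbuf hunks below):

/*
 * Illustrative only: readers and writers of the query-tracking state in
 * struct vmw_private (pinned_bo, query_cid, dummy_query_bo_pinned) take
 * cmdbuf_mutex first.
 */
static bool example_query_pinned_for_cid(struct vmw_private *dev_priv,
                                         uint32_t cid)
{
        bool pinned;

        mutex_lock(&dev_priv->cmdbuf_mutex);
        pinned = (dev_priv->pinned_bo != NULL &&
                  dev_priv->query_cid == cid);
        mutex_unlock(&dev_priv->cmdbuf_mutex);

        return pinned;
}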
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index dea0474f6f3b..efa1d1cc0414 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -44,7 +44,6 @@ static int vmw_cmd_ok(struct vmw_private *dev_priv,
         return 0;
 }
 
-
 static int vmw_resource_to_validate_list(struct vmw_sw_context *sw_context,
                                          struct vmw_resource **p_res)
 {
@@ -68,6 +67,54 @@ out:
         return ret;
 }
 
+/**
+ * vmw_bo_to_validate_list - add a bo to a validate list
+ *
+ * @sw_context: The software context used for this command submission batch.
+ * @bo: The buffer object to add.
+ * @fence_flags: Fence flags to be or'ed with any other fence flags for
+ * this buffer on this submission batch.
+ * @p_val_node: If non-NULL Will be updated with the validate node number
+ * on return.
+ *
+ * Returns -EINVAL if the limit of number of buffer objects per command
+ * submission is reached.
+ */
+static int vmw_bo_to_validate_list(struct vmw_sw_context *sw_context,
+                                   struct ttm_buffer_object *bo,
+                                   uint32_t fence_flags,
+                                   uint32_t *p_val_node)
+{
+        uint32_t val_node;
+        struct ttm_validate_buffer *val_buf;
+
+        val_node = vmw_dmabuf_validate_node(bo, sw_context->cur_val_buf);
+
+        if (unlikely(val_node >= VMWGFX_MAX_VALIDATIONS)) {
+                DRM_ERROR("Max number of DMA buffers per submission"
+                          " exceeded.\n");
+                return -EINVAL;
+        }
+
+        val_buf = &sw_context->val_bufs[val_node];
+        if (unlikely(val_node == sw_context->cur_val_buf)) {
+                val_buf->new_sync_obj_arg = NULL;
+                val_buf->bo = ttm_bo_reference(bo);
+                val_buf->usage = TTM_USAGE_READWRITE;
+                list_add_tail(&val_buf->head, &sw_context->validate_nodes);
+                ++sw_context->cur_val_buf;
+        }
+
+        val_buf->new_sync_obj_arg = (void *)
+                ((unsigned long) val_buf->new_sync_obj_arg | fence_flags);
+        sw_context->fence_flags |= fence_flags;
+
+        if (p_val_node)
+                *p_val_node = val_node;
+
+        return 0;
+}
+
 static int vmw_cmd_cid_check(struct vmw_private *dev_priv,
                              struct vmw_sw_context *sw_context,
                              SVGA3dCmdHeader *header)
@@ -94,6 +141,7 @@ static int vmw_cmd_cid_check(struct vmw_private *dev_priv,
 
         sw_context->last_cid = cmd->cid;
         sw_context->cid_valid = true;
+        sw_context->cur_ctx = ctx;
         return vmw_resource_to_validate_list(sw_context, &ctx);
 }
 
@@ -114,7 +162,8 @@ static int vmw_cmd_sid_check(struct vmw_private *dev_priv,
                 return 0;
         }
 
-        ret = vmw_user_surface_lookup_handle(dev_priv, sw_context->tfile,
+        ret = vmw_user_surface_lookup_handle(dev_priv,
+                                             sw_context->tfile,
                                              *sid, &srf);
         if (unlikely(ret != 0)) {
                 DRM_ERROR("Could ot find or use surface 0x%08x "
@@ -225,6 +274,168 @@ static int vmw_cmd_present_check(struct vmw_private *dev_priv,
         return vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.sid);
 }
 
+/**
+ * vmw_query_bo_switch_prepare - Prepare to switch pinned buffer for queries.
+ *
+ * @dev_priv: The device private structure.
+ * @cid: The hardware context for the next query.
+ * @new_query_bo: The new buffer holding query results.
+ * @sw_context: The software context used for this command submission.
+ *
+ * This function checks whether @new_query_bo is suitable for holding
+ * query results, and if another buffer currently is pinned for query
+ * results. If so, the function prepares the state of @sw_context for
+ * switching pinned buffers after successful submission of the current
+ * command batch. It also checks whether we're using a new query context.
+ * In that case, it makes sure we emit a query barrier for the old
+ * context before the current query buffer is fenced.
+ */
+static int vmw_query_bo_switch_prepare(struct vmw_private *dev_priv,
+                                       uint32_t cid,
+                                       struct ttm_buffer_object *new_query_bo,
+                                       struct vmw_sw_context *sw_context)
+{
+        int ret;
+        bool add_cid = false;
+        uint32_t cid_to_add;
+
+        if (unlikely(new_query_bo != sw_context->cur_query_bo)) {
+
+                if (unlikely(new_query_bo->num_pages > 4)) {
+                        DRM_ERROR("Query buffer too large.\n");
+                        return -EINVAL;
+                }
+
+                if (unlikely(sw_context->cur_query_bo != NULL)) {
+                        BUG_ON(!sw_context->query_cid_valid);
+                        add_cid = true;
+                        cid_to_add = sw_context->cur_query_cid;
+                        ret = vmw_bo_to_validate_list(sw_context,
+                                                      sw_context->cur_query_bo,
+                                                      DRM_VMW_FENCE_FLAG_EXEC,
+                                                      NULL);
+                        if (unlikely(ret != 0))
+                                return ret;
+                }
+                sw_context->cur_query_bo = new_query_bo;
+
+                ret = vmw_bo_to_validate_list(sw_context,
+                                              dev_priv->dummy_query_bo,
+                                              DRM_VMW_FENCE_FLAG_EXEC,
+                                              NULL);
+                if (unlikely(ret != 0))
+                        return ret;
+
+        }
+
+        if (unlikely(cid != sw_context->cur_query_cid &&
+                     sw_context->query_cid_valid)) {
+                add_cid = true;
+                cid_to_add = sw_context->cur_query_cid;
+        }
+
+        sw_context->cur_query_cid = cid;
+        sw_context->query_cid_valid = true;
+
+        if (add_cid) {
+                struct vmw_resource *ctx = sw_context->cur_ctx;
+
+                if (list_empty(&ctx->query_head))
+                        list_add_tail(&ctx->query_head,
+                                      &sw_context->query_list);
+                ret = vmw_bo_to_validate_list(sw_context,
+                                              dev_priv->dummy_query_bo,
+                                              DRM_VMW_FENCE_FLAG_EXEC,
+                                              NULL);
+                if (unlikely(ret != 0))
+                        return ret;
+        }
+        return 0;
+}
+
+
+/**
+ * vmw_query_bo_switch_commit - Finalize switching pinned query buffer
+ *
+ * @dev_priv: The device private structure.
+ * @sw_context: The software context used for this command submission batch.
+ *
+ * This function will check if we're switching query buffers, and will then,
+ * if no other query waits are issued this command submission batch,
+ * issue a dummy occlusion query wait used as a query barrier. When the fence
+ * object following that query wait has signaled, we are sure that all
+ * preseding queries have finished, and the old query buffer can be unpinned.
+ * However, since both the new query buffer and the old one are fenced with
+ * that fence, we can do an asynchronus unpin now, and be sure that the
+ * old query buffer won't be moved until the fence has signaled.
+ *
+ * As mentioned above, both the new - and old query buffers need to be fenced
+ * using a sequence emitted *after* calling this function.
+ */
+static void vmw_query_bo_switch_commit(struct vmw_private *dev_priv,
+                                       struct vmw_sw_context *sw_context)
+{
+
+        struct vmw_resource *ctx, *next_ctx;
+        int ret;
+
+        /*
+         * The validate list should still hold references to all
+         * contexts here.
+         */
+
+        list_for_each_entry_safe(ctx, next_ctx, &sw_context->query_list,
+                                 query_head) {
+                list_del_init(&ctx->query_head);
+
+                BUG_ON(!ctx->on_validate_list);
+
+                ret = vmw_fifo_emit_dummy_query(dev_priv, ctx->id);
+
+                if (unlikely(ret != 0))
+                        DRM_ERROR("Out of fifo space for dummy query.\n");
+        }
+
+        if (dev_priv->pinned_bo != sw_context->cur_query_bo) {
+                if (dev_priv->pinned_bo) {
+                        vmw_bo_pin(dev_priv->pinned_bo, false);
+                        ttm_bo_unref(&dev_priv->pinned_bo);
+                }
+
+                vmw_bo_pin(sw_context->cur_query_bo, true);
+
+                /*
+                 * We pin also the dummy_query_bo buffer so that we
+                 * don't need to validate it when emitting
+                 * dummy queries in context destroy paths.
+                 */
+
+                vmw_bo_pin(dev_priv->dummy_query_bo, true);
+                dev_priv->dummy_query_bo_pinned = true;
+
+                dev_priv->query_cid = sw_context->cur_query_cid;
+                dev_priv->pinned_bo =
+                        ttm_bo_reference(sw_context->cur_query_bo);
+        }
+}
+
+/**
+ * vmw_query_switch_backoff - clear query barrier list
+ * @sw_context: The sw context used for this submission batch.
+ *
+ * This function is used as part of an error path, where a previously
+ * set up list of query barriers needs to be cleared.
+ *
+ */
+static void vmw_query_switch_backoff(struct vmw_sw_context *sw_context)
+{
+        struct list_head *list, *next;
+
+        list_for_each_safe(list, next, &sw_context->query_list) {
+                list_del_init(list);
+        }
+}
+
 static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
                                    struct vmw_sw_context *sw_context,
                                    SVGAGuestPtr *ptr,
@@ -234,8 +445,6 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
         struct ttm_buffer_object *bo;
         uint32_t handle = ptr->gmrId;
         struct vmw_relocation *reloc;
-        uint32_t cur_validate_node;
-        struct ttm_validate_buffer *val_buf;
         int ret;
 
         ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo);
@@ -255,23 +464,11 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
         reloc = &sw_context->relocs[sw_context->cur_reloc++];
         reloc->location = ptr;
 
-        cur_validate_node = vmw_dmabuf_validate_node(bo, sw_context->cur_val_buf);
-        if (unlikely(cur_validate_node >= VMWGFX_MAX_VALIDATIONS)) {
-                DRM_ERROR("Max number of DMA buffers per submission"
-                          " exceeded.\n");
-                ret = -EINVAL;
+        ret = vmw_bo_to_validate_list(sw_context, bo, DRM_VMW_FENCE_FLAG_EXEC,
+                                      &reloc->index);
+        if (unlikely(ret != 0))
                 goto out_no_reloc;
-        }
 
-        reloc->index = cur_validate_node;
-        if (unlikely(cur_validate_node == sw_context->cur_val_buf)) {
-                val_buf = &sw_context->val_bufs[cur_validate_node];
-                val_buf->bo = ttm_bo_reference(bo);
-                val_buf->usage = TTM_USAGE_READWRITE;
-                val_buf->new_sync_obj_arg = (void *) DRM_VMW_FENCE_FLAG_EXEC;
-                list_add_tail(&val_buf->head, &sw_context->validate_nodes);
-                ++sw_context->cur_val_buf;
-        }
         *vmw_bo_p = vmw_bo;
         return 0;
 
@@ -303,8 +500,11 @@ static int vmw_cmd_end_query(struct vmw_private *dev_priv,
         if (unlikely(ret != 0))
                 return ret;
 
+        ret = vmw_query_bo_switch_prepare(dev_priv, cmd->q.cid,
+                                          &vmw_bo->base, sw_context);
+
         vmw_dmabuf_unreference(&vmw_bo);
-        return 0;
+        return ret;
 }
 
 static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
@@ -317,6 +517,7 @@ static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
                 SVGA3dCmdWaitForQuery q;
         } *cmd;
         int ret;
+        struct vmw_resource *ctx;
 
         cmd = container_of(header, struct vmw_query_cmd, header);
         ret = vmw_cmd_cid_check(dev_priv, sw_context, header);
@@ -330,6 +531,16 @@ static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
                 return ret;
 
         vmw_dmabuf_unreference(&vmw_bo);
+
+        /*
+         * This wait will act as a barrier for previous waits for this
+         * context.
+         */
+
+        ctx = sw_context->cur_ctx;
+        if (!list_empty(&ctx->query_head))
+                list_del_init(&ctx->query_head);
+
         return 0;
 }
 
@@ -687,6 +898,16 @@ static int vmw_validate_single_buffer(struct vmw_private *dev_priv,
 {
         int ret;
 
+
+        /*
+         * Don't validate pinned buffers.
+         */
+
+        if (bo == dev_priv->pinned_bo ||
+            (bo == dev_priv->dummy_query_bo &&
+             dev_priv->dummy_query_bo_pinned))
+                return 0;
+
         /**
          * Put BO in VRAM if there is space, otherwise as a GMR.
          * If there is no space in VRAM and GMR ids are all used up,
@@ -846,6 +1067,11 @@ int vmw_execbuf_process(struct drm_file *file_priv,
         sw_context->cur_reloc = 0;
         sw_context->cur_val_buf = 0;
         sw_context->num_ref_resources = 0;
+        sw_context->fence_flags = 0;
+        INIT_LIST_HEAD(&sw_context->query_list);
+        sw_context->cur_query_bo = dev_priv->pinned_bo;
+        sw_context->cur_query_cid = dev_priv->query_cid;
+        sw_context->query_cid_valid = (dev_priv->pinned_bo != NULL);
 
         INIT_LIST_HEAD(&sw_context->validate_nodes);
 
@@ -882,6 +1108,7 @@ int vmw_execbuf_process(struct drm_file *file_priv,
         memcpy(cmd, kernel_commands, command_size);
         vmw_fifo_commit(dev_priv, command_size);
 
+        vmw_query_bo_switch_commit(dev_priv, sw_context);
         ret = vmw_execbuf_fence_commands(file_priv, dev_priv,
                                          &fence,
                                          (user_fence_rep) ? &handle : NULL);
@@ -940,6 +1167,7 @@ int vmw_execbuf_process(struct drm_file *file_priv,
 out_err:
         vmw_free_relocations(sw_context);
 out_throttle:
+        vmw_query_switch_backoff(sw_context);
         ttm_eu_backoff_reservation(&sw_context->validate_nodes);
         vmw_clear_validations(sw_context);
 out_unlock:
@@ -947,6 +1175,113 @@ out_unlock:
         return ret;
 }
 
+/**
+ * vmw_execbuf_unpin_panic - Idle the fifo and unpin the query buffer.
+ *
+ * @dev_priv: The device private structure.
+ *
+ * This function is called to idle the fifo and unpin the query buffer
+ * if the normal way to do this hits an error, which should typically be
+ * extremely rare.
+ */
+static void vmw_execbuf_unpin_panic(struct vmw_private *dev_priv)
+{
+        DRM_ERROR("Can't unpin query buffer. Trying to recover.\n");
+
+        (void) vmw_fallback_wait(dev_priv, false, true, 0, false, 10*HZ);
+        vmw_bo_pin(dev_priv->pinned_bo, false);
+        vmw_bo_pin(dev_priv->dummy_query_bo, false);
+        dev_priv->dummy_query_bo_pinned = false;
+}
+
+
+/**
+ * vmw_execbuf_release_pinned_bo - Flush queries and unpin the pinned
+ * query bo.
+ *
+ * @dev_priv: The device private structure.
+ * @only_on_cid_match: Only flush and unpin if the current active query cid
+ * matches @cid.
+ * @cid: Optional context id to match.
+ *
+ * This function should be used to unpin the pinned query bo, or
+ * as a query barrier when we need to make sure that all queries have
+ * finished before the next fifo command. (For example on hardware
+ * context destructions where the hardware may otherwise leak unfinished
+ * queries).
+ *
+ * This function does not return any failure codes, but make attempts
+ * to do safe unpinning in case of errors.
+ *
+ * The function will synchronize on the previous query barrier, and will
+ * thus not finish until that barrier has executed.
+ */
+void vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
+                                   bool only_on_cid_match, uint32_t cid)
+{
+        int ret = 0;
+        struct list_head validate_list;
+        struct ttm_validate_buffer pinned_val, query_val;
+        struct vmw_fence_obj *fence;
+
+        mutex_lock(&dev_priv->cmdbuf_mutex);
+
+        if (dev_priv->pinned_bo == NULL)
+                goto out_unlock;
+
+        if (only_on_cid_match && cid != dev_priv->query_cid)
+                goto out_unlock;
+
+        INIT_LIST_HEAD(&validate_list);
+
+        pinned_val.new_sync_obj_arg = (void *)(unsigned long)
+                DRM_VMW_FENCE_FLAG_EXEC;
+        pinned_val.bo = ttm_bo_reference(dev_priv->pinned_bo);
+        list_add_tail(&pinned_val.head, &validate_list);
+
+        query_val.new_sync_obj_arg = pinned_val.new_sync_obj_arg;
+        query_val.bo = ttm_bo_reference(dev_priv->dummy_query_bo);
+        list_add_tail(&query_val.head, &validate_list);
+
+        do {
+                ret = ttm_eu_reserve_buffers(&validate_list);
+        } while (ret == -ERESTARTSYS);
+
+        if (unlikely(ret != 0)) {
+                vmw_execbuf_unpin_panic(dev_priv);
+                goto out_no_reserve;
+        }
+
+        ret = vmw_fifo_emit_dummy_query(dev_priv, dev_priv->query_cid);
+        if (unlikely(ret != 0)) {
+                vmw_execbuf_unpin_panic(dev_priv);
+                goto out_no_emit;
+        }
+
+        vmw_bo_pin(dev_priv->pinned_bo, false);
+        vmw_bo_pin(dev_priv->dummy_query_bo, false);
+        dev_priv->dummy_query_bo_pinned = false;
+
+        (void) vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL);
+        ttm_eu_fence_buffer_objects(&validate_list, (void *) fence);
+
+        ttm_bo_unref(&query_val.bo);
+        ttm_bo_unref(&pinned_val.bo);
+        ttm_bo_unref(&dev_priv->pinned_bo);
+
+out_unlock:
+        mutex_unlock(&dev_priv->cmdbuf_mutex);
+        return;
+
+out_no_emit:
+        ttm_eu_backoff_reservation(&validate_list);
+out_no_reserve:
+        ttm_bo_unref(&query_val.bo);
+        ttm_bo_unref(&pinned_val.bo);
+        ttm_bo_unref(&dev_priv->pinned_bo);
+        mutex_unlock(&dev_priv->cmdbuf_mutex);
+}
+
 
 int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
                       struct drm_file *file_priv)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index d7ed33e732a0..62d6377b8ee8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -505,3 +505,60 @@ int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *seqno)
 out_err:
         return ret;
 }
+
+/**
+ * vmw_fifo_emit_dummy_query - emits a dummy query to the fifo.
+ *
+ * @dev_priv: The device private structure.
+ * @cid: The hardware context id used for the query.
+ *
+ * This function is used to emit a dummy occlusion query with
+ * no primitives rendered between query begin and query end.
+ * It's used to provide a query barrier, in order to know that when
+ * this query is finished, all preceding queries are also finished.
+ *
+ * A Query results structure should have been initialized at the start
+ * of the dev_priv->dummy_query_bo buffer object. And that buffer object
+ * must also be either reserved or pinned when this function is called.
+ *
+ * Returns -ENOMEM on failure to reserve fifo space.
+ */
+int vmw_fifo_emit_dummy_query(struct vmw_private *dev_priv,
+                              uint32_t cid)
+{
+        /*
+         * A query wait without a preceding query end will
+         * actually finish all queries for this cid
+         * without writing to the query result structure.
+         */
+
+        struct ttm_buffer_object *bo = dev_priv->dummy_query_bo;
+        struct {
+                SVGA3dCmdHeader header;
+                SVGA3dCmdWaitForQuery body;
+        } *cmd;
+
+        cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
+
+        if (unlikely(cmd == NULL)) {
+                DRM_ERROR("Out of fifo space for dummy query.\n");
+                return -ENOMEM;
+        }
+
+        cmd->header.id = SVGA_3D_CMD_WAIT_FOR_QUERY;
+        cmd->header.size = sizeof(cmd->body);
+        cmd->body.cid = cid;
+        cmd->body.type = SVGA3D_QUERYTYPE_OCCLUSION;
+
+        if (bo->mem.mem_type == TTM_PL_VRAM) {
+                cmd->body.guestResult.gmrId = SVGA_GMR_FRAMEBUFFER;
+                cmd->body.guestResult.offset = bo->offset;
+        } else {
+                cmd->body.guestResult.gmrId = bo->mem.start;
+                cmd->body.guestResult.offset = 0;
+        }
+
+        vmw_fifo_commit(dev_priv, sizeof(*cmd));
+
+        return 0;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index c1b6ffd4ce7b..36c9d033220a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -126,7 +126,7 @@ static int vmw_resource_init(struct vmw_private *dev_priv,
         res->idr = idr;
         res->avail = false;
         res->dev_priv = dev_priv;
-
+        INIT_LIST_HEAD(&res->query_head);
         do {
                 if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0))
                         return -ENOMEM;
@@ -194,8 +194,12 @@ static void vmw_hw_context_destroy(struct vmw_resource *res)
         struct {
                 SVGA3dCmdHeader header;
                 SVGA3dCmdDestroyContext body;
-        } *cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
+        } *cmd;
 
+
+        vmw_execbuf_release_pinned_bo(dev_priv, true, res->id);
+
+        cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
         if (unlikely(cmd == NULL)) {
                 DRM_ERROR("Failed reserving FIFO space for surface "
                           "destruction.\n");