author     Eric Anholt <eric@anholt.net>  2009-08-29 15:49:51 -0400
committer  Eric Anholt <eric@anholt.net>  2009-08-29 20:37:21 -0400
commit     a09ba7faf75fa4b21980d81de8e5f3d5c0785ccf
tree       dd0e468fa4a1a287fc365dc5b4ce546b68234377  /drivers/gpu
parent     f8aed700c6ec46ddade6570004ce25332283b306
drm/i915: Fix CPU-spinning hangs related to fence usage by using an LRU.
The lack of a proper LRU was partially worked around by taking the fence
from the object containing the oldest seqno.  But if there are multiple
objects inactive, then they don't have seqnos, and the first fence reg
among them would be chosen.  If you were trying to copy data between two
mappings, this could result in each page fault stealing the fence from
the other argument, and your application hanging.

https://bugs.freedesktop.org/show_bug.cgi?id=23566
https://bugs.freedesktop.org/show_bug.cgi?id=23220
https://bugs.freedesktop.org/show_bug.cgi?id=23253
https://bugs.freedesktop.org/show_bug.cgi?id=23366

Cc: Stable Team <stable@kernel.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
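The policy the patch adopts is simple: an object that already holds a fence
register is just moved to the tail of the LRU, a free register is claimed if
one exists, and otherwise the least-recently-used register whose owner is not
pinned is stolen.  Below is a minimal userspace sketch of that policy over a
small fixed pool of registers; the names (fence_slot, get_fence, NUM_FENCES)
are hypothetical and the list handling is open-coded, whereas the driver
tracks the LRU with struct list_head and the kernel's list_move_tail(),
list_add_tail() and list_del_init() helpers, as the diff below shows.

/* Hypothetical userspace sketch of the LRU fence allocation idea; not the
 * driver code.  The real logic lives in i915_gem_object_get_fence_reg().
 */
#include <stdio.h>
#include <stdbool.h>

#define NUM_FENCES 4

struct fence_slot {
	int owner;                      /* object id holding this fence, -1 if free */
	bool pinned;                    /* pinned fences must not be stolen */
	struct fence_slot *prev, *next; /* position on the LRU list */
};

static struct fence_slot fences[NUM_FENCES];
static struct fence_slot lru = { .prev = &lru, .next = &lru }; /* LRU list head */

static void lru_del(struct fence_slot *f)
{
	f->prev->next = f->next;
	f->next->prev = f->prev;
}

static void lru_add_tail(struct fence_slot *f)
{
	f->prev = lru.prev;
	f->next = &lru;
	lru.prev->next = f;
	lru.prev = f;
}

/* Acquire a fence register for object `obj`: reuse and refresh if the object
 * already holds one, take a free register if available, otherwise steal the
 * least-recently-used unpinned register.
 */
static struct fence_slot *get_fence(int obj)
{
	struct fence_slot *f;
	int i;

	for (i = 0; i < NUM_FENCES; i++) {
		if (fences[i].owner == obj) {
			/* Already fenced: just move to the LRU tail. */
			lru_del(&fences[i]);
			lru_add_tail(&fences[i]);
			return &fences[i];
		}
	}

	for (i = 0; i < NUM_FENCES; i++) {
		if (fences[i].owner == -1) {
			f = &fences[i];        /* free register found */
			goto claim;
		}
	}

	/* Steal from the head of the LRU, skipping pinned registers. */
	for (f = lru.next; f != &lru; f = f->next) {
		if (!f->pinned) {
			lru_del(f);
			goto claim;
		}
	}
	return NULL;                           /* everything pinned: -ENOSPC in the driver */

claim:
	f->owner = obj;
	lru_add_tail(f);
	return f;
}

int main(void)
{
	int i;

	for (i = 0; i < NUM_FENCES; i++)
		fences[i].owner = -1;

	/* Six objects contend for four fences; the two oldest get stolen. */
	for (i = 0; i < 6; i++)
		printf("object %d -> fence %td\n", i, get_fence(i) - fences);
	return 0;
}

Run as-is, the sketch hands fences 0-3 to the first four objects, then steals
the two least-recently-used registers for objects 4 and 5, instead of two
faulting objects repeatedly stealing the same register from each other as the
pre-patch seqno heuristic allowed.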
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h |  6
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 86
2 files changed, 54 insertions(+), 38 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7537f57d8a87..11fc4b66c889 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -384,6 +384,9 @@ typedef struct drm_i915_private {
 	 */
 	struct list_head inactive_list;
 
+	/** LRU list of objects with fence regs on them. */
+	struct list_head fence_list;
+
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
 	 * outstanding.
@@ -451,6 +454,9 @@ struct drm_i915_gem_object {
 	/** This object's place on the active/flushing/inactive lists */
 	struct list_head list;
 
+	/** This object's place on the fenced object LRU */
+	struct list_head fence_list;
+
 	/**
 	 * This is set if the object is on the active or flushing lists
 	 * (has pending rendering), and is not set if it's on inactive (ready
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 140bee142fc2..0c07a755b3a3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -978,6 +978,7 @@ int
 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *file_priv)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_set_domain *args = data;
 	struct drm_gem_object *obj;
 	uint32_t read_domains = args->read_domains;
@@ -1010,8 +1011,18 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 		 obj, obj->size, read_domains, write_domain);
 #endif
 	if (read_domains & I915_GEM_DOMAIN_GTT) {
+		struct drm_i915_gem_object *obj_priv = obj->driver_private;
+
 		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
 
+		/* Update the LRU on the fence for the CPU access that's
+		 * about to occur.
+		 */
+		if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
+			list_move_tail(&obj_priv->fence_list,
+				       &dev_priv->mm.fence_list);
+		}
+
 		/* Silently promote "you're not bound, there was nothing to do"
 		 * to success, since the client was just asking us to
 		 * make sure everything was done.
@@ -1155,8 +1166,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	/* Need a new fence register? */
-	if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
-	    obj_priv->tiling_mode != I915_TILING_NONE) {
+	if (obj_priv->tiling_mode != I915_TILING_NONE) {
 		ret = i915_gem_object_get_fence_reg(obj);
 		if (ret) {
 			mutex_unlock(&dev->struct_mutex);
@@ -2208,6 +2218,12 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
 	struct drm_i915_gem_object *old_obj_priv = NULL;
 	int i, ret, avail;
 
+	/* Just update our place in the LRU if our fence is getting used. */
+	if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
+		list_move_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
+		return 0;
+	}
+
 	switch (obj_priv->tiling_mode) {
 	case I915_TILING_NONE:
 		WARN(1, "allocating a fence for non-tiled object?\n");
@@ -2229,7 +2245,6 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
 	}
 
 	/* First try to find a free reg */
-try_again:
 	avail = 0;
 	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
 		reg = &dev_priv->fence_regs[i];
@@ -2243,52 +2258,41 @@ try_again:
 
 	/* None available, try to steal one or wait for a user to finish */
 	if (i == dev_priv->num_fence_regs) {
-		uint32_t seqno = dev_priv->mm.next_gem_seqno;
+		struct drm_gem_object *old_obj = NULL;
 
 		if (avail == 0)
 			return -ENOSPC;
 
-		for (i = dev_priv->fence_reg_start;
-		     i < dev_priv->num_fence_regs; i++) {
-			uint32_t this_seqno;
-
-			reg = &dev_priv->fence_regs[i];
-			old_obj_priv = reg->obj->driver_private;
+		list_for_each_entry(old_obj_priv, &dev_priv->mm.fence_list,
+				    fence_list) {
+			old_obj = old_obj_priv->obj;
+
+			reg = &dev_priv->fence_regs[old_obj_priv->fence_reg];
 
 			if (old_obj_priv->pin_count)
 				continue;
 
+			/* Take a reference, as otherwise the wait_rendering
+			 * below may cause the object to get freed out from
+			 * under us.
+			 */
+			drm_gem_object_reference(old_obj);
+
 			/* i915 uses fences for GPU access to tiled buffers */
 			if (IS_I965G(dev) || !old_obj_priv->active)
 				break;
 
-			/* find the seqno of the first available fence */
-			this_seqno = old_obj_priv->last_rendering_seqno;
-			if (this_seqno != 0 &&
-			    reg->obj->write_domain == 0 &&
-			    i915_seqno_passed(seqno, this_seqno))
-				seqno = this_seqno;
-		}
-
-		/*
-		 * Now things get ugly... we have to wait for one of the
-		 * objects to finish before trying again.
-		 */
-		if (i == dev_priv->num_fence_regs) {
-			if (seqno == dev_priv->mm.next_gem_seqno) {
-				i915_gem_flush(dev,
-					       I915_GEM_GPU_DOMAINS,
-					       I915_GEM_GPU_DOMAINS);
-				seqno = i915_add_request(dev, NULL,
-							 I915_GEM_GPU_DOMAINS);
-				if (seqno == 0)
-					return -ENOMEM;
-			}
-
-			ret = i915_wait_request(dev, seqno);
-			if (ret)
-				return ret;
-			goto try_again;
+			/* This brings the object to the head of the LRU if it
+			 * had been written to.  The only way this should
+			 * result in us waiting longer than the expected
+			 * optimal amount of time is if there was a
+			 * fence-using buffer later that was read-only.
+			 */
+			i915_gem_object_flush_gpu_write_domain(old_obj);
+			ret = i915_gem_object_wait_rendering(old_obj);
+			if (ret != 0)
+				return ret;
+			break;
 		}
 
 		/*
@@ -2296,10 +2300,15 @@ try_again:
 		 * for this object next time we need it.
 		 */
 		i915_gem_release_mmap(reg->obj);
+		i = old_obj_priv->fence_reg;
 		old_obj_priv->fence_reg = I915_FENCE_REG_NONE;
+		list_del_init(&old_obj_priv->fence_list);
+		drm_gem_object_unreference(old_obj);
 	}
 
 	obj_priv->fence_reg = i;
+	list_add_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
+
 	reg->obj = obj;
 
 	if (IS_I965G(dev))
@@ -2342,6 +2351,7 @@ i915_gem_clear_fence_reg(struct drm_gem_object *obj)
 
 	dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL;
 	obj_priv->fence_reg = I915_FENCE_REG_NONE;
+	list_del_init(&obj_priv->fence_list);
 }
 
 /**
@@ -3595,9 +3605,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
 	 * Pre-965 chips need a fence register set up in order to
 	 * properly handle tiled surfaces.
 	 */
-	if (!IS_I965G(dev) &&
-	    obj_priv->fence_reg == I915_FENCE_REG_NONE &&
-	    obj_priv->tiling_mode != I915_TILING_NONE) {
+	if (!IS_I965G(dev) && obj_priv->tiling_mode != I915_TILING_NONE) {
 		ret = i915_gem_object_get_fence_reg(obj);
 		if (ret != 0) {
 			if (ret != -EBUSY && ret != -ERESTARTSYS)
@@ -3806,6 +3814,7 @@ int i915_gem_init_object(struct drm_gem_object *obj)
 	obj_priv->obj = obj;
 	obj_priv->fence_reg = I915_FENCE_REG_NONE;
 	INIT_LIST_HEAD(&obj_priv->list);
+	INIT_LIST_HEAD(&obj_priv->fence_list);
 
 	return 0;
 }
@@ -4253,6 +4262,7 @@ i915_gem_load(struct drm_device *dev)
 	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
 	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
 	INIT_LIST_HEAD(&dev_priv->mm.request_list);
+	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
 	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
 			  i915_gem_retire_work_handler);
 	dev_priv->mm.next_gem_seqno = 1;