aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMika Kuoppala <mika.kuoppala@linux.intel.com>2015-01-28 10:03:14 -0500
committerDaniel Vetter <daniel.vetter@ffwll.ch>2015-01-29 12:03:07 -0500
commitb8d24a06568368076ebd5a858a011699a97bfa42 (patch)
treecb627b2237cc410389311838260072429541337c
parent397f6fa6b1d5150add9043d7ac60e32307eb7c6b (diff)
drm/i915: Remove nested work in gpu error handling
Now that we declare gpu errors only through our own dedicated hangcheck workqueue, there is no need to have a separate workqueue for handling the resetting and waking up of the clients, as the deadlock concerns no longer apply. The only exception is i915_debugfs::i915_wedged_set, which triggers error handling from process context. However, as this is only used by the test harness, it is the responsibility of the test harness not to introduce hangs through both the debug interface and the hangcheck mechanism at the same time. Remove gpu_error.work and let the hangcheck work do the tasks that gpu_error.work used to do. v2: Add a big warning sign into i915_debugfs::i915_wedged_set (Chris) Cc: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c11
-rw-r--r--drivers/gpu/drm/i915/i915_dma.c1
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h2
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c34
4 files changed, 24 insertions, 24 deletions
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3b332a493674..211d4949a675 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3969,6 +3969,17 @@ i915_wedged_set(void *data, u64 val)
3969 struct drm_device *dev = data; 3969 struct drm_device *dev = data;
3970 struct drm_i915_private *dev_priv = dev->dev_private; 3970 struct drm_i915_private *dev_priv = dev->dev_private;
3971 3971
3972 /*
3973 * There is no safeguard against this debugfs entry colliding
3974 * with the hangcheck calling same i915_handle_error() in
3975 * parallel, causing an explosion. For now we assume that the
3976 * test harness is responsible enough not to inject gpu hangs
3977 * while it is writing to 'i915_wedged'
3978 */
3979
3980 if (i915_reset_in_progress(&dev_priv->gpu_error))
3981 return -EAGAIN;
3982
3972 intel_runtime_pm_get(dev_priv); 3983 intel_runtime_pm_get(dev_priv);
3973 3984
3974 i915_handle_error(dev, val, 3985 i915_handle_error(dev, val,
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 6eaf79504b58..1a46787129e7 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -945,7 +945,6 @@ int i915_driver_unload(struct drm_device *dev)
945 945
946 /* Free error state after interrupts are fully disabled. */ 946 /* Free error state after interrupts are fully disabled. */
947 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 947 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
948 cancel_work_sync(&dev_priv->gpu_error.work);
949 i915_destroy_error_state(dev); 948 i915_destroy_error_state(dev);
950 949
951 if (dev->pdev->msi_enabled) 950 if (dev->pdev->msi_enabled)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7add8cd6a54f..1c01034a9b0a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1352,8 +1352,6 @@ struct i915_gpu_error {
1352 spinlock_t lock; 1352 spinlock_t lock;
1353 /* Protected by the above dev->gpu_error.lock. */ 1353 /* Protected by the above dev->gpu_error.lock. */
1354 struct drm_i915_error_state *first_error; 1354 struct drm_i915_error_state *first_error;
1355 struct work_struct work;
1356
1357 1355
1358 unsigned long missed_irq_rings; 1356 unsigned long missed_irq_rings;
1359 1357
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 23bfe2232b6a..53c5f9e39fe3 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2421,19 +2421,15 @@ static void i915_error_wake_up(struct drm_i915_private *dev_priv,
2421} 2421}
2422 2422
2423/** 2423/**
2424 * i915_error_work_func - do process context error handling work 2424 * i915_reset_and_wakeup - do process context error handling work
2425 * @work: work struct
2426 * 2425 *
2427 * Fire an error uevent so userspace can see that a hang or error 2426 * Fire an error uevent so userspace can see that a hang or error
2428 * was detected. 2427 * was detected.
2429 */ 2428 */
2430static void i915_error_work_func(struct work_struct *work) 2429static void i915_reset_and_wakeup(struct drm_device *dev)
2431{ 2430{
2432 struct i915_gpu_error *error = container_of(work, struct i915_gpu_error, 2431 struct drm_i915_private *dev_priv = to_i915(dev);
2433 work); 2432 struct i915_gpu_error *error = &dev_priv->gpu_error;
2434 struct drm_i915_private *dev_priv =
2435 container_of(error, struct drm_i915_private, gpu_error);
2436 struct drm_device *dev = dev_priv->dev;
2437 char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; 2433 char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
2438 char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; 2434 char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
2439 char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL }; 2435 char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
@@ -2600,10 +2596,10 @@ static void i915_report_and_clear_eir(struct drm_device *dev)
2600} 2596}
2601 2597
2602/** 2598/**
2603 * i915_handle_error - handle an error interrupt 2599 * i915_handle_error - handle a gpu error
2604 * @dev: drm device 2600 * @dev: drm device
2605 * 2601 *
2606 * Do some basic checking of regsiter state at error interrupt time and 2602 * Do some basic checking of regsiter state at error time and
2607 * dump it to the syslog. Also call i915_capture_error_state() to make 2603 * dump it to the syslog. Also call i915_capture_error_state() to make
2608 * sure we get a record and make it available in debugfs. Fire a uevent 2604 * sure we get a record and make it available in debugfs. Fire a uevent
2609 * so userspace knows something bad happened (should trigger collection 2605 * so userspace knows something bad happened (should trigger collection
@@ -2616,6 +2612,9 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
2616 va_list args; 2612 va_list args;
2617 char error_msg[80]; 2613 char error_msg[80];
2618 2614
2615 if (WARN_ON(mutex_is_locked(&dev_priv->dev->struct_mutex)))
2616 return;
2617
2619 va_start(args, fmt); 2618 va_start(args, fmt);
2620 vscnprintf(error_msg, sizeof(error_msg), fmt, args); 2619 vscnprintf(error_msg, sizeof(error_msg), fmt, args);
2621 va_end(args); 2620 va_end(args);
@@ -2628,9 +2627,9 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
2628 &dev_priv->gpu_error.reset_counter); 2627 &dev_priv->gpu_error.reset_counter);
2629 2628
2630 /* 2629 /*
2631 * Wakeup waiting processes so that the reset work function 2630 * Wakeup waiting processes so that the reset function
2632 * i915_error_work_func doesn't deadlock trying to grab various 2631 * i915_reset_and_wakeup doesn't deadlock trying to grab
2633 * locks. By bumping the reset counter first, the woken 2632 * various locks. By bumping the reset counter first, the woken
2634 * processes will see a reset in progress and back off, 2633 * processes will see a reset in progress and back off,
2635 * releasing their locks and then wait for the reset completion. 2634 * releasing their locks and then wait for the reset completion.
2636 * We must do this for _all_ gpu waiters that might hold locks 2635 * We must do this for _all_ gpu waiters that might hold locks
@@ -2643,13 +2642,7 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
2643 i915_error_wake_up(dev_priv, false); 2642 i915_error_wake_up(dev_priv, false);
2644 } 2643 }
2645 2644
2646 /* 2645 i915_reset_and_wakeup(dev);
2647 * Our reset work can grab modeset locks (since it needs to reset the
2648 * state of outstanding pagelips). Hence it must not be run on our own
2649 * dev-priv->wq work queue for otherwise the flush_work in the pageflip
2650 * code will deadlock.
2651 */
2652 schedule_work(&dev_priv->gpu_error.work);
2653} 2646}
2654 2647
2655/* Called from drm generic code, passed 'crtc' which 2648/* Called from drm generic code, passed 'crtc' which
@@ -4345,7 +4338,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
4345 4338
4346 INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func); 4339 INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func);
4347 INIT_WORK(&dev_priv->dig_port_work, i915_digport_work_func); 4340 INIT_WORK(&dev_priv->dig_port_work, i915_digport_work_func);
4348 INIT_WORK(&dev_priv->gpu_error.work, i915_error_work_func);
4349 INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work); 4341 INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work);
4350 INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); 4342 INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
4351 4343