aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorCindy H Kao <cindy.h.kao@intel.com>2010-04-07 23:07:47 -0400
committerInaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>2010-05-11 17:05:39 -0400
commit599e59538448ee49d5470f226bb191b2f78aa3a2 (patch)
tree03c8c92a907c19e28bb5f9eef0a1121081515b31 /drivers
parentf4e413458104210bc29aa5c437882c68b4b20100 (diff)
wimax/i2400m: add the error recovery mechanism on TX path
This patch adds an error recovery mechanism on the TX path. The intention is to bring the device back to a known state whenever TX sees -110 (-ETIMEDOUT) while copying data to the HW FIFO. The TX failure could mean that the device bus or function is stuck, so the current error recovery implementation triggers a bus reset in the expectation that this brings the device back. Since the TX work is done in a thread context, there may already be a queue of TX works that all hit the -ETIMEDOUT error condition because the device is already stuck. We don't want consecutive bus resets simply because multiple TX works in the queue all hit the same device erratum, so the flag "error_recovery" is introduced to denote whether we are ready to take an error recovery. See the @error_recovery doc in i2400m.h. Signed-off-by: Cindy H Kao <cindy.h.kao@intel.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/net/wimax/i2400m/driver.c74
-rw-r--r--drivers/net/wimax/i2400m/i2400m.h14
-rw-r--r--drivers/net/wimax/i2400m/sdio-tx.c4
3 files changed, 92 insertions, 0 deletions
diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/net/wimax/i2400m/driver.c
index 1674dba43f83..d83fe84407bf 100644
--- a/drivers/net/wimax/i2400m/driver.c
+++ b/drivers/net/wimax/i2400m/driver.c
@@ -395,6 +395,16 @@ retry:
395 result = i2400m_dev_initialize(i2400m); 395 result = i2400m_dev_initialize(i2400m);
396 if (result < 0) 396 if (result < 0)
397 goto error_dev_initialize; 397 goto error_dev_initialize;
398
399 /* We don't want any additional unwanted error recovery triggered
400 * from any other context so if anything went wrong before we come
401 * here, let's keep i2400m->error_recovery untouched and leave it to
402 * dev_reset_handle(). See dev_reset_handle(). */
403
404 atomic_dec(&i2400m->error_recovery);
405 /* Everything has worked so far, so from now on we are ready to
406 * take error recovery if it's required. */
407
398 /* At this point, reports will come for the device and set it 408 /* At this point, reports will come for the device and set it
399 * to the right state if it is different than UNINITIALIZED */ 409 * to the right state if it is different than UNINITIALIZED */
400 d_fnend(3, dev, "(net_dev %p [i2400m %p]) = %d\n", 410 d_fnend(3, dev, "(net_dev %p [i2400m %p]) = %d\n",
@@ -770,6 +780,66 @@ int i2400m_dev_reset_handle(struct i2400m *i2400m, const char *reason)
770EXPORT_SYMBOL_GPL(i2400m_dev_reset_handle); 780EXPORT_SYMBOL_GPL(i2400m_dev_reset_handle);
771 781
772 782
783 /*
784 * The actual work of error recovery.
785 *
786 * The current implementation of error recovery is to trigger a bus reset.
787 */
788static
789void __i2400m_error_recovery(struct work_struct *ws)
790{
791 struct i2400m_work *iw = container_of(ws, struct i2400m_work, ws);
792 struct i2400m *i2400m = iw->i2400m;
793
794 i2400m_reset(i2400m, I2400M_RT_BUS);
795
796 i2400m_put(i2400m);
797 kfree(iw);
798 return;
799}
800
801/*
802 * Schedule a work struct for error recovery.
803 *
804 * The intention of error recovery is to bring the device back to a
805 * known state whenever TX sees -110 (-ETIMEDOUT) while copying data to
806 * the device. The TX failure could mean the device bus is stuck, so the
807 * current error recovery implementation is to trigger a bus reset on the
808 * device in the hope that it brings the device back.
809 *
810 * The actual work of error recovery has to be in a thread context because
811 * it is kicked off in the TX thread (i2400ms->tx_workqueue) which is to be
812 * destroyed by the error recovery mechanism (currently a bus reset).
813 *
814 * Also, there may already be a queue of TX works that all hit
815 * the -ETIMEDOUT error condition because the device is already stuck.
816 * Since bus reset is used as the error recovery mechanism and we don't
817 * want consecutive bus resets simply because the multiple TX works
818 * in the queue all hit the same device erratum, the flag "error_recovery"
819 * is introduced for preventing unwanted consecutive bus resets.
820 *
821 * Error recovery shall only be invoked again if previous one was completed.
822 * The flag error_recovery is set when error recovery mechanism is scheduled,
823 * and is checked when we need to schedule another error recovery. If it is
824 * in place already, then we shouldn't schedule another one.
825 */
826void i2400m_error_recovery(struct i2400m *i2400m)
827{
828 struct device *dev = i2400m_dev(i2400m);
829
830 if (atomic_add_return(1, &i2400m->error_recovery) == 1) {
831 if (i2400m_schedule_work(i2400m, __i2400m_error_recovery,
832 GFP_ATOMIC, NULL, 0) < 0) {
833 dev_err(dev, "run out of memory for "
834 "scheduling an error recovery ?\n");
835 atomic_dec(&i2400m->error_recovery);
836 }
837 } else
838 atomic_dec(&i2400m->error_recovery);
839 return;
840}
841EXPORT_SYMBOL_GPL(i2400m_error_recovery);
842
773/* 843/*
774 * Alloc the command and ack buffers for boot mode 844 * Alloc the command and ack buffers for boot mode
775 * 845 *
@@ -839,6 +909,10 @@ void i2400m_init(struct i2400m *i2400m)
839 atomic_set(&i2400m->bus_reset_retries, 0); 909 atomic_set(&i2400m->bus_reset_retries, 0);
840 910
841 i2400m->alive = 0; 911 i2400m->alive = 0;
912
913 /* initialize error_recovery to 1 for denoting we
914 * are not yet ready to take any error recovery */
915 atomic_set(&i2400m->error_recovery, 1);
842} 916}
843EXPORT_SYMBOL_GPL(i2400m_init); 917EXPORT_SYMBOL_GPL(i2400m_init);
844 918
diff --git a/drivers/net/wimax/i2400m/i2400m.h b/drivers/net/wimax/i2400m/i2400m.h
index ad8e6a3be1e3..7a9c2c5b25cb 100644
--- a/drivers/net/wimax/i2400m/i2400m.h
+++ b/drivers/net/wimax/i2400m/i2400m.h
@@ -545,6 +545,15 @@ struct i2400m_barker_db;
545 * all the device reboot events detected can be still handled properly 545 * all the device reboot events detected can be still handled properly
546 * by either dev_reset_handle() or .pre_reset/.post_reset as long as 546 * by either dev_reset_handle() or .pre_reset/.post_reset as long as
547 * the driver presents. It is set 0 along with @updown in dev_stop(). 547 * the driver presents. It is set 0 along with @updown in dev_stop().
548 *
549 * @error_recovery: flag to denote if we are ready to take an error recovery.
550 * 0 for ready to take an error recovery; 1 for not ready. It is
551 * initialized to 1 during probe() since we don't intend to take any error
552 * recovery during probe(). It is decremented by 1 whenever dev_start()
553 * succeeds to indicate we are ready to take error recovery from now on.
554 * It is checked every time we want to schedule an error recovery. If an
555 * error recovery is already in place (error_recovery is set to 1), we
556 * should not schedule another one until the last one is done.
548 */ 557 */
549struct i2400m { 558struct i2400m {
550 struct wimax_dev wimax_dev; /* FIRST! See doc */ 559 struct wimax_dev wimax_dev; /* FIRST! See doc */
@@ -625,6 +634,10 @@ struct i2400m {
625 634
626 /* if the device is expected to be alive */ 635 /* if the device is expected to be alive */
627 unsigned alive; 636 unsigned alive;
637
638 /* 0 if we are ready for error recovery; 1 if not ready */
639 atomic_t error_recovery;
640
628}; 641};
629 642
630 643
@@ -847,6 +860,7 @@ void i2400m_put(struct i2400m *i2400m)
847extern int i2400m_dev_reset_handle(struct i2400m *, const char *); 860extern int i2400m_dev_reset_handle(struct i2400m *, const char *);
848extern int i2400m_pre_reset(struct i2400m *); 861extern int i2400m_pre_reset(struct i2400m *);
849extern int i2400m_post_reset(struct i2400m *); 862extern int i2400m_post_reset(struct i2400m *);
863extern void i2400m_error_recovery(struct i2400m *);
850 864
851/* 865/*
852 * _setup()/_release() are called by the probe/disconnect functions of 866 * _setup()/_release() are called by the probe/disconnect functions of
diff --git a/drivers/net/wimax/i2400m/sdio-tx.c b/drivers/net/wimax/i2400m/sdio-tx.c
index 412b6a8eaef2..b53cd1c80e3e 100644
--- a/drivers/net/wimax/i2400m/sdio-tx.c
+++ b/drivers/net/wimax/i2400m/sdio-tx.c
@@ -98,6 +98,10 @@ void i2400ms_tx_submit(struct work_struct *ws)
98 tx_msg_size, result); 98 tx_msg_size, result);
99 } 99 }
100 100
101 if (result == -ETIMEDOUT) {
102 i2400m_error_recovery(i2400m);
103 break;
104 }
101 d_printf(2, dev, "TX: %zub submitted\n", tx_msg_size); 105 d_printf(2, dev, "TX: %zub submitted\n", tx_msg_size);
102 } 106 }
103 107