diff options
author | Inaky Perez-Gonzalez <inaky@linux.intel.com> | 2009-09-04 17:50:59 -0400 |
---|---|---|
committer | Inaky Perez-Gonzalez <inaky@linux.intel.com> | 2009-10-19 02:55:55 -0400 |
commit | 923d708fed9d47c7b4d67694500d766337663e29 (patch) | |
tree | 54cc91baf9f495f924e467625e2bf50cbce79f48 | |
parent | ebc5f62b76ad540ff7b3e438506638009e7812a6 (diff) |
wimax/i2400m: fix reboot echo/ack barker deadlock
The i2400m-based devices can sometimes get into a sort of deadlock;
when they boot, they send a reboot "barker" (a magic number) and then
the driver has to echo that same barker to ack reception
(echo/ack). Then the device does a final ack by sending an ACK barker.
The first time this happens, we don't know ahead of time which barker
the device is going to send, as different device models and SKUs will
send different barkers depending on the EEPROM programming.
If the device has sent the barker before the driver has been able to
read it, the driver loses, as it doesn't know which barker it has to
echo/ack back. With older devices, we tried a couple of combinations
and that always worked; but now, with added support for more devices,
for which there is an unlimited number of new barkers, that is not an
option.
So we rework said case so that when the device gets stuck, we just
cycle through all the known types until one forces the device to send
an ack. Otherwise, the driver gives up and aborts.
Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
-rw-r--r-- | drivers/net/wimax/i2400m/fw.c | 63 | ||||
-rw-r--r-- | include/linux/wimax/i2400m.h | 2 |
2 files changed, 52 insertions, 13 deletions
diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c index 55fe011a9633..eef236d85af3 100644 --- a/drivers/net/wimax/i2400m/fw.c +++ b/drivers/net/wimax/i2400m/fw.c | |||
@@ -812,7 +812,7 @@ int i2400m_dnload_finalize(struct i2400m *i2400m, | |||
812 | * | 812 | * |
813 | * @i2400m: device descriptor | 813 | * @i2400m: device descriptor |
814 | * @flags: | 814 | * @flags: |
815 | * I2400M_BRI_SOFT: a reboot notification has been seen | 815 | * I2400M_BRI_SOFT: a reboot barker has been seen |
816 | * already, so don't wait for it. | 816 | * already, so don't wait for it. |
817 | * | 817 | * |
818 | * I2400M_BRI_NO_REBOOT: Don't send a reboot command, but wait | 818 | * I2400M_BRI_NO_REBOOT: Don't send a reboot command, but wait |
@@ -829,8 +829,9 @@ int i2400m_dnload_finalize(struct i2400m *i2400m, | |||
829 | * main phases to this: | 829 | * main phases to this: |
830 | * | 830 | * |
831 | * a. (1) send a reboot command and (2) get a reboot barker | 831 | * a. (1) send a reboot command and (2) get a reboot barker |
832 | * b. (1) ack the reboot sending a reboot barker and (2) getting an | 832 | * |
833 | * ack barker in return | 833 | * b. (1) echo/ack the reboot sending the reboot barker back and (2) |
834 | * getting an ack barker in return | ||
834 | * | 835 | * |
835 | * We want to skip (a) in some cases [soft]. The state machine is | 836 | * We want to skip (a) in some cases [soft]. The state machine is |
836 | * horrible, but it is basically: on each phase, send what has to be | 837 | * horrible, but it is basically: on each phase, send what has to be |
@@ -838,6 +839,16 @@ int i2400m_dnload_finalize(struct i2400m *i2400m, | |||
838 | * have to backtrack and retry, so we keep a max tries counter for | 839 | * have to backtrack and retry, so we keep a max tries counter for |
839 | * that. | 840 | * that. |
840 | * | 841 | * |
842 | * It sucks because we don't know ahead of time which is going to be | ||
843 | * the reboot barker (the device might send different ones depending | ||
844 | * on its EEPROM config) and once the device reboots and waits for the | ||
845 | * echo/ack reboot barker being sent back, it doesn't understand | ||
846 | * anything else. So we can be left at the point where we don't know | ||
847 | * what to send to it -- cold reset and bus reset seem to have little | ||
848 | * effect. So the function iterates (in this case) through all the | ||
849 | * known barkers and tries them all until an ACK is | ||
850 | * received. Otherwise, it gives up. | ||
851 | * | ||
841 | * If we get a timeout after sending a warm reset, we do it again. | 852 | * If we get a timeout after sending a warm reset, we do it again. |
842 | */ | 853 | */ |
843 | int i2400m_bootrom_init(struct i2400m *i2400m, enum i2400m_bri flags) | 854 | int i2400m_bootrom_init(struct i2400m *i2400m, enum i2400m_bri flags) |
@@ -848,6 +859,7 @@ int i2400m_bootrom_init(struct i2400m *i2400m, enum i2400m_bri flags) | |||
848 | struct i2400m_bootrom_header ack; | 859 | struct i2400m_bootrom_header ack; |
849 | int count = i2400m->bus_bm_retries; | 860 | int count = i2400m->bus_bm_retries; |
850 | int ack_timeout_cnt = 1; | 861 | int ack_timeout_cnt = 1; |
862 | unsigned i; | ||
851 | 863 | ||
852 | BUILD_BUG_ON(sizeof(*cmd) != sizeof(i2400m_barker_db[0].data)); | 864 | BUILD_BUG_ON(sizeof(*cmd) != sizeof(i2400m_barker_db[0].data)); |
853 | BUILD_BUG_ON(sizeof(ack) != sizeof(i2400m_ACK_BARKER)); | 865 | BUILD_BUG_ON(sizeof(ack) != sizeof(i2400m_ACK_BARKER)); |
@@ -858,6 +870,7 @@ int i2400m_bootrom_init(struct i2400m *i2400m, enum i2400m_bri flags) | |||
858 | if (flags & I2400M_BRI_SOFT) | 870 | if (flags & I2400M_BRI_SOFT) |
859 | goto do_reboot_ack; | 871 | goto do_reboot_ack; |
860 | do_reboot: | 872 | do_reboot: |
873 | ack_timeout_cnt = 1; | ||
861 | if (--count < 0) | 874 | if (--count < 0) |
862 | goto error_timeout; | 875 | goto error_timeout; |
863 | d_printf(4, dev, "device reboot: reboot command [%d # left]\n", | 876 | d_printf(4, dev, "device reboot: reboot command [%d # left]\n", |
@@ -869,22 +882,47 @@ do_reboot: | |||
869 | flags &= ~I2400M_BRI_NO_REBOOT; | 882 | flags &= ~I2400M_BRI_NO_REBOOT; |
870 | switch (result) { | 883 | switch (result) { |
871 | case -ERESTARTSYS: | 884 | case -ERESTARTSYS: |
885 | /* | ||
886 | * at this point, i2400m_bm_cmd(), through | ||
887 | * __i2400m_bm_ack_process(), has updated | ||
888 | * i2400m->barker and we are good to go. | ||
889 | */ | ||
872 | d_printf(4, dev, "device reboot: got reboot barker\n"); | 890 | d_printf(4, dev, "device reboot: got reboot barker\n"); |
873 | break; | 891 | break; |
874 | case -EISCONN: /* we don't know how it got here...but we follow it */ | 892 | case -EISCONN: /* we don't know how it got here...but we follow it */ |
875 | d_printf(4, dev, "device reboot: got ack barker - whatever\n"); | 893 | d_printf(4, dev, "device reboot: got ack barker - whatever\n"); |
876 | goto do_reboot; | 894 | goto do_reboot; |
877 | case -ETIMEDOUT: /* device has timed out, we might be in boot | 895 | case -ETIMEDOUT: |
878 | * mode already and expecting an ack, let's try | 896 | /* |
879 | * that */ | 897 | * Device has timed out, we might be in boot mode |
880 | if (i2400m->barker == NULL) { | 898 | * already and expecting an ack; if we don't know what |
881 | dev_info(dev, "warm reset timed out, unknown barker " | 899 | * the barker is, we just send them all. Cold reset |
882 | "type, rebooting\n"); | 900 | * and bus reset don't work. Beats me. |
883 | goto do_reboot; | 901 | */ |
884 | } else { | 902 | if (i2400m->barker != NULL) { |
885 | dev_info(dev, "warm reset timed out, trying an ack\n"); | 903 | dev_err(dev, "device boot: reboot barker timed out, " |
904 | "trying (set) %08x echo/ack\n", | ||
905 | le32_to_cpu(i2400m->barker->data[0])); | ||
886 | goto do_reboot_ack; | 906 | goto do_reboot_ack; |
887 | } | 907 | } |
908 | for (i = 0; i < i2400m_barker_db_used; i++) { | ||
909 | struct i2400m_barker_db *barker = &i2400m_barker_db[i]; | ||
910 | memcpy(cmd, barker->data, sizeof(barker->data)); | ||
911 | result = i2400m_bm_cmd(i2400m, cmd, sizeof(*cmd), | ||
912 | &ack, sizeof(ack), | ||
913 | I2400M_BM_CMD_RAW); | ||
914 | if (result == -EISCONN) { | ||
915 | dev_warn(dev, "device boot: got ack barker " | ||
916 | "after sending echo/ack barker " | ||
917 | "#%d/%08x; rebooting j.i.c.\n", | ||
918 | i, le32_to_cpu(barker->data[0])); | ||
919 | flags &= ~I2400M_BRI_NO_REBOOT; | ||
920 | goto do_reboot; | ||
921 | } | ||
922 | } | ||
923 | dev_err(dev, "device boot: tried all the echo/acks, could " | ||
924 | "not get device to respond; giving up"); | ||
925 | result = -ESHUTDOWN; | ||
888 | case -EPROTO: | 926 | case -EPROTO: |
889 | case -ESHUTDOWN: /* dev is gone */ | 927 | case -ESHUTDOWN: /* dev is gone */ |
890 | case -EINTR: /* user cancelled */ | 928 | case -EINTR: /* user cancelled */ |
@@ -892,6 +930,7 @@ do_reboot: | |||
892 | default: | 930 | default: |
893 | dev_err(dev, "device reboot: error %d while waiting " | 931 | dev_err(dev, "device reboot: error %d while waiting " |
894 | "for reboot barker - rebooting\n", result); | 932 | "for reboot barker - rebooting\n", result); |
933 | d_dump(1, dev, &ack, result); | ||
895 | goto do_reboot; | 934 | goto do_reboot; |
896 | } | 935 | } |
897 | /* At this point we ack back with 4 REBOOT barkers and expect | 936 | /* At this point we ack back with 4 REBOOT barkers and expect |
diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h index d6e2a3595682..fd5af05083cb 100644 --- a/include/linux/wimax/i2400m.h +++ b/include/linux/wimax/i2400m.h | |||
@@ -138,7 +138,7 @@ struct i2400m_bcf_hdr { | |||
138 | __le32 module_id; | 138 | __le32 module_id; |
139 | __le32 module_vendor; | 139 | __le32 module_vendor; |
140 | __le32 date; /* BCD YYYMMDD */ | 140 | __le32 date; /* BCD YYYMMDD */ |
141 | __le32 size; | 141 | __le32 size; /* in dwords */ |
142 | __le32 key_size; /* in dwords */ | 142 | __le32 key_size; /* in dwords */ |
143 | __le32 modulus_size; /* in dwords */ | 143 | __le32 modulus_size; /* in dwords */ |
144 | __le32 exponent_size; /* in dwords */ | 144 | __le32 exponent_size; /* in dwords */ |