aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/char/ipmi/ipmi_bt_sm.c
diff options
context:
space:
mode:
authorCorey Minyard <minyard@acm.org>2006-12-06 23:41:14 -0500
committerLinus Torvalds <torvalds@woody.osdl.org>2006-12-07 11:39:47 -0500
commit4d7cbac7c870ca66d8fb27d68188efbb5de2dffa (patch)
tree8abf21f9eb42347cfa9d7a071bce9390c6995583 /drivers/char/ipmi/ipmi_bt_sm.c
parent168b35a7f67c5a8189e6b92780dfb5262604057c (diff)
[PATCH] IPMI: Fix BT long busy
The IPMI BT subdriver has been patched to survive "long busy" timeouts seen during firmware upgrades and resets. The patch never returns the HOSED state, synthesizes response messages with meaningful completion codes, and recovers gracefully when the hardware finishes the long busy. The subdriver now issues a "Get BT Capabilities" command and properly uses those results. More informative completion codes are returned on error from transaction starts; this logic was propagated to the KCS and SMIC subdrivers. Finally, indent and other style quirks were normalized. Signed-off-by: Rocky Craig <rocky.craig@hp.com> Signed-off-by: Corey Minyard <minyard@acm.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/char/ipmi/ipmi_bt_sm.c')
-rw-r--r--drivers/char/ipmi/ipmi_bt_sm.c641
1 files changed, 393 insertions, 248 deletions
diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c
index 0030cd8e2e95..6c59baa887a8 100644
--- a/drivers/char/ipmi/ipmi_bt_sm.c
+++ b/drivers/char/ipmi/ipmi_bt_sm.c
@@ -33,11 +33,13 @@
33#include <linux/ipmi_msgdefs.h> /* for completion codes */ 33#include <linux/ipmi_msgdefs.h> /* for completion codes */
34#include "ipmi_si_sm.h" 34#include "ipmi_si_sm.h"
35 35
36static int bt_debug = 0x00; /* Production value 0, see following flags */ 36#define BT_DEBUG_OFF 0 /* Used in production */
37#define BT_DEBUG_ENABLE 1 /* Generic messages */
38#define BT_DEBUG_MSG 2 /* Prints all request/response buffers */
39#define BT_DEBUG_STATES 4 /* Verbose look at state changes */
40
41static int bt_debug = BT_DEBUG_OFF;
37 42
38#define BT_DEBUG_ENABLE 1
39#define BT_DEBUG_MSG 2
40#define BT_DEBUG_STATES 4
41module_param(bt_debug, int, 0644); 43module_param(bt_debug, int, 0644);
42MODULE_PARM_DESC(bt_debug, "debug bitmask, 1=enable, 2=messages, 4=states"); 44MODULE_PARM_DESC(bt_debug, "debug bitmask, 1=enable, 2=messages, 4=states");
43 45
@@ -47,38 +49,54 @@ MODULE_PARM_DESC(bt_debug, "debug bitmask, 1=enable, 2=messages, 4=states");
47 Since the Open IPMI architecture is single-message oriented at this 49 Since the Open IPMI architecture is single-message oriented at this
48 stage, the queue depth of BT is of no concern. */ 50 stage, the queue depth of BT is of no concern. */
49 51
50#define BT_NORMAL_TIMEOUT 5000000 /* seconds in microseconds */ 52#define BT_NORMAL_TIMEOUT 5 /* seconds */
51#define BT_RETRY_LIMIT 2 53#define BT_NORMAL_RETRY_LIMIT 2
52#define BT_RESET_DELAY 6000000 /* 6 seconds after warm reset */ 54#define BT_RESET_DELAY 6 /* seconds after warm reset */
55
56/* States are written in chronological order and usually cover
57 multiple rows of the state table discussion in the IPMI spec. */
53 58
54enum bt_states { 59enum bt_states {
55 BT_STATE_IDLE, 60 BT_STATE_IDLE = 0, /* Order is critical in this list */
56 BT_STATE_XACTION_START, 61 BT_STATE_XACTION_START,
57 BT_STATE_WRITE_BYTES, 62 BT_STATE_WRITE_BYTES,
58 BT_STATE_WRITE_END,
59 BT_STATE_WRITE_CONSUME, 63 BT_STATE_WRITE_CONSUME,
60 BT_STATE_B2H_WAIT, 64 BT_STATE_READ_WAIT,
61 BT_STATE_READ_END, 65 BT_STATE_CLEAR_B2H,
62 BT_STATE_RESET1, /* These must come last */ 66 BT_STATE_READ_BYTES,
67 BT_STATE_RESET1, /* These must come last */
63 BT_STATE_RESET2, 68 BT_STATE_RESET2,
64 BT_STATE_RESET3, 69 BT_STATE_RESET3,
65 BT_STATE_RESTART, 70 BT_STATE_RESTART,
66 BT_STATE_HOSED 71 BT_STATE_PRINTME,
72 BT_STATE_CAPABILITIES_BEGIN,
73 BT_STATE_CAPABILITIES_END,
74 BT_STATE_LONG_BUSY /* BT doesn't get hosed :-) */
67}; 75};
68 76
77/* Macros seen at the end of state "case" blocks. They help with legibility
78 and debugging. */
79
80#define BT_STATE_CHANGE(X,Y) { bt->state = X; return Y; }
81
82#define BT_SI_SM_RETURN(Y) { last_printed = BT_STATE_PRINTME; return Y; }
83
69struct si_sm_data { 84struct si_sm_data {
70 enum bt_states state; 85 enum bt_states state;
71 enum bt_states last_state; /* assist printing and resets */
72 unsigned char seq; /* BT sequence number */ 86 unsigned char seq; /* BT sequence number */
73 struct si_sm_io *io; 87 struct si_sm_io *io;
74 unsigned char write_data[IPMI_MAX_MSG_LENGTH]; 88 unsigned char write_data[IPMI_MAX_MSG_LENGTH];
75 int write_count; 89 int write_count;
76 unsigned char read_data[IPMI_MAX_MSG_LENGTH]; 90 unsigned char read_data[IPMI_MAX_MSG_LENGTH];
77 int read_count; 91 int read_count;
78 int truncated; 92 int truncated;
79 long timeout; 93 long timeout; /* microseconds countdown */
80 unsigned int error_retries; /* end of "common" fields */ 94 int error_retries; /* end of "common" fields */
81 int nonzero_status; /* hung BMCs stay all 0 */ 95 int nonzero_status; /* hung BMCs stay all 0 */
96 enum bt_states complete; /* to divert the state machine */
97 int BT_CAP_outreqs;
98 long BT_CAP_req2rsp;
99 int BT_CAP_retries; /* Recommended retries */
82}; 100};
83 101
84#define BT_CLR_WR_PTR 0x01 /* See IPMI 1.5 table 11.6.4 */ 102#define BT_CLR_WR_PTR 0x01 /* See IPMI 1.5 table 11.6.4 */
@@ -111,86 +129,118 @@ struct si_sm_data {
111static char *state2txt(unsigned char state) 129static char *state2txt(unsigned char state)
112{ 130{
113 switch (state) { 131 switch (state) {
114 case BT_STATE_IDLE: return("IDLE"); 132 case BT_STATE_IDLE: return("IDLE");
115 case BT_STATE_XACTION_START: return("XACTION"); 133 case BT_STATE_XACTION_START: return("XACTION");
116 case BT_STATE_WRITE_BYTES: return("WR_BYTES"); 134 case BT_STATE_WRITE_BYTES: return("WR_BYTES");
117 case BT_STATE_WRITE_END: return("WR_END"); 135 case BT_STATE_WRITE_CONSUME: return("WR_CONSUME");
118 case BT_STATE_WRITE_CONSUME: return("WR_CONSUME"); 136 case BT_STATE_READ_WAIT: return("RD_WAIT");
119 case BT_STATE_B2H_WAIT: return("B2H_WAIT"); 137 case BT_STATE_CLEAR_B2H: return("CLEAR_B2H");
120 case BT_STATE_READ_END: return("RD_END"); 138 case BT_STATE_READ_BYTES: return("RD_BYTES");
121 case BT_STATE_RESET1: return("RESET1"); 139 case BT_STATE_RESET1: return("RESET1");
122 case BT_STATE_RESET2: return("RESET2"); 140 case BT_STATE_RESET2: return("RESET2");
123 case BT_STATE_RESET3: return("RESET3"); 141 case BT_STATE_RESET3: return("RESET3");
124 case BT_STATE_RESTART: return("RESTART"); 142 case BT_STATE_RESTART: return("RESTART");
125 case BT_STATE_HOSED: return("HOSED"); 143 case BT_STATE_LONG_BUSY: return("LONG_BUSY");
144 case BT_STATE_CAPABILITIES_BEGIN: return("CAP_BEGIN");
145 case BT_STATE_CAPABILITIES_END: return("CAP_END");
126 } 146 }
127 return("BAD STATE"); 147 return("BAD STATE");
128} 148}
129#define STATE2TXT state2txt(bt->state) 149#define STATE2TXT state2txt(bt->state)
130 150
131static char *status2txt(unsigned char status, char *buf) 151static char *status2txt(unsigned char status)
132{ 152{
153 /*
154 * This cannot be called by two threads at the same time and
155 * the buffer is always consumed immediately, so the static is
156 * safe to use.
157 */
158 static char buf[40];
159
133 strcpy(buf, "[ "); 160 strcpy(buf, "[ ");
134 if (status & BT_B_BUSY) strcat(buf, "B_BUSY "); 161 if (status & BT_B_BUSY)
135 if (status & BT_H_BUSY) strcat(buf, "H_BUSY "); 162 strcat(buf, "B_BUSY ");
136 if (status & BT_OEM0) strcat(buf, "OEM0 "); 163 if (status & BT_H_BUSY)
137 if (status & BT_SMS_ATN) strcat(buf, "SMS "); 164 strcat(buf, "H_BUSY ");
138 if (status & BT_B2H_ATN) strcat(buf, "B2H "); 165 if (status & BT_OEM0)
139 if (status & BT_H2B_ATN) strcat(buf, "H2B "); 166 strcat(buf, "OEM0 ");
167 if (status & BT_SMS_ATN)
168 strcat(buf, "SMS ");
169 if (status & BT_B2H_ATN)
170 strcat(buf, "B2H ");
171 if (status & BT_H2B_ATN)
172 strcat(buf, "H2B ");
140 strcat(buf, "]"); 173 strcat(buf, "]");
141 return buf; 174 return buf;
142} 175}
143#define STATUS2TXT(buf) status2txt(status, buf) 176#define STATUS2TXT status2txt(status)
177
178/* called externally at insmod time, and internally on cleanup */
144 179
145/* This will be called from within this module on a hosed condition */
146#define FIRST_SEQ 0
147static unsigned int bt_init_data(struct si_sm_data *bt, struct si_sm_io *io) 180static unsigned int bt_init_data(struct si_sm_data *bt, struct si_sm_io *io)
148{ 181{
149 bt->state = BT_STATE_IDLE; 182 memset(bt, 0, sizeof(struct si_sm_data));
150 bt->last_state = BT_STATE_IDLE; 183 if (bt->io != io) { /* external: one-time only things */
151 bt->seq = FIRST_SEQ; 184 bt->io = io;
152 bt->io = io; 185 bt->seq = 0;
153 bt->write_count = 0; 186 }
154 bt->read_count = 0; 187 bt->state = BT_STATE_IDLE; /* start here */
155 bt->error_retries = 0; 188 bt->complete = BT_STATE_IDLE; /* end here */
156 bt->nonzero_status = 0; 189 bt->BT_CAP_req2rsp = BT_NORMAL_TIMEOUT * 1000000;
157 bt->truncated = 0; 190 bt->BT_CAP_retries = BT_NORMAL_RETRY_LIMIT;
158 bt->timeout = BT_NORMAL_TIMEOUT; 191 /* BT_CAP_outreqs == zero is a flag to read BT Capabilities */
159 return 3; /* We claim 3 bytes of space; ought to check SPMI table */ 192 return 3; /* We claim 3 bytes of space; ought to check SPMI table */
160} 193}
161 194
195/* Jam a completion code (probably an error) into a response */
196
197static void force_result(struct si_sm_data *bt, unsigned char completion_code)
198{
199 bt->read_data[0] = 4; /* # following bytes */
200 bt->read_data[1] = bt->write_data[1] | 4; /* Odd NetFn/LUN */
201 bt->read_data[2] = bt->write_data[2]; /* seq (ignored) */
202 bt->read_data[3] = bt->write_data[3]; /* Command */
203 bt->read_data[4] = completion_code;
204 bt->read_count = 5;
205}
206
207/* The upper state machine starts here */
208
162static int bt_start_transaction(struct si_sm_data *bt, 209static int bt_start_transaction(struct si_sm_data *bt,
163 unsigned char *data, 210 unsigned char *data,
164 unsigned int size) 211 unsigned int size)
165{ 212{
166 unsigned int i; 213 unsigned int i;
167 214
168 if ((size < 2) || (size > (IPMI_MAX_MSG_LENGTH - 2))) 215 if (size < 2)
169 return -1; 216 return IPMI_REQ_LEN_INVALID_ERR;
217 if (size > IPMI_MAX_MSG_LENGTH)
218 return IPMI_REQ_LEN_EXCEEDED_ERR;
170 219
171 if ((bt->state != BT_STATE_IDLE) && (bt->state != BT_STATE_HOSED)) 220 if (bt->state == BT_STATE_LONG_BUSY)
172 return -2; 221 return IPMI_NODE_BUSY_ERR;
222
223 if (bt->state != BT_STATE_IDLE)
224 return IPMI_NOT_IN_MY_STATE_ERR;
173 225
174 if (bt_debug & BT_DEBUG_MSG) { 226 if (bt_debug & BT_DEBUG_MSG) {
175 printk(KERN_WARNING "+++++++++++++++++++++++++++++++++++++\n"); 227 printk(KERN_WARNING "BT: +++++++++++++++++ New command\n");
176 printk(KERN_WARNING "BT: write seq=0x%02X:", bt->seq); 228 printk(KERN_WARNING "BT: NetFn/LUN CMD [%d data]:", size - 2);
177 for (i = 0; i < size; i ++) 229 for (i = 0; i < size; i ++)
178 printk (" %02x", data[i]); 230 printk (" %02x", data[i]);
179 printk("\n"); 231 printk("\n");
180 } 232 }
181 bt->write_data[0] = size + 1; /* all data plus seq byte */ 233 bt->write_data[0] = size + 1; /* all data plus seq byte */
182 bt->write_data[1] = *data; /* NetFn/LUN */ 234 bt->write_data[1] = *data; /* NetFn/LUN */
183 bt->write_data[2] = bt->seq; 235 bt->write_data[2] = bt->seq++;
184 memcpy(bt->write_data + 3, data + 1, size - 1); 236 memcpy(bt->write_data + 3, data + 1, size - 1);
185 bt->write_count = size + 2; 237 bt->write_count = size + 2;
186
187 bt->error_retries = 0; 238 bt->error_retries = 0;
188 bt->nonzero_status = 0; 239 bt->nonzero_status = 0;
189 bt->read_count = 0;
190 bt->truncated = 0; 240 bt->truncated = 0;
191 bt->state = BT_STATE_XACTION_START; 241 bt->state = BT_STATE_XACTION_START;
192 bt->last_state = BT_STATE_IDLE; 242 bt->timeout = bt->BT_CAP_req2rsp;
193 bt->timeout = BT_NORMAL_TIMEOUT; 243 force_result(bt, IPMI_ERR_UNSPECIFIED);
194 return 0; 244 return 0;
195} 245}
196 246
@@ -198,38 +248,30 @@ static int bt_start_transaction(struct si_sm_data *bt,
198 it calls this. Strip out the length and seq bytes. */ 248 it calls this. Strip out the length and seq bytes. */
199 249
200static int bt_get_result(struct si_sm_data *bt, 250static int bt_get_result(struct si_sm_data *bt,
201 unsigned char *data, 251 unsigned char *data,
202 unsigned int length) 252 unsigned int length)
203{ 253{
204 int i, msg_len; 254 int i, msg_len;
205 255
206 msg_len = bt->read_count - 2; /* account for length & seq */ 256 msg_len = bt->read_count - 2; /* account for length & seq */
207 /* Always NetFn, Cmd, cCode */
208 if (msg_len < 3 || msg_len > IPMI_MAX_MSG_LENGTH) { 257 if (msg_len < 3 || msg_len > IPMI_MAX_MSG_LENGTH) {
209 printk(KERN_DEBUG "BT results: bad msg_len = %d\n", msg_len); 258 force_result(bt, IPMI_ERR_UNSPECIFIED);
210 data[0] = bt->write_data[1] | 0x4; /* Kludge a response */
211 data[1] = bt->write_data[3];
212 data[2] = IPMI_ERR_UNSPECIFIED;
213 msg_len = 3; 259 msg_len = 3;
214 } else { 260 }
215 data[0] = bt->read_data[1]; 261 data[0] = bt->read_data[1];
216 data[1] = bt->read_data[3]; 262 data[1] = bt->read_data[3];
217 if (length < msg_len) 263 if (length < msg_len || bt->truncated) {
218 bt->truncated = 1; 264 data[2] = IPMI_ERR_MSG_TRUNCATED;
219 if (bt->truncated) { /* can be set in read_all_bytes() */ 265 msg_len = 3;
220 data[2] = IPMI_ERR_MSG_TRUNCATED; 266 } else
221 msg_len = 3; 267 memcpy(data + 2, bt->read_data + 4, msg_len - 2);
222 } else
223 memcpy(data + 2, bt->read_data + 4, msg_len - 2);
224 268
225 if (bt_debug & BT_DEBUG_MSG) { 269 if (bt_debug & BT_DEBUG_MSG) {
226 printk (KERN_WARNING "BT: res (raw)"); 270 printk (KERN_WARNING "BT: result %d bytes:", msg_len);
227 for (i = 0; i < msg_len; i++) 271 for (i = 0; i < msg_len; i++)
228 printk(" %02x", data[i]); 272 printk(" %02x", data[i]);
229 printk ("\n"); 273 printk ("\n");
230 }
231 } 274 }
232 bt->read_count = 0; /* paranoia */
233 return msg_len; 275 return msg_len;
234} 276}
235 277
@@ -238,22 +280,40 @@ static int bt_get_result(struct si_sm_data *bt,
238 280
239static void reset_flags(struct si_sm_data *bt) 281static void reset_flags(struct si_sm_data *bt)
240{ 282{
283 if (bt_debug)
284 printk(KERN_WARNING "IPMI BT: flag reset %s\n",
285 status2txt(BT_STATUS));
241 if (BT_STATUS & BT_H_BUSY) 286 if (BT_STATUS & BT_H_BUSY)
242 BT_CONTROL(BT_H_BUSY); 287 BT_CONTROL(BT_H_BUSY); /* force clear */
243 if (BT_STATUS & BT_B_BUSY) 288 BT_CONTROL(BT_CLR_WR_PTR); /* always reset */
244 BT_CONTROL(BT_B_BUSY); 289 BT_CONTROL(BT_SMS_ATN); /* always clear */
245 BT_CONTROL(BT_CLR_WR_PTR); 290 BT_INTMASK_W(BT_BMC_HWRST);
246 BT_CONTROL(BT_SMS_ATN); 291}
247 292
248 if (BT_STATUS & BT_B2H_ATN) { 293/* Get rid of an unwanted/stale response. This should only be needed for
249 int i; 294 BMCs that support multiple outstanding requests. */
250 BT_CONTROL(BT_H_BUSY); 295
251 BT_CONTROL(BT_B2H_ATN); 296static void drain_BMC2HOST(struct si_sm_data *bt)
252 BT_CONTROL(BT_CLR_RD_PTR); 297{
253 for (i = 0; i < IPMI_MAX_MSG_LENGTH + 2; i++) 298 int i, size;
254 BMC2HOST; 299
255 BT_CONTROL(BT_H_BUSY); 300 if (!(BT_STATUS & BT_B2H_ATN)) /* Not signalling a response */
256 } 301 return;
302
303 BT_CONTROL(BT_H_BUSY); /* now set */
304 BT_CONTROL(BT_B2H_ATN); /* always clear */
305 BT_STATUS; /* pause */
306 BT_CONTROL(BT_B2H_ATN); /* some BMCs are stubborn */
307 BT_CONTROL(BT_CLR_RD_PTR); /* always reset */
308 if (bt_debug)
309 printk(KERN_WARNING "IPMI BT: stale response %s; ",
310 status2txt(BT_STATUS));
311 size = BMC2HOST;
312 for (i = 0; i < size ; i++)
313 BMC2HOST;
314 BT_CONTROL(BT_H_BUSY); /* now clear */
315 if (bt_debug)
316 printk("drained %d bytes\n", size + 1);
257} 317}
258 318
259static inline void write_all_bytes(struct si_sm_data *bt) 319static inline void write_all_bytes(struct si_sm_data *bt)
@@ -261,201 +321,256 @@ static inline void write_all_bytes(struct si_sm_data *bt)
261 int i; 321 int i;
262 322
263 if (bt_debug & BT_DEBUG_MSG) { 323 if (bt_debug & BT_DEBUG_MSG) {
264 printk(KERN_WARNING "BT: write %d bytes seq=0x%02X", 324 printk(KERN_WARNING "BT: write %d bytes seq=0x%02X",
265 bt->write_count, bt->seq); 325 bt->write_count, bt->seq);
266 for (i = 0; i < bt->write_count; i++) 326 for (i = 0; i < bt->write_count; i++)
267 printk (" %02x", bt->write_data[i]); 327 printk (" %02x", bt->write_data[i]);
268 printk ("\n"); 328 printk ("\n");
269 } 329 }
270 for (i = 0; i < bt->write_count; i++) 330 for (i = 0; i < bt->write_count; i++)
271 HOST2BMC(bt->write_data[i]); 331 HOST2BMC(bt->write_data[i]);
272} 332}
273 333
274static inline int read_all_bytes(struct si_sm_data *bt) 334static inline int read_all_bytes(struct si_sm_data *bt)
275{ 335{
276 unsigned char i; 336 unsigned char i;
277 337
338 /* length is "framing info", minimum = 4: NetFn, Seq, Cmd, cCode.
339 Keep layout of first four bytes aligned with write_data[] */
340
278 bt->read_data[0] = BMC2HOST; 341 bt->read_data[0] = BMC2HOST;
279 bt->read_count = bt->read_data[0]; 342 bt->read_count = bt->read_data[0];
280 if (bt_debug & BT_DEBUG_MSG)
281 printk(KERN_WARNING "BT: read %d bytes:", bt->read_count);
282 343
283 /* minimum: length, NetFn, Seq, Cmd, cCode == 5 total, or 4 more
284 following the length byte. */
285 if (bt->read_count < 4 || bt->read_count >= IPMI_MAX_MSG_LENGTH) { 344 if (bt->read_count < 4 || bt->read_count >= IPMI_MAX_MSG_LENGTH) {
286 if (bt_debug & BT_DEBUG_MSG) 345 if (bt_debug & BT_DEBUG_MSG)
287 printk("bad length %d\n", bt->read_count); 346 printk(KERN_WARNING "BT: bad raw rsp len=%d\n",
347 bt->read_count);
288 bt->truncated = 1; 348 bt->truncated = 1;
289 return 1; /* let next XACTION START clean it up */ 349 return 1; /* let next XACTION START clean it up */
290 } 350 }
291 for (i = 1; i <= bt->read_count; i++) 351 for (i = 1; i <= bt->read_count; i++)
292 bt->read_data[i] = BMC2HOST; 352 bt->read_data[i] = BMC2HOST;
293 bt->read_count++; /* account for the length byte */ 353 bt->read_count++; /* Account internally for length byte */
294 354
295 if (bt_debug & BT_DEBUG_MSG) { 355 if (bt_debug & BT_DEBUG_MSG) {
296 for (i = 0; i < bt->read_count; i++) 356 int max = bt->read_count;
357
358 printk(KERN_WARNING "BT: got %d bytes seq=0x%02X",
359 max, bt->read_data[2]);
360 if (max > 16)
361 max = 16;
362 for (i = 0; i < max; i++)
297 printk (" %02x", bt->read_data[i]); 363 printk (" %02x", bt->read_data[i]);
298 printk ("\n"); 364 printk ("%s\n", bt->read_count == max ? "" : " ...");
299 } 365 }
300 if (bt->seq != bt->write_data[2]) /* idiot check */
301 printk(KERN_DEBUG "BT: internal error: sequence mismatch\n");
302 366
303 /* per the spec, the (NetFn, Seq, Cmd) tuples should match */ 367 /* per the spec, the (NetFn[1], Seq[2], Cmd[3]) tuples must match */
304 if ((bt->read_data[3] == bt->write_data[3]) && /* Cmd */ 368 if ((bt->read_data[3] == bt->write_data[3]) &&
305 (bt->read_data[2] == bt->write_data[2]) && /* Sequence */ 369 (bt->read_data[2] == bt->write_data[2]) &&
306 ((bt->read_data[1] & 0xF8) == (bt->write_data[1] & 0xF8))) 370 ((bt->read_data[1] & 0xF8) == (bt->write_data[1] & 0xF8)))
307 return 1; 371 return 1;
308 372
309 if (bt_debug & BT_DEBUG_MSG) 373 if (bt_debug & BT_DEBUG_MSG)
310 printk(KERN_WARNING "BT: bad packet: " 374 printk(KERN_WARNING "IPMI BT: bad packet: "
311 "want 0x(%02X, %02X, %02X) got (%02X, %02X, %02X)\n", 375 "want 0x(%02X, %02X, %02X) got (%02X, %02X, %02X)\n",
312 bt->write_data[1], bt->write_data[2], bt->write_data[3], 376 bt->write_data[1] | 0x04, bt->write_data[2], bt->write_data[3],
313 bt->read_data[1], bt->read_data[2], bt->read_data[3]); 377 bt->read_data[1], bt->read_data[2], bt->read_data[3]);
314 return 0; 378 return 0;
315} 379}
316 380
317/* Modifies bt->state appropriately, need to get into the bt_event() switch */ 381/* Restart if retries are left, or return an error completion code */
318 382
319static void error_recovery(struct si_sm_data *bt, char *reason) 383static enum si_sm_result error_recovery(struct si_sm_data *bt,
384 unsigned char status,
385 unsigned char cCode)
320{ 386{
321 unsigned char status; 387 char *reason;
322 char buf[40]; /* For getting status */
323 388
324 bt->timeout = BT_NORMAL_TIMEOUT; /* various places want to retry */ 389 bt->timeout = bt->BT_CAP_req2rsp;
325 390
326 status = BT_STATUS; 391 switch (cCode) {
327 printk(KERN_DEBUG "BT: %s in %s %s\n", reason, STATE2TXT, 392 case IPMI_TIMEOUT_ERR:
328 STATUS2TXT(buf)); 393 reason = "timeout";
394 break;
395 default:
396 reason = "internal error";
397 break;
398 }
399
400 printk(KERN_WARNING "IPMI BT: %s in %s %s ", /* open-ended line */
401 reason, STATE2TXT, STATUS2TXT);
329 402
403 /* Per the IPMI spec, retries are based on the sequence number
404 known only to this module, so manage a restart here. */
330 (bt->error_retries)++; 405 (bt->error_retries)++;
331 if (bt->error_retries > BT_RETRY_LIMIT) { 406 if (bt->error_retries < bt->BT_CAP_retries) {
332 printk(KERN_DEBUG "retry limit (%d) exceeded\n", BT_RETRY_LIMIT); 407 printk("%d retries left\n",
333 bt->state = BT_STATE_HOSED; 408 bt->BT_CAP_retries - bt->error_retries);
334 if (!bt->nonzero_status) 409 bt->state = BT_STATE_RESTART;
335 printk(KERN_ERR "IPMI: BT stuck, try power cycle\n"); 410 return SI_SM_CALL_WITHOUT_DELAY;
336 else if (bt->error_retries <= BT_RETRY_LIMIT + 1) {
337 printk(KERN_DEBUG "IPMI: BT reset (takes 5 secs)\n");
338 bt->state = BT_STATE_RESET1;
339 }
340 return;
341 } 411 }
342 412
343 /* Sometimes the BMC queues get in an "off-by-one" state...*/ 413 printk("failed %d retries, sending error response\n",
344 if ((bt->state == BT_STATE_B2H_WAIT) && (status & BT_B2H_ATN)) { 414 bt->BT_CAP_retries);
345 printk(KERN_DEBUG "retry B2H_WAIT\n"); 415 if (!bt->nonzero_status)
346 return; 416 printk(KERN_ERR "IPMI BT: stuck, try power cycle\n");
417
418 /* this is most likely during insmod */
419 else if (bt->seq <= (unsigned char)(bt->BT_CAP_retries & 0xFF)) {
420 printk(KERN_WARNING "IPMI: BT reset (takes 5 secs)\n");
421 bt->state = BT_STATE_RESET1;
422 return SI_SM_CALL_WITHOUT_DELAY;
347 } 423 }
348 424
349 printk(KERN_DEBUG "restart command\n"); 425 /* Concoct a useful error message, set up the next state, and
350 bt->state = BT_STATE_RESTART; 426 be done with this sequence. */
427
428 bt->state = BT_STATE_IDLE;
429 switch (cCode) {
430 case IPMI_TIMEOUT_ERR:
431 if (status & BT_B_BUSY) {
432 cCode = IPMI_NODE_BUSY_ERR;
433 bt->state = BT_STATE_LONG_BUSY;
434 }
435 break;
436 default:
437 break;
438 }
439 force_result(bt, cCode);
440 return SI_SM_TRANSACTION_COMPLETE;
351} 441}
352 442
353/* Check the status and (possibly) advance the BT state machine. The 443/* Check status and (usually) take action and change this state machine. */
354 default return is SI_SM_CALL_WITH_DELAY. */
355 444
356static enum si_sm_result bt_event(struct si_sm_data *bt, long time) 445static enum si_sm_result bt_event(struct si_sm_data *bt, long time)
357{ 446{
358 unsigned char status; 447 unsigned char status, BT_CAP[8];
359 char buf[40]; /* For getting status */ 448 static enum bt_states last_printed = BT_STATE_PRINTME;
360 int i; 449 int i;
361 450
362 status = BT_STATUS; 451 status = BT_STATUS;
363 bt->nonzero_status |= status; 452 bt->nonzero_status |= status;
364 453 if ((bt_debug & BT_DEBUG_STATES) && (bt->state != last_printed)) {
365 if ((bt_debug & BT_DEBUG_STATES) && (bt->state != bt->last_state))
366 printk(KERN_WARNING "BT: %s %s TO=%ld - %ld \n", 454 printk(KERN_WARNING "BT: %s %s TO=%ld - %ld \n",
367 STATE2TXT, 455 STATE2TXT,
368 STATUS2TXT(buf), 456 STATUS2TXT,
369 bt->timeout, 457 bt->timeout,
370 time); 458 time);
371 bt->last_state = bt->state; 459 last_printed = bt->state;
460 }
372 461
373 if (bt->state == BT_STATE_HOSED) 462 /* Commands that time out may still (eventually) provide a response.
374 return SI_SM_HOSED; 463 This stale response will get in the way of a new response so remove
464 it if possible (hopefully during IDLE). Even if it comes up later
465 it will be rejected by its (now-forgotten) seq number. */
466
467 if ((bt->state < BT_STATE_WRITE_BYTES) && (status & BT_B2H_ATN)) {
468 drain_BMC2HOST(bt);
469 BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY);
470 }
375 471
376 if (bt->state != BT_STATE_IDLE) { /* do timeout test */ 472 if ((bt->state != BT_STATE_IDLE) &&
473 (bt->state < BT_STATE_PRINTME)) { /* check timeout */
377 bt->timeout -= time; 474 bt->timeout -= time;
378 if ((bt->timeout < 0) && (bt->state < BT_STATE_RESET1)) { 475 if ((bt->timeout < 0) && (bt->state < BT_STATE_RESET1))
379 error_recovery(bt, "timed out"); 476 return error_recovery(bt,
380 return SI_SM_CALL_WITHOUT_DELAY; 477 status,
381 } 478 IPMI_TIMEOUT_ERR);
382 } 479 }
383 480
384 switch (bt->state) { 481 switch (bt->state) {
385 482
386 case BT_STATE_IDLE: /* check for asynchronous messages */ 483 /* Idle state first checks for asynchronous messages from another
484 channel, then does some opportunistic housekeeping. */
485
486 case BT_STATE_IDLE:
387 if (status & BT_SMS_ATN) { 487 if (status & BT_SMS_ATN) {
388 BT_CONTROL(BT_SMS_ATN); /* clear it */ 488 BT_CONTROL(BT_SMS_ATN); /* clear it */
389 return SI_SM_ATTN; 489 return SI_SM_ATTN;
390 } 490 }
391 return SI_SM_IDLE;
392 491
393 case BT_STATE_XACTION_START: 492 if (status & BT_H_BUSY) /* clear a leftover H_BUSY */
394 if (status & BT_H_BUSY) {
395 BT_CONTROL(BT_H_BUSY); 493 BT_CONTROL(BT_H_BUSY);
396 break;
397 }
398 if (status & BT_B2H_ATN)
399 break;
400 bt->state = BT_STATE_WRITE_BYTES;
401 return SI_SM_CALL_WITHOUT_DELAY; /* for logging */
402 494
403 case BT_STATE_WRITE_BYTES: 495 /* Read BT capabilities if it hasn't been done yet */
496 if (!bt->BT_CAP_outreqs)
497 BT_STATE_CHANGE(BT_STATE_CAPABILITIES_BEGIN,
498 SI_SM_CALL_WITHOUT_DELAY);
499 bt->timeout = bt->BT_CAP_req2rsp;
500 BT_SI_SM_RETURN(SI_SM_IDLE);
501
502 case BT_STATE_XACTION_START:
404 if (status & (BT_B_BUSY | BT_H2B_ATN)) 503 if (status & (BT_B_BUSY | BT_H2B_ATN))
405 break; 504 BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY);
505 if (BT_STATUS & BT_H_BUSY)
506 BT_CONTROL(BT_H_BUSY); /* force clear */
507 BT_STATE_CHANGE(BT_STATE_WRITE_BYTES,
508 SI_SM_CALL_WITHOUT_DELAY);
509
510 case BT_STATE_WRITE_BYTES:
511 if (status & BT_H_BUSY)
512 BT_CONTROL(BT_H_BUSY); /* clear */
406 BT_CONTROL(BT_CLR_WR_PTR); 513 BT_CONTROL(BT_CLR_WR_PTR);
407 write_all_bytes(bt); 514 write_all_bytes(bt);
408 BT_CONTROL(BT_H2B_ATN); /* clears too fast to catch? */ 515 BT_CONTROL(BT_H2B_ATN); /* can clear too fast to catch */
409 bt->state = BT_STATE_WRITE_CONSUME; 516 BT_STATE_CHANGE(BT_STATE_WRITE_CONSUME,
410 return SI_SM_CALL_WITHOUT_DELAY; /* it MIGHT sail through */ 517 SI_SM_CALL_WITHOUT_DELAY);
411
412 case BT_STATE_WRITE_CONSUME: /* BMCs usually blow right thru here */
413 if (status & (BT_H2B_ATN | BT_B_BUSY))
414 break;
415 bt->state = BT_STATE_B2H_WAIT;
416 /* fall through with status */
417
418 /* Stay in BT_STATE_B2H_WAIT until a packet matches. However, spinning
419 hard here, constantly reading status, seems to hold off the
420 generation of B2H_ATN so ALWAYS return CALL_WITH_DELAY. */
421
422 case BT_STATE_B2H_WAIT:
423 if (!(status & BT_B2H_ATN))
424 break;
425
426 /* Assume ordered, uncached writes: no need to wait */
427 if (!(status & BT_H_BUSY))
428 BT_CONTROL(BT_H_BUSY); /* set */
429 BT_CONTROL(BT_B2H_ATN); /* clear it, ACK to the BMC */
430 BT_CONTROL(BT_CLR_RD_PTR); /* reset the queue */
431 i = read_all_bytes(bt);
432 BT_CONTROL(BT_H_BUSY); /* clear */
433 if (!i) /* Try this state again */
434 break;
435 bt->state = BT_STATE_READ_END;
436 return SI_SM_CALL_WITHOUT_DELAY; /* for logging */
437
438 case BT_STATE_READ_END:
439
440 /* I could wait on BT_H_BUSY to go clear for a truly clean
441 exit. However, this is already done in XACTION_START
442 and the (possible) extra loop/status/possible wait affects
443 performance. So, as long as it works, just ignore H_BUSY */
444
445#ifdef MAKE_THIS_TRUE_IF_NECESSARY
446 518
447 if (status & BT_H_BUSY) 519 case BT_STATE_WRITE_CONSUME:
448 break; 520 if (status & (BT_B_BUSY | BT_H2B_ATN))
449#endif 521 BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY);
450 bt->seq++; 522 BT_STATE_CHANGE(BT_STATE_READ_WAIT,
451 bt->state = BT_STATE_IDLE; 523 SI_SM_CALL_WITHOUT_DELAY);
452 return SI_SM_TRANSACTION_COMPLETE; 524
525 /* Spinning hard can suppress B2H_ATN and force a timeout */
526
527 case BT_STATE_READ_WAIT:
528 if (!(status & BT_B2H_ATN))
529 BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY);
530 BT_CONTROL(BT_H_BUSY); /* set */
531
532 /* Uncached, ordered writes should just proceed serially but
533 some BMCs don't clear B2H_ATN with one hit. Fast-path a
534 workaround without too much penalty to the general case. */
535
536 BT_CONTROL(BT_B2H_ATN); /* clear it to ACK the BMC */
537 BT_STATE_CHANGE(BT_STATE_CLEAR_B2H,
538 SI_SM_CALL_WITHOUT_DELAY);
539
540 case BT_STATE_CLEAR_B2H:
541 if (status & BT_B2H_ATN) { /* keep hitting it */
542 BT_CONTROL(BT_B2H_ATN);
543 BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY);
544 }
545 BT_STATE_CHANGE(BT_STATE_READ_BYTES,
546 SI_SM_CALL_WITHOUT_DELAY);
547
548 case BT_STATE_READ_BYTES:
549 if (!(status & BT_H_BUSY)) /* check in case of retry */
550 BT_CONTROL(BT_H_BUSY);
551 BT_CONTROL(BT_CLR_RD_PTR); /* start of BMC2HOST buffer */
552 i = read_all_bytes(bt); /* true == packet seq match */
553 BT_CONTROL(BT_H_BUSY); /* NOW clear */
554 if (!i) /* Not my message */
555 BT_STATE_CHANGE(BT_STATE_READ_WAIT,
556 SI_SM_CALL_WITHOUT_DELAY);
557 bt->state = bt->complete;
558 return bt->state == BT_STATE_IDLE ? /* where to next? */
559 SI_SM_TRANSACTION_COMPLETE : /* normal */
560 SI_SM_CALL_WITHOUT_DELAY; /* Startup magic */
561
562 case BT_STATE_LONG_BUSY: /* For example: after FW update */
563 if (!(status & BT_B_BUSY)) {
564 reset_flags(bt); /* next state is now IDLE */
565 bt_init_data(bt, bt->io);
566 }
567 return SI_SM_CALL_WITH_DELAY; /* No repeat printing */
453 568
454 case BT_STATE_RESET1: 569 case BT_STATE_RESET1:
455 reset_flags(bt); 570 reset_flags(bt);
456 bt->timeout = BT_RESET_DELAY; 571 drain_BMC2HOST(bt);
457 bt->state = BT_STATE_RESET2; 572 BT_STATE_CHANGE(BT_STATE_RESET2,
458 break; 573 SI_SM_CALL_WITH_DELAY);
459 574
460 case BT_STATE_RESET2: /* Send a soft reset */ 575 case BT_STATE_RESET2: /* Send a soft reset */
461 BT_CONTROL(BT_CLR_WR_PTR); 576 BT_CONTROL(BT_CLR_WR_PTR);
@@ -464,29 +579,59 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time)
464 HOST2BMC(42); /* Sequence number */ 579 HOST2BMC(42); /* Sequence number */
465 HOST2BMC(3); /* Cmd == Soft reset */ 580 HOST2BMC(3); /* Cmd == Soft reset */
466 BT_CONTROL(BT_H2B_ATN); 581 BT_CONTROL(BT_H2B_ATN);
467 bt->state = BT_STATE_RESET3; 582 bt->timeout = BT_RESET_DELAY * 1000000;
468 break; 583 BT_STATE_CHANGE(BT_STATE_RESET3,
584 SI_SM_CALL_WITH_DELAY);
469 585
470 case BT_STATE_RESET3: 586 case BT_STATE_RESET3: /* Hold off everything for a bit */
471 if (bt->timeout > 0) 587 if (bt->timeout > 0)
472 return SI_SM_CALL_WITH_DELAY; 588 return SI_SM_CALL_WITH_DELAY;
473 bt->state = BT_STATE_RESTART; /* printk in debug modes */ 589 drain_BMC2HOST(bt);
474 break; 590 BT_STATE_CHANGE(BT_STATE_RESTART,
591 SI_SM_CALL_WITH_DELAY);
475 592
476 case BT_STATE_RESTART: /* don't reset retries! */ 593 case BT_STATE_RESTART: /* don't reset retries or seq! */
477 reset_flags(bt);
478 bt->write_data[2] = ++bt->seq;
479 bt->read_count = 0; 594 bt->read_count = 0;
480 bt->nonzero_status = 0; 595 bt->nonzero_status = 0;
481 bt->timeout = BT_NORMAL_TIMEOUT; 596 bt->timeout = bt->BT_CAP_req2rsp;
482 bt->state = BT_STATE_XACTION_START; 597 BT_STATE_CHANGE(BT_STATE_XACTION_START,
483 break; 598 SI_SM_CALL_WITH_DELAY);
484 599
485 default: /* HOSED is supposed to be caught much earlier */ 600 /* Get BT Capabilities, using timing of upper level state machine.
486 error_recovery(bt, "internal logic error"); 601 Set outreqs to prevent infinite loop on timeout. */
487 break; 602 case BT_STATE_CAPABILITIES_BEGIN:
488 } 603 bt->BT_CAP_outreqs = 1;
489 return SI_SM_CALL_WITH_DELAY; 604 {
605 unsigned char GetBT_CAP[] = { 0x18, 0x36 };
606 bt->state = BT_STATE_IDLE;
607 bt_start_transaction(bt, GetBT_CAP, sizeof(GetBT_CAP));
608 }
609 bt->complete = BT_STATE_CAPABILITIES_END;
610 BT_STATE_CHANGE(BT_STATE_XACTION_START,
611 SI_SM_CALL_WITH_DELAY);
612
613 case BT_STATE_CAPABILITIES_END:
614 i = bt_get_result(bt, BT_CAP, sizeof(BT_CAP));
615 bt_init_data(bt, bt->io);
616 if ((i == 8) && !BT_CAP[2]) {
617 bt->BT_CAP_outreqs = BT_CAP[3];
618 bt->BT_CAP_req2rsp = BT_CAP[6] * 1000000;
619 bt->BT_CAP_retries = BT_CAP[7];
620 } else
621 printk(KERN_WARNING "IPMI BT: using default values\n");
622 if (!bt->BT_CAP_outreqs)
623 bt->BT_CAP_outreqs = 1;
624 printk(KERN_WARNING "IPMI BT: req2rsp=%ld secs retries=%d\n",
625 bt->BT_CAP_req2rsp / 1000000L, bt->BT_CAP_retries);
626 bt->timeout = bt->BT_CAP_req2rsp;
627 return SI_SM_CALL_WITHOUT_DELAY;
628
629 default: /* should never occur */
630 return error_recovery(bt,
631 status,
632 IPMI_ERR_UNSPECIFIED);
633 }
634 return SI_SM_CALL_WITH_DELAY;
490} 635}
491 636
492static int bt_detect(struct si_sm_data *bt) 637static int bt_detect(struct si_sm_data *bt)
@@ -497,7 +642,7 @@ static int bt_detect(struct si_sm_data *bt)
497 test that first. The calling routine uses negative logic. */ 642 test that first. The calling routine uses negative logic. */
498 643
499 if ((BT_STATUS == 0xFF) && (BT_INTMASK_R == 0xFF)) 644 if ((BT_STATUS == 0xFF) && (BT_INTMASK_R == 0xFF))
500 return 1; 645 return 1;
501 reset_flags(bt); 646 reset_flags(bt);
502 return 0; 647 return 0;
503} 648}
@@ -513,11 +658,11 @@ static int bt_size(void)
513 658
514struct si_sm_handlers bt_smi_handlers = 659struct si_sm_handlers bt_smi_handlers =
515{ 660{
516 .init_data = bt_init_data, 661 .init_data = bt_init_data,
517 .start_transaction = bt_start_transaction, 662 .start_transaction = bt_start_transaction,
518 .get_result = bt_get_result, 663 .get_result = bt_get_result,
519 .event = bt_event, 664 .event = bt_event,
520 .detect = bt_detect, 665 .detect = bt_detect,
521 .cleanup = bt_cleanup, 666 .cleanup = bt_cleanup,
522 .size = bt_size, 667 .size = bt_size,
523}; 668};