aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHannes Reinecke <hare@suse.de>2013-10-23 04:51:21 -0400
committerJames Bottomley <JBottomley@Parallels.com>2013-10-25 07:17:59 -0400
commitb45620229dd67ff1daffa8adce57f37b37860f78 (patch)
tree22f8577437188e6699fa9d1f05841ea1f545a68f
parent6b1e5a45d4eaa75e28f2d170ea43ab8fc6dd34d8 (diff)
[SCSI] Add 'eh_deadline' to limit SCSI EH runtime
This patch adds an 'eh_deadline' sysfs attribute to the scsi host which limits the overall runtime of the SCSI EH. The 'eh_deadline' value is stored in the now obsolete field 'resetting'. When a command is failed the start time of the EH is stored in 'last_reset'. If the overall runtime of the SCSI EH is longer than last_reset + eh_deadline, the EH is short-circuited and falls through to issue a host reset only. [jejb: add comments in Scsi_Host about new fields] Signed-off-by: Hannes Reinecke <hare@suse.de> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
-rw-r--r--drivers/scsi/hosts.c7
-rw-r--r--drivers/scsi/scsi_error.c130
-rw-r--r--drivers/scsi/scsi_sysfs.c37
-rw-r--r--include/scsi/scsi_host.h5
4 files changed, 173 insertions, 6 deletions
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index df0c3c71ea43..f334859024c0 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -316,6 +316,12 @@ static void scsi_host_dev_release(struct device *dev)
316 kfree(shost); 316 kfree(shost);
317} 317}
318 318
/*
 * Module-wide default for the per-host EH deadline, expressed in seconds
 * (see the parameter description below).  scsi_host_alloc() copies it into
 * each new host as shost->eh_deadline = shost_eh_deadline * HZ.
 *
 * The static defaults to 0; scsi_host_eh_past_deadline() returns 0 whenever
 * eh_deadline is 0, so 0 means "no deadline".
 *
 * NOTE(review): the parameter is an unsigned int documented as accepting up
 * to 2^32-1, but the product with HZ is stored in the `int eh_deadline`
 * field of struct Scsi_Host.  Large values presumably do not fit -- confirm
 * the intended upper bound and whether a range check is needed.
 */
319static unsigned int shost_eh_deadline;
320
/* Exposed as "eh_deadline": world-readable, writable by the owner. */
321module_param_named(eh_deadline, shost_eh_deadline, uint, S_IRUGO|S_IWUSR);
322MODULE_PARM_DESC(eh_deadline,
323 "SCSI EH timeout in seconds (should be between 1 and 2^32-1)");
324
319static struct device_type scsi_host_type = { 325static struct device_type scsi_host_type = {
320 .name = "scsi_host", 326 .name = "scsi_host",
321 .release = scsi_host_dev_release, 327 .release = scsi_host_dev_release,
@@ -388,6 +394,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
388 shost->unchecked_isa_dma = sht->unchecked_isa_dma; 394 shost->unchecked_isa_dma = sht->unchecked_isa_dma;
389 shost->use_clustering = sht->use_clustering; 395 shost->use_clustering = sht->use_clustering;
390 shost->ordered_tag = sht->ordered_tag; 396 shost->ordered_tag = sht->ordered_tag;
397 shost->eh_deadline = shost_eh_deadline * HZ;
391 398
392 if (sht->supported_mode == MODE_UNKNOWN) 399 if (sht->supported_mode == MODE_UNKNOWN)
393 /* means we didn't set it ... default to INITIATOR */ 400 /* means we didn't set it ... default to INITIATOR */
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 83e591b60193..edae9e20f886 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -87,6 +87,18 @@ void scsi_schedule_eh(struct Scsi_Host *shost)
87} 87}
88EXPORT_SYMBOL_GPL(scsi_schedule_eh); 88EXPORT_SYMBOL_GPL(scsi_schedule_eh);
89 89
/*
 * scsi_host_eh_past_deadline - check whether error handling ran out of time.
 * @shost: host whose last_reset / eh_deadline fields are examined.
 *
 * Returns 1 when both shost->last_reset and shost->eh_deadline are non-zero
 * and jiffies is no longer before last_reset + eh_deadline; returns 0
 * otherwise.
 *
 * Every call site visible in this patch takes shost->host_lock around the
 * call.
 *
 * NOTE(review): last_reset == 0 is used as the "not stamped yet" sentinel,
 * and the stamp is taken from jiffies.  A failure stamped at the instant
 * jiffies == 0 would therefore look unstamped and the deadline would not
 * apply -- confirm whether that corner case matters.
 */
90static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
91{
/* No start time recorded, or no deadline configured: never "past". */
92 if (!shost->last_reset || !shost->eh_deadline)
93 return 0;
94
/* time_before() is used for the comparison rather than a plain '<'. */
95 if (time_before(jiffies,
96 shost->last_reset + shost->eh_deadline))
97 return 0;
98
99 return 1;
100}
101
90/** 102/**
91 * scsi_eh_scmd_add - add scsi cmd to error handling. 103 * scsi_eh_scmd_add - add scsi cmd to error handling.
92 * @scmd: scmd to run eh on. 104 * @scmd: scmd to run eh on.
@@ -109,6 +121,9 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
109 if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)) 121 if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))
110 goto out_unlock; 122 goto out_unlock;
111 123
124 if (shost->eh_deadline && !shost->last_reset)
125 shost->last_reset = jiffies;
126
112 ret = 1; 127 ret = 1;
113 scmd->eh_eflags |= eh_flag; 128 scmd->eh_eflags |= eh_flag;
114 list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q); 129 list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
@@ -138,6 +153,9 @@ enum blk_eh_timer_return scsi_times_out(struct request *req)
138 trace_scsi_dispatch_cmd_timeout(scmd); 153 trace_scsi_dispatch_cmd_timeout(scmd);
139 scsi_log_completion(scmd, TIMEOUT_ERROR); 154 scsi_log_completion(scmd, TIMEOUT_ERROR);
140 155
156 if (host->eh_deadline && !host->last_reset)
157 host->last_reset = jiffies;
158
141 if (host->transportt->eh_timed_out) 159 if (host->transportt->eh_timed_out)
142 rtn = host->transportt->eh_timed_out(scmd); 160 rtn = host->transportt->eh_timed_out(scmd);
143 else if (host->hostt->eh_timed_out) 161 else if (host->hostt->eh_timed_out)
@@ -990,13 +1008,26 @@ int scsi_eh_get_sense(struct list_head *work_q,
990 struct list_head *done_q) 1008 struct list_head *done_q)
991{ 1009{
992 struct scsi_cmnd *scmd, *next; 1010 struct scsi_cmnd *scmd, *next;
1011 struct Scsi_Host *shost;
993 int rtn; 1012 int rtn;
1013 unsigned long flags;
994 1014
995 list_for_each_entry_safe(scmd, next, work_q, eh_entry) { 1015 list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
996 if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) || 1016 if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) ||
997 SCSI_SENSE_VALID(scmd)) 1017 SCSI_SENSE_VALID(scmd))
998 continue; 1018 continue;
999 1019
1020 shost = scmd->device->host;
1021 spin_lock_irqsave(shost->host_lock, flags);
1022 if (scsi_host_eh_past_deadline(shost)) {
1023 spin_unlock_irqrestore(shost->host_lock, flags);
1024 SCSI_LOG_ERROR_RECOVERY(3,
1025 shost_printk(KERN_INFO, shost,
1026 "skip %s, past eh deadline\n",
1027 __func__));
1028 break;
1029 }
1030 spin_unlock_irqrestore(shost->host_lock, flags);
1000 SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd, 1031 SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd,
1001 "%s: requesting sense\n", 1032 "%s: requesting sense\n",
1002 current->comm)); 1033 current->comm));
@@ -1082,11 +1113,28 @@ static int scsi_eh_test_devices(struct list_head *cmd_list,
1082 struct scsi_cmnd *scmd, *next; 1113 struct scsi_cmnd *scmd, *next;
1083 struct scsi_device *sdev; 1114 struct scsi_device *sdev;
1084 int finish_cmds; 1115 int finish_cmds;
1116 unsigned long flags;
1085 1117
1086 while (!list_empty(cmd_list)) { 1118 while (!list_empty(cmd_list)) {
1087 scmd = list_entry(cmd_list->next, struct scsi_cmnd, eh_entry); 1119 scmd = list_entry(cmd_list->next, struct scsi_cmnd, eh_entry);
1088 sdev = scmd->device; 1120 sdev = scmd->device;
1089 1121
1122 if (!try_stu) {
1123 spin_lock_irqsave(sdev->host->host_lock, flags);
1124 if (scsi_host_eh_past_deadline(sdev->host)) {
1125 /* Push items back onto work_q */
1126 list_splice_init(cmd_list, work_q);
1127 spin_unlock_irqrestore(sdev->host->host_lock,
1128 flags);
1129 SCSI_LOG_ERROR_RECOVERY(3,
1130 shost_printk(KERN_INFO, sdev->host,
1131 "skip %s, past eh deadline",
1132 __func__));
1133 break;
1134 }
1135 spin_unlock_irqrestore(sdev->host->host_lock, flags);
1136 }
1137
1090 finish_cmds = !scsi_device_online(scmd->device) || 1138 finish_cmds = !scsi_device_online(scmd->device) ||
1091 (try_stu && !scsi_eh_try_stu(scmd) && 1139 (try_stu && !scsi_eh_try_stu(scmd) &&
1092 !scsi_eh_tur(scmd)) || 1140 !scsi_eh_tur(scmd)) ||
@@ -1122,14 +1170,28 @@ static int scsi_eh_abort_cmds(struct list_head *work_q,
1122 struct scsi_cmnd *scmd, *next; 1170 struct scsi_cmnd *scmd, *next;
1123 LIST_HEAD(check_list); 1171 LIST_HEAD(check_list);
1124 int rtn; 1172 int rtn;
1173 struct Scsi_Host *shost;
1174 unsigned long flags;
1125 1175
1126 list_for_each_entry_safe(scmd, next, work_q, eh_entry) { 1176 list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
1127 if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD)) 1177 if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD))
1128 continue; 1178 continue;
1179 shost = scmd->device->host;
1180 spin_lock_irqsave(shost->host_lock, flags);
1181 if (scsi_host_eh_past_deadline(shost)) {
1182 spin_unlock_irqrestore(shost->host_lock, flags);
1183 list_splice_init(&check_list, work_q);
1184 SCSI_LOG_ERROR_RECOVERY(3,
1185 shost_printk(KERN_INFO, shost,
1186 "skip %s, past eh deadline\n",
1187 __func__));
1188 return list_empty(work_q);
1189 }
1190 spin_unlock_irqrestore(shost->host_lock, flags);
1129 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:" 1191 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"
1130 "0x%p\n", current->comm, 1192 "0x%p\n", current->comm,
1131 scmd)); 1193 scmd));
1132 rtn = scsi_try_to_abort_cmd(scmd->device->host->hostt, scmd); 1194 rtn = scsi_try_to_abort_cmd(shost->hostt, scmd);
1133 if (rtn == SUCCESS || rtn == FAST_IO_FAIL) { 1195 if (rtn == SUCCESS || rtn == FAST_IO_FAIL) {
1134 scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD; 1196 scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD;
1135 if (rtn == FAST_IO_FAIL) 1197 if (rtn == FAST_IO_FAIL)
@@ -1187,8 +1249,19 @@ static int scsi_eh_stu(struct Scsi_Host *shost,
1187{ 1249{
1188 struct scsi_cmnd *scmd, *stu_scmd, *next; 1250 struct scsi_cmnd *scmd, *stu_scmd, *next;
1189 struct scsi_device *sdev; 1251 struct scsi_device *sdev;
1252 unsigned long flags;
1190 1253
1191 shost_for_each_device(sdev, shost) { 1254 shost_for_each_device(sdev, shost) {
1255 spin_lock_irqsave(shost->host_lock, flags);
1256 if (scsi_host_eh_past_deadline(shost)) {
1257 spin_unlock_irqrestore(shost->host_lock, flags);
1258 SCSI_LOG_ERROR_RECOVERY(3,
1259 shost_printk(KERN_INFO, shost,
1260 "skip %s, past eh deadline\n",
1261 __func__));
1262 break;
1263 }
1264 spin_unlock_irqrestore(shost->host_lock, flags);
1192 stu_scmd = NULL; 1265 stu_scmd = NULL;
1193 list_for_each_entry(scmd, work_q, eh_entry) 1266 list_for_each_entry(scmd, work_q, eh_entry)
1194 if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) && 1267 if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) &&
@@ -1241,9 +1314,20 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
1241{ 1314{
1242 struct scsi_cmnd *scmd, *bdr_scmd, *next; 1315 struct scsi_cmnd *scmd, *bdr_scmd, *next;
1243 struct scsi_device *sdev; 1316 struct scsi_device *sdev;
1317 unsigned long flags;
1244 int rtn; 1318 int rtn;
1245 1319
1246 shost_for_each_device(sdev, shost) { 1320 shost_for_each_device(sdev, shost) {
1321 spin_lock_irqsave(shost->host_lock, flags);
1322 if (scsi_host_eh_past_deadline(shost)) {
1323 spin_unlock_irqrestore(shost->host_lock, flags);
1324 SCSI_LOG_ERROR_RECOVERY(3,
1325 shost_printk(KERN_INFO, shost,
1326 "skip %s, past eh deadline\n",
1327 __func__));
1328 break;
1329 }
1330 spin_unlock_irqrestore(shost->host_lock, flags);
1247 bdr_scmd = NULL; 1331 bdr_scmd = NULL;
1248 list_for_each_entry(scmd, work_q, eh_entry) 1332 list_for_each_entry(scmd, work_q, eh_entry)
1249 if (scmd->device == sdev) { 1333 if (scmd->device == sdev) {
@@ -1303,6 +1387,21 @@ static int scsi_eh_target_reset(struct Scsi_Host *shost,
1303 struct scsi_cmnd *next, *scmd; 1387 struct scsi_cmnd *next, *scmd;
1304 int rtn; 1388 int rtn;
1305 unsigned int id; 1389 unsigned int id;
1390 unsigned long flags;
1391
1392 spin_lock_irqsave(shost->host_lock, flags);
1393 if (scsi_host_eh_past_deadline(shost)) {
1394 spin_unlock_irqrestore(shost->host_lock, flags);
1395 /* push back on work queue for further processing */
1396 list_splice_init(&check_list, work_q);
1397 list_splice_init(&tmp_list, work_q);
1398 SCSI_LOG_ERROR_RECOVERY(3,
1399 shost_printk(KERN_INFO, shost,
1400 "skip %s, past eh deadline\n",
1401 __func__));
1402 return list_empty(work_q);
1403 }
1404 spin_unlock_irqrestore(shost->host_lock, flags);
1306 1405
1307 scmd = list_entry(tmp_list.next, struct scsi_cmnd, eh_entry); 1406 scmd = list_entry(tmp_list.next, struct scsi_cmnd, eh_entry);
1308 id = scmd_id(scmd); 1407 id = scmd_id(scmd);
@@ -1347,6 +1446,7 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost,
1347 LIST_HEAD(check_list); 1446 LIST_HEAD(check_list);
1348 unsigned int channel; 1447 unsigned int channel;
1349 int rtn; 1448 int rtn;
1449 unsigned long flags;
1350 1450
1351 /* 1451 /*
1352 * we really want to loop over the various channels, and do this on 1452 * we really want to loop over the various channels, and do this on
@@ -1356,6 +1456,18 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost,
1356 */ 1456 */
1357 1457
1358 for (channel = 0; channel <= shost->max_channel; channel++) { 1458 for (channel = 0; channel <= shost->max_channel; channel++) {
1459 spin_lock_irqsave(shost->host_lock, flags);
1460 if (scsi_host_eh_past_deadline(shost)) {
1461 spin_unlock_irqrestore(shost->host_lock, flags);
1462 list_splice_init(&check_list, work_q);
1463 SCSI_LOG_ERROR_RECOVERY(3,
1464 shost_printk(KERN_INFO, shost,
1465 "skip %s, past eh deadline\n",
1466 __func__));
1467 return list_empty(work_q);
1468 }
1469 spin_unlock_irqrestore(shost->host_lock, flags);
1470
1359 chan_scmd = NULL; 1471 chan_scmd = NULL;
1360 list_for_each_entry(scmd, work_q, eh_entry) { 1472 list_for_each_entry(scmd, work_q, eh_entry) {
1361 if (channel == scmd_channel(scmd)) { 1473 if (channel == scmd_channel(scmd)) {
@@ -1755,8 +1867,9 @@ static void scsi_restart_operations(struct Scsi_Host *shost)
1755 * will be requests for character device operations, and also for 1867 * will be requests for character device operations, and also for
1756 * ioctls to queued block devices. 1868 * ioctls to queued block devices.
1757 */ 1869 */
1758 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n", 1870 SCSI_LOG_ERROR_RECOVERY(3,
1759 __func__)); 1871 printk("scsi_eh_%d waking up host to restart\n",
1872 shost->host_no));
1760 1873
1761 spin_lock_irqsave(shost->host_lock, flags); 1874 spin_lock_irqsave(shost->host_lock, flags);
1762 if (scsi_host_set_state(shost, SHOST_RUNNING)) 1875 if (scsi_host_set_state(shost, SHOST_RUNNING))
@@ -1883,6 +1996,10 @@ static void scsi_unjam_host(struct Scsi_Host *shost)
1883 if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q)) 1996 if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q))
1884 scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q); 1997 scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
1885 1998
1999 spin_lock_irqsave(shost->host_lock, flags);
2000 if (shost->eh_deadline)
2001 shost->last_reset = 0;
2002 spin_unlock_irqrestore(shost->host_lock, flags);
1886 scsi_eh_flush_done_q(&eh_done_q); 2003 scsi_eh_flush_done_q(&eh_done_q);
1887} 2004}
1888 2005
@@ -1909,7 +2026,7 @@ int scsi_error_handler(void *data)
1909 if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) || 2026 if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) ||
1910 shost->host_failed != shost->host_busy) { 2027 shost->host_failed != shost->host_busy) {
1911 SCSI_LOG_ERROR_RECOVERY(1, 2028 SCSI_LOG_ERROR_RECOVERY(1,
1912 printk("Error handler scsi_eh_%d sleeping\n", 2029 printk("scsi_eh_%d: sleeping\n",
1913 shost->host_no)); 2030 shost->host_no));
1914 schedule(); 2031 schedule();
1915 continue; 2032 continue;
@@ -1917,8 +2034,9 @@ int scsi_error_handler(void *data)
1917 2034
1918 __set_current_state(TASK_RUNNING); 2035 __set_current_state(TASK_RUNNING);
1919 SCSI_LOG_ERROR_RECOVERY(1, 2036 SCSI_LOG_ERROR_RECOVERY(1,
1920 printk("Error handler scsi_eh_%d waking up\n", 2037 printk("scsi_eh_%d: waking up %d/%d/%d\n",
1921 shost->host_no)); 2038 shost->host_no, shost->host_eh_scheduled,
2039 shost->host_failed, shost->host_busy));
1922 2040
1923 /* 2041 /*
1924 * We have a host that is failing for some reason. Figure out 2042 * We have a host that is failing for some reason. Figure out
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index a73471074a02..8ff62c26a41c 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -281,6 +281,42 @@ exit_store_host_reset:
281 281
282static DEVICE_ATTR(host_reset, S_IWUSR, NULL, store_host_reset); 282static DEVICE_ATTR(host_reset, S_IWUSR, NULL, store_host_reset);
283 283
/*
 * sysfs "show" handler for the host's eh_deadline attribute.
 *
 * Formats shost->eh_deadline divided by HZ as a signed decimal followed by a
 * newline into @buf and returns sprintf()'s result.  The division mirrors
 * store_shost_eh_deadline(), which multiplies the written value by HZ before
 * storing it.  @attr is not used.
 */
284static ssize_t
285show_shost_eh_deadline(struct device *dev,
286 struct device_attribute *attr, char *buf)
287{
288 struct Scsi_Host *shost = class_to_shost(dev);
289
290 return sprintf(buf, "%d\n", shost->eh_deadline / HZ);
291}
292
/*
 * sysfs "store" handler for the host's eh_deadline attribute.
 *
 * Parses a decimal integer from @buf and, under shost->host_lock, stores it
 * multiplied by HZ in shost->eh_deadline.
 *
 * Returns:
 *   @count   on success;
 *   -EINVAL  if the transport provides its own eh_strategy_handler, or if
 *            no integer could be parsed from @buf;
 *   -EBUSY   if scsi_host_in_recovery() reports the host is in recovery.
 *
 * NOTE(review): the value is read with "%d" into an int and is not
 * range-checked, so negative numbers are accepted, and deadline * HZ is
 * stored without an overflow check.  Confirm whether negative or very large
 * inputs should be rejected.
 */
293static ssize_t
294store_shost_eh_deadline(struct device *dev, struct device_attribute *attr,
295 const char *buf, size_t count)
296{
297 struct Scsi_Host *shost = class_to_shost(dev);
298 int ret = -EINVAL;
299 int deadline;
300 unsigned long flags;
301
/*
 * Refuse the write for transports with their own strategy handler
 * (presumably because the deadline checks added by this patch are not
 * used on that path -- TODO confirm).
 */
302 if (shost->transportt && shost->transportt->eh_strategy_handler)
303 return ret;
304
305 if (sscanf(buf, "%d\n", &deadline) == 1) {
306 spin_lock_irqsave(shost->host_lock, flags);
/* Do not change the deadline while the host is in recovery. */
307 if (scsi_host_in_recovery(shost))
308 ret = -EBUSY;
309 else {
310 shost->eh_deadline = deadline * HZ;
311 ret = count;
312 }
313 spin_unlock_irqrestore(shost->host_lock, flags);
314 }
315 return ret;
316}
317
/* eh_deadline host attribute: readable by everyone, writable by the owner. */
318static DEVICE_ATTR(eh_deadline, S_IRUGO | S_IWUSR, show_shost_eh_deadline, store_shost_eh_deadline);
319
284shost_rd_attr(unique_id, "%u\n"); 320shost_rd_attr(unique_id, "%u\n");
285shost_rd_attr(host_busy, "%hu\n"); 321shost_rd_attr(host_busy, "%hu\n");
286shost_rd_attr(cmd_per_lun, "%hd\n"); 322shost_rd_attr(cmd_per_lun, "%hd\n");
@@ -308,6 +344,7 @@ static struct attribute *scsi_sysfs_shost_attrs[] = {
308 &dev_attr_prot_capabilities.attr, 344 &dev_attr_prot_capabilities.attr,
309 &dev_attr_prot_guard_type.attr, 345 &dev_attr_prot_guard_type.attr,
310 &dev_attr_host_reset.attr, 346 &dev_attr_host_reset.attr,
347 &dev_attr_eh_deadline.attr,
311 NULL 348 NULL
312}; 349};
313 350
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index a74b7d9afe8e..546084964d55 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -599,6 +599,11 @@ struct Scsi_Host {
599 599
600 unsigned int host_no; /* Used for IOCTL_GET_IDLUN, /proc/scsi et al. */ 600 unsigned int host_no; /* Used for IOCTL_GET_IDLUN, /proc/scsi et al. */
601 601
602 /* next two fields are used to bound the time spent in error handling */
603 int eh_deadline;
604 unsigned long last_reset;
605
606
602 /* 607 /*
603 * These three parameters can be used to allow for wide scsi, 608 * These three parameters can be used to allow for wide scsi,
604 * and for host adapters that support multiple busses 609 * and for host adapters that support multiple busses