aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/ata/libata-eh.c184
-rw-r--r--include/linux/libata.h1
2 files changed, 125 insertions, 60 deletions
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 1abfdba8d99b..31738627ec64 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -44,6 +44,12 @@
44 44
45#include "libata.h" 45#include "libata.h"
46 46
47enum {
48 ATA_EH_SPDN_NCQ_OFF = (1 << 0),
49 ATA_EH_SPDN_SPEED_DOWN = (1 << 1),
50 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2),
51};
52
47static void __ata_port_freeze(struct ata_port *ap); 53static void __ata_port_freeze(struct ata_port *ap);
48static void ata_eh_finish(struct ata_port *ap); 54static void ata_eh_finish(struct ata_port *ap);
49static void ata_eh_handle_port_suspend(struct ata_port *ap); 55static void ata_eh_handle_port_suspend(struct ata_port *ap);
@@ -65,12 +71,9 @@ static void ata_ering_record(struct ata_ering *ering, int is_io,
65 ent->timestamp = get_jiffies_64(); 71 ent->timestamp = get_jiffies_64();
66} 72}
67 73
68static struct ata_ering_entry * ata_ering_top(struct ata_ering *ering) 74static void ata_ering_clear(struct ata_ering *ering)
69{ 75{
70 struct ata_ering_entry *ent = &ering->ring[ering->cursor]; 76 memset(ering, 0, sizeof(*ering));
71 if (!ent->err_mask)
72 return NULL;
73 return ent;
74} 77}
75 78
76static int ata_ering_map(struct ata_ering *ering, 79static int ata_ering_map(struct ata_ering *ering,
@@ -1159,87 +1162,99 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
1159 return action; 1162 return action;
1160} 1163}
1161 1164
1162static int ata_eh_categorize_ering_entry(struct ata_ering_entry *ent) 1165static int ata_eh_categorize_error(int is_io, unsigned int err_mask)
1163{ 1166{
1164 if (ent->err_mask & (AC_ERR_ATA_BUS | AC_ERR_TIMEOUT)) 1167 if (err_mask & AC_ERR_ATA_BUS)
1165 return 1; 1168 return 1;
1166 1169
1167 if (ent->is_io) { 1170 if (err_mask & AC_ERR_TIMEOUT)
1168 if (ent->err_mask & AC_ERR_HSM) 1171 return 2;
1169 return 1; 1172
1170 if ((ent->err_mask & 1173 if (is_io) {
1171 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) 1174 if (err_mask & AC_ERR_HSM)
1172 return 2; 1175 return 2;
1176 if ((err_mask &
1177 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
1178 return 3;
1173 } 1179 }
1174 1180
1175 return 0; 1181 return 0;
1176} 1182}
1177 1183
1178struct speed_down_needed_arg { 1184struct speed_down_verdict_arg {
1179 u64 since; 1185 u64 since;
1180 int nr_errors[3]; 1186 int nr_errors[4];
1181}; 1187};
1182 1188
1183static int speed_down_needed_cb(struct ata_ering_entry *ent, void *void_arg) 1189static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
1184{ 1190{
1185 struct speed_down_needed_arg *arg = void_arg; 1191 struct speed_down_verdict_arg *arg = void_arg;
1192 int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask);
1186 1193
1187 if (ent->timestamp < arg->since) 1194 if (ent->timestamp < arg->since)
1188 return -1; 1195 return -1;
1189 1196
1190 arg->nr_errors[ata_eh_categorize_ering_entry(ent)]++; 1197 arg->nr_errors[cat]++;
1191 return 0; 1198 return 0;
1192} 1199}
1193 1200
1194/** 1201/**
1195 * ata_eh_speed_down_needed - Determine wheter speed down is necessary 1202 * ata_eh_speed_down_verdict - Determine speed down verdict
1196 * @dev: Device of interest 1203 * @dev: Device of interest
1197 * 1204 *
1198 * This function examines error ring of @dev and determines 1205 * This function examines error ring of @dev and determines
1199 * whether speed down is necessary. Speed down is necessary if 1206 * whether NCQ needs to be turned off, transfer speed should be
1200 * there have been more than 3 of Cat-1 errors or 10 of Cat-2 1207 * stepped down, or falling back to PIO is necessary.
1201 * errors during last 15 minutes. 1208 *
1209 * Cat-1 is ATA_BUS error for any command.
1202 * 1210 *
1203 * Cat-1 errors are ATA_BUS, TIMEOUT for any command and HSM 1211 * Cat-2 is TIMEOUT for any command or HSM violation for known
1204 * violation for known supported commands. 1212 * supported commands.
1205 * 1213 *
1206 * Cat-2 errors are unclassified DEV error for known supported 1214 * Cat-3 is unclassified DEV error for known supported
1207 * command. 1215 * command.
1208 * 1216 *
1217 * NCQ needs to be turned off if there have been more than 3
1218 * Cat-2 + Cat-3 errors during last 10 minutes.
1219 *
1220 * Speed down is necessary if there have been more than 3 Cat-1 +
1221 * Cat-2 errors or 10 Cat-3 errors during last 10 minutes.
1222 *
1223 * Falling back to PIO mode is necessary if there have been more
1224 * than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes.
1225 *
1209 * LOCKING: 1226 * LOCKING:
1210 * Inherited from caller. 1227 * Inherited from caller.
1211 * 1228 *
1212 * RETURNS: 1229 * RETURNS:
1213 * 1 if speed down is necessary, 0 otherwise 1230 * OR of ATA_EH_SPDN_* flags.
1214 */ 1231 */
1215static int ata_eh_speed_down_needed(struct ata_device *dev) 1232static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
1216{ 1233{
1217 const u64 interval = 15LLU * 60 * HZ; 1234 const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
1218 static const int err_limits[3] = { -1, 3, 10 }; 1235 u64 j64 = get_jiffies_64();
1219 struct speed_down_needed_arg arg; 1236 struct speed_down_verdict_arg arg;
1220 struct ata_ering_entry *ent; 1237 unsigned int verdict = 0;
1221 int err_cat;
1222 u64 j64;
1223 1238
1224 ent = ata_ering_top(&dev->ering); 1239 /* scan past 10 mins of error history */
1225 if (!ent) 1240 memset(&arg, 0, sizeof(arg));
1226 return 0; 1241 arg.since = j64 - min(j64, j10mins);
1242 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
1227 1243
1228 err_cat = ata_eh_categorize_ering_entry(ent); 1244 if (arg.nr_errors[2] + arg.nr_errors[3] > 3)
1229 if (err_cat == 0) 1245 verdict |= ATA_EH_SPDN_NCQ_OFF;
1230 return 0; 1246 if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10)
1247 verdict |= ATA_EH_SPDN_SPEED_DOWN;
1231 1248
1249 /* scan past 5 mins of error history */
1232 memset(&arg, 0, sizeof(arg)); 1250 memset(&arg, 0, sizeof(arg));
1251 arg.since = j64 - min(j64, j5mins);
1252 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
1233 1253
1234 j64 = get_jiffies_64(); 1254 if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10)
1235 if (j64 >= interval) 1255 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;
1236 arg.since = j64 - interval;
1237 else
1238 arg.since = 0;
1239
1240 ata_ering_map(&dev->ering, speed_down_needed_cb, &arg);
1241 1256
1242 return arg.nr_errors[err_cat] > err_limits[err_cat]; 1257 return verdict;
1243} 1258}
1244 1259
1245/** 1260/**
@@ -1257,31 +1272,80 @@ static int ata_eh_speed_down_needed(struct ata_device *dev)
1257 * Kernel thread context (may sleep). 1272 * Kernel thread context (may sleep).
1258 * 1273 *
1259 * RETURNS: 1274 * RETURNS:
1260 * 0 on success, -errno otherwise 1275 * Determined recovery action.
1261 */ 1276 */
1262static int ata_eh_speed_down(struct ata_device *dev, int is_io, 1277static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io,
1263 unsigned int err_mask) 1278 unsigned int err_mask)
1264{ 1279{
1265 if (!err_mask) 1280 unsigned int verdict;
1281 unsigned int action = 0;
1282
1283 /* don't bother if Cat-0 error */
1284 if (ata_eh_categorize_error(is_io, err_mask) == 0)
1266 return 0; 1285 return 0;
1267 1286
1268 /* record error and determine whether speed down is necessary */ 1287 /* record error and determine whether speed down is necessary */
1269 ata_ering_record(&dev->ering, is_io, err_mask); 1288 ata_ering_record(&dev->ering, is_io, err_mask);
1289 verdict = ata_eh_speed_down_verdict(dev);
1270 1290
1271 if (!ata_eh_speed_down_needed(dev)) 1291 /* turn off NCQ? */
1272 return 0; 1292 if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
1293 (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
1294 ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
1295 dev->flags |= ATA_DFLAG_NCQ_OFF;
1296 ata_dev_printk(dev, KERN_WARNING,
1297 "NCQ disabled due to excessive errors\n");
1298 goto done;
1299 }
1300
1301 /* speed down? */
1302 if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
1303 /* speed down SATA link speed if possible */
1304 if (sata_down_spd_limit(dev->ap) == 0) {
1305 action |= ATA_EH_HARDRESET;
1306 goto done;
1307 }
1273 1308
1274 /* speed down SATA link speed if possible */ 1309 /* lower transfer mode */
1275 if (sata_down_spd_limit(dev->ap) == 0) 1310 if (dev->spdn_cnt < 2) {
1276 return ATA_EH_HARDRESET; 1311 static const int dma_dnxfer_sel[] =
1312 { ATA_DNXFER_DMA, ATA_DNXFER_40C };
1313 static const int pio_dnxfer_sel[] =
1314 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
1315 int sel;
1277 1316
1278 /* lower transfer mode */ 1317 if (dev->xfer_shift != ATA_SHIFT_PIO)
1279 if (ata_down_xfermask_limit(dev, ATA_DNXFER_ANY) == 0) 1318 sel = dma_dnxfer_sel[dev->spdn_cnt];
1280 return ATA_EH_SOFTRESET; 1319 else
1320 sel = pio_dnxfer_sel[dev->spdn_cnt];
1321
1322 dev->spdn_cnt++;
1323
1324 if (ata_down_xfermask_limit(dev, sel) == 0) {
1325 action |= ATA_EH_SOFTRESET;
1326 goto done;
1327 }
1328 }
1329 }
1330
1331 /* Fall back to PIO? Slowing down to PIO is meaningless for
1332 * SATA. Consider it only for PATA.
1333 */
1334 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
1335 (dev->ap->cbl != ATA_CBL_SATA) &&
1336 (dev->xfer_shift != ATA_SHIFT_PIO)) {
1337 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
1338 dev->spdn_cnt = 0;
1339 action |= ATA_EH_SOFTRESET;
1340 goto done;
1341 }
1342 }
1281 1343
1282 ata_dev_printk(dev, KERN_ERR,
1283 "speed down requested but no transfer mode left\n");
1284 return 0; 1344 return 0;
1345 done:
1346 /* device has been slowed down, blow error history */
1347 ata_ering_clear(&dev->ering);
1348 return action;
1285} 1349}
1286 1350
1287/** 1351/**
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 60dfc5f7e882..5db50fa594ac 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -495,6 +495,7 @@ struct ata_device {
495 495
496 /* error history */ 496 /* error history */
497 struct ata_ering ering; 497 struct ata_ering ering;
498 int spdn_cnt;
498 unsigned int horkage; /* List of broken features */ 499 unsigned int horkage; /* List of broken features */
499#ifdef CONFIG_SATA_ACPI 500#ifdef CONFIG_SATA_ACPI
500 /* ACPI objects info */ 501 /* ACPI objects info */