diff options
author | Steffen Maier <maier@linux.ibm.com> | 2019-10-01 06:49:49 -0400 |
---|---|---|
committer | Martin K. Petersen <martin.petersen@oracle.com> | 2019-10-03 21:45:53 -0400 |
commit | 2190168aaea42c31bff7b9a967e7b045f07df095 (patch) | |
tree | 967cabf75189175be824e5fd7f84989eba2612b5 | |
parent | 8f8fed0cdbbd6cdbf28d9ebe662f45765d2f7d39 (diff) |
scsi: zfcp: fix reaction on bit error threshold notification
On excessive bit errors for the FCP channel ingress fibre path, the channel
notifies us. Previously, we only emitted a kernel message and a trace
record. Since performance can become suboptimal with I/O timeouts due to
bit errors, we now stop using an FCP device by default on channel
notification so that multipath on top can fail over to other paths in a
timely manner. The new module parameter zfcp.ber_stop can be disabled to
restore the old zfcp behavior.
User explanation of new kernel message:
* Description:
* The FCP channel reported that its bit error threshold has been exceeded.
* These errors might result from a problem with the physical components
* of the local fibre link into the FCP channel.
* The problem might be damage or malfunction of the cable or
* cable connection between the FCP channel and
* the adjacent fabric switch port or the point-to-point peer.
* Find details about the errors in the HBA trace for the FCP device.
* The zfcp device driver closed down the FCP device
* to limit the performance impact from possible I/O command timeouts.
* User action:
* Check for problems on the local fibre link, ensure that fibre optics are
* clean and functional, and all cables are properly plugged.
* After the repair action, you can manually recover the FCP device by
* writing "0" into its "failed" sysfs attribute.
* If recovery through sysfs is not possible, set the CHPID of the device
* offline and back online on the service element.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: <stable@vger.kernel.org> #2.6.30+
Link: https://lore.kernel.org/r/20191001104949.42810-1-maier@linux.ibm.com
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
Signed-off-by: Steffen Maier <maier@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
-rw-r--r-- | drivers/s390/scsi/zfcp_fsf.c | 16 |
1 files changed, 13 insertions, 3 deletions
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 296bbc3c4606..cf63916814cc 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c | |||
@@ -27,6 +27,11 @@ | |||
27 | 27 | ||
28 | struct kmem_cache *zfcp_fsf_qtcb_cache; | 28 | struct kmem_cache *zfcp_fsf_qtcb_cache; |
29 | 29 | ||
30 | static bool ber_stop = true; | ||
31 | module_param(ber_stop, bool, 0600); | ||
32 | MODULE_PARM_DESC(ber_stop, | ||
33 | "Shuts down FCP devices for FCP channels that report a bit-error count in excess of its threshold (default on)"); | ||
34 | |||
30 | static void zfcp_fsf_request_timeout_handler(struct timer_list *t) | 35 | static void zfcp_fsf_request_timeout_handler(struct timer_list *t) |
31 | { | 36 | { |
32 | struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer); | 37 | struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer); |
@@ -236,10 +241,15 @@ static void zfcp_fsf_status_read_handler(struct zfcp_fsf_req *req) | |||
236 | case FSF_STATUS_READ_SENSE_DATA_AVAIL: | 241 | case FSF_STATUS_READ_SENSE_DATA_AVAIL: |
237 | break; | 242 | break; |
238 | case FSF_STATUS_READ_BIT_ERROR_THRESHOLD: | 243 | case FSF_STATUS_READ_BIT_ERROR_THRESHOLD: |
239 | dev_warn(&adapter->ccw_device->dev, | ||
240 | "The error threshold for checksum statistics " | ||
241 | "has been exceeded\n"); | ||
242 | zfcp_dbf_hba_bit_err("fssrh_3", req); | 244 | zfcp_dbf_hba_bit_err("fssrh_3", req); |
245 | if (ber_stop) { | ||
246 | dev_warn(&adapter->ccw_device->dev, | ||
247 | "All paths over this FCP device are disused because of excessive bit errors\n"); | ||
248 | zfcp_erp_adapter_shutdown(adapter, 0, "fssrh_b"); | ||
249 | } else { | ||
250 | dev_warn(&adapter->ccw_device->dev, | ||
251 | "The error threshold for checksum statistics has been exceeded\n"); | ||
252 | } | ||
243 | break; | 253 | break; |
244 | case FSF_STATUS_READ_LINK_DOWN: | 254 | case FSF_STATUS_READ_LINK_DOWN: |
245 | zfcp_fsf_status_read_link_down(req); | 255 | zfcp_fsf_status_read_link_down(req); |