author    Bart Van Assche <bvanassche@acm.org>   2013-10-26 08:33:30 -0400
committer Roland Dreier <roland@purestorage.com> 2013-11-08 17:43:15 -0500
commit    29c17324803c8a3bb5b2b69309e43571164cc4de
tree      93279b89d0b16228eccd2dc5f1d2b29c17c4e3af
parent    9dd69a600a680fab1c9235a644c886d8d6a2da2a
scsi_transport_srp: Add transport layer error handling
Add the necessary functions in the SRP transport module to allow an
SRP initiator driver to implement transport layer error handling
similar to the functionality already provided by the FC transport
layer. This includes:

- Support for implementing fast_io_fail_tmo, the time that should
  elapse after having detected a transport layer problem and before
  failing I/O.
- Support for implementing dev_loss_tmo, the time that should elapse
  after having detected a transport layer problem and before removing
  a remote port.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: David Dillow <dillowda@ornl.gov>
Signed-off-by: Roland Dreier <roland@purestorage.com>
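As an illustration of how an SRP initiator driver is expected to consume
this API, here is a minimal sketch; all my_* names are hypothetical and
not part of this patch:

	#include <linux/errno.h>
	#include <scsi/scsi_transport_srp.h>

	static int my_fast_io_fail_tmo = 15;	/* seconds; sysfs default */
	static int my_dev_loss_tmo = 600;

	static int my_rport_reconnect(struct srp_rport *rport)
	{
		/* Re-establish the RDMA connection here; finish outstanding
		 * commands before reconnecting (see srp_reconnect_rport()).
		 */
		return -ENODEV;		/* placeholder result */
	}

	static void my_terminate_rport_io(struct srp_rport *rport)
	{
		/* Fail all I/O still outstanding on this rport. */
	}

	static void my_rport_delete(struct srp_rport *rport)
	{
		/* Schedule removal of the remote port. */
	}

	static struct srp_function_template my_srp_ft = {
		.has_rport_state	= true,
		.reset_timer_if_blocked	= true,
		.fast_io_fail_tmo	= &my_fast_io_fail_tmo,
		.dev_loss_tmo		= &my_dev_loss_tmo,
		.reconnect		= my_rport_reconnect,
		.terminate_rport_io	= my_terminate_rport_io,
		.rport_delete		= my_rport_delete,
	};

	/* Called by the LLD when it detects a transport layer failure:
	 * starts the fast_io_fail and dev_loss timers added below. */
	static void my_handle_transport_error(struct srp_rport *rport)
	{
		srp_start_tl_fail_timers(rport);
	}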
-rw-r--r--  Documentation/ABI/stable/sysfs-transport-srp |  31
-rw-r--r--  drivers/scsi/scsi_transport_srp.c            | 430
-rw-r--r--  include/scsi/scsi_transport_srp.h            |  74
3 files changed, 532 insertions(+), 3 deletions(-)
diff --git a/Documentation/ABI/stable/sysfs-transport-srp b/Documentation/ABI/stable/sysfs-transport-srp
index b36fb0dc13c8..8b6acc775b71 100644
--- a/Documentation/ABI/stable/sysfs-transport-srp
+++ b/Documentation/ABI/stable/sysfs-transport-srp
@@ -5,6 +5,24 @@ Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
 Description:	Instructs an SRP initiator to disconnect from a target and to
 		remove all LUNs imported from that target.
 
+What:		/sys/class/srp_remote_ports/port-<h>:<n>/dev_loss_tmo
+Date:		February 1, 2014
+KernelVersion:	3.13
+Contact:	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
+Description:	Number of seconds the SCSI layer will wait after a transport
+		layer error has been observed before removing a target port.
+		Zero means immediate removal. Setting this attribute to "off"
+		will disable the dev_loss timer.
+
+What:		/sys/class/srp_remote_ports/port-<h>:<n>/fast_io_fail_tmo
+Date:		February 1, 2014
+KernelVersion:	3.13
+Contact:	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
+Description:	Number of seconds the SCSI layer will wait after a transport
+		layer error has been observed before failing I/O. Zero means
+		failing I/O immediately. Setting this attribute to "off" will
+		disable the fast_io_fail timer.
+
 What:		/sys/class/srp_remote_ports/port-<h>:<n>/port_id
 Date:		June 27, 2007
 KernelVersion:	2.6.24
@@ -17,3 +35,16 @@ Date: June 27, 2007
 KernelVersion:	2.6.24
 Contact:	linux-scsi@vger.kernel.org
 Description:	Role of the remote port. Either "SRP Initiator" or "SRP Target".
+
+What:		/sys/class/srp_remote_ports/port-<h>:<n>/state
+Date:		February 1, 2014
+KernelVersion:	3.13
+Contact:	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
+Description:	State of the transport layer used for communication with the
+		remote port. "running" if the transport layer is operational;
+		"blocked" if a transport layer error has been encountered but
+		the fast_io_fail_tmo timer has not yet fired; "fail-fast"
+		after the fast_io_fail_tmo timer has fired and before the
+		"dev_loss_tmo" timer has fired; "lost" after the
+		"dev_loss_tmo" timer has fired and before the port is finally
+		removed.
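The attributes above behave like ordinary sysfs files. A small user-space
sketch of driving them (the port name "port-1:1" is a made-up example):

	#include <stdio.h>

	int main(void)
	{
		char state[32];
		FILE *f;

		/* "off" disables the fast_io_fail timer; a number of
		 * seconds enables it. Writing requires root. */
		f = fopen("/sys/class/srp_remote_ports/port-1:1/fast_io_fail_tmo", "w");
		if (f) {
			fputs("off\n", f);
			fclose(f);
		}

		/* Read back the transport layer state, e.g. "running". */
		f = fopen("/sys/class/srp_remote_ports/port-1:1/state", "r");
		if (f) {
			if (fgets(state, sizeof(state), f))
				printf("rport state: %s", state);
			fclose(f);
		}
		return 0;
	}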
diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c
index f7ba94aa52cb..2696e26b3423 100644
--- a/drivers/scsi/scsi_transport_srp.c
+++ b/drivers/scsi/scsi_transport_srp.c
@@ -24,12 +24,15 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/delay.h>
 
 #include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport.h>
 #include <scsi/scsi_transport_srp.h>
+#include "scsi_priv.h"
 #include "scsi_transport_srp_internal.h"
 
 struct srp_host_attrs {
@@ -38,7 +41,7 @@ struct srp_host_attrs {
 #define to_srp_host_attrs(host)	((struct srp_host_attrs *)(host)->shost_data)
 
 #define SRP_HOST_ATTRS 0
-#define SRP_RPORT_ATTRS 3
+#define SRP_RPORT_ATTRS 6
 
 struct srp_internal {
 	struct scsi_transport_template t;
@@ -54,6 +57,34 @@ struct srp_internal {
 
 #define dev_to_rport(d)	container_of(d, struct srp_rport, dev)
 #define transport_class_to_srp_rport(dev) dev_to_rport((dev)->parent)
+static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r)
+{
+	return dev_to_shost(r->dev.parent);
+}
+
+/**
+ * srp_tmo_valid() - check timeout combination validity
+ *
+ * The combination of the timeout parameters must be such that SCSI commands
+ * are finished in a reasonable time. Hence do not allow the fast I/O fail
+ * timeout to exceed SCSI_DEVICE_BLOCK_MAX_TIMEOUT. Furthermore, these
+ * parameters must be such that multipath can detect failed paths timely.
+ * Hence do not allow both parameters to be disabled simultaneously.
+ */
+int srp_tmo_valid(int fast_io_fail_tmo, int dev_loss_tmo)
+{
+	if (fast_io_fail_tmo < 0 && dev_loss_tmo < 0)
+		return -EINVAL;
+	if (fast_io_fail_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
+		return -EINVAL;
+	if (dev_loss_tmo >= LONG_MAX / HZ)
+		return -EINVAL;
+	if (fast_io_fail_tmo >= 0 && dev_loss_tmo >= 0 &&
+	    fast_io_fail_tmo >= dev_loss_tmo)
+		return -EINVAL;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(srp_tmo_valid);
 
 static int srp_host_setup(struct transport_container *tc, struct device *dev,
 			  struct device *cdev)
@@ -134,10 +165,383 @@ static ssize_t store_srp_rport_delete(struct device *dev,
 
 static DEVICE_ATTR(delete, S_IWUSR, NULL, store_srp_rport_delete);
 
+static ssize_t show_srp_rport_state(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	static const char *const state_name[] = {
+		[SRP_RPORT_RUNNING] = "running",
+		[SRP_RPORT_BLOCKED] = "blocked",
+		[SRP_RPORT_FAIL_FAST] = "fail-fast",
+		[SRP_RPORT_LOST] = "lost",
+	};
+	struct srp_rport *rport = transport_class_to_srp_rport(dev);
+	enum srp_rport_state state = rport->state;
+
+	return sprintf(buf, "%s\n",
+		       (unsigned)state < ARRAY_SIZE(state_name) ?
+		       state_name[state] : "???");
+}
+
+static DEVICE_ATTR(state, S_IRUGO, show_srp_rport_state, NULL);
+
+static ssize_t srp_show_tmo(char *buf, int tmo)
+{
+	return tmo >= 0 ? sprintf(buf, "%d\n", tmo) : sprintf(buf, "off\n");
+}
+
+static int srp_parse_tmo(int *tmo, const char *buf)
+{
+	int res = 0;
+
+	if (strncmp(buf, "off", 3) != 0)
+		res = kstrtoint(buf, 0, tmo);
+	else
+		*tmo = -1;
+
+	return res;
+}
+
+static ssize_t show_srp_rport_fast_io_fail_tmo(struct device *dev,
+					       struct device_attribute *attr,
+					       char *buf)
+{
+	struct srp_rport *rport = transport_class_to_srp_rport(dev);
+
+	return srp_show_tmo(buf, rport->fast_io_fail_tmo);
+}
+
+static ssize_t store_srp_rport_fast_io_fail_tmo(struct device *dev,
+						struct device_attribute *attr,
+						const char *buf, size_t count)
+{
+	struct srp_rport *rport = transport_class_to_srp_rport(dev);
+	int res;
+	int fast_io_fail_tmo;
+
+	res = srp_parse_tmo(&fast_io_fail_tmo, buf);
+	if (res)
+		goto out;
+	res = srp_tmo_valid(fast_io_fail_tmo, rport->dev_loss_tmo);
+	if (res)
+		goto out;
+	rport->fast_io_fail_tmo = fast_io_fail_tmo;
+	res = count;
+
+out:
+	return res;
+}
+
+static DEVICE_ATTR(fast_io_fail_tmo, S_IRUGO | S_IWUSR,
+		   show_srp_rport_fast_io_fail_tmo,
+		   store_srp_rport_fast_io_fail_tmo);
+
+static ssize_t show_srp_rport_dev_loss_tmo(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct srp_rport *rport = transport_class_to_srp_rport(dev);
+
+	return srp_show_tmo(buf, rport->dev_loss_tmo);
+}
+
+static ssize_t store_srp_rport_dev_loss_tmo(struct device *dev,
+					    struct device_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct srp_rport *rport = transport_class_to_srp_rport(dev);
+	int res;
+	int dev_loss_tmo;
+
+	res = srp_parse_tmo(&dev_loss_tmo, buf);
+	if (res)
+		goto out;
+	res = srp_tmo_valid(rport->fast_io_fail_tmo, dev_loss_tmo);
+	if (res)
+		goto out;
+	rport->dev_loss_tmo = dev_loss_tmo;
+	res = count;
+
+out:
+	return res;
+}
+
+static DEVICE_ATTR(dev_loss_tmo, S_IRUGO | S_IWUSR,
+		   show_srp_rport_dev_loss_tmo,
+		   store_srp_rport_dev_loss_tmo);
+
+static int srp_rport_set_state(struct srp_rport *rport,
+			       enum srp_rport_state new_state)
+{
+	enum srp_rport_state old_state = rport->state;
+
+	lockdep_assert_held(&rport->mutex);
+
+	switch (new_state) {
+	case SRP_RPORT_RUNNING:
+		switch (old_state) {
+		case SRP_RPORT_LOST:
+			goto invalid;
+		default:
+			break;
+		}
+		break;
+	case SRP_RPORT_BLOCKED:
+		switch (old_state) {
+		case SRP_RPORT_RUNNING:
+			break;
+		default:
+			goto invalid;
+		}
+		break;
+	case SRP_RPORT_FAIL_FAST:
+		switch (old_state) {
+		case SRP_RPORT_LOST:
+			goto invalid;
+		default:
+			break;
+		}
+		break;
+	case SRP_RPORT_LOST:
+		break;
+	}
+	rport->state = new_state;
+	return 0;
+
+invalid:
+	return -EINVAL;
+}
+
+static void __rport_fail_io_fast(struct srp_rport *rport)
+{
+	struct Scsi_Host *shost = rport_to_shost(rport);
+	struct srp_internal *i;
+
+	lockdep_assert_held(&rport->mutex);
+
+	if (srp_rport_set_state(rport, SRP_RPORT_FAIL_FAST))
+		return;
+	scsi_target_unblock(rport->dev.parent, SDEV_TRANSPORT_OFFLINE);
+
+	/* Involve the LLD if possible to terminate all I/O on the rport. */
+	i = to_srp_internal(shost->transportt);
+	if (i->f->terminate_rport_io)
+		i->f->terminate_rport_io(rport);
+}
+
+/**
+ * rport_fast_io_fail_timedout() - fast I/O failure timeout handler
+ */
+static void rport_fast_io_fail_timedout(struct work_struct *work)
+{
+	struct srp_rport *rport = container_of(to_delayed_work(work),
+					struct srp_rport, fast_io_fail_work);
+	struct Scsi_Host *shost = rport_to_shost(rport);
+
+	pr_info("fast_io_fail_tmo expired for SRP %s / %s.\n",
+		dev_name(&rport->dev), dev_name(&shost->shost_gendev));
+
+	mutex_lock(&rport->mutex);
+	if (rport->state == SRP_RPORT_BLOCKED)
+		__rport_fail_io_fast(rport);
+	mutex_unlock(&rport->mutex);
+}
+
+/**
+ * rport_dev_loss_timedout() - device loss timeout handler
+ */
+static void rport_dev_loss_timedout(struct work_struct *work)
+{
+	struct srp_rport *rport = container_of(to_delayed_work(work),
+					struct srp_rport, dev_loss_work);
+	struct Scsi_Host *shost = rport_to_shost(rport);
+	struct srp_internal *i = to_srp_internal(shost->transportt);
+
+	pr_info("dev_loss_tmo expired for SRP %s / %s.\n",
+		dev_name(&rport->dev), dev_name(&shost->shost_gendev));
+
+	mutex_lock(&rport->mutex);
+	WARN_ON(srp_rport_set_state(rport, SRP_RPORT_LOST) != 0);
+	scsi_target_unblock(rport->dev.parent, SDEV_TRANSPORT_OFFLINE);
+	mutex_unlock(&rport->mutex);
+
+	i->f->rport_delete(rport);
+}
+
+static void __srp_start_tl_fail_timers(struct srp_rport *rport)
+{
+	struct Scsi_Host *shost = rport_to_shost(rport);
+	int fast_io_fail_tmo, dev_loss_tmo;
+
+	lockdep_assert_held(&rport->mutex);
+
+	if (!rport->deleted) {
+		fast_io_fail_tmo = rport->fast_io_fail_tmo;
+		dev_loss_tmo = rport->dev_loss_tmo;
+		pr_debug("%s current state: %d\n",
+			 dev_name(&shost->shost_gendev), rport->state);
+
+		if (fast_io_fail_tmo >= 0 &&
+		    srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) {
+			pr_debug("%s new state: %d\n",
+				 dev_name(&shost->shost_gendev),
+				 rport->state);
+			scsi_target_block(&shost->shost_gendev);
+			queue_delayed_work(system_long_wq,
+					   &rport->fast_io_fail_work,
+					   1UL * fast_io_fail_tmo * HZ);
+		}
+		if (dev_loss_tmo >= 0)
+			queue_delayed_work(system_long_wq,
+					   &rport->dev_loss_work,
+					   1UL * dev_loss_tmo * HZ);
+	} else {
+		pr_debug("%s has already been deleted\n",
+			 dev_name(&shost->shost_gendev));
+		srp_rport_set_state(rport, SRP_RPORT_FAIL_FAST);
+		scsi_target_unblock(&shost->shost_gendev,
+				    SDEV_TRANSPORT_OFFLINE);
+	}
+}
+
+/**
+ * srp_start_tl_fail_timers() - start the transport layer failure timers
+ *
+ * Start the transport layer fast I/O failure and device loss timers. Do not
+ * modify a timer that was already started.
+ */
+void srp_start_tl_fail_timers(struct srp_rport *rport)
+{
+	mutex_lock(&rport->mutex);
+	__srp_start_tl_fail_timers(rport);
+	mutex_unlock(&rport->mutex);
+}
+EXPORT_SYMBOL(srp_start_tl_fail_timers);
+
+/**
+ * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn()
+ */
+static int scsi_request_fn_active(struct Scsi_Host *shost)
+{
+	struct scsi_device *sdev;
+	struct request_queue *q;
+	int request_fn_active = 0;
+
+	shost_for_each_device(sdev, shost) {
+		q = sdev->request_queue;
+
+		spin_lock_irq(q->queue_lock);
+		request_fn_active += q->request_fn_active;
+		spin_unlock_irq(q->queue_lock);
+	}
+
+	return request_fn_active;
+}
+
+/**
+ * srp_reconnect_rport() - reconnect to an SRP target port
+ *
+ * Blocks SCSI command queueing before invoking reconnect() such that
+ * queuecommand() won't be invoked concurrently with reconnect() from outside
+ * the SCSI EH. This is important since a reconnect() implementation may
+ * reallocate resources needed by queuecommand().
+ *
+ * Notes:
+ * - This function neither waits until outstanding requests have finished nor
+ *   tries to abort these. It is the responsibility of the reconnect()
+ *   function to finish outstanding commands before reconnecting to the target
+ *   port.
+ * - It is the responsibility of the caller to ensure that the resources
+ *   reallocated by the reconnect() function won't be used while this function
+ *   is in progress. One possible strategy is to invoke this function from
+ *   the context of the SCSI EH thread only. Another possible strategy is to
+ *   lock the rport mutex inside each SCSI LLD callback that can be invoked by
+ *   the SCSI EH (the scsi_host_template.eh_*() functions and also the
+ *   scsi_host_template.queuecommand() function).
+ */
+int srp_reconnect_rport(struct srp_rport *rport)
+{
+	struct Scsi_Host *shost = rport_to_shost(rport);
+	struct srp_internal *i = to_srp_internal(shost->transportt);
+	struct scsi_device *sdev;
+	int res;
+
+	pr_debug("SCSI host %s\n", dev_name(&shost->shost_gendev));
+
+	res = mutex_lock_interruptible(&rport->mutex);
+	if (res)
+		goto out;
+	scsi_target_block(&shost->shost_gendev);
+	while (scsi_request_fn_active(shost))
+		msleep(20);
+	res = i->f->reconnect(rport);
+	pr_debug("%s (state %d): transport.reconnect() returned %d\n",
+		 dev_name(&shost->shost_gendev), rport->state, res);
+	if (res == 0) {
+		cancel_delayed_work(&rport->fast_io_fail_work);
+		cancel_delayed_work(&rport->dev_loss_work);
+
+		srp_rport_set_state(rport, SRP_RPORT_RUNNING);
+		scsi_target_unblock(&shost->shost_gendev, SDEV_RUNNING);
+		/*
+		 * If the SCSI error handler has offlined one or more devices,
+		 * invoking scsi_target_unblock() won't change the state of
+		 * these devices into running so do that explicitly.
+		 */
+		spin_lock_irq(shost->host_lock);
+		__shost_for_each_device(sdev, shost)
+			if (sdev->sdev_state == SDEV_OFFLINE)
+				sdev->sdev_state = SDEV_RUNNING;
+		spin_unlock_irq(shost->host_lock);
+	} else if (rport->state == SRP_RPORT_RUNNING) {
+		/*
+		 * srp_reconnect_rport() was invoked with fast_io_fail
+		 * off. Mark the port as failed and start the TL failure
+		 * timers if these had not yet been started.
+		 */
+		__rport_fail_io_fast(rport);
+		scsi_target_unblock(&shost->shost_gendev,
+				    SDEV_TRANSPORT_OFFLINE);
+		__srp_start_tl_fail_timers(rport);
+	} else if (rport->state != SRP_RPORT_BLOCKED) {
+		scsi_target_unblock(&shost->shost_gendev,
+				    SDEV_TRANSPORT_OFFLINE);
+	}
+	mutex_unlock(&rport->mutex);
+
+out:
+	return res;
+}
+EXPORT_SYMBOL(srp_reconnect_rport);
+
+/**
+ * srp_timed_out() - SRP transport intercept of the SCSI timeout EH
+ *
+ * If a timeout occurs while an rport is in the blocked state, ask the SCSI
+ * EH to continue waiting (BLK_EH_RESET_TIMER). Otherwise let the SCSI core
+ * handle the timeout (BLK_EH_NOT_HANDLED).
+ *
+ * Note: This function is called from soft-IRQ context and with the request
+ * queue lock held.
+ */
+static enum blk_eh_timer_return srp_timed_out(struct scsi_cmnd *scmd)
+{
+	struct scsi_device *sdev = scmd->device;
+	struct Scsi_Host *shost = sdev->host;
+	struct srp_internal *i = to_srp_internal(shost->transportt);
+
+	pr_debug("timeout for sdev %s\n", dev_name(&sdev->sdev_gendev));
+	return i->f->reset_timer_if_blocked && scsi_device_blocked(sdev) ?
+		BLK_EH_RESET_TIMER : BLK_EH_NOT_HANDLED;
+}
+
 static void srp_rport_release(struct device *dev)
 {
 	struct srp_rport *rport = dev_to_rport(dev);
 
+	cancel_delayed_work_sync(&rport->fast_io_fail_work);
+	cancel_delayed_work_sync(&rport->dev_loss_work);
+
 	put_device(dev->parent);
 	kfree(rport);
 }
@@ -214,12 +618,15 @@ struct srp_rport *srp_rport_add(struct Scsi_Host *shost,
 {
 	struct srp_rport *rport;
 	struct device *parent = &shost->shost_gendev;
+	struct srp_internal *i = to_srp_internal(shost->transportt);
 	int id, ret;
 
 	rport = kzalloc(sizeof(*rport), GFP_KERNEL);
 	if (!rport)
 		return ERR_PTR(-ENOMEM);
 
+	mutex_init(&rport->mutex);
+
 	device_initialize(&rport->dev);
 
 	rport->dev.parent = get_device(parent);
@@ -228,6 +635,13 @@ struct srp_rport *srp_rport_add(struct Scsi_Host *shost,
 	memcpy(rport->port_id, ids->port_id, sizeof(rport->port_id));
 	rport->roles = ids->roles;
 
+	rport->fast_io_fail_tmo = i->f->fast_io_fail_tmo ?
+		*i->f->fast_io_fail_tmo : 15;
+	rport->dev_loss_tmo = i->f->dev_loss_tmo ? *i->f->dev_loss_tmo : 60;
+	INIT_DELAYED_WORK(&rport->fast_io_fail_work,
+			  rport_fast_io_fail_timedout);
+	INIT_DELAYED_WORK(&rport->dev_loss_work, rport_dev_loss_timedout);
+
 	id = atomic_inc_return(&to_srp_host_attrs(shost)->next_port_id);
 	dev_set_name(&rport->dev, "port-%d:%d", shost->host_no, id);
 
@@ -277,6 +691,13 @@ void srp_rport_del(struct srp_rport *rport)
 	transport_remove_device(dev);
 	device_del(dev);
 	transport_destroy_device(dev);
+
+	mutex_lock(&rport->mutex);
+	if (rport->state == SRP_RPORT_BLOCKED)
+		__rport_fail_io_fast(rport);
+	rport->deleted = true;
+	mutex_unlock(&rport->mutex);
+
 	put_device(dev);
 }
 EXPORT_SYMBOL_GPL(srp_rport_del);
@@ -328,6 +749,8 @@ srp_attach_transport(struct srp_function_template *ft)
 	if (!i)
 		return NULL;
 
+	i->t.eh_timed_out = srp_timed_out;
+
 	i->t.tsk_mgmt_response = srp_tsk_mgmt_response;
 	i->t.it_nexus_response = srp_it_nexus_response;
 
@@ -345,6 +768,11 @@ srp_attach_transport(struct srp_function_template *ft)
 	count = 0;
 	i->rport_attrs[count++] = &dev_attr_port_id;
 	i->rport_attrs[count++] = &dev_attr_roles;
+	if (ft->has_rport_state) {
+		i->rport_attrs[count++] = &dev_attr_state;
+		i->rport_attrs[count++] = &dev_attr_fast_io_fail_tmo;
+		i->rport_attrs[count++] = &dev_attr_dev_loss_tmo;
+	}
 	if (ft->rport_delete)
 		i->rport_attrs[count++] = &dev_attr_delete;
 	i->rport_attrs[count++] = NULL;
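One way an initiator driver might plug srp_reconnect_rport() into the SCSI
error handler (a hedged sketch; my_host_priv and my_host_reset are
hypothetical names, not part of this patch):

	#include <scsi/scsi.h>
	#include <scsi/scsi_cmnd.h>
	#include <scsi/scsi_device.h>
	#include <scsi/scsi_host.h>
	#include <scsi/scsi_transport_srp.h>

	/* Hypothetical per-host private data holding the rport pointer. */
	struct my_host_priv {
		struct srp_rport *rport;
	};

	/*
	 * eh_host_reset_handler: try to re-establish the connection.
	 * srp_reconnect_rport() blocks command queueing around the
	 * ->reconnect() callback, unblocks targets on success and starts
	 * the failure timers on failure, so the LLD only reports the
	 * outcome to the SCSI EH.
	 */
	static int my_host_reset(struct scsi_cmnd *scmd)
	{
		struct Scsi_Host *shost = scmd->device->host;
		struct my_host_priv *priv = shost_priv(shost);

		return srp_reconnect_rport(priv->rport) == 0 ?
			SUCCESS : FAILED;
	}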
diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h
index 5a2d2d1081c1..ee7001677f64 100644
--- a/include/scsi/scsi_transport_srp.h
+++ b/include/scsi/scsi_transport_srp.h
@@ -13,6 +13,26 @@ struct srp_rport_identifiers {
 	u8 roles;
 };
 
+/**
+ * enum srp_rport_state - SRP transport layer state
+ * @SRP_RPORT_RUNNING:   Transport layer operational.
+ * @SRP_RPORT_BLOCKED:   Transport layer not operational; fast I/O fail timer
+ *                       is running and I/O has been blocked.
+ * @SRP_RPORT_FAIL_FAST: Fast I/O fail timer has expired; fail I/O fast.
+ * @SRP_RPORT_LOST:      Device loss timer has expired; port is being removed.
+ */
+enum srp_rport_state {
+	SRP_RPORT_RUNNING,
+	SRP_RPORT_BLOCKED,
+	SRP_RPORT_FAIL_FAST,
+	SRP_RPORT_LOST,
+};
+
+/**
+ * struct srp_rport
+ * @lld_data: LLD private data.
+ * @mutex:    Protects against concurrent rport fast_io_fail / dev_loss_tmo.
+ */
 struct srp_rport {
 	/* for initiator and target drivers */
 
@@ -23,11 +43,38 @@ struct srp_rport {
 
 	/* for initiator drivers */
 
-	void *lld_data;	/* LLD private data */
+	void *lld_data;
+
+	struct mutex mutex;
+	enum srp_rport_state state;
+	bool deleted;
+	int fast_io_fail_tmo;
+	int dev_loss_tmo;
+	struct delayed_work fast_io_fail_work;
+	struct delayed_work dev_loss_work;
 };
 
+/**
+ * struct srp_function_template
+ * @has_rport_state: Whether or not to create the state, fast_io_fail_tmo and
+ *     dev_loss_tmo sysfs attributes for an rport.
+ * @reset_timer_if_blocked: Whether or not srp_timed_out() should reset the
+ *     command timer if the device on which it has been queued is blocked.
+ * @fast_io_fail_tmo: If not NULL, points to the default fast_io_fail_tmo value.
+ * @dev_loss_tmo: If not NULL, points to the default dev_loss_tmo value.
+ * @reconnect: Callback function for reconnecting to the target. See also
+ *     srp_reconnect_rport().
+ * @terminate_rport_io: Callback function for terminating all outstanding I/O
+ *     requests for an rport.
+ */
 struct srp_function_template {
 	/* for initiator drivers */
+	bool has_rport_state;
+	bool reset_timer_if_blocked;
+	int *fast_io_fail_tmo;
+	int *dev_loss_tmo;
+	int (*reconnect)(struct srp_rport *rport);
+	void (*terminate_rport_io)(struct srp_rport *rport);
 	void (*rport_delete)(struct srp_rport *rport);
 	/* for target drivers */
 	int (* tsk_mgmt_response)(struct Scsi_Host *, u64, u64, int);
@@ -43,7 +90,30 @@ extern void srp_rport_put(struct srp_rport *rport);
 extern struct srp_rport *srp_rport_add(struct Scsi_Host *,
 				       struct srp_rport_identifiers *);
 extern void srp_rport_del(struct srp_rport *);
-
+extern int srp_tmo_valid(int fast_io_fail_tmo, int dev_loss_tmo);
+extern int srp_reconnect_rport(struct srp_rport *rport);
+extern void srp_start_tl_fail_timers(struct srp_rport *rport);
 extern void srp_remove_host(struct Scsi_Host *);
 
+/**
+ * srp_chkready() - evaluate the transport layer state before I/O
+ *
+ * Returns a SCSI result code that can be returned by the LLD queuecommand()
+ * implementation. The role of this function is similar to that of
+ * fc_remote_port_chkready().
+ */
+static inline int srp_chkready(struct srp_rport *rport)
+{
+	switch (rport->state) {
+	case SRP_RPORT_RUNNING:
+	case SRP_RPORT_BLOCKED:
+	default:
+		return 0;
+	case SRP_RPORT_FAIL_FAST:
+		return DID_TRANSPORT_FAILFAST << 16;
+	case SRP_RPORT_LOST:
+		return DID_NO_CONNECT << 16;
+	}
+}
+
 #endif
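For completeness, a hedged sketch of how srp_chkready() is meant to be used
in an LLD's queuecommand() path (my_target, my_submit_cmd and my_queuecommand
are hypothetical names, not part of this patch):

	#include <scsi/scsi.h>
	#include <scsi/scsi_cmnd.h>
	#include <scsi/scsi_device.h>
	#include <scsi/scsi_host.h>
	#include <scsi/scsi_transport_srp.h>

	/* Hypothetical LLD per-host data. */
	struct my_target {
		struct srp_rport *rport;
	};

	/* Hypothetical hardware submission path. */
	static int my_submit_cmd(struct my_target *target,
				 struct scsi_cmnd *scmd)
	{
		return 0;
	}

	static int my_queuecommand(struct Scsi_Host *shost,
				   struct scsi_cmnd *scmd)
	{
		struct my_target *target = shost_priv(shost);
		int result = srp_chkready(target->rport);

		if (result) {
			/* "fail-fast" or "lost": complete immediately. */
			scmd->result = result;
			scmd->scsi_done(scmd);
			return 0;
		}

		/* "running" or "blocked": hand the command to hardware. */
		return my_submit_cmd(target, scmd);
	}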