aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <htejun@gmail.com>2010-09-06 11:57:14 -0400
committerJeff Garzik <jgarzik@redhat.com>2010-10-21 20:21:05 -0400
commitc0c362b60e259e3480a36ef70280d545818844f0 (patch)
treed9871b719cd76f9f683278f938662e080a6ad9d7
parent97750cebb3000a9cc08f8ce8dc8c7143be7d7201 (diff)
libata: implement cross-port EH exclusion
In libata, the non-EH code paths should always take and release ap->lock explicitly when accessing hardware or shared data structures. However, once EH is active, it's assumed that the port is owned by EH and EH methods don't explicitly take ap->lock unless race from irq handler or other code paths are expected. However, libata EH didn't guarantee exclusion among EHs for ports of the same host. IOW, multiple EHs may execute in parallel on multiple ports of the same controller. In many cases, especially in SATA, the ports are completely independent of each other and this doesn't cause problems; however, there are cases where different ports share the same resource, which lead to obscure timing related bugs such as the one fixed by commit 213373cf (ata_piix: fix locking around SIDPR access). This patch implements exclusion among EHs of the same host. When EH begins, it acquires per-host EH ownership by calling ata_eh_acquire(). When EH finishes, the ownership is released by calling ata_eh_release(). EH ownership is also released whenever the EH thread goes to sleep from ata_msleep() or explicitly and reacquired after waking up. This ensures that while EH is actively accessing the hardware, it has exclusive access to it while allowing EHs to interleave and progress in parallel as they hit waiting stages, which dominate the time spent in EH. This achieves cross-port EH exclusion without pervasive and fragile changes while still allowing parallel EH for the most part. This was first reported by yuanding02@gmail.com more than three years ago in the following bugzilla. :-) https://bugzilla.kernel.org/show_bug.cgi?id=8223 Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Alan Cox <alan@lxorguk.ukuu.org.uk> Reported-by: yuanding02@gmail.com Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
-rw-r--r--drivers/ata/libata-core.c30
-rw-r--r--drivers/ata/libata-eh.c44
-rw-r--r--drivers/ata/libata.h2
-rw-r--r--include/linux/libata.h5
4 files changed, 80 insertions, 1 deletions
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 42d9ce29f50d..7f77c67d267c 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1628,8 +1628,14 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
1628 } 1628 }
1629 } 1629 }
1630 1630
1631 if (ap->ops->error_handler)
1632 ata_eh_release(ap);
1633
1631 rc = wait_for_completion_timeout(&wait, msecs_to_jiffies(timeout)); 1634 rc = wait_for_completion_timeout(&wait, msecs_to_jiffies(timeout));
1632 1635
1636 if (ap->ops->error_handler)
1637 ata_eh_acquire(ap);
1638
1633 ata_sff_flush_pio_task(ap); 1639 ata_sff_flush_pio_task(ap);
1634 1640
1635 if (!rc) { 1641 if (!rc) {
@@ -5570,6 +5576,7 @@ struct ata_host *ata_host_alloc(struct device *dev, int max_ports)
5570 dev_set_drvdata(dev, host); 5576 dev_set_drvdata(dev, host);
5571 5577
5572 spin_lock_init(&host->lock); 5578 spin_lock_init(&host->lock);
5579 mutex_init(&host->eh_mutex);
5573 host->dev = dev; 5580 host->dev = dev;
5574 host->n_ports = max_ports; 5581 host->n_ports = max_ports;
5575 5582
@@ -5867,6 +5874,7 @@ void ata_host_init(struct ata_host *host, struct device *dev,
5867 unsigned long flags, struct ata_port_operations *ops) 5874 unsigned long flags, struct ata_port_operations *ops)
5868{ 5875{
5869 spin_lock_init(&host->lock); 5876 spin_lock_init(&host->lock);
5877 mutex_init(&host->eh_mutex);
5870 host->dev = dev; 5878 host->dev = dev;
5871 host->flags = flags; 5879 host->flags = flags;
5872 host->ops = ops; 5880 host->ops = ops;
@@ -6483,9 +6491,31 @@ int ata_ratelimit(void)
6483 return __ratelimit(&ratelimit); 6491 return __ratelimit(&ratelimit);
6484} 6492}
6485 6493
6494/**
6495 * ata_msleep - ATA EH owner aware msleep
6496 * @ap: ATA port to attribute the sleep to
6497 * @msecs: duration to sleep in milliseconds
6498 *
6499 * Sleeps @msecs. If the current task is owner of @ap's EH, the
6500 * ownership is released before going to sleep and reacquired
6501 * after the sleep is complete. IOW, other ports sharing the
6502 * @ap->host will be allowed to own the EH while this task is
6503 * sleeping.
6504 *
6505 * LOCKING:
6506 * Might sleep.
6507 */
6486void ata_msleep(struct ata_port *ap, unsigned int msecs) 6508void ata_msleep(struct ata_port *ap, unsigned int msecs)
6487{ 6509{
6510 bool owns_eh = ap && ap->host->eh_owner == current;
6511
6512 if (owns_eh)
6513 ata_eh_release(ap);
6514
6488 msleep(msecs); 6515 msleep(msecs);
6516
6517 if (owns_eh)
6518 ata_eh_acquire(ap);
6489} 6519}
6490 6520
6491/** 6521/**
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 6780f4d16e81..5e590504f3aa 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -463,6 +463,41 @@ static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
463} 463}
464 464
465/** 465/**
466 * ata_eh_acquire - acquire EH ownership
467 * @ap: ATA port to acquire EH ownership for
468 *
469 * Acquire EH ownership for @ap. This is the basic exclusion
470 * mechanism for ports sharing a host. Only one port hanging off
471 * the same host can claim the ownership of EH.
472 *
473 * LOCKING:
474 * EH context.
475 */
476void ata_eh_acquire(struct ata_port *ap)
477{
478 mutex_lock(&ap->host->eh_mutex);
479 WARN_ON_ONCE(ap->host->eh_owner);
480 ap->host->eh_owner = current;
481}
482
483/**
484 * ata_eh_release - release EH ownership
485 * @ap: ATA port to release EH ownership for
486 *
487 * Release EH ownership for @ap if the caller. The caller must
488 * have acquired EH ownership using ata_eh_acquire() previously.
489 *
490 * LOCKING:
491 * EH context.
492 */
493void ata_eh_release(struct ata_port *ap)
494{
495 WARN_ON_ONCE(ap->host->eh_owner != current);
496 ap->host->eh_owner = NULL;
497 mutex_unlock(&ap->host->eh_mutex);
498}
499
500/**
466 * ata_scsi_timed_out - SCSI layer time out callback 501 * ata_scsi_timed_out - SCSI layer time out callback
467 * @cmd: timed out SCSI command 502 * @cmd: timed out SCSI command
468 * 503 *
@@ -639,11 +674,13 @@ void ata_scsi_error(struct Scsi_Host *host)
639 /* If we timed raced normal completion and there is nothing to 674 /* If we timed raced normal completion and there is nothing to
640 recover nr_timedout == 0 why exactly are we doing error recovery ? */ 675 recover nr_timedout == 0 why exactly are we doing error recovery ? */
641 676
642 repeat:
643 /* invoke error handler */ 677 /* invoke error handler */
644 if (ap->ops->error_handler) { 678 if (ap->ops->error_handler) {
645 struct ata_link *link; 679 struct ata_link *link;
646 680
681 /* acquire EH ownership */
682 ata_eh_acquire(ap);
683 repeat:
647 /* kill fast drain timer */ 684 /* kill fast drain timer */
648 del_timer_sync(&ap->fastdrain_timer); 685 del_timer_sync(&ap->fastdrain_timer);
649 686
@@ -718,6 +755,7 @@ void ata_scsi_error(struct Scsi_Host *host)
718 host->host_eh_scheduled = 0; 755 host->host_eh_scheduled = 0;
719 756
720 spin_unlock_irqrestore(ap->lock, flags); 757 spin_unlock_irqrestore(ap->lock, flags);
758 ata_eh_release(ap);
721 } else { 759 } else {
722 WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL); 760 WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
723 ap->ops->eng_timeout(ap); 761 ap->ops->eng_timeout(ap);
@@ -2818,8 +2856,10 @@ int ata_eh_reset(struct ata_link *link, int classify,
2818 "reset failed (errno=%d), retrying in %u secs\n", 2856 "reset failed (errno=%d), retrying in %u secs\n",
2819 rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000)); 2857 rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));
2820 2858
2859 ata_eh_release(ap);
2821 while (delta) 2860 while (delta)
2822 delta = schedule_timeout_uninterruptible(delta); 2861 delta = schedule_timeout_uninterruptible(delta);
2862 ata_eh_acquire(ap);
2823 } 2863 }
2824 2864
2825 if (try == max_tries - 1) { 2865 if (try == max_tries - 1) {
@@ -3635,8 +3675,10 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
3635 if (time_before_eq(deadline, now)) 3675 if (time_before_eq(deadline, now))
3636 break; 3676 break;
3637 3677
3678 ata_eh_release(ap);
3638 deadline = wait_for_completion_timeout(&ap->park_req_pending, 3679 deadline = wait_for_completion_timeout(&ap->park_req_pending,
3639 deadline - now); 3680 deadline - now);
3681 ata_eh_acquire(ap);
3640 } while (deadline); 3682 } while (deadline);
3641 ata_for_each_link(link, ap, EDGE) { 3683 ata_for_each_link(link, ap, EDGE) {
3642 ata_for_each_dev(dev, link, ALL) { 3684 ata_for_each_dev(dev, link, ALL) {
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 7c070a4b1c08..a9be110dbf51 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -145,6 +145,8 @@ extern int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
145/* libata-eh.c */ 145/* libata-eh.c */
146extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd); 146extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);
147extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd); 147extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd);
148extern void ata_eh_acquire(struct ata_port *ap);
149extern void ata_eh_release(struct ata_port *ap);
148extern enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd); 150extern enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd);
149extern void ata_scsi_error(struct Scsi_Host *host); 151extern void ata_scsi_error(struct Scsi_Host *host);
150extern void ata_port_wait_eh(struct ata_port *ap); 152extern void ata_port_wait_eh(struct ata_port *ap);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 2fbd22bd68ce..52112d39d71e 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -37,6 +37,7 @@
37#include <scsi/scsi_host.h> 37#include <scsi/scsi_host.h>
38#include <linux/acpi.h> 38#include <linux/acpi.h>
39#include <linux/cdrom.h> 39#include <linux/cdrom.h>
40#include <linux/sched.h>
40 41
41/* 42/*
42 * Define if arch has non-standard setup. This is a _PCI_ standard 43 * Define if arch has non-standard setup. This is a _PCI_ standard
@@ -535,6 +536,10 @@ struct ata_host {
535 void *private_data; 536 void *private_data;
536 struct ata_port_operations *ops; 537 struct ata_port_operations *ops;
537 unsigned long flags; 538 unsigned long flags;
539
540 struct mutex eh_mutex;
541 struct task_struct *eh_owner;
542
538#ifdef CONFIG_ATA_ACPI 543#ifdef CONFIG_ATA_ACPI
539 acpi_handle acpi_handle; 544 acpi_handle acpi_handle;
540#endif 545#endif