diff options
author | Mark Lord <liml@rtr.ca> | 2009-03-10 22:01:17 -0400 |
---|---|---|
committer | Jeff Garzik <jgarzik@redhat.com> | 2009-03-24 22:35:37 -0400 |
commit | 2b748a0a344847fe6b924407bbe153e1878c9f09 (patch) | |
tree | 0a84cdbc70d0b9a7e974299bbaa0cba4b9610a58 /drivers/ata | |
parent | 40f21b1124a9552bc093469280eb8239dc5f73d7 (diff) |
sata_mv: implement IRQ coalescing (v2)
Add IRQ coalescing to sata_mv (off by default).
This feature can reduce total interrupt overhead for RAID setups
in some situations, by deferring the interrupt signal until one or both of:
a) a specified io_count (completed SATA commands) is achieved, or
b) a specified time interval elapses after an IO completion.
For now, module parameters are used to set the irq_coalescing_io_count
and irq_coalescing_usecs (timeout) globally. These may eventually
be supplemented with sysfs attributes, so that thresholds can be set
on-the-fly and on a per-chip (or even per-host_controller) basis.
Signed-off-by: Mark Lord <mlord@pobox.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
Diffstat (limited to 'drivers/ata')
-rw-r--r-- | drivers/ata/sata_mv.c | 143 |
1 file changed, 135 insertions, 8 deletions
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 206220ec5820..ef385451ffd5 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c | |||
@@ -34,10 +34,7 @@ | |||
34 | * | 34 | * |
35 | * --> Develop a low-power-consumption strategy, and implement it. | 35 | * --> Develop a low-power-consumption strategy, and implement it. |
36 | * | 36 | * |
37 | * --> [Experiment, low priority] Investigate interrupt coalescing. | 37 | * --> Add sysfs attributes for per-chip / per-HC IRQ coalescing thresholds. |
38 | * Quite often, especially with PCI Message Signalled Interrupts (MSI), | ||
39 | * the overhead reduced by interrupt mitigation is quite often not | ||
40 | * worth the latency cost. | ||
41 | * | 38 | * |
42 | * --> [Experiment, Marvell value added] Is it possible to use target | 39 | * --> [Experiment, Marvell value added] Is it possible to use target |
43 | * mode to cross-connect two Linux boxes with Marvell cards? If so, | 40 | * mode to cross-connect two Linux boxes with Marvell cards? If so, |
@@ -67,7 +64,7 @@ | |||
67 | #include <linux/libata.h> | 64 | #include <linux/libata.h> |
68 | 65 | ||
69 | #define DRV_NAME "sata_mv" | 66 | #define DRV_NAME "sata_mv" |
70 | #define DRV_VERSION "1.26" | 67 | #define DRV_VERSION "1.27" |
71 | 68 | ||
72 | /* | 69 | /* |
73 | * module options | 70 | * module options |
@@ -79,6 +76,16 @@ module_param(msi, int, S_IRUGO); | |||
79 | MODULE_PARM_DESC(msi, "Enable use of PCI MSI (0=off, 1=on)"); | 76 | MODULE_PARM_DESC(msi, "Enable use of PCI MSI (0=off, 1=on)"); |
80 | #endif | 77 | #endif |
81 | 78 | ||
79 | static int irq_coalescing_io_count; | ||
80 | module_param(irq_coalescing_io_count, int, S_IRUGO); | ||
81 | MODULE_PARM_DESC(irq_coalescing_io_count, | ||
82 | "IRQ coalescing I/O count threshold (0..255)"); | ||
83 | |||
84 | static int irq_coalescing_usecs; | ||
85 | module_param(irq_coalescing_usecs, int, S_IRUGO); | ||
86 | MODULE_PARM_DESC(irq_coalescing_usecs, | ||
87 | "IRQ coalescing time threshold in usecs"); | ||
88 | |||
82 | enum { | 89 | enum { |
83 | /* BAR's are enumerated in terms of pci_resource_start() terms */ | 90 | /* BAR's are enumerated in terms of pci_resource_start() terms */ |
84 | MV_PRIMARY_BAR = 0, /* offset 0x10: memory space */ | 91 | MV_PRIMARY_BAR = 0, /* offset 0x10: memory space */ |
@@ -88,8 +95,33 @@ enum { | |||
88 | MV_MAJOR_REG_AREA_SZ = 0x10000, /* 64KB */ | 95 | MV_MAJOR_REG_AREA_SZ = 0x10000, /* 64KB */ |
89 | MV_MINOR_REG_AREA_SZ = 0x2000, /* 8KB */ | 96 | MV_MINOR_REG_AREA_SZ = 0x2000, /* 8KB */ |
90 | 97 | ||
98 | /* For use with both IRQ coalescing methods ("all ports" or "per-HC") */ | ||
99 | COAL_CLOCKS_PER_USEC = 150, /* for calculating COAL_TIMEs */ | ||
100 | MAX_COAL_TIME_THRESHOLD = ((1 << 24) - 1), /* internal clocks count */ | ||
101 | MAX_COAL_IO_COUNT = 255, /* completed I/O count */ | ||
102 | |||
91 | MV_PCI_REG_BASE = 0, | 103 | MV_PCI_REG_BASE = 0, |
92 | 104 | ||
105 | /* | ||
106 | * Per-chip ("all ports") interrupt coalescing feature. | ||
107 | * This is only for GEN_II / GEN_IIE hardware. | ||
108 | * | ||
109 | * Coalescing defers the interrupt until either the IO_THRESHOLD | ||
110 | * (count of completed I/Os) is met, or the TIME_THRESHOLD is met. | ||
111 | */ | ||
112 | MV_COAL_REG_BASE = 0x18000, | ||
113 | MV_IRQ_COAL_CAUSE = (MV_COAL_REG_BASE + 0x08), | ||
114 | ALL_PORTS_COAL_IRQ = (1 << 4), /* all ports irq event */ | ||
115 | |||
116 | MV_IRQ_COAL_IO_THRESHOLD = (MV_COAL_REG_BASE + 0xcc), | ||
117 | MV_IRQ_COAL_TIME_THRESHOLD = (MV_COAL_REG_BASE + 0xd0), | ||
118 | |||
119 | /* | ||
120 | * Registers for the (unused here) transaction coalescing feature: | ||
121 | */ | ||
122 | MV_TRAN_COAL_CAUSE_LO = (MV_COAL_REG_BASE + 0x88), | ||
123 | MV_TRAN_COAL_CAUSE_HI = (MV_COAL_REG_BASE + 0x8c), | ||
124 | |||
93 | MV_SATAHC0_REG_BASE = 0x20000, | 125 | MV_SATAHC0_REG_BASE = 0x20000, |
94 | MV_FLASH_CTL_OFS = 0x1046c, | 126 | MV_FLASH_CTL_OFS = 0x1046c, |
95 | MV_GPIO_PORT_CTL_OFS = 0x104f0, | 127 | MV_GPIO_PORT_CTL_OFS = 0x104f0, |
@@ -186,6 +218,8 @@ enum { | |||
186 | DONE_IRQ = (1 << 1), /* shift by (2 * port #) */ | 218 | DONE_IRQ = (1 << 1), /* shift by (2 * port #) */ |
187 | HC0_IRQ_PEND = 0x1ff, /* bits 0-8 = HC0's ports */ | 219 | HC0_IRQ_PEND = 0x1ff, /* bits 0-8 = HC0's ports */ |
188 | HC_SHIFT = 9, /* bits 9-17 = HC1's ports */ | 220 | HC_SHIFT = 9, /* bits 9-17 = HC1's ports */ |
221 | DONE_IRQ_0_3 = 0x000000aa, /* DONE_IRQ ports 0,1,2,3 */ | ||
222 | DONE_IRQ_4_7 = (DONE_IRQ_0_3 << HC_SHIFT), /* 4,5,6,7 */ | ||
189 | PCI_ERR = (1 << 18), | 223 | PCI_ERR = (1 << 18), |
190 | TRAN_COAL_LO_DONE = (1 << 19), /* transaction coalescing */ | 224 | TRAN_COAL_LO_DONE = (1 << 19), /* transaction coalescing */ |
191 | TRAN_COAL_HI_DONE = (1 << 20), /* transaction coalescing */ | 225 | TRAN_COAL_HI_DONE = (1 << 20), /* transaction coalescing */ |
@@ -207,6 +241,16 @@ enum { | |||
207 | HC_COAL_IRQ = (1 << 4), /* IRQ coalescing */ | 241 | HC_COAL_IRQ = (1 << 4), /* IRQ coalescing */ |
208 | DEV_IRQ = (1 << 8), /* shift by port # */ | 242 | DEV_IRQ = (1 << 8), /* shift by port # */ |
209 | 243 | ||
244 | /* | ||
245 | * Per-HC (Host-Controller) interrupt coalescing feature. | ||
246 | * This is present on all chip generations. | ||
247 | * | ||
248 | * Coalescing defers the interrupt until either the IO_THRESHOLD | ||
249 | * (count of completed I/Os) is met, or the TIME_THRESHOLD is met. | ||
250 | */ | ||
251 | HC_IRQ_COAL_IO_THRESHOLD_OFS = 0x000c, | ||
252 | HC_IRQ_COAL_TIME_THRESHOLD_OFS = 0x0010, | ||
253 | |||
210 | /* Shadow block registers */ | 254 | /* Shadow block registers */ |
211 | SHD_BLK_OFS = 0x100, | 255 | SHD_BLK_OFS = 0x100, |
212 | SHD_CTL_AST_OFS = 0x20, /* ofs from SHD_BLK_OFS */ | 256 | SHD_CTL_AST_OFS = 0x20, /* ofs from SHD_BLK_OFS */ |
@@ -897,6 +941,23 @@ static void mv_set_edma_ptrs(void __iomem *port_mmio, | |||
897 | port_mmio + EDMA_RSP_Q_OUT_PTR_OFS); | 941 | port_mmio + EDMA_RSP_Q_OUT_PTR_OFS); |
898 | } | 942 | } |
899 | 943 | ||
944 | static void mv_write_main_irq_mask(u32 mask, struct mv_host_priv *hpriv) | ||
945 | { | ||
946 | /* | ||
947 | * When writing to the main_irq_mask in hardware, | ||
948 | * we must ensure exclusivity between the interrupt coalescing bits | ||
949 | * and the corresponding individual port DONE_IRQ bits. | ||
950 | * | ||
951 | * Note that this register is really an "IRQ enable" register, | ||
952 | * not an "IRQ mask" register as Marvell's naming might suggest. | ||
953 | */ | ||
954 | if (mask & (ALL_PORTS_COAL_DONE | PORTS_0_3_COAL_DONE)) | ||
955 | mask &= ~DONE_IRQ_0_3; | ||
956 | if (mask & (ALL_PORTS_COAL_DONE | PORTS_4_7_COAL_DONE)) | ||
957 | mask &= ~DONE_IRQ_4_7; | ||
958 | writelfl(mask, hpriv->main_irq_mask_addr); | ||
959 | } | ||
960 | |||
900 | static void mv_set_main_irq_mask(struct ata_host *host, | 961 | static void mv_set_main_irq_mask(struct ata_host *host, |
901 | u32 disable_bits, u32 enable_bits) | 962 | u32 disable_bits, u32 enable_bits) |
902 | { | 963 | { |
@@ -907,7 +968,7 @@ static void mv_set_main_irq_mask(struct ata_host *host, | |||
907 | new_mask = (old_mask & ~disable_bits) | enable_bits; | 968 | new_mask = (old_mask & ~disable_bits) | enable_bits; |
908 | if (new_mask != old_mask) { | 969 | if (new_mask != old_mask) { |
909 | hpriv->main_irq_mask = new_mask; | 970 | hpriv->main_irq_mask = new_mask; |
910 | writelfl(new_mask, hpriv->main_irq_mask_addr); | 971 | mv_write_main_irq_mask(new_mask, hpriv); |
911 | } | 972 | } |
912 | } | 973 | } |
913 | 974 | ||
@@ -948,6 +1009,64 @@ static void mv_clear_and_enable_port_irqs(struct ata_port *ap, | |||
948 | mv_enable_port_irqs(ap, port_irqs); | 1009 | mv_enable_port_irqs(ap, port_irqs); |
949 | } | 1010 | } |
950 | 1011 | ||
1012 | static void mv_set_irq_coalescing(struct ata_host *host, | ||
1013 | unsigned int count, unsigned int usecs) | ||
1014 | { | ||
1015 | struct mv_host_priv *hpriv = host->private_data; | ||
1016 | void __iomem *mmio = hpriv->base, *hc_mmio; | ||
1017 | u32 coal_enable = 0; | ||
1018 | unsigned long flags; | ||
1019 | unsigned int clks; | ||
1020 | const u32 coal_disable = PORTS_0_3_COAL_DONE | PORTS_4_7_COAL_DONE | | ||
1021 | ALL_PORTS_COAL_DONE; | ||
1022 | |||
1023 | /* Disable IRQ coalescing if either threshold is zero */ | ||
1024 | if (!usecs || !count) { | ||
1025 | clks = count = 0; | ||
1026 | } else { | ||
1027 | /* Respect maximum limits of the hardware */ | ||
1028 | clks = usecs * COAL_CLOCKS_PER_USEC; | ||
1029 | if (clks > MAX_COAL_TIME_THRESHOLD) | ||
1030 | clks = MAX_COAL_TIME_THRESHOLD; | ||
1031 | if (count > MAX_COAL_IO_COUNT) | ||
1032 | count = MAX_COAL_IO_COUNT; | ||
1033 | } | ||
1034 | |||
1035 | spin_lock_irqsave(&host->lock, flags); | ||
1036 | |||
1037 | #if 0 /* disabled pending functional clarification from Marvell */ | ||
1038 | if (!IS_GEN_I(hpriv)) { | ||
1039 | /* | ||
1040 | * GEN_II/GEN_IIE: global thresholds for the entire chip. | ||
1041 | */ | ||
1042 | writel(clks, mmio + MV_IRQ_COAL_TIME_THRESHOLD); | ||
1043 | writel(count, mmio + MV_IRQ_COAL_IO_THRESHOLD); | ||
1044 | /* clear leftover coal IRQ bit */ | ||
1045 | writelfl(~ALL_PORTS_COAL_IRQ, mmio + MV_IRQ_COAL_CAUSE); | ||
1046 | clks = count = 0; /* so as to clear the alternate regs below */ | ||
1047 | coal_enable = ALL_PORTS_COAL_DONE; | ||
1048 | } | ||
1049 | #endif | ||
1050 | /* | ||
1051 | * All chips: independent thresholds for each HC on the chip. | ||
1052 | */ | ||
1053 | hc_mmio = mv_hc_base_from_port(mmio, 0); | ||
1054 | writel(clks, hc_mmio + HC_IRQ_COAL_TIME_THRESHOLD_OFS); | ||
1055 | writel(count, hc_mmio + HC_IRQ_COAL_IO_THRESHOLD_OFS); | ||
1056 | coal_enable |= PORTS_0_3_COAL_DONE; | ||
1057 | if (hpriv->n_ports > 4) { | ||
1058 | hc_mmio = mv_hc_base_from_port(mmio, MV_PORTS_PER_HC); | ||
1059 | writel(clks, hc_mmio + HC_IRQ_COAL_TIME_THRESHOLD_OFS); | ||
1060 | writel(count, hc_mmio + HC_IRQ_COAL_IO_THRESHOLD_OFS); | ||
1061 | coal_enable |= PORTS_4_7_COAL_DONE; | ||
1062 | } | ||
1063 | if (!count) | ||
1064 | coal_enable = 0; | ||
1065 | mv_set_main_irq_mask(host, coal_disable, coal_enable); | ||
1066 | |||
1067 | spin_unlock_irqrestore(&host->lock, flags); | ||
1068 | } | ||
1069 | |||
951 | /** | 1070 | /** |
952 | * mv_start_edma - Enable eDMA engine | 1071 | * mv_start_edma - Enable eDMA engine |
953 | * @base: port base address | 1072 | * @base: port base address |
@@ -2500,6 +2619,10 @@ static int mv_host_intr(struct ata_host *host, u32 main_irq_cause) | |||
2500 | void __iomem *mmio = hpriv->base, *hc_mmio; | 2619 | void __iomem *mmio = hpriv->base, *hc_mmio; |
2501 | unsigned int handled = 0, port; | 2620 | unsigned int handled = 0, port; |
2502 | 2621 | ||
2622 | /* If asserted, clear the "all ports" IRQ coalescing bit */ | ||
2623 | if (main_irq_cause & ALL_PORTS_COAL_DONE) | ||
2624 | writel(~ALL_PORTS_COAL_IRQ, mmio + MV_IRQ_COAL_CAUSE); | ||
2625 | |||
2503 | for (port = 0; port < hpriv->n_ports; port++) { | 2626 | for (port = 0; port < hpriv->n_ports; port++) { |
2504 | struct ata_port *ap = host->ports[port]; | 2627 | struct ata_port *ap = host->ports[port]; |
2505 | unsigned int p, shift, hardport, port_cause; | 2628 | unsigned int p, shift, hardport, port_cause; |
@@ -2532,6 +2655,8 @@ static int mv_host_intr(struct ata_host *host, u32 main_irq_cause) | |||
2532 | * to ack (only) those ports via hc_irq_cause. | 2655 | * to ack (only) those ports via hc_irq_cause. |
2533 | */ | 2656 | */ |
2534 | ack_irqs = 0; | 2657 | ack_irqs = 0; |
2658 | if (hc_cause & PORTS_0_3_COAL_DONE) | ||
2659 | ack_irqs = HC_COAL_IRQ; | ||
2535 | for (p = 0; p < MV_PORTS_PER_HC; ++p) { | 2660 | for (p = 0; p < MV_PORTS_PER_HC; ++p) { |
2536 | if ((port + p) >= hpriv->n_ports) | 2661 | if ((port + p) >= hpriv->n_ports) |
2537 | break; | 2662 | break; |
@@ -2620,7 +2745,7 @@ static irqreturn_t mv_interrupt(int irq, void *dev_instance) | |||
2620 | 2745 | ||
2621 | /* for MSI: block new interrupts while in here */ | 2746 | /* for MSI: block new interrupts while in here */ |
2622 | if (using_msi) | 2747 | if (using_msi) |
2623 | writel(0, hpriv->main_irq_mask_addr); | 2748 | mv_write_main_irq_mask(0, hpriv); |
2624 | 2749 | ||
2625 | main_irq_cause = readl(hpriv->main_irq_cause_addr); | 2750 | main_irq_cause = readl(hpriv->main_irq_cause_addr); |
2626 | pending_irqs = main_irq_cause & hpriv->main_irq_mask; | 2751 | pending_irqs = main_irq_cause & hpriv->main_irq_mask; |
@@ -2637,7 +2762,7 @@ static irqreturn_t mv_interrupt(int irq, void *dev_instance) | |||
2637 | 2762 | ||
2638 | /* for MSI: unmask; interrupt cause bits will retrigger now */ | 2763 | /* for MSI: unmask; interrupt cause bits will retrigger now */ |
2639 | if (using_msi) | 2764 | if (using_msi) |
2640 | writel(hpriv->main_irq_mask, hpriv->main_irq_mask_addr); | 2765 | mv_write_main_irq_mask(hpriv->main_irq_mask, hpriv); |
2641 | 2766 | ||
2642 | spin_unlock(&host->lock); | 2767 | spin_unlock(&host->lock); |
2643 | 2768 | ||
@@ -3546,6 +3671,8 @@ static int mv_init_host(struct ata_host *host, unsigned int board_idx) | |||
3546 | * The per-port interrupts get done later as ports are set up. | 3671 | * The per-port interrupts get done later as ports are set up. |
3547 | */ | 3672 | */ |
3548 | mv_set_main_irq_mask(host, 0, PCI_ERR); | 3673 | mv_set_main_irq_mask(host, 0, PCI_ERR); |
3674 | mv_set_irq_coalescing(host, irq_coalescing_io_count, | ||
3675 | irq_coalescing_usecs); | ||
3549 | done: | 3676 | done: |
3550 | return rc; | 3677 | return rc; |
3551 | } | 3678 | } |