about summary refs log tree commit diff stats
path: root/drivers
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2009-09-23 04:31:11 -0400
committer NeilBrown <neilb@suse.de> 2009-09-23 04:31:11 -0400
commit 4b3df5668c8ebaebd8d66a5a94374be3e3b2ef0c (patch)
tree 51a231742e211143f5845edf4b09d1712dcd2771 /drivers
parent 1ef04fefe2241087d9db7e9615c3f11b516e36cf (diff)
parent 1f6672d44c1ae7408b43c06170ec34eb0a0e9b9f (diff)
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/async_tx into for-linus
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/dca/dca-core.c 124
-rw-r--r-- drivers/dma/Kconfig 14
-rw-r--r-- drivers/dma/Makefile 4
-rw-r--r-- drivers/dma/at_hdmac.c 60
-rw-r--r-- drivers/dma/at_hdmac_regs.h 1
-rw-r--r-- drivers/dma/dmaengine.c 94
-rw-r--r-- drivers/dma/dmatest.c 40
-rw-r--r-- drivers/dma/dw_dmac.c 50
-rw-r--r-- drivers/dma/dw_dmac_regs.h 1
-rw-r--r-- drivers/dma/fsldma.c 288
-rw-r--r-- drivers/dma/fsldma.h 4
-rw-r--r-- drivers/dma/ioat.c 202
-rw-r--r-- drivers/dma/ioat/Makefile 2
-rw-r--r-- drivers/dma/ioat/dca.c (renamed from drivers/dma/ioat_dca.c) 13
-rw-r--r-- drivers/dma/ioat/dma.c 1238
-rw-r--r-- drivers/dma/ioat/dma.h 337
-rw-r--r-- drivers/dma/ioat/dma_v2.c 871
-rw-r--r-- drivers/dma/ioat/dma_v2.h 190
-rw-r--r-- drivers/dma/ioat/dma_v3.c 1223
-rw-r--r-- drivers/dma/ioat/hw.h 215
-rw-r--r-- drivers/dma/ioat/pci.c 210
-rw-r--r-- drivers/dma/ioat/registers.h (renamed from drivers/dma/ioatdma_registers.h) 54
-rw-r--r-- drivers/dma/ioat_dma.c 1741
-rw-r--r-- drivers/dma/ioatdma.h 165
-rw-r--r-- drivers/dma/ioatdma_hw.h 70
-rw-r--r-- drivers/dma/iop-adma.c 491
-rw-r--r-- drivers/dma/iovlock.c 10
-rw-r--r-- drivers/dma/mv_xor.c 7
-rw-r--r-- drivers/dma/mv_xor.h 4
-rw-r--r-- drivers/dma/shdma.c 786
-rw-r--r-- drivers/dma/shdma.h 64
-rw-r--r-- drivers/dma/txx9dmac.c 24
-rw-r--r-- drivers/dma/txx9dmac.h 1
-rw-r--r-- drivers/idle/i7300_idle.c 20
-rw-r--r-- drivers/md/Kconfig 26
-rw-r--r-- drivers/md/raid5.c 1475
-rw-r--r-- drivers/md/raid5.h 28
-rw-r--r-- drivers/mmc/host/atmel-mci.c 9
38 files changed, 7160 insertions, 2996 deletions
diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c
index 25b743abfb59..52e6bb70a490 100644
--- a/drivers/dca/dca-core.c
+++ b/drivers/dca/dca-core.c
@@ -28,7 +28,7 @@
28#include <linux/device.h> 28#include <linux/device.h>
29#include <linux/dca.h> 29#include <linux/dca.h>
30 30
31#define DCA_VERSION "1.8" 31#define DCA_VERSION "1.12.1"
32 32
33MODULE_VERSION(DCA_VERSION); 33MODULE_VERSION(DCA_VERSION);
34MODULE_LICENSE("GPL"); 34MODULE_LICENSE("GPL");
@@ -36,20 +36,92 @@ MODULE_AUTHOR("Intel Corporation");
36 36
37static DEFINE_SPINLOCK(dca_lock); 37static DEFINE_SPINLOCK(dca_lock);
38 38
39static LIST_HEAD(dca_providers); 39static LIST_HEAD(dca_domains);
40 40
41static struct dca_provider *dca_find_provider_by_dev(struct device *dev) 41static struct pci_bus *dca_pci_rc_from_dev(struct device *dev)
42{ 42{
43 struct dca_provider *dca, *ret = NULL; 43 struct pci_dev *pdev = to_pci_dev(dev);
44 struct pci_bus *bus = pdev->bus;
44 45
45 list_for_each_entry(dca, &dca_providers, node) { 46 while (bus->parent)
46 if ((!dev) || (dca->ops->dev_managed(dca, dev))) { 47 bus = bus->parent;
47 ret = dca; 48
48 break; 49 return bus;
49 } 50}
51
52static struct dca_domain *dca_allocate_domain(struct pci_bus *rc)
53{
54 struct dca_domain *domain;
55
56 domain = kzalloc(sizeof(*domain), GFP_NOWAIT);
57 if (!domain)
58 return NULL;
59
60 INIT_LIST_HEAD(&domain->dca_providers);
61 domain->pci_rc = rc;
62
63 return domain;
64}
65
66static void dca_free_domain(struct dca_domain *domain)
67{
68 list_del(&domain->node);
69 kfree(domain);
70}
71
72static struct dca_domain *dca_find_domain(struct pci_bus *rc)
73{
74 struct dca_domain *domain;
75
76 list_for_each_entry(domain, &dca_domains, node)
77 if (domain->pci_rc == rc)
78 return domain;
79
80 return NULL;
81}
82
83static struct dca_domain *dca_get_domain(struct device *dev)
84{
85 struct pci_bus *rc;
86 struct dca_domain *domain;
87
88 rc = dca_pci_rc_from_dev(dev);
89 domain = dca_find_domain(rc);
90
91 if (!domain) {
92 domain = dca_allocate_domain(rc);
93 if (domain)
94 list_add(&domain->node, &dca_domains);
95 }
96
97 return domain;
98}
99
100static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
101{
102 struct dca_provider *dca;
103 struct pci_bus *rc;
104 struct dca_domain *domain;
105
106 if (dev) {
107 rc = dca_pci_rc_from_dev(dev);
108 domain = dca_find_domain(rc);
109 if (!domain)
110 return NULL;
111 } else {
112 if (!list_empty(&dca_domains))
113 domain = list_first_entry(&dca_domains,
114 struct dca_domain,
115 node);
116 else
117 return NULL;
50 } 118 }
51 119
52 return ret; 120 list_for_each_entry(dca, &domain->dca_providers, node)
121 if ((!dev) || (dca->ops->dev_managed(dca, dev)))
122 return dca;
123
124 return NULL;
53} 125}
54 126
55/** 127/**
@@ -61,6 +133,8 @@ int dca_add_requester(struct device *dev)
61 struct dca_provider *dca; 133 struct dca_provider *dca;
62 int err, slot = -ENODEV; 134 int err, slot = -ENODEV;
63 unsigned long flags; 135 unsigned long flags;
136 struct pci_bus *pci_rc;
137 struct dca_domain *domain;
64 138
65 if (!dev) 139 if (!dev)
66 return -EFAULT; 140 return -EFAULT;
@@ -74,7 +148,14 @@ int dca_add_requester(struct device *dev)
74 return -EEXIST; 148 return -EEXIST;
75 } 149 }
76 150
77 list_for_each_entry(dca, &dca_providers, node) { 151 pci_rc = dca_pci_rc_from_dev(dev);
152 domain = dca_find_domain(pci_rc);
153 if (!domain) {
154 spin_unlock_irqrestore(&dca_lock, flags);
155 return -ENODEV;
156 }
157
158 list_for_each_entry(dca, &domain->dca_providers, node) {
78 slot = dca->ops->add_requester(dca, dev); 159 slot = dca->ops->add_requester(dca, dev);
79 if (slot >= 0) 160 if (slot >= 0)
80 break; 161 break;
@@ -222,13 +303,19 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev)
222{ 303{
223 int err; 304 int err;
224 unsigned long flags; 305 unsigned long flags;
306 struct dca_domain *domain;
225 307
226 err = dca_sysfs_add_provider(dca, dev); 308 err = dca_sysfs_add_provider(dca, dev);
227 if (err) 309 if (err)
228 return err; 310 return err;
229 311
230 spin_lock_irqsave(&dca_lock, flags); 312 spin_lock_irqsave(&dca_lock, flags);
231 list_add(&dca->node, &dca_providers); 313 domain = dca_get_domain(dev);
314 if (!domain) {
315 spin_unlock_irqrestore(&dca_lock, flags);
316 return -ENODEV;
317 }
318 list_add(&dca->node, &domain->dca_providers);
232 spin_unlock_irqrestore(&dca_lock, flags); 319 spin_unlock_irqrestore(&dca_lock, flags);
233 320
234 blocking_notifier_call_chain(&dca_provider_chain, 321 blocking_notifier_call_chain(&dca_provider_chain,
@@ -241,15 +328,24 @@ EXPORT_SYMBOL_GPL(register_dca_provider);
241 * unregister_dca_provider - remove a dca provider 328 * unregister_dca_provider - remove a dca provider
242 * @dca - struct created by alloc_dca_provider() 329 * @dca - struct created by alloc_dca_provider()
243 */ 330 */
244void unregister_dca_provider(struct dca_provider *dca) 331void unregister_dca_provider(struct dca_provider *dca, struct device *dev)
245{ 332{
246 unsigned long flags; 333 unsigned long flags;
334 struct pci_bus *pci_rc;
335 struct dca_domain *domain;
247 336
248 blocking_notifier_call_chain(&dca_provider_chain, 337 blocking_notifier_call_chain(&dca_provider_chain,
249 DCA_PROVIDER_REMOVE, NULL); 338 DCA_PROVIDER_REMOVE, NULL);
250 339
251 spin_lock_irqsave(&dca_lock, flags); 340 spin_lock_irqsave(&dca_lock, flags);
341
252 list_del(&dca->node); 342 list_del(&dca->node);
343
344 pci_rc = dca_pci_rc_from_dev(dev);
345 domain = dca_find_domain(pci_rc);
346 if (list_empty(&domain->dca_providers))
347 dca_free_domain(domain);
348
253 spin_unlock_irqrestore(&dca_lock, flags); 349 spin_unlock_irqrestore(&dca_lock, flags);
254 350
255 dca_sysfs_remove_provider(dca); 351 dca_sysfs_remove_provider(dca);
@@ -276,7 +372,7 @@ EXPORT_SYMBOL_GPL(dca_unregister_notify);
276 372
277static int __init dca_init(void) 373static int __init dca_init(void)
278{ 374{
279 printk(KERN_ERR "dca service started, version %s\n", DCA_VERSION); 375 pr_info("dca service started, version %s\n", DCA_VERSION);
280 return dca_sysfs_init(); 376 return dca_sysfs_init();
281} 377}
282 378
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 81e1020fb514..5903a88351bf 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -17,11 +17,15 @@ if DMADEVICES
17 17
18comment "DMA Devices" 18comment "DMA Devices"
19 19
20config ASYNC_TX_DISABLE_CHANNEL_SWITCH
21 bool
22
20config INTEL_IOATDMA 23config INTEL_IOATDMA
21 tristate "Intel I/OAT DMA support" 24 tristate "Intel I/OAT DMA support"
22 depends on PCI && X86 25 depends on PCI && X86
23 select DMA_ENGINE 26 select DMA_ENGINE
24 select DCA 27 select DCA
28 select ASYNC_TX_DISABLE_CHANNEL_SWITCH
25 help 29 help
26 Enable support for the Intel(R) I/OAT DMA engine present 30 Enable support for the Intel(R) I/OAT DMA engine present
27 in recent Intel Xeon chipsets. 31 in recent Intel Xeon chipsets.
@@ -97,6 +101,14 @@ config TXX9_DMAC
97 Support the TXx9 SoC internal DMA controller. This can be 101 Support the TXx9 SoC internal DMA controller. This can be
98 integrated in chips such as the Toshiba TX4927/38/39. 102 integrated in chips such as the Toshiba TX4927/38/39.
99 103
104config SH_DMAE
105 tristate "Renesas SuperH DMAC support"
106 depends on SUPERH && SH_DMA
107 depends on !SH_DMA_API
108 select DMA_ENGINE
109 help
110 Enable support for the Renesas SuperH DMA controllers.
111
100config DMA_ENGINE 112config DMA_ENGINE
101 bool 113 bool
102 114
@@ -116,7 +128,7 @@ config NET_DMA
116 128
117config ASYNC_TX_DMA 129config ASYNC_TX_DMA
118 bool "Async_tx: Offload support for the async_tx api" 130 bool "Async_tx: Offload support for the async_tx api"
119 depends on DMA_ENGINE && !HIGHMEM64G 131 depends on DMA_ENGINE
120 help 132 help
121 This allows the async_tx api to take advantage of offload engines for 133 This allows the async_tx api to take advantage of offload engines for
122 memcpy, memset, xor, and raid6 p+q operations. If your platform has 134 memcpy, memset, xor, and raid6 p+q operations. If your platform has
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 40e1e0083571..eca71ba78ae9 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -1,8 +1,7 @@
1obj-$(CONFIG_DMA_ENGINE) += dmaengine.o 1obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
2obj-$(CONFIG_NET_DMA) += iovlock.o 2obj-$(CONFIG_NET_DMA) += iovlock.o
3obj-$(CONFIG_DMATEST) += dmatest.o 3obj-$(CONFIG_DMATEST) += dmatest.o
4obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o 4obj-$(CONFIG_INTEL_IOATDMA) += ioat/
5ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
6obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o 5obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
7obj-$(CONFIG_FSL_DMA) += fsldma.o 6obj-$(CONFIG_FSL_DMA) += fsldma.o
8obj-$(CONFIG_MV_XOR) += mv_xor.o 7obj-$(CONFIG_MV_XOR) += mv_xor.o
@@ -10,3 +9,4 @@ obj-$(CONFIG_DW_DMAC) += dw_dmac.o
10obj-$(CONFIG_AT_HDMAC) += at_hdmac.o 9obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
11obj-$(CONFIG_MX3_IPU) += ipu/ 10obj-$(CONFIG_MX3_IPU) += ipu/
12obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o 11obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
12obj-$(CONFIG_SH_DMAE) += shdma.o
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index c8522e6f1ad2..7585c4164bd5 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -87,6 +87,7 @@ static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan,
87 desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys); 87 desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys);
88 if (desc) { 88 if (desc) {
89 memset(desc, 0, sizeof(struct at_desc)); 89 memset(desc, 0, sizeof(struct at_desc));
90 INIT_LIST_HEAD(&desc->tx_list);
90 dma_async_tx_descriptor_init(&desc->txd, chan); 91 dma_async_tx_descriptor_init(&desc->txd, chan);
91 /* txd.flags will be overwritten in prep functions */ 92 /* txd.flags will be overwritten in prep functions */
92 desc->txd.flags = DMA_CTRL_ACK; 93 desc->txd.flags = DMA_CTRL_ACK;
@@ -150,11 +151,11 @@ static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc)
150 struct at_desc *child; 151 struct at_desc *child;
151 152
152 spin_lock_bh(&atchan->lock); 153 spin_lock_bh(&atchan->lock);
153 list_for_each_entry(child, &desc->txd.tx_list, desc_node) 154 list_for_each_entry(child, &desc->tx_list, desc_node)
154 dev_vdbg(chan2dev(&atchan->chan_common), 155 dev_vdbg(chan2dev(&atchan->chan_common),
155 "moving child desc %p to freelist\n", 156 "moving child desc %p to freelist\n",
156 child); 157 child);
157 list_splice_init(&desc->txd.tx_list, &atchan->free_list); 158 list_splice_init(&desc->tx_list, &atchan->free_list);
158 dev_vdbg(chan2dev(&atchan->chan_common), 159 dev_vdbg(chan2dev(&atchan->chan_common),
159 "moving desc %p to freelist\n", desc); 160 "moving desc %p to freelist\n", desc);
160 list_add(&desc->desc_node, &atchan->free_list); 161 list_add(&desc->desc_node, &atchan->free_list);
@@ -247,30 +248,33 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
247 param = txd->callback_param; 248 param = txd->callback_param;
248 249
249 /* move children to free_list */ 250 /* move children to free_list */
250 list_splice_init(&txd->tx_list, &atchan->free_list); 251 list_splice_init(&desc->tx_list, &atchan->free_list);
251 /* move myself to free_list */ 252 /* move myself to free_list */
252 list_move(&desc->desc_node, &atchan->free_list); 253 list_move(&desc->desc_node, &atchan->free_list);
253 254
254 /* unmap dma addresses */ 255 /* unmap dma addresses */
255 if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { 256 if (!atchan->chan_common.private) {
256 if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) 257 struct device *parent = chan2parent(&atchan->chan_common);
257 dma_unmap_single(chan2parent(&atchan->chan_common), 258 if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
258 desc->lli.daddr, 259 if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
259 desc->len, DMA_FROM_DEVICE); 260 dma_unmap_single(parent,
260 else 261 desc->lli.daddr,
261 dma_unmap_page(chan2parent(&atchan->chan_common), 262 desc->len, DMA_FROM_DEVICE);
262 desc->lli.daddr, 263 else
263 desc->len, DMA_FROM_DEVICE); 264 dma_unmap_page(parent,
264 } 265 desc->lli.daddr,
265 if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { 266 desc->len, DMA_FROM_DEVICE);
266 if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) 267 }
267 dma_unmap_single(chan2parent(&atchan->chan_common), 268 if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
268 desc->lli.saddr, 269 if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
269 desc->len, DMA_TO_DEVICE); 270 dma_unmap_single(parent,
270 else 271 desc->lli.saddr,
271 dma_unmap_page(chan2parent(&atchan->chan_common), 272 desc->len, DMA_TO_DEVICE);
272 desc->lli.saddr, 273 else
273 desc->len, DMA_TO_DEVICE); 274 dma_unmap_page(parent,
275 desc->lli.saddr,
276 desc->len, DMA_TO_DEVICE);
277 }
274 } 278 }
275 279
276 /* 280 /*
@@ -334,7 +338,7 @@ static void atc_cleanup_descriptors(struct at_dma_chan *atchan)
334 /* This one is currently in progress */ 338 /* This one is currently in progress */
335 return; 339 return;
336 340
337 list_for_each_entry(child, &desc->txd.tx_list, desc_node) 341 list_for_each_entry(child, &desc->tx_list, desc_node)
338 if (!(child->lli.ctrla & ATC_DONE)) 342 if (!(child->lli.ctrla & ATC_DONE))
339 /* Currently in progress */ 343 /* Currently in progress */
340 return; 344 return;
@@ -407,7 +411,7 @@ static void atc_handle_error(struct at_dma_chan *atchan)
407 dev_crit(chan2dev(&atchan->chan_common), 411 dev_crit(chan2dev(&atchan->chan_common),
408 " cookie: %d\n", bad_desc->txd.cookie); 412 " cookie: %d\n", bad_desc->txd.cookie);
409 atc_dump_lli(atchan, &bad_desc->lli); 413 atc_dump_lli(atchan, &bad_desc->lli);
410 list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) 414 list_for_each_entry(child, &bad_desc->tx_list, desc_node)
411 atc_dump_lli(atchan, &child->lli); 415 atc_dump_lli(atchan, &child->lli);
412 416
413 /* Pretend the descriptor completed successfully */ 417 /* Pretend the descriptor completed successfully */
@@ -587,7 +591,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
587 prev->lli.dscr = desc->txd.phys; 591 prev->lli.dscr = desc->txd.phys;
588 /* insert the link descriptor to the LD ring */ 592 /* insert the link descriptor to the LD ring */
589 list_add_tail(&desc->desc_node, 593 list_add_tail(&desc->desc_node,
590 &first->txd.tx_list); 594 &first->tx_list);
591 } 595 }
592 prev = desc; 596 prev = desc;
593 } 597 }
@@ -646,8 +650,6 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
646 650
647 reg_width = atslave->reg_width; 651 reg_width = atslave->reg_width;
648 652
649 sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction);
650
651 ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla; 653 ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla;
652 ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN; 654 ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN;
653 655
@@ -687,7 +689,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
687 prev->lli.dscr = desc->txd.phys; 689 prev->lli.dscr = desc->txd.phys;
688 /* insert the link descriptor to the LD ring */ 690 /* insert the link descriptor to the LD ring */
689 list_add_tail(&desc->desc_node, 691 list_add_tail(&desc->desc_node,
690 &first->txd.tx_list); 692 &first->tx_list);
691 } 693 }
692 prev = desc; 694 prev = desc;
693 total_len += len; 695 total_len += len;
@@ -729,7 +731,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
729 prev->lli.dscr = desc->txd.phys; 731 prev->lli.dscr = desc->txd.phys;
730 /* insert the link descriptor to the LD ring */ 732 /* insert the link descriptor to the LD ring */
731 list_add_tail(&desc->desc_node, 733 list_add_tail(&desc->desc_node,
732 &first->txd.tx_list); 734 &first->tx_list);
733 } 735 }
734 prev = desc; 736 prev = desc;
735 total_len += len; 737 total_len += len;
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
index 4c972afc49ec..495457e3dc4b 100644
--- a/drivers/dma/at_hdmac_regs.h
+++ b/drivers/dma/at_hdmac_regs.h
@@ -165,6 +165,7 @@ struct at_desc {
165 struct at_lli lli; 165 struct at_lli lli;
166 166
167 /* THEN values for driver housekeeping */ 167 /* THEN values for driver housekeeping */
168 struct list_head tx_list;
168 struct dma_async_tx_descriptor txd; 169 struct dma_async_tx_descriptor txd;
169 struct list_head desc_node; 170 struct list_head desc_node;
170 size_t len; 171 size_t len;
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 5a87384ea4ff..bd0b248de2cf 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -608,6 +608,40 @@ void dmaengine_put(void)
608} 608}
609EXPORT_SYMBOL(dmaengine_put); 609EXPORT_SYMBOL(dmaengine_put);
610 610
611static bool device_has_all_tx_types(struct dma_device *device)
612{
613 /* A device that satisfies this test has channels that will never cause
614 * an async_tx channel switch event as all possible operation types can
615 * be handled.
616 */
617 #ifdef CONFIG_ASYNC_TX_DMA
618 if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask))
619 return false;
620 #endif
621
622 #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE)
623 if (!dma_has_cap(DMA_MEMCPY, device->cap_mask))
624 return false;
625 #endif
626
627 #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE)
628 if (!dma_has_cap(DMA_MEMSET, device->cap_mask))
629 return false;
630 #endif
631
632 #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE)
633 if (!dma_has_cap(DMA_XOR, device->cap_mask))
634 return false;
635 #endif
636
637 #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE)
638 if (!dma_has_cap(DMA_PQ, device->cap_mask))
639 return false;
640 #endif
641
642 return true;
643}
644
611static int get_dma_id(struct dma_device *device) 645static int get_dma_id(struct dma_device *device)
612{ 646{
613 int rc; 647 int rc;
@@ -644,8 +678,12 @@ int dma_async_device_register(struct dma_device *device)
644 !device->device_prep_dma_memcpy); 678 !device->device_prep_dma_memcpy);
645 BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) && 679 BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
646 !device->device_prep_dma_xor); 680 !device->device_prep_dma_xor);
647 BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) && 681 BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) &&
648 !device->device_prep_dma_zero_sum); 682 !device->device_prep_dma_xor_val);
683 BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) &&
684 !device->device_prep_dma_pq);
685 BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
686 !device->device_prep_dma_pq_val);
649 BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && 687 BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
650 !device->device_prep_dma_memset); 688 !device->device_prep_dma_memset);
651 BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && 689 BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
@@ -661,6 +699,12 @@ int dma_async_device_register(struct dma_device *device)
661 BUG_ON(!device->device_issue_pending); 699 BUG_ON(!device->device_issue_pending);
662 BUG_ON(!device->dev); 700 BUG_ON(!device->dev);
663 701
702 /* note: this only matters in the
703 * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case
704 */
705 if (device_has_all_tx_types(device))
706 dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
707
664 idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL); 708 idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
665 if (!idr_ref) 709 if (!idr_ref)
666 return -ENOMEM; 710 return -ENOMEM;
@@ -933,55 +977,29 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
933{ 977{
934 tx->chan = chan; 978 tx->chan = chan;
935 spin_lock_init(&tx->lock); 979 spin_lock_init(&tx->lock);
936 INIT_LIST_HEAD(&tx->tx_list);
937} 980}
938EXPORT_SYMBOL(dma_async_tx_descriptor_init); 981EXPORT_SYMBOL(dma_async_tx_descriptor_init);
939 982
940/* dma_wait_for_async_tx - spin wait for a transaction to complete 983/* dma_wait_for_async_tx - spin wait for a transaction to complete
941 * @tx: in-flight transaction to wait on 984 * @tx: in-flight transaction to wait on
942 *
943 * This routine assumes that tx was obtained from a call to async_memcpy,
944 * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped
945 * and submitted). Walking the parent chain is only meant to cover for DMA
946 * drivers that do not implement the DMA_INTERRUPT capability and may race with
947 * the driver's descriptor cleanup routine.
948 */ 985 */
949enum dma_status 986enum dma_status
950dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) 987dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
951{ 988{
952 enum dma_status status; 989 unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
953 struct dma_async_tx_descriptor *iter;
954 struct dma_async_tx_descriptor *parent;
955 990
956 if (!tx) 991 if (!tx)
957 return DMA_SUCCESS; 992 return DMA_SUCCESS;
958 993
959 WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for" 994 while (tx->cookie == -EBUSY) {
960 " %s\n", __func__, dma_chan_name(tx->chan)); 995 if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
961 996 pr_err("%s timeout waiting for descriptor submission\n",
962 /* poll through the dependency chain, return when tx is complete */ 997 __func__);
963 do { 998 return DMA_ERROR;
964 iter = tx; 999 }
965 1000 cpu_relax();
966 /* find the root of the unsubmitted dependency chain */ 1001 }
967 do { 1002 return dma_sync_wait(tx->chan, tx->cookie);
968 parent = iter->parent;
969 if (!parent)
970 break;
971 else
972 iter = parent;
973 } while (parent);
974
975 /* there is a small window for ->parent == NULL and
976 * ->cookie == -EBUSY
977 */
978 while (iter->cookie == -EBUSY)
979 cpu_relax();
980
981 status = dma_sync_wait(iter->chan, iter->cookie);
982 } while (status == DMA_IN_PROGRESS || (iter != tx));
983
984 return status;
985} 1003}
986EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); 1004EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
987 1005
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index d93017fc7872..a32a4cf7b1e0 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -48,6 +48,11 @@ module_param(xor_sources, uint, S_IRUGO);
48MODULE_PARM_DESC(xor_sources, 48MODULE_PARM_DESC(xor_sources,
49 "Number of xor source buffers (default: 3)"); 49 "Number of xor source buffers (default: 3)");
50 50
51static unsigned int pq_sources = 3;
52module_param(pq_sources, uint, S_IRUGO);
53MODULE_PARM_DESC(pq_sources,
54 "Number of p+q source buffers (default: 3)");
55
51/* 56/*
52 * Initialization patterns. All bytes in the source buffer has bit 7 57 * Initialization patterns. All bytes in the source buffer has bit 7
53 * set, all bytes in the destination buffer has bit 7 cleared. 58 * set, all bytes in the destination buffer has bit 7 cleared.
@@ -232,6 +237,7 @@ static int dmatest_func(void *data)
232 dma_cookie_t cookie; 237 dma_cookie_t cookie;
233 enum dma_status status; 238 enum dma_status status;
234 enum dma_ctrl_flags flags; 239 enum dma_ctrl_flags flags;
240 u8 pq_coefs[pq_sources];
235 int ret; 241 int ret;
236 int src_cnt; 242 int src_cnt;
237 int dst_cnt; 243 int dst_cnt;
@@ -248,6 +254,11 @@ static int dmatest_func(void *data)
248 else if (thread->type == DMA_XOR) { 254 else if (thread->type == DMA_XOR) {
249 src_cnt = xor_sources | 1; /* force odd to ensure dst = src */ 255 src_cnt = xor_sources | 1; /* force odd to ensure dst = src */
250 dst_cnt = 1; 256 dst_cnt = 1;
257 } else if (thread->type == DMA_PQ) {
258 src_cnt = pq_sources | 1; /* force odd to ensure dst = src */
259 dst_cnt = 2;
260 for (i = 0; i < pq_sources; i++)
261 pq_coefs[i] = 1;
251 } else 262 } else
252 goto err_srcs; 263 goto err_srcs;
253 264
@@ -283,6 +294,7 @@ static int dmatest_func(void *data)
283 dma_addr_t dma_dsts[dst_cnt]; 294 dma_addr_t dma_dsts[dst_cnt];
284 struct completion cmp; 295 struct completion cmp;
285 unsigned long tmo = msecs_to_jiffies(3000); 296 unsigned long tmo = msecs_to_jiffies(3000);
297 u8 align = 0;
286 298
287 total_tests++; 299 total_tests++;
288 300
@@ -290,6 +302,18 @@ static int dmatest_func(void *data)
290 src_off = dmatest_random() % (test_buf_size - len + 1); 302 src_off = dmatest_random() % (test_buf_size - len + 1);
291 dst_off = dmatest_random() % (test_buf_size - len + 1); 303 dst_off = dmatest_random() % (test_buf_size - len + 1);
292 304
305 /* honor alignment restrictions */
306 if (thread->type == DMA_MEMCPY)
307 align = dev->copy_align;
308 else if (thread->type == DMA_XOR)
309 align = dev->xor_align;
310 else if (thread->type == DMA_PQ)
311 align = dev->pq_align;
312
313 len = (len >> align) << align;
314 src_off = (src_off >> align) << align;
315 dst_off = (dst_off >> align) << align;
316
293 dmatest_init_srcs(thread->srcs, src_off, len); 317 dmatest_init_srcs(thread->srcs, src_off, len);
294 dmatest_init_dsts(thread->dsts, dst_off, len); 318 dmatest_init_dsts(thread->dsts, dst_off, len);
295 319
@@ -306,6 +330,7 @@ static int dmatest_func(void *data)
306 DMA_BIDIRECTIONAL); 330 DMA_BIDIRECTIONAL);
307 } 331 }
308 332
333
309 if (thread->type == DMA_MEMCPY) 334 if (thread->type == DMA_MEMCPY)
310 tx = dev->device_prep_dma_memcpy(chan, 335 tx = dev->device_prep_dma_memcpy(chan,
311 dma_dsts[0] + dst_off, 336 dma_dsts[0] + dst_off,
@@ -316,6 +341,15 @@ static int dmatest_func(void *data)
316 dma_dsts[0] + dst_off, 341 dma_dsts[0] + dst_off,
317 dma_srcs, xor_sources, 342 dma_srcs, xor_sources,
318 len, flags); 343 len, flags);
344 else if (thread->type == DMA_PQ) {
345 dma_addr_t dma_pq[dst_cnt];
346
347 for (i = 0; i < dst_cnt; i++)
348 dma_pq[i] = dma_dsts[i] + dst_off;
349 tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
350 pq_sources, pq_coefs,
351 len, flags);
352 }
319 353
320 if (!tx) { 354 if (!tx) {
321 for (i = 0; i < src_cnt; i++) 355 for (i = 0; i < src_cnt; i++)
@@ -459,6 +493,8 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty
459 op = "copy"; 493 op = "copy";
460 else if (type == DMA_XOR) 494 else if (type == DMA_XOR)
461 op = "xor"; 495 op = "xor";
496 else if (type == DMA_PQ)
497 op = "pq";
462 else 498 else
463 return -EINVAL; 499 return -EINVAL;
464 500
@@ -514,6 +550,10 @@ static int dmatest_add_channel(struct dma_chan *chan)
514 cnt = dmatest_add_threads(dtc, DMA_XOR); 550 cnt = dmatest_add_threads(dtc, DMA_XOR);
515 thread_count += cnt > 0 ? cnt : 0; 551 thread_count += cnt > 0 ? cnt : 0;
516 } 552 }
553 if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
554 cnt = dmatest_add_threads(dtc, DMA_PQ);
555 thread_count += cnt > 0 ?: 0;
556 }
517 557
518 pr_info("dmatest: Started %u threads using %s\n", 558 pr_info("dmatest: Started %u threads using %s\n",
519 thread_count, dma_chan_name(chan)); 559 thread_count, dma_chan_name(chan));
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 933c143b6a74..2eea823516a7 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -116,7 +116,7 @@ static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc)
116{ 116{
117 struct dw_desc *child; 117 struct dw_desc *child;
118 118
119 list_for_each_entry(child, &desc->txd.tx_list, desc_node) 119 list_for_each_entry(child, &desc->tx_list, desc_node)
120 dma_sync_single_for_cpu(chan2parent(&dwc->chan), 120 dma_sync_single_for_cpu(chan2parent(&dwc->chan),
121 child->txd.phys, sizeof(child->lli), 121 child->txd.phys, sizeof(child->lli),
122 DMA_TO_DEVICE); 122 DMA_TO_DEVICE);
@@ -137,11 +137,11 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
137 dwc_sync_desc_for_cpu(dwc, desc); 137 dwc_sync_desc_for_cpu(dwc, desc);
138 138
139 spin_lock_bh(&dwc->lock); 139 spin_lock_bh(&dwc->lock);
140 list_for_each_entry(child, &desc->txd.tx_list, desc_node) 140 list_for_each_entry(child, &desc->tx_list, desc_node)
141 dev_vdbg(chan2dev(&dwc->chan), 141 dev_vdbg(chan2dev(&dwc->chan),
142 "moving child desc %p to freelist\n", 142 "moving child desc %p to freelist\n",
143 child); 143 child);
144 list_splice_init(&desc->txd.tx_list, &dwc->free_list); 144 list_splice_init(&desc->tx_list, &dwc->free_list);
145 dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc); 145 dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc);
146 list_add(&desc->desc_node, &dwc->free_list); 146 list_add(&desc->desc_node, &dwc->free_list);
147 spin_unlock_bh(&dwc->lock); 147 spin_unlock_bh(&dwc->lock);
@@ -209,19 +209,28 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc)
209 param = txd->callback_param; 209 param = txd->callback_param;
210 210
211 dwc_sync_desc_for_cpu(dwc, desc); 211 dwc_sync_desc_for_cpu(dwc, desc);
212 list_splice_init(&txd->tx_list, &dwc->free_list); 212 list_splice_init(&desc->tx_list, &dwc->free_list);
213 list_move(&desc->desc_node, &dwc->free_list); 213 list_move(&desc->desc_node, &dwc->free_list);
214 214
215 /* 215 if (!dwc->chan.private) {
216 * We use dma_unmap_page() regardless of how the buffers were 216 struct device *parent = chan2parent(&dwc->chan);
217 * mapped before they were submitted... 217 if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
218 */ 218 if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
219 if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) 219 dma_unmap_single(parent, desc->lli.dar,
220 dma_unmap_page(chan2parent(&dwc->chan), desc->lli.dar, 220 desc->len, DMA_FROM_DEVICE);
221 desc->len, DMA_FROM_DEVICE); 221 else
222 if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) 222 dma_unmap_page(parent, desc->lli.dar,
223 dma_unmap_page(chan2parent(&dwc->chan), desc->lli.sar, 223 desc->len, DMA_FROM_DEVICE);
224 desc->len, DMA_TO_DEVICE); 224 }
225 if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
226 if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
227 dma_unmap_single(parent, desc->lli.sar,
228 desc->len, DMA_TO_DEVICE);
229 else
230 dma_unmap_page(parent, desc->lli.sar,
231 desc->len, DMA_TO_DEVICE);
232 }
233 }
225 234
226 /* 235 /*
227 * The API requires that no submissions are done from a 236 * The API requires that no submissions are done from a
@@ -289,7 +298,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
289 /* This one is currently in progress */ 298 /* This one is currently in progress */
290 return; 299 return;
291 300
292 list_for_each_entry(child, &desc->txd.tx_list, desc_node) 301 list_for_each_entry(child, &desc->tx_list, desc_node)
293 if (child->lli.llp == llp) 302 if (child->lli.llp == llp)
294 /* Currently in progress */ 303 /* Currently in progress */
295 return; 304 return;
@@ -356,7 +365,7 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
356 dev_printk(KERN_CRIT, chan2dev(&dwc->chan), 365 dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
357 " cookie: %d\n", bad_desc->txd.cookie); 366 " cookie: %d\n", bad_desc->txd.cookie);
358 dwc_dump_lli(dwc, &bad_desc->lli); 367 dwc_dump_lli(dwc, &bad_desc->lli);
359 list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) 368 list_for_each_entry(child, &bad_desc->tx_list, desc_node)
360 dwc_dump_lli(dwc, &child->lli); 369 dwc_dump_lli(dwc, &child->lli);
361 370
362 /* Pretend the descriptor completed successfully */ 371 /* Pretend the descriptor completed successfully */
@@ -608,7 +617,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
608 prev->txd.phys, sizeof(prev->lli), 617 prev->txd.phys, sizeof(prev->lli),
609 DMA_TO_DEVICE); 618 DMA_TO_DEVICE);
610 list_add_tail(&desc->desc_node, 619 list_add_tail(&desc->desc_node,
611 &first->txd.tx_list); 620 &first->tx_list);
612 } 621 }
613 prev = desc; 622 prev = desc;
614 } 623 }
@@ -658,8 +667,6 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
658 reg_width = dws->reg_width; 667 reg_width = dws->reg_width;
659 prev = first = NULL; 668 prev = first = NULL;
660 669
661 sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction);
662
663 switch (direction) { 670 switch (direction) {
664 case DMA_TO_DEVICE: 671 case DMA_TO_DEVICE:
665 ctllo = (DWC_DEFAULT_CTLLO 672 ctllo = (DWC_DEFAULT_CTLLO
@@ -700,7 +707,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
700 sizeof(prev->lli), 707 sizeof(prev->lli),
701 DMA_TO_DEVICE); 708 DMA_TO_DEVICE);
702 list_add_tail(&desc->desc_node, 709 list_add_tail(&desc->desc_node,
703 &first->txd.tx_list); 710 &first->tx_list);
704 } 711 }
705 prev = desc; 712 prev = desc;
706 total_len += len; 713 total_len += len;
@@ -746,7 +753,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
746 sizeof(prev->lli), 753 sizeof(prev->lli),
747 DMA_TO_DEVICE); 754 DMA_TO_DEVICE);
748 list_add_tail(&desc->desc_node, 755 list_add_tail(&desc->desc_node,
749 &first->txd.tx_list); 756 &first->tx_list);
750 } 757 }
751 prev = desc; 758 prev = desc;
752 total_len += len; 759 total_len += len;
@@ -902,6 +909,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
902 break; 909 break;
903 } 910 }
904 911
912 INIT_LIST_HEAD(&desc->tx_list);
905 dma_async_tx_descriptor_init(&desc->txd, chan); 913 dma_async_tx_descriptor_init(&desc->txd, chan);
906 desc->txd.tx_submit = dwc_tx_submit; 914 desc->txd.tx_submit = dwc_tx_submit;
907 desc->txd.flags = DMA_CTRL_ACK; 915 desc->txd.flags = DMA_CTRL_ACK;
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 13a580767031..d9a939f67f46 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -217,6 +217,7 @@ struct dw_desc {
217 217
218 /* THEN values for driver housekeeping */ 218 /* THEN values for driver housekeeping */
219 struct list_head desc_node; 219 struct list_head desc_node;
220 struct list_head tx_list;
220 struct dma_async_tx_descriptor txd; 221 struct dma_async_tx_descriptor txd;
221 size_t len; 222 size_t len;
222}; 223};
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index ef87a8984145..296f9e747fac 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -34,6 +34,7 @@
34#include <linux/dmapool.h> 34#include <linux/dmapool.h>
35#include <linux/of_platform.h> 35#include <linux/of_platform.h>
36 36
37#include <asm/fsldma.h>
37#include "fsldma.h" 38#include "fsldma.h"
38 39
39static void dma_init(struct fsl_dma_chan *fsl_chan) 40static void dma_init(struct fsl_dma_chan *fsl_chan)
@@ -280,28 +281,40 @@ static void fsl_chan_set_dest_loop_size(struct fsl_dma_chan *fsl_chan, int size)
280} 281}
281 282
282/** 283/**
283 * fsl_chan_toggle_ext_pause - Toggle channel external pause status 284 * fsl_chan_set_request_count - Set DMA Request Count for external control
284 * @fsl_chan : Freescale DMA channel 285 * @fsl_chan : Freescale DMA channel
285 * @size : Pause control size, 0 for disable external pause control. 286 * @size : Number of bytes to transfer in a single request
286 * The maximum is 1024. 287 *
288 * The Freescale DMA channel can be controlled by the external signal DREQ#.
289 * The DMA request count is how many bytes are allowed to transfer before
290 * pausing the channel, after which a new assertion of DREQ# resumes channel
291 * operation.
287 * 292 *
288 * The Freescale DMA channel can be controlled by the external 293 * A size of 0 disables external pause control. The maximum size is 1024.
289 * signal DREQ#. The pause control size is how many bytes are allowed
290 * to transfer before pausing the channel, after which a new assertion
291 * of DREQ# resumes channel operation.
292 */ 294 */
293static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int size) 295static void fsl_chan_set_request_count(struct fsl_dma_chan *fsl_chan, int size)
294{ 296{
295 if (size > 1024) 297 BUG_ON(size > 1024);
296 return; 298 DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
299 DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
300 | ((__ilog2(size) << 24) & 0x0f000000),
301 32);
302}
297 303
298 if (size) { 304/**
299 DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, 305 * fsl_chan_toggle_ext_pause - Toggle channel external pause status
300 DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) 306 * @fsl_chan : Freescale DMA channel
301 | ((__ilog2(size) << 24) & 0x0f000000), 307 * @enable : 0 is disabled, 1 is enabled.
302 32); 308 *
309 * The Freescale DMA channel can be controlled by the external signal DREQ#.
310 * The DMA Request Count feature should be used in addition to this feature
311 * to set the number of bytes to transfer before pausing the channel.
312 */
313static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int enable)
314{
315 if (enable)
303 fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT; 316 fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT;
304 } else 317 else
305 fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT; 318 fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT;
306} 319}
307 320
@@ -326,7 +339,8 @@ static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable)
326static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) 339static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
327{ 340{
328 struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan); 341 struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan);
329 struct fsl_desc_sw *desc; 342 struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
343 struct fsl_desc_sw *child;
330 unsigned long flags; 344 unsigned long flags;
331 dma_cookie_t cookie; 345 dma_cookie_t cookie;
332 346
@@ -334,7 +348,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
334 spin_lock_irqsave(&fsl_chan->desc_lock, flags); 348 spin_lock_irqsave(&fsl_chan->desc_lock, flags);
335 349
336 cookie = fsl_chan->common.cookie; 350 cookie = fsl_chan->common.cookie;
337 list_for_each_entry(desc, &tx->tx_list, node) { 351 list_for_each_entry(child, &desc->tx_list, node) {
338 cookie++; 352 cookie++;
339 if (cookie < 0) 353 if (cookie < 0)
340 cookie = 1; 354 cookie = 1;
@@ -343,8 +357,8 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
343 } 357 }
344 358
345 fsl_chan->common.cookie = cookie; 359 fsl_chan->common.cookie = cookie;
346 append_ld_queue(fsl_chan, tx_to_fsl_desc(tx)); 360 append_ld_queue(fsl_chan, desc);
347 list_splice_init(&tx->tx_list, fsl_chan->ld_queue.prev); 361 list_splice_init(&desc->tx_list, fsl_chan->ld_queue.prev);
348 362
349 spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); 363 spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
350 364
@@ -366,6 +380,7 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(
366 desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc); 380 desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc);
367 if (desc_sw) { 381 if (desc_sw) {
368 memset(desc_sw, 0, sizeof(struct fsl_desc_sw)); 382 memset(desc_sw, 0, sizeof(struct fsl_desc_sw));
383 INIT_LIST_HEAD(&desc_sw->tx_list);
369 dma_async_tx_descriptor_init(&desc_sw->async_tx, 384 dma_async_tx_descriptor_init(&desc_sw->async_tx,
370 &fsl_chan->common); 385 &fsl_chan->common);
371 desc_sw->async_tx.tx_submit = fsl_dma_tx_submit; 386 desc_sw->async_tx.tx_submit = fsl_dma_tx_submit;
@@ -455,7 +470,7 @@ fsl_dma_prep_interrupt(struct dma_chan *chan, unsigned long flags)
455 new->async_tx.flags = flags; 470 new->async_tx.flags = flags;
456 471
457 /* Insert the link descriptor to the LD ring */ 472 /* Insert the link descriptor to the LD ring */
458 list_add_tail(&new->node, &new->async_tx.tx_list); 473 list_add_tail(&new->node, &new->tx_list);
459 474
460 /* Set End-of-link to the last link descriptor of new list*/ 475 /* Set End-of-link to the last link descriptor of new list*/
461 set_ld_eol(fsl_chan, new); 476 set_ld_eol(fsl_chan, new);
@@ -513,7 +528,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
513 dma_dest += copy; 528 dma_dest += copy;
514 529
515 /* Insert the link descriptor to the LD ring */ 530 /* Insert the link descriptor to the LD ring */
516 list_add_tail(&new->node, &first->async_tx.tx_list); 531 list_add_tail(&new->node, &first->tx_list);
517 } while (len); 532 } while (len);
518 533
519 new->async_tx.flags = flags; /* client is in control of this ack */ 534 new->async_tx.flags = flags; /* client is in control of this ack */
@@ -528,7 +543,7 @@ fail:
528 if (!first) 543 if (!first)
529 return NULL; 544 return NULL;
530 545
531 list = &first->async_tx.tx_list; 546 list = &first->tx_list;
532 list_for_each_entry_safe_reverse(new, prev, list, node) { 547 list_for_each_entry_safe_reverse(new, prev, list, node) {
533 list_del(&new->node); 548 list_del(&new->node);
534 dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys); 549 dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
@@ -538,6 +553,229 @@ fail:
538} 553}
539 554
540/** 555/**
556 * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
557 * @chan: DMA channel
558 * @sgl: scatterlist to transfer to/from
559 * @sg_len: number of entries in @sgl
560 * @direction: DMA direction
561 * @flags: DMAEngine flags
562 *
563 * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the
564 * DMA_SLAVE API, this gets the device-specific information from the
565 * chan->private variable.
566 */
567static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
568 struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
569 enum dma_data_direction direction, unsigned long flags)
570{
571 struct fsl_dma_chan *fsl_chan;
572 struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL;
573 struct fsl_dma_slave *slave;
574 struct list_head *tx_list;
575 size_t copy;
576
577 int i;
578 struct scatterlist *sg;
579 size_t sg_used;
580 size_t hw_used;
581 struct fsl_dma_hw_addr *hw;
582 dma_addr_t dma_dst, dma_src;
583
584 if (!chan)
585 return NULL;
586
587 if (!chan->private)
588 return NULL;
589
590 fsl_chan = to_fsl_chan(chan);
591 slave = chan->private;
592
593 if (list_empty(&slave->addresses))
594 return NULL;
595
596 hw = list_first_entry(&slave->addresses, struct fsl_dma_hw_addr, entry);
597 hw_used = 0;
598
599 /*
600 * Build the hardware transaction to copy from the scatterlist to
601 * the hardware, or from the hardware to the scatterlist
602 *
603 * If you are copying from the hardware to the scatterlist and it
604 * takes two hardware entries to fill an entire page, then both
605 * hardware entries will be coalesced into the same page
606 *
607 * If you are copying from the scatterlist to the hardware and a
608 * single page can fill two hardware entries, then the data will
609 * be read out of the page into the first hardware entry, and so on
610 */
611 for_each_sg(sgl, sg, sg_len, i) {
612 sg_used = 0;
613
614 /* Loop until the entire scatterlist entry is used */
615 while (sg_used < sg_dma_len(sg)) {
616
617 /*
618 * If we've used up the current hardware address/length
619 * pair, we need to load a new one
620 *
621 * This is done in a while loop so that descriptors with
622 * length == 0 will be skipped
623 */
624 while (hw_used >= hw->length) {
625
626 /*
627 * If the current hardware entry is the last
628 * entry in the list, we're finished
629 */
630 if (list_is_last(&hw->entry, &slave->addresses))
631 goto finished;
632
633 /* Get the next hardware address/length pair */
634 hw = list_entry(hw->entry.next,
635 struct fsl_dma_hw_addr, entry);
636 hw_used = 0;
637 }
638
639 /* Allocate the link descriptor from DMA pool */
640 new = fsl_dma_alloc_descriptor(fsl_chan);
641 if (!new) {
642 dev_err(fsl_chan->dev, "No free memory for "
643 "link descriptor\n");
644 goto fail;
645 }
646#ifdef FSL_DMA_LD_DEBUG
647 dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new);
648#endif
649
650 /*
651 * Calculate the maximum number of bytes to transfer,
652 * making sure it does not exceed the DMA controller limit
653 */
654 copy = min_t(size_t, sg_dma_len(sg) - sg_used,
655 hw->length - hw_used);
656 copy = min_t(size_t, copy, FSL_DMA_BCR_MAX_CNT);
657
658 /*
659 * DMA_FROM_DEVICE
660 * from the hardware to the scatterlist
661 *
662 * DMA_TO_DEVICE
663 * from the scatterlist to the hardware
664 */
665 if (direction == DMA_FROM_DEVICE) {
666 dma_src = hw->address + hw_used;
667 dma_dst = sg_dma_address(sg) + sg_used;
668 } else {
669 dma_src = sg_dma_address(sg) + sg_used;
670 dma_dst = hw->address + hw_used;
671 }
672
673 /* Fill in the descriptor */
674 set_desc_cnt(fsl_chan, &new->hw, copy);
675 set_desc_src(fsl_chan, &new->hw, dma_src);
676 set_desc_dest(fsl_chan, &new->hw, dma_dst);
677
678 /*
679 * If this is not the first descriptor, chain the
680 * current descriptor after the previous descriptor
681 */
682 if (!first) {
683 first = new;
684 } else {
685 set_desc_next(fsl_chan, &prev->hw,
686 new->async_tx.phys);
687 }
688
689 new->async_tx.cookie = 0;
690 async_tx_ack(&new->async_tx);
691
692 prev = new;
693 sg_used += copy;
694 hw_used += copy;
695
696 /* Insert the link descriptor into the LD ring */
697 list_add_tail(&new->node, &first->tx_list);
698 }
699 }
700
701finished:
702
703 /* No descriptor was built: the scatterlist was empty, or all of the hardware address/length pairs had length == 0 */
704 if (!first || !new)
705 return NULL;
706
707 new->async_tx.flags = flags;
708 new->async_tx.cookie = -EBUSY;
709
710 /* Set End-of-link to the last link descriptor of new list */
711 set_ld_eol(fsl_chan, new);
712
713 /* Enable extra controller features */
714 if (fsl_chan->set_src_loop_size)
715 fsl_chan->set_src_loop_size(fsl_chan, slave->src_loop_size);
716
717 if (fsl_chan->set_dest_loop_size)
718 fsl_chan->set_dest_loop_size(fsl_chan, slave->dst_loop_size);
719
720 if (fsl_chan->toggle_ext_start)
721 fsl_chan->toggle_ext_start(fsl_chan, slave->external_start);
722
723 if (fsl_chan->toggle_ext_pause)
724 fsl_chan->toggle_ext_pause(fsl_chan, slave->external_pause);
725
726 if (fsl_chan->set_request_count)
727 fsl_chan->set_request_count(fsl_chan, slave->request_count);
728
729 return &first->async_tx;
730
731fail:
732 /* If first was not set, then we failed to allocate the very first
733 * descriptor, and we're done */
734 if (!first)
735 return NULL;
736
737 /*
738 * First is set, so all of the descriptors we allocated have been added
739 * to first->tx_list, INCLUDING "first" itself. Therefore we
740 * must traverse the list backwards freeing each descriptor in turn
741 *
742 * We're re-using variables for the loop, oh well
743 */
744 tx_list = &first->tx_list;
745 list_for_each_entry_safe_reverse(new, prev, tx_list, node) {
746 list_del_init(&new->node);
747 dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
748 }
749
750 return NULL;
751}
752
753static void fsl_dma_device_terminate_all(struct dma_chan *chan)
754{
755 struct fsl_dma_chan *fsl_chan;
756 struct fsl_desc_sw *desc, *tmp;
757 unsigned long flags;
758
759 if (!chan)
760 return;
761
762 fsl_chan = to_fsl_chan(chan);
763
764 /* Halt the DMA engine */
765 dma_halt(fsl_chan);
766
767 spin_lock_irqsave(&fsl_chan->desc_lock, flags);
768
769 /* Remove and free all of the descriptors in the LD queue */
770 list_for_each_entry_safe(desc, tmp, &fsl_chan->ld_queue, node) {
771 list_del(&desc->node);
772 dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
773 }
774
775 spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
776}
777
778/**
541 * fsl_dma_update_completed_cookie - Update the completed cookie. 779 * fsl_dma_update_completed_cookie - Update the completed cookie.
542 * @fsl_chan : Freescale DMA channel 780 * @fsl_chan : Freescale DMA channel
543 */ 781 */
@@ -883,6 +1121,7 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
883 new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start; 1121 new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start;
884 new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size; 1122 new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size;
885 new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size; 1123 new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size;
1124 new_fsl_chan->set_request_count = fsl_chan_set_request_count;
886 } 1125 }
887 1126
888 spin_lock_init(&new_fsl_chan->desc_lock); 1127 spin_lock_init(&new_fsl_chan->desc_lock);
@@ -962,12 +1201,15 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
962 1201
963 dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask); 1202 dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
964 dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask); 1203 dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
1204 dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
965 fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources; 1205 fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
966 fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources; 1206 fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
967 fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt; 1207 fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
968 fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy; 1208 fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
969 fdev->common.device_is_tx_complete = fsl_dma_is_complete; 1209 fdev->common.device_is_tx_complete = fsl_dma_is_complete;
970 fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending; 1210 fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
1211 fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg;
1212 fdev->common.device_terminate_all = fsl_dma_device_terminate_all;
971 fdev->common.dev = &dev->dev; 1213 fdev->common.dev = &dev->dev;
972 1214
973 fdev->irq = irq_of_parse_and_map(dev->node, 0); 1215 fdev->irq = irq_of_parse_and_map(dev->node, 0);
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index dc7f26865797..0df14cbb8ca3 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -90,6 +90,7 @@ struct fsl_dma_ld_hw {
90struct fsl_desc_sw { 90struct fsl_desc_sw {
91 struct fsl_dma_ld_hw hw; 91 struct fsl_dma_ld_hw hw;
92 struct list_head node; 92 struct list_head node;
93 struct list_head tx_list;
93 struct dma_async_tx_descriptor async_tx; 94 struct dma_async_tx_descriptor async_tx;
94 struct list_head *ld; 95 struct list_head *ld;
95 void *priv; 96 void *priv;
@@ -143,10 +144,11 @@ struct fsl_dma_chan {
143 struct tasklet_struct tasklet; 144 struct tasklet_struct tasklet;
144 u32 feature; 145 u32 feature;
145 146
146 void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int size); 147 void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int enable);
147 void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable); 148 void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable);
148 void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size); 149 void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
149 void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size); 150 void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
151 void (*set_request_count)(struct fsl_dma_chan *fsl_chan, int size);
150}; 152};
151 153
152#define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common) 154#define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common)
diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c
deleted file mode 100644
index 2225bb6ba3d1..000000000000
--- a/drivers/dma/ioat.c
+++ /dev/null
@@ -1,202 +0,0 @@
1/*
2 * Intel I/OAT DMA Linux driver
3 * Copyright(c) 2007 - 2009 Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 */
22
23/*
24 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
25 * copy operations.
26 */
27
28#include <linux/init.h>
29#include <linux/module.h>
30#include <linux/pci.h>
31#include <linux/interrupt.h>
32#include <linux/dca.h>
33#include "ioatdma.h"
34#include "ioatdma_registers.h"
35#include "ioatdma_hw.h"
36
37MODULE_VERSION(IOAT_DMA_VERSION);
38MODULE_LICENSE("GPL");
39MODULE_AUTHOR("Intel Corporation");
40
41static struct pci_device_id ioat_pci_tbl[] = {
42 /* I/OAT v1 platforms */
43 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
44 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) },
45 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
46 { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
47
48 /* I/OAT v2 platforms */
49 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
50
51 /* I/OAT v3 platforms */
52 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
53 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
54 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
55 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
56 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
57 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
58 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
59 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
60 { 0, }
61};
62
63struct ioat_device {
64 struct pci_dev *pdev;
65 void __iomem *iobase;
66 struct ioatdma_device *dma;
67 struct dca_provider *dca;
68};
69
70static int __devinit ioat_probe(struct pci_dev *pdev,
71 const struct pci_device_id *id);
72static void __devexit ioat_remove(struct pci_dev *pdev);
73
74static int ioat_dca_enabled = 1;
75module_param(ioat_dca_enabled, int, 0644);
76MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
77
78static struct pci_driver ioat_pci_driver = {
79 .name = "ioatdma",
80 .id_table = ioat_pci_tbl,
81 .probe = ioat_probe,
82 .remove = __devexit_p(ioat_remove),
83};
84
85static int __devinit ioat_probe(struct pci_dev *pdev,
86 const struct pci_device_id *id)
87{
88 void __iomem *iobase;
89 struct ioat_device *device;
90 unsigned long mmio_start, mmio_len;
91 int err;
92
93 err = pci_enable_device(pdev);
94 if (err)
95 goto err_enable_device;
96
97 err = pci_request_regions(pdev, ioat_pci_driver.name);
98 if (err)
99 goto err_request_regions;
100
101 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
102 if (err)
103 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
104 if (err)
105 goto err_set_dma_mask;
106
107 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
108 if (err)
109 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
110 if (err)
111 goto err_set_dma_mask;
112
113 mmio_start = pci_resource_start(pdev, 0);
114 mmio_len = pci_resource_len(pdev, 0);
115 iobase = ioremap(mmio_start, mmio_len);
116 if (!iobase) {
117 err = -ENOMEM;
118 goto err_ioremap;
119 }
120
121 device = kzalloc(sizeof(*device), GFP_KERNEL);
122 if (!device) {
123 err = -ENOMEM;
124 goto err_kzalloc;
125 }
126 device->pdev = pdev;
127 pci_set_drvdata(pdev, device);
128 device->iobase = iobase;
129
130 pci_set_master(pdev);
131
132 switch (readb(iobase + IOAT_VER_OFFSET)) {
133 case IOAT_VER_1_2:
134 device->dma = ioat_dma_probe(pdev, iobase);
135 if (device->dma && ioat_dca_enabled)
136 device->dca = ioat_dca_init(pdev, iobase);
137 break;
138 case IOAT_VER_2_0:
139 device->dma = ioat_dma_probe(pdev, iobase);
140 if (device->dma && ioat_dca_enabled)
141 device->dca = ioat2_dca_init(pdev, iobase);
142 break;
143 case IOAT_VER_3_0:
144 device->dma = ioat_dma_probe(pdev, iobase);
145 if (device->dma && ioat_dca_enabled)
146 device->dca = ioat3_dca_init(pdev, iobase);
147 break;
148 default:
149 err = -ENODEV;
150 break;
151 }
152 if (!device->dma)
153 err = -ENODEV;
154
155 if (err)
156 goto err_version;
157
158 return 0;
159
160err_version:
161 kfree(device);
162err_kzalloc:
163 iounmap(iobase);
164err_ioremap:
165err_set_dma_mask:
166 pci_release_regions(pdev);
167 pci_disable_device(pdev);
168err_request_regions:
169err_enable_device:
170 return err;
171}
172
173static void __devexit ioat_remove(struct pci_dev *pdev)
174{
175 struct ioat_device *device = pci_get_drvdata(pdev);
176
177 dev_err(&pdev->dev, "Removing dma and dca services\n");
178 if (device->dca) {
179 unregister_dca_provider(device->dca);
180 free_dca_provider(device->dca);
181 device->dca = NULL;
182 }
183
184 if (device->dma) {
185 ioat_dma_remove(device->dma);
186 device->dma = NULL;
187 }
188
189 kfree(device);
190}
191
192static int __init ioat_init_module(void)
193{
194 return pci_register_driver(&ioat_pci_driver);
195}
196module_init(ioat_init_module);
197
198static void __exit ioat_exit_module(void)
199{
200 pci_unregister_driver(&ioat_pci_driver);
201}
202module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile
new file mode 100644
index 000000000000..8997d3fb9051
--- /dev/null
+++ b/drivers/dma/ioat/Makefile
@@ -0,0 +1,2 @@
1obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
2ioatdma-objs := pci.o dma.o dma_v2.o dma_v3.o dca.o
diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat/dca.c
index c012a1e15043..69d02615c4d6 100644
--- a/drivers/dma/ioat_dca.c
+++ b/drivers/dma/ioat/dca.c
@@ -33,8 +33,8 @@
33#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24) 33#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
34#endif 34#endif
35 35
36#include "ioatdma.h" 36#include "dma.h"
37#include "ioatdma_registers.h" 37#include "registers.h"
38 38
39/* 39/*
40 * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 40 * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
@@ -242,7 +242,8 @@ static struct dca_ops ioat_dca_ops = {
242}; 242};
243 243
244 244
245struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) 245struct dca_provider * __devinit
246ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
246{ 247{
247 struct dca_provider *dca; 248 struct dca_provider *dca;
248 struct ioat_dca_priv *ioatdca; 249 struct ioat_dca_priv *ioatdca;
@@ -407,7 +408,8 @@ static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
407 return slots; 408 return slots;
408} 409}
409 410
410struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) 411struct dca_provider * __devinit
412ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
411{ 413{
412 struct dca_provider *dca; 414 struct dca_provider *dca;
413 struct ioat_dca_priv *ioatdca; 415 struct ioat_dca_priv *ioatdca;
@@ -602,7 +604,8 @@ static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset)
602 return slots; 604 return slots;
603} 605}
604 606
605struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) 607struct dca_provider * __devinit
608ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
606{ 609{
607 struct dca_provider *dca; 610 struct dca_provider *dca;
608 struct ioat_dca_priv *ioatdca; 611 struct ioat_dca_priv *ioatdca;
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
new file mode 100644
index 000000000000..c524d36d3c2e
--- /dev/null
+++ b/drivers/dma/ioat/dma.c
@@ -0,0 +1,1238 @@
1/*
2 * Intel I/OAT DMA Linux driver
3 * Copyright(c) 2004 - 2009 Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 */
22
23/*
24 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
25 * copy operations.
26 */
27
28#include <linux/init.h>
29#include <linux/module.h>
30#include <linux/pci.h>
31#include <linux/interrupt.h>
32#include <linux/dmaengine.h>
33#include <linux/delay.h>
34#include <linux/dma-mapping.h>
35#include <linux/workqueue.h>
36#include <linux/i7300_idle.h>
37#include "dma.h"
38#include "registers.h"
39#include "hw.h"
40
41int ioat_pending_level = 4;
42module_param(ioat_pending_level, int, 0644);
43MODULE_PARM_DESC(ioat_pending_level,
44 "high-water mark for pushing ioat descriptors (default: 4)");
45
46/* internal functions */
47static void ioat1_cleanup(struct ioat_dma_chan *ioat);
48static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat);
49
50/**
51 * ioat_dma_do_interrupt - handler used for single vector interrupt mode
52 * @irq: interrupt id
53 * @data: interrupt data
54 */
55static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
56{
57 struct ioatdma_device *instance = data;
58 struct ioat_chan_common *chan;
59 unsigned long attnstatus;
60 int bit;
61 u8 intrctrl;
62
63 intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
64
65 if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
66 return IRQ_NONE;
67
68 if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
69 writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
70 return IRQ_NONE;
71 }
72
73 attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
74 for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
75 chan = ioat_chan_by_index(instance, bit);
76 tasklet_schedule(&chan->cleanup_task);
77 }
78
79 writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
80 return IRQ_HANDLED;
81}
82
83/**
84 * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
85 * @irq: interrupt id
86 * @data: interrupt data
87 */
88static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
89{
90 struct ioat_chan_common *chan = data;
91
92 tasklet_schedule(&chan->cleanup_task);
93
94 return IRQ_HANDLED;
95}
96
97static void ioat1_cleanup_tasklet(unsigned long data);
98
99/* common channel initialization */
100void ioat_init_channel(struct ioatdma_device *device,
101 struct ioat_chan_common *chan, int idx,
102 void (*timer_fn)(unsigned long),
103 void (*tasklet)(unsigned long),
104 unsigned long ioat)
105{
106 struct dma_device *dma = &device->common;
107
108 chan->device = device;
109 chan->reg_base = device->reg_base + (0x80 * (idx + 1));
110 spin_lock_init(&chan->cleanup_lock);
111 chan->common.device = dma;
112 list_add_tail(&chan->common.device_node, &dma->channels);
113 device->idx[idx] = chan;
114 init_timer(&chan->timer);
115 chan->timer.function = timer_fn;
116 chan->timer.data = ioat;
117 tasklet_init(&chan->cleanup_task, tasklet, ioat);
118 tasklet_disable(&chan->cleanup_task);
119}
120
121static void ioat1_timer_event(unsigned long data);
122
123/**
124 * ioat1_dma_enumerate_channels - find and initialize the device's channels
125 * @device: the device to be enumerated
126 */
127static int ioat1_enumerate_channels(struct ioatdma_device *device)
128{
129 u8 xfercap_scale;
130 u32 xfercap;
131 int i;
132 struct ioat_dma_chan *ioat;
133 struct device *dev = &device->pdev->dev;
134 struct dma_device *dma = &device->common;
135
136 INIT_LIST_HEAD(&dma->channels);
137 dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
138 dma->chancnt &= 0x1f; /* bits [4:0] valid */
139 if (dma->chancnt > ARRAY_SIZE(device->idx)) {
140 dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
141 dma->chancnt, ARRAY_SIZE(device->idx));
142 dma->chancnt = ARRAY_SIZE(device->idx);
143 }
144 xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
145 xfercap_scale &= 0x1f; /* bits [4:0] valid */
146 xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
147 dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap);
148
149#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
150 if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
151 dma->chancnt--;
152#endif
153 for (i = 0; i < dma->chancnt; i++) {
154 ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
155 if (!ioat)
156 break;
157
158 ioat_init_channel(device, &ioat->base, i,
159 ioat1_timer_event,
160 ioat1_cleanup_tasklet,
161 (unsigned long) ioat);
162 ioat->xfercap = xfercap;
163 spin_lock_init(&ioat->desc_lock);
164 INIT_LIST_HEAD(&ioat->free_desc);
165 INIT_LIST_HEAD(&ioat->used_desc);
166 }
167 dma->chancnt = i;
168 return i;
169}
170
171/**
172 * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
173 * descriptors to hw
174 * @chan: DMA channel handle
175 */
176static inline void
177__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat)
178{
179 void __iomem *reg_base = ioat->base.reg_base;
180
181 dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n",
182 __func__, ioat->pending);
183 ioat->pending = 0;
184 writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET);
185}
186
187static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
188{
189 struct ioat_dma_chan *ioat = to_ioat_chan(chan);
190
191 if (ioat->pending > 0) {
192 spin_lock_bh(&ioat->desc_lock);
193 __ioat1_dma_memcpy_issue_pending(ioat);
194 spin_unlock_bh(&ioat->desc_lock);
195 }
196}
197
198/**
199 * ioat1_reset_channel - restart a channel
200 * @ioat: IOAT DMA channel handle
201 */
202static void ioat1_reset_channel(struct ioat_dma_chan *ioat)
203{
204 struct ioat_chan_common *chan = &ioat->base;
205 void __iomem *reg_base = chan->reg_base;
206 u32 chansts, chanerr;
207
208 dev_warn(to_dev(chan), "reset\n");
209 chanerr = readl(reg_base + IOAT_CHANERR_OFFSET);
210 chansts = *chan->completion & IOAT_CHANSTS_STATUS;
211 if (chanerr) {
212 dev_err(to_dev(chan),
213 "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
214 chan_num(chan), chansts, chanerr);
215 writel(chanerr, reg_base + IOAT_CHANERR_OFFSET);
216 }
217
218 /*
219 * whack it upside the head with a reset
220 * and wait for things to settle out.
221 * force the pending count to a really big negative
222 * to make sure no one forces an issue_pending
223 * while we're waiting.
224 */
225
226 ioat->pending = INT_MIN;
227 writeb(IOAT_CHANCMD_RESET,
228 reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
229 set_bit(IOAT_RESET_PENDING, &chan->state);
230 mod_timer(&chan->timer, jiffies + RESET_DELAY);
231}
232
233static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
234{
235 struct dma_chan *c = tx->chan;
236 struct ioat_dma_chan *ioat = to_ioat_chan(c);
237 struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
238 struct ioat_chan_common *chan = &ioat->base;
239 struct ioat_desc_sw *first;
240 struct ioat_desc_sw *chain_tail;
241 dma_cookie_t cookie;
242
243 spin_lock_bh(&ioat->desc_lock);
244 /* cookie incr and addition to used_list must be atomic */
245 cookie = c->cookie;
246 cookie++;
247 if (cookie < 0)
248 cookie = 1;
249 c->cookie = cookie;
250 tx->cookie = cookie;
251 dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
252
253 /* write address into NextDescriptor field of last desc in chain */
254 first = to_ioat_desc(desc->tx_list.next);
255 chain_tail = to_ioat_desc(ioat->used_desc.prev);
256 /* make descriptor updates globally visible before chaining */
257 wmb();
258 chain_tail->hw->next = first->txd.phys;
259 list_splice_tail_init(&desc->tx_list, &ioat->used_desc);
260 dump_desc_dbg(ioat, chain_tail);
261 dump_desc_dbg(ioat, first);
262
263 if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
264 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
265
266 ioat->active += desc->hw->tx_cnt;
267 ioat->pending += desc->hw->tx_cnt;
268 if (ioat->pending >= ioat_pending_level)
269 __ioat1_dma_memcpy_issue_pending(ioat);
270 spin_unlock_bh(&ioat->desc_lock);
271
272 return cookie;
273}
274
275/**
276 * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
277 * @ioat: the channel supplying the memory pool for the descriptors
278 * @flags: allocation flags
279 */
280static struct ioat_desc_sw *
281ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags)
282{
283 struct ioat_dma_descriptor *desc;
284 struct ioat_desc_sw *desc_sw;
285 struct ioatdma_device *ioatdma_device;
286 dma_addr_t phys;
287
288 ioatdma_device = ioat->base.device;
289 desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
290 if (unlikely(!desc))
291 return NULL;
292
293 desc_sw = kzalloc(sizeof(*desc_sw), flags);
294 if (unlikely(!desc_sw)) {
295 pci_pool_free(ioatdma_device->dma_pool, desc, phys);
296 return NULL;
297 }
298
299 memset(desc, 0, sizeof(*desc));
300
301 INIT_LIST_HEAD(&desc_sw->tx_list);
302 dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common);
303 desc_sw->txd.tx_submit = ioat1_tx_submit;
304 desc_sw->hw = desc;
305 desc_sw->txd.phys = phys;
306 set_desc_id(desc_sw, -1);
307
308 return desc_sw;
309}
310
311static int ioat_initial_desc_count = 256;
312module_param(ioat_initial_desc_count, int, 0644);
313MODULE_PARM_DESC(ioat_initial_desc_count,
314 "ioat1: initial descriptors per channel (default: 256)");
315/**
316 * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors
317 * @chan: the channel to be filled out
318 */
319static int ioat1_dma_alloc_chan_resources(struct dma_chan *c)
320{
321 struct ioat_dma_chan *ioat = to_ioat_chan(c);
322 struct ioat_chan_common *chan = &ioat->base;
323 struct ioat_desc_sw *desc;
324 u32 chanerr;
325 int i;
326 LIST_HEAD(tmp_list);
327
328 /* have we already been set up? */
329 if (!list_empty(&ioat->free_desc))
330 return ioat->desccount;
331
332 /* Setup register to interrupt and write completion status on error */
333 writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
334
335 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
336 if (chanerr) {
337 dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
338 writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
339 }
340
341 /* Allocate descriptors */
342 for (i = 0; i < ioat_initial_desc_count; i++) {
343 desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL);
344 if (!desc) {
345 dev_err(to_dev(chan), "Only %d initial descriptors\n", i);
346 break;
347 }
348 set_desc_id(desc, i);
349 list_add_tail(&desc->node, &tmp_list);
350 }
351 spin_lock_bh(&ioat->desc_lock);
352 ioat->desccount = i;
353 list_splice(&tmp_list, &ioat->free_desc);
354 spin_unlock_bh(&ioat->desc_lock);
355
356 /* allocate a completion writeback area */
357 /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
358 chan->completion = pci_pool_alloc(chan->device->completion_pool,
359 GFP_KERNEL, &chan->completion_dma);
360 memset(chan->completion, 0, sizeof(*chan->completion));
361 writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
362 chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
363 writel(((u64) chan->completion_dma) >> 32,
364 chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
365
366 tasklet_enable(&chan->cleanup_task);
367 ioat1_dma_start_null_desc(ioat); /* give chain to dma device */
368 dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
369 __func__, ioat->desccount);
370 return ioat->desccount;
371}
372
373/**
374 * ioat1_dma_free_chan_resources - release all the descriptors
375 * @chan: the channel to be cleaned
376 */
377static void ioat1_dma_free_chan_resources(struct dma_chan *c)
378{
379 struct ioat_dma_chan *ioat = to_ioat_chan(c);
380 struct ioat_chan_common *chan = &ioat->base;
381 struct ioatdma_device *ioatdma_device = chan->device;
382 struct ioat_desc_sw *desc, *_desc;
383 int in_use_descs = 0;
384
385 /* Before freeing channel resources first check
386 * if they have been previously allocated for this channel.
387 */
388 if (ioat->desccount == 0)
389 return;
390
391 tasklet_disable(&chan->cleanup_task);
392 del_timer_sync(&chan->timer);
393 ioat1_cleanup(ioat);
394
395 /* Delay 100ms after reset to allow internal DMA logic to quiesce
396 * before removing DMA descriptor resources.
397 */
398 writeb(IOAT_CHANCMD_RESET,
399 chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
400 mdelay(100);
401
402 spin_lock_bh(&ioat->desc_lock);
403 list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) {
404 dev_dbg(to_dev(chan), "%s: freeing %d from used list\n",
405 __func__, desc_id(desc));
406 dump_desc_dbg(ioat, desc);
407 in_use_descs++;
408 list_del(&desc->node);
409 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
410 desc->txd.phys);
411 kfree(desc);
412 }
413 list_for_each_entry_safe(desc, _desc,
414 &ioat->free_desc, node) {
415 list_del(&desc->node);
416 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
417 desc->txd.phys);
418 kfree(desc);
419 }
420 spin_unlock_bh(&ioat->desc_lock);
421
422 pci_pool_free(ioatdma_device->completion_pool,
423 chan->completion,
424 chan->completion_dma);
425
426 /* one is ok since we left it on there on purpose */
427 if (in_use_descs > 1)
428 dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
429 in_use_descs - 1);
430
431 chan->last_completion = 0;
432 chan->completion_dma = 0;
433 ioat->pending = 0;
434 ioat->desccount = 0;
435}
436
437/**
438 * ioat1_dma_get_next_descriptor - return the next available descriptor
439 * @ioat: IOAT DMA channel handle
440 *
441 * Gets the next descriptor from the chain, and must be called with the
442 * channel's desc_lock held. Allocates more descriptors if the channel
443 * has run out.
444 */
445static struct ioat_desc_sw *
446ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat)
447{
448 struct ioat_desc_sw *new;
449
450 if (!list_empty(&ioat->free_desc)) {
451 new = to_ioat_desc(ioat->free_desc.next);
452 list_del(&new->node);
453 } else {
454 /* try to get another desc */
455 new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC);
456 if (!new) {
457 dev_err(to_dev(&ioat->base), "alloc failed\n");
458 return NULL;
459 }
460 }
461 dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n",
462 __func__, desc_id(new));
463 prefetch(new->hw);
464 return new;
465}
466
467static struct dma_async_tx_descriptor *
468ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest,
469 dma_addr_t dma_src, size_t len, unsigned long flags)
470{
471 struct ioat_dma_chan *ioat = to_ioat_chan(c);
472 struct ioat_desc_sw *desc;
473 size_t copy;
474 LIST_HEAD(chain);
475 dma_addr_t src = dma_src;
476 dma_addr_t dest = dma_dest;
477 size_t total_len = len;
478 struct ioat_dma_descriptor *hw = NULL;
479 int tx_cnt = 0;
480
481 spin_lock_bh(&ioat->desc_lock);
482 desc = ioat1_dma_get_next_descriptor(ioat);
483 do {
484 if (!desc)
485 break;
486
487 tx_cnt++;
488 copy = min_t(size_t, len, ioat->xfercap);
489
490 hw = desc->hw;
491 hw->size = copy;
492 hw->ctl = 0;
493 hw->src_addr = src;
494 hw->dst_addr = dest;
495
496 list_add_tail(&desc->node, &chain);
497
498 len -= copy;
499 dest += copy;
500 src += copy;
501 if (len) {
502 struct ioat_desc_sw *next;
503
504 async_tx_ack(&desc->txd);
505 next = ioat1_dma_get_next_descriptor(ioat);
506 hw->next = next ? next->txd.phys : 0;
507 dump_desc_dbg(ioat, desc);
508 desc = next;
509 } else
510 hw->next = 0;
511 } while (len);
512
513 if (!desc) {
514 struct ioat_chan_common *chan = &ioat->base;
515
516 dev_err(to_dev(chan),
517 "chan%d - get_next_desc failed\n", chan_num(chan));
518 list_splice(&chain, &ioat->free_desc);
519 spin_unlock_bh(&ioat->desc_lock);
520 return NULL;
521 }
522 spin_unlock_bh(&ioat->desc_lock);
523
524 desc->txd.flags = flags;
525 desc->len = total_len;
526 list_splice(&chain, &desc->tx_list);
527 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
528 hw->ctl_f.compl_write = 1;
529 hw->tx_cnt = tx_cnt;
530 dump_desc_dbg(ioat, desc);
531
532 return &desc->txd;
533}
534
535static void ioat1_cleanup_tasklet(unsigned long data)
536{
537 struct ioat_dma_chan *chan = (void *)data;
538
539 ioat1_cleanup(chan);
540 writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET);
541}
542
543void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
544 size_t len, struct ioat_dma_descriptor *hw)
545{
546 struct pci_dev *pdev = chan->device->pdev;
547 size_t offset = len - hw->size;
548
549 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
550 ioat_unmap(pdev, hw->dst_addr - offset, len,
551 PCI_DMA_FROMDEVICE, flags, 1);
552
553 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
554 ioat_unmap(pdev, hw->src_addr - offset, len,
555 PCI_DMA_TODEVICE, flags, 0);
556}
557
558unsigned long ioat_get_current_completion(struct ioat_chan_common *chan)
559{
560 unsigned long phys_complete;
561 u64 completion;
562
563 completion = *chan->completion;
564 phys_complete = ioat_chansts_to_addr(completion);
565
566 dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__,
567 (unsigned long long) phys_complete);
568
569 if (is_ioat_halted(completion)) {
570 u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
571 dev_err(to_dev(chan), "Channel halted, chanerr = %x\n",
572 chanerr);
573
574 /* TODO do something to salvage the situation */
575 }
576
577 return phys_complete;
578}
579
580bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
581 unsigned long *phys_complete)
582{
583 *phys_complete = ioat_get_current_completion(chan);
584 if (*phys_complete == chan->last_completion)
585 return false;
586 clear_bit(IOAT_COMPLETION_ACK, &chan->state);
587 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
588
589 return true;
590}
591
592static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete)
593{
594 struct ioat_chan_common *chan = &ioat->base;
595 struct list_head *_desc, *n;
596 struct dma_async_tx_descriptor *tx;
597
598 dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n",
599 __func__, phys_complete);
600 list_for_each_safe(_desc, n, &ioat->used_desc) {
601 struct ioat_desc_sw *desc;
602
603 prefetch(n);
604 desc = list_entry(_desc, typeof(*desc), node);
605 tx = &desc->txd;
606 /*
607 * Incoming DMA requests may use multiple descriptors,
608 * due to exceeding xfercap, perhaps. If so, only the
609 * last one will have a cookie, and require unmapping.
610 */
611 dump_desc_dbg(ioat, desc);
612 if (tx->cookie) {
613 chan->completed_cookie = tx->cookie;
614 tx->cookie = 0;
615 ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
616 ioat->active -= desc->hw->tx_cnt;
617 if (tx->callback) {
618 tx->callback(tx->callback_param);
619 tx->callback = NULL;
620 }
621 }
622
623 if (tx->phys != phys_complete) {
624 /*
625 * a completed entry, but not the last, so clean
626 * up if the client is done with the descriptor
627 */
628 if (async_tx_test_ack(tx))
629 list_move_tail(&desc->node, &ioat->free_desc);
630 } else {
631 /*
632 * last used desc. Do not remove, so we can
633 * append from it.
634 */
635
636 /* if nothing else is pending, cancel the
637 * completion timeout
638 */
639 if (n == &ioat->used_desc) {
640 dev_dbg(to_dev(chan),
641 "%s cancel completion timeout\n",
642 __func__);
643 clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
644 }
645
646 /* TODO check status bits? */
647 break;
648 }
649 }
650
651 chan->last_completion = phys_complete;
652}
653
654/**
655 * ioat1_cleanup - cleanup up finished descriptors
656 * @chan: ioat channel to be cleaned up
657 *
658 * To prevent lock contention we defer cleanup when the locks are
659 * contended with a terminal timeout that forces cleanup and catches
660 * completion notification errors.
661 */
662static void ioat1_cleanup(struct ioat_dma_chan *ioat)
663{
664 struct ioat_chan_common *chan = &ioat->base;
665 unsigned long phys_complete;
666
667 prefetch(chan->completion);
668
669 if (!spin_trylock_bh(&chan->cleanup_lock))
670 return;
671
672 if (!ioat_cleanup_preamble(chan, &phys_complete)) {
673 spin_unlock_bh(&chan->cleanup_lock);
674 return;
675 }
676
677 if (!spin_trylock_bh(&ioat->desc_lock)) {
678 spin_unlock_bh(&chan->cleanup_lock);
679 return;
680 }
681
682 __cleanup(ioat, phys_complete);
683
684 spin_unlock_bh(&ioat->desc_lock);
685 spin_unlock_bh(&chan->cleanup_lock);
686}
687
688static void ioat1_timer_event(unsigned long data)
689{
690 struct ioat_dma_chan *ioat = (void *) data;
691 struct ioat_chan_common *chan = &ioat->base;
692
693 dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state);
694
695 spin_lock_bh(&chan->cleanup_lock);
696 if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) {
697 struct ioat_desc_sw *desc;
698
699 spin_lock_bh(&ioat->desc_lock);
700
701 /* restart active descriptors */
702 desc = to_ioat_desc(ioat->used_desc.prev);
703 ioat_set_chainaddr(ioat, desc->txd.phys);
704 ioat_start(chan);
705
706 ioat->pending = 0;
707 set_bit(IOAT_COMPLETION_PENDING, &chan->state);
708 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
709 spin_unlock_bh(&ioat->desc_lock);
710 } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
711 unsigned long phys_complete;
712
713 spin_lock_bh(&ioat->desc_lock);
714 /* if we haven't made progress and we have already
715 * acknowledged a pending completion once, then be more
716 * forceful with a restart
717 */
718 if (ioat_cleanup_preamble(chan, &phys_complete))
719 __cleanup(ioat, phys_complete);
720 else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
721 ioat1_reset_channel(ioat);
722 else {
723 u64 status = ioat_chansts(chan);
724
725 /* manually update the last completion address */
726 if (ioat_chansts_to_addr(status) != 0)
727 *chan->completion = status;
728
729 set_bit(IOAT_COMPLETION_ACK, &chan->state);
730 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
731 }
732 spin_unlock_bh(&ioat->desc_lock);
733 }
734 spin_unlock_bh(&chan->cleanup_lock);
735}
736
737static enum dma_status
738ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie,
739 dma_cookie_t *done, dma_cookie_t *used)
740{
741 struct ioat_dma_chan *ioat = to_ioat_chan(c);
742
743 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
744 return DMA_SUCCESS;
745
746 ioat1_cleanup(ioat);
747
748 return ioat_is_complete(c, cookie, done, used);
749}
750
751static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat)
752{
753 struct ioat_chan_common *chan = &ioat->base;
754 struct ioat_desc_sw *desc;
755 struct ioat_dma_descriptor *hw;
756
757 spin_lock_bh(&ioat->desc_lock);
758
759 desc = ioat1_dma_get_next_descriptor(ioat);
760
761 if (!desc) {
762 dev_err(to_dev(chan),
763 "Unable to start null desc - get next desc failed\n");
764 spin_unlock_bh(&ioat->desc_lock);
765 return;
766 }
767
768 hw = desc->hw;
769 hw->ctl = 0;
770 hw->ctl_f.null = 1;
771 hw->ctl_f.int_en = 1;
772 hw->ctl_f.compl_write = 1;
773 /* set size to non-zero value (channel returns error when size is 0) */
774 hw->size = NULL_DESC_BUFFER_SIZE;
775 hw->src_addr = 0;
776 hw->dst_addr = 0;
777 async_tx_ack(&desc->txd);
778 hw->next = 0;
779 list_add_tail(&desc->node, &ioat->used_desc);
780 dump_desc_dbg(ioat, desc);
781
782 ioat_set_chainaddr(ioat, desc->txd.phys);
783 ioat_start(chan);
784 spin_unlock_bh(&ioat->desc_lock);
785}
786
787/*
788 * Perform an IOAT transaction to verify the HW works.
789 */
790#define IOAT_TEST_SIZE 2000
791
792static void __devinit ioat_dma_test_callback(void *dma_async_param)
793{
794 struct completion *cmp = dma_async_param;
795
796 complete(cmp);
797}
798
799/**
800 * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works.
801 * @device: device to be tested
802 */
803int __devinit ioat_dma_self_test(struct ioatdma_device *device)
804{
805 int i;
806 u8 *src;
807 u8 *dest;
808 struct dma_device *dma = &device->common;
809 struct device *dev = &device->pdev->dev;
810 struct dma_chan *dma_chan;
811 struct dma_async_tx_descriptor *tx;
812 dma_addr_t dma_dest, dma_src;
813 dma_cookie_t cookie;
814 int err = 0;
815 struct completion cmp;
816 unsigned long tmo;
817 unsigned long flags;
818
819 src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
820 if (!src)
821 return -ENOMEM;
822 dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
823 if (!dest) {
824 kfree(src);
825 return -ENOMEM;
826 }
827
828 /* Fill in src buffer */
829 for (i = 0; i < IOAT_TEST_SIZE; i++)
830 src[i] = (u8)i;
831
832 /* Start copy, using first DMA channel */
833 dma_chan = container_of(dma->channels.next, struct dma_chan,
834 device_node);
835 if (dma->device_alloc_chan_resources(dma_chan) < 1) {
836 dev_err(dev, "selftest cannot allocate chan resource\n");
837 err = -ENODEV;
838 goto out;
839 }
840
841 dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
842 dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
843 flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE |
844 DMA_PREP_INTERRUPT;
845 tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
846 IOAT_TEST_SIZE, flags);
847 if (!tx) {
848 dev_err(dev, "Self-test prep failed, disabling\n");
849 err = -ENODEV;
850 goto free_resources;
851 }
852
853 async_tx_ack(tx);
854 init_completion(&cmp);
855 tx->callback = ioat_dma_test_callback;
856 tx->callback_param = &cmp;
857 cookie = tx->tx_submit(tx);
858 if (cookie < 0) {
859 dev_err(dev, "Self-test setup failed, disabling\n");
860 err = -ENODEV;
861 goto free_resources;
862 }
863 dma->device_issue_pending(dma_chan);
864
865 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
866
867 if (tmo == 0 ||
868 dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL)
869 != DMA_SUCCESS) {
870 dev_err(dev, "Self-test copy timed out, disabling\n");
871 err = -ENODEV;
872 goto free_resources;
873 }
874 if (memcmp(src, dest, IOAT_TEST_SIZE)) {
875 dev_err(dev, "Self-test copy failed compare, disabling\n");
876 err = -ENODEV;
877 goto free_resources;
878 }
879
880free_resources:
881 dma->device_free_chan_resources(dma_chan);
882out:
883 kfree(src);
884 kfree(dest);
885 return err;
886}
887
888static char ioat_interrupt_style[32] = "msix";
889module_param_string(ioat_interrupt_style, ioat_interrupt_style,
890 sizeof(ioat_interrupt_style), 0644);
891MODULE_PARM_DESC(ioat_interrupt_style,
892 "set ioat interrupt style: msix (default), "
893 "msix-single-vector, msi, intx)");
894
895/**
896 * ioat_dma_setup_interrupts - setup interrupt handler
897 * @device: ioat device
898 */
899static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
900{
901 struct ioat_chan_common *chan;
902 struct pci_dev *pdev = device->pdev;
903 struct device *dev = &pdev->dev;
904 struct msix_entry *msix;
905 int i, j, msixcnt;
906 int err = -EINVAL;
907 u8 intrctrl = 0;
908
909 if (!strcmp(ioat_interrupt_style, "msix"))
910 goto msix;
911 if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
912 goto msix_single_vector;
913 if (!strcmp(ioat_interrupt_style, "msi"))
914 goto msi;
915 if (!strcmp(ioat_interrupt_style, "intx"))
916 goto intx;
917 dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style);
918 goto err_no_irq;
919
920msix:
921 /* The number of MSI-X vectors should equal the number of channels */
922 msixcnt = device->common.chancnt;
923 for (i = 0; i < msixcnt; i++)
924 device->msix_entries[i].entry = i;
925
926 err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
927 if (err < 0)
928 goto msi;
929 if (err > 0)
930 goto msix_single_vector;
931
932 for (i = 0; i < msixcnt; i++) {
933 msix = &device->msix_entries[i];
934 chan = ioat_chan_by_index(device, i);
935 err = devm_request_irq(dev, msix->vector,
936 ioat_dma_do_interrupt_msix, 0,
937 "ioat-msix", chan);
938 if (err) {
939 for (j = 0; j < i; j++) {
940 msix = &device->msix_entries[j];
941 chan = ioat_chan_by_index(device, j);
942 devm_free_irq(dev, msix->vector, chan);
943 }
944 goto msix_single_vector;
945 }
946 }
947 intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
948 goto done;
949
950msix_single_vector:
951 msix = &device->msix_entries[0];
952 msix->entry = 0;
953 err = pci_enable_msix(pdev, device->msix_entries, 1);
954 if (err)
955 goto msi;
956
957 err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
958 "ioat-msix", device);
959 if (err) {
960 pci_disable_msix(pdev);
961 goto msi;
962 }
963 goto done;
964
965msi:
966 err = pci_enable_msi(pdev);
967 if (err)
968 goto intx;
969
970 err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0,
971 "ioat-msi", device);
972 if (err) {
973 pci_disable_msi(pdev);
974 goto intx;
975 }
976 goto done;
977
978intx:
979 err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt,
980 IRQF_SHARED, "ioat-intx", device);
981 if (err)
982 goto err_no_irq;
983
984done:
985 if (device->intr_quirk)
986 device->intr_quirk(device);
987 intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
988 writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
989 return 0;
990
991err_no_irq:
992 /* Disable all interrupt generation */
993 writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
994 dev_err(dev, "no usable interrupts\n");
995 return err;
996}
997
998static void ioat_disable_interrupts(struct ioatdma_device *device)
999{
1000 /* Disable all interrupt generation */
1001 writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
1002}
1003
1004int __devinit ioat_probe(struct ioatdma_device *device)
1005{
1006 int err = -ENODEV;
1007 struct dma_device *dma = &device->common;
1008 struct pci_dev *pdev = device->pdev;
1009 struct device *dev = &pdev->dev;
1010
1011 /* DMA coherent memory pool for DMA descriptor allocations */
1012 device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
1013 sizeof(struct ioat_dma_descriptor),
1014 64, 0);
1015 if (!device->dma_pool) {
1016 err = -ENOMEM;
1017 goto err_dma_pool;
1018 }
1019
1020 device->completion_pool = pci_pool_create("completion_pool", pdev,
1021 sizeof(u64), SMP_CACHE_BYTES,
1022 SMP_CACHE_BYTES);
1023
1024 if (!device->completion_pool) {
1025 err = -ENOMEM;
1026 goto err_completion_pool;
1027 }
1028
1029 device->enumerate_channels(device);
1030
1031 dma_cap_set(DMA_MEMCPY, dma->cap_mask);
1032 dma->dev = &pdev->dev;
1033
1034 if (!dma->chancnt) {
1035 dev_err(dev, "zero channels detected\n");
1036 goto err_setup_interrupts;
1037 }
1038
1039 err = ioat_dma_setup_interrupts(device);
1040 if (err)
1041 goto err_setup_interrupts;
1042
1043 err = device->self_test(device);
1044 if (err)
1045 goto err_self_test;
1046
1047 return 0;
1048
1049err_self_test:
1050 ioat_disable_interrupts(device);
1051err_setup_interrupts:
1052 pci_pool_destroy(device->completion_pool);
1053err_completion_pool:
1054 pci_pool_destroy(device->dma_pool);
1055err_dma_pool:
1056 return err;
1057}
1058
1059int __devinit ioat_register(struct ioatdma_device *device)
1060{
1061 int err = dma_async_device_register(&device->common);
1062
1063 if (err) {
1064 ioat_disable_interrupts(device);
1065 pci_pool_destroy(device->completion_pool);
1066 pci_pool_destroy(device->dma_pool);
1067 }
1068
1069 return err;
1070}
1071
1072/* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */
1073static void ioat1_intr_quirk(struct ioatdma_device *device)
1074{
1075 struct pci_dev *pdev = device->pdev;
1076 u32 dmactrl;
1077
1078 pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
1079 if (pdev->msi_enabled)
1080 dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
1081 else
1082 dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN;
1083 pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl);
1084}
1085
1086static ssize_t ring_size_show(struct dma_chan *c, char *page)
1087{
1088 struct ioat_dma_chan *ioat = to_ioat_chan(c);
1089
1090 return sprintf(page, "%d\n", ioat->desccount);
1091}
1092static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
1093
1094static ssize_t ring_active_show(struct dma_chan *c, char *page)
1095{
1096 struct ioat_dma_chan *ioat = to_ioat_chan(c);
1097
1098 return sprintf(page, "%d\n", ioat->active);
1099}
1100static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
1101
1102static ssize_t cap_show(struct dma_chan *c, char *page)
1103{
1104 struct dma_device *dma = c->device;
1105
1106 return sprintf(page, "copy%s%s%s%s%s%s\n",
1107 dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
1108 dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
1109 dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
1110 dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
1111 dma_has_cap(DMA_MEMSET, dma->cap_mask) ? " fill" : "",
1112 dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "");
1113
1114}
1115struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap);
1116
1117static ssize_t version_show(struct dma_chan *c, char *page)
1118{
1119 struct dma_device *dma = c->device;
1120 struct ioatdma_device *device = to_ioatdma_device(dma);
1121
1122 return sprintf(page, "%d.%d\n",
1123 device->version >> 4, device->version & 0xf);
1124}
1125struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version);
1126
/* NULL-terminated attribute list used as the default_attrs of ioat1_ktype */
static struct attribute *ioat1_attrs[] = {
	&ring_size_attr.attr,
	&ring_active_attr.attr,
	&ioat_cap_attr.attr,
	&ioat_version_attr.attr,
	NULL,
};
1134
1135static ssize_t
1136ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
1137{
1138 struct ioat_sysfs_entry *entry;
1139 struct ioat_chan_common *chan;
1140
1141 entry = container_of(attr, struct ioat_sysfs_entry, attr);
1142 chan = container_of(kobj, struct ioat_chan_common, kobj);
1143
1144 if (!entry->show)
1145 return -EIO;
1146 return entry->show(&chan->common, page);
1147}
1148
/* read-only sysfs operations: only .show is provided */
struct sysfs_ops ioat_sysfs_ops = {
	.show = ioat_attr_show,
};
1152
/* kobject type passed to ioat_kobject_add() by ioat1_dma_probe() */
static struct kobj_type ioat1_ktype = {
	.sysfs_ops = &ioat_sysfs_ops,
	.default_attrs = ioat1_attrs,
};
1157
1158void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type)
1159{
1160 struct dma_device *dma = &device->common;
1161 struct dma_chan *c;
1162
1163 list_for_each_entry(c, &dma->channels, device_node) {
1164 struct ioat_chan_common *chan = to_chan_common(c);
1165 struct kobject *parent = &c->dev->device.kobj;
1166 int err;
1167
1168 err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata");
1169 if (err) {
1170 dev_warn(to_dev(chan),
1171 "sysfs init error (%d), continuing...\n", err);
1172 kobject_put(&chan->kobj);
1173 set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state);
1174 }
1175 }
1176}
1177
1178void ioat_kobject_del(struct ioatdma_device *device)
1179{
1180 struct dma_device *dma = &device->common;
1181 struct dma_chan *c;
1182
1183 list_for_each_entry(c, &dma->channels, device_node) {
1184 struct ioat_chan_common *chan = to_chan_common(c);
1185
1186 if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) {
1187 kobject_del(&chan->kobj);
1188 kobject_put(&chan->kobj);
1189 }
1190 }
1191}
1192
1193int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca)
1194{
1195 struct pci_dev *pdev = device->pdev;
1196 struct dma_device *dma;
1197 int err;
1198
1199 device->intr_quirk = ioat1_intr_quirk;
1200 device->enumerate_channels = ioat1_enumerate_channels;
1201 device->self_test = ioat_dma_self_test;
1202 dma = &device->common;
1203 dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
1204 dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
1205 dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources;
1206 dma->device_free_chan_resources = ioat1_dma_free_chan_resources;
1207 dma->device_is_tx_complete = ioat1_dma_is_complete;
1208
1209 err = ioat_probe(device);
1210 if (err)
1211 return err;
1212 ioat_set_tcp_copy_break(4096);
1213 err = ioat_register(device);
1214 if (err)
1215 return err;
1216 ioat_kobject_add(device, &ioat1_ktype);
1217
1218 if (dca)
1219 device->dca = ioat_dca_init(pdev, device->reg_base);
1220
1221 return err;
1222}
1223
1224void __devexit ioat_dma_remove(struct ioatdma_device *device)
1225{
1226 struct dma_device *dma = &device->common;
1227
1228 ioat_disable_interrupts(device);
1229
1230 ioat_kobject_del(device);
1231
1232 dma_async_device_unregister(dma);
1233
1234 pci_pool_destroy(device->dma_pool);
1235 pci_pool_destroy(device->completion_pool);
1236
1237 INIT_LIST_HEAD(&dma->channels);
1238}
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
new file mode 100644
index 000000000000..c14fdfeb7f33
--- /dev/null
+++ b/drivers/dma/ioat/dma.h
@@ -0,0 +1,337 @@
1/*
2 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59
16 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * The full GNU General Public License is included in this distribution in the
19 * file called COPYING.
20 */
21#ifndef IOATDMA_H
22#define IOATDMA_H
23
24#include <linux/dmaengine.h>
25#include "hw.h"
26#include "registers.h"
27#include <linux/init.h>
28#include <linux/dmapool.h>
29#include <linux/cache.h>
30#include <linux/pci_ids.h>
31#include <net/tcp.h>
32
#define IOAT_DMA_VERSION "4.00"

#define IOAT_LOW_COMPLETION_MASK 0xffffffc0
#define IOAT_DMA_DCA_ANY_CPU ~0

/* container_of() helpers: map an embedded member back to its wrapper */
#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd)
/* struct device of the pci device that owns the given channel */
#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev)

/*
 * channel number derived from the distance between the channel's register
 * base and the device's register base, in units of 0x80 bytes
 */
#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)

/*
 * workaround for IOAT ver.3.0 null descriptor issue
 * (channel returns error when size is 0)
 */
#define NULL_DESC_BUFFER_SIZE 1
50
/**
 * struct ioatdma_device - internal representation of a IOAT device
 * @pdev: PCI-Express device
 * @reg_base: MMIO register space base address
 * @dma_pool: for allocating DMA descriptors
 * @completion_pool: for allocating the per-channel completion writeback area
 * @common: embedded struct dma_device
 * @version: version of ioatdma device
 * @msix_entries: irq handlers
 * @idx: per channel data
 * @dca: direct cache access context
 * @intr_quirk: interrupt setup quirk (for ioat_v1 devices)
 * @enumerate_channels: hw version specific channel enumeration
 * @cleanup_tasklet: select between the v2 and v3 cleanup routines
 * @timer_fn: select between the v2 and v3 timer watchdog routines
 * @self_test: hardware version specific self test for each supported op type
 *
 * Note: the v3 cleanup routine supports raid operations
 */
struct ioatdma_device {
	struct pci_dev *pdev;
	void __iomem *reg_base;
	struct pci_pool *dma_pool;
	struct pci_pool *completion_pool;
	struct dma_device common;
	u8 version;
	struct msix_entry msix_entries[4];
	struct ioat_chan_common *idx[4];
	struct dca_provider *dca;
	void (*intr_quirk)(struct ioatdma_device *device);
	int (*enumerate_channels)(struct ioatdma_device *device);
	void (*cleanup_tasklet)(unsigned long data);
	void (*timer_fn)(unsigned long data);
	int (*self_test)(struct ioatdma_device *device);
};
85
/*
 * struct ioat_chan_common - channel state embedded (as 'base') in the
 * version specific channel structures
 */
struct ioat_chan_common {
	struct dma_chan common;		/* embedded dmaengine channel */
	void __iomem *reg_base;		/* base of this channel's registers */
	unsigned long last_completion;	/* last completion address processed by cleanup */
	spinlock_t cleanup_lock;	/* taken by the cleanup and timer paths */
	dma_cookie_t completed_cookie;	/* reported as 'done' by ioat_is_complete() */
	unsigned long state;		/* bit numbers below, used with set_bit() and friends */
	#define IOAT_COMPLETION_PENDING 0
	#define IOAT_COMPLETION_ACK 1
	#define IOAT_RESET_PENDING 2
	#define IOAT_KOBJ_INIT_FAIL 3	/* kobject_init_and_add() failed, skip on removal */
	struct timer_list timer;
	#define COMPLETION_TIMEOUT msecs_to_jiffies(100)
	#define IDLE_TIMEOUT msecs_to_jiffies(2000)
	#define RESET_DELAY msecs_to_jiffies(100)
	struct ioatdma_device *device;	/* owning device */
	dma_addr_t completion_dma;	/* bus address of the completion writeback area */
	u64 *completion;		/* cpu address of the completion writeback area */
	struct tasklet_struct cleanup_task;
	struct kobject kobj;		/* per-channel "quickdata" sysfs directory */
};
107
/*
 * struct ioat_sysfs_entry - a sysfs attribute plus the routine that formats
 * its value for a given channel into the supplied buffer
 */
struct ioat_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct dma_chan *, char *);
};
112
/**
 * struct ioat_dma_chan - internal representation of a DMA channel
 */
struct ioat_dma_chan {
	struct ioat_chan_common base;	/* common channel state, see to_ioat_chan() */

	size_t xfercap;	/* XFERCAP register value expanded out */

	spinlock_t desc_lock;
	struct list_head free_desc;
	struct list_head used_desc;

	int pending;
	u16 desccount;	/* exported through the 'ring_size' sysfs attribute */
	u16 active;	/* exported through the 'ring_active' sysfs attribute */
};
129
130static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c)
131{
132 return container_of(c, struct ioat_chan_common, common);
133}
134
135static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c)
136{
137 struct ioat_chan_common *chan = to_chan_common(c);
138
139 return container_of(chan, struct ioat_dma_chan, base);
140}
141
142/**
143 * ioat_is_complete - poll the status of an ioat transaction
144 * @c: channel handle
145 * @cookie: transaction identifier
146 * @done: if set, updated with last completed transaction
147 * @used: if set, updated with last used transaction
148 */
149static inline enum dma_status
150ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie,
151 dma_cookie_t *done, dma_cookie_t *used)
152{
153 struct ioat_chan_common *chan = to_chan_common(c);
154 dma_cookie_t last_used;
155 dma_cookie_t last_complete;
156
157 last_used = c->cookie;
158 last_complete = chan->completed_cookie;
159
160 if (done)
161 *done = last_complete;
162 if (used)
163 *used = last_used;
164
165 return dma_async_is_complete(cookie, last_complete, last_used);
166}
167
/* wrapper around hardware descriptor format + additional software fields */

/**
 * struct ioat_desc_sw - wrapper around hardware descriptor
 * @hw: hardware DMA descriptor (for memcpy)
 * @node: this descriptor will either be on the free list,
 * or attached to a transaction list (tx_list)
 * @len: presumably the transfer length covered by this descriptor - TODO
 * confirm against the users in dma.c
 * @tx_list: presumably the list of descriptors making up one transaction
 * (see @node) - TODO confirm against the users in dma.c
 * @txd: the generic software descriptor for all engines
 * @id: identifier for debug
 */
struct ioat_desc_sw {
	struct ioat_dma_descriptor *hw;
	struct list_head node;
	size_t len;
	struct list_head tx_list;
	struct dma_async_tx_descriptor txd;
	#ifdef DEBUG
	int id;
	#endif
};
188
/*
 * Descriptor debug ids only exist in DEBUG builds; otherwise set_desc_id()
 * expands to nothing and desc_id() evaluates to 0.
 */
#ifdef DEBUG
#define set_desc_id(desc, i) ((desc)->id = (i))
#define desc_id(desc) ((desc)->id)
#else
#define set_desc_id(desc, i)
#define desc_id(desc) (0)
#endif
196
197static inline void
198__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw,
199 struct dma_async_tx_descriptor *tx, int id)
200{
201 struct device *dev = to_dev(chan);
202
203 dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x"
204 " ctl: %#x (op: %d int_en: %d compl: %d)\n", id,
205 (unsigned long long) tx->phys,
206 (unsigned long long) hw->next, tx->cookie, tx->flags,
207 hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write);
208}
209
/*
 * dump_desc_dbg - debug-print descriptor @d of channel @c, ignoring a NULL @d
 *
 * Both arguments are parenthesized so that expression arguments expand
 * correctly.  @d is evaluated more than once, so it must not have side
 * effects.  The statement expression evaluates to 0.
 */
#define dump_desc_dbg(c, d) \
	({ if (d) __dump_desc_dbg(&(c)->base, (d)->hw, &(d)->txd, desc_id(d)); 0; })
212
/*
 * ioat_set_tcp_copy_break - set sysctl_tcp_dma_copybreak to @copybreak;
 * does nothing unless CONFIG_NET_DMA is enabled
 */
static inline void ioat_set_tcp_copy_break(unsigned long copybreak)
{
#if defined(CONFIG_NET_DMA)
	sysctl_tcp_dma_copybreak = copybreak;
#endif
}
219
220static inline struct ioat_chan_common *
221ioat_chan_by_index(struct ioatdma_device *device, int index)
222{
223 return device->idx[index];
224}
225
226static inline u64 ioat_chansts(struct ioat_chan_common *chan)
227{
228 u8 ver = chan->device->version;
229 u64 status;
230 u32 status_lo;
231
232 /* We need to read the low address first as this causes the
233 * chipset to latch the upper bits for the subsequent read
234 */
235 status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver));
236 status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver));
237 status <<= 32;
238 status |= status_lo;
239
240 return status;
241}
242
243static inline void ioat_start(struct ioat_chan_common *chan)
244{
245 u8 ver = chan->device->version;
246
247 writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
248}
249
250static inline u64 ioat_chansts_to_addr(u64 status)
251{
252 return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
253}
254
255static inline u32 ioat_chanerr(struct ioat_chan_common *chan)
256{
257 return readl(chan->reg_base + IOAT_CHANERR_OFFSET);
258}
259
260static inline void ioat_suspend(struct ioat_chan_common *chan)
261{
262 u8 ver = chan->device->version;
263
264 writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
265}
266
267static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr)
268{
269 struct ioat_chan_common *chan = &ioat->base;
270
271 writel(addr & 0x00000000FFFFFFFF,
272 chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
273 writel(addr >> 32,
274 chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
275}
276
277static inline bool is_ioat_active(unsigned long status)
278{
279 return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
280}
281
282static inline bool is_ioat_idle(unsigned long status)
283{
284 return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE);
285}
286
287static inline bool is_ioat_halted(unsigned long status)
288{
289 return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
290}
291
292static inline bool is_ioat_suspended(unsigned long status)
293{
294 return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
295}
296
297/* channel was fatally programmed */
298static inline bool is_ioat_bug(unsigned long err)
299{
300 return !!(err & (IOAT_CHANERR_SRC_ADDR_ERR|IOAT_CHANERR_DEST_ADDR_ERR|
301 IOAT_CHANERR_NEXT_ADDR_ERR|IOAT_CHANERR_CONTROL_ERR|
302 IOAT_CHANERR_LENGTH_ERR));
303}
304
305static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
306 int direction, enum dma_ctrl_flags flags, bool dst)
307{
308 if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
309 (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
310 pci_unmap_single(pdev, addr, len, direction);
311 else
312 pci_unmap_page(pdev, addr, len, direction);
313}
314
/* device probe / registration / removal entry points */
int __devinit ioat_probe(struct ioatdma_device *device);
int __devinit ioat_register(struct ioatdma_device *device);
int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca);
int __devinit ioat_dma_self_test(struct ioatdma_device *device);
void __devexit ioat_dma_remove(struct ioatdma_device *device);
struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev,
					      void __iomem *iobase);
/* helpers shared between the hardware version specific implementations */
unsigned long ioat_get_current_completion(struct ioat_chan_common *chan);
void ioat_init_channel(struct ioatdma_device *device,
		       struct ioat_chan_common *chan, int idx,
		       void (*timer_fn)(unsigned long),
		       void (*tasklet)(unsigned long),
		       unsigned long ioat);
void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
		    size_t len, struct ioat_dma_descriptor *hw);
bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
			   unsigned long *phys_complete);
/* per-channel sysfs support */
void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
void ioat_kobject_del(struct ioatdma_device *device);
extern struct sysfs_ops ioat_sysfs_ops;
extern struct ioat_sysfs_entry ioat_version_attr;
extern struct ioat_sysfs_entry ioat_cap_attr;
337#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
new file mode 100644
index 000000000000..96ffab7d37a7
--- /dev/null
+++ b/drivers/dma/ioat/dma_v2.c
@@ -0,0 +1,871 @@
1/*
2 * Intel I/OAT DMA Linux driver
3 * Copyright(c) 2004 - 2009 Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 */
22
23/*
24 * This driver supports an Intel I/OAT DMA engine (versions >= 2), which
25 * does asynchronous data movement and checksumming operations.
26 */
27
28#include <linux/init.h>
29#include <linux/module.h>
30#include <linux/pci.h>
31#include <linux/interrupt.h>
32#include <linux/dmaengine.h>
33#include <linux/delay.h>
34#include <linux/dma-mapping.h>
35#include <linux/workqueue.h>
36#include <linux/i7300_idle.h>
37#include "dma.h"
38#include "dma_v2.h"
39#include "registers.h"
40#include "hw.h"
41
/* module parameters controlling the descriptor ring size (mode 0644) */
int ioat_ring_alloc_order = 8;
module_param(ioat_ring_alloc_order, int, 0644);
MODULE_PARM_DESC(ioat_ring_alloc_order,
		 "ioat2+: allocate 2^n descriptors per channel"
		 " (default: 8 max: 16)");
static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
module_param(ioat_ring_max_alloc_order, int, 0644);
MODULE_PARM_DESC(ioat_ring_max_alloc_order,
		 "ioat2+: upper limit for ring size (default: 16)");
51
52void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
53{
54 void * __iomem reg_base = ioat->base.reg_base;
55
56 ioat->pending = 0;
57 ioat->dmacount += ioat2_ring_pending(ioat);
58 ioat->issued = ioat->head;
59 /* make descriptor updates globally visible before notifying channel */
60 wmb();
61 writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
62 dev_dbg(to_dev(&ioat->base),
63 "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
64 __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
65}
66
67void ioat2_issue_pending(struct dma_chan *chan)
68{
69 struct ioat2_dma_chan *ioat = to_ioat2_chan(chan);
70
71 spin_lock_bh(&ioat->ring_lock);
72 if (ioat->pending == 1)
73 __ioat2_issue_pending(ioat);
74 spin_unlock_bh(&ioat->ring_lock);
75}
76
77/**
78 * ioat2_update_pending - log pending descriptors
79 * @ioat: ioat2+ channel
80 *
81 * set pending to '1' unless pending is already set to '2', pending == 2
82 * indicates that submission is temporarily blocked due to an in-flight
83 * reset. If we are already above the ioat_pending_level threshold then
84 * just issue pending.
85 *
86 * called with ring_lock held
87 */
88static void ioat2_update_pending(struct ioat2_dma_chan *ioat)
89{
90 if (unlikely(ioat->pending == 2))
91 return;
92 else if (ioat2_ring_pending(ioat) > ioat_pending_level)
93 __ioat2_issue_pending(ioat);
94 else
95 ioat->pending = 1;
96}
97
98static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
99{
100 struct ioat_ring_ent *desc;
101 struct ioat_dma_descriptor *hw;
102 int idx;
103
104 if (ioat2_ring_space(ioat) < 1) {
105 dev_err(to_dev(&ioat->base),
106 "Unable to start null desc - ring full\n");
107 return;
108 }
109
110 dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n",
111 __func__, ioat->head, ioat->tail, ioat->issued);
112 idx = ioat2_desc_alloc(ioat, 1);
113 desc = ioat2_get_ring_ent(ioat, idx);
114
115 hw = desc->hw;
116 hw->ctl = 0;
117 hw->ctl_f.null = 1;
118 hw->ctl_f.int_en = 1;
119 hw->ctl_f.compl_write = 1;
120 /* set size to non-zero value (channel returns error when size is 0) */
121 hw->size = NULL_DESC_BUFFER_SIZE;
122 hw->src_addr = 0;
123 hw->dst_addr = 0;
124 async_tx_ack(&desc->txd);
125 ioat2_set_chainaddr(ioat, desc->txd.phys);
126 dump_desc_dbg(ioat, desc);
127 __ioat2_issue_pending(ioat);
128}
129
130static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
131{
132 spin_lock_bh(&ioat->ring_lock);
133 __ioat2_start_null_desc(ioat);
134 spin_unlock_bh(&ioat->ring_lock);
135}
136
/*
 * __cleanup - retire descriptors up to and including the one at phys_complete
 * @ioat: ioat2+ channel
 * @phys_complete: bus address of the last descriptor the hardware completed
 *
 * Walks the active part of the ring starting at tail.  Every descriptor that
 * still carries a cookie is unmapped, recorded as the completed cookie and
 * has its callback invoked once.  The walk stops after the descriptor whose
 * bus address equals @phys_complete; not finding it is treated as a bug.
 * The callers visible in this file hold both cleanup_lock and ring_lock.
 */
static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
{
	struct ioat_chan_common *chan = &ioat->base;
	struct dma_async_tx_descriptor *tx;
	struct ioat_ring_ent *desc;
	bool seen_current = false;
	u16 active;
	int i;

	dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
		__func__, ioat->head, ioat->tail, ioat->issued);

	active = ioat2_ring_active(ioat);
	for (i = 0; i < active && !seen_current; i++) {
		/* warm the cache for the entry handled by the next iteration */
		prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
		desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
		tx = &desc->txd;
		dump_desc_dbg(ioat, desc);
		/* a zero cookie means there is nothing to complete for this entry */
		if (tx->cookie) {
			ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
			chan->completed_cookie = tx->cookie;
			tx->cookie = 0;
			if (tx->callback) {
				tx->callback(tx->callback_param);
				/* make sure the callback cannot run a second time */
				tx->callback = NULL;
			}
		}

		if (tx->phys == phys_complete)
			seen_current = true;
	}
	/* i is the number of entries retired by the loop above */
	ioat->tail += i;
	BUG_ON(!seen_current); /* no active descs have written a completion? */

	chan->last_completion = phys_complete;
	/* ring drained: stop expecting completions and re-arm the idle timeout */
	if (ioat->head == ioat->tail) {
		dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
			__func__);
		clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
		mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
	}
}
179
180/**
181 * ioat2_cleanup - clean finished descriptors (advance tail pointer)
182 * @chan: ioat channel to be cleaned up
183 */
184static void ioat2_cleanup(struct ioat2_dma_chan *ioat)
185{
186 struct ioat_chan_common *chan = &ioat->base;
187 unsigned long phys_complete;
188
189 prefetch(chan->completion);
190
191 if (!spin_trylock_bh(&chan->cleanup_lock))
192 return;
193
194 if (!ioat_cleanup_preamble(chan, &phys_complete)) {
195 spin_unlock_bh(&chan->cleanup_lock);
196 return;
197 }
198
199 if (!spin_trylock_bh(&ioat->ring_lock)) {
200 spin_unlock_bh(&chan->cleanup_lock);
201 return;
202 }
203
204 __cleanup(ioat, phys_complete);
205
206 spin_unlock_bh(&ioat->ring_lock);
207 spin_unlock_bh(&chan->cleanup_lock);
208}
209
210void ioat2_cleanup_tasklet(unsigned long data)
211{
212 struct ioat2_dma_chan *ioat = (void *) data;
213
214 ioat2_cleanup(ioat);
215 writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
216}
217
218void __ioat2_restart_chan(struct ioat2_dma_chan *ioat)
219{
220 struct ioat_chan_common *chan = &ioat->base;
221
222 /* set the tail to be re-issued */
223 ioat->issued = ioat->tail;
224 ioat->dmacount = 0;
225 set_bit(IOAT_COMPLETION_PENDING, &chan->state);
226 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
227
228 dev_dbg(to_dev(chan),
229 "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
230 __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
231
232 if (ioat2_ring_pending(ioat)) {
233 struct ioat_ring_ent *desc;
234
235 desc = ioat2_get_ring_ent(ioat, ioat->tail);
236 ioat2_set_chainaddr(ioat, desc->txd.phys);
237 __ioat2_issue_pending(ioat);
238 } else
239 __ioat2_start_null_desc(ioat);
240}
241
242static void ioat2_restart_channel(struct ioat2_dma_chan *ioat)
243{
244 struct ioat_chan_common *chan = &ioat->base;
245 unsigned long phys_complete;
246 u32 status;
247
248 status = ioat_chansts(chan);
249 if (is_ioat_active(status) || is_ioat_idle(status))
250 ioat_suspend(chan);
251 while (is_ioat_active(status) || is_ioat_idle(status)) {
252 status = ioat_chansts(chan);
253 cpu_relax();
254 }
255
256 if (ioat_cleanup_preamble(chan, &phys_complete))
257 __cleanup(ioat, phys_complete);
258
259 __ioat2_restart_chan(ioat);
260}
261
/*
 * ioat2_timer_event - per-channel watchdog / idle timer
 * @data: the struct ioat2_dma_chan, cast to unsigned long
 *
 * With a completion pending: checks a halted channel for fatal programming
 * errors, then either cleans up finished descriptors, restarts the channel
 * (when a previous expiry already noted the lack of progress), or notes the
 * lack of progress and re-arms the completion timeout.
 *
 * Without a completion pending: tries to shrink an oversized, empty ring by
 * one order per expiry until the default order is reached.
 */
void ioat2_timer_event(unsigned long data)
{
	struct ioat2_dma_chan *ioat = (void *) data;
	struct ioat_chan_common *chan = &ioat->base;

	/* lock order: cleanup_lock first, ring_lock nested inside it */
	spin_lock_bh(&chan->cleanup_lock);
	if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
		unsigned long phys_complete;
		u64 status;

		spin_lock_bh(&ioat->ring_lock);
		status = ioat_chansts(chan);

		/* when halted due to errors check for channel
		 * programming errors before advancing the completion state
		 */
		if (is_ioat_halted(status)) {
			u32 chanerr;

			chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
			BUG_ON(is_ioat_bug(chanerr));
		}

		/* if we haven't made progress and we have already
		 * acknowledged a pending completion once, then be more
		 * forceful with a restart
		 */
		if (ioat_cleanup_preamble(chan, &phys_complete))
			__cleanup(ioat, phys_complete);
		else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
			ioat2_restart_channel(ioat);
		else {
			/* first expiry without progress: remember it and wait once more */
			set_bit(IOAT_COMPLETION_ACK, &chan->state);
			mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
		}
		spin_unlock_bh(&ioat->ring_lock);
	} else {
		u16 active;

		/* if the ring is idle, empty, and oversized try to step
		 * down the size
		 */
		spin_lock_bh(&ioat->ring_lock);
		active = ioat2_ring_active(ioat);
		if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
			reshape_ring(ioat, ioat->alloc_order-1);
		spin_unlock_bh(&ioat->ring_lock);

		/* keep shrinking until we get back to our minimum
		 * default size
		 */
		if (ioat->alloc_order > ioat_get_alloc_order())
			mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
	}
	spin_unlock_bh(&chan->cleanup_lock);
}
318
319/**
320 * ioat2_enumerate_channels - find and initialize the device's channels
321 * @device: the device to be enumerated
322 */
323int ioat2_enumerate_channels(struct ioatdma_device *device)
324{
325 struct ioat2_dma_chan *ioat;
326 struct device *dev = &device->pdev->dev;
327 struct dma_device *dma = &device->common;
328 u8 xfercap_log;
329 int i;
330
331 INIT_LIST_HEAD(&dma->channels);
332 dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
333 dma->chancnt &= 0x1f; /* bits [4:0] valid */
334 if (dma->chancnt > ARRAY_SIZE(device->idx)) {
335 dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
336 dma->chancnt, ARRAY_SIZE(device->idx));
337 dma->chancnt = ARRAY_SIZE(device->idx);
338 }
339 xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
340 xfercap_log &= 0x1f; /* bits [4:0] valid */
341 if (xfercap_log == 0)
342 return 0;
343 dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log);
344
345 /* FIXME which i/oat version is i7300? */
346#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
347 if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
348 dma->chancnt--;
349#endif
350 for (i = 0; i < dma->chancnt; i++) {
351 ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
352 if (!ioat)
353 break;
354
355 ioat_init_channel(device, &ioat->base, i,
356 device->timer_fn,
357 device->cleanup_tasklet,
358 (unsigned long) ioat);
359 ioat->xfercap_log = xfercap_log;
360 spin_lock_init(&ioat->ring_lock);
361 }
362 dma->chancnt = i;
363 return i;
364}
365
366static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
367{
368 struct dma_chan *c = tx->chan;
369 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
370 struct ioat_chan_common *chan = &ioat->base;
371 dma_cookie_t cookie = c->cookie;
372
373 cookie++;
374 if (cookie < 0)
375 cookie = 1;
376 tx->cookie = cookie;
377 c->cookie = cookie;
378 dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
379
380 if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
381 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
382 ioat2_update_pending(ioat);
383 spin_unlock_bh(&ioat->ring_lock);
384
385 return cookie;
386}
387
388static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
389{
390 struct ioat_dma_descriptor *hw;
391 struct ioat_ring_ent *desc;
392 struct ioatdma_device *dma;
393 dma_addr_t phys;
394
395 dma = to_ioatdma_device(chan->device);
396 hw = pci_pool_alloc(dma->dma_pool, flags, &phys);
397 if (!hw)
398 return NULL;
399 memset(hw, 0, sizeof(*hw));
400
401 desc = kmem_cache_alloc(ioat2_cache, flags);
402 if (!desc) {
403 pci_pool_free(dma->dma_pool, hw, phys);
404 return NULL;
405 }
406 memset(desc, 0, sizeof(*desc));
407
408 dma_async_tx_descriptor_init(&desc->txd, chan);
409 desc->txd.tx_submit = ioat2_tx_submit_unlock;
410 desc->hw = hw;
411 desc->txd.phys = phys;
412 return desc;
413}
414
415static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
416{
417 struct ioatdma_device *dma;
418
419 dma = to_ioatdma_device(chan->device);
420 pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys);
421 kmem_cache_free(ioat2_cache, desc);
422}
423
424static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
425{
426 struct ioat_ring_ent **ring;
427 int descs = 1 << order;
428 int i;
429
430 if (order > ioat_get_max_alloc_order())
431 return NULL;
432
433 /* allocate the array to hold the software ring */
434 ring = kcalloc(descs, sizeof(*ring), flags);
435 if (!ring)
436 return NULL;
437 for (i = 0; i < descs; i++) {
438 ring[i] = ioat2_alloc_ring_ent(c, flags);
439 if (!ring[i]) {
440 while (i--)
441 ioat2_free_ring_ent(ring[i], c);
442 kfree(ring);
443 return NULL;
444 }
445 set_desc_id(ring[i], i);
446 }
447
448 /* link descs */
449 for (i = 0; i < descs-1; i++) {
450 struct ioat_ring_ent *next = ring[i+1];
451 struct ioat_dma_descriptor *hw = ring[i]->hw;
452
453 hw->next = next->txd.phys;
454 }
455 ring[i]->hw->next = ring[0]->txd.phys;
456
457 return ring;
458}
459
460/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring
461 * @chan: channel to be initialized
462 */
463int ioat2_alloc_chan_resources(struct dma_chan *c)
464{
465 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
466 struct ioat_chan_common *chan = &ioat->base;
467 struct ioat_ring_ent **ring;
468 u32 chanerr;
469 int order;
470
471 /* have we already been set up? */
472 if (ioat->ring)
473 return 1 << ioat->alloc_order;
474
475 /* Setup register to interrupt and write completion status on error */
476 writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
477
478 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
479 if (chanerr) {
480 dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
481 writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
482 }
483
484 /* allocate a completion writeback area */
485 /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
486 chan->completion = pci_pool_alloc(chan->device->completion_pool,
487 GFP_KERNEL, &chan->completion_dma);
488 if (!chan->completion)
489 return -ENOMEM;
490
491 memset(chan->completion, 0, sizeof(*chan->completion));
492 writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
493 chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
494 writel(((u64) chan->completion_dma) >> 32,
495 chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
496
497 order = ioat_get_alloc_order();
498 ring = ioat2_alloc_ring(c, order, GFP_KERNEL);
499 if (!ring)
500 return -ENOMEM;
501
502 spin_lock_bh(&ioat->ring_lock);
503 ioat->ring = ring;
504 ioat->head = 0;
505 ioat->issued = 0;
506 ioat->tail = 0;
507 ioat->pending = 0;
508 ioat->alloc_order = order;
509 spin_unlock_bh(&ioat->ring_lock);
510
511 tasklet_enable(&chan->cleanup_task);
512 ioat2_start_null_desc(ioat);
513
514 return 1 << ioat->alloc_order;
515}
516
517bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
518{
519 /* reshape differs from normal ring allocation in that we want
520 * to allocate a new software ring while only
521 * extending/truncating the hardware ring
522 */
523 struct ioat_chan_common *chan = &ioat->base;
524 struct dma_chan *c = &chan->common;
525 const u16 curr_size = ioat2_ring_mask(ioat) + 1;
526 const u16 active = ioat2_ring_active(ioat);
527 const u16 new_size = 1 << order;
528 struct ioat_ring_ent **ring;
529 u16 i;
530
531 if (order > ioat_get_max_alloc_order())
532 return false;
533
534 /* double check that we have at least 1 free descriptor */
535 if (active == curr_size)
536 return false;
537
538 /* when shrinking, verify that we can hold the current active
539 * set in the new ring
540 */
541 if (active >= new_size)
542 return false;
543
544 /* allocate the array to hold the software ring */
545 ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
546 if (!ring)
547 return false;
548
549 /* allocate/trim descriptors as needed */
550 if (new_size > curr_size) {
551 /* copy current descriptors to the new ring */
552 for (i = 0; i < curr_size; i++) {
553 u16 curr_idx = (ioat->tail+i) & (curr_size-1);
554 u16 new_idx = (ioat->tail+i) & (new_size-1);
555
556 ring[new_idx] = ioat->ring[curr_idx];
557 set_desc_id(ring[new_idx], new_idx);
558 }
559
560 /* add new descriptors to the ring */
561 for (i = curr_size; i < new_size; i++) {
562 u16 new_idx = (ioat->tail+i) & (new_size-1);
563
564 ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT);
565 if (!ring[new_idx]) {
566 while (i--) {
567 u16 new_idx = (ioat->tail+i) & (new_size-1);
568
569 ioat2_free_ring_ent(ring[new_idx], c);
570 }
571 kfree(ring);
572 return false;
573 }
574 set_desc_id(ring[new_idx], new_idx);
575 }
576
577 /* hw link new descriptors */
578 for (i = curr_size-1; i < new_size; i++) {
579 u16 new_idx = (ioat->tail+i) & (new_size-1);
580 struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)];
581 struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
582
583 hw->next = next->txd.phys;
584 }
585 } else {
586 struct ioat_dma_descriptor *hw;
587 struct ioat_ring_ent *next;
588
589 /* copy current descriptors to the new ring, dropping the
590 * removed descriptors
591 */
592 for (i = 0; i < new_size; i++) {
593 u16 curr_idx = (ioat->tail+i) & (curr_size-1);
594 u16 new_idx = (ioat->tail+i) & (new_size-1);
595
596 ring[new_idx] = ioat->ring[curr_idx];
597 set_desc_id(ring[new_idx], new_idx);
598 }
599
600 /* free deleted descriptors */
601 for (i = new_size; i < curr_size; i++) {
602 struct ioat_ring_ent *ent;
603
604 ent = ioat2_get_ring_ent(ioat, ioat->tail+i);
605 ioat2_free_ring_ent(ent, c);
606 }
607
608 /* fix up hardware ring */
609 hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw;
610 next = ring[(ioat->tail+new_size) & (new_size-1)];
611 hw->next = next->txd.phys;
612 }
613
614 dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
615 __func__, new_size);
616
617 kfree(ioat->ring);
618 ioat->ring = ring;
619 ioat->alloc_order = order;
620
621 return true;
622}
623
624/**
625 * ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops
626 * @idx: gets starting descriptor index on successful allocation
627 * @ioat: ioat2,3 channel (ring) to operate on
628 * @num_descs: allocation length
629 */
630int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs)
631{
632 struct ioat_chan_common *chan = &ioat->base;
633
634 spin_lock_bh(&ioat->ring_lock);
635 /* never allow the last descriptor to be consumed, we need at
636 * least one free at all times to allow for on-the-fly ring
637 * resizing.
638 */
639 while (unlikely(ioat2_ring_space(ioat) <= num_descs)) {
640 if (reshape_ring(ioat, ioat->alloc_order + 1) &&
641 ioat2_ring_space(ioat) > num_descs)
642 break;
643
644 if (printk_ratelimit())
645 dev_dbg(to_dev(chan),
646 "%s: ring full! num_descs: %d (%x:%x:%x)\n",
647 __func__, num_descs, ioat->head, ioat->tail,
648 ioat->issued);
649 spin_unlock_bh(&ioat->ring_lock);
650
651 /* progress reclaim in the allocation failure case we
652 * may be called under bh_disabled so we need to trigger
653 * the timer event directly
654 */
655 spin_lock_bh(&chan->cleanup_lock);
656 if (jiffies > chan->timer.expires &&
657 timer_pending(&chan->timer)) {
658 struct ioatdma_device *device = chan->device;
659
660 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
661 spin_unlock_bh(&chan->cleanup_lock);
662 device->timer_fn((unsigned long) ioat);
663 } else
664 spin_unlock_bh(&chan->cleanup_lock);
665 return -ENOMEM;
666 }
667
668 dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n",
669 __func__, num_descs, ioat->head, ioat->tail, ioat->issued);
670
671 *idx = ioat2_desc_alloc(ioat, num_descs);
672 return 0; /* with ioat->ring_lock held */
673}
674
675struct dma_async_tx_descriptor *
676ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
677 dma_addr_t dma_src, size_t len, unsigned long flags)
678{
679 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
680 struct ioat_dma_descriptor *hw;
681 struct ioat_ring_ent *desc;
682 dma_addr_t dst = dma_dest;
683 dma_addr_t src = dma_src;
684 size_t total_len = len;
685 int num_descs;
686 u16 idx;
687 int i;
688
689 num_descs = ioat2_xferlen_to_descs(ioat, len);
690 if (likely(num_descs) &&
691 ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
692 /* pass */;
693 else
694 return NULL;
695 i = 0;
696 do {
697 size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log);
698
699 desc = ioat2_get_ring_ent(ioat, idx + i);
700 hw = desc->hw;
701
702 hw->size = copy;
703 hw->ctl = 0;
704 hw->src_addr = src;
705 hw->dst_addr = dst;
706
707 len -= copy;
708 dst += copy;
709 src += copy;
710 dump_desc_dbg(ioat, desc);
711 } while (++i < num_descs);
712
713 desc->txd.flags = flags;
714 desc->len = total_len;
715 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
716 hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
717 hw->ctl_f.compl_write = 1;
718 dump_desc_dbg(ioat, desc);
719 /* we leave the channel locked to ensure in order submission */
720
721 return &desc->txd;
722}
723
724/**
725 * ioat2_free_chan_resources - release all the descriptors
726 * @chan: the channel to be cleaned
727 */
728void ioat2_free_chan_resources(struct dma_chan *c)
729{
730 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
731 struct ioat_chan_common *chan = &ioat->base;
732 struct ioatdma_device *device = chan->device;
733 struct ioat_ring_ent *desc;
734 const u16 total_descs = 1 << ioat->alloc_order;
735 int descs;
736 int i;
737
738 /* Before freeing channel resources first check
739 * if they have been previously allocated for this channel.
740 */
741 if (!ioat->ring)
742 return;
743
744 tasklet_disable(&chan->cleanup_task);
745 del_timer_sync(&chan->timer);
746 device->cleanup_tasklet((unsigned long) ioat);
747
748 /* Delay 100ms after reset to allow internal DMA logic to quiesce
749 * before removing DMA descriptor resources.
750 */
751 writeb(IOAT_CHANCMD_RESET,
752 chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
753 mdelay(100);
754
755 spin_lock_bh(&ioat->ring_lock);
756 descs = ioat2_ring_space(ioat);
757 dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs);
758 for (i = 0; i < descs; i++) {
759 desc = ioat2_get_ring_ent(ioat, ioat->head + i);
760 ioat2_free_ring_ent(desc, c);
761 }
762
763 if (descs < total_descs)
764 dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
765 total_descs - descs);
766
767 for (i = 0; i < total_descs - descs; i++) {
768 desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
769 dump_desc_dbg(ioat, desc);
770 ioat2_free_ring_ent(desc, c);
771 }
772
773 kfree(ioat->ring);
774 ioat->ring = NULL;
775 ioat->alloc_order = 0;
776 pci_pool_free(device->completion_pool, chan->completion,
777 chan->completion_dma);
778 spin_unlock_bh(&ioat->ring_lock);
779
780 chan->last_completion = 0;
781 chan->completion_dma = 0;
782 ioat->pending = 0;
783 ioat->dmacount = 0;
784}
785
786enum dma_status
787ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
788 dma_cookie_t *done, dma_cookie_t *used)
789{
790 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
791 struct ioatdma_device *device = ioat->base.device;
792
793 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
794 return DMA_SUCCESS;
795
796 device->cleanup_tasklet((unsigned long) ioat);
797
798 return ioat_is_complete(c, cookie, done, used);
799}
800
801static ssize_t ring_size_show(struct dma_chan *c, char *page)
802{
803 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
804
805 return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1);
806}
807static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
808
809static ssize_t ring_active_show(struct dma_chan *c, char *page)
810{
811 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
812
813 /* ...taken outside the lock, no need to be precise */
814 return sprintf(page, "%d\n", ioat2_ring_active(ioat));
815}
816static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
817
/* NULL-terminated attribute list used as ioat2_ktype's default_attrs:
 * the two ring attributes defined above plus the shared 'cap' and
 * 'version' entries (ioat_cap_attr / ioat_version_attr are defined
 * outside this section)
 */
static struct attribute *ioat2_attrs[] = {
	&ring_size_attr.attr,
	&ring_active_attr.attr,
	&ioat_cap_attr.attr,
	&ioat_version_attr.attr,
	NULL,
};
825
/* kobject type handed to ioat_kobject_add() by ioat2_dma_probe(); routes
 * show/store through ioat_sysfs_ops and exposes ioat2_attrs
 */
struct kobj_type ioat2_ktype = {
	.sysfs_ops = &ioat_sysfs_ops,
	.default_attrs = ioat2_attrs,
};
830
831int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
832{
833 struct pci_dev *pdev = device->pdev;
834 struct dma_device *dma;
835 struct dma_chan *c;
836 struct ioat_chan_common *chan;
837 int err;
838
839 device->enumerate_channels = ioat2_enumerate_channels;
840 device->cleanup_tasklet = ioat2_cleanup_tasklet;
841 device->timer_fn = ioat2_timer_event;
842 device->self_test = ioat_dma_self_test;
843 dma = &device->common;
844 dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
845 dma->device_issue_pending = ioat2_issue_pending;
846 dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
847 dma->device_free_chan_resources = ioat2_free_chan_resources;
848 dma->device_is_tx_complete = ioat2_is_complete;
849
850 err = ioat_probe(device);
851 if (err)
852 return err;
853 ioat_set_tcp_copy_break(2048);
854
855 list_for_each_entry(c, &dma->channels, device_node) {
856 chan = to_chan_common(c);
857 writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU,
858 chan->reg_base + IOAT_DCACTRL_OFFSET);
859 }
860
861 err = ioat_register(device);
862 if (err)
863 return err;
864
865 ioat_kobject_add(device, &ioat2_ktype);
866
867 if (dca)
868 device->dca = ioat2_dca_init(pdev, device->reg_base);
869
870 return err;
871}
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
new file mode 100644
index 000000000000..1d849ef74d5f
--- /dev/null
+++ b/drivers/dma/ioat/dma_v2.h
@@ -0,0 +1,190 @@
1/*
2 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59
16 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * The full GNU General Public License is included in this distribution in the
19 * file called COPYING.
20 */
21#ifndef IOATDMA_V2_H
22#define IOATDMA_V2_H
23
24#include <linux/dmaengine.h>
25#include "dma.h"
26#include "hw.h"
27
28
/* tunables defined outside this header */
extern int ioat_pending_level;
extern int ioat_ring_alloc_order;

/*
 * workaround for IOAT ver.3.0 null descriptor issue
 * (channel returns error when size is 0)
 */
#define NULL_DESC_BUFFER_SIZE 1

/* upper bound on log2 of the ring size; both helpers below clamp the
 * respective tunable to it
 */
#define IOAT_MAX_ORDER 16
/* initial ring order: ioat_ring_alloc_order, capped at IOAT_MAX_ORDER */
#define ioat_get_alloc_order() \
	(min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
/* largest order a ring may grow to.  NOTE: ioat_ring_max_alloc_order is
 * not declared in this header, so users of this macro must have it in
 * scope themselves
 */
#define ioat_get_max_alloc_order() \
	(min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER))
43
/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes
 * @base: common ioat channel parameters
 * @xfercap_log: log2 of channel max transfer length (for fast division)
 * @head: allocated index
 * @issued: hardware notification point
 * @tail: cleanup index
 * @pending: lock free indicator for issued != head
 * @dmacount: identical to 'head' except for occasionally resetting to zero
 * @alloc_order: log2 of the number of allocated descriptors
 * @ring: software ring buffer implementation of hardware ring
 * @ring_lock: protects ring attributes
 *
 * head, issued and tail are free-running 16-bit counters; they are reduced
 * to ring slots by masking with ioat2_ring_mask().
 */
struct ioat2_dma_chan {
	struct ioat_chan_common base;
	size_t xfercap_log;
	u16 head;
	u16 issued;
	u16 tail;
	u16 dmacount;
	u16 alloc_order;
	int pending;
	struct ioat_ring_ent **ring;
	spinlock_t ring_lock;
};
68
69static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c)
70{
71 struct ioat_chan_common *chan = to_chan_common(c);
72
73 return container_of(chan, struct ioat2_dma_chan, base);
74}
75
76static inline u16 ioat2_ring_mask(struct ioat2_dma_chan *ioat)
77{
78 return (1 << ioat->alloc_order) - 1;
79}
80
81/* count of descriptors in flight with the engine */
82static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat)
83{
84 return (ioat->head - ioat->tail) & ioat2_ring_mask(ioat);
85}
86
87/* count of descriptors pending submission to hardware */
88static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat)
89{
90 return (ioat->head - ioat->issued) & ioat2_ring_mask(ioat);
91}
92
93static inline u16 ioat2_ring_space(struct ioat2_dma_chan *ioat)
94{
95 u16 num_descs = ioat2_ring_mask(ioat) + 1;
96 u16 active = ioat2_ring_active(ioat);
97
98 BUG_ON(active > num_descs);
99
100 return num_descs - active;
101}
102
103/* assumes caller already checked space */
104static inline u16 ioat2_desc_alloc(struct ioat2_dma_chan *ioat, u16 len)
105{
106 ioat->head += len;
107 return ioat->head - len;
108}
109
110static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len)
111{
112 u16 num_descs = len >> ioat->xfercap_log;
113
114 num_descs += !!(len & ((1 << ioat->xfercap_log) - 1));
115 return num_descs;
116}
117
/**
 * struct ioat_ring_ent - wrapper around hardware descriptor
 * @hw: hardware DMA descriptor (for memcpy)
 * @fill: hardware fill descriptor
 * @xor: hardware xor descriptor
 * @xor_ex: hardware xor extension descriptor
 * @pq: hardware pq descriptor
 * @pq_ex: hardware pq extension descriptor
 * @pqu: hardware pq update descriptor
 * @raw: hardware raw (un-typed) descriptor
 * @txd: the generic software descriptor for all engines
 * @len: total transaction length for unmap
 * @result: asynchronous result of validate operations
 * @id: identifier for debug (only present when DEBUG is defined)
 *
 * The hardware descriptor pointers share one anonymous union: they are
 * alternative typed views of the same pointer, selected by the operation
 * the descriptor was prepared for.
 */

struct ioat_ring_ent {
	union {
		struct ioat_dma_descriptor *hw;
		struct ioat_fill_descriptor *fill;
		struct ioat_xor_descriptor *xor;
		struct ioat_xor_ext_descriptor *xor_ex;
		struct ioat_pq_descriptor *pq;
		struct ioat_pq_ext_descriptor *pq_ex;
		struct ioat_pq_update_descriptor *pqu;
		struct ioat_raw_descriptor *raw;
	};
	size_t len;
	struct dma_async_tx_descriptor txd;
	enum sum_check_flags *result;
	#ifdef DEBUG
	int id;
	#endif
};
152
153static inline struct ioat_ring_ent *
154ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx)
155{
156 return ioat->ring[idx & ioat2_ring_mask(ioat)];
157}
158
159static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr)
160{
161 struct ioat_chan_common *chan = &ioat->base;
162
163 writel(addr & 0x00000000FFFFFFFF,
164 chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
165 writel(addr >> 32,
166 chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
167}
168
/* device probe / DCA setup entry points for v2 and v3 hardware */
int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca);
int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca);
struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
/* returns 0 with ioat->ring_lock held, or -ENOMEM with it released */
int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs);
int ioat2_enumerate_channels(struct ioatdma_device *device);
struct dma_async_tx_descriptor *
ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
			   dma_addr_t dma_src, size_t len, unsigned long flags);
void ioat2_issue_pending(struct dma_chan *chan);
int ioat2_alloc_chan_resources(struct dma_chan *c);
void ioat2_free_chan_resources(struct dma_chan *c);
enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
				  dma_cookie_t *done, dma_cookie_t *used);
void __ioat2_restart_chan(struct ioat2_dma_chan *ioat);
/* resize the software ring to 2^order entries; false if refused */
bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
/* tasklet / timer callbacks; 'data' is the ioat2_dma_chan pointer */
void ioat2_cleanup_tasklet(unsigned long data);
void ioat2_timer_event(unsigned long data);
extern struct kobj_type ioat2_ktype;
extern struct kmem_cache *ioat2_cache;
190#endif /* IOATDMA_V2_H */
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
new file mode 100644
index 000000000000..35d1e33afd5b
--- /dev/null
+++ b/drivers/dma/ioat/dma_v3.c
@@ -0,0 +1,1223 @@
1/*
2 * This file is provided under a dual BSD/GPLv2 license. When using or
3 * redistributing this file, you may do so under either license.
4 *
5 * GPL LICENSE SUMMARY
6 *
7 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms and conditions of the GNU General Public License,
11 * version 2, as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 * The full GNU General Public License is included in this distribution in
23 * the file called "COPYING".
24 *
25 * BSD LICENSE
26 *
27 * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
28 *
29 * Redistribution and use in source and binary forms, with or without
30 * modification, are permitted provided that the following conditions are met:
31 *
32 * * Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * * Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in
36 * the documentation and/or other materials provided with the
37 * distribution.
38 * * Neither the name of Intel Corporation nor the names of its
39 * contributors may be used to endorse or promote products derived
40 * from this software without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
43 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
46 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
47 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
48 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
49 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
50 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
51 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
52 * POSSIBILITY OF SUCH DAMAGE.
53 */
54
55/*
56 * Support routines for v3+ hardware
57 */
58
59#include <linux/pci.h>
60#include <linux/dmaengine.h>
61#include <linux/dma-mapping.h>
62#include "registers.h"
63#include "hw.h"
64#include "dma.h"
65#include "dma_v2.h"
66
/* ioat hardware assumes at least two sources for raid operations, so the
 * descriptor's src_cnt field holds (number of sources - 2); these convert
 * between the hardware encoding and the software count
 */
#define src_cnt_to_sw(x) ((x) + 2)
#define src_cnt_to_hw(x) ((x) - 2)
70
71/* provide a lookup table for setting the source address in the base or
72 * extended descriptor of an xor or pq descriptor
73 */
74static const u8 xor_idx_to_desc __read_mostly = 0xd0;
75static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 };
76static const u8 pq_idx_to_desc __read_mostly = 0xf8;
77static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 };
78
79static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
80{
81 struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
82
83 return raw->field[xor_idx_to_field[idx]];
84}
85
86static void xor_set_src(struct ioat_raw_descriptor *descs[2],
87 dma_addr_t addr, u32 offset, int idx)
88{
89 struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
90
91 raw->field[xor_idx_to_field[idx]] = addr + offset;
92}
93
94static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
95{
96 struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
97
98 return raw->field[pq_idx_to_field[idx]];
99}
100
101static void pq_set_src(struct ioat_raw_descriptor *descs[2],
102 dma_addr_t addr, u32 offset, u8 coef, int idx)
103{
104 struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
105 struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
106
107 raw->field[pq_idx_to_field[idx]] = addr + offset;
108 pq->coef[idx] = coef;
109}
110
111static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
112 struct ioat_ring_ent *desc, int idx)
113{
114 struct ioat_chan_common *chan = &ioat->base;
115 struct pci_dev *pdev = chan->device->pdev;
116 size_t len = desc->len;
117 size_t offset = len - desc->hw->size;
118 struct dma_async_tx_descriptor *tx = &desc->txd;
119 enum dma_ctrl_flags flags = tx->flags;
120
121 switch (desc->hw->ctl_f.op) {
122 case IOAT_OP_COPY:
123 if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
124 ioat_dma_unmap(chan, flags, len, desc->hw);
125 break;
126 case IOAT_OP_FILL: {
127 struct ioat_fill_descriptor *hw = desc->fill;
128
129 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
130 ioat_unmap(pdev, hw->dst_addr - offset, len,
131 PCI_DMA_FROMDEVICE, flags, 1);
132 break;
133 }
134 case IOAT_OP_XOR_VAL:
135 case IOAT_OP_XOR: {
136 struct ioat_xor_descriptor *xor = desc->xor;
137 struct ioat_ring_ent *ext;
138 struct ioat_xor_ext_descriptor *xor_ex = NULL;
139 int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
140 struct ioat_raw_descriptor *descs[2];
141 int i;
142
143 if (src_cnt > 5) {
144 ext = ioat2_get_ring_ent(ioat, idx + 1);
145 xor_ex = ext->xor_ex;
146 }
147
148 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
149 descs[0] = (struct ioat_raw_descriptor *) xor;
150 descs[1] = (struct ioat_raw_descriptor *) xor_ex;
151 for (i = 0; i < src_cnt; i++) {
152 dma_addr_t src = xor_get_src(descs, i);
153
154 ioat_unmap(pdev, src - offset, len,
155 PCI_DMA_TODEVICE, flags, 0);
156 }
157
158 /* dest is a source in xor validate operations */
159 if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
160 ioat_unmap(pdev, xor->dst_addr - offset, len,
161 PCI_DMA_TODEVICE, flags, 1);
162 break;
163 }
164 }
165
166 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
167 ioat_unmap(pdev, xor->dst_addr - offset, len,
168 PCI_DMA_FROMDEVICE, flags, 1);
169 break;
170 }
171 case IOAT_OP_PQ_VAL:
172 case IOAT_OP_PQ: {
173 struct ioat_pq_descriptor *pq = desc->pq;
174 struct ioat_ring_ent *ext;
175 struct ioat_pq_ext_descriptor *pq_ex = NULL;
176 int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
177 struct ioat_raw_descriptor *descs[2];
178 int i;
179
180 if (src_cnt > 3) {
181 ext = ioat2_get_ring_ent(ioat, idx + 1);
182 pq_ex = ext->pq_ex;
183 }
184
185 /* in the 'continue' case don't unmap the dests as sources */
186 if (dmaf_p_disabled_continue(flags))
187 src_cnt--;
188 else if (dmaf_continue(flags))
189 src_cnt -= 3;
190
191 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
192 descs[0] = (struct ioat_raw_descriptor *) pq;
193 descs[1] = (struct ioat_raw_descriptor *) pq_ex;
194 for (i = 0; i < src_cnt; i++) {
195 dma_addr_t src = pq_get_src(descs, i);
196
197 ioat_unmap(pdev, src - offset, len,
198 PCI_DMA_TODEVICE, flags, 0);
199 }
200
201 /* the dests are sources in pq validate operations */
202 if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
203 if (!(flags & DMA_PREP_PQ_DISABLE_P))
204 ioat_unmap(pdev, pq->p_addr - offset,
205 len, PCI_DMA_TODEVICE, flags, 0);
206 if (!(flags & DMA_PREP_PQ_DISABLE_Q))
207 ioat_unmap(pdev, pq->q_addr - offset,
208 len, PCI_DMA_TODEVICE, flags, 0);
209 break;
210 }
211 }
212
213 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
214 if (!(flags & DMA_PREP_PQ_DISABLE_P))
215 ioat_unmap(pdev, pq->p_addr - offset, len,
216 PCI_DMA_BIDIRECTIONAL, flags, 1);
217 if (!(flags & DMA_PREP_PQ_DISABLE_Q))
218 ioat_unmap(pdev, pq->q_addr - offset, len,
219 PCI_DMA_BIDIRECTIONAL, flags, 1);
220 }
221 break;
222 }
223 default:
224 dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
225 __func__, desc->hw->ctl_f.op);
226 }
227}
228
229static bool desc_has_ext(struct ioat_ring_ent *desc)
230{
231 struct ioat_dma_descriptor *hw = desc->hw;
232
233 if (hw->ctl_f.op == IOAT_OP_XOR ||
234 hw->ctl_f.op == IOAT_OP_XOR_VAL) {
235 struct ioat_xor_descriptor *xor = desc->xor;
236
237 if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
238 return true;
239 } else if (hw->ctl_f.op == IOAT_OP_PQ ||
240 hw->ctl_f.op == IOAT_OP_PQ_VAL) {
241 struct ioat_pq_descriptor *pq = desc->pq;
242
243 if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
244 return true;
245 }
246
247 return false;
248}
249
/**
 * __cleanup - reclaim used descriptors
 * @ioat: channel (ring) to clean
 * @phys_complete: descriptor address to stop at; the walk ends after the
 *	ring entry whose txd.phys equals this value
 *
 * The difference from the dma_v2.c __cleanup() is that this routine
 * handles extended descriptors and dma-unmapping raid operations.
 *
 * Walks the active entries from tail.  For each entry that carries a
 * cookie, the channel's completed cookie is updated, its buffers are
 * unmapped and its callback (if any) is invoked once.  It is a BUG if no
 * active entry matches @phys_complete.  The callers visible in this file
 * invoke it with ioat->ring_lock held.
 */
static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
{
	struct ioat_chan_common *chan = &ioat->base;
	struct ioat_ring_ent *desc;
	bool seen_current = false;
	u16 active;
	int i;

	dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
		__func__, ioat->head, ioat->tail, ioat->issued);

	active = ioat2_ring_active(ioat);
	for (i = 0; i < active && !seen_current; i++) {
		struct dma_async_tx_descriptor *tx;

		prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
		desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
		dump_desc_dbg(ioat, desc);
		tx = &desc->txd;
		/* a non-zero cookie marks an entry not yet completed */
		if (tx->cookie) {
			chan->completed_cookie = tx->cookie;
			ioat3_dma_unmap(ioat, desc, ioat->tail + i);
			/* clear cookie and callback so they fire only once */
			tx->cookie = 0;
			if (tx->callback) {
				tx->callback(tx->callback_param);
				tx->callback = NULL;
			}
		}

		if (tx->phys == phys_complete)
			seen_current = true;

		/* skip extended descriptors */
		if (desc_has_ext(desc)) {
			/* the extension must itself be within the active set */
			BUG_ON(i + 1 >= active);
			i++;
		}
	}
	/* i counts every entry consumed, including skipped extensions */
	ioat->tail += i;
	BUG_ON(!seen_current); /* no active descs have written a completion? */
	chan->last_completion = phys_complete;
	/* ring drained: drop the pending state and re-arm the timer with
	 * the idle timeout
	 */
	if (ioat->head == ioat->tail) {
		dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
			__func__);
		clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
		mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
	}
}
305
306static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
307{
308 struct ioat_chan_common *chan = &ioat->base;
309 unsigned long phys_complete;
310
311 prefetch(chan->completion);
312
313 if (!spin_trylock_bh(&chan->cleanup_lock))
314 return;
315
316 if (!ioat_cleanup_preamble(chan, &phys_complete)) {
317 spin_unlock_bh(&chan->cleanup_lock);
318 return;
319 }
320
321 if (!spin_trylock_bh(&ioat->ring_lock)) {
322 spin_unlock_bh(&chan->cleanup_lock);
323 return;
324 }
325
326 __cleanup(ioat, phys_complete);
327
328 spin_unlock_bh(&ioat->ring_lock);
329 spin_unlock_bh(&chan->cleanup_lock);
330}
331
332static void ioat3_cleanup_tasklet(unsigned long data)
333{
334 struct ioat2_dma_chan *ioat = (void *) data;
335
336 ioat3_cleanup(ioat);
337 writew(IOAT_CHANCTRL_RUN | IOAT3_CHANCTRL_COMPL_DCA_EN,
338 ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
339}
340
341static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
342{
343 struct ioat_chan_common *chan = &ioat->base;
344 unsigned long phys_complete;
345 u32 status;
346
347 status = ioat_chansts(chan);
348 if (is_ioat_active(status) || is_ioat_idle(status))
349 ioat_suspend(chan);
350 while (is_ioat_active(status) || is_ioat_idle(status)) {
351 status = ioat_chansts(chan);
352 cpu_relax();
353 }
354
355 if (ioat_cleanup_preamble(chan, &phys_complete))
356 __cleanup(ioat, phys_complete);
357
358 __ioat2_restart_chan(ioat);
359}
360
/* channel timer handler; @data is the ioat2_dma_chan pointer.
 *
 * With a completion pending it checks for a halted channel, reclaims
 * completed descriptors, and escalates to a channel restart if no progress
 * was made since the previous acknowledged timeout.  With nothing pending
 * it tries to shrink an idle, oversized ring by one order per invocation.
 * Runs entirely under chan->cleanup_lock and takes ioat->ring_lock around
 * the ring accesses.
 */
static void ioat3_timer_event(unsigned long data)
{
	struct ioat2_dma_chan *ioat = (void *) data;
	struct ioat_chan_common *chan = &ioat->base;

	spin_lock_bh(&chan->cleanup_lock);
	if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
		unsigned long phys_complete;
		u64 status;

		spin_lock_bh(&ioat->ring_lock);
		status = ioat_chansts(chan);

		/* when halted due to errors check for channel
		 * programming errors before advancing the completion state
		 */
		if (is_ioat_halted(status)) {
			u32 chanerr;

			chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
			BUG_ON(is_ioat_bug(chanerr));
		}

		/* if we haven't made progress and we have already
		 * acknowledged a pending completion once, then be more
		 * forceful with a restart
		 */
		if (ioat_cleanup_preamble(chan, &phys_complete))
			__cleanup(ioat, phys_complete);
		else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
			ioat3_restart_channel(ioat);
		else {
			/* first timeout without progress: note it and wait
			 * one more completion timeout
			 */
			set_bit(IOAT_COMPLETION_ACK, &chan->state);
			mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
		}
		spin_unlock_bh(&ioat->ring_lock);
	} else {
		u16 active;

		/* if the ring is idle, empty, and oversized try to step
		 * down the size
		 */
		spin_lock_bh(&ioat->ring_lock);
		active = ioat2_ring_active(ioat);
		if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
			reshape_ring(ioat, ioat->alloc_order-1);
		spin_unlock_bh(&ioat->ring_lock);

		/* keep shrinking until we get back to our minimum
		 * default size
		 */
		if (ioat->alloc_order > ioat_get_alloc_order())
			mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
	}
	spin_unlock_bh(&chan->cleanup_lock);
}
417
418static enum dma_status
419ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie,
420 dma_cookie_t *done, dma_cookie_t *used)
421{
422 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
423
424 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
425 return DMA_SUCCESS;
426
427 ioat3_cleanup(ioat);
428
429 return ioat_is_complete(c, cookie, done, used);
430}
431
432static struct dma_async_tx_descriptor *
433ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
434 size_t len, unsigned long flags)
435{
436 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
437 struct ioat_ring_ent *desc;
438 size_t total_len = len;
439 struct ioat_fill_descriptor *fill;
440 int num_descs;
441 u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
442 u16 idx;
443 int i;
444
445 num_descs = ioat2_xferlen_to_descs(ioat, len);
446 if (likely(num_descs) &&
447 ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
448 /* pass */;
449 else
450 return NULL;
451 i = 0;
452 do {
453 size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
454
455 desc = ioat2_get_ring_ent(ioat, idx + i);
456 fill = desc->fill;
457
458 fill->size = xfer_size;
459 fill->src_data = src_data;
460 fill->dst_addr = dest;
461 fill->ctl = 0;
462 fill->ctl_f.op = IOAT_OP_FILL;
463
464 len -= xfer_size;
465 dest += xfer_size;
466 dump_desc_dbg(ioat, desc);
467 } while (++i < num_descs);
468
469 desc->txd.flags = flags;
470 desc->len = total_len;
471 fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
472 fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
473 fill->ctl_f.compl_write = 1;
474 dump_desc_dbg(ioat, desc);
475
476 /* we leave the channel locked to ensure in order submission */
477 return &desc->txd;
478}
479
480static struct dma_async_tx_descriptor *
481__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
482 dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
483 size_t len, unsigned long flags)
484{
485 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
486 struct ioat_ring_ent *compl_desc;
487 struct ioat_ring_ent *desc;
488 struct ioat_ring_ent *ext;
489 size_t total_len = len;
490 struct ioat_xor_descriptor *xor;
491 struct ioat_xor_ext_descriptor *xor_ex = NULL;
492 struct ioat_dma_descriptor *hw;
493 u32 offset = 0;
494 int num_descs;
495 int with_ext;
496 int i;
497 u16 idx;
498 u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
499
500 BUG_ON(src_cnt < 2);
501
502 num_descs = ioat2_xferlen_to_descs(ioat, len);
503 /* we need 2x the number of descriptors to cover greater than 5
504 * sources
505 */
506 if (src_cnt > 5) {
507 with_ext = 1;
508 num_descs *= 2;
509 } else
510 with_ext = 0;
511
512 /* completion writes from the raid engine may pass completion
513 * writes from the legacy engine, so we need one extra null
514 * (legacy) descriptor to ensure all completion writes arrive in
515 * order.
516 */
517 if (likely(num_descs) &&
518 ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
519 /* pass */;
520 else
521 return NULL;
522 i = 0;
523 do {
524 struct ioat_raw_descriptor *descs[2];
525 size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
526 int s;
527
528 desc = ioat2_get_ring_ent(ioat, idx + i);
529 xor = desc->xor;
530
531 /* save a branch by unconditionally retrieving the
532 * extended descriptor xor_set_src() knows to not write
533 * to it in the single descriptor case
534 */
535 ext = ioat2_get_ring_ent(ioat, idx + i + 1);
536 xor_ex = ext->xor_ex;
537
538 descs[0] = (struct ioat_raw_descriptor *) xor;
539 descs[1] = (struct ioat_raw_descriptor *) xor_ex;
540 for (s = 0; s < src_cnt; s++)
541 xor_set_src(descs, src[s], offset, s);
542 xor->size = xfer_size;
543 xor->dst_addr = dest + offset;
544 xor->ctl = 0;
545 xor->ctl_f.op = op;
546 xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
547
548 len -= xfer_size;
549 offset += xfer_size;
550 dump_desc_dbg(ioat, desc);
551 } while ((i += 1 + with_ext) < num_descs);
552
553 /* last xor descriptor carries the unmap parameters and fence bit */
554 desc->txd.flags = flags;
555 desc->len = total_len;
556 if (result)
557 desc->result = result;
558 xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
559
560 /* completion descriptor carries interrupt bit */
561 compl_desc = ioat2_get_ring_ent(ioat, idx + i);
562 compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
563 hw = compl_desc->hw;
564 hw->ctl = 0;
565 hw->ctl_f.null = 1;
566 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
567 hw->ctl_f.compl_write = 1;
568 hw->size = NULL_DESC_BUFFER_SIZE;
569 dump_desc_dbg(ioat, compl_desc);
570
571 /* we leave the channel locked to ensure in order submission */
572 return &desc->txd;
573}
574
575static struct dma_async_tx_descriptor *
576ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
577 unsigned int src_cnt, size_t len, unsigned long flags)
578{
579 return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
580}
581
582struct dma_async_tx_descriptor *
583ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
584 unsigned int src_cnt, size_t len,
585 enum sum_check_flags *result, unsigned long flags)
586{
587 /* the cleanup routine only sets bits on validate failure, it
588 * does not clear bits on validate success... so clear it here
589 */
590 *result = 0;
591
592 return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
593 src_cnt - 1, len, flags);
594}
595
596static void
597dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
598{
599 struct device *dev = to_dev(&ioat->base);
600 struct ioat_pq_descriptor *pq = desc->pq;
601 struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
602 struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
603 int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
604 int i;
605
606 dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
607 " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
608 desc_id(desc), (unsigned long long) desc->txd.phys,
609 (unsigned long long) (pq_ex ? pq_ex->next : pq->next),
610 desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
611 pq->ctl_f.compl_write,
612 pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
613 pq->ctl_f.src_cnt);
614 for (i = 0; i < src_cnt; i++)
615 dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
616 (unsigned long long) pq_get_src(descs, i), pq->coef[i]);
617 dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
618 dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
619}
620
621static struct dma_async_tx_descriptor *
622__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
623 const dma_addr_t *dst, const dma_addr_t *src,
624 unsigned int src_cnt, const unsigned char *scf,
625 size_t len, unsigned long flags)
626{
627 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
628 struct ioat_chan_common *chan = &ioat->base;
629 struct ioat_ring_ent *compl_desc;
630 struct ioat_ring_ent *desc;
631 struct ioat_ring_ent *ext;
632 size_t total_len = len;
633 struct ioat_pq_descriptor *pq;
634 struct ioat_pq_ext_descriptor *pq_ex = NULL;
635 struct ioat_dma_descriptor *hw;
636 u32 offset = 0;
637 int num_descs;
638 int with_ext;
639 int i, s;
640 u16 idx;
641 u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
642
643 dev_dbg(to_dev(chan), "%s\n", __func__);
644 /* the engine requires at least two sources (we provide
645 * at least 1 implied source in the DMA_PREP_CONTINUE case)
646 */
647 BUG_ON(src_cnt + dmaf_continue(flags) < 2);
648
649 num_descs = ioat2_xferlen_to_descs(ioat, len);
650 /* we need 2x the number of descriptors to cover greater than 3
651 * sources
652 */
653 if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) {
654 with_ext = 1;
655 num_descs *= 2;
656 } else
657 with_ext = 0;
658
659 /* completion writes from the raid engine may pass completion
660 * writes from the legacy engine, so we need one extra null
661 * (legacy) descriptor to ensure all completion writes arrive in
662 * order.
663 */
664 if (likely(num_descs) &&
665 ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
666 /* pass */;
667 else
668 return NULL;
669 i = 0;
670 do {
671 struct ioat_raw_descriptor *descs[2];
672 size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
673
674 desc = ioat2_get_ring_ent(ioat, idx + i);
675 pq = desc->pq;
676
677 /* save a branch by unconditionally retrieving the
678 * extended descriptor pq_set_src() knows to not write
679 * to it in the single descriptor case
680 */
681 ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
682 pq_ex = ext->pq_ex;
683
684 descs[0] = (struct ioat_raw_descriptor *) pq;
685 descs[1] = (struct ioat_raw_descriptor *) pq_ex;
686
687 for (s = 0; s < src_cnt; s++)
688 pq_set_src(descs, src[s], offset, scf[s], s);
689
690 /* see the comment for dma_maxpq in include/linux/dmaengine.h */
691 if (dmaf_p_disabled_continue(flags))
692 pq_set_src(descs, dst[1], offset, 1, s++);
693 else if (dmaf_continue(flags)) {
694 pq_set_src(descs, dst[0], offset, 0, s++);
695 pq_set_src(descs, dst[1], offset, 1, s++);
696 pq_set_src(descs, dst[1], offset, 0, s++);
697 }
698 pq->size = xfer_size;
699 pq->p_addr = dst[0] + offset;
700 pq->q_addr = dst[1] + offset;
701 pq->ctl = 0;
702 pq->ctl_f.op = op;
703 pq->ctl_f.src_cnt = src_cnt_to_hw(s);
704 pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
705 pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
706
707 len -= xfer_size;
708 offset += xfer_size;
709 } while ((i += 1 + with_ext) < num_descs);
710
711 /* last pq descriptor carries the unmap parameters and fence bit */
712 desc->txd.flags = flags;
713 desc->len = total_len;
714 if (result)
715 desc->result = result;
716 pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
717 dump_pq_desc_dbg(ioat, desc, ext);
718
719 /* completion descriptor carries interrupt bit */
720 compl_desc = ioat2_get_ring_ent(ioat, idx + i);
721 compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
722 hw = compl_desc->hw;
723 hw->ctl = 0;
724 hw->ctl_f.null = 1;
725 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
726 hw->ctl_f.compl_write = 1;
727 hw->size = NULL_DESC_BUFFER_SIZE;
728 dump_desc_dbg(ioat, compl_desc);
729
730 /* we leave the channel locked to ensure in order submission */
731 return &desc->txd;
732}
733
734static struct dma_async_tx_descriptor *
735ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
736 unsigned int src_cnt, const unsigned char *scf, size_t len,
737 unsigned long flags)
738{
739 /* handle the single source multiply case from the raid6
740 * recovery path
741 */
742 if (unlikely((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1)) {
743 dma_addr_t single_source[2];
744 unsigned char single_source_coef[2];
745
746 BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
747 single_source[0] = src[0];
748 single_source[1] = src[0];
749 single_source_coef[0] = scf[0];
750 single_source_coef[1] = 0;
751
752 return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
753 single_source_coef, len, flags);
754 } else
755 return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
756 len, flags);
757}
758
759struct dma_async_tx_descriptor *
760ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
761 unsigned int src_cnt, const unsigned char *scf, size_t len,
762 enum sum_check_flags *pqres, unsigned long flags)
763{
764 /* the cleanup routine only sets bits on validate failure, it
765 * does not clear bits on validate success... so clear it here
766 */
767 *pqres = 0;
768
769 return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
770 flags);
771}
772
773static struct dma_async_tx_descriptor *
774ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
775 unsigned int src_cnt, size_t len, unsigned long flags)
776{
777 unsigned char scf[src_cnt];
778 dma_addr_t pq[2];
779
780 memset(scf, 0, src_cnt);
781 flags |= DMA_PREP_PQ_DISABLE_Q;
782 pq[0] = dst;
783 pq[1] = ~0;
784
785 return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
786 flags);
787}
788
789struct dma_async_tx_descriptor *
790ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
791 unsigned int src_cnt, size_t len,
792 enum sum_check_flags *result, unsigned long flags)
793{
794 unsigned char scf[src_cnt];
795 dma_addr_t pq[2];
796
797 /* the cleanup routine only sets bits on validate failure, it
798 * does not clear bits on validate success... so clear it here
799 */
800 *result = 0;
801
802 memset(scf, 0, src_cnt);
803 flags |= DMA_PREP_PQ_DISABLE_Q;
804 pq[0] = src[0];
805 pq[1] = ~0;
806
807 return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf,
808 len, flags);
809}
810
811static struct dma_async_tx_descriptor *
812ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
813{
814 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
815 struct ioat_ring_ent *desc;
816 struct ioat_dma_descriptor *hw;
817 u16 idx;
818
819 if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0)
820 desc = ioat2_get_ring_ent(ioat, idx);
821 else
822 return NULL;
823
824 hw = desc->hw;
825 hw->ctl = 0;
826 hw->ctl_f.null = 1;
827 hw->ctl_f.int_en = 1;
828 hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
829 hw->ctl_f.compl_write = 1;
830 hw->size = NULL_DESC_BUFFER_SIZE;
831 hw->src_addr = 0;
832 hw->dst_addr = 0;
833
834 desc->txd.flags = flags;
835 desc->len = 1;
836
837 dump_desc_dbg(ioat, desc);
838
839 /* we leave the channel locked to ensure in order submission */
840 return &desc->txd;
841}
842
843static void __devinit ioat3_dma_test_callback(void *dma_async_param)
844{
845 struct completion *cmp = dma_async_param;
846
847 complete(cmp);
848}
849
850#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
851static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
852{
853 int i, src_idx;
854 struct page *dest;
855 struct page *xor_srcs[IOAT_NUM_SRC_TEST];
856 struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
857 dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
858 dma_addr_t dma_addr, dest_dma;
859 struct dma_async_tx_descriptor *tx;
860 struct dma_chan *dma_chan;
861 dma_cookie_t cookie;
862 u8 cmp_byte = 0;
863 u32 cmp_word;
864 u32 xor_val_result;
865 int err = 0;
866 struct completion cmp;
867 unsigned long tmo;
868 struct device *dev = &device->pdev->dev;
869 struct dma_device *dma = &device->common;
870
871 dev_dbg(dev, "%s\n", __func__);
872
873 if (!dma_has_cap(DMA_XOR, dma->cap_mask))
874 return 0;
875
876 for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
877 xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
878 if (!xor_srcs[src_idx]) {
879 while (src_idx--)
880 __free_page(xor_srcs[src_idx]);
881 return -ENOMEM;
882 }
883 }
884
885 dest = alloc_page(GFP_KERNEL);
886 if (!dest) {
887 while (src_idx--)
888 __free_page(xor_srcs[src_idx]);
889 return -ENOMEM;
890 }
891
892 /* Fill in src buffers */
893 for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
894 u8 *ptr = page_address(xor_srcs[src_idx]);
895 for (i = 0; i < PAGE_SIZE; i++)
896 ptr[i] = (1 << src_idx);
897 }
898
899 for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
900 cmp_byte ^= (u8) (1 << src_idx);
901
902 cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
903 (cmp_byte << 8) | cmp_byte;
904
905 memset(page_address(dest), 0, PAGE_SIZE);
906
907 dma_chan = container_of(dma->channels.next, struct dma_chan,
908 device_node);
909 if (dma->device_alloc_chan_resources(dma_chan) < 1) {
910 err = -ENODEV;
911 goto out;
912 }
913
914 /* test xor */
915 dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
916 for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
917 dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
918 DMA_TO_DEVICE);
919 tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
920 IOAT_NUM_SRC_TEST, PAGE_SIZE,
921 DMA_PREP_INTERRUPT);
922
923 if (!tx) {
924 dev_err(dev, "Self-test xor prep failed\n");
925 err = -ENODEV;
926 goto free_resources;
927 }
928
929 async_tx_ack(tx);
930 init_completion(&cmp);
931 tx->callback = ioat3_dma_test_callback;
932 tx->callback_param = &cmp;
933 cookie = tx->tx_submit(tx);
934 if (cookie < 0) {
935 dev_err(dev, "Self-test xor setup failed\n");
936 err = -ENODEV;
937 goto free_resources;
938 }
939 dma->device_issue_pending(dma_chan);
940
941 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
942
943 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
944 dev_err(dev, "Self-test xor timed out\n");
945 err = -ENODEV;
946 goto free_resources;
947 }
948
949 dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
950 for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
951 u32 *ptr = page_address(dest);
952 if (ptr[i] != cmp_word) {
953 dev_err(dev, "Self-test xor failed compare\n");
954 err = -ENODEV;
955 goto free_resources;
956 }
957 }
958 dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE);
959
960 /* skip validate if the capability is not present */
961 if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
962 goto free_resources;
963
964 /* validate the sources with the destintation page */
965 for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
966 xor_val_srcs[i] = xor_srcs[i];
967 xor_val_srcs[i] = dest;
968
969 xor_val_result = 1;
970
971 for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
972 dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
973 DMA_TO_DEVICE);
974 tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
975 IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
976 &xor_val_result, DMA_PREP_INTERRUPT);
977 if (!tx) {
978 dev_err(dev, "Self-test zero prep failed\n");
979 err = -ENODEV;
980 goto free_resources;
981 }
982
983 async_tx_ack(tx);
984 init_completion(&cmp);
985 tx->callback = ioat3_dma_test_callback;
986 tx->callback_param = &cmp;
987 cookie = tx->tx_submit(tx);
988 if (cookie < 0) {
989 dev_err(dev, "Self-test zero setup failed\n");
990 err = -ENODEV;
991 goto free_resources;
992 }
993 dma->device_issue_pending(dma_chan);
994
995 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
996
997 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
998 dev_err(dev, "Self-test validate timed out\n");
999 err = -ENODEV;
1000 goto free_resources;
1001 }
1002
1003 if (xor_val_result != 0) {
1004 dev_err(dev, "Self-test validate failed compare\n");
1005 err = -ENODEV;
1006 goto free_resources;
1007 }
1008
1009 /* skip memset if the capability is not present */
1010 if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask))
1011 goto free_resources;
1012
1013 /* test memset */
1014 dma_addr = dma_map_page(dev, dest, 0,
1015 PAGE_SIZE, DMA_FROM_DEVICE);
1016 tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
1017 DMA_PREP_INTERRUPT);
1018 if (!tx) {
1019 dev_err(dev, "Self-test memset prep failed\n");
1020 err = -ENODEV;
1021 goto free_resources;
1022 }
1023
1024 async_tx_ack(tx);
1025 init_completion(&cmp);
1026 tx->callback = ioat3_dma_test_callback;
1027 tx->callback_param = &cmp;
1028 cookie = tx->tx_submit(tx);
1029 if (cookie < 0) {
1030 dev_err(dev, "Self-test memset setup failed\n");
1031 err = -ENODEV;
1032 goto free_resources;
1033 }
1034 dma->device_issue_pending(dma_chan);
1035
1036 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1037
1038 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
1039 dev_err(dev, "Self-test memset timed out\n");
1040 err = -ENODEV;
1041 goto free_resources;
1042 }
1043
1044 for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
1045 u32 *ptr = page_address(dest);
1046 if (ptr[i]) {
1047 dev_err(dev, "Self-test memset failed compare\n");
1048 err = -ENODEV;
1049 goto free_resources;
1050 }
1051 }
1052
1053 /* test for non-zero parity sum */
1054 xor_val_result = 0;
1055 for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1056 dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
1057 DMA_TO_DEVICE);
1058 tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
1059 IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
1060 &xor_val_result, DMA_PREP_INTERRUPT);
1061 if (!tx) {
1062 dev_err(dev, "Self-test 2nd zero prep failed\n");
1063 err = -ENODEV;
1064 goto free_resources;
1065 }
1066
1067 async_tx_ack(tx);
1068 init_completion(&cmp);
1069 tx->callback = ioat3_dma_test_callback;
1070 tx->callback_param = &cmp;
1071 cookie = tx->tx_submit(tx);
1072 if (cookie < 0) {
1073 dev_err(dev, "Self-test 2nd zero setup failed\n");
1074 err = -ENODEV;
1075 goto free_resources;
1076 }
1077 dma->device_issue_pending(dma_chan);
1078
1079 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1080
1081 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
1082 dev_err(dev, "Self-test 2nd validate timed out\n");
1083 err = -ENODEV;
1084 goto free_resources;
1085 }
1086
1087 if (xor_val_result != SUM_CHECK_P_RESULT) {
1088 dev_err(dev, "Self-test validate failed compare\n");
1089 err = -ENODEV;
1090 goto free_resources;
1091 }
1092
1093free_resources:
1094 dma->device_free_chan_resources(dma_chan);
1095out:
1096 src_idx = IOAT_NUM_SRC_TEST;
1097 while (src_idx--)
1098 __free_page(xor_srcs[src_idx]);
1099 __free_page(dest);
1100 return err;
1101}
1102
1103static int __devinit ioat3_dma_self_test(struct ioatdma_device *device)
1104{
1105 int rc = ioat_dma_self_test(device);
1106
1107 if (rc)
1108 return rc;
1109
1110 rc = ioat_xor_val_self_test(device);
1111 if (rc)
1112 return rc;
1113
1114 return 0;
1115}
1116
1117int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
1118{
1119 struct pci_dev *pdev = device->pdev;
1120 struct dma_device *dma;
1121 struct dma_chan *c;
1122 struct ioat_chan_common *chan;
1123 bool is_raid_device = false;
1124 int err;
1125 u16 dev_id;
1126 u32 cap;
1127
1128 device->enumerate_channels = ioat2_enumerate_channels;
1129 device->self_test = ioat3_dma_self_test;
1130 dma = &device->common;
1131 dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
1132 dma->device_issue_pending = ioat2_issue_pending;
1133 dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
1134 dma->device_free_chan_resources = ioat2_free_chan_resources;
1135
1136 dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
1137 dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
1138
1139 cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
1140 if (cap & IOAT_CAP_XOR) {
1141 is_raid_device = true;
1142 dma->max_xor = 8;
1143 dma->xor_align = 2;
1144
1145 dma_cap_set(DMA_XOR, dma->cap_mask);
1146 dma->device_prep_dma_xor = ioat3_prep_xor;
1147
1148 dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1149 dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
1150 }
1151 if (cap & IOAT_CAP_PQ) {
1152 is_raid_device = true;
1153 dma_set_maxpq(dma, 8, 0);
1154 dma->pq_align = 2;
1155
1156 dma_cap_set(DMA_PQ, dma->cap_mask);
1157 dma->device_prep_dma_pq = ioat3_prep_pq;
1158
1159 dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
1160 dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
1161
1162 if (!(cap & IOAT_CAP_XOR)) {
1163 dma->max_xor = 8;
1164 dma->xor_align = 2;
1165
1166 dma_cap_set(DMA_XOR, dma->cap_mask);
1167 dma->device_prep_dma_xor = ioat3_prep_pqxor;
1168
1169 dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1170 dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
1171 }
1172 }
1173 if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) {
1174 dma_cap_set(DMA_MEMSET, dma->cap_mask);
1175 dma->device_prep_dma_memset = ioat3_prep_memset_lock;
1176 }
1177
1178
1179 if (is_raid_device) {
1180 dma->device_is_tx_complete = ioat3_is_complete;
1181 device->cleanup_tasklet = ioat3_cleanup_tasklet;
1182 device->timer_fn = ioat3_timer_event;
1183 } else {
1184 dma->device_is_tx_complete = ioat2_is_complete;
1185 device->cleanup_tasklet = ioat2_cleanup_tasklet;
1186 device->timer_fn = ioat2_timer_event;
1187 }
1188
1189 /* -= IOAT ver.3 workarounds =- */
1190 /* Write CHANERRMSK_INT with 3E07h to mask out the errors
1191 * that can cause stability issues for IOAT ver.3
1192 */
1193 pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
1194
1195 /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
1196 * (workaround for spurious config parity error after restart)
1197 */
1198 pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
1199 if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
1200 pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
1201
1202 err = ioat_probe(device);
1203 if (err)
1204 return err;
1205 ioat_set_tcp_copy_break(262144);
1206
1207 list_for_each_entry(c, &dma->channels, device_node) {
1208 chan = to_chan_common(c);
1209 writel(IOAT_DMA_DCA_ANY_CPU,
1210 chan->reg_base + IOAT_DCACTRL_OFFSET);
1211 }
1212
1213 err = ioat_register(device);
1214 if (err)
1215 return err;
1216
1217 ioat_kobject_add(device, &ioat2_ktype);
1218
1219 if (dca)
1220 device->dca = ioat3_dca_init(pdev, device->reg_base);
1221
1222 return 0;
1223}
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
new file mode 100644
index 000000000000..99afb12bd409
--- /dev/null
+++ b/drivers/dma/ioat/hw.h
@@ -0,0 +1,215 @@
1/*
2 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59
16 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * The full GNU General Public License is included in this distribution in the
19 * file called COPYING.
20 */
21#ifndef _IOAT_HW_H_
22#define _IOAT_HW_H_
23
24/* PCI Configuration Space Values */
25#define IOAT_PCI_VID 0x8086
26#define IOAT_MMIO_BAR 0
27
28/* CB device IDs */
29#define IOAT_PCI_DID_5000 0x1A38
30#define IOAT_PCI_DID_CNB 0x360B
31#define IOAT_PCI_DID_SCNB 0x65FF
32#define IOAT_PCI_DID_SNB 0x402F
33
34#define IOAT_PCI_RID 0x00
35#define IOAT_PCI_SVID 0x8086
36#define IOAT_PCI_SID 0x8086
/* Version codes: major number in the high nibble, minor in the low nibble */
37#define IOAT_VER_1_2 0x12 /* Version 1.2 */
38#define IOAT_VER_2_0 0x20 /* Version 2.0 */
39#define IOAT_VER_3_0 0x30 /* Version 3.0 */
40#define IOAT_VER_3_2 0x32 /* Version 3.2 */
41
/*
 * Descriptor layout that goes with the IOAT_OP_COPY op code defined below.
 * ctl and ctl_f overlay the same 32-bit control word: ctl accesses it as a
 * whole, ctl_f as individual bit fields (their widths add up to 32).
 * The layout is one 32-bit size, one 32-bit control word and seven 64-bit
 * fields.
 */
42struct ioat_dma_descriptor {
43 uint32_t size;
44 union {
45 uint32_t ctl; /* whole control word */
46 struct {
47 unsigned int int_en:1;
48 unsigned int src_snoop_dis:1;
49 unsigned int dest_snoop_dis:1;
50 unsigned int compl_write:1;
51 unsigned int fence:1;
52 unsigned int null:1;
53 unsigned int src_brk:1;
54 unsigned int dest_brk:1;
55 unsigned int bundle:1;
56 unsigned int dest_dca:1;
57 unsigned int hint:1;
58 unsigned int rsvd2:13;
59 #define IOAT_OP_COPY 0x00
60 unsigned int op:8; /* op code, top 8 bits of the bit-field list */
61 } ctl_f;
62 };
63 uint64_t src_addr;
64 uint64_t dst_addr;
65 uint64_t next;
66 uint64_t rsv1;
67 uint64_t rsv2;
68 /* store some driver data in an unused portion of the descriptor */
69 union {
70 uint64_t user1;
71 uint64_t tx_cnt; /* shares storage with user1 */
72 };
73 uint64_t user2;
74};
75
/*
 * Descriptor layout that goes with the IOAT_OP_FILL op code defined below.
 * ctl and ctl_f overlay the same 32-bit control word (bit-field widths add
 * up to 32).  Where the copy layout has a source address, this one has
 * src_data: a 64-bit data value rather than an address.
 */
76struct ioat_fill_descriptor {
77 uint32_t size;
78 union {
79 uint32_t ctl; /* whole control word */
80 struct {
81 unsigned int int_en:1;
82 unsigned int rsvd:1;
83 unsigned int dest_snoop_dis:1;
84 unsigned int compl_write:1;
85 unsigned int fence:1;
86 unsigned int rsvd2:2;
87 unsigned int dest_brk:1;
88 unsigned int bundle:1;
89 unsigned int rsvd4:15;
90 #define IOAT_OP_FILL 0x01
91 unsigned int op:8;
92 } ctl_f;
93 };
94 uint64_t src_data; /* fill pattern */
95 uint64_t dst_addr;
96 uint64_t next;
97 uint64_t rsv1;
98 uint64_t next_dst_addr;
99 uint64_t user1;
100 uint64_t user2;
101};
102
/*
 * Descriptor layout for the IOAT_OP_XOR and IOAT_OP_XOR_VAL op codes defined
 * below.  ctl and ctl_f overlay the same 32-bit control word (bit-field
 * widths add up to 32).  Five source addresses fit in this descriptor
 * (src_addr, src_addr2..src_addr5); struct ioat_xor_ext_descriptor has
 * fields for sources 6..8.
 */
103struct ioat_xor_descriptor {
104 uint32_t size;
105 union {
106 uint32_t ctl; /* whole control word */
107 struct {
108 unsigned int int_en:1;
109 unsigned int src_snoop_dis:1;
110 unsigned int dest_snoop_dis:1;
111 unsigned int compl_write:1;
112 unsigned int fence:1;
113 unsigned int src_cnt:3; /* 3-bit source-count field */
114 unsigned int bundle:1;
115 unsigned int dest_dca:1;
116 unsigned int hint:1;
117 unsigned int rsvd:13;
118 #define IOAT_OP_XOR 0x87
119 #define IOAT_OP_XOR_VAL 0x88
120 unsigned int op:8;
121 } ctl_f;
122 };
123 uint64_t src_addr;
124 uint64_t dst_addr;
125 uint64_t next;
126 uint64_t src_addr2;
127 uint64_t src_addr3;
128 uint64_t src_addr4;
129 uint64_t src_addr5;
130};
131
/*
 * Extension of struct ioat_xor_descriptor: holds xor sources 6..8 and its
 * own next pointer; the remaining four 64-bit words are reserved.
 */
132struct ioat_xor_ext_descriptor {
133 uint64_t src_addr6;
134 uint64_t src_addr7;
135 uint64_t src_addr8;
136 uint64_t next;
137 uint64_t rsvd[4];
138};
139
/*
 * Descriptor layout for the IOAT_OP_PQ and IOAT_OP_PQ_VAL op codes defined
 * below.  ctl and ctl_f overlay the same 32-bit control word (bit-field
 * widths add up to 32).  Three source addresses fit in this descriptor
 * (src_addr, src_addr2, src_addr3); struct ioat_pq_ext_descriptor has fields
 * for sources 4..8.  There are separate P and Q destination addresses.
 */
140struct ioat_pq_descriptor {
141 uint32_t size;
142 union {
143 uint32_t ctl; /* whole control word */
144 struct {
145 unsigned int int_en:1;
146 unsigned int src_snoop_dis:1;
147 unsigned int dest_snoop_dis:1;
148 unsigned int compl_write:1;
149 unsigned int fence:1;
150 unsigned int src_cnt:3; /* 3-bit source-count field */
151 unsigned int bundle:1;
152 unsigned int dest_dca:1;
153 unsigned int hint:1;
154 unsigned int p_disable:1;
155 unsigned int q_disable:1;
156 unsigned int rsvd:11;
157 #define IOAT_OP_PQ 0x89
158 #define IOAT_OP_PQ_VAL 0x8a
159 unsigned int op:8;
160 } ctl_f;
161 };
162 uint64_t src_addr;
163 uint64_t p_addr;
164 uint64_t next;
165 uint64_t src_addr2;
166 uint64_t src_addr3;
167 uint8_t coef[8]; /* one coefficient byte per source, up to 8 sources */
168 uint64_t q_addr;
169};
170
/*
 * Extension of struct ioat_pq_descriptor: holds pq sources 4..8 and its own
 * next pointer.  Note that next sits between src_addr6 and src_addr7; the
 * last two 64-bit words are reserved.
 */
171struct ioat_pq_ext_descriptor {
172 uint64_t src_addr4;
173 uint64_t src_addr5;
174 uint64_t src_addr6;
175 uint64_t next;
176 uint64_t src_addr7;
177 uint64_t src_addr8;
178 uint64_t rsvd[2];
179};
180
/*
 * Descriptor layout that goes with the IOAT_OP_PQ_UP op code defined below.
 * ctl and ctl_f overlay the same 32-bit control word (bit-field widths add
 * up to 32).  Unlike struct ioat_pq_descriptor, the coefficient is a single
 * 8-bit field inside the control word, and there are separate p_src/q_src
 * fields next to the p_addr/q_addr ones.
 */
181struct ioat_pq_update_descriptor {
182 uint32_t size;
183 union {
184 uint32_t ctl; /* whole control word */
185 struct {
186 unsigned int int_en:1;
187 unsigned int src_snoop_dis:1;
188 unsigned int dest_snoop_dis:1;
189 unsigned int compl_write:1;
190 unsigned int fence:1;
191 unsigned int src_cnt:3;
192 unsigned int bundle:1;
193 unsigned int dest_dca:1;
194 unsigned int hint:1;
195 unsigned int p_disable:1;
196 unsigned int q_disable:1;
197 unsigned int rsvd:3;
198 unsigned int coef:8;
199 #define IOAT_OP_PQ_UP 0x8b
200 unsigned int op:8;
201 } ctl_f;
202 };
203 uint64_t src_addr;
204 uint64_t p_addr;
205 uint64_t next;
206 uint64_t src_addr2;
207 uint64_t p_src;
208 uint64_t q_src;
209 uint64_t q_addr;
210};
211
/*
 * Untyped view of a descriptor as eight 64-bit words, for code that
 * addresses descriptor fields by index instead of by name.
 */
212struct ioat_raw_descriptor {
213 uint64_t field[8];
214};
215#endif
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
new file mode 100644
index 000000000000..d545fae30f37
--- /dev/null
+++ b/drivers/dma/ioat/pci.c
@@ -0,0 +1,210 @@
1/*
2 * Intel I/OAT DMA Linux driver
3 * Copyright(c) 2007 - 2009 Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 */
22
23/*
24 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
25 * copy operations.
26 */
27
28#include <linux/init.h>
29#include <linux/module.h>
30#include <linux/pci.h>
31#include <linux/interrupt.h>
32#include <linux/dca.h>
33#include "dma.h"
34#include "dma_v2.h"
35#include "registers.h"
36#include "hw.h"
37
38MODULE_VERSION(IOAT_DMA_VERSION);
39MODULE_LICENSE("Dual BSD/GPL");
40MODULE_AUTHOR("Intel Corporation");
41
42static struct pci_device_id ioat_pci_tbl[] = {
43 /* I/OAT v1 platforms */
44 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
45 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) },
46 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
47 { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
48
49 /* I/OAT v2 platforms */
50 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
51
52 /* I/OAT v3 platforms */
53 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
54 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
55 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
56 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
57 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
58 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
59 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
60 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
61
62 /* I/OAT v3.2 platforms */
63 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
64 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
65 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
66 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
67 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
68 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
69 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
70 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
71 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
72 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
73
74 { 0, }
75};
76MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
77
/* Probe/remove callbacks are defined below; declared here so that
 * ioat_pci_driver can reference them. */
78static int __devinit ioat_pci_probe(struct pci_dev *pdev,
79 const struct pci_device_id *id);
80static void __devexit ioat_remove(struct pci_dev *pdev);
81
/* Module parameter; its value is handed to the version-specific
 * ioatN_dma_probe() routines as their second argument. */
82static int ioat_dca_enabled = 1;
83module_param(ioat_dca_enabled, int, 0644);
84MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
85
/* Slab cache of struct ioat_ring_ent objects; created in
 * ioat_init_module() and destroyed in ioat_exit_module(). */
86struct kmem_cache *ioat2_cache;
87
/* Driver name: used for the pci_driver, the BAR region request and the
 * load-time banner. */
88#define DRV_NAME "ioatdma"
89
90static struct pci_driver ioat_pci_driver = {
91 .name = DRV_NAME,
92 .id_table = ioat_pci_tbl,
93 .probe = ioat_pci_probe,
94 .remove = __devexit_p(ioat_remove),
95};
96
97static struct ioatdma_device *
98alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase)
99{
100 struct device *dev = &pdev->dev;
101 struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
102
103 if (!d)
104 return NULL;
105 d->pdev = pdev;
106 d->reg_base = iobase;
107 return d;
108}
109
110static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
111{
112 void __iomem * const *iomap;
113 struct device *dev = &pdev->dev;
114 struct ioatdma_device *device;
115 int err;
116
117 err = pcim_enable_device(pdev);
118 if (err)
119 return err;
120
121 err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME);
122 if (err)
123 return err;
124 iomap = pcim_iomap_table(pdev);
125 if (!iomap)
126 return -ENOMEM;
127
128 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
129 if (err)
130 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
131 if (err)
132 return err;
133
134 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
135 if (err)
136 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
137 if (err)
138 return err;
139
140 device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL);
141 if (!device)
142 return -ENOMEM;
143
144 pci_set_master(pdev);
145
146 device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]);
147 if (!device)
148 return -ENOMEM;
149 pci_set_drvdata(pdev, device);
150
151 device->version = readb(device->reg_base + IOAT_VER_OFFSET);
152 if (device->version == IOAT_VER_1_2)
153 err = ioat1_dma_probe(device, ioat_dca_enabled);
154 else if (device->version == IOAT_VER_2_0)
155 err = ioat2_dma_probe(device, ioat_dca_enabled);
156 else if (device->version >= IOAT_VER_3_0)
157 err = ioat3_dma_probe(device, ioat_dca_enabled);
158 else
159 return -ENODEV;
160
161 if (err) {
162 dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n");
163 return -ENODEV;
164 }
165
166 return 0;
167}
168
169static void __devexit ioat_remove(struct pci_dev *pdev)
170{
171 struct ioatdma_device *device = pci_get_drvdata(pdev);
172
173 if (!device)
174 return;
175
176 dev_err(&pdev->dev, "Removing dma and dca services\n");
177 if (device->dca) {
178 unregister_dca_provider(device->dca, &pdev->dev);
179 free_dca_provider(device->dca);
180 device->dca = NULL;
181 }
182 ioat_dma_remove(device);
183}
184
185static int __init ioat_init_module(void)
186{
187 int err;
188
189 pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
190 DRV_NAME, IOAT_DMA_VERSION);
191
192 ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent),
193 0, SLAB_HWCACHE_ALIGN, NULL);
194 if (!ioat2_cache)
195 return -ENOMEM;
196
197 err = pci_register_driver(&ioat_pci_driver);
198 if (err)
199 kmem_cache_destroy(ioat2_cache);
200
201 return err;
202}
203module_init(ioat_init_module);
204
205static void __exit ioat_exit_module(void)
206{
207 pci_unregister_driver(&ioat_pci_driver);
208 kmem_cache_destroy(ioat2_cache);
209}
210module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioat/registers.h
index 49bc277424f8..63038e18ab03 100644
--- a/drivers/dma/ioatdma_registers.h
+++ b/drivers/dma/ioat/registers.h
@@ -64,18 +64,37 @@
64 64
65#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ 65#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */
66#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001 66#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001
67#define IOAT_DEVICE_MMIO_RESTRICTED 0x0002
68#define IOAT_DEVICE_MEMORY_BYPASS 0x0004
69#define IOAT_DEVICE_ADDRESS_REMAPPING 0x0008
70
71#define IOAT_DMA_CAP_OFFSET 0x10 /* 32-bit */
72#define IOAT_CAP_PAGE_BREAK 0x00000001
73#define IOAT_CAP_CRC 0x00000002
74#define IOAT_CAP_SKIP_MARKER 0x00000004
75#define IOAT_CAP_DCA 0x00000010
76#define IOAT_CAP_CRC_MOVE 0x00000020
77#define IOAT_CAP_FILL_BLOCK 0x00000040
78#define IOAT_CAP_APIC 0x00000080
79#define IOAT_CAP_XOR 0x00000100
80#define IOAT_CAP_PQ 0x00000200
67 81
68#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ 82#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */
69 83
70/* DMA Channel Registers */ 84/* DMA Channel Registers */
71#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */ 85#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */
72#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000 86#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000
87#define IOAT3_CHANCTRL_COMPL_DCA_EN 0x0200
73#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100 88#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100
74#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020 89#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020
75#define IOAT_CHANCTRL_ERR_INT_EN 0x0010 90#define IOAT_CHANCTRL_ERR_INT_EN 0x0010
76#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008 91#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008
77#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 92#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004
78#define IOAT_CHANCTRL_INT_DISABLE 0x0001 93#define IOAT_CHANCTRL_INT_REARM 0x0001
94#define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\
95 IOAT_CHANCTRL_ERR_COMPLETION_EN |\
96 IOAT_CHANCTRL_ANY_ERR_ABORT_EN |\
97 IOAT_CHANCTRL_ERR_INT_EN)
79 98
80#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */ 99#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */
81#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */ 100#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */
@@ -94,14 +113,14 @@
94#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C 113#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C
95#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ 114#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
96 ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH) 115 ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
97#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F 116#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL)
98#define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010 117#define IOAT_CHANSTS_SOFT_ERR 0x10ULL
99#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008 118#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL
100#define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007 119#define IOAT_CHANSTS_STATUS 0x7ULL
101#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0 120#define IOAT_CHANSTS_ACTIVE 0x0
102#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1 121#define IOAT_CHANSTS_DONE 0x1
103#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2 122#define IOAT_CHANSTS_SUSPENDED 0x2
104#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED 0x3 123#define IOAT_CHANSTS_HALTED 0x3
105 124
106 125
107 126
@@ -204,22 +223,27 @@
204#define IOAT_CDAR_OFFSET_HIGH 0x24 223#define IOAT_CDAR_OFFSET_HIGH 0x24
205 224
206#define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */ 225#define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */
207#define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR 0x0001 226#define IOAT_CHANERR_SRC_ADDR_ERR 0x0001
208#define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR 0x0002 227#define IOAT_CHANERR_DEST_ADDR_ERR 0x0002
209#define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR 0x0004 228#define IOAT_CHANERR_NEXT_ADDR_ERR 0x0004
210#define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR 0x0008 229#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR 0x0008
211#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010 230#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010
212#define IOAT_CHANERR_CHANCMD_ERR 0x0020 231#define IOAT_CHANERR_CHANCMD_ERR 0x0020
213#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040 232#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040
214#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080 233#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080
215#define IOAT_CHANERR_READ_DATA_ERR 0x0100 234#define IOAT_CHANERR_READ_DATA_ERR 0x0100
216#define IOAT_CHANERR_WRITE_DATA_ERR 0x0200 235#define IOAT_CHANERR_WRITE_DATA_ERR 0x0200
217#define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR 0x0400 236#define IOAT_CHANERR_CONTROL_ERR 0x0400
218#define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR 0x0800 237#define IOAT_CHANERR_LENGTH_ERR 0x0800
219#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000 238#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000
220#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000 239#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000
221#define IOAT_CHANERR_SOFT_ERR 0x4000 240#define IOAT_CHANERR_SOFT_ERR 0x4000
222#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000 241#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000
242#define IOAT_CHANERR_XOR_P_OR_CRC_ERR 0x10000
243#define IOAT_CHANERR_XOR_Q_ERR 0x20000
244#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR 0x40000
245
246#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
223 247
224#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */ 248#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */
225 249
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
deleted file mode 100644
index a600fc0f7962..000000000000
--- a/drivers/dma/ioat_dma.c
+++ /dev/null
@@ -1,1741 +0,0 @@
1/*
2 * Intel I/OAT DMA Linux driver
3 * Copyright(c) 2004 - 2009 Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 */
22
23/*
24 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
25 * copy operations.
26 */
27
28#include <linux/init.h>
29#include <linux/module.h>
30#include <linux/pci.h>
31#include <linux/interrupt.h>
32#include <linux/dmaengine.h>
33#include <linux/delay.h>
34#include <linux/dma-mapping.h>
35#include <linux/workqueue.h>
36#include <linux/i7300_idle.h>
37#include "ioatdma.h"
38#include "ioatdma_registers.h"
39#include "ioatdma_hw.h"
40
/* container_of() helpers: recover the driver structure from the generic
 * member embedded in it (common, node, async_tx). */
41#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
42#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
43#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
44#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)
45
/* Offset of the channel's register block from the device base, in units of
 * 0x80 bytes.  Since channel i is placed at 0x80 * (i + 1) when channels
 * are enumerated, this evaluates to i + 1 rather than i. */
46#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
/* Once a channel's pending count reaches this level, the submit paths push
 * the descriptors to hardware on their own. */
47static int ioat_pending_level = 4;
48module_param(ioat_pending_level, int, 0644);
49MODULE_PARM_DESC(ioat_pending_level,
50 "high-water mark for pushing ioat descriptors (default: 4)");
51
52#define RESET_DELAY msecs_to_jiffies(100)
53#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000))
54static void ioat_dma_chan_reset_part2(struct work_struct *work);
55static void ioat_dma_chan_watchdog(struct work_struct *work);
56
57/*
58 * workaround for IOAT ver.3.0 null descriptor issue
59 * (channel returns error when size is 0)
60 */
61#define NULL_DESC_BUFFER_SIZE 1
62
63/* internal functions */
64static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
65static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
66
67static struct ioat_desc_sw *
68ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
69static struct ioat_desc_sw *
70ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
71
72static inline struct ioat_dma_chan *ioat_lookup_chan_by_index(
73 struct ioatdma_device *device,
74 int index)
75{
76 return device->idx[index];
77}
78
79/**
80 * ioat_dma_do_interrupt - handler used for single vector interrupt mode
81 * @irq: interrupt id
82 * @data: interrupt data
83 */
84static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
85{
86 struct ioatdma_device *instance = data;
87 struct ioat_dma_chan *ioat_chan;
88 unsigned long attnstatus;
89 int bit;
90 u8 intrctrl;
91
92 intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
93
94 if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
95 return IRQ_NONE;
96
97 if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
98 writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
99 return IRQ_NONE;
100 }
101
102 attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
103 for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
104 ioat_chan = ioat_lookup_chan_by_index(instance, bit);
105 tasklet_schedule(&ioat_chan->cleanup_task);
106 }
107
108 writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
109 return IRQ_HANDLED;
110}
111
112/**
113 * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
114 * @irq: interrupt id
115 * @data: interrupt data
116 */
117static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
118{
119 struct ioat_dma_chan *ioat_chan = data;
120
121 tasklet_schedule(&ioat_chan->cleanup_task);
122
123 return IRQ_HANDLED;
124}
125
126static void ioat_dma_cleanup_tasklet(unsigned long data);
127
128/**
129 * ioat_dma_enumerate_channels - find and initialize the device's channels
130 * @device: the device to be enumerated
131 */
132static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
133{
134 u8 xfercap_scale;
135 u32 xfercap;
136 int i;
137 struct ioat_dma_chan *ioat_chan;
138
139 /*
140 * IOAT ver.3 workarounds
141 */
142 if (device->version == IOAT_VER_3_0) {
143 u32 chan_err_mask;
144 u16 dev_id;
145 u32 dmauncerrsts;
146
147 /*
148 * Write CHANERRMSK_INT with 3E07h to mask out the errors
149 * that can cause stability issues for IOAT ver.3
150 */
151 chan_err_mask = 0x3E07;
152 pci_write_config_dword(device->pdev,
153 IOAT_PCI_CHANERRMASK_INT_OFFSET,
154 chan_err_mask);
155
156 /*
157 * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
158 * (workaround for spurious config parity error after restart)
159 */
160 pci_read_config_word(device->pdev,
161 IOAT_PCI_DEVICE_ID_OFFSET,
162 &dev_id);
163 if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
164 dmauncerrsts = 0x10;
165 pci_write_config_dword(device->pdev,
166 IOAT_PCI_DMAUNCERRSTS_OFFSET,
167 dmauncerrsts);
168 }
169 }
170
171 device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
172 xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
173 xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
174
175#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
176 if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) {
177 device->common.chancnt--;
178 }
179#endif
180 for (i = 0; i < device->common.chancnt; i++) {
181 ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
182 if (!ioat_chan) {
183 device->common.chancnt = i;
184 break;
185 }
186
187 ioat_chan->device = device;
188 ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
189 ioat_chan->xfercap = xfercap;
190 ioat_chan->desccount = 0;
191 INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2);
192 if (ioat_chan->device->version == IOAT_VER_2_0)
193 writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE |
194 IOAT_DMA_DCA_ANY_CPU,
195 ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
196 else if (ioat_chan->device->version == IOAT_VER_3_0)
197 writel(IOAT_DMA_DCA_ANY_CPU,
198 ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
199 spin_lock_init(&ioat_chan->cleanup_lock);
200 spin_lock_init(&ioat_chan->desc_lock);
201 INIT_LIST_HEAD(&ioat_chan->free_desc);
202 INIT_LIST_HEAD(&ioat_chan->used_desc);
203 /* This should be made common somewhere in dmaengine.c */
204 ioat_chan->common.device = &device->common;
205 list_add_tail(&ioat_chan->common.device_node,
206 &device->common.channels);
207 device->idx[i] = ioat_chan;
208 tasklet_init(&ioat_chan->cleanup_task,
209 ioat_dma_cleanup_tasklet,
210 (unsigned long) ioat_chan);
211 tasklet_disable(&ioat_chan->cleanup_task);
212 }
213 return device->common.chancnt;
214}
215
216/**
217 * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
218 * descriptors to hw
219 * @chan: DMA channel handle
220 */
221static inline void __ioat1_dma_memcpy_issue_pending(
222 struct ioat_dma_chan *ioat_chan)
223{
224 ioat_chan->pending = 0;
225 writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET);
226}
227
228static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
229{
230 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
231
232 if (ioat_chan->pending > 0) {
233 spin_lock_bh(&ioat_chan->desc_lock);
234 __ioat1_dma_memcpy_issue_pending(ioat_chan);
235 spin_unlock_bh(&ioat_chan->desc_lock);
236 }
237}
238
239static inline void __ioat2_dma_memcpy_issue_pending(
240 struct ioat_dma_chan *ioat_chan)
241{
242 ioat_chan->pending = 0;
243 writew(ioat_chan->dmacount,
244 ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
245}
246
247static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan)
248{
249 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
250
251 if (ioat_chan->pending > 0) {
252 spin_lock_bh(&ioat_chan->desc_lock);
253 __ioat2_dma_memcpy_issue_pending(ioat_chan);
254 spin_unlock_bh(&ioat_chan->desc_lock);
255 }
256}
257
258
259/**
260 * ioat_dma_chan_reset_part2 - reinit the channel after a reset
 * @work: the delayed work item embedded in the channel (ioat_chan->work)
 *
 * Second half of a channel reset, run from the delayed work that
 * ioat_dma_reset_channel() schedules.  With cleanup_lock and then desc_lock
 * held it clears the completion writeback and the pending count, recounts
 * the descriptors on the used list into dmacount, and writes the chain
 * address registers.  Ver.1.2 channels are then given the START command;
 * ver.2.0 channels get dmacount written to DMACOUNT.  Other versions only
 * get the bookkeeping and the log message.
261 */
262static void ioat_dma_chan_reset_part2(struct work_struct *work)
263{
264 struct ioat_dma_chan *ioat_chan =
265 container_of(work, struct ioat_dma_chan, work.work);
266 struct ioat_desc_sw *desc;
267
268 spin_lock_bh(&ioat_chan->cleanup_lock);
269 spin_lock_bh(&ioat_chan->desc_lock);
270
271 ioat_chan->completion_virt->low = 0;
272 ioat_chan->completion_virt->high = 0;
273 ioat_chan->pending = 0;
274
275 /*
276 * count the descriptors waiting, and be sure to do it
277 * right for both the CB1 line and the CB2 ring
278 */
279 ioat_chan->dmacount = 0;
280 if (ioat_chan->used_desc.prev) {
281 desc = to_ioat_desc(ioat_chan->used_desc.prev);
282 do {
283 ioat_chan->dmacount++;
284 desc = to_ioat_desc(desc->node.next);
285 } while (&desc->node != ioat_chan->used_desc.next);
286 }
287
288 /*
289 * write the new starting descriptor address
290 * this puts channel engine into ARMED state
291 */
 /* NOTE(review): unlike the counting loop above, this dereference is not
  * guarded by a used_desc.prev check.  ioat_dma_reset_channel() returns
  * early when used_desc.prev is NULL before scheduling this work - confirm
  * that is the only path that schedules it. */
292 desc = to_ioat_desc(ioat_chan->used_desc.prev);
293 switch (ioat_chan->device->version) {
294 case IOAT_VER_1_2:
295 writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
296 ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
297 writel(((u64) desc->async_tx.phys) >> 32,
298 ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
299
300 writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
301 + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
302 break;
303 case IOAT_VER_2_0:
304 writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
305 ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
306 writel(((u64) desc->async_tx.phys) >> 32,
307 ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
308
309 /* tell the engine to go with what's left to be done */
310 writew(ioat_chan->dmacount,
311 ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
312
313 break;
314 }
315 dev_err(&ioat_chan->device->pdev->dev,
316 "chan%d reset - %d descs waiting, %d total desc\n",
317 chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
318
319 spin_unlock_bh(&ioat_chan->desc_lock);
320 spin_unlock_bh(&ioat_chan->cleanup_lock);
321}
322
323/**
324 * ioat_dma_reset_channel - restart a channel
325 * @ioat_chan: IOAT DMA channel handle
326 */
327static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan)
328{
329 u32 chansts, chanerr;
330
331 if (!ioat_chan->used_desc.prev)
332 return;
333
334 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
335 chansts = (ioat_chan->completion_virt->low
336 & IOAT_CHANSTS_DMA_TRANSFER_STATUS);
337 if (chanerr) {
338 dev_err(&ioat_chan->device->pdev->dev,
339 "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
340 chan_num(ioat_chan), chansts, chanerr);
341 writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
342 }
343
344 /*
345 * whack it upside the head with a reset
346 * and wait for things to settle out.
347 * force the pending count to a really big negative
348 * to make sure no one forces an issue_pending
349 * while we're waiting.
350 */
351
352 spin_lock_bh(&ioat_chan->desc_lock);
353 ioat_chan->pending = INT_MIN;
354 writeb(IOAT_CHANCMD_RESET,
355 ioat_chan->reg_base
356 + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
357 spin_unlock_bh(&ioat_chan->desc_lock);
358
359 /* schedule the 2nd half instead of sleeping a long time */
360 schedule_delayed_work(&ioat_chan->work, RESET_DELAY);
361}
362
363/**
364 * ioat_dma_chan_watchdog - watch for stuck channels
 * @work: the delayed work item embedded in the device (device->work)
 *
 * Visits every channel of the device and then re-arms itself to run again
 * after WATCHDOG_DELAY.  Per channel, one of three things happens:
 *
 *  - ver.1.2 channel that looks stuck (no new completion since the last
 *    pass, descriptors still queued, nothing pending): the completed
 *    descriptor address is read from CHANSTS.  If it is non-zero and
 *    differs from both watchdog_completion and last_compl_desc_addr_hw it
 *    is copied into the completion writeback area; otherwise the channel
 *    is reset.
 *  - ver.2.0 channel with no new completion since the last pass: pending
 *    descriptors are issued when the pending count is below
 *    ioat_pending_level; otherwise the channel is reset.
 *  - anything else: the current last_completion is recorded for the next
 *    pass.
365 */
366static void ioat_dma_chan_watchdog(struct work_struct *work)
367{
368 struct ioatdma_device *device =
369 container_of(work, struct ioatdma_device, work.work);
370 struct ioat_dma_chan *ioat_chan;
371 int i;
372
 /* lets the two 32-bit CHANSTS register reads be used as one 64-bit value */
373 union {
374 u64 full;
375 struct {
376 u32 low;
377 u32 high;
378 };
379 } completion_hw;
380 unsigned long compl_desc_addr_hw;
381
382 for (i = 0; i < device->common.chancnt; i++) {
383 ioat_chan = ioat_lookup_chan_by_index(device, i);
384
385 if (ioat_chan->device->version == IOAT_VER_1_2
386 /* have we started processing anything yet */
387 && ioat_chan->last_completion
388 /* have we completed any since last watchdog cycle? */
389 && (ioat_chan->last_completion ==
390 ioat_chan->watchdog_completion)
391 /* has TCP stuck on one cookie since last watchdog? */
392 && (ioat_chan->watchdog_tcp_cookie ==
393 ioat_chan->watchdog_last_tcp_cookie)
394 && (ioat_chan->watchdog_tcp_cookie !=
395 ioat_chan->completed_cookie)
396 /* is there something in the chain to be processed? */
397 /* CB1 chain always has at least the last one processed */
398 && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next)
399 && ioat_chan->pending == 0) {
400
401 /*
402 * check CHANSTS register for completed
403 * descriptor address.
404 * if it is different than completion writeback,
405 * it is not zero
406 * and it has changed since the last watchdog
407 * we can assume that channel
408 * is still working correctly
409 * and the problem is in completion writeback.
410 * update completion writeback
411 * with actual CHANSTS value
412 * else
413 * try resetting the channel
414 */
415
416 completion_hw.low = readl(ioat_chan->reg_base +
417 IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version));
418 completion_hw.high = readl(ioat_chan->reg_base +
419 IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version));
420#if (BITS_PER_LONG == 64)
421 compl_desc_addr_hw =
422 completion_hw.full
423 & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
424#else
425 compl_desc_addr_hw =
426 completion_hw.low & IOAT_LOW_COMPLETION_MASK;
427#endif
428
429 if ((compl_desc_addr_hw != 0)
430 && (compl_desc_addr_hw != ioat_chan->watchdog_completion)
431 && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) {
432 ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
433 ioat_chan->completion_virt->low = completion_hw.low;
434 ioat_chan->completion_virt->high = completion_hw.high;
435 } else {
436 ioat_dma_reset_channel(ioat_chan);
437 ioat_chan->watchdog_completion = 0;
438 ioat_chan->last_compl_desc_addr_hw = 0;
439 }
440
441 /*
442 * for version 2.0 if there are descriptors yet to be processed
443 * and the last completed hasn't changed since the last watchdog
444 * if they haven't hit the pending level
445 * issue the pending to push them through
446 * else
447 * try resetting the channel
448 */
449 } else if (ioat_chan->device->version == IOAT_VER_2_0
450 && ioat_chan->used_desc.prev
451 && ioat_chan->last_completion
452 && ioat_chan->last_completion == ioat_chan->watchdog_completion) {
453
454 if (ioat_chan->pending < ioat_pending_level)
455 ioat2_dma_memcpy_issue_pending(&ioat_chan->common);
456 else {
457 ioat_dma_reset_channel(ioat_chan);
458 ioat_chan->watchdog_completion = 0;
459 }
460 } else {
461 ioat_chan->last_compl_desc_addr_hw = 0;
462 ioat_chan->watchdog_completion
463 = ioat_chan->last_completion;
464 }
465
 /* remember the cookie seen on this pass for the next comparison */
466 ioat_chan->watchdog_last_tcp_cookie =
467 ioat_chan->watchdog_tcp_cookie;
468 }
469
470 schedule_delayed_work(&device->work, WATCHDOG_DELAY);
471}
472
/*
 * ioat1_tx_submit - queue a prepared copy on a ver.1 channel
 * @tx: the async_tx embedded in the first software descriptor
 *
 * The source, destination and length stored in the first descriptor are
 * split into chunks of at most xfercap bytes.  Each chunk gets its own
 * hardware descriptor; further descriptors are pulled with
 * ioat1_dma_get_next_descriptor() as needed.  Only the last descriptor of
 * the chain carries the completion-status (and, if a callback is set, the
 * interrupt) control bits, the callback, the cookie and the original
 * src/dst/len.  The new chain is then linked after the current tail of the
 * used list, and the hardware is kicked once the pending count reaches
 * ioat_pending_level.
 *
 * Takes and releases desc_lock.  Returns the assigned cookie, or -ENOMEM
 * if no further descriptor could be obtained.
 */
473static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
474{
475 struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
476 struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
477 struct ioat_desc_sw *prev, *new;
478 struct ioat_dma_descriptor *hw;
479 dma_cookie_t cookie;
480 LIST_HEAD(new_chain);
481 u32 copy;
482 size_t len;
483 dma_addr_t src, dst;
484 unsigned long orig_flags;
485 unsigned int desc_count = 0;
486
487 /* src and dest and len are stored in the initial descriptor */
488 len = first->len;
489 src = first->src;
490 dst = first->dst;
491 orig_flags = first->async_tx.flags;
492 new = first;
493
494 spin_lock_bh(&ioat_chan->desc_lock);
495 prev = to_ioat_desc(ioat_chan->used_desc.prev);
496 prefetch(prev->hw);
497 do {
498 copy = min_t(size_t, len, ioat_chan->xfercap);
499
500 async_tx_ack(&new->async_tx);
501
502 hw = new->hw;
503 hw->size = copy;
504 hw->ctl = 0;
505 hw->src_addr = src;
506 hw->dst_addr = dst;
507 hw->next = 0;
508
509 /* chain together the physical address list for the HW */
510 wmb();
511 prev->hw->next = (u64) new->async_tx.phys;
512
513 len -= copy;
514 dst += copy;
515 src += copy;
516
517 list_add_tail(&new->node, &new_chain);
518 desc_count++;
519 prev = new;
520 } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));
521
 /* NOTE(review): on this failure path the descriptors already linked onto
  * the on-stack new_chain are not moved to any channel list by this
  * function - confirm whether they are recovered elsewhere. */
522 if (!new) {
523 dev_err(&ioat_chan->device->pdev->dev,
524 "tx submit failed\n");
525 spin_unlock_bh(&ioat_chan->desc_lock);
526 return -ENOMEM;
527 }
528
 /* 'hw' still points at the last hardware descriptor filled in above */
529 hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
530 if (first->async_tx.callback) {
531 hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
532 if (first != new) {
533 /* move callback into to last desc */
534 new->async_tx.callback = first->async_tx.callback;
535 new->async_tx.callback_param
536 = first->async_tx.callback_param;
537 first->async_tx.callback = NULL;
538 first->async_tx.callback_param = NULL;
539 }
540 }
541
542 new->tx_cnt = desc_count;
543 new->async_tx.flags = orig_flags; /* client is in control of this ack */
544
545 /* store the original values for use in later cleanup */
546 if (new != first) {
547 new->src = first->src;
548 new->dst = first->dst;
549 new->len = first->len;
550 }
551
552 /* cookie incr and addition to used_list must be atomic */
553 cookie = ioat_chan->common.cookie;
554 cookie++;
 /* cookies stay positive: wrap back to 1 on signed overflow */
555 if (cookie < 0)
556 cookie = 1;
557 ioat_chan->common.cookie = new->async_tx.cookie = cookie;
558
559 /* write address into NextDescriptor field of last desc in chain */
560 to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
561 first->async_tx.phys;
562 list_splice_tail(&new_chain, &ioat_chan->used_desc);
563
564 ioat_chan->dmacount += desc_count;
565 ioat_chan->pending += desc_count;
566 if (ioat_chan->pending >= ioat_pending_level)
567 __ioat1_dma_memcpy_issue_pending(ioat_chan);
568 spin_unlock_bh(&ioat_chan->desc_lock);
569
570 return cookie;
571}
572
/*
 * ioat2_tx_submit - queue a prepared copy on a ver.2 channel
 * @tx: the async_tx embedded in the first software descriptor
 *
 * The source, destination and length stored in the first descriptor are
 * split into chunks of at most xfercap bytes, one hardware descriptor per
 * chunk; further descriptors are pulled with
 * ioat2_dma_get_next_descriptor().  Only the last descriptor carries the
 * completion-status (and, if a callback is set, the interrupt) control
 * bits, the callback, the cookie and the original src/dst/len.  The
 * hardware is kicked once the pending count reaches ioat_pending_level.
 *
 * Locking: this function never takes desc_lock but releases it on both
 * return paths, so it must be entered with desc_lock already held (see the
 * comment in the body).
 *
 * Returns the assigned cookie, or -ENOMEM if no further descriptor could
 * be obtained.
 */
573static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx)
574{
575 struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
576 struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
577 struct ioat_desc_sw *new;
578 struct ioat_dma_descriptor *hw;
579 dma_cookie_t cookie;
580 u32 copy;
581 size_t len;
582 dma_addr_t src, dst;
583 unsigned long orig_flags;
584 unsigned int desc_count = 0;
585
586 /* src and dest and len are stored in the initial descriptor */
587 len = first->len;
588 src = first->src;
589 dst = first->dst;
590 orig_flags = first->async_tx.flags;
591 new = first;
592
593 /*
594 * ioat_chan->desc_lock is still in force in version 2 path
595 * it gets unlocked at end of this function
596 */
597 do {
598 copy = min_t(size_t, len, ioat_chan->xfercap);
599
600 async_tx_ack(&new->async_tx);
601
602 hw = new->hw;
603 hw->size = copy;
604 hw->ctl = 0;
605 hw->src_addr = src;
606 hw->dst_addr = dst;
607
608 len -= copy;
609 dst += copy;
610 src += copy;
611 desc_count++;
612 } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));
613
614 if (!new) {
615 dev_err(&ioat_chan->device->pdev->dev,
616 "tx submit failed\n");
617 spin_unlock_bh(&ioat_chan->desc_lock);
618 return -ENOMEM;
619 }
620
 /* 'hw' still points at the last hardware descriptor filled in above */
621 hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
622 if (first->async_tx.callback) {
623 hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
624 if (first != new) {
625 /* move callback into to last desc */
626 new->async_tx.callback = first->async_tx.callback;
627 new->async_tx.callback_param
628 = first->async_tx.callback_param;
629 first->async_tx.callback = NULL;
630 first->async_tx.callback_param = NULL;
631 }
632 }
633
634 new->tx_cnt = desc_count;
635 new->async_tx.flags = orig_flags; /* client is in control of this ack */
636
637 /* store the original values for use in later cleanup */
638 if (new != first) {
639 new->src = first->src;
640 new->dst = first->dst;
641 new->len = first->len;
642 }
643
644 /* cookie incr and addition to used_list must be atomic */
645 cookie = ioat_chan->common.cookie;
646 cookie++;
 /* cookies stay positive: wrap back to 1 on signed overflow */
647 if (cookie < 0)
648 cookie = 1;
649 ioat_chan->common.cookie = new->async_tx.cookie = cookie;
650
651 ioat_chan->dmacount += desc_count;
652 ioat_chan->pending += desc_count;
653 if (ioat_chan->pending >= ioat_pending_level)
654 __ioat2_dma_memcpy_issue_pending(ioat_chan);
655 spin_unlock_bh(&ioat_chan->desc_lock);
656
657 return cookie;
658}
659
660/**
661 * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
662 * @ioat_chan: the channel supplying the memory pool for the descriptors
663 * @flags: allocation flags
664 */
665static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
666 struct ioat_dma_chan *ioat_chan,
667 gfp_t flags)
668{
669 struct ioat_dma_descriptor *desc;
670 struct ioat_desc_sw *desc_sw;
671 struct ioatdma_device *ioatdma_device;
672 dma_addr_t phys;
673
674 ioatdma_device = to_ioatdma_device(ioat_chan->common.device);
675 desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
676 if (unlikely(!desc))
677 return NULL;
678
679 desc_sw = kzalloc(sizeof(*desc_sw), flags);
680 if (unlikely(!desc_sw)) {
681 pci_pool_free(ioatdma_device->dma_pool, desc, phys);
682 return NULL;
683 }
684
685 memset(desc, 0, sizeof(*desc));
686 dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
687 switch (ioat_chan->device->version) {
688 case IOAT_VER_1_2:
689 desc_sw->async_tx.tx_submit = ioat1_tx_submit;
690 break;
691 case IOAT_VER_2_0:
692 case IOAT_VER_3_0:
693 desc_sw->async_tx.tx_submit = ioat2_tx_submit;
694 break;
695 }
696
697 desc_sw->hw = desc;
698 desc_sw->async_tx.phys = phys;
699
700 return desc_sw;
701}
702
703static int ioat_initial_desc_count = 256;
704module_param(ioat_initial_desc_count, int, 0644);
705MODULE_PARM_DESC(ioat_initial_desc_count,
706 "initial descriptors per channel (default: 256)");
707
708/**
709 * ioat2_dma_massage_chan_desc - link the descriptors into a circle
710 * @ioat_chan: the channel to be massaged
711 */
712static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan)
713{
714 struct ioat_desc_sw *desc, *_desc;
715
716 /* setup used_desc */
717 ioat_chan->used_desc.next = ioat_chan->free_desc.next;
718 ioat_chan->used_desc.prev = NULL;
719
720 /* pull free_desc out of the circle so that every node is a hw
721 * descriptor, but leave it pointing to the list
722 */
723 ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next;
724 ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev;
725
726 /* circle link the hw descriptors */
727 desc = to_ioat_desc(ioat_chan->free_desc.next);
728 desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
729 list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) {
730 desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
731 }
732}
733
734/**
735 * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors
736 * @chan: the channel to be filled out
737 */
738static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
739{
740 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
741 struct ioat_desc_sw *desc;
742 u16 chanctrl;
743 u32 chanerr;
744 int i;
745 LIST_HEAD(tmp_list);
746
747 /* have we already been set up? */
748 if (!list_empty(&ioat_chan->free_desc))
749 return ioat_chan->desccount;
750
751 /* Setup register to interrupt and write completion status on error */
752 chanctrl = IOAT_CHANCTRL_ERR_INT_EN |
753 IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
754 IOAT_CHANCTRL_ERR_COMPLETION_EN;
755 writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
756
757 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
758 if (chanerr) {
759 dev_err(&ioat_chan->device->pdev->dev,
760 "CHANERR = %x, clearing\n", chanerr);
761 writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
762 }
763
764 /* Allocate descriptors */
765 for (i = 0; i < ioat_initial_desc_count; i++) {
766 desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
767 if (!desc) {
768 dev_err(&ioat_chan->device->pdev->dev,
769 "Only %d initial descriptors\n", i);
770 break;
771 }
772 list_add_tail(&desc->node, &tmp_list);
773 }
774 spin_lock_bh(&ioat_chan->desc_lock);
775 ioat_chan->desccount = i;
776 list_splice(&tmp_list, &ioat_chan->free_desc);
777 if (ioat_chan->device->version != IOAT_VER_1_2)
778 ioat2_dma_massage_chan_desc(ioat_chan);
779 spin_unlock_bh(&ioat_chan->desc_lock);
780
781 /* allocate a completion writeback area */
782 /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
783 ioat_chan->completion_virt =
784 pci_pool_alloc(ioat_chan->device->completion_pool,
785 GFP_KERNEL,
786 &ioat_chan->completion_addr);
787 memset(ioat_chan->completion_virt, 0,
788 sizeof(*ioat_chan->completion_virt));
789 writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
790 ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
791 writel(((u64) ioat_chan->completion_addr) >> 32,
792 ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
793
794 tasklet_enable(&ioat_chan->cleanup_task);
795 ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */
796 return ioat_chan->desccount;
797}
798
799/**
800 * ioat_dma_free_chan_resources - release all the descriptors
801 * @chan: the channel to be cleaned
802 */
803static void ioat_dma_free_chan_resources(struct dma_chan *chan)
804{
805 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
806 struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device);
807 struct ioat_desc_sw *desc, *_desc;
808 int in_use_descs = 0;
809
810 /* Before freeing channel resources first check
811 * if they have been previously allocated for this channel.
812 */
813 if (ioat_chan->desccount == 0)
814 return;
815
816 tasklet_disable(&ioat_chan->cleanup_task);
817 ioat_dma_memcpy_cleanup(ioat_chan);
818
819 /* Delay 100ms after reset to allow internal DMA logic to quiesce
820 * before removing DMA descriptor resources.
821 */
822 writeb(IOAT_CHANCMD_RESET,
823 ioat_chan->reg_base
824 + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
825 mdelay(100);
826
827 spin_lock_bh(&ioat_chan->desc_lock);
828 switch (ioat_chan->device->version) {
829 case IOAT_VER_1_2:
830 list_for_each_entry_safe(desc, _desc,
831 &ioat_chan->used_desc, node) {
832 in_use_descs++;
833 list_del(&desc->node);
834 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
835 desc->async_tx.phys);
836 kfree(desc);
837 }
838 list_for_each_entry_safe(desc, _desc,
839 &ioat_chan->free_desc, node) {
840 list_del(&desc->node);
841 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
842 desc->async_tx.phys);
843 kfree(desc);
844 }
845 break;
846 case IOAT_VER_2_0:
847 case IOAT_VER_3_0:
848 list_for_each_entry_safe(desc, _desc,
849 ioat_chan->free_desc.next, node) {
850 list_del(&desc->node);
851 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
852 desc->async_tx.phys);
853 kfree(desc);
854 }
855 desc = to_ioat_desc(ioat_chan->free_desc.next);
856 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
857 desc->async_tx.phys);
858 kfree(desc);
859 INIT_LIST_HEAD(&ioat_chan->free_desc);
860 INIT_LIST_HEAD(&ioat_chan->used_desc);
861 break;
862 }
863 spin_unlock_bh(&ioat_chan->desc_lock);
864
865 pci_pool_free(ioatdma_device->completion_pool,
866 ioat_chan->completion_virt,
867 ioat_chan->completion_addr);
868
869 /* one is ok since we left it on there on purpose */
870 if (in_use_descs > 1)
871 dev_err(&ioat_chan->device->pdev->dev,
872 "Freeing %d in use descriptors!\n",
873 in_use_descs - 1);
874
875 ioat_chan->last_completion = ioat_chan->completion_addr = 0;
876 ioat_chan->pending = 0;
877 ioat_chan->dmacount = 0;
878 ioat_chan->desccount = 0;
879 ioat_chan->watchdog_completion = 0;
880 ioat_chan->last_compl_desc_addr_hw = 0;
881 ioat_chan->watchdog_tcp_cookie =
882 ioat_chan->watchdog_last_tcp_cookie = 0;
883}
884
885/**
886 * ioat_dma_get_next_descriptor - return the next available descriptor
887 * @ioat_chan: IOAT DMA channel handle
888 *
889 * Gets the next descriptor from the chain, and must be called with the
890 * channel's desc_lock held. Allocates more descriptors if the channel
891 * has run out.
892 */
893static struct ioat_desc_sw *
894ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
895{
896 struct ioat_desc_sw *new;
897
898 if (!list_empty(&ioat_chan->free_desc)) {
899 new = to_ioat_desc(ioat_chan->free_desc.next);
900 list_del(&new->node);
901 } else {
902 /* try to get another desc */
903 new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
904 if (!new) {
905 dev_err(&ioat_chan->device->pdev->dev,
906 "alloc failed\n");
907 return NULL;
908 }
909 }
910
911 prefetch(new->hw);
912 return new;
913}
914
915static struct ioat_desc_sw *
916ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
917{
918 struct ioat_desc_sw *new;
919
920 /*
921 * used.prev points to where to start processing
922 * used.next points to next free descriptor
923 * if used.prev == NULL, there are none waiting to be processed
924 * if used.next == used.prev.prev, there is only one free descriptor,
925 * and we need to use it to as a noop descriptor before
926 * linking in a new set of descriptors, since the device
927 * has probably already read the pointer to it
928 */
929 if (ioat_chan->used_desc.prev &&
930 ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) {
931
932 struct ioat_desc_sw *desc;
933 struct ioat_desc_sw *noop_desc;
934 int i;
935
936 /* set up the noop descriptor */
937 noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
938 /* set size to non-zero value (channel returns error when size is 0) */
939 noop_desc->hw->size = NULL_DESC_BUFFER_SIZE;
940 noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
941 noop_desc->hw->src_addr = 0;
942 noop_desc->hw->dst_addr = 0;
943
944 ioat_chan->used_desc.next = ioat_chan->used_desc.next->next;
945 ioat_chan->pending++;
946 ioat_chan->dmacount++;
947
948 /* try to get a few more descriptors */
949 for (i = 16; i; i--) {
950 desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
951 if (!desc) {
952 dev_err(&ioat_chan->device->pdev->dev,
953 "alloc failed\n");
954 break;
955 }
956 list_add_tail(&desc->node, ioat_chan->used_desc.next);
957
958 desc->hw->next
959 = to_ioat_desc(desc->node.next)->async_tx.phys;
960 to_ioat_desc(desc->node.prev)->hw->next
961 = desc->async_tx.phys;
962 ioat_chan->desccount++;
963 }
964
965 ioat_chan->used_desc.next = noop_desc->node.next;
966 }
967 new = to_ioat_desc(ioat_chan->used_desc.next);
968 prefetch(new);
969 ioat_chan->used_desc.next = new->node.next;
970
971 if (ioat_chan->used_desc.prev == NULL)
972 ioat_chan->used_desc.prev = &new->node;
973
974 prefetch(new->hw);
975 return new;
976}
977
978static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
979 struct ioat_dma_chan *ioat_chan)
980{
981 if (!ioat_chan)
982 return NULL;
983
984 switch (ioat_chan->device->version) {
985 case IOAT_VER_1_2:
986 return ioat1_dma_get_next_descriptor(ioat_chan);
987 case IOAT_VER_2_0:
988 case IOAT_VER_3_0:
989 return ioat2_dma_get_next_descriptor(ioat_chan);
990 }
991 return NULL;
992}
993
994static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
995 struct dma_chan *chan,
996 dma_addr_t dma_dest,
997 dma_addr_t dma_src,
998 size_t len,
999 unsigned long flags)
1000{
1001 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
1002 struct ioat_desc_sw *new;
1003
1004 spin_lock_bh(&ioat_chan->desc_lock);
1005 new = ioat_dma_get_next_descriptor(ioat_chan);
1006 spin_unlock_bh(&ioat_chan->desc_lock);
1007
1008 if (new) {
1009 new->len = len;
1010 new->dst = dma_dest;
1011 new->src = dma_src;
1012 new->async_tx.flags = flags;
1013 return &new->async_tx;
1014 } else {
1015 dev_err(&ioat_chan->device->pdev->dev,
1016 "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
1017 chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
1018 return NULL;
1019 }
1020}
1021
1022static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
1023 struct dma_chan *chan,
1024 dma_addr_t dma_dest,
1025 dma_addr_t dma_src,
1026 size_t len,
1027 unsigned long flags)
1028{
1029 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
1030 struct ioat_desc_sw *new;
1031
1032 spin_lock_bh(&ioat_chan->desc_lock);
1033 new = ioat2_dma_get_next_descriptor(ioat_chan);
1034
1035 /*
1036 * leave ioat_chan->desc_lock set in ioat 2 path
1037 * it will get unlocked at end of tx_submit
1038 */
1039
1040 if (new) {
1041 new->len = len;
1042 new->dst = dma_dest;
1043 new->src = dma_src;
1044 new->async_tx.flags = flags;
1045 return &new->async_tx;
1046 } else {
1047 spin_unlock_bh(&ioat_chan->desc_lock);
1048 dev_err(&ioat_chan->device->pdev->dev,
1049 "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
1050 chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
1051 return NULL;
1052 }
1053}
1054
1055static void ioat_dma_cleanup_tasklet(unsigned long data)
1056{
1057 struct ioat_dma_chan *chan = (void *)data;
1058 ioat_dma_memcpy_cleanup(chan);
1059 writew(IOAT_CHANCTRL_INT_DISABLE,
1060 chan->reg_base + IOAT_CHANCTRL_OFFSET);
1061}
1062
1063static void
1064ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc)
1065{
1066 if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
1067 if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
1068 pci_unmap_single(ioat_chan->device->pdev,
1069 pci_unmap_addr(desc, dst),
1070 pci_unmap_len(desc, len),
1071 PCI_DMA_FROMDEVICE);
1072 else
1073 pci_unmap_page(ioat_chan->device->pdev,
1074 pci_unmap_addr(desc, dst),
1075 pci_unmap_len(desc, len),
1076 PCI_DMA_FROMDEVICE);
1077 }
1078
1079 if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
1080 if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
1081 pci_unmap_single(ioat_chan->device->pdev,
1082 pci_unmap_addr(desc, src),
1083 pci_unmap_len(desc, len),
1084 PCI_DMA_TODEVICE);
1085 else
1086 pci_unmap_page(ioat_chan->device->pdev,
1087 pci_unmap_addr(desc, src),
1088 pci_unmap_len(desc, len),
1089 PCI_DMA_TODEVICE);
1090 }
1091}
1092
1093/**
1094 * ioat_dma_memcpy_cleanup - cleanup up finished descriptors
1095 * @chan: ioat channel to be cleaned up
1096 */
1097static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
1098{
1099 unsigned long phys_complete;
1100 struct ioat_desc_sw *desc, *_desc;
1101 dma_cookie_t cookie = 0;
1102 unsigned long desc_phys;
1103 struct ioat_desc_sw *latest_desc;
1104
1105 prefetch(ioat_chan->completion_virt);
1106
1107 if (!spin_trylock_bh(&ioat_chan->cleanup_lock))
1108 return;
1109
1110 /* The completion writeback can happen at any time,
1111 so reads by the driver need to be atomic operations
1112 The descriptor physical addresses are limited to 32-bits
1113 when the CPU can only do a 32-bit mov */
1114
1115#if (BITS_PER_LONG == 64)
1116 phys_complete =
1117 ioat_chan->completion_virt->full
1118 & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
1119#else
1120 phys_complete =
1121 ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
1122#endif
1123
1124 if ((ioat_chan->completion_virt->full
1125 & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
1126 IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
1127 dev_err(&ioat_chan->device->pdev->dev,
1128 "Channel halted, chanerr = %x\n",
1129 readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET));
1130
1131 /* TODO do something to salvage the situation */
1132 }
1133
1134 if (phys_complete == ioat_chan->last_completion) {
1135 spin_unlock_bh(&ioat_chan->cleanup_lock);
1136 /*
1137 * perhaps we're stuck so hard that the watchdog can't go off?
1138 * try to catch it after 2 seconds
1139 */
1140 if (ioat_chan->device->version != IOAT_VER_3_0) {
1141 if (time_after(jiffies,
1142 ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
1143 ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
1144 ioat_chan->last_completion_time = jiffies;
1145 }
1146 }
1147 return;
1148 }
1149 ioat_chan->last_completion_time = jiffies;
1150
1151 cookie = 0;
1152 if (!spin_trylock_bh(&ioat_chan->desc_lock)) {
1153 spin_unlock_bh(&ioat_chan->cleanup_lock);
1154 return;
1155 }
1156
1157 switch (ioat_chan->device->version) {
1158 case IOAT_VER_1_2:
1159 list_for_each_entry_safe(desc, _desc,
1160 &ioat_chan->used_desc, node) {
1161
1162 /*
1163 * Incoming DMA requests may use multiple descriptors,
1164 * due to exceeding xfercap, perhaps. If so, only the
1165 * last one will have a cookie, and require unmapping.
1166 */
1167 if (desc->async_tx.cookie) {
1168 cookie = desc->async_tx.cookie;
1169 ioat_dma_unmap(ioat_chan, desc);
1170 if (desc->async_tx.callback) {
1171 desc->async_tx.callback(desc->async_tx.callback_param);
1172 desc->async_tx.callback = NULL;
1173 }
1174 }
1175
1176 if (desc->async_tx.phys != phys_complete) {
1177 /*
1178 * a completed entry, but not the last, so clean
1179 * up if the client is done with the descriptor
1180 */
1181 if (async_tx_test_ack(&desc->async_tx)) {
1182 list_move_tail(&desc->node,
1183 &ioat_chan->free_desc);
1184 } else
1185 desc->async_tx.cookie = 0;
1186 } else {
1187 /*
1188 * last used desc. Do not remove, so we can
1189 * append from it, but don't look at it next
1190 * time, either
1191 */
1192 desc->async_tx.cookie = 0;
1193
1194 /* TODO check status bits? */
1195 break;
1196 }
1197 }
1198 break;
1199 case IOAT_VER_2_0:
1200 case IOAT_VER_3_0:
1201 /* has some other thread has already cleaned up? */
1202 if (ioat_chan->used_desc.prev == NULL)
1203 break;
1204
1205 /* work backwards to find latest finished desc */
1206 desc = to_ioat_desc(ioat_chan->used_desc.next);
1207 latest_desc = NULL;
1208 do {
1209 desc = to_ioat_desc(desc->node.prev);
1210 desc_phys = (unsigned long)desc->async_tx.phys
1211 & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
1212 if (desc_phys == phys_complete) {
1213 latest_desc = desc;
1214 break;
1215 }
1216 } while (&desc->node != ioat_chan->used_desc.prev);
1217
1218 if (latest_desc != NULL) {
1219
1220 /* work forwards to clear finished descriptors */
1221 for (desc = to_ioat_desc(ioat_chan->used_desc.prev);
1222 &desc->node != latest_desc->node.next &&
1223 &desc->node != ioat_chan->used_desc.next;
1224 desc = to_ioat_desc(desc->node.next)) {
1225 if (desc->async_tx.cookie) {
1226 cookie = desc->async_tx.cookie;
1227 desc->async_tx.cookie = 0;
1228 ioat_dma_unmap(ioat_chan, desc);
1229 if (desc->async_tx.callback) {
1230 desc->async_tx.callback(desc->async_tx.callback_param);
1231 desc->async_tx.callback = NULL;
1232 }
1233 }
1234 }
1235
1236 /* move used.prev up beyond those that are finished */
1237 if (&desc->node == ioat_chan->used_desc.next)
1238 ioat_chan->used_desc.prev = NULL;
1239 else
1240 ioat_chan->used_desc.prev = &desc->node;
1241 }
1242 break;
1243 }
1244
1245 spin_unlock_bh(&ioat_chan->desc_lock);
1246
1247 ioat_chan->last_completion = phys_complete;
1248 if (cookie != 0)
1249 ioat_chan->completed_cookie = cookie;
1250
1251 spin_unlock_bh(&ioat_chan->cleanup_lock);
1252}
1253
1254/**
1255 * ioat_dma_is_complete - poll the status of a IOAT DMA transaction
1256 * @chan: IOAT DMA channel handle
1257 * @cookie: DMA transaction identifier
1258 * @done: if not %NULL, updated with last completed transaction
1259 * @used: if not %NULL, updated with last used transaction
1260 */
1261static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
1262 dma_cookie_t cookie,
1263 dma_cookie_t *done,
1264 dma_cookie_t *used)
1265{
1266 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
1267 dma_cookie_t last_used;
1268 dma_cookie_t last_complete;
1269 enum dma_status ret;
1270
1271 last_used = chan->cookie;
1272 last_complete = ioat_chan->completed_cookie;
1273 ioat_chan->watchdog_tcp_cookie = cookie;
1274
1275 if (done)
1276 *done = last_complete;
1277 if (used)
1278 *used = last_used;
1279
1280 ret = dma_async_is_complete(cookie, last_complete, last_used);
1281 if (ret == DMA_SUCCESS)
1282 return ret;
1283
1284 ioat_dma_memcpy_cleanup(ioat_chan);
1285
1286 last_used = chan->cookie;
1287 last_complete = ioat_chan->completed_cookie;
1288
1289 if (done)
1290 *done = last_complete;
1291 if (used)
1292 *used = last_used;
1293
1294 return dma_async_is_complete(cookie, last_complete, last_used);
1295}
1296
1297static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
1298{
1299 struct ioat_desc_sw *desc;
1300
1301 spin_lock_bh(&ioat_chan->desc_lock);
1302
1303 desc = ioat_dma_get_next_descriptor(ioat_chan);
1304
1305 if (!desc) {
1306 dev_err(&ioat_chan->device->pdev->dev,
1307 "Unable to start null desc - get next desc failed\n");
1308 spin_unlock_bh(&ioat_chan->desc_lock);
1309 return;
1310 }
1311
1312 desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
1313 | IOAT_DMA_DESCRIPTOR_CTL_INT_GN
1314 | IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
1315 /* set size to non-zero value (channel returns error when size is 0) */
1316 desc->hw->size = NULL_DESC_BUFFER_SIZE;
1317 desc->hw->src_addr = 0;
1318 desc->hw->dst_addr = 0;
1319 async_tx_ack(&desc->async_tx);
1320 switch (ioat_chan->device->version) {
1321 case IOAT_VER_1_2:
1322 desc->hw->next = 0;
1323 list_add_tail(&desc->node, &ioat_chan->used_desc);
1324
1325 writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
1326 ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
1327 writel(((u64) desc->async_tx.phys) >> 32,
1328 ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
1329
1330 writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
1331 + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
1332 break;
1333 case IOAT_VER_2_0:
1334 case IOAT_VER_3_0:
1335 writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
1336 ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
1337 writel(((u64) desc->async_tx.phys) >> 32,
1338 ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
1339
1340 ioat_chan->dmacount++;
1341 __ioat2_dma_memcpy_issue_pending(ioat_chan);
1342 break;
1343 }
1344 spin_unlock_bh(&ioat_chan->desc_lock);
1345}
1346
/*
 * Size in bytes of the buffers copied by the self test.
 */
#define IOAT_TEST_SIZE 2000

/* Self-test completion callback: @dma_async_param is a struct completion. */
static void ioat_dma_test_callback(void *dma_async_param)
{
	complete((struct completion *)dma_async_param);
}
1358
1359/**
1360 * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works.
1361 * @device: device to be tested
1362 */
1363static int ioat_dma_self_test(struct ioatdma_device *device)
1364{
1365 int i;
1366 u8 *src;
1367 u8 *dest;
1368 struct dma_chan *dma_chan;
1369 struct dma_async_tx_descriptor *tx;
1370 dma_addr_t dma_dest, dma_src;
1371 dma_cookie_t cookie;
1372 int err = 0;
1373 struct completion cmp;
1374 unsigned long tmo;
1375 unsigned long flags;
1376
1377 src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
1378 if (!src)
1379 return -ENOMEM;
1380 dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
1381 if (!dest) {
1382 kfree(src);
1383 return -ENOMEM;
1384 }
1385
1386 /* Fill in src buffer */
1387 for (i = 0; i < IOAT_TEST_SIZE; i++)
1388 src[i] = (u8)i;
1389
1390 /* Start copy, using first DMA channel */
1391 dma_chan = container_of(device->common.channels.next,
1392 struct dma_chan,
1393 device_node);
1394 if (device->common.device_alloc_chan_resources(dma_chan) < 1) {
1395 dev_err(&device->pdev->dev,
1396 "selftest cannot allocate chan resource\n");
1397 err = -ENODEV;
1398 goto out;
1399 }
1400
1401 dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
1402 DMA_TO_DEVICE);
1403 dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
1404 DMA_FROM_DEVICE);
1405 flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE;
1406 tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
1407 IOAT_TEST_SIZE, flags);
1408 if (!tx) {
1409 dev_err(&device->pdev->dev,
1410 "Self-test prep failed, disabling\n");
1411 err = -ENODEV;
1412 goto free_resources;
1413 }
1414
1415 async_tx_ack(tx);
1416 init_completion(&cmp);
1417 tx->callback = ioat_dma_test_callback;
1418 tx->callback_param = &cmp;
1419 cookie = tx->tx_submit(tx);
1420 if (cookie < 0) {
1421 dev_err(&device->pdev->dev,
1422 "Self-test setup failed, disabling\n");
1423 err = -ENODEV;
1424 goto free_resources;
1425 }
1426 device->common.device_issue_pending(dma_chan);
1427
1428 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1429
1430 if (tmo == 0 ||
1431 device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL)
1432 != DMA_SUCCESS) {
1433 dev_err(&device->pdev->dev,
1434 "Self-test copy timed out, disabling\n");
1435 err = -ENODEV;
1436 goto free_resources;
1437 }
1438 if (memcmp(src, dest, IOAT_TEST_SIZE)) {
1439 dev_err(&device->pdev->dev,
1440 "Self-test copy failed compare, disabling\n");
1441 err = -ENODEV;
1442 goto free_resources;
1443 }
1444
1445free_resources:
1446 device->common.device_free_chan_resources(dma_chan);
1447out:
1448 kfree(src);
1449 kfree(dest);
1450 return err;
1451}
1452
1453static char ioat_interrupt_style[32] = "msix";
1454module_param_string(ioat_interrupt_style, ioat_interrupt_style,
1455 sizeof(ioat_interrupt_style), 0644);
1456MODULE_PARM_DESC(ioat_interrupt_style,
1457 "set ioat interrupt style: msix (default), "
1458 "msix-single-vector, msi, intx)");
1459
1460/**
1461 * ioat_dma_setup_interrupts - setup interrupt handler
1462 * @device: ioat device
1463 */
1464static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
1465{
1466 struct ioat_dma_chan *ioat_chan;
1467 int err, i, j, msixcnt;
1468 u8 intrctrl = 0;
1469
1470 if (!strcmp(ioat_interrupt_style, "msix"))
1471 goto msix;
1472 if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
1473 goto msix_single_vector;
1474 if (!strcmp(ioat_interrupt_style, "msi"))
1475 goto msi;
1476 if (!strcmp(ioat_interrupt_style, "intx"))
1477 goto intx;
1478 dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n",
1479 ioat_interrupt_style);
1480 goto err_no_irq;
1481
1482msix:
1483 /* The number of MSI-X vectors should equal the number of channels */
1484 msixcnt = device->common.chancnt;
1485 for (i = 0; i < msixcnt; i++)
1486 device->msix_entries[i].entry = i;
1487
1488 err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt);
1489 if (err < 0)
1490 goto msi;
1491 if (err > 0)
1492 goto msix_single_vector;
1493
1494 for (i = 0; i < msixcnt; i++) {
1495 ioat_chan = ioat_lookup_chan_by_index(device, i);
1496 err = request_irq(device->msix_entries[i].vector,
1497 ioat_dma_do_interrupt_msix,
1498 0, "ioat-msix", ioat_chan);
1499 if (err) {
1500 for (j = 0; j < i; j++) {
1501 ioat_chan =
1502 ioat_lookup_chan_by_index(device, j);
1503 free_irq(device->msix_entries[j].vector,
1504 ioat_chan);
1505 }
1506 goto msix_single_vector;
1507 }
1508 }
1509 intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
1510 device->irq_mode = msix_multi_vector;
1511 goto done;
1512
1513msix_single_vector:
1514 device->msix_entries[0].entry = 0;
1515 err = pci_enable_msix(device->pdev, device->msix_entries, 1);
1516 if (err)
1517 goto msi;
1518
1519 err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt,
1520 0, "ioat-msix", device);
1521 if (err) {
1522 pci_disable_msix(device->pdev);
1523 goto msi;
1524 }
1525 device->irq_mode = msix_single_vector;
1526 goto done;
1527
1528msi:
1529 err = pci_enable_msi(device->pdev);
1530 if (err)
1531 goto intx;
1532
1533 err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
1534 0, "ioat-msi", device);
1535 if (err) {
1536 pci_disable_msi(device->pdev);
1537 goto intx;
1538 }
1539 /*
1540 * CB 1.2 devices need a bit set in configuration space to enable MSI
1541 */
1542 if (device->version == IOAT_VER_1_2) {
1543 u32 dmactrl;
1544 pci_read_config_dword(device->pdev,
1545 IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
1546 dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
1547 pci_write_config_dword(device->pdev,
1548 IOAT_PCI_DMACTRL_OFFSET, dmactrl);
1549 }
1550 device->irq_mode = msi;
1551 goto done;
1552
1553intx:
1554 err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
1555 IRQF_SHARED, "ioat-intx", device);
1556 if (err)
1557 goto err_no_irq;
1558 device->irq_mode = intx;
1559
1560done:
1561 intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
1562 writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
1563 return 0;
1564
1565err_no_irq:
1566 /* Disable all interrupt generation */
1567 writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
1568 dev_err(&device->pdev->dev, "no usable interrupts\n");
1569 device->irq_mode = none;
1570 return -1;
1571}
1572
1573/**
1574 * ioat_dma_remove_interrupts - remove whatever interrupts were set
1575 * @device: ioat device
1576 */
1577static void ioat_dma_remove_interrupts(struct ioatdma_device *device)
1578{
1579 struct ioat_dma_chan *ioat_chan;
1580 int i;
1581
1582 /* Disable all interrupt generation */
1583 writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
1584
1585 switch (device->irq_mode) {
1586 case msix_multi_vector:
1587 for (i = 0; i < device->common.chancnt; i++) {
1588 ioat_chan = ioat_lookup_chan_by_index(device, i);
1589 free_irq(device->msix_entries[i].vector, ioat_chan);
1590 }
1591 pci_disable_msix(device->pdev);
1592 break;
1593 case msix_single_vector:
1594 free_irq(device->msix_entries[0].vector, device);
1595 pci_disable_msix(device->pdev);
1596 break;
1597 case msi:
1598 free_irq(device->pdev->irq, device);
1599 pci_disable_msi(device->pdev);
1600 break;
1601 case intx:
1602 free_irq(device->pdev->irq, device);
1603 break;
1604 case none:
1605 dev_warn(&device->pdev->dev,
1606 "call to %s without interrupts setup\n", __func__);
1607 }
1608 device->irq_mode = none;
1609}
1610
1611struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
1612 void __iomem *iobase)
1613{
1614 int err;
1615 struct ioatdma_device *device;
1616
1617 device = kzalloc(sizeof(*device), GFP_KERNEL);
1618 if (!device) {
1619 err = -ENOMEM;
1620 goto err_kzalloc;
1621 }
1622 device->pdev = pdev;
1623 device->reg_base = iobase;
1624 device->version = readb(device->reg_base + IOAT_VER_OFFSET);
1625
1626 /* DMA coherent memory pool for DMA descriptor allocations */
1627 device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
1628 sizeof(struct ioat_dma_descriptor),
1629 64, 0);
1630 if (!device->dma_pool) {
1631 err = -ENOMEM;
1632 goto err_dma_pool;
1633 }
1634
1635 device->completion_pool = pci_pool_create("completion_pool", pdev,
1636 sizeof(u64), SMP_CACHE_BYTES,
1637 SMP_CACHE_BYTES);
1638 if (!device->completion_pool) {
1639 err = -ENOMEM;
1640 goto err_completion_pool;
1641 }
1642
1643 INIT_LIST_HEAD(&device->common.channels);
1644 ioat_dma_enumerate_channels(device);
1645
1646 device->common.device_alloc_chan_resources =
1647 ioat_dma_alloc_chan_resources;
1648 device->common.device_free_chan_resources =
1649 ioat_dma_free_chan_resources;
1650 device->common.dev = &pdev->dev;
1651
1652 dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
1653 device->common.device_is_tx_complete = ioat_dma_is_complete;
1654 switch (device->version) {
1655 case IOAT_VER_1_2:
1656 device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
1657 device->common.device_issue_pending =
1658 ioat1_dma_memcpy_issue_pending;
1659 break;
1660 case IOAT_VER_2_0:
1661 case IOAT_VER_3_0:
1662 device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
1663 device->common.device_issue_pending =
1664 ioat2_dma_memcpy_issue_pending;
1665 break;
1666 }
1667
1668 dev_err(&device->pdev->dev,
1669 "Intel(R) I/OAT DMA Engine found,"
1670 " %d channels, device version 0x%02x, driver version %s\n",
1671 device->common.chancnt, device->version, IOAT_DMA_VERSION);
1672
1673 if (!device->common.chancnt) {
1674 dev_err(&device->pdev->dev,
1675 "Intel(R) I/OAT DMA Engine problem found: "
1676 "zero channels detected\n");
1677 goto err_setup_interrupts;
1678 }
1679
1680 err = ioat_dma_setup_interrupts(device);
1681 if (err)
1682 goto err_setup_interrupts;
1683
1684 err = ioat_dma_self_test(device);
1685 if (err)
1686 goto err_self_test;
1687
1688 ioat_set_tcp_copy_break(device);
1689
1690 dma_async_device_register(&device->common);
1691
1692 if (device->version != IOAT_VER_3_0) {
1693 INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
1694 schedule_delayed_work(&device->work,
1695 WATCHDOG_DELAY);
1696 }
1697
1698 return device;
1699
1700err_self_test:
1701 ioat_dma_remove_interrupts(device);
1702err_setup_interrupts:
1703 pci_pool_destroy(device->completion_pool);
1704err_completion_pool:
1705 pci_pool_destroy(device->dma_pool);
1706err_dma_pool:
1707 kfree(device);
1708err_kzalloc:
1709 dev_err(&pdev->dev,
1710 "Intel(R) I/OAT DMA Engine initialization failed\n");
1711 return NULL;
1712}
1713
1714void ioat_dma_remove(struct ioatdma_device *device)
1715{
1716 struct dma_chan *chan, *_chan;
1717 struct ioat_dma_chan *ioat_chan;
1718
1719 if (device->version != IOAT_VER_3_0)
1720 cancel_delayed_work(&device->work);
1721
1722 ioat_dma_remove_interrupts(device);
1723
1724 dma_async_device_unregister(&device->common);
1725
1726 pci_pool_destroy(device->dma_pool);
1727 pci_pool_destroy(device->completion_pool);
1728
1729 iounmap(device->reg_base);
1730 pci_release_regions(device->pdev);
1731 pci_disable_device(device->pdev);
1732
1733 list_for_each_entry_safe(chan, _chan,
1734 &device->common.channels, device_node) {
1735 ioat_chan = to_ioat_chan(chan);
1736 list_del(&chan->device_node);
1737 kfree(ioat_chan);
1738 }
1739 kfree(device);
1740}
1741
diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
deleted file mode 100644
index a52ff4bd4601..000000000000
--- a/drivers/dma/ioatdma.h
+++ /dev/null
@@ -1,165 +0,0 @@
1/*
2 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59
16 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * The full GNU General Public License is included in this distribution in the
19 * file called COPYING.
20 */
21#ifndef IOATDMA_H
22#define IOATDMA_H
23
24#include <linux/dmaengine.h>
25#include "ioatdma_hw.h"
26#include <linux/init.h>
27#include <linux/dmapool.h>
28#include <linux/cache.h>
29#include <linux/pci_ids.h>
30#include <net/tcp.h>
31
32#define IOAT_DMA_VERSION "3.64"
33
34enum ioat_interrupt {
35 none = 0,
36 msix_multi_vector = 1,
37 msix_single_vector = 2,
38 msi = 3,
39 intx = 4,
40};
41
42#define IOAT_LOW_COMPLETION_MASK 0xffffffc0
43#define IOAT_DMA_DCA_ANY_CPU ~0
44#define IOAT_WATCHDOG_PERIOD (2 * HZ)
45
46
47/**
48 * struct ioatdma_device - internal representation of an IOAT device
49 * @pdev: PCI-Express device
50 * @reg_base: MMIO register space base address
51 * @dma_pool: for allocating DMA descriptors
52 * @common: embedded struct dma_device
53 * @version: version of ioatdma device
54 * @irq_mode: which style irq to use
55 * @msix_entries: irq handlers
56 * @idx: per channel data
57 */
58
59struct ioatdma_device {
60 struct pci_dev *pdev;
61 void __iomem *reg_base;
62 struct pci_pool *dma_pool;
63 struct pci_pool *completion_pool;
64 struct dma_device common;
65 u8 version;
66 enum ioat_interrupt irq_mode;
67 struct delayed_work work;
68 struct msix_entry msix_entries[4];
69 struct ioat_dma_chan *idx[4];
70};
71
72/**
73 * struct ioat_dma_chan - internal representation of a DMA channel
74 */
75struct ioat_dma_chan {
76
77 void __iomem *reg_base;
78
79 dma_cookie_t completed_cookie;
80 unsigned long last_completion;
81 unsigned long last_completion_time;
82
83 size_t xfercap; /* XFERCAP register value expanded out */
84
85 spinlock_t cleanup_lock;
86 spinlock_t desc_lock;
87 struct list_head free_desc;
88 struct list_head used_desc;
89 unsigned long watchdog_completion;
90 int watchdog_tcp_cookie;
91 u32 watchdog_last_tcp_cookie;
92 struct delayed_work work;
93
94 int pending;
95 int dmacount;
96 int desccount;
97
98 struct ioatdma_device *device;
99 struct dma_chan common;
100
101 dma_addr_t completion_addr;
102 union {
103 u64 full; /* HW completion writeback */
104 struct {
105 u32 low;
106 u32 high;
107 };
108 } *completion_virt;
109 unsigned long last_compl_desc_addr_hw;
110 struct tasklet_struct cleanup_task;
111};
112
113/* wrapper around hardware descriptor format + additional software fields */
114
115/**
116 * struct ioat_desc_sw - wrapper around hardware descriptor
117 * @hw: hardware DMA descriptor
118 * @node: this descriptor will either be on the free list,
119 * or attached to a transaction list (async_tx.tx_list)
120 * @tx_cnt: number of descriptors required to complete the transaction
121 * @async_tx: the generic software descriptor for all engines
122 */
123struct ioat_desc_sw {
124 struct ioat_dma_descriptor *hw;
125 struct list_head node;
126 int tx_cnt;
127 size_t len;
128 dma_addr_t src;
129 dma_addr_t dst;
130 struct dma_async_tx_descriptor async_tx;
131};
132
/*
 * ioat_set_tcp_copy_break - set sysctl_tcp_dma_copybreak for this hardware
 * @dev: I/OAT device whose ->version selects the value
 *
 * Compiles to an empty function unless CONFIG_NET_DMA is set.  Versions
 * other than 1.2, 2.0 and 3.0 leave the sysctl untouched.
 */
static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev)
{
	#ifdef CONFIG_NET_DMA
	const u8 version = dev->version;

	if (version == IOAT_VER_1_2)
		sysctl_tcp_dma_copybreak = 4096;
	else if (version == IOAT_VER_2_0)
		sysctl_tcp_dma_copybreak = 2048;
	else if (version == IOAT_VER_3_0)
		sysctl_tcp_dma_copybreak = 262144;
	#endif
}
149
150#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE)
151struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
152 void __iomem *iobase);
153void ioat_dma_remove(struct ioatdma_device *device);
154struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
155struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
156struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
157#else
158#define ioat_dma_probe(pdev, iobase) NULL
159#define ioat_dma_remove(device) do { } while (0)
160#define ioat_dca_init(pdev, iobase) NULL
161#define ioat2_dca_init(pdev, iobase) NULL
162#define ioat3_dca_init(pdev, iobase) NULL
163#endif
164
165#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h
deleted file mode 100644
index afa57eef86c9..000000000000
--- a/drivers/dma/ioatdma_hw.h
+++ /dev/null
@@ -1,70 +0,0 @@
1/*
2 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59
16 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * The full GNU General Public License is included in this distribution in the
19 * file called COPYING.
20 */
21#ifndef _IOAT_HW_H_
22#define _IOAT_HW_H_
23
24/* PCI Configuration Space Values */
25#define IOAT_PCI_VID 0x8086
26
27/* CB device ID's */
28#define IOAT_PCI_DID_5000 0x1A38
29#define IOAT_PCI_DID_CNB 0x360B
30#define IOAT_PCI_DID_SCNB 0x65FF
31#define IOAT_PCI_DID_SNB 0x402F
32
33#define IOAT_PCI_RID 0x00
34#define IOAT_PCI_SVID 0x8086
35#define IOAT_PCI_SID 0x8086
36#define IOAT_VER_1_2 0x12 /* Version 1.2 */
37#define IOAT_VER_2_0 0x20 /* Version 2.0 */
38#define IOAT_VER_3_0 0x30 /* Version 3.0 */
39
40struct ioat_dma_descriptor {
41 uint32_t size;
42 uint32_t ctl;
43 uint64_t src_addr;
44 uint64_t dst_addr;
45 uint64_t next;
46 uint64_t rsv1;
47 uint64_t rsv2;
48 uint64_t user1;
49 uint64_t user2;
50};
51
52#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN 0x00000001
53#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN 0x00000002
54#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN 0x00000004
55#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008
56#define IOAT_DMA_DESCRIPTOR_CTL_FRAME 0x00000010
57#define IOAT_DMA_DESCRIPTOR_NUL 0x00000020
58#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040
59#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080
60#define IOAT_DMA_DESCRIPTOR_CTL_BNDL 0x00000100
61#define IOAT_DMA_DESCRIPTOR_CTL_DCA 0x00000200
62#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT 0x00000400
63
64#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000
65#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA 0x00000000
66
67#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA 0x00000001
68#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK 0xFF000000
69
70#endif
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index 2f052265122f..645ca8d54ec4 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -31,6 +31,7 @@
31#include <linux/platform_device.h> 31#include <linux/platform_device.h>
32#include <linux/memory.h> 32#include <linux/memory.h>
33#include <linux/ioport.h> 33#include <linux/ioport.h>
34#include <linux/raid/pq.h>
34 35
35#include <mach/adma.h> 36#include <mach/adma.h>
36 37
@@ -57,65 +58,110 @@ static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
57 } 58 }
58} 59}
59 60
61static void
62iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
63{
64 struct dma_async_tx_descriptor *tx = &desc->async_tx;
65 struct iop_adma_desc_slot *unmap = desc->group_head;
66 struct device *dev = &iop_chan->device->pdev->dev;
67 u32 len = unmap->unmap_len;
68 enum dma_ctrl_flags flags = tx->flags;
69 u32 src_cnt;
70 dma_addr_t addr;
71 dma_addr_t dest;
72
73 src_cnt = unmap->unmap_src_cnt;
74 dest = iop_desc_get_dest_addr(unmap, iop_chan);
75 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
76 enum dma_data_direction dir;
77
78 if (src_cnt > 1) /* is xor? */
79 dir = DMA_BIDIRECTIONAL;
80 else
81 dir = DMA_FROM_DEVICE;
82
83 dma_unmap_page(dev, dest, len, dir);
84 }
85
86 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
87 while (src_cnt--) {
88 addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
89 if (addr == dest)
90 continue;
91 dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
92 }
93 }
94 desc->group_head = NULL;
95}
96
97static void
98iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
99{
100 struct dma_async_tx_descriptor *tx = &desc->async_tx;
101 struct iop_adma_desc_slot *unmap = desc->group_head;
102 struct device *dev = &iop_chan->device->pdev->dev;
103 u32 len = unmap->unmap_len;
104 enum dma_ctrl_flags flags = tx->flags;
105 u32 src_cnt = unmap->unmap_src_cnt;
106 dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
107 dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
108 int i;
109
110 if (tx->flags & DMA_PREP_CONTINUE)
111 src_cnt -= 3;
112
113 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
114 dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
115 dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
116 }
117
118 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
119 dma_addr_t addr;
120
121 for (i = 0; i < src_cnt; i++) {
122 addr = iop_desc_get_src_addr(unmap, iop_chan, i);
123 dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
124 }
125 if (desc->pq_check_result) {
126 dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
127 dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
128 }
129 }
130
131 desc->group_head = NULL;
132}
133
134
60static dma_cookie_t 135static dma_cookie_t
61iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, 136iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
62 struct iop_adma_chan *iop_chan, dma_cookie_t cookie) 137 struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
63{ 138{
64 BUG_ON(desc->async_tx.cookie < 0); 139 struct dma_async_tx_descriptor *tx = &desc->async_tx;
65 if (desc->async_tx.cookie > 0) { 140
66 cookie = desc->async_tx.cookie; 141 BUG_ON(tx->cookie < 0);
67 desc->async_tx.cookie = 0; 142 if (tx->cookie > 0) {
143 cookie = tx->cookie;
144 tx->cookie = 0;
68 145
69 /* call the callback (must not sleep or submit new 146 /* call the callback (must not sleep or submit new
70 * operations to this channel) 147 * operations to this channel)
71 */ 148 */
72 if (desc->async_tx.callback) 149 if (tx->callback)
73 desc->async_tx.callback( 150 tx->callback(tx->callback_param);
74 desc->async_tx.callback_param);
75 151
76 /* unmap dma addresses 152 /* unmap dma addresses
77 * (unmap_single vs unmap_page?) 153 * (unmap_single vs unmap_page?)
78 */ 154 */
79 if (desc->group_head && desc->unmap_len) { 155 if (desc->group_head && desc->unmap_len) {
80 struct iop_adma_desc_slot *unmap = desc->group_head; 156 if (iop_desc_is_pq(desc))
81 struct device *dev = 157 iop_desc_unmap_pq(iop_chan, desc);
82 &iop_chan->device->pdev->dev; 158 else
83 u32 len = unmap->unmap_len; 159 iop_desc_unmap(iop_chan, desc);
84 enum dma_ctrl_flags flags = desc->async_tx.flags;
85 u32 src_cnt;
86 dma_addr_t addr;
87 dma_addr_t dest;
88
89 src_cnt = unmap->unmap_src_cnt;
90 dest = iop_desc_get_dest_addr(unmap, iop_chan);
91 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
92 enum dma_data_direction dir;
93
94 if (src_cnt > 1) /* is xor? */
95 dir = DMA_BIDIRECTIONAL;
96 else
97 dir = DMA_FROM_DEVICE;
98
99 dma_unmap_page(dev, dest, len, dir);
100 }
101
102 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
103 while (src_cnt--) {
104 addr = iop_desc_get_src_addr(unmap,
105 iop_chan,
106 src_cnt);
107 if (addr == dest)
108 continue;
109 dma_unmap_page(dev, addr, len,
110 DMA_TO_DEVICE);
111 }
112 }
113 desc->group_head = NULL;
114 } 160 }
115 } 161 }
116 162
117 /* run dependent operations */ 163 /* run dependent operations */
118 dma_run_dependencies(&desc->async_tx); 164 dma_run_dependencies(tx);
119 165
120 return cookie; 166 return cookie;
121} 167}
@@ -287,7 +333,12 @@ static void iop_adma_tasklet(unsigned long data)
287{ 333{
288 struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data; 334 struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data;
289 335
290 spin_lock(&iop_chan->lock); 336 /* lockdep will flag dependency submissions as potentially
337 * recursive locking, this is not the case as a dependency
338 * submission will never recurse a channel's submit routine.
339 * There are checks in async_tx.c to prevent this.
340 */
341 spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING);
291 __iop_adma_slot_cleanup(iop_chan); 342 __iop_adma_slot_cleanup(iop_chan);
292 spin_unlock(&iop_chan->lock); 343 spin_unlock(&iop_chan->lock);
293} 344}
@@ -370,7 +421,7 @@ retry:
370 } 421 }
371 alloc_tail->group_head = alloc_start; 422 alloc_tail->group_head = alloc_start;
372 alloc_tail->async_tx.cookie = -EBUSY; 423 alloc_tail->async_tx.cookie = -EBUSY;
373 list_splice(&chain, &alloc_tail->async_tx.tx_list); 424 list_splice(&chain, &alloc_tail->tx_list);
374 iop_chan->last_used = last_used; 425 iop_chan->last_used = last_used;
375 iop_desc_clear_next_desc(alloc_start); 426 iop_desc_clear_next_desc(alloc_start);
376 iop_desc_clear_next_desc(alloc_tail); 427 iop_desc_clear_next_desc(alloc_tail);
@@ -429,7 +480,7 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
429 480
430 old_chain_tail = list_entry(iop_chan->chain.prev, 481 old_chain_tail = list_entry(iop_chan->chain.prev,
431 struct iop_adma_desc_slot, chain_node); 482 struct iop_adma_desc_slot, chain_node);
432 list_splice_init(&sw_desc->async_tx.tx_list, 483 list_splice_init(&sw_desc->tx_list,
433 &old_chain_tail->chain_node); 484 &old_chain_tail->chain_node);
434 485
435 /* fix up the hardware chain */ 486 /* fix up the hardware chain */
@@ -496,6 +547,7 @@ static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
496 547
497 dma_async_tx_descriptor_init(&slot->async_tx, chan); 548 dma_async_tx_descriptor_init(&slot->async_tx, chan);
498 slot->async_tx.tx_submit = iop_adma_tx_submit; 549 slot->async_tx.tx_submit = iop_adma_tx_submit;
550 INIT_LIST_HEAD(&slot->tx_list);
499 INIT_LIST_HEAD(&slot->chain_node); 551 INIT_LIST_HEAD(&slot->chain_node);
500 INIT_LIST_HEAD(&slot->slot_node); 552 INIT_LIST_HEAD(&slot->slot_node);
501 hw_desc = (char *) iop_chan->device->dma_desc_pool; 553 hw_desc = (char *) iop_chan->device->dma_desc_pool;
@@ -660,9 +712,9 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
660} 712}
661 713
662static struct dma_async_tx_descriptor * 714static struct dma_async_tx_descriptor *
663iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src, 715iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
664 unsigned int src_cnt, size_t len, u32 *result, 716 unsigned int src_cnt, size_t len, u32 *result,
665 unsigned long flags) 717 unsigned long flags)
666{ 718{
667 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 719 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
668 struct iop_adma_desc_slot *sw_desc, *grp_start; 720 struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -696,6 +748,118 @@ iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
696 return sw_desc ? &sw_desc->async_tx : NULL; 748 return sw_desc ? &sw_desc->async_tx : NULL;
697} 749}
698 750
751static struct dma_async_tx_descriptor *
752iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
753 unsigned int src_cnt, const unsigned char *scf, size_t len,
754 unsigned long flags)
755{
756 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
757 struct iop_adma_desc_slot *sw_desc, *g;
758 int slot_cnt, slots_per_op;
759 int continue_srcs;
760
761 if (unlikely(!len))
762 return NULL;
763 BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
764
765 dev_dbg(iop_chan->device->common.dev,
766 "%s src_cnt: %d len: %u flags: %lx\n",
767 __func__, src_cnt, len, flags);
768
769 if (dmaf_p_disabled_continue(flags))
770 continue_srcs = 1+src_cnt;
771 else if (dmaf_continue(flags))
772 continue_srcs = 3+src_cnt;
773 else
774 continue_srcs = 0+src_cnt;
775
776 spin_lock_bh(&iop_chan->lock);
777 slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op);
778 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
779 if (sw_desc) {
780 int i;
781
782 g = sw_desc->group_head;
783 iop_desc_set_byte_count(g, iop_chan, len);
784
785 /* even if P is disabled its destination address (bits
786 * [3:0]) must match Q. It is ok if P points to an
787 * invalid address, it won't be written.
788 */
789 if (flags & DMA_PREP_PQ_DISABLE_P)
790 dst[0] = dst[1] & 0x7;
791
792 iop_desc_set_pq_addr(g, dst);
793 sw_desc->unmap_src_cnt = src_cnt;
794 sw_desc->unmap_len = len;
795 sw_desc->async_tx.flags = flags;
796 for (i = 0; i < src_cnt; i++)
797 iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
798
799 /* if we are continuing a previous operation factor in
800 * the old p and q values, see the comment for dma_maxpq
801 * in include/linux/dmaengine.h
802 */
803 if (dmaf_p_disabled_continue(flags))
804 iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
805 else if (dmaf_continue(flags)) {
806 iop_desc_set_pq_src_addr(g, i++, dst[0], 0);
807 iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
808 iop_desc_set_pq_src_addr(g, i++, dst[1], 0);
809 }
810 iop_desc_init_pq(g, i, flags);
811 }
812 spin_unlock_bh(&iop_chan->lock);
813
814 return sw_desc ? &sw_desc->async_tx : NULL;
815}
816
817static struct dma_async_tx_descriptor *
818iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
819 unsigned int src_cnt, const unsigned char *scf,
820 size_t len, enum sum_check_flags *pqres,
821 unsigned long flags)
822{
823 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
824 struct iop_adma_desc_slot *sw_desc, *g;
825 int slot_cnt, slots_per_op;
826
827 if (unlikely(!len))
828 return NULL;
829 BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
830
831 dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
832 __func__, src_cnt, len);
833
834 spin_lock_bh(&iop_chan->lock);
835 slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op);
836 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
837 if (sw_desc) {
838 /* for validate operations p and q are tagged onto the
839 * end of the source list
840 */
841 int pq_idx = src_cnt;
842
843 g = sw_desc->group_head;
844 iop_desc_init_pq_zero_sum(g, src_cnt+2, flags);
845 iop_desc_set_pq_zero_sum_byte_count(g, len);
846 g->pq_check_result = pqres;
847 pr_debug("\t%s: g->pq_check_result: %p\n",
848 __func__, g->pq_check_result);
849 sw_desc->unmap_src_cnt = src_cnt+2;
850 sw_desc->unmap_len = len;
851 sw_desc->async_tx.flags = flags;
852 while (src_cnt--)
853 iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
854 src[src_cnt],
855 scf[src_cnt]);
856 iop_desc_set_pq_zero_sum_addr(g, pq_idx, src);
857 }
858 spin_unlock_bh(&iop_chan->lock);
859
860 return sw_desc ? &sw_desc->async_tx : NULL;
861}
862
699static void iop_adma_free_chan_resources(struct dma_chan *chan) 863static void iop_adma_free_chan_resources(struct dma_chan *chan)
700{ 864{
701 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 865 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
@@ -906,7 +1070,7 @@ out:
906 1070
907#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ 1071#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
908static int __devinit 1072static int __devinit
909iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) 1073iop_adma_xor_val_self_test(struct iop_adma_device *device)
910{ 1074{
911 int i, src_idx; 1075 int i, src_idx;
912 struct page *dest; 1076 struct page *dest;
@@ -1002,7 +1166,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
1002 PAGE_SIZE, DMA_TO_DEVICE); 1166 PAGE_SIZE, DMA_TO_DEVICE);
1003 1167
1004 /* skip zero sum if the capability is not present */ 1168 /* skip zero sum if the capability is not present */
1005 if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask)) 1169 if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
1006 goto free_resources; 1170 goto free_resources;
1007 1171
1008 /* zero sum the sources with the destination page */ 1172 /* zero sum the sources with the destination page */
@@ -1016,10 +1180,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
1016 dma_srcs[i] = dma_map_page(dma_chan->device->dev, 1180 dma_srcs[i] = dma_map_page(dma_chan->device->dev,
1017 zero_sum_srcs[i], 0, PAGE_SIZE, 1181 zero_sum_srcs[i], 0, PAGE_SIZE,
1018 DMA_TO_DEVICE); 1182 DMA_TO_DEVICE);
1019 tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, 1183 tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
1020 IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, 1184 IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
1021 &zero_sum_result, 1185 &zero_sum_result,
1022 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 1186 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
1023 1187
1024 cookie = iop_adma_tx_submit(tx); 1188 cookie = iop_adma_tx_submit(tx);
1025 iop_adma_issue_pending(dma_chan); 1189 iop_adma_issue_pending(dma_chan);
@@ -1072,10 +1236,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
1072 dma_srcs[i] = dma_map_page(dma_chan->device->dev, 1236 dma_srcs[i] = dma_map_page(dma_chan->device->dev,
1073 zero_sum_srcs[i], 0, PAGE_SIZE, 1237 zero_sum_srcs[i], 0, PAGE_SIZE,
1074 DMA_TO_DEVICE); 1238 DMA_TO_DEVICE);
1075 tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, 1239 tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
1076 IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, 1240 IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
1077 &zero_sum_result, 1241 &zero_sum_result,
1078 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 1242 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
1079 1243
1080 cookie = iop_adma_tx_submit(tx); 1244 cookie = iop_adma_tx_submit(tx);
1081 iop_adma_issue_pending(dma_chan); 1245 iop_adma_issue_pending(dma_chan);
@@ -1105,6 +1269,170 @@ out:
1105 return err; 1269 return err;
1106} 1270}
1107 1271
1272#ifdef CONFIG_MD_RAID6_PQ
1273static int __devinit
1274iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
1275{
1276 /* combined sources, software pq results, and extra hw pq results */
1277 struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2];
1278 /* ptr to the extra hw pq buffers defined above */
1279 struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2];
1280 /* address conversion buffers (dma_map / page_address) */
1281 void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2];
1282 dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST];
1283 dma_addr_t pq_dest[2];
1284
1285 int i;
1286 struct dma_async_tx_descriptor *tx;
1287 struct dma_chan *dma_chan;
1288 dma_cookie_t cookie;
1289 u32 zero_sum_result;
1290 int err = 0;
1291 struct device *dev;
1292
1293 dev_dbg(device->common.dev, "%s\n", __func__);
1294
1295 for (i = 0; i < ARRAY_SIZE(pq); i++) {
1296 pq[i] = alloc_page(GFP_KERNEL);
1297 if (!pq[i]) {
1298 while (i--)
1299 __free_page(pq[i]);
1300 return -ENOMEM;
1301 }
1302 }
1303
1304 /* Fill in src buffers */
1305 for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
1306 pq_sw[i] = page_address(pq[i]);
1307 memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE);
1308 }
1309 pq_sw[i] = page_address(pq[i]);
1310 pq_sw[i+1] = page_address(pq[i+1]);
1311
1312 dma_chan = container_of(device->common.channels.next,
1313 struct dma_chan,
1314 device_node);
1315 if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
1316 err = -ENODEV;
1317 goto out;
1318 }
1319
1320 dev = dma_chan->device->dev;
1321
1322 /* initialize the dests */
1323 memset(page_address(pq_hw[0]), 0 , PAGE_SIZE);
1324 memset(page_address(pq_hw[1]), 0 , PAGE_SIZE);
1325
1326 /* test pq */
1327 pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE);
1328 pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE);
1329 for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
1330 pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
1331 DMA_TO_DEVICE);
1332
1333 tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src,
1334 IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp,
1335 PAGE_SIZE,
1336 DMA_PREP_INTERRUPT |
1337 DMA_CTRL_ACK);
1338
1339 cookie = iop_adma_tx_submit(tx);
1340 iop_adma_issue_pending(dma_chan);
1341 msleep(8);
1342
1343 if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
1344 DMA_SUCCESS) {
1345 dev_err(dev, "Self-test pq timed out, disabling\n");
1346 err = -ENODEV;
1347 goto free_resources;
1348 }
1349
1350 raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw);
1351
1352 if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST],
1353 page_address(pq_hw[0]), PAGE_SIZE) != 0) {
1354 dev_err(dev, "Self-test p failed compare, disabling\n");
1355 err = -ENODEV;
1356 goto free_resources;
1357 }
1358 if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1],
1359 page_address(pq_hw[1]), PAGE_SIZE) != 0) {
1360 dev_err(dev, "Self-test q failed compare, disabling\n");
1361 err = -ENODEV;
1362 goto free_resources;
1363 }
1364
1365 /* test correct zero sum using the software generated pq values */
1366 for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
1367 pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
1368 DMA_TO_DEVICE);
1369
1370 zero_sum_result = ~0;
1371 tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
1372 pq_src, IOP_ADMA_NUM_SRC_TEST,
1373 raid6_gfexp, PAGE_SIZE, &zero_sum_result,
1374 DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
1375
1376 cookie = iop_adma_tx_submit(tx);
1377 iop_adma_issue_pending(dma_chan);
1378 msleep(8);
1379
1380 if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
1381 DMA_SUCCESS) {
1382 dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
1383 err = -ENODEV;
1384 goto free_resources;
1385 }
1386
1387 if (zero_sum_result != 0) {
1388 dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n",
1389 zero_sum_result);
1390 err = -ENODEV;
1391 goto free_resources;
1392 }
1393
1394 /* test incorrect zero sum */
1395 i = IOP_ADMA_NUM_SRC_TEST;
1396 memset(pq_sw[i] + 100, 0, 100);
1397 memset(pq_sw[i+1] + 200, 0, 200);
1398 for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
1399 pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
1400 DMA_TO_DEVICE);
1401
1402 zero_sum_result = 0;
1403 tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
1404 pq_src, IOP_ADMA_NUM_SRC_TEST,
1405 raid6_gfexp, PAGE_SIZE, &zero_sum_result,
1406 DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
1407
1408 cookie = iop_adma_tx_submit(tx);
1409 iop_adma_issue_pending(dma_chan);
1410 msleep(8);
1411
1412 if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
1413 DMA_SUCCESS) {
1414 dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
1415 err = -ENODEV;
1416 goto free_resources;
1417 }
1418
1419 if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) {
1420 dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n",
1421 zero_sum_result);
1422 err = -ENODEV;
1423 goto free_resources;
1424 }
1425
1426free_resources:
1427 iop_adma_free_chan_resources(dma_chan);
1428out:
1429 i = ARRAY_SIZE(pq);
1430 while (i--)
1431 __free_page(pq[i]);
1432 return err;
1433}
1434#endif
1435
1108static int __devexit iop_adma_remove(struct platform_device *dev) 1436static int __devexit iop_adma_remove(struct platform_device *dev)
1109{ 1437{
1110 struct iop_adma_device *device = platform_get_drvdata(dev); 1438 struct iop_adma_device *device = platform_get_drvdata(dev);
@@ -1192,9 +1520,16 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
1192 dma_dev->max_xor = iop_adma_get_max_xor(); 1520 dma_dev->max_xor = iop_adma_get_max_xor();
1193 dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; 1521 dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
1194 } 1522 }
1195 if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask)) 1523 if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
1196 dma_dev->device_prep_dma_zero_sum = 1524 dma_dev->device_prep_dma_xor_val =
1197 iop_adma_prep_dma_zero_sum; 1525 iop_adma_prep_dma_xor_val;
1526 if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
1527 dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0);
1528 dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq;
1529 }
1530 if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask))
1531 dma_dev->device_prep_dma_pq_val =
1532 iop_adma_prep_dma_pq_val;
1198 if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) 1533 if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
1199 dma_dev->device_prep_dma_interrupt = 1534 dma_dev->device_prep_dma_interrupt =
1200 iop_adma_prep_dma_interrupt; 1535 iop_adma_prep_dma_interrupt;
@@ -1248,23 +1583,35 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
1248 } 1583 }
1249 1584
1250 if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || 1585 if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
1251 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { 1586 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
1252 ret = iop_adma_xor_zero_sum_self_test(adev); 1587 ret = iop_adma_xor_val_self_test(adev);
1253 dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); 1588 dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
1254 if (ret) 1589 if (ret)
1255 goto err_free_iop_chan; 1590 goto err_free_iop_chan;
1256 } 1591 }
1257 1592
1593 if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) &&
1594 dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) {
1595 #ifdef CONFIG_MD_RAID6_PQ
1596 ret = iop_adma_pq_zero_sum_self_test(adev);
1597 dev_dbg(&pdev->dev, "pq self test returned %d\n", ret);
1598 #else
1599 /* can not test raid6, so do not publish capability */
1600 dma_cap_clear(DMA_PQ, dma_dev->cap_mask);
1601 dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask);
1602 ret = 0;
1603 #endif
1604 if (ret)
1605 goto err_free_iop_chan;
1606 }
1607
1258 dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " 1608 dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
1259 "( %s%s%s%s%s%s%s%s%s%s)\n", 1609 "( %s%s%s%s%s%s%s)\n",
1260 dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", 1610 dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
1261 dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", 1611 dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
1262 dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "",
1263 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", 1612 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
1264 dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "", 1613 dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
1265 dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "",
1266 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", 1614 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
1267 dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
1268 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", 1615 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
1269 dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); 1616 dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
1270 1617
@@ -1296,7 +1643,7 @@ static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
1296 if (sw_desc) { 1643 if (sw_desc) {
1297 grp_start = sw_desc->group_head; 1644 grp_start = sw_desc->group_head;
1298 1645
1299 list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain); 1646 list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
1300 async_tx_ack(&sw_desc->async_tx); 1647 async_tx_ack(&sw_desc->async_tx);
1301 iop_desc_init_memcpy(grp_start, 0); 1648 iop_desc_init_memcpy(grp_start, 0);
1302 iop_desc_set_byte_count(grp_start, iop_chan, 0); 1649 iop_desc_set_byte_count(grp_start, iop_chan, 0);
@@ -1352,7 +1699,7 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
1352 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); 1699 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
1353 if (sw_desc) { 1700 if (sw_desc) {
1354 grp_start = sw_desc->group_head; 1701 grp_start = sw_desc->group_head;
1355 list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain); 1702 list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
1356 async_tx_ack(&sw_desc->async_tx); 1703 async_tx_ack(&sw_desc->async_tx);
1357 iop_desc_init_null_xor(grp_start, 2, 0); 1704 iop_desc_init_null_xor(grp_start, 2, 0);
1358 iop_desc_set_byte_count(grp_start, iop_chan, 0); 1705 iop_desc_set_byte_count(grp_start, iop_chan, 0);
diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c
index 9f6fe46a9b87..c0a272c73682 100644
--- a/drivers/dma/iovlock.c
+++ b/drivers/dma/iovlock.c
@@ -183,6 +183,11 @@ dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov,
183 iov_byte_offset, 183 iov_byte_offset,
184 kdata, 184 kdata,
185 copy); 185 copy);
186 /* poll for a descriptor slot */
187 if (unlikely(dma_cookie < 0)) {
188 dma_async_issue_pending(chan);
189 continue;
190 }
186 191
187 len -= copy; 192 len -= copy;
188 iov[iovec_idx].iov_len -= copy; 193 iov[iovec_idx].iov_len -= copy;
@@ -248,6 +253,11 @@ dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
248 page, 253 page,
249 offset, 254 offset,
250 copy); 255 copy);
256 /* poll for a descriptor slot */
257 if (unlikely(dma_cookie < 0)) {
258 dma_async_issue_pending(chan);
259 continue;
260 }
251 261
252 len -= copy; 262 len -= copy;
253 iov[iovec_idx].iov_len -= copy; 263 iov[iovec_idx].iov_len -= copy;
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 3f23eabe09f2..466ab10c1ff1 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -517,7 +517,7 @@ retry:
517 } 517 }
518 alloc_tail->group_head = alloc_start; 518 alloc_tail->group_head = alloc_start;
519 alloc_tail->async_tx.cookie = -EBUSY; 519 alloc_tail->async_tx.cookie = -EBUSY;
520 list_splice(&chain, &alloc_tail->async_tx.tx_list); 520 list_splice(&chain, &alloc_tail->tx_list);
521 mv_chan->last_used = last_used; 521 mv_chan->last_used = last_used;
522 mv_desc_clear_next_desc(alloc_start); 522 mv_desc_clear_next_desc(alloc_start);
523 mv_desc_clear_next_desc(alloc_tail); 523 mv_desc_clear_next_desc(alloc_tail);
@@ -565,14 +565,14 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
565 cookie = mv_desc_assign_cookie(mv_chan, sw_desc); 565 cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
566 566
567 if (list_empty(&mv_chan->chain)) 567 if (list_empty(&mv_chan->chain))
568 list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain); 568 list_splice_init(&sw_desc->tx_list, &mv_chan->chain);
569 else { 569 else {
570 new_hw_chain = 0; 570 new_hw_chain = 0;
571 571
572 old_chain_tail = list_entry(mv_chan->chain.prev, 572 old_chain_tail = list_entry(mv_chan->chain.prev,
573 struct mv_xor_desc_slot, 573 struct mv_xor_desc_slot,
574 chain_node); 574 chain_node);
575 list_splice_init(&grp_start->async_tx.tx_list, 575 list_splice_init(&grp_start->tx_list,
576 &old_chain_tail->chain_node); 576 &old_chain_tail->chain_node);
577 577
578 if (!mv_can_chain(grp_start)) 578 if (!mv_can_chain(grp_start))
@@ -632,6 +632,7 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
632 slot->async_tx.tx_submit = mv_xor_tx_submit; 632 slot->async_tx.tx_submit = mv_xor_tx_submit;
633 INIT_LIST_HEAD(&slot->chain_node); 633 INIT_LIST_HEAD(&slot->chain_node);
634 INIT_LIST_HEAD(&slot->slot_node); 634 INIT_LIST_HEAD(&slot->slot_node);
635 INIT_LIST_HEAD(&slot->tx_list);
635 hw_desc = (char *) mv_chan->device->dma_desc_pool; 636 hw_desc = (char *) mv_chan->device->dma_desc_pool;
636 slot->async_tx.phys = 637 slot->async_tx.phys =
637 (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE]; 638 (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
index 06cafe1ef521..977b592e976b 100644
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -126,9 +126,8 @@ struct mv_xor_chan {
126 * @idx: pool index 126 * @idx: pool index
127 * @unmap_src_cnt: number of xor sources 127 * @unmap_src_cnt: number of xor sources
128 * @unmap_len: transaction bytecount 128 * @unmap_len: transaction bytecount
129 * @tx_list: list of slots that make up a multi-descriptor transaction
129 * @async_tx: support for the async_tx api 130 * @async_tx: support for the async_tx api
130 * @group_list: list of slots that make up a multi-descriptor transaction
131 * for example transfer lengths larger than the supported hw max
132 * @xor_check_result: result of zero sum 131 * @xor_check_result: result of zero sum
133 * @crc32_result: result crc calculation 132 * @crc32_result: result crc calculation
134 */ 133 */
@@ -145,6 +144,7 @@ struct mv_xor_desc_slot {
145 u16 unmap_src_cnt; 144 u16 unmap_src_cnt;
146 u32 value; 145 u32 value;
147 size_t unmap_len; 146 size_t unmap_len;
147 struct list_head tx_list;
148 struct dma_async_tx_descriptor async_tx; 148 struct dma_async_tx_descriptor async_tx;
149 union { 149 union {
150 u32 *xor_check_result; 150 u32 *xor_check_result;
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
new file mode 100644
index 000000000000..b3b065c4e5c1
--- /dev/null
+++ b/drivers/dma/shdma.c
@@ -0,0 +1,786 @@
1/*
2 * Renesas SuperH DMA Engine support
3 *
4 * base is drivers/dma/fsldma.c
5 *
6 * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
7 * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
8 * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
9 *
10 * This is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * - DMA of SuperH does not have Hardware DMA chain mode.
16 * - MAX DMA size is 16MB.
17 *
18 */
19
20#include <linux/init.h>
21#include <linux/module.h>
22#include <linux/interrupt.h>
23#include <linux/dmaengine.h>
24#include <linux/delay.h>
25#include <linux/dma-mapping.h>
26#include <linux/dmapool.h>
27#include <linux/platform_device.h>
28#include <cpu/dma.h>
29#include <asm/dma-sh.h>
30#include "shdma.h"
31
32/* DMA descriptor control */
33#define DESC_LAST (-1)
34#define DESC_COMP (1)
35#define DESC_NCOMP (0)
36
37#define NR_DESCS_PER_CHANNEL 32
38/*
39 * Define the default configuration for dual address memory-memory transfer.
40 * The 0x400 value represents auto-request, external->external.
41 *
42 * And this driver set 4byte burst mode.
43 * If you want to change mode, you need to change RS_DEFAULT of value.
44 * (ex 1byte burst mode -> (RS_DUAL & ~TS_32)
45 */
46#define RS_DEFAULT (RS_DUAL)
47
48#define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id])
49static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
50{
51 ctrl_outl(data, (SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
52}
53
54static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg)
55{
56 return ctrl_inl((SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
57}
58
59static void dmae_init(struct sh_dmae_chan *sh_chan)
60{
61 u32 chcr = RS_DEFAULT; /* default is DUAL mode */
62 sh_dmae_writel(sh_chan, chcr, CHCR);
63}
64
65/*
66 * Reset DMA controller
67 *
68 * SH7780 has two DMAOR register
69 */
70static void sh_dmae_ctl_stop(int id)
71{
72 unsigned short dmaor = dmaor_read_reg(id);
73
74 dmaor &= ~(DMAOR_NMIF | DMAOR_AE);
75 dmaor_write_reg(id, dmaor);
76}
77
78static int sh_dmae_rst(int id)
79{
80 unsigned short dmaor;
81
82 sh_dmae_ctl_stop(id);
83 dmaor = (dmaor_read_reg(id)|DMAOR_INIT);
84
85 dmaor_write_reg(id, dmaor);
86 if ((dmaor_read_reg(id) & (DMAOR_AE | DMAOR_NMIF))) {
87 pr_warning(KERN_ERR "dma-sh: Can't initialize DMAOR.\n");
88 return -EINVAL;
89 }
90 return 0;
91}
92
93static int dmae_is_idle(struct sh_dmae_chan *sh_chan)
94{
95 u32 chcr = sh_dmae_readl(sh_chan, CHCR);
96 if (chcr & CHCR_DE) {
97 if (!(chcr & CHCR_TE))
98 return -EBUSY; /* working */
99 }
100 return 0; /* waiting */
101}
102
103static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan)
104{
105 u32 chcr = sh_dmae_readl(sh_chan, CHCR);
106 return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT];
107}
108
109static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw)
110{
111 sh_dmae_writel(sh_chan, hw.sar, SAR);
112 sh_dmae_writel(sh_chan, hw.dar, DAR);
113 sh_dmae_writel(sh_chan,
114 (hw.tcr >> calc_xmit_shift(sh_chan)), TCR);
115}
116
117static void dmae_start(struct sh_dmae_chan *sh_chan)
118{
119 u32 chcr = sh_dmae_readl(sh_chan, CHCR);
120
121 chcr |= (CHCR_DE|CHCR_IE);
122 sh_dmae_writel(sh_chan, chcr, CHCR);
123}
124
125static void dmae_halt(struct sh_dmae_chan *sh_chan)
126{
127 u32 chcr = sh_dmae_readl(sh_chan, CHCR);
128
129 chcr &= ~(CHCR_DE | CHCR_TE | CHCR_IE);
130 sh_dmae_writel(sh_chan, chcr, CHCR);
131}
132
133static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val)
134{
135 int ret = dmae_is_idle(sh_chan);
136 /* When DMA was working, can not set data to CHCR */
137 if (ret)
138 return ret;
139
140 sh_dmae_writel(sh_chan, val, CHCR);
141 return 0;
142}
143
144#define DMARS1_ADDR 0x04
145#define DMARS2_ADDR 0x08
146#define DMARS_SHIFT 8
147#define DMARS_CHAN_MSK 0x01
148static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
149{
150 u32 addr;
151 int shift = 0;
152 int ret = dmae_is_idle(sh_chan);
153 if (ret)
154 return ret;
155
156 if (sh_chan->id & DMARS_CHAN_MSK)
157 shift = DMARS_SHIFT;
158
159 switch (sh_chan->id) {
160 /* DMARS0 */
161 case 0:
162 case 1:
163 addr = SH_DMARS_BASE;
164 break;
165 /* DMARS1 */
166 case 2:
167 case 3:
168 addr = (SH_DMARS_BASE + DMARS1_ADDR);
169 break;
170 /* DMARS2 */
171 case 4:
172 case 5:
173 addr = (SH_DMARS_BASE + DMARS2_ADDR);
174 break;
175 default:
176 return -EINVAL;
177 }
178
179 ctrl_outw((val << shift) |
180 (ctrl_inw(addr) & (shift ? 0xFF00 : 0x00FF)),
181 addr);
182
183 return 0;
184}
185
186static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
187{
188 struct sh_desc *desc = tx_to_sh_desc(tx);
189 struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan);
190 dma_cookie_t cookie;
191
192 spin_lock_bh(&sh_chan->desc_lock);
193
194 cookie = sh_chan->common.cookie;
195 cookie++;
196 if (cookie < 0)
197 cookie = 1;
198
199 /* If desc only in the case of 1 */
200 if (desc->async_tx.cookie != -EBUSY)
201 desc->async_tx.cookie = cookie;
202 sh_chan->common.cookie = desc->async_tx.cookie;
203
204 list_splice_init(&desc->tx_list, sh_chan->ld_queue.prev);
205
206 spin_unlock_bh(&sh_chan->desc_lock);
207
208 return cookie;
209}
210
211static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan)
212{
213 struct sh_desc *desc, *_desc, *ret = NULL;
214
215 spin_lock_bh(&sh_chan->desc_lock);
216 list_for_each_entry_safe(desc, _desc, &sh_chan->ld_free, node) {
217 if (async_tx_test_ack(&desc->async_tx)) {
218 list_del(&desc->node);
219 ret = desc;
220 break;
221 }
222 }
223 spin_unlock_bh(&sh_chan->desc_lock);
224
225 return ret;
226}
227
228static void sh_dmae_put_desc(struct sh_dmae_chan *sh_chan, struct sh_desc *desc)
229{
230 if (desc) {
231 spin_lock_bh(&sh_chan->desc_lock);
232
233 list_splice_init(&desc->tx_list, &sh_chan->ld_free);
234 list_add(&desc->node, &sh_chan->ld_free);
235
236 spin_unlock_bh(&sh_chan->desc_lock);
237 }
238}
239
240static int sh_dmae_alloc_chan_resources(struct dma_chan *chan)
241{
242 struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
243 struct sh_desc *desc;
244
245 spin_lock_bh(&sh_chan->desc_lock);
246 while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) {
247 spin_unlock_bh(&sh_chan->desc_lock);
248 desc = kzalloc(sizeof(struct sh_desc), GFP_KERNEL);
249 if (!desc) {
250 spin_lock_bh(&sh_chan->desc_lock);
251 break;
252 }
253 dma_async_tx_descriptor_init(&desc->async_tx,
254 &sh_chan->common);
255 desc->async_tx.tx_submit = sh_dmae_tx_submit;
256 desc->async_tx.flags = DMA_CTRL_ACK;
257 INIT_LIST_HEAD(&desc->tx_list);
258 sh_dmae_put_desc(sh_chan, desc);
259
260 spin_lock_bh(&sh_chan->desc_lock);
261 sh_chan->descs_allocated++;
262 }
263 spin_unlock_bh(&sh_chan->desc_lock);
264
265 return sh_chan->descs_allocated;
266}
267
268/*
269 * sh_dmae_free_chan_resources - Free all resources of the channel.
270 */
271static void sh_dmae_free_chan_resources(struct dma_chan *chan)
272{
273 struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
274 struct sh_desc *desc, *_desc;
275 LIST_HEAD(list);
276
277 BUG_ON(!list_empty(&sh_chan->ld_queue));
278 spin_lock_bh(&sh_chan->desc_lock);
279
280 list_splice_init(&sh_chan->ld_free, &list);
281 sh_chan->descs_allocated = 0;
282
283 spin_unlock_bh(&sh_chan->desc_lock);
284
285 list_for_each_entry_safe(desc, _desc, &list, node)
286 kfree(desc);
287}
288
289static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
290 struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
291 size_t len, unsigned long flags)
292{
293 struct sh_dmae_chan *sh_chan;
294 struct sh_desc *first = NULL, *prev = NULL, *new;
295 size_t copy_size;
296
297 if (!chan)
298 return NULL;
299
300 if (!len)
301 return NULL;
302
303 sh_chan = to_sh_chan(chan);
304
305 do {
306 /* Allocate the link descriptor from DMA pool */
307 new = sh_dmae_get_desc(sh_chan);
308 if (!new) {
309 dev_err(sh_chan->dev,
310 "No free memory for link descriptor\n");
311 goto err_get_desc;
312 }
313
314 copy_size = min(len, (size_t)SH_DMA_TCR_MAX);
315
316 new->hw.sar = dma_src;
317 new->hw.dar = dma_dest;
318 new->hw.tcr = copy_size;
319 if (!first)
320 first = new;
321
322 new->mark = DESC_NCOMP;
323 async_tx_ack(&new->async_tx);
324
325 prev = new;
326 len -= copy_size;
327 dma_src += copy_size;
328 dma_dest += copy_size;
329 /* Insert the link descriptor to the LD ring */
330 list_add_tail(&new->node, &first->tx_list);
331 } while (len);
332
333 new->async_tx.flags = flags; /* client is in control of this ack */
334 new->async_tx.cookie = -EBUSY; /* Last desc */
335
336 return &first->async_tx;
337
338err_get_desc:
339 sh_dmae_put_desc(sh_chan, first);
340 return NULL;
341
342}
343
344/*
345 * sh_dmae_chan_ld_cleanup - Clean up link descriptors
346 *
347 * This function clean up the ld_queue of DMA channel.
348 */
349static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan)
350{
351 struct sh_desc *desc, *_desc;
352
353 spin_lock_bh(&sh_chan->desc_lock);
354 list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) {
355 dma_async_tx_callback callback;
356 void *callback_param;
357
358 /* non send data */
359 if (desc->mark == DESC_NCOMP)
360 break;
361
362 /* send data sesc */
363 callback = desc->async_tx.callback;
364 callback_param = desc->async_tx.callback_param;
365
366 /* Remove from ld_queue list */
367 list_splice_init(&desc->tx_list, &sh_chan->ld_free);
368
369 dev_dbg(sh_chan->dev, "link descriptor %p will be recycle.\n",
370 desc);
371
372 list_move(&desc->node, &sh_chan->ld_free);
373 /* Run the link descriptor callback function */
374 if (callback) {
375 spin_unlock_bh(&sh_chan->desc_lock);
376 dev_dbg(sh_chan->dev, "link descriptor %p callback\n",
377 desc);
378 callback(callback_param);
379 spin_lock_bh(&sh_chan->desc_lock);
380 }
381 }
382 spin_unlock_bh(&sh_chan->desc_lock);
383}
384
385static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan)
386{
387 struct list_head *ld_node;
388 struct sh_dmae_regs hw;
389
390 /* DMA work check */
391 if (dmae_is_idle(sh_chan))
392 return;
393
394 /* Find the first un-transfer desciptor */
395 for (ld_node = sh_chan->ld_queue.next;
396 (ld_node != &sh_chan->ld_queue)
397 && (to_sh_desc(ld_node)->mark == DESC_COMP);
398 ld_node = ld_node->next)
399 cpu_relax();
400
401 if (ld_node != &sh_chan->ld_queue) {
402 /* Get the ld start address from ld_queue */
403 hw = to_sh_desc(ld_node)->hw;
404 dmae_set_reg(sh_chan, hw);
405 dmae_start(sh_chan);
406 }
407}
408
/* issue_pending hook: kick off the next queued transfer on @chan. */
static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan)
{
	sh_chan_xfer_ld_queue(to_sh_chan(chan));
}
414
415static enum dma_status sh_dmae_is_complete(struct dma_chan *chan,
416 dma_cookie_t cookie,
417 dma_cookie_t *done,
418 dma_cookie_t *used)
419{
420 struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
421 dma_cookie_t last_used;
422 dma_cookie_t last_complete;
423
424 sh_dmae_chan_ld_cleanup(sh_chan);
425
426 last_used = chan->cookie;
427 last_complete = sh_chan->completed_cookie;
428 if (last_complete == -EBUSY)
429 last_complete = last_used;
430
431 if (done)
432 *done = last_complete;
433
434 if (used)
435 *used = last_used;
436
437 return dma_async_is_complete(cookie, last_complete, last_used);
438}
439
440static irqreturn_t sh_dmae_interrupt(int irq, void *data)
441{
442 irqreturn_t ret = IRQ_NONE;
443 struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
444 u32 chcr = sh_dmae_readl(sh_chan, CHCR);
445
446 if (chcr & CHCR_TE) {
447 /* DMA stop */
448 dmae_halt(sh_chan);
449
450 ret = IRQ_HANDLED;
451 tasklet_schedule(&sh_chan->tasklet);
452 }
453
454 return ret;
455}
456
457#if defined(CONFIG_CPU_SH4)
458static irqreturn_t sh_dmae_err(int irq, void *data)
459{
460 int err = 0;
461 struct sh_dmae_device *shdev = (struct sh_dmae_device *)data;
462
463 /* IRQ Multi */
464 if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
465 int cnt = 0;
466 switch (irq) {
467#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
468 case DMTE6_IRQ:
469 cnt++;
470#endif
471 case DMTE0_IRQ:
472 if (dmaor_read_reg(cnt) & (DMAOR_NMIF | DMAOR_AE)) {
473 disable_irq(irq);
474 return IRQ_HANDLED;
475 }
476 default:
477 return IRQ_NONE;
478 }
479 } else {
480 /* reset dma controller */
481 err = sh_dmae_rst(0);
482 if (err)
483 return err;
484 if (shdev->pdata.mode & SHDMA_DMAOR1) {
485 err = sh_dmae_rst(1);
486 if (err)
487 return err;
488 }
489 disable_irq(irq);
490 return IRQ_HANDLED;
491 }
492}
493#endif
494
495static void dmae_do_tasklet(unsigned long data)
496{
497 struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
498 struct sh_desc *desc, *_desc, *cur_desc = NULL;
499 u32 sar_buf = sh_dmae_readl(sh_chan, SAR);
500 list_for_each_entry_safe(desc, _desc,
501 &sh_chan->ld_queue, node) {
502 if ((desc->hw.sar + desc->hw.tcr) == sar_buf) {
503 cur_desc = desc;
504 break;
505 }
506 }
507
508 if (cur_desc) {
509 switch (cur_desc->async_tx.cookie) {
510 case 0: /* other desc data */
511 break;
512 case -EBUSY: /* last desc */
513 sh_chan->completed_cookie =
514 cur_desc->async_tx.cookie;
515 break;
516 default: /* first desc ( 0 < )*/
517 sh_chan->completed_cookie =
518 cur_desc->async_tx.cookie - 1;
519 break;
520 }
521 cur_desc->mark = DESC_COMP;
522 }
523 /* Next desc */
524 sh_chan_xfer_ld_queue(sh_chan);
525 sh_dmae_chan_ld_cleanup(sh_chan);
526}
527
528static unsigned int get_dmae_irq(unsigned int id)
529{
530 unsigned int irq = 0;
531 if (id < ARRAY_SIZE(dmte_irq_map))
532 irq = dmte_irq_map[id];
533 return irq;
534}
535
536static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id)
537{
538 int err;
539 unsigned int irq = get_dmae_irq(id);
540 unsigned long irqflags = IRQF_DISABLED;
541 struct sh_dmae_chan *new_sh_chan;
542
543 /* alloc channel */
544 new_sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL);
545 if (!new_sh_chan) {
546 dev_err(shdev->common.dev, "No free memory for allocating "
547 "dma channels!\n");
548 return -ENOMEM;
549 }
550
551 new_sh_chan->dev = shdev->common.dev;
552 new_sh_chan->id = id;
553
554 /* Init DMA tasklet */
555 tasklet_init(&new_sh_chan->tasklet, dmae_do_tasklet,
556 (unsigned long)new_sh_chan);
557
558 /* Init the channel */
559 dmae_init(new_sh_chan);
560
561 spin_lock_init(&new_sh_chan->desc_lock);
562
563 /* Init descripter manage list */
564 INIT_LIST_HEAD(&new_sh_chan->ld_queue);
565 INIT_LIST_HEAD(&new_sh_chan->ld_free);
566
567 /* copy struct dma_device */
568 new_sh_chan->common.device = &shdev->common;
569
570 /* Add the channel to DMA device channel list */
571 list_add_tail(&new_sh_chan->common.device_node,
572 &shdev->common.channels);
573 shdev->common.chancnt++;
574
575 if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
576 irqflags = IRQF_SHARED;
577#if defined(DMTE6_IRQ)
578 if (irq >= DMTE6_IRQ)
579 irq = DMTE6_IRQ;
580 else
581#endif
582 irq = DMTE0_IRQ;
583 }
584
585 snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id),
586 "sh-dmae%d", new_sh_chan->id);
587
588 /* set up channel irq */
589 err = request_irq(irq, &sh_dmae_interrupt,
590 irqflags, new_sh_chan->dev_id, new_sh_chan);
591 if (err) {
592 dev_err(shdev->common.dev, "DMA channel %d request_irq error "
593 "with return %d\n", id, err);
594 goto err_no_irq;
595 }
596
597 /* CHCR register control function */
598 new_sh_chan->set_chcr = dmae_set_chcr;
599 /* DMARS register control function */
600 new_sh_chan->set_dmars = dmae_set_dmars;
601
602 shdev->chan[id] = new_sh_chan;
603 return 0;
604
605err_no_irq:
606 /* remove from dmaengine device node */
607 list_del(&new_sh_chan->common.device_node);
608 kfree(new_sh_chan);
609 return err;
610}
611
612static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
613{
614 int i;
615
616 for (i = shdev->common.chancnt - 1 ; i >= 0 ; i--) {
617 if (shdev->chan[i]) {
618 struct sh_dmae_chan *shchan = shdev->chan[i];
619 if (!(shdev->pdata.mode & SHDMA_MIX_IRQ))
620 free_irq(dmte_irq_map[i], shchan);
621
622 list_del(&shchan->common.device_node);
623 kfree(shchan);
624 shdev->chan[i] = NULL;
625 }
626 }
627 shdev->common.chancnt = 0;
628}
629
630static int __init sh_dmae_probe(struct platform_device *pdev)
631{
632 int err = 0, cnt, ecnt;
633 unsigned long irqflags = IRQF_DISABLED;
634#if defined(CONFIG_CPU_SH4)
635 int eirq[] = { DMAE0_IRQ,
636#if defined(DMAE1_IRQ)
637 DMAE1_IRQ
638#endif
639 };
640#endif
641 struct sh_dmae_device *shdev;
642
643 shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL);
644 if (!shdev) {
645 dev_err(&pdev->dev, "No enough memory\n");
646 err = -ENOMEM;
647 goto shdev_err;
648 }
649
650 /* get platform data */
651 if (!pdev->dev.platform_data)
652 goto shdev_err;
653
654 /* platform data */
655 memcpy(&shdev->pdata, pdev->dev.platform_data,
656 sizeof(struct sh_dmae_pdata));
657
658 /* reset dma controller */
659 err = sh_dmae_rst(0);
660 if (err)
661 goto rst_err;
662
663 /* SH7780/85/23 has DMAOR1 */
664 if (shdev->pdata.mode & SHDMA_DMAOR1) {
665 err = sh_dmae_rst(1);
666 if (err)
667 goto rst_err;
668 }
669
670 INIT_LIST_HEAD(&shdev->common.channels);
671
672 dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
673 shdev->common.device_alloc_chan_resources
674 = sh_dmae_alloc_chan_resources;
675 shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources;
676 shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy;
677 shdev->common.device_is_tx_complete = sh_dmae_is_complete;
678 shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending;
679 shdev->common.dev = &pdev->dev;
680
681#if defined(CONFIG_CPU_SH4)
682 /* Non Mix IRQ mode SH7722/SH7730 etc... */
683 if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
684 irqflags = IRQF_SHARED;
685 eirq[0] = DMTE0_IRQ;
686#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
687 eirq[1] = DMTE6_IRQ;
688#endif
689 }
690
691 for (ecnt = 0 ; ecnt < ARRAY_SIZE(eirq); ecnt++) {
692 err = request_irq(eirq[ecnt], sh_dmae_err,
693 irqflags, "DMAC Address Error", shdev);
694 if (err) {
695 dev_err(&pdev->dev, "DMA device request_irq"
696 "error (irq %d) with return %d\n",
697 eirq[ecnt], err);
698 goto eirq_err;
699 }
700 }
701#endif /* CONFIG_CPU_SH4 */
702
703 /* Create DMA Channel */
704 for (cnt = 0 ; cnt < MAX_DMA_CHANNELS ; cnt++) {
705 err = sh_dmae_chan_probe(shdev, cnt);
706 if (err)
707 goto chan_probe_err;
708 }
709
710 platform_set_drvdata(pdev, shdev);
711 dma_async_device_register(&shdev->common);
712
713 return err;
714
715chan_probe_err:
716 sh_dmae_chan_remove(shdev);
717
718eirq_err:
719 for (ecnt-- ; ecnt >= 0; ecnt--)
720 free_irq(eirq[ecnt], shdev);
721
722rst_err:
723 kfree(shdev);
724
725shdev_err:
726 return err;
727}
728
729static int __exit sh_dmae_remove(struct platform_device *pdev)
730{
731 struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
732
733 dma_async_device_unregister(&shdev->common);
734
735 if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
736 free_irq(DMTE0_IRQ, shdev);
737#if defined(DMTE6_IRQ)
738 free_irq(DMTE6_IRQ, shdev);
739#endif
740 }
741
742 /* channel data remove */
743 sh_dmae_chan_remove(shdev);
744
745 if (!(shdev->pdata.mode & SHDMA_MIX_IRQ)) {
746 free_irq(DMAE0_IRQ, shdev);
747#if defined(DMAE1_IRQ)
748 free_irq(DMAE1_IRQ, shdev);
749#endif
750 }
751 kfree(shdev);
752
753 return 0;
754}
755
756static void sh_dmae_shutdown(struct platform_device *pdev)
757{
758 struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
759 sh_dmae_ctl_stop(0);
760 if (shdev->pdata.mode & SHDMA_DMAOR1)
761 sh_dmae_ctl_stop(1);
762}
763
764static struct platform_driver sh_dmae_driver = {
765 .remove = __exit_p(sh_dmae_remove),
766 .shutdown = sh_dmae_shutdown,
767 .driver = {
768 .name = "sh-dma-engine",
769 },
770};
771
772static int __init sh_dmae_init(void)
773{
774 return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe);
775}
776module_init(sh_dmae_init);
777
778static void __exit sh_dmae_exit(void)
779{
780 platform_driver_unregister(&sh_dmae_driver);
781}
782module_exit(sh_dmae_exit);
783
784MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>");
785MODULE_DESCRIPTION("Renesas SH DMA Engine driver");
786MODULE_LICENSE("GPL");
diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h
new file mode 100644
index 000000000000..2b4bc15a2c0a
--- /dev/null
+++ b/drivers/dma/shdma.h
@@ -0,0 +1,64 @@
1/*
2 * Renesas SuperH DMA Engine support
3 *
4 * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
5 * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
6 *
7 * This is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 */
13#ifndef __DMA_SHDMA_H
14#define __DMA_SHDMA_H
15
16#include <linux/device.h>
17#include <linux/dmapool.h>
18#include <linux/dmaengine.h>
19
20#define SH_DMA_TCR_MAX 0x00FFFFFF /* 16MB */
21
/*
 * Values for one transfer, as written to the channel's SAR, DAR and TCR
 * registers by dmae_set_reg().
 */
struct sh_dmae_regs {
	u32 sar; /* SAR / source address */
	u32 dar; /* DAR / destination address */
	u32 tcr; /* TCR / transfer count */
};
27
/* Software descriptor describing one transfer chunk of a transaction. */
struct sh_desc {
	struct list_head tx_list;	/* Chunks chained to this descriptor until submit */
	struct sh_dmae_regs hw;		/* Register values for this chunk */
	struct list_head node;		/* Entry on a tx_list, ld_queue or ld_free */
	struct dma_async_tx_descriptor async_tx;	/* dmaengine descriptor */
	int mark;			/* DESC_COMP or DESC_NCOMP */
};
35
/* Per-channel driver state; one instance is allocated for each DMA channel. */
struct sh_dmae_chan {
	dma_cookie_t completed_cookie;	/* The maximum cookie completed */
	spinlock_t desc_lock;		/* Descriptor operation lock */
	struct list_head ld_queue;	/* Link descriptors queue */
	struct list_head ld_free;	/* Link descriptors free */
	struct dma_chan common;		/* DMA common channel */
	struct device *dev;		/* Channel device */
	struct tasklet_struct tasklet;	/* Tasklet */
	int descs_allocated;		/* desc count */
	int id;				/* Raw id of this channel */
	char dev_id[16];		/* unique name per DMAC of channel */

	/* Set chcr */
	int (*set_chcr)(struct sh_dmae_chan *sh_chan, u32 regs);
	/* Set DMA resource */
	int (*set_dmars)(struct sh_dmae_chan *sh_chan, u16 res);
};
53
/* Driver state for the whole DMA controller device. */
struct sh_dmae_device {
	struct dma_device common;	/* dmaengine device */
	struct sh_dmae_chan *chan[MAX_DMA_CHANNELS];	/* Channels, indexed by channel id */
	struct sh_dmae_pdata pdata;	/* Copy of the platform data */
};
59
60#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, common)
61#define to_sh_desc(lh) container_of(lh, struct sh_desc, node)
62#define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx)
63
64#endif /* __DMA_SHDMA_H */
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index 7837930146a4..fb6bb64e8861 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -180,9 +180,8 @@ static struct txx9dmac_desc *txx9dmac_first_queued(struct txx9dmac_chan *dc)
180 180
181static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc) 181static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc)
182{ 182{
183 if (!list_empty(&desc->txd.tx_list)) 183 if (!list_empty(&desc->tx_list))
184 desc = list_entry(desc->txd.tx_list.prev, 184 desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node);
185 struct txx9dmac_desc, desc_node);
186 return desc; 185 return desc;
187} 186}
188 187
@@ -197,6 +196,7 @@ static struct txx9dmac_desc *txx9dmac_desc_alloc(struct txx9dmac_chan *dc,
197 desc = kzalloc(sizeof(*desc), flags); 196 desc = kzalloc(sizeof(*desc), flags);
198 if (!desc) 197 if (!desc)
199 return NULL; 198 return NULL;
199 INIT_LIST_HEAD(&desc->tx_list);
200 dma_async_tx_descriptor_init(&desc->txd, &dc->chan); 200 dma_async_tx_descriptor_init(&desc->txd, &dc->chan);
201 desc->txd.tx_submit = txx9dmac_tx_submit; 201 desc->txd.tx_submit = txx9dmac_tx_submit;
202 /* txd.flags will be overwritten in prep funcs */ 202 /* txd.flags will be overwritten in prep funcs */
@@ -245,7 +245,7 @@ static void txx9dmac_sync_desc_for_cpu(struct txx9dmac_chan *dc,
245 struct txx9dmac_dev *ddev = dc->ddev; 245 struct txx9dmac_dev *ddev = dc->ddev;
246 struct txx9dmac_desc *child; 246 struct txx9dmac_desc *child;
247 247
248 list_for_each_entry(child, &desc->txd.tx_list, desc_node) 248 list_for_each_entry(child, &desc->tx_list, desc_node)
249 dma_sync_single_for_cpu(chan2parent(&dc->chan), 249 dma_sync_single_for_cpu(chan2parent(&dc->chan),
250 child->txd.phys, ddev->descsize, 250 child->txd.phys, ddev->descsize,
251 DMA_TO_DEVICE); 251 DMA_TO_DEVICE);
@@ -267,11 +267,11 @@ static void txx9dmac_desc_put(struct txx9dmac_chan *dc,
267 txx9dmac_sync_desc_for_cpu(dc, desc); 267 txx9dmac_sync_desc_for_cpu(dc, desc);
268 268
269 spin_lock_bh(&dc->lock); 269 spin_lock_bh(&dc->lock);
270 list_for_each_entry(child, &desc->txd.tx_list, desc_node) 270 list_for_each_entry(child, &desc->tx_list, desc_node)
271 dev_vdbg(chan2dev(&dc->chan), 271 dev_vdbg(chan2dev(&dc->chan),
272 "moving child desc %p to freelist\n", 272 "moving child desc %p to freelist\n",
273 child); 273 child);
274 list_splice_init(&desc->txd.tx_list, &dc->free_list); 274 list_splice_init(&desc->tx_list, &dc->free_list);
275 dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n", 275 dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n",
276 desc); 276 desc);
277 list_add(&desc->desc_node, &dc->free_list); 277 list_add(&desc->desc_node, &dc->free_list);
@@ -429,7 +429,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
429 param = txd->callback_param; 429 param = txd->callback_param;
430 430
431 txx9dmac_sync_desc_for_cpu(dc, desc); 431 txx9dmac_sync_desc_for_cpu(dc, desc);
432 list_splice_init(&txd->tx_list, &dc->free_list); 432 list_splice_init(&desc->tx_list, &dc->free_list);
433 list_move(&desc->desc_node, &dc->free_list); 433 list_move(&desc->desc_node, &dc->free_list);
434 434
435 if (!ds) { 435 if (!ds) {
@@ -571,7 +571,7 @@ static void txx9dmac_handle_error(struct txx9dmac_chan *dc, u32 csr)
571 "Bad descriptor submitted for DMA! (cookie: %d)\n", 571 "Bad descriptor submitted for DMA! (cookie: %d)\n",
572 bad_desc->txd.cookie); 572 bad_desc->txd.cookie);
573 txx9dmac_dump_desc(dc, &bad_desc->hwdesc); 573 txx9dmac_dump_desc(dc, &bad_desc->hwdesc);
574 list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) 574 list_for_each_entry(child, &bad_desc->tx_list, desc_node)
575 txx9dmac_dump_desc(dc, &child->hwdesc); 575 txx9dmac_dump_desc(dc, &child->hwdesc);
576 /* Pretend the descriptor completed successfully */ 576 /* Pretend the descriptor completed successfully */
577 txx9dmac_descriptor_complete(dc, bad_desc); 577 txx9dmac_descriptor_complete(dc, bad_desc);
@@ -613,7 +613,7 @@ static void txx9dmac_scan_descriptors(struct txx9dmac_chan *dc)
613 return; 613 return;
614 } 614 }
615 615
616 list_for_each_entry(child, &desc->txd.tx_list, desc_node) 616 list_for_each_entry(child, &desc->tx_list, desc_node)
617 if (desc_read_CHAR(dc, child) == chain) { 617 if (desc_read_CHAR(dc, child) == chain) {
618 /* Currently in progress */ 618 /* Currently in progress */
619 if (csr & TXX9_DMA_CSR_ABCHC) 619 if (csr & TXX9_DMA_CSR_ABCHC)
@@ -823,8 +823,7 @@ txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
823 dma_sync_single_for_device(chan2parent(&dc->chan), 823 dma_sync_single_for_device(chan2parent(&dc->chan),
824 prev->txd.phys, ddev->descsize, 824 prev->txd.phys, ddev->descsize,
825 DMA_TO_DEVICE); 825 DMA_TO_DEVICE);
826 list_add_tail(&desc->desc_node, 826 list_add_tail(&desc->desc_node, &first->tx_list);
827 &first->txd.tx_list);
828 } 827 }
829 prev = desc; 828 prev = desc;
830 } 829 }
@@ -919,8 +918,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
919 prev->txd.phys, 918 prev->txd.phys,
920 ddev->descsize, 919 ddev->descsize,
921 DMA_TO_DEVICE); 920 DMA_TO_DEVICE);
922 list_add_tail(&desc->desc_node, 921 list_add_tail(&desc->desc_node, &first->tx_list);
923 &first->txd.tx_list);
924 } 922 }
925 prev = desc; 923 prev = desc;
926 } 924 }
diff --git a/drivers/dma/txx9dmac.h b/drivers/dma/txx9dmac.h
index c907ff01d276..365d42366b9f 100644
--- a/drivers/dma/txx9dmac.h
+++ b/drivers/dma/txx9dmac.h
@@ -231,6 +231,7 @@ struct txx9dmac_desc {
231 231
232 /* THEN values for driver housekeeping */ 232 /* THEN values for driver housekeeping */
233 struct list_head desc_node ____cacheline_aligned; 233 struct list_head desc_node ____cacheline_aligned;
234 struct list_head tx_list;
234 struct dma_async_tx_descriptor txd; 235 struct dma_async_tx_descriptor txd;
235 size_t len; 236 size_t len;
236}; 237};
diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c
index 949c97ff57e3..1f20a042a4f5 100644
--- a/drivers/idle/i7300_idle.c
+++ b/drivers/idle/i7300_idle.c
@@ -29,8 +29,8 @@
29 29
30#include <asm/idle.h> 30#include <asm/idle.h>
31 31
32#include "../dma/ioatdma_hw.h" 32#include "../dma/ioat/hw.h"
33#include "../dma/ioatdma_registers.h" 33#include "../dma/ioat/registers.h"
34 34
35#define I7300_IDLE_DRIVER_VERSION "1.55" 35#define I7300_IDLE_DRIVER_VERSION "1.55"
36#define I7300_PRINT "i7300_idle:" 36#define I7300_PRINT "i7300_idle:"
@@ -126,9 +126,9 @@ static void i7300_idle_ioat_stop(void)
126 udelay(10); 126 udelay(10);
127 127
128 sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & 128 sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
129 IOAT_CHANSTS_DMA_TRANSFER_STATUS; 129 IOAT_CHANSTS_STATUS;
130 130
131 if (sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) 131 if (sts != IOAT_CHANSTS_ACTIVE)
132 break; 132 break;
133 133
134 } 134 }
@@ -160,9 +160,9 @@ static int __init i7300_idle_ioat_selftest(u8 *ctl,
160 udelay(1000); 160 udelay(1000);
161 161
162 chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & 162 chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
163 IOAT_CHANSTS_DMA_TRANSFER_STATUS; 163 IOAT_CHANSTS_STATUS;
164 164
165 if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE) { 165 if (chan_sts != IOAT_CHANSTS_DONE) {
166 /* Not complete, reset the channel */ 166 /* Not complete, reset the channel */
167 writeb(IOAT_CHANCMD_RESET, 167 writeb(IOAT_CHANCMD_RESET,
168 ioat_chanbase + IOAT1_CHANCMD_OFFSET); 168 ioat_chanbase + IOAT1_CHANCMD_OFFSET);
@@ -288,9 +288,9 @@ static void __exit i7300_idle_ioat_exit(void)
288 ioat_chanbase + IOAT1_CHANCMD_OFFSET); 288 ioat_chanbase + IOAT1_CHANCMD_OFFSET);
289 289
290 chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & 290 chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
291 IOAT_CHANSTS_DMA_TRANSFER_STATUS; 291 IOAT_CHANSTS_STATUS;
292 292
293 if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) { 293 if (chan_sts != IOAT_CHANSTS_ACTIVE) {
294 writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET); 294 writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET);
295 break; 295 break;
296 } 296 }
@@ -298,14 +298,14 @@ static void __exit i7300_idle_ioat_exit(void)
298 } 298 }
299 299
300 chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & 300 chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
301 IOAT_CHANSTS_DMA_TRANSFER_STATUS; 301 IOAT_CHANSTS_STATUS;
302 302
303 /* 303 /*
304 * We tried to reset multiple times. If IO A/T channel is still active 304 * We tried to reset multiple times. If IO A/T channel is still active
305 * flag an error and return without cleanup. Memory leak is better 305 * flag an error and return without cleanup. Memory leak is better
306 * than random corruption in that extreme error situation. 306 * than random corruption in that extreme error situation.
307 */ 307 */
308 if (chan_sts == IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) { 308 if (chan_sts == IOAT_CHANSTS_ACTIVE) {
309 printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels." 309 printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels."
310 " Not freeing resources\n"); 310 " Not freeing resources\n");
311 return; 311 return;
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 020f9573fd82..2158377a1359 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -124,6 +124,8 @@ config MD_RAID456
124 select MD_RAID6_PQ 124 select MD_RAID6_PQ
125 select ASYNC_MEMCPY 125 select ASYNC_MEMCPY
126 select ASYNC_XOR 126 select ASYNC_XOR
127 select ASYNC_PQ
128 select ASYNC_RAID6_RECOV
127 ---help--- 129 ---help---
128 A RAID-5 set of N drives with a capacity of C MB per drive provides 130 A RAID-5 set of N drives with a capacity of C MB per drive provides
129 the capacity of C * (N - 1) MB, and protects against a failure 131 the capacity of C * (N - 1) MB, and protects against a failure
@@ -152,9 +154,33 @@ config MD_RAID456
152 154
153 If unsure, say Y. 155 If unsure, say Y.
154 156
157config MULTICORE_RAID456
158 bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)"
159 depends on MD_RAID456
160 depends on SMP
161 depends on EXPERIMENTAL
162 ---help---
163 Enable the raid456 module to dispatch per-stripe raid operations to a
164 thread pool.
165
166 If unsure, say N.
167
155config MD_RAID6_PQ 168config MD_RAID6_PQ
156 tristate 169 tristate
157 170
171config ASYNC_RAID6_TEST
172 tristate "Self test for hardware accelerated raid6 recovery"
173 depends on MD_RAID6_PQ
174 select ASYNC_RAID6_RECOV
175 ---help---
176 This is a one-shot self test that permutes through the
177 recovery of all the possible two disk failure scenarios for a
178 N-disk array. Recovery is performed with the asynchronous
179 raid6 recovery routines, and will optionally use an offload
180 engine if one is available.
181
182 If unsure, say N.
183
158config MD_MULTIPATH 184config MD_MULTIPATH
159 tristate "Multipath I/O support" 185 tristate "Multipath I/O support"
160 depends on BLK_DEV_MD 186 depends on BLK_DEV_MD
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9db84c98a41d..94829804ab7f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -47,7 +47,9 @@
47#include <linux/kthread.h> 47#include <linux/kthread.h>
48#include <linux/raid/pq.h> 48#include <linux/raid/pq.h>
49#include <linux/async_tx.h> 49#include <linux/async_tx.h>
50#include <linux/async.h>
50#include <linux/seq_file.h> 51#include <linux/seq_file.h>
52#include <linux/cpu.h>
51#include "md.h" 53#include "md.h"
52#include "raid5.h" 54#include "raid5.h"
53#include "bitmap.h" 55#include "bitmap.h"
@@ -499,11 +501,18 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
499 struct page *bio_page; 501 struct page *bio_page;
500 int i; 502 int i;
501 int page_offset; 503 int page_offset;
504 struct async_submit_ctl submit;
505 enum async_tx_flags flags = 0;
502 506
503 if (bio->bi_sector >= sector) 507 if (bio->bi_sector >= sector)
504 page_offset = (signed)(bio->bi_sector - sector) * 512; 508 page_offset = (signed)(bio->bi_sector - sector) * 512;
505 else 509 else
506 page_offset = (signed)(sector - bio->bi_sector) * -512; 510 page_offset = (signed)(sector - bio->bi_sector) * -512;
511
512 if (frombio)
513 flags |= ASYNC_TX_FENCE;
514 init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
515
507 bio_for_each_segment(bvl, bio, i) { 516 bio_for_each_segment(bvl, bio, i) {
508 int len = bio_iovec_idx(bio, i)->bv_len; 517 int len = bio_iovec_idx(bio, i)->bv_len;
509 int clen; 518 int clen;
@@ -525,15 +534,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
525 bio_page = bio_iovec_idx(bio, i)->bv_page; 534 bio_page = bio_iovec_idx(bio, i)->bv_page;
526 if (frombio) 535 if (frombio)
527 tx = async_memcpy(page, bio_page, page_offset, 536 tx = async_memcpy(page, bio_page, page_offset,
528 b_offset, clen, 537 b_offset, clen, &submit);
529 ASYNC_TX_DEP_ACK,
530 tx, NULL, NULL);
531 else 538 else
532 tx = async_memcpy(bio_page, page, b_offset, 539 tx = async_memcpy(bio_page, page, b_offset,
533 page_offset, clen, 540 page_offset, clen, &submit);
534 ASYNC_TX_DEP_ACK,
535 tx, NULL, NULL);
536 } 541 }
542 /* chain the operations */
543 submit.depend_tx = tx;
544
537 if (clen < len) /* hit end of page */ 545 if (clen < len) /* hit end of page */
538 break; 546 break;
539 page_offset += len; 547 page_offset += len;
@@ -592,6 +600,7 @@ static void ops_run_biofill(struct stripe_head *sh)
592{ 600{
593 struct dma_async_tx_descriptor *tx = NULL; 601 struct dma_async_tx_descriptor *tx = NULL;
594 raid5_conf_t *conf = sh->raid_conf; 602 raid5_conf_t *conf = sh->raid_conf;
603 struct async_submit_ctl submit;
595 int i; 604 int i;
596 605
597 pr_debug("%s: stripe %llu\n", __func__, 606 pr_debug("%s: stripe %llu\n", __func__,
@@ -615,22 +624,34 @@ static void ops_run_biofill(struct stripe_head *sh)
615 } 624 }
616 625
617 atomic_inc(&sh->count); 626 atomic_inc(&sh->count);
618 async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, 627 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
619 ops_complete_biofill, sh); 628 async_trigger_callback(&submit);
620} 629}
621 630
622static void ops_complete_compute5(void *stripe_head_ref) 631static void mark_target_uptodate(struct stripe_head *sh, int target)
623{ 632{
624 struct stripe_head *sh = stripe_head_ref; 633 struct r5dev *tgt;
625 int target = sh->ops.target;
626 struct r5dev *tgt = &sh->dev[target];
627 634
628 pr_debug("%s: stripe %llu\n", __func__, 635 if (target < 0)
629 (unsigned long long)sh->sector); 636 return;
630 637
638 tgt = &sh->dev[target];
631 set_bit(R5_UPTODATE, &tgt->flags); 639 set_bit(R5_UPTODATE, &tgt->flags);
632 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); 640 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
633 clear_bit(R5_Wantcompute, &tgt->flags); 641 clear_bit(R5_Wantcompute, &tgt->flags);
642}
643
644static void ops_complete_compute(void *stripe_head_ref)
645{
646 struct stripe_head *sh = stripe_head_ref;
647
648 pr_debug("%s: stripe %llu\n", __func__,
649 (unsigned long long)sh->sector);
650
651 /* mark the computed target(s) as uptodate */
652 mark_target_uptodate(sh, sh->ops.target);
653 mark_target_uptodate(sh, sh->ops.target2);
654
634 clear_bit(STRIPE_COMPUTE_RUN, &sh->state); 655 clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
635 if (sh->check_state == check_state_compute_run) 656 if (sh->check_state == check_state_compute_run)
636 sh->check_state = check_state_compute_result; 657 sh->check_state = check_state_compute_result;
@@ -638,16 +659,24 @@ static void ops_complete_compute5(void *stripe_head_ref)
638 release_stripe(sh); 659 release_stripe(sh);
639} 660}
640 661
641static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) 662/* return a pointer to the address conversion region of the scribble buffer */
663static addr_conv_t *to_addr_conv(struct stripe_head *sh,
664 struct raid5_percpu *percpu)
665{
666 return percpu->scribble + sizeof(struct page *) * (sh->disks + 2);
667}
668
669static struct dma_async_tx_descriptor *
670ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
642{ 671{
643 /* kernel stack size limits the total number of disks */
644 int disks = sh->disks; 672 int disks = sh->disks;
645 struct page *xor_srcs[disks]; 673 struct page **xor_srcs = percpu->scribble;
646 int target = sh->ops.target; 674 int target = sh->ops.target;
647 struct r5dev *tgt = &sh->dev[target]; 675 struct r5dev *tgt = &sh->dev[target];
648 struct page *xor_dest = tgt->page; 676 struct page *xor_dest = tgt->page;
649 int count = 0; 677 int count = 0;
650 struct dma_async_tx_descriptor *tx; 678 struct dma_async_tx_descriptor *tx;
679 struct async_submit_ctl submit;
651 int i; 680 int i;
652 681
653 pr_debug("%s: stripe %llu block: %d\n", 682 pr_debug("%s: stripe %llu block: %d\n",
@@ -660,17 +689,215 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
660 689
661 atomic_inc(&sh->count); 690 atomic_inc(&sh->count);
662 691
692 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
693 ops_complete_compute, sh, to_addr_conv(sh, percpu));
663 if (unlikely(count == 1)) 694 if (unlikely(count == 1))
664 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, 695 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
665 0, NULL, ops_complete_compute5, sh);
666 else 696 else
667 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 697 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
668 ASYNC_TX_XOR_ZERO_DST, NULL,
669 ops_complete_compute5, sh);
670 698
671 return tx; 699 return tx;
672} 700}
673 701
702/* set_syndrome_sources - populate source buffers for gen_syndrome
703 * @srcs - (struct page *) array of size sh->disks
704 * @sh - stripe_head to parse
705 *
706 * Populates srcs in proper layout order for the stripe and returns the
707 * 'count' of sources to be used in a call to async_gen_syndrome. The P
708 * destination buffer is recorded in srcs[count] and the Q destination
709 * is recorded in srcs[count+1]].
710 */
711static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
712{
713 int disks = sh->disks;
714 int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
715 int d0_idx = raid6_d0(sh);
716 int count;
717 int i;
718
719 for (i = 0; i < disks; i++)
720 srcs[i] = (void *)raid6_empty_zero_page;
721
722 count = 0;
723 i = d0_idx;
724 do {
725 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
726
727 srcs[slot] = sh->dev[i].page;
728 i = raid6_next_disk(i, disks);
729 } while (i != d0_idx);
730 BUG_ON(count != syndrome_disks);
731
732 return count;
733}
734
735static struct dma_async_tx_descriptor *
736ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
737{
738 int disks = sh->disks;
739 struct page **blocks = percpu->scribble;
740 int target;
741 int qd_idx = sh->qd_idx;
742 struct dma_async_tx_descriptor *tx;
743 struct async_submit_ctl submit;
744 struct r5dev *tgt;
745 struct page *dest;
746 int i;
747 int count;
748
749 if (sh->ops.target < 0)
750 target = sh->ops.target2;
751 else if (sh->ops.target2 < 0)
752 target = sh->ops.target;
753 else
754 /* we should only have one valid target */
755 BUG();
756 BUG_ON(target < 0);
757 pr_debug("%s: stripe %llu block: %d\n",
758 __func__, (unsigned long long)sh->sector, target);
759
760 tgt = &sh->dev[target];
761 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
762 dest = tgt->page;
763
764 atomic_inc(&sh->count);
765
766 if (target == qd_idx) {
767 count = set_syndrome_sources(blocks, sh);
768 blocks[count] = NULL; /* regenerating p is not necessary */
769 BUG_ON(blocks[count+1] != dest); /* q should already be set */
770 init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
771 ops_complete_compute, sh,
772 to_addr_conv(sh, percpu));
773 tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
774 } else {
775 /* Compute any data- or p-drive using XOR */
776 count = 0;
777 for (i = disks; i-- ; ) {
778 if (i == target || i == qd_idx)
779 continue;
780 blocks[count++] = sh->dev[i].page;
781 }
782
783 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
784 NULL, ops_complete_compute, sh,
785 to_addr_conv(sh, percpu));
786 tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
787 }
788
789 return tx;
790}
791
792static struct dma_async_tx_descriptor *
793ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
794{
795 int i, count, disks = sh->disks;
796 int syndrome_disks = sh->ddf_layout ? disks : disks-2;
797 int d0_idx = raid6_d0(sh);
798 int faila = -1, failb = -1;
799 int target = sh->ops.target;
800 int target2 = sh->ops.target2;
801 struct r5dev *tgt = &sh->dev[target];
802 struct r5dev *tgt2 = &sh->dev[target2];
803 struct dma_async_tx_descriptor *tx;
804 struct page **blocks = percpu->scribble;
805 struct async_submit_ctl submit;
806
807 pr_debug("%s: stripe %llu block1: %d block2: %d\n",
808 __func__, (unsigned long long)sh->sector, target, target2);
809 BUG_ON(target < 0 || target2 < 0);
810 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
811 BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags));
812
813 /* we need to open-code set_syndrome_sources to handle the
814 * slot number conversion for 'faila' and 'failb'
815 */
816 for (i = 0; i < disks ; i++)
817 blocks[i] = (void *)raid6_empty_zero_page;
818 count = 0;
819 i = d0_idx;
820 do {
821 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
822
823 blocks[slot] = sh->dev[i].page;
824
825 if (i == target)
826 faila = slot;
827 if (i == target2)
828 failb = slot;
829 i = raid6_next_disk(i, disks);
830 } while (i != d0_idx);
831 BUG_ON(count != syndrome_disks);
832
833 BUG_ON(faila == failb);
834 if (failb < faila)
835 swap(faila, failb);
836 pr_debug("%s: stripe: %llu faila: %d failb: %d\n",
837 __func__, (unsigned long long)sh->sector, faila, failb);
838
839 atomic_inc(&sh->count);
840
841 if (failb == syndrome_disks+1) {
842 /* Q disk is one of the missing disks */
843 if (faila == syndrome_disks) {
844 /* Missing P+Q, just recompute */
845 init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
846 ops_complete_compute, sh,
847 to_addr_conv(sh, percpu));
848 return async_gen_syndrome(blocks, 0, count+2,
849 STRIPE_SIZE, &submit);
850 } else {
851 struct page *dest;
852 int data_target;
853 int qd_idx = sh->qd_idx;
854
855 /* Missing D+Q: recompute D from P, then recompute Q */
856 if (target == qd_idx)
857 data_target = target2;
858 else
859 data_target = target;
860
861 count = 0;
862 for (i = disks; i-- ; ) {
863 if (i == data_target || i == qd_idx)
864 continue;
865 blocks[count++] = sh->dev[i].page;
866 }
867 dest = sh->dev[data_target].page;
868 init_async_submit(&submit,
869 ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
870 NULL, NULL, NULL,
871 to_addr_conv(sh, percpu));
872 tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
873 &submit);
874
875 count = set_syndrome_sources(blocks, sh);
876 init_async_submit(&submit, ASYNC_TX_FENCE, tx,
877 ops_complete_compute, sh,
878 to_addr_conv(sh, percpu));
879 return async_gen_syndrome(blocks, 0, count+2,
880 STRIPE_SIZE, &submit);
881 }
882 } else {
883 init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
884 ops_complete_compute, sh,
885 to_addr_conv(sh, percpu));
886 if (failb == syndrome_disks) {
887 /* We're missing D+P. */
888 return async_raid6_datap_recov(syndrome_disks+2,
889 STRIPE_SIZE, faila,
890 blocks, &submit);
891 } else {
892 /* We're missing D+D. */
893 return async_raid6_2data_recov(syndrome_disks+2,
894 STRIPE_SIZE, faila, failb,
895 blocks, &submit);
896 }
897 }
898}
899
900
674static void ops_complete_prexor(void *stripe_head_ref) 901static void ops_complete_prexor(void *stripe_head_ref)
675{ 902{
676 struct stripe_head *sh = stripe_head_ref; 903 struct stripe_head *sh = stripe_head_ref;
@@ -680,12 +907,13 @@ static void ops_complete_prexor(void *stripe_head_ref)
680} 907}
681 908
682static struct dma_async_tx_descriptor * 909static struct dma_async_tx_descriptor *
683ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) 910ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
911 struct dma_async_tx_descriptor *tx)
684{ 912{
685 /* kernel stack size limits the total number of disks */
686 int disks = sh->disks; 913 int disks = sh->disks;
687 struct page *xor_srcs[disks]; 914 struct page **xor_srcs = percpu->scribble;
688 int count = 0, pd_idx = sh->pd_idx, i; 915 int count = 0, pd_idx = sh->pd_idx, i;
916 struct async_submit_ctl submit;
689 917
690 /* existing parity data subtracted */ 918 /* existing parity data subtracted */
691 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; 919 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
@@ -700,9 +928,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
700 xor_srcs[count++] = dev->page; 928 xor_srcs[count++] = dev->page;
701 } 929 }
702 930
703 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 931 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
704 ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, 932 ops_complete_prexor, sh, to_addr_conv(sh, percpu));
705 ops_complete_prexor, sh); 933 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
706 934
707 return tx; 935 return tx;
708} 936}
@@ -742,17 +970,21 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
742 return tx; 970 return tx;
743} 971}
744 972
745static void ops_complete_postxor(void *stripe_head_ref) 973static void ops_complete_reconstruct(void *stripe_head_ref)
746{ 974{
747 struct stripe_head *sh = stripe_head_ref; 975 struct stripe_head *sh = stripe_head_ref;
748 int disks = sh->disks, i, pd_idx = sh->pd_idx; 976 int disks = sh->disks;
977 int pd_idx = sh->pd_idx;
978 int qd_idx = sh->qd_idx;
979 int i;
749 980
750 pr_debug("%s: stripe %llu\n", __func__, 981 pr_debug("%s: stripe %llu\n", __func__,
751 (unsigned long long)sh->sector); 982 (unsigned long long)sh->sector);
752 983
753 for (i = disks; i--; ) { 984 for (i = disks; i--; ) {
754 struct r5dev *dev = &sh->dev[i]; 985 struct r5dev *dev = &sh->dev[i];
755 if (dev->written || i == pd_idx) 986
987 if (dev->written || i == pd_idx || i == qd_idx)
756 set_bit(R5_UPTODATE, &dev->flags); 988 set_bit(R5_UPTODATE, &dev->flags);
757 } 989 }
758 990
@@ -770,12 +1002,12 @@ static void ops_complete_postxor(void *stripe_head_ref)
770} 1002}
771 1003
772static void 1004static void
773ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) 1005ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
1006 struct dma_async_tx_descriptor *tx)
774{ 1007{
775 /* kernel stack size limits the total number of disks */
776 int disks = sh->disks; 1008 int disks = sh->disks;
777 struct page *xor_srcs[disks]; 1009 struct page **xor_srcs = percpu->scribble;
778 1010 struct async_submit_ctl submit;
779 int count = 0, pd_idx = sh->pd_idx, i; 1011 int count = 0, pd_idx = sh->pd_idx, i;
780 struct page *xor_dest; 1012 struct page *xor_dest;
781 int prexor = 0; 1013 int prexor = 0;
@@ -809,18 +1041,36 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
809 * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST 1041 * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
810 * for the synchronous xor case 1042 * for the synchronous xor case
811 */ 1043 */
812 flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | 1044 flags = ASYNC_TX_ACK |
813 (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); 1045 (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
814 1046
815 atomic_inc(&sh->count); 1047 atomic_inc(&sh->count);
816 1048
817 if (unlikely(count == 1)) { 1049 init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh,
818 flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); 1050 to_addr_conv(sh, percpu));
819 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, 1051 if (unlikely(count == 1))
820 flags, tx, ops_complete_postxor, sh); 1052 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
821 } else 1053 else
822 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 1054 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
823 flags, tx, ops_complete_postxor, sh); 1055}
1056
1057static void
1058ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
1059 struct dma_async_tx_descriptor *tx)
1060{
1061 struct async_submit_ctl submit;
1062 struct page **blocks = percpu->scribble;
1063 int count;
1064
1065 pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
1066
1067 count = set_syndrome_sources(blocks, sh);
1068
1069 atomic_inc(&sh->count);
1070
1071 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
1072 sh, to_addr_conv(sh, percpu));
1073 async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
824} 1074}
825 1075
826static void ops_complete_check(void *stripe_head_ref) 1076static void ops_complete_check(void *stripe_head_ref)
@@ -835,63 +1085,115 @@ static void ops_complete_check(void *stripe_head_ref)
835 release_stripe(sh); 1085 release_stripe(sh);
836} 1086}
837 1087
838static void ops_run_check(struct stripe_head *sh) 1088static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
839{ 1089{
840 /* kernel stack size limits the total number of disks */
841 int disks = sh->disks; 1090 int disks = sh->disks;
842 struct page *xor_srcs[disks]; 1091 int pd_idx = sh->pd_idx;
1092 int qd_idx = sh->qd_idx;
1093 struct page *xor_dest;
1094 struct page **xor_srcs = percpu->scribble;
843 struct dma_async_tx_descriptor *tx; 1095 struct dma_async_tx_descriptor *tx;
844 1096 struct async_submit_ctl submit;
845 int count = 0, pd_idx = sh->pd_idx, i; 1097 int count;
846 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; 1098 int i;
847 1099
848 pr_debug("%s: stripe %llu\n", __func__, 1100 pr_debug("%s: stripe %llu\n", __func__,
849 (unsigned long long)sh->sector); 1101 (unsigned long long)sh->sector);
850 1102
1103 count = 0;
1104 xor_dest = sh->dev[pd_idx].page;
1105 xor_srcs[count++] = xor_dest;
851 for (i = disks; i--; ) { 1106 for (i = disks; i--; ) {
852 struct r5dev *dev = &sh->dev[i]; 1107 if (i == pd_idx || i == qd_idx)
853 if (i != pd_idx) 1108 continue;
854 xor_srcs[count++] = dev->page; 1109 xor_srcs[count++] = sh->dev[i].page;
855 } 1110 }
856 1111
857 tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 1112 init_async_submit(&submit, 0, NULL, NULL, NULL,
858 &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); 1113 to_addr_conv(sh, percpu));
1114 tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
1115 &sh->ops.zero_sum_result, &submit);
1116
1117 atomic_inc(&sh->count);
1118 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
1119 tx = async_trigger_callback(&submit);
1120}
1121
1122static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
1123{
1124 struct page **srcs = percpu->scribble;
1125 struct async_submit_ctl submit;
1126 int count;
1127
1128 pr_debug("%s: stripe %llu checkp: %d\n", __func__,
1129 (unsigned long long)sh->sector, checkp);
1130
1131 count = set_syndrome_sources(srcs, sh);
1132 if (!checkp)
1133 srcs[count] = NULL;
859 1134
860 atomic_inc(&sh->count); 1135 atomic_inc(&sh->count);
861 tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, 1136 init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
862 ops_complete_check, sh); 1137 sh, to_addr_conv(sh, percpu));
1138 async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
1139 &sh->ops.zero_sum_result, percpu->spare_page, &submit);
863} 1140}
864 1141
865static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) 1142static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
866{ 1143{
867 int overlap_clear = 0, i, disks = sh->disks; 1144 int overlap_clear = 0, i, disks = sh->disks;
868 struct dma_async_tx_descriptor *tx = NULL; 1145 struct dma_async_tx_descriptor *tx = NULL;
1146 raid5_conf_t *conf = sh->raid_conf;
1147 int level = conf->level;
1148 struct raid5_percpu *percpu;
1149 unsigned long cpu;
869 1150
1151 cpu = get_cpu();
1152 percpu = per_cpu_ptr(conf->percpu, cpu);
870 if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { 1153 if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
871 ops_run_biofill(sh); 1154 ops_run_biofill(sh);
872 overlap_clear++; 1155 overlap_clear++;
873 } 1156 }
874 1157
875 if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) { 1158 if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
876 tx = ops_run_compute5(sh); 1159 if (level < 6)
877 /* terminate the chain if postxor is not set to be run */ 1160 tx = ops_run_compute5(sh, percpu);
878 if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request)) 1161 else {
1162 if (sh->ops.target2 < 0 || sh->ops.target < 0)
1163 tx = ops_run_compute6_1(sh, percpu);
1164 else
1165 tx = ops_run_compute6_2(sh, percpu);
1166 }
1167 /* terminate the chain if reconstruct is not set to be run */
1168 if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request))
879 async_tx_ack(tx); 1169 async_tx_ack(tx);
880 } 1170 }
881 1171
882 if (test_bit(STRIPE_OP_PREXOR, &ops_request)) 1172 if (test_bit(STRIPE_OP_PREXOR, &ops_request))
883 tx = ops_run_prexor(sh, tx); 1173 tx = ops_run_prexor(sh, percpu, tx);
884 1174
885 if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) { 1175 if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
886 tx = ops_run_biodrain(sh, tx); 1176 tx = ops_run_biodrain(sh, tx);
887 overlap_clear++; 1177 overlap_clear++;
888 } 1178 }
889 1179
890 if (test_bit(STRIPE_OP_POSTXOR, &ops_request)) 1180 if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) {
891 ops_run_postxor(sh, tx); 1181 if (level < 6)
1182 ops_run_reconstruct5(sh, percpu, tx);
1183 else
1184 ops_run_reconstruct6(sh, percpu, tx);
1185 }
892 1186
893 if (test_bit(STRIPE_OP_CHECK, &ops_request)) 1187 if (test_bit(STRIPE_OP_CHECK, &ops_request)) {
894 ops_run_check(sh); 1188 if (sh->check_state == check_state_run)
1189 ops_run_check_p(sh, percpu);
1190 else if (sh->check_state == check_state_run_q)
1191 ops_run_check_pq(sh, percpu, 0);
1192 else if (sh->check_state == check_state_run_pq)
1193 ops_run_check_pq(sh, percpu, 1);
1194 else
1195 BUG();
1196 }
895 1197
896 if (overlap_clear) 1198 if (overlap_clear)
897 for (i = disks; i--; ) { 1199 for (i = disks; i--; ) {
@@ -899,6 +1201,7 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
899 if (test_and_clear_bit(R5_Overlap, &dev->flags)) 1201 if (test_and_clear_bit(R5_Overlap, &dev->flags))
900 wake_up(&sh->raid_conf->wait_for_overlap); 1202 wake_up(&sh->raid_conf->wait_for_overlap);
901 } 1203 }
1204 put_cpu();
902} 1205}
903 1206
904static int grow_one_stripe(raid5_conf_t *conf) 1207static int grow_one_stripe(raid5_conf_t *conf)
@@ -948,6 +1251,28 @@ static int grow_stripes(raid5_conf_t *conf, int num)
948 return 0; 1251 return 0;
949} 1252}
950 1253
1254/**
1255 * scribble_len - return the required size of the scribble region
1256 * @num - total number of disks in the array
1257 *
1258 * The size must be enough to contain:
1259 * 1/ a struct page pointer for each device in the array +2
1260 * 2/ room to convert each entry in (1) to its corresponding dma
1261 * (dma_map_page()) or page (page_address()) address.
1262 *
1263 * Note: the +2 is for the destination buffers of the ddf/raid6 case where we
1264 * calculate over all devices (not just the data blocks), using zeros in place
1265 * of the P and Q blocks.
1266 */
1267static size_t scribble_len(int num)
1268{
1269 size_t len;
1270
1271 len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2);
1272
1273 return len;
1274}
1275
951static int resize_stripes(raid5_conf_t *conf, int newsize) 1276static int resize_stripes(raid5_conf_t *conf, int newsize)
952{ 1277{
953 /* Make all the stripes able to hold 'newsize' devices. 1278 /* Make all the stripes able to hold 'newsize' devices.
@@ -976,6 +1301,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
976 struct stripe_head *osh, *nsh; 1301 struct stripe_head *osh, *nsh;
977 LIST_HEAD(newstripes); 1302 LIST_HEAD(newstripes);
978 struct disk_info *ndisks; 1303 struct disk_info *ndisks;
1304 unsigned long cpu;
979 int err; 1305 int err;
980 struct kmem_cache *sc; 1306 struct kmem_cache *sc;
981 int i; 1307 int i;
@@ -1041,7 +1367,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
1041 /* Step 3. 1367 /* Step 3.
1042 * At this point, we are holding all the stripes so the array 1368 * At this point, we are holding all the stripes so the array
1043 * is completely stalled, so now is a good time to resize 1369 * is completely stalled, so now is a good time to resize
1044 * conf->disks. 1370 * conf->disks and the scribble region
1045 */ 1371 */
1046 ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); 1372 ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
1047 if (ndisks) { 1373 if (ndisks) {
@@ -1052,10 +1378,30 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
1052 } else 1378 } else
1053 err = -ENOMEM; 1379 err = -ENOMEM;
1054 1380
1381 get_online_cpus();
1382 conf->scribble_len = scribble_len(newsize);
1383 for_each_present_cpu(cpu) {
1384 struct raid5_percpu *percpu;
1385 void *scribble;
1386
1387 percpu = per_cpu_ptr(conf->percpu, cpu);
1388 scribble = kmalloc(conf->scribble_len, GFP_NOIO);
1389
1390 if (scribble) {
1391 kfree(percpu->scribble);
1392 percpu->scribble = scribble;
1393 } else {
1394 err = -ENOMEM;
1395 break;
1396 }
1397 }
1398 put_online_cpus();
1399
1055 /* Step 4, return new stripes to service */ 1400 /* Step 4, return new stripes to service */
1056 while(!list_empty(&newstripes)) { 1401 while(!list_empty(&newstripes)) {
1057 nsh = list_entry(newstripes.next, struct stripe_head, lru); 1402 nsh = list_entry(newstripes.next, struct stripe_head, lru);
1058 list_del_init(&nsh->lru); 1403 list_del_init(&nsh->lru);
1404
1059 for (i=conf->raid_disks; i < newsize; i++) 1405 for (i=conf->raid_disks; i < newsize; i++)
1060 if (nsh->dev[i].page == NULL) { 1406 if (nsh->dev[i].page == NULL) {
1061 struct page *p = alloc_page(GFP_NOIO); 1407 struct page *p = alloc_page(GFP_NOIO);
@@ -1594,258 +1940,13 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
1594} 1940}
1595 1941
1596 1942
1597
1598/*
1599 * Copy data between a page in the stripe cache, and one or more bion
1600 * The page could align with the middle of the bio, or there could be
1601 * several bion, each with several bio_vecs, which cover part of the page
1602 * Multiple bion are linked together on bi_next. There may be extras
1603 * at the end of this list. We ignore them.
1604 */
1605static void copy_data(int frombio, struct bio *bio,
1606 struct page *page,
1607 sector_t sector)
1608{
1609 char *pa = page_address(page);
1610 struct bio_vec *bvl;
1611 int i;
1612 int page_offset;
1613
1614 if (bio->bi_sector >= sector)
1615 page_offset = (signed)(bio->bi_sector - sector) * 512;
1616 else
1617 page_offset = (signed)(sector - bio->bi_sector) * -512;
1618 bio_for_each_segment(bvl, bio, i) {
1619 int len = bio_iovec_idx(bio,i)->bv_len;
1620 int clen;
1621 int b_offset = 0;
1622
1623 if (page_offset < 0) {
1624 b_offset = -page_offset;
1625 page_offset += b_offset;
1626 len -= b_offset;
1627 }
1628
1629 if (len > 0 && page_offset + len > STRIPE_SIZE)
1630 clen = STRIPE_SIZE - page_offset;
1631 else clen = len;
1632
1633 if (clen > 0) {
1634 char *ba = __bio_kmap_atomic(bio, i, KM_USER0);
1635 if (frombio)
1636 memcpy(pa+page_offset, ba+b_offset, clen);
1637 else
1638 memcpy(ba+b_offset, pa+page_offset, clen);
1639 __bio_kunmap_atomic(ba, KM_USER0);
1640 }
1641 if (clen < len) /* hit end of page */
1642 break;
1643 page_offset += len;
1644 }
1645}
1646
1647#define check_xor() do { \
1648 if (count == MAX_XOR_BLOCKS) { \
1649 xor_blocks(count, STRIPE_SIZE, dest, ptr);\
1650 count = 0; \
1651 } \
1652 } while(0)
1653
1654static void compute_parity6(struct stripe_head *sh, int method)
1655{
1656 raid5_conf_t *conf = sh->raid_conf;
1657 int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
1658 int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
1659 struct bio *chosen;
1660 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
1661 void *ptrs[syndrome_disks+2];
1662
1663 pd_idx = sh->pd_idx;
1664 qd_idx = sh->qd_idx;
1665 d0_idx = raid6_d0(sh);
1666
1667 pr_debug("compute_parity, stripe %llu, method %d\n",
1668 (unsigned long long)sh->sector, method);
1669
1670 switch(method) {
1671 case READ_MODIFY_WRITE:
1672 BUG(); /* READ_MODIFY_WRITE N/A for RAID-6 */
1673 case RECONSTRUCT_WRITE:
1674 for (i= disks; i-- ;)
1675 if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) {
1676 chosen = sh->dev[i].towrite;
1677 sh->dev[i].towrite = NULL;
1678
1679 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
1680 wake_up(&conf->wait_for_overlap);
1681
1682 BUG_ON(sh->dev[i].written);
1683 sh->dev[i].written = chosen;
1684 }
1685 break;
1686 case CHECK_PARITY:
1687 BUG(); /* Not implemented yet */
1688 }
1689
1690 for (i = disks; i--;)
1691 if (sh->dev[i].written) {
1692 sector_t sector = sh->dev[i].sector;
1693 struct bio *wbi = sh->dev[i].written;
1694 while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
1695 copy_data(1, wbi, sh->dev[i].page, sector);
1696 wbi = r5_next_bio(wbi, sector);
1697 }
1698
1699 set_bit(R5_LOCKED, &sh->dev[i].flags);
1700 set_bit(R5_UPTODATE, &sh->dev[i].flags);
1701 }
1702
1703 /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
1704
1705 for (i = 0; i < disks; i++)
1706 ptrs[i] = (void *)raid6_empty_zero_page;
1707
1708 count = 0;
1709 i = d0_idx;
1710 do {
1711 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
1712
1713 ptrs[slot] = page_address(sh->dev[i].page);
1714 if (slot < syndrome_disks &&
1715 !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
1716 printk(KERN_ERR "block %d/%d not uptodate "
1717 "on parity calc\n", i, count);
1718 BUG();
1719 }
1720
1721 i = raid6_next_disk(i, disks);
1722 } while (i != d0_idx);
1723 BUG_ON(count != syndrome_disks);
1724
1725 raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);
1726
1727 switch(method) {
1728 case RECONSTRUCT_WRITE:
1729 set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1730 set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
1731 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
1732 set_bit(R5_LOCKED, &sh->dev[qd_idx].flags);
1733 break;
1734 case UPDATE_PARITY:
1735 set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1736 set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
1737 break;
1738 }
1739}
1740
1741
1742/* Compute one missing block */
1743static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
1744{
1745 int i, count, disks = sh->disks;
1746 void *ptr[MAX_XOR_BLOCKS], *dest, *p;
1747 int qd_idx = sh->qd_idx;
1748
1749 pr_debug("compute_block_1, stripe %llu, idx %d\n",
1750 (unsigned long long)sh->sector, dd_idx);
1751
1752 if ( dd_idx == qd_idx ) {
1753 /* We're actually computing the Q drive */
1754 compute_parity6(sh, UPDATE_PARITY);
1755 } else {
1756 dest = page_address(sh->dev[dd_idx].page);
1757 if (!nozero) memset(dest, 0, STRIPE_SIZE);
1758 count = 0;
1759 for (i = disks ; i--; ) {
1760 if (i == dd_idx || i == qd_idx)
1761 continue;
1762 p = page_address(sh->dev[i].page);
1763 if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
1764 ptr[count++] = p;
1765 else
1766 printk("compute_block() %d, stripe %llu, %d"
1767 " not present\n", dd_idx,
1768 (unsigned long long)sh->sector, i);
1769
1770 check_xor();
1771 }
1772 if (count)
1773 xor_blocks(count, STRIPE_SIZE, dest, ptr);
1774 if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
1775 else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
1776 }
1777}
1778
1779/* Compute two missing blocks */
1780static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
1781{
1782 int i, count, disks = sh->disks;
1783 int syndrome_disks = sh->ddf_layout ? disks : disks-2;
1784 int d0_idx = raid6_d0(sh);
1785 int faila = -1, failb = -1;
1786 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
1787 void *ptrs[syndrome_disks+2];
1788
1789 for (i = 0; i < disks ; i++)
1790 ptrs[i] = (void *)raid6_empty_zero_page;
1791 count = 0;
1792 i = d0_idx;
1793 do {
1794 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
1795
1796 ptrs[slot] = page_address(sh->dev[i].page);
1797
1798 if (i == dd_idx1)
1799 faila = slot;
1800 if (i == dd_idx2)
1801 failb = slot;
1802 i = raid6_next_disk(i, disks);
1803 } while (i != d0_idx);
1804 BUG_ON(count != syndrome_disks);
1805
1806 BUG_ON(faila == failb);
1807 if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
1808
1809 pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
1810 (unsigned long long)sh->sector, dd_idx1, dd_idx2,
1811 faila, failb);
1812
1813 if (failb == syndrome_disks+1) {
1814 /* Q disk is one of the missing disks */
1815 if (faila == syndrome_disks) {
1816 /* Missing P+Q, just recompute */
1817 compute_parity6(sh, UPDATE_PARITY);
1818 return;
1819 } else {
1820 /* We're missing D+Q; recompute D from P */
1821 compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ?
1822 dd_idx2 : dd_idx1),
1823 0);
1824 compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */
1825 return;
1826 }
1827 }
1828
1829 /* We're missing D+P or D+D; */
1830 if (failb == syndrome_disks) {
1831 /* We're missing D+P. */
1832 raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
1833 } else {
1834 /* We're missing D+D. */
1835 raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
1836 ptrs);
1837 }
1838
1839 /* Both the above update both missing blocks */
1840 set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
1841 set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
1842}
1843
1844static void 1943static void
1845schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, 1944schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
1846 int rcw, int expand) 1945 int rcw, int expand)
1847{ 1946{
1848 int i, pd_idx = sh->pd_idx, disks = sh->disks; 1947 int i, pd_idx = sh->pd_idx, disks = sh->disks;
1948 raid5_conf_t *conf = sh->raid_conf;
1949 int level = conf->level;
1849 1950
1850 if (rcw) { 1951 if (rcw) {
1851 /* if we are not expanding this is a proper write request, and 1952 /* if we are not expanding this is a proper write request, and
@@ -1858,7 +1959,7 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
1858 } else 1959 } else
1859 sh->reconstruct_state = reconstruct_state_run; 1960 sh->reconstruct_state = reconstruct_state_run;
1860 1961
1861 set_bit(STRIPE_OP_POSTXOR, &s->ops_request); 1962 set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
1862 1963
1863 for (i = disks; i--; ) { 1964 for (i = disks; i--; ) {
1864 struct r5dev *dev = &sh->dev[i]; 1965 struct r5dev *dev = &sh->dev[i];
@@ -1871,17 +1972,18 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
1871 s->locked++; 1972 s->locked++;
1872 } 1973 }
1873 } 1974 }
1874 if (s->locked + 1 == disks) 1975 if (s->locked + conf->max_degraded == disks)
1875 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) 1976 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
1876 atomic_inc(&sh->raid_conf->pending_full_writes); 1977 atomic_inc(&conf->pending_full_writes);
1877 } else { 1978 } else {
1979 BUG_ON(level == 6);
1878 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || 1980 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
1879 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); 1981 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
1880 1982
1881 sh->reconstruct_state = reconstruct_state_prexor_drain_run; 1983 sh->reconstruct_state = reconstruct_state_prexor_drain_run;
1882 set_bit(STRIPE_OP_PREXOR, &s->ops_request); 1984 set_bit(STRIPE_OP_PREXOR, &s->ops_request);
1883 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); 1985 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
1884 set_bit(STRIPE_OP_POSTXOR, &s->ops_request); 1986 set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
1885 1987
1886 for (i = disks; i--; ) { 1988 for (i = disks; i--; ) {
1887 struct r5dev *dev = &sh->dev[i]; 1989 struct r5dev *dev = &sh->dev[i];
@@ -1899,13 +2001,22 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
1899 } 2001 }
1900 } 2002 }
1901 2003
1902 /* keep the parity disk locked while asynchronous operations 2004 /* keep the parity disk(s) locked while asynchronous operations
1903 * are in flight 2005 * are in flight
1904 */ 2006 */
1905 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); 2007 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
1906 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); 2008 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1907 s->locked++; 2009 s->locked++;
1908 2010
2011 if (level == 6) {
2012 int qd_idx = sh->qd_idx;
2013 struct r5dev *dev = &sh->dev[qd_idx];
2014
2015 set_bit(R5_LOCKED, &dev->flags);
2016 clear_bit(R5_UPTODATE, &dev->flags);
2017 s->locked++;
2018 }
2019
1909 pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n", 2020 pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
1910 __func__, (unsigned long long)sh->sector, 2021 __func__, (unsigned long long)sh->sector,
1911 s->locked, s->ops_request); 2022 s->locked, s->ops_request);
@@ -1986,13 +2097,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
1986 2097
1987static void end_reshape(raid5_conf_t *conf); 2098static void end_reshape(raid5_conf_t *conf);
1988 2099
1989static int page_is_zero(struct page *p)
1990{
1991 char *a = page_address(p);
1992 return ((*(u32*)a) == 0 &&
1993 memcmp(a, a+4, STRIPE_SIZE-4)==0);
1994}
1995
1996static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous, 2100static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
1997 struct stripe_head *sh) 2101 struct stripe_head *sh)
1998{ 2102{
@@ -2132,9 +2236,10 @@ static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s,
2132 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); 2236 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
2133 set_bit(R5_Wantcompute, &dev->flags); 2237 set_bit(R5_Wantcompute, &dev->flags);
2134 sh->ops.target = disk_idx; 2238 sh->ops.target = disk_idx;
2239 sh->ops.target2 = -1;
2135 s->req_compute = 1; 2240 s->req_compute = 1;
2136 /* Careful: from this point on 'uptodate' is in the eye 2241 /* Careful: from this point on 'uptodate' is in the eye
2137 * of raid5_run_ops which services 'compute' operations 2242 * of raid_run_ops which services 'compute' operations
2138 * before writes. R5_Wantcompute flags a block that will 2243 * before writes. R5_Wantcompute flags a block that will
2139 * be R5_UPTODATE by the time it is needed for a 2244 * be R5_UPTODATE by the time it is needed for a
2140 * subsequent operation. 2245 * subsequent operation.
@@ -2173,61 +2278,104 @@ static void handle_stripe_fill5(struct stripe_head *sh,
2173 set_bit(STRIPE_HANDLE, &sh->state); 2278 set_bit(STRIPE_HANDLE, &sh->state);
2174} 2279}
2175 2280
2176static void handle_stripe_fill6(struct stripe_head *sh, 2281/* fetch_block6 - checks the given member device to see if its data needs
2177 struct stripe_head_state *s, struct r6_state *r6s, 2282 * to be read or computed to satisfy a request.
2178 int disks) 2283 *
2284 * Returns 1 when no more member devices need to be checked, otherwise returns
2285 * 0 to tell the loop in handle_stripe_fill6 to continue
2286 */
2287static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s,
2288 struct r6_state *r6s, int disk_idx, int disks)
2179{ 2289{
2180 int i; 2290 struct r5dev *dev = &sh->dev[disk_idx];
2181 for (i = disks; i--; ) { 2291 struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]],
2182 struct r5dev *dev = &sh->dev[i]; 2292 &sh->dev[r6s->failed_num[1]] };
2183 if (!test_bit(R5_LOCKED, &dev->flags) && 2293
2184 !test_bit(R5_UPTODATE, &dev->flags) && 2294 if (!test_bit(R5_LOCKED, &dev->flags) &&
2185 (dev->toread || (dev->towrite && 2295 !test_bit(R5_UPTODATE, &dev->flags) &&
2186 !test_bit(R5_OVERWRITE, &dev->flags)) || 2296 (dev->toread ||
2187 s->syncing || s->expanding || 2297 (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
2188 (s->failed >= 1 && 2298 s->syncing || s->expanding ||
2189 (sh->dev[r6s->failed_num[0]].toread || 2299 (s->failed >= 1 &&
2190 s->to_write)) || 2300 (fdev[0]->toread || s->to_write)) ||
2191 (s->failed >= 2 && 2301 (s->failed >= 2 &&
2192 (sh->dev[r6s->failed_num[1]].toread || 2302 (fdev[1]->toread || s->to_write)))) {
2193 s->to_write)))) { 2303 /* we would like to get this block, possibly by computing it,
2194 /* we would like to get this block, possibly 2304 * otherwise read it if the backing disk is insync
2195 * by computing it, but we might not be able to 2305 */
2306 BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
2307 BUG_ON(test_bit(R5_Wantread, &dev->flags));
2308 if ((s->uptodate == disks - 1) &&
2309 (s->failed && (disk_idx == r6s->failed_num[0] ||
2310 disk_idx == r6s->failed_num[1]))) {
2311 /* have disk failed, and we're requested to fetch it;
2312 * do compute it
2196 */ 2313 */
2197 if ((s->uptodate == disks - 1) && 2314 pr_debug("Computing stripe %llu block %d\n",
2198 (s->failed && (i == r6s->failed_num[0] || 2315 (unsigned long long)sh->sector, disk_idx);
2199 i == r6s->failed_num[1]))) { 2316 set_bit(STRIPE_COMPUTE_RUN, &sh->state);
2200 pr_debug("Computing stripe %llu block %d\n", 2317 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
2201 (unsigned long long)sh->sector, i); 2318 set_bit(R5_Wantcompute, &dev->flags);
2202 compute_block_1(sh, i, 0); 2319 sh->ops.target = disk_idx;
2203 s->uptodate++; 2320 sh->ops.target2 = -1; /* no 2nd target */
2204 } else if ( s->uptodate == disks-2 && s->failed >= 2 ) { 2321 s->req_compute = 1;
2205 /* Computing 2-failure is *very* expensive; only 2322 s->uptodate++;
2206 * do it if failed >= 2 2323 return 1;
2207 */ 2324 } else if (s->uptodate == disks-2 && s->failed >= 2) {
2208 int other; 2325 /* Computing 2-failure is *very* expensive; only
2209 for (other = disks; other--; ) { 2326 * do it if failed >= 2
2210 if (other == i) 2327 */
2211 continue; 2328 int other;
2212 if (!test_bit(R5_UPTODATE, 2329 for (other = disks; other--; ) {
2213 &sh->dev[other].flags)) 2330 if (other == disk_idx)
2214 break; 2331 continue;
2215 } 2332 if (!test_bit(R5_UPTODATE,
2216 BUG_ON(other < 0); 2333 &sh->dev[other].flags))
2217 pr_debug("Computing stripe %llu blocks %d,%d\n", 2334 break;
2218 (unsigned long long)sh->sector,
2219 i, other);
2220 compute_block_2(sh, i, other);
2221 s->uptodate += 2;
2222 } else if (test_bit(R5_Insync, &dev->flags)) {
2223 set_bit(R5_LOCKED, &dev->flags);
2224 set_bit(R5_Wantread, &dev->flags);
2225 s->locked++;
2226 pr_debug("Reading block %d (sync=%d)\n",
2227 i, s->syncing);
2228 } 2335 }
2336 BUG_ON(other < 0);
2337 pr_debug("Computing stripe %llu blocks %d,%d\n",
2338 (unsigned long long)sh->sector,
2339 disk_idx, other);
2340 set_bit(STRIPE_COMPUTE_RUN, &sh->state);
2341 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
2342 set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags);
2343 set_bit(R5_Wantcompute, &sh->dev[other].flags);
2344 sh->ops.target = disk_idx;
2345 sh->ops.target2 = other;
2346 s->uptodate += 2;
2347 s->req_compute = 1;
2348 return 1;
2349 } else if (test_bit(R5_Insync, &dev->flags)) {
2350 set_bit(R5_LOCKED, &dev->flags);
2351 set_bit(R5_Wantread, &dev->flags);
2352 s->locked++;
2353 pr_debug("Reading block %d (sync=%d)\n",
2354 disk_idx, s->syncing);
2229 } 2355 }
2230 } 2356 }
2357
2358 return 0;
2359}
2360
2361/**
2362 * handle_stripe_fill6 - read or compute data to satisfy pending requests.
2363 */
2364static void handle_stripe_fill6(struct stripe_head *sh,
2365 struct stripe_head_state *s, struct r6_state *r6s,
2366 int disks)
2367{
2368 int i;
2369
2370 /* look for blocks to read/compute, skip this if a compute
2371 * is already in flight, or if the stripe contents are in the
2372 * midst of changing due to a write
2373 */
2374 if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
2375 !sh->reconstruct_state)
2376 for (i = disks; i--; )
2377 if (fetch_block6(sh, s, r6s, i, disks))
2378 break;
2231 set_bit(STRIPE_HANDLE, &sh->state); 2379 set_bit(STRIPE_HANDLE, &sh->state);
2232} 2380}
2233 2381
@@ -2361,114 +2509,61 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf,
2361 */ 2509 */
2362 /* since handle_stripe can be called at any time we need to handle the 2510 /* since handle_stripe can be called at any time we need to handle the
2363 * case where a compute block operation has been submitted and then a 2511 * case where a compute block operation has been submitted and then a
2364 * subsequent call wants to start a write request. raid5_run_ops only 2512 * subsequent call wants to start a write request. raid_run_ops only
2365 * handles the case where compute block and postxor are requested 2513 * handles the case where compute block and reconstruct are requested
2366 * simultaneously. If this is not the case then new writes need to be 2514 * simultaneously. If this is not the case then new writes need to be
2367 * held off until the compute completes. 2515 * held off until the compute completes.
2368 */ 2516 */
2369 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && 2517 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
2370 (s->locked == 0 && (rcw == 0 || rmw == 0) && 2518 (s->locked == 0 && (rcw == 0 || rmw == 0) &&
2371 !test_bit(STRIPE_BIT_DELAY, &sh->state))) 2519 !test_bit(STRIPE_BIT_DELAY, &sh->state)))
2372 schedule_reconstruction5(sh, s, rcw == 0, 0); 2520 schedule_reconstruction(sh, s, rcw == 0, 0);
2373} 2521}
2374 2522
2375static void handle_stripe_dirtying6(raid5_conf_t *conf, 2523static void handle_stripe_dirtying6(raid5_conf_t *conf,
2376 struct stripe_head *sh, struct stripe_head_state *s, 2524 struct stripe_head *sh, struct stripe_head_state *s,
2377 struct r6_state *r6s, int disks) 2525 struct r6_state *r6s, int disks)
2378{ 2526{
2379 int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i; 2527 int rcw = 0, pd_idx = sh->pd_idx, i;
2380 int qd_idx = sh->qd_idx; 2528 int qd_idx = sh->qd_idx;
2529
2530 set_bit(STRIPE_HANDLE, &sh->state);
2381 for (i = disks; i--; ) { 2531 for (i = disks; i--; ) {
2382 struct r5dev *dev = &sh->dev[i]; 2532 struct r5dev *dev = &sh->dev[i];
2383 /* Would I have to read this buffer for reconstruct_write */ 2533 /* check if we haven't enough data */
2384 if (!test_bit(R5_OVERWRITE, &dev->flags) 2534 if (!test_bit(R5_OVERWRITE, &dev->flags) &&
2385 && i != pd_idx && i != qd_idx 2535 i != pd_idx && i != qd_idx &&
2386 && (!test_bit(R5_LOCKED, &dev->flags) 2536 !test_bit(R5_LOCKED, &dev->flags) &&
2387 ) && 2537 !(test_bit(R5_UPTODATE, &dev->flags) ||
2388 !test_bit(R5_UPTODATE, &dev->flags)) { 2538 test_bit(R5_Wantcompute, &dev->flags))) {
2389 if (test_bit(R5_Insync, &dev->flags)) rcw++; 2539 rcw++;
2390 else { 2540 if (!test_bit(R5_Insync, &dev->flags))
2391 pr_debug("raid6: must_compute: " 2541 continue; /* it's a failed drive */
2392 "disk %d flags=%#lx\n", i, dev->flags); 2542
2393 must_compute++; 2543 if (
2544 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2545 pr_debug("Read_old stripe %llu "
2546 "block %d for Reconstruct\n",
2547 (unsigned long long)sh->sector, i);
2548 set_bit(R5_LOCKED, &dev->flags);
2549 set_bit(R5_Wantread, &dev->flags);
2550 s->locked++;
2551 } else {
2552 pr_debug("Request delayed stripe %llu "
2553 "block %d for Reconstruct\n",
2554 (unsigned long long)sh->sector, i);
2555 set_bit(STRIPE_DELAYED, &sh->state);
2556 set_bit(STRIPE_HANDLE, &sh->state);
2394 } 2557 }
2395 } 2558 }
2396 } 2559 }
2397 pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
2398 (unsigned long long)sh->sector, rcw, must_compute);
2399 set_bit(STRIPE_HANDLE, &sh->state);
2400
2401 if (rcw > 0)
2402 /* want reconstruct write, but need to get some data */
2403 for (i = disks; i--; ) {
2404 struct r5dev *dev = &sh->dev[i];
2405 if (!test_bit(R5_OVERWRITE, &dev->flags)
2406 && !(s->failed == 0 && (i == pd_idx || i == qd_idx))
2407 && !test_bit(R5_LOCKED, &dev->flags) &&
2408 !test_bit(R5_UPTODATE, &dev->flags) &&
2409 test_bit(R5_Insync, &dev->flags)) {
2410 if (
2411 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2412 pr_debug("Read_old stripe %llu "
2413 "block %d for Reconstruct\n",
2414 (unsigned long long)sh->sector, i);
2415 set_bit(R5_LOCKED, &dev->flags);
2416 set_bit(R5_Wantread, &dev->flags);
2417 s->locked++;
2418 } else {
2419 pr_debug("Request delayed stripe %llu "
2420 "block %d for Reconstruct\n",
2421 (unsigned long long)sh->sector, i);
2422 set_bit(STRIPE_DELAYED, &sh->state);
2423 set_bit(STRIPE_HANDLE, &sh->state);
2424 }
2425 }
2426 }
2427 /* now if nothing is locked, and if we have enough data, we can start a 2560 /* now if nothing is locked, and if we have enough data, we can start a
2428 * write request 2561 * write request
2429 */ 2562 */
2430 if (s->locked == 0 && rcw == 0 && 2563 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
2564 s->locked == 0 && rcw == 0 &&
2431 !test_bit(STRIPE_BIT_DELAY, &sh->state)) { 2565 !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
2432 if (must_compute > 0) { 2566 schedule_reconstruction(sh, s, 1, 0);
2433 /* We have failed blocks and need to compute them */
2434 switch (s->failed) {
2435 case 0:
2436 BUG();
2437 case 1:
2438 compute_block_1(sh, r6s->failed_num[0], 0);
2439 break;
2440 case 2:
2441 compute_block_2(sh, r6s->failed_num[0],
2442 r6s->failed_num[1]);
2443 break;
2444 default: /* This request should have been failed? */
2445 BUG();
2446 }
2447 }
2448
2449 pr_debug("Computing parity for stripe %llu\n",
2450 (unsigned long long)sh->sector);
2451 compute_parity6(sh, RECONSTRUCT_WRITE);
2452 /* now every locked buffer is ready to be written */
2453 for (i = disks; i--; )
2454 if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
2455 pr_debug("Writing stripe %llu block %d\n",
2456 (unsigned long long)sh->sector, i);
2457 s->locked++;
2458 set_bit(R5_Wantwrite, &sh->dev[i].flags);
2459 }
2460 if (s->locked == disks)
2461 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
2462 atomic_inc(&conf->pending_full_writes);
2463 /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
2464 set_bit(STRIPE_INSYNC, &sh->state);
2465
2466 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2467 atomic_dec(&conf->preread_active_stripes);
2468 if (atomic_read(&conf->preread_active_stripes) <
2469 IO_THRESHOLD)
2470 md_wakeup_thread(conf->mddev->thread);
2471 }
2472 } 2567 }
2473} 2568}
2474 2569
@@ -2527,7 +2622,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2527 * we are done. Otherwise update the mismatch count and repair 2622 * we are done. Otherwise update the mismatch count and repair
2528 * parity if !MD_RECOVERY_CHECK 2623 * parity if !MD_RECOVERY_CHECK
2529 */ 2624 */
2530 if (sh->ops.zero_sum_result == 0) 2625 if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0)
2531 /* parity is correct (on disc, 2626 /* parity is correct (on disc,
2532 * not in buffer any more) 2627 * not in buffer any more)
2533 */ 2628 */
@@ -2544,6 +2639,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2544 set_bit(R5_Wantcompute, 2639 set_bit(R5_Wantcompute,
2545 &sh->dev[sh->pd_idx].flags); 2640 &sh->dev[sh->pd_idx].flags);
2546 sh->ops.target = sh->pd_idx; 2641 sh->ops.target = sh->pd_idx;
2642 sh->ops.target2 = -1;
2547 s->uptodate++; 2643 s->uptodate++;
2548 } 2644 }
2549 } 2645 }
@@ -2560,67 +2656,74 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2560 2656
2561 2657
2562static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, 2658static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
2563 struct stripe_head_state *s, 2659 struct stripe_head_state *s,
2564 struct r6_state *r6s, struct page *tmp_page, 2660 struct r6_state *r6s, int disks)
2565 int disks)
2566{ 2661{
2567 int update_p = 0, update_q = 0;
2568 struct r5dev *dev;
2569 int pd_idx = sh->pd_idx; 2662 int pd_idx = sh->pd_idx;
2570 int qd_idx = sh->qd_idx; 2663 int qd_idx = sh->qd_idx;
2664 struct r5dev *dev;
2571 2665
2572 set_bit(STRIPE_HANDLE, &sh->state); 2666 set_bit(STRIPE_HANDLE, &sh->state);
2573 2667
2574 BUG_ON(s->failed > 2); 2668 BUG_ON(s->failed > 2);
2575 BUG_ON(s->uptodate < disks); 2669
2576 /* Want to check and possibly repair P and Q. 2670 /* Want to check and possibly repair P and Q.
2577 * However there could be one 'failed' device, in which 2671 * However there could be one 'failed' device, in which
2578 * case we can only check one of them, possibly using the 2672 * case we can only check one of them, possibly using the
2579 * other to generate missing data 2673 * other to generate missing data
2580 */ 2674 */
2581 2675
2582 /* If !tmp_page, we cannot do the calculations, 2676 switch (sh->check_state) {
2583 * but as we have set STRIPE_HANDLE, we will soon be called 2677 case check_state_idle:
2584 * by stripe_handle with a tmp_page - just wait until then. 2678 /* start a new check operation if there are < 2 failures */
2585 */
2586 if (tmp_page) {
2587 if (s->failed == r6s->q_failed) { 2679 if (s->failed == r6s->q_failed) {
2588 /* The only possible failed device holds 'Q', so it 2680 /* The only possible failed device holds Q, so it
2589 * makes sense to check P (If anything else were failed, 2681 * makes sense to check P (If anything else were failed,
2590 * we would have used P to recreate it). 2682 * we would have used P to recreate it).
2591 */ 2683 */
2592 compute_block_1(sh, pd_idx, 1); 2684 sh->check_state = check_state_run;
2593 if (!page_is_zero(sh->dev[pd_idx].page)) {
2594 compute_block_1(sh, pd_idx, 0);
2595 update_p = 1;
2596 }
2597 } 2685 }
2598 if (!r6s->q_failed && s->failed < 2) { 2686 if (!r6s->q_failed && s->failed < 2) {
2599 /* q is not failed, and we didn't use it to generate 2687 /* Q is not failed, and we didn't use it to generate
2600 * anything, so it makes sense to check it 2688 * anything, so it makes sense to check it
2601 */ 2689 */
2602 memcpy(page_address(tmp_page), 2690 if (sh->check_state == check_state_run)
2603 page_address(sh->dev[qd_idx].page), 2691 sh->check_state = check_state_run_pq;
2604 STRIPE_SIZE); 2692 else
2605 compute_parity6(sh, UPDATE_PARITY); 2693 sh->check_state = check_state_run_q;
2606 if (memcmp(page_address(tmp_page),
2607 page_address(sh->dev[qd_idx].page),
2608 STRIPE_SIZE) != 0) {
2609 clear_bit(STRIPE_INSYNC, &sh->state);
2610 update_q = 1;
2611 }
2612 } 2694 }
2613 if (update_p || update_q) { 2695
2614 conf->mddev->resync_mismatches += STRIPE_SECTORS; 2696 /* discard potentially stale zero_sum_result */
2615 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) 2697 sh->ops.zero_sum_result = 0;
2616 /* don't try to repair!! */ 2698
2617 update_p = update_q = 0; 2699 if (sh->check_state == check_state_run) {
2700 /* async_xor_zero_sum destroys the contents of P */
2701 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
2702 s->uptodate--;
2703 }
2704 if (sh->check_state >= check_state_run &&
2705 sh->check_state <= check_state_run_pq) {
2706 /* async_syndrome_zero_sum preserves P and Q, so
2707 * no need to mark them !uptodate here
2708 */
2709 set_bit(STRIPE_OP_CHECK, &s->ops_request);
2710 break;
2618 } 2711 }
2619 2712
2713 /* we have 2-disk failure */
2714 BUG_ON(s->failed != 2);
2715 /* fall through */
2716 case check_state_compute_result:
2717 sh->check_state = check_state_idle;
2718
2719 /* check that a write has not made the stripe insync */
2720 if (test_bit(STRIPE_INSYNC, &sh->state))
2721 break;
2722
2620 /* now write out any block on a failed drive, 2723 /* now write out any block on a failed drive,
2621 * or P or Q if they need it 2724 * or P or Q if they were recomputed
2622 */ 2725 */
2623 2726 BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
2624 if (s->failed == 2) { 2727 if (s->failed == 2) {
2625 dev = &sh->dev[r6s->failed_num[1]]; 2728 dev = &sh->dev[r6s->failed_num[1]];
2626 s->locked++; 2729 s->locked++;
@@ -2633,14 +2736,13 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
2633 set_bit(R5_LOCKED, &dev->flags); 2736 set_bit(R5_LOCKED, &dev->flags);
2634 set_bit(R5_Wantwrite, &dev->flags); 2737 set_bit(R5_Wantwrite, &dev->flags);
2635 } 2738 }
2636 2739 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
2637 if (update_p) {
2638 dev = &sh->dev[pd_idx]; 2740 dev = &sh->dev[pd_idx];
2639 s->locked++; 2741 s->locked++;
2640 set_bit(R5_LOCKED, &dev->flags); 2742 set_bit(R5_LOCKED, &dev->flags);
2641 set_bit(R5_Wantwrite, &dev->flags); 2743 set_bit(R5_Wantwrite, &dev->flags);
2642 } 2744 }
2643 if (update_q) { 2745 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
2644 dev = &sh->dev[qd_idx]; 2746 dev = &sh->dev[qd_idx];
2645 s->locked++; 2747 s->locked++;
2646 set_bit(R5_LOCKED, &dev->flags); 2748 set_bit(R5_LOCKED, &dev->flags);
@@ -2649,6 +2751,70 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
2649 clear_bit(STRIPE_DEGRADED, &sh->state); 2751 clear_bit(STRIPE_DEGRADED, &sh->state);
2650 2752
2651 set_bit(STRIPE_INSYNC, &sh->state); 2753 set_bit(STRIPE_INSYNC, &sh->state);
2754 break;
2755 case check_state_run:
2756 case check_state_run_q:
2757 case check_state_run_pq:
2758 break; /* we will be called again upon completion */
2759 case check_state_check_result:
2760 sh->check_state = check_state_idle;
2761
2762 /* handle a successful check operation, if parity is correct
2763 * we are done. Otherwise update the mismatch count and repair
2764 * parity if !MD_RECOVERY_CHECK
2765 */
2766 if (sh->ops.zero_sum_result == 0) {
2767 /* both parities are correct */
2768 if (!s->failed)
2769 set_bit(STRIPE_INSYNC, &sh->state);
2770 else {
2771 /* in contrast to the raid5 case we can validate
2772 * parity, but still have a failure to write
2773 * back
2774 */
2775 sh->check_state = check_state_compute_result;
2776 /* Returning at this point means that we may go
2777 * off and bring p and/or q uptodate again so
2778 * we make sure to check zero_sum_result again
2779 * to verify if p or q need writeback
2780 */
2781 }
2782 } else {
2783 conf->mddev->resync_mismatches += STRIPE_SECTORS;
2784 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
2785 /* don't try to repair!! */
2786 set_bit(STRIPE_INSYNC, &sh->state);
2787 else {
2788 int *target = &sh->ops.target;
2789
2790 sh->ops.target = -1;
2791 sh->ops.target2 = -1;
2792 sh->check_state = check_state_compute_run;
2793 set_bit(STRIPE_COMPUTE_RUN, &sh->state);
2794 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
2795 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
2796 set_bit(R5_Wantcompute,
2797 &sh->dev[pd_idx].flags);
2798 *target = pd_idx;
2799 target = &sh->ops.target2;
2800 s->uptodate++;
2801 }
2802 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
2803 set_bit(R5_Wantcompute,
2804 &sh->dev[qd_idx].flags);
2805 *target = qd_idx;
2806 s->uptodate++;
2807 }
2808 }
2809 }
2810 break;
2811 case check_state_compute_run:
2812 break;
2813 default:
2814 printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
2815 __func__, sh->check_state,
2816 (unsigned long long) sh->sector);
2817 BUG();
2652 } 2818 }
2653} 2819}
2654 2820
@@ -2666,6 +2832,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
2666 if (i != sh->pd_idx && i != sh->qd_idx) { 2832 if (i != sh->pd_idx && i != sh->qd_idx) {
2667 int dd_idx, j; 2833 int dd_idx, j;
2668 struct stripe_head *sh2; 2834 struct stripe_head *sh2;
2835 struct async_submit_ctl submit;
2669 2836
2670 sector_t bn = compute_blocknr(sh, i, 1); 2837 sector_t bn = compute_blocknr(sh, i, 1);
2671 sector_t s = raid5_compute_sector(conf, bn, 0, 2838 sector_t s = raid5_compute_sector(conf, bn, 0,
@@ -2685,9 +2852,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
2685 } 2852 }
2686 2853
2687 /* place all the copies on one channel */ 2854 /* place all the copies on one channel */
2855 init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
2688 tx = async_memcpy(sh2->dev[dd_idx].page, 2856 tx = async_memcpy(sh2->dev[dd_idx].page,
2689 sh->dev[i].page, 0, 0, STRIPE_SIZE, 2857 sh->dev[i].page, 0, 0, STRIPE_SIZE,
2690 ASYNC_TX_DEP_ACK, tx, NULL, NULL); 2858 &submit);
2691 2859
2692 set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); 2860 set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
2693 set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); 2861 set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
@@ -2974,7 +3142,7 @@ static bool handle_stripe5(struct stripe_head *sh)
2974 /* Need to write out all blocks after computing parity */ 3142 /* Need to write out all blocks after computing parity */
2975 sh->disks = conf->raid_disks; 3143 sh->disks = conf->raid_disks;
2976 stripe_set_idx(sh->sector, conf, 0, sh); 3144 stripe_set_idx(sh->sector, conf, 0, sh);
2977 schedule_reconstruction5(sh, &s, 1, 1); 3145 schedule_reconstruction(sh, &s, 1, 1);
2978 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { 3146 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
2979 clear_bit(STRIPE_EXPAND_READY, &sh->state); 3147 clear_bit(STRIPE_EXPAND_READY, &sh->state);
2980 atomic_dec(&conf->reshape_stripes); 3148 atomic_dec(&conf->reshape_stripes);
@@ -2994,7 +3162,7 @@ static bool handle_stripe5(struct stripe_head *sh)
2994 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); 3162 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
2995 3163
2996 if (s.ops_request) 3164 if (s.ops_request)
2997 raid5_run_ops(sh, s.ops_request); 3165 raid_run_ops(sh, s.ops_request);
2998 3166
2999 ops_run_io(sh, &s); 3167 ops_run_io(sh, &s);
3000 3168
@@ -3003,7 +3171,7 @@ static bool handle_stripe5(struct stripe_head *sh)
3003 return blocked_rdev == NULL; 3171 return blocked_rdev == NULL;
3004} 3172}
3005 3173
3006static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) 3174static bool handle_stripe6(struct stripe_head *sh)
3007{ 3175{
3008 raid5_conf_t *conf = sh->raid_conf; 3176 raid5_conf_t *conf = sh->raid_conf;
3009 int disks = sh->disks; 3177 int disks = sh->disks;
@@ -3015,9 +3183,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3015 mdk_rdev_t *blocked_rdev = NULL; 3183 mdk_rdev_t *blocked_rdev = NULL;
3016 3184
3017 pr_debug("handling stripe %llu, state=%#lx cnt=%d, " 3185 pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
3018 "pd_idx=%d, qd_idx=%d\n", 3186 "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
3019 (unsigned long long)sh->sector, sh->state, 3187 (unsigned long long)sh->sector, sh->state,
3020 atomic_read(&sh->count), pd_idx, qd_idx); 3188 atomic_read(&sh->count), pd_idx, qd_idx,
3189 sh->check_state, sh->reconstruct_state);
3021 memset(&s, 0, sizeof(s)); 3190 memset(&s, 0, sizeof(s));
3022 3191
3023 spin_lock(&sh->lock); 3192 spin_lock(&sh->lock);
@@ -3037,35 +3206,26 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3037 3206
3038 pr_debug("check %d: state 0x%lx read %p write %p written %p\n", 3207 pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
3039 i, dev->flags, dev->toread, dev->towrite, dev->written); 3208 i, dev->flags, dev->toread, dev->towrite, dev->written);
3040 /* maybe we can reply to a read */ 3209 /* maybe we can reply to a read
3041 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { 3210 *
3042 struct bio *rbi, *rbi2; 3211 * new wantfill requests are only permitted while
3043 pr_debug("Return read for disc %d\n", i); 3212 * ops_complete_biofill is guaranteed to be inactive
3044 spin_lock_irq(&conf->device_lock); 3213 */
3045 rbi = dev->toread; 3214 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
3046 dev->toread = NULL; 3215 !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
3047 if (test_and_clear_bit(R5_Overlap, &dev->flags)) 3216 set_bit(R5_Wantfill, &dev->flags);
3048 wake_up(&conf->wait_for_overlap);
3049 spin_unlock_irq(&conf->device_lock);
3050 while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
3051 copy_data(0, rbi, dev->page, dev->sector);
3052 rbi2 = r5_next_bio(rbi, dev->sector);
3053 spin_lock_irq(&conf->device_lock);
3054 if (!raid5_dec_bi_phys_segments(rbi)) {
3055 rbi->bi_next = return_bi;
3056 return_bi = rbi;
3057 }
3058 spin_unlock_irq(&conf->device_lock);
3059 rbi = rbi2;
3060 }
3061 }
3062 3217
3063 /* now count some things */ 3218 /* now count some things */
3064 if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; 3219 if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
3065 if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; 3220 if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
3221 if (test_bit(R5_Wantcompute, &dev->flags)) {
3222 s.compute++;
3223 BUG_ON(s.compute > 2);
3224 }
3066 3225
3067 3226 if (test_bit(R5_Wantfill, &dev->flags)) {
3068 if (dev->toread) 3227 s.to_fill++;
3228 } else if (dev->toread)
3069 s.to_read++; 3229 s.to_read++;
3070 if (dev->towrite) { 3230 if (dev->towrite) {
3071 s.to_write++; 3231 s.to_write++;
@@ -3106,6 +3266,11 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3106 blocked_rdev = NULL; 3266 blocked_rdev = NULL;
3107 } 3267 }
3108 3268
3269 if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
3270 set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
3271 set_bit(STRIPE_BIOFILL_RUN, &sh->state);
3272 }
3273
3109 pr_debug("locked=%d uptodate=%d to_read=%d" 3274 pr_debug("locked=%d uptodate=%d to_read=%d"
3110 " to_write=%d failed=%d failed_num=%d,%d\n", 3275 " to_write=%d failed=%d failed_num=%d,%d\n",
3111 s.locked, s.uptodate, s.to_read, s.to_write, s.failed, 3276 s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@ -3146,19 +3311,62 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3146 * or to load a block that is being partially written. 3311 * or to load a block that is being partially written.
3147 */ 3312 */
3148 if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || 3313 if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
3149 (s.syncing && (s.uptodate < disks)) || s.expanding) 3314 (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
3150 handle_stripe_fill6(sh, &s, &r6s, disks); 3315 handle_stripe_fill6(sh, &s, &r6s, disks);
3151 3316
3152 /* now to consider writing and what else, if anything should be read */ 3317 /* Now we check to see if any write operations have recently
3153 if (s.to_write) 3318 * completed
3319 */
3320 if (sh->reconstruct_state == reconstruct_state_drain_result) {
3321 int qd_idx = sh->qd_idx;
3322
3323 sh->reconstruct_state = reconstruct_state_idle;
3324 /* All the 'written' buffers and the parity blocks are ready to
3325 * be written back to disk
3326 */
3327 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
3328 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
3329 for (i = disks; i--; ) {
3330 dev = &sh->dev[i];
3331 if (test_bit(R5_LOCKED, &dev->flags) &&
3332 (i == sh->pd_idx || i == qd_idx ||
3333 dev->written)) {
3334 pr_debug("Writing block %d\n", i);
3335 BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
3336 set_bit(R5_Wantwrite, &dev->flags);
3337 if (!test_bit(R5_Insync, &dev->flags) ||
3338 ((i == sh->pd_idx || i == qd_idx) &&
3339 s.failed == 0))
3340 set_bit(STRIPE_INSYNC, &sh->state);
3341 }
3342 }
3343 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
3344 atomic_dec(&conf->preread_active_stripes);
3345 if (atomic_read(&conf->preread_active_stripes) <
3346 IO_THRESHOLD)
3347 md_wakeup_thread(conf->mddev->thread);
3348 }
3349 }
3350
3351 /* Now to consider new write requests and what else, if anything
3352 * should be read. We do not handle new writes when:
3353 * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
3354 * 2/ A 'check' operation is in flight, as it may clobber the parity
3355 * block.
3356 */
3357 if (s.to_write && !sh->reconstruct_state && !sh->check_state)
3154 handle_stripe_dirtying6(conf, sh, &s, &r6s, disks); 3358 handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
3155 3359
3156 /* maybe we need to check and possibly fix the parity for this stripe 3360 /* maybe we need to check and possibly fix the parity for this stripe
3157 * Any reads will already have been scheduled, so we just see if enough 3361 * Any reads will already have been scheduled, so we just see if enough
3158 * data is available 3362 * data is available. The parity check is held off while parity
3363 * dependent operations are in flight.
3159 */ 3364 */
3160 if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) 3365 if (sh->check_state ||
3161 handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks); 3366 (s.syncing && s.locked == 0 &&
3367 !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
3368 !test_bit(STRIPE_INSYNC, &sh->state)))
3369 handle_parity_checks6(conf, sh, &s, &r6s, disks);
3162 3370
3163 if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { 3371 if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
3164 md_done_sync(conf->mddev, STRIPE_SECTORS,1); 3372 md_done_sync(conf->mddev, STRIPE_SECTORS,1);
@@ -3179,15 +3387,29 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3179 set_bit(R5_Wantwrite, &dev->flags); 3387 set_bit(R5_Wantwrite, &dev->flags);
3180 set_bit(R5_ReWrite, &dev->flags); 3388 set_bit(R5_ReWrite, &dev->flags);
3181 set_bit(R5_LOCKED, &dev->flags); 3389 set_bit(R5_LOCKED, &dev->flags);
3390 s.locked++;
3182 } else { 3391 } else {
3183 /* let's read it back */ 3392 /* let's read it back */
3184 set_bit(R5_Wantread, &dev->flags); 3393 set_bit(R5_Wantread, &dev->flags);
3185 set_bit(R5_LOCKED, &dev->flags); 3394 set_bit(R5_LOCKED, &dev->flags);
3395 s.locked++;
3186 } 3396 }
3187 } 3397 }
3188 } 3398 }
3189 3399
3190 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { 3400 /* Finish reconstruct operations initiated by the expansion process */
3401 if (sh->reconstruct_state == reconstruct_state_result) {
3402 sh->reconstruct_state = reconstruct_state_idle;
3403 clear_bit(STRIPE_EXPANDING, &sh->state);
3404 for (i = conf->raid_disks; i--; ) {
3405 set_bit(R5_Wantwrite, &sh->dev[i].flags);
3406 set_bit(R5_LOCKED, &sh->dev[i].flags);
3407 s.locked++;
3408 }
3409 }
3410
3411 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
3412 !sh->reconstruct_state) {
3191 struct stripe_head *sh2 3413 struct stripe_head *sh2
3192 = get_active_stripe(conf, sh->sector, 1, 1, 1); 3414 = get_active_stripe(conf, sh->sector, 1, 1, 1);
3193 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { 3415 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
@@ -3208,14 +3430,8 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3208 /* Need to write out all blocks after computing P&Q */ 3430 /* Need to write out all blocks after computing P&Q */
3209 sh->disks = conf->raid_disks; 3431 sh->disks = conf->raid_disks;
3210 stripe_set_idx(sh->sector, conf, 0, sh); 3432 stripe_set_idx(sh->sector, conf, 0, sh);
3211 compute_parity6(sh, RECONSTRUCT_WRITE); 3433 schedule_reconstruction(sh, &s, 1, 1);
3212 for (i = conf->raid_disks ; i-- ; ) { 3434 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
3213 set_bit(R5_LOCKED, &sh->dev[i].flags);
3214 s.locked++;
3215 set_bit(R5_Wantwrite, &sh->dev[i].flags);
3216 }
3217 clear_bit(STRIPE_EXPANDING, &sh->state);
3218 } else if (s.expanded) {
3219 clear_bit(STRIPE_EXPAND_READY, &sh->state); 3435 clear_bit(STRIPE_EXPAND_READY, &sh->state);
3220 atomic_dec(&conf->reshape_stripes); 3436 atomic_dec(&conf->reshape_stripes);
3221 wake_up(&conf->wait_for_overlap); 3437 wake_up(&conf->wait_for_overlap);
@@ -3233,6 +3449,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3233 if (unlikely(blocked_rdev)) 3449 if (unlikely(blocked_rdev))
3234 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); 3450 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
3235 3451
3452 if (s.ops_request)
3453 raid_run_ops(sh, s.ops_request);
3454
3236 ops_run_io(sh, &s); 3455 ops_run_io(sh, &s);
3237 3456
3238 return_io(return_bi); 3457 return_io(return_bi);
@@ -3241,16 +3460,14 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3241} 3460}
3242 3461
3243/* returns true if the stripe was handled */ 3462/* returns true if the stripe was handled */
3244static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page) 3463static bool handle_stripe(struct stripe_head *sh)
3245{ 3464{
3246 if (sh->raid_conf->level == 6) 3465 if (sh->raid_conf->level == 6)
3247 return handle_stripe6(sh, tmp_page); 3466 return handle_stripe6(sh);
3248 else 3467 else
3249 return handle_stripe5(sh); 3468 return handle_stripe5(sh);
3250} 3469}
3251 3470
3252
3253
3254static void raid5_activate_delayed(raid5_conf_t *conf) 3471static void raid5_activate_delayed(raid5_conf_t *conf)
3255{ 3472{
3256 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { 3473 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
@@ -4061,7 +4278,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
4061 spin_unlock(&sh->lock); 4278 spin_unlock(&sh->lock);
4062 4279
4063 /* wait for any blocked device to be handled */ 4280 /* wait for any blocked device to be handled */
4064 while(unlikely(!handle_stripe(sh, NULL))) 4281 while (unlikely(!handle_stripe(sh)))
4065 ; 4282 ;
4066 release_stripe(sh); 4283 release_stripe(sh);
4067 4284
@@ -4118,7 +4335,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
4118 return handled; 4335 return handled;
4119 } 4336 }
4120 4337
4121 handle_stripe(sh, NULL); 4338 handle_stripe(sh);
4122 release_stripe(sh); 4339 release_stripe(sh);
4123 handled++; 4340 handled++;
4124 } 4341 }
@@ -4132,6 +4349,36 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
4132 return handled; 4349 return handled;
4133} 4350}
4134 4351
/*
 * Stripe dispatch helpers used by raid5d.
 *
 * process_stripe() handles one stripe and drops the reference the caller
 * holds on it; synchronize_stripe_processing() is called once the caller has
 * finished handing out stripes.  Two implementations are provided, selected
 * by CONFIG_MULTICORE_RAID456.
 */
#ifdef CONFIG_MULTICORE_RAID456
/* async callback: @param is the stripe_head handed to async_schedule_domain */
static void __process_stripe(void *param, async_cookie_t cookie)
{
	struct stripe_head *stripe = param;

	handle_stripe(stripe);
	release_stripe(stripe);
}

/* hand the stripe to the async infrastructure, tracked in @domain */
static void process_stripe(struct stripe_head *sh, struct list_head *domain)
{
	async_schedule_domain(__process_stripe, sh, domain);
}

/* wait for the work previously scheduled in @domain */
static void synchronize_stripe_processing(struct list_head *domain)
{
	async_synchronize_full_domain(domain);
}
#else
/* handle the stripe inline in the caller's context; @domain is unused */
static void process_stripe(struct stripe_head *sh, struct list_head *domain)
{
	handle_stripe(sh);
	release_stripe(sh);
	cond_resched();
}

/* nothing was deferred in the inline variant, so there is nothing to wait for */
static void synchronize_stripe_processing(struct list_head *domain)
{
}
#endif
4135 4382
4136 4383
4137/* 4384/*
@@ -4146,6 +4393,7 @@ static void raid5d(mddev_t *mddev)
4146 struct stripe_head *sh; 4393 struct stripe_head *sh;
4147 raid5_conf_t *conf = mddev->private; 4394 raid5_conf_t *conf = mddev->private;
4148 int handled; 4395 int handled;
4396 LIST_HEAD(raid_domain);
4149 4397
4150 pr_debug("+++ raid5d active\n"); 4398 pr_debug("+++ raid5d active\n");
4151 4399
@@ -4182,8 +4430,7 @@ static void raid5d(mddev_t *mddev)
4182 spin_unlock_irq(&conf->device_lock); 4430 spin_unlock_irq(&conf->device_lock);
4183 4431
4184 handled++; 4432 handled++;
4185 handle_stripe(sh, conf->spare_page); 4433 process_stripe(sh, &raid_domain);
4186 release_stripe(sh);
4187 4434
4188 spin_lock_irq(&conf->device_lock); 4435 spin_lock_irq(&conf->device_lock);
4189 } 4436 }
@@ -4191,6 +4438,7 @@ static void raid5d(mddev_t *mddev)
4191 4438
4192 spin_unlock_irq(&conf->device_lock); 4439 spin_unlock_irq(&conf->device_lock);
4193 4440
4441 synchronize_stripe_processing(&raid_domain);
4194 async_tx_issue_pending_all(); 4442 async_tx_issue_pending_all();
4195 unplug_slaves(mddev); 4443 unplug_slaves(mddev);
4196 4444
@@ -4323,15 +4571,118 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
4323 return sectors * (raid_disks - conf->max_degraded); 4571 return sectors * (raid_disks - conf->max_degraded);
4324} 4572}
4325 4573
4574static void raid5_free_percpu(raid5_conf_t *conf)
4575{
4576 struct raid5_percpu *percpu;
4577 unsigned long cpu;
4578
4579 if (!conf->percpu)
4580 return;
4581
4582 get_online_cpus();
4583 for_each_possible_cpu(cpu) {
4584 percpu = per_cpu_ptr(conf->percpu, cpu);
4585 safe_put_page(percpu->spare_page);
4586 kfree(percpu->scribble);
4587 }
4588#ifdef CONFIG_HOTPLUG_CPU
4589 unregister_cpu_notifier(&conf->cpu_notify);
4590#endif
4591 put_online_cpus();
4592
4593 free_percpu(conf->percpu);
4594}
4595
4326static void free_conf(raid5_conf_t *conf) 4596static void free_conf(raid5_conf_t *conf)
4327{ 4597{
4328 shrink_stripes(conf); 4598 shrink_stripes(conf);
4329 safe_put_page(conf->spare_page); 4599 raid5_free_percpu(conf);
4330 kfree(conf->disks); 4600 kfree(conf->disks);
4331 kfree(conf->stripe_hashtbl); 4601 kfree(conf->stripe_hashtbl);
4332 kfree(conf); 4602 kfree(conf);
4333} 4603}
4334 4604
4605#ifdef CONFIG_HOTPLUG_CPU
4606static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
4607 void *hcpu)
4608{
4609 raid5_conf_t *conf = container_of(nfb, raid5_conf_t, cpu_notify);
4610 long cpu = (long)hcpu;
4611 struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
4612
4613 switch (action) {
4614 case CPU_UP_PREPARE:
4615 case CPU_UP_PREPARE_FROZEN:
4616 if (conf->level == 6 && !percpu->spare_page)
4617 percpu->spare_page = alloc_page(GFP_KERNEL);
4618 if (!percpu->scribble)
4619 percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
4620
4621 if (!percpu->scribble ||
4622 (conf->level == 6 && !percpu->spare_page)) {
4623 safe_put_page(percpu->spare_page);
4624 kfree(percpu->scribble);
4625 pr_err("%s: failed memory allocation for cpu%ld\n",
4626 __func__, cpu);
4627 return NOTIFY_BAD;
4628 }
4629 break;
4630 case CPU_DEAD:
4631 case CPU_DEAD_FROZEN:
4632 safe_put_page(percpu->spare_page);
4633 kfree(percpu->scribble);
4634 percpu->spare_page = NULL;
4635 percpu->scribble = NULL;
4636 break;
4637 default:
4638 break;
4639 }
4640 return NOTIFY_OK;
4641}
4642#endif
4643
4644static int raid5_alloc_percpu(raid5_conf_t *conf)
4645{
4646 unsigned long cpu;
4647 struct page *spare_page;
4648 struct raid5_percpu *allcpus;
4649 void *scribble;
4650 int err;
4651
4652 allcpus = alloc_percpu(struct raid5_percpu);
4653 if (!allcpus)
4654 return -ENOMEM;
4655 conf->percpu = allcpus;
4656
4657 get_online_cpus();
4658 err = 0;
4659 for_each_present_cpu(cpu) {
4660 if (conf->level == 6) {
4661 spare_page = alloc_page(GFP_KERNEL);
4662 if (!spare_page) {
4663 err = -ENOMEM;
4664 break;
4665 }
4666 per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
4667 }
4668 scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL);
4669 if (!scribble) {
4670 err = -ENOMEM;
4671 break;
4672 }
4673 per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
4674 }
4675#ifdef CONFIG_HOTPLUG_CPU
4676 conf->cpu_notify.notifier_call = raid456_cpu_notify;
4677 conf->cpu_notify.priority = 0;
4678 if (err == 0)
4679 err = register_cpu_notifier(&conf->cpu_notify);
4680#endif
4681 put_online_cpus();
4682
4683 return err;
4684}
4685
4335static raid5_conf_t *setup_conf(mddev_t *mddev) 4686static raid5_conf_t *setup_conf(mddev_t *mddev)
4336{ 4687{
4337 raid5_conf_t *conf; 4688 raid5_conf_t *conf;
@@ -4373,6 +4724,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4373 goto abort; 4724 goto abort;
4374 4725
4375 conf->raid_disks = mddev->raid_disks; 4726 conf->raid_disks = mddev->raid_disks;
4727 conf->scribble_len = scribble_len(conf->raid_disks);
4376 if (mddev->reshape_position == MaxSector) 4728 if (mddev->reshape_position == MaxSector)
4377 conf->previous_raid_disks = mddev->raid_disks; 4729 conf->previous_raid_disks = mddev->raid_disks;
4378 else 4730 else
@@ -4388,11 +4740,10 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4388 if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) 4740 if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
4389 goto abort; 4741 goto abort;
4390 4742
4391 if (mddev->new_level == 6) { 4743 conf->level = mddev->new_level;
4392 conf->spare_page = alloc_page(GFP_KERNEL); 4744 if (raid5_alloc_percpu(conf) != 0)
4393 if (!conf->spare_page) 4745 goto abort;
4394 goto abort; 4746
4395 }
4396 spin_lock_init(&conf->device_lock); 4747 spin_lock_init(&conf->device_lock);
4397 init_waitqueue_head(&conf->wait_for_stripe); 4748 init_waitqueue_head(&conf->wait_for_stripe);
4398 init_waitqueue_head(&conf->wait_for_overlap); 4749 init_waitqueue_head(&conf->wait_for_overlap);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 9459689c4ea0..2390e0e83daf 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -2,6 +2,7 @@
2#define _RAID5_H 2#define _RAID5_H
3 3
4#include <linux/raid/xor.h> 4#include <linux/raid/xor.h>
5#include <linux/dmaengine.h>
5 6
6/* 7/*
7 * 8 *
@@ -175,7 +176,9 @@
175 */ 176 */
176enum check_states { 177enum check_states {
177 check_state_idle = 0, 178 check_state_idle = 0,
178 check_state_run, /* parity check */ 179 check_state_run, /* xor parity check */
180 check_state_run_q, /* q-parity check */
181 check_state_run_pq, /* pq dual parity check */
179 check_state_check_result, 182 check_state_check_result,
180 check_state_compute_run, /* parity repair */ 183 check_state_compute_run, /* parity repair */
181 check_state_compute_result, 184 check_state_compute_result,
@@ -215,8 +218,8 @@ struct stripe_head {
215 * @target - STRIPE_OP_COMPUTE_BLK target 218 * @target - STRIPE_OP_COMPUTE_BLK target
216 */ 219 */
217 struct stripe_operations { 220 struct stripe_operations {
218 int target; 221 int target, target2;
219 u32 zero_sum_result; 222 enum sum_check_flags zero_sum_result;
220 } ops; 223 } ops;
221 struct r5dev { 224 struct r5dev {
222 struct bio req; 225 struct bio req;
@@ -298,7 +301,7 @@ struct r6_state {
298#define STRIPE_OP_COMPUTE_BLK 1 301#define STRIPE_OP_COMPUTE_BLK 1
299#define STRIPE_OP_PREXOR 2 302#define STRIPE_OP_PREXOR 2
300#define STRIPE_OP_BIODRAIN 3 303#define STRIPE_OP_BIODRAIN 3
301#define STRIPE_OP_POSTXOR 4 304#define STRIPE_OP_RECONSTRUCT 4
302#define STRIPE_OP_CHECK 5 305#define STRIPE_OP_CHECK 5
303 306
304/* 307/*
@@ -385,8 +388,21 @@ struct raid5_private_data {
385 * (fresh device added). 388 * (fresh device added).
386 * Cleared when a sync completes. 389 * Cleared when a sync completes.
387 */ 390 */
388 391 /* per cpu variables */
389 struct page *spare_page; /* Used when checking P/Q in raid6 */ 392 struct raid5_percpu {
393 struct page *spare_page; /* Used when checking P/Q in raid6 */
394 void *scribble; /* space for constructing buffer
395 * lists and performing address
396 * conversions
397 */
398 } *percpu;
399 size_t scribble_len; /* size of scribble region must be
400 * associated with conf to handle
401 * cpu hotplug while reshaping
402 */
403#ifdef CONFIG_HOTPLUG_CPU
404 struct notifier_block cpu_notify;
405#endif
390 406
391 /* 407 /*
392 * Free stripes pool 408 * Free stripes pool
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 7b603e4b41db..5e10d3663ab5 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -576,6 +576,7 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
576 struct scatterlist *sg; 576 struct scatterlist *sg;
577 unsigned int i; 577 unsigned int i;
578 enum dma_data_direction direction; 578 enum dma_data_direction direction;
579 unsigned int sglen;
579 580
580 /* 581 /*
581 * We don't do DMA on "complex" transfers, i.e. with 582 * We don't do DMA on "complex" transfers, i.e. with
@@ -605,11 +606,14 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
605 else 606 else
606 direction = DMA_TO_DEVICE; 607 direction = DMA_TO_DEVICE;
607 608
609 sglen = dma_map_sg(&host->pdev->dev, data->sg, data->sg_len, direction);
610 if (sglen != data->sg_len)
611 goto unmap_exit;
608 desc = chan->device->device_prep_slave_sg(chan, 612 desc = chan->device->device_prep_slave_sg(chan,
609 data->sg, data->sg_len, direction, 613 data->sg, data->sg_len, direction,
610 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 614 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
611 if (!desc) 615 if (!desc)
612 return -ENOMEM; 616 goto unmap_exit;
613 617
614 host->dma.data_desc = desc; 618 host->dma.data_desc = desc;
615 desc->callback = atmci_dma_complete; 619 desc->callback = atmci_dma_complete;
@@ -620,6 +624,9 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
620 chan->device->device_issue_pending(chan); 624 chan->device->device_issue_pending(chan);
621 625
622 return 0; 626 return 0;
627unmap_exit:
628 dma_unmap_sg(&host->pdev->dev, data->sg, sglen, direction);
629 return -ENOMEM;
623} 630}
624 631
625#else /* CONFIG_MMC_ATMELMCI_DMA */ 632#else /* CONFIG_MMC_ATMELMCI_DMA */