aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/dma/ioat
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/dma/ioat')
-rw-r--r--drivers/dma/ioat/Makefile2
-rw-r--r--drivers/dma/ioat/dca.c684
-rw-r--r--drivers/dma/ioat/dma.c1238
-rw-r--r--drivers/dma/ioat/dma.h337
-rw-r--r--drivers/dma/ioat/dma_v2.c871
-rw-r--r--drivers/dma/ioat/dma_v2.h190
-rw-r--r--drivers/dma/ioat/dma_v3.c1223
-rw-r--r--drivers/dma/ioat/hw.h215
-rw-r--r--drivers/dma/ioat/pci.c210
-rw-r--r--drivers/dma/ioat/registers.h250
10 files changed, 5220 insertions, 0 deletions
diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile
new file mode 100644
index 000000000000..8997d3fb9051
--- /dev/null
+++ b/drivers/dma/ioat/Makefile
@@ -0,0 +1,2 @@
1obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
2ioatdma-objs := pci.o dma.o dma_v2.o dma_v3.o dca.o
diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c
new file mode 100644
index 000000000000..69d02615c4d6
--- /dev/null
+++ b/drivers/dma/ioat/dca.c
@@ -0,0 +1,684 @@
1/*
2 * Intel I/OAT DMA Linux driver
3 * Copyright(c) 2007 - 2009 Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 */
22
23#include <linux/kernel.h>
24#include <linux/pci.h>
25#include <linux/smp.h>
26#include <linux/interrupt.h>
27#include <linux/dca.h>
28
29/* either a kernel change is needed, or we need something like this in kernel */
30#ifndef CONFIG_SMP
31#include <asm/smp.h>
32#undef cpu_physical_id
33#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
34#endif
35
36#include "dma.h"
37#include "registers.h"
38
/*
 * Tag map entry encoding (I/OAT v1/v2 style):
 *   bit 7 set   - entry is "valid": bits 0:6 hold the number of the APIC ID
 *                 bit that is copied into the DCA tag.
 *   bit 7 clear - entry must be 0 or 1 and is the literal tag bit value.
 */
#define DCA_TAG_MAP_VALID 0x80

/* Tag map entry flags used by the I/OAT v3 tag computation */
#define DCA3_TAG_MAP_BIT_TO_INV 0x80
#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
#define DCA3_TAG_MAP_LITERAL_VAL 0x1

/* applied to every v3 tag map byte read from hardware; clears bit 5 */
#define DCA_TAG_MAP_MASK 0xDF
51
52/* expected tag map bytes for I/OAT ver.2 */
53#define DCA2_TAG_MAP_BYTE0 0x80
54#define DCA2_TAG_MAP_BYTE1 0x0
55#define DCA2_TAG_MAP_BYTE2 0x81
56#define DCA2_TAG_MAP_BYTE3 0x82
57#define DCA2_TAG_MAP_BYTE4 0x82
58
59/* verify if tag map matches expected values */
60static inline int dca2_tag_map_valid(u8 *tag_map)
61{
62 return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) &&
63 (tag_map[1] == DCA2_TAG_MAP_BYTE1) &&
64 (tag_map[2] == DCA2_TAG_MAP_BYTE2) &&
65 (tag_map[3] == DCA2_TAG_MAP_BYTE3) &&
66 (tag_map[4] == DCA2_TAG_MAP_BYTE4));
67}
68
69/*
70 * "Legacy" DCA systems do not implement the DCA register set in the
71 * I/OAT device. Software needs direct support for their tag mappings.
72 */
73
74#define APICID_BIT(x) (DCA_TAG_MAP_VALID | (x))
75#define IOAT_TAG_MAP_LEN 8
76
77static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = {
78 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
79static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = {
80 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
81static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = {
82 1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), };
83static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 };
84
85/* pack PCI B/D/F into a u16 */
86static inline u16 dcaid_from_pcidev(struct pci_dev *pci)
87{
88 return (pci->bus->number << 8) | pci->devfn;
89}
90
91static int dca_enabled_in_bios(struct pci_dev *pdev)
92{
93 /* CPUID level 9 returns DCA configuration */
94 /* Bit 0 indicates DCA enabled by the BIOS */
95 unsigned long cpuid_level_9;
96 int res;
97
98 cpuid_level_9 = cpuid_eax(9);
99 res = test_bit(0, &cpuid_level_9);
100 if (!res)
101 dev_err(&pdev->dev, "DCA is disabled in BIOS\n");
102
103 return res;
104}
105
106static int system_has_dca_enabled(struct pci_dev *pdev)
107{
108 if (boot_cpu_has(X86_FEATURE_DCA))
109 return dca_enabled_in_bios(pdev);
110
111 dev_err(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n");
112 return 0;
113}
114
115struct ioat_dca_slot {
116 struct pci_dev *pdev; /* requester device */
117 u16 rid; /* requester id, as used by IOAT */
118};
119
120#define IOAT_DCA_MAX_REQ 6
121#define IOAT3_DCA_MAX_REQ 2
122
123struct ioat_dca_priv {
124 void __iomem *iobase;
125 void __iomem *dca_base;
126 int max_requesters;
127 int requester_count;
128 u8 tag_map[IOAT_TAG_MAP_LEN];
129 struct ioat_dca_slot req_slots[0];
130};
131
132/* 5000 series chipset DCA Port Requester ID Table Entry Format
133 * [15:8] PCI-Express Bus Number
134 * [7:3] PCI-Express Device Number
135 * [2:0] PCI-Express Function Number
136 *
137 * 5000 series chipset DCA control register format
138 * [7:1] Reserved (0)
139 * [0] Ignore Function Number
140 */
141
142static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev)
143{
144 struct ioat_dca_priv *ioatdca = dca_priv(dca);
145 struct pci_dev *pdev;
146 int i;
147 u16 id;
148
149 /* This implementation only supports PCI-Express */
150 if (dev->bus != &pci_bus_type)
151 return -ENODEV;
152 pdev = to_pci_dev(dev);
153 id = dcaid_from_pcidev(pdev);
154
155 if (ioatdca->requester_count == ioatdca->max_requesters)
156 return -ENODEV;
157
158 for (i = 0; i < ioatdca->max_requesters; i++) {
159 if (ioatdca->req_slots[i].pdev == NULL) {
160 /* found an empty slot */
161 ioatdca->requester_count++;
162 ioatdca->req_slots[i].pdev = pdev;
163 ioatdca->req_slots[i].rid = id;
164 writew(id, ioatdca->dca_base + (i * 4));
165 /* make sure the ignore function bit is off */
166 writeb(0, ioatdca->dca_base + (i * 4) + 2);
167 return i;
168 }
169 }
170 /* Error, ioatdma->requester_count is out of whack */
171 return -EFAULT;
172}
173
174static int ioat_dca_remove_requester(struct dca_provider *dca,
175 struct device *dev)
176{
177 struct ioat_dca_priv *ioatdca = dca_priv(dca);
178 struct pci_dev *pdev;
179 int i;
180
181 /* This implementation only supports PCI-Express */
182 if (dev->bus != &pci_bus_type)
183 return -ENODEV;
184 pdev = to_pci_dev(dev);
185
186 for (i = 0; i < ioatdca->max_requesters; i++) {
187 if (ioatdca->req_slots[i].pdev == pdev) {
188 writew(0, ioatdca->dca_base + (i * 4));
189 ioatdca->req_slots[i].pdev = NULL;
190 ioatdca->req_slots[i].rid = 0;
191 ioatdca->requester_count--;
192 return i;
193 }
194 }
195 return -ENODEV;
196}
197
198static u8 ioat_dca_get_tag(struct dca_provider *dca,
199 struct device *dev,
200 int cpu)
201{
202 struct ioat_dca_priv *ioatdca = dca_priv(dca);
203 int i, apic_id, bit, value;
204 u8 entry, tag;
205
206 tag = 0;
207 apic_id = cpu_physical_id(cpu);
208
209 for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
210 entry = ioatdca->tag_map[i];
211 if (entry & DCA_TAG_MAP_VALID) {
212 bit = entry & ~DCA_TAG_MAP_VALID;
213 value = (apic_id & (1 << bit)) ? 1 : 0;
214 } else {
215 value = entry ? 1 : 0;
216 }
217 tag |= (value << i);
218 }
219 return tag;
220}
221
222static int ioat_dca_dev_managed(struct dca_provider *dca,
223 struct device *dev)
224{
225 struct ioat_dca_priv *ioatdca = dca_priv(dca);
226 struct pci_dev *pdev;
227 int i;
228
229 pdev = to_pci_dev(dev);
230 for (i = 0; i < ioatdca->max_requesters; i++) {
231 if (ioatdca->req_slots[i].pdev == pdev)
232 return 1;
233 }
234 return 0;
235}
236
237static struct dca_ops ioat_dca_ops = {
238 .add_requester = ioat_dca_add_requester,
239 .remove_requester = ioat_dca_remove_requester,
240 .get_tag = ioat_dca_get_tag,
241 .dev_managed = ioat_dca_dev_managed,
242};
243
244
245struct dca_provider * __devinit
246ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
247{
248 struct dca_provider *dca;
249 struct ioat_dca_priv *ioatdca;
250 u8 *tag_map = NULL;
251 int i;
252 int err;
253 u8 version;
254 u8 max_requesters;
255
256 if (!system_has_dca_enabled(pdev))
257 return NULL;
258
259 /* I/OAT v1 systems must have a known tag_map to support DCA */
260 switch (pdev->vendor) {
261 case PCI_VENDOR_ID_INTEL:
262 switch (pdev->device) {
263 case PCI_DEVICE_ID_INTEL_IOAT:
264 tag_map = ioat_tag_map_BNB;
265 break;
266 case PCI_DEVICE_ID_INTEL_IOAT_CNB:
267 tag_map = ioat_tag_map_CNB;
268 break;
269 case PCI_DEVICE_ID_INTEL_IOAT_SCNB:
270 tag_map = ioat_tag_map_SCNB;
271 break;
272 }
273 break;
274 case PCI_VENDOR_ID_UNISYS:
275 switch (pdev->device) {
276 case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR:
277 tag_map = ioat_tag_map_UNISYS;
278 break;
279 }
280 break;
281 }
282 if (tag_map == NULL)
283 return NULL;
284
285 version = readb(iobase + IOAT_VER_OFFSET);
286 if (version == IOAT_VER_3_0)
287 max_requesters = IOAT3_DCA_MAX_REQ;
288 else
289 max_requesters = IOAT_DCA_MAX_REQ;
290
291 dca = alloc_dca_provider(&ioat_dca_ops,
292 sizeof(*ioatdca) +
293 (sizeof(struct ioat_dca_slot) * max_requesters));
294 if (!dca)
295 return NULL;
296
297 ioatdca = dca_priv(dca);
298 ioatdca->max_requesters = max_requesters;
299 ioatdca->dca_base = iobase + 0x54;
300
301 /* copy over the APIC ID to DCA tag mapping */
302 for (i = 0; i < IOAT_TAG_MAP_LEN; i++)
303 ioatdca->tag_map[i] = tag_map[i];
304
305 err = register_dca_provider(dca, &pdev->dev);
306 if (err) {
307 free_dca_provider(dca);
308 return NULL;
309 }
310
311 return dca;
312}
313
314
315static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev)
316{
317 struct ioat_dca_priv *ioatdca = dca_priv(dca);
318 struct pci_dev *pdev;
319 int i;
320 u16 id;
321 u16 global_req_table;
322
323 /* This implementation only supports PCI-Express */
324 if (dev->bus != &pci_bus_type)
325 return -ENODEV;
326 pdev = to_pci_dev(dev);
327 id = dcaid_from_pcidev(pdev);
328
329 if (ioatdca->requester_count == ioatdca->max_requesters)
330 return -ENODEV;
331
332 for (i = 0; i < ioatdca->max_requesters; i++) {
333 if (ioatdca->req_slots[i].pdev == NULL) {
334 /* found an empty slot */
335 ioatdca->requester_count++;
336 ioatdca->req_slots[i].pdev = pdev;
337 ioatdca->req_slots[i].rid = id;
338 global_req_table =
339 readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
340 writel(id | IOAT_DCA_GREQID_VALID,
341 ioatdca->iobase + global_req_table + (i * 4));
342 return i;
343 }
344 }
345 /* Error, ioatdma->requester_count is out of whack */
346 return -EFAULT;
347}
348
349static int ioat2_dca_remove_requester(struct dca_provider *dca,
350 struct device *dev)
351{
352 struct ioat_dca_priv *ioatdca = dca_priv(dca);
353 struct pci_dev *pdev;
354 int i;
355 u16 global_req_table;
356
357 /* This implementation only supports PCI-Express */
358 if (dev->bus != &pci_bus_type)
359 return -ENODEV;
360 pdev = to_pci_dev(dev);
361
362 for (i = 0; i < ioatdca->max_requesters; i++) {
363 if (ioatdca->req_slots[i].pdev == pdev) {
364 global_req_table =
365 readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
366 writel(0, ioatdca->iobase + global_req_table + (i * 4));
367 ioatdca->req_slots[i].pdev = NULL;
368 ioatdca->req_slots[i].rid = 0;
369 ioatdca->requester_count--;
370 return i;
371 }
372 }
373 return -ENODEV;
374}
375
376static u8 ioat2_dca_get_tag(struct dca_provider *dca,
377 struct device *dev,
378 int cpu)
379{
380 u8 tag;
381
382 tag = ioat_dca_get_tag(dca, dev, cpu);
383 tag = (~tag) & 0x1F;
384 return tag;
385}
386
387static struct dca_ops ioat2_dca_ops = {
388 .add_requester = ioat2_dca_add_requester,
389 .remove_requester = ioat2_dca_remove_requester,
390 .get_tag = ioat2_dca_get_tag,
391 .dev_managed = ioat_dca_dev_managed,
392};
393
394static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
395{
396 int slots = 0;
397 u32 req;
398 u16 global_req_table;
399
400 global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET);
401 if (global_req_table == 0)
402 return 0;
403 do {
404 req = readl(iobase + global_req_table + (slots * sizeof(u32)));
405 slots++;
406 } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
407
408 return slots;
409}
410
411struct dca_provider * __devinit
412ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
413{
414 struct dca_provider *dca;
415 struct ioat_dca_priv *ioatdca;
416 int slots;
417 int i;
418 int err;
419 u32 tag_map;
420 u16 dca_offset;
421 u16 csi_fsb_control;
422 u16 pcie_control;
423 u8 bit;
424
425 if (!system_has_dca_enabled(pdev))
426 return NULL;
427
428 dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
429 if (dca_offset == 0)
430 return NULL;
431
432 slots = ioat2_dca_count_dca_slots(iobase, dca_offset);
433 if (slots == 0)
434 return NULL;
435
436 dca = alloc_dca_provider(&ioat2_dca_ops,
437 sizeof(*ioatdca)
438 + (sizeof(struct ioat_dca_slot) * slots));
439 if (!dca)
440 return NULL;
441
442 ioatdca = dca_priv(dca);
443 ioatdca->iobase = iobase;
444 ioatdca->dca_base = iobase + dca_offset;
445 ioatdca->max_requesters = slots;
446
447 /* some bios might not know to turn these on */
448 csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
449 if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) {
450 csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH;
451 writew(csi_fsb_control,
452 ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
453 }
454 pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
455 if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) {
456 pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR;
457 writew(pcie_control,
458 ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
459 }
460
461
462 /* TODO version, compatibility and configuration checks */
463
464 /* copy out the APIC to DCA tag map */
465 tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET);
466 for (i = 0; i < 5; i++) {
467 bit = (tag_map >> (4 * i)) & 0x0f;
468 if (bit < 8)
469 ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID;
470 else
471 ioatdca->tag_map[i] = 0;
472 }
473
474 if (!dca2_tag_map_valid(ioatdca->tag_map)) {
475 dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, "
476 "disabling DCA\n");
477 free_dca_provider(dca);
478 return NULL;
479 }
480
481 err = register_dca_provider(dca, &pdev->dev);
482 if (err) {
483 free_dca_provider(dca);
484 return NULL;
485 }
486
487 return dca;
488}
489
490static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev)
491{
492 struct ioat_dca_priv *ioatdca = dca_priv(dca);
493 struct pci_dev *pdev;
494 int i;
495 u16 id;
496 u16 global_req_table;
497
498 /* This implementation only supports PCI-Express */
499 if (dev->bus != &pci_bus_type)
500 return -ENODEV;
501 pdev = to_pci_dev(dev);
502 id = dcaid_from_pcidev(pdev);
503
504 if (ioatdca->requester_count == ioatdca->max_requesters)
505 return -ENODEV;
506
507 for (i = 0; i < ioatdca->max_requesters; i++) {
508 if (ioatdca->req_slots[i].pdev == NULL) {
509 /* found an empty slot */
510 ioatdca->requester_count++;
511 ioatdca->req_slots[i].pdev = pdev;
512 ioatdca->req_slots[i].rid = id;
513 global_req_table =
514 readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
515 writel(id | IOAT_DCA_GREQID_VALID,
516 ioatdca->iobase + global_req_table + (i * 4));
517 return i;
518 }
519 }
520 /* Error, ioatdma->requester_count is out of whack */
521 return -EFAULT;
522}
523
524static int ioat3_dca_remove_requester(struct dca_provider *dca,
525 struct device *dev)
526{
527 struct ioat_dca_priv *ioatdca = dca_priv(dca);
528 struct pci_dev *pdev;
529 int i;
530 u16 global_req_table;
531
532 /* This implementation only supports PCI-Express */
533 if (dev->bus != &pci_bus_type)
534 return -ENODEV;
535 pdev = to_pci_dev(dev);
536
537 for (i = 0; i < ioatdca->max_requesters; i++) {
538 if (ioatdca->req_slots[i].pdev == pdev) {
539 global_req_table =
540 readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
541 writel(0, ioatdca->iobase + global_req_table + (i * 4));
542 ioatdca->req_slots[i].pdev = NULL;
543 ioatdca->req_slots[i].rid = 0;
544 ioatdca->requester_count--;
545 return i;
546 }
547 }
548 return -ENODEV;
549}
550
551static u8 ioat3_dca_get_tag(struct dca_provider *dca,
552 struct device *dev,
553 int cpu)
554{
555 u8 tag;
556
557 struct ioat_dca_priv *ioatdca = dca_priv(dca);
558 int i, apic_id, bit, value;
559 u8 entry;
560
561 tag = 0;
562 apic_id = cpu_physical_id(cpu);
563
564 for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
565 entry = ioatdca->tag_map[i];
566 if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
567 bit = entry &
568 ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
569 value = (apic_id & (1 << bit)) ? 1 : 0;
570 } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
571 bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
572 value = (apic_id & (1 << bit)) ? 0 : 1;
573 } else {
574 value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
575 }
576 tag |= (value << i);
577 }
578
579 return tag;
580}
581
582static struct dca_ops ioat3_dca_ops = {
583 .add_requester = ioat3_dca_add_requester,
584 .remove_requester = ioat3_dca_remove_requester,
585 .get_tag = ioat3_dca_get_tag,
586 .dev_managed = ioat_dca_dev_managed,
587};
588
589static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset)
590{
591 int slots = 0;
592 u32 req;
593 u16 global_req_table;
594
595 global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
596 if (global_req_table == 0)
597 return 0;
598
599 do {
600 req = readl(iobase + global_req_table + (slots * sizeof(u32)));
601 slots++;
602 } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
603
604 return slots;
605}
606
607struct dca_provider * __devinit
608ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
609{
610 struct dca_provider *dca;
611 struct ioat_dca_priv *ioatdca;
612 int slots;
613 int i;
614 int err;
615 u16 dca_offset;
616 u16 csi_fsb_control;
617 u16 pcie_control;
618 u8 bit;
619
620 union {
621 u64 full;
622 struct {
623 u32 low;
624 u32 high;
625 };
626 } tag_map;
627
628 if (!system_has_dca_enabled(pdev))
629 return NULL;
630
631 dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
632 if (dca_offset == 0)
633 return NULL;
634
635 slots = ioat3_dca_count_dca_slots(iobase, dca_offset);
636 if (slots == 0)
637 return NULL;
638
639 dca = alloc_dca_provider(&ioat3_dca_ops,
640 sizeof(*ioatdca)
641 + (sizeof(struct ioat_dca_slot) * slots));
642 if (!dca)
643 return NULL;
644
645 ioatdca = dca_priv(dca);
646 ioatdca->iobase = iobase;
647 ioatdca->dca_base = iobase + dca_offset;
648 ioatdca->max_requesters = slots;
649
650 /* some bios might not know to turn these on */
651 csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
652 if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) {
653 csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH;
654 writew(csi_fsb_control,
655 ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
656 }
657 pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
658 if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) {
659 pcie_control |= IOAT3_PCI_CONTROL_MEMWR;
660 writew(pcie_control,
661 ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
662 }
663
664
665 /* TODO version, compatibility and configuration checks */
666
667 /* copy out the APIC to DCA tag map */
668 tag_map.low =
669 readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
670 tag_map.high =
671 readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);
672 for (i = 0; i < 8; i++) {
673 bit = tag_map.full >> (8 * i);
674 ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK;
675 }
676
677 err = register_dca_provider(dca, &pdev->dev);
678 if (err) {
679 free_dca_provider(dca);
680 return NULL;
681 }
682
683 return dca;
684}
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
new file mode 100644
index 000000000000..c524d36d3c2e
--- /dev/null
+++ b/drivers/dma/ioat/dma.c
@@ -0,0 +1,1238 @@
1/*
2 * Intel I/OAT DMA Linux driver
3 * Copyright(c) 2004 - 2009 Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 */
22
23/*
24 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
25 * copy operations.
26 */
27
28#include <linux/init.h>
29#include <linux/module.h>
30#include <linux/pci.h>
31#include <linux/interrupt.h>
32#include <linux/dmaengine.h>
33#include <linux/delay.h>
34#include <linux/dma-mapping.h>
35#include <linux/workqueue.h>
36#include <linux/i7300_idle.h>
37#include "dma.h"
38#include "registers.h"
39#include "hw.h"
40
41int ioat_pending_level = 4;
42module_param(ioat_pending_level, int, 0644);
43MODULE_PARM_DESC(ioat_pending_level,
44 "high-water mark for pushing ioat descriptors (default: 4)");
45
46/* internal functions */
47static void ioat1_cleanup(struct ioat_dma_chan *ioat);
48static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat);
49
50/**
51 * ioat_dma_do_interrupt - handler used for single vector interrupt mode
52 * @irq: interrupt id
53 * @data: interrupt data
54 */
55static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
56{
57 struct ioatdma_device *instance = data;
58 struct ioat_chan_common *chan;
59 unsigned long attnstatus;
60 int bit;
61 u8 intrctrl;
62
63 intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
64
65 if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
66 return IRQ_NONE;
67
68 if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
69 writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
70 return IRQ_NONE;
71 }
72
73 attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
74 for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
75 chan = ioat_chan_by_index(instance, bit);
76 tasklet_schedule(&chan->cleanup_task);
77 }
78
79 writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
80 return IRQ_HANDLED;
81}
82
83/**
84 * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
85 * @irq: interrupt id
86 * @data: interrupt data
87 */
88static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
89{
90 struct ioat_chan_common *chan = data;
91
92 tasklet_schedule(&chan->cleanup_task);
93
94 return IRQ_HANDLED;
95}
96
97static void ioat1_cleanup_tasklet(unsigned long data);
98
99/* common channel initialization */
100void ioat_init_channel(struct ioatdma_device *device,
101 struct ioat_chan_common *chan, int idx,
102 void (*timer_fn)(unsigned long),
103 void (*tasklet)(unsigned long),
104 unsigned long ioat)
105{
106 struct dma_device *dma = &device->common;
107
108 chan->device = device;
109 chan->reg_base = device->reg_base + (0x80 * (idx + 1));
110 spin_lock_init(&chan->cleanup_lock);
111 chan->common.device = dma;
112 list_add_tail(&chan->common.device_node, &dma->channels);
113 device->idx[idx] = chan;
114 init_timer(&chan->timer);
115 chan->timer.function = timer_fn;
116 chan->timer.data = ioat;
117 tasklet_init(&chan->cleanup_task, tasklet, ioat);
118 tasklet_disable(&chan->cleanup_task);
119}
120
121static void ioat1_timer_event(unsigned long data);
122
123/**
124 * ioat1_dma_enumerate_channels - find and initialize the device's channels
125 * @device: the device to be enumerated
126 */
127static int ioat1_enumerate_channels(struct ioatdma_device *device)
128{
129 u8 xfercap_scale;
130 u32 xfercap;
131 int i;
132 struct ioat_dma_chan *ioat;
133 struct device *dev = &device->pdev->dev;
134 struct dma_device *dma = &device->common;
135
136 INIT_LIST_HEAD(&dma->channels);
137 dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
138 dma->chancnt &= 0x1f; /* bits [4:0] valid */
139 if (dma->chancnt > ARRAY_SIZE(device->idx)) {
140 dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
141 dma->chancnt, ARRAY_SIZE(device->idx));
142 dma->chancnt = ARRAY_SIZE(device->idx);
143 }
144 xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
145 xfercap_scale &= 0x1f; /* bits [4:0] valid */
146 xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
147 dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap);
148
149#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
150 if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
151 dma->chancnt--;
152#endif
153 for (i = 0; i < dma->chancnt; i++) {
154 ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
155 if (!ioat)
156 break;
157
158 ioat_init_channel(device, &ioat->base, i,
159 ioat1_timer_event,
160 ioat1_cleanup_tasklet,
161 (unsigned long) ioat);
162 ioat->xfercap = xfercap;
163 spin_lock_init(&ioat->desc_lock);
164 INIT_LIST_HEAD(&ioat->free_desc);
165 INIT_LIST_HEAD(&ioat->used_desc);
166 }
167 dma->chancnt = i;
168 return i;
169}
170
171/**
172 * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
173 * descriptors to hw
174 * @chan: DMA channel handle
175 */
176static inline void
177__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat)
178{
179 void __iomem *reg_base = ioat->base.reg_base;
180
181 dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n",
182 __func__, ioat->pending);
183 ioat->pending = 0;
184 writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET);
185}
186
187static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
188{
189 struct ioat_dma_chan *ioat = to_ioat_chan(chan);
190
191 if (ioat->pending > 0) {
192 spin_lock_bh(&ioat->desc_lock);
193 __ioat1_dma_memcpy_issue_pending(ioat);
194 spin_unlock_bh(&ioat->desc_lock);
195 }
196}
197
198/**
199 * ioat1_reset_channel - restart a channel
200 * @ioat: IOAT DMA channel handle
201 */
202static void ioat1_reset_channel(struct ioat_dma_chan *ioat)
203{
204 struct ioat_chan_common *chan = &ioat->base;
205 void __iomem *reg_base = chan->reg_base;
206 u32 chansts, chanerr;
207
208 dev_warn(to_dev(chan), "reset\n");
209 chanerr = readl(reg_base + IOAT_CHANERR_OFFSET);
210 chansts = *chan->completion & IOAT_CHANSTS_STATUS;
211 if (chanerr) {
212 dev_err(to_dev(chan),
213 "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
214 chan_num(chan), chansts, chanerr);
215 writel(chanerr, reg_base + IOAT_CHANERR_OFFSET);
216 }
217
218 /*
219 * whack it upside the head with a reset
220 * and wait for things to settle out.
221 * force the pending count to a really big negative
222 * to make sure no one forces an issue_pending
223 * while we're waiting.
224 */
225
226 ioat->pending = INT_MIN;
227 writeb(IOAT_CHANCMD_RESET,
228 reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
229 set_bit(IOAT_RESET_PENDING, &chan->state);
230 mod_timer(&chan->timer, jiffies + RESET_DELAY);
231}
232
233static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
234{
235 struct dma_chan *c = tx->chan;
236 struct ioat_dma_chan *ioat = to_ioat_chan(c);
237 struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
238 struct ioat_chan_common *chan = &ioat->base;
239 struct ioat_desc_sw *first;
240 struct ioat_desc_sw *chain_tail;
241 dma_cookie_t cookie;
242
243 spin_lock_bh(&ioat->desc_lock);
244 /* cookie incr and addition to used_list must be atomic */
245 cookie = c->cookie;
246 cookie++;
247 if (cookie < 0)
248 cookie = 1;
249 c->cookie = cookie;
250 tx->cookie = cookie;
251 dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
252
253 /* write address into NextDescriptor field of last desc in chain */
254 first = to_ioat_desc(desc->tx_list.next);
255 chain_tail = to_ioat_desc(ioat->used_desc.prev);
256 /* make descriptor updates globally visible before chaining */
257 wmb();
258 chain_tail->hw->next = first->txd.phys;
259 list_splice_tail_init(&desc->tx_list, &ioat->used_desc);
260 dump_desc_dbg(ioat, chain_tail);
261 dump_desc_dbg(ioat, first);
262
263 if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
264 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
265
266 ioat->active += desc->hw->tx_cnt;
267 ioat->pending += desc->hw->tx_cnt;
268 if (ioat->pending >= ioat_pending_level)
269 __ioat1_dma_memcpy_issue_pending(ioat);
270 spin_unlock_bh(&ioat->desc_lock);
271
272 return cookie;
273}
274
275/**
276 * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
277 * @ioat: the channel supplying the memory pool for the descriptors
278 * @flags: allocation flags
279 */
280static struct ioat_desc_sw *
281ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags)
282{
283 struct ioat_dma_descriptor *desc;
284 struct ioat_desc_sw *desc_sw;
285 struct ioatdma_device *ioatdma_device;
286 dma_addr_t phys;
287
288 ioatdma_device = ioat->base.device;
289 desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
290 if (unlikely(!desc))
291 return NULL;
292
293 desc_sw = kzalloc(sizeof(*desc_sw), flags);
294 if (unlikely(!desc_sw)) {
295 pci_pool_free(ioatdma_device->dma_pool, desc, phys);
296 return NULL;
297 }
298
299 memset(desc, 0, sizeof(*desc));
300
301 INIT_LIST_HEAD(&desc_sw->tx_list);
302 dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common);
303 desc_sw->txd.tx_submit = ioat1_tx_submit;
304 desc_sw->hw = desc;
305 desc_sw->txd.phys = phys;
306 set_desc_id(desc_sw, -1);
307
308 return desc_sw;
309}
310
311static int ioat_initial_desc_count = 256;
312module_param(ioat_initial_desc_count, int, 0644);
313MODULE_PARM_DESC(ioat_initial_desc_count,
314 "ioat1: initial descriptors per channel (default: 256)");
315/**
316 * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors
317 * @chan: the channel to be filled out
318 */
319static int ioat1_dma_alloc_chan_resources(struct dma_chan *c)
320{
321 struct ioat_dma_chan *ioat = to_ioat_chan(c);
322 struct ioat_chan_common *chan = &ioat->base;
323 struct ioat_desc_sw *desc;
324 u32 chanerr;
325 int i;
326 LIST_HEAD(tmp_list);
327
328 /* have we already been set up? */
329 if (!list_empty(&ioat->free_desc))
330 return ioat->desccount;
331
332 /* Setup register to interrupt and write completion status on error */
333 writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
334
335 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
336 if (chanerr) {
337 dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
338 writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
339 }
340
341 /* Allocate descriptors */
342 for (i = 0; i < ioat_initial_desc_count; i++) {
343 desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL);
344 if (!desc) {
345 dev_err(to_dev(chan), "Only %d initial descriptors\n", i);
346 break;
347 }
348 set_desc_id(desc, i);
349 list_add_tail(&desc->node, &tmp_list);
350 }
351 spin_lock_bh(&ioat->desc_lock);
352 ioat->desccount = i;
353 list_splice(&tmp_list, &ioat->free_desc);
354 spin_unlock_bh(&ioat->desc_lock);
355
356 /* allocate a completion writeback area */
357 /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
358 chan->completion = pci_pool_alloc(chan->device->completion_pool,
359 GFP_KERNEL, &chan->completion_dma);
360 memset(chan->completion, 0, sizeof(*chan->completion));
361 writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
362 chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
363 writel(((u64) chan->completion_dma) >> 32,
364 chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
365
366 tasklet_enable(&chan->cleanup_task);
367 ioat1_dma_start_null_desc(ioat); /* give chain to dma device */
368 dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
369 __func__, ioat->desccount);
370 return ioat->desccount;
371}
372
373/**
374 * ioat1_dma_free_chan_resources - release all the descriptors
375 * @chan: the channel to be cleaned
376 */
377static void ioat1_dma_free_chan_resources(struct dma_chan *c)
378{
379 struct ioat_dma_chan *ioat = to_ioat_chan(c);
380 struct ioat_chan_common *chan = &ioat->base;
381 struct ioatdma_device *ioatdma_device = chan->device;
382 struct ioat_desc_sw *desc, *_desc;
383 int in_use_descs = 0;
384
385 /* Before freeing channel resources first check
386 * if they have been previously allocated for this channel.
387 */
388 if (ioat->desccount == 0)
389 return;
390
391 tasklet_disable(&chan->cleanup_task);
392 del_timer_sync(&chan->timer);
393 ioat1_cleanup(ioat);
394
395 /* Delay 100ms after reset to allow internal DMA logic to quiesce
396 * before removing DMA descriptor resources.
397 */
398 writeb(IOAT_CHANCMD_RESET,
399 chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
400 mdelay(100);
401
402 spin_lock_bh(&ioat->desc_lock);
403 list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) {
404 dev_dbg(to_dev(chan), "%s: freeing %d from used list\n",
405 __func__, desc_id(desc));
406 dump_desc_dbg(ioat, desc);
407 in_use_descs++;
408 list_del(&desc->node);
409 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
410 desc->txd.phys);
411 kfree(desc);
412 }
413 list_for_each_entry_safe(desc, _desc,
414 &ioat->free_desc, node) {
415 list_del(&desc->node);
416 pci_pool_free(ioatdma_device->dma_pool, desc->hw,
417 desc->txd.phys);
418 kfree(desc);
419 }
420 spin_unlock_bh(&ioat->desc_lock);
421
422 pci_pool_free(ioatdma_device->completion_pool,
423 chan->completion,
424 chan->completion_dma);
425
426 /* one is ok since we left it on there on purpose */
427 if (in_use_descs > 1)
428 dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
429 in_use_descs - 1);
430
431 chan->last_completion = 0;
432 chan->completion_dma = 0;
433 ioat->pending = 0;
434 ioat->desccount = 0;
435}
436
437/**
438 * ioat1_dma_get_next_descriptor - return the next available descriptor
439 * @ioat: IOAT DMA channel handle
440 *
441 * Gets the next descriptor from the chain, and must be called with the
442 * channel's desc_lock held. Allocates more descriptors if the channel
443 * has run out.
444 */
445static struct ioat_desc_sw *
446ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat)
447{
448 struct ioat_desc_sw *new;
449
450 if (!list_empty(&ioat->free_desc)) {
451 new = to_ioat_desc(ioat->free_desc.next);
452 list_del(&new->node);
453 } else {
454 /* try to get another desc */
455 new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC);
456 if (!new) {
457 dev_err(to_dev(&ioat->base), "alloc failed\n");
458 return NULL;
459 }
460 }
461 dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n",
462 __func__, desc_id(new));
463 prefetch(new->hw);
464 return new;
465}
466
467static struct dma_async_tx_descriptor *
468ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest,
469 dma_addr_t dma_src, size_t len, unsigned long flags)
470{
471 struct ioat_dma_chan *ioat = to_ioat_chan(c);
472 struct ioat_desc_sw *desc;
473 size_t copy;
474 LIST_HEAD(chain);
475 dma_addr_t src = dma_src;
476 dma_addr_t dest = dma_dest;
477 size_t total_len = len;
478 struct ioat_dma_descriptor *hw = NULL;
479 int tx_cnt = 0;
480
481 spin_lock_bh(&ioat->desc_lock);
482 desc = ioat1_dma_get_next_descriptor(ioat);
483 do {
484 if (!desc)
485 break;
486
487 tx_cnt++;
488 copy = min_t(size_t, len, ioat->xfercap);
489
490 hw = desc->hw;
491 hw->size = copy;
492 hw->ctl = 0;
493 hw->src_addr = src;
494 hw->dst_addr = dest;
495
496 list_add_tail(&desc->node, &chain);
497
498 len -= copy;
499 dest += copy;
500 src += copy;
501 if (len) {
502 struct ioat_desc_sw *next;
503
504 async_tx_ack(&desc->txd);
505 next = ioat1_dma_get_next_descriptor(ioat);
506 hw->next = next ? next->txd.phys : 0;
507 dump_desc_dbg(ioat, desc);
508 desc = next;
509 } else
510 hw->next = 0;
511 } while (len);
512
513 if (!desc) {
514 struct ioat_chan_common *chan = &ioat->base;
515
516 dev_err(to_dev(chan),
517 "chan%d - get_next_desc failed\n", chan_num(chan));
518 list_splice(&chain, &ioat->free_desc);
519 spin_unlock_bh(&ioat->desc_lock);
520 return NULL;
521 }
522 spin_unlock_bh(&ioat->desc_lock);
523
524 desc->txd.flags = flags;
525 desc->len = total_len;
526 list_splice(&chain, &desc->tx_list);
527 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
528 hw->ctl_f.compl_write = 1;
529 hw->tx_cnt = tx_cnt;
530 dump_desc_dbg(ioat, desc);
531
532 return &desc->txd;
533}
534
535static void ioat1_cleanup_tasklet(unsigned long data)
536{
537 struct ioat_dma_chan *chan = (void *)data;
538
539 ioat1_cleanup(chan);
540 writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET);
541}
542
543void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
544 size_t len, struct ioat_dma_descriptor *hw)
545{
546 struct pci_dev *pdev = chan->device->pdev;
547 size_t offset = len - hw->size;
548
549 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
550 ioat_unmap(pdev, hw->dst_addr - offset, len,
551 PCI_DMA_FROMDEVICE, flags, 1);
552
553 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
554 ioat_unmap(pdev, hw->src_addr - offset, len,
555 PCI_DMA_TODEVICE, flags, 0);
556}
557
558unsigned long ioat_get_current_completion(struct ioat_chan_common *chan)
559{
560 unsigned long phys_complete;
561 u64 completion;
562
563 completion = *chan->completion;
564 phys_complete = ioat_chansts_to_addr(completion);
565
566 dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__,
567 (unsigned long long) phys_complete);
568
569 if (is_ioat_halted(completion)) {
570 u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
571 dev_err(to_dev(chan), "Channel halted, chanerr = %x\n",
572 chanerr);
573
574 /* TODO do something to salvage the situation */
575 }
576
577 return phys_complete;
578}
579
580bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
581 unsigned long *phys_complete)
582{
583 *phys_complete = ioat_get_current_completion(chan);
584 if (*phys_complete == chan->last_completion)
585 return false;
586 clear_bit(IOAT_COMPLETION_ACK, &chan->state);
587 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
588
589 return true;
590}
591
592static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete)
593{
594 struct ioat_chan_common *chan = &ioat->base;
595 struct list_head *_desc, *n;
596 struct dma_async_tx_descriptor *tx;
597
598 dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n",
599 __func__, phys_complete);
600 list_for_each_safe(_desc, n, &ioat->used_desc) {
601 struct ioat_desc_sw *desc;
602
603 prefetch(n);
604 desc = list_entry(_desc, typeof(*desc), node);
605 tx = &desc->txd;
606 /*
607 * Incoming DMA requests may use multiple descriptors,
608 * due to exceeding xfercap, perhaps. If so, only the
609 * last one will have a cookie, and require unmapping.
610 */
611 dump_desc_dbg(ioat, desc);
612 if (tx->cookie) {
613 chan->completed_cookie = tx->cookie;
614 tx->cookie = 0;
615 ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
616 ioat->active -= desc->hw->tx_cnt;
617 if (tx->callback) {
618 tx->callback(tx->callback_param);
619 tx->callback = NULL;
620 }
621 }
622
623 if (tx->phys != phys_complete) {
624 /*
625 * a completed entry, but not the last, so clean
626 * up if the client is done with the descriptor
627 */
628 if (async_tx_test_ack(tx))
629 list_move_tail(&desc->node, &ioat->free_desc);
630 } else {
631 /*
632 * last used desc. Do not remove, so we can
633 * append from it.
634 */
635
636 /* if nothing else is pending, cancel the
637 * completion timeout
638 */
639 if (n == &ioat->used_desc) {
640 dev_dbg(to_dev(chan),
641 "%s cancel completion timeout\n",
642 __func__);
643 clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
644 }
645
646 /* TODO check status bits? */
647 break;
648 }
649 }
650
651 chan->last_completion = phys_complete;
652}
653
654/**
655 * ioat1_cleanup - cleanup up finished descriptors
656 * @chan: ioat channel to be cleaned up
657 *
658 * To prevent lock contention we defer cleanup when the locks are
659 * contended with a terminal timeout that forces cleanup and catches
660 * completion notification errors.
661 */
662static void ioat1_cleanup(struct ioat_dma_chan *ioat)
663{
664 struct ioat_chan_common *chan = &ioat->base;
665 unsigned long phys_complete;
666
667 prefetch(chan->completion);
668
669 if (!spin_trylock_bh(&chan->cleanup_lock))
670 return;
671
672 if (!ioat_cleanup_preamble(chan, &phys_complete)) {
673 spin_unlock_bh(&chan->cleanup_lock);
674 return;
675 }
676
677 if (!spin_trylock_bh(&ioat->desc_lock)) {
678 spin_unlock_bh(&chan->cleanup_lock);
679 return;
680 }
681
682 __cleanup(ioat, phys_complete);
683
684 spin_unlock_bh(&ioat->desc_lock);
685 spin_unlock_bh(&chan->cleanup_lock);
686}
687
688static void ioat1_timer_event(unsigned long data)
689{
690 struct ioat_dma_chan *ioat = (void *) data;
691 struct ioat_chan_common *chan = &ioat->base;
692
693 dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state);
694
695 spin_lock_bh(&chan->cleanup_lock);
696 if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) {
697 struct ioat_desc_sw *desc;
698
699 spin_lock_bh(&ioat->desc_lock);
700
701 /* restart active descriptors */
702 desc = to_ioat_desc(ioat->used_desc.prev);
703 ioat_set_chainaddr(ioat, desc->txd.phys);
704 ioat_start(chan);
705
706 ioat->pending = 0;
707 set_bit(IOAT_COMPLETION_PENDING, &chan->state);
708 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
709 spin_unlock_bh(&ioat->desc_lock);
710 } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
711 unsigned long phys_complete;
712
713 spin_lock_bh(&ioat->desc_lock);
714 /* if we haven't made progress and we have already
715 * acknowledged a pending completion once, then be more
716 * forceful with a restart
717 */
718 if (ioat_cleanup_preamble(chan, &phys_complete))
719 __cleanup(ioat, phys_complete);
720 else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
721 ioat1_reset_channel(ioat);
722 else {
723 u64 status = ioat_chansts(chan);
724
725 /* manually update the last completion address */
726 if (ioat_chansts_to_addr(status) != 0)
727 *chan->completion = status;
728
729 set_bit(IOAT_COMPLETION_ACK, &chan->state);
730 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
731 }
732 spin_unlock_bh(&ioat->desc_lock);
733 }
734 spin_unlock_bh(&chan->cleanup_lock);
735}
736
737static enum dma_status
738ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie,
739 dma_cookie_t *done, dma_cookie_t *used)
740{
741 struct ioat_dma_chan *ioat = to_ioat_chan(c);
742
743 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
744 return DMA_SUCCESS;
745
746 ioat1_cleanup(ioat);
747
748 return ioat_is_complete(c, cookie, done, used);
749}
750
751static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat)
752{
753 struct ioat_chan_common *chan = &ioat->base;
754 struct ioat_desc_sw *desc;
755 struct ioat_dma_descriptor *hw;
756
757 spin_lock_bh(&ioat->desc_lock);
758
759 desc = ioat1_dma_get_next_descriptor(ioat);
760
761 if (!desc) {
762 dev_err(to_dev(chan),
763 "Unable to start null desc - get next desc failed\n");
764 spin_unlock_bh(&ioat->desc_lock);
765 return;
766 }
767
768 hw = desc->hw;
769 hw->ctl = 0;
770 hw->ctl_f.null = 1;
771 hw->ctl_f.int_en = 1;
772 hw->ctl_f.compl_write = 1;
773 /* set size to non-zero value (channel returns error when size is 0) */
774 hw->size = NULL_DESC_BUFFER_SIZE;
775 hw->src_addr = 0;
776 hw->dst_addr = 0;
777 async_tx_ack(&desc->txd);
778 hw->next = 0;
779 list_add_tail(&desc->node, &ioat->used_desc);
780 dump_desc_dbg(ioat, desc);
781
782 ioat_set_chainaddr(ioat, desc->txd.phys);
783 ioat_start(chan);
784 spin_unlock_bh(&ioat->desc_lock);
785}
786
787/*
788 * Perform a IOAT transaction to verify the HW works.
789 */
790#define IOAT_TEST_SIZE 2000
791
792static void __devinit ioat_dma_test_callback(void *dma_async_param)
793{
794 struct completion *cmp = dma_async_param;
795
796 complete(cmp);
797}
798
799/**
800 * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works.
801 * @device: device to be tested
802 */
803int __devinit ioat_dma_self_test(struct ioatdma_device *device)
804{
805 int i;
806 u8 *src;
807 u8 *dest;
808 struct dma_device *dma = &device->common;
809 struct device *dev = &device->pdev->dev;
810 struct dma_chan *dma_chan;
811 struct dma_async_tx_descriptor *tx;
812 dma_addr_t dma_dest, dma_src;
813 dma_cookie_t cookie;
814 int err = 0;
815 struct completion cmp;
816 unsigned long tmo;
817 unsigned long flags;
818
819 src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
820 if (!src)
821 return -ENOMEM;
822 dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
823 if (!dest) {
824 kfree(src);
825 return -ENOMEM;
826 }
827
828 /* Fill in src buffer */
829 for (i = 0; i < IOAT_TEST_SIZE; i++)
830 src[i] = (u8)i;
831
832 /* Start copy, using first DMA channel */
833 dma_chan = container_of(dma->channels.next, struct dma_chan,
834 device_node);
835 if (dma->device_alloc_chan_resources(dma_chan) < 1) {
836 dev_err(dev, "selftest cannot allocate chan resource\n");
837 err = -ENODEV;
838 goto out;
839 }
840
841 dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
842 dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
843 flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE |
844 DMA_PREP_INTERRUPT;
845 tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
846 IOAT_TEST_SIZE, flags);
847 if (!tx) {
848 dev_err(dev, "Self-test prep failed, disabling\n");
849 err = -ENODEV;
850 goto free_resources;
851 }
852
853 async_tx_ack(tx);
854 init_completion(&cmp);
855 tx->callback = ioat_dma_test_callback;
856 tx->callback_param = &cmp;
857 cookie = tx->tx_submit(tx);
858 if (cookie < 0) {
859 dev_err(dev, "Self-test setup failed, disabling\n");
860 err = -ENODEV;
861 goto free_resources;
862 }
863 dma->device_issue_pending(dma_chan);
864
865 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
866
867 if (tmo == 0 ||
868 dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL)
869 != DMA_SUCCESS) {
870 dev_err(dev, "Self-test copy timed out, disabling\n");
871 err = -ENODEV;
872 goto free_resources;
873 }
874 if (memcmp(src, dest, IOAT_TEST_SIZE)) {
875 dev_err(dev, "Self-test copy failed compare, disabling\n");
876 err = -ENODEV;
877 goto free_resources;
878 }
879
880free_resources:
881 dma->device_free_chan_resources(dma_chan);
882out:
883 kfree(src);
884 kfree(dest);
885 return err;
886}
887
888static char ioat_interrupt_style[32] = "msix";
889module_param_string(ioat_interrupt_style, ioat_interrupt_style,
890 sizeof(ioat_interrupt_style), 0644);
891MODULE_PARM_DESC(ioat_interrupt_style,
892 "set ioat interrupt style: msix (default), "
893 "msix-single-vector, msi, intx)");
894
895/**
896 * ioat_dma_setup_interrupts - setup interrupt handler
897 * @device: ioat device
898 */
899static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
900{
901 struct ioat_chan_common *chan;
902 struct pci_dev *pdev = device->pdev;
903 struct device *dev = &pdev->dev;
904 struct msix_entry *msix;
905 int i, j, msixcnt;
906 int err = -EINVAL;
907 u8 intrctrl = 0;
908
909 if (!strcmp(ioat_interrupt_style, "msix"))
910 goto msix;
911 if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
912 goto msix_single_vector;
913 if (!strcmp(ioat_interrupt_style, "msi"))
914 goto msi;
915 if (!strcmp(ioat_interrupt_style, "intx"))
916 goto intx;
917 dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style);
918 goto err_no_irq;
919
920msix:
921 /* The number of MSI-X vectors should equal the number of channels */
922 msixcnt = device->common.chancnt;
923 for (i = 0; i < msixcnt; i++)
924 device->msix_entries[i].entry = i;
925
926 err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
927 if (err < 0)
928 goto msi;
929 if (err > 0)
930 goto msix_single_vector;
931
932 for (i = 0; i < msixcnt; i++) {
933 msix = &device->msix_entries[i];
934 chan = ioat_chan_by_index(device, i);
935 err = devm_request_irq(dev, msix->vector,
936 ioat_dma_do_interrupt_msix, 0,
937 "ioat-msix", chan);
938 if (err) {
939 for (j = 0; j < i; j++) {
940 msix = &device->msix_entries[j];
941 chan = ioat_chan_by_index(device, j);
942 devm_free_irq(dev, msix->vector, chan);
943 }
944 goto msix_single_vector;
945 }
946 }
947 intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
948 goto done;
949
950msix_single_vector:
951 msix = &device->msix_entries[0];
952 msix->entry = 0;
953 err = pci_enable_msix(pdev, device->msix_entries, 1);
954 if (err)
955 goto msi;
956
957 err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
958 "ioat-msix", device);
959 if (err) {
960 pci_disable_msix(pdev);
961 goto msi;
962 }
963 goto done;
964
965msi:
966 err = pci_enable_msi(pdev);
967 if (err)
968 goto intx;
969
970 err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0,
971 "ioat-msi", device);
972 if (err) {
973 pci_disable_msi(pdev);
974 goto intx;
975 }
976 goto done;
977
978intx:
979 err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt,
980 IRQF_SHARED, "ioat-intx", device);
981 if (err)
982 goto err_no_irq;
983
984done:
985 if (device->intr_quirk)
986 device->intr_quirk(device);
987 intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
988 writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
989 return 0;
990
991err_no_irq:
992 /* Disable all interrupt generation */
993 writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
994 dev_err(dev, "no usable interrupts\n");
995 return err;
996}
997
998static void ioat_disable_interrupts(struct ioatdma_device *device)
999{
1000 /* Disable all interrupt generation */
1001 writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
1002}
1003
1004int __devinit ioat_probe(struct ioatdma_device *device)
1005{
1006 int err = -ENODEV;
1007 struct dma_device *dma = &device->common;
1008 struct pci_dev *pdev = device->pdev;
1009 struct device *dev = &pdev->dev;
1010
1011 /* DMA coherent memory pool for DMA descriptor allocations */
1012 device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
1013 sizeof(struct ioat_dma_descriptor),
1014 64, 0);
1015 if (!device->dma_pool) {
1016 err = -ENOMEM;
1017 goto err_dma_pool;
1018 }
1019
1020 device->completion_pool = pci_pool_create("completion_pool", pdev,
1021 sizeof(u64), SMP_CACHE_BYTES,
1022 SMP_CACHE_BYTES);
1023
1024 if (!device->completion_pool) {
1025 err = -ENOMEM;
1026 goto err_completion_pool;
1027 }
1028
1029 device->enumerate_channels(device);
1030
1031 dma_cap_set(DMA_MEMCPY, dma->cap_mask);
1032 dma->dev = &pdev->dev;
1033
1034 if (!dma->chancnt) {
1035 dev_err(dev, "zero channels detected\n");
1036 goto err_setup_interrupts;
1037 }
1038
1039 err = ioat_dma_setup_interrupts(device);
1040 if (err)
1041 goto err_setup_interrupts;
1042
1043 err = device->self_test(device);
1044 if (err)
1045 goto err_self_test;
1046
1047 return 0;
1048
1049err_self_test:
1050 ioat_disable_interrupts(device);
1051err_setup_interrupts:
1052 pci_pool_destroy(device->completion_pool);
1053err_completion_pool:
1054 pci_pool_destroy(device->dma_pool);
1055err_dma_pool:
1056 return err;
1057}
1058
1059int __devinit ioat_register(struct ioatdma_device *device)
1060{
1061 int err = dma_async_device_register(&device->common);
1062
1063 if (err) {
1064 ioat_disable_interrupts(device);
1065 pci_pool_destroy(device->completion_pool);
1066 pci_pool_destroy(device->dma_pool);
1067 }
1068
1069 return err;
1070}
1071
1072/* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */
1073static void ioat1_intr_quirk(struct ioatdma_device *device)
1074{
1075 struct pci_dev *pdev = device->pdev;
1076 u32 dmactrl;
1077
1078 pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
1079 if (pdev->msi_enabled)
1080 dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
1081 else
1082 dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN;
1083 pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl);
1084}
1085
1086static ssize_t ring_size_show(struct dma_chan *c, char *page)
1087{
1088 struct ioat_dma_chan *ioat = to_ioat_chan(c);
1089
1090 return sprintf(page, "%d\n", ioat->desccount);
1091}
1092static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
1093
1094static ssize_t ring_active_show(struct dma_chan *c, char *page)
1095{
1096 struct ioat_dma_chan *ioat = to_ioat_chan(c);
1097
1098 return sprintf(page, "%d\n", ioat->active);
1099}
1100static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
1101
1102static ssize_t cap_show(struct dma_chan *c, char *page)
1103{
1104 struct dma_device *dma = c->device;
1105
1106 return sprintf(page, "copy%s%s%s%s%s%s\n",
1107 dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
1108 dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
1109 dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
1110 dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
1111 dma_has_cap(DMA_MEMSET, dma->cap_mask) ? " fill" : "",
1112 dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "");
1113
1114}
1115struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap);
1116
1117static ssize_t version_show(struct dma_chan *c, char *page)
1118{
1119 struct dma_device *dma = c->device;
1120 struct ioatdma_device *device = to_ioatdma_device(dma);
1121
1122 return sprintf(page, "%d.%d\n",
1123 device->version >> 4, device->version & 0xf);
1124}
1125struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version);
1126
/* NULL-terminated list of the sysfs attributes referenced by ioat1_ktype. */
static struct attribute *ioat1_attrs[] = {
	&ring_size_attr.attr,
	&ring_active_attr.attr,
	&ioat_cap_attr.attr,
	&ioat_version_attr.attr,
	NULL,
};
1134
1135static ssize_t
1136ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
1137{
1138 struct ioat_sysfs_entry *entry;
1139 struct ioat_chan_common *chan;
1140
1141 entry = container_of(attr, struct ioat_sysfs_entry, attr);
1142 chan = container_of(kobj, struct ioat_chan_common, kobj);
1143
1144 if (!entry->show)
1145 return -EIO;
1146 return entry->show(&chan->common, page);
1147}
1148
/* sysfs operations for ioat channel kobjects; only show is provided. */
struct sysfs_ops ioat_sysfs_ops = {
	.show	= ioat_attr_show,
};
1152
/* kobject type passed to ioat_kobject_add() by ioat1_dma_probe(). */
static struct kobj_type ioat1_ktype = {
	.sysfs_ops = &ioat_sysfs_ops,
	.default_attrs = ioat1_attrs,
};
1157
1158void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type)
1159{
1160 struct dma_device *dma = &device->common;
1161 struct dma_chan *c;
1162
1163 list_for_each_entry(c, &dma->channels, device_node) {
1164 struct ioat_chan_common *chan = to_chan_common(c);
1165 struct kobject *parent = &c->dev->device.kobj;
1166 int err;
1167
1168 err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata");
1169 if (err) {
1170 dev_warn(to_dev(chan),
1171 "sysfs init error (%d), continuing...\n", err);
1172 kobject_put(&chan->kobj);
1173 set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state);
1174 }
1175 }
1176}
1177
1178void ioat_kobject_del(struct ioatdma_device *device)
1179{
1180 struct dma_device *dma = &device->common;
1181 struct dma_chan *c;
1182
1183 list_for_each_entry(c, &dma->channels, device_node) {
1184 struct ioat_chan_common *chan = to_chan_common(c);
1185
1186 if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) {
1187 kobject_del(&chan->kobj);
1188 kobject_put(&chan->kobj);
1189 }
1190 }
1191}
1192
1193int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca)
1194{
1195 struct pci_dev *pdev = device->pdev;
1196 struct dma_device *dma;
1197 int err;
1198
1199 device->intr_quirk = ioat1_intr_quirk;
1200 device->enumerate_channels = ioat1_enumerate_channels;
1201 device->self_test = ioat_dma_self_test;
1202 dma = &device->common;
1203 dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
1204 dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
1205 dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources;
1206 dma->device_free_chan_resources = ioat1_dma_free_chan_resources;
1207 dma->device_is_tx_complete = ioat1_dma_is_complete;
1208
1209 err = ioat_probe(device);
1210 if (err)
1211 return err;
1212 ioat_set_tcp_copy_break(4096);
1213 err = ioat_register(device);
1214 if (err)
1215 return err;
1216 ioat_kobject_add(device, &ioat1_ktype);
1217
1218 if (dca)
1219 device->dca = ioat_dca_init(pdev, device->reg_base);
1220
1221 return err;
1222}
1223
1224void __devexit ioat_dma_remove(struct ioatdma_device *device)
1225{
1226 struct dma_device *dma = &device->common;
1227
1228 ioat_disable_interrupts(device);
1229
1230 ioat_kobject_del(device);
1231
1232 dma_async_device_unregister(dma);
1233
1234 pci_pool_destroy(device->dma_pool);
1235 pci_pool_destroy(device->completion_pool);
1236
1237 INIT_LIST_HEAD(&dma->channels);
1238}
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
new file mode 100644
index 000000000000..c14fdfeb7f33
--- /dev/null
+++ b/drivers/dma/ioat/dma.h
@@ -0,0 +1,337 @@
1/*
2 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59
16 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * The full GNU General Public License is included in this distribution in the
19 * file called COPYING.
20 */
21#ifndef IOATDMA_H
22#define IOATDMA_H
23
24#include <linux/dmaengine.h>
25#include "hw.h"
26#include "registers.h"
27#include <linux/init.h>
28#include <linux/dmapool.h>
29#include <linux/cache.h>
30#include <linux/pci_ids.h>
31#include <net/tcp.h>
32
#define IOAT_DMA_VERSION  "4.00"

#define IOAT_LOW_COMPLETION_MASK	0xffffffc0
/* parenthesized so the expansion is a single operand in any expression */
#define IOAT_DMA_DCA_ANY_CPU		(~0)

/* container_of() helpers from an embedded member back to the ioat object */
#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd)
/* struct device of the PCI device that owns the channel */
#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev)

/* channel number derived from the channel's register offset (0x80 apart) */
#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)

/*
 * workaround for IOAT ver.3.0 null descriptor issue
 * (channel returns error when size is 0)
 */
#define NULL_DESC_BUFFER_SIZE 1
50
/**
 * struct ioatdma_device - internal representation of a IOAT device
 * @pdev: PCI-Express device
 * @reg_base: MMIO register space base address
 * @dma_pool: for allocating DMA descriptors
 * @completion_pool: for allocating the per-channel completion writeback
 *	areas (one u64 each)
 * @common: embedded struct dma_device
 * @version: version of ioatdma device
 * @msix_entries: irq handlers
 * @idx: per channel data
 * @dca: direct cache access context
 * @intr_quirk: interrupt setup quirk (for ioat_v1 devices)
 * @enumerate_channels: hw version specific channel enumeration
 * @cleanup_tasklet: select between the v2 and v3 cleanup routines
 * @timer_fn: select between the v2 and v3 timer watchdog routines
 * @self_test: hardware version specific self test for each supported op type
 *
 * Note: the v3 cleanup routine supports raid operations
 */
struct ioatdma_device {
	struct pci_dev *pdev;
	void __iomem *reg_base;
	struct pci_pool *dma_pool;
	struct pci_pool *completion_pool;
	struct dma_device common;
	u8 version;
	struct msix_entry msix_entries[4];
	struct ioat_chan_common *idx[4];
	struct dca_provider *dca;
	void (*intr_quirk)(struct ioatdma_device *device);
	int (*enumerate_channels)(struct ioatdma_device *device);
	void (*cleanup_tasklet)(unsigned long data);
	void (*timer_fn)(unsigned long data);
	int (*self_test)(struct ioatdma_device *device);
};
85
/**
 * struct ioat_chan_common - channel state shared by all ioat versions
 * @common: embedded struct dma_chan
 * @reg_base: MMIO base address of this channel's registers
 * @last_completion: completion address recorded by the last cleanup pass
 * @cleanup_lock: taken around cleanup passes and the timer handler
 * @completed_cookie: cookie of the most recently completed transaction
 * @state: bit flags, see the IOAT_* bit numbers below
 * @timer: completion / reset timer
 * @device: the ioatdma_device this channel belongs to
 * @completion_dma: bus address of the completion writeback area
 * @completion: cpu address of the completion writeback area
 * @cleanup_task: tasklet that runs the cleanup routine
 * @kobj: kobject backing the channel's "quickdata" sysfs directory
 */
struct ioat_chan_common {
	struct dma_chan common;
	void __iomem *reg_base;
	unsigned long last_completion;
	spinlock_t cleanup_lock;
	dma_cookie_t completed_cookie;
	unsigned long state;
	/* bit numbers within @state */
	#define IOAT_COMPLETION_PENDING 0
	#define IOAT_COMPLETION_ACK 1
	#define IOAT_RESET_PENDING 2
	#define IOAT_KOBJ_INIT_FAIL 3
	struct timer_list timer;
	/* timeouts, in jiffies */
	#define COMPLETION_TIMEOUT msecs_to_jiffies(100)
	#define IDLE_TIMEOUT msecs_to_jiffies(2000)
	#define RESET_DELAY msecs_to_jiffies(100)
	struct ioatdma_device *device;
	dma_addr_t completion_dma;
	u64 *completion;
	struct tasklet_struct cleanup_task;
	struct kobject kobj;
};
107
/**
 * struct ioat_sysfs_entry - a per-channel sysfs attribute
 * @attr: embedded generic attribute
 * @show: formats the attribute value for the given channel into a buffer
 */
struct ioat_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct dma_chan *, char *);
};
112
/**
 * struct ioat_dma_chan - internal representation of a DMA channel
 * @base: common ioat channel state
 * @xfercap: XFERCAP register value expanded out; upper bound on the size
 *	of a single hardware descriptor's copy
 * @desc_lock: protects the descriptor lists
 * @free_desc: descriptors available for new transactions
 * @used_desc: descriptors handed to the hardware
 * @pending: NOTE(review): presumably the count of prepared but not yet
 *	issued descriptors - confirm against the submit / issue_pending code
 * @desccount: number of descriptors allocated for the channel
 * @active: outstanding descriptor count; reduced by the transaction's
 *	tx_cnt when a transaction completes
 */
struct ioat_dma_chan {
	struct ioat_chan_common base;

	size_t xfercap;	/* XFERCAP register value expanded out */

	spinlock_t desc_lock;
	struct list_head free_desc;
	struct list_head used_desc;

	int pending;
	u16 desccount;
	u16 active;
};
129
130static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c)
131{
132 return container_of(c, struct ioat_chan_common, common);
133}
134
135static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c)
136{
137 struct ioat_chan_common *chan = to_chan_common(c);
138
139 return container_of(chan, struct ioat_dma_chan, base);
140}
141
142/**
143 * ioat_is_complete - poll the status of an ioat transaction
144 * @c: channel handle
145 * @cookie: transaction identifier
146 * @done: if set, updated with last completed transaction
147 * @used: if set, updated with last used transaction
148 */
149static inline enum dma_status
150ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie,
151 dma_cookie_t *done, dma_cookie_t *used)
152{
153 struct ioat_chan_common *chan = to_chan_common(c);
154 dma_cookie_t last_used;
155 dma_cookie_t last_complete;
156
157 last_used = c->cookie;
158 last_complete = chan->completed_cookie;
159
160 if (done)
161 *done = last_complete;
162 if (used)
163 *used = last_used;
164
165 return dma_async_is_complete(cookie, last_complete, last_used);
166}
167
/* wrapper around hardware descriptor format + additional software fields */

/**
 * struct ioat_desc_sw - wrapper around hardware descriptor
 * @hw: hardware DMA descriptor (for memcpy)
 * @node: this descriptor will either be on the free list,
 *     or attached to a transaction list (tx_list)
 * @len: total length of the transaction this descriptor completes
 * @tx_list: list of the descriptors that make up the transaction
 * @txd: the generic software descriptor for all engines
 * @id: identifier for debug
 */
struct ioat_desc_sw {
	struct ioat_dma_descriptor *hw;
	struct list_head node;
	size_t len;
	struct list_head tx_list;
	struct dma_async_tx_descriptor txd;
	#ifdef DEBUG
	int id;
	#endif
};
188
/*
 * Accessors for the debug-only descriptor id.  Without DEBUG the id field
 * does not exist: set_desc_id() expands to nothing and desc_id() to 0.
 */
#ifdef DEBUG
#define set_desc_id(desc, i) ((desc)->id = (i))
#define desc_id(desc) ((desc)->id)
#else
#define set_desc_id(desc, i)
#define desc_id(desc) (0)
#endif
196
197static inline void
198__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw,
199 struct dma_async_tx_descriptor *tx, int id)
200{
201 struct device *dev = to_dev(chan);
202
203 dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x"
204 " ctl: %#x (op: %d int_en: %d compl: %d)\n", id,
205 (unsigned long long) tx->phys,
206 (unsigned long long) hw->next, tx->cookie, tx->flags,
207 hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write);
208}
209
/*
 * Debug-dump descriptor @d of channel @c; does nothing when @d is NULL.
 * Evaluates to 0.  Arguments are parenthesized so any pointer expression
 * may be passed; note that @d is evaluated more than once.
 */
#define dump_desc_dbg(c, d) \
	({ if (d) __dump_desc_dbg(&(c)->base, (d)->hw, &(d)->txd, desc_id(d)); 0; })
212
/*
 * Set sysctl_tcp_dma_copybreak to @copybreak.  Does nothing when
 * CONFIG_NET_DMA is not defined.
 */
static inline void ioat_set_tcp_copy_break(unsigned long copybreak)
{
#if defined(CONFIG_NET_DMA)
	sysctl_tcp_dma_copybreak = copybreak;
#endif
}
219
220static inline struct ioat_chan_common *
221ioat_chan_by_index(struct ioatdma_device *device, int index)
222{
223 return device->idx[index];
224}
225
226static inline u64 ioat_chansts(struct ioat_chan_common *chan)
227{
228 u8 ver = chan->device->version;
229 u64 status;
230 u32 status_lo;
231
232 /* We need to read the low address first as this causes the
233 * chipset to latch the upper bits for the subsequent read
234 */
235 status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver));
236 status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver));
237 status <<= 32;
238 status |= status_lo;
239
240 return status;
241}
242
243static inline void ioat_start(struct ioat_chan_common *chan)
244{
245 u8 ver = chan->device->version;
246
247 writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
248}
249
250static inline u64 ioat_chansts_to_addr(u64 status)
251{
252 return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
253}
254
255static inline u32 ioat_chanerr(struct ioat_chan_common *chan)
256{
257 return readl(chan->reg_base + IOAT_CHANERR_OFFSET);
258}
259
260static inline void ioat_suspend(struct ioat_chan_common *chan)
261{
262 u8 ver = chan->device->version;
263
264 writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
265}
266
267static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr)
268{
269 struct ioat_chan_common *chan = &ioat->base;
270
271 writel(addr & 0x00000000FFFFFFFF,
272 chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
273 writel(addr >> 32,
274 chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
275}
276
277static inline bool is_ioat_active(unsigned long status)
278{
279 return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
280}
281
282static inline bool is_ioat_idle(unsigned long status)
283{
284 return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE);
285}
286
287static inline bool is_ioat_halted(unsigned long status)
288{
289 return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
290}
291
292static inline bool is_ioat_suspended(unsigned long status)
293{
294 return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
295}
296
297/* channel was fatally programmed */
298static inline bool is_ioat_bug(unsigned long err)
299{
300 return !!(err & (IOAT_CHANERR_SRC_ADDR_ERR|IOAT_CHANERR_DEST_ADDR_ERR|
301 IOAT_CHANERR_NEXT_ADDR_ERR|IOAT_CHANERR_CONTROL_ERR|
302 IOAT_CHANERR_LENGTH_ERR));
303}
304
305static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
306 int direction, enum dma_ctrl_flags flags, bool dst)
307{
308 if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
309 (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
310 pci_unmap_single(pdev, addr, len, direction);
311 else
312 pci_unmap_page(pdev, addr, len, direction);
313}
314
315int __devinit ioat_probe(struct ioatdma_device *device);
316int __devinit ioat_register(struct ioatdma_device *device);
317int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca);
318int __devinit ioat_dma_self_test(struct ioatdma_device *device);
319void __devexit ioat_dma_remove(struct ioatdma_device *device);
320struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev,
321 void __iomem *iobase);
322unsigned long ioat_get_current_completion(struct ioat_chan_common *chan);
323void ioat_init_channel(struct ioatdma_device *device,
324 struct ioat_chan_common *chan, int idx,
325 void (*timer_fn)(unsigned long),
326 void (*tasklet)(unsigned long),
327 unsigned long ioat);
328void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
329 size_t len, struct ioat_dma_descriptor *hw);
330bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
331 unsigned long *phys_complete);
332void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
333void ioat_kobject_del(struct ioatdma_device *device);
334extern struct sysfs_ops ioat_sysfs_ops;
335extern struct ioat_sysfs_entry ioat_version_attr;
336extern struct ioat_sysfs_entry ioat_cap_attr;
337#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
new file mode 100644
index 000000000000..96ffab7d37a7
--- /dev/null
+++ b/drivers/dma/ioat/dma_v2.c
@@ -0,0 +1,871 @@
1/*
2 * Intel I/OAT DMA Linux driver
3 * Copyright(c) 2004 - 2009 Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 */
22
23/*
24 * This driver supports an Intel I/OAT DMA engine (versions >= 2), which
25 * does asynchronous data movement and checksumming operations.
26 */
27
28#include <linux/init.h>
29#include <linux/module.h>
30#include <linux/pci.h>
31#include <linux/interrupt.h>
32#include <linux/dmaengine.h>
33#include <linux/delay.h>
34#include <linux/dma-mapping.h>
35#include <linux/workqueue.h>
36#include <linux/i7300_idle.h>
37#include "dma.h"
38#include "dma_v2.h"
39#include "registers.h"
40#include "hw.h"
41
42int ioat_ring_alloc_order = 8;
43module_param(ioat_ring_alloc_order, int, 0644);
44MODULE_PARM_DESC(ioat_ring_alloc_order,
45 "ioat2+: allocate 2^n descriptors per channel"
46 " (default: 8 max: 16)");
47static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
48module_param(ioat_ring_max_alloc_order, int, 0644);
49MODULE_PARM_DESC(ioat_ring_max_alloc_order,
50 "ioat2+: upper limit for ring size (default: 16)");
51
52void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
53{
54 void * __iomem reg_base = ioat->base.reg_base;
55
56 ioat->pending = 0;
57 ioat->dmacount += ioat2_ring_pending(ioat);
58 ioat->issued = ioat->head;
59 /* make descriptor updates globally visible before notifying channel */
60 wmb();
61 writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
62 dev_dbg(to_dev(&ioat->base),
63 "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
64 __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
65}
66
67void ioat2_issue_pending(struct dma_chan *chan)
68{
69 struct ioat2_dma_chan *ioat = to_ioat2_chan(chan);
70
71 spin_lock_bh(&ioat->ring_lock);
72 if (ioat->pending == 1)
73 __ioat2_issue_pending(ioat);
74 spin_unlock_bh(&ioat->ring_lock);
75}
76
77/**
78 * ioat2_update_pending - log pending descriptors
79 * @ioat: ioat2+ channel
80 *
81 * set pending to '1' unless pending is already set to '2', pending == 2
82 * indicates that submission is temporarily blocked due to an in-flight
83 * reset. If we are already above the ioat_pending_level threshold then
84 * just issue pending.
85 *
86 * called with ring_lock held
87 */
88static void ioat2_update_pending(struct ioat2_dma_chan *ioat)
89{
90 if (unlikely(ioat->pending == 2))
91 return;
92 else if (ioat2_ring_pending(ioat) > ioat_pending_level)
93 __ioat2_issue_pending(ioat);
94 else
95 ioat->pending = 1;
96}
97
98static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
99{
100 struct ioat_ring_ent *desc;
101 struct ioat_dma_descriptor *hw;
102 int idx;
103
104 if (ioat2_ring_space(ioat) < 1) {
105 dev_err(to_dev(&ioat->base),
106 "Unable to start null desc - ring full\n");
107 return;
108 }
109
110 dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n",
111 __func__, ioat->head, ioat->tail, ioat->issued);
112 idx = ioat2_desc_alloc(ioat, 1);
113 desc = ioat2_get_ring_ent(ioat, idx);
114
115 hw = desc->hw;
116 hw->ctl = 0;
117 hw->ctl_f.null = 1;
118 hw->ctl_f.int_en = 1;
119 hw->ctl_f.compl_write = 1;
120 /* set size to non-zero value (channel returns error when size is 0) */
121 hw->size = NULL_DESC_BUFFER_SIZE;
122 hw->src_addr = 0;
123 hw->dst_addr = 0;
124 async_tx_ack(&desc->txd);
125 ioat2_set_chainaddr(ioat, desc->txd.phys);
126 dump_desc_dbg(ioat, desc);
127 __ioat2_issue_pending(ioat);
128}
129
130static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
131{
132 spin_lock_bh(&ioat->ring_lock);
133 __ioat2_start_null_desc(ioat);
134 spin_unlock_bh(&ioat->ring_lock);
135}
136
137static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
138{
139 struct ioat_chan_common *chan = &ioat->base;
140 struct dma_async_tx_descriptor *tx;
141 struct ioat_ring_ent *desc;
142 bool seen_current = false;
143 u16 active;
144 int i;
145
146 dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
147 __func__, ioat->head, ioat->tail, ioat->issued);
148
149 active = ioat2_ring_active(ioat);
150 for (i = 0; i < active && !seen_current; i++) {
151 prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
152 desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
153 tx = &desc->txd;
154 dump_desc_dbg(ioat, desc);
155 if (tx->cookie) {
156 ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
157 chan->completed_cookie = tx->cookie;
158 tx->cookie = 0;
159 if (tx->callback) {
160 tx->callback(tx->callback_param);
161 tx->callback = NULL;
162 }
163 }
164
165 if (tx->phys == phys_complete)
166 seen_current = true;
167 }
168 ioat->tail += i;
169 BUG_ON(!seen_current); /* no active descs have written a completion? */
170
171 chan->last_completion = phys_complete;
172 if (ioat->head == ioat->tail) {
173 dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
174 __func__);
175 clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
176 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
177 }
178}
179
180/**
181 * ioat2_cleanup - clean finished descriptors (advance tail pointer)
182 * @chan: ioat channel to be cleaned up
183 */
184static void ioat2_cleanup(struct ioat2_dma_chan *ioat)
185{
186 struct ioat_chan_common *chan = &ioat->base;
187 unsigned long phys_complete;
188
189 prefetch(chan->completion);
190
191 if (!spin_trylock_bh(&chan->cleanup_lock))
192 return;
193
194 if (!ioat_cleanup_preamble(chan, &phys_complete)) {
195 spin_unlock_bh(&chan->cleanup_lock);
196 return;
197 }
198
199 if (!spin_trylock_bh(&ioat->ring_lock)) {
200 spin_unlock_bh(&chan->cleanup_lock);
201 return;
202 }
203
204 __cleanup(ioat, phys_complete);
205
206 spin_unlock_bh(&ioat->ring_lock);
207 spin_unlock_bh(&chan->cleanup_lock);
208}
209
210void ioat2_cleanup_tasklet(unsigned long data)
211{
212 struct ioat2_dma_chan *ioat = (void *) data;
213
214 ioat2_cleanup(ioat);
215 writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
216}
217
218void __ioat2_restart_chan(struct ioat2_dma_chan *ioat)
219{
220 struct ioat_chan_common *chan = &ioat->base;
221
222 /* set the tail to be re-issued */
223 ioat->issued = ioat->tail;
224 ioat->dmacount = 0;
225 set_bit(IOAT_COMPLETION_PENDING, &chan->state);
226 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
227
228 dev_dbg(to_dev(chan),
229 "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
230 __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
231
232 if (ioat2_ring_pending(ioat)) {
233 struct ioat_ring_ent *desc;
234
235 desc = ioat2_get_ring_ent(ioat, ioat->tail);
236 ioat2_set_chainaddr(ioat, desc->txd.phys);
237 __ioat2_issue_pending(ioat);
238 } else
239 __ioat2_start_null_desc(ioat);
240}
241
242static void ioat2_restart_channel(struct ioat2_dma_chan *ioat)
243{
244 struct ioat_chan_common *chan = &ioat->base;
245 unsigned long phys_complete;
246 u32 status;
247
248 status = ioat_chansts(chan);
249 if (is_ioat_active(status) || is_ioat_idle(status))
250 ioat_suspend(chan);
251 while (is_ioat_active(status) || is_ioat_idle(status)) {
252 status = ioat_chansts(chan);
253 cpu_relax();
254 }
255
256 if (ioat_cleanup_preamble(chan, &phys_complete))
257 __cleanup(ioat, phys_complete);
258
259 __ioat2_restart_chan(ioat);
260}
261
262void ioat2_timer_event(unsigned long data)
263{
264 struct ioat2_dma_chan *ioat = (void *) data;
265 struct ioat_chan_common *chan = &ioat->base;
266
267 spin_lock_bh(&chan->cleanup_lock);
268 if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
269 unsigned long phys_complete;
270 u64 status;
271
272 spin_lock_bh(&ioat->ring_lock);
273 status = ioat_chansts(chan);
274
275 /* when halted due to errors check for channel
276 * programming errors before advancing the completion state
277 */
278 if (is_ioat_halted(status)) {
279 u32 chanerr;
280
281 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
282 BUG_ON(is_ioat_bug(chanerr));
283 }
284
285 /* if we haven't made progress and we have already
286 * acknowledged a pending completion once, then be more
287 * forceful with a restart
288 */
289 if (ioat_cleanup_preamble(chan, &phys_complete))
290 __cleanup(ioat, phys_complete);
291 else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
292 ioat2_restart_channel(ioat);
293 else {
294 set_bit(IOAT_COMPLETION_ACK, &chan->state);
295 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
296 }
297 spin_unlock_bh(&ioat->ring_lock);
298 } else {
299 u16 active;
300
301 /* if the ring is idle, empty, and oversized try to step
302 * down the size
303 */
304 spin_lock_bh(&ioat->ring_lock);
305 active = ioat2_ring_active(ioat);
306 if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
307 reshape_ring(ioat, ioat->alloc_order-1);
308 spin_unlock_bh(&ioat->ring_lock);
309
310 /* keep shrinking until we get back to our minimum
311 * default size
312 */
313 if (ioat->alloc_order > ioat_get_alloc_order())
314 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
315 }
316 spin_unlock_bh(&chan->cleanup_lock);
317}
318
319/**
320 * ioat2_enumerate_channels - find and initialize the device's channels
321 * @device: the device to be enumerated
322 */
323int ioat2_enumerate_channels(struct ioatdma_device *device)
324{
325 struct ioat2_dma_chan *ioat;
326 struct device *dev = &device->pdev->dev;
327 struct dma_device *dma = &device->common;
328 u8 xfercap_log;
329 int i;
330
331 INIT_LIST_HEAD(&dma->channels);
332 dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
333 dma->chancnt &= 0x1f; /* bits [4:0] valid */
334 if (dma->chancnt > ARRAY_SIZE(device->idx)) {
335 dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
336 dma->chancnt, ARRAY_SIZE(device->idx));
337 dma->chancnt = ARRAY_SIZE(device->idx);
338 }
339 xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
340 xfercap_log &= 0x1f; /* bits [4:0] valid */
341 if (xfercap_log == 0)
342 return 0;
343 dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log);
344
345 /* FIXME which i/oat version is i7300? */
346#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
347 if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
348 dma->chancnt--;
349#endif
350 for (i = 0; i < dma->chancnt; i++) {
351 ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
352 if (!ioat)
353 break;
354
355 ioat_init_channel(device, &ioat->base, i,
356 device->timer_fn,
357 device->cleanup_tasklet,
358 (unsigned long) ioat);
359 ioat->xfercap_log = xfercap_log;
360 spin_lock_init(&ioat->ring_lock);
361 }
362 dma->chancnt = i;
363 return i;
364}
365
366static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
367{
368 struct dma_chan *c = tx->chan;
369 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
370 struct ioat_chan_common *chan = &ioat->base;
371 dma_cookie_t cookie = c->cookie;
372
373 cookie++;
374 if (cookie < 0)
375 cookie = 1;
376 tx->cookie = cookie;
377 c->cookie = cookie;
378 dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
379
380 if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
381 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
382 ioat2_update_pending(ioat);
383 spin_unlock_bh(&ioat->ring_lock);
384
385 return cookie;
386}
387
388static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
389{
390 struct ioat_dma_descriptor *hw;
391 struct ioat_ring_ent *desc;
392 struct ioatdma_device *dma;
393 dma_addr_t phys;
394
395 dma = to_ioatdma_device(chan->device);
396 hw = pci_pool_alloc(dma->dma_pool, flags, &phys);
397 if (!hw)
398 return NULL;
399 memset(hw, 0, sizeof(*hw));
400
401 desc = kmem_cache_alloc(ioat2_cache, flags);
402 if (!desc) {
403 pci_pool_free(dma->dma_pool, hw, phys);
404 return NULL;
405 }
406 memset(desc, 0, sizeof(*desc));
407
408 dma_async_tx_descriptor_init(&desc->txd, chan);
409 desc->txd.tx_submit = ioat2_tx_submit_unlock;
410 desc->hw = hw;
411 desc->txd.phys = phys;
412 return desc;
413}
414
415static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
416{
417 struct ioatdma_device *dma;
418
419 dma = to_ioatdma_device(chan->device);
420 pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys);
421 kmem_cache_free(ioat2_cache, desc);
422}
423
424static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
425{
426 struct ioat_ring_ent **ring;
427 int descs = 1 << order;
428 int i;
429
430 if (order > ioat_get_max_alloc_order())
431 return NULL;
432
433 /* allocate the array to hold the software ring */
434 ring = kcalloc(descs, sizeof(*ring), flags);
435 if (!ring)
436 return NULL;
437 for (i = 0; i < descs; i++) {
438 ring[i] = ioat2_alloc_ring_ent(c, flags);
439 if (!ring[i]) {
440 while (i--)
441 ioat2_free_ring_ent(ring[i], c);
442 kfree(ring);
443 return NULL;
444 }
445 set_desc_id(ring[i], i);
446 }
447
448 /* link descs */
449 for (i = 0; i < descs-1; i++) {
450 struct ioat_ring_ent *next = ring[i+1];
451 struct ioat_dma_descriptor *hw = ring[i]->hw;
452
453 hw->next = next->txd.phys;
454 }
455 ring[i]->hw->next = ring[0]->txd.phys;
456
457 return ring;
458}
459
460/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring
461 * @chan: channel to be initialized
462 */
463int ioat2_alloc_chan_resources(struct dma_chan *c)
464{
465 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
466 struct ioat_chan_common *chan = &ioat->base;
467 struct ioat_ring_ent **ring;
468 u32 chanerr;
469 int order;
470
471 /* have we already been set up? */
472 if (ioat->ring)
473 return 1 << ioat->alloc_order;
474
475 /* Setup register to interrupt and write completion status on error */
476 writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
477
478 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
479 if (chanerr) {
480 dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
481 writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
482 }
483
484 /* allocate a completion writeback area */
485 /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
486 chan->completion = pci_pool_alloc(chan->device->completion_pool,
487 GFP_KERNEL, &chan->completion_dma);
488 if (!chan->completion)
489 return -ENOMEM;
490
491 memset(chan->completion, 0, sizeof(*chan->completion));
492 writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
493 chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
494 writel(((u64) chan->completion_dma) >> 32,
495 chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
496
497 order = ioat_get_alloc_order();
498 ring = ioat2_alloc_ring(c, order, GFP_KERNEL);
499 if (!ring)
500 return -ENOMEM;
501
502 spin_lock_bh(&ioat->ring_lock);
503 ioat->ring = ring;
504 ioat->head = 0;
505 ioat->issued = 0;
506 ioat->tail = 0;
507 ioat->pending = 0;
508 ioat->alloc_order = order;
509 spin_unlock_bh(&ioat->ring_lock);
510
511 tasklet_enable(&chan->cleanup_task);
512 ioat2_start_null_desc(ioat);
513
514 return 1 << ioat->alloc_order;
515}
516
517bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
518{
519 /* reshape differs from normal ring allocation in that we want
520 * to allocate a new software ring while only
521 * extending/truncating the hardware ring
522 */
523 struct ioat_chan_common *chan = &ioat->base;
524 struct dma_chan *c = &chan->common;
525 const u16 curr_size = ioat2_ring_mask(ioat) + 1;
526 const u16 active = ioat2_ring_active(ioat);
527 const u16 new_size = 1 << order;
528 struct ioat_ring_ent **ring;
529 u16 i;
530
531 if (order > ioat_get_max_alloc_order())
532 return false;
533
534 /* double check that we have at least 1 free descriptor */
535 if (active == curr_size)
536 return false;
537
538 /* when shrinking, verify that we can hold the current active
539 * set in the new ring
540 */
541 if (active >= new_size)
542 return false;
543
544 /* allocate the array to hold the software ring */
545 ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
546 if (!ring)
547 return false;
548
549 /* allocate/trim descriptors as needed */
550 if (new_size > curr_size) {
551 /* copy current descriptors to the new ring */
552 for (i = 0; i < curr_size; i++) {
553 u16 curr_idx = (ioat->tail+i) & (curr_size-1);
554 u16 new_idx = (ioat->tail+i) & (new_size-1);
555
556 ring[new_idx] = ioat->ring[curr_idx];
557 set_desc_id(ring[new_idx], new_idx);
558 }
559
560 /* add new descriptors to the ring */
561 for (i = curr_size; i < new_size; i++) {
562 u16 new_idx = (ioat->tail+i) & (new_size-1);
563
564 ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT);
565 if (!ring[new_idx]) {
566 while (i--) {
567 u16 new_idx = (ioat->tail+i) & (new_size-1);
568
569 ioat2_free_ring_ent(ring[new_idx], c);
570 }
571 kfree(ring);
572 return false;
573 }
574 set_desc_id(ring[new_idx], new_idx);
575 }
576
577 /* hw link new descriptors */
578 for (i = curr_size-1; i < new_size; i++) {
579 u16 new_idx = (ioat->tail+i) & (new_size-1);
580 struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)];
581 struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
582
583 hw->next = next->txd.phys;
584 }
585 } else {
586 struct ioat_dma_descriptor *hw;
587 struct ioat_ring_ent *next;
588
589 /* copy current descriptors to the new ring, dropping the
590 * removed descriptors
591 */
592 for (i = 0; i < new_size; i++) {
593 u16 curr_idx = (ioat->tail+i) & (curr_size-1);
594 u16 new_idx = (ioat->tail+i) & (new_size-1);
595
596 ring[new_idx] = ioat->ring[curr_idx];
597 set_desc_id(ring[new_idx], new_idx);
598 }
599
600 /* free deleted descriptors */
601 for (i = new_size; i < curr_size; i++) {
602 struct ioat_ring_ent *ent;
603
604 ent = ioat2_get_ring_ent(ioat, ioat->tail+i);
605 ioat2_free_ring_ent(ent, c);
606 }
607
608 /* fix up hardware ring */
609 hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw;
610 next = ring[(ioat->tail+new_size) & (new_size-1)];
611 hw->next = next->txd.phys;
612 }
613
614 dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
615 __func__, new_size);
616
617 kfree(ioat->ring);
618 ioat->ring = ring;
619 ioat->alloc_order = order;
620
621 return true;
622}
623
624/**
625 * ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops
626 * @idx: gets starting descriptor index on successful allocation
627 * @ioat: ioat2,3 channel (ring) to operate on
628 * @num_descs: allocation length
629 */
630int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs)
631{
632 struct ioat_chan_common *chan = &ioat->base;
633
634 spin_lock_bh(&ioat->ring_lock);
635 /* never allow the last descriptor to be consumed, we need at
636 * least one free at all times to allow for on-the-fly ring
637 * resizing.
638 */
639 while (unlikely(ioat2_ring_space(ioat) <= num_descs)) {
640 if (reshape_ring(ioat, ioat->alloc_order + 1) &&
641 ioat2_ring_space(ioat) > num_descs)
642 break;
643
644 if (printk_ratelimit())
645 dev_dbg(to_dev(chan),
646 "%s: ring full! num_descs: %d (%x:%x:%x)\n",
647 __func__, num_descs, ioat->head, ioat->tail,
648 ioat->issued);
649 spin_unlock_bh(&ioat->ring_lock);
650
651 /* progress reclaim in the allocation failure case we
652 * may be called under bh_disabled so we need to trigger
653 * the timer event directly
654 */
655 spin_lock_bh(&chan->cleanup_lock);
656 if (jiffies > chan->timer.expires &&
657 timer_pending(&chan->timer)) {
658 struct ioatdma_device *device = chan->device;
659
660 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
661 spin_unlock_bh(&chan->cleanup_lock);
662 device->timer_fn((unsigned long) ioat);
663 } else
664 spin_unlock_bh(&chan->cleanup_lock);
665 return -ENOMEM;
666 }
667
668 dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n",
669 __func__, num_descs, ioat->head, ioat->tail, ioat->issued);
670
671 *idx = ioat2_desc_alloc(ioat, num_descs);
672 return 0; /* with ioat->ring_lock held */
673}
674
675struct dma_async_tx_descriptor *
676ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
677 dma_addr_t dma_src, size_t len, unsigned long flags)
678{
679 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
680 struct ioat_dma_descriptor *hw;
681 struct ioat_ring_ent *desc;
682 dma_addr_t dst = dma_dest;
683 dma_addr_t src = dma_src;
684 size_t total_len = len;
685 int num_descs;
686 u16 idx;
687 int i;
688
689 num_descs = ioat2_xferlen_to_descs(ioat, len);
690 if (likely(num_descs) &&
691 ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
692 /* pass */;
693 else
694 return NULL;
695 i = 0;
696 do {
697 size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log);
698
699 desc = ioat2_get_ring_ent(ioat, idx + i);
700 hw = desc->hw;
701
702 hw->size = copy;
703 hw->ctl = 0;
704 hw->src_addr = src;
705 hw->dst_addr = dst;
706
707 len -= copy;
708 dst += copy;
709 src += copy;
710 dump_desc_dbg(ioat, desc);
711 } while (++i < num_descs);
712
713 desc->txd.flags = flags;
714 desc->len = total_len;
715 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
716 hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
717 hw->ctl_f.compl_write = 1;
718 dump_desc_dbg(ioat, desc);
719 /* we leave the channel locked to ensure in order submission */
720
721 return &desc->txd;
722}
723
724/**
725 * ioat2_free_chan_resources - release all the descriptors
726 * @chan: the channel to be cleaned
727 */
728void ioat2_free_chan_resources(struct dma_chan *c)
729{
730 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
731 struct ioat_chan_common *chan = &ioat->base;
732 struct ioatdma_device *device = chan->device;
733 struct ioat_ring_ent *desc;
734 const u16 total_descs = 1 << ioat->alloc_order;
735 int descs;
736 int i;
737
738 /* Before freeing channel resources first check
739 * if they have been previously allocated for this channel.
740 */
741 if (!ioat->ring)
742 return;
743
744 tasklet_disable(&chan->cleanup_task);
745 del_timer_sync(&chan->timer);
746 device->cleanup_tasklet((unsigned long) ioat);
747
748 /* Delay 100ms after reset to allow internal DMA logic to quiesce
749 * before removing DMA descriptor resources.
750 */
751 writeb(IOAT_CHANCMD_RESET,
752 chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
753 mdelay(100);
754
755 spin_lock_bh(&ioat->ring_lock);
756 descs = ioat2_ring_space(ioat);
757 dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs);
758 for (i = 0; i < descs; i++) {
759 desc = ioat2_get_ring_ent(ioat, ioat->head + i);
760 ioat2_free_ring_ent(desc, c);
761 }
762
763 if (descs < total_descs)
764 dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
765 total_descs - descs);
766
767 for (i = 0; i < total_descs - descs; i++) {
768 desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
769 dump_desc_dbg(ioat, desc);
770 ioat2_free_ring_ent(desc, c);
771 }
772
773 kfree(ioat->ring);
774 ioat->ring = NULL;
775 ioat->alloc_order = 0;
776 pci_pool_free(device->completion_pool, chan->completion,
777 chan->completion_dma);
778 spin_unlock_bh(&ioat->ring_lock);
779
780 chan->last_completion = 0;
781 chan->completion_dma = 0;
782 ioat->pending = 0;
783 ioat->dmacount = 0;
784}
785
786enum dma_status
787ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
788 dma_cookie_t *done, dma_cookie_t *used)
789{
790 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
791 struct ioatdma_device *device = ioat->base.device;
792
793 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
794 return DMA_SUCCESS;
795
796 device->cleanup_tasklet((unsigned long) ioat);
797
798 return ioat_is_complete(c, cookie, done, used);
799}
800
801static ssize_t ring_size_show(struct dma_chan *c, char *page)
802{
803 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
804
805 return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1);
806}
807static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
808
809static ssize_t ring_active_show(struct dma_chan *c, char *page)
810{
811 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
812
813 /* ...taken outside the lock, no need to be precise */
814 return sprintf(page, "%d\n", ioat2_ring_active(ioat));
815}
816static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
817
818static struct attribute *ioat2_attrs[] = {
819 &ring_size_attr.attr,
820 &ring_active_attr.attr,
821 &ioat_cap_attr.attr,
822 &ioat_version_attr.attr,
823 NULL,
824};
825
826struct kobj_type ioat2_ktype = {
827 .sysfs_ops = &ioat_sysfs_ops,
828 .default_attrs = ioat2_attrs,
829};
830
831int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
832{
833 struct pci_dev *pdev = device->pdev;
834 struct dma_device *dma;
835 struct dma_chan *c;
836 struct ioat_chan_common *chan;
837 int err;
838
839 device->enumerate_channels = ioat2_enumerate_channels;
840 device->cleanup_tasklet = ioat2_cleanup_tasklet;
841 device->timer_fn = ioat2_timer_event;
842 device->self_test = ioat_dma_self_test;
843 dma = &device->common;
844 dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
845 dma->device_issue_pending = ioat2_issue_pending;
846 dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
847 dma->device_free_chan_resources = ioat2_free_chan_resources;
848 dma->device_is_tx_complete = ioat2_is_complete;
849
850 err = ioat_probe(device);
851 if (err)
852 return err;
853 ioat_set_tcp_copy_break(2048);
854
855 list_for_each_entry(c, &dma->channels, device_node) {
856 chan = to_chan_common(c);
857 writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU,
858 chan->reg_base + IOAT_DCACTRL_OFFSET);
859 }
860
861 err = ioat_register(device);
862 if (err)
863 return err;
864
865 ioat_kobject_add(device, &ioat2_ktype);
866
867 if (dca)
868 device->dca = ioat2_dca_init(pdev, device->reg_base);
869
870 return err;
871}
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
new file mode 100644
index 000000000000..1d849ef74d5f
--- /dev/null
+++ b/drivers/dma/ioat/dma_v2.h
@@ -0,0 +1,190 @@
1/*
2 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59
16 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * The full GNU General Public License is included in this distribution in the
19 * file called COPYING.
20 */
21#ifndef IOATDMA_V2_H
22#define IOATDMA_V2_H
23
24#include <linux/dmaengine.h>
25#include "dma.h"
26#include "hw.h"
27
28
/* tunables defined outside this header */
29extern int ioat_pending_level;
30extern int ioat_ring_alloc_order;
31
32/*
33 * workaround for IOAT ver.3.0 null descriptor issue
34 * (channel returns error when size is 0)
35 */
36#define NULL_DESC_BUFFER_SIZE 1
37
/*
 * Ring sizes are expressed as a log2 "order".  Both accessors clamp the
 * configured value to IOAT_MAX_ORDER.
 * NOTE(review): ioat_ring_max_alloc_order is not declared in this header;
 * ioat_get_max_alloc_order() only expands correctly where that variable is
 * visible - confirm against the users.
 */
38#define IOAT_MAX_ORDER 16
39#define ioat_get_alloc_order() \
40 (min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
41#define ioat_get_max_alloc_order() \
42 (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER))
43
44/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes
45 * @base: common ioat channel parameters
46 * @xfercap_log: log2 of channel max transfer length (for fast division)
47 * @head: allocated index
48 * @issued: hardware notification point
49 * @tail: cleanup index
50 * @pending: lock free indicator for issued != head
51 * @dmacount: identical to 'head' except for occasionally resetting to zero
52 * @alloc_order: log2 of the number of allocated descriptors
53 * @ring: software ring buffer implementation of hardware ring
54 * @ring_lock: protects ring attributes
 *
 * head, issued and tail are stored unmasked; the inline helpers in this
 * header apply ioat2_ring_mask() whenever they are turned into a ring
 * slot or a count.
55 */
56struct ioat2_dma_chan {
57 struct ioat_chan_common base;
58 size_t xfercap_log;
59 u16 head;
60 u16 issued;
61 u16 tail;
62 u16 dmacount;
63 u16 alloc_order;
64 int pending;
65 struct ioat_ring_ent **ring;
66 spinlock_t ring_lock;
67};
68
69static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c)
70{
71 struct ioat_chan_common *chan = to_chan_common(c);
72
73 return container_of(chan, struct ioat2_dma_chan, base);
74}
75
76static inline u16 ioat2_ring_mask(struct ioat2_dma_chan *ioat)
77{
78 return (1 << ioat->alloc_order) - 1;
79}
80
81/* count of descriptors in flight with the engine */
82static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat)
83{
84 return (ioat->head - ioat->tail) & ioat2_ring_mask(ioat);
85}
86
87/* count of descriptors pending submission to hardware */
88static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat)
89{
90 return (ioat->head - ioat->issued) & ioat2_ring_mask(ioat);
91}
92
93static inline u16 ioat2_ring_space(struct ioat2_dma_chan *ioat)
94{
95 u16 num_descs = ioat2_ring_mask(ioat) + 1;
96 u16 active = ioat2_ring_active(ioat);
97
98 BUG_ON(active > num_descs);
99
100 return num_descs - active;
101}
102
103/* assumes caller already checked space */
104static inline u16 ioat2_desc_alloc(struct ioat2_dma_chan *ioat, u16 len)
105{
106 ioat->head += len;
107 return ioat->head - len;
108}
109
110static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len)
111{
112 u16 num_descs = len >> ioat->xfercap_log;
113
114 num_descs += !!(len & ((1 << ioat->xfercap_log) - 1));
115 return num_descs;
116}
117
118/**
119 * struct ioat_ring_ent - wrapper around hardware descriptor
120 * @hw: hardware DMA descriptor (for memcpy)
121 * @fill: hardware fill descriptor
122 * @xor: hardware xor descriptor
123 * @xor_ex: hardware xor extension descriptor
124 * @pq: hardware pq descriptor
125 * @pq_ex: hardware pq extension descriptor
126 * @pqu: hardware pq update descriptor
127 * @raw: hardware raw (un-typed) descriptor
128 * @txd: the generic software descriptor for all engines
129 * @len: total transaction length for unmap
130 * @result: asynchronous result of validate operations
131 * @id: identifier for debug
 *
 * The descriptor pointers live in one anonymous union, so they are all
 * typed views of the same pointer value.  @id only exists when DEBUG is
 * defined.
132 */
133
134struct ioat_ring_ent {
135 union {
136 struct ioat_dma_descriptor *hw;
137 struct ioat_fill_descriptor *fill;
138 struct ioat_xor_descriptor *xor;
139 struct ioat_xor_ext_descriptor *xor_ex;
140 struct ioat_pq_descriptor *pq;
141 struct ioat_pq_ext_descriptor *pq_ex;
142 struct ioat_pq_update_descriptor *pqu;
143 struct ioat_raw_descriptor *raw;
144 };
145 size_t len;
146 struct dma_async_tx_descriptor txd;
147 enum sum_check_flags *result;
148 #ifdef DEBUG
149 int id;
150 #endif
151};
152
153static inline struct ioat_ring_ent *
154ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx)
155{
156 return ioat->ring[idx & ioat2_ring_mask(ioat)];
157}
158
159static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr)
160{
161 struct ioat_chan_common *chan = &ioat->base;
162
163 writel(addr & 0x00000000FFFFFFFF,
164 chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
165 writel(addr >> 32,
166 chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
167}
168
/* probe-time entry points for v2 / v3 devices and their DCA providers */
169int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca);
170int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca);
171struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
172struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
/*
 * v2 ring / channel operations that are also used by the v3 code.
 * The *_lock prep routines and ioat2_alloc_and_lock() are relied on by
 * their callers to leave the channel locked ("we leave the channel locked
 * to ensure in order submission").
 */
173int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs);
174int ioat2_enumerate_channels(struct ioatdma_device *device);
175struct dma_async_tx_descriptor *
176ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
177 dma_addr_t dma_src, size_t len, unsigned long flags);
178void ioat2_issue_pending(struct dma_chan *chan);
179int ioat2_alloc_chan_resources(struct dma_chan *c);
180void ioat2_free_chan_resources(struct dma_chan *c);
181enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
182 dma_cookie_t *done, dma_cookie_t *used);
183void __ioat2_restart_chan(struct ioat2_dma_chan *ioat);
184bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
185void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
/* tasklet / timer callbacks; 'data' carries the channel pointer */
186void ioat2_cleanup_tasklet(unsigned long data);
187void ioat2_timer_event(unsigned long data);
/* shared objects defined in the v2 implementation */
188extern struct kobj_type ioat2_ktype;
189extern struct kmem_cache *ioat2_cache;
190#endif /* IOATDMA_V2_H */
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
new file mode 100644
index 000000000000..35d1e33afd5b
--- /dev/null
+++ b/drivers/dma/ioat/dma_v3.c
@@ -0,0 +1,1223 @@
1/*
2 * This file is provided under a dual BSD/GPLv2 license. When using or
3 * redistributing this file, you may do so under either license.
4 *
5 * GPL LICENSE SUMMARY
6 *
7 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms and conditions of the GNU General Public License,
11 * version 2, as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 * The full GNU General Public License is included in this distribution in
23 * the file called "COPYING".
24 *
25 * BSD LICENSE
26 *
27 * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
28 *
29 * Redistribution and use in source and binary forms, with or without
30 * modification, are permitted provided that the following conditions are met:
31 *
32 * * Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * * Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in
36 * the documentation and/or other materials provided with the
37 * distribution.
38 * * Neither the name of Intel Corporation nor the names of its
39 * contributors may be used to endorse or promote products derived
40 * from this software without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
43 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
46 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
47 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
48 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
49 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
50 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
51 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
52 * POSSIBILITY OF SUCH DAMAGE.
53 */
54
55/*
56 * Support routines for v3+ hardware
57 */
58
59#include <linux/pci.h>
60#include <linux/dmaengine.h>
61#include <linux/dma-mapping.h>
62#include "registers.h"
63#include "hw.h"
64#include "dma.h"
65#include "dma_v2.h"
66
67/* ioat hardware assumes at least two sources for raid operations */
/*
 * The hardware src_cnt field is therefore biased by two:
 * src_cnt_to_hw() encodes a software source count for the descriptor and
 * src_cnt_to_sw() decodes the descriptor field back to a real count.
 */
68#define src_cnt_to_sw(x) ((x) + 2)
69#define src_cnt_to_hw(x) ((x) - 2)
70
71/* provide a lookup table for setting the source address in the base or
72 * extended descriptor of an xor or pq descriptor
73 */
74static const u8 xor_idx_to_desc __read_mostly = 0xd0;
75static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 };
76static const u8 pq_idx_to_desc __read_mostly = 0xf8;
77static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 };
78
79static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
80{
81 struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
82
83 return raw->field[xor_idx_to_field[idx]];
84}
85
86static void xor_set_src(struct ioat_raw_descriptor *descs[2],
87 dma_addr_t addr, u32 offset, int idx)
88{
89 struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
90
91 raw->field[xor_idx_to_field[idx]] = addr + offset;
92}
93
94static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
95{
96 struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
97
98 return raw->field[pq_idx_to_field[idx]];
99}
100
101static void pq_set_src(struct ioat_raw_descriptor *descs[2],
102 dma_addr_t addr, u32 offset, u8 coef, int idx)
103{
104 struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
105 struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
106
107 raw->field[pq_idx_to_field[idx]] = addr + offset;
108 pq->coef[idx] = coef;
109}
110
111static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
112 struct ioat_ring_ent *desc, int idx)
113{
114 struct ioat_chan_common *chan = &ioat->base;
115 struct pci_dev *pdev = chan->device->pdev;
116 size_t len = desc->len;
117 size_t offset = len - desc->hw->size;
118 struct dma_async_tx_descriptor *tx = &desc->txd;
119 enum dma_ctrl_flags flags = tx->flags;
120
121 switch (desc->hw->ctl_f.op) {
122 case IOAT_OP_COPY:
123 if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
124 ioat_dma_unmap(chan, flags, len, desc->hw);
125 break;
126 case IOAT_OP_FILL: {
127 struct ioat_fill_descriptor *hw = desc->fill;
128
129 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
130 ioat_unmap(pdev, hw->dst_addr - offset, len,
131 PCI_DMA_FROMDEVICE, flags, 1);
132 break;
133 }
134 case IOAT_OP_XOR_VAL:
135 case IOAT_OP_XOR: {
136 struct ioat_xor_descriptor *xor = desc->xor;
137 struct ioat_ring_ent *ext;
138 struct ioat_xor_ext_descriptor *xor_ex = NULL;
139 int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
140 struct ioat_raw_descriptor *descs[2];
141 int i;
142
143 if (src_cnt > 5) {
144 ext = ioat2_get_ring_ent(ioat, idx + 1);
145 xor_ex = ext->xor_ex;
146 }
147
148 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
149 descs[0] = (struct ioat_raw_descriptor *) xor;
150 descs[1] = (struct ioat_raw_descriptor *) xor_ex;
151 for (i = 0; i < src_cnt; i++) {
152 dma_addr_t src = xor_get_src(descs, i);
153
154 ioat_unmap(pdev, src - offset, len,
155 PCI_DMA_TODEVICE, flags, 0);
156 }
157
158 /* dest is a source in xor validate operations */
159 if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
160 ioat_unmap(pdev, xor->dst_addr - offset, len,
161 PCI_DMA_TODEVICE, flags, 1);
162 break;
163 }
164 }
165
166 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
167 ioat_unmap(pdev, xor->dst_addr - offset, len,
168 PCI_DMA_FROMDEVICE, flags, 1);
169 break;
170 }
171 case IOAT_OP_PQ_VAL:
172 case IOAT_OP_PQ: {
173 struct ioat_pq_descriptor *pq = desc->pq;
174 struct ioat_ring_ent *ext;
175 struct ioat_pq_ext_descriptor *pq_ex = NULL;
176 int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
177 struct ioat_raw_descriptor *descs[2];
178 int i;
179
180 if (src_cnt > 3) {
181 ext = ioat2_get_ring_ent(ioat, idx + 1);
182 pq_ex = ext->pq_ex;
183 }
184
185 /* in the 'continue' case don't unmap the dests as sources */
186 if (dmaf_p_disabled_continue(flags))
187 src_cnt--;
188 else if (dmaf_continue(flags))
189 src_cnt -= 3;
190
191 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
192 descs[0] = (struct ioat_raw_descriptor *) pq;
193 descs[1] = (struct ioat_raw_descriptor *) pq_ex;
194 for (i = 0; i < src_cnt; i++) {
195 dma_addr_t src = pq_get_src(descs, i);
196
197 ioat_unmap(pdev, src - offset, len,
198 PCI_DMA_TODEVICE, flags, 0);
199 }
200
201 /* the dests are sources in pq validate operations */
202 if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
203 if (!(flags & DMA_PREP_PQ_DISABLE_P))
204 ioat_unmap(pdev, pq->p_addr - offset,
205 len, PCI_DMA_TODEVICE, flags, 0);
206 if (!(flags & DMA_PREP_PQ_DISABLE_Q))
207 ioat_unmap(pdev, pq->q_addr - offset,
208 len, PCI_DMA_TODEVICE, flags, 0);
209 break;
210 }
211 }
212
213 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
214 if (!(flags & DMA_PREP_PQ_DISABLE_P))
215 ioat_unmap(pdev, pq->p_addr - offset, len,
216 PCI_DMA_BIDIRECTIONAL, flags, 1);
217 if (!(flags & DMA_PREP_PQ_DISABLE_Q))
218 ioat_unmap(pdev, pq->q_addr - offset, len,
219 PCI_DMA_BIDIRECTIONAL, flags, 1);
220 }
221 break;
222 }
223 default:
224 dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
225 __func__, desc->hw->ctl_f.op);
226 }
227}
228
229static bool desc_has_ext(struct ioat_ring_ent *desc)
230{
231 struct ioat_dma_descriptor *hw = desc->hw;
232
233 if (hw->ctl_f.op == IOAT_OP_XOR ||
234 hw->ctl_f.op == IOAT_OP_XOR_VAL) {
235 struct ioat_xor_descriptor *xor = desc->xor;
236
237 if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
238 return true;
239 } else if (hw->ctl_f.op == IOAT_OP_PQ ||
240 hw->ctl_f.op == IOAT_OP_PQ_VAL) {
241 struct ioat_pq_descriptor *pq = desc->pq;
242
243 if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
244 return true;
245 }
246
247 return false;
248}
249
250/**
251 * __cleanup - reclaim used descriptors
252 * @ioat: channel (ring) to clean
253 *
254 * The difference from the dma_v2.c __cleanup() is that this routine
255 * handles extended descriptors and dma-unmapping raid operations.
256 */
257static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
258{
259 struct ioat_chan_common *chan = &ioat->base;
260 struct ioat_ring_ent *desc;
261 bool seen_current = false;
262 u16 active;
263 int i;
264
265 dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
266 __func__, ioat->head, ioat->tail, ioat->issued);
267
268 active = ioat2_ring_active(ioat);
269 for (i = 0; i < active && !seen_current; i++) {
270 struct dma_async_tx_descriptor *tx;
271
272 prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
273 desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
274 dump_desc_dbg(ioat, desc);
275 tx = &desc->txd;
276 if (tx->cookie) {
277 chan->completed_cookie = tx->cookie;
278 ioat3_dma_unmap(ioat, desc, ioat->tail + i);
279 tx->cookie = 0;
280 if (tx->callback) {
281 tx->callback(tx->callback_param);
282 tx->callback = NULL;
283 }
284 }
285
286 if (tx->phys == phys_complete)
287 seen_current = true;
288
289 /* skip extended descriptors */
290 if (desc_has_ext(desc)) {
291 BUG_ON(i + 1 >= active);
292 i++;
293 }
294 }
295 ioat->tail += i;
296 BUG_ON(!seen_current); /* no active descs have written a completion? */
297 chan->last_completion = phys_complete;
298 if (ioat->head == ioat->tail) {
299 dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
300 __func__);
301 clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
302 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
303 }
304}
305
306static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
307{
308 struct ioat_chan_common *chan = &ioat->base;
309 unsigned long phys_complete;
310
311 prefetch(chan->completion);
312
313 if (!spin_trylock_bh(&chan->cleanup_lock))
314 return;
315
316 if (!ioat_cleanup_preamble(chan, &phys_complete)) {
317 spin_unlock_bh(&chan->cleanup_lock);
318 return;
319 }
320
321 if (!spin_trylock_bh(&ioat->ring_lock)) {
322 spin_unlock_bh(&chan->cleanup_lock);
323 return;
324 }
325
326 __cleanup(ioat, phys_complete);
327
328 spin_unlock_bh(&ioat->ring_lock);
329 spin_unlock_bh(&chan->cleanup_lock);
330}
331
332static void ioat3_cleanup_tasklet(unsigned long data)
333{
334 struct ioat2_dma_chan *ioat = (void *) data;
335
336 ioat3_cleanup(ioat);
337 writew(IOAT_CHANCTRL_RUN | IOAT3_CHANCTRL_COMPL_DCA_EN,
338 ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
339}
340
341static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
342{
343 struct ioat_chan_common *chan = &ioat->base;
344 unsigned long phys_complete;
345 u32 status;
346
347 status = ioat_chansts(chan);
348 if (is_ioat_active(status) || is_ioat_idle(status))
349 ioat_suspend(chan);
350 while (is_ioat_active(status) || is_ioat_idle(status)) {
351 status = ioat_chansts(chan);
352 cpu_relax();
353 }
354
355 if (ioat_cleanup_preamble(chan, &phys_complete))
356 __cleanup(ioat, phys_complete);
357
358 __ioat2_restart_chan(ioat);
359}
360
361static void ioat3_timer_event(unsigned long data)
362{
363 struct ioat2_dma_chan *ioat = (void *) data;
364 struct ioat_chan_common *chan = &ioat->base;
365
366 spin_lock_bh(&chan->cleanup_lock);
367 if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
368 unsigned long phys_complete;
369 u64 status;
370
371 spin_lock_bh(&ioat->ring_lock);
372 status = ioat_chansts(chan);
373
374 /* when halted due to errors check for channel
375 * programming errors before advancing the completion state
376 */
377 if (is_ioat_halted(status)) {
378 u32 chanerr;
379
380 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
381 BUG_ON(is_ioat_bug(chanerr));
382 }
383
384 /* if we haven't made progress and we have already
385 * acknowledged a pending completion once, then be more
386 * forceful with a restart
387 */
388 if (ioat_cleanup_preamble(chan, &phys_complete))
389 __cleanup(ioat, phys_complete);
390 else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
391 ioat3_restart_channel(ioat);
392 else {
393 set_bit(IOAT_COMPLETION_ACK, &chan->state);
394 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
395 }
396 spin_unlock_bh(&ioat->ring_lock);
397 } else {
398 u16 active;
399
400 /* if the ring is idle, empty, and oversized try to step
401 * down the size
402 */
403 spin_lock_bh(&ioat->ring_lock);
404 active = ioat2_ring_active(ioat);
405 if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
406 reshape_ring(ioat, ioat->alloc_order-1);
407 spin_unlock_bh(&ioat->ring_lock);
408
409 /* keep shrinking until we get back to our minimum
410 * default size
411 */
412 if (ioat->alloc_order > ioat_get_alloc_order())
413 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
414 }
415 spin_unlock_bh(&chan->cleanup_lock);
416}
417
418static enum dma_status
419ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie,
420 dma_cookie_t *done, dma_cookie_t *used)
421{
422 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
423
424 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
425 return DMA_SUCCESS;
426
427 ioat3_cleanup(ioat);
428
429 return ioat_is_complete(c, cookie, done, used);
430}
431
432static struct dma_async_tx_descriptor *
433ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
434 size_t len, unsigned long flags)
435{
436 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
437 struct ioat_ring_ent *desc;
438 size_t total_len = len;
439 struct ioat_fill_descriptor *fill;
440 int num_descs;
441 u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
442 u16 idx;
443 int i;
444
445 num_descs = ioat2_xferlen_to_descs(ioat, len);
446 if (likely(num_descs) &&
447 ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
448 /* pass */;
449 else
450 return NULL;
451 i = 0;
452 do {
453 size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
454
455 desc = ioat2_get_ring_ent(ioat, idx + i);
456 fill = desc->fill;
457
458 fill->size = xfer_size;
459 fill->src_data = src_data;
460 fill->dst_addr = dest;
461 fill->ctl = 0;
462 fill->ctl_f.op = IOAT_OP_FILL;
463
464 len -= xfer_size;
465 dest += xfer_size;
466 dump_desc_dbg(ioat, desc);
467 } while (++i < num_descs);
468
469 desc->txd.flags = flags;
470 desc->len = total_len;
471 fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
472 fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
473 fill->ctl_f.compl_write = 1;
474 dump_desc_dbg(ioat, desc);
475
476 /* we leave the channel locked to ensure in order submission */
477 return &desc->txd;
478}
479
/*
 * __ioat3_prep_xor_lock - build the descriptors for an xor / xor-validate
 * @c: channel to prepare on
 * @result: non-NULL selects IOAT_OP_XOR_VAL and is stored in the last xor
 *          descriptor's ring entry; NULL selects IOAT_OP_XOR
 * @dest: destination address
 * @src: array of @src_cnt source addresses
 * @src_cnt: number of sources, at least 2
 * @len: total length; split into chunks of at most (1 << xfercap_log)
 * @flags: dmaengine prep flags
 *
 * Allocates one descriptor per chunk (two per chunk when @src_cnt > 5)
 * plus one trailing null descriptor.  Returns the txd of the last xor
 * descriptor, or NULL if @len needs no descriptors or allocation fails.
 */
480static struct dma_async_tx_descriptor *
481__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
482 dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
483 size_t len, unsigned long flags)
484{
485 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
486 struct ioat_ring_ent *compl_desc;
487 struct ioat_ring_ent *desc;
488 struct ioat_ring_ent *ext;
489 size_t total_len = len;
490 struct ioat_xor_descriptor *xor;
491 struct ioat_xor_ext_descriptor *xor_ex = NULL;
492 struct ioat_dma_descriptor *hw;
493 u32 offset = 0;
494 int num_descs;
495 int with_ext;
496 int i;
497 u16 idx;
498 u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
499
500 BUG_ON(src_cnt < 2);
501
502 num_descs = ioat2_xferlen_to_descs(ioat, len);
503 /* we need 2x the number of descriptors to cover greater than 5
504 * sources
505 */
506 if (src_cnt > 5) {
507 with_ext = 1;
508 num_descs *= 2;
509 } else
510 with_ext = 0;
511
512 /* completion writes from the raid engine may pass completion
513 * writes from the legacy engine, so we need one extra null
514 * (legacy) descriptor to ensure all completion writes arrive in
515 * order.
516 */
517 if (likely(num_descs) &&
518 ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
519 /* pass */;
520 else
521 return NULL;
522 i = 0;
 /* one iteration per chunk; i advances by 2 when an extension is used */
523 do {
524 struct ioat_raw_descriptor *descs[2];
525 size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
526 int s;
527
528 desc = ioat2_get_ring_ent(ioat, idx + i);
529 xor = desc->xor;
530
531 /* save a branch by unconditionally retrieving the
532 * extended descriptor; xor_set_src() knows to not write
533 * to it in the single descriptor case
534 */
535 ext = ioat2_get_ring_ent(ioat, idx + i + 1);
536 xor_ex = ext->xor_ex;
537
538 descs[0] = (struct ioat_raw_descriptor *) xor;
539 descs[1] = (struct ioat_raw_descriptor *) xor_ex;
540 for (s = 0; s < src_cnt; s++)
541 xor_set_src(descs, src[s], offset, s);
542 xor->size = xfer_size;
543 xor->dst_addr = dest + offset;
544 xor->ctl = 0;
545 xor->ctl_f.op = op;
546 xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
547
548 len -= xfer_size;
549 offset += xfer_size;
550 dump_desc_dbg(ioat, desc);
551 } while ((i += 1 + with_ext) < num_descs);
552
 /* desc / xor still refer to the final chunk written by the loop */
553 /* last xor descriptor carries the unmap parameters and fence bit */
554 desc->txd.flags = flags;
555 desc->len = total_len;
556 if (result)
557 desc->result = result;
558 xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
559
 /* i now indexes the extra entry reserved by the num_descs+1 allocation */
560 /* completion descriptor carries interrupt bit */
561 compl_desc = ioat2_get_ring_ent(ioat, idx + i);
562 compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
563 hw = compl_desc->hw;
564 hw->ctl = 0;
565 hw->ctl_f.null = 1;
566 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
567 hw->ctl_f.compl_write = 1;
568 hw->size = NULL_DESC_BUFFER_SIZE;
569 dump_desc_dbg(ioat, compl_desc);
570
571 /* we leave the channel locked to ensure in order submission */
572 return &desc->txd;
573}
574
575static struct dma_async_tx_descriptor *
576ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
577 unsigned int src_cnt, size_t len, unsigned long flags)
578{
579 return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
580}
581
582struct dma_async_tx_descriptor *
583ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
584 unsigned int src_cnt, size_t len,
585 enum sum_check_flags *result, unsigned long flags)
586{
587 /* the cleanup routine only sets bits on validate failure, it
588 * does not clear bits on validate success... so clear it here
589 */
590 *result = 0;
591
592 return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
593 src_cnt - 1, len, flags);
594}
595
596static void
597dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
598{
599 struct device *dev = to_dev(&ioat->base);
600 struct ioat_pq_descriptor *pq = desc->pq;
601 struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
602 struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
603 int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
604 int i;
605
606 dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
607 " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
608 desc_id(desc), (unsigned long long) desc->txd.phys,
609 (unsigned long long) (pq_ex ? pq_ex->next : pq->next),
610 desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
611 pq->ctl_f.compl_write,
612 pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
613 pq->ctl_f.src_cnt);
614 for (i = 0; i < src_cnt; i++)
615 dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
616 (unsigned long long) pq_get_src(descs, i), pq->coef[i]);
617 dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
618 dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
619}
620
/*
 * __ioat3_prep_pq_lock - build the descriptors for a pq / pq-validate
 * @c: channel to prepare on
 * @result: non-NULL selects IOAT_OP_PQ_VAL and is stored in the last pq
 *          descriptor's ring entry; NULL selects IOAT_OP_PQ
 * @dst: dst[0] is the P address, dst[1] the Q address
 * @src: array of @src_cnt source addresses
 * @src_cnt: number of explicit sources
 * @scf: per-source coefficients, parallel to @src
 * @len: total length; split into chunks of at most (1 << xfercap_log)
 * @flags: dmaengine prep flags (continue / P-Q disable / fence / interrupt)
 *
 * Allocates one descriptor per chunk (two per chunk when @src_cnt > 3 or
 * DMA_PREP_CONTINUE is set) plus one trailing null descriptor.  Returns
 * the txd of the last pq descriptor, or NULL if @len needs no descriptors
 * or allocation fails.
 */
621static struct dma_async_tx_descriptor *
622__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
623 const dma_addr_t *dst, const dma_addr_t *src,
624 unsigned int src_cnt, const unsigned char *scf,
625 size_t len, unsigned long flags)
626{
627 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
628 struct ioat_chan_common *chan = &ioat->base;
629 struct ioat_ring_ent *compl_desc;
630 struct ioat_ring_ent *desc;
631 struct ioat_ring_ent *ext;
632 size_t total_len = len;
633 struct ioat_pq_descriptor *pq;
634 struct ioat_pq_ext_descriptor *pq_ex = NULL;
635 struct ioat_dma_descriptor *hw;
636 u32 offset = 0;
637 int num_descs;
638 int with_ext;
639 int i, s;
640 u16 idx;
641 u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
642
643 dev_dbg(to_dev(chan), "%s\n", __func__);
644 /* the engine requires at least two sources (we provide
645 * at least 1 implied source in the DMA_PREP_CONTINUE case)
646 */
647 BUG_ON(src_cnt + dmaf_continue(flags) < 2);
648
649 num_descs = ioat2_xferlen_to_descs(ioat, len);
650 /* we need 2x the number of descriptors to cover greater than 3
651 * sources
652 */
653 if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) {
654 with_ext = 1;
655 num_descs *= 2;
656 } else
657 with_ext = 0;
658
659 /* completion writes from the raid engine may pass completion
660 * writes from the legacy engine, so we need one extra null
661 * (legacy) descriptor to ensure all completion writes arrive in
662 * order.
663 */
664 if (likely(num_descs) &&
665 ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
666 /* pass */;
667 else
668 return NULL;
669 i = 0;
 /* one iteration per chunk; i advances by 2 when an extension is used */
670 do {
671 struct ioat_raw_descriptor *descs[2];
672 size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
673
674 desc = ioat2_get_ring_ent(ioat, idx + i);
675 pq = desc->pq;
676
677 /* save a branch by unconditionally retrieving the
678 * extended descriptor; pq_set_src() knows to not write
679 * to it in the single descriptor case
680 */
681 ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
682 pq_ex = ext->pq_ex;
683
684 descs[0] = (struct ioat_raw_descriptor *) pq;
685 descs[1] = (struct ioat_raw_descriptor *) pq_ex;
686
687 for (s = 0; s < src_cnt; s++)
688 pq_set_src(descs, src[s], offset, scf[s], s);
689
 /* s keeps counting so the implied continuation sources land
 * after the explicit ones and are included in src_cnt below
 */
690 /* see the comment for dma_maxpq in include/linux/dmaengine.h */
691 if (dmaf_p_disabled_continue(flags))
692 pq_set_src(descs, dst[1], offset, 1, s++);
693 else if (dmaf_continue(flags)) {
694 pq_set_src(descs, dst[0], offset, 0, s++);
695 pq_set_src(descs, dst[1], offset, 1, s++);
696 pq_set_src(descs, dst[1], offset, 0, s++);
697 }
698 pq->size = xfer_size;
699 pq->p_addr = dst[0] + offset;
700 pq->q_addr = dst[1] + offset;
701 pq->ctl = 0;
702 pq->ctl_f.op = op;
703 pq->ctl_f.src_cnt = src_cnt_to_hw(s);
704 pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
705 pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
706
707 len -= xfer_size;
708 offset += xfer_size;
709 } while ((i += 1 + with_ext) < num_descs);
710
 /* desc / pq / ext still refer to the final chunk written by the loop */
711 /* last pq descriptor carries the unmap parameters and fence bit */
712 desc->txd.flags = flags;
713 desc->len = total_len;
714 if (result)
715 desc->result = result;
716 pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
717 dump_pq_desc_dbg(ioat, desc, ext);
718
 /* i now indexes the extra entry reserved by the num_descs+1 allocation */
719 /* completion descriptor carries interrupt bit */
720 compl_desc = ioat2_get_ring_ent(ioat, idx + i);
721 compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
722 hw = compl_desc->hw;
723 hw->ctl = 0;
724 hw->ctl_f.null = 1;
725 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
726 hw->ctl_f.compl_write = 1;
727 hw->size = NULL_DESC_BUFFER_SIZE;
728 dump_desc_dbg(ioat, compl_desc);
729
730 /* we leave the channel locked to ensure in order submission */
731 return &desc->txd;
732}
733
734static struct dma_async_tx_descriptor *
735ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
736 unsigned int src_cnt, const unsigned char *scf, size_t len,
737 unsigned long flags)
738{
739 /* handle the single source multiply case from the raid6
740 * recovery path
741 */
742 if (unlikely((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1)) {
743 dma_addr_t single_source[2];
744 unsigned char single_source_coef[2];
745
746 BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
747 single_source[0] = src[0];
748 single_source[1] = src[0];
749 single_source_coef[0] = scf[0];
750 single_source_coef[1] = 0;
751
752 return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
753 single_source_coef, len, flags);
754 } else
755 return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
756 len, flags);
757}
758
759struct dma_async_tx_descriptor *
760ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
761 unsigned int src_cnt, const unsigned char *scf, size_t len,
762 enum sum_check_flags *pqres, unsigned long flags)
763{
764 /* the cleanup routine only sets bits on validate failure, it
765 * does not clear bits on validate success... so clear it here
766 */
767 *pqres = 0;
768
769 return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
770 flags);
771}
772
773static struct dma_async_tx_descriptor *
774ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
775 unsigned int src_cnt, size_t len, unsigned long flags)
776{
777 unsigned char scf[src_cnt];
778 dma_addr_t pq[2];
779
780 memset(scf, 0, src_cnt);
781 flags |= DMA_PREP_PQ_DISABLE_Q;
782 pq[0] = dst;
783 pq[1] = ~0;
784
785 return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
786 flags);
787}
788
789struct dma_async_tx_descriptor *
790ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
791 unsigned int src_cnt, size_t len,
792 enum sum_check_flags *result, unsigned long flags)
793{
794 unsigned char scf[src_cnt];
795 dma_addr_t pq[2];
796
797 /* the cleanup routine only sets bits on validate failure, it
798 * does not clear bits on validate success... so clear it here
799 */
800 *result = 0;
801
802 memset(scf, 0, src_cnt);
803 flags |= DMA_PREP_PQ_DISABLE_Q;
804 pq[0] = src[0];
805 pq[1] = ~0;
806
807 return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf,
808 len, flags);
809}
810
811static struct dma_async_tx_descriptor *
812ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
813{
814 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
815 struct ioat_ring_ent *desc;
816 struct ioat_dma_descriptor *hw;
817 u16 idx;
818
819 if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0)
820 desc = ioat2_get_ring_ent(ioat, idx);
821 else
822 return NULL;
823
824 hw = desc->hw;
825 hw->ctl = 0;
826 hw->ctl_f.null = 1;
827 hw->ctl_f.int_en = 1;
828 hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
829 hw->ctl_f.compl_write = 1;
830 hw->size = NULL_DESC_BUFFER_SIZE;
831 hw->src_addr = 0;
832 hw->dst_addr = 0;
833
834 desc->txd.flags = flags;
835 desc->len = 1;
836
837 dump_desc_dbg(ioat, desc);
838
839 /* we leave the channel locked to ensure in order submission */
840 return &desc->txd;
841}
842
843static void __devinit ioat3_dma_test_callback(void *dma_async_param)
844{
845 struct completion *cmp = dma_async_param;
846
847 complete(cmp);
848}
849
850#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
851static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
852{
853 int i, src_idx;
854 struct page *dest;
855 struct page *xor_srcs[IOAT_NUM_SRC_TEST];
856 struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
857 dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
858 dma_addr_t dma_addr, dest_dma;
859 struct dma_async_tx_descriptor *tx;
860 struct dma_chan *dma_chan;
861 dma_cookie_t cookie;
862 u8 cmp_byte = 0;
863 u32 cmp_word;
864 u32 xor_val_result;
865 int err = 0;
866 struct completion cmp;
867 unsigned long tmo;
868 struct device *dev = &device->pdev->dev;
869 struct dma_device *dma = &device->common;
870
871 dev_dbg(dev, "%s\n", __func__);
872
873 if (!dma_has_cap(DMA_XOR, dma->cap_mask))
874 return 0;
875
876 for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
877 xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
878 if (!xor_srcs[src_idx]) {
879 while (src_idx--)
880 __free_page(xor_srcs[src_idx]);
881 return -ENOMEM;
882 }
883 }
884
885 dest = alloc_page(GFP_KERNEL);
886 if (!dest) {
887 while (src_idx--)
888 __free_page(xor_srcs[src_idx]);
889 return -ENOMEM;
890 }
891
892 /* Fill in src buffers */
893 for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
894 u8 *ptr = page_address(xor_srcs[src_idx]);
895 for (i = 0; i < PAGE_SIZE; i++)
896 ptr[i] = (1 << src_idx);
897 }
898
899 for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
900 cmp_byte ^= (u8) (1 << src_idx);
901
902 cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
903 (cmp_byte << 8) | cmp_byte;
904
905 memset(page_address(dest), 0, PAGE_SIZE);
906
907 dma_chan = container_of(dma->channels.next, struct dma_chan,
908 device_node);
909 if (dma->device_alloc_chan_resources(dma_chan) < 1) {
910 err = -ENODEV;
911 goto out;
912 }
913
914 /* test xor */
915 dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
916 for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
917 dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
918 DMA_TO_DEVICE);
919 tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
920 IOAT_NUM_SRC_TEST, PAGE_SIZE,
921 DMA_PREP_INTERRUPT);
922
923 if (!tx) {
924 dev_err(dev, "Self-test xor prep failed\n");
925 err = -ENODEV;
926 goto free_resources;
927 }
928
929 async_tx_ack(tx);
930 init_completion(&cmp);
931 tx->callback = ioat3_dma_test_callback;
932 tx->callback_param = &cmp;
933 cookie = tx->tx_submit(tx);
934 if (cookie < 0) {
935 dev_err(dev, "Self-test xor setup failed\n");
936 err = -ENODEV;
937 goto free_resources;
938 }
939 dma->device_issue_pending(dma_chan);
940
941 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
942
943 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
944 dev_err(dev, "Self-test xor timed out\n");
945 err = -ENODEV;
946 goto free_resources;
947 }
948
949 dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
950 for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
951 u32 *ptr = page_address(dest);
952 if (ptr[i] != cmp_word) {
953 dev_err(dev, "Self-test xor failed compare\n");
954 err = -ENODEV;
955 goto free_resources;
956 }
957 }
958 dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE);
959
960 /* skip validate if the capability is not present */
961 if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
962 goto free_resources;
963
964 /* validate the sources with the destintation page */
965 for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
966 xor_val_srcs[i] = xor_srcs[i];
967 xor_val_srcs[i] = dest;
968
969 xor_val_result = 1;
970
971 for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
972 dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
973 DMA_TO_DEVICE);
974 tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
975 IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
976 &xor_val_result, DMA_PREP_INTERRUPT);
977 if (!tx) {
978 dev_err(dev, "Self-test zero prep failed\n");
979 err = -ENODEV;
980 goto free_resources;
981 }
982
983 async_tx_ack(tx);
984 init_completion(&cmp);
985 tx->callback = ioat3_dma_test_callback;
986 tx->callback_param = &cmp;
987 cookie = tx->tx_submit(tx);
988 if (cookie < 0) {
989 dev_err(dev, "Self-test zero setup failed\n");
990 err = -ENODEV;
991 goto free_resources;
992 }
993 dma->device_issue_pending(dma_chan);
994
995 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
996
997 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
998 dev_err(dev, "Self-test validate timed out\n");
999 err = -ENODEV;
1000 goto free_resources;
1001 }
1002
1003 if (xor_val_result != 0) {
1004 dev_err(dev, "Self-test validate failed compare\n");
1005 err = -ENODEV;
1006 goto free_resources;
1007 }
1008
1009 /* skip memset if the capability is not present */
1010 if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask))
1011 goto free_resources;
1012
1013 /* test memset */
1014 dma_addr = dma_map_page(dev, dest, 0,
1015 PAGE_SIZE, DMA_FROM_DEVICE);
1016 tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
1017 DMA_PREP_INTERRUPT);
1018 if (!tx) {
1019 dev_err(dev, "Self-test memset prep failed\n");
1020 err = -ENODEV;
1021 goto free_resources;
1022 }
1023
1024 async_tx_ack(tx);
1025 init_completion(&cmp);
1026 tx->callback = ioat3_dma_test_callback;
1027 tx->callback_param = &cmp;
1028 cookie = tx->tx_submit(tx);
1029 if (cookie < 0) {
1030 dev_err(dev, "Self-test memset setup failed\n");
1031 err = -ENODEV;
1032 goto free_resources;
1033 }
1034 dma->device_issue_pending(dma_chan);
1035
1036 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1037
1038 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
1039 dev_err(dev, "Self-test memset timed out\n");
1040 err = -ENODEV;
1041 goto free_resources;
1042 }
1043
1044 for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
1045 u32 *ptr = page_address(dest);
1046 if (ptr[i]) {
1047 dev_err(dev, "Self-test memset failed compare\n");
1048 err = -ENODEV;
1049 goto free_resources;
1050 }
1051 }
1052
1053 /* test for non-zero parity sum */
1054 xor_val_result = 0;
1055 for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1056 dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
1057 DMA_TO_DEVICE);
1058 tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
1059 IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
1060 &xor_val_result, DMA_PREP_INTERRUPT);
1061 if (!tx) {
1062 dev_err(dev, "Self-test 2nd zero prep failed\n");
1063 err = -ENODEV;
1064 goto free_resources;
1065 }
1066
1067 async_tx_ack(tx);
1068 init_completion(&cmp);
1069 tx->callback = ioat3_dma_test_callback;
1070 tx->callback_param = &cmp;
1071 cookie = tx->tx_submit(tx);
1072 if (cookie < 0) {
1073 dev_err(dev, "Self-test 2nd zero setup failed\n");
1074 err = -ENODEV;
1075 goto free_resources;
1076 }
1077 dma->device_issue_pending(dma_chan);
1078
1079 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1080
1081 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
1082 dev_err(dev, "Self-test 2nd validate timed out\n");
1083 err = -ENODEV;
1084 goto free_resources;
1085 }
1086
1087 if (xor_val_result != SUM_CHECK_P_RESULT) {
1088 dev_err(dev, "Self-test validate failed compare\n");
1089 err = -ENODEV;
1090 goto free_resources;
1091 }
1092
1093free_resources:
1094 dma->device_free_chan_resources(dma_chan);
1095out:
1096 src_idx = IOAT_NUM_SRC_TEST;
1097 while (src_idx--)
1098 __free_page(xor_srcs[src_idx]);
1099 __free_page(dest);
1100 return err;
1101}
1102
1103static int __devinit ioat3_dma_self_test(struct ioatdma_device *device)
1104{
1105 int rc = ioat_dma_self_test(device);
1106
1107 if (rc)
1108 return rc;
1109
1110 rc = ioat_xor_val_self_test(device);
1111 if (rc)
1112 return rc;
1113
1114 return 0;
1115}
1116
1117int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
1118{
1119 struct pci_dev *pdev = device->pdev;
1120 struct dma_device *dma;
1121 struct dma_chan *c;
1122 struct ioat_chan_common *chan;
1123 bool is_raid_device = false;
1124 int err;
1125 u16 dev_id;
1126 u32 cap;
1127
1128 device->enumerate_channels = ioat2_enumerate_channels;
1129 device->self_test = ioat3_dma_self_test;
1130 dma = &device->common;
1131 dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
1132 dma->device_issue_pending = ioat2_issue_pending;
1133 dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
1134 dma->device_free_chan_resources = ioat2_free_chan_resources;
1135
1136 dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
1137 dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
1138
1139 cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
1140 if (cap & IOAT_CAP_XOR) {
1141 is_raid_device = true;
1142 dma->max_xor = 8;
1143 dma->xor_align = 2;
1144
1145 dma_cap_set(DMA_XOR, dma->cap_mask);
1146 dma->device_prep_dma_xor = ioat3_prep_xor;
1147
1148 dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1149 dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
1150 }
1151 if (cap & IOAT_CAP_PQ) {
1152 is_raid_device = true;
1153 dma_set_maxpq(dma, 8, 0);
1154 dma->pq_align = 2;
1155
1156 dma_cap_set(DMA_PQ, dma->cap_mask);
1157 dma->device_prep_dma_pq = ioat3_prep_pq;
1158
1159 dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
1160 dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
1161
1162 if (!(cap & IOAT_CAP_XOR)) {
1163 dma->max_xor = 8;
1164 dma->xor_align = 2;
1165
1166 dma_cap_set(DMA_XOR, dma->cap_mask);
1167 dma->device_prep_dma_xor = ioat3_prep_pqxor;
1168
1169 dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1170 dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
1171 }
1172 }
1173 if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) {
1174 dma_cap_set(DMA_MEMSET, dma->cap_mask);
1175 dma->device_prep_dma_memset = ioat3_prep_memset_lock;
1176 }
1177
1178
1179 if (is_raid_device) {
1180 dma->device_is_tx_complete = ioat3_is_complete;
1181 device->cleanup_tasklet = ioat3_cleanup_tasklet;
1182 device->timer_fn = ioat3_timer_event;
1183 } else {
1184 dma->device_is_tx_complete = ioat2_is_complete;
1185 device->cleanup_tasklet = ioat2_cleanup_tasklet;
1186 device->timer_fn = ioat2_timer_event;
1187 }
1188
1189 /* -= IOAT ver.3 workarounds =- */
1190 /* Write CHANERRMSK_INT with 3E07h to mask out the errors
1191 * that can cause stability issues for IOAT ver.3
1192 */
1193 pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
1194
1195 /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
1196 * (workaround for spurious config parity error after restart)
1197 */
1198 pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
1199 if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
1200 pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
1201
1202 err = ioat_probe(device);
1203 if (err)
1204 return err;
1205 ioat_set_tcp_copy_break(262144);
1206
1207 list_for_each_entry(c, &dma->channels, device_node) {
1208 chan = to_chan_common(c);
1209 writel(IOAT_DMA_DCA_ANY_CPU,
1210 chan->reg_base + IOAT_DCACTRL_OFFSET);
1211 }
1212
1213 err = ioat_register(device);
1214 if (err)
1215 return err;
1216
1217 ioat_kobject_add(device, &ioat2_ktype);
1218
1219 if (dca)
1220 device->dca = ioat3_dca_init(pdev, device->reg_base);
1221
1222 return 0;
1223}
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
new file mode 100644
index 000000000000..99afb12bd409
--- /dev/null
+++ b/drivers/dma/ioat/hw.h
@@ -0,0 +1,215 @@
1/*
2 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59
16 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * The full GNU General Public License is included in this distribution in the
19 * file called COPYING.
20 */
21#ifndef _IOAT_HW_H_
22#define _IOAT_HW_H_
23
/* PCI Configuration Space Values */
#define IOAT_PCI_VID            0x8086
#define IOAT_MMIO_BAR		0	/* BAR index mapped by the PCI probe code */

/* CB device ID's */
#define IOAT_PCI_DID_5000       0x1A38
#define IOAT_PCI_DID_CNB        0x360B
#define IOAT_PCI_DID_SCNB       0x65FF
#define IOAT_PCI_DID_SNB        0x402F

#define IOAT_PCI_RID            0x00
#define IOAT_PCI_SVID           0x8086
#define IOAT_PCI_SID            0x8086
/* hardware version values: major number in the high nibble, minor in the low */
#define IOAT_VER_1_2            0x12    /* Version 1.2 */
#define IOAT_VER_2_0            0x20    /* Version 2.0 */
#define IOAT_VER_3_0            0x30    /* Version 3.0 */
#define IOAT_VER_3_2            0x32    /* Version 3.2 */
41
/*
 * Descriptor layout for copy operations (IOAT_OP_COPY); the same layout is
 * used for null descriptors (ctl_f.null set, size NULL_DESC_BUFFER_SIZE).
 * Two 32-bit words followed by seven 64-bit words.
 */
struct ioat_dma_descriptor {
	uint32_t	size;
	union {
		/* whole control word; ctl_f gives per-bit access to it */
		uint32_t ctl;
		struct {
			unsigned int int_en:1;
			unsigned int src_snoop_dis:1;
			unsigned int dest_snoop_dis:1;
			unsigned int compl_write:1;
			unsigned int fence:1;
			unsigned int null:1;
			unsigned int src_brk:1;
			unsigned int dest_brk:1;
			unsigned int bundle:1;
			unsigned int dest_dca:1;
			unsigned int hint:1;
			unsigned int rsvd2:13;
			#define IOAT_OP_COPY 0x00
			unsigned int op:8;
		} ctl_f;
	};
	uint64_t	src_addr;
	uint64_t	dst_addr;
	uint64_t	next;
	uint64_t	rsv1;
	uint64_t	rsv2;
	/* store some driver data in an unused portion of the descriptor */
	union {
		uint64_t	user1;
		uint64_t	tx_cnt;
	};
	uint64_t	user2;
};
75
/*
 * Descriptor layout for fill operations (IOAT_OP_FILL): src_data holds a
 * 64-bit data value in the slot where the copy descriptor has a source
 * address.  Two 32-bit words followed by seven 64-bit words.
 */
struct ioat_fill_descriptor {
	uint32_t	size;
	union {
		/* whole control word; ctl_f gives per-bit access to it */
		uint32_t ctl;
		struct {
			unsigned int int_en:1;
			unsigned int rsvd:1;
			unsigned int dest_snoop_dis:1;
			unsigned int compl_write:1;
			unsigned int fence:1;
			unsigned int rsvd2:2;
			unsigned int dest_brk:1;
			unsigned int bundle:1;
			unsigned int rsvd4:15;
			#define IOAT_OP_FILL 0x01
			unsigned int op:8;
		} ctl_f;
	};
	uint64_t	src_data;
	uint64_t	dst_addr;
	uint64_t	next;
	uint64_t	rsv1;
	uint64_t	next_dst_addr;
	uint64_t	user1;
	uint64_t	user2;
};
102
/*
 * Descriptor layout for xor (IOAT_OP_XOR) and xor-validate
 * (IOAT_OP_XOR_VAL) operations.  Carries source addresses 1-5; sources 6-8
 * are held in struct ioat_xor_ext_descriptor.  src_cnt is a 3-bit field.
 */
struct ioat_xor_descriptor {
	uint32_t	size;
	union {
		/* whole control word; ctl_f gives per-bit access to it */
		uint32_t ctl;
		struct {
			unsigned int int_en:1;
			unsigned int src_snoop_dis:1;
			unsigned int dest_snoop_dis:1;
			unsigned int compl_write:1;
			unsigned int fence:1;
			unsigned int src_cnt:3;
			unsigned int bundle:1;
			unsigned int dest_dca:1;
			unsigned int hint:1;
			unsigned int rsvd:13;
			#define IOAT_OP_XOR 0x87
			#define IOAT_OP_XOR_VAL 0x88
			unsigned int op:8;
		} ctl_f;
	};
	uint64_t	src_addr;
	uint64_t	dst_addr;
	uint64_t	next;
	uint64_t	src_addr2;
	uint64_t	src_addr3;
	uint64_t	src_addr4;
	uint64_t	src_addr5;
};
131
/*
 * Extension to struct ioat_xor_descriptor holding source addresses 6-8.
 * Eight 64-bit words, the last four reserved.
 */
struct ioat_xor_ext_descriptor {
	uint64_t	src_addr6;
	uint64_t	src_addr7;
	uint64_t	src_addr8;
	uint64_t	next;
	uint64_t	rsvd[4];
};
139
/*
 * Descriptor layout for P+Q (IOAT_OP_PQ) and P+Q validate (IOAT_OP_PQ_VAL)
 * operations.  Carries source addresses 1-3, one coefficient byte per
 * source (coef[8]) and separate P and Q addresses; sources 4-8 are held in
 * struct ioat_pq_ext_descriptor.
 */
struct ioat_pq_descriptor {
	uint32_t	size;
	union {
		/* whole control word; ctl_f gives per-bit access to it */
		uint32_t ctl;
		struct {
			unsigned int int_en:1;
			unsigned int src_snoop_dis:1;
			unsigned int dest_snoop_dis:1;
			unsigned int compl_write:1;
			unsigned int fence:1;
			unsigned int src_cnt:3;
			unsigned int bundle:1;
			unsigned int dest_dca:1;
			unsigned int hint:1;
			unsigned int p_disable:1;
			unsigned int q_disable:1;
			unsigned int rsvd:11;
			#define IOAT_OP_PQ 0x89
			#define IOAT_OP_PQ_VAL 0x8a
			unsigned int op:8;
		} ctl_f;
	};
	uint64_t	src_addr;
	uint64_t	p_addr;
	uint64_t	next;
	uint64_t	src_addr2;
	uint64_t	src_addr3;
	uint8_t		coef[8];
	uint64_t	q_addr;
};
170
/*
 * Extension to struct ioat_pq_descriptor holding source addresses 4-8.
 * Eight 64-bit words, the last two reserved.
 */
struct ioat_pq_ext_descriptor {
	uint64_t	src_addr4;
	uint64_t	src_addr5;
	uint64_t	src_addr6;
	uint64_t	next;
	uint64_t	src_addr7;
	uint64_t	src_addr8;
	uint64_t	rsvd[2];
};
180
/*
 * Descriptor layout for the P+Q update operation (IOAT_OP_PQ_UP).  Unlike
 * struct ioat_pq_descriptor, the coefficient is an 8-bit field inside the
 * control word, and the descriptor carries p_src/q_src in addition to
 * p_addr/q_addr.
 */
struct ioat_pq_update_descriptor {
	uint32_t	size;
	union {
		/* whole control word; ctl_f gives per-bit access to it */
		uint32_t ctl;
		struct {
			unsigned int int_en:1;
			unsigned int src_snoop_dis:1;
			unsigned int dest_snoop_dis:1;
			unsigned int compl_write:1;
			unsigned int fence:1;
			unsigned int src_cnt:3;
			unsigned int bundle:1;
			unsigned int dest_dca:1;
			unsigned int hint:1;
			unsigned int p_disable:1;
			unsigned int q_disable:1;
			unsigned int rsvd:3;
			unsigned int coef:8;
			#define IOAT_OP_PQ_UP 0x8b
			unsigned int op:8;
		} ctl_f;
	};
	uint64_t	src_addr;
	uint64_t	p_addr;
	uint64_t	next;
	uint64_t	src_addr2;
	uint64_t	p_src;
	uint64_t	q_src;
	uint64_t	q_addr;
};
211
/* Untyped view of a descriptor as eight 64-bit words. */
struct ioat_raw_descriptor {
	uint64_t	field[8];
};
215#endif
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
new file mode 100644
index 000000000000..d545fae30f37
--- /dev/null
+++ b/drivers/dma/ioat/pci.c
@@ -0,0 +1,210 @@
1/*
2 * Intel I/OAT DMA Linux driver
3 * Copyright(c) 2007 - 2009 Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * The full GNU General Public License is included in this distribution in
19 * the file called "COPYING".
20 *
21 */
22
23/*
24 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
25 * copy operations.
26 */
27
28#include <linux/init.h>
29#include <linux/module.h>
30#include <linux/pci.h>
31#include <linux/interrupt.h>
32#include <linux/dca.h>
33#include "dma.h"
34#include "dma_v2.h"
35#include "registers.h"
36#include "hw.h"
37
/* module metadata */
MODULE_VERSION(IOAT_DMA_VERSION);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Intel Corporation");
41
/*
 * PCI IDs handled by this driver, grouped by I/OAT hardware generation.
 * Referenced by ioat_pci_driver.id_table and exported through
 * MODULE_DEVICE_TABLE(); the zeroed entry terminates the list.
 */
static struct pci_device_id ioat_pci_tbl[] = {
	/* I/OAT v1 platforms */
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB)  },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
	{ PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },

	/* I/OAT v2 platforms */
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },

	/* I/OAT v3 platforms */
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },

	/* I/OAT v3.2 platforms */
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },

	{ 0, }
};
MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
77
/* forward declarations: both are referenced by ioat_pci_driver below */
static int __devinit ioat_pci_probe(struct pci_dev *pdev,
				    const struct pci_device_id *id);
static void __devexit ioat_remove(struct pci_dev *pdev);

/* handed to the version-specific probe routines as their dca argument */
static int ioat_dca_enabled = 1;
module_param(ioat_dca_enabled, int, 0644);
MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");

/* slab cache of struct ioat_ring_ent; created at module init, destroyed at exit */
struct kmem_cache *ioat2_cache;

/* used as the PCI driver name, the iomap region name and the log prefix */
#define DRV_NAME "ioatdma"
89
/* PCI driver object registered in ioat_init_module() */
static struct pci_driver ioat_pci_driver = {
	.name		= DRV_NAME,
	.id_table	= ioat_pci_tbl,
	.probe		= ioat_pci_probe,
	.remove		= __devexit_p(ioat_remove),
};
96
97static struct ioatdma_device *
98alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase)
99{
100 struct device *dev = &pdev->dev;
101 struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
102
103 if (!d)
104 return NULL;
105 d->pdev = pdev;
106 d->reg_base = iobase;
107 return d;
108}
109
110static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
111{
112 void __iomem * const *iomap;
113 struct device *dev = &pdev->dev;
114 struct ioatdma_device *device;
115 int err;
116
117 err = pcim_enable_device(pdev);
118 if (err)
119 return err;
120
121 err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME);
122 if (err)
123 return err;
124 iomap = pcim_iomap_table(pdev);
125 if (!iomap)
126 return -ENOMEM;
127
128 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
129 if (err)
130 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
131 if (err)
132 return err;
133
134 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
135 if (err)
136 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
137 if (err)
138 return err;
139
140 device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL);
141 if (!device)
142 return -ENOMEM;
143
144 pci_set_master(pdev);
145
146 device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]);
147 if (!device)
148 return -ENOMEM;
149 pci_set_drvdata(pdev, device);
150
151 device->version = readb(device->reg_base + IOAT_VER_OFFSET);
152 if (device->version == IOAT_VER_1_2)
153 err = ioat1_dma_probe(device, ioat_dca_enabled);
154 else if (device->version == IOAT_VER_2_0)
155 err = ioat2_dma_probe(device, ioat_dca_enabled);
156 else if (device->version >= IOAT_VER_3_0)
157 err = ioat3_dma_probe(device, ioat_dca_enabled);
158 else
159 return -ENODEV;
160
161 if (err) {
162 dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n");
163 return -ENODEV;
164 }
165
166 return 0;
167}
168
169static void __devexit ioat_remove(struct pci_dev *pdev)
170{
171 struct ioatdma_device *device = pci_get_drvdata(pdev);
172
173 if (!device)
174 return;
175
176 dev_err(&pdev->dev, "Removing dma and dca services\n");
177 if (device->dca) {
178 unregister_dca_provider(device->dca, &pdev->dev);
179 free_dca_provider(device->dca);
180 device->dca = NULL;
181 }
182 ioat_dma_remove(device);
183}
184
185static int __init ioat_init_module(void)
186{
187 int err;
188
189 pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
190 DRV_NAME, IOAT_DMA_VERSION);
191
192 ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent),
193 0, SLAB_HWCACHE_ALIGN, NULL);
194 if (!ioat2_cache)
195 return -ENOMEM;
196
197 err = pci_register_driver(&ioat_pci_driver);
198 if (err)
199 kmem_cache_destroy(ioat2_cache);
200
201 return err;
202}
203module_init(ioat_init_module);
204
/*
 * ioat_exit_module - module exit point
 *
 * Unregisters the PCI driver first and destroys the "ioat2" slab cache
 * afterwards, the reverse of the order used in ioat_init_module().
 */
static void __exit ioat_exit_module(void)
{
	pci_unregister_driver(&ioat_pci_driver);
	kmem_cache_destroy(ioat2_cache);
}
module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
new file mode 100644
index 000000000000..63038e18ab03
--- /dev/null
+++ b/drivers/dma/ioat/registers.h
@@ -0,0 +1,250 @@
1/*
2 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59
16 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * The full GNU General Public License is included in this distribution in the
19 * file called COPYING.
20 */
#ifndef _IOAT_REGISTERS_H_
#define _IOAT_REGISTERS_H_

/*
 * Register offsets and bit definitions for the I/OAT DMA engine.
 *
 * The version-dependent helper macros below take a version value and compare
 * it against IOAT_VER_2_0, which is not defined in this header and must be
 * provided by the includer.
 */

/* IOAT_PCI_* offsets and bits (presumably PCI config space -- confirm) */
#define IOAT_PCI_DMACTRL_OFFSET			0x48
#define IOAT_PCI_DMACTRL_DMA_EN			0x00000001
#define IOAT_PCI_DMACTRL_MSI_EN			0x00000002

#define IOAT_PCI_DEVICE_ID_OFFSET		0x02
#define IOAT_PCI_DMAUNCERRSTS_OFFSET		0x148
#define IOAT_PCI_CHANERRMASK_INT_OFFSET		0x184

/* MMIO Device Registers */
#define IOAT_CHANCNT_OFFSET			0x00	/* 8-bit */

/* NOTE(review): the XFERCAP values look like log2 of the size (4KB -> 12) */
#define IOAT_XFERCAP_OFFSET			0x01	/* 8-bit */
#define IOAT_XFERCAP_4KB			12
#define IOAT_XFERCAP_8KB			13
#define IOAT_XFERCAP_16KB			14
#define IOAT_XFERCAP_32KB			15
#define IOAT_XFERCAP_32GB			0

#define IOAT_GENCTRL_OFFSET			0x02	/* 8-bit */
#define IOAT_GENCTRL_DEBUG_EN			0x01

#define IOAT_INTRCTRL_OFFSET			0x03	/* 8-bit */
#define IOAT_INTRCTRL_MASTER_INT_EN		0x01	/* Master Interrupt Enable */
#define IOAT_INTRCTRL_INT_STATUS		0x02	/* ATTNSTATUS -or- Channel Int */
#define IOAT_INTRCTRL_INT			0x04	/* INT_STATUS -and- MASTER_INT_EN */
#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL	0x08	/* Enable all MSI-X vectors */

#define IOAT_ATTNSTATUS_OFFSET			0x04	/* Each bit is a channel */

/* The version byte carries the major number in the high nibble */
#define IOAT_VER_OFFSET				0x08	/* 8-bit */
#define IOAT_VER_MAJOR_MASK			0xF0
#define IOAT_VER_MINOR_MASK			0x0F
#define GET_IOAT_VER_MAJOR(x)			(((x) & IOAT_VER_MAJOR_MASK) >> 4)
#define GET_IOAT_VER_MINOR(x)			((x) & IOAT_VER_MINOR_MASK)

#define IOAT_PERPORTOFFSET_OFFSET		0x0A	/* 16-bit */

#define IOAT_INTRDELAY_OFFSET			0x0C	/* 16-bit */
#define IOAT_INTRDELAY_INT_DELAY_MASK		0x3FFF	/* Interrupt Delay Time */
#define IOAT_INTRDELAY_COALESE_SUPPORT		0x8000	/* Interrupt Coalescing Supported */

#define IOAT_DEVICE_STATUS_OFFSET		0x0E	/* 16-bit */
#define IOAT_DEVICE_STATUS_DEGRADED_MODE	0x0001
#define IOAT_DEVICE_MMIO_RESTRICTED		0x0002
#define IOAT_DEVICE_MEMORY_BYPASS		0x0004
#define IOAT_DEVICE_ADDRESS_REMAPPING		0x0008

#define IOAT_DMA_CAP_OFFSET			0x10	/* 32-bit */
#define IOAT_CAP_PAGE_BREAK			0x00000001
#define IOAT_CAP_CRC				0x00000002
#define IOAT_CAP_SKIP_MARKER			0x00000004
#define IOAT_CAP_DCA				0x00000010
#define IOAT_CAP_CRC_MOVE			0x00000020
#define IOAT_CAP_FILL_BLOCK			0x00000040
#define IOAT_CAP_APIC				0x00000080
#define IOAT_CAP_XOR				0x00000100
#define IOAT_CAP_PQ				0x00000200

#define IOAT_CHANNEL_MMIO_SIZE			0x80	/* Each Channel MMIO space is this size */

/* DMA Channel Registers */
#define IOAT_CHANCTRL_OFFSET			0x00	/* 16-bit Channel Control Register */
#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK	0xF000
#define IOAT3_CHANCTRL_COMPL_DCA_EN		0x0200
#define IOAT_CHANCTRL_CHANNEL_IN_USE		0x0100
#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL	0x0020
#define IOAT_CHANCTRL_ERR_INT_EN		0x0010
#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN		0x0008
#define IOAT_CHANCTRL_ERR_COMPLETION_EN		0x0004
#define IOAT_CHANCTRL_INT_REARM			0x0001
/* Combination of the rearm bit and the three error-handling enables */
#define IOAT_CHANCTRL_RUN			(IOAT_CHANCTRL_INT_REARM |\
						 IOAT_CHANCTRL_ERR_COMPLETION_EN |\
						 IOAT_CHANCTRL_ANY_ERR_ABORT_EN |\
						 IOAT_CHANCTRL_ERR_INT_EN)

#define IOAT_DMA_COMP_OFFSET			0x02	/* 16-bit DMA channel compatibility */
#define IOAT_DMA_COMP_V1			0x0001	/* Compatibility with DMA version 1 */
#define IOAT_DMA_COMP_V2			0x0002	/* Compatibility with DMA version 2 */

/* Channel status: the offset depends on whether ver is below IOAT_VER_2_0 */
#define IOAT1_CHANSTS_OFFSET		0x04	/* 64-bit Channel Status Register */
#define IOAT2_CHANSTS_OFFSET		0x08	/* 64-bit Channel Status Register */
#define IOAT_CHANSTS_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
						? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
#define IOAT1_CHANSTS_OFFSET_LOW	0x04
#define IOAT2_CHANSTS_OFFSET_LOW	0x08
#define IOAT_CHANSTS_OFFSET_LOW(ver)		((ver) < IOAT_VER_2_0 \
						? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
#define IOAT1_CHANSTS_OFFSET_HIGH	0x08
#define IOAT2_CHANSTS_OFFSET_HIGH	0x0C
#define IOAT_CHANSTS_OFFSET_HIGH(ver)		((ver) < IOAT_VER_2_0 \
						? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
/* Everything above the low six bits of the 64-bit status value */
#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR	(~0x3fULL)
#define IOAT_CHANSTS_SOFT_ERR			0x10ULL
#define IOAT_CHANSTS_UNAFFILIATED_ERR		0x8ULL
#define IOAT_CHANSTS_STATUS			0x7ULL
/* NOTE(review): presumably the values of the IOAT_CHANSTS_STATUS field */
#define IOAT_CHANSTS_ACTIVE			0x0
#define IOAT_CHANSTS_DONE			0x1
#define IOAT_CHANSTS_SUSPENDED			0x2
#define IOAT_CHANSTS_HALTED			0x3

#define IOAT_CHAN_DMACOUNT_OFFSET	0x06	/* 16-bit DMA Count register */

#define IOAT_DCACTRL_OFFSET			0x30	/* 32 bit Direct Cache Access Control Register */
#define IOAT_DCACTRL_CMPL_WRITE_ENABLE		0x10000
#define IOAT_DCACTRL_TARGET_CPU_MASK		0xFFFF	/* APIC ID */

/* CB DCA Memory Space Registers */
#define IOAT_DCAOFFSET_OFFSET			0x14
/* CB_BAR + IOAT_DCAOFFSET value */
#define IOAT_DCA_VER_OFFSET			0x00
#define IOAT_DCA_VER_MAJOR_MASK			0xF0
#define IOAT_DCA_VER_MINOR_MASK			0x0F

#define IOAT_DCA_COMP_OFFSET			0x02
#define IOAT_DCA_COMP_V1			0x1

#define IOAT_FSB_CAPABILITY_OFFSET		0x04
#define IOAT_FSB_CAPABILITY_PREFETCH		0x1

#define IOAT_PCI_CAPABILITY_OFFSET		0x06
#define IOAT_PCI_CAPABILITY_MEMWR		0x1

#define IOAT_FSB_CAP_ENABLE_OFFSET		0x08
#define IOAT_FSB_CAP_ENABLE_PREFETCH		0x1

#define IOAT_PCI_CAP_ENABLE_OFFSET		0x0A
#define IOAT_PCI_CAP_ENABLE_MEMWR		0x1

/* Five 4-bit tag fields, each with a mask and the matching shift */
#define IOAT_APICID_TAG_MAP_OFFSET		0x0C
#define IOAT_APICID_TAG_MAP_TAG0		0x0000000F
#define IOAT_APICID_TAG_MAP_TAG0_SHIFT		0
#define IOAT_APICID_TAG_MAP_TAG1		0x000000F0
#define IOAT_APICID_TAG_MAP_TAG1_SHIFT		4
#define IOAT_APICID_TAG_MAP_TAG2		0x00000F00
#define IOAT_APICID_TAG_MAP_TAG2_SHIFT		8
#define IOAT_APICID_TAG_MAP_TAG3		0x0000F000
#define IOAT_APICID_TAG_MAP_TAG3_SHIFT		12
#define IOAT_APICID_TAG_MAP_TAG4		0x000F0000
#define IOAT_APICID_TAG_MAP_TAG4_SHIFT		16
/* NOTE(review): 40-bit constant, wider than a 32-bit register value */
#define IOAT_APICID_TAG_CB2_VALID		0x8080808080

#define IOAT_DCA_GREQID_OFFSET			0x10
#define IOAT_DCA_GREQID_SIZE			0x04
#define IOAT_DCA_GREQID_MASK			0xFFFF
#define IOAT_DCA_GREQID_IGNOREFUN		0x10000000
#define IOAT_DCA_GREQID_VALID			0x20000000
#define IOAT_DCA_GREQID_LASTID			0x80000000

/* IOAT3_* variants of the DCA registers */
#define IOAT3_CSI_CAPABILITY_OFFSET		0x08
#define IOAT3_CSI_CAPABILITY_PREFETCH		0x1

#define IOAT3_PCI_CAPABILITY_OFFSET		0x0A
#define IOAT3_PCI_CAPABILITY_MEMWR		0x1

#define IOAT3_CSI_CONTROL_OFFSET		0x0C
#define IOAT3_CSI_CONTROL_PREFETCH		0x1

#define IOAT3_PCI_CONTROL_OFFSET		0x0E
#define IOAT3_PCI_CONTROL_MEMWR			0x1

#define IOAT3_APICID_TAG_MAP_OFFSET		0x10
#define IOAT3_APICID_TAG_MAP_OFFSET_LOW		0x10
#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH	0x14

#define IOAT3_DCA_GREQID_OFFSET			0x02

/* Descriptor chain address: the offset depends on whether ver is below IOAT_VER_2_0 */
#define IOAT1_CHAINADDR_OFFSET		0x0C	/* 64-bit Descriptor Chain Address Register */
#define IOAT2_CHAINADDR_OFFSET		0x10	/* 64-bit Descriptor Chain Address Register */
#define IOAT_CHAINADDR_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
						? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
#define IOAT1_CHAINADDR_OFFSET_LOW	0x0C
#define IOAT2_CHAINADDR_OFFSET_LOW	0x10
#define IOAT_CHAINADDR_OFFSET_LOW(ver)		((ver) < IOAT_VER_2_0 \
						? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
#define IOAT1_CHAINADDR_OFFSET_HIGH	0x10
#define IOAT2_CHAINADDR_OFFSET_HIGH	0x14
#define IOAT_CHAINADDR_OFFSET_HIGH(ver)		((ver) < IOAT_VER_2_0 \
						? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)

/* Channel command: the offset depends on whether ver is below IOAT_VER_2_0 */
#define IOAT1_CHANCMD_OFFSET		0x14	/* 8-bit DMA Channel Command Register */
#define IOAT2_CHANCMD_OFFSET		0x04	/* 8-bit DMA Channel Command Register */
#define IOAT_CHANCMD_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
						? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
#define IOAT_CHANCMD_RESET			0x20
#define IOAT_CHANCMD_RESUME			0x10
#define IOAT_CHANCMD_ABORT			0x08
#define IOAT_CHANCMD_SUSPEND			0x04
#define IOAT_CHANCMD_APPEND			0x02
#define IOAT_CHANCMD_START			0x01

#define IOAT_CHANCMP_OFFSET			0x18	/* 64-bit Channel Completion Address Register */
#define IOAT_CHANCMP_OFFSET_LOW			0x18
#define IOAT_CHANCMP_OFFSET_HIGH		0x1C

#define IOAT_CDAR_OFFSET			0x20	/* 64-bit Current Descriptor Address Register */
#define IOAT_CDAR_OFFSET_LOW			0x20
#define IOAT_CDAR_OFFSET_HIGH			0x24

#define IOAT_CHANERR_OFFSET			0x28	/* 32-bit Channel Error Register */
#define IOAT_CHANERR_SRC_ADDR_ERR	0x0001
#define IOAT_CHANERR_DEST_ADDR_ERR	0x0002
#define IOAT_CHANERR_NEXT_ADDR_ERR	0x0004
#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR	0x0008
#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR	0x0010
#define IOAT_CHANERR_CHANCMD_ERR	0x0020
#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR	0x0040
#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR	0x0080
#define IOAT_CHANERR_READ_DATA_ERR	0x0100
#define IOAT_CHANERR_WRITE_DATA_ERR	0x0200
#define IOAT_CHANERR_CONTROL_ERR	0x0400
#define IOAT_CHANERR_LENGTH_ERR	0x0800
#define IOAT_CHANERR_COMPLETION_ADDR_ERR	0x1000
#define IOAT_CHANERR_INT_CONFIGURATION_ERR	0x2000
#define IOAT_CHANERR_SOFT_ERR	0x4000
#define IOAT_CHANERR_UNAFFILIATED_ERR	0x8000
#define IOAT_CHANERR_XOR_P_OR_CRC_ERR	0x10000
#define IOAT_CHANERR_XOR_Q_ERR	0x20000
#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR	0x40000

#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)

#define IOAT_CHANERR_MASK_OFFSET		0x2C	/* 32-bit Channel Error Mask Register */

#endif /* _IOAT_REGISTERS_H_ */