diff options
author | Dan Williams <dan.j.williams@intel.com> | 2009-09-08 20:42:57 -0400 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2009-09-08 20:42:57 -0400 |
commit | b094ad3be564e7cc59cca4ff0256550d3a55dd3b (patch) | |
tree | d69f515b2ee6af2b0f12bb3028d7c7f5b3390794 /drivers/dma/ioat/dma_v3.c | |
parent | e61dacaeb3918cd00cd642e8fb0828324ac59819 (diff) |
ioat3: xor support
ioat3.2 adds xor offload support for up to 8 sources. It can also
perform an xor-zero-sum operation to validate whether all given sources
sum to zero, without writing to a destination. Xor descriptors differ
from memcpy in that one operation may require multiple descriptors
depending on the number of sources. When the number of sources exceeds
5, an extended descriptor is needed. These descriptors need to be
accounted for when updating the DMA_COUNT register.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'drivers/dma/ioat/dma_v3.c')
-rw-r--r-- | drivers/dma/ioat/dma_v3.c | 218 |
1 files changed, 216 insertions, 2 deletions
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 0913d11e09ee..957c205f91d0 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c | |||
@@ -64,8 +64,33 @@ | |||
64 | #include "dma.h" | 64 | #include "dma.h" |
65 | #include "dma_v2.h" | 65 | #include "dma_v2.h" |
66 | 66 | ||
67 | /* ioat hardware assumes at least two sources for raid operations */ | ||
68 | #define src_cnt_to_sw(x) ((x) + 2) | ||
69 | #define src_cnt_to_hw(x) ((x) - 2) | ||
70 | |||
71 | /* provide a lookup table for setting the source address in the base or | ||
72 | * extended descriptor of an xor descriptor | ||
73 | */ | ||
74 | static const u8 xor_idx_to_desc __read_mostly = 0xd0; | ||
75 | static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 }; | ||
76 | |||
77 | static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx) | ||
78 | { | ||
79 | struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; | ||
80 | |||
81 | return raw->field[xor_idx_to_field[idx]]; | ||
82 | } | ||
83 | |||
84 | static void xor_set_src(struct ioat_raw_descriptor *descs[2], | ||
85 | dma_addr_t addr, u32 offset, int idx) | ||
86 | { | ||
87 | struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; | ||
88 | |||
89 | raw->field[xor_idx_to_field[idx]] = addr + offset; | ||
90 | } | ||
91 | |||
67 | static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, | 92 | static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, |
68 | struct ioat_ring_ent *desc) | 93 | struct ioat_ring_ent *desc, int idx) |
69 | { | 94 | { |
70 | struct ioat_chan_common *chan = &ioat->base; | 95 | struct ioat_chan_common *chan = &ioat->base; |
71 | struct pci_dev *pdev = chan->device->pdev; | 96 | struct pci_dev *pdev = chan->device->pdev; |
@@ -86,13 +111,71 @@ static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, | |||
86 | PCI_DMA_FROMDEVICE, flags, 1); | 111 | PCI_DMA_FROMDEVICE, flags, 1); |
87 | break; | 112 | break; |
88 | } | 113 | } |
114 | case IOAT_OP_XOR_VAL: | ||
115 | case IOAT_OP_XOR: { | ||
116 | struct ioat_xor_descriptor *xor = desc->xor; | ||
117 | struct ioat_ring_ent *ext; | ||
118 | struct ioat_xor_ext_descriptor *xor_ex = NULL; | ||
119 | int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt); | ||
120 | struct ioat_raw_descriptor *descs[2]; | ||
121 | int i; | ||
122 | |||
123 | if (src_cnt > 5) { | ||
124 | ext = ioat2_get_ring_ent(ioat, idx + 1); | ||
125 | xor_ex = ext->xor_ex; | ||
126 | } | ||
127 | |||
128 | if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { | ||
129 | descs[0] = (struct ioat_raw_descriptor *) xor; | ||
130 | descs[1] = (struct ioat_raw_descriptor *) xor_ex; | ||
131 | for (i = 0; i < src_cnt; i++) { | ||
132 | dma_addr_t src = xor_get_src(descs, i); | ||
133 | |||
134 | ioat_unmap(pdev, src - offset, len, | ||
135 | PCI_DMA_TODEVICE, flags, 0); | ||
136 | } | ||
137 | |||
138 | /* dest is a source in xor validate operations */ | ||
139 | if (xor->ctl_f.op == IOAT_OP_XOR_VAL) { | ||
140 | ioat_unmap(pdev, xor->dst_addr - offset, len, | ||
141 | PCI_DMA_TODEVICE, flags, 1); | ||
142 | break; | ||
143 | } | ||
144 | } | ||
145 | |||
146 | if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) | ||
147 | ioat_unmap(pdev, xor->dst_addr - offset, len, | ||
148 | PCI_DMA_FROMDEVICE, flags, 1); | ||
149 | break; | ||
150 | } | ||
89 | default: | 151 | default: |
90 | dev_err(&pdev->dev, "%s: unknown op type: %#x\n", | 152 | dev_err(&pdev->dev, "%s: unknown op type: %#x\n", |
91 | __func__, desc->hw->ctl_f.op); | 153 | __func__, desc->hw->ctl_f.op); |
92 | } | 154 | } |
93 | } | 155 | } |
94 | 156 | ||
157 | static bool desc_has_ext(struct ioat_ring_ent *desc) | ||
158 | { | ||
159 | struct ioat_dma_descriptor *hw = desc->hw; | ||
160 | |||
161 | if (hw->ctl_f.op == IOAT_OP_XOR || | ||
162 | hw->ctl_f.op == IOAT_OP_XOR_VAL) { | ||
163 | struct ioat_xor_descriptor *xor = desc->xor; | ||
95 | 164 | ||
165 | if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) | ||
166 | return true; | ||
167 | } | ||
168 | |||
169 | return false; | ||
170 | } | ||
171 | |||
172 | /** | ||
173 | * __cleanup - reclaim used descriptors | ||
174 | * @ioat: channel (ring) to clean | ||
175 | * | ||
176 | * The difference from the dma_v2.c __cleanup() is that this routine | ||
177 | * handles extended descriptors and dma-unmapping raid operations. | ||
178 | */ | ||
96 | static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) | 179 | static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) |
97 | { | 180 | { |
98 | struct ioat_chan_common *chan = &ioat->base; | 181 | struct ioat_chan_common *chan = &ioat->base; |
@@ -114,7 +197,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) | |||
114 | tx = &desc->txd; | 197 | tx = &desc->txd; |
115 | if (tx->cookie) { | 198 | if (tx->cookie) { |
116 | chan->completed_cookie = tx->cookie; | 199 | chan->completed_cookie = tx->cookie; |
117 | ioat3_dma_unmap(ioat, desc); | 200 | ioat3_dma_unmap(ioat, desc, ioat->tail + i); |
118 | tx->cookie = 0; | 201 | tx->cookie = 0; |
119 | if (tx->callback) { | 202 | if (tx->callback) { |
120 | tx->callback(tx->callback_param); | 203 | tx->callback(tx->callback_param); |
@@ -124,6 +207,12 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) | |||
124 | 207 | ||
125 | if (tx->phys == phys_complete) | 208 | if (tx->phys == phys_complete) |
126 | seen_current = true; | 209 | seen_current = true; |
210 | |||
211 | /* skip extended descriptors */ | ||
212 | if (desc_has_ext(desc)) { | ||
213 | BUG_ON(i + 1 >= active); | ||
214 | i++; | ||
215 | } | ||
127 | } | 216 | } |
128 | ioat->tail += i; | 217 | ioat->tail += i; |
129 | BUG_ON(!seen_current); /* no active descs have written a completion? */ | 218 | BUG_ON(!seen_current); /* no active descs have written a completion? */ |
@@ -309,6 +398,121 @@ ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value, | |||
309 | return &desc->txd; | 398 | return &desc->txd; |
310 | } | 399 | } |
311 | 400 | ||
401 | static struct dma_async_tx_descriptor * | ||
402 | __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, | ||
403 | dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, | ||
404 | size_t len, unsigned long flags) | ||
405 | { | ||
406 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
407 | struct ioat_ring_ent *compl_desc; | ||
408 | struct ioat_ring_ent *desc; | ||
409 | struct ioat_ring_ent *ext; | ||
410 | size_t total_len = len; | ||
411 | struct ioat_xor_descriptor *xor; | ||
412 | struct ioat_xor_ext_descriptor *xor_ex = NULL; | ||
413 | struct ioat_dma_descriptor *hw; | ||
414 | u32 offset = 0; | ||
415 | int num_descs; | ||
416 | int with_ext; | ||
417 | int i; | ||
418 | u16 idx; | ||
419 | u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; | ||
420 | |||
421 | BUG_ON(src_cnt < 2); | ||
422 | |||
423 | num_descs = ioat2_xferlen_to_descs(ioat, len); | ||
424 | /* we need 2x the number of descriptors to cover greater than 5 | ||
425 | * sources | ||
426 | */ | ||
427 | if (src_cnt > 5) { | ||
428 | with_ext = 1; | ||
429 | num_descs *= 2; | ||
430 | } else | ||
431 | with_ext = 0; | ||
432 | |||
433 | /* completion writes from the raid engine may pass completion | ||
434 | * writes from the legacy engine, so we need one extra null | ||
435 | * (legacy) descriptor to ensure all completion writes arrive in | ||
436 | * order. | ||
437 | */ | ||
438 | if (likely(num_descs) && | ||
439 | ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) | ||
440 | /* pass */; | ||
441 | else | ||
442 | return NULL; | ||
443 | for (i = 0; i < num_descs; i += 1 + with_ext) { | ||
444 | struct ioat_raw_descriptor *descs[2]; | ||
445 | size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); | ||
446 | int s; | ||
447 | |||
448 | desc = ioat2_get_ring_ent(ioat, idx + i); | ||
449 | xor = desc->xor; | ||
450 | |||
451 | /* save a branch by unconditionally retrieving the | ||
452 | * extended descriptor xor_set_src() knows to not write | ||
453 | * to it in the single descriptor case | ||
454 | */ | ||
455 | ext = ioat2_get_ring_ent(ioat, idx + i + 1); | ||
456 | xor_ex = ext->xor_ex; | ||
457 | |||
458 | descs[0] = (struct ioat_raw_descriptor *) xor; | ||
459 | descs[1] = (struct ioat_raw_descriptor *) xor_ex; | ||
460 | for (s = 0; s < src_cnt; s++) | ||
461 | xor_set_src(descs, src[s], offset, s); | ||
462 | xor->size = xfer_size; | ||
463 | xor->dst_addr = dest + offset; | ||
464 | xor->ctl = 0; | ||
465 | xor->ctl_f.op = op; | ||
466 | xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); | ||
467 | |||
468 | len -= xfer_size; | ||
469 | offset += xfer_size; | ||
470 | dump_desc_dbg(ioat, desc); | ||
471 | } | ||
472 | |||
473 | /* last xor descriptor carries the unmap parameters and fence bit */ | ||
474 | desc->txd.flags = flags; | ||
475 | desc->len = total_len; | ||
476 | if (result) | ||
477 | desc->result = result; | ||
478 | xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); | ||
479 | |||
480 | /* completion descriptor carries interrupt bit */ | ||
481 | compl_desc = ioat2_get_ring_ent(ioat, idx + i); | ||
482 | compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; | ||
483 | hw = compl_desc->hw; | ||
484 | hw->ctl = 0; | ||
485 | hw->ctl_f.null = 1; | ||
486 | hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); | ||
487 | hw->ctl_f.compl_write = 1; | ||
488 | hw->size = NULL_DESC_BUFFER_SIZE; | ||
489 | dump_desc_dbg(ioat, compl_desc); | ||
490 | |||
491 | /* we leave the channel locked to ensure in order submission */ | ||
492 | return &desc->txd; | ||
493 | } | ||
494 | |||
495 | static struct dma_async_tx_descriptor * | ||
496 | ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, | ||
497 | unsigned int src_cnt, size_t len, unsigned long flags) | ||
498 | { | ||
499 | return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); | ||
500 | } | ||
501 | |||
502 | struct dma_async_tx_descriptor * | ||
503 | ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, | ||
504 | unsigned int src_cnt, size_t len, | ||
505 | enum sum_check_flags *result, unsigned long flags) | ||
506 | { | ||
507 | /* the cleanup routine only sets bits on validate failure, it | ||
508 | * does not clear bits on validate success... so clear it here | ||
509 | */ | ||
510 | *result = 0; | ||
511 | |||
512 | return __ioat3_prep_xor_lock(chan, result, src[0], &src[1], | ||
513 | src_cnt - 1, len, flags); | ||
514 | } | ||
515 | |||
312 | int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) | 516 | int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) |
313 | { | 517 | { |
314 | struct pci_dev *pdev = device->pdev; | 518 | struct pci_dev *pdev = device->pdev; |
@@ -333,6 +537,16 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) | |||
333 | dma_cap_set(DMA_MEMSET, dma->cap_mask); | 537 | dma_cap_set(DMA_MEMSET, dma->cap_mask); |
334 | dma->device_prep_dma_memset = ioat3_prep_memset_lock; | 538 | dma->device_prep_dma_memset = ioat3_prep_memset_lock; |
335 | } | 539 | } |
540 | if (cap & IOAT_CAP_XOR) { | ||
541 | dma->max_xor = 8; | ||
542 | dma->xor_align = 2; | ||
543 | |||
544 | dma_cap_set(DMA_XOR, dma->cap_mask); | ||
545 | dma->device_prep_dma_xor = ioat3_prep_xor; | ||
546 | |||
547 | dma_cap_set(DMA_XOR_VAL, dma->cap_mask); | ||
548 | dma->device_prep_dma_xor_val = ioat3_prep_xor_val; | ||
549 | } | ||
336 | 550 | ||
337 | /* -= IOAT ver.3 workarounds =- */ | 551 | /* -= IOAT ver.3 workarounds =- */ |
338 | /* Write CHANERRMSK_INT with 3E07h to mask out the errors | 552 | /* Write CHANERRMSK_INT with 3E07h to mask out the errors |