author     Dan Williams <dan.j.williams@intel.com>    2009-09-08 20:42:59 -0400
committer  Dan Williams <dan.j.williams@intel.com>    2009-09-08 20:42:59 -0400
commit     d69d235b7da2778891640ee95efcd68075978904
tree       e7f22d38d8a742ddbca167af123f4987ada8926c /drivers
parent     9de6fc717bdc574cf5faf9d46ce0f9d6265c7952
ioat3: pq support
ioat3.2 adds support for raid6 syndrome generation (the xor sum of Galois
field multiplication products) using up to 8 sources. It can also
perform a pq-zero-sum operation to validate whether the syndrome for a
given set of sources matches a previously computed syndrome.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
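
For background on the operation described above: RAID-6 defines P as the plain
xor of the source blocks and Q as the xor of each source multiplied, in GF(2^8)
with the 0x11d polynomial, by a per-source coefficient; those coefficients are
the 'coef' bytes the patch programs through pq_set_src(). The sketch below is a
minimal software model of that computation for illustration only, not driver
code; gf_mul() and gen_syndrome() are hypothetical names.

#include <stddef.h>
#include <stdint.h>

/* multiply a by b in GF(2^8), reducing modulo x^8 + x^4 + x^3 + x^2 + 1 (0x11d) */
static uint8_t gf_mul(uint8_t a, uint8_t b)
{
        uint8_t p = 0;

        while (b) {
                if (b & 1)
                        p ^= a;
                a = (a << 1) ^ ((a & 0x80) ? 0x1d : 0);        /* a *= x, reduced */
                b >>= 1;
        }
        return p;
}

/*
 * P is the xor sum of the sources; Q is the xor sum of the Galois field
 * products of each source byte with that source's coefficient.
 */
static void gen_syndrome(int src_cnt, size_t len, const uint8_t *coef,
                         const uint8_t * const *src, uint8_t *p, uint8_t *q)
{
        size_t i;
        int s;

        for (i = 0; i < len; i++) {
                uint8_t pv = 0, qv = 0;

                for (s = 0; s < src_cnt; s++) {
                        pv ^= src[s][i];
                        qv ^= gf_mul(coef[s], src[s][i]);
                }
                p[i] = pv;
                q[i] = qv;
        }
}

The pq-zero-sum (validate) form runs the same computation over the sources and
compares the result against previously written P/Q buffers, reporting a
mismatch instead of writing a new syndrome.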
Diffstat (limited to 'drivers')
-rw-r--r--   drivers/dma/ioat/dma_v3.c   265
1 file changed, 264 insertions(+), 1 deletion(-)
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index 927c08b08861..ca2af0fa1c36 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -69,10 +69,12 @@
 #define src_cnt_to_hw(x) ((x) - 2)

 /* provide a lookup table for setting the source address in the base or
- * extended descriptor of an xor descriptor
+ * extended descriptor of an xor or pq descriptor
  */
 static const u8 xor_idx_to_desc __read_mostly = 0xd0;
 static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 };
+static const u8 pq_idx_to_desc __read_mostly = 0xf8;
+static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 };

 static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
 {
@@ -89,6 +91,23 @@ static void xor_set_src(struct ioat_raw_descriptor *descs[2],
 	raw->field[xor_idx_to_field[idx]] = addr + offset;
 }

+static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+	return raw->field[pq_idx_to_field[idx]];
+}
+
+static void pq_set_src(struct ioat_raw_descriptor *descs[2],
+		       dma_addr_t addr, u32 offset, u8 coef, int idx)
+{
+	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
+	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+	raw->field[pq_idx_to_field[idx]] = addr + offset;
+	pq->coef[idx] = coef;
+}
+
 static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
 			    struct ioat_ring_ent *desc, int idx)
 {
@@ -148,6 +167,58 @@ static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
 				   PCI_DMA_FROMDEVICE, flags, 1);
 		break;
 	}
+	case IOAT_OP_PQ_VAL:
+	case IOAT_OP_PQ: {
+		struct ioat_pq_descriptor *pq = desc->pq;
+		struct ioat_ring_ent *ext;
+		struct ioat_pq_ext_descriptor *pq_ex = NULL;
+		int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+		struct ioat_raw_descriptor *descs[2];
+		int i;
+
+		if (src_cnt > 3) {
+			ext = ioat2_get_ring_ent(ioat, idx + 1);
+			pq_ex = ext->pq_ex;
+		}
+
+		/* in the 'continue' case don't unmap the dests as sources */
+		if (dmaf_p_disabled_continue(flags))
+			src_cnt--;
+		else if (dmaf_continue(flags))
+			src_cnt -= 3;
+
+		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+			descs[0] = (struct ioat_raw_descriptor *) pq;
+			descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+			for (i = 0; i < src_cnt; i++) {
+				dma_addr_t src = pq_get_src(descs, i);
+
+				ioat_unmap(pdev, src - offset, len,
+					   PCI_DMA_TODEVICE, flags, 0);
+			}
+
+			/* the dests are sources in pq validate operations */
+			if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
+				if (!(flags & DMA_PREP_PQ_DISABLE_P))
+					ioat_unmap(pdev, pq->p_addr - offset,
+						   len, PCI_DMA_TODEVICE, flags, 0);
+				if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+					ioat_unmap(pdev, pq->q_addr - offset,
+						   len, PCI_DMA_TODEVICE, flags, 0);
+				break;
+			}
+		}
+
+		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+			if (!(flags & DMA_PREP_PQ_DISABLE_P))
+				ioat_unmap(pdev, pq->p_addr - offset, len,
+					   PCI_DMA_BIDIRECTIONAL, flags, 1);
+			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+				ioat_unmap(pdev, pq->q_addr - offset, len,
+					   PCI_DMA_BIDIRECTIONAL, flags, 1);
+		}
+		break;
+	}
 	default:
 		dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
 			__func__, desc->hw->ctl_f.op);
@@ -164,6 +235,12 @@ static bool desc_has_ext(struct ioat_ring_ent *desc)

 		if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
 			return true;
+	} else if (hw->ctl_f.op == IOAT_OP_PQ ||
+		   hw->ctl_f.op == IOAT_OP_PQ_VAL) {
+		struct ioat_pq_descriptor *pq = desc->pq;
+
+		if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
+			return true;
 	}

 	return false;
@@ -513,6 +590,182 @@ ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
 				     src_cnt - 1, len, flags);
 }

+static void
+dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
+{
+	struct device *dev = to_dev(&ioat->base);
+	struct ioat_pq_descriptor *pq = desc->pq;
+	struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
+	struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
+	int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+	int i;
+
+	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
+		" sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
+		desc_id(desc), (unsigned long long) desc->txd.phys,
+		(unsigned long long) (pq_ex ? pq_ex->next : pq->next),
+		desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
+		pq->ctl_f.compl_write,
+		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
+		pq->ctl_f.src_cnt);
+	for (i = 0; i < src_cnt; i++)
+		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
+			(unsigned long long) pq_get_src(descs, i), pq->coef[i]);
+	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
+	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
+}
+
+static struct dma_async_tx_descriptor *
+__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
+		     const dma_addr_t *dst, const dma_addr_t *src,
+		     unsigned int src_cnt, const unsigned char *scf,
+		     size_t len, unsigned long flags)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	struct ioat_chan_common *chan = &ioat->base;
+	struct ioat_ring_ent *compl_desc;
+	struct ioat_ring_ent *desc;
+	struct ioat_ring_ent *ext;
+	size_t total_len = len;
+	struct ioat_pq_descriptor *pq;
+	struct ioat_pq_ext_descriptor *pq_ex = NULL;
+	struct ioat_dma_descriptor *hw;
+	u32 offset = 0;
+	int num_descs;
+	int with_ext;
+	int i, s;
+	u16 idx;
+	u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
+
+	dev_dbg(to_dev(chan), "%s\n", __func__);
+	/* the engine requires at least two sources (we provide
+	 * at least 1 implied source in the DMA_PREP_CONTINUE case)
+	 */
+	BUG_ON(src_cnt + dmaf_continue(flags) < 2);
+
+	num_descs = ioat2_xferlen_to_descs(ioat, len);
+	/* we need 2x the number of descriptors to cover greater than 3
+	 * sources
+	 */
+	if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) {
+		with_ext = 1;
+		num_descs *= 2;
+	} else
+		with_ext = 0;
+
+	/* completion writes from the raid engine may pass completion
+	 * writes from the legacy engine, so we need one extra null
+	 * (legacy) descriptor to ensure all completion writes arrive in
+	 * order.
+	 */
+	if (likely(num_descs) &&
+	    ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
+		/* pass */;
+	else
+		return NULL;
+	for (i = 0; i < num_descs; i += 1 + with_ext) {
+		struct ioat_raw_descriptor *descs[2];
+		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+		desc = ioat2_get_ring_ent(ioat, idx + i);
+		pq = desc->pq;
+
+		/* save a branch by unconditionally retrieving the
+		 * extended descriptor pq_set_src() knows to not write
+		 * to it in the single descriptor case
+		 */
+		ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
+		pq_ex = ext->pq_ex;
+
+		descs[0] = (struct ioat_raw_descriptor *) pq;
+		descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+
+		for (s = 0; s < src_cnt; s++)
+			pq_set_src(descs, src[s], offset, scf[s], s);
+
+		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
+		if (dmaf_p_disabled_continue(flags))
+			pq_set_src(descs, dst[1], offset, 1, s++);
+		else if (dmaf_continue(flags)) {
+			pq_set_src(descs, dst[0], offset, 0, s++);
+			pq_set_src(descs, dst[1], offset, 1, s++);
+			pq_set_src(descs, dst[1], offset, 0, s++);
+		}
+		pq->size = xfer_size;
+		pq->p_addr = dst[0] + offset;
+		pq->q_addr = dst[1] + offset;
+		pq->ctl = 0;
+		pq->ctl_f.op = op;
+		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
+		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
+		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
+
+		len -= xfer_size;
+		offset += xfer_size;
+	}
+
+	/* last pq descriptor carries the unmap parameters and fence bit */
+	desc->txd.flags = flags;
+	desc->len = total_len;
+	if (result)
+		desc->result = result;
+	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+	dump_pq_desc_dbg(ioat, desc, ext);
+
+	/* completion descriptor carries interrupt bit */
+	compl_desc = ioat2_get_ring_ent(ioat, idx + i);
+	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+	hw = compl_desc->hw;
+	hw->ctl = 0;
+	hw->ctl_f.null = 1;
+	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+	hw->ctl_f.compl_write = 1;
+	hw->size = NULL_DESC_BUFFER_SIZE;
+	dump_desc_dbg(ioat, compl_desc);
+
+	/* we leave the channel locked to ensure in order submission */
+	return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+	      unsigned int src_cnt, const unsigned char *scf, size_t len,
+	      unsigned long flags)
+{
+	/* handle the single source multiply case from the raid6
+	 * recovery path
+	 */
+	if (unlikely((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1)) {
+		dma_addr_t single_source[2];
+		unsigned char single_source_coef[2];
+
+		BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
+		single_source[0] = src[0];
+		single_source[1] = src[0];
+		single_source_coef[0] = scf[0];
+		single_source_coef[1] = 0;
+
+		return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
+					    single_source_coef, len, flags);
+	} else
+		return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
+					    len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+		  unsigned int src_cnt, const unsigned char *scf, size_t len,
+		  enum sum_check_flags *pqres, unsigned long flags)
+{
+	/* the cleanup routine only sets bits on validate failure, it
+	 * does not clear bits on validate success... so clear it here
+	 */
+	*pqres = 0;
+
+	return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
+				    flags);
+}
+
 static void __devinit ioat3_dma_test_callback(void *dma_async_param)
 {
 	struct completion *cmp = dma_async_param;
@@ -822,6 +1075,16 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
 		dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
 		dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
 	}
+	if (cap & IOAT_CAP_PQ) {
+		dma_set_maxpq(dma, 8, 0);
+		dma->pq_align = 2;
+
+		dma_cap_set(DMA_PQ, dma->cap_mask);
+		dma->device_prep_dma_pq = ioat3_prep_pq;
+
+		dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
+		dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
+	}

 	/* -= IOAT ver.3 workarounds =- */
 	/* Write CHANERRMSK_INT with 3E07h to mask out the errors